def pollForClosable(self):
    """
    _pollForClosable_

    Search WMBS for filesets that can be closed and mark them as closed.
    """
    myThread = threading.currentThread()
    myThread.transaction.begin()

    closableFilesetDAO = self.daoFactory(classname="Fileset.ListClosable")
    closableFilesets = closableFilesetDAO.execute()

    for closableFileset in closableFilesets:
        openFileset = Fileset(id=closableFileset)
        openFileset.load()

        logging.debug("Closing fileset %s", openFileset.name)
        openFileset.markOpen(False)

    myThread.transaction.commit()
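# --- Illustrative sketch, not part of the component code above ---
# A minimal, hedged example of the per-fileset close operation that
# pollForClosable() performs for each id returned by the Fileset.ListClosable
# DAO. The import path is the usual WMCore location of the WMBS Fileset class;
# the standalone helper name is an assumption made only for this sketch.
import logging

from WMCore.WMBS.Fileset import Fileset


def closeFileset(filesetId):
    """Load a WMBS fileset by id, mark it closed and return its name."""
    fileset = Fileset(id=filesetId)
    fileset.load()
    logging.debug("Closing fileset %s", fileset.name)
    fileset.markOpen(False)
    return fileset.name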
class RepackMergeTest(unittest.TestCase): """ _RepackMergeTest_ Test for RepackMerge job splitter """ def setUp(self): """ _setUp_ """ self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setSchema(customModules = ["WMComponent.DBS3Buffer", "T0.WMBS"]) self.splitterFactory = SplitterFactory(package = "T0.JobSplitting") myThread = threading.currentThread() daoFactory = DAOFactory(package = "T0.WMBS", logger = logging, dbinterface = myThread.dbi) wmbsDaoFactory = DAOFactory(package = "WMCore.WMBS", logger = logging, dbinterface = myThread.dbi) myThread.dbi.processData("""INSERT INTO wmbs_location (id, site_name, state, state_time) VALUES (1, 'SomeSite', 1, 1) """, transaction = False) myThread.dbi.processData("""INSERT INTO wmbs_pnns (id, pnn) VALUES (2, 'SomePNN') """, transaction = False) myThread.dbi.processData("""INSERT INTO wmbs_location_pnns (location, pnn) VALUES (1, 2) """, transaction = False) insertRunDAO = daoFactory(classname = "RunConfig.InsertRun") insertRunDAO.execute(binds = { 'RUN' : 1, 'HLTKEY' : "someHLTKey" }, transaction = False) insertLumiDAO = daoFactory(classname = "RunConfig.InsertLumiSection") insertLumiDAO.execute(binds = { 'RUN' : 1, 'LUMI' : 1 }, transaction = False) insertLumiDAO.execute(binds = { 'RUN' : 1, 'LUMI' : 2 }, transaction = False) insertLumiDAO.execute(binds = { 'RUN' : 1, 'LUMI' : 3 }, transaction = False) insertLumiDAO.execute(binds = { 'RUN' : 1, 'LUMI' : 4 }, transaction = False) insertLumiDAO.execute(binds = { 'RUN' : 1, 'LUMI' : 5 }, transaction = False) insertStreamDAO = daoFactory(classname = "RunConfig.InsertStream") insertStreamDAO.execute(binds = { 'STREAM' : "A" }, transaction = False) insertCMSSVersionDAO = daoFactory(classname = "RunConfig.InsertCMSSWVersion") insertCMSSVersionDAO.execute(binds = { 'VERSION' : "CMSSW_4_2_7" }, transaction = False) insertStreamCMSSWVersionDAO = daoFactory(classname = "RunConfig.InsertStreamCMSSWVersion") insertStreamCMSSWVersionDAO.execute(binds = { 'RUN' : 1, 'STREAM' : 'A', 'VERSION' : "CMSSW_4_2_7" }, transaction = False) insertStreamerDAO = daoFactory(classname = "RunConfig.InsertStreamer") insertStreamerDAO.execute(streamerPNN = "SomePNN", binds = { 'RUN' : 1, 'P5_ID' : 1, 'LUMI' : 4, 'STREAM' : "A", 'LFN' : "/testLFN/A", 'FILESIZE' : 100, 'EVENTS' : 100, 'TIME' : int(time.time()) }, transaction = False) insertStreamFilesetDAO = daoFactory(classname = "RunConfig.InsertStreamFileset") insertStreamFilesetDAO.execute(1, "A", "TestFileset1") self.fileset1 = Fileset(name = "TestFileset1") self.fileset2 = Fileset(name = "TestFileset2") self.fileset1.load() self.fileset2.create() workflow1 = Workflow(spec = "spec.xml", owner = "hufnagel", name = "TestWorkflow1", task="Test") workflow2 = Workflow(spec = "spec.xml", owner = "hufnagel", name = "TestWorkflow2", task="Test") workflow1.create() workflow2.create() self.subscription1 = Subscription(fileset = self.fileset1, workflow = workflow1, split_algo = "Repack", type = "Repack") self.subscription2 = Subscription(fileset = self.fileset2, workflow = workflow2, split_algo = "RepackMerge", type = "RepackMerge") self.subscription1.create() self.subscription2.create() myThread.dbi.processData("""INSERT INTO wmbs_workflow_output (WORKFLOW_ID, OUTPUT_IDENTIFIER, OUTPUT_FILESET) VALUES (%d, 'SOMEOUTPUT', %d) """ % (workflow1.id, self.fileset2.id), transaction = False) # keep for later self.insertSplitLumisDAO = daoFactory(classname = "JobSplitting.InsertSplitLumis") self.insertClosedLumiDAO = 
daoFactory(classname = "RunLumiCloseout.InsertClosedLumi") self.feedStreamersDAO = daoFactory(classname = "Tier0Feeder.FeedStreamers") self.acquireFilesDAO = wmbsDaoFactory(classname = "Subscriptions.AcquireFiles") self.completeFilesDAO = wmbsDaoFactory(classname = "Subscriptions.CompleteFiles") self.currentTime = int(time.time()) # default split parameters self.splitArgs = {} self.splitArgs['minInputSize'] = 2.1 * 1024 * 1024 * 1024 self.splitArgs['maxInputSize'] = 4.0 * 1024 * 1024 * 1024 self.splitArgs['maxInputEvents'] = 100000000 self.splitArgs['maxInputFiles'] = 1000 self.splitArgs['maxEdmSize'] = 20 * 1024 * 1024 * 1024 self.splitArgs['maxOverSize'] = 10 * 1024 * 1024 * 1024 self.SplitArgs['maxLatency'] = 50000 return def tearDown(self): """ _tearDown_ """ self.testInit.clearDatabase() return def deleteSplitLumis(self): """ _deleteSplitLumis_ """ myThread = threading.currentThread() myThread.dbi.processData("""DELETE FROM lumi_section_split_active """, transaction = False) return def test00(self): """ _test00_ Test that the job name prefix feature works Test max edm size threshold for single lumi small lumi, followed by over-large lumi expect 1 job for small lumi and 4 jobs for over-large """ mySplitArgs = self.splitArgs.copy() for lumi in [1, 2]: for i in range(2 * lumi): newFile = File(makeUUID(), size = 1000 * lumi * lumi, events = 100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomePNN", immediateSave = False) newFile.create() self.fileset2.addFile(newFile) self.fileset2.commit() jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription2) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") mySplitArgs['maxEdmSize'] = 13000 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 3, "ERROR: JobFactory didn't create three jobs") job = jobGroups[0].jobs[0] self.assertTrue(job['name'].startswith("RepackMerge-"), "ERROR: Job has wrong name") self.assertEqual(len(job.getFiles()), 2, "ERROR: Job does not process 2 files") job = jobGroups[0].jobs[1] self.assertEqual(len(job.getFiles()), 3, "ERROR: Job does not process 3 files") job = jobGroups[0].jobs[2] self.assertEqual(len(job.getFiles()), 1, "ERROR: Job does not process 1 file") return def test01(self): """ _test01_ Test max size threshold for single lumi small lumi, followed by large lumi expect 1 job for small lumi and 1 job for large """ mySplitArgs = self.splitArgs.copy() for lumi in [1, 2]: for i in range(2): newFile = File(makeUUID(), size = 1000 * lumi, events = 100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomePNN", immediateSave = False) newFile.create() self.fileset2.addFile(newFile) self.fileset2.commit() jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription2) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") mySplitArgs['maxInputSize'] = 3000 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 2, "ERROR: JobFactory didn't create two jobs") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 2, "ERROR: Job does not process 2 files") job = jobGroups[0].jobs[1] self.assertEqual(len(job.getFiles()), 2, "ERROR: Job does not process 2 files") return def 
test02(self): """ _test02_ Test max event threshold for single lumi small lumi, followed by large lumi expect 1 job for small lumi and 1 job for large """ mySplitArgs = self.splitArgs.copy() for lumi in [1, 2]: for i in range(2): newFile = File(makeUUID(), size = 1000, events = 100 * lumi) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomePNN", immediateSave = False) newFile.create() self.fileset2.addFile(newFile) self.fileset2.commit() jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription2) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") mySplitArgs['maxInputEvents'] = 300 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 2, "ERROR: JobFactory didn't create two jobs") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 2, "ERROR: Job does not process 2 files") job = jobGroups[0].jobs[1] self.assertEqual(len(job.getFiles()), 2, "ERROR: Job does not process 2 files") return def test03(self): """ _test03_ Test max input files threshold for single lumi small lumi, followed by large lumi expect 1 job for small lumi and 1 job for large """ mySplitArgs = self.splitArgs.copy() for lumi in [1, 2]: for i in range(lumi * 2): newFile = File(makeUUID(), size = 1000, events = 100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomePNN", immediateSave = False) newFile.create() self.fileset2.addFile(newFile) self.fileset2.commit() jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription2) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") mySplitArgs['maxInputFiles'] = 3 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 2, "ERROR: JobFactory didn't create two jobs") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 2, "ERROR: Job does not process 2 files") job = jobGroups[0].jobs[1] self.assertEqual(len(job.getFiles()), 4, "ERROR: Job does not process 4 files") return def test04(self): """ _test04_ Test max size threshold for multi lumi 3 same size lumis """ mySplitArgs = self.splitArgs.copy() for lumi in [1, 2, 3]: for i in range(2): newFile = File(makeUUID(), size = 1000, events = 100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomePNN", immediateSave = False) newFile.create() self.fileset2.addFile(newFile) self.fileset2.commit() jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription2) mySplitArgs['minInputSize'] = 3000 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") mySplitArgs['maxInputSize'] = 5000 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create one job") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 4, "ERROR: Job does not process 4 files") self.fileset2.markOpen(False) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create one job") job = jobGroups[0].jobs[0] 
self.assertEqual(len(job.getFiles()), 2, "ERROR: Job does not process 2 files") return def test05(self): """ _test05_ Test max event threshold for multi lumi 3 same size lumis """ mySplitArgs = self.splitArgs.copy() for lumi in [1, 2, 3]: for i in range(2): newFile = File(makeUUID(), size = 1000, events = 100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomePNN", immediateSave = False) newFile.create() self.fileset2.addFile(newFile) self.fileset2.commit() jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription2) mySplitArgs['minInputSize'] = 3000 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") mySplitArgs['maxInputEvents'] = 500 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create one job") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 4, "ERROR: Job does not process 4 files") self.fileset2.markOpen(False) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create one job") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 2, "ERROR: Job does not process 2 files") return def test06(self): """ _test06_ Test max input files threshold for multi lumi 3 same size lumis """ mySplitArgs = self.splitArgs.copy() for lumi in [1, 2, 3]: for i in range(2): newFile = File(makeUUID(), size = 1000, events = 100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomePNN", immediateSave = False) newFile.create() self.fileset2.addFile(newFile) self.fileset2.commit() jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription2) mySplitArgs['minInputSize'] = 3000 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") mySplitArgs['maxInputFiles'] = 5 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create one job") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 4, "ERROR: Job does not process 4 files") self.fileset2.markOpen(False) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create one job") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 2, "ERROR: Job does not process 2 files") return def test07(self): """ _test07_ Test over merge one small lumi, one large lumi (small below min size, large below max size, but both together above max size) """ mySplitArgs = self.splitArgs.copy() for lumi in [1, 2]: for i in range(2): newFile = File(makeUUID(), size = 1000 * lumi * lumi, events = 100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomePNN", immediateSave = False) newFile.create() self.fileset2.addFile(newFile) self.fileset2.commit() jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription2) mySplitArgs['minInputSize'] = 3000 mySplitArgs['maxInputSize'] = 9000 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") 
self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create one job") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 4, "ERROR: Job does not process 4 files") return def test08(self): """ _test08_ Test under merge (over merge size threshold) one small lumi, one large lumi (small below min size, large below max size, but both together above max size) """ mySplitArgs = self.splitArgs.copy() for lumi in [1, 2]: for i in range(2): newFile = File(makeUUID(), size = 1000 * lumi * lumi, events = 100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomePNN", immediateSave = False) newFile.create() self.fileset2.addFile(newFile) self.fileset2.commit() jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription2) mySplitArgs['minInputSize'] = 3000 mySplitArgs['maxInputSize'] = 9000 mySplitArgs['maxOverSize'] = 9500 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create one job") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 2, "ERROR: Job does not process 2 files") self.fileset2.markOpen(False) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create one job") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 2, "ERROR: Job does not process 2 files") return def test09(self): """ _test09_ Test under merge (over merge event threshold) one small lumi, one large lumi (small below min size, large below max size, but both together above max size) It was changed due to maxinputevents not being used anymore. """ mySplitArgs = self.splitArgs.copy() for lumi in [1, 2]: for i in range(2): newFile = File(makeUUID(), size = 1000 * lumi * lumi, events = 100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomePNN", immediateSave = False) newFile.create() self.fileset2.addFile(newFile) self.fileset2.commit() jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription2) mySplitArgs['minInputSize'] = 1500 mySplitArgs['maxInputSize'] = 9000 mySplitArgs['maxOverSize'] = 9500 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create one job") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 2, "ERROR: Job does not process 2 files") self.fileset2.markOpen(False) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create one job") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 2, "ERROR: Job does not process 2 files") return def test10(self): """ _test10_ Test merging of multiple lumis with holes in the lumi sequence Hole is due to no streamer files for the lumi Multi lumi input It only works with a single hole, as it creates a merged file even with it being of a smaller size than the mininputsize. 
It was changed due to the maxinputevents not being used anymore """ mySplitArgs = self.splitArgs.copy() for lumi in [1, 2, 4]: for i in range(2): newFile = File(makeUUID(), size = 1000, events = 100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomePNN", immediateSave = False) newFile.create() self.fileset2.addFile(newFile) self.fileset2.commit() jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription2) mySplitArgs['minInputSize'] = 100000 mySplitArgs['maxInputSize'] = 200000 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") self.insertClosedLumiDAO.execute(binds = { 'RUN' : 1, 'LUMI' : 3, 'STREAM' : "A", 'FILECOUNT' : 0, 'INSERT_TIME' : self.currentTime, 'CLOSE_TIME' : self.currentTime }, transaction = False) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create one job") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 4, "ERROR: Job does not process 4 files") return
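# --- Illustrative sketch, not part of the test suite above ---
# Every RepackMerge test above follows the same driving pattern: seed files into
# fileset2, build a job factory from the RepackMerge subscription, then call it
# with threshold arguments copied from self.splitArgs. A condensed, hedged sketch
# of that pattern; splitterFactory and subscription are assumed to be prepared
# exactly as in setUp(), and the helper name is invented for illustration only.
def runRepackMergeSplitting(splitterFactory, subscription):
    splitArgs = {
        'minInputSize': 2.1 * 1024 * 1024 * 1024,   # 2.1 GB
        'maxInputSize': 4.0 * 1024 * 1024 * 1024,   # 4 GB
        'maxInputEvents': 100000000,
        'maxInputFiles': 1000,
        'maxEdmSize': 20 * 1024 * 1024 * 1024,      # 20 GB
        'maxOverSize': 10 * 1024 * 1024 * 1024,     # 10 GB
        'maxLatency': 50000,
    }
    jobFactory = splitterFactory(package="WMCore.WMBS", subscription=subscription)
    # With the default thresholds nothing is merged; lowering a threshold or
    # closing the input fileset makes the factory return a JobGroup of merge jobs.
    return jobFactory(**splitArgs)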
class RepackTest(unittest.TestCase): """ _RepackTest_ Test for Repack job splitter """ def setUp(self): """ _setUp_ """ self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setSchema(customModules = ["T0.WMBS"]) self.splitterFactory = SplitterFactory(package = "T0.JobSplitting") myThread = threading.currentThread() daoFactory = DAOFactory(package = "T0.WMBS", logger = logging, dbinterface = myThread.dbi) myThread.dbi.processData("""INSERT INTO wmbs_location (id, site_name, state) VALUES (1, 'SomeSite', 1) """, transaction = False) myThread.dbi.processData("""INSERT INTO wmbs_location_senames (location, se_name) VALUES (1, 'SomeSE') """, transaction = False) myThread.dbi.processData("""INSERT INTO wmbs_location_senames (location, se_name) VALUES (1, 'SomeSE2') """, transaction = False) insertRunDAO = daoFactory(classname = "RunConfig.InsertRun") insertRunDAO.execute(binds = { 'RUN' : 1, 'TIME' : int(time.time()), 'HLTKEY' : "someHLTKey" }, transaction = False) insertLumiDAO = daoFactory(classname = "RunConfig.InsertLumiSection") for lumi in [1, 2, 3, 4]: insertLumiDAO.execute(binds = { 'RUN' : 1, 'LUMI' : lumi }, transaction = False) insertStreamDAO = daoFactory(classname = "RunConfig.InsertStream") insertStreamDAO.execute(binds = { 'STREAM' : "A" }, transaction = False) insertStreamFilesetDAO = daoFactory(classname = "RunConfig.InsertStreamFileset") insertStreamFilesetDAO.execute(1, "A", "TestFileset1") self.fileset1 = Fileset(name = "TestFileset1") self.fileset1.load() workflow1 = Workflow(spec = "spec.xml", owner = "hufnagel", name = "TestWorkflow1", task="Test") workflow1.create() self.subscription1 = Subscription(fileset = self.fileset1, workflow = workflow1, split_algo = "Repack", type = "Repack") self.subscription1.create() # keep for later self.insertClosedLumiDAO = daoFactory(classname = "RunLumiCloseout.InsertClosedLumi") self.currentTime = int(time.time()) # default split parameters self.splitArgs = {} self.splitArgs['maxSizeSingleLumi'] = 20*1024*1024*1024 self.splitArgs['maxSizeMultiLumi'] = 10*1024*1024*1024 self.splitArgs['maxInputEvents'] = 500000 self.splitArgs['maxInputFiles'] = 1000 return def tearDown(self): """ _tearDown_ """ self.testInit.clearDatabase() return def getNumActiveSplitLumis(self): """ _getNumActiveSplitLumis_ helper function that counts the number of active split lumis """ myThread = threading.currentThread() results = myThread.dbi.processData("""SELECT COUNT(*) FROM lumi_section_split_active """, transaction = False)[0].fetchall() return results[0][0] def test00(self): """ _test00_ Test that the job name prefix feature works Test multi lumi size threshold Multi lumi input """ mySplitArgs = self.splitArgs.copy() for lumi in [1, 2, 3, 4]: filecount = 2 for i in range(filecount): newFile = File(makeUUID(), size = 1000, events = 100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomeSE", immediateSave = False) newFile.create() self.fileset1.addFile(newFile) self.fileset1.commit() jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription1) mySplitArgs['maxSizeMultiLumi'] = self.splitArgs['maxSizeMultiLumi'] jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") mySplitArgs['maxSizeMultiLumi'] = 5000 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't 
create a single job") job = jobGroups[0].jobs[0] self.assertTrue(job['name'].startswith("Repack-"), "ERROR: Job has wrong name") self.assertEqual(len(job.getFiles()), 4, "ERROR: Job does not process 4 files") self.fileset1.markOpen(False) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create a single job") job = jobGroups[0].jobs[0] self.assertTrue(job['name'].startswith("Repack-"), "ERROR: Job has wrong name") self.assertEqual(len(job.getFiles()), 4, "ERROR: Job does not process 4 files") self.assertEqual(self.getNumActiveSplitLumis(), 0, "ERROR: Split lumis were created") return def test01(self): """ _test01_ Test multi lumi event threshold Multi lumi input """ mySplitArgs = self.splitArgs.copy() insertClosedLumiBinds = [] for lumi in [1, 2, 3, 4]: filecount = 2 for i in range(filecount): newFile = File(makeUUID(), size = 1000, events = 100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomeSE", immediateSave = False) newFile.create() self.fileset1.addFile(newFile) insertClosedLumiBinds.append( { 'RUN' : 1, 'LUMI' : lumi, 'STREAM' : "A", 'FILECOUNT' : filecount, 'INSERT_TIME' : self.currentTime, 'CLOSE_TIME' : self.currentTime } ) self.fileset1.commit() jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription1) self.insertClosedLumiDAO.execute(binds = insertClosedLumiBinds, transaction = False) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") mySplitArgs['maxInputEvents'] = 500 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create a single job") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 4, "ERROR: Job does not process 4 files") self.fileset1.markOpen(False) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create a single job") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 4, "ERROR: Job does not process 4 files") self.assertEqual(self.getNumActiveSplitLumis(), 0, "ERROR: Split lumis were created") return def test02(self): """ _test02_ Test single lumi size threshold Single lumi input """ mySplitArgs = self.splitArgs.copy() insertClosedLumiBinds = [] for lumi in [1]: filecount = 8 for i in range(filecount): newFile = File(makeUUID(), size = 1000, events = 100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomeSE", immediateSave = False) newFile.create() self.fileset1.addFile(newFile) insertClosedLumiBinds.append( { 'RUN' : 1, 'LUMI' : lumi, 'STREAM' : "A", 'FILECOUNT' : filecount, 'INSERT_TIME' : self.currentTime, 'CLOSE_TIME' : self.currentTime } ) self.fileset1.commit() jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription1) self.insertClosedLumiDAO.execute(binds = insertClosedLumiBinds, transaction = False) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") mySplitArgs['maxSizeSingleLumi'] = 6500 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 2, "ERROR: JobFactory 
didn't create two jobs") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 6, "ERROR: Job does not process 6 files") job = jobGroups[0].jobs[1] self.assertEqual(len(job.getFiles()), 2, "ERROR: Job does not process 2 files") self.assertEqual(self.getNumActiveSplitLumis(), 1, "ERROR: Split lumis were not created") return def test03(self): """ _test03_ Test single lumi event threshold Single lumi input """ mySplitArgs = self.splitArgs.copy() insertClosedLumiBinds = [] for lumi in [1]: filecount = 8 for i in range(filecount): newFile = File(makeUUID(), size = 1000, events = 100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomeSE", immediateSave = False) newFile.create() self.fileset1.addFile(newFile) insertClosedLumiBinds.append( { 'RUN' : 1, 'LUMI' : lumi, 'STREAM' : "A", 'FILECOUNT' : filecount, 'INSERT_TIME' : self.currentTime, 'CLOSE_TIME' : self.currentTime } ) self.fileset1.commit() jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription1) self.insertClosedLumiDAO.execute(binds = insertClosedLumiBinds, transaction = False) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") mySplitArgs['maxInputEvents'] = 650 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 2, "ERROR: JobFactory didn't create two jobs") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 6, "ERROR: Job does not process 6 files") job = jobGroups[0].jobs[1] self.assertEqual(len(job.getFiles()), 2, "ERROR: Job does not process 2 files") self.assertEqual(self.getNumActiveSplitLumis(), 1, "ERROR: Split lumis were not created") return def test04(self): """ _test04_ Test streamer count threshold (only multi lumi) Multi lumi input """ mySplitArgs = self.splitArgs.copy() insertClosedLumiBinds = [] for lumi in [1, 2, 3, 4]: filecount = 2 for i in range(filecount): newFile = File(makeUUID(), size = 1000, events = 100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomeSE", immediateSave = False) newFile.create() self.fileset1.addFile(newFile) insertClosedLumiBinds.append( { 'RUN' : 1, 'LUMI' : lumi, 'STREAM' : "A", 'FILECOUNT' : filecount, 'INSERT_TIME' : self.currentTime, 'CLOSE_TIME' : self.currentTime } ) self.fileset1.commit() jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription1) self.insertClosedLumiDAO.execute(binds = insertClosedLumiBinds, transaction = False) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") mySplitArgs['maxInputFiles'] = 5 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create a single job") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 4, "ERROR: Job does not process 4 files") self.fileset1.markOpen(False) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create a single job") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 4, "ERROR: Job does not process 4 files") self.assertEqual(self.getNumActiveSplitLumis(), 0, "ERROR: Split lumis were created") return def test05(self): """ _test05_ Test repacking of 
multiple lumis with holes in the lumi sequence Multi lumi input """ mySplitArgs = self.splitArgs.copy() insertClosedLumiBinds = [] for lumi in [1, 2, 4]: filecount = 2 for i in range(filecount): newFile = File(makeUUID(), size = 1000, events = 100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomeSE", immediateSave = False) newFile.create() self.fileset1.addFile(newFile) insertClosedLumiBinds.append( { 'RUN' : 1, 'LUMI' : lumi, 'STREAM' : "A", 'FILECOUNT' : filecount, 'INSERT_TIME' : self.currentTime, 'CLOSE_TIME' : self.currentTime } ) self.fileset1.commit() jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription1) self.insertClosedLumiDAO.execute(binds = insertClosedLumiBinds, transaction = False) mySplitArgs['maxInputFiles'] = 5 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") self.insertClosedLumiDAO.execute(binds = { 'RUN' : 1, 'LUMI' : 3, 'STREAM' : "A", 'FILECOUNT' : 0, 'INSERT_TIME' : self.currentTime, 'CLOSE_TIME' : self.currentTime }, transaction = False) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create one job") self.assertEqual(len(jobGroups[0].jobs[0].getFiles()), 4, "ERROR: first job does not process 4 files") return def test06(self): """ _test06_ Test repacking of 3 lumis 2 small lumis (single job), followed by a big one (multiple jobs) files for lumi 1 and 2 are below multi-lumi thresholds files for lumi 3 are above single-lumi threshold """ mySplitArgs = self.splitArgs.copy() insertClosedLumiBinds = [] for lumi in [1, 2, 3]: filecount = 2 for i in range(filecount): if lumi == 3: nevents = 500 else: nevents = 100 newFile = File(makeUUID(), size = 1000, events = nevents) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomeSE", immediateSave = False) newFile.create() self.fileset1.addFile(newFile) insertClosedLumiBinds.append( { 'RUN' : 1, 'LUMI' : lumi, 'STREAM' : "A", 'FILECOUNT' : filecount, 'INSERT_TIME' : self.currentTime, 'CLOSE_TIME' : self.currentTime } ) self.fileset1.commit() jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription1) self.insertClosedLumiDAO.execute(binds = insertClosedLumiBinds, transaction = False) mySplitArgs['maxInputEvents'] = 900 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 3, "ERROR: JobFactory didn't create three jobs") self.assertEqual(len(jobGroups[0].jobs[0].getFiles()), 4, "ERROR: first job does not process 4 files") self.assertEqual(len(jobGroups[0].jobs[1].getFiles()), 1, "ERROR: second job does not process 1 file") self.assertEqual(len(jobGroups[0].jobs[2].getFiles()), 1, "ERROR: third job does not process 1 file") return
def configureRunStream(tier0Config, run, stream, specDirectory, dqmUploadProxy): """ _configureRunStream_ Called by Tier0Feeder for new run/streams. Retrieve global run settings and build the part of the configuration relevant to run/stream and write it to the database. Create workflows, filesets and subscriptions for the processing of runs/streams. """ logging.debug("configureRunStream() : %d , %s" % (run, stream)) myThread = threading.currentThread() daoFactory = DAOFactory(package="T0.WMBS", logger=logging, dbinterface=myThread.dbi) # retrieve some basic run information getRunInfoDAO = daoFactory(classname="RunConfig.GetRunInfo") runInfo = getRunInfoDAO.execute(run, transaction=False)[0] # treat centralDAQ or miniDAQ runs (have an HLT key) different from local runs if runInfo['hltkey'] != None: # streams not explicitely configured are repacked if stream not in tier0Config.Streams.dictionary_().keys(): addRepackConfig(tier0Config, stream) streamConfig = tier0Config.Streams.dictionary_()[stream] # consistency check to make sure stream exists and has datasets defined # only run if we don't ignore the stream if streamConfig.ProcessingStyle != "Ignore": getStreamDatasetsDAO = daoFactory( classname="RunConfig.GetStreamDatasets") datasets = getStreamDatasetsDAO.execute(run, stream, transaction=False) if len(datasets) == 0: raise RuntimeError( "Stream is not defined in HLT menu or has no datasets !") # write stream/dataset mapping (for special express and error datasets) insertDatasetDAO = daoFactory( classname="RunConfig.InsertPrimaryDataset") insertStreamDatasetDAO = daoFactory( classname="RunConfig.InsertStreamDataset") # write stream configuration insertCMSSWVersionDAO = daoFactory( classname="RunConfig.InsertCMSSWVersion") insertStreamStyleDAO = daoFactory( classname="RunConfig.InsertStreamStyle") insertRepackConfigDAO = daoFactory( classname="RunConfig.InsertRepackConfig") insertPromptCalibrationDAO = daoFactory( classname="RunConfig.InsertPromptCalibration") insertExpressConfigDAO = daoFactory( classname="RunConfig.InsertExpressConfig") insertSpecialDatasetDAO = daoFactory( classname="RunConfig.InsertSpecialDataset") insertDatasetScenarioDAO = daoFactory( classname="RunConfig.InsertDatasetScenario") insertStreamFilesetDAO = daoFactory( classname="RunConfig.InsertStreamFileset") insertRecoReleaseConfigDAO = daoFactory( classname="RunConfig.InsertRecoReleaseConfig") insertWorkflowMonitoringDAO = daoFactory( classname="RunConfig.InsertWorkflowMonitoring") insertStorageNodeDAO = daoFactory( classname="RunConfig.InsertStorageNode") insertPhEDExConfigDAO = daoFactory( classname="RunConfig.InsertPhEDExConfig") bindsCMSSWVersion = [] bindsDataset = [] bindsStreamDataset = [] bindsStreamStyle = { 'RUN': run, 'STREAM': stream, 'STYLE': streamConfig.ProcessingStyle } bindsRepackConfig = {} bindsPromptCalibration = {} bindsExpressConfig = {} bindsSpecialDataset = {} bindsDatasetScenario = [] bindsStorageNode = [] bindsPhEDExConfig = [] # mark workflows as injected wmbsDaoFactory = DAOFactory(package="WMCore.WMBS", logger=logging, dbinterface=myThread.dbi) markWorkflowsInjectedDAO = wmbsDaoFactory( classname="Workflow.MarkInjectedWorkflows") # # for spec creation, details for all outputs # outputModuleDetails = [] # # special dataset for some express output # specialDataset = None # # for PromptReco delay settings # promptRecoDelay = {} promptRecoDelayOffset = {} # # for PhEDEx subscription settings # subscriptions = [] # some hardcoded PhEDEx defaults expressPhEDExInjectNode = "T2_CH_CERN" 
expressPhEDExSubscribeNode = "T2_CH_CERN" # # first take care of all stream settings # getStreamOnlineVersionDAO = daoFactory( classname="RunConfig.GetStreamOnlineVersion") onlineVersion = getStreamOnlineVersionDAO.execute(run, stream, transaction=False) if streamConfig.ProcessingStyle == "Bulk": streamConfig.Repack.CMSSWVersion = streamConfig.VersionOverride.get( onlineVersion, onlineVersion) bindsCMSSWVersion.append( {'VERSION': streamConfig.Repack.CMSSWVersion}) streamConfig.Repack.ScramArch = tier0Config.Global.ScramArches.get( streamConfig.Repack.CMSSWVersion, tier0Config.Global.DefaultScramArch) bindsRepackConfig = { 'RUN': run, 'STREAM': stream, 'PROC_VER': streamConfig.Repack.ProcessingVersion, 'MAX_SIZE_SINGLE_LUMI': streamConfig.Repack.MaxSizeSingleLumi, 'MAX_SIZE_MULTI_LUMI': streamConfig.Repack.MaxSizeMultiLumi, 'MIN_SIZE': streamConfig.Repack.MinInputSize, 'MAX_SIZE': streamConfig.Repack.MaxInputSize, 'MAX_EDM_SIZE': streamConfig.Repack.MaxEdmSize, 'MAX_OVER_SIZE': streamConfig.Repack.MaxOverSize, 'MAX_EVENTS': streamConfig.Repack.MaxInputEvents, 'MAX_FILES': streamConfig.Repack.MaxInputFiles, 'BLOCK_DELAY': streamConfig.Repack.BlockCloseDelay, 'CMSSW': streamConfig.Repack.CMSSWVersion, 'SCRAM_ARCH': streamConfig.Repack.ScramArch } elif streamConfig.ProcessingStyle == "Express": specialDataset = "Stream%s" % stream bindsDataset.append({'PRIMDS': specialDataset}) bindsStreamDataset.append({ 'RUN': run, 'PRIMDS': specialDataset, 'STREAM': stream }) bindsSpecialDataset = {'STREAM': stream, 'PRIMDS': specialDataset} bindsDatasetScenario.append({ 'RUN': run, 'PRIMDS': specialDataset, 'SCENARIO': streamConfig.Express.Scenario }) if streamConfig.Express.WriteDQM: outputModuleDetails.append({ 'dataTier': tier0Config.Global.DQMDataTier, 'eventContent': tier0Config.Global.DQMDataTier, 'primaryDataset': specialDataset }) bindsStorageNode.append({'NODE': expressPhEDExSubscribeNode}) bindsPhEDExConfig.append({ 'RUN': run, 'PRIMDS': specialDataset, 'ARCHIVAL_NODE': None, 'TAPE_NODE': None, 'DISK_NODE': expressPhEDExSubscribeNode }) subscriptions.append({ 'custodialSites': [], 'nonCustodialSites': [expressPhEDExSubscribeNode], 'autoApproveSites': [expressPhEDExSubscribeNode], 'priority': "high", 'primaryDataset': specialDataset }) alcaSkim = None if len(streamConfig.Express.AlcaSkims) > 0: outputModuleDetails.append({ 'dataTier': "ALCARECO", 'eventContent': "ALCARECO", 'primaryDataset': specialDataset }) alcaSkim = ",".join(streamConfig.Express.AlcaSkims) numPromptCalibProd = 0 for producer in streamConfig.Express.AlcaSkims: if producer.startswith("PromptCalibProd"): numPromptCalibProd += 1 if numPromptCalibProd > 0: bindsPromptCalibration = { 'RUN': run, 'STREAM': stream, 'NUM_PRODUCER': numPromptCalibProd } dqmSeq = None if len(streamConfig.Express.DqmSequences) > 0: dqmSeq = ",".join(streamConfig.Express.DqmSequences) streamConfig.Express.CMSSWVersion = streamConfig.VersionOverride.get( onlineVersion, onlineVersion) bindsCMSSWVersion.append( {'VERSION': streamConfig.Express.CMSSWVersion}) streamConfig.Express.ScramArch = tier0Config.Global.ScramArches.get( streamConfig.Express.CMSSWVersion, tier0Config.Global.DefaultScramArch) streamConfig.Express.RecoScramArch = None if streamConfig.Express.RecoCMSSWVersion != None: bindsCMSSWVersion.append( {'VERSION': streamConfig.Express.RecoCMSSWVersion}) streamConfig.Express.RecoScramArch = tier0Config.Global.ScramArches.get( streamConfig.Express.RecoCMSSWVersion, tier0Config.Global.DefaultScramArch) bindsExpressConfig = { 'RUN': run, 'STREAM': 
stream, 'PROC_VER': streamConfig.Express.ProcessingVersion, 'WRITE_TIERS': ",".join(streamConfig.Express.DataTiers), 'WRITE_DQM': streamConfig.Express.WriteDQM, 'GLOBAL_TAG': streamConfig.Express.GlobalTag, 'MAX_RATE': streamConfig.Express.MaxInputRate, 'MAX_EVENTS': streamConfig.Express.MaxInputEvents, 'MAX_SIZE': streamConfig.Express.MaxInputSize, 'MAX_FILES': streamConfig.Express.MaxInputFiles, 'MAX_LATENCY': streamConfig.Express.MaxLatency, 'DQM_INTERVAL': streamConfig.Express.PeriodicHarvestInterval, 'BLOCK_DELAY': streamConfig.Express.BlockCloseDelay, 'CMSSW': streamConfig.Express.CMSSWVersion, 'SCRAM_ARCH': streamConfig.Express.ScramArch, 'RECO_CMSSW': streamConfig.Express.RecoCMSSWVersion, 'RECO_SCRAM_ARCH': streamConfig.Express.RecoScramArch, 'MULTICORE': streamConfig.Express.Multicore, 'ALCA_SKIM': alcaSkim, 'DQM_SEQ': dqmSeq } # # then configure datasets # getStreamDatasetTriggersDAO = daoFactory( classname="RunConfig.GetStreamDatasetTriggers") datasetTriggers = getStreamDatasetTriggersDAO.execute( run, stream, transaction=False) for dataset, paths in datasetTriggers.items(): if dataset == "Unassigned path": if stream == "Express" and run in [ 210114, 210116, 210120, 210121, 210178 ]: continue if stream == "A" and run in [216120, 216125, 216130]: continue datasetConfig = retrieveDatasetConfig(tier0Config, dataset) selectEvents = [] for path in sorted(paths): selectEvents.append("%s:%s" % (path, runInfo['process'])) if streamConfig.ProcessingStyle == "Bulk": promptRecoDelay[datasetConfig.Name] = datasetConfig.RecoDelay promptRecoDelayOffset[ datasetConfig.Name] = datasetConfig.RecoDelayOffset outputModuleDetails.append({ 'dataTier': "RAW", 'eventContent': "ALL", 'selectEvents': selectEvents, 'primaryDataset': dataset }) bindsPhEDExConfig.append({ 'RUN': run, 'PRIMDS': dataset, 'ARCHIVAL_NODE': datasetConfig.ArchivalNode, 'TAPE_NODE': datasetConfig.TapeNode, 'DISK_NODE': datasetConfig.DiskNode }) custodialSites = [] nonCustodialSites = [] autoApproveSites = [] if datasetConfig.ArchivalNode != None: bindsStorageNode.append( {'NODE': datasetConfig.ArchivalNode}) custodialSites.append(datasetConfig.ArchivalNode) autoApproveSites.append(datasetConfig.ArchivalNode) if datasetConfig.TapeNode != None: bindsStorageNode.append({'NODE': datasetConfig.TapeNode}) custodialSites.append(datasetConfig.TapeNode) if datasetConfig.DiskNode != None: bindsStorageNode.append({'NODE': datasetConfig.DiskNode}) nonCustodialSites.append(datasetConfig.DiskNode) autoApproveSites.append(datasetConfig.DiskNode) if len(custodialSites) > 0 or len(nonCustodialSites) > 0: subscriptions.append({ 'custodialSites': custodialSites, 'custodialSubType': "Replica", 'nonCustodialSites': nonCustodialSites, 'autoApproveSites': autoApproveSites, 'priority': "high", 'primaryDataset': dataset, 'dataTier': "RAW" }) # # set subscriptions for error dataset # custodialSites = [] nonCustodialSites = [] autoApproveSites = [] if datasetConfig.ArchivalNode != None: custodialSites.append(datasetConfig.ArchivalNode) autoApproveSites.append(datasetConfig.ArchivalNode) if datasetConfig.ArchivalNode != expressPhEDExInjectNode: nonCustodialSites.append(expressPhEDExInjectNode) autoApproveSites.append(expressPhEDExInjectNode) if len(custodialSites) > 0 or len(nonCustodialSites) > 0: subscriptions.append({ 'custodialSites': custodialSites, 'custodialSubType': "Replica", 'nonCustodialSites': nonCustodialSites, 'autoApproveSites': autoApproveSites, 'priority': "high", 'primaryDataset': "%s-Error" % dataset, 'dataTier': "RAW" }) elif 
streamConfig.ProcessingStyle == "Express": for dataTier in streamConfig.Express.DataTiers: if dataTier not in ["ALCARECO", "DQM", "DQMIO"]: outputModuleDetails.append({ 'dataTier': dataTier, 'eventContent': dataTier, 'selectEvents': selectEvents, 'primaryDataset': dataset }) bindsPhEDExConfig.append({ 'RUN': run, 'PRIMDS': dataset, 'ARCHIVAL_NODE': None, 'TAPE_NODE': None, 'DISK_NODE': expressPhEDExSubscribeNode }) subscriptions.append({ 'custodialSites': [], 'nonCustodialSites': [expressPhEDExSubscribeNode], 'autoApproveSites': [expressPhEDExSubscribeNode], 'priority': "high", 'primaryDataset': dataset }) # # finally create WMSpec # outputs = {} if streamConfig.ProcessingStyle == "Bulk": taskName = "Repack" workflowName = "Repack_Run%d_Stream%s" % (run, stream) specArguments = {} specArguments['TimePerEvent'] = 1 specArguments['SizePerEvent'] = 200 specArguments['Memory'] = 1800 specArguments['RequestPriority'] = 0 specArguments['CMSSWVersion'] = streamConfig.Repack.CMSSWVersion specArguments['ScramArch'] = streamConfig.Repack.ScramArch specArguments[ 'ProcessingVersion'] = streamConfig.Repack.ProcessingVersion specArguments[ 'MaxSizeSingleLumi'] = streamConfig.Repack.MaxSizeSingleLumi specArguments[ 'MaxSizeMultiLumi'] = streamConfig.Repack.MaxSizeMultiLumi specArguments['MinInputSize'] = streamConfig.Repack.MinInputSize specArguments['MaxInputSize'] = streamConfig.Repack.MaxInputSize specArguments['MaxEdmSize'] = streamConfig.Repack.MaxEdmSize specArguments['MaxOverSize'] = streamConfig.Repack.MaxOverSize specArguments[ 'MaxInputEvents'] = streamConfig.Repack.MaxInputEvents specArguments['MaxInputFiles'] = streamConfig.Repack.MaxInputFiles specArguments['UnmergedLFNBase'] = "/store/unmerged/%s" % runInfo[ 'bulk_data_type'] if runInfo['backfill']: specArguments['MergedLFNBase'] = "/store/backfill/%s/%s" % ( runInfo['backfill'], runInfo['bulk_data_type']) else: specArguments[ 'MergedLFNBase'] = "/store/%s" % runInfo['bulk_data_type'] specArguments[ 'BlockCloseDelay'] = streamConfig.Repack.BlockCloseDelay elif streamConfig.ProcessingStyle == "Express": taskName = "Express" workflowName = "Express_Run%d_Stream%s" % (run, stream) specArguments = {} specArguments['TimePerEvent'] = 12 specArguments['SizePerEvent'] = 512 specArguments['Memory'] = 1800 if streamConfig.Express.Multicore: specArguments['Multicore'] = streamConfig.Express.Multicore specArguments['Memory'] = 1800 * streamConfig.Express.Multicore specArguments['RequestPriority'] = 0 specArguments['ProcessingString'] = "Express" specArguments[ 'ProcessingVersion'] = streamConfig.Express.ProcessingVersion specArguments['Scenario'] = streamConfig.Express.Scenario specArguments['CMSSWVersion'] = streamConfig.Express.CMSSWVersion specArguments['ScramArch'] = streamConfig.Express.ScramArch specArguments[ 'RecoCMSSWVersion'] = streamConfig.Express.RecoCMSSWVersion specArguments['RecoScramArch'] = streamConfig.Express.RecoScramArch specArguments['GlobalTag'] = streamConfig.Express.GlobalTag specArguments['GlobalTagTransaction'] = "Express_%d" % run specArguments[ 'GlobalTagConnect'] = streamConfig.Express.GlobalTagConnect specArguments['MaxInputRate'] = streamConfig.Express.MaxInputRate specArguments[ 'MaxInputEvents'] = streamConfig.Express.MaxInputEvents specArguments['MaxInputSize'] = streamConfig.Express.MaxInputSize specArguments['MaxInputFiles'] = streamConfig.Express.MaxInputFiles specArguments['MaxLatency'] = streamConfig.Express.MaxLatency specArguments['AlcaSkims'] = streamConfig.Express.AlcaSkims specArguments['DqmSequences'] = 
streamConfig.Express.DqmSequences specArguments['AlcaHarvestTimeout'] = runInfo['ah_timeout'] specArguments['AlcaHarvestDir'] = runInfo['ah_dir'] specArguments['DQMUploadProxy'] = dqmUploadProxy specArguments['DQMUploadUrl'] = runInfo['dqmuploadurl'] specArguments['StreamName'] = stream specArguments['SpecialDataset'] = specialDataset specArguments['UnmergedLFNBase'] = "/store/unmerged/express" specArguments['MergedLFNBase'] = "/store/express" if runInfo['backfill']: specArguments[ 'MergedLFNBase'] = "/store/backfill/%s/express" % runInfo[ 'backfill'] else: specArguments['MergedLFNBase'] = "/store/express" specArguments[ 'PeriodicHarvestInterval'] = streamConfig.Express.PeriodicHarvestInterval specArguments[ 'BlockCloseDelay'] = streamConfig.Express.BlockCloseDelay if streamConfig.ProcessingStyle in ['Bulk', 'Express']: specArguments['RunNumber'] = run specArguments['AcquisitionEra'] = tier0Config.Global.AcquisitionEra specArguments['Outputs'] = outputModuleDetails specArguments[ 'OverrideCatalog'] = "trivialcatalog_file:/cvmfs/cms.cern.ch/SITECONF/T2_CH_CERN/Tier0/override_catalog.xml?protocol=override" specArguments['ValidStatus'] = "VALID" specArguments['SiteWhitelist'] = ["T2_CH_CERN_T0"] specArguments['SiteBlacklist'] = [] if streamConfig.ProcessingStyle == "Bulk": factory = RepackWorkloadFactory() wmSpec = factory.factoryWorkloadConstruction( workflowName, specArguments) wmSpec.setPhEDExInjectionOverride(runInfo['bulk_data_loc']) for subscription in subscriptions: wmSpec.setSubscriptionInformation(**subscription) elif streamConfig.ProcessingStyle == "Express": factory = ExpressWorkloadFactory() wmSpec = factory.factoryWorkloadConstruction( workflowName, specArguments) wmSpec.setPhEDExInjectionOverride(expressPhEDExInjectNode) for subscription in subscriptions: wmSpec.setSubscriptionInformation(**subscription) if streamConfig.ProcessingStyle in ['Bulk', 'Express']: wmSpec.setOwnerDetails( "*****@*****.**", "T0", { 'vogroup': 'DEFAULT', 'vorole': 'DEFAULT', 'dn': "*****@*****.**" }) wmSpec.setupPerformanceMonitoring(maxRSS=10485760, maxVSize=10485760, softTimeout=604800, gracePeriod=3600) wmbsHelper = WMBSHelper(wmSpec, taskName, cachepath=specDirectory) filesetName = "Run%d_Stream%s" % (run, stream) fileset = Fileset(filesetName) # # create workflow (currently either repack or express) # try: myThread.transaction.begin() if len(bindsCMSSWVersion) > 0: insertCMSSWVersionDAO.execute(bindsCMSSWVersion, conn=myThread.transaction.conn, transaction=True) if len(bindsDataset) > 0: insertDatasetDAO.execute(bindsDataset, conn=myThread.transaction.conn, transaction=True) if len(bindsStreamDataset) > 0: insertStreamDatasetDAO.execute(bindsStreamDataset, conn=myThread.transaction.conn, transaction=True) if len(bindsRepackConfig) > 0: insertRepackConfigDAO.execute(bindsRepackConfig, conn=myThread.transaction.conn, transaction=True) if len(bindsPromptCalibration) > 0: insertPromptCalibrationDAO.execute( bindsPromptCalibration, conn=myThread.transaction.conn, transaction=True) if len(bindsExpressConfig) > 0: insertExpressConfigDAO.execute(bindsExpressConfig, conn=myThread.transaction.conn, transaction=True) if len(bindsSpecialDataset) > 0: insertSpecialDatasetDAO.execute(bindsSpecialDataset, conn=myThread.transaction.conn, transaction=True) if len(bindsDatasetScenario) > 0: insertDatasetScenarioDAO.execute( bindsDatasetScenario, conn=myThread.transaction.conn, transaction=True) if len(bindsStorageNode) > 0: insertStorageNodeDAO.execute(bindsStorageNode, conn=myThread.transaction.conn, 
transaction=True) if len(bindsPhEDExConfig) > 0: insertPhEDExConfigDAO.execute(bindsPhEDExConfig, conn=myThread.transaction.conn, transaction=True) insertStreamStyleDAO.execute(bindsStreamStyle, conn=myThread.transaction.conn, transaction=True) if streamConfig.ProcessingStyle in ['Bulk', 'Express']: insertStreamFilesetDAO.execute(run, stream, filesetName, conn=myThread.transaction.conn, transaction=True) fileset.load() wmbsHelper.createSubscription(wmSpec.getTask(taskName), fileset, alternativeFilesetClose=True) insertWorkflowMonitoringDAO.execute( [fileset.id], conn=myThread.transaction.conn, transaction=True) if streamConfig.ProcessingStyle == "Bulk": bindsRecoReleaseConfig = [] for fileset, primds in wmbsHelper.getMergeOutputMapping( ).items(): bindsRecoReleaseConfig.append({ 'RUN': run, 'PRIMDS': primds, 'FILESET': fileset, 'RECODELAY': promptRecoDelay[primds], 'RECODELAYOFFSET': promptRecoDelayOffset[primds] }) insertRecoReleaseConfigDAO.execute( bindsRecoReleaseConfig, conn=myThread.transaction.conn, transaction=True) elif streamConfig.ProcessingStyle == "Express": markWorkflowsInjectedDAO.execute( [workflowName], injected=True, conn=myThread.transaction.conn, transaction=True) except Exception as ex: logging.exception(ex) myThread.transaction.rollback() raise RuntimeError( "Problem in configureRunStream() database transaction !") else: myThread.transaction.commit() else: # should we do anything for local runs ? pass return
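# --- Illustrative sketch, not part of configureRunStream() above ---
# All database work at the end of configureRunStream() is wrapped in a single
# explicit transaction so that the RunConfig inserts, the stream fileset and the
# subscription commit or roll back as one unit. A stripped-down sketch of that
# pattern; the helper and its daoCalls argument are assumptions, and myThread is
# expected to be a WMCore-initialized thread carrying a transaction object.
import logging


def runInOneTransaction(myThread, daoCalls):
    """Run a list of zero-argument DAO callables inside one WMCore transaction."""
    try:
        myThread.transaction.begin()
        for call in daoCalls:
            call()
    except Exception as ex:
        logging.exception(ex)
        myThread.transaction.rollback()
        raise RuntimeError("Problem in configureRunStream() database transaction !")
    else:
        myThread.transaction.commit()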
class JobGroup(WMBSBase, WMJobGroup): """ A group (set) of Jobs """ def __init__(self, subscription = None, jobs = None, id = -1, uid = None, location = None): WMBSBase.__init__(self) WMJobGroup.__init__(self, subscription=subscription, jobs = jobs) self.id = id self.lastUpdate = None self.uid = uid if location != None: self.setSite(location) return def create(self): """ Add the new jobgroup to WMBS, create the output Fileset object """ myThread = threading.currentThread() existingTransaction = self.beginTransaction() #overwrite base class self.output for WMBS fileset self.output = Fileset(name = makeUUID()) self.output.create() if self.uid == None: self.uid = makeUUID() action = self.daofactory(classname = "JobGroup.New") action.execute(self.uid, self.subscription["id"], self.output.id, conn = self.getDBConn(), transaction = self.existingTransaction()) self.id = self.exists() self.commitTransaction(existingTransaction) return def delete(self): """ Remove a jobgroup from WMBS """ deleteAction = self.daofactory(classname = "JobGroup.Delete") deleteAction.execute(id = self.id, conn = self.getDBConn(), transaction = self.existingTransaction()) return def exists(self): """ Does a jobgroup exist with id if id is not provided, use the uid, return the id """ if self.id != -1: action = self.daofactory(classname = "JobGroup.ExistsByID") result = action.execute(id = self.id, conn = self.getDBConn(), transaction = self.existingTransaction()) else: action = self.daofactory(classname = "JobGroup.Exists") result = action.execute(uid = self.uid, conn = self.getDBConn(), transaction = self.existingTransaction()) return result def load(self): """ _load_ Load all meta data associated with the JobGroup. This includes the JobGroup id, uid, last_update time, subscription id and output fileset id. Either the JobGroup id or uid must be specified for this to work. """ existingTransaction = self.beginTransaction() if self.id > 0: loadAction = self.daofactory(classname = "JobGroup.LoadFromID") result = loadAction.execute(self.id, conn = self.getDBConn(), transaction = self.existingTransaction()) else: loadAction = self.daofactory(classname = "JobGroup.LoadFromUID") result = loadAction.execute(self.uid, conn = self.getDBConn(), transaction = self.existingTransaction()) self.id = result["id"] self.uid = result["uid"] self.lastUpdate = result["last_update"] self.subscription = Subscription(id = result["subscription"]) self.subscription.load() self.output = Fileset(id = result["output"]) self.output.load() self.jobs = [] self.commitTransaction(existingTransaction) return def loadData(self): """ _loadData_ Load all data that is associated with the jobgroup. This includes loading all the subscription information, the output fileset information and all the jobs that are associated with the group. """ existingTransaction = self.beginTransaction() if self.id < 0 or self.uid == None: self.load() self.subscription.loadData() self.output.loadData() loadAction = self.daofactory(classname = "JobGroup.LoadJobs") result = loadAction.execute(self.id, conn = self.getDBConn(), transaction = self.existingTransaction()) self.jobs = [] self.newjobs = [] for jobID in result: newJob = Job(id = jobID["id"]) newJob.loadData() self.add(newJob) WMJobGroup.commit(self) self.commitTransaction(existingTransaction) return def commit(self): """ _commit_ Write any new jobs to the database, creating them in the database if necessary. 
""" existingTransaction = self.beginTransaction() if self.id == -1: self.create() for j in self.newjobs: j.create(group = self) WMJobGroup.commit(self) self.commitTransaction(existingTransaction) return def setSite(self, site_name = None): """ Updates the jobGroup with a site_name from the wmbs_location table """ if not self.exists(): return action = self.daofactory(classname = "JobGroup.SetSite") result = action.execute(site_name = site_name, jobGroupID = self.id, conn = self.getDBConn(), transaction = self.existingTransaction()) return result def getSite(self): """ Updates the jobGroup with a site_name from the wmbs_location table """ if not self.exists(): return action = self.daofactory(classname = "JobGroup.GetSite") result = action.execute(jobGroupID = self.id, conn = self.getDBConn(), transaction = self.existingTransaction()) return result def listJobIDs(self): """ Returns a list of job IDs Useful for times when threading the loading of jobGroups, where running loadData can overload UUID """ existingTransaction = self.beginTransaction() if self.id < 0 or self.uid == None: self.load() loadAction = self.daofactory(classname = "JobGroup.LoadJobs") result = loadAction.execute(self.id, conn = self.getDBConn(), transaction = self.existingTransaction()) jobIDList = [] for jobID in result: jobIDList.append(jobID["id"]) self.commitTransaction(existingTransaction) return jobIDList def commitBulk(self): """ Creates jobs in a group instead of singly, as is done in jobGroup.commit() """ myThread = threading.currentThread() if self.id == -1: myThread.transaction.begin() #existingTransaction = self.beginTransaction() self.create() #self.commitTransaction(existingTransaction) myThread.transaction.commit() existingTransaction = self.beginTransaction() listOfJobs = [] for job in self.newjobs: #First do all the header stuff if job["id"] != None: continue job["jobgroup"] = self.id if job["name"] == None: job["name"] = makeUUID() listOfJobs.append(job) bulkAction = self.daofactory(classname = "Jobs.New") result = bulkAction.execute(jobList = listOfJobs) #Use the results of the bulk commit to get the jobIDs fileDict = {} for job in listOfJobs: job['id'] = result[job['name']] fileDict[job['id']] = [] for file in job['input_files']: fileDict[job['id']].append(file['id']) maskAction = self.daofactory(classname = "Masks.New") maskAction.execute(jobList = listOfJobs, conn = self.getDBConn(), \ transaction = self.existingTransaction()) fileAction = self.daofactory(classname = "Jobs.AddFiles") fileAction.execute(jobDict = fileDict, conn = self.getDBConn(), \ transaction = self.existingTransaction()) WMJobGroup.commit(self) self.commitTransaction(existingTransaction) return def getLocationsForJobs(self): """ Gets a list of the locations that jobs can run at """ if not self.exists(): return action = self.daofactory(classname = "JobGroup.GetLocationsForJobs") result = action.execute(id = self.id, conn = self.getDBConn(), transaction = self.existingTransaction()) return result def __str__(self): """ __str__ Print out some information about the jobGroup as if jobGroup inherited from dict() """ d = {'id': self.id, 'uid': self.uid, 'subscription': self.subscription, 'output': self.output, 'jobs': self.jobs, 'newjobs': self.newjobs} return str(d)
def configureRunStream(tier0Config, run, stream, specDirectory, dqmUploadProxy): """ _configureRunStream_ Called by Tier0Feeder for new run/streams. Retrieve global run settings and build the part of the configuration relevant to run/stream and write it to the database. Create workflows, filesets and subscriptions for the processing of runs/streams. """ logging.debug("configureRunStream() : %d , %s" % (run, stream)) myThread = threading.currentThread() daoFactory = DAOFactory(package = "T0.WMBS", logger = logging, dbinterface = myThread.dbi) # retrieve some basic run information getRunInfoDAO = daoFactory(classname = "RunConfig.GetRunInfo") runInfo = getRunInfoDAO.execute(run, transaction = False)[0] # # treat centralDAQ or miniDAQ runs (have an HLT key) different from local runs # if runInfo['hltkey'] != None: # streams not explicitely configured are repacked if stream not in tier0Config.Streams.dictionary_().keys(): addRepackConfig(tier0Config, stream) streamConfig = tier0Config.Streams.dictionary_()[stream] # consistency check to make sure stream exists and has datasets defined # only run if we don't ignore the stream if streamConfig.ProcessingStyle != "Ignore": getStreamDatasetsDAO = daoFactory(classname = "RunConfig.GetStreamDatasets") datasets = getStreamDatasetsDAO.execute(run, stream, transaction = False) if len(datasets) == 0: raise RuntimeError("Stream is not defined in HLT menu or has no datasets !") # write stream/dataset mapping (for special express and error datasets) insertDatasetDAO = daoFactory(classname = "RunConfig.InsertPrimaryDataset") insertStreamDatasetDAO = daoFactory(classname = "RunConfig.InsertStreamDataset") # write stream configuration insertCMSSWVersionDAO = daoFactory(classname = "RunConfig.InsertCMSSWVersion") insertStreamStyleDAO = daoFactory(classname = "RunConfig.InsertStreamStyle") insertRepackConfigDAO = daoFactory(classname = "RunConfig.InsertRepackConfig") insertPromptCalibrationDAO = daoFactory(classname = "RunConfig.InsertPromptCalibration") insertExpressConfigDAO = daoFactory(classname = "RunConfig.InsertExpressConfig") insertSpecialDatasetDAO = daoFactory(classname = "RunConfig.InsertSpecialDataset") insertDatasetScenarioDAO = daoFactory(classname = "RunConfig.InsertDatasetScenario") insertStreamFilesetDAO = daoFactory(classname = "RunConfig.InsertStreamFileset") insertRecoReleaseConfigDAO = daoFactory(classname = "RunConfig.InsertRecoReleaseConfig") insertWorkflowMonitoringDAO = daoFactory(classname = "RunConfig.InsertWorkflowMonitoring") insertStorageNodeDAO = daoFactory(classname = "RunConfig.InsertStorageNode") insertPhEDExConfigDAO = daoFactory(classname = "RunConfig.InsertPhEDExConfig") bindsCMSSWVersion = [] bindsDataset = [] bindsStreamDataset = [] bindsStreamStyle = {'RUN' : run, 'STREAM' : stream, 'STYLE': streamConfig.ProcessingStyle } bindsRepackConfig = {} bindsPromptCalibration = {} bindsExpressConfig = {} bindsSpecialDataset = {} bindsDatasetScenario = [] bindsStorageNode = [] bindsPhEDExConfig = [] # mark workflows as injected wmbsDaoFactory = DAOFactory(package = "WMCore.WMBS", logger = logging, dbinterface = myThread.dbi) markWorkflowsInjectedDAO = wmbsDaoFactory(classname = "Workflow.MarkInjectedWorkflows") # # for spec creation, details for all outputs # outputModuleDetails = [] # # special dataset for some express output # specialDataset = None # # for PromptReco delay settings # promptRecoDelay = {} promptRecoDelayOffset = {} # # for PhEDEx subscription settings # subscriptions = [] # some hardcoded PhEDEx defaults 
expressPhEDExInjectNode = "T2_CH_CERN" expressPhEDExSubscribeNode = "T2_CH_CERN" # # first take care of all stream settings # getStreamOnlineVersionDAO = daoFactory(classname = "RunConfig.GetStreamOnlineVersion") onlineVersion = getStreamOnlineVersionDAO.execute(run, stream, transaction = False) if streamConfig.ProcessingStyle == "Bulk": streamConfig.Repack.CMSSWVersion = streamConfig.VersionOverride.get(onlineVersion, onlineVersion) bindsCMSSWVersion.append( { 'VERSION' : streamConfig.Repack.CMSSWVersion } ) streamConfig.Repack.ScramArch = tier0Config.Global.ScramArches.get(streamConfig.Repack.CMSSWVersion, tier0Config.Global.DefaultScramArch) bindsRepackConfig = { 'RUN' : run, 'STREAM' : stream, 'PROC_VER': streamConfig.Repack.ProcessingVersion, 'MAX_SIZE_SINGLE_LUMI' : streamConfig.Repack.MaxSizeSingleLumi, 'MAX_SIZE_MULTI_LUMI' : streamConfig.Repack.MaxSizeMultiLumi, 'MIN_SIZE' : streamConfig.Repack.MinInputSize, 'MAX_SIZE' : streamConfig.Repack.MaxInputSize, 'MAX_EDM_SIZE' : streamConfig.Repack.MaxEdmSize, 'MAX_OVER_SIZE' : streamConfig.Repack.MaxOverSize, 'MAX_EVENTS' : streamConfig.Repack.MaxInputEvents, 'MAX_FILES' : streamConfig.Repack.MaxInputFiles, 'BLOCK_DELAY' : streamConfig.Repack.BlockCloseDelay, 'CMSSW' : streamConfig.Repack.CMSSWVersion, 'SCRAM_ARCH' : streamConfig.Repack.ScramArch } elif streamConfig.ProcessingStyle == "Express": specialDataset = "Stream%s" % stream bindsDataset.append( { 'PRIMDS' : specialDataset } ) bindsStreamDataset.append( { 'RUN' : run, 'PRIMDS' : specialDataset, 'STREAM' : stream } ) bindsSpecialDataset = { 'STREAM' : stream, 'PRIMDS' : specialDataset } bindsDatasetScenario.append( { 'RUN' : run, 'PRIMDS' : specialDataset, 'SCENARIO' : streamConfig.Express.Scenario } ) if "DQM" in streamConfig.Express.DataTiers: outputModuleDetails.append( { 'dataTier' : "DQM", 'eventContent' : "DQM", 'primaryDataset' : specialDataset } ) bindsStorageNode.append( { 'NODE' : expressPhEDExSubscribeNode } ) bindsPhEDExConfig.append( { 'RUN' : run, 'PRIMDS' : specialDataset, 'ARCHIVAL_NODE' : None, 'TAPE_NODE' : None, 'DISK_NODE' : expressPhEDExSubscribeNode } ) subscriptions.append( { 'custodialSites' : [], 'nonCustodialSites' : [ expressPhEDExSubscribeNode ], 'autoApproveSites' : [ expressPhEDExSubscribeNode ], 'priority' : "high", 'primaryDataset' : specialDataset } ) alcaSkim = None if "ALCARECO" in streamConfig.Express.DataTiers: if len(streamConfig.Express.AlcaSkims) > 0: outputModuleDetails.append( { 'dataTier' : "ALCARECO", 'eventContent' : "ALCARECO", 'primaryDataset' : specialDataset } ) alcaSkim = ",".join(streamConfig.Express.AlcaSkims) numPromptCalibProd = 0 for producer in streamConfig.Express.AlcaSkims: if producer.startswith("PromptCalibProd"): numPromptCalibProd += 1 if numPromptCalibProd > 0: bindsPromptCalibration = { 'RUN' : run, 'STREAM' : stream, 'NUM_PRODUCER' : numPromptCalibProd } dqmSeq = None if len(streamConfig.Express.DqmSequences) > 0: dqmSeq = ",".join(streamConfig.Express.DqmSequences) streamConfig.Express.CMSSWVersion = streamConfig.VersionOverride.get(onlineVersion, onlineVersion) bindsCMSSWVersion.append( { 'VERSION' : streamConfig.Express.CMSSWVersion } ) streamConfig.Express.ScramArch = tier0Config.Global.ScramArches.get(streamConfig.Express.CMSSWVersion, tier0Config.Global.DefaultScramArch) streamConfig.Express.RecoScramArch = None if streamConfig.Express.RecoCMSSWVersion != None: bindsCMSSWVersion.append( { 'VERSION' : streamConfig.Express.RecoCMSSWVersion } ) streamConfig.Express.RecoScramArch = 
tier0Config.Global.ScramArches.get(streamConfig.Express.RecoCMSSWVersion, tier0Config.Global.DefaultScramArch) bindsExpressConfig = { 'RUN' : run, 'STREAM' : stream, 'PROC_VER' : streamConfig.Express.ProcessingVersion, 'WRITE_TIERS' : ",".join(streamConfig.Express.DataTiers), 'GLOBAL_TAG' : streamConfig.Express.GlobalTag, 'MAX_RATE' : streamConfig.Express.MaxInputRate, 'MAX_EVENTS' : streamConfig.Express.MaxInputEvents, 'MAX_SIZE' : streamConfig.Express.MaxInputSize, 'MAX_FILES' : streamConfig.Express.MaxInputFiles, 'MAX_LATENCY' : streamConfig.Express.MaxLatency, 'DQM_INTERVAL' : streamConfig.Express.PeriodicHarvestInterval, 'BLOCK_DELAY' : streamConfig.Express.BlockCloseDelay, 'CMSSW' : streamConfig.Express.CMSSWVersion, 'SCRAM_ARCH' : streamConfig.Express.ScramArch, 'RECO_CMSSW' : streamConfig.Express.RecoCMSSWVersion, 'RECO_SCRAM_ARCH' : streamConfig.Express.RecoScramArch, 'MULTICORE' : streamConfig.Express.Multicore, 'ALCA_SKIM' : alcaSkim, 'DQM_SEQ' : dqmSeq } # # then configure datasets # getStreamDatasetTriggersDAO = daoFactory(classname = "RunConfig.GetStreamDatasetTriggers") datasetTriggers = getStreamDatasetTriggersDAO.execute(run, stream, transaction = False) for dataset, paths in datasetTriggers.items(): if dataset == "Unassigned path": if stream == "Express" and run in [ 210114, 210116, 210120, 210121, 210178 ]: continue if stream == "A" and run in [ 216120, 216125, 216130 ]: continue datasetConfig = retrieveDatasetConfig(tier0Config, dataset) selectEvents = [] for path in sorted(paths): selectEvents.append("%s:%s" % (path, runInfo['process'])) if streamConfig.ProcessingStyle == "Bulk": promptRecoDelay[datasetConfig.Name] = datasetConfig.RecoDelay promptRecoDelayOffset[datasetConfig.Name] = datasetConfig.RecoDelayOffset outputModuleDetails.append( { 'dataTier' : "RAW", 'eventContent' : "ALL", 'selectEvents' : selectEvents, 'primaryDataset' : dataset } ) bindsPhEDExConfig.append( { 'RUN' : run, 'PRIMDS' : dataset, 'ARCHIVAL_NODE' : datasetConfig.ArchivalNode, 'TAPE_NODE' : datasetConfig.TapeNode, 'DISK_NODE' : datasetConfig.DiskNode } ) custodialSites = [] nonCustodialSites = [] autoApproveSites = [] if datasetConfig.ArchivalNode != None: bindsStorageNode.append( { 'NODE' : datasetConfig.ArchivalNode } ) custodialSites.append(datasetConfig.ArchivalNode) autoApproveSites.append(datasetConfig.ArchivalNode) if datasetConfig.TapeNode != None: bindsStorageNode.append( { 'NODE' : datasetConfig.TapeNode } ) custodialSites.append(datasetConfig.TapeNode) if datasetConfig.DiskNode != None: bindsStorageNode.append( { 'NODE' : datasetConfig.DiskNode } ) nonCustodialSites.append(datasetConfig.DiskNode) autoApproveSites.append(datasetConfig.DiskNode) if len(custodialSites) > 0 or len(nonCustodialSites) > 0: subscriptions.append( { 'custodialSites' : custodialSites, 'custodialSubType' : "Replica", 'nonCustodialSites' : nonCustodialSites, 'autoApproveSites' : autoApproveSites, 'priority' : "high", 'primaryDataset' : dataset, 'dataTier' : "RAW" } ) # # set subscriptions for error dataset # custodialSites = [] nonCustodialSites = [] autoApproveSites = [] if datasetConfig.ArchivalNode != None: custodialSites.append(datasetConfig.ArchivalNode) autoApproveSites.append(datasetConfig.ArchivalNode) if datasetConfig.ArchivalNode != expressPhEDExInjectNode: nonCustodialSites.append(expressPhEDExInjectNode) autoApproveSites.append(expressPhEDExInjectNode) if len(custodialSites) > 0 or len(nonCustodialSites) > 0: subscriptions.append( { 'custodialSites' : custodialSites, 'custodialSubType' : "Replica", 
'nonCustodialSites' : nonCustodialSites, 'autoApproveSites' : autoApproveSites, 'priority' : "high", 'primaryDataset' : "%s-Error" % dataset, 'dataTier' : "RAW" } ) elif streamConfig.ProcessingStyle == "Express": for dataTier in streamConfig.Express.DataTiers: if dataTier not in [ "ALCARECO", "DQM" ]: outputModuleDetails.append( { 'dataTier' : dataTier, 'eventContent' : dataTier, 'selectEvents' : selectEvents, 'primaryDataset' : dataset } ) bindsPhEDExConfig.append( { 'RUN' : run, 'PRIMDS' : dataset, 'ARCHIVAL_NODE' : None, 'TAPE_NODE' : None, 'DISK_NODE' : expressPhEDExSubscribeNode } ) subscriptions.append( { 'custodialSites' : [], 'nonCustodialSites' : [ expressPhEDExSubscribeNode ], 'autoApproveSites' : [ expressPhEDExSubscribeNode ], 'priority' : "high", 'primaryDataset' : dataset } ) # # finally create WMSpec # outputs = {} if streamConfig.ProcessingStyle == "Bulk": taskName = "Repack" workflowName = "Repack_Run%d_Stream%s" % (run, stream) specArguments = {} specArguments['Group'] = "unknown" specArguments['Requestor'] = "unknown" specArguments['RequestorDN'] = "unknown" specArguments['TimePerEvent'] = 1 specArguments['SizePerEvent'] = 200 specArguments['Memory'] = 1800 specArguments['RequestPriority'] = 0 specArguments['CMSSWVersion'] = streamConfig.Repack.CMSSWVersion specArguments['ScramArch'] = streamConfig.Repack.ScramArch specArguments['ProcessingVersion'] = streamConfig.Repack.ProcessingVersion specArguments['MaxSizeSingleLumi'] = streamConfig.Repack.MaxSizeSingleLumi specArguments['MaxSizeMultiLumi'] = streamConfig.Repack.MaxSizeMultiLumi specArguments['MinInputSize'] = streamConfig.Repack.MinInputSize specArguments['MaxInputSize'] = streamConfig.Repack.MaxInputSize specArguments['MaxEdmSize'] = streamConfig.Repack.MaxEdmSize specArguments['MaxOverSize'] = streamConfig.Repack.MaxOverSize specArguments['MaxInputEvents'] = streamConfig.Repack.MaxInputEvents specArguments['MaxInputFiles'] = streamConfig.Repack.MaxInputFiles specArguments['UnmergedLFNBase'] = "/store/unmerged/%s" % runInfo['bulk_data_type'] if runInfo['backfill']: specArguments['MergedLFNBase'] = "/store/backfill/%s/%s" % (runInfo['backfill'], runInfo['bulk_data_type']) else: specArguments['MergedLFNBase'] = "/store/%s" % runInfo['bulk_data_type'] specArguments['BlockCloseDelay'] = streamConfig.Repack.BlockCloseDelay elif streamConfig.ProcessingStyle == "Express": taskName = "Express" workflowName = "Express_Run%d_Stream%s" % (run, stream) specArguments = {} specArguments['Group'] = "unknown" specArguments['Requestor'] = "unknown" specArguments['RequestorDN'] = "unknown" specArguments['TimePerEvent'] = 12 specArguments['SizePerEvent'] = 512 specArguments['Memory'] = 1800 if streamConfig.Express.Multicore: specArguments['Multicore'] = streamConfig.Express.Multicore specArguments['Memory'] = 1800 * streamConfig.Express.Multicore specArguments['RequestPriority'] = 0 specArguments['ProcessingString'] = "Express" specArguments['ProcessingVersion'] = streamConfig.Express.ProcessingVersion specArguments['Scenario'] = streamConfig.Express.Scenario specArguments['CMSSWVersion'] = streamConfig.Express.CMSSWVersion specArguments['ScramArch'] = streamConfig.Express.ScramArch specArguments['RecoCMSSWVersion'] = streamConfig.Express.RecoCMSSWVersion specArguments['RecoScramArch'] = streamConfig.Express.RecoScramArch specArguments['GlobalTag'] = streamConfig.Express.GlobalTag specArguments['GlobalTagTransaction'] = "Express_%d" % run specArguments['GlobalTagConnect'] = streamConfig.Express.GlobalTagConnect 
specArguments['MaxInputRate'] = streamConfig.Express.MaxInputRate specArguments['MaxInputEvents'] = streamConfig.Express.MaxInputEvents specArguments['MaxInputSize'] = streamConfig.Express.MaxInputSize specArguments['MaxInputFiles'] = streamConfig.Express.MaxInputFiles specArguments['MaxLatency'] = streamConfig.Express.MaxLatency specArguments['AlcaSkims'] = streamConfig.Express.AlcaSkims specArguments['DqmSequences'] = streamConfig.Express.DqmSequences specArguments['AlcaHarvestTimeout'] = runInfo['ah_timeout'] specArguments['AlcaHarvestDir'] = runInfo['ah_dir'] specArguments['DQMUploadProxy'] = dqmUploadProxy specArguments['DQMUploadUrl'] = runInfo['dqmuploadurl'] specArguments['StreamName'] = stream specArguments['SpecialDataset'] = specialDataset specArguments['UnmergedLFNBase'] = "/store/unmerged/express" specArguments['MergedLFNBase'] = "/store/express" if runInfo['backfill']: specArguments['MergedLFNBase'] = "/store/backfill/%s/express" % runInfo['backfill'] else: specArguments['MergedLFNBase'] = "/store/express" specArguments['PeriodicHarvestInterval'] = streamConfig.Express.PeriodicHarvestInterval specArguments['BlockCloseDelay'] = streamConfig.Express.BlockCloseDelay if streamConfig.ProcessingStyle in [ 'Bulk', 'Express' ]: specArguments['RunNumber'] = run specArguments['AcquisitionEra'] = tier0Config.Global.AcquisitionEra specArguments['Outputs'] = outputModuleDetails specArguments['OverrideCatalog'] = "trivialcatalog_file:/cvmfs/cms.cern.ch/SITECONF/T2_CH_CERN/Tier0/override_catalog.xml?protocol=override" specArguments['ValidStatus'] = "VALID" specArguments['SiteWhitelist'] = [ "T2_CH_CERN_T0" ] specArguments['SiteBlacklist'] = [] if streamConfig.ProcessingStyle == "Bulk": factory = RepackWorkloadFactory() wmSpec = factory.factoryWorkloadConstruction(workflowName, specArguments) wmSpec.setPhEDExInjectionOverride(runInfo['bulk_data_loc']) for subscription in subscriptions: wmSpec.setSubscriptionInformation(**subscription) elif streamConfig.ProcessingStyle == "Express": factory = ExpressWorkloadFactory() wmSpec = factory.factoryWorkloadConstruction(workflowName, specArguments) wmSpec.setPhEDExInjectionOverride(expressPhEDExInjectNode) for subscription in subscriptions: wmSpec.setSubscriptionInformation(**subscription) if streamConfig.ProcessingStyle in [ 'Bulk', 'Express' ]: wmSpec.setOwnerDetails("*****@*****.**", "T0", { 'vogroup': 'DEFAULT', 'vorole': 'DEFAULT', 'dn' : "*****@*****.**" } ) wmSpec.setupPerformanceMonitoring(maxRSS = 10485760, maxVSize = 10485760, softTimeout = 604800, gracePeriod = 3600) wmbsHelper = WMBSHelper(wmSpec, taskName, cachepath = specDirectory) filesetName = "Run%d_Stream%s" % (run, stream) fileset = Fileset(filesetName) # # create workflow (currently either repack or express) # try: myThread.transaction.begin() if len(bindsCMSSWVersion) > 0: insertCMSSWVersionDAO.execute(bindsCMSSWVersion, conn = myThread.transaction.conn, transaction = True) if len(bindsDataset) > 0: insertDatasetDAO.execute(bindsDataset, conn = myThread.transaction.conn, transaction = True) if len(bindsStreamDataset) > 0: insertStreamDatasetDAO.execute(bindsStreamDataset, conn = myThread.transaction.conn, transaction = True) if len(bindsRepackConfig) > 0: insertRepackConfigDAO.execute(bindsRepackConfig, conn = myThread.transaction.conn, transaction = True) if len(bindsPromptCalibration) > 0: insertPromptCalibrationDAO.execute(bindsPromptCalibration, conn = myThread.transaction.conn, transaction = True) if len(bindsExpressConfig) > 0: 
insertExpressConfigDAO.execute(bindsExpressConfig, conn = myThread.transaction.conn, transaction = True) if len(bindsSpecialDataset) > 0: insertSpecialDatasetDAO.execute(bindsSpecialDataset, conn = myThread.transaction.conn, transaction = True) if len(bindsDatasetScenario) > 0: insertDatasetScenarioDAO.execute(bindsDatasetScenario, conn = myThread.transaction.conn, transaction = True) if len(bindsStorageNode) > 0: insertStorageNodeDAO.execute(bindsStorageNode, conn = myThread.transaction.conn, transaction = True) if len(bindsPhEDExConfig) > 0: insertPhEDExConfigDAO.execute(bindsPhEDExConfig, conn = myThread.transaction.conn, transaction = True) insertStreamStyleDAO.execute(bindsStreamStyle, conn = myThread.transaction.conn, transaction = True) if streamConfig.ProcessingStyle in [ 'Bulk', 'Express' ]: insertStreamFilesetDAO.execute(run, stream, filesetName, conn = myThread.transaction.conn, transaction = True) fileset.load() wmbsHelper.createSubscription(wmSpec.getTask(taskName), fileset, alternativeFilesetClose = True) insertWorkflowMonitoringDAO.execute([fileset.id], conn = myThread.transaction.conn, transaction = True) if streamConfig.ProcessingStyle == "Bulk": bindsRecoReleaseConfig = [] for fileset, primds in wmbsHelper.getMergeOutputMapping().items(): bindsRecoReleaseConfig.append( { 'RUN' : run, 'PRIMDS' : primds, 'FILESET' : fileset, 'RECODELAY' : promptRecoDelay[primds], 'RECODELAYOFFSET' : promptRecoDelayOffset[primds] } ) insertRecoReleaseConfigDAO.execute(bindsRecoReleaseConfig, conn = myThread.transaction.conn, transaction = True) elif streamConfig.ProcessingStyle == "Express": markWorkflowsInjectedDAO.execute([workflowName], injected = True, conn = myThread.transaction.conn, transaction = True) except Exception as ex: logging.exception(ex) myThread.transaction.rollback() raise RuntimeError("Problem in configureRunStream() database transaction !") else: myThread.transaction.commit() else: # should we do anything for local runs ? pass return
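The CMSSW version choice in configureRunStream() above is just a dictionary lookup that falls back to the online version itself. A minimal sketch of that pattern, with hypothetical version strings rather than values from a real Tier0 configuration:

# Sketch of the VersionOverride fallback used in configureRunStream().
# The mapping and version strings below are made-up examples.
versionOverride = {"CMSSW_4_2_7": "CMSSW_4_2_8"}

def pickOfflineVersion(onlineVersion, override):
    # Use the configured override when present, otherwise
    # process offline with the same version that ran online.
    return override.get(onlineVersion, onlineVersion)

assert pickOfflineVersion("CMSSW_4_2_7", versionOverride) == "CMSSW_4_2_8"
assert pickOfflineVersion("CMSSW_5_0_1", versionOverride) == "CMSSW_5_0_1"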
def databaseWork(self): """ completed, set the fileset to close (Not implemented yet) """ fileset_watch_temp = [] listFileset = {} fileset_new = {} myThread = threading.currentThread() myThread.transaction.begin() # Get All managed filesets managedFilesets = self.getAllManagedFilesets.execute(\ conn = myThread.transaction.conn, transaction = True) myThread.transaction.commit() logging.debug("Found %s managed filesets" % len(managedFilesets)) for fileset in range(len(managedFilesets)): logging.debug("Processing %s %s" % \ ( managedFilesets[fileset]['id'] , managedFilesets[fileset]['name'] ) ) filesetToUpdate = Fileset(id=managedFilesets[fileset]['id']) filesetToUpdate.load() if managedFilesets[fileset]['name'] not in self.fileset_watch: self.fileset_watch[filesetToUpdate.name] = filesetToUpdate fileset_new[filesetToUpdate.name] = filesetToUpdate listFileset[filesetToUpdate.name] = filesetToUpdate # Update the list of the fileset to watch for oldFileset in self.fileset_watch: if oldFileset not in listFileset: fileset_watch_temp.append(oldFileset) # Remove from the list of the fileset to update the ones which are not # in ManagedFilesets anymore for oldTempFileset in fileset_watch_temp: del self.fileset_watch[oldTempFileset] logging.debug("NEW FILESETS %s" % fileset_new) logging.debug("OLD FILESETS %s" % self.fileset_watch) # WorkQueue work for name, fileset in fileset_new.items(): logging.debug("Will poll %s : %s" % (name, fileset.id)) self.workq.enqueue(name, fileset) for key, filesets in self.workq.__iter__(): fileset = self.fileset_watch[key] logging.debug \ ("the poll key %s result %s is ready !" % (key, str(fileset.id))) myThread.transaction.begin() feederId = self.getFeederId.execute( \ feederType = (fileset.name).split(":")[1], \ conn = myThread.transaction.conn, transaction = True ) myThread.transaction.commit() logging.debug("the Feeder %s has processed %s and is \ removing it if closed" % (feederId, fileset.name)) # Finally delete fileset # If the fileset is closed remove it fileset.load() if fileset.open == False: myThread.transaction.begin() self.removeManagedFilesets.execute( \ filesetId = fileset.id, feederType = feederId, \ conn = myThread.transaction.conn, transaction = True ) myThread.transaction.commit() # Handles old filesets. We update old filesets every 10 mn # We need to make old filesets update cycle configurable if ((time.time() / 60) - self.last_poll_time) > 10: # WorkQueue handles old filesets for name, fileset in self.fileset_watch.items(): logging.debug("Will poll %s : %s" % (name, fileset.id)) self.workq.enqueue(name, fileset) for key, filesets in self.workq.__iter__(): fileset = self.fileset_watch[key] logging.debug \ ("the poll key %s result %s is ready !" % (key, str(fileset.id))) myThread.transaction.begin() feederId = self.getFeederId.execute(\ feederType = (fileset.name).split(":")[1], \ conn = myThread.transaction.conn, transaction = True ) myThread.transaction.commit() logging.debug("the Feeder %s has processed %s and is \ removing it if closed" % (feederId, fileset.name)) # Finally delete fileset # If the fileset is closed remove it fileset.load() if fileset.open == False: myThread.transaction.begin() self.removeManagedFilesets.execute(\ filesetId = fileset.id, feederType = feederId, \ conn = myThread.transaction.conn, transaction = True ) myThread.transaction.commit() # Update the last update time of old filesets self.last_poll_time = time.time() / 60
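The old-fileset cycle above compares timestamps expressed in minutes against a hardcoded 10 minute interval. A self-contained sketch of that timing check, with pollOldFilesets standing in as a placeholder for the enqueue/poll work done above:

import time

POLL_INTERVAL_MINUTES = 10  # hardcoded above; noted there as a candidate for configuration

def maybePollOldFilesets(lastPollTime, pollOldFilesets):
    # lastPollTime is in minutes since the epoch, as in databaseWork().
    # Returns the (possibly refreshed) last poll time.
    if (time.time() / 60 - lastPollTime) > POLL_INTERVAL_MINUTES:
        pollOldFilesets()
        return time.time() / 60
    return lastPollTime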
def databaseWork(self):
    """
    Query the database for all watched filesets and workflows; whenever a
    fileset matches a managed workflow's fileset_match pattern, create the
    corresponding subscription.
    """
    # Get all watched workflows
    availableWorkflows = self.getUnsubscribedWorkflows.execute()
    logging.debug("Found %s unsubscribed managed workflows" % len(availableWorkflows))

    # Get all filesets to check if they match a workflow
    availableFilesets = self.getAllFilesets.execute()
    logging.debug("Found %s filesets" % len(availableFilesets))

    # Loop on unsubscribed workflows to match filesets
    for managedWorkflow in availableWorkflows:
        # Workflow object cache to pass into Subscription constructor
        wfObj = None
        for fileset in availableFilesets:
            # Fileset object cache
            fsObj = None
            # Load the location information
            #whitelist = Set()
            #blacklist = Set()
            # Location is only caf
            #locations = self.queries.getLocations(managedWorkflow['id'])
            #for location in locations:
            #    if bool(int(location['valid'])) == True:
            #        whitelist.add(location['site_name'])
            #    else:
            #        blacklist.add(location['site_name'])
            # Attempt to match workflows to filesets
            if re.match(managedWorkflow['fileset_match'], fileset['name']):
                # Log in debug
                msg = "Creating subscription for %s to workflow id %s"
                msg %= (fileset['name'], managedWorkflow['workflow'])
                logging.debug(msg)
                # Match found - load the fileset if not already loaded
                if not fsObj:
                    fsObj = Fileset(id = fileset['id'])
                    fsObj.load()
                # Load the workflow if not already loaded
                if not wfObj:
                    wfObj = Workflow(id = managedWorkflow['workflow'])
                    wfObj.load()
                # Create the subscription
                newSub = Subscription(fileset = fsObj,
                                      workflow = wfObj,
                                      #whitelist = whitelist,
                                      #blacklist = blacklist,
                                      split_algo = managedWorkflow['split_algo'],
                                      type = managedWorkflow['type'])
                newSub.create()

    managedWorkflows = self.getManagedWorkflows.execute()
    logging.debug("Found %s managed workflows" % len(managedWorkflows))

    unsubscribedFilesets = self.getUnsubscribedFilesets.execute()
    logging.debug("Found %s unsubscribed filesets" % len(unsubscribedFilesets))

    # Loop on unsubscribed filesets to match workflows
    for unsubscribedFileset in unsubscribedFilesets:
        for managedWork in managedWorkflows:
            logging.debug("The workflow %s" % managedWork['workflow'])
            # Workflow and fileset object caches to pass into
            # the Subscription constructor
            wfObj = None
            fsObj = None
            # Attempt to match workflows to filesets
            if re.match(managedWork['fileset_match'], unsubscribedFileset['name']):
                # Log in debug
                msg = "Creating subscription for %s to workflow id %s"
                msg %= (unsubscribedFileset['name'], managedWork['workflow'])
                logging.debug(msg)
                # Match found - load the fileset if not already loaded
                if not fsObj:
                    fsObj = Fileset(id = unsubscribedFileset['id'])
                    fsObj.load()
                # Load the workflow if not already loaded
                if not wfObj:
                    wfObj = Workflow(id = managedWork['workflow'])
                    wfObj.load()
                # Create the subscription
                newSub = Subscription(fileset = fsObj,
                                      workflow = wfObj,
                                      split_algo = managedWork['split_algo'],
                                      type = managedWork['type'])
                newSub.create()
                newSub.load()
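Both matching loops above reduce to re.match of a workflow's fileset_match pattern against fileset names. A standalone sketch of that step with made-up patterns and names (the real ones come from the managed workflow table):

import re

# Hypothetical examples of the matching step in databaseWork().
managedWorkflow = {'workflow': 1, 'fileset_match': r'Run[0-9]+_StreamA.*'}
filesets = [{'id': 10, 'name': 'Run195360_StreamA:T0:RAW:None'},
            {'id': 11, 'name': 'Run195360_StreamB:T0:RAW:None'}]

# re.match anchors at the start of the string, so unanchored
# patterns behave as prefix matches.
matched = [fs for fs in filesets
           if re.match(managedWorkflow['fileset_match'], fs['name'])]
assert [fs['id'] for fs in matched] == [10]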
def __call__(self, filesetToProcess): """ The algorithm itself """ # Get configuration initObj = WMInit() initObj.setLogging() initObj.setDatabaseConnection(os.getenv("DATABASE"), \ os.getenv('DIALECT'), os.getenv("DBSOCK")) myThread = threading.currentThread() daofactory = DAOFactory(package = "WMCore.WMBS" , \ logger = myThread.logger, \ dbinterface = myThread.dbi) lastFileset = daofactory(classname="Fileset.ListFilesetByTask") lastWorkflow = daofactory(classname="Workflow.LoadFromTask") subsRun = daofactory(\ classname = "Subscriptions.LoadFromFilesetWorkflow") successJob = daofactory(classname="Subscriptions.SucceededJobs") allJob = daofactory(classname="Subscriptions.Jobs") fileInFileset = daofactory(classname="Files.InFileset") # Get the start Run if asked startRun = (filesetToProcess.name).split(":")[3] logging.debug("the T0Feeder is processing %s" % \ filesetToProcess.name) logging.debug("the fileset name %s" % \ (filesetToProcess.name).split(":")[0]) fileType = (filesetToProcess.name).split(":")[2] crabTask = filesetToProcess.name.split(":")[0] LASTIME = filesetToProcess.lastUpdate tries = 1 while True: try: myRequester = JSONRequests(url="vocms52.cern.ch:8889") requestResult = myRequester.get("/tier0/runs") except: logging.debug("T0Reader call error...") if tries == self.maxRetries: return else: tries += 1 continue logging.debug("T0ASTRunChain feeder queries done ...") now = time.time() break for listRun in requestResult[0]: if startRun != 'None' and int(listRun['run']) >= int(startRun): if listRun['status'] =='CloseOutExport' or listRun\ ['status']=='Complete' or listRun['status']=='CloseOutT1Skimming': crabWorkflow = lastWorkflow.execute(task=crabTask) crabFileset = lastFileset.execute\ (task=crabTask) crabrunFileset = Fileset(\ name = crabFileset[0]["name"].split(':')[0].split\ ('-Run')[0]+ '-Run' + str(listRun['run']) + ":" + \ ":".join(crabFileset[0]['name'].split(':')[1:]) ) if crabrunFileset.exists() > 0: crabrunFileset.load() currSubs = subsRun.execute\ (crabrunFileset.id, crabWorkflow[0]['id']) if currSubs: listsuccessJob = successJob.execute(\ subscription=currSubs['id']) listallJob = allJob.execute(\ subscription=currSubs['id']) if len(listsuccessJob) == len(listallJob): for currid in listsuccessJob: currjob = Job(id=currid) currjob.load() logging.debug("Reading FJR %s" % currjob['fwjr_path']) jobReport = readJobReport( currjob['fwjr_path']) if len(jobReport) > 0: if jobReport[0].files: for newFile in jobReport[0].files: logging.debug(\ "Output path %s" %newFile['LFN']) newFileToAdd = File(\ lfn=newFile['LFN'], locations ='caf.cern.ch') LOCK.acquire() if newFileToAdd.exists\ () == False : newFileToAdd.create() else: newFileToAdd.loadData() LOCK.release() listFile = \ fileInFileset.execute(filesetToProcess.id) if {'fileid': \ newFileToAdd['id']} not in listFile: filesetToProcess.addFile(\ newFileToAdd) filesetToProcess\ .setLastUpdate(now) filesetToProcess.commit() logging.debug(\ "new file created/loaded and added by T0ASTRunChain...") elif jobReport[0].analysisFiles: for newFile in jobReport\ [0].analysisFiles: logging.debug(\ "Ouput path %s " %newFile['LFN']) newFileToAdd = File(\ lfn=newFile['LFN'], locations ='caf.cern.ch') LOCK.acquire() if newFileToAdd.exists\ () == False : newFileToAdd.create() else: newFileToAdd.loadData() LOCK.release() listFile = \ fileInFileset.execute(filesetToProcess.id) if {'fileid': newFileToAdd\ ['id']} not in listFile: logging.debug\ ("%s loaded and added by T0ASTRunChain" %newFile['LFN']) filesetToProcess.addFile\ (newFileToAdd) 
filesetToProcess.\ setLastUpdate(now) filesetToProcess.commit() logging.debug(\ "new file created/loaded and added by T0ASTRunChain...") else: break # Missed fjr - try next time # Commit the fileset logging.debug("Test purge in T0ASTRunChain ...") filesetToProcess.load() LASTIME = filesetToProcess.lastUpdate # For re-opened or empty filesets, try until the purge time if (int(now) / 3600 - LASTIME / 3600) > self.reopenTime: filesetToProcess.setLastUpdate(time.time()) filesetToProcess.commit() if (int(now) / 3600 - LASTIME / 3600) > self.purgeTime: filesetToProcess.markOpen(False) logging.debug("Purge Done...")
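The closeout above applies two independent hour-granularity checks against the fileset's last update time. A small sketch of those checks, with reopenTime and purgeTime in hours as in the feeder:

def hoursIdle(now, lastUpdate):
    # Hour-granularity age, matching the int(now)/3600 - LASTIME/3600
    # arithmetic above (integer division keeps whole hours).
    return int(now) // 3600 - int(lastUpdate) // 3600

# Both checks run independently, as in the feeder: a stale fileset
# first gets a fresh timestamp, and past the purge time it is closed.
now, lastUpdate, reopenTime, purgeTime = 4 * 3600, 0, 1, 3
if hoursIdle(now, lastUpdate) > reopenTime:
    pass  # refresh: filesetToProcess.setLastUpdate(time.time())
if hoursIdle(now, lastUpdate) > purgeTime:
    pass  # purge: filesetToProcess.markOpen(False)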
class ExpressTest(unittest.TestCase): """ _ExpressTest_ Test for Express job splitter """ def setUp(self): """ _setUp_ """ self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setSchema(customModules=["T0.WMBS"]) self.splitterFactory = SplitterFactory(package="T0.JobSplitting") myThread = threading.currentThread() daoFactory = DAOFactory(package="T0.WMBS", logger=logging, dbinterface=myThread.dbi) myThread.dbi.processData("""INSERT INTO wmbs_location (id, site_name, state) VALUES (1, 'SomeSite', 1) """, transaction=False) myThread.dbi.processData("""INSERT INTO wmbs_location_pnns (location, pnn) VALUES (1, 'SomePNN') """, transaction=False) myThread.dbi.processData("""INSERT INTO wmbs_location_pnns (location, pnn) VALUES (1, 'SomePNN2') """, transaction=False) insertRunDAO = daoFactory(classname="RunConfig.InsertRun") insertRunDAO.execute(binds={ 'RUN': 1, 'TIME': int(time.time()), 'HLTKEY': "someHLTKey" }, transaction=False) insertLumiDAO = daoFactory(classname="RunConfig.InsertLumiSection") for lumi in [1, 2]: insertLumiDAO.execute(binds={ 'RUN': 1, 'LUMI': lumi }, transaction=False) insertStreamDAO = daoFactory(classname="RunConfig.InsertStream") insertStreamDAO.execute(binds={'STREAM': "Express"}, transaction=False) insertStreamFilesetDAO = daoFactory( classname="RunConfig.InsertStreamFileset") insertStreamFilesetDAO.execute(1, "Express", "TestFileset1") self.fileset1 = Fileset(name="TestFileset1") self.fileset1.load() workflow1 = Workflow(spec="spec.xml", owner="hufnagel", name="TestWorkflow1", task="Test") workflow1.create() self.subscription1 = Subscription(fileset=self.fileset1, workflow=workflow1, split_algo="Express", type="Express") self.subscription1.create() # keep for later self.insertClosedLumiDAO = daoFactory( classname="RunLumiCloseout.InsertClosedLumi") self.releaseExpressDAO = daoFactory( classname="Tier0Feeder.ReleaseExpress") self.currentTime = int(time.time()) return def tearDown(self): """ _tearDown_ """ self.testInit.clearDatabase() return def finalCloseLumis(self): """ _finalCloseLumis_ """ myThread = threading.currentThread() myThread.dbi.processData("""UPDATE lumi_section_closed SET close_time = 1 """, transaction=False) return def getNumActiveSplitLumis(self): """ _getNumActiveSplitLumis_ helper function that counts the number of active split lumis """ myThread = threading.currentThread() results = myThread.dbi.processData("""SELECT COUNT(*) FROM lumi_section_split_active """, transaction=False)[0].fetchall() return results[0][0] def test00(self): """ _test00_ Test that the job name prefix feature works Test event threshold (single job creation) Test that only closed lumis are used Test check on express release """ insertClosedLumiBinds = [] for lumi in [1]: filecount = 2 for i in range(filecount): newFile = File(makeUUID(), size=1000, events=100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomePNN", immediateSave=False) newFile.create() self.fileset1.addFile(newFile) insertClosedLumiBinds.append({ 'RUN': 1, 'LUMI': lumi, 'STREAM': "Express", 'FILECOUNT': filecount, 'INSERT_TIME': self.currentTime, 'CLOSE_TIME': 0 }) self.fileset1.commit() jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1) jobGroups = jobFactory(maxInputEvents=200) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") self.insertClosedLumiDAO.execute(binds=insertClosedLumiBinds, transaction=False) jobGroups = jobFactory(maxInputEvents=200) 
self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") self.finalCloseLumis() jobGroups = jobFactory(maxInputEvents=200) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") self.releaseExpressDAO.execute(binds={'RUN': 1}, transaction=False) jobGroups = jobFactory(maxInputEvents=200) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create a single job") job = jobGroups[0].jobs[0] self.assertTrue(job['name'].startswith("Express-"), "ERROR: Job has wrong name") self.assertEqual(self.getNumActiveSplitLumis(), 0, "ERROR: Split lumis were created") return def test01(self): """ _test01_ Test event threshold (multiple job creation) """ insertClosedLumiBinds = [] for lumi in [1]: filecount = 2 for i in range(filecount): newFile = File(makeUUID(), size=1000, events=100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomePNN", immediateSave=False) newFile.create() self.fileset1.addFile(newFile) insertClosedLumiBinds.append({ 'RUN': 1, 'LUMI': lumi, 'STREAM': "Express", 'FILECOUNT': filecount, 'INSERT_TIME': self.currentTime, 'CLOSE_TIME': self.currentTime }) self.fileset1.commit() jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1) self.insertClosedLumiDAO.execute(binds=insertClosedLumiBinds, transaction=False) self.releaseExpressDAO.execute(binds={'RUN': 1}, transaction=False) jobGroups = jobFactory(maxInputEvents=199) self.assertEqual(len(jobGroups[0].jobs), 2, "ERROR: JobFactory didn't create two jobs") self.assertEqual(self.getNumActiveSplitLumis(), 1, "ERROR: Didn't create a single split lumi") return def test02(self): """ _test02_ Test multi lumis """ insertClosedLumiBinds = [] for lumi in [1, 2]: filecount = 1 for i in range(filecount): newFile = File(makeUUID(), size=1000, events=100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomePNN", immediateSave=False) newFile.create() self.fileset1.addFile(newFile) insertClosedLumiBinds.append({ 'RUN': 1, 'LUMI': lumi, 'STREAM': "Express", 'FILECOUNT': filecount, 'INSERT_TIME': self.currentTime, 'CLOSE_TIME': self.currentTime }) self.fileset1.commit() jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1) self.insertClosedLumiDAO.execute(binds=insertClosedLumiBinds, transaction=False) self.releaseExpressDAO.execute(binds={'RUN': 1}, transaction=False) jobGroups = jobFactory(maxInputEvents=100) self.assertEqual(len(jobGroups[0].jobs), 2, "ERROR: JobFactory didn't create two jobs") self.assertEqual(self.getNumActiveSplitLumis(), 0, "ERROR: Split lumis were created") return
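test01 above expects two jobs when two 100-event files meet a 199-event threshold. A toy sketch of event-threshold grouping, not the real Express splitter (which also tracks lumi sections and split-lumi accounting):

def groupByEvents(files, maxInputEvents):
    # Greedy grouping: start a new job whenever adding the next file
    # would push the running event count past maxInputEvents.
    # Each file is a (name, events) pair.
    jobs, current, events = [], [], 0
    for name, fileEvents in files:
        if current and events + fileEvents > maxInputEvents:
            jobs.append(current)
            current, events = [], 0
        current.append(name)
        events += fileEvents
    if current:
        jobs.append(current)
    return jobs

# Two 100-event files with a 199-event cap split into two jobs,
# matching the expectation in test01; a 200-event cap keeps them together.
assert len(groupByEvents([("f1", 100), ("f2", 100)], 199)) == 2
assert len(groupByEvents([("f1", 100), ("f2", 100)], 200)) == 1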
class JobGroup(WMBSBase, WMJobGroup): """ A group (set) of Jobs """ def __init__(self, subscription=None, jobs=None, id=-1, uid=None, location=None): WMBSBase.__init__(self) WMJobGroup.__init__(self, subscription=subscription, jobs=jobs) self.id = id self.lastUpdate = None self.uid = uid if location != None: self.setSite(location) return def create(self): """ Add the new jobgroup to WMBS, create the output Fileset object """ myThread = threading.currentThread() existingTransaction = self.beginTransaction() #overwrite base class self.output for WMBS fileset self.output = Fileset(name=makeUUID()) self.output.create() if self.uid == None: self.uid = makeUUID() action = self.daofactory(classname="JobGroup.New") action.execute(self.uid, self.subscription["id"], self.output.id, conn=self.getDBConn(), transaction=self.existingTransaction()) self.id = self.exists() self.commitTransaction(existingTransaction) return def delete(self): """ Remove a jobgroup from WMBS """ deleteAction = self.daofactory(classname="JobGroup.Delete") deleteAction.execute(id=self.id, conn=self.getDBConn(), transaction=self.existingTransaction()) return def exists(self): """ Does a jobgroup exist with id if id is not provided, use the uid, return the id """ if self.id != -1: action = self.daofactory(classname="JobGroup.ExistsByID") result = action.execute(id=self.id, conn=self.getDBConn(), transaction=self.existingTransaction()) else: action = self.daofactory(classname="JobGroup.Exists") result = action.execute(uid=self.uid, conn=self.getDBConn(), transaction=self.existingTransaction()) return result def load(self): """ _load_ Load all meta data associated with the JobGroup. This includes the JobGroup id, uid, last_update time, subscription id and output fileset id. Either the JobGroup id or uid must be specified for this to work. """ existingTransaction = self.beginTransaction() if self.id > 0: loadAction = self.daofactory(classname="JobGroup.LoadFromID") result = loadAction.execute(self.id, conn=self.getDBConn(), transaction=self.existingTransaction()) else: loadAction = self.daofactory(classname="JobGroup.LoadFromUID") result = loadAction.execute(self.uid, conn=self.getDBConn(), transaction=self.existingTransaction()) self.id = result["id"] self.uid = result["uid"] self.lastUpdate = result["last_update"] self.subscription = Subscription(id=result["subscription"]) self.subscription.load() self.output = Fileset(id=result["output"]) self.output.load() self.jobs = [] self.commitTransaction(existingTransaction) return def loadData(self): """ _loadData_ Load all data that is associated with the jobgroup. This includes loading all the subscription information, the output fileset information and all the jobs that are associated with the group. """ existingTransaction = self.beginTransaction() if self.id < 0 or self.uid == None: self.load() self.subscription.loadData() self.output.loadData() loadAction = self.daofactory(classname="JobGroup.LoadJobs") result = loadAction.execute(self.id, conn=self.getDBConn(), transaction=self.existingTransaction()) self.jobs = [] self.newjobs = [] for jobID in result: newJob = Job(id=jobID["id"]) newJob.loadData() self.add(newJob) WMJobGroup.commit(self) self.commitTransaction(existingTransaction) return def commit(self): """ _commit_ Write any new jobs to the database, creating them in the database if necessary. 
""" existingTransaction = self.beginTransaction() if self.id == -1: self.create() for j in self.newjobs: j.create(group=self) WMJobGroup.commit(self) self.commitTransaction(existingTransaction) return def setSite(self, site_name=None): """ Updates the jobGroup with a site_name from the wmbs_location table """ if not self.exists(): return action = self.daofactory(classname="JobGroup.SetSite") result = action.execute(site_name=site_name, jobGroupID=self.id, conn=self.getDBConn(), transaction=self.existingTransaction()) return result def getSite(self): """ Updates the jobGroup with a site_name from the wmbs_location table """ if not self.exists(): return action = self.daofactory(classname="JobGroup.GetSite") result = action.execute(jobGroupID=self.id, conn=self.getDBConn(), transaction=self.existingTransaction()) return result def listJobIDs(self): """ Returns a list of job IDs Useful for times when threading the loading of jobGroups, where running loadData can overload UUID """ existingTransaction = self.beginTransaction() if self.id < 0 or self.uid == None: self.load() loadAction = self.daofactory(classname="JobGroup.LoadJobs") result = loadAction.execute(self.id, conn=self.getDBConn(), transaction=self.existingTransaction()) jobIDList = [] for jobID in result: jobIDList.append(jobID["id"]) self.commitTransaction(existingTransaction) return jobIDList def commitBulk(self): """ Creates jobs in a group instead of singly, as is done in jobGroup.commit() """ myThread = threading.currentThread() if self.id == -1: myThread.transaction.begin() #existingTransaction = self.beginTransaction() self.create() #self.commitTransaction(existingTransaction) myThread.transaction.commit() existingTransaction = self.beginTransaction() listOfJobs = [] for job in self.newjobs: #First do all the header stuff if job["id"] != None: continue job["jobgroup"] = self.id if job["name"] == None: job["name"] = makeUUID() listOfJobs.append(job) bulkAction = self.daofactory(classname="Jobs.New") result = bulkAction.execute(jobList=listOfJobs) #Use the results of the bulk commit to get the jobIDs fileDict = {} for job in listOfJobs: job['id'] = result[job['name']] fileDict[job['id']] = [] for file in job['input_files']: fileDict[job['id']].append(file['id']) maskAction = self.daofactory(classname="Masks.New") maskAction.execute(jobList = listOfJobs, conn = self.getDBConn(), \ transaction = self.existingTransaction()) fileAction = self.daofactory(classname="Jobs.AddFiles") fileAction.execute(jobDict = fileDict, conn = self.getDBConn(), \ transaction = self.existingTransaction()) WMJobGroup.commit(self) self.commitTransaction(existingTransaction) return def getLocationsForJobs(self): """ Gets a list of the locations that jobs can run at """ if not self.exists(): return action = self.daofactory(classname="JobGroup.GetLocationsForJobs") result = action.execute(id=self.id, conn=self.getDBConn(), transaction=self.existingTransaction()) return result def __str__(self): """ __str__ Print out some information about the jobGroup as if jobGroup inherited from dict() """ d = { 'id': self.id, 'uid': self.uid, 'subscription': self.subscription, 'output': self.output, 'jobs': self.jobs, 'newjobs': self.newjobs } return str(d)
def setupPromptRecoWorkflow(self): """ _setupPromptRecoWorkflow_ Populate WMBS with a real PromptReco workflow, every subscription must be unfinished at first """ # Populate disk and WMBS testArguments = PromptRecoWorkloadFactory.getTestArguments() workflowName = 'PromptReco_Run195360_Cosmics' factory = PromptRecoWorkloadFactory() testArguments["EnableHarvesting"] = True testArguments["CouchURL"] = os.environ["COUCHURL"] workload = factory.factoryWorkloadConstruction(workflowName, testArguments) wmbsHelper = WMBSHelper(workload, 'Reco', 'SomeBlock', cachepath=self.testDir) wmbsHelper.createTopLevelFileset() wmbsHelper._createSubscriptionsInWMBS(wmbsHelper.topLevelTask, wmbsHelper.topLevelFileset) self.stateMap = {'AlcaSkim': [], 'Merge': [], 'Harvesting': [], 'Processing Done': []} self.orderedStates = ['AlcaSkim', 'Merge', 'Harvesting', 'Processing Done'] # Populate WMStats self.requestDBWriter.insertGenericRequest({'RequestName': workflowName}) self.requestDBWriter.updateRequestStatus(workflowName, 'Closed') topLevelTask = '/%s/Reco' % workflowName alcaSkimTask = '%s/AlcaSkim' % topLevelTask mergeTasks = ['%s/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics', '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T', '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics', '%s/RecoMergewrite_AOD', '%s/RecoMergewrite_DQM', '%s/RecoMergewrite_RECO'] harvestingTask = '%s/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged' % topLevelTask self.stateMap['AlcaSkim'].append(wmbsHelper.topLevelSubscription) alcaSkimWorkflow = Workflow(name=workflowName, task=alcaSkimTask) alcaSkimWorkflow.load() alcarecoFileset = Fileset(name='/PromptReco_Run195360_Cosmics/Reco/unmerged-write_ALCARECOALCARECO') alcarecoFileset.load() alcaSkimSub = Subscription(alcarecoFileset, alcaSkimWorkflow) alcaSkimSub.load() self.stateMap['Merge'].append(alcaSkimSub) for task in mergeTasks: mergeTask = task % topLevelTask mergeWorkflow = Workflow(name=workflowName, task=mergeTask) mergeWorkflow.load() if 'AlcaSkim' in mergeTask: stream = mergeTask.split('/')[-1][13:] unmergedFileset = Fileset(name='%s/unmerged-%sALCARECO' % (alcaSkimTask, stream)) unmergedFileset.load() else: dataTier = mergeTask.split('/')[-1].split('_')[-1] unmergedFileset = Fileset(name='%s/unmerged-write_%s%s' % (topLevelTask, dataTier, dataTier)) unmergedFileset.load() mergeSub = Subscription(unmergedFileset, mergeWorkflow) mergeSub.load() self.stateMap['Harvesting'].append(mergeSub) harvestingWorkflow = Workflow(name=workflowName, task=harvestingTask) harvestingWorkflow.load() harvestingFileset = Fileset(name='/PromptReco_Run195360_Cosmics/Reco/RecoMergewrite_DQM/merged-MergedDQM') harvestingFileset.load() harvestingSub = Subscription(harvestingFileset, harvestingWorkflow) harvestingSub.load() self.stateMap['Processing Done'].append(harvestingSub) return
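The unmerged fileset names above are derived mechanically from the merge task paths; for the non-AlcaSkim tasks the data tier is the suffix after the last underscore, while AlcaSkim merges slice the stream name off after 'AlcaSkimMerge'. A tiny check of both derivations:

topLevelTask = '/PromptReco_Run195360_Cosmics/Reco'
mergeTask = '%s/RecoMergewrite_AOD' % topLevelTask

# Last path component is 'RecoMergewrite_AOD'; the tier follows
# the final underscore, as in the test above.
dataTier = mergeTask.split('/')[-1].split('_')[-1]
assert dataTier == 'AOD'
assert '%s/unmerged-write_%s%s' % (topLevelTask, dataTier, dataTier) == \
    '/PromptReco_Run195360_Cosmics/Reco/unmerged-write_AODAOD'

alcaTask = '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T' % topLevelTask
# len('AlcaSkimMerge') == 13, hence the [13:] slice in the test
assert alcaTask.split('/')[-1][13:] == 'ALCARECOStreamTkAlCosmics0T'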
def __call__(self, parameters):
    """
    Perform the work required with the given parameters
    """
    DefaultSlave.__call__(self, parameters)

    # Handle the message
    message = self.messageArgs

    # Lock on the running feeders list
    myThread = threading.currentThread()
    myThread.runningFeedersLock.acquire()

    # Create empty fileset if fileset.name doesn't exist
    filesetName = message["dataset"]
    feederType = message["FeederType"]
    fileType = message["FileType"]
    startRun = message["StartRun"]

    logging.debug("Dataset %s arrived", filesetName)

    fileset = Fileset(name=filesetName + ':' + feederType + ':' +
                      fileType + ':' + startRun)

    # Check if the fileset is already there
    if not fileset.exists():

        # Empty fileset creation
        fileset.create()
        fileset.setLastUpdate(0)

        logging.info("Fileset %s with id %s is added",
                     fileset.name, str(fileset.id))

        # Get feeder type
        feederType = message["FeederType"]

        # Check if there is a running feeder
        if feederType in myThread.runningFeeders:
            logging.info("HAVE FEEDER %s RUNNING", feederType)
            logging.info(myThread.runningFeeders[feederType])
        else:
            logging.info("NO FEEDER %s RUNNING", feederType)

            # Check if we have a feeder in DB
            if self.queries.checkFeeder(feederType):
                # Have feeder, get info
                logging.info("Getting Feeder from DB")
                feederId = self.queries.getFeederId(feederType)
                logging.info(feederId)
                myThread.runningFeeders[feederType] = feederId
            else:
                # Create feeder
                logging.info("Adding Feeder to DB")
                self.queries.addFeeder(feederType, "StatePath")
                feederId = self.queries.getFeederId(feederType)
                logging.info(feederId)
                myThread.runningFeeders[feederType] = feederId

        # Fileset/Feeder association
        self.queries.addFilesetToManage(fileset.id,
                                        myThread.runningFeeders[feederType])
        logging.info("Fileset %s is added to feeder %s",
                     fileset.id, myThread.runningFeeders[feederType])

    else:

        # If the fileset already exists, a new subscription
        # will be created for its workflow
        logging.info("Fileset exists: Subscription will be created for it")

        # Reopen the fileset if it is closed
        fileset.load()
        if not fileset.open:
            fileset.markOpen(True)

        logging.info("Getting Feeder from DB")
        feederId = self.queries.getFeederId(feederType)
        logging.info(feederId)
        myThread.runningFeeders[feederType] = feederId

        self.queries.addFilesetToManage(fileset.id,
                                        myThread.runningFeeders[feederType])
        logging.info("Fileset %s is added to feeder %s",
                     fileset.id, myThread.runningFeeders[feederType])

    myThread.runningFeedersLock.release()
    myThread.msgService.finish()
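# Stand-alone illustration of the composite fileset naming convention used
# above, "<dataset>:<feederType>:<fileType>:<startRun>".  The helper below
# is hypothetical, not part of WMCore; dataset names contain '/' but no ':',
# so rsplit recovers the four fields.
def makeFilesetName(dataset, feederType, fileType, startRun):
    return ":".join([dataset, feederType, fileType, str(startRun)])

name = makeFilesetName("/MinBias/Run2012A/RAW", "DBSFeeder", "file", 1)
dataset, feederType, fileType, startRun = name.rsplit(":", 3)
print(name)   # /MinBias/Run2012A/RAW:DBSFeeder:file:1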
class ExpressTest(unittest.TestCase):
    """
    _ExpressTest_

    Test for Express job splitter
    """

    def setUp(self):
        """
        _setUp_

        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules = ["WMComponent.DBS3Buffer", "T0.WMBS"])

        self.splitterFactory = SplitterFactory(package = "T0.JobSplitting")

        myThread = threading.currentThread()
        daoFactory = DAOFactory(package = "T0.WMBS",
                                logger = logging,
                                dbinterface = myThread.dbi)

        myThread.dbi.processData("""INSERT INTO wmbs_location
                                    (id, site_name, state, state_time)
                                    VALUES (1, 'SomeSite', 1, 1)
                                    """, transaction = False)
        myThread.dbi.processData("""INSERT INTO wmbs_pnns
                                    (id, pnn)
                                    VALUES (2, 'SomePNN')
                                    """, transaction = False)
        myThread.dbi.processData("""INSERT INTO wmbs_location_pnns
                                    (location, pnn)
                                    VALUES (1, 2)
                                    """, transaction = False)

        insertRunDAO = daoFactory(classname = "RunConfig.InsertRun")
        insertRunDAO.execute(binds = { 'RUN' : 1, 'HLTKEY' : "someHLTKey" },
                             transaction = False)

        insertLumiDAO = daoFactory(classname = "RunConfig.InsertLumiSection")
        for lumi in [1, 2]:
            insertLumiDAO.execute(binds = { 'RUN' : 1, 'LUMI' : lumi },
                                  transaction = False)

        insertStreamDAO = daoFactory(classname = "RunConfig.InsertStream")
        insertStreamDAO.execute(binds = { 'STREAM' : "Express" },
                                transaction = False)

        insertStreamFilesetDAO = daoFactory(classname = "RunConfig.InsertStreamFileset")
        insertStreamFilesetDAO.execute(1, "Express", "TestFileset1")

        self.fileset1 = Fileset(name = "TestFileset1")
        self.fileset1.load()

        workflow1 = Workflow(spec = "spec.xml",
                             owner = "hufnagel",
                             name = "TestWorkflow1",
                             task = "Test")
        workflow1.create()

        self.subscription1 = Subscription(fileset = self.fileset1,
                                          workflow = workflow1,
                                          split_algo = "Express",
                                          type = "Express")
        self.subscription1.create()

        # keep for later
        self.insertClosedLumiDAO = daoFactory(classname = "RunLumiCloseout.InsertClosedLumi")
        self.releaseExpressDAO = daoFactory(classname = "Tier0Feeder.ReleaseExpress")

        self.currentTime = int(time.time())

        return

    def tearDown(self):
        """
        _tearDown_

        """
        self.testInit.clearDatabase()
        return

    def finalCloseLumis(self):
        """
        _finalCloseLumis_

        """
        myThread = threading.currentThread()
        myThread.dbi.processData("""UPDATE lumi_section_closed
                                    SET close_time = 1
                                    """, transaction = False)
        return

    def getNumActiveSplitLumis(self):
        """
        _getNumActiveSplitLumis_

        helper function that counts the number of active split lumis
        """
        myThread = threading.currentThread()
        results = myThread.dbi.processData("""SELECT COUNT(*)
                                              FROM lumi_section_split_active
                                              """, transaction = False)[0].fetchall()
        return results[0][0]

    def test00(self):
        """
        _test00_

        Test that the job name prefix feature works
        Test event threshold (single job creation)
        Test that only closed lumis are used
        Test check on express release
        """
        insertClosedLumiBinds = []
        for lumi in [1]:
            filecount = 2
            for i in range(filecount):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset1.addFile(newFile)
            insertClosedLumiBinds.append( { 'RUN' : 1,
                                            'LUMI' : lumi,
                                            'STREAM' : "Express",
                                            'FILECOUNT' : filecount,
                                            'INSERT_TIME' : self.currentTime,
                                            'CLOSE_TIME' : 0 } )
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription1)

        jobGroups = jobFactory(maxInputEvents = 200, maxInputRate = 23000)
        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.insertClosedLumiDAO.execute(binds = insertClosedLumiBinds,
                                         transaction = False)
        jobGroups = jobFactory(maxInputEvents = 200, maxInputRate = 23000)
        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.finalCloseLumis()
        jobGroups = jobFactory(maxInputEvents = 200, maxInputRate = 23000)
        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.releaseExpressDAO.execute(binds = { 'RUN' : 1 }, transaction = False)
        jobGroups = jobFactory(maxInputEvents = 200, maxInputRate = 23000)
        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")
        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create a single job")

        job = jobGroups[0].jobs[0]
        self.assertTrue(job['name'].startswith("Express-"),
                        "ERROR: Job has wrong name")

        self.assertEqual(self.getNumActiveSplitLumis(), 0,
                         "ERROR: Split lumis were created")

        return

    def test01(self):
        """
        _test01_

        Test event threshold (multiple job creation)
        """
        insertClosedLumiBinds = []
        for lumi in [1]:
            filecount = 2
            for i in range(filecount):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset1.addFile(newFile)
            insertClosedLumiBinds.append( { 'RUN' : 1,
                                            'LUMI' : lumi,
                                            'STREAM' : "Express",
                                            'FILECOUNT' : filecount,
                                            'INSERT_TIME' : self.currentTime,
                                            'CLOSE_TIME' : self.currentTime } )
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription1)

        self.insertClosedLumiDAO.execute(binds = insertClosedLumiBinds,
                                         transaction = False)

        self.releaseExpressDAO.execute(binds = { 'RUN' : 1 }, transaction = False)

        jobGroups = jobFactory(maxInputEvents = 199, maxInputRate = 23000)
        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        self.assertEqual(self.getNumActiveSplitLumis(), 1,
                         "ERROR: Didn't create a single split lumi")

        return

    def test02(self):
        """
        _test02_

        Test multi lumis
        """
        insertClosedLumiBinds = []
        for lumi in [1, 2]:
            filecount = 1
            for i in range(filecount):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset1.addFile(newFile)
            insertClosedLumiBinds.append( { 'RUN' : 1,
                                            'LUMI' : lumi,
                                            'STREAM' : "Express",
                                            'FILECOUNT' : filecount,
                                            'INSERT_TIME' : self.currentTime,
                                            'CLOSE_TIME' : self.currentTime } )
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription1)

        self.insertClosedLumiDAO.execute(binds = insertClosedLumiBinds,
                                         transaction = False)

        self.releaseExpressDAO.execute(binds = { 'RUN' : 1 }, transaction = False)

        jobGroups = jobFactory(maxInputEvents = 100, maxInputRate = 23000)
        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        self.assertEqual(self.getNumActiveSplitLumis(), 0,
                         "ERROR: Split lumis were created")

        return
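# Hedged sketch of the event-threshold behaviour the tests above exercise:
# files accumulate into a job until adding one more would exceed
# maxInputEvents, then a new job starts.  Pure Python, not the actual T0
# Express splitter.
def splitByEvents(files, maxInputEvents):
    jobs, current, events = [], [], 0
    for f in files:
        if current and events + f["events"] > maxInputEvents:
            jobs.append(current)
            current, events = [], 0
        current.append(f)
        events += f["events"]
    if current:
        jobs.append(current)
    return jobs

files = [{"events": 100}, {"events": 100}]
print(len(splitByEvents(files, 200)))   # 1 job, as in test00
print(len(splitByEvents(files, 199)))   # 2 jobs, as in test01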
def databaseWork(self):
    """
    Queries the DB for all watched filesets; if a fileset matching a
    workflow becomes available, create the subscription
    """
    # Get all watched workflows
    availableWorkflows = self.getUnsubscribedWorkflows.execute()
    logging.debug("Found %s unsubscribed managed workflows",
                  len(availableWorkflows))

    # Get all filesets to check if they match a workflow
    availableFilesets = self.getAllFilesets.execute()
    logging.debug("Found %s filesets", len(availableFilesets))

    # Loop on unsubscribed workflows to match filesets
    for managedWorkflow in availableWorkflows:

        # Workflow object cache to pass into Subscription constructor
        wfObj = None

        for fileset in availableFilesets:

            # Fileset object cache
            fsObj = None

            # Attempt to match workflows to filesets
            if re.match(managedWorkflow['fileset_match'], fileset['name']):

                # Log in debug
                msg = "Creating subscription for %s to workflow id %s"
                msg %= (fileset['name'], managedWorkflow['workflow'])
                logging.debug(msg)

                # Match found - load the fileset if not already loaded
                if not fsObj:
                    fsObj = Fileset(id=fileset['id'])
                    fsObj.load()

                # Load the workflow if not already loaded
                if not wfObj:
                    wfObj = Workflow(id=managedWorkflow['workflow'])
                    wfObj.load()

                # Create the subscription
                newSub = Subscription(fileset=fsObj,
                                      workflow=wfObj,
                                      split_algo=managedWorkflow['split_algo'],
                                      type=managedWorkflow['type'])
                newSub.create()

    managedWorkflows = self.getManagedWorkflows.execute()
    logging.debug("Found %s managed workflows", len(managedWorkflows))

    unsubscribedFilesets = self.getUnsubscribedFilesets.execute()
    logging.debug("Found %s unsubscribed filesets", len(unsubscribedFilesets))

    # Loop on unsubscribed filesets to match workflows
    for unsubscribedFileset in unsubscribedFilesets:

        for managedWork in managedWorkflows:
            logging.debug("The workflow %s", managedWork['workflow'])

            # Workflow and fileset object caches
            wfObj = None
            fsObj = None

            # Attempt to match workflows to filesets
            if re.match(managedWork['fileset_match'],
                        unsubscribedFileset['name']):

                # Log in debug
                msg = "Creating subscription for %s to workflow id %s"
                msg %= (unsubscribedFileset['name'], managedWork['workflow'])
                logging.debug(msg)

                # Match found - load the fileset if not already loaded
                if not fsObj:
                    fsObj = Fileset(id=unsubscribedFileset['id'])
                    fsObj.load()

                # Load the workflow if not already loaded
                if not wfObj:
                    wfObj = Workflow(id=managedWork['workflow'])
                    wfObj.load()

                # Create the subscription
                newSub = Subscription(fileset=fsObj,
                                      workflow=wfObj,
                                      split_algo=managedWork['split_algo'],
                                      type=managedWork['type'])
                newSub.create()
                newSub.load()
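# Minimal stand-alone illustration of the matching rule databaseWork() uses:
# a managed workflow's fileset_match pattern is applied to candidate fileset
# names with re.match (anchored at the start of the name).  The sample
# pattern and names are made up for the example.
import re

managedWorkflow = {'workflow': 42, 'fileset_match': r'/MinBias/.*/RAW'}
for name in ['/MinBias/Run2012A-v1/RAW', '/Cosmics/Run2012A-v1/RAW']:
    if re.match(managedWorkflow['fileset_match'], name):
        print("would subscribe %s to workflow %s" % (name, managedWorkflow['workflow']))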
def setUp(self):
    """
    _setUp_

    """
    import WMQuality.TestInit
    WMQuality.TestInit.deleteDatabaseAfterEveryTest("I'm Serious")

    self.testInit = TestInit(__file__)
    self.testInit.setLogging()
    self.testInit.setDatabaseConnection()
    self.testInit.setSchema(customModules=["WMComponent.DBS3Buffer", "T0.WMBS"])

    self.splitterFactory = SplitterFactory(package="T0.JobSplitting")

    myThread = threading.currentThread()
    daoFactory = DAOFactory(package="T0.WMBS",
                            logger=logging,
                            dbinterface=myThread.dbi)

    myThread.dbi.processData("""INSERT INTO wmbs_location
                                (id, site_name, state, state_time)
                                VALUES (1, 'SomeSite', 1, 1)
                                """, transaction=False)
    myThread.dbi.processData("""INSERT INTO wmbs_pnns
                                (id, pnn)
                                VALUES (2, 'SomePNN')
                                """, transaction=False)
    myThread.dbi.processData("""INSERT INTO wmbs_location_pnns
                                (location, pnn)
                                VALUES (1, 2)
                                """, transaction=False)

    insertRunDAO = daoFactory(classname="RunConfig.InsertRun")
    insertRunDAO.execute(binds={'RUN': 1, 'HLTKEY': "someHLTKey"},
                         transaction=False)

    insertLumiDAO = daoFactory(classname="RunConfig.InsertLumiSection")
    for lumi in range(1, 5):
        insertLumiDAO.execute(binds={'RUN': 1, 'LUMI': lumi},
                              transaction=False)

    insertStreamDAO = daoFactory(classname="RunConfig.InsertStream")
    insertStreamDAO.execute(binds={'STREAM': "Express"}, transaction=False)

    insertStreamFilesetDAO = daoFactory(classname="RunConfig.InsertStreamFileset")
    insertStreamFilesetDAO.execute(1, "Express", "TestFileset1")

    fileset1 = Fileset(name="TestFileset1")
    self.fileset2 = Fileset(name="TestFileset2")
    fileset1.load()
    self.fileset2.create()

    workflow1 = Workflow(spec="spec.xml", owner="hufnagel",
                         name="TestWorkflow1", task="Test")
    workflow2 = Workflow(spec="spec.xml", owner="hufnagel",
                         name="TestWorkflow2", task="Test")
    workflow1.create()
    workflow2.create()

    self.subscription1 = Subscription(fileset=fileset1,
                                      workflow=workflow1,
                                      split_algo="Express",
                                      type="Express")
    self.subscription2 = Subscription(fileset=self.fileset2,
                                      workflow=workflow2,
                                      split_algo="ExpressMerge",
                                      type="ExpressMerge")
    self.subscription1.create()
    self.subscription2.create()

    myThread.dbi.processData("""INSERT INTO wmbs_workflow_output
                                (WORKFLOW_ID, OUTPUT_IDENTIFIER, OUTPUT_FILESET)
                                VALUES (%d, 'SOMEOUTPUT', %d)
                                """ % (workflow1.id, self.fileset2.id),
                             transaction=False)

    # keep for later
    self.insertSplitLumisDAO = daoFactory(classname="JobSplitting.InsertSplitLumis")

    # default split parameters
    self.splitArgs = {}
    self.splitArgs['maxInputSize'] = 2 * 1024 * 1024 * 1024
    self.splitArgs['maxInputFiles'] = 500
    self.splitArgs['maxLatency'] = 15 * 23

    return
def setUp(self):
    """
    _setUp_

    """
    self.testInit = TestInit(__file__)
    self.testInit.setLogging()
    self.testInit.setDatabaseConnection()
    self.testInit.setSchema(customModules = ["T0.WMBS"])

    self.splitterFactory = SplitterFactory(package = "T0.JobSplitting")

    myThread = threading.currentThread()
    daoFactory = DAOFactory(package = "T0.WMBS",
                            logger = logging,
                            dbinterface = myThread.dbi)

    myThread.dbi.processData("""INSERT INTO wmbs_location
                                (id, site_name, state)
                                VALUES (1, 'SomeSite', 1)
                                """, transaction = False)
    myThread.dbi.processData("""INSERT INTO wmbs_location_pnns
                                (location, pnn)
                                VALUES (1, 'SomePNN')
                                """, transaction = False)
    myThread.dbi.processData("""INSERT INTO wmbs_location_pnns
                                (location, pnn)
                                VALUES (1, 'SomePNN2')
                                """, transaction = False)

    insertRunDAO = daoFactory(classname = "RunConfig.InsertRun")
    insertRunDAO.execute(binds = { 'RUN' : 1,
                                   'TIME' : int(time.time()),
                                   'HLTKEY' : "someHLTKey" },
                         transaction = False)

    insertLumiDAO = daoFactory(classname = "RunConfig.InsertLumiSection")
    for lumi in range(1, 5):
        insertLumiDAO.execute(binds = { 'RUN' : 1, 'LUMI' : lumi },
                              transaction = False)

    insertStreamDAO = daoFactory(classname = "RunConfig.InsertStream")
    insertStreamDAO.execute(binds = { 'STREAM' : "Express" },
                            transaction = False)

    insertStreamFilesetDAO = daoFactory(classname = "RunConfig.InsertStreamFileset")
    insertStreamFilesetDAO.execute(1, "Express", "TestFileset1")

    fileset1 = Fileset(name = "TestFileset1")
    self.fileset2 = Fileset(name = "TestFileset2")
    fileset1.load()
    self.fileset2.create()

    workflow1 = Workflow(spec = "spec.xml",
                         owner = "hufnagel",
                         name = "TestWorkflow1",
                         task = "Test")
    workflow2 = Workflow(spec = "spec.xml",
                         owner = "hufnagel",
                         name = "TestWorkflow2",
                         task = "Test")
    workflow1.create()
    workflow2.create()

    self.subscription1 = Subscription(fileset = fileset1,
                                      workflow = workflow1,
                                      split_algo = "Express",
                                      type = "Express")
    self.subscription2 = Subscription(fileset = self.fileset2,
                                      workflow = workflow2,
                                      split_algo = "ExpressMerge",
                                      type = "ExpressMerge")
    self.subscription1.create()
    self.subscription2.create()

    myThread.dbi.processData("""INSERT INTO wmbs_workflow_output
                                (WORKFLOW_ID, OUTPUT_IDENTIFIER, OUTPUT_FILESET)
                                VALUES (%d, 'SOMEOUTPUT', %d)
                                """ % (workflow1.id, self.fileset2.id),
                             transaction = False)

    # keep for later
    self.insertSplitLumisDAO = daoFactory(classname = "JobSplitting.InsertSplitLumis")

    # default split parameters
    self.splitArgs = {}
    self.splitArgs['maxInputSize'] = 2 * 1024 * 1024 * 1024
    self.splitArgs['maxInputFiles'] = 500
    self.splitArgs['maxLatency'] = 15 * 23

    return
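# Note on the maxInputFiles fix in the two setUp() methods above: a stray
# trailing comma turns the int 500 into the one-element tuple (500,), which
# silently breaks numeric threshold comparisons downstream.
maxInputFiles = 500,
assert maxInputFiles == (500,)         # tuple, not int
maxInputFiles = 500
assert isinstance(maxInputFiles, int)  # the intended threshold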
class RepackTest(unittest.TestCase):
    """
    _RepackTest_

    Test for Repack job splitter
    """

    def setUp(self):
        """
        _setUp_

        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=["T0.WMBS"])

        self.splitterFactory = SplitterFactory(package="T0.JobSplitting")

        myThread = threading.currentThread()
        daoFactory = DAOFactory(package="T0.WMBS",
                                logger=logging,
                                dbinterface=myThread.dbi)

        myThread.dbi.processData("""INSERT INTO wmbs_location
                                    (id, site_name, state)
                                    VALUES (1, 'SomeSite', 1)
                                    """, transaction=False)
        myThread.dbi.processData("""INSERT INTO wmbs_location_senames
                                    (location, se_name)
                                    VALUES (1, 'SomeSE')
                                    """, transaction=False)
        myThread.dbi.processData("""INSERT INTO wmbs_location_senames
                                    (location, se_name)
                                    VALUES (1, 'SomeSE2')
                                    """, transaction=False)

        insertRunDAO = daoFactory(classname="RunConfig.InsertRun")
        insertRunDAO.execute(binds={'RUN': 1,
                                    'TIME': int(time.time()),
                                    'HLTKEY': "someHLTKey"},
                             transaction=False)

        insertLumiDAO = daoFactory(classname="RunConfig.InsertLumiSection")
        for lumi in [1, 2, 3, 4]:
            insertLumiDAO.execute(binds={'RUN': 1, 'LUMI': lumi},
                                  transaction=False)

        insertStreamDAO = daoFactory(classname="RunConfig.InsertStream")
        insertStreamDAO.execute(binds={'STREAM': "A"}, transaction=False)

        insertStreamFilesetDAO = daoFactory(classname="RunConfig.InsertStreamFileset")
        insertStreamFilesetDAO.execute(1, "A", "TestFileset1")

        self.fileset1 = Fileset(name="TestFileset1")
        self.fileset1.load()

        workflow1 = Workflow(spec="spec.xml", owner="hufnagel",
                             name="TestWorkflow1", task="Test")
        workflow1.create()

        self.subscription1 = Subscription(fileset=self.fileset1,
                                          workflow=workflow1,
                                          split_algo="Repack",
                                          type="Repack")
        self.subscription1.create()

        # keep for later
        self.insertClosedLumiDAO = daoFactory(classname="RunLumiCloseout.InsertClosedLumi")
        self.currentTime = int(time.time())

        # default split parameters
        self.splitArgs = {}
        self.splitArgs['maxSizeSingleLumi'] = 20 * 1024 * 1024 * 1024
        self.splitArgs['maxSizeMultiLumi'] = 10 * 1024 * 1024 * 1024
        self.splitArgs['maxInputEvents'] = 500000
        self.splitArgs['maxInputFiles'] = 1000

        return

    def tearDown(self):
        """
        _tearDown_

        """
        self.testInit.clearDatabase()
        return

    def getNumActiveSplitLumis(self):
        """
        _getNumActiveSplitLumis_

        helper function that counts the number of active split lumis
        """
        myThread = threading.currentThread()
        results = myThread.dbi.processData("""SELECT COUNT(*)
                                              FROM lumi_section_split_active
                                              """, transaction=False)[0].fetchall()
        return results[0][0]

    def test00(self):
        """
        _test00_

        Test that the job name prefix feature works
        Test multi lumi size threshold
        Multi lumi input
        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 3, 4]:
            filecount = 2
            for i in range(filecount):
                newFile = File(makeUUID(), size=1000, events=100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave=False)
                newFile.create()
                self.fileset1.addFile(newFile)
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription1)

        mySplitArgs['maxSizeMultiLumi'] = self.splitArgs['maxSizeMultiLumi']
        jobGroups = jobFactory(**mySplitArgs)
        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxSizeMultiLumi'] = 5000
        jobGroups = jobFactory(**mySplitArgs)
        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")
        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create a single job")

        job = jobGroups[0].jobs[0]
        self.assertTrue(job['name'].startswith("Repack-"),
                        "ERROR: Job has wrong name")
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.fileset1.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)
        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")
        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create a single job")

        job = jobGroups[0].jobs[0]
        self.assertTrue(job['name'].startswith("Repack-"),
                        "ERROR: Job has wrong name")
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.assertEqual(self.getNumActiveSplitLumis(), 0,
                         "ERROR: Split lumis were created")

        return

    def test01(self):
        """
        _test01_

        Test multi lumi event threshold
        Multi lumi input
        """
        mySplitArgs = self.splitArgs.copy()

        insertClosedLumiBinds = []
        for lumi in [1, 2, 3, 4]:
            filecount = 2
            for i in range(filecount):
                newFile = File(makeUUID(), size=1000, events=100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave=False)
                newFile.create()
                self.fileset1.addFile(newFile)
            insertClosedLumiBinds.append({'RUN': 1,
                                          'LUMI': lumi,
                                          'STREAM': "A",
                                          'FILECOUNT': filecount,
                                          'INSERT_TIME': self.currentTime,
                                          'CLOSE_TIME': self.currentTime})
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription1)

        self.insertClosedLumiDAO.execute(binds=insertClosedLumiBinds,
                                         transaction=False)

        jobGroups = jobFactory(**mySplitArgs)
        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputEvents'] = 500
        jobGroups = jobFactory(**mySplitArgs)
        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")
        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create a single job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.fileset1.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)
        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")
        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create a single job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.assertEqual(self.getNumActiveSplitLumis(), 0,
                         "ERROR: Split lumis were created")

        return

    def test02(self):
        """
        _test02_

        Test single lumi size threshold
        Single lumi input
        """
        mySplitArgs = self.splitArgs.copy()

        insertClosedLumiBinds = []
        for lumi in [1]:
            filecount = 8
            for i in range(filecount):
                newFile = File(makeUUID(), size=1000, events=100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave=False)
                newFile.create()
                self.fileset1.addFile(newFile)
            insertClosedLumiBinds.append({'RUN': 1,
                                          'LUMI': lumi,
                                          'STREAM': "A",
                                          'FILECOUNT': filecount,
                                          'INSERT_TIME': self.currentTime,
                                          'CLOSE_TIME': self.currentTime})
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription1)

        self.insertClosedLumiDAO.execute(binds=insertClosedLumiBinds,
                                         transaction=False)

        jobGroups = jobFactory(**mySplitArgs)
        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxSizeSingleLumi'] = 6500
        jobGroups = jobFactory(**mySplitArgs)
        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")
        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 6,
                         "ERROR: Job does not process 6 files")
        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        self.assertEqual(self.getNumActiveSplitLumis(), 1,
                         "ERROR: Split lumis were not created")

        return

    def test03(self):
        """
        _test03_

        Test single lumi event threshold
        Single lumi input
        """
        mySplitArgs = self.splitArgs.copy()

        insertClosedLumiBinds = []
        for lumi in [1]:
            filecount = 8
            for i in range(filecount):
                newFile = File(makeUUID(), size=1000, events=100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave=False)
                newFile.create()
                self.fileset1.addFile(newFile)
            insertClosedLumiBinds.append({'RUN': 1,
                                          'LUMI': lumi,
                                          'STREAM': "A",
                                          'FILECOUNT': filecount,
                                          'INSERT_TIME': self.currentTime,
                                          'CLOSE_TIME': self.currentTime})
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription1)

        self.insertClosedLumiDAO.execute(binds=insertClosedLumiBinds,
                                         transaction=False)

        jobGroups = jobFactory(**mySplitArgs)
        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputEvents'] = 650
        jobGroups = jobFactory(**mySplitArgs)
        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")
        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 6,
                         "ERROR: Job does not process 6 files")
        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        self.assertEqual(self.getNumActiveSplitLumis(), 1,
                         "ERROR: Split lumis were not created")

        return

    def test04(self):
        """
        _test04_

        Test streamer count threshold (only multi lumi)
        Multi lumi input
        """
        mySplitArgs = self.splitArgs.copy()

        insertClosedLumiBinds = []
        for lumi in [1, 2, 3, 4]:
            filecount = 2
            for i in range(filecount):
                newFile = File(makeUUID(), size=1000, events=100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave=False)
                newFile.create()
                self.fileset1.addFile(newFile)
            insertClosedLumiBinds.append({'RUN': 1,
                                          'LUMI': lumi,
                                          'STREAM': "A",
                                          'FILECOUNT': filecount,
                                          'INSERT_TIME': self.currentTime,
                                          'CLOSE_TIME': self.currentTime})
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription1)

        self.insertClosedLumiDAO.execute(binds=insertClosedLumiBinds,
                                         transaction=False)

        jobGroups = jobFactory(**mySplitArgs)
        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputFiles'] = 5
        jobGroups = jobFactory(**mySplitArgs)
        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")
        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create a single job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.fileset1.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)
        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")
        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create a single job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.assertEqual(self.getNumActiveSplitLumis(), 0,
                         "ERROR: Split lumis were created")

        return

    def test05(self):
        """
        _test05_

        Test repacking of multiple lumis with holes in the lumi sequence
        Multi lumi input
        """
        mySplitArgs = self.splitArgs.copy()

        insertClosedLumiBinds = []
        for lumi in [1, 2, 4]:
            filecount = 2
            for i in range(filecount):
                newFile = File(makeUUID(), size=1000, events=100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave=False)
                newFile.create()
                self.fileset1.addFile(newFile)
            insertClosedLumiBinds.append({'RUN': 1,
                                          'LUMI': lumi,
                                          'STREAM': "A",
                                          'FILECOUNT': filecount,
                                          'INSERT_TIME': self.currentTime,
                                          'CLOSE_TIME': self.currentTime})
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription1)

        self.insertClosedLumiDAO.execute(binds=insertClosedLumiBinds,
                                         transaction=False)

        mySplitArgs['maxInputFiles'] = 5
        jobGroups = jobFactory(**mySplitArgs)
        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.insertClosedLumiDAO.execute(binds={'RUN': 1,
                                                'LUMI': 3,
                                                'STREAM': "A",
                                                'FILECOUNT': 0,
                                                'INSERT_TIME': self.currentTime,
                                                'CLOSE_TIME': self.currentTime},
                                         transaction=False)

        jobGroups = jobFactory(**mySplitArgs)
        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")
        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        self.assertEqual(len(jobGroups[0].jobs[0].getFiles()), 4,
                         "ERROR: first job does not process 4 files")

        return

    def test06(self):
        """
        _test06_

        Test repacking of 3 lumis
        2 small lumis (single job), followed by a big one (multiple jobs)

        files for lumi 1 and 2 are below multi-lumi thresholds
        files for lumi 3 are above single-lumi threshold
        """
        mySplitArgs = self.splitArgs.copy()

        insertClosedLumiBinds = []
        for lumi in [1, 2, 3]:
            filecount = 2
            for i in range(filecount):
                if lumi == 3:
                    nevents = 500
                else:
                    nevents = 100
                newFile = File(makeUUID(), size=1000, events=nevents)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave=False)
                newFile.create()
                self.fileset1.addFile(newFile)
            insertClosedLumiBinds.append({'RUN': 1,
                                          'LUMI': lumi,
                                          'STREAM': "A",
                                          'FILECOUNT': filecount,
                                          'INSERT_TIME': self.currentTime,
                                          'CLOSE_TIME': self.currentTime})
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription1)

        self.insertClosedLumiDAO.execute(binds=insertClosedLumiBinds,
                                         transaction=False)

        mySplitArgs['maxInputEvents'] = 900
        jobGroups = jobFactory(**mySplitArgs)
        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")
        self.assertEqual(len(jobGroups[0].jobs), 3,
                         "ERROR: JobFactory didn't create three jobs")

        self.assertEqual(len(jobGroups[0].jobs[0].getFiles()), 4,
                         "ERROR: first job does not process 4 files")
        self.assertEqual(len(jobGroups[0].jobs[1].getFiles()), 1,
                         "ERROR: second job does not process 1 file")
        self.assertEqual(len(jobGroups[0].jobs[2].getFiles()), 1,
                         "ERROR: third job does not process 1 file")

        return
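# Sketch of the lumi-hole rule exercised by test05 above: multi-lumi merging
# must not run past a gap in the closed-lumi sequence, and a lumi closed with
# FILECOUNT 0 fills the hole.  Pure-Python illustration, not the Repack
# splitter itself.
def contiguousClosedLumis(closedLumis):
    """Return the closed lumis reachable from the lowest one without a gap."""
    reachable = []
    expected = min(closedLumis)
    for lumi in sorted(closedLumis):
        if lumi != expected:
            break               # hole in the sequence: stop here
        reachable.append(lumi)
        expected += 1
    return reachable

print(contiguousClosedLumis({1, 2, 4}))      # [1, 2] -- lumi 3 still open
print(contiguousClosedLumis({1, 2, 3, 4}))   # [1, 2, 3, 4] once 3 is closed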
def configureRunStream(tier0Config, run, stream, specDirectory, dqmUploadProxy):
    """
    _configureRunStream_

    Called by Tier0Feeder for new run/streams.

    Retrieve global run settings and build the part of the configuration
    relevant to run/stream and write it to the database.

    Create workflows, filesets and subscriptions for the processing
    of runs/streams.
    """
    logging.debug("configureRunStream() : %d , %s", run, stream)
    myThread = threading.currentThread()

    daoFactory = DAOFactory(package = "T0.WMBS",
                            logger = logging,
                            dbinterface = myThread.dbi)

    # retrieve some basic run information
    getRunInfoDAO = daoFactory(classname = "RunConfig.GetRunInfo")
    runInfo = getRunInfoDAO.execute(run, transaction = False)[0]

    #
    # treat centralDAQ or miniDAQ runs (have an HLT key) different from local runs
    #
    if runInfo['hltkey'] is not None:

        # streams not explicitly configured are repacked
        if stream not in tier0Config.Streams.dictionary_().keys():
            addRepackConfig(tier0Config, stream)

        streamConfig = tier0Config.Streams.dictionary_()[stream]

        # write stream/dataset mapping (for special express and error datasets)
        insertDatasetDAO = daoFactory(classname = "RunConfig.InsertPrimaryDataset")
        insertStreamDatasetDAO = daoFactory(classname = "RunConfig.InsertStreamDataset")

        # write stream configuration
        insertStreamStyleDAO = daoFactory(classname = "RunConfig.InsertStreamStyle")
        insertRepackConfigDAO = daoFactory(classname = "RunConfig.InsertRepackConfig")
        insertPromptCalibrationDAO = daoFactory(classname = "RunConfig.InsertPromptCalibration")
        insertExpressConfigDAO = daoFactory(classname = "RunConfig.InsertExpressConfig")
        insertSpecialDatasetDAO = daoFactory(classname = "RunConfig.InsertSpecialDataset")
        insertDatasetScenarioDAO = daoFactory(classname = "RunConfig.InsertDatasetScenario")
        insertCMSSWVersionDAO = daoFactory(classname = "RunConfig.InsertCMSSWVersion")
        updateStreamOverrideDAO = daoFactory(classname = "RunConfig.UpdateStreamOverride")
        insertStreamFilesetDAO = daoFactory(classname = "RunConfig.InsertStreamFileset")
        insertRecoReleaseConfigDAO = daoFactory(classname = "RunConfig.InsertRecoReleaseConfig")
        insertWorkflowMonitoringDAO = daoFactory(classname = "RunConfig.InsertWorkflowMonitoring")
        insertStorageNodeDAO = daoFactory(classname = "RunConfig.InsertStorageNode")
        insertPhEDExConfigDAO = daoFactory(classname = "RunConfig.InsertPhEDExConfig")

        bindsDataset = []
        bindsStreamDataset = []
        bindsStreamStyle = { 'RUN' : run,
                             'STREAM' : stream,
                             'STYLE' : streamConfig.ProcessingStyle }
        bindsRepackConfig = {}
        bindsPromptCalibration = {}
        bindsExpressConfig = {}
        bindsSpecialDataset = {}
        bindsDatasetScenario = []
        bindsCMSSWVersion = []
        bindsStreamOverride = {}
        bindsStorageNode = []
        bindsPhEDExConfig = []

        # mark workflows as injected
        wmbsDaoFactory = DAOFactory(package = "WMCore.WMBS",
                                    logger = logging,
                                    dbinterface = myThread.dbi)
        markWorkflowsInjectedDAO = wmbsDaoFactory(classname = "Workflow.MarkInjectedWorkflows")

        #
        # for spec creation, details for all outputs
        #
        outputModuleDetails = []

        #
        # for PromptReco delay settings
        #
        promptRecoDelay = {}
        promptRecoDelayOffset = {}

        #
        # for PhEDEx subscription settings
        #
        subscriptions = { 'Express' : [],
                          'Bulk' : [] }

        # some hardcoded PhEDEx defaults
        expressPhEDExInjectNode = "T2_CH_CERN"
        expressPhEDExSubscribeNode = "T2_CH_CERN"

        #
        # first take care of all stream settings
        #
        getStreamOnlineVersionDAO = daoFactory(classname = "RunConfig.GetStreamOnlineVersion")
        onlineVersion = getStreamOnlineVersionDAO.execute(run, stream, transaction = False)

        if streamConfig.ProcessingStyle == "Bulk":

            bindsRepackConfig = { 'RUN' : run,
                                  'STREAM' : stream,
                                  'PROC_VER' : streamConfig.Repack.ProcessingVersion,
                                  'MAX_SIZE_SINGLE_LUMI' : streamConfig.Repack.MaxSizeSingleLumi,
                                  'MAX_SIZE_MULTI_LUMI' : streamConfig.Repack.MaxSizeMultiLumi,
                                  'MIN_SIZE' : streamConfig.Repack.MinInputSize,
                                  'MAX_SIZE' : streamConfig.Repack.MaxInputSize,
                                  'MAX_EDM_SIZE' : streamConfig.Repack.MaxEdmSize,
                                  'MAX_OVER_SIZE' : streamConfig.Repack.MaxOverSize,
                                  'MAX_EVENTS' : streamConfig.Repack.MaxInputEvents,
                                  'MAX_FILES' : streamConfig.Repack.MaxInputFiles }

        elif streamConfig.ProcessingStyle == "Express":

            specialDataset = "Stream%s" % stream
            bindsDataset.append( { 'PRIMDS' : specialDataset } )
            bindsStreamDataset.append( { 'RUN' : run,
                                         'PRIMDS' : specialDataset,
                                         'STREAM' : stream } )
            bindsSpecialDataset = { 'STREAM' : stream,
                                    'PRIMDS' : specialDataset }
            bindsDatasetScenario.append( { 'RUN' : run,
                                           'PRIMDS' : specialDataset,
                                           'SCENARIO' : streamConfig.Express.Scenario } )

            if "DQM" in streamConfig.Express.DataTiers:
                outputModuleDetails.append( { 'dataTier' : "DQM",
                                              'eventContent' : "DQM",
                                              'primaryDataset' : specialDataset } )

            bindsStorageNode.append( { 'NODE' : expressPhEDExSubscribeNode } )

            bindsPhEDExConfig.append( { 'RUN' : run,
                                        'PRIMDS' : specialDataset,
                                        'NODE' : expressPhEDExSubscribeNode,
                                        'CUSTODIAL' : 1,
                                        'REQ_ONLY' : "n",
                                        'PRIO' : "high" } )

            subscriptions['Express'].append( { 'custodialSites' : [],
                                               'nonCustodialSites' : [expressPhEDExSubscribeNode],
                                               'autoApproveSites' : [expressPhEDExSubscribeNode],
                                               'priority' : "high",
                                               'primaryDataset' : specialDataset } )

            alcaSkim = None
            if "ALCARECO" in streamConfig.Express.DataTiers:
                if len(streamConfig.Express.AlcaSkims) > 0:
                    outputModuleDetails.append( { 'dataTier' : "ALCARECO",
                                                  'eventContent' : "ALCARECO",
                                                  'primaryDataset' : specialDataset } )
                    alcaSkim = ",".join(streamConfig.Express.AlcaSkims)

                    if "PromptCalibProd" in streamConfig.Express.AlcaSkims:
                        bindsPromptCalibration = { 'RUN' : run,
                                                   'STREAM' : stream }

            dqmSeq = None
            if len(streamConfig.Express.DqmSequences) > 0:
                dqmSeq = ",".join(streamConfig.Express.DqmSequences)

            bindsExpressConfig = { 'RUN' : run,
                                   'STREAM' : stream,
                                   'PROC_VER' : streamConfig.Express.ProcessingVersion,
                                   'WRITE_TIERS' : ",".join(streamConfig.Express.DataTiers),
                                   'GLOBAL_TAG' : streamConfig.Express.GlobalTag,
                                   'MAX_EVENTS' : streamConfig.Express.MaxInputEvents,
                                   'MAX_SIZE' : streamConfig.Express.MaxInputSize,
                                   'MAX_FILES' : streamConfig.Express.MaxInputFiles,
                                   'MAX_LATENCY' : streamConfig.Express.MaxLatency,
                                   'ALCA_SKIM' : alcaSkim,
                                   'DQM_SEQ' : dqmSeq }

        overrideVersion = streamConfig.VersionOverride.get(onlineVersion, None)
        if overrideVersion is not None:
            bindsCMSSWVersion.append( { 'VERSION' : overrideVersion } )
            bindsStreamOverride = { "RUN" : run,
                                    "STREAM" : stream,
                                    "OVERRIDE" : overrideVersion }

        #
        # then configure datasets
        #
        getStreamDatasetTriggersDAO = daoFactory(classname = "RunConfig.GetStreamDatasetTriggers")
        datasetTriggers = getStreamDatasetTriggersDAO.execute(run, stream, transaction = False)

        for dataset, paths in datasetTriggers.items():

            if dataset == "Unassigned path":
                if stream == "Express" and run in [ 210114, 210116, 210120, 210121, 210178 ]:
                    continue

            datasetConfig = retrieveDatasetConfig(tier0Config, dataset)

            selectEvents = []
            for path in sorted(paths):
                selectEvents.append("%s:%s" % (path, runInfo['process']))

            if streamConfig.ProcessingStyle == "Bulk":

                promptRecoDelay[datasetConfig.Name] = datasetConfig.RecoDelay
                promptRecoDelayOffset[datasetConfig.Name] = datasetConfig.RecoDelayOffset

                outputModuleDetails.append( { 'dataTier' : "RAW",
                                              'eventContent' : "ALL",
                                              'selectEvents' : selectEvents,
                                              'primaryDataset' : dataset } )

                custodialSites = []
                nonCustodialSites = []
                autoApproveSites = []

                if datasetConfig.CustodialNode is not None:

                    custodialSites.append(datasetConfig.CustodialNode)

                    requestOnly = "y"
                    if datasetConfig.CustodialAutoApprove:
                        requestOnly = "n"
                        autoApproveSites.append(datasetConfig.CustodialNode)

                    bindsStorageNode.append( { 'NODE' : datasetConfig.CustodialNode } )

                    bindsPhEDExConfig.append( { 'RUN' : run,
                                                'PRIMDS' : dataset,
                                                'NODE' : datasetConfig.CustodialNode,
                                                'CUSTODIAL' : 1,
                                                'REQ_ONLY' : requestOnly,
                                                'PRIO' : datasetConfig.CustodialPriority } )

                if datasetConfig.ArchivalNode is not None:

                    custodialSites.append(datasetConfig.ArchivalNode)
                    autoApproveSites.append(datasetConfig.ArchivalNode)

                    bindsStorageNode.append( { 'NODE' : datasetConfig.ArchivalNode } )

                    bindsPhEDExConfig.append( { 'RUN' : run,
                                                'PRIMDS' : dataset,
                                                'NODE' : datasetConfig.ArchivalNode,
                                                'CUSTODIAL' : 1,
                                                'REQ_ONLY' : "n",
                                                'PRIO' : datasetConfig.CustodialPriority } )

                if len(custodialSites) + len(nonCustodialSites) > 0:
                    subscriptions['Bulk'].append( { 'custodialSites' : custodialSites,
                                                    'nonCustodialSites' : nonCustodialSites,
                                                    'autoApproveSites' : autoApproveSites,
                                                    'priority' : datasetConfig.CustodialPriority,
                                                    'primaryDataset' : dataset } )

            elif streamConfig.ProcessingStyle == "Express":

                for dataTier in streamConfig.Express.DataTiers:
                    if dataTier not in [ "ALCARECO", "DQM" ]:
                        outputModuleDetails.append( { 'dataTier' : dataTier,
                                                      'eventContent' : dataTier,
                                                      'selectEvents' : selectEvents,
                                                      'primaryDataset' : dataset } )

                bindsPhEDExConfig.append( { 'RUN' : run,
                                            'PRIMDS' : dataset,
                                            'NODE' : expressPhEDExSubscribeNode,
                                            'CUSTODIAL' : 1,
                                            'REQ_ONLY' : "n",
                                            'PRIO' : "high" } )

                subscriptions['Express'].append( { 'custodialSites' : [],
                                                   'nonCustodialSites' : [expressPhEDExSubscribeNode],
                                                   'autoApproveSites' : [expressPhEDExSubscribeNode],
                                                   'priority' : "high",
                                                   'primaryDataset' : dataset } )

        #
        # finally create WMSpec
        #
        outputs = {}
        if streamConfig.ProcessingStyle == "Bulk":
            taskName = "Repack"
            workflowName = "Repack_Run%d_Stream%s" % (run, stream)

            specArguments = getRepackArguments()

            specArguments['ProcessingVersion'] = streamConfig.Repack.ProcessingVersion
            specArguments['MaxSizeSingleLumi'] = streamConfig.Repack.MaxSizeSingleLumi
            specArguments['MaxSizeMultiLumi'] = streamConfig.Repack.MaxSizeMultiLumi
            specArguments['MinInputSize'] = streamConfig.Repack.MinInputSize
            specArguments['MaxInputSize'] = streamConfig.Repack.MaxInputSize
            specArguments['MaxEdmSize'] = streamConfig.Repack.MaxEdmSize
            specArguments['MaxOverSize'] = streamConfig.Repack.MaxOverSize
            specArguments['MaxInputEvents'] = streamConfig.Repack.MaxInputEvents
            specArguments['MaxInputFiles'] = streamConfig.Repack.MaxInputFiles

            specArguments['UnmergedLFNBase'] = "%s/t0temp/%s" % (runInfo['lfn_prefix'],
                                                                 runInfo['bulk_data_type'])
            specArguments['MergedLFNBase'] = "%s/%s" % (runInfo['lfn_prefix'],
                                                        runInfo['bulk_data_type'])

        elif streamConfig.ProcessingStyle == "Express":
            taskName = "Express"
            workflowName = "Express_Run%d_Stream%s" % (run, stream)

            specArguments = getExpressArguments()

            specArguments['ProcessingString'] = "Express"
            specArguments['ProcessingVersion'] = streamConfig.Express.ProcessingVersion
            specArguments['ProcScenario'] = streamConfig.Express.Scenario
            specArguments['GlobalTag'] = streamConfig.Express.GlobalTag
            specArguments['GlobalTagTransaction'] = "Express_%d" % run
            specArguments['MaxInputEvents'] = streamConfig.Express.MaxInputEvents
            specArguments['MaxInputSize'] = streamConfig.Express.MaxInputSize
            specArguments['MaxInputFiles'] = streamConfig.Express.MaxInputFiles
            specArguments['MaxLatency'] = streamConfig.Express.MaxLatency
            specArguments['AlcaSkims'] = streamConfig.Express.AlcaSkims
            specArguments['DqmSequences'] = streamConfig.Express.DqmSequences
            specArguments['UnmergedLFNBase'] = "%s/t0temp/express" % runInfo['lfn_prefix']
            specArguments['MergedLFNBase'] = "%s/express" % runInfo['lfn_prefix']
            specArguments['AlcaHarvestTimeout'] = runInfo['ah_timeout']
            specArguments['AlcaHarvestDir'] = runInfo['ah_dir']
            specArguments['DQMUploadProxy'] = dqmUploadProxy
            specArguments['DQMUploadUrl'] = runInfo['dqmuploadurl']
            specArguments['StreamName'] = stream

        if streamConfig.ProcessingStyle in [ 'Bulk', 'Express' ]:
            specArguments['RunNumber'] = run
            specArguments['AcquisitionEra'] = tier0Config.Global.AcquisitionEra
            specArguments['CMSSWVersion'] = streamConfig.VersionOverride.get(onlineVersion, onlineVersion)
            specArguments['Outputs'] = outputModuleDetails
            specArguments['OverrideCatalog'] = "trivialcatalog_file:/afs/cern.ch/cms/SITECONF/T0_CH_CERN/Tier0/override_catalog.xml?protocol=override"
            specArguments['ValidStatus'] = "VALID"

        if streamConfig.ProcessingStyle == "Bulk":
            wmSpec = repackWorkload(workflowName, specArguments)
            wmSpec.setPhEDExInjectionOverride(runInfo['bulk_data_loc'])
            for subscription in subscriptions['Bulk']:
                wmSpec.setSubscriptionInformation(**subscription)
        elif streamConfig.ProcessingStyle == "Express":
            wmSpec = expressWorkload(workflowName, specArguments)
            wmSpec.setPhEDExInjectionOverride(expressPhEDExInjectNode)
            for subscription in subscriptions['Express']:
                wmSpec.setSubscriptionInformation(**subscription)

        if streamConfig.ProcessingStyle in [ 'Bulk', 'Express' ]:
            wmSpec.setOwnerDetails("*****@*****.**", "T0",
                                   { 'vogroup' : 'DEFAULT',
                                     'vorole' : 'DEFAULT',
                                     'dn' : "*****@*****.**" } )

            wmSpec.setupPerformanceMonitoring(maxRSS = 10485760,
                                              maxVSize = 10485760,
                                              softTimeout = 604800,
                                              gracePeriod = 3600)

            wmbsHelper = WMBSHelper(wmSpec, taskName, cachepath = specDirectory)

        filesetName = "Run%d_Stream%s" % (run, stream)
        fileset = Fileset(filesetName)

        #
        # create workflow (currently either repack or express)
        #
        try:
            myThread.transaction.begin()
            if len(bindsDataset) > 0:
                insertDatasetDAO.execute(bindsDataset, conn = myThread.transaction.conn, transaction = True)
            if len(bindsStreamDataset) > 0:
                insertStreamDatasetDAO.execute(bindsStreamDataset, conn = myThread.transaction.conn, transaction = True)
            if len(bindsRepackConfig) > 0:
                insertRepackConfigDAO.execute(bindsRepackConfig, conn = myThread.transaction.conn, transaction = True)
            if len(bindsPromptCalibration) > 0:
                insertPromptCalibrationDAO.execute(bindsPromptCalibration, conn = myThread.transaction.conn, transaction = True)
            if len(bindsExpressConfig) > 0:
                insertExpressConfigDAO.execute(bindsExpressConfig, conn = myThread.transaction.conn, transaction = True)
            if len(bindsSpecialDataset) > 0:
                insertSpecialDatasetDAO.execute(bindsSpecialDataset, conn = myThread.transaction.conn, transaction = True)
            if len(bindsDatasetScenario) > 0:
                insertDatasetScenarioDAO.execute(bindsDatasetScenario, conn = myThread.transaction.conn, transaction = True)
            if len(bindsCMSSWVersion) > 0:
                insertCMSSWVersionDAO.execute(bindsCMSSWVersion, conn = myThread.transaction.conn, transaction = True)
            if len(bindsStreamOverride) > 0:
                updateStreamOverrideDAO.execute(bindsStreamOverride, conn = myThread.transaction.conn, transaction = True)
            if len(bindsStorageNode) > 0:
                insertStorageNodeDAO.execute(bindsStorageNode, conn = myThread.transaction.conn, transaction = True)
            if len(bindsPhEDExConfig) > 0:
                insertPhEDExConfigDAO.execute(bindsPhEDExConfig, conn = myThread.transaction.conn, transaction = True)
            insertStreamStyleDAO.execute(bindsStreamStyle, conn = myThread.transaction.conn, transaction = True)
            if streamConfig.ProcessingStyle in [ 'Bulk', 'Express' ]:
                insertStreamFilesetDAO.execute(run, stream, filesetName, conn = myThread.transaction.conn, transaction = True)
                fileset.load()
                wmbsHelper.createSubscription(wmSpec.getTask(taskName), fileset, alternativeFilesetClose = True)
                insertWorkflowMonitoringDAO.execute([fileset.id], conn = myThread.transaction.conn, transaction = True)
            if streamConfig.ProcessingStyle == "Bulk":
                bindsRecoReleaseConfig = []
                for fileset, primds in wmbsHelper.getMergeOutputMapping().items():
                    bindsRecoReleaseConfig.append( { 'RUN' : run,
                                                     'PRIMDS' : primds,
                                                     'FILESET' : fileset,
                                                     'RECODELAY' : promptRecoDelay[primds],
                                                     'RECODELAYOFFSET' : promptRecoDelayOffset[primds] } )
                insertRecoReleaseConfigDAO.execute(bindsRecoReleaseConfig, conn = myThread.transaction.conn, transaction = True)
            elif streamConfig.ProcessingStyle == "Express":
                markWorkflowsInjectedDAO.execute([workflowName], injected = True, conn = myThread.transaction.conn, transaction = True)
        except:
            myThread.transaction.rollback()
            raise
        else:
            myThread.transaction.commit()

    else:
        # should we do anything for local runs ?
        pass

    return
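# The try/except/else unit-of-work pattern used by configureRunStream(),
# reduced to its skeleton; sqlite3 stands in for the WMCore database layer
# purely for illustration.
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE run_config (run INTEGER, stream TEXT)")
try:
    conn.execute("INSERT INTO run_config VALUES (?, ?)", (1, "Express"))
except Exception:
    conn.rollback()   # undo every insert in the unit of work
    raise
else:
    conn.commit()     # all inserts become visible atomically
print(conn.execute("SELECT * FROM run_config").fetchall())   # [(1, 'Express')]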