def testSetLocationOrder(self):
    """
    _testSetLocationOrder_

    Verify that a location can be attached to a file *before* the file
    is created in the database, not only afterwards.
    """
    thread = threading.currentThread()

    createdFile = File(lfn="/this/is/a/lfn", size=1024, events=10)
    createdFile.setLocation("se1.cern.ch")
    createdFile.create()

    # Re-load the same file by LFN and look its location up via the DAO.
    loadedFile = File(lfn=createdFile["lfn"])
    loadedFile.load()

    daoFab = DAOFactory(package="WMCore.WMBS", logger=logging,
                        dbinterface=thread.dbi)
    getLocation = daoFab(classname="Files.GetLocation")
    loc = getLocation.execute(loadedFile['lfn']).pop()
    self.assertEqual(loc, 'se1.cern.ch')
    return
def testSetLocation(self):
    """
    _testSetLocation_

    Persist a file with two saved locations plus two unsaved ones, then
    reload it and check that only the saved locations come back.
    """
    fileA = File(lfn="/this/is/a/lfn", size=1024, events=10,
                 checksums={'cksum': 1})
    fileA.addRun(Run(1, *[45]))
    fileA.create()
    fileA.setLocation(["se1.fnal.gov", "se1.cern.ch"])
    # These two are never saved, so they must not appear after reload.
    fileA.setLocation(["bunkse1.fnal.gov", "bunkse1.cern.ch"],
                      immediateSave=False)

    fileB = File(id=fileA["id"])
    fileB.loadData()

    expected = ["se1.fnal.gov", "se1.cern.ch"]
    for loc in fileB["locations"]:
        assert loc in expected, \
            "ERROR: Unknown file location"
        expected.remove(loc)

    assert len(expected) == 0, \
        "ERROR: Some locations are missing"
    return
def test05(self):
    """
    _test05_

    Express merge over lumis 1, 2 and 4 (lumi 3 missing from the
    sequence): two jobs are still expected once maxLatency expires.
    """
    args = self.splitArgs.copy()

    for lumiNum in [1, 2, 4]:
        for _ in range(2):
            fobj = File(makeUUID(), size = 1000, events = 100)
            fobj.addRun(Run(1, *[lumiNum]))
            fobj.setLocation("SomePNN", immediateSave = False)
            fobj.create()
            self.fileset2.addFile(fobj)
    self.fileset2.commit()

    jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                      subscription = self.subscription2)

    # let the latency window elapse so the splitter is willing to act
    time.sleep(1)
    args['maxLatency'] = 1
    jobGroups = jobFactory(**args)

    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")
    self.assertEqual(len(jobGroups[0].jobs), 2,
                     "ERROR: JobFactory didn't create two jobs")
    return
def createFileFromDataStructsFile(self, file, jobID):
    """
    _createFileFromDataStructsFile_

    Build a WMBS File out of a DataStructs file, pick a single storage
    element for it, tag it with the job ID and queue it on
    self.wmbsFilesToBuild for bulk creation.

    :param file: DataStructs File (dict-like) to convert
    :param jobID: id of the job the file belongs to
    :returns: the new WMBS File object
    """
    wmbsFile = File()
    wmbsFile.update(file)

    # A file may carry several locations, but only one SE is kept here;
    # warn when locations are silently dropped.
    if isinstance(file["locations"], set):
        seName = list(file["locations"])[0]
    elif isinstance(file["locations"], list):
        if len(file['locations']) > 1:
            # lazy %-style args: formatting is skipped if the level is off
            logging.error("Have more then one location for a file in job %i", jobID)
            logging.error("Choosing location %s", file['locations'][0])
        seName = file["locations"][0]
    else:
        seName = file["locations"]

    wmbsFile["locations"] = set()

    if seName is not None:
        wmbsFile.setLocation(se = seName, immediateSave = False)
    wmbsFile['jid'] = jobID
    self.wmbsFilesToBuild.append(wmbsFile)
    return wmbsFile
def test05(self):
    """
    _test05_

    Repack of lumis 1, 2 and 4 (lumi 3 missing).  No jobs are created
    until the hole is plugged by a zero-file closed-lumi record.
    """
    args = self.splitArgs.copy()

    lumiBinds = []
    for lumiNum in [1, 2, 4]:
        nFiles = 2
        for _ in range(nFiles):
            fobj = File(makeUUID(), size = 1000, events = 100)
            fobj.addRun(Run(1, *[lumiNum]))
            fobj.setLocation("SomeSE", immediateSave = False)
            fobj.create()
            self.fileset1.addFile(fobj)
        lumiBinds.append( { 'RUN' : 1,
                            'LUMI' : lumiNum,
                            'STREAM' : "A",
                            'FILECOUNT' : nFiles,
                            'INSERT_TIME' : self.currentTime,
                            'CLOSE_TIME' : self.currentTime } )
    self.fileset1.commit()

    jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                      subscription = self.subscription1)

    self.insertClosedLumiDAO.execute(binds = lumiBinds,
                                     transaction = False)

    args['maxInputFiles'] = 5

    jobGroups = jobFactory(**args)
    self.assertEqual(len(jobGroups), 0,
                     "ERROR: JobFactory should have returned no JobGroup")

    # close lumi 3 with zero files to fill the hole in the sequence
    self.insertClosedLumiDAO.execute(binds = { 'RUN' : 1,
                                               'LUMI' : 3,
                                               'STREAM' : "A",
                                               'FILECOUNT' : 0,
                                               'INSERT_TIME' : self.currentTime,
                                               'CLOSE_TIME' : self.currentTime },
                                     transaction = False)

    jobGroups = jobFactory(**args)
    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")
    self.assertEqual(len(jobGroups[0].jobs), 1,
                     "ERROR: JobFactory didn't create one job")
    self.assertEqual(len(jobGroups[0].jobs[0].getFiles()), 4,
                     "ERROR: first job does not process 4 files")
    return
def createJob(self, streamerList, jobEvents, jobSize):
    """
    _createJob_

    Build one express processing job over the given streamer files and
    attach time/disk resource estimates to it.
    """
    if not self.createdGroup:
        self.newGroup()
        self.createdGroup = True

    self.newJob(name = "%s-%s" % (self.jobNamePrefix, makeUUID()))
    for entry in streamerList:
        streamerFile = File(id = entry['id'], lfn = entry['lfn'])
        streamerFile.setLocation(entry['location'], immediateSave = False)
        self.currentJob.addFile(streamerFile)

    # job time based on
    #   - 5 min initialization
    #   - 0.5MB/s repack speed
    #   - 45s/evt reco speed
    #   - checksum calculation at 5MB/s (twice)
    #   - stageout at 5MB/s
    # job disk based on
    #   - streamer on local disk (factor 1)
    #   - RAW on local disk (factor 1)
    #   - FEVT/ALCARECO/DQM on local disk (factor 4)
    jobTime = 300 + jobSize/500000 + jobEvents*45 + (jobSize*4*3)/5000000
    self.currentJob.addResourceEstimates(jobTime = jobTime,
                                         disk = (jobSize*6)/1024)
    return
def createFileFromDataStructsFile(self, file, jobID):
    """
    _createFileFromDataStructsFile_

    Build a WMBS File out of a DataStructs file, pick a single PNN for
    it and tag it with the job ID.

    :param file: DataStructs File (dict-like) to convert
    :param jobID: id of the job the file belongs to
    :returns: the new WMBS File object
    """
    wmbsFile = File()
    wmbsFile.update(file)

    # A file may carry several locations, but only one PNN is kept;
    # warn when locations are silently dropped.
    if isinstance(file["locations"], set):
        pnn = list(file["locations"])[0]
    elif isinstance(file["locations"], list):
        if len(file['locations']) > 1:
            # lazy %-style args: formatting only happens if emitted
            logging.error("Have more then one location for a file in job %i", jobID)
            logging.error("Choosing location %s", file['locations'][0])
        pnn = file["locations"][0]
    else:
        pnn = file["locations"]

    wmbsFile["locations"] = set()

    if pnn is not None:
        wmbsFile.setLocation(pnn=pnn, immediateSave=False)
    wmbsFile['jid'] = jobID
    return wmbsFile
def createFileFromDataStructsFile(self, file, jobID):
    """
    _createFileFromDataStructsFile_

    Build a WMBS File out of a DataStructs file, pick a single storage
    element for it and tag it with the job ID.

    :param file: DataStructs File (dict-like) to convert
    :param jobID: id of the job the file belongs to
    :returns: the new WMBS File object
    """
    wmbsFile = File()
    wmbsFile.update(file)

    # A file may carry several locations, but only one SE is kept;
    # warn when locations are silently dropped.
    if isinstance(file["locations"], set):
        seName = list(file["locations"])[0]
    elif isinstance(file["locations"], list):
        if len(file['locations']) > 1:
            # lazy %-style args: formatting only happens if emitted
            logging.error("Have more then one location for a file in job %i", jobID)
            logging.error("Choosing location %s", file['locations'][0])
        seName = file["locations"][0]
    else:
        seName = file["locations"]

    wmbsFile["locations"] = set()

    if seName is not None:
        wmbsFile.setLocation(se=seName, immediateSave=False)
    wmbsFile['jid'] = jobID
    return wmbsFile
def test03(self):
    """
    _test03_

    Input size threshold over two lumis: a tiny maxInputSize yields no
    jobs; an expired maxLatency then yields two jobs.
    """
    args = self.splitArgs.copy()

    for lumiNum in [1, 2]:
        for _ in range(2):
            fobj = File(makeUUID(), size=1000, events=100)
            fobj.addRun(Run(1, *[lumiNum]))
            fobj.setLocation("SomeSE", immediateSave=False)
            fobj.create()
            self.fileset2.addFile(fobj)
    self.fileset2.commit()

    jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                      subscription=self.subscription2)

    args["maxInputSize"] = 1
    jobGroups = jobFactory(**args)
    self.assertEqual(len(jobGroups), 0,
                     "ERROR: JobFactory should have returned no JobGroup")

    # let the latency window elapse so the splitter is forced to act
    time.sleep(1)
    args["maxLatency"] = 1
    jobGroups = jobFactory(**args)
    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")
    self.assertEqual(len(jobGroups[0].jobs), 2,
                     "ERROR: JobFactory didn't create two jobs")
    return
def createJob(self, fileList, jobSize):
    """
    _createJob_

    Build one express merge job over the given input files and attach
    resource estimates based on the total and largest input size.
    """
    if not self.createdGroup:
        self.newGroup()
        self.createdGroup = True

    self.newJob(name="%s-%s" % (self.jobNamePrefix, makeUUID()))

    biggest = 0
    for info in fileList:
        biggest = max(biggest, info['filesize'])
        mergeFile = File(id=info['id'], lfn=info['lfn'])
        mergeFile.setLocation(info['location'], immediateSave=False)
        self.currentJob.addFile(mergeFile)

    # job time based on
    #   - 5 min initialization
    #   - 5MB/s merge speed
    #   - checksum calculation at 5MB/s
    #   - stageout at 5MB/s
    # job disk based on
    #   - input for largest file on local disk
    #   - output on local disk (factor 1)
    jobTime = 300 + (jobSize * 3) / 5000000
    self.currentJob.addResourceEstimates(jobTime=jobTime,
                                         disk=(jobSize + biggest) / 1024)
    return
def testGetInfo(self):
    """
    _testGetInfo_

    Check that File.getInfo() reports lfn, id, size, events, checksums,
    runs, locations and parents, in that order.
    """
    parentFile = File(lfn = "/this/is/a/parent/lfn", size = 1024,
                      events = 20, checksums={'cksum': 1111})
    parentFile.addRun(Run(1, *[45]))
    parentFile.create()

    childFile = File(lfn = "/this/is/a/lfn", size = 1024, events = 10,
                     checksums={'cksum': 222})
    childFile.addRun(Run(1, *[45]))
    childFile.addRun(Run(2, *[46, 47]))
    childFile.addRun(Run(2, *[47, 48]))
    childFile.create()
    childFile.setLocation(se = "se1.fnal.gov", immediateSave = False)
    childFile.setLocation(se = "se1.cern.ch", immediateSave = False)
    childFile.addParent("/this/is/a/parent/lfn")

    info = childFile.getInfo()

    assert info[0] == childFile["lfn"], \
        "ERROR: File returned wrong LFN"
    assert info[1] == childFile["id"], \
        "ERROR: File returned wrong ID"
    assert info[2] == childFile["size"], \
        "ERROR: File returned wrong size"
    assert info[3] == childFile["events"], \
        "ERROR: File returned wrong events"
    assert info[4] == childFile["checksums"], \
        "ERROR: File returned wrong cksum"

    # runs for the same run number must be folded into one Run object
    assert len(info[5]) == 2, \
        "ERROR: File returned wrong runs"
    assert info[5] == [Run(1, *[45]), Run(2, *[46, 47, 48])], \
        "Error: Run hasn't been combined correctly"

    assert len(info[6]) == 2, \
        "ERROR: File returned wrong locations"
    for loc in info[6]:
        assert loc in ["se1.fnal.gov", "se1.cern.ch"], \
            "ERROR: File returned wrong locations"

    assert len(info[7]) == 1, \
        "ERROR: File returned wrong parents"
    assert info[7][0] == parentFile, \
        "ERROR: File returned wrong parents"

    childFile.delete()
    parentFile.delete()
    return
def testSetLocation(self):
    """
    _testSetLocation_

    Store a file with two committed locations and two uncommitted ones;
    reloading must return only the committed pair.
    """
    locatedFile = File(lfn = "/this/is/a/lfn", size = 1024, events = 10,
                       checksums = {'cksum': 1})
    locatedFile.addRun(Run(1, *[45]))
    locatedFile.create()
    locatedFile.setLocation(["se1.fnal.gov", "se1.cern.ch"])
    # never saved, so these must not show up after reload
    locatedFile.setLocation(["bunkse1.fnal.gov", "bunkse1.cern.ch"],
                            immediateSave = False)

    reloadedFile = File(id = locatedFile["id"])
    reloadedFile.loadData()

    remaining = ["se1.fnal.gov", "se1.cern.ch"]
    for loc in reloadedFile["locations"]:
        assert loc in remaining, \
            "ERROR: Unknown file location"
        remaining.remove(loc)

    assert len(remaining) == 0, \
        "ERROR: Some locations are missing"
    return
def test05(self):
    """
    _test05_

    Express merge across lumis 1, 2 and 4 (hole at lumi 3); once the
    latency limit is hit, two jobs should be produced.
    """
    args = self.splitArgs.copy()

    for run1Lumi in [1, 2, 4]:
        for _ in range(2):
            inputFile = File(makeUUID(), size=1000, events=100)
            inputFile.addRun(Run(1, *[run1Lumi]))
            inputFile.setLocation("SomePNN", immediateSave=False)
            inputFile.create()
            self.fileset2.addFile(inputFile)
    self.fileset2.commit()

    jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                      subscription=self.subscription2)

    # wait out the latency window so the splitter acts
    time.sleep(1)
    args['maxLatency'] = 1
    jobGroups = jobFactory(**args)
    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")
    self.assertEqual(len(jobGroups[0].jobs), 2,
                     "ERROR: JobFactory didn't create two jobs")
    return
def createJob(self, fileList, jobSize):
    """
    _createJob_

    Build one express merge job over the given files; the time estimate
    includes a double checksum pass over the input.
    """
    if not self.createdGroup:
        self.newGroup()
        self.createdGroup = True

    self.newJob(name = "%s-%s" % (self.jobNamePrefix, makeUUID()))

    maxSize = 0
    for info in fileList:
        maxSize = max(maxSize, info['filesize'])
        inputFile = File(id = info['id'], lfn = info['lfn'])
        inputFile.setLocation(info['location'], immediateSave = False)
        self.currentJob.addFile(inputFile)

    # job time based on
    #   - 5 min initialization
    #   - 5MB/s merge speed
    #   - checksum calculation at 5MB/s (twice)
    #   - stageout at 5MB/s
    # job disk based on
    #   - input for largest file on local disk
    #   - output on local disk (factor 1)
    jobTime = 300 + (jobSize*4)/5000000
    self.currentJob.addResourceEstimates(jobTime = jobTime,
                                         disk = (jobSize+maxSize)/1024)
    return
def createJob(self, streamerList, jobEvents, jobSize, timePerEvent, sizePerEvent, memoryRequirement):
    """
    _createJob_

    Build one express processing job over the given streamers, with
    resource estimates derived from the per-event time/size parameters
    (both capped at hard limits).
    """
    if not self.createdGroup:
        self.newGroup()
        self.createdGroup = True

    self.newJob(name = "%s-%s" % (self.jobNamePrefix, makeUUID()))
    for entry in streamerList:
        streamerFile = File(id = entry['id'], lfn = entry['lfn'])
        streamerFile.setLocation(entry['location'], immediateSave = False)
        self.currentJob.addFile(streamerFile)

    # job time based on
    #   - 5 min initialization (twice)
    #   - 0.5MB/s repack speed
    #   - reco with timePerEvent
    #   - checksum calculation at 5MB/s
    #   - stageout at 5MB/s
    # job disk based on
    #   - streamer or RAW on local disk (factor 1)
    #   - FEVT/ALCARECO/DQM on local disk (sizePerEvent)
    jobTime = 600 + jobSize/500000 + jobEvents*timePerEvent + (jobEvents*sizePerEvent*2)/5000000
    self.currentJob.addResourceEstimates(jobTime = min(jobTime, 47*3600),
                                         disk = min(jobSize/1024 + jobEvents*sizePerEvent, 20000000),
                                         memory = memoryRequirement)
    return
def createJob(self, streamerList, jobEvents, jobSize, memoryRequirement, numberOfCores = 1):
    """
    _createJob_

    Build one repack job over the given streamers; multi-core jobs get
    the core count attached as baggage.
    """
    if not self.createdGroup:
        self.newGroup()
        self.createdGroup = True

    self.newJob(name = "%s-%s" % (self.jobNamePrefix, makeUUID()))
    if numberOfCores > 1:
        self.currentJob.addBaggageParameter("numberOfCores", numberOfCores)

    for entry in streamerList:
        repackFile = File(id = entry['id'], lfn = entry['lfn'])
        repackFile.setLocation(entry['location'], immediateSave = False)
        self.currentJob.addFile(repackFile)

    # job time based on
    #   - 5 min initialization
    #   - 0.5MB/s repack speed
    #   - checksum calculation at 5MB/s
    #   - stageout at 5MB/s
    # job disk based on
    #   - RAW on local disk (factor 1)
    jobTime = 300 + jobSize/500000 + (jobSize*2)/5000000
    self.currentJob.addResourceEstimates(jobTime = jobTime,
                                         disk = jobSize/1024,
                                         memory = memoryRequirement)
    return
def createJob(self, streamerList, jobEvents, jobSize, memoryRequirement):
    """
    _createJob_

    Build one express processing job over the given streamers, with a
    fixed 45 s/event reco estimate and the caller-supplied memory need.
    """
    if not self.createdGroup:
        self.newGroup()
        self.createdGroup = True

    self.newJob(name="%s-%s" % (self.jobNamePrefix, makeUUID()))
    for entry in streamerList:
        streamerFile = File(id=entry['id'], lfn=entry['lfn'])
        streamerFile.setLocation(entry['location'], immediateSave=False)
        self.currentJob.addFile(streamerFile)

    # job time based on
    #   - 5 min initialization
    #   - 0.5MB/s repack speed
    #   - 45s/evt reco speed
    #   - checksum calculation at 5MB/s
    #   - stageout at 5MB/s
    # job disk based on
    #   - streamer or RAW on local disk (factor 1)
    #   - FEVT/ALCARECO/DQM on local disk (factor 4)
    jobTime = 300 + jobSize / 500000 + jobEvents * 45 + (jobSize * 4 * 2) / 5000000
    self.currentJob.addResourceEstimates(jobTime=jobTime,
                                         disk=(jobSize * 5) / 1024,
                                         memory=memoryRequirement)
    return
def createFileFromDataStructsFile(self, file, jobID):
    """
    _createFileFromDataStructsFile_

    Build a WMBS File out of a DataStructs file, pick a single PNN for
    it and tag it with the job ID.

    :param file: DataStructs File (dict-like) to convert
    :param jobID: id of the job the file belongs to
    :returns: the new WMBS File object
    """
    wmbsFile = File()
    wmbsFile.update(file)

    # A file may carry several locations, but only one PNN is kept;
    # warn when locations are silently dropped.
    if isinstance(file["locations"], set):
        pnn = list(file["locations"])[0]
    elif isinstance(file["locations"], list):
        if len(file['locations']) > 1:
            # lazy %-style args: formatting only happens if emitted
            logging.error("Have more then one location for a file in job %i", jobID)
            logging.error("Choosing location %s", file['locations'][0])
        pnn = file["locations"][0]
    else:
        pnn = file["locations"]

    wmbsFile["locations"] = set()

    if pnn is not None:
        wmbsFile.setLocation(pnn = pnn, immediateSave = False)
    wmbsFile['jid'] = jobID
    return wmbsFile
def test03(self):
    """
    _test03_

    Single-lumi event threshold: 8 files in one lumi split at 650
    events into a 6-file job and a 2-file job, creating one active
    split lumi.
    """
    args = self.splitArgs.copy()

    lumiBinds = []
    for lumiNum in [1]:
        nFiles = 8
        for _ in range(nFiles):
            fobj = File(makeUUID(), size = 1000, events = 100)
            fobj.addRun(Run(1, *[lumiNum]))
            fobj.setLocation("SomePNN", immediateSave = False)
            fobj.create()
            self.fileset1.addFile(fobj)
        lumiBinds.append( { 'RUN' : 1,
                            'LUMI' : lumiNum,
                            'STREAM' : "A",
                            'FILECOUNT' : nFiles,
                            'INSERT_TIME' : self.currentTime,
                            'CLOSE_TIME' : self.currentTime } )
    self.fileset1.commit()

    jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                      subscription = self.subscription1)

    self.insertClosedLumiDAO.execute(binds = lumiBinds,
                                     transaction = False)

    args['maxLatency'] = 50000
    jobGroups = jobFactory(**args)
    self.assertEqual(len(jobGroups), 0,
                     "ERROR: JobFactory should have returned no JobGroup")

    args['maxInputEvents'] = 650
    jobGroups = jobFactory(**args)
    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")
    self.assertEqual(len(jobGroups[0].jobs), 2,
                     "ERROR: JobFactory didn't create two jobs")
    job = jobGroups[0].jobs[0]
    self.assertEqual(len(job.getFiles()), 6,
                     "ERROR: Job does not process 6 files")
    job = jobGroups[0].jobs[1]
    self.assertEqual(len(job.getFiles()), 2,
                     "ERROR: Job does not process 2 files")

    self.assertEqual(self.getNumActiveSplitLumis(), 1,
                     "ERROR: Split lumis were not created")
    return
def test06(self):
    """
    _test06_

    Repack of three lumis: the two small lumis (below multi-lumi
    thresholds) merge into one job; the big third lumi (above the
    single-lumi threshold) splits into two single-file jobs.
    """
    args = self.splitArgs.copy()

    lumiBinds = []
    for lumiNum in [1, 2, 3]:
        nFiles = 2
        for _ in range(nFiles):
            # lumi 3 carries the over-threshold event count
            eventCount = 500 if lumiNum == 3 else 100
            fobj = File(makeUUID(), size = 1000, events = eventCount)
            fobj.addRun(Run(1, *[lumiNum]))
            fobj.setLocation("SomePNN", immediateSave = False)
            fobj.create()
            self.fileset1.addFile(fobj)
        lumiBinds.append( { 'RUN' : 1,
                            'LUMI' : lumiNum,
                            'STREAM' : "A",
                            'FILECOUNT' : nFiles,
                            'INSERT_TIME' : self.currentTime,
                            'CLOSE_TIME' : self.currentTime } )
    self.fileset1.commit()

    jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                      subscription = self.subscription1)

    self.insertClosedLumiDAO.execute(binds = lumiBinds,
                                     transaction = False)

    args['maxLatency'] = 50000
    args['maxInputEvents'] = 900
    jobGroups = jobFactory(**args)
    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")
    self.assertEqual(len(jobGroups[0].jobs), 3,
                     "ERROR: JobFactory didn't create three jobs")
    self.assertEqual(len(jobGroups[0].jobs[0].getFiles()), 4,
                     "ERROR: first job does not process 4 files")
    self.assertEqual(len(jobGroups[0].jobs[1].getFiles()), 1,
                     "ERROR: second job does not process 1 file")
    self.assertEqual(len(jobGroups[0].jobs[2].getFiles()), 1,
                     "ERROR: third job does not process 1 file")
    return
def setUp(self):
    """
    _setUp_

    Build the WMBS schema, the resource-control sites and an
    eleven-file fileset/subscription pair used by the Harvest
    splitting tests.
    """
    self.testInit = TestInit(__file__)
    self.testInit.setLogging()
    self.testInit.setDatabaseConnection()
    self.testInit.setSchema(customModules=["WMCore.WMBS"])

    self.splitterFactory = SplitterFactory(package="WMCore.JobSplitting")

    currentThread = threading.currentThread()
    self.myThread = currentThread

    daoFab = DAOFactory(package="WMCore.WMBS",
                        logger=logging,
                        dbinterface=currentThread.dbi)
    self.WMBSFactory = daoFab

    cfg = self.getConfig()
    self.changer = ChangeState(cfg)

    resourceControl = ResourceControl()
    # T1_US_FNAL is registered twice, once per PNN
    resourceControl.insertSite("T1_US_FNAL", 10, 20, "T1_US_FNAL_Disk", "T1_US_FNAL")
    resourceControl.insertSite("T1_US_FNAL", 10, 20, "T3_US_FNALLPC", "T1_US_FNAL")
    resourceControl.insertSite("T2_CH_CERN", 10, 20, "T2_CH_CERN", "T2_CH_CERN")

    self.fileset1 = Fileset(name="TestFileset1")
    for fileNum in range(11):
        harvestFile = File("/some/file/name%d" % fileNum, size=1000, events=100)
        harvestFile.addRun(Run(1, *[1]))
        harvestFile.setLocation('T1_US_FNAL_Disk')
        self.fileset1.addFile(harvestFile)
    self.fileset1.create()

    workflow1 = Workflow(spec="spec.xml", owner="hufnagel",
                         name="TestWorkflow1", task="Test")
    workflow1.create()

    self.subscription1 = Subscription(fileset=self.fileset1,
                                      workflow=workflow1,
                                      split_algo="Harvest",
                                      type="Harvesting")
    self.subscription1.create()

    self.configFile = EmulatorSetup.setupWMAgentConfig()
    return
def loadFiles(self, size=10):
    """
    _loadFiles_

    Fetch up to `size` rows from the open result proxies and turn them
    into WMBS File objects, loading split info and locations in bulk.

    Should handle multiple proxies.  Not really sure about that.

    :param size: maximum number of rows to pull in this call
    :returns: set of WMBSFile objects (empty set when nothing is left)
    """
    if not self.proxies:
        # We ran out of files in an earlier call; nothing left to serve.
        logging.info("No additional files found; Ending.")
        return set()

    resultProxy = self.proxies[0]
    rawResults = []
    # resultProxy.keys may be a plain list attribute or a callable,
    # depending on the DB layer version in use -- handle both.
    if isinstance(resultProxy.keys, list):
        keys = resultProxy.keys
    else:
        keys = resultProxy.keys()
    if isinstance(keys, set):
        # If it's a set, handle it
        keys = list(keys)
    files = set()

    while len(rawResults) < size and len(self.proxies) > 0:
        length = size - len(rawResults)
        newResults = resultProxy.fetchmany(size=length)

        if len(newResults) < length:
            # Assume this proxy is exhausted and eliminate it.
            # NOTE(review): with more than one proxy the loop keeps
            # fetching from the removed proxy -- confirm multi-proxy use.
            self.proxies.remove(resultProxy)

        rawResults.extend(newResults)

    if not rawResults:
        # Nothing to do
        return set()

    fileList = self.formatDict(results=rawResults, keys=keys)
    fileIDs = list(set([x["fileid"] for x in fileList]))

    myThread = threading.currentThread()
    fileInfoAct = self.daoFactory(classname="Files.GetForJobSplittingByID")
    fileInfoDict = fileInfoAct.execute(file=fileIDs,
                                       conn=myThread.transaction.conn,
                                       transaction=True)

    getLocAction = self.daoFactory(classname="Files.GetLocationBulk")
    getLocDict = getLocAction.execute(files=fileIDs,
                                      conn=myThread.transaction.conn,
                                      transaction=True)

    for fID in fileIDs:
        fl = WMBSFile(id=fID)
        fl.update(fileInfoDict[fID])
        locations = getLocDict.get((fID), [])
        for loc in locations:
            fl.setLocation(loc, immediateSave=False)
        files.add(fl)

    return files
def test06(self):
    """
    _test06_

    maxInputFiles threshold across three equally sized lumis: first a
    4-file job, then the leftover 2-file job once the fileset closes.
    """
    args = self.splitArgs.copy()

    for lumiNum in [1, 2, 3]:
        for _ in range(2):
            fobj = File(makeUUID(), size = 1000, events = 100)
            fobj.addRun(Run(1, *[lumiNum]))
            fobj.setLocation("SomePNN", immediateSave = False)
            fobj.create()
            self.fileset2.addFile(fobj)
    self.fileset2.commit()

    jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                      subscription = self.subscription2)

    args['minInputSize'] = 3000
    jobGroups = jobFactory(**args)
    self.assertEqual(len(jobGroups), 0,
                     "ERROR: JobFactory should have returned no JobGroup")

    args['maxInputFiles'] = 5
    jobGroups = jobFactory(**args)
    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")
    self.assertEqual(len(jobGroups[0].jobs), 1,
                     "ERROR: JobFactory didn't create one job")
    job = jobGroups[0].jobs[0]
    self.assertEqual(len(job.getFiles()), 4,
                     "ERROR: Job does not process 4 files")

    # closing the fileset flushes the remaining files into a final job
    self.fileset2.markOpen(False)

    jobGroups = jobFactory(**args)
    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")
    self.assertEqual(len(jobGroups[0].jobs), 1,
                     "ERROR: JobFactory didn't create one job")
    job = jobGroups[0].jobs[0]
    self.assertEqual(len(job.getFiles()), 2,
                     "ERROR: Job does not process 2 files")
    return
def test00(self):
    """
    _test00_

    Check the job name prefix and the maxEdmSize threshold for a single
    lumi: the small lumi yields one 2-file job, the over-large lumi is
    split into a 3-file and a 1-file job.
    """
    args = self.splitArgs.copy()

    for lumiNum in [1, 2]:
        for _ in range(2 * lumiNum):
            fobj = File(makeUUID(), size = 1000 * lumiNum * lumiNum, events = 100)
            fobj.addRun(Run(1, *[lumiNum]))
            fobj.setLocation("SomePNN", immediateSave = False)
            fobj.create()
            self.fileset2.addFile(fobj)
    self.fileset2.commit()

    jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                      subscription = self.subscription2)

    jobGroups = jobFactory(**args)
    self.assertEqual(len(jobGroups), 0,
                     "ERROR: JobFactory should have returned no JobGroup")

    args['maxEdmSize'] = 13000
    jobGroups = jobFactory(**args)
    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")
    self.assertEqual(len(jobGroups[0].jobs), 3,
                     "ERROR: JobFactory didn't create three jobs")

    job = jobGroups[0].jobs[0]
    self.assertTrue(job['name'].startswith("RepackMerge-"),
                    "ERROR: Job has wrong name")
    self.assertEqual(len(job.getFiles()), 2,
                     "ERROR: Job does not process 2 files")
    job = jobGroups[0].jobs[1]
    self.assertEqual(len(job.getFiles()), 3,
                     "ERROR: Job does not process 3 files")
    job = jobGroups[0].jobs[2]
    self.assertEqual(len(job.getFiles()), 1,
                     "ERROR: Job does not process 1 file")
    return
def makeNJobs(self, name, task, nJobs, jobGroup, fileset, sub,
              site = None, bl = None, wl = None):
    """
    _makeNJobs_

    Make and return a WMBS Job and File.
    This handles all those damn add-ons (cache dir, sandbox, owner
    fields, site black/white lists).

    :param site: single location for the files; falls back to all
        self.sites when not given
    :param bl: site blacklist (defaults to empty list)
    :param wl: site whitelist (defaults to empty list)
    :returns: (last Job created, last File created)
    """
    # Avoid the shared-mutable-default pitfall: fresh lists per call.
    if bl is None:
        bl = []
    if wl is None:
        wl = []

    # Set the CacheDir
    cacheDir = os.path.join(self.testDir, 'CacheDir')

    for n in range(nJobs):
        # First make a file
        #site = self.sites[0]
        testFile = File(lfn = "/singleLfn/%s/%s" % (name, n),
                        size = 1024, events = 10)
        if site:
            testFile.setLocation(site)
        else:
            for tmpSite in self.sites:
                testFile.setLocation('se.%s' % (tmpSite))
        testFile.create()
        fileset.addFile(testFile)

    fileset.commit()

    index = 0
    for f in fileset.files:
        index += 1
        testJob = Job(name = '%s-%i' % (name, index))
        testJob.addFile(f)
        testJob["location"] = f.getLocations()[0]
        testJob['custom']['location'] = f.getLocations()[0]
        testJob['task'] = task.getPathName()
        testJob['sandbox'] = task.data.input.sandbox
        testJob['spec'] = os.path.join(self.testDir, 'basicWorkload.pcl')
        testJob['mask']['FirstEvent'] = 101
        testJob['owner'] = 'tapas'
        testJob["siteBlacklist"] = bl
        testJob["siteWhitelist"] = wl
        testJob['ownerDN'] = 'tapas'
        testJob['ownerRole'] = 'cmsrole'
        testJob['ownerGroup'] = 'phgroup'
        jobCache = os.path.join(cacheDir, 'Sub_%i' % (sub), 'Job_%i' % (index))
        os.makedirs(jobCache)
        testJob.create(jobGroup)
        testJob['cache_dir'] = jobCache
        testJob.save()
        jobGroup.add(testJob)
        # pickle needs a binary-mode handle; the context manager closes
        # the file even if pickling raises
        with open(os.path.join(jobCache, 'job.pkl'), 'wb') as output:
            pickle.dump(testJob, output)

    return testJob, testFile
def test10(self):
    """
    _test10_

    Merge across lumis 1, 2 and 4 where lumi 3 has no streamer files.
    One merged job is created once lumi 3 is closed with a zero file
    count, even below minInputSize -- only a single hole is supported
    since maxInputEvents is no longer used.
    """
    args = self.splitArgs.copy()

    for lumiNum in [1, 2, 4]:
        for _ in range(2):
            fobj = File(makeUUID(), size=1000, events=100)
            fobj.addRun(Run(1, *[lumiNum]))
            fobj.setLocation("SomePNN", immediateSave=False)
            fobj.create()
            self.fileset2.addFile(fobj)
    self.fileset2.commit()

    jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                      subscription=self.subscription2)

    args['minInputSize'] = 100000
    args['maxInputSize'] = 200000

    jobGroups = jobFactory(**args)
    self.assertEqual(len(jobGroups), 0,
                     "ERROR: JobFactory should have returned no JobGroup")

    # close lumi 3 with no files to plug the hole
    self.insertClosedLumiDAO.execute(binds={
        'RUN': 1,
        'LUMI': 3,
        'STREAM': "A",
        'FILECOUNT': 0,
        'INSERT_TIME': self.currentTime,
        'CLOSE_TIME': self.currentTime
    }, transaction=False)

    jobGroups = jobFactory(**args)
    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")
    self.assertEqual(len(jobGroups[0].jobs), 1,
                     "ERROR: JobFactory didn't create one job")
    job = jobGroups[0].jobs[0]
    self.assertEqual(len(job.getFiles()), 4,
                     "ERROR: Job does not process 4 files")
    return
def createJob(self, fileList, jobSize, errorDataset=False):
    """
    _createJob_

    Create a repack merge job for the passed in list of files.

    Jobs whose disk-based core estimate exceeds 8 cores would never be
    scheduled, so their input files are marked failed instead.

    :param fileList: list of dicts with 'id', 'lfn', 'location', 'filesize'
    :param jobSize: total input size in bytes
    :param errorDataset: route output to the error dataset when True
    """
    # find largest file
    largestFile = 0
    for fileInfo in fileList:
        largestFile = max(largestFile, fileInfo['filesize'])

    # calculate number of cores based on disk usage
    numberOfCores = 1 + (int)((jobSize + largestFile) / (20 * 1000 * 1000 * 1000))

    # jobs requesting more than 8 cores would never run
    if numberOfCores > 8:
        # BUG FIX: this path referenced the undefined name `streamerList`
        # (a NameError at runtime); the files to fail are in `fileList`.
        self.markFailed(fileList)
        return

    if not self.createdGroup:
        self.newGroup()
        self.createdGroup = True

    self.newJob(name="%s-%s" % (self.jobNamePrefix, makeUUID()))

    for fileInfo in fileList:
        f = File(id=fileInfo['id'], lfn=fileInfo['lfn'])
        f.setLocation(fileInfo['location'], immediateSave=False)
        self.currentJob.addFile(f)

    if errorDataset:
        self.currentJob.addBaggageParameter("useErrorDataset", True)

    # allow large (single lumi) repackmerge to use multiple cores
    if numberOfCores > 1:
        self.currentJob.addBaggageParameter("numberOfCores", numberOfCores)

    # job time based on
    #   - 5 min initialization
    #   - 5MB/s merge speed
    #   - checksum calculation at 5MB/s
    #   - stageout at 5MB/s
    # job disk based on
    #   - input for largest file on local disk
    #   - output on local disk (factor 1)
    jobTime = 300 + (jobSize * 3) / 5000000
    self.currentJob.addResourceEstimates(jobTime=jobTime,
                                         disk=(jobSize + largestFile) / 1024,
                                         memory=1000)
    return
def makeNJobs(self, name, task, nJobs, jobGroup, fileset, sub,
              site=None, bl=None, wl=None):
    """
    _makeNJobs_

    Make and return a WMBS Job and File.
    This handles all those damn add-ons (cache dir, sandbox, owner
    fields, site black/white lists).

    :param site: single location for the files; falls back to all
        self.sites when not given
    :param bl: site blacklist (defaults to empty list)
    :param wl: site whitelist (defaults to empty list)
    :returns: (last Job created, last File created)
    """
    # Avoid the shared-mutable-default pitfall: fresh lists per call.
    if bl is None:
        bl = []
    if wl is None:
        wl = []

    # Set the CacheDir
    cacheDir = os.path.join(self.testDir, "CacheDir")

    for n in range(nJobs):
        # First make a file
        # site = self.sites[0]
        testFile = File(lfn="/singleLfn/%s/%s" % (name, n),
                        size=1024, events=10)
        if site:
            testFile.setLocation(site)
        else:
            for tmpSite in self.sites:
                testFile.setLocation("se.%s" % (tmpSite))
        testFile.create()
        fileset.addFile(testFile)

    fileset.commit()

    index = 0
    for f in fileset.files:
        index += 1
        testJob = Job(name="%s-%i" % (name, index))
        testJob.addFile(f)
        testJob["location"] = f.getLocations()[0]
        testJob["custom"]["location"] = f.getLocations()[0]
        testJob["task"] = task.getPathName()
        testJob["sandbox"] = task.data.input.sandbox
        testJob["spec"] = os.path.join(self.testDir, "basicWorkload.pcl")
        testJob["mask"]["FirstEvent"] = 101
        testJob["owner"] = "tapas"
        testJob["siteBlacklist"] = bl
        testJob["siteWhitelist"] = wl
        testJob["ownerDN"] = "tapas"
        testJob["ownerRole"] = "cmsrole"
        testJob["ownerGroup"] = "phgroup"
        jobCache = os.path.join(cacheDir, "Sub_%i" % (sub), "Job_%i" % (index))
        os.makedirs(jobCache)
        testJob.create(jobGroup)
        testJob["cache_dir"] = jobCache
        testJob.save()
        jobGroup.add(testJob)
        # pickle needs a binary-mode handle; the context manager closes
        # the file even if pickling raises
        with open(os.path.join(jobCache, "job.pkl"), "wb") as output:
            pickle.dump(testJob, output)

    return testJob, testFile
def test09(self):
    """
    _test09_

    Under-merge over the event threshold: a small and a large lumi
    (each below max size, together above it) still merge into a single
    2-file job, both while the fileset is open and after it is closed.
    """
    args = self.splitArgs.copy()

    for lumiNum in [1, 2]:
        for _ in range(2):
            fobj = File(makeUUID(), size = 1000 * lumiNum * lumiNum, events = 100)
            fobj.addRun(Run(1, *[lumiNum]))
            fobj.setLocation("SomePNN", immediateSave = False)
            fobj.create()
            self.fileset2.addFile(fobj)
    self.fileset2.commit()

    jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                      subscription = self.subscription2)

    args['minInputSize'] = 3000
    args['maxInputSize'] = 9000
    args['maxInputEvents'] = 300

    jobGroups = jobFactory(**args)
    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")
    self.assertEqual(len(jobGroups[0].jobs), 1,
                     "ERROR: JobFactory didn't create one job")
    job = jobGroups[0].jobs[0]
    self.assertEqual(len(job.getFiles()), 2,
                     "ERROR: Job does not process 2 files")

    self.fileset2.markOpen(False)

    jobGroups = jobFactory(**args)
    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")
    self.assertEqual(len(jobGroups[0].jobs), 1,
                     "ERROR: JobFactory didn't create one job")
    job = jobGroups[0].jobs[0]
    self.assertEqual(len(job.getFiles()), 2,
                     "ERROR: Job does not process 2 files")
    return
def makeNJobs(self, name, task, nJobs, jobGroup, fileset, sub, site,
              bl = None, wl = None):
    """
    _makeNJobs_

    Make and return a WMBS Job and File.
    This handles all those damn add-ons (cache dir, sandbox, mask,
    site black/white lists, priority and core settings).

    :param site: a single location or a list of locations for the files
    :param bl: site blacklist (defaults to empty list)
    :param wl: site whitelist (defaults to empty list)
    :returns: (last Job created, last File created)
    """
    # Avoid the shared-mutable-default pitfall: fresh lists per call.
    if bl is None:
        bl = []
    if wl is None:
        wl = []

    # Set the CacheDir
    cacheDir = os.path.join(self.testDir, 'CacheDir')

    for n in range(nJobs):
        # First make a file
        #site = self.sites[0]
        testFile = File(lfn = "/singleLfn/%s/%s" % (name, n),
                        size = 1024, events = 10)
        # isinstance instead of type() ==: also accepts list subclasses
        if isinstance(site, list):
            for singleSite in site:
                testFile.setLocation(singleSite)
        else:
            testFile.setLocation(site)
        testFile.create()
        fileset.addFile(testFile)

    fileset.commit()

    index = 0
    for f in fileset.files:
        index += 1
        testJob = Job(name = '%s-%i' % (name, index))
        testJob.addFile(f)
        testJob["location"] = f.getLocations()[0]
        testJob['task'] = task.getPathName()
        testJob['sandbox'] = task.data.input.sandbox
        testJob['spec'] = os.path.join(self.testDir, 'basicWorkload.pcl')
        testJob['mask']['FirstEvent'] = 101
        testJob["siteBlacklist"] = bl
        testJob["siteWhitelist"] = wl
        testJob['priority'] = 101
        testJob['multicoreEnabled'] = False
        testJob['numberOfCores'] = 1
        jobCache = os.path.join(cacheDir, 'Sub_%i' % (sub), 'Job_%i' % (index))
        os.makedirs(jobCache)
        testJob.create(jobGroup)
        testJob['cache_dir'] = jobCache
        testJob.save()
        jobGroup.add(testJob)
        # pickle needs a binary-mode handle; the context manager closes
        # the file even if pickling raises
        with open(os.path.join(jobCache, 'job.pkl'), 'wb') as output:
            pickle.dump(testJob, output)

    return testJob, testFile
def test10(self):
    """
    _test10_

    Test merging of multiple lumis with holes in the lumi sequence
    Hole is due to no streamer files for the lumi
    Multi lumi input

    It only works with a single hole, as it creates a merged file
    even with it being of a smaller size than the mininputsize.
    It was changed due to the maxinputevents not being used anymore
    """
    splitArgs = self.splitArgs.copy()

    # two files each for lumis 1, 2 and 4 -- lumi 3 is the hole
    for lumi in [1, 2, 4]:
        for _ in range(2):
            fileObj = File(makeUUID(), size = 1000, events = 100)
            fileObj.addRun(Run(1, *[lumi]))
            fileObj.setLocation("SomePNN", immediateSave = False)
            fileObj.create()
            self.fileset2.addFile(fileObj)
    self.fileset2.commit()

    jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                      subscription = self.subscription2)

    splitArgs['minInputSize'] = 100000
    splitArgs['maxInputSize'] = 200000

    jobGroups = jobFactory(**splitArgs)

    self.assertEqual(len(jobGroups), 0,
                     "ERROR: JobFactory should have returned no JobGroup")

    # close lumi 3 with zero files so the hole becomes a known gap
    self.insertClosedLumiDAO.execute(binds = {'RUN': 1,
                                              'LUMI': 3,
                                              'STREAM': "A",
                                              'FILECOUNT': 0,
                                              'INSERT_TIME': self.currentTime,
                                              'CLOSE_TIME': self.currentTime},
                                     transaction = False)

    jobGroups = jobFactory(**splitArgs)

    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")
    self.assertEqual(len(jobGroups[0].jobs), 1,
                     "ERROR: JobFactory didn't create one job")

    job = jobGroups[0].jobs[0]
    self.assertEqual(len(job.getFiles()), 4,
                     "ERROR: Job does not process 4 files")

    return
def filesOfStatus(self, status, limit=0, loadChecksums=True, doingJobSplitting=False):
    """
    _filesOfStatus_

    Return a Set of File objects that have the given status with respect
    to this subscription.

    :param status: file status name; title-cased to select the DAO class
    :param limit: if > 0, fetch at most this many files
    :param loadChecksums: if True, also load each file's checksum
    :param doingJobSplitting: if True, use the job-splitting file info DAO
    :returns: set of WMBS File objects
    """
    existingTransaction = self.beginTransaction()

    status = status.title()
    files = set()

    # pick the DAO variant depending on whether a row limit was requested
    if limit > 0:
        action = self.daofactory(
            classname="Subscriptions.Get%sFilesByLimit" % status)
        fileList = action.execute(self["id"], limit, conn=self.getDBConn(),
                                  transaction=self.existingTransaction())
    else:
        action = self.daofactory(classname="Subscriptions.Get%sFiles" % status)
        fileList = action.execute(self["id"], conn=self.getDBConn(),
                                  transaction=self.existingTransaction())

    if doingJobSplitting:
        fileInfoAct = self.daofactory(
            classname="Files.GetForJobSplittingByID")
    else:
        fileInfoAct = self.daofactory(classname="Files.GetByID")

    fileInfoDict = fileInfoAct.execute(
        file=[x["file"] for x in fileList],
        conn=self.getDBConn(),
        transaction=self.existingTransaction())

    # Run through all files
    for f in fileList:
        fl = File(id=f['file'])
        if loadChecksums:
            fl.loadChecksum()
        fl.update(fileInfoDict[f['file']])
        # 'in f' instead of 'in f.keys()' -- same membership test, idiomatic
        if 'locations' in f:
            fl.setLocation(f['locations'], immediateSave=False)
        files.add(fl)

    self.commitTransaction(existingTransaction)
    return files
def createFile(self, lfn, events, run, lumis, location, lumiMultiplier=None):
    """
    _createFile_

    Create a file for testing

    :param lfn: logical file name
    :param events: number of events in the file
    :param run: run number for the single attached Run
    :param lumis: number of consecutive lumi sections to attach
    :param location: site the file is located at
    :param lumiMultiplier: base multiplier for the lumi number offset;
        defaults to the run number, which reproduces the original
        hard-coded behavior (matches the module-level createFile helper)
    :returns: the new File (not yet created in the database)
    """
    if lumiMultiplier is None:
        # preserve original behavior: lumis offset by run number
        lumiMultiplier = run

    newFile = File(lfn=lfn, size=1000, events=events)
    lumiList = []
    for lumi in range(lumis):
        lumiList.append((lumiMultiplier * lumis) + lumi)
    newFile.addRun(Run(run, *lumiList))
    newFile.setLocation(location)
    return newFile
def createTestJobs(self, nJobs, cacheDir):
    """
    _createTestJobs_

    Create several jobs
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                            name="wf001", task="Test")
    testWorkflow.create()

    testWMBSFileset = Fileset(name="TestFileset")
    testWMBSFileset.create()

    testSubscription = Subscription(fileset=testWMBSFileset,
                                    workflow=testWorkflow,
                                    type="Processing",
                                    split_algo="FileBased")
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    # Single input file shared by every job
    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    testFileA.addRun(Run(10, *[12312]))
    testFileA.setLocation('malpaquet')
    testFileA.create()

    baseName = makeUUID()

    # Create the requested number of jobs, all reading the same file
    for jobIndex in range(nJobs):
        testJob = Job(name='%s-%i' % (baseName, jobIndex))
        testJob.addFile(testFileA)
        testJob['location'] = 'malpaquet'
        testJob['retry_count'] = 1
        testJob['retry_max'] = 10
        testJob.create(testJobGroup)
        testJob.save()
        testJobGroup.add(testJob)

    testJobGroup.commit()

    # Point every job's cache at the requested directory
    for job in testJobGroup.jobs:
        job.setCache(cacheDir)

    return testJobGroup
def createCommonFileset():
    """
    Create a simple fileset with 2 files at the same location
    """
    fileset = Fileset(name="TestFileset")

    # (lfn, size, events, [(run, [lumis]), ...]) for each input file
    fileSpecs = [("/some/file/test1", 1000, 100,
                  [(1, [1, 3, 4, 5, 6, 7]), (2, [1, 2, 4, 5, 6, 7])]),
                 ("/some/file/test2", 2000, 200,
                  [(3, [2, 8]), (4, [3, 8])]),
                 ("/some/file/test3", 3000, 300,
                  [(5, [10, 11, 12]), (6, [10, 11, 12])]),
                 ("/some/file/test4", 4000, 400,
                  [(2, [3, 8, 9]), (3, [3, 4, 5, 6])])]

    for lfn, size, events, runs in fileSpecs:
        newFile = File(lfn, size=size, events=events)
        for runNumber, lumis in runs:
            newFile.addRun(Run(runNumber, *lumis))
        newFile.setLocation('T2_CH_CERN')
        fileset.addFile(newFile)

    fileset.create()
    return fileset
def createTestJobs(self, nJobs, cacheDir):
    """
    _createTestJobs_

    Create several jobs
    """
    workflow = Workflow(spec="spec.xml", owner="Simon", name="wf001",
                        task="Test")
    workflow.create()

    fileset = Fileset(name="TestFileset")
    fileset.create()

    subscription = Subscription(fileset=fileset,
                                workflow=workflow,
                                type="Processing",
                                split_algo="FileBased")
    subscription.create()

    jobGroup = JobGroup(subscription=subscription)
    jobGroup.create()

    # One input file shared by all jobs
    inputFile = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    inputFile.addRun(Run(10, *[12312]))
    inputFile.setLocation('malpaquet')
    inputFile.create()

    baseName = makeUUID()

    for i in range(nJobs):
        job = Job(name='%s-%i' % (baseName, i))
        job.addFile(inputFile)
        job['location'] = 'malpaquet'
        job['retry_count'] = 1
        job['retry_max'] = 10
        job.create(jobGroup)
        job.save()
        jobGroup.add(job)

    jobGroup.commit()

    # Set test job caches
    for job in jobGroup.jobs:
        job.setCache(cacheDir)

    return jobGroup
def stuffWMBS(self):
    """
    _stuffWMBS_

    Inject the workflow in WMBS and add the subscriptions
    """
    # both workflows share the same spec file
    specPath = os.path.join(getTestBase(),
                            "WMComponent_t/PhEDExInjector_t/specs/TestWorkload.pkl")

    testWorkflow = Workflow(spec=specPath,
                            owner="/CN=OU/DN=SomeoneWithPermissions",
                            name="BogusRequest", task="BogusTask",
                            owner_vogroup="", owner_vorole="")
    testWorkflow.create()

    testMergeWorkflow = Workflow(spec=specPath,
                                 owner="/CN=OU/DN=SomeoneWithPermissions",
                                 name="BogusRequest", task="BogusTask/Merge",
                                 owner_vogroup="", owner_vorole="")
    testMergeWorkflow.create()

    topFileset = Fileset(name="TopFileset")
    topFileset.create()
    unmergedFileset = Fileset(name="UnmergedFileset")
    unmergedFileset.create()

    fileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    fileA.addRun(Run(10, *[12312]))
    fileA.setLocation('malpaquet')

    fileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    fileB.addRun(Run(10, *[12314]))
    fileB.setLocation('malpaquet')

    fileA.create()
    fileB.create()

    topFileset.addFile(fileA)
    unmergedFileset.addFile(fileB)
    topFileset.commit()
    unmergedFileset.commit()

    testSubscription = Subscription(fileset=topFileset,
                                    workflow=testWorkflow)
    testSubscription.create()
    testSubscriptionMerge = Subscription(fileset=unmergedFileset,
                                         workflow=testMergeWorkflow,
                                         type="Merge")
    testSubscriptionMerge.create()

    return (testSubscription, testSubscriptionMerge)
def createJob(self, streamerList, jobEvents, jobSize, memoryRequirement):
    """
    _createJob_

    Create a repack job over the given streamer files.

    :param streamerList: list of dicts with 'id', 'lfn', 'filesize', 'location'
    :param jobEvents: total events in the job (not used in the estimates here)
    :param jobSize: total input size in bytes; drives core count and estimates
    :param memoryRequirement: memory estimate attached to the job
    """
    # find largest file
    largestFile = 0
    for streamer in streamerList:
        largestFile = max(largestFile, streamer['filesize'])

    # calculate number of cores based on disk usage
    # idiomatic int(...) instead of the C-style (int)(...) cast -- same result
    numberOfCores = 1 + int((jobSize + largestFile) / (20 * 1000 * 1000 * 1000))

    # jobs requesting more than 8 cores would never run
    if numberOfCores > 8:
        self.markFailed(streamerList)
        return

    if not self.createdGroup:
        self.newGroup()
        self.createdGroup = True

    self.newJob(name="%s-%s" % (self.jobNamePrefix, makeUUID()))
    for streamer in streamerList:
        f = File(id=streamer['id'], lfn=streamer['lfn'])
        f.setLocation(streamer['location'], immediateSave=False)
        self.currentJob.addFile(f)

    # allow large (single lumi) repack to use multiple cores
    if numberOfCores > 1:
        self.currentJob.addBaggageParameter("numberOfCores", numberOfCores)

    # job time based on
    # - 5 min initialization
    # - 1.5MB/s repack speed
    # - checksum calculation at 5MB/s
    # - stageout at 5MB/s
    # job disk based on
    # - input for largest file on local disk
    # - output on local disk (factor 1)
    jobTime = 300 + jobSize / 1500000 + (jobSize * 2) / 5000000
    self.currentJob.addResourceEstimates(jobTime=jobTime,
                                         disk=(jobSize + largestFile) / 1024,
                                         memory=memoryRequirement)

    return
def setUp(self):
    """
    _setUp_

    Set up the database schema, resource-control sites, and a Harvest
    subscription over an eleven-file fileset.
    """
    self.testInit = TestInit(__file__)
    self.testInit.setLogging()
    self.testInit.setDatabaseConnection()
    self.testInit.setSchema(customModules=["WMCore.WMBS"])
    self.splitterFactory = SplitterFactory(package="WMCore.JobSplitting")

    myThread = threading.currentThread()
    self.myThread = myThread
    daoFactory = DAOFactory(package="WMCore.WMBS",
                            logger=logging,
                            dbinterface=myThread.dbi)
    self.WMBSFactory = daoFactory

    config = self.getConfig()
    self.changer = ChangeState(config)

    myResourceControl = ResourceControl()
    myResourceControl.insertSite("SomeSite", 10, 20, "SomeSE", "SomeCE")
    myResourceControl.insertSite("SomeSite", 10, 20, "SomeSE2", "SomeCE")
    myResourceControl.insertSite("SomeSite2", 10, 20, "SomeSE3", "SomeCE2")

    # eleven files, all in run 1 / lumi 1 at SomeSE
    self.fileset1 = Fileset(name="TestFileset1")
    for fileIndex in range(11):
        newFile = File("/some/file/name%d" % fileIndex, size=1000, events=100)
        newFile.addRun(Run(1, *[1]))
        newFile.setLocation('SomeSE')
        self.fileset1.addFile(newFile)
    self.fileset1.create()

    workflow1 = Workflow(spec="spec.xml", owner="hufnagel",
                         name="TestWorkflow1", task="Test")
    workflow1.create()

    self.subscription1 = Subscription(fileset=self.fileset1,
                                      workflow=workflow1,
                                      split_algo="Harvest",
                                      type="Harvesting")
    self.subscription1.create()

    self.configFile = EmulatorSetup.setupWMAgentConfig()

    return
def createFileCollection(self, name, nSubs, nFiles, workflowURL='test', site=None):
    """
    _createFileCollection_

    Create a collection of files for splitting into jobs
    """
    myThread = threading.currentThread()

    testWorkflow = Workflow(spec=workflowURL, owner="mnorman",
                            name=name, task="/TestWorkload/ReReco")
    testWorkflow.create()

    for sub in range(nSubs):
        nameStr = '%s-%i' % (name, sub)

        testFileset = Fileset(name=nameStr)
        testFileset.create()

        for fileNum in range(nFiles):
            # pick a random site unless one was given
            siteName = site if site else random.choice(self.sites)
            tmpSite = 'se.%s' % (siteName)

            testFile = File(lfn="/lfn/%s/%i" % (nameStr, fileNum),
                            size=1024, events=10)
            testFile.setLocation(tmpSite)
            testFile.create()
            testFileset.addFile(testFile)

        testFileset.commit()
        testFileset.markOpen(isOpen=0)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow,
                                        type="Processing",
                                        split_algo="FileBased")
        testSubscription.create()

    return
def createJob(self, fileList):
    """
    _createJob_

    Create an alcaharvest job

    Starts a fresh group, then adds every file in fileList to the new job.
    """
    self.newGroup()
    self.newJob(name="%s-%s" % (self.jobNamePrefix, makeUUID()))

    for fileInfo in fileList:
        wmbsFile = File(id=fileInfo["id"], lfn=fileInfo["lfn"])
        wmbsFile.setLocation(fileInfo["location"], immediateSave=False)
        self.currentJob.addFile(wmbsFile)
def createJob(self, fileList):
    """
    _createJob_

    Create an alcaharvest job

    Opens a new group and a new uniquely-named job, then attaches each
    input file (without saving its location immediately).
    """
    self.newGroup()

    jobName = "%s-%s" % (self.jobNamePrefix, makeUUID())
    self.newJob(name=jobName)

    for fileInfo in fileList:
        inputFile = File(id=fileInfo['id'], lfn=fileInfo['lfn'])
        inputFile.setLocation(fileInfo['location'], immediateSave=False)
        self.currentJob.addFile(inputFile)
def createJob(self, streamerList, jobEvents, jobSize, memoryRequirement):
    """
    _createJob_

    Build a repack job from the streamer files, sizing the core count from
    local disk usage and attaching time/disk/memory resource estimates.
    """
    # largest input file determines the extra local disk needed
    largestFile = max([0] + [streamer['filesize'] for streamer in streamerList])

    # one extra core per 20 GB of local disk usage
    numberOfCores = 1 + int((jobSize + largestFile) / (20 * 1000 * 1000 * 1000))

    # jobs requesting more than 8 cores would never run
    if numberOfCores > 8:
        self.markFailed(streamerList)
        return

    if not self.createdGroup:
        self.newGroup()
        self.createdGroup = True

    self.newJob(name = "%s-%s" % (self.jobNamePrefix, makeUUID()))

    for streamer in streamerList:
        wmbsFile = File(id = streamer['id'], lfn = streamer['lfn'])
        wmbsFile.setLocation(streamer['location'], immediateSave = False)
        self.currentJob.addFile(wmbsFile)

    # allow large (single lumi) repack to use multiple cores
    if numberOfCores > 1:
        self.currentJob.addBaggageParameter("numberOfCores", numberOfCores)

    # job time based on
    # - 5 min initialization
    # - 1.5MB/s repack speed
    # - checksum calculation at 5MB/s
    # - stageout at 5MB/s
    # job disk based on
    # - input for largest file on local disk
    # - output on local disk (factor 1)
    jobTime = 300 + jobSize / 1500000 + (jobSize * 2) / 5000000
    self.currentJob.addResourceEstimates(jobTime = jobTime,
                                         disk = (jobSize + largestFile) / 1024,
                                         memory = memoryRequirement)

    return
def createJob(self, streamerList):
    """
    _createJob_

    Create a job over the given streamer files, opening a new job group
    the first time this splitter creates a job.
    """
    if not self.createdGroup:
        self.newGroup()
        self.createdGroup = True

    self.newJob(name = "%s-%s" % (self.jobNamePrefix, makeUUID()))

    for streamerInfo in streamerList:
        wmbsFile = File(id = streamerInfo['id'], lfn = streamerInfo['lfn'])
        wmbsFile.setLocation(streamerInfo['location'], immediateSave = False)
        self.currentJob.addFile(wmbsFile)
def createFile(lfn, events, run, lumis, location, lumiMultiplier=None):
    """
    _createFile_

    Create a file for testing
    """
    # the lumi offset defaults to the run number
    multiplier = run if lumiMultiplier is None else lumiMultiplier

    newFile = File(lfn=lfn, size=1000, events=events)
    newFile.addRun(Run(run, *[(multiplier * lumis) + lumi
                              for lumi in range(lumis)]))
    newFile.setLocation(location)
    return newFile
def test03(self):
    """
    _test03_

    Test max input files threshold for single lumi

    small lumi, followed by large lumi
    expect 1 job for small lumi and 1 job for large
    """
    splitArgs = self.splitArgs.copy()

    # lumi 1 gets two files, lumi 2 gets four
    for lumi in [1, 2]:
        for _ in range(lumi * 2):
            fileObj = File(makeUUID(), size = 1000, events = 100)
            fileObj.addRun(Run(1, *[lumi]))
            fileObj.setLocation("SomePNN", immediateSave = False)
            fileObj.create()
            self.fileset2.addFile(fileObj)
    self.fileset2.commit()

    jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                      subscription = self.subscription2)

    jobGroups = jobFactory(**splitArgs)

    self.assertEqual(len(jobGroups), 0,
                     "ERROR: JobFactory should have returned no JobGroup")

    splitArgs['maxInputFiles'] = 3

    jobGroups = jobFactory(**splitArgs)

    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")
    self.assertEqual(len(jobGroups[0].jobs), 2,
                     "ERROR: JobFactory didn't create two jobs")

    job = jobGroups[0].jobs[0]
    self.assertEqual(len(job.getFiles()), 2,
                     "ERROR: Job does not process 2 files")

    job = jobGroups[0].jobs[1]
    self.assertEqual(len(job.getFiles()), 4,
                     "ERROR: Job does not process 4 files")

    return
def testLoadData(self):
    """
    _testLoadData_

    Test the loading of all data from a file, including run/lumi
    associations, location information and parentage information.
    """
    # Two parent files the child will be linked to
    parentA = File(lfn="/this/is/a/parent/lfnA", size=1024,
                   events=20, checksums={'cksum': 1})
    parentA.addRun(Run(1, *[45]))
    parentB = File(lfn="/this/is/a/parent/lfnB", size=1024,
                   events=20, checksums={'cksum': 1})
    parentB.addRun(Run(1, *[45]))
    parentA.create()
    parentB.create()

    childFile = File(lfn="/this/is/a/lfn", size=1024, events=10,
                     checksums={'cksum': 1})
    childFile.addRun(Run(1, *[45]))
    childFile.create()
    childFile.setLocation(se="se1.fnal.gov", immediateSave=False)
    childFile.setLocation(se="se1.cern.ch", immediateSave=False)
    childFile.addParent("/this/is/a/parent/lfnA")
    childFile.addParent("/this/is/a/parent/lfnB")
    childFile.updateLocations()

    # Load the same file back, once by LFN and once by database id
    loadedByLfn = File(lfn=childFile["lfn"])
    loadedByLfn.loadData(parentage=1)
    loadedById = File(id=childFile["id"])
    loadedById.loadData(parentage=1)

    assert childFile == loadedByLfn, \
        "ERROR: File load by LFN didn't work"
    assert childFile == loadedById, \
        "ERROR: File load by ID didn't work"

    childFile.delete()
    parentA.delete()
    parentB.delete()

    return
def createTestJobGroup(self):
    """
    Creates a group of several jobs

    Builds a workflow, a two-file fileset at 'malpaquet', a subscription
    and a job group holding self.nJobs jobs, each reading both files.

    :returns: the committed JobGroup
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                            name="wf001", task="Test")
    testWorkflow.create()

    testWMBSFileset = Fileset(name="TestFileset")
    testWMBSFileset.create()

    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    testFileA.addRun(Run(10, *[12312]))
    testFileA.setLocation('malpaquet')

    testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    testFileB.addRun(Run(10, *[12312]))
    # bug fix: the location was previously set on testFileA a second time,
    # leaving testFileB without any location
    testFileB.setLocation('malpaquet')

    testFileA.create()
    testFileB.create()

    testWMBSFileset.addFile(testFileA)
    testWMBSFileset.addFile(testFileB)
    testWMBSFileset.commit()

    testSubscription = Subscription(fileset=testWMBSFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    for i in range(0, self.nJobs):
        testJob = Job(name=makeUUID())
        testJob.addFile(testFileA)
        testJob.addFile(testFileB)
        testJob['retry_count'] = 1
        testJob['retry_max'] = 10
        testJobGroup.add(testJob)

    testJobGroup.commit()
    return testJobGroup