def _getDBSBlock(self, match, wmspec):
    """Get DBS info for this block"""
    blockName = match['Inputs'].keys()[0]  # TODO: Allow more than one

    if match['ACDC']:
        acdcInfo = match['ACDC']
        acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])
        collection = acdc.getDataCollection(acdcInfo['collection'])
        splitedBlockName = ACDCBlock.splitBlockName(blockName)
        fileLists = acdc.getChunkFiles(acdcInfo['collection'],
                                       acdcInfo['fileset'],
                                       splitedBlockName['Offset'],
                                       splitedBlockName['NumOfFiles'],
                                       user=wmspec.getOwner().get("name"),
                                       group=wmspec.getOwner().get("group"))
        block = {}
        block["Files"] = fileLists
        return blockName, block
    else:
        dbs = get_dbs(match['Dbs'])
        if wmspec.getTask(match['TaskName']).parentProcessingFlag():
            dbsBlockDict = dbs.getFileBlockWithParents(blockName)
        else:
            dbsBlockDict = dbs.getFileBlock(blockName)
        return blockName, dbsBlockDict[blockName]
def _getDBSBlock(self, match, wmspec):
    """Get DBS info for this block"""
    blockName = match['Inputs'].keys()[0]  # TODO: Allow more than one

    if match['ACDC']:
        acdcInfo = match['ACDC']
        acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])
        collection = acdc.getDataCollection(acdcInfo['collection'])
        splitedBlockName = ACDCBlock.splitBlockName(blockName)
        fileLists = acdc.getChunkFiles(acdcInfo['collection'],
                                       acdcInfo['fileset'],
                                       splitedBlockName['Offset'],
                                       splitedBlockName['NumOfFiles'],
                                       user=wmspec.getOwner().get("name"),
                                       group=wmspec.getOwner().get("group"))
        block = {}
        block["Files"] = fileLists
        return blockName, block
    else:
        dbs = get_dbs(match['Dbs'])
        if wmspec.getTask(match['TaskName']).parentProcessingFlag():
            dbsBlockDict = dbs.getFileBlockWithParents(blockName)
        else:
            dbsBlockDict = dbs.getFileBlock(blockName)
        if wmspec.locationDataSourceFlag():
            blockInfo = dbsBlockDict[blockName]
            seElements = []
            for cmsSite in match['Inputs'].values()[0]:  # TODO: Allow more than one
                ses = self.SiteDB.cmsNametoSE(cmsSite)
                seElements.extend(ses)
            seElements = list(set(seElements))
            blockInfo['StorageElements'] = seElements
        return blockName, dbsBlockDict[blockName]
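# Illustrative sketch only (not part of the source): a plausible shape for the
# 'match' dict consumed by _getDBSBlock() above, to make the ACDC and DBS
# branches easier to follow. The keys ('Inputs', 'TaskName', 'Dbs', 'ACDC' and
# its server/database/collection/fileset entries) come from the code above;
# all values here are hypothetical.
exampleMatch = {
    'Inputs': {'/acdc/owner_ACDC0_task_X/:collection:fileset/0/31055': ['T1_US_FNAL']},
    'TaskName': 'SomeTaskName',  # hypothetical task name
    'Dbs': None,                 # DBS URL, used only for the non-ACDC branch
    'ACDC': {'server': 'https://cmsweb.cern.ch/couchdb',
             'database': 'acdcserver',
             'collection': 'owner_task_collection',  # hypothetical
             'fileset': '/owner_task/fileset'},      # hypothetical
}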
# Imports needed to run this script standalone, following the WMCore package
# layout (verify the paths against the WMCore version in use):
import sys
import time

from WMCore.ACDC.DataCollectionService import DataCollectionService
from WMCore.WMSpec.WMWorkload import WMWorkloadHelper
from WMCore.WorkQueue.DataStructs.ACDCBlock import ACDCBlock
from WMCore.WorkQueue.WMBSHelper import WMBSHelper


def main():
    start = time.time()
    # blockName = match['Inputs'].keys()[0]
    blockName = "/acdc/vlimant_ACDC0_task_SUS-RunIIFall18wmLHEGS-00025__v1_T_190218_145226_481/:pdmvserv_task_SUS-RunIIFall18wmLHEGS-00025__v1_T_181211_005112_2222:SUS-RunIIFall18wmLHEGS-00025_0/0/31055"
    # acdcInfo = match['ACDC']
    acdcInfo = {"database": "acdcserver",
                "fileset": "/pdmvserv_task_SUS-RunIIFall18wmLHEGS-00025__v1_T_181211_005112_2222/SUS-RunIIFall18wmLHEGS-00025_0",
                "collection": "pdmvserv_task_SUS-RunIIFall18wmLHEGS-00025__v1_T_181211_005112_2222",
                "server": "https://cmsweb.cern.ch/couchdb"}
    acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])

    splitedBlockName = ACDCBlock.splitBlockName(blockName)
    print("Split block name: %s" % splitedBlockName)

    fileLists = acdc.getChunkFiles(acdcInfo['collection'],
                                   acdcInfo['fileset'],
                                   splitedBlockName['Offset'],
                                   splitedBlockName['NumOfFiles'])
    print("Retrieved %d unique files from the ACDCServer" % len(fileLists))

    block = {}
    block["Files"] = fileLists

    # Report which retrieved files contain the lumi sections of interest
    wantedLumis = set([252052, 240646])
    for f in fileLists:
        for run in f['runs']:
            maskDict = run.json()
            lumisSet = set(maskDict['Lumis'].keys())
            if wantedLumis.intersection(lumisSet):
                print("File: %s with events: %s, contains these lumis: %s"
                      % (f['lfn'], f['events'], wantedLumis.intersection(lumisSet)))
    # with open("chunkfiles.json", 'w') as fo:
    #     json.dump(block, fo)

    end = time.time()
    print("Spent %s secs running so far" % (end - start))
    sys.exit(1)

    # NOTE: the early sys.exit(1) above makes the WMBSHelper section below
    # unreachable as written; remove it to exercise the subscription step.
    ### Now doing the WMBSHelper stuff
    reqUrl = "https://cmsweb.cern.ch/couchdb/reqmgr_workload_cache"
    requestName = "vlimant_ACDC0_task_HIG-RunIIFall17wmLHEGS-01122__v1_T_180808_130708_5376"
    wmspec = WMWorkloadHelper()
    wmspec.loadSpecFromCouch(reqUrl, requestName)
    taskName = "HIG-RunIIFall17DRPremix-00788_0"
    mask = None
    cacheDir = "/data/srv/wmagent/v1.1.14.patch6/install/wmagent/WorkQueueManager/cache"
    # wmbsHelper = WMBSHelper(wmspec, match['TaskName'], blockName, mask, self.params['CacheDir'])
    wmbsHelper = WMBSHelper(wmspec, taskName, blockName, mask, cacheDir)
    sub, numFilesAdded = wmbsHelper.createSubscriptionAndAddFiles(block=block)


if __name__ == "__main__":
    main()
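# Illustrative sketch only: the approximate shape of one file record returned
# by DataCollectionService.getChunkFiles(), inferred from the fields the
# script above reads (f['lfn'], f['events'], f['runs']) and from the fields
# asserted in testChunking() below. The values themselves are hypothetical.
exampleChunkFile = {
    "lfn": "/store/unmerged/hypothetical/file.root",  # hypothetical LFN
    "size": 1024,
    "events": 1024,
    "merged": False,
    "parents": set(),
    "locations": ["cmssrm.fnal.gov"],
    "runs": [],  # list of Run objects, each carrying .run and .lumis
}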
def testChunking(self):
    """
    _testChunking_

    Insert a workload and files that have several distinct sets of locations.
    Verify that the chunks are created correctly and that they only group
    files that have the same set of locations.  Also verify that the chunks
    are pulled out of ACDC correctly.
    """
    dcs = DataCollectionService(url=self.testInit.couchUrl,
                                database="wmcore-acdc-datacollectionsvc")

    def getJob():
        job = Job()
        job["task"] = "/ACDCTest/reco"
        job["workflow"] = "ACDCTest"
        job["location"] = "cmssrm.fnal.gov"
        job["owner"] = "cmsdataops"
        job["group"] = "cmsdataops"
        return job

    testFileA = File(lfn=makeUUID(), size=1024, events=1024)
    testFileA.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
    testFileA.addRun(Run(1, 1, 2))
    testFileB = File(lfn=makeUUID(), size=1024, events=1024)
    testFileB.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
    testFileB.addRun(Run(1, 3, 4))
    testFileC = File(lfn=makeUUID(), size=1024, events=1024)
    testFileC.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
    testFileC.addRun(Run(1, 5, 6))
    testJobA = getJob()
    testJobA.addFile(testFileA)
    testJobA.addFile(testFileB)
    testJobA.addFile(testFileC)

    testFileD = File(lfn=makeUUID(), size=1024, events=1024)
    testFileD.setLocation(["cmssrm.fnal.gov"])
    testFileD.addRun(Run(2, 1, 2))
    testFileE = File(lfn=makeUUID(), size=1024, events=1024)
    testFileE.setLocation(["cmssrm.fnal.gov"])
    testFileE.addRun(Run(2, 3, 4))
    testJobB = getJob()
    testJobB.addFile(testFileD)
    testJobB.addFile(testFileE)

    testFileF = File(lfn=makeUUID(), size=1024, events=1024,
                     parents=set(["/some/parent/F"]))
    testFileF.setLocation(["cmssrm.fnal.gov", "castor.cern.ch", "srm.ral.uk"])
    testFileF.addRun(Run(3, 1, 2))
    testFileG = File(lfn=makeUUID(), size=1024, events=1024,
                     parents=set(["/some/parent/G"]))
    testFileG.setLocation(["cmssrm.fnal.gov", "castor.cern.ch", "srm.ral.uk"])
    testFileG.addRun(Run(3, 3, 4))
    testFileH = File(lfn=makeUUID(), size=1024, events=1024,
                     parents=set(["/some/parent/H"]))
    testFileH.setLocation(["cmssrm.fnal.gov", "castor.cern.ch", "srm.ral.uk"])
    testFileH.addRun(Run(3, 5, 6))
    testJobC = getJob()
    testJobC.addFile(testFileF)
    testJobC.addFile(testFileG)
    testJobC.addFile(testFileH)

    testFileI = File(lfn=makeUUID(), size=1024, events=1024, merged=True)
    testFileI.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
    testFileI.addRun(Run(4, 1, 2))
    testFileJ = File(lfn=makeUUID(), size=1024, events=1024, merged=True)
    testFileJ.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
    testFileJ.addRun(Run(4, 3, 4))
    testFileK = File(lfn=makeUUID(), size=1024, events=1024, merged=True)
    testFileK.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
    testFileK.addRun(Run(4, 5, 6))
    testJobD = getJob()
    testJobD.addFile(testFileI)
    testJobD.addFile(testFileJ)
    testJobD.addFile(testFileK)

    dcs.failedJobs([testJobA, testJobB, testJobC, testJobD])

    chunks = dcs.chunkFileset("ACDCTest", "/ACDCTest/reco", chunkSize=5)
    self.assertEqual(len(chunks), 4,
                     "Error: There should be four chunks: %s" % len(chunks))

    goldenMetaData = {1: {"lumis": 2,
                          "locations": ["castor.cern.ch", "cmssrm.fnal.gov"],
                          "events": 1024},
                      2: {"lumis": 4,
                          "locations": ["cmssrm.fnal.gov"],
                          "events": 2048},
                      3: {"lumis": 6,
                          "locations": ["castor.cern.ch", "cmssrm.fnal.gov", "srm.ral.uk"],
                          "events": 3072},
                      5: {"lumis": 10,
                          "locations": ["castor.cern.ch", "cmssrm.fnal.gov"],
                          "events": 5120}}

    testFiles = [testFileA, testFileB, testFileC, testFileI, testFileJ, testFileK]
    lastFile = testFileA
    for testFile in testFiles:
        if lastFile["lfn"] < testFile["lfn"]:
            lastFile = testFile
    testFiles.remove(lastFile)

    goldenFiles = {1: [lastFile],
                   2: [testFileD, testFileE],
                   3: [testFileF, testFileG, testFileH],
                   5: testFiles}

    for chunk in chunks:
        chunkMetaData = dcs.getChunkInfo("ACDCTest", "/ACDCTest/reco",
                                         chunk["offset"], chunk["files"])
        self.assertEqual(chunkMetaData["files"], chunk["files"],
                         "Error: Metadata doesn't match.")
        self.assertEqual(chunkMetaData["lumis"], chunk["lumis"],
                         "Error: Metadata doesn't match.")
        self.assertEqual(chunkMetaData["events"], chunk["events"],
                         "Error: Metadata doesn't match.")
        self.assertEqual(chunkMetaData["locations"], chunk["locations"],
                         "Error: Metadata doesn't match.")
        self.assertTrue(chunk["files"] in goldenMetaData.keys(),
                        "Error: Extra chunk found.")
        self.assertEqual(chunk["lumis"], goldenMetaData[chunk["files"]]["lumis"],
                         "Error: Lumis in chunk is wrong.")
        self.assertEqual(chunk["locations"],
                         goldenMetaData[chunk["files"]]["locations"],
                         "Error: Locations in chunk is wrong.")
        self.assertEqual(chunk["events"], goldenMetaData[chunk["files"]]["events"],
                         "Error: Events in chunk is wrong.")
        del goldenMetaData[chunk["files"]]

        chunkFiles = dcs.getChunkFiles("ACDCTest", "/ACDCTest/reco",
                                       chunk["offset"], chunk["files"])
        self.assertTrue(chunk["files"] in goldenFiles.keys(),
                        "Error: Extra chunk found.")
        goldenChunkFiles = goldenFiles[chunk["files"]]
        self.assertEqual(len(chunkFiles), len(goldenChunkFiles))

        for chunkFile in chunkFiles:
            foundFile = None
            for goldenChunkFile in goldenChunkFiles:
                if chunkFile["lfn"] == goldenChunkFile["lfn"]:
                    foundFile = goldenChunkFile
                    break
            self.assertTrue(foundFile is not None,
                            "Error: Missing chunk file: %s, %s"
                            % (chunkFiles, goldenChunkFiles))
            self.assertEqual(foundFile["parents"], chunkFile["parents"],
                             "Error: File parents should match.")
            self.assertEqual(foundFile["merged"], chunkFile["merged"],
                             "Error: File merged status should match.")
            self.assertEqual(foundFile["locations"], chunkFile["locations"],
                             "Error: File locations should match.")
            self.assertEqual(foundFile["events"], chunkFile["events"],
                             "Error: File events should match: %s" % chunk["files"])
            self.assertEqual(foundFile["size"], chunkFile["size"],
                             "Error: File size should match.")
            self.assertEqual(len(foundFile["runs"]), len(chunkFile["runs"]),
                             "Error: Wrong number of runs.")
            for run in foundFile["runs"]:
                runMatch = False
                for chunkRun in chunkFile["runs"]:
                    if chunkRun.run == run.run and chunkRun.lumis == run.lumis:
                        runMatch = True
                        break
                self.assertTrue(runMatch, "Error: Run information is wrong.")

        del goldenFiles[chunk["files"]]

    singleChunk = dcs.singleChunkFileset("ACDCTest", "/ACDCTest/reco")
    self.assertEqual(singleChunk,
                     {"offset": 0,
                      "files": 11,
                      "events": 11264,
                      "lumis": 22,
                      "locations": set(["castor.cern.ch", "cmssrm.fnal.gov", "srm.ral.uk"])},
                     "Error: Single chunk metadata is wrong")
    return
def testChunking(self):
    """
    _testChunking_

    Insert a workload and files that have several distinct sets of locations.
    Verify that the chunks are created correctly and that they only group
    files that have the same set of locations.  Also verify that the chunks
    are pulled out of ACDC correctly.
    """
    dcs = DataCollectionService(url=self.testInit.couchUrl,
                                database="wmcore-acdc-datacollectionsvc")

    testFileA = File(lfn=makeUUID(), size=1024, events=1024)
    testFileA.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
    testFileA.addRun(Run(1, 1, 2))
    testFileB = File(lfn=makeUUID(), size=1024, events=1024)
    testFileB.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
    testFileB.addRun(Run(1, 3, 4))
    testFileC = File(lfn=makeUUID(), size=1024, events=1024)
    testFileC.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
    testFileC.addRun(Run(1, 5, 6))
    testJobA = self.getMinimalJob()
    testJobA.addFile(testFileA)
    testJobA.addFile(testFileB)
    testJobA.addFile(testFileC)

    testFileD = File(lfn=makeUUID(), size=1024, events=1024)
    testFileD.setLocation(["cmssrm.fnal.gov"])
    testFileD.addRun(Run(2, 1, 2))
    testFileE = File(lfn=makeUUID(), size=1024, events=1024)
    testFileE.setLocation(["cmssrm.fnal.gov"])
    testFileE.addRun(Run(2, 3, 4))
    testJobB = self.getMinimalJob()
    testJobB.addFile(testFileD)
    testJobB.addFile(testFileE)

    testFileF = File(lfn=makeUUID(), size=1024, events=1024,
                     parents={"/some/parent/F"})
    testFileF.setLocation(["cmssrm.fnal.gov", "castor.cern.ch", "srm.ral.uk"])
    testFileF.addRun(Run(3, 1, 2))
    testFileG = File(lfn=makeUUID(), size=1024, events=1024,
                     parents={"/some/parent/G"})
    testFileG.setLocation(["cmssrm.fnal.gov", "castor.cern.ch", "srm.ral.uk"])
    testFileG.addRun(Run(3, 3, 4))
    testFileH = File(lfn=makeUUID(), size=1024, events=1024,
                     parents={"/some/parent/H"})
    testFileH.setLocation(["cmssrm.fnal.gov", "castor.cern.ch", "srm.ral.uk"])
    testFileH.addRun(Run(3, 5, 6))
    testJobC = self.getMinimalJob()
    testJobC.addFile(testFileF)
    testJobC.addFile(testFileG)
    testJobC.addFile(testFileH)

    testFileI = File(lfn=makeUUID(), size=1024, events=1024, merged=True)
    testFileI.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
    testFileI.addRun(Run(4, 1, 2))
    testFileJ = File(lfn=makeUUID(), size=1024, events=1024, merged=True)
    testFileJ.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
    testFileJ.addRun(Run(4, 3, 4))
    testFileK = File(lfn=makeUUID(), size=1024, events=1024, merged=True)
    testFileK.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
    testFileK.addRun(Run(4, 5, 6))
    testJobD = self.getMinimalJob()
    testJobD.addFile(testFileI)
    testJobD.addFile(testFileJ)
    testJobD.addFile(testFileK)

    dcs.failedJobs([testJobA, testJobB, testJobC, testJobD])

    chunks = dcs.chunkFileset("ACDCTest", "/ACDCTest/reco", chunkSize=5)
    self.assertEqual(len(chunks), 4,
                     "Error: There should be four chunks: %s" % len(chunks))

    goldenMetaData = {1: {"lumis": 2,
                          "locations": ["castor.cern.ch", "cmssrm.fnal.gov"],
                          "events": 1024},
                      2: {"lumis": 4,
                          "locations": ["cmssrm.fnal.gov"],
                          "events": 2048},
                      3: {"lumis": 6,
                          "locations": ["castor.cern.ch", "cmssrm.fnal.gov", "srm.ral.uk"],
                          "events": 3072},
                      5: {"lumis": 10,
                          "locations": ["castor.cern.ch", "cmssrm.fnal.gov"],
                          "events": 5120}}

    testFiles = [testFileA, testFileB, testFileC, testFileI, testFileJ, testFileK]
    lastFile = testFileA
    for testFile in testFiles:
        if lastFile["lfn"] < testFile["lfn"]:
            lastFile = testFile
    testFiles.remove(lastFile)

    goldenFiles = {1: [lastFile],
                   2: [testFileD, testFileE],
                   3: [testFileF, testFileG, testFileH],
                   5: testFiles}

    for chunk in chunks:
        chunkMetaData = dcs.getChunkInfo("ACDCTest", "/ACDCTest/reco",
                                         chunk["offset"], chunk["files"])
        self.assertEqual(chunkMetaData["files"], chunk["files"])
        self.assertEqual(chunkMetaData["lumis"], chunk["lumis"])
        self.assertEqual(chunkMetaData["events"], chunk["events"])
        self.assertEqual(chunkMetaData["locations"], chunk["locations"])
        self.assertTrue(chunk["files"] in goldenMetaData.keys(),
                        "Error: Extra chunk found.")
        self.assertEqual(chunk["lumis"], goldenMetaData[chunk["files"]]["lumis"],
                         "Error: Lumis in chunk is wrong.")
        self.assertEqual(chunk["locations"],
                         goldenMetaData[chunk["files"]]["locations"],
                         "Error: Locations in chunk is wrong.")
        self.assertEqual(chunk["events"], goldenMetaData[chunk["files"]]["events"],
                         "Error: Events in chunk is wrong.")
        del goldenMetaData[chunk["files"]]

        chunkFiles = dcs.getChunkFiles("ACDCTest", "/ACDCTest/reco",
                                       chunk["offset"], chunk["files"])
        self.assertTrue(chunk["files"] in goldenFiles.keys(),
                        "Error: Extra chunk found.")
        goldenChunkFiles = goldenFiles[chunk["files"]]
        self.assertEqual(len(chunkFiles), len(goldenChunkFiles))

        for chunkFile in chunkFiles:
            foundFile = None
            for goldenChunkFile in goldenChunkFiles:
                if chunkFile["lfn"] == goldenChunkFile["lfn"]:
                    foundFile = goldenChunkFile
                    break
            self.assertIsNotNone(foundFile,
                                 "Error: Missing chunk file: %s, %s"
                                 % (chunkFiles, goldenChunkFiles))
            self.assertEqual(set(foundFile["parents"]), chunkFile["parents"],
                             "Error: File parents should match.")
            self.assertEqual(foundFile["merged"], chunkFile["merged"],
                             "Error: File merged status should match.")
            self.assertEqual(foundFile["locations"], chunkFile["locations"],
                             "Error: File locations should match.")
            self.assertEqual(foundFile["events"], chunkFile["events"])
            self.assertEqual(foundFile["size"], chunkFile["size"])
            self.assertEqual(len(foundFile["runs"]), len(chunkFile["runs"]),
                             "Error: Wrong number of runs.")
            for run in foundFile["runs"]:
                runMatch = False
                for chunkRun in chunkFile["runs"]:
                    if chunkRun.run == run.run and chunkRun.lumis == run.lumis:
                        runMatch = True
                        break
                self.assertTrue(runMatch, "Error: Run information is wrong.")

        del goldenFiles[chunk["files"]]

    singleChunk = dcs.singleChunkFileset("ACDCTest", "/ACDCTest/reco")
    self.assertEqual(singleChunk,
                     {"offset": 0,
                      "files": 11,
                      "events": 11264,
                      "lumis": 22,
                      "locations": {"castor.cern.ch", "cmssrm.fnal.gov", "srm.ral.uk"}},
                     "Error: Single chunk metadata is wrong")
    return