def getBlock(self, newFile, dasBlocks, location, das):
    """
    _getBlock_

    This gets a block for the file, first checking whether a pre-existing
    open block can be reused.
    """
    for blockName in dasBlocks:
        block = self.blockCache.get(blockName)
        if not self.isBlockOpen(newFile=newFile, block=block):
            # Then the block can't fit the file: close the block.
            block.status = 'Pending'
            self.blockCache[blockName] = block
            dasBlocks.remove(blockName)
        else:
            # Found an open block in the cache; return the block object
            # (callers invoke methods on the return value).
            return block

    # If there are no open blocks, or we ran out of blocks,
    # create a fresh one.
    blockname = '%s#%s' % (newFile['datasetPath'], makeUUID())
    newBlock = DBSBlock(name=blockname, location=location, das=das)
    self.addNewBlock(block=newBlock)
    dasBlocks.append(blockname)
    return newBlock
def getBlock(self, newFile, location, das, skipOpenCheck=False):
    """
    _getBlock_

    Retrieve a block if one exists and is open.  If no open block is
    found, create and return a new one.
    """
    if das in self.dasCache.keys() and location in self.dasCache[das].keys():
        for blockName in self.dasCache[das][location]:
            block = self.blockCache.get(blockName)
            if not self.isBlockOpen(newFile=newFile, block=block) and not skipOpenCheck:
                # Block isn't open anymore.  Mark it as pending so that it
                # gets uploaded.
                block.setPendingAndCloseBlock()
                self.blockCache[blockName] = block
            else:
                return block

    # A suitable open block does not exist.  Create a new one.
    blockname = "%s#%s" % (newFile["datasetPath"], makeUUID())
    newBlock = DBSBlock(name=blockname, location=location, das=das,
                        workflow=newFile["workflow"])
    self.addNewBlock(block=newBlock)
    return newBlock
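# Both getBlock variants above implement a find-open-or-create pattern over a
# per-(das, location) list of cached block names.  Below is a minimal,
# self-contained sketch of that pattern; it uses plain dicts in place of
# DBSBlock and the real caches, and every name in it (get_or_create_block,
# is_open, ...) is illustrative, not part of the production API.
import uuid

def get_or_create_block(das_cache, block_cache, das, location, dataset_path, is_open):
    """Return an open block for (das, location), creating one if needed."""
    for name in das_cache.get(das, {}).get(location, []):
        block = block_cache[name]
        if is_open(block):
            return block                    # reuse the first open block
        block["status"] = "Pending"         # closed: queue it for upload
    # No open block found: mint a new one keyed by dataset#uuid
    name = "%s#%s" % (dataset_path, uuid.uuid4())
    block = {"name": name, "status": "Open"}
    block_cache[name] = block
    das_cache.setdefault(das, {}).setdefault(location, []).append(name)
    return block

def is_open(block):
    return block["status"] == "Open"

# Example: the second call reuses the block created by the first.
dasCacheDemo, blockCacheDemo = {}, {}
b1 = get_or_create_block(dasCacheDemo, blockCacheDemo, 1, "T1_XX_Demo", "/A/B/RECO", is_open)
b2 = get_or_create_block(dasCacheDemo, blockCacheDemo, 1, "T1_XX_Demo", "/A/B/RECO", is_open)
assert b1 is b2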
def testXSetBlock(self):
    """
    _testSetBlock_

    Verify that the [Set|Get]Block DAOs work correctly.
    """
    myThread = threading.currentThread()

    dataset = "/Cosmics/CRUZET09-PromptReco-v1/RECO"

    uploadFactory = DAOFactory(package = "WMComponent.DBS3Buffer",
                               logger = myThread.logger,
                               dbinterface = myThread.dbi)
    datasetAction = uploadFactory(classname = "NewDataset")
    createAction = uploadFactory(classname = "CreateBlocks")

    datasetAction.execute(datasetPath = dataset)

    newBlock = DBSBlock(name = "someblockname",
                        location = "se1.cern.ch",
                        das = None, workflow = None)
    newBlock.setDataset(dataset, 'data', 'VALID')

    createAction.execute(blocks = [newBlock])

    setBlockAction = self.daoFactory(classname = "DBSBufferFiles.SetBlock")
    getBlockAction = self.daoFactory(classname = "DBSBufferFiles.GetBlock")

    testFile = DBSBufferFile(lfn = "/this/is/a/lfn", size = 1024, events = 10,
                             locations = "se1.fnal.gov")
    testFile.setAlgorithm(appName = "cmsRun", appVer = "CMSSW_2_1_8",
                          appFam = "RECO", psetHash = "GIBBERISH",
                          configContent = "MOREGIBBERISH")
    testFile.setDatasetPath(dataset)
    testFile.create()

    setBlockAction.execute(lfn = testFile["lfn"],
                           blockName = "someblockname")

    blockName = getBlockAction.execute(lfn = testFile["lfn"])
    assert blockName[0][0] == "someblockname", \
        "Error: Incorrect block returned: %s" % blockName[0][0]
    return
def stuffDatabase(self):
    """
    _stuffDatabase_

    Fill the dbsbuffer with some files and blocks.  We'll insert a total
    of 5 files spanning two blocks.  There will be a total of two datasets
    inserted into the database.  All files will already be in GLOBAL and
    in_phedex.
    """
    myThread = threading.currentThread()

    buffer3Factory = DAOFactory(package = "WMComponent.DBS3Buffer",
                                logger = myThread.logger,
                                dbinterface = myThread.dbi)
    insertWorkflow = buffer3Factory(classname = "InsertWorkflow")
    insertWorkflow.execute("BogusRequestA", "BogusTask", 0, 0, 0, 0)
    insertWorkflow.execute("BogusRequestB", "BogusTask", 0, 0, 0, 0)

    checksums = {"adler32": "1234", "cksum": "5678"}
    testFileA = DBSBufferFile(lfn = makeUUID(), size = 1024, events = 10,
                              checksums = checksums,
                              locations = set(["srm-cms.cern.ch"]))
    testFileA.setAlgorithm(appName = "cmsRun", appVer = "CMSSW_2_1_8",
                           appFam = "RECO", psetHash = "GIBBERISH",
                           configContent = "MOREGIBBERISH")
    testFileA.setDatasetPath(self.testDatasetA)
    testFileA.addRun(Run(2, *[45]))
    testFileA.create()

    testFileB = DBSBufferFile(lfn = makeUUID(), size = 1024, events = 10,
                              checksums = checksums,
                              locations = set(["srm-cms.cern.ch"]))
    testFileB.setAlgorithm(appName = "cmsRun", appVer = "CMSSW_2_1_8",
                           appFam = "RECO", psetHash = "GIBBERISH",
                           configContent = "MOREGIBBERISH")
    testFileB.setDatasetPath(self.testDatasetA)
    testFileB.addRun(Run(2, *[45]))
    testFileB.create()

    testFileC = DBSBufferFile(lfn = makeUUID(), size = 1024, events = 10,
                              checksums = checksums,
                              locations = set(["srm-cms.cern.ch"]))
    testFileC.setAlgorithm(appName = "cmsRun", appVer = "CMSSW_2_1_8",
                           appFam = "RECO", psetHash = "GIBBERISH",
                           configContent = "MOREGIBBERISH")
    testFileC.setDatasetPath(self.testDatasetA)
    testFileC.addRun(Run(2, *[45]))
    testFileC.create()

    self.testFilesA.append(testFileA)
    self.testFilesA.append(testFileB)
    self.testFilesA.append(testFileC)

    testFileD = DBSBufferFile(lfn = makeUUID(), size = 1024, events = 10,
                              checksums = checksums,
                              locations = set(["srm-cms.cern.ch"]))
    testFileD.setAlgorithm(appName = "cmsRun", appVer = "CMSSW_2_1_8",
                           appFam = "RECO", psetHash = "GIBBERISH",
                           configContent = "MOREGIBBERISH")
    testFileD.setDatasetPath(self.testDatasetB)
    testFileD.addRun(Run(2, *[45]))
    testFileD.create()

    testFileE = DBSBufferFile(lfn = makeUUID(), size = 1024, events = 10,
                              checksums = checksums,
                              locations = set(["srm-cms.cern.ch"]))
    testFileE.setAlgorithm(appName = "cmsRun", appVer = "CMSSW_2_1_8",
                           appFam = "RECO", psetHash = "GIBBERISH",
                           configContent = "MOREGIBBERISH")
    testFileE.setDatasetPath(self.testDatasetB)
    testFileE.addRun(Run(2, *[45]))
    testFileE.create()

    self.testFilesB.append(testFileD)
    self.testFilesB.append(testFileE)

    uploadFactory = DAOFactory(package = "WMComponent.DBS3Buffer",
                               logger = myThread.logger,
                               dbinterface = myThread.dbi)
    datasetAction = uploadFactory(classname = "NewDataset")
    createAction = uploadFactory(classname = "CreateBlocks")

    datasetAction.execute(datasetPath = self.testDatasetA)
    datasetAction.execute(datasetPath = self.testDatasetB)

    self.blockAName = self.testDatasetA + "#" + makeUUID()
    self.blockBName = self.testDatasetB + "#" + makeUUID()

    newBlockA = DBSBlock(name = self.blockAName,
                         location = "srm-cms.cern.ch",
                         das = None, workflow = None)
    newBlockA.setDataset(self.testDatasetA, 'data', 'VALID')
    newBlockA.status = 'Closed'

    newBlockB = DBSBlock(name = self.blockBName,
                         location = "srm-cms.cern.ch",
                         das = None, workflow = None)
    newBlockB.setDataset(self.testDatasetB, 'data', 'VALID')
    newBlockB.status = 'Closed'

    createAction.execute(blocks = [newBlockA, newBlockB])

    bufferFactory = DAOFactory(package = "WMComponent.DBS3Buffer",
                               logger = myThread.logger,
                               dbinterface = myThread.dbi)

    setBlock = bufferFactory(classname = "DBSBufferFiles.SetBlock")
    setBlock.execute(testFileA["lfn"], self.blockAName)
    setBlock.execute(testFileB["lfn"], self.blockAName)
    setBlock.execute(testFileC["lfn"], self.blockAName)
    setBlock.execute(testFileD["lfn"], self.blockBName)
    setBlock.execute(testFileE["lfn"], self.blockBName)

    fileStatus = bufferFactory(classname = "DBSBufferFiles.SetStatus")
    fileStatus.execute(testFileA["lfn"], "GLOBAL")
    fileStatus.execute(testFileB["lfn"], "GLOBAL")
    fileStatus.execute(testFileC["lfn"], "GLOBAL")
    fileStatus.execute(testFileD["lfn"], "GLOBAL")
    fileStatus.execute(testFileE["lfn"], "GLOBAL")

    phedexStatus = bufferFactory(classname = "DBSBufferFiles.SetPhEDExStatus")
    phedexStatus.execute(testFileA["lfn"], 1)
    phedexStatus.execute(testFileB["lfn"], 1)
    phedexStatus.execute(testFileC["lfn"], 1)
    phedexStatus.execute(testFileD["lfn"], 1)
    phedexStatus.execute(testFileE["lfn"], 1)

    associateWorkflow = buffer3Factory(classname = "DBSBufferFiles.AssociateWorkflowToFile")
    associateWorkflow.execute(testFileA["lfn"], "BogusRequestA", "BogusTask")
    associateWorkflow.execute(testFileB["lfn"], "BogusRequestA", "BogusTask")
    associateWorkflow.execute(testFileC["lfn"], "BogusRequestA", "BogusTask")
    associateWorkflow.execute(testFileD["lfn"], "BogusRequestB", "BogusTask")
    associateWorkflow.execute(testFileE["lfn"], "BogusRequestB", "BogusTask")

    # Make the desired subscriptions
    insertSubAction = buffer3Factory(classname = "NewSubscription")
    datasetA = DBSBufferDataset(path = self.testDatasetA)
    datasetB = DBSBufferDataset(path = self.testDatasetB)
    workload = WMWorkloadHelper()
    workload.load(os.path.join(getTestBase(),
                               'WMComponent_t/PhEDExInjector_t/specs/TestWorkload.pkl'))
    insertSubAction.execute(datasetA.exists(),
                            workload.getSubscriptionInformation()[self.testDatasetA])
    insertSubAction.execute(datasetB.exists(),
                            workload.getSubscriptionInformation()[self.testDatasetB])
    return
def testDualUpload(self):
    """
    _testDualUpload_

    Verify that the dual upload mode works correctly.
    """
    self.dbsApi = DbsApi(url = self.dbsUrl)
    config = self.getConfig(dbs3UploadOnly = True)
    dbsUploader = DBSUploadPoller(config = config)
    dbsUtil = DBSBufferUtil()

    # First test verifies that uploader will poll and then not do anything
    # as the database is empty.
    dbsUploader.algorithm()

    acqEra = "Summer%s" % (int(time.time()))
    parentFiles = self.createParentFiles(acqEra)
    (moreParentFiles, childFiles) = \
        self.createFilesWithChildren(parentFiles, acqEra)

    allFiles = parentFiles + moreParentFiles
    allBlocks = []
    for i in range(4):
        blockName = parentFiles[0]["datasetPath"] + "#" + makeUUID()
        dbsBlock = DBSBlock(blockName, "malpaquet", 1)
        dbsBlock.status = "Open"
        dbsUtil.createBlocks([dbsBlock])
        for file in allFiles[i * 5 : (i * 5) + 5]:
            dbsBlock.addFile(file)
            dbsUtil.setBlockFiles({"block": blockName, "filelfn": file["lfn"]})
        if i < 2:
            dbsBlock.status = "InDBS"
            dbsUtil.updateBlocks([dbsBlock])
            dbsUtil.updateFileStatus([dbsBlock], "InDBS")
        allBlocks.append(dbsBlock)

    blockName = childFiles[0]["datasetPath"] + "#" + makeUUID()
    dbsBlock = DBSBlock(blockName, "malpaquet", 1)
    dbsBlock.status = "InDBS"
    dbsUtil.createBlocks([dbsBlock])
    for file in childFiles:
        dbsBlock.addFile(file)
        dbsUtil.setBlockFiles({"block": blockName, "filelfn": file["lfn"]})
    dbsUtil.updateFileStatus([dbsBlock], "InDBS")

    dbsUploader.algorithm()
    time.sleep(5)
    dbsUploader.algorithm()
    time.sleep(5)
    self.verifyData(parentFiles[0]["datasetPath"], parentFiles)

    # Change the status of the rest of the parent blocks so we can upload
    # them and the children.
    for dbsBlock in allBlocks:
        dbsBlock.status = "InDBS"
        dbsUtil.updateBlocks([dbsBlock])

    dbsUploader.algorithm()
    time.sleep(5)
    self.verifyData(parentFiles[0]["datasetPath"],
                    parentFiles + moreParentFiles)

    # Run the uploader one more time to upload the children.
    dbsUploader.algorithm()
    time.sleep(5)
    self.verifyData(childFiles[0]["datasetPath"], childFiles)
    return
try:
    loadedBlocks = self.dbsUtil.loadBlocks(blocksToLoad, self.dbs3UploadOnly)
    logging.info("Loaded blocks: %s" % loadedBlocks)
except WMException:
    raise
except Exception as ex:
    msg = "Unhandled exception while loading blocks.\n"
    msg += str(ex)
    logging.error(msg)
    logging.debug("Blocks to load: %s\n" % blocksToLoad)
    raise DBSUploadException(msg)

for blockInfo in loadedBlocks:
    das = blockInfo['DatasetAlgo']
    loc = blockInfo['origin_site_name']
    block = DBSBlock(name = blockInfo['block_name'],
                     location = loc, das = das)
    block.FillFromDBSBuffer(blockInfo)
    blockname = block.getName()

    # Now we have to load files...
    try:
        files = self.dbsUtil.loadFilesByBlock(blockname = blockname)
        logging.info("Have %i files for block %s" % (len(files), blockname))
    except WMException:
        raise
    except Exception as ex:
        msg = "Unhandled exception while loading files for existing blocks.\n"
        msg += str(ex)
        logging.error(msg)
        logging.debug("Blocks being loaded: %s\n" % blockname)
        raise DBSUploadException(msg)
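# The excerpt above follows a wrap-unknown-exceptions pattern: domain errors
# (WMException) propagate unchanged, while anything unexpected is logged with
# context and re-raised as a component-specific DBSUploadException.  Below is a
# minimal self-contained sketch of the same pattern; DomainError,
# ComponentError and load_with_wrap are illustrative stand-ins, not WMCore
# classes.
import logging

class DomainError(Exception):
    """Already-classified error: let it propagate as-is."""

class ComponentError(Exception):
    """Marks a failure as belonging to this component."""

def load_with_wrap(loader, context):
    try:
        return loader()
    except DomainError:
        raise                                  # already meaningful; don't wrap
    except Exception as ex:
        msg = "Unhandled exception while loading blocks.\n%s" % str(ex)
        logging.error(msg)
        logging.debug("Context: %s", context)  # keep debug detail out of the error
        raise ComponentError(msg)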
def testDualUpload(self):
    """
    _testDualUpload_

    Verify that the dual upload mode works correctly.
    """
    self.dbsApi = DbsApi(url=self.dbsUrl)
    # Assumption: config comes from the test's getConfig() helper, as in the
    # earlier variant of this test; the original excerpt used `config`
    # without defining it.
    config = self.getConfig()
    dbsUploader = DBSUploadPoller(config=config)
    dbsUtil = DBSBufferUtil()

    # First test verifies that uploader will poll and then not do anything
    # as the database is empty.
    dbsUploader.algorithm()

    acqEra = "Summer%s" % (int(time.time()))
    parentFiles = self.createParentFiles(acqEra)
    (moreParentFiles, childFiles) = \
        self.createFilesWithChildren(parentFiles, acqEra)

    allFiles = parentFiles + moreParentFiles
    allBlocks = []
    for i in range(4):
        DBSBufferDataset(parentFiles[0]["datasetPath"]).create()
        blockName = parentFiles[0]["datasetPath"] + "#" + makeUUID()
        dbsBlock = DBSBlock(blockName, location="malpaquet", das=None,
                            workflow=None)
        dbsBlock.status = "Open"
        dbsBlock.setDataset(parentFiles[0]["datasetPath"], 'data', 'VALID')
        dbsUtil.createBlocks([dbsBlock])
        for file in allFiles[i * 5:(i * 5) + 5]:
            dbsBlock.addFile(file, 'data', 'VALID')
            dbsUtil.setBlockFiles({"block": blockName, "filelfn": file["lfn"]})
        if i < 2:
            dbsBlock.status = "InDBS"
            dbsUtil.updateBlocks([dbsBlock])
            dbsUtil.updateFileStatus([dbsBlock], "InDBS")
        allBlocks.append(dbsBlock)

    DBSBufferDataset(childFiles[0]["datasetPath"]).create()
    blockName = childFiles[0]["datasetPath"] + "#" + makeUUID()
    dbsBlock = DBSBlock(blockName, location="malpaquet", das=None,
                        workflow=None)
    dbsBlock.status = "InDBS"
    dbsBlock.setDataset(childFiles[0]["datasetPath"], 'data', 'VALID')
    dbsUtil.createBlocks([dbsBlock])
    for file in childFiles:
        dbsBlock.addFile(file, 'data', 'VALID')
        dbsUtil.setBlockFiles({"block": blockName, "filelfn": file["lfn"]})
    dbsUtil.updateFileStatus([dbsBlock], "InDBS")

    dbsUploader.algorithm()
    time.sleep(5)
    dbsUploader.algorithm()
    time.sleep(5)
    self.verifyData(parentFiles[0]["datasetPath"], parentFiles)

    # Change the status of the rest of the parent blocks so we can upload
    # them and the children.
    for dbsBlock in allBlocks:
        dbsBlock.status = "InDBS"
        dbsUtil.updateBlocks([dbsBlock])

    dbsUploader.algorithm()
    time.sleep(5)
    self.verifyData(parentFiles[0]["datasetPath"],
                    parentFiles + moreParentFiles)

    # Run the uploader one more time to upload the children.
    dbsUploader.algorithm()
    time.sleep(5)
    self.verifyData(childFiles[0]["datasetPath"], childFiles)
    return
def stuffDatabase(self, spec="TestWorkload.pkl"):
    """
    _stuffDatabase_

    Fill the dbsbuffer with some files and blocks.  We'll insert a total
    of 5 files spanning two blocks.  There will be a total of two datasets
    inserted into the database.  We'll inject files with the location set
    as both an SE name and a PhEDEx node name.
    """
    myThread = threading.currentThread()

    buffer3Factory = DAOFactory(package="WMComponent.DBS3Buffer",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)
    insertWorkflow = buffer3Factory(classname="InsertWorkflow")
    insertWorkflow.execute("BogusRequest", "BogusTask", 0, 0, 0, 0)

    checksums = {"adler32": "1234", "cksum": "5678"}
    testFileA = DBSBufferFile(lfn=makeUUID(), size=1024, events=10,
                              checksums=checksums,
                              locations=set(["srm-cms.cern.ch"]))
    testFileA.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8",
                           appFam="RECO", psetHash="GIBBERISH",
                           configContent="MOREGIBBERISH")
    testFileA.setDatasetPath(self.testDatasetA)
    testFileA.addRun(Run(2, *[45]))
    testFileA.create()

    testFileB = DBSBufferFile(lfn=makeUUID(), size=1024, events=10,
                              checksums=checksums,
                              locations=set(["srm-cms.cern.ch"]))
    testFileB.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8",
                           appFam="RECO", psetHash="GIBBERISH",
                           configContent="MOREGIBBERISH")
    testFileB.setDatasetPath(self.testDatasetA)
    testFileB.addRun(Run(2, *[45]))
    testFileB.create()

    testFileC = DBSBufferFile(lfn=makeUUID(), size=1024, events=10,
                              checksums=checksums,
                              locations=set(["srm-cms.cern.ch"]))
    testFileC.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8",
                           appFam="RECO", psetHash="GIBBERISH",
                           configContent="MOREGIBBERISH")
    testFileC.setDatasetPath(self.testDatasetA)
    testFileC.addRun(Run(2, *[45]))
    testFileC.create()

    self.testFilesA.append(testFileA)
    self.testFilesA.append(testFileB)
    self.testFilesA.append(testFileC)

    testFileD = DBSBufferFile(lfn=makeUUID(), size=1024, events=10,
                              checksums=checksums,
                              locations=set(["srm-cms.cern.ch"]))
    testFileD.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8",
                           appFam="RECO", psetHash="GIBBERISH",
                           configContent="MOREGIBBERISH")
    testFileD.setDatasetPath(self.testDatasetB)
    testFileD.addRun(Run(2, *[45]))
    testFileD.create()

    testFileE = DBSBufferFile(lfn=makeUUID(), size=1024, events=10,
                              checksums=checksums,
                              locations=set(["srm-cms.cern.ch"]))
    testFileE.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8",
                           appFam="RECO", psetHash="GIBBERISH",
                           configContent="MOREGIBBERISH")
    testFileE.setDatasetPath(self.testDatasetB)
    testFileE.addRun(Run(2, *[45]))
    testFileE.create()

    self.testFilesB.append(testFileD)
    self.testFilesB.append(testFileE)

    uploadFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                               logger=myThread.logger,
                               dbinterface=myThread.dbi)
    datasetAction = uploadFactory(classname="NewDataset")
    createAction = uploadFactory(classname="CreateBlocks")

    datasetAction.execute(datasetPath=self.testDatasetA)
    datasetAction.execute(datasetPath=self.testDatasetB)

    self.blockAName = self.testDatasetA + "#" + makeUUID()
    self.blockBName = self.testDatasetB + "#" + makeUUID()

    newBlockA = DBSBlock(name=self.blockAName,
                         location="srm-cms.cern.ch",
                         das=None, workflow=None)
    newBlockA.setDataset(self.testDatasetA, 'data', 'VALID')
    newBlockA.status = 'Closed'

    newBlockB = DBSBlock(name=self.blockBName,
                         location="srm-cms.cern.ch",
                         das=None, workflow=None)
    newBlockB.setDataset(self.testDatasetB, 'data', 'VALID')
    newBlockB.status = 'Closed'

    createAction.execute(blocks=[newBlockA, newBlockB])

    bufferFactory = DAOFactory(package="WMComponent.DBSBuffer.Database",
                               logger=myThread.logger,
                               dbinterface=myThread.dbi)

    setBlock = bufferFactory(classname="DBSBufferFiles.SetBlock")
    setBlock.execute(testFileA["lfn"], self.blockAName)
    setBlock.execute(testFileB["lfn"], self.blockAName)
    setBlock.execute(testFileC["lfn"], self.blockAName)
    setBlock.execute(testFileD["lfn"], self.blockBName)
    setBlock.execute(testFileE["lfn"], self.blockBName)

    fileStatus = bufferFactory(classname="DBSBufferFiles.SetStatus")
    fileStatus.execute(testFileA["lfn"], "LOCAL")
    fileStatus.execute(testFileB["lfn"], "LOCAL")
    fileStatus.execute(testFileC["lfn"], "LOCAL")
    fileStatus.execute(testFileD["lfn"], "LOCAL")
    fileStatus.execute(testFileE["lfn"], "LOCAL")

    associateWorkflow = buffer3Factory(classname="DBSBufferFiles.AssociateWorkflowToFile")
    associateWorkflow.execute(testFileA["lfn"], "BogusRequest", "BogusTask")
    associateWorkflow.execute(testFileB["lfn"], "BogusRequest", "BogusTask")
    associateWorkflow.execute(testFileC["lfn"], "BogusRequest", "BogusTask")
    associateWorkflow.execute(testFileD["lfn"], "BogusRequest", "BogusTask")
    associateWorkflow.execute(testFileE["lfn"], "BogusRequest", "BogusTask")
    return
try:
    loadedBlocks = self.dbsUtil.loadBlocks(blocknames=blocksToLoad)
    logging.info("Loaded blocks: %s" % loadedBlocks)
except WMException:
    raise
except Exception as ex:
    msg = "Unhandled exception while loading blocks.\n"
    msg += str(ex)
    logging.error(msg)
    logging.debug("Blocks to load: %s\n" % blocksToLoad)
    raise DBSUploadException(msg)

for blockInfo in loadedBlocks:
    das = blockInfo["DatasetAlgo"]
    loc = blockInfo["origin_site_name"]
    block = DBSBlock(name=blockInfo["block_name"],
                     location=loc, das=das)
    block.FillFromDBSBuffer(blockInfo)
    blockname = block.getName()

    # Now we have to load files...
    try:
        files = self.dbsUtil.loadFilesByBlock(blockname=blockname)
        logging.info("Have %i files for block %s" % (len(files), blockname))
    except WMException:
        raise
    except Exception as ex:
        msg = "Unhandled exception while loading files for existing blocks.\n"
        msg += str(ex)
        logging.error(msg)
        logging.debug("Blocks being loaded: %s\n" % blockname)
        raise DBSUploadException(msg)
    logging.info("Loaded blocks: %s" % loadedBlocks)
except WMException:
    raise
except Exception as ex:
    msg = "Unhandled exception while loading blocks.\n"
    msg += str(ex)
    logging.error(msg)
    logging.debug("Blocks to load: %s\n" % blocksToLoad)
    raise DBSUploadException(msg)

for blockInfo in loadedBlocks:
    das = blockInfo['DatasetAlgo']
    loc = blockInfo['origin_site_name']
    workflow = blockInfo['workflow']
    block = DBSBlock(name=blockInfo['block_name'],
                     location=loc, das=das, workflow=workflow)
    block.FillFromDBSBuffer(blockInfo)
    blockname = block.getName()

    # Now we have to load files...
    try:
        files = self.dbsUtil.loadFilesByBlock(blockname=blockname)
        logging.info("Have %i files for block %s" % (len(files), blockname))
    except WMException:
        raise
    except Exception as ex:
        msg = "Unhandled exception while loading files for existing blocks.\n"
        msg += str(ex)
        logging.error(msg)
def loadFiles(self):
    """
    _loadFiles_

    Load all files that need to be loaded.  I will do this by DAS for now
    to break the monstrous calls down into smaller chunks.
    """
    # Grab all the Dataset-Algo combinations
    dasList = self.dbsUtil.findUploadableDAS()

    if len(dasList) < 1:
        # Then there's nothing to do
        return []

    readyBlocks = []
    for dasInfo in dasList:
        dasID = dasInfo['DAS_ID']

        # Get the files
        try:
            loadedFiles = self.dbsUtil.findUploadableFilesByDAS(das=dasID)
        except WMException:
            raise
        except Exception as ex:
            msg = "Unhandled exception while loading uploadable files for DAS.\n"
            msg += str(ex)
            logging.error(msg)
            logging.debug("DAS being loaded: %s\n" % dasID)
            raise DBSUploadException(msg)

        # Get the blocks
        if not dasID in self.dasCache.keys():
            # Then we have a new DAS: add it
            self.dasCache[dasID] = {}
        dasBlocks = self.dasCache.get(dasID)

        # Sort the files and blocks by location
        fileDict = sortListByKey(input=loadedFiles, key='locations')

        # Now add each file
        for location in fileDict.keys():
            files = fileDict.get(location)

            if len(files) < 1:
                # Nothing to do here
                continue

            dasBlocks = self.dasCache[dasID].get(location, [])
            if len(dasBlocks) > 0:
                # Load from cache
                currentBlock = self.blockCache.get(dasBlocks[0])
            else:
                blockname = '%s#%s' % (files[0]['datasetPath'], makeUUID())
                currentBlock = DBSBlock(name=blockname,
                                        location=location, das=dasID)
                # Add the era info
                currentBlock.setAcquisitionEra(era=dasInfo['AcquisitionEra'])
                currentBlock.setProcessingVer(procVer=dasInfo['ProcessingVer'])
                self.addNewBlock(block=currentBlock)
                dasBlocks.append(currentBlock.getName())

            for newFile in files:
                if newFile.get('block', 1) is not None:
                    # Then this file already has a block; it should be
                    # accounted for somewhere, or loaded with the block
                    continue

                # Check if we can put files in this block
                if not self.isBlockOpen(newFile=newFile, block=currentBlock):
                    # Then we have to close the block and get a new one
                    currentBlock.status = 'Pending'
                    readyBlocks.append(currentBlock)
                    dasBlocks.remove(currentBlock.getName())
                    currentBlock = self.getBlock(newFile=newFile,
                                                 dasBlocks=dasBlocks,
                                                 location=location,
                                                 das=dasID)
                    currentBlock.setAcquisitionEra(era=dasInfo['AcquisitionEra'])
                    currentBlock.setProcessingVer(procVer=dasInfo['ProcessingVer'])

                # Now deal with the file
                currentBlock.addFile(dbsFile=newFile)
                self.filesToUpdate.append({'filelfn': newFile['lfn'],
                                           'block': currentBlock.getName()})
            # Done with the location
            readyBlocks.append(currentBlock)
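# loadFiles relies on sortListByKey to turn the flat list of file dicts into a
# {location: [files]} map that is then iterated per location.  The actual
# WMCore implementation isn't shown here; below is a plausible,
# self-contained sketch of that grouping (the helper name and its exact
# behavior are assumptions based on how fileDict is used above), assuming each
# file carries its locations as a set under the given key.
def sort_list_by_key(items, key):
    """Group dicts by the entry at `key`, making set values hashable."""
    grouped = {}
    for item in items:
        value = item.get(key)
        if isinstance(value, (set, list)):
            value = frozenset(value)   # sets aren't hashable dict keys
        grouped.setdefault(value, []).append(item)
    return grouped

# Example: two files at the same site end up in the same bucket.
demoFiles = [{"lfn": "/store/f1", "locations": {"T1_XX_Demo"}},
             {"lfn": "/store/f2", "locations": {"T1_XX_Demo"}}]
buckets = sort_list_by_key(demoFiles, "locations")
assert len(buckets) == 1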
def stuffDatabase(self, spec = "TestWorkload.pkl"):
    """
    _stuffDatabase_

    Fill the dbsbuffer with some files and blocks.  We'll insert a total
    of 5 files spanning two blocks.  There will be a total of two datasets
    inserted into the database.  We'll inject files with the location set
    as both an SE name and a PhEDEx node name.
    """
    myThread = threading.currentThread()

    buffer3Factory = DAOFactory(package = "WMComponent.DBS3Buffer",
                                logger = myThread.logger,
                                dbinterface = myThread.dbi)
    insertWorkflow = buffer3Factory(classname = "InsertWorkflow")
    insertWorkflow.execute("BogusRequest", "BogusTask", 0, 0, 0, 0)

    checksums = {"adler32": "1234", "cksum": "5678"}
    testFileA = DBSBufferFile(lfn = makeUUID(), size = 1024, events = 10,
                              checksums = checksums,
                              locations = set(["srm-cms.cern.ch"]))
    testFileA.setAlgorithm(appName = "cmsRun", appVer = "CMSSW_2_1_8",
                           appFam = "RECO", psetHash = "GIBBERISH",
                           configContent = "MOREGIBBERISH")
    testFileA.setDatasetPath(self.testDatasetA)
    testFileA.addRun(Run(2, *[45]))
    testFileA.create()

    testFileB = DBSBufferFile(lfn = makeUUID(), size = 1024, events = 10,
                              checksums = checksums,
                              locations = set(["srm-cms.cern.ch"]))
    testFileB.setAlgorithm(appName = "cmsRun", appVer = "CMSSW_2_1_8",
                           appFam = "RECO", psetHash = "GIBBERISH",
                           configContent = "MOREGIBBERISH")
    testFileB.setDatasetPath(self.testDatasetA)
    testFileB.addRun(Run(2, *[45]))
    testFileB.create()

    testFileC = DBSBufferFile(lfn = makeUUID(), size = 1024, events = 10,
                              checksums = checksums,
                              locations = set(["srm-cms.cern.ch"]))
    testFileC.setAlgorithm(appName = "cmsRun", appVer = "CMSSW_2_1_8",
                           appFam = "RECO", psetHash = "GIBBERISH",
                           configContent = "MOREGIBBERISH")
    testFileC.setDatasetPath(self.testDatasetA)
    testFileC.addRun(Run(2, *[45]))
    testFileC.create()

    self.testFilesA.append(testFileA)
    self.testFilesA.append(testFileB)
    self.testFilesA.append(testFileC)

    testFileD = DBSBufferFile(lfn = makeUUID(), size = 1024, events = 10,
                              checksums = checksums,
                              locations = set(["srm-cms.cern.ch"]))
    testFileD.setAlgorithm(appName = "cmsRun", appVer = "CMSSW_2_1_8",
                           appFam = "RECO", psetHash = "GIBBERISH",
                           configContent = "MOREGIBBERISH")
    testFileD.setDatasetPath(self.testDatasetB)
    testFileD.addRun(Run(2, *[45]))
    testFileD.create()

    testFileE = DBSBufferFile(lfn = makeUUID(), size = 1024, events = 10,
                              checksums = checksums,
                              locations = set(["srm-cms.cern.ch"]))
    testFileE.setAlgorithm(appName = "cmsRun", appVer = "CMSSW_2_1_8",
                           appFam = "RECO", psetHash = "GIBBERISH",
                           configContent = "MOREGIBBERISH")
    testFileE.setDatasetPath(self.testDatasetB)
    testFileE.addRun(Run(2, *[45]))
    testFileE.create()

    self.testFilesB.append(testFileD)
    self.testFilesB.append(testFileE)

    uploadFactory = DAOFactory(package = "WMComponent.DBS3Buffer",
                               logger = myThread.logger,
                               dbinterface = myThread.dbi)
    datasetAction = uploadFactory(classname = "NewDataset")
    createAction = uploadFactory(classname = "CreateBlocks")

    datasetAction.execute(datasetPath = self.testDatasetA)
    datasetAction.execute(datasetPath = self.testDatasetB)

    self.blockAName = self.testDatasetA + "#" + makeUUID()
    self.blockBName = self.testDatasetB + "#" + makeUUID()

    newBlockA = DBSBlock(name = self.blockAName,
                         location = "srm-cms.cern.ch",
                         das = None, workflow = None)
    newBlockA.setDataset(self.testDatasetA, 'data', 'VALID')
    newBlockA.status = 'Closed'

    newBlockB = DBSBlock(name = self.blockBName,
                         location = "srm-cms.cern.ch",
                         das = None, workflow = None)
    newBlockB.setDataset(self.testDatasetB, 'data', 'VALID')
    newBlockB.status = 'Closed'

    createAction.execute(blocks = [newBlockA, newBlockB])

    bufferFactory = DAOFactory(package = "WMComponent.DBSBuffer.Database",
                               logger = myThread.logger,
                               dbinterface = myThread.dbi)

    setBlock = bufferFactory(classname = "DBSBufferFiles.SetBlock")
    setBlock.execute(testFileA["lfn"], self.blockAName)
    setBlock.execute(testFileB["lfn"], self.blockAName)
    setBlock.execute(testFileC["lfn"], self.blockAName)
    setBlock.execute(testFileD["lfn"], self.blockBName)
    setBlock.execute(testFileE["lfn"], self.blockBName)

    fileStatus = bufferFactory(classname = "DBSBufferFiles.SetStatus")
    fileStatus.execute(testFileA["lfn"], "LOCAL")
    fileStatus.execute(testFileB["lfn"], "LOCAL")
    fileStatus.execute(testFileC["lfn"], "LOCAL")
    fileStatus.execute(testFileD["lfn"], "LOCAL")
    fileStatus.execute(testFileE["lfn"], "LOCAL")

    associateWorkflow = buffer3Factory(classname = "DBSBufferFiles.AssociateWorkflowToFile")
    associateWorkflow.execute(testFileA["lfn"], "BogusRequest", "BogusTask")
    associateWorkflow.execute(testFileB["lfn"], "BogusRequest", "BogusTask")
    associateWorkflow.execute(testFileC["lfn"], "BogusRequest", "BogusTask")
    associateWorkflow.execute(testFileD["lfn"], "BogusRequest", "BogusTask")
    associateWorkflow.execute(testFileE["lfn"], "BogusRequest", "BogusTask")
    return
try:
    loadedBlocks = self.dbsUtil.loadBlocks(blocksToLoad, self.dbs3UploadOnly)
    logging.info("Loaded blocks: %s" % loadedBlocks)
except WMException:
    raise
except Exception as ex:
    msg = "Unhandled exception while loading blocks.\n"
    msg += str(ex)
    logging.error(msg)
    logging.debug("Blocks to load: %s\n" % blocksToLoad)
    raise DBSUploadException(msg)

for blockInfo in loadedBlocks:
    das = blockInfo['DatasetAlgo']
    loc = blockInfo['origin_site_name']
    workflow = blockInfo['workflow']
    block = DBSBlock(name = blockInfo['block_name'],
                     location = loc, das = das, workflow = workflow)
    block.FillFromDBSBuffer(blockInfo)
    blockname = block.getName()

    # Now we have to load files...
    try:
        files = self.dbsUtil.loadFilesByBlock(blockname = blockname)
        logging.info("Have %i files for block %s" % (len(files), blockname))
    except WMException:
        raise
    except Exception as ex:
        msg = "Unhandled exception while loading files for existing blocks.\n"
        msg += str(ex)
        logging.error(msg)
        logging.debug("Blocks being loaded: %s\n" % blockname)
        raise DBSUploadException(msg)
def loadBlocks(self):
    """
    _loadBlocks_

    Find all blocks; make sure they're in the cache
    """
    openBlocks = self.dbsUtil.findOpenBlocks()
    logging.info("These are the openblocks: %s" % openBlocks)

    # Load them if we don't have them
    blocksToLoad = []
    for block in openBlocks:
        if not block['blockname'] in self.blockCache.keys():
            blocksToLoad.append(block['blockname'])

    # Now load the blocks
    try:
        loadedBlocks = self.dbsUtil.loadBlocks(blocksToLoad)
        logging.info("Loaded blocks: %s" % loadedBlocks)
    except WMException:
        raise
    except Exception as ex:
        msg = "Unhandled exception while loading blocks.\n"
        msg += str(ex)
        logging.error(msg)
        logging.debug("Blocks to load: %s\n" % blocksToLoad)
        raise DBSUploadException(msg)

    for blockInfo in loadedBlocks:
        das = blockInfo['DatasetAlgo']
        loc = blockInfo['origin_site_name']
        workflow = blockInfo['workflow']
        block = DBSBlock(name = blockInfo['block_name'],
                         location = loc, das = das, workflow = workflow)
        block.FillFromDBSBuffer(blockInfo)
        blockname = block.getName()

        # Now we have to load files...
        try:
            files = self.dbsUtil.loadFilesByBlock(blockname = blockname)
            logging.info("Have %i files for block %s" % (len(files), blockname))
        except WMException:
            raise
        except Exception as ex:
            msg = "Unhandled exception while loading files for existing blocks.\n"
            msg += str(ex)
            logging.error(msg)
            logging.debug("Blocks being loaded: %s\n" % blockname)
            raise DBSUploadException(msg)

        # Add the loaded files to the block
        for file in files:
            block.addFile(file, self.datasetType, self.primaryDatasetType)

        # Add to the cache
        self.addNewBlock(block = block)

    # All blocks should now be loaded and present
    # in both the block cache (which has all the info)
    # and the dasCache (which is a list of name pointers
    # to the keys in the block cache).
    return
def loadBlocks(self):
    """
    _loadBlocks_

    Find all blocks; make sure they're in the cache
    """
    openBlocks = self.dbsUtil.findOpenBlocks(self.dbs3UploadOnly)
    logging.info("These are the openblocks: %s" % openBlocks)

    # Load them if we don't have them
    blocksToLoad = []
    for block in openBlocks:
        if not block['blockname'] in self.blockCache.keys():
            blocksToLoad.append(block['blockname'])

    # Now load the blocks
    try:
        loadedBlocks = self.dbsUtil.loadBlocks(blocksToLoad, self.dbs3UploadOnly)
        logging.info("Loaded blocks: %s" % loadedBlocks)
    except WMException:
        raise
    except Exception as ex:
        msg = "Unhandled exception while loading blocks.\n"
        msg += str(ex)
        logging.error(msg)
        logging.debug("Blocks to load: %s\n" % blocksToLoad)
        raise DBSUploadException(msg)

    for blockInfo in loadedBlocks:
        das = blockInfo['DatasetAlgo']
        loc = blockInfo['origin_site_name']
        workflow = blockInfo['workflow']
        block = DBSBlock(name = blockInfo['block_name'],
                         location = loc, das = das, workflow = workflow)
        block.FillFromDBSBuffer(blockInfo)
        blockname = block.getName()

        # Now we have to load files...
        try:
            files = self.dbsUtil.loadFilesByBlock(blockname = blockname)
            logging.info("Have %i files for block %s" % (len(files), blockname))
        except WMException:
            raise
        except Exception as ex:
            msg = "Unhandled exception while loading files for existing blocks.\n"
            msg += str(ex)
            logging.error(msg)
            logging.debug("Blocks being loaded: %s\n" % blockname)
            raise DBSUploadException(msg)

        # Add the loaded files to the block
        for file in files:
            block.addFile(file, self.datasetType, self.primaryDatasetType)

        # Add to the cache
        self.addNewBlock(block = block)

    # All blocks should now be loaded and present
    # in both the block cache (which has all the info)
    # and the dasCache (which is a list of name pointers
    # to the keys in the block cache).
    return
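# After loadBlocks returns, the poller's state lives in two structures: a
# blockCache mapping block name -> block object (the full info), and a
# dasCache mapping DAS id -> location -> [block names], i.e. name pointers
# into the blockCache.  A minimal sketch of that layout with plain dicts (all
# values here are illustrative, not real production data):
blockCacheDemo = {
    "/Prim/Proc/TIER#uuid-1": {"status": "Open", "location": "T1_XX_Demo"},
}
dasCacheLayoutDemo = {
    42: {                                  # DAS id (dataset-algo combination)
        "T1_XX_Demo": ["/Prim/Proc/TIER#uuid-1"],
    },
}
# Resolving a name pointer from the DAS cache through the block cache:
for name in dasCacheLayoutDemo[42]["T1_XX_Demo"]:
    assert name in blockCacheDemo          # dasCache entries point into blockCache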
# Now load the blocks
try:
    loadedBlocks = self.dbsUtil.loadBlocks(blocknames = blocksToLoad)
except WMException:
    raise
except Exception as ex:
    msg = "Unhandled exception while loading blocks.\n"
    msg += str(ex)
    logging.error(msg)
    logging.debug("Blocks to load: %s\n" % blocksToLoad)
    raise DBSUploadException(msg)

for blockInfo in loadedBlocks:
    das = blockInfo['DatasetAlgo']
    loc = blockInfo['location']
    block = DBSBlock(name = blockInfo['Name'],
                     location = loc, das = das)
    block.FillFromDBSBuffer(blockInfo)
    blockname = block.getName()

    # Now we have to load files...
    try:
        files = self.dbsUtil.loadFilesByBlock(blockname = blockname)
    except WMException:
        raise
    except Exception as ex:
        msg = "Unhandled exception while loading files for existing blocks.\n"
        msg += str(ex)
        logging.error(msg)
        logging.debug("Blocks being loaded: %s\n" % blockname)
        raise DBSUploadException(msg)

    for file in files:
def loadFiles(self):
    """
    _loadFiles_

    Load all files that need to be loaded.  I will do this by DAS for now
    to break the monstrous calls down into smaller chunks.
    """
    # Grab all the Dataset-Algo combinations
    dasList = self.dbsUtil.findUploadableDAS()

    if len(dasList) < 1:
        # Then there's nothing to do
        return []

    readyBlocks = []
    for dasInfo in dasList:
        dasID = dasInfo['DAS_ID']

        # Get the files
        try:
            loadedFiles = self.dbsUtil.findUploadableFilesByDAS(das = dasID)
        except WMException:
            raise
        except Exception as ex:
            msg = "Unhandled exception while loading uploadable files for DAS.\n"
            msg += str(ex)
            logging.error(msg)
            logging.debug("DAS being loaded: %s\n" % dasID)
            raise DBSUploadException(msg)

        # Get the blocks
        if not dasID in self.dasCache.keys():
            # Then we have a new DAS: add it
            self.dasCache[dasID] = {}
        dasBlocks = self.dasCache.get(dasID)

        # Sort the files and blocks by location
        fileDict = sortListByKey(input = loadedFiles, key = 'locations')

        # Now we have both files and blocks.
        # We need a sorting algorithm of sorts...

        # Now add each file
        for location in fileDict.keys():
            files = fileDict.get(location)

            if len(files) < 1:
                # Nothing to do here
                continue

            dasBlocks = self.dasCache[dasID].get(location, [])
            if len(dasBlocks) > 0:
                # Load from cache
                currentBlock = self.blockCache.get(dasBlocks[0])
            else:
                blockname = '%s#%s' % (files[0]['datasetPath'], makeUUID())
                currentBlock = DBSBlock(name = blockname,
                                        location = location, das = dasID)
                # Add the era info
                currentBlock.setAcquisitionEra(era = dasInfo['AcquisitionEra'])
                currentBlock.setProcessingVer(procVer = dasInfo['ProcessingVer'])
                self.addNewBlock(block = currentBlock)
                dasBlocks.append(currentBlock.getName())

            for newFile in files:
                if newFile.get('block', 1) is not None:
                    # Then this file already has a block; it should be
                    # accounted for somewhere, or loaded with the block
                    continue

                # Check if we can put files in this block
                if not self.isBlockOpen(newFile = newFile, block = currentBlock):
                    # Then we have to close the block and get a new one
                    currentBlock.status = 'Pending'
                    readyBlocks.append(currentBlock)
                    dasBlocks.remove(currentBlock.getName())
                    currentBlock = self.getBlock(newFile = newFile,
                                                 dasBlocks = dasBlocks,
                                                 location = location,
                                                 das = dasID)
                    currentBlock.setAcquisitionEra(era = dasInfo['AcquisitionEra'])
                    currentBlock.setProcessingVer(procVer = dasInfo['ProcessingVer'])

                # Now deal with the file
                currentBlock.addFile(dbsFile = newFile)
                self.filesToUpdate.append({'filelfn': newFile['lfn'],
                                           'block': currentBlock.getName()})
            # Done with the location
            readyBlocks.append(currentBlock)
def stuffDatabase(self):
    """
    _stuffDatabase_

    Fill the dbsbuffer with some files and blocks.  We'll insert a total
    of 5 files spanning two blocks.  There will be a total of two datasets
    inserted into the database.  All files will already be in GLOBAL and
    in_phedex.
    """
    myThread = threading.currentThread()

    buffer3Factory = DAOFactory(package="WMComponent.DBS3Buffer",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)
    insertWorkflow = buffer3Factory(classname="InsertWorkflow")
    insertWorkflow.execute("BogusRequestA", "BogusTask", 0, 0, 0, 0)
    insertWorkflow.execute("BogusRequestB", "BogusTask", 0, 0, 0, 0)

    checksums = {"adler32": "1234", "cksum": "5678"}
    testFileA = DBSBufferFile(lfn=makeUUID(), size=1024, events=10,
                              checksums=checksums,
                              locations=set(["srm-cms.cern.ch"]))
    testFileA.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8",
                           appFam="RECO", psetHash="GIBBERISH",
                           configContent="MOREGIBBERISH")
    testFileA.setDatasetPath(self.testDatasetA)
    testFileA.addRun(Run(2, *[45]))
    testFileA.create()

    testFileB = DBSBufferFile(lfn=makeUUID(), size=1024, events=10,
                              checksums=checksums,
                              locations=set(["srm-cms.cern.ch"]))
    testFileB.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8",
                           appFam="RECO", psetHash="GIBBERISH",
                           configContent="MOREGIBBERISH")
    testFileB.setDatasetPath(self.testDatasetA)
    testFileB.addRun(Run(2, *[45]))
    testFileB.create()

    testFileC = DBSBufferFile(lfn=makeUUID(), size=1024, events=10,
                              checksums=checksums,
                              locations=set(["srm-cms.cern.ch"]))
    testFileC.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8",
                           appFam="RECO", psetHash="GIBBERISH",
                           configContent="MOREGIBBERISH")
    testFileC.setDatasetPath(self.testDatasetA)
    testFileC.addRun(Run(2, *[45]))
    testFileC.create()

    self.testFilesA.append(testFileA)
    self.testFilesA.append(testFileB)
    self.testFilesA.append(testFileC)

    testFileD = DBSBufferFile(lfn=makeUUID(), size=1024, events=10,
                              checksums=checksums,
                              locations=set(["srm-cms.cern.ch"]))
    testFileD.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8",
                           appFam="RECO", psetHash="GIBBERISH",
                           configContent="MOREGIBBERISH")
    testFileD.setDatasetPath(self.testDatasetB)
    testFileD.addRun(Run(2, *[45]))
    testFileD.create()

    testFileE = DBSBufferFile(lfn=makeUUID(), size=1024, events=10,
                              checksums=checksums,
                              locations=set(["srm-cms.cern.ch"]))
    testFileE.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8",
                           appFam="RECO", psetHash="GIBBERISH",
                           configContent="MOREGIBBERISH")
    testFileE.setDatasetPath(self.testDatasetB)
    testFileE.addRun(Run(2, *[45]))
    testFileE.create()

    self.testFilesB.append(testFileD)
    self.testFilesB.append(testFileE)

    uploadFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                               logger=myThread.logger,
                               dbinterface=myThread.dbi)
    datasetAction = uploadFactory(classname="NewDataset")
    createAction = uploadFactory(classname="CreateBlocks")

    datasetAction.execute(datasetPath=self.testDatasetA)
    datasetAction.execute(datasetPath=self.testDatasetB)

    self.blockAName = self.testDatasetA + "#" + makeUUID()
    self.blockBName = self.testDatasetB + "#" + makeUUID()

    newBlockA = DBSBlock(name=self.blockAName,
                         location="srm-cms.cern.ch",
                         das=None, workflow=None)
    newBlockA.setDataset(self.testDatasetA, 'data', 'VALID')
    newBlockA.status = 'Closed'

    newBlockB = DBSBlock(name=self.blockBName,
                         location="srm-cms.cern.ch",
                         das=None, workflow=None)
    newBlockB.setDataset(self.testDatasetB, 'data', 'VALID')
    newBlockB.status = 'Closed'

    createAction.execute(blocks=[newBlockA, newBlockB])

    bufferFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                               logger=myThread.logger,
                               dbinterface=myThread.dbi)

    setBlock = bufferFactory(classname="DBSBufferFiles.SetBlock")
    setBlock.execute(testFileA["lfn"], self.blockAName)
    setBlock.execute(testFileB["lfn"], self.blockAName)
    setBlock.execute(testFileC["lfn"], self.blockAName)
    setBlock.execute(testFileD["lfn"], self.blockBName)
    setBlock.execute(testFileE["lfn"], self.blockBName)

    fileStatus = bufferFactory(classname="DBSBufferFiles.SetStatus")
    fileStatus.execute(testFileA["lfn"], "GLOBAL")
    fileStatus.execute(testFileB["lfn"], "GLOBAL")
    fileStatus.execute(testFileC["lfn"], "GLOBAL")
    fileStatus.execute(testFileD["lfn"], "GLOBAL")
    fileStatus.execute(testFileE["lfn"], "GLOBAL")

    phedexStatus = bufferFactory(classname="DBSBufferFiles.SetPhEDExStatus")
    phedexStatus.execute(testFileA["lfn"], 1)
    phedexStatus.execute(testFileB["lfn"], 1)
    phedexStatus.execute(testFileC["lfn"], 1)
    phedexStatus.execute(testFileD["lfn"], 1)
    phedexStatus.execute(testFileE["lfn"], 1)

    associateWorkflow = buffer3Factory(classname="DBSBufferFiles.AssociateWorkflowToFile")
    associateWorkflow.execute(testFileA["lfn"], "BogusRequestA", "BogusTask")
    associateWorkflow.execute(testFileB["lfn"], "BogusRequestA", "BogusTask")
    associateWorkflow.execute(testFileC["lfn"], "BogusRequestA", "BogusTask")
    associateWorkflow.execute(testFileD["lfn"], "BogusRequestB", "BogusTask")
    associateWorkflow.execute(testFileE["lfn"], "BogusRequestB", "BogusTask")

    # Make the desired subscriptions
    insertSubAction = buffer3Factory(classname="NewSubscription")
    datasetA = DBSBufferDataset(path=self.testDatasetA)
    datasetB = DBSBufferDataset(path=self.testDatasetB)
    workload = WMWorkloadHelper()
    workload.load(os.path.join(getTestBase(),
                               'WMComponent_t/PhEDExInjector_t/specs/TestWorkload.pkl'))
    insertSubAction.execute(datasetA.exists(),
                            workload.getSubscriptionInformation()[self.testDatasetA])
    insertSubAction.execute(datasetB.exists(),
                            workload.getSubscriptionInformation()[self.testDatasetB])
    return