def injectFilesFromDBS(inputFileset, datasetPath):
    """
    _injectFilesFromDBS_

    Query DBS for the files of datasetPath and register the first ten of
    them in WMBS and in DBSBuffer.

    For every DBS result:
      * a WMBS File (merged, located at "cmssrm.fnal.gov") is created and
        added to inputFileset;
      * a DBSBufferFile with status "LOCAL" and placeholder algorithm
        information is created for the same LFN.

    After all files are processed the fileset is committed and marked as
    not open.

    Lumi sections are attached to the run number reported on each lumi
    entry, so a file spanning several runs gets one Run per run number.
    A file whose "LumiList" is empty is injected without any run.

    Parameters:
      inputFileset - fileset object the files are added to; must provide
                     name, addFile(), commit() and markOpen().
      datasetPath  - dataset path handed to DBS and to the DBSBufferFile.
    """
    # Single-argument call form: prints the same text under Python 2 and
    # is valid syntax under Python 3.
    print("injecting files from %s into %s, please wait..." %
          (datasetPath, inputFileset.name))

    args = {
        "url": "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet",
        "version": "DBS_2_0_9",
        "mode": "GET",
    }
    dbsApi = DbsApi(args)
    dbsResults = dbsApi.listFiles(path=datasetPath,
                                  retriveList=["retrive_lumi", "retrive_run"])
    # Only the first ten files are injected.
    dbsResults = dbsResults[0:10]

    print(" found %d files, inserting into wmbs..." % (len(dbsResults)))

    for dbsResult in dbsResults:
        myFile = File(lfn=dbsResult["LogicalFileName"],
                      size=dbsResult["FileSize"],
                      events=dbsResult["NumberOfEvents"],
                      checksums={"cksum": dbsResult["Checksum"]},
                      locations="cmssrm.fnal.gov",
                      merged=True)

        # Group the lumi sections by the run they belong to instead of
        # assuming every lumi shares the run number of the first entry.
        lumisByRun = {}
        for lumi in dbsResult["LumiList"]:
            lumisByRun.setdefault(lumi["RunNumber"], []).append(
                lumi["LumiSectionNumber"])

        for runNumber in sorted(lumisByRun):
            myRun = Run(runNumber=runNumber)
            for lumiNumber in lumisByRun[runNumber]:
                myRun.lumis.append(lumiNumber)
            myFile.addRun(myRun)

        myFile.create()
        inputFileset.addFile(myFile)

        dbsFile = DBSBufferFile(lfn=dbsResult["LogicalFileName"],
                                size=dbsResult["FileSize"],
                                events=dbsResult["NumberOfEvents"],
                                checksums={"cksum": dbsResult["Checksum"]},
                                locations="cmssrm.fnal.gov",
                                status="LOCAL")
        dbsFile.setDatasetPath(datasetPath)
        dbsFile.setAlgorithm(appName="cmsRun", appVer="Unknown",
                             appFam="Unknown", psetHash="Unknown",
                             configContent="Unknown")
        dbsFile.create()

    inputFileset.commit()
    inputFileset.markOpen(False)
    return
def _addToDBSBuffer(self, dbsFile, checksums, locations):
    """
    Queue a DBSBuffer entry for dbsFile unless one already exists.

    The entry copies the LFN, size and event count from dbsFile, uses the
    given checksums and locations, and is given status "GLOBAL", a
    placeholder dataset path and "Unknown" algorithm details.  New entries
    are appended to self.dbsFilesToCreate rather than being created here.

    NOTE(review): the original note says this step exists only for
    performance, so that the accountant does not need to check the
    parentage -- confirm against the caller.
    """
    bufferEntry = DBSBufferFile(lfn=dbsFile["LogicalFileName"],
                                size=dbsFile["FileSize"],
                                events=dbsFile["NumberOfEvents"],
                                checksums=checksums,
                                locations=locations,
                                status="GLOBAL")
    bufferEntry.setDatasetPath('bogus')
    bufferEntry.setAlgorithm(appName="cmsRun",
                             appVer="Unknown",
                             appFam="Unknown",
                             psetHash="Unknown",
                             configContent="Unknown")

    # Nothing to do for files that are already known to the buffer.
    if bufferEntry.exists():
        return

    self.dbsFilesToCreate.append(bufferEntry)
    return
def _addToDBSBuffer(self, dbsFile, checksums, locations):
    """
    Queue a DBSBuffer entry for dbsFile unless one already exists.

    The entry copies the LFN, size and event count from dbsFile, uses the
    given checksums and locations, and is given status "GLOBAL", a
    placeholder dataset path and "Unknown" algorithm details.  New entries
    are appended to self.dbsFilesToCreate rather than being created here.

    NOTE(review): the original note says this step exists only for
    performance, so that the accountant does not need to check the
    parentage -- confirm against the caller.
    """
    fileArgs = {
        "lfn": dbsFile["LogicalFileName"],
        "size": dbsFile["FileSize"],
        "events": dbsFile["NumberOfEvents"],
        "checksums": checksums,
        "locations": locations,
        "status": "GLOBAL",
    }
    bufferEntry = DBSBufferFile(**fileArgs)
    bufferEntry.setDatasetPath('bogus')
    bufferEntry.setAlgorithm(appName="cmsRun",
                             appVer="Unknown",
                             appFam="Unknown",
                             psetHash="Unknown",
                             configContent="Unknown")

    # Nothing to do for files that are already known to the buffer.
    if bufferEntry.exists():
        return

    self.dbsFilesToCreate.append(bufferEntry)
    return
def getFiles(self, name, tier, nFiles=12, site="malpaquet"):
    """
    Build a list of nFiles dummy DBSBufferFile objects for tests.

    File number i gets the LFN "<name>-<site>-<i>", the dataset path
    "/<name>/<name>/<tier>", a Run built as Run(1, i) and site added to
    its locations.  The files are only constructed here; create() is not
    called on them.

    Returns the list of files in creation order.
    """
    datasetPath = "/%s/%s/%s" % (name, name, tier)

    files = []
    for index in range(nFiles):
        testFile = DBSBufferFile(lfn='%s-%s-%i' % (name, site, index),
                                 size=1024,
                                 events=20,
                                 checksums={'cksum': 1})
        testFile.setAlgorithm(appName="cmsRun",
                              appVer="CMSSW_3_1_1",
                              appFam="RECO",
                              psetHash="GIBBERISH",
                              configContent="MOREGIBBERISH")
        testFile.setDatasetPath(datasetPath)
        testFile.addRun(Run(1, index))
        testFile['locations'].add(site)
        files.append(testFile)

    return files
def injectFilesFromDBS(inputFileset, datasetPath):
    """
    _injectFilesFromDBS_

    Query DBS for the files of datasetPath and register the first ten of
    them in WMBS and in DBSBuffer.

    For every DBS result:
      * a WMBS File (merged, located at "cmssrm.fnal.gov") is created and
        added to inputFileset;
      * a DBSBufferFile with status "LOCAL" and placeholder algorithm
        information is created for the same LFN.

    After all files are processed the fileset is committed and marked as
    not open.

    Lumi sections are attached to the run number reported on each lumi
    entry, so a file spanning several runs gets one Run per run number.
    A file whose "LumiList" is empty is injected without any run.

    Parameters:
      inputFileset - fileset object the files are added to; must provide
                     name, addFile(), commit() and markOpen().
      datasetPath  - dataset path handed to DBS and to the DBSBufferFile.
    """
    # Single-argument call form: prints the same text under Python 2 and
    # is valid syntax under Python 3.
    print("injecting files from %s into %s, please wait..." %
          (datasetPath, inputFileset.name))

    args = {
        "url": "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet",
        "version": "DBS_2_0_9",
        "mode": "GET",
    }
    dbsApi = DbsApi(args)
    dbsResults = dbsApi.listFiles(path=datasetPath,
                                  retriveList=["retrive_lumi", "retrive_run"])
    # Only the first ten files are injected.
    dbsResults = dbsResults[0:10]

    print(" found %d files, inserting into wmbs..." % (len(dbsResults)))

    for dbsResult in dbsResults:
        myFile = File(lfn=dbsResult["LogicalFileName"],
                      size=dbsResult["FileSize"],
                      events=dbsResult["NumberOfEvents"],
                      checksums={"cksum": dbsResult["Checksum"]},
                      locations="cmssrm.fnal.gov",
                      merged=True)

        # Group the lumi sections by the run they belong to instead of
        # assuming every lumi shares the run number of the first entry.
        lumisByRun = {}
        for lumi in dbsResult["LumiList"]:
            lumisByRun.setdefault(lumi["RunNumber"], []).append(
                lumi["LumiSectionNumber"])

        for runNumber in sorted(lumisByRun):
            myRun = Run(runNumber=runNumber)
            for lumiNumber in lumisByRun[runNumber]:
                myRun.lumis.append(lumiNumber)
            myFile.addRun(myRun)

        myFile.create()
        inputFileset.addFile(myFile)

        dbsFile = DBSBufferFile(lfn=dbsResult["LogicalFileName"],
                                size=dbsResult["FileSize"],
                                events=dbsResult["NumberOfEvents"],
                                checksums={"cksum": dbsResult["Checksum"]},
                                locations="cmssrm.fnal.gov",
                                status="LOCAL")
        dbsFile.setDatasetPath(datasetPath)
        dbsFile.setAlgorithm(appName="cmsRun", appVer="Unknown",
                             appFam="Unknown", psetHash="Unknown",
                             configContent="Unknown")
        dbsFile.create()

    inputFileset.commit()
    inputFileset.markOpen(False)
    return
def testReportHandling(self):
    """
    _testReportHandling_

    Run the accountant over a processing report and then a merge report.

    Each CMSSW XML report is parsed into a Report(), given the metadata
    the stageout scripts would normally add, persisted to a pickle file
    and registered as the framework job report of a job.  After each
    accountant cycle the job outcome and the file metadata are verified.
    """
    # ---- processing job -------------------------------------------------
    self.procPath = os.path.join(
        WMCore.WMBase.getTestBase(),
        "WMCore_t/FwkJobReport_t/CMSSWProcessingReport.xml")

    procReport = Report("cmsRun1")
    procReport.parse(self.procPath)

    # Fake some metadata that should be added by the stageout scripts.
    for outputRef in procReport.getAllFileRefsFromStep("cmsRun1"):
        outputRef.size = 1024
        outputRef.location = "cmssrm.fnal.gov"

    procFwjrPath = os.path.join(self.tempDir, "ProcReport.pkl")
    procStep = procReport.retrieveStep("cmsRun1")
    procStep.status = 0
    procReport.setTaskName('/TestWF/None')
    procReport.persist(procFwjrPath)

    self.setFWJRAction.execute(jobID=self.testJob["id"],
                               fwjrPath=procFwjrPath)

    # A DBSBuffer file created up front; no run is attached to it.
    pFile = DBSBufferFile(lfn="/path/to/some/lfn",
                          size=600000,
                          events=60000)
    pFile.setAlgorithm(appName="cmsRun",
                       appVer="UNKNOWN",
                       appFam="RECO",
                       psetHash="GIBBERISH",
                       configContent="MOREGIBBERISH")
    pFile.setDatasetPath("/bogus/dataset/path")
    pFile.create()

    config = self.createConfig(workerThreads=1)
    accountant = JobAccountantPoller(config)
    accountant.setup()
    accountant.algorithm()

    self.verifyJobSuccess(self.testJob["id"])
    self.verifyFileMetaData(self.testJob["id"],
                            procReport.getAllFilesFromStep("cmsRun1"))

    # ---- merge job ------------------------------------------------------
    # The merge job takes this LFN as input; it is loaded, not created.
    inputFile = File(
        lfn="/store/backfill/2/unmerged/WMAgentCommissioining10/MinimumBias/RECO/rereco_GR09_R_34X_V5_All_v1/0000/outputRECORECO.root")
    inputFile.load()

    self.testMergeJob = Job(name="testMergeJob", files=[inputFile])
    self.testMergeJob.create(group=self.mergeJobGroup)
    self.testMergeJob["state"] = "complete"
    self.stateChangeAction.execute(jobs=[self.testMergeJob])

    self.mergePath = os.path.join(
        WMCore.WMBase.getTestBase(),
        "WMCore_t/FwkJobReport_t/CMSSWMergeReport.xml")

    mergeReport = Report("mergeReco")
    mergeReport.parse(self.mergePath)

    # Fake some metadata that should be added by the stageout scripts.
    for outputRef in mergeReport.getAllFileRefsFromStep("mergeReco"):
        outputRef.size = 1024
        outputRef.location = "cmssrm.fnal.gov"
        outputRef.dataset = {
            "applicationName": "cmsRun",
            "applicationVersion": "CMSSW_3_4_2_patch1",
            "primaryDataset": "MinimumBias",
            "processedDataset": "Rereco-v1",
            "dataTier": "RECO",
        }

    mergeFwjrPath = os.path.join(self.tempDir, "MergeReport.pkl")
    mergeReport.setTaskName('/MergeWF/None')
    mergeStep = mergeReport.retrieveStep("mergeReco")
    mergeStep.status = 0
    mergeReport.persist(mergeFwjrPath)

    self.setFWJRAction.execute(jobID=self.testMergeJob["id"],
                               fwjrPath=mergeFwjrPath)

    # The same poller instance is reused for the second cycle.
    accountant.algorithm()

    self.verifyJobSuccess(self.testMergeJob["id"])
    self.verifyFileMetaData(self.testMergeJob["id"],
                            mergeReport.getAllFilesFromStep("mergeReco"))
    return
def testReportHandling(self):
    """
    _testReportHandling_

    Run the accountant over a processing report and then a merge report.

    Each CMSSW XML report is parsed into a Report(), given the metadata
    the stageout scripts would normally add, persisted to a pickle file
    and registered as the framework job report of a job.  After each
    accountant cycle the job outcome and the file metadata are verified.
    """
    # ---- processing job -------------------------------------------------
    self.procPath = os.path.join(
        WMCore.WMBase.getTestBase(),
        "WMCore_t/FwkJobReport_t/CMSSWProcessingReport.xml")

    procReport = Report("cmsRun1")
    procReport.parse(self.procPath)

    # Fake some metadata that should be added by the stageout scripts.
    for outputRef in procReport.getAllFileRefsFromStep("cmsRun1"):
        outputRef.size = 1024
        outputRef.location = "cmssrm.fnal.gov"

    procFwjrPath = os.path.join(self.tempDir, "ProcReport.pkl")
    procStep = procReport.retrieveStep("cmsRun1")
    procStep.status = 0
    procReport.setTaskName('/TestWF/None')
    procReport.persist(procFwjrPath)

    self.setFWJRAction.execute(jobID=self.testJob["id"],
                               fwjrPath=procFwjrPath)

    # A DBSBuffer file created up front; no run is attached to it.
    pFile = DBSBufferFile(lfn="/path/to/some/lfn",
                          size=600000,
                          events=60000)
    pFile.setAlgorithm(appName="cmsRun",
                       appVer="UNKNOWN",
                       appFam="RECO",
                       psetHash="GIBBERISH",
                       configContent="MOREGIBBERISH")
    pFile.setDatasetPath("/bogus/dataset/path")
    pFile.create()

    config = self.createConfig(workerThreads=1)
    accountant = JobAccountantPoller(config)
    accountant.setup()
    accountant.algorithm()

    self.verifyJobSuccess(self.testJob["id"])
    self.verifyFileMetaData(self.testJob["id"],
                            procReport.getAllFilesFromStep("cmsRun1"))

    # ---- merge job ------------------------------------------------------
    # The merge job takes this LFN as input; it is loaded, not created.
    inputFile = File(
        lfn="/store/backfill/2/unmerged/WMAgentCommissioining10/MinimumBias/RECO/rereco_GR09_R_34X_V5_All_v1/0000/outputRECORECO.root")
    inputFile.load()

    self.testMergeJob = Job(name="testMergeJob", files=[inputFile])
    self.testMergeJob.create(group=self.mergeJobGroup)
    self.testMergeJob["state"] = "complete"
    self.stateChangeAction.execute(jobs=[self.testMergeJob])

    self.mergePath = os.path.join(
        WMCore.WMBase.getTestBase(),
        "WMCore_t/FwkJobReport_t/CMSSWMergeReport.xml")

    mergeReport = Report("mergeReco")
    mergeReport.parse(self.mergePath)

    # Fake some metadata that should be added by the stageout scripts.
    for outputRef in mergeReport.getAllFileRefsFromStep("mergeReco"):
        outputRef.size = 1024
        outputRef.location = "cmssrm.fnal.gov"
        outputRef.dataset = {
            "applicationName": "cmsRun",
            "applicationVersion": "CMSSW_3_4_2_patch1",
            "primaryDataset": "MinimumBias",
            "processedDataset": "Rereco-v1",
            "dataTier": "RECO",
        }

    mergeFwjrPath = os.path.join(self.tempDir, "MergeReport.pkl")
    mergeReport.setTaskName('/MergeWF/None')
    mergeStep = mergeReport.retrieveStep("mergeReco")
    mergeStep.status = 0
    mergeReport.persist(mergeFwjrPath)

    self.setFWJRAction.execute(jobID=self.testMergeJob["id"],
                               fwjrPath=mergeFwjrPath)

    # The same poller instance is reused for the second cycle.
    accountant.algorithm()

    self.verifyJobSuccess(self.testMergeJob["id"])
    self.verifyFileMetaData(self.testMergeJob["id"],
                            mergeReport.getAllFilesFromStep("mergeReco"))
    return
def stuffDatabase(self):
    """
    _stuffDatabase_

    Fill the dbsbuffer with some files and blocks.

    Five files are created: three in self.testDatasetA (recorded in
    self.testFilesA) and two in self.testDatasetB (recorded in
    self.testFilesB).  One open block per dataset is registered under
    self.blockAName / self.blockBName, each file is assigned to the block
    of its dataset, and every file is then given status "LOCAL".  All
    files and blocks use the location "srm-cms.cern.ch".
    """
    checksums = {"adler32": "1234", "cksum": "5678"}

    def makeBufferFile(datasetPath):
        """Create and persist one dummy DBSBufferFile in datasetPath."""
        bufferFile = DBSBufferFile(lfn=makeUUID(),
                                   size=1024,
                                   events=10,
                                   checksums=checksums,
                                   locations=set(["srm-cms.cern.ch"]))
        bufferFile.setAlgorithm(appName="cmsRun",
                                appVer="CMSSW_2_1_8",
                                appFam="RECO",
                                psetHash="GIBBERISH",
                                configContent="MOREGIBBERISH")
        bufferFile.setDatasetPath(datasetPath)
        bufferFile.addRun(Run(2, 45))
        bufferFile.create()
        return bufferFile

    # Three files for dataset A, recorded only after all were created.
    filesA = [makeBufferFile(self.testDatasetA) for _ in range(3)]
    for bufferFile in filesA:
        self.testFilesA.append(bufferFile)

    # Two files for dataset B.
    filesB = [makeBufferFile(self.testDatasetB) for _ in range(2)]
    for bufferFile in filesB:
        self.testFilesB.append(bufferFile)

    myThread = threading.currentThread()

    uploadFactory = DAOFactory(package="WMComponent.DBSUpload.Database",
                               logger=myThread.logger,
                               dbinterface=myThread.dbi)
    createBlock = uploadFactory(classname="SetBlockStatus")

    self.blockAName = self.testDatasetA + "#" + makeUUID()
    self.blockBName = self.testDatasetB + "#" + makeUUID()
    for blockName in (self.blockAName, self.blockBName):
        createBlock.execute(block=blockName,
                            locations=["srm-cms.cern.ch"],
                            open_status=1)

    bufferFactory = DAOFactory(package="WMComponent.DBSBuffer.Database",
                               logger=myThread.logger,
                               dbinterface=myThread.dbi)

    # Attach every file to the block of its dataset.
    setBlock = bufferFactory(classname="DBSBufferFiles.SetBlock")
    for bufferFile in filesA:
        setBlock.execute(bufferFile["lfn"], self.blockAName)
    for bufferFile in filesB:
        setBlock.execute(bufferFile["lfn"], self.blockBName)

    # Finally flag all five files as LOCAL, dataset A files first.
    fileStatus = bufferFactory(classname="DBSBufferFiles.SetStatus")
    for bufferFile in filesA + filesB:
        fileStatus.execute(bufferFile["lfn"], "LOCAL")

    return