def createDBSFileFromBufferFile(bufferFile, procDataset):
    """
    Take a DBSBufferFile and turn it into a DBSFile object
    """
    lumiList = []
    for run in bufferFile.getRuns():
        for l in run:
            lumi = DbsLumiSection(LumiSectionNumber=long(l),
                                  StartEventNumber=0,
                                  EndEventNumber=0,
                                  LumiStartTime=0,
                                  LumiEndTime=0,
                                  RunNumber=long(run.run))
            lumiList.append(lumi)

    dbsfile = DbsFile(NumberOfEvents=bufferFile['events'],
                      LogicalFileName=bufferFile['lfn'],
                      FileSize=int(bufferFile['size']),
                      Status="VALID",
                      ValidationStatus='VALID',
                      FileType='EDM',
                      Dataset=procDataset,
                      TierList=procDataset['TierList'],
                      AlgoList=procDataset['AlgoList'],
                      LumiList=lumiList,
                      ParentList=bufferFile.getParentLFNs())

    for entry in bufferFile['checksums'].keys():
        # This should be a dictionary with a cktype key and cksum value
        if entry.lower() == 'cksum':
            dbsfile['Checksum'] = str(bufferFile['checksums'][entry])
        elif entry.lower() == 'adler32':
            dbsfile['Adler32'] = str(bufferFile['checksums'][entry])
        elif entry.lower() == 'md5':
            dbsfile['Md5'] = str(bufferFile['checksums'][entry])

    return dbsfile
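# Hypothetical usage sketch (not part of the original module): illustrates how
# createDBSFileFromBufferFile could be driven. The bufferFile argument is
# assumed to be a DBSBuffer file-like object exposing getRuns(),
# getParentLFNs() and dict-style access to 'events', 'lfn', 'size' and
# 'checksums'; procDataset is assumed to be a processed dataset object that
# already carries 'TierList' and 'AlgoList'. Names prefixed with "demo" are
# placeholders for illustration only.
def demoConvertBufferFile(demoBufferFile, demoProcDataset):
    # Convert the buffer file and inspect a couple of fields before any
    # insert call is issued against DBS.
    dbsFile = createDBSFileFromBufferFile(demoBufferFile, demoProcDataset)
    logging.debug("Prepared DbsFile %s (%s lumi sections)",
                  dbsFile['LogicalFileName'], len(dbsFile['LumiList']))
    return dbsFile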
ApplicationFamily="Merge", ) path = "/test_primary_001/TestProcessedDS001/SIM" merge_proc = api.insertMergedDataset(path, "ThisISMergedDataset001", merge_algo) # File will go into THIS Block block = DbsFileBlock(StorageElement=['test1', 'test3'], Name="/test_primary_001/TestProcessedDS001/SIM#12345") merged_file = DbsFile( Checksum='00000', LogicalFileName='MERGEDFILE_001', NumberOfEvents=10000, FileSize=1000000, Status='VALID', ValidationStatus='VALID', FileType='EVD', Dataset=merge_proc, Block=block, AlgoList=[merge_algo], ) #api.insertFiles (proc, [myfile1], block) parentList = ['NEW_TEST0001', 'NEW_TEST0002'] # The parent Un-Merged files api.insertMergedFile(parentList, merged_file) print "Result: %s" % merged_file except DbsApiException, ex: print "Caught API Exception %s: %s " % (ex.getClassName(), ex.getErrorMessage())
f.write("\n***********************insertBlock API tests***************************") apiObj = DbsUnitTestApi(api.insertFiles, f) apiObj.setVerboseLevel(opts.verbose) f.write("\n\n***********************insertFiles API tests***************************") lfn1 = '1111-0909-9767-8764' + mytime lfn2 = '1111-0909-9767-876411' + mytime file1= DbsFile ( Checksum= '999', LogicalFileName= lfn1, #QueryableMetadata= 'This is a test file', NumberOfEvents= 10000, FileSize= 12340, Status= 'VALID', ValidationStatus = 'VALID', FileType= 'EDM', LumiList= [lumi1, lumi2], TierList= tierList, AlgoList = [algo1, algo2], ) file2= DbsFile ( Checksum= '999', LogicalFileName= lfn2, #QueryableMetadata= 'This is a test file', NumberOfEvents= 10000, FileSize= 12340, Status= 'VALID', ValidationStatus = 'VALID',
                       StartEventNumber=100,
                       EndEventNumber=200,
                       RunNumber=runNumber1)
apiObj.run(lumi2, excep=False)

#Insert File
for j in range(maxFiles):
    apiObj = DbsUnitTestApi(api.insertFiles, f)
    lfn1 = mytime + str(j)
    file1 = DbsFile(Checksum='999',
                    LogicalFileName=lfn1,
                    #QueryableMetadata= 'This is a test file',
                    NumberOfEvents=10000,
                    FileSize=12340,
                    Status='VALID',
                    FileType='EDM',
                    LumiList=[lumi1, lumi2],
                    TierList=tierList,
                    FileTriggerMap=[{'TriggerTag': 'TestTrig001', 'NumberOfEvents': 123},
                                    {'TriggerTag': 'TestTrig002', 'NumberOfEvents': 345},
                                    {'TriggerTag': 'TestTrig003', 'NumberOfEvents': 678}]
                    )
    fileList.append(file1)

    lfn2 = mytime + str(j) + ".index"
    file2 = DbsFile(Checksum='911911',
                    LogicalFileName=lfn2,
                    #QueryableMetadata= 'This is a test file',
                    NumberOfEvents=10000,
                    FileSize=456778,
def createDBSFiles(fjrFileInfo, jobType=None, apiRef=None):
    """
    _createDBSFiles_

    Create a list of DBS File instances from the file details contained
    in a FwkJobRep.FileInfo instance describing an output file

    Does not insert files, returns as list of DbsFile objects
    Does insert runs and lumisections if DBS API reference is passed

    """
    results = []
    inputLFNs = [x['LFN'] for x in fjrFileInfo.inputFiles]
    checksum = fjrFileInfo.checksums['cksum']
    adler32sum = fjrFileInfo.checksums.get('adler32', '')
    nEvents = int(fjrFileInfo['TotalEvents'])

    if len(fjrFileInfo.dataset) <= 0:
        logging.error("No dataset info found in FWJobReport!")
        return results

    #  //
    # //  Set FileType
    #//
    if 'FileType' in fjrFileInfo:
        fileType = fjrFileInfo['FileType']
    else:
        fileType = 'EDM'

    #
    # FIXME: at this point I should use the mc or data event type from
    #        the jobreport. Until this is supported by the framework,
    #        we use the workaround that mc job reports have an empty
    #        lumisections list (stripped in DBSInterface)
    #
    lumiList = []
    if (len(fjrFileInfo.getLumiSections()) > 0):

        #
        # insert runs (for data files from detector)
        #
        if (apiRef != None):
            for runinfo in fjrFileInfo.runs:
                run = DbsRun(
                    RunNumber=long(runinfo),
                    NumberOfEvents=0,
                    NumberOfLumiSections=0,
                    TotalLuminosity=0,
                    StoreNumber=0,
                    StartOfRun=0,
                    EndOfRun=0,
                )
                apiRef.insertRun(run)

        #
        # insert lumisections (for data files from detector)
        # associate files with lumisections (for all data files)
        #
        for lumiinfo in fjrFileInfo.getLumiSections():
            lumi = DbsLumiSection(
                LumiSectionNumber=long(lumiinfo['LumiSectionNumber']),
                StartEventNumber=0,
                EndEventNumber=0,
                LumiStartTime=0,
                LumiEndTime=0,
                RunNumber=long(lumiinfo['RunNumber']),
            )

            # Isn't needed, causes monster slowdown
            #if ( apiRef != None ):
            #    apiRef.insertLumiSection(lumi)

            lumiList.append(lumi)

        logging.debug("Lumi associated to file is: %s" % ([x for x in lumiList]))

    #  //
    # //  Dataset info related to files and creation of DbsFile object
    #//
    for dataset in fjrFileInfo.dataset:

        primary = createPrimaryDataset(dataset)
        if jobType == "Merge":
            algo = createMergeAlgorithm(dataset)
        else:
            algo = createAlgorithmForInsert(dataset)

        processed = createProcessedDataset(primary, algo, dataset)

        dbsFileInstance = DbsFile(
            Checksum=checksum,
            Adler32=adler32sum,
            NumberOfEvents=nEvents,
            LogicalFileName=fjrFileInfo['LFN'],
            FileSize=int(fjrFileInfo['Size']),
            Status="VALID",
            ValidationStatus='VALID',
            FileType=fileType,
            Dataset=processed,
            TierList=makeTierList(dataset['DataTier']),
            AlgoList=[algo],
            LumiList=lumiList,
            ParentList=inputLFNs,
            BranchList=fjrFileInfo.branches,
        )
        results.append(dbsFileInstance)

    return results
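# Hypothetical usage sketch (not part of the original module): shows the call
# pattern implied by the docstring above. demoFjrFileInfo is assumed to be a
# FwkJobRep.FileInfo instance for one output file; passing a DBS API reference
# makes createDBSFiles insert the runs as a side effect, while the returned
# DbsFile objects still have to be inserted separately. "demo" names are
# placeholders for illustration only.
def demoCreateDBSFiles(demoFjrFileInfo, demoApiRef=None):
    dbsFiles = createDBSFiles(demoFjrFileInfo, jobType=None, apiRef=demoApiRef)
    for dbsFile in dbsFiles:
        # Each entry is ready to be handed to an insertFiles call later on.
        logging.debug("DbsFile ready for insertion: %s",
                      dbsFile['LogicalFileName'])
    return dbsFiles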
                       EndEventNumber=200,
                       LumiStartTime=1233,
                       LumiEndTime=1234,
                       RunNumber=1,
                       )

myfile1 = DbsFile(Checksum='999',
                  Adler32='Adler123',
                  Md5='MD5123',
                  LogicalFileName='NEW_TEST0001',
                  #QueryableMetadata= 'This is a test file',
                  NumberOfEvents=10000,
                  FileSize=12340,
                  Status='VALID',
                  ValidationStatus='VALID',
                  FileType='EDM',
                  Dataset=proc,
                  #Block= isDictType,
                  AlgoList=[algo],
                  LumiList=[lumi1, lumi2],
                  TierList=['SIM', 'GEN'],
                  BranchHash="001234565798685",
                  #ParentList = ['NEW_TEST0003'],
                  AutoCrossSection=0.0)

myfile2 = DbsFile(Checksum='000',
                  Adler32='Adler12311',
                  Md5='MD51344',
                  LogicalFileName='NEW_TEST0002',
                       LumiSectionNumber=1333,
                       StartEventNumber=100,
                       EndEventNumber=200,
                       LumiStartTime=1234,
                       LumiEndTime=1234,
                       RunNumber=1,
                       )

myfile1 = DbsFile(Checksum='999',
                  LogicalFileName='NEW_TEST0001' + mytime,
                  NumberOfEvents=10000,
                  FileSize=12340,
                  Status='VALID',
                  ValidationStatus='VALID',
                  FileType='EDM',
                  Dataset=proc,
                  AlgoList=[algo],
                  LumiList=[lumi1, lumi2],
                  TierList=['SIM', 'GEN'],
                  )

myfile2 = DbsFile(Checksum='000',
                  LogicalFileName='NEW_TEST0002' + mytime,
                  NumberOfEvents=10000,
                  FileSize=12340,
                  Status='VALID',
                  ValidationStatus='VALID',
                  FileType='EDM',
apiObj.run(lumi1, excep=False)
lumi2 = DbsLumiSection(LumiSectionNumber=lumiNumber2,
                       StartEventNumber=100,
                       EndEventNumber=200,
                       RunNumber=runNumber1)
apiObj.run(lumi2, excep=False)

#Insert File
for j in range(maxFiles):
    apiObj = DbsUnitTestApi(api.insertFiles, f)
    lfn1 = mytime + str(j)
    file1 = DbsFile(Checksum='999',
                    LogicalFileName=lfn1,
                    #QueryableMetadata= 'This is a test file',
                    NumberOfEvents=10000,
                    FileSize=12340,
                    Status='VALID',
                    FileType='EDM',
                    LumiList=[lumi1, lumi2],
                    TierList=tierList,
                    )
    fileList.append(file1)

print "\n\n\nNUMBER of FILES with which insertFile API is called: %s" % str(len(fileList))
apiObj.run(proc1, fileList, block1, excep=False)

f.close()
blockObj3 = DbsFileBlock(Name=blockName3)
blockObj4 = DbsFileBlock(Name=blockName4)
blockObjM = DbsFileBlock(Name=blockNameM)
blockObjG = DbsFileBlock(Name=blockNameG)

fileObj1 = DbsFile(Checksum=fileCkecksum1,
                   Adler32=fileAdler321,
                   Md5=fileMd51,
                   LogicalFileName=fileName1,
                   NumberOfEvents=fileNumEvents1,
                   FileSize=fileSize1,
                   Status=fileStatus1,
                   ValidationStatus=fileValidStatus1,
                   FileType=fileType1,
                   Dataset=procObj1,
                   AlgoList=[algoObj1],
                   LumiList=[lumiObj1],
                   TierList=[tier1, tier2],
                   AutoCrossSection=1.0)

fileObj2 = DbsFile(Checksum=fileCkecksum2,
                   Adler32=fileAdler322,
                   Md5=fileMd52,
                   LogicalFileName=fileName2,
                   NumberOfEvents=fileNumEvents2,
                   FileSize=fileSize2,
                   Status=fileStatus2,
                   ValidationStatus=fileValidStatus2,
    def insertFilesForDBSBuffer(self, files, procDataset, algos,
                                jobType="NotMerge", insertDetectorData=False,
                                maxFiles=100, maxSize=99999999, timeOut=None,
                                fileCommitLength=5):
        """
        _insertFiles_

        list of files inserted in DBS
        """
        #TODO: What's the purpose of insertDetectorData

        if len(files) < 1:
            return
        affectedBlocks = []
        insertFiles = []
        addedRuns = []
        seName = None

        #Get the algos in insertable form
        # logging.error("About to input algos")
        # logging.error(algos)
        ialgos = [DBSWriterObjects.createAlgorithmForInsert(dict(algo))
                  for algo in algos]

        #print ialgos

        for outFile in files:
            #  //
            # //  Convert each file into a DBS File object
            #//
            lumiList = []

            #Something similar should be the real deal when multiple runs/lumi could be returned from wmbs file
            for runlumiinfo in outFile.getRuns():
                lrun = long(runlumiinfo.run)
                run = DbsRun(
                    RunNumber=lrun,
                    NumberOfEvents=0,
                    NumberOfLumiSections=0,
                    TotalLuminosity=0,
                    StoreNumber=0,
                    StartOfRun=0,
                    EndOfRun=0,
                )
                #Only added if not added by another file in this loop, why waste a call to DBS
                if lrun not in addedRuns:
                    self.dbs.insertRun(run)
                    addedRuns.append(lrun)  #save it so we do not try to add it again to DBS
                    logging.debug("run %s added to DBS " % str(lrun))
                for alsn in runlumiinfo:
                    lumi = DbsLumiSection(
                        LumiSectionNumber=long(alsn),
                        StartEventNumber=0,
                        EndEventNumber=0,
                        LumiStartTime=0,
                        LumiEndTime=0,
                        RunNumber=lrun,
                    )
                    lumiList.append(lumi)

            logging.debug("lumi list created for the file")

            dbsfile = DbsFile(
                #Checksum = str(outFile['cksum']),
                NumberOfEvents=outFile['events'],
                LogicalFileName=outFile['lfn'],
                FileSize=int(outFile['size']),
                Status="VALID",
                ValidationStatus='VALID',
                FileType='EDM',
                Dataset=procDataset,
                TierList=DBSWriterObjects.makeTierList(
                    procDataset['Path'].split('/')[3]),
                AlgoList=ialgos,
                LumiList=lumiList,
                ParentList=outFile.getParentLFNs(),
                #BranchHash = outFile['BranchHash'],
            )

            #Set checksums by hand
            #dbsfile['Checksum'] = 0  #Set a default?
            for entry in outFile['checksums'].keys():
                #This should be a dictionary with a cktype key and cksum value
                if entry.lower() == 'cksum':
                    dbsfile['Checksum'] = str(outFile['checksums'][entry])
                elif entry.lower() == 'adler32':
                    dbsfile['Adler32'] = str(outFile['checksums'][entry])
                elif entry.lower() == 'md5':
                    dbsfile['Md5'] = str(outFile['checksums'][entry])

            #This check comes from ProdAgent, not sure if it's required
            if len(outFile["locations"]) > 0:
                seName = list(outFile["locations"])[0]
                logging.debug("SEname associated to file is: %s" % seName)
            else:
                msg = "Error in DBSWriter.insertFiles\n"
                msg += "No SEname associated to file"
                #print "FAKING seName for now"
                #seName="cmssrm.fnal.gov"
                raise DBSWriterError(msg)
            insertFiles.append(dbsfile)

        #  // Processing Jobs:
        # //  Insert the lists of sorted files into the appropriate
        #//   fileblocks

        sumSize = 0
        sumFiles = 0
        tmpFiles = []
        blockList = []

        #First, get the block.  See if the block already exists
        try:
            fileBlock = DBSWriterObjects.getDBSFileBlock(
                self.dbs,
                procDataset,
                seName)
            fileBlock['files'] = []
            #if not fileBlock in affectedBlocks:
            #    affectedBlocks.append(fileBlock)
        except DbsException as ex:
            msg = "Error in DBSWriter.insertFilesForDBSBuffer\n"
            msg += "Cannot retrieve FileBlock for dataset:\n"
            msg += " %s\n" % procDataset['Path']
            msg += "%s\n" % formatEx(ex)
            raise DBSWriterError(msg)

        filesToCommit = []
        for file in insertFiles:
            # First see if the block is full
            if self.manageFileBlock(fileBlock=fileBlock, maxFiles=maxFiles,
                                    maxSize=maxSize, timeOut=timeOut,
                                    algos=ialgos,
                                    filesToCommit=filesToCommit,
                                    procDataset=procDataset):
                fileBlock['OpenForWriting'] = 0
                if not fileBlock in affectedBlocks:
                    affectedBlocks.append(fileBlock)

                # Then we need a new block
                try:
                    fileBlock = DBSWriterObjects.getDBSFileBlock(
                        self.dbs,
                        procDataset,
                        seName)
                    fileBlock['files'] = []
                except DbsException as ex:
                    msg = "Error in DBSWriter.insertFilesForDBSBuffer\n"
                    msg += "Cannot retrieve FileBlock for dataset:\n"
                    msg += " %s\n" % procDataset['Path']
                    msg += "%s\n" % formatEx(ex)
                    raise DBSWriterError(msg)

            fileBlock['files'].append(file['LogicalFileName'])
            filesToCommit.append(file)
            if len(filesToCommit) >= fileCommitLength:
                # Only commit the files if there are more of them than the maximum length
                try:
                    self.dbs.insertFiles(procDataset, filesToCommit, fileBlock)
                    filesToCommit = []
                    logging.debug("Inserted files: %s to FileBlock: %s" \
                                  % (([x['LogicalFileName'] for x in insertFiles]),
                                     fileBlock['Name']))
                except DbsException as ex:
                    msg = "Error in DBSWriter.insertFiles\n"
                    msg += "Cannot insert processed files:\n"
                    msg += " %s\n" % ([x['LogicalFileName'] for x in insertFiles],)
                    msg += "%s\n" % formatEx(ex)
                    raise DBSWriterError(msg)

        if len(filesToCommit) > 0:
            try:
                self.dbs.insertFiles(procDataset, filesToCommit, fileBlock)
                filesToCommit = []
                logging.debug("Inserted files: %s to FileBlock: %s" \
                              % (([x['LogicalFileName'] for x in insertFiles]),
                                 fileBlock['Name']))
            except DbsException as ex:
                msg = "Error in DBSWriter.insertFiles\n"
                msg += "Cannot insert processed files:\n"
                msg += " %s\n" % ([x['LogicalFileName'] for x in insertFiles],)
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)

        if not fileBlock in affectedBlocks:
            affectedBlocks.append(fileBlock)

        ## Do bulk inserts now for DBS
        #filesToCommit = []
        #count = 0
        #count2 = 0
        #for file in insertFiles:
        #    count += 1
        #    #Try and close the box
        #    logging.error("Should have a file")
        #    logging.error(len(filesToCommit))
        #    count2 += len(filesToCommit)
        #    if self.manageFileBlock(fileBlock = fileBlock, maxFiles = maxFiles,
        #                            maxSize = maxSize, timeOut = timeOut, algos = ialgos,
        #                            filesToCommit = filesToCommit, procDataset = procDataset):
        #        fileBlock['OpenForWriting'] = '0'
        #        if not fileBlock in affectedBlocks:
        #            affectedBlocks.append(fileBlock)
        #
        #
        #
        #        # Then we need a new block
        #        try:
        #            fileBlock = DBSWriterObjects.getDBSFileBlock(
        #                self.dbs,
        #                procDataset,
        #                seName)
        #            fileBlock['files'] = []
        #        except DbsException, ex:
        #            msg = "Error in DBSWriter.insertFilesForDBSBuffer\n"
        #            msg += "Cannot retrieve FileBlock for dataset:\n"
        #            msg += " %s\n" % procDataset['Path']
        #            msg += "%s\n" % formatEx(ex)
        #            raise DBSWriterError(msg)
        #
        #    #At this point, we should commit the block as is
        #    fileBlock['files'].append(file['LogicalFileName'])
        #    if jobType == "MergeSpecial":
        #        for file in fileList:
        #            file['Block'] = fileBlock
        #            msg="calling: self.dbs.insertMergedFile(%s, %s)" % (str(file['ParentList']),str(file))
        #            logging.debug(msg)
        #            try:
        #                #
        #                #
        #                # NOTE To Anzar From Anzar (File cloning as in DBS API can be done here and then I can use Bulk insert on Merged files as well)
        #                self.dbs.insertMergedFile(file['ParentList'],
        #                                          file)
        #
        #            except DbsException, ex:
        #                msg = "Error in DBSWriter.insertFiles\n"
        #                msg += "Cannot insert merged file:\n"
        #                msg += "  %s\n" % file['LogicalFileName']
        #                msg += "%s\n" % formatEx(ex)
        #                raise DBSWriterError(msg)
        #            logging.debug("Inserted merged file: %s to FileBlock: %s"%(file['LogicalFileName'],fileBlock['Name']))
        #    else:
        #        filesToCommit.append(file)
        #        if len(filesToCommit) >= fileCommitLength:
        #            # Only commit the files if there are more of them then the maximum length
        #            try:
        #                logging.error("About to commit %i files" %(len(filesToCommit)))
        #                count2 += len(filesToCommit)
        #                self.dbs.insertFiles(procDataset, filesToCommit, fileBlock)
        #                filesToCommit = []
        #                logging.debug("Inserted files: %s to FileBlock: %s" \
        #                              % ( ([ x['LogicalFileName'] for x in insertFiles ]),fileBlock['Name']))
        #
        #            except DbsException, ex:
        #                msg = "Error in DBSWriter.insertFiles\n"
        #                msg += "Cannot insert processed files:\n"
        #                msg += " %s\n" % ([ x['LogicalFileName'] for x in insertFiles ],)
        #                msg += "%s\n" % formatEx(ex)
        #                raise DBSWriterError(msg)
        #
        #
        #
        ## If we still have files to commit, commit them
        #logging.error("Got to the end of the loop")
        #logging.error(len(filesToCommit))
        #logging.error(count2)
        #if len(filesToCommit) > 0:
        #    try:
        #        logging.error("About to insert some files")
        #        self.dbs.insertFiles(procDataset, filesToCommit, fileBlock)
        #        filesToCommit = []
        #        logging.debug("Inserted files: %s to FileBlock: %s" \
        #                      % ( ([ x['LogicalFileName'] for x in insertFiles ]),fileBlock['Name']))
        #
        #    except DbsException, ex:
        #        msg = "Error in DBSWriter.insertFiles\n"
        #        msg += "Cannot insert processed files:\n"
        #        msg += " %s\n" % ([ x['LogicalFileName'] for x in insertFiles ],)
        #        msg += "%s\n" % formatEx(ex)
        #        raise DBSWriterError(msg)

        if not fileBlock in affectedBlocks:
            affectedBlocks.append(fileBlock)

        return list(affectedBlocks)
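    # Hypothetical usage sketch (not part of the original class): outlines how
    # insertFilesForDBSBuffer is typically driven. The files argument is
    # assumed to be a list of DBSBuffer file-like objects (each providing
    # getRuns(), getParentLFNs(), 'locations' and 'checksums'), procDataset a
    # processed dataset carrying a 'Path', and algos a list of algorithm
    # dictionaries. All "demo" names are placeholders for illustration only.
    def demoInsertBufferFiles(self, demoFiles, demoProcDataset, demoAlgos):
        # Blocks are opened and closed automatically; the returned list
        # reports every block that was touched so callers can mark them
        # for migration.
        touchedBlocks = self.insertFilesForDBSBuffer(demoFiles,
                                                     demoProcDataset,
                                                     demoAlgos,
                                                     jobType="NotMerge",
                                                     fileCommitLength=5)
        for blk in touchedBlocks:
            logging.debug("Affected block: %s", blk['Name'])
        return touchedBlocks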
                          LumiEndTime=endLumiTime2,
                          RunNumber=runNumber,
                          )

blockObj1 = DbsFileBlock(Name=blockName1)
blockObj2 = DbsFileBlock(Name=blockName2)
blockObjM = DbsFileBlock(Name=blockNameM)

fileObj1 = DbsFile(Checksum=fileCkecksum1,
                   LogicalFileName=fileName1,
                   NumberOfEvents=fileNumEvents1,
                   FileSize=fileSize1,
                   Status=fileStatus1,
                   ValidationStatus=fileValidStatus1,
                   FileType=fileType1,
                   Dataset=procObj1,
                   AlgoList=[algoObj1],
                   LumiList=[lumiObj1],
                   TierList=[tier1, tier2],
                   AutoCrossSection=1.0)

fileObj2 = DbsFile(Checksum=fileCkecksum2,
                   LogicalFileName=fileName2,
                   NumberOfEvents=fileNumEvents2,
                   FileSize=fileSize2,
                   Status=fileStatus2,
                   ValidationStatus=fileValidStatus2,
                   FileType=fileType2,
                   Dataset=procObj2,
                   AlgoList=[algoObj2],
"\n***********************insertLumiSection API tests***************************" ) apiObj = DbsUnitTestApi(api.insertFiles, f) apiObj.setVerboseLevel(opts.verbose) f.write( "\n\n***********************insertFiles API tests***************************" ) lfn1 = '1111-0909-9767-8764' + mytime lfn2 = '1111-0909-9767-876411' + mytime file1 = DbsFile( Checksum='999', LogicalFileName=lfn1, #QueryableMetadata= 'This is a test file', NumberOfEvents=10000, FileSize=12340, Status='VALID', ValidationStatus='VALID', FileType='EVD', LumiList=[lumi1, lumi2], TierList=tierList, ) file2 = DbsFile( Checksum='999', LogicalFileName=lfn2, #QueryableMetadata= 'This is a test file', NumberOfEvents=10000, FileSize=12340, Status='VALID', ValidationStatus='VALID', FileType='EVD',
    def insertFilesForDBSBuffer(self, files, procDataset, algos,
                                jobType="NotMerge", insertDetectorData=False):
        """
        _insertFiles_

        list of files inserted in DBS
        """
        #TODO: What's the purpose of insertDetectorData
        if len(files) < 1:
            return
        affectedBlocks = set()
        insertFiles = []
        addedRuns = []
        seName = None

        #Get the algos in insertable form
        ialgos = [DBSWriterObjects.createAlgorithmForInsert(dict(algo))
                  for algo in algos]

        for outFile in files:
            #  //
            # //  Convert each file into a DBS File object
            #//
            lumiList = []

            #Something similar should be the real deal when multiple runs/lumi could be returned from wmbs file
            for runlumiinfo in outFile.getRuns():
                lrun = long(runlumiinfo.run)
                run = DbsRun(
                    RunNumber=lrun,
                    NumberOfEvents=0,
                    NumberOfLumiSections=0,
                    TotalLuminosity=0,
                    StoreNumber=0,
                    StartOfRun=0,
                    EndOfRun=0,
                )
                #Only added if not added by another file in this loop, why waste a call to DBS
                if lrun not in addedRuns:
                    self.dbs.insertRun(run)
                    addedRuns.append(lrun)  #save it so we do not try to add it again to DBS
                    logging.debug("run %s added to DBS " % str(lrun))
                for alsn in runlumiinfo:
                    lumi = DbsLumiSection(
                        LumiSectionNumber=long(alsn),
                        StartEventNumber=0,
                        EndEventNumber=0,
                        LumiStartTime=0,
                        LumiEndTime=0,
                        RunNumber=lrun,
                    )
                    lumiList.append(lumi)

            logging.debug("lumi list created for the file")

            dbsfile = DbsFile(
                Checksum=str(outFile['cksum']),
                NumberOfEvents=outFile['events'],
                LogicalFileName=outFile['lfn'],
                FileSize=int(outFile['size']),
                Status="VALID",
                ValidationStatus='VALID',
                FileType='EDM',
                Dataset=procDataset,
                TierList=DBSWriterObjects.makeTierList(
                    procDataset['Path'].split('/')[3]),
                AlgoList=ialgos,
                LumiList=lumiList,
                ParentList=outFile.getParentLFNs(),
                #BranchHash = outFile['BranchHash'],
            )

            #This check comes from ProdAgent, not sure if it's required
            if len(outFile["locations"]) > 0:
                seName = list(outFile["locations"])[0]
                logging.debug("SEname associated to file is: %s" % seName)
            else:
                msg = "Error in DBSWriter.insertFiles\n"
                msg += "No SEname associated to file"
                #print "FAKING seName for now"
                #seName="cmssrm.fnal.gov"
                raise DBSWriterError(msg)
            insertFiles.append(dbsfile)

        #  // Processing Jobs:
        # //  Insert the lists of sorted files into the appropriate
        #//   fileblocks
        try:
            fileBlock = DBSWriterObjects.getDBSFileBlock(
                self.dbs,
                procDataset,
                seName)
        except DbsException, ex:
            msg = "Error in DBSWriter.insertFiles\n"
            msg += "Cannot retrieve FileBlock for dataset:\n"
            msg += " %s\n" % procDataset['Path']
            #msg += "In Storage Element:\n %s\n" % insertFiles.seName
            msg += "%s\n" % formatEx(ex)
            raise DBSWriterError(msg)
lumi2 = DbsLumiSection(LumiSectionNumber=1333,
                       StartEventNumber=100,
                       EndEventNumber=200,
                       LumiStartTime='notime',
                       LumiEndTime='neverending',
                       RunNumber=1,
                       )

myfile1 = DbsFile(Checksum='999',
                  LogicalFileName='aaa1122-0909-9767-8764aaa',
                  #QueryableMetadata= 'This is a test file',
                  NumberOfEvents=10000,
                  FileSize=12340,
                  Status='VALID',
                  ValidationStatus='VALID',
                  FileType='EVD',
                  Dataset=proc,
                  #Block= isDictType,
                  LumiList=[lumi1, lumi2],
                  TierList=['SIM', 'RECO'],
                  )

myfile2 = DbsFile(Checksum='000',
                  LogicalFileName='aaaa2233-0909-9767-8764aaa',
                  #QueryableMetadata= 'This is a test file',
                  NumberOfEvents=10000,
                  FileSize=12340,
                  Status='VALID',
                  ValidationStatus='VALID',
lumi3 = DbsLumiSection(LumiSectionNumber=1333,
                       StartEventNumber=100,
                       EndEventNumber=200,
                       LumiStartTime=1233,
                       LumiEndTime=1234,
                       RunNumber=1,
                       )

myfile1 = DbsFile(Checksum='999',
                  NumberOfEvents=10000,
                  FileSize=12340,
                  Status='VALID',
                  ValidationStatus='VALID',
                  FileType='EDM',
                  Dataset=proc,
                  AlgoList=[algo],
                  LumiList=[lumi1, lumi2],
                  TierList=['GEN', 'SIM'],
                  BranchHash="001234565798685",
                  #ParentList = ['NEW_TEST0003']
                  )

# Make a choice
block = DbsFileBlock(StorageElement=['test1', 'test3'],
                     )
block['Name'] = "/test_primary_001/TestProcessedDS001/GEN-SIM#12345-" + str(HOW_MANY_FILES)

print "Inserting Files Into", api.insertBlock(proc, block)
#print "Wait........"

try: