def importDataset(self): """ _importDataset_ Import the Dataset contents and inject it into the DB. """ # // # // Import the dataset to be processed into the local DBS #// localDBS = getLocalDBSURL() dbsWriter = DBSWriter(localDBS) globalDBS = getGlobalDBSURL() try: dbsWriter.importDataset( globalDBS, self.inputDataset(), localDBS, self.onlyClosedBlocks ) except Exception, ex: msg = "Error importing dataset to be processed into local DBS\n" msg += "Source Dataset: %s\n" % self.inputDataset() msg += "Source DBS: %s\n" % globalDBS msg += "Destination DBS: %s\n" % localDBS msg += str(ex) logging.error(msg) return 1
def __init__(self, blockName, localDbsUrl, globalDbsUrl, datasetPath):
    self.block = blockName
    self.dataset = datasetPath
    self.localDbs = DBSWriter(localDbsUrl)
    self.localUrl = localDbsUrl
    self.globalDbs = DBSWriter(globalDbsUrl)
    self.globalUrl = globalDbsUrl
class BlockManager:
    """
    _BlockManager_

    File block manager.

    Instantiate for a given block and provide API calls to close the block,
    migrate it to global DBS and inject the block to PhEDEx

    """
    def __init__(self, blockName, localDbsUrl, globalDbsUrl, datasetPath):
        self.block = blockName
        self.dataset = datasetPath
        self.localDbs = DBSWriter(localDbsUrl)
        self.localUrl = localDbsUrl
        self.globalDbs = DBSWriter(globalDbsUrl)
        self.globalUrl = globalDbsUrl

    def closeBlock(self):
        """
        _closeBlock_

        Close the file block

        """
        # //
        # // Close block if it has > 0 files in it. IE, force closure of block
        #//
        self.localDbs.manageFileBlock(self.block, maxFiles=1)
        return

    def migrateToGlobalDBS(self):
        """
        _migrateToGlobalDBS_

        Migrate the block to the global DBS Url provided

        """
        self.globalDbs.migrateDatasetBlocks(self.localUrl, self.dataset,
                                            [self.block])
        return

    def injectBlockToPhEDEx(self, phedexConfig, nodes=None):
        """
        _injectBlockToPhEDEx_

        Inject the file block to PhEDEx

        """
        tmdbInjectBlock(self.globalUrl, self.dataset, self.block,
                        phedexConfig,
                        "/tmp",  # temp dir to create drops
                        nodes)
        return
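# A minimal usage sketch for the BlockManager class above. All of the concrete
# values below are placeholders chosen for illustration: the block name, the
# local DBS URL, the dataset path, the PhEDEx DBParam path and the node list
# are assumptions, not values taken from the original code.
manager = BlockManager(
    "/Cosmics/CRUZET3-v1/RAW#12345678-aaaa-bbbb-cccc-1234567890ab",  # hypothetical block
    "http://localhost:8080/DBS/servlet/DBSServlet",                  # hypothetical local DBS
    "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet",
    "/Cosmics/CRUZET3-v1/RAW")

manager.closeBlock()           # force closure of the block in the local DBS
manager.migrateToGlobalDBS()   # copy the closed block to the global DBS
manager.injectBlockToPhEDEx("/path/to/DBParam",          # hypothetical PhEDEx config
                            nodes=["T1_Example_Buffer"])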
def importDataset(self): """ _importDataset_ Import the Input Dataset contents and inject it into the DB. The DBSWriter.importDataset should also import the parent Dataset. The parent importing seems to work with DBS_2_0_8 """ # // # // Getting Local and Global DBS URLs #// localDBS = getLocalDBSURL() dbsWriter = DBSWriter(localDBS) globalDBS = self.dbsUrl try: dbsWriter.importDataset( globalDBS, self.inputDataset(), localDBS, onlyClosed=self.onlyClosedBlocks, skipNoSiteError=True ) except Exception, ex: msg = "Error importing dataset to be processed into local DBS\n" msg += "Source Dataset: %s\n" % self.inputDataset() msg += "Source DBS: %s\n" % globalDBS msg += "Destination DBS: %s\n" % localDBS msg += str(ex) logging.error(msg) raise RuntimeError, msg
def publishAJobReport(self, file, procdataset):
    """
    input: xml file, processedDataset
    """
    common.logger.debug("FJR = %s" % file)
    try:
        jobReport = readJobReport(file)[0]
        self.exit_status = '0'
    except IndexError:
        self.exit_status = '1'
        msg = "Error: Problem with " + file + " file"
        raise CrabException(msg)
    ### skip publication for 0 events files
    filestopublish = []
    for file in jobReport.files:
        #### added check for problem with copy to SE and empty lfn
        if (string.find(file['LFN'], 'copy_problems') != -1):
            self.problemFiles.append(file['LFN'])
        elif (file['LFN'] == ''):
            self.noLFN.append(file['PFN'])
        else:
            if int(file['TotalEvents']) == 0:
                self.noEventsFiles.append(file['LFN'])
            for ds in file.dataset:
                ### Fede for production
                if (ds['PrimaryDataset'] == 'null'):
                    ds['PrimaryDataset'] = self.userprocessedData
            filestopublish.append(file)
    jobReport.files = filestopublish
    for file in filestopublish:
        common.logger.debug("--->>> LFN of file to publish = " + str(file['LFN']))
    ### if all files of FJR have number of events = 0
    if (len(filestopublish) == 0):
        return None

    #// DBS to contact
    dbswriter = DBSWriter(self.DBSURL)
    # insert files
    Blocks = None
    try:
        ### FEDE added insertDetectorData = True to propagate in DBS info about run and lumi
        Blocks = dbswriter.insertFiles(jobReport, insertDetectorData=True)
        #Blocks=dbswriter.insertFiles(jobReport)
        common.logger.debug("--->>> Inserting file in blocks = %s" % Blocks)
    except DBSWriterError, ex:
        common.logger.debug("--->>> Insert file error: %s" % ex)
def updateDataset(self):
    """
    _updateDataset_

    Look for new fileblocks not in the DB for this dataset and import them

    """
    owner = DatabaseAPI.ownerIndex(self.workflowSpec.workflowName())
    if owner == None:
        knownBlocks = []
    else:
        knownBlocks = DatabaseAPI.listKnownFileblocks(owner)

    logging.info("knownBlocks: %s" % str(knownBlocks))

    # //
    # // Create a new splitter from the DBS/DLS containing all
    #//  current fileblocks and filter out the blocks that are
    # // already known.
    # //
    #//
    # //
    # // Re Import the dataset to be processed into the local DBS
    #//  to get any new blocks and files
    localDBS = getLocalDBSURL()
    dbsWriter = DBSWriter(localDBS)
    globalDBS = getGlobalDBSURL()

    try:
        dbsWriter.importDataset(
            globalDBS,
            self.inputDataset(),
            localDBS,
            self.onlyClosedBlocks
            )
    except Exception, ex:
        msg = "Error importing dataset to be processed into local DBS\n"
        msg += "Source Dataset: %s\n" % self.inputDataset()
        msg += "Source DBS: %s\n" % self.dbsUrl
        msg += str(ex)
        logging.error(msg)
        return 1
def DBS2Publish(self, good_list):
    ####################################################
    if self.no_inp == 1:
        file_list = self.remove_input_from_fjr(good_list)
    else:
        file_list = good_list
    print "file_list = ", file_list
    ####################################################
    common.logger.log(10-1, "fjr with FrameworkJobReport Status='Success', file_list = " + str(file_list))
    common.logger.log(10-1, "len(file_list) = " + str(len(file_list)))
    if (len(file_list) > 0):
        BlocksList = []
        common.logger.info("--->>> Start dataset publication")
        self.exit_status = self.publishDataset(file_list[0])
        if (self.exit_status == '1'):
            return self.exit_status
        common.logger.info("--->>> End dataset publication")
        common.logger.info("--->>> Start files publication")
        for file in file_list:
            Blocks = self.publishAJobReport(file, self.processedData)
            if Blocks:
                for x in Blocks:  # do not allow multiple entries of the same block
                    if x not in BlocksList:
                        BlocksList.append(x)
        # close the blocks
        common.logger.log(10-1, "BlocksList = %s" % BlocksList)
        dbswriter = DBSWriter(self.DBSURL)
        for BlockName in BlocksList:
            try:
                closeBlock = dbswriter.manageFileBlock(BlockName, maxFiles=1)
                common.logger.log(10-1, "closeBlock %s" % closeBlock)
            except DBSWriterError, ex:
                common.logger.info("Close block error %s" % ex)
        if (len(self.noEventsFiles) > 0):
            common.logger.info("--->>> WARNING: " + str(len(self.noEventsFiles)) + " published files contain 0 events are:")
            for lfn in self.noEventsFiles:
                common.logger.info("------ LFN: %s" % lfn)
        if (len(self.noLFN) > 0):
            common.logger.info("--->>> WARNING: there are " + str(len(self.noLFN)) + " files not published because they have empty LFN")
            for pfn in self.noLFN:
                common.logger.info("------ pfn: %s" % pfn)
        if (len(self.problemFiles) > 0):
            common.logger.info("--->>> WARNING: " + str(len(self.problemFiles)) + " files not published because they had problem with copy to SE")
            for lfn in self.problemFiles:
                common.logger.info("------ LFN: %s" % lfn)
        common.logger.info("--->>> End files publication")

        #### FEDE for MULTI ####
        for dataset_to_check in self.published_datasets:
            self.cfg_params['USER.dataset_to_check'] = dataset_to_check
            from InspectDBS import InspectDBS
            check = InspectDBS(self.cfg_params)
            check.checkPublication()
        #########################

        return self.exit_status
#!/usr/bin/env python

from ProdCommon.DataMgmt.DBS.DBSWriter import DBSWriter
from ProdCommon.DataMgmt.DBS.DBSWriterObjects import createAlgorithm

dbsUrl = "https://cmst0dbs.cern.ch:8443/cms_dbs_prod_tier0_writer/servlet/DBSServlet"
procDataset = "/Cosmics/CRUZET3-v1/RAW"

writer = DBSWriter(dbsUrl)

datasetInfo = {}
datasetInfo['ApplicationName'] = "cmsRun"
datasetInfo['ApplicationVersion'] = "CMSSW_2_0_10"
datasetInfo["ApplicationFamily"] = "Merged"
datasetInfo['PSetHash'] = "PSET_HASH_NOT_AVAILABLE"
datasetInfo['PSetContent'] = "PSET CONTENT NOT AVAILABLE"

configMetadata = {}
configMetadata['name'] = "RepackerMerger-%s" % procDataset  # need dataset name
configMetadata['version'] = "AutoGenerated"
configMetadata['annotation'] = "AutoGenerated By Tier 0"
configMetadata['Type'] = "data"  # RequestCategory

algo = createAlgorithm(datasetInfo, configMetadata)

writer.dbs.insertAlgorithm(algo)
writer.dbs.insertAlgoInPD(procDataset, algo)
if opt == "--dataset": dataset = arg if opt == "--source": source = arg if opt == "--target": target = arg if dataset == None or source == None or target == None : print usage sys.exit(1) print 'Migrating dataset: %s from %s to %s' % (dataset,source,target) dbsWriter = DBSWriter(target) try: dbsWriter.importDataset( source, dataset, target) except Exception, ex: msg = "Error importing dataset to be processed into local DBS\n" msg += "Source Dataset: %s\n" % dataset msg += "Source DBS: %s\n" % source msg += "Destination DBS: %s\n" % target msg += str(ex) logging.error(msg) sys.exit(1)
def publishDataset(self, file):
    """
    """
    try:
        jobReport = readJobReport(file)[0]
        self.exit_status = '0'
    except IndexError:
        self.exit_status = '1'
        msg = "Error: Problem with " + file + " file"
        common.logger.info(msg)
        return self.exit_status

    if (len(self.dataset_to_import) != 0):
        for dataset in self.dataset_to_import:
            common.logger.info("--->>> Importing parent dataset in the dbs: " + dataset)
            status_import = self.importParentDataset(self.globalDBS, dataset)
            if (status_import == 1):
                common.logger.info('Problem with parent ' + dataset + ' import from the global DBS '
                                   + self.globalDBS + 'to the local one ' + self.DBSURL)
                self.exit_status = '1'
                return self.exit_status
            else:
                common.logger.info('Import ok of dataset ' + dataset)

    if (len(jobReport.files) <= 0):
        self.exit_status = '1'
        msg = "Error: No EDM file to publish in xml file" + file + " file"
        common.logger.info(msg)
        return self.exit_status
    else:
        msg = "fjr contains some files to publish"
        common.logger.debug(msg)

    #### datasets creation in dbs
    #// DBS to contact write and read of the same dbs
    dbsReader = DBSReader(self.DBSURL, level='ERROR')
    dbswriter = DBSWriter(self.DBSURL)
    #####

    self.published_datasets = []
    for fileinfo in jobReport.files:
        datasets_info = fileinfo.dataset
        if len(datasets_info) <= 0:
            self.exit_status = '1'
            msg = "Error: No info about dataset in the xml file " + file
            common.logger.info(msg)
            return self.exit_status
        else:
            for dataset in datasets_info:
                #### for production data
                self.processedData = dataset['ProcessedDataset']
                if (dataset['PrimaryDataset'] == 'null'):
                    dataset['PrimaryDataset'] = self.userprocessedData
                elif self.datasetpath.upper() != 'NONE':
                    dataset['ParentDataset'] = self.datasetpath

                dataset['PSetContent'] = self.content
                cfgMeta = {'name': self.pset,
                           'Type': 'user',
                           'annotation': 'user cfg',
                           'version': 'private version'}  # add real name of user cfg
                common.logger.info("PrimaryDataset = %s" % dataset['PrimaryDataset'])
                common.logger.info("ProcessedDataset = %s" % dataset['ProcessedDataset'])
                common.logger.info("<User Dataset Name> = /" + dataset['PrimaryDataset'] + "/"
                                   + dataset['ProcessedDataset'] + "/USER")

                self.dataset_to_check = "/" + dataset['PrimaryDataset'] + "/" \
                                        + dataset['ProcessedDataset'] + "/USER"
                self.published_datasets.append(self.dataset_to_check)

                common.logger.log(10-1, "--->>> Inserting primary: %s processed : %s"
                                  % (dataset['PrimaryDataset'], dataset['ProcessedDataset']))

                #### check if dataset already exists in the DBS
                result = dbsReader.matchProcessedDatasets(dataset['PrimaryDataset'], 'USER',
                                                          dataset['ProcessedDataset'])
                if (len(result) != 0):
                    result = dbsReader.listDatasetFiles(self.dataset_to_check)

                primary = DBSWriterObjects.createPrimaryDataset(dataset, dbswriter.dbs)
                common.logger.log(10-1, "Primary: %s " % primary)
                print "primary = ", primary

                algo = DBSWriterObjects.createAlgorithm(dataset, cfgMeta, dbswriter.dbs)
                common.logger.log(10-1, "Algo: %s " % algo)

                processed = DBSWriterObjects.createProcessedDataset(primary, algo, dataset, dbswriter.dbs)
                common.logger.log(10-1, "Processed: %s " % processed)
                print "processed = ", processed

                common.logger.log(10-1, "Inserted primary %s processed %s" % (primary, processed))
    #######################################################################################
    common.logger.log(10-1, "exit_status = %s " % self.exit_status)
    return self.exit_status
dataset = None
source = None
target = None
for opt, arg in opts:
    if opt == "--dataset":
        dataset = arg
    if opt == "--source":
        source = arg
    if opt == "--target":
        target = arg

if dataset == None or source == None or target == None:
    print usage
    sys.exit(1)

print 'Migrating dataset: %s from %s to %s' % (dataset, source, target)

dbsWriter = DBSWriter(target)
try:
    dbsWriter.importDataset(source, dataset, target)
except Exception, ex:
    msg = "Error importing dataset to be processed into local DBS\n"
    msg += "Source Dataset: %s\n" % dataset
    msg += "Source DBS: %s\n" % source
    msg += "Destination DBS: %s\n" % target
    msg += str(ex)
    logging.error(msg)
    sys.exit(1)
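# Hypothetical invocation of the migration script above (the script name and
# both DBS URLs below are assumptions used only for illustration):
#
#   python migrateDatasetToLocalDBS.py \
#       --dataset=/Cosmics/CRUZET3-v1/RAW \
#       --source=http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet \
#       --target=http://localhost:8080/DBS/servlet/DBSServlet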
jobReportFile.addRunAndLumi(long(runNumber), [])

datasetStrmr = jobReportFile.newDataset()
datasetStrmr['PrimaryDataset'] = primaryDataset
datasetStrmr['PrimaryDatasetType'] = 'data'
datasetStrmr['ProcessedDataset'] = processedDataset
datasetStrmr['DataTier'] = dataTier

jobReportFile['TotalEvents'] = nEvents
jobReportFile['SEName'] = "srm.cern.ch"

##jobReport.write('FrameworkJobReport.xml')

localDbsUrl = "https://cmst0dbs.cern.ch:8443/cms_dbs_prod_tier0_writer/servlet/DBSServlet"
dbswriter = DBSWriter(localDbsUrl, level='ERROR')

primary = DBSWriterObjects.createPrimaryDataset(datasetStrmr, dbswriter.dbs)

datasetStrmr['ApplicationName'] = appName
datasetStrmr['ApplicationVersion'] = appVersion
datasetStrmr['ApplicationFamily'] = 'DAQ'
datasetStrmr['PSetHash'] = 'NA'
datasetStrmr['PSetContent'] = 'NA'
algo = DBSWriterObjects.createAlgorithm(datasetStrmr, None, dbswriter.dbs)

processed = DBSWriterObjects.createProcessedDataset(primary, algo, datasetStrmr, dbswriter.dbs)

try:
    blocks = dbswriter.insertFiles(jobReport, insertDetectorData=True)
except Exception, ex:
    # The original excerpt ends inside the try block above; this handler is an
    # assumed completion added only so the snippet parses.
    print "insertFiles failed: %s" % str(ex)
def run(self):
    """
    Parse all the xml files in the res dir and create the dictionary
    """
    task = common._db.getTask()
    good_list = []

    for job in task.getJobs():
        fjr = self.fjrDirectory + job['outputFiles'][-1]
        if (job.runningJob['applicationReturnCode'] != 0 or
                job.runningJob['wrapperReturnCode'] != 0):
            continue
        # get FJR filename
        fjr = self.fjrDirectory + job['outputFiles'][-1]
        reports = readJobReport(fjr)
        if len(reports) > 0:
            if reports[0].status == "Success":
                good_list.append(fjr)

    ####################################################
    if self.no_inp == 1:
        file_list = self.remove_input_from_fjr(good_list)
    else:
        file_list = good_list
    print "file_list = ", file_list
    ####################################################
    common.logger.log(10-1, "fjr with FrameworkJobReport Status='Success', file_list = " + str(file_list))
    common.logger.log(10-1, "len(file_list) = " + str(len(file_list)))
    if (len(file_list) > 0):
        BlocksList = []
        common.logger.info("--->>> Start dataset publication")
        self.exit_status = self.publishDataset(file_list[0])
        if (self.exit_status == '1'):
            return self.exit_status
        common.logger.info("--->>> End dataset publication")
        common.logger.info("--->>> Start files publication")
        for file in file_list:
            Blocks = self.publishAJobReport(file, self.processedData)
            if Blocks:
                for x in Blocks:  # do not allow multiple entries of the same block
                    if x not in BlocksList:
                        BlocksList.append(x)
        # close the blocks
        common.logger.log(10-1, "BlocksList = %s" % BlocksList)
        dbswriter = DBSWriter(self.DBSURL)
        for BlockName in BlocksList:
            try:
                closeBlock = dbswriter.manageFileBlock(BlockName, maxFiles=1)
                common.logger.log(10-1, "closeBlock %s" % closeBlock)
            except DBSWriterError, ex:
                common.logger.info("Close block error %s" % ex)
        if (len(self.noEventsFiles) > 0):
            common.logger.info("--->>> WARNING: " + str(len(self.noEventsFiles)) + " published files contain 0 events are:")
            for lfn in self.noEventsFiles:
                common.logger.info("------ LFN: %s" % lfn)
        if (len(self.noLFN) > 0):
            common.logger.info("--->>> WARNING: there are " + str(len(self.noLFN)) + " files not published because they have empty LFN")
            for pfn in self.noLFN:
                common.logger.info("------ pfn: %s" % pfn)
        if (len(self.problemFiles) > 0):
            common.logger.info("--->>> WARNING: " + str(len(self.problemFiles)) + " files not published because they had problem with copy to SE")
            for lfn in self.problemFiles:
                common.logger.info("------ LFN: %s" % lfn)
        common.logger.info("--->>> End files publication")

        #### FEDE for MULTI ####
        for dataset_to_check in self.published_datasets:
            self.cfg_params['USER.dataset_to_check'] = dataset_to_check
            from InspectDBS import InspectDBS
            check = InspectDBS(self.cfg_params)
            check.checkPublication()
        #########################

        return self.exit_status