def __init__(self, blockName, localDbsUrl, globalDbsUrl, datasetPath):
    """
    Record the fileblock and dataset being handled and open a
    DBSWriter against both the local and the global DBS instance.
    """
    # What is being migrated.
    self.block = blockName
    self.dataset = datasetPath
    # Local (source) DBS endpoint.
    self.localUrl = localDbsUrl
    self.localDbs = DBSWriter(localDbsUrl)
    # Global (destination) DBS endpoint.
    self.globalUrl = globalDbsUrl
    self.globalDbs = DBSWriter(globalDbsUrl)
def publishAJobReport(self, file, procdataset):
    """
    Publish the files of a single FrameworkJobReport into DBS.

    input: xml file, processedDataset
    Returns the list of blocks the files were inserted into, or None
    when the report contains nothing publishable.
    """
    common.logger.debug("FJR = %s" % file)
    try:
        # readJobReport returns a list; an empty list means the xml
        # could not be parsed into any report.
        jobReport = readJobReport(file)[0]
        self.exit_status = '0'
    except IndexError:
        self.exit_status = '1'
        msg = "Error: Problem with " + file + " file"
        raise CrabException(msg)
    ### skip publication for 0 events files
    filestopublish = []
    for file in jobReport.files:
        #### added check for problem with copy to SE and empty lfn
        if (string.find(file['LFN'], 'copy_problems') != -1):
            # stage-out to the SE failed for this file
            self.problemFiles.append(file['LFN'])
        elif (file['LFN'] == ''):
            # no LFN assigned: cannot be published, remember the PFN
            self.noLFN.append(file['PFN'])
        else:
            if int(file['TotalEvents']) == 0:
                # recorded for the warning summary; the file is still
                # appended to filestopublish below
                self.noEventsFiles.append(file['LFN'])
            for ds in file.dataset:
                ### Fede for production
                # production reports carry a 'null' primary dataset;
                # replace it with the user's processed-data name
                if (ds['PrimaryDataset'] == 'null'):
                    ds['PrimaryDataset'] = self.userprocessedData
            filestopublish.append(file)
    # restrict the report to the publishable files only
    jobReport.files = filestopublish
    for file in filestopublish:
        common.logger.debug("--->>> LFN of file to publish = " + str(file['LFN']))
    ### if all files of FJR have number of events = 0
    if (len(filestopublish) == 0):
        return None
    #// DBS to contact
    dbswriter = DBSWriter(self.DBSURL)
    # insert files
    Blocks = None
    try:
        ### FEDE added insertDetectorData = True to propagate in DBS info about run and lumi
        Blocks = dbswriter.insertFiles(jobReport, insertDetectorData=True)
        #Blocks=dbswriter.insertFiles(jobReport)
        common.logger.debug("--->>> Inserting file in blocks = %s" % Blocks)
    except DBSWriterError, ex:
        common.logger.debug("--->>> Insert file error: %s" % ex)
    # NOTE(review): no explicit `return Blocks` is visible here, so the
    # method implicitly returns None on this path even though callers
    # (DBS2Publish / run) use the returned Blocks — confirm against the
    # full file whether a trailing return was truncated.
#!/usr/bin/env python
"""
One-off script: register the merge-step algorithm in DBS and attach
it to a Tier-0 processed dataset.
"""
from ProdCommon.DataMgmt.DBS.DBSWriter import DBSWriter
from ProdCommon.DataMgmt.DBS.DBSWriterObjects import createAlgorithm

dbsUrl = "https://cmst0dbs.cern.ch:8443/cms_dbs_prod_tier0_writer/servlet/DBSServlet"
procDataset = "/Cosmics/CRUZET3-v1/RAW"

writer = DBSWriter(dbsUrl)

# Framework/application description of the merge step.
datasetInfo = {
    'ApplicationName': "cmsRun",
    'ApplicationVersion': "CMSSW_2_0_10",
    "ApplicationFamily": "Merged",
    'PSetHash': "PSET_HASH_NOT_AVAILABLE",
    'PSetContent': "PSET CONTENT NOT AVAILABLE",
}

# Bookkeeping metadata for the auto-generated configuration.
configMetadata = {
    'name': "RepackerMerger-%s" % procDataset,  # need dataset name
    'version': "AutoGenerated",
    'annotation': "AutoGenerated By Tier 0",
    'Type': "data",  # RequestCategory
}

# Build the algorithm object, insert it, and bind it to the dataset.
algo = createAlgorithm(datasetInfo, configMetadata)
writer.dbs.insertAlgorithm(algo)
writer.dbs.insertAlgoInPD(procDataset, algo)
# Pull the three mandatory migration parameters out of the parsed
# command-line options (opts/usage are defined earlier in the script).
dataset = None   # dataset path to migrate
source = None    # DBS instance to import from
target = None    # DBS instance to import into
for opt, arg in opts:
    if opt == "--dataset":
        dataset = arg
    if opt == "--source":
        source = arg
    if opt == "--target":
        target = arg
# All three options are required; bail out with the usage text otherwise.
if dataset == None or source == None or target == None:
    print usage
    sys.exit(1)
print 'Migrating dataset: %s from %s to %s' % (dataset, source, target)
# A DBSWriter pointed at the destination performs the actual import.
dbsWriter = DBSWriter(target)
try:
    dbsWriter.importDataset(source, dataset, target)
except Exception, ex:
    # Report every parameter of the failed migration before exiting.
    msg = "Error importing dataset to be processed into local DBS\n"
    msg += "Source Dataset: %s\n" % dataset
    msg += "Source DBS: %s\n" % source
    msg += "Destination DBS: %s\n" % target
    msg += str(ex)
    logging.error(msg)
    sys.exit(1)
def DBS2Publish(self, good_list):
    """
    Publish the successful job reports in good_list to DBS2.

    Publishes the dataset from the first report, then the files of
    every report, closes the touched blocks, prints warning summaries
    for unpublishable files, and finally checks the publication with
    InspectDBS.  Returns self.exit_status ('0' ok / '1' failure).
    """
    ####################################################
    # Optionally strip input-file information from the reports.
    if self.no_inp == 1:
        file_list = self.remove_input_from_fjr(good_list)
    else:
        file_list = good_list
    print "file_list = ", file_list
    ####################################################
    common.logger.log(
        10 - 1,
        "fjr with FrameworkJobReport Status='Success', file_list = " +
        str(file_list))
    common.logger.log(10 - 1, "len(file_list) = " + str(len(file_list)))
    if (len(file_list) > 0):
        BlocksList = []
        common.logger.info("--->>> Start dataset publication")
        # Dataset-level entries are created from the first report only.
        self.exit_status = self.publishDataset(file_list[0])
        if (self.exit_status == '1'):
            return self.exit_status
        common.logger.info("--->>> End dataset publication")
        common.logger.info("--->>> Start files publication")
        for file in file_list:
            Blocks = self.publishAJobReport(file, self.processedData)
            if Blocks:
                for x in Blocks:
                    # do not allow multiple entries of the same block
                    if x not in BlocksList:
                        BlocksList.append(x)
        # close the blocks
        common.logger.log(10 - 1, "BlocksList = %s" % BlocksList)
        dbswriter = DBSWriter(self.DBSURL)
        for BlockName in BlocksList:
            try:
                closeBlock = dbswriter.manageFileBlock(BlockName, maxFiles=1)
                common.logger.log(10 - 1, "closeBlock %s" % closeBlock)
            except DBSWriterError, ex:
                # best-effort: a block that fails to close is only logged
                common.logger.info("Close block error %s" % ex)
        # Warning summaries for files that were skipped or empty.
        if (len(self.noEventsFiles) > 0):
            common.logger.info("--->>> WARNING: " +
                               str(len(self.noEventsFiles)) +
                               " published files contain 0 events are:")
            for lfn in self.noEventsFiles:
                common.logger.info("------ LFN: %s" % lfn)
        if (len(self.noLFN) > 0):
            common.logger.info(
                "--->>> WARNING: there are " + str(len(self.noLFN)) +
                " files not published because they have empty LFN")
            for pfn in self.noLFN:
                common.logger.info("------ pfn: %s" % pfn)
        if (len(self.problemFiles) > 0):
            common.logger.info(
                "--->>> WARNING: " + str(len(self.problemFiles)) +
                " files not published because they had problem with copy to SE"
            )
            for lfn in self.problemFiles:
                common.logger.info("------ LFN: %s" % lfn)
        common.logger.info("--->>> End files publication")
        #### FEDE for MULTI ####
        # Verify every published dataset via InspectDBS.
        for dataset_to_check in self.published_datasets:
            self.cfg_params['USER.dataset_to_check'] = dataset_to_check
            from InspectDBS import InspectDBS
            check = InspectDBS(self.cfg_params)
            check.checkPublication()
        #########################
        return self.exit_status
def publishDataset(self, file):
    """
    Create the dataset-level DBS entries from one FrameworkJobReport.

    Imports any parent datasets first, then for every dataset described
    in the report creates the primary dataset, algorithm and processed
    dataset in the local DBS.  Returns self.exit_status ('0' ok / '1'
    failure).
    """
    try:
        jobReport = readJobReport(file)[0]
        self.exit_status = '0'
    except IndexError:
        self.exit_status = '1'
        msg = "Error: Problem with " + file + " file"
        common.logger.info(msg)
        return self.exit_status
    # Import parent datasets from the global DBS, if any are configured.
    if (len(self.dataset_to_import) != 0):
        for dataset in self.dataset_to_import:
            common.logger.info(
                "--->>> Importing parent dataset in the dbs: " + dataset)
            status_import = self.importParentDataset(
                self.globalDBS, dataset)
            if (status_import == 1):
                common.logger.info('Problem with parent ' + dataset +
                                   ' import from the global DBS ' +
                                   self.globalDBS + 'to the local one ' +
                                   self.DBSURL)
                self.exit_status = '1'
                return self.exit_status
            else:
                common.logger.info('Import ok of dataset ' + dataset)
    if (len(jobReport.files) <= 0):
        self.exit_status = '1'
        msg = "Error: No EDM file to publish in xml file" + file + " file"
        common.logger.info(msg)
        return self.exit_status
    else:
        msg = "fjr contains some files to publish"
        common.logger.debug(msg)
    #### datasets creation in dbs
    #// DBS to contact write and read of the same dbs
    dbsReader = DBSReader(self.DBSURL, level='ERROR')
    dbswriter = DBSWriter(self.DBSURL)
    #####
    self.published_datasets = []
    for fileinfo in jobReport.files:
        datasets_info = fileinfo.dataset
        if len(datasets_info) <= 0:
            self.exit_status = '1'
            msg = "Error: No info about dataset in the xml file " + file
            common.logger.info(msg)
            return self.exit_status
        else:
            for dataset in datasets_info:
                #### for production data
                # remember the processed-dataset name for the file
                # publication step (used by the caller)
                self.processedData = dataset['ProcessedDataset']
                if (dataset['PrimaryDataset'] == 'null'):
                    # production reports: substitute the user name
                    dataset['PrimaryDataset'] = self.userprocessedData
                elif self.datasetpath.upper() != 'NONE':
                    # analysis: record the input dataset as parent
                    dataset['ParentDataset'] = self.datasetpath
                dataset['PSetContent'] = self.content
                cfgMeta = {
                    'name': self.pset,
                    'Type': 'user',
                    'annotation': 'user cfg',
                    'version': 'private version'
                }  # add real name of user cfg
                common.logger.info("PrimaryDataset = %s" %
                                   dataset['PrimaryDataset'])
                common.logger.info("ProcessedDataset = %s" %
                                   dataset['ProcessedDataset'])
                common.logger.info("<User Dataset Name> = /" +
                                   dataset['PrimaryDataset'] + "/" +
                                   dataset['ProcessedDataset'] + "/USER")
                self.dataset_to_check = "/" + dataset[
                    'PrimaryDataset'] + "/" + dataset[
                        'ProcessedDataset'] + "/USER"
                self.published_datasets.append(self.dataset_to_check)
                common.logger.log(
                    10 - 1, "--->>> Inserting primary: %s processed : %s" %
                    (dataset['PrimaryDataset'],
                     dataset['ProcessedDataset']))
                #### check if dataset already exists in the DBS
                result = dbsReader.matchProcessedDatasets(
                    dataset['PrimaryDataset'], 'USER',
                    dataset['ProcessedDataset'])
                if (len(result) != 0):
                    # NOTE(review): the file listing is fetched but the
                    # value is immediately discarded below — confirm
                    # whether it was meant to guard the inserts.
                    result = dbsReader.listDatasetFiles(
                        self.dataset_to_check)
                # Create primary dataset, algorithm and processed
                # dataset entries (idempotent on the DBS side).
                primary = DBSWriterObjects.createPrimaryDataset(
                    dataset, dbswriter.dbs)
                common.logger.log(10 - 1, "Primary: %s " % primary)
                print "primary = ", primary
                algo = DBSWriterObjects.createAlgorithm(
                    dataset, cfgMeta, dbswriter.dbs)
                common.logger.log(10 - 1, "Algo: %s " % algo)
                processed = DBSWriterObjects.createProcessedDataset(
                    primary, algo, dataset, dbswriter.dbs)
                common.logger.log(10 - 1, "Processed: %s " % processed)
                print "processed = ", processed
                common.logger.log(
                    10 - 1, "Inserted primary %s processed %s" %
                    (primary, processed))
    #######################################################################################
    common.logger.log(10 - 1, "exit_status = %s " % self.exit_status)
    return self.exit_status
def run(self):
    """
    Parse all FrameworkJobReport xml files in the res dir, keep the
    successful ones, and publish their datasets and files to DBS.

    Returns self.exit_status ('0' ok / '1' failure).
    """
    task = common._db.getTask()
    good_list=[]
    # Select the reports of jobs that finished successfully.
    for job in task.getJobs():
        fjr = self.fjrDirectory + job['outputFiles'][-1]
        # skip jobs whose application or wrapper failed
        if (job.runningJob['applicationReturnCode']!=0 or
            job.runningJob['wrapperReturnCode']!=0): continue
        # get FJR filename
        # NOTE(review): this assignment duplicates the one above the
        # return-code check — harmless, but one of the two is redundant.
        fjr = self.fjrDirectory + job['outputFiles'][-1]
        reports = readJobReport(fjr)
        if len(reports)>0:
           if reports[0].status == "Success":
              good_list.append(fjr)
    ####################################################
    # Optionally strip input-file information from the reports.
    if self.no_inp == 1:
        file_list = self.remove_input_from_fjr(good_list)
    else:
        file_list=good_list
    print "file_list = ", file_list
    ####################################################
    common.logger.log(10-1, "fjr with FrameworkJobReport Status='Success', file_list = "+str(file_list))
    common.logger.log(10-1, "len(file_list) = "+str(len(file_list)))
    if (len(file_list)>0):
        BlocksList=[]
        common.logger.info("--->>> Start dataset publication")
        # Dataset-level entries are created from the first report only.
        self.exit_status=self.publishDataset(file_list[0])
        if (self.exit_status == '1'):
            return self.exit_status
        common.logger.info("--->>> End dataset publication")
        common.logger.info("--->>> Start files publication")
        for file in file_list:
            Blocks=self.publishAJobReport(file,self.processedData)
            if Blocks:
                for x in Blocks: # do not allow multiple entries of the same block
                    if x not in BlocksList:
                       BlocksList.append(x)
        # close the blocks
        common.logger.log(10-1, "BlocksList = %s"%BlocksList)
        dbswriter = DBSWriter(self.DBSURL)
        for BlockName in BlocksList:
            try:
                closeBlock=dbswriter.manageFileBlock(BlockName,maxFiles= 1)
                common.logger.log(10-1, "closeBlock %s"%closeBlock)
            except DBSWriterError, ex:
                # best-effort: a block that fails to close is only logged
                common.logger.info("Close block error %s"%ex)
        # Warning summaries for files that were skipped or empty.
        if (len(self.noEventsFiles)>0):
            common.logger.info("--->>> WARNING: "+str(len(self.noEventsFiles))+" published files contain 0 events are:")
            for lfn in self.noEventsFiles:
                common.logger.info("------ LFN: %s"%lfn)
        if (len(self.noLFN)>0):
            common.logger.info("--->>> WARNING: there are "+str(len(self.noLFN))+" files not published because they have empty LFN")
            for pfn in self.noLFN:
                common.logger.info("------ pfn: %s"%pfn)
        if (len(self.problemFiles)>0):
            common.logger.info("--->>> WARNING: "+str(len(self.problemFiles))+" files not published because they had problem with copy to SE")
            for lfn in self.problemFiles:
                common.logger.info("------ LFN: %s"%lfn)
        common.logger.info("--->>> End files publication")
        #### FEDE for MULTI ####
        # Verify every published dataset via InspectDBS.
        for dataset_to_check in self.published_datasets:
            self.cfg_params['USER.dataset_to_check']=dataset_to_check
            from InspectDBS import InspectDBS
            check=InspectDBS(self.cfg_params)
            check.checkPublication()
        #########################
        return self.exit_status