def validateDataset(datasetPath, dbsUrl):
    """
    _validateDataset_

    Util method to check that the datasetPath provided
    exists in the dbsUrl provided

    """
    datasetDetails = DatasetConventions.parseDatasetPath(datasetPath)
    for key in ['Primary', 'DataTier', 'Processed']:
        if datasetDetails[key] is None:
            msg = "Invalid Dataset Name: \n ==> %s\n" % datasetPath
            msg += "Does not contain %s information" % key
            raise WorkflowMakerError(msg)

    datasets = []
    try:
        reader = DBSReader(dbsUrl)
        datasets = reader.matchProcessedDatasets(
            datasetDetails['Primary'],
            datasetDetails['DataTier'],
            datasetDetails['Processed'])
    except Exception, ex:
        msg = "Error calling DBS to validate dataset:\n%s\n" % datasetPath
        msg += str(ex)
        raise WorkflowMakerError(msg)

    # An empty match means the path is well formed but unknown to this
    # DBS instance, so validation must fail here as well
    if len(datasets) == 0:
        msg = "Dataset not found in DBS:\n ==> %s\n" % datasetPath
        raise WorkflowMakerError(msg)
    return
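# Illustrative usage sketch (not from the original source): validate a
# dataset path against a DBS instance before building a workflow. The
# dataset path and DBS URL below are placeholder assumptions.
if __name__ == '__main__':
    try:
        validateDataset('/MyPrimary/MyProcessed-v1/GEN-SIM-RECO',
                        'http://example.cern.ch/DBSServlet')
        print "dataset is valid"
    except WorkflowMakerError, ex:
        print "Dataset validation failed: %s" % str(ex)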
def checkPublication(self):
    """
    Check the publication of a dataset in a DBS instance
    """
    common.logger.info('--->>> Check data publication: dataset ' +
                       self.dataset_to_check + ' in DBS url ' +
                       self.DBSURL + '\n')
    #  //
    # // Get API to DBS
    #//
    dbsreader = DBSReader(self.DBSURL)
    #  //
    # // Get list of datasets
    #//
    if len(self.dataset_to_check.split('/')) < 4:
        msg = "The dataset name provided is not a valid /Primary/Processed/Tier path"
        raise CrabException(msg)
    else:
        primds = self.dataset_to_check.split('/')[1]
        procds = self.dataset_to_check.split('/')[2]
        tier = self.dataset_to_check.split('/')[3]
        datasets = dbsreader.matchProcessedDatasets(primds, tier, procds)
        if common.debugLevel:
            print "PrimaryDataset = ", primds
            print "ProcessedDataset = ", procds
            print "DataTier = ", tier
            print "datasets matching your requirements = ", datasets

    for dataset in datasets:
        #  //
        # // Get list of blocks for the dataset and their location
        #//
        if len(dataset.get('PathList')) == 0:
            print "===== Empty dataset yet /%s/%s with tiers %s" % (
                dataset.get('PrimaryDataset')['Name'],
                dataset.get('Name'), dataset.get('TierList'))
        else:
            for datasetpath in dataset.get('PathList'):
                nevttot = 0
                print "=== dataset %s" % datasetpath
                if dataset['Description'] is not None:
                    print "=== dataset description = ", dataset['Description']
                blocks = dbsreader.getFileBlocksInfo(datasetpath)
                for block in blocks:
                    # replace this with a DLS query
                    SEList = dbsreader.listFileBlockLocation(block['Name'])
                    print "===== File block name: %s" % block['Name']
                    print "      File block located at: ", SEList
                    print "      File block status: %s" % block['OpenForWriting']
                    print "      Number of files: %s" % block['NumberOfFiles']
                    print "      Number of Bytes: %s" % block['BlockSize']
                    print "      Number of Events: %s" % block['NumberOfEvents']
                    if common.debugLevel:
                        print "--------- info about files --------"
                        print " Size \t Events \t LFN \t FileStatus "
                        files = dbsreader.listFilesInBlock(block['Name'])
                        for file in files:
                            print "%s %s %s %s" % (file['FileSize'],
                                                   file['NumberOfEvents'],
                                                   file['LogicalFileName'],
                                                   file['Status'])
                    nevttot = nevttot + block['NumberOfEvents']
                print "\n total events: %s in dataset: %s\n" % (nevttot, datasetpath)

    if not common.debugLevel:
        common.logger.info('You can obtain more info about files of the dataset using: '
                           'crab -checkPublication -USER.dataset_to_check=' +
                           self.dataset_to_check + ' -USER.dbs_url_for_publication=' +
                           self.DBSURL + ' -debug')
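# A minimal standalone sketch (an addition, not part of the original
# class) of the /<PrimaryDataset>/<ProcessedDataset>/<DataTier> naming
# convention that checkPublication relies on when it splits
# self.dataset_to_check; the helper name is hypothetical.
def splitDatasetPath(dataset_path):
    parts = dataset_path.split('/')
    if len(parts) < 4:
        raise ValueError("expected /Primary/Processed/Tier, got %s" % dataset_path)
    return parts[1], parts[2], parts[3]

# e.g. splitDatasetPath('/MyPrimary/MyProcessed-v1/USER')
# returns ('MyPrimary', 'MyProcessed-v1', 'USER')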
def publishDataset(self, file):
    """
    Publish in DBS the datasets described by a framework job report
    """
    try:
        jobReport = readJobReport(file)[0]
        self.exit_status = '0'
    except IndexError:
        self.exit_status = '1'
        msg = "Error: Problem with " + file + " file"
        common.logger.info(msg)
        return self.exit_status

    if len(self.dataset_to_import) != 0:
        for dataset in self.dataset_to_import:
            common.logger.info("--->>> Importing parent dataset in the dbs: " + dataset)
            status_import = self.importParentDataset(self.globalDBS, dataset)
            if status_import == 1:
                common.logger.info('Problem with parent ' + dataset +
                                   ' import from the global DBS ' + self.globalDBS +
                                   ' to the local one ' + self.DBSURL)
                self.exit_status = '1'
                return self.exit_status
            else:
                common.logger.info('Import ok of dataset ' + dataset)

    if len(jobReport.files) <= 0:
        self.exit_status = '1'
        msg = "Error: No EDM file to publish in xml file " + file
        common.logger.info(msg)
        return self.exit_status
    else:
        msg = "fjr contains some files to publish"
        common.logger.debug(msg)

    #### datasets creation in dbs
    #// DBS to contact: write and read of the same dbs
    dbsReader = DBSReader(self.DBSURL, level='ERROR')
    dbswriter = DBSWriter(self.DBSURL)
    #####

    self.published_datasets = []
    for fileinfo in jobReport.files:
        datasets_info = fileinfo.dataset
        if len(datasets_info) <= 0:
            self.exit_status = '1'
            msg = "Error: No info about dataset in the xml file " + file
            common.logger.info(msg)
            return self.exit_status
        else:
            for dataset in datasets_info:
                #### for production data
                self.processedData = dataset['ProcessedDataset']
                if dataset['PrimaryDataset'] == 'null':
                    dataset['PrimaryDataset'] = self.userprocessedData
                elif self.datasetpath.upper() != 'NONE':
                    dataset['ParentDataset'] = self.datasetpath

                dataset['PSetContent'] = self.content
                cfgMeta = {'name': self.pset,
                           'Type': 'user',
                           'annotation': 'user cfg',
                           'version': 'private version'}  # add real name of user cfg
                common.logger.info("PrimaryDataset = %s" % dataset['PrimaryDataset'])
                common.logger.info("ProcessedDataset = %s" % dataset['ProcessedDataset'])
                common.logger.info("<User Dataset Name> = /" + dataset['PrimaryDataset'] +
                                   "/" + dataset['ProcessedDataset'] + "/USER")

                self.dataset_to_check = "/" + dataset['PrimaryDataset'] + "/" + \
                                        dataset['ProcessedDataset'] + "/USER"
                self.published_datasets.append(self.dataset_to_check)

                common.logger.log(10 - 1, "--->>> Inserting primary: %s processed : %s" %
                                  (dataset['PrimaryDataset'], dataset['ProcessedDataset']))

                #### check if dataset already exists in the DBS
                result = dbsReader.matchProcessedDatasets(dataset['PrimaryDataset'],
                                                          'USER',
                                                          dataset['ProcessedDataset'])
                if len(result) != 0:
                    # dataset already exists: retrieve its published files
                    result = dbsReader.listDatasetFiles(self.dataset_to_check)

                primary = DBSWriterObjects.createPrimaryDataset(dataset, dbswriter.dbs)
                common.logger.log(10 - 1, "Primary: %s " % primary)
                print "primary = ", primary

                algo = DBSWriterObjects.createAlgorithm(dataset, cfgMeta, dbswriter.dbs)
                common.logger.log(10 - 1, "Algo: %s " % algo)

                processed = DBSWriterObjects.createProcessedDataset(primary, algo,
                                                                    dataset, dbswriter.dbs)
                common.logger.log(10 - 1, "Processed: %s " % processed)
                print "processed = ", processed

                common.logger.log(10 - 1, "Inserted primary %s processed %s" % (primary, processed))
    #######################################################################################
    common.logger.log(10 - 1, "exit_status = %s " % self.exit_status)
    return self.exit_status
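# Hypothetical driver (not from the original source) showing the string
# exit-status contract of publishDataset: '0' on success, '1' on any
# failure. `publisher` and the fjr file names below are assumptions.
for fjr in ['crab_fjr_1.xml', 'crab_fjr_2.xml']:
    status = publisher.publishDataset(fjr)
    if status != '0':
        print "publication failed for %s, stopping" % fjr
        break
    print "published datasets so far:", publisher.published_datasets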
import logging
logging.disable(logging.INFO)

#  //
# // Get API to DBS
#//
dbsreader = DBSReader(url)
#  //
# // Get list of datasets
#//
if datasetPath:
    primds = datasetPath.split('/')[1]
    procds = datasetPath.split('/')[2]
    tier = datasetPath.split('/')[3]
    # print " matchProcessedDatasets(%s,%s,%s)" % (primds, tier, procds)
    datasets = dbsreader.matchProcessedDatasets(primds, tier, procds)
else:
    datasets = dbsreader.matchProcessedDatasets("*", "*", "*")

for dataset in datasets:
    #  //
    # // Get list of blocks for the dataset and their location
    #//
    for datasetpath in dataset.get('PathList'):
        nevttot = 0
        print "===== dataset %s" % datasetpath
        blocks = dbsreader.getFileBlocksInfo(datasetpath)
        for block in blocks:
            # replace this with a DLS query
            SEList = dbsreader.listFileBlockLocation(block['Name'])
            print "== File block %s is located at: %s" % (block['Name'], SEList)
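# A minimal sketch for supplying the `url` and `datasetPath` names the
# snippet above expects; reading them from the command line is an
# assumption, not part of the original script.
import sys
url = sys.argv[1]
datasetPath = sys.argv[2] if len(sys.argv) > 2 else None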