def dasquery(dasQuery, dasLimit=0):
    """Run a DAS query and return the 'data' section of the reply.

    Arguments:
    - `dasQuery`: DAS query string
    - `dasLimit`: maximum number of results (0 = no limit)

    Raises DatasetError if DAS reports a failure.  A very long error reply
    (> 10000 chars) is written to a numbered das_query_output_*.txt file
    instead of being embedded in the exception message.
    """
    dasData = das_client.get_data(dasQuery, dasLimit)
    # The client may return either a JSON string or an already-decoded dict.
    if isinstance(dasData, str):
        jsondict = json.loads(dasData)
    else:
        jsondict = dasData
    # Check, if the DAS query fails
    try:
        error = findinjson(jsondict, "data", "error")
    except KeyError:
        error = None
    if error or findinjson(jsondict, "status") != 'ok' or "data" not in jsondict:
        try:
            jsonstr = findinjson(jsondict, "reason")
        except KeyError:
            jsonstr = str(jsondict)
        if len(jsonstr) > 10000:
            # Find the first unused numbered output file name.
            jsonfile = "das_query_output_%i.txt"
            i = 0
            while os.path.lexists(jsonfile % i):
                i += 1
            jsonfile = jsonfile % i
            # Context manager instead of open/close so the file is always closed.
            with open(jsonfile, "w") as theFile:
                theFile.write(jsonstr)
            msg = "The DAS query returned an error. The output is very long, and has been stored in:\n" + jsonfile
        else:
            # fixed grammar in the message: "a error" -> "an error"
            msg = "The DAS query returned an error. Here is the output\n" + jsonstr
        msg += "\nIt's possible that this was a server error. If so, it may work if you try again later"
        raise DatasetError(msg)
    return findinjson(jsondict, "data")
def das_client(query, check_key = None): """ Submit `query` to DAS client and handle possible errors. Further treatment of the output might be necessary. Arguments: - `query`: DAS query - `check_key`: optional key to be checked for; retriggers query if needed """ error = True for i in xrange(5): # maximum of 5 tries das_data = cmssw_das_client.get_data(query, limit = 0) if das_data["status"] == "ok": if das_data["nresults"] == 0 or check_key is None: error = False break result_count = 0 for d in find_key(das_data["data"], check_key): result_count += len(d) if result_count == 0: das_data["status"] = "error" das_data["reason"] = ("DAS did not return required data.") continue else: error = False break if das_data["status"] == "error": print_msg("DAS query '{}' failed 5 times. " "The last time for the the following reason:".format(query)) print das_data["reason"] sys.exit(1) return das_data["data"]
def add_rawRelVals(process):
    """Attach the matching GEN-SIM-DIGI-RAW-HLTDEBUG sample as secondary input.

    Asks DAS which dataset the first input file belongs to, derives the
    corresponding RAW-HLTDEBUG dataset name from it, and fills
    process.source.secondaryFileNames with that dataset's file list.
    """
    primary_file = process.source.fileNames[0]
    dataset = cmssw_das_client.get_data('dataset file=%s' % primary_file, limit=0)
    if not dataset:
        raise RuntimeError(
            'Das returned no dataset parent of the input file: %s \n'
            'The parenthood is needed to add RAW secondary input files' % process.source.fileNames[0])
    # Derive the RAW-HLTDEBUG dataset name from the RECO dataset name.
    reco_name = dataset['data'][0]['dataset'][0]['name']
    raw_dataset = reco_name.replace('GEN-SIM-RECO', 'GEN-SIM-DIGI-RAW-HLTDEBUG')
    raw_files = cmssw_das_client.get_data('file dataset=%s' % raw_dataset, limit=0)['data']
    if not raw_files:
        raise RuntimeError('No files found belonging to the GEN-SIM-DIGI-RAW-HLTDEBUG sample!')
    #convert from unicode into normal string since vstring does not pick it up
    # (NOTE(review): each entry of raw_files is stringified as-is — confirm the
    # DAS reply really holds plain names here, as in the original code.)
    as_strings = [str(entry) for entry in raw_files]
    process.source.secondaryFileNames = cms.untracked.vstring(*as_strings)
    return process
def add_rawRelVals(process, inputName):
    """Replace the process source with the RAW files of dataset `inputName`.

    Queries DAS for the dataset and its files, prefixes each file with the
    global xrootd redirector, and installs them as a new PoolSource.
    """
    query = 'dataset=' + inputName
    dataset = cmssw_das_client.get_data(query, limit = 0)
    if not dataset:
        raise RuntimeError(
            'Das returned no dataset parent of the input file: %s \n'
            'The parenthood is needed to add RAW secondary input files' % process.source.fileNames[0])
    # NOTE(review): the original also looped over dataset['data'] reading
    # 'num_file' into an unused local under a bare "except: pass"; that dead
    # code (and its exception-swallowing) has been removed.
    raw_files = cmssw_das_client.get_data('file ' + query, limit = 0)
    # Extract every file name from the DAS reply and route it through xrootd.
    files = [entry['file'][0]['name'] for entry in raw_files['data']]
    raw_files = ['root://cms-xrd-global.cern.ch/' + str(name) for name in files]
    process.source = cms.Source("PoolSource",
                                fileNames = cms.untracked.vstring(raw_files))
    return process
def getFileNames_das_client(): ################################################################### """Return files for given DAS query via das_client""" files = [] query = "dataset dataset=/ZeroBias/Run2*SiStripCalMinBias-*/ALCARECO site=T2_CH_CERN" jsondict = das_client.get_data(query) status = jsondict['status'] if status != 'ok': print "DAS query status: %s"%(status) return files data = jsondict['data'] viableDS = [] for element in data: viableDS.append(element['dataset'][0]['name']) print "Using Dataset:",viableDS[-1] query = "file dataset=%s site=T2_CH_CERN | grep file.name" % viableDS[-1] jsondict = das_client.get_data(query) status = jsondict['status'] if status != 'ok': print "DAS query status: %s"%(status) return files mongo_query = jsondict['mongo_query'] filters = mongo_query['filters'] data = jsondict['data'] files = [] for row in data: the_file = [r for r in das_client.get_value(row, filters['grep'])][0] if len(the_file) > 0 and not the_file in files: files.append(the_file) return files
def das_client(query): """ Submit `query` to DAS client and handle possible errors. Further treatment of the output might be necessary. Arguments: - `query`: DAS query """ for _ in xrange(3): # maximum of 3 tries das_data = cmssw_das_client.get_data(query, limit = 0) if das_data["status"] != "error": break if das_data["status"] == "error": print_msg("DAS query '{}' failed 3 times. " "The last time for the the following reason:".format(query)) print das_data["reason"] sys.exit(1) return das_data["data"]
def das_client(query, check_key = None):
    """ Submit `query` to DAS client and handle possible errors.

    Further treatment of the output might be necessary.

    Arguments:
    - `query`: DAS query
    - `check_key`: optional key to be checked for; retriggers query if needed
    """
    # Pre-fill an error reply so the final failure is reported cleanly even
    # if every single try raised (the original hit a NameError on `das_data`
    # in that case).
    das_data = {"status": "error",
                "reason": "DAS query '{}' could not be submitted.".format(query)}
    for i in range(5):          # maximum of 5 tries
        try:
            das_data = cmssw_das_client.get_data(query, limit = 0)
        except IOError as e:
            if e.errno == 14:   # https://stackoverflow.com/q/36397853/5228524
                continue
            raise               # unrelated IOErrors must not be swallowed
        except ValueError as e:
            if str(e) == "No JSON object could be decoded":
                continue
            raise               # unrelated ValueErrors must not be swallowed
        if das_data["status"] == "ok":
            if das_data["nresults"] == 0 or check_key is None:
                break
            # Count how many entries actually carry the requested key.
            result_count = 0
            for d in find_key(das_data["data"], [check_key]):
                result_count += len(d)
            if result_count == 0:
                # DAS answered "ok" but without the requested data -> retry.
                das_data["status"] = "error"
                das_data["reason"] = ("DAS did not return required data.")
                continue
            else:
                break

    if das_data["status"] == "error":
        # fixed doubled word in the message: "the the" -> "the"
        print_msg("DAS query '{}' failed 5 times. "
                  "The last time for the following reason:".format(query))
        print(das_data["reason"])
        sys.exit(1)
    return das_data["data"]
def das_client(query, check_key=None):
    """ Submit `query` to DAS client and handle possible errors.

    Further treatment of the output might be necessary.

    Arguments:
    - `query`: DAS query
    - `check_key`: optional key to be checked for; retriggers query if needed
    """
    # Pre-fill an error reply so the final failure is reported cleanly even
    # if every single try raised (the original hit a NameError on `das_data`
    # in that case).
    das_data = {"status": "error",
                "reason": "DAS query '{}' could not be submitted.".format(query)}
    for i in range(5):          # maximum of 5 tries
        try:
            das_data = cmssw_das_client.get_data(query, limit=0)
        except IOError as e:
            if e.errno == 14:   # https://stackoverflow.com/q/36397853/5228524
                continue
            raise               # unrelated IOErrors must not be swallowed
        except ValueError as e:
            if str(e) == "No JSON object could be decoded":
                continue
            raise               # unrelated ValueErrors must not be swallowed
        if das_data["status"] == "ok":
            if das_data["nresults"] == 0 or check_key is None:
                break
            # Count how many entries actually carry the requested key.
            result_count = 0
            for d in find_key(das_data["data"], [check_key]):
                result_count += len(d)
            if result_count == 0:
                # DAS answered "ok" but without the requested data -> retry.
                das_data["status"] = "error"
                das_data["reason"] = ("DAS did not return required data.")
                continue
            else:
                break

    if das_data["status"] == "error":
        # fixed doubled word in the message: "the the" -> "the"
        print_msg("DAS query '{}' failed 5 times. "
                  "The last time for the following reason:".format(query))
        print(das_data["reason"])
        sys.exit(1)
    return das_data["data"]
def get_mc_lumi_list(inputDataset="/QCD_Pt_300to470_TuneCP5_13TeV_pythia8/RunIIFall17MiniAODv2-PU2017_12Apr2018_94X_mc2017_realistic*/MINIAODSIM"):
    """Get the LumiList object(s) for dataset(s) matching `inputDataset`

    inputDataset: if a str, will ask DAS to autocomplete (can contain wildcards)
                  if a list/tuple/set[str], will iterate over each entry in the
                  list, without asking DAS to autocomplete. This is because the
                  user might have cached the dataset names before calling this
                  function, and we don't want to call DAS more than necessary.

    returns: a dict with an entry for each dataset user inputs with das string
             as key and LumiList as value

    raises RuntimeError if no valid voms proxy
    raises TypeError if inputDataset incorrect type
    """
    if not check_voms():
        raise RuntimeError("Missing voms proxy")

    if isinstance(inputDataset, str):
        inputDatasets = autocomplete_Datasets([inputDataset])
    elif isinstance(inputDataset, (list, set, tuple)):
        # BUGFIX: the original never assigned `inputDatasets` in the
        # collection case, causing a NameError in the loop below.
        inputDatasets = inputDataset
    else:
        raise TypeError('get_mc_lumi_list: `inputDataset` expects str or list/tuple/set[str]')

    result = {}
    for dataset in inputDatasets:
        print(dataset)
        json_dict = get_data(host='https://cmsweb.cern.ch',
                             query="run lumi file dataset=" + dataset,
                             idx=0, limit=0, threshold=300)
        lumi_list = LumiList.LumiList()
        try:
            n_files = len(json_dict['data'])
            # Print progress roughly every 10% of the files; guard against
            # n_files < 10 (the original could do "i % 0" -> ZeroDivisionError).
            printout = max(1, round(n_files / 10))
            for i, file_info in enumerate(json_dict['data']):
                if i % printout == 0:
                    print("{}% done...".format(100 * i / n_files))
                ls = file_info['lumi'][0]['number']
                run = file_info['run'][0]['run_number']
                lumi_list += LumiList.LumiList(runsAndLumis={run: ls})
        except Exception as e:
            # Best-effort: an oddly-shaped DAS entry leaves this dataset's
            # LumiList (possibly partially) filled rather than aborting.
            print('Did not find lumis for', dataset)
            print(e)
        result.update({dataset: lumi_list})
    return result
def getFileNames(event): files = [] query = "file dataset=%(dataset)s run=%(run)i lumi=%(lumi)i | grep file.name" % event jsondict = das_client.get_data(query) status = jsondict['status'] if status != 'ok': print "DAS query status: %s" % (status) return files mongo_query = jsondict['mongo_query'] filters = mongo_query['filters'] data = jsondict['data'] files = [] for row in data: file = [r for r in das_client.get_value(row, filters['grep'])][0] if len(file) > 0 and not file in files: files.append(file) return files
def getFileNames (event): files = [] query = "file dataset=%(dataset)s run=%(run)i lumi=%(lumi)i | grep file.name" % event jsondict = das_client.get_data(query) status = jsondict['status'] if status != 'ok': print "DAS query status: %s"%(status) return files mongo_query = jsondict['mongo_query'] filters = mongo_query['filters'] data = jsondict['data'] files = [] for row in data: file = [r for r in das_client.get_value(row, filters['grep'])][0] if len(file) > 0 and not file in files: files.append(file) return files
def __getData(self, dasQuery, dasLimit=0):
    """Run a DAS query and return the 'data' section of the reply.

    Arguments:
    - `dasQuery`: DAS query string
    - `dasLimit`: maximum number of results (0 = no limit)

    Raises AllInOneError if DAS reports a failure.  A very long error reply
    (> 10000 chars) is written to a numbered das_query_output_*.txt file
    instead of being embedded in the exception message.
    """
    dasData = das_client.get_data(
        dasQuery,
        dasLimit,
        ############################################
        #can remove this once dasgoclient is updated
        cmd="das_client" if olddas else None
        ############################################
    )
    # The client may return either a JSON string or an already-decoded dict.
    if isinstance(dasData, str):
        jsondict = json.loads(dasData)
    else:
        jsondict = dasData
    # Check, if the DAS query fails
    try:
        error = self.__findInJson(jsondict, ["data", "error"])
    except KeyError:
        error = None
    if error or self.__findInJson(jsondict, "status") != 'ok' or "data" not in jsondict:
        try:
            jsonstr = self.__findInJson(jsondict, "reason")
        except KeyError:
            jsonstr = str(jsondict)
        if len(jsonstr) > 10000:
            # Find the first unused numbered output file name.
            jsonfile = "das_query_output_%i.txt"
            i = 0
            while os.path.lexists(jsonfile % i):
                i += 1
            jsonfile = jsonfile % i
            # Context manager instead of open/close so the file is always closed.
            with open(jsonfile, "w") as theFile:
                theFile.write(jsonstr)
            msg = "The DAS query returned an error. The output is very long, and has been stored in:\n" + jsonfile
        else:
            # fixed grammar in the message: "a error" -> "an error"
            msg = "The DAS query returned an error. Here is the output\n" + jsonstr
        msg += "\nIt's possible that this was a server error. If so, it may work if you try again later"
        raise AllInOneError(msg)
    return self.__findInJson(jsondict, "data")
def autocomplete_Datasets(data): result_array =[] for element in data: if '*' in element: jsondict = get_data(host='https://cmsweb.cern.ch',query="dataset="+element,idx=0,limit=0,threshold=300) #print json.dumps(jsondict, indent=4, sort_keys=True) #print json.dumps(jsondict['data'], indent=4, sort_keys=True) try: for i in range(len(jsondict['data'])): result_array.append(jsondict['data'][i]['dataset'][0]['name']) except: print '='*10 print 'Not found',element print '='*10 else: result_array.append(element) if len(result_array) == 0: print "No samples found going to exit" sys.exit(0) # Do this to remove duplicates but maintain order of insertion # We get duplicates because it queries ALL databases not just the main one # https://github.com/dmwm/DAS/issues/4287#issuecomment-390278822 return sorted(set(result_array), key=result_array.index)
def das_client(query, check_key=None): """ Submit `query` to DAS client and handle possible errors. Further treatment of the output might be necessary. Arguments: - `query`: DAS query - `check_key`: optional key to be checked for; retriggers query if needed """ error = True for i in xrange(5): # maximum of 5 tries das_data = cmssw_das_client.get_data(query, limit=0) if das_data["status"] == "ok": if das_data["nresults"] == 0 or check_key is None: error = False break result_count = 0 for d in find_key(das_data["data"], check_key): result_count += len(d) if result_count == 0: das_data["status"] = "error" das_data["reason"] = ("DAS did not return required data.") continue else: error = False break if das_data["status"] == "error": print_msg("DAS query '{}' failed 5 times. " "The last time for the the following reason:".format(query)) print das_data["reason"] sys.exit(1) return das_data["data"]
def apply( self ):
    """Pick RelVal input files for the configured CMSSW release.

    Reads the tool parameters (release, RelVal sample, data tier, global
    tag, ...), maps patch/HLT-patch/DQM-patch/SLHC/ROOT/IB release names
    back to a plain release (optionally the previous one, s. the
    'formerVersion' parameter), checks that the job runs at CERN or FNAL
    (direct RelVal file access), then queries DAS for the newest dataset
    version available on the local SE and returns the list of file paths
    found (possibly empty).
    """
    useDAS        = self._parameters[ 'useDAS' ].value
    cmsswVersion  = self._parameters[ 'cmsswVersion' ].value
    formerVersion = self._parameters[ 'formerVersion' ].value
    relVal        = self._parameters[ 'relVal' ].value
    dataTier      = self._parameters[ 'dataTier' ].value
    condition     = self._parameters[ 'condition' ].value # only used for GT determination in initialization, if GT not explicitly given
    globalTag     = self._parameters[ 'globalTag' ].value
    maxVersions   = self._parameters[ 'maxVersions' ].value
    skipFiles     = self._parameters[ 'skipFiles' ].value
    numberOfFiles = self._parameters[ 'numberOfFiles' ].value
    debug         = self._parameters[ 'debug' ].value
    filePaths = []

    # Determine corresponding CMSSW version for RelVals
    preId      = '_pre'
    patchId    = '_patch'    # patch releases
    hltPatchId = '_hltpatch' # HLT patch releases
    dqmPatchId = '_dqmpatch' # DQM patch releases
    slhcId     = '_SLHC'     # SLHC releases
    rootId     = '_root'     # ROOT test releases
    ibId       = '_X_'       # IBs
    # Strip any patch/special suffix so we query the base release's RelVals.
    if patchId in cmsswVersion:
        cmsswVersion = cmsswVersion.split( patchId )[ 0 ]
    elif hltPatchId in cmsswVersion:
        cmsswVersion = cmsswVersion.split( hltPatchId )[ 0 ]
    elif dqmPatchId in cmsswVersion:
        cmsswVersion = cmsswVersion.split( dqmPatchId )[ 0 ]
    elif rootId in cmsswVersion:
        cmsswVersion = cmsswVersion.split( rootId )[ 0 ]
    elif slhcId in cmsswVersion:
        cmsswVersion = cmsswVersion.split( slhcId )[ 0 ]
    elif ibId in cmsswVersion or formerVersion:
        # For IBs (or when the previous release is requested) ask SCRAM for
        # the list of installed CMSSW releases.
        outputTuple = Popen( [ 'scram', 'l -c CMSSW' ], stdout = PIPE, stderr = PIPE ).communicate()
        if len( outputTuple[ 1 ] ) != 0:
            print('%s INFO : SCRAM error'%( self._label ))
            if debug:
                print(' from trying to determine last valid releases before \'%s\''%( cmsswVersion ))
                print()
                print(outputTuple[ 1 ])
                print()
            self.messageEmptyList()
            return filePaths
        # Track the newest ('last') and second-newest ('lastToLast')
        # plain (non-patch, non-IB) releases matching this version.
        versions = { 'last' :'' , 'lastToLast':'' }
        for line in outputTuple[ 0 ].splitlines():
            version = line.split()[ 1 ]
            if cmsswVersion.split( ibId )[ 0 ] in version or cmsswVersion.rpartition( '_' )[ 0 ] in version:
                if not ( patchId in version or hltPatchId in version or dqmPatchId in version or slhcId in version or ibId in version or rootId in version ):
                    versions[ 'lastToLast' ] = versions[ 'last' ]
                    versions[ 'last' ] = version
                    if version == cmsswVersion:
                        break
        # FIXME: ordering of output problematic ('XYZ_pre10' before 'XYZ_pre2', no "formerVersion" for 'XYZ_pre1')
        if formerVersion:
            # Don't use pre-releases as "former version" for other releases than CMSSW_X_Y_0
            if preId in versions[ 'lastToLast' ] and not preId in versions[ 'last' ] and not versions[ 'last' ].endswith( '_0' ):
                versions[ 'lastToLast' ] = versions[ 'lastToLast' ].split( preId )[ 0 ] # works only, if 'CMSSW_X_Y_0' esists ;-)
            # Use pre-release as "former version" for CMSSW_X_Y_0
            elif versions[ 'last' ].endswith( '_0' ) and not ( preId in versions[ 'lastToLast' ] and versions[ 'lastToLast' ].startswith( versions[ 'last' ] ) ):
                versions[ 'lastToLast' ] = ''
                for line in outputTuple[ 0 ].splitlines():
                    version = line.split()[ 1 ]
                    versionParts = version.partition( preId )
                    if versionParts[ 0 ] == versions[ 'last' ] and versionParts[ 1 ] == preId:
                        versions[ 'lastToLast' ] = version
                    elif versions[ 'lastToLast' ] != '':
                        break
            # Don't use CMSSW_X_Y_0 as "former version" for pre-releases
            elif preId in versions[ 'last' ] and not preId in versions[ 'lastToLast' ] and versions[ 'lastToLast' ].endswith( '_0' ):
                versions[ 'lastToLast' ] = '' # no alternative :-(
            cmsswVersion = versions[ 'lastToLast' ]
        else:
            cmsswVersion = versions[ 'last' ]

    # Debugging output
    if debug:
        print('%s DEBUG: Called with...'%( self._label ))
        for key in self._parameters.keys():
            print(' %s:\t'%( key ), end=' ')
            print(self._parameters[ key ].value, end=' ')
            if self._parameters[ key ].value is self.getDefaultParameters()[ key ].value:
                print(' (default)')
            else:
                print()
            if key == 'cmsswVersion' and cmsswVersion != self._parameters[ key ].value:
                if formerVersion:
                    print(' ==> modified to last to last valid release %s (s. \'formerVersion\' parameter)'%( cmsswVersion ))
                else:
                    print(' ==> modified to last valid release %s'%( cmsswVersion ))

    # Check domain: RelVal files are only directly readable at CERN or FNAL.
    domain = socket.getfqdn().split( '.' )
    domainSE = ''
    if len( domain ) == 0:
        print('%s INFO : Cannot determine domain of this computer'%( self._label ))
        if debug:
            self.messageEmptyList()
        return filePaths
    elif os.uname()[0] == "Darwin":
        print('%s INFO : Running on MacOSX without direct access to RelVal files.'%( self._label ))
        if debug:
            self.messageEmptyList()
        return filePaths
    elif len( domain ) == 1:
        print('%s INFO : Running on local host \'%s\' without direct access to RelVal files'%( self._label, domain[ 0 ] ))
        if debug:
            self.messageEmptyList()
        return filePaths
    if not ( ( domain[ -2 ] == 'cern' and domain[ -1 ] == 'ch' ) or ( domain[ -2 ] == 'fnal' and domain[ -1 ] == 'gov' ) ):
        print('%s INFO : Running on site \'%s.%s\' without direct access to RelVal files'%( self._label, domain[ -2 ], domain[ -1 ] ))
        if debug:
            self.messageEmptyList()
        return filePaths
    # Map the site to the storage element to look for.
    if domain[ -2 ] == 'cern':
        domainSE = 'T2_CH_CERN'
    elif domain[ -2 ] == 'fnal':
        domainSE = 'T1_US_FNAL_MSS'
    if debug:
        print('%s DEBUG: Running at site \'%s.%s\''%( self._label, domain[ -2 ], domain[ -1 ] ))
        print('%s DEBUG: Looking for SE \'%s\''%( self._label, domainSE ))

    # Find files
    validVersion = 0
    dataset = ''
    datasetAll = '/%s/%s-%s-v*/%s'%( relVal, cmsswVersion, globalTag, dataTier )
    if useDAS:
        if debug:
            print('%s DEBUG: Using DAS query'%( self._label ))
        dasLimit = numberOfFiles
        if dasLimit <= 0:
            dasLimit = 1
        # Try dataset versions from the highest down, until one is found
        # that is actually hosted on the local SE.
        for version in range( maxVersions, 0, -1 ):
            filePaths = []
            filePathsTmp = []
            fileCount = 0
            dataset = '/%s/%s-%s-v%i/%s'%( relVal, cmsswVersion, globalTag, version, dataTier )
            dasQuery = 'file dataset=%s | grep file.name'%( dataset )
            if debug:
                print('%s DEBUG: Querying dataset \'%s\' with'%( self._label, dataset ))
                print(' \'%s\''%( dasQuery ))
            jsondict = das_client.get_data(dasQuery,dasLimit)
            if debug:
                print('%s DEBUG: Received DAS JSON dictionary:'%( self._label ))
                print(' \'%s\''%( jsondict ))
            if jsondict[ 'status' ] != 'ok':
                print('There was a problem while querying DAS with query \'%s\'. Server reply was:\n %s' % (dasQuery, jsondict))
                exit( 1 )
            mongo_query = jsondict[ 'mongo_query' ]
            filters = mongo_query[ 'filters' ]
            data = jsondict[ 'data' ]
            if debug:
                print('%s DEBUG: Query in JSON dictionary:'%( self._label ))
                print(' \'%s\''%( mongo_query ))
                print('%s DEBUG: Filters in query:'%( self._label ))
                print(' \'%s\''%( filters ))
                print('%s DEBUG: Data in JSON dictionary:'%( self._label ))
                print(' \'%s\''%( data ))
            for row in data:
                filePath = [ r for r in das_client.get_value( row, filters[ 'grep' ] ) ][ 0 ]
                if debug:
                    print('%s DEBUG: Testing file entry \'%s\''%( self._label, filePath ))
                if len( filePath ) > 0:
                    if validVersion != version:
                        # First file of a candidate version: verify the
                        # dataset is present on the local SE before using it.
                        jsontestdict = das_client.get_data('site dataset=%s | grep site.name' % ( dataset ), 999)
                        mongo_testquery = jsontestdict[ 'mongo_query' ]
                        testfilters = mongo_testquery[ 'filters' ]
                        testdata = jsontestdict[ 'data' ]
                        if debug:
                            print('%s DEBUG: Received DAS JSON dictionary (site test):'%( self._label ))
                            print(' \'%s\''%( jsontestdict ))
                            print('%s DEBUG: Query in JSON dictionary (site test):'%( self._label ))
                            print(' \'%s\''%( mongo_testquery ))
                            print('%s DEBUG: Filters in query (site test):'%( self._label ))
                            print(' \'%s\''%( testfilters ))
                            print('%s DEBUG: Data in JSON dictionary (site test):'%( self._label ))
                            print(' \'%s\''%( testdata ))
                        foundSE = False
                        for testrow in testdata:
                            siteName = [ tr for tr in das_client.get_value( testrow, testfilters[ 'grep' ] ) ][ 0 ]
                            if siteName == domainSE:
                                foundSE = True
                                break
                        if not foundSE:
                            if debug:
                                print('%s DEBUG: Possible version \'v%s\' not available on SE \'%s\''%( self._label, version, domainSE ))
                            break
                        validVersion = version
                        if debug:
                            print('%s DEBUG: Valid version set to \'v%i\''%( self._label, validVersion ))
                        if numberOfFiles == 0:
                            break
                    # protect from double entries ( 'unique' flag in query does not work here)
                    if not filePath in filePathsTmp:
                        filePathsTmp.append( filePath )
                        if debug:
                            print('%s DEBUG: File \'%s\' found'%( self._label, filePath ))
                        fileCount += 1
                        # needed, since and "limit" overrides "idx" in 'get_data' (==> "idx" set to '0' rather than "skipFiles")
                        if fileCount > skipFiles:
                            filePaths.append( filePath )
                    elif debug:
                        print('%s DEBUG: File \'%s\' found again'%( self._label, filePath ))
            if validVersion > 0:
                if numberOfFiles == 0 and debug:
                    print('%s DEBUG: No files requested'%( self._label ))
                break
    else:
        if debug:
            print('%s DEBUG: Using DBS query'%( self._label ))
        print('%s WARNING: DBS query disabled for DBS3 transition to new API'%( self._label ))
        # NOTE: a large commented-out legacy (pre-DBS3) DBS query
        # implementation used to live here; it was dead code and has been
        # dropped from the comments for readability.

    # Check output and return
    if validVersion == 0:
        print('%s WARNING : No RelVal file(s) found at all in datasets \'%s*\' on SE \'%s\''%( self._label, datasetAll, domainSE ))
        if debug:
            self.messageEmptyList()
    elif len( filePaths ) == 0:
        print('%s WARNING : No RelVal file(s) picked up in dataset \'%s\''%( self._label, dataset ))
        if debug:
            self.messageEmptyList()
    elif len( filePaths ) < numberOfFiles:
        print('%s INFO : Only %i RelVal file(s) instead of %i picked up in dataset \'%s\''%( self._label, len( filePaths ), numberOfFiles, dataset ))
    if debug:
        print('%s DEBUG: returning %i file(s):\n%s'%( self._label, len( filePaths ), filePaths ))
    return filePaths
def das(query):
    """Forward `query` to the DAS client and return the reply's 'data' field.

    Raises RuntimeError carrying DAS's own reason string when the query fails.
    """
    reply = das_client.get_data(query, 0)
    if reply["status"] == "error":
        raise RuntimeError(reply["reason"])
    return reply["data"]
# --- fragment of a larger loop (header outside this view): builds one CRAB
# --- config file per spreadsheet row; `row`, `dataset` and `crab_cfg_path`
# --- are defined by the enclosing code.
# Pull the per-sample fields out of the current row.
name=row['name']
weight=row['weighting on 1fb-1']
generator=row['generator']
isData=row['isData']
isBoostedMiniAOD=row['boosted_dataset']
#makeSystematicsTrees=row['makeSystematicsTrees']
globalTag=row['globalTag']
# if len(dataset)>r3:
# requestname="'"+(dataset.split('/')[1])+"'"
# Only rows with both a dataset and a name get a CRAB config.
if dataset!='' and name!='':
    print 'checking dataset info for',dataset
    # Ask DAS for the files of this dataset (prod instance).
    #ckey=das_client.x509()
    #cert=das_client.x509()
    #das_client.check_auth(ckey)
    #das_data=das_client.get_data("https://cmsweb.cern.ch","dataset="+dataset+" instance=phys/prod",0,0,0,300,ckey,cert)
    das_data=das_client.get_data("file dataset="+dataset+" instance=phys/prod")
    #print das_data
    #for d in das_data['data']:
    ##print d
    #for dd in d['dataset']:
    ##print dd
    #print dd['mcm']['nevents']
    # Pick a non-clobbering output file name: crab_<name>.py first, then
    # crab_<name>_v2.py ... crab_<name>_v9.py.
    outfilename=crab_cfg_path+'/crab_'+name+'.py'
    if os.path.exists(outfilename):
        for i in range(2,10):
            outfilename=crab_cfg_path+'/crab_'+name+'_v'+str(i)+'.py'
            if not os.path.exists(outfilename):
                break
    # NOTE(review): the file handle is closed further down in the enclosing
    # code — not visible in this fragment.
    crabout=open(outfilename,'w')
def common_search(dd_tier):
    """Find input files for the given data tier `dd_tier`.

    The lookup backend is selected via the DD_SOURCE environment variable:
    "das", "dbs", "http", "lsf", a /castor/cern.ch/cms/ directory, an
    /eos/cms/ directory, or (fallback) a plain text file listing file names.
    DD_RELEASE, DD_SAMPLE, DD_COND, DD_RUN and DD_TIER further qualify the
    query.  Returns a list of file names (possibly empty).
    """
    dd_tier_re = re.compile(dd_tier.replace('*', '.*'))

    if os.environ['DD_SOURCE'] == "das":
        # Build the DAS dataset query from the DD_* environment variables.
        query = "dataset instance=cms_dbs_prod_global"
        if os.environ['DD_RELEASE'] != "":
            query = query + " release=" + os.environ['DD_RELEASE']
        if os.environ['DD_SAMPLE'] != "":
            query = query + " primary_dataset=" + os.environ['DD_SAMPLE']
        if dd_tier != "":
            query = query + " tier=" + dd_tier
        if os.environ['DD_COND'] != "":
            query = query + " dataset=*" + os.environ['DD_COND'] + "*"
        if os.environ['DD_RUN'] != "":
            query = query + " run=" + os.environ['DD_RUN']
        data = das_client.json.loads(
            das_client.get_data('https://cmsweb.cern.ch', query, 0, 0, 0))
        if data['nresults'] == 0:
            print('[electronDataDiscovery.py] No DAS dataset for query:', query)
            return []
        # Collapse duplicated leading entries; give up if several distinct
        # datasets match the query.
        while data['nresults'] > 1:
            if data['data'][0]['dataset'][0]['name'] == data['data'][1]['dataset'][0]['name']:
                data['data'].pop(0)
                data['nresults'] -= 1
            else:
                print('[electronDataDiscovery.py] Several DAS datasets for query:', query)
                for i in range(data['nresults']):
                    print('[electronDataDiscovery.py] dataset[' + str(i) + ']: ' + data['data'][i]['dataset'][0]['name'])
                return []
        dataset = data['data'][0]['dataset'][0]['name']

        # Second query: the files of the selected dataset.
        query = "file instance=cms_dbs_prod_global dataset=" + dataset
        data = das_client.json.loads(
            das_client.get_data('https://cmsweb.cern.ch', query, 0, 0, 0))
        if data['nresults'] == 0:
            print('[electronDataDiscovery.py] No DAS file in dataset:', dataset)
            return []
        else:
            # BUGFIX: the original referenced an undefined name `nresults`
            # here (NameError); use the value from the DAS reply.
            print('there is %d results' % data['nresults'])
        result = []
        for i in range(0, data['nresults']):
            result.append(str(data['data'][i]['file'][0]['name']))
    elif os.environ['DD_SOURCE'] == "dbs":
        # Build a legacy DBS "find file" query (renamed `input` -> `dbs_query`
        # to avoid shadowing the builtin).
        dbs_query = "find file"
        separator = " where "
        if os.environ['DD_RELEASE'] != "":
            dbs_query = dbs_query + separator + "release = " + os.environ['DD_RELEASE']
            separator = " and "
        if os.environ['DD_SAMPLE'] != "":
            dbs_query = dbs_query + separator + "primds = " + os.environ['DD_SAMPLE']
            separator = " and "
        if os.environ['DD_RUN'] != "":
            dbs_query = dbs_query + separator + "run = " + os.environ['DD_RUN']
            separator = " and "
        dbs_query = dbs_query + separator + "dataset like *" + os.environ['DD_COND'] + "*" + dd_tier + "*"
        data = os.popen('dbs search --url="http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet" --query "' + dbs_query + '"')
        datalines = data.readlines()
        data.close()
        # Keep only lines that look like LFNs.
        result = []
        for line in datalines:
            line = line.rstrip()
            if line != "" and line[0] == "/":
                result.append(line)
    elif os.environ['DD_SOURCE'] == "http":
        # Same query text as the "dbs" branch, sent to the DBS discovery
        # web service instead.
        dbs_query = "find file"
        separator = " where "
        if os.environ['DD_RELEASE'] != "":
            dbs_query = dbs_query + separator + "release = " + os.environ['DD_RELEASE']
            separator = " and "
        if os.environ['DD_SAMPLE'] != "":
            dbs_query = dbs_query + separator + "primds = " + os.environ['DD_SAMPLE']
            separator = " and "
        if os.environ['DD_RUN'] != "":
            dbs_query = dbs_query + separator + "run = " + os.environ['DD_RUN']
            separator = " and "
        dbs_query = dbs_query + separator + "dataset like *" + os.environ['DD_COND'] + "*" + dd_tier + "*"
        url = "https://cmsweb.cern.ch:443/dbs_discovery/aSearch"
        final_input = urllib.quote(dbs_query)
        agent = "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)"
        ctypes = "text/plain"
        headers = {'User-Agent': agent, 'Accept': ctypes}
        params = {
            'dbsInst': 'cms_dbs_prod_global',
            'html': 0,
            'caseSensitive': 'on',
            '_idx': 0,
            'pagerStep': -1,
            'userInput': final_input,
            'xml': 0,
            'details': 0,
            'cff': 0,
            'method': 'dbsapi'
        }
        data = urllib.urlencode(params, doseq=True)
        req = urllib2.Request(url, data, headers)
        data = ""
        try:
            response = urllib2.urlopen(req)
            data = response.read()
        except urllib2.HTTPError as e:
            if e.code == 201:   # 201 replies still carry usable headers
                print(e.headers)
                print(e.msg)
            else:
                raise e
        # BUGFIX: `data` is a plain string here; it has no readlines()/close()
        # (the original raised AttributeError).  Split it into lines instead.
        datalines = data.splitlines()
        result = []
        for line in datalines:
            line = line.rstrip()
            if line != "" and line[0] == "/":
                result.append(line)
    elif os.environ['DD_SOURCE'] == "lsf":
        # NOTE(review): the trailing '"' in dbs_path looks like a stray quote
        # kept from the original — confirm against the 'dbs lsf' CLI syntax.
        dbs_path = '/' + os.environ['DD_SAMPLE'] + '/' + os.environ['DD_RELEASE'] + '-' + os.environ['DD_COND'] + '/' + \
            os.environ['DD_TIER'] + '"'
        if __name__ == "__main__":
            print('dbs path:', dbs_path)
        data = os.popen('dbs lsf --path="' + dbs_path + '"')
        datalines = data.readlines()
        data.close()
        result = []
        for line in datalines:
            line = line.rstrip()
            if line != "" and line[0] == "/":
                result.append(line)
    elif os.environ['DD_SOURCE'].startswith('/castor/cern.ch/cms/'):  # assumed to be a castor dir
        castor_dir = os.environ['DD_SOURCE'].replace('/castor/cern.ch/cms/', '/', 1)
        result = []
        data = os.popen('rfdir /castor/cern.ch/cms' + castor_dir)
        subdirs = data.readlines()
        data.close()
        datalines = []
        # One level of recursion: list each subdirectory and collect its files.
        for line in subdirs:
            line = line.rstrip()
            subdir = line.split()[8]    # 9th rfdir column is the entry name
            data = os.popen('rfdir /castor/cern.ch/cms' + castor_dir + '/' + subdir)
            datalines = data.readlines()
            for line in datalines:
                line = line.rstrip()
                # renamed from `file` to avoid shadowing the builtin
                file_name = line.split()[8]
                if file_name != "":
                    result.append(castor_dir + '/' + subdir + '/' + file_name)
            data.close()
    elif os.environ['DD_SOURCE'].startswith('/eos/cms/'):  # assumed to be an eos dir
        data = os.popen('eos find -f ' + os.environ['DD_SOURCE'])
        lines = data.readlines()
        data.close()
        result = []
        for line in lines:
            line = line.strip().replace('/eos/cms/', '/', 1)
            if line == "":
                continue
            # dd_sample_re/dd_cond_re/dd_run_re are module-level filters.
            if dd_sample_re.search(line) is None:
                continue
            if dd_cond_re.search(line) is None:
                continue
            if dd_tier_re.search(line) is None:
                continue
            if dd_run_re.search(line) is None:
                continue
            result.append(line)
    else:  # os.environ['DD_SOURCE'] is assumed to be a file name
        result = []
        for line in open(os.environ['DD_SOURCE']).readlines():
            line = os.path.expandvars(line.strip())
            if line == "":
                continue
            if dd_sample_re.search(line) is None:
                continue
            if dd_cond_re.search(line) is None:
                continue
            if dd_tier_re.search(line) is None:
                continue
            if dd_run_re.search(line) is None:
                continue
            result.append(line)

    if len(result) == 0:
        # Explain which filters were active when nothing survived.
        diag = '[electronDataDiscovery.py] No more files after filtering with :'
        if os.environ['DD_SAMPLE'] != '':
            diag += ' ' + os.environ['DD_SAMPLE']
        if os.environ['DD_COND'] != '':
            diag += ' ' + os.environ['DD_COND']
        if dd_tier != '':
            diag += ' ' + dd_tier
        if os.environ['DD_RUN'] != '':
            diag += ' ' + os.environ['DD_RUN']
        print(diag)
    return result