def createDataDatasets(self):
    """Fill ``self.dataDatasets`` with one config snippet per data dataset.

    For every entry ``d`` of ``self.datasets`` (a mapping that provides the
    keys ``'json'``, ``'dataset'``, ``'label'`` and ``'goldenJson'``) the
    lumi list of the dataset is obtained:

    * if the file ``d['json']`` does not exist, it is fetched through the
      DAS client and written to ``d['json']``;
    * otherwise it is loaded from that file.

    The resulting text snippet is stored in ``self.dataDatasets`` under
    ``d['label']``.
    """
    self.dataDatasets = {}
    for d in self.datasets:
        dsLumiList = None
        if not os.path.isfile(d['json']):
            # sys argv fix: sys.argv is blanked while the DAS client is in
            # use (presumably because the client inspects the command line
            # -- TODO confirm).  try/finally guarantees the caller's argv is
            # restored even when the DAS query or the JSON write fails.
            oldSsArgv = sys.argv
            sys.argv = []
            try:
                dasC = dasTools.myDasClient()
                dasC.limit = 0
                dsLumiList = dasC.getJsonOfDataset(d["dataset"])
                dsLumiList.writeJSON(d['json'])
            finally:
                sys.argv = oldSsArgv
        else:
            # Close the file deterministically instead of leaking the handle.
            with open(d['json']) as jsonFile:
                dsLumiList = LumiList(compactList=json.load(jsonFile))
        dsRuns = dsLumiList.getRuns()
        # NOTE(review): dsRuns[0] / dsRuns[-1] raise IndexError for a dataset
        # without any run; behavior intentionally left as in the original.
        self.dataDatasets[d['label']] = (
            '{ \n '
            '\t"xSec":None\n'
            '\t,"localFile":None\n'
            '\t,"datasetName":"' + d["dataset"] + '"\n'
            '\t,"label":"Data_' + d['label'] + '"\n'
            '\t,"datasetJSON":"' + d['json'] + '"\n'
            '\t,"crabConfig":{\n'
            '\t\t"CMSSW":{"lumis_per_job":5\n'
            '\t\t\t,"lumi_mask": os.getenv("CMSSW_BASE") + ' + '"/' + d['goldenJson'].lstrip('/') + '"\n'
            '\t\t\t,"total_number_of_lumis" : -1}\n'
            '\t\t}\n'
            '\t,"color":0\n'
            '\t,"runRange":"' + str(dsRuns[0]) + "-" + str(dsRuns[-1]) + '"\n'
            '\t}\n'
        )
def getRuns(name=None, bfield=None, bunchSpacing=None):
    """Return the runs of all run periods that match the given filters.

    Each of ``name``, ``bfield`` and ``bunchSpacing`` is compared with the
    attribute of the same name on every entry of ``runPeriods``; a filter
    left at ``None`` matches every period.  The lumi lists of the matching
    periods are accumulated and the runs of the combined list are returned.
    """
    ll = LumiList()
    for rp in runPeriods:
        if ((name is None or rp.name == name)
                and (bfield is None or rp.bfield == bfield)
                and (bunchSpacing is None or rp.bunchSpacing == bunchSpacing)):
            # Build the period's lumi list exactly once; the original built
            # it twice and threw the first copy away.
            ll += LumiListForRunPeriod(rp)
    return ll.getRuns()
def getRuns(name=None, bfield=None, bunchSpacing=None):
    """Return the runs of all run periods that match the given filters.

    Each of ``name``, ``bfield`` and ``bunchSpacing`` is compared with the
    attribute of the same name on every entry of ``runPeriods``; a filter
    left at ``None`` matches every period.  The lumi lists of the matching
    periods are accumulated and the runs of the combined list are returned.
    """
    ll = LumiList()
    for rp in runPeriods:
        matches = (
            (name is None or rp.name == name)
            and (bfield is None or rp.bfield == bfield)
            and (bunchSpacing is None or rp.bunchSpacing == bunchSpacing)
        )
        if matches:
            # One construction per period is enough; the duplicated call in
            # the original only produced an unused temporary.
            ll += LumiListForRunPeriod(rp)
    return ll.getRuns()
def LumiListForRunPeriod(rp, MIN_LUMIS=0):
    """Load the lumi list of run period ``rp`` restricted to its run range.

    The list is read from ``rp.json``.  Only runs inside
    ``[rp.firstRun, rp.lastRun]`` that have more than ``MIN_LUMIS`` lumi
    entries are kept; the trimmed ``LumiList`` is returned.
    """
    ll = LumiList(filename=rp.json)
    run_numbers = map(int, ll.getRuns())

    # Number of (run, lumi) entries per run.
    nlumis = defaultdict(int)
    for run_number, _lumi in ll.getLumis():
        nlumis[run_number] += 1

    select_runs = []
    for run_number in run_numbers:
        if not (run_number >= rp.firstRun and run_number <= rp.lastRun):
            continue
        if nlumis[run_number] > MIN_LUMIS:
            select_runs.append(run_number)

    ll.selectRuns(select_runs)
    return ll
def LumiListForRunPeriod(rp, MIN_LUMIS=0):
    """Return the ``LumiList`` of ``rp.json`` cut down to the period's runs.

    A run survives when it lies within ``rp.firstRun`` .. ``rp.lastRun``
    (both inclusive) and its number of lumi entries exceeds ``MIN_LUMIS``.
    """
    ll = LumiList(filename=rp.json)
    candidates = map(int, ll.getRuns())

    # Tally the lumi entries of every run.
    nlumis = defaultdict(int)
    for entry in ll.getLumis():
        nlumis[entry[0]] += 1

    select_runs = []
    for candidate in candidates:
        in_period = candidate >= rp.firstRun and candidate <= rp.lastRun
        if in_period and nlumis[candidate] > MIN_LUMIS:
            select_runs.append(candidate)

    ll.selectRuns(select_runs)
    return ll
def shortenJson(jsonFile, minRun=0, maxRun=-1, output=None, debug=False):
    """Restrict a lumi list to the runs between ``minRun`` and ``maxRun``.

    Parameters:
        jsonFile: either a ``LumiList`` instance (it is deep-copied, the
            caller's object is not modified) or a file name handed to
            ``LumiList(filename=...)``.
        minRun:   runs below this number are removed.
        maxRun:   runs above this number are removed; the upper cut is only
            applied when ``maxRun`` is greater than 0.
        output:   if given, the result is written there with ``writeJSON``.
        debug:    print the list of removed runs.

    Returns:
        The trimmed ``LumiList`` when ``output`` is not given, else ``None``.
    """
    from copy import deepcopy

    if isinstance(jsonFile, LumiList):
        runList = deepcopy(jsonFile)
    else:
        runList = LumiList(filename=jsonFile)  # Read in first JSON file

    # A single combined condition keeps each run in the list at most once.
    runsToRemove = [
        run for run in runList.getRuns()
        if int(run) < minRun or (maxRun > 0 and int(run) > maxRun)
    ]
    if debug:
        # Single-argument print(...) behaves the same on Python 2 and 3 and
        # reproduces the output of the former print statement.
        print(" runsToRemove  %s" % (runsToRemove,))
    runList.removeRuns(runsToRemove)

    if output:
        runList.writeJSON(output)
        return None
    return runList
raise RuntimeError, "Trying to do a DAS query while in a LXBatch job (env variable LSB_JOBID defined)\nquery was: %s" % dbs if 'X509_USER_PROXY' in os.environ: dbs += " --key {0} --cert {0}".format( os.environ['X509_USER_PROXY']) logger.info('DAS query\t: %s', dbs) return os.popen(dbs) dbs = 'das_client --query="run dataset=%s instance=prod/%s" --limit %i' % ( prompt.heppy.dataset, 'global', 0) prompt_runs = [int(r) for r in _dasPopen(dbs).readlines()] dbs = 'das_client --query="run dataset=%s instance=prod/%s" --limit %i' % ( rereco.heppy.dataset, 'global', 0) rereco_runs = [int(r) for r in _dasPopen(dbs).readlines()] runs = [] for str_run in lumiList.getRuns(): run = int(str_run) if run in prompt_runs and run in rereco_runs: runs.append(run) print "Now running %i jobs: %r" % (len(runs), runs) import subprocess def wrapper(run_): subprocess.call([ "python", "jetTreeMaker.py", ("--era=%s" % args.era), ("--run=%i" % run_) ]) from multiprocessing import Pool
#!/usr/bin/env python import os, re, sys from collections import defaultdict from itertools import combinations from FWCore.PythonUtilities.LumiList import LumiList # Should rewrite not to hit the db for every cfg, but just get the HLT # key for each run and then only get cfgs for unique keys. dcsonly_ll = LumiList( "/afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions12/8TeV/DCSOnly/json_DCSONLY.txt" ) # JMTBAD use DCSOnly_ll from goodlumis once sorted runs = sorted(int(run) for run in dcsonly_ll.getRuns()) cmd = "edmConfigFromDB --cff --runNumber %i --noedsources --noes --noservices --nomodules" path_re = re.compile(r"(HLT_Mu40_eta2p1_v\d+)") prescaled_path_re = re.compile(r"(HLT_Mu15_eta2p1_v\d+)") paths_and_filters = defaultdict(list) for run in runs: print "run:", run, sys.stdout.flush() path = prescaled_path = filter = prescaled_filter = None for line in os.popen(cmd % run): if "cms.Path" not in line: continue filt = line.split(" + ")[-2] # JMTBAD fragile mo = path_re.search(line) if mo is not None:
# required parameters
(options, args) = parser.parse_args()
if len(args) != 1:
    raise RuntimeError("Must provide exactly one input file")
if options.min and options.max and options.min > options.max:
    raise RuntimeError(
        "Minimum value (%d) is greater than maximum value (%d)" %
        (options.min, options.max))

# Runs named explicitly on the command line; every option value may hold
# several comma-separated run numbers.
commaRE = re.compile(r',')
runsToRemove = []
for chunk in options.runs:
    runs = commaRE.split(chunk)
    runsToRemove += runs

alphaList = LumiList(filename=args[0])  # Read in first JSON file
allRuns = alphaList.getRuns()
for run in allRuns:
    # After the min/max check above a run can never be both below the
    # minimum and above the maximum, so one combined test appends it once,
    # exactly as the two separate tests did.
    if (options.min and int(run) < options.min) or \
            (options.max and int(run) > options.max):
        runsToRemove.append(run)
alphaList.removeRuns(runsToRemove)

# Write the filtered list to the requested file, or print it.
if not options.output:
    print(alphaList)
else:
    alphaList.writeJSON(options.output)
def __lumiSelectionSnippet( self, jsonPath = None, firstRun = None, lastRun = None ):
    """Return the text that fills ``lumiSecs`` for the requested selection.

    Three sources are handled:

    * no ``jsonPath``: every run of ``self.__getRunList()`` inside
      ``firstRun`` .. ``lastRun`` is selected completely (``run:1-run:max``);
    * ``jsonPath`` readable by ``LumiList``: the runs outside
      ``firstRun`` .. ``lastRun`` are removed and the remainder is used;
    * ``jsonPath`` is not JSON but contains
      ``process.source.lumisToProcess``: the file content is used as is,
      after forcing its ranges into ``firstRun`` .. ``lastRun``.

    As a side effect ``self.__firstusedrun`` and ``self.__lastusedrun`` are
    updated (unless ``self.__inputMagneticField`` is set and no selection is
    requested).

    Returns:
        ``lumiSecs.extend( [...] )`` statements (chunks of at most 255
        ranges), the adapted cff content, or ``""`` without any selection.

    Raises:
        AllInOneError: ``jsonPath`` is neither JSON nor a cff snippet, or
            the selection does not leave any run.
    """
    lumiSecExtend = ""
    if firstRun or lastRun or jsonPath:
        if not jsonPath:
            selectedRunList = self.__getRunList()
            if firstRun:
                selectedRunList = [ run for run in selectedRunList \
                                    if self.__findInJson(run, "run_number") >= firstRun ]
            if lastRun:
                selectedRunList = [ run for run in selectedRunList \
                                    if self.__findInJson(run, "run_number") <= lastRun ]
            lumiList = [ str( self.__findInJson(run, "run_number") ) + ":1-" \
                         + str( self.__findInJson(run, "run_number") ) + ":max" \
                         for run in selectedRunList ]
            splitLumiList = list( self.__chunks( lumiList, 255 ) )
        else:
            # LumiList raises ValueError for files that are not JSON; such
            # files get a second chance below as cff snippets.
            theLumiList = None
            try:
                theLumiList = LumiList ( filename = jsonPath )
            except ValueError:
                pass

            if theLumiList is not None:
                allRuns = theLumiList.getRuns()
                runsToRemove = []
                for run in allRuns:
                    if firstRun and int( run ) < firstRun:
                        runsToRemove.append( run )
                    if lastRun and int( run ) > lastRun:
                        runsToRemove.append( run )
                theLumiList.removeRuns( runsToRemove )
                splitLumiList = list( self.__chunks(
                    theLumiList.getCMSSWString().split(','), 255 ) )
                # An empty selection shows up as no chunk or as one empty
                # string; normalize both to None.
                if not (splitLumiList and splitLumiList[0] and splitLumiList[0][0]):
                    splitLumiList = None
            else:
                with open(jsonPath) as f:
                    jsoncontents = f.read()
                if "process.source.lumisToProcess" in jsoncontents:
                    msg = "%s is not a json file, but it seems to be a CMSSW lumi selection cff snippet. Trying to use it" % jsonPath
                    if firstRun or lastRun:
                        msg += ("\n (after applying firstRun and/or lastRun)")
                    msg += ".\nPlease note that, depending on the format of this file, it may not work as expected."
                    msg += "\nCheck your config file to make sure that it worked properly."
                    print(msg)

                    runlist = self.__getRunList()
                    if firstRun or lastRun:
                        # Reset before the substitution; presumably the
                        # callback from getForceRunRangeFunction records the
                        # runs it keeps in these attributes -- TODO confirm.
                        self.__firstusedrun = -1
                        self.__lastusedrun = -1
                        jsoncontents = re.sub(r"\d+:(\d+|max)(-\d+:(\d+|max))?",
                                              self.getForceRunRangeFunction(firstRun, lastRun),
                                              jsoncontents)
                        # Drop the empty entries left behind by the substitution.
                        jsoncontents = (jsoncontents.replace("'',\n","").replace("''\n","")
                                                    .replace('"",\n','').replace('""\n',''))
                        self.__firstusedrun = max(self.__firstusedrun, int(self.__findInJson(runlist[0],"run_number")))
                        self.__lastusedrun = min(self.__lastusedrun, int(self.__findInJson(runlist[-1],"run_number")))
                        if self.__lastusedrun < self.__firstusedrun:
                            # Nothing survives; triggers the error below.
                            jsoncontents = None
                    else:
                        self.__firstusedrun = int(self.__findInJson(runlist[0],"run_number"))
                        self.__lastusedrun = int(self.__findInJson(runlist[-1],"run_number"))
                    lumiSecExtend = jsoncontents
                    splitLumiList = None
                else:
                    raise AllInOneError("%s is not a valid json file!" % jsonPath)

        if splitLumiList and splitLumiList[0] and splitLumiList[0][0]:
            lumiSecStr = [ "',\n'".join( lumis ) \
                           for lumis in splitLumiList ]
            lumiSecStr = [ "lumiSecs.extend( [\n'" + lumis + "'\n] )" \
                           for lumis in lumiSecStr ]
            lumiSecExtend = "\n".join( lumiSecStr )
            runlist = self.__getRunList()
            self.__firstusedrun = max(int(splitLumiList[0][0].split(":")[0]), int(self.__findInJson(runlist[0],"run_number")))
            self.__lastusedrun = min(int(splitLumiList[-1][-1].split(":")[0]), int(self.__findInJson(runlist[-1],"run_number")))
        elif lumiSecExtend:
            # cff snippet: already stored in lumiSecExtend above.
            pass
        else:
            msg = "You are trying to run a validation without any runs! Check that:"
            if firstRun or lastRun:
                msg += "\n - firstRun/begin and lastRun/end are correct for this dataset, and there are runs in between containing data"
            if jsonPath:
                msg += "\n - your JSON file is correct for this dataset, and the runs contain data"
            if (firstRun or lastRun) and jsonPath:
                msg += "\n - firstRun/begin and lastRun/end are consistent with your JSON file"
            raise AllInOneError(msg)

    else:
        if self.__inputMagneticField is not None:
            pass  #never need self.__firstusedrun or self.__lastusedrun
        else:
            # Fetch the run list once and reuse it for both ends.
            runlist = self.__getRunList()
            self.__firstusedrun = int(self.__findInJson(runlist[0],"run_number"))
            self.__lastusedrun = int(self.__findInJson(runlist[-1],"run_number"))

    return lumiSecExtend
def __createSnippet( self, jsonPath = None, begin = None, end = None,
                     firstRun = None, lastRun = None, repMap = None,
                     crab = False ):
    """Return the source snippet of this dataset.

    The selection is given either as run numbers (``firstRun``/``lastRun``)
    or as times (``begin``/``end``, converted with
    ``self.convertTimeToRun``), optionally combined with a JSON file.
    ``repMap`` is updated in place with the keys ``files``, ``json``,
    ``lumiStr``, ``goodLumiSecStr`` and ``lumiSecExtend`` and then
    substituted into ``self.__dummy_source_template`` (``crab`` true) or
    ``self.__source_template``.

    Raises:
        AllInOneError: for ambiguous or inverted limits, or when no run of
            the dataset lies inside the requested range.
    """
    if firstRun:
        firstRun = int( firstRun )
    if lastRun:
        lastRun = int( lastRun )
    if ( begin and firstRun ) or ( end and lastRun ):
        msg = ( "The Usage of "
                + "'begin' & 'firstRun' " * int( bool( begin and
                                                       firstRun ) )
                + "and " * int( bool( ( begin and firstRun ) and
                                      ( end and lastRun ) ) )
                + "'end' & 'lastRun' " * int( bool( end and lastRun ) )
                + "is ambigous." )
        raise AllInOneError( msg )
    if begin or end:
        ( firstRun, lastRun ) = self.convertTimeToRun(
            begin = begin, end = end, firstRun = firstRun,
            lastRun = lastRun )
    if ( firstRun and lastRun ) and ( firstRun > lastRun ):
        msg = ( "The lower time/runrange limit ('begin'/'firstRun') "
                "chosen is greater than the upper time/runrange limit "
                "('end'/'lastRun').")
        raise AllInOneError( msg )

    goodLumiSecStr = ""
    lumiStr = ""
    lumiSecExtend = ""
    if firstRun or lastRun:
        goodLumiSecStr = ( "lumiSecs = cms.untracked."
                           "VLuminosityBlockRange()\n" )
        lumiStr = " lumisToProcess = lumiSecs,\n"
        if not jsonPath:
            selectedRunList = self.__getRunList()
            if firstRun:
                selectedRunList = [ run for run in selectedRunList \
                                    if run["run_number"] >= firstRun ]
            if lastRun:
                selectedRunList = [ run for run in selectedRunList \
                                    if run["run_number"] <= lastRun ]
            lumiList = [ str( run["run_number"] ) + ":1-" \
                         + str( run["run_number"] ) + ":max" \
                         for run in selectedRunList ]
            splitLumiList = list( self.__chunks( lumiList, 255 ) )
        else:
            theLumiList = LumiList ( filename = jsonPath )
            allRuns = theLumiList.getRuns()
            runsToRemove = []
            for run in allRuns:
                if firstRun and int( run ) < firstRun:
                    runsToRemove.append( run )
                if lastRun and int( run ) > lastRun:
                    runsToRemove.append( run )
            theLumiList.removeRuns( runsToRemove )
            splitLumiList = list( self.__chunks(
                theLumiList.getCMSSWString().split(','), 255 ) )

        # Without any chunk the index access below used to fail with a bare
        # IndexError; report the real problem instead.
        if not splitLumiList or not splitLumiList[0]:
            raise AllInOneError( "No runs of this dataset are left after "
                                 "applying firstRun/begin and lastRun/end." )
        if not len(splitLumiList[0][0]) == 0:
            lumiSecStr = [ "',\n'".join( lumis ) \
                           for lumis in splitLumiList ]
            lumiSecStr = [ "lumiSecs.extend( [\n'" + lumis + "'\n] )" \
                           for lumis in lumiSecStr ]
            lumiSecExtend = "\n".join( lumiSecStr )
    elif jsonPath:
        # Only a JSON file: let the generated config read it at run time.
        goodLumiSecStr = ( "goodLumiSecs = LumiList.LumiList(filename"
                           "= '%(json)s').getCMSSWString().split(',')\n"
                           "lumiSecs = cms.untracked"
                           ".VLuminosityBlockRange()\n" )
        lumiStr = " lumisToProcess = lumiSecs,\n"
        lumiSecExtend = "lumiSecs.extend(goodLumiSecs)\n"

    if crab:
        files = ""
    else:
        splitFileList = list( self.__chunks( self.fileList(), 255 ) )
        fileStr = [ "',\n'".join( fileChunk ) for fileChunk in splitFileList ]
        fileStr = [ "readFiles.extend( [\n'" + fileChunk + "'\n] )" \
                    for fileChunk in fileStr ]
        files = "\n".join( fileStr )

    # The default repMap=None used to crash on the first item assignment.
    # A caller-supplied map is still updated in place, as before.
    theMap = repMap if repMap is not None else {}
    theMap["files"] = files
    theMap["json"] = jsonPath
    theMap["lumiStr"] = lumiStr
    theMap["goodLumiSecStr"] = goodLumiSecStr%( theMap )
    theMap["lumiSecExtend"] = lumiSecExtend
    if crab:
        dataset_snippet = self.__dummy_source_template%( theMap )
    else:
        dataset_snippet = self.__source_template%( theMap )
    return dataset_snippet
def run(self, filecacheurl=None):
    """
    Override run() for JobType

    Downloads ``sandbox.tar.gz`` from the web directory of the task returned
    by ``self.getTaskDict()``, uploads it to the file cache at
    ``filecacheurl`` and assembles the request arguments from the task
    columns and from ``self.config.Data`` (input dataset, user input files,
    lumi mask and run range).

    Returns:
        ``(sandboxFilename, configArguments)``

    Raises:
        CachefileNotFoundException: the upload result has no ``hashkey``.
        ConfigurationException: the lumi mask cannot be loaded, the run
            range is malformed or too large, or mask and run range do not
            overlap.
    """
    taskDict, webdir = self.getTaskDict()
    addoutputfiles = literal_eval(getColumn(taskDict, 'tm_outfiles'))
    tfileoutfiles = literal_eval(getColumn(taskDict, 'tm_tfile_outfiles'))
    edmoutfiles = literal_eval(getColumn(taskDict, 'tm_edm_outfiles'))
    jobarch = getColumn(taskDict, 'tm_job_arch')
    jobsw = getColumn(taskDict, 'tm_job_sw')

    sandboxFilename = os.path.join(self.workdir, 'sandbox.tar.gz')
    curlGetFileFromURL(webdir + '/sandbox.tar.gz', sandboxFilename,
                       self.proxyfilename)

    configArguments = {
        'addoutputfiles': addoutputfiles,
        'tfileoutfiles': tfileoutfiles,
        'edmoutfiles': edmoutfiles,
        'jobarch': jobarch,
        'jobsw': jobsw,
    }

    # Maybe the user wants to change the dataset
    if getattr(self.config.Data, 'inputDataset', None):
        configArguments['inputdata'] = self.config.Data.inputDataset

    ufc = CRABClient.Emulator.getEmulator('ufc')({
        'endpoint': filecacheurl,
        "pycurl": True
    })
    result = ufc.upload(sandboxFilename,
                        excludeList=NEW_USER_SANDBOX_EXCLUSIONS)
    if 'hashkey' not in result:
        self.logger.error("Failed to upload source files: %s" % str(result))
        raise CachefileNotFoundException

    configArguments['cacheurl'] = filecacheurl
    configArguments['cachefilename'] = "%s.tar.gz" % str(result['hashkey'])

    # Upload list of user-defined input files to process as the primary input
    userFilesList = getattr(self.config.Data, 'userInputFiles', None)
    if userFilesList:
        self.logger.debug(
            "Attaching list of user-specified primary input files.")
        # Strip every entry and drop the empty ones.  The comprehension
        # replaces map(string.strip, ...), which exists on Python 2 only.
        userFilesList = [name.strip() for name in userFilesList]
        userFilesList = [name for name in userFilesList if name]
        if len(userFilesList) != len(set(userFilesList)):
            msg = "%sWarning%s:" % (colors.RED, colors.NORMAL)
            msg += " CRAB configuration parameter Data.userInputFiles contains duplicated entries."
            msg += " Duplicated entries will be removed."
            self.logger.warning(msg)
        configArguments['userfiles'] = set(userFilesList)
        configArguments['primarydataset'] = getattr(
            self.config.Data, 'outputPrimaryDataset', 'CRAB_UserFiles')

    lumi_mask_name = getattr(self.config.Data, 'lumiMask', None)
    lumi_list = None
    if lumi_mask_name:
        self.logger.debug("Attaching lumi mask %s to the request" %
                          (lumi_mask_name))
        try:
            lumi_list = getLumiList(lumi_mask_name, logger=self.logger)
        except ValueError as ex:
            msg = "%sError%s:" % (colors.RED, colors.NORMAL)
            msg += " Failed to load lumi mask %s : %s" % (lumi_mask_name, ex)
            raise ConfigurationException(msg)

    run_ranges = getattr(self.config.Data, 'runRange', None)
    if run_ranges:
        # Raw string: the pattern is full of backslash escapes that are not
        # valid string escapes.
        run_ranges_is_valid = re.match(r'^\d+((?!(-\d+-))(\,|\-)\d+)*$',
                                       run_ranges)
        if run_ranges_is_valid:
            run_list = getRunList(run_ranges)
            if lumi_list:
                lumi_list.selectRuns(run_list)
                if not lumi_list:
                    msg = "Invalid CRAB configuration: The intersection between the lumi mask and the run range is null."
                    raise ConfigurationException(msg)
            else:
                if len(run_list) > 50000:
                    msg = "CRAB configuration parameter Data.runRange includes %s runs." % str(
                        len(run_list))
                    msg += " When Data.lumiMask is not specified, Data.runRange can not include more than 50000 runs."
                    raise ConfigurationException(msg)
                lumi_list = LumiList(runs=run_list)
        else:
            msg = "Invalid CRAB configuration: Parameter Data.runRange should be a comma separated list of integers or (inclusive) ranges. Example: '12345,99900-99910'"
            raise ConfigurationException(msg)

    if lumi_list:
        configArguments['runs'] = lumi_list.getRuns()
        ## For each run we encode the lumis as a string representing a list of integers: [[1,2],[5,5]] ==> '1,2,5,5'
        lumi_mask = lumi_list.getCompactList()
        # Flatten the ranges directly instead of summing the lists with
        # reduce() and trimming the repr of the result.
        configArguments['lumis'] = [
            ','.join(str(lumi)
                     for lumi_range in lumi_mask[run]
                     for lumi in lumi_range)
            for run in configArguments['runs']
        ]

    configArguments['jobtype'] = 'Analysis'

    return sandboxFilename, configArguments
from JMTucker.Tools.general import from_pickle, to_pickle

os.system('mkdir -p prescales_temp')


def popen(cmd):
    """Run ``cmd`` through the shell and return its output.

    stderr is merged into stdout, so the returned value holds both.
    """
    return subprocess.Popen(cmd,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            shell=True).communicate()[0]


ll = LumiList(
    'prescales_temp/Cert_190456-208686_8TeV_PromptReco_Collisions12_JSON.txt')
ll_compact = ll.getCompactList()
runs = sorted(int(i) for i in ll.getRuns())


def dump_lumibyls(runs):
    """Dump the per-lumisection csv of every run with ``lumiCalc2.py``.

    The csv of a run goes to ``prescales_temp/lumibyls/<run>.csv``; runs
    whose file already exists are skipped.  The output of the command is
    not inspected.
    """
    total = len(runs)
    for i, run in enumerate(runs):
        out_fn = 'prescales_temp/lumibyls/%i.csv' % run
        already = os.path.isfile(out_fn)
        # print(...) with one pre-formatted argument is identical on
        # Python 2 and Python 3.
        print('run %i (%i/%i)%s' %
              (run, i + 1, total,
               ' (skipping since already dumped)' if already else ''))
        if already:
            continue
        popen('lumiCalc2.py lumibyls -r %i -o %s' % (run, out_fn))
parser.add_option ('--output', dest='output', type='string',
                   help='Save output to file OUTPUT')
# required parameters
(options, args) = parser.parse_args()
# The call form of raise (and of print at the end) is accepted by Python 2
# and Python 3 alike; the statement forms used before are Python 2 only.
if len (args) != 1:
    raise RuntimeError("Must provide exactly one input file")

if options.min and options.max and options.min > options.max:
    raise RuntimeError(
        "Minimum value (%d) is greater than maximum value (%d)"
        % (options.min, options.max))

# Runs named explicitly on the command line; every option value may hold
# several comma-separated run numbers.
# NOTE(review): assumes options.runs is always iterable (e.g. an 'append'
# option with a list default) -- the option definition is not visible here.
commaRE = re.compile (r',')
runsToRemove = []
for chunk in options.runs:
    runs = commaRE.split (chunk)
    runsToRemove.extend (runs)

alphaList = LumiList (filename = args[0]) # Read in first JSON file
allRuns = alphaList.getRuns()
for run in allRuns:
    if options.min and int(run) < options.min:
        runsToRemove.append (run)
    if options.max and int(run) > options.max:
        runsToRemove.append (run)

alphaList.removeRuns (runsToRemove)

if options.output:
    alphaList.writeJSON (options.output)
else:
    print(alphaList)
def __lumiSelectionSnippet(self, jsonPath=None, firstRun=None, lastRun=None):
    """Return the text that fills ``lumiSecs`` for the requested selection.

    Three sources are handled:

    * no ``jsonPath``: every run of ``self.__getRunList()`` inside
      ``firstRun`` .. ``lastRun`` is selected completely (``run:1-run:max``);
    * ``jsonPath`` readable by ``LumiList``: the runs outside
      ``firstRun`` .. ``lastRun`` are removed and the remainder is used;
    * ``jsonPath`` is not JSON but contains
      ``process.source.lumisToProcess``: the file content is used as is,
      after forcing its ranges into ``firstRun`` .. ``lastRun``.

    As a side effect ``self.__firstusedrun`` and ``self.__lastusedrun`` are
    updated.

    Returns:
        ``lumiSecs.extend( [...] )`` statements (chunks of at most 255
        ranges), the adapted cff content, or ``""`` without any selection.

    Raises:
        AllInOneError: ``jsonPath`` is neither JSON nor a cff snippet, or
            the selection does not leave any run.
    """
    lumiSecExtend = ""
    if firstRun or lastRun or jsonPath:
        if not jsonPath:
            selectedRunList = self.__getRunList()
            if firstRun:
                selectedRunList = [ run for run in selectedRunList \
                                    if self.__findInJson(run, "run_number") >= firstRun ]
            if lastRun:
                selectedRunList = [ run for run in selectedRunList \
                                    if self.__findInJson(run, "run_number") <= lastRun ]
            lumiList = [ str( self.__findInJson(run, "run_number") ) + ":1-" \
                         + str( self.__findInJson(run, "run_number") ) + ":max" \
                         for run in selectedRunList ]
            splitLumiList = list(self.__chunks(lumiList, 255))
        else:
            # LumiList raises ValueError for files that are not JSON; such
            # files get a second chance below as cff snippets.
            theLumiList = None
            try:
                theLumiList = LumiList(filename=jsonPath)
            except ValueError:
                pass

            if theLumiList is not None:
                allRuns = theLumiList.getRuns()
                runsToRemove = []
                for run in allRuns:
                    if firstRun and int(run) < firstRun:
                        runsToRemove.append(run)
                    if lastRun and int(run) > lastRun:
                        runsToRemove.append(run)
                theLumiList.removeRuns(runsToRemove)
                splitLumiList = list(
                    self.__chunks(theLumiList.getCMSSWString().split(','),
                                  255))
                # An empty selection shows up as no chunk or as one empty
                # string; normalize both to None.
                if not (splitLumiList and splitLumiList[0]
                        and splitLumiList[0][0]):
                    splitLumiList = None
            else:
                with open(jsonPath) as f:
                    jsoncontents = f.read()
                if "process.source.lumisToProcess" in jsoncontents:
                    msg = "%s is not a json file, but it seems to be a CMSSW lumi selection cff snippet. Trying to use it" % jsonPath
                    if firstRun or lastRun:
                        msg += (
                            "\n (after applying firstRun and/or lastRun)"
                        )
                    msg += ".\nPlease note that, depending on the format of this file, it may not work as expected."
                    msg += "\nCheck your config file to make sure that it worked properly."
                    print(msg)

                    runlist = self.__getRunList()
                    if firstRun or lastRun:
                        # Reset before the substitution; presumably the
                        # callback from getForceRunRangeFunction records the
                        # runs it keeps in these attributes -- TODO confirm.
                        self.__firstusedrun = -1
                        self.__lastusedrun = -1
                        jsoncontents = re.sub(
                            r"\d+:(\d+|max)(-\d+:(\d+|max))?",
                            self.getForceRunRangeFunction(
                                firstRun, lastRun), jsoncontents)
                        # Drop the empty entries left behind by the substitution.
                        jsoncontents = (jsoncontents.replace(
                            "'',\n", "").replace("''\n", "").replace(
                                '"",\n', '').replace('""\n', ''))
                        self.__firstusedrun = max(
                            self.__firstusedrun,
                            int(self.__findInJson(runlist[0], "run_number")))
                        self.__lastusedrun = min(
                            self.__lastusedrun,
                            int(self.__findInJson(runlist[-1], "run_number")))
                        if self.__lastusedrun < self.__firstusedrun:
                            # Nothing survives; triggers the error below.
                            jsoncontents = None
                    else:
                        self.__firstusedrun = int(
                            self.__findInJson(runlist[0], "run_number"))
                        self.__lastusedrun = int(
                            self.__findInJson(runlist[-1], "run_number"))
                    lumiSecExtend = jsoncontents
                    splitLumiList = None
                else:
                    raise AllInOneError(
                        "%s is not a valid json file!" % jsonPath)

        if splitLumiList and splitLumiList[0] and splitLumiList[0][0]:
            lumiSecStr = [ "',\n'".join( lumis ) \
                           for lumis in splitLumiList ]
            lumiSecStr = [ "lumiSecs.extend( [\n'" + lumis + "'\n] )" \
                           for lumis in lumiSecStr ]
            lumiSecExtend = "\n".join(lumiSecStr)
            runlist = self.__getRunList()
            self.__firstusedrun = max(
                int(splitLumiList[0][0].split(":")[0]),
                int(self.__findInJson(runlist[0], "run_number")))
            self.__lastusedrun = min(
                int(splitLumiList[-1][-1].split(":")[0]),
                int(self.__findInJson(runlist[-1], "run_number")))
        elif lumiSecExtend:
            # cff snippet: already stored in lumiSecExtend above.
            pass
        else:
            msg = "You are trying to run a validation without any runs! Check that:"
            if firstRun or lastRun:
                msg += "\n - firstRun/begin and lastRun/end are correct for this dataset, and there are runs in between containing data"
            if jsonPath:
                msg += "\n - your JSON file is correct for this dataset, and the runs contain data"
            if (firstRun or lastRun) and jsonPath:
                msg += "\n - firstRun/begin and lastRun/end are consistent with your JSON file"
            raise AllInOneError(msg)

    else:
        # No selection at all: the whole run list is used.  Fetch it once
        # and reuse it for both ends.
        runlist = self.__getRunList()
        self.__firstusedrun = int(
            self.__findInJson(runlist[0], "run_number"))
        self.__lastusedrun = int(
            self.__findInJson(runlist[-1], "run_number"))

    return lumiSecExtend
#!/usr/bin/env python import os, re, sys from collections import defaultdict from itertools import combinations from FWCore.PythonUtilities.LumiList import LumiList # Should rewrite not to hit the db for every cfg, but just get the HLT # key for each run and then only get cfgs for unique keys. dcsonly_ll = LumiList( '/afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions12/8TeV/DCSOnly/json_DCSONLY.txt' ) # JMTBAD use DCSOnly_ll from goodlumis once sorted runs = sorted(int(run) for run in dcsonly_ll.getRuns()) cmd = 'edmConfigFromDB --cff --runNumber %i --noedsources --noes --noservices --nomodules' path_re = re.compile(r'(HLT_Mu40_eta2p1_v\d+)') prescaled_path_re = re.compile(r'(HLT_Mu15_eta2p1_v\d+)') paths_and_filters = defaultdict(list) for run in runs: print 'run:', run, sys.stdout.flush() path = prescaled_path = filter = prescaled_filter = None for line in os.popen(cmd % run): if 'cms.Path' not in line: continue filt = line.split(' + ')[-2] # JMTBAD fragile mo = path_re.search(line) if mo is not None:
def __createSnippet( self, jsonPath = None, begin = None, end = None,
                     firstRun = None, lastRun = None, repMap = None,
                     crab = False, parent = False ):
    """Return the source snippet of this dataset.

    The selection is given either as run numbers (``firstRun``/``lastRun``)
    or as times (``begin``/``end``, converted with
    ``self.convertTimeToRun``), optionally combined with ``jsonPath``, which
    may be a JSON file or a cff snippet containing
    ``process.source.lumisToProcess``.  With ``parent`` true the parent
    files are added as ``secFiles``.

    ``repMap`` is updated in place with the keys ``files``, ``json``,
    ``lumiStr``, ``goodLumiSecStr`` and ``lumiSecExtend`` and then
    substituted into ``self.__dummy_source_template`` (``crab`` true) or
    ``self.__source_template``.  ``self.__firstusedrun`` and
    ``self.__lastusedrun`` are updated as a side effect.

    Raises:
        AllInOneError: for ambiguous or inverted limits, a selection on a
            predefined dataset, an unusable ``jsonPath``, or when no run of
            the dataset lies inside the requested range.
    """
    if firstRun:
        firstRun = int( firstRun )
    if lastRun:
        lastRun = int( lastRun )
    if ( begin and firstRun ) or ( end and lastRun ):
        msg = ( "The Usage of "
                + "'begin' & 'firstRun' " * int( bool( begin and
                                                       firstRun ) )
                + "and " * int( bool( ( begin and firstRun ) and
                                      ( end and lastRun ) ) )
                + "'end' & 'lastRun' " * int( bool( end and lastRun ) )
                + "is ambigous." )
        raise AllInOneError( msg )
    if begin or end:
        ( firstRun, lastRun ) = self.convertTimeToRun(
            begin = begin, end = end, firstRun = firstRun,
            lastRun = lastRun )
    if ( firstRun and lastRun ) and ( firstRun > lastRun ):
        msg = ( "The lower time/runrange limit ('begin'/'firstRun') "
                "chosen is greater than the upper time/runrange limit "
                "('end'/'lastRun').")
        raise AllInOneError( msg )
    if self.predefined() and (jsonPath or begin or end or firstRun or lastRun):
        # A space was missing between the two halves of this message.
        msg = ( "The parameters 'JSON', 'begin', 'end', 'firstRun', and 'lastRun' "
                "only work for official datasets, not predefined _cff.py files" )
        raise AllInOneError( msg )

    goodLumiSecStr = ""
    lumiStr = ""
    lumiSecExtend = ""
    if firstRun or lastRun or jsonPath:
        goodLumiSecStr = ( "lumiSecs = cms.untracked."
                           "VLuminosityBlockRange()\n" )
        lumiStr = " lumisToProcess = lumiSecs,\n"
        if not jsonPath:
            selectedRunList = self.__getRunList()
            if firstRun:
                selectedRunList = [ run for run in selectedRunList \
                                    if self.__findInJson(run, "run_number") >= firstRun ]
            if lastRun:
                selectedRunList = [ run for run in selectedRunList \
                                    if self.__findInJson(run, "run_number") <= lastRun ]
            lumiList = [ str( self.__findInJson(run, "run_number") ) + ":1-" \
                         + str( self.__findInJson(run, "run_number") ) + ":max" \
                         for run in selectedRunList ]
            splitLumiList = list( self.__chunks( lumiList, 255 ) )
        else:
            # LumiList raises ValueError for files that are not JSON; such
            # files get a second chance below as cff snippets.
            theLumiList = None
            try:
                theLumiList = LumiList ( filename = jsonPath )
            except ValueError:
                pass

            if theLumiList is not None:
                allRuns = theLumiList.getRuns()
                runsToRemove = []
                for run in allRuns:
                    if firstRun and int( run ) < firstRun:
                        runsToRemove.append( run )
                    if lastRun and int( run ) > lastRun:
                        runsToRemove.append( run )
                theLumiList.removeRuns( runsToRemove )
                splitLumiList = list( self.__chunks(
                    theLumiList.getCMSSWString().split(','), 255 ) )
            else:
                with open(jsonPath) as f:
                    jsoncontents = f.read()
                if "process.source.lumisToProcess" in jsoncontents:
                    msg = "%s is not a json file, but it seems to be a CMSSW lumi selection cff snippet. Trying to use it" % jsonPath
                    if firstRun or lastRun:
                        msg += ("\n (after applying firstRun and/or lastRun)")
                    msg += ".\nPlease note that, depending on the format of this file, it may not work as expected."
                    msg += "\nCheck your config file to make sure that it worked properly."
                    print(msg)

                    runlist = self.__getRunList()
                    if firstRun or lastRun:
                        # Reset before the substitution; presumably the
                        # callback from getForceRunRangeFunction records the
                        # runs it keeps in these attributes -- TODO confirm.
                        self.__firstusedrun = -1
                        self.__lastusedrun = -1
                        # Raw string: same pattern, no reliance on unknown
                        # string escapes being passed through.
                        jsoncontents = re.sub(r"\d+:(\d+|max)-\d+:(\d+|max)",
                                              self.getForceRunRangeFunction(firstRun, lastRun),
                                              jsoncontents)
                        self.__firstusedrun = max(self.__firstusedrun, int(self.__findInJson(runlist[0],"run_number")))
                        self.__lastusedrun = min(self.__lastusedrun, int(self.__findInJson(runlist[-1],"run_number")))
                    else:
                        self.__firstusedrun = int(self.__findInJson(runlist[0],"run_number"))
                        self.__lastusedrun = int(self.__findInJson(runlist[-1],"run_number"))
                    lumiSecExtend = jsoncontents
                    # Sentinel: one empty entry makes the block below skip.
                    splitLumiList = [[""]]
                else:
                    # Previously splitLumiList stayed unbound here and the
                    # check below failed with an UnboundLocalError.
                    raise AllInOneError("%s is not a valid json file!" % jsonPath)

        # Without any chunk the index access below used to fail with a bare
        # IndexError; report the real problem instead.
        if not splitLumiList or not splitLumiList[0]:
            raise AllInOneError( "No runs of this dataset are left after "
                                 "applying firstRun/begin and lastRun/end." )
        if not len(splitLumiList[0][0]) == 0:
            lumiSecStr = [ "',\n'".join( lumis ) \
                           for lumis in splitLumiList ]
            lumiSecStr = [ "lumiSecs.extend( [\n'" + lumis + "'\n] )" \
                           for lumis in lumiSecStr ]
            lumiSecExtend = "\n".join( lumiSecStr )
            runlist = self.__getRunList()
            self.__firstusedrun = max(int(splitLumiList[0][0].split(":")[0]), int(self.__findInJson(runlist[0],"run_number")))
            self.__lastusedrun = min(int(splitLumiList[-1][-1].split(":")[0]), int(self.__findInJson(runlist[-1],"run_number")))
    else:
        # No selection at all: the whole run list is used.  Fetch it once
        # and reuse it for both ends.
        runlist = self.__getRunList()
        self.__firstusedrun = int(self.__findInJson(runlist[0],"run_number"))
        self.__lastusedrun = int(self.__findInJson(runlist[-1],"run_number"))

    if crab:
        files = ""
    else:
        splitFileList = list( self.__chunks( self.fileList(), 255 ) )
        fileStr = [ "',\n'".join( fileChunk ) for fileChunk in splitFileList ]
        fileStr = [ "readFiles.extend( [\n'" + fileChunk + "'\n] )" \
                    for fileChunk in fileStr ]
        files = "\n".join( fileStr )

        if parent:
            splitParentFileList = list( self.__chunks( self.fileList(parent = True), 255 ) )
            parentFileStr = [ "',\n'".join( parentChunk ) for parentChunk in splitParentFileList ]
            parentFileStr = [ "secFiles.extend( [\n'" + parentChunk + "'\n] )" \
                              for parentChunk in parentFileStr ]
            parentFiles = "\n".join( parentFileStr )
            files += "\n\n" + parentFiles

    # The default repMap=None used to crash on the first item assignment.
    # A caller-supplied map is still updated in place, as before.
    theMap = repMap if repMap is not None else {}
    theMap["files"] = files
    theMap["json"] = jsonPath
    theMap["lumiStr"] = lumiStr
    theMap["goodLumiSecStr"] = goodLumiSecStr%( theMap )
    theMap["lumiSecExtend"] = lumiSecExtend
    if crab:
        dataset_snippet = self.__dummy_source_template%( theMap )
    else:
        dataset_snippet = self.__source_template%( theMap )
    return dataset_snippet
import re, os, subprocess from pprint import pprint from collections import defaultdict from FWCore.PythonUtilities.LumiList import LumiList from RecoLuminosity.LumiDB import sessionManager, lumiCalcAPI, revisionDML from JMTucker.Tools.general import from_pickle, to_pickle os.system('mkdir -p prescales_temp') def popen(cmd): return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True).communicate()[0] ll = LumiList('prescales_temp/Cert_190456-208686_8TeV_PromptReco_Collisions12_JSON.txt') ll_compact = ll.getCompactList() runs = [int(i) for i in ll.getRuns()] runs.sort() def dump_lumibyls(runs): l = float(len(runs)) for i,run in enumerate(runs): out_fn = 'prescales_temp/lumibyls/%i.csv' % run already = os.path.isfile(out_fn) print 'run %i (%i/%i)%s' % (run, i+1, l, ' (skipping since already dumped)' if already else '') if already: continue popen('lumiCalc2.py lumibyls -r %i -o %s' % (run, out_fn)) def parse_lumibyls(run): d = defaultdict(dict) for line in open('prescales_temp/lumibyls/%i.csv' % run):
def __createSnippet(self,
                    jsonPath=None,
                    begin=None,
                    end=None,
                    firstRun=None,
                    lastRun=None,
                    repMap=None,
                    crab=False):
    """Return the source snippet of this dataset.

    The selection is given either as run numbers (``firstRun``/``lastRun``)
    or as times (``begin``/``end``, converted with
    ``self.convertTimeToRun``), optionally combined with a JSON file.
    ``repMap`` is updated in place with the keys ``files``, ``json``,
    ``lumiStr``, ``goodLumiSecStr`` and ``lumiSecExtend`` and then
    substituted into ``self.__dummy_source_template`` (``crab`` true) or
    ``self.__source_template``.

    Raises:
        AllInOneError: for ambiguous or inverted limits, or when no run of
            the dataset lies inside the requested range.
    """
    if firstRun:
        firstRun = int(firstRun)
    if lastRun:
        lastRun = int(lastRun)
    if (begin and firstRun) or (end and lastRun):
        msg = (
            "The Usage of " +
            "'begin' & 'firstRun' " * int(bool(begin and firstRun)) +
            "and " * int(bool(
                (begin and firstRun) and (end and lastRun))) +
            "'end' & 'lastRun' " * int(bool(end and lastRun)) +
            "is ambigous.")
        raise AllInOneError(msg)
    if begin or end:
        (firstRun, lastRun) = self.convertTimeToRun(begin=begin,
                                                    end=end,
                                                    firstRun=firstRun,
                                                    lastRun=lastRun)
    if (firstRun and lastRun) and (firstRun > lastRun):
        msg = ("The lower time/runrange limit ('begin'/'firstRun') "
               "chosen is greater than the upper time/runrange limit "
               "('end'/'lastRun').")
        raise AllInOneError(msg)

    goodLumiSecStr = ""
    lumiStr = ""
    lumiSecExtend = ""
    if firstRun or lastRun:
        goodLumiSecStr = ("lumiSecs = cms.untracked."
                          "VLuminosityBlockRange()\n")
        lumiStr = " lumisToProcess = lumiSecs,\n"
        if not jsonPath:
            selectedRunList = self.__getRunList()
            if firstRun:
                selectedRunList = [ run for run in selectedRunList \
                                    if run["run_number"] >= firstRun ]
            if lastRun:
                selectedRunList = [ run for run in selectedRunList \
                                    if run["run_number"] <= lastRun ]
            lumiList = [ str( run["run_number"] ) + ":1-" \
                         + str( run["run_number"] ) + ":max" \
                         for run in selectedRunList ]
            splitLumiList = list(self.__chunks(lumiList, 255))
        else:
            theLumiList = LumiList(filename=jsonPath)
            allRuns = theLumiList.getRuns()
            runsToRemove = []
            for run in allRuns:
                if firstRun and int(run) < firstRun:
                    runsToRemove.append(run)
                if lastRun and int(run) > lastRun:
                    runsToRemove.append(run)
            theLumiList.removeRuns(runsToRemove)
            splitLumiList = list(
                self.__chunks(theLumiList.getCMSSWString().split(','), 255))

        # Without any chunk the index access below used to fail with a bare
        # IndexError; report the real problem instead.
        if not splitLumiList or not splitLumiList[0]:
            raise AllInOneError("No runs of this dataset are left after "
                                "applying firstRun/begin and lastRun/end.")
        if not len(splitLumiList[0][0]) == 0:
            lumiSecStr = [ "',\n'".join( lumis ) \
                           for lumis in splitLumiList ]
            lumiSecStr = [ "lumiSecs.extend( [\n'" + lumis + "'\n] )" \
                           for lumis in lumiSecStr ]
            lumiSecExtend = "\n".join(lumiSecStr)
    elif jsonPath:
        # Only a JSON file: let the generated config read it at run time.
        goodLumiSecStr = ("goodLumiSecs = LumiList.LumiList(filename"
                          "= '%(json)s').getCMSSWString().split(',')\n"
                          "lumiSecs = cms.untracked"
                          ".VLuminosityBlockRange()\n")
        lumiStr = " lumisToProcess = lumiSecs,\n"
        lumiSecExtend = "lumiSecs.extend(goodLumiSecs)\n"

    if crab:
        files = ""
    else:
        splitFileList = list(self.__chunks(self.fileList(), 255))
        fileStr = ["',\n'".join(fileChunk) for fileChunk in splitFileList]
        fileStr = [ "readFiles.extend( [\n'" + fileChunk + "'\n] )" \
                    for fileChunk in fileStr ]
        files = "\n".join(fileStr)

    # The default repMap=None used to crash on the first item assignment.
    # A caller-supplied map is still updated in place, as before.
    theMap = repMap if repMap is not None else {}
    theMap["files"] = files
    theMap["json"] = jsonPath
    theMap["lumiStr"] = lumiStr
    theMap["goodLumiSecStr"] = goodLumiSecStr % (theMap)
    theMap["lumiSecExtend"] = lumiSecExtend
    if crab:
        dataset_snippet = self.__dummy_source_template % (theMap)
    else:
        dataset_snippet = self.__source_template % (theMap)
    return dataset_snippet
def run(self, filecacheurl=None):  # pylint: disable=arguments-differ
    """
    Override run() for JobType.

    Loads and (optionally) validates the CMSSW pset, classifies the output
    files, packs and uploads the input sandbox and the debug-files tarball,
    and collects run/lumi restrictions from the CRAB configuration.

    Args:
        filecacheurl: Passed to ``UserTarball.upload`` and stored in the
            returned arguments as ``cacheurl``.

    Returns:
        Tuple ``(tarFilename, configArguments)``: the path of the sandbox
        tarball and the dictionary of request arguments built here.

    Raises:
        ConfigurationException: On an invalid CRAB configuration or pset.
        EnvironmentException: If no tarball UUID could be generated.
        ClientException: If publication is requested with more than one EDM
            output, or if the sandbox upload fails.
        HTTPException: Re-raised when the upload fails with an HTTP error
            that is not the recognised "file too big" response.
    """
    configArguments = {
        'addoutputfiles': [],
        'tfileoutfiles': [],
        'edmoutfiles': [],
    }

    if getattr(self.config.Data, 'useParent', False) and getattr(
            self.config.Data, 'secondaryInputDataset', None):
        msg = "Invalid CRAB configuration: Parameters Data.useParent and Data.secondaryInputDataset cannot be used together."
        raise ConfigurationException(msg)

    # Get SCRAM environment
    scram = ScramEnvironment(logger=self.logger)

    configArguments.update({
        'jobarch': scram.getScramArch(),
        'jobsw': scram.getCmsswVersion()
    })

    # Build tarball
    if self.workdir:
        tarUUID = str(uuid.uuid4())
        self.logger.debug('UNIQUE NAME: tarUUID %s ' % tarUUID)
        if len(tarUUID):
            tarFilename = os.path.join(self.workdir, tarUUID + 'default.tgz')
            debugTarFilename = os.path.join(self.workdir, 'debugFiles.tgz')
            cfgOutputName = os.path.join(self.workdir, BOOTSTRAP_CFGFILE)
        else:
            raise EnvironmentException(
                'Problem with uuidgen while preparing for Sandbox upload.')
    else:
        # mkstemp() returns an open descriptor together with the path; only
        # the path is used below, so close the descriptor instead of leaking
        # it.  debugTarFilename must be defined on this path too, because
        # the debug-files upload further down uses it unconditionally.
        tarFd, tarFilename = tempfile.mkstemp(suffix='.tgz')
        os.close(tarFd)
        debugFd, debugTarFilename = tempfile.mkstemp(suffix='.tgz')
        os.close(debugFd)
        cfgFd, cfgOutputName = tempfile.mkstemp(suffix='_cfg.py')
        os.close(cfgFd)

    if getattr(self.config.Data, 'inputDataset', None):
        configArguments['inputdata'] = self.config.Data.inputDataset

    ## Create CMSSW config.
    self.logger.debug("self.config: %s" % (self.config))
    self.logger.debug("self.config.JobType.psetName: %s" %
                      (self.config.JobType.psetName))
    ## The loading of a CMSSW pset in the CMSSWConfig constructor is not idempotent
    ## in the sense that a second loading of the same pset may not produce the same
    ## result. Therefore there is a cache in CMSSWConfig to avoid loading any CMSSW
    ## pset twice. However, some "complicated" psets seem to evade the caching.
    ## Thus, to be safe, keep the CMSSWConfig instance in a class variable, so that
    ## it can be reused later if wanted (for example, in PrivateMC when checking if
    ## the pset has an LHE source) instead of having to load the pset again.
    ## As for what does "complicated" psets mean, Daniel Riley said that there are
    ## some psets where one module modifies the configuration from another module.
    self.cmsswCfg = CMSSWConfig(config=self.config,
                                logger=self.logger,
                                userConfig=self.config.JobType.psetName)

    ## If there is a CMSSW pset, do a basic validation of it.
    if not bootstrapDone() and self.config.JobType.psetName:
        valid, msg = self.cmsswCfg.validateConfig()
        if not valid:
            raise ConfigurationException(msg)

    ## We need to put the pickled CMSSW configuration in the right place.
    ## Here, we determine if the bootstrap script already run and prepared everything
    ## for us. In such case we move the file, otherwise we pickle.dump the pset
    if not bootstrapDone():
        # Write out CMSSW config
        self.cmsswCfg.writeFile(cfgOutputName)
    else:
        # Move the pickled and the configuration files created by the bootstrap script
        self.moveCfgFile(cfgOutputName)

    ## Interrogate the CMSSW pset for output files (only output files produced by
    ## PoolOutputModule or TFileService are identified automatically). Do this
    ## automatic detection even if JobType.disableAutomaticOutputCollection = True,
    ## so that we can still classify the output files in EDM, TFile and additional
    ## output files in the Task DB (and the job ad).
    ## TODO: Do we really need this classification at all? cmscp and PostJob read
    ## the FJR to know if an output file is EDM, TFile or other.
    edmfiles, tfiles = self.cmsswCfg.outputFiles()

    # Both values are needed more than once below; compute them a single time.
    autoCollectionDisabled = getattr(
        self.config.JobType, 'disableAutomaticOutputCollection',
        getParamDefaultValue('JobType.disableAutomaticOutputCollection'))
    userOutputFiles = [
        re.sub(r'^file:', '', f)
        for f in getattr(self.config.JobType, 'outputFiles', [])
    ]

    ## If JobType.disableAutomaticOutputCollection = True, ignore the EDM and TFile
    ## output files that are not listed in JobType.outputFiles.
    if autoCollectionDisabled:
        edmfiles = [f for f in edmfiles if f in userOutputFiles]
        tfiles = [f for f in tfiles if f in userOutputFiles]

    ## Get the list of additional output files that have to be collected as given
    ## in JobType.outputFiles, but remove duplicates listed already as EDM files or
    ## TFiles.
    detectedFiles = edmfiles + tfiles
    addoutputFiles = [f for f in userOutputFiles if f not in detectedFiles]
    outputWarn = "The following user output files (not listed as PoolOuputModule or TFileService in the CMSSW PSet) will be collected: %s" % ", ".join(
        ["'{0}'".format(x) for x in addoutputFiles])
    self.logger.debug(
        "The following EDM output files will be collected: %s" % edmfiles)
    self.logger.debug(
        "The following TFile output files will be collected: %s" % tfiles)
    if getattr(self.config.Data, 'publication',
               False) and len(edmfiles) > 1:
        self.logger.error(
            "The input PSet produces multiple EDM output files: %s",
            edmfiles)
        self.logger.error(
            "But current CRAB version can't publish more than one dataset per task"
        )
        self.logger.error(
            "Either disable publication or submit multiple times with only one output at a time"
        )
        msg = "Submission refused"
        raise ClientException(msg)
    if addoutputFiles:
        self.logger.warning(outputWarn)
    else:
        self.logger.debug(outputWarn)
    configArguments['edmoutfiles'] = edmfiles
    configArguments['tfileoutfiles'] = tfiles
    configArguments['addoutputfiles'].extend(addoutputFiles)

    ## Give warning message in case no output file was detected in the CMSSW pset
    ## nor was any specified in the CRAB configuration.
    if not configArguments['edmoutfiles'] and not configArguments[
            'tfileoutfiles'] and not configArguments['addoutputfiles']:
        msg = "%sWarning%s:" % (colors.RED, colors.NORMAL)
        if autoCollectionDisabled:
            msg += " Automatic detection of output files in the CMSSW configuration is disabled from the CRAB configuration"
            msg += " and no output file was explicitly specified in the CRAB configuration."
        else:
            msg += " CRAB could not detect any output file in the CMSSW configuration"
            msg += " nor was any explicitly specified in the CRAB configuration."
        msg += " Hence CRAB will not collect any output file from this task."
        self.logger.warning(msg)

    ## UserTarball calls ScramEnvironment which can raise EnvironmentException.
    ## Since ScramEnvironment is already called above and the exception is not
    ## handled, we are sure that if we reached this point it will not raise EnvironmentException.
    ## But otherwise we should take this into account.
    with UserTarball(name=tarFilename,
                     logger=self.logger,
                     config=self.config,
                     crabserver=self.crabserver,
                     s3tester=self.s3tester) as tb:
        inputFiles = [
            re.sub(r'^file:', '', f)
            for f in getattr(self.config.JobType, 'inputFiles', [])
        ]
        tb.addFiles(userFiles=inputFiles, cfgOutputName=cfgOutputName)
        try:
            uploadResult = tb.upload(filecacheurl=filecacheurl)
        except HTTPException as hte:
            if 'X-Error-Info' in hte.headers:
                reason = hte.headers['X-Error-Info']
                reason_re = re.compile(
                    r'\AFile size is ([0-9]*)B\. This is bigger than the maximum allowed size of ([0-9]*)B\.$'
                )
                re_match = reason_re.match(reason)
                if re_match:
                    ISBSize = int(re_match.group(1))
                    ISBSizeLimit = int(re_match.group(2))
                    reason = "%sError%s:" % (colors.RED, colors.NORMAL)
                    # Floor division keeps whole-MB figures on Python 3 as
                    # well (plain '/' would print long float values there).
                    reason += " Input sandbox size is ~%sMB. This is bigger than the maximum allowed size of %sMB." % (
                        ISBSize // 1024 // 1024,
                        ISBSizeLimit // 1024 // 1024)
                    reason += tb.printSortedContent()
                    raise ClientException(reason)
            # Any other HTTP error is passed on unchanged.
            raise
        except Exception as e:
            msg = (
                "Impossible to upload the sandbox tarball.\nError message: %s.\n"
                "More details can be found in %s" %
                (e, self.logger.logfile))
            raise ClientException(msg)

    # upload debug files
    debugFilesUploadResult = None
    with UserTarball(name=debugTarFilename,
                     logger=self.logger,
                     config=self.config,
                     crabserver=self.crabserver,
                     s3tester=self.s3tester) as dtb:
        dtb.addMonFiles()
        try:
            debugFilesUploadResult = dtb.upload(filecacheurl=filecacheurl)
        except Exception as e:
            # Best effort: a failed debug upload is logged, not fatal.
            msg = (
                "Problem uploading debug_files.tar.gz.\nError message: %s.\n"
                "More details can be found in %s" %
                (e, self.logger.logfile))
            LOGGERS['CRAB3'].exception(
                msg)  #the traceback is only printed into the logfile

    configArguments['cacheurl'] = filecacheurl
    configArguments['cachefilename'] = "%s.tar.gz" % uploadResult
    if debugFilesUploadResult is not None:
        configArguments[
            'debugfilename'] = "%s.tar.gz" % debugFilesUploadResult
    self.logger.debug("Result uploading input files: %(cachefilename)s " %
                      configArguments)

    # Upload list of user-defined input files to process as the primary input
    userFilesList = getattr(self.config.Data, 'userInputFiles', None)
    if userFilesList:
        self.logger.debug(
            "Attaching list of user-specified primary input files.")
        userFilesList = [f.strip() for f in userFilesList]
        userFilesList = [f for f in userFilesList if f]
        if len(userFilesList) != len(set(userFilesList)):
            msg = "%sWarning%s:" % (colors.RED, colors.NORMAL)
            msg += " CRAB configuration parameter Data.userInputFiles contains duplicated entries."
            msg += " Duplicated entries will be removed."
            self.logger.warning(msg)
        configArguments['userfiles'] = set(userFilesList)
        configArguments['primarydataset'] = getattr(
            self.config.Data, 'outputPrimaryDataset', 'CRAB_UserFiles')

    lumi_mask_name = getattr(self.config.Data, 'lumiMask', None)
    lumi_list = None
    if lumi_mask_name:
        self.logger.debug("Attaching lumi mask %s to the request" %
                          (lumi_mask_name))
        try:
            lumi_list = getLumiList(lumi_mask_name, logger=self.logger)
        except ValueError as ex:
            msg = "%sError%s:" % (colors.RED, colors.NORMAL)
            msg += " Failed to load lumi mask %s : %s" % (lumi_mask_name,
                                                          ex)
            raise ConfigurationException(msg)
    run_ranges = getattr(self.config.Data, 'runRange', None)
    if run_ranges:
        # Raw string: same pattern as before, without relying on '\d' and
        # '\,' being passed through as unknown string escapes.
        run_ranges_is_valid = re.match(r'^\d+((?!(-\d+-))(\,|\-)\d+)*$',
                                       run_ranges)
        if run_ranges_is_valid:
            run_list = getRunList(run_ranges)
            if lumi_list:
                lumi_list.selectRuns(run_list)
                if not lumi_list:
                    msg = "Invalid CRAB configuration: The intersection between the lumi mask and the run range is null."
                    raise ConfigurationException(msg)
            else:
                if len(run_list) > 50000:
                    msg = "CRAB configuration parameter Data.runRange includes %s runs." % str(
                        len(run_list))
                    msg += " When Data.lumiMask is not specified, Data.runRange can not include more than 50000 runs."
                    raise ConfigurationException(msg)
                lumi_list = LumiList(runs=run_list)
        else:
            msg = "Invalid CRAB configuration: Parameter Data.runRange should be a comma separated list of integers or (inclusive) ranges. Example: '12345,99900-99910'"
            raise ConfigurationException(msg)

    if lumi_list:
        configArguments['runs'] = lumi_list.getRuns()
        ## For each run we encode the lumis as a string representing a list of integers: [[1,2],[5,5]] ==> '1,2,5,5'
        lumi_mask = lumi_list.getCompactList()
        # Joined directly rather than via str(reduce(...)): same text for
        # integer ranges, linear time, and no dependency on a 'reduce' name
        # being available.
        # NOTE(review): assumes every run has at least one lumi range.
        configArguments['lumis'] = [
            ','.join(str(lumi) for lumiRange in lumi_mask[run]
                     for lumi in lumiRange)
            for run in configArguments['runs']
        ]

    configArguments['jobtype'] = 'Analysis'

    return tarFilename, configArguments
def __createSnippet(self, jsonPath=None, begin=None, end=None,
                    firstRun=None, lastRun=None, repMap=None,
                    crab=False, parent=False):
    """Fill the dataset source template and return the resulting snippet.

    Besides building the snippet this records the first and last run that
    are actually used in ``self.__firstusedrun`` / ``self.__lastusedrun``.

    Args:
        jsonPath: Path of a luminosity selection file.  It is first read as
            JSON through ``LumiList``; if that raises ``ValueError`` the
            file is accepted as a CMSSW lumi selection cff snippet when it
            contains ``process.source.lumisToProcess``.
        begin, end: Time limits, converted to run numbers through
            ``self.convertTimeToRun``.  ``begin`` may not be combined with
            ``firstRun`` and ``end`` may not be combined with ``lastRun``.
        firstRun, lastRun: Inclusive run-number limits; converted with
            ``int()`` when truthy.
        repMap: Replacement map used to fill the template.  It is updated
            in place with the keys ``files``, ``json``, ``lumiStr``,
            ``goodLumiSecStr`` and ``lumiSecExtend``.  ``None`` is treated
            as an empty map.
        crab: When true the file list is left empty and
            ``self.__dummy_source_template`` is used instead of
            ``self.__source_template``.
        parent: When true (and ``crab`` is false) the parent files are
            appended as ``secFiles.extend(...)`` statements.

    Returns:
        The filled template as a string.

    Raises:
        AllInOneError: If the limits are ambiguous or inverted, if a
            selection is requested for a predefined dataset, if
            ``jsonPath`` is neither JSON nor a lumi selection snippet, or
            if the selection contains no runs.
    """
    if firstRun:
        firstRun = int(firstRun)
    if lastRun:
        lastRun = int(lastRun)
    if (begin and firstRun) or (end and lastRun):
        # Each fragment is multiplied by 0 or 1 so that only the conflicting
        # pair(s) show up in the message.
        msg = ("The Usage of "
               + "'begin' & 'firstRun' " * int(bool(begin and firstRun))
               + "and " * int(bool((begin and firstRun)
                                   and (end and lastRun)))
               + "'end' & 'lastRun' " * int(bool(end and lastRun))
               + "is ambigous.")
        raise AllInOneError(msg)
    if begin or end:
        (firstRun, lastRun) = self.convertTimeToRun(begin=begin, end=end,
                                                    firstRun=firstRun,
                                                    lastRun=lastRun)
    if (firstRun and lastRun) and (firstRun > lastRun):
        msg = ("The lower time/runrange limit ('begin'/'firstRun') "
               "chosen is greater than the upper time/runrange limit "
               "('end'/'lastRun').")
        raise AllInOneError(msg)
    if self.predefined() and (jsonPath or begin or end
                              or firstRun or lastRun):
        msg = ("The parameters 'JSON', 'begin', 'end', 'firstRun', and 'lastRun'"
               "only work for official datasets, not predefined _cff.py files")
        raise AllInOneError(msg)

    goodLumiSecStr = ""
    lumiStr = ""
    lumiSecExtend = ""
    if firstRun or lastRun or jsonPath:
        goodLumiSecStr = ("lumiSecs = cms.untracked."
                          "VLuminosityBlockRange()\n")
        lumiStr = " lumisToProcess = lumiSecs,\n"
        if not jsonPath:
            # No JSON: select whole runs ("run:1-run:max") inside the limits.
            selectedRunList = self.__getRunList()
            if firstRun:
                selectedRunList = [
                    run for run in selectedRunList
                    if self.__findInJson(run, "run_number") >= firstRun]
            if lastRun:
                selectedRunList = [
                    run for run in selectedRunList
                    if self.__findInJson(run, "run_number") <= lastRun]
            lumiList = [str(self.__findInJson(run, "run_number")) + ":1-"
                        + str(self.__findInJson(run, "run_number")) + ":max"
                        for run in selectedRunList]
            splitLumiList = list(self.__chunks(lumiList, 255))
        else:
            theLumiList = None
            try:
                theLumiList = LumiList(filename=jsonPath)
            except ValueError:
                # Not JSON; fall back to the cff-snippet handling below.
                pass

            if theLumiList is not None:
                runsToRemove = []
                for run in theLumiList.getRuns():
                    if firstRun and int(run) < firstRun:
                        runsToRemove.append(run)
                    if lastRun and int(run) > lastRun:
                        runsToRemove.append(run)
                theLumiList.removeRuns(runsToRemove)
                splitLumiList = list(
                    self.__chunks(theLumiList.getCMSSWString().split(','),
                                  255))
            else:
                with open(jsonPath) as f:
                    jsoncontents = f.read()
                if "process.source.lumisToProcess" not in jsoncontents:
                    # Without this the code below would fail on an unbound
                    # 'splitLumiList' instead of naming the real problem.
                    raise AllInOneError("%s is not a valid json file!"
                                        % jsonPath)
                msg = "%s is not a json file, but it seems to be a CMSSW lumi selection cff snippet. Trying to use it" % jsonPath
                if firstRun or lastRun:
                    msg += ("\n (after applying firstRun and/or lastRun)")
                msg += ".\nPlease note that, depending on the format of this file, it may not work as expected."
                msg += "\nCheck your config file to make sure that it worked properly."
                print(msg)

                runlist = self.__getRunList()
                if firstRun or lastRun:
                    self.__firstusedrun = -1
                    self.__lastusedrun = -1
                    # Every "run:lumi-run:lumi" range in the snippet is
                    # passed through the callback returned by
                    # getForceRunRangeFunction.
                    jsoncontents = re.sub(
                        r"\d+:(\d+|max)-\d+:(\d+|max)",
                        self.getForceRunRangeFunction(firstRun, lastRun),
                        jsoncontents)
                    self.__firstusedrun = max(
                        self.__firstusedrun,
                        int(self.__findInJson(runlist[0], "run_number")))
                    self.__lastusedrun = min(
                        self.__lastusedrun,
                        int(self.__findInJson(runlist[-1], "run_number")))
                else:
                    self.__firstusedrun = int(
                        self.__findInJson(runlist[0], "run_number"))
                    self.__lastusedrun = int(
                        self.__findInJson(runlist[-1], "run_number"))
                lumiSecExtend = jsoncontents
                # Placeholder: non-empty outer list with an empty first
                # entry, so neither the extend nor the error branch runs.
                splitLumiList = [[""]]

        if splitLumiList and splitLumiList[0]:
            if splitLumiList[0][0]:
                lumiSecStr = ["',\n'".join(lumis)
                              for lumis in splitLumiList]
                lumiSecStr = ["lumiSecs.extend( [\n'" + lumis + "'\n] )"
                              for lumis in lumiSecStr]
                lumiSecExtend = "\n".join(lumiSecStr)
                runlist = self.__getRunList()
                self.__firstusedrun = max(
                    int(splitLumiList[0][0].split(":")[0]),
                    int(self.__findInJson(runlist[0], "run_number")))
                self.__lastusedrun = min(
                    int(splitLumiList[-1][-1].split(":")[0]),
                    int(self.__findInJson(runlist[-1], "run_number")))
        else:
            msg = "You are trying to run a validation without any runs! Check that:"
            if firstRun or lastRun:
                msg += "\n - firstRun and lastRun are correct for this dataset, and there are runs in between containing data"
            if jsonPath:
                msg += "\n - your JSON file is correct for this dataset, and the runs contain data"
            if (firstRun or lastRun) and jsonPath:
                msg += "\n - firstRun and lastRun are consistent with your JSON file"
            # Report the limits under the names the user actually supplied.
            if begin:
                msg = msg.replace("firstRun", "begin")
            if end:
                msg = msg.replace("lastRun", "end")
            raise AllInOneError(msg)
    else:
        # No selection at all: the full run list is used.
        runlist = self.__getRunList()
        self.__firstusedrun = int(
            self.__findInJson(runlist[0], "run_number"))
        self.__lastusedrun = int(
            self.__findInJson(runlist[-1], "run_number"))

    if crab:
        files = ""
    else:
        splitFileList = list(self.__chunks(self.fileList(), 255))
        fileStr = ["',\n'".join(chunk) for chunk in splitFileList]
        fileStr = ["readFiles.extend( [\n'" + chunk + "'\n] )"
                   for chunk in fileStr]
        files = "\n".join(fileStr)

        if parent:
            splitParentFileList = list(
                self.__chunks(self.fileList(parent=True), 255))
            parentFileStr = ["',\n'".join(chunk)
                             for chunk in splitParentFileList]
            parentFileStr = ["secFiles.extend( [\n'" + chunk + "'\n] )"
                             for chunk in parentFileStr]
            parentFiles = "\n".join(parentFileStr)
            files += "\n\n" + parentFiles

    # The caller's map is deliberately updated in place (existing behaviour);
    # only the None default is replaced, since it cannot be assigned to.
    theMap = repMap if repMap is not None else {}
    theMap["files"] = files
    theMap["json"] = jsonPath
    theMap["lumiStr"] = lumiStr
    theMap["goodLumiSecStr"] = goodLumiSecStr % (theMap)
    theMap["lumiSecExtend"] = lumiSecExtend
    if crab:
        dataset_snippet = self.__dummy_source_template % (theMap)
    else:
        dataset_snippet = self.__source_template % (theMap)
    return dataset_snippet
def _dasPopen(dbs):
    """Run a DAS command line through ``os.popen`` and return the pipe.

    If the environment variable ``X509_USER_PROXY`` is set, its value is
    appended to the command as both ``--key`` and ``--cert``.

    Args:
        dbs: The complete command line to run.

    Returns:
        The file-like object returned by ``os.popen``.

    Raises:
        RuntimeError: If the environment variable ``LSB_JOBID`` is defined.
    """
    if 'LSB_JOBID' in os.environ:
        # Call syntax instead of the Python-2-only "raise E, msg" form.
        raise RuntimeError(
            "Trying to do a DAS query while in a LXBatch job (env variable LSB_JOBID defined)\nquery was: %s" % dbs)
    if 'X509_USER_PROXY' in os.environ:
        dbs += " --key {0} --cert {0}".format(os.environ['X509_USER_PROXY'])
    logger.info('DAS query\t: %s', dbs)
    return os.popen(dbs)


# Runs known to DAS for the prompt and for the rereco dataset.
dbs = 'das_client --query="run dataset=%s instance=prod/%s" --limit %i' % (prompt.heppy.dataset, 'global', 0)
prompt_runs = [int(r) for r in _dasPopen(dbs).readlines()]

dbs = 'das_client --query="run dataset=%s instance=prod/%s" --limit %i' % (rereco.heppy.dataset, 'global', 0)
rereco_runs = [int(r) for r in _dasPopen(dbs).readlines()]

# Keep the runs of the lumi list that exist in both datasets.  The
# intersection is built once as a set, so each membership test is O(1).
common_runs = set(prompt_runs).intersection(rereco_runs)
runs = [run for run in map(int, lumiList.getRuns()) if run in common_runs]

print("Now running %i jobs: %r" % (len(runs), runs))

import subprocess


def wrapper(run_):
    """Run ``jetTreeMaker.py`` for a single run in a child process."""
    subprocess.call(["python", "jetTreeMaker.py", ("--era=%s" % args.era), ("--run=%i" % run_)])


from multiprocessing import Pool

# One jetTreeMaker.py process per run, at most 10 running at a time.
pool = Pool(10)
results = pool.map(wrapper, runs)
pool.close()
pool.join()  # wait for the worker processes to exit after close()