def createDatasetOutTag(dataset,tag='',datatier=None,year=None,version=""):
  """Create dataset output tag from DAS path."""
  outtags = dataset.strip('/').split('/')
  assert len(outtags)>=2, "Invalid DAS path '%s'!"%(dataset)
  outtag  = outtags[1]
  outtag  = outtag.replace(getUsernameFromSiteDB()+'-',"")
  outtag  = hashpattern.sub("",outtag)
  dtype   = 'data' if '/Run201' in dataset else 'mc'
  if datatier=='nanoAOD':
    if 'miniaod' in outtag.lower():
      newtier   = 'NanoAOD'+version
      outtag    = minipattern.sub(newtier,outtag)
      globaltag = globaltags[dtype]['nanoAOD'].get(year,False)
      if globaltag:
        outtag = outtag[:outtag.index(newtier)+len(newtier)]+'_'+globaltag
  if tag and not outtag.endswith(tag):
    outtag += formatTag(tag)
  return outtag
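# --- Illustrative sketch (not part of the original file) ----------------------
# The helpers used above (getUsernameFromSiteDB, hashpattern, minipattern,
# formatTag, globaltags) are defined elsewhere in this repository. The
# definitions below are assumptions, shown only to make the intent of
# createDatasetOutTag concrete; the real regexes and tag formatting may differ.
import re
from CRABClient.UserUtilities import getUsernameFromSiteDB # CRAB3 utility used above
hashpattern = re.compile(r"-[0-9a-f]{32}$") # assumed: trailing hash of private USER datasets
minipattern = re.compile(r"MiniAODv?\d*")   # assumed: MiniAOD tier label to be replaced

def formatTag(tag):
  """Assumed behavior: prefix a non-empty tag with an underscore."""
  if tag and not tag.startswith('_'):
    tag = '_'+tag
  return tag

# Example (hypothetical): for a MiniAOD DAS path such as
#   /DYJetsToLL_M-50_.../RunIIFall17MiniAODv2-..._v14-v1/MINIAODSIM
# the function would return an output tag along the lines of
# 'RunIIFall17NanoAODv5_<globaltag>_<tag>', with the global tag looked up
# in the globaltags dictionary.
# ------------------------------------------------------------------------------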
def submitSampleToCRAB(pset,year,samples,**kwargs):
  """Create a CRAB configuration and submit a given list of samples."""
  assert isinstance(samples,(list,tuple)), "Samples list should be a list or tuple! Given %s"%samples
  
  # USER OPTIONS
  test         = kwargs.get('test',         0)
  force        = kwargs.get('force',        False)
  datatier     = 'nanoAOD' if 'nanoaod' in pset.lower() else 'miniAOD'
  version      = re.findall(r"(?<=AOD)v\d+",pset)
  version      = version[0] if version else ""
  pluginName   = 'Analysis' #'PrivateMC'
  splitting    = kwargs.get('split',        'FileBased') #if year==2018 or datatier=='nanoAOD' else 'Automatic')
  tag          = kwargs.get('tag',          "")
  instance     = kwargs.get('instance',     'global')
  nevents      = -1
  unitsPerJob  = 1 # files per job for 'FileBased'
  eventsPerJob = kwargs.get('eventsPerJob', 10000) # unitsPerJob for 'EventAwareLumiBased' splitting
  njobs        = -1
  ncores       = kwargs.get('ncores',       1) # make sure nCores > nThreads in pset.py
  maxRunTime   = kwargs.get('maxRunTime',   6*60) #1250 # minutes
  maxMemory    = kwargs.get('maxMemory',    3000) # MB
  priority     = kwargs.get('priority',     10)
  workArea     = "crab_tasks" #"crab_projects"
  outdir       = '/store/user/%s/%s_%s%s'%(getUsernameFromSiteDB(),datatier,year,formatTag(tag))
  publish      = True #and False
  site         = 'T2_CH_CSCS'
  
  # OVERRIDE
  if test>0:
    splitting   = 'FileBased'
    unitsPerJob = 1 # files per job
    njobs       = int(test)
    outdir     += '_test'
    publish     = False
    samples     = samples[:1]
    if nevents<0:
      nevents   = 2500
  if splitting=='Automatic':
    unitsPerJob = -1
    njobs       = -1
    maxRunTime  = -1
  if splitting=='EventAwareLumiBased':
    unitsPerJob = eventsPerJob
    njobs       = -1
  
  # PRINT
  print ">>> "+'='*70
  print ">>> year        = %s"%year
  print ">>> pset        = '%s'"%bold(pset)
  print ">>> pluginName  = '%s'"%pluginName
  print ">>> splitting   = '%s'"%splitting
  print ">>> unitsPerJob = %s"%unitsPerJob
  print ">>> nevents     = %s"%nevents
  print ">>> tag         = '%s'"%bold(tag)
  print ">>> njobs       = %s"%njobs
  print ">>> nCores      = %s"%ncores
  print ">>> maxRunTime  = %s"%maxRunTime
  print ">>> maxMemory   = %s"%maxMemory
  print ">>> priority    = %s"%priority
  print ">>> workArea    = '%s'"%workArea
  print ">>> site        = '%s'"%site
  print ">>> outdir      = '%s'"%outdir
  print ">>> publish     = %r"%publish
  print ">>> test        = %r"%test
  print ">>> "+'='*70
  
  if len(samples)==0:
    print ">>> No samples given..."
    print ">>> "
    return
  
  # CRAB CONFIGURATION
  config = crabconfig()
  config.General.workArea        = workArea
  config.General.transferOutputs = True
  config.General.transferLogs    = False
  config.JobType.pluginName      = pluginName
  config.JobType.psetName        = pset
  config.JobType.pyCfgParams     = ["year=%s"%year,"nThreads=%s"%ncores]
  config.JobType.numCores        = ncores
  if maxRunTime>0:
    config.JobType.maxJobRuntimeMin = maxRunTime # minutes
  if maxMemory>0:
    config.JobType.maxMemoryMB      = maxMemory # MB
  config.JobType.priority        = priority
  config.Data.splitting          = splitting
  if unitsPerJob>0:
    config.Data.unitsPerJob      = unitsPerJob
  if njobs>0:
    config.Data.totalUnits       = unitsPerJob*njobs
  config.Site.storageSite        = site
  config.Data.outLFNDirBase      = outdir
  config.Data.publication        = publish
  
  for dataset in samples:
    
    # INDIVIDUAL CONFIG
    request = (datatier.lower().replace('aod','')+'_'+shortenDASPath(dataset))[:100]
    private = dataset.endswith('/USER')
    sites   = getSampleSites(dataset,instance=None)
    if private:
      ignoreLocal = True
      inputDBS    = "https://cmsweb.cern.ch/dbs/prod/phys03/DBSReader/"
      whitelist   = getOptimalWhitelist(sites,instance=instance)
      #whitelist  = ['T2_CH_*','T2_DE_*','T2_IT_*']
    else:
      ignoreLocal = False
      inputDBS    = "https://cmsweb.cern.ch/dbs/prod/%s/DBSReader/"%instance
      whitelist   = []
    outtag = createDatasetOutTag(dataset,tag=tag,datatier=datatier,version=version,year=year)
    
    # PRINT
    print ">>> "+'-'*5+" Submitting... "+'-'*50
    print ">>> request     = '%s'"%bold(request)
    print ">>> dataset     = '%s'"%bold(dataset)
    print ">>> inputDBS    = '%s'"%inputDBS
    print ">>> sites       = %s"%sites
    print ">>> whitelist   = %s"%whitelist
    print ">>> ignoreLocal = %s"%ignoreLocal
    print ">>> outtag      = '%s'"%outtag
    print ">>> "+'-'*70
    
    # INDIVIDUAL CONFIG
    config.General.requestName   = request # max. 100 characters
    config.Data.inputDataset     = dataset
    config.Data.inputDBS         = inputDBS
    #config.Data.outputPrimaryDataset = 'LQ_test' # only for 'PrivateMC'
    config.Data.outputDatasetTag = outtag
    config.Data.ignoreLocality   = ignoreLocal # do not run on same site the dataset is stored on
    if whitelist:
      config.Site.whitelist      = whitelist
    print str(config).rstrip('\n')
    print ">>> "+'-'*70
    
    # SUBMIT
    if force:
      print ">>> Do you want to submit this job to CRAB? [y/n]? force"
      print ">>> Submitting..."
      submitCRABConfig(config)
    else:
      while True:
        submit = raw_input(">>> Do you want to submit this job to CRAB? [y/n]? ").strip().lower()
        if any(s in submit for s in ['quit','exit']):
          print ">>> Exiting..."
          exit(0)
        elif 'force' in submit:
          submit = 'y'
          force  = True
        if 'y' in submit:
          print ">>> Submitting..."
          submitCRABConfig(config)
          break
        elif 'n' in submit:
          print ">>> Not submitting."
          break
        else:
          print ">>> '%s' is not a valid answer, please choose 'y' or 'n'."%submit
    print ">>> "
options.register('year', year, mytype=VarParsing.varType.int)
options.register('nevents', nevents, mytype=VarParsing.varType.int)
options.register('seed', seed, mytype=VarParsing.varType.int)
options.register('nThreads', nThreads, mytype=VarParsing.varType.int)
options.parseArguments()
sample   = options.sample
index    = options.index
gridpack = os.path.abspath(options.gridpack)
year     = options.year
nevents  = options.nevents
seed     = options.seed
nThreads = options.nThreads
###globaltag = globaltags['miniAOD'].get(year,'auto:phase1_2017_realistic')
tag      = sample
if index>0:
  tag += formatTag('_'+str(index))
outfile_RAW = "file:GENSIM%s.root"%(tag)
outfile_LHE = "file:GENSIM_LHE%s.root"%(tag)
print ">>> sample      = '%s'"%sample
print ">>> index       = %s"%index
print ">>> gridpack    = '%s'"%gridpack
print ">>> year        = %s"%year
print ">>> nevents     = %s"%nevents
print ">>> nThreads    = %s"%nThreads
print ">>> globaltag   = '%s'"%globaltag
print ">>> outfile_RAW = '%s'"%outfile_RAW
print ">>> outfile_LHE = '%s'"%outfile_LHE
print ">>> "+'-'*52

# PROCESS
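# --- Illustrative usage (not part of the original file) -----------------------
# Options registered via VarParsing are overridden on the cmsRun command line as
# key=value pairs; the file name and values below are placeholders:
#   cmsRun pset_GENSIM.py sample=LQ_M1000 index=1 year=2018 nevents=500 seed=12345 nThreads=4
# Sketch (assumed) of how the parsed values typically enter the process in the
# PROCESS section:
#   process.maxEvents.input = cms.untracked.int32(nevents)
#   process.RandomNumberGeneratorService.generator.initialSeed = seed
#   process.externalLHEProducer.args = cms.vstring(gridpack)
# ------------------------------------------------------------------------------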
options = VarParsing('analysis')
options.register('year', year, mytype=VarParsing.varType.int)
options.register('nThreads', nThreads, mytype=VarParsing.varType.int)
options.register('events', maxEvents, mytype=VarParsing.varType.int)
options.register('sample', sample, mytype=VarParsing.varType.string)
options.register('dtype', dtype, mytype=VarParsing.varType.string)
options.parseArguments()
year      = options.year
dtype     = options.dtype
nThreads  = options.nThreads
maxEvents = options.events
sample    = options.sample
globaltag = globaltags[dtype]['NanoAODv5'].get(year,'auto:phase1_2017_realistic')
era       = eras['NanoAODv5'].get(year,None)
if index>0:
  outfile = "file:nanoAOD_%s%s_%s.root"%(year,formatTag(sample),index)
else:
  outfile = "file:nanoAOD_%s%s.root"%(year,formatTag(sample))
if year==2016:
  infiles = filter(lambda f: 'RunIISummer16' in f or '/Run2016' in f or '_2016_' in f,infiles)
elif year==2017:
  infiles = filter(lambda f: 'RunIIFall17' in f or '/Run2017' in f or '_2017_' in f,infiles)
elif year==2018:
  infiles = filter(lambda f: 'RunIIAutumn' in f or '/Run2018' in f or '_2018_' in f,infiles)
if dtype=='data':
  infiles = filter(lambda f: '/store/mc/' not in f,infiles)
elif dtype=='mc':
  infiles = filter(lambda f: '/store/data/' not in f,infiles)
print ">>> sample    = '%s'"%sample
print ">>> index     = %s"%index
print ">>> year      = %s"%year
print ">>> dtype     = '%s'"%dtype
print ">>> maxEvents = %s"%maxEvents
print ">>> globaltag = '%s'"%globaltag
print ">>> infiles   = %s"%infiles
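# --- Illustrative sketch (not part of the original file) ----------------------
# The nanoAOD process definition follows elsewhere in this file; as an assumed
# sketch, the parsed era, global tag, input files, and event count would
# typically be wired in along these lines:
import FWCore.ParameterSet.Config as cms
process = cms.Process('NANO',era) if era else cms.Process('NANO') # era assumed to be a cms Modifier
process.load('Configuration.StandardSequences.FrontierConditions_GlobalTag_cff')
process.source    = cms.Source('PoolSource',fileNames=cms.untracked.vstring(infiles))
process.maxEvents = cms.untracked.PSet(input=cms.untracked.int32(maxEvents))
from Configuration.AlCa.GlobalTag import GlobalTag
process.GlobalTag = GlobalTag(process.GlobalTag,globaltag,'')
# ------------------------------------------------------------------------------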
#director+"/store/mc/RunIIAutumn18MiniAOD/PairVectorLQ_InclusiveDecay_M-1100_TuneCP2_13TeV-madgraph-pythia8/MINIAODSIM/102X_upgrade2018_realistic_v15-v1/30000/F3649753-9038-CF4B-A178-BD03655BE7BD.root", "file:input/VLQ-p_M1100_%s.root" % year, #"file:miniAOD_%s_%s_%s.root"%(sample,year,index), ] # USER OPTIONS from FWCore.ParameterSet.VarParsing import VarParsing options = VarParsing('analysis') options.register('year', year, mytype=VarParsing.varType.int) options.register('nThreads', nThreads, mytype=VarParsing.varType.int) options.parseArguments() year = options.year nThreads = options.nThreads globaltag = globaltags['mc']['miniAOD'].get(year, 'auto:phase1_2017_realistic') if index > 0: outfile = "file:miniAOD_rerun_%s%s_%s.root" % (year, formatTag(sample), index) else: outfile = "file:miniAOD_rerun_%s%s.root" % (year, formatTag(sample)) print ">>> sample = '%s'" % sample print ">>> index = '%s'" % index print ">>> year = '%s" % year print ">>> maxEvents = %s" % maxEvents print ">>> globaltag = '%s'" % globaltag print ">>> infiles = %s" % infiles print ">>> outfile = %s" % outfile print ">>> " + '-' * 69 # PROCESS process = cms.Process('TauID')