def getTestFiles(analysis, sample, n=1, version=None):
    '''
    Return up to n ntuple files for a short, named test sample.

    analysis : analysis name passed through to getNtupleDirectory.
    sample   : short alias (e.g. 'wz', 'dy') or a data stream name.
    n        : maximum number of files to return.
    version  : CMSSW release tag; looked up via getCMSSWVersion() when omitted.

    Returns an empty list for unknown sample aliases.
    '''
    if not version:
        version = getCMSSWVersion()
    # a few MC dataset names changed between the 76X and later campaigns
    old76 = (version == '76X')
    sampleMap = {
        'wz'            : 'WZTo3LNu_TuneCUETP8M1_13TeV-powheg-pythia8',
        'zz'            : 'ZZTo4L_13TeV_powheg_pythia8',
        'data'          : 'DoubleMuon',
        'hpp'           : 'HPlusPlusHMinusMinusHTo4L_M-500_13TeV-pythia8' if old76 else 'HPlusPlusHMinusMinusHTo4L_M-500_TuneCUETP8M1_13TeV_pythia8',
        'hpp4l'         : 'HPlusPlusHMinusMinusHTo4L_M-500_13TeV-pythia8' if old76 else 'HPlusPlusHMinusMinusHTo4L_M-500_TuneCUETP8M1_13TeV_pythia8',
        'hppr4l'        : 'HPlusPlusHMinusMinusHRTo4L_M-500_13TeV-pythia8' if old76 else 'HPlusPlusHMinusMinusHRTo4L_M-500_TuneCUETP8M1_13TeV-pythia8',
        'hpp3l'         : 'HPlusPlusHMinusHTo3L_M-500_TuneCUETP8M1_13TeV_calchep-pythia8' if old76 else 'HPlusPlusHMinusHTo3L_M-500_13TeV-calchep-pythia8',
        'haa'           : 'SUSYGluGluToHToAA_AToMuMu_AToTauTau_M-15_TuneCUETP8M1_13TeV_madgraph_pythia8',
        #'dy'           : 'DYJetsToLL_M-50_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8',
        'dy'            : 'DYJetsToLL_M-50_TuneCUETP8M1_13TeV-madgraphMLM-pythia8',
        'w'             : 'WJetsToLNu_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8',
        'qcd'           : 'QCD_Pt_300to470_TuneCUETP8M1_13TeV_pythia8',
        'SingleMuon'    : 'SingleMuon',
        'SingleElectron': 'SingleElectron',
        'DoubleMuon'    : 'DoubleMuon',
        'DoubleEG'      : 'DoubleEG',
        'MuonEG'        : 'MuonEG',
        'Tau'           : 'Tau',
    }
    if sample not in sampleMap:
        return []
    sampleDir = '{0}/{1}'.format(getNtupleDirectory(analysis, version=version), sampleMap[sample])
    allFiles = get_hdfs_root_files(sampleDir)
    # slicing past the end is safe; returns at most n files
    return allFiles[:n]
def getTestFiles(analysis, sample, n=1, version=None):
    '''
    Return at most n ntuple files for the requested test sample.

    analysis : analysis name forwarded to getNtupleDirectory.
    sample   : short sample alias or data stream name.
    n        : maximum number of file paths to return.
    version  : CMSSW release tag; defaults to getCMSSWVersion().

    Unknown aliases yield an empty list.
    '''
    if not version:
        version = getCMSSWVersion()
    # dataset names that differ between the 76X campaign and later ones
    if version == '76X':
        hppName   = 'HPlusPlusHMinusMinusHTo4L_M-500_13TeV-pythia8'
        hpprName  = 'HPlusPlusHMinusMinusHRTo4L_M-500_13TeV-pythia8'
        hpp3lName = 'HPlusPlusHMinusHTo3L_M-500_TuneCUETP8M1_13TeV_calchep-pythia8'
    else:
        hppName   = 'HPlusPlusHMinusMinusHTo4L_M-500_TuneCUETP8M1_13TeV_pythia8'
        hpprName  = 'HPlusPlusHMinusMinusHRTo4L_M-500_TuneCUETP8M1_13TeV-pythia8'
        hpp3lName = 'HPlusPlusHMinusHTo3L_M-500_13TeV-calchep-pythia8'
    sampleMap = {
        'wz': 'WZTo3LNu_TuneCUETP8M1_13TeV-powheg-pythia8',
        'zz': 'ZZTo4L_13TeV_powheg_pythia8',
        'data': 'DoubleMuon',
        'hpp': hppName,
        'hpp4l': hppName,
        'hppr4l': hpprName,
        'hpp3l': hpp3lName,
        'haa': 'SUSYGluGluToHToAA_AToMuMu_AToTauTau_M-15_TuneCUETP8M1_13TeV_madgraph_pythia8',
        #'dy': 'DYJetsToLL_M-50_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8',
        'dy': 'DYJetsToLL_M-50_TuneCUETP8M1_13TeV-madgraphMLM-pythia8',
        'w': 'WJetsToLNu_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8',
        'qcd': 'QCD_Pt_300to470_TuneCUETP8M1_13TeV_pythia8',
        'SingleMuon': 'SingleMuon',
        'SingleElectron': 'SingleElectron',
        'DoubleMuon': 'DoubleMuon',
        'DoubleEG': 'DoubleEG',
        'MuonEG': 'MuonEG',
        'Tau': 'Tau',
    }
    if sample not in sampleMap:
        return []
    ntupleDir = getNtupleDirectory(analysis, version=version)
    files = get_hdfs_root_files('{0}/{1}'.format(ntupleDir, sampleMap[sample]))
    return files[:min(n, len(files))]
def getTestFiles(sample, n=1, version=None):
    '''
    Return up to n ntuple files for a named test sample.

    sample  : short alias ('wz', 'dy', 'haa_<h>_<a>', ...) or data stream name.
    n       : maximum number of file paths to return.
    version : CMSSW release tag; defaults to getCMSSWVersion().

    Unknown aliases yield an empty list.
    '''
    if not version:
        version = getCMSSWVersion()
    sampleMap = {
        'wz'            : 'WZTo3LNu_TuneCUETP8M1_13TeV-powheg-pythia8',
        'zz'            : 'ZZTo4L_13TeV_powheg_pythia8',
        'data'          : 'DoubleMuon',
        'hpp'           : 'HPlusPlusHMinusMinusHTo4L_M-500_13TeV-pythia8' if version == '76X' else 'HPlusPlusHMinusMinusHTo4L_M-500_TuneCUETP8M1_13TeV_pythia8',
        'hpp4l'         : 'HPlusPlusHMinusMinusHTo4L_M-500_13TeV-pythia8' if version == '76X' else 'HPlusPlusHMinusMinusHTo4L_M-500_TuneCUETP8M1_13TeV_pythia8',
        'hppr4l'        : 'HPlusPlusHMinusMinusHRTo4L_M-500_13TeV-pythia8' if version == '76X' else 'HPlusPlusHMinusMinusHRTo4L_M-500_TuneCUETP8M1_13TeV-pythia8',
        'hpp3l'         : 'HPlusPlusHMinusHTo3L_M-500_TuneCUETP8M1_13TeV_calchep-pythia8' if version == '76X' else 'HPlusPlusHMinusHTo3L_M-500_13TeV-calchep-pythia8',
        'dy'            : 'DYJetsToLL_M-50_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8',
        #'dy'           : 'DYJetsToLL_M-50_TuneCUETP8M1_13TeV-madgraphMLM-pythia8',
        'w'             : 'WJetsToLNu_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8',
        'qcd'           : 'QCD_Pt_300to470_TuneCUETP8M1_13TeV_pythia8',
        'gjet'          : 'GJet_Pt-40toInf_DoubleEMEnriched_MGG-80toInf_TuneCUETP8M1_13TeV_Pythia8',
        'SingleMuon'    : 'SingleMuon',
        'SingleElectron': 'SingleElectron',
        'DoubleMuon'    : 'DoubleMuon',
        'DoubleEG'      : 'DoubleEG',
        'MuonEG'        : 'MuonEG',
        'Tau'           : 'Tau',
        'haa'           : 'SUSYGluGluToHToAA_AToMuMu_AToTauTau_M-15_TuneCUETP8M1_13TeV_madgraph_pythia8',
        'hzz'           : 'GluGluHToZZTo4L_M125_13TeV_powheg2_JHUGenV7011_pythia8',
        'hgg'           : 'GluGluHToGG_M-125_13TeV_powheg_pythia8',
    }
    # H->aa mass grid: legacy 125 GeV names omit the M-{h} token, the '_new'
    # aliases always use the fully-qualified name
    hMasses = [125, 300, 750]
    aMasses = ['3p6', 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 19, 21]
    fullTemplate = 'SUSYGluGluToHToAA_AToMuMu_AToTauTau_M-{h}_M-{a}_TuneCUETP8M1_13TeV_madgraph_pythia8'
    legacy125Template = 'SUSYGluGluToHToAA_AToMuMu_AToTauTau_M-{a}_TuneCUETP8M1_13TeV_madgraph_pythia8'
    for h in hMasses:
        for a in aMasses:
            legacy = legacy125Template if h == 125 else fullTemplate
            sampleMap['haa_{h}_{a}'.format(h=h, a=a)] = legacy.format(h=h, a=a)
            sampleMap['haa_{h}_{a}_new'.format(h=h, a=a)] = fullTemplate.format(h=h, a=a)
    if sample not in sampleMap:
        return []
    files = get_hdfs_root_files('{0}/{1}'.format(getNtupleDirectory(version=version), sampleMap[sample]))
    return files[:min(n, len(files))]
def submit_untracked_condor(args):
    '''
    Submit to condor using an input directory.

    Builds and runs one farmoutAnalysisJobs command per sample directory found
    under args.inputDirectory. With args.dryrun the command is only logged.

    Side effects: creates the condor work area, writes a <submitDir>_inputs.txt
    file list per sample, and shells out via os.system.
    '''
    # get samples
    sampleList = hdfs_ls_directory(args.inputDirectory)
    workArea = get_condor_workArea(args)
    os.system('mkdir -p {0}'.format(workArea))
    submitMap = {}
    # iterate over samples
    for sample in sampleList:
        # optional glob-style filtering of samples
        if hasattr(args, 'sampleFilter'):
            submitSample = False
            for sampleFilter in args.sampleFilter:
                if fnmatch.fnmatch(sample, sampleFilter):
                    submitSample = True
            if not submitSample:
                continue
        # farmout config
        command = 'farmoutAnalysisJobs --infer-cmssw-path --input-basenames-not-unique'
        if hasattr(args, 'scriptExe') and args.scriptExe:
            command += ' --fwklite'
        # submit dir
        submitDir = '{0}/{1}'.format(workArea, sample)
        command += ' --submit-dir={0}'.format(submitDir)
        if os.path.exists(submitDir) and not args.resubmit:
            logging.warning('Submit directory exists {0}'.format(submitDir))
            continue
        if args.resubmit:
            prev = get_condor_status(submitDir)
            badjobs = [j for j in prev if prev[j]['status'] not in ['FINISHED', 'RUNNING']]
            if not badjobs:
                continue  # don't resubmit if no failed
        # input files
        inputFiles = get_hdfs_root_files(args.inputDirectory, sample)
        totalFiles = len(inputFiles)
        if totalFiles == 0:
            logging.warning('{0} {1} has no files.'.format(args.inputDirectory, sample))
            continue
        fileList = '{0}_inputs.txt'.format(submitDir)
        with open(fileList, 'w') as f:
            f.write('\n'.join(inputFiles))
        filesPerJob = args.filesPerJob
        if args.gigabytesPerJob:
            totalSize = get_hdfs_directory_size(os.path.join(args.inputDirectory, sample))
            # FIX: guard against a zero-size directory which previously caused
            # a ZeroDivisionError; mirrors the check in submit_untracked_crab
            if totalSize:
                averageSize = totalSize / totalFiles
                GB = 1024. * 1024. * 1024.
                if averageSize:
                    filesPerJob = int(math.ceil(args.gigabytesPerJob * GB / averageSize))
        # per-sample override of files-per-job from a JSON map
        if hasattr(args, 'jsonFilesPerJob') and args.jsonFilesPerJob:
            if os.path.isfile(args.jsonFilesPerJob):
                with open(args.jsonFilesPerJob) as f:
                    data = json.load(f)
                if sample in data:
                    filesPerJob = data[sample]
            else:
                logging.error('JSON map {0} for jobs does not exist'.format(args.jsonFilesPerJob))
                return
        command += ' --input-file-list={0} --assume-input-files-exist --input-files-per-job={1}'.format(fileList, filesPerJob)
        if args.vsize:
            command += ' --vsize-limit={0}'.format(args.vsize)
        if args.useAFS:
            command += ' --shared-fs'
        # output directory
        # srm no longer needed at uw
        outputDir = '/store/user/{0}/{1}/{2}'.format(args.user, args.jobName, sample)
        command += ' --output-dir={0}'.format(outputDir)
        if args.useHDFS:
            command += ' --use-hdfs'
        if args.resubmit:
            command += ' --resubmit-failed-jobs'
        if hasattr(args, 'cfg'):
            command += ' {0} {1} {2}'.format(args.jobName, args.cfg, ' '.join(args.cmsRunArgs))
        else:
            # its a merge
            command += ' --merge {0}'.format(args.jobName)
        if args.dryrun:
            logging.info(command)
        else:
            os.system(command)
def submit_untracked_crab(args):
    '''
    Submit jobs from an inputDirectory via the CRAB client.

    One CRAB task is submitted per sample directory found under
    args.inputDirectory; userInputFiles are taken straight from HDFS.
    With args.dryrun the CRAB --dryrun flag is passed through.
    '''
    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    memhandler.setLevel(logging.INFO)
    # crab config
    config = get_config(args)
    config.Site.whitelist = [args.site]  # whitelist site, run on same site as files located
    # get samples
    sampleList = hdfs_ls_directory(args.inputDirectory)
    submitMap = {}
    # iterate over samples
    for sample in sampleList:
        # optional glob-style filtering of samples
        if hasattr(args, 'sampleFilter'):
            submitSample = False
            for sampleFilter in args.sampleFilter:
                if fnmatch.fnmatch(sample, sampleFilter):
                    submitSample = True
            if not submitSample:
                continue
        primaryDataset = sample
        config.General.requestName = '{0}'.format(primaryDataset)
        # make it only 100 characters
        config.General.requestName = config.General.requestName[:99]  # Warning: may not be unique now
        config.Data.outputPrimaryDataset = primaryDataset
        # get file list
        inputFiles = get_hdfs_root_files(args.inputDirectory, sample)
        config.Data.userInputFiles = inputFiles
        totalFiles = len(inputFiles)
        if totalFiles == 0:
            # FIX: was bare `inputDirectory`, an undefined name (NameError)
            logging.warning('{0} {1} has no files.'.format(args.inputDirectory, sample))
            continue
        filesPerJob = args.filesPerJob
        if args.gigabytesPerJob:
            totalSize = get_hdfs_directory_size(os.path.join(args.inputDirectory, sample))
            # guard against zero-size directories
            if totalSize:
                averageSize = totalSize / totalFiles
                GB = 1024. * 1024. * 1024.
                filesPerJob = int(math.ceil(args.gigabytesPerJob * GB / averageSize))
        # per-sample override of units-per-job from a JSON map
        if hasattr(args, 'jsonFilesPerJob') and args.jsonFilesPerJob:
            if os.path.isfile(args.jsonFilesPerJob):
                with open(args.jsonFilesPerJob) as f:
                    data = json.load(f)
                if sample in data:
                    filesPerJob = data[sample]
            else:
                logging.error('JSON map {0} for jobs does not exist'.format(args.jsonFilesPerJob))
                return
        config.Data.unitsPerJob = filesPerJob
        # submit the job
        submitArgs = ['--config', config]
        if args.dryrun:
            submitArgs += ['--dryrun']
        try:
            # NOTE(review): `log` is not defined in this function; presumably a
            # module-level logger — verify it exists at file scope
            log.info("Submitting for input dataset {0}".format(sample))
            submitMap[sample] = crabClientSubmit.submit(logger, submitArgs)()
        except HTTPException as hte:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, hte.headers))
        except ClientException as cle:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, cle))
def submit_untracked_condor(args):
    '''
    Submit to condor using an input directory.

    For each sample directory under args.inputDirectory a farmoutAnalysisJobs
    command is assembled and executed (or only logged when args.dryrun).

    Side effects: creates the condor work area, writes a <submitDir>_inputs.txt
    file list per sample, and shells out via os.system.
    '''
    # get samples
    sampleList = hdfs_ls_directory(args.inputDirectory)
    workArea = get_condor_workArea(args)
    os.system('mkdir -p {0}'.format(workArea))
    submitMap = {}
    # iterate over samples
    for sample in sampleList:
        # optional glob-style filtering of samples
        if hasattr(args, 'sampleFilter'):
            submitSample = False
            for sampleFilter in args.sampleFilter:
                if fnmatch.fnmatch(sample, sampleFilter):
                    submitSample = True
            if not submitSample:
                continue
        # farmout config
        command = 'farmoutAnalysisJobs --infer-cmssw-path --input-basenames-not-unique'
        if hasattr(args, 'scriptExe') and args.scriptExe:
            command += ' --fwklite'
        # submit dir
        submitDir = '{0}/{1}'.format(workArea, sample)
        command += ' --submit-dir={0}'.format(submitDir)
        if os.path.exists(submitDir) and not args.resubmit:
            logging.warning('Submit directory exists {0}'.format(submitDir))
            continue
        if args.resubmit:
            prev = get_condor_status(submitDir)
            badjobs = [j for j in prev if prev[j]['status'] not in ['FINISHED', 'RUNNING']]
            if not badjobs:
                continue  # don't resubmit if no failed
        # input files
        inputFiles = get_hdfs_root_files(args.inputDirectory, sample)
        totalFiles = len(inputFiles)
        if totalFiles == 0:
            logging.warning('{0} {1} has no files.'.format(args.inputDirectory, sample))
            continue
        fileList = '{0}_inputs.txt'.format(submitDir)
        with open(fileList, 'w') as f:
            f.write('\n'.join(inputFiles))
        filesPerJob = args.filesPerJob
        if args.gigabytesPerJob:
            totalSize = get_hdfs_directory_size(os.path.join(args.inputDirectory, sample))
            # FIX: guard against a zero-size directory which previously caused
            # a ZeroDivisionError; mirrors the check in submit_untracked_crab
            if totalSize:
                averageSize = totalSize / totalFiles
                GB = 1024. * 1024. * 1024.
                if averageSize:
                    filesPerJob = int(math.ceil(args.gigabytesPerJob * GB / averageSize))
        # per-sample override of files-per-job from a JSON map
        if hasattr(args, 'jsonFilesPerJob') and args.jsonFilesPerJob:
            if os.path.isfile(args.jsonFilesPerJob):
                with open(args.jsonFilesPerJob) as f:
                    data = json.load(f)
                if sample in data:
                    filesPerJob = data[sample]
            else:
                logging.error('JSON map {0} for jobs does not exist'.format(args.jsonFilesPerJob))
                return
        command += ' --input-file-list={0} --assume-input-files-exist --input-files-per-job={1}'.format(fileList, filesPerJob)
        if args.vsize:
            command += ' --vsize-limit={0}'.format(args.vsize)
        if args.useAFS:
            command += ' --shared-fs'
        # output directory
        # srm no longer needed at uw
        outputDir = '/store/user/{0}/{1}/{2}'.format(args.user, args.jobName, sample)
        command += ' --output-dir={0}'.format(outputDir)
        if args.useHDFS:
            command += ' --use-hdfs'
        if args.resubmit:
            command += ' --resubmit-failed-jobs'
        if hasattr(args, 'cfg'):
            command += ' {0} {1} {2}'.format(args.jobName, args.cfg, ' '.join(args.cmsRunArgs))
        else:
            # its a merge
            command += ' --merge {0}'.format(args.jobName)
        if args.dryrun:
            logging.info(command)
        else:
            os.system(command)
def submit_untracked_crab(args):
    '''
    Submit jobs from an inputDirectory via the CRAB client.

    One CRAB task is submitted per sample directory found under
    args.inputDirectory; userInputFiles are taken straight from HDFS.
    With args.dryrun the CRAB --dryrun flag is passed through.
    '''
    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    memhandler.setLevel(logging.INFO)
    # crab config
    config = get_config(args)
    config.Site.whitelist = [args.site]  # whitelist site, run on same site as files located
    # get samples
    sampleList = hdfs_ls_directory(args.inputDirectory)
    submitMap = {}
    # iterate over samples
    for sample in sampleList:
        # optional glob-style filtering of samples
        if hasattr(args, 'sampleFilter'):
            submitSample = False
            for sampleFilter in args.sampleFilter:
                if fnmatch.fnmatch(sample, sampleFilter):
                    submitSample = True
            if not submitSample:
                continue
        primaryDataset = sample
        config.General.requestName = '{0}'.format(primaryDataset)
        # make it only 100 characters
        config.General.requestName = config.General.requestName[:99]  # Warning: may not be unique now
        config.Data.outputPrimaryDataset = primaryDataset
        # get file list
        inputFiles = get_hdfs_root_files(args.inputDirectory, sample)
        config.Data.userInputFiles = inputFiles
        totalFiles = len(inputFiles)
        if totalFiles == 0:
            # FIX: was bare `inputDirectory`, an undefined name (NameError)
            logging.warning('{0} {1} has no files.'.format(args.inputDirectory, sample))
            continue
        filesPerJob = args.filesPerJob
        if args.gigabytesPerJob:
            totalSize = get_hdfs_directory_size(os.path.join(args.inputDirectory, sample))
            # guard against zero-size directories
            if totalSize:
                averageSize = totalSize / totalFiles
                GB = 1024. * 1024. * 1024.
                filesPerJob = int(math.ceil(args.gigabytesPerJob * GB / averageSize))
        # per-sample override of units-per-job from a JSON map
        if hasattr(args, 'jsonFilesPerJob') and args.jsonFilesPerJob:
            if os.path.isfile(args.jsonFilesPerJob):
                with open(args.jsonFilesPerJob) as f:
                    data = json.load(f)
                if sample in data:
                    filesPerJob = data[sample]
            else:
                logging.error('JSON map {0} for jobs does not exist'.format(args.jsonFilesPerJob))
                return
        config.Data.unitsPerJob = filesPerJob
        # submit the job
        submitArgs = ['--config', config]
        if args.dryrun:
            submitArgs += ['--dryrun']
        try:
            # NOTE(review): `log` is not defined in this function; presumably a
            # module-level logger — verify it exists at file scope
            log.info("Submitting for input dataset {0}".format(sample))
            submitMap[sample] = crabClientSubmit.submit(logger, submitArgs)()
        except HTTPException as hte:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, hte.headers))
        except ClientException as cle:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, cle))