def createCfg_addBackgrounds_Convs_LeptonFakeRate(self, jobOptions):
    """Create the python configuration for the addBackground_LeptonFakeRate
    executable that subtracts the conversion ('g') contribution.

    Args:
      jobOptions: dict with keys 'inputFile', 'outputFile' and 'cfgFile_modified'
    """
    process = self.processToSubtractConvsFrom
    lines = [
        "process.fwliteInput.fileNames = cms.vstring('%s')" % jobOptions['inputFile'],
        "process.fwliteOutput.fileName = cms.string('%s')" % os.path.basename(jobOptions['outputFile']),
        "process.addBackground_LeptonFakeRate.processData = cms.string('%s')" % process,
        "process.addBackground_LeptonFakeRate.processLeptonFakes = cms.string('%s_NC')" % process,
        "process.addBackground_LeptonFakeRate.processesToSubtract = cms.vstring('%sg')" % process,
    ]
    # lines.append("process.addBackground_LeptonFakeRate.sysShifts = cms.vstring()" % self.central_or_shifts)
    logging.info("self.cfgFile_addBackgrounds_Convs_LeptonFakeRate => %s" % self.cfgFile_addBackgrounds_Convs_LeptonFakeRate)
    logging.info("jobOptions['cfgFile_modified'] => %s" % jobOptions['cfgFile_modified'])
    create_cfg(self.cfgFile_addBackgrounds_Convs_LeptonFakeRate, jobOptions['cfgFile_modified'], lines)
def createMakefile(self, lines_makefile):
    """Write the Makefile that drives the PU profile production and log the
    command needed to execute it."""
    tools_createMakefile(
        makefileName=self.makefile,
        targets=self.targets,
        lines_makefile=lines_makefile,
        filesToClean=self.filesToClean,
        isSbatch=self.is_sbatch,
        phoniesToAdd=self.phoniesToAdd,
    )
    logging.info("Run it with:\tmake -f %s -j %i " % (self.makefile, self.num_parallel_jobs))
def createMakefile(self, lines_makefile):
    """Write the Makefile that drives the Ntuple production and log the
    command needed to execute it."""
    # With sbatch there is a single aggregate target; otherwise every output
    # file is its own target.
    if self.is_sbatch:
        targets = [MAKEFILE_TARGET]
    else:
        targets = self.outputFiles.values()
    tools_createMakefile(self.makefile, targets, lines_makefile, self.filesToClean)
    logging.info("Run it with:\tmake -f %s -j %i " % (self.makefile, self.num_parallel_jobs))
def create(self):
    """Create the hadd command (an sbatch wrapper script or a plain `hadd` call)
    and render the Makefile that merges the per-channel files into the final
    output file and runs the validation targets.
    """
    create_if_not_exists(self.hadd_log_dir_path)
    if self.running_method.lower() == 'sbatch':
        create_if_not_exists(self.hadd_script_dir_path)
        # Generate a python wrapper script that submits the hadd job to SLURM
        createScript_sbatch_hadd(
            sbatch_script_file_name=self.hadd_script_path,
            input_file_names=list(self.channel_info.keys()),
            output_file_name=self.final_output_file,
            script_file_name=self.hadd_script_path.replace('.py', '.sh'),
            log_file_name=self.hadd_log_executable_path,  # the basename will be replaced anyways?
            working_dir=None,
            waitForJobs=True,
            auxDirName='',
            pool_id=uuid.uuid4(),
            verbose=False,
            # merge all inputs in a single job
            max_input_files_per_job=len(self.channel_info),
            dry_run=self.dry_run,
            use_home=self.use_home,
            min_file_size=-1,
        )
        logging.info("Generated hadd config file: %s" % self.hadd_script_path)
        # From here on, hadd_script_path holds the shell *command*, not a path
        self.hadd_script_path = 'python %s' % self.hadd_script_path
        additional_cmds = ''
    else:
        # Local running: hadd into the current directory, then move the result
        # to its final location
        self.hadd_script_path = 'hadd -f {} {}'.format(
            os.path.basename(self.final_output_file),
            ' '.join(list(self.channel_info.keys())))
        additional_cmds = 'mv {} {}'.format(
            os.path.basename(self.final_output_file),
            self.final_output_file)
    with open(self.makefile_path, 'w') as makefile:
        # When driven by a local Makefile, sleep first -- presumably to let the
        # (distributed) filesystem catch up before hadd reads the inputs; TODO confirm
        hadd_script_cmd = '{}{}'.format(
            'sleep 60\n\t' if self.running_method.lower() == 'makefile' else '',
            self.hadd_script_path)
        makeFileContents = jinja2.Template(makeFileTemplate).render(
            output_file=self.final_output_file,
            channel_info=self.channel_info,
            hadd_script=hadd_script_cmd,
            hadd_wrapper_log=self.hadd_log_wrapper_path,
            additional_cmds=additional_cmds,
            validate_channels=' '.join(self.channels_to_validate),
            output_dir=self.output_dir,
        )
        makefile.write(makeFileContents)
    logging.info("Created the makefile: %s" % self.makefile_path)
def get_paths(input_paths, whitelist, blacklist):
    """Resolve per-channel RLE directories from a list of input paths.

    A path with 6 components is taken as an analysis output directory whose
    OUTPUT_RLE subdirectory contains one directory per channel; a path with
    8 components must point directly at one channel directory.

    Returns a dict mapping channel name -> directory path.
    Raises ValueError on malformed paths, whitelist/blacklist conflicts,
    or duplicate channels.
    """
    valid_paths = {}
    for input_path in input_paths:
        components = [part for part in input_path.split(os.path.sep) if part]
        depth = len(components)
        if depth == 6:
            rle_dir = os.path.join(input_path, OUTPUT_RLE)
            if not hdfs.isdir(rle_dir):
                raise ValueError("No such directory: %s" % rle_dir)
            for channel_dir in sorted(hdfs.listdir(rle_dir)):
                channel_name = os.path.basename(channel_dir)
                # Skip channels excluded by the whitelist (if any) or blacklist
                if (whitelist and channel_name not in whitelist) or channel_name in blacklist:
                    logging.info("Excluding channel: {}".format(channel_name))
                    continue
                if channel_name in valid_paths:
                    raise ValueError(
                        "Found duplicate paths for the same channel: %s and %s" %
                        (valid_paths[channel_name], input_path))
                logging.debug('Found channel {} at path {}'.format(channel_name, channel_dir))
                valid_paths[channel_name] = channel_dir
        elif depth == 8:
            if components[-2] != OUTPUT_RLE:
                raise ValueError("Invalid path: %s" % input_path)
            channel_name = components[-1]
            if whitelist and channel_name not in whitelist:
                raise ValueError("Path %s conflicting with whitelist: %s" %
                                 (input_path, ', '.join(whitelist)))
            if channel_name in blacklist:
                raise ValueError("Path %s conflicting with blacklist: %s" %
                                 (input_path, ', '.join(blacklist)))
            if channel_name in valid_paths:
                raise ValueError(
                    "Found duplicate paths for the same channel: %s and %s" %
                    (valid_paths[channel_name], input_path))
            logging.debug('Found channel {} at path {}'.format(channel_name, input_path))
            valid_paths[channel_name] = input_path
        else:
            raise ValueError("Invalid path: %s" % input_path)
    assert (len(set(valid_paths.values())) == len(valid_paths))
    return valid_paths
def validate_channels(rles):
    """Check that no event appears in the 'Tight' signal region of more than
    one channel.

    Args:
      rles: nested mapping channel -> region -> sample -> central_or_shift -> [rle, ...]

    Returns:
      True if any cross-channel overlap was found (errors logged), else False.

    The known-benign overlap between 2los_1tau (region 'Tight') and 2lss_1tau
    (region 'Tight_OS_OS') is exempted.
    """
    # Invert the input: sample -> systematics -> rle -> {channel: region}
    overlap_map = collections.OrderedDict()
    for channel, regions in rles.items():
        for region, samples in regions.items():
            if 'Tight' not in region:
                continue
            for sample_name, systematics in samples.items():
                sample_map = overlap_map.setdefault(sample_name, collections.OrderedDict())
                for central_or_shift, rle_list in systematics.items():
                    sys_map = sample_map.setdefault(central_or_shift, collections.OrderedDict())
                    for rle in rle_list:
                        sys_map.setdefault(rle, collections.OrderedDict())[channel] = region
    has_errors = False
    for sample_name, sample_map in overlap_map.items():
        for central_or_shift, sys_map in sample_map.items():
            for rle, channel_map in sys_map.items():
                if len(channel_map) < 2:
                    continue
                # Tolerated overlap between these two specific channels/regions
                if channel_map.get('2los_1tau') == 'Tight' and \
                   channel_map.get('2lss_1tau') == 'Tight_OS_OS':
                    continue
                logging.error(
                    "Found the same event {} from sample {} in multiple channels: {}"
                    .format(
                        rle, sample_name,
                        ', '.join([
                            '%s (region %s, systematics %s)' %
                            (channel, channel_map[channel], central_or_shift)
                            for channel in channel_map
                        ])))
                has_errors = True
    if not has_errors:
        logging.info(
            "No overlaps found between the signal regions of channels: {}".
            format(', '.join(rles.keys())))
    return has_errors
def is_file_ok(output_file_name, validate_outputs=True, min_file_size=20000):
    """Check whether an output file exists and is usable.

    Non-ROOT files pass as soon as they exist. ROOT files are optionally
    size-checked and validated (zombie check, plus opening the file from bash
    and grepping for ROOT's 'trying to recover' warning); files that fail are
    deleted so that the job can be resubmitted.

    Args:
      output_file_name:  path to check (empty/None -> False)
      validate_outputs:  if True, open ROOT files to verify their integrity
      min_file_size:     minimal acceptable size in bytes (<= 0 disables the check)

    Returns:
      True if the file can be kept, False if it was missing or got deleted.
    """
    if not (output_file_name and os.path.exists(output_file_name)):
        return False
    logging.info("Output file %s already exists" % output_file_name)
    # Anything that is not a ROOT file is accepted as-is
    if not output_file_name.lower().endswith('.root'):
        return True
    command = "rm %s" % output_file_name
    ret_value = False
    if min_file_size > 0:
        output_file_size = os.stat(output_file_name).st_size
        if output_file_size > min_file_size:
            # Large enough; if contents are not validated, that's sufficient
            if not validate_outputs:
                ret_value = True
        else:
            logging.info(
                "Deleting output file and resubmitting job because it has size smaller than %d bytes"
                % min_file_size)
    if validate_outputs:
        root_tfile = ROOT.TFile(output_file_name, "read")
        if not root_tfile:
            logging.info("Not a valid ROOT file, deleting it")
        else:
            if root_tfile.IsZombie():
                logging.info(
                    "Output file is corrupted, deleting file and resubmitting job"
                )
            else:
                # Let's open the file via bash as well to see if ROOT tries to recover the file
                open_cmd = "root -b -l -q %s 2>&1 > /dev/null | grep 'trying to recover' | wc -l" % output_file_name
                open_out = run_cmd(open_cmd)
                if open_out.rstrip('\n') != '0':
                    logging.info(
                        "Output file is probably corrupted, deleting file and resubmitting job"
                    )
                else:
                    ret_value = True
            root_tfile.Close()
    # Remove files that failed any of the checks above
    if not ret_value:
        run_cmd(command)
    return ret_value
def __init__(self, fn, sf):
    """Load every TH2D histogram from ROOT file `fn`, scale each by `sf`,
    and cache the detached clones in self.histograms (name -> TH2D)."""
    # Path of the input ROOT file
    self.fn = fn
    logging.info("Scaling histograms in file {} ({}) by SF {}".format(
        self.fn, md5(self.fn), sf))
    fptr = ROOT.TFile.Open(self.fn, 'read')
    keys = [key.GetName() for key in fptr.GetListOfKeys()]
    # name -> scaled TH2D clone, detached from the file so it survives Close()
    self.histograms = {}
    for key in keys:
        histogram = fptr.Get(key).Clone()
        histogram.SetDirectory(0)  # decouple from the TFile
        assert (type(histogram) == ROOT.TH2D)  # only TH2D inputs are expected
        assert (key not in self.histograms)    # duplicate keys are not allowed
        histogram.Scale(sf)
        logging.info("Found histogram {} in file {}".format(key, self.fn))
        self.histograms[key] = histogram
    fptr.Close()
def run(self, clean):
    """Execute the sync-Ntuple Makefile (target 'all'), or its 'clean' target
    when `clean` is True; exits if cleaning is requested without a Makefile."""
    record_software_state(self.sw_ver_file_cfg, self.sw_ver_file_out, DEPENDENCIES)
    if clean:
        if not os.path.isfile(self.makefile_path):
            logging.error(
                "The makefile %s is missing and therefore it's not possible to clean anything; "
                "run sync Ntuple production first!" % self.makefile_path
            )
            sys.exit(1)
        target = 'clean'
    else:
        target = 'all'
    # One parallel make job per channel
    make_cmd = "make -f %s -j %d %s 2>%s 1>%s" % (
        self.makefile_path,
        len(self.channel_info),
        target,
        self.stderr_file_path,
        self.stdout_file_path,
    )
    logging.info("Running the make command: %s" % make_cmd)
    run_cmd(make_cmd)
    logging.info("All done")
def copy_dirs(fn, fo):
    """Copy all event subdirectories of ROOT file `fn` into the already-open
    output ROOT file `fo`, preserving the <root_key>/<event_dir> layout.

    `fn` is a file path; `fo` is an open, writable ROOT.TFile.
    """
    root_keys = get_evt_subdir_names(fn)
    for root_key in root_keys:
        # NOTE(review): the input file is re-opened for every top-level key --
        # presumably to cap memory usage between keys; TODO confirm
        fptr = ROOT.TFile.Open(fn, 'read')
        logging.info('Opened file {} to copy {}'.format(
            fptr.GetName(), root_key))
        for evt_subdir_name in root_keys[root_key]:
            evt_dir_key = os.path.basename(evt_subdir_name)
            evt_subdir = fptr.Get(evt_subdir_name)
            evt_subdir.ReadAll()  # pull the directory contents into memory
            out_key = os.path.join(root_key, evt_dir_key)
            fo.mkdir(out_key)
            fo.cd(out_key)
            evt_subdir.GetList().Write()
        logging.info('Closing file {} after copying {}'.format(
            fptr.GetName(), root_key))
        fptr.Close()
def createCfg_addBackgrounds_LeptonFakeRate(self, jobOptions):
    """Create python configuration file for the addBackgrounds executable
    (sum either all "fake" or all "non-fake" contributions)

    Args:
      jobOptions: dictionary with the keys
        'inputFile':        input file (the ROOT file produced by hadd_stage1)
        'outputFile':       output file of the job
        'cfgFile_modified': path where the modified config file is written
    """
    lines = []
    lines.append("process.fwliteInput.fileNames = cms.vstring('%s')" % jobOptions['inputFile'])
    # only the basename: the job is expected to run in its own working directory
    lines.append("process.fwliteOutput.fileName = cms.string('%s')" % os.path.basename(jobOptions['outputFile']))
    # if self.use_QCD_fromMC:
    #     lines.append("process.addBackground_LeptonFakeRate.processData = cms.string('%s')" % "QCD")
    #     lines.append("process.addBackground_LeptonFakeRate.processLeptonFakes = cms.string('%s')" % "QCD")
    #     lines.append("process.addBackground_LeptonFakeRate.processesToSubtract = cms.vstring()")
    # lines.append("process.addBackgrounds.categories = cms.vstring(%s)" % jobOptions['categories'])
    # lines.append("process.addBackgrounds.processes_input = cms.vstring(%s)" % jobOptions['processes_input'])
    # lines.append("process.addBackgrounds.process_output = cms.string('%s')" % jobOptions['process_output'])
    logging.info("self.cfgFile_addBackgrounds_LeptonFakeRate => %s" % self.cfgFile_addBackgrounds_LeptonFakeRate)
    logging.info("jobOptions['cfgFile_modified'] => %s" % jobOptions['cfgFile_modified'])
    create_cfg(self.cfgFile_addBackgrounds_LeptonFakeRate, jobOptions['cfgFile_modified'], lines)
def filter_samples(sample, condition, force = False):
    """Toggle each sample's 'use_it' flag according to a (key, regex) condition.

    Args:
      sample:    sample dictionary (the 'sum_events' entry is skipped)
      condition: 2-tuple of (condition key from ALLOWED_CONDITION_KEYS, compiled regex)
      force:     if True, overwrite 'use_it' with the match result;
                 otherwise AND it with the existing flag

    Returns:
      The same `sample` dictionary, mutated in place.
    """
    sample_key = ALLOWED_CONDITION_KEYS[condition[0]]
    regex = condition[1]
    for sample_name, sample_entry in sample.items():
        if sample_name == 'sum_events':
            continue
        # 'path' conditions match against the first local path of the sample
        if sample_key == 'path':
            candidate = sample_entry['local_paths'][0]['path']
        else:
            candidate = sample_entry[sample_key]
        use_it = bool(regex.match(candidate))
        if force:
            sample_entry['use_it'] = use_it
        else:
            sample_entry['use_it'] &= use_it
        verb = 'Enabling' if sample_entry['use_it'] else 'Disabling'
        logging.info('%s sample %s' % (verb, sample_entry[ALLOWED_CONDITION_KEYS['name']]))
    return sample
def validate_regions(rles):
    """Within each channel, check that no event shows up in more than one
    region (Fakeable_mcClosure regions are ignored).

    Args:
      rles: nested mapping channel -> region -> sample -> central_or_shift -> [rle, ...]

    Returns:
      True if any within-channel overlap was found (errors logged), else False.
    """
    has_errors = False
    for channel in rles:
        # Per-channel inversion: sample -> systematics -> rle -> [regions]
        seen = collections.OrderedDict()
        for region in rles[channel]:
            if 'Fakeable_mcClosure' in region:
                continue
            for sample_name in rles[channel][region]:
                sample_map = seen.setdefault(sample_name, collections.OrderedDict())
                for central_or_shift in rles[channel][region][sample_name]:
                    sys_map = sample_map.setdefault(central_or_shift, collections.OrderedDict())
                    for rle in rles[channel][region][sample_name][central_or_shift]:
                        regions_seen = sys_map.setdefault(rle, [])
                        if region not in regions_seen:
                            regions_seen.append(region)
        for sample_name in seen:
            sample_has_errors = False
            for central_or_shift in seen[sample_name]:
                for rle in seen[sample_name][central_or_shift]:
                    regions_seen = seen[sample_name][central_or_shift][rle]
                    if len(regions_seen) > 1:
                        logging.error(
                            "Found duplicates in channel {} and sample {} for event {}: regions {}"
                            .format(channel, sample_name, rle, ', '.join(regions_seen)))
                        sample_has_errors = True
            if not sample_has_errors:
                logging.info(
                    'No overlapping events found between regions for sample {} in channel {}'
                    .format(sample_name, channel))
            has_errors = has_errors or sample_has_errors
    return has_errors
def record_weights(file_name):
    """Tally generator-weight occurrences from the 'Events' tree of `file_name`
    into the module-level `weights_map` (genWeight value -> count)."""
    fptr = ROOT.TFile.Open(file_name, 'read')
    tree = fptr.Get('Events')
    genWeight = array.array('f', [0.])
    tree.SetBranchAddress(GENWEIGHT_NAME, genWeight)
    # Disable all branches except the generator weight to speed up the event loop
    tree.SetBranchStatus("*", 0)
    tree.SetBranchStatus(GENWEIGHT_NAME, 1)
    nof_events = tree.GetEntries()
    logging.info("Processing {} events from file {}".format(
        nof_events, file_name))
    for event_idx in range(nof_events):
        tree.GetEntry(event_idx)
        genWeight_val = genWeight[0]
        if genWeight_val not in weights_map:
            weights_map[genWeight_val] = 0
        weights_map[genWeight_val] += 1
    fptr.Close()
def run(self, clean):
    """Run the production via the generated Makefile, or clean its outputs.

    Args:
      clean: if True, invoke the 'clean' target instead of building 'all';
             exits with status 1 if the Makefile does not exist yet.
    """
    record_software_state(self.sw_ver_file_cfg, self.sw_ver_file_out, DEPENDENCIES)
    target = 'all'
    if clean:
        if not os.path.isfile(self.makefile_path):
            logging.error(
                "The makefile %s is missing and therefore it's not possible to clean anything; "
                "run sync Ntuple production first!" % self.makefile_path)
            sys.exit(1)
        target = 'clean'
    # One parallel make job per channel
    nof_parallel_jobs = len(self.channel_info)
    make_cmd = "make -f %s -j %d %s 2>%s 1>%s" % \
        (self.makefile_path, nof_parallel_jobs, target, self.stderr_file_path, self.stdout_file_path)
    if self.running_method.lower() == "makefile":
        # Run make from a /scratch mirror of the config dir -- presumably to
        # avoid hammering the shared /home filesystem; TODO confirm
        run_dir = re.sub('^/home', '/scratch', self.config_dir)
        create_if_not_exists(run_dir)
        make_cmd = re.sub('^make', 'make -C {}'.format(run_dir), make_cmd)
    logging.info("Running the make command: %s" % make_cmd)
    run_cmd(make_cmd)
    logging.info("All done")
def validate_data(rles):
    """For each channel and region, verify that a data event (sample name
    containing 'Run201') does not appear in more than one data sample.

    Args:
      rles: nested mapping channel -> region -> sample -> central_or_shift -> [rle, ...]

    Returns:
      True if any duplicate data event was found (errors logged), else False.
    """
    has_errors = False
    for channel in rles:
        seen = collections.OrderedDict()
        channel_has_errors = False
        for region in rles[channel]:
            seen[region] = collections.OrderedDict()
            for sample_name in rles[channel][region]:
                # Only data samples are checked
                if 'Run201' not in sample_name:
                    continue
                for central_or_shift in rles[channel][region][sample_name]:
                    for rle in rles[channel][region][sample_name][central_or_shift]:
                        rle_entry = seen[region].setdefault(rle, collections.OrderedDict())
                        if sample_name in rle_entry:
                            rle_entry[sample_name].append(central_or_shift)
                            continue
                        # A different sample already claimed this event -> duplicate
                        if rle_entry:
                            logging.error(
                                "Found duplicates in channel {} and region {} for event {}: samples {} and {}"
                                .format(channel, region, rle, sample_name,
                                        ', '.join(rle_entry.keys())))
                            channel_has_errors = True
                        rle_entry[sample_name] = [central_or_shift]
        if not channel_has_errors:
            # NOTE: 'region' here is whatever region the loop above ended on
            logging.info(
                'No overlapping data events found in channel {} and region {}'
                .format(channel, region))
        has_errors = has_errors or channel_has_errors
    return has_errors
def validate_pu(output_file, samples):
    """Validate the pileup histogram file against expected per-sample event counts.

    Args:
      output_file: path to the ROOT file holding one histogram per MC process
      samples:     sample dictionary; only entries of type 'mc' are checked

    Returns:
      0 on success, otherwise a non-zero error code:
        1 -> file missing, 2 -> not a valid ROOT file,
        3 -> histogram missing, 4 -> event-count mismatch.
    """
    error_code = 0
    if not os.path.isfile(output_file):
        logging.error('File {} does not exist'.format(output_file))
        return 1
    histogram_file = ROOT.TFile.Open(output_file, 'read')
    if not histogram_file:
        logging.error('Not a valid ROOT file: {}'.format(output_file))
        return 2
    for sample_name, sample_info in samples.items():
        is_mc = (sample_info["type"] == "mc")
        if not is_mc:
            continue
        process_name = sample_info["process_name_specific"]
        expected_nof_events = sample_info["nof_tree_events"]
        logging.info('Validating {} (expecting {} events)'.format(
            process_name, expected_nof_events))
        histogram = histogram_file.Get(process_name)
        if not histogram:
            logging.error("Could not find histogram '{}' in file {}".format(
                process_name, output_file))
            error_code = 3
            continue
        # The histogram's entry count must equal the sample's tree event count
        nof_events = int(histogram.GetEntries())
        if nof_events != expected_nof_events:
            logging.error(
                'Histogram {} in file {} has {} events, but expected {} events'
                .format(
                    process_name, output_file, nof_events, expected_nof_events,
                ))
            error_code = 4
        else:
            logging.info(
                'Validation successful for sample {}'.format(process_name))
    histogram_file.Close()
    if error_code == 0:
        logging.info("Validation successful!")
    else:
        logging.error("Validation failed!")
    return error_code
def __init__(
      self,
      configDir,
      outputDir,
      cfgFile_prodNtuple,
      samples,
      max_files_per_job,
      era,
      preselection_cuts,
      leptonSelection,
      hadTauWP,
      check_output_files,
      running_method,
      version,
      num_parallel_jobs,
      pileup,
      golden_json,
      dry_run,
      isDebug,
      gen_matching_by_index,
      use_nonnominal,
      use_home,
      skip_tools_step,
      verbose=False,
      pool_id='',
    ):
    """Initialize bookkeeping for the Ntuple production: validate inputs,
    and set up the config/output directory structure and log-file paths.

    Raises:
      ValueError: if `running_method` is not 'sbatch'/'makefile', or if the
        pileup or golden-JSON file does not exist.
    """
    self.configDir = configDir
    self.outputDir = outputDir
    self.max_num_jobs = 200000  # hard upper limit on the number of jobs
    self.samples = samples
    self.max_files_per_job = max_files_per_job
    self.era = era
    self.preselection_cuts = preselection_cuts
    self.leptonSelection = leptonSelection
    self.hadTauWP = hadTauWP
    self.check_output_files = check_output_files
    self.verbose = verbose
    self.dry_run = dry_run
    self.isDebug = isDebug
    self.gen_matching_by_index = gen_matching_by_index
    self.use_nonnominal = use_nonnominal
    self.use_home = use_home
    self.pileup = pileup
    self.golden_json = golden_json
    if running_method.lower() not in ["sbatch", "makefile"]:
        raise ValueError("Invalid running method: %s" % running_method)
    if not os.path.isfile(self.pileup):
        raise ValueError('No such file: %s' % self.pileup)
    self.pileup_histograms = get_pileup_histograms(self.pileup)
    if not os.path.isfile(self.golden_json):
        raise ValueError('No such file: %s' % self.golden_json)
    self.running_method = running_method
    self.is_sbatch = self.running_method.lower() == "sbatch"
    self.is_makefile = not self.is_sbatch
    self.makefile = os.path.join(self.configDir, "Makefile_prodNtuple")
    self.num_parallel_jobs = num_parallel_jobs
    self.skip_tools_step = skip_tools_step
    # Fall back to a random pool id if the caller did not provide one
    self.pool_id = pool_id if pool_id else uuid.uuid4()
    self.workingDir = os.getcwd()
    logging.info("Working directory is: %s" % self.workingDir)
    self.template_dir = os.path.join(
        os.getenv('CMSSW_BASE'), 'src', 'tthAnalysis', 'HiggsToTauTau', 'test', 'templates')
    logging.info("Templates directory is: %s" % self.template_dir)
    self.version = version
    self.samples = samples
    create_if_not_exists(self.configDir)
    create_if_not_exists(self.outputDir)
    self.stdout_file_path = os.path.join(self.configDir, "stdout_prodNtuple.log")
    self.stderr_file_path = os.path.join(self.configDir, "stderr_prodNtuple.log")
    self.sw_ver_file_cfg = os.path.join(self.configDir, "VERSION_prodNtuple.log")
    self.sw_ver_file_out = os.path.join(self.outputDir, "VERSION_prodNtuple.log")
    # Version the log files so that reruns do not overwrite previous logs
    self.stdout_file_path, self.stderr_file_path, self.sw_ver_file_cfg, self.sw_ver_file_out = get_log_version(
        (self.stdout_file_path, self.stderr_file_path, self.sw_ver_file_cfg, self.sw_ver_file_out))
    self.cfgFile_prodNtuple_original = os.path.join(self.template_dir, cfgFile_prodNtuple)
    self.sbatchFile_prodNtuple = os.path.join(self.configDir, "sbatch_prodNtuple.py")
    self.cfgFiles_prodNtuple_modified = {}   # per-job modified config file paths
    self.logFiles_prodNtuple = {}            # per-job log file paths
    self.inputFiles = {}                     # per-job input file lists
    self.outputFiles = {}                    # per-job output Ntuple paths
    self.filesToClean = []
    self.dirs = {}
    # Per-sample directories: configs and logs under configDir, Ntuples under outputDir
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        process_name = sample_info["process_name_specific"]
        key_dir = getKey(sample_name)
        for dir_type in [DKEY_CFGS, DKEY_NTUPLES, DKEY_LOGS]:
            initDict(self.dirs, [key_dir, dir_type])
            if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                self.dirs[key_dir][dir_type] = os.path.join(
                    self.configDir, dir_type, process_name)
            else:
                self.dirs[key_dir][dir_type] = os.path.join(
                    self.outputDir, dir_type, process_name)
    # Top-level (sample-independent) directories
    for dir_type in [DKEY_CFGS, DKEY_LOGS]:
        initDict(self.dirs, [dir_type])
        if dir_type in [DKEY_CFGS, DKEY_LOGS]:
            self.dirs[dir_type] = os.path.join(self.configDir, dir_type)
        else:
            # NOTE(review): unreachable -- dir_type is always in [DKEY_CFGS, DKEY_LOGS]
            self.dirs[dir_type] = os.path.join(self.outputDir, dir_type)
    # CVMFS-related errors collected during job monitoring (presumably; TODO confirm)
    self.cvmfs_error_log = {}
    self.executable = "produceNtuple.sh"
def create(self):
    """Creates all necessary config files and runs the Ntuple production -- either locally or on the batch system

    Returns:
      the number of generated batch jobs (0 when not running via sbatch).

    Raises:
      ValueError: if an MC sample has no pileup distribution available.
    """
    # Materialize the directory structure prepared in __init__
    for key in self.dirs.keys():
        if type(self.dirs[key]) == dict:
            for dir_type in self.dirs[key].keys():
                create_if_not_exists(self.dirs[key][dir_type])
        else:
            create_if_not_exists(self.dirs[key])
    self.inputFileIds = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        process_name = sample_info["process_name_specific"]
        is_mc = (sample_info["type"] == "mc")
        # Every MC sample must have a pileup distribution in the pileup file
        if is_mc and process_name not in self.pileup_histograms:
            raise ValueError("Missing PU distribution for %s in file %s" %
                             (process_name, self.pileup))
        logging.info(
            "Creating configuration files to run '%s' for sample %s" %
            (self.executable, process_name))
        inputFileList = generateInputFileList(sample_info, self.max_files_per_job)
        key_dir = getKey(sample_name)
        # Group output trees into subdirectories of 1000 jobs each ('0000', '0001', ...)
        subDirs = list(
            map(
                lambda y: os.path.join(self.dirs[key_dir][DKEY_NTUPLES], '%04d' % y),
                set(map(lambda x: x // 1000, inputFileList.keys()))))
        for subDir in subDirs:
            create_if_not_exists(subDir)
        for jobId in inputFileList.keys():
            key_file = getKey(sample_name, jobId)
            self.inputFiles[key_file] = inputFileList[jobId]
            # Jobs without any input files are skipped with a warning
            if len(self.inputFiles[key_file]) == 0:
                logging.warning(
                    "ntupleFiles['%s'] = %s --> skipping job !!"
                    % (key_file, self.inputFiles[key_file]))
                continue
            self.cfgFiles_prodNtuple_modified[key_file] = os.path.join(
                self.dirs[key_dir][DKEY_CFGS],
                "produceNtuple_%s_%i_cfg.py" % (process_name, jobId))
            self.outputFiles[key_file] = os.path.join(
                self.dirs[key_dir][DKEY_NTUPLES], "%04d" % (jobId // 1000),
                "tree_%i.root" % jobId)
            self.logFiles_prodNtuple[key_file] = os.path.join(
                self.dirs[key_dir][DKEY_LOGS],
                "produceNtuple_%s_%i.log" % (process_name, jobId))
            # Data samples carry their own HLT paths; MC does not
            hlt_paths = sample_info["hlt_paths"] if not is_mc else []
            hlt_cuts = list(
                Triggers(self.era).triggers_flat
            ) if self.preselection_cuts["applyHLTcut"] else []
            jobOptions = {
                'inputFiles': self.inputFiles[key_file],
                'cfgFile_modified': self.cfgFiles_prodNtuple_modified[key_file],
                'outputFile': self.outputFiles[key_file],
                'is_mc': is_mc,
                'random_seed': jobId,  # per-job seed keeps jobs reproducible yet distinct
                'process_name': process_name,
                'category_name': sample_info["sample_category"],
                'triggers': hlt_paths,
                'HLTcuts': hlt_cuts,
            }
            self.createCfg_prodNtuple(jobOptions)
    num_jobs = 0
    if self.is_sbatch:
        logging.info(
            "Creating script for submitting '%s' jobs to batch system" %
            self.executable)
        num_jobs = self.createScript_sbatch()
        logging.info("Generated %i job(s)" % num_jobs)
    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_prodNtuple(lines_makefile)
    self.createMakefile(lines_makefile)
    logging.info("Done")
    return num_jobs
sample_suffix = "sync" if use_nonnominal else "sync_nom" if use_preselected: sample_suffix = "preselected_{}".format(sample_suffix) samples = load_samples(era, suffix=sample_suffix) leptonSelection = "Fakeable" hadTauWP_map = { 'dR03mva': 'Loose', 'deepVSj': 'VLoose', } hadTauWP = tau_id + hadTauWP_map[tau_id] else: raise ValueError("Invalid mode: %s" % mode) if __name__ == '__main__': logging.info( "Running the jobs with the following systematic uncertainties enabled: %s" % \ ', '.join(central_or_shifts) ) if sample_filter: samples = filter_samples(samples, sample_filter) if args.tau_id_wp: logging.info("Changing tau ID WP: %s -> %s" % (hadTauWP, args.tau_id_wp)) hadTauWP = args.tau_id_wp hadTauSelectionAndWP = '%s|%s' % (hadTauSelection, hadTauWP) addMEMProduction = addMEMConfig_3l_1tau( treeName='Events', outputDir=os.path.join("/hdfs/local", getpass.getuser(), "addMEM", era, version),
if args.tau_id_wp: tau_id = args.tau_id[:7] hadTau_selection_relaxed = tau_id + hadTauWP_map_relaxed[tau_id] else: raise ValueError("Invalid mode: %s" % mode) for sample_name, sample_info in samples.items(): if sample_name == 'sum_events': continue if sample_name.startswith(("/DoubleEG/", "/DoubleMuon/", "/MuonEG/")): sample_info["use_it"] = False elif sample_name.startswith("/Tau/"): sample_info["use_it"] = True if __name__ == '__main__': logging.info( "Running the jobs with the following systematic uncertainties enabled: %s" % \ ', '.join(central_or_shifts) ) if not use_preselected: logging.warning('Running the analysis on fully inclusive samples!') if sample_filter: samples = filter_samples(samples, sample_filter) if args.tau_id_wp: logging.info("Changing tau ID working point: %s -> %s" % (hadTau_selection, args.tau_id_wp)) hadTau_selection = args.tau_id_wp analysis = analyzeConfig_hh_1l_3tau( configDir=os.path.join("/scratch-persistent", getpass.getuser(), "hhAnalysis", era, version),
def create(self):
    """Create all config files and steer the complete analysis workflow,
    either locally or on the batch system.

    The method runs through the following stages, in order:
      1. build and create the directory structure (per process / lepton
         selection / systematic shift, plus shared sub-directories);
      2. collect the input Ntuple file lists for every usable sample;
      3. write per-job 'analyze' config files and book hadd_stage1 inputs;
      4. write 'addBackgrounds' configs (per-sample non-fake / conversion /
         fake sums, then the fakes_mc and conversions grand totals) and
         book hadd_stage1_5 / hadd_stage2 inputs;
      5. write configs for 'addBackgroundLeptonFakes', 'prepareDatacards',
         'addSystFakeRates' and 'makePlots';
      6. write sbatch submission scripts (if requested) and the Makefile.

    In sync-Ntuple mode (self.do_sync) only stages 1-3 plus a dedicated
    sync Makefile are produced, and the method returns early.

    Returns self.num_jobs (the job bookkeeping maintained by the helper
    methods called here).
    """
    # ------------------------------------------------------------------
    # Stage 1a: per-(process, selection, shift) directories.
    # "hadd"/"addBackgrounds" dummies are appended so that the aggregation
    # steps get their own cfg/log/histogram directories as well.
    # ------------------------------------------------------------------
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
            continue
        process_name = sample_info["process_name_specific"]
        for lepton_selection in self.lepton_selections:
            for lepton_frWeight in self.lepton_frWeights:
                # fake-rate weights only make sense for Fakeable selections
                if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"):
                    continue
                lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight)
                central_or_shifts_extended = [ "" ]
                central_or_shifts_extended.extend(self.central_or_shifts)
                central_or_shifts_extended.extend([ "hadd", "addBackgrounds" ])
                for central_or_shift_or_dummy in central_or_shifts_extended:
                    process_name_extended = [ process_name, "hadd" ]
                    for process_name_or_dummy in process_name_extended:
                        key_dir = getKey(process_name_or_dummy, lepton_selection_and_frWeight, central_or_shift_or_dummy)
                        for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_ROOT, DKEY_RLES, DKEY_SYNC ]:
                            initDict(self.dirs, [ key_dir, dir_type ])
                            # configs/logs live under configDir, data products under outputDir
                            if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                                self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel,
                                    "_".join([ lepton_selection_and_frWeight ]), process_name_or_dummy, central_or_shift_or_dummy)
                            else:
                                self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                                    "_".join([ lepton_selection_and_frWeight ]), process_name_or_dummy, central_or_shift_or_dummy)
    # ------------------------------------------------------------------
    # Stage 1b: shared sub-directories for the aggregation / fit steps.
    # ------------------------------------------------------------------
    for subdirectory in [ "addBackgrounds", "addBackgroundLeptonFakes", "prepareDatacards", "addSystFakeRates", "makePlots" ]:
        key_dir = getKey(subdirectory)
        for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_ROOT, DKEY_DCRD, DKEY_PLOT ]:
            initDict(self.dirs, [ key_dir, dir_type ])
            if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel, subdirectory)
            else:
                self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, subdirectory)
    # Stage 1c: channel-level (non-nested) directories.
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC ]:
        initDict(self.dirs, [ dir_type ])
        if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
            self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
        else:
            self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)
    # Count the directories first so that creation progress can be logged
    # in percent steps below.
    numDirectories = 0
    for key in self.dirs.keys():
        if type(self.dirs[key]) == dict:
            numDirectories += len(self.dirs[key])
        else:
            numDirectories += 1
    logging.info("Creating directory structure (numDirectories = %i)" % numDirectories)
    numDirectories_created = 0; frac = 1
    for key in self.dirs.keys():
        if type(self.dirs[key]) == dict:
            for dir_type in self.dirs[key].keys():
                create_if_not_exists(self.dirs[key][dir_type])
            numDirectories_created += len(self.dirs[key])
        else:
            create_if_not_exists(self.dirs[key])
            numDirectories_created = numDirectories_created + 1
        # emit one log line per whole percent of progress reached so far
        while 100 * numDirectories_created >= frac * numDirectories:
            logging.info(" %i%% completed" % frac)
            frac = frac + 1
    logging.info("Done.")

    # ------------------------------------------------------------------
    # Stage 2: input Ntuple file lists, chunked into jobs.
    # ------------------------------------------------------------------
    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
            continue
        logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
        inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)

    # matches e.g. 'Fakeable_mcClosure_e_wFakeRateWeights'; the captured
    # group distinguishes the electron ('e') and muon ('m') closure regions
    mcClosure_regex = re.compile('Fakeable_mcClosure_(?P<type>m|e)_wFakeRateWeights')

    # ------------------------------------------------------------------
    # Stage 3 + 4: per-sample analyze jobs and background sums.
    # ------------------------------------------------------------------
    for lepton_selection in self.lepton_selections:
        electron_selection = lepton_selection
        muon_selection = lepton_selection
        hadTauVeto_selection = "Tight"
        hadTauVeto_selection = "|".join([ hadTauVeto_selection, self.hadTauVeto_selection_part2 ])
        # mcClosure regions relax exactly one lepton flavour at a time
        if lepton_selection == "Fakeable_mcClosure_e":
            electron_selection = "Fakeable"
            muon_selection = "Tight"
        elif lepton_selection == "Fakeable_mcClosure_m":
            electron_selection = "Tight"
            muon_selection = "Fakeable"
        for lepton_frWeight in self.lepton_frWeights:
            if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"):
                continue
            if lepton_frWeight == "disabled" and not lepton_selection in [ "Tight" ]:
                continue
            lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight)
            for sample_name, sample_info in self.samples.items():
                if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
                    continue
                process_name = sample_info["process_name_specific"]
                logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))
                sample_category = sample_info["sample_category"]
                is_mc = (sample_info["type"] == "mc")
                is_signal = (sample_category == "signal")
                for central_or_shift in self.central_or_shifts:
                    inputFileList = inputFileLists[sample_name]
                    for jobId in inputFileList.keys():
                        if central_or_shift != "central":
                            # systematic shifts are only run where they matter:
                            # fake-rate shape shifts in the Fakeable region,
                            # everything in the Tight region, and data only
                            # for fake-rate shifts
                            isFR_shape_shift = (central_or_shift in systematics.FR_all)
                            if not ((lepton_selection == "Fakeable" and isFR_shape_shift) or lepton_selection == "Tight"):
                                continue
                            if not is_mc and not isFR_shape_shift:
                                continue
                        # LHE scale/PDF shifts are process-specific
                        if central_or_shift in systematics.LHE().ttH and sample_category != "signal":
                            continue
                        if central_or_shift in systematics.LHE().ttW and sample_category != "TTW":
                            continue
                        if central_or_shift in systematics.LHE().ttZ and sample_category != "TTZ":
                            continue
                        if central_or_shift in systematics.DYMCReweighting and not is_dymc_reweighting(sample_name):
                            continue
                        logging.info(" ... for '%s' and systematic uncertainty option '%s'" % (lepton_selection_and_frWeight, central_or_shift))

                        # build config files for executing analysis code
                        key_analyze_dir = getKey(process_name, lepton_selection_and_frWeight, central_or_shift)
                        analyze_job_tuple = (process_name, lepton_selection_and_frWeight, central_or_shift, jobId)
                        key_analyze_job = getKey(*analyze_job_tuple)
                        ntupleFiles = inputFileList[jobId]
                        if len(ntupleFiles) == 0:
                            logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job))
                            continue
                        # sync-Ntuple bookkeeping: pick output file/tree per region
                        syncOutput = ''
                        syncTree = ''
                        syncRequireGenMatching = True
                        if self.do_sync:
                            mcClosure_match = mcClosure_regex.match(lepton_selection_and_frWeight)
                            if lepton_selection_and_frWeight == 'Tight':
                                syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_SR.root' % (self.channel, central_or_shift))
                                syncTree = 'syncTree_%s_SR' % self.channel
                                syncRequireGenMatching = True
                            elif lepton_selection_and_frWeight == 'Fakeable_wFakeRateWeights':
                                syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_Fake.root' % (self.channel, central_or_shift))
                                syncTree = 'syncTree_%s_Fake' % self.channel
                            elif mcClosure_match:
                                mcClosure_type = mcClosure_match.group('type')
                                syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_mcClosure_%s.root' % (self.channel, central_or_shift, mcClosure_type))
                                syncTree = 'syncTree_%s_mcClosure_%s' % (self.channel, mcClosure_type)
                            else:
                                # in sync mode only the three regions above are produced
                                continue
                        if syncTree and central_or_shift != "central":
                            syncTree = os.path.join(central_or_shift, syncTree)
                        syncRLE = ''
                        if self.do_sync and self.rle_select:
                            # self.rle_select is a %-format pattern keyed on the sync tree name
                            syncRLE = self.rle_select % syncTree
                            if not os.path.isfile(syncRLE):
                                logging.warning("Input RLE file for the sync is missing: %s; skipping the job" % syncRLE)
                                continue
                        if syncOutput:
                            self.inputFiles_sync['sync'].append(syncOutput)

                        cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%s_%i_cfg.py" % analyze_job_tuple)
                        logFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%s_%i.log" % analyze_job_tuple)
                        rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%i.txt" % analyze_job_tuple) \
                            if self.select_rle_output else ""
                        histogramFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%s_%i.root" % analyze_job_tuple)

                        self.jobOptions_analyze[key_analyze_job] = {
                            'ntupleFiles' : ntupleFiles,
                            'cfgFile_modified' : cfgFile_modified_path,
                            'histogramFile' : histogramFile_path,
                            'logFile' : logFile_path,
                            'selEventsFileName_output' : rleOutputFile_path,
                            'electronSelection' : electron_selection,
                            'muonSelection' : muon_selection,
                            'apply_leptonGenMatching' : self.apply_leptonGenMatching,
                            'hadTauSelection_veto' : hadTauVeto_selection,
                            # fake-rate weights apply only outside the Tight (signal) region
                            'applyFakeRateWeights' : self.applyFakeRateWeights if not lepton_selection == "Tight" else "disabled",
                            'central_or_shift' : central_or_shift,
                            'syncOutput' : syncOutput,
                            'syncTree' : syncTree,
                            'syncRLE' : syncRLE,
                            'syncRequireGenMatching' : syncRequireGenMatching,
                            'useNonNominal' : self.use_nonnominal,
                            'apply_hlt_filter' : self.hlt_filter,
                        }
                        self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info, lepton_selection)

                        # initialize input and output file names for hadd_stage1
                        key_hadd_stage1_dir = getKey(process_name, lepton_selection_and_frWeight)
                        hadd_stage1_job_tuple = (process_name, lepton_selection_and_frWeight)
                        key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple)
                        if not key_hadd_stage1_job in self.inputFiles_hadd_stage1:
                            self.inputFiles_hadd_stage1[key_hadd_stage1_job] = []
                        self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                        self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(self.dirs[key_hadd_stage1_dir][DKEY_HIST], "hadd_stage1_%s_%s.root" % hadd_stage1_job_tuple)

                # sync mode needs only the analyze jobs; skip background sums
                if self.do_sync: continue

                if is_mc:
                    logging.info("Creating configuration files to run 'addBackgrounds' for sample %s" % process_name)
                    sample_categories = [ sample_category ]
                    if is_signal:
                        sample_categories = [ "signal", "ttH", "ttH_htt", "ttH_hww", "ttH_hzz", "ttH_hmm", "ttH_hzg" ]
                    for sample_category in sample_categories:
                        # sum non-fake and fake contributions for each MC sample separately
                        genMatch_categories = [ "nonfake", "conversions", "fake" ]
                        for genMatch_category in genMatch_categories:
                            key_hadd_stage1_job = getKey(process_name, lepton_selection_and_frWeight)
                            key_addBackgrounds_dir = getKey(process_name, lepton_selection_and_frWeight, "addBackgrounds")
                            addBackgrounds_job_tuple = None
                            processes_input = None
                            process_output = None
                            if genMatch_category == "nonfake":
                                # sum non-fake contributions for each MC sample separately
                                # input processes: TT3l0g0j,...
                                # output processes: TT; ...
                                if sample_category in [ "signal" ]:
                                    lepton_genMatches = []
                                    lepton_genMatches.extend(self.lepton_genMatches_nonfakes)
                                    lepton_genMatches.extend(self.lepton_genMatches_conversions)
                                    lepton_genMatches.extend(self.lepton_genMatches_fakes)
                                    processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in lepton_genMatches ]
                                elif sample_category in [ "ttH" ]:
                                    lepton_genMatches = []
                                    lepton_genMatches.extend(self.lepton_genMatches_nonfakes)
                                    lepton_genMatches.extend(self.lepton_genMatches_conversions)
                                    processes_input = []
                                    processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in lepton_genMatches ])
                                    processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in lepton_genMatches ])
                                    processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in lepton_genMatches ])
                                    processes_input.extend([ "%s%s" % ("ttH_hzg", genMatch) for genMatch in lepton_genMatches ])
                                    processes_input.extend([ "%s%s" % ("ttH_hmm", genMatch) for genMatch in lepton_genMatches ])
                                else:
                                    processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_nonfakes ]
                                process_output = sample_category
                                addBackgrounds_job_tuple = (process_name, sample_category, lepton_selection_and_frWeight)
                            elif genMatch_category == "conversions":
                                # sum fake contributions for each MC sample separately
                                # input processes: TT2l1g0j, TT1l2g0j, TT0l3g0j; ...
                                # output processes: TT_conversion; ...
                                if sample_category in [ "signal" ]:
                                    processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_conversions ]
                                elif sample_category in [ "ttH" ]:
                                    processes_input = []
                                    processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in self.lepton_genMatches_conversions ])
                                    processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in self.lepton_genMatches_conversions ])
                                    processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in self.lepton_genMatches_conversions ])
                                    processes_input.extend([ "%s%s" % ("ttH_hzg", genMatch) for genMatch in self.lepton_genMatches_conversions ])
                                    processes_input.extend([ "%s%s" % ("ttH_hmm", genMatch) for genMatch in self.lepton_genMatches_conversions ])
                                else:
                                    processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_conversions ]
                                process_output = "%s_conversion" % sample_category
                                addBackgrounds_job_tuple = (process_name, "%s_conversion" % sample_category, lepton_selection_and_frWeight)
                            elif genMatch_category == "fake":
                                # sum fake contributions for each MC sample separately
                                # input processes: TT2l0g1j, TT1l1g1j, TT1l0g2j, TT0l2g1j, TT0l1g2j, TT0l0g3j; ...
                                # output processes: TT_fake; ...
                                if sample_category in [ "signal" ]:
                                    processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_fakes ]
                                elif sample_category in [ "ttH" ]:
                                    processes_input = []
                                    processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in self.lepton_genMatches_fakes ])
                                    processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in self.lepton_genMatches_fakes ])
                                    processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in self.lepton_genMatches_fakes ])
                                    processes_input.extend([ "%s%s" % ("ttH_hzg", genMatch) for genMatch in self.lepton_genMatches_fakes ])
                                    processes_input.extend([ "%s%s" % ("ttH_hmm", genMatch) for genMatch in self.lepton_genMatches_fakes ])
                                else:
                                    processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_fakes ]
                                process_output = "%s_fake" % sample_category
                                addBackgrounds_job_tuple = (process_name, "%s_fake" % sample_category, lepton_selection_and_frWeight)
                            if processes_input:
                                logging.info(" ...for genMatch option = '%s'" % genMatch_category)
                                key_addBackgrounds_job = getKey(*addBackgrounds_job_tuple)
                                cfgFile_modified = os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_tuple)
                                outputFile = os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_tuple)
                                self.jobOptions_addBackgrounds[key_addBackgrounds_job] = {
                                    'inputFile' : self.outputFile_hadd_stage1[key_hadd_stage1_job],
                                    'cfgFile_modified' : cfgFile_modified,
                                    'outputFile' : outputFile,
                                    'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], os.path.basename(cfgFile_modified).replace("_cfg.py", ".log")),
                                    'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight) ],
                                    'processes_input' : processes_input,
                                    'process_output' : process_output
                                }
                                self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds[key_addBackgrounds_job])

                                # initialize input and output file names for hadd_stage1_5
                                key_hadd_stage1_5_dir = getKey("hadd", lepton_selection_and_frWeight)
                                key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight)
                                if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5:
                                    self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = []
                                self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.jobOptions_addBackgrounds[key_addBackgrounds_job]['outputFile'])
                                self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job] = os.path.join(self.dirs[key_hadd_stage1_5_dir][DKEY_HIST], "hadd_stage1_5_%s.root" % lepton_selection_and_frWeight)

                # add output files of hadd_stage1 for data to list of input files for hadd_stage1_5
                if not is_mc:
                    key_hadd_stage1_job = getKey(process_name, lepton_selection_and_frWeight)
                    key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight)
                    if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5:
                        self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = []
                    self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.outputFile_hadd_stage1[key_hadd_stage1_job])

            if self.do_sync: continue

            # sum fake background contributions for the total of all MC sample
            # input processes: TT2l0g1j, TT1l1g1j, TT1l0g2j, TT0l3j, TT0l3j, TT0l3j, TT0l3j; ...
            # output process: fakes_mc
            key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight)
            key_addBackgrounds_dir = getKey("addBackgrounds")
            addBackgrounds_job_fakes_tuple = ("fakes_mc", lepton_selection_and_frWeight)
            key_addBackgrounds_job_fakes = getKey(*addBackgrounds_job_fakes_tuple)
            sample_categories = []
            sample_categories.extend(self.nonfake_backgrounds)
            sample_categories.extend([ "signal" ])
            processes_input = []
            for sample_category in sample_categories:
                processes_input.append("%s_fake" % sample_category)
            self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes] = {
                'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
                'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_cfg.py" % addBackgrounds_job_fakes_tuple),
                'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s.root" % addBackgrounds_job_fakes_tuple),
                'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s.log" % addBackgrounds_job_fakes_tuple),
                'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight) ],
                'processes_input' : processes_input,
                'process_output' : "fakes_mc"
            }
            self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes])

            # sum conversion background contributions for the total of all MC sample
            # input processes: TT2l0g1j, TT1l1g1j, TT1l0g2j, TT0l3j, TT0l3j, TT0l3j, TT0l3j; ...
            # output process: conversions
            addBackgrounds_job_conversions_tuple = ("conversions", lepton_selection_and_frWeight)
            key_addBackgrounds_job_conversions = getKey(*addBackgrounds_job_conversions_tuple)
            sample_categories = []
            sample_categories.extend(self.nonfake_backgrounds)
            sample_categories.extend([ "signal" ])
            processes_input = []
            for sample_category in sample_categories:
                processes_input.append("%s_conversion" % sample_category)
            self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_conversions] = {
                'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
                'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_cfg.py" % addBackgrounds_job_conversions_tuple),
                'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s.root" % addBackgrounds_job_conversions_tuple),
                'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s.log" % addBackgrounds_job_conversions_tuple),
                'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight) ],
                'processes_input' : processes_input,
                'process_output' : "conversions"
            }
            self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_conversions])

            # initialize input and output file names for hadd_stage2
            key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight)
            key_hadd_stage2_dir = getKey("hadd", lepton_selection_and_frWeight)
            key_hadd_stage2_job = getKey(lepton_selection_and_frWeight)
            if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
                self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
            # the MC fake/conversion sums enter hadd_stage2 only in the Tight region
            if lepton_selection == "Tight":
                self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'])
                self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_conversions]['outputFile'])
            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job])
            self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(self.dirs[key_hadd_stage2_dir][DKEY_HIST], "hadd_stage2_%s.root" % lepton_selection_and_frWeight)

    # ------------------------------------------------------------------
    # Sync mode: only the sync Ntuple Makefile is needed; return early.
    # ------------------------------------------------------------------
    if self.do_sync:
        if self.is_sbatch:
            logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
            self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
            self.createScript_sbatch_syncNtuple(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile_syncNtuple(lines_makefile)
        outputFile_sync_path = os.path.join(self.outputDir, DKEY_SYNC, '%s.root' % self.channel)
        self.outputFile_sync['sync'] = outputFile_sync_path
        self.targets.append(outputFile_sync_path)
        self.addToMakefile_hadd_sync(lines_makefile)
        self.createMakefile(lines_makefile)
        logging.info("Done.")
        return self.num_jobs

    # ------------------------------------------------------------------
    # Stage 5: data-driven fakes, datacards, fake-rate systematics, plots.
    # ------------------------------------------------------------------
    logging.info("Creating configuration files to run 'addBackgroundFakes'")
    key_hadd_stage1_5_job = getKey(get_lepton_selection_and_frWeight("Fakeable", "enabled"))
    key_addFakes_job = getKey("fakes_data")
    category_sideband = "ttZctrl_Fakeable_wFakeRateWeights"
    self.jobOptions_addFakes[key_addFakes_job] = {
        'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
        'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "addBackgroundLeptonFakes_cfg.py"),
        'outputFile' : os.path.join(self.dirs[DKEY_HIST], "addBackgroundLeptonFakes.root"),
        'logFile' : os.path.join(self.dirs[DKEY_LOGS], "addBackgroundLeptonFakes.log"),
        'category_signal' : "ttZctrl_Tight",
        'category_sideband' : category_sideband
    }
    self.createCfg_addFakes(self.jobOptions_addFakes[key_addFakes_job])
    key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"))
    self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile'])

    logging.info("Creating configuration files to run 'prepareDatacards'")
    for histogramToFit in self.histograms_to_fit:
        key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"))
        key_prep_dcard_dir = getKey("prepareDatacards")
        prep_dcard_job_tuple = (self.channel, histogramToFit)
        key_prep_dcard_job = getKey(histogramToFit)
        self.jobOptions_prep_dcard[key_prep_dcard_job] = {
            'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_cfg.py" % prep_dcard_job_tuple),
            'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s.root" % prep_dcard_job_tuple),
            'histogramDir' : self.histogramDir_prep_dcard,
            'histogramToFit' : histogramToFit,
            'label' : None
        }
        self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])

        # add shape templates for the following systematic uncertainties:
        # - 'CMS_ttHl_Clos_norm_e'
        # - 'CMS_ttHl_Clos_shape_e'
        # - 'CMS_ttHl_Clos_norm_m'
        # - 'CMS_ttHl_Clos_shape_m'
        key_prep_dcard_job = getKey(histogramToFit)
        key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"))
        key_add_syst_fakerate_dir = getKey("addSystFakeRates")
        add_syst_fakerate_job_tuple = (self.channel, histogramToFit)
        key_add_syst_fakerate_job = getKey(histogramToFit)
        self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job] = {
            'inputFile' : self.jobOptions_prep_dcard[key_prep_dcard_job]['datacardFile'],
            'cfgFile_modified' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_CFGS], "addSystFakeRates_%s_%s_cfg.py" % add_syst_fakerate_job_tuple),
            'outputFile' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_DCRD], "addSystFakeRates_%s_%s.root" % add_syst_fakerate_job_tuple),
            'category' : self.channel,
            'histogramToFit' : histogramToFit,
            'plots_outputFileName' : os.path.join(self.dirs[DKEY_PLOT], "addSystFakeRates.png")
        }
        histogramDir_nominal = self.histogramDir_prep_dcard
        for lepton_type in [ 'e', 'm' ]:
            lepton_mcClosure = "Fakeable_mcClosure_%s" % lepton_type
            if lepton_mcClosure not in self.lepton_selections:
                continue
            lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_mcClosure, "enabled")
            key_addBackgrounds_job_fakes = getKey("fakes_mc", lepton_selection_and_frWeight)
            histogramDir_mcClosure = self.mcClosure_dir[lepton_mcClosure]
            self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job].update({
                'add_Clos_%s' % lepton_type : ("Fakeable_mcClosure_%s" % lepton_type) in self.lepton_selections,
                'inputFile_nominal_%s' % lepton_type : self.outputFile_hadd_stage2[key_hadd_stage2_job],
                'histogramName_nominal_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_nominal, histogramToFit),
                'inputFile_mcClosure_%s' % lepton_type : self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'],
                'histogramName_mcClosure_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_mcClosure, histogramToFit)
            })
        self.createCfg_add_syst_fakerate(self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job])

    logging.info("Creating configuration files to run 'makePlots'")
    key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"))
    key_makePlots_dir = getKey("makePlots")
    key_makePlots_job = getKey('')
    self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_cfg.py" % self.channel),
        'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s.png" % self.channel),
        'histogramDir' : self.histogramDir_prep_dcard,
        'label' : "t#bar{t}Z control region",
        'make_plots_backgrounds' : self.make_plots_backgrounds
    }
    self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])

    # ------------------------------------------------------------------
    # Stage 6: sbatch submission scripts and the Makefile.
    # ------------------------------------------------------------------
    if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addBackgrounds)
        self.sbatchFile_addBackgrounds = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_%s.py" % self.channel)
        self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds, self.jobOptions_addBackgrounds)
        self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_%s.py" % self.channel)
        self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum)
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFakes)
        self.sbatchFile_addFakes = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFakes_%s.py" % self.channel)
        self.createScript_sbatch(self.executable_addFakes, self.sbatchFile_addFakes, self.jobOptions_addFakes)

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_backgrounds_from_data(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    self.addToMakefile_add_syst_fakerate(lines_makefile)
    self.addToMakefile_make_plots(lines_makefile)
    self.createMakefile(lines_makefile)
    logging.info("Done.")
    return self.num_jobs
if sample_name == 'sum_events': continue if re.match("(^WZTo3LNu$|^WZTo3LNu_ext(\d)?$)", sample_info["process_name_specific"]): sample_info["use_it"] = True if rle_filter_file: rle_filter_file = os.path.join(os.environ['CMSSW_BASE'], 'src', 'tthAnalysis', 'HiggsToTauTau', 'data', 'mem', rle_filter_file) if not os.path.isfile(rle_filter_file): raise ValueError("No such file: %s" % rle_filter_file) if __name__ == '__main__': logging.info( "Running the jobs with the following systematic uncertainties enabled: %s" % \ ', '.join(central_or_shifts) ) if sample_filter: samples = filter_samples(samples, sample_filter) addMEMProduction = addMEMConfig_3l( treeName='Events', outputDir=os.path.join("/hdfs/local", getpass.getuser(), "addMEM", era, version), cfgDir=os.path.join("/home", getpass.getuser(), "addMEM", era, version), executable_addMEM="addMEM_3l", samples=samples, era=era, check_output_files=check_output_files,
def create(self):
    """Create all config files and run the PU profile production,
    either locally or on the batch system.

    Stages:
      1. create the directory structure registered in self.dirs;
      2. for every usable MC sample, write per-job config files and
         collect the per-job temporary histogram outputs;
      3. write the sbatch submission script (if self.is_sbatch) and the
         Makefile that drives puProfile -> hadd -> plot -> final hadd.

    Samples whose final output file already exists (and passes the
    minimum-size sanity check) are skipped.

    Returns self.num_jobs (per-task job-count bookkeeping dict,
    updated in place).
    """
    # Stage 1: materialize the directory structure.
    for key in self.dirs.keys():
        if type(self.dirs[key]) == dict:
            for dir_type in self.dirs[key].keys():
                create_if_not_exists(self.dirs[key][dir_type])
        else:
            create_if_not_exists(self.dirs[key])

    self.inputFileIds = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info['use_it']:
            continue
        process_name = sample_info["process_name_specific"]
        is_mc = (sample_info["type"] == "mc")
        # pileup profiles are derived from MC only
        if not is_mc:
            continue

        logging.info(
            "Creating configuration files to run '%s' for sample %s" %
            (self.executable, process_name))

        inputFileList = generateInputFileList(sample_info, self.max_files_per_job)
        key_dir = getKey(process_name)

        outputFile = os.path.join(self.dirs[key_dir][DKEY_HISTO],
                                  "%s.root" % process_name)
        # skip samples that were already produced in a previous run;
        # min_file_size guards against truncated/empty ROOT files
        if os.path.isfile(outputFile) and tools_is_file_ok(
                outputFile, min_file_size=2000):
            logging.info('File {} already exists --> skipping job'.format(
                outputFile))
            continue
        self.outputFiles[process_name] = {
            'inputFiles': [],
            'outputFile': outputFile
        }

        # Stage 2: one config / log / script / tmp-histogram per job chunk.
        for jobId in inputFileList.keys():
            key_file = getKey(sample_name, jobId)
            self.inputFiles[key_file] = inputFileList[jobId]
            if len(self.inputFiles[key_file]) == 0:
                logging.warning(
                    "ntupleFiles['%s'] = %s --> skipping job !!"
                    % (key_file, self.inputFiles[key_file]))
                continue
            self.cfgFiles_puProfile[key_file] = os.path.join(
                self.dirs[key_dir][DKEY_CFGS],
                "puProfile_%s_%i_cfg.txt" % (process_name, jobId))
            self.outputFiles_tmp[key_file] = os.path.join(
                self.dirs[key_dir][DKEY_HISTO_TMP],
                "histogram_%i.root" % jobId)
            self.logFiles_puProfile[key_file] = os.path.join(
                self.dirs[key_dir][DKEY_LOGS],
                "puProfile_%s_%i.log" % (process_name, jobId))
            self.scriptFiles_puProfile[key_file] = os.path.join(
                self.dirs[key_dir][DKEY_CFGS],
                "puProfile_%s_%i_cfg.sh" % (process_name, jobId))
            self.jobOptions_sbatch[key_file] = {
                'histName': process_name,
                'inputFiles': self.inputFiles[key_file],
                'cfgFile_path': self.cfgFiles_puProfile[key_file],
                'outputFile': self.outputFiles_tmp[key_file],
                'logFile': self.logFiles_puProfile[key_file],
                'scriptFile': self.scriptFiles_puProfile[key_file],
            }
            self.createCfg_puProfile(self.jobOptions_sbatch[key_file])
            # the per-job tmp histograms are hadd'ed into outputFile later
            self.outputFiles[process_name]['inputFiles'].append(
                self.outputFiles_tmp[key_file])

    # Stage 3: batch submission script and the Makefile.
    if self.is_sbatch:
        logging.info(
            "Creating script for submitting '%s' jobs to batch system" %
            self.executable)
        self.num_jobs['puProfile'] += self.createScript_sbatch(
            self.executable, self.sbatchFile_puProfile,
            self.jobOptions_sbatch)

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_puProfile(lines_makefile)
    self.addToMakefile_hadd(lines_makefile)
    self.addToMakefile_plot(lines_makefile)
    self.addToMakefile_finalHadd(lines_makefile)
    self.createMakefile(lines_makefile)
    logging.info("Done")

    return self.num_jobs
def create(self):
    """Creates all necessary config files for the histogram projection jobs and
    builds the Makefile that runs them -- either locally or on the batch system.

    Returns:
        dict: per-category job counts accumulated in self.num_jobs

    Raises:
        RuntimeError: if a non-'puHist' projection is requested for a process
            that has no reference LHE weight loaded
    """
    for key in self.dirs.keys():
        if type(self.dirs[key]) == dict:
            for dir_type in self.dirs[key].keys():
                create_if_not_exists(self.dirs[key][dir_type])
        else:
            create_if_not_exists(self.dirs[key])
    self.inputFileIds = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info['use_it']:
            continue
        process_name = sample_info["process_name_specific"]
        is_mc = (sample_info["type"] == "mc")
        if not is_mc:
            continue
        logging.info(
            "Creating configuration files to run '%s' for sample %s" %
            (self.executable, process_name))
        inputFileList = generateInputFileList(sample_info, self.max_files_per_job)
        key_dir = getKey(process_name)
        outputFile = os.path.join(
            self.dirs[key_dir][DKEY_HISTO], "%s.root" % process_name)
        # NOTE: the bookkeeping entry is created even when the job is skipped below,
        # so downstream hadd/Makefile steps still see the already-existing output file.
        self.outputFiles[process_name] = {
            'inputFiles': [],
            'outputFile': outputFile,
        }
        if os.path.isfile(outputFile) and tools_is_file_ok(outputFile, min_file_size=2000):
            logging.info('File {} already exists --> skipping job'.format(outputFile))
            continue
        for jobId in inputFileList.keys():
            key_file = getKey(sample_name, jobId)
            self.inputFiles[key_file] = inputFileList[jobId]
            if len(self.inputFiles[key_file]) == 0:
                logging.warning(
                    "'%s' = %s --> skipping job !!" %
                    (key_file, self.inputFiles[key_file]))
                continue
            self.cfgFiles_projection[key_file] = os.path.join(
                self.dirs[key_dir][DKEY_CFGS],
                "project_%s_%i_cfg.txt" % (process_name, jobId))
            self.outputFiles_tmp[key_file] = os.path.join(
                self.dirs[key_dir][DKEY_HISTO_TMP],
                "histogram_%i.root" % jobId)
            self.logFiles_projection[key_file] = os.path.join(
                self.dirs[key_dir][DKEY_LOGS],
                "project_%s_%i.log" % (process_name, jobId))
            self.scriptFiles_projection[key_file] = os.path.join(
                self.dirs[key_dir][DKEY_CFGS],
                "project_%s_%i_cfg.sh" % (process_name, jobId))
            # For plain event counting, pick a specialized counting module when the
            # sample needs extra categorization (top-pT reweighting, HTXS, LHE splits).
            projection_module = self.projection_module
            if projection_module == "count":
                projection_module = "countHistogramAll"
                if sample_name.startswith('/TTTo'):
                    projection_module += "CompTopRwgt"
                elif sample_info['sample_category'].startswith('ttH'):
                    projection_module += "CompHTXS"
                elif isSplitByNlheJet(process_name):
                    projection_module += "SplitByLHENjet"
                elif isSplitByNlheHT(process_name):
                    projection_module += "SplitByLHEHT"
                elif isSplitByNlheJetHT(process_name, sample_name):
                    projection_module += "SplitByLHENjetHT"
            self.jobOptions_sbatch[key_file] = {
                'histName': process_name,
                'inputFiles': self.inputFiles[key_file],
                'cfgFile_path': self.cfgFiles_projection[key_file],
                'outputFile': self.outputFiles_tmp[key_file],
                'logFile': self.logFiles_projection[key_file],
                'scriptFile': self.scriptFiles_projection[key_file],
                'projection_module': projection_module,
            }
            if self.projection_module != 'puHist':
                # BUGFIX: validate before indexing -- previously the dict lookup
                # preceded this check, so a missing process raised a bare KeyError
                # and the intended RuntimeError diagnostic was unreachable.
                if process_name not in self.ref_genWeights:
                    raise RuntimeError(
                        "Unable to find reference LHE weight for process %s" %
                        process_name)
                self.jobOptions_sbatch[key_file]['ref_genWeight'] = \
                    self.ref_genWeights[process_name]
            self.createCfg_project(self.jobOptions_sbatch[key_file])
            self.outputFiles[process_name]['inputFiles'].append(
                self.outputFiles_tmp[key_file])
    if self.is_sbatch:
        logging.info(
            "Creating script for submitting '%s' jobs to batch system" %
            self.executable)
        self.num_jobs['project'] += self.createScript_sbatch(
            self.executable, self.sbatchFile_projection, self.jobOptions_sbatch)
    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_project(lines_makefile)
    self.addToMakefile_hadd(lines_makefile)
    if self.plot:
        self.addToMakefile_plot(lines_makefile)
    self.addToMakefile_finalHadd(lines_makefile)
    self.createMakefile(lines_makefile)
    logging.info("Done")
    return self.num_jobs
def poll(self, nonBlocking):
    """Waits for all sbatch jobs submitted by this instance of sbatchManager to finish processing

    Args:
        nonBlocking: if True, return False immediately when jobs are still
                     outstanding instead of sleeping and polling again

    Returns:
        True once every submitted/queued job has finished;
        False only in non-blocking mode while jobs are still pending
    """
    text_line = '-' * 120

    # Set a delimiter, which distinguishes entries b/w different jobs
    delimiter = ','

    # Explanation (the maximum pool ID length = 256 is configurable via self.max_pool_id_length):
    # 1) squeue -h -u {{user}} -o '%i %256k'
    #    Collects the list of running jobs
    #      a) -h omits header
    #      b) -u {{user}} looks only for jobs submitted by {{user}}
    #      c) -o '%i %256k' specifies the output format
    #         i)  %i    -- job ID (1st column)
    #         ii) %256k -- comment with width of 256 characters (2nd column)
    #                      If the job has no comments, the entry simply reads (null)
    # 2) grep {{comment}}
    #    Filter the jobs by the comment which must be unique per sbatchManager instance at all times
    # 3) awk '{print $1}'
    #    Filter only the jobIds out
    # 4) sed ':a;N;$!ba;s/\\n/{{delimiter}}/g'
    #    Place all job IDs to one line, delimited by {{delimiter}} (otherwise the logs are hard to read)
    command_template = "squeue -h -u {{user}} -o '%i %{{ pool_id_length }}k' | grep {{comment}} | awk '{print $1}' | " \
                       "sed ':a;N;$!ba;s/\\n/{{delimiter}}/g'"
    command = jinja2.Template(command_template).render(
        user=self.user,
        pool_id_length=self.max_pool_id_length,
        comment=self.pool_id,
        delimiter=delimiter)

    # Initially, all jobs are marked as submitted so we have to go through all jobs and check their exit codes
    # even if some of them have already finished
    jobIds_set = set([
        job_id for job_id in self.submittedJobs
        if self.submittedJobs[job_id]['status'] == Status.submitted
    ])
    nofJobs_left = len(jobIds_set) + len(self.queuedJobs)
    while nofJobs_left > 0:
        # Get the list of jobs submitted to batch system and convert their jobIds to a set
        poll_result, poll_result_err = '', ''
        while True:
            poll_result, poll_result_err = run_cmd(command,
                                                   do_not_log=False,
                                                   return_stderr=True)
            if not poll_result and poll_result_err:
                logging.warning('squeue caught an error: {squeue_error}'.format(
                    squeue_error=poll_result_err))
            else:
                break
            # sleep a minute and then try again
            # in principle we could limit the number of retries, but hopefully that's not necessary
            logging.debug("sleeping for %i seconds." % 60)
            time.sleep(60)
        polled_ids = set()
        if poll_result != '':
            polled_ids = set(poll_result.split(delimiter))

        # Check if number of jobs submitted to batch system is below maxSubmittedJobs;
        # if it is, take jobs from queuedJobs list and submit them,
        # until a total of maxSubmittedJobs is submitted to batch system
        nofJobs_toSubmit = min(len(self.queuedJobs),
                               self.maxSubmittedJobs - len(polled_ids))
        if nofJobs_toSubmit > 0:
            logging.debug(
                "Jobs: submitted = {}, in queue = {} --> submitting the next {} jobs."
                .format(len(polled_ids), len(self.queuedJobs), nofJobs_toSubmit))
        else:
            logging.debug(
                "Jobs: submitted = {}, in queue = {} --> waiting for submitted jobs to finish processing."
                .format(len(polled_ids), len(self.queuedJobs)))
        for i in range(0, nofJobs_toSubmit):
            # randomly submit a job from the queue
            two_pow_sixteen = 65536
            random.seed((abs(hash(uuid.uuid4()))) % two_pow_sixteen)
            max_idx = len(self.queuedJobs) - 1
            random_idx = random.randint(0, max_idx)
            job = self.queuedJobs.pop(random_idx)
            job['status'] = Status.submitted
            job_id = self.submit(job['sbatch_command'])
            self.submittedJobs[job_id] = job

        # Now check status of jobs submitted to batch system:
        # Subtract the list of running jobs from the list of all submitted jobs -- the result is a list of
        # jobs that have finished already
        finished_ids = list(jobIds_set - polled_ids)

        # Do not poll anything if currently there are no finished jobs
        if finished_ids:
            # Based on the job's exit code, check whether the job has failed or completed successfully
            # However, the sacct/scontrol commands yield too much output if too many jobs have been submitted here
            # Therefore, we want to restrict the output by grepping specific job IDs
            # There's another problem with that: the length of a bash command is limited by ARG_MAX kernel variable,
            # which is of order 2e6
            # This means that we have to split the job IDs into chunks each of which we have to check separately
            finished_ids_chunks = [
                finished_ids[i:i + self.max_nof_greps]
                for i in range(0, len(finished_ids), self.max_nof_greps)
            ]
            for finished_ids_chunk in finished_ids_chunks:
                completion = self.check_job_completion(finished_ids_chunk)
                completed_jobs, running_jobs, failed_jobs = [], [], []
                # NOTE(review): iteritems() implies this code targets Python 2
                for job_id, details in completion.iteritems():
                    if details.status == Status.completed:
                        completed_jobs.append(job_id)
                    elif details.status == Status.running:
                        running_jobs.append(job_id)
                    else:
                        failed_jobs.append(job_id)
                # If there are any failed jobs, throw
                if failed_jobs:
                    failed_jobs_str = ','.join(failed_jobs)
                    errors = [
                        completion[job_id].status for job_id in failed_jobs
                    ]
                    logging.error(
                        "Job(s) w/ ID(s) {jobIds} finished with errors: {reasons}"
                        .format(
                            jobIds=failed_jobs_str,
                            reasons=', '.join(map(Status.toString, errors)),
                        ))

                    # Let's print a table where the first column corresponds to the job ID
                    # and the second column lists the exit code, the derived exit code, the status
                    # and the classification of the failed job
                    logging.error("Error table:")
                    for job_id in failed_jobs:
                        sys.stderr.write(
                            "{jobId} {exitCode} {derivedExitCode} {state} {status}\n"
                            .format(
                                jobId=job_id,
                                exitCode=completion[job_id].exit_code,
                                derivedExitCode=completion[job_id].derived_exit_code,
                                state=completion[job_id].state,
                                status=Status.toString(completion[job_id].status),
                            ))

                    sys.stderr.write('%s\n' % text_line)
                    for failed_job in failed_jobs:
                        # Dump both the wrapper and the executable log of every failed job
                        for log in zip(['wrapper', 'executable'],
                                       ['log_wrap', 'log_exec']):
                            logfile = self.submittedJobs[failed_job][log[1]]
                            if os.path.isfile(logfile):
                                logfile_contents = open(logfile, 'r').read()
                            else:
                                logfile_contents = '<file is missing>'
                            sys.stderr.write(
                                'Job ID {id} {description} log ({path}):\n{line}\n{log}\n{line}\n'
                                .format(
                                    id=failed_job,
                                    description=log[0],
                                    path=logfile,
                                    log=logfile_contents,
                                    line=text_line,
                                ))

                        if self.submittedJobs[failed_job]['nof_submissions'] < self.max_resubmissions and \
                           completion[failed_job].status == Status.io_error:
                            # The job is eligible for resubmission if the job hasn't been resubmitted more
                            # than a preset limit of resubmissions AND if the job failed due to I/O errors
                            logging.warning(
                                "Job w/ ID {id} and arguments {args} FAILED because: {reason} "
                                "-> resubmission attempt #{attempt}".format(
                                    id=failed_job,
                                    args=self.submittedJobs[failed_job]['args'],
                                    reason=Status.toString(completion[failed_job].status),
                                    attempt=self.submittedJobs[failed_job]['nof_submissions'],
                                ))
                            self.submitJob(*self.submittedJobs[failed_job]['args'])
                            # The old ID must be deleted, b/c otherwise it would be used to compare against
                            # squeue output and we would resubmit the failed job ad infinitum
                            del self.submittedJobs[failed_job]
                        else:
                            # We've exceeded the maximum number of resubmissions -> fail the workflow
                            # NOTE(review): Status.raiseError presumably raises itself; the
                            # outer 'raise' suggests it may instead return an exception -- verify
                            raise Status.raiseError(completion[failed_job].status)
                else:
                    logging.debug(
                        "Job(s) w/ ID(s) {completedIds} finished successfully {runningInfo}"
                        .format(
                            completedIds=','.join(completed_jobs),
                            runningInfo='(%s still running)' %
                            ','.join(running_jobs) if running_jobs else '',
                        ))

                # Mark successfully finished jobs as completed so that won't request their status code again
                # Otherwise they will be still at ,,submitted'' state
                for job_id in completed_jobs:
                    if not all(
                            map(
                                lambda outputFile: is_file_ok(
                                    outputFile,
                                    validate_outputs=True,
                                    min_file_size=self.min_file_size),
                                self.submittedJobs[job_id]['outputFiles'])):
                        if self.submittedJobs[job_id]['nof_submissions'] < self.max_resubmissions:
                            # Output file(s) missing or broken -> retry up to the resubmission limit
                            logging.warning(
                                "Job w/ ID {id} and arguments {args} FAILED to produce a valid output file "
                                "-> resubmission attempt #{attempt}".format(
                                    id=job_id,
                                    args=self.submittedJobs[job_id]['args'],
                                    attempt=self.submittedJobs[job_id]['nof_submissions'],
                                ))
                            self.submitJob(*self.submittedJobs[job_id]['args'])
                            del self.submittedJobs[job_id]
                        else:
                            raise ValueError(
                                "Job w/ ID {id} FAILED because it repeatedly produces bogus output "
                                "file {output} yet the job still exits w/o any errors"
                                .format(
                                    id=job_id,
                                    output=', '.join(self.submittedJobs[job_id]['outputFiles']),
                                ))
                    else:
                        # Job completed just fine
                        self.submittedJobs[job_id]['status'] = Status.completed

        # Re-derive the set of still-pending jobs for the next polling round
        jobIds_set = set([
            job_id for job_id in self.submittedJobs
            if self.submittedJobs[job_id]['status'] == Status.submitted
        ])
        nofJobs_left = len(jobIds_set) + len(self.queuedJobs)
        logging.info(
            "Waiting for sbatch to finish (%d job(s) still left) ..." %
            nofJobs_left)
        if nofJobs_left > 0:
            if nonBlocking:
                return False
            # Add a randomized delay on top of poll_interval so that multiple
            # pollers do not hammer squeue in lockstep
            two_pow_sixteen = 65536
            random.seed((abs(hash(uuid.uuid4()))) % two_pow_sixteen)
            max_delay = 300
            random_delay = random.randint(0, max_delay)
            logging.debug("sleeping for %i seconds." % random_delay)
            time.sleep(self.poll_interval + random_delay)
        else:
            break
    return True
elif mode == "sync": sample_suffix = "sync" if use_nonnominal else "sync_nom" if use_preselected: sample_suffix = "preselected_{}".format(sample_suffix) samples = load_samples(era, suffix=sample_suffix) else: raise ValueError("Invalid mode: %s" % mode) for sample_name, sample_info in samples.items(): if sample_name == 'sum_events': continue if sample_name.startswith('/Tau/Run'): sample_info["use_it"] = False if __name__ == '__main__': logging.info( "Running the jobs with the following systematic uncertainties enabled: %s" % \ ', '.join(central_or_shifts) ) if sample_filter: samples = filter_samples(samples, sample_filter) configDir = os.path.join("/home", getpass.getuser(), "ttHAnalysis", era, version) outputDir = os.path.join("/hdfs/local", getpass.getuser(), "ttHAnalysis", era, version) analysis = analyzeConfig_2lss( configDir=configDir, outputDir=outputDir, executable_analyze="analyze_2lss", cfgFile_analyze="analyze_2lss_cfg.py",
def create(self):
    """Prepares the config files and Makefile for the reference gen-weight
    computation, to be run either locally or on the batch system.

    Returns:
        dict: per-category job counts accumulated in self.num_jobs
    """
    # Ensure all bookkeeping directories exist up front.
    for dir_entry in self.dirs.values():
        if type(dir_entry) == dict:
            for path in dir_entry.values():
                create_if_not_exists(path)
        else:
            create_if_not_exists(dir_entry)
    self.inputFileIds = {}
    for sample_key, sample_cfg in self.samples.items():
        if not sample_cfg['use_it']:
            continue
        process_name = sample_cfg["process_name_specific"]
        if sample_cfg["type"] != "mc":
            continue
        logging.info(
            "Creating configuration files to run '%s' for sample %s" %
            (self.executable, process_name))
        per_job_files = generateInputFileList(sample_cfg, 1)
        key_dir = getKey(process_name)
        key_file = getKey(process_name)
        # Flatten the per-job lists into a single file list for the whole sample.
        self.inputFiles[key_file] = list(
            itertools.chain(*per_job_files.values()))
        if not self.inputFiles[key_file]:
            logging.warning(
                "'%s' = %s --> skipping job !!" %
                (key_file, self.inputFiles[key_file]))
            continue
        outputFile = os.path.join(
            self.dirs[key_dir][DKEY_RESULTS], "%s.txt" % process_name)
        self.outputFiles[key_file] = outputFile
        if os.path.isfile(outputFile):
            logging.info('File {} already exists --> skipping job'.format(outputFile))
            continue
        cfg_dir = self.dirs[key_dir][DKEY_CFGS]
        self.cfgFiles[key_file] = os.path.join(
            cfg_dir, "refGenWeight_%s_cfg.txt" % (process_name))
        self.logFiles[key_file] = os.path.join(
            self.dirs[key_dir][DKEY_LOGS], "refGenWeight_%s.log" % (process_name))
        self.scriptFiles[key_file] = os.path.join(
            cfg_dir, "refGenWeight_%s_cfg.sh" % (process_name))
        # One plot path per extension, passed downstream as a single
        # space-separated string.
        plot_paths = [
            os.path.join(self.dirs[key_dir][DKEY_PLOTS],
                         "refGenWeight_%s.%s" % (process_name, extension))
            for extension in ['pdf', 'png']
        ]
        self.plotFiles[key_file] = ' '.join(plot_paths)
        self.jobOptions_sbatch[key_file] = {
            'inputFiles': self.inputFiles[key_file],
            'cfgFile_path': self.cfgFiles[key_file],
            'cmdParams': "-i {} -o {} -p {} -v".format(
                self.cfgFiles[key_file],
                self.outputFiles[key_file],
                self.plotFiles[key_file],
            ),
            'outputFile': self.outputFiles[key_file],
            'logFile': self.logFiles[key_file],
            'scriptFile': self.scriptFiles[key_file],
        }
        self.createCfg(self.jobOptions_sbatch[key_file])
    if self.is_sbatch:
        logging.info(
            "Creating script for submitting '%s' jobs to batch system" %
            self.executable)
        self.num_jobs['refGenWeight'] += self.createScript_sbatch(
            self.executable, self.sbatchFile, self.jobOptions_sbatch)
    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile(lines_makefile)
    self.addToMakefile_final(lines_makefile)
    self.createMakefile(lines_makefile)
    logging.info("Done")
    return self.num_jobs
def __init__(
        self,
        configDir,
        outputDir,
        output_file,
        executable,
        projection_module,
        samples,
        max_files_per_job,
        era,
        plot,
        check_output_files,
        running_method,
        num_parallel_jobs,
        pool_id='',
        verbose=False,
        dry_run=False,
        use_home=False,
        submission_cmd=None,
):
    """Sets up directory layout, log-file paths and per-sample bookkeeping for the
    histogram projection workflow.

    Args:
        configDir: directory where config files, logs and the Makefile are written
        outputDir: directory where job outputs (histograms etc.) are written
        output_file: basename of the final merged output, placed under outputDir
        executable: name of the executable each job runs
        projection_module: projection mode, e.g. 'puHist' or 'count'
        samples: dict of sample metadata keyed by sample name
        max_files_per_job: maximum number of input files per batch job
        era: data-taking era string (used to pick the reference gen-weight file)
        plot: whether to add plotting steps to the Makefile
        check_output_files: whether output files should be validated
        running_method: 'sbatch' or 'makefile' (case-insensitive)
        num_parallel_jobs: parallelism passed to make
        pool_id: optional batch pool identifier; a fresh UUID if empty
        verbose / dry_run / use_home: misc. execution flags
        submission_cmd: recorded submission command, checked against SUBMISSION.log

    Raises:
        ValueError: if running_method is neither 'sbatch' nor 'makefile'
    """
    self.configDir = configDir
    self.outputDir = outputDir
    self.executable = executable
    self.projection_module = projection_module
    # Hard ceiling on the number of jobs this workflow may create.
    self.max_num_jobs = 200000
    self.samples = samples
    self.max_files_per_job = max_files_per_job
    self.era = era
    self.plot = plot
    self.check_output_files = check_output_files
    self.verbose = verbose
    self.dry_run = dry_run
    self.use_home = use_home
    if running_method.lower() not in ["sbatch", "makefile"]:
        raise ValueError("Invalid running method: %s" % running_method)
    self.running_method = running_method
    self.is_sbatch = self.running_method.lower() == "sbatch"
    self.is_makefile = not self.is_sbatch
    self.makefile = os.path.join(
        self.configDir, "Makefile_{}".format(self.projection_module))
    self.num_parallel_jobs = num_parallel_jobs
    # Fall back to a fresh UUID when no pool ID was supplied.
    self.pool_id = pool_id if pool_id else uuid.uuid4()

    self.workingDir = os.getcwd()
    logging.info("Working directory is: %s" % self.workingDir)
    self.template_dir = os.path.join(os.getenv('CMSSW_BASE'), 'src',
                                     'tthAnalysis', 'HiggsToTauTau', 'test',
                                     'templates')
    logging.info("Templates directory is: %s" % self.template_dir)

    create_if_not_exists(self.configDir)
    create_if_not_exists(self.outputDir)
    self.output_file = os.path.join(self.outputDir, output_file)
    self.stdout_file_path = os.path.join(
        self.configDir, "stdout_{}.log".format(self.projection_module))
    self.stderr_file_path = os.path.join(
        self.configDir, "stderr_{}.log".format(self.projection_module))
    self.sw_ver_file_cfg = os.path.join(
        self.configDir, "VERSION_{}.log".format(self.projection_module))
    self.sw_ver_file_out = os.path.join(
        self.outputDir, "VERSION_{}.log".format(self.projection_module))
    self.submission_out = os.path.join(self.configDir, "SUBMISSION.log")
    # get_log_version() rewrites all five paths at once (presumably appending a
    # version suffix so older logs are not overwritten -- see its definition).
    self.stdout_file_path, self.stderr_file_path, \
    self.sw_ver_file_cfg, self.sw_ver_file_out, self.submission_out = get_log_version(
        (self.stdout_file_path, self.stderr_file_path, self.sw_ver_file_cfg,
         self.sw_ver_file_out, self.submission_out))
    check_submission_cmd(self.submission_out, submission_cmd)

    self.sbatchFile_projection = os.path.join(
        self.configDir, "sbatch_{}.py".format(self.projection_module))
    # Per-job bookkeeping, all keyed by getKey(...) file keys.
    self.cfgFiles_projection = {}
    self.logFiles_projection = {}
    self.scriptFiles_projection = {}
    self.jobOptions_sbatch = {}

    self.inputFiles = {}
    self.outputFiles_tmp = {}
    self.outputFiles = {}

    self.phoniesToAdd = []
    self.filesToClean = []
    self.targets = []
    self.makefile_target = "sbatch_{}".format(self.projection_module)

    self.dirs = {}
    all_dirs = [
        DKEY_CFGS, DKEY_HISTO_TMP, DKEY_HISTO, DKEY_PLOTS, DKEY_LOGS,
        DKEY_SCRIPTS, DKEY_HADD_RT
    ]
    # Directory types that live under configDir (the rest go under outputDir).
    cfg_dirs = [
        DKEY_CFGS, DKEY_LOGS, DKEY_PLOTS, DKEY_SCRIPTS, DKEY_HADD_RT
    ]

    # Reference LHE weights are only needed outside the 'puHist' mode.
    ref_genWeightsFile = os.path.join(
        os.environ['CMSSW_BASE'], 'src', 'tthAnalysis', 'HiggsToTauTau',
        'data', 'refGenWeight_{}.txt'.format(self.era))
    self.ref_genWeights = load_refGenWeightsFromFile(
        ref_genWeightsFile) if projection_module != 'puHist' else {}

    # Per-process directory map; plots are process-independent and skipped here.
    for sample_name, sample_info in self.samples.items():
        if not sample_info['use_it']:
            continue
        process_name = sample_info["process_name_specific"]
        key_dir = getKey(process_name)
        for dir_type in all_dirs:
            if dir_type == DKEY_PLOTS:
                continue
            initDict(self.dirs, [key_dir, dir_type])
            if dir_type in cfg_dirs:
                self.dirs[key_dir][dir_type] = os.path.join(
                    self.configDir, dir_type, process_name)
            else:
                self.dirs[key_dir][dir_type] = os.path.join(
                    self.outputDir, dir_type, process_name)
    # Process-independent directories under configDir.
    for dir_type in cfg_dirs:
        initDict(self.dirs, [dir_type])
        self.dirs[dir_type] = os.path.join(self.configDir, dir_type)

    self.cvmfs_error_log = {}
    self.num_jobs = {
        'hadd': 0,
        'project': 0,
        'plot': 0,
    }
def check_job_completion(self,
                         jobsId_list,
                         default_completion=Status.completed):
    """Determines the completion status of the given batch jobs.

    Queries `sacct` first; if it is unavailable (e.g. the SLURM accounting DB is
    overloaded), falls back to `scontrol`; if both fail, every job keeps
    `default_completion`.

    Args:
        jobsId_list: list of batch job IDs to query
        default_completion: Status assumed for jobs whose state cannot be determined

    Returns:
        dict mapping each job ID in jobsId_list to a JobCompletion instance
    """
    completion = {
        k: JobCompletion(status=default_completion)
        for k in jobsId_list
    }

    # If the input list is empty, just return here (we don't want to mess up the
    # subprocess commands here)
    if not completion:
        return completion

    # Set a delimiter, which distinguishes entries b/w different jobs
    delimiter = ','

    # First, let's try with sacct; explanation:
    # 1) sacct -X -P -n -o JobID,ExitCode,DerivedExitCode,State
    #    Shows job IDs, exit codes and comments of all submitted, running and finished jobs, one line per job
    #      a) -X -- shows cumulative statistics of each job (has no effect here, though)
    #      b) -P -- output will be '|' delimited without a '|' at the end
    #      c) -n -- omit header
    #      d) -o JobID,ExitCode,DerivedExitCode -- output format
    #      e) -S {datetime} -- look only for jobs submitted after {datetime}
    #      f) -j {jobs} -- filter out only the relevant jobs by their job ID (comma-separated list)
    # 2) sed ':a;N;$!ba;s/\\n/{delimiter}/g'
    #    Place all entries to one line, delimited by {{delimiter}} (otherwise the logs are hard to read)
    sacct_cmd = "sacct -X -P -n -o JobID,ExitCode,DerivedExitCode,State -S {datetime} -j {jobs} | " \
                "sed ':a;N;$!ba;s/\\n/{delimiter}/g'".format(
        datetime = self.datetime,
        jobs = ','.join(jobsId_list),
        delimiter = delimiter,
    )
    sacct_out, sacct_err = run_cmd(sacct_cmd,
                                   do_not_log=not self.log_completion,
                                   return_stderr=True)
    if not sacct_err and sacct_out:
        # The output of sacct contains one line per job, each line has pipe-separated fields the order of which
        # is defined in the command that issued the output
        lines = sacct_out.split(delimiter)
        for line in lines:
            JobID, ExitCode, DerivedExitCode, State = line.split('|')
            if JobID in completion:
                completion[JobID] = JobCompletion(
                    status=Status.classify_error(ExitCode, DerivedExitCode,
                                                 State),
                    exit_code=ExitCode,
                    derived_exit_code=DerivedExitCode,
                    state=State,
                )
        return completion
    else:
        # Likely returned along the lines of (due to heavy load on the cluster since SQL DB is overloaded):
        # sacct: error: Problem talking to the database: Connection refused
        logging.info('sacct currently unavailable: %s' % sacct_err)

    # Let's try with scontrol if the sacct commands failed
    # scontrol doesn't have an option to take a list of Job IDs as an argument; thus, we have to grep the job IDs
    # Explanation:
    # 1) scontrol show -od job
    #    Prints out everything about running or recently finished jobs
    #      a) -o -- prints information one line per record
    #      b) -d -- includes more detailed information about the job
    #      c) job -- prints all jobs (it's possible to get information about other units like nodes and clusters)
    # 2) grep '{jobs}'
    #    Filter out jobs by their job ID (by concatenating the list with escaped regex OR operator '|')
    # 3) sed ':a;N;$!ba;s/\\n/{delimiter}/g'
    #    Put all the result on one line, where each record is delimited by {delimiter}
    scontrol_cmd = "scontrol show -od job | grep '{jobs}' | sed ':a;N;$!ba;s/\\n/{delimiter}/g'".format(
        jobs='\\|'.join(jobsId_list),
        delimiter=delimiter,
    )
    scontrol_out, scontrol_err = run_cmd(scontrol_cmd,
                                         do_not_log=not self.log_completion,
                                         return_stderr=True)
    if not scontrol_err and scontrol_out:
        # The output of scontrol contains one entry per line, each line contains a space-delimited key-value pairs,
        # whereas the keys and values are separated by an equation sign
        # Although the keys do not contain any spaces, the values might, so we have to take care of that
        lines = scontrol_out.split(delimiter)
        for line in lines:
            line_dict = {}
            # NOTE(review): relies on Python 2 semantics -- map() must return a
            # list for the len()/indexing below to work; under Python 3 this
            # would need list(map(...)) -- confirm intended interpreter version
            line_split_eq_spaces = map(lambda x: x.split(),
                                       line.split('='))
            for i in range(len(line_split_eq_spaces) - 1):
                k = line_split_eq_spaces[i]
                v = line_split_eq_spaces[i + 1]
                # The key is the last word before '='; the value is everything up to
                # (but excluding) the next key, except for the final pair which keeps all words
                line_dict[k[-1]] = ' '.join(
                    v[:-1] if i != len(line_split_eq_spaces) - 2 else v)
            if not 'JobId' in line_dict.keys():
                print("Skipping line = '%s'" % line)
                continue
            JobId = line_dict['JobId']
            if JobId in completion:
                completion[JobId] = JobCompletion(
                    status=Status.classify_error(
                        line_dict['ExitCode'],
                        line_dict['DerivedExitCode'],
                        line_dict['JobState'],
                    ),
                    exit_code=line_dict['ExitCode'],
                    derived_exit_code=line_dict['DerivedExitCode'],
                    state=line_dict['JobState'])
        return completion
    else:
        # scontrol probably returned something like:
        # slurm_load_jobs error: Invalid job id specified
        # Probably because too much time has passed since the job completion and checking the exit status here
        logging.info('scontrol has errors: %s' % scontrol_err)

    # scontrol still might fail if too much time has passed since the jobs completion (the metadata about each
    # job is cached for a certain period of time, the length of which I don't know at the moment)
    # None of the SLURM commands work; let's just say that the job completed successfully
    logging.error(
        "Cannot tell if the job has completed successfully or not!")
    return completion