def create(self):
    """Create the config files, batch script and Makefile for the Ntuple production.

    Steps, in order:
      1. create every directory registered in ``self.dirs`` (entries are either
         a single path or a dict of paths),
      2. for each enabled sample, build one job per entry returned by
         ``generateInputFileList`` and write its config file via
         ``self.createCfg_prodNtuple``,
      3. if ``self.is_sbatch`` is set, create the batch submission script,
      4. create the Makefile.

    Jobs without input files are skipped with a warning. Each remaining job
    must have exactly one input file; otherwise an ``AssertionError`` is raised.
    """
    for dir_entry in self.dirs.values():
        # isinstance (rather than an exact type comparison) so that dict
        # subclasses are not mistaken for a single path
        if isinstance(dir_entry, dict):
            for dir_path in dir_entry.values():
                create_if_not_exists(dir_path)
        else:
            create_if_not_exists(dir_entry)

    self.inputFileIds = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or sample_info["sample_category"] in [
                "additional_signal_overlap", "background_data_estimate"]:
            continue

        process_name = sample_info["process_name_specific"]
        logging.info("Creating configuration files to run '%s' for sample %s" %
                     (self.executable_prodNtuple, process_name))

        inputFileList = generateInputFileList(sample_name, sample_info, self.max_files_per_job, self.debug)
        # the directory key depends only on the sample, not on the job
        key_dir = getKey(sample_name)
        for jobId in inputFileList.keys():
            key_file = getKey(sample_name, jobId)

            self.inputFiles[key_file] = inputFileList[jobId]
            if not self.inputFiles[key_file]:
                # reported through logging, like every other message in this method
                logging.warning("ntupleFiles['%s'] = %s --> skipping job !!" %
                                (key_file, self.inputFiles[key_file]))
                continue
            assert len(self.inputFiles[key_file]) == 1, "There is more than one input file!"

            self.cfgFiles_prodNtuple_modified[key_file] = os.path.join(
                self.dirs[key_dir][DKEY_CFGS],
                "produceNtuple_%s_%s_%i_cfg.py" % (self.channel, process_name, jobId))
            self.outputFiles[key_file] = os.path.join(
                self.dirs[key_dir][DKEY_NTUPLES],
                "%s_%i.root" % (process_name, jobId))
            self.logFiles_prodNtuple[key_file] = os.path.join(
                self.dirs[key_dir][DKEY_LOGS],
                "produceNtuple_%s_%s_%i.log" % (self.channel, process_name, jobId))
            self.createCfg_prodNtuple(
                self.inputFiles[key_file], self.outputFiles[key_file], self.era,
                self.cfgFiles_prodNtuple_modified[key_file])

    if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_prodNtuple)
        self.createScript_sbatch()

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_prodNtuple(lines_makefile)
    # addToMakefile_clean() is not called here (it was commented out in the original code)
    self.createMakefile(lines_makefile)

    logging.info("Done")
def create(self):
    """Create the config files, batch script and Makefile for the Ntuple production.

    Steps, in order:
      1. create every directory registered in ``self.dirs`` (entries are either
         a single path or a dict of paths),
      2. for each enabled sample, create the numbered Ntuple sub-directories
         (one per block of 1000 job ids) and write one config file per job via
         ``self.createCfg_prodNtuple``; jobs without input files are skipped
         with a warning,
      3. if ``self.is_sbatch`` is set, create the batch submission script,
      4. create the Makefile.

    Returns:
        The value returned by ``self.createScript_sbatch()`` when
        ``self.is_sbatch`` is set, otherwise 0.

    Raises:
        ValueError: if an MC sample has no entry in ``self.pileup_histograms``.
    """
    for dir_entry in self.dirs.values():
        # isinstance (rather than an exact type comparison) so that dict
        # subclasses are not mistaken for a single path
        if isinstance(dir_entry, dict):
            for dir_path in dir_entry.values():
                create_if_not_exists(dir_path)
        else:
            create_if_not_exists(dir_entry)

    self.inputFileIds = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue

        process_name = sample_info["process_name_specific"]
        is_mc = (sample_info["type"] == "mc")

        if is_mc and process_name not in self.pileup_histograms:
            raise ValueError("Missing PU distribution for %s in file %s" % (process_name, self.pileup))

        logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable, process_name))

        inputFileList = generateInputFileList(sample_info, self.max_files_per_job)
        key_dir = getKey(sample_name)

        # output files are grouped into sub-directories of 1000 jobs each
        # ('0000', '0001', ...); create every sub-directory that will be used
        for subdir_index in set(jobId // 1000 for jobId in inputFileList.keys()):
            create_if_not_exists(os.path.join(self.dirs[key_dir][DKEY_NTUPLES], '%04d' % subdir_index))

        for jobId in inputFileList.keys():
            key_file = getKey(sample_name, jobId)

            self.inputFiles[key_file] = inputFileList[jobId]
            if not self.inputFiles[key_file]:
                logging.warning("ntupleFiles['%s'] = %s --> skipping job !!" %
                                (key_file, self.inputFiles[key_file]))
                continue

            self.cfgFiles_prodNtuple_modified[key_file] = os.path.join(
                self.dirs[key_dir][DKEY_CFGS],
                "produceNtuple_%s_%i_cfg.py" % (process_name, jobId))
            self.outputFiles[key_file] = os.path.join(
                self.dirs[key_dir][DKEY_NTUPLES],
                "%04d" % (jobId // 1000),
                "tree_%i.root" % jobId)
            self.logFiles_prodNtuple[key_file] = os.path.join(
                self.dirs[key_dir][DKEY_LOGS],
                "produceNtuple_%s_%i.log" % (process_name, jobId))

            # HLT paths are taken from the sample only for non-MC samples
            hlt_paths = sample_info["hlt_paths"] if not is_mc else []
            hlt_cuts = list(Triggers(self.era).triggers_flat) if self.preselection_cuts["applyHLTcut"] else []
            jobOptions = {
                'inputFiles': self.inputFiles[key_file],
                'cfgFile_modified': self.cfgFiles_prodNtuple_modified[key_file],
                'outputFile': self.outputFiles[key_file],
                'is_mc': is_mc,
                'random_seed': jobId,
                'process_name': process_name,
                'category_name': sample_info["sample_category"],
                'triggers': hlt_paths,
                'HLTcuts': hlt_cuts,
            }
            self.createCfg_prodNtuple(jobOptions)

    num_jobs = 0
    if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable)
        num_jobs = self.createScript_sbatch()
        logging.info("Generated %i job(s)" % num_jobs)

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_prodNtuple(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done")
    return num_jobs
def create(self):
    """Create the config files, batch script and Makefile for the complete analysis workflow.

    Creates every directory registered in ``self.dirs``, writes one 'analyze'
    config per (sample, charge selection, systematic shift, job), optionally
    creates the batch submission script, writes one 'comp_jetToTauFakeRate'
    config per charge selection and finally assembles the Makefile.
    """
    for dirs_of_key in self.dirs.values():
        for dir_path in dirs_of_key.values():
            create_if_not_exists(dir_path)

    self.inputFileIds = {}

    skipped_categories = ("additional_signal_overlap", "background_data_estimate")
    # theory shape systematics (by name prefix) apply only to one sample category each
    shift_owners = (
        ("CMS_ttHl_thu_shape_ttH", "signal"),
        ("CMS_ttHl_thu_shape_ttW", "TTW"),
        ("CMS_ttHl_thu_shape_ttZ", "TTZ"),
    )

    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        if sample_info["sample_category"] in skipped_categories:
            continue

        process_name = sample_info["process_name_specific"]
        logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))

        is_mc = (sample_info["type"] == "mc")

        # only MC is scaled to the luminosity, and only if requested
        if self.use_lumi and is_mc:
            lumi_scale = sample_info["xsection"] * self.lumi / sample_info["nof_events"]
        else:
            lumi_scale = 1.

        apply_genWeight = False
        if is_mc and "apply_genWeight" in sample_info.keys():
            apply_genWeight = sample_info["apply_genWeight"]

        sample_category = sample_info["sample_category"]
        triggers = sample_info["triggers"]
        apply_trigger_bits = (is_mc and (self.era == "2015" or (self.era == "2016" and sample_info["reHLT"]))) or not is_mc

        for charge_selection in self.charge_selections:
            for central_or_shift in self.central_or_shifts:
                inputFileList = generateInputFileList(sample_name, sample_info, self.max_files_per_job, self.debug)
                for jobId in inputFileList.keys():
                    # systematic shifts are evaluated for MC only
                    if central_or_shift != "central" and not is_mc:
                        continue
                    if any(central_or_shift.startswith(prefix) and sample_category != owner
                           for prefix, owner in shift_owners):
                        continue

                    key_dir = getKey(sample_name, charge_selection)
                    key_file = getKey(sample_name, charge_selection, central_or_shift, jobId)

                    job_tuple = (self.channel, process_name, charge_selection, central_or_shift, jobId)
                    ntuple_files = inputFileList[jobId]
                    cfg_path = os.path.join(
                        self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % job_tuple)
                    histogram_path = os.path.join(
                        self.dirs[key_dir][DKEY_HIST], "%s_%s_%s_%i.root" % job_tuple[1:])
                    log_path = os.path.join(
                        self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % job_tuple)

                    self.ntupleFiles[key_file] = ntuple_files
                    self.cfgFiles_analyze_modified[key_file] = cfg_path
                    self.histogramFiles[key_file] = histogram_path
                    self.logFiles_analyze[key_file] = log_path

                    self.createCfg_analyze(
                        self.ntupleFiles[key_file], self.histogramFiles[key_file], sample_category, self.era,
                        triggers, charge_selection,
                        self.jet_minPt, self.jet_maxPt, self.jet_minAbsEta, self.jet_maxAbsEta,
                        self.hadTau_selections, self.absEtaBins,
                        is_mc, central_or_shift, lumi_scale, apply_genWeight, apply_trigger_bits,
                        self.cfgFiles_analyze_modified[key_file])

    if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.createScript_sbatch()

    logging.info("Creating configuration files for executing 'comp_jetToTauFakeRate'")
    for charge_selection in self.charge_selections:
        histogram_path = os.path.join(
            self.outputDir, DKEY_HIST, "comp_jetToTauFakeRate_%s.root" % charge_selection)
        cfg_path = os.path.join(
            self.outputDir, DKEY_CFGS, "comp_jetToTauFakeRate_%s_cfg.py" % charge_selection)

        self.histogramFile_comp_jetToTauFakeRate[charge_selection] = histogram_path
        self.histogramDir_numerator[charge_selection] = "jetToTauFakeRate_%s/numerator/" % charge_selection
        self.histogramDir_denominator[charge_selection] = "jetToTauFakeRate_%s/denominator/" % charge_selection
        self.cfgFile_comp_jetToTauFakeRate_modified[charge_selection] = cfg_path

        self.createCfg_comp_jetToTauFakeRate(
            self.histogramFile_hadd_stage1,
            self.histogramFile_comp_jetToTauFakeRate[charge_selection],
            self.histogramDir_denominator[charge_selection],
            self.histogramDir_numerator[charge_selection],
            self.absEtaBins, self.ptBins,
            self.cfgFile_comp_jetToTauFakeRate_modified[charge_selection])

    lines_makefile = []
    for add_to_makefile in (
            self.addToMakefile_analyze,
            self.addToMakefile_hadd_stage1,
            self.addToMakefile_comp_jetToTauFakeRate,
            self.addToMakefile_hadd_stage2):
        add_to_makefile(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done")
def create(self):
    """Create all config files, batch scripts and the Makefile for the complete analysis workflow.

    Steps, in order:
      1. register the directory layout in ``self.dirs`` and create all directories,
      2. collect the input file lists of all enabled samples,
      3. per lepton selection / fake-rate weight / sample / systematic shift / job:
         write the 'analyze' config and register its histogram as hadd_stage1 input,
      4. per MC sample: write the 'addBackgrounds' configs that sum the nonfake,
         conversion and fake contributions and register them as hadd_stage1_5 input;
         the hadd_stage1 output of non-MC samples is registered directly,
      5. per lepton selection / fake-rate weight: write the 'addBackgrounds' configs
         for the summed "fakes_mc" and "conversions" processes and set up hadd_stage2,
      6. write the 'addBackgroundLeptonFakes', 'prepareDatacards',
         'addSystFakeRates' and 'makePlots' configs,
      7. create the batch submission scripts (if ``self.is_sbatch``) and the Makefile.

    If ``self.do_sync`` is set, steps 4-6 are skipped; only the sync-Ntuple batch
    script and a sync-specific Makefile are created.

    Returns:
        ``self.num_jobs``
    """
    skipped_sample_categories = ["additional_signal_overlap", "background_data_estimate"]
    # order matters: it fixes the order of the 'processes_input' lists built below
    ttH_decay_modes = ["ttH_htt", "ttH_hww", "ttH_hzz", "ttH_hzg", "ttH_hmm"]

    def is_sample_used(info):
        # a sample is processed if it is enabled and not in one of the skipped categories
        return info["use_it"] and info["sample_category"] not in skipped_sample_categories

    def expand_processes(category, genMatches):
        # "ttH" is the sum over all decay modes; any other category maps onto itself
        prefixes = ttH_decay_modes if category == "ttH" else [category]
        return ["%s%s" % (prefix, genMatch) for prefix in prefixes for genMatch in genMatches]

    # ------------------------------------------------------------------
    # 1. directory layout: configs and logs go below configDir, the rest below outputDir
    # ------------------------------------------------------------------
    for sample_name, sample_info in self.samples.items():
        if not is_sample_used(sample_info):
            continue
        process_name = sample_info["process_name_specific"]
        for lepton_selection in self.lepton_selections:
            for lepton_frWeight in self.lepton_frWeights:
                if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"):
                    continue
                lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight)
                central_or_shifts_extended = [""]
                central_or_shifts_extended.extend(self.central_or_shifts)
                central_or_shifts_extended.extend(["hadd", "addBackgrounds"])
                for central_or_shift_or_dummy in central_or_shifts_extended:
                    for process_name_or_dummy in [process_name, "hadd"]:
                        key_dir = getKey(process_name_or_dummy, lepton_selection_and_frWeight, central_or_shift_or_dummy)
                        for dir_type in [DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_ROOT, DKEY_RLES, DKEY_SYNC]:
                            initDict(self.dirs, [key_dir, dir_type])
                            base_dir = self.configDir if dir_type in [DKEY_CFGS, DKEY_LOGS] else self.outputDir
                            self.dirs[key_dir][dir_type] = os.path.join(
                                base_dir, dir_type, self.channel, lepton_selection_and_frWeight,
                                process_name_or_dummy, central_or_shift_or_dummy)

    for subdirectory in ["addBackgrounds", "addBackgroundLeptonFakes", "prepareDatacards", "addSystFakeRates", "makePlots"]:
        key_dir = getKey(subdirectory)
        for dir_type in [DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_ROOT, DKEY_DCRD, DKEY_PLOT]:
            initDict(self.dirs, [key_dir, dir_type])
            base_dir = self.configDir if dir_type in [DKEY_CFGS, DKEY_LOGS] else self.outputDir
            self.dirs[key_dir][dir_type] = os.path.join(base_dir, dir_type, self.channel, subdirectory)

    for dir_type in [DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC]:
        initDict(self.dirs, [dir_type])
        if dir_type in [DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT]:
            self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
        else:
            self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)

    # entries of self.dirs are either a single path or a dict of paths
    numDirectories = 0
    for dir_entry in self.dirs.values():
        numDirectories += len(dir_entry) if isinstance(dir_entry, dict) else 1
    logging.info("Creating directory structure (numDirectories = %i)" % numDirectories)

    numDirectories_created = 0
    frac = 1  # next percentage to be reported
    for dir_entry in self.dirs.values():
        if isinstance(dir_entry, dict):
            for dir_path in dir_entry.values():
                create_if_not_exists(dir_path)
            numDirectories_created += len(dir_entry)
        else:
            create_if_not_exists(dir_entry)
            numDirectories_created += 1
        # report every percent step passed; the guard on numDirectories avoids
        # an endless loop in case there is nothing to create
        while numDirectories and 100 * numDirectories_created >= frac * numDirectories:
            logging.info(" %i%% completed" % frac)
            frac += 1
    logging.info("Done.")

    # ------------------------------------------------------------------
    # 2. input file lists, computed once per sample
    # ------------------------------------------------------------------
    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
        if not is_sample_used(sample_info):
            continue
        logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
        inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)

    mcClosure_regex = re.compile('Fakeable_mcClosure_(?P<type>m|e)_wFakeRateWeights')
    # built once instead of three times per job
    lhe_shifts = systematics.LHE()

    for lepton_selection in self.lepton_selections:
        electron_selection = lepton_selection
        muon_selection = lepton_selection

        hadTauVeto_selection = "|".join(["Tight", self.hadTauVeto_selection_part2])

        # in the MC closure test only one lepton flavor is relaxed to "Fakeable"
        if lepton_selection == "Fakeable_mcClosure_e":
            electron_selection = "Fakeable"
            muon_selection = "Tight"
        elif lepton_selection == "Fakeable_mcClosure_m":
            electron_selection = "Tight"
            muon_selection = "Fakeable"

        for lepton_frWeight in self.lepton_frWeights:
            if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"):
                continue
            if lepton_frWeight == "disabled" and lepton_selection not in ["Tight"]:
                continue
            lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight)

            for sample_name, sample_info in self.samples.items():
                if not is_sample_used(sample_info):
                    continue
                process_name = sample_info["process_name_specific"]
                logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))

                sample_category = sample_info["sample_category"]
                is_mc = (sample_info["type"] == "mc")
                is_signal = (sample_category == "signal")

                # ------------------------------------------------------
                # 3. 'analyze' jobs
                # ------------------------------------------------------
                for central_or_shift in self.central_or_shifts:
                    inputFileList = inputFileLists[sample_name]
                    for jobId in inputFileList.keys():
                        if central_or_shift != "central":
                            isFR_shape_shift = (central_or_shift in systematics.FR_all)
                            if not ((lepton_selection == "Fakeable" and isFR_shape_shift) or lepton_selection == "Tight"):
                                continue
                            if not is_mc and not isFR_shape_shift:
                                continue

                        if central_or_shift in lhe_shifts.ttH and sample_category != "signal":
                            continue
                        if central_or_shift in lhe_shifts.ttW and sample_category != "TTW":
                            continue
                        if central_or_shift in lhe_shifts.ttZ and sample_category != "TTZ":
                            continue
                        if central_or_shift in systematics.DYMCReweighting and not is_dymc_reweighting(sample_name):
                            continue

                        logging.info(" ... for '%s' and systematic uncertainty option '%s'" % (lepton_selection_and_frWeight, central_or_shift))

                        # build config files for executing analysis code
                        key_analyze_dir = getKey(process_name, lepton_selection_and_frWeight, central_or_shift)
                        analyze_job_tuple = (process_name, lepton_selection_and_frWeight, central_or_shift, jobId)
                        key_analyze_job = getKey(*analyze_job_tuple)
                        ntupleFiles = inputFileList[jobId]
                        if not ntupleFiles:
                            logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job))
                            continue

                        syncOutput = ''
                        syncTree = ''
                        syncRequireGenMatching = True
                        if self.do_sync:
                            mcClosure_match = mcClosure_regex.match(lepton_selection_and_frWeight)
                            if lepton_selection_and_frWeight == 'Tight':
                                syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_SR.root' % (self.channel, central_or_shift))
                                syncTree = 'syncTree_%s_SR' % self.channel
                            elif lepton_selection_and_frWeight == 'Fakeable_wFakeRateWeights':
                                syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_Fake.root' % (self.channel, central_or_shift))
                                syncTree = 'syncTree_%s_Fake' % self.channel
                            elif mcClosure_match:
                                mcClosure_type = mcClosure_match.group('type')
                                syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_mcClosure_%s.root' % (self.channel, central_or_shift, mcClosure_type))
                                syncTree = 'syncTree_%s_mcClosure_%s' % (self.channel, mcClosure_type)
                            else:
                                # no sync Ntuple is produced for any other selection
                                continue
                        if syncTree and central_or_shift != "central":
                            syncTree = os.path.join(central_or_shift, syncTree)

                        syncRLE = ''
                        if self.do_sync and self.rle_select:
                            syncRLE = self.rle_select % syncTree
                            if not os.path.isfile(syncRLE):
                                logging.warning("Input RLE file for the sync is missing: %s; skipping the job" % syncRLE)
                                continue
                        if syncOutput:
                            self.inputFiles_sync['sync'].append(syncOutput)

                        cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%s_%i_cfg.py" % analyze_job_tuple)
                        logFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%s_%i.log" % analyze_job_tuple)
                        rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%i.txt" % analyze_job_tuple) \
                            if self.select_rle_output else ""
                        histogramFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%s_%i.root" % analyze_job_tuple)

                        self.jobOptions_analyze[key_analyze_job] = {
                            'ntupleFiles': ntupleFiles,
                            'cfgFile_modified': cfgFile_modified_path,
                            'histogramFile': histogramFile_path,
                            'logFile': logFile_path,
                            'selEventsFileName_output': rleOutputFile_path,
                            'electronSelection': electron_selection,
                            'muonSelection': muon_selection,
                            'apply_leptonGenMatching': self.apply_leptonGenMatching,
                            'hadTauSelection_veto': hadTauVeto_selection,
                            'applyFakeRateWeights': self.applyFakeRateWeights if not lepton_selection == "Tight" else "disabled",
                            'central_or_shift': central_or_shift,
                            'syncOutput': syncOutput,
                            'syncTree': syncTree,
                            'syncRLE': syncRLE,
                            'syncRequireGenMatching': syncRequireGenMatching,
                            'useNonNominal': self.use_nonnominal,
                            'apply_hlt_filter': self.hlt_filter,
                        }
                        self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info, lepton_selection)

                        # initialize input and output file names for hadd_stage1
                        key_hadd_stage1_dir = getKey(process_name, lepton_selection_and_frWeight)
                        hadd_stage1_job_tuple = (process_name, lepton_selection_and_frWeight)
                        key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple)
                        if key_hadd_stage1_job not in self.inputFiles_hadd_stage1:
                            self.inputFiles_hadd_stage1[key_hadd_stage1_job] = []
                        self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                        self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(
                            self.dirs[key_hadd_stage1_dir][DKEY_HIST], "hadd_stage1_%s_%s.root" % hadd_stage1_job_tuple)

                if self.do_sync:
                    continue

                # ------------------------------------------------------
                # 4. 'addBackgrounds' jobs for each MC sample separately
                # ------------------------------------------------------
                if is_mc:
                    logging.info("Creating configuration files to run 'addBackgrounds' for sample %s" % process_name)

                    categories_to_sum = [sample_category]
                    if is_signal:
                        categories_to_sum = ["signal", "ttH", "ttH_htt", "ttH_hww", "ttH_hzz", "ttH_hmm", "ttH_hzg"]
                    # a separate loop variable keeps the sample's own category intact
                    for category in categories_to_sum:
                        # sum non-fake and fake contributions for each MC sample separately
                        for genMatch_category in ["nonfake", "conversions", "fake"]:
                            key_hadd_stage1_job = getKey(process_name, lepton_selection_and_frWeight)
                            key_addBackgrounds_dir = getKey(process_name, lepton_selection_and_frWeight, "addBackgrounds")
                            if genMatch_category == "nonfake":
                                # sum non-fake contributions for each MC sample separately
                                # input processes: TT3l0g0j,...
                                # output processes: TT; ...
                                # ("signal" additionally includes conversions and fakes,
                                #  "ttH" additionally includes conversions)
                                lepton_genMatches = list(self.lepton_genMatches_nonfakes)
                                if category in ["signal", "ttH"]:
                                    lepton_genMatches.extend(self.lepton_genMatches_conversions)
                                if category == "signal":
                                    lepton_genMatches.extend(self.lepton_genMatches_fakes)
                                process_output = category
                            elif genMatch_category == "conversions":
                                # sum conversion contributions for each MC sample separately
                                # input processes: TT2l1g0j, TT1l2g0j, TT0l3g0j; ...
                                # output processes: TT_conversion; ...
                                lepton_genMatches = self.lepton_genMatches_conversions
                                process_output = "%s_conversion" % category
                            else:
                                # sum fake contributions for each MC sample separately
                                # input processes: TT2l0g1j, TT1l1g1j, TT1l0g2j, TT0l2g1j, TT0l1g2j, TT0l0g3j; ...
                                # output processes: TT_fake; ...
                                lepton_genMatches = self.lepton_genMatches_fakes
                                process_output = "%s_fake" % category
                            processes_input = expand_processes(category, lepton_genMatches)
                            addBackgrounds_job_tuple = (process_name, process_output, lepton_selection_and_frWeight)

                            if processes_input:
                                logging.info(" ...for genMatch option = '%s'" % genMatch_category)
                                key_addBackgrounds_job = getKey(*addBackgrounds_job_tuple)
                                cfgFile_modified = os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_tuple)
                                outputFile = os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_tuple)
                                self.jobOptions_addBackgrounds[key_addBackgrounds_job] = {
                                    'inputFile': self.outputFile_hadd_stage1[key_hadd_stage1_job],
                                    'cfgFile_modified': cfgFile_modified,
                                    'outputFile': outputFile,
                                    'logFile': os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], os.path.basename(cfgFile_modified).replace("_cfg.py", ".log")),
                                    'categories': [getHistogramDir(lepton_selection, lepton_frWeight)],
                                    'processes_input': processes_input,
                                    'process_output': process_output
                                }
                                self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds[key_addBackgrounds_job])

                                # initialize input and output file names for hadd_stage1_5
                                key_hadd_stage1_5_dir = getKey("hadd", lepton_selection_and_frWeight)
                                key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight)
                                if key_hadd_stage1_5_job not in self.inputFiles_hadd_stage1_5:
                                    self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = []
                                self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.jobOptions_addBackgrounds[key_addBackgrounds_job]['outputFile'])
                                self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job] = os.path.join(
                                    self.dirs[key_hadd_stage1_5_dir][DKEY_HIST], "hadd_stage1_5_%s.root" % lepton_selection_and_frWeight)

                # add output files of hadd_stage1 for data to list of input files for hadd_stage1_5
                if not is_mc:
                    key_hadd_stage1_job = getKey(process_name, lepton_selection_and_frWeight)
                    key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight)
                    if key_hadd_stage1_5_job not in self.inputFiles_hadd_stage1_5:
                        self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = []
                    self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.outputFile_hadd_stage1[key_hadd_stage1_job])

            if self.do_sync:
                continue

            # ----------------------------------------------------------
            # 5. sum fake and conversion background contributions for the
            #    total of all MC samples
            #    input processes: <category>_fake resp. <category>_conversion
            #    output processes: fakes_mc resp. conversions
            # ----------------------------------------------------------
            key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight)
            key_addBackgrounds_dir = getKey("addBackgrounds")
            summed_categories = list(self.nonfake_backgrounds)
            summed_categories.append("signal")
            key_addBackgrounds_sum_jobs = {}
            for process_output, suffix in [("fakes_mc", "fake"), ("conversions", "conversion")]:
                addBackgrounds_sum_job_tuple = (process_output, lepton_selection_and_frWeight)
                key_addBackgrounds_sum_job = getKey(*addBackgrounds_sum_job_tuple)
                key_addBackgrounds_sum_jobs[process_output] = key_addBackgrounds_sum_job
                self.jobOptions_addBackgrounds_sum[key_addBackgrounds_sum_job] = {
                    'inputFile': self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
                    'cfgFile_modified': os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_cfg.py" % addBackgrounds_sum_job_tuple),
                    'outputFile': os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s.root" % addBackgrounds_sum_job_tuple),
                    'logFile': os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s.log" % addBackgrounds_sum_job_tuple),
                    'categories': [getHistogramDir(lepton_selection, lepton_frWeight)],
                    'processes_input': ["%s_%s" % (summed_category, suffix) for summed_category in summed_categories],
                    'process_output': process_output
                }
                self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_sum_job])

            # initialize input and output file names for hadd_stage2
            key_hadd_stage2_dir = getKey("hadd", lepton_selection_and_frWeight)
            key_hadd_stage2_job = getKey(lepton_selection_and_frWeight)
            if key_hadd_stage2_job not in self.inputFiles_hadd_stage2:
                self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
            if lepton_selection == "Tight":
                self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(
                    self.jobOptions_addBackgrounds_sum[key_addBackgrounds_sum_jobs["fakes_mc"]]['outputFile'])
                self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(
                    self.jobOptions_addBackgrounds_sum[key_addBackgrounds_sum_jobs["conversions"]]['outputFile'])
            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job])
            self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(
                self.dirs[key_hadd_stage2_dir][DKEY_HIST], "hadd_stage2_%s.root" % lepton_selection_and_frWeight)

    # ------------------------------------------------------------------
    # sync mode: only the sync Ntuples are produced, then we are done
    # ------------------------------------------------------------------
    if self.do_sync:
        if self.is_sbatch:
            logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
            self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
            self.createScript_sbatch_syncNtuple(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile_syncNtuple(lines_makefile)
        outputFile_sync_path = os.path.join(self.outputDir, DKEY_SYNC, '%s.root' % self.channel)
        self.outputFile_sync['sync'] = outputFile_sync_path
        self.targets.append(outputFile_sync_path)
        self.addToMakefile_hadd_sync(lines_makefile)
        self.createMakefile(lines_makefile)
        logging.info("Done.")
        return self.num_jobs

    # ------------------------------------------------------------------
    # 6. data-driven fakes, datacards, fake-rate systematics and plots
    # ------------------------------------------------------------------
    # all of these read the hadd_stage2 output of the "Tight" selection
    key_hadd_stage2_job_tight = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"))

    logging.info("Creating configuration files to run 'addBackgroundFakes'")
    key_hadd_stage1_5_job = getKey(get_lepton_selection_and_frWeight("Fakeable", "enabled"))
    key_addFakes_job = getKey("fakes_data")
    category_sideband = "ttZctrl_Fakeable_wFakeRateWeights"
    self.jobOptions_addFakes[key_addFakes_job] = {
        'inputFile': self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
        'cfgFile_modified': os.path.join(self.dirs[DKEY_CFGS], "addBackgroundLeptonFakes_cfg.py"),
        'outputFile': os.path.join(self.dirs[DKEY_HIST], "addBackgroundLeptonFakes.root"),
        'logFile': os.path.join(self.dirs[DKEY_LOGS], "addBackgroundLeptonFakes.log"),
        'category_signal': "ttZctrl_Tight",
        'category_sideband': category_sideband
    }
    self.createCfg_addFakes(self.jobOptions_addFakes[key_addFakes_job])
    self.inputFiles_hadd_stage2[key_hadd_stage2_job_tight].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile'])

    logging.info("Creating configuration files to run 'prepareDatacards'")
    for histogramToFit in self.histograms_to_fit:
        key_prep_dcard_dir = getKey("prepareDatacards")
        prep_dcard_job_tuple = (self.channel, histogramToFit)
        key_prep_dcard_job = getKey(histogramToFit)
        self.jobOptions_prep_dcard[key_prep_dcard_job] = {
            'inputFile': self.outputFile_hadd_stage2[key_hadd_stage2_job_tight],
            'cfgFile_modified': os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_cfg.py" % prep_dcard_job_tuple),
            'datacardFile': os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s.root" % prep_dcard_job_tuple),
            'histogramDir': self.histogramDir_prep_dcard,
            'histogramToFit': histogramToFit,
            'label': None
        }
        self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])

        # add shape templates for the following systematic uncertainties:
        #  - 'CMS_ttHl_Clos_norm_e'
        #  - 'CMS_ttHl_Clos_shape_e'
        #  - 'CMS_ttHl_Clos_norm_m'
        #  - 'CMS_ttHl_Clos_shape_m'
        key_add_syst_fakerate_dir = getKey("addSystFakeRates")
        add_syst_fakerate_job_tuple = (self.channel, histogramToFit)
        key_add_syst_fakerate_job = getKey(histogramToFit)
        self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job] = {
            'inputFile': self.jobOptions_prep_dcard[key_prep_dcard_job]['datacardFile'],
            'cfgFile_modified': os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_CFGS], "addSystFakeRates_%s_%s_cfg.py" % add_syst_fakerate_job_tuple),
            'outputFile': os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_DCRD], "addSystFakeRates_%s_%s.root" % add_syst_fakerate_job_tuple),
            'category': self.channel,
            'histogramToFit': histogramToFit,
            'plots_outputFileName': os.path.join(self.dirs[DKEY_PLOT], "addSystFakeRates.png")
        }
        histogramDir_nominal = self.histogramDir_prep_dcard
        for lepton_type in ['e', 'm']:
            lepton_mcClosure = "Fakeable_mcClosure_%s" % lepton_type
            # closure templates exist only for the closure selections that were run
            if lepton_mcClosure not in self.lepton_selections:
                continue
            lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_mcClosure, "enabled")
            key_addBackgrounds_job_fakes = getKey("fakes_mc", lepton_selection_and_frWeight)
            histogramDir_mcClosure = self.mcClosure_dir[lepton_mcClosure]
            self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job].update({
                'add_Clos_%s' % lepton_type: lepton_mcClosure in self.lepton_selections,
                'inputFile_nominal_%s' % lepton_type: self.outputFile_hadd_stage2[key_hadd_stage2_job_tight],
                'histogramName_nominal_%s' % lepton_type: "%s/sel/evt/fakes_mc/%s" % (histogramDir_nominal, histogramToFit),
                'inputFile_mcClosure_%s' % lepton_type: self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'],
                'histogramName_mcClosure_%s' % lepton_type: "%s/sel/evt/fakes_mc/%s" % (histogramDir_mcClosure, histogramToFit)
            })
        self.createCfg_add_syst_fakerate(self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job])

    logging.info("Creating configuration files to run 'makePlots'")
    key_makePlots_dir = getKey("makePlots")
    key_makePlots_job = getKey('')
    self.jobOptions_make_plots[key_makePlots_job] = {
        'executable': self.executable_make_plots,
        'inputFile': self.outputFile_hadd_stage2[key_hadd_stage2_job_tight],
        'cfgFile_modified': os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_cfg.py" % self.channel),
        'outputFile': os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s.png" % self.channel),
        'histogramDir': self.histogramDir_prep_dcard,
        'label': "t#bar{t}Z control region",
        'make_plots_backgrounds': self.make_plots_backgrounds
    }
    self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])

    # ------------------------------------------------------------------
    # 7. batch submission scripts and Makefile
    # ------------------------------------------------------------------
    if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addBackgrounds)
        self.sbatchFile_addBackgrounds = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_%s.py" % self.channel)
        self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds, self.jobOptions_addBackgrounds)
        self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_%s.py" % self.channel)
        self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum)
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFakes)
        self.sbatchFile_addFakes = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFakes_%s.py" % self.channel)
        self.createScript_sbatch(self.executable_addFakes, self.sbatchFile_addFakes, self.jobOptions_addFakes)

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_backgrounds_from_data(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    self.addToMakefile_add_syst_fakerate(lines_makefile)
    self.addToMakefile_make_plots(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done.")

    return self.num_jobs
def create(self):
    """Create the config files, sbatch script and Makefile for the Ntuple production.

    Steps, in order:
      1. create every directory registered in ``self.dirs`` (entries are either
         a single path or a dict mapping a directory type to a path);
      2. for each sample with ``use_it`` set, obtain its per-job input file
         lists from ``generateInputFileList``; for every job that has input
         files, register the cfg / output / log file paths on ``self`` and
         write the job configuration through ``createCfg_prodNtuple``;
      3. if ``self.is_sbatch`` is set, create the batch submission script;
      4. assemble and write the Makefile.

    Output Ntuples are grouped into sub-directories named after
    ``jobId // 1000`` (zero-padded to four digits); these sub-directories are
    created up front for every job id returned by ``generateInputFileList``.

    Jobs without input files are skipped with a warning.  The method has no
    return value.
    """
    # Create the directory structure; nested dicts hold one path per dir type.
    for dir_entry in self.dirs.values():
        if isinstance(dir_entry, dict):
            for dir_path in dir_entry.values():
                create_if_not_exists(dir_path)
        else:
            create_if_not_exists(dir_entry)

    self.inputFileIds = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue

        process_name = sample_info["process_name_specific"]
        is_mc = (sample_info["type"] == "mc")

        logging.info(
            "Creating configuration files to run '%s' for sample %s" %
            (self.executable_prodNtuple, process_name))

        inputFileList = generateInputFileList(
            sample_name, sample_info, self.max_files_per_job, self.debug)
        key_dir = getKey(sample_name)

        # One output sub-directory per block of 1000 job ids; sorted so that
        # the creation order does not depend on set iteration order.
        for subDir_idx in sorted(set(jobId // 1000 for jobId in inputFileList.keys())):
            create_if_not_exists(
                os.path.join(self.dirs[key_dir][DKEY_NTUPLES], '%04d' % subDir_idx))

        for jobId in inputFileList.keys():
            key_file = getKey(sample_name, jobId)

            self.inputFiles[key_file] = inputFileList[jobId]
            if len(self.inputFiles[key_file]) == 0:
                # Reported through logging (instead of a Python 2 print
                # statement) like every other message of this method.
                logging.warning(
                    "ntupleFiles['%s'] = %s --> skipping job !!" %
                    (key_file, self.inputFiles[key_file]))
                continue

            self.cfgFiles_prodNtuple_modified[key_file] = os.path.join(
                self.dirs[key_dir][DKEY_CFGS],
                "produceNtuple_%s_%i_cfg.py" % (process_name, jobId))
            self.outputFiles[key_file] = os.path.join(
                self.dirs[key_dir][DKEY_NTUPLES],
                "%04d" % (jobId // 1000),
                "tree_%i.root" % jobId)
            self.logFiles_prodNtuple[key_file] = os.path.join(
                self.dirs[key_dir][DKEY_LOGS],
                "produceNtuple_%s_%i.log" % (process_name, jobId))

            jobOptions = {
                'inputFiles': self.inputFiles[key_file],
                'cfgFile_modified': self.cfgFiles_prodNtuple_modified[key_file],
                'outputFile': self.outputFiles[key_file],
                # Hard-coded to True; the per-sample value
                # sample_info["use_HIP_mitigation_mediumMuonId"] is not used.
                'use_HIP_mitigation_mediumMuonId': True,
                'is_mc': is_mc,
                # The job id doubles as the random seed of the job.
                'random_seed': jobId
            }
            self.createCfg_prodNtuple(jobOptions)

    if self.is_sbatch:
        logging.info(
            "Creating script for submitting '%s' jobs to batch system" %
            self.executable_prodNtuple)
        self.createScript_sbatch()

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_prodNtuple(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done")
def create(self):
    """Create all config files, scripts and the Makefile of the analysis workflow.

    The method works in consecutive phases:
      1. fill ``self.dirs`` with the directory paths needed per sample /
         charge selection / systematic shift, per helper step
         ("comp_jetToTauFakeRate", "makePlots") and per top-level dir type;
      2. create all these directories, logging the progress in percent;
      3. build the per-job input file lists of every sample in use;
      4. for every accepted (sample, charge selection, systematic shift, job)
         combination register the job options in ``self.jobOptions_analyze``,
         write the config via ``createCfg_analyze`` and register the
         histogram files as inputs of the hadd_stage1 / hadd_stage2 steps;
      5. create the 'comp_jetToTauFakeRate' and 'makePlots' configurations;
      6. if ``self.is_sbatch`` is set, create the batch submission scripts;
      7. write the Makefile.

    Returns ``self.num_jobs``.
    """
    # --- phase 1: register directory paths in self.dirs -----------------------
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        process_name = sample_info["process_name_specific"]
        # NOTE(review): sample_category is assigned but not read in this method.
        sample_category = sample_info["sample_category"]
        is_mc = (sample_info["type"] == "mc")

        logging.info("Building dictionaries for sample %s..." % process_name)
        for charge_selection in self.charge_selections:
            # Besides the real systematic shifts, directories are also booked for
            # the dummy entries "", "hadd" and "addBackgrounds".
            central_or_shift_extensions = ["", "hadd", "addBackgrounds"]
            central_or_shifts_extended = central_or_shift_extensions + self.central_or_shifts
            for central_or_shift_or_dummy in central_or_shifts_extended:
                # "hadd" is a dummy process name next to the actual sample.
                process_name_extended = [process_name, "hadd"]
                for process_name_or_dummy in process_name_extended:
                    # The ("hadd", "hadd") combination is not booked.
                    if central_or_shift_or_dummy in [
                            "hadd"
                    ] and process_name_or_dummy in ["hadd"]:
                        continue
                    # Real (non-central, non-dummy) shifts are only booked for MC
                    # samples and only if accept_central_or_shift() agrees.
                    if central_or_shift_or_dummy != "central" and central_or_shift_or_dummy not in central_or_shift_extensions:
                        if not is_mc:
                            continue
                        if not self.accept_central_or_shift(
                                central_or_shift_or_dummy, sample_info):
                            continue

                    key_dir = getKey(process_name_or_dummy, charge_selection,
                                     central_or_shift_or_dummy)
                    for dir_type in [
                            DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES
                    ]:
                        initDict(self.dirs, [key_dir, dir_type])
                        # cfg and log files live below configDir (with one
                        # sub-directory per shift); everything else goes
                        # below outputDir (without a per-shift sub-directory).
                        if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                            self.dirs[key_dir][dir_type] = os.path.join(
                                self.configDir, dir_type, self.channel,
                                "_".join([charge_selection]),
                                process_name_or_dummy,
                                central_or_shift_or_dummy)
                        else:
                            self.dirs[key_dir][dir_type] = os.path.join(
                                self.outputDir, dir_type, self.channel,
                                "_".join([charge_selection]),
                                process_name_or_dummy)

    # Directories of the helper steps that are not tied to a single sample.
    for subdirectory in ["comp_jetToTauFakeRate", "makePlots"]:
        key_dir = getKey(subdirectory)
        for dir_type in [
                DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT
        ]:
            initDict(self.dirs, [key_dir, dir_type])
            if dir_type in [DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT]:
                self.dirs[key_dir][dir_type] = os.path.join(
                    self.configDir, dir_type, self.channel, subdirectory)
            else:
                self.dirs[key_dir][dir_type] = os.path.join(
                    self.outputDir, dir_type, self.channel, subdirectory)

    # Top-level directories: here self.dirs[dir_type] is a plain path, not a dict.
    for dir_type in [
            DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD,
            DKEY_PLOT, DKEY_HADD_RT
    ]:
        initDict(self.dirs, [dir_type])
        if dir_type in [
                DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT,
                DKEY_HADD_RT
        ]:
            self.dirs[dir_type] = os.path.join(self.configDir, dir_type,
                                               self.channel)
        else:
            self.dirs[dir_type] = os.path.join(self.outputDir, dir_type,
                                               self.channel)

    # --- phase 2: create the directories ---------------------------------------
    # Count first, so that the progress can be reported in percent.
    numDirectories = 0
    for key in self.dirs.keys():
        if type(self.dirs[key]) == dict:
            numDirectories += len(self.dirs[key])
        else:
            numDirectories += 1
    logging.info("Creating directory structure (numDirectories = %i)" %
                 numDirectories)
    numDirectories_created = 0
    frac = 1
    for key in self.dirs.keys():
        if type(self.dirs[key]) == dict:
            for dir_type in self.dirs[key].keys():
                create_if_not_exists(self.dirs[key][dir_type])
            numDirectories_created += len(self.dirs[key])
        else:
            create_if_not_exists(self.dirs[key])
            numDirectories_created = numDirectories_created + 1
        # Emit one log line for every full percent reached since the last report.
        while 100 * numDirectories_created >= frac * numDirectories:
            logging.info(" %i%% completed" % frac)
            frac = frac + 1
    logging.info("Done.")

    # --- phase 3: per-job input file lists ---------------------------------------
    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        logging.info("Checking input files for sample %s" %
                     sample_info["process_name_specific"])
        inputFileLists[sample_name] = generateInputFileList(
            sample_info, self.max_files_per_job)

    # --- phase 4: analysis jobs and hadd bookkeeping -----------------------------
    self.inputFileIds = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        process_name = sample_info["process_name_specific"]
        inputFileList = inputFileLists[sample_name]

        logging.info(
            "Creating configuration files to run '%s' for sample %s" %
            (self.executable_analyze, process_name))

        is_mc = (sample_info["type"] == "mc")
        # NOTE(review): sample_category is assigned but not read in this method.
        sample_category = sample_info["sample_category"]

        for charge_selection in self.charge_selections:
            for central_or_shift in self.central_or_shifts:
                # Systematic shifts are evaluated for MC samples only.
                if central_or_shift != "central" and not is_mc:
                    continue
                if not self.accept_central_or_shift(
                        central_or_shift, sample_info):
                    continue

                # build config files for executing analysis code
                key_analyze_dir = getKey(process_name, charge_selection,
                                         central_or_shift)

                for jobId in inputFileList.keys():

                    analyze_job_tuple = (process_name, charge_selection,
                                         central_or_shift, jobId)
                    key_analyze_job = getKey(*analyze_job_tuple)
                    ntupleFiles = inputFileList[jobId]
                    if len(ntupleFiles) == 0:
                        logging.warning(
                            "No input ntuples for %s --> skipping job !!" %
                            (key_analyze_job))
                        continue

                    cfgFile_modified_path = os.path.join(
                        self.dirs[key_analyze_dir][DKEY_CFGS],
                        "analyze_%s_%s_%s_%i_cfg.py" % analyze_job_tuple)
                    logFile_path = os.path.join(
                        self.dirs[key_analyze_dir][DKEY_LOGS],
                        "analyze_%s_%s_%s_%i.log" % analyze_job_tuple)
                    histogramFile_path = os.path.join(
                        self.dirs[key_analyze_dir][DKEY_HIST],
                        "analyze_%s_%s_%s_%i.root" % analyze_job_tuple)
                    # The RLE output file name is left empty unless
                    # self.select_rle_output is set.
                    rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%i.txt" % analyze_job_tuple) \
                                         if self.select_rle_output else ""

                    self.jobOptions_analyze[key_analyze_job] = {
                        'ntupleFiles': ntupleFiles,
                        'cfgFile_modified': cfgFile_modified_path,
                        'histogramFile': histogramFile_path,
                        'logFile': logFile_path,
                        'chargeSelection': charge_selection,
                        'jet_minPt': self.jet_minPt,
                        'jet_maxPt': self.jet_maxPt,
                        'jet_minAbsEta': self.jet_minAbsEta,
                        'jet_maxAbsEta': self.jet_maxAbsEta,
                        'hadTau_selection_tight': self.hadTau_selection_tight,
                        'hadTauSelection_denominator': self.hadTau_selection_denominator,
                        'hadTauSelections_numerator': self.hadTau_selections_numerator,
                        'trigMatchingOptions': self.trigMatchingOptions,
                        'selEventsFileName_output': rleOutputFile_path,
                        'absEtaBins': self.absEtaBins,
                        'decayModes': self.decayModes,
                        'central_or_shift': central_or_shift,
                        'central_or_shifts_local': [],
                        'apply_hlt_filter': self.hlt_filter,
                    }
                    self.createCfg_analyze(
                        self.jobOptions_analyze[key_analyze_job],
                        sample_info)

                    # initialize input and output file names for hadd_stage1
                    # (one hadd_stage1 job per process and charge selection; the
                    # histogram files of all shifts and jobs are its inputs)
                    key_hadd_stage1_dir = getKey(process_name,
                                                 charge_selection)
                    hadd_stage1_job_tuple = (process_name,
                                             charge_selection)
                    key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple)
                    if not key_hadd_stage1_job in self.inputFiles_hadd_stage1:
                        self.inputFiles_hadd_stage1[
                            key_hadd_stage1_job] = []
                    self.inputFiles_hadd_stage1[
                        key_hadd_stage1_job].append(
                            self.jobOptions_analyze[key_analyze_job]
                            ['histogramFile'])
                    self.outputFile_hadd_stage1[
                        key_hadd_stage1_job] = os.path.join(
                            self.dirs[key_hadd_stage1_dir][DKEY_HIST],
                            "hadd_stage1_%s_%s.root" %
                            hadd_stage1_job_tuple)

            # initialize input and output file names for hadd_stage2
            # (one hadd_stage2 job per charge selection, fed by the hadd_stage1
            # outputs of all samples)
            # NOTE(review): outputFile_hadd_stage1 is only filled inside the job
            # loop above, so the lookup below presumably fails if no analyze job
            # was registered for this sample and charge selection -- confirm.
            key_hadd_stage1_job = getKey(process_name, charge_selection)
            key_hadd_stage2_dir = getKey("hadd", charge_selection)
            key_hadd_stage2_job = getKey(charge_selection)
            if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
                self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(
                self.outputFile_hadd_stage1[key_hadd_stage1_job])
            self.outputFile_hadd_stage2[
                key_hadd_stage2_job] = os.path.join(
                    self.dirs[key_hadd_stage2_dir][DKEY_HIST],
                    "hadd_stage2_%s.root" % charge_selection)

    # --- phase 5a: comp_jetToTauFakeRate configurations ----------------------------
    logging.info(
        "Creating configuration files for executing 'comp_jetToTauFakeRate'"
    )
    for charge_selection in self.charge_selections:
        # The per-trigger-matching outputs of one charge selection are collected
        # under charge_key; comp_output_files holds the combined file name.
        charge_key = "comp_%s" % charge_selection
        self.comp_input_files[charge_key] = []
        for trigMatchingOption in self.trigMatchingOptions:
            key_hadd_stage2_job = getKey(charge_selection)
            key_comp_jetToTauFakeRate_dir = getKey("comp_jetToTauFakeRate")
            key_comp_jetToTauFakeRate_job = getKey(charge_selection,
                                                   trigMatchingOption)
            self.jobOptions_comp_jetToTauFakeRate[
                key_comp_jetToTauFakeRate_job] = {
                    'inputFile':
                    self.outputFile_hadd_stage2[key_hadd_stage2_job],
                    'cfgFile_modified':
                    os.path.join(
                        self.dirs[DKEY_CFGS],
                        "comp_jetToTauFakeRate_%s_%s_cfg.py" %
                        (charge_selection, trigMatchingOption)),
                    'outputFile':
                    os.path.join(
                        self.dirs[DKEY_HIST],
                        "comp_jetToTauFakeRate_%s_%s.root" %
                        (charge_selection, trigMatchingOption)),
                    'logFile':
                    os.path.join(
                        self.dirs[DKEY_LOGS],
                        "comp_jetToTauFakeRate_%s_%s.log" %
                        (charge_selection, trigMatchingOption)),
                    'looseRegion':
                    "jetToTauFakeRate_%s_%s/denominator/" %
                    (charge_selection, trigMatchingOption),
                    'tightRegion':
                    "jetToTauFakeRate_%s_%s/numerator/" %
                    (charge_selection, trigMatchingOption),
                    'absEtaBins':
                    self.absEtaBins,
                    'ptBins':
                    self.ptBins,
                    'decayModes':
                    self.decayModes,
                    'hadTauSelections':
                    self.hadTau_selections_numerator,
                    'trigMatchingOption':
                    trigMatchingOption,
                    # NOTE(review): the plot file name depends on the trigger
                    # matching option only, not on the charge selection --
                    # confirm that this is intended.
                    'plots_outputFileName':
                    os.path.join(
                        self.dirs[key_comp_jetToTauFakeRate_dir]
                        [DKEY_PLOT],
                        "comp_jetToTauFakeRate_%s.png" % trigMatchingOption)
                }
            self.createCfg_comp_jetToTauFakeRate(
                self.jobOptions_comp_jetToTauFakeRate[
                    key_comp_jetToTauFakeRate_job])
            comp_output = self.jobOptions_comp_jetToTauFakeRate[
                key_comp_jetToTauFakeRate_job]['outputFile']
            self.targets.append(comp_output)
            self.comp_input_files[charge_key].append(comp_output)
        self.comp_output_files[charge_key] = os.path.join(
            self.dirs[DKEY_HIST],
            "comp_jetToTauFakeRate_%s.root" % charge_selection)

    # --- phase 5b: makePlots configurations ------------------------------------------
    logging.info("Creating configuration files to run 'makePlots'")
    for charge_selection in self.charge_selections:
        key_hadd_stage2_job = getKey(charge_selection)
        key_makePlots_dir = getKey("makePlots")
        key_makePlots_job = getKey(charge_selection)
        # Inclusive plots of this charge selection.
        # NOTE(review): the cfg and output file names contain only the channel,
        # so they are the same for every charge selection -- confirm intended.
        self.jobOptions_make_plots[key_makePlots_job] = {
            'executable':
            self.executable_make_plots,
            'inputFile':
            self.outputFile_hadd_stage2[key_hadd_stage2_job],
            'cfgFile_modified':
            os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS],
                         "makePlots_%s_cfg.py" % self.channel),
            'outputFile':
            os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT],
                         "makePlots_%s.png" % self.channel),
            'histogramDir':
            "jetToTauFakeRate_%s" % charge_selection,
            'label':
            None,
            'make_plots_backgrounds':
            self.make_plots_backgrounds
        }
        self.createCfg_makePlots(
            self.jobOptions_make_plots[key_makePlots_job])
        for trigMatchingOption in self.trigMatchingOptions:
            # NOTE(review): self.cfgFile_make_plots is switched to the
            # denominator variant here and not reset afterwards within this
            # method; presumably createCfg_makePlots reads it -- confirm that
            # the numerator plots and the inclusive plots of later charge
            # selections are meant to use it as well.
            self.cfgFile_make_plots = self.cfgFile_make_plots_denominator
            for absEtaBin in ["absEtaLt1_5", "absEta1_5to9_9"]:
                # Denominator plots per |eta| bin.
                key_hadd_stage2_job = getKey(charge_selection)
                key_makePlots_job = getKey(charge_selection,
                                           trigMatchingOption, absEtaBin,
                                           "denominator")
                self.jobOptions_make_plots[key_makePlots_job] = {
                  'executable' : self.executable_make_plots,
                  'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
                  'cfgFile_modified' : os.path.join(
                    self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_%s_%s_denominator_%s_cfg.py" % \
                    (self.channel, charge_selection, trigMatchingOption, absEtaBin)),
                  'outputFile' : os.path.join(
                    self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s_%s_%s_denominator_%s.png" % (self.channel, charge_selection, trigMatchingOption, absEtaBin)),
                  'histogramDir' : "jetToTauFakeRate_%s_%s/denominator/%s" % (charge_selection, trigMatchingOption, absEtaBin),
                  'label' : None,
                  'make_plots_backgrounds' : self.make_plots_backgrounds
                }
                self.createCfg_makePlots(
                    self.jobOptions_make_plots[key_makePlots_job])
                for hadTau_selection_numerator in self.hadTau_selections_numerator:
                    # Numerator plots per |eta| bin and numerator tau selection.
                    key_hadd_stage2_job = getKey(charge_selection)
                    key_makePlots_job = getKey(charge_selection,
                                               trigMatchingOption,
                                               absEtaBin, "numerator",
                                               hadTau_selection_numerator)
                    self.jobOptions_make_plots[key_makePlots_job] = {
                      'executable' : self.executable_make_plots,
                      'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
                      'cfgFile_modified' : os.path.join(
                        self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_%s_%s_numerator_%s_%s_cfg.py" % \
                        (self.channel, charge_selection, trigMatchingOption, hadTau_selection_numerator, absEtaBin)),
                      'outputFile' : os.path.join(
                        self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s_%s_%s_numerator_%s_%s.png" % \
                        (self.channel, charge_selection, trigMatchingOption, hadTau_selection_numerator, absEtaBin)),
                      'histogramDir' : "jetToTauFakeRate_%s_%s/numerator/%s/%s" % (charge_selection, trigMatchingOption, hadTau_selection_numerator, absEtaBin),
                      'label' : None,
                      'make_plots_backgrounds' : self.make_plots_backgrounds
                    }
                    self.createCfg_makePlots(
                        self.jobOptions_make_plots[key_makePlots_job])

    # --- phase 6: batch submission scripts --------------------------------------------
    # The script paths are set regardless of is_sbatch; the scripts themselves
    # are only written when jobs are submitted to the batch system.
    self.sbatchFile_analyze = os.path.join(
        self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
    self.sbatchFile_comp_jetToTauFakeRate = os.path.join(
        self.dirs[DKEY_SCRIPTS], "sbatch_comp_jetToTauFakeRate.py")
    if self.is_sbatch:
        logging.info(
            "Creating script for submitting '%s' jobs to batch system" %
            self.executable_analyze)
        self.createScript_sbatch_analyze(self.executable_analyze,
                                         self.sbatchFile_analyze,
                                         self.jobOptions_analyze)
        logging.info(
            "Creating script for submitting '%s' jobs to batch system" %
            self.executable_comp_jetToTauFakeRate)
        self.createScript_sbatch(self.executable_comp_jetToTauFakeRate,
                                 self.sbatchFile_comp_jetToTauFakeRate,
                                 self.jobOptions_comp_jetToTauFakeRate)

    # --- phase 7: Makefile -------------------------------------------------------------
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile,
                                   make_dependency="phony_hadd_stage1",
                                   max_mem='4096M')
    self.addToMakefile_comp_jetToTauFakeRate(lines_makefile)
    self.addToMakefile_comp_hadd(lines_makefile)
    self.addToMakefile_make_plots(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done.")

    return self.num_jobs
def create(self):
    """Prepare the refGenWeight jobs: configs, sbatch script and Makefile.

    All directories registered in ``self.dirs`` are created first.  Then, for
    every MC sample with ``use_it`` set, the input files of all jobs returned
    by ``generateInputFileList`` are merged into one list and a single job is
    booked: its output, cfg, log, script and plot file names are stored on
    ``self`` and its options are handed to ``createCfg``.

    A sample is skipped when it has no input files or when its output file
    already exists on disk.

    If ``self.is_sbatch`` is set, the batch submission script is created and
    the value returned by ``createScript_sbatch`` is added to
    ``self.num_jobs['refGenWeight']``.  Finally the Makefile is written.

    Returns ``self.num_jobs``.
    """
    # Create the directory structure (plain paths or dicts of paths).
    for dirs_key in self.dirs.keys():
        if type(self.dirs[dirs_key]) != dict:
            create_if_not_exists(self.dirs[dirs_key])
            continue
        for dir_type in self.dirs[dirs_key].keys():
            create_if_not_exists(self.dirs[dirs_key][dir_type])

    self.inputFileIds = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info['use_it']:
            continue
        process_name = sample_info["process_name_specific"]
        # Only MC samples are processed.
        if sample_info["type"] != "mc":
            continue

        logging.info(
            "Creating configuration files to run '%s' for sample %s" %
            (self.executable, process_name))

        files_per_job = generateInputFileList(sample_info, 1)
        sample_dir_key = getKey(process_name)
        job_key = getKey(process_name)

        # Merge the per-job file lists into one flat list for this sample.
        merged_inputs = list(itertools.chain(*files_per_job.values()))
        self.inputFiles[job_key] = merged_inputs
        if len(self.inputFiles[job_key]) == 0:
            logging.warning("'%s' = %s --> skipping job !!" %
                            (job_key, self.inputFiles[job_key]))
            continue

        result_file = os.path.join(self.dirs[sample_dir_key][DKEY_RESULTS],
                                   "%s.txt" % process_name)
        self.outputFiles[job_key] = result_file
        if os.path.isfile(result_file):
            logging.info(
                'File {} already exists --> skipping job'.format(result_file))
            continue

        self.cfgFiles[job_key] = os.path.join(
            self.dirs[sample_dir_key][DKEY_CFGS],
            "refGenWeight_%s_cfg.txt" % (process_name))
        self.logFiles[job_key] = os.path.join(
            self.dirs[sample_dir_key][DKEY_LOGS],
            "refGenWeight_%s.log" % (process_name))
        self.scriptFiles[job_key] = os.path.join(
            self.dirs[sample_dir_key][DKEY_CFGS],
            "refGenWeight_%s_cfg.sh" % (process_name))

        # One plot per file format, passed on as a space-separated string.
        plot_paths = []
        for extension in ['pdf', 'png']:
            plot_paths.append(
                os.path.join(self.dirs[sample_dir_key][DKEY_PLOTS],
                             "refGenWeight_%s.%s" % (process_name, extension)))
        self.plotFiles[job_key] = ' '.join(plot_paths)

        command_line = "-i {} -o {} -p {} -v".format(
            self.cfgFiles[job_key],
            self.outputFiles[job_key],
            self.plotFiles[job_key],
        )
        self.jobOptions_sbatch[job_key] = {
            'inputFiles': self.inputFiles[job_key],
            'cfgFile_path': self.cfgFiles[job_key],
            'cmdParams': command_line,
            'outputFile': self.outputFiles[job_key],
            'logFile': self.logFiles[job_key],
            'scriptFile': self.scriptFiles[job_key],
        }
        self.createCfg(self.jobOptions_sbatch[job_key])

    if self.is_sbatch:
        logging.info(
            "Creating script for submitting '%s' jobs to batch system" %
            self.executable)
        self.num_jobs['refGenWeight'] += self.createScript_sbatch(
            self.executable, self.sbatchFile, self.jobOptions_sbatch)

    logging.info("Creating Makefile")
    makefile_lines = []
    self.addToMakefile(makefile_lines)
    self.addToMakefile_final(makefile_lines)
    self.createMakefile(makefile_lines)
    logging.info("Done")

    return self.num_jobs
def create(self): """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system """ for sample_name, sample_info in self.samples.items(): if not sample_info["use_it"]: continue process_name = sample_info["process_name_specific"] sample_category = sample_info["sample_category"] is_mc = (sample_info["type"] == "mc") logging.info("Building dictionaries for sample %s..." % process_name) for lepton_selection in self.lepton_selections: for lepton_frWeight in self.lepton_frWeights: if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"): continue if lepton_frWeight == "disabled" and not lepton_selection in ["Tight"]: continue lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight) for lepton_charge_selection in self.lepton_charge_selections: if 'mcClosure' in lepton_selection and lepton_charge_selection != 'SS': # Run MC closure only for the region that complements the SR continue central_or_shift_extensions = ["", "hadd", "addBackgrounds"] central_or_shift_dedicated = self.central_or_shifts if self.runTHweights(sample_info) else self.central_or_shifts_external central_or_shifts_extended = central_or_shift_extensions + central_or_shift_dedicated for central_or_shift_or_dummy in central_or_shifts_extended: process_name_extended = [ process_name, "hadd" ] for process_name_or_dummy in process_name_extended: if central_or_shift_or_dummy in [ "hadd", "addBackgrounds" ] and process_name_or_dummy in [ "hadd" ]: continue if central_or_shift_or_dummy not in central_or_shift_extensions and not self.accept_systematics( central_or_shift_or_dummy, is_mc, lepton_selection, lepton_charge_selection, sample_info ): continue key_dir = getKey(process_name_or_dummy, lepton_selection_and_frWeight, lepton_charge_selection, central_or_shift_or_dummy) for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES, DKEY_SYNC ]: if dir_type == DKEY_SYNC and not self.do_sync: 
continue initDict(self.dirs, [ key_dir, dir_type ]) if dir_type in [ DKEY_CFGS, DKEY_LOGS ]: self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel, "_".join([ lepton_selection_and_frWeight, lepton_charge_selection ]), process_name_or_dummy, central_or_shift_or_dummy) else: self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, "_".join([ lepton_selection_and_frWeight, lepton_charge_selection ]), process_name_or_dummy) for subdirectory in [ "addBackgrounds", "addBackgroundLeptonFakes", "addBackgroundLeptonFlips", "prepareDatacards", "addSystFakeRates", "makePlots" ]: key_dir = getKey(subdirectory) for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]: initDict(self.dirs, [ key_dir, dir_type ]) if dir_type in [ DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]: self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel, subdirectory) else: self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, subdirectory) for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC ]: if dir_type == DKEY_SYNC and not self.do_sync: continue initDict(self.dirs, [ dir_type ]) if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]: self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel) else: self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel) numDirectories = 0 for key in self.dirs.keys(): if type(self.dirs[key]) == dict: numDirectories += len(self.dirs[key]) else: numDirectories += 1 logging.info("Creating directory structure (numDirectories = %i)" % numDirectories) numDirectories_created = 0; frac = 1 for key in self.dirs.keys(): if type(self.dirs[key]) == dict: for dir_type in self.dirs[key].keys(): create_if_not_exists(self.dirs[key][dir_type]) numDirectories_created += len(self.dirs[key]) else: create_if_not_exists(self.dirs[key]) 
numDirectories_created = numDirectories_created + 1 while 100*numDirectories_created >= frac*numDirectories: logging.info(" %i%% completed" % frac) frac = frac + 1 logging.info("Done.") inputFileLists = {} for sample_name, sample_info in self.samples.items(): if not sample_info["use_it"]: continue logging.info("Checking input files for sample %s" % sample_info["process_name_specific"]) inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job) mcClosure_regex = re.compile('Fakeable_mcClosure_(?P<type>m|e)_wFakeRateWeights') for lepton_selection in self.lepton_selections: electron_selection = lepton_selection muon_selection = lepton_selection hadTauVeto_selection = "Tight" hadTauVeto_selection = "|".join([ hadTauVeto_selection, self.hadTauVeto_selection_part2 ]) if lepton_selection == "Fakeable_mcClosure_e": electron_selection = "Fakeable" muon_selection = "Tight" elif lepton_selection == "Fakeable_mcClosure_m": electron_selection = "Tight" muon_selection = "Fakeable" for lepton_frWeight in self.lepton_frWeights: if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"): continue if lepton_frWeight == "disabled" and not lepton_selection in [ "Tight" ]: continue lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight) for lepton_charge_selection in self.lepton_charge_selections: if 'mcClosure' in lepton_selection and lepton_charge_selection != 'SS': # Run MC closure only for the region that complements the SR continue for sample_name, sample_info in self.samples.items(): if not sample_info["use_it"]: continue process_name = sample_info["process_name_specific"] logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name)) inputFileList = inputFileLists[sample_name] sample_category = sample_info["sample_category"] is_mc = (sample_info["type"] == "mc") use_th_weights = self.runTHweights(sample_info) central_or_shift_dedicated 
= self.central_or_shifts if use_th_weights else self.central_or_shifts_external for central_or_shift in central_or_shift_dedicated: if not self.accept_systematics( central_or_shift, is_mc, lepton_selection, lepton_charge_selection, sample_info ): continue central_or_shifts_local = [] if central_or_shift == "central" and not use_th_weights: for central_or_shift_local in self.central_or_shifts_internal: if self.accept_systematics( central_or_shift_local, is_mc, lepton_selection, lepton_charge_selection, sample_info ): central_or_shifts_local.append(central_or_shift_local) logging.info(" ... for '%s' and systematic uncertainty option '%s'" % (lepton_selection_and_frWeight, central_or_shift)) # build config files for executing analysis code key_analyze_dir = getKey(process_name, lepton_selection_and_frWeight, lepton_charge_selection, central_or_shift) for jobId in inputFileList.keys(): analyze_job_tuple = (process_name, lepton_selection_and_frWeight, lepton_charge_selection, central_or_shift, jobId) key_analyze_job = getKey(*analyze_job_tuple) ntupleFiles = inputFileList[jobId] if len(ntupleFiles) == 0: logging.warning("No input ntuples for %s --> skipping job !!" 
% (key_analyze_job)) continue syncOutput = '' syncTree = '' syncGenMatch = self.lepton_genMatches_nonfakes if self.do_sync: mcClosure_match = mcClosure_regex.match(lepton_selection_and_frWeight) if lepton_selection_and_frWeight == 'Tight': if lepton_charge_selection == 'SS': syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_SR.root' % (self.channel, central_or_shift)) syncTree = 'syncTree_%s_SR' % self.channel elif lepton_charge_selection == 'OS': syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_Flip.root' % (self.channel, central_or_shift)) syncTree = 'syncTree_%s_Flip' % self.channel else: continue elif lepton_selection_and_frWeight == 'Fakeable_wFakeRateWeights' and lepton_charge_selection == 'SS': syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_Fake.root' % (self.channel, central_or_shift)) syncTree = 'syncTree_%s_Fake' % self.channel elif mcClosure_match and lepton_charge_selection == 'SS': mcClosure_type = mcClosure_match.group('type') syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_mcClosure_%s.root' % (self.channel, central_or_shift, mcClosure_type)) syncTree = 'syncTree_%s_mcClosure_%s' % (self.channel, mcClosure_type) else: continue if syncTree and central_or_shift != "central": syncTree = os.path.join(central_or_shift, syncTree) syncRLE = '' if self.do_sync and self.rle_select: syncRLE = self.rle_select % syncTree if not os.path.isfile(syncRLE): logging.warning("Input RLE file for the sync is missing: %s; skipping the job" % syncRLE) continue if syncOutput: self.inputFiles_sync['sync'].append(syncOutput) cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % analyze_job_tuple) logFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % analyze_job_tuple) rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%i.txt" % analyze_job_tuple) 
\ if self.select_rle_output else "" histogramFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%s_%s_%i.root" % analyze_job_tuple) self.jobOptions_analyze[key_analyze_job] = { 'ntupleFiles' : ntupleFiles, 'cfgFile_modified' : cfgFile_modified_path, 'histogramFile' : histogramFile_path, 'logFile' : logFile_path, 'selEventsFileName_output' : rleOutputFile_path, 'electronSelection' : electron_selection, 'muonSelection' : muon_selection, 'apply_leptonGenMatching' : self.apply_leptonGenMatching, 'leptonChargeSelection' : lepton_charge_selection, 'hadTauSelection_veto' : hadTauVeto_selection, 'applyFakeRateWeights' : self.applyFakeRateWeights if not lepton_selection == "Tight" else "disabled", 'central_or_shift' : central_or_shift, 'central_or_shifts_local' : central_or_shifts_local, 'syncOutput' : syncOutput, 'syncTree' : syncTree, 'syncRLE' : syncRLE, 'useNonNominal' : self.use_nonnominal, 'apply_hlt_filter' : self.hlt_filter, 'syncGenMatch' : syncGenMatch, } self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info, lepton_selection) # initialize input and output file names for hadd_stage1 key_hadd_stage1_dir = getKey(process_name, lepton_selection_and_frWeight, lepton_charge_selection) hadd_stage1_job_tuple = (process_name, lepton_selection_and_frWeight, lepton_charge_selection) key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple) if not key_hadd_stage1_job in self.inputFiles_hadd_stage1: self.inputFiles_hadd_stage1[key_hadd_stage1_job] = [] self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile']) self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(self.dirs[key_hadd_stage1_dir][DKEY_HIST], "hadd_stage1_%s_%s_%s.root" % hadd_stage1_job_tuple) if self.do_sync: continue if is_mc: logging.info("Creating configuration files to run 'addBackgrounds' for sample %s" % process_name) sample_categories = [ sample_category ] for sample_category in 
sample_categories: # sum non-fake and fake contributions for each MC sample separately genMatch_categories = [ "nonfake", "Convs", "fake", "flip" ] for genMatch_category in genMatch_categories: key_hadd_stage1_job = getKey(process_name, lepton_selection_and_frWeight, lepton_charge_selection) key_addBackgrounds_dir = getKey(process_name, lepton_selection_and_frWeight, lepton_charge_selection, "addBackgrounds") addBackgrounds_job_tuple = None processes_input = None process_output = None if genMatch_category == "nonfake": # sum non-fake contributions for each MC sample separately # input processes: TT2l0g0j; ... # output processes: TT; ... if sample_category in self.ttHProcs: lepton_genMatches = [] lepton_genMatches.extend(self.lepton_genMatches_nonfakes) lepton_genMatches.extend(self.lepton_genMatches_Convs) processes_input = [] processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in lepton_genMatches ]) processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in lepton_genMatches ]) processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in lepton_genMatches ]) processes_input.extend([ "%s%s" % ("ttH_hzg", genMatch) for genMatch in lepton_genMatches ]) processes_input.extend([ "%s%s" % ("ttH_hmm", genMatch) for genMatch in lepton_genMatches ]) else: processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_nonfakes ] process_output = sample_category addBackgrounds_job_tuple = (process_name, sample_category, lepton_selection_and_frWeight, lepton_charge_selection) elif genMatch_category == "Convs": # sum conversion background contributions for each MC sample separately # input processes: TT1l1g0j, TT0l2g0j; ... # output processes: TT_Convs; ... 
if sample_category in self.ttHProcs: processes_input = [] processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in self.lepton_genMatches_Convs ]) processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in self.lepton_genMatches_Convs ]) processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in self.lepton_genMatches_Convs ]) processes_input.extend([ "%s%s" % ("ttH_hzg", genMatch) for genMatch in self.lepton_genMatches_Convs ]) processes_input.extend([ "%s%s" % ("ttH_hmm", genMatch) for genMatch in self.lepton_genMatches_Convs ]) else: processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_Convs ] process_output = "%s_Convs" % sample_category addBackgrounds_job_tuple = (process_name, "%s_Convs" % sample_category, lepton_selection_and_frWeight, lepton_charge_selection) elif genMatch_category == "fake": # sum fake contributions for each MC sample separately # input processes: TT1l0g1j, TT0l1g1j, TT0l0g2j; ... # output processes: TT_fake; ... 
if sample_category in self.ttHProcs: processes_input = [] processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in self.lepton_genMatches_fakes ]) processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in self.lepton_genMatches_fakes ]) processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in self.lepton_genMatches_fakes ]) processes_input.extend([ "%s%s" % ("ttH_hzg", genMatch) for genMatch in self.lepton_genMatches_fakes ]) processes_input.extend([ "%s%s" % ("ttH_hmm", genMatch) for genMatch in self.lepton_genMatches_fakes ]) else: processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_fakes ] process_output = "%s_fake" % sample_category addBackgrounds_job_tuple = (process_name, "%s_fake" % sample_category, lepton_selection_and_frWeight, lepton_charge_selection) elif genMatch_category == "flip": # sum flip contributions for each MC sample separately # input processes: TT2l2f0g0j&2t0e0m0j, TT2l1f0g0j&2t0e0m0j; ... # output processes: TT_flip; ... 
if sample_category in self.ttHProcs: processes_input = [] processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in self.lepton_genMatches_flips ]) processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in self.lepton_genMatches_flips ]) processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in self.lepton_genMatches_flips ]) processes_input.extend([ "%s%s" % ("ttH_hzg", genMatch) for genMatch in self.lepton_genMatches_flips ]) processes_input.extend([ "%s%s" % ("ttH_hmm", genMatch) for genMatch in self.lepton_genMatches_flips ]) else: processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_flips ] process_output = "%s_flip" % sample_category addBackgrounds_job_tuple = (process_name, "%s_flip" % sample_category, lepton_selection_and_frWeight, lepton_charge_selection) if processes_input: logging.info(" ...for genMatch option = '%s'" % genMatch_category) key_addBackgrounds_job = getKey(*addBackgrounds_job_tuple) cfgFile_modified = os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_%s_cfg.py" % addBackgrounds_job_tuple) outputFile = os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s_%s.root" % addBackgrounds_job_tuple) self.jobOptions_addBackgrounds[key_addBackgrounds_job] = { 'inputFile' : self.outputFile_hadd_stage1[key_hadd_stage1_job], 'cfgFile_modified' : cfgFile_modified, 'outputFile' : outputFile, 'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], os.path.basename(cfgFile_modified).replace("_cfg.py", ".log")), 'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight, lepton_charge_selection) ], 'processes_input' : processes_input, 'process_output' : process_output } self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds[key_addBackgrounds_job]) # initialize input and output file names for hadd_stage1_5 key_hadd_stage1_5_dir = getKey("hadd", lepton_selection_and_frWeight, 
lepton_charge_selection) hadd_stage1_5_job_tuple = (lepton_selection_and_frWeight, lepton_charge_selection) key_hadd_stage1_5_job = getKey(*hadd_stage1_5_job_tuple) if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5: self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = [] self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.jobOptions_addBackgrounds[key_addBackgrounds_job]['outputFile']) self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job] = os.path.join(self.dirs[key_hadd_stage1_5_dir][DKEY_HIST], "hadd_stage1_5_%s_%s.root" % hadd_stage1_5_job_tuple) # add output files of hadd_stage1 for data to list of input files for hadd_stage1_5 if not is_mc: key_hadd_stage1_job = getKey(process_name, lepton_selection_and_frWeight, lepton_charge_selection) key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, lepton_charge_selection) if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5: self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = [] self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.outputFile_hadd_stage1[key_hadd_stage1_job]) if self.do_sync: continue # sum fake background contributions for the total of all MC samples # input processes: TT1l0g1j, TT0l1g1j, TT0l0g2j; ... 
# output process: fakes_mc key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, lepton_charge_selection) key_addBackgrounds_dir = getKey("addBackgrounds") addBackgrounds_job_fakes_tuple = ("fakes_mc", lepton_selection_and_frWeight, lepton_charge_selection) key_addBackgrounds_job_fakes = getKey(*addBackgrounds_job_fakes_tuple) sample_categories = [] sample_categories.extend(self.nonfake_backgrounds) sample_categories.extend(self.ttHProcs) processes_input = [] for sample_category in sample_categories: processes_input.append("%s_fake" % sample_category) self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_fakes_tuple), 'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_fakes_tuple), 'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_fakes_tuple), 'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight, lepton_charge_selection) ], 'processes_input' : processes_input, 'process_output' : "fakes_mc" } self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]) # sum flip background contributions for the total of all MC sample # input processes: TT2l1f0g0j,TT2l2f0g0j; ... 
# output process: flips_mc addBackgrounds_job_flips_tuple = ("flips_mc", lepton_selection_and_frWeight, lepton_charge_selection) key_addBackgrounds_job_flips = getKey(*addBackgrounds_job_flips_tuple) sample_categories = [] sample_categories.extend(self.nonfake_backgrounds) sample_categories.extend(self.ttHProcs) processes_input = [] for sample_category in sample_categories: processes_input.append("%s_flip" % sample_category) self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_flips] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_flips_tuple), 'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_flips_tuple), 'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_flips_tuple), 'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight, lepton_charge_selection) ], 'processes_input' : processes_input, 'process_output' : "flips_mc" } self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_flips]) # sum conversion background contributions for the total of all MC samples # input processes: TT1l1g0j, TT0l2g0j; ... 
# output process: Convs addBackgrounds_job_Convs_tuple = ("Convs", lepton_selection_and_frWeight, lepton_charge_selection) key_addBackgrounds_job_Convs = getKey(*addBackgrounds_job_Convs_tuple) sample_categories = [] sample_categories.extend(self.nonfake_backgrounds) sample_categories.extend(self.ttHProcs) processes_input = [] for sample_category in self.convs_backgrounds: processes_input.append("%s_Convs" % sample_category) self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_Convs_tuple), 'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_Convs_tuple), 'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_Convs_tuple), 'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight, lepton_charge_selection) ], 'processes_input' : processes_input, 'process_output' : "Convs" } self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs]) # initialize input and output file names for hadd_stage2 key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, lepton_charge_selection) key_hadd_stage2_dir = getKey("hadd", lepton_selection_and_frWeight, lepton_charge_selection) hadd_stage2_job_tuple = (lepton_selection_and_frWeight, lepton_charge_selection) key_hadd_stage2_job = getKey(*hadd_stage2_job_tuple) if not key_hadd_stage2_job in self.inputFiles_hadd_stage2: self.inputFiles_hadd_stage2[key_hadd_stage2_job] = [] if lepton_selection == "Tight": self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile']) 
self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_flips]['outputFile']) self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs]['outputFile']) self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job]) self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(self.dirs[key_hadd_stage2_dir][DKEY_HIST], "hadd_stage2_%s_%s.root" % hadd_stage2_job_tuple) if self.do_sync: if self.is_sbatch: logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze) self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel) self.createScript_sbatch_syncNtuple(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze) logging.info("Creating Makefile") lines_makefile = [] self.addToMakefile_syncNtuple(lines_makefile) outputFile_sync_path = os.path.join(self.outputDir, DKEY_SYNC, '%s.root' % self.channel) self.outputFile_sync['sync'] = outputFile_sync_path self.addToMakefile_hadd_sync(lines_makefile) self.addToMakefile_validate(lines_makefile) self.targets.extend(self.phoniesToAdd) self.createMakefile(lines_makefile) logging.info("Done.") return self.num_jobs logging.info("Creating configuration files to run 'addBackgroundFakes'") for lepton_charge_selection in self.lepton_charge_selections: key_hadd_stage1_5_job = getKey(get_lepton_selection_and_frWeight("Fakeable", "enabled"), lepton_charge_selection) key_addFakes_dir = getKey("addBackgroundLeptonFakes") addFakes_job_tuple = (lepton_charge_selection) key_addFakes_job = getKey("data_fakes", lepton_charge_selection) category_sideband = None if self.applyFakeRateWeights == "2lepton": category_sideband = "ttWctrl_%s_Fakeable_wFakeRateWeights" % lepton_charge_selection else: raise ValueError("Invalid Configuration parameter 'applyFakeRateWeights' = %s !!" 
% self.applyFakeRateWeights) self.jobOptions_addFakes[key_addFakes_job] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addFakes_dir][DKEY_CFGS], "addBackgroundLeptonFakes_%s_cfg.py" % addFakes_job_tuple), 'outputFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_HIST], "addBackgroundLeptonFakes_%s.root" % addFakes_job_tuple), 'logFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_LOGS], "addBackgroundLeptonFakes_%s.log" % addFakes_job_tuple), 'category_signal' : "ttWctrl_%s_Tight" % lepton_charge_selection, 'category_sideband' : category_sideband } self.createCfg_addFakes(self.jobOptions_addFakes[key_addFakes_job]) key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), lepton_charge_selection) self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile']) #-------------------------------------------------------------------------- # CV: add histograms in OS and SS regions, # so that "data_fakes" background can be subtracted from OS control region used to estimate charge flip background key_hadd_stage1_5_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS") key_addFakes_job = getKey("data_fakes", "OS") key_hadd_stage1_6_dir = getKey("hadd", get_lepton_selection_and_frWeight("Tight", "disabled"), "OS") key_hadd_stage1_6_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS") if key_hadd_stage1_6_job not in self.inputFiles_hadd_stage1_6: self.inputFiles_hadd_stage1_6[key_hadd_stage1_6_job] = [] self.inputFiles_hadd_stage1_6[key_hadd_stage1_6_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile']) self.inputFiles_hadd_stage1_6[key_hadd_stage1_6_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job]) self.outputFile_hadd_stage1_6[key_hadd_stage1_6_job] = os.path.join(self.dirs[key_hadd_stage1_6_dir][DKEY_HIST], "hadd_stage1_6_Tight_OS.root") 
#-------------------------------------------------------------------------- logging.info("Creating configuration files to run 'addBackgroundFlips'") key_addFlips_dir = getKey("addBackgroundLeptonFlips") key_addFlips_job = getKey("data_flips") self.jobOptions_addFlips[key_addFlips_job] = { 'inputFile' : self.outputFile_hadd_stage1_6, 'cfgFile_modified' : os.path.join(self.dirs[key_addFlips_dir][DKEY_CFGS], "addBackgroundLeptonFlips_cfg.py"), 'outputFile' : os.path.join(self.dirs[key_addFlips_dir][DKEY_HIST], "addBackgroundLeptonFlips.root"), 'logFile' : os.path.join(self.dirs[key_addFlips_dir][DKEY_LOGS], "addBackgroundLeptonFlips.log"), 'category_signal' : "ttWctrl_SS_Tight", 'category_sideband' : "ttWctrl_OS_Tight" } self.createCfg_addFlips(self.jobOptions_addFlips[key_addFlips_job]) key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS") self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFlips[key_addFlips_job]['outputFile']) logging.info("Creating configuration files to run 'prepareDatacards'") for histogramToFit in self.histograms_to_fit: key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS") key_prep_dcard_dir = getKey("prepareDatacards") prep_dcard_job_tuple = (self.channel, histogramToFit) key_prep_dcard_job = getKey(histogramToFit) self.jobOptions_prep_dcard[key_prep_dcard_job] = { 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_cfg.py" % prep_dcard_job_tuple), 'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s.root" % prep_dcard_job_tuple), 'histogramDir' : self.histogramDir_prep_dcard, 'histogramToFit' : histogramToFit, 'label' : None } self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job]) # add shape templates for the following systematic uncertainties: # - 'CMS_ttHl_Clos_norm_e' # - 
'CMS_ttHl_Clos_shape_e' # - 'CMS_ttHl_Clos_norm_m' # - 'CMS_ttHl_Clos_shape_m' key_add_syst_fakerate_dir = getKey("addSystFakeRates") add_syst_fakerate_job_tuple = (self.channel, histogramToFit) key_add_syst_fakerate_job = getKey(histogramToFit) self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job] = { 'inputFile' : self.jobOptions_prep_dcard[key_prep_dcard_job]['datacardFile'], 'cfgFile_modified' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_CFGS], "addSystFakeRates_%s_%s_cfg.py" % add_syst_fakerate_job_tuple), 'outputFile' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_DCRD], "addSystFakeRates_%s_%s.root" % add_syst_fakerate_job_tuple), 'category' : self.channel, 'histogramToFit' : histogramToFit, 'plots_outputFileName' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_PLOT], "addSystFakeRates.png") } histogramDir_nominal = self.histogramDir_prep_dcard for lepton_type in [ 'e', 'm' ]: lepton_mcClosure = "Fakeable_mcClosure_%s" % lepton_type if lepton_mcClosure not in self.lepton_selections: continue lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_mcClosure, "enabled") key_addBackgrounds_job_fakes = getKey("fakes_mc", lepton_selection_and_frWeight, 'SS') histogramDir_mcClosure = self.mcClosure_dir['%s_%s' % (lepton_mcClosure, 'SS')] self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job].update({ 'add_Clos_%s' % lepton_type : ("Fakeable_mcClosure_%s" % lepton_type) in self.lepton_selections, 'inputFile_nominal_%s' % lepton_type : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'histogramName_nominal_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_nominal, histogramToFit), 'inputFile_mcClosure_%s' % lepton_type : self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'], 'histogramName_mcClosure_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_mcClosure, histogramToFit) }) 
self.createCfg_add_syst_fakerate(self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job]) logging.info("Creating configuration files to run 'makePlots'") key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS") key_makePlots_dir = getKey("makePlots") key_makePlots_job = getKey("SS") self.jobOptions_make_plots[key_makePlots_job] = { 'executable' : self.executable_make_plots, 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_cfg.py" % self.channel), 'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s.png" % self.channel), 'histogramDir' : self.histogramDir_prep_dcard, 'label' : "t#bar{t}W control region", 'make_plots_backgrounds' : self.make_plots_backgrounds } self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job]) self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel) self.sbatchFile_addBackgrounds = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_%s.py" % self.channel) self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_%s.py" % self.channel) self.sbatchFile_addFakes = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFakes_%s.py" % self.channel) self.sbatchFile_addFlips = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFlips_%s.py" % self.channel) if self.is_sbatch: logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze) self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze) logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addBackgrounds) self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds, self.jobOptions_addBackgrounds) self.createScript_sbatch(self.executable_addBackgrounds, 
self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum) logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFakes) self.createScript_sbatch(self.executable_addFakes, self.sbatchFile_addFakes, self.jobOptions_addFakes) logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFlips) self.createScript_sbatch(self.executable_addFlips, self.sbatchFile_addFlips, self.jobOptions_addFlips) logging.info("Creating Makefile") lines_makefile = [] self.addToMakefile_analyze(lines_makefile) self.addToMakefile_hadd_stage1(lines_makefile) self.addToMakefile_backgrounds_from_data_withFlips(lines_makefile) self.addToMakefile_hadd_stage2(lines_makefile) self.addToMakefile_prep_dcard(lines_makefile) self.addToMakefile_add_syst_fakerate(lines_makefile) self.addToMakefile_make_plots(lines_makefile) self.addToMakefile_validate(lines_makefile) self.createMakefile(lines_makefile) logging.info("Done.") return self.num_jobs
def create(self): """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system """ for sample_name, sample_info in self.samples.items(): if not sample_info["use_it"]: continue process_name = sample_info["process_name_specific"] sample_category = sample_info["sample_category"] is_mc = (sample_info["type"] == "mc") logging.info("Building dictionaries for sample %s..." % process_name) for lepton_selection in self.lepton_selections: for lepton_frWeight in self.lepton_frWeights: if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"): continue if lepton_frWeight == "disabled" and not lepton_selection in [ "Tight", "forBDTtraining" ]: continue lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight) for chargeSumSelection in self.chargeSumSelections: central_or_shift_extensions = ["", "hadd", "addBackgrounds"] central_or_shift_dedicated = self.central_or_shifts if self.runTHweights(sample_info) else self.central_or_shifts_external central_or_shifts_extended = central_or_shift_extensions + central_or_shift_dedicated for central_or_shift_or_dummy in central_or_shifts_extended: process_name_extended = [ process_name, "hadd" ] for process_name_or_dummy in process_name_extended: if central_or_shift_or_dummy in [ "hadd", "addBackgrounds" ] and process_name_or_dummy in [ "hadd" ]: continue if central_or_shift_or_dummy not in central_or_shift_extensions and not self.accept_systematics( central_or_shift_or_dummy, is_mc, lepton_selection, chargeSumSelection, sample_info ): continue key_dir = getKey(process_name_or_dummy, lepton_selection_and_frWeight, chargeSumSelection, central_or_shift_or_dummy) for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES, DKEY_SYNC ]: if dir_type == DKEY_SYNC and not self.do_sync: continue initDict(self.dirs, [ key_dir, dir_type ]) if dir_type in [ DKEY_CFGS, DKEY_LOGS ]: self.dirs[key_dir][dir_type] = 
os.path.join(self.configDir, dir_type, self.channel, "_".join([ lepton_selection_and_frWeight, chargeSumSelection ]), process_name_or_dummy, central_or_shift_or_dummy) else: self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, "_".join([ lepton_selection_and_frWeight, chargeSumSelection ]), process_name_or_dummy) for subdirectory in [ "addBackgrounds", "addBackgroundLeptonFakes", "prepareDatacards", "addSystFakeRates", "makePlots" ]: key_dir = getKey(subdirectory) for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]: initDict(self.dirs, [ key_dir, dir_type ]) if dir_type in [ DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]: self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel, subdirectory) else: self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, subdirectory) for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC ]: if dir_type == DKEY_SYNC and not self.do_sync: continue initDict(self.dirs, [ dir_type ]) if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]: self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel) else: self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel) numDirectories = 0 for key in self.dirs.keys(): if type(self.dirs[key]) == dict: numDirectories += len(self.dirs[key]) else: numDirectories += 1 logging.info("Creating directory structure (numDirectories = %i)" % numDirectories) numDirectories_created = 0; frac = 1 for key in self.dirs.keys(): if type(self.dirs[key]) == dict: for dir_type in self.dirs[key].keys(): create_if_not_exists(self.dirs[key][dir_type]) numDirectories_created += len(self.dirs[key]) else: create_if_not_exists(self.dirs[key]) numDirectories_created = numDirectories_created + 1 while 100*numDirectories_created >= frac*numDirectories: logging.info(" %i%% completed" % frac) frac = frac + 
1 logging.info("Done.") inputFileLists = {} for sample_name, sample_info in self.samples.items(): if not sample_info["use_it"]: continue logging.info("Checking input files for sample %s" % sample_info["process_name_specific"]) inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job) mcClosure_regex = re.compile('Fakeable_mcClosure_(?P<type>m|e)_wFakeRateWeights') for lepton_selection in self.lepton_selections: electron_selection = lepton_selection muon_selection = lepton_selection hadTauVeto_selection = "Tight" hadTauVeto_selection = "|".join([ hadTauVeto_selection, self.hadTauVeto_selection_part2 ]) if lepton_selection == "forBDTtraining": electron_selection = "Loose" muon_selection = "Loose" elif lepton_selection == "Fakeable_mcClosure_e": electron_selection = "Fakeable" muon_selection = "Tight" elif lepton_selection == "Fakeable_mcClosure_m": electron_selection = "Tight" muon_selection = "Fakeable" for lepton_frWeight in self.lepton_frWeights: if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"): continue if lepton_frWeight == "disabled" and not lepton_selection in [ "Tight", "forBDTtraining" ]: continue lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight) for chargeSumSelection in self.chargeSumSelections: for sample_name, sample_info in self.samples.items(): if not sample_info["use_it"]: continue process_name = sample_info["process_name_specific"] logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name)) inputFileList = inputFileLists[sample_name] sample_category = sample_info["sample_category"] is_mc = (sample_info["type"] == "mc") use_th_weights = self.runTHweights(sample_info) central_or_shift_dedicated = self.central_or_shifts if use_th_weights else self.central_or_shifts_external for central_or_shift in central_or_shift_dedicated: if not self.accept_systematics( central_or_shift, is_mc, 
lepton_selection, chargeSumSelection, sample_info ): continue central_or_shifts_local = [] if central_or_shift == "central" and not use_th_weights: for central_or_shift_local in self.central_or_shifts_internal: if self.accept_systematics( central_or_shift_local, is_mc, lepton_selection, chargeSumSelection, sample_info ): central_or_shifts_local.append(central_or_shift_local) logging.info(" ... for '%s' and systematic uncertainty option '%s'" % (lepton_selection_and_frWeight, central_or_shift)) # build config files for executing analysis code key_analyze_dir = getKey(process_name, lepton_selection_and_frWeight, chargeSumSelection, central_or_shift) for jobId in inputFileList.keys(): analyze_job_tuple = (process_name, lepton_selection_and_frWeight, chargeSumSelection, central_or_shift, jobId) key_analyze_job = getKey(*analyze_job_tuple) ntupleFiles = inputFileList[jobId] if len(ntupleFiles) == 0: logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job)) continue syncOutput = '' syncTree = '' if self.do_sync: if chargeSumSelection != 'OS': continue mcClosure_match = mcClosure_regex.match(lepton_selection_and_frWeight) if lepton_selection_and_frWeight == 'Tight': syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_SR.root' % (self.channel, central_or_shift)) syncTree = 'syncTree_%s_SR' % self.channel.replace('_', '') elif lepton_selection_and_frWeight == 'Fakeable_wFakeRateWeights': syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_Fake.root' % (self.channel, central_or_shift)) syncTree = 'syncTree_%s_Fake' % self.channel.replace('_', '') elif mcClosure_match: mcClosure_type = mcClosure_match.group('type') syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_mcClosure_%s.root' % (self.channel, central_or_shift, mcClosure_type)) syncTree = 'syncTree_%s_mcClosure_%s' % (self.channel.replace('_', ''), mcClosure_type) else: continue if syncTree and central_or_shift != "central": syncTree 
= os.path.join(central_or_shift, syncTree) syncRLE = '' if self.do_sync and self.rle_select: syncRLE = self.rle_select % syncTree if not os.path.isfile(syncRLE): logging.warning("Input RLE file for the sync is missing: %s; skipping the job" % syncRLE) continue if syncOutput: self.inputFiles_sync['sync'].append(syncOutput) cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % analyze_job_tuple) logFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % analyze_job_tuple) rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%i.txt" % analyze_job_tuple) \ if self.select_rle_output else "" histogramFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%s_%s_%i.root" % analyze_job_tuple) branchName_memOutput = '%s_%s' % (self.MEMbranch, self.get_addMEM_systematics(central_or_shift)) \ if self.MEMbranch else '' self.jobOptions_analyze[key_analyze_job] = { 'ntupleFiles' : ntupleFiles, 'cfgFile_modified' : cfgFile_modified_path, 'histogramFile' : histogramFile_path, 'logFile' : logFile_path, 'selEventsFileName_output' : rleOutputFile_path, 'electronSelection' : electron_selection, 'muonSelection' : muon_selection, 'apply_leptonGenMatching' : self.apply_leptonGenMatching, 'hadTauSelection' : hadTauVeto_selection, 'chargeSumSelection' : chargeSumSelection, 'applyFakeRateWeights' : self.applyFakeRateWeights if not lepton_selection == "Tight" else "disabled", 'central_or_shift' : central_or_shift, 'central_or_shifts_local' : central_or_shifts_local, 'selectBDT' : self.isBDTtraining, 'branchName_memOutput' : branchName_memOutput, 'syncOutput' : syncOutput, 'syncTree' : syncTree, 'syncRLE' : syncRLE, 'apply_hlt_filter' : self.hlt_filter, 'useNonNominal' : self.use_nonnominal, 'fillGenEvtHistograms' : True, 'isControlRegion' : self.isControlRegion, } self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], 
sample_info, lepton_selection) # initialize input and output file names for hadd_stage1 key_hadd_stage1_dir = getKey(process_name, lepton_selection_and_frWeight, chargeSumSelection) hadd_stage1_job_tuple = (process_name, lepton_selection_and_frWeight, chargeSumSelection) key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple) if not key_hadd_stage1_job in self.inputFiles_hadd_stage1: self.inputFiles_hadd_stage1[key_hadd_stage1_job] = [] self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile']) self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(self.dirs[key_hadd_stage1_dir][DKEY_HIST], "hadd_stage1_%s_%s_%s.root" % hadd_stage1_job_tuple) if self.isBDTtraining or self.do_sync: continue # add output files of hadd_stage1 for data to list of input files for hadd_stage1_5 key_hadd_stage1_job = getKey(process_name, lepton_selection_and_frWeight, chargeSumSelection) key_hadd_stage1_5_dir = getKey("hadd", lepton_selection_and_frWeight, chargeSumSelection) hadd_stage1_5_job_tuple = (lepton_selection_and_frWeight, chargeSumSelection) key_hadd_stage1_5_job = getKey(*hadd_stage1_5_job_tuple) if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5: self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = [] self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.outputFile_hadd_stage1[key_hadd_stage1_job]) self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job] = os.path.join(self.dirs[key_hadd_stage1_5_dir][DKEY_HIST], "hadd_stage1_5_%s_%s.root" % hadd_stage1_5_job_tuple) if self.isBDTtraining or self.do_sync: continue ## doing list of processes to make the hadd in _Convs and _fake ## we could remove the tH ones with althernative couplings sample_categories = [] sample_categories.extend(self.nonfake_backgrounds) sample_categories.extend(self.ttHProcs) processes_input_base = self.get_processes_input_base(sample_categories) # sum fake background contributions for the total of all MC sample # input 
processes: TT_fake, TTW_fake, TTWW_fake, ... # output process: fakes_mc key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, chargeSumSelection) key_addBackgrounds_dir = getKey("addBackgrounds") addBackgrounds_job_fakes_tuple = ("fakes_mc", lepton_selection_and_frWeight, chargeSumSelection) key_addBackgrounds_job_fakes = getKey(*addBackgrounds_job_fakes_tuple) processes_input = [] for process_input_base in processes_input_base: if "HH" in process_input_base: continue processes_input.append("%s_fake" % process_input_base) self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_fakes_tuple), 'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_fakes_tuple), 'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_fakes_tuple), 'categories' : [ getHistogramDir(self.channel, lepton_selection, lepton_frWeight, chargeSumSelection) ], 'processes_input' : processes_input, 'process_output' : "fakes_mc" } self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]) # sum conversion background contributions for the total of all MC sample # input processes: TT_Convs, TTW_Convs, TTWW_Convs, ... 
# output process: Convs addBackgrounds_job_Convs_tuple = ("Convs", lepton_selection_and_frWeight, chargeSumSelection) key_addBackgrounds_job_Convs = getKey(*addBackgrounds_job_Convs_tuple) processes_input = [] for process_input_base in self.convs_backgrounds: if "HH" in process_input_base: continue processes_input.append("%s_Convs" % process_input_base) self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_Convs_tuple), 'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_Convs_tuple), 'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_Convs_tuple), 'categories' : [ getHistogramDir(self.channel, lepton_selection, lepton_frWeight, chargeSumSelection) ], 'processes_input' : processes_input, 'process_output' : "Convs" } self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs]) # initialize input and output file names for hadd_stage2 key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, chargeSumSelection) key_hadd_stage2_dir = getKey("hadd", lepton_selection_and_frWeight, chargeSumSelection) hadd_stage2_job_tuple = (lepton_selection_and_frWeight, chargeSumSelection) key_hadd_stage2_job = getKey(*hadd_stage2_job_tuple) if not key_hadd_stage2_job in self.inputFiles_hadd_stage2: self.inputFiles_hadd_stage2[key_hadd_stage2_job] = [] if lepton_selection == "Tight": self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile']) self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs]['outputFile']) 
self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job]) self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(self.dirs[key_hadd_stage2_dir][DKEY_HIST], "hadd_stage2_%s_%s.root" % hadd_stage2_job_tuple) if self.isBDTtraining or self.do_sync: if self.is_sbatch: logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze) self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel) if self.isBDTtraining: self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze) elif self.do_sync: self.createScript_sbatch_syncNtuple(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze) logging.info("Creating Makefile") lines_makefile = [] if self.isBDTtraining: self.addToMakefile_analyze(lines_makefile) self.addToMakefile_hadd_stage1(lines_makefile) elif self.do_sync: self.addToMakefile_syncNtuple(lines_makefile) outputFile_sync_path = os.path.join(self.outputDir, DKEY_SYNC, '%s.root' % self.channel) self.outputFile_sync['sync'] = outputFile_sync_path self.addToMakefile_hadd_sync(lines_makefile) else: raise ValueError("Internal logic error") self.addToMakefile_validate(lines_makefile) self.targets.extend(self.phoniesToAdd) self.createMakefile(lines_makefile) logging.info("Done.") return self.num_jobs logging.info("Creating configuration files to run 'addBackgroundFakes'") for chargeSumSelection in self.chargeSumSelections: key_hadd_stage1_5_job = getKey(get_lepton_selection_and_frWeight("Fakeable", "enabled"), chargeSumSelection) key_addFakes_dir = getKey("addBackgroundLeptonFakes") key_addFakes_job = getKey("data_fakes", chargeSumSelection) category_sideband = "{}_{}_Fakeable_wFakeRateWeights".format(self.channel, chargeSumSelection) self.jobOptions_addFakes[key_addFakes_job] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : 
os.path.join(self.dirs[key_addFakes_dir][DKEY_CFGS], "addBackgroundLeptonFakes_%s_cfg.py" % chargeSumSelection), 'outputFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_HIST], "addBackgroundLeptonFakes_%s.root" % chargeSumSelection), 'logFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_LOGS], "addBackgroundLeptonFakes_%s.log" % chargeSumSelection), 'category_signal' : "{}_{}_Tight".format(self.channel, chargeSumSelection), 'category_sideband' : category_sideband } self.createCfg_addFakes(self.jobOptions_addFakes[key_addFakes_job]) key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), chargeSumSelection) self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile']) logging.info("Creating configuration files to run 'prepareDatacards'") for histogramToFit in self.histograms_to_fit: key_prep_dcard_dir = getKey("prepareDatacards") if "OS" in self.chargeSumSelections: key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS") prep_dcard_job_tuple = (self.channel, "OS", histogramToFit) key_prep_dcard_job = getKey("OS", histogramToFit) self.jobOptions_prep_dcard[key_prep_dcard_job] = { 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_cfg.py" % prep_dcard_job_tuple), 'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s.root" % prep_dcard_job_tuple), 'histogramDir' : self.histogramDir_prep_dcard, 'histogramToFit' : histogramToFit, 'label' : None } self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job]) if "SS" in self.chargeSumSelections: key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS") prep_dcard_job_tuple = (self.channel, "SS", histogramToFit) key_prep_dcard_job = getKey("SS", histogramToFit) 
self.jobOptions_prep_dcard[key_prep_dcard_job] = { 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_cfg.py" % prep_dcard_job_tuple), 'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s.root" % prep_dcard_job_tuple), 'histogramDir' : self.histogramDir_prep_dcard_SS, 'histogramToFit' : histogramToFit, 'label' : 'SS' } self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job]) # add shape templates for the following systematic uncertainties: # - 'CMS_ttHl_Clos_norm_e' # - 'CMS_ttHl_Clos_shape_e' # - 'CMS_ttHl_Clos_norm_m' # - 'CMS_ttHl_Clos_shape_m' for chargeSumSelection in self.chargeSumSelections: key_prep_dcard_job = getKey(chargeSumSelection, histogramToFit) key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), chargeSumSelection) key_add_syst_fakerate_dir = getKey("addSystFakeRates") add_syst_fakerate_job_tuple = (self.channel, chargeSumSelection, histogramToFit) key_add_syst_fakerate_job = getKey(chargeSumSelection, histogramToFit) self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job] = { 'inputFile' : self.jobOptions_prep_dcard[key_prep_dcard_job]['datacardFile'], 'cfgFile_modified' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_CFGS], "addSystFakeRates_%s_%s_%s_cfg.py" % add_syst_fakerate_job_tuple), 'outputFile' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_DCRD], "addSystFakeRates_%s_%s_%s.root" % add_syst_fakerate_job_tuple), 'category' : self.channel, 'histogramToFit' : histogramToFit, 'plots_outputFileName' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_PLOT], "addSystFakeRates.png") } histogramDir_nominal = None if chargeSumSelection == "OS": histogramDir_nominal = self.histogramDir_prep_dcard elif chargeSumSelection == "SS": histogramDir_nominal = self.histogramDir_prep_dcard_SS else: raise 
ValueError("Invalid parameter 'chargeSumSelection' = %s !!" % chargeSumSelection) for lepton_type in [ 'e', 'm' ]: lepton_mcClosure = "Fakeable_mcClosure_%s" % lepton_type if lepton_mcClosure not in self.lepton_selections: continue lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_mcClosure, "enabled") key_addBackgrounds_job_fakes = getKey("fakes_mc", lepton_selection_and_frWeight, chargeSumSelection) histogramDir_mcClosure = self.mcClosure_dir['%s_%s' % (lepton_mcClosure, chargeSumSelection)] self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job].update({ 'add_Clos_%s' % lepton_type : ("Fakeable_mcClosure_%s" % lepton_type) in self.lepton_selections, 'inputFile_nominal_%s' % lepton_type : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'histogramName_nominal_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_nominal, histogramToFit), 'inputFile_mcClosure_%s' % lepton_type : self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'], 'histogramName_mcClosure_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_mcClosure, histogramToFit) }) self.createCfg_add_syst_fakerate(self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job]) logging.info("Creating configuration files to run 'makePlots'") key_makePlots_dir = getKey("makePlots") if "OS" in self.chargeSumSelections: key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS") key_makePlots_job = getKey("OS") self.jobOptions_make_plots[key_makePlots_job] = { 'executable' : self.executable_make_plots, 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_cfg.py" % self.channel), 'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s.png" % self.channel), 'histogramDir' : self.histogramDir_prep_dcard, 'label' : self.channel, 'make_plots_backgrounds' : self.make_plots_backgrounds } 
self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job]) if "SS" in self.chargeSumSelections: key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS") key_makePlots_job = getKey("SS") self.jobOptions_make_plots[key_makePlots_job] = { 'executable' : self.executable_make_plots, 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_SS_cfg.py" % self.channel), 'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s_SS.png" % self.channel), 'histogramDir' : self.histogramDir_prep_dcard_SS, 'label' : "{} SS".format(self.channel), 'make_plots_backgrounds' : self.make_plots_backgrounds } self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job]) if "Fakeable_mcClosure" in self.lepton_selections: #TODO key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS") key_makePlots_job = getKey("OS") self.jobOptions_make_plots[key_makePlots_job] = { 'executable' : self.executable_make_plots_mcClosure, 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_mcClosure_%s_cfg.py" % self.channel), 'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_mcClosure_%s.png" % self.channel) } self.createCfg_makePlots_mcClosure(self.jobOptions_make_plots[key_makePlots_job]) self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel) self.sbatchFile_addBackgrounds = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_%s.py" % self.channel) self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_%s.py" % self.channel) self.sbatchFile_addFakes = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFakes_%s.py" % self.channel) if self.is_sbatch: logging.info("Creating 
script for submitting '%s' jobs to batch system" % self.executable_analyze) self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze) logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addBackgrounds) self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds, self.jobOptions_addBackgrounds) self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum) logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFakes) self.createScript_sbatch(self.executable_addFakes, self.sbatchFile_addFakes, self.jobOptions_addFakes) logging.info("Creating Makefile") lines_makefile = [] self.addToMakefile_analyze(lines_makefile) self.addToMakefile_hadd_stage1(lines_makefile) self.addToMakefile_backgrounds_from_data(lines_makefile) self.addToMakefile_hadd_stage2(lines_makefile) self.addToMakefile_prep_dcard(lines_makefile) self.addToMakefile_add_syst_fakerate(lines_makefile) self.addToMakefile_make_plots(lines_makefile) self.addToMakefile_validate(lines_makefile) self.createMakefile(lines_makefile) logging.info("Done.") return self.num_jobs
def create(self): """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system """ for sample_name, sample_info in self.samples.items(): if not sample_info["use_it"]: continue sample_category = sample_info["sample_category"] is_mc = (sample_info["type"] == "mc") process_name = sample_info["process_name_specific"] logging.info("Building dictionaries for sample %s..." % process_name) for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections: for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights: if lepton_and_hadTau_frWeight == "enabled" and not lepton_and_hadTau_selection.startswith("Fakeable"): continue if lepton_and_hadTau_frWeight == "disabled" and not lepton_and_hadTau_selection in [ "Tight", "forBDTtraining" ]: continue lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight) for chargeSumSelection in self.chargeSumSelections: central_or_shift_extensions = ["", "hadd", "addBackgrounds"] central_or_shift_dedicated = self.central_or_shifts if self.runTHweights(sample_info) else self.central_or_shifts_external central_or_shifts_extended = central_or_shift_extensions + central_or_shift_dedicated for central_or_shift_or_dummy in central_or_shifts_extended: process_name_extended = [ process_name, "hadd" ] for process_name_or_dummy in process_name_extended: if central_or_shift_or_dummy in [ "hadd", "addBackgrounds" ] and process_name_or_dummy in [ "hadd" ]: continue if central_or_shift_or_dummy not in central_or_shift_extensions and not self.accept_systematics( central_or_shift_or_dummy, is_mc, lepton_and_hadTau_selection, chargeSumSelection, sample_info ): continue key_dir = getKey(process_name_or_dummy, lepton_and_hadTau_selection_and_frWeight, chargeSumSelection, central_or_shift_or_dummy) for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES ]: initDict(self.dirs, [ key_dir, dir_type ]) if 
dir_type in [ DKEY_CFGS, DKEY_LOGS ]: self.dirs[key_dir][dir_type] = os.path.join(self.get_dir_type(dir_type), dir_type, self.channel, "_".join([ lepton_and_hadTau_selection_and_frWeight, chargeSumSelection ]), process_name_or_dummy, central_or_shift_or_dummy) else: self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, "_".join([ lepton_and_hadTau_selection_and_frWeight, chargeSumSelection ]), process_name_or_dummy) for subdirectory in [ "addBackgrounds", "addBackgroundLeptonFakes", "prepareDatacards", "addSystFakeRates", "makePlots" ]: key_dir = getKey(subdirectory) for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]: initDict(self.dirs, [ key_dir, dir_type ]) if dir_type in [ DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]: self.dirs[key_dir][dir_type] = os.path.join(self.get_dir_type(dir_type), dir_type, self.channel, subdirectory) else: self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, subdirectory) for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]: initDict(self.dirs, [ dir_type ]) if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]: self.dirs[dir_type] = os.path.join(self.get_dir_type(dir_type), dir_type, self.channel) else: self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel) numDirectories = 0 for key in self.dirs.keys(): if type(self.dirs[key]) == dict: numDirectories += len(self.dirs[key]) else: numDirectories += 1 logging.info("Creating directory structure (numDirectories = %i)" % numDirectories) numDirectories_created = 0; frac = 1 for key in self.dirs.keys(): if type(self.dirs[key]) == dict: for dir_type in self.dirs[key].keys(): create_if_not_exists(self.dirs[key][dir_type]) numDirectories_created += len(self.dirs[key]) else: create_if_not_exists(self.dirs[key]) numDirectories_created = numDirectories_created + 1 while 100*numDirectories_created >= 
frac*numDirectories: logging.info(" %i%% completed" % frac) frac = frac + 1 logging.info("Done.") inputFileLists = {} for sample_name, sample_info in self.samples.items(): if not sample_info["use_it"]: continue logging.info("Checking input files for sample %s" % sample_info["process_name_specific"]) inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job) mcClosure_regex = re.compile('Fakeable_mcClosure_(?P<type>m|e|t)_wFakeRateWeights') for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections: lepton_selection = lepton_and_hadTau_selection hadTau_selection = lepton_and_hadTau_selection electron_selection = lepton_selection muon_selection = lepton_selection if self.applyFakeRateWeights == "3tau": lepton_selection = "Tight" hadTau_selection = "|".join([ hadTau_selection, self.hadTau_selection_part2 ]) if lepton_and_hadTau_selection == "forBDTtraining": lepton_selection = "Loose" electron_selection = lepton_selection muon_selection = lepton_selection hadTau_selection = "Tight|%s" % self.hadTau_selection_relaxed elif lepton_and_hadTau_selection == "Fakeable_mcClosure_e": electron_selection = "Fakeable" muon_selection = "Tight" hadTau_selection = "Tight" hadTau_selection = "|".join([hadTau_selection, self.hadTau_selection_part2]) elif lepton_and_hadTau_selection == "Fakeable_mcClosure_m": electron_selection = "Tight" muon_selection = "Fakeable" hadTau_selection = "Tight" hadTau_selection = "|".join([hadTau_selection, self.hadTau_selection_part2]) elif lepton_and_hadTau_selection == "Fakeable_mcClosure_t": electron_selection = "Tight" muon_selection = "Tight" hadTau_selection = "Fakeable" hadTau_selection = "|".join([hadTau_selection, self.hadTau_selection_part2]) for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights: if lepton_and_hadTau_frWeight == "enabled" and not lepton_and_hadTau_selection.startswith("Fakeable"): continue if lepton_and_hadTau_frWeight == "disabled" and not lepton_and_hadTau_selection in [ 
"Tight", "forBDTtraining" ]: continue lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight) for chargeSumSelection in self.chargeSumSelections: for sample_name, sample_info in self.samples.items(): if not sample_info["use_it"]: continue process_name = sample_info["process_name_specific"] logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name)) inputFileList = inputFileLists[sample_name] sample_category = sample_info["sample_category"] is_mc = (sample_info["type"] == "mc") use_th_weights = self.runTHweights(sample_info) central_or_shift_dedicated = self.central_or_shifts if use_th_weights else self.central_or_shifts_external for central_or_shift in central_or_shift_dedicated: if not self.accept_systematics( central_or_shift, is_mc, lepton_and_hadTau_selection, chargeSumSelection, sample_info ): continue central_or_shifts_local = [] if central_or_shift == "central" and not use_th_weights: for central_or_shift_local in self.central_or_shifts_internal: if self.accept_systematics( central_or_shift_local, is_mc, lepton_and_hadTau_selection, chargeSumSelection, sample_info ): central_or_shifts_local.append(central_or_shift_local) logging.info(" ... for '%s' and systematic uncertainty option '%s'" % (lepton_and_hadTau_selection_and_frWeight, central_or_shift)) # build config files for executing analysis code key_analyze_dir = getKey(process_name, lepton_and_hadTau_selection_and_frWeight, chargeSumSelection, central_or_shift) for jobId in inputFileList.keys(): analyze_job_tuple = (process_name, lepton_and_hadTau_selection_and_frWeight, chargeSumSelection, central_or_shift, jobId) key_analyze_job = getKey(*analyze_job_tuple) ntupleFiles = inputFileList[jobId] if len(ntupleFiles) == 0: logging.warning("No input ntuples for %s --> skipping job !!" 
% (key_analyze_job)) continue cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % analyze_job_tuple) logFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % analyze_job_tuple) rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%i.txt" % analyze_job_tuple) \ if self.select_rle_output else "" histogramFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%s_%s_%i.root" % analyze_job_tuple) applyFakeRateWeights = self.applyFakeRateWeights \ if not (lepton_selection == "Tight" and hadTau_selection.find("Tight") != -1) \ else "disabled" self.jobOptions_analyze[key_analyze_job] = { 'ntupleFiles' : ntupleFiles, 'cfgFile_modified' : cfgFile_modified_path, 'histogramFile' : histogramFile_path, 'logFile' : logFile_path, 'selEventsFileName_output' : rleOutputFile_path, 'electronSelection' : electron_selection, 'muonSelection' : muon_selection, 'apply_leptonGenMatching' : self.apply_leptonGenMatching, 'hadTauSelection' : hadTau_selection, 'apply_hadTauGenMatching' : self.apply_hadTauGenMatching, 'chargeSumSelection' : chargeSumSelection, 'applyFakeRateWeights' : applyFakeRateWeights, 'central_or_shift' : central_or_shift, 'central_or_shifts_local' : central_or_shifts_local, 'apply_hlt_filter' : self.hlt_filter, 'useNonNominal' : self.use_nonnominal, 'fillGenEvtHistograms' : True, 'selectBDT' : self.isBDTtraining, 'gen_mHH' : self.gen_mHH, } self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info, lepton_and_hadTau_selection) # initialize input and output file names for hadd_stage1 key_hadd_stage1_dir = getKey(process_name, lepton_and_hadTau_selection_and_frWeight, chargeSumSelection) hadd_stage1_job_tuple = (process_name, lepton_and_hadTau_selection_and_frWeight, chargeSumSelection) key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple) if not key_hadd_stage1_job in self.inputFiles_hadd_stage1: 
self.inputFiles_hadd_stage1[key_hadd_stage1_job] = [] self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile']) self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(self.dirs[key_hadd_stage1_dir][DKEY_HIST], "hadd_stage1_%s_%s_%s.root" % hadd_stage1_job_tuple) if self.isBDTtraining: continue # add output files of hadd_stage1 to list of input files for hadd_stage1_5 key_hadd_stage1_job = getKey(process_name, lepton_and_hadTau_selection_and_frWeight, chargeSumSelection) key_hadd_stage1_5_dir = getKey("hadd", lepton_and_hadTau_selection_and_frWeight, chargeSumSelection) hadd_stage1_5_job_tuple = (lepton_and_hadTau_selection_and_frWeight, chargeSumSelection) key_hadd_stage1_5_job = getKey(*hadd_stage1_5_job_tuple) if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5: self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = [] self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.outputFile_hadd_stage1[key_hadd_stage1_job]) self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job] = os.path.join(self.dirs[key_hadd_stage1_5_dir][DKEY_HIST], "hadd_stage1_5_%s_%s.root" % hadd_stage1_5_job_tuple) if self.isBDTtraining: continue # sum fake background contributions for the total of all MC sample # input processes: TT_fake, TTW_fake, TTWW_fake, ... 
# output process: fakes_mc key_hadd_stage1_5_job = getKey(lepton_and_hadTau_selection_and_frWeight, chargeSumSelection) key_addBackgrounds_dir = getKey("addBackgrounds") addBackgrounds_job_fakes_tuple = ("fakes_mc", lepton_and_hadTau_selection_and_frWeight, chargeSumSelection) key_addBackgrounds_job_fakes = getKey(*addBackgrounds_job_fakes_tuple) sample_categories = self.get_sample_categories() processes_input = [] for sample_category in sample_categories: processes_input.append("%s_fake" % sample_category) self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_fakes_tuple), 'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_fakes_tuple), 'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_fakes_tuple), 'categories' : [ getHistogramDir(lepton_selection, hadTau_selection, lepton_and_hadTau_frWeight, chargeSumSelection) ], 'processes_input' : processes_input, 'process_output' : "fakes_mc" } self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]) # sum conversion background contributions for the total of all MC sample # input processes: TT_Convs, TTW_Convs, TTWW_Convs, ... 
# output process: Convs addBackgrounds_job_Convs_tuple = ("Convs", lepton_and_hadTau_selection_and_frWeight, chargeSumSelection) key_addBackgrounds_job_Convs = getKey(*addBackgrounds_job_Convs_tuple) processes_input = [] for sample_category in self.convs_backgrounds: processes_input.append("%s_Convs" % sample_category) self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_Convs_tuple), 'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_Convs_tuple), 'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_Convs_tuple), 'categories' : [ getHistogramDir(lepton_selection, hadTau_selection, lepton_and_hadTau_frWeight, chargeSumSelection) ], 'processes_input' : processes_input, 'process_output' : "Convs" } self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs]) if self.isBDTtraining: continue # initialize input and output file names for hadd_stage2 key_hadd_stage1_5_job = getKey(lepton_and_hadTau_selection_and_frWeight, chargeSumSelection) key_hadd_stage2_dir = getKey("hadd", lepton_and_hadTau_selection_and_frWeight, chargeSumSelection) hadd_stage2_job_tuple = (lepton_and_hadTau_selection_and_frWeight, chargeSumSelection) key_hadd_stage2_job = getKey(*hadd_stage2_job_tuple) if not key_hadd_stage2_job in self.inputFiles_hadd_stage2: self.inputFiles_hadd_stage2[key_hadd_stage2_job] = [] if lepton_and_hadTau_selection == "Tight": self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile']) 
self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs]['outputFile']) self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job]) self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(self.dirs[key_hadd_stage2_dir][DKEY_HIST], "hadd_stage2_%s_%s.root" % hadd_stage2_job_tuple) if self.isBDTtraining: if self.is_sbatch: logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze) self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel) self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze) logging.info("Creating Makefile") lines_makefile = [] self.addToMakefile_analyze(lines_makefile) self.addToMakefile_hadd_stage1(lines_makefile) self.targets.extend(self.phoniesToAdd) self.addToMakefile_validate(lines_makefile) self.createMakefile(lines_makefile) logging.info("Done.") return self.num_jobs logging.info("Creating configuration files to run 'addBackgroundFakes'") for chargeSumSelection in self.chargeSumSelections: key_hadd_stage1_5_job = getKey(get_lepton_and_hadTau_selection_and_frWeight("Fakeable", "enabled"), chargeSumSelection) key_addFakes_dir = getKey("addBackgroundLeptonFakes") key_addFakes_job = getKey("data_fakes", chargeSumSelection) category_sideband = None if self.applyFakeRateWeights == "4L": category_sideband = "hh_1l_3tau_%s_Fakeable_wFakeRateWeights" % chargeSumSelection elif self.applyFakeRateWeights == "3tau": category_sideband = "hh_1l_3tau_%s_Fakeable_wFakeRateWeights" % chargeSumSelection else: raise ValueError("Invalid Configuration parameter 'applyFakeRateWeights' = %s !!" 
% self.applyFakeRateWeights) self.jobOptions_addFakes[key_addFakes_job] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addFakes_dir][DKEY_CFGS], "addBackgroundLeptonFakes_%s_cfg.py" % chargeSumSelection), 'outputFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_HIST], "addBackgroundLeptonFakes_%s.root" % chargeSumSelection), 'logFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_LOGS], "addBackgroundLeptonFakes_%s.log" % chargeSumSelection), 'category_signal' : "hh_1l_3tau_%s_Tight" % chargeSumSelection, 'category_sideband' : category_sideband } self.createCfg_addFakes(self.jobOptions_addFakes[key_addFakes_job]) key_hadd_stage2_job = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), chargeSumSelection) self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile']) logging.info("Creating configuration files to run 'prepareDatacards'...") for histogramToFit in self.histograms_to_fit: logging.info(" ... 
for histogram %s" % histogramToFit) prep_dcard_HH = set() for sample_name, sample_info in self.samples.items(): if not sample_info["use_it"]: continue sample_category = sample_info["sample_category"] if sample_category.startswith("signal"): sample_category = sample_info["sample_category_hh"] doAdd = False if "BDTOutput" in histogramToFit or "MVAOutput" in histogramToFit: if ("SM" in histogramToFit or any(nonresPoint in histogramToFit for nonresPoint in NONRESONANT_KEYS)) and 'nonresonant' in sample_category: doAdd = True if ("spin0" in histogramToFit and "spin0" in sample_category) or ("spin2" in histogramToFit and "spin2" in sample_category): startpos = None for pattern in [ "MVAOutput", "BDTOutput" ]: if pattern in histogramToFit: startpos = histogramToFit.find(pattern) + len(pattern) + 1 # CV: increment startpos by 1 to account for trailing "_" if not startpos: raise ValueError("Failed to parse histogram name = '%s' !!" % histogramToFit) endpos = histogramToFit.find("_", startpos) masspoint = histogramToFit[startpos:endpos] if ("_%s_" % masspoint) in sample_category: doAdd = True else: doAdd = True if doAdd: if "_wwww" in sample_category: prep_dcard_HH.add(sample_category.replace("_wwww", "_zzzz")) prep_dcard_HH.add(sample_category.replace("_wwww", "_wwww")) prep_dcard_HH.add(sample_category.replace("_wwww", "_zzww")) if not ("BDTOutput" in histogramToFit or "MVAOutput" in histogramToFit): prep_dcard_HH.add(sample_category.replace("_wwww", "")) elif "_wwtt" in sample_category: prep_dcard_HH.add(sample_category.replace("_wwtt", "_ttzz")) prep_dcard_HH.add(sample_category.replace("_wwtt", "_ttww")) if not ("BDTOutput" in histogramToFit or "MVAOutput" in histogramToFit): prep_dcard_HH.add(sample_category.replace("_wwtt", "")) elif "_tttt" in sample_category: prep_dcard_HH.add(sample_category) if not ("BDTOutput" in histogramToFit or "MVAOutput" in histogramToFit): prep_dcard_HH.add(sample_category.replace("_tttt", "")) else: raise ValueError("Failed to identify 
relevant HH decay mode(s) for 'sample_category' = %s !!" % sample_category) prep_dcard_HH = list(prep_dcard_HH) prep_dcard_H = [] prep_dcard_other_nonfake_backgrounds = [] for process in self.nonfake_backgrounds: if process in [ "VH", "WH", "ZH", "TH", "tHq", "tHW", "TTH", "TTWH", "TTZH", "ggH", "qqH" ]: prep_dcard_H.append("%s_hww" % process) prep_dcard_H.append("%s_hzz" % process) prep_dcard_H.append("%s_htt" % process) prep_dcard_H.append("%s_hbb" % process) else: prep_dcard_other_nonfake_backgrounds.append(process) self.prep_dcard_processesToCopy = [ "data_obs" ] + prep_dcard_HH + prep_dcard_H + prep_dcard_other_nonfake_backgrounds + [ "Convs", "data_fakes", "fakes_mc" ] key_prep_dcard_dir = getKey("prepareDatacards") if "OS" in self.chargeSumSelections: key_hadd_stage2_job = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), "OS") prep_dcard_job_tuple = (self.channel, "OS", histogramToFit) key_prep_dcard_job = getKey("OS", histogramToFit) self.jobOptions_prep_dcard[key_prep_dcard_job] = { 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_cfg.py" % prep_dcard_job_tuple), 'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s.root" % prep_dcard_job_tuple), 'histogramDir' : self.histogramDir_prep_dcard, 'histogramToFit' : histogramToFit, 'label' : None } self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job]) if "SS" in self.chargeSumSelections: key_hadd_stage2_job = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), "SS") prep_dcard_job_tuple = (self.channel, "SS", histogramToFit) key_prep_dcard_job = getKey("SS", histogramToFit) self.jobOptions_prep_dcard[key_prep_dcard_job] = { 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], 
"prepareDatacards_%s_%s_%s_cfg.py" % prep_dcard_job_tuple), 'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s.root" % prep_dcard_job_tuple), 'histogramDir' : self.histogramDir_prep_dcard_SS, 'histogramToFit' : histogramToFit, 'label' : 'SS' } self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job]) # add shape templates for the following systematic uncertainties: # - 'CMS_ttHl_Clos_norm_e' # - 'CMS_ttHl_Clos_shape_e' # - 'CMS_ttHl_Clos_norm_m' # - 'CMS_ttHl_Clos_shape_m' # - 'CMS_ttHl_Clos_norm_t' # - 'CMS_ttHl_Clos_shape_t' for chargeSumSelection in self.chargeSumSelections: key_prep_dcard_job = getKey(chargeSumSelection, histogramToFit) key_hadd_stage2_job = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), chargeSumSelection) key_add_syst_fakerate_dir = getKey("addSystFakeRates") add_syst_fakerate_job_tuple = (self.channel, chargeSumSelection, histogramToFit) key_add_syst_fakerate_job = getKey(chargeSumSelection, histogramToFit) self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job] = { 'inputFile' : self.jobOptions_prep_dcard[key_prep_dcard_job]['datacardFile'], 'cfgFile_modified' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_CFGS], "addSystFakeRates_%s_%s_%s_cfg.py" % add_syst_fakerate_job_tuple), 'outputFile' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_DCRD], "addSystFakeRates_%s_%s_%s.root" % add_syst_fakerate_job_tuple), 'category' : self.channel, 'histogramToFit' : histogramToFit, 'plots_outputFileName' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_PLOT], "addSystFakeRates.png") } histogramDir_nominal = None if chargeSumSelection == "OS": histogramDir_nominal = "%s/sel/evt/fakes_mc" % self.histogramDir_prep_dcard elif chargeSumSelection == "SS": histogramDir_nominal = "%s/sel/evt/fakes_mc" % self.histogramDir_prep_dcard_SS else: raise ValueError("Invalid parameter 'chargeSumSelection' = %s !!" 
% chargeSumSelection) for lepton_and_hadTau_type in [ 'e', 'm', 't' ]: lepton_and_hadTau_mcClosure = "Fakeable_mcClosure_%s" % lepton_and_hadTau_type if lepton_and_hadTau_mcClosure not in self.lepton_and_hadTau_selections: continue lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_mcClosure, "enabled") key_addBackgrounds_job_fakes = getKey("fakes_mc", lepton_and_hadTau_selection_and_frWeight, chargeSumSelection) histogramDir_mcClosure = "%s/sel/evt/fakes_mc" % self.mcClosure_dir['%s_%s' % (lepton_and_hadTau_mcClosure, chargeSumSelection)] if "BDTOutput" in histogramToFit or "MVAOutput" in histogramToFit: histogramDir_nominal = histogramDir_nominal.replace("/sel/evt", "/sel/datacard") histogramDir_mcClosure = histogramDir_mcClosure.replace("/sel/evt", "/sel/datacard") self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job].update({ 'add_Clos_%s' % lepton_and_hadTau_type : ("Fakeable_mcClosure_%s" % lepton_and_hadTau_type) in self.lepton_and_hadTau_selections, 'inputFile_nominal_%s' % lepton_and_hadTau_type : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'histogramName_nominal_%s' % lepton_and_hadTau_type : "%s/%s" % (histogramDir_nominal, histogramToFit), 'inputFile_mcClosure_%s' % lepton_and_hadTau_type : self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'], 'histogramName_mcClosure_%s' % lepton_and_hadTau_type : "%s/%s" % (histogramDir_mcClosure, histogramToFit) }) self.createCfg_add_syst_fakerate(self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job]) logging.info("Creating configuration files to run 'makePlots'") key_makePlots_dir = getKey("makePlots") if "OS" in self.chargeSumSelections: key_hadd_stage2_job = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), "OS") key_makePlots_job = getKey("OS") self.jobOptions_make_plots[key_makePlots_job] = { 'executable' : self.executable_make_plots, 'inputFile' : 
self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_cfg.py" % self.channel), 'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s.png" % self.channel), 'histogramDir' : self.histogramDir_prep_dcard, 'label' : '1l+3#tau_{h}', 'make_plots_backgrounds' : self.make_plots_backgrounds } self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job]) if "SS" in self.chargeSumSelections: key_hadd_stage2_job = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), "SS") key_makePlots_job = getKey("SS") self.jobOptions_make_plots[key_makePlots_job] = { 'executable' : self.executable_make_plots, 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_SS_cfg.py" % self.channel), 'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s_SS.png" % self.channel), 'histogramDir' : self.histogramDir_prep_dcard_SS, 'label' : "1l+3#tau_{h} SS", 'make_plots_backgrounds' : self.make_plots_backgrounds } self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job]) if "Fakeable_mcClosure" in self.lepton_and_hadTau_selections: #TODO key_hadd_stage2_job = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), "OS") key_makePlots_job = getKey("Fakeable_mcClosure", "OS") self.jobOptions_make_plots[key_makePlots_job] = { 'executable' : self.executable_make_plots_mcClosure, 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_mcClosure_%s_cfg.py" % self.channel), 'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_mcClosure_%s.png" % self.channel) } self.createCfg_makePlots_mcClosure(self.jobOptions_make_plots[key_makePlots_job]) if self.is_sbatch: logging.info("Creating script for 
submitting '%s' jobs to batch system" % self.executable_analyze) self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel) self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze) logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addBackgrounds) self.sbatchFile_addBackgrounds = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_%s.py" % self.channel) self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds, self.jobOptions_addBackgrounds) self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_%s.py" % self.channel) self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum) logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFakes) self.sbatchFile_addFakes = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFakes_%s.py" % self.channel) self.createScript_sbatch(self.executable_addFakes, self.sbatchFile_addFakes, self.jobOptions_addFakes) logging.info("Creating Makefile") lines_makefile = [] self.addToMakefile_analyze(lines_makefile) self.addToMakefile_hadd_stage1(lines_makefile) self.addToMakefile_backgrounds_from_data(lines_makefile) self.addToMakefile_hadd_stage2(lines_makefile) self.addToMakefile_prep_dcard(lines_makefile) self.addToMakefile_add_syst_fakerate(lines_makefile) self.addToMakefile_make_plots(lines_makefile) self.addToMakefile_validate(lines_makefile) self.createMakefile(lines_makefile) logging.info("Done.") return self.num_jobs
def create(self): """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system """ for sample_name, sample_info in self.samples.items(): if not sample_info["use_it"]: continue sample_category = sample_info["sample_category"] is_mc = (sample_info["type"] == "mc") process_name = sample_info["process_name_specific"] logging.info("Building dictionaries for sample %s..." % process_name) for lepton_selection in self.lepton_selections: for lepton_frWeight in self.lepton_frWeights: if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"): continue if lepton_frWeight == "disabled" and not lepton_selection in [ "Tight", "forBDTtraining" ]: continue lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight) for leptonChargeSelection in self.leptonChargeSelections: central_or_shift_extensions = ["", "hadd", "addBackgrounds"] central_or_shift_dedicated = self.central_or_shifts if self.runTHweights(sample_info) else self.central_or_shifts_external central_or_shifts_extended = central_or_shift_extensions + central_or_shift_dedicated for central_or_shift_or_dummy in central_or_shifts_extended: process_name_extended = [ process_name, "hadd" ] for process_name_or_dummy in process_name_extended: if central_or_shift_or_dummy in [ "hadd", "addBackgrounds" ] and process_name_or_dummy in [ "hadd" ]: continue if central_or_shift_or_dummy not in central_or_shift_extensions and not self.accept_systematics( central_or_shift_or_dummy, is_mc, lepton_selection, leptonChargeSelection, sample_info ): continue key_dir = getKey(process_name_or_dummy, lepton_selection_and_frWeight, leptonChargeSelection, central_or_shift_or_dummy) for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES, DKEY_SYNC ]: if dir_type == DKEY_SYNC and not self.do_sync: continue initDict(self.dirs, [ key_dir, dir_type ]) if dir_type in [ DKEY_CFGS, DKEY_LOGS ]: self.dirs[key_dir][dir_type] = 
os.path.join(self.get_dir_type(dir_type), dir_type, self.channel, "_".join([ lepton_selection_and_frWeight, leptonChargeSelection ]), process_name_or_dummy, central_or_shift_or_dummy) else: self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, "_".join([ lepton_selection_and_frWeight, leptonChargeSelection ]), process_name_or_dummy) for subdirectory in [ "addBackgrounds", "addBackgroundLeptonFakes", "addBackgroundLeptonFlips", "prepareDatacards", "addSystFakeRates", "makePlots" ]: key_dir = getKey(subdirectory) for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]: initDict(self.dirs, [ key_dir, dir_type ]) if dir_type in [ DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]: self.dirs[key_dir][dir_type] = os.path.join(self.get_dir_type(dir_type), dir_type, self.channel, subdirectory) else: self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, subdirectory) for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC ]: if dir_type == DKEY_SYNC and not self.do_sync: continue initDict(self.dirs, [ dir_type ]) if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]: self.dirs[dir_type] = os.path.join(self.get_dir_type(dir_type), dir_type, self.channel) else: self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel) numDirectories = 0 for key in self.dirs.keys(): if type(self.dirs[key]) == dict: numDirectories += len(self.dirs[key]) else: numDirectories += 1 logging.info("Creating directory structure (numDirectories = %i)" % numDirectories) numDirectories_created = 0; frac = 1 for key in self.dirs.keys(): if type(self.dirs[key]) == dict: for dir_type in self.dirs[key].keys(): create_if_not_exists(self.dirs[key][dir_type]) numDirectories_created += len(self.dirs[key]) else: create_if_not_exists(self.dirs[key]) numDirectories_created = numDirectories_created + 1 while 100*numDirectories_created >= 
frac*numDirectories: logging.info(" %i%% completed" % frac) frac = frac + 1 logging.info("Done.") inputFileLists = {} for sample_name, sample_info in self.samples.items(): if not sample_info["use_it"]: continue logging.info("Checking input files for sample %s" % sample_info["process_name_specific"]) inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job) for lepton_selection in self.lepton_selections: electron_selection = lepton_selection muon_selection = lepton_selection hadTauVeto_selection = "Tight" hadTauVeto_selection = "|".join([ hadTauVeto_selection, self.hadTauVeto_selection_part2 ]) if lepton_selection == "forBDTtraining": electron_selection = "Loose" muon_selection = "Loose" elif lepton_selection == "Fakeable_mcClosure_e": electron_selection = "Fakeable" muon_selection = "Tight" elif lepton_selection == "Fakeable_mcClosure_m": electron_selection = "Tight" muon_selection = "Fakeable" for lepton_frWeight in self.lepton_frWeights: if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"): continue if lepton_frWeight == "disabled" and not lepton_selection in [ "Tight", "forBDTtraining" ]: continue lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight) for leptonChargeSelection in self.leptonChargeSelections: if 'mcClosure' in lepton_selection and leptonChargeSelection != 'SS': continue for sample_name, sample_info in self.samples.items(): if not sample_info["use_it"]: continue process_name = sample_info["process_name_specific"] logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name)) inputFileList = inputFileLists[sample_name] sample_category = sample_info["sample_category"] is_mc = (sample_info["type"] == "mc") use_th_weights = self.runTHweights(sample_info) central_or_shift_dedicated = self.central_or_shifts if use_th_weights else self.central_or_shifts_external for central_or_shift in 
central_or_shift_dedicated: if not self.accept_systematics( central_or_shift, is_mc, lepton_selection, leptonChargeSelection, sample_info ): continue central_or_shifts_local = [] if central_or_shift == "central" and not use_th_weights: for central_or_shift_local in self.central_or_shifts_internal: if self.accept_systematics( central_or_shift_local, is_mc, lepton_selection, leptonChargeSelection, sample_info ): central_or_shifts_local.append(central_or_shift_local) logging.info(" ... for '%s' and systematic uncertainty option '%s'" % (lepton_selection_and_frWeight, central_or_shift)) # build config files for executing analysis code key_analyze_dir = getKey(process_name, lepton_selection_and_frWeight, leptonChargeSelection, central_or_shift) for jobId in inputFileList.keys(): analyze_job_tuple = (process_name, lepton_selection_and_frWeight, leptonChargeSelection, central_or_shift, jobId) key_analyze_job = getKey(*analyze_job_tuple) ntupleFiles = inputFileList[jobId] if len(ntupleFiles) == 0: logging.warning("No input ntuples for %s --> skipping job !!" 
% (key_analyze_job)) continue cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % analyze_job_tuple) logFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % analyze_job_tuple) rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%i.txt" % analyze_job_tuple) \ if self.select_rle_output else "" histogramFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%s_%s_%i.root" % analyze_job_tuple) applyFakeRateWeights = self.applyFakeRateWeights \ if lepton_selection.find("Tight") == -1 \ else "disabled" self.jobOptions_analyze[key_analyze_job] = { 'ntupleFiles' : ntupleFiles, 'cfgFile_modified' : cfgFile_modified_path, 'histogramFile' : histogramFile_path, 'logFile' : logFile_path, 'selEventsFileName_output' : rleOutputFile_path, 'electronSelection' : electron_selection, 'muonSelection' : muon_selection, 'apply_leptonGenMatching' : self.apply_leptonGenMatching, 'hadTauSelection' : hadTauVeto_selection, 'leptonChargeSelection' : leptonChargeSelection, 'applyFakeRateWeights' : applyFakeRateWeights, 'central_or_shift' : central_or_shift, 'central_or_shifts_local' : central_or_shifts_local, 'selectBDT' : self.isBDTtraining, 'apply_hlt_filter' : self.hlt_filter, 'useNonNominal' : self.use_nonnominal, 'fillGenEvtHistograms' : True, 'gen_mHH' : self.gen_mHH, } self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info, lepton_selection) # initialize input and output file names for hadd_stage1 key_hadd_stage1_dir = getKey(process_name, lepton_selection_and_frWeight, leptonChargeSelection) hadd_stage1_job_tuple = (process_name, lepton_selection_and_frWeight, leptonChargeSelection) key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple) if not key_hadd_stage1_job in self.inputFiles_hadd_stage1: self.inputFiles_hadd_stage1[key_hadd_stage1_job] = [] 
self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile']) self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(self.dirs[key_hadd_stage1_dir][DKEY_HIST], "hadd_stage1_%s_%s_%s.root" % hadd_stage1_job_tuple) if self.isBDTtraining or self.do_sync: continue # add output files of hadd_stage1 to list of input files for hadd_stage1_5 key_hadd_stage1_job = getKey(process_name, lepton_selection_and_frWeight, leptonChargeSelection) key_hadd_stage1_5_dir = getKey("hadd", lepton_selection_and_frWeight, leptonChargeSelection) hadd_stage1_5_job_tuple = (lepton_selection_and_frWeight, leptonChargeSelection) key_hadd_stage1_5_job = getKey(*hadd_stage1_5_job_tuple) if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5: self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = [] self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.outputFile_hadd_stage1[key_hadd_stage1_job]) self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job] = os.path.join(self.dirs[key_hadd_stage1_5_dir][DKEY_HIST], "hadd_stage1_5_%s_%s.root" % hadd_stage1_5_job_tuple) if self.isBDTtraining or self.do_sync: continue # sum fake background contributions for the total of all MC sample # input processes: TT_fake, TTW_fake, TTWW_fake, ... 
# output process: fakes_mc key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, leptonChargeSelection) key_addBackgrounds_dir = getKey("addBackgrounds") addBackgrounds_job_fakes_tuple = ("fakes_mc", lepton_selection_and_frWeight, leptonChargeSelection) key_addBackgrounds_job_fakes = getKey(*addBackgrounds_job_fakes_tuple) sample_categories = self.get_sample_categories() processes_input = [] for sample_category in sample_categories: processes_input.append("%s_fake" % sample_category) self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_fakes_tuple), 'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_fakes_tuple), 'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_fakes_tuple), 'categories' : [ getHistogramDir(category, lepton_selection, lepton_frWeight, leptonChargeSelection) for category in self.categories ], 'processes_input' : processes_input, 'process_output' : "fakes_mc" } self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]) # sum fake background contributions for the total of all MC sample # input processes: TT_flip, TTW_flip, TTWW_flip, ... 
# output process: flips_mc addBackgrounds_job_flips_tuple = ("flips_mc", lepton_selection_and_frWeight, leptonChargeSelection) key_addBackgrounds_job_flips = getKey(*addBackgrounds_job_flips_tuple) processes_input = [] for sample_category in sample_categories: processes_input.append("%s_flip" % sample_category) self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_flips] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_flips_tuple), 'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_flips_tuple), 'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_flips_tuple), 'categories' : [ getHistogramDir(category, lepton_selection, lepton_frWeight, leptonChargeSelection) for category in self.categories ], 'processes_input' : processes_input, 'process_output' : "flips_mc" } self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_flips]) # sum conversion background contributions for the total of all MC sample # input processes: TT_Convs, TTW_Convs, TTWW_Convs, ... 
# output process: Convs addBackgrounds_job_Convs_tuple = ("Convs", lepton_selection_and_frWeight, leptonChargeSelection) key_addBackgrounds_job_Convs = getKey(*addBackgrounds_job_Convs_tuple) processes_input = [] for sample_category in self.convs_backgrounds: processes_input.append("%s_Convs" % sample_category) self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_Convs_tuple), 'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_Convs_tuple), 'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_Convs_tuple), 'categories' : [ getHistogramDir(category, lepton_selection, lepton_frWeight, leptonChargeSelection) for category in self.categories ], 'processes_input' : processes_input, 'process_output' : "Convs" } self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs]) # sum signal contributions from HH->4tau ("tttt"), HH->2W2tau ("wwtt"), and HH->4W ("wwww"), # separately for "nonfake" and "fake" contributions genMatch_categories = [ "nonfake", "fake" ] for genMatch_category in genMatch_categories: for signal_base, signal_input in self.signal_io.items(): addBackgrounds_job_signal_tuple = (lepton_selection_and_frWeight, leptonChargeSelection, signal_base, genMatch_category) key_addBackgrounds_job_signal = getKey(*addBackgrounds_job_signal_tuple) if key_addBackgrounds_job_signal in self.jobOptions_addBackgrounds_sum.keys(): continue processes_input = signal_input process_output = signal_base if genMatch_category == "fake": processes_input = [ process_input + "_fake" for process_input in processes_input ] process_output += "_fake" 
self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_%s_cfg.py" % addBackgrounds_job_signal_tuple), 'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s_%s.root" % addBackgrounds_job_signal_tuple), 'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s_%s.log" % addBackgrounds_job_signal_tuple), 'categories' : [ getHistogramDir(category, lepton_selection, lepton_frWeight, leptonChargeSelection) for category in self.categories ], 'processes_input' : processes_input, 'process_output' : process_output } self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal]) key_hadd_stage2_job = getKey(lepton_selection_and_frWeight, leptonChargeSelection) if not key_hadd_stage2_job in self.inputFiles_hadd_stage2: self.inputFiles_hadd_stage2[key_hadd_stage2_job] = [] if lepton_selection == "Tight": self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal]['outputFile']) # initialize input and output file names for hadd_stage2 key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, leptonChargeSelection) key_hadd_stage2_dir = getKey("hadd", lepton_selection_and_frWeight, leptonChargeSelection) hadd_stage2_job_tuple = (lepton_selection_and_frWeight, leptonChargeSelection) key_hadd_stage2_job = getKey(*hadd_stage2_job_tuple) if not key_hadd_stage2_job in self.inputFiles_hadd_stage2: self.inputFiles_hadd_stage2[key_hadd_stage2_job] = [] if lepton_selection == "Tight": self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile']) 
self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_flips]['outputFile']) self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs]['outputFile']) self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job]) self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(self.dirs[key_hadd_stage2_dir][DKEY_HIST], "hadd_stage2_%s_%s.root" % hadd_stage2_job_tuple) if self.isBDTtraining or self.do_sync: if self.is_sbatch: logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze) self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel) if self.isBDTtraining: self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze) elif self.do_sync: self.createScript_sbatch_syncNtuple(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze) logging.info("Creating Makefile") lines_makefile = [] if self.isBDTtraining: self.addToMakefile_analyze(lines_makefile) self.addToMakefile_hadd_stage1(lines_makefile) elif self.do_sync: self.addToMakefile_syncNtuple(lines_makefile) outputFile_sync_path = os.path.join(self.outputDir, DKEY_SYNC, '%s.root' % self.channel) self.outputFile_sync['sync'] = outputFile_sync_path self.targets.append(outputFile_sync_path) self.addToMakefile_hadd_sync(lines_makefile) else: raise ValueError("Internal logic error") self.targets.extend(self.phoniesToAdd) self.addToMakefile_validate(lines_makefile) self.createMakefile(lines_makefile) logging.info("Done.") return self.num_jobs logging.info("Creating configuration files to run 'addBackgroundFakes'") for category in self.categories: for leptonChargeSelection in self.leptonChargeSelections: key_hadd_stage1_5_job = getKey(get_lepton_selection_and_frWeight("Fakeable", "enabled"), 
leptonChargeSelection) key_addFakes_dir = getKey("addBackgroundLeptonFakes") addFakes_job_tuple = (category, leptonChargeSelection) key_addFakes_job = getKey("data_fakes", *addFakes_job_tuple) self.jobOptions_addFakes[key_addFakes_job] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addFakes_dir][DKEY_CFGS], "addBackgroundLeptonFakes_%s_%s_cfg.py" % addFakes_job_tuple), 'outputFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_HIST], "addBackgroundLeptonFakes_%s_%s.root" % addFakes_job_tuple), 'logFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_LOGS], "addBackgroundLeptonFakes_%s_%s.log" % addFakes_job_tuple), 'category_signal' : getHistogramDir(category, "Tight", "disabled", leptonChargeSelection), 'category_sideband' : getHistogramDir(category, "Fakeable", "enabled", leptonChargeSelection) } self.createCfg_addFakes(self.jobOptions_addFakes[key_addFakes_job]) key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), leptonChargeSelection) self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile']) #-------------------------------------------------------------------------- # add histograms in OS and SS regions, # so that "data_fakes" background can be subtracted from OS control region used to estimate charge flip background key_hadd_stage1_5_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS") key_hadd_stage1_6_dir = getKey("hadd", get_lepton_selection_and_frWeight("Tight", "disabled"), "OS") key_hadd_stage1_6_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS") if key_hadd_stage1_6_job not in self.inputFiles_hadd_stage1_6: self.inputFiles_hadd_stage1_6[key_hadd_stage1_6_job] = [] for category in self.categories: key_addFakes_job = getKey("data_fakes", category, leptonChargeSelection) 
self.inputFiles_hadd_stage1_6[key_hadd_stage1_6_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile']) self.inputFiles_hadd_stage1_6[key_hadd_stage1_6_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job]) self.outputFile_hadd_stage1_6[key_hadd_stage1_6_job] = os.path.join(self.dirs[key_hadd_stage1_6_dir][DKEY_HIST], "hadd_stage1_6_Tight_OS.root") #-------------------------------------------------------------------------- logging.info("Creating configuration files to run 'addBackgroundFlips'") for category in self.categories: key_hadd_stage1_6_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS") key_addFlips_dir = getKey("addBackgroundLeptonFlips") key_addFlips_job = getKey("data_flips", category) self.jobOptions_addFlips[key_addFlips_job] = { 'inputFile' : self.outputFile_hadd_stage1_6[key_hadd_stage1_6_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addFlips_dir][DKEY_CFGS], "addBackgroundLeptonFlips_%s_cfg.py" % category), 'outputFile' : os.path.join(self.dirs[key_addFlips_dir][DKEY_HIST], "addBackgroundLeptonFlips_%s.root" % category), 'logFile' : os.path.join(self.dirs[key_addFlips_dir][DKEY_LOGS], "addBackgroundLeptonFlips_%s.log" % category), 'category_signal' : getHistogramDir(category, "Tight", "disabled", "SS" ), 'category_sideband' : getHistogramDir(category, "Tight", "disabled", "OS" ) } self.createCfg_addFlips(self.jobOptions_addFlips[key_addFlips_job]) key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS") self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFlips[key_addFlips_job]['outputFile']) logging.info("Creating configuration files to run 'prepareDatacards'...") for category in self.categories: for histogramToFit in self.histograms_to_fit: logging.info(" ... 
for category %s, histogram %s" % (category, histogramToFit)) prep_dcard_HH = set() for sample_name, sample_info in self.samples.items(): if not sample_info["use_it"]: continue sample_category = sample_info["sample_category"] masses_to_exclude = ["3000", "2500", "2000", "1750", "1500", "1250"] if sample_category.startswith("signal"): sample_category = sample_info["sample_category_hh"] doAdd = False if "BDTOutput" in histogramToFit or "MVAOutput" in histogramToFit: if ("SM" in histogramToFit or any(nonresPoint in histogramToFit for nonresPoint in NONRESONANT_KEYS)) and 'nonresonant' in sample_category: doAdd = True if "spin0" in histogramToFit and "spin0" in sample_category and histogramToFit[9:13] in sample_category: doAdd = True if "spin2" in histogramToFit and "spin2" in sample_category and histogramToFit[9:13] in sample_category: doAdd = True for mass in masses_to_exclude: if mass in sample_category: doAdd = False else: doAdd = True if doAdd: if "wwww" in sample_category: prep_dcard_HH.add(sample_category.replace("wwww", "zzzz")) prep_dcard_HH.add(sample_category.replace("wwww", "wwww")) prep_dcard_HH.add(sample_category.replace("wwww", "zzww")) elif "wwtt" in sample_category: prep_dcard_HH.add(sample_category.replace("wwtt", "ttzz")) prep_dcard_HH.add(sample_category.replace("wwtt", "ttww")) elif "tttt" in sample_category: prep_dcard_HH.add(sample_category) else: raise ValueError("Failed to identify relevant HH decay mode(s) for 'sample_category' = %s !!" 
% sample_category) prep_dcard_HH = list(prep_dcard_HH) prep_dcard_H = [] prep_dcard_other_nonfake_backgrounds = [] for process in self.nonfake_backgrounds: if process in [ "VH", "WH", "ZH", "TH", "tHq", "tHW", "TTH", "TTWH", "TTZH", "ggH", "qqH" ]: prep_dcard_H.append("%s_hww" % process) prep_dcard_H.append("%s_hzz" % process) prep_dcard_H.append("%s_htt" % process) prep_dcard_H.append("%s_hbb" % process) else: prep_dcard_other_nonfake_backgrounds.append(process) self.prep_dcard_processesToCopy = [ "data_obs" ] + prep_dcard_HH + prep_dcard_H + prep_dcard_other_nonfake_backgrounds + [ "Convs", "data_fakes", "data_flips", "fakes_mc", "flips_mc" ] key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS") key_prep_dcard_dir = getKey("prepareDatacards") prep_dcard_job_tuple = (self.channel, category, "SS", histogramToFit) key_prep_dcard_job = getKey(category, "SS", histogramToFit) self.jobOptions_prep_dcard[key_prep_dcard_job] = { 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_%s_cfg.py" % prep_dcard_job_tuple), 'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s_%s.root" % prep_dcard_job_tuple), 'histogramDir' : getHistogramDir(category, "Tight", "disabled", "SS"), 'histogramToFit' : histogramToFit, 'label' : "2lSS" } self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job]) if "OS" in self.leptonChargeSelections: key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS") prep_dcard_job_tuple = (self.channel, category, "OS", histogramToFit) key_prep_dcard_job = getKey(category, "OS", histogramToFit) self.jobOptions_prep_dcard[key_prep_dcard_job] = { 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_%s_cfg.py" % 
prep_dcard_job_tuple), 'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s_%s.root" % prep_dcard_job_tuple), 'histogramDir' : getHistogramDir(category, "Tight", "disabled", "OS"), 'histogramToFit' : histogramToFit, 'label' : "2lOS", } self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job]) # add shape templates for the following systematic uncertainties: # - 'CMS_ttHl_Clos_norm_e' # - 'CMS_ttHl_Clos_shape_e' # - 'CMS_ttHl_Clos_norm_m' # - 'CMS_ttHl_Clos_shape_m' key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS") key_add_syst_fakerate_dir = getKey("addSystFakeRates") add_syst_fakerate_job_tuple = (self.channel, category, "SS", histogramToFit) key_add_syst_fakerate_job = getKey(category, "SS", histogramToFit) key_prep_dcard_job = getKey(category, "SS", histogramToFit) self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job] = { 'inputFile' : self.jobOptions_prep_dcard[key_prep_dcard_job]['datacardFile'], 'cfgFile_modified' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_CFGS], "addSystFakeRates_%s_%s_%s_%s_cfg.py" % add_syst_fakerate_job_tuple), 'outputFile' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_DCRD], "addSystFakeRates_%s_%s_%s_%s.root" % add_syst_fakerate_job_tuple), 'category' : category, 'histogramToFit' : histogramToFit, 'plots_outputFileName' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_PLOT], "addSystFakeRates.png") } histogramDir_nominal = "%s/sel/evt/fakes_mc" % getHistogramDir(category, "Tight", "disabled", "SS") for lepton_type in [ 'e', 'm' ]: lepton_mcClosure = "Fakeable_mcClosure_%s" % lepton_type if lepton_mcClosure not in self.lepton_selections: continue lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_mcClosure, "enabled") key_addBackgrounds_job_fakes = getKey("fakes_mc", lepton_selection_and_frWeight, "SS") histogramDir_mcClosure = "%s/sel/evt/fakes_mc" % 
self.mcClosure_dir['%s_%s' % (lepton_mcClosure, "SS")] if "BDTOutput" in histogramToFit or "MVAOutput" in histogramToFit: histogramDir_nominal = histogramDir_nominal.replace("/sel/evt", "/sel/datacard") histogramDir_mcClosure = histogramDir_mcClosure.replace("/sel/evt", "/sel/datacard") self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job].update({ 'add_Clos_%s' % lepton_type : ("Fakeable_mcClosure_%s" % lepton_type) in self.lepton_selections, 'inputFile_nominal_%s' % lepton_type : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'histogramName_nominal_%s' % lepton_type : "%s/%s" % (histogramDir_nominal, histogramToFit), 'inputFile_mcClosure_%s' % lepton_type : self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'], 'histogramName_mcClosure_%s' % lepton_type : "%s/%s" % (histogramDir_mcClosure, histogramToFit) }) self.createCfg_add_syst_fakerate(self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job]) logging.info("Creating configuration files to run 'makePlots'") key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS") key_makePlots_dir = getKey("makePlots") key_makePlots_job = getKey("SS") self.jobOptions_make_plots[key_makePlots_job] = { 'executable' : self.executable_make_plots, 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_cfg.py" % self.channel), 'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s.png" % self.channel), 'histogramDir' : self.histogramDir_prep_dcard, 'label' : "2lSS", 'make_plots_backgrounds' : self.make_plots_backgrounds, } self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job]) if "OS" in self.leptonChargeSelections: key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS") key_makePlots_job = getKey("OS") self.jobOptions_make_plots[key_makePlots_job] = { 'executable' : 
self.executable_make_plots, 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_OS_cfg.py" % self.channel), 'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s_OS.png" % self.channel), 'histogramDir' : self.histogramDir_prep_dcard_OS, 'label' : "2lOS", 'make_plots_backgrounds' : self.make_plots_backgrounds_OS, } self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job]) if "Fakeable_mcClosure" in self.lepton_selections: #TODO key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS") key_makePlots_job = getKey("Fakeable_mcClosure", "SS") self.jobOptions_make_plots[key_makePlots_job] = { 'executable' : self.executable_make_plots_mcClosure, 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_mcClosure_%s_cfg.py" % self.channel), 'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_mcClosure_%s.png" % self.channel), } self.createCfg_makePlots_mcClosure(self.jobOptions_make_plots[key_makePlots_job]) if self.is_sbatch: logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze) self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel) self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze) logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addBackgrounds) self.sbatchFile_addBackgrounds = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_%s.py" % self.channel) self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds, self.jobOptions_addBackgrounds) self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_%s.py" % 
self.channel) self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum) logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFakes) self.sbatchFile_addFakes = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFakes_%s.py" % self.channel) self.createScript_sbatch(self.executable_addFakes, self.sbatchFile_addFakes, self.jobOptions_addFakes) logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFlips) self.sbatchFile_addFlips = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFlips_%s.py" % self.channel) self.createScript_sbatch(self.executable_addFlips, self.sbatchFile_addFlips, self.jobOptions_addFlips) logging.info("Creating Makefile") lines_makefile = [] self.addToMakefile_analyze(lines_makefile) self.addToMakefile_hadd_stage1(lines_makefile) self.addToMakefile_backgrounds_from_data_withFlips(lines_makefile) self.addToMakefile_hadd_stage2(lines_makefile) self.addToMakefile_prep_dcard(lines_makefile) self.addToMakefile_add_syst_fakerate(lines_makefile) self.addToMakefile_make_plots(lines_makefile) self.addToMakefile_validate(lines_makefile) self.createMakefile(lines_makefile) logging.info("Done.") return self.num_jobs
def create(self): """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system """ for sample_name, sample_info in self.samples.items(): if not sample_info["use_it"]: continue sample_category = sample_info["sample_category"] is_mc = (sample_info["type"] == "mc") process_name = sample_info["process_name_specific"] logging.info("Building dictionaries for sample %s..." % process_name) for chargeSumSelection in self.chargeSumSelections: for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections: for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights: if lepton_and_hadTau_frWeight == "enabled" and not lepton_and_hadTau_selection.startswith("Fakeable"): continue if lepton_and_hadTau_frWeight == "disabled" and not lepton_and_hadTau_selection in [ "Tight", "forBDTtraining" ]: continue lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight) central_or_shift_extensions = ["", "hadd", "copyHistograms", "addBackgrounds"] central_or_shift_dedicated = self.central_or_shifts if self.runTHweights(sample_info) else self.central_or_shifts_external central_or_shifts_extended = central_or_shift_extensions + central_or_shift_dedicated for central_or_shift_or_dummy in central_or_shifts_extended: process_name_extended = [ process_name, "hadd" ] for process_name_or_dummy in process_name_extended: if process_name_or_dummy in [ "hadd" ] and central_or_shift_or_dummy != "": continue evtcategories_extended = [""] evtcategories_extended.extend(self.evtCategories) if central_or_shift_or_dummy in [ "hadd", "copyHistograms", "addBackgrounds" ] and process_name_or_dummy in [ "hadd" ]: continue if central_or_shift_or_dummy not in central_or_shift_extensions and not self.accept_systematics( central_or_shift_or_dummy, is_mc, lepton_and_hadTau_selection, chargeSumSelection, sample_category, sample_name, sample_info ): continue key_dir = 
getKey(process_name_or_dummy, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight, central_or_shift_or_dummy) for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_ROOT, DKEY_RLES, DKEY_SYNC ]: initDict(self.dirs, [ key_dir, dir_type ]) if dir_type in [ DKEY_CFGS, DKEY_LOGS ]: self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel, "_".join([ lepton_and_hadTau_selection_and_frWeight, chargeSumSelection ]), process_name_or_dummy, central_or_shift_or_dummy) else: self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, "_".join([ lepton_and_hadTau_selection_and_frWeight, chargeSumSelection ]), process_name_or_dummy, central_or_shift_or_dummy) for subdirectory in [ "addBackgrounds", "addBackgroundLeptonFakes", "prepareDatacards", "addSystFakeRates", "makePlots" ]: key_dir = getKey(subdirectory) for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_ROOT, DKEY_DCRD, DKEY_PLOT ]: initDict(self.dirs, [ key_dir, dir_type ]) if dir_type in [ DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]: self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel, subdirectory) else: self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, subdirectory) for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC ]: initDict(self.dirs, [ dir_type ]) if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]: self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel) else: self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel) numDirectories = 0 for key in self.dirs.keys(): if type(self.dirs[key]) == dict: numDirectories += len(self.dirs[key]) else: numDirectories += 1 logging.info("Creating directory structure (numDirectories = %i)" % numDirectories) numDirectories_created = 0; frac = 1 for key in self.dirs.keys(): if type(self.dirs[key]) == dict: for dir_type 
in self.dirs[key].keys(): create_if_not_exists(self.dirs[key][dir_type]) numDirectories_created += len(self.dirs[key]) else: create_if_not_exists(self.dirs[key]) numDirectories_created = numDirectories_created + 1 while 100*numDirectories_created >= frac*numDirectories: logging.info(" %i%% completed" % frac) frac = frac + 1 logging.info("Done.") inputFileLists = {} for sample_name, sample_info in self.samples.items(): if not sample_info["use_it"]: continue logging.info("Checking input files for sample %s" % sample_info["process_name_specific"]) inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job) for chargeSumSelection in self.chargeSumSelections: for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections: lepton_selection = lepton_and_hadTau_selection ##hadTau_selection = None ##if lepton_and_hadTau_selection == "Tight": ## hadTau_selection = "%s|%s" % (lepton_and_hadTau_selection, self.hadTau_mva_wp) ##else: ## hadTau_selection = lepton_and_hadTau_selection hadTau_selection = "%s|%s" % (lepton_and_hadTau_selection, self.hadTau_mva_wp) electron_selection = lepton_selection muon_selection = lepton_selection if lepton_and_hadTau_selection == "Fakeable_mcClosure_e": electron_selection = "Fakeable" muon_selection = "Tight" hadTau_selection = "Tight|%s" % self.hadTau_mva_wp elif lepton_and_hadTau_selection == "Fakeable_mcClosure_m": electron_selection = "Tight" muon_selection = "Fakeable" hadTau_selection = "Tight|%s" % self.hadTau_mva_wp elif lepton_and_hadTau_selection == "Fakeable_mcClosure_t": electron_selection = "Tight" muon_selection = "Tight" hadTau_selection = "Fakeable" for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights: if lepton_and_hadTau_frWeight == "enabled" and not lepton_and_hadTau_selection.startswith("Fakeable"): continue if lepton_and_hadTau_frWeight == "disabled" and not lepton_and_hadTau_selection in [ "Tight", "forBDTtraining" ]: continue lepton_and_hadTau_selection_and_frWeight = 
get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight) for sample_name, sample_info in self.samples.items(): if not sample_info["use_it"]: continue process_name = sample_info["process_name_specific"] logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name)) inputFileList = inputFileLists[sample_name] sample_category = sample_info["sample_category"] is_mc = (sample_info["type"] == "mc") use_th_weights = self.runTHweights(sample_info) central_or_shift_dedicated = self.central_or_shifts if use_th_weights else self.central_or_shifts_external for central_or_shift in central_or_shift_dedicated: if not self.accept_systematics( central_or_shift, is_mc, lepton_and_hadTau_selection, chargeSumSelection, sample_category, sample_name, sample_info ): continue central_or_shifts_local = [] if central_or_shift == "central" and not use_th_weights: for central_or_shift_local in self.central_or_shifts_internal: if self.accept_systematics( central_or_shift_local, is_mc, lepton_and_hadTau_selection, chargeSumSelection, sample_category, sample_name, sample_info ): central_or_shifts_local.append(central_or_shift_local) logging.info(" ... for '%s' and systematic uncertainty option '%s'" % (lepton_and_hadTau_selection_and_frWeight, central_or_shift)) # build config files for executing analysis code key_analyze_dir = getKey(process_name, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight, central_or_shift) for jobId in inputFileList.keys(): analyze_job_tuple = (process_name, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight, central_or_shift, jobId) key_analyze_job = getKey(*analyze_job_tuple) ntupleFiles = inputFileList[jobId] if len(ntupleFiles) == 0: logging.warning("No input ntuples for %s --> skipping job !!" 
% (key_analyze_job)) continue cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % analyze_job_tuple) histogramFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%s_%s_%i.root" % analyze_job_tuple) logFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % analyze_job_tuple) rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%i.txt" % analyze_job_tuple) \ if self.select_rle_output else "" applyFakeRateWeights = self.applyFakeRateWeights \ if self.isBDTtraining or not lepton_and_hadTau_selection == "Tight" \ else "disabled" self.jobOptions_analyze[key_analyze_job] = { 'ntupleFiles' : ntupleFiles, 'cfgFile_modified' : cfgFile_modified_path, 'histogramFile' : histogramFile_path, 'logFile' : logFile_path, 'selEventsFileName_output' : rleOutputFile_path, 'electronSelection' : electron_selection, 'muonSelection' : muon_selection, 'apply_leptonGenMatching' : self.apply_leptonGenMatching, 'hadTauSelection' : hadTau_selection, 'apply_hadTauGenMatching' : self.apply_hadTauGenMatching, 'chargeSumSelection' : chargeSumSelection, 'applyFakeRateWeights' : applyFakeRateWeights, 'central_or_shift' : central_or_shift, 'central_or_shifts_local' : central_or_shifts_local, 'selectBDT' : self.isBDTtraining, 'apply_hlt_filter' : self.hlt_filter, 'useNonNominal' : self.use_nonnominal, 'fillGenEvtHistograms' : True, 'useObjectMultiplicity' : True, } self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info, lepton_and_hadTau_selection) # initialize input and output file names for hadd_stage1 key_hadd_stage1_dir = getKey(process_name, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight, "hadd") hadd_stage1_job_tuple = (process_name, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight) key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple) if not key_hadd_stage1_job in 
self.inputFiles_hadd_stage1: self.inputFiles_hadd_stage1[key_hadd_stage1_job] = [] self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile']) self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(self.dirs[key_hadd_stage1_dir][DKEY_HIST], "hadd_stage1_%s_%s_%s.root" % hadd_stage1_job_tuple) if self.isBDTtraining: self.targets.append(self.outputFile_hadd_stage1[key_hadd_stage1_job]) if self.isBDTtraining: continue #---------------------------------------------------------------------------- # split hadd_stage1 files into separate files, one for each event category for category in self.evtCategories: key_hadd_stage1_job = getKey(process_name, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight) key_copyHistograms_dir = getKey(process_name, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight, "copyHistograms") copyHistograms_job_tuple = (category, process_name, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight) key_copyHistograms_job = getKey(*copyHistograms_job_tuple) cfgFile_modified = os.path.join(self.dirs[key_copyHistograms_dir][DKEY_CFGS], "copyHistograms_%s_%s_%s_%s_cfg.py" % copyHistograms_job_tuple) outputFile = os.path.join(self.dirs[key_copyHistograms_dir][DKEY_HIST], "copyHistograms_%s_%s_%s_%s.root" % copyHistograms_job_tuple) self.jobOptions_copyHistograms[key_copyHistograms_job] = { 'inputFile' : self.outputFile_hadd_stage1[key_hadd_stage1_job], 'cfgFile_modified' : cfgFile_modified, 'outputFile' : outputFile, 'logFile' : os.path.join(self.dirs[key_copyHistograms_dir][DKEY_LOGS], os.path.basename(cfgFile_modified).replace("_cfg.py", ".log")), 'categories' : [ category ], } self.createCfg_copyHistograms(self.jobOptions_copyHistograms[key_copyHistograms_job]) #---------------------------------------------------------------------------- # add output files of copyHistograms jobs to list of input files for hadd_stage1_5 for category in self.evtCategories: 
key_copyHistograms_job = getKey(category, process_name, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight) key_hadd_stage1_5_dir = getKey("hadd", chargeSumSelection, lepton_and_hadTau_selection_and_frWeight) hadd_stage1_5_job_tuple = (category, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight) key_hadd_stage1_5_job = getKey(*hadd_stage1_5_job_tuple) if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5: self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = [] self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.jobOptions_copyHistograms[key_copyHistograms_job]['outputFile']) self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job] = os.path.join(self.dirs[key_hadd_stage1_5_dir][DKEY_HIST], "hadd_stage1_5_%s_%s_%s.root" % hadd_stage1_5_job_tuple) if self.isBDTtraining: continue for category in self.evtCategories: # sum fake background contributions for the total of all MC sample # input processes: TT_fake, TTW_fake, TTWW_fake, ... # output process: fakes_mc key_hadd_stage1_5_job = getKey(category, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight) key_addBackgrounds_dir = getKey("addBackgrounds") addBackgrounds_job_fakes_tuple = ("fakes_mc", category, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight) key_addBackgrounds_job_fakes = getKey(*addBackgrounds_job_fakes_tuple) sample_categories = [] sample_categories.extend(self.nonfake_backgrounds) processes_input = [] for sample_category in sample_categories: processes_input.append("%s_fake" % sample_category) self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_%s_cfg.py" % addBackgrounds_job_fakes_tuple), 'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s_%s.root" % addBackgrounds_job_fakes_tuple), 'logFile' : 
os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s_%s.log" % addBackgrounds_job_fakes_tuple), 'categories' : [ getHistogramDir(category, lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, chargeSumSelection) ], 'processes_input' : processes_input, 'process_output' : "fakes_mc" } self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]) # sum conversion background contributions for the total of all MC sample # input processes: TT_Convs, TTW_Convs, TTWW_Convs, ... # output process: Convs addBackgrounds_job_Convs_tuple = ("Convs", category, lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, chargeSumSelection) key_addBackgrounds_job_Convs = getKey(*addBackgrounds_job_Convs_tuple) sample_categories = [] sample_categories.extend(self.nonfake_backgrounds) processes_input = [] for sample_category in sample_categories: processes_input.append("%s_Convs" % sample_category) self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_%s_%s_cfg.py" % addBackgrounds_job_Convs_tuple), 'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s_%s_%s.root" % addBackgrounds_job_Convs_tuple), 'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s_%s_%s.log" % addBackgrounds_job_Convs_tuple), 'categories' : [ getHistogramDir(category, lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, chargeSumSelection) ], 'processes_input' : processes_input, 'process_output' : "Convs" } self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs]) # sum signal contributions from gluon fusion and VBF HH production, # separately for "nonfake" and "fake" contributions genMatch_categories = [ "nonfake", "fake" ] for 
genMatch_category in genMatch_categories: for signal_base, signal_input in self.signal_io.items(): addBackgrounds_job_signal_tuple = (category, chargeSumSelection, lepton_and_hadTau_selection, signal_base, genMatch_category) key_addBackgrounds_job_signal = getKey(*addBackgrounds_job_signal_tuple) if key_addBackgrounds_job_signal in self.jobOptions_addBackgrounds_sum.keys(): continue processes_input = signal_input process_output = signal_base if genMatch_category == "fake": processes_input = [ process_input + "_fake" for process_input in processes_input ] process_output += "_fake" self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_%s_%s_cfg.py" % addBackgrounds_job_signal_tuple), 'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s_%s_%s.root" % addBackgrounds_job_signal_tuple), 'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s_%s_%s.log" % addBackgrounds_job_signal_tuple), 'categories' : [ getHistogramDir(category, lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, chargeSumSelection) ], 'processes_input' : processes_input, 'process_output' : process_output } self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal]) key_hadd_stage2_job = getKey(category, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight) if not key_hadd_stage2_job in self.inputFiles_hadd_stage2: self.inputFiles_hadd_stage2[key_hadd_stage2_job] = [] if lepton_selection == "Tight": self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal]['outputFile']) # initialize input and output file names for hadd_stage2 key_hadd_stage1_5_job = getKey(category, chargeSumSelection, 
lepton_and_hadTau_selection_and_frWeight) key_hadd_stage2_dir = getKey("hadd", chargeSumSelection, lepton_and_hadTau_selection_and_frWeight) hadd_stage2_job_tuple = (category, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight) key_hadd_stage2_job = getKey(*hadd_stage2_job_tuple) if not key_hadd_stage2_job in self.inputFiles_hadd_stage2: self.inputFiles_hadd_stage2[key_hadd_stage2_job] = [] if lepton_selection == "Tight": self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile']) self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs]['outputFile']) self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job]) self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(self.dirs[key_hadd_stage2_dir][DKEY_HIST], "hadd_stage2_%s_%s_%s.root" % hadd_stage2_job_tuple) if self.isBDTtraining: if self.is_sbatch: logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze) self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel) self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze) logging.info("Creating Makefile") lines_makefile = [] self.addToMakefile_analyze(lines_makefile) self.addToMakefile_hadd_stage1(lines_makefile) self.createMakefile(lines_makefile) logging.info("Done") return self.num_jobs logging.info("Creating configuration files to run 'addBackgroundFakes'") for chargeSumSelection in self.chargeSumSelections: for category in self.evtCategories: key_hadd_stage1_5_job = getKey(category, chargeSumSelection, get_lepton_and_hadTau_selection_and_frWeight("Fakeable", "enabled")) key_addFakes_dir = getKey("addBackgroundLeptonFakes") addFakes_job_tuple = (category, chargeSumSelection) key_addFakes_job = getKey("data_fakes", 
*addFakes_job_tuple) self.jobOptions_addFakes[key_addFakes_job] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addFakes_dir][DKEY_CFGS], "addBackgroundLeptonFakes_%s_%s_cfg.py" % addFakes_job_tuple), 'outputFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_HIST], "addBackgroundLeptonFakes_%s_%s.root" % addFakes_job_tuple), 'logFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_LOGS], "addBackgroundLeptonFakes_%s_%s.log" % addFakes_job_tuple), 'category_signal' : getHistogramDir(category, "Tight", "disabled", chargeSumSelection), 'category_sideband' : getHistogramDir(category, "Fakeable", "enabled", chargeSumSelection) } self.createCfg_addFakes(self.jobOptions_addFakes[key_addFakes_job]) key_hadd_stage2_job = getKey(category, chargeSumSelection, get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled")) self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile']) logging.info("Creating configuration files to run 'prepareDatacards'") for chargeSumSelection in self.chargeSumSelections: for category in self.evtCategories: for histogramToFit in self.histograms_to_fit: key_hadd_stage2_job = getKey(category, chargeSumSelection, get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled")) key_prep_dcard_dir = getKey("prepareDatacards") prep_dcard_job_tuple = (self.channel, category, chargeSumSelection, histogramToFit) key_prep_dcard_job = getKey(category, chargeSumSelection, histogramToFit) self.jobOptions_prep_dcard[key_prep_dcard_job] = { 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_%s_cfg.py" % prep_dcard_job_tuple), 'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s_%s.root" % prep_dcard_job_tuple), 'histogramDir' : getHistogramDir(category, "Tight", 
"disabled", chargeSumSelection), 'histogramToFit' : histogramToFit } self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job]) # add shape templates for the following systematic uncertainties: # - 'CMS_ttHl_Clos_norm_e' # - 'CMS_ttHl_Clos_shape_e' # - 'CMS_ttHl_Clos_norm_m' # - 'CMS_ttHl_Clos_shape_m' key_add_syst_fakerate_dir = getKey("addSystFakeRates") add_syst_fakerate_job_tuple = (self.channel, category, chargeSumSelection, histogramToFit) key_add_syst_fakerate_job = getKey(category, chargeSumSelection, histogramToFit) self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job] = { 'inputFile' : self.jobOptions_prep_dcard[key_prep_dcard_job]['datacardFile'], 'cfgFile_modified' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_CFGS], "addSystFakeRates_%s_%s_%s_%s_cfg.py" % add_syst_fakerate_job_tuple), 'outputFile' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_DCRD], "addSystFakeRates_%s_%s_%s_%s.root" % add_syst_fakerate_job_tuple), 'category' : category, 'histogramToFit' : histogramToFit, 'plots_outputFileName' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_PLOT], "addSystFakeRates.png") } histogramDir_nominal = getHistogramDir(category, "Tight", "disabled", chargeSumSelection) for lepton_and_hadTau_type in [ 'e', 'm', 't' ]: lepton_and_hadTau_mcClosure = "Fakeable_mcClosure_%s" % lepton_and_hadTau_type if lepton_and_hadTau_mcClosure not in self.lepton_and_hadTau_selections: continue lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_mcClosure, "enabled") key_addBackgrounds_job_fakes = getKey("fakes_mc", category, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight) histogramDir_mcClosure = self.mcClosure_dir[lepton_and_hadTau_mcClosure+'_%s' %chargeSumSelection] histogramDir_mcClosure = histogramDir_mcClosure.replace(self.evtCategory_inclusive, category) self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job].update({ 'add_Clos_%s' % 
lepton_and_hadTau_type : ("Fakeable_mcClosure_%s" % lepton_and_hadTau_type) in self.lepton_and_hadTau_selections, 'inputFile_nominal_%s' % lepton_and_hadTau_type : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'histogramName_nominal_%s' % lepton_and_hadTau_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_nominal, histogramToFit), 'inputFile_mcClosure_%s' % lepton_and_hadTau_type : self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'], 'histogramName_mcClosure_%s' % lepton_and_hadTau_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_mcClosure, histogramToFit) }) self.createCfg_add_syst_fakerate(self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job]) logging.info("Creating configuration files to run 'makePlots'") for chargeSumSelection in self.chargeSumSelections: key_hadd_stage2_job = getKey(self.evtCategory_inclusive, chargeSumSelection, get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled")) key_makePlots_dir = getKey("makePlots") key_makePlots_job = getKey(chargeSumSelection) self.jobOptions_make_plots[key_makePlots_job] = { 'executable' : self.executable_make_plots, 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_%s_cfg.py" % (self.channel, chargeSumSelection)), 'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s_%s.png" % (self.channel, chargeSumSelection)), 'histogramDir' : getHistogramDir(self.evtCategory_inclusive, "Tight", "disabled", chargeSumSelection), 'label' : '1l1tau', 'make_plots_backgrounds' : self.make_plots_backgrounds } self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job]) if "Fakeable_mcClosure" in self.lepton_and_hadTau_selections: #TODO key_makePlots_job = getKey(chargeSumSelection) key_hadd_stage2 = getKey(chargeSumSelection, get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled")) self.jobOptions_make_plots[key_makePlots_job] = { 
'executable' : self.executable_make_plots_mcClosure, 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2], 'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_mcClosure_%s_%s_cfg.py" % (self.channel, chargeSumSelection)), 'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_mcClosure_%s_%s.png" % (self.channel, chargeSumSelection)) } self.createCfg_makePlots_mcClosure(self.jobOptions_make_plots[key_makePlots_job]) if self.is_sbatch: logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze) self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel) self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze) logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_copyHistograms) self.sbatchFile_copyHistograms = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_copyHistograms_%s.py" % self.channel) self.createScript_sbatch_copyHistograms(self.executable_copyHistograms, self.sbatchFile_copyHistograms, self.jobOptions_copyHistograms) logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addBackgrounds) self.sbatchFile_addBackgrounds = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_%s.py" % self.channel) self.createScript_sbatch_addBackgrounds(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds, self.jobOptions_addBackgrounds) self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_%s.py" % self.channel) self.createScript_sbatch_addBackgrounds(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum) logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFakes) self.sbatchFile_addFakes = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFakes_%s.py" % 
self.channel) self.createScript_sbatch_addFakes(self.executable_addFakes, self.sbatchFile_addFakes, self.jobOptions_addFakes) logging.info("Creating Makefile") lines_makefile = [] self.addToMakefile_analyze(lines_makefile) self.addToMakefile_hadd_stage1(lines_makefile) self.addToMakefile_copyHistograms(lines_makefile, make_target = "phony_copyHistograms", make_dependency = "phony_hadd_stage1") self.addToMakefile_backgrounds_from_data(lines_makefile, make_dependency = "phony_copyHistograms") #---------------------------------------------------------------------------- self.addToMakefile_hadd_stage2(lines_makefile) self.addToMakefile_prep_dcard(lines_makefile) self.addToMakefile_add_syst_fakerate(lines_makefile) self.addToMakefile_make_plots(lines_makefile) self.createMakefile(lines_makefile) logging.info("Done") return self.num_jobs
def create(self):
    """Create all config files needed to run the complete analysis workflow.

    The generated jobs are meant to be run either locally or on the batch
    system. The method performs, in order:

    1. registration of per-(process, lepton selection) and per-channel
       directories in ``self.dirs`` and creation of all of them;
    2. generation of the input file lists for every used sample;
    3. for every lepton selection / sample / systematic shift / job: filling
       ``self.jobOptions_analyze`` and calling ``self.createCfg_analyze``,
       plus the hadd stage1 and stage2 input/output bookkeeping;
    4. filling ``self.jobOptions_prep_dcard`` and calling
       ``self.createCfg_prep_dcard`` for every histogram to fit;
    5. creation of the sbatch submission script (only if ``self.is_sbatch``)
       and of the Makefile.

    Samples are skipped when ``use_it`` is false or when their
    ``sample_category`` is "additional_signal_overlap" or
    "background_data_estimate".

    NOTE(review): the original text of this method had lost its indentation;
    the nesting below was reconstructed from the data dependencies between
    statements -- please confirm against the version-controlled file.
    """
    skipped_sample_categories = ("additional_signal_overlap", "background_data_estimate")

    def is_sample_used(sample_info):
        # A sample takes part only if it is enabled and not in a skipped category.
        return sample_info["use_it"] and sample_info["sample_category"] not in skipped_sample_categories

    # Register one directory set per (process, lepton selection) pair.
    for sample_info in self.samples.values():
        if not is_sample_used(sample_info):
            continue
        process_name = sample_info["process_name_specific"]
        for lepton_selection in self.lepton_selections:
            key_dir = getKey(process_name, lepton_selection)
            for dir_type in [DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES]:
                initDict(self.dirs, [key_dir, dir_type])
                # Config and log files live under configDir, everything else under outputDir.
                base_dir = self.configDir if dir_type in [DKEY_CFGS, DKEY_LOGS] else self.outputDir
                self.dirs[key_dir][dir_type] = os.path.join(
                    base_dir, dir_type, self.channel, "_".join([lepton_selection]), process_name)

    # Register the channel-wide directories.
    for dir_type in [DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT]:
        initDict(self.dirs, [dir_type])
        if dir_type in [DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT]:
            self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
        else:
            self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)

    # Create every registered directory; entries are either a path or a dict of paths.
    for dir_entry in self.dirs.values():
        if isinstance(dir_entry, dict):
            for dir_path in dir_entry.values():
                create_if_not_exists(dir_path)
        else:
            create_if_not_exists(dir_entry)

    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
        if not is_sample_used(sample_info):
            continue
        logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
        inputFileLists[sample_name] = generateInputFileList(
            sample_name, sample_info, self.max_files_per_job, self.debug)

    for lepton_selection in self.lepton_selections:
        for sample_name, sample_info in self.samples.items():
            if not is_sample_used(sample_info):
                continue
            process_name = sample_info["process_name_specific"]

            # Only processes listed in sample_process_run_s are run.
            # NOTE(review): sample_process_run_s is not defined in this method;
            # presumably a module-level collection of process names -- confirm.
            if not any(sprocess_run == process_name for sprocess_run in sample_process_run_s):
                continue

            logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))

            sample_category = sample_info["sample_category"]
            is_mc = (sample_info["type"] == "mc")
            inputFileList = inputFileLists[sample_name]

            # Both keys depend only on (process, lepton selection), so they are
            # computed once per sample instead of once per job. Defining
            # key_hadd_stage1 here also keeps the hadd_stage2 bookkeeping below
            # from reading a stale value when this sample produces no jobs.
            key_dir = getKey(process_name, lepton_selection)
            key_hadd_stage1 = getKey(process_name, lepton_selection)

            for central_or_shift in self.central_or_shifts:
                # Systematic shifts are evaluated for MC samples only.
                if central_or_shift != "central" and not is_mc:
                    continue
                for jobId in inputFileList.keys():
                    # build config files for executing analysis code
                    key_analyze_job = getKey(process_name, lepton_selection, central_or_shift, jobId)
                    ntupleFiles = inputFileList[jobId]
                    if len(ntupleFiles) == 0:
                        # The original formatted the undefined name 'key_job' here,
                        # which raised NameError instead of skipping the job.
                        print("Warning: ntupleFiles['%s'] = %s --> skipping job !!" % (key_analyze_job, ntupleFiles))
                        continue

                    job_tuple = (self.channel, process_name, lepton_selection, central_or_shift, jobId)
                    if self.select_rle_output:
                        rleOutputFile = os.path.join(self.dirs[key_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%i.txt" % job_tuple)
                    else:
                        rleOutputFile = ""
                    if self.use_lumi and is_mc:
                        lumi_scale = sample_info["xsection"] * self.lumi / sample_info["nof_events"]
                    else:
                        lumi_scale = 1.

                    self.jobOptions_analyze[key_analyze_job] = {
                        'ntupleFiles' : ntupleFiles,
                        'cfgFile_modified' : os.path.join(self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % job_tuple),
                        'histogramFile' : os.path.join(self.dirs[key_dir][DKEY_HIST], "%s_%s_%s_%i.root" % \
                            (process_name, lepton_selection, central_or_shift, jobId)),
                        'logFile' : os.path.join(self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % job_tuple),
                        'rleOutputFile' : rleOutputFile,
                        'sample_category' : sample_category,
                        'triggers' : sample_info["triggers"],
                        'lepton_selection' : lepton_selection,
                        'applyFakeRateWeights' : "disabled",
                        'use_HIP_mitigation_mediumMuonId' : True,
                        'is_mc' : is_mc,
                        'central_or_shift' : central_or_shift,
                        'lumi_scale' : lumi_scale,
                        'apply_genWeight' : sample_info["genWeight"] if (is_mc and "genWeight" in sample_info) else False,
                        'apply_trigger_bits' : (is_mc and (self.era == "2015" or (self.era == "2016" and sample_info["reHLT"]))) or not is_mc,
                    }
                    self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job])

                    # initialize input and output file names for hadd_stage1
                    if key_hadd_stage1 not in self.inputFiles_hadd_stage1:
                        self.inputFiles_hadd_stage1[key_hadd_stage1] = []
                    self.inputFiles_hadd_stage1[key_hadd_stage1].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                    self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage1_%s_%s_%s.root" % \
                        (self.channel, process_name, lepton_selection))
                    # Debug output; prints the same space-separated fields as before.
                    print("%s %s %s %s %s" % (key_hadd_stage1, self.channel, process_name, lepton_selection,
                                              self.outputFile_hadd_stage1[key_hadd_stage1]))

            # initialize input and output file names for hadd_stage2
            key_hadd_stage2 = getKey(lepton_selection)
            if key_hadd_stage2 not in self.inputFiles_hadd_stage2:
                self.inputFiles_hadd_stage2[key_hadd_stage2] = []
            # Add the stage1 output once per sample, and only if at least one
            # job of this sample registered it above.
            if key_hadd_stage1 in self.outputFile_hadd_stage1:
                self.inputFiles_hadd_stage2[key_hadd_stage2].append(self.outputFile_hadd_stage1[key_hadd_stage1])
            self.outputFile_hadd_stage2[key_hadd_stage2] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage2_%s_%s.root" % \
                (self.channel, lepton_selection))

    logging.info("Creating configuration files to run 'prepareDatacards'")
    # Work on copies of the process lists.
    self.prep_dcard_processesToCopy = list(self.prep_dcard_processesToCopy)
    self.prep_dcard_signals = list(self.prep_dcard_signals)
    for histogramToFit in self.histograms_to_fit:
        key_prep_dcard_job = getKey(histogramToFit)
        # NOTE(review): lepton_selection is the value left over from the last
        # iteration of the loop above, so the datacards read the stage2 output
        # of the last lepton selection -- confirm this is intended.
        key_hadd_stage2 = getKey(lepton_selection)
        self.jobOptions_prep_dcard[key_prep_dcard_job] = {
            'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2],
            'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "prepareDatacards_%s_%s_cfg.py" % (self.channel, histogramToFit)),
            'datacardFile' : os.path.join(self.dirs[DKEY_DCRD], "prepareDatacards_%s_%s.root" % (self.channel, histogramToFit)),
            'histogramDir' : self.histogramDir_prep_dcard,
            'histogramToFit' : histogramToFit,
            'label' : None,
        }
        self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])

    if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done")
def create(self): """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system """ for sample_name, sample_info in self.samples.items(): if not sample_info["use_it"]: continue process_name = sample_info["process_name_specific"] sample_category = sample_info["sample_category"] is_mc = (sample_info["type"] == "mc") logging.info("Building dictionaries for sample %s..." % process_name) central_or_shift_extensions = ["", "hadd", "addBackgrounds"] central_or_shifts_extended = central_or_shift_extensions + self.central_or_shifts for central_or_shift_or_dummy in central_or_shifts_extended: process_name_extended = [ process_name, "hadd" ] for process_name_or_dummy in process_name_extended: if central_or_shift_or_dummy in [ "hadd", "addBackgrounds" ] and process_name_or_dummy in [ "hadd" ]: if not is_mc: continue if not self.accept_central_or_shift(central_or_shift_or_dummy, sample_info): continue key_dir = getKey(process_name_or_dummy, central_or_shift_or_dummy) for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES ]: initDict(self.dirs, [ key_dir, dir_type ]) if dir_type in [ DKEY_CFGS, DKEY_LOGS ]: self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel, process_name_or_dummy, central_or_shift_or_dummy) else: self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, process_name_or_dummy) for subdirectory in [ "addBackgrounds", "prepareDatacards" ]: key_dir = getKey(subdirectory) for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]: initDict(self.dirs, [ key_dir, dir_type ]) if dir_type in [ DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]: self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel, subdirectory) else: self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, subdirectory) for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_HIST, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, 
DKEY_COMBINE_OUTPUT ]: initDict(self.dirs, [ dir_type ]) if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_HADD_RT, DKEY_PLOT, DKEY_COMBINE_OUTPUT ]: self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel) else: self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel) numDirectories = 0 for key in self.dirs.keys(): if type(self.dirs[key]) == dict: numDirectories += len(self.dirs[key]) else: numDirectories += 1 logging.info("Creating directory structure (numDirectories = %i)" % numDirectories) numDirectories_created = 0; frac = 1 for key in self.dirs.keys(): if type(self.dirs[key]) == dict: for dir_type in self.dirs[key].keys(): create_if_not_exists(self.dirs[key][dir_type]) numDirectories_created += len(self.dirs[key]) else: create_if_not_exists(self.dirs[key]) numDirectories_created = numDirectories_created + 1 while 100*numDirectories_created >= frac*numDirectories: logging.info(" %i%% completed" % frac) frac = frac + 1 logging.info("Done.") inputFileLists = {} for sample_name, sample_info in self.samples.items(): if not sample_info["use_it"]: continue logging.info("Checking input files for sample %s" % sample_info["process_name_specific"]) inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job) self.inputFileIds = {} for sample_name, sample_info in self.samples.items(): if not sample_info["use_it"]: continue process_name = sample_info["process_name_specific"] logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name)) inputFileList = inputFileLists[sample_name] is_mc = (sample_info["type"] == "mc") sample_category = sample_info["sample_category"] for central_or_shift in self.central_or_shifts: if central_or_shift != "central" and not is_mc: continue if not self.accept_central_or_shift(central_or_shift, sample_info): continue key_analyze_dir = getKey(process_name, central_or_shift) for jobId in inputFileList.keys(): 
analyze_job_tuple = (process_name, central_or_shift, jobId) key_analyze_job = getKey(*analyze_job_tuple) ntupleFiles = inputFileList[jobId] if len(ntupleFiles) == 0: logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job)) continue rleOutputFile = os.path.join( self.dirs[key_analyze_dir][DKEY_RLES], "rle_{channel}_{process_name}_{central_or_shift}_{jobId}_%s_%s.txt".format( channel = self.channel, process_name = process_name, central_or_shift = central_or_shift, jobId = jobId, )) if self.select_rle_output else "" cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%i_cfg.py" % analyze_job_tuple) logFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%i.log" % analyze_job_tuple) histogramFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%i.root" % analyze_job_tuple) self.jobOptions_analyze[key_analyze_job] = { 'ntupleFiles' : ntupleFiles, 'cfgFile_modified' : cfgFile_modified_path, 'histogramFile' : histogramFile_path, 'selEventsFileName_output' : rleOutputFile, 'logFile' : logFile_path, 'absEtaBins_e' : self.absEtaBins_e, 'ptBins_e' : self.ptBins_e, 'absEtaBins_mu' : self.absEtaBins_mu, 'ptBins_mu' : self.ptBins_mu, 'central_or_shift' : central_or_shift, 'fillGenEvtHistograms' : self.fillGenEvtHistograms, 'triggers_mu_cfg' : "leptonFR_triggers['{}']['{}']".format(self.era, 'mu'), 'triggers_e_cfg' : "leptonFR_triggers['{}']['{}']".format(self.era, 'e'), 'lep_mva_cut_e' : float(self.lep_mva_cut_e), 'lep_mva_cut_mu' : float(self.lep_mva_cut_mu), } self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info) # initialize input and output file names for hadd_stage1 key_hadd_stage1_dir = getKey(process_name) key_hadd_stage1_job = getKey(process_name) if not key_hadd_stage1_job in self.inputFiles_hadd_stage1: self.inputFiles_hadd_stage1[key_hadd_stage1_job] = [] 
self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile']) self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(self.dirs[key_hadd_stage1_dir][DKEY_HIST], "hadd_stage1_%s.root" % process_name) # initialize input and output file names for hadd_stage1_5 key_hadd_stage1_5_dir = getKey("hadd") key_hadd_stage1_5_job = getKey('') if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5: self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = [] for key_hadd_stage1_job in self.outputFile_hadd_stage1.keys(): self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.outputFile_hadd_stage1[key_hadd_stage1_job]) self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job] = os.path.join(self.dirs[key_hadd_stage1_5_dir][DKEY_HIST], "hadd_stage1_5.root" ) # sum fake contributions for the total of all MC samples # input processes: TTj,... ## HERE !! # output process: fakes_mc key_hadd_stage1_5_job = getKey('') key_addBackgrounds_dir = getKey("addBackgrounds") key_addBackgrounds_job_sum = getKey("fakes_mc") sample_categories = [] sample_categories.extend(self.nonfake_backgrounds) sample_categories.extend(self.ttHProcs) processes_input = [] for sample_category in sample_categories: processes_input.append("%sj" % sample_category) self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_sum] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_cfg.py" % "fakes_mc"), 'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s.root" % "fakes_mc"), 'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s.log" % "fakes_mc"), 'categories' : [ "LeptonFakeRate/numerator/electrons_tight", "LeptonFakeRate/denominator/electrons_fakeable", "LeptonFakeRate/numerator/muons_tight", "LeptonFakeRate/denominator/muons_fakeable" ], 'processes_input' 
: processes_input, 'process_output' : "fakes_mc", 'histogramsToCopy' : list(self.histograms_to_fit.keys()), 'sysShifts' : [] } self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_sum]) # create configuration files to run 'addBackgrounds_LeptonFakeRate' key_addBackgrounds_job_leptonFR = getKey('') self.jobOptions_addBackgrounds_LeptonFakeRate[key_addBackgrounds_job_leptonFR] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], os.path.basename(self.cfgFile_addBackgrounds_LeptonFakeRate)), 'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackground_LeptonFakeRate.root"), 'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], os.path.basename(self.cfgFile_addBackgrounds_LeptonFakeRate.replace("_cfg.py", ".log")) ), } self.createCfg_addBackgrounds_LeptonFakeRate(self.jobOptions_addBackgrounds_LeptonFakeRate[key_addBackgrounds_job_leptonFR]) # create configuration files to run 'addBackgrounds_Convs_LeptonFakeRate' key_addBackgrounds_job_conv = getKey('') self.jobOptions_addBackgrounds_Convs_LeptonFakeRate[key_addBackgrounds_job_conv] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], os.path.basename(self.cfgFile_addBackgrounds_Convs_LeptonFakeRate)), 'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackground_Convs_LeptonFakeRate.root"), 'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], os.path.basename(self.cfgFile_addBackgrounds_Convs_LeptonFakeRate.replace("_cfg.py", ".log")) ), } self.createCfg_addBackgrounds_Convs_LeptonFakeRate(self.jobOptions_addBackgrounds_Convs_LeptonFakeRate[key_addBackgrounds_job_conv]) # initialize input and output file names for hadd_stage2 key_hadd_stage2_dir = getKey("hadd") key_hadd_stage2_job = getKey('') if 
not key_hadd_stage2_job in self.inputFiles_hadd_stage2: self.inputFiles_hadd_stage2[key_hadd_stage2_job] = [] # CV: hadd_stage_1_5 output file does not need to be added as input for hadd_stage_2, # as addBackgrounds_LeptonFakeRate output file contains all histograms except fakes_mc self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_sum]['outputFile']) self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_LeptonFakeRate[key_addBackgrounds_job_leptonFR]['outputFile']) self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_Convs_LeptonFakeRate[key_addBackgrounds_job_conv]['outputFile']) self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(self.dirs[key_hadd_stage2_dir][DKEY_HIST], "hadd_stage2.root") # We need to generate the eta and pt bins for electrons and muons lepton_bins = {} categories = [] for lepton in ['electron', 'muon']: if lepton not in lepton_bins: lepton_bins[lepton] = {} absEtaBins = None ptBins = None lepton_short = None if lepton == 'electron': absEtaBins = self.absEtaBins_e ptBins = self.ptBins_e lepton_short = 'e' elif lepton == 'muon': absEtaBins = self.absEtaBins_mu ptBins = self.ptBins_mu lepton_short = 'mu' else: raise ValueError('Invalid lepton type: %s' % lepton) for selection in ['tight', 'fakeable']: if selection not in lepton_bins[lepton]: lepton_bins[lepton][selection] = [] num_or_den = None if selection == 'tight': num_or_den = 'numerator' elif selection == 'fakeable': num_or_den = 'denominator' else: raise ValueError('Invalid lepton selection: %s' % selection) for absEtaBin_idx in range(0, len(absEtaBins) - 1): absEtaBinLowerEdge = absEtaBins[absEtaBin_idx] absEtaBinUpperEdge = absEtaBins[absEtaBin_idx + 1] absEtaBinString = getEtaBin(absEtaBinLowerEdge, absEtaBinUpperEdge) for ptBin_idx in range(0, len(ptBins) - 1): ptBinsLowerEdge = ptBins[ptBin_idx] ptBinsUpperEdge = ptBins[ptBin_idx + 1] 
ptBinString = getPtBin(ptBinsLowerEdge, ptBinsUpperEdge) absEta_and_ptBinString = '%s_%s' % (absEtaBinString, ptBinString) lepton_bins[lepton][selection].append( construct_lepton_params( lepton, lepton_short, selection, absEta_and_ptBinString, error_msg = "No fit parameter range specified for abs(eta) range = (%.3f, %.3f) and " "pT range = (%.3f, %.3f) for lepton type '%s' !!" % \ (absEtaBinLowerEdge, absEtaBinUpperEdge, ptBinsLowerEdge, ptBinsUpperEdge, lepton) ) + (absEtaBinLowerEdge, absEtaBinUpperEdge, ptBinsLowerEdge, ptBinsUpperEdge, 0) ) categories.append( ( "LeptonFakeRate/%s/%ss_%s/%s/%s" % (num_or_den, lepton, selection, absEtaBinString, ptBinString), "%ss_%s_%s_shapes" % (lepton, selection, absEta_and_ptBinString), ) ) # Let's also add inclusive category lepton_bins[lepton][selection].append( construct_lepton_params( lepton, lepton_short, selection, 'incl', error_msg = "No fit parameter range specified for lepton type %s" % lepton ) + (-1., -1., -1., -1., 1) ) categories.append( ( "LeptonFakeRate/%s/%ss_%s/incl" % (num_or_den, lepton, selection), "%ss_%s_incl_shapes" % (lepton, selection), ) ) lepton_bins_merged = [] for lepton_type in lepton_bins: for lepton_selection in lepton_bins[lepton_type]: lepton_bins_merged.extend(lepton_bins[lepton_type][lepton_selection]) if self.prep_dcard: logging.info("Creating configuration files to run 'prepareDatacards_LeptonFakeRate'") datacards = [] for histogramToFit in self.histograms_to_fit: key_prep_dcard_dir = getKey("prepareDatacards") key_prep_dcard_job = getKey(histogramToFit) datacard = os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s.root" % (histogramToFit)) self.jobOptions_prep_dcard[key_prep_dcard_job] = { 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_LeptonFakeRate_%s_cfg.py" % histogramToFit), 'datacardFile' : datacard, 'histogramDir' : (self.histogramDir_prep_dcard), 
'histogramToFit' : histogramToFit, 'label' : None, 'categories' : categories, } datacards.append(datacard) self.createCfg_prep_dcard_LeptonFakeRate(self.jobOptions_prep_dcard[key_prep_dcard_job]) # Create setupDatacards_LeptonFakeRate.py script from the template systematics_leptonFR = [] for systematic in self.central_or_shifts: if systematic == 'central': continue systematic_name = systematic.replace('Up', '').replace('Down', '') if systematic_name not in systematics_leptonFR: systematics_leptonFR.append(systematic_name) setup_dcards_template_file = os.path.join(jinja_template_dir, 'setupDatacards_LeptonFakeRate.py.template') with open(setup_dcards_template_file, 'r') as setup_dcards_template_file_ptr: setup_dcards_template = setup_dcards_template_file_ptr.read() setup_dcards_script = jinja2.Template(setup_dcards_template).render( leptons = lepton_bins_merged, central_or_shifts = systematics_leptonFR, signal_process = "QCD" if self.use_QCD_fromMC else "data_fakes", ) setup_dcards_script_path = os.path.join(self.dirs[DKEY_SCRIPTS], 'setupDatacards_LeptonFakeRate.py') logging.debug("writing setupDatacards_LeptonFakeRate script file = '%s'" % setup_dcards_script_path) with codecs.open(setup_dcards_script_path, "w", "utf-8") as setup_dcards_script_file: setup_dcards_script_file.write(setup_dcards_script) setup_dcards_script_file.flush() os.fsync(setup_dcards_script_file.fileno()) add_chmodX(setup_dcards_script_path) if self.use_QCD_fromMC: postfit_plot_script_path = os.path.join(os.environ['CMSSW_BASE'], 'src/tthAnalysis/HiggsToTauTau/data/leptonFR/scripts/postFitPlot_fakes_from_mc.py') yieldtable_script_path = os.path.join(os.environ['CMSSW_BASE'], 'src/tthAnalysis/HiggsToTauTau/data/leptonFR/scripts/yieldTable_fakes_from_mc.py') else: postfit_plot_script_path = os.path.join(os.environ['CMSSW_BASE'], 'src/tthAnalysis/HiggsToTauTau/data/leptonFR/scripts/postFitPlot_fakes_from_data.py') yieldtable_script_path = os.path.join(os.environ['CMSSW_BASE'], 
'src/tthAnalysis/HiggsToTauTau/data/leptonFR/scripts/yieldTable_fakes_from_data.py') # Create run_postFit.sh script from the template combine_output_dir = os.path.join(self.dirs[DKEY_COMBINE_OUTPUT], 'output') postfit_template_file = os.path.join(jinja_template_dir, 'run_postFit.sh.template') with open(postfit_template_file, 'r') as postfit_template_file_ptr: postfit_template = postfit_template_file_ptr.read() for lepton in ['electron', 'muon']: for selection in ['fakeable', 'tight']: is_num = selection == 'tight' for params in lepton_bins[lepton][selection]: l_array, l_range, l_sub_dir, l_eta_low, l_eta_high, l_pt_low, l_pt_high, l_is_inclusive = params postfit_script = jinja2.Template(postfit_template).render( new_cmssw_base = self.cmssw_base_dir_combine, setup_dcards_script = setup_dcards_script_path, postfit_plot_script = postfit_plot_script_path, int_lumi_data = self.lumi, yieldtable_script = yieldtable_script_path, output_dir = combine_output_dir, numerator_plotLabel = self.numerator_plotLabel, denominator_plotLabel = self.denominator_plotLabel, l_array = l_array, l_range = l_range, l_sub_dir = l_sub_dir, l_eta_low = l_eta_low, l_eta_high = l_eta_high, l_pt_low = l_pt_low, l_pt_high = l_pt_high, l_is_inclusive = l_is_inclusive, is_num = is_num, numerator_output_dir = os.path.join(combine_output_dir, 'mlfit_LeptonFakeRate_%s' % self.numerator_histogram), denominator_output_dir = os.path.join(combine_output_dir, 'mlfit_LeptonFakeRate_%s' % self.denominator_histogram), selection = selection, lepton_letter = 'e' if lepton == 'electron' else 'mu', grep_value = "QCD" if self.use_QCD_fromMC else "data_fakes", ) postfit_script_path = os.path.join( self.dirs[DKEY_SCRIPTS], 'mlfit_%s_%s.sh' % (self.numerator_histogram if is_num else self.denominator_histogram, l_array) ) logging.debug("Writing run_postFit script file = '%s'" % postfit_script_path) with codecs.open(postfit_script_path, "w", "utf-8") as postfit_script_file: postfit_script_file.write(postfit_script) 
postfit_script_file.flush() os.fsync(postfit_script_file.fileno()) add_chmodX(postfit_script_path) key_prep_dcard_dir = getKey("prepareDatacards") fit_value_file = os.path.join(combine_output_dir, 'fit_values.txt') makefile_template_file = os.path.join(jinja_template_dir, 'Makefile_postFit.template') makefile_template = open(makefile_template_file, 'r').read() makefile_templatized = jinja2.Template(makefile_template).render( new_cmssw_base = self.cmssw_base_dir_combine, setup_dcards_script = setup_dcards_script_path, numerator_histogram = self.numerator_histogram, denominator_histogram = self.denominator_histogram, scripts_dir = self.dirs[DKEY_SCRIPTS], numerator_datacard = os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s.root" % self.numerator_histogram), denominator_datacard = os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s.root" % self.denominator_histogram), output_dir = combine_output_dir, numerator_output_dir = os.path.join(combine_output_dir, 'mlfit_LeptonFakeRate_%s' % self.numerator_histogram), denominator_output_dir = os.path.join(combine_output_dir, 'mlfit_LeptonFakeRate_%s' % self.denominator_histogram), lepton_bins = lepton_bins, fit_values = fit_value_file, ) makefile_path = os.path.join(self.dirs[DKEY_SCRIPTS], 'Makefile_postFit') logging.debug("Writing run_postFit script file = '%s'" % makefile_path) with codecs.open(makefile_path, "w", "utf-8") as makefile_path_file: makefile_path_file.write(makefile_templatized) makefile_path_file.flush() os.fsync(makefile_path_file.fileno()) self.jobOptions_combine = { 'inputFile' : ' '.join(datacards), 'outputFile' : fit_value_file, 'makefile_path' : makefile_path, 'logFile' : os.path.join(self.dirs[DKEY_LOGS], 'postFit.log'), } key_comp_LeptonFakeRate = getKey('') leptonFR_final_output = os.path.join(combine_output_dir, 'leptonFakeRates.root') self.jobOptions_comp_LeptonFakeRate[key_comp_LeptonFakeRate] = { 'inputFile' : [ fit_value_file, 
self.outputFile_hadd_stage2[key_hadd_stage2_job] ], 'outputFile' : leptonFR_final_output, 'absEtaBins_e' : self.absEtaBins_e, 'ptBins_e' : self.ptBins_e, 'absEtaBins_mu' : self.absEtaBins_mu, 'ptBins_mu' : self.ptBins_mu, 'logFile' : os.path.join(self.dirs[DKEY_LOGS], os.path.basename(self.cfgFile_comp_LeptonFakeRate).replace('_cfg.py', '.log')), 'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], os.path.basename(self.cfgFile_comp_LeptonFakeRate)), 'plots_outputFileName' : os.path.join(self.dirs[DKEY_PLOT], "comp_LeptonFakeRate.png") } self.createCfg_comp_LeptonFakeRate(self.jobOptions_comp_LeptonFakeRate[key_comp_LeptonFakeRate]) self.targets.append(self.jobOptions_comp_LeptonFakeRate[key_comp_LeptonFakeRate]['outputFile']) self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_LeptonFakeRate.py") self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_LeptonFakeRate.py") self.sbatchFile_addBackgrounds_LeptonFakeRate = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_LeptonFakeRate.py") self.sbatchFile_addBackgrounds_Convs_LeptonFakeRate = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_Convs_LeptonFakeRate.py") self.sbatchFile_comp_LeptonFakeRate = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_comp_LeptonFakeRate.py") if self.is_sbatch: logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze) self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze) self.createScript_sbatch(self.executable_addBackgrounds_recursively, self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum) self.createScript_sbatch(self.executable_addBackgrounds_LeptonFakeRate, self.sbatchFile_addBackgrounds_LeptonFakeRate, self.jobOptions_addBackgrounds_LeptonFakeRate) self.createScript_sbatch(self.executable_addBackgrounds_LeptonFakeRate, self.sbatchFile_addBackgrounds_Convs_LeptonFakeRate, 
self.jobOptions_addBackgrounds_Convs_LeptonFakeRate) logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_comp_LeptonFakeRate) self.createScript_sbatch(self.executable_comp_LeptonFakeRate, self.sbatchFile_comp_LeptonFakeRate, self.jobOptions_comp_LeptonFakeRate) lines_makefile = [] self.addToMakefile_analyze(lines_makefile) self.addToMakefile_hadd_stage1(lines_makefile) self.addToMakefile_backgrounds_from_data(lines_makefile) ## this step now does both e Conv, data_fakes and fakes_mc computation # self.addToMakefile_backgrounds_from_MC(lines_makefile) self.addToMakefile_hadd_stage2(lines_makefile, make_dependency = " ".join([ "phony_addBackgrounds_LeptonFakeRate", "phony_addBackgrounds_Convs_LeptonFakeRate", "phony_addBackgrounds_sum" ])) self.addToMakefile_prep_dcard(lines_makefile) self.addToMakefile_combine(lines_makefile) self.addToMakefile_comp_LeptonFakeRate(lines_makefile) self.createMakefile(lines_makefile) logging.info("Done.") return self.num_jobs
def create(self):
    """Create all config files, batch-submission scripts and the Makefile of the analysis workflow.

    The method proceeds in the following order:
      1. register and create the config/output directories in self.dirs;
      2. build one 'analyze' job per (selection, fake-rate weight, hadTau charge,
         sample, systematic shift, input-file chunk) and record its histogram
         file as input of hadd_stage1;
      3. build the 'addBackgrounds' jobs (non-fake and fake sums per MC sample,
         then the 'fakes_mc' sum) and fill the hadd_stage1_5 / hadd_stage2 inputs;
      4. build the 'addBackgroundFakes', 'prepareDatacards' and 'makePlots' jobs;
      5. write the sbatch submission scripts (only if self.is_sbatch) and the Makefile.

    If self.isBDTtraining is set, only the 'analyze' and hadd_stage1 steps are
    put into the Makefile and the method returns early.

    Returns:
        self.num_jobs (not assigned directly in this method).

    Raises:
        ValueError: if self.applyFakeRateWeights is neither "3L" nor "2tau".
    """
    # ------------------------------------------------------------------
    # 1. Directory bookkeeping
    # ------------------------------------------------------------------
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
            continue
        process_name = sample_info["process_name_specific"]
        for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections:
            for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights:
                if lepton_and_hadTau_frWeight == "enabled" and not lepton_and_hadTau_selection.startswith("Fakeable"):
                    continue
                lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(
                    lepton_and_hadTau_selection, lepton_and_hadTau_frWeight)
                for hadTau_charge_selection in self.hadTau_charge_selections:
                    key_dir = getKey(process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                    for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES ]:
                        initDict(self.dirs, [ key_dir, dir_type ])
                        # config files and logs live under configDir, everything else under outputDir
                        if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                            base_dir = self.configDir
                        else:
                            base_dir = self.outputDir
                        self.dirs[key_dir][dir_type] = os.path.join(
                            base_dir, dir_type, self.channel,
                            "_".join([ lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection ]),
                            process_name)
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
        initDict(self.dirs, [ dir_type ])
        if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
            self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
        else:
            self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)

    # self.dirs mixes plain paths (channel-wide) with nested dicts (per-process)
    for key in self.dirs.keys():
        if isinstance(self.dirs[key], dict):
            for dir_type in self.dirs[key].keys():
                create_if_not_exists(self.dirs[key][dir_type])
        else:
            create_if_not_exists(self.dirs[key])

    # ------------------------------------------------------------------
    # 2. Input file lists: built once per sample and reused below
    # ------------------------------------------------------------------
    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
            continue
        logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
        inputFileLists[sample_name] = generateInputFileList(sample_name, sample_info, self.max_files_per_job, self.debug)

    # ------------------------------------------------------------------
    # 3. 'analyze' and 'addBackgrounds' jobs
    # ------------------------------------------------------------------
    for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections:
        lepton_selection = lepton_and_hadTau_selection
        if self.applyFakeRateWeights == "2tau":
            lepton_selection = "Tight"
        hadTau_selection = "|".join([ lepton_and_hadTau_selection, self.hadTau_selection_part2 ])
        if lepton_and_hadTau_selection == "forBDTtraining":
            lepton_selection = "Loose"
            hadTau_selection = "Tight|%s" % self.hadTau_selection_relaxed

        for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights:
            if lepton_and_hadTau_frWeight == "enabled" and not lepton_and_hadTau_selection.startswith("Fakeable"):
                continue
            if lepton_and_hadTau_frWeight == "disabled" and not lepton_and_hadTau_selection in [ "Tight", "forBDTtraining" ]:
                continue
            lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(
                lepton_and_hadTau_selection, lepton_and_hadTau_frWeight)

            for hadTau_charge_selection in self.hadTau_charge_selections:

                for sample_name, sample_info in self.samples.items():
                    if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
                        continue
                    process_name = sample_info["process_name_specific"]
                    logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))

                    sample_category = sample_info["sample_category"]
                    is_mc = (sample_info["type"] == "mc")
                    is_signal = (sample_category == "signal")

                    for central_or_shift in self.central_or_shifts:
                        inputFileList = inputFileLists[sample_name]

                        # None of the filters below depends on the job id, so they are
                        # evaluated once per systematic instead of once per job.
                        if central_or_shift != "central":
                            isFR_shape_shift = False
                            for FR_shape_shift in [ "CMS_ttHl_FRe_shape", "CMS_ttHl_FRm_shape", "CMS_ttHl_FRjt_norm", "CMS_ttHl_FRjt_shape" ]:
                                if central_or_shift.find(FR_shape_shift) != -1:
                                    isFR_shape_shift = True
                            if not ((lepton_and_hadTau_selection == "Fakeable" and hadTau_charge_selection == "OS" and isFR_shape_shift) or
                                    (lepton_and_hadTau_selection == "Tight" and hadTau_charge_selection == "OS")):
                                continue
                            if not is_mc and not isFR_shape_shift:
                                continue
                        if central_or_shift.startswith("CMS_ttHl_thu_shape_ttH") and sample_category != "signal":
                            continue
                        if central_or_shift.startswith("CMS_ttHl_thu_shape_ttW") and sample_category != "TTW":
                            continue
                        if central_or_shift.startswith("CMS_ttHl_thu_shape_ttZ") and sample_category != "TTZ":
                            continue

                        for jobId in inputFileList.keys():
                            # build config files for executing analysis code
                            key_dir = getKey(process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                            key_analyze_job = getKey(process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection, central_or_shift, jobId)
                            ntupleFiles = inputFileList[jobId]
                            if len(ntupleFiles) == 0:
                                # The original formatted the undefined name 'key_file' here,
                                # which raised NameError instead of emitting the warning.
                                print("Warning: ntupleFiles['%s'] = %s --> skipping job !!" % (key_analyze_job, ntupleFiles))
                                continue
                            self.jobOptions_analyze[key_analyze_job] = {
                                'ntupleFiles' : ntupleFiles,
                                'cfgFile_modified' : os.path.join(self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%s_%i_cfg.py" % \
                                    (self.channel, process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection, central_or_shift, jobId)),
                                'histogramFile' : os.path.join(self.dirs[key_dir][DKEY_HIST], "%s_%s_%s_%s_%i.root" % \
                                    (process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection, central_or_shift, jobId)),
                                'logFile' : os.path.join(self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%s_%i.log" % \
                                    (self.channel, process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection, central_or_shift, jobId)),
                                'rleOutputFile' : os.path.join(self.dirs[key_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%s_%i.txt" % \
                                    (self.channel, process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection, central_or_shift, jobId)) if self.select_rle_output else "",
                                'sample_category' : sample_category,
                                'process_name_specific' : sample_info["process_name_specific"],
                                'triggers' : sample_info["triggers"],
                                'lepton_selection' : lepton_selection,
                                'apply_leptonGenMatching' : self.apply_leptonGenMatching,
                                'hadTau_selection' : hadTau_selection,
                                'apply_hadTauGenMatching' : self.apply_hadTauGenMatching,
                                'hadTau_charge_selection' : hadTau_charge_selection,
                                # fake-rate weights are switched off when both lepton and hadTau selection are "Tight"
                                'applyFakeRateWeights' : self.applyFakeRateWeights if not (lepton_selection == "Tight" and hadTau_selection.find("Tight") != -1) else "disabled",
                                # HIP mitigation flags are forced on here rather than read from sample_info
                                'use_HIP_mitigation_bTag' : True,
                                'use_HIP_mitigation_mediumMuonId' : True,
                                'is_mc' : is_mc,
                                'central_or_shift' : central_or_shift,
                                'lumi_scale' : 1. if not (self.use_lumi and is_mc) else sample_info["xsection"] * self.lumi / sample_info["nof_events"],
                                'apply_genWeight' : sample_info["genWeight"] if (is_mc and "genWeight" in sample_info) else False,
                                'apply_trigger_bits' : (is_mc and (self.era == "2015" or (self.era == "2016" and sample_info["reHLT"]))) or not is_mc,
                                'selectBDT': self.isBDTtraining,
                                'changeBranchNames' : self.changeBranchNames
                            }
                            self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job])

                            # initialize input and output file names for hadd_stage1
                            key_hadd_stage1 = getKey(process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                            if not key_hadd_stage1 in self.inputFiles_hadd_stage1:
                                self.inputFiles_hadd_stage1[key_hadd_stage1] = []
                            self.inputFiles_hadd_stage1[key_hadd_stage1].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                            self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage1_%s_%s_%s_%s.root" % \
                                (self.channel, process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection))

                    # BDT training only needs the 'analyze' + hadd_stage1 steps; this single
                    # check also covers the data branch further down.
                    if self.isBDTtraining:
                        continue

                    if is_mc:
                        logging.info("Creating configuration files to run 'addBackgrounds' for sample %s" % process_name)

                        sample_categories = [ sample_category ]
                        if is_signal:
                            sample_categories = [ "signal", "ttH", "ttH_htt", "ttH_hww", "ttH_hzz" ]
                        for sample_category in sample_categories:
                            # sum non-fake contributions for each MC sample separately
                            # input processes: TT2t0e0m0j, TT1t1e0m0j, TT1t0e1m0j", TT0t2e0m0j, TT0t1e1m0j, TT0t0e2m0j; TTW2t0e0m0j,...
                            # output processes: TT; ...
                            key_hadd_stage1 = getKey(process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                            key_addBackgrounds_job = getKey(process_name, sample_category, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                            processes_input = None
                            if sample_category in [ "signal" ]:
                                lepton_and_hadTau_genMatches = []
                                lepton_and_hadTau_genMatches.extend(self.lepton_and_hadTau_genMatches_nonfakes)
                                lepton_and_hadTau_genMatches.extend(self.lepton_and_hadTau_genMatches_fakes)
                                processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in lepton_and_hadTau_genMatches ]
                            elif sample_category in [ "ttH" ]:
                                processes_input = []
                                processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in self.lepton_and_hadTau_genMatches_nonfakes ])
                                processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in self.lepton_and_hadTau_genMatches_nonfakes ])
                                processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in self.lepton_and_hadTau_genMatches_nonfakes ])
                            else:
                                processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_and_hadTau_genMatches_nonfakes ]
                            if processes_input:
                                self.jobOptions_addBackgrounds[key_addBackgrounds_job] = {
                                    'inputFile' : self.outputFile_hadd_stage1[key_hadd_stage1],
                                    'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "addBackgrounds_%s_%s_%s_%s_%s_cfg.py" % \
                                        (self.channel, process_name, sample_category, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)),
                                    'outputFile' : os.path.join(self.dirs[DKEY_HIST], "addBackgrounds_%s_%s_%s_%s_%s.root" % \
                                        (self.channel, process_name, sample_category, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)),
                                    'logFile' : os.path.join(self.dirs[DKEY_LOGS], "addBackgrounds_%s_%s_%s_%s_%s.log" % \
                                        (self.channel, process_name, sample_category, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)),
                                    'categories' : [ getHistogramDir(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, hadTau_charge_selection) ],
                                    'processes_input' : processes_input,
                                    'process_output' : sample_category
                                }
                                self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds[key_addBackgrounds_job])

                                # initialize input and output file names for hadd_stage1_5
                                key_hadd_stage1_5 = getKey(lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                                if not key_hadd_stage1_5 in self.inputFiles_hadd_stage1_5:
                                    self.inputFiles_hadd_stage1_5[key_hadd_stage1_5] = []
                                self.inputFiles_hadd_stage1_5[key_hadd_stage1_5].append(self.jobOptions_addBackgrounds[key_addBackgrounds_job]['outputFile'])
                                self.outputFile_hadd_stage1_5[key_hadd_stage1_5] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage1_5_%s_%s_%s.root" % \
                                    (self.channel, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection))

                            # sum fake contributions for each MC sample separately
                            # input processes: TT1t0e0m1j, TT0t1e0m1j, TT0t0e1m1j, TT0t0e0m2j; TTW1t0e0m1j,...
                            # output processes: TT_fake; ...
                            key_hadd_stage1 = getKey(process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                            key_addBackgrounds_job = getKey(process_name, "%s_fake" % sample_category, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                            processes_input = None
                            if sample_category in [ "signal" ]:
                                processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_and_hadTau_genMatches_fakes ]
                            elif sample_category in [ "ttH" ]:
                                processes_input = []
                                processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in self.lepton_and_hadTau_genMatches_fakes ])
                                processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in self.lepton_and_hadTau_genMatches_fakes ])
                                processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in self.lepton_and_hadTau_genMatches_fakes ])
                            else:
                                processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_and_hadTau_genMatches_fakes ]
                            if processes_input:
                                self.jobOptions_addBackgrounds[key_addBackgrounds_job] = {
                                    'inputFile' : self.outputFile_hadd_stage1[key_hadd_stage1],
                                    'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "addBackgrounds_%s_fakes_%s_%s_%s_%s_cfg.py" % \
                                        (self.channel, process_name, sample_category, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)),
                                    'outputFile' : os.path.join(self.dirs[DKEY_HIST], "addBackgrounds_%s_fakes_%s_%s_%s_%s.root" % \
                                        (self.channel, process_name, sample_category, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)),
                                    'logFile' : os.path.join(self.dirs[DKEY_LOGS], "addBackgrounds_%s_fakes_%s_%s_%s_%s.log" % \
                                        (self.channel, process_name, sample_category, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)),
                                    'categories' : [ getHistogramDir(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, hadTau_charge_selection) ],
                                    'processes_input' : processes_input,
                                    'process_output' : "%s_fake" % sample_category
                                }
                                self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds[key_addBackgrounds_job])

                                # initialize input and output file names for hadd_stage1_5
                                key_hadd_stage1_5 = getKey(lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                                if not key_hadd_stage1_5 in self.inputFiles_hadd_stage1_5:
                                    self.inputFiles_hadd_stage1_5[key_hadd_stage1_5] = []
                                self.inputFiles_hadd_stage1_5[key_hadd_stage1_5].append(self.jobOptions_addBackgrounds[key_addBackgrounds_job]['outputFile'])
                                self.outputFile_hadd_stage1_5[key_hadd_stage1_5] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage1_5_%s_%s_%s.root" % \
                                    (self.channel, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection))

                    # add output files of hadd_stage1 for data to list of input files for hadd_stage1_5
                    if not is_mc:
                        key_hadd_stage1 = getKey(process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                        key_hadd_stage1_5 = getKey(lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                        if not key_hadd_stage1_5 in self.inputFiles_hadd_stage1_5:
                            self.inputFiles_hadd_stage1_5[key_hadd_stage1_5] = []
                        self.inputFiles_hadd_stage1_5[key_hadd_stage1_5].append(self.outputFile_hadd_stage1[key_hadd_stage1])

                if self.isBDTtraining:
                    continue

                # sum fake contributions for the total of all MC sample
                # input processes: TT1t0e0m1j, TT0t1e0m1j, TT0t0e1m1j, TT0t0e0m2j; TTW1t0e0m1j,...
                # output process: fakes_mc
                key_addBackgrounds_job = getKey(lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                # Computed explicitly: the original relied on the value leaking out of the
                # sample loop above, which is stale or undefined if no sample set it.
                key_hadd_stage1_5 = getKey(lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                sample_categories = []
                sample_categories.extend(self.nonfake_backgrounds)
                sample_categories.extend([ "signal" ])
                processes_input = []
                for sample_category in sample_categories:
                    processes_input.append("%s_fake" % sample_category)
                self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job] = {
                    'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5],
                    'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "addBackgrounds_%s_fakes_mc_%s_%s_cfg.py" % \
                        (self.channel, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)),
                    'outputFile' : os.path.join(self.dirs[DKEY_HIST], "addBackgrounds_%s_fakes_mc_%s_%s.root" % \
                        (self.channel, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)),
                    'logFile' : os.path.join(self.dirs[DKEY_LOGS], "addBackgrounds_%s_fakes_mc_%s_%s.log" % \
                        (self.channel, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)),
                    'categories' : [ getHistogramDir(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, hadTau_charge_selection) ],
                    'processes_input' : processes_input,
                    'process_output' : "fakes_mc"
                }
                self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job])

                # initialize input and output file names for hadd_stage2
                key_hadd_stage2 = getKey(lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                if not key_hadd_stage2 in self.inputFiles_hadd_stage2:
                    self.inputFiles_hadd_stage2[key_hadd_stage2] = []
                if lepton_and_hadTau_selection == "Tight":
                    self.inputFiles_hadd_stage2[key_hadd_stage2].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job]['outputFile'])
                self.inputFiles_hadd_stage2[key_hadd_stage2].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5])
                self.outputFile_hadd_stage2[key_hadd_stage2] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage2_%s_%s_%s.root" % \
                    (self.channel, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection))

    # ------------------------------------------------------------------
    # BDT training: reduced workflow ('analyze' + hadd_stage1 only)
    # ------------------------------------------------------------------
    if self.isBDTtraining:
        if self.is_sbatch:
            logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
            self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
            self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile_analyze(lines_makefile)
        self.addToMakefile_hadd_stage1(lines_makefile)
        self.createMakefile(lines_makefile)
        logging.info("Done")
        return self.num_jobs

    # ------------------------------------------------------------------
    # 4. 'addBackgroundFakes', 'prepareDatacards' and 'makePlots' jobs
    # ------------------------------------------------------------------
    logging.info("Creating configuration files to run 'addBackgroundFakes'")
    for hadTau_charge_selection in self.hadTau_charge_selections:
        key_addFakes_job = getKey("fakes_data", hadTau_charge_selection)
        key_hadd_stage1_5 = getKey(get_lepton_and_hadTau_selection_and_frWeight("Fakeable", "enabled"), hadTau_charge_selection)
        category_sideband = None
        if self.applyFakeRateWeights == "3L":
            category_sideband = "1l_2tau_%s_Fakeable_wFakeRateWeights" % hadTau_charge_selection
        elif self.applyFakeRateWeights == "2tau":
            category_sideband = "1l_2tau_%s_Fakeable_wFakeRateWeights" % hadTau_charge_selection
        else:
            # The original formatted the bare name 'applyFakeRateWeights' (undefined),
            # which turned the intended ValueError into a NameError.
            raise ValueError("Invalid Configuration parameter 'applyFakeRateWeights' = %s !!" % self.applyFakeRateWeights)
        self.jobOptions_addFakes[key_addFakes_job] = {
            'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5],
            'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "addBackgroundLeptonFakes_%s_%s_cfg.py" % \
                (self.channel, hadTau_charge_selection)),
            'outputFile' : os.path.join(self.dirs[DKEY_HIST], "addBackgroundLeptonFakes_%s_%s.root" % \
                (self.channel, hadTau_charge_selection)),
            'logFile' : os.path.join(self.dirs[DKEY_LOGS], "addBackgroundLeptonFakes_%s_%s.log" % \
                (self.channel, hadTau_charge_selection)),
            'category_signal' : "1l_2tau_%s_Tight" % hadTau_charge_selection,
            'category_sideband' : category_sideband
        }
        self.createCfg_addFakes(self.jobOptions_addFakes[key_addFakes_job])
        key_hadd_stage2 = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), hadTau_charge_selection)
        self.inputFiles_hadd_stage2[key_hadd_stage2].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile'])

    logging.info("Creating configuration files to run 'prepareDatacards'")
    for histogramToFit in self.histograms_to_fit:
        key_prep_dcard_job = getKey(histogramToFit)
        key_hadd_stage2 = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), "OS")
        self.jobOptions_prep_dcard[key_prep_dcard_job] = {
            'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2],
            'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "prepareDatacards_%s_%s_cfg.py" % (self.channel, histogramToFit)),
            'datacardFile' : os.path.join(self.dirs[DKEY_DCRD], "prepareDatacards_%s_%s.root" % (self.channel, histogramToFit)),
            'histogramDir' : self.histogramDir_prep_dcard,
            'histogramToFit' : histogramToFit,
            'label' : None
        }
        self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])
        if "SS" in self.hadTau_charge_selections:
            key_prep_dcard_job = getKey(histogramToFit, "SS")
            key_hadd_stage2 = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), "SS")
            self.jobOptions_prep_dcard[key_prep_dcard_job] = {
                'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2],
                'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "prepareDatacards_%s_SS_%s_cfg.py" % (self.channel, histogramToFit)),
                'datacardFile' : os.path.join(self.dirs[DKEY_DCRD], "prepareDatacards_%s_SS_%s.root" % (self.channel, histogramToFit)),
                'histogramDir' : self.histogramDir_prep_dcard_SS,
                'histogramToFit' : histogramToFit,
                'label' : 'SS'
            }
            self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])

    logging.info("Creating configuration files to run 'makePlots'")
    key_makePlots_job = getKey("OS")
    key_hadd_stage2 = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), "OS")
    self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2],
        'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "makePlots_%s_cfg.py" % self.channel),
        'outputFile' : os.path.join(self.dirs[DKEY_PLOT], "makePlots_%s.png" % self.channel),
        'histogramDir' : self.histogramDir_prep_dcard,
        'label' : None,
        'make_plots_backgrounds' : self.make_plots_backgrounds
    }
    self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
    if "SS" in self.hadTau_charge_selections:
        key_makePlots_job = getKey("SS")
        key_hadd_stage2 = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), "SS")
        self.jobOptions_make_plots[key_makePlots_job] = {
            'executable' : self.executable_make_plots,
            'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2],
            'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "makePlots_%s_SS_cfg.py" % self.channel),
            'outputFile' : os.path.join(self.dirs[DKEY_PLOT], "makePlots_%s_SS.png" % self.channel),
            'histogramDir' : self.histogramDir_prep_dcard_SS,
            'label' : "SS",
            'make_plots_backgrounds' : self.make_plots_backgrounds
        }
        self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
    if "Fakeable_mcClosure" in self.lepton_and_hadTau_selections:
        # NOTE(review): this reuses the "OS" key and therefore replaces the regular OS
        # makePlots entry stored above -- confirm that this is intended.
        key_makePlots_job = getKey("OS")
        key_hadd_stage2 = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), "OS")
        self.jobOptions_make_plots[key_makePlots_job] = {
            'executable' : self.executable_make_plots_mcClosure,
            'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2],
            'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "makePlots_mcClosure_%s_cfg.py" % self.channel),
            'outputFile' : os.path.join(self.dirs[DKEY_PLOT], "makePlots_mcClosure_%s.png" % self.channel)
        }
        self.createCfg_makePlots_mcClosure(self.jobOptions_make_plots[key_makePlots_job])

    # ------------------------------------------------------------------
    # 5. Batch submission scripts and Makefile
    # ------------------------------------------------------------------
    if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addBackgrounds)
        self.sbatchFile_addBackgrounds = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_%s.py" % self.channel)
        self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds, self.jobOptions_addBackgrounds)
        self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_%s.py" % self.channel)
        self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum)
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFakes)
        self.sbatchFile_addFakes = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFakes_%s.py" % self.channel)
        self.createScript_sbatch(self.executable_addFakes, self.sbatchFile_addFakes, self.jobOptions_addFakes)

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_backgrounds_from_data(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    self.addToMakefile_make_plots(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done")

    return self.num_jobs
def create(self):
    """Create all config files needed to run the complete analysis workflow.

    Creates the output directories, writes one 'analyze' configuration per
    (sample, lepton selection, systematic shift, job), optionally creates the
    batch submission script, writes the 'prepareDatacards' configurations and
    finally assembles the Makefile.
    """
    # Entries of self.dirs are normally dictionaries (dir_type -> path), but a
    # plain path is accepted as well instead of failing on '.keys()'.
    for dir_entry in self.dirs.values():
        if isinstance(dir_entry, dict):
            for dir_path in dir_entry.values():
                create_if_not_exists(dir_path)
        else:
            create_if_not_exists(dir_entry)

    # Theory-shape systematics are only evaluated for the matching sample category.
    theory_shift_categories = (
        ("CMS_ttHl_thu_shape_ttH", "signal"),
        ("CMS_ttHl_thu_shape_ttW", "TTW"),
        ("CMS_ttHl_thu_shape_ttZ", "TTZ"),
    )

    self.inputFileIds = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or sample_info["sample_category"] in [
                "additional_signal_overlap", "background_data_estimate"]:
            continue
        process_name = sample_info["process_name_specific"]
        logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))

        is_mc = (sample_info["type"] == "mc")
        lumi_scale = 1. if not (self.use_lumi and is_mc) else sample_info["xsection"] * self.lumi / sample_info["nof_events"]
        sample_category = sample_info["sample_category"]
        triggers = sample_info["triggers"]
        apply_trigger_bits = (is_mc and (self.era == "2015" or (self.era == "2016" and sample_info["reHLT"]))) or not is_mc

        # The input file list depends on the sample only, so it is built once
        # per sample instead of once per lepton selection and systematic shift.
        inputFileList = generateInputFileList(sample_name, sample_info, self.max_files_per_job, self.debug)

        for lepton_selection in self.lepton_selections:
            key_dir = getKey(sample_name, lepton_selection)
            for central_or_shift in self.central_or_shifts:
                # None of these conditions depends on the job, so they are
                # checked before entering the job loop.
                if central_or_shift != "central" and not (lepton_selection == "Tight"):
                    continue
                if central_or_shift != "central" and not is_mc:
                    continue
                if any(central_or_shift.startswith(prefix) and sample_category != category
                       for prefix, category in theory_shift_categories):
                    continue

                for jobId in inputFileList.keys():
                    key_file = getKey(sample_name, lepton_selection, central_or_shift, jobId)

                    self.ntupleFiles[key_file] = inputFileList[jobId]
                    self.cfgFiles_analyze_modified[key_file] = os.path.join(
                        self.dirs[key_dir][DKEY_CFGS],
                        "analyze_%s_%s_%s_%s_%i_cfg.py" % (self.channel, process_name, lepton_selection, central_or_shift, jobId))
                    self.histogramFiles[key_file] = os.path.join(
                        self.dirs[key_dir][DKEY_HIST],
                        "%s_%s_%s_%i.root" % (process_name, lepton_selection, central_or_shift, jobId))
                    self.logFiles_analyze[key_file] = os.path.join(
                        self.dirs[key_dir][DKEY_LOGS],
                        "analyze_%s_%s_%s_%s_%i.log" % (self.channel, process_name, lepton_selection, central_or_shift, jobId))
                    # An empty string is stored when no run:lumi:event output was requested.
                    self.rleOutputFiles[key_file] = os.path.join(
                        self.dirs[key_dir][DKEY_RLES],
                        "rle_%s_%s_%s_%s_%i.txt" % (self.channel, process_name, lepton_selection, central_or_shift, jobId)
                    ) if self.select_rle_output else ""
                    self.createCfg_analyze(
                        self.ntupleFiles[key_file], self.histogramFiles[key_file], sample_category, self.era, triggers,
                        lepton_selection, is_mc, central_or_shift, lumi_scale, apply_trigger_bits,
                        self.cfgFiles_analyze_modified[key_file], self.rleOutputFiles[key_file])

    if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.createScript_sbatch()
        # Parenthesised form prints the same text under Python 2 and Python 3.
        print(self.sbatchFile_analyze)

    logging.info("Creating configuration files for executing 'prepareDatacards'")
    for histogramToFit in self.histograms_to_fit:
        self.createCfg_prep_dcard(histogramToFit)

    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_backgrounds_from_data(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done")
def create(self):
    """Create all config files needed to run the complete analysis workflow.

    Registers and creates the directory structure, checks the input files of
    every enabled sample, writes one 'analyze' configuration per job, records
    the inputs/outputs of the first hadd stage, optionally creates the batch
    submission script and writes the Makefile.

    Returns:
        The value of self.num_jobs.
    """
    # Per-sample directories: configs and logs go below configDir,
    # everything else below outputDir.
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        process_name = sample_info["process_name_specific"]
        key_dir = getKey(process_name)
        for dir_type in [DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES]:
            initDict(self.dirs, [key_dir, dir_type])
            if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel, process_name)
            else:
                self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, process_name)

    # Channel-wide directories.
    for dir_type in [DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT]:
        initDict(self.dirs, [dir_type])
        if dir_type in [DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT]:
            self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
        else:
            self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)

    numDirectories = 0
    for key in self.dirs.keys():
        if type(self.dirs[key]) == dict:
            numDirectories += len(self.dirs[key])
        else:
            numDirectories += 1
    logging.info("Creating directory structure (numDirectories = %i)" % numDirectories)
    numDirectories_created = 0
    frac = 1
    for key in self.dirs.keys():
        if type(self.dirs[key]) == dict:
            for dir_type in self.dirs[key].keys():
                create_if_not_exists(self.dirs[key][dir_type])
            numDirectories_created += len(self.dirs[key])
        else:
            create_if_not_exists(self.dirs[key])
            numDirectories_created = numDirectories_created + 1
        # Report every whole percent reached so far.
        while 100 * numDirectories_created >= frac * numDirectories:
            logging.info(" %i%% completed" % frac)
            frac = frac + 1
    logging.info("Done.")

    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
        inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)

    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        process_name = sample_info["process_name_specific"]
        logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))

        inputFileList = inputFileLists[sample_name]
        for jobId in inputFileList.keys():
            # build config files for executing analysis code
            key_analyze_dir = getKey(process_name)
            analyze_job_tuple = (process_name, jobId)
            key_analyze_job = getKey(*analyze_job_tuple)
            ntupleFiles = inputFileList[jobId]
            if len(ntupleFiles) == 0:
                logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job))
                continue

            cfgFile_modified_path = os.path.join(
                self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%i_cfg.py" % analyze_job_tuple)
            logFile_path = os.path.join(
                self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%i.log" % analyze_job_tuple)
            histogramFile_path = os.path.join(
                self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%i.root" % analyze_job_tuple)

            self.jobOptions_analyze[key_analyze_job] = {
                'ntupleFiles': ntupleFiles,
                'cfgFile_modified': cfgFile_modified_path,
                'histogramFile': histogramFile_path,
                'histogramDir': 'analyze_hadTopTagger',
                'logFile': logFile_path,
                'hadTauSelection': self.hadTau_selection,
                'lumiScale': 1.,
                'selectBDT': True,
            }
            self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info)

            # initialize input and output file names for hadd_stage1
            # The directories of this method are registered per process only
            # (getKey(process_name)); the previous lookup used the undefined
            # name 'lepton_selection_and_frWeight' and could never succeed.
            key_hadd_stage1_dir = key_analyze_dir
            key_hadd_stage1_job = getKey(process_name)
            if key_hadd_stage1_job not in self.inputFiles_hadd_stage1:
                self.inputFiles_hadd_stage1[key_hadd_stage1_job] = []
            self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(
                self.jobOptions_analyze[key_analyze_job]['histogramFile'])
            self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(
                self.dirs[key_hadd_stage1_dir][DKEY_HIST], "hadd_stage1_%s.root" % process_name)
            # All jobs of a process share one hadd output; register it as a
            # target only once.
            if self.outputFile_hadd_stage1[key_hadd_stage1_job] not in self.targets:
                self.targets.append(self.outputFile_hadd_stage1[key_hadd_stage1_job])

    self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
    if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done.")

    return self.num_jobs
def create(self):
    """Create all config files needed to run the complete analysis workflow.

    Registers and creates the directory structure, checks the input files of
    every selected sample, writes the 'analyze' configurations, records the
    inputs/outputs of both hadd stages, writes the 'comp_jetToTauFakeRate' and
    'makePlots' configurations, optionally creates the batch submission
    scripts and finally writes the Makefile.
    """
    excluded_categories = ["additional_signal_overlap", "background_data_estimate"]
    # Theory-shape systematics are only evaluated for the matching sample category.
    theory_shift_categories = (
        ("CMS_ttHl_thu_shape_ttH", "signal"),
        ("CMS_ttHl_thu_shape_ttW", "TTW"),
        ("CMS_ttHl_thu_shape_ttZ", "TTZ"),
    )

    # Per-(process, charge selection) directories: configs and logs go below
    # configDir, everything else below outputDir.
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or sample_info["sample_category"] in excluded_categories:
            continue
        process_name = sample_info["process_name_specific"]
        for charge_selection in self.charge_selections:
            key_dir = getKey(process_name, charge_selection)
            for dir_type in [DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES]:
                initDict(self.dirs, [key_dir, dir_type])
                if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.configDir, dir_type, self.channel, charge_selection, process_name)
                else:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.outputDir, dir_type, self.channel, charge_selection, process_name)

    # Channel-wide directories.
    for dir_type in [DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT]:
        initDict(self.dirs, [dir_type])
        if dir_type in [DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT]:
            self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
        else:
            self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)

    for key in self.dirs.keys():
        if type(self.dirs[key]) == dict:
            for dir_type in self.dirs[key].keys():
                create_if_not_exists(self.dirs[key][dir_type])
        else:
            create_if_not_exists(self.dirs[key])

    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or sample_info["sample_category"] in excluded_categories:
            continue
        logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
        inputFileLists[sample_name] = generateInputFileList(
            sample_name, sample_info, self.max_files_per_job, self.debug)

    self.inputFileIds = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or sample_info["sample_category"] in excluded_categories:
            continue
        process_name = sample_info["process_name_specific"]
        logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))

        # Sample-level settings, computed once and reused for every job.
        is_mc = (sample_info["type"] == "mc")
        lumi_scale = 1. if not (self.use_lumi and is_mc) else sample_info["xsection"] * self.lumi / sample_info["nof_events"]
        # NOTE(review): an unused local previously read the "apply_genWeight"
        # key, whereas the job options have always been filled from
        # "genWeight". The job-option behaviour is kept; confirm which key the
        # sample dictionaries actually provide.
        apply_genWeight = sample_info["genWeight"] if (is_mc and "genWeight" in sample_info.keys()) else False
        sample_category = sample_info["sample_category"]
        triggers = sample_info["triggers"]
        apply_trigger_bits = (is_mc and (self.era == "2015" or (self.era == "2016" and sample_info["reHLT"]))) or not is_mc

        inputFileList = inputFileLists[sample_name]
        for charge_selection in self.charge_selections:
            key_dir = getKey(process_name, charge_selection)
            key_hadd_stage1 = getKey(process_name, charge_selection)

            for central_or_shift in self.central_or_shifts:
                # None of these conditions depends on the job, so they are
                # checked before entering the job loop.
                if central_or_shift != "central" and not is_mc:
                    continue
                if any(central_or_shift.startswith(prefix) and sample_category != category
                       for prefix, category in theory_shift_categories):
                    continue

                for jobId in inputFileList.keys():
                    # build config files for executing analysis code
                    key_analyze_job = getKey(process_name, charge_selection, central_or_shift, jobId)
                    ntupleFiles = inputFileList[jobId]
                    if len(ntupleFiles) == 0:
                        # The message used to reference the undefined name
                        # 'key_job', raising a NameError instead of skipping.
                        print("Warning: ntupleFiles['%s'] = %s --> skipping job !!" % (key_analyze_job, ntupleFiles))
                        continue
                    self.jobOptions_analyze[key_analyze_job] = {
                        'ntupleFiles' : ntupleFiles,
                        'cfgFile_modified' : os.path.join(
                            self.dirs[key_dir][DKEY_CFGS],
                            "analyze_%s_%s_%s_%s_%i_cfg.py" % (self.channel, process_name, charge_selection, central_or_shift, jobId)),
                        'histogramFile' : os.path.join(
                            self.dirs[key_dir][DKEY_HIST],
                            "%s_%s_%s_%i.root" % (process_name, charge_selection, central_or_shift, jobId)),
                        'logFile' : os.path.join(
                            self.dirs[key_dir][DKEY_LOGS],
                            "analyze_%s_%s_%s_%s_%i.log" % (self.channel, process_name, charge_selection, central_or_shift, jobId)),
                        'sample_category' : sample_category,
                        'triggers' : triggers,
                        'charge_selection' : charge_selection,
                        'jet_minPt' : self.jet_minPt,
                        'jet_maxPt' : self.jet_maxPt,
                        'jet_minAbsEta' : self.jet_minAbsEta,
                        'jet_maxAbsEta' : self.jet_maxAbsEta,
                        'hadTau_selection_denominator' : self.hadTau_selection_denominator,
                        'hadTau_selections_numerator' : self.hadTau_selections_numerator,
                        'absEtaBins' : self.absEtaBins,
                        'use_HIP_mitigation_mediumMuonId' : True,
                        'is_mc' : is_mc,
                        'central_or_shift' : central_or_shift,
                        'lumi_scale' : lumi_scale,
                        'apply_genWeight' : apply_genWeight,
                        'apply_trigger_bits' : apply_trigger_bits,
                    }
                    self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job])

                    # initialize input and output file names for hadd_stage1
                    if key_hadd_stage1 not in self.inputFiles_hadd_stage1:
                        self.inputFiles_hadd_stage1[key_hadd_stage1] = []
                    self.inputFiles_hadd_stage1[key_hadd_stage1].append(
                        self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                    self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(
                        self.dirs[DKEY_HIST],
                        "histograms_harvested_stage1_%s_%s_%s.root" % (self.channel, process_name, charge_selection))

            # initialize input and output file names for hadd_stage2
            # A stage1 output only exists if at least one job was configured
            # for this process and charge selection; skip it otherwise instead
            # of failing with a KeyError.
            key_hadd_stage2 = getKey(charge_selection)
            if key_hadd_stage1 in self.outputFile_hadd_stage1:
                if key_hadd_stage2 not in self.inputFiles_hadd_stage2:
                    self.inputFiles_hadd_stage2[key_hadd_stage2] = []
                self.inputFiles_hadd_stage2[key_hadd_stage2].append(self.outputFile_hadd_stage1[key_hadd_stage1])
                self.outputFile_hadd_stage2[key_hadd_stage2] = os.path.join(
                    self.dirs[DKEY_HIST],
                    "histograms_harvested_stage2_%s_%s.root" % (self.channel, charge_selection))

    logging.info("Creating configuration files for executing 'comp_jetToTauFakeRate'")
    for charge_selection in self.charge_selections:
        key_comp_jetToTauFakeRate_job = getKey(charge_selection)
        key_hadd_stage2 = getKey(charge_selection)
        self.jobOptions_comp_jetToTauFakeRate[key_comp_jetToTauFakeRate_job] = {
            'inputFile': self.outputFile_hadd_stage2[key_hadd_stage2],
            'cfgFile_modified': os.path.join(
                self.dirs[DKEY_CFGS], "comp_jetToTauFakeRate_%s_cfg.py" % charge_selection),
            'outputFile': os.path.join(
                self.dirs[DKEY_HIST], "comp_jetToTauFakeRate_%s.root" % charge_selection),
            'logFile': os.path.join(
                self.dirs[DKEY_LOGS], "comp_jetToTauFakeRate_%s.log" % charge_selection),
            'looseRegion': "jetToTauFakeRate_%s/denominator/" % charge_selection,
            'tightRegion': "jetToTauFakeRate_%s/numerator/" % charge_selection,
            'absEtaBins': self.absEtaBins,
            'ptBins': self.ptBins
        }
        self.createCfg_comp_jetToTauFakeRate(self.jobOptions_comp_jetToTauFakeRate[key_comp_jetToTauFakeRate_job])
        self.targets.append(self.jobOptions_comp_jetToTauFakeRate[key_comp_jetToTauFakeRate_job]['outputFile'])

    logging.info("Creating configuration files to run 'makePlots'")
    plot_backgrounds = ["TT", "TTW", "TTZ", "EWK", "Rares"]
    for charge_selection in self.charge_selections:
        key_hadd_stage2 = getKey(charge_selection)
        plot_inputFile = self.outputFile_hadd_stage2[key_hadd_stage2]

        # NOTE(review): the inclusive plot job uses file names that do not
        # contain the charge selection, so every charge selection writes the
        # same config and output path - confirm that this is intended.
        key_makePlots_job = getKey(charge_selection)
        self.jobOptions_make_plots[key_makePlots_job] = {
            'executable': self.executable_make_plots,
            'inputFile': plot_inputFile,
            'cfgFile_modified': os.path.join(self.dirs[DKEY_CFGS], "makePlots_%s_cfg.py" % self.channel),
            'outputFile': os.path.join(self.dirs[DKEY_PLOT], "makePlots_%s.png" % self.channel),
            'histogramDir': "jetToTauFakeRate_%s" % charge_selection,
            'label': None,
            'make_plots_backgrounds': list(plot_backgrounds),
        }
        self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])

        self.cfgFile_make_plots = self.cfgFile_make_plots_denominator
        for absEtaBin in ["absEtaLt1_5", "absEta1_5to9_9"]:
            key_makePlots_job = getKey(charge_selection, absEtaBin, "denominator")
            self.jobOptions_make_plots[key_makePlots_job] = {
                'executable': self.executable_make_plots,
                'inputFile': plot_inputFile,
                'cfgFile_modified': os.path.join(
                    self.dirs[DKEY_CFGS],
                    "makePlots_%s_%s_denominator_%s_cfg.py" % (self.channel, charge_selection, absEtaBin)),
                'outputFile': os.path.join(
                    self.dirs[DKEY_PLOT],
                    "makePlots_%s_%s_denominator_%s.png" % (self.channel, charge_selection, absEtaBin)),
                'histogramDir': "jetToTauFakeRate_%s/denominator/%s" % (charge_selection, absEtaBin),
                'label': None,
                'make_plots_backgrounds': list(plot_backgrounds),
            }
            self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])

            for hadTau_selection_numerator in self.hadTau_selections_numerator:
                key_makePlots_job = getKey(charge_selection, absEtaBin, "numerator", hadTau_selection_numerator)
                self.jobOptions_make_plots[key_makePlots_job] = {
                    'executable': self.executable_make_plots,
                    'inputFile': plot_inputFile,
                    'cfgFile_modified': os.path.join(
                        self.dirs[DKEY_CFGS],
                        "makePlots_%s_%s_numerator_%s_%s_cfg.py" % (self.channel, charge_selection, hadTau_selection_numerator, absEtaBin)),
                    'outputFile': os.path.join(
                        self.dirs[DKEY_PLOT],
                        "makePlots_%s_%s_numerator_%s_%s.png" % (self.channel, charge_selection, hadTau_selection_numerator, absEtaBin)),
                    'histogramDir': "jetToTauFakeRate_%s/numerator/%s/%s" % (charge_selection, hadTau_selection_numerator, absEtaBin),
                    'label': None,
                    'make_plots_backgrounds': list(plot_backgrounds),
                }
                self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])

    if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_comp_jetToTauFakeRate)
        self.sbatchFile_comp_jetToTauFakeRate = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_comp_jetToTauFakeRate.py")
        self.createScript_sbatch(
            self.executable_comp_jetToTauFakeRate, self.sbatchFile_comp_jetToTauFakeRate,
            self.jobOptions_comp_jetToTauFakeRate)

    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_comp_jetToTauFakeRate(lines_makefile)
    self.addToMakefile_make_plots(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done")
def create(self):
    """Create all config files needed to run the MEM, locally or on the batch system.

    Creates the directory structure, splits every selected sample into MEM
    jobs via self.memJobList, writes one 'addMEM' configuration per job,
    records which job outputs have to be merged per input file set,
    optionally creates the batch submission script, writes the Makefile and
    prints a per-sample summary of the number of MEM integrations.

    Returns:
        False if the total number of integrations exceeds
        self.max_mem_integrations, True otherwise.
    """
    def ratio(numerator, denominator):
        # Guard the summary against samples without entries/passing events.
        return float(numerator) / denominator if denominator else 0.

    statistics = {}
    for key in self.dirs.keys():
        if type(self.dirs[key]) == dict:
            for dir_type in self.dirs[key].keys():
                create_if_not_exists(self.dirs[key][dir_type])
        else:
            create_if_not_exists(self.dirs[key])

    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or sample_info["sample_category"] in [
                "additional_signal_overlap", "background_data_estimate"]:
            continue

        if not os.path.exists(sample_info['local_paths'][0]['path']):
            logging.warning("Skipping sample {sample_name}".format(sample_name = sample_name))
            continue

        process_name = sample_info["process_name_specific"]
        logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_addMEM, process_name))

        inputFileList = generateInputFileList(sample_name, sample_info, self.max_files_per_job, self.debug)
        # typically, the analysis ends here and starts looping b/c the smallest unit of work processes at least one file
        # we need, however, to split the file into event ranges in such a way that each job performs
        # mem_integrations_per_job MEM integrations
        # so what we are going to do is to open each set of files in inputFileList, read the variable
        # requestMEM_2lss_1tau and try to gather the event ranges such that each event range
        # performs up to mem_integrations_per_job integrations per job
        memEvtRangeDict = self.memJobList(inputFileList)

        for jobId in memEvtRangeDict.keys():
            key_dir = getKey(sample_name)
            key_file = getKey(sample_name, jobId)

            self.inputFiles[key_file] = memEvtRangeDict[jobId]['input_fileset']

            # there should always be a job
            # (the check used to compare the list itself with 0 and reported
            # the wrong condition in its message)
            assert len(self.inputFiles[key_file]) > 0, "There are no input files for job '%s'" % key_file

            #TODO: is this assertion really needed? in principle, no ...
            assert(len(self.inputFiles[key_file]) == 1), "There is more than one input file!"

            self.cfgFiles_addMEM_modified[key_file] = os.path.join(
                self.dirs[key_dir][DKEY_CFGS], "addMEM_%s_%s_%i_cfg.py" % (self.channel, process_name, jobId))
            self.outputFiles[key_file] = os.path.join(
                self.dirs[key_dir][DKEY_NTUPLES], "%s_%i.root" % (process_name, jobId))
            self.logFiles_addMEM[key_file] = os.path.join(
                self.dirs[key_dir][DKEY_LOGS], "addMEM_%s_%s_%i.log" % (self.channel, process_name, jobId))
            self.createCfg_addMEM(
                self.inputFiles[key_file],
                memEvtRangeDict[jobId]['event_range'][0],
                memEvtRangeDict[jobId]['event_range'][1],
                self.outputFiles[key_file],
                self.era,
                self.cfgFiles_addMEM_modified[key_file],
            )

            # associate the output file with the fileset_id
            fileset_id = memEvtRangeDict[jobId]['fileset_id']
            hadd_output = os.path.join(
                self.dirs[key_dir][DKEY_FINAL_NTUPLES], '%s_%i.root' % ('tree', fileset_id)
            )
            if hadd_output not in self.hadd_records:
                self.hadd_records[hadd_output] = {}
                self.hadd_records[hadd_output]['output_files'] = []
            self.hadd_records[hadd_output]['fileset_id'] = fileset_id
            self.hadd_records[hadd_output]['output_files'].append(self.outputFiles[key_file])

        # let's sum the number of integration per sample
        # (the number of entries is counted once per file set, as several jobs
        # may share the same file set)
        nofEntriesMap = {}
        for v in memEvtRangeDict.values():
            if v['fileset_id'] not in nofEntriesMap:
                nofEntriesMap[v['fileset_id']] = v['nof_entries']

        statistics[process_name] = {
            'nof_int'         : sum([entry['nof_int'] for entry in memEvtRangeDict.values()]),
            'nof_entries'     : sum(nofEntriesMap.values()),
            'nof_jobs'        : len(memEvtRangeDict),
            'nof_events_pass' : sum([entry['nof_events_pass'] for entry in memEvtRangeDict.values()]),
            'nof_int_pass'    : sum([entry['nof_int_pass'] for entry in memEvtRangeDict.values()]),
        }

    if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addMEM)
        self.createScript_sbatch()

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_addMEM(lines_makefile)
    self.addToMakefile_hadd(lines_makefile)
    self.createMakefile(lines_makefile)

    # Column width of the label; falls back to the width of 'total' when no
    # sample was processed (max() of an empty list would raise).
    ws_len = max([len(kk) + 1 for kk in statistics.keys()] or [len('total') + 1])
    total_nof_integrations_sum = sum(x['nof_int'] for x in statistics.values())
    total_nof_entires = sum(x['nof_entries'] for x in statistics.values())
    total_nof_integrations_avg = ratio(total_nof_integrations_sum, total_nof_entires)
    total_nof_jobs = sum(x['nof_jobs'] for x in statistics.values())
    total_nof_pass = sum(x['nof_events_pass'] for x in statistics.values())
    total_nof_int_pass_avg = ratio(sum(x['nof_int_pass'] for x in statistics.values()), total_nof_pass)
    for k, v in statistics.items():
        print('%s%s: %d (%d entries; %d jobs; %.2f int/evt; %d (%.2f%%) evt pass; %.2f int/evt pass)' %
              (k,
               ' ' * (ws_len - len(k)),
               v['nof_int'],
               v['nof_entries'],
               v['nof_jobs'],
               ratio(v['nof_int'], v['nof_entries']),
               v['nof_events_pass'],
               100 * ratio(v['nof_events_pass'], v['nof_entries']),
               ratio(v['nof_int_pass'], v['nof_events_pass'])))
    print('%s%s: %d (%d entries; %d jobs; %.2f int/evt; %d evt pass; %.2f int/evt pass)' %
          ('total',
           ' ' * (ws_len - len('total')),
           total_nof_integrations_sum,
           total_nof_entires,
           total_nof_jobs,
           total_nof_integrations_avg,
           total_nof_pass,
           total_nof_int_pass_avg))

    if total_nof_integrations_sum > self.max_mem_integrations:
        logging.error("Will not start the jobs (max nof integrations exceeded)!")
        return False
    else:
        logging.info("Done")

    return True
def create(self):
    """Set up the sync-Ntuple production.

    Registers and creates the directory structure, checks the input files of
    every enabled sample, writes one 'analyze' configuration per job and
    systematic shift, creates the batch submission script and writes the
    Makefile.

    Returns:
        The value of self.num_jobs.
    """
    merge_steps = ["hadd", "copyHistograms", "addBackgrounds"]

    # Directories per (process, systematic shift); the empty string and the
    # merge step names are placeholders that get directories of their own.
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        process_name = sample_info["process_name_specific"]
        shift_labels = [""] + list(self.central_or_shifts) + merge_steps
        for shift_label in shift_labels:
            for process_label in (process_name, "hadd"):
                if process_label in ["hadd"] and shift_label != "":
                    continue
                if shift_label in merge_steps and process_label in ["hadd"]:
                    continue
                key_dir = getKey(process_label, shift_label)
                for dir_type in [DKEY_CFGS, DKEY_LOGS, DKEY_RLES, DKEY_SYNC]:
                    initDict(self.dirs, [key_dir, dir_type])
                    base_dir = self.configDir if dir_type in [DKEY_CFGS, DKEY_LOGS] else self.outputDir
                    self.dirs[key_dir][dir_type] = os.path.join(
                        base_dir, dir_type, self.channel, process_label, shift_label)

    # Directories of the post-processing steps.
    for subdirectory in ["addBackgrounds", "addBackgroundLeptonFakes", "prepareDatacards",
                         "addSystFakeRates", "makePlots"]:
        key_dir = getKey(subdirectory)
        for dir_type in [DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_ROOT, DKEY_DCRD, DKEY_PLOT]:
            initDict(self.dirs, [key_dir, dir_type])
            base_dir = self.configDir if dir_type in [DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT] else self.outputDir
            self.dirs[key_dir][dir_type] = os.path.join(base_dir, dir_type, self.channel, subdirectory)

    # Channel-wide directories.
    for dir_type in [DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_SYNC, DKEY_HADD_RT]:
        initDict(self.dirs, [dir_type])
        base_dir = self.configDir if dir_type in [DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_HADD_RT] else self.outputDir
        self.dirs[dir_type] = os.path.join(base_dir, dir_type, self.channel)

    # A nested entry counts once per directory it contains, a plain entry once.
    numDirectories = sum(
        len(entry) if type(entry) is dict else 1 for entry in self.dirs.values())
    logging.info("Creating directory structure (numDirectories = %i)" % numDirectories)
    numDirectories_created = 0
    frac = 1
    for entry in self.dirs.values():
        if type(entry) is dict:
            for dir_type in entry.keys():
                create_if_not_exists(entry[dir_type])
            numDirectories_created += len(entry)
        else:
            create_if_not_exists(entry)
            numDirectories_created += 1
        # Report every whole percent reached so far.
        while 100 * numDirectories_created >= frac * numDirectories:
            logging.info(" %i%% completed" % frac)
            frac += 1
    logging.info("Done.")

    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
        if sample_info["use_it"]:
            logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
            inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)

    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        process_name = sample_info["process_name_specific"]
        logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))

        sample_jobs = inputFileLists[sample_name]
        for jobId in sample_jobs.keys():
            for central_or_shift in self.central_or_shifts:
                logging.info(" ... for systematic uncertainty %s" % central_or_shift)

                ntupleFiles = sample_jobs[jobId]
                if len(ntupleFiles) == 0:
                    print("Warning: no ntupleFiles --> skipping job !!")
                    continue

                job_dirs = self.dirs[getKey(process_name, central_or_shift)]
                job_tuple = (process_name, central_or_shift, jobId)
                job_key = getKey(*job_tuple)

                syncOutput = os.path.join(job_dirs[DKEY_SYNC], '%s_%s.root' % (self.channel, central_or_shift))
                # Only the central tree is stored without a sub-directory.
                if central_or_shift == "central":
                    syncOutputTree = self.output_tree
                else:
                    syncOutputTree = os.path.join(central_or_shift, self.output_tree)
                self.inputFiles_sync['sync'].append(syncOutput)

                job_options = {
                    'ntupleFiles': ntupleFiles,
                    'cfgFile_modified': os.path.join(job_dirs[DKEY_CFGS], "analyze_%s_%s_%i_cfg.py" % job_tuple),
                    'histogramFile': '',
                    'logFile': os.path.join(job_dirs[DKEY_LOGS], "analyze_%s_%s_%i.log" % job_tuple),
                    'syncTree': syncOutputTree,
                    'syncOutput': syncOutput,
                    'syncRLE': self.rle_select if self.rle_select and '%s' not in self.rle_select else '',
                    'useNonNominal': self.use_nonnominal,
                }
                self.jobOptions_analyze[job_key] = job_options
                self.createCfg_analyze(self.jobOptions_analyze[job_key], sample_info)

    logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
    self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
    self.createScript_sbatch_syncNtuple(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_syncNtuple(lines_makefile)

    self.outputFile_sync['sync'] = os.path.join(self.outputDir, DKEY_SYNC, '%s.root' % self.channel)
    self.addToMakefile_hadd_sync(lines_makefile)

    self.targets.extend(self.phoniesToAdd)

    self.createMakefile(lines_makefile)

    logging.info("Done")

    return self.num_jobs
def create(self):
    """Create all config files needed to run the complete analysis workflow.

    Steps, in order:
      1. register per-sample and per-channel directories in ``self.dirs`` and
         create them;
      2. collect the input file lists of all enabled samples;
      3. build the job options and config file of every
         (sample, systematic, job) combination and register the resulting
         histogram files as inputs of the two 'hadd' stages;
      4. build the 'prepareDatacards' and 'makePlots' configs;
      5. create the sbatch submission script (only if ``self.is_sbatch``) and
         the Makefile.

    NOTE(review): the loop nesting was reconstructed from whitespace-collapsed
    source -- in particular the hadd_stage2 bookkeeping is assumed to run once
    per sample; confirm against the original layout.
    """
    skipped_categories = ["additional_signal_overlap", "background_data_estimate"]

    # Per-sample directories: configs and logs live under configDir,
    # everything else under outputDir.
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or sample_info["sample_category"] in skipped_categories:
            continue
        process_name = sample_info["process_name_specific"]
        key_dir = getKey(process_name)
        for dir_type in [DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES]:
            initDict(self.dirs, [key_dir, dir_type])
            if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                self.dirs[key_dir][dir_type] = os.path.join(
                    self.configDir, dir_type, self.channel, "", process_name)
            else:
                self.dirs[key_dir][dir_type] = os.path.join(
                    self.outputDir, dir_type, self.channel, "", process_name)

    # Per-channel directories shared by all samples.
    for dir_type in [DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT]:
        initDict(self.dirs, [dir_type])
        if dir_type in [DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_HADD_RT]:
            self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
        else:
            self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)

    # self.dirs maps either directly to a path or to a {dir_type: path} dict.
    for entry in self.dirs.values():
        if isinstance(entry, dict):
            for path in entry.values():
                create_if_not_exists(path)
        else:
            create_if_not_exists(entry)

    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or sample_info["sample_category"] in skipped_categories:
            continue
        logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
        inputFileLists[sample_name] = generateInputFileList(
            sample_name, sample_info, self.max_files_per_job, self.debug)

    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or sample_info["sample_category"] in skipped_categories:
            continue
        process_name = sample_info["process_name_specific"]
        logging.info("Creating configuration files to run '%s' for sample %s" %
                     (self.executable_analyze, process_name))

        sample_category = sample_info["sample_category"]
        is_mc = (sample_info["type"] == "mc")
        inputFileList = inputFileLists[sample_name]
        key_dir = getKey(process_name)
        key_hadd_stage1 = getKey(process_name)

        for central_or_shift in self.central_or_shifts:
            # These filters do not depend on the job, so they are evaluated
            # once per systematic instead of once per job.
            if central_or_shift != "central" and not is_mc:
                continue
            if central_or_shift.startswith("CMS_ttHl_thu_shape_ttH") and sample_category != "signal":
                continue
            if central_or_shift.startswith("CMS_ttHl_thu_shape_ttW") and sample_category != "TTW":
                continue
            if central_or_shift.startswith("CMS_ttHl_thu_shape_ttZ") and sample_category != "TTZ":
                continue

            for jobId in inputFileList.keys():
                # build config files for executing analysis code
                key_analyze_job = getKey(process_name, central_or_shift, jobId)
                ntupleFiles = inputFileList[jobId]
                if len(ntupleFiles) == 0:
                    # The original referenced the undefined name 'key_job'
                    # here, which raised NameError instead of skipping.
                    print("Warning: ntupleFiles['%s'] = %s --> skipping job !!" % (key_analyze_job, ntupleFiles))
                    continue

                job_tuple = (self.channel, process_name, central_or_shift, jobId)
                self.jobOptions_analyze[key_analyze_job] = {
                    'ntupleFiles': ntupleFiles,
                    'cfgFile_modified': os.path.join(
                        self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%i_cfg.py" % job_tuple),
                    'histogramFile': os.path.join(
                        self.dirs[key_dir][DKEY_HIST],
                        "%s_%s_%i.root" % (process_name, central_or_shift, jobId)),
                    'logFile': os.path.join(
                        self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%i.log" % job_tuple),
                    'rleOutputFile': os.path.join(
                        self.dirs[key_dir][DKEY_RLES], "rle_%s_%s_%s_%i.txt" % job_tuple)
                    if self.select_rle_output else "",
                    'sample_category': sample_category,
                    'triggers': sample_info["triggers"],
                    'hadTau_selection': self.hadTau_selection_part2,
                    ##'use_HIP_mitigation_mediumMuonId' : sample_info["use_HIP_mitigation_mediumMuonId"],
                    'use_HIP_mitigation_mediumMuonId': True,
                    'is_mc': is_mc,
                    'central_or_shift': central_or_shift,
                    'lumi_scale': 1. if not (self.use_lumi and is_mc)
                    else sample_info["xsection"] * self.lumi / sample_info["nof_events"],
                    'apply_genWeight': sample_info["genWeight"]
                    if (is_mc and "genWeight" in sample_info.keys()) else False,
                    'apply_trigger_bits': (is_mc and (self.era == "2015" or
                                                      (self.era == "2016" and sample_info["reHLT"])))
                    or not is_mc,
                }
                self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job])

                # initialize input and output file names for hadd_stage1
                self.inputFiles_hadd_stage1.setdefault(key_hadd_stage1, []).append(
                    self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(
                    self.dirs[DKEY_HIST],
                    "histograms_harvested_stage1_%s_%s.root" % (self.channel, process_name))

        # initialize input and output file names for hadd_stage2
        key_hadd_stage2 = getKey("all")
        self.inputFiles_hadd_stage2.setdefault(key_hadd_stage2, []).append(
            self.outputFile_hadd_stage1[key_hadd_stage1])
        self.outputFile_hadd_stage2[key_hadd_stage2] = os.path.join(
            self.dirs[DKEY_HIST], "histograms_harvested_stage2_%s.root" % (self.channel))

    logging.info("Creating configuration files to run 'prepareDatacards'")
    for evtSelection in self.evtSelections:
        for histogramToFit in self.histograms_to_fit:
            key_prep_dcard_job = getKey(evtSelection, histogramToFit)
            key_hadd_stage2 = getKey("all")
            dcard_tuple = (self.channel, evtSelection, histogramToFit)
            self.jobOptions_prep_dcard[key_prep_dcard_job] = {
                'inputFile': self.outputFile_hadd_stage2[key_hadd_stage2],
                'cfgFile_modified': os.path.join(
                    self.dirs[DKEY_CFGS], "prepareDatacards_%s_%s_%s_cfg.py" % dcard_tuple),
                'datacardFile': os.path.join(
                    self.dirs[DKEY_DCRD], "prepareDatacards_%s_%s_%s.root" % dcard_tuple),
                'histogramDir': "_".join([self.histogramDir_prep_dcard, evtSelection]),
                'histogramToFit': histogramToFit,
                'label': None,
            }
            self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])

    logging.info("Creating configuration files to run 'makePlots'")
    for evtSelection in self.evtSelections:
        key_makePlots_job = getKey(evtSelection)
        key_hadd_stage2 = getKey("all")
        self.jobOptions_make_plots[key_makePlots_job] = {
            'executable': self.executable_make_plots,
            'inputFile': self.outputFile_hadd_stage2[key_hadd_stage2],
            'cfgFile_modified': os.path.join(
                self.dirs[DKEY_CFGS], "makePlots_%s_%s_cfg.py" % (self.channel, evtSelection)),
            'outputFile': os.path.join(
                self.dirs[DKEY_PLOT], "makePlots_%s_%s.png" % (self.channel, evtSelection)),
            'histogramDir': "_".join([self.histogramDir_prep_dcard, evtSelection]),
            'label': evtSelection,
            'make_plots_backgrounds': self.make_plots_backgrounds,
        }
        self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])

    if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(
            self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.createScript_sbatch()

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    self.addToMakefile_make_plots(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done")
def create(self):
    """Create all config files needed to run the complete analysis workflow.

    Registers and creates the directory structure, collects the input file
    lists, builds the analysis job options (plus the hadd stage 1/2
    bookkeeping), the 'prepareDatacards' and post-fit configs, the sbatch
    submission script (only if ``self.is_sbatch``) and the Makefile.

    NOTE(review): the loop nesting was reconstructed from whitespace-collapsed
    source; the hadd_stage1 bookkeeping is assumed to run per job and the
    hadd_stage2 bookkeeping per (lepton selection, sample) -- confirm.

    Returns:
        ``self.num_jobs``.
    """
    # --- register per-sample directories ---------------------------------
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        process_name = sample_info["process_name_specific"]
        is_mc = (sample_info["type"] == "mc")

        logging.info("Building dictionaries for sample %s..." % process_name)
        for lepton_selection in self.lepton_selections:
            central_or_shift_extensions = ["", "hadd", "addBackgrounds"]
            central_or_shifts_extended = central_or_shift_extensions + self.central_or_shifts
            for central_or_shift_or_dummy in central_or_shifts_extended:
                for process_name_or_dummy in [process_name, "hadd"]:
                    if central_or_shift_or_dummy in ["hadd", "addBackgrounds"] \
                            and process_name_or_dummy in ["hadd"]:
                        continue
                    is_real_shift = (
                        central_or_shift_or_dummy != "central"
                        and central_or_shift_or_dummy not in central_or_shift_extensions
                    )
                    # Short-circuit keeps accept_central_or_shift() from being
                    # consulted for data samples.
                    if is_real_shift and (
                        not is_mc
                        or not self.accept_central_or_shift(central_or_shift_or_dummy, sample_info)
                    ):
                        continue

                    key_dir = getKey(process_name_or_dummy, lepton_selection, central_or_shift_or_dummy)
                    for dir_type in [DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES]:
                        initDict(self.dirs, [key_dir, dir_type])
                        if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                            dir_path = os.path.join(
                                self.configDir, dir_type, self.channel,
                                "_".join([lepton_selection]), process_name_or_dummy,
                                central_or_shift_or_dummy)
                        else:
                            dir_path = os.path.join(
                                self.outputDir, dir_type, self.channel,
                                "_".join([lepton_selection]), process_name_or_dummy)
                        self.dirs[key_dir][dir_type] = dir_path

    # --- register shared directories -------------------------------------
    for subdirectory in ["prepareDatacards"]:
        key_dir = getKey(subdirectory)
        for dir_type in [DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT]:
            initDict(self.dirs, [key_dir, dir_type])
            if dir_type in [DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT]:
                base_dir = self.configDir
            else:
                base_dir = self.outputDir
            self.dirs[key_dir][dir_type] = os.path.join(base_dir, dir_type, self.channel, subdirectory)

    for dir_type in [DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT,
                     DKEY_HADD_RT, DKEY_COMBINE_OUTPUT]:
        initDict(self.dirs, [dir_type])
        if dir_type in [DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT,
                        DKEY_HADD_RT, DKEY_COMBINE_OUTPUT]:
            base_dir = self.configDir
        else:
            base_dir = self.outputDir
        self.dirs[dir_type] = os.path.join(base_dir, dir_type, self.channel)

    # --- create the directories, reporting progress in percent -----------
    numDirectories = 0
    for entry in self.dirs.values():
        numDirectories += len(entry) if type(entry) == dict else 1
    logging.info("Creating directory structure (numDirectories = %i)" % numDirectories)

    created = 0
    percent = 1
    for entry in self.dirs.values():
        if type(entry) == dict:
            for path in entry.values():
                create_if_not_exists(path)
            created += len(entry)
        else:
            create_if_not_exists(entry)
            created += 1
        # Emit one message per percent step reached by this entry.
        while 100 * created >= percent * numDirectories:
            logging.info(" %i%% completed" % percent)
            percent += 1
    logging.info("Done.")

    # --- collect input files ---------------------------------------------
    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
        if sample_info["use_it"]:
            logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
            inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)

    # --- analysis jobs and hadd bookkeeping ------------------------------
    for lepton_selection in self.lepton_selections:
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            process_name = sample_info["process_name_specific"]
            logging.info("Creating configuration files to run '%s' for sample %s" %
                         (self.executable_analyze, process_name))

            is_mc = (sample_info["type"] == "mc")
            inputFileList = inputFileLists[sample_name]
            for central_or_shift in self.central_or_shifts:
                if central_or_shift != "central" and not is_mc:
                    continue

                # build config files for executing analysis code
                key_analyze_dir = getKey(process_name, lepton_selection, central_or_shift)
                job_dirs = self.dirs[key_analyze_dir] if inputFileList else None

                for jobId in inputFileList.keys():
                    analyze_job_tuple = (process_name, lepton_selection, central_or_shift, jobId)
                    key_analyze_job = getKey(*analyze_job_tuple)
                    ntupleFiles = inputFileList[jobId]
                    if len(ntupleFiles) == 0:
                        logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job))
                        continue

                    cfgFile_modified_path = os.path.join(
                        job_dirs[DKEY_CFGS], "analyze_%s_%s_%s_%i_cfg.py" % analyze_job_tuple)
                    logFile_path = os.path.join(
                        job_dirs[DKEY_LOGS], "analyze_%s_%s_%s_%i.log" % analyze_job_tuple)
                    if self.select_rle_output:
                        rleOutputFile_path = os.path.join(
                            job_dirs[DKEY_RLES], "rle_%s_%s_%s_%i.txt" % analyze_job_tuple)
                    else:
                        rleOutputFile_path = ""
                    histogramFile_path = os.path.join(
                        job_dirs[DKEY_HIST], "analyze_%s_%s_%s_%i.root" % analyze_job_tuple)

                    job_options = {
                        'ntupleFiles': ntupleFiles,
                        'cfgFile_modified': cfgFile_modified_path,
                        'histogramFile': histogramFile_path,
                        'logFile': logFile_path,
                        'selEventsFileName_output': rleOutputFile_path,
                        'leptonSelection': lepton_selection,
                        'applyFakeRateWeights': "disabled",
                        'central_or_shift': central_or_shift,
                    }
                    self.jobOptions_analyze[key_analyze_job] = job_options
                    self.createCfg_analyze(job_options, sample_info)

                    # initialize input and output file names for hadd_stage1
                    key_hadd_stage1_dir = getKey(process_name, lepton_selection)
                    hadd_stage1_job_tuple = (process_name, lepton_selection)
                    key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple)
                    if key_hadd_stage1_job not in self.inputFiles_hadd_stage1.keys():
                        self.inputFiles_hadd_stage1[key_hadd_stage1_job] = []
                    self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(job_options['histogramFile'])
                    self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(
                        self.dirs[key_hadd_stage1_dir][DKEY_HIST],
                        "hadd_stage1_%s_%s.root" % hadd_stage1_job_tuple)

            # initialize input and output file names for hadd_stage2
            key_hadd_stage1_job = getKey(process_name, lepton_selection)
            key_hadd_stage2_dir = getKey("hadd", lepton_selection)
            key_hadd_stage2_job = getKey(lepton_selection)
            if key_hadd_stage2_job not in self.inputFiles_hadd_stage2.keys():
                self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(
                self.outputFile_hadd_stage1[key_hadd_stage1_job])
            self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(
                self.dirs[key_hadd_stage2_dir][DKEY_HIST], "hadd_stage2_%s.root" % lepton_selection)

    # --- prepareDatacards and post-fit configs ---------------------------
    logging.info("Creating configuration files to run 'prepareDatacards'")
    # Rebind both attributes to fresh list copies of their current contents.
    self.prep_dcard_processesToCopy = list(self.prep_dcard_processesToCopy)
    self.prep_dcard_signals = list(self.prep_dcard_signals)

    for histogramToFit in self.histograms_to_fit:
        key_hadd_stage2_job = getKey("Tight")
        key_prep_dcard_dir = getKey("prepareDatacards")
        prep_dcard_job_tuple = (self.channel, histogramToFit)
        key_prep_dcard_job = getKey(histogramToFit)
        datacardFile = os.path.join(
            self.dirs[key_prep_dcard_dir][DKEY_DCRD],
            "prepareDatacards_%s_%s.root" % prep_dcard_job_tuple)
        self.jobOptions_prep_dcard[key_prep_dcard_job] = {
            'inputFile': self.outputFile_hadd_stage2[key_hadd_stage2_job],
            'cfgFile_modified': os.path.join(
                self.dirs[key_prep_dcard_dir][DKEY_CFGS],
                "prepareDatacards_%s_%s_cfg.py" % prep_dcard_job_tuple),
            'datacardFile': datacardFile,
            'histogramDir': self.histogramDir_prep_dcard,
            'histogramToFit': histogramToFit,
            'label': None,
        }
        self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])

        # The post-fit options start from a private deep copy so that the
        # template in self.jobOptions_postFit is left untouched.
        jobOptions_makefile = copy.deepcopy(self.jobOptions_postFit)
        jobOptions_makefile['fit_result'] = os.path.join(
            self.dirs[DKEY_COMBINE_OUTPUT], 'fit_{}'.format(histogramToFit),
            jobOptions_makefile['target'])
        jobOptions_makefile['hadd_stage2'] = self.outputFile_hadd_stage2[key_hadd_stage2_job]
        jobOptions_makefile['prepare_datacard'] = datacardFile
        jobOptions_makefile['data_datacard'] = os.path.join(
            self.dirs[key_prep_dcard_dir][DKEY_DCRD],
            "prepareDatacards_data_%s_%s.root" % prep_dcard_job_tuple)
        jobOptions_makefile['pseudodata_datacard'] = os.path.join(
            self.dirs[key_prep_dcard_dir][DKEY_DCRD],
            "prepareDatacards_pseudodata_%s_%s.root" % prep_dcard_job_tuple)
        jobOptions_makefile['makefile'] = os.path.join(
            self.dirs[DKEY_COMBINE_OUTPUT], 'Makefile_{}'.format(histogramToFit))
        jobOptions_makefile['stdout'] = os.path.join(
            self.dirs[DKEY_COMBINE_OUTPUT], 'stdout_{}.log'.format(histogramToFit))
        self.createCfg_postFit(jobOptions_makefile)

    # --- batch script and Makefile ---------------------------------------
    self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
    if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile, make_dependency = "phony_hadd_stage1")
    self.addToMakefile_prep_dcard(lines_makefile)
    self.addToMakefile_postFit(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done.")

    return self.num_jobs
def create(self):
    """Create all config files needed to run the PU profile production.

    Creates the directories listed in ``self.dirs``, builds one job (config,
    script, log and temporary histogram paths) per input file chunk of every
    enabled MC sample whose final output file is not already present and
    valid, then creates the sbatch submission script (only if
    ``self.is_sbatch``) and the Makefile.

    Returns:
        ``self.num_jobs``.
    """
    # self.dirs maps either directly to a path or to a {dir_type: path} dict.
    for dir_entry in self.dirs.values():
        if type(dir_entry) == dict:
            for dir_path in dir_entry.values():
                create_if_not_exists(dir_path)
        else:
            create_if_not_exists(dir_entry)

    self.inputFileIds = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info['use_it']:
            continue
        process_name = sample_info["process_name_specific"]
        # Only MC samples are processed here.
        if sample_info["type"] != "mc":
            continue

        logging.info("Creating configuration files to run '%s' for sample %s" %
                     (self.executable, process_name))

        inputFileList = generateInputFileList(sample_info, self.max_files_per_job)
        key_dir = getKey(process_name)

        outputFile = os.path.join(self.dirs[key_dir][DKEY_HISTO], "%s.root" % process_name)
        already_done = os.path.isfile(outputFile) and tools_is_file_ok(outputFile, min_file_size=2000)
        if already_done:
            logging.info('File {} already exists --> skipping job'.format(outputFile))
            continue

        sample_output = {
            'inputFiles': [],
            'outputFile': outputFile,
        }
        self.outputFiles[process_name] = sample_output

        for jobId in inputFileList.keys():
            key_file = getKey(sample_name, jobId)
            job_inputs = inputFileList[jobId]
            self.inputFiles[key_file] = job_inputs
            if len(job_inputs) == 0:
                logging.warning("ntupleFiles['%s'] = %s --> skipping job !!" % (key_file, job_inputs))
                continue

            name_tuple = (process_name, jobId)
            self.cfgFiles_puProfile[key_file] = os.path.join(
                self.dirs[key_dir][DKEY_CFGS], "puProfile_%s_%i_cfg.txt" % name_tuple)
            self.outputFiles_tmp[key_file] = os.path.join(
                self.dirs[key_dir][DKEY_HISTO_TMP], "histogram_%i.root" % jobId)
            self.logFiles_puProfile[key_file] = os.path.join(
                self.dirs[key_dir][DKEY_LOGS], "puProfile_%s_%i.log" % name_tuple)
            self.scriptFiles_puProfile[key_file] = os.path.join(
                self.dirs[key_dir][DKEY_CFGS], "puProfile_%s_%i_cfg.sh" % name_tuple)

            self.jobOptions_sbatch[key_file] = {
                'histName': process_name,
                'inputFiles': self.inputFiles[key_file],
                'cfgFile_path': self.cfgFiles_puProfile[key_file],
                'outputFile': self.outputFiles_tmp[key_file],
                'logFile': self.logFiles_puProfile[key_file],
                'scriptFile': self.scriptFiles_puProfile[key_file],
            }
            self.createCfg_puProfile(self.jobOptions_sbatch[key_file])
            # The per-job histogram becomes an input of the per-sample output.
            self.outputFiles[process_name]['inputFiles'].append(self.outputFiles_tmp[key_file])

    if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable)
        self.num_jobs['puProfile'] += self.createScript_sbatch(
            self.executable, self.sbatchFile_puProfile, self.jobOptions_sbatch)

    logging.info("Creating Makefile")
    lines_makefile = []
    for add_target in (self.addToMakefile_puProfile,
                       self.addToMakefile_hadd,
                       self.addToMakefile_plot,
                       self.addToMakefile_finalHadd):
        add_target(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done")

    return self.num_jobs
def create(self):
    """Create all config files needed to run the complete analysis workflow.

    Steps, in order:
      1. create the directories listed in ``self.dirs``;
      2. build the analysis config of every (sample, lepton/hadTau selection,
         fake-rate weight, hadTau charge selection, systematic, job)
         combination that passes the skip conditions below;
      3. create the sbatch submission script (only if ``self.is_sbatch``);
      4. build the 'addBackgrounds' configs (non-fake sum per process, fake
         sum per process, fake sum over all non-fake backgrounds), the
         'addBackgroundFakes', 'prepareDatacards' and 'makePlots' configs;
      5. create the Makefile.

    The input file list of a sample is generated once per sample; the
    original regenerated it inside the innermost selection/systematic loops
    although it depends only on the sample.
    """
    # self.dirs maps either directly to a path or to a {dir_type: path} dict.
    for entry in self.dirs.values():
        if isinstance(entry, dict):
            for path in entry.values():
                create_if_not_exists(path)
        else:
            create_if_not_exists(entry)

    self.inputFileIds = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or sample_info["sample_category"] in [
                "additional_signal_overlap", "background_data_estimate"]:
            continue

        process_name = sample_info["process_name_specific"]

        logging.info("Creating configuration files to run '%s' for sample %s" %
                     (self.executable_analyze, process_name))

        is_mc = (sample_info["type"] == "mc")
        lumi_scale = 1. if not (self.use_lumi and is_mc) else \
            sample_info["xsection"] * self.lumi / sample_info["nof_events"]
        apply_genWeight = sample_info["apply_genWeight"] \
            if (is_mc and "apply_genWeight" in sample_info.keys()) else False
        sample_category = sample_info["sample_category"]
        triggers = sample_info["triggers"]
        apply_trigger_bits = (is_mc and (self.era == "2015" or
                                         (self.era == "2016" and sample_info["reHLT"]))) or not is_mc

        # Depends only on the sample, so it is computed once here.
        inputFileList = generateInputFileList(sample_name, sample_info, self.max_files_per_job, self.debug)

        for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections:
            lepton_selection = lepton_and_hadTau_selection
            if self.applyFakeRateWeights == "2tau":
                lepton_selection = "Tight"
            hadTau_selection = "|".join([lepton_and_hadTau_selection, self.hadTau_selection_part2])
            for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights:
                if lepton_and_hadTau_frWeight == "enabled" and \
                        not lepton_and_hadTau_selection.startswith("Fakeable"):
                    continue
                if lepton_and_hadTau_selection == "Fakeable_mcClosure" and \
                        not lepton_and_hadTau_frWeight == "enabled":
                    continue
                lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(
                    lepton_and_hadTau_selection, lepton_and_hadTau_frWeight)

                applyFakeRateWeights = self.applyFakeRateWeights
                if lepton_and_hadTau_frWeight == "disabled":
                    applyFakeRateWeights = "disabled"

                for hadTau_charge_selection in self.hadTau_charge_selections:
                    key_dir = getKey(sample_name, lepton_and_hadTau_selection,
                                     lepton_and_hadTau_frWeight, hadTau_charge_selection)
                    for central_or_shift in self.central_or_shifts:
                        # None of these conditions depends on the job, so they
                        # are checked once per systematic.
                        if central_or_shift != "central" and not (
                                lepton_and_hadTau_selection.startswith("Tight")
                                and hadTau_charge_selection == "OS"):
                            continue
                        if central_or_shift != "central" and not is_mc:
                            continue
                        if central_or_shift.startswith("CMS_ttHl_thu_shape_ttH") and sample_category != "signal":
                            continue
                        if central_or_shift.startswith("CMS_ttHl_thu_shape_ttW") and sample_category != "TTW":
                            continue
                        if central_or_shift.startswith("CMS_ttHl_thu_shape_ttZ") and sample_category != "TTZ":
                            continue

                        for jobId in inputFileList.keys():
                            key_file = getKey(sample_name, lepton_and_hadTau_selection,
                                              lepton_and_hadTau_frWeight, hadTau_charge_selection,
                                              central_or_shift, jobId)

                            self.ntupleFiles[key_file] = inputFileList[jobId]
                            if len(self.ntupleFiles[key_file]) == 0:
                                print("Warning: ntupleFiles['%s'] = %s --> skipping job !!" %
                                      (key_file, self.ntupleFiles[key_file]))
                                continue

                            short_tuple = (process_name, lepton_and_hadTau_selection_and_frWeight,
                                           hadTau_charge_selection, central_or_shift, jobId)
                            full_tuple = (self.channel,) + short_tuple
                            self.cfgFiles_analyze_modified[key_file] = os.path.join(
                                self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%s_%i_cfg.py" % full_tuple)
                            self.histogramFiles[key_file] = os.path.join(
                                self.dirs[key_dir][DKEY_HIST], "%s_%s_%s_%s_%i.root" % short_tuple)
                            self.logFiles_analyze[key_file] = os.path.join(
                                self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%s_%i.log" % full_tuple)
                            self.rleOutputFiles[key_file] = os.path.join(
                                self.dirs[key_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%s_%i.txt" % full_tuple) \
                                if self.select_rle_output else ""

                            self.createCfg_analyze(
                                self.ntupleFiles[key_file], self.histogramFiles[key_file], sample_category,
                                self.era, triggers, lepton_selection, self.apply_leptonGenMatching,
                                hadTau_selection, self.apply_hadTauGenMatching, hadTau_charge_selection,
                                applyFakeRateWeights, is_mc, central_or_shift, lumi_scale, apply_genWeight,
                                apply_trigger_bits, self.cfgFiles_analyze_modified[key_file],
                                self.rleOutputFiles[key_file])

    if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.createScript_sbatch()

    logging.info("Creating configuration files for executing 'addBackgrounds'")
    process_names = []
    process_names.extend(self.nonfake_backgrounds)
    process_names.extend(["signal", "ttH_htt", "ttH_hww", "ttH_hzz"])

    # sum non-fake contributions for each MC sample separately
    # input processes: TT2t0e0m0j, TT1t1e0m0j, TT1t0e1m0j, TT0t2e0m0j, TT0t1e1m0j, TT0t0e2m0j; TTW2t0e0m0j,...
    # output processes: TT; ...
    for process_name in process_names:
        for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections:
            for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights:
                lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(
                    lepton_and_hadTau_selection, lepton_and_hadTau_frWeight)
                for hadTau_charge_selection in self.hadTau_charge_selections:
                    key = getKey(process_name, lepton_and_hadTau_selection,
                                 lepton_and_hadTau_frWeight, hadTau_charge_selection)
                    if lepton_and_hadTau_frWeight == "enabled" and \
                            not lepton_and_hadTau_selection.startswith("Fakeable"):
                        continue
                    if lepton_and_hadTau_selection == "Fakeable_mcClosure" and \
                            not lepton_and_hadTau_frWeight == "enabled":
                        continue
                    name_tuple = (self.channel, process_name,
                                  lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                    self.histogramFile_addBackgrounds[key] = os.path.join(
                        self.outputDir, DKEY_HIST, "addBackgrounds_%s_%s_%s_%s.root" % name_tuple)
                    self.cfgFile_addBackgrounds_modified[key] = os.path.join(
                        self.outputDir, DKEY_CFGS, "addBackgrounds_%s_%s_%s_%s_cfg.py" % name_tuple)
                    histogramDir = getHistogramDir(
                        lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, hadTau_charge_selection)
                    processes_input = ["%s%s" % (process_name, genMatch)
                                       for genMatch in self.lepton_and_hadTau_genMatches_nonfakes]
                    # CV: treat fakes in ttH signal events as "signal", not as "background"
                    ##if process_name in [ "signal", "ttH_htt", "ttH_hww", "ttH_hzz" ]:
                    ##  processes_input.extend([ "%s%s" % (process_name, genMatch) for genMatch in self.lepton_and_hadTau_genMatches_fakes ])
                    self.process_output_addBackgrounds[key] = process_name
                    self.createCfg_addBackgrounds(
                        self.histogramFile_hadd_stage1, self.histogramFile_addBackgrounds[key],
                        self.cfgFile_addBackgrounds_modified[key], [histogramDir], processes_input,
                        self.process_output_addBackgrounds[key])

    # sum fake contributions for each MC sample separately
    # input processes: TT1t0e0m1j, TT0t1e0m1j, TT0t0e1m1j, TT0t0e0m2j; TTW1t0e0m1j,...
    # output processes: fakes_TT; ...
    for process_name in process_names:
        for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections:
            for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights:
                lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(
                    lepton_and_hadTau_selection, lepton_and_hadTau_frWeight)
                for hadTau_charge_selection in self.hadTau_charge_selections:
                    key = getKey("fakes_%s" % process_name, lepton_and_hadTau_selection,
                                 lepton_and_hadTau_frWeight, hadTau_charge_selection)
                    if lepton_and_hadTau_frWeight == "enabled" and \
                            not lepton_and_hadTau_selection.startswith("Fakeable"):
                        continue
                    if lepton_and_hadTau_selection == "Fakeable_mcClosure" and \
                            not lepton_and_hadTau_frWeight == "enabled":
                        continue
                    name_tuple = (self.channel, process_name,
                                  lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                    self.histogramFile_addBackgrounds[key] = os.path.join(
                        self.outputDir, DKEY_HIST, "addBackgrounds_%s_fakes_%s_%s_%s.root" % name_tuple)
                    self.cfgFile_addBackgrounds_modified[key] = os.path.join(
                        self.outputDir, DKEY_CFGS, "addBackgrounds_%s_fakes_%s_%s_%s_cfg.py" % name_tuple)
                    histogramDir = getHistogramDir(
                        lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, hadTau_charge_selection)
                    processes_input = ["%s%s" % (process_name, genMatch)
                                       for genMatch in self.lepton_and_hadTau_genMatches_fakes]
                    self.process_output_addBackgrounds[key] = "fakes_%s" % process_name
                    self.createCfg_addBackgrounds(
                        self.histogramFile_hadd_stage1, self.histogramFile_addBackgrounds[key],
                        self.cfgFile_addBackgrounds_modified[key], [histogramDir], processes_input,
                        self.process_output_addBackgrounds[key])

    # sum fake contributions for the total of all MC sample
    # input processes: TT1t0e0m1j, TT0t1e0m1j, TT0t0e1m1j, TT0t0e0m2j; TTW1t0e0m1j,...
    # output process: fakes_mc
    for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections:
        for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights:
            if lepton_and_hadTau_frWeight == "enabled" and \
                    not lepton_and_hadTau_selection.startswith("Fakeable"):
                continue
            if lepton_and_hadTau_selection == "Fakeable_mcClosure" and \
                    not lepton_and_hadTau_frWeight == "enabled":
                continue
            lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(
                lepton_and_hadTau_selection, lepton_and_hadTau_frWeight)
            for hadTau_charge_selection in self.hadTau_charge_selections:
                key = getKey(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, hadTau_charge_selection)
                name_tuple = (self.channel, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                self.histogramFile_addBackgrounds[key] = os.path.join(
                    self.outputDir, DKEY_HIST, "addBackgrounds_%s_fakes_mc_%s_%s.root" % name_tuple)
                self.cfgFile_addBackgrounds_modified[key] = os.path.join(
                    self.outputDir, DKEY_CFGS, "addBackgrounds_%s_fakes_mc_%s_%s_cfg.py" % name_tuple)
                histogramDir = getHistogramDir(
                    lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, hadTau_charge_selection)
                processes_input = ["%s%s" % (process_name, genMatch)
                                   for process_name in self.nonfake_backgrounds
                                   for genMatch in self.lepton_and_hadTau_genMatches_fakes]
                self.process_output_addBackgrounds[key] = "fakes_mc"
                self.createCfg_addBackgrounds(
                    self.histogramFile_hadd_stage1, self.histogramFile_addBackgrounds[key],
                    self.cfgFile_addBackgrounds_modified[key], [histogramDir], processes_input,
                    self.process_output_addBackgrounds[key])

    logging.info("Creating configuration files for executing 'addBackgroundFakes'")
    for hadTau_charge_selection in self.hadTau_charge_selections:
        key = getKey("fakes_data", hadTau_charge_selection)
        self.histogramFile_addFakes[key] = os.path.join(
            self.outputDir, DKEY_HIST,
            "addBackgroundFakes_%s_%s.root" % (self.channel, hadTau_charge_selection))
        self.cfgFile_addFakes_modified[key] = os.path.join(
            self.outputDir, DKEY_CFGS,
            "addBackgroundFakes_%s_%s_cfg.py" % (self.channel, hadTau_charge_selection))
        category_signal = "1l_2tau_%s_Tight" % hadTau_charge_selection
        category_sideband = "1l_2tau_%s_Fakeable_wFakeRateWeights" % hadTau_charge_selection
        self.createCfg_addFakes(
            self.histogramFile_hadd_stage1_5, self.histogramFile_addFakes[key],
            self.cfgFile_addFakes_modified[key], category_signal, category_sideband)

    logging.info("Creating configuration files for executing 'prepareDatacards'")
    for histogramToFit in self.histograms_to_fit:
        self.createCfg_prep_dcard(histogramToFit)
        if "SS" in self.hadTau_charge_selections:
            self.createCfg_prep_dcard(histogramToFit, self.histogramDir_prep_dcard_SS, "SS")

    logging.info("Creating configuration files for executing 'makePlots'")
    self.createCfg_makePlots()
    if "SS" in self.hadTau_charge_selections:
        self.createCfg_makePlots(self.histogramDir_prep_dcard_SS, "SS")
    if "Fakeable_mcClosure" in self.lepton_and_hadTau_selections:
        self.createCfg_makePlots_mcClosure()

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_backgrounds_from_data(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    self.addToMakefile_make_plots(lines_makefile)
    self.addToMakefile_make_plots_mcClosure(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done")
def create(self):
    """Create directories, per-job config files and the Makefile for the analysis jobs.

    For every sample with ``sample_info["use_it"]`` set and for every
    combination of ``self.apply_jetSmearing_options`` and
    ``self.apply_metSmearing_options``, each set of input ntuples is split
    into a fixed, sample-specific number of jobs.  Every job gets
    ``maxSelEvents`` = 500 and a ``skipSelEvents`` offset of 500 times its
    index within the ntuple set.  The histogram file of each job is registered
    as input of the matching hadd_stage1 step and each hadd_stage1 output is
    registered as input of the single hadd_stage2 step.

    Returns:
        The value of ``self.num_jobs``.

    Raises:
        ValueError: if an enabled sample has no entry in the table that
            defines the number of jobs per set of input ntuples.
    """
    # Number of jobs that each set of input ntuples is split into, keyed by
    # "process_name_specific".  Samples missing from this table are rejected.
    num_jobs_per_file_by_process = {
        "signal_ggf_nonresonant_node_sm_hh_2b2v": 500,
        "signal_ggf_nonresonant_cHHH1_hh_2b2v": 100,
        "TTJets_DiLept": 50,
        "TTJets_DiLept_ext1": 50,
        "TTTo2L2Nu": 10,
    }
    maxSelEvents = 500

    # --- directory layout: one entry per sample, then the channel-wide ones
    for sample_info in self.samples.values():
        if not sample_info["use_it"]:
            continue
        process_name = sample_info["process_name_specific"]
        key_dir = getKey(process_name)
        for dir_type in [DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES, DKEY_SYNC]:
            initDict(self.dirs, [key_dir, dir_type])
            # config and log files live under configDir, everything else under outputDir
            base_dir = self.configDir if dir_type in [DKEY_CFGS, DKEY_LOGS] else self.outputDir
            self.dirs[key_dir][dir_type] = os.path.join(base_dir, dir_type, self.channel, process_name)
    for dir_type in [DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC]:
        initDict(self.dirs, [dir_type])
        if dir_type in [DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT]:
            self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
        else:
            self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)

    # self.dirs mixes plain paths with {dir_type: path} dictionaries
    for dir_entry in self.dirs.values():
        if isinstance(dir_entry, dict):
            for dir_path in dir_entry.values():
                create_if_not_exists(dir_path)
        else:
            create_if_not_exists(dir_entry)

    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
        inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)

    for apply_jetSmearing in self.apply_jetSmearing_options:
        jetSmearingLabel = "jetSmearingEnabled" if apply_jetSmearing else "jetSmearingDisabled"
        for apply_metSmearing in self.apply_metSmearing_options:
            metSmearingLabel = "metSmearingEnabled" if apply_metSmearing else "metSmearingDisabled"
            for sample_name, sample_info in self.samples.items():
                if not sample_info["use_it"]:
                    continue
                process_name = sample_info["process_name_specific"]
                logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))

                inputFileList = inputFileLists[sample_name]
                if process_name not in num_jobs_per_file_by_process:
                    raise ValueError("Invalid sample: %s" % process_name)
                numJobsPerFile = num_jobs_per_file_by_process[process_name]
                numJobs = numJobsPerFile * len(inputFileList)

                key_dir = getKey(process_name)
                key_hadd_stage1 = getKey(process_name, jetSmearingLabel, metSmearingLabel)

                for jobId in range(1, numJobs + 1):
                    # divmod keeps the ntuple index an integer on every Python
                    # version; a plain "/" yields a float under true division
                    # and the lookup in inputFileList would then fail.
                    file_index, chunk_index = divmod(jobId - 1, numJobsPerFile)
                    ntupleId = file_index + 1
                    skipSelEvents = maxSelEvents * chunk_index

                    # build config files for executing analysis code
                    key_analyze_job = getKey(process_name, jetSmearingLabel, metSmearingLabel, jobId)
                    ntupleFiles = inputFileList[ntupleId]
                    if len(ntupleFiles) == 0:
                        logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job))
                        continue

                    job_tuple = (self.channel, process_name, jetSmearingLabel, metSmearingLabel, jobId)
                    if self.select_rle_output:
                        rleOutputFile_path = os.path.join(
                            self.dirs[key_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%i.txt" % job_tuple)
                    else:
                        rleOutputFile_path = ""
                    self.jobOptions_analyze[key_analyze_job] = {
                        'ntupleFiles': ntupleFiles,
                        'cfgFile_modified': os.path.join(
                            self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % job_tuple),
                        'histogramFile': os.path.join(
                            self.dirs[key_dir][DKEY_HIST], "analyze_%s_%s_%s_%s_%i.root" % job_tuple),
                        'logFile': os.path.join(
                            self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % job_tuple),
                        'selEventsFileName_output': rleOutputFile_path,
                        'apply_jetSmearing': apply_jetSmearing,
                        'apply_metSmearing': apply_metSmearing,
                        'maxSelEvents': maxSelEvents,
                        'skipSelEvents': skipSelEvents,
                    }
                    self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info)

                    # initialize input and output file names for hadd_stage1
                    self.inputFiles_hadd_stage1.setdefault(key_hadd_stage1, []).append(
                        self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                    self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(
                        self.dirs[DKEY_HIST],
                        "histograms_harvested_stage1_%s_%s_%s_%s.root" %
                        (self.channel, process_name, jetSmearingLabel, metSmearingLabel))

                # add output files of hadd_stage1 to list of input files for hadd_stage2
                if key_hadd_stage1 not in self.outputFile_hadd_stage1:
                    # every job of this sample was skipped, so there is nothing to harvest
                    logging.warning("No hadd_stage1 output for %s --> not adding it to hadd_stage2 !!" % key_hadd_stage1)
                    continue
                key_hadd_stage2 = getKey("")
                self.inputFiles_hadd_stage2.setdefault(key_hadd_stage2, []).append(
                    self.outputFile_hadd_stage1[key_hadd_stage1])
                self.outputFile_hadd_stage2[key_hadd_stage2] = os.path.join(
                    self.dirs[DKEY_HIST], "histograms_harvested_stage2_%s.root" % self.channel)

    if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.targets.extend(self.outputFile_hadd_stage2.values())
    self.createMakefile(lines_makefile)

    logging.info("Done")

    return self.num_jobs
def create(self):
    """Create the addMEM config files, the hadd bookkeeping and the Makefile, then print statistics.

    Samples are skipped when ``use_it`` is false, when their
    ``sample_category`` is "additional_signal_overlap" or
    "background_data_estimate", or when the first entry of ``local_paths``
    does not exist on disk.  For every remaining sample the jobs returned by
    ``self.memJobList`` are configured one by one and their output files are
    grouped per ``fileset_id`` in ``self.hadd_records``.  A per-sample and a
    total summary of the job statistics is printed at the end.

    Returns:
        bool: False if ``self.max_mem_integrations`` is positive and the summed
        'nof_int' of all samples exceeds it, True otherwise.
    """

    def ratio(numerator, denominator):
        # Same convention as the per-sample printout: a ratio with an empty
        # denominator is reported as 0 instead of raising ZeroDivisionError.
        if denominator == 0:
            return 0.
        return float(numerator) / denominator

    statistics = {}
    for dir_entry in self.dirs.values():
        if isinstance(dir_entry, dict):
            for dir_path in dir_entry.values():
                create_if_not_exists(dir_path)
        else:
            create_if_not_exists(dir_entry)

    # read the file in, sample-by-sample
    # build the dictionary recursively
    # add rle file also to generated cfg files
    # print integrations per job as well!
    # consider more than 1 file per jobs -- the jobs are splitted by MEM integration anyways

    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or \
           sample_info["sample_category"] in ["additional_signal_overlap", "background_data_estimate"]:
            continue

        if not os.path.exists(sample_info['local_paths'][0]['path']):
            logging.warning("Skipping sample {sample_name}".format(sample_name=sample_name))
            continue

        process_name = sample_info["process_name_specific"]
        logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_addMEM, process_name))
        is_mc = (sample_info["type"] == "mc")

        inputFileList = generateInputFileList(sample_name, sample_info, self.max_files_per_job, self.debug)
        # typically, the analysis ends here and starts looping b/c the smallest unit of work processes
        # at least one file; we need, however, to split the file into event ranges in such a way that
        # each job performs mem_integrations_per_job MEM integrations
        # so what we are going to do is to open each set of files in inputFileList, read the variable
        # requestMEM_*l_*tau and try to gather the event ranges such that each event range
        # performs up to mem_integrations_per_job integrations per job
        memEvtRangeDict = self.memJobList(inputFileList)

        key_dir = getKey(sample_name)
        for jobId, job in memEvtRangeDict.items():
            key_file = getKey(sample_name, jobId)

            self.inputFiles[key_file] = job['input_fileset']

            # there should always be a job
            # (the check has to look at the length: comparing the list itself
            # with 0 never fails on Python 2 and raises TypeError on Python 3)
            assert len(self.inputFiles[key_file]) > 0, "No input files for job %s ?? !!" % key_file

            job_tuple = (self.channel, process_name, jobId)
            self.cfgFiles_addMEM_modified[key_file] = os.path.join(
                self.dirs[key_dir][DKEY_CFGS], "addMEM_%s_%s_%i_cfg.py" % job_tuple)
            self.shFiles_addMEM_modified[key_file] = os.path.join(
                self.dirs[key_dir][DKEY_CFGS], "addMEM_%s_%s_%i.sh" % job_tuple)
            self.outputFiles[key_file] = os.path.join(
                self.dirs[key_dir][DKEY_NTUPLES], "%s_%i.root" % (process_name, jobId))
            self.logFiles_addMEM[key_file] = os.path.join(
                self.dirs[key_dir][DKEY_LOGS], "addMEM_%s_%s_%i.log" % job_tuple)
            self.createCfg_addMEM(
                self.inputFiles[key_file],
                job['event_range'][0],
                job['event_range'][1],
                self.outputFiles[key_file],
                self.era,
                is_mc,
                self.cfgFiles_addMEM_modified[key_file],
            )

            # associate the output file with the fileset_id
            fileset_id = job['fileset_id']
            # "%" and "//" share the same precedence and bind left to right,
            # so the division must be parenthesised to happen before formatting
            hadd_output_dir = os.path.join(
                self.dirs[key_dir][DKEY_FINAL_NTUPLES], '%04d' % (fileset_id // 1000))
            if not os.path.exists(hadd_output_dir):
                os.makedirs(hadd_output_dir)
            hadd_output = os.path.join(hadd_output_dir, '%s_%i.root' % ('tree', fileset_id))
            hadd_record = self.hadd_records.setdefault(hadd_output, {'output_files': []})
            hadd_record['fileset_id'] = fileset_id
            hadd_record['output_files'].append(self.outputFiles[key_file])
            hadd_record['process_name'] = process_name

        # let's sum the number of integration per sample
        # 'nof_entries' is taken once per fileset_id (first job seen), not once per job
        nof_entries_by_fileset = {}
        for job in memEvtRangeDict.values():
            nof_entries_by_fileset.setdefault(job['fileset_id'], job['nof_entries'])
        jobs = list(memEvtRangeDict.values())
        statistics[process_name] = {
            'nof_int': sum(job['nof_int'] for job in jobs),
            'nof_entries': sum(nof_entries_by_fileset.values()),
            'nof_events_pass': sum(job['nof_events_pass'] for job in jobs),
            'nof_int_pass': sum(job['nof_int_pass'] for job in jobs),
            'nof_zero': sum(job['nof_zero'] for job in jobs),
            'nof_jobs': len(memEvtRangeDict),
        }

    if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addMEM)
        self.createScript_sbatch()

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_addMEM(lines_makefile)
    self.addToMakefile_hadd(lines_makefile)
    self.createMakefile(lines_makefile)

    # width of the label column; falls back to 0 when no sample was processed
    ws_len = max([len(label) + 1 for label in statistics] or [0])
    total_nof_integrations_sum = sum(x['nof_int'] for x in statistics.values())
    total_nof_entries = sum(x['nof_entries'] for x in statistics.values())
    total_nof_zero_int = sum(x['nof_zero'] for x in statistics.values())
    total_nof_jobs = sum(x['nof_jobs'] for x in statistics.values())
    total_nof_pass = sum(x['nof_events_pass'] for x in statistics.values())
    total_nof_int_pass_avg = ratio(sum(x['nof_int_pass'] for x in statistics.values()), total_nof_pass)
    total_nof_integrations_avg = ratio(total_nof_integrations_sum, total_nof_entries)
    total_nof_int_per_job = ratio(total_nof_integrations_sum, total_nof_jobs)
    for k, v in statistics.items():
        int_per_event = ratio(v['nof_int'], v['nof_entries'])
        evt_pass = ratio(100 * v['nof_events_pass'], v['nof_entries'])
        nof_int_pass = ratio(v['nof_int_pass'], v['nof_events_pass'])
        print('%s%s: %d (%d entries; %d jobs; %.2f int/evt; %d (%.2f%%) evt pass; %.2f int/evt pass; %d evt 0int)' % (
            k,
            ' ' * (ws_len - len(k)),
            v['nof_int'],
            v['nof_entries'],
            v['nof_jobs'],
            int_per_event,
            v['nof_events_pass'],
            evt_pass,
            nof_int_pass,
            v['nof_zero'],
        ))
    print('%s%s: %d (%d entries; %d jobs; %.2f int/evt; %d evt pass; %.2f int/evt pass; '
          '%.2f int/job pass; %d evt 0int)' % (
        'total',
        ' ' * (ws_len - len('total')),
        total_nof_integrations_sum,
        total_nof_entries,
        total_nof_jobs,
        total_nof_integrations_avg,
        total_nof_pass,
        total_nof_int_pass_avg,
        total_nof_int_per_job,
        total_nof_zero_int,
    ))

    if self.max_mem_integrations > 0 and total_nof_integrations_sum > self.max_mem_integrations:
        logging.error("Will not start the jobs (max nof integrations exceeded)!")
        return False

    logging.info("Done")
    return True
def create(self):
    """Create directories, per-job config files and the Makefile for the analysis jobs.

    For every entry of ``self.modes``, every sample with
    ``sample_info["use_it"]`` set, every entry of ``self.central_or_shifts``
    and every job of the sample's input file list, the job options are stored
    in ``self.jobOptions_analyze`` and passed to ``self.createCfg_analyze``.
    The histogram file of each job is registered as input of the hadd_stage1
    step of its (process, mode) pair, and each hadd_stage1 output is
    registered as input of the single hadd_stage2 step.

    Returns:
        The value of ``self.num_jobs``.
    """
    # --- directory layout: one entry per (sample, mode), then the channel-wide ones
    for sample_info in self.samples.values():
        if not sample_info["use_it"]:
            continue
        process_name = sample_info["process_name_specific"]
        for mode in self.modes:
            key_dir = getKey(process_name, mode)
            for dir_type in [DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES, DKEY_SYNC]:
                if dir_type == DKEY_SYNC and not self.do_sync:
                    continue
                initDict(self.dirs, [key_dir, dir_type])
                # config and log files live under configDir, everything else under outputDir
                base_dir = self.configDir if dir_type in [DKEY_CFGS, DKEY_LOGS] else self.outputDir
                self.dirs[key_dir][dir_type] = os.path.join(
                    base_dir, dir_type, self.channel, "_".join([mode]), process_name)
    for dir_type in [DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC]:
        if dir_type == DKEY_SYNC and not self.do_sync:
            continue
        initDict(self.dirs, [dir_type])
        if dir_type in [DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT]:
            self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
        else:
            self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)

    # self.dirs mixes plain paths with {dir_type: path} dictionaries
    for dir_entry in self.dirs.values():
        if isinstance(dir_entry, dict):
            for dir_path in dir_entry.values():
                create_if_not_exists(dir_path)
        else:
            create_if_not_exists(dir_entry)

    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
        inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)

    for mode in self.modes:
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            process_name = sample_info["process_name_specific"]
            logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))

            sample_category = sample_info["sample_category"]
            is_mc = (sample_info["type"] == "mc")
            inputFileList = inputFileLists[sample_name]
            key_dir = getKey(process_name, mode)
            # Computed before the job loops so that the hadd_stage2 bookkeeping
            # below can never pick up a key left over from a previous sample.
            key_hadd_stage1 = getKey(process_name, mode)

            for central_or_shift in self.central_or_shifts:
                for jobId in inputFileList.keys():
                    # build config files for executing analysis code
                    key_analyze_job = getKey(process_name, mode, central_or_shift, jobId)
                    ntupleFiles = inputFileList[jobId]
                    if len(ntupleFiles) == 0:
                        logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job))
                        continue

                    job_tuple = (self.channel, process_name, mode, central_or_shift, jobId)
                    self.jobOptions_analyze[key_analyze_job] = {
                        'ntupleFiles': ntupleFiles,
                        'cfgFile_modified': os.path.join(
                            self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % job_tuple),
                        'histogramFile': os.path.join(
                            self.dirs[key_dir][DKEY_HIST],
                            "%s_%s_%s_%i.root" % (process_name, mode, central_or_shift, jobId)),
                        'logFile': os.path.join(
                            self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % job_tuple),
                        'sample_category': sample_category,
                        'mode': mode,
                        'lepton_selection': self.lepton_selection,
                        'hadTau_selection': self.hadTau_selection,
                        'SVfit4tau_logM_wMassConstraint_MarkovChain': self.SVfit4tau_logM_wMassConstraint_MarkovChain,
                        'SVfit4tau_logM_woMassConstraint_MarkovChain': self.SVfit4tau_logM_woMassConstraint_MarkovChain,
                        'SVfit4tau_logM_wMassConstraint_VAMP': self.SVfit4tau_logM_wMassConstraint_VAMP,
                        'use_HIP_mitigation_mediumMuonId': False,
                        'is_mc': is_mc,
                        'central_or_shift': central_or_shift,
                        'lumi_scale': 1.,
                        'apply_genWeight': sample_info["genWeight"] if (is_mc and "genWeight" in sample_info) else False,
                    }
                    self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job])

                    # initialize input and output file names for hadd_stage1
                    self.inputFiles_hadd_stage1.setdefault(key_hadd_stage1, []).append(
                        self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                    self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(
                        self.dirs[DKEY_HIST],
                        "histograms_harvested_stage1_%s_%s_%s.root" % (self.channel, process_name, mode))

            # initialize input and output file names for hadd_stage2
            # NOTE(review): the nesting of this section was reconstructed; it is
            # executed once per (mode, sample) -- confirm against the upstream file.
            if key_hadd_stage1 not in self.outputFile_hadd_stage1:
                # every job of this sample was skipped, so there is nothing to harvest
                logging.warning("No hadd_stage1 output for %s --> not adding it to hadd_stage2 !!" % key_hadd_stage1)
                continue
            key_hadd_stage2 = getKey()
            inputFiles_stage2 = self.inputFiles_hadd_stage2.setdefault(key_hadd_stage2, [])
            outputFile_stage1 = self.outputFile_hadd_stage1[key_hadd_stage1]
            # the same file must not be passed to hadd twice
            if outputFile_stage1 not in inputFiles_stage2:
                inputFiles_stage2.append(outputFile_stage1)
            self.outputFile_hadd_stage2[key_hadd_stage2] = os.path.join(
                self.dirs[DKEY_HIST], "histograms_harvested_stage2_%s.root" % (self.channel))

    if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done")

    return self.num_jobs
def create(self): """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system """ for sample_name, sample_info in self.samples.items(): if not sample_info["use_it"]: continue sample_category = sample_info["sample_category"] is_mc = (sample_info["type"] == "mc") process_name = sample_info["process_name_specific"] logging.info("Building dictionaries for sample %s..." % process_name) for lepton_selection in self.lepton_selections: for lepton_frWeight in self.lepton_frWeights: if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"): continue if lepton_frWeight == "disabled" and not lepton_selection in ["Tight", "forBDTtraining"]: continue lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight) for leptonChargeSelection in self.leptonChargeSelections: central_or_shift_extensions = ["", "hadd", "addBackgrounds"] central_or_shift_dedicated = self.central_or_shifts if self.runTHweights(sample_info) else self.central_or_shifts_external central_or_shifts_extended = central_or_shift_extensions + central_or_shift_dedicated for central_or_shift_or_dummy in central_or_shifts_extended: process_name_extended = [ process_name, "hadd" ] for process_name_or_dummy in process_name_extended: if central_or_shift_or_dummy in [ "hadd", "addBackgrounds" ] and process_name_or_dummy in [ "hadd" ]: continue if central_or_shift_or_dummy not in central_or_shift_extensions and not self.accept_systematics( central_or_shift_or_dummy, is_mc, lepton_selection, leptonChargeSelection, sample_info ): continue key_dir = getKey(process_name_or_dummy, lepton_selection_and_frWeight, leptonChargeSelection, central_or_shift_or_dummy) for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES, DKEY_SYNC ]: if dir_type == DKEY_SYNC and not self.do_sync: continue initDict(self.dirs, [ key_dir, dir_type ]) if dir_type in [ DKEY_CFGS, DKEY_LOGS ]: self.dirs[key_dir][dir_type] = 
os.path.join(self.configDir, dir_type, self.channel, "_".join([ lepton_selection_and_frWeight, leptonChargeSelection ]), process_name_or_dummy, central_or_shift_or_dummy) else: self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, "_".join([ lepton_selection_and_frWeight, leptonChargeSelection ]), process_name_or_dummy) for subdirectory in [ "addBackgrounds", "addBackgroundLeptonFakes", "prepareDatacards", "addSystFakeRates", "makePlots" ]: key_dir = getKey(subdirectory) for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]: initDict(self.dirs, [ key_dir, dir_type ]) if dir_type in [ DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]: self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel, subdirectory) else: self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, subdirectory) for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC ]: if dir_type == DKEY_SYNC and not self.do_sync: continue initDict(self.dirs, [ dir_type ]) if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]: self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel) else: self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel) numDirectories = 0 for key in self.dirs.keys(): if type(self.dirs[key]) == dict: numDirectories += len(self.dirs[key]) else: numDirectories += 1 logging.info("Creating directory structure (numDirectories = %i)" % numDirectories) numDirectories_created = 0; frac = 1 for key in self.dirs.keys(): if type(self.dirs[key]) == dict: for dir_type in self.dirs[key].keys(): create_if_not_exists(self.dirs[key][dir_type]) numDirectories_created += len(self.dirs[key]) else: create_if_not_exists(self.dirs[key]) numDirectories_created = numDirectories_created + 1 while 100*numDirectories_created >= frac*numDirectories: logging.info(" %i%% completed" % frac) frac = 
frac + 1 logging.info("Done.") inputFileLists = {} for sample_name, sample_info in self.samples.items(): if not sample_info["use_it"]: continue logging.info("Checking input files for sample %s" % sample_info["process_name_specific"]) inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job) for lepton_selection in self.lepton_selections: hadTau_selection = "Tight" hadTau_selection = "|".join([hadTau_selection, self.hadTau_selection_part2]) electron_selection = lepton_selection muon_selection = lepton_selection if lepton_selection == "forBDTtraining": electron_selection = "Loose" muon_selection = "Loose" elif lepton_selection == "Fakeable_mcClosure_e": electron_selection = "Fakeable" muon_selection = "Tight" elif lepton_selection == "Fakeable_mcClosure_m": electron_selection = "Tight" muon_selection = "Fakeable" if "forBDTtraining" in lepton_selection: electron_selection = "Loose" muon_selection = "Loose" elif lepton_selection == "Fakeable_mcClosure_e": electron_selection = "Fakeable" muon_selection = "Tight" elif lepton_selection == "Fakeable_mcClosure_m": electron_selection = "Tight" muon_selection = "Fakeable" for lepton_frWeight in self.lepton_frWeights: if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"): continue if lepton_frWeight == "disabled" and not lepton_selection in [ "Tight", "forBDTtraining" ]: continue lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight) for leptonChargeSelection in self.leptonChargeSelections: for sample_name, sample_info in self.samples.items(): if not sample_info["use_it"]: continue process_name = sample_info["process_name_specific"] logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name)) inputFileList = inputFileLists[sample_name] sample_category = sample_info["sample_category"] is_mc = (sample_info["type"] == "mc") use_th_weights = self.runTHweights(sample_info) 
central_or_shift_dedicated = self.central_or_shifts if use_th_weights else self.central_or_shifts_external for central_or_shift in central_or_shift_dedicated: if not self.accept_systematics( central_or_shift, is_mc, lepton_selection, leptonChargeSelection, sample_info ): continue central_or_shifts_local = [] if central_or_shift == "central" and not use_th_weights: for central_or_shift_local in self.central_or_shifts_internal: if self.accept_systematics( central_or_shift_local, is_mc, lepton_selection, leptonChargeSelection, sample_info ): central_or_shifts_local.append(central_or_shift_local) logging.info(" ... for '%s' and systematic uncertainty option '%s'" % (lepton_selection_and_frWeight, central_or_shift)) # build config files for executing analysis code key_analyze_dir = getKey(process_name, lepton_selection_and_frWeight, leptonChargeSelection, central_or_shift) for jobId in inputFileList.keys(): analyze_job_tuple = (process_name, lepton_selection_and_frWeight, leptonChargeSelection, central_or_shift, jobId) key_analyze_job = getKey(*analyze_job_tuple) ntupleFiles = inputFileList[jobId] if len(ntupleFiles) == 0: logging.warning("No input ntuples for %s --> skipping job !!" 
% (key_analyze_job)) continue cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % analyze_job_tuple) logFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % analyze_job_tuple) rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%i.txt" % analyze_job_tuple) \ if self.select_rle_output else "" histogramFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%s_%s_%i.root" % analyze_job_tuple) applyFakeRateWeights = self.applyFakeRateWeights \ if self.isBDTtraining or lepton_selection.find("Tight") == -1 \ else "disabled" self.jobOptions_analyze[key_analyze_job] = { 'ntupleFiles' : ntupleFiles, 'cfgFile_modified' : cfgFile_modified_path, 'histogramFile' : histogramFile_path, 'logFile' : logFile_path, 'selEventsFileName_output' : rleOutputFile_path, 'electronSelection' : electron_selection, 'muonSelection' : muon_selection, 'apply_leptonGenMatching' : self.apply_leptonGenMatching, 'leptonChargeSelection' : leptonChargeSelection, 'applyFakeRateWeights' : applyFakeRateWeights, 'hadTauSelection' : hadTau_selection, 'central_or_shift' : central_or_shift, 'central_or_shifts_local' : central_or_shifts_local, 'fillGenEvtHistograms' : True, 'selectBDT' : self.isBDTtraining, 'apply_hlt_filter' : self.hlt_filter, 'selectBDT' : self.isBDTtraining, } self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info, lepton_selection) # initialize input and output file names for hadd_stage1 key_hadd_stage1_dir = getKey(process_name, lepton_selection_and_frWeight, leptonChargeSelection) hadd_stage1_job_tuple = (process_name, lepton_selection_and_frWeight, leptonChargeSelection) key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple) if not key_hadd_stage1_job in self.inputFiles_hadd_stage1: self.inputFiles_hadd_stage1[key_hadd_stage1_job] = [] 
self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile']) self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(self.dirs[key_hadd_stage1_dir][DKEY_HIST], "hadd_stage1_%s_%s_%s.root" % hadd_stage1_job_tuple) if self.isBDTtraining: continue # add output files of hadd_stage1 to list of input files for hadd_stage1_5 key_hadd_stage1_job = getKey(process_name, lepton_selection_and_frWeight, leptonChargeSelection) key_hadd_stage1_5_dir = getKey("hadd", lepton_selection_and_frWeight, leptonChargeSelection) hadd_stage1_5_job_tuple = (lepton_selection_and_frWeight, leptonChargeSelection) key_hadd_stage1_5_job = getKey(*hadd_stage1_5_job_tuple) if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5: self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = [] self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.outputFile_hadd_stage1[key_hadd_stage1_job]) self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job] = os.path.join(self.dirs[key_hadd_stage1_5_dir][DKEY_HIST], "hadd_stage1_5_%s_%s.root" % hadd_stage1_5_job_tuple) if self.isBDTtraining: continue # sum fake background contributions for the total of all MC sample # input processes: TT_fake, TTW_fake, TTWW_fake, ... 
# output process: fakes_mc key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, leptonChargeSelection) key_addBackgrounds_dir = getKey("addBackgrounds") addBackgrounds_job_fakes_tuple = ("fakes_mc", lepton_selection_and_frWeight, leptonChargeSelection) key_addBackgrounds_job_fakes = getKey(*addBackgrounds_job_fakes_tuple) sample_categories = [] sample_categories.extend(self.nonfake_backgrounds) processes_input = [] for sample_category in sample_categories: processes_input.append("%s_fake" % sample_category) self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_fakes_tuple), 'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_fakes_tuple), 'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_fakes_tuple), 'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight, leptonChargeSelection) ], 'processes_input' : processes_input, 'process_output' : "fakes_mc" } self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]) # sum conversion background contributions for the total of all MC sample # input processes: TT_Convs, TTW_Convs, TTWW_Convs, ... 
# output process: Convs addBackgrounds_job_Convs_tuple = ("Convs", lepton_selection_and_frWeight, leptonChargeSelection) key_addBackgrounds_job_Convs = getKey(*addBackgrounds_job_Convs_tuple) sample_categories = [] sample_categories.extend(self.nonfake_backgrounds) processes_input = [] for sample_category in self.convs_backgrounds: processes_input.append("%s_Convs" % sample_category) self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_Convs_tuple), 'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_Convs_tuple), 'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_Convs_tuple), 'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight, leptonChargeSelection) ], 'processes_input' : processes_input, 'process_output' : "Convs" } self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs]) # sum signal contributions from HH->4tau ("tttt"), HH->2W2tau ("wwtt"), and HH->4W ("wwww"), # separately for "nonfake" and "fake" contributions genMatch_categories = [ "nonfake", "fake" ] for genMatch_category in genMatch_categories: for signal_base, signal_input in self.signal_io.items(): addBackgrounds_job_signal_tuple = (lepton_selection_and_frWeight, leptonChargeSelection, signal_base, genMatch_category) key_addBackgrounds_job_signal = getKey(*addBackgrounds_job_signal_tuple) if key_addBackgrounds_job_signal in self.jobOptions_addBackgrounds_sum.keys(): continue processes_input = signal_input process_output = signal_base if genMatch_category == "fake": processes_input = [ process_input + "_fake" for process_input in processes_input ] process_output += "_fake" 
self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_%s_cfg.py" % addBackgrounds_job_signal_tuple), 'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s_%s.root" % addBackgrounds_job_signal_tuple), 'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s_%s.log" % addBackgrounds_job_signal_tuple), 'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight, leptonChargeSelection) ], 'processes_input' : processes_input, 'process_output' : process_output } self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal]) key_hadd_stage2_job = getKey(lepton_selection_and_frWeight, leptonChargeSelection) if not key_hadd_stage2_job in self.inputFiles_hadd_stage2: self.inputFiles_hadd_stage2[key_hadd_stage2_job] = [] if lepton_selection == "Tight": self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal]['outputFile']) # initialize input and output file names for hadd_stage2 key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, leptonChargeSelection) key_hadd_stage2_dir = getKey("hadd", lepton_selection_and_frWeight, leptonChargeSelection) hadd_stage2_job_tuple = (lepton_selection_and_frWeight, leptonChargeSelection) key_hadd_stage2_job = getKey(*hadd_stage2_job_tuple) if not key_hadd_stage2_job in self.inputFiles_hadd_stage2: self.inputFiles_hadd_stage2[key_hadd_stage2_job] = [] if lepton_selection == "Tight": self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile']) self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs]['outputFile']) 
self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job]) self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(self.dirs[key_hadd_stage2_dir][DKEY_HIST], "hadd_stage2_%s_%s.root" % hadd_stage2_job_tuple) if self.isBDTtraining: if self.is_sbatch: logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze) self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel) self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze) logging.info("Creating Makefile") lines_makefile = [] self.addToMakefile_analyze(lines_makefile) self.addToMakefile_hadd_stage1(lines_makefile) self.targets.extend(self.phoniesToAdd) self.addToMakefile_validate(lines_makefile) self.createMakefile(lines_makefile) logging.info("Done.") return self.num_jobs logging.info("Creating configuration files to run 'addBackgroundFakes'") for leptonChargeSelection in self.leptonChargeSelections: key_hadd_stage1_5_job = getKey(get_lepton_selection_and_frWeight("Fakeable", "enabled"), leptonChargeSelection) key_addFakes_dir = getKey("addBackgroundLeptonFakes") key_addFakes_job = getKey("data_fakes", leptonChargeSelection) category_sideband = "hh_4l_%s_Fakeable_wFakeRateWeights" % leptonChargeSelection self.jobOptions_addFakes[key_addFakes_job] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addFakes_dir][DKEY_CFGS], "addBackgroundLeptonFakes_%s_cfg.py" % leptonChargeSelection), 'outputFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_HIST], "addBackgroundLeptonFakes_%s.root" % leptonChargeSelection), 'logFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_LOGS], "addBackgroundLeptonFakes_%s.log" % leptonChargeSelection), 'category_signal' : "hh_4l_%s_Tight" % leptonChargeSelection, 'category_sideband' : category_sideband } 
self.createCfg_addFakes(self.jobOptions_addFakes[key_addFakes_job]) key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), leptonChargeSelection) self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile']) logging.info("Creating configuration files to run 'prepareDatacards'") for histogramToFit in self.histograms_to_fit: key_prep_dcard_dir = getKey("prepareDatacards") if "OS" in self.leptonChargeSelections: key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS") prep_dcard_job_tuple = (self.channel, "OS", histogramToFit) key_prep_dcard_job = getKey("OS", histogramToFit) self.jobOptions_prep_dcard[key_prep_dcard_job] = { 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_cfg.py" % prep_dcard_job_tuple), 'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s.root" % prep_dcard_job_tuple), 'histogramDir' : self.histogramDir_prep_dcard, 'histogramToFit' : histogramToFit, 'label' : '4l', } self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job]) if "SS" in self.leptonChargeSelections: key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS") prep_dcard_job_tuple = (self.channel, "SS", histogramToFit) key_prep_dcard_job = getKey("SS", histogramToFit) self.jobOptions_prep_dcard[key_prep_dcard_job] = { 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_cfg.py" % prep_dcard_job_tuple), 'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s.root" % prep_dcard_job_tuple), 'histogramDir' : self.histogramDir_prep_dcard_SS, 'histogramToFit' : histogramToFit, 'label' : '4l SS', } 
self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job]) # add shape templates for the following systematic uncertainties: # - 'CMS_ttHl_Clos_norm_e' # - 'CMS_ttHl_Clos_shape_e' # - 'CMS_ttHl_Clos_norm_m' # - 'CMS_ttHl_Clos_shape_m' for leptonChargeSelection in self.leptonChargeSelections: key_prep_dcard_job = getKey(leptonChargeSelection, histogramToFit) key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), leptonChargeSelection) key_add_syst_fakerate_dir = getKey("addSystFakeRates") add_syst_fakerate_job_tuple = (self.channel, leptonChargeSelection, histogramToFit) key_add_syst_fakerate_job = getKey(leptonChargeSelection, histogramToFit) self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job] = { 'inputFile' : self.jobOptions_prep_dcard[key_prep_dcard_job]['datacardFile'], 'cfgFile_modified' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_CFGS], "addSystFakeRates_%s_%s_%s_cfg.py" % add_syst_fakerate_job_tuple), 'outputFile' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_DCRD], "addSystFakeRates_%s_%s_%s.root" % add_syst_fakerate_job_tuple), 'category' : self.channel, 'histogramToFit' : histogramToFit, 'plots_outputFileName' : os.path.join(self.dirs[DKEY_PLOT], "addSystFakeRates.png") } histogramDir_nominal = None if leptonChargeSelection == "OS": histogramDir_nominal = self.histogramDir_prep_dcard elif leptonChargeSelection == "SS": histogramDir_nominal = self.histogramDir_prep_dcard_SS else: raise ValueError("Invalid parameter 'leptonChargeSelection' = %s !!" 
% leptonChargeSelection) for lepton_type in [ 'e', 'm' ]: lepton_mcClosure = "Fakeable_mcClosure_%s" % lepton_type if lepton_mcClosure not in self.lepton_selections: continue lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_mcClosure, "enabled") key_addBackgrounds_job_fakes = getKey("fakes_mc", lepton_selection_and_frWeight, leptonChargeSelection) histogramDir_mcClosure = self.mcClosure_dir['%s_%s' % (lepton_mcClosure, leptonChargeSelection)] self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job].update({ 'add_Clos_%s' % lepton_type : ("Fakeable_mcClosure_%s" % lepton_type) in self.lepton_selections, 'inputFile_nominal_%s' % lepton_type : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'histogramName_nominal_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_nominal, histogramToFit), 'inputFile_mcClosure_%s' % lepton_type : self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'], 'histogramName_mcClosure_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_mcClosure, histogramToFit) }) self.createCfg_add_syst_fakerate(self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job]) logging.info("Creating configuration files to run 'makePlots'") key_makePlots_dir = getKey("makePlots") if "OS" in self.leptonChargeSelections: key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS") key_makePlots_job = getKey("OS") self.jobOptions_make_plots[key_makePlots_job] = { 'executable' : self.executable_make_plots, 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_cfg.py" % self.channel), 'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s.png" % self.channel), 'histogramDir' : self.histogramDir_prep_dcard, 'label' : '4l', 'make_plots_backgrounds' : self.make_plots_backgrounds, } 
self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job]) if "SS" in self.leptonChargeSelections: key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS") key_makePlots_job = getKey("SS") self.jobOptions_make_plots[key_makePlots_job] = { 'executable' : self.executable_make_plots, 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_SS_cfg.py" % self.channel), 'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s_SS.png" % self.channel), 'histogramDir' : self.histogramDir_prep_dcard_SS, 'label' : "4l SS", 'make_plots_backgrounds' : self.make_plots_backgrounds, } self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job]) if "Fakeable_mcClosure" in self.lepton_selections: #TODO key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS") key_makePlots_job = getKey("Fakeable_mcClosure", "OS") self.jobOptions_make_plots[key_makePlots_job] = { 'executable' : self.executable_make_plots_mcClosure, 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_mcClosure_%s_cfg.py" % self.channel), 'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_mcClosure_%s.png" % self.channel) } self.createCfg_makePlots_mcClosure(self.jobOptions_make_plots[key_makePlots_job]) if self.is_sbatch: logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze) self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel) self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze) logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addBackgrounds) self.sbatchFile_addBackgrounds = 
os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_%s.py" % self.channel) self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds, self.jobOptions_addBackgrounds) self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_%s.py" % self.channel) self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum) logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFakes) self.sbatchFile_addFakes = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFakes_%s.py" % self.channel) self.createScript_sbatch(self.executable_addFakes, self.sbatchFile_addFakes, self.jobOptions_addFakes) logging.info("Creating Makefile") lines_makefile = [] self.addToMakefile_analyze(lines_makefile) self.addToMakefile_hadd_stage1(lines_makefile) self.addToMakefile_backgrounds_from_data(lines_makefile) self.addToMakefile_hadd_stage2(lines_makefile) self.addToMakefile_prep_dcard(lines_makefile) self.addToMakefile_add_syst_fakerate(lines_makefile) self.addToMakefile_make_plots(lines_makefile) self.addToMakefile_validate(lines_makefile) self.createMakefile(lines_makefile) logging.info("Done.") return self.num_jobs
def create(self):
    """Create all config files needed to run the complete analysis workflow.

    The generated jobs can afterwards be executed either locally or on the
    batch system.  In order, the method

    * registers and creates the per-sample and the common directories,
    * collects the input file lists of all processed samples,
    * configures one 'analyze' job per sample, systematic shift and job id,
    * wires up the file names of the hadd stage1, stage1_5 and stage2 steps
      and configures the 'addBackgrounds_LeptonFakeRate' job,
    * configures the datacard preparation if ``self.prep_dcard`` is set,
    * writes the batch submission scripts if ``self.is_sbatch`` is set,
    * writes the Makefile.

    Samples that are not flagged ``use_it``, or whose ``sample_category`` is
    "additional_signal_overlap" or "background_data_estimate", are ignored.
    No charge selection is applied anywhere in this workflow.
    """
    skipped_categories = [ "additional_signal_overlap", "background_data_estimate" ]
    samples_to_process = [
        (sample_name, sample_info)
        for sample_name, sample_info in self.samples.items()
        if sample_info["use_it"] and sample_info["sample_category"] not in skipped_categories
    ]

    # Per-sample directories: cfg and log files live below configDir,
    # everything else below outputDir.
    for sample_name, sample_info in samples_to_process:
        process_name = sample_info["process_name_specific"]
        key_dir = getKey(process_name)
        for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES ]:
            initDict(self.dirs, [ key_dir, dir_type ])
            if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                base_dir = self.configDir
            else:
                base_dir = self.outputDir
            self.dirs[key_dir][dir_type] = os.path.join(base_dir, dir_type, self.channel, process_name)

    # Common (sample-independent) directories.
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_HIST, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
        initDict(self.dirs, [ dir_type ])
        # DKEY_PLOT is deliberately not in this list yet ("to be added later").
        if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_HADD_RT ]:
            base_dir = self.configDir
        else:
            base_dir = self.outputDir
        self.dirs[dir_type] = os.path.join(base_dir, dir_type, self.channel)
    print("%s %s" % ("self.dirs = ", self.dirs))

    # self.dirs maps either directly to a path or to a {dir_type: path} dict.
    for dir_entry in self.dirs.values():
        if isinstance(dir_entry, dict):
            for dir_path in dir_entry.values():
                create_if_not_exists(dir_path)
        else:
            create_if_not_exists(dir_entry)

    inputFileLists = {}
    for sample_name, sample_info in samples_to_process:
        logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
        inputFileLists[sample_name] = generateInputFileList(
            sample_name, sample_info, self.max_files_per_job, self.debug)

    # Theory shape uncertainties are only evaluated for the sample category
    # they belong to.
    thu_shape_categories = [
        ("CMS_ttHl_thu_shape_ttH", "signal"),
        ("CMS_ttHl_thu_shape_ttW", "TTW"),
        ("CMS_ttHl_thu_shape_ttZ", "TTZ"),
    ]

    self.inputFileIds = {}
    for sample_name, sample_info in samples_to_process:
        process_name = sample_info["process_name_specific"]
        logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))

        # Sample-level settings, shared by all jobs of this sample.
        is_mc = (sample_info["type"] == "mc")
        lumi_scale = 1. if not (self.use_lumi and is_mc) else sample_info["xsection"] * self.lumi / sample_info["nof_events"]
        apply_genWeight = sample_info["genWeight"] if (is_mc and "genWeight" in sample_info) else False
        sample_category = sample_info["sample_category"]
        triggers = sample_info["triggers"]
        apply_trigger_bits = (is_mc and (self.era == "2015" or (self.era == "2016" and sample_info["reHLT"]))) or not is_mc

        inputFileList = inputFileLists[sample_name]
        for central_or_shift in self.central_or_shifts:
            # Systematic shifts are only run on MC.
            if central_or_shift != "central" and not is_mc:
                continue
            if any(central_or_shift.startswith(prefix) and sample_category != category
                   for prefix, category in thu_shape_categories):
                continue

            for jobId in inputFileList.keys():
                # build config files for executing analysis code
                key_dir = getKey(process_name)
                key_analyze_job = getKey(process_name, central_or_shift, jobId)
                ntupleFiles = inputFileList[jobId]
                if len(ntupleFiles) == 0:
                    # The original code formatted an undefined name here and
                    # crashed with a NameError instead of skipping the job.
                    print("Warning: ntupleFiles['%s'] = %s --> skipping job !!" % (key_analyze_job, ntupleFiles))
                    continue

                self.jobOptions_analyze[key_analyze_job] = {
                    'ntupleFiles' : ntupleFiles,
                    'cfgFile_modified' : os.path.join(
                        self.dirs[key_dir][DKEY_CFGS],
                        "analyze_%s_%s_%s_%i_cfg.py" % (self.channel, process_name, central_or_shift, jobId)),
                    'histogramFile' : os.path.join(
                        self.dirs[key_dir][DKEY_HIST],
                        "%s_%s_%i.root" % (process_name, central_or_shift, jobId)),
                    'logFile' : os.path.join(
                        self.dirs[key_dir][DKEY_LOGS],
                        "analyze_%s_%s_%s_%i.log" % (self.channel, process_name, central_or_shift, jobId)),
                    'sample_category' : sample_category,
                    'triggers' : triggers,
                    'absEtaBins_e' : self.absEtaBins_e,
                    'absEtaBins_mu' : self.absEtaBins_mu,
                    'absPtBins_e' : self.absPtBins_e,
                    'absPtBins_mu' : self.absPtBins_mu,
                    'use_HIP_mitigation_mediumMuonId' : True,
                    'is_mc' : is_mc,
                    'central_or_shift' : central_or_shift,
                    'lumi_scale' : lumi_scale,
                    'apply_genWeight' : apply_genWeight,
                    'apply_trigger_bits' : apply_trigger_bits,
                }
                self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job])

                # initialize input and output file names for hadd_stage1
                key_hadd_stage1 = getKey(process_name)
                self.inputFiles_hadd_stage1.setdefault(key_hadd_stage1, []).append(
                    self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(
                    self.dirs[DKEY_HIST],
                    "histograms_harvested_stage1_%s_%s.root" % (self.channel, process_name))

    # initialize input and output file names for hadd_stage1_5:
    # a single job that merges all stage-1 outputs
    key_hadd_stage1_5 = getKey('')
    self.inputFiles_hadd_stage1_5.setdefault(key_hadd_stage1_5, []).extend(
        self.outputFile_hadd_stage1.values())
    self.outputFile_hadd_stage1_5[key_hadd_stage1_5] = os.path.join(
        self.dirs[DKEY_HIST], "histograms_harvested_stage1_5.root")

    # Creating configuration files to run 'addBackgrounds_LeptonFakeRate' [stage 1.5]
    key_addBackgrounds_job = getKey('')
    self.jobOptions_addBackgrounds_LeptonFakeRate[key_addBackgrounds_job] = {
        'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5],
        'cfgFile_modified' : os.path.join(
            self.dirs[DKEY_CFGS],
            os.path.basename(self.cfgFile_addBackgrounds_LeptonFakeRate)),
        'outputFile' : os.path.join(self.dirs[DKEY_HIST], "addBackground_LeptonFakeRate.root"),
        'logFile' : os.path.join(
            self.dirs[DKEY_LOGS],
            os.path.basename(self.cfgFile_addBackgrounds_LeptonFakeRate.replace("_cfg.py", ".log"))),
    }
    self.createCfg_addBackgrounds_LeptonFakeRate(
        self.jobOptions_addBackgrounds_LeptonFakeRate[key_addBackgrounds_job])

    # initialize input and output file names for hadd_stage2:
    # all stage-1.5 outputs plus the addBackgrounds_LeptonFakeRate output
    key_hadd_stage2 = getKey('')
    inputFiles_stage2 = self.inputFiles_hadd_stage2.setdefault(key_hadd_stage2, [])
    inputFiles_stage2.extend(self.outputFile_hadd_stage1_5.values())
    inputFiles_stage2.append(
        self.jobOptions_addBackgrounds_LeptonFakeRate[key_addBackgrounds_job]['outputFile'])
    self.outputFile_hadd_stage2[key_hadd_stage2] = os.path.join(
        self.dirs[DKEY_HIST], "histograms_harvested_stage2.root")

    if self.prep_dcard:
        logging.info("Creating configuration files to run 'prepareDatacards_LeptonFakeRate'")
        # Work on private copies of the process lists.
        self.prep_dcard_signals = list(self.prep_dcard_signals)
        self.prep_dcard_processesToCopy = list(self.prep_dcard_processesToCopy)
        for histogramToFit in self.histograms_to_fit:
            key_prep_dcard_job = getKey(histogramToFit)
            self.jobOptions_prep_dcard[key_prep_dcard_job] = {
                'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2],
                'cfgFile_modified' : os.path.join(
                    self.dirs[DKEY_CFGS],
                    "prepareDatacards_LeptonFakeRate_%s_cfg.py" % (histogramToFit)),
                'datacardFile' : os.path.join(
                    self.dirs[DKEY_DCRD],
                    "prepareDatacards_%s.root" % (histogramToFit)),
                'histogramDir' : (self.histogramDir_prep_dcard),
                'histogramToFit' : histogramToFit,
                'label' : None
            }
            # The LeptonFakeRate-specific variant replaces the default createCfg_prep_dcard.
            self.createCfg_prep_dcard_LeptonFakeRate(self.jobOptions_prep_dcard[key_prep_dcard_job])

    if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(
            self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.createScript_sbatch_analyze(
            self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
        self.sbatchFile_addBackgrounds_LeptonFakeRate = os.path.join(
            self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_LeptonFakeRate_%s.py" % self.channel)
        self.createScript_sbatch(
            self.executable_addBackgrounds_LeptonFakeRate,
            self.sbatchFile_addBackgrounds_LeptonFakeRate,
            self.jobOptions_addBackgrounds_LeptonFakeRate)

    # The fake-rate computation ('comp_jetToTauFakeRate') and 'makePlots'
    # steps are not configured by this workflow; their Makefile targets and
    # the hadd_stage1_5 target are marked "to be implemented later".
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_backgrounds_from_data(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    self.targets = list(self.outputFile_hadd_stage2.values())
    self.createMakefile(lines_makefile)

    logging.info("Done")
def create(self):
    """Create all config files needed to run the complete analysis workflow.

    The generated jobs can afterwards be executed either locally or on the
    batch system.  In order, the method

    * registers and creates the per-sample and the common directories,
    * collects the input file lists of all processed samples,
    * configures one 'analyze' job per sample and job id and registers its
      histogram file as input of the sample's hadd stage1 step,
    * writes the batch submission script if ``self.is_sbatch`` is set,
    * writes the Makefile.

    Samples that are not flagged ``use_it``, or whose ``sample_category`` is
    "additional_signal_overlap" or "background_data_estimate", are ignored.

    Returns:
        ``self.num_jobs``.
    """
    skipped_categories = [ "additional_signal_overlap", "background_data_estimate" ]
    samples_to_process = [
        (sample_name, sample_info)
        for sample_name, sample_info in self.samples.items()
        if sample_info["use_it"] and sample_info["sample_category"] not in skipped_categories
    ]

    # Per-sample directories: cfg and log files live below configDir,
    # everything else below outputDir.
    for sample_name, sample_info in samples_to_process:
        process_name = sample_info["process_name_specific"]
        key_dir = getKey(process_name)
        for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES ]:
            initDict(self.dirs, [ key_dir, dir_type ])
            if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                base_dir = self.configDir
            else:
                base_dir = self.outputDir
            self.dirs[key_dir][dir_type] = os.path.join(base_dir, dir_type, self.channel, process_name)

    # Common (sample-independent) directories.
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
        initDict(self.dirs, [ dir_type ])
        if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
            base_dir = self.configDir
        else:
            base_dir = self.outputDir
        self.dirs[dir_type] = os.path.join(base_dir, dir_type, self.channel)

    # self.dirs maps either directly to a path or to a {dir_type: path} dict.
    for dir_entry in self.dirs.values():
        if isinstance(dir_entry, dict):
            for dir_path in dir_entry.values():
                create_if_not_exists(dir_path)
        else:
            create_if_not_exists(dir_entry)

    inputFileLists = {}
    for sample_name, sample_info in samples_to_process:
        logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
        inputFileLists[sample_name] = generateInputFileList(
            sample_name, sample_info, self.max_files_per_job, self.debug)

    for sample_name, sample_info in samples_to_process:
        process_name = sample_info["process_name_specific"]
        logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))

        sample_category = sample_info["sample_category"]
        is_mc = (sample_info["type"] == "mc")

        inputFileList = inputFileLists[sample_name]
        for jobId in inputFileList.keys():
            # build config files for executing analysis code
            key_dir = getKey(process_name)
            key_analyze_job = getKey(process_name, jobId)
            ntupleFiles = inputFileList[jobId]
            if len(ntupleFiles) == 0:
                # The original code formatted an undefined name here and
                # crashed with a NameError instead of skipping the job.
                print("Warning: ntupleFiles['%s'] = %s --> skipping job !!" % (key_analyze_job, ntupleFiles))
                continue

            self.jobOptions_analyze[key_analyze_job] = {
                'ntupleFiles' : ntupleFiles,
                'cfgFile_modified' : os.path.join(
                    self.dirs[key_dir][DKEY_CFGS],
                    "analyze_%s_%s_%i_cfg.py" % (self.channel, process_name, jobId)),
                'histogramFile' : os.path.join(
                    self.dirs[key_dir][DKEY_HIST],
                    "%s_%i.root" % (process_name, jobId)),
                'logFile' : os.path.join(
                    self.dirs[key_dir][DKEY_LOGS],
                    "analyze_%s_%s_%i.log" % (self.channel, process_name, jobId)),
                'sample_category' : sample_category,
                'hadTau_selection' : self.hadTau_selection,
                'use_HIP_mitigation_mediumMuonId' : True,
                'is_mc' : is_mc,
                'lumi_scale' : 1. if not (self.use_lumi and is_mc) else sample_info["xsection"] * self.lumi / sample_info["nof_events"],
                'apply_genWeight' : sample_info["genWeight"] if (is_mc and "genWeight" in sample_info) else False,
                'selectBDT' : True,
                'changeBranchNames' : self.changeBranchNames
            }
            self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job])

            # initialize input and output file names for hadd_stage1
            key_hadd_stage1 = getKey(process_name)
            self.inputFiles_hadd_stage1.setdefault(key_hadd_stage1, []).append(
                self.jobOptions_analyze[key_analyze_job]['histogramFile'])
            self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(
                self.dirs[DKEY_HIST],
                "histograms_harvested_stage1_%s_%s.root" % (self.channel, process_name))
            # NOTE(review): appended once per job, so a sample with several
            # jobs lists its stage-1 target repeatedly -- confirm intended.
            self.targets.append(self.outputFile_hadd_stage1[key_hadd_stage1])

    if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(
            self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.createScript_sbatch_analyze(
            self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done")

    return self.num_jobs
def create(self):
    """Create all config files needed for the PU profile production.

    For every enabled MC sample this registers the per-job input files and
    the config, log, script and temporary-output paths, writes the per-job
    projection config through ``createCfg_project`` and records the
    temporary outputs that are to be merged into the sample's output file.
    Afterwards the sbatch submission script (only if ``self.is_sbatch``)
    and the Makefile are generated.

    Samples whose output file already exists and passes
    ``tools_is_file_ok`` are skipped, and so are jobs without input files.

    Returns:
        ``self.num_jobs``; its ``'project'`` entry is incremented by the
        value returned from ``createScript_sbatch`` when ``self.is_sbatch``
        is set.

    Raises:
        RuntimeError: if ``self.projection_module`` is not ``'puHist'`` and
            ``self.ref_genWeights`` has no entry for a processed sample.
    """
    # self.dirs maps a key either to a single path or to a dict of paths.
    for dir_entry in self.dirs.values():
        if isinstance(dir_entry, dict):
            for dir_path in dir_entry.values():
                create_if_not_exists(dir_path)
        else:
            create_if_not_exists(dir_entry)

    self.inputFileIds = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info['use_it']:
            continue

        process_name = sample_info["process_name_specific"]
        # Only MC samples are processed here.
        if sample_info["type"] != "mc":
            continue

        logging.info(
            "Creating configuration files to run '%s' for sample %s",
            self.executable, process_name
        )

        inputFileList = generateInputFileList(sample_info, self.max_files_per_job)
        key_dir = getKey(process_name)
        sample_dirs = self.dirs[key_dir]

        outputFile = os.path.join(sample_dirs[DKEY_HISTO], "%s.root" % process_name)
        # Registered even when the sample is skipped below, so that the
        # output file stays known to the later stages.
        self.outputFiles[process_name] = {
            'inputFiles': [],
            'outputFile': outputFile,
        }
        if os.path.isfile(outputFile) and tools_is_file_ok(outputFile, min_file_size=2000):
            logging.info('File %s already exists --> skipping job', outputFile)
            continue

        for jobId, job_input_files in inputFileList.items():
            key_file = getKey(sample_name, jobId)

            self.inputFiles[key_file] = job_input_files
            if len(job_input_files) == 0:
                logging.warning(
                    "'%s' = %s --> skipping job !!", key_file, job_input_files
                )
                continue

            self.cfgFiles_projection[key_file] = os.path.join(
                sample_dirs[DKEY_CFGS], "project_%s_%i_cfg.txt" % (process_name, jobId)
            )
            self.outputFiles_tmp[key_file] = os.path.join(
                sample_dirs[DKEY_HISTO_TMP], "histogram_%i.root" % jobId
            )
            self.logFiles_projection[key_file] = os.path.join(
                sample_dirs[DKEY_LOGS], "project_%s_%i.log" % (process_name, jobId)
            )
            self.scriptFiles_projection[key_file] = os.path.join(
                sample_dirs[DKEY_CFGS], "project_%s_%i_cfg.sh" % (process_name, jobId)
            )

            # NOTE(review): the nesting below was reconstructed from
            # whitespace-collapsed source; the sample-specific suffixes are
            # assumed to apply only to the "count" module -- confirm.
            projection_module = self.projection_module
            if projection_module == "count":
                projection_module = "countHistogramAll"
                if sample_name.startswith('/TTTo'):
                    projection_module += "CompTopRwgt"
                elif sample_info['sample_category'].startswith('ttH'):
                    projection_module += "CompHTXS"
                elif isSplitByNlheJet(process_name):
                    projection_module += "SplitByLHENjet"
                elif isSplitByNlheHT(process_name):
                    projection_module += "SplitByLHEHT"
                elif isSplitByNlheJetHT(process_name, sample_name):
                    projection_module += "SplitByLHENjetHT"

            job_options = {
                'histName': process_name,
                'inputFiles': self.inputFiles[key_file],
                'cfgFile_path': self.cfgFiles_projection[key_file],
                'outputFile': self.outputFiles_tmp[key_file],
                'logFile': self.logFiles_projection[key_file],
                'scriptFile': self.scriptFiles_projection[key_file],
                'projection_module': projection_module,
            }
            self.jobOptions_sbatch[key_file] = job_options

            if self.projection_module != 'puHist':
                # Check membership *before* the lookup; otherwise a missing
                # entry surfaces as a bare KeyError and the descriptive
                # error below can never be raised.
                if process_name not in self.ref_genWeights:
                    raise RuntimeError(
                        "Unable to find reference LHE weight for process %s" % process_name
                    )
                job_options['ref_genWeight'] = self.ref_genWeights[process_name]

            self.createCfg_project(job_options)
            self.outputFiles[process_name]['inputFiles'].append(self.outputFiles_tmp[key_file])

    if self.is_sbatch:
        logging.info(
            "Creating script for submitting '%s' jobs to batch system", self.executable
        )
        self.num_jobs['project'] += self.createScript_sbatch(
            self.executable, self.sbatchFile_projection, self.jobOptions_sbatch
        )

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_project(lines_makefile)
    self.addToMakefile_hadd(lines_makefile)
    if self.plot:
        self.addToMakefile_plot(lines_makefile)
    self.addToMakefile_finalHadd(lines_makefile)
    self.createMakefile(lines_makefile)
    logging.info("Done")

    return self.num_jobs
def create(self):
    """Create all config files needed for the preselection Ntuple production.

    For every enabled sample (except the ``additional_signal_overlap`` and
    ``background_data_estimate`` categories) this registers the per-job
    input file, config, output and log paths and writes the per-job config
    through ``createCfg_preselNtuple``. Afterwards the sbatch submission
    script (only if ``self.is_sbatch``) and the Makefile are generated.

    If ``self.rle_directory`` is set, ``<process_name>.txt`` inside it is
    passed on as the RLE file; when that file does not exist an error is
    logged and an empty string is passed instead.

    Jobs without input files are skipped with a warning.

    Raises:
        AssertionError: if a job has more than one input file.
    """
    # self.dirs maps a key either to a single path or to a dict of paths.
    for dir_entry in self.dirs.values():
        if isinstance(dir_entry, dict):
            for dir_path in dir_entry.values():
                create_if_not_exists(dir_path)
        else:
            create_if_not_exists(dir_entry)

    self.inputFileIds = {}
    skipped_categories = ["additional_signal_overlap", "background_data_estimate"]
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or sample_info["sample_category"] in skipped_categories:
            continue

        process_name = sample_info["process_name_specific"]
        is_mc = (sample_info["type"] == "mc")

        rle_filename = ''
        if self.rle_directory:
            rle_filename = os.path.join(
                self.rle_directory, "{base_name}.txt".format(base_name=process_name)
            )
            if not os.path.isfile(rle_filename):
                logging.error("No such RLE file: %s; setting it to ''", rle_filename)
                rle_filename = ''
            else:
                logging.info("Using RLE file %s", rle_filename)

        logging.info(
            "Creating configuration files to run '%s' for sample %s",
            self.executable_preselNtuple, process_name
        )

        inputFileList = generateInputFileList(
            sample_name, sample_info, self.max_files_per_job, self.debug
        )
        for jobId, job_input_files in inputFileList.items():
            key_dir = getKey(sample_name)
            key_file = getKey(sample_name, jobId)

            self.inputFiles[key_file] = job_input_files
            if len(job_input_files) == 0:
                # Reported through logging (as the sibling create() methods
                # do) instead of a Python-2-only print statement.
                logging.warning(
                    "Warning: ntupleFiles['%s'] = %s --> skipping job !!",
                    key_file, job_input_files
                )
                continue

            # Each job is expected to process exactly one input file.
            assert len(job_input_files) == 1, "There is more than one input file!"

            job_dirs = self.dirs[key_dir]
            self.cfgFiles_preselNtuple_modified[key_file] = os.path.join(
                job_dirs[DKEY_CFGS],
                "preselNtuple_%s_%s_%i_cfg.py" % (self.channel, process_name, jobId)
            )
            self.outputFiles[key_file] = os.path.join(
                job_dirs[DKEY_NTUPLES], "%s_%i.root" % (process_name, jobId)
            )
            self.logFiles_preselNtuple[key_file] = os.path.join(
                job_dirs[DKEY_LOGS],
                "preselNtuple_%s_%s_%i.log" % (self.channel, process_name, jobId)
            )
            self.createCfg_preselNtuple(
                self.inputFiles[key_file],
                self.outputFiles[key_file],
                self.era,
                is_mc,
                self.cfgFiles_preselNtuple_modified[key_file],
                rle_filename
            )

    if self.is_sbatch:
        logging.info(
            "Creating script for submitting '%s' jobs to batch system",
            self.executable_preselNtuple
        )
        self.createScript_sbatch()

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_preselNtuple(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done")