def GetSingleMuHistograms(self):
    """Load the SingleMu_2012 dijet-mass histograms for the muon-trigger analyses.

    For every analysis in the hard-coded list the B-histogram file is opened
    and the following are stored, keyed by analysis name:

    * ``self._mjj_histograms_fine``: ``BHistograms/h_pfjet_mjj``, renamed,
      detached from the file and finally merged 5 bins at a time;
    * ``self._mjj_histograms``: the same spectrum rebinned to
      ``self._mass_bins`` with ``histogram_tools.rebin_histogram``;
    * ``self._mjj_histograms_csvorder`` / ``self._mjj_histograms_vetothirdjet``:
      the rebinned ``h_pfjet_mjj_csvorder`` / ``h_pfjet_mjj_vetothirdjet``
      spectra, only for analyses without "highmass" in their name.

    The analysis name is appended to ``self._analyses``. Returns nothing.
    """
    # Earlier, longer list of analyses, kept for reference:
    # this_analyses = ["trigmu_highmass_CSVTM", "trigmu_lowmass_CSVTM",
    #                  "trigmubbh_highmass_CSVTM", "trigmubbl_lowmass_CSVTM",
    #                  "trigmubbll_lowmass_CSVTM", "trigmu24i_lowmass_CSVTM",
    #                  "trigmu24ibbl_lowmass_CSVTM", "trigmu24ibbll_lowmass_CSVTM",
    #                  "trigmu40_lowmass_CSVTM", "trigmu40bbl_lowmass_CSVTM",
    #                  "trigmu40bbll_lowmass_CSVTM"]
    this_analyses = ["trigmu24i_lowmass_CSVTM", "trigmu24ibbl_lowmass_CSVTM"]
    this_analyses.extend(["trigmu24i_highmass_CSVTM", "trigmu24ibbh_highmass_CSVTM"])

    for analysis in this_analyses:
        filename = analysis_config.get_b_histogram_filename(analysis, "SingleMu_2012")
        print("Opening " + filename)
        f = ROOT.TFile(filename, "READ")

        # Fine-binned spectrum; SetDirectory(0) detaches it so it survives f.Close().
        h_fine = f.Get("BHistograms/h_pfjet_mjj")
        h_fine.SetName("h_" + analysis + "_mjj_fine")
        h_fine.SetDirectory(0)
        self._mjj_histograms_fine[analysis] = h_fine

        # Analysis binning, derived from the fine histogram before it is merged below.
        h_rebinned = histogram_tools.rebin_histogram(h_fine, self._mass_bins, normalization_bin_width=1)
        h_rebinned.SetName("h_" + analysis + "_mjj")
        h_rebinned.SetDirectory(0)
        self._mjj_histograms[analysis] = h_rebinned

        # NOTE(review): both variants are treated as low-mass-only here -- confirm
        # that the highmass files are not expected to provide vetothirdjet.
        if "highmass" not in analysis:
            variants = (
                (self._mjj_histograms_csvorder, "csvorder"),
                (self._mjj_histograms_vetothirdjet, "vetothirdjet"),
            )
            for store, suffix in variants:
                h_variant = f.Get("BHistograms/h_pfjet_mjj_" + suffix)
                h_variant.SetName("h_" + analysis + "_mjj_" + suffix)
                h_variant.SetDirectory(0)
                store[analysis] = histogram_tools.rebin_histogram(h_variant, self._mass_bins, normalization_bin_width=1)
                # Detach the rebinned result as well, as is done for the main
                # spectrum, so it cannot be deleted together with the file.
                store[analysis].SetDirectory(0)

        if "bbll" in analysis:
            # Prescale for singlemu + 60/53. The prescale was not computed for these analyses.
            self._mjj_histograms_fine[analysis].Scale(1.7)
            self._mjj_histograms[analysis].Scale(1.7)

        self._mjj_histograms_fine[analysis].Rebin(5)
        self._analyses.append(analysis)
        f.Close()
def RunBHistogramsBackground(analysis, sample, files_per_job=1, retar=False, data_source=None):
    """Submit condor jobs running the BHistograms cfg on a background sample.

    Creates (and changes into) the per-analysis/sample submit directory,
    builds and runs a ``condor_cmsRun`` command line, and writes a
    ``postprocessing.sh`` script there that merges the per-job outputs with
    ``hadd``. The original working directory is always restored.

    Args:
        analysis: Key into ``analysis_config.analysis_cfgs``.
        sample: Key into ``analysis_config.files_QCDBEventTree``.
        files_per_job: Value passed to ``--files-per-job``.
        retar: If true, pass ``--retar`` to ``condor_cmsRun``.
        data_source: Currently unused.
    """
    # Create working directory and cd
    start_directory = os.getcwd()
    working_directory = dijet_directory + "/data/EightTeeEeVeeBee/BHistograms/condor/submit_" + analysis + "_" + sample
    os.system("mkdir -pv " + working_directory)
    os.chdir(working_directory)
    try:
        merged_basename = os.path.basename(analysis_config.get_b_histogram_filename(analysis, sample))
        # Per-job output name; the escaped $(Cluster)/$(Process) placeholders are
        # passed through the shell literally (raw string: same bytes as before).
        output_filename = merged_basename.replace(".root", r"_\$\(Cluster\)_\$\(Process\).root")

        command = "condor_cmsRun"
        if retar:
            command += " --retar "
        command += " --file-list=" + analysis_config.files_QCDBEventTree[sample] + " "
        command += " --files-per-job=" + str(files_per_job)
        command += " --submit-file=submit_" + analysis + "_" + sample + ".jdl "
        command += " --output-tag=BHistograms_" + sample + " "
        command += " --run "
        command += " " + dijet_directory + "/CMSSW_5_3_32_patch3/src/MyTools/RootUtils/scripts/cmsRun_wrapper.sh " + analysis_config.analysis_cfgs[analysis]
        command += " dataSource=simulation "
        command += " dataType=background "
        command += " outputFile=" + output_filename
        print(command)
        os.system(command)
        # tmp.txt belonged to a now-disabled file-list mechanism; remove any stale copy.
        os.system("rm -f tmp.txt")

        # Script the user runs afterwards to merge all per-job outputs.
        with open('postprocessing.sh', 'w') as postprocessing_file:
            postprocessing_file.write("#!/bin/bash\n")
            postprocessing_file.write(
                "hadd " + working_directory + "/" + merged_basename + " "
                + output_filename.replace(r"_\$\(Cluster\)_\$\(Process\)", "*") + "\n")
    finally:
        # cd back, even if submission failed part-way
        os.chdir(start_directory)
def __init__(self, numerator_analysis, denominator_analysis, samples):
    """Store the analysis names and open the numerator/denominator input files.

    Args:
        numerator_analysis: Analysis name used for the efficiency numerator.
        denominator_analysis: Analysis name used for the denominator.
        samples: Iterable of sample names; one input file per sample is
            opened for each of numerator and denominator.

    For the unsliced trigjetht analyses no file is opened here, because the
    stitched ("franken") histogram has to be assembled later.
    """
    # Report the actual arguments rather than a module-level loop variable
    # ("sr"), which is not defined in this scope.
    print("[OfflineBTagPlots::__init__] Initializing with numerator = " + numerator_analysis
          + ", denominator = " + denominator_analysis
          + ", samples = " + str(samples))
    self._samples = samples
    self._numerator_analysis = numerator_analysis
    self._denominator_analysis = denominator_analysis
    self._input_files = {"numerator": {}, "denominator": {}}

    # For trigjetht, have to cobble together the frankenhist later.
    open_numerator = numerator_analysis not in ("trigjetht_eta1p7_CSVTM", "trigjetht_eta2p2_CSVTM")
    open_denominator = denominator_analysis not in ("trigjetht_eta1p7", "trigjetht_eta2p2")

    for sample in self._samples:
        if open_numerator:
            numerator_filename = analysis_config.get_b_histogram_filename(self._numerator_analysis, sample)
            print("Numerator input file = " + numerator_filename)
            self._input_files["numerator"][sample] = TFile(numerator_filename)
        if open_denominator:
            denominator_filename = analysis_config.get_b_histogram_filename(self._denominator_analysis, sample)
            print("Denominator input file = " + denominator_filename)
            self._input_files["denominator"][sample] = TFile(denominator_filename)
def RunBHistogramsBackground(analysis, sample, files_per_job=1, retar=False, data_source=None):
    """Submit condor jobs running the BHistograms cfg on a background sample.

    Creates (and changes into) the per-analysis/sample submit directory,
    builds and runs a ``condor_cmsRun`` command line, and writes a
    ``postprocessing.sh`` script there that merges the per-job outputs with
    ``hadd``. The original working directory is always restored.

    Args:
        analysis: Key into ``analysis_config.analysis_cfgs``.
        sample: Key into ``analysis_config.files_QCDBEventTree``.
        files_per_job: Value passed to ``--files-per-job``.
        retar: If true, pass ``--retar`` to ``condor_cmsRun``.
        data_source: Currently unused.
    """
    # Create working directory and cd
    start_directory = os.getcwd()
    working_directory = dijet_directory + "/data/EightTeeEeVeeBee/BHistograms/condor/submit_" + analysis + "_" + sample
    os.system("mkdir -pv " + working_directory)
    os.chdir(working_directory)
    try:
        merged_basename = os.path.basename(
            analysis_config.get_b_histogram_filename(analysis, sample))
        # Per-job output name; the escaped $(Cluster)/$(Process) placeholders are
        # passed through the shell literally (raw string: same bytes as before).
        output_filename = merged_basename.replace(
            ".root", r"_\$\(Cluster\)_\$\(Process\).root")

        command = "condor_cmsRun"
        if retar:
            command += " --retar "
        command += " --file-list=" + analysis_config.files_QCDBEventTree[sample] + " "
        command += " --files-per-job=" + str(files_per_job)
        command += " --submit-file=submit_" + analysis + "_" + sample + ".jdl "
        command += " --output-tag=BHistograms_" + sample + " "
        command += " --run "
        command += " " + dijet_directory + "/CMSSW_5_3_32_patch3/src/MyTools/RootUtils/scripts/cmsRun_wrapper.sh " + analysis_config.analysis_cfgs[analysis]
        command += " dataSource=simulation "
        command += " dataType=background "
        command += " outputFile=" + output_filename
        print(command)
        os.system(command)
        # tmp.txt belonged to a now-disabled file-list mechanism; remove any stale copy.
        os.system("rm -f tmp.txt")

        # Script the user runs afterwards to merge all per-job outputs.
        with open('postprocessing.sh', 'w') as postprocessing_file:
            postprocessing_file.write("#!/bin/bash\n")
            postprocessing_file.write(
                "hadd " + working_directory + "/" + merged_basename + " "
                + output_filename.replace(r"_\$\(Cluster\)_\$\(Process\)", "*") + "\n")
    finally:
        # cd back, even if submission failed part-way
        os.chdir(start_directory)
def signal_acc_eff():
    """Print LaTeX table rows with the signal acceptance times efficiency.

    For each analysis, signal model and mass point, the FULLSIM signal
    B-histogram file is opened and the integral of ``h_pfjet_mjj`` inside
    the analysis' mjj window is divided by the integral of
    ``h_sample_nevents``. One tab/ampersand separated row is printed per
    point (analysis, model, mass, nsignal, nevents, acceptance*efficiency).
    The 350 GeV point is skipped for trigbbh_CSVTM.
    """
    for analysis in ["trigbbh_CSVTM", "trigbbl_CSVTM"]:
        # mjj window over which the signal yield is integrated.
        if "bbl" in analysis:
            mjj_range = [296, 1246]
        elif "bbh" in analysis:
            mjj_range = [526, 1455]
        for model in ["Hbb", "RSG", "ZPrime"]:
            for mass in [350, 400, 500, 600, 750, 900, 1200]:
                if analysis == "trigbbh_CSVTM" and mass == 350:
                    continue
                signal_tag = analysis_config.simulation.get_signal_tag(model, mass, "FULLSIM")
                f = TFile(analysis_config.get_b_histogram_filename(analysis, signal_tag), "READ")
                nevents = f.Get("BHistograms/h_sample_nevents").Integral()
                h_mjj = f.Get("BHistograms/h_pfjet_mjj")
                # The limits are nudged inward by 1e-5 before the bin lookup,
                # presumably so a limit on a bin edge picks the bin inside the window.
                low_bin = h_mjj.GetXaxis().FindBin(mjj_range[0] + 1.e-5)
                high_bin = h_mjj.GetXaxis().FindBin(mjj_range[1] - 1.e-5)
                nsignal = h_mjj.Integral(low_bin, high_bin)
                # Only plain numbers are needed from here on, so release the
                # file instead of keeping one open per mass point.
                f.Close()

                # Guard against empty samples.
                if nevents > 0:
                    acceff = 1. * nsignal / nevents
                else:
                    acceff = 0.
                columns = [analysis, model, str(mass), str(nsignal), str(nevents), str(acceff)]
                print("\t&\t".join(columns) + "\t\\\\\n")
def GetBJetPlusXHistograms(self):
    """Load the BJetPlusX dijet-mass histograms for the b-trigger analyses.

    For every (analysis, sample) pair the B-histogram file is opened and the
    following are stored under the key ``analysis + "_" + sample``:

    * ``self._mjj_histograms_fine``: ``BHistograms/h_pfjet_mjj``, renamed,
      detached from the file and merged 5 bins at a time;
    * ``self._mjj_histograms``: the same spectrum rebinned to
      ``self._mass_bins`` with ``histogram_tools.rebin_histogram``;
    * ``self._mjj_histograms_csvorder`` / ``self._mjj_histograms_vetothirdjet``:
      the rebinned ``h_pfjet_mjj_csvorder`` / ``h_pfjet_mjj_vetothirdjet``.

    The key is appended to ``self._analyses``. For "bbll" analyses the fine
    and rebinned mjj spectra are scaled by 1.7. Returns nothing.
    """
    this_analyses = ["trigbbh_CSVTM", "trigbbl_CSVTM", "trigbbll_CSVTM", "trigbbh_trigbbl_CSVTM"]
    this_samples = ["BJetPlusX_2012", "BJetPlusX_2012BCD"]
    for analysis in this_analyses:
        for sample in this_samples:
            name = analysis + "_" + sample
            f = ROOT.TFile(analysis_config.get_b_histogram_filename(analysis, sample), "READ")

            # Fine-binned spectrum; SetDirectory(0) detaches it so it survives f.Close().
            h_fine = f.Get("BHistograms/h_pfjet_mjj")
            h_fine.SetName("h_" + name + "_mjj_fine")
            h_fine.SetDirectory(0)
            self._mjj_histograms_fine[name] = h_fine

            # Analysis binning, derived from the fine histogram before it is merged below.
            h_rebinned = histogram_tools.rebin_histogram(h_fine, self._mass_bins, normalization_bin_width=1)
            h_rebinned.SetName("h_" + name + "_mjj")
            h_rebinned.SetDirectory(0)
            self._mjj_histograms[name] = h_rebinned

            variants = (
                (self._mjj_histograms_csvorder, "csvorder"),
                (self._mjj_histograms_vetothirdjet, "vetothirdjet"),
            )
            for store, suffix in variants:
                h_variant = f.Get("BHistograms/h_pfjet_mjj_" + suffix)
                h_variant.SetName("h_" + name + "_mjj_" + suffix)
                h_variant.SetDirectory(0)
                store[name] = histogram_tools.rebin_histogram(h_variant, self._mass_bins, normalization_bin_width=1)
                # Detach the rebinned result as well, as is done for the main
                # spectrum, so it cannot be deleted together with the file.
                store[name].SetDirectory(0)

            self._mjj_histograms_fine[name].Rebin(5)
            self._analyses.append(name)
            f.Close()

            if "bbll" in analysis:
                self._mjj_histograms[name].Scale(1.7)
                self._mjj_histograms_fine[name].Scale(1.7)
def GetJetHTHistogram(self):
    """Build the "JetHT" dijet-mass spectrum by stitching HT-trigger slices.

    For each signal region ("highmass", then "lowmass") the JetHT_2012BCD
    ``BHistograms/h_pfjet_mjj`` histogram is read for the HT200..HT550 slice
    analyses and for the unprescaled analysis. The slices are combined by
    ``self.FrankenHist`` using a fixed mjj window per slice, and the result
    is stored in ``self._mjj_histograms_fine["JetHT"]`` (then merged 5 bins
    at a time) and, rebinned to ``self._mass_bins``, in
    ``self._mjj_histograms["JetHT"]``.
    """
    # NOTE(review): both passes write to the "JetHT" key, so the lowmass pass
    # replaces the highmass result and "JetHT" is appended to self._analyses
    # twice -- confirm this is intended.
    for sr_name in ["highmass", "lowmass"]:
        sample = "JetHT_2012BCD"
        # Low-mass analyses carry an extra "_eta1p7" tag in their name.
        name_suffix = "_eta1p7_CSVTM" if sr_name == "lowmass" else "_CSVTM"

        slice_names = ["HT" + str(threshold) for threshold in range(200, 600, 50)]
        analyses = {}
        for threshold in range(200, 600, 50):
            analyses["HT" + str(threshold)] = "trigjetht" + str(threshold) + name_suffix

        # Prescaled slices: read, tag the histogram name with the analysis, detach.
        HT_slice_histograms = {}
        for slice_name in slice_names:
            slice_filename = analysis_config.get_b_histogram_filename(analyses[slice_name], sample)
            slice_file = TFile(slice_filename, "READ")
            slice_hist = slice_file.Get("BHistograms/h_pfjet_mjj")
            HT_slice_histograms[slice_name] = slice_hist
            print("On file " + slice_filename)
            slice_hist.SetName(slice_hist.GetName() + "_" + analyses[slice_name])
            slice_hist.SetDirectory(0)
            slice_file.Close()

        # Unprescaled trigger: same treatment, under the "HTUnprescaled" key.
        unprescaled_analysis_name = "trigjetht" + name_suffix
        analyses["HTUnprescaled"] = unprescaled_analysis_name
        f_unprescaled = TFile(analysis_config.get_b_histogram_filename(unprescaled_analysis_name, sample), "READ")
        unprescaled_hist = f_unprescaled.Get("BHistograms/h_pfjet_mjj")
        HT_slice_histograms["HTUnprescaled"] = unprescaled_hist
        unprescaled_hist.SetName(unprescaled_hist.GetName() + "_" + analyses["HTUnprescaled"])
        unprescaled_hist.SetDirectory(0)
        f_unprescaled.Close()
        HT_slices = slice_names + ["HTUnprescaled"]

        # mjj window passed to FrankenHist for each slice.
        ranges = {
            "HT200": [220, 386],
            "HT250": [386, 489],
            "HT300": [489, 526],
            "HT350": [526, 606],
            "HT400": [606, 649],
            "HT450": [649, 740],
            "HT500": [740, 788],
            "HT550": [788, 890],
            #"HT650":[800, 890],
            "HTUnprescaled": [890, 2000]
        }

        self._analyses.append("JetHT")
        stitched = self.FrankenHist(HT_slices, HT_slice_histograms, ranges)
        self._mjj_histograms_fine["JetHT"] = stitched
        self._mjj_histograms["JetHT"] = histogram_tools.rebin_histogram(stitched, self._mass_bins, normalization_bin_width=1)
        stitched.Rebin(5)
def __init__(self, numerator_analysis, denominator_analysis, samples):
    """Store the analysis names and open the numerator/denominator input files.

    Args:
        numerator_analysis: Analysis name used for the efficiency numerator.
        denominator_analysis: Analysis name used for the denominator.
        samples: Iterable of sample names; one input file per sample is
            opened for each of numerator and denominator.

    For the unsliced trigjetht analyses no file is opened here, because the
    stitched ("franken") histogram has to be assembled later.
    """
    # Report the actual arguments rather than a module-level loop variable
    # ("sr"), which is not defined in this scope.
    print("[OfflineBTagPlots::__init__] Initializing with numerator = " + numerator_analysis
          + ", denominator = " + denominator_analysis
          + ", samples = " + str(samples))
    self._samples = samples
    self._numerator_analysis = numerator_analysis
    self._denominator_analysis = denominator_analysis
    self._input_files = {"numerator": {}, "denominator": {}}

    # For trigjetht, have to cobble together the frankenhist later.
    open_numerator = numerator_analysis not in ("trigjetht_eta1p7_CSVTM", "trigjetht_eta2p2_CSVTM")
    open_denominator = denominator_analysis not in ("trigjetht_eta1p7", "trigjetht_eta2p2")

    for sample in self._samples:
        if open_numerator:
            numerator_filename = analysis_config.get_b_histogram_filename(
                self._numerator_analysis, sample)
            print("Numerator input file = " + numerator_filename)
            self._input_files["numerator"][sample] = TFile(numerator_filename)
        if open_denominator:
            denominator_filename = analysis_config.get_b_histogram_filename(
                self._denominator_analysis, sample)
            print("Denominator input file = " + denominator_filename)
            self._input_files["denominator"][sample] = TFile(denominator_filename)
"QCD_Pt-800to1000_TuneZ2star_8TeV_pythia6", "QCD_Pt-1000to1400_TuneZ2star_8TeV_pythia6", "QCD_Pt-1400to1800_TuneZ2star_8TeV_pythia6", "QCD_Pt-1800_TuneZ2star_8TeV_pythia6" ] analyses = [ "NoTrigger_eta2p2", "NoTrigger_eta2p2_CSVTM", "NoTrigger_eta1p7", "NoTrigger_eta1p7_CSVTM", "trigbbl_CSVTM", "trigbbh_CSVTM" ] lumi = 19710. for analysis in analyses: first = True output_file = TFile( analysis_config.get_b_histogram_filename( analysis, "QCD_TuneZ2star_8TeV_pythia6"), "RECREATE") output_directory = output_file.mkdir("BHistograms") histograms = {} for sample in qcd_samples: input_file = TFile( analysis_config.get_b_histogram_filename(analysis, sample), "READ") input_directory = input_file.Get("BHistograms") input_directory.cd() xsec = analysis_config.simulation.background_cross_sections[sample] normalization = xsec * lumi / input_file.Get( "BHistograms/h_sample_nevents").Integral() for key in gDirectory.GetListOfKeys(): key.Print() if "TH1" in key.GetClassName() or "TH2" in key.GetClassName(): hist = key.ReadObj() hist.Scale(normalization)
args = parser.parse_args() print args if args.ht: analyses = {} names = [] for mass in xrange(200, 700, 50): if mass == 600: continue names.append("HT" + str(mass)) analyses["HT" + str(mass)] = "trigjetht" + str(mass) sample = "JetHT_2012BCD" histograms = {} for name in names: f = TFile( analysis_config.get_b_histogram_filename( analyses[name], sample), "READ") #histograms[name] = mjj_common.apply_dijet_binning_normalized(f.Get("BHistograms/h_pfjet_mjj")) print "[debug] For name " + name + ", input events = " + str( f.Get("BHistograms/h_input_nevents").GetEntries()) print "[debug] \tPrescale = " + str( f.Get("BHistograms/h_pass_nevents_weighted").GetBinContent(1) / f.Get("BHistograms/h_pass_nevents").GetBinContent(1)) histograms[name] = f.Get("BHistograms/h_pfjet_mjj").Rebin(20) histograms[name].SetName(histograms[name].GetName() + "_" + name) histograms[name].SetDirectory(0) f.Close() ht_threshold_plot(names, histograms, save_tag="jetht_thresholds", x_range=[0., 1200.], logy=True)
style_counter = 0 for name in names: histograms[name].SetLineWidth(2) histograms[name].SetLineColor(seaborn.GetColorRoot("dark", style_counter)) histograms[name].Draw("hist same") #histograms[name].Draw("hist same") l.AddEntry(histograms[name], name, "pl") style_counter += 1 l.Draw() c.SaveAs(analysis_config.figure_directory + "/" + c.GetName() + ".pdf") if __name__ == "__main__": for model in ["Hbb", "RSG"]: for mass_point in [600, 750, 900, 1200]: f1 = TFile(analysis_config.get_b_histogram_filename("trigbbh_CSVTM", analysis_config.simulation.get_signal_tag(model, mass_point, "FULLSIM")), "READ") f2 = TFile(analysis_config.get_b_histogram_filename("trigbbh_CSVTM_bfat", analysis_config.simulation.get_signal_tag(model, mass_point, "FULLSIM"))) histograms = {} histograms["ak5"] = f1.Get("BHistograms/h_pfjet_mjj") histograms["ak5"].SetName("h_ak5_" + model + "_" + str(mass_point)) histograms["ak5"].SetDirectory(0) histograms["Fat, #DeltaR=1.1, p_{T}=30 GeV"] = f1.Get("BHistograms/h_fatjet_mjj") histograms["Fat, #DeltaR=1.1, p_{T}=30 GeV"].SetName("h_fat1p1_" + model + "_" + str(mass_point)) histograms["Fat, #DeltaR=1.1, p_{T}=30 GeV"].SetDirectory(0) histograms["Fat, #DeltaR=0.8, p_{T}=15 GeV"] = f2.Get("BHistograms/h_fatjet_mjj") histograms["Fat, #DeltaR=0.8, p_{T}=15 GeV"].SetName("h_fat0p8_" + model + "_" + str(mass_point)) histograms["Fat, #DeltaR=0.8, p_{T}=15 GeV"].SetDirectory(0)
data_samples = args.data_samples.split(",") signal_samples = [] if args.signal_samples == "all": for signal_model in ["Hbb", "RSG"]: for mass in [600, 750, 900, 1200]: signal_samples.append(analysis_config.simulation.get_signal_tag(signal_model, mass, "FULLSIM")) elif args.signal_samples: signal_samples = args.signal_samples.split(",") f_data = {} f_signal = {} for analysis in analyses: f_data[analysis] = {} for data_sample in data_samples: f_data[analysis][data_sample] = TFile(analysis_config.get_b_histogram_filename(analysis, data_sample), "READ") f_signal[analysis] = {} for signal_sample in signal_samples: f_signal[analysis][signal_sample] = TFile(analysis_config.get_b_histogram_filename(analysis, signal_sample), "READ") if args.mjj: for analysis in analyses: for data_sample in data_samples: save_file = TFile("/uscms/home/dryu/Dijets/data/EightTeeEeVeeBee/Results/mjj_fits_" + analysis + "_" + data_sample + ".root", "RECREATE") if "trigbbl" in analysis: fit_minima = {"pfjet":419.1} elif "trigbbh" in analysis: fit_minima = {"pfjet":526.1} for jet_type in ["pfjet"]: data_hist = f_data[analysis][data_sample].Get("BHistograms/h_" + jet_type + "_mjj")
import CMSDIJET.QCDAnalysis.mjj_fits
from CMSDIJET.QCDAnalysis.mjj_fits import *
import CMSDIJET.QCDAnalysis.analysis_configuration_8TeV as analysis_config
import CMSDIJET.QCDAnalysis.mjj_common as mjj_common
from CMSDIJET.QCDAnalysis.plots import AnalysisComparisonPlot


def f8(seq):
    """Return the items of seq without duplicates, keeping first-seen order.

    Order-preserving de-duplication recipe credited to Dave Kirby.
    """
    already_seen = set()
    unique_items = []
    for item in seq:
        if item in already_seen:
            continue
        already_seen.add(item)
        unique_items.append(item)
    return unique_items


if __name__ == "__main__":
    for analysis in ["lowmass", "highmass"]:
        # Pick the b-trigger and single-muon inputs for this mass region.
        if analysis == "highmass":
            bjetplusx_inputs = ("trigbbh_CSVTM", "BJetPlusX_2012")
            singlemu_analysis = "mu_highmass_CSVTM"
        else:
            bjetplusx_inputs = ("trigbbl_CSVTM", "BJetPlusX_2012BCD")
            singlemu_analysis = "mu_lowmass_CSVTM"
        f_bjetplusx = TFile(analysis_config.get_b_histogram_filename(bjetplusx_inputs[0], bjetplusx_inputs[1]), "READ")
        f_singlemu = TFile(analysis_config.get_b_histogram_filename(singlemu_analysis, "SingleMu_2012"), "READ")

        bjetplusx_nevents = f_bjetplusx.Get("BHistograms/h_input_nevents").GetEntries()
        print("[debug] For BJetsPlusX_2012, input events = " + str(bjetplusx_nevents))
        singlemu_nevents = f_singlemu.Get("BHistograms/h_input_nevents").GetEntries()
        print("[debug] For SingleMu_2012, input events = " + str(singlemu_nevents))

        # Merge 25 bins at a time and detach each histogram before closing its file.
        bjetplusx_histogram = f_bjetplusx.Get("BHistograms/h_pfjet_mjj").Rebin(25)
        bjetplusx_histogram.SetDirectory(0)
        f_bjetplusx.Close()

        singlemu_histogram = f_singlemu.Get("BHistograms/h_pfjet_mjj").Rebin(25)
        singlemu_histogram.SetDirectory(0)
        f_singlemu.Close()

        # Normalize the histograms above 450 GeV
        norm_low_bin = bjetplusx_histogram.GetXaxis().FindBin(450)
def RunBHistogramsSignal(analysis, sample, files_per_job=1, retar=False, data_source=None):
    """Submit batch jobs running the BHistograms cfg on a signal sample.

    Creates (and changes into) the per-analysis/sample submit directory and
    submits one job per input file with ``csub`` (the currently selected
    method; a ``condor_cmsRun`` path is kept as the alternative). A
    ``postprocessing.sh`` script that merges the per-job outputs with
    ``hadd`` is written to the submit directory. The original working
    directory is always restored.

    Args:
        analysis: Key into ``analysis_config.analysis_cfgs``.
        sample: Key into ``analysis_config.files_QCDBEventTree`` and
            ``analysis_config.simulation.signal_sample_masses``.
        files_per_job: Value for ``--files-per-job`` (condor_cmsRun path only).
        retar: csub path: omit ``--no_retar``; condor path: pass ``--retar``.
        data_source: Currently unused.
    """
    # Create working directory and cd
    start_directory = os.getcwd()
    working_directory = dijet_directory + "/data/EightTeeEeVeeBee/BHistograms/condor/submit_" + analysis + "_" + sample
    os.system("mkdir -pv " + working_directory)
    os.chdir(working_directory)
    try:
        cfg_path = analysis_config.analysis_cfgs[analysis]
        signal_mass = str(analysis_config.simulation.signal_sample_masses[sample])
        merged_basename = os.path.basename(analysis_config.get_b_histogram_filename(analysis, sample))

        method = "csub"
        if method == "csub":
            # One job per listed input file; blank lines are skipped so they
            # do not turn into jobs with an empty input.
            with open(analysis_config.files_QCDBEventTree[sample], 'r') as input_files_txt:
                input_files = [line.strip() for line in input_files_txt if line.strip()]

            # "$1" is the job index supplied to the generated script.
            output_filename = merged_basename.replace(".root", "_$1.root")
            bash_script_path = working_directory + "/run_" + analysis + "_" + sample + ".sh"
            with open(bash_script_path, 'w') as bash_script:
                bash_script.write("#!/bin/bash\n")
                bash_script.write("input_files=( " + " ".join([os.path.basename(x) for x in input_files]) + " )\n")
                bash_script.write(
                    "cmsRun " + os.path.basename(cfg_path)
                    + " dataSource=simulation"
                    + " dataType=signal"
                    + " signalMass=" + signal_mass
                    + " outputFile=" + output_filename
                    + " inputFiles=file:${input_files[$1]}\n"
                )

            submit_command = "csub " + bash_script_path + " --cmssw "
            if not retar:
                submit_command += " --no_retar"
            submit_command += " -F " + ",".join(input_files) + "," + cfg_path + " -n " + str(len(input_files))
            os.system(submit_command)
        else:
            # Per-job output name; the escaped $(Cluster)/$(Process) placeholders are
            # passed through the shell literally (raw string: same bytes as before).
            output_filename = merged_basename.replace(".root", r"_\$\(Cluster\)_\$\(Process\).root")

            command = "condor_cmsRun"
            if retar:
                command += " --retar "
            command += " --file-list=" + analysis_config.files_QCDBEventTree[sample] + " "
            command += " --files-per-job=" + str(files_per_job)
            command += " --submit-file=submit_" + analysis + "_" + sample + ".jdl "
            command += " --output-tag=BHistograms_" + sample + " "
            command += " --run "
            command += " " + dijet_directory + "/CMSSW_5_3_32_patch3/src/MyTools/RootUtils/scripts/cmsRun_wrapper.sh " + cfg_path
            command += " dataSource=simulation "
            command += " dataType=signal "
            command += " signalMass=" + signal_mass + " "
            if "ZPrime" in sample:
                command += " bottomOnly=true "
            command += " outputFile=" + output_filename
            print(command)
            os.system(command)
            # tmp.txt belonged to a now-disabled file-list mechanism; remove any stale copy.
            os.system("rm -f tmp.txt")

        # Script the user runs afterwards to merge all per-job outputs; both
        # per-job placeholders (condor and csub) are turned into a glob.
        merge_inputs = output_filename.replace(r"_\$\(Cluster\)_\$\(Process\)", "*").replace("$1", "*")
        with open('postprocessing.sh', 'w') as postprocessing_file:
            postprocessing_file.write("#!/bin/bash\n")
            postprocessing_file.write("hadd " + working_directory + "/" + merged_basename + " " + merge_inputs + "\n")
    finally:
        # cd back, even if submission failed part-way
        os.chdir(start_directory)
print "\t\\label{table:X}\n", print "\\end{tabular}\n", print "\\end{table}\n", if __name__ == "__main__": import argparse parser = argparse.ArgumentParser(description = 'Make tables from cutflow histograms') #parser.add_argument('analysis', type=str, help='Analysis name') #parser.add_argument('sample', type=str, help='Sample name') args = parser.parse_args() analyses = ["trigbbl_CSVTM", "trigbbh_CSVTM"] samples = ["BJetPlusX_2012"] for model in ["Hbb", "RSG"]: for mass in [400, 500, 600, 750, 900, 1200]: samples.append(analysis_config.simulation.get_signal_tag(model, mass, "FULLSIM")) for analysis in analyses: headers = [] cutflow_histograms = {} for sample in samples: headers.append(sample) f = TFile(analysis_config.get_b_histogram_filename(analysis, sample), "READ") cutflow_histograms[sample] = f.Get("BHistograms/CutFlowCounter_QCDEventSelector").Clone() cutflow_histograms[sample].SetDirectory(0) f.Close() histogram_to_table(headers, cutflow_histograms, normalize=False, txt_file=analysis_config.figure_directory + "/cutflow_" + analysis + ".tex") histogram_to_table(headers, cutflow_histograms, normalize=True, txt_file=analysis_config.figure_directory + "/cuteff_" + analysis + ".tex")
seaborn.GetColorRoot("dark", style_counter)) histograms[name].Draw("hist same") #histograms[name].Draw("hist same") l.AddEntry(histograms[name], name, "pl") style_counter += 1 l.Draw() c.SaveAs(analysis_config.figure_directory + "/" + c.GetName() + ".pdf") if __name__ == "__main__": for model in ["Hbb", "RSG"]: for mass_point in [600, 750, 900, 1200]: f1 = TFile( analysis_config.get_b_histogram_filename( "trigbbh_CSVTM", analysis_config.simulation.get_signal_tag( model, mass_point, "FULLSIM")), "READ") f2 = TFile( analysis_config.get_b_histogram_filename( "trigbbh_CSVTM_bfat", analysis_config.simulation.get_signal_tag( model, mass_point, "FULLSIM"))) histograms = {} histograms["ak5"] = f1.Get("BHistograms/h_pfjet_mjj") histograms["ak5"].SetName("h_ak5_" + model + "_" + str(mass_point)) histograms["ak5"].SetDirectory(0) histograms["Fat, #DeltaR=1.1, p_{T}=30 GeV"] = f1.Get( "BHistograms/h_fatjet_mjj") histograms["Fat, #DeltaR=1.1, p_{T}=30 GeV"].SetName(
"trigbbl_CSVTM": [400, 500, 600, 750], "trigbbh_CSVTM": [600, 750, 900, 1200] } if args.mjj: for analysis in analyses: names = [] histograms = {} colors = {} styles = {} for model in models: color_counter = 0 for mass in masses[analysis]: f = TFile( analysis_config.get_b_histogram_filename( analysis, analysis_config.simulation.get_signal_tag( model, mass, "FULLSIM"))) if model == "Hbb": name = "H, m=" + str(mass) + " GeV" elif model == "RSG": name = "G, m=" + str(mass) + " GeV" names.append(name) histograms[name] = f.Get("BHistograms/h_pfjet_mjj") histograms[name].SetDirectory(0) histograms[name].Rebin(25) histograms[name].Scale(1. / histograms[name].Integral()) f.Close() if model == "Hbb": styles[name] = 2 elif model == "RSG": styles[name] = 3
plotter = OfflineBTagPlots(numerator_analysis, denominator_analysis, ["SingleMu_2012"]) plotter.EfficiencyPlot(var="mjj", logy=True, binning=dijet_binning, save_tag="_" + wp + "_" + sr + "_onbtag_singlemu", ratio_range=[0.,0.1], x_range=[0., 2000.], legend_position="topright", numerator_legend="With online b-tag", denominator_legend="Without online b-tag", ratio_title="Online b-tag efficiency (no offline CSV)") if args.do_jetht: for sr in ["lowmass", "highmass"]: if sr == "lowmass": numerator_analysis = "trigjetht_eta1p7_CSVTM" denominator_analysis = "trigjetht_eta1p7" else: numerator_analysis = "trigjetht_eta2p2_CSVTM" denominator_analysis = "trigjetht_eta2p2" plotter = OfflineBTagPlots(numerator_analysis, denominator_analysis, ["JetHT_2012BCD"]) plotter.FrankenEfficiencyPlot(logy=True, binning=dijet_binning, save_tag="_JetHT_CSVTM_" + sr, ratio_range=[0.,0.01], x_range=[0., 2000.], legend_position="topright", numerator_legend="With offline b-tag", denominator_legend="Without offline b-tag", ratio_title="Offline b-tag efficiency)") #for ht_slice in [200, 250, 300, 350, 400, 450, 500, 550, 650]: # if sr == "lowmass": # numerator_analysis = "trigjetht" + str(ht_slice) + "_eta1p7_CSVTM" # denominator_analysis = "trigjetht" + str(ht_slice) + "_eta1p7" # else: # numerator_analysis = "trigjetht" + str(ht_slice) + "_CSVTM" # denominator_analysis = "trigjetht" + str(ht_slice) # plotter = OfflineBTagPlots(numerator_analysis, denominator_analysis, ["JetHT_2012BCD"]) # plotter.EfficiencyPlot(var="mjj", logy=True, binning=dijet_binning, save_tag="_CSVTM_" + sr + "_jetht" + str(ht_slice), ratio_range=[0.,0.01], x_range=[0., 2000.], legend_position="topright", numerator_legend="With offline b-tag", denominator_legend="Without offline b-tag", ratio_title="Offline b-tag efficiency", prescaled=True) if args.do_singlejet: for analysis in ["trigbbl_CSVTM", "trigbbh_CSVTM"]: f = TFile(analysis_config.get_b_histogram_filename(analysis, "BJetPlusX_2012"), "READ") h_mjj = 
histogram_tools.rebin_histogram(f.Get("BHistograms/h_pfjet_mjj"), dijet_binning) h_mjj_btagcorr = histogram_tools.rebin_histogram(f.Get("BHistograms/h_pfjet_mjj_btagcorr"), dijet_binning) EfficiencyPlot(h_mjj, h_mjj_btagcorr, name_num="No correction", name_den="b tag correction", logy=True, save_directory=analysis_config.figure_directory + "/OfflineBTag", save_tag="from_singlejet_"+analysis)
def FrankenEfficiencyPlot(self, logy=True, binning=None, simple_rebin=None, save_tag="", x_range=None, ratio_range=None, legend_position="topright", numerator_legend=None, denominator_legend=None, ratio_title=None):
    """Plot numerator/denominator mjj spectra built from HT slices, and their ratio.

    For every sample in self._samples, the "BHistograms/h_pfjet_mjj" histogram
    of each HT slice is read for the numerator and the denominator analysis,
    the slices are combined with self.FrankenHist (using the per-slice mjj
    ranges below), and the per-sample results are summed. The two spectra are
    drawn on a top pad and their bin-by-bin ratio on a bottom pad; the canvas
    is saved as a PDF under analysis_config.figure_directory + "/OfflineBTag/".

    Args:
        logy: Use a log y axis on the top pad (also appends "_log" to the canvas name).
        binning: If given, both spectra are rebinned with
            histogram_tools.rebin_histogram(hist, binning). Takes precedence
            over simple_rebin.
        simple_rebin: If given (and binning is not), passed to TH1.Rebin.
        save_tag: Appended to the histogram names and to the output file name.
        x_range: Optional [min, max] applied to the x axis of both pads.
        ratio_range: Optional [min, max] for the ratio y axis (default 0 to 1).
        legend_position: "topright" or "bottomright".
        numerator_legend: Legend text for the numerator (default "CSVT+CSVM").
        denominator_legend: Legend text for the denominator (default "No CSV").
        ratio_title: y-axis title of the ratio pad
            (default "Offline 2#timesb-tag efficiency").

    The function prints an error and calls sys.exit(1) if the numerator or
    denominator analysis or the legend position is not one of the supported
    values, or if the numerator and denominator files disagree on the number
    of input events by 0.1% or more.
    """
    # Legend box corners, keyed by the supported legend_position values.
    # Validated up front so an unsupported value fails before any file I/O
    # instead of as an unbound-variable error after all the work is done.
    legend_boxes = {
        "topright": (0.6, 0.6, 0.85, 0.8),
        "bottomright": (0.6, 0.2, 0.85, 0.4),
    }
    if legend_position not in legend_boxes:
        print("[OfflineBTagPlots::FrankenEfficiencyPlot] ERROR : legend_position must be topright or bottomright")
        sys.exit(1)

    ht_thresholds = ["200", "250", "300", "350", "400", "450", "500", "550"]  # HT650 is not used
    ht_slices = ["HT" + ht for ht in ht_thresholds] + ["HTUnprescaled"]

    def slice_analyses(eta_suffix, tag_suffix):
        # Map slice name -> analysis name, e.g. "HT200" -> "trigjetht200_eta1p7_CSVTM".
        # The unprescaled slice has no HT threshold in its analysis name.
        names = dict(("HT" + ht, "trigjetht" + ht + eta_suffix + tag_suffix) for ht in ht_thresholds)
        names["HTUnprescaled"] = "trigjetht" + eta_suffix + tag_suffix
        return names

    # Note: the eta2p2 slice analyses carry no eta suffix in their names.
    if self._numerator_analysis == "trigjetht_eta1p7_CSVTM":
        numerator_analyses = slice_analyses("_eta1p7", "_CSVTM")
    elif self._numerator_analysis == "trigjetht_eta2p2_CSVTM":
        numerator_analyses = slice_analyses("", "_CSVTM")
    else:
        print("[OfflineBTagPlots::FrankenEfficiencyPlot] ERROR : numerator analysis must be trigjetht_eta1p7_CSVTM or trigjetht_eta2p2_CSVTM")
        sys.exit(1)

    if self._denominator_analysis == "trigjetht_eta1p7":
        denominator_analyses = slice_analyses("_eta1p7", "")
    elif self._denominator_analysis == "trigjetht_eta2p2":
        denominator_analyses = slice_analyses("", "")
    else:
        print("[OfflineBTagPlots::FrankenEfficiencyPlot] ERROR : denominator analysis must be trigjetht_eta1p7 or trigjetht_eta2p2")
        sys.exit(1)

    # Per-slice [low, high] ranges handed to self.FrankenHist.
    ht_ranges = {
        "HT200": [220, 386],
        "HT250": [386, 489],
        "HT300": [489, 526],
        "HT350": [526, 606],
        "HT400": [606, 649],
        "HT450": [649, 740],
        "HT500": [740, 788],
        "HT550": [788, 890],
        "HTUnprescaled": [890, 2000],
    }

    numerator_histogram = None
    denominator_histogram = None
    for sample in self._samples:
        print("[EfficiencyPlot] DEBUG : Sample " + sample)
        numerator_slice_histograms = {}
        denominator_slice_histograms = {}
        for slice_name in ht_slices:
            print(slice_name)
            numerator_filename = analysis_config.get_b_histogram_filename(numerator_analyses[slice_name], sample)
            print(numerator_filename)
            numerator_file = TFile(numerator_filename, "READ")
            denominator_filename = analysis_config.get_b_histogram_filename(denominator_analyses[slice_name], sample)
            print(denominator_filename)
            denominator_file = TFile(denominator_filename, "READ")

            # Check input nevents: both analyses must have run over the same events.
            num_nevents = numerator_file.Get("BHistograms/h_input_nevents").Integral()
            den_nevents = denominator_file.Get("BHistograms/h_input_nevents").Integral()
            numerator_normalization = 1.
            denominator_normalization = 1.
            if num_nevents != den_nevents:
                # Allow tiny differences. An empty denominator can never be
                # "close", and must not be divided by.
                if den_nevents != 0 and abs((num_nevents - den_nevents) / den_nevents) < 0.001:
                    print("[EfficiencyPlot] ERROR : Small inconsistency between number of events between numerator and denominator. I'm going to rescale away the difference, but you may want to fix this.")
                    numerator_normalization = den_nevents / num_nevents
                else:
                    print("[EfficiencyPlot] ERROR : Inconsistent number of events between numerator and denominator. Results would be wrong, so I'm aborting.")
                    print("[EfficiencyPlot] ERROR : \tNumerator = " + str(num_nevents))
                    print("[EfficiencyPlot] ERROR : \tDenominator = " + str(den_nevents))
                    sys.exit(1)

            num_slice = numerator_file.Get("BHistograms/h_pfjet_mjj")
            num_slice.SetName("h_pfjet_mjj_num_" + slice_name + "_" + sample)
            num_slice.SetDirectory(0)  # detach so the histogram survives closing the file
            num_slice.Scale(numerator_normalization)
            numerator_slice_histograms[slice_name] = num_slice

            den_slice = denominator_file.Get("BHistograms/h_pfjet_mjj")
            den_slice.SetName("h_pfjet_mjj_den_" + slice_name + "_" + sample)
            den_slice.SetDirectory(0)
            den_slice.Scale(denominator_normalization)
            denominator_slice_histograms[slice_name] = den_slice

            numerator_file.Close()
            denominator_file.Close()

        # Make frankenhist
        this_numerator_histogram = self.FrankenHist(ht_slices, numerator_slice_histograms, ht_ranges)
        this_denominator_histogram = self.FrankenHist(ht_slices, denominator_slice_histograms, ht_ranges)
        if numerator_histogram is None:
            # time.time() in the name keeps repeated calls from reusing a histogram name.
            numerator_histogram = this_numerator_histogram.Clone()
            numerator_histogram.SetName(numerator_histogram.GetName() + save_tag + "_num_" + str(time.time()))
            denominator_histogram = this_denominator_histogram.Clone()
            denominator_histogram.SetName(denominator_histogram.GetName() + save_tag + "_den_" + str(time.time()))
        else:
            numerator_histogram.Add(this_numerator_histogram)
            denominator_histogram.Add(this_denominator_histogram)

    # Rebin
    if binning:
        numerator_histogram = histogram_tools.rebin_histogram(numerator_histogram, binning)
        denominator_histogram = histogram_tools.rebin_histogram(denominator_histogram, binning)
    elif simple_rebin:
        numerator_histogram.Rebin(simple_rebin)
        denominator_histogram.Rebin(simple_rebin)

    cname = "c_offline_btag_eff_mjj"
    if logy:
        cname += "_log"
    c = TCanvas(cname, "Offline b-tag #epsilon", 800, 1000)

    # Top pad: the two spectra.
    top = TPad("top", "top", 0., 0.5, 1., 1.)
    top.SetBottomMargin(0.02)
    if logy:
        top.SetLogy()
    top.Draw()
    top.cd()

    frame_top = numerator_histogram.Clone()
    frame_top.Reset()
    if x_range:
        frame_top.GetXaxis().SetRangeUser(x_range[0], x_range[1])
    tallest = max(numerator_histogram.GetMaximum(), denominator_histogram.GetMaximum())
    if logy:
        y_min = 0.1
        y_max = tallest * 10.
    else:
        y_min = 0.
        y_max = tallest * 1.5
    frame_top.SetMinimum(y_min)
    frame_top.SetMaximum(y_max)
    # The x axis is labelled on the bottom pad only.
    frame_top.GetXaxis().SetLabelSize(0)
    frame_top.GetXaxis().SetTitleSize(0)
    frame_top.GetYaxis().SetTitle("Events")
    frame_top.Draw("axis")

    print("numerator integral = " + str(numerator_histogram.Integral()))
    print("denominator integral = " + str(denominator_histogram.Integral()))
    numerator_histogram.SetMarkerStyle(20)
    numerator_histogram.SetMarkerColor(seaborn.GetColorRoot("default", 0))
    numerator_histogram.SetLineColor(seaborn.GetColorRoot("default", 0))
    numerator_histogram.Draw("same")
    denominator_histogram.SetMarkerStyle(24)
    denominator_histogram.SetMarkerColor(seaborn.GetColorRoot("default", 2))
    denominator_histogram.SetLineColor(seaborn.GetColorRoot("default", 2))
    denominator_histogram.Draw("same")

    legend = TLegend(*legend_boxes[legend_position])
    legend.SetFillColor(0)
    legend.SetBorderSize(0)
    legend.AddEntry(numerator_histogram, numerator_legend or "CSVT+CSVM")
    legend.AddEntry(denominator_histogram, denominator_legend or "No CSV")
    legend.Draw()

    # Bottom pad: the ratio.
    c.cd()
    bottom = TPad("bottom", "bottom", 0., 0., 1., 0.5)
    bottom.SetTopMargin(0.01)
    bottom.SetBottomMargin(0.2)
    bottom.Draw()
    bottom.cd()

    ratio_histogram = numerator_histogram.Clone()
    ratio_histogram.Reset()
    if x_range:
        ratio_histogram.GetXaxis().SetRangeUser(x_range[0], x_range[1])
    ratio_histogram.SetName(numerator_histogram.GetName() + "_ratio_" + save_tag + str(time.time()))
    ratio_histogram.SetDirectory(0)
    for ibin in range(1, numerator_histogram.GetNbinsX() + 1):
        ratio = 0.
        ratio_err = 0.
        num_err = numerator_histogram.GetBinError(ibin)
        den_err = denominator_histogram.GetBinError(ibin)
        if num_err > 0 and den_err > 0:
            # Undo bin normalization: content^2 / error^2 is used as the
            # entry count of the bin, independent of any scaling applied.
            num_unnormalized = numerator_histogram.GetBinContent(ibin)**2 / num_err**2
            den_unnormalized = denominator_histogram.GetBinContent(ibin)**2 / den_err**2
            if den_unnormalized > 0:
                ratio = 1. * num_unnormalized / den_unnormalized
                # Binomial error; the variance term is clamped at zero so a
                # ratio above 1 cannot produce the square root of a negative.
                ratio_err = sqrt(max(ratio * (1. - ratio), 0.) / den_unnormalized)
        ratio_histogram.SetBinContent(ibin, ratio)
        ratio_histogram.SetBinError(ibin, ratio_err)

    ratio_histogram.SetMarkerSize(1)
    ratio_histogram.SetMarkerColor(kBlack)
    ratio_histogram.SetLineColor(kBlack)
    ratio_histogram.SetLineWidth(2)
    ratio_histogram.GetXaxis().SetTitle("m_{jj} [GeV]")
    ratio_histogram.GetYaxis().SetTitle(ratio_title or "Offline 2#timesb-tag efficiency")
    if ratio_range:
        ratio_histogram.SetMinimum(ratio_range[0])
        ratio_histogram.SetMaximum(ratio_range[1])
    else:
        ratio_histogram.SetMinimum(0.)
        ratio_histogram.SetMaximum(1.)
    ratio_histogram.Draw()

    c.cd()
    c.SaveAs(analysis_config.figure_directory + "/OfflineBTag/" + c.GetName() + save_tag + ".pdf")

    # Hand the canvas and pads over to ROOT so Python does not delete them.
    ROOT.SetOwnership(c, False)
    ROOT.SetOwnership(top, False)
    ROOT.SetOwnership(bottom, False)
denominator_legend="Without offline b-tag", ratio_title="Offline b-tag efficiency)") #for ht_slice in [200, 250, 300, 350, 400, 450, 500, 550, 650]: # if sr == "lowmass": # numerator_analysis = "trigjetht" + str(ht_slice) + "_eta1p7_CSVTM" # denominator_analysis = "trigjetht" + str(ht_slice) + "_eta1p7" # else: # numerator_analysis = "trigjetht" + str(ht_slice) + "_CSVTM" # denominator_analysis = "trigjetht" + str(ht_slice) # plotter = OfflineBTagPlots(numerator_analysis, denominator_analysis, ["JetHT_2012BCD"]) # plotter.EfficiencyPlot(var="mjj", logy=True, binning=dijet_binning, save_tag="_CSVTM_" + sr + "_jetht" + str(ht_slice), ratio_range=[0.,0.01], x_range=[0., 2000.], legend_position="topright", numerator_legend="With offline b-tag", denominator_legend="Without offline b-tag", ratio_title="Offline b-tag efficiency", prescaled=True) if args.do_singlejet: for analysis in ["trigbbl_CSVTM", "trigbbh_CSVTM"]: f = TFile( analysis_config.get_b_histogram_filename( analysis, "BJetPlusX_2012"), "READ") h_mjj = histogram_tools.rebin_histogram( f.Get("BHistograms/h_pfjet_mjj"), dijet_binning) h_mjj_btagcorr = histogram_tools.rebin_histogram( f.Get("BHistograms/h_pfjet_mjj_btagcorr"), dijet_binning) EfficiencyPlot(h_mjj, h_mjj_btagcorr, name_num="No correction", name_den="b tag correction", logy=True, save_directory=analysis_config.figure_directory + "/OfflineBTag", save_tag="from_singlejet_" + analysis)
ROOT.SetOwnership(c, False) ROOT.SetOwnership(top, False) ROOT.SetOwnership(bottom, False) if __name__ == "__main__": for analysis in ["trigbbh_CSVTM", "trigbbl_CSVTM"]: for model in ["Hbb", "RSG"]: if analysis == "trigbbh_CSVTM": masses = [600, 750, 900, 1200] elif analysis == "trigbbl_CSVTM": masses = [400, 500, 600] for mass in masses: print "On " + model + " / " + str(mass) signal_file = TFile(analysis_config.get_b_histogram_filename(analysis, analysis_config.simulation.get_signal_tag(model, mass, "FULLSIM")), "READ") signal_histogram = signal_file.Get("BHistograms/h_nminusone_PFDijetMaxDeltaEta_vs_PFMjj") xsec = 1. # 1 pb placeholder ngenevt = signal_file.Get("BHistograms/h_input_nevents").Integral() signal_histogram.Scale(19700. * xsec / ngenevt) print "Background file " + analysis_config.get_b_histogram_filename(analysis, "BJetPlusX_2012") background_file = TFile(analysis_config.get_b_histogram_filename(analysis, "BJetPlusX_2012"), "READ") background_histogram = background_file.Get("BHistograms/h_nminusone_PFDijetMaxDeltaEta_vs_PFMjj") signal_histogram_mjj = signal_histogram.ProjectionY() signal_fit_results = DoSignalFit(signal_histogram_mjj, fit_range=[mass-150., mass+150.]) signal_x0 = signal_fit_results["fit"].GetParameter(2) signal_sigma = signal_fit_results["fit"].GetParameter(3) print "\tWindow = [" + str(signal_x0 - signal_sigma) + ", " + str(signal_x0 + signal_sigma) + "]"
analyses = ["trigbbl_CSVTM", "trigbbh_CSVTM"] masses = { "trigbbl_CSVTM":[400, 500, 600, 750], "trigbbh_CSVTM":[600, 750, 900, 1200] } if args.mjj: for analysis in analyses: names = [] histograms = {} colors = {} styles = {} for model in models: color_counter = 0 for mass in masses[analysis]: f = TFile(analysis_config.get_b_histogram_filename(analysis, analysis_config.simulation.get_signal_tag(model, mass, "FULLSIM"))) if model == "Hbb": name = "H, m=" + str(mass) + " GeV" elif model == "RSG": name = "G, m=" + str(mass) + " GeV" names.append(name) histograms[name] = f.Get("BHistograms/h_pfjet_mjj") histograms[name].SetDirectory(0) histograms[name].Rebin(25) histograms[name].Scale(1. / histograms[name].Integral()) f.Close() if model == "Hbb": styles[name] = 2 elif model == "RSG": styles[name] = 3 colors[name] = seaborn.GetColorRoot("dark", color_counter)
gROOT.SetBatch(True) gStyle.SetOptStat(0) gStyle.SetOptTitle(0) gSystem.Load("~/Dijets/CMSSW_5_3_32_patch3/lib/slc6_amd64_gcc472/libMyToolsRootUtils.so") import CMSDIJET.QCDAnalysis.analysis_configuration_8TeV as analysis_config sys.path.append("/uscms/home/dryu/Dijets/CMSSW_5_3_32_patch3/python/MyTools/RootUtils") import histogram_tools seaborn = Root.SeabornInterface() seaborn.Initialize() print "Loading histograms" analyses = ["trigmu_highmass_CSVTM", "trigmu_lowmass_CSVTM", "trigmubbh_highmass_CSVTM", "trigmubbl_lowmass_CSVTM", "trigmubbll_lowmass_CSVTM"] files = {} for analysis in analyses: files[analysis] = ROOT.TFile(analysis_config.get_b_histogram_filename(analysis, "SingleMu_2012"), "READ") from array import array mass_bins = array("d", [1, 3, 6, 10, 16, 23, 31, 40, 50, 61, 74, 88, 103, 119, 137, 156, 176, 197, 220, 244, 270, 296, 325, 354, 386, 419, 453, 489, 526, 565, 606, 649, 693, 740, 788, 838, 890, 944, 1000, 1058, 1118, 1181, 1246, 1313, 1383, 1455, 1530, 1607, 1687, 1770, 1856, 1945, 2037, 2132, 2231, 2332, 2438, 2546, 2659, 2775, 2895, 3019, 3147, 3279, 3416, 3558, 3704, 3854, 4010, 4171, 4337, 4509, 4686, 4869, 5058, 5253, 5455, 5663, 5877, 6099, 6328, 6564, 6808, 7060, 7320, 7589, 7866, 8000]) variables = ["mjj", "pt1", "pt2", "pt_btag1", "pt_btag2"] histograms = {} for analysis in analyses: histograms[analysis] = {} for variable in variables: histograms[analysis][variable] = files[analysis].Get("BHistograms/h_pfjet_" + variable) histograms[analysis][variable].SetName("h_" + analysis + "_" + variable) histograms[analysis][variable].SetDirectory(0) if analysis == "trigmubbll_lowmass": histograms[analysis][variable].Scale(1.7) # Prescale if variable == "mjj":
def RunBHistogramsSignal(analysis, sample, files_per_job=1, retar=False, data_source=None):
    """Submit the BHistograms cmsRun jobs for one signal sample.

    Creates a per-analysis/sample submission directory under dijet_directory,
    changes into it, submits the jobs, writes a "postprocessing.sh" script
    that hadd's the per-job output files, and changes back to the directory
    the caller started in (also when an error occurs part-way).

    Args:
        analysis: Key into analysis_config.analysis_cfgs selecting the cmsRun
            configuration; also used to build the output file name.
        sample: Key into analysis_config.files_QCDBEventTree (text file with
            one input file per line) and
            analysis_config.simulation.signal_sample_masses.
        files_per_job: Only used by the legacy condor_cmsRun path; the csub
            path runs one job per input file.
        retar: csub path: when False, "--no_retar" is passed to csub.
            Legacy path: when True, "--retar" is passed to condor_cmsRun.
        data_source: Unused; jobs are always run with dataSource=simulation.
    """
    # Create working directory and cd
    start_directory = os.getcwd()
    working_directory = dijet_directory + "/data/EightTeeEeVeeBee/BHistograms/condor/submit_" + analysis + "_" + sample
    os.system("mkdir -pv " + working_directory)
    os.chdir(working_directory)
    try:
        cfg_path = analysis_config.analysis_cfgs[analysis]
        histogram_basename = os.path.basename(analysis_config.get_b_histogram_filename(analysis, sample))
        signal_mass = str(analysis_config.simulation.signal_sample_masses[sample])

        method = "csub"  # the condor_cmsRun path below is kept for reference only
        if method == "csub":
            with open(analysis_config.files_QCDBEventTree[sample], 'r') as input_files_txt:
                input_files = [line.strip() for line in input_files_txt]

            # One job per input file: job index $1 selects the file and tags the output.
            output_filename = histogram_basename.replace(".root", "_$1.root")
            bash_script_path = working_directory + "/run_" + analysis + "_" + sample + ".sh"
            with open(bash_script_path, 'w') as bash_script:
                bash_script.write("#!/bin/bash\n")
                # The script refers to the input files by base name only.
                bash_script.write("input_files=( " + " ".join([os.path.basename(x) for x in input_files]) + " )\n")
                bash_script.write(
                    "cmsRun " + os.path.basename(cfg_path)
                    + " dataSource=simulation"
                    + " dataType=signal"
                    + " signalMass=" + signal_mass
                    + " outputFile=" + output_filename
                    + " inputFiles=file:${input_files[$1]}\n")

            submit_command = "csub " + bash_script_path + " --cmssw "
            if not retar:
                submit_command += " --no_retar"
            # -F lists the input files and the cfg; -n is the number of jobs.
            submit_command += " -F " + ",".join(input_files) + "," + cfg_path + " -n " + str(len(input_files))
            os.system(submit_command)
        else:
            # Same runtime value as the original "\$\(" literals, written
            # with explicit backslashes to avoid invalid escape sequences.
            condor_suffix = "_\\$\\(Cluster\\)_\\$\\(Process\\).root"
            output_filename = histogram_basename.replace(".root", condor_suffix)

            command = "condor_cmsRun"
            if retar:
                command += " --retar "
            command += " --file-list=" + analysis_config.files_QCDBEventTree[sample] + " "
            command += " --files-per-job=" + str(files_per_job)
            command += " --submit-file=submit_" + analysis + "_" + sample + ".jdl "
            command += " --output-tag=BHistograms_" + sample + " "
            command += " --run "
            command += " " + dijet_directory + "/CMSSW_5_3_32_patch3/src/MyTools/RootUtils/scripts/cmsRun_wrapper.sh " + cfg_path
            command += " dataSource=simulation "
            command += " dataType=signal "
            command += " signalMass=" + signal_mass + " "
            if "ZPrime" in sample:
                command += " bottomOnly=true "
            command += " outputFile=" + output_filename
            print(command)
            os.system(command)
            os.system("rm -f tmp.txt")

        # Postprocessing script: merge the per-job outputs (job tag replaced by a glob).
        merged_inputs = output_filename.replace("_\\$\\(Cluster\\)_\\$\\(Process\\)", "*").replace("$1", "*")
        with open('postprocessing.sh', 'w') as postprocessing_file:
            postprocessing_file.write("#!/bin/bash\n")
            postprocessing_file.write("hadd " + working_directory + "/" + histogram_basename + " " + merged_inputs + "\n")
    finally:
        # cd back, even if the submission failed part-way
        os.chdir(start_directory)
ROOT.SetOwnership(bottom, False) if __name__ == "__main__": for analysis in ["trigbbh_CSVTM", "trigbbl_CSVTM"]: for model in ["Hbb", "RSG"]: if analysis == "trigbbh_CSVTM": masses = [600, 750, 900, 1200] elif analysis == "trigbbl_CSVTM": masses = [400, 500, 600] for mass in masses: print "On " + model + " / " + str(mass) signal_file = TFile( analysis_config.get_b_histogram_filename( analysis, analysis_config.simulation.get_signal_tag( model, mass, "FULLSIM")), "READ") signal_histogram = signal_file.Get( "BHistograms/h_nminusone_PFDijetMaxDeltaEta_vs_PFMjj") xsec = 1. # 1 pb placeholder ngenevt = signal_file.Get( "BHistograms/h_input_nevents").Integral() signal_histogram.Scale(19700. * xsec / ngenevt) print "Background file " + analysis_config.get_b_histogram_filename( analysis, "BJetPlusX_2012") background_file = TFile( analysis_config.get_b_histogram_filename( analysis, "BJetPlusX_2012"), "READ") background_histogram = background_file.Get( "BHistograms/h_nminusone_PFDijetMaxDeltaEta_vs_PFMjj")
def RunBHistogramsEOS(analysis, sample, files_per_job=20, retar=False, data_source=None):
    """Submit the BHistograms cmsRun jobs for one data sample.

    Creates a per-analysis/sample submission directory under dijet_directory,
    changes into it, submits the jobs, writes a "postprocessing.py" script
    that merges the job outputs with hadd if no "*stderr" log in the
    submission directory contains "FAILURE", and changes back to the
    directory the caller started in (also when an error occurs part-way).

    Args:
        analysis: Key into analysis_config.analysis_cfgs selecting the cmsRun
            configuration; also used to build the output file name.
        sample: Key into analysis_config.files_QCDBEventTree (text file with
            one input file per line).
        files_per_job: Maximum number of input files per job. In the csub
            path it is recomputed so the files are spread evenly over the
            resulting number of jobs.
        retar: csub path: when False, "--no_retar" is passed to csub.
            Legacy path: when True, "--retar" is passed to condor_cmsRun.
        data_source: Required; forwarded to cmsRun as dataSource=<value>
            (the error message names collision_data and simulation).

    The function prints an error and calls sys.exit(1) if data_source is
    missing, files_per_job is smaller than 1, or the input file list is empty.
    """
    if not data_source:
        print("[RunBHistogramsEOS] ERROR : Please specify data_source = collision_data or simulation")
        sys.exit(1)
    if files_per_job < 1:
        # Would otherwise end in a division by zero / nonsensical job count.
        print("[RunBHistogramsEOS] ERROR : files_per_job must be at least 1")
        sys.exit(1)

    # Create working directory and cd
    start_directory = os.getcwd()
    working_directory = dijet_directory + "/data/EightTeeEeVeeBee/BHistograms/condor/submit_" + analysis + "_" + sample
    os.system("mkdir -pv " + working_directory)
    os.chdir(working_directory)
    try:
        cfg_path = analysis_config.analysis_cfgs[analysis]
        histogram_filename = analysis_config.get_b_histogram_filename(analysis, sample)

        # Get input file list
        with open(analysis_config.files_QCDBEventTree[sample], 'r') as input_files_txt:
            input_files = [line.strip() for line in input_files_txt]
        if not input_files:
            print("[RunBHistogramsEOS] ERROR : No input files listed in " + analysis_config.files_QCDBEventTree[sample])
            sys.exit(1)

        method = "csub"  # the condor_cmsRun path below is kept for reference only
        if method == "csub":
            # Recalculate files_per_job to split evenly
            n_jobs = int(math.ceil(1. * len(input_files) / files_per_job))
            files_per_job = int(math.ceil(1. * len(input_files) / n_jobs))

            # Job index $1 tags the output file of each job.
            output_filename = os.path.basename(histogram_filename.replace(".root", "_$1.root"))
            bash_script_path = working_directory + "/run_" + analysis + "_" + sample + ".sh"
            # The generated script picks files [$1*files_per_job, ...) from the
            # full list (the last job may get fewer) and passes them to cmsRun
            # as a comma-separated string.
            script_lines = [
                "#!/bin/bash\n",
                "input_files=( " + " ".join(input_files) + " )\n",
                "files_per_job=" + str(files_per_job) + "\n",
                "first_file_index=$(($1*$files_per_job))\n",
                "max_file_index=$((${#input_files[@]}-1))\n",
                "if [ $(($first_file_index+$files_per_job-1)) -gt $max_file_index ]; then\n",
                " files_per_job=$(($max_file_index-$first_file_index+1))\n",
                "fi\n",
                "declare -a this_input_files=(${input_files[@]:$first_file_index:$files_per_job})\n",
                "function join { local IFS=\"$1\"; shift; echo \"$*\"; }\n",
                "this_input_files_string=\"$(join , ${this_input_files[@]})\"\n",
                "echo \"Input files:\"\n",
                "echo $this_input_files_string\n",
                "cmsRun " + os.path.basename(cfg_path)
                + " dataSource=" + data_source
                + " dataType=data"
                + " outputFile=" + output_filename
                + " inputFiles=$this_input_files_string\n",
            ]
            with open(bash_script_path, 'w') as bash_script:
                bash_script.writelines(script_lines)

            submit_command = "csub " + bash_script_path + " --cmssw "
            if not retar:
                submit_command += " --no_retar"
            print("[debug] This job will have " + str(len(input_files)) + " / " + str(files_per_job) + " = " + str(n_jobs) + " jobs")
            # NOTE(review): unlike RunBHistogramsSignal, only the cfg follows
            # "-F" here (with a leading comma) -- kept exactly as before.
            submit_command += " -F " + "," + cfg_path + " -n " + str(n_jobs)
            os.system(submit_command)
        else:
            # One command per chunk of at most files_per_job input files.
            for subjob_index, first_file in enumerate(range(0, len(input_files), files_per_job)):
                this_job_files = input_files[first_file:first_file + files_per_job]
                subjob_tag = ".subjob" + str(subjob_index)
                # NOTE(review): the command is wrapped in echo "..." and so is
                # only printed by the shell, not executed -- kept as before.
                command = "echo \"condor_cmsRun"
                if retar:
                    command += " --retar "
                command += " --submit-file=submit_" + analysis + "_" + sample + subjob_tag + ".jdl "
                command += " --output-tag=BHistograms_" + sample + subjob_tag + " "
                command += " --run " + dijet_directory + "/CMSSW_5_3_32_patch3/src/MyTools/RootUtils/scripts/cmsRun_wrapper.sh " + cfg_path
                command += " dataSource=" + data_source
                command += " dataType=data inputFiles=" + ",".join(this_job_files).rstrip(",")
                command += "\""
                command += " outputFile=" + os.path.basename(histogram_filename + subjob_tag)
                os.system(command)

        # Postprocessing script: merge the outputs only if no job log reports a failure.
        merge_command = "hadd " + histogram_filename + " " + working_directory + "/" + os.path.basename(histogram_filename).replace(".root", "_*.root")
        postprocessing_lines = [
            "import os\n",
            "import sys\n",
            "import glob\n",
            "log_files = glob.glob(\"" + working_directory + "/*stderr\")\n",
            "failed_logs = []\n",
            "for log_file in log_files:\n",
            "\tlog_file_handle = open(log_file, 'r')\n",
            "\tfor line in log_file_handle:\n",
            "\t\tif \"FAILURE\" in line:\n",
            "\t\t\tfailed_logs.append(log_file)\n",
            "if len(failed_logs) == 0:\n",
            "\tos.system(\"" + merge_command + "\")\n",
            "else:\n",
            "\tprint(\"Some jobs failed. You need to retry them.\")\n",
            "\tfor failed_log in failed_logs:\n",
            # Parenthesized so the generated script uses one print form throughout.
            "\t\tprint(failed_log)\n",
        ]
        with open('postprocessing.py', 'w') as postprocessing_file:
            postprocessing_file.writelines(postprocessing_lines)
    finally:
        # cd back, even if the submission failed part-way
        os.chdir(start_directory)
def FrankenEfficiencyPlot(self, logy=True, binning=None, simple_rebin=None, save_tag="", x_range=None, ratio_range=None, legend_position="topright", numerator_legend=None, denominator_legend=None, ratio_title=None):
    """Plot numerator/denominator mjj spectra built from HT slices, and their ratio.

    For every sample in self._samples, the "BHistograms/h_pfjet_mjj" histogram
    of each HT slice is read for the numerator and the denominator analysis,
    the slices are combined with self.FrankenHist (using the per-slice mjj
    ranges below), and the per-sample results are summed. The two spectra are
    drawn on a top pad and their bin-by-bin ratio on a bottom pad; the canvas
    is saved as a PDF under analysis_config.figure_directory + "/OfflineBTag/".

    Args:
        logy: Use a log y axis on the top pad (also appends "_log" to the canvas name).
        binning: If given, both spectra are rebinned with
            histogram_tools.rebin_histogram(hist, binning). Takes precedence
            over simple_rebin.
        simple_rebin: If given (and binning is not), passed to TH1.Rebin.
        save_tag: Appended to the histogram names and to the output file name.
        x_range: Optional [min, max] applied to the x axis of both pads.
        ratio_range: Optional [min, max] for the ratio y axis (default 0 to 1).
        legend_position: "topright" or "bottomright".
        numerator_legend: Legend text for the numerator (default "CSVT+CSVM").
        denominator_legend: Legend text for the denominator (default "No CSV").
        ratio_title: y-axis title of the ratio pad
            (default "Offline 2#timesb-tag efficiency").

    The function prints an error and calls sys.exit(1) if the numerator or
    denominator analysis or the legend position is not one of the supported
    values, or if the numerator and denominator files disagree on the number
    of input events by 0.1% or more.
    """
    # Legend box corners, keyed by the supported legend_position values.
    # Validated up front so an unsupported value fails before any file I/O
    # instead of as an unbound-variable error after all the work is done.
    legend_boxes = {
        "topright": (0.6, 0.6, 0.85, 0.8),
        "bottomright": (0.6, 0.2, 0.85, 0.4),
    }
    if legend_position not in legend_boxes:
        print("[OfflineBTagPlots::FrankenEfficiencyPlot] ERROR : legend_position must be topright or bottomright")
        sys.exit(1)

    ht_thresholds = ["200", "250", "300", "350", "400", "450", "500", "550"]  # HT650 is not used
    ht_slices = ["HT" + ht for ht in ht_thresholds] + ["HTUnprescaled"]

    def slice_analyses(eta_suffix, tag_suffix):
        # Map slice name -> analysis name, e.g. "HT200" -> "trigjetht200_eta1p7_CSVTM".
        # The unprescaled slice has no HT threshold in its analysis name.
        names = dict(("HT" + ht, "trigjetht" + ht + eta_suffix + tag_suffix) for ht in ht_thresholds)
        names["HTUnprescaled"] = "trigjetht" + eta_suffix + tag_suffix
        return names

    # Note: the eta2p2 slice analyses carry no eta suffix in their names.
    if self._numerator_analysis == "trigjetht_eta1p7_CSVTM":
        numerator_analyses = slice_analyses("_eta1p7", "_CSVTM")
    elif self._numerator_analysis == "trigjetht_eta2p2_CSVTM":
        numerator_analyses = slice_analyses("", "_CSVTM")
    else:
        print("[OfflineBTagPlots::FrankenEfficiencyPlot] ERROR : numerator analysis must be trigjetht_eta1p7_CSVTM or trigjetht_eta2p2_CSVTM")
        sys.exit(1)

    if self._denominator_analysis == "trigjetht_eta1p7":
        denominator_analyses = slice_analyses("_eta1p7", "")
    elif self._denominator_analysis == "trigjetht_eta2p2":
        denominator_analyses = slice_analyses("", "")
    else:
        print("[OfflineBTagPlots::FrankenEfficiencyPlot] ERROR : denominator analysis must be trigjetht_eta1p7 or trigjetht_eta2p2")
        sys.exit(1)

    # Per-slice [low, high] ranges handed to self.FrankenHist.
    ht_ranges = {
        "HT200": [220, 386],
        "HT250": [386, 489],
        "HT300": [489, 526],
        "HT350": [526, 606],
        "HT400": [606, 649],
        "HT450": [649, 740],
        "HT500": [740, 788],
        "HT550": [788, 890],
        "HTUnprescaled": [890, 2000],
    }

    numerator_histogram = None
    denominator_histogram = None
    for sample in self._samples:
        print("[EfficiencyPlot] DEBUG : Sample " + sample)
        numerator_slice_histograms = {}
        denominator_slice_histograms = {}
        for slice_name in ht_slices:
            print(slice_name)
            numerator_filename = analysis_config.get_b_histogram_filename(numerator_analyses[slice_name], sample)
            print(numerator_filename)
            numerator_file = TFile(numerator_filename, "READ")
            denominator_filename = analysis_config.get_b_histogram_filename(denominator_analyses[slice_name], sample)
            print(denominator_filename)
            denominator_file = TFile(denominator_filename, "READ")

            # Check input nevents: both analyses must have run over the same events.
            num_nevents = numerator_file.Get("BHistograms/h_input_nevents").Integral()
            den_nevents = denominator_file.Get("BHistograms/h_input_nevents").Integral()
            numerator_normalization = 1.
            denominator_normalization = 1.
            if num_nevents != den_nevents:
                # Allow tiny differences. An empty denominator can never be
                # "close", and must not be divided by.
                if den_nevents != 0 and abs((num_nevents - den_nevents) / den_nevents) < 0.001:
                    print("[EfficiencyPlot] ERROR : Small inconsistency between number of events between numerator and denominator. I'm going to rescale away the difference, but you may want to fix this.")
                    numerator_normalization = den_nevents / num_nevents
                else:
                    print("[EfficiencyPlot] ERROR : Inconsistent number of events between numerator and denominator. Results would be wrong, so I'm aborting.")
                    print("[EfficiencyPlot] ERROR : \tNumerator = " + str(num_nevents))
                    print("[EfficiencyPlot] ERROR : \tDenominator = " + str(den_nevents))
                    sys.exit(1)

            num_slice = numerator_file.Get("BHistograms/h_pfjet_mjj")
            num_slice.SetName("h_pfjet_mjj_num_" + slice_name + "_" + sample)
            num_slice.SetDirectory(0)  # detach so the histogram survives closing the file
            num_slice.Scale(numerator_normalization)
            numerator_slice_histograms[slice_name] = num_slice

            den_slice = denominator_file.Get("BHistograms/h_pfjet_mjj")
            den_slice.SetName("h_pfjet_mjj_den_" + slice_name + "_" + sample)
            den_slice.SetDirectory(0)
            den_slice.Scale(denominator_normalization)
            denominator_slice_histograms[slice_name] = den_slice

            numerator_file.Close()
            denominator_file.Close()

        # Make frankenhist
        this_numerator_histogram = self.FrankenHist(ht_slices, numerator_slice_histograms, ht_ranges)
        this_denominator_histogram = self.FrankenHist(ht_slices, denominator_slice_histograms, ht_ranges)
        if numerator_histogram is None:
            # time.time() in the name keeps repeated calls from reusing a histogram name.
            numerator_histogram = this_numerator_histogram.Clone()
            numerator_histogram.SetName(numerator_histogram.GetName() + save_tag + "_num_" + str(time.time()))
            denominator_histogram = this_denominator_histogram.Clone()
            denominator_histogram.SetName(denominator_histogram.GetName() + save_tag + "_den_" + str(time.time()))
        else:
            numerator_histogram.Add(this_numerator_histogram)
            denominator_histogram.Add(this_denominator_histogram)

    # Rebin
    if binning:
        numerator_histogram = histogram_tools.rebin_histogram(numerator_histogram, binning)
        denominator_histogram = histogram_tools.rebin_histogram(denominator_histogram, binning)
    elif simple_rebin:
        numerator_histogram.Rebin(simple_rebin)
        denominator_histogram.Rebin(simple_rebin)

    cname = "c_offline_btag_eff_mjj"
    if logy:
        cname += "_log"
    c = TCanvas(cname, "Offline b-tag #epsilon", 800, 1000)

    # Top pad: the two spectra.
    top = TPad("top", "top", 0., 0.5, 1., 1.)
    top.SetBottomMargin(0.02)
    if logy:
        top.SetLogy()
    top.Draw()
    top.cd()

    frame_top = numerator_histogram.Clone()
    frame_top.Reset()
    if x_range:
        frame_top.GetXaxis().SetRangeUser(x_range[0], x_range[1])
    tallest = max(numerator_histogram.GetMaximum(), denominator_histogram.GetMaximum())
    if logy:
        y_min = 0.1
        y_max = tallest * 10.
    else:
        y_min = 0.
        y_max = tallest * 1.5
    frame_top.SetMinimum(y_min)
    frame_top.SetMaximum(y_max)
    # The x axis is labelled on the bottom pad only.
    frame_top.GetXaxis().SetLabelSize(0)
    frame_top.GetXaxis().SetTitleSize(0)
    frame_top.GetYaxis().SetTitle("Events")
    frame_top.Draw("axis")

    print("numerator integral = " + str(numerator_histogram.Integral()))
    print("denominator integral = " + str(denominator_histogram.Integral()))
    numerator_histogram.SetMarkerStyle(20)
    numerator_histogram.SetMarkerColor(seaborn.GetColorRoot("default", 0))
    numerator_histogram.SetLineColor(seaborn.GetColorRoot("default", 0))
    numerator_histogram.Draw("same")
    denominator_histogram.SetMarkerStyle(24)
    denominator_histogram.SetMarkerColor(seaborn.GetColorRoot("default", 2))
    denominator_histogram.SetLineColor(seaborn.GetColorRoot("default", 2))
    denominator_histogram.Draw("same")

    legend = TLegend(*legend_boxes[legend_position])
    legend.SetFillColor(0)
    legend.SetBorderSize(0)
    legend.AddEntry(numerator_histogram, numerator_legend or "CSVT+CSVM")
    legend.AddEntry(denominator_histogram, denominator_legend or "No CSV")
    legend.Draw()

    # Bottom pad: the ratio.
    c.cd()
    bottom = TPad("bottom", "bottom", 0., 0., 1., 0.5)
    bottom.SetTopMargin(0.01)
    bottom.SetBottomMargin(0.2)
    bottom.Draw()
    bottom.cd()

    ratio_histogram = numerator_histogram.Clone()
    ratio_histogram.Reset()
    if x_range:
        ratio_histogram.GetXaxis().SetRangeUser(x_range[0], x_range[1])
    ratio_histogram.SetName(numerator_histogram.GetName() + "_ratio_" + save_tag + str(time.time()))
    ratio_histogram.SetDirectory(0)
    for ibin in range(1, numerator_histogram.GetNbinsX() + 1):
        ratio = 0.
        ratio_err = 0.
        num_err = numerator_histogram.GetBinError(ibin)
        den_err = denominator_histogram.GetBinError(ibin)
        if num_err > 0 and den_err > 0:
            # Undo bin normalization: content^2 / error^2 is used as the
            # entry count of the bin, independent of any scaling applied.
            num_unnormalized = numerator_histogram.GetBinContent(ibin)**2 / num_err**2
            den_unnormalized = denominator_histogram.GetBinContent(ibin)**2 / den_err**2
            if den_unnormalized > 0:
                ratio = 1. * num_unnormalized / den_unnormalized
                # Binomial error; the variance term is clamped at zero so a
                # ratio above 1 cannot produce the square root of a negative.
                ratio_err = sqrt(max(ratio * (1. - ratio), 0.) / den_unnormalized)
        ratio_histogram.SetBinContent(ibin, ratio)
        ratio_histogram.SetBinError(ibin, ratio_err)

    ratio_histogram.SetMarkerSize(1)
    ratio_histogram.SetMarkerColor(kBlack)
    ratio_histogram.SetLineColor(kBlack)
    ratio_histogram.SetLineWidth(2)
    ratio_histogram.GetXaxis().SetTitle("m_{jj} [GeV]")
    ratio_histogram.GetYaxis().SetTitle(ratio_title or "Offline 2#timesb-tag efficiency")
    if ratio_range:
        ratio_histogram.SetMinimum(ratio_range[0])
        ratio_histogram.SetMaximum(ratio_range[1])
    else:
        ratio_histogram.SetMinimum(0.)
        ratio_histogram.SetMaximum(1.)
    ratio_histogram.Draw()

    c.cd()
    c.SaveAs(analysis_config.figure_directory + "/OfflineBTag/" + c.GetName() + save_tag + ".pdf")

    # Hand the canvas and pads over to ROOT so Python does not delete them.
    ROOT.SetOwnership(c, False)
    ROOT.SetOwnership(top, False)
    ROOT.SetOwnership(bottom, False)
# Make the RootUtils python helpers importable, then load the histogram tools.
sys.path.append(
    "/uscms/home/dryu/Dijets/CMSSW_5_3_32_patch3/python/MyTools/RootUtils")
import histogram_tools

# Create and initialize the Seaborn interface object.
seaborn = Root.SeabornInterface()
seaborn.Initialize()

print("Loading histograms")

# Analysis tags whose SingleMu_2012 histogram files are opened below.
analyses = [
    "trigmu_highmass_CSVTM",
    "trigmu_lowmass_CSVTM",
    "trigmubbh_highmass_CSVTM",
    "trigmubbl_lowmass_CSVTM",
    "trigmubbll_lowmass_CSVTM",
]

# One read-only TFile per analysis tag, keyed by the tag.
files = {}
for analysis in analyses:
    files[analysis] = ROOT.TFile(
        analysis_config.get_b_histogram_filename(analysis, "SingleMu_2012"),
        "READ")

from array import array

# Variable-width mass bin edges, stored as a C double array.
mass_bins = array("d", [
    1, 3, 6, 10, 16, 23, 31, 40, 50, 61, 74, 88, 103, 119, 137, 156, 176,
    197, 220, 244, 270, 296, 325, 354, 386, 419, 453, 489, 526, 565, 606,
    649, 693, 740, 788, 838, 890, 944, 1000, 1058, 1118, 1181, 1246, 1313,
    1383, 1455, 1530, 1607, 1687, 1770, 1856, 1945, 2037, 2132, 2231, 2332,
    2438, 2546, 2659, 2775, 2895, 3019, 3147, 3279, 3416, 3558, 3704, 3854,
    4010, 4171, 4337, 4509, 4686, 4869, 5058, 5253, 5455, 5663, 5877, 6099,
    6328, 6564, 6808, 7060, 7320, 7589, 7866, 8000,
])

# Names of the variables handled by this script, and the (initially empty)
# histogram container.
variables = ["mjj", "pt1", "pt2", "pt_btag1", "pt_btag2"]
histograms = {}
if __name__ == "__main__":
    # Load the signal and data m_jj histograms for every
    # (model, trigger family, mass point, analysis suffix) combination.

    # Mass points examined for each trigger family, in processing order.
    masses_by_trigger = [
        ("trigbbl", [400, 600, 750, 900]),
        ("trigbbh", [600, 750, 900, 1200]),
    ]
    # Suffixes appended to the trigger family to form the analysis names.
    tag_suffixes = ["CSVL", "CSVM", "CSVT", "CSVTL", "CSVTM", "CSVML"]

    for model in ["Hbb", "RSG"]:
        for analysis_base, masses in masses_by_trigger:
            analyses = [analysis_base + "_" + x for x in tag_suffixes]
            for mass in masses:
                signal_histograms = {}
                data_histograms = {}
                for analysis in analyses:
                    signal_sample = analysis_config.simulation.get_signal_tag(model, mass, "FULLSIM")
                    signal_filename = analysis_config.get_b_histogram_filename(analysis, signal_sample)
                    data_filename = analysis_config.get_b_histogram_filename(analysis, "BJetPlusX_2012")
                    signal_histogram_file = TFile(signal_filename, "READ")
                    data_histogram_file = TFile(data_filename, "READ")

                    signal_histograms[analysis] = signal_histogram_file.Get("BHistograms/h_pfjet_mjj")
                    if not signal_histograms[analysis]:
                        print("ERROR : Couldn't find signal histogram BHistograms/h_pfjet_mjj in file " + signal_filename)
                        # Release both files before skipping this analysis.
                        signal_histogram_file.Close()
                        data_histogram_file.Close()
                        continue
                    # Detach from the file so the histogram survives Close().
                    signal_histograms[analysis].SetDirectory(0)
                    ngenevt = signal_histogram_file.Get("BHistograms/h_input_nevents").Integral()
                    signal_histogram_file.Close()
                    xsec = 1.  # 1 pb placeholder
                    signal_histograms[analysis].Scale(19700. * xsec / ngenevt)

                    print("[debug] Data file " + data_filename)
                    data_histograms[analysis] = data_histogram_file.Get("BHistograms/h_pfjet_mjj")
                    # Check for a missing histogram BEFORE calling methods on
                    # it; the original called SetDirectory(0) first, which
                    # fails on a null object before the error can be printed.
                    if not data_histograms[analysis]:
                        print("ERROR : Couldn't find data histogram")
                        data_histogram_file.Close()
                        continue
                    data_histograms[analysis].SetDirectory(0)
                    data_histogram_file.Close()
def _bhist_read_input_file_list(list_path):
    """Return the stripped, non-blank lines of the text file at ``list_path``."""
    with open(list_path, 'r') as list_file:
        stripped_lines = [line.strip() for line in list_file]
    # Blank lines would be counted as files here but vanish when the list is
    # expanded into the bash array, so the job count would not match.
    return [entry for entry in stripped_lines if entry]


def _bhist_write_csub_script(script_path, input_files, files_per_job, cmsrun_line):
    """Write the bash job script; its first argument ($1) is the job index."""
    script_lines = [
        "#!/bin/bash\n",
        "input_files=( " + " ".join(input_files) + " )\n",
        "files_per_job=" + str(files_per_job) + "\n",
        "first_file_index=$(($1*$files_per_job))\n",
        "max_file_index=$((${#input_files[@]}-1))\n",
        # The last job may get fewer files than files_per_job.
        "if [ $(($first_file_index+$files_per_job-1)) -gt $max_file_index ]; then\n",
        " files_per_job=$(($max_file_index-$first_file_index+1))\n",
        "fi\n",
        "declare -a this_input_files=(${input_files[@]:$first_file_index:$files_per_job})\n",
        "function join { local IFS=\"$1\"; shift; echo \"$*\"; }\n",
        "this_input_files_string=\"$(join , ${this_input_files[@]})\"\n",
        "echo \"Input files:\"\n",
        "echo $this_input_files_string\n",
        cmsrun_line,
    ]
    with open(script_path, 'w') as bash_script:
        bash_script.writelines(script_lines)


def _bhist_run_legacy_condor_commands(analysis, sample, input_files, files_per_job, retar, data_source):
    """Run one echo'd condor_cmsRun command per group of ``files_per_job`` files.

    Legacy path, only reached if ``method`` in RunBHistogramsEOS is changed
    from "csub". The original referenced an undefined ``file_list`` here;
    it now uses the input list that was actually read.
    """
    subjob_output_filenames = []
    for subjob_index, first in enumerate(range(0, len(input_files), files_per_job)):
        this_job_files = [name.rstrip() for name in input_files[first:first + files_per_job]]
        command = "echo \"condor_cmsRun"
        if retar:
            command += " --retar "
        command += " --submit-file=submit_" + analysis + "_" + sample + ".subjob" + str(subjob_index) + ".jdl "
        command += " --output-tag=BHistograms_" + sample + ".subjob" + str(subjob_index) + " "
        command += " --run " + dijet_directory + "/CMSSW_5_3_32_patch3/src/MyTools/RootUtils/scripts/cmsRun_wrapper.sh " + analysis_config.analysis_cfgs[analysis]
        command += " dataSource=" + data_source
        command += " dataType=data inputFiles=" + ",".join(this_job_files)
        command += "\""
        subjob_output_filename = os.path.basename(
            analysis_config.get_b_histogram_filename(analysis, sample) + ".subjob" + str(subjob_index))
        # NOTE(review): outputFile lands outside the quoted echo string, as in
        # the original -- confirm whether that is intended.
        command += " outputFile=" + subjob_output_filename
        subjob_output_filenames.append(subjob_output_filename)
        os.system(command)
    return subjob_output_filenames


def _bhist_write_postprocessing_script(working_directory, merge_command):
    """Write ``postprocessing.py`` into the current working directory.

    The generated script scans ``<working_directory>/*stderr`` for lines
    containing "FAILURE"; if none are found it runs ``merge_command``,
    otherwise it lists the offending log files.
    """
    script_lines = [
        "import os\n",
        "import sys\n",
        "import glob\n",
        "log_files = glob.glob(\"" + working_directory + "/*stderr\")\n",
        "failed_logs = []\n",
        "for log_file in log_files:\n",
        # Close each log after scanning and record it at most once.
        "\twith open(log_file, 'r') as log_file_handle:\n",
        "\t\tfor line in log_file_handle:\n",
        "\t\t\tif \"FAILURE\" in line:\n",
        "\t\t\t\tfailed_logs.append(log_file)\n",
        "\t\t\t\tbreak\n",
        "if len(failed_logs) == 0:\n",
        "\tos.system(\"" + merge_command + "\")\n",
        "else:\n",
        "\tprint(\"Some jobs failed. You need to retry them.\")\n",
        "\tfor failed_log in failed_logs:\n",
        # Parenthesised single-argument print matches the call two lines up.
        "\t\tprint(failed_log)\n",
    ]
    with open('postprocessing.py', 'w') as postprocessing_file:
        postprocessing_file.writelines(script_lines)


def RunBHistogramsEOS(analysis, sample, files_per_job=20, retar=False, data_source=None):
    """Submit the BHistograms cmsRun jobs for one analysis/sample pair.

    Creates the submission directory, writes a bash job script that runs
    cmsRun over a slice of the sample's input files (the slice is chosen by
    the job index passed as $1), submits it with ``csub``, and writes a
    ``postprocessing.py`` script that merges the per-job outputs with hadd.

    Args:
        analysis: Key into ``analysis_config.analysis_cfgs``.
        sample: Key into ``analysis_config.files_QCDBEventTree``.
        files_per_job: Upper bound on files per job; recomputed so that the
            files are spread evenly over the resulting number of jobs.
        retar: If false, ``--no_retar`` is passed to csub.
        data_source: Value for the cmsRun ``dataSource=`` argument. Required.

    Exits the process with status 1 if ``data_source`` is missing,
    ``files_per_job`` is smaller than 1, or the input file list is empty
    (the last two previously surfaced as a ZeroDivisionError).
    """
    if not data_source:
        print("[RunBHistogramsEOS] ERROR : Please specify data_source = collision_data or simulation")
        sys.exit(1)
    if files_per_job < 1:
        print("[RunBHistogramsEOS] ERROR : files_per_job must be at least 1")
        sys.exit(1)

    # Create working directory and cd
    start_directory = os.getcwd()
    working_directory = dijet_directory + "/data/EightTeeEeVeeBee/BHistograms/condor/submit_" + analysis + "_" + sample
    os.system("mkdir -pv " + working_directory)
    os.chdir(working_directory)
    try:
        # Get input file list (read once; the original read it twice).
        input_files = _bhist_read_input_file_list(analysis_config.files_QCDBEventTree[sample])
        if not input_files:
            print("[RunBHistogramsEOS] ERROR : No input files listed for sample " + sample)
            sys.exit(1)

        output_path = analysis_config.get_b_histogram_filename(analysis, sample)

        method = "csub"
        if method == "csub":
            # Recalculate files_per_job to split evenly
            n_jobs = int(math.ceil(1. * len(input_files) / files_per_job))
            files_per_job = int(math.ceil(1. * len(input_files) / n_jobs))

            bash_script_path = working_directory + "/run_" + analysis + "_" + sample + ".sh"
            # Each job writes <name>_<job index>.root; $1 is expanded by bash.
            output_filename = os.path.basename(output_path.replace(".root", "_$1.root"))
            cmsrun_line = (
                "cmsRun " + os.path.basename(analysis_config.analysis_cfgs[analysis])
                + " dataSource=" + data_source
                + " dataType=data"
                + " outputFile=" + output_filename
                + " inputFiles=$this_input_files_string\n")
            _bhist_write_csub_script(bash_script_path, input_files, files_per_job, cmsrun_line)

            submit_command = "csub " + bash_script_path + " --cmssw "
            if not retar:
                submit_command += " --no_retar"
            print("[debug] This job will have " + str(len(input_files)) + " / " + str(files_per_job) + " = " + str(n_jobs) + " jobs")
            # NOTE(review): the file list passed to -F starts with a comma, as
            # in the original -- confirm csub expects that.
            submit_command += " -F " + "," + analysis_config.analysis_cfgs[analysis] + " -n " + str(n_jobs)
            os.system(submit_command)
        else:
            _bhist_run_legacy_condor_commands(analysis, sample, input_files, files_per_job, retar, data_source)

        # Postprocessing script
        merge_command = (
            "hadd " + output_path + " " + working_directory + "/"
            + os.path.basename(output_path).replace(".root", "_*.root"))
        _bhist_write_postprocessing_script(working_directory, merge_command)
    finally:
        # cd back, even if something above failed or exited.
        os.chdir(start_directory)
parser.add_argument('--btag_mc_notrig', action='store_true', help='Make online B tag efficiency plot from MC') args = parser.parse_args() print args if args.ht: analyses = {} names = [] for mass in xrange(200, 700, 50): if mass == 600: continue names.append("HT" + str(mass)) analyses["HT" + str(mass)] = "trigjetht" + str(mass) sample = "JetHT_2012BCD" histograms = {} for name in names: f = TFile(analysis_config.get_b_histogram_filename(analyses[name], sample), "READ") #histograms[name] = mjj_common.apply_dijet_binning_normalized(f.Get("BHistograms/h_pfjet_mjj")) print "[debug] For name " + name + ", input events = " + str(f.Get("BHistograms/h_input_nevents").GetEntries()) print "[debug] \tPrescale = " + str(f.Get("BHistograms/h_pass_nevents_weighted").GetBinContent(1) / f.Get("BHistograms/h_pass_nevents").GetBinContent(1)) histograms[name] = f.Get("BHistograms/h_pfjet_mjj").Rebin(20) histograms[name].SetName(histograms[name].GetName() + "_" + name) histograms[name].SetDirectory(0) f.Close() ht_threshold_plot(names, histograms, save_tag="jetht_thresholds", x_range=[0., 1200.], logy=True) if args.btag: f_jetht_save = TFile(analysis_config.dijet_directory + "/data/EightTeeEeVeeBee/TriggerEfficiency/trigeff_jetht_data.root", "RECREATE") for sr in ["lowmass", "highmass"]: ht_analyses = {} names = [] for mass in xrange(200, 700, 50):
from ROOT import * gSystem.Load("~/Dijets/CMSSW_5_3_32_patch3/lib/slc6_amd64_gcc472/libMyToolsRootUtils.so") import CMSDIJET.QCDAnalysis.analysis_configuration_8TeV as analysis_config # Combine the QCD MC samples with appropriate weights qcd_samples = ["QCD_Pt-80to120_TuneZ2star_8TeV_pythia6","QCD_Pt-120to170_TuneZ2star_8TeV_pythia6","QCD_Pt-170to300_TuneZ2star_8TeV_pythia6","QCD_Pt-300to470_TuneZ2star_8TeV_pythia6","QCD_Pt-470to600_TuneZ2star_8TeV_pythia6","QCD_Pt-600to800_TuneZ2star_8TeV_pythia6","QCD_Pt-800to1000_TuneZ2star_8TeV_pythia6","QCD_Pt-1000to1400_TuneZ2star_8TeV_pythia6","QCD_Pt-1400to1800_TuneZ2star_8TeV_pythia6","QCD_Pt-1800_TuneZ2star_8TeV_pythia6"] analyses = ["NoTrigger_eta2p2", "NoTrigger_eta2p2_CSVTM", "NoTrigger_eta1p7", "NoTrigger_eta1p7_CSVTM", "trigbbl_CSVTM", "trigbbh_CSVTM"] lumi = 19710. for analysis in analyses: first = True output_file = TFile(analysis_config.get_b_histogram_filename(analysis, "QCD_TuneZ2star_8TeV_pythia6"), "RECREATE") output_directory = output_file.mkdir("BHistograms") histograms = {} for sample in qcd_samples: input_file = TFile(analysis_config.get_b_histogram_filename(analysis, sample), "READ") input_directory = input_file.Get("BHistograms") input_directory.cd() xsec = analysis_config.simulation.background_cross_sections[sample] normalization = xsec * lumi / input_file.Get("BHistograms/h_sample_nevents").Integral() for key in gDirectory.GetListOfKeys(): key.Print() if "TH1" in key.GetClassName() or "TH2" in key.GetClassName(): hist = key.ReadObj() hist.Scale(normalization) if first: histograms[hist.GetName()] = hist histograms[hist.GetName()].SetDirectory(output_directory) else: histograms[hist.GetName()].Add(hist)