def draw_histos(args, config, distribution, tree, output_file):
    """Fill and write one TH1D for ``distribution``.

    Every branch listed under the distribution's 'variables' is drawn from
    ``tree`` into a single histogram named 'hist_<distribution>': the first
    branch is drawn with '>>' and the remaining ones are accumulated with
    '>>+'.  Each branch has its own selection, built from the common cuts,
    the distribution's cuts and, when the branch has an entry there, its
    'special cuts'.  The histogram is attached to ``output_file`` and
    written.  ``args`` is accepted but not used in this function.
    """
    # Single-argument print(...) produces the same output as the print
    # statement under Python 2.
    print('inside draw_histos with ' + distribution)

    n_bins = config['num bins']
    dist_cfg = config['distributions'][distribution]
    x_low = dist_cfg['min']
    x_high = dist_cfg['max']
    branches = dist_cfg['variables']
    common_cuts = config.get('common cuts', {}).values()
    distribution_cuts = dist_cfg.get('cuts', {}).values()

    # One DrawStringMaker per branch, keyed by branch name.
    selections = {}
    for branch in branches:
        maker = plot_helper.DrawStringMaker()
        requirement_sets = [common_cuts, distribution_cuts]
        if branch in config['special cuts']:
            requirement_sets.append(
                config['special cuts'].get(branch, {}).values())
        maker.append_selection_requirements(*requirement_sets)
        selections[branch] = maker

    canvas_name = 'hist ' + distribution
    hist_name = 'hist_' + distribution
    canvas = ROOT.TCanvas(canvas_name, canvas_name)
    hist = ROOT.TH1D(hist_name, hist_name, n_bins, x_low, x_high)

    # The leading branch fills the histogram; every branch that differs
    # from it is then added on top.
    lead = branches[0]
    tree.Draw(lead + ' >> ' + hist_name, selections[lead].draw_string, '')
    for branch in branches:
        if branch == lead:
            continue
        tree.Draw(branch + ' >>+ ' + hist_name,
                  selections[branch].draw_string, '')

    hist.SetDirectory(output_file)
    hist.Write()

    del hist
    del canvas
def compute_num_denom(type, bin, lep_cat, iLep, lepStr, base_cut, WP_tag,
                      WP_probe, FR_array, config, bin_NP):
    """Sum the selected yields of all relevant samples for one bin.

    Parameters (as used by this function):
      type      -- which samples to include: 'data', 'MC', 'QF', or any
                   string containing 'NP'.  'NP_tag' additionally restricts
                   the tag lepton to the bin ``bin_NP``.
      bin       -- key into ``FR_array`` describing the probe-lepton bin.
                   The entry is read as [isMuon, pt low, pt high, |eta| low,
                   |eta| high, jetBTagCSV low, jetBTagCSV high].
      lep_cat   -- 'mu_mu', 'mu_ele', 'ele_ele' or '3l'.
      iLep      -- zero-based index of the probe lepton; the other lepton
                   (index ``abs(iLep - 1)``) is the tag.
      lepStr    -- branch-name prefix for lepton branches.
      base_cut  -- list of cut strings; it is deep-copied, not modified.
      WP_tag, WP_probe -- thresholds applied to '<lepton>_lepCut'.
      config    -- analysis configuration dictionary.
      bin_NP    -- key into ``FR_array`` for the tag-lepton bin
                   (only read when ``type == 'NP_tag'``).

    Returns the summed histogram integral as a float.  Returns 0.0
    immediately when the probe flavour of ``bin`` cannot occur in
    ``lep_cat`` (muon bin in 'ele_ele', electron bin in 'mu_mu').

    Exits via ``sys.exit`` on an unknown ``lep_cat`` or an invalid sample
    type.
    """
    this_base_cut = copy.deepcopy(base_cut)
    this_num_denom = 0.0

    if FR_array[bin][0] == 1 and lep_cat == 'ele_ele':
        return 0.0
    if FR_array[bin][0] == 0 and lep_cat == 'mu_mu':
        return 0.0

    probe = lepStr + str(iLep + 1)
    tag = lepStr + str(abs(iLep - 1) + 1)

    ## Apply cuts for this bin, probe lepton
    this_base_cut.append(probe + '_lepCut >= ' + str(WP_probe))
    this_base_cut.append(probe + '_isMuon == ' + str(FR_array[bin][0]))
    this_base_cut.append(probe + '_pt > ' + str(FR_array[bin][1]))
    this_base_cut.append(probe + '_pt < ' + str(FR_array[bin][2]))
    this_base_cut.append('abs(' + probe + '_eta) > ' + str(FR_array[bin][3]))
    this_base_cut.append('abs(' + probe + '_eta) < ' + str(FR_array[bin][4]))
    this_base_cut.append(probe + '_jetBTagCSV > ' + str(FR_array[bin][5]))
    this_base_cut.append(probe + '_jetBTagCSV < ' + str(FR_array[bin][6]))

    ## Tag-lepton working-point cut: data, MC and QF sideband require the
    ## tag to pass, the NP sideband requires it to fail.
    data_cut = [tag + '_lepCut >= ' + str(WP_tag)]
    MC_cut = [tag + '_lepCut >= ' + str(WP_tag)]
    QF_cut = [tag + '_lepCut >= ' + str(WP_tag)]
    NP_cut = [tag + '_lepCut < ' + str(WP_tag)]

    ## Apply cuts for this bin, tag lepton
    if type == 'NP_tag':
        tag_lepton_bin_cut = [' && '.join([
            tag + '_isMuon == ' + str(FR_array[bin_NP][0]),
            tag + '_pt > ' + str(FR_array[bin_NP][1]),
            tag + '_pt < ' + str(FR_array[bin_NP][2]),
            'abs(' + tag + '_eta) > ' + str(FR_array[bin_NP][3]),
            'abs(' + tag + '_eta) < ' + str(FR_array[bin_NP][4]),
            tag + '_jetBTagCSV > ' + str(FR_array[bin_NP][5]),
            tag + '_jetBTagCSV < ' + str(FR_array[bin_NP][6]),
        ])]

    ## Config keys that depend only on the lepton category
    if lep_cat in ('mu_mu', 'mu_ele', 'ele_ele'):
        label_key, mc_weights_key = 'SS label', 'weights SS'
    elif lep_cat == '3l':
        label_key, mc_weights_key = '3l label', 'weights 3l'
    else:
        label_key = mc_weights_key = None

    for sample in config['samples']:
        sample_dict = config['samples'][sample] if config['samples'][
            sample] else {
                'systematics': ['common'],
                'weights': ['common']
            }

        ## Get basic sample information
        tree_sample = sample_dict.get('tree sample', sample)
        additional_cuts = sample_dict.get('additional cuts', [])
        cuts_to_remove = sample_dict.get('cuts to remove', [])
        sample_info = plot_helper.SampleInformation(tree_sample)
        is_mc = sample_info.sample_type == 'MC'

        ## Only compute relevant samples
        if type == 'data' and (sample_info.sample_type != 'data'
                               or 'sideband' in sample):
            continue
        if type == 'MC' and not is_mc:
            continue
        if type == 'QF' and 'QF' not in sample:
            continue
        if 'NP' in type and 'NP' not in sample:
            continue

        ## Get tree file and summaryTree
        if label_key is None:
            # Previously this fell through with source_file_name unbound.
            sys.exit('Unknown lepton category %s: cannot locate input trees'
                     % lep_cat)
        source_file_name = '%s/%s_%s_all.root' % (
            config['input_trees_directory'], tree_sample, config[label_key])
        # NOTE(review): source_file is never closed explicitly; it is left
        # that way here because the Plot below is built from its tree.
        source_file = ROOT.TFile(source_file_name)
        tree = source_file.Get('summaryTree')

        draw_string_maker = plot_helper.DrawStringMaker()

        ## Apply proper selection criteria
        data_samples = config['lepton categories'][lep_cat]['data samples']
        if sample in data_samples:
            tag_wp_cut = data_cut
        elif is_mc:
            tag_wp_cut = MC_cut
        elif ('QF_sideband' in sample
              and sample.replace('_QF_sideband', '') in data_samples):
            tag_wp_cut = QF_cut
        elif ('NP_sideband' in sample
              and sample.replace('_NP_sideband', '') in data_samples):
            tag_wp_cut = NP_cut
        else:
            # Sample does not belong to this lepton category.
            continue
        draw_string_maker.append_selection_requirements(this_base_cut,
                                                        tag_wp_cut)

        if type == 'NP_tag':
            draw_string_maker.append_selection_requirements(
                tag_lepton_bin_cut)

        draw_string_maker.remove_selection_requirements(cuts_to_remove)
        draw_string_maker.append_selection_requirements(additional_cuts)

        ## Are the QF weights implemented properly?
        weights = plot_helper.customize_list(
            config['weights'], sample_dict.get('weights', ['common']))

        ## Apply MC weights (category weights only apply to MC samples)
        weights_cat = config[mc_weights_key] if is_mc else ['1.0']

        if is_mc and 'triggerSF' in weights:
            matched_SF = draw_string_maker.get_matched_SF(lep_cat)
            weights = [matched_SF if x == 'triggerSF' else x for x in weights]

        draw_string_maker.multiply_by_factors(weights, weights_cat)

        if (sample_info.sample_type not in ['MC', 'data']
                and 'sideband' not in sample_info.sample_type):
            # The original message had no %s placeholder, so formatting it
            # raised TypeError instead of reporting the offending type.
            sys.exit('Invalid sample_type %s: must be data, sideband, or MC'
                     % (sample_info.sample_type))

        plot = plot_helper.Plot(sample, 0, tree, 'distribution',
                                config['distribution'],
                                draw_string_maker.draw_string)
        if is_mc:
            plot.plot.Scale(sample_info.x_section * config['luminosity'] /
                            sample_info.num_generated)

        this_num_denom += plot.plot.Integral()
    ## End loop over samples

    print('num_denom for %s, %s, WP_probe %s, %s, %s, lepton %s = %s' %
          (type, lep_cat, WP_probe, bin, bin_NP, iLep + 1, this_num_denom))

    return this_num_denom
def _expand_bkg_placeholders(bkg_var, selection, num_bkg_vars):
    """Expand index place-holders in a background branch and its selection.

    The place-holders '_WW', '_XX', '_YY', '_ZZ' (as many as there are
    entries in ``num_bkg_vars``, at most four) are replaced by '_1', '_2',
    ... in both ``bkg_var`` and ``selection``.  Place-holder ``p`` runs over
    ``range(num_bkg_vars[p])``; only strictly ascending index combinations
    are kept.  An empty ``num_bkg_vars`` yields the inputs unchanged.

    Returns a dict mapping expanded branch name -> expanded selection string.
    """
    import itertools

    placeholders = ('_WW', '_XX', '_YY', '_ZZ')[:len(num_bkg_vars)]
    index_ranges = [
        range(num_bkg_vars[pos]) for pos in range(len(placeholders))
    ]

    expanded = {}
    for indices in itertools.product(*index_ranges):
        ## Indices always in ascending order
        if any(lo >= hi for lo, hi in zip(indices, indices[1:])):
            continue
        name, sel = bkg_var, selection
        for placeholder, index in zip(placeholders, indices):
            number = '_' + str(index + 1)
            name = name.replace(placeholder, number)
            sel = sel.replace(placeholder, number)
        expanded[name] = sel
    return expanded


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is 0."""
    if denominator == 0:
        return 0.0
    return numerator / denominator


def draw_histos(args, config, distribution, tree, output_file, cat):
    """Write signal, background and ratio histograms for ``distribution``.

    Three TH1Ds are filled from ``tree`` and written to ``output_file``:
      'hist_<distribution><cat>'      -- the 'variables' branches (signal)
      'bkg_hist_<distribution><cat>'  -- the expanded 'bkg_vars' branches
      'ratio_<distribution><cat>'     -- bin-by-bin signal / background,
                                         rescaled so that its signal-weighted
                                         average is 1
    where <cat> is '' for ``cat == ''`` and '_' + cat otherwise.  When the
    distribution's 'underflow' / 'overflow' flags are set, the first / last
    ratio bin uses the integral including the under- / overflow bin.

    Bins with an empty background get a ratio of 0.0, and the final
    rescaling is skipped when the weighted integral is 0, instead of
    raising ZeroDivisionError.  ``args`` is accepted but not used here.
    """
    print('inside draw_histos with ' + distribution + ' ' + cat)

    dist_cfg = config['distributions'][distribution]
    nBins = dist_cfg['num bins']
    xMin = dist_cfg['min']
    xMax = dist_cfg['max']
    ## Branch names for gen-level correct permutation (sig)
    vars = dist_cfg['variables']
    ## Branch names for all permutations (bkg)
    bkg_vars = dist_cfg['bkg_vars']
    ## Basic cuts
    base_cuts = config.get('common cuts', {}).values()
    ## Cuts for this distribution
    dist_cuts = dist_cfg.get('cuts', {}).values()
    ## Cuts for background permutations
    bkg_cuts = dist_cfg.get('bkg_cuts', {}).values()

    ## If there are categories, divide them up
    if cat == '':
        cat_cut = config['category cuts']['none']
        cat_str = ''
    else:
        cat_cut = config['category cuts'][cat]
        cat_str = '_' + cat

    ## Selection strings for the 'Draw' command
    sel_strings = {}
    bkg_sel_strings = {}

    ## Sometimes there are multiple branch names (variables) per distribution
    for var in vars:
        sel_strings[var] = plot_helper.DrawStringMaker()
        sel_strings[var].append_selection_requirements(base_cuts, dist_cuts,
                                                       cat_cut)
        ## Add specific cuts for specific variables
        if var in config['special cuts']:
            sel_strings[var].append_selection_requirements(
                config['special cuts'].get(var, {}).values())

        bkg_sel_strings[var] = {}
        for bkg_var in bkg_vars:
            ## Don't apply dist_cuts ('cuts') to bkg - only bkg_cuts
            bkg_sel_string_temp = plot_helper.DrawStringMaker()
            bkg_sel_string_temp.append_selection_requirements(
                base_cuts, cat_cut, bkg_cuts)
            if bkg_var in config['special cuts']:
                bkg_sel_string_temp.append_selection_requirements(
                    config['special cuts'].get(bkg_var, {}).values())

            ## Replace place-holders in background variable names with numbers
            bkg_sel_strings[var].update(
                _expand_bkg_placeholders(bkg_var,
                                         bkg_sel_string_temp.draw_string,
                                         dist_cfg['num_bkg_vars']))

    ## Draw the signal histograms
    hist_name = 'hist_' + distribution + cat_str
    canvas = ROOT.TCanvas('hist ' + distribution + cat_str,
                          'hist ' + distribution + cat_str)
    hist = ROOT.TH1D(hist_name, hist_name, nBins, xMin, xMax)
    ## Draw first variable
    tree.Draw(vars[0] + ' >> ' + hist_name, sel_strings[vars[0]].draw_string,
              '')
    for var in vars:
        ## Draw additional variables if they exist
        if not var == vars[0]:
            tree.Draw(var + ' >>+ ' + hist_name, sel_strings[var].draw_string,
                      '')

    ## Draw the background histograms
    ## Background variable names with place-holders replaced by numbers;
    ## list(...) keeps the [0] indexing valid when keys() is a view.
    bkg_vars_new = {}
    for var in vars:
        bkg_vars_new[var] = list(bkg_sel_strings[var].keys())

    bkg_hist_name = 'bkg_hist_' + distribution + cat_str
    bkg_canvas = ROOT.TCanvas('bkg_hist ' + distribution + cat_str,
                              'bkg hist ' + distribution + cat_str)
    bkg_hist = ROOT.TH1D(bkg_hist_name, bkg_hist_name, nBins, xMin, xMax)

    ## Draw for first signal variable and first background variable.
    ## The selection is looked up under vars[0]; the original used the
    ## loop variable left over from the previous 'for var in vars'.
    first_bkg_var = bkg_vars_new[vars[0]][0]
    tree.Draw(first_bkg_var + ' >> ' + bkg_hist_name,
              bkg_sel_strings[vars[0]][first_bkg_var], '')
    for var in vars:
        for bkg_var in bkg_vars_new[var]:
            ## Draw for other signal variables and other background
            ## variables (why both?)
            if not (var == vars[0] and bkg_var == bkg_vars_new[var][0]):
                tree.Draw(bkg_var + ' >>+ ' + bkg_hist_name,
                          bkg_sel_strings[var][bkg_var], '')

    ## Draw the ratio histogram
    ratio_name = 'ratio_' + distribution + cat_str
    ratio_canvas = ROOT.TCanvas('ratio ' + distribution + cat_str,
                                'ratio ' + distribution + cat_str)
    ratio_hist = ROOT.TH1D(ratio_name, ratio_name, nBins, xMin, xMax)

    underflow = dist_cfg['underflow']
    overflow = dist_cfg['overflow']

    ## Fill each bin with sig / bkg
    for i in range(nBins):
        ratio_hist.SetBinContent(
            i + 1,
            _safe_ratio(hist.GetBinContent(i + 1),
                        bkg_hist.GetBinContent(i + 1)))
    ## Fold under-/overflow into the edge bins when requested
    if underflow:
        ratio_hist.SetBinContent(
            1, _safe_ratio(hist.Integral(0, 1), bkg_hist.Integral(0, 1)))
    if overflow:
        ratio_hist.SetBinContent(
            nBins,
            _safe_ratio(hist.Integral(nBins, nBins + 1),
                        bkg_hist.Integral(nBins, nBins + 1)))

    ## Get the integral of the signal histogram over the bins in use
    integral = hist.Integral(0 if underflow else 1,
                             nBins + 1 if overflow else nBins)

    ## Get the integral weighted by the ratio value for each bin
    weighted_integral = 0
    for i in range(nBins):
        if i + 1 == 1 and underflow:
            signal_in_bin = hist.Integral(0, i + 1)
        elif i + 1 == nBins and overflow:
            signal_in_bin = hist.Integral(i + 1, nBins + 1)
        else:
            signal_in_bin = hist.GetBinContent(i + 1)
        weighted_integral += ratio_hist.GetBinContent(i + 1) * signal_in_bin

    ## Scale the ratio histogram by the weighted_integral for signal,
    ## so the average ratio value for a correct permutation (signal) is 1
    if weighted_integral != 0:
        ratio_hist.Scale(integral / weighted_integral)

    hist.SetDirectory(output_file)
    hist.Write()
    bkg_hist.SetDirectory(output_file)
    bkg_hist.Write()
    ratio_hist.SetDirectory(output_file)
    ratio_hist.Write()

    del hist
    del canvas
    del bkg_hist
    del bkg_canvas
    del ratio_hist
    del ratio_canvas
def draw_corrs(args, config, distribution, tree, output_file, cat):
    """Write a correlation-correction histogram for ``distribution``.

    Only the first entry of the distribution's 'dependencies' is used; when
    there are none the function prints a message and returns.  Otherwise a
    TH1D 'corr_<distribution>_<dependency><cat>' with 'num corr bins' bins
    over [min, max] of ``distribution`` is filled, rescaled so that its
    average over 'hist_<distribution><cat>' is 1, and written to
    ``output_file``.

    For each correlation bin, events with the dependent variable inside the
    bin are drawn into temporary histograms of the independent and the
    dependent variable.  Their contents are averaged against the
    'ratio_...' histograms read back from ``output_file``.  The bin is set
    to 1 / (average independent ratio), or to 1.0 when the independent
    ratio is further from 1 than the dependent one.

    ``args`` is accepted but not used here.
    """
    dist_cfg = config['distributions'][distribution]

    ## Which distributions are dependent on which
    ## Only set up for single dependencies (dependencies[0]) right now
    dependencies = dist_cfg.get('dependencies', {})
    if len(dependencies) == 0:
        print(distribution + ' ' + cat + ' has no dependencies')
        return
    indep = dependencies[0]
    print(distribution + ' ' + cat + ' depends on ' + indep + ' ' + cat)

    ## Parameters for the dependent variable
    nBins = dist_cfg['num bins']
    xMin = dist_cfg['min']
    xMax = dist_cfg['max']
    # float(): integer config values would otherwise floor-divide on Python 2
    bin_width = float(xMax - xMin) / nBins
    vars = dist_cfg['variables']

    ## Parameters for the independent variable
    indep_cfg = config['distributions'][indep]
    nBins_indep = indep_cfg['num bins']
    xMin_indep = indep_cfg['min']
    xMax_indep = indep_cfg['max']
    vars_indep = indep_cfg['variables']

    base_cuts = config.get('common cuts', {}).values()
    dist_cuts = dist_cfg.get('cuts', {}).values()

    if cat == '':
        cat_cut = config['category cuts']['none']
        cat_str = ''
    else:
        cat_cut = config['category cuts'][cat]
        cat_str = '_' + cat

    ## Number of bins for the correlation histogram
    nBins_corr = dist_cfg['num corr bins']
    bin_width_corr = float(xMax - xMin) / nBins_corr

    corr_name = 'corr_' + distribution + '_' + indep + cat_str
    corr = ROOT.TH1D(corr_name, corr_name, nBins_corr, xMin, xMax)

    ## Get the ratio histograms for the dependent and independent distributions
    ratio_dist = output_file.Get('ratio_' + distribution + cat_str)
    ratio_indep = output_file.Get('ratio_' + indep + cat_str)

    hist_indep_name = 'hist_indep_' + indep + cat_str
    hist_dist_name = 'hist_dist_' + indep + cat_str

    for i in range(nBins_corr):
        ## Initialize average ratio values to 0
        avg_ratio_dist = 0
        avg_ratio_indep = 0

        ## Make temporary histograms for this correlation bin
        hist_indep = ROOT.TH1D(hist_indep_name, hist_indep_name, nBins_indep,
                               xMin_indep, xMax_indep)
        hist_dist = ROOT.TH1D(hist_dist_name, hist_dist_name, nBins, xMin,
                              xMax)

        bin_low = xMin + i * bin_width_corr
        bin_high = xMin + (i + 1) * bin_width_corr

        ## Loop over variables for independent distribution
        for var_indep in vars_indep:
            ## Loop over variables for dependent distribution
            for var in vars:
                ## Single selection string for both independent and dependent
                ## distributions.  It is rebuilt for every (var_indep, var)
                ## pair: the original aliased one DrawStringMaker across the
                ## inner loop, so cuts of earlier variables accumulated.
                sel_string_var = plot_helper.DrawStringMaker()
                sel_string_var.append_selection_requirements(
                    base_cuts, dist_cuts, cat_cut)
                if var_indep in config['special cuts']:
                    sel_string_var.append_selection_requirements(
                        config['special cuts'].get(var_indep, {}).values())
                if var in config['special cuts']:
                    sel_string_var.append_selection_requirements(
                        config['special cuts'].get(var, {}).values())

                ## Specify this correlation bin in the cuts.  The first and
                ## last bins are open-ended.  %r keeps fractional bin edges;
                ## the original %d truncated them to integers.
                if i == 0:
                    var_bin_cut = '%s < %r' % (var, bin_high)
                elif i == nBins_corr - 1:
                    var_bin_cut = '%s > %r' % (var, bin_low)
                else:
                    var_bin_cut = '%s > %r && %s < %r' % (var, bin_low, var,
                                                          bin_high)
                sel_string_var.append_selection_requirement(var_bin_cut)

                ## Draw independent and dependent histograms for this
                ## correlation bin.
                ## NOTE(review): '>>' is used whenever var is vars[0], which
                ## also restarts both histograms for every new var_indep --
                ## confirm that only the last var_indep is meant to survive.
                redirect = ' >> ' if var == vars[0] else ' >>+ '
                tree.Draw(var_indep + redirect + hist_indep_name,
                          sel_string_var.draw_string, '')
                tree.Draw(var + redirect + hist_dist_name,
                          sel_string_var.draw_string, '')

        ## Get average ratio for independent and dependent distributions in
        ## this correlation bin
        for j in range(nBins_indep):
            avg_ratio_indep += hist_indep.GetBinContent(
                j + 1) * ratio_indep.GetBinContent(j + 1)
        for j in range(nBins):
            avg_ratio_dist += hist_dist.GetBinContent(
                j + 1) * ratio_dist.GetBinContent(j + 1)

        if hist_indep.Integral() == 0:
            avg_ratio_indep = 1.0
        else:
            avg_ratio_indep = avg_ratio_indep / hist_indep.Integral()
        if hist_dist.Integral() == 0:
            avg_ratio_dist = 1.0
        else:
            avg_ratio_dist = avg_ratio_dist / hist_dist.Integral()

        ## Set correlation value for this bin to 1/(average ratio for
        ## independent distribution in this correlation bin)
        corr.SetBinContent(i + 1, 1 / avg_ratio_indep)
        ## If dependent variable is 'over-correlated', set correlation to 1
        if abs(1 - avg_ratio_indep) > abs(1 - avg_ratio_dist):
            print('In bin %d, ratio_indep = %f and ratio_dist = %f' %
                  (i + 1, avg_ratio_indep, avg_ratio_dist))
            corr.SetBinContent(i + 1, 1.0)

        del hist_indep
        del hist_dist

    underflow = dist_cfg['underflow']
    overflow = dist_cfg['overflow']

    ## Get integral of the dependent distribution over the bins in use
    hist = output_file.Get('hist_' + distribution + cat_str)
    integral = hist.Integral(0 if underflow else 1,
                             nBins + 1 if overflow else nBins)

    ## Get integral of the dependent distribution weighted by the correlation
    ## value.  The overflow bin is only folded in when 'overflow' is set, so
    ## this covers the same bins as ``integral`` above.
    weighted_integral = 0
    for i in range(nBins):
        corr_value = corr.GetBinContent(corr.GetXaxis().FindBin(
            xMin + (i + 0.5) * bin_width))
        if i + 1 == 1 and underflow:
            content = hist.Integral(0, i + 1)
        elif i + 1 == nBins and overflow:
            content = hist.Integral(i + 1, nBins + 1)
        else:
            content = hist.GetBinContent(i + 1)
        weighted_integral += corr_value * content

    ## Scale the correlation histogram so that the average correlation value
    ## is 1 (skipped when there is nothing to normalise to)
    if weighted_integral != 0:
        corr.Scale(integral / weighted_integral)

    canvas = ROOT.TCanvas('corr ' + distribution + ' to ' + indep + cat_str,
                          'corr ' + distribution + ' to ' + indep + cat_str)

    corr.SetDirectory(output_file)
    corr.Write()

    del hist
    del ratio_indep
    del corr
    del canvas
def _fr_hist_bin_cut(tag, is_muon, fr_hist, pt_bin, eta_bin, csv_bin):
    """Return the cut string selecting one (pt, |eta|, CSV) bin of fr_hist.

    ``tag`` is the branch-name prefix of the tag lepton; the bin indices are
    zero-based and the bin edges are read from the X (pt), Y (|eta|) and
    Z (jetBTagCSV) axes of ``fr_hist``.
    """
    x_axis = fr_hist.GetXaxis()
    y_axis = fr_hist.GetYaxis()
    z_axis = fr_hist.GetZaxis()
    return ' && '.join([
        tag + '_isMuon == ' + str(is_muon),
        tag + '_pt > ' + str(x_axis.GetBinLowEdge(pt_bin + 1)),
        tag + '_pt < ' + str(
            x_axis.GetBinLowEdge(pt_bin + 1) +
            x_axis.GetBinWidth(pt_bin + 1)),
        'abs(' + tag + '_eta) > ' + str(y_axis.GetBinLowEdge(eta_bin + 1)),
        'abs(' + tag + '_eta) < ' + str(
            y_axis.GetBinLowEdge(eta_bin + 1) +
            y_axis.GetBinWidth(eta_bin + 1)),
        tag + '_jetBTagCSV > ' + str(z_axis.GetBinLowEdge(csv_bin + 1)),
        tag + '_jetBTagCSV < ' + str(
            z_axis.GetBinLowEdge(csv_bin + 1) +
            z_axis.GetBinWidth(csv_bin + 1)),
    ])


def compute_num_denom(bin, lep_cat, iLep, lepStr, base_cut, WP_tag, WP_probe,
                      FR_array, FR_hist_tight_mu, FR_hist_tight_ele, config):
    """Compute data minus everything else for one probe-lepton bin.

    Parameters (as used by this function):
      bin       -- key into ``FR_array`` describing the probe-lepton bin.
                   The entry is read as [isMuon, pt low, pt high, |eta| low,
                   |eta| high, jetBTagCSV low, jetBTagCSV high].
      lep_cat   -- 'mu_mu', 'mu_ele', 'ele_ele' or '3l'.
      iLep      -- zero-based index of the probe lepton; the other lepton
                   (index ``abs(iLep - 1)``) is the tag.
      lepStr    -- branch-name prefix for lepton branches.
      base_cut  -- list of cut strings; it is deep-copied, not modified.
      WP_tag, WP_probe -- thresholds applied to '<lepton>_lepCut'.
      FR_hist_tight_mu, FR_hist_tight_ele -- 3D histograms whose bin
                   contents scale the NP-sideband yield per tag-lepton bin.
      config    -- analysis configuration dictionary.

    One ``get_one_integral`` job per sample is submitted to a
    10-process pool; 'NP_sideband' samples get one job per flavour and
    histogram bin.  Integrals of samples listed in the category's
    'data samples' are added; all others are subtracted.

    Returns the result as a float.  Returns 0.0 immediately when the probe
    flavour of ``bin`` cannot occur in ``lep_cat``.  Exits via ``sys.exit``
    on an unknown ``lep_cat`` or an invalid sample type.
    """
    this_base_cut = copy.deepcopy(base_cut)
    this_num_denom = 0.0

    if FR_array[bin][0] == 1 and lep_cat == 'ele_ele':
        return 0.0
    if FR_array[bin][0] == 0 and lep_cat == 'mu_mu':
        return 0.0

    probe = lepStr + str(iLep + 1)
    tag = lepStr + str(abs(iLep - 1) + 1)

    ## Cuts for this bin, probe lepton
    this_base_cut.append(probe + '_lepCut >= ' + str(WP_probe))
    this_base_cut.append(probe + '_isMuon == ' + str(FR_array[bin][0]))
    this_base_cut.append(probe + '_pt > ' + str(FR_array[bin][1]))
    this_base_cut.append(probe + '_pt < ' + str(FR_array[bin][2]))
    this_base_cut.append('abs(' + probe + '_eta) > ' + str(FR_array[bin][3]))
    this_base_cut.append('abs(' + probe + '_eta) < ' + str(FR_array[bin][4]))
    this_base_cut.append(probe + '_jetBTagCSV > ' + str(FR_array[bin][5]))
    this_base_cut.append(probe + '_jetBTagCSV < ' + str(FR_array[bin][6]))

    ## Tag-lepton working-point cut: data, MC and QF sideband require the
    ## tag to pass, the NP sideband requires it to fail.
    data_cut = [tag + '_lepCut >= ' + str(WP_tag)]
    MC_cut = [tag + '_lepCut >= ' + str(WP_tag)]
    QF_cut = [tag + '_lepCut >= ' + str(WP_tag)]
    NP_cut = [tag + '_lepCut < ' + str(WP_tag)]

    ## Config keys that depend only on the lepton category
    if lep_cat in ('mu_mu', 'mu_ele', 'ele_ele'):
        label_key, mc_weights_key = 'SS label', 'weights SS'
    elif lep_cat == '3l':
        label_key, mc_weights_key = '3l label', 'weights 3l'
    else:
        label_key = mc_weights_key = None

    these_integrals = {}
    work = []
    pool = multiprocessing.Pool(processes=10)
    try:
        for sample in config['samples']:
            sample_dict = config['samples'][sample] if config['samples'][
                sample] else {
                    'systematics': ['common'],
                    'weights': ['common']
                }
            tree_sample = sample_dict.get('tree sample', sample)
            additional_cuts = sample_dict.get('additional cuts', [])
            cuts_to_remove = sample_dict.get('cuts to remove', [])
            sample_info = plot_helper.SampleInformation(tree_sample)
            is_mc = sample_info.sample_type == 'MC'

            if label_key is None:
                # Previously this fell through with source_file_name unbound.
                sys.exit(
                    'Unknown lepton category %s: cannot locate input trees'
                    % lep_cat)
            source_file_name = '%s/%s_%s_all.root' % (
                config['input_trees_directory'], tree_sample,
                config[label_key])
            # NOTE(review): the workers reopen the file by name, so this
            # handle and tree look unused here; kept because opening a
            # TFile may affect ROOT's current directory -- confirm before
            # removing.
            source_file = ROOT.TFile(source_file_name)
            tree = source_file.Get('summaryTree')

            draw_string_maker = plot_helper.DrawStringMaker()

            data_samples = config['lepton categories'][lep_cat][
                'data samples']
            if sample in data_samples:
                tag_wp_cut = data_cut
            elif is_mc:
                tag_wp_cut = MC_cut
            elif ('QF_sideband' in sample
                  and sample.replace('_QF_sideband', '') in data_samples):
                tag_wp_cut = QF_cut
            elif ('NP_sideband' in sample
                  and sample.replace('_NP_sideband', '') in data_samples):
                tag_wp_cut = NP_cut
            else:
                # Sample does not belong to this lepton category.
                continue
            draw_string_maker.append_selection_requirements(
                this_base_cut, tag_wp_cut)

            draw_string_maker.remove_selection_requirements(cuts_to_remove)
            draw_string_maker.append_selection_requirements(additional_cuts)

            ## Are the QF weights implemented properly?
            weights = plot_helper.customize_list(
                config['weights'], sample_dict.get('weights', ['common']))
            ## Category weights only apply to MC samples
            weights_cat = config[mc_weights_key] if is_mc else ['1.0']
            if is_mc and 'triggerSF' in weights:
                matched_SF = draw_string_maker.get_matched_SF(lep_cat)
                weights = [
                    matched_SF if x == 'triggerSF' else x for x in weights
                ]
            draw_string_maker.multiply_by_factors(weights, weights_cat)

            if (sample_info.sample_type not in ['MC', 'data']
                    and 'sideband' not in sample_info.sample_type):
                # The original message had no %s placeholder, so formatting
                # it raised TypeError instead of reporting the type.
                sys.exit(
                    'Invalid sample_type %s: must be data, sideband, or MC'
                    % (sample_info.sample_type))

            if 'NP_sideband' in sample:
                ## One job per tag-lepton flavour and histogram bin, each
                ## scaled by that bin's content (electrons first, as
                ## isMuon runs 0 then 1).
                for isMuon, fr_hist in ((0, FR_hist_tight_ele),
                                        (1, FR_hist_tight_mu)):
                    n_pt = fr_hist.GetNbinsX()
                    n_eta = fr_hist.GetNbinsY()
                    n_csv = fr_hist.GetNbinsZ()
                    thread_index = n_pt * 100 + n_eta * 10 + n_csv
                    for ptBin in range(n_pt):
                        for etaBin in range(n_eta):
                            for csvBin in range(n_csv):
                                tag_lepton_bin_cut = [
                                    _fr_hist_bin_cut(tag, isMuon, fr_hist,
                                                     ptBin, etaBin, csvBin)
                                ]
                                key = '%s_%d_%d_%d_%d' % (
                                    sample, isMuon, ptBin, etaBin, csvBin)
                                scale = fr_hist.GetBinContent(
                                    ptBin + 1, etaBin + 1, csvBin + 1)

                                # Add the bin cut only for this job, then
                                # take it off again for the next bin.
                                draw_string_maker.append_selection_requirements(
                                    tag_lepton_bin_cut)
                                these_integrals[key] = -99.0
                                work.append((key, pool.apply_async(
                                    get_one_integral, [
                                        thread_index, sample,
                                        config['distribution'],
                                        draw_string_maker.draw_string, scale,
                                        source_file_name
                                    ])))
                                draw_string_maker.remove_selection_requirements(
                                    tag_lepton_bin_cut)
            else:
                these_integrals[sample] = -99.0
                thread_index = (FR_hist_tight_mu.GetNbinsX() * 100 +
                                FR_hist_tight_mu.GetNbinsY() * 10 +
                                FR_hist_tight_mu.GetNbinsZ())
                scale = 1.0
                if is_mc:
                    scale = (sample_info.x_section * config['luminosity'] /
                             sample_info.num_generated)
                work.append((sample, pool.apply_async(get_one_integral, [
                    thread_index, sample, config['distribution'],
                    draw_string_maker.draw_string, scale, source_file_name
                ])))

        ## Collect every job's result (blocks until each one is done)
        for key, result in work:
            these_integrals[key] = result.get()
    finally:
        # The pool used to be left running after every call.  By this point
        # either all results are in or we are aborting, so the workers can
        # be stopped outright.
        pool.terminate()
        pool.join()

    ## Data samples add, everything else (MC, sidebands) subtracts
    for sample in these_integrals:
        if sample in config['lepton categories'][lep_cat]['data samples']:
            this_num_denom += these_integrals[sample]
        else:
            this_num_denom -= these_integrals[sample]

    print('num_denom for %s, WP_probe %s, %s, lepton %s = %s' %
          (lep_cat, WP_probe, bin, iLep + 1, this_num_denom))

    return this_num_denom
def make_histos(args, config, samples, lepton_categories, jet_tag_categories):
    """Fill and write histograms for every sample / category / systematic.

    For each sample, lepton category and jet-tag category one ROOT output
    file is (re)created.  For each systematic the matching input tree is
    opened, a draw string is assembled from the configured cuts and weights,
    and one ``plot_helper.Plot`` is made per configured distribution.

    Args:
        args: parsed command-line options; the attributes read here are
            ``limits``, ``file``, ``no_weights``, ``pdf``, ``web`` and
            ``config_file_name``.
        config: analysis configuration dictionary.  It is re-bound to the
            return value of ``plot_helper.append_integral_histo`` inside the
            systematic loop.
        samples: mapping of sample name to its per-sample option dict
            ('tree sample', 'additional cuts', 'cuts to remove',
            'systematics', 'weights').
        lepton_categories: iterable of lepton category names (keys of
            ``config['lepton categories']``).
        jet_tag_categories: mapping of jet-tag category name to its cut
            strings.

    Exits via ``sys.exit`` when a sample type is not 'MC', 'data', or a type
    containing 'sideband'.
    """
    for sample, sample_dict in samples.items():
        tree_sample = sample_dict.get('tree sample', sample)
        additional_cuts = sample_dict.get('additional cuts', [])
        cuts_to_remove = sample_dict.get('cuts to remove', [])
        sample_info = plot_helper.SampleInformation(tree_sample)

        for lepton_category in lepton_categories:
            category_config = config['lepton categories'][lepton_category]
            lepton_category_cut_strings = category_config.get(
                'cuts', {}).values()

            # Excluded data/sideband samples still get (empty) histograms so
            # the files can be hadd-ed into the inclusive category.  The zero
            # weight is tracked locally instead of being appended to
            # config['weights'], which used to leak it into every later
            # sample and category.
            force_empty = False
            if (sample_info.sample_type == 'data'
                    or 'sideband' in sample_info.sample_type):
                force_empty = sample in category_config.get(
                    'excluded samples', [])
                if not plot_helper.is_matching_data_sample(
                        category_config['data samples'], sample):
                    continue

            for (jet_tag_category,
                 jet_tag_category_cut_strings) in jet_tag_categories.items():
                systematics_list = plot_helper.customize_systematics(
                    config['systematics'],
                    sample_dict.get('systematics', 'common'))

                if args.limits:
                    directory_key = 'limits output directory'
                    skip_systematics = config['limits skip systematics']
                else:
                    directory_key = 'output directory'
                    skip_systematics = config['skip systematics']
                if skip_systematics:
                    systematics_list = ['nominal']

                output_file_name = '%s/%s/%s_%s_%s_%s.root' % (
                    config[directory_key], lepton_category, lepton_category,
                    jet_tag_category, sample, config['output label'])
                output_file = ROOT.TFile(output_file_name, 'RECREATE')

                for systematic in systematics_list:
                    print('Beginning next loop iteration. Sample: %10s Jet tag category: %-10s Lepton category: %-10s Systematic: %-10s' % (
                        sample, jet_tag_category, lepton_category, systematic))

                    (systematic_weight_string,
                     systematic_label) = plot_helper.get_systematic_info(
                         systematic)

                    # JES/JER variations live in their own input files; an
                    # explicit --file style override wins over both.
                    source_file_name = '%s/%s_%s_all.root' % (
                        config['input_trees_directory'], tree_sample,
                        config['label'])
                    if 'JES' in systematic or 'JER' in systematic:
                        source_file_name = '%s/%s_%s_%s_all.root' % (
                            config['input_trees_directory'], tree_sample,
                            config['label'], systematic)
                    if args.file:
                        source_file_name = args.file
                    source_file = ROOT.TFile(source_file_name)
                    tree = source_file.Get('summaryTree')

                    draw_string_maker = plot_helper.DrawStringMaker()
                    # additional_cuts is empty by default
                    draw_string_maker.append_selection_requirements(
                        config['common cuts'].values(),
                        lepton_category_cut_strings,
                        jet_tag_category_cut_strings,
                        additional_cuts)
                    draw_string_maker.remove_selection_requirements(
                        cuts_to_remove)

                    if not args.no_weights:
                        # Work on a copy so the shared config is not mutated.
                        base_weights = list(config['weights'])
                        if force_empty:
                            base_weights.append('0')
                        weights = plot_helper.customize_list(
                            base_weights,
                            sample_dict.get('weights', ['common']))
                        if (sample_info.sample_type == 'MC'
                                and 'triggerSF' in weights):
                            matched_SF = draw_string_maker.get_matched_SF(
                                lepton_category)
                            weights = [
                                matched_SF if x == 'triggerSF' else x
                                for x in weights
                            ]
                        draw_string_maker.multiply_by_factors(
                            weights, [systematic_weight_string])

                    if (sample_info.sample_type not in ['MC', 'data']
                            and 'sideband' not in sample_info.sample_type):
                        # The old message had no placeholder, so the '%'
                        # formatting itself raised TypeError.
                        sys.exit(
                            'Invalid sample_type %s: must be data, sideband, or MC'
                            % (sample_info.sample_type,))

                    config = plot_helper.append_integral_histo(config)
                    if args.limits:
                        distribution_items = config[
                            'limits distributions'].items()
                    else:
                        distribution_items = config['distributions'].items()

                    # NOTE(review): per-distribution cut changes are applied
                    # to the shared draw_string_maker and are not undone, so
                    # they carry over to later distributions -- confirm this
                    # is intended.
                    for distribution, parameters in distribution_items:
                        if sample not in parameters.get('samples', [sample]):
                            continue
                        draw_string_maker.remove_selection_requirements(
                            parameters.get('cuts to remove', []))
                        draw_string_maker.append_selection_requirements(
                            parameters.get('additional cuts', []))
                        plot_name = '%s%s' % (distribution, systematic_label)
                        plot = plot_helper.Plot(
                            sample, output_file, tree, plot_name, parameters,
                            draw_string_maker.draw_string)
                        if sample_info.sample_type == 'MC':
                            plot.plot.Scale(
                                sample_info.x_section * config['luminosity']
                                / sample_info.num_generated)
                        output_file.Write()
                        if args.pdf:
                            plot.save_image('pdf')
                        if args.web:
                            plot.post_to_web(config, lepton_category)

                    source_file.Close()  # end systematic

                # Record which config file produced this output file.
                config_file = ROOT.TObjString(args.config_file_name)
                output_file.cd()
                config_file.Write('config_file')
                output_file.Close()  # end jet tag category
def _corr_bin_cut(var, index, num_bins, x_min, bin_width):
    """Return the selection string for correlation bin *index* of *var*."""
    lower = x_min + index * bin_width
    upper = x_min + (index + 1) * bin_width
    # The first and last bins are open-ended so under/overflow is included.
    if index == 0:
        return '%s < %r' % (var, upper)
    if index == num_bins - 1:
        return '%s > %r' % (var, lower)
    return '%s > %r && %s < %r' % (var, lower, var, upper)


def draw_corrs(args, config, distribution, tree, output_file):
    """Write the 'corr_<distribution>_<dependency>' histogram.

    Only the first entry of the distribution's 'dependencies' is used.  For
    each of ``config['num corr bins']`` bins of the distribution's variables,
    the dependency's variables are drawn from *tree* into a temporary
    histogram, and the bin content of the correlation histogram is set to the
    average of the 'prob_<dependency>' histogram (read from *output_file*)
    weighted by that temporary histogram.  A bin with no selected entries is
    set to 1.0.

    Args:
        args: unused here.
        config: analysis configuration dictionary ('distributions',
            'num bins', 'num corr bins', 'special cuts', optional
            'common cuts').
        distribution: name of the distribution to correlate.
        tree: object whose ``Draw`` method fills the named histograms.
        output_file: ROOT file holding 'prob_<dependency>'; the correlation
            histogram is written to it.

    Returns:
        None.  Returns early (after printing a message) when the
        distribution has no dependencies.
    """
    dist_config = config['distributions'][distribution]
    dependencies = dist_config.get('dependencies', [])
    if not dependencies:
        print(distribution + ' has no dependencies')
        return
    dependency = dependencies[0]
    print(distribution + ' depends on ' + dependency)
    dep_config = config['distributions'][dependency]

    num_bins = config['num bins']
    x_min = dist_config['min']
    x_max = dist_config['max']
    variables = dist_config['variables']
    x_min_dep = dep_config['min']
    x_max_dep = dep_config['max']
    dep_variables = dep_config['variables']

    base_cuts = config.get('common cuts', {}).values()
    dist_cuts = dist_config.get('cuts', {}).values()
    # NOTE(review): the dependency's own 'cuts' were computed but never
    # applied in the original code; confirm whether they should be added.
    special_cuts = config['special cuts']

    num_corr_bins = config['num corr bins']
    # Force float division so integer limits do not truncate the bin width
    # under Python 2.
    bin_width_corr = float(x_max - x_min) / num_corr_bins

    corr_name = 'corr_' + distribution + '_' + dependency
    corr = ROOT.TH1D(corr_name, corr_name, num_corr_bins, x_min, x_max)
    prob_dep = output_file.Get('prob_' + dependency)
    hist_dep_name = 'hist_dep_' + dependency

    for i in range(num_corr_bins):
        hist_dep = ROOT.TH1D(hist_dep_name, hist_dep_name,
                             num_bins, x_min_dep, x_max_dep)

        for dep_index, var_dep in enumerate(dep_variables):
            # The n-th dependency variable is binned in the n-th variable of
            # the distribution, or in its only variable when there is one.
            if len(variables) > 1:
                var = variables[dep_index]
            else:
                var = variables[0]

            sel_string = plot_helper.DrawStringMaker()
            if var_dep in special_cuts:
                sel_string.append_selection_requirements(
                    base_cuts, dist_cuts, special_cuts[var_dep].values())
            else:
                sel_string.append_selection_requirements(base_cuts, dist_cuts)
            if var in special_cuts:
                sel_string.append_selection_requirements(
                    special_cuts[var].values())
            sel_string.append_selection_requirement(
                _corr_bin_cut(var, i, num_corr_bins, x_min, bin_width_corr))

            # The first draw resets the histogram and later draws add to it,
            # so every dependency variable contributes.
            draw_op = ' >> ' if dep_index == 0 else ' >>+ '
            tree.Draw(var_dep + draw_op + hist_dep_name,
                      sel_string.draw_string, '')

        integral = hist_dep.Integral()
        if integral == 0:
            avg_prob_dep = 1.0
        else:
            weighted_sum = sum(
                hist_dep.GetBinContent(j) * prob_dep.GetBinContent(j)
                for j in range(1, num_bins + 1))
            avg_prob_dep = weighted_sum / integral
        corr.SetBinContent(i + 1, avg_prob_dep)
        del hist_dep

    canvas_name = 'corr ' + distribution + ' to ' + dependency
    canvas = ROOT.TCanvas(canvas_name, canvas_name)
    corr.SetDirectory(output_file)
    corr.Write()

    del prob_dep
    del corr
    del canvas