def getCleanedShape(self, sample):
    """Return the 'data' histogram cleaned of every sample except ``sample``.

    The labels of ``self.histograms`` other than ``sample`` and ``'data'``
    are passed to ``hu.clean_control_region`` as the ``subtract`` list,
    with ``data_label='data'`` and ``fix_to_zero=True``.

    Raises ``ValueError`` (from ``list.remove``) when ``sample`` or
    ``'data'`` cannot be removed from the label list, e.g. because it is
    not a key of ``self.histograms``.
    """
    # Work on a fresh list so the dictionary itself is never modified.
    others = list(self.histograms.keys())
    for excluded in (sample, 'data'):
        others.remove(excluded)
    return hu.clean_control_region(
        self.histograms,
        data_label='data',
        subtract=others,
        fix_to_zero=True,
    )
def plot_fit_variable(histograms, fit_variable, variable, bin_range,
                      fit_variable_distribution, qcd_fit_variable_distribution,
                      title, save_path):
    '''
    Draw the control-region, signal-region and template plots of one fit
    variable for one bin of ``variable``.

    Four plots are written:
      1. data and all MC samples in the QCD control region
         (``qcd_fit_variable_distribution``), saved under ``save_path + '/qcd/'``;
      2. control-region data after ``clean_control_region`` (TTJet, V+Jets
         and SingleTop subtracted) against QCD MC, same folder;
      3. the signal region (``fit_variable_distribution``) with the cleaned
         control-region data, scaled to the QCD MC integral, in place of QCD;
      4. a shape comparison of the templates.

    histograms: nested mapping ``histograms[sample][distribution]``; it is
        deep-copied first, so the caller's histograms are left untouched.
    fit_variable: key into ``fit_variable_properties``.
    variable, bin_range: only used to build the output file names.
    title: plot title; the b-tag bin label is appended to it.
    save_path: output folder for plots 3 and 4.
    '''
    global fit_variable_properties, b_tag_bin, save_as, b_tag_bin_ctl
    # Function-scope import so this block does not depend on the file header.
    from copy import deepcopy

    # prepare_histograms is called for its effect on the histograms it is
    # given (its return value is not used), so hand it a private copy;
    # otherwise every call would rebin/scale the caller's objects again.
    histograms = deepcopy(histograms)
    properties = fit_variable_properties[fit_variable]
    mc_uncertainty = 0.10
    prepare_histograms(histograms,
                       rebin=properties['rebin'],
                       scale_factor=measurement_config.luminosity_scale)

    histogram_properties = Histogram_properties()
    histogram_properties.x_axis_title = properties['x-title']
    histogram_properties.y_axis_title = properties['y-title']
    histogram_properties.x_limits = [properties['min'], properties['max']]

    histogram_lables = ['data', 'QCD', 'V+Jets', 'Single-Top',
                        samples_latex['TTJet']]
    histogram_colors = ['black', 'yellow', 'green', 'magenta', 'red']
    name_stem = variable + '_' + bin_range + '_' + fit_variable

    # --- control region as it is ---
    histograms_for_cleaning = {
        'data': histograms['data'][qcd_fit_variable_distribution],
        'V+Jets': histograms['V+Jets'][qcd_fit_variable_distribution],
        'SingleTop': histograms['SingleTop'][qcd_fit_variable_distribution],
        'TTJet': histograms['TTJet'][qcd_fit_variable_distribution],
    }
    qcd_from_data = clean_control_region(
        histograms_for_cleaning,
        subtract=['TTJet', 'V+Jets', 'SingleTop'])

    histograms_to_draw = [
        histograms['data'][qcd_fit_variable_distribution],
        histograms['QCD'][qcd_fit_variable_distribution],
        histograms['V+Jets'][qcd_fit_variable_distribution],
        histograms['SingleTop'][qcd_fit_variable_distribution],
        histograms['TTJet'][qcd_fit_variable_distribution],
    ]
    histogram_properties.title = title + ', ' + b_tag_bins_latex[b_tag_bin_ctl]
    histogram_properties.name = name_stem + '_%s_QCDConversions' % b_tag_bin_ctl
    make_data_mc_comparison_plot(
        histograms_to_draw, histogram_lables, histogram_colors,
        histogram_properties,
        save_folder=save_path + '/qcd/',
        show_ratio=False,
        save_as=save_as,
    )

    # --- cleaned control-region data against QCD MC ---
    histograms_to_draw = [
        qcd_from_data,
        histograms['QCD'][qcd_fit_variable_distribution],
    ]
    histogram_properties.name = (
        name_stem + '_%s_QCDConversions_subtracted' % b_tag_bin_ctl)
    make_data_mc_comparison_plot(
        histograms_to_draw,
        histogram_lables=['data', 'QCD'],
        histogram_colors=['black', 'yellow'],
        histogram_properties=histogram_properties,
        save_folder=save_path + '/qcd/',
        show_ratio=False,
        save_as=save_as,
    )

    # --- signal region, QCD taken from data and scaled to the MC yield ---
    n_qcd_predicted_mc = histograms['QCD'][fit_variable_distribution].Integral()
    n_qcd_fit_variable_distribution = qcd_from_data.Integral()
    if not n_qcd_fit_variable_distribution == 0:
        qcd_from_data.Scale(
            1.0 / n_qcd_fit_variable_distribution * n_qcd_predicted_mc)

    histograms_to_draw = [
        histograms['data'][fit_variable_distribution],
        qcd_from_data,
        histograms['V+Jets'][fit_variable_distribution],
        histograms['SingleTop'][fit_variable_distribution],
        histograms['TTJet'][fit_variable_distribution],
    ]
    histogram_properties.title = title + ', ' + b_tag_bins_latex[b_tag_bin]
    histogram_properties.name = name_stem + '_' + b_tag_bin
    make_data_mc_comparison_plot(
        histograms_to_draw, histogram_lables, histogram_colors,
        histogram_properties,
        save_folder=save_path,
        show_ratio=False,
        save_as=save_as,
    )

    # --- templates ---
    histogram_properties.mc_error = mc_uncertainty
    # Raw string: same characters as before, without invalid '\m' escapes.
    histogram_properties.mc_errors_label = (
        r'$\mathrm{t}\bar{\mathrm{t}}$ uncertainty')
    histogram_properties.name = name_stem + '_' + b_tag_bin + '_templates'
    # Order chosen for visibility: the combined template comes first, QCD last.
    histograms_to_draw = [
        histograms['TTJet'][fit_variable_distribution]
        + histograms['SingleTop'][fit_variable_distribution],
        histograms['TTJet'][fit_variable_distribution],
        histograms['SingleTop'][fit_variable_distribution],
        histograms['V+Jets'][fit_variable_distribution],
        qcd_from_data,
    ]
    histogram_lables = [
        samples_latex['TTJet'] + ' + ' + 'Single-Top',
        samples_latex['TTJet'],
        'Single-Top',
        'V+Jets',
        'QCD',
    ]
    # QCD is drawn in orange here (instead of yellow) for better visibility.
    histogram_colors = ['black', 'red', 'magenta', 'green', 'orange']
    make_shape_comparison_plot(
        shapes=histograms_to_draw,
        names=histogram_lables,
        colours=histogram_colors,
        histogram_properties=histogram_properties,
        fill_area=False,
        alpha=1,
        save_folder=save_path,
        save_as=save_as,
    )
def compare_qcd_control_regions(variable='MET',
                                met_type='patType1CorrectedPFMet',
                                title='Untitled'):
    ''' Compares the templates from the control regions in different bins
        of the current variable.

        For every entry of ``electron_fit_variables`` the QCDConversions
        control-region histograms of the first three bins of ``variable``
        are read, cleaned with ``clean_control_region`` (TTJet, V+Jets and
        SingleTop subtracted), normalised to unit integral and compared to
        their (also normalised) sum via ``compare_measurements``.

        Templates with a zero integral are left unscaled; this holds for
        the per-bin templates and for the inclusive one alike.

        Side effects: sets the module-level ``b_tag_bin_ctl`` and creates
        the output folder below 'plots/fit_variables/'.
    '''
    global fit_variable_properties, b_tag_bin, save_as, b_tag_bin_ctl

    def normalise_to_unit_area(hist):
        # Scale in place; an empty histogram cannot be normalised.
        integral = hist.Integral()
        if not integral == 0:
            hist.Scale(1 / integral)

    variable_bins = variable_bins_ROOT[variable]
    histogram_template = get_histogram_template(variable)
    max_bins = 3
    bins_to_compare = variable_bins[0:max_bins]

    for fit_variable in electron_fit_variables:
        properties = fit_variable_properties[fit_variable]
        if '_bl' in fit_variable:
            b_tag_bin_ctl = '1orMoreBtag'
        else:
            b_tag_bin_ctl = '0orMoreBtag'
        save_path = 'plots/fit_variables/%dTeV/%s/%s/' % (
            measurement_config.centre_of_mass_energy, variable, fit_variable)
        make_folder_if_not_exists(save_path + '/qcd/')

        all_hists = {}
        for bin_range in bins_to_compare:
            params = {'met_type': met_type,
                      'bin_range': bin_range,
                      'fit_variable': fit_variable,
                      'b_tag_bin': b_tag_bin,
                      'variable': variable}
            fit_variable_distribution = histogram_template % params
            qcd_fit_variable_distribution = fit_variable_distribution.replace(
                'Ref selection', 'QCDConversions')
            qcd_fit_variable_distribution = qcd_fit_variable_distribution.replace(
                b_tag_bin, b_tag_bin_ctl)
            # format: histograms['data'][qcd_fit_variable_distribution]
            histograms = get_histograms_from_files(
                [qcd_fit_variable_distribution], histogram_files)
            prepare_histograms(
                histograms,
                rebin=properties['rebin'],
                scale_factor=measurement_config.luminosity_scale)
            histograms_for_cleaning = {
                'data': histograms['data'][qcd_fit_variable_distribution],
                'V+Jets': histograms['V+Jets'][qcd_fit_variable_distribution],
                'SingleTop': histograms['SingleTop'][qcd_fit_variable_distribution],
                'TTJet': histograms['TTJet'][qcd_fit_variable_distribution],
            }
            all_hists[bin_range] = clean_control_region(
                histograms_for_cleaning,
                subtract=['TTJet', 'V+Jets', 'SingleTop'])

        # create the inclusive distribution before the bins are normalised
        inclusive_hist = deepcopy(all_hists[bins_to_compare[0]])
        for bin_range in bins_to_compare[1:]:
            inclusive_hist += all_hists[bin_range]

        # normalise all histograms
        for bin_range in bins_to_compare:
            normalise_to_unit_area(all_hists[bin_range])
        normalise_to_unit_area(inclusive_hist)

        # now compare inclusive to all bins
        histogram_properties = Histogram_properties()
        histogram_properties.x_axis_title = properties['x-title']
        histogram_properties.y_axis_title = properties['y-title'].replace(
            'Events', 'a.u.')
        histogram_properties.x_limits = [properties['min'], properties['max']]
        histogram_properties.title = title + ', ' + b_tag_bins_latex[b_tag_bin_ctl]
        histogram_properties.name = (variable + '_' + fit_variable + '_'
                                     + b_tag_bin_ctl + '_QCD_template_comparison')

        measurements = {bin_range + ' GeV': histogram
                        for bin_range, histogram in all_hists.items()}
        measurements = OrderedDict(sorted(measurements.items()))

        compare_measurements(models={'inclusive': inclusive_hist},
                             measurements=measurements,
                             show_measurement_errors=True,
                             histogram_properties=histogram_properties,
                             save_folder=save_path + '/qcd/',
                             save_as=save_as)
def make_plot(channel, x_axis_title, y_axis_title,
              signal_region_tree,
              control_region_tree,
              branchName,
              name_prefix, x_limits, nBins,
              use_qcd_data_region=False,
              compare_qcd_signal_with_data_control=False,
              y_limits=[],
              y_max_scale=1.3,
              rebin=1,
              legend_location=(0.98, 0.78), cms_logo_location='right',
              log_y=False,
              legend_color=False,
              ratio_y_limits=[0.3, 2.5],
              normalise=False,
              ):
    '''
    Compare the QCD histograms obtained from the two QCD control regions
    of a channel.

    For each of the two control regions of the channel the histograms are
    read from trees and passed through ``prepare_histograms``. The QCD
    histogram kept for the comparison is
      * the control-region data after ``clean_control_region`` (TTJet,
        V+Jets and SingleTop subtracted), scaled by the ratio of the QCD MC
        integrals in signal and control region, if ``use_qcd_data_region``;
      * the signal-region QCD MC histogram otherwise.
    Both are finally handed to ``make_control_region_comparison``.

    channel: must contain 'electron' or 'muon'; selects control regions,
        input files and the lepton efficiency weight.
    signal_region_tree, control_region_tree: tree names to read.
    branchName: branch to histogram; also used for the event selection.
    name_prefix: '_NPUNoWeight' / '_NBJetsNoWeight' in it switch off the
        pile-up / b-jet weight.
    x_limits, nBins: histogram range (first and last entry) and binning.
    rebin: forwarded to ``prepare_histograms``.
    y_limits, ratio_y_limits: stored on the histogram properties.
    y_max_scale, legend_location, cms_logo_location, log_y, legend_color,
    normalise: accepted for interface compatibility, not used here.

    Raises ValueError if ``channel`` names neither lepton flavour.
    '''
    global output_folder, measurement_config, category, normalise_to_fit
    global preliminary, norm_variable, sum_bins, b_tag_bin, histogram_files

    if 'electron' in channel:
        controlToCompare = ['QCDConversions', 'QCD non iso e+jets']
        region_names = ('Conversions', 'Non Iso')
    elif 'muon' in channel:
        controlToCompare = ['QCD iso > 0.3', 'QCD 0.12 < iso <= 0.3']
        region_names = ('QCD iso > 0.3', 'QCD 0.12 < iso <= 0.3')
    else:
        raise ValueError(
            "channel must contain 'electron' or 'muon', got %r" % (channel,))

    # Does not depend on the control region, so compute it once up front.
    title = title_template % (measurement_config.new_luminosity,
                              measurement_config.centre_of_mass_energy)

    histogramsToCompare = {}
    for qcd_data_region in controlToCompare:
        print('Doing  %s' % (qcd_data_region,))

        # Input files, normalisations and event weight
        normalisation = None
        weightBranchSignalRegion = 'EventWeight'
        if 'electron' in channel:
            histogram_files['data'] = measurement_config.data_file_electron_trees
            histogram_files['QCD'] = \
                measurement_config.electron_QCD_MC_category_templates_trees[category]
            if normalise_to_fit:
                normalisation = normalisations_electron[norm_variable]
            if 'QCD' not in channel and 'NPU' not in branchName:
                weightBranchSignalRegion += ' * ElectronEfficiencyCorrection'
        if 'muon' in channel:
            histogram_files['data'] = measurement_config.data_file_muon_trees
            histogram_files['QCD'] = \
                measurement_config.muon_QCD_MC_category_templates_trees[category]
            if normalise_to_fit:
                normalisation = normalisations_muon[norm_variable]
            if 'QCD' not in channel and 'NPU' not in branchName:
                weightBranchSignalRegion += ' * MuonEfficiencyCorrection'

        if "_NPUNoWeight" not in name_prefix:
            weightBranchSignalRegion += ' * PUWeight'
        if "_NBJetsNoWeight" not in name_prefix:
            weightBranchSignalRegion += ' * BJetWeight'

        if branchName == 'abs(lepton_eta)':
            selection = 'lepton_eta > -10'
        else:
            selection = '%s >= 0' % branchName

        histograms = get_histograms_from_trees(
            trees=[signal_region_tree, control_region_tree],
            branch=branchName,
            weightBranch=weightBranchSignalRegion,
            files=histogram_files,
            nBins=nBins,
            xMin=x_limits[0],
            xMax=x_limits[-1],
            selection=selection)
        histograms_QCDControlRegion = None
        if use_qcd_data_region:
            qcd_control_region = signal_region_tree.replace(
                'Ref selection', qcd_data_region)
            histograms_QCDControlRegion = get_histograms_from_trees(
                trees=[qcd_control_region],
                branch=branchName,
                weightBranch='EventWeight',
                files=histogram_files,
                nBins=nBins,
                xMin=x_limits[0],
                xMax=x_limits[-1],
                selection=selection)

        # Split histograms up into signal and control region
        signal_region_hists = {}
        control_region_hists = {}
        for sample in histograms.keys():
            signal_region_hists[sample] = histograms[sample][signal_region_tree]

            if compare_qcd_signal_with_data_control:
                # Strings are compared by value; identity ('is') only works
                # when both happen to be the same interned object.
                if sample == 'data':
                    signal_region_hists[sample] = \
                        histograms[sample][control_region_tree]
                elif sample != 'QCD':
                    # only data and QCD take part in this comparison
                    del signal_region_hists[sample]

            if use_qcd_data_region:
                control_region_hists[sample] = \
                    histograms_QCDControlRegion[sample][qcd_control_region]

        # Prepare histograms
        if normalise_to_fit:
            # only scale signal region to fit
            # (results are invalid for control region)
            prepare_histograms(signal_region_hists, rebin=rebin,
                               scale_factor=measurement_config.luminosity_scale,
                               normalisation=normalisation)
        elif normalise_to_data:
            totalMC = sum(hist.Integral()
                          for sample, hist in signal_region_hists.items()
                          if sample != 'data')
            newScale = signal_region_hists['data'].Integral() / totalMC
            prepare_histograms(signal_region_hists, rebin=rebin,
                               scale_factor=newScale)
        else:
            print(measurement_config.luminosity_scale)
            prepare_histograms(signal_region_hists, rebin=rebin,
                               scale_factor=measurement_config.luminosity_scale)
            prepare_histograms(control_region_hists, rebin=rebin,
                               scale_factor=measurement_config.luminosity_scale)

        # Use qcd from data control region or not
        if use_qcd_data_region:
            qcd_from_data = clean_control_region(
                control_region_hists,
                subtract=['TTJet', 'V+Jets', 'SingleTop'])

            # Normalise control region correctly. The bin count is kept in
            # its own local: overwriting the nBins argument here would
            # change the binning requested for the next control region.
            n_bins_x = signal_region_hists['QCD'].GetNbinsX()
            n, error = signal_region_hists['QCD'].integral(
                0, n_bins_x + 1, error=True)
            n_qcd_predicted_mc_signal = ufloat(n, error)

            n, error = control_region_hists['QCD'].integral(
                0, n_bins_x + 1, error=True)
            n_qcd_predicted_mc_control = ufloat(n, error)

            n, error = qcd_from_data.integral(0, n_bins_x + 1, error=True)
            n_qcd_control_region = ufloat(n, error)

            if not n_qcd_control_region == 0:
                dataDrivenQCDScale = \
                    n_qcd_predicted_mc_signal / n_qcd_predicted_mc_control
                print('Overall scale :  %s' % (dataDrivenQCDScale,))
                qcd_from_data.Scale(dataDrivenQCDScale.nominal_value)
                signalToControlScale = \
                    n_qcd_predicted_mc_signal / n_qcd_control_region
                dataToMCscale = n_qcd_control_region / n_qcd_predicted_mc_control
                print("Signal to control : %s" % (signalToControlScale,))
                print("QCD scale :  %s" % (dataToMCscale,))
        else:
            qcd_from_data = signal_region_hists['QCD']

        print(list(qcd_from_data.y()))
        histogramsToCompare[qcd_data_region] = qcd_from_data

    print(histogramsToCompare)
    histogram_properties = Histogram_properties()
    histogram_properties.name = \
        'QCD_control_region_comparison_' + channel + '_' + branchName
    histogram_properties.title = title
    histogram_properties.x_axis_title = x_axis_title
    histogram_properties.y_axis_title = y_axis_title
    histogram_properties.x_limits = x_limits
    histogram_properties.y_limits = y_limits
    histogram_properties.mc_error = 0.0
    histogram_properties.legend_location = (0.98, 0.78)
    histogram_properties.ratio_y_limits = ratio_y_limits

    make_control_region_comparison(
        histogramsToCompare[controlToCompare[0]],
        histogramsToCompare[controlToCompare[1]],
        name_region_1=region_names[0],
        name_region_2=region_names[1],
        histogram_properties=histogram_properties,
        save_folder=output_folder)
def get_qcd_histograms(input_files, variable, variable_bin, channel,
                       fit_variable_hist_name, rebin=1):
    '''
    Return the data-driven QCD template, normalised to the MC prediction.

    The control-region histograms of every sample are summed over all
    bins of ``variable`` (inclusive template), rebinned, and the 'data'
    entry is replaced by the result of ``clean_control_region`` with
    TTJet, V+Jets and SingleTop subtracted. The cleaned histogram is then
    scaled to the integral of the signal-region QCD MC histogram; if that
    integral is zero it is scaled to unit integral instead, and if the
    cleaned histogram itself is empty it is returned unscaled.
    '''
    global electron_QCD_MC_file, muon_QCD_MC_file, analysis_type, \
        electron_control_region, muon_control_region, b_tag_bin

    # Fit variables built from a b-jet need at least one b-tag.
    uses_b_jet = ('M_bl' in fit_variable_hist_name
                  or 'angle_bl' in fit_variable_hist_name)
    control_region_btag = '1orMoreBtag' if uses_b_jet else '0btag'

    control_region, qcd_file = '', ''
    if channel == 'electron':
        control_region, qcd_file = electron_control_region, electron_QCD_MC_file
    if channel == 'muon':
        control_region, qcd_file = muon_control_region, muon_QCD_MC_file

    samples = ['data', 'V+Jets', 'SingleTop', 'TTJet']
    summed = {}
    for var_bin in variable_bins_ROOT[variable]:
        region_hist_name = fit_variable_hist_name.replace(
            variable_bin, var_bin).replace('Ref selection', control_region)
        for sample in samples:
            hist = get_histogram(input_files[sample],
                                 region_hist_name,
                                 control_region_btag)
            if sample in summed:
                summed[sample] += hist
            else:
                summed[sample] = hist

    for sample in samples:
        summed[sample].Rebin(rebin)

    qcd_template = clean_control_region(
        summed, subtract=['TTJet', 'V+Jets', 'SingleTop'])

    # now apply proper normalisation
    n_mc = get_histogram(qcd_file, fit_variable_hist_name, b_tag_bin).Integral()
    n_control = qcd_template.Integral()
    scale = 1
    if n_control != 0:
        scale = 1 / n_control
        if n_mc != 0:
            # scale to MC prediction
            scale = scale * n_mc
    qcd_template.Scale(scale)
    return qcd_template
def compare_qcd_control_regions(variable='MET',
                                met_type='patType1CorrectedPFMet',
                                title='Untitled',
                                channel='electron'):
    ''' Compares the templates from the control regions in different bins
        of the current variable.

        For every entry of ``electron_fit_variables`` the QCDConversions
        control-region histograms of the first three bins of ``variable``
        are read, cleaned with ``clean_control_region`` (TTJet, V+Jets and
        SingleTop subtracted), normalised to unit integral and compared to
        their (also normalised) sum via ``compare_measurements``.

        Templates with a zero integral are left unscaled; this holds for
        the per-bin templates and for the inclusive one alike.

        ``channel`` is only used to look up the plot label in
        ``channel_latex``.

        Side effects: sets the module-level ``b_tag_bin_ctl`` and creates
        the output folder below 'plots/'.
    '''
    global fit_variable_properties, b_tag_bin, save_as, b_tag_bin_ctl

    def normalise_to_unit_area(hist):
        # Scale in place; an empty histogram cannot be normalised.
        integral = hist.Integral()
        if not integral == 0:
            hist.Scale(1 / integral)

    variable_bins = variable_bins_ROOT[variable]
    histogram_template = get_histogram_template(variable)
    max_bins = 3
    bins_to_compare = variable_bins[0:max_bins]

    for fit_variable in electron_fit_variables:
        properties = fit_variable_properties[fit_variable]
        if '_bl' in fit_variable:
            b_tag_bin_ctl = '1orMoreBtag'
        else:
            b_tag_bin_ctl = '0orMoreBtag'
        save_path = 'plots/%dTeV/fit_variables/%s/%s/' % (
            measurement_config.centre_of_mass_energy, variable, fit_variable)
        make_folder_if_not_exists(save_path + '/qcd/')

        all_hists = {}
        for bin_range in bins_to_compare:
            params = {
                'met_type': met_type,
                'bin_range': bin_range,
                'fit_variable': fit_variable,
                'b_tag_bin': b_tag_bin,
                'variable': variable,
            }
            fit_variable_distribution = histogram_template % params
            qcd_fit_variable_distribution = fit_variable_distribution.replace(
                'Ref selection', 'QCDConversions')
            qcd_fit_variable_distribution = qcd_fit_variable_distribution.replace(
                b_tag_bin, b_tag_bin_ctl)
            # format: histograms['data'][qcd_fit_variable_distribution]
            histograms = get_histograms_from_files(
                [qcd_fit_variable_distribution], histogram_files)
            prepare_histograms(
                histograms,
                rebin=properties['rebin'],
                scale_factor=measurement_config.luminosity_scale)
            histograms_for_cleaning = {
                'data': histograms['data'][qcd_fit_variable_distribution],
                'V+Jets': histograms['V+Jets'][qcd_fit_variable_distribution],
                'SingleTop': histograms['SingleTop'][qcd_fit_variable_distribution],
                'TTJet': histograms['TTJet'][qcd_fit_variable_distribution],
            }
            all_hists[bin_range] = clean_control_region(
                histograms_for_cleaning,
                subtract=['TTJet', 'V+Jets', 'SingleTop'])

        # create the inclusive distribution before the bins are normalised
        inclusive_hist = deepcopy(all_hists[bins_to_compare[0]])
        for bin_range in bins_to_compare[1:]:
            inclusive_hist += all_hists[bin_range]

        # normalise all histograms
        for bin_range in bins_to_compare:
            normalise_to_unit_area(all_hists[bin_range])
        normalise_to_unit_area(inclusive_hist)

        # now compare inclusive to all bins
        histogram_properties = Histogram_properties()
        histogram_properties.x_axis_title = properties['x-title']
        histogram_properties.y_axis_title = properties['y-title'].replace(
            'Events', 'a.u.')
        histogram_properties.x_limits = [properties['min'], properties['max']]
        histogram_properties.title = title
        histogram_properties.additional_text = (
            channel_latex[channel] + ', ' + b_tag_bins_latex[b_tag_bin_ctl])
        histogram_properties.name = (variable + '_' + fit_variable + '_'
                                     + b_tag_bin_ctl + '_QCD_template_comparison')
        histogram_properties.y_max_scale = 1.5

        measurements = {bin_range + ' GeV': histogram
                        for bin_range, histogram in all_hists.items()}
        measurements = OrderedDict(sorted(measurements.items()))

        compare_measurements(models={'inclusive': inclusive_hist},
                             measurements=measurements,
                             show_measurement_errors=True,
                             histogram_properties=histogram_properties,
                             save_folder=save_path + '/qcd/',
                             save_as=save_as)
def plot_fit_variable(histograms, fit_variable, variable, bin_range,
                      fit_variable_distribution, qcd_fit_variable_distribution,
                      title, save_path, channel='electron'):
    '''
    Produce the four standard plots of one fit variable in one bin of
    ``variable``: the control region as it is, the cleaned control-region
    data against QCD MC, the signal region with QCD taken from the cleaned
    control-region data, and the template shape comparison.

    All work is done on a deep copy of ``histograms``
    (``histograms[sample][distribution]``). ``channel`` is only used to
    look up the plot label in ``channel_latex``; ``variable`` and
    ``bin_range`` only enter the output file names.
    '''
    global fit_variable_properties, b_tag_bin, save_as, b_tag_bin_ctl

    histograms_ = deepcopy(histograms)
    settings = fit_variable_properties[fit_variable]
    mc_uncertainty = 0.10
    prepare_histograms(histograms_,
                       rebin=settings['rebin'],
                       scale_factor=measurement_config.luminosity_scale)

    def pick(sample, distribution):
        # Shorthand for one histogram of the working copy.
        return histograms_[sample][distribution]

    name_stem = variable + '_' + bin_range + '_' + fit_variable
    qcd_folder = save_path + '/qcd/'
    drawn_samples = ('data', 'QCD', 'V+Jets', 'SingleTop', 'TTJet')

    ######################################
    # plot the control regions as they are
    ######################################
    histogram_properties = Histogram_properties()
    histogram_properties.x_axis_title = settings['x-title']
    histogram_properties.y_axis_title = settings['y-title']
    histogram_properties.x_limits = [settings['min'], settings['max']]
    histogram_properties.y_max_scale = 2

    histogram_lables = ['data', 'QCD', 'V+Jets', 'Single-Top',
                        samples_latex['TTJet']]
    histogram_colors = ['black', 'yellow', 'green', 'magenta', 'red']

    # clean the control-region data against the other processes
    histograms_for_cleaning = {
        sample: pick(sample, qcd_fit_variable_distribution)
        for sample in ('data', 'V+Jets', 'SingleTop', 'TTJet')
    }
    qcd_from_data = clean_control_region(
        histograms_for_cleaning,
        subtract=['TTJet', 'V+Jets', 'SingleTop'])

    histograms_to_draw = [pick(sample, qcd_fit_variable_distribution)
                          for sample in drawn_samples]
    histogram_properties.title = title
    histogram_properties.additional_text = (
        channel_latex[channel] + ', ' + b_tag_bins_latex[b_tag_bin_ctl])
    histogram_properties.name = name_stem + '_%s_QCDConversions' % b_tag_bin_ctl
    make_data_mc_comparison_plot(
        histograms_to_draw,
        histogram_lables,
        histogram_colors,
        histogram_properties,
        save_folder=qcd_folder,
        show_ratio=False,
        save_as=save_as,
    )

    ######################################
    # plot QCD against data control region with TTJet, SingleTop and
    # V+Jets removed
    ######################################
    histogram_properties.y_max_scale = 1.5
    histogram_properties.name = (
        name_stem + '_%s_QCDConversions_subtracted' % b_tag_bin_ctl)
    make_data_mc_comparison_plot(
        [qcd_from_data, pick('QCD', qcd_fit_variable_distribution)],
        histogram_lables=['data', 'QCD'],
        histogram_colors=['black', 'yellow'],
        histogram_properties=histogram_properties,
        save_folder=qcd_folder,
        show_ratio=False,
        save_as=save_as,
    )

    ######################################
    # plot signal region
    ######################################
    # scale QCD to predicted
    n_qcd_predicted_mc = pick('QCD', fit_variable_distribution).Integral()
    n_qcd_fit_variable_distribution = qcd_from_data.Integral()
    if n_qcd_fit_variable_distribution != 0:
        qcd_from_data.Scale(
            1.0 / n_qcd_fit_variable_distribution * n_qcd_predicted_mc)

    histograms_to_draw = [
        pick('data', fit_variable_distribution),
        qcd_from_data,
        pick('V+Jets', fit_variable_distribution),
        pick('SingleTop', fit_variable_distribution),
        pick('TTJet', fit_variable_distribution),
    ]
    histogram_properties.additional_text = (
        channel_latex[channel] + ', ' + b_tag_bins_latex[b_tag_bin])
    histogram_properties.name = name_stem + '_' + b_tag_bin
    make_data_mc_comparison_plot(
        histograms_to_draw,
        histogram_lables,
        histogram_colors,
        histogram_properties,
        save_folder=save_path,
        show_ratio=False,
        save_as=save_as,
    )

    ######################################
    # plot templates
    ######################################
    histogram_properties.mc_error = mc_uncertainty
    histogram_properties.mc_errors_label = '$\mathrm{t}\\bar{\mathrm{t}}$ uncertainty'
    histogram_properties.name = name_stem + '_' + b_tag_bin + '_templates'
    histogram_properties.y_max_scale = 2

    # histogram order chosen for better visibility
    ttjet = pick('TTJet', fit_variable_distribution)
    single_top = pick('SingleTop', fit_variable_distribution)
    histograms_to_draw = [
        ttjet + single_top,
        ttjet,
        single_top,
        pick('V+Jets', fit_variable_distribution),
        qcd_from_data,
    ]
    histogram_lables = [
        'QCD', 'V+Jets', 'Single-Top', samples_latex['TTJet'],
        samples_latex['TTJet'] + ' + ' + 'Single-Top'
    ][::-1]
    # QCD colour is orange here for better visibility
    histogram_colors = ['orange', 'green', 'magenta', 'red', 'black'][::-1]

    # plot template
    make_shape_comparison_plot(
        shapes=histograms_to_draw,
        names=histogram_lables,
        colours=histogram_colors,
        histogram_properties=histogram_properties,
        fill_area=False,
        alpha=1,
        save_folder=save_path,
        save_as=save_as,
    )
def make_plot(
        channel,
        x_axis_title,
        y_axis_title,
        signal_region_tree,
        control_region_tree,
        branchName,
        name_prefix,
        x_limits,
        nBins,
        use_qcd_data_region=False,
        compare_qcd_signal_with_data_control=False,
        y_limits=[],
        y_max_scale=1.3,
        rebin=1,
        legend_location=(0.98, 0.78),
        cms_logo_location='right',
        log_y=False,
        legend_color=False,
        ratio_y_limits=[0.3, 2.5],
        normalise=False,
):
    '''
    Compare the QCD histograms obtained from the two QCD control regions
    of a channel.

    For each of the two control regions of the channel the histograms are
    read from trees and passed through ``prepare_histograms``. The QCD
    histogram kept for the comparison is
      * the control-region data after ``clean_control_region`` (TTJet,
        V+Jets and SingleTop subtracted), scaled by the ratio of the QCD MC
        integrals in signal and control region, if ``use_qcd_data_region``;
      * the signal-region QCD MC histogram otherwise.
    Both are finally handed to ``make_control_region_comparison``.

    channel: must contain 'electron' or 'muon'; selects control regions,
        input files and the lepton efficiency weight.
    signal_region_tree, control_region_tree: tree names to read.
    branchName: branch to histogram; also used for the event selection.
    name_prefix: '_NPUNoWeight' / '_NBJetsNoWeight' in it switch off the
        pile-up / b-jet weight.
    x_limits, nBins: histogram range (first and last entry) and binning.
    rebin: forwarded to ``prepare_histograms``.
    y_limits, ratio_y_limits: stored on the histogram properties.
    y_max_scale, legend_location, cms_logo_location, log_y, legend_color,
    normalise: accepted for interface compatibility, not used here.

    Raises ValueError if ``channel`` names neither lepton flavour.
    '''
    global output_folder, measurement_config, category, normalise_to_fit
    global preliminary, norm_variable, sum_bins, b_tag_bin, histogram_files

    if 'electron' in channel:
        controlToCompare = ['QCDConversions', 'QCD non iso e+jets']
        region_names = ('Conversions', 'Non Iso')
    elif 'muon' in channel:
        controlToCompare = ['QCD iso > 0.3', 'QCD 0.12 < iso <= 0.3']
        region_names = ('QCD iso > 0.3', 'QCD 0.12 < iso <= 0.3')
    else:
        raise ValueError(
            "channel must contain 'electron' or 'muon', got %r" % (channel,))

    # Does not depend on the control region, so compute it once up front.
    title = title_template % (measurement_config.new_luminosity,
                              measurement_config.centre_of_mass_energy)

    histogramsToCompare = {}
    for qcd_data_region in controlToCompare:
        print('Doing  %s' % (qcd_data_region,))

        # Input files, normalisations and event weight
        normalisation = None
        weightBranchSignalRegion = 'EventWeight'
        if 'electron' in channel:
            histogram_files['data'] = measurement_config.data_file_electron_trees
            histogram_files['QCD'] = \
                measurement_config.electron_QCD_MC_category_templates_trees[category]
            if normalise_to_fit:
                normalisation = normalisations_electron[norm_variable]
            if 'QCD' not in channel and 'NPU' not in branchName:
                weightBranchSignalRegion += ' * ElectronEfficiencyCorrection'
        if 'muon' in channel:
            histogram_files['data'] = measurement_config.data_file_muon_trees
            histogram_files['QCD'] = \
                measurement_config.muon_QCD_MC_category_templates_trees[category]
            if normalise_to_fit:
                normalisation = normalisations_muon[norm_variable]
            if 'QCD' not in channel and 'NPU' not in branchName:
                weightBranchSignalRegion += ' * MuonEfficiencyCorrection'

        if "_NPUNoWeight" not in name_prefix:
            weightBranchSignalRegion += ' * PUWeight'
        if "_NBJetsNoWeight" not in name_prefix:
            weightBranchSignalRegion += ' * BJetWeight'

        if branchName == 'abs(lepton_eta)':
            selection = 'lepton_eta > -10'
        else:
            selection = '%s >= 0' % branchName

        histograms = get_histograms_from_trees(
            trees=[signal_region_tree, control_region_tree],
            branch=branchName,
            weightBranch=weightBranchSignalRegion,
            files=histogram_files,
            nBins=nBins,
            xMin=x_limits[0],
            xMax=x_limits[-1],
            selection=selection)
        histograms_QCDControlRegion = None
        if use_qcd_data_region:
            qcd_control_region = signal_region_tree.replace(
                'Ref selection', qcd_data_region)
            histograms_QCDControlRegion = get_histograms_from_trees(
                trees=[qcd_control_region],
                branch=branchName,
                weightBranch='EventWeight',
                files=histogram_files,
                nBins=nBins,
                xMin=x_limits[0],
                xMax=x_limits[-1],
                selection=selection)

        # Split histograms up into signal and control region
        signal_region_hists = {}
        control_region_hists = {}
        for sample in histograms.keys():
            signal_region_hists[sample] = histograms[sample][signal_region_tree]

            if compare_qcd_signal_with_data_control:
                # Strings are compared by value; identity ('is') only works
                # when both happen to be the same interned object.
                if sample == 'data':
                    signal_region_hists[sample] = \
                        histograms[sample][control_region_tree]
                elif sample != 'QCD':
                    # only data and QCD take part in this comparison
                    del signal_region_hists[sample]

            if use_qcd_data_region:
                control_region_hists[sample] = \
                    histograms_QCDControlRegion[sample][qcd_control_region]

        # Prepare histograms
        if normalise_to_fit:
            # only scale signal region to fit
            # (results are invalid for control region)
            prepare_histograms(
                signal_region_hists,
                rebin=rebin,
                scale_factor=measurement_config.luminosity_scale,
                normalisation=normalisation)
        elif normalise_to_data:
            totalMC = sum(hist.Integral()
                          for sample, hist in signal_region_hists.items()
                          if sample != 'data')
            newScale = signal_region_hists['data'].Integral() / totalMC
            prepare_histograms(
                signal_region_hists,
                rebin=rebin,
                scale_factor=newScale,
            )
        else:
            print(measurement_config.luminosity_scale)
            prepare_histograms(
                signal_region_hists,
                rebin=rebin,
                scale_factor=measurement_config.luminosity_scale)
            prepare_histograms(
                control_region_hists,
                rebin=rebin,
                scale_factor=measurement_config.luminosity_scale)

        # Use qcd from data control region or not
        if use_qcd_data_region:
            qcd_from_data = clean_control_region(
                control_region_hists,
                subtract=['TTJet', 'V+Jets', 'SingleTop'])

            # Normalise control region correctly. The bin count is kept in
            # its own local: overwriting the nBins argument here would
            # change the binning requested for the next control region.
            n_bins_x = signal_region_hists['QCD'].GetNbinsX()
            n, error = signal_region_hists['QCD'].integral(
                0, n_bins_x + 1, error=True)
            n_qcd_predicted_mc_signal = ufloat(n, error)

            n, error = control_region_hists['QCD'].integral(
                0, n_bins_x + 1, error=True)
            n_qcd_predicted_mc_control = ufloat(n, error)

            n, error = qcd_from_data.integral(0, n_bins_x + 1, error=True)
            n_qcd_control_region = ufloat(n, error)

            if not n_qcd_control_region == 0:
                dataDrivenQCDScale = \
                    n_qcd_predicted_mc_signal / n_qcd_predicted_mc_control
                print('Overall scale :  %s' % (dataDrivenQCDScale,))
                qcd_from_data.Scale(dataDrivenQCDScale.nominal_value)
                signalToControlScale = \
                    n_qcd_predicted_mc_signal / n_qcd_control_region
                dataToMCscale = n_qcd_control_region / n_qcd_predicted_mc_control
                print("Signal to control : %s" % (signalToControlScale,))
                print("QCD scale :  %s" % (dataToMCscale,))
        else:
            qcd_from_data = signal_region_hists['QCD']

        print(list(qcd_from_data.y()))
        histogramsToCompare[qcd_data_region] = qcd_from_data

    print(histogramsToCompare)
    histogram_properties = Histogram_properties()
    histogram_properties.name = \
        'QCD_control_region_comparison_' + channel + '_' + branchName
    histogram_properties.title = title
    histogram_properties.x_axis_title = x_axis_title
    histogram_properties.y_axis_title = y_axis_title
    histogram_properties.x_limits = x_limits
    histogram_properties.y_limits = y_limits
    histogram_properties.mc_error = 0.0
    histogram_properties.legend_location = (0.98, 0.78)
    histogram_properties.ratio_y_limits = ratio_y_limits

    make_control_region_comparison(
        histogramsToCompare[controlToCompare[0]],
        histogramsToCompare[controlToCompare[1]],
        name_region_1=region_names[0],
        name_region_2=region_names[1],
        histogram_properties=histogram_properties,
        save_folder=output_folder)
def background_subtraction(self, histograms):
    """Store the background-subtracted TTJet normalisation.

    ``histograms`` is handed to ``clean_control_region`` with 'QCD',
    'V+Jets' and 'SingleTop' named as the samples to subtract. The
    resulting histogram is converted with ``hist_to_value_error_tuplelist``
    and saved under ``self.normalisation['TTJet']``. Nothing is returned.
    """
    samples_to_subtract = ['QCD', 'V+Jets', 'SingleTop']
    cleaned = clean_control_region(histograms, subtract=samples_to_subtract)
    self.normalisation['TTJet'] = hist_to_value_error_tuplelist(cleaned)
def compare_QCD_control_regions_to_MC():
    """Compare data-driven QCD shapes from two control regions with QCD MC.

    For every variable and lepton channel, histograms for data, TTJet,
    VJets, SingleTop and QCD simulation are read from two QCD control-region
    trees. ``clean_control_region`` is asked to subtract TTJet, VJets and
    SingleTop from data in each region, and each cleaned histogram is scaled
    by ``n_signal * (n_cleaned / n_control_mc) / n_cleaned``, where
    ``n_signal`` is the integral of the QCD simulation read from the
    'Ref selection' tree. The two scaled histograms and that QCD simulation
    histogram are passed to ``compare_histograms`` together with an error
    band of region 1 -/+ |region 1 - region 2|.
    """
    config = XSectionConfig(13)

    # Simulation inputs shared by both channels.
    ttbar_file = config.ttbar_category_templates_trees['central']
    vjets_file = config.VJets_category_templates_trees['central']
    singleTop_file = config.SingleTop_category_templates_trees['central']

    # Per channel: data file, QCD MC file, control-region tree 1,
    # control-region tree 2, reference-selection tree, weight branches.
    channel_settings = {
        'electron': (
            config.data_file_electron_trees,
            config.electron_QCD_MC_tree_file,
            'TTbar_plus_X_analysis/EPlusJets/QCDConversions/FitVariables',
            'TTbar_plus_X_analysis/EPlusJets/QCD non iso e+jets/FitVariables',
            'TTbar_plus_X_analysis/EPlusJets/Ref selection/FitVariables',
            ["EventWeight", "PUWeight", "BJetWeight",
             "ElectronEfficiencyCorrection"],
        ),
        'muon': (
            config.data_file_muon_trees,
            config.muon_QCD_MC_tree_file,
            'TTbar_plus_X_analysis/MuPlusJets/QCD iso > 0.3/FitVariables',
            'TTbar_plus_X_analysis/MuPlusJets/QCD 0.12 < iso <= 0.3/FitVariables',
            'TTbar_plus_X_analysis/MuPlusJets/Ref selection/FitVariables',
            ["EventWeight", "PUWeight", "BJetWeight",
             "MuonEfficiencyCorrection"],
        ),
    }

    # Samples removed from data inside each control region.
    backgrounds = ('TTJet', 'VJets', 'SingleTop')

    def load_region(data_file, qcd_file, tree_inputs):
        # Read data and every simulated sample with identical tree settings.
        return {
            'data': get_histogram_from_tree(input_file=data_file, **tree_inputs),
            'TTJet': get_histogram_from_tree(input_file=ttbar_file, **tree_inputs),
            'VJets': get_histogram_from_tree(input_file=vjets_file, **tree_inputs),
            'SingleTop': get_histogram_from_tree(
                input_file=singleTop_file, **tree_inputs),
            'QCD': get_histogram_from_tree(input_file=qcd_file, **tree_inputs),
        }

    # For a quick test, restrict this list to a single variable,
    # e.g. ['abs_lepton_eta'].
    variables = ['MET', 'HT', 'ST', 'NJets', 'lepton_pt', 'abs_lepton_eta', 'WPT']

    for variable in variables:
        if variable == 'abs_lepton_eta':
            # No stored branch of that name is read here: the absolute value
            # is computed by the branch expression instead.
            branch = 'abs(lepton_eta)'
            selection = 'lepton_eta >= -3'
        else:
            branch = variable
            selection = '{0} >= 0'.format(branch)

        edges = bin_edges_vis[variable]

        for channel in ('electron', 'muon'):
            (data_file, qcd_file, ctrl1, ctrl2, mc,
             weight_branches) = channel_settings[channel]

            inputs = {
                'branch': branch,
                'weight_branches': weight_branches,
                'bin_edges': edges,
                'selection': selection,
            }

            region1 = load_region(data_file, qcd_file, dict(inputs, tree=ctrl1))
            region2 = load_region(data_file, qcd_file, dict(inputs, tree=ctrl2))
            h_qcd = get_histogram_from_tree(input_file=qcd_file, tree=mc, **inputs)

            # A fresh subtract list is passed on every call.
            cleaned1 = clean_control_region(
                region1,
                data_label='data',
                subtract=list(backgrounds),
                fix_to_zero=True)
            cleaned2 = clean_control_region(
                region2,
                data_label='data',
                subtract=list(backgrounds),
                fix_to_zero=True)

            n_mc1 = region1['QCD'].integral()
            n_mc2 = region2['QCD'].integral()
            n_cleaned1 = cleaned1.integral()
            n_cleaned2 = cleaned2.integral()
            n_signal = h_qcd.integral()

            # estimate = n_signal * (cleaned data / control-region QCD MC);
            # each cleaned histogram is scaled by estimate / cleaned data.
            estimate1 = n_signal * (n_cleaned1 / n_mc1)
            estimate2 = n_signal * (n_cleaned2 / n_mc2)
            cleaned1.Scale(estimate1 / n_cleaned1)
            cleaned2.Scale(estimate2 / n_cleaned2)

            properties = Histogram_properties()
            properties.name = (
                'compare_qcd_control_regions_to_mc_{0}_{1}_channel'.format(
                    variable, channel))
            properties.title = (
                'Comparison of QCD control regions ({0} channel)'.format(channel))
            properties.path = 'plots'
            properties.has_ratio = False
            properties.xerr = True
            properties.x_limits = (edges[0], edges[-1])
            properties.x_axis_title = variables_latex[variable]
            properties.y_axis_title = 'number of QCD events'

            histograms = {
                'control region 1': cleaned1,
                'control region 2': cleaned2,
                'MC prediction': h_qcd,
            }

            # Band around control region 1, as wide as the absolute
            # difference between the two control regions.
            spread = absolute(cleaned1 - cleaned2)
            band_low = cleaned1 - spread
            band_high = cleaned1 + spread
            band = ErrorBand('uncertainty', band_low, band_high)

            plot = Plot(histograms, properties)
            plot.draw_method = 'errorbar'
            plot.add_error_band(band)
            compare_histograms(plot)
def compare_QCD_control_regions_to_MC():
    """Plot QCD estimates from two control regions next to the QCD simulation.

    Loops over a fixed list of variables and over the electron and muon
    channels. In each iteration it reads data, TTJet, VJets, SingleTop and
    QCD histograms from two control-region trees, lets
    ``clean_control_region`` subtract TTJet, VJets and SingleTop from data,
    and scales each cleaned histogram by
    ``n_qcd_sg * (n_data / n_qcd_ctrl) / n_data``, where ``n_qcd_sg`` is the
    integral of the QCD simulation read from the 'Ref selection' tree. The
    result is drawn via ``compare_histograms`` with an error band equal to
    control region 1 -/+ the absolute difference between the two regions.
    """
    config = XSectionConfig(13)

    data_file_e = config.data_file_electron_trees
    ttbar_file = config.ttbar_category_templates_trees['central']
    vjets_file = config.VJets_category_templates_trees['central']
    singleTop_file = config.SingleTop_category_templates_trees['central']
    qcd_file_e = config.electron_QCD_MC_tree_file
    data_file_mu = config.data_file_muon_trees
    qcd_file_mu = config.muon_QCD_MC_tree_file

    # Everything that differs between the two lepton channels.
    per_channel = {
        'electron': {
            'data_file': data_file_e,
            'qcd_file': qcd_file_e,
            'control_trees': (
                'TTbar_plus_X_analysis/EPlusJets/QCDConversions/FitVariables',
                'TTbar_plus_X_analysis/EPlusJets/QCD non iso e+jets/FitVariables',
            ),
            'mc_tree': 'TTbar_plus_X_analysis/EPlusJets/Ref selection/FitVariables',
            'weight_branches': [
                "EventWeight", "PUWeight", "BJetWeight",
                "ElectronEfficiencyCorrection",
            ],
        },
        'muon': {
            'data_file': data_file_mu,
            'qcd_file': qcd_file_mu,
            'control_trees': (
                'TTbar_plus_X_analysis/MuPlusJets/QCD iso > 0.3/FitVariables',
                'TTbar_plus_X_analysis/MuPlusJets/QCD 0.12 < iso <= 0.3/FitVariables',
            ),
            'mc_tree': 'TTbar_plus_X_analysis/MuPlusJets/Ref selection/FitVariables',
            'weight_branches': [
                "EventWeight", "PUWeight", "BJetWeight",
                "MuonEfficiencyCorrection",
            ],
        },
    }

    # For a quick test, restrict this list to a single variable,
    # e.g. ['abs_lepton_eta'].
    variables = ['MET', 'HT', 'ST', 'NJets', 'lepton_pt', 'abs_lepton_eta', 'WPT']

    for variable in variables:
        # abs_lepton_eta is read through a branch expression and uses its
        # own selection; every other variable is read directly and required
        # to be non-negative.
        is_abs_eta = variable == 'abs_lepton_eta'
        branch = 'abs(lepton_eta)' if is_abs_eta else variable
        selection = 'lepton_eta >= -3' if is_abs_eta else '{0} >= 0'.format(branch)

        for channel in ['electron', 'muon']:
            settings = per_channel[channel]
            data_file = settings['data_file']
            qcd_file = settings['qcd_file']

            inputs = {
                'branch': branch,
                'weight_branches': settings['weight_branches'],
                'tree': None,  # set below before every read
                'bin_edges': bin_edges_vis[variable],
                'selection': selection,
            }

            # Raw histograms of every sample, one dict per control region.
            raw_regions = []
            for tree in settings['control_trees']:
                inputs['tree'] = tree
                raw_regions.append({
                    'data': get_histogram_from_tree(input_file=data_file, **inputs),
                    'TTJet': get_histogram_from_tree(input_file=ttbar_file, **inputs),
                    'VJets': get_histogram_from_tree(input_file=vjets_file, **inputs),
                    'SingleTop': get_histogram_from_tree(
                        input_file=singleTop_file, **inputs),
                    'QCD': get_histogram_from_tree(input_file=qcd_file, **inputs),
                })

            inputs['tree'] = settings['mc_tree']
            h_qcd = get_histogram_from_tree(input_file=qcd_file, **inputs)

            # Data with TTJet, VJets and SingleTop removed, per region.
            cleaned_regions = [
                clean_control_region(
                    region,
                    data_label='data',
                    subtract=['TTJet', 'VJets', 'SingleTop'],
                    fix_to_zero=True)
                for region in raw_regions
            ]

            n_qcd_ctrl = [region['QCD'].integral() for region in raw_regions]
            n_data = [cleaned.integral() for cleaned in cleaned_regions]
            n_qcd_sg = h_qcd.integral()

            # qcd_estimate = n_qcd_sg * (data / control-region QCD MC); each
            # cleaned histogram is scaled by qcd_estimate / data.
            for cleaned, n_d, n_mc in zip(cleaned_regions, n_data, n_qcd_ctrl):
                qcd_estimate = n_qcd_sg * (n_d / n_mc)
                cleaned.Scale(qcd_estimate / n_d)

            first, second = cleaned_regions

            properties = Histogram_properties()
            properties.name = 'compare_qcd_control_regions_to_mc_{0}_{1}_channel'.format(
                variable, channel)
            properties.title = 'Comparison of QCD control regions ({0} channel)'.format(
                channel)
            properties.path = 'plots'
            properties.has_ratio = False
            properties.xerr = True
            properties.x_limits = (
                bin_edges_vis[variable][0], bin_edges_vis[variable][-1])
            properties.x_axis_title = variables_latex[variable]
            properties.y_axis_title = 'number of QCD events'

            histograms = {
                'control region 1': first,
                'control region 2': second,
                'MC prediction': h_qcd,
            }

            # The band is centred on control region 1 and is as wide as the
            # absolute difference between the two regions.
            difference = absolute(first - second)
            lower_edge = first - difference
            upper_edge = first + difference
            uncertainty = ErrorBand('uncertainty', lower_edge, upper_edge)

            comparison = Plot(histograms, properties)
            comparison.draw_method = 'errorbar'
            comparison.add_error_band(uncertainty)
            compare_histograms(comparison)
def background_subtraction(self, histograms):
    """Set ``self.normalisation['TTJet']`` from background-cleaned histograms.

    Calls ``clean_control_region`` on ``histograms`` with 'QCD', 'V+Jets'
    and 'SingleTop' as the samples to subtract, then stores the output of
    ``hist_to_value_error_tuplelist`` for that histogram. Returns nothing.
    """
    self.normalisation['TTJet'] = hist_to_value_error_tuplelist(
        clean_control_region(
            histograms,
            subtract=['QCD', 'V+Jets', 'SingleTop'],
        )
    )