def test_vectors(self):
    """Compare the fit-data template vectors with the input histograms.

    The expected vector of each sample is built from the corresponding
    input histogram: overflow is folded into [x_min, x_max] with
    adjust_overflow_to_limit, the result is scaled to unit integral and
    its y() values are collected into a list.
    """
    expected = {}
    for name, histogram in (('signal', self.h_signal), ('bkg1', self.h_bkg1)):
        template = adjust_overflow_to_limit(histogram, x_min, x_max)
        template.Scale(1 / template.Integral())
        expected[name] = list(template.y())

    vectors_from_fit_data = self.fit_data_1.vectors
    vectors_from_single_collection = self.single_collection.vectors()
    # NOTE: the equivalent checks against
    #   self.collection_1.vectors('signal region') and
    #   self.collection_1.vectors()['signal region']
    # are currently disabled.

    for vectors in (vectors_from_fit_data, vectors_from_single_collection):
        for name in ('signal', 'bkg1'):
            self.assertEqual(expected[name], vectors[name])
def test_adjust_overflow_to_limit(self):
    """Check adjust_overflow_to_limit on self.h1 with limits 40 and 80.

    Three properties are asserted:
      * the integral of the adjusted histogram equals the integral of
        the original including under-/overflow,
      * the bin at the lower limit holds everything from bin 0 up to and
        including that bin,
      * the bin at the upper limit holds everything from that bin up to
        and including bin nbins + 1.
    """
    lower_limit, upper_limit = 40, 80
    original = self.h1
    adjusted = adjust_overflow_to_limit(original, lower_limit, upper_limit)

    # the number of events must be unchanged; the adjusted histogram is
    # expected to have no overflow for this example
    self.assertEqual(original.integral(overflow=True), adjusted.Integral())

    lower_bin = original.FindBin(lower_limit)
    upper_bin = original.FindBin(upper_limit)

    # everything with x <= lower limit ends up in the lower-limit bin
    expected_low = original.integral(0, lower_bin)
    self.assertEqual(expected_low, adjusted.GetBinContent(lower_bin))

    # everything with x >= upper limit ends up in the upper-limit bin
    expected_high = original.integral(upper_bin, original.nbins() + 1)
    self.assertEqual(expected_high, adjusted.GetBinContent(upper_bin))
def test_adjust_overflow_to_limit_simple(self):
    """Check adjust_overflow_to_limit on self.simple with limits 0 and 95.

    Asserts that the total content (including under-/overflow) is
    preserved and that the bins at the two limits absorb everything
    below and above them, respectively.
    """
    lower_limit, upper_limit = 0, 95
    original = self.simple
    adjusted = adjust_overflow_to_limit(original, lower_limit, upper_limit)

    # the number of events must be unchanged; the adjusted histogram is
    # expected to have no overflow for this example
    self.assertEqual(original.integral(overflow=True), adjusted.integral())

    lower_bin = original.FindBin(lower_limit)
    upper_bin = original.FindBin(upper_limit)

    # everything with x <= lower limit ends up in the lower-limit bin
    expected_low = original.integral(0, lower_bin)
    self.assertEqual(expected_low, adjusted.GetBinContent(lower_bin))

    # everything with x >= upper limit ends up in the upper-limit bin
    expected_high = original.integral(upper_bin, original.nbins() + 1)
    self.assertEqual(expected_high, adjusted.GetBinContent(upper_bin))
def test_normalisation(self):
    """Check MC and data normalisations reported by the fit-data objects.

    The expected MC normalisation of each sample is the integral of its
    input histogram after adjust_overflow_to_limit(x_min, x_max). The
    expected data normalisation is the integral of self.h_data including
    under-/overflow. Both are compared with the values reported by
    fit_data_1, single_collection and collection_1 (the latter queried
    both by name and through the returned mapping).
    """
    expected_mc = {}
    for name, histogram in self.histograms_1.iteritems():
        adjusted = adjust_overflow_to_limit(histogram, x_min, x_max)
        expected_mc[name] = adjusted.Integral()

    mc_sources = (
        self.fit_data_1.normalisation,
        self.single_collection.mc_normalisation(),
        self.collection_1.mc_normalisation('signal region'),
        self.collection_1.mc_normalisation()['signal region'],
    )
    for sample in expected_mc:
        for reported in mc_sources:
            self.assertEqual(expected_mc[sample], reported[sample])

    # data normalisation
    expected_data = self.h_data.integral(overflow=True)
    data_sources = (
        self.fit_data_1.n_data(),
        self.single_collection.n_data(),
        self.collection_1.n_data('signal region'),
        self.collection_1.n_data()['signal region'],
    )
    for reported in data_sources:
        self.assertEqual(expected_data, reported)

    self.assertAlmostEqual(expected_data,
                           self.collection_1.max_n_data(),
                           delta=1)
def get_histograms( channel, input_files, variable, met_type, variable_bin, b_tag_bin, rebin = 1, fit_variable = 'absolute_eta', scale_factors = None ): global b_tag_bin_VJets, fit_variables global electron_control_region, muon_control_region boundaries = measurement_config.fit_boundaries[fit_variable] histograms = {} if not variable in measurement_config.histogram_path_templates.keys(): print 'Fatal Error: unknown variable ', variable sys.exit() fit_variable_name = '' fit_variable_name_data = '' fit_variable_template = measurement_config.histogram_path_templates[variable] ht_fill_list, other_fill_list = None, None if fit_variable == 'absolute_eta': ht_fill_list = ( analysis_type[channel], variable_bin, channel + '_' + fit_variable ) other_fill_list = ( analysis_type[channel], met_type, variable_bin, channel + '_' + fit_variable ) else: ht_fill_list = ( analysis_type[channel], variable_bin, fit_variable ) other_fill_list = ( analysis_type[channel], met_type, variable_bin, fit_variable ) if variable == 'HT': fit_variable_name = fit_variable_template % ht_fill_list fit_variable_name_data = fit_variable_name else: fit_variable_name = fit_variable_template % other_fill_list if 'JetRes' in met_type: fit_variable_name_data = fit_variable_name.replace( 'JetResDown', '' ) fit_variable_name_data = fit_variable_name_data.replace( 'JetResUp', '' ) if 'patPFMet' in met_type: fit_variable_name = fit_variable_name.replace( 'patPFMet', 'PFMET' ) else: fit_variable_name_data = fit_variable_name for sample, file_name in input_files.iteritems(): if not file_name: continue h_fit_variable = None if sample == 'data': h_fit_variable = get_histogram( file_name, fit_variable_name_data, b_tag_bin ) elif sample == 'V+Jets': # extracting the inclusive V+Jets template across all bins from its specific b-tag bin (>=0 by default) and scaling it to analysis b-tag bin for var_bin in variable_bins_ROOT[variable]: temp_variable_name = fit_variable_name.replace(variable_bin, var_bin) if 
h_fit_variable == None: h_fit_variable = get_histogram( file_name, temp_variable_name, b_tag_bin ) else: h_fit_variable += get_histogram( file_name, temp_variable_name, b_tag_bin ) h_fit_variable_for_scaling = get_histogram(file_name, fit_variable_name, b_tag_bin) scale = h_fit_variable_for_scaling.integral (overflow = True ) / h_fit_variable.integral( overflow = True ) h_fit_variable.Scale(scale) else: h_fit_variable = get_histogram( file_name, fit_variable_name, b_tag_bin ) h_fit_variable.Rebin( rebin ) h_fit_variable = adjust_overflow_to_limit( h_fit_variable, boundaries[0], boundaries[1] ) histograms[sample] = h_fit_variable h_qcd = get_qcd_histograms( input_files, variable, variable_bin, channel, fit_variable_name, rebin ) if h_qcd.Integral() < 0.1: h_qcd.Scale( 0.1/h_qcd.Integral() ) pass histograms['QCD'] = adjust_overflow_to_limit( h_qcd, boundaries[0], boundaries[1] ) # normalise histograms if not measurement_config.luminosity_scale == 1.0: for sample, histogram in histograms.iteritems(): if sample == 'data': continue histogram.Scale( measurement_config.luminosity_scale ) # apply normalisation scale factors for rate-changing systematics if scale_factors: for source, factor in scale_factors.iteritems(): if 'luminosity' in source: for sample, histogram in histograms.iteritems(): if sample == 'data': continue histogram.Scale( factor ) for sample, histogram in histograms.iteritems(): if sample in source: histogram.Scale( factor ) return histograms
def get_histograms( channel, input_files, variable, met_type, variable_bin, b_tag_bin, rebin = 1, fit_variable = 'absolute_eta', scale_factors = None ): global b_tag_bin_VJets, fit_variables global electron_control_region, muon_control_region boundaries = measurement_config.fit_boundaries[fit_variable] histograms = {} if not variable in measurement_config.histogram_path_templates.keys(): print 'Fatal Error: unknown variable ', variable sys.exit() fit_variable_name = '' fit_variable_name_data = '' fit_variable_template = measurement_config.histogram_path_templates[variable] ht_fill_list, other_fill_list = None, None if fit_variable == 'absolute_eta': ht_fill_list = ( analysis_type[channel], variable_bin, channel + '_' + fit_variable ) other_fill_list = ( analysis_type[channel], met_type, variable_bin, channel + '_' + fit_variable ) else: ht_fill_list = ( analysis_type[channel], variable_bin, fit_variable ) other_fill_list = ( analysis_type[channel], met_type, variable_bin, fit_variable ) if variable == 'HT': fit_variable_name = fit_variable_template % ht_fill_list fit_variable_name_data = fit_variable_name else: fit_variable_name = fit_variable_template % other_fill_list if 'JetRes' in met_type: fit_variable_name_data = fit_variable_name.replace( 'JetResDown', '' ) fit_variable_name_data = fit_variable_name_data.replace( 'JetResUp', '' ) if 'patPFMet' in met_type: fit_variable_name = fit_variable_name.replace( 'patPFMet', 'PFMET' ) else: fit_variable_name_data = fit_variable_name for sample, file_name in input_files.iteritems(): if not file_name: continue h_fit_variable = None if sample == 'data': h_fit_variable = get_histogram( file_name, fit_variable_name_data, b_tag_bin ) elif sample == 'V+Jets': # extracting the V+Jets template from its specific b-tag bin (>=0 by default) and scaling it to analysis b-tag bin h_fit_variable = get_histogram( file_name, fit_variable_name, b_tag_bin ) if not '_bl' in fit_variable: # this procedure is not valid for fit variables 
requiring at least one b-tag h_fit_variable_VJets_specific_b_tag_bin = get_histogram( file_name, fit_variable_name, b_tag_bin_VJets ) try: scale = h_fit_variable.integral( overflow = True ) / h_fit_variable_VJets_specific_b_tag_bin.integral( overflow = True ) h_fit_variable_VJets_specific_b_tag_bin.Scale( scale ) h_fit_variable = h_fit_variable_VJets_specific_b_tag_bin except: print 'WARNING: V+Jets template from ' + str( file_name ) + ', histogram ' + fit_variable_name + ' in ' + b_tag_bin_VJets + \ ' b-tag bin is empty. Using central bin (' + b_tag_bin + '), integral = ' + str( h_fit_variable.Integral() ) else: h_fit_variable = get_histogram( file_name, fit_variable_name, b_tag_bin ) h_fit_variable.Rebin( rebin ) # this is not working. M3 has fewer data events than absolute_eta h_fit_variable = adjust_overflow_to_limit( h_fit_variable, boundaries[0], boundaries[1] ) histograms[sample] = h_fit_variable h_qcd = get_qcd_histograms( input_files, variable, variable_bin, channel, fit_variable_name, rebin ) if h_qcd.Integral() < 0.1: h_qcd.Scale( 0.1/h_qcd.Integral() ) pass histograms['QCD'] = adjust_overflow_to_limit( h_qcd, boundaries[0], boundaries[1] ) # normalise histograms if not measurement_config.luminosity_scale == 1.0: for sample, histogram in histograms.iteritems(): if sample == 'data': continue histogram.Scale( measurement_config.luminosity_scale ) # apply normalisation scale factors for rate-changing systematics if scale_factors: for source, factor in scale_factors.iteritems(): if 'luminosity' in source: for sample, histogram in histograms.iteritems(): if sample == 'data': continue histogram.Scale( factor ) for sample, histogram in histograms.iteritems(): if sample in source: histogram.Scale( factor ) return histograms