def get_unfold_histogram_tuple(
        inputfile,
        variable,
        channel,
        met_type='patType1CorrectedPFMet',
        centre_of_mass=8,
        ttbar_xsection=245.8,
        luminosity=19712,
        load_fakes=False,
        scale_to_lumi=False,
        ):
    """
        Load the histograms needed for unfolding from an open input file.

        For channel == 'combined' the call is forwarded unchanged to
        get_combined_unfold_histogram_tuple and its result is returned.
        Otherwise the folder is looked up via inputfile.Get() as
            'unfolding_<variable>_analyser_<channel>_channel_<met_type>'
        or, when 'HT' occurs in the variable name, without the MET suffix.

        Arguments:
            inputfile       object providing Get(<folder name>) and, when
                            scale_to_lumi is set,
                            EventFilter.EventCounter.GetBinContent(1)
            variable        variable name used in the folder name
            channel         channel name, or 'combined'
            met_type        MET type suffix (ignored for 'HT' variables)
            centre_of_mass  only forwarded to the combined-channel helper
            ttbar_xsection  used together with luminosity for the lumi weight
            luminosity      see ttbar_xsection
            load_fakes      if True, also load (and scale/fix) folder.fake
            scale_to_lumi   if True, scale every loaded histogram by
                            ttbar_xsection * luminosity / processed events

        Returns the tuple (h_truth, h_measured, h_response, h_fakes), where
        h_fakes is None unless load_fakes is set. Every returned histogram
        has been passed through fix_overflow.
    """
    # the combined channel is handled by a dedicated helper
    if channel == 'combined':
        return get_combined_unfold_histogram_tuple(
            inputfile=inputfile,
            variable=variable,
            met_type=met_type,
            centre_of_mass=centre_of_mass,
            ttbar_xsection=ttbar_xsection,
            luminosity=luminosity,
            load_fakes=load_fakes,
            scale_to_lumi=scale_to_lumi,
        )

    # folders for HT-type variables carry no MET type in their name
    if 'HT' in variable:
        folder_name = 'unfolding_%s_analyser_%s_channel' % (variable, channel)
    else:
        folder_name = 'unfolding_%s_analyser_%s_channel_%s' % (variable, channel, met_type)
    folder = inputfile.Get(folder_name)

    h_truth = asrootpy(folder.truth.Clone())
    h_measured = asrootpy(folder.measured.Clone())
    # The response matrix is always taken without fakes: the fake
    # subtraction from measured is performed automatically in RooUnfoldSvd
    # (h_measured - h_response->ProjectionX()) or manually for TSVDUnfold.
    # Fix for a bug/typo in NTupleTools.
    h_response = asrootpy(folder.response_without_fakes.Clone())
    h_fakes = asrootpy(folder.fake.Clone()) if load_fakes else None

    if scale_to_lumi:
        # bin 1 of the event counter holds the number of processed events
        n_processed = inputfile.EventFilter.EventCounter.GetBinContent(1)
        lumi_weight = ttbar_xsection * luminosity / n_processed
        to_scale = [h_truth, h_measured, h_response]
        if load_fakes:
            to_scale.insert(0, h_fakes)
        for histogram in to_scale:
            histogram.Scale(lumi_weight)

    h_truth = fix_overflow(h_truth)
    h_measured = fix_overflow(h_measured)
    h_response = fix_overflow(h_response)
    if load_fakes:
        h_fakes = fix_overflow(h_fakes)

    return h_truth, h_measured, h_response, h_fakes
def test_overflow_2D(self):
    """
        Check fix_overflow on the 2D fixture self.h2.

        The fixture must report overflow in both x and y beforehand; after
        the fix neither axis may report overflow, and the last-bin contents
        must equal the previous last-bin contents plus the previous overflow.
    """
    before = check_overflow_in_2DHist(self.h2)
    overflow_flag_x = before['has_overflow_in_x']
    overflow_flag_y = before['has_overflow_in_y']
    # sanity check of the fixture itself
    self.assertGreater(overflow_flag_x, 0, '2D hist: No overflow in x present, wrong setup.')
    self.assertGreater(overflow_flag_y, 0, '2D hist: No overflow in y present, wrong setup.')

    fixed_hist = fix_overflow(self.h2)
    after = check_overflow_in_2DHist(fixed_hist)
    overflow_flag_x = after['has_overflow_in_x']
    overflow_flag_y = after['has_overflow_in_y']
    # the overflow must have been reset on both axes
    self.assertEqual(overflow_flag_x, 0, '2D hist: Overflow in x is not 0.')
    self.assertEqual(overflow_flag_y, 0, '2D hist: Overflow in y is not 0.')

    # expected new last-bin content = old last-bin content + old overflow
    old_overflow_x = before['overflow_x']
    old_overflow_y = before['overflow_y']
    old_last_x = before['last_bin_content_x']
    old_last_y = before['last_bin_content_y']
    new_last_x = after['last_bin_content_x']
    new_last_y = after['last_bin_content_y']
    expected_last_x = [spill + content for spill, content in zip(old_overflow_x, old_last_x)]
    expected_last_y = [spill + content for spill, content in zip(old_overflow_y, old_last_y)]

    # The trailing entry of each list is itself the overflow (which should
    # be 0), so the sum above is not meaningful there.
    # NOTE(review): the slice drops the final TWO entries although only the
    # last one is described as overflow - confirm against
    # check_overflow_in_2DHist.
    self.assertTrue(
        check_equal_lists(expected_last_x[:-2], new_last_x[:-2]),
        '2D hist: last bins in x are not correct.')
    self.assertTrue(
        check_equal_lists(expected_last_y[:-2], new_last_y[:-2]),
        '2D hist: last bins in y are not correct.')
def calculate_normalisation(self):
    '''
        Compute the normalisation for all samples once and cache the result.

        Does nothing if self.have_normalisation is already set. Otherwise:
        1. pass every histogram in self.measurement.histograms through
           fix_overflow (stored back into the same mapping)
        2. record its (value, error) list in self.initial_normalisation
        3. for background subtraction, copy that list into
           self.normalisation for every sample except 'TTJet'
        4. run the method-specific step selected by self.method
        5. round all values and errors in self.normalisation to one decimal
           and set self.have_normalisation
    '''
    if self.have_normalisation:
        return

    histograms = self.measurement.histograms
    for sample, raw_hist in histograms.items():
        # TODO: this should be a list of bin-contents
        fixed_hist = fix_overflow(raw_hist)
        histograms[sample] = fixed_hist
        value_errors = hist_to_value_error_tuplelist(fixed_hist)
        self.initial_normalisation[sample] = value_errors
        if sample != 'TTJet' and self.method == self.BACKGROUND_SUBTRACTION:
            self.normalisation[sample] = value_errors

    # the two checks are deliberately independent of each other
    if self.method == self.BACKGROUND_SUBTRACTION:
        self.background_subtraction(histograms)
    if self.method == self.SIMULTANEOUS_FIT:
        self.simultaneous_fit(histograms)

    # round everything: these are event numbers after all
    for sample, value_errors in self.normalisation.items():
        self.normalisation[sample] = [
            (round(value, 1), round(error, 1))
            for value, error in value_errors
        ]

    self.have_normalisation = True
def setUp(self):
    """
        Build the fixture: a random 2D histogram, a binning derived from it
        with pick_bins.get_next_end, and the rebinned histogram.

        Attributes set:
            h1, histogram_information, histograms   the filled input
            p_min, s_min, n_min                     requirements for the new binning
            bin_edges                               edges found by the bin picker
            purities_Integral, stabilities_Integral, n_events_Integral
                                                    values returned by the bin picker
            h1_rebinned                             h1 rebinned in x and y with bin_edges
            purities_GetBinContent, stabilities_GetBinContent,
            n_events_GetBinContent                  values recomputed from h1_rebinned
    """
    # 60 x 60 bins covering 40..100 on both axes, filled with correlated
    # Gaussian values: x = 60 + 10 * N(0, 1), y = x + N(0, 1).
    # NOTE(review): the random generator is not seeded here, so the
    # fixture differs from run to run.
    self.h1 = Hist2D(60, 40, 100, 60, 40, 100)
    n_samples = 100000
    x_values = 60 + 10 * np.random.randn(n_samples)
    y_values = x_values + np.random.randn(n_samples)
    xy_pairs = np.vstack((x_values, y_values)).T
    self.h1.fill_array(xy_pairs)

    self.h1 = fix_overflow(self.h1)

    self.histogram_information = [
        {
            'hist': self.h1,
            'CoM': 7,
            'channel': 'test_1',
        },
    ]
    self.histograms = [entry['hist'] for entry in self.histogram_information]

    # requirements for new binning
    self.p_min = 0.5
    self.s_min = 0.5
    self.n_min = 1000

    self.bin_edges = []
    self.purities_GetBinContent = []
    self.stabilities_GetBinContent = []
    self.n_events_GetBinContent = []
    self.purities_Integral = []
    self.stabilities_Integral = []
    self.n_events_Integral = []

    reference_hist = self.histograms[0]
    n_bins = reference_hist.GetNbinsX()

    bin_start = 0
    bin_end = 0
    while bin_end < n_bins:
        bin_end, purity, stability, n_gen_and_reco = pick_bins.get_next_end(
            self.histograms, bin_start, bin_end,
            self.p_min, self.s_min, self.n_min)
        x_axis = reference_hist.GetXaxis()
        if len(self.bin_edges) == 0:
            # very first pass: also record the lower edge of the first bin
            self.bin_edges.append(x_axis.GetBinLowEdge(bin_start + 1))
        # upper edge of the bin at which the picker stopped
        upper_edge = x_axis.GetBinLowEdge(bin_end) + x_axis.GetBinWidth(bin_end)
        self.bin_edges.append(upper_edge)
        self.purities_Integral.append(purity)
        self.stabilities_Integral.append(stability)
        self.n_events_Integral.append(n_gen_and_reco)
        bin_start = bin_end

    self.h1_rebinned = rebin_2d(self.h1, self.bin_edges, self.bin_edges)

    self.purities_GetBinContent = calculate_purities(self.h1_rebinned)
    self.stabilities_GetBinContent = calculate_stabilities(self.h1_rebinned)
    # diagonal entries of the rebinned histogram, one per new bin
    self.n_events_GetBinContent = [
        int(self.h1_rebinned.GetBinContent(diag, diag))
        for diag in range(1, len(self.bin_edges))
    ]
def test_overflow_1D(self):
    """
        Check fix_overflow on the 1D fixture self.h1.

        The fixture must have a non-empty overflow bin; after the fix the
        overflow bin must be 0 and the last bin must hold its previous
        content plus the previous overflow.
    """
    n_bins = self.h1.nbins()
    overflow_index = n_bins + 1  # the bin right after the last regular one
    # snapshot both contents before the histogram is fixed
    overflow_before = self.h1.GetBinContent(overflow_index)
    last_before = self.h1.GetBinContent(n_bins)
    self.assertGreater(overflow_before, 0, '1D hist: No overflow present, wrong setup.')

    fixed_hist = fix_overflow(self.h1)
    overflow_after = fixed_hist.GetBinContent(overflow_index)
    self.assertEqual(overflow_after, 0., '1D hist: Overflow bin is not 0.')
    last_after = fixed_hist.GetBinContent(n_bins)
    self.assertEqual(last_after, last_before + overflow_before, '1D hist: last bin is not correct.')
def test_overflow_1D(self):
    """
        fix_overflow on the 1D fixture self.h1 must empty the overflow bin
        and add its former content to the last regular bin.
    """
    source_hist = self.h1
    idx_last = source_hist.nbins()
    idx_overflow = idx_last + 1

    # contents as they are before the fix is applied
    spill = source_hist.GetBinContent(idx_overflow)
    tail = source_hist.GetBinContent(idx_last)
    # without any overflow the fixture would not exercise the fix at all
    self.assertGreater(
        spill, 0,
        '1D hist: No overflow present, wrong setup.')

    result = fix_overflow(self.h1)
    self.assertEqual(
        result.GetBinContent(idx_overflow), 0.,
        '1D hist: Overflow bin is not 0.')
    self.assertEqual(
        result.GetBinContent(idx_last), tail + spill,
        '1D hist: last bin is not correct.')
def test_overflow_2D(self):
    """
        fix_overflow on the 2D fixture self.h2 must clear the overflow in
        x and y and add the former overflow to the last-bin contents.

        All quantities are taken from check_overflow_in_2DHist, evaluated
        once before and once after the fix.
    """
    def add_pairwise(overflows, contents):
        # element-wise "overflow + last bin content"
        return [overflow + content for overflow, content in zip(overflows, contents)]

    report_before = check_overflow_in_2DHist(self.h2)
    flag_x, flag_y = report_before['has_overflow_in_x'], report_before['has_overflow_in_y']
    # the fixture has to contain overflow on both axes to be meaningful
    self.assertGreater(flag_x, 0, '2D hist: No overflow in x present, wrong setup.')
    self.assertGreater(flag_y, 0, '2D hist: No overflow in y present, wrong setup.')

    report_after = check_overflow_in_2DHist(fix_overflow(self.h2))
    flag_x, flag_y = report_after['has_overflow_in_x'], report_after['has_overflow_in_y']
    # check that the overflow has been reset
    self.assertEqual(flag_x, 0, '2D hist: Overflow in x is not 0.')
    self.assertEqual(flag_y, 0, '2D hist: Overflow in y is not 0.')

    # new last bin content should be the old one plus the old overflow
    spill_x = report_before['overflow_x']
    spill_y = report_before['overflow_y']
    tail_x_before = report_before['last_bin_content_x']
    tail_y_before = report_before['last_bin_content_y']
    tail_x_after = report_after['last_bin_content_x']
    tail_y_after = report_after['last_bin_content_y']
    wanted_x = add_pairwise(spill_x, tail_x_before)
    wanted_y = add_pairwise(spill_y, tail_y_before)

    # Remember: the last item in each list is actually the overflow, which
    # should be 0, so the sum computed above is not correct for it.
    # NOTE(review): [:-2] removes the last two items, not just that one -
    # confirm this matches the lists built by check_overflow_in_2DHist.
    x_matches = check_equal_lists(wanted_x[:-2], tail_x_after[:-2])
    self.assertTrue(x_matches, '2D hist: last bins in x are not correct.')
    y_matches = check_equal_lists(wanted_y[:-2], tail_y_after[:-2])
    self.assertTrue(y_matches, '2D hist: last bins in y are not correct.')