def unfold_results(results, category, channel, tau_value, h_truth, h_measured, h_response, h_fakes, method, visiblePS):
    """Unfold a measured (value, error) list and return it together with the
    fake-subtracted input, both as (value, error) tuple lists."""
    global variable, path_to_JSON, options
    # visible phase space uses its own binning
    edges = bin_edges_vis[variable] if visiblePS else bin_edges[variable]
    h_data = value_error_tuplelist_to_hist(results, edges)
    # fakes are subtracted from measured data before the unfolding step
    h_measured, h_data = removeFakes(h_measured, h_data, h_response)
    unfolder = Unfolding(h_truth, h_measured, h_response, h_fakes,
                         method=method, k_value=-1, tau=tau_value)
    # systematic samples are unfolded without error treatment
    if category == 'central':
        unfoldCfg.error_treatment = options.error_treatment
    else:
        unfoldCfg.error_treatment = 0
    h_unfolded_data = unfolder.unfold(h_data)
    del unfolder
    unfolded_tuples = hist_to_value_error_tuplelist(h_unfolded_data)
    measured_tuples = hist_to_value_error_tuplelist(h_data)
    return unfolded_tuples, measured_tuples
def test_tau_closure(self):
    """Closure test for tau-regularised unfolding.

    For every channel/variable pair, unfold the measured distribution with
    the stored tau unfolding object and require bin-by-bin agreement with
    the truth distribution within the unfolding uncertainty.
    """
    for channel in self.channels:
        for variable in self.variables:
            data = self.dict[channel][variable]['h_measured']
            truth = hist_to_value_error_tuplelist(
                self.dict[channel][variable]['h_truth']
            )
            unfolded_result = hist_to_value_error_tuplelist(
                self.dict[channel][variable]['tau_unfolding_object'].unfold(data)
            )
            # the difference between the truth and unfolded result should be
            # within the unfolding error
            for (value, error), (true_value, _) in zip(unfolded_result, truth):
                # assertAlmostEquals is a deprecated alias of assertAlmostEqual
                self.assertAlmostEqual(value, true_value, delta=error)
def test_closure(self):
    """Closure test: unfolding the MC measured distribution must reproduce
    the MC truth within the unfolding uncertainty, bin by bin."""
    for channel in self.channels:
        for variable in self.variables:
            # closure test
            unfolded_result = hist_to_value_error_tuplelist(
                self.dict[channel][variable]["unfolding_object"].closureTest()
            )
            truth = hist_to_value_error_tuplelist(
                self.dict[channel][variable]["h_truth"]
            )
            # the difference between the truth and unfolded result should be
            # within the unfolding error
            for (value, error), (true_value, _) in zip(unfolded_result, truth):
                # assertAlmostEquals is a deprecated alias of assertAlmostEqual
                self.assertAlmostEqual(value, true_value, delta=error)
def check_multiple_data_multiple_unfolding( input_file, method, channel ): global nbins, use_N_toy, output_folder, offset_toy_mc, offset_toy_data, k_value # same unfolding input, different data get_folder = input_file.Get pulls = [] add_pull = pulls.append histograms = [] add_histograms = histograms.append print 'Reading toy MC' start1 = time() mc_range = range( offset_toy_mc + 1, offset_toy_mc + use_N_toy + 1 ) data_range = range( offset_toy_data + 1, offset_toy_data + use_N_toy + 1 ) for nth_toy_mc in range( 1, 10000 + 1 ): # read all of them (easier) if nth_toy_mc in mc_range or nth_toy_mc in data_range: folder_mc = get_folder( channel + '/toy_%d' % nth_toy_mc ) add_histograms( get_histograms( folder_mc ) ) else: add_histograms( ( 0, 0, 0 ) ) print 'Done reading toy MC in', time() - start1, 's' for nth_toy_mc in range( offset_toy_mc + 1, offset_toy_mc + use_N_toy + 1 ): print 'Doing MC no', nth_toy_mc h_truth, h_measured, h_response = histograms[nth_toy_mc - 1] unfolding_obj = Unfolding( h_truth, h_measured, h_response, method = method, k_value = k_value ) unfold, get_pull, reset = unfolding_obj.unfold, unfolding_obj.pull, unfolding_obj.Reset for nth_toy_data in range( offset_toy_data + 1, offset_toy_data + use_N_toy + 1 ): if nth_toy_data == nth_toy_mc: continue print 'Doing MC no, ' + str( nth_toy_mc ) + ', data no', nth_toy_data h_data = histograms[nth_toy_data - 1][1] unfold( h_data ) pull = get_pull() diff = unfolding_obj.unfolded_data - unfolding_obj.truth diff_tuple = hist_to_value_error_tuplelist( diff ) unfolded = unfolding_obj.unfolded_data unfolded_tuple = hist_to_value_error_tuplelist( unfolded ) all_data = {'unfolded': unfolded_tuple, 'difference' : diff_tuple, 'pull': pull, 'nth_toy_mc': nth_toy_mc, 'nth_toy_data':nth_toy_data } add_pull( all_data ) reset() save_pulls( pulls, test = 'multiple_data_multiple_unfolding', method = method, channel = channel )
def __init__(self, config, measurement, method=BACKGROUND_SUBTRACTION, phase_space='FullPS'):
    """Set up the normalisation calculation for one measurement.

    config      -- analysis configuration object (stored as-is)
    measurement -- measurement object; read() is called here and its
                   histograms are turned into unit-area templates
    method      -- normalisation method (default: BACKGROUND_SUBTRACTION)
    phase_space -- phase-space label, e.g. 'FullPS'
    """
    self.config = config
    self.variable = measurement.variable
    self.category = measurement.name
    self.channel = measurement.channel
    self.method = method
    self.phase_space = phase_space
    self.measurement = measurement
    self.measurement.read()
    self.met_type = measurement.met_type
    self.fit_variables = ['M3']
    self.normalisation = {}
    self.initial_normalisation = {}
    self.templates = {}
    self.have_normalisation = False
    for sample, hist in self.measurement.histograms.items():
        h = deepcopy(hist)
        h_norm = h.integral()
        if h_norm > 0:
            # reuse the cached integral and force float division:
            # '1 / h.integral()' recomputed the integral and, under
            # Python 2, would truncate to 0 for an integer-valued integral,
            # leaving the template unnormalised
            h.Scale(1.0 / h_norm)
        self.templates[sample] = hist_to_value_error_tuplelist(h)
    self.auxiliary_info = {}
    self.auxiliary_info['norms'] = measurement.aux_info_norms
def calculate_normalisation(self):
    '''
    1. get file names
    2. get histograms from files
    3. ???
    4. calculate normalisation based on self.method
    '''
    if self.have_normalisation:
        # already computed, nothing to do
        return
    histograms = self.measurement.histograms
    is_bg_subtraction = self.method == self.BACKGROUND_SUBTRACTION
    for sample, hist in histograms.items():
        # TODO: this should be a list of bin-contents
        # hist = fix_overflow(hist)
        # histograms[sample] = hist
        tuples = hist_to_value_error_tuplelist(hist)
        self.initial_normalisation[sample] = tuples
        # for background subtraction every non-signal sample starts from
        # its initial normalisation
        if is_bg_subtraction and sample != 'TTJet':
            self.normalisation[sample] = tuples
    if is_bg_subtraction:
        self.background_subtraction(histograms)
    if self.method == self.SIMULTANEOUS_FIT:
        self.simultaneous_fit(histograms)
    # next, let's round all numbers (they are event numbers after all)
    for sample in self.normalisation:
        self.normalisation[sample] = [
            (round(v, 1), round(e, 1)) for v, e in self.normalisation[sample]
        ]
    self.have_normalisation = True
def calculate_normalisation(self):
    '''
    1. get file names
    2. get histograms from files
    3. ???
    4. calculate normalisation based on self.method
    '''
    if self.have_normalisation:
        return  # nothing to recompute
    histograms = self.measurement.histograms
    for sample, hist in histograms.items():
        # TODO: this should be a list of bin-contents
        # hist = fix_overflow(hist)
        # histograms[sample] = hist
        initial = hist_to_value_error_tuplelist(hist)
        self.initial_normalisation[sample] = initial
        # non-signal samples keep their initial normalisation when
        # background subtraction is used
        if self.method == self.BACKGROUND_SUBTRACTION and sample != 'TTJet':
            self.normalisation[sample] = initial
    if self.method == self.BACKGROUND_SUBTRACTION:
        self.background_subtraction(histograms)
    if self.method == self.SIMULTANEOUS_FIT:
        self.simultaneous_fit(histograms)
    # next, let's round all numbers (they are event numbers after all)
    for sample, values in self.normalisation.items():
        rounded = [(round(v, 1), round(e, 1)) for v, e in values]
        self.normalisation[sample] = rounded
    self.have_normalisation = True
def unfold_results(results, h_truth, h_measured, h_response, method):
    """Turn a (value, error) tuple list into a histogram, unfold it with the
    given truth/measured/response inputs and return the unfolded result as a
    (value, error) tuple list."""
    global bin_edges
    measured_hist = value_error_tuplelist_to_hist(results, bin_edges)
    unfolder = Unfolding(h_truth, h_measured, h_response, method=method)
    unfolded_hist = unfolder.unfold(measured_hist)
    return hist_to_value_error_tuplelist(unfolded_hist)
def unfold_results(results, category, channel, h_truth, h_measured, h_response, method):
    """Unfold a (value, error) tuple list and export the SVD diagnostic
    distributions (D, SV) and the unfolding object itself to ROOT files
    under path_to_JSON; returns the unfolded (value, error) tuple list.

    results  -- measured distribution as (value, error) tuples
    category -- systematic category; non-'central' disables unfolding errors
    channel  -- analysis channel, used in the output path
    method   -- 'TSVDUnfold' or a RooUnfold method name
    """
    global variable, path_to_JSON
    h_data = value_error_tuplelist_to_hist(results, bin_edges[variable])
    unfolding = Unfolding(h_truth, h_measured, h_response, method=method)
    #turning off the unfolding errors for systematic samples
    if category != 'central':
        unfoldCfg.Hreco = 0
    h_unfolded_data = unfolding.unfold(h_data)
    #export the D and SV distributions
    SVD_path = path_to_JSON + '/' + variable + '/unfolding_objects/' + channel + '/kv_' + str(
        unfoldCfg.SVD_k_value) + '/'
    make_folder_if_not_exists(SVD_path)
    if method == 'TSVDUnfold':
        # TSVDUnfold exposes GetD/GetSV directly on the unfolding object
        SVDdist = TFile(
            SVD_path + method + '_SVDdistributions_' + category + '.root',
            'recreate')
        directory = SVDdist.mkdir('SVDdist')
        directory.cd()
        unfolding.unfoldObject.GetD().Write()
        unfolding.unfoldObject.GetSV().Write()
        # unfolding.unfoldObject.GetUnfoldCovMatrix(data_covariance_matrix(h_data), unfoldCfg.SVD_n_toy).Write()
        SVDdist.Close()
    else:
        # RooUnfold wraps the SVD implementation behind Impl()
        SVDdist = TFile(
            SVD_path + method + '_SVDdistributions_Hreco' + str(unfoldCfg.Hreco) + '_' + category + '.root',
            'recreate')
        directory = SVDdist.mkdir('SVDdist')
        directory.cd()
        unfolding.unfoldObject.Impl().GetD().Write()
        unfolding.unfoldObject.Impl().GetSV().Write()
        h_truth.Write()
        h_measured.Write()
        h_response.Write()
        # unfolding.unfoldObject.Impl().GetUnfoldCovMatrix(data_covariance_matrix(h_data), unfoldCfg.SVD_n_toy).Write()
        SVDdist.Close()
    #export the whole unfolding object if it doesn't exist
    if method == 'TSVDUnfold':
        unfolding_object_file_name = SVD_path + method + '_unfoldingObject_' + category + '.root'
    else:
        unfolding_object_file_name = SVD_path + method + '_unfoldingObject_Hreco' + str(
            unfoldCfg.Hreco) + '_' + category + '.root'
    if not os.path.isfile(unfolding_object_file_name):
        unfoldingObjectFile = TFile(unfolding_object_file_name, 'recreate')
        directory = unfoldingObjectFile.mkdir('unfoldingObject')
        directory.cd()
        if method == 'TSVDUnfold':
            unfolding.unfoldObject.Write()
        else:
            unfolding.unfoldObject.Impl().Write()
        unfoldingObjectFile.Close()
    del unfolding
    return hist_to_value_error_tuplelist(h_unfolded_data)
def pull( self ):
    """Return the per-bin pulls (difference / error) between unfolded data
    and truth; the sentinel [9999999] if either histogram is missing."""
    if not ( self.unfolded_data and self.truth ):
        return [9999999]
    residual = self.unfolded_data - self.truth
    tuples = hist_to_value_error_tuplelist( residual )
    return [ value / error for value, error in tuples ]
def unfold_results( results, category, channel, k_value, h_truth, h_measured, h_response, h_fakes, method ):
    """Unfold a (value, error) tuple list with a k-value-regularised SVD
    unfolding; optionally export diagnostic distributions and the unfolding
    object to ROOT files. Returns the unfolded (value, error) tuple list.

    category -- systematic category; non-'central' disables unfolding errors
    k_value  -- SVD regularisation parameter (also used in the output path)
    """
    global variable, path_to_JSON, options
    h_data = value_error_tuplelist_to_hist( results, bin_edges[variable] )
    unfolding = Unfolding( h_truth, h_measured, h_response, h_fakes, method = method, k_value = k_value )
    # turning off the unfolding errors for systematic samples
    if not category == 'central':
        unfoldCfg.Hreco = 0
    else:
        unfoldCfg.Hreco = options.Hreco
    h_unfolded_data = unfolding.unfold( h_data )
    if options.write_unfolding_objects:
        # export the D and SV distributions
        SVD_path = path_to_JSON + '/unfolding_objects/' + channel + '/kv_' + str( k_value ) + '/'
        make_folder_if_not_exists( SVD_path )
        if method == 'TSVDUnfold':
            # TSVDUnfold exposes GetD/GetSV directly
            SVDdist = File( SVD_path + method + '_SVDdistributions_' + category + '.root', 'recreate' )
            directory = SVDdist.mkdir( 'SVDdist' )
            directory.cd()
            unfolding.unfoldObject.GetD().Write()
            unfolding.unfoldObject.GetSV().Write()
            # unfolding.unfoldObject.GetUnfoldCovMatrix(data_covariance_matrix(h_data), unfoldCfg.SVD_n_toy).Write()
            SVDdist.Close()
        else:
            # RooUnfold wraps the SVD implementation behind Impl()
            SVDdist = File( SVD_path + method + '_SVDdistributions_Hreco' + str( unfoldCfg.Hreco ) + '_' + category + '.root', 'recreate' )
            directory = SVDdist.mkdir( 'SVDdist' )
            directory.cd()
            unfolding.unfoldObject.Impl().GetD().Write()
            unfolding.unfoldObject.Impl().GetSV().Write()
            h_truth.Write()
            h_measured.Write()
            h_response.Write()
            # unfolding.unfoldObject.Impl().GetUnfoldCovMatrix(data_covariance_matrix(h_data), unfoldCfg.SVD_n_toy).Write()
            SVDdist.Close()
        # export the whole unfolding object if it doesn't exist
        if method == 'TSVDUnfold':
            unfolding_object_file_name = SVD_path + method + '_unfoldingObject_' + category + '.root'
        else:
            unfolding_object_file_name = SVD_path + method + '_unfoldingObject_Hreco' + str( unfoldCfg.Hreco ) + '_' + category + '.root'
        if not os.path.isfile( unfolding_object_file_name ):
            unfoldingObjectFile = File( unfolding_object_file_name, 'recreate' )
            directory = unfoldingObjectFile.mkdir( 'unfoldingObject' )
            directory.cd()
            if method == 'TSVDUnfold':
                unfolding.unfoldObject.Write()
            else:
                unfolding.unfoldObject.Impl().Write()
            unfoldingObjectFile.Close()
    del unfolding
    return hist_to_value_error_tuplelist( h_unfolded_data )
def unfold_results( results, category, channel, tau_value, h_truth, h_measured, h_response, h_fakes, method, visiblePS ):
    """Unfold a measured (value, error) tuple list with tau regularisation.

    Returns a pair of tuple lists: the unfolded result and the
    fake-subtracted measured distribution.
    """
    global variable, path_to_JSON, options
    # pick the binning matching the phase space
    if visiblePS:
        edges = bin_edges_vis[variable]
    else:
        edges = bin_edges[variable]
    h_data = value_error_tuplelist_to_hist( results, edges )
    # subtract the fake contribution before unfolding
    h_measured, h_data = removeFakes( h_measured, h_data, h_response )
    unfolder = Unfolding( h_truth, h_measured, h_response, h_fakes,
                          method = method, k_value = -1, tau = tau_value )
    # only the central measurement keeps the configured error treatment
    unfoldCfg.error_treatment = options.error_treatment if category == 'central' else 0
    h_unfolded_data = unfolder.unfold( h_data )
    del unfolder
    return ( hist_to_value_error_tuplelist( h_unfolded_data ),
             hist_to_value_error_tuplelist( h_data ) )
def pull ( self ):
    """Return per-bin pulls between unfolded data and truth, with the truth
    bin errors zeroed first; the sentinel [9999999] if inputs are missing."""
    if not ( self.unfolded_data and self.truth ):
        return [9999999]
    # zero the truth errors so the difference errors come from the
    # unfolded data alone
    for bin in range( 0, self.truth.GetNbinsX() ):
        self.truth.SetBinError( bin, 0 )
    residual = self.unfolded_data - self.truth
    tuples = hist_to_value_error_tuplelist( residual )
    return [ value / error for value, error in tuples ]
def unfold( self, data ):
    '''
    Unfold the given data histogram and return the unfolded histogram.

    data -- measured distribution; must not be None and must contain at
            least one non-zero bin

    Raises ValueError on missing or all-zero input.
    '''
    # fail fast on missing input, consistent with the newer unfold versions
    if data is None:
        raise ValueError( 'Data histogram is None' )
    have_zeros = [value == 0 for value, _ in hist_to_value_error_tuplelist( data )]
    # all(...) replaces the unidiomatic 'not False in have_zeros'
    if all( have_zeros ):
        raise ValueError( 'Data histograms contains only zeros' )
    self.setup_unfolding( data )
    if self.method == 'TSVDUnfold' or self.method == 'TopSVDUnfold':
        self.unfolded_data = asrootpy( self.unfoldObject.Unfold( self.k_value ) )
    else:
        # remove unfold reports (faster)
        self.unfoldObject.SetVerbose( self.verbose )
        self.unfolded_data = asrootpy( self.unfoldObject.Hreco( self.Hreco ) )
    return self.unfolded_data
def get_unfolded_normalisation(TTJet_fit_results, category, channel):
    """Unfold the TTJet fit results and collect generator/systematic truth
    distributions; writes everything to a JSON file and returns the dict.

    The response matrix is taken from a systematic-specific file for
    generator systematics, from the MC@NLO file for 'mcatnlo_matrix'
    categories, and from the central unfolding file otherwise.
    """
    global variable, met_type, path_to_JSON, file_for_unfolding, file_for_powheg, file_for_mcatnlo
    global file_for_matchingdown, file_for_matchingup, file_for_scaledown, file_for_scaleup
    global ttbar_generator_systematics
    files_for_systematics = {
        ttbar_theory_systematic_prefix + 'matchingdown':file_for_matchingdown,
        ttbar_theory_systematic_prefix + 'matchingup':file_for_matchingup,
        ttbar_theory_systematic_prefix + 'scaledown':file_for_scaledown,
        ttbar_theory_systematic_prefix + 'scaleup':file_for_scaleup,
    }
    h_truth, h_measured, h_response = None, None, None
    # choose the unfolding input file for this category
    if category in ttbar_generator_systematics and not 'ptreweight' in category:
        h_truth, h_measured, h_response = get_unfold_histogram_tuple(files_for_systematics[category], variable, channel, met_type)
    elif 'mcatnlo_matrix' in category:
        h_truth, h_measured, h_response = get_unfold_histogram_tuple(file_for_mcatnlo, variable, channel, met_type)
    else:
        h_truth, h_measured, h_response = get_unfold_histogram_tuple(file_for_unfolding, variable, channel, met_type)
    # truth distributions from each generator/systematic file
    MADGRAPH_results = hist_to_value_error_tuplelist(h_truth)
    POWHEG_results = hist_to_value_error_tuplelist(get_unfold_histogram_tuple(file_for_powheg, variable, channel, met_type)[0])
    MCATNLO_results = hist_to_value_error_tuplelist(get_unfold_histogram_tuple(file_for_mcatnlo, variable, channel, met_type)[0])
    matchingdown_results = hist_to_value_error_tuplelist(get_unfold_histogram_tuple(file_for_matchingdown, variable, channel, met_type)[0])
    matchingup_results = hist_to_value_error_tuplelist(get_unfold_histogram_tuple(file_for_matchingup, variable, channel, met_type)[0])
    scaledown_results = hist_to_value_error_tuplelist(get_unfold_histogram_tuple(file_for_scaledown, variable, channel, met_type)[0])
    scaleup_results = hist_to_value_error_tuplelist(get_unfold_histogram_tuple(file_for_scaleup, variable, channel, met_type)[0])
    TTJet_fit_results_unfolded = unfold_results(TTJet_fit_results,
                                                category,
                                                channel,
                                                h_truth,
                                                h_measured,
                                                h_response,
                                                'RooUnfoldSvd'
                                                # 'TSVDUnfold'
                                                )
    normalisation_unfolded = {
        'TTJet_measured' : TTJet_fit_results,
        'TTJet_unfolded' : TTJet_fit_results_unfolded,
        'MADGRAPH': MADGRAPH_results,
        # other generators
        'POWHEG': POWHEG_results,
        'MCATNLO': MCATNLO_results,
        # systematics
        'matchingdown': matchingdown_results,
        'matchingup': matchingup_results,
        'scaledown': scaledown_results,
        'scaleup': scaleup_results
    }
    write_data_to_JSON(normalisation_unfolded,
                       path_to_JSON + '/xsection_measurement_results' + '/kv' + str(unfoldCfg.SVD_k_value) + '/' + category + '/normalisation_' + channel + '_' + met_type + '.txt')
    return normalisation_unfolded
def unfold( self, data ):
    """Unfold *data* with the configured method and store/return the
    unfolded histogram; rejects missing or all-zero input."""
    if data is None:
        raise ValueError('Data histogram is None')
    bin_values = [value for value, _ in hist_to_value_error_tuplelist( data )]
    if all( value == 0 for value in bin_values ):
        raise ValueError('Data histograms contains only zeros')
    self.setup_unfolding( data )
    if self.method == 'TSVDUnfold':
        raw_result = self.unfoldObject.Unfold( self.k_value )
    else:
        # remove unfold reports (faster)
        self.unfoldObject.SetVerbose( self.verbose )
        raw_result = self.unfoldObject.Hreco( self.error_treatment )
    self.unfolded_data = asrootpy( raw_result )
    return self.unfolded_data
def unfold(self, data):
    """Unfold the measured histogram *data* and return the result.

    Raises ValueError when data is None or contains only zero bins.
    """
    if data is None:
        raise ValueError("Data histogram is None")
    contents = hist_to_value_error_tuplelist(data)
    if all(value == 0 for value, _ in contents):
        raise ValueError("Data histograms contains only zeros")
    self.setup_unfolding(data)
    if self.method == "TSVDUnfold":
        unfolded = self.unfoldObject.Unfold(self.k_value)
    else:
        # remove unfold reports (faster)
        self.unfoldObject.SetVerbose(self.verbose)
        unfolded = self.unfoldObject.Hreco(self.error_treatment)
    self.unfolded_data = asrootpy(unfolded)
    return self.unfolded_data
def get_unfolded_normalisation(TTJet_fit_results, category, channel):
    """Unfold the TTJet fit results with the central response matrix and
    collect the truth distributions of every generator/systematic sample;
    writes the collection to JSON and returns it."""
    global variable, met_type, path_to_JSON

    def _truth_tuples(unfolding_file):
        # truth histogram of the given file as (value, error) tuples
        h = get_unfold_histogram_tuple(unfolding_file, variable, channel, met_type)[0]
        return hist_to_value_error_tuplelist(h)

    h_truth, h_measured, h_response = get_unfold_histogram_tuple(
        file_for_unfolding, variable, channel, met_type)
    MADGRAPH_results = hist_to_value_error_tuplelist(h_truth)
    POWHEG_results = _truth_tuples(file_for_powheg)
    MCATNLO_results = _truth_tuples(file_for_mcatnlo)
    matchingdown_results = _truth_tuples(file_for_matchingdown)
    matchingup_results = _truth_tuples(file_for_matchingup)
    scaledown_results = _truth_tuples(file_for_scaledown)
    scaleup_results = _truth_tuples(file_for_scaleup)
    TTJet_fit_results_unfolded = unfold_results(
        TTJet_fit_results,
        category,
        channel,
        h_truth,
        h_measured,
        h_response,
        'RooUnfoldSvd'  # 'TSVDUnfold'
    )
    normalisation_unfolded = {
        'TTJet_measured': TTJet_fit_results,
        'TTJet_unfolded': TTJet_fit_results_unfolded,
        'MADGRAPH': MADGRAPH_results,
        #other generators
        'POWHEG': POWHEG_results,
        'MCATNLO': MCATNLO_results,
        #systematics
        'matchingdown': matchingdown_results,
        'matchingup': matchingup_results,
        'scaledown': scaledown_results,
        'scaleup': scaleup_results
    }
    output_path = (path_to_JSON + '/' + variable +
                   '/xsection_measurement_results' + '/kv' +
                   str(unfoldCfg.SVD_k_value) + '/' + category +
                   '/normalisation_' + channel + '_' + met_type + '.txt')
    write_data_to_JSON(normalisation_unfolded, output_path)
    return normalisation_unfolded
def get_unfolded_normalisation(TTJet_fit_results, category, channel):
    """Unfold the TTJet fit results and gather per-generator truth
    distributions into one dict, which is written to JSON and returned."""
    global variable, met_type, path_to_JSON
    h_truth, h_measured, h_response = get_unfold_histogram_tuple(
        file_for_unfolding, variable, channel, met_type)

    def read_truth(input_file):
        # (value, error) tuples of the truth histogram in input_file
        truth_hist = get_unfold_histogram_tuple(
            input_file, variable, channel, met_type)[0]
        return hist_to_value_error_tuplelist(truth_hist)

    TTJet_fit_results_unfolded = unfold_results(
        TTJet_fit_results, category, channel,
        h_truth, h_measured, h_response,
        'RooUnfoldSvd'  # 'TSVDUnfold'
    )
    normalisation_unfolded = {
        'TTJet_measured' : TTJet_fit_results,
        'TTJet_unfolded' : TTJet_fit_results_unfolded,
        'MADGRAPH': hist_to_value_error_tuplelist(h_truth),
        #other generators
        'POWHEG': read_truth(file_for_powheg),
        'MCATNLO': read_truth(file_for_mcatnlo),
        #systematics
        'matchingdown': read_truth(file_for_matchingdown),
        'matchingup': read_truth(file_for_matchingup),
        'scaledown': read_truth(file_for_scaledown),
        'scaleup': read_truth(file_for_scaleup)
    }
    write_data_to_JSON(
        normalisation_unfolded,
        path_to_JSON + '/' + variable + '/xsection_measurement_results' +
        '/kv' + str(unfoldCfg.SVD_k_value) + '/' + category +
        '/normalisation_' + channel + '_' + met_type + '.txt')
    return normalisation_unfolded
def get_unfolded_normalisation(TTJet_fit_results, category, channel, tau_value, visiblePS):
    """Unfold the TTJet fit results (tau-regularised) and attach the truth
    distributions of alternative generators and mass/scale variations.

    The response matrix comes from a systematic-specific file for generator
    systematics, from a PDF-variation file for PDF uncertainties, and from
    the central unfolding file otherwise. Returns the normalisation dict.

    NOTE(review): structure reconstructed from a flattened source line —
    the generator-comparison truth reads below appear unconditional (the
    former "if category == 'central'" guard is commented out); confirm.
    """
    global variable, met_type, path_to_JSON, file_for_unfolding, file_for_powheg_pythia, file_for_amcatnlo_herwig, file_for_ptreweight, files_for_pdfs
    global centre_of_mass, luminosity, ttbar_xsection, load_fakes, method
    global file_for_powhegPythia8, file_for_madgraphMLM, file_for_amcatnlo
    # global file_for_matchingdown, file_for_matchingup
    global file_for_scaledown, file_for_scaleup
    global file_for_massdown, file_for_massup
    global ttbar_generator_systematics, ttbar_theory_systematics, pdf_uncertainties
    global use_ptreweight
    files_for_systematics = {
        # ttbar_theory_systematic_prefix + 'matchingdown' : file_for_matchingdown,
        # ttbar_theory_systematic_prefix + 'matchingup' : file_for_matchingup,
        ttbar_theory_systematic_prefix + 'scaledown': file_for_scaledown,
        ttbar_theory_systematic_prefix + 'scaleup': file_for_scaleup,
        ttbar_theory_systematic_prefix + 'massdown': file_for_massdown,
        ttbar_theory_systematic_prefix + 'massup': file_for_massup,
        # 'JES_down' : file_for_jesdown,
        # 'JES_up' : file_for_jesup,
        # 'JER_down' : file_for_jerdown,
        # 'JER_up' : file_for_jerup,
        # 'BJet_up' : file_for_bjetdown,
        # 'BJet_down' : file_for_bjetup,
        ttbar_theory_systematic_prefix + 'hadronisation': file_for_amcatnlo_herwig,
        ttbar_theory_systematic_prefix + 'NLOgenerator': file_for_amcatnlo,
        # 'ElectronEnUp' : file_for_ElectronEnUp,
        # 'ElectronEnDown' : file_for_ElectronEnDown,
        # 'MuonEnUp' : file_for_MuonEnUp,
        # 'MuonEnDown' : file_for_MuonEnDown,
        # 'TauEnUp' : file_for_TauEnUp,
        # 'TauEnDown' : file_for_TauEnDown,
        # 'UnclusteredEnUp' : file_for_UnclusteredEnUp,
        # 'UnclusteredEnDown' : file_for_UnclusteredEnDown,
        # 'Muon_up' : file_for_LeptonUp,
        # 'Muon_down' : file_for_LeptonDown,
        # 'Electron_up' : file_for_LeptonUp,
        # 'Electron_down' : file_for_LeptonDown,
        # 'PileUpSystematic' : file_for_PUSystematic,
    }
    h_truth, h_measured, h_response, h_fakes = None, None, None, None
    # Systematics where you change the response matrix
    if category in ttbar_generator_systematics or category in files_for_systematics:
        print 'Doing category', category, 'by changing response matrix'
        h_truth, h_measured, h_response, h_fakes = get_unfold_histogram_tuple(
            inputfile=files_for_systematics[category],
            variable=variable,
            channel=channel,
            met_type=met_type,
            centre_of_mass=centre_of_mass,
            ttbar_xsection=ttbar_xsection,
            luminosity=luminosity,
            load_fakes=load_fakes,
            visiblePS=visiblePS,
        )
    elif category in pdf_uncertainties:
        h_truth, h_measured, h_response, h_fakes = get_unfold_histogram_tuple(
            inputfile=files_for_pdfs[category],
            variable=variable,
            channel=channel,
            met_type=met_type,
            centre_of_mass=centre_of_mass,
            ttbar_xsection=ttbar_xsection,
            luminosity=luminosity,
            load_fakes=load_fakes,
            visiblePS=visiblePS,
        )
    # Systematics where you change input MC
    else:
        h_truth, h_measured, h_response, h_fakes = get_unfold_histogram_tuple(
            inputfile=file_for_unfolding,
            variable=variable,
            channel=channel,
            met_type=met_type,
            centre_of_mass=centre_of_mass,
            ttbar_xsection=ttbar_xsection,
            luminosity=luminosity,
            load_fakes=load_fakes,
            visiblePS=visiblePS,
        )
    # central_results = hist_to_value_error_tuplelist( h_truth )
    TTJet_fit_results_unfolded, TTJet_fit_results_withoutFakes = unfold_results(
        TTJet_fit_results,
        category,
        channel,
        tau_value,
        h_truth,
        h_measured,
        h_response,
        h_fakes,
        method,
        visiblePS,
    )
    normalisation_unfolded = {
        'TTJet_measured': TTJet_fit_results,
        'TTJet_measured_withoutFakes': TTJet_fit_results_withoutFakes,
        'TTJet_unfolded': TTJet_fit_results_unfolded
    }
    # # THESE ARE FOR GETTING THE HISTOGRAMS FOR COMPARING WITH UNFOLDED DATA
    # if category == 'central':
    #     h_truth_matchingdown, _, _, _ = get_unfold_histogram_tuple( inputfile = file_for_matchingdown,
    #                                                                 variable = variable,
    #                                                                 channel = channel,
    #                                                                 met_type = met_type,
    #                                                                 centre_of_mass = centre_of_mass,
    #                                                                 ttbar_xsection = ttbar_xsection,
    #                                                                 luminosity = luminosity,
    #                                                                 load_fakes = load_fakes
    #                                                                 )
    #     h_truth_matchingup, _, _, _ = get_unfold_histogram_tuple( inputfile = file_for_matchingup,
    #                                                               variable = variable,
    #                                                               channel = channel,
    #                                                               met_type = met_type,
    #                                                               centre_of_mass = centre_of_mass,
    #                                                               ttbar_xsection = ttbar_xsection,
    #                                                               luminosity = luminosity,
    #                                                               load_fakes = load_fakes
    #                                                               )
    # truth distributions of the scale/mass variations and alternative
    # generators, for comparison with the unfolded data
    h_truth_scaledown, _, _, _ = get_unfold_histogram_tuple(
        inputfile=file_for_scaledown,
        variable=variable,
        channel=channel,
        met_type=met_type,
        centre_of_mass=centre_of_mass,
        ttbar_xsection=ttbar_xsection,
        luminosity=luminosity,
        load_fakes=load_fakes,
        visiblePS=visiblePS,
    )
    h_truth_scaleup, _, _, _ = get_unfold_histogram_tuple(
        inputfile=file_for_scaleup,
        variable=variable,
        channel=channel,
        met_type=met_type,
        centre_of_mass=centre_of_mass,
        ttbar_xsection=ttbar_xsection,
        luminosity=luminosity,
        load_fakes=load_fakes,
        visiblePS=visiblePS,
    )
    h_truth_massdown, _, _, _ = get_unfold_histogram_tuple(
        inputfile=file_for_massdown,
        variable=variable,
        channel=channel,
        met_type=met_type,
        centre_of_mass=centre_of_mass,
        ttbar_xsection=ttbar_xsection,
        luminosity=luminosity,
        load_fakes=load_fakes,
        visiblePS=visiblePS,
    )
    h_truth_massup, _, _, _ = get_unfold_histogram_tuple(
        inputfile=file_for_massup,
        variable=variable,
        channel=channel,
        met_type=met_type,
        centre_of_mass=centre_of_mass,
        ttbar_xsection=ttbar_xsection,
        luminosity=luminosity,
        load_fakes=load_fakes,
        visiblePS=visiblePS,
    )
    h_truth_powhegPythia8, _, _, _ = get_unfold_histogram_tuple(
        inputfile=file_for_powhegPythia8,
        variable=variable,
        channel=channel,
        met_type=met_type,
        centre_of_mass=centre_of_mass,
        ttbar_xsection=ttbar_xsection,
        luminosity=luminosity,
        load_fakes=load_fakes,
        visiblePS=visiblePS,
    )
    h_truth_amcatnlo, _, _, _ = get_unfold_histogram_tuple(
        inputfile=file_for_amcatnlo,
        variable=variable,
        channel=channel,
        met_type=met_type,
        centre_of_mass=centre_of_mass,
        ttbar_xsection=ttbar_xsection,
        luminosity=luminosity,
        load_fakes=load_fakes,
        visiblePS=visiblePS,
    )
    h_truth_madgraphMLM, _, _, _ = get_unfold_histogram_tuple(
        inputfile=file_for_madgraphMLM,
        variable=variable,
        channel=channel,
        met_type=met_type,
        centre_of_mass=centre_of_mass,
        ttbar_xsection=ttbar_xsection,
        luminosity=luminosity,
        load_fakes=load_fakes,
        visiblePS=visiblePS,
    )
    h_truth_amcatnlo_HERWIG, _, _, _ = get_unfold_histogram_tuple(
        inputfile=file_for_amcatnlo_herwig,
        variable=variable,
        channel=channel,
        met_type=met_type,
        centre_of_mass=centre_of_mass,
        ttbar_xsection=ttbar_xsection,
        luminosity=luminosity,
        load_fakes=load_fakes,
        visiblePS=visiblePS,
    )
    # MADGRAPH_ptreweight_results = hist_to_value_error_tuplelist( h_truth_ptreweight )
    # POWHEG_PYTHIA_results = hist_to_value_error_tuplelist( h_truth_POWHEG_PYTHIA )
    # MCATNLO_results = None
    powhegPythia8_results = hist_to_value_error_tuplelist(
        h_truth_powhegPythia8)
    madgraphMLM_results = hist_to_value_error_tuplelist(
        h_truth_madgraphMLM)
    AMCATNLO_results = hist_to_value_error_tuplelist(h_truth_amcatnlo)
    amcatnlo_HERWIG_results = hist_to_value_error_tuplelist(
        h_truth_amcatnlo_HERWIG)
    # matchingdown_results = hist_to_value_error_tuplelist( h_truth_matchingdown )
    # matchingup_results = hist_to_value_error_tuplelist( h_truth_matchingup )
    scaledown_results = hist_to_value_error_tuplelist(h_truth_scaledown)
    scaleup_results = hist_to_value_error_tuplelist(h_truth_scaleup)
    massdown_results = hist_to_value_error_tuplelist(h_truth_massdown)
    massup_results = hist_to_value_error_tuplelist(h_truth_massup)
    normalisation_unfolded['powhegPythia8'] = powhegPythia8_results
    normalisation_unfolded['amcatnlo'] = AMCATNLO_results
    normalisation_unfolded['madgraphMLM'] = madgraphMLM_results
    normalisation_unfolded['amcatnlo_HERWIG'] = amcatnlo_HERWIG_results
    normalisation_unfolded['scaledown'] = scaledown_results
    normalisation_unfolded['scaleup'] = scaleup_results
    normalisation_unfolded['massdown'] = massdown_results
    normalisation_unfolded['massup'] = massup_results
    return normalisation_unfolded
}, met_type='patType1CorrectedPFMet', b_tag_bin='2orMoreBtags', ) write_fit_results_and_initial_values(fit_results_electron, fit_results_muon, initial_values_electron, initial_values_muon) #continue with only TTJet TTJet_fit_results_electron = fit_results_electron['TTJet'] TTJet_fit_results_muon = fit_results_muon['TTJet'] # get t values for systematics # for systematics we only need the TTJet results! # unfold all above h_truth, h_measured, h_response = get_unfold_histogram_tuple(file_for_unfolding, 'electron') MADGRAPH_results_electron = hist_to_value_error_tuplelist(h_truth) POWHEG_results_electron = hist_to_value_error_tuplelist(get_unfold_histogram_tuple(file_for_powheg, 'electron')[1]) PYTHIA_results_electron = hist_to_value_error_tuplelist(get_unfold_histogram_tuple(file_for_pythia, 'electron')[1]) MCATNLO_results_electron = hist_to_value_error_tuplelist(get_unfold_histogram_tuple(file_for_mcatnlo, 'electron')[1]) matchingdown_results_electron = hist_to_value_error_tuplelist(get_unfold_histogram_tuple(file_for_matchingdown, 'electron')[1]) matchingup_results_electron = hist_to_value_error_tuplelist(get_unfold_histogram_tuple(file_for_matchingup, 'electron')[1]) scaledown_results_electron = hist_to_value_error_tuplelist(get_unfold_histogram_tuple(file_for_scaledown, 'electron')[1]) scaleup_results_electron = hist_to_value_error_tuplelist(get_unfold_histogram_tuple(file_for_scaleup, 'electron')[1]) TTJet_fit_results_electron_unfolded = unfold_results(TTJet_fit_results_electron, h_truth, h_measured, h_response, 'RooUnfoldSvd')
def calculate_xsection( nEventsHistogram, variable ):
    """Convert an event-count histogram into a normalised differential
    cross-section histogram using the visible-phase-space bin widths."""
    event_tuples = hist_to_value_error_tuplelist( nEventsHistogram )
    normalised_xsection = calculate_normalised_xsection(
        event_tuples, bin_widths_visiblePS[variable], False )
    return value_error_tuplelist_to_hist( normalised_xsection, bin_edges_vis[variable] )
def get_unfolded_normalisation( TTJet_fit_results, category, channel, k_value ):
    """
    Unfold the TTJet fit results for one category/channel (regularised by
    k_value) and collect the generator-level (truth) predictions of all
    alternative MC samples for comparison.

    Returns a dict mapping result name -> [(value, error), ...].
    """
    global variable, met_type, path_to_JSON, file_for_unfolding, file_for_powheg_pythia, file_for_powheg_herwig, file_for_mcatnlo, file_for_ptreweight, files_for_pdfs
    global centre_of_mass, luminosity, ttbar_xsection, load_fakes, method
    global file_for_matchingdown, file_for_matchingup, file_for_scaledown, file_for_scaleup
    global ttbar_generator_systematics, ttbar_theory_systematics, pdf_uncertainties
    global use_ptreweight

    files_for_systematics = {
        ttbar_theory_systematic_prefix + 'matchingdown': file_for_matchingdown,
        ttbar_theory_systematic_prefix + 'matchingup': file_for_matchingup,
        ttbar_theory_systematic_prefix + 'scaledown': file_for_scaledown,
        ttbar_theory_systematic_prefix + 'scaleup': file_for_scaleup,
        ttbar_theory_systematic_prefix + 'powheg_pythia': file_for_powheg_pythia,
        ttbar_theory_systematic_prefix + 'powheg_herwig': file_for_powheg_herwig,
        ttbar_theory_systematic_prefix + 'ptreweight': file_for_ptreweight,
    }

    def _histogram_tuple(input_file):
        # Every lookup in this function shares the same kinematic configuration.
        return get_unfold_histogram_tuple(
            inputfile=input_file,
            variable=variable,
            channel=channel,
            met_type=met_type,
            centre_of_mass=centre_of_mass,
            ttbar_xsection=ttbar_xsection,
            luminosity=luminosity,
            load_fakes=load_fakes,
        )

    def _truth_only(input_file):
        # Only the truth histogram is needed for generator comparisons.
        truth_hist, _, _, _ = _histogram_tuple(input_file)
        return truth_hist

    # Choose the unfolding input according to the category.
    if category in ttbar_generator_systematics or category in ttbar_theory_systematics:
        unfolding_input = files_for_systematics[category]
    elif category in pdf_uncertainties:
        unfolding_input = files_for_pdfs[category]
    elif use_ptreweight:
        unfolding_input = file_for_ptreweight
    else:
        unfolding_input = file_for_unfolding
    h_truth, h_measured, h_response, h_fakes = _histogram_tuple(unfolding_input)

    # Truth-level predictions from the alternative generators and
    # systematic-variation samples.
    h_truth_POWHEG_PYTHIA = _truth_only(file_for_powheg_pythia)
    h_truth_POWHEG_HERWIG = _truth_only(file_for_powheg_herwig)
    h_truth_MCATNLO = _truth_only(file_for_mcatnlo)
    h_truth_matchingdown = _truth_only(file_for_matchingdown)
    h_truth_matchingup = _truth_only(file_for_matchingup)
    h_truth_scaledown = _truth_only(file_for_scaledown)
    h_truth_scaleup = _truth_only(file_for_scaleup)
    h_truth_ptreweight = _truth_only(file_for_ptreweight)

    MADGRAPH_results = hist_to_value_error_tuplelist( h_truth )
    MADGRAPH_ptreweight_results = hist_to_value_error_tuplelist( h_truth_ptreweight )
    POWHEG_PYTHIA_results = hist_to_value_error_tuplelist( h_truth_POWHEG_PYTHIA )
    POWHEG_HERWIG_results = hist_to_value_error_tuplelist( h_truth_POWHEG_HERWIG )
    MCATNLO_results = hist_to_value_error_tuplelist( h_truth_MCATNLO )
    matchingdown_results = hist_to_value_error_tuplelist( h_truth_matchingdown )
    matchingup_results = hist_to_value_error_tuplelist( h_truth_matchingup )
    scaledown_results = hist_to_value_error_tuplelist( h_truth_scaledown )
    scaleup_results = hist_to_value_error_tuplelist( h_truth_scaleup )

    TTJet_fit_results_unfolded = unfold_results(
        TTJet_fit_results,
        category,
        channel,
        k_value,
        h_truth,
        h_measured,
        h_response,
        h_fakes,
        method,
    )

    normalisation_unfolded = {
        'TTJet_measured': TTJet_fit_results,
        'TTJet_unfolded': TTJet_fit_results_unfolded,
        'MADGRAPH': MADGRAPH_results,
        'MADGRAPH_ptreweight': MADGRAPH_ptreweight_results,
        # other generators
        'POWHEG_PYTHIA': POWHEG_PYTHIA_results,
        'POWHEG_HERWIG': POWHEG_HERWIG_results,
        'MCATNLO': MCATNLO_results,
        # systematics
        'matchingdown': matchingdown_results,
        'matchingup': matchingup_results,
        'scaledown': scaledown_results,
        'scaleup': scaleup_results,
    }
    return normalisation_unfolded
def background_subtraction(self, histograms):
    """Subtract the non-ttbar backgrounds and store the TTJet normalisation."""
    backgrounds = ['QCD', 'V+Jets', 'SingleTop']
    ttjet_hist = clean_control_region(histograms, subtract=backgrounds)
    self.normalisation['TTJet'] = hist_to_value_error_tuplelist(ttjet_hist)
def check_multiple_data_multiple_unfolding(
        input_file, method, channel, variable, responseMatrix,
        n_toy_data, output_folder, tau_value=-1):
    '''
    Loops through n_toy_data pseudo-data histograms, unfolds each one and
    compares it to the MC truth.

    Returns the name of the file the pulls were saved to.
    '''
    # same unfolding input, different data
    get_folder = input_file.Get
    pulls = []
    add_pull = pulls.append

    # Read all toy-data measured histograms up front.
    # (The original loop ran to n_toy_data + 1 and padded the list with a
    # dead 0 entry for the extra index; only indices 0..n_toy_data-1 are
    # ever used, so the padding has been dropped.)
    print('Reading toy MC')
    start1 = time()
    histograms = []
    tpl = '{channel}/{variable}/toy_{nth}'
    for nth_toy_data in range(n_toy_data):
        folder_mc = tpl.format(channel=channel, variable=variable,
                               nth=nth_toy_data + 1)
        histograms.append(get_measured_histogram(get_folder(folder_mc)))
    print('Done reading toy MC in', time() - start1, 's')

    # Truth and measured histograms of the original (non-toy) sample
    original_path = '{channel}/{variable}/Original'.format(
        channel=channel, variable=variable)
    h_truth = get_truth_histogram(get_folder(original_path))
    h_measured = get_measured_histogram(get_folder(original_path))

    # Set response matrix
    h_response = responseMatrix

    # Make sure the pseudo data to be unfolded has the same integral as the
    # response matrix
    truth_from_response = asrootpy(h_response.ProjectionY())
    truthScale = truth_from_response.Integral() / h_truth.Integral()
    h_truth.Scale(truthScale)
    h_measured.Scale(truthScale)

    for nth_toy_data in range(n_toy_data):
        if nth_toy_data % 100 == 0:
            print('Doing data no', nth_toy_data)
        h_data = histograms[nth_toy_data]
        h_data.Scale(truthScale)
        # NOTE(review): h_data is passed both as the data and the "measured"
        # argument (call shape looks like (data, truth, measured, response)).
        # Kept exactly as in the original -- confirm against the Unfolding
        # constructor signature.
        unfolding_obj = Unfolding(h_data, h_truth, h_data, h_response,
                                  method=method, k_value=-1, tau=tau_value)
        unfolding_obj.unfold()
        pull = unfolding_obj.pull()

        diff = unfolding_obj.unfolded_data - h_truth
        diff_tuple = hist_to_value_error_tuplelist(diff)
        truth_tuple = hist_to_value_error_tuplelist(unfolding_obj.truth)
        # per-bin relative bias: (unfolded - truth) / truth
        bias = [d[0] / t[0] for d, t in zip(diff_tuple, truth_tuple)]

        unfolded_tuple = hist_to_value_error_tuplelist(
            unfolding_obj.unfolded_data)
        add_pull({'unfolded': unfolded_tuple,
                  'difference': diff_tuple,
                  'truth': truth_tuple,
                  'bias': bias,
                  'pull': pull,
                  'nth_toy_data': nth_toy_data,
                  })
        unfolding_obj.Reset()

    output_file_name = save_pulls(pulls, method, channel, tau_value,
                                  output_folder)
    return output_file_name
# Scale-variation unfolding inputs
unfolding_file3 = root_open('/storage/TopQuarkGroup/mc/8TeV/NoSkimUnfolding/v10/TTJets_scaleup_TuneZ2star_8TeV-madgraph-tauola/unfolding_v10_Summer12_DR53X-PU_S10_START53_V7A-v1_NoSkim/TTJets-scaleup_nTuple_53X_mc_merged_001.root')
unfolding_file4 = root_open('/storage/TopQuarkGroup/mc/8TeV/NoSkimUnfolding/v10/TTJets_scaledown_TuneZ2star_8TeV-madgraph-tauola/unfolding_v10_Summer12_DR53X-PU_S10_START53_V7A-v1_NoSkim/TTJets-scaledown_nTuple_53X_mc_merged_001.root')

# Previously unfolded systematic-shifted results to compare against
test_file = root_open('test_unfolded.root')
test1 = test_file.Get(channel + '_MET__TTJet__TTJetsMatching__plus')
test2 = test_file.Get(channel + '_MET__TTJet__TTJetsMatching__minus')
test3 = test_file.Get(channel + '_MET__TTJet__TTJetsScale__plus')
test4 = test_file.Get(channel + '_MET__TTJet__TTJetsScale__minus')
for shifted in (test1, test2, test3, test4):
    shifted.Sumw2()

# Reference truth distributions from the dedicated systematic samples,
# round-tripped through the (value, error) tuple list onto the MET binning
folder = 'unfolding_MET_analyser_' + channel + '_channel_patMETsPFlow'
ref1, ref2, ref3, ref4 = [
    value_error_tuplelist_to_hist(
        hist_to_value_error_tuplelist(source.Get(folder + '/truth_AsymBins')),
        bin_edges['MET'])
    for source in (unfolding_file1, unfolding_file2,
                   unfolding_file3, unfolding_file4)
]

normalise([test1, test2, test3, test4, ref1, ref2, ref3, ref4])
for shifted, reference, label in ((test1, ref1, 'matching_up'),
                                  (test2, ref2, 'matching_down'),
                                  (test3, ref3, 'scale_up'),
                                  (test4, ref4, 'scale_down')):
    draw_pair(shifted, reference, label)
def background_subtraction(self, histograms):
    """Remove QCD, V+Jets and single-top contributions; keep the TTJet yield."""
    signal_hist = clean_control_region(
        histograms, subtract=['QCD', 'V+Jets', 'SingleTop'])
    self.normalisation['TTJet'] = hist_to_value_error_tuplelist(signal_hist)
def check_multiple_data_multiple_unfolding(input_file, method, channel,
                                           variable, n_toy_mc, n_toy_data,
                                           output_folder, offset_toy_mc,
                                           offset_toy_data, k_value,
                                           tau_value=-1, run_matrix=None):
    '''
    Loops through a n_toy_mc x n_toy_data matrix of pseudo data versus
    simulation, unfolds the pseudo data and compares it to the MC truth
    '''
    # same unfolding input, different data
    get_folder = input_file.Get
    pulls = []
    add_pull = pulls.append
    histograms = []
    add_histograms = histograms.append

    print('Reading toy MC')
    start1 = time()
    mc_range = range(offset_toy_mc + 1, offset_toy_mc + n_toy_mc + 1)
    data_range = range(offset_toy_data + 1, offset_toy_data + n_toy_data + 1)
    for nth_toy_mc in range(1, 10000 + 1):  # read all of them (easier)
        if nth_toy_mc in mc_range or nth_toy_mc in data_range:
            tpl = '{channel}/{variable}/toy_{nth}'
            folder_mc = tpl.format(channel=channel, variable=variable,
                                   nth=nth_toy_mc)
            add_histograms(get_histograms(get_folder(folder_mc)))
        else:
            # placeholder so histograms[] keeps 1-based toy indexing
            add_histograms((0, 0, 0))
    print('Done reading toy MC in', time() - start1, 's')

    if not run_matrix:
        run_matrix = create_run_matrix(n_toy_mc, n_toy_data, offset_toy_mc,
                                       offset_toy_data)

    for nth_toy_mc, nth_toy_data in run_matrix:
        # Skip the diagonal before any expensive set-up: the original
        # constructed the Unfolding object first and threw it away for
        # every skipped pair.
        if nth_toy_data == nth_toy_mc:
            continue
        print('Doing MC no, ' + str(nth_toy_mc) + ', data no', nth_toy_data)
        h_truth, h_measured, h_response = histograms[nth_toy_mc - 1]
        # tau_value >= 0 selects tau regularisation; otherwise fall back
        # to the k_value-based configuration
        if tau_value >= 0:
            unfolding_obj = Unfolding(h_truth, h_measured, h_response,
                                      method=method, k_value=-1,
                                      tau=tau_value)
        else:
            unfolding_obj = Unfolding(h_truth, h_measured, h_response,
                                      method=method, k_value=k_value)
        h_data = histograms[nth_toy_data - 1][1]
        unfolding_obj.unfold(h_data)
        pull = unfolding_obj.pull()
        diff = unfolding_obj.unfolded_data - unfolding_obj.truth
        all_data = {
            'unfolded': hist_to_value_error_tuplelist(
                unfolding_obj.unfolded_data),
            'difference': hist_to_value_error_tuplelist(diff),
            'pull': pull,
            'nth_toy_mc': nth_toy_mc,
            'nth_toy_data': nth_toy_data,
        }
        add_pull(all_data)
        unfolding_obj.Reset()

    save_pulls(pulls, 'multiple_data_multiple_unfolding', method, channel,
               output_folder, n_toy_mc, n_toy_data, offset_toy_mc,
               offset_toy_data)
'/storage/TopQuarkGroup/mc/8TeV/NoSkimUnfolding/v10/TTJets_scaledown_TuneZ2star_8TeV-madgraph-tauola/unfolding_v10_Summer12_DR53X-PU_S10_START53_V7A-v1_NoSkim/TTJets-scaledown_nTuple_53X_mc_merged_001.root' ) test_file = root_open('test_unfolded.root') test1 = test_file.Get(channel + '_MET__TTJet__TTJetsMatching__plus') test2 = test_file.Get(channel + '_MET__TTJet__TTJetsMatching__minus') test3 = test_file.Get(channel + '_MET__TTJet__TTJetsScale__plus') test4 = test_file.Get(channel + '_MET__TTJet__TTJetsScale__minus') test1.Sumw2() test2.Sumw2() test3.Sumw2() test4.Sumw2() folder = 'unfolding_MET_analyser_' + channel + '_channel_patMETsPFlow' ref1 = hist_to_value_error_tuplelist( unfolding_file1.Get(folder + '/truth_AsymBins')) ref2 = hist_to_value_error_tuplelist( unfolding_file2.Get(folder + '/truth_AsymBins')) ref3 = hist_to_value_error_tuplelist( unfolding_file3.Get(folder + '/truth_AsymBins')) ref4 = hist_to_value_error_tuplelist( unfolding_file4.Get(folder + '/truth_AsymBins')) ref1 = value_error_tuplelist_to_hist(ref1, bin_edges['MET']) ref2 = value_error_tuplelist_to_hist(ref2, bin_edges['MET']) ref3 = value_error_tuplelist_to_hist(ref3, bin_edges['MET']) ref4 = value_error_tuplelist_to_hist(ref4, bin_edges['MET']) normalise([test1, test2, test3, test4, ref1, ref2, ref3, ref4]) draw_pair(test1, ref1, 'matching_up') draw_pair(test2, ref2, 'matching_down')
def get_unfolded_normalisation( TTJet_fit_results, category, channel, tau_value, visiblePS ): global variable, met_type, path_to_JSON, file_for_unfolding, file_for_powheg_pythia, file_for_amcatnlo_herwig, file_for_ptreweight, files_for_pdfs global centre_of_mass, luminosity, ttbar_xsection, load_fakes, method global file_for_powhegPythia8, file_for_madgraphMLM, file_for_amcatnlo # global file_for_matchingdown, file_for_matchingup global file_for_scaledown, file_for_scaleup global file_for_massdown, file_for_massup global ttbar_generator_systematics, ttbar_theory_systematics, pdf_uncertainties global use_ptreweight files_for_systematics = { # ttbar_theory_systematic_prefix + 'matchingdown' : file_for_matchingdown, # ttbar_theory_systematic_prefix + 'matchingup' : file_for_matchingup, ttbar_theory_systematic_prefix + 'scaledown' : file_for_scaledown, ttbar_theory_systematic_prefix + 'scaleup' : file_for_scaleup, ttbar_theory_systematic_prefix + 'massdown' : file_for_massdown, ttbar_theory_systematic_prefix + 'massup' : file_for_massup, # 'JES_down' : file_for_jesdown, # 'JES_up' : file_for_jesup, # 'JER_down' : file_for_jerdown, # 'JER_up' : file_for_jerup, # 'BJet_up' : file_for_bjetdown, # 'BJet_down' : file_for_bjetup, ttbar_theory_systematic_prefix + 'hadronisation' : file_for_amcatnlo_herwig, ttbar_theory_systematic_prefix + 'NLOgenerator' : file_for_amcatnlo, # 'ElectronEnUp' : file_for_ElectronEnUp, # 'ElectronEnDown' : file_for_ElectronEnDown, # 'MuonEnUp' : file_for_MuonEnUp, # 'MuonEnDown' : file_for_MuonEnDown, # 'TauEnUp' : file_for_TauEnUp, # 'TauEnDown' : file_for_TauEnDown, # 'UnclusteredEnUp' : file_for_UnclusteredEnUp, # 'UnclusteredEnDown' : file_for_UnclusteredEnDown, # 'Muon_up' : file_for_LeptonUp, # 'Muon_down' : file_for_LeptonDown, # 'Electron_up' : file_for_LeptonUp, # 'Electron_down' : file_for_LeptonDown, # 'PileUpSystematic' : file_for_PUSystematic, } h_truth, h_measured, h_response, h_fakes = None, None, None, None # Systematics where you 
change the response matrix if category in ttbar_generator_systematics or category in files_for_systematics : print 'Doing category',category,'by changing response matrix' h_truth, h_measured, h_response, h_fakes = get_unfold_histogram_tuple( inputfile = files_for_systematics[category], variable = variable, channel = channel, met_type = met_type, centre_of_mass = centre_of_mass, ttbar_xsection = ttbar_xsection, luminosity = luminosity, load_fakes = load_fakes, visiblePS = visiblePS, ) elif category in pdf_uncertainties: h_truth, h_measured, h_response, h_fakes = get_unfold_histogram_tuple( inputfile = files_for_pdfs[category], variable = variable, channel = channel, met_type = met_type, centre_of_mass = centre_of_mass, ttbar_xsection = ttbar_xsection, luminosity = luminosity, load_fakes = load_fakes, visiblePS = visiblePS, ) # Systematics where you change input MC else: h_truth, h_measured, h_response, h_fakes = get_unfold_histogram_tuple( inputfile = file_for_unfolding, variable = variable, channel = channel, met_type = met_type, centre_of_mass = centre_of_mass, ttbar_xsection = ttbar_xsection, luminosity = luminosity, load_fakes = load_fakes, visiblePS = visiblePS, ) # central_results = hist_to_value_error_tuplelist( h_truth ) TTJet_fit_results_unfolded, TTJet_fit_results_withoutFakes = unfold_results( TTJet_fit_results, category, channel, tau_value, h_truth, h_measured, h_response, h_fakes, method, visiblePS, ) normalisation_unfolded = { 'TTJet_measured' : TTJet_fit_results, 'TTJet_measured_withoutFakes' : TTJet_fit_results_withoutFakes, 'TTJet_unfolded' : TTJet_fit_results_unfolded } # # THESE ARE FOR GETTING THE HISTOGRAMS FOR COMPARING WITH UNFOLDED DATA # if category == 'central': # h_truth_matchingdown, _, _, _ = get_unfold_histogram_tuple( inputfile = file_for_matchingdown, # variable = variable, # channel = channel, # met_type = met_type, # centre_of_mass = centre_of_mass, # ttbar_xsection = ttbar_xsection, # luminosity = luminosity, # load_fakes = 
load_fakes # ) # h_truth_matchingup, _, _, _ = get_unfold_histogram_tuple( inputfile = file_for_matchingup, # variable = variable, # channel = channel, # met_type = met_type, # centre_of_mass = centre_of_mass, # ttbar_xsection = ttbar_xsection, # luminosity = luminosity, # load_fakes = load_fakes # ) h_truth_scaledown, _, _, _ = get_unfold_histogram_tuple( inputfile = file_for_scaledown, variable = variable, channel = channel, met_type = met_type, centre_of_mass = centre_of_mass, ttbar_xsection = ttbar_xsection, luminosity = luminosity, load_fakes = load_fakes, visiblePS = visiblePS, ) h_truth_scaleup, _, _, _ = get_unfold_histogram_tuple( inputfile = file_for_scaleup, variable = variable, channel = channel, met_type = met_type, centre_of_mass = centre_of_mass, ttbar_xsection = ttbar_xsection, luminosity = luminosity, load_fakes = load_fakes, visiblePS = visiblePS, ) h_truth_massdown, _, _, _ = get_unfold_histogram_tuple( inputfile = file_for_massdown, variable = variable, channel = channel, met_type = met_type, centre_of_mass = centre_of_mass, ttbar_xsection = ttbar_xsection, luminosity = luminosity, load_fakes = load_fakes, visiblePS = visiblePS, ) h_truth_massup, _, _, _ = get_unfold_histogram_tuple( inputfile = file_for_massup, variable = variable, channel = channel, met_type = met_type, centre_of_mass = centre_of_mass, ttbar_xsection = ttbar_xsection, luminosity = luminosity, load_fakes = load_fakes, visiblePS = visiblePS, ) h_truth_powhegPythia8, _, _, _ = get_unfold_histogram_tuple( inputfile = file_for_powhegPythia8, variable = variable, channel = channel, met_type = met_type, centre_of_mass = centre_of_mass, ttbar_xsection = ttbar_xsection, luminosity = luminosity, load_fakes = load_fakes, visiblePS = visiblePS, ) h_truth_amcatnlo, _, _, _ = get_unfold_histogram_tuple( inputfile = file_for_amcatnlo, variable = variable, channel = channel, met_type = met_type, centre_of_mass = centre_of_mass, ttbar_xsection = ttbar_xsection, luminosity = luminosity, 
load_fakes = load_fakes, visiblePS = visiblePS, ) h_truth_madgraphMLM, _, _, _ = get_unfold_histogram_tuple( inputfile = file_for_madgraphMLM, variable = variable, channel = channel, met_type = met_type, centre_of_mass = centre_of_mass, ttbar_xsection = ttbar_xsection, luminosity = luminosity, load_fakes = load_fakes, visiblePS = visiblePS, ) h_truth_amcatnlo_HERWIG, _, _, _ = get_unfold_histogram_tuple( inputfile = file_for_amcatnlo_herwig, variable = variable, channel = channel, met_type = met_type, centre_of_mass = centre_of_mass, ttbar_xsection = ttbar_xsection, luminosity = luminosity, load_fakes = load_fakes, visiblePS = visiblePS, ) # MADGRAPH_ptreweight_results = hist_to_value_error_tuplelist( h_truth_ptreweight ) # POWHEG_PYTHIA_results = hist_to_value_error_tuplelist( h_truth_POWHEG_PYTHIA ) # MCATNLO_results = None powhegPythia8_results = hist_to_value_error_tuplelist( h_truth_powhegPythia8 ) madgraphMLM_results = hist_to_value_error_tuplelist( h_truth_madgraphMLM ) AMCATNLO_results = hist_to_value_error_tuplelist( h_truth_amcatnlo ) amcatnlo_HERWIG_results = hist_to_value_error_tuplelist( h_truth_amcatnlo_HERWIG ) # matchingdown_results = hist_to_value_error_tuplelist( h_truth_matchingdown ) # matchingup_results = hist_to_value_error_tuplelist( h_truth_matchingup ) scaledown_results = hist_to_value_error_tuplelist( h_truth_scaledown ) scaleup_results = hist_to_value_error_tuplelist( h_truth_scaleup ) massdown_results = hist_to_value_error_tuplelist( h_truth_massdown ) massup_results = hist_to_value_error_tuplelist( h_truth_massup ) normalisation_unfolded['powhegPythia8'] = powhegPythia8_results normalisation_unfolded['amcatnlo'] = AMCATNLO_results normalisation_unfolded['madgraphMLM'] = madgraphMLM_results normalisation_unfolded['amcatnlo_HERWIG'] = amcatnlo_HERWIG_results normalisation_unfolded['scaledown'] = scaledown_results normalisation_unfolded['scaleup'] = scaleup_results normalisation_unfolded['massdown'] = massdown_results 
normalisation_unfolded['massup'] = massup_results return normalisation_unfolded
def check_multiple_data_multiple_unfolding( input_file, method, channel,
                                            variable, n_toy_mc, n_toy_data,
                                            output_folder, offset_toy_mc,
                                            offset_toy_data, k_value,
                                            tau_value=-1, run_matrix=None):
    '''
    Loops through a n_toy_mc x n_toy_data matrix of pseudo data versus
    simulation, unfolds the pseudo data and compares it to the MC truth
    '''
    # same unfolding input, different data
    get_folder = input_file.Get
    pulls = []
    add_pull = pulls.append
    histograms = []
    add_histograms = histograms.append

    print('Reading toy MC')
    start1 = time()
    mc_range = range(offset_toy_mc + 1, offset_toy_mc + n_toy_mc + 1)
    data_range = range(offset_toy_data + 1, offset_toy_data + n_toy_data + 1)
    for nth_toy_mc in range(1, 10000 + 1):  # read all of them (easier)
        if nth_toy_mc in mc_range or nth_toy_mc in data_range:
            tpl = '{channel}/{variable}/toy_{nth}'
            folder_mc = tpl.format(channel=channel, variable=variable,
                                   nth=nth_toy_mc)
            add_histograms(get_histograms(get_folder(folder_mc)))
        else:
            # placeholder keeps the 1-based toy index -> list index mapping
            add_histograms((0, 0, 0))
    print('Done reading toy MC in', time() - start1, 's')

    if not run_matrix:
        run_matrix = create_run_matrix(n_toy_mc, n_toy_data, offset_toy_mc,
                                       offset_toy_data)

    for nth_toy_mc, nth_toy_data in run_matrix:
        # Guard first: the original built the Unfolding object before this
        # check and discarded it for every diagonal (toy vs itself) pair.
        if nth_toy_data == nth_toy_mc:
            continue
        print('Doing MC no, ' + str(nth_toy_mc) + ', data no', nth_toy_data)
        h_truth, h_measured, h_response = histograms[nth_toy_mc - 1]
        # prefer tau regularisation when tau_value is set (>= 0)
        if tau_value >= 0:
            unfolding_obj = Unfolding(
                h_truth, h_measured, h_response,
                method=method, k_value=-1, tau=tau_value)
        else:
            unfolding_obj = Unfolding(
                h_truth, h_measured, h_response,
                method=method, k_value=k_value)
        h_data = histograms[nth_toy_data - 1][1]
        unfolding_obj.unfold(h_data)
        pull = unfolding_obj.pull()
        diff = unfolding_obj.unfolded_data - unfolding_obj.truth
        add_pull({
            'unfolded': hist_to_value_error_tuplelist(
                unfolding_obj.unfolded_data),
            'difference': hist_to_value_error_tuplelist(diff),
            'pull': pull,
            'nth_toy_mc': nth_toy_mc,
            'nth_toy_data': nth_toy_data,
        })
        unfolding_obj.Reset()

    save_pulls(pulls, 'multiple_data_multiple_unfolding', method, channel,
               output_folder, n_toy_mc, n_toy_data, offset_toy_mc,
               offset_toy_data)