def setUp(self):
    """Build two toy fit regions and run the four Minuit fits under test.

    Six histograms (background, signal and data for two regions) are
    created from one template and filled from the sample arrays ``x1`` ..
    ``x4`` and ``x1_obs`` .. ``x4_obs``; both data histograms are then
    scaled by ``data_scale``. Those names are not defined in this method
    and are expected to come from the enclosing module.

    The fitters stored on ``self`` are:

    * ``minuit_fitter`` - single-region fit,
    * ``simultaneous_fit`` - two regions, no constraints,
    * ``simultaneous_fit_with_constraints`` - ``bkg1`` constrained to 0.5,
    * ``simultaneous_fit_with_bad_constraints`` - ``bkg1`` constrained
      to 0.001.
    """
    # create histograms
    h_bkg1_1 = Hist(100, 40, 200, title='Background')
    h_signal_1 = h_bkg1_1.Clone(title='Signal')
    h_data_1 = h_bkg1_1.Clone(title='Data')
    h_bkg1_2 = h_bkg1_1.Clone(title='Background')
    h_signal_2 = h_bkg1_1.Clone(title='Signal')
    h_data_2 = h_bkg1_1.Clone(title='Data')

    # fill the histograms with our distributions
    # Explicit loops instead of map(): map() is lazy on Python 3, so using
    # it only for its side effect would leave every histogram empty.
    fill_plan = (
        (h_bkg1_1, x1),
        (h_signal_1, x2),
        (h_data_1, x1_obs),
        (h_data_1, x2_obs),
        (h_bkg1_2, x3),
        (h_signal_2, x4),
        (h_data_2, x3_obs),
        (h_data_2, x4_obs),
    )
    for histogram, values in fill_plan:
        for value in values:
            histogram.Fill(value)

    h_data_1.Scale(data_scale)
    h_data_2.Scale(data_scale)

    histograms_1 = {'signal': h_signal_1, 'bkg1': h_bkg1_1}
    histograms_2 = {'signal': h_signal_2, 'bkg1': h_bkg1_2}

    fit_data_1 = FitData(h_data_1, histograms_1, fit_boundaries=(40, 200))
    fit_data_2 = FitData(h_data_2, histograms_2, fit_boundaries=(40, 200))

    single_fit_collection = FitDataCollection()
    single_fit_collection.add(fit_data_1)

    collection_1 = FitDataCollection()
    collection_1.add(fit_data_1, 'var1')
    collection_1.add(fit_data_2, 'var2')

    collection_2 = FitDataCollection()
    collection_2.add(fit_data_1, 'var1')
    collection_2.add(fit_data_2, 'var2')
    collection_2.set_normalisation_constraints({'bkg1': 0.5})

    collection_3 = FitDataCollection()
    collection_3.add(fit_data_1, 'var1')
    collection_3.add(fit_data_2, 'var2')
    collection_3.set_normalisation_constraints({'bkg1': 0.001})

    self.minuit_fitter = Minuit(single_fit_collection)
    self.minuit_fitter.fit()

    self.simultaneous_fit = Minuit(collection_1)
    self.simultaneous_fit.fit()

    self.simultaneous_fit_with_constraints = Minuit(collection_2)
    self.simultaneous_fit_with_constraints.fit()

    self.simultaneous_fit_with_bad_constraints = Minuit(collection_3)
    self.simultaneous_fit_with_bad_constraints.fit()
def setUp(self):
    """Create the histograms, FitData objects and collections for the tests.

    Histograms are filled from ``x1`` .. ``x4`` and ``x1_obs`` ..
    ``x4_obs`` and the data histograms scaled by ``data_scale``; the fit
    range is ``(x_min, x_max)``. None of these names are defined in this
    method; they are expected to come from the enclosing module.

    Attributes set on ``self``:

    * ``histograms_1`` / ``histograms_2`` - templates keyed ``'signal'``
      and ``'bkg1'``; ``histograms_3`` uses the key ``'var1'`` in place of
      ``'signal'`` for the same signal histogram,
    * ``fit_data_1`` .. ``fit_data_3`` - the matching ``FitData`` objects
      (``fit_data_3`` reuses the first region's data histogram),
    * ``collection_1`` - both regions added with explicit names,
      ``bkg1`` constrained to 0.5,
    * ``collection_2`` - both regions added without names, same constraint,
    * ``single_collection`` - first region only, same constraint,
    * ``non_simultaneous_fit_collection`` - ``fit_data_1`` plus
      ``fit_data_3``, whose template keys differ,
    * ``h_data``, ``h_bkg1``, ``h_signal`` - first-region histograms.
    """
    # create histograms
    h_bkg1_1 = Hist(100, 40, 200, title='Background')
    h_signal_1 = h_bkg1_1.Clone(title='Signal')
    h_data_1 = h_bkg1_1.Clone(title='Data')
    h_bkg1_2 = h_bkg1_1.Clone(title='Background')
    h_signal_2 = h_bkg1_1.Clone(title='Signal')
    h_data_2 = h_bkg1_1.Clone(title='Data')

    # fill the histograms with our distributions
    # Explicit loops instead of map(): map() is lazy on Python 3, so using
    # it only for its side effect would leave every histogram empty.
    fill_plan = (
        (h_bkg1_1, x1),
        (h_signal_1, x2),
        (h_data_1, x1_obs),
        (h_data_1, x2_obs),
        (h_bkg1_2, x3),
        (h_signal_2, x4),
        (h_data_2, x3_obs),
        (h_data_2, x4_obs),
    )
    for histogram, values in fill_plan:
        for value in values:
            histogram.Fill(value)

    h_data_1.Scale(data_scale)
    h_data_2.Scale(data_scale)

    self.histograms_1 = {'signal': h_signal_1, 'bkg1': h_bkg1_1}
    self.histograms_2 = {'signal': h_signal_2, 'bkg1': h_bkg1_2}
    self.histograms_3 = {'var1': h_signal_1, 'bkg1': h_bkg1_1}

    self.fit_data_1 = FitData(h_data_1, self.histograms_1,
                              fit_boundaries=(x_min, x_max))
    self.fit_data_2 = FitData(h_data_2, self.histograms_2,
                              fit_boundaries=(x_min, x_max))
    self.fit_data_3 = FitData(h_data_1, self.histograms_3,
                              fit_boundaries=(x_min, x_max))

    self.collection_1 = FitDataCollection()
    self.collection_1.add(self.fit_data_1, 'signal region')
    self.collection_1.add(self.fit_data_2, 'control region')
    self.collection_1.set_normalisation_constraints({'bkg1': 0.5})

    self.collection_2 = FitDataCollection()
    self.collection_2.add(self.fit_data_1)
    self.collection_2.add(self.fit_data_2)
    self.collection_2.set_normalisation_constraints({'bkg1': 0.5})

    self.single_collection = FitDataCollection()
    self.single_collection.add(self.fit_data_1)
    self.single_collection.set_normalisation_constraints({'bkg1': 0.5})

    self.non_simultaneous_fit_collection = FitDataCollection()
    self.non_simultaneous_fit_collection.add(self.fit_data_1)
    self.non_simultaneous_fit_collection.add(self.fit_data_3)

    self.h_data = h_data_1
    self.h_bkg1 = h_bkg1_1
    self.h_signal = h_signal_1
def main():
    """Run a toy simultaneous RooFit fit of two identical signal+background models.

    Gaussian toy samples are drawn with ``np.random.randn``, histogrammed,
    turned into two RooFit models via ``get_roofit_model`` (named ``'m1'``
    and ``'m2'``), combined in a ``RooSimultaneous`` pdf indexed by a
    ``RooCategory`` and fitted to the sum of both data sets.

    The fit result is only bound to a local name; nothing is returned.
    """
    N_bkg1 = 9000
    N_signal = 1000
    N_bkg1_obs = 10000
    N_signal_obs = 2000

    mu1, mu2, sigma1, sigma2 = 100, 140, 15, 5
    x1 = mu1 + sigma1 * np.random.randn(N_bkg1)
    x2 = mu2 + sigma2 * np.random.randn(N_signal)
    x1_obs = mu1 + sigma1 * np.random.randn(N_bkg1_obs)
    x2_obs = mu2 + sigma2 * np.random.randn(N_signal_obs)

    h1 = Hist(100, 40, 200, title='Background')
    h2 = h1.Clone(title='Signal')
    h3 = h1.Clone(title='Data')
    h3.markersize = 1.2

    # fill the histograms with our distributions
    # Explicit loops instead of map(): map() is lazy on Python 3, so using
    # it only for its side effect would leave every histogram empty.
    for histogram, values in ((h1, x1), (h2, x2), (h3, x1_obs), (h3, x2_obs)):
        for value in values:
            histogram.Fill(value)

    histograms_1 = {'signal': h2, 'bkg1': h1, 'data': h3}
    histograms_2 = {'signal': h2, 'bkg1': h1, 'data': h3}

    # roofit_histograms contains RooDataHist
    # model = RooAddPdf
    model1, roofit_histograms_1, fit_variable_1 = get_roofit_model(
        histograms_1, fit_boundaries=(40, 200), name='m1')
    model2, roofit_histograms_2, fit_variable_2 = get_roofit_model(
        histograms_2, fit_boundaries=(40, 200), name='m2')

    sample = RooCategory('sample', 'sample')
    sample.defineType('m1', 1)
    sample.defineType('m2', 2)

    # The combined data set is a copy of the first model's data with the
    # second model's data added to it.
    combined_data = deepcopy(roofit_histograms_1['data'])
    combined_data.add(roofit_histograms_2['data'])

    # RooDataHist(const char* name, const char* title, const RooArgList& vars, RooCategory& indexCat, map<std::string,TH1*> histMap, Double_t initWgt = 1.0)
    sim_pdf = RooSimultaneous("simPdf", "simultaneous pdf", sample)
    sim_pdf.addPdf(model1, 'm1')
    sim_pdf.addPdf(model2, 'm2')

    # Only referenced by the disabled RooDataHist construction below.
    variables = RooArgList()
    variables.add(fit_variable_1)
    variables.add(fit_variable_2)

    # combined_data = RooDataHist('combined_data', 'combined_data',
    #                             variables, RooFit.Index(sample),
    #                             RooFit.Import('m1', roofit_histograms_1['data']),
    #                             RooFit.Import('m2', roofit_histograms_2['data']))
    fitResult = sim_pdf.fitTo(
        combined_data,
        # RooFit.Minimizer( "Minuit2", "Migrad" ),
        # RooFit.NumCPU( 1 ),
        # RooFit.Extended(),
        RooFit.Save(),
    )
def test_plottable_clone():
    """Clone() keeps the source's style attributes and applies keyword overrides."""
    source = Hist(10, 0, 1, linecolor='blue', drawstyle='same')

    # Overriding a single attribute must leave the inherited ones intact.
    restyled = source.Clone(fillstyle='solid')
    inherited_checks = (
        ('fillstyle', 'solid'),
        ('linecolor', 'blue'),
        ('drawstyle', 'same'),
    )
    for attribute, expected in inherited_checks:
        assert_equals(getattr(restyled, attribute), expected)

    # The 'color' keyword is expected to set line, fill and marker colour.
    recoloured = source.Clone(color='red')
    for attribute in ('linecolor', 'fillcolor', 'markercolor'):
        assert_equals(getattr(recoloured, attribute), 'red')
def weighted_mass_workspace(analysis, categories, masses,
                            systematics=False, cuts=None):
    """Build mass channels weighted by a per-score-bin ln(1 + S/B) histogram.

    For every category the classifier is loaded for mass 125, background
    and signal scores are histogrammed with the classifier binning, and a
    weight histogram ``log(1 + sig / bkg)`` is derived bin by bin
    (overflow bins included). That histogram is passed as ``weight_hist``
    to ``analysis.get_channel_array`` for each requested mass.

    Returns a tuple ``(channels, [])`` where
    ``channels[mass][category.name]`` is the ``MMC_MASS`` channel.
    """
    mass_template = Hist(20, 50, 250, type='D')
    channels = {}

    for category in analysis.iter_categories(categories):
        classifier = analysis.get_clf(category, load=True, mass=125)
        score_edges = classifier.binning(analysis.year, overflow=1E5)
        score_set = analysis.get_scores(
            classifier, category, analysis.target_region,
            masses=[125],
            mode='combined',
            systematics=False,
            unblind=True)
        background_scores = score_set.bkg_scores
        signal_scores = score_set.all_sig_scores[125]
        # Read but not used further down.
        lowest_score = score_set.min_score
        highest_score = score_set.max_score

        background_hist = Hist(score_edges, type='D')
        # The signal histogram is cloned while the template is still empty.
        signal_hist = background_hist.Clone()

        hist_scores(background_hist, background_scores)
        background_copy = background_hist.Clone()
        hist_scores(signal_hist, signal_scores)
        signal_copy = signal_hist.Clone()

        # ln(1 + s / b), evaluated in place on every bin.
        weight_hist = (1 + signal_copy / background_copy)
        for score_bin in weight_hist.bins(overflow=True):
            score_bin.value = math.log(score_bin.value)
        log.info(str(list(weight_hist.y())))

        for mass in masses:
            channel_map = analysis.get_channel_array(
                {MMC_MASS: mass_template},
                category=category,
                region=analysis.target_region,
                include_signal=True,
                weight_hist=weight_hist,
                clf=classifier,
                cuts=cuts,
                mass=mass,
                mode='workspace',
                systematics=systematics)
            channels.setdefault(mass, {})[category.name] = channel_map[MMC_MASS]

    return channels, []
def draw_ROC(bkg_scores, sig_scores):
    """Plot one ROC curve per category and save the figure as ``ROC.png``.

    NOTE(review): the curves are built from ``category_scores``, a name
    that is not a parameter of this function and must be provided by the
    enclosing module. The ``bkg_scores`` / ``sig_scores`` arguments are
    never read - confirm the intended signature with the callers.
    """
    # draw ROC curves for all categories
    template = Hist(100, -1, 1)
    plt.figure()

    for label, score_pair in category_scores.items():
        category_bkg, category_sig = score_pair

        bkg_hist = template.Clone()
        sig_hist = template.Clone()
        hist_scores(bkg_hist, category_bkg)
        hist_scores(sig_hist, category_sig)

        bkg_counts = np.array(bkg_hist)
        sig_counts = np.array(sig_hist)

        # reverse cumsum: entry i holds the summed content of bins i..end
        bkg_eff = bkg_counts[::-1].cumsum()[::-1]
        sig_eff = sig_counts[::-1].cumsum()[::-1]
        # normalise in place by the total content
        bkg_eff /= bkg_counts.sum()
        sig_eff /= sig_counts.sum()

        plt.plot(sig_eff, 1. - bkg_eff,
                 linestyle='-',
                 linewidth=2.,
                 label=label)

    plt.legend(loc='lower left')
    plt.ylabel('Background Rejection')
    plt.xlabel('Signal Efficiency')
    plt.ylim(0, 1)
    plt.xlim(0, 1)
    plt.grid()

    output_path = os.path.join(PLOTS_DIR, 'ROC.png')
    plt.savefig(output_path, bbox_inches='tight')
def test_slice_assign():
    """Slice assignment accepts both plain values and bins of another histogram."""
    hist = Hist(10, 0, 1)

    # range() instead of xrange(): xrange does not exist on Python 3, and
    # the values are materialised into a list in either case.
    expected = list(range(len(hist)))
    hist[:] = expected
    assert all([a.value == b for a, b in zip(hist, expected)])

    clone = hist.Clone()
    # reverse bins
    hist[:] = clone[::-1]
    assert all([a.value == b.value for a, b in zip(hist, clone[::-1])])
def get_histograms_from_trees(
        trees = [],
        branch = 'var',
        weightBranch = 'EventWeight',
        selection = '1',
        files = {},
        verbose = False,
        nBins = 40,
        xMin = 0,
        xMax = 100,
        ignoreUnderflow = True,
        ):
    """Draw one weighted histogram of ``branch`` per (sample, tree) pair.

    Parameters:
        trees: tree names to read from every input. Never mutated here.
        branch: expression handed to ``chain.Draw``.
        weightBranch, selection: combined into the draw string
            ``'( weightBranch ) * ( selection )'``.
        files: mapping ``sample -> file`` or ``sample -> list of files``.
            Never mutated here.
        verbose: accepted for interface compatibility; not used.
        nBins, xMin, xMax: binning of every histogram.
        ignoreUnderflow: when true, content and error of bin 0 are zeroed.

    Returns:
        ``histograms[sample][tree]`` holding a clone of each filled
        histogram, or ``None`` as soon as ``is_valid_histogram`` rejects
        one (results collected so far are discarded).

    For samples whose name contains ``'data'``, a tree name containing
    ``'Up'`` or ``'Down'`` has its last ``_``-separated component removed
    before the tree is opened; the result is still stored under the
    original tree name.
    """
    histograms = {}

    # Setup selection and weight string for ttree draw
    # (independent of sample and tree, so it is built only once)
    weightAndSelection = '( %s ) * ( %s )' % ( weightBranch, selection )

    # items() rather than iteritems() so the loop runs on Python 2 and 3.
    for sample, input_file in files.items():
        histograms[sample] = {}

        for tree in trees:
            tempTree = tree
            if 'data' in sample and ( 'Up' in tempTree or 'Down' in tempTree ):
                tempTree = tempTree.replace( '_' + tempTree.split( '_' )[-1], '' )

            # The list branch used to call chain.Add() on None, which
            # always raised AttributeError. TreeChain takes a list of
            # files, so both input shapes are normalised to a list.
            if isinstance( input_file, list ):
                chain_files = list( input_file )
            else:
                chain_files = [input_file]
            chain = TreeChain( tempTree, chain_files )

            root_histogram = Hist( nBins, xMin, xMax, type = 'D' )
            chain.Draw( branch, weightAndSelection, hist = root_histogram )
            if not is_valid_histogram( root_histogram, tree, input_file ):
                return None

            # When a tree is filled with a dummy variable, it will end up in the underflow, so ignore it
            if ignoreUnderflow:
                root_histogram.SetBinContent( 0, 0 )
                root_histogram.SetBinError( 0, 0 )

            # NOTE(review): gcd() is defined elsewhere; presumably it
            # resets ROOT's current directory before cloning - confirm.
            gcd()
            histograms[sample][tree] = root_histogram.Clone()

    return histograms
def test_hist():
    """Smoke test: rplt.hist accepts a histogram, a HistStack and a list."""
    from rootpy.plotting import root2matplotlib as rplt

    gaussian = Hist(100, -5, 5)
    gaussian.FillRandom('gaus')
    rplt.hist(gaussian)

    # stack
    duplicate = gaussian.Clone()
    stacked = HistStack([gaussian, duplicate])
    for drawable in (stacked, [gaussian, duplicate]):
        rplt.hist(drawable)
def test_bar():
    """Smoke test: rplt.bar accepts a histogram, a HistStack and lists."""
    from rootpy.plotting import root2matplotlib as rplt

    gaussian = Hist(100, -5, 5)
    gaussian.FillRandom('gaus')
    rplt.bar(gaussian)

    # stack
    duplicate = gaussian.Clone()
    stacked = HistStack([gaussian, duplicate])
    rplt.bar(stacked)

    # list input with each stacking / ordering combination
    option_sets = (
        {'stacked': True},
        {'stacked': False},
        {'stacked': False, 'reverse': True},
    )
    for options in option_sets:
        # a fresh list per call, as in the individual calls
        rplt.bar([gaussian, duplicate], **options)
def setUp(self):
    """Prepare a single-region fit collection and the RooFitFit under test.

    Background, signal and data histograms are filled from ``x1``, ``x2``,
    ``x1_obs`` and ``x2_obs``, which are not defined in this method and are
    expected to come from the enclosing module. The resulting collection is
    stored as ``self.single_fit_collection`` and the fitter as
    ``self.roofitFitter``.
    """
    # create histograms
    h_bkg1_1 = Hist(100, 40, 200, title='Background')
    h_signal_1 = h_bkg1_1.Clone(title='Signal')
    h_data_1 = h_bkg1_1.Clone(title='Data')

    # fill the histograms with our distributions
    # Explicit loops instead of map(): map() is lazy on Python 3, so using
    # it only for its side effect would leave every histogram empty.
    fill_plan = (
        (h_bkg1_1, x1),
        (h_signal_1, x2),
        (h_data_1, x1_obs),
        (h_data_1, x2_obs),
    )
    for histogram, values in fill_plan:
        for value in values:
            histogram.Fill(value)

    histograms_1 = {
        'signal': h_signal_1,
        'bkg1': h_bkg1_1,
        # 'data': h_data_1
    }
    fit_data_1 = FitData(h_data_1, histograms_1, fit_boundaries=(40, 200))

    self.single_fit_collection = FitDataCollection()
    self.single_fit_collection.add(fit_data_1)

    # self.roofitFitter = RooFitFit(histograms_1, dataLabel='data', fit_boundries=(40, 200))
    self.roofitFitter = RooFitFit(self.single_fit_collection)
def weighted_mass_cba_workspace(analysis, categories, masses,
                                systematics=False, cuts=None):
    """Build mass channels with every histogram scaled by ln(1 + S/B).

    For each (category, mass) the ``MMC_MASS`` channel is fetched, the
    signal and non-signal sample histograms are summed separately, and a
    single factor ``log(1 + S.integral() / B.integral())`` is applied to
    the data histogram, every sample histogram and both sides of every
    histogram systematic.

    Returns a tuple ``(channels, [])`` where
    ``channels[mass][category.name]`` is the rescaled channel.
    """
    mass_template = Hist(20, 50, 250, type='D')
    channels = {}

    def rescale(hist, factor):
        # Scaled copy, renamed with a '_scaled' suffix.
        result = hist * factor
        result.name = hist.name + '_scaled'
        return result

    for category in analysis.iter_categories(categories):
        for mass in masses:
            channel_map = analysis.get_channel_array(
                {MMC_MASS: mass_template},
                category=category,
                region=analysis.target_region,
                include_signal=True,
                cuts=cuts,
                mass=mass,
                mode='workspace',
                systematics=systematics)
            channel = channel_map[MMC_MASS]

            # weight by ln(1 + s / b)
            signal_sum = mass_template.Clone()
            signal_sum.Reset()
            background_sum = signal_sum.Clone()
            for sample in channel.samples:
                if is_signal(sample):
                    signal_sum += sample.hist
                else:
                    background_sum += sample.hist
            weight = math.log(
                1 + signal_sum.integral() / background_sum.integral())

            channel.data.hist = rescale(channel.data.hist, weight)
            for sample in channel.samples:
                sample.hist = rescale(sample.hist, weight)
                for variation in sample.histo_sys:
                    variation.high = rescale(variation.high, weight)
                    variation.low = rescale(variation.low, weight)

            channels.setdefault(mass, {})[category.name] = channel

    return channels, []
def test_draw():
    """draw_into and draw_array must produce matching histograms.

    For every background sample the nominal integrals are compared to
    three places, the sets of systematic terms must be equal, and each
    systematic histogram's integral must agree between the two methods.
    """
    for sample in analysis.backgrounds:
        # print(x) with a single argument behaves the same on Python 2 and
        # 3; the bare print statement is a SyntaxError on Python 3.
        print(sample.name)

        hist = Hist(30, 0, 250)
        hist_array = hist.Clone()
        field_hist = {'mmc1_mass': hist_array}

        sample.draw_into(hist, 'mmc1_mass', Category_VBF, 'OS_TRK')
        sample.draw_array(field_hist, Category_VBF, 'OS_TRK')

        assert_almost_equal(hist.Integral(), hist_array.Integral(), places=3)
        assert_equal(sorted(hist.systematics.keys()),
                     sorted(hist_array.systematics.keys()))

        for term, sys_hist in hist.systematics.items():
            print(term)
            sys_hist_array = hist_array.systematics[term]
            assert_almost_equal(sys_hist.Integral(), sys_hist_array.Integral())
#global summary plots are best gathered from the samples normalizations #to take into account at least a bit of nuisance correlations norms = mlfit_file.norm_fit_s edges = set() binning = binning_file[args.varname] for i in binning.itervalues(): edges.add(i['up_edge']) edges.add(i['low_edge']) edges = sorted(list(edges)) template_hist = Hist(edges) samples = set(i.name.split('/')[-1] for i in norms) fit_histos = {} for name in samples: fit_histos[name] = template_hist.Clone() fit_histos[name].title = '%s ' % name data = template_hist.Clone() data.title = 'data_obs' for categories in groups.itervalues(): #check that all categories have identical bin index assert(len(set(binning[i]['idx'] for i in categories)) <= 1) bin_idx = binning[categories[0]]['idx']+1 for sample in samples: val = sum( norms['%s/%s' % (i, sample)].value for i in categories if '%s/%s' % (i, sample) in norms ) err = quad(*[
# set the style
style = get_style('ATLAS')
style.SetEndErrorSize(3)
set_style(style)

# set the random seed (both ROOT's and numpy's generators, so the toy
# distributions below are reproducible)
ROOT.gRandom.SetSeed(42)
np.random.seed(42)

# signal distribution
signal = 126 + 10 * np.random.randn(100)
signal_obs = 126 + 10 * np.random.randn(100)

# create histograms
h1 = Hist(30, 40, 200, title='Background', markersize=0, legendstyle='F')
h2 = h1.Clone(title='Signal')
h3 = h1.Clone(title='Data', drawstyle='E1 X0', legendstyle='LEP')
h3.markersize = 1.2

# fill the histograms with our distributions
# Explicit loops instead of map(): map() is lazy on Python 3, so using it
# only for its side effect would leave h2 and the signal part of h3 empty.
h1.FillRandom('landau', 1000)
for value in signal:
    h2.Fill(value)
h3.FillRandom('landau', 1000)
for value in signal_obs:
    h3.Fill(value)

# set visual attributes
h1.fillstyle = 'solid'
h1.fillcolor = 'green'
h1.linecolor = 'green'
h1.linewidth = 0
def unfolding_toy_diagnostics(indir, variable): plotter = BasePlotter(defaults={ 'clone': False, 'name_canvas': True, 'show_title': True, 'save': { 'png': True, 'pdf': False } }, ) styles = { 'dots': { 'linestyle': 0, 'markerstyle': 21, 'markercolor': 1 }, 'compare': { 'linesstyle': [1, 0], 'markerstyle': [0, 21], 'markercolor': [2, 1], 'linecolor': [2, 1], 'drawstyle': ['hist', 'pe'], 'legendstyle': ['l', 'p'] } } xaxislabel = set_pretty_label(variable) true_distribution = None curdir = os.getcwd() os.chdir(indir) toydirs = get_immediate_subdirectories(".") methods = [] pulls_lists = {} pull_means_lists = {} pull_mean_errors_lists = {} pull_sums_lists = {} pull_sigmas_lists = {} pull_sigma_errors_lists = {} deltas_lists = {} delta_means_lists = {} delta_mean_errors_lists = {} delta_sigmas_lists = {} delta_sigma_errors_lists = {} ratio_sums_lists = {} nneg_bins_lists = {} unfoldeds_lists = {} unfolded_sigmas_lists = {} taus_lists = {} histos_created = False lists_created = False idir = 0 true_distro = None #loop over toys for directory in toydirs: if not directory.startswith('toy_'): continue os.chdir(directory) log.debug('Inspecting toy %s' % directory) idir = idir + 1 i = 0 if not os.path.isfile("result_unfolding.root"): raise ValueError('root file not found in %s' % os.getcwd()) with io.root_open("result_unfolding.root") as inputfile: log.debug('Iteration %s over the file' % i) i = i + 1 if not methods: keys = [i.name for i in inputfile.keys()] for key in keys: if hasattr(getattr(inputfile, key), "hdata_unfolded"): methods.append(key) unfolded_hists = [ inputfile.get('%s/hdata_unfolded' % i) for i in methods ] unfolded_wps_hists = [ inputfile.get('%s/hdata_unfolded_ps_corrected' % i) for i in methods ] for unf, unfps, method in zip(unfolded_hists, unfolded_wps_hists, methods): unf.name = method unfps.name = method if true_distro is None: true_distribution = inputfile.true_distribution ROOT.TH1.AddDirectory(False) true_distro = true_distribution.Clone() taus = 
prettyjson.loads(inputfile.best_taus.GetTitle()) if len(taus_lists) == 0: taus_lists = dict((i, []) for i in taus) for i, t in taus.iteritems(): taus_lists[i].append(t) for histo in unfolded_hists: #create pull/delta containers during first iteration name = histo.name nbins = histo.nbins() log.debug("name = %s, n bins = %s" % (name, nbins)) if not lists_created: for ibin in range(1, nbins + 1): outname = "pull_" + name + "_bin" + str(ibin) pulls_lists[outname] = [] outname = "delta_" + name + "_bin" + str(ibin) deltas_lists[outname] = [] outname = "unfolded_" + name + "_bin" + str(ibin) unfoldeds_lists[outname] = [] unfolded_sigmas_lists[outname] = [] outname = "pull_" + name pull_means_lists[outname] = {} pull_mean_errors_lists[outname] = {} pull_sigmas_lists[outname] = {} pull_sigma_errors_lists[outname] = {} outname = "delta_" + name delta_means_lists[outname] = {} delta_mean_errors_lists[outname] = {} delta_sigmas_lists[outname] = {} delta_sigma_errors_lists[outname] = {} for ibin in range(1, nbins + 1): outname = "pull_" + name + "_bin" + str(ibin) unfolded_bin_content = histo.GetBinContent(ibin) unfolded_bin_error = histo.GetBinError(ibin) true_bin_content = true_distro.GetBinContent(ibin) true_bin_error = true_distro.GetBinError(ibin) total_bin_error = math.sqrt(unfolded_bin_error**2) #??? 
if (total_bin_error != 0): pull = (unfolded_bin_content - true_bin_content) / total_bin_error else: pull = 9999 log.debug( 'unfolded bin content %s +/- %s, true bin content %s, pull %s' % (unfolded_bin_content, unfolded_bin_error, true_bin_content, pull)) pulls_lists[outname].append(pull) outname = "delta_" + name + "_bin" + str(ibin) delta = unfolded_bin_content - true_bin_content log.debug( 'unfolded bin content %s +/- %s, true bin content %s, delta %s' % (unfolded_bin_content, unfolded_bin_error, true_bin_content, delta)) deltas_lists[outname].append(delta) outname = "unfolded_" + name + "_bin" + str(ibin) unfoldeds_lists[outname].append(unfolded_bin_content) unfolded_sigmas_lists[outname].append(unfolded_bin_error) nneg_bins_hists = [ i for i in inputfile.keys() if i.GetName().startswith("nneg_bins") ] nneg_bins_hists = [asrootpy(i.ReadObj()) for i in nneg_bins_hists] for histo in nneg_bins_hists: #create pull/delta containers during first iteration name = histo.name nbins = histo.nbins() log.debug("name = %s, n bins = %s" % (name, nbins)) if not lists_created: outname = name nneg_bins_lists[outname] = [] outname = name nneg_bins_lists[outname].append(histo.GetBinContent(1)) pull_sums_hists = [ i for i in inputfile.keys() if i.GetName().startswith("sum_of_pulls") ] pull_sums_hists = [asrootpy(i.ReadObj()) for i in pull_sums_hists] for histo in pull_sums_hists: #create pull/delta containers during first iteration name = histo.name nbins = histo.nbins() log.debug("name = %s, n bins = %s" % (name, nbins)) if not lists_created: outname = name pull_sums_lists[outname] = [] outname = name pull_sums_lists[outname].append(histo.GetBinContent(1)) ratio_sums_hists = [ i for i in inputfile.keys() if i.GetName().startswith("sum_of_ratios") ] ratio_sums_hists = [ asrootpy(i.ReadObj()) for i in ratio_sums_hists ] for histo in ratio_sums_hists: #create ratio/delta containers during first iteration name = histo.name nbins = histo.nbins() log.debug("name = %s, n bins = %s" % 
(name, nbins)) if not lists_created: outname = name ratio_sums_lists[outname] = [] outname = name ratio_sums_lists[outname].append(histo.GetBinContent(1)) #after the first iteration on the file all the lists are created lists_created = True os.chdir("..") #create histograms #histo containers taus = {} for name, vals in taus_lists.iteritems(): ROOT.TH1.AddDirectory(False) #repeat, you never know val_min = min(vals) val_min = 0.8 * val_min if val_min > 0 else 1.2 * val_min val_max = max(vals) val_max = 0.8 * val_max if val_max < 0 else 1.2 * val_max if val_min == val_max: if tau_nbins % 2: #if odd val_min, val_max = val_min - 0.01, val_min + 0.01 else: brange = 0.02 bwidth = brange / tau_nbins val_min, val_max = val_min - 0.01 + bwidth / 2., val_min + 0.01 + bwidth / 2. title = '#tau choice - %s ;#tau;N_{toys}' % (name) histo = Hist(tau_nbins, val_min, val_max, name=name, title=title) for val in vals: histo.Fill(val) taus[name] = histo pulls = {} for name, vals in pulls_lists.iteritems(): ROOT.TH1.AddDirectory(False) #repeat, you never know val_min = min(vals) val_min = 0.8 * val_min if val_min > 0 else 1.2 * val_min val_max = max(vals) val_max = 0.8 * val_max if val_max < 0 else 1.2 * val_max abs_max = max(abs(val_min), abs(val_max)) if 'L_curve' in name: method = 'L_curve' binno = name.split('_')[-1] else: _, method, binno = tuple(name.split('_')) title = 'Pulls - %s - %s ;Pull;N_{toys}' % (binno, method) histo = Hist(pull_nbins, -abs_max, abs_max, name=name, title=title) for val in vals: histo.Fill(val) pulls[name] = histo deltas = {} for name, vals in deltas_lists.iteritems(): ROOT.TH1.AddDirectory(False) #repeat, you never know val_min = min(vals) val_min = 0.8 * val_min if val_min > 0 else 1.2 * val_min val_max = max(vals) val_max = 0.8 * val_max if val_max < 0 else 1.2 * val_max if 'L_curve' in name: method = 'L_curve' binno = name.split('_')[-1] else: _, method, binno = tuple(name.split('_')) title = 'Deltas - %s - %s ;Delta;N_{toys}' % (binno, method) histo 
= Hist(delta_nbins, val_min, val_max, name=name, title=title) for val in vals: histo.Fill(val) deltas[name] = histo unfoldeds = {} for name, vals in unfoldeds_lists.iteritems(): ROOT.TH1.AddDirectory(False) #repeat, you never know val_min = min(vals) val_min = 0.8 * val_min if val_min > 0 else 1.2 * val_min val_max = max(vals) val_max = 0.8 * val_max if val_max < 0 else 1.2 * val_max if 'L_curve' in name: method = 'L_curve' binno = name.split('_')[-1] else: _, method, binno = tuple(name.split('_')) title = 'Unfoldeds - %s - %s ;Unfolded;N_{toys}' % (binno, method) histo = Hist(unfolded_nbins, val_min, val_max, name=name, title=title) for val in vals: histo.Fill(val) unfoldeds[name] = histo nneg_bins = {} for name, vals, in nneg_bins_lists.iteritems(): ROOT.TH1.AddDirectory(False) #repeat, you never know val_min = min(vals) val_min = 0 if val_min > 0 else val_min - 1 val_max = max(vals) val_max = 0 if val_max < 0 else val_max + 1 if 'L_curve' in name: method = 'L_curve' else: set_trace() _, method, _ = tuple(name.split('_')) title = 'N of negative bins - %s ;N. 
neg bins;N_{toys}' % method histo = Hist(int(val_max - val_min + 1), val_min, val_max, name=name, title=title) for val in vals: histo.Fill(val) nneg_bins[name] = histo pull_sums = {} for name, vals in pull_sums_lists.iteritems(): ROOT.TH1.AddDirectory(False) #repeat, you never know val_min = min(vals) val_min = 0.8 * val_min if val_min > 0 else 1.2 * val_min val_max = max(vals) val_max = 0.8 * val_max if val_max < 0 else 1.2 * val_max if 'L_curve' in name: method = 'L_curve' else: set_trace() _, _, _, _, _, method = tuple(name.split('_')) title = 'Pull sums - %s ;#Sigma(pull)/N_{bins};N_{toys}' % method histo = Hist(unfolded_nbins, val_min, val_max, name=name, title=title) for val in vals: histo.Fill(val) pull_sums[name] = histo ratio_sums = {} for name, vals in ratio_sums_lists.iteritems(): ROOT.TH1.AddDirectory(False) #repeat, you never know val_min = min(vals) val_min = 0.8 * val_min if val_min > 0 else 1.2 * val_min val_max = max(vals) val_max = 0.8 * val_max if val_max < 0 else 1.2 * val_max if 'L_curve' in name: method = 'L_curve' binno = name.split('_')[-1] else: set_trace() _, _, _, _, _, method = tuple(name.split('_')) title = 'Ratio sums - %s;#Sigma(ratio)/N_{bins};N_{toys}' % method histo = Hist(unfolded_nbins, val_min, val_max, name=name, title=title) for val in vals: histo.Fill(val) ratio_sums[name] = histo unfolded_sigmas = {} for name, vals in unfolded_sigmas_lists.iteritems(): ROOT.TH1.AddDirectory(False) #repeat, you never know val_min = min(vals) val_min = 0.8 * val_min if val_min > 0 else 1.2 * val_min val_max = max(vals) val_max = 0.8 * val_max if val_max < 0 else 1.2 * val_max if 'L_curve' in name: method = 'L_curve' binno = name.split('_')[-1] else: _, method, binno = tuple(name.split('_')) title = 'Unfolded uncertainties - %s - %s ;Uncertainty;N_{toys}' % ( binno, method) histo = Hist(unfolded_nbins, val_min, val_max, name=name, title=title) for val in vals: histo.Fill(val) unfolded_sigmas[name] = histo for name, histo in pulls.iteritems(): 
log.debug("name is %s and object type is %s" % (name, type(histo))) histo.Fit("gaus", 'Q') if not histo.GetFunction("gaus"): log.warning("Function not found for histogram %s" % name) continue mean = histo.GetFunction("gaus").GetParameter(1) meanError = histo.GetFunction("gaus").GetParError(1) sigma = histo.GetFunction("gaus").GetParameter(2) sigmaError = histo.GetFunction("gaus").GetParError(2) general_name, idx = tuple(name.split('_bin')) idx = int(idx) pull_means_lists[general_name][idx] = mean pull_mean_errors_lists[general_name][idx] = meanError pull_sigmas_lists[general_name][idx] = sigma pull_sigma_errors_lists[general_name][idx] = sigmaError for name, histo in deltas.iteritems(): log.debug("name is %s and object type is %s" % (name, type(histo))) histo.Fit("gaus", 'Q') if not histo.GetFunction("gaus"): log.warning("Function not found for histogram %s" % name) continue mean = histo.GetFunction("gaus").GetParameter(1) meanError = histo.GetFunction("gaus").GetParError(1) sigma = histo.GetFunction("gaus").GetParameter(2) sigmaError = histo.GetFunction("gaus").GetParError(2) general_name, idx = tuple(name.split('_bin')) idx = int(idx) delta_means_lists[general_name][idx] = mean delta_mean_errors_lists[general_name][idx] = meanError delta_sigmas_lists[general_name][idx] = sigma delta_sigma_errors_lists[general_name][idx] = sigmaError outfile = rootpy.io.File("unfolding_diagnostics.root", "RECREATE") outfile.cd() pull_means = {} pull_sigmas = {} pull_means_summary = {} pull_sigmas_summary = {} delta_means = {} delta_sigmas = {} delta_means_summary = {} delta_sigmas_summary = {} for outname, pmeans in pull_means_lists.iteritems(): outname_mean = outname + "_mean" outtitle = "Pull means - " + outname + ";Pull mean; N_{toys}" pull_mean_min = min(pmeans.values()) pull_mean_max = max(pmeans.values()) pull_mean_newmin = pull_mean_min - (pull_mean_max - pull_mean_min) * 0.5 pull_mean_newmax = pull_mean_max + (pull_mean_max - pull_mean_min) * 0.5 pull_means[outname] = 
plotting.Hist(pull_mean_nbins, pull_mean_newmin, pull_mean_newmax, name=outname_mean, title=outtitle) outname_mean_summary = outname + "_mean_summary" outtitle_mean_summary = "Pull mean summary - " + outname histocloned = true_distro.Clone(outname_mean_summary) histocloned.Reset() histocloned.xaxis.title = xaxislabel histocloned.yaxis.title = 'Pull mean' histocloned.title = outtitle_mean_summary pull_means_summary[outname] = histocloned for idx, pmean in pmeans.iteritems(): pull_means[outname].Fill(pmean) histocloned[idx].value = pmean histocloned[idx].error = pull_mean_errors_lists[outname][idx] histocloned.yaxis.SetRangeUser(min(pmeans.values()), max(pmeans.values())) for outname, psigmas in pull_sigmas_lists.iteritems(): outname_sigma = outname + "_sigma" outtitle_sigma = "Pull #sigma's - " + outname + ";Pull #sigma; N_{toys}" pull_sigma_min = min(psigmas.values()) pull_sigma_max = max(psigmas.values()) pull_sigma_newmin = pull_sigma_min - (pull_sigma_max - pull_sigma_min) * 0.5 pull_sigma_newmax = pull_sigma_max + (pull_sigma_max - pull_sigma_min) * 0.5 pull_sigmas[outname] = plotting.Hist(pull_sigma_nbins, pull_sigma_newmin, pull_sigma_newmax, name=outname_sigma, title=outtitle_sigma) outname_sigma_summary = outname + "_sigma_summary" outtitle_sigma_summary = "Pull #sigma summary - " + outname histocloned = true_distro.Clone(outname_sigma_summary) histocloned.Reset() histocloned.xaxis.title = xaxislabel histocloned.yaxis.title = 'Pull #sigma' histocloned.title = outtitle_sigma_summary pull_sigmas_summary[outname] = histocloned for idx, psigma in psigmas.iteritems(): pull_sigmas[outname].Fill(psigma) histocloned[idx].value = psigma histocloned[idx].error = pull_sigma_errors_lists[outname][idx] histocloned.yaxis.SetRangeUser(min(psigmas.values()), max(psigmas.values())) for outname, dmeans in delta_means_lists.iteritems(): outname_mean = outname + "_mean" outtitle = "Delta means - " + outname + ";Delta mean; N_{toys}" delta_mean_min = min(dmeans.values()) 
delta_mean_max = max(dmeans.values()) delta_mean_newmin = delta_mean_min - (delta_mean_max - delta_mean_min) * 0.5 delta_mean_newmax = delta_mean_max + (delta_mean_max - delta_mean_min) * 0.5 delta_means[outname] = plotting.Hist(delta_mean_nbins, delta_mean_newmin, delta_mean_newmax, name=outname_mean, title=outtitle) outname_mean_summary = outname + "_mean_summary" outtitle_mean_summary = "Delta mean summary - " + outname histocloned = true_distro.Clone(outname_mean_summary) histocloned.Reset() histocloned.xaxis.title = xaxislabel histocloned.yaxis.title = 'Delta mean' histocloned.title = outtitle_mean_summary delta_means_summary[outname] = histocloned for idx, dmean in dmeans.iteritems(): delta_means[outname].Fill(dmean) histocloned[idx].value = dmean histocloned[idx].error = delta_mean_errors_lists[outname][idx] histocloned.yaxis.SetRangeUser(min(dmeans.values()), max(dmeans.values())) for outname, dsigmas in delta_sigmas_lists.iteritems(): outname_sigma = outname + "_sigma" outtitle_sigma = "Delta #sigma's - " + outname + ";Delta #sigma; N_{toys}" delta_sigma_min = min(dsigmas.values()) delta_sigma_max = max(dsigmas.values()) delta_sigma_newmin = delta_sigma_min - (delta_sigma_max - delta_sigma_min) * 0.5 delta_sigma_newmax = delta_sigma_max + (delta_sigma_max - delta_sigma_min) * 0.5 delta_sigmas[outname] = plotting.Hist(delta_sigma_nbins, delta_sigma_newmin, delta_sigma_newmax, name=outname_sigma, title=outtitle_sigma) outname_sigma_summary = outname + "_sigma_summary" outtitle_sigma_summary = "Delta #sigma summary - " + outname histocloned = true_distro.Clone(outname_sigma_summary) histocloned.Reset() histocloned.xaxis.title = xaxislabel histocloned.yaxis.title = 'Delta #sigma' histocloned.title = outtitle_sigma_summary delta_sigmas_summary[outname] = histocloned for idx, dsigma in dsigmas.iteritems(): delta_sigmas[outname].Fill(dsigma) histocloned[idx].value = dsigma histocloned[idx].error = delta_sigma_errors_lists[outname][idx] 
histocloned.yaxis.SetRangeUser(min(dsigmas.values()), max(dsigmas.values())) unfolded_summary = {} unfolded_average = {} unfolded_envelope = {} for name, histo in unfoldeds.iteritems(): log.debug("name is %s and object type is %s" % (name, type(histo))) histo.Fit("gaus", 'Q') if not histo.GetFunction("gaus"): log.warning("Function not found for histogram %s" % name) continue mean = histo.GetFunction("gaus").GetParameter(1) meanError = histo.GetFunction("gaus").GetParError(1) sigma = histo.GetFunction("gaus").GetParameter(2) sigmaError = histo.GetFunction("gaus").GetParError(2) general_name, idx = tuple(name.split('_bin')) idx = int(idx) if general_name not in unfolded_summary: histo = true_distro.Clone("%s_unfolded_summary" % general_name) outtitle_unfolded_summary = "Unfolded summary - " + general_name histo.Reset() histo.xaxis.title = xaxislabel histo.yaxis.title = 'N_{events}' histo.title = outtitle_unfolded_summary unfolded_summary[general_name] = histo unfolded_envelope[general_name] = histo.Clone( "%s_unfolded_envelope" % general_name) unfolded_average[general_name] = histo.Clone( "%s_unfolded_average" % general_name) unfolded_summary[general_name][idx].value = mean unfolded_summary[general_name][idx].error = meanError unfolded_envelope[general_name][idx].value = mean unfolded_envelope[general_name][idx].error = sigma unfolded_average[general_name][idx].value = mean unfolded_average[general_name][idx].error = \ unfolded_sigmas['%s_bin%i' % (general_name, idx)].GetMean() plotter.set_subdir('taus') for name, histo in taus.iteritems(): #canvas = plotter.create_and_write_canvas_single(0, 21, 1, False, False, histo, write=False) plotter.canvas.cd() histo = plotter.plot(histo, **styles['dots']) histo.SetStats(True) info = plotter.make_text_box( 'mode #tau = %.5f' % histo[histo.GetMaximumBin()].x.center, position=(plotter.pad.GetLeftMargin(), plotter.pad.GetTopMargin(), 0.3, 0.025)) info.Draw() plotter.save() histo.Write() plotter.canvas.Write() 
plotter.set_subdir('pulls') for name, histo in pulls.iteritems(): histo = plotter.plot(histo, **styles['dots']) histo.SetStats(True) plotter.save() histo.Write() plotter.canvas.Write() for name, histo in pull_means.iteritems(): histo = plotter.plot(histo, **styles['dots']) histo.Write() plotter.save() for name, histo in pull_sigmas.iteritems(): histo = plotter.plot(histo, **styles['dots']) histo.Write() plotter.save() plotter.set_subdir('pull_summaries') for name, histo in pull_means_summary.iteritems(): histo = plotter.plot(histo, **styles['dots']) #histo.SetStats(True) line = ROOT.TLine(histo.GetBinLowEdge(1), 0, histo.GetBinLowEdge(histo.GetNbinsX() + 1), 0) line.Draw("same") plotter.save() histo.Write() plotter.canvas.Write() for name, histo in pull_sigmas_summary.iteritems(): histo = plotter.plot(histo, **styles['dots']) #histo.SetStats(True) line = ROOT.TLine(histo.GetBinLowEdge(1), 1, histo.GetBinLowEdge(histo.GetNbinsX() + 1), 1) line.Draw("same") plotter.save() histo.Write() plotter.canvas.Write() plotter.set_subdir('deltas') for name, histo in deltas.iteritems(): histo = plotter.plot(histo, **styles['dots']) histo.SetStats(True) plotter.save() histo.Write() plotter.canvas.Write() for name, histo in delta_means.iteritems(): histo = plotter.plot(histo, **styles['dots']) histo.Write() plotter.save() for name, histo in delta_sigmas.iteritems(): histo = plotter.plot(histo, **styles['dots']) histo.Write() plotter.save() plotter.set_subdir('delta_summaries') for name, histo in delta_means_summary.iteritems(): histo = plotter.plot(histo, **styles['dots']) #histo.SetStats(True) plotter.save() histo.Write() plotter.canvas.Write() for name, histo in delta_sigmas_summary.iteritems(): histo = plotter.plot(histo, **styles['dots']) #histo.SetStats(True) plotter.save() histo.Write() plotter.canvas.Write() plotter.set_subdir('unfolding_unc') for name, histo in unfolded_sigmas.iteritems(): histo = plotter.plot(histo, **styles['dots']) histo.SetStats(True) plotter.save() 
histo.Write() plotter.canvas.Write() plotter.set_subdir('unfolded') for name, histo in unfoldeds.iteritems(): histo = plotter.plot(histo, **styles['dots']) histo.SetStats(True) plotter.save() histo.Write() plotter.canvas.Write() plotter.set_subdir('unfolded_summaries') for name, histo in unfolded_summary.iteritems(): histo = plotter.plot(histo, **styles['dots']) histo.SetStats(True) plotter.save() histo.Write() plotter.canvas.Write() for name, histo in unfolded_summary.iteritems(): leg = LegendDefinition("Unfolding comparison", 'NE', labels=['Truth', 'Unfolded']) plotter.overlay_and_compare([true_distro], histo, legend_def=leg, **styles['compare']) plotter.canvas.name = 'Pull_' + name plotter.save() plotter.canvas.Write() plotter.overlay_and_compare([true_distro], histo, legend_def=leg, method='ratio', **styles['compare']) plotter.canvas.name = 'Ratio_' + name plotter.save() plotter.canvas.Write() plotter.set_subdir('unfolded_average') for name, histo in unfolded_average.iteritems(): leg = LegendDefinition("Unfolding comparison", 'NE', labels=['Truth', 'Unfolded']) #set_trace() plotter.overlay_and_compare([true_distro], histo, legend_def=leg, **styles['compare']) plotter.canvas.name = 'Pull_' + name plotter.save() plotter.canvas.Write() plotter.overlay_and_compare([true_distro], histo, legend_def=leg, method='ratio', **styles['compare']) plotter.canvas.name = 'Ratio_' + name plotter.save() plotter.canvas.Write() plotter.set_subdir('unfolded_envelope') for name, histo in unfolded_envelope.iteritems(): leg = LegendDefinition("Unfolding comparison", 'NE', labels=['Truth', 'Unfolded']) plotter.overlay_and_compare([true_distro], histo, legend_def=leg, **styles['compare']) plotter.canvas.name = 'Pull_' + name plotter.save() plotter.canvas.Write() plotter.overlay_and_compare([true_distro], histo, legend_def=leg, method='ratio', **styles['compare']) plotter.canvas.name = 'Ratio_' + name plotter.save() plotter.canvas.Write() plotter.set_subdir('figures_of_merit') for name, 
histo in nneg_bins.iteritems(): histo = plotter.plot(histo, **styles['dots']) histo.SetStats(True) plotter.save() histo.Write() plotter.canvas.Write() for name, histo in pull_sums.iteritems(): histo = plotter.plot(histo, **styles['dots']) histo.SetStats(True) plotter.save() histo.Write() plotter.canvas.Write() for name, histo in ratio_sums.iteritems(): histo = plotter.plot(histo, **styles['dots']) histo.SetStats(True) plotter.save() histo.Write() plotter.canvas.Write() outfile.close() os.chdir(curdir)
lbias[xb, yb].error = bias.std_dev if not args.asimov: tag = '%.1f' % cval if args.conly else '%.1f_%.1f' % (cval, lval) hdir.WriteTObject(csf, 'csf_%s' % tag) hdir.WriteTObject(csf_prefit, 'csf_prefit_%s' % tag) hdir.WriteTObject(csf2d, 'csf2D_%s' % tag) if not args.conly: hdir.WriteTObject(lsf2d, 'lsf2D_%s' % tag) hdir.WriteTObject(lsf, 'lsf_%s' % tag) hdir.WriteTObject(lsf_prefit, 'lsf_prefit_%s' % tag) for i in failing: new_val = i.value / mtot i.value = new_val out.WriteTObject(cbias.Clone(), 'cbias') out.WriteTObject(failing.Clone(), 'failing') out.WriteTObject(ccover.Clone(), 'ccover') if args.systematic: out.WriteTObject(ccover_p.Clone(), 'ccover_p') if not args.conly: out.WriteTObject(lbias.Clone(), 'lbias') out.WriteTObject(lcover.Clone(), 'lcover') if args.systematic: out.WriteTObject(lcoverp.Clone(), 'lcoverp') else: infile = root_open('%s/toys_summary.root' % indir) cbias = infile.cbias failing = infile.failing ccover = infile.ccover if not args.conly:
set_style('ATLAS')

# Toy classifier scores: three normal samples with the same width (0.2) and
# means shifted by +/-0.05 around the nominal mean of -0.3.  The sample size
# comes from the command-line option ``args.events``.
nominal_scores = np.random.normal(-.3, .2, size=args.events)
up_scores = np.random.normal(-.25, .2, size=args.events)
dn_scores = np.random.normal(-.35, .2, size=args.events)


def transform(x):
    """
    Squash ``x`` into the open interval (-1, 1) with a scaled logistic curve.

    Computes ``2 / (1 + exp(-k * x)) - 1`` where ``k`` is
    ``args.transform_scale`` (read from the enclosing scope at call time).
    """
    return 2.0 / (1.0 + np.exp(-args.transform_scale * x)) - 1.0

# The histogram range [-1, 1] matches the output range of ``transform``.
nominal = Hist(args.bins, -1, 1, title='Nominal')
nominal.fill_array(transform(nominal_scores))
# ``Clone`` copies the bin contents along with the binning, hence the
# ``Reset`` before each variation is filled with its own sample.
up = nominal.Clone(title='Up', linecolor='red', linestyle='dashed', linewidth=2)
up.Reset()
up.fill_array(transform(up_scores))
dn = nominal.Clone(title='Down', linecolor='blue', linestyle='dashed', linewidth=2)
dn.Reset()
dn.fill_array(transform(dn_scores))
# Plot the nominal, up, and down scores
canvas = Canvas()
# Leave 10% headroom above the tallest bin of the 'Down' histogram.
# NOTE(review): only ``dn`` is inspected here; if ``nominal`` or ``up`` peak
# higher they may be clipped -- confirm this is intended.
nominal.SetMaximum(max(dn) * 1.1)
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import rootpy.plotting.root2matplotlib as rplt
from rootpy.plotting import Hist

# Visual check of hatch rendering: two hatched matplotlib polygons plus a
# hatched band between two rootpy histograms, written out as EPS and PDF.
theta = np.linspace(0, 2 * np.pi, 100)
plt.figure(figsize=(8, 8), dpi=100)

# Unfilled sine polygon with a blue outline and forward-slash hatching.
sine_style = dict(color='None', edgecolor='blue', hatch='///')
plt.fill(theta, np.sin(theta), **sine_style)

# Second polygon (double frequency), no outline, drawn on top (zorder 950).
double_sine_style = dict(color='None', linewidth=0, hatch=r'\\', zorder=950)
plt.fill(theta, np.sin(2 * theta), **double_sine_style)

# Two 5-bin histograms over the same x range; only the second one gets an
# entry (x=3 with weight 0.5), so the band between them is non-empty there.
lower_hist = Hist(5, theta[0], theta[-1])
upper_hist = lower_hist.Clone()
upper_hist.Fill(3, .5)

band_style = dict(
    edgecolor='black',
    linewidth=0,
    facecolor=(0, 0, 0, 0),  # fully transparent face: only the hatch shows
    hatch=r'\\\\\\\\',
    zorder=900,
)
rplt.fill_between(lower_hist, upper_hist, **band_style)

for target in ('./test.eps', './test.pdf'):
    plt.savefig(target)
def rebin_hist(hist, new_binning, axis='x'):
    """
    Rebin ``hist`` along one axis and return ``(rebinned_hist, hist_template)``.

    Parameters
    ----------
    hist : 1D, 2D or 3D histogram (``hist.DIM`` in 1, 2, 3)
        Source histogram.  If it carries a ``systematics`` dict of
        histograms, each of those is rebinned the same way.
    new_binning : list of bin edges for the axis being rebinned.
    axis : 'x', 'y' or 'z'
        Axis to rebin; the other axes keep the binning of ``hist``.

    Returns
    -------
    (new_hist, new_hist_template) : the filled rebinned histogram and an
        empty histogram with the same (new) binning.

    Raises
    ------
    AssertionError if ``axis`` is not one of 'x', 'y', 'z'.
    ValueError if ``hist.DIM`` is not 1, 2 or 3.

    WARNING: doesn't assert that the edges of the new binning match the old
    ones.  Only in-range bins (1..N on each axis) of ``hist`` are
    transferred; under/overflow content is not copied.
    """
    assert axis in ['x', 'y', 'z']

    x_binning = [hist.GetBinLowEdge(i)
                 for i in range(1, hist.GetNbinsX() + 2)]
    y_binning = [hist.GetYaxis().GetBinLowEdge(i)
                 for i in range(1, hist.GetNbinsY() + 2)]
    z_binning = [hist.GetZaxis().GetBinLowEdge(i)
                 for i in range(1, hist.GetNbinsZ() + 2)]

    # Compare by value: ``is`` on string literals only works by the accident
    # of interning.
    if axis == 'x':
        x_binning = new_binning
    elif axis == 'y':
        y_binning = new_binning
    elif axis == 'z':
        z_binning = new_binning
    else:
        # Only reachable when assertions are disabled (python -O).
        print("ERROR! Can only rebin x, y or z axis")

    if hist.DIM == 1:
        new_hist_template = Hist(x_binning, type='D')
    elif hist.DIM == 2:
        new_hist_template = Hist2D(x_binning, y_binning, type='D')
    elif hist.DIM == 3:
        new_hist_template = Hist3D(x_binning, y_binning, z_binning, type='D')
    else:
        raise ValueError(
            "cannot rebin a histogram with DIM=%r" % (hist.DIM,))

    # Save the stats of the histogram.  ROOT's GetStats writes up to kNstat
    # (13) doubles -- 11 of them for a 3D histogram -- so the buffer must be
    # at least that large.
    stat_array = array.array('d', [0.] * 13)
    hist.GetStats(stat_array)
    entries = hist.GetEntries()

    new_hist = new_hist_template.Clone()
    has_systematics = hasattr(hist, 'systematics')
    if has_systematics:
        new_hist.systematics = {}
        for sys_term in hist.systematics:
            new_hist.systematics[sys_term] = new_hist_template.Clone()

    old_xaxis, old_yaxis, old_zaxis = (
        hist.GetXaxis(), hist.GetYaxis(), hist.GetZaxis())
    new_xaxis, new_yaxis, new_zaxis = (
        new_hist.GetXaxis(), new_hist.GetYaxis(), new_hist.GetZaxis())

    # Use the per-axis FindBin to find out where the bins should be merged
    # into.  (TH1.FindBin on a 2D/3D histogram returns a *global* bin number,
    # which is not a valid x-axis index.)
    for x in range(1, hist.GetNbinsX() + 1):
        new_x = new_xaxis.FindBin(old_xaxis.GetBinCenter(x))
        for y in range(1, hist.GetNbinsY() + 1):
            new_y = new_yaxis.FindBin(old_yaxis.GetBinCenter(y))
            for z in range(1, hist.GetNbinsZ() + 1):
                new_z = new_zaxis.FindBin(old_zaxis.GetBinCenter(z))

                v = hist.GetBinContent(x, y, z)
                new_v = new_hist.GetBinContent(new_x, new_y, new_z)
                new_hist.SetBinContent(new_x, new_y, new_z, v + new_v)
                comb_w2N = add_stat_w2(hist, (x, y, z),
                                       new_hist, (new_x, new_y, new_z))
                set_stat_w2(new_hist, comb_w2N, new_x, new_y, new_z)

                if not has_systematics:
                    continue
                # Rebin the systematics histograms, too
                for sys_term in hist.systematics:
                    v = hist.systematics[sys_term].GetBinContent(x, y, z)
                    new_v = new_hist.systematics[sys_term].GetBinContent(
                        new_x, new_y, new_z)
                    new_hist.systematics[sys_term].SetBinContent(
                        new_x, new_y, new_z, v + new_v)
                    # WARNING: stats completely ignored in systematics
                    # histogram for now.

    # Restores the stats of the NOMINAL histogram
    new_hist.SetEntries(entries)
    new_hist.PutStats(stat_array)
    return new_hist, new_hist_template
from rootpy.plotting import Hist
from rootpy.interactive import wait
from mva.stats.smooth import smooth, smooth_alt
import ROOT
# Batch mode is switched off explicitly for this script.
ROOT.gROOT.SetBatch(False)
import numpy as np

# Nominal histogram: 50 bins on [0, 100] filled with 1000 uniform samples.
nom = Hist(50, 0, 100, linewidth=2)
nom.fill_array(np.random.uniform(0, 100, 1000))

# The variation is cloned *before* the nominal is smoothed, so it starts
# from the raw (unsmoothed) bin contents.
# NOTE(review): the name ``sys`` would shadow the standard-library module if
# that were ever imported here; ``bin`` below shadows the builtin.
sys = nom.Clone(linewidth=2, linestyle='dashed')
nom.Smooth(10)

# Add an offset to the variation that grows linearly with the enumeration
# index (0.1 per step).
for i, bin in enumerate(sys.bins()):
    bin.value += i / 10.

# Two smoothed versions of the variation, built by the project helpers
# ``smooth`` and ``smooth_alt`` from the nominal and the raw variation.
smooth_sys = smooth(nom, sys, 10, linecolor='red', linewidth=2, linestyle='dashed')
smooth_alt_sys = smooth_alt(nom, sys, linecolor='blue', linewidth=2, linestyle='dashed')

# Scale the y-axis to 120% of the tallest of the four histograms.
nom.SetMaximum(
    max(nom.GetMaximum(), sys.GetMaximum(), smooth_sys.GetMaximum(), smooth_alt_sys.GetMaximum()) * 1.2)
# Toy inputs: two Gaussian shapes (a wide one and a narrow one).  Region 1
# has the wide shape at mu1 and the narrow one at mu2; region 2 swaps the
# two means.
mu1, mu2, sigma1, sigma2 = 100, 140, 15, 5
x1 = mu1 + sigma1 * np.random.randn(N_bkg1)
x2 = mu2 + sigma2 * np.random.randn(N_signal)
x1_obs = mu1 + sigma1 * np.random.randn(N_bkg1_obs)
x2_obs = mu2 + sigma2 * np.random.randn(N_signal_obs)

x3 = mu2 + sigma1 * np.random.randn(N_bkg1)
x4 = mu1 + sigma2 * np.random.randn(N_signal)
x3_obs = mu2 + sigma1 * np.random.randn(N_bkg1_obs)
x4_obs = mu1 + sigma2 * np.random.randn(N_signal_obs)

data_scale = 1.2
N_data = N_data * data_scale

# All histograms share the binning of the first one (100 bins on [40, 200]).
h_bkg1_1 = Hist(100, 40, 200, title='Background')
h_signal_1 = h_bkg1_1.Clone(title='Signal')
h_data_1 = h_bkg1_1.Clone(title='Data')
h_bkg1_2 = h_bkg1_1.Clone(title='Background')
h_signal_2 = h_bkg1_1.Clone(title='Signal')
h_data_2 = h_bkg1_1.Clone(title='Data')

# fill the histograms with our distributions
# (explicit loops rather than ``map(h.Fill, xs)``: ``map`` is lazy on
# Python 3, so relying on it for side effects would leave the histograms
# empty there)
for _hist, _values in (
        (h_bkg1_1, x1),
        (h_signal_1, x2),
        (h_data_1, x1_obs),
        (h_data_1, x2_obs),
        (h_bkg1_2, x3),
        (h_signal_2, x4),
        (h_data_2, x3_obs),
        (h_data_2, x4_obs),
):
    for _value in _values:
        _hist.Fill(_value)
with root_open(fname) as asifile: toy = asifile.toys.toy_asimov for bin in toy: if args.allbins: key = bin.leaves['CMS_channel'].index, bin.leaves[ 'CMS_th1x'].value else: key = bin.leaves['CMS_channel'].index tot += bin.weight if key in asimov: asimov[key] += bin.weight else: asimov[key] = bin.weight asitot = total.Clone() asitot.Reset() asitot.Fill(tot, height) asitot.markerstyle = 20 asitot.markercolor = 'red' f1 = plotter.parse_formula('height*TMath::Poisson(x, mean)', 'height[1], mean[%f]' % (tot), [0, 100000]) f1.linecolor = 'red' f1.linewidth = 2 peak = f1(tot) f1 = plotter.parse_formula('height*TMath::Poisson(x, mean)', 'height[%f], mean[%f]' % (height / peak, tot), [0, 100000]) canlog = all(i > 0 for i in asimov.itervalues())
print "effic: %.6f" % efficiency(ztautau, level_selection, prong, category) print "high: %.6f low: %.6f" % efficiency_uncertainty( ztautau, level_selection, prong, category) print "=" * 20 """ import ROOT ROOT.gROOT.SetBatch(True) from higgstautau.tauid import uncertainty from rootpy.plotting import Hist, Canvas from matplotlib import pyplot as plt from rootpy.plotting import root2matplotlib as rplt nominal = Hist(50, 0, 1, title='nominal') high = nominal.Clone(title='high') low = nominal.Clone(title='low') high.linecolor = 'red' low.linecolor = 'blue' for weight, event in tauid_sample.iter(): high_score, low_score = uncertainty(event.tau1_BDTJetScore, pt, event.tau1_numTrack, event.number_of_good_vertices) nominal.Fill(event.tau1_BDTJetScore, weight) low.Fill(low_score, weight) high.Fill(high_score, weight) fig = plt.figure() rplt.hist(nominal, histtype='stepfilled')
# h_data.FillRandom( t1Scale * h_t1Shape + t2Scale * h_t2Shape + t3Scale * h_t3Shape, nData ) fillingHistogram = 0 if useT1: fillingHistogram += t1Scale * h_t1Shape if useT2: fillingHistogram += t2Scale * h_t2Shape if useT3: fillingHistogram += t3Scale * h_t3Shape if useT4: fillingHistogram += t4Scale * h_t4Shape if useDataFromFile: h_data = getDataFromFile() else: # h_data.FillRandom( ( h_t1 * t1Scale + h_t2 * t2Scale + h_t3 * t3Scale ), nData ) # print 'Integral :',h_data.Integral() # h_data = h_t1 * t1Scale + h_t2 * t2Scale + h_t3 * t3Scale # h_data.FillRandom( h_t3, nData ) dataFillingHistogram = 0 if useT1: dataFillingHistogram += h_t1.Clone() if useT2: dataFillingHistogram += h_t2.Clone() if useT3: dataFillingHistogram += h_t3.Clone() if useT4: dataFillingHistogram += h_t4.Clone() # h_data = dataFillingHistogram h_data = h_t1 * 1.3 # h_data.Scale(absolute_eta_initialValues['data'][whichBinFromFile][0] / h_data.Integral() ) # h_data.FillRandom( dataFillingHistogram, int(absolute_eta_initialValues['data'][whichBinFromFile][0]) ) # h_data.FillRandom( dataFillingHistogram, int(absolute_eta_initialValues['data'][whichBinFromFile][0]) ) pass # for bin in range (0,nBins+1): # # h_data.SetBinContent( bin, t1Scale * h_t1.GetBinContent( bin ) + t2Scale*h_t2.GetBinContent( bin ) + t3Scale*h_t3.GetBinContent( bin ) ) # h_data.SetBinError(bin, sqrt(h_data.GetBinContent(bin))) # h_t1.SetBinError( bin, sqrt(h_t1.GetBinContent(bin))) # h_t2.SetBinError( bin, sqrt(h_t2.GetBinContent(bin)))
def _build_score_hists(samples_scores, hist_template):
    """Return one decorated histogram per sample, with a ``systematics``
    dict holding a histogram for every non-NOMINAL term."""
    hists = []
    for sample, scores_dict in samples_scores:
        hist = hist_template.Clone(title=sample.label)
        scores, weight = scores_dict['NOMINAL']
        fill_hist(hist, scores, weight)
        hist.decorate(**sample.hist_decor)
        hist.systematics = {}
        for sys_term in scores_dict.keys():
            if sys_term == 'NOMINAL':
                continue
            sys_hist = hist_template.Clone()
            scores, weight = scores_dict[sys_term]
            fill_hist(sys_hist, scores, weight)
            hist.systematics[sys_term] = sys_hist
        hists.append(hist)
    return hists


def plot_clf(background_scores,
             category,
             signal_scores=None,
             signal_scale=1.,
             data_scores=None,
             name=None,
             draw_histograms=True,
             draw_data=False,
             save_histograms=False,
             hist_template=None,
             bins=10,
             min_score=0,
             max_score=1,
             signal_colors=cm.spring,
             systematics=None,
             unblind=False,
             **kwargs):
    """
    Histogram classifier scores for backgrounds, signals and data, and
    optionally draw them.

    Parameters
    ----------
    background_scores : iterable of ``(sample, scores_dict)`` pairs, where
        ``scores_dict`` maps a systematic term (including ``'NOMINAL'``) to
        a ``(scores, weight)`` pair.
    category : passed through to ``draw``.
    signal_scores : same layout as ``background_scores``, or None.
    signal_scale : passed through to ``draw``.
    data_scores : ``(data_sample, scores)`` pair, or None.
    name : optional suffix for the output name.
    draw_histograms : if true, call ``draw`` once with linear and once with
        logarithmic y-axis.
    draw_data : data is only histogrammed when this is true *and*
        ``unblind`` is not False.
    save_histograms : accepted for interface compatibility; not used here.
    hist_template : histogram to clone; built from ``bins`` /
        ``min_score`` / ``max_score`` when None.
    bins : number of bins, or an iterable of bin edges (variable width).
    min_score, max_score : range used when ``bins`` is a number.
    signal_colors, systematics, **kwargs : passed through to ``draw``.
    unblind : False (no data), True, or a float.  A float is used as the
        signal efficiency up to which data is shown (data above the
        corresponding score cut is dropped) when signal histograms exist.

    Returns
    -------
    (bkg_hists, sig_hists, data_hist) ; ``sig_hists`` and ``data_hist`` are
    None when not built.
    """
    if hist_template is None:
        if hasattr(bins, '__iter__'):
            # variable width bins
            hist_template = Hist(bins)
        else:
            hist_template = Hist(bins, min_score, max_score)

    bkg_hists = _build_score_hists(background_scores, hist_template)

    if signal_scores is not None:
        sig_hists = _build_score_hists(signal_scores, hist_template)
    else:
        sig_hists = None

    if data_scores is not None and draw_data and unblind is not False:
        data, data_scores = data_scores
        if isinstance(unblind, float):
            if sig_hists is not None:
                # unblind up to `unblind` % signal efficiency
                # (previously the efficiency was hard-coded to 0.3, which
                # ignored the requested value)
                sum_sig = sum(sig_hists)
                cut = efficiency_cut(sum_sig, unblind)
                data_scores = data_scores[data_scores < cut]
        data_hist = hist_template.Clone(title=data.label)
        data_hist.decorate(**data.hist_decor)
        fill_hist(data_hist, data_scores)
        if unblind >= 1 or unblind is True:
            data_total = sum(data_hist)
            model_total = sum(sum(bkg_hists))
            log.info("Data events: %d" % data_total)
            log.info("Model events: %f" % model_total)
            for hist in bkg_hists:
                log.info("{0} {1}".format(hist.GetTitle(), sum(hist)))
            if model_total:
                log.info("Data / Model: %f" % (data_total / model_total))
            else:
                # avoid a ZeroDivisionError from a purely informational log
                log.warning("Data / Model: undefined (empty model)")
    else:
        data_hist = None

    if draw_histograms:
        output_name = 'event_bdt_score'
        if name is not None:
            output_name += '_' + name
        for logy in (False, True):
            draw(data=data_hist,
                 model=bkg_hists,
                 signal=sig_hists,
                 signal_scale=signal_scale,
                 category=category,
                 name="BDT Score",
                 output_name=output_name,
                 show_ratio=data_hist is not None,
                 model_colors=None,
                 signal_colors=signal_colors,
                 systematics=systematics,
                 logy=logy,
                 **kwargs)
    return bkg_hists, sig_hists, data_hist
def clf_channels(self, clf,
                 category, region,
                 cuts=None,
                 bins=10,
                 limits=None,
                 mass=None,
                 mode=None,
                 systematics=True,
                 unblind=False,
                 hybrid_data=False,
                 no_signal_fixes=False,
                 uniform=False,
                 mva=False):
    """
    Build the HistFactory channel for the ``mass`` hypothesis.

    ``bins`` is either a bin count (range taken from ``limits`` if given,
    otherwise from the min/max score) or a sequence of bin edges.
    ``unblind`` controls data blinding: False blanks every bin, an int
    blanks the highest bins, a float blanks everything above the score cut
    returned by ``efficiency_cut`` for that signal efficiency, True leaves
    data untouched.  With ``hybrid_data`` the blinded bins are replaced by
    the sum of the background and signal sample histograms.

    Returns ``(scores_obj, channel)``.
    """
    log.info("constructing channels")

    # determine min and max scores
    scores_obj = self.get_scores(
        clf, category, region, cuts=cuts,
        masses=[mass], mode=mode,
        systematics=systematics,
        unblind=unblind)
    data_scores = scores_obj.data_scores
    bkg_scores = scores_obj.bkg_scores
    all_sig_scores = scores_obj.all_sig_scores
    min_score = scores_obj.min_score
    max_score = scores_obj.max_score

    if not isinstance(bins, int):
        # explicit bin edges
        if bins[0] > min_score:
            log.warning("min score is less than first edge "
                        "(will be underflow)")
        if bins[-1] <= max_score:
            log.warning("max score is greater than or equal to last edge "
                        "(will be overflow)")
        binning = Hist(bins, type='D')
    elif limits is None:
        binning = Hist(bins, min_score, max_score, type='D')
    else:
        low, high = limits
        binning = Hist(bins, low, high, type='D')

    # background HistFactory samples
    bkg_samples = []
    for bkg, bkg_sample_scores in bkg_scores:
        hist_template = binning.Clone(
            title=bkg.label,
            **bkg.hist_decor)
        bkg_samples.append(bkg.get_histfactory_sample(
            hist_template, clf,
            category, region,
            cuts=cuts, scores=bkg_sample_scores,
            systematics=systematics,
            uniform=uniform,
            mva=mva))

    data_sample = None
    if data_scores is not None:
        hist_template = binning.Clone(
            title=self.data.label,
            **self.data.hist_decor)
        data_sample = self.data.get_histfactory_sample(
            hist_template, clf,
            category, region,
            cuts=cuts, scores=data_scores,
            uniform=uniform)
        if unblind is False:
            # blind full histogram
            data_sample.hist[:] = (0, 0)
        elif isinstance(unblind, int) and unblind is not True:
            # blind highest N bins
            data_sample.hist[-(unblind + 1):] = (0, 0)
        elif isinstance(unblind, float):
            # blind above a signal efficiency
            signal_total = sum([
                histogram_scores(hist_template, scores)
                for s, scores in all_sig_scores[mass]])
            max_unblind_score = efficiency_cut(signal_total, unblind)
            blind_bin = hist_template.FindBin(max_unblind_score)
            data_sample.hist[blind_bin:] = (0, 0)

    # create signal HistFactory samples
    sig_samples = []
    for sig, sig_sample_scores in all_sig_scores[mass]:
        hist_template = binning.Clone(
            title=sig.label,
            **sig.hist_decor)
        sig_samples.append(sig.get_histfactory_sample(
            hist_template, clf,
            category, region,
            cuts=cuts, scores=sig_sample_scores,
            no_signal_fixes=no_signal_fixes,
            systematics=systematics,
            uniform=uniform,
            mva=mva))

    # replace data in blind bins with signal + background
    if hybrid_data and unblind is not True:
        expected = sum([s.hist for s in (bkg_samples + sig_samples)])
        if unblind is False:
            # replace full hist
            data_sample.hist[:] = expected[:]
        elif isinstance(unblind, int):
            # replace highest N bins
            first_blind = -(unblind + 1)
            data_sample.hist[first_blind:] = expected[first_blind:]
        elif isinstance(unblind, float):
            data_sample.hist[blind_bin:] = expected[blind_bin:]

    # create channel for this mass point
    channel_name = 'hh_{0}_{1}_{2}'.format(
        self.year % 1000, category.name, mass)
    channel = histfactory.make_channel(
        channel_name,
        bkg_samples + sig_samples,
        data=data_sample)

    return scores_obj, channel
def optimized_channels(clf, category, region, backgrounds,
                       data=None, cuts=None, mass_points=None, mu=1.,
                       systematics=True, lumi_rel_error=0.,
                       algo='EvenBinningByLimit'):
    """
    Return optimally binned HistFactory Channels for each mass hypothesis

    The binning is chosen using the signal scores of the 125 GeV mass
    hypothesis only; the chosen binning is then used to construct the
    channels for all requested mass hypotheses.

    algos:
        EvenBinningByLimit: scan uniform binnings with 2..49 bins, compute
            the limit for each and keep the one with the smallest
            'Expected' value. Also writes a limit-vs-nbins PNG.
        UnevenBinningBySignificance: fill 200-bin signal and background
            histograms on [-1, 1] and hand them to ``optimize_binning``.

    Parameters: ``clf``, ``category``, ``region``, ``backgrounds``,
    ``data``, ``cuts``, ``mass_points``, ``mu`` and ``systematics`` are
    forwarded to ``get_scores``; ``lumi_rel_error`` is forwarded to
    ``get_limit``. ``mass_points`` (if not None) also restricts which
    entries of ``Higgs.MASSES`` get a channel.

    Returns a dict mapping mass -> channel.

    An unknown ``algo`` prints an error and terminates the process via
    ``exit(1)``.
    """
    log.info("constructing optimized channels")

    scores_obj = get_scores(clf, category, region, backgrounds,
                            data=data, cuts=cuts, mass_points=mass_points,
                            mu=mu, systematics=systematics)

    data_scores = scores_obj.data_scores
    bkg_scores = scores_obj.bkg_scores
    all_sig_scores = scores_obj.all_sig_scores
    min_score = scores_obj.min_score
    max_score = scores_obj.max_score

    # The optimisation below always uses the 125 hypothesis.
    # NOTE(review): this raises KeyError if 125 is not among the keys of
    # all_sig_scores (e.g. excluded through mass_points) -- confirm callers.
    sig_scores = all_sig_scores[125]

    best_hist_template = None
    if algo == 'EvenBinningByLimit':
        limit_hists = []
        best_limit = float('inf')
        best_nbins = 0
        nbins_range = xrange(2, 50)

        for nbins in nbins_range:

            hist_template = Hist(nbins, min_score, max_score, type='D')

            # create HistFactory samples
            samples = []
            for s, scores in bkg_scores + sig_scores:
                sample = s.get_histfactory_sample(hist_template, clf, category, region, cuts=cuts, scores=scores)
                samples.append(sample)

            data_sample = None
            # NOTE(review): gated on ``data`` here but on ``data_scores``
            # further down -- presumably equivalent; verify.
            if data is not None:
                data_sample = data.get_histfactory_sample(hist_template, clf, category, region, cuts=cuts, scores=data_scores)

            # create channel for this mass point
            channel = histfactory.make_channel("%s_%d" % (category.name, 125), samples, data=data_sample)

            # get limit
            limit_hist = get_limit(channel, lumi_rel_error=lumi_rel_error)
            # NOTE(review): formats ``category`` itself, whereas the channel
            # name above uses ``category.name`` -- confirm which is intended.
            limit_hist.SetName("%s_%d_%d" % (category, 125, nbins))

            # is this better than the best limit so far?
            hist_dict = hist_to_dict(limit_hist)
            limit_hists.append(hist_dict)
            if hist_dict['Expected'] < best_limit:
                best_limit = hist_dict['Expected']
                best_nbins = nbins
                best_hist_template = hist_template

        # plot limit vs nbins
        fig = plt.figure()
        ax = fig.add_subplot(111)
        central_values = np.array([h['Expected'] for h in limit_hists])
        high_values_1sig = np.array([h['+1sigma'] for h in limit_hists])
        low_values_1sig = np.array([h['-1sigma'] for h in limit_hists])
        high_values_2sig = np.array([h['+2sigma'] for h in limit_hists])
        low_values_2sig = np.array([h['-2sigma'] for h in limit_hists])
        plt.plot(nbins_range, central_values, 'k-')
        # The 2-sigma band is drawn first so the 1-sigma band lies on top.
        plt.fill_between(nbins_range, low_values_2sig, high_values_2sig, linewidth=0, facecolor='yellow')
        plt.fill_between(nbins_range, low_values_1sig, high_values_1sig, linewidth=0, facecolor='green')
        plt.xlim(nbins_range[0], nbins_range[-1])
        plt.xlabel("Number of Bins")
        plt.ylabel("Limit")
        plt.grid(True)
        plt.text(.5, .8, "Best limit of %.2f at %d bins" % (best_limit, best_nbins), horizontalalignment='center', verticalalignment='center', transform=ax.transAxes, fontsize=20)
        # Written relative to the current working directory.
        plt.savefig('category_%s_limit_vs_nbins.png' % category.name)

    elif algo == 'UnevenBinningBySignificance':
        # Fine starting binning on a fixed [-1, 1] range (min_score and
        # max_score are not used in this branch).
        #hist_template = Hist(200, min_score, max_score)
        hist_template = Hist(200, -1.0, 1.0, type='D')

        # Sum all signal samples into one histogram, keeping one summed
        # histogram per systematic term.
        sig_hist = hist_template.Clone(title='Signal')
        sig_hist.systematics = {}
        for sig, scores_dict in sig_scores:
            scores, weight = scores_dict['NOMINAL']
            sig_hist.fill_array(scores, weight)
            for sys_term in scores_dict.keys():
                if sys_term == 'NOMINAL':
                    continue
                if not sys_term in sig_hist.systematics:
                    sys_hist = hist_template.Clone()
                    sig_hist.systematics[sys_term] = sys_hist
                else:
                    sys_hist = sig_hist.systematics[sys_term]
                scores, weight = scores_dict[sys_term]
                sys_hist.fill_array(scores, weight)

        # Same accumulation for the backgrounds.
        bkg_hist = hist_template.Clone(title='Background')
        bkg_hist.systematics = {}
        for bkg, scores_dict in bkg_scores:
            scores, weight = scores_dict['NOMINAL']
            bkg_hist.fill_array(scores, weight)
            for sys_term in scores_dict.keys():
                if sys_term == 'NOMINAL':
                    continue
                if not sys_term in bkg_hist.systematics:
                    sys_hist = hist_template.Clone()
                    bkg_hist.systematics[sys_term] = sys_hist
                else:
                    sys_hist = bkg_hist.systematics[sys_term]
                scores, weight = scores_dict[sys_term]
                sys_hist.fill_array(scores, weight)

        print "SIG entries:", sig_hist.GetEntries()
        print "BKG entries:", bkg_hist.GetEntries()
        sig_hist, bkg_hist, best_hist_template = optimize_binning(
            sig_hist, bkg_hist,
            #starting_point='fine'
            starting_point='merged')
        # Fall back to the fine 200-bin template when the optimiser does
        # not return one.
        if best_hist_template is None:
            best_hist_template = hist_template
        #raw_input("Hit enter to continue...")
    else:
        print "ERROR: binning optimisation algo %s not in list!" % algo
        exit(1)

    hist_template = best_hist_template
    channels = dict()

    # create HistFactory samples
    # Background and data samples do not depend on the mass hypothesis, so
    # they are built once and shared by every channel.
    bkg_samples = []
    for s, scores in bkg_scores:
        sample = s.get_histfactory_sample(hist_template, clf, category, region, cuts=cuts, scores=scores)
        bkg_samples.append(sample)

    data_sample = None
    if data_scores is not None:
        data_sample = data.get_histfactory_sample(hist_template, clf, category, region, cuts=cuts, scores=data_scores)

    # now use the optimal binning and construct channels for all requested mass
    # hypotheses
    for mass in Higgs.MASSES:
        if mass_points is not None and mass not in mass_points:
            continue
        log.info('=' * 20)
        log.info("%d GeV mass hypothesis" % mass)

        # create HistFactory samples
        sig_samples = []
        for s, scores in all_sig_scores[mass]:
            sample = s.get_histfactory_sample(hist_template, clf, category, region, cuts=cuts, scores=scores)
            sig_samples.append(sample)

        # create channel for this mass point
        channel = histfactory.make_channel("%s_%d" % (category.name, mass), bkg_samples + sig_samples, data=data_sample)
        channels[mass] = channel
    return channels
from config import CMS
# Setting this to True (default in rootpy)
# changes how the histograms look in ROOT...
ROOT.TH1.SetDefaultSumw2(False)
ROOT.gROOT.SetBatch(True)

# create normal distributions
mu1, mu2, sigma1, sigma2 = 100, 140, 15, 5
x1 = mu1 + sigma1 * np.random.randn(10000)
x2 = mu2 + sigma2 * np.random.randn(500)
x1_obs = mu1 + sigma1 * np.random.randn(10000)
x2_obs = mu2 + sigma2 * np.random.randn(1000)

# create histograms
h1 = Hist(100, 40, 200, title='Background')
h2 = h1.Clone(title='Signal')
h3 = h1.Clone(title='Data')
h3.markersize = 1.2

# fill the histograms with our distributions
# (explicit loops rather than ``map(h.Fill, xs)``: ``map`` is lazy on
# Python 3, so relying on it for side effects would leave the histograms
# empty there)
for _hist, _values in ((h1, x1), (h2, x2), (h3, x1_obs), (h3, x2_obs)):
    for _value in _values:
        _hist.Fill(_value)

# set visual attributes
h1.fillstyle = 'solid'
h1.fillcolor = 'green'
h1.linecolor = 'green'
h1.linewidth = 0
mu_mc_chain = TreeChain( "TTbar_plus_X_analysis/MuPlusJets/Ref selection/Pileup/Pileup", mc_files) mu_mc_chain.Draw('NVertices', 'EventWeight * LeptonEfficiencyCorrection', hist=pu_mu_mc_reco) el_chain = TreeChain( "TTbar_plus_X_analysis/EPlusJets/Ref selection/Pileup/Pileup", data_files_el) el_chain.Draw('NVertices', hist=pu_el_reco) mu_chain = TreeChain( "TTbar_plus_X_analysis/MuPlusJets/Ref selection/Pileup/Pileup", data_files_mu) mu_chain.Draw('NVertices', hist=pu_mu_reco) pu_mc_reco = pu_el_mc_reco.Clone('pu_mc_reco') pu_mc_reco += pu_mu_mc_reco pu_data_reco = pu_el_reco.Clone('pu_data_reco') pu_data_reco += pu_mu_reco hists = [pu_mc_reco, pu_data_reco] norm_hists = {} for hist in hists: name = hist.GetName() + '_norm' h = hist.Clone(name) h.Scale(1 / h.integral()) norm_hists[name] = h # rename so it is compatible to hardcoded name pileup = pu_data_reco.Clone('pileup')