def cut_flow(self):
    '''Draw the stacked MC cut flow against data, with a ratio pad.

    Reads the 'cut_flow' histogram of every entry of self.mc_samples whose
    name contains neither 'ttJets' nor 'QCD', plus self.ttbar_to_use. All
    views whose name contains 'QCD' are summed into a single histogram.
    The histograms are stacked, drawn on a log-y pad together with the
    'cut_flow' histogram of the 'data' view, and a data/stack ratio plot
    is added. Drawn objects are appended to self.keep.
    '''
    BasePlotter.set_canvas_style(self.canvas)
    BasePlotter.set_canvas_style(self.pad)
    lab_f1, _ = self.dual_pad_format()
    self.label_factor = lab_f1

    # Built as a list comprehension (not filter() + append) so the result
    # is a real list on every Python version.
    views_to_flow = [
        name for name in self.mc_samples
        if 'ttJets' not in name and 'QCD' not in name
    ]
    views_to_flow.append(self.ttbar_to_use)
    qcd_samples = [name for name in self.views if 'QCD' in name]

    samples = []
    for vtf in views_to_flow:
        histo = self.get_view(vtf).Get('cut_flow')
        # Single-argument print: same output under the print statement
        # and the print function.
        print('%s %s' % (vtf, len(histo)))
        self.keep.append(histo)
        samples.append(histo)

    # From here on `histo` is the last histogram read above (the one of
    # self.ttbar_to_use); it serves as binning template and as axis frame.

    # QCD may not have all the bins filled, needs special care:
    # accumulate every QCD view bin by bin into an emptied clone.
    qcd_histo = histo.Clone()
    qcd_histo.Reset()
    for sample in qcd_samples:
        qcd_flow = self.get_view(sample).Get('cut_flow')
        qcd_histo = qcd_histo.decorate(**qcd_flow.decorators)
        qcd_histo.title = qcd_flow.title
        for sbin, qbin in zip(qcd_histo, qcd_flow):
            sbin.value += qbin.value
            sbin.error = quad.quad(sbin.error, qbin.error)
    samples.append(qcd_histo)
    self.keep.append(qcd_histo)

    # Order the stack by the content of the second-to-last element of
    # each histogram.
    samples.sort(key=lambda x: x[-2].value)
    stack = plotting.HistStack()
    self.keep.append(stack)
    for sample_histo in samples:
        stack.Add(sample_histo)
    self.style_histo(stack)
    self.style_histo(histo, **histo.decorators)

    # Draw the frame histogram first, then set the proper axis labels.
    histo.Draw()
    histo.yaxis.title = 'Events'
    data = self.get_view('data').Get('cut_flow')
    # The lower edge is capped at 1.2 so the axis minimum never exceeds
    # 0.96 after the 0.8 margin factor.
    smin = min(stack.min(), data.min(), 1.2)
    smax = max(stack.max(), data.max())
    histo.yaxis.range_user = smin * 0.8, smax * 1.2
    stack.Draw('same')
    data.Draw('same')
    self.keep.append(data)
    self.add_legend([stack, data], False, entries=len(views_to_flow) + 1)
    self.pad.SetLogy()
    self.add_ratio_plot(data, stack, ratio_range=0.4)
    # The ratio pad stays linear.
    self.lower_pad.SetLogy(False)
def create_stack(self, *histos, **styles_kwargs):
    '''Build a HistStack from the given histograms.

    The keyword 'sort' (default True) is consumed here; when true the
    histograms are added in order of increasing Integral(). All remaining
    keywords are turned into one style per histogram through
    BasePlotter._kwargs_to_styles_ and applied with self.style_histo
    before the histogram is added to the stack.

    Returns the filled plotting.HistStack.
    '''
    do_sort = styles_kwargs.pop('sort', True)
    ordered = list(histos)
    if do_sort:
        ordered.sort(key=lambda h: h.Integral())

    per_histo_styles = BasePlotter._kwargs_to_styles_(
        styles_kwargs, len(ordered))
    stack = plotting.HistStack()
    for hist, hist_style in izip_longest(ordered, per_histo_styles):
        # A missing or empty style means "use the defaults only".
        self.style_histo(hist, **(hist_style or {}))
        stack.Add(hist)
    return stack
*[ views.SubdirectoryView( tfile, category ) for category in categories] ) if options.differential == 1: input_view = DifferentialView( input_view ) histograms = [ apply_style(input_view.Get(i), i) for i in keys ] histograms = sorted(histograms, key=lambda x: x.Integral()) observed = apply_style(input_view.Get(data), data) logging.debug("debugging histos:") for histo in histograms: logging.debug(" %s: style: %s, integral: %.2f" % ( histo.GetTitle(), histo.drawstyle, histo.Integral() ) ) stack = plotting.HistStack() for obj in histograms: stack.Add(obj) maximum = max(list(observed)+[stack.GetMaximum()]) canvas = plotting.Canvas(name='adsf', title='asdf') canvas.cd() stack.SetMaximum(maximum*1.8) stack.Draw() stack.GetXaxis().SetTitle(options.xtitle) stack.GetYaxis().SetTitle(options.ytitle) observed.Draw('same') #tries to figure which side the legend goes obslist = list(observed)
ch_plot = ch_h.Clone() # set bincontents of plot histogram to means of all samples for iBin in range(unc_plot.GetNbinsX() + 1): bin_contents = [h.GetBinContent(iBin) for h in unchanged_hists] unc_plot.SetBinContent(iBin, np.mean(bin_contents)) unc_plot.SetBinError(iBin, np.std(bin_contents)) for iBin in range(ch_plot.GetNbinsX() + 1): bin_contents = [h.GetBinContent(iBin) for h in changed_hists] ch_plot.SetBinContent(iBin, np.mean(bin_contents)) ch_plot.SetBinError(iBin, np.std(bin_contents)) # generate upper plot canvas = pltstyle.init_canvas(ratiopad=True) stack = rp.HistStack([unc_plot, ch_plot], stacked=True, drawstyle="HIST E1 X0") max_val = stack.GetMaximum() stack.SetMaximum(max_val * 1.3) rp.utils.draw([stack], pad=canvas.cd(1), xtitle="discriminator output for {} node".format( event_classes[i_node]), ytitle="Events") legend = pltstyle.init_legend([unc_plot, ch_plot]) pltstyle.add_category_label(canvas.cd(1), categories[key]) # generate lower plot line1 = rp.Graph(50) for i, x in enumerate(np.linspace(0., 1., 50)):
def make_charge_flip_control_plot(self, variable, xaxis='', rebin=1,
                                  legend_on_the_left=False,
                                  data_type='data', x_range=None,
                                  apply_scale='', show_ratio=False,
                                  differential=False):
    '''Draw the observed histogram of `variable` over the stacked estimate.

    The three views come from self.get_flip_data(rebin, xaxis, data_type).
    The estimate is a HistStack of the fakes histogram and the flip
    histogram, drawn together with its error band.

    variable           -- name of the histogram fetched from each view
    xaxis, rebin,
    data_type          -- forwarded unchanged to self.get_flip_data
    legend_on_the_left -- forwarded to self.add_legend as `leftside`
    x_range            -- optional (low, high) pair for the x axis
    apply_scale        -- if non-empty, the flip histogram is passed
                          through MedianView.apply_view together with the
                          histogram named variable + apply_scale
    show_ratio         -- if true, add an observed/estimate ratio plot
    differential       -- if true, wrap all three views in
                          DifferentialView
    '''
    ss_p1p2_view, ss_fakes_est, os_flip_est_nofake = self.get_flip_data(
        rebin, xaxis, data_type)
    if differential:
        ss_p1p2_view = DifferentialView(ss_p1p2_view)
        ss_fakes_est = DifferentialView(ss_fakes_est)
        os_flip_est_nofake = DifferentialView(os_flip_est_nofake)

    fakes_hist = ss_fakes_est.Get(variable)
    flip_hist = os_flip_est_nofake.Get(variable)
    if apply_scale:
        flip_hist = MedianView.apply_view(
            flip_hist, os_flip_est_nofake.Get(variable + apply_scale))
    obs_hist = ss_p1p2_view.Get(variable)

    estimate_hist = plotting.HistStack()
    estimate_hist.Add(fakes_hist)
    estimate_hist.Add(flip_hist)

    estimate_error = HistStackToTGRaphErrors(estimate_hist)
    estimate_error.SetFillStyle(3013)
    estimate_error.SetFillColor(ROOT.EColor.kBlack)
    estimate_error.SetTitle('Error on estimate')

    # Yield summary; bins 0 and GetNbinsX()+1 are reported as
    # under/overflow. Printed as one pre-formatted string so the output
    # is the same under the print statement and the print function.
    sum_stack = sum(estimate_hist.hists)
    summary = "variable %s: data integral: %.1f (%.1f/%.1f), estimate: %.1f (%.1f/%.1f) (under/overflow)" % (
        variable,
        obs_hist.Integral(),
        obs_hist.GetBinContent(0),
        obs_hist.GetBinContent(obs_hist.GetNbinsX() + 1),
        sum_stack.Integral(),
        sum_stack.GetBinContent(0),
        sum_stack.GetBinContent(sum_stack.GetNbinsX() + 1),
    )
    print(summary)

    # Leave 30% headroom above the taller of estimate and observation.
    hmax = max([estimate_hist.GetMaximum(), max(list(obs_hist))])
    obs_hist.GetYaxis().SetRangeUser(0, hmax * 1.3)
    if x_range:
        obs_hist.GetXaxis().SetRangeUser(x_range[0], x_range[1])

    # The observation is drawn first to set up the axes and again last so
    # it ends up on top of the stack and the error band.
    obs_hist.Draw()
    estimate_hist.Draw('same')
    self.canvas.Update()
    estimate_error.Draw('2 same')
    obs_hist.Draw('same')
    self.keep.extend([estimate_hist, estimate_error, obs_hist])

    legend = self.add_legend(
        [obs_hist], leftside=legend_on_the_left, entries=4)
    legend.AddEntry(estimate_hist, 'f')
    legend.Draw()
    self.add_cms_blurb(self.sqrts)
    if show_ratio:
        self.add_ratio_plot(
            obs_hist, estimate_hist, x_range, ratio_range=0.2)
def plot_class_differences(self, log=False):
    ''' plot, per output node, the relative difference to every other node

    For each class in self.event_classes the events predicted as that
    class are selected, and for every other class a histogram of
    (node output - other output) / node output is filled with the lumi
    weights. One canvas per node is saved to
    <save_path>/node_differences_<class>.pdf.

    log -- if true the canvas is switched to a logarithmic y axis
    '''
    pltstyle.init_plot_style()
    nbins = 20
    bin_range = [0., 1.]

    # Fetched once instead of once per event.
    # NOTE(review): assumes get_lumi_weights() returns the same data on
    # every call - confirm.
    lumi_weights = self.data.get_lumi_weights()

    # loop over discriminator nodes
    for i, node_cls in enumerate(self.event_classes):
        node_index = self.data.class_translation[node_cls]

        # get outputs of node
        node_values = self.mainnet_predicted_vector[:, i]

        # events predicted as this node; the same selection is reused for
        # the weights and for every other node below
        selected = [k for k in range(len(node_values))
                    if self.predicted_classes[k] == node_index]
        filtered_node_values = np.array([node_values[k] for k in selected])
        filtered_weights = [lumi_weights[k] for k in selected]

        hists = []
        max_val = 0

        # loop over other nodes and get those predictions
        for j, other_cls in enumerate(self.event_classes):
            if i == j:
                continue
            other_values = self.mainnet_predicted_vector[:, j]
            filtered_other_values = np.array(
                [other_values[k] for k in selected])

            # get difference of predicted node value and other value
            diff_values = ((filtered_node_values - filtered_other_values)
                           / filtered_node_values)

            hist = rp.Hist(nbins, *bin_range,
                           title=str(other_cls) + " node",
                           drawstyle="HIST E1 X0")
            pltstyle.set_sig_hist_style(hist, other_cls)
            hist.fill_array(diff_values, filtered_weights)
            max_val = max(max_val, hist.GetMaximum())
            hists.append(hist)

        if not hists:
            # Only one event class: there is nothing to compare against.
            continue

        # The first histogram goes into a HistStack, the rest are drawn
        # as plain histograms on top of it.
        first_hist = hists[0]
        histograms = hists[1:]
        stack = rp.HistStack([first_hist], stacked=True)

        # create canvas
        canvas = pltstyle.init_canvas()

        # drawing hists
        stack.SetMaximum(max_val * 1.3)
        rp.utils.draw([stack] + histograms, pad=canvas,
                      xtitle="relative difference (" + str(node_cls)
                      + " - X_node)/" + str(node_cls),
                      ytitle="Events")
        if log:
            canvas.cd().SetLogy()

        # legend; bound to a name although unused afterwards, presumably
        # to keep the object alive until the canvas is saved - TODO confirm
        legend = pltstyle.init_legend([first_hist] + histograms)
        pltstyle.add_lumi(canvas)
        pltstyle.add_category_label(canvas, self.event_category)

        # save
        out_path = self.save_path + "/node_differences_{}.pdf".format(
            node_cls)
        pltstyle.save_canvas(canvas, out_path)
def plot_classification(self, log=False):
    ''' plot all events classified as one category

    For each class in self.event_classes the events predicted as that
    class are split by their true class. Every true class except "ttHbb"
    becomes one background histogram in a stack; the "ttHbb" events form
    the signal histogram, scaled to the background weight sum. One canvas
    per node is saved to <save_path>/predictions_<class>.pdf.

    log -- if true the canvas is switched to a logarithmic y axis
    '''
    pltstyle.init_plot_style()
    nbins = 20
    bin_range = [0., 1.]

    ttH_index = self.data.class_translation["ttHbb"]

    # Fetched once instead of once per event and per class.
    # NOTE(review): assumes both getters return the same data on every
    # call - confirm.
    test_labels = self.data.get_test_labels(as_categorical=False)
    lumi_weights = self.data.get_lumi_weights()

    # loop over discriminator nodes
    for i, node_cls in enumerate(self.event_classes):
        node_index = self.data.class_translation[node_cls]

        # get outputs of node
        out_values = self.mainnet_predicted_vector[:, i]

        # events predicted as this node
        in_node = [k for k in range(len(out_values))
                   if self.predicted_classes[k] == node_index]

        # fill lists according to class
        bkg_hists = []
        weight_integral = 0

        # loop over all classes to fill hist according to predicted class
        for truth_cls in self.event_classes:
            class_index = self.data.class_translation[truth_cls]

            # filter values per event class
            selected = [k for k in in_node
                        if test_labels[k] == class_index]
            filtered_values = [out_values[k] for k in selected]
            filtered_weights = [lumi_weights[k] for k in selected]

            # Compare translated class indices with each other; the loop
            # position only equals the class index when event_classes
            # happens to be ordered like class_translation.
            if class_index == ttH_index:
                # signal in this node
                sig_values = filtered_values
                sig_label = str(truth_cls)
                sig_weights = filtered_weights
            else:
                # background in this node
                weight_integral += sum(filtered_weights)
                hist = rp.Hist(nbins, *bin_range, title=str(truth_cls))
                pltstyle.set_bkg_hist_style(hist, truth_cls)
                hist.fill_array(filtered_values, filtered_weights)
                bkg_hists.append(hist)

        # stack backgrounds
        bkg_stack = rp.HistStack(bkg_hists, stacked=True,
                                 drawstyle="HIST E1 X0")
        bkg_stack.SetMinimum(1e-4)
        max_val = bkg_stack.GetMaximum() * 1.3
        bkg_stack.SetMaximum(max_val)

        # plot signal, scaled to the summed background weight
        weight_sum = sum(sig_weights)
        if weight_sum:
            scale_factor = 1. * weight_integral / weight_sum
        else:
            # No signal weight in this node: nothing to scale, and the
            # ratio would divide by zero.
            scale_factor = 1.
        sig_weights = [w * scale_factor for w in sig_weights]

        sig_title = sig_label + "*{:.3f}".format(scale_factor)
        sig_hist = rp.Hist(nbins, *bin_range, title=sig_title)
        pltstyle.set_sig_hist_style(sig_hist, sig_label)
        sig_hist.fill_array(sig_values, sig_weights)

        # creating canvas
        canvas = pltstyle.init_canvas()

        # drawing hists
        rp.utils.draw([bkg_stack, sig_hist],
                      xtitle="Events predicted as " + node_cls,
                      ytitle="Events", pad=canvas)
        if log:
            canvas.cd().SetLogy()

        # legend; bound to a name although unused afterwards, presumably
        # to keep the object alive until the canvas is saved - TODO confirm
        legend = pltstyle.init_legend(bkg_hists + [sig_hist])
        pltstyle.add_lumi(canvas)
        pltstyle.add_category_label(canvas, self.event_category)

        if weight_integral:
            print("S/B = {}".format(weight_sum / weight_integral))
        else:
            print("S/B undefined: no background weight in node {}".format(
                node_cls))

        # save
        out_path = self.save_path + "/predictions_{}.pdf".format(node_cls)
        pltstyle.save_canvas(canvas, out_path)
def plot_discriminators(self, log=False, cut_on_variable=None):
    ''' plot discriminators for output classes

    For each class in self.event_classes the node output of all events is
    split by true class. Every true class except "ttHbb" becomes one
    background histogram in a stack; the "ttHbb" events form the signal
    histogram, scaled to the background weight sum. The ROC AUC of the
    node output against self.data.get_ttH_flag() is added to the plot.
    One canvas per node is saved to
    <save_path>/discriminator_<class>.pdf.

    log             -- if true the canvas is switched to a log y axis
    cut_on_variable -- optional dict with keys "class" and "val"; only
                       events whose output for node "class" is <= "val"
                       are histogrammed
    '''
    pltstyle.init_plot_style()
    nbins = 50
    bin_range = [0., 1.]

    # get some ttH specific info for plotting
    ttH_index = self.data.class_translation["ttHbb"]
    ttH_true_labels = self.data.get_ttH_flag()

    # Fetched once instead of once per event and per class.
    # NOTE(review): assumes both getters return the same data on every
    # call - confirm.
    test_labels = self.data.get_test_labels(as_categorical=False)
    lumi_weights = self.data.get_lumi_weights()

    # apply cut to output node value if wanted
    cut_prediction = None
    cut_value = None
    if cut_on_variable:
        cut_class = cut_on_variable["class"]
        cut_value = cut_on_variable["val"]
        cut_index = self.data.class_translation[cut_class]
        cut_prediction = self.mainnet_predicted_vector[:, cut_index]

    # loop over discriminator nodes
    for i, node_cls in enumerate(self.event_classes):
        # get outputs of node
        out_values = self.mainnet_predicted_vector[:, i]

        # calculate node specific ROC value
        node_ROC = roc_auc_score(ttH_true_labels, out_values)

        # events surviving the optional cut (all events without a cut)
        if cut_prediction is None:
            passing = range(len(out_values))
        else:
            passing = [k for k in range(len(out_values))
                       if cut_prediction[k] <= cut_value]

        # fill lists according to class
        bkg_hists = []
        weight_integral = 0

        # loop over all classes to fill hist according to predicted class
        for truth_cls in self.event_classes:
            class_index = self.data.class_translation[truth_cls]

            # filter values per event class
            selected = [k for k in passing
                        if test_labels[k] == class_index]
            filtered_values = [out_values[k] for k in selected]
            filtered_weights = [lumi_weights[k] for k in selected]

            # Compare translated class indices with each other; the loop
            # position only equals the class index when event_classes
            # happens to be ordered like class_translation.
            if class_index == ttH_index:
                # ttH signal
                sig_values = filtered_values
                sig_label = str(truth_cls)
                sig_weights = filtered_weights
            else:
                # background in this node
                weight_integral += sum(filtered_weights)
                hist = rp.Hist(nbins, *bin_range, title=str(truth_cls))
                pltstyle.set_bkg_hist_style(hist, truth_cls)
                hist.fill_array(filtered_values, filtered_weights)
                bkg_hists.append(hist)

        # stack backgrounds
        bkg_stack = rp.HistStack(bkg_hists, stacked=True,
                                 drawstyle="HIST E1 X0")
        bkg_stack.SetMinimum(1e-4)
        max_val = bkg_stack.GetMaximum() * 1.3
        bkg_stack.SetMaximum(max_val)

        # plot signal, scaled to the summed background weight
        weight_sum = sum(sig_weights)
        if weight_sum:
            scale_factor = 1. * weight_integral / weight_sum
        else:
            # No signal weight left (e.g. after the cut): nothing to
            # scale, and the ratio would divide by zero.
            scale_factor = 1.
        sig_weights = [w * scale_factor for w in sig_weights]

        sig_title = sig_label + "*{:.3f}".format(scale_factor)
        sig_hist = rp.Hist(nbins, *bin_range, title=sig_title)
        pltstyle.set_sig_hist_style(sig_hist, sig_label)
        sig_hist.fill_array(sig_values, sig_weights)

        # creating canvas
        canvas = pltstyle.init_canvas()

        # drawing histograms
        rp.utils.draw([bkg_stack, sig_hist],
                      xtitle=node_cls + " Discriminator",
                      ytitle="Events", pad=canvas)
        if log:
            canvas.cd().SetLogy()

        # creating legend; bound to a name although unused afterwards,
        # presumably to keep the object alive until the canvas is saved
        # - TODO confirm
        legend = pltstyle.init_legend(bkg_hists + [sig_hist])
        pltstyle.add_lumi(canvas)
        pltstyle.add_category_label(canvas, self.event_category)

        # add ROC value to plot
        pltstyle.add_ROC_value(canvas, node_ROC)

        # save canvas
        out_path = self.save_path + "/discriminator_{}.pdf".format(
            node_cls)
        pltstyle.save_canvas(canvas, out_path)
def plot_prenet_nodes(self, log=False):
    ''' plot prenet nodes

    For each target in self.prenet_targets the node output is split into
    events whose prenet test label is 1 ("True") and events whose label
    is 0 ("False"); events with any other label are left out. The "True"
    histogram is scaled to the weight sum of the "False" histogram. One
    canvas per node is saved to <save_path>/prenet_output_<target>.pdf.

    log -- if true the canvas is switched to a logarithmic y axis
    '''
    pltstyle.init_plot_style()
    n_bins = 20
    bin_range = [0., 1.]

    # Fetched once instead of once per event (and four times per node).
    # NOTE(review): assumes both getters return the same data on every
    # call - confirm.
    lumi_weights = self.data.get_lumi_weights()
    prenet_test_labels = self.data.get_prenet_test_labels()

    for i, node_cls in enumerate(self.prenet_targets):
        # get outputs of class node
        out_values = self.prenet_predicted_vector[:, i]
        prenet_labels = prenet_test_labels[:, i]

        # split values and weights by label in a single pass
        sig_values, sig_weights = [], []
        bkg_values, bkg_weights = [], []
        for k, value in enumerate(out_values):
            label = prenet_labels[k]
            if label == 1:
                sig_values.append(value)
                sig_weights.append(lumi_weights[k])
            elif label == 0:
                bkg_values.append(value)
                bkg_weights.append(lumi_weights[k])

        sig_weight_sum = sum(sig_weights)
        if sig_weight_sum:
            bkg_sig_ratio = 1. * sum(bkg_weights) / sig_weight_sum
        else:
            # No "True" weight for this node: nothing to scale, and the
            # ratio would divide by zero.
            bkg_sig_ratio = 1.
        sig_weights = [w * bkg_sig_ratio for w in sig_weights]

        sig_label = "True"
        bkg_label = "False"
        sig_title = sig_label + "*{:.3f}".format(bkg_sig_ratio)

        # plot output
        bkg_hist = rp.Hist(n_bins, *bin_range, title=bkg_label)
        pltstyle.set_bkg_hist_style(bkg_hist, bkg_label)
        bkg_hist.fill_array(bkg_values, bkg_weights)

        sig_hist = rp.Hist(n_bins, *bin_range, title=sig_title)
        pltstyle.set_sig_hist_style(sig_hist, sig_label)
        sig_hist.fill_array(sig_values, sig_weights)

        stack = rp.HistStack([bkg_hist], stacked=True,
                             drawstyle="HIST E1 X0")
        stack.SetMinimum(1e-4)

        canvas = pltstyle.init_canvas()
        rp.utils.draw([stack, sig_hist],
                      xtitle="prenet node {}".format(node_cls),
                      ytitle="Events", pad=canvas)
        if log:
            canvas.cd().SetLogy()

        # legend; bound to a name although unused afterwards, presumably
        # to keep the object alive until the canvas is saved - TODO confirm
        legend = pltstyle.init_legend([bkg_hist, sig_hist])
        pltstyle.add_lumi(canvas)
        pltstyle.add_category_label(canvas, self.event_category)

        out_path = self.save_path + "/prenet_output_{}.pdf".format(
            node_cls)
        pltstyle.save_canvas(canvas, out_path)
new_h_plot = new_h.Clone() new_hists.append(new_h) # loop over bins and fill average to plot hist with errors for iBin in range(new_h_plot.GetNbinsX() + 1): bin_contents = [h.GetBinContent(iBin) for h in new_hists] new_h_plot.SetBinContent(iBin, np.mean(bin_contents)) new_h_plot.SetBinError(iBin, np.std(bin_contents)) # get unsmeared histogram old_h = before_hists[i_node] old_h_plot = old_h.Clone() # plot histogram canvas = pltstyle.init_canvas(ratiopad=True) stack = rp.HistStack([old_h_plot], stacked=True, drawstyle="HIST X0") max_val = max(stack.GetMaximum(), new_h.GetMaximum()) stack.SetMaximum(max_val * 1.3) rp.utils.draw([stack] + [new_h_plot], pad=canvas.cd(1), xtitle="discriminator output for {} node".format( event_classes[i_node]), ytitle="Events") legend = pltstyle.init_legend([old_h_plot, new_h_plot]) pltstyle.add_category_label(canvas.cd(1), categories[key]) x_vals = [] ks_probs_per_bin = [] ks_error_per_bin = [] for i_bin in range(new_h.GetNbinsX()):