def calc_chi2_stats(one_hist, other_hist, cov_matrix):
    """Compute chi2, number of degrees of freedom and p-value between two hists.

    The chi2 is ``delta . V^-1 . delta^T`` where ``delta`` is the bin-by-bin
    difference ``one_hist - other_hist`` and ``V`` is the covariance matrix.

    Parameters
    ----------
    one_hist, other_hist
        Histograms converted with ``cu.th1_to_ndarray(hist, False)``.
        NOTE(review): the result is used as a 2D row vector (``shape[1]`` and
        ``[0][0]`` indexing below), so presumably it has shape (1, nbins) -
        confirm against ``cu.th1_to_ndarray``.
    cov_matrix
        Either a ``ROOT.TH2`` (converted with ``cu.th2_to_ndarray``) or an
        object that is used directly as the matrix.

    Returns
    -------
    tuple
        ``(chi2, ndof, p)`` where ``ndof`` is the number of columns of the
        difference vector and ``p`` is the chi2 survival probability.
    """
    one_vec, _ = cu.th1_to_ndarray(one_hist, False)
    other_vec, _ = cu.th1_to_ndarray(other_hist, False)
    delta = one_vec - other_vec

    if isinstance(cov_matrix, ROOT.TH2):
        v, _ = cu.th2_to_ndarray(cov_matrix)
    else:
        v = cov_matrix

    try:
        v_inv = np.linalg.inv(v)
    except np.linalg.LinAlgError:
        # Singular matrix: fall back to the pseudo-inverse
        print("Trying pseudo-inverse instead")
        v_inv = np.linalg.pinv(v, rcond=1E-30)

    inter = v_inv.dot(delta.T)
    chi2 = delta.dot(inter)[0][0]
    ndof = delta.shape[1]
    # Use the survival function rather than 1 - cdf: the latter rounds to
    # exactly 0 for large chi2, whereas sf keeps precision in the tail.
    p = scipy.stats.chi2.sf(chi2, int(ndof))
    return chi2, ndof, p
def get_correlated_mean_err(hist, ematrix, is_density=True):
    """Return the mean of ``hist`` and its error, using the error matrix.

    Parameters
    ----------
    hist
        1D histogram; read via ``cu.th1_to_ndarray``, ``cu.get_th1_bin_centers``
        and (for densities) ``cu.get_th1_bin_widths``.
    ematrix
        2D histogram holding the error matrix; read via ``cu.th2_to_ndarray``.
    is_density : bool
        If True, bin contents are multiplied by the bin widths, and the error
        matrix is rescaled with ``scale_ematrix_by_bin_widths``, before the
        mean is computed. If False, contents and matrix are used as they are.

    Returns
    -------
    tuple of float
        ``(mean, mean_err)``
    """
    values, _ = cu.th1_to_ndarray(hist)
    # flatten: necessary for jax to avoid shape discrepancy
    values = values.reshape(-1)
    bin_centers = cu.get_th1_bin_centers(hist)
    covariance, _ = cu.th2_to_ndarray(ematrix)

    weights = values
    if is_density:
        bin_widths = cu.get_th1_bin_widths(hist)
        weights = values * bin_widths
        # the error matrix must be scaled consistently with the contents
        covariance = scale_ematrix_by_bin_widths(covariance, bin_widths)

    mean_value = metrics.calc_mean_jax(weights, bin_centers)
    mean_uncert = metrics.calc_mean_correlated_error_jax(weights, bin_centers, covariance)
    return float(mean_value), float(mean_uncert)
def get_uncorrelated_mean_err(hist, is_density=True):
    """Return the mean of ``hist`` and its error, using only per-bin errors.

    Parameters
    ----------
    hist
        1D histogram; contents and errors come from ``cu.th1_to_ndarray``,
        bin centres from ``cu.get_th1_bin_centers``.
    is_density : bool
        If True, the hist is taken to hold contents divided by bin width, so
        both contents and errors are multiplied by the bin widths first.

    Returns
    -------
    tuple
        ``(nominal_value, std_dev)`` of the mean computed by
        ``metrics.calc_mean_ucert``.
    """
    values, value_errs = cu.th1_to_ndarray(hist)
    bin_centers = cu.get_th1_bin_centers(hist)

    if is_density:
        # undo the division by bin width for both contents and errors
        bin_widths = cu.get_th1_bin_widths(hist)
        values = values * bin_widths
        value_errs = value_errs * bin_widths

    # convert to uncertainty arrays
    u_areas, u_centers = metrics.hist_values_to_uarray(bin_areas=values,
                                                       bin_centers=bin_centers,
                                                       bin_errors=value_errs)
    result = metrics.calc_mean_ucert(u_areas, u_centers)
    return result.nominal_value, result.std_dev
def plot_unfolded_with_yoda_normalised(self, do_chi2=False, do_zoomed=True):
    """Plot normalised unfolded data vs generator MC and RIVET/YODA predictions.

    One plot is made per pair of consecutive edges in ``self.bins``. Each plot
    shows the unfolded data (total and stat. uncertainties), the generator-level
    MC hist and one hist per entry of ``self.rivet_entries``, with a subplot
    in which each prediction is drawn relative to the error-free data, plus
    shaded bands for the data stat./total uncertainties. Plots are written with
    ``self.save_plot`` into ``self.setup.output_dir``.

    Parameters
    ----------
    do_chi2 : bool
        If True, append "chi2 / N_bins" to the legend label of the MC and of
        each RIVET prediction, where chi2 comes from ``calc_chi2_stats`` using
        the total error matrix and N_bins counts the non-empty data bins.
    do_zoomed : bool
        If True, additionally save a log-y version (thrust/width variables) and
        a low-x version (charged LHA and thrust/width variables) of each plot.

    Returns
    -------
    None
        Note the method returns immediately, skipping any remaining bins, the
        first time ``self.check_entries`` reports a problem.
    """
    data_total_errors_style = dict(
        label="Data (total unc.)",
        line_color=self.plot_styles['unfolded_total_colour'],
        line_width=self.line_width,
        line_style=1,
        marker_color=self.plot_styles['unfolded_total_colour'],
        marker_style=cu.Marker.get('circle'),
        marker_size=self.plot_styles['unfolded_marker_size'],
        leg_draw_opt="LEP")
    data_stat_errors_style = dict(
        label="Data (stat. unc.)",
        line_color=self.plot_styles['unfolded_stat_colour'],
        line_width=self.line_width,
        line_style=1,
        marker_color=self.plot_styles['unfolded_stat_colour'],
        marker_style=cu.Marker.get('circle'),
        # you need a non-0 marker to get the horizontal bars at the end of errors
        marker_size=0.0001,
        leg_draw_opt="LEP")
    mc_style = dict(
        label=self.region['mc_label'],
        line_color=self.plot_styles['gen_colour'],
        line_width=self.line_width,
        marker_color=self.plot_styles['gen_colour'],
        marker_size=self.plot_styles['gen_marker_size'],
        marker_style=self.plot_styles['gen_marker'],
        leg_draw_opt="LEP" if self.plot_styles['gen_marker_size'] > 0 else "LE")

    rivet_path, rivet_region, rivet_radius, rivet_lambda, rivet_pt_bins = get_matching_rivet_setup(
        self.setup)

    for ibin, (bin_edge_low, bin_edge_high) in enumerate(zip(self.bins[:-1], self.bins[1:])):
        hbc_args = dict(ind=ibin, binning_scheme='generator')
        mc_gen_hist_bin = self.hist_bin_chopper.get_pt_bin_normed_div_bin_width(
            'hist_truth', **hbc_args)
        unfolded_hist_bin_stat_errors = self.hist_bin_chopper.get_pt_bin_normed_div_bin_width(
            'unfolded_stat_err', **hbc_args)
        unfolded_hist_bin_total_errors = self.hist_bin_chopper.get_pt_bin_normed_div_bin_width(
            'unfolded', **hbc_args)

        # Get RIVET hists, which are absolute counts, so need normalising
        rivet_hist_name = '/%s/%s' % (
            rivet_path,
            rn.get_plot_name(rivet_radius, rivet_region, rivet_lambda, rivet_pt_bins[ibin]))
        rivet_hists = [
            qgp.normalise_hist_divide_bin_width(
                yoda.root.to_root(ent['yoda_dict'][rivet_hist_name]))
            for ent in self.rivet_entries
        ]

        # Create copy of data to go on top of stat unc,
        # but remove vertical error bar so we can see the stat unc
        # Note that you CAN'T set it to 0, otherwise vertical lines connecting
        # bins start being drawn. Instead set it to some super small value.
        # (clone to avoid restyling the original as well)
        unfolded_hist_bin_total_errors_marker_noerror = unfolded_hist_bin_total_errors.Clone()
        for i in range(1, unfolded_hist_bin_total_errors_marker_noerror.GetNbinsX() + 1):
            unfolded_hist_bin_total_errors_marker_noerror.SetBinError(i, 1E-100)

        data_entries = [
            Contribution(unfolded_hist_bin_total_errors, **data_total_errors_style),
            Contribution(unfolded_hist_bin_stat_errors, **data_stat_errors_style),
            # do data with black marker to get it on top
            Contribution(unfolded_hist_bin_total_errors_marker_noerror, **data_total_errors_style),
        ]

        # For subplot to ensure only MC errors drawn, not MC+data
        data_no_errors = unfolded_hist_bin_total_errors_marker_noerror.Clone()
        cu.remove_th1_errors(data_no_errors)

        # Copy so that per-bin label changes (chi2) don't leak into other bins
        this_mc_style = deepcopy(mc_style)

        # One style dict per RIVET entry, in the same order as rivet_hists
        rivet_styles = []
        for ent in self.rivet_entries:
            s_dict = ent['style_dict']
            rivet_styles.append(
                dict(label=s_dict['label'],
                     line_color=s_dict['color'],
                     line_width=self.line_width,
                     marker_color=s_dict['color'],
                     marker_size=s_dict.get('marker_size', self.plot_styles['gen_marker_size']),
                     marker_style=s_dict['marker_style'],
                     leg_draw_opt="LEP" if self.plot_styles['gen_marker_size'] > 0 else "LE"))

        # Calculate chi2 between data and MCs if desired
        if do_chi2:
            ematrix = self.hist_bin_chopper.get_pt_bin_normed_div_bin_width(
                self.unfolder.total_ematrix_name, **hbc_args)
            # stats are chi2, ndof, p
            mc_stats = calc_chi2_stats(unfolded_hist_bin_total_errors, mc_gen_hist_bin, ematrix)
            # Number of data bins with non-zero content, quoted in the label
            nbins = sum(
                1 for i in range(1, unfolded_hist_bin_total_errors.GetNbinsX() + 1)
                if unfolded_hist_bin_total_errors.GetBinContent(i) != 0)
            n_sig_fig = 2
            chi2_template = "\n#lower[-0.1]{{(#chi^{{2}} / N_{{bins}} = {chi2:g} / {nbins:d})}}"
            this_mc_style['label'] += chi2_template.format(
                chi2=cu.nsf(mc_stats[0], n_sig_fig), nbins=nbins)
            for h, style in zip(rivet_hists, rivet_styles):
                this_stats = calc_chi2_stats(unfolded_hist_bin_total_errors, h, ematrix)
                style['label'] += chi2_template.format(
                    chi2=cu.nsf(this_stats[0], n_sig_fig), nbins=nbins)

        mc_entries = [
            Contribution(mc_gen_hist_bin, subplot=data_no_errors, **this_mc_style),
        ]
        mc_entries.extend(
            Contribution(h, subplot=data_no_errors, **style)
            for h, style in zip(rivet_hists, rivet_styles))

        entries = [
            # Draw MC
            *mc_entries,
            # Draw data after to put on top of MC
            *data_entries
        ]

        func_name = cu.get_current_func_name()
        if not self.check_entries(entries, "%s bin %d" % (func_name, ibin)):
            return

        ymin = 0
        if np.any(cu.th1_to_ndarray(unfolded_hist_bin_total_errors)[0] < 0):
            ymin = None  # let it do its thing and auto calc ymin

        # NOTE(review): max_rel_err is computed but never used - calc_auto_xlim
        # below is called with the literal 0.5. Confirm which was intended.
        max_rel_err = 0.5 if "multiplicity" in self.setup.angle.var.lower() else -1

        plot = Plot(
            entries,
            ytitle=self.setup.pt_bin_normalised_differential_label,
            title=self.get_pt_bin_title(bin_edge_low, bin_edge_high),
            legend=True,
            # Set x lim to where data is non-0. Take the data (total unc.)
            # entry explicitly: its index within `entries` shifts with the
            # number of RIVET predictions, so a fixed slice is not reliable.
            xlim=qgp.calc_auto_xlim(data_entries[:1], max_rel_err=0.5),
            ylim=[ymin, None],
            **self.pt_bin_plot_args)
        plot.subplot_title = qgc.SIM_DATA_STR
        self._modify_plot_paper(plot)

        # disable adding objects to legend & drawing - we'll do it manually
        plot.do_legend = False
        plot.legend.SetTextSize(0.03)
        plot.legend.SetY1(0.6)
        plot.legend.SetX1(0.57)
        plot.legend.SetX2(0.93)
        if len(entries) > 4:
            # if lots of entries, try auto-expand
            plot.legend.SetY1(0.6 - (0.02 * (len(entries) - 4)))

        subplot_draw_opts = "NOSTACK E1"
        plot.plot("NOSTACK E1", subplot_draw_opts)

        # Legend lists the two real data entries, then the predictions.
        # NOTE(review): the return value is not used afterwards; presumably it
        # is held so the legend's helper objects stay alive - confirm.
        dummy_graphs = qgp.do_fancy_legend(chain(data_entries[:2], mc_entries),
                                           plot,
                                           use_splitline=False)
        plot.canvas.cd()
        plot.legend.Draw()

        # Create hists for data with error region for ratio
        # Easiest way to get errors right is to do data (with 0 errors)
        # and divide by data (with errors), as if you had MC = data with 0 error
        data_stat_ratio = data_no_errors.Clone()
        data_stat_ratio.Divide(unfolded_hist_bin_stat_errors)
        data_stat_ratio.SetFillStyle(3245)
        data_stat_ratio.SetFillColor(self.plot_styles['unfolded_stat_colour'])
        data_stat_ratio.SetLineWidth(0)
        data_stat_ratio.SetMarkerSize(0)

        data_total_ratio = data_no_errors.Clone()
        data_total_ratio.Divide(unfolded_hist_bin_total_errors)
        data_total_ratio.SetFillStyle(3254)
        data_total_ratio.SetFillColor(self.plot_styles['unfolded_total_colour'])
        data_total_ratio.SetLineWidth(0)
        data_total_ratio.SetMarkerSize(0)

        # now draw the data error shaded area
        # this is a bit hacky - basically draw them on the ratio pad,
        # then redraw the existing hists & line to get them ontop
        # note that we use "same" for all - this is to keep the original axes
        # (we may want to rethink this later?)
        plot.subplot_pad.cd()
        draw_opt = "E2 SAME"
        data_stat_ratio.Draw(draw_opt)
        data_total_ratio.Draw(draw_opt)
        plot.subplot_line.Draw()
        plot.subplot_container.Draw("SAME" + subplot_draw_opts)

        # Add subplot legend
        x_left = 0.25
        y_bottom = 0.75
        width = 0.67
        height = 0.15
        plot.subplot_legend = ROOT.TLegend(x_left, y_bottom, x_left + width, y_bottom + height)
        plot.subplot_legend.AddEntry(data_total_ratio, qgc.DATA_TOTAL_UNC_STR, "F")
        plot.subplot_legend.AddEntry(data_stat_ratio, qgc.DATA_STAT_UNC_STR, "F")
        plot.subplot_legend.SetTextSize(0.085)
        plot.subplot_legend.SetFillStyle(0)
        plot.subplot_legend.SetNColumns(2)
        plot.subplot_legend.Draw()

        plot.canvas.cd()

        stp = self.setup
        fname = f"unfolded_{stp.append}_rivet_bin_{ibin:d}_divBinWidth{stp.paper_str}.{stp.output_fmt}"
        self.save_plot(plot, os.path.join(stp.output_dir, fname))

        # Do extra versions: log-y, and small x values only
        if do_zoomed:
            if self.setup.angle.var in ["jet_thrust_charged", "jet_width_charged",
                                        "jet_thrust", "jet_width"]:
                # Re-use the plot just saved, switching it to a log y axis
                plot.y_padding_max_log = 50
                plot.y_padding_min_log = 0.5
                plot.ylim = None
                plot.set_logy(do_exponent=False, do_more_labels=False)
                # Named "_rivet_bin_" like the other outputs of this method
                fname = f"unfolded_{stp.append}_rivet_bin_{ibin:d}_divBinWidth_logY.{stp.output_fmt}"
                self.save_plot(plot, os.path.join(stp.output_dir, fname))

            if self.setup.angle.var in ["jet_LHA_charged", "jet_thrust_charged",
                                        "jet_width_charged", "jet_thrust", "jet_width"]:
                bin_edges = cu.get_bin_edges(mc_gen_hist_bin, 'x')
                # get the bin edge thats smallest between 0.2, and 5th bin
                bin_lt_lim = [x for x in bin_edges if x < 0.2][-1]
                upper_bin = min(bin_edges[5], bin_lt_lim)
                plot2 = Plot(
                    entries,
                    ytitle=self.setup.pt_bin_normalised_differential_label,
                    title=self.get_pt_bin_title(bin_edge_low, bin_edge_high),
                    xlim=(0, upper_bin),
                    **self.pt_bin_plot_args)
                self._modify_plot(plot2)
                plot2.subplot_title = "* / Generator"
                plot2.plot("NOSTACK E1")
                fname = f"unfolded_{stp.append}_rivet_bin_{ibin:d}_divBinWidth_lowX.{stp.output_fmt}"
                self.save_plot(plot2, os.path.join(stp.output_dir, fname))