def fit():
    """Run the mass fit for the current mode and dump the workspace.

    The data passing the final selection are fitted with the ``total`` pdf of
    the workspace returned by ``setup_workspace``. The start values are
    plotted before fitting, a bad fit quality is logged as an error, and the
    workspace is dumped afterwards.
    """
    # Get the data
    # TODO: rewrite selection to use gcm itself
    current_mode = gcm()
    passed = selection.get_final_selection()
    frame = current_mode.get_data([dtf_dm(), m(current_mode.D0)])
    frame = frame[passed]

    from . import fit_config
    from ROOT import RooFit as RF
    from .fit_setup import setup_workspace

    workspace, _ = setup_workspace()
    dataset = fit_config.pandas_to_roodataset(frame,
                                              workspace.set('datavars'))
    total_pdf = workspace.pdf('total')

    # Document the starting point of the fit before touching any parameter
    plot_fit('_start_values', wsp=workspace)

    fit_result = total_pdf.fitTo(
        dataset,
        RF.NumCPU(4),
        RF.Save(True),
        RF.Strategy(2),
        RF.Extended(True),
    )
    fit_is_good = helpers.check_fit_result(fit_result, log)
    if not fit_is_good:
        log.error('Bad fit quality')

    fit_config.dump_workspace(current_mode, workspace)
def plot_comparison():
    """Compare the generated model with the selected data of the current mode.

    For every phase-space variable and the decay-time variable, the model
    distribution is drawn against the data passing the final selection and
    ``selection.mass_signal_region()``. All pages are written to
    ``Gen_DATA_Comp.pdf`` in the ``effs`` output path.
    """
    extra_vars = [gcm().ltime_var]

    # Current mode stuff
    data = gcm().get_data([f.var for f in extra_vars])
    add_variables.append_phsp(data)
    df_sel = final_selection.get_final_selection()
    df_sel &= selection.mass_signal_region()

    gen = get_model()

    outfile = gcm().get_output_path('effs') + 'Gen_DATA_Comp.pdf'
    with PdfPages(outfile) as pdf:
        for pc in gcm().phsp_vars + extra_vars:
            log.info('Plotting {}'.format(pc.var))
            filled = gen[pc.var]
            errorbars = data[pc.var][df_sel]
            if pc.convert is not None:
                filled = pc.convert(filled)
                errorbars = pc.convert(errorbars)

            ax = comparison.plot_comparison(pc, filled, errorbars,
                                            'Model', 'Data')
            ax.set_xlabel(pc.xlabel)
            ax.yaxis.set_visible(False)
            ax.legend()

            fig = plt.gcf()
            pdf.savefig(fig)
            # Close the page once it is saved: otherwise one figure per
            # variable stays alive, and a helper drawing on the current
            # figure would overlay consecutive variables.
            plt.close(fig)
def plot_fit(suffix=None, wsp=None):
    """Plot the fit projections of the current mode into a PDF file.

    suffix: optional string appended to the ``fits`` file name.
    wsp: workspace to plot. When ``None`` the workspace is loaded with
        ``fit_config.load_workspace`` for the current mode.

    For each of the two variables (``m`` and ``dtf_dm``) two pages are
    written: one with the default options and one with ``do_pulls=False``.
    """
    from . import roofit_to_matplotlib
    from . import fit_config

    for shape_name in ('RooCruijff', 'RooJohnsonSU', 'RooBackground'):
        shapes.load_shape_class(shape_name)

    current_mode = gcm()
    if wsp is None:
        wsp = fit_config.load_workspace(current_mode)

    passed = selection.get_final_selection()
    frame = current_mode.get_data([dtf_dm(), m(current_mode.D0)])
    frame = frame[passed]
    dataset = fit_config.pandas_to_roodataset(frame, wsp.set('datavars'))

    # Tell the fit configuration which columns hold the two fit variables
    fit_config.WS_DMASS_NAME = dtf_dm()
    fit_config.WS_MASS_NAME = m(current_mode.D0)

    tag = '' if suffix is None else suffix
    outfile = (current_mode.get_output_path('sweight_fit')
               + 'fits{}.pdf'.format(tag))

    with PdfPages(outfile) as pdf:
        for func in [m, dtf_dm]:
            # First page with the defaults, second page without pulls
            for extra_options in ({}, {'do_pulls': False}):
                roofit_to_matplotlib.plot_fit(
                    current_mode.D0, wsp, func, data=dataset, pdf=pdf,
                    do_comb_bkg=current_mode.mode in config.twotag_modes,
                    **extra_options)
def train_reweighter():
    """Train and dump a ``GBReweighter`` mapping the model onto the data.

    The training columns are the phase-space variables and the decay-time
    variable of the current mode, where every variable with ``phi`` in its
    name is replaced by ``cosphi`` and ``sinphi`` (computed from ``phi1``).
    Each column is rescaled with its plotting limits to the range [2, 3]
    before training. Up to 250000 randomly sampled rows of the model
    (original) and of the selected data (target) are used.
    """
    max_train_rows = 250000

    extra_vars = [gcm().ltime_var]
    all_vars = gcm().phsp_vars + extra_vars
    columns = [v.var for v in all_vars if 'phi' not in v.var]
    columns += ['cosphi', 'sinphi']

    # Current mode stuff
    data = gcm().get_data([f.var for f in extra_vars])
    add_variables.append_phsp(data)
    data['cosphi'] = np.cos(data.phi1)
    data['sinphi'] = np.sin(data.phi1)
    df_sel = final_selection.get_final_selection()
    df_sel &= selection.delta_mass_signal_region()

    gen = get_model()
    gen['cosphi'] = np.cos(gen.phi1)
    gen['sinphi'] = np.sin(gen.phi1)

    # Lower and upper edge of every training column
    limits = {v.var: v.binning[1:] for v in all_vars}
    limits['cosphi'] = (-1., 1)
    limits['sinphi'] = (-1., 1)

    for c in columns:
        mi, ma = limits[c]
        data[c] = (data[c] - mi) / (ma - mi) + 2.
        gen[c] = (gen[c] - mi) / (ma - mi) + 2.

    log.info('Training BDT reweighter for {}'.format(', '.join(columns)))

    def subsample(frame, label):
        # DataFrame.sample raises a ValueError when asked for more rows than
        # are available (without replacement), so cap the request.
        n_rows = min(max_train_rows, len(frame))
        if n_rows < max_train_rows:
            log.warning('Only {} {} rows available for training'.format(
                n_rows, label))
        return frame.sample(n=n_rows)

    # Sample the model first and the data second, as before, so that the
    # random state is consumed in the same order.
    original = subsample(gen[columns], 'model')
    target = subsample(data[columns][df_sel], 'data')

    reweighter = GBReweighter(n_estimators=300, max_depth=5,
                              learning_rate=0.2)
    reweighter.fit(original=original, target=target)
    bdt_utils.dump_reweighter(reweighter)
def lifetime_study(correct_efficiencies=False):
    """Plot the phase-space variables in intervals of the decay time.

    The decay-time boundaries are the 0, 20, ..., 100 % weighted quantiles of
    the selected data. For every phase-space variable the normalised
    distributions of all decay-time intervals are overlaid on one page.

    correct_efficiencies: when true, every candidate is weighted with
        ``1/get_efficiency()``, the first boundary is dropped and the output
        goes to ``DATA_ltime_dep_effs.pdf`` instead of ``DATA_ltime_dep.pdf``.
    """
    # Current mode stuff
    data = gcm().get_data([gcm().ltime_var.var])
    add_variables.append_phsp(data)
    df_sel = final_selection.get_final_selection()
    df_sel &= selection.delta_mass_signal_region()
    data['weight'] = 1.

    if correct_efficiencies:
        file_name = 'DATA_ltime_dep_effs.pdf'
    else:
        file_name = 'DATA_ltime_dep.pdf'
    outfile = gcm().get_output_path('effs') + file_name

    percentiles = np.arange(0, 1.1, 0.2)
    boundaries = helpers.weighted_quantile(
        data[gcm().ltime_var.var][df_sel], percentiles)
    if correct_efficiencies:
        data['weight'] = 1./get_efficiency()
        # NOTE(review): the slice is assumed to belong to this branch; the
        # original layout of this statement should be confirmed.
        boundaries = boundaries[1:]

    marker_style = dict(
        fmt='o', markersize=5, capthick=1, capsize=0,
        elinewidth=2, alpha=1)

    with PdfPages(outfile) as pdf:
        for var in gcm().phsp_vars:
            fig, ax = plt.subplots(figsize=(10, 10))
            for low, high in zip(boundaries[:-1], boundaries[1:]):
                above = data[gcm().ltime_var.var] > low
                below = data[gcm().ltime_var.var] < high
                sel = above & below
                in_interval = df_sel & sel
                df = data[var.var][in_interval]
                weight = data['weight'][in_interval]

                # The label is built from the boundaries multiplied by 1000
                scaled = [low*1000, high*1000]
                rlow, prec = helpers.rounder(low*1000, scaled)
                rhigh, _ = helpers.rounder(high*1000, scaled)
                spec = '{{:.{}f}}'.format(prec)
                label = r'${} < \tau \mathrm{{ [ps]}} < {}$'.format(
                    spec.format(rlow), spec.format(rhigh))

                # Sum of weights per bin and sum of squared weights per bin
                values, edges = np.histogram(
                    df, bins=int(var.binning[0]/5.),
                    range=var.binning[1:], weights=weight)
                err, edges = np.histogram(
                    df, bins=int(var.binning[0]/5.),
                    range=var.binning[1:], weights=weight**2)

                norm = np.sum(values)
                values = values/norm
                err = np.sqrt(err)/norm

                upper_edges = edges[1:]
                lower_edges = edges[:-1]
                x_ctr = (upper_edges + lower_edges)/2.
                x_err = (upper_edges - lower_edges)/2.

                ax.errorbar(x_ctr, values, err, x_err,
                            label=label, **marker_style)

            ax.set_xlabel(var.xlabel)
            ax.yaxis.set_visible(False)
            ax.legend()
            pdf.savefig(plt.gcf())
            plt.close()
def run_spearmint_fit(spearmint_selection=None, metric='punzi'):
    """Evaluate a selection metric from the mass fit of the current mode.

    The stored workspace is loaded and never written back. With a
    ``spearmint_selection`` the ``total`` pdf is refitted to the data passing
    the final selection and the extra selection; without one, no fit is run
    and the metric is evaluated on the workspace as loaded.

    spearmint_selection: optional boolean selection that is and-ed with the
        final selection.
    metric: name handed to ``get_metric``.

    Returns the metric value, or ``0.0`` when none of the fit attempts passes
    ``helpers.check_fit_result``.
    """
    from . import fit_config
    from ROOT import RooFit as RF

    for shape_name in ('RooCruijff', 'RooJohnsonSU', 'RooBackground'):
        shapes.load_shape_class(shape_name)

    mode = gcm()
    wsp = fit_config.load_workspace(mode)
    sel = selection.get_final_selection()

    # Get the data
    df = mode.get_data([dtf_dm(), m(mode.D0)])
    if spearmint_selection is not None:
        sel = sel & spearmint_selection
    df = df[sel]

    data = fit_config.pandas_to_roodataset(df, wsp.set('datavars'))
    model = wsp.pdf('total')

    # Keep the callable under its own name instead of rebinding the
    # ``metric`` argument.
    evaluate_metric = get_metric(metric)(wsp)

    if spearmint_selection is not None:
        # Try the minimiser strategies 2, 1 and 0 in turn and stop at the
        # first fit that passes the quality check.
        for strategy in (2, 1, 0):
            result = model.fitTo(data, RF.NumCPU(4), RF.Save(True),
                                 RF.Strategy(strategy), RF.Extended(True))
            if helpers.check_fit_result(result, log):
                break
        else:
            # ``Logger.warn`` is a deprecated alias of ``Logger.warning``
            log.warning('Bad fit quality')
            return 0.0

    return evaluate_metric()
def get_sweights(do_comb_bkg=False):
    """Compute the sWeights of the selected candidates of the current mode.

    The ``signal`` and ``random`` pdfs of the stored workspace, plus the
    ``combinatorial`` pdf for two-tag modes, are evaluated for every selected
    candidate and scaled with the yields ``NSig``, ``NSPi`` and ``NBkg``. The
    per-candidate normalised values are passed to
    ``hep_ml.splot.compute_sweights``.

    do_comb_bkg: ignored. The value is replaced by whether the current mode
        is listed in ``config.twotag_modes``.

    Returns the sWeights with the index of the selected candidates and the
    columns ``sig``, ``rnd`` and ``comb`` (all zero without a combinatorial
    component).
    """
    helpers.allow_root()
    candidates = gcm().get_data([m(gcm().D0), dtf_dm()])

    from . import fit_config
    from hep_ml import splot

    for shape_name in ('RooCruijff', 'RooJohnsonSU', 'RooBackground'):
        shapes.load_shape_class(shape_name)

    wsp = fit_config.load_workspace(gcm())
    passed = selection.get_final_selection()

    # The argument is deliberately overridden by the mode configuration
    do_comb_bkg = gcm().mode in config.twotag_modes

    candidates = candidates[passed]

    sig_pdf = wsp.pdf('signal')
    rnd_pdf = wsp.pdf('random')
    comb_pdf = wsp.pdf('combinatorial')

    # (column name, per-candidate pdf value, name of the yield variable)
    components = [
        ('sig', call_after_set(sig_pdf, wsp, **candidates), 'NSig'),
        ('rnd', call_after_set(rnd_pdf, wsp, **candidates), 'NSPi'),
    ]
    if do_comb_bkg:
        components.append(
            ('comb', call_after_set(comb_pdf, wsp, **candidates), 'NBkg'))

    scaled = {}
    for column, prob, yield_name in components:
        scaled[column] = prob*wsp.var(yield_name).getVal()

    probs = pd.DataFrame(scaled, index=candidates.index)
    # Normalise so that the components of every candidate sum to one
    probs = probs.div(probs.sum(axis=1), axis=0)

    sweights = splot.compute_sweights(probs)
    sweights.index = probs.index
    if not do_comb_bkg:
        sweights['comb'] = 0.0

    return sweights
def run_spearmint_sweights(spearmint_selection=None):
    """Return a figure of merit for a selection computed from the sWeights.

    ``sig0`` is the summed signal sWeight after the final selection only.
    ``sig`` and ``bkg`` are the summed signal and background (``rnd`` plus
    ``comb``) sWeights after additionally applying ``spearmint_selection``.

    spearmint_selection: optional boolean selection that is and-ed with the
        final selection.

    Returns ``-(sig/sig0)/(0.5 + sqrt(bkg))`` with a negative ``bkg`` set to
    zero.
    """
    final_sel = selection.get_final_selection()

    # get_sweights overrides its argument, so the value passed has no effect
    sweights = get_sweights(gcm())
    sweights['bkg'] = sweights.rnd + sweights.comb

    # Reference signal yield with the final selection only
    reference = sweights[final_sel.reindex(sweights.index)]
    sig0 = np.sum(reference.sig)

    if spearmint_selection is not None:
        final_sel = final_sel & spearmint_selection
    chosen = sweights[final_sel.reindex(sweights.index)]

    sig = np.sum(chosen.sig)
    bkg = np.sum(chosen.bkg)
    log.info('sig={}, bkg={}, sig0={}'.format(sig, bkg, sig0))

    # A negative sum of sWeights cannot go into the square root
    if bkg < 0:
        bkg = 0

    signal_fraction = sig/sig0
    return -signal_fraction/(0.5 + np.sqrt(bkg))
def simple_phsp_efficiencies():
    """Plot the relative efficiency of the data with respect to the model.

    For every phase-space variable and the decay-time variable the selected
    data (numerator) and the model (denominator) are each weighted to unit
    sum and passed to ``helpers.make_efficiency`` with 100 bins. The pages
    are written to ``Gen_DATA_Eff.pdf`` in the ``effs`` output path.
    """
    extra_vars = [gcm().ltime_var]

    # Current mode stuff
    data = gcm().get_data([f.var for f in extra_vars])
    add_variables.append_phsp(data)
    df_sel = final_selection.get_final_selection()
    df_sel &= selection.delta_mass_signal_region()

    gen = get_model()

    marker_style = dict(
        fmt='o', markersize=5, capthick=1, capsize=0,
        elinewidth=2, alpha=1)

    outfile = gcm().get_output_path('effs') + 'Gen_DATA_Eff.pdf'
    with PdfPages(outfile) as pdf:
        for pc in gcm().phsp_vars + extra_vars:
            log.info('Plotting {}'.format(pc.var))
            denominator = gen[pc.var]
            numerator = data[pc.var][df_sel]

            # Uniform weights which sum to one for each sample
            n_den = denominator.index.size
            n_num = numerator.index.size
            weight_d = np.ones(n_den)*1./n_den
            weight_n = np.ones(n_num)*1./n_num

            fig, ax = plt.subplots(figsize=(10, 10))
            if pc.convert is not None:
                numerator = pc.convert(numerator)
                denominator = pc.convert(denominator)

            x, y, x_err, y_err = helpers.make_efficiency(
                numerator, denominator, 100, weight_n, weight_d,
                independent=True)

            ax.errorbar(x, y, y_err, x_err, **marker_style)
            ax.set_xlabel(pc.xlabel)
            ax.set_ylabel('Relative efficiency')
            pdf.savefig(plt.gcf())
            plt.close()
def misid_plots():
    """Plot the mis-identification and wrong slow-pion checks.

    Two files are written to the ``misid`` output path of the current mode:

    ``overview.pdf``
        One page per entry of ``double_misid_pc``. Every even entry gets a
        second page requiring the following entry's variable to be below
        147.5. The last page shows the delta-mass spectrum split into
        candidates kept and removed by ``misid_selection.misid_cut()``.
    ``wrong_spi.pdf``
        The mass computed with the other slow pion, and the difference
        between the head mass and that mass, for the selected candidates.
    """
    mode = gcm()

    # Get the necessary information from the current mode
    if mode.mode in config.wrong_sign_modes:
        wrong_spi = add_variables.other_slowpi_ws()
    else:
        wrong_spi = add_variables.other_slowpi()

    head_mass = vars.m(mode.head)
    dst_mass = mode.get_data([head_mass])[head_mass]

    sel = final_selection.get_final_selection()

    # Keep the ranges of the two wrong-slow-pion plots under their own
    # names: the loops below unpack other binnings and used to overwrite
    # the mass range before it was needed.
    _, mass_min, mass_max = mode.mass_var.binning
    mass_bins = 30
    dmass_bins, dmass_min, dmass_max = mode.dmass_var.binning

    misid = add_variables.double_misid()
    data = mode.get_data([vars.dtf_dm(), vars.m(mode.D0)])

    outfile = mode.get_output_path('misid') + 'overview.pdf'
    with PdfPages(outfile) as pdf:
        for i, pc in enumerate(double_misid_pc):
            nbins, xmin, xmax = pc.binning

            fig, ax = plt.subplots(figsize=(10, 10))
            ax.hist(misid[sel][pc.var], bins=nbins, range=(xmin, xmax))
            ax.set_xlabel(pc.xlabel)
            ax.set_ylabel('Candidates')
            ax.set_xlim((xmin, xmax))
            pdf.savefig(fig)
            plt.close(fig)

            if i % 2 == 0:
                # Even entries are paired with the next entry, which
                # provides the variable that is cut on.
                cutvar = double_misid_pc[i+1].var
                narrow = misid[cutvar] < 147.5

                fig, ax = plt.subplots(figsize=(10, 10))
                ax.hist(misid[sel & narrow][pc.var], bins=nbins,
                        range=(xmin, xmax))
                ax.set_xlabel(pc.xlabel)
                ax.set_ylabel(r'Candidates with $\Delta m <147.5$')
                ax.set_xlim((xmin, xmax))
                pdf.savefig(fig)
                plt.close(fig)

        cut = misid_selection.misid_cut()
        dm = mode.dmass_var
        nbins, xmin, xmax = dm.binning

        fig, ax = plt.subplots(figsize=(10, 10))
        ax.hist(data[dm.var][sel & cut], bins=nbins, color='#D3EFFB',
                range=(xmin, xmax), label='Kept', edgecolor='#D3EFFB')
        ax.hist(data[dm.var][sel & ~cut], bins=nbins, range=(xmin, xmax),
                label='Removed', color='#006EB6', edgecolor='#006EB6')
        ax.set_xlim((xmin, xmax))
        ax.set_xlabel(dm.xlabel)
        ax.set_ylabel('Candidates')
        ax.legend()
        pdf.savefig(fig)
        plt.close(fig)

    outfile = mode.get_output_path('misid') + 'wrong_spi.pdf'
    # The context manager closes the file even if a plot fails
    with PdfPages(outfile) as pdf:
        fig, ax = plt.subplots(figsize=(10, 10))
        # ``density`` replaces the ``normed`` keyword, which newer matplotlib
        # versions no longer accept.
        ax.hist(wrong_spi[sel], bins=mass_bins, range=(mass_min, mass_max),
                density=True, color='#006EB6', edgecolor='#006EB6')
        ax.set_xlabel(mode.mass_var.xlabel)
        ax.set_xlim((mass_min, mass_max))
        ax.set_ylabel('Arbitrary units')
        pdf.savefig(fig)
        plt.close(fig)

        fig, ax = plt.subplots(figsize=(10, 10))
        ax.hist((dst_mass - wrong_spi)[sel], bins=dmass_bins,
                range=(dmass_min, dmass_max),
                color='#006EB6', edgecolor='#006EB6')
        ax.set_xlabel(mode.dmass_var.xlabel)
        # The axis limits follow the range of the histogram drawn here
        ax.set_xlim((dmass_min, dmass_max))
        pdf.savefig(fig)
        plt.close(fig)
def plot_mass_regions():
    """Plot the D0 mass and delta mass with the signal and sideband regions.

    The candidates passing the final selection are histogrammed in 100 bins.
    The signal window and the two sidebands are drawn as shaded bars on top
    of each spectrum. Both pages go to ``mass_regions[_opt][_cand].pdf`` in
    the ``selection`` output path, where the suffixes follow
    ``config.optimised_selection`` and ``config.candidates_selection``.
    """
    sel = get_final_selection()
    df = gcm().get_data([vars.m(gcm().D0), vars.dtf_dm()])
    selected = df[sel]

    nbins = 100
    name = 'mass_regions'
    if config.optimised_selection:
        name += '_opt'
    if config.candidates_selection:
        name += '_cand'

    outfile = gcm().get_output_path('selection') + name + '.pdf'
    with PdfPages(outfile) as pdf:
        # Doing D0 mass first: window of +-18 and sidebands beyond +-30
        # around the D0 mass
        _plot_mass_region(
            pdf, selected[vars.m(gcm().D0)], 1810, 1920,
            config.PDG_MASSES['D0'], 18., 30.,
            vars.m.latex((gcm().D0), with_unit=True), vars.m.unit, nbins)

        # Now delta mass: window of +-0.5 and sidebands beyond +-2.3
        _plot_mass_region(
            pdf, selected[vars.dtf_dm()], 140.5, 152.5,
            config.PDG_MASSES['delta'], 0.5, 2.3,
            vars.dtf_dm.latex(with_unit=True), vars.dtf_dm.unit, nbins)


def _plot_mass_region(pdf, values, xmin, xmax, centre, signal_half_width,
                      sideband_offset, xlabel, unit, nbins):
    """Draw one spectrum with shaded signal and sideband regions into pdf.

    The signal window is ``centre +- signal_half_width``. The lower sideband
    runs from ``xmin`` to ``centre - sideband_offset`` and the upper sideband
    from ``centre + sideband_offset`` to ``xmax``.
    """
    # Signal window boundaries
    sw_lo = centre - signal_half_width
    sw_hi = centre + signal_half_width
    # Inner edges of the sidebands; the outer edges are the plot range
    sb_lo_hi = centre - sideband_offset
    sb_hi_lo = centre + sideband_offset

    # Centres and widths of the bars shading the regions
    bkg = np.array([(sb_lo_hi + xmin) / 2., (xmax + sb_hi_lo) / 2.])
    bkgw = np.array([(sb_lo_hi - xmin), (xmax - sb_hi_lo)])
    sig = np.array([(sw_lo + sw_hi) / 2.])
    sigw = np.array([(sw_hi - sw_lo)])

    h_vals, edges = np.histogram(values, bins=nbins, range=(xmin, xmax))
    h_errorbars = np.sqrt(h_vals)
    x_ctr = (edges[1:] + edges[:-1]) / 2.
    x_err = (edges[1:] - edges[:-1]) / 2.

    dt_options = dict(fmt='o', markersize=5, capthick=1, capsize=0,
                      elinewidth=2, color='#000000',
                      markeredgecolor='#000000')

    fig, ax = plt.subplots(figsize=(10, 10))
    ax.errorbar(x_ctr, h_vals, xerr=x_err, yerr=h_errorbars, **dt_options)

    # The shaded bars reach 10 % above the highest bin
    bar_height = 1.10 * np.max(h_vals)
    ax.bar(sig, bar_height, sigw, color='#D3EFFB',
           edgecolor='#D3EFFB', label='Signal', alpha=0.5)
    ax.bar(bkg, np.ones(len(bkg)) * bar_height, bkgw, label='Background',
           color='#006EB6', edgecolor='#006EB6', alpha=0.5)

    ax.set_xlabel(xlabel)
    bin_width = r'{} {}'.format((xmax - xmin) / nbins, unit)
    ax.set_ylabel(r'Candidates / ({0})'.format(bin_width))
    ax.legend()
    ax.set_xlim(xmin, 0.9999 * xmax)
    plot_utils.y_margin_scaler(ax, lf=0, la=True)

    pdf.savefig(fig)
    # Close rather than clear so that the figure does not stay alive
    plt.close(fig)
def phsp_comparison_plots():
    """Plots the mode sidebands and the opposite mode signal region phsp
    distributions. Only really meaningful if executed for the WS events.
    Opposite mode is plotted as solid, with the uncertainty propagated to
    the mode error plot.

    Every variable gets two pages in ``phsp_comp.pdf`` (``selection`` output
    path): one with the unweighted opposite-mode distribution and one where
    it is weighted with a decay-time dependent weight.
    """
    # Beside phase space, also plot D0 momentum and flight distance
    extra_vars = [
        gcm().ltime_var,
        PlotConfig(vars.pt, gcm().D0, (100, 0, 15000)),
        PlotConfig(vars.vdchi2, gcm().D0, (100, 0, 10), np.log, r'$\ln(\text{{{}}})$'),  # NOQA
    ]

    # opposite_mode: inside this block gcm() refers to the opposite mode
    with opposite_mode():
        OS = gcm().get_data([f.var for f in extra_vars])
        add_variables.append_phsp(OS)
        os_sel = final_selection.get_final_selection()
        os_sel &= selection.delta_mass_signal_region()

        os_ltime = OS[gcm().ltime_var.var]
        OS_weight = erf(os_ltime * 1600) / 24. + 0.038 + os_ltime * 4

    # Current mode stuff
    DF = gcm().get_data([f.var for f in extra_vars])
    add_variables.append_phsp(DF)
    df_sel = final_selection.get_final_selection()
    df_sel &= selection.mass_sideband_region()

    def save_page(pdf, ax, pc):
        # Shared cosmetics of both pages. The figure is closed after saving:
        # previously the second figure of each variable was neither cleared
        # nor closed before the next variable was drawn.
        ax.set_xlabel(pc.xlabel)
        plot_utils.y_margin_scaler(ax, lf=0, la=True)
        ax.set_ylabel('Arbitrary units')
        ax.legend()
        fig = plt.gcf()
        pdf.savefig(fig)
        plt.close(fig)

    outfile = gcm().get_output_path('selection') + 'phsp_comp.pdf'
    with PdfPages(outfile) as pdf:
        for pc in gcm().phsp_vars + extra_vars:
            log.info('Plotting {}'.format(pc.var))
            filled = OS[pc.var][os_sel]
            filled_weights = OS_weight[os_sel]
            errorbars = DF[pc.var][df_sel]
            if pc.convert is not None:
                filled = pc.convert(filled)
                errorbars = pc.convert(errorbars)

            # Unweighted comparison
            ax = comparison.plot_comparison(
                pc, filled, errorbars, 'RS signal', 'WS background',
                normed_max=True)
            save_page(pdf, ax, pc)

            # Same comparison with the weighted opposite-mode sample
            ax = comparison.plot_comparison(
                pc, filled, errorbars, 'RS signal', 'WS background',
                filled_weight=filled_weights, normed_max=True)
            save_page(pdf, ax, pc)
def dependence_study(use_efficiencies=False):
    """Plot the relative efficiency of one variable in slices of another.

    For every ordered pair of variables (phase-space variables plus the
    decay-time variable) the data are split into five intervals of the
    ``selected`` variable, bounded by the 0, 20, ..., 100 % weighted
    quantiles of the selected data. The relative efficiency of the
    ``plotted`` variable with respect to the model is drawn per interval.

    use_efficiencies: when true the model is weighted with
        ``get_efficiency_gen()`` and the output goes to
        ``Gen_DATA_Eff_dep_eff.pdf``; otherwise unit weights are used and the
        output goes to ``Gen_DATA_Eff_dep.pdf``.

    The y-axis limits of each pair are stored in ``limits_for_eff.p``. They
    are (re)computed when ``use_efficiencies`` is ``False`` or when the pair
    is not stored yet, and applied from the file otherwise.
    """
    extra_vars = [gcm().ltime_var]
    all_vars = gcm().phsp_vars + extra_vars

    # Current mode stuff
    data = gcm().get_data([f.var for f in extra_vars])
    add_variables.append_phsp(data)
    df_sel = final_selection.get_final_selection()
    df_sel &= selection.delta_mass_signal_region()

    gen = get_model()

    if use_efficiencies:
        outfile = gcm().get_output_path('effs') + 'Gen_DATA_Eff_dep_eff.pdf'
        gen['weight'] = get_efficiency_gen()
    else:
        outfile = gcm().get_output_path('effs') + 'Gen_DATA_Eff_dep.pdf'
        gen['weight'] = 1.

    lim_file = gcm().get_output_path('effs') + 'limits_for_eff.p'

    # Loop invariants
    percentiles = np.arange(0, 1.1, 0.2)
    options = dict(
        fmt='o', markersize=5, capthick=1, capsize=0,
        elinewidth=2, alpha=1)

    with PdfPages(outfile) as pdf:
        for selected, plotted in permutations(all_vars, 2):
            log.info('Plotting {} in intervals of {}'.format(
                plotted.var, selected.var))
            boundaries = helpers.weighted_quantile(
                data[selected.var][df_sel], percentiles)
            fig, ax = plt.subplots(figsize=(10, 10))

            for low, high in zip(boundaries[:-1], boundaries[1:]):
                num_sel = (data[selected.var] > low) & (data[selected.var] < high)  # NOQA
                den_sel = (gen[selected.var] > low) & (gen[selected.var] < high)  # NOQA

                denominator = gen[plotted.var][den_sel]
                numerator = data[plotted.var][df_sel & num_sel]

                # Normalise both samples to unit sum of weights. A new
                # object is created instead of dividing the indexed Series
                # in place.
                weight_d = gen['weight'][den_sel]
                weight_d = weight_d / np.sum(weight_d)
                weight_n = np.ones(numerator.index.size)*1./numerator.index.size  # NOQA

                x, y, x_err, y_err = helpers.make_efficiency(
                    numerator, denominator, 50, weight_n, weight_d,
                    independent=True)

                rlow, prec = helpers.rounder(low, boundaries)
                rhigh, _ = helpers.rounder(high, boundaries)
                spec = '{{:.{}f}}'.format(prec)
                label = r'${} <$ {} $ < {}$'.format(
                    spec.format(rlow), selected.xlabel, spec.format(rhigh))

                ax.errorbar(x, y, y_err, x_err, label=label, **options)

            ax.set_xlabel(plotted.xlabel)
            ax.set_ylabel('Relative efficiency')

            try:
                limits = load(lim_file)
            except Exception:
                # A missing or unreadable file starts a new one. Unlike a
                # bare ``except`` this lets KeyboardInterrupt and SystemExit
                # propagate.
                log.info('Creating new limits file')
                limits = {}
            if limits is None:
                log.info('Creating new limits file')
                limits = {}

            key = (plotted.var, selected.var)
            if key not in limits or use_efficiencies is False:
                plot_utils.y_margin_scaler(ax, hf=0.4)
                limits[key] = ax.get_ylim()
            else:
                log.info('Applying limits')
                ax.set_ylim(limits[key])
            dump(limits, lim_file)

            ax.legend()
            pdf.savefig(plt.gcf())
            plt.close()