def compute_efficiency(df):
    """Return efficiency weights predicted by the stored BDT reweighter.

    The phase-space variables and the lifetime variable of the current mode
    (with ``phi1`` replaced by its cosine and sine) are rescaled with their
    binning limits and passed to the reweighter obtained from
    ``bdt_utils.load_reweighter``. The predicted weights are capped at 6 and
    divided by 6.

    Candidates whose lifetime is below 0.0001725 or above 0.003256 get a
    weight of 0. Both edges are combined here; previously the mask of the
    lower edge was overwritten by the mask of the upper edge, so only the
    upper cut was ever applied.

    :param df: frame with ``phi1`` and every column named by
        ``gcm().phsp_vars`` and ``gcm().ltime_var``. It is copied, not
        modified.
    :returns: ``pd.Series`` of weights carrying the index of ``df``.
    """
    mode = gcm()
    ltime_name = mode.ltime_var.var
    all_vars = mode.phsp_vars + [mode.ltime_var]
    columns = [v.var for v in all_vars if 'phi' not in v.var]
    columns += ['cosphi', 'sinphi']
    log.info('Getting efficiencies for {}'.format(', '.join(columns)))

    # Work on a copy so the caller's frame is left untouched.
    data = df.copy()
    data['cosphi'] = np.cos(data.phi1)
    data['sinphi'] = np.sin(data.phi1)

    # The mask has to be built before the lifetime column is rescaled below.
    ltime = data[ltime_name]
    failed_lcut = (ltime < 0.0001725) | (ltime > 0.003256)

    limits = {v.var: v.binning[1:] for v in all_vars}
    limits['cosphi'] = (-1., 1)
    limits['sinphi'] = (-1., 1)
    for c in columns:
        # Maps the binning range [mi, ma] onto [2, 3].
        mi, ma = limits[c]
        data[c] = (data[c] - mi) / (ma - mi) + 2.

    reweighter = bdt_utils.load_reweighter()
    weight = reweighter.predict_weights(data[columns])
    weight = pd.Series(weight, index=data.index)
    weight[failed_lcut] = 0.
    weight[weight > 6.] = 6.
    return weight / 6.
def train_reweighter():
    """Train a GBReweighter from the model sample to selected data.

    Uses the phase-space variables plus the lifetime variable of the current
    mode, with ``phi1`` replaced by its cosine and sine. All inputs are
    rescaled with their binning limits before training. The fitted
    reweighter is handed to ``bdt_utils.dump_reweighter``.
    """
    mode = gcm()
    extra_vars = [mode.ltime_var]
    all_vars = mode.phsp_vars + extra_vars
    columns = [v.var for v in all_vars if 'phi' not in v.var]
    columns.extend(['cosphi', 'sinphi'])

    # Data of the current mode, restricted later by the final selection and
    # the delta mass signal region.
    data = mode.get_data([f.var for f in extra_vars])
    add_variables.append_phsp(data)
    data['cosphi'] = np.cos(data.phi1)
    data['sinphi'] = np.sin(data.phi1)
    df_sel = final_selection.get_final_selection()
    df_sel &= selection.delta_mass_signal_region()

    # Generated model sample used as the "original" distribution.
    gen = get_model()
    gen['cosphi'] = np.cos(gen.phi1)
    gen['sinphi'] = np.sin(gen.phi1)

    limits = {v.var: v.binning[1:] for v in all_vars}
    limits['cosphi'] = (-1., 1)
    limits['sinphi'] = (-1., 1)
    for name in columns:
        low, high = limits[name]
        span = high - low
        # Maps the binning range [low, high] onto [2, 3] for both samples.
        data[name] = (data[name] - low) / span + 2.
        gen[name] = (gen[name] - low) / span + 2.

    log.info('Training BDT reweighter for {}'.format(', '.join(columns)))
    reweighter = GBReweighter(n_estimators=300, max_depth=5,
                              learning_rate=0.2)
    original_sample = gen[columns].sample(n=250000)
    target_sample = data[columns][df_sel].sample(n=250000)
    reweighter.fit(original=original_sample, target=target_sample)
    bdt_utils.dump_reweighter(reweighter)
def create_feature_importance(comb_bkg=False):
    """Write the 'KnnFlatness' feature importances as a LaTeX table.

    The table is sorted by decreasing importance, written to
    ``feature_importance.tex`` in the output folder of the chosen BDT and
    then passed to ``tex_compile.convert_tex_to_pdf``.

    :param comb_bkg: use the combinatorial background BDT instead of the
        random slow pion one.
    """
    log.info('Feature importance for {}'.format(
        'comb. bkg' if comb_bkg else 'rand. pion bkg.'))
    bdt = bdt_utils.load_classifiers(comb_bkg)['KnnFlatness']

    if comb_bkg:
        used_vars = gcm().comb_bkg_bdt_vars
        bdt_folder = 'bdt_comb_bkg'
    else:
        used_vars = gcm().rand_spi_bdt_vars
        bdt_folder = 'bdt_rand_spi'
    features = [f.functor.latex(f.particle) for f in used_vars]
    log.info('Features: {}'.format(' '.join(features)))

    # Largest importance first.
    ranked = sorted(zip(features, bdt.feature_importances_),
                    key=lambda entry: -entry[1])

    fn = gcm().get_output_path(bdt_folder) + 'feature_importance.tex'
    log.info('Saving to {}'.format(fn))
    row_template = r'{} & {:.0f}\\'
    header = (r'\begin{tabular}{l|r}',
              r'Feature & Importance [\%] \\',
              r'\hline ')
    with open(fn, 'w') as of:
        for line in header:
            print(line, file=of)
        for feature, importance in ranked:
            # Importances are written in percent.
            print(row_template.format(feature, importance * 100.), file=of)
        print(r'\end{tabular}', file=of)
    tex_compile.convert_tex_to_pdf(fn)
def plot_comparison():
    """Plot the model sample against selected data for every variable.

    One page per phase-space variable plus the lifetime variable is written
    to ``Gen_DATA_Comp.pdf`` in the 'effs' output folder. Data are restricted
    to the final selection and the mass signal region.

    Each figure is closed after it has been saved, matching the other
    plotting routines, so that figures do not pile up across iterations.
    """
    extra_vars = [gcm().ltime_var]

    # Current mode stuff
    data = gcm().get_data([f.var for f in extra_vars])
    add_variables.append_phsp(data)
    df_sel = final_selection.get_final_selection()
    df_sel &= selection.mass_signal_region()

    gen = get_model()

    outfile = gcm().get_output_path('effs') + 'Gen_DATA_Comp.pdf'
    with PdfPages(outfile) as pdf:
        for pc in gcm().phsp_vars + extra_vars:
            log.info('Plotting {}'.format(pc.var))
            filled = gen[pc.var]
            errorbars = data[pc.var][df_sel]
            if pc.convert is not None:
                filled = pc.convert(filled)
                errorbars = pc.convert(errorbars)
            ax = comparison.plot_comparison(pc, filled, errorbars,
                                            'Model', 'Data')
            ax.set_xlabel(pc.xlabel)
            ax.yaxis.set_visible(False)
            ax.legend()
            fig = plt.gcf()
            pdf.savefig(fig)
            # Release the figure once it is on disk.
            plt.close(fig)
def plot_bdt_variables(sw=False, comb_bkg=False):
    """Plot signal against background for the BDT and auxiliary variables.

    Besides the training variables of the chosen BDT, the spectator and
    'just_plot' variables of the current mode are drawn. One page per
    variable is written to ``bdt_vars.pdf`` in the BDT output folder.

    :param sw: forwarded to ``bdt_data.get_bdt_data``.
    :param comb_bkg: use the combinatorial background BDT setup instead of
        the random slow pion one.
    """
    sig_df, bkg_df, sig_wgt, bkg_wgt = bdt_data.get_bdt_data(
        sw=sw, sklearn=False, comb_data=comb_bkg, plot=True)

    mode = gcm()
    if comb_bkg:
        trained, bdt_folder = mode.comb_bkg_bdt_vars, 'bdt_comb_bkg'
    else:
        trained, bdt_folder = mode.rand_spi_bdt_vars, 'bdt_rand_spi'
    # Slice-copy first so the mode's own list is not extended in place.
    to_plot = trained[:]
    to_plot += mode.spectator_vars + mode.just_plot

    outfile = mode.get_output_path(bdt_folder) + 'bdt_vars.pdf'
    with PdfPages(outfile) as pdf:
        for v in tqdm(to_plot, smoothing=0.3):
            ax = plot_comparison(
                v, sig_df[v.var], bkg_df[v.var], 'Signal', 'Background',
                filled_weight=sig_wgt, errorbars_weight=bkg_wgt,
                normed=False, normed_max=True)
            ax.set_xlabel(v.xlabel)
            ax.set_ylabel('Arbitrary units')
            plot_utils.y_margin_scaler(ax, lf=0, la=True)
            ax.legend()
            pdf.savefig(plt.gcf())
            plt.clf()
            plt.close()
def mass_fiducial_selection(df):
    """Return the mask of the fiducial D0 mass and delta mass window.

    Keeps candidates with 1810 <= m(D0) < 1920 and
    140.5 <= delta m < 160.5.
    """
    d0_mass = m(gcm().D0)
    delta_mass = dtf_dm()
    ret = True
    for passes in (df[d0_mass] >= 1810.,
                   df[d0_mass] < 1920.,
                   df[delta_mass] >= 140.5,
                   df[delta_mass] < 160.5):
        ret &= passes
    return ret
def d0_selection(df):
    """Return the mask of the D0 candidate quality cuts.

    Requires pt > 4000, vertex chi2 < 4 and max DOCA < 0.2. For modes that
    are not in ``config.twotag_modes`` the logarithm of the IP chi2 must
    additionally be below 1.
    """
    # NOTE(review): block structure was reconstructed; it is assumed that
    # only the IP chi2 cut depends on the mode - confirm.
    apply_ip_cut = gcm().mode not in config.twotag_modes
    d0 = gcm().D0

    ret = True
    if apply_ip_cut:
        log_ipchi2 = np.log(df[ipchi2(d0)])
        ret &= log_ipchi2 < 1.
    for passes in (df[pt(d0)] > 4000.,
                   df[vchi2(d0)] < 4.,
                   df[maxdoca(d0)] < .2):
        ret &= passes
    return ret
def overlap_plotting():
    """Plot delta mass of candidates kept/removed by the right-sign veto.

    Three pages are written to ``RS_candidates.pdf`` in the 'selection'
    output folder: candidates passing ``remove_right_sign_candidates``,
    candidates failing it, and both overlaid with a legend. The complete
    selection and the misid cut are applied throughout.
    """
    dm = vars.dtf_dm()
    df = gcm().get_data([dm])
    sel = extended_selection.get_complete_selection(True)
    sel &= misid_selection.misid_cut()
    passed = remove_right_sign_candidates()

    dark, light = '#006EB6', '#D3EFFB'
    outfile = gcm().get_output_path('selection') + 'RS_candidates.pdf'
    with PdfPages(outfile) as pdf:
        nbins = 50
        xmin = min(df[sel][dm])
        xmax = max(df[sel][dm])
        kept = df[sel & passed][dm]
        removed = df[sel & ~passed][dm]

        # (histograms as (values, colour, label), y label, draw legend)
        pages = (
            ([(kept, dark, 'Ghost')], 'Arbitrary units', False),
            ([(removed, dark, 'Ghost')], 'Arbitrary units', False),
            ([(kept, light, 'Kept'), (removed, dark, 'Removed')],
             'Candidates', True),
        )
        for layers, ylabel, with_legend in pages:
            fig, ax = plt.subplots(figsize=(10, 10))
            for values, colour, label in layers:
                ax.hist(values, bins=nbins, range=(xmin, xmax),
                        color=colour, edgecolor=colour, label=label)
            ax.set_xlim((xmin, xmax))
            ax.set_xlabel(vars.dtf_dm.latex(with_unit=True))
            ax.set_ylabel(ylabel)
            if with_legend:
                ax.legend()
            pdf.savefig(fig)
            plt.clf()
def full_selection():
    """Return the combined mask of all individual selection steps.

    The PID selection is only applied when ``gcm().mc`` is None, and the D0
    lifetime cut only for modes not in ``config.twotag_modes``.
    """
    # NOTE(review): block structure was reconstructed; it is assumed that
    # only pid_selection() depends on the mc flag - confirm.
    sel = pid_fiducial_selection()
    if gcm().mc is None:
        sel &= pid_selection()
    for step in (mass_fiducial_selection, d0_selection, slow_pion,
                 dtf_cuts):
        sel &= step()
    if gcm().mode not in config.twotag_modes:
        sel &= d0_lifetime_permille()
    return sel
def randomly_remove_candidates():
    """Build a mask that randomly keeps one of several multiple candidates.

    Candidates passing the complete selection, the right-sign veto and the
    clone removal are shuffled; among rows sharing the same 'eventNumber'
    and 'D0_PT' only the first one in shuffled order is flagged True.
    The returned series is indexed by the shuffled selected candidates.
    """
    df = gcm().get_data([vars.evt_num(), vars.pt(gcm().D0)])
    fsel = extended_selection.get_complete_selection(True)
    passed = remove_right_sign_candidates()
    passed &= remove_clones()

    selected = df[fsel & passed]
    # Shuffling makes the choice of the surviving candidate random.
    shuffled_index = np.random.permutation(selected.index)
    shuffled = selected.reindex(shuffled_index)
    is_repeat = shuffled.duplicated(['eventNumber', 'D0_PT'])
    return ~is_repeat
def prep_data_for_sklearn(**kwargs):
    """Split the BDT data into train and test samples.

    The label columns are cast with the builtin ``bool``; the ``np.bool``
    alias used before was deprecated in NumPy 1.20 and later removed.

    :param kwargs: forwarded to ``get_bdt_data``; 'sklearn' is forced to
        True. If 'comb_data' is truthy the combinatorial background BDT
        variables are used as features, otherwise the random slow pion ones.
    :returns: ``((train, test, train_labels, test_labels), features,
        spectators)``, where the labels are boolean and features/spectators
        are lists of column names.
    """
    if kwargs.get('comb_data', False):
        feature_vars = gcm().comb_bkg_bdt_vars
    else:
        feature_vars = gcm().rand_spi_bdt_vars
    features = [f.functor(f.particle) for f in feature_vars]
    spectators = [f.functor(f.particle) for f in gcm().spectator_vars]

    kwargs['sklearn'] = True
    data = get_bdt_data(**kwargs)
    # Fixed seed keeps the split reproducible.
    train, test = train_test_split(data, random_state=43)
    samples = (train, test,
               train['labels'].astype(bool),
               test['labels'].astype(bool))
    return samples, features, spectators
def get_efficiency_gen():
    """Return ``compute_efficiency`` evaluated on the model sample.

    The frame returned by ``get_model`` gets 'cosphi' and 'sinphi' columns
    added before it is passed on.
    """
    mode = gcm()
    all_vars = mode.phsp_vars + [mode.ltime_var]
    columns = [v.var for v in all_vars if 'phi' not in v.var]
    columns.extend(['cosphi', 'sinphi'])
    log.info('Getting efficiencies for {}'.format(', '.join(columns)))

    gen = get_model()
    gen['cosphi'] = np.cos(gen.phi1)
    gen['sinphi'] = np.sin(gen.phi1)
    return compute_efficiency(gen)
def fit():
    """Run the extended mass fit on the selected data and dump the workspace.

    The model 'total' of a freshly set up workspace is fitted to the delta
    mass and D0 mass of candidates passing the final selection. A plot with
    the start values is produced before fitting; a bad fit result is logged
    as an error but the workspace is dumped regardless.
    """
    # TODO: rewrite selection to use gcm itself
    mode = gcm()
    sel = selection.get_final_selection()
    selected = mode.get_data([dtf_dm(), m(mode.D0)])[sel]

    from . import fit_config
    from ROOT import RooFit as RF
    from .fit_setup import setup_workspace

    wsp, _ = setup_workspace()
    dataset = fit_config.pandas_to_roodataset(selected, wsp.set('datavars'))
    model = wsp.pdf('total')

    plot_fit('_start_values', wsp=wsp)

    fit_options = (RF.NumCPU(4), RF.Save(True), RF.Strategy(2),
                   RF.Extended(True))
    result = model.fitTo(dataset, *fit_options)
    fit_ok = helpers.check_fit_result(result, log)
    if not fit_ok:
        log.error('Bad fit quality')
    fit_config.dump_workspace(mode, wsp)
def plot_fit(suffix=None, wsp=None):
    """Draw the fit projections of the D0 mass and delta mass.

    For each of the two observables one page with the default options and
    one with ``do_pulls=False`` is written to ``fits<suffix>.pdf`` in the
    'sweight_fit' output folder.

    :param suffix: appended to the file name stem; nothing is appended when
        None.
    :param wsp: workspace to plot; loaded through
        ``fit_config.load_workspace`` when None.
    """
    from . import roofit_to_matplotlib
    from . import fit_config

    for shape_name in ('RooCruijff', 'RooJohnsonSU', 'RooBackground'):
        shapes.load_shape_class(shape_name)

    mode = gcm()
    if wsp is None:
        wsp = fit_config.load_workspace(mode)

    sel = selection.get_final_selection()
    selected = mode.get_data([dtf_dm(), m(mode.D0)])[sel]
    data = fit_config.pandas_to_roodataset(selected, wsp.set('datavars'))

    fit_config.WS_DMASS_NAME = dtf_dm()
    fit_config.WS_MASS_NAME = m(mode.D0)

    tag = '' if suffix is None else suffix
    outfile = mode.get_output_path('sweight_fit') + 'fits{}.pdf'.format(tag)
    with_comb_bkg = mode.mode in config.twotag_modes
    with PdfPages(outfile) as pdf:
        for func in [m, dtf_dm]:
            # First page uses the plotter's default, second one no pulls.
            for extra in ({}, {'do_pulls': False}):
                roofit_to_matplotlib.plot_fit(
                    mode.D0, wsp, func, data=data, pdf=pdf,
                    do_comb_bkg=with_comb_bkg, **extra)
def phsp_variables(df):
    """Returns m12, m34, cos1, cos2, phi1

    For a real frame the five quantities computed by
    ``vec_phsp_variables`` are returned as a DataFrame indexed like ``df``.
    In a dummy run the needed columns are only accessed and 1. is returned.
    """
    mode = gcm()

    def kinematics(particle, mass_key):
        # pt, eta, phi columns of one particle plus its mass hypothesis
        return (df[vars.dtf_pt(particle)],
                df[vars.dtf_eta(particle)],
                df[vars.dtf_phi(particle)],
                config.PDG_MASSES[mass_key])

    if is_dummy_run(df):
        # Only touch the columns so that they are registered as needed.
        for particle, mass_key in ((mode.K, 'K'), (mode.Pi_OS1, 'Pi'),
                                   (mode.Pi_SS, 'Pi'), (mode.Pi_OS2, 'Pi')):
            kinematics(particle, mass_key)
        return 1.

    # implementation using pybind11::array requires some special treatment
    # here, otherwise the passed arrays are of non-matching type.
    arguments = (kinematics(mode.Pi_OS1, 'Pi') + kinematics(mode.Pi_SS, 'Pi')
                 + kinematics(mode.K, 'K') + kinematics(mode.Pi_OS2, 'Pi'))
    vals = vec_phsp_variables(*arguments)
    names = ('m12', 'm34', 'cos1', 'cos2', 'phi1')
    return pd.DataFrame(
        {name: vals[pos] for pos, name in enumerate(names)},
        index=df.index)
def sig_sec_comb_stack(v, df):
    """Draw a stacked histogram of the sWeighted fit components.

    The combinatorial, random slow pion and signal contributions are
    stacked in this order from bottom to top; the legend lists them in
    reverse so that it reads top to bottom.

    Changes with respect to the previous version: the bars are anchored at
    the left bin edges with ``align='edge'`` (the default alignment of
    ``Axes.bar`` is 'center' in current matplotlib, which shifted every bar
    by half a bin), and the TeX label is a raw string so that ``\\pi`` is
    not an invalid escape sequence.

    :param v: variable description providing ``var``, ``convert``,
        ``binning`` as (nbins, xmin, xmax) and ``xlabel``.
    :param df: frame holding the column ``v.var``.
    :returns: the matplotlib figure.
    """
    sweights = get_sweights(gcm())
    sig_wgt = sweights['sig']
    rpi_wgt = sweights['rnd']
    comb_wgt = sweights['comb']

    fig, ax = plt.subplots(figsize=(10, 10))

    if v.convert is None:
        data = df[v.var]
    else:
        data = v.convert(df[v.var])

    nbins, xmin, xmax = v.binning
    h_sig, edges = np.histogram(
        data, bins=nbins, range=(xmin, xmax), weights=sig_wgt)
    h_rpi, _ = np.histogram(
        data, bins=nbins, range=(xmin, xmax), weights=rpi_wgt)
    h_comb, _ = np.histogram(
        data, bins=nbins, range=(xmin, xmax), weights=comb_wgt)

    left = edges[:-1]
    width = edges[1:] - edges[:-1]

    colours = palettable.tableau.TableauMedium_10.hex_colors[:3]
    csig, crpi, ccomb = colours

    ax.bar(left, h_comb, width, align='edge', color=ccomb,
           label='Combinatorial', edgecolor=ccomb)
    ax.bar(left, h_rpi, width, align='edge', color=crpi, bottom=h_comb,
           label=r'Random $\pi_s$', edgecolor=crpi)
    ax.bar(left, h_sig, width, align='edge', color=csig,
           bottom=h_comb + h_rpi, label='Signal', edgecolor=csig)

    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles[::-1], labels[::-1], loc='best')
    ax.set_xlabel(v.xlabel)
    ax.set_xlim((xmin, xmax))
    ax.yaxis.set_visible(False)
    return fig
def rand_spi_sideband_region(df):
    """Return the mask of the D0 mass peak combined with delta mass sidebands.

    Requires |m(D0) - PDG D0 mass| < 18 and
    |delta m - PDG delta mass| > 2.3, giving a sample enriched in random
    slow pions.
    """
    d0_offset = np.abs(df[m(gcm().D0)] - config.PDG_MASSES['D0'])
    dm_offset = np.abs(df[dtf_dm()] - config.PDG_MASSES['delta'])
    ret = True
    ret &= d0_offset < 18.
    ret &= dm_offset > 2.3
    return ret
def dump_classifiers(classifiers, comb_bkg=False):
    """Store the classifiers as ``classifiers.p`` in the BDT output folder.

    :param classifiers: object handed to ``helpers.dump``.
    :param comb_bkg: choose the 'bdt_comb_bkg' folder instead of
        'bdt_rand_spi'.
    """
    bdt_folder = 'bdt_comb_bkg' if comb_bkg else 'bdt_rand_spi'
    target = gcm().get_output_path(bdt_folder) + 'classifiers.p'
    helpers.dump(classifiers, target)
def double_misid_d0(df):
    """Returns d0 mass with changed kaon and ss pion mass hypothesis

    The kaon is given the pion mass and the same-sign pion the kaon mass.
    In a dummy run 1 is returned instead of a series.
    """
    mode = gcm()

    def kinematics(particle, mass_key):
        # pt, eta, phi columns of one particle plus its mass hypothesis
        return (df[vars.dtf_pt(particle)],
                df[vars.dtf_eta(particle)],
                df[vars.dtf_phi(particle)],
                config.PDG_MASSES[mass_key])

    arguments = (kinematics(mode.K, 'Pi') + kinematics(mode.Pi_SS, 'K')
                 + kinematics(mode.Pi_OS1, 'Pi')
                 + kinematics(mode.Pi_OS2, 'Pi'))
    val = double_misid_d0_mass(*arguments)

    if is_dummy_run(df):
        return 1
    return pd.Series(val, name=vars.m(gcm().D0), index=df.index)
def mass_signal_region(df):
    """Return the mask of the signal peak in both D0 mass and delta mass.

    Requires |m(D0) - PDG D0 mass| < 18 and
    |delta m - PDG delta mass| < 0.5, giving a signal enriched sample.
    """
    d0_offset = np.abs(df[m(gcm().D0)] - config.PDG_MASSES['D0'])
    dm_offset = np.abs(df[dtf_dm()] - config.PDG_MASSES['delta'])
    ret = True
    ret &= d0_offset < 18.
    ret &= dm_offset < 0.5
    return ret
def _ltime_ratio(df):
    """Return the D0 lifetime column divided by ``config.Dz_ltime``.

    The result is a series named 'ltime_ratio' indexed like ``df``; in a
    dummy run 1 is returned after the column has been accessed.
    """
    ltime_column = vars.ltime(gcm().D0)
    # Evaluated before the dummy check so the column access always happens.
    ratio = df[ltime_column] / config.Dz_ltime
    if not is_dummy_run(df):
        return pd.Series(ratio, name='ltime_ratio', index=df.index)
    return 1
def setup_pdf(wsp):
    """Build the 2D mass fit model 'total' inside the workspace.

    'signal' and 'random' product pdfs are always created; for modes in
    ``config.twotag_modes`` a 'combinatorial' component with yield 'NBkg' is
    added as well. The marker 'set_up_done' is created at the end, and the
    function returns immediately (with None) when it already exists.

    :param wsp: workspace the pdfs are created in.
    :returns: list of the variable lists returned by the shape builders,
        followed by the (name, label) pairs of the three yields.
    """
    # Only call this function once on a workspace
    if wsp.var('set_up_done'):
        return

    mode = modes.gcm()
    is_twotag = mode.mode in config.twotag_modes

    sig_m_key, sig_dm_key, bkg_dm_key = mode.shapes
    build_sig_m = shapes.d0_shapes[sig_m_key]
    build_sig_dm = shapes.dst_d0_shapes[sig_dm_key]
    build_bkg_dm = shapes.dst_d0_shapes[bkg_dm_key]

    variables = []

    # Variables for the signal pdf
    sig_m, vs = build_sig_m('', wsp, mode)
    variables.append(vs)
    if is_twotag:
        bkg_m, vs = shapes.d0_bkg('', wsp, mode)
        variables.append(vs)

    # delta random slow
    slow_pi_dm, vs = build_bkg_dm('sp', wsp)
    variables.append(vs)
    if is_twotag:
        bkg_dm, vs = build_bkg_dm('bkg', wsp)
        variables.append(vs)
    sig_dm, vs = build_sig_dm('', wsp, mode)
    variables.append(vs)

    # Signal 2D pdf
    wsp.factory("PROD::signal({}, {})".format(sig_m, sig_dm))
    wsp.factory("PROD::random({}, {})".format(sig_m, slow_pi_dm))
    if is_twotag:
        wsp.factory("PROD::combinatorial({}, {})".format(bkg_m, bkg_dm))

    yield_names = ['NSig', 'NSPi']
    if is_twotag:
        yield_names.append('NBkg')
    for yield_name in yield_names:
        wsp.factory(mode.get_rf_vars(yield_name))

    # The 'NBkg' label is listed for every mode, as before.
    variables.append([
        ('NSig', r'$N_{\text{Sig}}$'),
        ('NSPi', r'$N_{\text{Rnd}}$'),
        ('NBkg', r'$N_{\text{Cmb}}$'),
    ])

    # Final model
    if is_twotag:
        total = "SUM::total(NSig*signal,NSPi*random,NBkg*combinatorial)"
    else:
        total = "SUM::total(NSig*signal,NSPi*random)"
    wsp.factory(total)

    wsp.factory('set_up_done[1]')
    return variables
def pid_fiducial_selection(df):
    """Return the mask of the fiducial momentum and eta window.

    Every D0 daughter must satisfy 3000 <= p < 100000 and 2 <= eta < 5.
    """
    ret = True
    for part in gcm().D0.all_daughters():
        momentum = p(part)
        pseudorapidity = eta(part)
        for passes in (df[momentum] >= 3000.,
                       df[momentum] < 100000.,
                       df[pseudorapidity] >= 2.,
                       df[pseudorapidity] < 5.):
            ret &= passes
    return ret
def _apply_pid_cut(df, min_pi_nnpi=0.3, max_pi_nnk=0.7, min_k_nnk=0.3,
                   max_k_nnpi=0.7, max_k_nnmu=0.2, max_pi_nnmu=0.2):
    """Return the mask of the ProbNN requirements on kaons and pions.

    Each kaon needs ProbNNk above ``min_k_nnk``, ProbNNpi below
    ``max_k_nnpi`` and ProbNNmu below ``max_k_nnmu``. Each pion needs
    ProbNNpi above ``min_pi_nnpi``, ProbNNk below ``max_pi_nnk`` and
    ProbNNmu below ``max_pi_nnmu``.
    """
    ret = True

    for kaon in gcm().head.all_pid(config.kaon):
        is_kaon = df[probnnk(kaon)] > min_k_nnk
        not_pion = df[probnnpi(kaon)] < max_k_nnpi
        ret &= is_kaon & not_pion
        ret &= df[probnnmu(kaon)] < max_k_nnmu

    for pion in gcm().head.all_pid(config.pion):
        is_pion = df[probnnpi(pion)] > min_pi_nnpi
        not_kaon = df[probnnk(pion)] < max_pi_nnk
        ret &= is_pion & not_kaon
        ret &= df[probnnmu(pion)] < max_pi_nnmu

    return ret
def lifetime_study(correct_efficiencies=False):
    """Plot the phase-space variables in slices of the D0 lifetime.

    The lifetime slices are taken from quantiles (steps of 0.2) of the
    selected data. For every phase-space variable the normalised
    distributions of all slices are overlaid on one page of the output pdf
    in the 'effs' folder.

    :param correct_efficiencies: weight candidates with the inverse of
        ``get_efficiency()`` and write to ``DATA_ltime_dep_effs.pdf``
        instead of ``DATA_ltime_dep.pdf``.
    """
    ltime = gcm().ltime_var.var

    # Current mode stuff
    data = gcm().get_data([ltime])
    add_variables.append_phsp(data)
    df_sel = final_selection.get_final_selection()
    df_sel &= selection.delta_mass_signal_region()
    data['weight'] = 1.

    pdf_name = ('DATA_ltime_dep_effs.pdf' if correct_efficiencies
                else 'DATA_ltime_dep.pdf')
    outfile = gcm().get_output_path('effs') + pdf_name

    percentiles = np.arange(0, 1.1, 0.2)
    boundaries = helpers.weighted_quantile(data[ltime][df_sel], percentiles)
    if correct_efficiencies:
        data['weight'] = 1. / get_efficiency()
        # NOTE(review): block structure was reconstructed; it is assumed
        # that the first boundary is only dropped together with the
        # efficiency correction - confirm.
        boundaries = boundaries[1:]

    marker_options = dict(fmt='o', markersize=5, capthick=1, capsize=0,
                          elinewidth=2, alpha=1)

    with PdfPages(outfile) as pdf:
        for var in gcm().phsp_vars:
            fig, ax = plt.subplots(figsize=(10, 10))
            nbins = int(var.binning[0] / 5.)
            hist_range = var.binning[1:]
            for low, high in zip(boundaries[:-1], boundaries[1:]):
                in_slice = (data[ltime] > low) & (data[ltime] < high)
                chosen = df_sel & in_slice
                df = data[var.var][chosen]
                weight = data['weight'][chosen]

                # Boundaries are shown multiplied by 1000 in the legend.
                shown = [low * 1000, high * 1000]
                rlow, prec = helpers.rounder(low * 1000, shown)
                rhigh, _ = helpers.rounder(high * 1000, shown)
                spec = '{{:.{}f}}'.format(prec)
                label = r'${} < \tau \mathrm{{ [ps]}} < {}$'.format(
                    spec.format(rlow), spec.format(rhigh))

                values, edges = np.histogram(
                    df, bins=nbins, range=hist_range, weights=weight)
                # Sum of squared weights gives the per-bin variance.
                err, edges = np.histogram(
                    df, bins=nbins, range=hist_range, weights=weight**2)
                norm = np.sum(values)
                values = values / norm
                err = np.sqrt(err) / norm

                x_ctr = (edges[1:] + edges[:-1]) / 2.
                x_err = (edges[1:] - edges[:-1]) / 2.
                ax.errorbar(x_ctr, values, err, x_err, label=label,
                            **marker_options)
            ax.set_xlabel(var.xlabel)
            ax.yaxis.set_visible(False)
            ax.legend()
            pdf.savefig(plt.gcf())
            plt.close()
def get_sweights(do_comb_bkg=False):
    """Compute sWeights of the fit components for the selected candidates.

    The pdfs 'signal' and 'random' (plus 'combinatorial' for modes in
    ``config.twotag_modes``) of the stored workspace are evaluated per
    candidate, scaled by their fitted yields and normalised per candidate
    before being passed to ``splot.compute_sweights``.

    :param do_comb_bkg: ignored; the value is always derived from the
        current mode.
    :returns: frame with columns 'sig', 'rnd' and 'comb', indexed like the
        selected data. 'comb' is 0.0 when no combinatorial component exists.
    """
    helpers.allow_root()
    df = gcm().get_data([m(gcm().D0), dtf_dm()])
    from . import fit_config
    from hep_ml import splot

    for shape_name in ('RooCruijff', 'RooJohnsonSU', 'RooBackground'):
        shapes.load_shape_class(shape_name)

    wsp = fit_config.load_workspace(gcm())
    sel = selection.get_final_selection()
    do_comb_bkg = gcm().mode in config.twotag_modes
    df = df[sel]

    # (column name, pdf, name of the yield variable)
    components = [('sig', wsp.pdf('signal'), 'NSig'),
                  ('rnd', wsp.pdf('random'), 'NSPi'),
                  ('comb', wsp.pdf('combinatorial'), 'NBkg')]
    if not do_comb_bkg:
        components = components[:2]

    # Evaluate all pdfs first, then scale them by the yields.
    densities = [call_after_set(pdf, wsp, **df)
                 for _, pdf, _ in components]
    scaled = {}
    for (column, _, yield_name), density in zip(components, densities):
        scaled[column] = density * wsp.var(yield_name).getVal()

    probs = pd.DataFrame(scaled, index=df.index)
    probs = probs.div(probs.sum(axis=1), axis=0)

    sweights = splot.compute_sweights(probs)
    sweights.index = probs.index
    if not do_comb_bkg:
        sweights['comb'] = 0.0
    return sweights
def load_reweighter():
    """Load ``reweighter.p`` from the 'effs' folder of the matching RS mode.

    The mode is recreated from polarity, year and short name to get rid of
    potential MC flags. If it is one of the two wrong-sign modes, the
    corresponding right-sign mode is used instead.
    """
    current = gcm()
    # Just recreate the mode here to get rid of potential MC flags
    mode = get_mode(current.polarity, current.year, current.mode_short)

    # Hard coded check here: Use the RS mode if WS is supplied.
    ws_to_rs = ((config.D0ToKpipipi_WS, 'RS'),
                (config.D0ToKpipipi_2tag_WS, '2tag_RS'))
    for ws_mode, rs_short in ws_to_rs:
        if mode.mode == ws_mode:
            mode = get_mode(mode.polarity, mode.year, rs_short)

    return helpers.load(mode.get_output_path('effs') + 'reweighter.p')
def remove_right_sign_candidates():
    """Remove wrong sign D0 candidates which are combined and end up in the
    signal window in the right sign sample

    The right-sign sample passing the complete selection is merged with the
    wrong-sign sample on 'eventNumber'. Pairs with the right-sign delta mass
    within 1 of the PDG value and D0 pt values differing by less than 1 are
    treated as overlaps.

    :returns: boolean ``pd.Series``. For modes not in
        ``config.wrong_sign_modes`` it is all True on the right-sign index;
        otherwise it is indexed like the wrong-sign data and False for
        entries whose index appears among the overlaps.
    """
    # Get the necessary information from the current mode
    # (a duplicated assignment of ``polarity`` was dropped here).
    year = gcm().year
    polarity = gcm().polarity
    if gcm().mode not in config.twotag_modes:
        rs, ws = 'RS', 'WS'
    else:
        rs, ws = '2tag_RS', '2tag_WS'

    with MODE(polarity, year, rs):
        RS = gcm().get_data(
            [vars.evt_num(), vars.run_num(), vars.dtf_dm(),
             vars.pt(gcm().D0)])
        rs_sel = extended_selection.get_complete_selection(True)

    # RS modes should not be selected using this:
    if gcm().mode not in config.wrong_sign_modes:
        return pd.Series(True, RS.index)

    with MODE(polarity, year, ws):
        WS = gcm().get_data([
            vars.evt_num(), vars.dtf_dm(), vars.pt(gcm().D0),
            vars.dtf_chi2(gcm().head)
        ])

    # NOTE(review): recent pandas releases appear to reject combining ``on``
    # with ``left_index`` - confirm against the pandas version in use.
    OL = RS[rs_sel].merge(WS, on=['eventNumber'], left_index=True,
                          suffixes=['_RS', '_WS'])
    dm_ref = config.PDG_MASSES['delta']
    OLS = OL.query('(abs(delta_m_dtf_RS-{})<1.) &'
                   '(abs(D0_PT_RS-D0_PT_WS)<1.)'.format(dm_ref))
    return pd.Series(~WS.index.isin(OLS.index), index=WS.index)
def plot_efficiencies(sw=False, comb_bkg=False):
    """Plot classifier efficiencies against every spectator variable.

    For each spectator variable and each of the classifiers 'Exponential',
    'KnnFlatness' and 'BinFlatness' a separation page is added, followed by
    one efficiency page for the test labels and one for their negation.
    Output goes to ``effs.pdf`` in the BDT output folder.

    :param sw: forwarded to ``bdt_data.prep_data_for_sklearn``.
    :param comb_bkg: use the combinatorial background BDT setup instead of
        the random slow pion one.
    """
    bdt_folder = 'bdt_comb_bkg' if comb_bkg else 'bdt_rand_spi'
    classifiers = bdt_utils.load_classifiers(comb_bkg=comb_bkg)
    log.info('Plotting efficiencies for {} {} {}'.format(
        gcm().mode, gcm().polarity, gcm().year))

    samples, features, spectators = bdt_data.prep_data_for_sklearn(
        sw=sw, comb_data=comb_bkg)
    train, test, train_lbl, test_lbl = samples

    outfile = gcm().get_output_path(bdt_folder) + 'effs.pdf'
    with PdfPages(outfile) as pdf:
        for var in gcm().spectator_vars:
            for bdt_name in ['Exponential', 'KnnFlatness', 'BinFlatness']:
                title = '{}: {}'.format(
                    bdt_name, var.functor.latex(var.particle))
                add_separation_page(pdf, title)
                # First the labelled class, then its complement.
                for labels in (test_lbl, ~test_lbl):
                    fig = bdt.plot_eff(
                        var, test[features + spectators],
                        classifiers[bdt_name], labels, test.weights,
                        features=features)
                    pdf.savefig(fig)
                    plt.clf()
def get_named_bdt_discriminant(df, name='KnnFlatness', comb_bkg=False):
    """Return the probability of class index 1 from the named classifier.

    In a dummy run the columns of ``gcm().bdt_vars`` (except the angle) are
    only accessed and 1 is returned. Otherwise the angle is appended when it
    is among the features, variables with a ``convert`` callable are
    converted in place on ``df``, and the classifier is evaluated on the
    feature columns.

    :param df: frame holding the feature columns; modified in place by the
        conversions.
    :param name: key of the classifier in the loaded classifier mapping.
    :param comb_bkg: use the combinatorial background BDT instead of the
        random slow pion one.
    :returns: ``pd.Series`` of probabilities indexed like ``df``.
    """
    # Trigger the loading of the needed objects
    if selective_load.is_dummy_run(df):
        for f in gcm().bdt_vars:
            if f.functor != vars.angle:
                df[f.functor(f.particle)]
        return 1

    log.info('Getting discriminant {} for {}'.format(
        name, 'comb. bkg' if comb_bkg else 'rand. pion bkg.'))

    if comb_bkg:
        bdt_vars = gcm().comb_bkg_bdt_vars
    else:
        bdt_vars = gcm().rand_spi_bdt_vars
    features = [f.functor(f.particle) for f in bdt_vars]
    log.info('Features: {}'.format(' '.join(features)))

    if vars.angle() in features:
        log.info('Adding angle.')
        add_variables.append_angle(df)

    for f in bdt_vars:
        if f.convert is None:
            continue
        log.info('Converting {}'.format(f.var))
        df[f.var] = f.convert(df[f.var])

    inputs = df[features]
    classifiers = bdt_utils.load_classifiers(comb_bkg=comb_bkg)
    assert False not in (features == inputs.columns), 'Mismatching feature order'  # NOQA

    bdt = classifiers[name]
    # Second row after transposing is the probability of class index 1.
    probs = bdt.predict_proba(inputs).transpose()[1]
    log.info('Returning probability.')
    return pd.Series(probs, index=inputs.index)