示例#1
0
def compute_efficiency(df):
    """Evaluate the stored BDT reweighter on ``df`` and return row weights.

    A copy of ``df`` gets ``cosphi``/``sinphi`` columns (from ``phi1``), each
    input column is rescaled with the limits taken from its variable binning,
    and the reweighter loaded through ``bdt_utils.load_reweighter`` predicts
    one weight per row.  Rows whose decay time lies outside the hard-coded
    window get weight 0; weights are capped at 6 and divided by 6.

    ``df`` itself is not modified.  Returns a ``pandas.Series`` indexed like
    ``df``.
    """
    ltime_var = gcm().ltime_var
    all_vars = gcm().phsp_vars + [ltime_var]
    columns = [v.var for v in all_vars if 'phi' not in v.var]
    columns += ['cosphi', 'sinphi']
    log.info('Getting efficiencies for {}'.format(', '.join(columns)))

    # Work on a copy so the caller's frame keeps its original values.
    data = df.copy()
    data['cosphi'] = np.cos(data.phi1)
    data['sinphi'] = np.sin(data.phi1)

    # The decay-time window has to be evaluated BEFORE the rescaling below,
    # and both edges must be combined: previously the upper cut overwrote
    # the lower one, so too-short decay times were never rejected.
    ltime = data[ltime_var.var]
    failed_lcut = (ltime < 0.0001725) | (ltime > 0.003256)

    limits = {v.var: v.binning[1:] for v in all_vars}
    limits['cosphi'] = (-1., 1)
    limits['sinphi'] = (-1., 1)
    for c in columns:
        mi, ma = limits[c]
        # Same transformation as used when training the reweighter.
        data[c] = (data[c] - mi) / (ma - mi) + 2.

    reweighter = bdt_utils.load_reweighter()
    weight = reweighter.predict_weights(data[columns])
    weight = pd.Series(weight, index=data.index)
    weight[failed_lcut] = 0.
    weight[weight > 6.] = 6.
    return weight/6.
示例#2
0
def train_reweighter():
    """Train a ``GBReweighter`` from the generated model to selected data.

    Both samples get ``cosphi``/``sinphi`` columns and are rescaled with the
    variable binning limits.  250000 rows are drawn from each sample, the
    reweighter is fitted (original: model, target: data passing the final
    selection and the delta-mass signal region) and then stored through
    ``bdt_utils.dump_reweighter``.
    """
    extra_vars = [gcm().ltime_var]
    all_vars = gcm().phsp_vars + extra_vars
    columns = [v.var for v in all_vars if 'phi' not in v.var]
    columns.extend(['cosphi', 'sinphi'])

    # Data of the current mode, with the phase-space variables appended.
    data = gcm().get_data([f.var for f in extra_vars])
    add_variables.append_phsp(data)
    data['cosphi'] = np.cos(data.phi1)
    data['sinphi'] = np.sin(data.phi1)
    df_sel = final_selection.get_final_selection()
    df_sel &= selection.delta_mass_signal_region()

    # Generated model sample.
    gen = get_model()
    gen['cosphi'] = np.cos(gen.phi1)
    gen['sinphi'] = np.sin(gen.phi1)

    limits = {v.var: v.binning[1:] for v in all_vars}
    limits.update(cosphi=(-1., 1), sinphi=(-1., 1))
    for column in columns:
        lower, upper = limits[column]
        # Apply the identical rescaling to data first, then to the model.
        for frame in (data, gen):
            frame[column] = (frame[column] - lower) / (upper - lower) + 2.

    log.info('Training BDT reweighter for {}'.format(', '.join(columns)))
    reweighter = GBReweighter(n_estimators=300, max_depth=5, learning_rate=0.2)

    # Draw the model sample before the data sample (order of random draws).
    original_sample = gen[columns].sample(n=250000)
    target_sample = data[columns][df_sel].sample(n=250000)
    reweighter.fit(original=original_sample, target=target_sample)
    bdt_utils.dump_reweighter(reweighter)
示例#3
0
def create_feature_importance(comb_bkg=False):
    """Write the feature importances of the 'KnnFlatness' classifier as a
    LaTeX table and hand the file to ``tex_compile.convert_tex_to_pdf``.

    ``comb_bkg`` selects which classifier set, feature list and output
    folder are used.
    """
    log.info('Feature importance for {}'.format(
        'comb. bkg' if comb_bkg else 'rand. pion bkg.'))
    bdt = bdt_utils.load_classifiers(comb_bkg)['KnnFlatness']
    if comb_bkg:
        bdt_vars, bdt_folder = gcm().comb_bkg_bdt_vars, 'bdt_comb_bkg'
    else:
        bdt_vars, bdt_folder = gcm().rand_spi_bdt_vars, 'bdt_rand_spi'
    features = [var.functor.latex(var.particle) for var in bdt_vars]

    log.info('Features: {}'.format(' '.join(features)))

    # Largest importance first.
    ranked = sorted(zip(features, bdt.feature_importances_),
                    key=lambda pair: -pair[1])

    fn = gcm().get_output_path(bdt_folder) + 'feature_importance.tex'
    log.info('Saving to {}'.format(fn))
    table_head = (r'\begin{tabular}{l|r}',
                  r'Feature & Importance [\%] \\',
                  r'\hline ')
    with open(fn, 'w') as of:
        for line in table_head:
            print(line, file=of)
        for feature, importance in ranked:
            # Importances are written as percentages without decimals.
            print(r'{} & {:.0f}\\'.format(feature, importance * 100.),
                  file=of)
        print(r'\end{tabular}', file=of)
    tex_compile.convert_tex_to_pdf(fn)
示例#4
0
def plot_comparison():
    """Plot generated model against selected data for every phase-space
    variable and the decay-time variable of the current mode.

    One page per variable is written to ``Gen_DATA_Comp.pdf`` in the
    'effs' output folder.  Data is restricted to candidates passing the
    final selection and the mass signal region.
    """
    extra_vars = [gcm().ltime_var]

    # Current mode stuff
    data = gcm().get_data([f.var for f in extra_vars])
    add_variables.append_phsp(data)
    df_sel = final_selection.get_final_selection()
    df_sel &= selection.mass_signal_region()

    gen = get_model()

    outfile = gcm().get_output_path('effs') + 'Gen_DATA_Comp.pdf'
    with PdfPages(outfile) as pdf:
        for pc in gcm().phsp_vars + extra_vars:
            log.info('Plotting {}'.format(pc.var))
            filled = gen[pc.var]
            errorbars = data[pc.var][df_sel]
            if pc.convert is not None:
                filled = pc.convert(filled)
                errorbars = pc.convert(errorbars)
            ax = comparison.plot_comparison(pc, filled, errorbars, 'Model',
                                            'Data')
            ax.set_xlabel(pc.xlabel)
            ax.yaxis.set_visible(False)
            ax.legend()
            fig = plt.gcf()
            pdf.savefig(fig)
            # Release the figure once it is on disk; otherwise one open
            # figure per variable stays alive in pyplot.
            plt.close(fig)
示例#5
0
def plot_bdt_variables(sw=False, comb_bkg=False):
    """Write signal/background comparison plots for the BDT input variables,
    the spectator variables and the plot-only variables to ``bdt_vars.pdf``.

    ``sw`` and ``comb_bkg`` are forwarded to ``bdt_data.get_bdt_data``;
    ``comb_bkg`` also selects the variable list and the output folder.
    """
    sig_df, bkg_df, sig_wgt, bkg_wgt = bdt_data.get_bdt_data(
        sw=sw, sklearn=False, comb_data=comb_bkg, plot=True)
    if comb_bkg:
        to_plot = gcm().comb_bkg_bdt_vars[:]
        bdt_folder = 'bdt_comb_bkg'
    else:
        to_plot = gcm().rand_spi_bdt_vars[:]
        bdt_folder = 'bdt_rand_spi'
    # The slice above is a copy, so the mode's own list is left untouched.
    to_plot += gcm().spectator_vars + gcm().just_plot

    plot_options = dict(filled_weight=sig_wgt,
                        errorbars_weight=bkg_wgt,
                        normed=False,
                        normed_max=True)

    outfile = gcm().get_output_path(bdt_folder) + 'bdt_vars.pdf'
    with PdfPages(outfile) as pdf:
        for var in tqdm(to_plot, smoothing=0.3):
            ax = plot_comparison(var, sig_df[var.var], bkg_df[var.var],
                                 'Signal', 'Background', **plot_options)
            ax.set_xlabel(var.xlabel)
            ax.set_ylabel('Arbitrary units')
            plot_utils.y_margin_scaler(ax, lf=0, la=True)
            ax.legend()
            pdf.savefig(plt.gcf())
            # Clear and close so figures do not pile up across the loop.
            plt.clf()
            plt.close()
示例#6
0
def mass_fiducial_selection(df):
    """Return the mask of rows inside the fiducial mass window: D0 mass in
    [1810, 1920) and DTF delta mass in [140.5, 160.5)."""
    d0_mass = df[m(gcm().D0)]
    delta_mass = df[dtf_dm()]

    mask = True
    for cut in (d0_mass >= 1810.,
                d0_mass < 1920.,
                delta_mass >= 140.5,
                delta_mass < 160.5):
        mask &= cut

    return mask
示例#7
0
def d0_selection(df):
    """Return the mask for the D0 candidate cuts.

    The log(IP chi2) and pT cuts are only applied when the current mode is
    not one of ``config.twotag_modes``; the vertex chi2 and max DOCA cuts
    always apply.
    """
    cuts = []
    if gcm().mode not in config.twotag_modes:
        cuts.append(np.log(df[ipchi2(gcm().D0)]) < 1.)
        cuts.append(df[pt(gcm().D0)] > 4000.)
    cuts.append(df[vchi2(gcm().D0)] < 4.)
    cuts.append(df[maxdoca(gcm().D0)] < .2)

    mask = True
    for cut in cuts:
        mask &= cut
    return mask
示例#8
0
def overlap_plotting():
    """Write three delta-mass histograms to ``RS_candidates.pdf``:
    candidates flagged by ``remove_right_sign_candidates`` as passing,
    those not passing, and both overlaid with a legend.

    All histograms use candidates passing the complete selection and the
    mis-ID cut, 50 bins, and the range spanned by those candidates.
    """
    df = gcm().get_data([vars.dtf_dm()])
    sel = extended_selection.get_complete_selection(True)
    sel &= misid_selection.misid_cut()
    passed = remove_right_sign_candidates()

    dark, light = '#006EB6', '#D3EFFB'

    outfile = gcm().get_output_path('selection') + 'RS_candidates.pdf'
    with PdfPages(outfile) as pdf:
        nbins = 50
        selected_dm = df[sel][vars.dtf_dm()]
        xmin = min(selected_dm)
        xmax = max(selected_dm)
        x_range = (xmin, xmax)

        kept_dm = df[sel & passed][vars.dtf_dm()]
        removed_dm = df[sel & ~passed][vars.dtf_dm()]
        x_label = vars.dtf_dm.latex(with_unit=True)

        # Page 1: candidates that pass the right-sign removal.
        fig, ax = plt.subplots(figsize=(10, 10))
        ax.hist(kept_dm, bins=nbins, range=x_range,
                color=dark, edgecolor=dark, label='Ghost')
        ax.set_xlabel(x_label)
        ax.set_xlim(x_range)
        ax.set_ylabel('Arbitrary units')
        pdf.savefig(fig)
        plt.clf()

        # Page 2: candidates that fail the right-sign removal.
        fig, ax = plt.subplots(figsize=(10, 10))
        ax.hist(removed_dm, bins=nbins, range=x_range,
                color=dark, edgecolor=dark, label='Ghost')
        ax.set_xlim(x_range)
        ax.set_xlabel(x_label)
        ax.set_ylabel('Arbitrary units')
        pdf.savefig(fig)
        plt.clf()

        # Page 3: both categories drawn on the same axes.
        fig, ax = plt.subplots(figsize=(10, 10))
        ax.hist(kept_dm, bins=nbins, range=x_range,
                color=light, edgecolor=light, label='Kept')
        ax.hist(removed_dm, bins=nbins, range=x_range,
                color=dark, edgecolor=dark, label='Removed')
        ax.set_xlim(x_range)
        ax.set_xlabel(x_label)
        ax.set_ylabel('Candidates')
        ax.legend()
        pdf.savefig(fig)
        plt.clf()
示例#9
0
def full_selection():
    """Combine the individual selection masks with a logical AND.

    The PID selection is only applied when the current mode has no ``mc``
    flag set, and the D0 lifetime cut only when the mode is not one of
    ``config.twotag_modes``.
    """
    mask = pid_fiducial_selection()
    if gcm().mc is None:
        mask &= pid_selection()
    # Cuts that apply unconditionally, in their original order.
    for cut in (mass_fiducial_selection, d0_selection, slow_pion, dtf_cuts):
        mask &= cut()
    if gcm().mode not in config.twotag_modes:
        mask &= d0_lifetime_permille()
    return mask
示例#10
0
def randomly_remove_candidates():
    """Create a mask that keeps one random candidate per group of multiple
    candidates.

    Candidates passing the complete selection, the right-sign removal and
    the clone removal are shuffled; rows that repeat an earlier row's
    ``eventNumber`` and ``D0_PT`` are marked False.  The returned Series is
    indexed by the shuffled, selected candidates only.
    """
    df = gcm().get_data([vars.evt_num(), vars.pt(gcm().D0)])
    fsel = extended_selection.get_complete_selection(True)
    passed = remove_right_sign_candidates()
    passed &= remove_clones()

    candidates = df[fsel & passed]
    # Shuffling makes "first occurrence" a random choice.
    shuffled_index = np.random.permutation(candidates.index)
    shuffled = candidates.reindex(shuffled_index)
    is_repeat = shuffled.duplicated(['eventNumber', 'D0_PT'])
    return ~is_repeat
示例#11
0
def prep_data_for_sklearn(**kwargs):
    """Load the BDT data and split it into training and test samples.

    All keyword arguments are forwarded to ``get_bdt_data`` with
    ``sklearn=True`` forced.  ``comb_data`` (default False) additionally
    selects which feature list of the current mode is returned.

    Returns ``((train, test, train_labels, test_labels), features,
    spectators)`` where the label Series are the ``'labels'`` columns cast
    to bool and ``features``/``spectators`` are lists of column names.
    """
    if kwargs.get('comb_data', False):
        bdt_vars = gcm().comb_bkg_bdt_vars
    else:
        bdt_vars = gcm().rand_spi_bdt_vars
    features = [f.functor(f.particle) for f in bdt_vars]
    spectators = [f.functor(f.particle) for f in gcm().spectator_vars]

    kwargs['sklearn'] = True
    data = get_bdt_data(**kwargs)

    # Fixed seed keeps the split reproducible between runs.
    train, test = train_test_split(data, random_state=43)
    # Builtin ``bool`` instead of ``np.bool``: the alias was deprecated in
    # NumPy 1.20 and removed in 1.24; the resulting dtype is the same.
    return (train, test, train['labels'].astype(bool),
            test['labels'].astype(bool)), features, spectators
示例#12
0
def get_efficiency_gen():
    """Return the efficiency weights of ``compute_efficiency`` evaluated on
    the generated model sample.

    ``cosphi`` and ``sinphi`` columns are added to the frame returned by
    ``get_model`` before it is passed on.
    """
    ltime_var = gcm().ltime_var
    all_vars = gcm().phsp_vars + [ltime_var]
    # The column list is only used for the log message here.
    columns = [v.var for v in all_vars if 'phi' not in v.var]
    columns.extend(['cosphi', 'sinphi'])
    log.info('Getting efficiencies for {}'.format(', '.join(columns)))

    model = get_model()
    model['cosphi'] = np.cos(model.phi1)
    model['sinphi'] = np.sin(model.phi1)
    return compute_efficiency(model)
示例#13
0
def fit():
    """Run the extended mass fit on the selected data of the current mode.

    The 'total' pdf of a freshly set-up workspace is fitted to the data
    passing the final selection.  The start values are plotted beforehand,
    a bad fit result is logged as an error, and the workspace is dumped via
    ``fit_config.dump_workspace`` in either case.
    """
    # Get the data
    # TODO: rewrite selection to use gcm itself
    mode = gcm()
    sel = selection.get_final_selection()

    selected = mode.get_data([dtf_dm(), m(mode.D0)])[sel]

    # Imported here, after the data has been loaded.
    from . import fit_config
    from ROOT import RooFit as RF
    from .fit_setup import setup_workspace

    workspace, _ = setup_workspace()
    dataset = fit_config.pandas_to_roodataset(selected,
                                              workspace.set('datavars'))
    total_pdf = workspace.pdf('total')

    plot_fit('_start_values', wsp=workspace)
    fit_options = (RF.NumCPU(4), RF.Save(True), RF.Strategy(2),
                   RF.Extended(True))
    result = total_pdf.fitTo(dataset, *fit_options)

    fit_ok = helpers.check_fit_result(result, log)
    if not fit_ok:
        log.error('Bad fit quality')
    fit_config.dump_workspace(mode, workspace)
示例#14
0
def plot_fit(suffix=None, wsp=None):
    """Plot the fit projections in D0 mass and delta mass.

    ``suffix`` is appended to the output file name ``fits{suffix}.pdf``
    (nothing is appended for None).  ``wsp`` is the workspace to plot; when
    None it is loaded through ``fit_config.load_workspace``.  Each variable
    is plotted twice: once with the plotting function's default
    ``do_pulls`` and once with ``do_pulls=False``.
    """
    from . import roofit_to_matplotlib
    from . import fit_config
    for shape_name in ('RooCruijff', 'RooJohnsonSU', 'RooBackground'):
        shapes.load_shape_class(shape_name)
    mode = gcm()
    if wsp is None:
        wsp = fit_config.load_workspace(mode)
    sel = selection.get_final_selection()

    selected = mode.get_data([dtf_dm(), m(mode.D0)])[sel]
    data = fit_config.pandas_to_roodataset(selected, wsp.set('datavars'))
    # Module-level names in fit_config are set to the current column names.
    fit_config.WS_DMASS_NAME = dtf_dm()
    fit_config.WS_MASS_NAME = m(mode.D0)

    tag = '' if suffix is None else suffix
    outfile = mode.get_output_path('sweight_fit') + 'fits{}.pdf'.format(tag)
    is_twotag = mode.mode in config.twotag_modes
    with PdfPages(outfile) as pdf:
        for func in (m, dtf_dm):
            for extra in ({}, {'do_pulls': False}):
                roofit_to_matplotlib.plot_fit(
                    mode.D0, wsp, func, data=data, pdf=pdf,
                    do_comb_bkg=is_twotag, **extra)
示例#15
0
def phsp_variables(df):
    """Returns m12, m34, cos1, cos2, phi1 as a DataFrame indexed like df.

    For a dummy run only the needed columns are accessed and 1. is
    returned.
    """
    mode = gcm()

    def four_vector_inputs(particle, mass_key):
        # pt, eta, phi columns of the particle plus its PDG mass hypothesis.
        return [df[vars.dtf_pt(particle)],
                df[vars.dtf_eta(particle)],
                df[vars.dtf_phi(particle)],
                config.PDG_MASSES[mass_key]]

    # implementation using pybind11::array requires some special treatment
    # here, otherwise the passed arrays are of non-matching type. A dummy
    # run therefore only touches the columns and never calls the
    # vectorised function.
    if is_dummy_run(df):
        for particle, mass_key in ((mode.K, 'K'),
                                   (mode.Pi_OS1, 'Pi'),
                                   (mode.Pi_SS, 'Pi'),
                                   (mode.Pi_OS2, 'Pi')):
            four_vector_inputs(particle, mass_key)
        return 1.

    arguments = []
    for particle, mass_key in ((mode.Pi_OS1, 'Pi'),
                               (mode.Pi_SS, 'Pi'),
                               (mode.K, 'K'),
                               (mode.Pi_OS2, 'Pi')):
        arguments.extend(four_vector_inputs(particle, mass_key))
    vals = vec_phsp_variables(*arguments)

    names = ('m12', 'm34', 'cos1', 'cos2', 'phi1')
    return pd.DataFrame(
        {name: vals[pos] for pos, name in enumerate(names)},
        index=df.index)
示例#16
0
def sig_sec_comb_stack(v, df):
    """Draw a stacked histogram of ``v`` split by sWeight component.

    The column ``v.var`` of ``df`` (passed through ``v.convert`` when set)
    is histogrammed three times with the 'comb', 'rnd' and 'sig' sWeights
    returned by ``get_sweights``.  The histograms are stacked in that order
    from bottom to top.  Binning and x range come from ``v.binning``.

    Returns the matplotlib figure.
    """
    sweights = get_sweights(gcm())
    fig, ax = plt.subplots(figsize=(10, 10))
    data = df[v.var] if v.convert is None else v.convert(df[v.var])

    nbins, xmin, xmax = v.binning

    def weighted_hist(weights):
        # All components share the same binning so they can be stacked.
        return np.histogram(data, bins=nbins, range=(xmin, xmax),
                            weights=weights)

    h_sig, edges = weighted_hist(sweights['sig'])
    h_rpi, _ = weighted_hist(sweights['rnd'])
    h_comb, _ = weighted_hist(sweights['comb'])
    left = edges[:-1]
    width = np.diff(edges)

    colours = palettable.tableau.TableauMedium_10.hex_colors[:3]
    csig, crpi, ccomb = colours

    # align='edge' is required because x is the LEFT bin edge; since
    # matplotlib 2.0 the default is 'center', which would shift every bar
    # by half a bin width.
    ax.bar(left,
           h_comb,
           width,
           align='edge',
           color=ccomb,
           label='Combinatorial',
           edgecolor=ccomb)
    ax.bar(left,
           h_rpi,
           width,
           align='edge',
           color=crpi,
           bottom=h_comb,
           # Raw string: '\p' is not a valid escape sequence.
           label=r'Random $\pi_s$',
           edgecolor=crpi)
    ax.bar(left,
           h_sig,
           width,
           align='edge',
           color=csig,
           bottom=h_comb + h_rpi,
           label='Signal',
           edgecolor=csig)

    # Reverse the legend so it reads top-to-bottom like the stack.
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles[::-1], labels[::-1], loc='best')
    ax.set_xlabel(v.xlabel)
    ax.set_xlim((xmin, xmax))
    ax.yaxis.set_visible(False)

    return fig
示例#17
0
def rand_spi_sideband_region(df):
    """Mask for the D0 mass peak combined with the delta mass sidebands:
    D0 mass within 18 of the PDG value and delta mass more than 2.3 away
    from the PDG value."""
    d0_offset = np.abs(df[m(gcm().D0)] - config.PDG_MASSES['D0'])
    dm_offset = np.abs(df[dtf_dm()] - config.PDG_MASSES['delta'])

    mask = True
    mask &= d0_offset < 18.
    mask &= dm_offset > 2.3
    return mask
示例#18
0
def dump_classifiers(classifiers, comb_bkg=False):
    """Store ``classifiers`` as ``classifiers.p`` in the BDT output folder
    selected by ``comb_bkg``."""
    bdt_folder = 'bdt_comb_bkg' if comb_bkg else 'bdt_rand_spi'
    target = gcm().get_output_path(bdt_folder) + 'classifiers.p'
    helpers.dump(classifiers, target)
示例#19
0
def double_misid_d0(df):
    """Returns the D0 mass with swapped kaon and same-sign pion mass
    hypotheses (kaon taken as pion, same-sign pion taken as kaon).

    For a dummy run 1 is returned instead of a Series.
    """
    mode = gcm()

    # (particle, mass hypothesis) in the order expected by the mass helper.
    hypotheses = ((mode.K, 'Pi'),
                  (mode.Pi_SS, 'K'),
                  (mode.Pi_OS1, 'Pi'),
                  (mode.Pi_OS2, 'Pi'))
    arguments = []
    for particle, mass_key in hypotheses:
        arguments += [df[vars.dtf_pt(particle)],
                      df[vars.dtf_eta(particle)],
                      df[vars.dtf_phi(particle)],
                      config.PDG_MASSES[mass_key]]

    val = double_misid_d0_mass(*arguments)
    if is_dummy_run(df):
        return 1
    return pd.Series(val, name=vars.m(gcm().D0), index=df.index)
示例#20
0
def mass_signal_region(df):
    """Mask for the signal peak in both masses: D0 mass within 18 and delta
    mass within 0.5 of the respective PDG value."""
    d0_offset = np.abs(df[m(gcm().D0)] - config.PDG_MASSES['D0'])
    dm_offset = np.abs(df[dtf_dm()] - config.PDG_MASSES['delta'])

    mask = True
    mask &= d0_offset < 18.
    mask &= dm_offset < 0.5
    return mask
示例#21
0
def _ltime_ratio(df):
    """Return the D0 decay time divided by ``config.Dz_ltime`` as a Series
    named 'ltime_ratio'; a dummy run returns 1."""
    d0 = gcm().D0
    ratio = df[vars.ltime(d0)] / config.Dz_ltime
    if not is_dummy_run(df):
        return pd.Series(ratio, name='ltime_ratio', index=df.index)
    return 1
示例#22
0
def setup_pdf(wsp):
    """Build the total mass-fit pdf inside the workspace ``wsp``.

    Does nothing and returns None when the workspace already contains
    'set_up_done'.  Otherwise the shapes named in ``mode.shapes`` are
    created, combined into the 2D 'signal' and 'random' pdfs (plus
    'combinatorial' for two-tag modes) and summed into 'total'.

    Returns a list with one entry per created shape, as returned by the
    shape builders, followed by the list of yield (name, label) pairs.
    """
    # Only call this function once on a workspace
    if wsp.var('set_up_done'):
        return
    mode = modes.gcm()
    sig_m_name, sig_dm_name, bkg_dm_name = mode.shapes

    variables = []

    build_sig_m = shapes.d0_shapes[sig_m_name]
    build_sig_dm = shapes.dst_d0_shapes[sig_dm_name]
    build_bkg_dm = shapes.dst_d0_shapes[bkg_dm_name]
    is_twotag = mode.mode in config.twotag_modes

    # D0 mass shapes: signal, and background for two-tag modes.
    sig_m, shape_vars = build_sig_m('', wsp, mode)
    variables.append(shape_vars)
    if is_twotag:
        bkg_m, shape_vars = shapes.d0_bkg('', wsp, mode)
        variables.append(shape_vars)

    # Delta mass shapes: random slow pion, background (two-tag), signal.
    slow_pi_dm, shape_vars = build_bkg_dm('sp', wsp)
    variables.append(shape_vars)
    if is_twotag:
        bkg_dm, shape_vars = build_bkg_dm('bkg', wsp)
        variables.append(shape_vars)
    sig_dm, shape_vars = build_sig_dm('', wsp, mode)
    variables.append(shape_vars)

    # 2D product pdfs
    wsp.factory("PROD::signal({}, {})".format(sig_m, sig_dm))
    wsp.factory("PROD::random({}, {})".format(sig_m, slow_pi_dm))
    if is_twotag:
        wsp.factory("PROD::combinatorial({}, {})".format(bkg_m, bkg_dm))

    # Yields
    wsp.factory(mode.get_rf_vars('NSig'))
    wsp.factory(mode.get_rf_vars('NSPi'))
    if is_twotag:
        wsp.factory(mode.get_rf_vars('NBkg'))

    # The NBkg entry is listed for every mode, two-tag or not.
    variables.append([
        ('NSig', r'$N_{\text{Sig}}$'),
        ('NSPi', r'$N_{\text{Rnd}}$'),
        ('NBkg', r'$N_{\text{Cmb}}$'),
    ])

    # Final model
    if is_twotag:
        wsp.factory("SUM::total(NSig*signal,NSPi*random,NBkg*combinatorial)")
    else:
        wsp.factory("SUM::total(NSig*signal,NSPi*random)")

    # Marker checked at the top of this function on later calls.
    wsp.factory('set_up_done[1]')

    return variables
示例#23
0
def pid_fiducial_selection(df):
    """Mask requiring every D0 daughter to have momentum in
    [3000, 100000) and pseudorapidity in [2, 5)."""
    mask = True
    for part in gcm().D0.all_daughters():
        momentum = df[p(part)]
        pseudorapidity = df[eta(part)]
        for cut in (momentum >= 3000.,
                    momentum < 100000.,
                    pseudorapidity >= 2.,
                    pseudorapidity < 5.):
            mask &= cut

    return mask
示例#24
0
def _apply_pid_cut(df,
                   min_pi_nnpi=0.3,
                   max_pi_nnk=0.7,
                   min_k_nnk=0.3,
                   max_k_nnpi=0.7,
                   max_k_nnmu=0.2,
                   max_pi_nnmu=0.2):
    """Mask of the ProbNN PID requirements for all kaons and pions below
    the head particle.

    Each particle must exceed the minimum probability for its own species
    and stay below the maxima for the other hadron species and for the
    muon hypothesis.
    """
    head = gcm().head
    mask = True
    for kaon in head.all_pid(config.kaon):
        is_kaon = df[probnnk(kaon)] > min_k_nnk
        not_pion = df[probnnpi(kaon)] < max_k_nnpi
        mask &= is_kaon & not_pion
        mask &= (df[probnnmu(kaon)] < max_k_nnmu)
    for pion in head.all_pid(config.pion):
        is_pion = df[probnnpi(pion)] > min_pi_nnpi
        not_kaon = df[probnnk(pion)] < max_pi_nnk
        mask &= is_pion & not_kaon
        mask &= (df[probnnmu(pion)] < max_pi_nnmu)

    return mask
示例#25
0
def lifetime_study(correct_efficiencies=False):
    """Plot each phase-space variable in slices of decay time.

    The slice boundaries are the 0, 20, ..., 100 % quantiles of the decay
    time of the selected data.  With ``correct_efficiencies`` the entries
    are weighted by ``1 / get_efficiency()`` and the first boundary is
    dropped.  For every variable the normalised slice histograms are drawn
    on one page of the output PDF.
    """
    ltime = gcm().ltime_var.var

    # Current mode stuff
    data = gcm().get_data([ltime])
    add_variables.append_phsp(data)
    df_sel = final_selection.get_final_selection()
    df_sel &= selection.delta_mass_signal_region()
    data['weight'] = 1.

    if correct_efficiencies:
        file_name = 'DATA_ltime_dep_effs.pdf'
    else:
        file_name = 'DATA_ltime_dep.pdf'
    outfile = gcm().get_output_path('effs') + file_name

    percentiles = np.arange(0, 1.1, 0.2)
    boundaries = helpers.weighted_quantile(data[ltime][df_sel], percentiles)
    if correct_efficiencies:
        data['weight'] = 1./get_efficiency()
        boundaries = boundaries[1:]

    marker_style = dict(
        fmt='o', markersize=5, capthick=1, capsize=0, elinewidth=2,
        alpha=1)

    with PdfPages(outfile) as pdf:
        for var in gcm().phsp_vars:
            fig, ax = plt.subplots(figsize=(10, 10))
            # A fifth of the nominal number of bins, same range.
            binning = dict(bins=int(var.binning[0]/5.),
                           range=var.binning[1:])
            for low, high in zip(boundaries[:-1], boundaries[1:]):
                in_slice = (data[ltime] > low) & (data[ltime] < high)
                chosen = df_sel & in_slice

                entries = data[var.var][chosen]
                weight = data['weight'][chosen]

                rlow, prec = helpers.rounder(low*1000, [low*1000, high*1000])
                rhigh, _ = helpers.rounder(high*1000, [low*1000, high*1000])

                spec = '{{:.{}f}}'.format(prec)
                label = r'${} < \tau \mathrm{{ [ps]}}  < {}$'.format(
                    spec.format(rlow), spec.format(rhigh))

                values, edges = np.histogram(entries, weights=weight,
                                             **binning)
                # Sum of squared weights per bin gives the variance.
                err, edges = np.histogram(entries, weights=weight**2,
                                          **binning)
                norm = np.sum(values)
                values = values/norm
                err = np.sqrt(err)/norm
                x_ctr = (edges[1:] + edges[:-1])/2.
                x_err = (edges[1:] - edges[:-1])/2.

                ax.errorbar(x_ctr, values, err, x_err, label=label,
                            **marker_style)
            ax.set_xlabel(var.xlabel)
            ax.yaxis.set_visible(False)
            ax.legend()
            pdf.savefig(plt.gcf())
            plt.close()
示例#26
0
def get_sweights(do_comb_bkg=False):
    """Compute sWeights for the selected data of the current mode.

    The component pdfs of the stored workspace are evaluated per candidate,
    scaled by the fitted yields, normalised per row and passed to
    ``hep_ml.splot.compute_sweights``.

    The ``do_comb_bkg`` argument is overwritten inside the function:
    whether the combinatorial component is included depends only on the
    current mode being one of ``config.twotag_modes``.  Without that
    component a 'comb' column of zeros is added.

    Returns a DataFrame with columns 'sig', 'rnd' and 'comb'.
    """
    helpers.allow_root()
    df = gcm().get_data([m(gcm().D0), dtf_dm()])
    from . import fit_config
    from hep_ml import splot
    for shape_name in ('RooCruijff', 'RooJohnsonSU', 'RooBackground'):
        shapes.load_shape_class(shape_name)
    wsp = fit_config.load_workspace(gcm())

    sel = selection.get_final_selection()
    do_comb_bkg = gcm().mode in config.twotag_modes

    df = df[sel]

    sig_pdf, rnd_pdf, comb_pdf = (
        wsp.pdf(pdf_name)
        for pdf_name in ('signal', 'random', 'combinatorial'))

    # (output column, pdf, name of the yield variable in the workspace)
    components = [('sig', sig_pdf, 'NSig'), ('rnd', rnd_pdf, 'NSPi')]
    if do_comb_bkg:
        components.append(('comb', comb_pdf, 'NBkg'))

    # Evaluate every pdf first, then scale by the yields.
    evaluated = [(column, call_after_set(pdf, wsp, **df), yield_name)
                 for column, pdf, yield_name in components]
    scaled = {}
    for column, prob, yield_name in evaluated:
        scaled[column] = prob*wsp.var(yield_name).getVal()

    probs = pd.DataFrame(scaled, index=df.index)
    # Normalise so the component probabilities of each row sum to one.
    probs = probs.div(probs.sum(axis=1), axis=0)

    sweights = splot.compute_sweights(probs)
    sweights.index = probs.index
    if not do_comb_bkg:
        sweights['comb'] = 0.0

    return sweights
示例#27
0
def load_reweighter():
    """Load ``reweighter.p`` from the 'effs' output folder.

    The mode is recreated from polarity, year and short name to get rid of
    potential MC flags, and the wrong-sign modes are redirected to their
    right-sign counterparts before the path is built.
    """
    current = gcm()
    # Just recreate the mode here to get rid of potential MC flags
    mode = get_mode(current.polarity, current.year, current.mode_short)
    # Hard coded check here: Use the RS mode if WS is supplied.
    ws_to_rs = ((config.D0ToKpipipi_WS, 'RS'),
                (config.D0ToKpipipi_2tag_WS, '2tag_RS'))
    for ws_mode, rs_short in ws_to_rs:
        if mode.mode == ws_mode:
            mode = get_mode(mode.polarity, mode.year, rs_short)
    infile = mode.get_output_path('effs') + 'reweighter.p'
    return helpers.load(infile)
示例#28
0
def remove_right_sign_candidates():
    """Remove wrong sign D0 candidates which are combined and end up
    in the signal window in the right sign sample.

    Returns a boolean ``pandas.Series``.  When the current mode is not in
    ``config.wrong_sign_modes`` it is all True and indexed like the right
    sign data.  Otherwise it is indexed like the wrong sign data and False
    for candidates found in the overlap with selected right sign
    candidates (same ``eventNumber``, right sign delta mass within 1 of the
    reference value, D0 PT difference below 1).
    """
    # Get the necessary information from the current mode
    year = gcm().year
    polarity = gcm().polarity
    # The line below repeats the assignment above and has no further effect.
    polarity = gcm().polarity
    # Pick the short names of the right/wrong sign modes to load.
    if gcm().mode not in config.twotag_modes:
        rs, ws = 'RS', 'WS'
    else:
        rs, ws = '2tag_RS', '2tag_WS'
    # NOTE(review): MODE presumably makes gcm() return the given mode inside
    # the with block -- confirm against its definition.
    with MODE(polarity, year, rs):
        RS = gcm().get_data(
            [vars.evt_num(),
             vars.run_num(),
             vars.dtf_dm(),
             vars.pt(gcm().D0)])
        rs_sel = extended_selection.get_complete_selection(True)
    # RS modes should not be selected using this:
    if gcm().mode not in config.wrong_sign_modes:
        return pd.Series(True, RS.index)

    with MODE(polarity, year, ws):
        WS = gcm().get_data([
            vars.evt_num(),
            vars.dtf_dm(),
            vars.pt(gcm().D0),
            vars.dtf_chi2(gcm().head)
        ])

    # Pair selected RS candidates with WS candidates of the same event;
    # shared column names get the suffixes _RS / _WS.
    # NOTE(review): `on` is combined with `left_index=True`; recent pandas
    # versions may reject this combination. The final isin() below relies on
    # the merged frame carrying the WS index -- verify with the pandas
    # version in use.
    OL = RS[rs_sel].merge(WS,
                          on=['eventNumber'],
                          left_index=True,
                          suffixes=['_RS', '_WS'])
    dm_ref = config.PDG_MASSES['delta']
    # Keep pairs whose RS delta mass is close to the reference value and
    # whose D0 PT agrees between the two samples.
    OLS = OL.query('(abs(delta_m_dtf_RS-{})<1.) &'
                   '(abs(D0_PT_RS-D0_PT_WS)<1.)'.format(dm_ref))

    # True for WS candidates that are NOT part of the overlap.
    return pd.Series(~WS.index.isin(OLS.index), index=WS.index)
示例#29
0
def plot_efficiencies(sw=False, comb_bkg=False):
    """Plot classifier efficiencies against every spectator variable.

    For each spectator variable and each of the classifiers 'Exponential',
    'KnnFlatness' and 'BinFlatness', a separator page and two efficiency
    plots are written to ``effs.pdf``: one using the test labels and one
    using their negation.
    """
    bdt_folder = 'bdt_comb_bkg' if comb_bkg else 'bdt_rand_spi'
    classifiers = bdt_utils.load_classifiers(comb_bkg=comb_bkg)
    log.info('Plotting efficiencies for {} {} {}'.format(
        gcm().mode,
        gcm().polarity,
        gcm().year))
    samples, features, spectators = bdt_data.prep_data_for_sklearn(
        sw=sw, comb_data=comb_bkg)
    # Only the test sample and its labels are used below.
    _, test, _, test_lbl = samples

    outfile = gcm().get_output_path(bdt_folder) + 'effs.pdf'
    with PdfPages(outfile) as pdf:
        for var in gcm().spectator_vars:
            for bdt_name in ('Exponential', 'KnnFlatness', 'BinFlatness'):
                add_separation_page(
                    pdf, '{}: {}'.format(bdt_name,
                                         var.functor.latex(var.particle)))
                for labels in (test_lbl, ~test_lbl):
                    fig = bdt.plot_eff(var,
                                       test[features + spectators],
                                       classifiers[bdt_name],
                                       labels,
                                       test.weights,
                                       features=features)
                    pdf.savefig(fig)
                    plt.clf()
示例#30
0
def get_named_bdt_discriminant(df, name='KnnFlatness', comb_bkg=False):
    """Return the second-column output of ``predict_proba`` for the
    classifier ``name`` as a Series indexed like ``df``.

    ``comb_bkg`` selects the classifier set and the feature list.  Feature
    columns that define a ``convert`` function are converted in place on
    ``df`` before the prediction.  For a dummy run only the needed columns
    are accessed and 1 is returned.
    """
    # Trigger the loading of the needed objects
    if selective_load.is_dummy_run(df):
        for f in gcm().bdt_vars:
            if f.functor != vars.angle:
                df[f.functor(f.particle)]
        return 1

    log.info('Getting discriminant {} for {}'.format(
        name, 'comb. bkg' if comb_bkg else 'rand. pion bkg.'))

    if comb_bkg:
        bdt_vars = gcm().comb_bkg_bdt_vars
    else:
        bdt_vars = gcm().rand_spi_bdt_vars
    features = [f.functor(f.particle) for f in bdt_vars]

    log.info('Features: {}'.format(' '.join(features)))

    if vars.angle() in features:
        log.info('Adding angle.')
        add_variables.append_angle(df)

    for bdt_var in bdt_vars:
        if bdt_var.convert is None:
            continue
        log.info('Converting {}'.format(bdt_var.var))
        df[bdt_var.var] = bdt_var.convert(df[bdt_var.var])

    # Restrict to the features, in the order listed above.
    df = df[features]

    classifiers = bdt_utils.load_classifiers(comb_bkg=comb_bkg)
    assert False not in (features == df.columns), 'Mismatching feature order'
    class_probabilities = classifiers[name].predict_proba(df)
    probs = class_probabilities.transpose()[1]
    log.info('Returning probability.')
    return pd.Series(probs, index=df.index)