Exemplo n.º 1
0
def efficiency(data, args, feat, title=None):
    """
    Study the background efficiency as a function of jet mass for a fixed set
    of inclusive background-efficiency working points.

    Saves plot `figures/efficiency_[feat].pdf`, or
    `figures/[title]_efficiency_[feat].pdf` when `title` is given.

    Arguments:
        data: Pandas data frame from which to read data.
        args: Namespace holding command-line arguments.
        feat: Feature for which to study efficiencies.
        title: Optional prefix for the output file name.

    Returns:
        Tuple of (canvas, args, output path).
    """

    # Background selection and inclusive efficiency working points (percent)
    is_bkg = data['signal'] == 0
    effs = [5, 10, 20, 40, 80]

    # One cut value per working point, taken as a weighted percentile of the
    # background distribution. The percentile is mirrored for features where
    # low values are signal-like.
    cuts = [
        wpercentile(data.loc[is_bkg, feat].values,
                    eff if signal_low(feat) else 100 - eff,
                    weights=data.loc[is_bkg, 'weight_test'].values)
        for eff in effs
    ]

    # One efficiency-vs-mass profile per cut
    profiles = []
    for cut in cuts:

        # Pass-cut mask, with the direction depending on the feature
        if signal_low(feat):
            passes = ~(data[feat] > cut)
        else:
            passes = data[feat] > cut

        profile = ROOT.TProfile('profile_{}_{}'.format(feat, cut), "",
                                len(MASSBINS) - 1, MASSBINS)

        # Rows of (mass, pass flag) for background jets only
        points = np.vstack((data.loc[is_bkg, 'm'].values, passes[is_bkg])).T
        bkg_weights = data.loc[is_bkg, 'weight_test'].values
        root_numpy.fill_profile(profile, points, weights=bkg_weights)

        profiles.append(profile)

    # Delegate the drawing
    c = plot(args, data, feat, profiles, cuts, effs)

    # Output
    if title is not None:
        path = 'figures/' + title + '_efficiency_{}.pdf'.format(
            standardise(feat))
    else:
        path = 'figures/efficiency_{}.pdf'.format(standardise(feat))
    c.save(path=path)
    return c, args, path
Exemplo n.º 2
0
def jetmass (data, args, feat, eff_sig=50):
    """
    Study the jet mass distributions before and after a substructure cut.

    Builds the output path `figures/jetmass_[feat]__eff_sig_[eff_sig].pdf`;
    this function does not itself call `save` on the returned canvas.

    Arguments:
        data: Pandas data frame from which to read data.
        args: Namespace holding command-line arguments.
        feat: Feature for which to plot signal- and background distributions.
        eff_sig: Signal efficiency (in percent) at which to impose the cut.

    Returns:
        Tuple of (canvas, args, output path).
    """

    # Whether low values of the feature are signal-like decides both the
    # percentile to use and the direction of the cut.
    low_is_signal = signal_low(feat)

    # Signal / background selections
    is_sig = data['signal'] == 1
    is_bkg = ~is_sig

    # Cut value: weighted percentile of the signal distribution
    percentile = eff_sig if low_is_signal else 100 - eff_sig
    threshold = wpercentile(data.loc[is_sig, feat].values,
                            percentile,
                            weights=data.loc[is_sig, 'weight_test'].values)

    # Pass-cut mask with the correct direction
    above = data[feat] > threshold
    msk_pass = ~above if low_is_signal else above

    # Delegate the drawing
    c = plot(data, args, feat, msk_pass, is_bkg, eff_sig)

    # Output
    path = 'figures/jetmass_{}__eff_sig_{:d}.pdf'.format(standardise(feat), int(eff_sig))

    return c, args, path
Exemplo n.º 3
0
def plot (*argv):
    """
    Draw the ROC comparison: background rejection versus signal efficiency for
    each tagger, on top of a random-guessing reference curve.

    Arguments (positional, in this order):
        args: Namespace holding command-line arguments; `args.show` is read.
        data: Unused here.
        features: Taggers to draw.
        ROCs: Mapping from feature to (signal efficiency, background
            efficiency) arrays.
        AUCs: Unused here.
        masscut: Whether to mention the mass cut in the plot text.
        pt_range: (min, max) pT shown in the plot text, or None.

    Returns:
        The canvas holding the figure.
    """

    # Unpack arguments
    args, data, features, ROCs, AUCs, masscut, pt_range = argv

    # Canvas
    c = rp.canvas(batch=not args.show)

    # Plots
    # -- Random guessing: the regular grid gets one extra, very narrow first
    #    bin inserted right after the lower edge.
    grid = np.linspace(0.2, 1., 100 + 1, endpoint=True)
    first_step = np.diff(grid[:2])[0]
    edges = np.array([grid[0], grid[0] + 0.01 * first_step] + list(grid[1:]))
    centres = edges[:-1] + 0.5 * np.diff(edges)
    c.hist(np.power(centres, -1.), bins=edges, linecolor=ROOT.kGray + 2, fillcolor=ROOT.kBlack, alpha=0.05, linewidth=1, option='HISTC')

    # -- ROCs, drawn in two passes so that each class gets its own legend
    legend_width = 0.17
    for is_simple in (True, False):

        # Keep the global feature index: it fixes line style and colour
        selected = [(position, name) for position, name in enumerate(features)
                    if is_simple == signal_low(name)]

        for ifeat, feat in selected:
            eff_sig, eff_bkg = ROCs[feat]
            c.graph(np.power(eff_bkg, -1.), bins=eff_sig, linestyle=1 + (ifeat % 2), linecolor=rp.colours[(ifeat // 2) % len(rp.colours)], linewidth=2, label=latex(feat, ROOT=True), option='L')

        # Class-specific legend
        c.legend(header=("Analytical:" if is_simple else "MVA:"),
                 width=legend_width,
                 xmin=0.58 + (legend_width) * (is_simple),
                 ymax=0.888)

    # Decorations
    c.xlabel("Signal efficiency #varepsilon_{sig}^{rel}")
    c.ylabel("Background rejection 1/#varepsilon_{bkg}^{rel}")
    c.text([], xmin=0.15, ymax=0.96, qualifier=QUALIFIER)

    lines = ["#sqrt{s} = 13 TeV",
             "#it{W} jet tagging"]
    if pt_range is not None:
        lines = lines + ["p_{{T}} #in  [{:.0f}, {:.0f}] GeV".format(pt_range[0], pt_range[1])]
    if masscut:
        lines = lines + ["Cut: m #in  [60, 100] GeV"]
    c.text(lines, ATLAS=False)

    # The more selections are applied, the larger the y-axis range
    ranges = int(pt_range is not None) + int(masscut)
    mult = {1: 2., 2: 10.}.get(ranges, 1.)

    c.latex("Random guessing", 0.4, 1./0.4 * 0.9, align=23, angle=-12 + 2 * ranges, textsize=13, textcolor=ROOT.kGray + 2)
    c.xlim(0.2, 1.)
    c.ylim(1E+00, 5E+02 * mult)
    c.logy()
    c.legend()

    return c
Exemplo n.º 4
0
def jetmasscomparison(data, args, features, eff_sig=50):
    """
    Compare jet mass distributions before and after a substructure cut for
    several substructure taggers, each cut at the same signal efficiency.

    Builds the output path `figures/jetmasscomparison__eff_sig_[eff_sig].pdf`;
    this function does not itself call `save` on the returned canvas.

    Arguments:
        data: Pandas data frame from which to read data.
        args: Namespace holding command-line arguments.
        features: Features for which to plot signal- and background distributions.
        eff_sig: Signal efficiency (in percent) at which to impose the cut.

    Returns:
        Tuple of (canvas, args, output path).
    """

    # Signal selection, used to derive every cut value
    is_sig = data['signal'] == 1

    # Pass-cut mask per feature
    msks_pass = dict()
    for feat in features:
        low_is_signal = signal_low(feat)

        # Cut value: weighted percentile of the signal distribution
        percentile = eff_sig if low_is_signal else 100 - eff_sig
        threshold = wpercentile(data.loc[is_sig, feat].values,
                                percentile,
                                weights=data.loc[is_sig, 'weight_test'].values)

        # Mask with the correct cut direction
        above = data[feat] > threshold
        msks_pass[feat] = ~above if low_is_signal else above

    # Combined figure
    c = plot(data, args, features, msks_pass, eff_sig)

    # Individual figures
    plot_individual(data, args, features, msks_pass, eff_sig)

    # Output
    path = 'figures/jetmasscomparison__eff_sig_{:d}.pdf'.format(int(eff_sig))

    return c, args, path
Exemplo n.º 5
0
def jetmasscomparison(data_, args, features, pt_range, eff_sig=50, title=None):
    """
    Compare jet mass distributions before and after a substructure cut for
    several substructure taggers, optionally restricted to a pT range.

    Builds the output path
    `figures/[title_]jetmasscomparison[_pT<lo>to<hi>]__eff_sig_[eff_sig].pdf`;
    this function does not itself call `save` on the returned canvas.

    Arguments:
        data_: Pandas data frame from which to read data.
        args: Namespace holding command-line arguments.
        features: Features for which to plot signal- and background distributions.
        pt_range: (min, max) pT selection of the data, exclusive on both
            ends, or None for no selection.
        eff_sig: Signal efficiency (in percent) at which to impose the cut.
        title: Optional prefix for the output file name; also forwarded to
            `plot_individual`.

    Returns:
        Tuple of (canvas, args, output path).
    """

    # Select pT-range
    if pt_range is None:
        data = data_
    else:
        in_range = (data_['pt'] > pt_range[0]) & (data_['pt'] < pt_range[1])
        data = data_[in_range]

    # Signal selection, used to derive every cut value
    is_sig = data['signal'] == 1

    # Pass-cut mask per feature
    msks_pass = dict()
    for feat in features:
        low_is_signal = signal_low(feat)

        # Cut value: weighted percentile of the signal distribution
        percentile = eff_sig if low_is_signal else 100 - eff_sig
        threshold = wpercentile(data.loc[is_sig, feat].values,
                                percentile,
                                weights=data.loc[is_sig, 'weight_test'].values)

        # Mask with the correct cut direction
        above = data[feat] > threshold
        msks_pass[feat] = ~above if low_is_signal else above

    # Combined figure
    c = plot(data, args, features, msks_pass, eff_sig, pt_range)

    # Individual figures
    plot_individual(data, args, features, msks_pass, eff_sig, pt_range, title)

    # Output: the file name depends on both the pT selection and the title
    if pt_range is None:
        if title is None:
            path = 'figures/jetmasscomparison__eff_sig_{:d}.pdf'.format(
                int(eff_sig))
        else:
            path = 'figures/' + title + '_jetmasscomparison__eff_sig_{:d}.pdf'.format(
                int(eff_sig))
    else:
        if title is None:
            path = 'figures/jetmasscomparison_pT{}to{}__eff_sig_{:d}.pdf'.format(
                pt_range[0], pt_range[1], int(eff_sig))
        else:
            path = 'figures/' + title + '_jetmasscomparison_pT{}to{}__eff_sig_{:d}.pdf'.format(
                pt_range[0], pt_range[1], int(eff_sig))

    return c, args, path
Exemplo n.º 6
0
def jsd(data_, args, feature_dict, pt_range, title=None):
    """
    Study the mass sculpting of each tagger: for a range of background
    efficiencies, compute the Jensen-Shannon divergence (base 2) between the
    mass distributions of background jets passing and failing the cut.

    Saves plot `figures/jsd[__pT<lo>_<hi>].pdf`, prefixed by `[title]_` when
    `title` is given.

    Arguments:
        data_: Pandas data frame from which to read data.
        args: Namespace holding command-line arguments.
        feature_dict: Mapping from base variable name to a sequence of
            suffixes; each `basevar + suffix` is one feature to study.
        pt_range: (min, max) pT selection of the data, exclusive on both
            ends, or None for no selection.
        title: Optional prefix for the output file name.

    Returns:
        Tuple of (canvas, args, output path).
    """

    # Extract features and count appearance of each base variable
    features = []
    appearances = []
    for basevar, suffixes in feature_dict.items():
        features.extend(basevar + suffix for suffix in suffixes)
        appearances.append(len(suffixes))

    # Select data
    if pt_range is not None:
        data = data_[(data_['pt'] > pt_range[0]) & (data_['pt'] < pt_range[1])]
    else:
        data = data_

    # Create local histogram style dict. Each per-key style is copied as well,
    # so that setting the labels below does not leak into the shared
    # HISTSTYLE. (`dict(**HISTSTYLE)` is only a shallow copy, and is rejected
    # by Python 3 because the keys are not strings.)
    histstyle = {key: dict(style) for key, style in HISTSTYLE.items()}
    histstyle[True]['label'] = "Pass"
    histstyle[False]['label'] = "Fail"

    # Define common variables
    msk = data['signal'] == 0
    effs = np.linspace(0, 100, 10 * 2, endpoint=False)[1:].astype(int)

    # Loop tagger features
    jsd_values = {feat: [] for feat in features}
    for feat in features:

        if jsd_values[feat]:
            continue  # Duplicate feature.

        # Define cuts as weighted percentiles of the background distribution
        cuts = [
            wpercentile(data.loc[msk, feat].values,
                        eff if signal_low(feat) else 100 - eff,
                        weights=data.loc[msk, 'weight_test'].values)
            for eff in effs
        ]

        # Compute JSD for successive cuts
        for cut in cuts:

            # Pass-cut mask with the correct cut direction
            msk_pass = data[feat] > cut
            if signal_low(feat):
                msk_pass = ~msk_pass

            # Normalised mass histograms of passing / failing background jets.
            # The canvas is only used to build the histograms; it is not saved.
            c = rp.canvas(batch=not args.show)
            h_pass = c.hist(data.loc[msk_pass & msk, 'm'].values,
                            bins=MASSBINS,
                            weights=data.loc[msk_pass & msk,
                                             'weight_test'].values,
                            normalise=True,
                            **histstyle[True])
            h_fail = c.hist(data.loc[~msk_pass & msk, 'm'].values,
                            bins=MASSBINS,
                            weights=data.loc[~msk_pass & msk,
                                             'weight_test'].values,
                            normalise=True,
                            **histstyle[False])

            # Convert to numpy arrays
            p = root_numpy.hist2array(h_pass)
            f = root_numpy.hist2array(h_fail)

            # Compute Jensen-Shannon divergence
            jsd_values[feat].append(JSD(p, f, base=2))

    # Compute meaningful limit on JSD, at efficiencies spaced uniformly in
    # logit space (sigmoid of a linear grid).
    limit_effs = 1. / (1. + np.exp(-np.linspace(-5, 5, 20 + 1, endpoint=True)))
    jsd_limits = []
    for eff in limit_effs:
        limits = jsd_limit(data[msk], eff, num_bootstrap=5)
        jsd_limits.append((eff, np.mean(limits), np.std(limits)))

    # Perform plotting
    c = plot(args, data, effs, jsd_values, jsd_limits, features, pt_range,
             appearances)

    # Output
    pt_tag = '' if pt_range is None else '__pT{:.0f}_{:.0f}'.format(*pt_range)
    if title is None:
        path = 'figures/jsd{}.pdf'.format(pt_tag)
    else:
        path = 'figures/' + title + '_jsd{}.pdf'.format(pt_tag)
    c.save(path=path)
    return c, args, path
Exemplo n.º 7
0
def plot(*argv):
    """
    Draw the JSD-versus-background-efficiency figure for all taggers, together
    with the band of meaningful (statistical) limits on the JSD.

    Arguments (positional, in this order):
        args: Namespace holding command-line arguments; `args.show` is read.
        data: Unused here.
        effs: Background efficiencies in percent (x-values, divided by 100).
        jsd: Mapping from feature to list of JSD values, one per efficiency.
        jsd_limits: Sequence of (efficiency, mean limit, std of limit).
        features: Flat list of features, grouped by base variable.
        pt_range: (min, max) pT shown in the plot text, or None.
        appearances: List with the number of features per base variable, in
            the same order as the groups in `features`.

    Returns:
        The canvas holding the figure.
    """

    # Unpack arguments
    args, data, effs, jsd, jsd_limits, features, pt_range, appearances = argv

    with TemporaryStyle() as style:

        # Style
        style.SetTitleOffset(1.5, 'x')
        style.SetTitleOffset(2.0, 'y')

        # Canvas
        c = rp.canvas(batch=not args.show)

        # Plots
        # -- Reference line at JSD = 1
        ref = ROOT.TH1F('ref', "", 10, 0., 1.)
        for i in range(ref.GetXaxis().GetNbins()):
            ref.SetBinContent(i + 1, 1)
        c.hist(ref, linecolor=ROOT.kGray + 2, linewidth=1)
        linestyles = [1, 3, 5, 7]

        width = 0.15

        # Group `i` of features spans features[indices[i]:indices[i + 1]]
        indices = np.array([0] + appearances).cumsum()
        xvals = np.array(effs) / 100.

        def draw(feat, ifeat, colour, linestyle):
            """Draw the JSD curve of one feature; `ifeat` picks the marker."""
            markerstyle = 20 if ifeat == 0 else 23 + ifeat
            c.plot(jsd[feat],
                   bins=xvals,
                   linecolor=colour,
                   markercolor=colour,
                   linestyle=linestyle,
                   markerstyle=markerstyle,
                   label=latex(feat, ROOT=True),
                   option='PL')

        if len(appearances) != 2:
            # One legend per tagger class (analytical / MVA)
            for is_simple in [True, False]:

                for i in range(len(indices) - 1):
                    group = features[indices[i]:indices[i + 1]]
                    for ifeat, feat in enumerate(group):
                        if is_simple != signal_low(feat):
                            continue
                        draw(feat, ifeat,
                             colour=rp.colours[i % len(rp.colours)],
                             linestyle=1 + ifeat)

                c.legend(header=("Analytical:" if is_simple else "MVA:"),
                         width=width * (1 + 0.8 * int(is_simple)),
                         xmin=0.42 + (width + 0.05) * (is_simple),
                         ymax=0.888,
                         columns=2 if is_simple else 1,
                         margin=0.35)
        else:
            # Exactly two base variables: one legend per base variable
            for first_var in [True, False]:

                i = 0 if first_var else 1
                group = features[indices[i]:indices[i + 1]]
                for ifeat, feat in enumerate(group):
                    # Cycle through the line styles so that groups with more
                    # features than styles do not raise an IndexError.
                    draw(feat, ifeat,
                         colour=rp.colours[i % len(rp.colours)],
                         linestyle=linestyles[ifeat % len(linestyles)])

                # The header names the first feature of *this* group, which
                # starts at indices[i] (i.e. appearances[0] for the second
                # group, not appearances[1]).
                c.legend(header=latex(features[indices[i]], ROOT=True) +
                         "-based:",
                         width=width,
                         xmin=0.45 + (width + 0.06) * (first_var),
                         ymax=0.888)

        # Meaningful limits on JSD
        x, y, ey = map(np.array, zip(*jsd_limits))
        ex = np.zeros_like(ey)
        gr = ROOT.TGraphErrors(len(x), x, y, ex, ey)
        smooth_tgrapherrors(gr, ntimes=2)
        c.graph(gr,
                linestyle=2,
                linecolor=ROOT.kGray + 1,
                fillcolor=ROOT.kBlack,
                alpha=0.03,
                option='L3')

        # Redraw axes
        c.pads()[0]._primitives[0].Draw('AXIS SAME')

        # Decorations
        c.xlabel("Background efficiency #varepsilon_{bkg}^{rel}")
        c.ylabel("Mass correlation, JSD")
        c.text([], xmin=0.15, ymax=0.96, qualifier=QUALIFIER, ATLAS=False)
        c.text(["#sqrt{s} = 13 TeV",  "Multijets"] + \
              (["p_{T} [GeV] #in", "    [{:.0f}, {:.0f}]".format(*pt_range)] if pt_range else []),
               ymax=0.85, ATLAS=None)

        c.latex("Maximal sculpting",
                0.065,
                1.2,
                align=11,
                textsize=11,
                textcolor=ROOT.kGray + 2)
        c.xlim(0, 1)
        c.ymin(1E-06)  # chosen for highest pT bin
        c.padding(0.45)
        c.logy()

        for leg in c.pad()._legends:
            leg.SetMargin(0.5)

        # Place the "Statistical limit" label just below the limit band, at
        # the 7th-to-last point of the smoothed graph.
        x_, y_ = ROOT.Double(0), ROOT.Double(0)
        idx = gr.GetN() - 7
        gr.GetPoint(idx, x_, y_)
        ey_ = gr.GetErrorY(idx)
        x_, y_ = map(float, (x_, y_))
        c.latex("Statistical limit",
                x_,
                y_ - ey_ / 2.,
                align=23,
                textsize=11,
                angle=12,
                textcolor=ROOT.kGray + 2)

    return c
Exemplo n.º 8
0
def roc(data_, args, features, masscut=False, pt_range=(200, 2000)):
    """
    Compute weighted ROC curves and AUCs for each tagger, report the
    background rejection at 50% signal efficiency, and draw the comparison.

    Builds the output path `figures/roc[__pT<lo>_<hi>][__masscut].pdf`; this
    function does not itself call `save` on the returned canvas.

    Arguments:
        data_: Pandas data frame from which to read data.
        args: Namespace holding command-line arguments.
        features: Features (taggers) for which to compute ROC curves.
        masscut: Whether to restrict the ROC computation to a mass window and
            scale the efficiencies by the window's efficiency.
        pt_range: (min, max) pT selection of the data, exclusive on both
            ends, or None for no selection.

    Returns:
        Tuple of (canvas, args, output path).
    """

    # Select pT-range
    if pt_range is not None:
        data = data_.loc[(data_['pt'] > pt_range[0])
                         & (data_['pt'] < pt_range[1])]
    else:
        data = data_

    # (Opt.) masscut | @NOTE: Duplication with adversarial/utils/metrics.py
    # NOTE(review): the ROC plot label elsewhere in this file reads
    # "m in [60, 100] GeV" and sibling studies read the jet mass from column
    # 'm' -- confirm that the window and column name used here are intended.
    if masscut:
        msk = (data['mass'] > 50.) & (data['mass'] < 300.)

        # Unweighted fraction of signal / background jets inside the window;
        # independent of the feature, so computed once.
        eff_sig_mass = np.mean(msk[data['signal'] == 1])
        eff_bkg_mass = np.mean(msk[data['signal'] == 0])
    else:
        msk = np.ones_like(data['signal']).astype(bool)

    # Target signal efficiencies for subsampling the curves to 1% steps
    targets = np.linspace(0, 1, 100 + 1, endpoint=True)

    # Computing ROC curves
    ROCs = dict()
    for feat in features:

        # Flip the sign where low values are signal-like, so that larger
        # scores always mean "more signal-like".
        sign = -1. if signal_low(feat) else 1.

        eff_bkg, eff_sig, thresholds = roc_curve(
            data.loc[msk, 'signal'].values,
            data.loc[msk, feat].values * sign,
            sample_weight=data.loc[msk, 'weight_test'].values)

        if masscut:
            eff_sig *= eff_sig_mass
            eff_bkg *= eff_bkg_mass

        # Filter, to avoid background rejection blowing up
        indices = np.where((eff_bkg > 0) & (eff_sig > 0))
        eff_sig = eff_sig[indices]
        eff_bkg = eff_bkg[indices]

        # Subsample to 1% steps
        indices = np.array([np.argmin(np.abs(eff_sig - t)) for t in targets])
        eff_sig = eff_sig[indices]
        eff_bkg = eff_bkg[indices]

        # Store
        ROCs[feat] = (eff_sig, eff_bkg)

    # Computing ROC AUCs (on the pT-selected data, without the mass cut)
    AUCs = dict()
    for feat in features:
        sign = -1. if signal_low(feat) else 1.
        AUCs[feat] = roc_auc_score(data['signal'].values,
                                   data[feat].values * sign,
                                   sample_weight=data['weight_test'].values)

    # Report scores. Each call passes a single parenthesised string, so the
    # output is the same whether `print` is a statement or a function.
    print("\n== pT range: {:s}".format(
        'inclusive' if pt_range is None else "[{:.0f}, {:.0f}] Gev".format(
            *pt_range)))
    print("\n== {} masscut".format("With" if masscut else "Without"))
    for feat in features:
        effsig = ROCs[feat][0]
        idx = np.argmin(np.abs(effsig - 0.5))
        print("\nFeature {}:".format(feat))
        print("  Background rejection at effsig = {:.0f}%: {:6.3f}".format(
            ROCs[feat][0][idx] * 100., 1. / ROCs[feat][1][idx]))
        print("  AUC: {:5.4f}".format(AUCs[feat]))

    # Perform plotting
    c = plot(args, data, features, ROCs, AUCs, masscut, pt_range)

    # Output
    path = 'figures/roc{}{:s}.pdf'.format(
        '__pT{:.0f}_{:.0f}'.format(pt_range[0], pt_range[1])
        if pt_range is not None else '', '__masscut' if masscut else '')

    return c, args, path
def plot (*argv):
    """
    Draw the two-pad robustness figure: background rejection (top pad) and
    1/JSD (bottom pad) at 50% signal efficiency, versus pT or NPV.

    Arguments (positional, in this order):
        data: Unused here.
        args: Namespace holding command-line arguments; `args.show` is read.
        features: Taggers to draw.
        bins: Bin edges along the x-axis. NOTE: the last edge is floored *in
            place*, so the caller's object is modified.
        effs: Unused here.
        rejs: Mapping from feature to a sequence of (mean, std) rejection
            pairs, one per bin.
        jsds: Mapping from feature to a sequence of (mean, std) pairs drawn
            in the 1/JSD pad, one per bin.
        meanx: Mean x-value per bin, used for the marker positions.
        jsd_limits: Sequence of (x, y, stat. error, syst. error) tuples.
        masscut: Whether to mention the mass cut in the plot text.
        var: Variable on the x-axis; 'pt' or 'npv'.

    Returns:
        The canvas holding the figure.

    Raises:
        NotImplementedError: If `var` is neither 'pt' nor 'npv'.
    """

    # Unpack arguments
    data, args, features, bins, effs, rejs, jsds, meanx, jsd_limits, masscut, var = argv

    # -- x-axis label. Validated up front so that an unsupported variable
    #    fails before any drawing is done.
    if var == 'pt':
        xlabel = "Large-#it{R} jet p_{T} [GeV]"
    elif var == 'npv':
        xlabel = "Number of reconstructed vertices N_{PV}"
    else:
        raise NotImplementedError("Variable {} is not supported.".format(var))

    def as_flat_floats(arr):
        """Return `arr` as a flat float array, as passed to TGraphErrors."""
        return arr.flatten('C').astype(float)

    with TemporaryStyle() as style:

        # Set styles
        scale = 0.9
        style.SetTextSize(scale * style.GetTextSize())
        for coord in ['x', 'y', 'z']:
            style.SetLabelSize(scale * style.GetLabelSize(coord), coord)
            style.SetTitleSize(scale * style.GetTitleSize(coord), coord)

        # Canvas
        c = rp.canvas(num_pads=2, fraction=0.55, size=(int(800 * 600 / 857.), 600), batch=not args.show)
        c.pads()[0]._bare().SetTopMargin(0.10)
        c.pads()[0]._bare().SetRightMargin(0.23)
        c.pads()[1]._bare().SetRightMargin(0.23)

        # To fix 30.5 --> 30 for NPV
        bins[-1] = np.floor(bins[-1])

        # Bin centres and half-widths, shared by all error boxes
        centres    = np.array(bins[:-1]) + 0.5 * np.diff(bins)
        halfwidths = 0.5 * np.diff(bins)

        # Plots
        # -- References
        boxopts  = dict(fillcolor=ROOT.kBlack, alpha=0.05, linecolor=ROOT.kGray + 2, linewidth=1, option='HIST')
        c.pads()[0].hist([2], bins=[bins[0], bins[-1]], **boxopts)
        c.pads()[1].hist([1], bins=[bins[0], bins[-1]], **boxopts)

        # -- Taggers, drawn class by class so that legend entries are grouped
        for is_simple in [True, False]:
            for ifeat, feat in enumerate(features):
                if is_simple != signal_low(feat):
                    continue

                # Wrap around the palette instead of running off its end
                colour = rp.colours[(ifeat // 2) % len(rp.colours)]
                opts = dict(
                    linecolor   = colour,
                    markercolor = colour,
                    fillcolor   = colour,
                    linestyle   = 1 + (ifeat % 2),
                    alpha       = 0.3,
                    option      = 'E2',
                )

                mean_rej, std_rej = map(np.array, zip(*rejs[feat]))  # @TEMP
                mean_jsd, std_jsd = map(np.array, zip(*jsds[feat]))

                # Error boxes
                graph_rej = ROOT.TGraphErrors(len(centres), centres, mean_rej, halfwidths, std_rej)
                graph_jsd = ROOT.TGraphErrors(len(centres), centres, mean_jsd, halfwidths, std_jsd)

                c.pads()[0].hist(graph_rej, **opts)
                c.pads()[1].hist(graph_jsd, **opts)

                # Markers and lines
                opts['option']      = 'PE2L'
                opts['markerstyle'] = 20 + 4 * (ifeat % 2)

                graph_rej = ROOT.TGraph(len(centres), meanx, mean_rej)
                graph_jsd = ROOT.TGraph(len(centres), meanx, mean_jsd)

                # MVA taggers are labelled in the top pad, analytical ones in
                # the bottom pad, matching the two legends drawn below.
                c.pads()[0].hist(graph_rej, label=latex(feat, ROOT=True) if not is_simple else None, **opts)
                c.pads()[1].hist(graph_jsd, label=latex(feat, ROOT=True) if     is_simple else None, **opts)

        # Draw class-specific legend
        width = 0.20
        c.pads()[0].legend(header='MVA:',    width=width, xmin=0.79, ymax=0.92)
        c.pads()[1].legend(header='Analytical:', width=width, xmin=0.79, ymax=0.975)

        # Meaningful limits on JSD
        x, y, ey_stat, ey_syst  = map(np.array, zip(*jsd_limits))
        ex = np.zeros_like(x)
        # Stretch the band to the full axis range
        x[0] = bins[0]
        x[-1] = bins[-1]
        ey_comb = np.sqrt(np.square(ey_stat) + np.square(ey_syst))
        gr_stat = ROOT.TGraphErrors(len(x), *list(map(as_flat_floats, [x, y, ex, ey_stat])))
        gr_comb = ROOT.TGraphErrors(len(x), *list(map(as_flat_floats, [x, y, ex, ey_comb])))
        smooth_tgrapherrors(gr_stat, ntimes=2)
        smooth_tgrapherrors(gr_comb, ntimes=2)
        c.pads()[1].graph(gr_comb,                                        fillcolor=ROOT.kBlack, alpha=0.03, option='3')
        c.pads()[1].graph(gr_stat, linestyle=2, linecolor=ROOT.kGray + 1, fillcolor=ROOT.kBlack, alpha=0.03, option='L3')

        # Label the limit band at its last point, on the upper error edge
        x_, y_ = ROOT.Double(0), ROOT.Double(0)
        idx = gr_comb.GetN() - 1
        gr_comb.GetPoint(idx, x_,  y_)
        ey_ = gr_comb.GetErrorY(idx)
        x_, y_ = map(float, (x_, y_))
        c.pads()[1].latex("Mean stat. #oplus #varepsilon_{bkg}^{rel} var. limit     ", x_, y_ + ey_, align=31, textsize=11, angle=0, textcolor=ROOT.kGray + 2)

        # Decorations
        for pad in c.pads():
            pad._xaxis().SetNdivisions(504)

        c.xlabel(xlabel)
        c.pads()[0].ylabel("1/#varepsilon_{bkg}^{rel} @ #varepsilon_{sig}^{rel} = 50%")
        c.pads()[1].ylabel("1/JSD @ #varepsilon_{sig}^{rel} = 50%")

        xmid = (bins[0] + bins[-1]) * 0.5
        c.pads()[0].latex("Random guessing",   xmid, 2 * 0.9, align=23, textsize=11, angle=0, textcolor=ROOT.kGray + 2)
        c.pads()[1].latex("Maximal sculpting", xmid, 1 * 0.8, align=23, textsize=11, angle=0, textcolor=ROOT.kGray + 2)

        c.text([], qualifier=QUALIFIER, xmin=0.15, ymax=0.93)

        c.text(["#sqrt{s} = 13 TeV,  #it{W} jet tagging"] + \
                (['m #in  [60, 100] GeV'] if masscut else []),
                 ATLAS=False, ymax=0.76)

        c.pads()[1].text(["Multijets"], ATLAS=False)

        c.pads()[0].ylim(1, 500)
        c.pads()[1].ylim(0.2, 2E+05)

        c.pads()[0].logy()
        c.pads()[1].logy()

    return c
def plot_individual(*argv):
    """
    Method for delegating plotting of the individual robustness figures.

    Loops over all combinations of variable (`pt`, `npv`, or `None`) and
    metric (`rej` or `jsd`). For a proper variable, the metric is drawn versus
    that variable for all features on a single-pad canvas. For `None`, a narrow
    canvas holding only the legend for one class of features is drawn.

    Saves plots `figures/robustness__[var]_[metric][_masscut].pdf` and
    `figures/robustness__legend_[mva|analytical][_masscut].pdf`.

    Arguments (positional only; exactly ten are required, in this order):
        data: Not used by this method.
        args: Namespace holding command-line arguments; only `args.show` is
            read, to decide whether to run in batch mode.
        features: List of features to draw. The position in the list decides
            the colour (`index // 2`) and the line-/marker style (`index % 2`).
        bins: Container mapping variable name to bin edges. NB: the last `npv`
            edge is rounded down in-place.
        effs: Not used by this method.
        rejs: `rejs[var][feat]` is a sequence of (mean, std) pairs; presumably
            one pair per bin.
        jsds: Same layout as `rejs`.
        meanx: `meanx[var]` holds the x-coordinates at which markers are drawn.
        jsd_limits: `jsd_limits[var]` is a sequence of
            (x, y, stat. error, syst. error) tuples.
        masscut: If truthy, a mass-cut line is added to the text and
            `_masscut` is appended to the file names.
    """

    # Unpack arguments
    (data, args, features, bins, effs,
     rejs, jsds, meanx, jsd_limits, masscut) = argv

    # To fix 30.5 --> 30 for NPV
    bins['npv'][-1] = np.floor(bins['npv'][-1])

    # Style constants, common to all figures
    scale = 1.0
    scale_axis = 0.7
    margin_squeeze = 0.07
    margin_vert = 0.15
    margin_hori = 0.17
    size = (350, 300)

    def feature_opts(ifeat, **overrides):
        # Drawing options for the feature at position `ifeat`
        colour = rp.colours[ifeat // 2]
        opts = dict(linecolor=colour,
                    markercolor=colour,
                    fillcolor=colour,
                    linestyle=1 + (ifeat % 2),
                    alpha=0.3,
                    option='E2')
        opts.update(overrides)
        return opts

    def flat_float(arr):
        # Contiguous, one-dimensional float copy of `arr`
        return arr.flatten('C').astype(float)

    # Loop combinations
    for var, metric in itertools.product(['pt', 'npv', None], ['rej', 'jsd']):
        legend_only = var is None

        with TemporaryStyle() as style:

            # Set styles
            style.SetTextSize(scale_axis * style.GetTextSize())
            for axis in 'xyz':
                style.SetLabelSize(scale_axis * style.GetLabelSize(axis), axis)
                style.SetTitleSize(scale_axis * style.GetTitleSize(axis), axis)
            style.SetTitleOffset(1.8, 'y')
            style.SetLegendTextSize(style.GetLegendTextSize() * scale)
            for axis in 'xy':
                style.SetTickLength(0.05, axis)

            # Canvas
            c = rp.canvas(size=(150, 300) if legend_only else size,
                          batch=not args.show)

            # Margins; the legend-only canvas has (almost) no frame left
            if legend_only:
                bottom = left = right = top = 0.49
            else:
                bottom, left = margin_vert, margin_hori
                right, top = margin_squeeze, margin_vert
            tpad = c.pad()._bare()
            tpad.SetBottomMargin(bottom)
            tpad.SetLeftMargin(left)
            tpad.SetRightMargin(right)
            tpad.SetTopMargin(top)

            if legend_only:

                # Draw dummy histograms, only to get the legend entries
                for is_simple in [True, False]:
                    for ifeat, feat in enumerate(features):
                        if is_simple != signal_low(feat):
                            continue
                        opts = feature_opts(ifeat,
                                            option='PE2L',
                                            markerstyle=20 + 4 * (ifeat % 2))
                        if is_simple == (metric == 'jsd'):
                            label = latex(feat, ROOT=True)
                        else:
                            label = None
                        c.hist([0.5], bins=[0, 1], label=label, **opts)

                # "Remove" axes
                pad = c.pad()
                tpad = pad._bare()
                white = ROOT.kWhite
                for get_axis in (pad._xaxis, pad._yaxis):
                    get_axis().SetLabelOffset(9999.)
                    get_axis().SetTitleOffset(9999.)
                for get_axis in (pad._xaxis, pad._yaxis):
                    get_axis().SetAxisColor(white)  # Remove "double ticks"
                tpad.SetFillColor(white)
                tpad.SetFrameFillColor(white)
                c._bare().SetFillColor(white)
                c._bare().SetFrameFillColor(white)

                # Draw class-specific legend
                header = 'MVA:' if metric == 'rej' else 'Analytical:'
                c.legend(header=header, width=0.90, xmin=0.05,
                         ymax=1. - margin_vert + 0.02)
                c.pad()._legends[-1].SetTextSize(style.GetLegendTextSize())

            else:

                edges = bins[var]

                # Plots
                # -- References
                c.hist([2 if metric == 'rej' else 1],
                       bins=[edges[0], edges[-1]],
                       fillcolor=ROOT.kBlack,
                       alpha=0.05,
                       linecolor=ROOT.kGray + 2,
                       linewidth=1,
                       option='HIST')

                # -- Features
                for is_simple in [True, False]:
                    for ifeat, feat in enumerate(features):
                        if is_simple != signal_low(feat):
                            continue

                        band_opts = feature_opts(ifeat)

                        mean_rej, std_rej = [np.array(col) for col
                                             in zip(*rejs[var][feat])]
                        mean_jsd, std_jsd = [np.array(col) for col
                                             in zip(*jsds[var][feat])]

                        # Only _show_ mass-decorrelated features for `npv`:
                        # features at even positions are moved out of view
                        if var == 'npv' and ifeat % 2 == 0:
                            mean_rej *= -9999.
                            mean_jsd *= -9999.

                        # Error boxes
                        halfwidths = 0.5 * np.diff(edges)
                        centres = np.array(edges[:-1]) + 0.5 * np.diff(edges)
                        band_rej = ROOT.TGraphErrors(len(centres), centres, mean_rej, halfwidths, std_rej)
                        band_jsd = ROOT.TGraphErrors(len(centres), centres, mean_jsd, halfwidths, std_jsd)
                        c.hist(band_rej if metric == 'rej' else band_jsd,
                               **band_opts)

                        # Markers and lines
                        marker_opts = feature_opts(
                            ifeat,
                            option='PE2L',
                            markerstyle=20 + 4 * (ifeat % 2))
                        line_rej = ROOT.TGraph(len(centres), meanx[var], mean_rej)
                        line_jsd = ROOT.TGraph(len(centres), meanx[var], mean_jsd)

                        # MVA features are labelled on the `rej` figures,
                        # analytical ones on the `jsd` figures
                        if is_simple == (metric == 'jsd'):
                            label = latex(feat, ROOT=True)
                        else:
                            label = None
                        c.hist(line_rej if metric == 'rej' else line_jsd,
                               label=label, **marker_opts)

                # Meaningful limits on JSD
                if metric == 'jsd':
                    lim_x, lim_y, err_stat, err_syst = [
                        np.array(col) for col in zip(*jsd_limits[var])]
                    lim_ex = np.zeros_like(lim_x)

                    # Stretch the band to cover the full axis range
                    lim_x[0] = edges[0]
                    lim_x[-1] = edges[-1]

                    gr_stat = ROOT.TGraphErrors(
                        len(lim_x), flat_float(lim_x), flat_float(lim_y),
                        flat_float(lim_ex), flat_float(err_stat))
                    err_comb = np.sqrt(np.square(err_stat) + np.square(err_syst))
                    gr_comb = ROOT.TGraphErrors(
                        len(lim_x), flat_float(lim_x), flat_float(lim_y),
                        flat_float(lim_ex), flat_float(err_comb))
                    smooth_tgrapherrors(gr_stat, ntimes=2)
                    smooth_tgrapherrors(gr_comb, ntimes=2)
                    c.graph(gr_comb, fillcolor=ROOT.kBlack, alpha=0.03, option='3')
                    c.graph(gr_stat, linestyle=2, linecolor=ROOT.kGray + 1, fillcolor=ROOT.kBlack, alpha=0.03, option='L3')

                    # Read off the point next to which the band is labelled
                    x_, y_ = ROOT.Double(0), ROOT.Double(0)
                    if var == 'pt':
                        idx = gr_comb.GetN() - 1
                    else:
                        idx = gr_comb.GetN() // 2
                    gr_comb.GetPoint(idx, x_, y_)
                    ey_ = gr_comb.GetErrorY(idx)
                    x_, y_ = float(x_), float(y_)
                    if var == 'pt':
                        c.latex("Mean stat. #oplus #varepsilon_{bkg}^{rel} var. limit     ", x_, y_ - 1.0 * ey_, align=31, textsize=11 * scale, angle=0, textcolor=ROOT.kGray + 2)

                # Decorations
                # -- x-axis label
                xlabels = {
                    'pt': "Large-#it{R} jet p_{T} [GeV]",
                    'npv': "Number of reconstructed vertices N_{PV}",
                }
                if var not in xlabels:
                    raise NotImplementedError("Variable {} is not supported.".format(var))
                c.xlabel(xlabels[var])

                # -- y-axis label
                ylabels = {
                    'rej': "1/#varepsilon_{bkg}^{rel} @ #varepsilon_{sig}^{rel} = 50%",
                    'jsd': "1/JSD @ #varepsilon_{sig}^{rel} = 50%",
                }
                if metric not in ylabels:
                    raise NotImplementedError("Metric {} is not supported.".format(metric))
                c.ylabel(ylabels[metric])

                # -- reference label and y-axis range
                xmid = (edges[0] + edges[-1]) * 0.5
                if metric == 'rej':
                    c.latex("Random guessing",   xmid, 2 * 0.9, align=23, textsize=11 * scale, angle=0, textcolor=ROOT.kGray + 2)
                    c.ylim(1, 100)
                else:
                    c.latex("Maximal sculpting", xmid, 1 * 0.8, align=23, textsize=11 * scale, angle=0, textcolor=ROOT.kGray + 2)
                    c.ylim(0.2, 7E+04)

                c.logy()

                # Common decorations
                c.pad()._xaxis().SetNdivisions(504)

                c.text([], qualifier=QUALIFIER, xmin=margin_hori, ymax=1. - margin_vert + 0.03)

                lines = ["#sqrt{s} = 13 TeV,  #it{W} jet tagging"]
                if masscut:
                    lines = lines + ['m #in  [60, 100] GeV']
                if metric == 'jsd':
                    lines = lines + ['Multijets']
                lower_text = masscut and (var == 'pt') and (metric == 'rej')
                c.text(lines, ATLAS=False, ymax=0.40 if lower_text else None)

            # Save
            if legend_only:
                tag_var = 'legend'
                tag_metric = 'mva' if metric == 'rej' else 'analytical'
            else:
                tag_var, tag_metric = var, metric
            c.save('figures/robustness__{}_{}{}.pdf'.format(tag_var, tag_metric, '_masscut' if masscut else ''))

    return
Exemplo n.º 11
0
def jetmasscomparison(data, args, features, eff_sig=25):
    """
    Perform study of jet mass distributions before and after substructure cut
    for different substructure taggers.

    For each feature, the cut value is the weighted percentile of the signal
    distribution (`sigType == 1`) corresponding to `eff_sig`. A feature whose
    name contains `lead` is combined with its partner feature, found by
    replacing `lead` with `sub` in the name: both are required to pass the
    cut derived for the `lead` feature. Features containing `sub` but not
    `lead` are not treated on their own and are skipped.

    This method does not write any figure itself; plotting is delegated to
    `plot_individual`, and the intended output path is returned.

    Arguments:
        data: Pandas data frame from which to read data. The columns
            `sigType`, `signal`, `weight`, and one per feature are read.
        args: Namespace holding command-line arguments.
        features: Features for which to plot signal- and background
            distributions.
        eff_sig: Signal efficiency, in percent, at which to impose cut.

    Returns:
        Tuple of the return value of `plot_individual`, `args`, and the path
        `figures/jetmasscomparison__eff_sig_[eff_sig]_[MODEL].eps`.
    """

    # Define masks and direction-dependent cut value
    msk_sig = data['sigType'] == 1
    msks_pass = dict()
    lead_features = []

    print("Features:  %s" % (features,))

    for feat in features:
        is_lead = 'lead' in feat

        # Sub-leading features only enter through their `lead` partner. They
        # never get a pass-mask of their own, so skip them before the
        # cut-direction step below tries to look one up.
        if 'sub' in feat and not is_lead:
            continue

        eff_cut = eff_sig if signal_low(feat) else 100 - eff_sig

        # Cut on the feature itself, and the matching point on its ROC curve
        cut = wpercentile(data.loc[msk_sig, feat].values,
                          eff_cut,
                          weights=data.loc[msk_sig, 'weight'].values)
        msk = (data[feat] > cut)

        fpr, tpr, thresholds = roc_curve(data['signal'],
                                         data[feat],
                                         sample_weight=data['weight'])
        idx = np.argmin(np.abs(tpr - eff_sig / 100.))

        lead_features.append(feat)

        if not is_lead:
            print("Pass criteria: %s  >  %s" % (feat, cut))
            print("Background acceptance @ {:.2f}% sig. eff.: {:.5f}% ({} > {:.2f})".format(
                eff_sig, (fpr[idx]) * 100., feat, thresholds[idx]))

            msks_pass[feat] = msk

        else:
            print("H Pass criteria: %s  >  %s" % (feat, cut))
            print("H Background acceptance @ {:.2f}% sig. eff.: {:.6f}% ({} > {:.2f})".format(
                eff_sig, (fpr[idx]) * 100., feat, thresholds[idx]))

            # Diagnostics for the partner feature, evaluated on the jets
            # passing the cut on the `lead` feature
            subfeat = feat.replace("lead", "sub")
            data_pass = data[msk]
            cut_sub = wpercentile(data_pass.loc[msk_sig, subfeat].values,
                                  eff_cut,
                                  weights=data_pass.loc[msk_sig, 'weight'].values)
            fpr, tpr, thresholds = roc_curve(data_pass['signal'],
                                             data_pass[subfeat],
                                             sample_weight=data_pass['weight'])

            # Working point on the partner's ROC curve closest to the `lead` cut
            idy = np.argmin(np.abs(thresholds - cut))

            print("H Pass criteria: %s  >  %s %s %s" % (
                subfeat, cut_sub, idy, len(thresholds)))
            print("H Background acceptance @ {:.5f}% sig. eff.: {:.5f}% ({} > {:.5f})".format(
                (tpr[idy]) * 100, (fpr[idy]) * 100., subfeat,
                thresholds[idy]))

            # NOTE(review): the partner feature is cut at the `lead` cut value;
            # `cut_sub` is only reported above. Confirm this is intended.
            msks_pass[feat] = (data[feat] > cut) & (data[subfeat] > cut)

        # Ensure correct cut direction
        if signal_low(feat):
            msks_pass[feat] = ~msks_pass[feat]

    # Perform plotting on individual figures
    c = plot_individual(data, args, lead_features, msks_pass, eff_sig)

    # Output
    path = 'figures/jetmasscomparison__eff_sig_{:d}_{}.eps'.format(
        int(eff_sig), MODEL)

    return c, args, path
Exemplo n.º 12
0
def plot(*argv):
    """
    Method for delegating plotting.

    Draws mass-decorrelation (1 / JSD) versus background rejection for all
    taggers on a log-log canvas, with reference lines and boxes, the
    statistical limit on 1 / JSD, class-specific legends, the points of
    parameter scans, and lines connecting related points.

    Arguments (positional only; exactly eight are required, in this order):
        data: Not used by this method.
        args: Namespace holding command-line arguments; only `args.show` is
            read, to decide whether to run in batch mode.
        features: List of features to draw with a legend entry. The position
            in the list decides the colour and the marker style.
        scan_features: Dict mapping a base feature (which must be in
            `features`) to a list of (feature, label) pairs of its scan.
        points: List of (x, y, feature name) tuples. It is left unmodified.
        jsd_limits: Sequence of (x, y, error) tuples for the statistical limit.
        masscut: Whether a mass cut is applied; affects y-axis range and text.
        pt_range: `None`, or pair of pT limits; affects y-axis range and text.

    Returns:
        The canvas on which the figure was drawn.
    """

    # Unpack arguments
    (data, args, features, scan_features,
     points, jsd_limits, masscut, pt_range) = argv

    with TemporaryStyle() as style:

        # Compute yaxis range
        ranges = int(pt_range is not None) + int(masscut)
        mult = 10. if ranges == 2 else (5. if ranges == 1 else 1.)

        # Define variable(s)
        axisrangex = (1.4, 100.)
        axisrangey = (0.3, 100000. * mult)
        aminx, amaxx = axisrangex
        aminy, amaxy = axisrangey

        # Styling
        scale = 0.95
        style.SetTitleOffset(1.8, 'x')
        style.SetTitleOffset(1.6, 'y')
        style.SetTextSize(style.GetTextSize() * scale)
        style.SetLegendTextSize(style.GetLegendTextSize() * scale)

        # Canvas
        c = rp.canvas(batch=not args.show, size=(600, 600))

        # Reference lines
        nullopts = dict(linecolor=0,
                        linewidth=0,
                        linestyle=0,
                        markerstyle=0,
                        markersize=0,
                        fillstyle=0)
        lineopts = dict(linecolor=ROOT.kGray + 2, linewidth=1, option='L')
        boxopts = dict(fillcolor=ROOT.kBlack,
                       alpha=0.05,
                       linewidth=0,
                       option='HIST')
        c.hist([aminy], bins=list(axisrangex), **nullopts)
        c.plot([1, amaxy], bins=[2, 2], **lineopts)
        c.plot([1, 1], bins=[2, amaxx], **lineopts)
        c.hist([amaxy], bins=[aminx, 2], **boxopts)
        c.hist([1], bins=[2, amaxx], **boxopts)

        # Meaningful limits on 1/JSD
        lim_x, lim_y, lim_ey = [np.array(col) for col in zip(*jsd_limits)]
        lim_ex = np.zeros_like(lim_ey)
        gr = ROOT.TGraphErrors(len(lim_x), lim_x, lim_y, lim_ex, lim_ey)
        smooth_tgrapherrors(gr, ntimes=3)
        c.graph(gr,
                linestyle=2,
                linecolor=ROOT.kGray + 1,
                fillcolor=ROOT.kBlack,
                alpha=0.03,
                option='L3')

        # Label the limit band at its fourth point
        x_, y_ = ROOT.Double(0), ROOT.Double(0)
        idx = 3
        gr.GetPoint(idx, x_, y_)
        ey_ = gr.GetErrorY(idx)
        x_, y_ = float(x_), float(y_)
        c.latex("Statistical limit",
                x_,
                y_ + ey_,
                align=21,
                textsize=11,
                angle=-5,
                textcolor=ROOT.kGray + 2)

        # Feature names of all points, for looking up a point by feature.
        # Built once, since `points` is not modified in this method.
        point_names = [point[2] for point in points]

        # Markers
        for is_simple in [True, False]:

            # Split the legend into simple- and MVA taggers
            for ifeat, feat in enumerate(features):
                if is_simple != signal_low(feat):
                    continue

                # Coordinates, label
                x, y, label = points[point_names.index(feat)]

                # Overwrite default name of parameter-scan classifier
                label = 'ANN' if label.startswith('ANN') else label
                label = 'uBoost' if label.startswith('uBoost') else label

                # Style
                colour = rp.colours[(ifeat // 2) % len(rp.colours)]
                markerstyle = 20 + (ifeat % 2) * 4

                # Draw
                c.graph([y],
                        bins=[x],
                        markercolor=colour,
                        markerstyle=markerstyle,
                        label='#scale[%.1f]{%s}' %
                        (scale, latex(label, ROOT=True)),
                        option='P')

            # Draw class-specific legend
            width = 0.15
            c.legend(header=("Analytical:" if is_simple else "MVA:"),
                     width=width,
                     xmin=0.60 + (width + 0.02) * (is_simple),
                     ymax=0.888)

        # Make legends transparent
        for leg in c.pads()[0]._legends:
            leg.SetFillStyle(0)

        # Markers, parametrised decorrelation
        for base_feat, group in scan_features.iteritems():

            # Get index in list of features
            ifeat = features.index(base_feat)

            # Style
            colour = rp.colours[(ifeat // 2) % len(rp.colours)]
            markerstyle = 24

            for feat, label in group:
                x, y, _ = points[point_names.index(feat)]

                # Draw
                c.graph([y],
                        bins=[x],
                        markercolor=colour,
                        markerstyle=markerstyle,
                        option='P')

                # Put the label to the right of `NN` points, and to the left
                # of all others
                if base_feat == 'NN':
                    c.latex("   " + label,
                            x,
                            y,
                            textsize=11,
                            align=12,
                            textcolor=ROOT.kGray + 2)
                else:
                    c.latex(label + "   ",
                            x,
                            y,
                            textsize=11,
                            align=32,
                            textcolor=ROOT.kGray + 2)

            # Connecting lines (scan)
            feats = [base_feat] + [member[0] for member in group]
            for feat1, feat2 in zip(feats[:-1], feats[1:]):
                x1, y1, _ = points[point_names.index(feat1)]
                x2, y2, _ = points[point_names.index(feat2)]

                c.graph([y1, y2],
                        bins=[x1, x2],
                        linecolor=colour,
                        linestyle=2,
                        option='L')

        # Connecting lines (simple)
        # The second point is left out before pairing up consecutive points.
        # This is done on a copy, so that the list owned by the caller is not
        # shortened by every call.
        print("points: ")
        print(points)
        paired_points = list(points)
        del paired_points[1]
        print(paired_points)

        for i in range(2):
            x1, y1, _ = paired_points[2 * i + 0]
            x2, y2, _ = paired_points[2 * i + 1]
            colour = rp.colours[i]
            c.graph([y1, y2],
                    bins=[x1, x2],
                    linecolor=colour,
                    linestyle=2,
                    option='L')

        # Decorations
        c.xlabel(
            "Background rejection, 1 / #varepsilon_{bkg}^{rel} @ #varepsilon_{sig}^{rel} = 50%"
        )
        c.ylabel("Mass-decorrelation, 1 / JSD @ #varepsilon_{sig}^{rel} = 50%")
        c.xlim(*axisrangex)
        c.ylim(*axisrangey)
        c.logx()
        c.logy()

        # -- Labels for the reference lines, half-way (in log) along the axes
        opts_text = dict(textsize=11, textcolor=ROOT.kGray + 2)
        midpointx = np.power(10, 0.5 * np.log10(amaxx))
        midpointy = np.power(10, 0.5 * np.log10(amaxy))
        c.latex("No separation",
                1.91,
                midpointy,
                angle=90,
                align=21,
                **opts_text)
        c.latex("Maximal sculpting",
                midpointx,
                0.89,
                angle=0,
                align=23,
                **opts_text)
        c.latex("    Less sculpting #rightarrow",
                2.1,
                midpointy,
                angle=90,
                align=23,
                **opts_text)
        c.latex("     Greater separation #rightarrow",
                midpointx,
                1.1,
                angle=0,
                align=21,
                **opts_text)

        c.text([], xmin=0.15, ymax=0.96, qualifier=QUALIFIER)
        c.text(TEXT + \
               ["#it{W} jet tagging"] + (
                    ["p_{{T}} #in  [{:.0f}, {:.0f}] GeV".format(pt_range[0], pt_range[1])] if pt_range is not None else []
                ) + (
                    ['Cut: m #in  [60, 100] GeV'] if masscut else []
                ),
               xmin=0.26, ATLAS=None)

    return c
def plot(*argv):
    """
    Method for delegating plotting.

    Draws the mass correlation (JSD) as a function of background efficiency
    for all features, together with a reference line at unity, the statistical
    limit on the JSD, and one legend per class of features.

    Arguments (positional only; exactly seven are required, in this order):
        args: Namespace holding command-line arguments; only `args.show` is
            read, to decide whether to run in batch mode.
        data: Not used by this method.
        effs: Sequence of background efficiencies, in percent. The first
            entry is not drawn.
        jsd: Container mapping feature to its sequence of JSD values, one per
            entry in `effs`. The first entry is not drawn.
        jsd_limits: Sequence of (x, y, error) tuples for the statistical limit.
        features: List of features to draw. The position in the list decides
            the colour, line style, and marker style.
        pt_range: Pair of pT limits to be shown in the text, or something
            falsy to leave it out.

    Returns:
        The canvas on which the figure was drawn.
    """

    # Unpack arguments
    (args, data, effs, jsd, jsd_limits, features, pt_range) = argv

    with TemporaryStyle() as style:

        # Style
        for axis, offset in (('x', 1.5), ('y', 2.0)):
            style.SetTitleOffset(offset, axis)

        # Canvas
        c = rp.canvas(batch=not args.show)

        # Plots
        # -- Reference line at unity
        ref = ROOT.TH1F('ref', "", 10, 0., 1.)
        num_bins = ref.GetXaxis().GetNbins()
        for ibin in range(1, num_bins + 1):
            ref.SetBinContent(ibin, 1)
        c.hist(ref, linecolor=ROOT.kGray + 2, linewidth=1)

        # -- Features, with one legend for each class
        width = 0.15
        for is_simple in [True, False]:
            for ifeat, feat in enumerate(features):
                if is_simple == signal_low(feat):
                    parity = ifeat % 2
                    colour = rp.colours[(ifeat // 2) % len(rp.colours)]
                    c.plot(jsd[feat][1:],
                           bins=np.array(effs[1:]) / 100.,
                           linecolor=colour,
                           markercolor=colour,
                           linestyle=1 + parity,
                           markerstyle=20 + parity * 4,
                           label=latex(feat, ROOT=True),
                           option='PL')

            header = "Analytical:" if is_simple else "MVA:"
            c.legend(header=header,
                     width=width * (1 + 0.8 * int(is_simple)),
                     xmin=0.42 + (width + 0.05) * (is_simple),
                     ymax=0.888)

        # Meaningful limits on JSD
        lim_x, lim_y, lim_ey = [np.array(col) for col in zip(*jsd_limits)]
        lim_ex = np.zeros_like(lim_ey)
        gr = ROOT.TGraphErrors(len(lim_x), lim_x, lim_y, lim_ex, lim_ey)
        smooth_tgrapherrors(gr, ntimes=2)
        c.graph(gr,
                linestyle=2,
                linecolor=ROOT.kGray + 1,
                fillcolor=ROOT.kBlack,
                alpha=0.03,
                option='L3')

        # Redraw axes
        c.pads()[0]._primitives[0].Draw('AXIS SAME')

        # Decorations
        c.xlabel("Background efficiency #varepsilon_{bkg}^{rel}")
        c.ylabel("Mass correlation, JSD")
        c.text([], xmin=0.15, ymax=0.96, qualifier=QUALIFIER)

        lines = ["#sqrt{s} = 13 TeV",  "Dijets"]
        if pt_range:
            lines = lines + ["p_{T} [GeV] #in", "    [{:.0f}, {:.0f}]".format(*pt_range)]
        c.text(lines, ymax=0.85, ATLAS=None)

        c.latex("Maximal sculpting",
                0.065,
                1.2,
                align=11,
                textsize=11,
                textcolor=ROOT.kGray + 2)
        c.xlim(0, 1)
        c.ymin(1E-05)
        c.padding(0.45)
        c.logy()

        for legend in c.pad()._legends:
            legend.SetMargin(0.5)

        # Label the limit band at its seventh-to-last point
        x_, y_ = ROOT.Double(0), ROOT.Double(0)
        ipoint = gr.GetN() - 7
        gr.GetPoint(ipoint, x_, y_)
        ey_ = gr.GetErrorY(ipoint)
        x_label, y_label = float(x_), float(y_)
        c.latex("Statistical limit",
                x_label,
                y_label - ey_ / 2.,
                align=23,
                textsize=11,
                angle=12,
                textcolor=ROOT.kGray + 2)

    return c
Exemplo n.º 14
0
def plot(*argv):
    """
    Method for delegating plotting.

    Draws ROC curves, as background rejection versus signal efficiency, for
    all features, together with the curve for random guessing. `features` is
    read as consecutive groups, the sizes of which are given by `appearances`.
    Each group gets its own colour, and each feature within a group its own
    line style. With exactly two groups, one legend is drawn per group;
    otherwise one legend is drawn per class of features (analytical or MVA).

    Arguments (positional only; exactly eight are required, in this order):
        args: Namespace holding command-line arguments; only `args.show` is
            read, to decide whether to run in batch mode.
        data: Not used by this method.
        features: List of features for which to draw ROC curves.
        ROCs: Container mapping feature to a pair of
            (signal efficiencies, background efficiencies).
        AUCs: Not used by this method.
        masscut: `False` for no mass cut, or a pair of mass limits to be shown
            in the text.
        pt_range: `None`, or pair of pT limits to be shown in the text.
        appearances: List with the number of features in each group.

    Returns:
        The canvas on which the figure was drawn.
    """

    # Unpack arguments
    args, data, features, ROCs, AUCs, masscut, pt_range, appearances = argv

    # Canvas
    c = rp.canvas(batch=not args.show)

    # Plots
    # -- Random guessing
    bins = np.linspace(0.2, 1., 100 + 1,
                       endpoint=True)  # original representation
    # Insert a very narrow first bin, just above the lower axis limit
    bins = np.array([bins[0], bins[0] + 0.01 * np.diff(bins[:2])[0]] +
                    list(bins[1:]))
    edges = bins
    centres = edges[:-1] + 0.5 * np.diff(edges)
    c.hist(np.power(centres, -1.),
           bins=edges,
           linecolor=ROOT.kGray + 2,
           fillcolor=ROOT.kBlack,
           alpha=0.05,
           linewidth=1,
           option='HISTC')
    linestyles = [1, 3, 5, 7]

    def draw_roc(igroup, ifeat, feat):
        # Draw the ROC curve of `feat`, at position `ifeat` in group `igroup`
        eff_sig, eff_bkg = ROCs[feat]
        c.graph(np.power(eff_bkg, -1.),
                bins=eff_sig,
                # Wrap around, so groups of more than four features still work
                linestyle=linestyles[ifeat % len(linestyles)],
                linecolor=rp.colours[igroup % len(rp.colours)],
                linewidth=2,
                label=latex(feat, ROOT=True),
                option='L')  # original representation

    # -- ROCs
    two_groups = len(appearances) == 2

    # Offsets into `features` at which each group starts (and the last ends)
    offsets = np.array([0] + appearances).cumsum()

    if not two_groups:
        for is_simple in [True, False]:

            # Split the legend into simple- and MVA taggers
            for igroup in range(len(offsets) - 1):
                group = features[offsets[igroup]:offsets[igroup + 1]]
                for ifeat, feat in enumerate(group):
                    if is_simple != signal_low(feat):
                        continue
                    draw_roc(igroup, ifeat, feat)

            # Draw class-specific legend
            width = 0.17
            c.legend(header=("Analytical:" if is_simple else "MVA:"),
                     width=width,
                     xmin=0.45 + (width + 0.06) * (is_simple),
                     ymax=0.888)

    else:
        for first_var in [True, False]:

            igroup = 0 if first_var else 1
            group = features[offsets[igroup]:offsets[igroup + 1]]
            for ifeat, feat in enumerate(group):
                draw_roc(igroup, ifeat, feat)

            # Draw group-specific legend, named after the first feature of
            # the group. The group starts at its offset into `features`; the
            # group size is not a valid index for this.
            width = 0.15
            c.legend(header=latex(features[offsets[igroup]], ROOT=True) + "-based:",
                     width=width,
                     xmin=0.55 + (width + 0.06) * (first_var),
                     ymax=0.888)

    # Decorations
    # `masscut` is either `False` or a pair of limits, so compare by value
    # rather than by truthiness
    has_masscut = not (masscut == False)

    c.xlabel("Signal efficiency #varepsilon_{sig}^{rel}")
    c.ylabel("Background rejection 1/#varepsilon_{bkg}^{rel}")
    c.text([], xmin=0.15, ymax=0.96, qualifier=QUALIFIER, ATLAS=False)

    lines = ["#sqrt{s} = 13 TeV", "#it{W} jet tagging"]
    if pt_range is not None:
        lines.append("p_{{T}} #in  [{:.0f}, {:.0f}] GeV".format(pt_range[0],
                                                                pt_range[1]))
    if has_masscut:
        lines.append("Cut: m #in  [{:.0f}, {:.0f}] GeV".format(masscut[0],
                                                               masscut[1]))
    c.text(lines, ATLAS=False)

    # The more phase-space cuts, the larger the y-axis range
    ranges = int(pt_range is not None) + int(has_masscut)
    mult = 10. if ranges == 2 else (2. if ranges == 1 else 1.)

    c.latex("Random guessing",
            0.4,
            1. / 0.4 * 0.9,
            align=23,
            angle=-12 + 2 * ranges,
            textsize=13,
            textcolor=ROOT.kGray + 2)
    c.xlim(0.2, 1.)
    c.ylim(1E+00, 2E+02 * mult)
    c.logy()
    c.legend()

    return c
def plot_full (*argv):
    """
    Method for delegating plotting.

    Draws a 2x2 canvas: the top row shows the background rejection and the
    bottom row shows 1/JSD, both at 50% relative signal efficiency; the left
    column is binned in `pt` and the right column in `npv`.

    Arguments (positional, unpacked from `argv` in this order):
        data: Not used by this function.
        args: Namespace holding command-line arguments; only `args.show` is
            read (the canvas runs in batch mode when it is false).
        features: Sequence of feature names. `signal_low(feat)` decides
            whether a feature is listed under the 'Analytical:' or the 'MVA:'
            legend. The position in the sequence sets the style: colour from
            `ifeat // 2`, line-/marker style from `ifeat % 2`.
        bins: Dict with keys 'pt' and 'npv' holding the bin edges.
            NOTE: `bins['npv'][-1]` is floored *in place*, so the caller's
            object is modified.
        effs: Not used by this function.
        rejs: `rejs[var][feat]` is a sequence of (mean, std) pairs, one per
            bin, for the background rejection.
        jsds: `jsds[var][feat]` is the same, for 1/JSD.
        meanx: `meanx[var]` holds the x-coordinates at which the markers are
            drawn.
        jsd_limits: `jsd_limits[var]` is a sequence of
            (x, y, ey_stat, ey_syst) tuples for the limit band on 1/JSD.
        masscut: If truthy, the mass-window text is added to the canvas.

    Returns:
        The canvas holding the finished figure.
    """

    # Unpack arguments
    data, args, features, bins, effs, rejs, jsds, meanx, jsd_limits, masscut = argv

    def as_float_array (arr):
        """Flatten `arr` in C order and cast it to float."""
        return arr.flatten('C').astype(float)

    with TemporaryStyle() as style:

        # Set styles
        scale      = 1.0
        scale_axis = 0.7
        margin_squeeze = 0.035
        margin_vert    = 0.20
        margin_hori    = 0.35
        size = (800, 600)

        style.SetTextSize(scale_axis * style.GetTextSize())
        for coord in ['x', 'y', 'z']:
            style.SetLabelSize(scale_axis * style.GetLabelSize(coord), coord)
            style.SetTitleSize(scale_axis * style.GetTitleSize(coord), coord)
            pass
        style.SetLegendTextSize(style.GetLegendTextSize() * scale)
        style.SetTickLength(0.05,                                                               'x')
        style.SetTickLength(0.07 * (float(size[0])/float(size[1])) * (margin_hori/margin_vert), 'y')

        # Canvas
        nb_col = 2
        c = rp.canvas(num_pads=(2,2), size=size, batch=not args.show)
        pads = c.pads()

        # Margins
        # -- Pads 0 and 1 form the top row, pads 0 and 2 the left column. The
        #    outward-facing sides get the wide margins, the sides facing the
        #    neighbouring pads get the squeezed one.
        for ipad in range(2 * nb_col):
            is_top  = ipad <  nb_col
            is_left = ipad %  nb_col == 0
            bare = pads[ipad]._bare()
            bare.SetTopMargin   (margin_vert    if is_top  else margin_squeeze)
            bare.SetBottomMargin(margin_squeeze if is_top  else margin_vert)
            bare.SetLeftMargin  (margin_hori    if is_left else margin_squeeze)
            bare.SetRightMargin (margin_squeeze if is_left else margin_hori)
            pass

        # To fix 30.5 --> 30 for NPV
        # NOTE: This modifies the caller's `bins` in place.
        bins['npv'][-1] = np.floor(bins['npv'][-1])

        # Plots
        # -- References
        boxopts  = dict(fillcolor=ROOT.kBlack, alpha=0.05, linecolor=ROOT.kGray + 2, linewidth=1, option='HIST')
        pads[0].hist([2], bins=[bins['pt'] [0], bins['pt'] [-1]], **boxopts)
        pads[1].hist([2], bins=[bins['npv'][0], bins['npv'][-1]], **boxopts)
        pads[2].hist([1], bins=[bins['pt'] [0], bins['pt'] [-1]], **boxopts)
        pads[3].hist([1], bins=[bins['npv'][0], bins['npv'][-1]], **boxopts)

        for col, var in enumerate(['pt', 'npv']):
            pad_rej = pads[col + 0 * nb_col]
            pad_jsd = pads[col + 1 * nb_col]

            # Draw the `signal_low` features first, then the remaining ones
            for is_simple in [True, False]:
                for ifeat, feat in enumerate(features):
                    if is_simple != signal_low(feat):
                        continue

                    opts = dict(
                        linecolor   = rp.colours[(ifeat // 2)],
                        markercolor = rp.colours[(ifeat // 2)],
                        fillcolor   = rp.colours[(ifeat // 2)],
                        linestyle   = 1 + (ifeat % 2),
                        alpha       = 0.3,
                        option      = 'E2',
                    )

                    mean_rej, std_rej = map(np.array, zip(*rejs[var][feat]))  # @TEMP
                    mean_jsd, std_jsd = map(np.array, zip(*jsds[var][feat]))

                    # Only _show_ mass-decorrelated features for `npv`: the
                    # even-indexed features are scaled to large negative
                    # values, i.e. far below the y-axis ranges set below
                    if (col == 1) and (ifeat % 2 == 0):
                        mean_rej *= -9999.
                        mean_jsd *= -9999.
                        pass

                    # Error boxes
                    x    = np.array(bins[var][:-1]) + 0.5 * np.diff(bins[var])
                    xerr = 0.5 * np.diff(bins[var])
                    graph_rej = ROOT.TGraphErrors(len(x), x, mean_rej, xerr, std_rej)
                    graph_jsd = ROOT.TGraphErrors(len(x), x, mean_jsd, xerr, std_jsd)

                    pad_rej.hist(graph_rej, **opts)
                    pad_jsd.hist(graph_jsd, **opts)

                    # Markers and lines
                    opts['option']      = 'PE2L'
                    opts['markerstyle'] = 20 + 4 * (ifeat % 2)

                    graph_rej = ROOT.TGraph(len(x), meanx[var], mean_rej)
                    graph_jsd = ROOT.TGraph(len(x), meanx[var], mean_jsd)

                    # The rejection pads carry the legend entries of the
                    # non-`signal_low` features, the JSD pads those of the
                    # `signal_low` ones
                    pad_rej.hist(graph_rej, label=latex(feat, ROOT=True) if not is_simple else None, **opts)
                    pad_jsd.hist(graph_jsd, label=latex(feat, ROOT=True) if     is_simple else None, **opts)
                    pass
                pass

            # Meaningful limits on JSD
            x, y, ey_stat, ey_syst  = map(np.array, zip(*jsd_limits[var]))
            ex = np.zeros_like(x)
            # -- Stretch the band to the outer bin edges
            x[0]  = bins[var][0]
            x[-1] = bins[var][-1]
            ey_comb = np.sqrt(np.square(ey_stat) + np.square(ey_syst))
            gr_stat = ROOT.TGraphErrors(len(x), *[as_float_array(arr) for arr in (x, y, ex, ey_stat)])
            gr_comb = ROOT.TGraphErrors(len(x), *[as_float_array(arr) for arr in (x, y, ex, ey_comb)])
            smooth_tgrapherrors(gr_stat, ntimes=2)
            smooth_tgrapherrors(gr_comb, ntimes=2)
            pad_jsd.graph(gr_comb,                                        fillcolor=ROOT.kBlack, alpha=0.03, option='3')
            pad_jsd.graph(gr_stat, linestyle=2, linecolor=ROOT.kGray + 1, fillcolor=ROOT.kBlack, alpha=0.03, option='L3')

            if col == 0:
                # Anchor the label to the last point of the combined band
                x_, y_ = ROOT.Double(0), ROOT.Double(0)
                idx = gr_comb.GetN() - 1
                gr_comb.GetPoint(idx, x_,  y_)
                ey_ = gr_comb.GetErrorY(idx)
                x_, y_ = map(float, (x_, y_))
                pad_jsd.latex("Mean stat. #oplus #varepsilon_{bkg}^{rel} var. limit     ", x_, y_ + 0.75 * ey_, align=31, textsize=11 * scale, angle=0, textcolor=ROOT.kGray + 2)
                pass

            # Decorations
            # -- offsets (same values on every pass; repeated for each column)
            pads[0]._xaxis().SetLabelOffset(9999.)
            pads[0]._xaxis().SetTitleOffset(9999.)
            pads[1]._xaxis().SetLabelOffset(9999.)
            pads[1]._xaxis().SetTitleOffset(9999.)

            pads[2]._xaxis().SetTitleOffset(2.3)
            pads[3]._xaxis().SetTitleOffset(2.3)

            pads[1]._yaxis().SetLabelOffset(9999.)
            pads[1]._yaxis().SetTitleOffset(9999.)
            pads[3]._yaxis().SetLabelOffset(9999.)
            pads[3]._yaxis().SetTitleOffset(9999.)

            # -- x-axis label
            if   var == 'pt':
                xlabel = "Large-#it{R} jet p_{T} [GeV]"
            elif var == 'npv':
                xlabel = "Number of reconstructed vertices N_{PV}"
            else:
                raise NotImplementedError("Variable {} is not supported.".format(var))

            pad_jsd.xlabel(xlabel)
            if col == 0:
                pattern = "#splitline{#splitline{#splitline{%s}{}}{#splitline{}{}}}{#splitline{#splitline{}{}}{#splitline{}{}}}"
                pad_rej.ylabel(pattern % "1/#varepsilon_{bkg}^{rel} @ #varepsilon_{sig}^{rel} = 50%")
                pad_jsd.ylabel(pattern % "1/JSD @ #varepsilon_{sig}^{rel} = 50%")
                pass

            xmid = (bins[var][0] + bins[var][-1]) * 0.5
            pad_rej.latex("Random guessing",   xmid, 2 * 0.9, align=23, textsize=11 * scale, angle=0, textcolor=ROOT.kGray + 2)
            pad_jsd.latex("Maximal sculpting", xmid, 1 * 0.8, align=23, textsize=11 * scale, angle=0, textcolor=ROOT.kGray + 2)

            pad_rej.ylim(1,   70)  # 500
            pad_jsd.ylim(0.2, 7E+04)  # 2E+05

            pad_rej.logy()
            pad_jsd.logy()

            pass  # end: loop `col`

        # Draw class-specific legend
        # -- The legends go in the right-most column
        legend_col = nb_col - 1
        pad_rej = pads[legend_col + 0 * nb_col]
        pad_jsd = pads[legend_col + 1 * nb_col]
        width = margin_hori - 0.03
        pad_rej.legend(header='MVA:',        width=width, xmin=1. - margin_hori + 0.03, ymax=1. - margin_vert    + 0.02)
        pad_jsd.legend(header='Analytical:', width=width, xmin=1. - margin_hori + 0.03, ymax=1. - margin_squeeze + 0.02)
        pad_rej._legends[-1].SetTextSize(style.GetLegendTextSize())
        pad_jsd._legends[-1].SetTextSize(style.GetLegendTextSize())

        # Common decorations
        for pad in pads:
            pad._xaxis().SetNdivisions(504)
            pass

        c.text([], qualifier=QUALIFIER, xmin=margin_hori, ymax=1. - margin_vert + 0.03)

        pads[1].text(["#sqrt{s} = 13 TeV,  #it{W} jet tagging"] + \
                        (['m #in  [60, 100] GeV'] if masscut else []),
                        ATLAS=False, ymax=1. - margin_vert - 0.10)

        pads[3].text(["Multijets"],
                         ATLAS=False, ymax=1. - margin_squeeze - 0.10)

        # Arrows
        # -- Vertical guide texts, drawn on the bare canvas in NDC coordinates
        #    at the horizontal centre
        c._bare().cd()
        tlatex = ROOT.TLatex()
        tlatex.SetTextAngle(90)
        tlatex.SetTextAlign(22)
        tlatex.SetTextSize(11)
        tlatex.SetTextColor(ROOT.kGray + 2)
        tlatex.DrawLatexNDC(0.5, 0. + 0.5 * (margin_vert + 0.5 * (1.0 - margin_squeeze - margin_vert)), "    Less sculpting #rightarrow")
        tlatex.DrawLatexNDC(0.5, 1. - 0.5 * (margin_vert + 0.5 * (1.0 - margin_squeeze - margin_vert)), "     Greater separation #rightarrow")

        pass  # Temporary style scope

    return c
Exemplo n.º 16
0
def roc(data_,
        args,
        features,
        masscut=False,
        pt_range=(200, 2000),
        title=None):
    """
    Perform study of ROC curves and ROC AUCs for a set of features.

    For each feature, the weighted ROC curve is computed (optionally within
    the jet mass window), sub-sampled in 1% steps of signal efficiency, and
    the background rejection closest to 50% signal efficiency is printed
    together with the AUC. The curves are then handed to `plot` and saved.

    Saves plot `figures/[title_]roc[__pT<low>_<high>][__masscut].pdf`

    Arguments:
        data_: Pandas data frame from which to read data. The columns
            'signal', 'pt', 'm', 'weight_test' and one column per entry in
            `features` are read.
        args: Namespace holding command-line arguments. Passed on to `plot`
            and returned unchanged.
        features: Names of the features for which to compute ROC curves.
            Features for which `signal_low(feat)` is true are sign-flipped
            before being used as a score.
        masscut: If truthy, ROC curves are computed for jets with
            m in (60, 100) only, and the efficiencies are scaled by the
            fraction of signal/background jets passing this mass cut.
        pt_range: Pair (low, high) selecting jets with low < pt < high, or
            None for no pT selection. If low > high, the selection is
            instead the union of 200 < pt < high and low < pt < 2000.
        title: Optional prefix for the output file name.

    Returns:
        Tuple of (canvas, args, path of the saved figure).
    """

    # Select pT-range
    if pt_range is None:
        data = data_
    elif pt_range[0] > pt_range[1]:
        # Inverted range: keep the two outer pT regions
        data = data_.loc[((data_['pt'] > 200) &
                          (data_['pt'] < pt_range[1])) |
                         ((data_['pt'] < 2000) &
                          (data_['pt'] > pt_range[0]))]
    else:
        data = data_.loc[(data_['pt'] > pt_range[0])
                         & (data_['pt'] < pt_range[1])]
        pass

    # (Opt.) masscut | @NOTE: Duplication with adversarial/utils/metrics.py
    msk = (data['m'] > 60.) & (data['m'] < 100.) if masscut else np.ones_like(
        data['signal']).astype(bool)

    # Computing ROC curves
    ROCs = dict()
    for feat in features:

        # Flip the sign so that larger scores are always more signal-like
        sign = -1. if signal_low(feat) else 1

        eff_bkg, eff_sig, _ = roc_curve(
            data.loc[msk, 'signal'].values,
            data.loc[msk, feat].values * sign,
            sample_weight=data.loc[msk, 'weight_test'].values)

        if masscut:
            # Fold in the efficiency of the mass cut itself.
            # NOTE(review): these fractions are unweighted, whereas the ROC
            # curve above uses `weight_test` -- confirm this is intended.
            eff_sig_mass = np.mean(msk[data['signal'] == 1])
            eff_bkg_mass = np.mean(msk[data['signal'] == 0])

            eff_sig *= eff_sig_mass
            eff_bkg *= eff_bkg_mass
            pass

        # Filter, to avoid background rejection blowing up
        indices = np.where((eff_bkg > 0) & (eff_sig > 0))
        eff_sig = eff_sig[indices]
        eff_bkg = eff_bkg[indices]

        # Subsample to 1% steps: for each target, pick the closest point
        targets = np.linspace(0, 1, 100 + 1, endpoint=True)
        indices = np.array([np.argmin(np.abs(eff_sig - t)) for t in targets])
        eff_sig = eff_sig[indices]
        eff_bkg = eff_bkg[indices]

        # Store
        ROCs[feat] = (eff_sig, eff_bkg)
        pass

    # Computing ROC AUCs
    # -- NOTE: Computed on the pT-selected data, *without* the mass cut mask
    AUCs = dict()
    for feat in features:
        sign = -1. if signal_low(feat) else 1.
        AUCs[feat] = roc_auc_score(data['signal'].values,
                                   data[feat].values * sign,
                                   sample_weight=data['weight_test'].values)
        pass

    # Report scores
    # -- Single-argument `print(...)` behaves the same on Python 2 and 3
    print("\n== pT range: {:s}".format(
        'inclusive' if pt_range is None else "[{:.0f}, {:.0f}] Gev".format(
            *pt_range)))
    print("\n== {} masscut".format("With" if masscut else "Without"))
    for feat in features:
        eff_sig, eff_bkg = ROCs[feat]
        idx = np.argmin(np.abs(eff_sig - 0.5))
        print("\nFeature {}:".format(feat))
        print("  Background rejection at effsig = {:.0f}%: {:6.3f}".format(
            eff_sig[idx] * 100., 1. / eff_bkg[idx]))
        print("  AUC: {:5.4f}".format(AUCs[feat]))
        pass

    # Perform plotting
    c = plot(args, data, features, ROCs, AUCs, masscut, pt_range)

    # Output
    # -- `title` is concatenated verbatim; only the suffix is formatted
    prefix = 'figures/' if title is None else 'figures/' + title + '_'
    path = prefix + 'roc{}{:s}.pdf'.format(
        '__pT{:.0f}_{:.0f}'.format(pt_range[0], pt_range[1])
        if pt_range is not None else '', '__masscut' if masscut else '')

    c.save(path=path)

    return c, args, path
Exemplo n.º 17
0
def plot(*argv):
    """
    Method for delegating plotting.

    Draws one marker per feature in the plane of background rejection
    (x-axis) versus 1/JSD (y-axis), both at 50% relative signal efficiency,
    together with reference lines/boxes, the limit band on 1/JSD, and dashed
    lines connecting the first point of each feature group to the other
    points in that group.

    Arguments (positional, unpacked from `argv` in this order):
        data: Not used by this function.
        args: Namespace holding command-line arguments; only `args.show` is
            read (the canvas runs in batch mode when it is false).
        features: Sequence of feature names, ordered group by group.
        scan_features: Not used by this function.
        points: Sequence of (x, y, name) triplets. Markers are looked up by
            name; the connecting lines index this sequence directly by group
            position.
        jsd_limits: Sequence of (x, y, ey) triplets for the limit band.
        masscut: If truthy, the mass-window text is added and the y-axis
            range is enlarged.
        pt_range: Pair (low, high) shown in the text, or None. A non-None
            value also enlarges the y-axis range.
        appearances: Number of features in each consecutive group of
            `features`. With exactly two groups, one legend per group is
            drawn; otherwise the legends are split by `signal_low(feat)`
            into 'Analytical:' and 'MVA:'.

    Returns:
        The canvas holding the finished figure.
    """

    # Unpack arguments
    data, args, features, scan_features, points, jsd_limits, masscut, pt_range, appearances = argv

    # Group boundaries: group `i` spans `features[indices[i]:indices[i + 1]]`
    indices = np.array([0] + list(appearances)).cumsum()

    # Names of the points, built once, for look-up by feature name. (A list,
    # not `map(...)`, so that `.index` is available on Python 3 as well.)
    point_names = [point[2] for point in points]

    with TemporaryStyle() as style:

        # Compute yaxis range
        ranges = int(pt_range is not None) + int(masscut)
        mult = 10. if ranges == 2 else (5. if ranges == 1 else 1.)

        # Define variable(s)
        #axisrangex = (1.4,     100.)
        #axisrangey = (0.3, 100000. * mult)
        axisrangex = (1.4, 40.)
        axisrangey = (0.3, 300000. * mult)
        #axisrangex = (1.4,     100.)
        #axisrangey = (0.3, 500000.)
        aminx, amaxx = axisrangex
        aminy, amaxy = axisrangey

        # Styling
        scale = 0.95
        style.SetTitleOffset(1.8, 'x')
        style.SetTitleOffset(1.6, 'y')
        style.SetTextSize(style.GetTextSize() * scale)
        style.SetLegendTextSize(style.GetLegendTextSize() * scale)

        # Canvas
        c = rp.canvas(batch=not args.show, size=(600, 600))

        def draw_marker(igroup, ifeat, feat, shorten):
            """
            Draw the marker of `feat`, the `ifeat`-th member of group `igroup`.
            """

            # Coordinates, label
            px, py, label = points[point_names.index(feat)]

            # Overwrite default name of parameter-scan classifier
            if shorten:
                for prefix in ('ANN', 'uBoost'):
                    if label.startswith(prefix):
                        label = prefix
                        pass
                    pass
                pass

            # Style: colour per group, marker style per position in group
            colour = rp.colours[igroup % len(rp.colours)]
            markerstyle = 20 if ifeat == 0 else 23 + ifeat

            # Draw
            c.graph([py],
                    bins=[px],
                    markercolor=colour,
                    markerstyle=markerstyle,
                    label='#scale[%.1f]{%s}' %
                    (scale, latex(label, ROOT=True)),
                    option='P')
            return

        # Reference lines
        nullopts = dict(linecolor=0,
                        linewidth=0,
                        linestyle=0,
                        markerstyle=0,
                        markersize=0,
                        fillstyle=0)
        lineopts = dict(linecolor=ROOT.kGray + 2, linewidth=1, option='L')
        boxopts = dict(fillcolor=ROOT.kBlack,
                       alpha=0.05,
                       linewidth=0,
                       option='HIST')
        c.hist([aminy], bins=list(axisrangex), **nullopts)
        c.plot([1, amaxy], bins=[2, 2], **lineopts)
        c.plot([1, 1], bins=[2, amaxx], **lineopts)
        c.hist([amaxy], bins=[aminx, 2], **boxopts)
        c.hist([1], bins=[2, amaxx], **boxopts)

        # Meaningful limits on 1/JSD
        x, y, ey = map(np.array, zip(*jsd_limits))
        ex = np.zeros_like(ey)
        gr = ROOT.TGraphErrors(len(x), x, y, ex, ey)
        smooth_tgrapherrors(gr, ntimes=3)
        c.graph(gr,
                linestyle=2,
                linecolor=ROOT.kGray + 1,
                fillcolor=ROOT.kBlack,
                alpha=0.03,
                option='L3')

        # -- Anchor the label to the fourth point of the band, or to the last
        #    point if the band has fewer than four
        x_, y_ = ROOT.Double(0), ROOT.Double(0)
        idx = min(3, gr.GetN() - 1)
        gr.GetPoint(idx, x_, y_)
        ey_ = gr.GetErrorY(idx)
        x_, y_ = map(float, (x_, y_))
        c.latex("Statistical limit",
                x_,
                y_ + ey_,
                align=21,
                textsize=11,
                angle=-5,
                textcolor=ROOT.kGray + 2)

        # Markers
        if len(appearances) != 2:
            for is_simple in [True, False]:

                # Split the legend into simple- and MVA taggers
                for i in range(len(indices) - 1):
                    group = features[indices[i]:indices[i + 1]]
                    for ifeat, feat in enumerate(group):
                        if is_simple != signal_low(feat):
                            continue
                        draw_marker(i, ifeat, feat, shorten=True)
                        pass
                    pass

                # Draw class-specific legend
                width = 0.2  # changed from 0.15 to 0.2
                c.legend(
                    header=("Analytical:" if is_simple else "MVA:"),
                    width=width,
                    xmin=0.50 + (width + 0.06) * (is_simple),
                    ymax=0.888
                )  #, ymax=0.827) #changed xmin from 0.60 to 0.50, with translation from 0.02 to 0.06
                pass

        else:
            for first_var in [True, False]:

                # One legend per group: first group, then second group
                i = 0 if first_var else 1
                group = features[indices[i]:indices[i + 1]]
                for ifeat, feat in enumerate(group):
                    draw_marker(i, ifeat, feat, shorten=False)
                    pass

                # Draw class-specific legend
                # -- The header names the first feature of the group, i.e.
                #    the one at `indices[i]`
                width = 0.15
                c.legend(header=latex(features[indices[i]], ROOT=True) +
                         "-based:",
                         width=width,
                         xmin=0.55 + (width + 0.06) * (first_var),
                         ymax=0.9)
                pass

        # Make legends transparent
        for leg in c.pads()[0]._legends:
            leg.SetFillStyle(0)
            pass

        # Connecting lines (simple)
        # -- From the first point of each group to every other point in it
        for i in range(len(indices) - 1):
            base_x, base_y, _ = points[indices[i]]
            color = rp.colours[i % len(rp.colours)]
            for j in range(1, appearances[i]):
                x1, y1, _ = points[indices[i] + j]
                c.graph([base_y, y1],
                        bins=[base_x, x1],
                        linecolor=color,
                        linestyle=2,
                        option='L')
                pass
            pass

        # Decorations
        c.xlabel(
            "Background rejection, 1 / #varepsilon_{bkg}^{rel} @ #varepsilon_{sig}^{rel} = 50%"
        )
        c.ylabel("Mass-decorrelation, 1 / JSD @ #varepsilon_{sig}^{rel} = 50%")
        c.xlim(*axisrangex)
        c.ylim(*axisrangey)
        c.logx()
        c.logy()

        # -- Guide texts, centred (on the log scale) between 1 and the axis
        #    maximum
        opts_text = dict(textsize=11, textcolor=ROOT.kGray + 2)
        midpointx = np.power(10, 0.5 * np.log10(amaxx))
        midpointy = np.power(10, 0.5 * np.log10(amaxy))
        c.latex("No separation",
                1.91,
                midpointy,
                angle=90,
                align=21,
                **opts_text)
        c.latex("Maximal sculpting",
                midpointx,
                0.89,
                angle=0,
                align=23,
                **opts_text)
        c.latex("    Less sculpting #rightarrow",
                2.1,
                midpointy,
                angle=90,
                align=23,
                **opts_text)
        c.latex("     Greater separation #rightarrow",
                midpointx,
                1.1,
                angle=0,
                align=21,
                **opts_text)

        #c.text(TEXT + ["#it{W} jet tagging"], xmin=0.24, qualifier=QUALIFIER)
        c.text([], xmin=0.15, ymax=0.96, qualifier=QUALIFIER, ATLAS=False)
        c.text(TEXT + \
               ["#it{W} jet tagging"] + (
                    ["p_{{T}} #in  [{:.0f}, {:.0f}] GeV".format(pt_range[0], pt_range[1])] if pt_range is not None else []
                ) + (
                    ['Cut: m #in  [60, 100] GeV'] if masscut else []
                ),
               xmin=0.26, ATLAS=None)
        pass

    return c