Пример #1
0
def plot2D (*argv):
    """
    Draw signal/background contours with the DDT fit and LDA boundary overlaid.

    Positional arguments (unpacked from `argv`, in this order):
        data: Not used in this function.
        ddt: Fitted object exposing `intercept_`, `offset_` and `coef_`.
        lda: Fitted object exposing `intercept_` and `coef_`.
        contours: Indexable by 0 (background) and 1 (signal); each entry is
            passed to `c.hist2d`.
        binsx: Sequence whose first and last entries give the x-axis range.
        binsy: Sequence whose first and last entries give the y-axis range.
        variable: Compared against `VAR_TAU21`, `VAR_N2`, `VAR_DECDEEP` and
            `VAR_DEEP` to choose the y-axis label; also used in the output
            file name.

    The figure is saved to 'figures/ddt/ddt_<variable>_2d.pdf'. Returns None.
    """

    # Unpack arguments
    data, ddt, lda, contours, binsx, binsy, variable = argv

    with TemporaryStyle() as style:

        # Style
        style.SetNumberContours(10)

        # Canvas
        c = rp.canvas(batch=True)

        # Axes: invisible single-bin histogram spanning the x-range
        c.hist([binsy[0]], bins=[binsx[0], binsx[-1]], linestyle=0, linewidth=0)

        # Plotting contours
        for sig in [0, 1]:
            c.hist2d(contours[sig],
                     linecolor=rp.colours[1 + 3 * sig],
                     label="Signal" if sig else "Background",
                     option='CONT3',
                     legend_option='L')
            pass

        # Linear fit, drawn as a straight line between x1 and x2
        x1, x2 = 1.5, 5.0
        intercept, coef = ddt.intercept_ + ddt.offset_, ddt.coef_
        y1 = intercept + x1 * coef
        y2 = intercept + x2 * coef
        c.plot([y1, y2], bins=[x1, x2], color=rp.colours[-1], label='DDT transform fit', linewidth=1, linestyle=1, option='L')

        # LDA decision boundary, drawn over the same x-range
        y1 = lda.intercept_ + x1 * lda.coef_
        y2 = lda.intercept_ + x2 * lda.coef_
        c.plot([y1, y2], bins=[x1, x2], label='LDA boundary', linewidth=1, linestyle=2, option='L')

        # Decorations
        c.text(["#sqrt{s} = 13 TeV"], qualifier=QUALIFIER, ATLAS=False)
        c.legend()
        c.ylim(binsy[0], binsy[-1])
        c.xlabel("Large-#it{R} jet " + latex('rhoDDT', ROOT=True))

        # y-axis label depends on the substructure variable; no label is set
        # for any other value of `variable`.
        if variable == VAR_TAU21:
            c.ylabel("Large-#it{R} jet " + latex('#tau_{21}', ROOT=True))
        elif variable == VAR_N2:
            c.ylabel("Large-#it{R} jet " + latex('N_{2}', ROOT=True))
        elif variable == VAR_DECDEEP:
            c.ylabel("Large-#it{R} jet " + latex('dec_deepWvsQCD', ROOT=True))
        elif variable == VAR_DEEP:
            c.ylabel("Large-#it{R} jet " + latex('deepWvsQCD', ROOT=True))

        # Save
        mkdir('figures/ddt')
        c.save('figures/ddt/ddt_{}_2d.pdf'.format(variable))
        pass
    return
Пример #2
0
def plot(profile, fit):
    """
    Draw a 2D profile as a colour map and save it as both PDF and EPS.

    Arguments:
        profile: Object exposing ROOT-style `GetXaxis`/`GetYaxis`/`GetZaxis`,
            `SetContour` and `Draw` methods.
        fit: Truthy when `profile` holds the k-NN fitted values instead of the
            measured ones; this only changes the z-axis title and the output
            file name.

    Output goes to 'figures/knn/'. Returns None.
    """

    # Canvas with custom margins set directly on the underlying pad
    c = rp.canvas(batch=True)
    bare = c.pads()[0]._bare()
    bare.cd()
    bare.SetRightMargin(0.20)
    bare.SetLeftMargin(0.15)
    bare.SetTopMargin(0.10)

    # Axis handles
    xaxis = profile.GetXaxis()
    yaxis = profile.GetYaxis()
    zaxis = profile.GetZaxis()

    # Titles
    xaxis.SetTitle(latex(VARX, ROOT=True) + " [GeV]")
    yaxis.SetTitle(latex(VARY, ROOT=True) + " [GeV]")
    source = "#it{k}-NN fitted" if fit else "Measured"
    zaxis.SetTitle("%s %s^{(%s%%)}" % (source, latex(VAR, ROOT=True), EFF))

    # Tick divisions and title offsets
    for axis in (yaxis, zaxis):
        axis.SetNdivisions(505)
    for axis, offset in ((xaxis, 1.4), (yaxis, 1.8), (zaxis, 1.3)):
        axis.SetTitleOffset(offset)

    # Optional fixed z-range
    if ZRANGE:
        zaxis.SetRangeUser(*ZRANGE)
    profile.SetContour(NB_CONTOUR)

    # Draw the colour map and overlay both bound curves
    # (bound labels and the qualifier text are not drawn in this plot)
    profile.Draw('COLZ')
    for bound in (BOUNDS[0], BOUNDS[1]):
        bound.DrawCopy("SAME")

    # Decorations
    c.text(["#sqrt{s} = 13 TeV", "Multijets"],
           ATLAS=False,
           textcolor=ROOT.kWhite)

    # Save the same figure in both formats
    mkdir('figures/knn/')
    stem = 'figures/knn/knn_{}_{:s}_{}_{}'.format(
        'fit' if fit else 'profile', VAR, EFF, MODEL)
    for extension in ('pdf', 'eps'):
        c.save(stem + '.' + extension)
Пример #3
0
def plot(*argv):
    """
    Draw one background-efficiency profile per working point on one canvas.

    Positional arguments (unpacked from `argv`, in this order):
        args: Namespace-like object; `args.show` disables batch mode.
        data: Not used in this function.
        feat: Feature name, rendered with `latex` in the canvas text.
        profiles: Iterable of histogram-like objects passed to `c.hist`.
        cuts: Iterated in lockstep with `profiles` and `effs`; the values
            themselves are not used.
        effs: Integer efficiencies (in percent) used as legend labels.

    Returns:
        The canvas, which is not saved here.
    """

    # Unpack arguments
    args, data, feat, profiles, cuts, effs = argv

    with TemporaryStyle() as style:

        # Style
        style.SetTitleOffset(1.6, 'y')

        # Canvas
        c = rp.canvas(batch=not args.show)

        # Plots: each profile is drawn twice, as a line and as an error band
        for index, (profile, _, eff) in enumerate(zip(profiles, cuts, effs)):
            shade = rp.colours[index]
            c.hist(profile, linecolor=shade, linestyle=1, option='HIST L')

            # Single-digit efficiencies get a leading space in the legend
            padding = " " if eff < 10 else ""
            c.hist(profile,
                   linecolor=shade,
                   fillcolor=shade,
                   alpha=0.3,
                   option='E3',
                   label=padding + "{:d}%".format(eff))

        # Decorations
        c.xlabel("Large-#it{R} jet mass [GeV]")
        c.ylabel("Background efficiency, #varepsilon_{bkg}^{rel}")
        lines = [
            "#sqrt{s} = 13 TeV,  Multijets",
            "Cuts on {}".format(latex(feat, ROOT=True)),
        ]
        c.text(lines, qualifier=QUALIFIER, ATLAS=False)
        c.ylim(0, 2.0)
        c.legend(reverse=True,
                 width=0.25,
                 ymax=0.87,
                 header="Incl. #bar{#varepsilon}_{bkg}^{rel}:")

    return c
Пример #4
0
def plot (*argv):
    """
    Compare mass spectra of background jets passing and failing a cut.

    Positional arguments (unpacked from `argv`, in this order):
        data: Table indexed with `.loc`; columns 'm' and 'weight_test' are read.
        args: Namespace-like object; `args.show` disables batch mode.
        feat: Feature name, rendered with `latex` in the canvas text.
        msk_pass: Boolean mask of entries passing the cut (supports `~`).
        msk_bkg: Boolean mask of background entries (supports `&`).
        eff_sig: Signal efficiency, formatted with `%d` in the canvas text.

    Note that the module-level `HISTSTYLE` entries for True and False are
    modified in place (labels and the common histogram options).

    Returns:
        The two-pad canvas, which is not saved here.
    """

    # Unpack arguments
    data, args, feat, msk_pass, msk_bkg, eff_sig = argv

    # Global variable override(s)
    HISTSTYLE[True]['label'] = "Passing cut"
    HISTSTYLE[False]['label'] = "Failing cut"

    # Canvas
    canvas_size = (int(800 * 600 / 857.), 600)
    c = rp.canvas(num_pads=2, size=canvas_size, batch=not args.show)

    # Plots: failing first, then passing
    common = dict(bins=MASSBINS, alpha=0.3, normalise=True, linewidth=3)
    hist = {}
    for name, passing in (('fail', False), ('pass', True)):
        if passing:
            selection = msk_bkg & msk_pass
        else:
            selection = msk_bkg & ~msk_pass
        HISTSTYLE[passing].update(common)
        masses = data.loc[selection, 'm'].values
        weights = data.loc[selection, 'weight_test'].values
        hist[name] = c.hist(masses, weights=weights, **HISTSTYLE[passing])

    # Ratio plots: pass/pass reference line, then pass/fail with error band
    c.ratio_plot((hist['pass'], hist['pass']), option='HIST', fillstyle=0,
                 linecolor=ROOT.kGray + 1, linewidth=1, linestyle=1)
    c.ratio_plot((hist['pass'], hist['fail']), option='E2', fillstyle=1001,
                 fillcolor=rp.colours[0], linecolor=rp.colours[0], alpha=0.3)

    # -- Set this before drawing OOB markers
    ratio_pad = c.pads()[1]
    ratio_pad.logy()
    ratio_pad.ylim(1E-01, 1E+01)

    c.ratio_plot((hist['pass'], hist['fail']), option='HIST', fillstyle=0,
                 linewidth=3, linecolor=rp.colours[0])

    # Decorations
    c.xlabel("Large-#it{R} jet mass [GeV]")
    c.ylabel("Fraction of jets")
    cut_line = "#varepsilon_{sig} = %d%% cut on %s" % (eff_sig, latex(feat, ROOT=True))
    c.text(["#sqrt{s} = 13 TeV,  Multijets", cut_line],
           qualifier=QUALIFIER, ATLAS=False)

    c.ylim(2E-04, 2E+02)
    c.logy()
    c.legend()

    c.pads()[1].ylabel("Passing / failing")

    return c
Пример #5
0
def plot(*argv):
    """
    Draw the normalised distribution of one feature for background and signal.

    Positional arguments (unpacked from `argv`, in this order):
        args: Namespace-like object; `args.show` disables batch mode.
        data: Table indexed with `.loc`; columns 'signal', 'weight_test' and
            `feat` are read.
        feat: Name of the column to histogram; also rendered in the x-label.
        bins: Binning forwarded to `c.hist`.
        pt_range: Pair (min, max) shown in the canvas text, or None to omit.
        mass_range: Pair (min, max) shown in the canvas text, or None to omit.

    Returns:
        The canvas, which is not saved here.
    """

    # Unpack arguments
    args, data, feat, bins, pt_range, mass_range = argv

    # Canvas
    c = rp.canvas(batch=not args.show)

    # Style
    # Copy each per-class style dict so the updates below stay local.
    # `dict(**HISTSTYLE)` only copied the outer mapping (the inner dicts were
    # still shared with the global) and raises TypeError on Python 3 when the
    # keys are not strings.
    histstyle = {key: dict(opts) for key, opts in HISTSTYLE.items()}
    base = dict(bins=bins, alpha=0.5, normalise=True, linewidth=3)

    # Plots
    for signal in [0, 1]:
        msk = (data['signal'] == signal)
        histstyle[signal].update(base)
        c.hist(data.loc[msk, feat].values,
               weights=data.loc[msk, 'weight_test'].values,
               **histstyle[signal])
        pass

    # Decorations
    c.xlabel("Large-#it{R} jet " + latex(feat, ROOT=True))
    c.ylabel("Fraction of jets")

    # `TEXT + [...]` builds a new list, so the global TEXT is not modified
    lines = TEXT + ["#it{W} jet tagging"]
    if pt_range is not None:
        lines.append("p_{{T}} #in  [{:.0f}, {:.0f}] GeV".format(pt_range[0], pt_range[1]))
    if mass_range is not None:
        lines.append("m #in  [{:.0f}, {:.0f}] GeV".format(mass_range[0], mass_range[1]))
    c.text(lines, qualifier=QUALIFIER, ATLAS=False)

    c.ylim(4E-03, 4E-01)
    c.logy()
    c.legend()
    return c
Пример #6
0
def main(args):
    """
    Produce the k-NN profile plots and the local selection-efficiency maps.

    Steps, in order:
      1. Initialise and load the dataset; split it into signal and background
         masks using the 'signal' column.
      2. Fill the measured profile from the background entries.
      3. Add the k-NN variable to `data`, load the k-NN regressor, and evaluate
         it on a re-binned (finer) grid to build the fitted profile.
      4. Plot the measured and fitted profiles via `plot(profile, fit)`.
      5. For signal and background separately, compute the fraction of weighted
         entries with the k-NN variable below zero, inclusively per `VARY` bin
         and per (`VARX`, `VARY`) bin, and save the resulting 2D map under
         'figures/knn/'.

    Arguments:
        args: Passed to `initialise`, which returns the parsed arguments and a
            configuration object; `args.input` is used as the data path prefix.

    Returns None.
    """

    # Initialise
    args, cfg = initialise(args)

    # Load data
    data, _, _ = load_data(args.input + 'data.h5', train=True)
    msk_sig = data['signal'] == 1
    msk_bkg = ~msk_sig

    # -------------------------------------------------------------------------
    ####
    #### # Initialise Keras backend
    #### initialise_backend(args)
    ####
    #### # Neural network-specific initialisation of the configuration dict
    #### initialise_config(args, cfg)
    ####
    #### # Keras import(s)
    #### from keras.models import load_model
    ####
    #### # NN
    #### from run.adversarial.common import add_nn
    #### with Profile("NN"):
    ####     classifier = load_model('models/adversarial/classifier/full/classifier.h5')
    ####     add_nn(data, classifier, 'NN')
    ####     pass
    # -------------------------------------------------------------------------

    # Fill measured profile
    profile_meas, _ = fill_profile(data[msk_bkg])

    # Add k-NN variable
    knnfeat = 'knn'
    add_knn(data,
            newfeat=knnfeat,
            path='models/knn/knn_{}_{}.pkl.gz'.format(VAR, EFF))

    # Loading KNN classifier
    # NOTE(review): EFF is formatted with `{:.0f}` here but with `{}` in the
    # `add_knn` path above; the two paths agree only if EFF renders identically
    # both ways -- confirm.
    knn = loadclf('models/knn/knn_{:s}_{:.0f}.pkl.gz'.format(VAR, EFF))

    # Filling fitted profile
    with Profile("Filling fitted profile"):
        # Each original bin is split into `rebin` sub-bins
        rebin = 8
        edges, centres = dict(), dict()
        for ax, var in zip(['x', 'y'], [VARX, VARY]):

            # Short-hands
            vbins, vmin, vmax = AXIS[var]

            # Re-binned bin edges  @TODO: Make standardised right away?
            edges[ax] = np.interp(
                np.linspace(0, vbins, vbins * rebin + 1, endpoint=True),
                range(vbins + 1),
                np.linspace(vmin, vmax, vbins + 1, endpoint=True))

            # Re-binned bin centres
            centres[ax] = edges[ax][:-1] + 0.5 * np.diff(edges[ax])
            pass

        # Get predictions evaluated at re-binned bin centres
        g = dict()
        g['x'], g['y'] = np.meshgrid(centres['x'], centres['y'])
        g['x'], g['y'] = standardise(g['x'], g['y'])

        # One row per grid point, columns (x, y); the prediction is reshaped
        # back to the grid and transposed before filling the histogram.
        X = np.vstack((g['x'].flatten(), g['y'].flatten())).T
        fit = knn.predict(X).reshape(g['x'].shape).T

        # Fill ROOT "profile"
        profile_fit = ROOT.TH2F('profile_fit', "",
                                len(edges['x']) - 1, edges['x'].flatten('C'),
                                len(edges['y']) - 1, edges['y'].flatten('C'))
        root_numpy.array2hist(fit, profile_fit)
        pass

    # Plotting
    with Profile("Plotting"):
        # NOTE: the loop variable `fit` rebinds the name of the prediction
        # array above, which has already been copied into `profile_fit`.
        for fit in [False, True]:

            # Select correct profile
            profile = profile_fit if fit else profile_meas

            # Plot
            plot(profile, fit)
            pass
        pass

    # Plotting local selection efficiencies for D2-kNN < 0
    # -- Compute signal efficiency
    for sig, msk in zip([True, False], [msk_sig, msk_bkg]):

        # Colour gradient: nine blue shades for signal; for background an
        # extra leading colour is prepended and the remaining stops are
        # compressed into the interval [EFF / 100, 1].
        if sig:
            rgbs = [(247 / 255., 251 / 255., 255 / 255.),
                    (222 / 255., 235 / 255., 247 / 255.),
                    (198 / 255., 219 / 255., 239 / 255.),
                    (158 / 255., 202 / 255., 225 / 255.),
                    (107 / 255., 174 / 255., 214 / 255.),
                    (66 / 255., 146 / 255., 198 / 255.),
                    (33 / 255., 113 / 255., 181 / 255.),
                    (8 / 255., 81 / 255., 156 / 255.),
                    (8 / 255., 48 / 255., 107 / 255.)]

            red, green, blue = map(np.array, zip(*rgbs))
            nb_cols = len(rgbs)
            stops = np.linspace(0, 1, nb_cols, endpoint=True)
        else:
            rgbs = [(255 / 255., 51 / 255., 4 / 255.),
                    (247 / 255., 251 / 255., 255 / 255.),
                    (222 / 255., 235 / 255., 247 / 255.),
                    (198 / 255., 219 / 255., 239 / 255.),
                    (158 / 255., 202 / 255., 225 / 255.),
                    (107 / 255., 174 / 255., 214 / 255.),
                    (66 / 255., 146 / 255., 198 / 255.),
                    (33 / 255., 113 / 255., 181 / 255.),
                    (8 / 255., 81 / 255., 156 / 255.),
                    (8 / 255., 48 / 255., 107 / 255.)]

            red, green, blue = map(np.array, zip(*rgbs))
            nb_cols = len(rgbs)
            stops = np.array([0] + list(
                np.linspace(0, 1, nb_cols - 1, endpoint=True) *
                (1. - EFF / 100.) + EFF / 100.))
            pass

        ROOT.TColor.CreateGradientColorTable(nb_cols, stops, red, green, blue,
                                             NB_CONTOUR)

        # Define arrays
        shape = (AXIS[VARX][0], AXIS[VARY][0])
        bins = [
            np.linspace(AXIS[var][1],
                        AXIS[var][2],
                        AXIS[var][0] + 1,
                        endpoint=True) for var in VARS
        ]
        # NOTE: `x`, `y` and `z` are not referenced again in this function.
        x, y, z = (np.zeros(shape) for _ in range(3))

        # Create `profile` histogram
        profile = ROOT.TH2F('profile', "",
                            len(bins[0]) - 1, bins[0].flatten('C'),
                            len(bins[1]) - 1, bins[1].flatten('C'))

        # Compute inclusive efficiency in bins of `VARY`
        # NOTE(review): the upper bin edge is exclusive (`<`) here but
        # inclusive (`<=`) in the per-bin fill below -- confirm which is
        # intended. There is also no guard against `den` being zero.
        effs = list()
        for edges in zip(bins[1][:-1], bins[1][1:]):
            msk_bin = (data[VARY] > edges[0]) & (data[VARY] < edges[1])
            msk_pass = data[knnfeat] < 0
            num = data.loc[msk & msk_bin & msk_pass,
                           'weight_test'].values.sum()
            den = data.loc[msk & msk_bin, 'weight_test'].values.sum()
            effs.append(num / den)
            pass

        # Fill profile
        for i, j in itertools.product(*map(range, shape)):

            # Bin edges in x and y
            edges = [bin[idx:idx + 2] for idx, bin in zip([i, j], bins)]

            # Masks
            # NOTE(review): `reduce` is not a builtin on Python 3; presumably
            # it is imported from functools at file level -- confirm.
            msks = [(data[var] > edges[dim][0]) & (data[var] <= edges[dim][1])
                    for dim, var in enumerate(VARS)]
            msk_bin = reduce(lambda x, y: x & y, msks)
            data_ = data[msk & msk_bin]

            # Set non-zero bin content
            # Bins without any selected entries are left at their default
            # content.
            if np.sum(msk & msk_bin):
                msk_pass = data_[knnfeat] < 0
                num = data.loc[msk & msk_bin & msk_pass,
                               'weight_test'].values.sum()
                den = data.loc[msk & msk_bin, 'weight_test'].values.sum()
                eff = num / den
                profile.SetBinContent(i + 1, j + 1, eff)
                pass
            pass

        c = rp.canvas(batch=True)
        pad = c.pads()[0]._bare()
        pad.cd()
        pad.SetRightMargin(0.20)
        pad.SetLeftMargin(0.15)
        pad.SetTopMargin(0.10)

        # Styling
        profile.GetXaxis().SetTitle("Large-#it{R} jet " +
                                    latex(VARX, ROOT=True) +
                                    " = log(m^{2}/p_{T}^{2})")
        profile.GetYaxis().SetTitle("Large-#it{R} jet " +
                                    latex(VARY, ROOT=True) + " [GeV]")
        profile.GetZaxis().SetTitle("Selection efficiency for %s^{(%s%%)}" %
                                    (latex(VAR, ROOT=True), EFF))

        profile.GetYaxis().SetNdivisions(505)
        profile.GetZaxis().SetNdivisions(505)
        profile.GetXaxis().SetTitleOffset(1.4)
        profile.GetYaxis().SetTitleOffset(1.8)
        profile.GetZaxis().SetTitleOffset(1.3)
        # The z-range is fixed to [0, 1] here, so the check below always passes
        zrange = (0., 1.)
        if zrange:
            profile.GetZaxis().SetRangeUser(*zrange)
            pass
        profile.SetContour(NB_CONTOUR)

        # Draw
        profile.Draw('COLZ')

        # Decorations
        c.text(qualifier=QUALIFIER, ymax=0.92, xmin=0.15)
        c.text(["#sqrt{s} = 13 TeV", "#it{W} jets" if sig else "Multijets"],
               ATLAS=False)

        # -- Efficiencies
        # One label per y-bin, placed at the low edge of the last x-bin; only
        # the label for the first y-bin carries the efficiency-symbol prefix.
        xaxis = profile.GetXaxis()
        yaxis = profile.GetYaxis()
        tlatex = ROOT.TLatex()
        tlatex.SetTextColor(ROOT.kGray + 2)
        tlatex.SetTextSize(0.023)
        tlatex.SetTextFont(42)
        tlatex.SetTextAlign(32)
        xt = xaxis.GetBinLowEdge(xaxis.GetNbins())
        for eff, ibin in zip(effs, range(1, yaxis.GetNbins() + 1)):
            yt = yaxis.GetBinCenter(ibin)
            tlatex.DrawLatex(
                xt, yt, "%s%.1f%%" %
                ("#bar{#varepsilon}^{rel}_{%s} = " %
                 ('sig' if sig else 'bkg') if ibin == 1 else '', eff * 100.))
            pass

        # -- Bounds
        BOUNDS[0].DrawCopy("SAME")
        BOUNDS[1].DrawCopy("SAME")
        c.latex("m > 50 GeV",
                -4.5,
                BOUNDS[0].Eval(-4.5) + 30,
                align=21,
                angle=-37,
                textsize=13,
                textcolor=ROOT.kGray + 3)
        c.latex("m < 300 GeV",
                -2.5,
                BOUNDS[1].Eval(-2.5) - 30,
                align=23,
                angle=-57,
                textsize=13,
                textcolor=ROOT.kGray + 3)

        # Save
        mkdir('figures/knn/')
        c.save('figures/knn/knn_eff_{}_{:s}_{:.0f}.pdf'.format(
            'sig' if sig else 'bkg', VAR, EFF))
        pass

    return
Пример #7
0
def plot_individual (*argv):
    """
    Save one small jet-mass comparison figure per pair of features, plus a
    separate legend-only figure.

    Positional arguments (unpacked from `argv`, in this order):
        data: Table indexed with `.loc`; columns 'signal', 'm' and
            'weight_test' are read.
        args: Namespace-like object; `args.show` disables batch mode.
        features: Sequence of feature names, consumed in consecutive pairs
            (`features[::2]` zipped with `features[1::2]`).
        msks_pass: Mapping from feature name to a boolean mask of entries
            passing the cut on that feature.
        eff_sig: Signal efficiency, shown in the text and used (as an int)
            in the output file names.

    Figures are written to
    'figures/jetmasscomparison__eff_sig_<eff>__<suffix>.pdf', where the suffix
    is 'legend' for the first figure and '<feat1>_<feat2>' for the others.
    Returns None.
    """

    # Unpack arguments
    data, args, features, msks_pass, eff_sig = argv

    with TemporaryStyle() as style:

        # Style @TEMP?
        ymin, ymax = 5E-05, 5E+00
        scale = 0.6
        for coord in ['x', 'y', 'z']:
            style.SetLabelSize(style.GetLabelSize(coord) * scale, coord)
            style.SetTitleSize(style.GetTitleSize(coord) * scale, coord)
            pass
        style.SetTickLength(0.07,                     'x')
        style.SetTickLength(0.07 * (5./6.) * (2./3.), 'y')

        # Local style overrides
        # Copy each per-class style dict so the overrides below do not modify
        # the global HISTSTYLE. `dict(**HISTSTYLE)` only copied the outer
        # mapping and raises TypeError on Python 3 for non-string keys.
        histstyle = {key: dict(opts) for key, opts in HISTSTYLE.items()}
        histstyle[True]['fillstyle'] = 3554
        for key in [True, False]:
            histstyle[key]['linewidth'] = 4
            histstyle[key]['label'] = None
            pass
        for v in ['linecolor', 'fillcolor']:
            histstyle[True] [v] = 16
            histstyle[False][v] = ROOT.kBlack
            pass
        style.SetHatchesLineWidth(6)

        # Loop features
        # Index -1 with `feats = None` is the legend-only figure; the
        # following indices are the feature pairs.
        ts  = style.GetTextSize()
        lts = style.GetLegendTextSize()
        for ifeat, feats in enumerate([None] + list(zip(features[::2], features[1::2])), start=-1):
            first = ifeat == -1

            # Style
            style.SetTitleOffset(1.25 if first else 1.2, 'x')
            style.SetTitleOffset(1.7  if first else 1.6, 'y')
            style.SetTextSize(ts * (0.8 if first else scale))
            style.SetLegendTextSize(lts * (0.8 + 0.03 if first else scale + 0.03))

            # Canvas
            c = rp.canvas(batch=not args.show, size=(300, 200))

            if first:
                # Legend-only figure
                opts = dict(xmin=0.185, width=0.60, columns=2)
                c.legend(header=' ', categories=[
                            ("Multijets",   histstyle[False]),
                            ("#it{W} jets", histstyle[True])
                        ], ymax=0.45, **opts)
                c.legend(header='Inclusive selection:',
                         ymax=0.40, **opts)
                c.pad()._legends[-2].SetMargin(0.35)
                c.pad()._legends[-1].SetMargin(0.35)

                c.text(["#sqrt{s} = 13 TeV,  #it{W} jet tagging",
                        "Cuts at #varepsilon_{sig}^{rel} = %.0f%%" % eff_sig,
                        ], xmin=0.2, ymax=0.80, qualifier=QUALIFIER)

            else:

                # Plots
                # -- Dummy, for proper axes
                c.hist([ymin], bins=[50, 300], linestyle=0, fillstyle=0)

                # -- Inclusive
                base = dict(bins=MASSBINS, normalise=True)
                for signal in [False, True]:
                    msk = data['signal'] == signal
                    histstyle[signal].update(base)
                    histstyle[signal]['option'] = 'HIST'
                    c.hist(data.loc[msk, 'm'].values, weights=data.loc[msk, 'weight_test'].values, **histstyle[signal])
                    pass

                for sig in [True, False]:
                    histstyle[sig]['option'] = 'FL'
                    pass

                # -- Tagged
                for jfeat, feat in enumerate(feats):
                    opts = dict(
                        linecolor = rp.colours[((2 * ifeat + jfeat) // 2)],
                        linestyle = 1 + 6 * (jfeat % 2),
                        linewidth = 4,
                        )
                    cfg = dict(**base)
                    cfg.update(opts)
                    msk = (data['signal'] == 0) & msks_pass[feat]
                    c.hist(data.loc[msk, 'm'].values, weights=data.loc[msk, 'weight_test'].values, label=" " + latex(feat, ROOT=True), **cfg)
                    pass

                # -- Legend(s)
                # `first` is always False in this branch, so the legend
                # position and pad margins use the non-legend values.
                y  = 0.68
                dy = 0.04
                c.legend(width=0.25, xmin=0.63, ymax=y)
                c.latex("Tagged multijets:", NDC=True, x=0.87, y=y + dy, textcolor=ROOT.kGray + 3, textsize=style.GetLegendTextSize() * 0.9, align=31)
                c.pad()._legends[-1].SetMargin(0.35)
                c.pad()._legends[-1].SetTextSize(style.GetLegendTextSize())

                # Formatting pads
                tpad = c.pad()._bare()
                tpad.SetLeftMargin  (0.20)
                tpad.SetBottomMargin(0.20)
                tpad.SetTopMargin   (0.05)

                # Re-draw axes
                tpad.RedrawAxis()
                tpad.Update()
                c.pad()._xaxis().SetAxisColor(ROOT.kWhite)  # Remove "double ticks"
                c.pad()._yaxis().SetAxisColor(ROOT.kWhite)  # Remove "double ticks"

                # Decorations
                c.xlabel("Large-#it{R} jet mass [GeV]")
                c.ylabel("Fraction of jets")

                c.text(qualifier=QUALIFIER, xmin=0.25, ymax=0.82)

                c.ylim(ymin, ymax)
                c.logy()
                pass

            # Save
            c.save(path = 'figures/jetmasscomparison__eff_sig_{:d}__{}.pdf'.format(int(eff_sig), 'legend' if first else '{}_{}'.format(*feats)))
            pass
        pass  # end temporary style

    return
Пример #8
0
def plot (*argv):
    """
    Draw the jet-mass comparison for all features on one multi-pad canvas.

    The first pad holds the common legend and text. Every other pad shows the
    inclusive background and signal mass spectra, with the tagged-background
    spectra of two consecutive features overlaid.

    Positional arguments (unpacked from `argv`, in this order):
        data: Table indexed with `.loc`; columns 'signal', 'm' and
            'weight_test' are read.
        args: Namespace-like object; `args.show` disables batch mode.
        features: Sequence of feature names; feature `i` is drawn on pad
            `1 + i // 2`.
        msks_pass: Mapping from feature name to a boolean mask of entries
            passing the cut on that feature.
        eff_sig: Signal efficiency, shown in the canvas text.

    Returns:
        The canvas, which is not saved here.
    """

    # Unpack arguments
    data, args, features, msks_pass, eff_sig = argv

    with TemporaryStyle() as style:

        # Style
        ymin, ymax = 5E-05, 5E+00
        scale = 0.8
        for coord in ['x', 'y', 'z']:
            style.SetLabelSize(style.GetLabelSize(coord) * scale, coord)
            style.SetTitleSize(style.GetTitleSize(coord) * scale, coord)
            pass
        style.SetTextSize      (style.GetTextSize()       * scale)
        style.SetLegendTextSize(style.GetLegendTextSize() * scale)
        style.SetTickLength(0.07,                     'x')
        style.SetTickLength(0.07 * (5./6.) * (2./3.), 'y')

        # Local style overrides
        # Copy each per-class style dict so the overrides below do not modify
        # the global HISTSTYLE. `dict(**HISTSTYLE)` only copied the outer
        # mapping and raises TypeError on Python 3 for non-string keys.
        histstyle = {key: dict(opts) for key, opts in HISTSTYLE.items()}
        histstyle[True]['fillstyle'] = 3554
        histstyle[True] ['label'] = None
        histstyle[False]['label'] = None
        for v in ['linecolor', 'fillcolor']:
            histstyle[True] [v] = 16
            histstyle[False][v] = ROOT.kBlack
            pass
        style.SetHatchesLineWidth(1)

        # Canvas
        c = rp.canvas(batch=not args.show, num_pads=(2,3))

        # Plots
        # -- Dummy, for proper axes
        for ipad, pad in enumerate(c.pads()[1:], 1):
            pad.hist([ymin], bins=[50, 300], linestyle=0, fillstyle=0, option=('Y+' if ipad % 2 else ''))
            pass

        # -- Inclusive
        base = dict(bins=MASSBINS, normalise=True, linewidth=2)
        for signal in [False, True]:
            msk = data['signal'] == signal
            histstyle[signal].update(base)
            histstyle[signal]['option'] = 'HIST'
            for pad in c.pads()[1:]:
                pad.hist(data.loc[msk, 'm'].values, weights=data.loc[msk, 'weight_test'].values, **histstyle[signal])
                pass
            pass

        # Legend entries use a different draw option than the histograms
        for sig in [True, False]:
            histstyle[sig]['option'] = 'FL'
            pass

        c.pads()[0].legend(header='Inclusive selection:', categories=[
            ("Multijets",   histstyle[False]),
            ("#it{W} jets", histstyle[True])
            ], xmin=0.18, width= 0.60, ymax=0.28 + 0.07, ymin=0.001 + 0.07, columns=2)
        c.pads()[0]._legends[-1].SetTextSize(style.GetLegendTextSize())
        c.pads()[0]._legends[-1].SetMargin(0.35)

        # -- Tagged
        for ifeat, feat in enumerate(features):
            opts = dict(
                linecolor = rp.colours[(ifeat // 2)],
                linestyle = 1 + (ifeat % 2),
                linewidth = 2,
                )
            cfg = dict(**base)
            cfg.update(opts)
            msk = (data['signal'] == 0) & msks_pass[feat]
            pad = c.pads()[1 + ifeat//2]
            pad.hist(data.loc[msk, 'm'].values, weights=data.loc[msk, 'weight_test'].values, label=" " + latex(feat, ROOT=True), **cfg)
            pass

        # -- Legend(s)
        for ipad, pad in enumerate(c.pads()[1:], 1):
            offsetx = (0.20 if ipad % 2 else 0.05)
            offsety =  0.20 * ((2 - (ipad // 2)) / float(2.))
            pad.legend(width=0.25, xmin=0.68 - offsetx, ymax=0.80 - offsety)
            pad.latex("Tagged multijets:", NDC=True, x=0.93 - offsetx, y=0.84 - offsety, textcolor=ROOT.kGray + 3, textsize=style.GetLegendTextSize() * 0.8, align=31)
            pad._legends[-1].SetMargin(0.35)
            pad._legends[-1].SetTextSize(style.GetLegendTextSize())
            pass

        # Formatting pads
        # `f` runs from 0 for the first row of pads to 1 for the last row, so
        # the top margin shrinks and the bottom margin grows down the canvas.
        margin = 0.2
        nb_rows = len(c.pads()) // 2
        for ipad, pad in enumerate(c.pads()):
            tpad = pad._bare()  # ROOT.TPad
            right = ipad % 2
            f = (ipad // 2) / float(nb_rows - 1)
            tpad.SetLeftMargin (0.05 + 0.15 * (1 - right))
            tpad.SetRightMargin(0.05 + 0.15 * right)
            tpad.SetBottomMargin(f * margin)
            tpad.SetTopMargin((1 - f) * margin)
            if ipad == 0: continue
            pad._xaxis().SetNdivisions(505)
            pad._yaxis().SetNdivisions(505)
            if ipad // 2 < nb_rows - 1:  # Not bottom pad(s)
                pad._xaxis().SetLabelOffset(9999.)
                pad._xaxis().SetTitleOffset(9999.)
            else:
                pad._xaxis().SetTitleOffset(2.7)
                pass
            pass

        # Re-draw axes
        for pad in c.pads()[1:]:
            pad._bare().RedrawAxis()
            pad._bare().Update()
            pad._xaxis().SetAxisColor(ROOT.kWhite)  # Remove "double ticks"
            pad._yaxis().SetAxisColor(ROOT.kWhite)  # Remove "double ticks"
            pass

        # Decorations
        c.pads()[-1].xlabel("Large-#it{R} jet mass [GeV]")
        c.pads()[-2].xlabel("Large-#it{R} jet mass [GeV]")
        # Nested empty #splitline{}{} pairs are used to shift the y-axis title
        # on pads 1 and 2.
        c.pads()[1].ylabel("#splitline{#splitline{#splitline{#splitline{}{}}{#splitline{}{}}}{#splitline{}{}}}{#splitline{}{#splitline{}{#splitline{}{Fraction of jets}}}}")
        c.pads()[2].ylabel("#splitline{#splitline{#splitline{#splitline{Fraction of jets}{}}{}}{}}{#splitline{#splitline{}{}}{#splitline{#splitline{}{}}{#splitline{}{}}}}")

        c.pads()[0].text(["#sqrt{s} = 13 TeV,  #it{W} jet tagging",
                    "Cuts at #varepsilon_{sig}^{rel} = %.0f%%" % eff_sig,
                    ], xmin=0.2, ymax=0.72, qualifier=QUALIFIER)

        for pad in c.pads()[1:]:
            pad.ylim(ymin, ymax)
            pad.logy()
            pass

        pass  # end temporary style

    return c
def plot (*argv):
    """
    Draw background rejection and 1/JSD versus a binned variable, one graph
    per feature, on a two-pad canvas.

    Positional arguments (unpacked from `argv`, in this order):
        data: Not used in this function.
        args: Namespace-like object; `args.show` disables batch mode.
        features: Sequence of feature names. Features for which `signal_low`
            is truthy are drawn first and labelled in the lower-pad legend;
            the others are labelled in the upper-pad legend.
        bins: Mutable sequence of bin edges. NOTE: the last edge is replaced
            in place by its floor, so the caller's object is modified.
        effs: Not used in this function.
        rejs: Mapping feature -> sequence of (mean, std) rejection pairs.
        jsds: Mapping feature -> sequence of (mean, std) 1/JSD pairs.
        meanx: x-positions of the markers, passed to `ROOT.TGraph`.
        jsd_limits: Sequence of (x, y, stat. error, syst. error) tuples.
        masscut: If truthy, a mass-window line is added to the canvas text.
        var: Either 'pt' or 'npv'; selects the x-axis label.

    Returns:
        The canvas, which is not saved here.

    Raises:
        NotImplementedError: If `var` is neither 'pt' nor 'npv'. This is
            checked before any drawing is done.
    """

    # Unpack arguments
    data, args, features, bins, effs, rejs, jsds, meanx, jsd_limits, masscut, var = argv

    # -- x-axis label
    # Resolved first so that an unsupported `var` fails fast. The error
    # message previously formatted the unbound `xlabel` instead of `var`.
    if var == 'pt':
        xlabel = "Large-#it{R} jet p_{T} [GeV]"
    elif var == 'npv':
        xlabel = "Number of reconstructed vertices N_{PV}"
    else:
        raise NotImplementedError("Variable {} is not supported.".format(var))

    with TemporaryStyle() as style:

        # Set styles
        scale = 0.9
        style.SetTextSize(scale * style.GetTextSize())
        for coord in ['x', 'y', 'z']:
            style.SetLabelSize(scale * style.GetLabelSize(coord), coord)
            style.SetTitleSize(scale * style.GetTitleSize(coord), coord)
            pass

        # Canvas
        c = rp.canvas(num_pads=2, fraction=0.55, size=(int(800 * 600 / 857.), 600), batch=not args.show)
        c.pads()[0]._bare().SetTopMargin(0.10)
        c.pads()[0]._bare().SetRightMargin(0.23)
        c.pads()[1]._bare().SetRightMargin(0.23)

        # To fix 30.5 --> 30 for NPV
        bins[-1] = np.floor(bins[-1])

        # Plots
        # -- References
        boxopts  = dict(fillcolor=ROOT.kBlack, alpha=0.05, linecolor=ROOT.kGray + 2, linewidth=1, option='HIST')
        c.pads()[0].hist([2], bins=[bins[0], bins[-1]], **boxopts)
        c.pads()[1].hist([1], bins=[bins[0], bins[-1]], **boxopts)

        # Bin centres and half-widths are the same for every feature
        half_widths = 0.5 * np.diff(bins)
        bin_centres = np.array(bins[:-1]) + half_widths
        nb_points   = len(bin_centres)

        for is_simple in [True, False]:
            for ifeat, feat in enumerate(features):
                if is_simple != signal_low(feat):
                    continue

                opts = dict(
                    linecolor   = rp.colours[(ifeat // 2)],
                    markercolor = rp.colours[(ifeat // 2)],
                    fillcolor   = rp.colours[(ifeat // 2)],
                    linestyle   = 1 + (ifeat % 2),
                    alpha       = 0.3,
                    option      = 'E2',
                )

                mean_rej, std_rej = map(np.array, zip(*rejs[feat]))  # @TEMP
                mean_jsd, std_jsd = map(np.array, zip(*jsds[feat]))

                # Error boxes
                graph_rej = ROOT.TGraphErrors(nb_points, bin_centres, mean_rej, half_widths, std_rej)
                graph_jsd = ROOT.TGraphErrors(nb_points, bin_centres, mean_jsd, half_widths, std_jsd)

                c.pads()[0].hist(graph_rej, **opts)
                c.pads()[1].hist(graph_jsd, **opts)

                # Markers and lines
                opts['option']      = 'PE2L'
                opts['markerstyle'] = 20 + 4 * (ifeat % 2)

                graph_rej = ROOT.TGraph(nb_points, meanx, mean_rej)
                graph_jsd = ROOT.TGraph(nb_points, meanx, mean_jsd)

                # Each feature is labelled in exactly one of the two legends
                c.pads()[0].hist(graph_rej, label=latex(feat, ROOT=True) if not is_simple else None, **opts)
                c.pads()[1].hist(graph_jsd, label=latex(feat, ROOT=True) if     is_simple else None, **opts)
                pass

            pass

        # Draw class-specific legend
        width = 0.20
        c.pads()[0].legend(header='MVA:',    width=width, xmin=0.79, ymax=0.92)
        c.pads()[1].legend(header='Analytical:', width=width, xmin=0.79, ymax=0.975)

        # Meaningful limits on JSD
        x, y, ey_stat, ey_syst  = map(np.array, zip(*jsd_limits))
        ex = np.zeros_like(x)
        # Stretch the band to the full axis range
        x[0] = bins[0]
        x[-1] = bins[-1]

        def as_flat_floats(arr):
            # Flattened, float-typed copy of `arr`
            return arr.flatten('C').astype(float)

        ey_comb = np.sqrt(np.square(ey_stat) + np.square(ey_syst))
        gr_stat = ROOT.TGraphErrors(len(x), *[as_flat_floats(arr) for arr in (x, y, ex, ey_stat)])
        gr_comb = ROOT.TGraphErrors(len(x), *[as_flat_floats(arr) for arr in (x, y, ex, ey_comb)])
        smooth_tgrapherrors(gr_stat, ntimes=2)
        smooth_tgrapherrors(gr_comb, ntimes=2)
        c.pads()[1].graph(gr_comb,                                        fillcolor=ROOT.kBlack, alpha=0.03, option='3')
        c.pads()[1].graph(gr_stat, linestyle=2, linecolor=ROOT.kGray + 1, fillcolor=ROOT.kBlack, alpha=0.03, option='L3')

        # Label the band at the upper error edge of its last point
        x_, y_ = ROOT.Double(0), ROOT.Double(0)
        idx = gr_comb.GetN() - 1
        gr_comb.GetPoint(idx, x_,  y_)
        ey_ = gr_comb.GetErrorY(idx)
        x_, y_ = map(float, (x_, y_))
        c.pads()[1].latex("Mean stat. #oplus #varepsilon_{bkg}^{rel} var. limit     ", x_, y_ + ey_, align=31, textsize=11, angle=0, textcolor=ROOT.kGray + 2)

        # Decorations
        for pad in c.pads():
            pad._xaxis().SetNdivisions(504)
            pass

        c.xlabel(xlabel)
        c.pads()[0].ylabel("1/#varepsilon_{bkg}^{rel} @ #varepsilon_{sig}^{rel} = 50%")
        c.pads()[1].ylabel("1/JSD @ #varepsilon_{sig}^{rel} = 50%")

        xmid = (bins[0] + bins[-1]) * 0.5
        c.pads()[0].latex("Random guessing",   xmid, 2 * 0.9, align=23, textsize=11, angle=0, textcolor=ROOT.kGray + 2)
        c.pads()[1].latex("Maximal sculpting", xmid, 1 * 0.8, align=23, textsize=11, angle=0, textcolor=ROOT.kGray + 2)

        c.text([], qualifier=QUALIFIER, xmin=0.15, ymax=0.93)

        c.text(["#sqrt{s} = 13 TeV,  #it{W} jet tagging"] + \
                (['m #in  [60, 100] GeV'] if masscut else []),
                 ATLAS=False, ymax=0.76)

        c.pads()[1].text(["Multijets"], ATLAS=False)

        c.pads()[0].ylim(1, 500)
        c.pads()[1].ylim(0.2, 2E+05)

        c.pads()[0].logy()
        c.pads()[1].logy()

        pass  # Temporary style scope

    return c
Пример #10
0
def plot(*argv):
    """
    Method for delegating plotting.

    Draws, for every tagger feature, the values in `jsd` against the
    background efficiencies in `effs`, together with a smoothed band built
    from `jsd_limits`, and returns the decorated canvas.

    Arguments (positional, unpacked from `argv` in this order):
        args: Namespace; only `args.show` is read (the canvas runs in batch
            mode unless it is set).
        data: Not used by this function; accepted to keep the call signature.
        effs: Sequence of background efficiencies; divided by 100 to obtain
            the x-coordinates.
        jsd: Mapping from feature name to the sequence of values plotted at
            the corresponding entries of `effs`.
        jsd_limits: Iterable of (x, y, y-error) triplets for the limit band.
        features: Ordered list of feature names, laid out group by group.
        pt_range: Optional (low, high) pair quoted in the text box.
        appearances: List with the number of features in each consecutive
            group of `features`.

    Returns:
        The canvas holding the plot.
    """

    # Unpack arguments
    args, data, effs, jsd, jsd_limits, features, pt_range, appearances = argv

    with TemporaryStyle() as style:

        # Style
        style.SetTitleOffset(1.5, 'x')
        style.SetTitleOffset(2.0, 'y')

        # Canvas
        c = rp.canvas(batch=not args.show)

        # Plots
        # -- Flat reference line at unity
        ref = ROOT.TH1F('ref', "", 10, 0., 1.)
        for ibin in range(ref.GetXaxis().GetNbins()):
            ref.SetBinContent(ibin + 1, 1)
        c.hist(ref, linecolor=ROOT.kGray + 2, linewidth=1)

        linestyles = [1, 3, 5, 7]
        width = 0.15

        # Start index of each feature group within `features`; the final entry
        # is the total number of features.
        indices = np.array([0] + appearances).cumsum()

        def draw(feat, igroup, ifeat, linestyle):
            """Draw the curve for `feat`: colour by group, marker by position."""
            colour = rp.colours[igroup % len(rp.colours)]
            c.plot(jsd[feat],
                   bins=np.array(effs) / 100.,
                   linecolor=colour,
                   markercolor=colour,
                   linestyle=linestyle,
                   markerstyle=20 if ifeat == 0 else 23 + ifeat,
                   label=latex(feat, ROOT=True),
                   option='PL')

        if len(appearances) != 2:
            # One legend for the features for which `signal_low` is true
            # ("Analytical") and one for the remaining ones ("MVA").
            for is_simple in [True, False]:
                for igroup in range(len(indices) - 1):
                    group = features[indices[igroup]:indices[igroup + 1]]
                    for ifeat, feat in enumerate(group):
                        if is_simple != signal_low(feat):
                            continue
                        draw(feat, igroup, ifeat, 1 + ifeat)

                c.legend(header=("Analytical:" if is_simple else "MVA:"),
                         width=width * (1 + 0.8 * int(is_simple)),
                         xmin=0.42 + (width + 0.05) * (is_simple),
                         ymax=0.888,
                         columns=2 if is_simple else 1,
                         margin=0.35)
        else:
            # Exactly two groups: one legend per group, headed by the group's
            # first feature.
            for first_var in [True, False]:
                igroup = 0 if first_var else 1
                group = features[indices[igroup]:indices[igroup + 1]]
                for ifeat, feat in enumerate(group):
                    # Wrap around so that groups with more than
                    # len(linestyles) features do not raise IndexError.
                    draw(feat, igroup, ifeat,
                         linestyles[ifeat % len(linestyles)])

                # The second group starts at indices[1] (== appearances[0]);
                # indexing with appearances[1] picked the wrong feature
                # whenever the two groups had different sizes.
                c.legend(header=latex(features[indices[igroup]], ROOT=True) +
                         "-based:",
                         width=width,
                         xmin=0.45 + (width + 0.06) * (first_var),
                         ymax=0.888)

        # Meaningful limits on JSD
        x, y, ey = map(np.array, zip(*jsd_limits))
        ex = np.zeros_like(ey)
        gr = ROOT.TGraphErrors(len(x), x, y, ex, ey)
        smooth_tgrapherrors(gr, ntimes=2)
        c.graph(gr,
                linestyle=2,
                linecolor=ROOT.kGray + 1,
                fillcolor=ROOT.kBlack,
                alpha=0.03,
                option='L3')

        # Redraw axes
        c.pads()[0]._primitives[0].Draw('AXIS SAME')

        # Decorations
        c.xlabel("Background efficiency #varepsilon_{bkg}^{rel}")
        c.ylabel("Mass correlation, JSD")
        c.text([], xmin=0.15, ymax=0.96, qualifier=QUALIFIER, ATLAS=False)
        c.text(["#sqrt{s} = 13 TeV",  "Multijets"] + \
              (["p_{T} [GeV] #in", "    [{:.0f}, {:.0f}]".format(*pt_range)] if pt_range else []),
               ymax=0.85, ATLAS=None)

        c.latex("Maximal sculpting",
                0.065,
                1.2,
                align=11,
                textsize=11,
                textcolor=ROOT.kGray + 2)
        c.xlim(0, 1)
        c.ymin(1E-06)  # chosen for highest pT bin
        c.padding(0.45)
        c.logy()

        for leg in c.pad()._legends:
            leg.SetMargin(0.5)

        # Anchor the "Statistical limit" label at the 7th-from-last point of
        # the smoothed band, half an error bar below the central value.
        x_, y_ = ROOT.Double(0), ROOT.Double(0)
        idx = gr.GetN() - 7
        gr.GetPoint(idx, x_, y_)
        ey_ = gr.GetErrorY(idx)
        x_, y_ = map(float, (x_, y_))
        c.latex("Statistical limit",
                x_,
                y_ - ey_ / 2.,
                align=23,
                textsize=11,
                angle=12,
                textcolor=ROOT.kGray + 2)

    return c
Пример #11
0
def plot(*argv):
    """
    Method for delegating plotting.

    Draws one background-rejection curve (1 / background efficiency versus
    signal efficiency) per feature on top of a shaded reference histogram,
    with one legend for features where `signal_low` is true ("Analytical")
    and one for the others ("MVA").

    Arguments (positional, unpacked from `argv` in this order):
        args: Namespace; only `args.show` is read.
        data: Not used here.
        features: Ordered list of feature names.
        ROCs: Mapping from feature name to an (eff_sig, eff_bkg) pair.
        AUCs: Not used here.
        masscut: Truthy if the mass-cut line should be quoted.
        pt_range: Optional (low, high) pair quoted in the text box.

    Returns:
        The canvas holding the plot.
    """

    # Unpack arguments
    args, data, features, ROCs, AUCs, masscut, pt_range = argv

    # Canvas
    c = rp.canvas(batch=not args.show)

    # Plots
    # -- Random guessing: 1/x evaluated at bin centres. A very narrow extra
    #    bin (1% of the regular width) is inserted right after the first edge.
    grid = np.linspace(0.2, 1., 100 + 1, endpoint=True)
    first_width = np.diff(grid[:2])[0]
    edges = np.array([grid[0], grid[0] + 0.01 * first_width] + list(grid[1:]))
    centres = edges[:-1] + 0.5 * np.diff(edges)
    c.hist(np.power(centres, -1.),
           bins=edges,
           linecolor=ROOT.kGray + 2,
           fillcolor=ROOT.kBlack,
           alpha=0.05,
           linewidth=1,
           option='HISTC')

    # -- ROCs, one pass (and one legend) per tagger class
    width = 0.17
    for is_simple in [True, False]:

        for ifeat, feat in enumerate(features):
            if not (is_simple == signal_low(feat)):
                continue
            eff_sig, eff_bkg = ROCs[feat]
            # Neighbouring features share a colour and alternate line style
            c.graph(np.power(eff_bkg, -1.),
                    bins=eff_sig,
                    linestyle=1 + (ifeat % 2),
                    linecolor=rp.colours[(ifeat // 2) % len(rp.colours)],
                    linewidth=2,
                    label=latex(feat, ROOT=True),
                    option='L')

        # Draw class-specific legend
        if is_simple:
            header = "Analytical:"
        else:
            header = "MVA:"
        c.legend(header=header,
                 width=width,
                 xmin=0.58 + (width) * (is_simple),
                 ymax=0.888)

    # Decorations
    c.xlabel("Signal efficiency #varepsilon_{sig}^{rel}")
    c.ylabel("Background rejection 1/#varepsilon_{bkg}^{rel}")
    c.text([], xmin=0.15, ymax=0.96, qualifier=QUALIFIER, ATLAS=False)

    lines = ["#sqrt{s} = 13 TeV", "#it{W} jet tagging"]
    if pt_range is not None:
        lines.append("p_{{T}} #in  [{:.0f}, {:.0f}] GeV".format(
            pt_range[0], pt_range[1]))
    if masscut:
        lines.append("Cut: m #in  [60, 100] GeV")
    c.text(lines, ATLAS=False)

    # Number of active selections; used to scale the y-axis ceiling and to
    # tilt the "Random guessing" label.
    ranges = int(pt_range is not None) + int(masscut)
    if ranges == 2:
        mult = 10.
    elif ranges == 1:
        mult = 2.
    else:
        mult = 1.

    c.latex("Random guessing",
            0.4,
            1. / 0.4 * 0.9,
            align=23,
            angle=-12 + 2 * ranges,
            textsize=13,
            textcolor=ROOT.kGray + 2)
    c.xlim(0.2, 1.)
    c.ylim(1E+00, 5E+02 * mult)
    c.logy()
    c.legend()

    return c
Пример #12
0
def perform_optimisation(var, bins, data):
    """
    Optimise and store the CSS transform for substructure variable `var`.

    The steps, as implemented below, are:
      1. fill a 2D profile of `var` versus jet mass ("m", binned in
         `MASS_BINS`) and take the KDE-smoothed, normalised projection of the
         first mass bin as reference;
      2. for every shape value in `SHAPEVAL_RANGE`, fit each mass bin to the
         reference and keep the shape value with the smallest summed chi2;
      3. re-fit every mass bin at the best shape value, save a test plot per
         bin, and fit the measured omegas as a function of mass;
      4. save the `F` and `Ginv` functions for every mass bin under
         `models/css/` and plot the omegas under `figures/css/`.

    Arguments:
        var: Name of the variable to transform.
        bins: Binning for `var`, forwarded to `fill_2d_profile`.
        data: Dataset forwarded to `fill_2d_profile`.

    Returns:
        0, unconditionally.
    """

    # Fill 2D substructure profile
    profile2d = fill_2d_profile(data, var, bins, "m", MASS_BINS)
    num_mass_bins = len(MASS_BINS) - 1

    def project(mass, suffix=""):
        """Y-projection of the single x-bin `mass + 1` of the 2D profile."""
        name = "%s_bin_%i%s" % (profile2d.GetName(), mass, suffix)
        return profile2d.ProjectionY(name, mass + 1, mass + 1)

    # Get 1D profile for lowest mass bin
    profile0 = profile2d.ProjectionY("%s_lowMass" % profile2d.GetName(), 1, 1)
    profile0 = kde(profile0)
    normalise(profile0, density=True)

    # Perform the optimisation
    # NOTE: progress messages use the single-argument print form, which emits
    # the same text under Python 2 and Python 3.
    bestShapeVal = 0
    bestSumChi2 = 1e20
    for shapeVal in SHAPEVAL_RANGE:
        print("Shape value:  {}".format(shapeVal))
        sumChi2 = 0.

        # Each mass bin needs to be optimized over omega
        for mass in range(num_mass_bins):
            print("   Mass bin:  {}".format(mass))

            # Fit current profile to low-mass profile; only chi2 is needed
            chi2, _, _, _ = fit(project(mass), shapeVal, profile0,
                                "%.2f" % mass)

            # Accumulate chi2
            sumChi2 += chi2

        # Update chi2 for current `shapeVal`
        print("-- sumChi2: {} (cp. {})".format(sumChi2, bestSumChi2))
        if sumChi2 < bestSumChi2:
            bestSumChi2 = sumChi2
            bestShapeVal = shapeVal

    # Saving CSS transforms
    with Profile("Saving CSS transform"):

        # Ensure output directories exist
        mkdir('models/css/')
        mkdir('figures/css/')

        lowmassbin = "#it{{m}} #in  [{:.1f}, {:.1f}] GeV".format(
            MASS_BINS[0], MASS_BINS[1]).replace('.0', '')

        # Get the optimal, measured `omega`s for each mass-bin
        bestOmegas = list()
        for mass in range(num_mass_bins):
            profile = project(mass, "_final")
            _, bestOmega, profile_css, profile0rebin = fit(
                profile, bestShapeVal, profile0, "%.2f" % mass)

            # Test-plot distributions used for fitting!
            # -- Canvas
            c = rp.canvas(batch=True)

            # -- Plot
            profile = kde(profile)
            normalise(profile, density=True)

            massbin = "#it{{m}} #in  [{:.1f}, {:.1f}] GeV".format(
                MASS_BINS[mass], MASS_BINS[mass + 1]).replace('.0', '')
            c.hist(profile0rebin,
                   label=latex(var, ROOT=True) + ",    {}".format(lowmassbin),
                   linecolor=rp.colours[1],
                   fillcolor=rp.colours[1],
                   alpha=0.5,
                   option='HISTL',
                   legend_option='FL')
            c.hist(profile,
                   label=latex(var, ROOT=True) + ",    {}".format(massbin),
                   linecolor=rp.colours[4],
                   linestyle=2,
                   option='HISTL')
            c.hist(profile_css,
                   label=latex(var + 'CSS', ROOT=True) +
                   ", {}".format(massbin),
                   linecolor=rp.colours[3],
                   option='HISTL')

            # -- Decorations
            c.xlabel(
                latex(var, ROOT=True) + ", " + latex(var + 'CSS', ROOT=True))
            c.ylabel("Number of jets p.d.f.")
            c.legend(xmin=0.45, ymax=0.76, width=0.25)
            c.text(["#sqrt{s} = 13 TeV,  Multijets", "KDE smoothed"],
                   qualifier=QUALIFIER,
                   ATLAS=False)
            c.pad()._xaxis().SetTitleOffset(1.3)
            c.pad()._yaxis().SetNdivisions(105)
            c.pad()._primitives[-1].Draw('SAME AXIS')
            c.padding(0.50)

            # -- Save
            c.save('figures/css/css_test_{}_mass{}.pdf'.format(var, mass))

            # Store best-fit omega in array
            print("{} {}".format(mass, bestOmega))
            bestOmegas.append(bestOmega)

        # Fit best omega vs. mass
        h = ROOT.TH1F('hfit', "", num_mass_bins, MASS_BINS)
        root_numpy.array2hist(np.array(bestOmegas), h)
        for ibin in range(1, num_mass_bins + 1):
            # Just some value to ensure equal errors on all points
            h.SetBinError(ibin, 0.02)

        # Fit function is anchored at the centre of the first mass bin
        m0 = 0.5 * (MASS_BINS[0] + MASS_BINS[1])
        f = ROOT.TF1(
            "fit",
            "[0] * (1./{m0}  - 1./x) + [1] * TMath::Log(x/{m0})".format(m0=m0),
            m0, 300)
        f.SetLineColor(rp.colours[4])
        f.SetLineStyle(2)
        h.Fit(f)

        # Write out the optimal configuration for each mass bin
        for mass in range(num_mass_bins):
            profile = kde(project(mass, "_final"))
            normalise(profile, density=True)

            # Evaluate the fitted omega at the bin centre and keep it at or
            # above a floor of 1E-04.
            bestOmegaFitted_ = f.Eval(
                h.GetBinCenter(mass + 1)) + np.finfo(float).eps
            bestOmegaFitted = max(bestOmegaFitted_, 1E-04)
            print("bestOmegaFitted[{}] = {} --> {}".format(
                mass, bestOmegaFitted_, bestOmegaFitted))
            F, Ginv = get_css_fns(bestShapeVal, bestOmegaFitted, profile, "")

            # Save classifier
            saveclf(F, 'models/css/css_%s_F_%i.pkl.gz' % (var, mass))
            saveclf(Ginv, 'models/css/css_%s_Ginv_%i.pkl.gz' % (var, mass))

        # Plot best omega vs. mass
        # -- Canvas
        c = rp.canvas(batch=True)

        # -- Plots
        c.hist(h, linecolor=rp.colours[1], option='HIST', label="Measured")
        f.Draw('SAME')

        # -- Decorations
        c.xlabel("Large-#it{R} jet mass [GeV]")
        c.ylabel("Best-fit #Omega_{D}")
        c.text([
            "#sqrt{s} = 13 TeV,  Multijets",
            "CSS applied to {}".format(latex(var, ROOT=True)),
            "Best-fit #alpha = {:.1f}".format(bestShapeVal),
        ],
               qualifier=QUALIFIER,
               ATLAS=False)
        c.legend(categories=[('Functional fit', {
            'linewidth': 2,
            'linestyle': 2,
            'linecolor': rp.colours[4]
        })])

        # Save
        c.save('figures/css/cssBestOmega_{}.pdf'.format(var))

    return 0
Пример #13
0
def plot(*argv):
    """
    Method for delegating plotting.

    Draws one marker per feature in the plane of background rejection (x)
    versus 1/JSD (y), both on logarithmic axes, together with reference
    lines, shaded regions, a smoothed limit band and dashed lines connecting
    each group's first point to the group's other points.

    Arguments (positional, unpacked from `argv` in this order):
        data: Not used here.
        args: Namespace; only `args.show` is read.
        features: Ordered list of feature names, laid out group by group.
        scan_features: Not used here.
        points: Sequence of (x, y, label) triplets; a feature's marker is the
            first triplet whose label equals the feature name.
        jsd_limits: Iterable of (x, y, y-error) triplets for the limit band.
        masscut: Truthy if the mass-cut line should be quoted.
        pt_range: Optional (low, high) pair quoted in the text box.
        appearances: List with the number of features in each consecutive
            group of `features`.

    Returns:
        The canvas holding the plot.

    Raises:
        ValueError: If a feature has no triplet in `points`.
    """

    # Unpack arguments
    data, args, features, scan_features, points, jsd_limits, masscut, pt_range, appearances = argv

    with TemporaryStyle() as style:

        # Compute yaxis range
        ranges = int(pt_range is not None) + int(masscut)
        mult = 10. if ranges == 2 else (5. if ranges == 1 else 1.)

        # Define variable(s)
        axisrangex = (1.4, 40.)
        axisrangey = (0.3, 300000. * mult)
        aminx, amaxx = axisrangex
        aminy, amaxy = axisrangey

        # Styling
        scale = 0.95
        style.SetTitleOffset(1.8, 'x')
        style.SetTitleOffset(1.6, 'y')
        style.SetTextSize(style.GetTextSize() * scale)
        style.SetLegendTextSize(style.GetLegendTextSize() * scale)

        # Canvas
        c = rp.canvas(batch=not args.show, size=(600, 600))

        # Reference lines
        nullopts = dict(linecolor=0,
                        linewidth=0,
                        linestyle=0,
                        markerstyle=0,
                        markersize=0,
                        fillstyle=0)
        lineopts = dict(linecolor=ROOT.kGray + 2, linewidth=1, option='L')
        boxopts = dict(fillcolor=ROOT.kBlack,
                       alpha=0.05,
                       linewidth=0,
                       option='HIST')
        c.hist([aminy], bins=list(axisrangex), **nullopts)
        c.plot([1, amaxy], bins=[2, 2], **lineopts)
        c.plot([1, 1], bins=[2, amaxx], **lineopts)
        c.hist([amaxy], bins=[aminx, 2], **boxopts)
        c.hist([1], bins=[2, amaxx], **boxopts)

        # Meaningful limits on 1/JSD
        lim_x, lim_y, lim_ey = map(np.array, zip(*jsd_limits))
        lim_ex = np.zeros_like(lim_ey)
        gr = ROOT.TGraphErrors(len(lim_x), lim_x, lim_y, lim_ex, lim_ey)
        smooth_tgrapherrors(gr, ntimes=3)
        c.graph(gr,
                linestyle=2,
                linecolor=ROOT.kGray + 1,
                fillcolor=ROOT.kBlack,
                alpha=0.03,
                option='L3')

        # Anchor the "Statistical limit" label on top of the band's 4th point
        x_, y_ = ROOT.Double(0), ROOT.Double(0)
        idx = 3
        gr.GetPoint(idx, x_, y_)
        ey_ = gr.GetErrorY(idx)
        x_, y_ = map(float, (x_, y_))
        c.latex("Statistical limit",
                x_,
                y_ + ey_,
                align=21,
                textsize=11,
                angle=-5,
                textcolor=ROOT.kGray + 2)

        # Markers
        # -- Start index of each feature group within `features`
        indices = np.array([0] + appearances).cumsum()

        # -- Labels of all points, built once as a real list so that `.index`
        #    is available regardless of whether `map` returns a list.
        point_labels = [point[2] for point in points]

        def draw_marker(feat, igroup, ifeat, shorten):
            """Draw the marker of `feat`: colour by group, style by position."""
            x, y, label = points[point_labels.index(feat)]

            if shorten:
                # Overwrite default name of parameter-scan classifier
                label = 'ANN' if label.startswith('ANN') else label
                label = 'uBoost' if label.startswith('uBoost') else label

            c.graph([y],
                    bins=[x],
                    markercolor=rp.colours[igroup % len(rp.colours)],
                    markerstyle=20 if ifeat == 0 else 23 + ifeat,
                    label='#scale[%.1f]{%s}' %
                    (scale, latex(label, ROOT=True)),
                    option='P')

        if len(appearances) != 2:
            # Split the legend into simple- and MVA taggers
            for is_simple in [True, False]:
                for igroup in range(len(indices) - 1):
                    group = features[indices[igroup]:indices[igroup + 1]]
                    for ifeat, feat in enumerate(group):
                        if is_simple != signal_low(feat):
                            continue
                        draw_marker(feat, igroup, ifeat, shorten=True)

                # Draw class-specific legend
                width = 0.2
                c.legend(header=("Analytical:" if is_simple else "MVA:"),
                         width=width,
                         xmin=0.50 + (width + 0.06) * (is_simple),
                         ymax=0.888)

        else:
            # Exactly two groups: one legend per group, headed by the group's
            # first feature.
            for first_var in [True, False]:
                igroup = 0 if first_var else 1
                group = features[indices[igroup]:indices[igroup + 1]]
                for ifeat, feat in enumerate(group):
                    draw_marker(feat, igroup, ifeat, shorten=False)

                # Draw group-specific legend. The second group starts at
                # indices[1] (== appearances[0]); indexing with
                # appearances[1] picked the wrong feature whenever the two
                # groups had different sizes.
                width = 0.15
                c.legend(header=latex(features[indices[igroup]], ROOT=True) +
                         "-based:",
                         width=width,
                         xmin=0.55 + (width + 0.06) * (first_var),
                         ymax=0.9)

        # Make legends transparent
        for leg in c.pads()[0]._legends:
            leg.SetFillStyle(0)

        # Connecting lines (simple): from each group's first point to each of
        # the group's remaining points, addressed by position in `points`.
        for igroup in range(len(indices) - 1):
            base_x, base_y, _ = points[indices[igroup]]
            color = rp.colours[igroup % len(rp.colours)]
            for j in range(1, appearances[igroup]):
                x1, y1, _ = points[indices[igroup] + j]
                c.graph([base_y, y1],
                        bins=[base_x, x1],
                        linecolor=color,
                        linestyle=2,
                        option='L')

        # Decorations
        c.xlabel(
            "Background rejection, 1 / #varepsilon_{bkg}^{rel} @ #varepsilon_{sig}^{rel} = 50%"
        )
        c.ylabel("Mass-decorrelation, 1 / JSD @ #varepsilon_{sig}^{rel} = 50%")
        c.xlim(*axisrangex)
        c.ylim(*axisrangey)
        c.logx()
        c.logy()

        # Annotations sit at the logarithmic midpoint between 1 and the axis
        # maximum, i.e. at sqrt(max).
        opts_text = dict(textsize=11, textcolor=ROOT.kGray + 2)
        midpointx = np.power(10, 0.5 * np.log10(amaxx))
        midpointy = np.power(10, 0.5 * np.log10(amaxy))
        c.latex("No separation",
                1.91,
                midpointy,
                angle=90,
                align=21,
                **opts_text)
        c.latex("Maximal sculpting",
                midpointx,
                0.89,
                angle=0,
                align=23,
                **opts_text)
        c.latex("    Less sculpting #rightarrow",
                2.1,
                midpointy,
                angle=90,
                align=23,
                **opts_text)
        c.latex("     Greater separation #rightarrow",
                midpointx,
                1.1,
                angle=0,
                align=21,
                **opts_text)

        c.text([], xmin=0.15, ymax=0.96, qualifier=QUALIFIER, ATLAS=False)
        c.text(TEXT + \
               ["#it{W} jet tagging"] + (
                    ["p_{{T}} #in  [{:.0f}, {:.0f}] GeV".format(pt_range[0], pt_range[1])] if pt_range is not None else []
                ) + (
                    ['Cut: m #in  [60, 100] GeV'] if masscut else []
                ),
               xmin=0.26, ATLAS=None)

    return c
Пример #14
0
def main(args):
    """
    Plot the jet mass and pT distributions under the two sets of weights.

    For each of mass and pT, for the inclusive sample and for each of 18
    equal-width pT slices between 200 and 2000 GeV, and for linear and
    logarithmic y-axis, the distribution weighted by `weight_test` is drawn
    together with the one weighted by `weight_adv`, and the canvas is saved
    as `figures/weighting_*.pdf`.

    Arguments:
        args: Command-line arguments, passed through `initialise`.
    """

    # Definitions
    # -- Copy the nested style dicts as well, so that setting the labels below
    #    does not modify the module-level `HISTSTYLE`. A comprehension is
    #    used because `dict(**mapping)` only accepts string keys on Python 3.
    histstyle = {key: dict(opts) for key, opts in HISTSTYLE.items()}
    histstyle[True]['label'] = 'Training weight'
    histstyle[False]['label'] = 'Testing weight'

    # Initialise
    args, _ = initialise(args)

    # Load data
    data, _, _ = load_data(args.input + 'data.h5',
                           background=True,
                           train=True)

    # `None` stands for the inclusive selection; `list(...)` keeps the
    # concatenation valid where `zip` returns an iterator.
    pt_edges = np.linspace(200, 2000, 18 + 1, endpoint=True)
    pt_bins = [None] + list(zip(pt_edges[:-1], pt_edges[1:]))

    # Histogram binning per variable: 10 GeV steps in mass, 50 GeV in pT
    binning = {
        'm': np.linspace(50, 300, (300 - 50) // 10 + 1, endpoint=True),
        'pt': np.linspace(200, 2000, (2000 - 200) // 50 + 1, endpoint=True),
    }

    variables = ['m', 'pt']
    for var, pt_bin, log in itertools.product(variables, pt_bins,
                                              [True, False]):

        bins = binning[var]

        # Canvas
        c = rp.canvas(batch=True)

        # Plots
        if pt_bin is not None:
            msk = (data['pt'] > pt_bin[0]) & (data['pt'] < pt_bin[1])
        else:
            msk = np.ones(data.shape[0], dtype=bool)

        # Testing weights first, then training (adversarial) weights
        for weight, training in [('weight_test', False), ('weight_adv', True)]:
            c.hist(data[var].values[msk],
                   bins=bins,
                   weights=data[weight].values[msk],
                   normalise=True,
                   **histstyle[training])

        # Decorations
        c.text(TEXT + ["Multijets", "Training dataset"] +
               (['p_{{T}} #in  [{:.0f}, {:.0f}] GeV'.format(
                   *pt_bin)] if pt_bin is not None else []),
               qualifier='Simulation Internal')
        c.legend()
        c.xlabel("Large-#it{{R}} jet {:s} [GeV]".format('mass' if var ==
                                                        'm' else 'p_{T}'))
        c.ylabel("Fraction of jets")
        if log:
            c.logy()

        # Save
        c.save('figures/weighting_{}{:s}{}.pdf'.format(
            'mass' if var == 'm' else var,
            '_pT{:.0f}_{:.0f}'.format(*pt_bin) if pt_bin is not None else '',
            '_log' if log else ''))

    # NOTE: The additional distribution checks in
    # `_plot_distribution_checks` used to follow an unconditional `return`
    # here and were therefore never executed. They are kept, uncalled, so
    # that the behaviour of `main` is unchanged.
    return


def _plot_distribution_checks(data):
    """
    Plot 1D comparisons and 2D maps of pt, m, rho and log(m) for background.

    This is the code that was previously unreachable at the end of `main`;
    nothing calls it. It adds a `logm` column to `data` in place and writes
    its figures to `figures/distributions/`.

    Arguments:
        data: Pandas data frame from which to read data.
    """

    data['logm'] = pd.Series(np.log(data['m']), index=data.index)

    # Check variable distributions
    # -- Per variable: (number of bins, axis minimum, axis maximum)
    axes = {
        'pt': (45, 200, 2000),
        'm': (50, 50, 300),
        'rho': (50, -8, 0),
        'logm': (50, np.log(50), np.log(300)),
    }
    weight = 'weight_adv'  # 'weight_test' / 'weight'
    pt_range = (200., 2000.)
    msk_pt = (data['pt'] > pt_range[0]) & (data['pt'] < pt_range[1])
    for var in axes:

        # Canvas
        c = rp.canvas(num_pads=2, batch=True)

        # Plot
        nbins, xmin, xmax = axes[var]
        bins = np.linspace(xmin, xmax, nbins + 1, endpoint=True)
        hists = dict()
        for adv in [0, 1]:
            msk = data['signal'] == 0  # @TEMP signal
            msk &= msk_pt
            opts = dict(normalise=True, **HISTSTYLE[adv])  # @TEMP signal
            opts['label'] = 'adv' if adv else 'test'
            hists[adv] = c.hist(
                data.loc[msk, var].values,
                bins=bins,
                weights=data.loc[msk, weight if adv else 'weight_test'].values,
                **opts)

        # Ratio (adv over test)
        c.pads()[1].ylim(0, 2)
        c.ratio_plot((hists[1], hists[0]), oob=True)

        # Decorations
        c.legend()
        c.xlabel(latex(var, ROOT=True))
        c.ylabel("Fraction of jets")
        c.pads()[1].ylabel("adv/test")
        c.text(TEXT + [
            'p_{{T}} #in  [{:.0f}, {:.0f}] GeV'.format(pt_range[0],
                                                       pt_range[1])
        ],
               qualifier=QUALIFIER)

        # Save
        mkdir('figures/distributions')
        c.save('figures/distributions/incl_{}.pdf'.format(var))

    # 2D histograms, one per unordered pair of variables
    msk = data['signal'] == 0
    axisvars = sorted(list(axes))
    for i, varx in enumerate(axisvars):
        for vary in axisvars[i + 1:]:
            # Canvas
            c = ROOT.TCanvas()
            c.SetRightMargin(0.20)

            # Create, fill histogram
            h2 = ROOT.TH2F('{}_{}'.format(varx, vary), "",
                           *(axes[varx] + axes[vary]))
            root_numpy.fill_hist(h2, data.loc[msk, [varx, vary]].values,
                                 100. * data.loc[msk, weight].values)

            # Draw
            h2.Draw("COLZ")

            # Decorations
            h2.GetXaxis().SetTitle(latex(varx, ROOT=True))
            h2.GetYaxis().SetTitle(latex(vary, ROOT=True))
            c.SetLogz()

            # Save
            c.SaveAs('figures/distributions/2d_{}_{}.pdf'.format(varx, vary))

    return
def plot(*argv):
    """
    Method for delegating plotting.

    Draws, for every tagger feature, the values in `jsd` against the
    background efficiencies in `effs` (skipping the first entry of both),
    with separate legends for features where `signal_low` is true
    ("Analytical") and the others ("MVA"), plus a smoothed band built from
    `jsd_limits`.

    Arguments (positional, unpacked from `argv` in this order):
        args: Namespace; only `args.show` is read.
        data: Not used here.
        effs: Sequence of background efficiencies; divided by 100 to obtain
            the x-coordinates.
        jsd: Mapping from feature name to the sequence of values to plot.
        jsd_limits: Iterable of (x, y, y-error) triplets for the limit band.
        features: Ordered list of feature names.
        pt_range: Optional (low, high) pair quoted in the text box.

    Returns:
        The canvas holding the plot.
    """

    # Unpack arguments
    args, data, effs, jsd, jsd_limits, features, pt_range = argv

    with TemporaryStyle() as style:

        # Style
        style.SetTitleOffset(1.5, 'x')
        style.SetTitleOffset(2.0, 'y')

        # Canvas
        c = rp.canvas(batch=not args.show)

        # Plots
        # -- Flat reference line at unity
        ref = ROOT.TH1F('ref', "", 10, 0., 1.)
        for ibin in range(1, ref.GetXaxis().GetNbins() + 1):
            ref.SetBinContent(ibin, 1)
        c.hist(ref, linecolor=ROOT.kGray + 2, linewidth=1)

        # -- One pass (and one legend) per tagger class
        width = 0.15
        for is_simple in [True, False]:
            for ifeat, feat in enumerate(features):
                if is_simple == signal_low(feat):
                    # Neighbouring features share a colour and alternate
                    # between two line/marker styles.
                    odd = ifeat % 2
                    colour = rp.colours[(ifeat // 2) % len(rp.colours)]
                    c.plot(jsd[feat][1:],
                           bins=np.array(effs[1:]) / 100.,
                           linecolor=colour,
                           markercolor=colour,
                           linestyle=1 + odd,
                           markerstyle=20 + odd * 4,
                           label=latex(feat, ROOT=True),
                           option='PL')

            if is_simple:
                header = "Analytical:"
            else:
                header = "MVA:"
            c.legend(header=header,
                     width=width * (1 + 0.8 * int(is_simple)),
                     xmin=0.42 + (width + 0.05) * (is_simple),
                     ymax=0.888)

        # Meaningful limits on JSD
        lim_x, lim_y, lim_ey = map(np.array, zip(*jsd_limits))
        lim_ex = np.zeros_like(lim_ey)
        gr = ROOT.TGraphErrors(len(lim_x), lim_x, lim_y, lim_ex, lim_ey)
        smooth_tgrapherrors(gr, ntimes=2)
        c.graph(gr,
                linestyle=2,
                linecolor=ROOT.kGray + 1,
                fillcolor=ROOT.kBlack,
                alpha=0.03,
                option='L3')

        # Redraw axes
        c.pads()[0]._primitives[0].Draw('AXIS SAME')

        # Decorations
        c.xlabel("Background efficiency #varepsilon_{bkg}^{rel}")
        c.ylabel("Mass correlation, JSD")
        c.text([], xmin=0.15, ymax=0.96, qualifier=QUALIFIER)

        lines = ["#sqrt{s} = 13 TeV", "Dijets"]
        if pt_range:
            lines += ["p_{T} [GeV] #in",
                      "    [{:.0f}, {:.0f}]".format(*pt_range)]
        c.text(lines, ymax=0.85, ATLAS=None)

        c.latex("Maximal sculpting",
                0.065,
                1.2,
                align=11,
                textsize=11,
                textcolor=ROOT.kGray + 2)
        c.xlim(0, 1)
        c.ymin(1E-05)
        c.padding(0.45)
        c.logy()

        for legend in c.pad()._legends:
            legend.SetMargin(0.5)

        # Anchor the "Statistical limit" label at the 7th-from-last point of
        # the smoothed band, half an error bar below the central value.
        anchor = gr.GetN() - 7
        px, py = ROOT.Double(0), ROOT.Double(0)
        gr.GetPoint(anchor, px, py)
        err_y = gr.GetErrorY(anchor)
        px, py = float(px), float(py)
        c.latex("Statistical limit",
                px,
                py - err_y / 2.,
                align=23,
                textsize=11,
                angle=12,
                textcolor=ROOT.kGray + 2)

    return c
def jsd(data_, args, features, pt_range):
    """
    Perform study of the jet-mass sculpting caused by cutting on each feature.

    For every feature and every background efficiency in 5%, 10%, ..., 95%,
    the background jets are split into those passing and failing the
    corresponding cut. The two normalised jet-mass spectra are drawn, compared
    using the Jensen-Shannon divergence (base 2), and the canvas is saved as
    `figures/temp_jsd_<feat>_<eff>[__pT<min>_<max>].pdf`.

    The summary plot is built by `plot` but is *not* saved here; the intended
    output path `figures/jsd[__pT<min>_<max>].pdf` is returned instead.

    Arguments:
        data_: Pandas data frame from which to read data. The columns
            'signal', 'mass', 'weight_test', each entry of `features` and,
            if `pt_range` is given, 'pt' are read.
        args: Namespace holding command-line arguments. `args.show` decides
            whether the canvases are created in batch mode.
        features: Features for which to compute the JSD. A feature listed
            more than once is only processed once.
        pt_range: Either None, or a (min, max) pair; only jets with 'pt'
            strictly inside the range are used.

    Returns:
        Tuple of (canvas returned by `plot`, `args`, output path).
    """

    # Select data
    if pt_range is not None:
        data = data_[(data_['pt'] > pt_range[0]) & (data_['pt'] < pt_range[1])]
    else:
        data = data_

    # Create local histogram style dict. The per-key style dicts are copied
    # too, so that setting the labels below does not leak into the
    # module-level `HISTSTYLE` shared with the other studies.
    histstyle = {key: dict(style) for key, style in HISTSTYLE.items()}
    histstyle[True]['label'] = "Pass"
    histstyle[False]['label'] = "Fail"

    # Define common variables
    msk = data['signal'] == 0  # Background jets
    effs = np.linspace(0, 100, 10 * 2, endpoint=False)[1:].astype(int)

    # Loop tagger features
    jsds = {feat: [] for feat in features}
    for feat in features:

        if len(jsds[feat]): continue  # Duplicate feature.

        # Define cuts, such that a fraction `eff` of the background passes
        cuts = list()
        for eff in effs:
            cut = wpercentile(data.loc[msk, feat].values,
                              eff if signal_low(feat) else 100 - eff,
                              weights=data.loc[msk, 'weight_test'].values)
            cuts.append(cut)

        # Compute JS divergence for successive cuts
        for cut, eff in zip(cuts, effs):

            # Pass mask, flipped for features where signal sits at low values
            msk_pass = data[feat] > cut
            if signal_low(feat):
                msk_pass = ~msk_pass

            # Get histograms / plot
            c = rp.canvas(batch=not args.show)
            h_pass = c.hist(data.loc[msk_pass & msk, 'mass'].values,
                            bins=MASSBINS,
                            weights=data.loc[msk_pass & msk,
                                             'weight_test'].values,
                            normalise=True,
                            **histstyle[True])
            h_fail = c.hist(data.loc[~msk_pass & msk, 'mass'].values,
                            bins=MASSBINS,
                            weights=data.loc[~msk_pass & msk,
                                             'weight_test'].values,
                            normalise=True,
                            **histstyle[False])

            # Convert to numpy arrays
            p = root_numpy.hist2array(h_pass)
            f = root_numpy.hist2array(h_fail)

            # Compute Jensen-Shannon divergence
            jsds[feat].append(JSD(p, f, base=2))

            # -- Decorations
            c.xlabel("Large-#it{R} jet mass [GeV]")
            c.ylabel("Fraction of jets")
            c.legend()
            c.logy()
            c.text(TEXT + [
                "{:s} {} {:.3f}".format(latex(feat, ROOT=True), '<' if signal_low(feat) else '>', cut),
                "JSD = {:.4f}".format(jsds[feat][-1])] + \
                (["p_{{T}} #in  [{:.0f}, {:.0f}] GeV".format(*pt_range)] if pt_range else []),
                qualifier=QUALIFIER)

            # -- Save
            c.save('figures/temp_jsd_{:s}_{:.0f}{}.pdf'.format(
                feat, eff,
                '' if pt_range is None else '__pT{:.0f}_{:.0f}'.format(
                    *pt_range)))

    # Compute meaningful limit on JSD, at efficiencies spaced evenly in
    # logit space; stored as (efficiency, mean, std. dev.) over bootstraps
    jsd_limits = list()
    sigmoid = lambda x: 1. / (1. + np.exp(-x))
    for eff in sigmoid(np.linspace(-5, 5, 20 + 1, endpoint=True)):
        limits = jsd_limit(data[msk], eff, num_bootstrap=5)
        jsd_limits.append((eff, np.mean(limits), np.std(limits)))

    # Perform plotting
    c = plot(args, data, effs, jsds, jsd_limits, features, pt_range)

    # Output
    path = 'figures/jsd{}.pdf'.format(
        '' if pt_range is None else '__pT{:.0f}_{:.0f}'.format(*pt_range))

    return c, args, path
Пример #17
0
def plot_distributions(data, var, bins):
    """
    Draw the `var` and `var + "CSS"` distributions in each jet-mass bin.

    For every pair of consecutive edges in `MASS_BINS` the weighted
    distributions of the two variables are histogrammed, KDE-smoothed and
    normalised to unit density. They are drawn together with the smoothed
    `var` distribution of the first mass bin, and the canvas is saved as
    `figures/css/cssProfile_<var>_<mass bin index>.pdf`.

    Arguments:
        data: Pandas data frame with the columns 'm', 'weight_test', `var`
            and `var + "CSS"`.
        var: Name of the base variable.
        bins: Binning passed on to the canvas' `hist` method.
    """

    reference = None  # Smoothed `var` histogram of the first mass bin
    bin_label = "#it{{m}} #in  [{:.1f}, {:.1f}] GeV"

    for mass, (mass_down, mass_up) in enumerate(
            zip(MASS_BINS[:-1], MASS_BINS[1:])):

        # Canvas
        canvas = rp.canvas(batch=True)

        # Fill (but do not yet draw) the histograms for this mass bin
        in_bin = (data['m'] >= mass_down) & (data['m'] < mass_up)
        weights = data.loc[in_bin, 'weight_test'].values
        h_base = canvas.hist(data.loc[in_bin, var].values,
                             bins=bins,
                             weights=weights,
                             display=False)
        h_css = canvas.hist(data.loc[in_bin, var + "CSS"].values,
                            bins=bins,
                            weights=weights,
                            display=False)

        # From the second mass bin on, fit the raw histogram against the
        # low-mass reference
        if reference is None:
            profile_css = None
        else:
            _, _, profile_css, _ = fit(h_base, 1.0, reference, "%.2f" % mass)
            normalise(profile_css, density=True)

        # Smooth, then normalise
        h_base = kde(h_base)
        h_css = kde(h_css)
        for hist in (h_base, h_css):
            normalise(hist, density=True)

        # The first mass bin provides the reference for all later ones
        if reference is None:
            reference = h_base.Clone('h_lowmass')

        # Legend labels for the reference and the current mass bin
        lowmassbin = bin_label.format(MASS_BINS[0],
                                      MASS_BINS[1]).replace('.0', '')
        massbin = bin_label.format(mass_down, mass_up).replace('.0', '')

        # Draw histograms
        drawings = [
            (reference,
             dict(label=latex(var, ROOT=True) + ",    {}".format(lowmassbin),
                  linecolor=rp.colours[1],
                  fillcolor=rp.colours[1],
                  alpha=0.5,
                  option='HISTL',
                  legend_option='FL')),
            (h_base,
             dict(label=latex(var, ROOT=True) + ",    {}".format(massbin),
                  linecolor=rp.colours[4],
                  linestyle=2,
                  option='HISTL')),
            (h_css,
             dict(label=latex(var + 'CSS', ROOT=True) + ", {}".format(massbin),
                  linecolor=rp.colours[3],
                  option='HISTL')),
        ]
        for hist, options in drawings:
            canvas.hist(hist, **options)

        # Disabled: draw reference histogram from fit.
        # if profile_css is not None:
        #     canvas.hist(profile_css, linecolor=ROOT.kBlack, linestyle=2, label='Transformed hist (CSS)')

        # Decorations
        canvas.xlabel(
            latex(var, ROOT=True) + ", " + latex(var + 'CSS', ROOT=True))
        canvas.ylabel("Number of jets p.d.f.")
        # Range is optimised for N2; probably needs adjusting for other
        # variables
        canvas.ylim(0, 5.2)
        canvas.legend(xmin=0.45, ymax=0.76, width=0.25)
        canvas.text(["#sqrt{s} = 13 TeV,  Multijets", "KDE smoothed"],
                    qualifier=QUALIFIER,
                    ATLAS=False)
        pad = canvas.pad()
        pad._xaxis().SetTitleOffset(1.3)
        canvas.pad()._yaxis().SetNdivisions(105)
        canvas.pad()._primitives[-1].Draw('SAME AXIS')

        # Save
        canvas.save('figures/css/cssProfile_{}_{}.pdf'.format(var, mass))

    return
Пример #18
0
def test(data, variable, bg_eff, signal_above=False):
    """
    Plot the measured and k-NN fitted profiles and the local efficiencies.

    Steps performed:
      1. Fill the measured profile from the background jets.
      2. Add the k-NN transformed variable to `data` and load the k-NN model
         from `<args.output>/models/knn_<variable>_<bg_eff>.pkl.gz`.
      3. Evaluate the model on a grid 8 times finer than the nominal binning
         and draw both profiles using `plot`.
      4. For signal and background separately, draw the efficiency of the
         cut on the k-NN variable in bins of (VARX, VARY) and save it under
         both `knn_fitter/figures/` and `<args.output>/figures/`.

    NOTE: `args` is not a parameter; it is looked up in the enclosing
    (module) scope and must provide `args.output`.

    Arguments:
        data: Pandas data frame with the columns 'signal', 'weight_test' and
            those named in `VARS`. `add_knn` is expected to add the 'knn'
            column which is read afterwards.
        variable: Name of the variable for which the k-NN was fitted.
        bg_eff: Target background efficiency in percent.
        signal_above: If True, jets pass for 'knn' > 0; otherwise for
            'knn' < 0. Also forwarded to `fill_profile`.
    """

    # Shout out to Cynthia Brewer and Mark Harrower
    # [http://colorbrewer2.org]. Palette is colorblind-safe.
    blues = [(247 / 255., 251 / 255., 255 / 255.),
             (222 / 255., 235 / 255., 247 / 255.),
             (198 / 255., 219 / 255., 239 / 255.),
             (158 / 255., 202 / 255., 225 / 255.),
             (107 / 255., 174 / 255., 214 / 255.),
             (66 / 255., 146 / 255., 198 / 255.),
             (33 / 255., 113 / 255., 181 / 255.),
             (8 / 255., 81 / 255., 156 / 255.),
             (8 / 255., 48 / 255., 107 / 255.)]

    red, green, blue = map(np.array, zip(*blues))
    nb_cols = len(blues)
    stops = np.linspace(0, 1, nb_cols, endpoint=True)
    ROOT.TColor.CreateGradientColorTable(nb_cols, stops, red, green, blue,
                                         NB_CONTOUR)

    msk_sig = data['signal'] == 1
    msk_bkg = ~msk_sig

    # Fill measured profile
    with Profile("filling profile"):
        profile_meas, _ = fill_profile(data[msk_bkg],
                                       variable,
                                       bg_eff,
                                       signal_above=signal_above)

    knnfeat = 'knn'
    model_path = args.output + \
        '/models/knn_{:s}_{:.0f}.pkl.gz'.format(variable, bg_eff)

    # Add k-NN variable
    with Profile("adding variable"):
        add_knn(data, feat=variable, newfeat=knnfeat, path=model_path)

    # Loading KNN classifier
    with Profile("loading model"):
        knn = loadclf(model_path)

    # Filling fitted profile
    with Profile("Filling fitted profile"):
        rebin = 8
        edges, centres = dict(), dict()
        for ax, var in zip(['x', 'y'], [VARX, VARY]):

            # Short-hands
            vbins, vmin, vmax = AXIS[var]

            # Re-binned bin edges
            edges[ax] = np.interp(
                np.linspace(0, vbins, vbins * rebin + 1, endpoint=True),
                range(vbins + 1),
                np.linspace(vmin, vmax, vbins + 1, endpoint=True))

            # Re-binned bin centres
            centres[ax] = edges[ax][:-1] + 0.5 * np.diff(edges[ax])

        # Get predictions evaluated at re-binned bin centres
        g = dict()
        g['x'], g['y'] = np.meshgrid(centres['x'], centres['y'])
        g['x'], g['y'] = standardise(g['x'], g['y'])

        X = np.vstack((g['x'].flatten(), g['y'].flatten())).T
        prediction = knn.predict(X).reshape(g['x'].shape).T

        # Fill ROOT "profile"
        profile_fit = ROOT.TH2F('profile_fit', "",
                                len(edges['x']) - 1, edges['x'].flatten('C'),
                                len(edges['y']) - 1, edges['y'].flatten('C'))
        root_numpy.array2hist(prediction, profile_fit)

    # Plotting
    for fit in [False, True]:

        # Select correct profile
        profile = profile_fit if fit else profile_meas

        # Plot
        plot(profile, fit, variable, bg_eff)

    # Jets passing the cut on the k-NN variable. This does not depend on the
    # bin or on the sample, so it is computed once up front.
    if signal_above:
        msk_pass = data[knnfeat] > 0  # ensure correct cut direction
    else:
        msk_pass = data[knnfeat] < 0

    # Nominal binning, shared by the signal and background maps
    shape = (AXIS[VARX][0], AXIS[VARY][0])
    bins = [
        np.linspace(AXIS[var][1],
                    AXIS[var][2],
                    AXIS[var][0] + 1,
                    endpoint=True) for var in VARS
    ]

    # Plotting local selection efficiencies for D2-kNN < 0
    # -- Compute signal efficiency
    for sig, msk in zip([True, False], [msk_sig, msk_bkg]):
        print("working on signal" if sig else "working on bg")

        if sig:
            rgbs = blues
            stops = np.linspace(0, 1, len(rgbs), endpoint=True)
        else:
            # Extra leading colour for efficiencies below the target
            # background efficiency; the blues are squeezed into the
            # remaining range [bg_eff / 100, 1].
            rgbs = [(255 / 255., 51 / 255., 4 / 255.)] + blues
            stops = np.array([0] + list(
                np.linspace(0, 1, len(rgbs) - 1, endpoint=True) *
                (1. - bg_eff / 100.) + bg_eff / 100.))

        # Apply the palette for *both* samples. It used to be set only in
        # the background branch, leaving the signal map dependent on
        # whatever palette the `plot` calls above left active.
        red, green, blue = map(np.array, zip(*rgbs))
        nb_cols = len(rgbs)
        ROOT.TColor.CreateGradientColorTable(nb_cols, stops, red, green, blue,
                                             NB_CONTOUR)

        # Create `profile` histogram
        profile = ROOT.TH2F('profile', "",
                            len(bins[0]) - 1, bins[0].flatten('C'),
                            len(bins[1]) - 1, bins[1].flatten('C'))

        # Compute inclusive efficiency in bins of `VARY`
        # NOTE(review): both bin edges are exclusive here, whereas the 2D
        # bins below include the upper edge -- confirm this is intended.
        effs = list()
        for low, high in zip(bins[1][:-1], bins[1][1:]):
            msk_bin = (data[VARY] > low) & (data[VARY] < high)
            num = data.loc[msk & msk_bin & msk_pass,
                           'weight_test'].values.sum()
            den = data.loc[msk & msk_bin, 'weight_test'].values.sum()
            effs.append(num / den)

        # Fill profile
        with Profile("Fill profile"):
            for i, j in itertools.product(*map(range, shape)):

                # Bin edges in x and y
                edges = [
                    axis_bins[idx:idx + 2]
                    for idx, axis_bins in zip([i, j], bins)
                ]

                # Jets of the current sample falling into bin (i, j)
                selected = msk
                for dim, var in enumerate(VARS):
                    low, high = edges[dim]
                    selected = selected & (data[var] > low) & \
                        (data[var] <= high)

                # Set non-zero bin content
                if selected.any():
                    num = data.loc[selected & msk_pass,
                                   'weight_test'].values.sum()
                    den = data.loc[selected, 'weight_test'].values.sum()
                    profile.SetBinContent(i + 1, j + 1, num / den)

        c = rp.canvas(batch=True)
        pad = c.pads()[0]._bare()
        pad.cd()
        pad.SetRightMargin(0.20)
        pad.SetLeftMargin(0.15)
        pad.SetTopMargin(0.10)

        # Styling
        profile.GetXaxis().SetTitle("Large-#it{R} jet " +
                                    latex(VARX, ROOT=True) +
                                    " = log(m^{2}/p_{T}^{2})")
        profile.GetYaxis().SetTitle("Large-#it{R} jet " +
                                    latex(VARY, ROOT=True) + " [GeV]")
        profile.GetZaxis().SetTitle("Selection efficiency for %s^{(%s%%)}" %
                                    (latex(variable, ROOT=True), bg_eff))

        profile.GetYaxis().SetNdivisions(505)
        profile.GetZaxis().SetNdivisions(505)
        profile.GetXaxis().SetTitleOffset(1.4)
        profile.GetYaxis().SetTitleOffset(1.8)
        profile.GetZaxis().SetTitleOffset(1.3)
        profile.GetZaxis().SetRangeUser(0., 1.)
        profile.SetContour(NB_CONTOUR)

        # Draw
        profile.Draw('COLZ')

        # Decorations
        c.text(qualifier=QUALIFIER, ymax=0.92, xmin=0.15, ATLAS=False)
        c.text(["#sqrt{s} = 13 TeV", "#it{W} jets" if sig else "Multijets"],
               ATLAS=False)

        # -- Efficiencies, written next to the last x-axis bin; only the
        #    first row carries the explanatory prefix
        xaxis = profile.GetXaxis()
        yaxis = profile.GetYaxis()
        tlatex = ROOT.TLatex()
        tlatex.SetTextColor(ROOT.kGray + 2)
        tlatex.SetTextSize(0.023)
        tlatex.SetTextFont(42)
        tlatex.SetTextAlign(32)
        xt = xaxis.GetBinLowEdge(xaxis.GetNbins())
        for eff, ibin in zip(effs, range(1, yaxis.GetNbins() + 1)):
            yt = yaxis.GetBinCenter(ibin)
            if ibin == 1:
                prefix = "#bar{#varepsilon}^{rel}_{%s} = " % (
                    'sig' if sig else 'bkg')
            else:
                prefix = ''
            tlatex.DrawLatex(xt, yt, "%s%.1f%%" % (prefix, eff * 100.))

        # -- Bounds
        BOUNDS[0].DrawCopy("SAME")
        BOUNDS[1].DrawCopy("SAME")
        c.latex("m > 50 GeV",
                -4.5,
                BOUNDS[0].Eval(-4.5) + 30,
                align=21,
                angle=-37,
                textsize=13,
                textcolor=ROOT.kGray + 3)
        c.latex("m < 300 GeV",
                -2.5,
                BOUNDS[1].Eval(-2.5) - 30,
                align=23,
                angle=-57,
                textsize=13,
                textcolor=ROOT.kGray + 3)

        # Save
        mkdir('knn_fitter/figures/')
        c.save('knn_fitter/figures/knn_eff_{}_{:s}_{:.0f}.pdf'.format(
            'sig' if sig else 'bkg', variable, bg_eff))
        mkdir(args.output + '/figures/')
        c.save(args.output + '/figures/knn_eff_{}_{:s}_{:.0f}.pdf'.format(
            'sig' if sig else 'bkg', variable, bg_eff))

    return
Пример #19
0
def main(args):
    """
    Draw the unweighted 2D background distribution of jet pT vs. dijet mass.

    Loads `data/<args.input>`, fills a `dijetmass` (x) versus `jet_pt` (y)
    histogram from the background jets ('signal' == 0), prints its integral
    and saves it as
    `figures/distributions/3d_dijetmass_jet_pt_jet_ungrtrk500_bkg.{pdf,eps}`.

    The triple-quoted blocks below are disabled studies, kept for reference.

    Arguments:
        args: Command-line arguments, passed through `initialise`; the
            resulting `args.input` names the input file.
    """

    # Initialise
    args, cfg = initialise(args)

    # Load data
    data, features, _ = load_data('data/' + args.input)

    # Definitions. The per-key style dicts are copied too, so that the labels
    # set here do not overwrite those of the module-level `HISTSTYLE`.
    histstyle = {key: dict(style) for key, style in HISTSTYLE.items()}
    histstyle[True]['label'] = 'Multijets'
    histstyle[False]['label'] = 'Dark jets, Model A, m = 2 TeV'

    # Add knn variables

    #base_var = ['lead_jet_ungrtrk500', 'sub_jet_ungrtrk500']
    #kNN_var = [var.replace('jet', 'knn') for var in base_var]

    #base_var = ['ntrk_sum']
    #kNN_var = [var + '-knn' for var in base_var]
    """
    with Profile("Add variables"):
        from run.knn.common import add_knn, MODEL, VAR as kNN_basevar, EFF as kNN_eff
        print "k-NN base variable: {} (cp. {})".format(kNN_basevar, kNN_var)
        for i in range(len(base_var)):
            add_knn(data, newfeat=kNN_var[i], path='models/knn/knn_{}_{}_{}.pkl.gz'.format(base_var[i], kNN_eff, MODEL))
            print 'models/knn/knn_{}_{}_{}.pkl.gz'.format(base_var[i], kNN_eff, MODEL)
    """

    # Check variable distributions; (number of bins, minimum, maximum)
    axes = {
        'jet_ungrtrk500': (50, 0, 100),
        #'lead_knn_ungrtrk500': (50, -100, 50),
        'jet_pt': (50, 0, 3000),
        'dijetmass': (50, 0, 7000),
    }

    scale = 139 * 1000000

    weight = 'weight'  # 'weight_test' / 'weight'
    msk_bkg = data['signal'] == 0  # @TEMP signal
    msk_sig = data['sigType'] == 1  # @TEMP signal
    #msk_weight = data['weight']<0.002
    #msk_bkg = msk_bkg & msk_weight

    #msk_CR = (data['lead_jet_ungrtrk500']<20) | (data['sub_jet_ungrtrk500']<20)

    ###### 3D histograms #######

    vary = 'jet_pt'
    varx = 'dijetmass'
    varz = 'jet_ungrtrk500'  # Only used in the output file name

    #for i,varx in enumerate(axisvars):
    #   for vary in axisvars[i+1:]:
    # Canvas
    can4 = rp.canvas(batch=True)
    pad = can4.pads()[0]._bare()
    pad.cd()
    pad.SetRightMargin(0.20)

    #can4 = ROOT.TCanvas("canvas", "", 800, 600)
    #can4.SetRightMargin(0.20)
    # Create, fill histogram (no weights are applied)
    h2_bkg = ROOT.TH2F('{}_{}'.format(varx, vary), "",
                       *(axes[varx] + axes[vary]))

    root_numpy.fill_hist(
        h2_bkg, data.loc[msk_bkg, [varx, vary]].values
    )  #, scale*data.loc[msk_bkg, weight].values)#*data.loc[msk_bkg, varz].values)

    #h2_bkg.Scale(1./h2_bkg.Integral())

    print(h2_bkg.Integral())

    # Draw
    h2_bkg.Draw("COLZ")

    # Decorations
    h2_bkg.GetXaxis().SetTitle(latex(varx, ROOT=True))
    h2_bkg.GetYaxis().SetTitle(latex(vary, ROOT=True))
    #h2_bkg.GetZaxis().SetTitle(latex(varz, ROOT=True))
    #pad.SetLogz()
    #can4.zlim(0.0, 0.04)
    h2_bkg.GetZaxis().SetRangeUser(0.0, 300000)

    # Save
    can4.save('figures/distributions/3d_{}_{}_{}_bkg.pdf'.format(
        varx, vary, varz))
    can4.save('figures/distributions/3d_{}_{}_{}_bkg.eps'.format(
        varx, vary, varz))

    # ntrk distribution
    """ 
    can1 = rp.canvas(batch=True)
    bins1 = np.linspace(0, 150, 75)

    h_ungrB = can1.hist(data.loc[msk_bkg, 'lead_jet_ungrtrk500'].values, bins=bins1, weights=data.loc[msk_bkg, weight].values, label='ungrtrk, bkg', normalise=True, linecolor=ROOT.kGreen+2)

    h_ungeS = can1.hist(data.loc[msk_sig, 'lead_jet_ungrtrk500'].values, bins=bins1, weights=data.loc[msk_sig, weight].values, label='ungrtrk, sig', normalise=True, linecolor=ROOT.kGreen+2, linestyle=2)
    
    can1.legend(width=0.3, xmin=0.6, ymax=0.9)
    can1.save('figures/distributions/ungrtrk_dist.pdf')
    can1.save('figures/distributions/ungrtrk_dist.eps')


    # 2D histograms

    axisvars = sorted(list(axes))
    
    varx = 'lead_jet_ungrtrk500'
    vary = 'sub_jet_ungrtrk500'


    #for i,varx in enumerate(axisvars):
    #   for vary in axisvars[i+1:]:
    # Canvas
    can3 = ROOT.TCanvas()
    can3.SetRightMargin(0.20)
    
    # Create, fill histogram
    h2_bkg = ROOT.TH2F('{}_{}'.format(varx, vary), "", *(axes[varx] + axes[vary]))
    h2_sig = ROOT.TH2F('{}_{}'.format(varx, vary), "", *(axes[varx] + axes[vary]))

    root_numpy.fill_hist(h2_bkg, data.loc[msk_bkg, [varx, vary]].values, data.loc[msk_bkg, weight].values)
    root_numpy.fill_hist(h2_sig, data.loc[msk_sig, [varx, vary]].values, data.loc[msk_sig, weight].values)
    
    # Draw
    h2_bkg.Draw("COLZ")

    # Decorations
    h2_bkg.GetXaxis().SetTitle(latex(varx, ROOT=True))
    h2_bkg.GetYaxis().SetTitle(latex(vary, ROOT=True))
    can3.SetLogz()
    
    # Save
    can3.SaveAs('figures/distributions/2d_{}_{}_bkg.pdf'.format(varx, vary))
    can3.SaveAs('figures/distributions/2d_{}_{}_bkg.eps'.format(varx, vary))

    can6 = ROOT.TCanvas()
    can6.SetRightMargin(0.20)

    h2_sig.Draw("COLZ")

    # Decorations
    h2_sig.GetXaxis().SetTitle(latex(varx, ROOT=True))
    h2_sig.GetYaxis().SetTitle(latex(vary, ROOT=True))
    can6.SetLogz()
    
    # Save
    can6.SaveAs('figures/distributions/2d_{}_{}_sig.pdf'.format(varx, vary))
    can6.SaveAs('figures/distributions/2d_{}_{}_sig.eps'.format(varx, vary))

    ### Subleading vs. leading knn_ntrk

    varx = 'lead_knn_ungrtrk500'
    vary = 'sub_knn_ungrtrk500'


    # Canvas
    can4 = ROOT.TCanvas()
    can4.SetRightMargin(0.20)

    h2_C1_bkg = ROOT.TH2F('{}_{}'.format(varx, vary), "", *(axes[varx] + axes[vary]))
    root_numpy.fill_hist(h2_C1_bkg, data.loc[msk_bkg, [varx, vary]].values, 100. * data.loc[msk_bkg, weight].values)
    h2_C1_sig = ROOT.TH2F('{}_{}'.format(varx, vary), "", *(axes[varx] + axes[vary]))
    root_numpy.fill_hist(h2_C1_sig, data.loc[msk_sig, [varx, vary]].values, 100. * data.loc[msk_sig, weight].values)

    # Draw
    h2_C1_bkg.Draw("COLZ")

    # Decorations
    h2_C1_bkg.GetXaxis().SetTitle(latex(varx, ROOT=True))
    h2_C1_bkg.GetYaxis().SetTitle(latex(vary, ROOT=True))
    can4.SetLogz()

    can4.SaveAs('figures/distributions/2d_{}_{}_bkg.pdf'.format(varx, vary))
    can4.SaveAs('figures/distributions/2d_{}_{}_bkg.eps'.format(varx, vary))


    # Canvas
    can5 = ROOT.TCanvas()
    can5.SetRightMargin(0.20)

    # Draw
    h2_C1_sig.Draw("COLZ")

    # Decorations
    h2_C1_sig.GetXaxis().SetTitle(latex(varx, ROOT=True))
    h2_C1_sig.GetYaxis().SetTitle(latex(vary, ROOT=True))
    can5.SetLogz()

    can5.SaveAs('figures/distributions/2d_{}_{}_sig.pdf'.format(varx, vary))
    can5.SaveAs('figures/distributions/2d_{}_{}_sig.eps'.format(varx, vary))

    """

    return
def main(args):
    """
    Illustrate the Jensen-Shannon divergence as mass-decorrelation metric.

    For each of the pT bins [200, 500) and [500, 1000) GeV, a cut on Tau21 is
    placed at the weighted median of the signal distribution. The background
    jet-mass spectra passing (P) and failing (F) the cut are drawn together
    with their average M, followed by the per-bin contributions to
    KL(P || M), KL(F || M) and JSD(P || F), with the cumulative JSD overlaid.
    Each figure is saved as
    `figures/massdecorrelationmetric_Tau21__pT<min>_<max>GeV.pdf`.

    Arguments:
        args: Namespace holding command-line arguments; `args.input` is the
            path prefix to which 'data.h5' is appended.

    Returns:
        0.
    """

    # Load data
    data_, features, _ = load_data(args.input + 'data.h5', train=True)

    for pt_bin in [(200., 500.), (500., 1000.)]:

        # Impose pT-cut
        data = data_[(data_['pt'] >= pt_bin[0]) & (data_['pt'] < pt_bin[1])]

        var = 'Tau21'
        msk_sig = (data['signal'] == 1).values
        x = data[var].values
        m = data['m'].values
        w = data['weight_test'].values

        # Get cut value. The weights have to be restricted to the signal jets
        # as well, so that they line up one-to-one with the values.
        cut = wpercentile(x[msk_sig], 50., weights=w[msk_sig])
        print("Cut value: {:.2f}".format(cut))

        # Discard signal
        x = x[~msk_sig]
        m = m[~msk_sig]
        w = w[~msk_sig]

        # Get pass mask
        msk_pass = x < cut
        print("Background efficiency: {:.1f}%".format(
            100. * w[msk_pass].sum() / w.sum()))

        # Canvas
        offset = 0.06
        margin = 0.3
        # @NOTE
        #   A = Height of pad 0
        #   B = Height of pads 1,2
        #   C = Height of pad 3
        # -->
        #   A = 0.5
        #
        #   (1. - 2 * offset) * B = (1. - 2*offset - margin) * C
        #   ==>
        #   B = C * (1. - 2*offset - margin) / (1. - 2 * offset)
        #   ==>
        #   B = C * (1 - margin / (1. - 2 * offset))
        #
        #   A + 2 * B + C = 1
        #   ==>
        #   A + 2 * C * (1 - margin / (1. - 2 * offset)) + C = 1
        #   ==>
        #   C = (1 - A) / (1 + 2 * (1 - margin / (1. - 2 * offset)))

        A = 0.5
        C = (1 - A) / (1 + 2 * (1 - margin / (1. - 2 * offset)))
        B = C * (1 - margin / (1. - 2 * offset))

        c = rp.canvas(batch=True,
                      num_pads=4,
                      fraction=(A, B, B, C),
                      size=(600, 700))

        # Set pad margins
        c.pad(0)._bare().SetBottomMargin(offset)
        c.pad(1)._bare().SetTopMargin(offset)
        c.pad(1)._bare().SetBottomMargin(offset)
        c.pad(2)._bare().SetTopMargin(offset)
        c.pad(2)._bare().SetBottomMargin(offset)
        c.pad(3)._bare().SetTopMargin(offset)
        c.pad(3)._bare().SetBottomMargin(offset + margin)

        # Styling. Local copies are used, so that the labels do not leak into
        # the module-level `HISTSTYLE`.
        style_pass = dict(HISTSTYLE[True], label='Passing cut, #it{P}')
        style_fail = dict(HISTSTYLE[False], label='Failing cut, #it{F}')

        # Histograms
        F = c.hist(m[~msk_pass],
                   bins=MASSBINS,
                   weights=w[~msk_pass],
                   normalise=True,
                   **style_fail)
        P = c.hist(m[msk_pass],
                   bins=MASSBINS,
                   weights=w[msk_pass],
                   normalise=True,
                   **style_pass)

        P, F = map(root_numpy.hist2array, [P, F])
        M = (P + F) / 2
        c.hist(M,
               bins=MASSBINS,
               normalise=True,
               linewidth=3,
               linecolor=ROOT.kViolet,
               linestyle=2,
               label='Average, #it{M}')

        # Compute divergences. Empty bins contribute zero (0 * log 0 := 0)
        # instead of NaN, which would otherwise also spoil the cumulative
        # sum for all subsequent bins.
        with np.errstate(divide='ignore', invalid='ignore'):
            KL_PM = np.where(P > 0, -P * np.log2(M / P), 0.)
            KL_FM = np.where(F > 0, -F * np.log2(M / F), 0.)
        jsd_bins = (KL_PM + KL_FM) / 2.
        jsd_cumulative = np.cumsum(jsd_bins)

        opts = dict(bins=MASSBINS, fillcolor=ROOT.kGray, alpha=0.5)

        # Draw divergences
        c.pad(1).hist(KL_PM, **opts)
        c.pad(1).ylim(-0.12, 0.05)
        c.pad(1).yline(0.)

        c.pad(2).hist(KL_FM, **opts)
        c.pad(2).ylim(-0.05, 0.12)
        c.pad(2).yline(0.)

        c.pad(3).hist(jsd_bins, **opts)
        c.pad(3).ylim(0., 0.03)
        c.pad(3).yline(0.)

        o = rp.overlay(c.pad(3), color=ROOT.kViolet, ndiv=502)
        o.hist(jsd_cumulative, bins=MASSBINS, linecolor=ROOT.kViolet)
        o.label("#sum_{i #leq n} JSD(P #parallel F)")
        o.lim(0, 0.2)
        #o._update_overlay()

        # Styling axes; the large offsets push the x-axis titles and labels
        # of the upper pads out of view
        c.pad(0)._xaxis().SetTitleOffset(999.)
        c.pad(1)._xaxis().SetTitleOffset(999.)
        c.pad(2)._xaxis().SetTitleOffset(999.)
        c.pad(3)._xaxis().SetTitleOffset(5.)
        c.pad(0)._xaxis().SetLabelOffset(999.)
        c.pad(1)._xaxis().SetLabelOffset(999.)
        c.pad(2)._xaxis().SetLabelOffset(999.)

        c.pad(0)._yaxis().SetNdivisions(505)
        c.pad(1)._yaxis().SetNdivisions(502)
        c.pad(2)._yaxis().SetNdivisions(502)
        c.pad(3)._yaxis().SetNdivisions(502)

        c.pad(0).ylim(0, 0.20)
        c.pad(0).cd()
        c.pad(0)._get_first_primitive().Draw('SAME AXIS')

        # Decorations
        c.text(TEXT + [
            "Multijets, training dataset",
            "Cut on {:s} at #varepsilon_{{sig}}^{{rel}} = 50%".format(
                latex(var, ROOT=True)),
            "p_{{T}} #in  [{:.0f}, {:.0f}] GeV".format(*pt_bin)
        ],
               qualifier='Simulation Internal')
        c.legend(width=0.25)
        c.xlabel("Large-#it{R} jet mass [GeV]")
        c.ylabel("Fraction of jets")
        c.pad(1).ylabel('KL(P #parallel M)')
        c.pad(2).ylabel('KL(F #parallel M)')
        c.pad(3).ylabel('JSD(P #parallel F)')

        # Save
        c.save('figures/massdecorrelationmetric_{:s}__pT{:.0f}_{:.0f}GeV.pdf'.
               format(var, *pt_bin))

    return 0
def plot_full (*argv):
    """
    Method for delegating plotting.

    Draws a 2x2 canvas summarising tagger robustness: the top row shows the
    background rejection and the bottom row shows 1/JSD, each as a function
    of jet pT (left column) and of the number of primary vertices (right
    column).

    Arguments (positional, unpacked from `argv`):
        data: Not used here; part of the common plotting call signature.
        args: Namespace; only `args.show` is read (the canvas runs in batch
            mode unless it is set).
        features: Ordered list of feature names. Features at odd positions
            are treated as the mass-decorrelated partner of the preceding
            one: pairs share a colour and even-indexed features are hidden
            in the NPV column.
        bins: Dict with bin edges under the keys 'pt' and 'npv'. NOTE: the
            last 'npv' edge is floored *in place*.
        effs: Not used here.
        rejs, jsds: Indexed as `[var][feat]`; each entry is a sequence of
            (mean, std) pairs (presumably one per bin -- confirm with caller).
        meanx: Dict of marker x-coordinates per variable.
        jsd_limits: Dict holding, per variable, a sequence of
            (x, y, stat. error, syst. error) tuples.
        masscut: If truthy, a mass-window line is added to the text box.

    Returns:
        The `rp.canvas` holding the finished figure.
    """

    # Unpack arguments
    data, args, features, bins, effs, rejs, jsds, meanx, jsd_limits, masscut = argv

    def as_flat_floats (arr):
        """Flatten `arr` in C order and cast to float before handing it to ROOT."""
        return arr.flatten('C').astype(float)

    with TemporaryStyle() as style:

        # Set styles
        scale      = 1.0
        scale_axis = 0.7
        margin_squeeze = 0.035
        margin_vert    = 0.20
        margin_hori    = 0.35
        size = (800, 600)

        style.SetTextSize(scale_axis * style.GetTextSize())
        for coord in ['x', 'y', 'z']:
            style.SetLabelSize(scale_axis * style.GetLabelSize(coord), coord)
            style.SetTitleSize(scale_axis * style.GetTitleSize(coord), coord)
            pass
        style.SetLegendTextSize(style.GetLegendTextSize() * scale)
        style.SetTickLength(0.05,                                                               'x')
        style.SetTickLength(0.07 * (float(size[0])/float(size[1])) * (margin_hori/margin_vert), 'y')

        # Canvas
        c = rp.canvas(num_pads=(2,2), size=size, batch=not args.show)

        # Margins
        # -- Outer margins are wide (room for labels/legends); the margins
        #    between neighbouring pads are squeezed.
        c.pads()[0]._bare().SetTopMargin   (margin_vert)
        c.pads()[1]._bare().SetTopMargin   (margin_vert)
        c.pads()[2]._bare().SetBottomMargin(margin_vert)
        c.pads()[3]._bare().SetBottomMargin(margin_vert)

        c.pads()[0]._bare().SetLeftMargin  (margin_hori)
        c.pads()[2]._bare().SetLeftMargin  (margin_hori)
        c.pads()[1]._bare().SetRightMargin (margin_hori)
        c.pads()[3]._bare().SetRightMargin (margin_hori)

        c.pads()[1]._bare().SetLeftMargin  (margin_squeeze)
        c.pads()[3]._bare().SetLeftMargin  (margin_squeeze)
        c.pads()[0]._bare().SetRightMargin (margin_squeeze)
        c.pads()[2]._bare().SetRightMargin (margin_squeeze)

        c.pads()[0]._bare().SetBottomMargin(margin_squeeze)
        c.pads()[1]._bare().SetBottomMargin(margin_squeeze)
        c.pads()[2]._bare().SetTopMargin   (margin_squeeze)
        c.pads()[3]._bare().SetTopMargin   (margin_squeeze)

        # To fix 30.5 --> 30 for NPV (modifies the caller's `bins` in place)
        bins['npv'][-1] = np.floor(bins['npv'][-1])

        # Plots
        # -- References: shaded boxes below "random guessing" (rejection = 2)
        #    and "maximal sculpting" (1/JSD = 1)
        boxopts  = dict(fillcolor=ROOT.kBlack, alpha=0.05, linecolor=ROOT.kGray + 2, linewidth=1, option='HIST')
        c.pads()[0].hist([2], bins=[bins['pt'] [0], bins['pt'] [-1]], **boxopts)
        c.pads()[1].hist([2], bins=[bins['npv'][0], bins['npv'][-1]], **boxopts)
        c.pads()[2].hist([1], bins=[bins['pt'] [0], bins['pt'] [-1]], **boxopts)
        c.pads()[3].hist([1], bins=[bins['npv'][0], bins['npv'][-1]], **boxopts)

        # Pad index is `col + row * nb_col`, with row 0 = rejection, row 1 = 1/JSD
        nb_col = 2
        for col, var in enumerate(['pt', 'npv']):
            for is_simple in [True, False]:
                for ifeat, feat in filter(lambda t: is_simple == signal_low(t[1]), enumerate(features)):

                    opts = dict(
                        linecolor   = rp.colours[(ifeat // 2)],
                        markercolor = rp.colours[(ifeat // 2)],
                        fillcolor   = rp.colours[(ifeat // 2)],
                        linestyle   = 1 + (ifeat % 2),
                        alpha       = 0.3,
                        option      = 'E2',
                    )

                    mean_rej, std_rej = map(np.array, zip(*rejs[var][feat]))  # @TEMP
                    mean_jsd, std_jsd = map(np.array, zip(*jsds[var][feat]))

                    # Only _show_ mass-decorrelated features for `npv`: the
                    # others are pushed far outside the visible y-range
                    # rather than skipped.
                    if (col == 1) and (ifeat % 2 == 0):
                        mean_rej *= -9999.
                        mean_jsd *= -9999.
                        pass

                    # Error boxes
                    x    = np.array(bins[var][:-1]) + 0.5 * np.diff(bins[var])
                    xerr = 0.5 * np.diff(bins[var])
                    graph_rej = ROOT.TGraphErrors(len(x), x, mean_rej, xerr, std_rej)
                    graph_jsd = ROOT.TGraphErrors(len(x), x, mean_jsd, xerr, std_jsd)

                    c.pads()[col + 0 * nb_col].hist(graph_rej, **opts)
                    c.pads()[col + 1 * nb_col].hist(graph_jsd, **opts)

                    # Markers and lines
                    opts['option']      = 'PE2L'
                    opts['markerstyle'] = 20 + 4 * (ifeat % 2)

                    graph_rej = ROOT.TGraph(len(x), meanx[var], mean_rej)
                    graph_jsd = ROOT.TGraph(len(x), meanx[var], mean_jsd)

                    # Legend entries are split between rows: non-simple (MVA)
                    # features are labelled on the rejection pad, simple
                    # (analytical) ones on the 1/JSD pad.
                    c.pads()[col + 0 * nb_col].hist(graph_rej, label=latex(feat, ROOT=True) if not is_simple else None, **opts)
                    c.pads()[col + 1 * nb_col].hist(graph_jsd, label=latex(feat, ROOT=True) if     is_simple else None, **opts)
                    pass
                pass

            # Meaningful limits on JSD
            x, y, ey_stat, ey_syst  = map(np.array, zip(*jsd_limits[var]))
            ex = np.zeros_like(x)
            # Stretch the band to the full axis range
            x[0]  = bins[var][0]
            x[-1] = bins[var][-1]
            ey_comb = np.sqrt(np.square(ey_stat) + np.square(ey_syst))
            gr_stat = ROOT.TGraphErrors(len(x), *list(map(as_flat_floats, [x, y, ex, ey_stat])))
            gr_comb = ROOT.TGraphErrors(len(x), *list(map(as_flat_floats, [x, y, ex, ey_comb])))
            smooth_tgrapherrors(gr_stat, ntimes=2)
            smooth_tgrapherrors(gr_comb, ntimes=2)
            c.pads()[col + 1 * nb_col].graph(gr_comb,                                        fillcolor=ROOT.kBlack, alpha=0.03, option='3')
            c.pads()[col + 1 * nb_col].graph(gr_stat, linestyle=2, linecolor=ROOT.kGray + 1, fillcolor=ROOT.kBlack, alpha=0.03, option='L3')

            if col == 0:
                # Anchor the label at the last point of the combined band
                x_, y_ = ROOT.Double(0), ROOT.Double(0)
                idx = gr_comb.GetN() - 1
                gr_comb.GetPoint(idx, x_,  y_)
                ey_ = gr_comb.GetErrorY(idx)
                x_, y_ = map(float, (x_, y_))
                c.pads()[col + 1 * nb_col].latex("Mean stat. #oplus #varepsilon_{bkg}^{rel} var. limit     ", x_, y_ + 0.75 * ey_, align=31, textsize=11 * scale, angle=0, textcolor=ROOT.kGray + 2)
                pass

            # Decorations
            # -- x-axis label
            if   var == 'pt':
                xlabel = "Large-#it{R} jet p_{T} [GeV]"
            elif var == 'npv':
                xlabel = "Number of reconstructed vertices N_{PV}"
            else:
                raise NotImplementedError("Variable {} is not supported.".format(var))

            c.pads()[col + 1 * nb_col].xlabel(xlabel)
            if col == 0:
                # The nested (empty) #splitline's only serve to shift the title
                pattern = "#splitline{#splitline{#splitline{%s}{}}{#splitline{}{}}}{#splitline{#splitline{}{}}{#splitline{}{}}}"
                c.pads()[col + 0 * nb_col].ylabel(pattern % "1/#varepsilon_{bkg}^{rel} @ #varepsilon_{sig}^{rel} = 50%")
                c.pads()[col + 1 * nb_col].ylabel(pattern % "1/JSD @ #varepsilon_{sig}^{rel} = 50%")
                pass

            xmid = (bins[var][0] + bins[var][-1]) * 0.5
            c.pads()[col + 0 * nb_col].latex("Random guessing",   xmid, 2 * 0.9, align=23, textsize=11 * scale, angle=0, textcolor=ROOT.kGray + 2)
            c.pads()[col + 1 * nb_col].latex("Maximal sculpting", xmid, 1 * 0.8, align=23, textsize=11 * scale, angle=0, textcolor=ROOT.kGray + 2)

            c.pads()[col + 0 * nb_col].ylim(1,   70)  # 500
            c.pads()[col + 1 * nb_col].ylim(0.2, 7E+04)  # 2E+05

            c.pads()[col + 0 * nb_col].logy()
            c.pads()[col + 1 * nb_col].logy()

            pass  # end: loop `col`

        # Axis offsets. These do not depend on the column, so they are set
        # once: hide x-axis labels/titles on the top row and y-axis
        # labels/titles on the right column.
        c.pads()[0]._xaxis().SetLabelOffset(9999.)
        c.pads()[0]._xaxis().SetTitleOffset(9999.)
        c.pads()[1]._xaxis().SetLabelOffset(9999.)
        c.pads()[1]._xaxis().SetTitleOffset(9999.)

        c.pads()[2]._xaxis().SetTitleOffset(2.3)
        c.pads()[3]._xaxis().SetTitleOffset(2.3)

        c.pads()[1]._yaxis().SetLabelOffset(9999.)
        c.pads()[1]._yaxis().SetTitleOffset(9999.)
        c.pads()[3]._yaxis().SetLabelOffset(9999.)
        c.pads()[3]._yaxis().SetTitleOffset(9999.)

        # Draw class-specific legend in the right-most column
        legend_col = nb_col - 1
        width = margin_hori - 0.03
        c.pads()[legend_col + 0 * nb_col].legend(header='MVA:',        width=width, xmin=1. - margin_hori + 0.03, ymax=1. - margin_vert    + 0.02)
        c.pads()[legend_col + 1 * nb_col].legend(header='Analytical:', width=width, xmin=1. - margin_hori + 0.03, ymax=1. - margin_squeeze + 0.02)
        c.pads()[legend_col + 0 * nb_col]._legends[-1].SetTextSize(style.GetLegendTextSize())
        c.pads()[legend_col + 1 * nb_col]._legends[-1].SetTextSize(style.GetLegendTextSize())

        # Common decorations
        for pad in c.pads():
            pad._xaxis().SetNdivisions(504)
            pass

        c.text([], qualifier=QUALIFIER, xmin=margin_hori, ymax=1. - margin_vert + 0.03)

        c.pads()[1].text(["#sqrt{s} = 13 TeV,  #it{W} jet tagging"] + \
                        (['m #in  [60, 100] GeV'] if masscut else []),
                        ATLAS=False, ymax=1. - margin_vert - 0.10)

        c.pads()[3].text(["Multijets"],
                         ATLAS=False, ymax=1. - margin_squeeze - 0.10)

        # Arrows: vertical text along the centre line of the full canvas
        c._bare().cd()
        tlatex = ROOT.TLatex()
        tlatex.SetTextAngle(90)
        tlatex.SetTextAlign(22)
        tlatex.SetTextSize(11)
        tlatex.SetTextColor(ROOT.kGray + 2)
        tlatex.DrawLatexNDC(0.5, 0. + 0.5 * (margin_vert + 0.5 * (1.0 - margin_squeeze - margin_vert)), "    Less sculpting #rightarrow")
        tlatex.DrawLatexNDC(0.5, 1. - 0.5 * (margin_vert + 0.5 * (1.0 - margin_squeeze - margin_vert)), "     Greater separation #rightarrow")

        pass  # Temporary style scope

    return c
Пример #22
0
def main (args):
    """
    Plot the large-R jet mass distribution in slices of jet pT.

    For each of 18 equal-width pT slices between 200 and 2000 GeV, the
    inclusive mass spectrum (weighted by 'weight_adv') is overlaid with the
    spectrum inside the slice, weighted by 'weight_adv' and by 'weight_test'
    respectively. One PDF per slice is written to
    'figures/temp_mass_pT{lo}_{hi}.pdf'.

    Arguments:
        args: Command-line arguments, forwarded to `initialise`.

    NOTE: The function returns right after the per-slice plots. The 1D/2D
    distribution checks further down are currently unreachable.
    """

    # Definitions
    # -- Copy the per-class style dicts as well as the outer dict, so that
    #    setting the labels below does not overwrite the module-level
    #    HISTSTYLE entries. (`dict(**HISTSTYLE)` was only a shallow copy and
    #    is rejected by Python 3 for non-string keys.)
    histstyle = {key: dict(opts) for key, opts in HISTSTYLE.items()}

    # Initialise
    args, cfg = initialise(args)

    # Load data
    data, features, _ = load_data(args.input + 'data.h5', background=True, train=True)

    pt_edges = np.linspace(200, 2000, 18 + 1, endpoint=True)
    pt_bins = zip(pt_edges[:-1], pt_edges[1:])
    bins = np.linspace(50, 300, (300 - 50) // 10 + 1, endpoint=True)

    # The inclusive label does not depend on the slice
    histstyle[True] ['label'] = 'Inclusive'

    for pt_bin in pt_bins:

        histstyle[False]['label'] = 'p_{{T}} #in  [{:.0f}, {:.0f}] GeV'.format(*pt_bin)

        # Canvas
        c = rp.canvas(batch=True)

        # Plots
        # NOTE(review): these calls pass `weight=` whereas the unreachable
        # code below passes `weights=` -- confirm which keyword `hist` expects.
        msk = (data['pt'] > pt_bin[0]) & (data['pt'] < pt_bin[1])
        c.hist(data['m'].values,      bins=bins, weight=data['weight_adv'] .values,      normalise=True, **histstyle[True])
        c.hist(data['m'].values[msk], bins=bins, weight=data['weight_adv'] .values[msk], normalise=True, **histstyle[False])
        c.hist(data['m'].values[msk], bins=bins, weight=data['weight_test'].values[msk], normalise=True, label="Testing weight", linewidth=2, linecolor=ROOT.kGreen)

        # Decorations
        c.legend()
        c.xlabel("Large-#it{R} jet mass [GeV]")
        c.ylabel("Fraction of jets")

        # Save
        c.save('figures/temp_mass_pT{:.0f}_{:.0f}.pdf'.format(*pt_bin))
        pass

    return

    # --------------------------------------------------------------------
    # NOTE: Everything below is unreachable because of the `return` above.
    # It is kept as-is; remove the early `return` to re-enable it.
    # --------------------------------------------------------------------

    # Perform selection  @NOTE: For Rel. 20.7 only
    #data = data[(data['m']  >  50) & (data['m']  <  300)]
    #data = data[(data['pt'] > 200) & (data['pt'] < 2000)]

    # Add variables  @NOTE: For Rel. 20.7 only
    #data['rho']    = pd.Series(np.log(np.square(data['m']) / np.square(data['pt'])), index=data.index)
    #data['rhoDDT'] = pd.Series(np.log(np.square(data['m']) / data['pt'] / 1.), index=data.index)

    data['logm'] = pd.Series(np.log(data['m']), index=data.index)

    # Check variable distributions
    # -- Axis definitions as (number of bins, lower edge, upper edge)
    axes = {
        'pt':   (45, 200, 2000),
        'm':    (50,  50,  300),
        'rho':  (50,  -8,    0),
        'logm': (50,  np.log(50),  np.log(300)),
    }
    weight = 'weight_adv'  # 'weight_test' / 'weight'
    pt_range = (200., 2000.)
    msk_pt = (data['pt'] > pt_range[0]) & (data['pt'] < pt_range[1])
    for var in axes:

        # Canvas
        c = rp.canvas(num_pads=2, batch=True)

        # Plot
        bins = np.linspace(axes[var][1], axes[var][2], axes[var][0] + 1, endpoint=True)
        for adv in [0,1]:
            msk  = data['signal'] == 0   # @TEMP signal
            msk &= msk_pt
            opts = dict(normalise=True, **HISTSTYLE[adv])  # @TEMP signal
            opts['label'] = 'adv' if adv else 'test'
            if adv:
                h1 = c.hist(data.loc[msk, var].values, bins=bins, weights=data.loc[msk, weight].values, **opts)
            else:
                h2 = c.hist(data.loc[msk, var].values, bins=bins, weights=data.loc[msk, 'weight_test'].values, **opts)
                pass
            pass

        # Ratio
        c.pads()[1].ylim(0,2)
        c.ratio_plot((h1,h2), oob=True)

        # Decorations
        c.legend()
        c.xlabel(latex(var, ROOT=True))
        c.ylabel("Fraction of jets")
        c.pads()[1].ylabel("adv/test")
        #c.logy()
        c.text(TEXT + ['p_{{T}} #in  [{:.0f}, {:.0f}] GeV'.format(pt_range[0], pt_range[1])], qualifier=QUALIFIER)

        # Save
        mkdir('figures/distributions')
        c.save('figures/distributions/incl_{}.pdf'.format(var))
        pass


    # 2D histograms
    msk = data['signal'] == 0
    axisvars = sorted(list(axes))
    for i,varx in enumerate(axisvars):
        for vary in axisvars[i+1:]:
            # Canvas
            c = ROOT.TCanvas()
            c.SetRightMargin(0.20)

            # Create, fill histogram
            h2 = ROOT.TH2F('{}_{}'.format(varx, vary), "", *(axes[varx] + axes[vary]))
            root_numpy.fill_hist(h2, data.loc[msk, [varx, vary]].values, 100. * data.loc[msk, weight].values)

            # Draw
            h2.Draw("COLZ")

            # Decorations
            h2.GetXaxis().SetTitle(latex(varx, ROOT=True))
            h2.GetYaxis().SetTitle(latex(vary, ROOT=True))
            c.SetLogz()

            # Save
            c.SaveAs('figures/distributions/2d_{}_{}.pdf'.format(varx, vary))
            pass
        pass

    return
def plot_individual (*argv):
    """
    Method for delegating plotting.

    Produces one stand-alone figure per combination of variable
    ('pt', 'npv') and metric ('rej' = background rejection, 'jsd' = 1/JSD),
    plus two legend-only figures (variable `None`): 'mva' for metric 'rej'
    and 'analytical' for metric 'jsd'. Each figure is saved as
    'figures/robustness__<var>_<metric>[_masscut].pdf'.

    Arguments (positional, unpacked from `argv`):
        data: Not used here; part of the common plotting call signature.
        args: Namespace; only `args.show` is read (canvases run in batch
            mode unless it is set).
        features: Ordered list of feature names. Features at odd positions
            are treated as the mass-decorrelated partner of the preceding
            one: pairs share a colour and even-indexed features are hidden
            in the NPV figures.
        bins: Dict with bin edges under the keys 'pt' and 'npv'. NOTE: the
            last 'npv' edge is floored *in place*.
        effs: Not used here.
        rejs, jsds: Indexed as `[var][feat]`; each entry is a sequence of
            (mean, std) pairs (presumably one per bin -- confirm with caller).
        meanx: Dict of marker x-coordinates per variable.
        jsd_limits: Dict holding, per variable, a sequence of
            (x, y, stat. error, syst. error) tuples.
        masscut: If truthy, a mass-window line is added to the text box and
            '_masscut' is appended to the output file names.
    """

    # Unpack arguments
    data, args, features, bins, effs, rejs, jsds, meanx, jsd_limits, masscut = argv

    def as_flat_floats (arr):
        """Flatten `arr` in C order and cast to float before handing it to ROOT."""
        return arr.flatten('C').astype(float)

    # To fix 30.5 --> 30 for NPV (modifies the caller's `bins` in place)
    bins['npv'][-1] = np.floor(bins['npv'][-1])

    # Loop combinations
    for var, metric in itertools.product(['pt', 'npv', None], ['rej', 'jsd']):

        with TemporaryStyle() as style:

            # Set styles
            scale      = 1.0
            scale_axis = 0.7
            margin_squeeze = 0.07
            margin_vert    = 0.15
            margin_hori    = 0.17
            size = (350, 300)

            style.SetTextSize(scale_axis * style.GetTextSize())
            for coord in ['x', 'y', 'z']:
                style.SetLabelSize(scale_axis * style.GetLabelSize(coord), coord)
                style.SetTitleSize(scale_axis * style.GetTitleSize(coord), coord)
                pass
            style.SetTitleOffset(1.8, 'y')
            style.SetLegendTextSize(style.GetLegendTextSize() * scale)
            style.SetTickLength(0.05, 'x')
            style.SetTickLength(0.05, 'y')

            # Canvas (legend-only figures use a narrow canvas)
            c = rp.canvas(size=size if var is not None else (150, 300), batch=not args.show)

            # Margins (legend-only figures shrink the frame to almost nothing)
            tpad = c.pad()._bare()
            tpad.SetBottomMargin(margin_vert    if var is not None else 0.49)
            tpad.SetLeftMargin  (margin_hori    if var is not None else 0.49)
            tpad.SetRightMargin (margin_squeeze if var is not None else 0.49)
            tpad.SetTopMargin   (margin_vert    if var is not None else 0.49)

            if var is not None:

                # Only the container for the requested metric is needed
                if   metric == 'rej':
                    values = rejs
                elif metric == 'jsd':
                    values = jsds
                else:
                    raise NotImplementedError("Metric {} is not supported.".format(metric))

                # Plots
                # -- References: shaded box below "random guessing"
                #    (rejection = 2) or "maximal sculpting" (1/JSD = 1)
                boxopts  = dict(fillcolor=ROOT.kBlack, alpha=0.05, linecolor=ROOT.kGray + 2, linewidth=1, option='HIST')
                c.hist([2 if metric == 'rej' else 1], bins=[bins[var] [0], bins[var] [-1]], **boxopts)

                for is_simple in [True, False]:
                    for ifeat, feat in filter(lambda t: is_simple == signal_low(t[1]), enumerate(features)):

                        opts = dict(
                            linecolor   = rp.colours[(ifeat // 2)],
                            markercolor = rp.colours[(ifeat // 2)],
                            fillcolor   = rp.colours[(ifeat // 2)],
                            linestyle   = 1 + (ifeat % 2),
                            alpha       = 0.3,
                            option      = 'E2',
                        )

                        mean, std = map(np.array, zip(*values[var][feat]))

                        # Only _show_ mass-decorrelated features for `npv`:
                        # the others are pushed far outside the visible
                        # y-range rather than skipped.
                        if (var == 'npv') and (ifeat % 2 == 0):
                            mean *= -9999.
                            pass

                        # Error boxes
                        x    = np.array(bins[var][:-1]) + 0.5 * np.diff(bins[var])
                        xerr = 0.5 * np.diff(bins[var])
                        graph = ROOT.TGraphErrors(len(x), x, mean, xerr, std)
                        c.hist(graph, **opts)

                        # Markers and lines
                        opts['option']      = 'PE2L'
                        opts['markerstyle'] = 20 + 4 * (ifeat % 2)

                        graph = ROOT.TGraph(len(x), meanx[var], mean)

                        # Non-simple (MVA) features get a legend entry on the
                        # rejection figure, simple (analytical) ones on the
                        # 1/JSD figure.
                        label = latex(feat, ROOT=True) if is_simple == (metric == 'jsd') else None
                        c.hist(graph, label=label, **opts)
                        pass
                    pass   # end loop: `is_simple`

                # Meaningful limits on JSD
                if metric == 'jsd':
                    x, y, ey_stat, ey_syst  = map(np.array, zip(*jsd_limits[var]))
                    ex = np.zeros_like(x)
                    # Stretch the band to the full axis range
                    x[0]  = bins[var][0]
                    x[-1] = bins[var][-1]
                    ey_comb = np.sqrt(np.square(ey_stat) + np.square(ey_syst))
                    gr_stat = ROOT.TGraphErrors(len(x), *list(map(as_flat_floats, [x, y, ex, ey_stat])))
                    gr_comb = ROOT.TGraphErrors(len(x), *list(map(as_flat_floats, [x, y, ex, ey_comb])))
                    smooth_tgrapherrors(gr_stat, ntimes=2)
                    smooth_tgrapherrors(gr_comb, ntimes=2)
                    c.graph(gr_comb,                                        fillcolor=ROOT.kBlack, alpha=0.03, option='3')
                    c.graph(gr_stat, linestyle=2, linecolor=ROOT.kGray + 1, fillcolor=ROOT.kBlack, alpha=0.03, option='L3')

                    # The limit is only labelled on the pT figure, anchored
                    # at the last point of the combined band
                    if var == 'pt':
                        x_, y_ = ROOT.Double(0), ROOT.Double(0)
                        idx = gr_comb.GetN() - 1
                        gr_comb.GetPoint(idx, x_,  y_)
                        ey_ = gr_comb.GetErrorY(idx)
                        x_, y_ = map(float, (x_, y_))
                        c.latex("Mean stat. #oplus #varepsilon_{bkg}^{rel} var. limit     ", x_, y_ - 1.0 * ey_, align=31, textsize=11 * scale, angle=0, textcolor=ROOT.kGray + 2)
                        pass
                    pass

                # Decorations
                # -- x-axis label
                if   var == 'pt':
                    xlabel = "Large-#it{R} jet p_{T} [GeV]"
                elif var == 'npv':
                    xlabel = "Number of reconstructed vertices N_{PV}"
                else:
                    raise NotImplementedError("Variable {} is not supported.".format(var))

                c.xlabel(xlabel)

                # -- y-axis label
                if metric == 'rej':
                    ylabel = "1/#varepsilon_{bkg}^{rel} @ #varepsilon_{sig}^{rel} = 50%"
                else:
                    ylabel = "1/JSD @ #varepsilon_{sig}^{rel} = 50%"
                    pass

                c.ylabel(ylabel)

                xmid = (bins[var][0] + bins[var][-1]) * 0.5
                if metric == 'rej':
                    c.latex("Random guessing",   xmid, 2 * 0.9, align=23, textsize=11 * scale, angle=0, textcolor=ROOT.kGray + 2)
                    c.ylim(1,   100)  # 500
                else:
                    c.latex("Maximal sculpting", xmid, 1 * 0.8, align=23, textsize=11 * scale, angle=0, textcolor=ROOT.kGray + 2)
                    c.ylim(0.2, 7E+04)  # 2E+05
                    pass

                c.logy()

                # Common decorations
                c.pad()._xaxis().SetNdivisions(504)

                c.text([], qualifier=QUALIFIER, xmin=margin_hori, ymax=1. - margin_vert + 0.03)

                c.text( ["#sqrt{s} = 13 TeV,  #it{W} jet tagging"] + \
                       (['m #in  [60, 100] GeV'] if masscut else []) + \
                       (['Multijets'] if metric == 'jsd' else []),
                       ATLAS=False, ymax=0.40 if (masscut and (var == 'pt') and (metric == 'rej')) else None)

                # Output file name components
                name_var, name_metric = var, metric

            else:

                # Legend-only figure: draw dummy histograms carrying the
                # same styles as the real figures, so that the legend
                # entries match.
                for is_simple in [True, False]:
                    for ifeat, feat in filter(lambda t: is_simple == signal_low(t[1]), enumerate(features)):

                        opts = dict(
                            linecolor   = rp.colours[(ifeat // 2)],
                            markercolor = rp.colours[(ifeat // 2)],
                            fillcolor   = rp.colours[(ifeat // 2)],
                            linestyle   = 1 + (ifeat % 2),
                            alpha       = 0.3,
                            option      = 'PE2L',
                            markerstyle = 20 + 4 * (ifeat % 2),
                        )

                        label = latex(feat, ROOT=True) if is_simple == (metric == 'jsd') else None
                        c.hist([0.5], bins=[0,1], label=label, **opts)
                        pass
                    pass

                # "Remove" axes
                pad = c.pad()
                tpad = pad._bare()
                white = ROOT.kWhite
                pad._xaxis().SetLabelOffset(9999.)
                pad._xaxis().SetTitleOffset(9999.)
                pad._yaxis().SetLabelOffset(9999.)
                pad._yaxis().SetTitleOffset(9999.)
                pad._xaxis().SetAxisColor  (white)  # Remove "double ticks"
                pad._yaxis().SetAxisColor  (white)  # Remove "double ticks"
                tpad.SetFillColor          (white)
                tpad.SetFrameFillColor     (white)
                c._bare().SetFillColor     (white)
                c._bare().SetFrameFillColor(white)

                # Draw class-specific legend
                width = 0.90 #margin_hori - 0.03
                header = 'MVA:' if metric == 'rej' else 'Analytical:'
                c.legend(header=header, width=width, xmin=0.05, ymax=1. - margin_vert + 0.02)  # xmin = margin_hori + 0.03
                c.pad()._legends[-1].SetTextSize(style.GetLegendTextSize())

                # Output file name components
                name_var    = 'legend'
                name_metric = 'mva' if metric == 'rej' else 'analytical'
                pass

            # Save
            c.save('figures/robustness__{}_{}{}.pdf'.format(name_var, name_metric, '_masscut' if masscut else ''))

            pass  # Temporary style scope

        pass
    return
Пример #24
0
def plot(*argv):
    """
    Method for delegating plotting.

    Draws the summary scatter plot of background rejection (x-axis) versus
    mass-decorrelation, 1/JSD (y-axis), with one marker per tagger, dashed
    lines connecting related taggers, and a band showing the limit on 1/JSD.

    Arguments (positional, unpacked from `argv`):
        data: Not used here; part of the common plotting call signature.
        args: Namespace; only `args.show` is read (the canvas runs in batch
            mode unless it is set).
        features: Ordered list of feature names to draw as filled/open
            markers; pairs of consecutive features share a colour.
        scan_features: Dict mapping a base feature name to a list of
            (feature name, text label) pairs for its parameter scan.
        points: List of (x, y, feature name) triples. It is read only; the
            caller's list is left unmodified.
        jsd_limits: Sequence of (x, y, y-error) triples for the limit band.
        masscut: If truthy, widens the y-range and adds a mass-cut line to
            the text box.
        pt_range: Optional (min, max) pT pair; if given, widens the y-range
            and adds a pT line to the text box.

    Returns:
        The `rp.canvas` holding the finished figure.

    Raises:
        ValueError: If a requested feature has no entry in `points`.
    """

    # Unpack arguments
    data, args, features, scan_features, points, jsd_limits, masscut, pt_range = argv

    # Feature name of every point, in order; built once and used to look up
    # coordinates by name (first match wins, as with `list.index`).
    names = [point[2] for point in points]

    with TemporaryStyle() as style:

        # Compute yaxis range: extend the upper edge for each active selection
        ranges = int(pt_range is not None) + int(masscut)
        mult = 10. if ranges == 2 else (5. if ranges == 1 else 1.)

        # Define variable(s)
        axisrangex = (1.4, 100.)
        axisrangey = (0.3, 100000. * mult)
        aminx, amaxx = axisrangex
        aminy, amaxy = axisrangey

        # Styling
        scale = 0.95
        style.SetTitleOffset(1.8, 'x')
        style.SetTitleOffset(1.6, 'y')
        style.SetTextSize(style.GetTextSize() * scale)
        style.SetLegendTextSize(style.GetLegendTextSize() * scale)

        # Canvas
        c = rp.canvas(batch=not args.show, size=(600, 600))

        # Reference lines
        nullopts = dict(linecolor=0,
                        linewidth=0,
                        linestyle=0,
                        markerstyle=0,
                        markersize=0,
                        fillstyle=0)
        lineopts = dict(linecolor=ROOT.kGray + 2, linewidth=1, option='L')
        boxopts = dict(fillcolor=ROOT.kBlack,
                       alpha=0.05,
                       linewidth=0,
                       option='HIST')
        # -- Invisible histogram that only sets up the axes
        c.hist([aminy], bins=list(axisrangex), **nullopts)
        # -- Lines and shaded boxes at rejection = 2 and 1/JSD = 1
        c.plot([1, amaxy], bins=[2, 2], **lineopts)
        c.plot([1, 1], bins=[2, amaxx], **lineopts)
        c.hist([amaxy], bins=[aminx, 2], **boxopts)
        c.hist([1], bins=[2, amaxx], **boxopts)

        # Meaningful limits on 1/JSD
        x, y, ey = map(np.array, zip(*jsd_limits))
        ex = np.zeros_like(ey)
        gr = ROOT.TGraphErrors(len(x), x, y, ex, ey)
        smooth_tgrapherrors(gr, ntimes=3)
        c.graph(gr,
                linestyle=2,
                linecolor=ROOT.kGray + 1,
                fillcolor=ROOT.kBlack,
                alpha=0.03,
                option='L3')

        # -- Label the band at a fixed point along the graph
        x_, y_ = ROOT.Double(0), ROOT.Double(0)
        idx = 3
        gr.GetPoint(idx, x_, y_)
        ey_ = gr.GetErrorY(idx)
        x_, y_ = map(float, (x_, y_))
        c.latex("Statistical limit",
                x_,
                y_ + ey_,
                align=21,
                textsize=11,
                angle=-5,
                textcolor=ROOT.kGray + 2)

        # Markers
        for is_simple in [True, False]:

            # Split the legend into simple- and MVA taggers
            for ifeat, feat in filter(lambda t: is_simple == signal_low(t[1]),
                                      enumerate(features)):

                # Coordinates, label
                x, y, label = points[names.index(feat)]

                # Overwrite default name of parameter-scan classifier
                label = 'ANN' if label.startswith('ANN') else label
                label = 'uBoost' if label.startswith('uBoost') else label

                # Style
                colour = rp.colours[(ifeat // 2) % len(rp.colours)]
                markerstyle = 20 + (ifeat % 2) * 4

                # Draw
                c.graph([y],
                        bins=[x],
                        markercolor=colour,
                        markerstyle=markerstyle,
                        label='#scale[%.1f]{%s}' %
                        (scale, latex(label, ROOT=True)),
                        option='P')
                pass

            # Draw class-specific legend
            width = 0.15
            c.legend(header=("Analytical:" if is_simple else "MVA:"),
                     width=width,
                     xmin=0.60 + (width + 0.02) * (is_simple),
                     ymax=0.888)  #, ymax=0.827)
            pass

        # Make legends transparent
        for leg in c.pads()[0]._legends:
            leg.SetFillStyle(0)
            pass

        # Markers, parametrised decorrelation
        for base_feat, group in scan_features.items():

            # Get index in list of features
            ifeat = features.index(base_feat)

            # Style
            colour = rp.colours[(ifeat // 2) % len(rp.colours)]
            markerstyle = 24

            for feat, label in group:
                x, y, _ = points[names.index(feat)]

                # Draw
                c.graph([y],
                        bins=[x],
                        markercolor=colour,
                        markerstyle=markerstyle,
                        option='P')
                # Text goes to the right of the marker for 'NN' scans and to
                # the left for all others
                if base_feat == 'NN':
                    c.latex("   " + label,
                            x,
                            y,
                            textsize=11,
                            align=12,
                            textcolor=ROOT.kGray + 2)
                else:
                    c.latex(label + "   ",
                            x,
                            y,
                            textsize=11,
                            align=32,
                            textcolor=ROOT.kGray + 2)
                    pass
                pass

            # Connecting lines (scan)
            feats = [base_feat] + [entry[0] for entry in group]
            for feat1, feat2 in zip(feats[:-1], feats[1:]):
                x1, y1, _ = points[names.index(feat1)]
                x2, y2, _ = points[names.index(feat2)]

                c.graph([y1, y2],
                        bins=[x1, x2],
                        linecolor=colour,
                        linestyle=2,
                        option='L')
                pass
            pass

        # Connecting lines (simple)
        # -- Work on a copy with the second entry dropped, so that the
        #    caller's `points` list is not modified by this function.
        simple_points = list(points)

        print("points: ")
        print(points)
        simple_points.pop(1)
        print(simple_points)

        for i in range(2):
            x1, y1, _ = simple_points[2 * i + 0]
            x2, y2, _ = simple_points[2 * i + 1]
            colour = rp.colours[i]
            c.graph([y1, y2],
                    bins=[x1, x2],
                    linecolor=colour,
                    linestyle=2,
                    option='L')
            pass

        # Decorations
        c.xlabel(
            "Background rejection, 1 / #varepsilon_{bkg}^{rel} @ #varepsilon_{sig}^{rel} = 50%"
        )
        c.ylabel("Mass-decorrelation, 1 / JSD @ #varepsilon_{sig}^{rel} = 50%")
        c.xlim(*axisrangex)
        c.ylim(*axisrangey)
        c.logx()
        c.logy()

        opts_text = dict(textsize=11, textcolor=ROOT.kGray + 2)
        # Geometric midpoints between 1 and the upper axis edge (log axes)
        midpointx = np.power(10, 0.5 * np.log10(amaxx))
        midpointy = np.power(10, 0.5 * np.log10(amaxy))
        c.latex("No separation",
                1.91,
                midpointy,
                angle=90,
                align=21,
                **opts_text)
        c.latex("Maximal sculpting",
                midpointx,
                0.89,
                angle=0,
                align=23,
                **opts_text)
        c.latex("    Less sculpting #rightarrow",
                2.1,
                midpointy,
                angle=90,
                align=23,
                **opts_text)
        c.latex("     Greater separation #rightarrow",
                midpointx,
                1.1,
                angle=0,
                align=21,
                **opts_text)

        #c.text(TEXT + ["#it{W} jet tagging"], xmin=0.24, qualifier=QUALIFIER)
        c.text([], xmin=0.15, ymax=0.96, qualifier=QUALIFIER)
        c.text(TEXT + \
               ["#it{W} jet tagging"] + (
                    ["p_{{T}} #in  [{:.0f}, {:.0f}] GeV".format(pt_range[0], pt_range[1])] if pt_range is not None else []
                ) + (
                    ['Cut: m #in  [60, 100] GeV'] if masscut else []
                ),
               xmin=0.26, ATLAS=None)
        pass

    return c
Пример #25
0
def plot(*argv):
    """
    Method for delegating plotting.

    Draws one curve of inverse background efficiency versus signal
    efficiency per feature, on top of a shaded `1 / x` reference histogram
    labelled "Random guessing", and returns the canvas.

    Arguments (positional, unpacked from `argv` in this order):
        args: Object exposing a `show` attribute; the canvas is created in
            batch mode when `args.show` is falsy.
        data: Not used by this function.
        features: Sequence of feature names, ordered group by group.
        ROCs: Mapping from feature name to an `(eff_sig, eff_bkg)` pair.
        AUCs: Not used by this function.
        masscut: Falsy for "no mass cut"; otherwise an indexable
            `(low, high)` pair that is printed on the canvas.
        pt_range: `None`, or an indexable `(low, high)` pair that is
            printed on the canvas.
        appearances: Number of features in each consecutive group of
            `features`. Exactly two groups selects the per-group legend
            layout; any other number selects the split controlled by
            `signal_low` ("Analytical:" / "MVA:" legends).

    Returns:
        The canvas holding the finished figure.
    """

    # Unpack arguments (the full unpack also validates the argument count)
    args, _data, features, ROCs, _AUCs, masscut, pt_range, appearances = argv

    # Truthiness gives the same answer as the former `masscut != False` test
    # for `False` and for a `(low, high)` pair, without rebinding the
    # argument, and additionally treats `None` / empty sequences as "no cut".
    has_masscut = bool(masscut)

    # Canvas
    c = rp.canvas(batch=not args.show)

    # Plots
    # -- Random guessing
    bins = np.linspace(0.2, 1., 100 + 1, endpoint=True)

    # Insert one extra edge, 1% of a bin width above the lowest edge
    first_width = np.diff(bins[:2])[0]
    edges = np.array([bins[0], bins[0] + 0.01 * first_width] + list(bins[1:]))
    centres = edges[:-1] + 0.5 * np.diff(edges)
    c.hist(np.power(centres, -1.),
           bins=edges,
           linecolor=ROOT.kGray + 2,
           fillcolor=ROOT.kBlack,
           alpha=0.05,
           linewidth=1,
           option='HISTC')

    linestyles = [1, 3, 5, 7]

    # Start/stop offsets of each feature group within `features`
    indices = np.cumsum([0] + list(appearances))
    groups = [features[lo:hi] for lo, hi in zip(indices[:-1], indices[1:])]

    def draw_roc(feat, ifeat, igroup):
        """Draw the curve for `feat`: colour by group, line style by position."""
        eff_sig, eff_bkg = ROCs[feat]
        c.graph(np.power(eff_bkg, -1.),
                bins=eff_sig,
                # Cycle through the styles so that groups with more features
                # than available styles do not raise an IndexError
                linestyle=linestyles[ifeat % len(linestyles)],
                linecolor=rp.colours[igroup % len(rp.colours)],
                linewidth=2,
                label=latex(feat, ROOT=True),
                option='L')

    # -- ROCs
    if len(appearances) != 2:

        # Split the legend according to `signal_low`: one pass and one
        # legend per class
        for is_simple in [True, False]:
            for igroup, group in enumerate(groups):
                for ifeat, feat in enumerate(group):
                    if is_simple == signal_low(feat):
                        draw_roc(feat, ifeat, igroup)

            # Draw class-specific legend
            width = 0.17
            c.legend(header=("Analytical:" if is_simple else "MVA:"),
                     width=width,
                     xmin=0.45 + (width + 0.06) * (is_simple),
                     ymax=0.888)

    else:

        # One legend per feature group, titled after the group's first feature
        for igroup, group in enumerate(groups):
            if len(group) == 0:
                # No curves to draw and no feature to name the legend after
                continue

            first_var = igroup == 0
            for ifeat, feat in enumerate(group):
                draw_roc(feat, ifeat, igroup)

            # Draw group-specific legend. The header feature is looked up at
            # the group's start offset; indexing with `appearances[1]` (the
            # *size* of the second group) only gave the right feature when
            # both groups had the same size.
            width = 0.15
            c.legend(header=latex(features[indices[igroup]], ROOT=True) +
                     "-based:",
                     width=width,
                     xmin=0.55 + (width + 0.06) * (first_var),
                     ymax=0.888)

    # Decorations
    c.xlabel("Signal efficiency #varepsilon_{sig}^{rel}")
    c.ylabel("Background rejection 1/#varepsilon_{bkg}^{rel}")
    c.text([], xmin=0.15, ymax=0.96, qualifier=QUALIFIER, ATLAS=False)

    lines = ["#sqrt{s} = 13 TeV", "#it{W} jet tagging"]
    if pt_range is not None:
        lines.append("p_{{T}} #in  [{:.0f}, {:.0f}] GeV".format(
            pt_range[0], pt_range[1]))
    if has_masscut:
        lines.append("Cut: m #in  [{:.0f}, {:.0f}] GeV".format(
            masscut[0], masscut[1]))
    c.text(lines, ATLAS=False)

    # The number of applied selections (pT range, mass cut) sets the upper
    # y-axis limit and the tilt of the "Random guessing" label
    ranges = int(pt_range is not None) + int(has_masscut)
    mult = 10. if ranges == 2 else (2. if ranges == 1 else 1.)

    c.latex("Random guessing",
            0.4,
            1. / 0.4 * 0.9,
            align=23,
            angle=-12 + 2 * ranges,
            textsize=13,
            textcolor=ROOT.kGray + 2)
    c.xlim(0.2, 1.)
    c.ylim(1E+00, 2E+02 * mult)
    c.logy()
    c.legend()

    return c