Пример #1
0
def region_comparison_plot(tag):
    """Overlay the efficiency curves of all regions and their ratio to '1m'.

    For 2017 and 2018 separately, the table of every region is read with
    ``get_xy`` from ``output/<tag>/`` (the EGamma table for regions whose name
    ends in 'e', the SingleMuon table otherwise) and drawn on the upper pad.
    The lower pad shows every other region divided by the '1m' region. One PDF
    per year is saved to ``./output/<tag>/``.

    Parameters
    ----------
    tag
        Name of the sub-directory of ``output/`` that holds the input tables
        and receives the plots.
    """
    reference = '1m'
    outdir = f"./output/{tag}"

    for year in (2017, 2018):
        style = markers('data')
        style.update(markersize=5., fillstyle='none')
        # Dropped so that the key is not forwarded to errorbar() via **style.
        style.pop('emarker', '')

        fig, ax, rax = fig_ratio()

        centers, effs, errs = {}, {}, {}
        for region in ('1m', '2m', '1e', '1m_hlt', '2m_hlt'):
            if not region.endswith('e'):
                table = f'output/{tag}/table_{region}_SingleMuon_{year}.txt'
            else:
                table = f'output/{tag}/table_{region}_EGamma_{year}.txt'
            centers[region], effs[region], errs[region] = get_xy(table)

            style['color'] = colors[region]
            ax.errorbar(centers[region],
                        effs[region],
                        yerr=errs[region],
                        label=f'{region} region',
                        **style)

            # The reference region is listed first, so its values are already
            # available when the other regions are divided by it. Only the
            # uncertainty of the numerator is shown on the ratio.
            if region != reference:
                rax.errorbar(centers[reference],
                             effs[region] / effs[reference],
                             errs[region] / effs[reference],
                             **style)

        # Upper pad
        ax.legend()
        ax.set_ylabel("Efficiency")
        ax.xaxis.set_major_locator(MultipleLocator(200))
        ax.xaxis.set_minor_locator(MultipleLocator(50))
        ax.yaxis.set_major_locator(MultipleLocator(0.05))
        ax.yaxis.set_minor_locator(MultipleLocator(0.01))
        ax.set_ylim(0.9, 1.02)
        ax.grid(1)

        # Lower pad
        rax.set_ylim(0.9, 1.1)
        rax.grid(1)
        rax.set_xlabel("Recoil or $p_{T}^{miss}$ (GeV)")
        rax.set_ylabel(r"Ratio to single-$\mu$")

        # Header texts sit just above the upper pad (axes coordinates of ax).
        header = dict(fontsize=16,
                      verticalalignment='bottom',
                      transform=ax.transAxes)
        plt.text(1., 1., r"$\approx$ %.1f fb$^{-1}$ (13 TeV)" % lumi(year),
                 horizontalalignment='right',
                 **header)
        plt.text(0., 1., f'{year}',
                 horizontalalignment='left',
                 **header)

        fig.savefig(pjoin(outdir, f'region_comparison_data_{year}.pdf'))
        fig.clear()
        plt.close(fig)
Пример #2
0
def pdf_plot(acc):
    """Compare the generator-level boson pT spectrum between PDF choices.

    The 'gen_vpt' histogram is taken from ``acc``, rebinned to 10 bins between
    0 and 2000 on the 'vpt' axis, passed through ``merge_extensions``,
    ``scale_xs_lumi`` and ``merge_datasets``, and then, for each of the two
    hard-coded 2017 datasets, every entry of the 'pdf' axis except 'none' is
    drawn on the upper pad together with its ratio to the 'none' entry on the
    lower pad. One PDF file per dataset is written to ``./output/pdfstudy/``.

    Parameters
    ----------
    acc
        Mapping-like accumulator providing the 'gen_vpt' histogram; it is also
        handed to ``merge_extensions``.
    """
    outdir = './output/pdfstudy/'
    os.makedirs(outdir, exist_ok=True)

    datasets = [
        'WJetsToLNu_HT_MLM_2017',
        'DYJetsToLL_M-50_HT_MLM_2017',
    ]

    # None of these steps depends on the dataset being plotted, so they are
    # done once instead of once per dataset.
    h_all = acc['gen_vpt']
    h_all = h_all.rebin(h_all.axis('vpt'),
                        hist.Bin("vpt", r"$p_{T}^{V}$ (GeV)", 10, 0, 2000))
    h_all = merge_extensions(h_all, acc, reweight_pu=False)
    scale_xs_lumi(h_all)
    h_all = merge_datasets(h_all)

    for ds in datasets:
        fig, ax, rax = fig_ratio()
        h = h_all.project(h_all.axis('dataset'), ds)

        # 'none' only serves as the denominator of the ratio; it is not drawn.
        plotted = [
            pdf for pdf in h.axis('pdf').identifiers() if str(pdf) != 'none'
        ]

        for pdf in plotted:
            # Work on a copy so the module-level option dict is not modified.
            err_opts = dict(data_err_opts, color=colors[str(pdf)])
            hist.plot1d(
                h.project('pdf', pdf),
                error_opts=err_opts,
                ax=ax,
                overflow='all',
                clear=False)

            hist.plotratio(
                h.project('pdf', pdf),
                h.project('pdf', 'none'),
                ax=rax,
                denom_fill_opts={},
                guide_opts={},
                unc='num',
                overflow='all',
                error_opts=err_opts,
                clear=False,
            )

        ax.set_ylim(1e-3, 1e8)
        rax.set_ylim(0.9, 1.6)
        ax.set_yscale('log')

        # Relabel the legend entries with the PDF names. The entries are paired
        # with the identifiers that were actually drawn, in drawing order, so
        # the position of 'none' in the axis cannot shift the labels.
        leg = ax.legend()
        for text, pdf in zip(leg.get_texts(), plotted):
            text.set_text(str(pdf))

        fig.savefig(pjoin(outdir, f'{ds}.pdf'))
        plt.close(fig)
Пример #3
0
def main():
    """
    A script to easily compare template files between different runs.

    Usage: ./compare_templates.py /path/to/first/template_file.root /path/to/second/template_file.root

    The script will loop over all templates in the files and create a comparison plot for each of them.
    All plots are dumped into a folder for inspection.

    Templates that are missing from the second file are reported and skipped.
    Templates whose name does not match the filter regular expression are
    skipped. A table with the summed contents of both files and their ratio is
    printed at the end.
    """

    args = parse_commandline()

    # Based on input locations, derive tag names to identify the files
    tag1 = os.path.basename(os.path.dirname(args.fname1))
    tag2 = os.path.basename(os.path.dirname(args.fname2))

    # Only templates whose name matches this expression are plotted
    regex = re.compile(args.filter)

    # Convert to dictionary
    h1 = make_dict(args.fname1)
    h2 = make_dict(args.fname2)

    # Create plot folder
    os.makedirs(args.outdir, exist_ok=True)

    # The ratio-pad limits are the same for every plot: parse them once
    rlim = [float(value) for value in args.rlim.split(",")]

    # Do the actual plotting
    table = []
    for key in tqdm.tqdm(h1.keys()):
        if key not in h2:
            # Nothing to compare against: report and move on before any
            # figure is allocated.
            print("Found missing key ", key)
            continue
        if not regex.match(str(key)):
            continue

        hist1, hist2 = h1[key], h2[key]

        # Summary row: totals of both files and their ratio
        v1 = np.sum(hist1.allvalues)
        v2 = np.sum(hist2.allvalues)
        if v2 == v1:
            ratio = 1
        elif v1 == 0:
            ratio = "-"
        else:
            ratio = v2 / v1
        table.append([key, v1, v2, ratio])

        # The y-range of the log-scale pad is derived from the positive bins
        # of the second template; without any there is nothing to plot.
        values2 = hist2.values
        positive2 = values2[values2 > 0]
        if len(positive2) == 0:
            continue

        values1 = hist1.values
        x = 0.5 * np.sum(hist1.bins, axis=1)
        edges = np.unique(hist1.bins)

        fig, ax, rax = fig_ratio()

        hep.histplot(
            sum_overflow_into_last_bin(hist1.allvalues),
            edges,
            ax=ax,
            label=f"{tag1}, Integral={np.sum(values1):.1f}",
            color='navy',
        )

        hep.histplot(sum_overflow_into_last_bin(hist2.allvalues),
                     edges,
                     yerr=np.sqrt(hist2.variances),
                     ax=ax,
                     label=f"{tag2}, Integral={np.sum(values2):.1f}",
                     color='crimson',
                     marker='o',
                     markersize=5,
                     histtype='errorbar')

        ax.legend()

        # Bottom panel: ratio plot, only where the denominator is non-zero
        valid = values1 != 0

        rax.errorbar(
            x[valid],
            values2[valid] / values1[valid],
            np.sqrt(hist2.variances[valid]) / values1[valid],
            linestyle='none',
            marker='o',
            color="crimson",
        )

        # Add indicators for bins where we could not calculate the ratio
        if np.any(~valid):
            rax.plot(x[~valid],
                     np.ones(np.sum(~valid)),
                     'x',
                     color="k",
                     fillstyle="none")

        # Aesthetics
        ax.set_title(key)
        rax.set_ylim(*rlim)
        rax.set_xlabel("Recoil (GeV)")
        rax.set_ylabel("Ratio")
        ax.set_ylabel("Events / bin")
        ax.set_yscale("log")
        ax.set_ylim(0.5 * min(positive2), 1.5 * max(values2))
        rax.grid(linestyle='--')

        fig.savefig(pjoin(args.outdir, f"{key}.png"))
        plt.close(fig)
    print(tabulate(table))
Пример #4
0
def sf_comparison_plot(tag):
    """Compare the data / MC scale factors of the muon regions.

    For 2017 and 2018 separately, the data and MC tables of every region are
    read with ``get_xy`` from ``output/<tag>/``; their ratio is drawn on the
    upper pad with an uncertainty from ``ratio_unc``. The lower pad shows each
    scale factor divided by the one of the '1m' region. One PDF per year is
    saved to ``./output/<tag>/``.

    Parameters
    ----------
    tag
        Name of the sub-directory of ``output/`` that holds the input tables
        and receives the plots.
    """
    baseline = '1m'
    outdir = f"./output/{tag}"

    for year in (2017, 2018):
        style = markers('data')
        style.update(markersize=5, fillstyle='none')
        # Dropped so that the key is not forwarded to errorbar() via **style.
        style.pop('emarker', '')

        fig, ax, rax = fig_ratio()

        centers, sf, sf_err = {}, {}, {}
        for region in ('1m', '2m', '1m_hlt', '2m_hlt'):
            # Numerator: data table, denominator: MC table for this region.
            if '1e' in region:
                fnum = f'output/{tag}/table_{region}_EGamma_{year}.txt'
                fden = f'output/{tag}/table_{region}_WJetsToLNu-MLM_{year}.txt'
            elif '1m' in region:
                fnum = f'output/{tag}/table_{region}_SingleMuon_{year}.txt'
                fden = f'output/{tag}/table_{region}_WJetsToLNu-MLM_{year}.txt'
            elif '2m' in region:
                fnum = f'output/{tag}/table_{region}_SingleMuon_{year}.txt'
                fden = f'output/{tag}/table_{region}_DYNJetsToLL_M-50-MLM_{year}.txt'

            xnum, ynum, yerrnum = get_xy(fnum)
            _, yden, yerrden = get_xy(fden)

            centers[region] = xnum
            sf[region] = ynum / yden
            sf_err[region] = ratio_unc(ynum, yden, yerrnum, yerrden)

            style['color'] = colors[region]
            style['marker'] = region_marker[region]
            ax.errorbar(centers[region],
                        sf[region],
                        yerr=sf_err[region],
                        label=f'{region} region',
                        **style)

            # The baseline region comes first in the list, so its scale factor
            # is already stored when the other regions are divided by it.
            if region != baseline:
                rax.errorbar(centers[baseline],
                             sf[region] / sf[baseline],
                             ratio_unc(sf[region], sf[baseline],
                                       sf_err[region], sf_err[baseline]),
                             **style)

        # Upper pad
        ax.legend()
        ax.set_ylabel("Data / MC SF")
        ax.xaxis.set_major_locator(MultipleLocator(200))
        ax.xaxis.set_minor_locator(MultipleLocator(50))
        ax.yaxis.set_major_locator(MultipleLocator(0.05))
        ax.yaxis.set_minor_locator(MultipleLocator(0.01))
        ax.set_ylim(0.9, 1.1)
        ax.grid(1)

        # Lower pad
        rax.set_ylim(0.95, 1.05)
        rax.yaxis.set_major_locator(MultipleLocator(0.05))
        rax.yaxis.set_minor_locator(MultipleLocator(0.01))
        rax.grid(1)
        rax.set_xlabel("Recoil or $p_{T}^{miss}$ (GeV)")
        rax.set_ylabel(r"Ratio to single-$\mu$")

        # Header texts sit just above the upper pad (axes coordinates of ax).
        header = dict(fontsize=16,
                      verticalalignment='bottom',
                      transform=ax.transAxes)
        plt.text(1., 1., r"$\approx$ %.1f fb$^{-1}$ (13 TeV)" % lumi(year),
                 horizontalalignment='right',
                 **header)
        plt.text(0., 1., f'{year}',
                 horizontalalignment='left',
                 **header)

        fig.savefig(pjoin(outdir, f'sf_comparison_{year}.pdf'))
        fig.clear()
        plt.close(fig)
Пример #5
0
def fit(tag, year):
    """Fit turn-on curves to the data and MC efficiencies and plot them.

    The efficiencies are obtained from ``load`` for the 'JetHT' (data) and
    'GJets_HT_MLM' (MC) datasets. Each is fitted with ``sigmoid`` using
    ``curve_fit``; the point where the fitted curve reaches 0.95 is located
    with ``minimize`` started at 230. The upper pad shows the points, both
    fitted curves and the fit parameters; the lower pad shows the data / MC
    ratio of the points and of the fits, the latter with a +-1% band. The
    figure is saved as ``./output/gamma/fit/fit_<tag>_<year>.pdf``.

    Parameters
    ----------
    tag
        Passed to ``load`` and used in the output file name.
    year
        Passed to ``load`` and ``lumi`` and used in the output file name.
    """
    # Imported locally: pyplot is only needed here to release the figure.
    import matplotlib.pyplot as plt

    outdir = './output/gamma/fit/'
    os.makedirs(outdir, exist_ok=True)

    x, eff, eff_up, eff_down = {}, {}, {}, {}
    x['data'], eff['data'], eff_up['data'], eff_down['data'] = load(tag, 'JetHT', year)
    x['mc'], eff['mc'], eff_up['mc'], eff_down['mc'] = load(tag, 'GJets_HT_MLM', year)

    # Half the difference between eff_up and eff_down is used as the
    # (symmetric) uncertainty both in the fit and for the error bars.
    err = {key: 0.5 * (eff_up[key] - eff_down[key]) for key in ('data', 'mc')}

    pars = {}
    cross = {}
    for key in ('data', 'mc'):
        pars[key], _ = curve_fit(sigmoid, x[key], eff[key], sigma=err[key], p0=[1e-3, 200, 0.1, 1])
        # The fitted parameters are bound as a default argument so the
        # objective does not depend on the loop variable.
        cross[key] = minimize(lambda pt, p=pars[key]: np.abs(sigmoid(pt, *p) - 0.95), x0=230)

    fig, ax, rax = fig_ratio()

    xinterp = np.linspace(min(x['data']), max(x['data']), 1000)

    ax.errorbar(x['data'], eff['data'], err['data'], fmt='o', label='Data', color=colors["data"])
    ax.errorbar(x['mc'], eff['mc'], err['mc'], fmt='s', label='MC', fillstyle='none', color=colors["mc"])
    ax.plot(xinterp, sigmoid(xinterp, *pars['data']), label='Data fit', color=colors["data"], zorder=-1)
    ax.plot(xinterp, sigmoid(xinterp, *pars['mc']), label='MC fit', color=colors["mc"], zorder=-1, linestyle='--')

    ax.set_ylim(0., 1.1)
    ax.set_xlim(100, 1100)
    ax.legend()

    # Functional form and fitted parameters, data on the left and MC on the
    # right (positions are in data coordinates).
    ax.text(350, .4, 'f(x) = c + (d-c) / (1 + exp(-a * (x-b)))')
    ax.text(
            300,
            0.1,
            '\n'.join([
                f"a = {pars['data'][0]:.3f} / GeV",
                f"b = {pars['data'][1]:.2f} GeV",
                f"c = {pars['data'][2]:.3f}",
                f"d = {pars['data'][3]:.3f}"
            ]),
            color=colors['data']
            )
    ax.text(
            600,
            0.1,
            '\n'.join([
                f"a = {pars['mc'][0]:.3f} / GeV",
                f"b = {pars['mc'][1]:.2f} GeV",
                f"c = {pars['mc'][2]:.3f}",
                f"d = {pars['mc'][3]:.3f}"
            ]),
            color=colors['mc']
            )

    ax.text(700, 0.8,
            "\n".join([
                f"Data > 95% @ {cross['data'].x[0]:.0f} GeV",
                f"MC > 95% @ {cross['mc'].x[0]:.0f} GeV",
            ])
            )
    ax.text(1., 1., r"%.1f fb$^{-1}$ (13 TeV)" % lumi(year),
            fontsize=16,
            horizontalalignment='right',
            verticalalignment='bottom',
            transform=ax.transAxes
            )
    rax.set_ylim(0.95, 1.1)
    rax.grid()

    rax.errorbar(x["data"], eff["data"] / eff["mc"], err["data"] / eff["mc"], fmt='o', label='Data / MC', color=colors['data'])

    # The fit ratio is only drawn above the point where the data fit
    # reaches 0.95.
    rxinterp = np.linspace(cross['data'].x[0], max(xinterp), 1000)
    fit_ratio = sigmoid(rxinterp, *pars['data']) / sigmoid(rxinterp, *pars['mc'])

    rax.plot(rxinterp, fit_ratio, label=f"Data / MC fit ratio, plateau at {100*fit_ratio[-1]:.1f} %", color='k')
    rax.plot(rxinterp, 0.99 * fit_ratio, label='1% uncertainty on fit', linestyle='--', color='gray')
    rax.plot(rxinterp, 1.01 * fit_ratio, linestyle='--', color='gray')

    rax.legend()
    rax.set_ylabel("Ratio")
    ax.set_ylabel("Trigger efficiency")
    ax.set_xlabel("Photon $p_{T}$ (GeV)")
    rax.set_xlabel("Photon $p_{T}$ (GeV)")
    ax.figure.savefig(pjoin(outdir, f'fit_{tag}_{year}.pdf'))
    # Close (not just clear) the figure so it is released by pyplot.
    plt.close(ax.figure)
Пример #6
0
def data_mc_comparison_plot(tag,
                            ymin=0,
                            ymax=1.1,
                            distribution='recoil',
                            jeteta_config=None,
                            output_format='pdf'):
    """Plot data and MC efficiencies per region and store the scale factors.

    For 2017 and 2018 and for every region selected by ``tag``, the data and
    MC tables are read with ``get_xy`` from ``output/<tag>/``. Both curves are
    drawn on the upper pad and their ratio (the scale factor) on the lower
    pad; the figure is saved to ``./output/<tag>/``. The scale factor is also
    written, together with the bin edges, to ``./output/<tag>/trig_sf.root``
    under the key ``<tag>_<region>_<year>``, with NaN and infinite entries
    replaced by 1. Regions for which one of the two tables is missing are
    reported and skipped.

    Parameters
    ----------
    tag
        Sub-directory of ``output/`` with the inputs. If it contains 'gamma'
        the photon trigger regions are used, if it contains 'recoil' the
        '1m' and '2m' regions, otherwise '1m', '2m', '1e' and '2m_hlt'.
    ymin, ymax
        y-axis range of the upper pad.
    distribution
        Selects the y-range of the vertical marker line and the tick spacing
        of the upper pad ('recoil' and 'mjj' are treated specially).
    jeteta_config
        Optional string; if given it is appended to the names of the recoil
        tables and of the output plot, and printed on the plot.
    output_format
        File extension of the saved figures.

    Raises
    ------
    ValueError
        If a region name matches none of the known region types.
    """
    if 'gamma' in tag:
        regions = [
            'g_HLT_PFHT1050', 'g_HLT_PFHT590', 'g_HLT_PFHT680',
            'g_HLT_PFHT780', 'g_HLT_PFHT890'
        ]
    elif 'recoil' in tag:
        regions = ['1m', '2m']
    else:
        regions = ['1m', '2m', '1e', '2m_hlt']

    opts = markers('data')
    # Dropped so that the key is not forwarded to errorbar() via **opts.
    opts.pop('emarker', '')

    outdir = f"./output/{tag}"
    outpath = pjoin(outdir, 'trig_sf.root')
    try:
        outfile = uproot.recreate(outpath)
    except OSError:
        outfile = uproot.update(outpath)

    # Optional suffix shared by the recoil table names and the plot name.
    suffix = "_" + jeteta_config if jeteta_config else ""

    try:
        for year in [2017, 2018]:
            for region in regions:
                if '1e' in region:
                    fnum = f'output/{tag}/table_{region}_met_EGamma_{year}.txt'
                    fden = f'output/{tag}/table_{region}_met_WJetsToLNu_HT_MLM_{year}.txt'
                    xlabel = "$p_{T}^{miss}$ (GeV)"
                elif '1m' in region:
                    fnum = f'output/{tag}/table_{region}_recoil_SingleMuon_{year}{suffix}.txt'
                    fden = f'output/{tag}/table_{region}_recoil_WJetsToLNu_HT_MLM_{year}{suffix}.txt'
                    xlabel = "Recoil (GeV)"
                elif '2m' in region:
                    fnum = f'output/{tag}/table_{region}_recoil_SingleMuon_{year}{suffix}.txt'
                    fden = f'output/{tag}/table_{region}_recoil_VDYJetsToLL_M-50_HT_MLM_{year}{suffix}.txt'
                    xlabel = "Recoil (GeV)"
                elif 'g_' in region:
                    fnum = f'output/{tag}/table_{region}_photon_pt0_JetHT_{year}.txt'
                    fden = f'output/{tag}/table_{region}_photon_pt0_GJets_HT_MLM_{year}.txt'
                    xlabel = "Photon $p_{T}$ (GeV)"
                else:
                    raise ValueError(f"Unknown region: {region}")

                # Check the inputs before a figure is created, so that a
                # skipped region does not leave an open figure behind.
                if not os.path.exists(fnum):
                    print(f"File not found {fnum}")
                    continue
                if not os.path.exists(fden):
                    print(f"File not found {fden}")
                    continue

                xnum, xedgnum, ynum, yerrnum = get_xy(fnum)
                xden, xedgden, yden, yerrden = get_xy(fden)

                # Data / MC scale factor and its uncertainty
                xsf = xnum
                ysf = ynum / yden
                ysferr = ratio_unc(ynum, yden, yerrnum, yerrden)

                fig, ax, rax = fig_ratio()

                opts['color'] = 'k'
                ax.errorbar(xnum,
                            ynum,
                            yerr=yerrnum,
                            label=f'Data, {region} region',
                            **opts)
                opts['color'] = 'r'
                ax.errorbar(xden,
                            yden,
                            yerr=yerrden,
                            label=f'MC, {region} region',
                            **opts)

                # Vertical marker line: at 215 for the photon regions, at 250
                # otherwise.
                if 'g_' in region:
                    ax.plot([215, 215], [0.9, 1.1], color='blue')
                    rax.plot([215, 215], [0.95, 1.05], color='blue')
                elif distribution == 'recoil':
                    ax.plot([250, 250], [0.0, 1.1], color='blue')
                    rax.plot([250, 250], [0.95, 1.05], color='blue')
                else:
                    ax.plot([250, 250], [0.9, 1.1], color='blue')
                    rax.plot([250, 250], [0.95, 1.05], color='blue')
                opts['color'] = 'k'
                rax.errorbar(xsf, ysf, ysferr, **opts)

                ax.legend()
                ax.set_ylabel("Efficiency")
                ax.xaxis.set_major_locator(MultipleLocator(200))
                ax.xaxis.set_minor_locator(MultipleLocator(50))
                ax.set_ylim(ymin, ymax)
                ax.grid(1)

                if distribution == 'mjj':
                    ax.yaxis.set_major_locator(MultipleLocator(0.05))
                    ax.yaxis.set_minor_locator(MultipleLocator(0.01))
                elif distribution == 'recoil':
                    ax.yaxis.set_major_locator(MultipleLocator(0.1))
                    ax.yaxis.set_minor_locator(MultipleLocator(0.05))

                rax.set_xlabel(xlabel)
                rax.set_ylabel("Data / MC SF")
                rax.set_ylim(0.95, 1.05)
                rax.yaxis.set_major_locator(MultipleLocator(0.05))
                rax.yaxis.set_minor_locator(MultipleLocator(0.01))
                rax.grid(1)

                plt.text(1.,
                         1.,
                         r"$\approx$ %.1f fb$^{-1}$ (13 TeV)" %
                         lumi_by_region(region, year),
                         fontsize=16,
                         horizontalalignment='right',
                         verticalalignment='bottom',
                         transform=ax.transAxes)
                plt.text(1.,
                         0.95,
                         f'{jeteta_config if jeteta_config else ""}',
                         fontsize=12,
                         horizontalalignment='right',
                         verticalalignment='bottom',
                         transform=ax.transAxes)
                plt.text(0.,
                         1.,
                         f'{year}',
                         fontsize=16,
                         horizontalalignment='left',
                         verticalalignment='bottom',
                         transform=ax.transAxes)
                fig.savefig(
                    pjoin(
                        outdir,
                        f'data_mc_comparison_{region}_{year}{suffix}.{output_format}'
                    ))
                fig.clear()
                plt.close(fig)

                # Sorted, de-duplicated bin edges of the data table
                vals = np.unique(xedgnum)
                # Bins without a usable ratio get a scale factor of 1
                ysf[~np.isfinite(ysf)] = 1
                outfile[f'{tag}_{region}_{year}'] = (ysf, vals)
    finally:
        # Make sure the ROOT file is closed even if a region fails.
        outfile.close()
Пример #7
0
def extract_yields_in_cr(acc, distribution, region='cr_vbf_qcd_rs', year=2017):
    """Calculate data - (non-QCD MC) in the QCD control region.

    The histogram for ``distribution`` is loaded from ``acc``, passed through
    ``merge_extensions``, ``scale_xs_lumi`` and ``merge_datasets``, rebinned if
    ``BINNINGS`` has an entry for it, and integrated over ``region``. The
    upper pad shows the ``MET_<year>`` dataset on top of the stacked MC
    datasets selected by a regular expression; the lower pad shows data minus
    MC. The figure is saved as ``./output/qcd_cr/qcd_cr_<distribution>.pdf``.

    Parameters
    ----------
    acc
        Accumulator providing ``load()`` and item access by distribution name.
    distribution
        Name of the distribution to plot.
    region
        Value of the 'region' axis to integrate over.
    year
        Year used to pick the data and MC datasets.

    Returns
    -------
    The data-minus-MC histogram, integrated over the 'dataset' axis.
    """
    acc.load(distribution)

    histo = merge_extensions(acc[distribution], acc)
    scale_xs_lumi(histo)
    histo = merge_datasets(histo)

    if distribution in BINNINGS:
        target_axis = BINNINGS[distribution]
        histo = histo.rebin(target_axis.name, target_axis)

    histo = histo.integrate('region', region)

    data_name = f'MET_{year}'
    mc_pattern = re.compile(
        f'(ZJetsToNuNu.*|EW.*|Top_FXFX.*|Diboson.*|DYJetsToLL_M-50_HT_MLM.*|WJetsToLNu.*HT.*).*{year}'
    )

    fig, ax, rax = fig_ratio()
    marker_style = dict(linestyle='none',
                        marker='.',
                        markersize=10.,
                        color='k',
                        elinewidth=1)

    # Upper pad: data points drawn first, stacked MC added on top
    hist.plot1d(histo[data_name],
                ax=ax,
                overlay='dataset',
                binwnorm=1,
                error_opts=marker_style)
    hist.plot1d(histo[mc_pattern],
                ax=ax,
                overlay='dataset',
                binwnorm=1,
                stack=True,
                clear=False)

    ax.set_yscale('log')
    ax.set_ylim(1e-4, 1e6)
    ax.set_ylabel('Events / GeV')
    ax.yaxis.set_ticks_position('both')

    # Replace the dataset names in the legend by their pretty labels. Every
    # pattern is tried, so the last matching one determines the label.
    handles, labels = ax.get_legend_handles_labels()
    for handle, label in zip(handles, labels):
        for pattern, pretty in PRETTY_LEGEND_LABELS.items():
            if re.match(pattern, label):
                handle.set_label(pretty)

    ax.legend(title='VBF QCD CR', handles=handles, ncol=2)

    # Data - MC: the MC sum is negated and added to the data
    diff = histo[data_name].integrate('dataset')
    mc_sum = histo[mc_pattern].integrate('dataset')
    mc_sum.scale(-1)
    diff.add(mc_sum)

    # Lower pad: data - MC
    hist.plot1d(diff, ax=rax, binwnorm=1)
    rax.set_ylabel('(Data - MC) / GeV')
    rax.set_ylim(1e-3, 1e1)
    rax.set_yscale('log')
    rax.get_legend().remove()
    rax.yaxis.set_ticks_position('both')

    outdir = './output/qcd_cr'
    try:
        os.makedirs(outdir)
    except FileExistsError:
        # The directory is already there, nothing to do
        pass

    outpath = pjoin(outdir, f'qcd_cr_{distribution}.pdf')
    fig.savefig(outpath)
    plt.close(fig)

    print(f'File saved: {outpath}')

    # Return the QCD yield
    return diff