示例#1
0
def dijet(category):

    channel = 'bb'
    stype = channel
    isSB = True  # relict from using Alberto's more complex script
    isData = not ISMC
    nTupleDir = NTUPLEDIR

    samples = data if isData else back
    pd = []
    for sample_name in samples:
        if YEAR == 'run2':
            pd += sample[sample_name]['files']
        else:
            pd += [x for x in sample[sample_name]['files'] if YEAR in x]
    print "datasets:", pd
    if not os.path.exists(PLOTDIR): os.makedirs(PLOTDIR)
    if BIAS: print "Running in BIAS mode"

    order = 0
    RSS = {}

    X_mass = RooRealVar("jj_mass_widejet", "m_{jj}", X_min, X_max, "GeV")
    weight = RooRealVar("MANtag_weight", "", -1.e9, 1.e9)

    variables = RooArgSet(X_mass)
    variables.add(RooArgSet(weight))

    if VARBINS:
        binsXmass = RooBinning(len(abins) - 1, abins)
        X_mass.setBinning(RooBinning(len(abins_narrow) - 1, abins_narrow))
        plot_binning = RooBinning(
            int((X_mass.getMax() - X_mass.getMin()) / 100), X_mass.getMin(),
            X_mass.getMax())
    else:
        X_mass.setBins(int((X_mass.getMax() - X_mass.getMin()) / 10))
        binsXmass = RooBinning(int((X_mass.getMax() - X_mass.getMin()) / 100),
                               X_mass.getMin(), X_mass.getMax())
        plot_binning = binsXmass

    baseCut = ""

    print stype, "|", baseCut

    print " - Reading from Tree"
    treeBkg = TChain("tree")
    for ss in pd:
        if os.path.exists(nTupleDir + ss + "_" + BTAGGING + ".root"):
            treeBkg.Add(nTupleDir + ss + "_" + BTAGGING + ".root")
        else:
            print "found no file for sample:", ss
    setData = RooDataSet("setData", "Data (QCD+TTbar MC)", variables,
                         RooFit.Cut(baseCut), RooFit.WeightVar(weight),
                         RooFit.Import(treeBkg))

    nevents = setData.sumEntries()
    dataMin, dataMax = array('d', [0.]), array('d', [0.])
    setData.getRange(X_mass, dataMin, dataMax)
    xmin, xmax = dataMin[0], dataMax[0]

    lastBin = X_mass.getMax()
    if VARBINS:
        for b in narrow_bins:
            if b > xmax:
                lastBin = b
                break

    print "Imported", (
        "data" if isData else "MC"
    ), "RooDataSet with", nevents, "events between [%.1f, %.1f]" % (xmin, xmax)
    #xmax = xmax+binsXmass.averageBinWidth() # start form next bin

    # 1 parameter
    print "fitting 1 parameter model"
    p1_1 = RooRealVar("CMS" + YEAR + "_" + category + "_p1_1", "p1", 7.0, 0.,
                      2000.)
    modelBkg1 = RooGenericPdf("Bkg1", "Bkg. fit (2 par.)",
                              "1./pow(@0/13000, @1)", RooArgList(X_mass, p1_1))
    normzBkg1 = RooRealVar(
        modelBkg1.GetName() + "_norm", "Number of background events", nevents,
        0., 5. * nevents)  #range dependent of actual number of events!
    modelExt1 = RooExtendPdf(modelBkg1.GetName() + "_ext",
                             modelBkg1.GetTitle(), modelBkg1, normzBkg1)
    fitRes1 = modelExt1.fitTo(setData, RooFit.Extended(True), RooFit.Save(1),
                              RooFit.SumW2Error(not isData),
                              RooFit.Strategy(2), RooFit.Minimizer("Minuit2"),
                              RooFit.PrintLevel(1 if VERBOSE else -1))
    fitRes1.Print()
    RSS[1] = drawFit("Bkg1", category, X_mass, modelBkg1, setData, binsXmass,
                     [fitRes1], normzBkg1.getVal())

    # 2 parameters
    print "fitting 2 parameter model"
    p2_1 = RooRealVar("CMS" + YEAR + "_" + category + "_p2_1", "p1", 0., -100.,
                      1000.)
    p2_2 = RooRealVar("CMS" + YEAR + "_" + category + "_p2_2", "p2",
                      p1_1.getVal(), -100., 600.)
    modelBkg2 = RooGenericPdf("Bkg2", "Bkg. fit (3 par.)",
                              "pow(1-@0/13000, @1) / pow(@0/13000, @2)",
                              RooArgList(X_mass, p2_1, p2_2))
    normzBkg2 = RooRealVar(modelBkg2.GetName() + "_norm",
                           "Number of background events", nevents, 0.,
                           5. * nevents)
    modelExt2 = RooExtendPdf(modelBkg2.GetName() + "_ext",
                             modelBkg2.GetTitle(), modelBkg2, normzBkg2)
    fitRes2 = modelExt2.fitTo(setData, RooFit.Extended(True), RooFit.Save(1),
                              RooFit.SumW2Error(not isData),
                              RooFit.Strategy(2), RooFit.Minimizer("Minuit2"),
                              RooFit.PrintLevel(1 if VERBOSE else -1))
    fitRes2.Print()
    RSS[2] = drawFit("Bkg2", category, X_mass, modelBkg2, setData, binsXmass,
                     [fitRes2], normzBkg2.getVal())

    # 3 parameters
    print "fitting 3 parameter model"
    p3_1 = RooRealVar("CMS" + YEAR + "_" + category + "_p3_1", "p1",
                      p2_1.getVal(), -2000., 2000.)
    p3_2 = RooRealVar("CMS" + YEAR + "_" + category + "_p3_2", "p2",
                      p2_2.getVal(), -400., 2000.)
    p3_3 = RooRealVar("CMS" + YEAR + "_" + category + "_p3_3", "p3", -2.5,
                      -500., 500.)
    modelBkg3 = RooGenericPdf(
        "Bkg3", "Bkg. fit (4 par.)",
        "pow(1-@0/13000, @1) / pow(@0/13000, @2+@3*log(@0/13000))",
        RooArgList(X_mass, p3_1, p3_2, p3_3))
    normzBkg3 = RooRealVar(modelBkg3.GetName() + "_norm",
                           "Number of background events", nevents, 0.,
                           5. * nevents)
    modelExt3 = RooExtendPdf(modelBkg3.GetName() + "_ext",
                             modelBkg3.GetTitle(), modelBkg3, normzBkg3)
    fitRes3 = modelExt3.fitTo(setData, RooFit.Extended(True), RooFit.Save(1),
                              RooFit.SumW2Error(not isData),
                              RooFit.Strategy(2), RooFit.Minimizer("Minuit2"),
                              RooFit.PrintLevel(1 if VERBOSE else -1))
    fitRes3.Print()
    RSS[3] = drawFit("Bkg3", category, X_mass, modelBkg3, setData, binsXmass,
                     [fitRes3], normzBkg3.getVal())

    # 4 parameters
    print "fitting 4 parameter model"
    p4_1 = RooRealVar("CMS" + YEAR + "_" + category + "_p4_1", "p1",
                      p3_1.getVal(), -2000., 2000.)
    p4_2 = RooRealVar("CMS" + YEAR + "_" + category + "_p4_2", "p2",
                      p3_2.getVal(), -2000., 2000.)
    p4_3 = RooRealVar("CMS" + YEAR + "_" + category + "_p4_3", "p3",
                      p3_3.getVal(), -50., 50.)
    p4_4 = RooRealVar("CMS" + YEAR + "_" + category + "_p4_4", "p4", 0.1, -50.,
                      50.)
    modelBkg4 = RooGenericPdf(
        "Bkg4", "Bkg. fit (5 par.)",
        "pow(1 - @0/13000, @1) / pow(@0/13000, @2+@3*log(@0/13000)+@4*pow(log(@0/13000), 2))",
        RooArgList(X_mass, p4_1, p4_2, p4_3, p4_4))
    normzBkg4 = RooRealVar(modelBkg4.GetName() + "_norm",
                           "Number of background events", nevents, 0.,
                           5. * nevents)
    modelExt4 = RooExtendPdf(modelBkg4.GetName() + "_ext",
                             modelBkg4.GetTitle(), modelBkg4, normzBkg4)
    fitRes4 = modelExt4.fitTo(setData, RooFit.Extended(True), RooFit.Save(1),
                              RooFit.SumW2Error(not isData),
                              RooFit.Strategy(2), RooFit.Minimizer("Minuit2"),
                              RooFit.PrintLevel(1 if VERBOSE else -1))
    fitRes4.Print()
    RSS[4] = drawFit("Bkg4", category, X_mass, modelBkg4, setData, binsXmass,
                     [fitRes4], normzBkg4.getVal())

    # Normalization parameters are should be set constant, but shape ones should not
    #    if BIAS:
    #        p1_1.setConstant(True)
    #        p2_1.setConstant(True)
    #        p2_2.setConstant(True)
    #        p3_1.setConstant(True)
    #        p3_2.setConstant(True)
    #        p3_3.setConstant(True)
    #        p4_1.setConstant(True)
    #        p4_2.setConstant(True)
    #        p4_3.setConstant(True)
    #        p4_4.setConstant(True)
    normzBkg1.setConstant(True)
    normzBkg2.setConstant(True)
    normzBkg3.setConstant(True)
    normzBkg4.setConstant(True)

    #*******************************************************#
    #                                                       #
    #                         Fisher                        #
    #                                                       #
    #*******************************************************#

    # Fisher test
    with open(PLOTDIR + "/Fisher_" + category + ".tex", 'w') as fout:
        fout.write(r"\begin{tabular}{c|c|c|c|c}")
        fout.write("\n")
        fout.write(r"function & $\chi^2$ & RSS & ndof & F-test \\")
        fout.write("\n")
        fout.write("\hline")
        fout.write("\n")
        CL_high = False
        for o1 in range(1, 5):
            o2 = min(o1 + 1, 5)
            fout.write("%d par & %.2f & %.2f & %d & " %
                       (o1 + 1, RSS[o1]["chi2"], RSS[o1]["rss"],
                        RSS[o1]["nbins"] - RSS[o1]["npar"]))
            if o2 > len(RSS):
                fout.write(r"\\")
                fout.write("\n")
                continue  #order==0 and
            CL = fisherTest(RSS[o1]['rss'], RSS[o2]['rss'], o1 + 1., o2 + 1.,
                            RSS[o1]["nbins"])
            fout.write("CL=%.3f " % (CL))
            if CL > 0.10:  # The function with less parameters is enough
                if not CL_high:
                    order = o1
                    #fout.write( "%d par are sufficient " % (o1+1))
                    CL_high = True
            else:
                #fout.write( "%d par are needed " % (o2+1))
                if not CL_high:
                    order = o2
            fout.write(r"\\")
            fout.write("\n")
        fout.write("\hline")
        fout.write("\n")
        fout.write(r"\end{tabular}")
    print "saved F-test table as", PLOTDIR + "/Fisher_" + category + ".tex"

    #print "-"*25
    #print "function & $\\chi^2$ & RSS & ndof & F-test & result \\\\"
    #print "\\multicolumn{6}{c}{", "Zprime_to_bb", "} \\\\"
    #print "\\hline"
    #CL_high = False
    #for o1 in range(1, 5):
    #    o2 = min(o1 + 1, 5)
    #    print "%d par & %.2f & %.2f & %d & " % (o1+1, RSS[o1]["chi2"], RSS[o1]["rss"], RSS[o1]["nbins"]-RSS[o1]["npar"]),
    #    if o2 > len(RSS):
    #        print "\\\\"
    #        continue #order==0 and
    #    CL = fisherTest(RSS[o1]['rss'], RSS[o2]['rss'], o1+1., o2+1., RSS[o1]["nbins"])
    #    print "%d par vs %d par CL=%f & " % (o1+1, o2+1, CL),
    #    if CL > 0.10: # The function with less parameters is enough
    #        if not CL_high:
    #            order = o1
    #            print "%d par are sufficient" % (o1+1),
    #            CL_high=True
    #    else:
    #        print "%d par are needed" % (o2+1),
    #        if not CL_high:
    #            order = o2
    #    print "\\\\"
    #print "\\hline"
    #print "-"*25
    #print "@ Order is", order, "("+category+")"

    #order = min(3, order)
    #order = 2
    if order == 1:
        modelBkg = modelBkg1  #.Clone("Bkg")
        modelAlt = modelBkg2  #.Clone("BkgAlt")
        normzBkg = normzBkg1  #.Clone("Bkg_norm")
        fitRes = fitRes1
    elif order == 2:
        modelBkg = modelBkg2  #.Clone("Bkg")
        modelAlt = modelBkg3  #.Clone("BkgAlt")
        normzBkg = normzBkg2  #.Clone("Bkg_norm")
        fitRes = fitRes2
    elif order == 3:
        modelBkg = modelBkg3  #.Clone("Bkg")
        modelAlt = modelBkg4  #.Clone("BkgAlt")
        normzBkg = normzBkg3  #.Clone("Bkg_norm")
        fitRes = fitRes3
    elif order == 4:
        modelBkg = modelBkg4  #.Clone("Bkg")
        modelAlt = modelBkg3  #.Clone("BkgAlt")
        normzBkg = normzBkg4  #.Clone("Bkg_norm")
        fitRes = fitRes4
    else:
        print "Functions with", order + 1, "or more parameters are needed to fit the background"
        exit()

    modelBkg.SetName("Bkg_" + YEAR + "_" + category)
    modelAlt.SetName("Alt_" + YEAR + "_" + category)
    normzBkg.SetName("Bkg_" + YEAR + "_" + category + "_norm")

    print "-" * 25

    # Generate pseudo data
    setToys = RooDataSet()
    setToys.SetName("data_toys")
    setToys.SetTitle("Data (toys)")
    if not isData:
        print " - Generating", nevents, "events for toy data"
        setToys = modelBkg.generate(RooArgSet(X_mass), nevents)
        #setToys = modelAlt.generate(RooArgSet(X_mass), nevents)
        print "toy data generated"

    if VERBOSE: raw_input("Press Enter to continue...")

    #*******************************************************#
    #                                                       #
    #                         Plot                          #
    #                                                       #
    #*******************************************************#

    print "starting to plot"
    c = TCanvas("c_" + category, category, 800, 800)
    c.Divide(1, 2)
    setTopPad(c.GetPad(1), RATIO)
    setBotPad(c.GetPad(2), RATIO)
    c.cd(1)
    frame = X_mass.frame()
    setPadStyle(frame, 1.25, True)
    if VARBINS: frame.GetXaxis().SetRangeUser(X_mass.getMin(), lastBin)
    signal = getSignal(
        category, stype,
        2000)  #replacing Alberto's getSignal by own dummy function

    graphData = setData.plotOn(frame, RooFit.Binning(plot_binning),
                               RooFit.Scaling(False), RooFit.Invisible())
    modelBkg.plotOn(frame, RooFit.VisualizeError(fitRes, 1, False),
                    RooFit.LineColor(602), RooFit.FillColor(590),
                    RooFit.FillStyle(1001), RooFit.DrawOption("FL"),
                    RooFit.Name("1sigma"))
    modelBkg.plotOn(frame, RooFit.LineColor(602), RooFit.FillColor(590),
                    RooFit.FillStyle(1001), RooFit.DrawOption("L"),
                    RooFit.Name(modelBkg.GetName()))
    modelAlt.plotOn(frame, RooFit.LineStyle(7), RooFit.LineColor(613),
                    RooFit.FillColor(609), RooFit.FillStyle(1001),
                    RooFit.DrawOption("L"), RooFit.Name(modelAlt.GetName()))
    if not isSB and signal[0] is not None:  # FIXME remove /(2./3.)
        signal[0].plotOn(
            frame,
            RooFit.Normalization(signal[1] * signal[2], RooAbsReal.NumEvent),
            RooFit.LineStyle(3), RooFit.LineWidth(6), RooFit.LineColor(629),
            RooFit.DrawOption("L"), RooFit.Name("Signal"))
    graphData = setData.plotOn(
        frame, RooFit.Binning(plot_binning), RooFit.Scaling(False),
        RooFit.XErrorSize(0 if not VARBINS else 1),
        RooFit.DataError(RooAbsData.Poisson if isData else RooAbsData.SumW2),
        RooFit.DrawOption("PE0"), RooFit.Name(setData.GetName()))
    fixData(graphData.getHist(), True, True, not isData)
    pulls = frame.pullHist(setData.GetName(), modelBkg.GetName(), True)
    chi = frame.chiSquare(setData.GetName(), modelBkg.GetName(), True)
    #setToys.plotOn(frame, RooFit.DataError(RooAbsData.Poisson), RooFit.DrawOption("PE0"), RooFit.MarkerColor(2))
    frame.GetYaxis().SetTitle("Events / ( 100 GeV )")
    frame.GetYaxis().SetTitleOffset(1.05)
    frame.Draw()
    #print "frame drawn"
    # Get Chi2
    #    chi2[1] = frame.chiSquare(modelBkg1.GetName(), setData.GetName())
    #    chi2[2] = frame.chiSquare(modelBkg2.GetName(), setData.GetName())
    #    chi2[3] = frame.chiSquare(modelBkg3.GetName(), setData.GetName())
    #    chi2[4] = frame.chiSquare(modelBkg4.GetName(), setData.GetName())

    frame.SetMaximum(frame.GetMaximum() * 10)
    frame.SetMinimum(max(frame.GetMinimum(), 1.e-1))
    c.GetPad(1).SetLogy()

    drawAnalysis(category)
    drawRegion(category, True)
    #drawCMS(LUMI, "Simulation Preliminary")
    drawCMS(LUMI, "Work in Progress", suppressCMS=True)

    leg = TLegend(0.575, 0.6, 0.95, 0.9)
    leg.SetBorderSize(0)
    leg.SetFillStyle(0)  #1001
    leg.SetFillColor(0)
    leg.AddEntry(setData.GetName(),
                 setData.GetTitle() + " (%d events)" % nevents, "PEL")
    leg.AddEntry(modelBkg.GetName(), modelBkg.GetTitle(),
                 "FL")  #.SetTextColor(629)
    leg.AddEntry(modelAlt.GetName(), modelAlt.GetTitle(), "L")
    if not isSB and signal[0] is not None:
        leg.AddEntry("Signal", signal[0].GetTitle(), "L")
    leg.SetY1(0.9 - leg.GetNRows() * 0.05)
    leg.Draw()

    latex = TLatex()
    latex.SetNDC()
    latex.SetTextSize(0.04)
    latex.SetTextFont(42)
    if not isSB:
        latex.DrawLatex(leg.GetX1() * 1.16,
                        leg.GetY1() - 0.04, "HVT model B (g_{V}=3)")
    #    latex.DrawLatex(0.67, leg.GetY1()-0.045, "#sigma_{X} = 1.0 pb")

    c.cd(2)
    frame_res = X_mass.frame()
    setPadStyle(frame_res, 1.25)
    frame_res.addPlotable(pulls, "P")
    setBotStyle(frame_res, RATIO, False)
    if VARBINS: frame_res.GetXaxis().SetRangeUser(X_mass.getMin(), lastBin)
    frame_res.GetYaxis().SetRangeUser(-5, 5)
    frame_res.GetYaxis().SetTitle("pulls(#sigma)")
    frame_res.GetYaxis().SetTitleOffset(0.3)
    frame_res.Draw()
    fixData(pulls, False, True, False)

    drawChi2(RSS[order]["chi2"], RSS[order]["nbins"] - (order + 1), True)
    line = drawLine(X_mass.getMin(), 0, lastBin, 0)

    if VARBINS:
        c.SaveAs(PLOTDIR + "/BkgSR_" + category + ".pdf")
        c.SaveAs(PLOTDIR + "/BkgSR_" + category + ".png")
    else:
        c.SaveAs(PLOTDIR + "/BkgSR_" + category + ".pdf")
        c.SaveAs(PLOTDIR + "/BkgSR_" + category + ".png")

    #*******************************************************#
    #                                                       #
    #                   Generate workspace                  #
    #                                                       #
    #*******************************************************#

    if BIAS:
        gSystem.Load("libHiggsAnalysisCombinedLimit.so")
        from ROOT import RooMultiPdf
        cat = RooCategory("pdf_index", "Index of Pdf which is active")
        pdfs = RooArgList(modelBkg, modelAlt)
        roomultipdf = RooMultiPdf("roomultipdf", "All Pdfs", cat, pdfs)
        normulti = RooRealVar("roomultipdf_norm",
                              "Number of background events", nevents, 0., 1.e6)

    normzBkg.setConstant(
        False
    )  ## newly put here to ensure it's freely floating in the combine fit

    # create workspace
    w = RooWorkspace("Zprime_" + YEAR, "workspace")
    # Dataset
    if isData: getattr(w, "import")(setData, RooFit.Rename("data_obs"))
    else: getattr(w, "import")(setToys, RooFit.Rename("data_obs"))
    #getattr(w, "import")(setData, RooFit.Rename("data_obs"))
    if BIAS:
        getattr(w, "import")(cat, RooFit.Rename(cat.GetName()))
        getattr(w, "import")(normulti, RooFit.Rename(normulti.GetName()))
        getattr(w, "import")(roomultipdf, RooFit.Rename(roomultipdf.GetName()))
    getattr(w, "import")(modelBkg, RooFit.Rename(modelBkg.GetName()))
    getattr(w, "import")(modelAlt, RooFit.Rename(modelAlt.GetName()))
    getattr(w, "import")(normzBkg, RooFit.Rename(normzBkg.GetName()))
    w.writeToFile(WORKDIR + "%s_%s.root" % (DATA_TYPE + "_" + YEAR, category),
                  True)
    print "Workspace", WORKDIR + "%s_%s.root" % (
        DATA_TYPE + "_" + YEAR, category), "saved successfully"
    if VERBOSE: raw_input("Press Enter to continue...")