Exemplo n.º 1
0
    def calculate_expected_limit(self, selection, idx, m_point, temp_results):
        """Model every bin of ``selection``, write one datacard and run combine.

        For each bin a signal model is built for every process in
        ``self.signal + self.resonant_bkgs`` and one background model is
        built with the ``"bkg"`` process selection. The yields returned by
        those fits are collected per bin, a single datacard covering all bins
        is written, and ``self.scanner.runCombine`` is called on it.

        Args:
            selection: sequence of cut strings, one per bin. Each one is
                appended to the base and process selections.
            idx: identifier of this scan point. Its string form is embedded
                in every save name, tag, datacard name and combine output.
            m_point: iterable of values convertible with ``float``; stored
                under ``"x"`` in the result.
            temp_results: mapping that receives the result dict under the
                key ``",".join(selection) + str(idx)``.

        Returns:
            None. The result is handed back through ``temp_results``.
        """
        scan_label = str(idx)
        yields = {}

        for bin_number, bin_cut in enumerate(selection):
            bin_key = "Bin_%d" % bin_number
            # Common tail of every name produced for this bin:
            # <channel>_<bin number>_<idx>.
            bin_label = self.channel + "_" + str(bin_number) + "_" + scan_label
            yields[bin_key] = {}

            # One signal-shape fit per signal and per resonant background.
            for process in self.signal + self.resonant_bkgs:
                process_label = process + "_hgg_" + bin_label
                process_cut = (self.base_selection() + "&&" +
                               self.process_selection(process) + " && " +
                               bin_cut)
                signal_config = {
                    "var": "mass",
                    "weightVar": "weight",
                    "plotpath": self.scanConfig["plotpath"],
                    "modelpath": self.scanConfig["modelpath"],
                    "filename": self.input,
                    "savename": "CMS-HGG_sigfit_mva_" + process_label,
                    "tag": "hggpdfsmrel_" + process_label,
                    "selection": process_cut,
                }
                signal_model = makeModel(signal_config)
                signal_model.getTree(self.scanner.getTree())
                fit_options = {
                    "replaceNorm": False,
                    "norm_in": -1,
                    "fixParameters": True,
                }
                yields[bin_key][process] = signal_model.makeSignalModel(
                    "wsig_13TeV", fit_options)

            # Background fit for the same bin.
            background_cut = (self.base_selection() + "&&" +
                              self.process_selection("bkg") + " && " +
                              bin_cut)
            background_config = {
                "var": "mass",
                "weightVar": "weight",
                "plotpath": self.scanConfig["plotpath"],
                "modelpath": self.scanConfig["modelpath"],
                "filename": self.input,
                "savename": "CMS-HGG_bkg_" + bin_label,
                "tag": "CMS_hgg_bkgshape_" + bin_label,
                "selection": background_cut,
            }
            background_model = makeModel(background_config)
            background_model.getTree(self.scanner.getTree())
            # The second returned value is not used by this method.
            bkg_yield, _bkg_yield_raw = background_model.makeBackgroundModel(
                "wbkg_13TeV", bin_label)
            yields[bin_key]["bkg"] = bkg_yield

        # A single datacard that covers every bin of this scan point.
        card_name = "CMS-HGG_mva_13TeV_datacard_" + scan_label + ".txt"
        datacard = makeCards(
            self.scanConfig["modelpath"],
            card_name,
            {"sm_higgs_unc": self.sm_higgs_unc},
        )
        tagList = [
            self.channel + "_" + str(number)
            for number in range(len(selection))
        ]
        sigList = [self.signal[0] + "_hgg"]
        bkgList = ["bkg_mass"] + [name + "_hgg" for name in self.resonant_bkgs]
        datacard.WriteCard(sigList, bkgList, tagList, "_" + scan_label)

        # runCombine yields five values: the expected limit followed by its
        # up/down one-sigma and up/down two-sigma variations.
        central, up_1sigma, down_1sigma, up_2sigma, down_2sigma = (
            self.scanner.runCombine({
                "combineOption": self.combineOption,
                "combineOutName": "sig_" + scan_label,
                "cardName": card_name,
                "outtxtName": "sig_" + scan_label + ".txt",
            }))

        result = {
            "idx": idx,
            "x": [float(coordinate) for coordinate in m_point],
            "exp_lim": [
                central, up_1sigma, down_1sigma, up_2sigma, down_2sigma
            ],
            "selection": selection,
            "yields": yields,
        }

        result_key = ",".join(selection) + scan_label
        temp_results[result_key] = result

        if self.verbose:
            print("[GUIDED OPTIMIZER]", result)
Exemplo n.º 2
0
    def calculate_expected_limit(self, selection, idx, m_point, temp_results):
        """Build per-bin models and datacards, run combine, and store the result.

        For each bin of ``selection`` this method:

        * fits a signal model for every process in
          ``self.signal + self.resonant_bkgs``;
        * fits a background model using the ``self.nrb_choice`` process
          selection;
        * fits a second background model using the ``"data"`` process
          selection (saved as ``"dummy"``; its yields are not used here).

        It then writes one combined datacard plus one datacard per bin, runs
        ``self.scanner.runCombine`` on each of them, and stores everything in
        ``temp_results``.

        Args:
            selection: sequence of cut strings, one per bin.
            idx: identifier of this scan point. Its string form is embedded
                in every save name, tag, datacard name and combine output.
            m_point: iterable of values convertible with ``float``; stored
                under ``"x"`` in the result.
            temp_results: mapping that receives the result dict under the
                key ``",".join(selection) + str(idx)``.

        Returns:
            None. The result is handed back through ``temp_results``.
        """
        point_label = str(idx)
        card_prefix = "CMS-HGG_mva_13TeV_datacard_"

        def model_config(savename, tag, process_name, bin_cut):
            """Return a fresh makeModel configuration for one process in one bin."""
            return {
                "var": "mass",
                "weightVar": "weight_central",
                "plotpath": self.scanConfig["plotpath"],
                "modelpath": self.scanConfig["modelpath"],
                "filename": self.input,
                "savename": savename,
                "tag": tag,
                "selection": self.base_selection() + "&&" +
                self.process_selection(process_name) + " && " + bin_cut,
            }

        def combine_config(label):
            """Return the runCombine configuration for the datacard named by ``label``."""
            return {
                "combineOption": self.combineOption,
                "combineOutName": "sig_" + label,
                "cardName": card_prefix + label + ".txt",
                "outtxtName": "sig_" + label + ".txt",
            }

        yields = {}

        # Reported in the result under "disqualified". The check on the
        # number of non-resonant background events that would set this flag
        # is currently disabled, so it always stays False.
        disqualify_srs = False

        # When the first resonant background is a "nottH" sample, just fit a
        # single gaussian. The flag does not depend on the bin or process, so
        # it is decided once; an empty resonant-background list selects the
        # full model instead of raising IndexError.
        simple = bool(self.resonant_bkgs) and "nottH" in self.resonant_bkgs[0]

        for i, bin_cut in enumerate(selection):
            bin_name = "Bin_%d" % i
            # Common tail of every name produced for this bin:
            # <channel>_<bin number>_<idx>.
            bin_label = self.channel + "_" + str(i) + "_" + point_label
            yields[bin_name] = {}

            for process in self.signal + self.resonant_bkgs:
                process_label = process + "_hgg_" + bin_label
                model = makeModel(
                    model_config("CMS-HGG_sigfit_mva_" + process_label,
                                 "hggpdfsmrel_" + process_label, process,
                                 bin_cut))
                model.getTree(self.scanner.getTree())
                yields[bin_name][process] = model.makeSignalModel(
                    "wsig_13TeV",
                    {
                        "replaceNorm": False,
                        "norm_in": -1,
                        "fixParameters": True,
                        "simple": simple,
                    },
                )

            bkg_tag = "CMS_hgg_bkgshape_" + bin_label
            model = makeModel(
                model_config("CMS-HGG_bkg_" + bin_label, bkg_tag,
                             self.nrb_choice, bin_cut))
            model.getTree(self.scanner.getTree())
            # Only the first of the three returned yields is recorded.
            bkg_yield, _bkg_yield_full, _bkg_yield_raw = (
                model.makeBackgroundModel("wbkg_13TeV", bin_label))
            yields[bin_name]["bkg"] = bkg_yield

            # Same background fit on the "data" selection, saved as "dummy".
            # It uses its own configuration dict so the background model's
            # configuration is left untouched. The returned yields are not
            # used by this method.
            data_model = makeModel(
                model_config("dummy", bkg_tag, "data", bin_cut))
            data_model.getTree(self.scanner.getTree())
            data_model.makeBackgroundModel("wdata_13TeV", bin_label + "dummy")

        tagList = [self.channel + "_" + str(x) for x in range(len(selection))]
        sigList = [self.signal[0] + "_hgg"]
        bkgList = ["bkg_mass"] + [bkg + "_hgg" for bkg in self.resonant_bkgs]
        card_options = {"sm_higgs_unc": self.sm_higgs_unc}

        # Combined datacard over all bins, then one datacard per bin.
        datacard = makeCards(
            self.scanConfig["modelpath"],
            card_prefix + point_label + ".txt",
            card_options,
        )
        datacard.WriteCard(sigList, bkgList, tagList, "_" + point_label)
        for tag in tagList:
            tag_card = makeCards(
                self.scanConfig["modelpath"],
                card_prefix + point_label + "_" + tag + ".txt",
                {"sm_higgs_unc": self.sm_higgs_unc},
            )
            tag_card.WriteCard(sigList, bkgList, [tag], "_" + point_label)

        (exp_lim, exp_lim_up1sigma, exp_lim_down1sigma, exp_lim_up2sigma,
         exp_lim_down2sigma) = self.scanner.runCombine(
             combine_config(point_label))
        if "Significance" in self.combineOption:
            # Store the reciprocal of the significance so that a smaller
            # value is better and the POI can still be minimized. Only the
            # central value is inverted.
            # NOTE(review): a central value of exactly zero is not handled
            # here -- confirm whether combine can return it.
            exp_lim = 1. / exp_lim

        combined_limits = [
            exp_lim, exp_lim_up1sigma, exp_lim_down1sigma, exp_lim_up2sigma,
            exp_lim_down2sigma
        ]
        exp_lim_full = {"combined": combined_limits}

        # Per-bin results, keyed by tag. These are stored exactly as returned
        # by runCombine (no inversion).
        for tag in tagList:
            exp_lim_full[tag] = list(
                self.scanner.runCombine(
                    combine_config(point_label + "_" + tag)))

        result = {
            "idx": idx,
            "x": [float(x) for x in m_point],
            "exp_lim": list(combined_limits),
            "exp_lim_full": exp_lim_full,
            "selection": selection,
            "yields": yields,
            "disqualified": str(disqualify_srs),
        }

        temp_results[",".join(selection) + point_label] = result

        if self.verbose:
            print("[GUIDED OPTIMIZER]", result)
Exemplo n.º 3
0
def calculate_bins_significance(idx, scanConfig, scanner, cuts, results):
    """Build models and a datacard for one set of MVA cuts and run combine.

    Bin ``i`` selects ``mva_score > cuts[i]`` and, for every bin but the
    last, also ``mva_score < cuts[i + 1]``. For each bin a signal model is
    made for every name in ``args.signal`` and ``args.resonant_bkgs`` (both
    comma-separated), followed by one background model. A datacard covering
    all bins is then written and passed to ``scanner.runCombine``.

    The function reads the module-level ``args`` (``signal``,
    ``resonant_bkgs``, ``limit``) and ``process_dict`` (process name ->
    sequence of process ids).

    Args:
        idx: integer identifier of this cut combination; embedded in every
            save name, tag, datacard name and combine output name.
        scanConfig: mapping providing ``"plotpath"``, ``"modelpath"``,
            ``"filename"``, ``"selection"`` and ``"tag"``.
        scanner: object providing ``getTree()`` and ``runCombine(config)``.
        cuts: sequence of lower MVA-score edges, one per bin.
        results: mapping that receives the result dict under ``idx``.

    Returns:
        None. The result is handed back through ``results``.
    """
    tree = scanner.getTree()
    nBins = len(cuts)
    point_label = str(idx)

    # Single-argument print(...) produces the same output under Python 2's
    # print statement and Python 3's print function.
    print("[BINNING_SCRIPT_INFO] Testing cut combo %d" % idx)

    signal_names = args.signal.split(",")
    model_names = signal_names + args.resonant_bkgs.split(",")

    # The mass-point selection depends only on the analysis, so build it once.
    if "FCNC" not in signal_names[0]:
        # this is the ttH analysis, we use M127 for optimization
        signal_sample_selection = " && signal_mass_category == 127"
    else:
        # this is FCNC, use M125 since we don't have other mass points for
        # FCNC: accept all FCNC events, only M125 Madgraph (not Powheg) for
        # all other SM Higgs  # FIXME
        signal_sample_selection = " && ((process_id >= 22 && process_id <= 25) || (signal_mass_label == 0 && signal_mass_category == 125))"

    for i in range(nBins):
        # MVA score cut: lower edge always, upper edge for all but the last bin
        cut = "mva_score > " + str(cuts[i])
        if i != nBins - 1:
            cut += " && mva_score < " + str(cuts[i + 1])

        # Common tail of every name produced for this bin: <tag>_<bin>_<idx>
        bin_label = scanConfig["tag"] + "_" + str(i) + "_" + point_label

        # Make signal model
        for signal in model_names:
            savename = "CMS-HGG_sigfit_mva_" + signal + "_hgg_" + bin_label
            processSelection = "(" + " || ".join(
                "process_id == " + str(process_id)
                for process_id in process_dict[signal]) + ")"
            sigModelConfig = {
                "var": "mass",
                "weightVar": "weight",
                "plotpath": scanConfig["plotpath"],
                "modelpath": scanConfig["modelpath"],
                "filename": scanConfig["filename"],
                "savename": savename,
                "tag": "hggpdfsmrel_" + signal + "_hgg_" + bin_label,
                "selection": scanConfig["selection"] + " && " + cut +
                " && " + processSelection + signal_sample_selection,
            }
            print("\n[BINNING_SCRIPT_INFO] Making signal model with selection %s"
                  % sigModelConfig["selection"])
            print("\n[BINNING_SCRIPT_INFO] Saving as %s" % savename)
            print("\n\n")
            model = makeModel(sigModelConfig)
            model.getTree(tree)
            model.makeSignalModel("wsig_13TeV", {
                "replaceNorm": False,
                "norm_in": -1,
                "fixParameters": True
            })

        # Make background model
        bkgModelConfig = {
            "var": "mass",
            "weightVar": "weight",
            "plotpath": scanConfig["plotpath"],
            "modelpath": scanConfig["modelpath"],
            "filename": scanConfig["filename"],
            "selection": scanConfig["selection"] + " && " + cut +
            " && (process_id == 1 || process_id == 2 || process_id == 3 || process_id == 5 || process_id == 6 || process_id == 7 || process_id == 9 || process_id == 13 || process_id == 18 || process_id == 19 || process_id == 20 || process_id == 21 || process_id == 26)",
            "savename": "CMS-HGG_bkg_" + bin_label,
            "tag": "CMS_hgg_bkgshape_" + bin_label,
        }

        print("[BINNING_SCRIPT_INFO] Making background model with selection %s"
              % bkgModelConfig["selection"])

        model = makeModel(bkgModelConfig)
        model.getTree(tree)
        model.makeBackgroundModel("wbkg_13TeV", bin_label)

    # Make datacard
    print("[BINNING_SCRIPT_INFO] Making datacard for cut combo %d" % idx)
    # Only the first signal is the datacard signal process. The signal models
    # above are named per individual signal, so the raw comma-separated
    # option string would not match any of them when several are given.
    sigList = [signal_names[0] + "_hgg"]
    bkgList = ["bkg_mass"] + [
        bkg + "_hgg" for bkg in args.resonant_bkgs.split(",")
    ]

    card_name = "CMS-HGG_mva_13TeV_datacard_" + point_label + ".txt"
    card = makeCards(scanConfig["modelpath"], card_name)
    tagList = [(scanConfig["tag"] + "_%d" % x) for x in range(nBins)]
    print("tagList is %s \n\n\n\n" % (tagList,))
    card.WriteCard(sigList, bkgList, tagList, "_" + point_label)

    if args.limit:
        combineOption = "AsymptoticLimits -m 125 "
    else:
        combineOption = "Significance --significance "
    combineConfig = {
        "combineOption": combineOption,
        "combineOutName": "sig_" + point_label,
        "cardName": card_name,
        "outtxtName": "sig_" + point_label + ".txt",
        "grepContent": "\"Significance\""
    }

    significance = float(scanner.runCombine(combineConfig))
    print("[BINNING_SCRIPT_INFO] %s for cut combo %d is %.6f" % (
        "Significance" if not args.limit else "Limit", idx, significance))

    result = {
        "mva_cuts": [str(edge) for edge in cuts],
        ("limit" if args.limit else "significance"): significance,
        "datacard": scanConfig["modelpath"] + combineConfig["cardName"]
    }

    results[idx] = result
    print(results)