Example #1
def estimationMethodAndClassMapGenerator():
    estimationMethodList = [
        DataEstimation(era, args.base_path, channel),
        ggHEstimation("ggH125", era, args.base_path, channel),
        qqHEstimation("qqH125", era, args.base_path, channel),
        HWWEstimation(era, args.base_path, channel),
    ]
    return estimationMethodList
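
# NB: `era`, `args.base_path` and `channel` are free names in the helper
# above; they are assumed to be defined in the enclosing scope of the
# original script.
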
def main(args):
    # Systematics container
    logger.info("Set up shape variations.")
    systematics = Systematics("shapes.root", num_threads=args.num_threads)

    # Era selection
    from shape_producer.estimation_methods_2017 import (
        DataEstimation, ZTTEstimation, ZTTEmbeddedEstimation, ZLEstimation,
        ZJEstimation, TTLEstimation, TTJEstimation, TTTEstimation,
        VVLEstimation, VVTEstimation, VVJEstimation, WEstimation,
        ggHEstimation, qqHEstimation, VHEstimation, WHEstimation,
        ZHEstimation, ttHEstimation, QCDEstimation_ABCD_TT_ISO2,
        QCDEstimation_SStoOS_MTETEM, NewFakeEstimationLT,
        NewFakeEstimationTT, HWWEstimation, ggHWWEstimation,
        qqHWWEstimation)

    from shape_producer.era import Run2017
    era = Run2017(args.datasets)

    # Channels and processes
    directory = args.directory
    et_friend_directory = args.et_friend_directory
    mt_friend_directory = args.mt_friend_directory
    tt_friend_directory = args.tt_friend_directory
    em_friend_directory = args.em_friend_directory
    ff_friend_directory = args.fake_factor_friend_directory

    mt = MTSM2017()
    mt_processes = {
        "data"  : Process("data_obs", DataEstimation      (era, directory, mt, friend_directory=mt_friend_directory)),
        "ZTT"   : Process("ZTT",      ZTTEstimation       (era, directory, mt, friend_directory=mt_friend_directory)),
        "ZJ"    : Process("ZJ",       ZJEstimation        (era, directory, mt, friend_directory=mt_friend_directory)),
        "ZL"    : Process("ZL",       ZLEstimation        (era, directory, mt, friend_directory=mt_friend_directory)),
        "TTT"   : Process("TTT",      TTTEstimation       (era, directory, mt, friend_directory=mt_friend_directory)),
        "TTJ"   : Process("TTJ",      TTJEstimation       (era, directory, mt, friend_directory=mt_friend_directory)),
        "TTL"   : Process("TTL",      TTLEstimation       (era, directory, mt, friend_directory=mt_friend_directory)),
        "VVT"   : Process("VVT",      VVTEstimation       (era, directory, mt, friend_directory=mt_friend_directory)),
        "VVJ"   : Process("VVJ",      VVJEstimation       (era, directory, mt, friend_directory=mt_friend_directory)),
        "VVL"   : Process("VVL",      VVLEstimation       (era, directory, mt, friend_directory=mt_friend_directory)),
        "W"     : Process("W",        WEstimation         (era, directory, mt, friend_directory=mt_friend_directory)),
        "ggH125": Process("ggH125",   ggHEstimation       ("ggH125", era, directory, mt, friend_directory=mt_friend_directory)),
        "qqH125": Process("qqH125",   qqHEstimation       ("qqH125", era, directory, mt, friend_directory=mt_friend_directory))
        }

    mt_processes["QCD"] = Process("QCD", QCDEstimation_SStoOS_MTETEM(era, directory, mt,
            [mt_processes[process] for process in ["ZTT", "ZL", "ZJ", "W", "TTL", "TTJ", "VVL", "VVJ"]],
            mt_processes["data"], friend_directory=mt_friend_directory, extrapolation_factor=1.00))

    # Variables and categories
    variable_names = ["m_vis"]
    binning = yaml.load(open(args.binning), Loader=yaml.Loader)
    variables = {
        v: Variable(v,
                    VariableBinning(binning["control"]["mt"][v]["bins"]),
                    expression=binning["control"]["mt"][v]["expression"])
        for v in variable_names}
    cuts = Cuts()
    mt_categories = []
    for var in variable_names:
        mt_categories.append(
            Category(
                var,
                mt,
                cuts,
                variable=variables[var]))

    # Nominal histograms
    for process, category in product(mt_processes.values(), mt_categories):
        systematics.add(
            Systematic(
                category=category,
                process=process,
                analysis="smhtt",
                era=era,
                variation=Nominal(),
                mass="125"))

    # Produce histograms
    logger.info("Start producing shapes.")
    systematics.produce()
    logger.info("Done producing shapes.")
def main(args):
    # Container for all distributions to be drawn
    logger.info("Set up shape variations.")
    systematics = Systematics(
        "{}_shapes.root".format(args.tag),
        num_threads=args.num_threads,
        skip_systematic_variations=args.skip_systematic_variations)

    # Era selection
    if "2017" in args.era:
        from shape_producer.estimation_methods_2017 import (
            DataEstimation, ZTTEstimation, ZTTEmbeddedEstimation,
            ZLEstimation, ZJEstimation, TTLEstimation, TTJEstimation,
            TTTEstimation, VVLEstimation, VVTEstimation, VVJEstimation,
            WEstimation, ggHEstimation, qqHEstimation, VHEstimation,
            WHEstimation, ZHEstimation, ttHEstimation,
            QCDEstimation_ABCD_TT_ISO2, QCDEstimation_SStoOS_MTETEM,
            NewFakeEstimationLT, NewFakeEstimationTT, HWWEstimation,
            ggHWWEstimation, qqHWWEstimation, DYJetsToLLEstimation,
            TTEstimation, VVEstimation)

        from shape_producer.era import Run2017
        era = Run2017(args.datasets)

    else:
        logger.critical("Era {} is not implemented.".format(args.era))
        raise Exception

    wp_dict_mva = {
        "vvloose": "byVVLooseIsolationMVArun2017v2DBoldDMwLT2017_2",
        "vloose": "byVLooseIsolationMVArun2017v2DBoldDMwLT2017_2",
        "loose": "byLooseIsolationMVArun2017v2DBoldDMwLT2017_2",
        "medium": "byMediumIsolationMVArun2017v2DBoldDMwLT2017_2",
        "tight": "byTightIsolationMVArun2017v2DBoldDMwLT2017_2",
        "vtight": "byVTightIsolationMVArun2017v2DBoldDMwLT2017_2",
        "vvtight": "byVVTightIsolationMVArun2017v2DBoldDMwLT2017_2",
    }
    wp_dict_deeptau = {
        "vvvloose": "byVVVLooseDeepTau2017v2p1VSjet_2",
        "vvloose": "byVVLooseDeepTau2017v2p1VSjet_2",
        "vloose": "byVLooseDeepTau2017v2p1VSjet_2",
        "loose": "byLooseDeepTau2017v2p1VSjet_2",
        "medium": "byMediumDeepTau2017v2p1VSjet_2",
        "tight": "byTightDeepTau2017v2p1VSjet_2",
        "vtight": "byVTightDeepTau2017v2p1VSjet_2",
        "vvtight": "byVVTightDeepTau2017v2p1VSjet_2",
        "mm": "0<1",
    }
    wp_dict = wp_dict_deeptau

    logger.info("Produce shapes for the %s working point of the MVA Tau ID",
                args.working_point)
    # Channels and processes
    # yapf: disable
    directory = args.directory
    ff_friend_directory = args.fake_factor_friend_directory
    mt = MTTauID2017()
    mt.cuts.add(Cut(wp_dict[args.working_point]+">0.5", "tau_iso"))
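    # e.g. args.working_point == "tight" translates into the cut
    # "byTightDeepTau2017v2p1VSjet_2>0.5"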
    # if args.gof_channel == "mt":
    #     mt.cuts.remove("m_t")
    #     mt.cuts.remove("dZeta")
    #     mt.cuts.remove("absEta")
    mt_processes = {
        "data"  : Process("data_obs", DataEstimation      (era, directory, mt, friend_directory=[])),
        "ZTT"   : Process("ZTT",      ZTTEstimation       (era, directory, mt, friend_directory=[])),
        "EMB"   : Process("EMB",      ZTTEmbeddedEstimation  (era, directory, mt, friend_directory=[])),
        "ZJ"    : Process("ZJ",       ZJEstimation        (era, directory, mt, friend_directory=[])),
        "ZL"    : Process("ZL",       ZLEstimation        (era, directory, mt, friend_directory=[])),
        "TTT"   : Process("TTT",      TTTEstimation       (era, directory, mt, friend_directory=[])),
        "TTJ"   : Process("TTJ",      TTJEstimation       (era, directory, mt, friend_directory=[])),
        "TTL"   : Process("TTL",      TTLEstimation       (era, directory, mt, friend_directory=[])),
        "VVT"   : Process("VVT",      VVTEstimation       (era, directory, mt, friend_directory=[])),
        "VVJ"   : Process("VVJ",      VVJEstimation       (era, directory, mt, friend_directory=[])),
        "VVL"   : Process("VVL",      VVLEstimation       (era, directory, mt, friend_directory=[])),
        "W"     : Process("W",        WEstimation         (era, directory, mt, friend_directory=[])),
        }
    # TODO: Include alternative jet fake estimation.
    # mt_processes["FAKES"] = Process("jetFakes", NewFakeEstimationLT(era, directory, mt, [mt_processes[process] for process in ["EMB", "ZL", "TTL", "VVL"]], mt_processes["data"], friend_directory=mt_friend_directory+[ff_friend_directory]))
    # mt_processes["FAKES"] = Process("jetFakes", NewFakeEstimationLT(era, directory, mt, [mt_processes[process] for process in ["ZTT", "ZL", "TTL", "TTT", "VVL", "VVT"]], mt_processes["data"], friend_directory=mt_friend_directory+[ff_friend_directory]))
    mt_processes["QCD"] = Process("QCD", QCDEstimation_SStoOS_MTETEM(era, directory, mt,
            [mt_processes[process] for process in ["ZTT", "ZL", "ZJ", "TTT", "TTJ", "TTL", "VVT", "VVJ", "VVL", "W"]],
            mt_processes["data"], friend_directory=[], extrapolation_factor=1.17))
    mt_processes["QCDEMB"] = Process("QCDEMB", QCDEstimation_SStoOS_MTETEM(era, directory, mt,
            [mt_processes[process] for process in ["EMB", "ZL", "ZJ", "TTJ", "TTL", "VVJ", "VVL", "W"]],
            mt_processes["data"], friend_directory=[], extrapolation_factor=1.17))

    # TODO: Include Z-> mumu control region.
    mm = MMTauID2017()
    mm_processes = {
        "data"  : Process("data_obs", DataEstimation       (era, directory, mm, friend_directory=[])),
        "ZLL"   : Process("ZLL",      DYJetsToLLEstimation (era, directory, mm, friend_directory=[])),
        "MMEMB" : Process("MMEMB",    ZTTEmbeddedEstimation(era, directory, mm, friend_directory=[])),
        "TT"    : Process("TT",       TTEstimation         (era, directory, mm, friend_directory=[])),
        "VV"    : Process("VV",       VVEstimation         (era, directory, mm, friend_directory=[])),
        "W"     : Process("W",        WEstimation          (era, directory, mm, friend_directory=[])),
        }
    # mm_processes["FAKES"] = None  TODO: Add fake factors or alternative fake rate estimation here
    mm_processes["QCD"] = Process("QCD", QCDEstimation_SStoOS_MTETEM(era, directory, mm,
            [mm_processes[process] for process in ["ZLL", "W", "TT", "VV"]],
            mm_processes["data"], friend_directory=[], extrapolation_factor=1.17))
    mm_processes["QCDEMB"] = Process("QCDEMB", QCDEstimation_SStoOS_MTETEM(era, directory, mm,
            [mm_processes[process] for process in ["MMEMB", "W"]],
            mm_processes["data"], friend_directory=[], extrapolation_factor=1.17))



    # Variables and categories
    binning = yaml.load(open(args.binning), Loader=yaml.Loader)  # explicit Loader for PyYAML >= 5

    mt_categories = []
    # Goodness of fit shapes
    if args.gof_channel == "mt":
        score = Variable(
                args.gof_variable,
                VariableBinning(binning["control"]["mt"][args.gof_variable]["bins"]),
                expression=binning["control"]["mt"][args.gof_variable]["expression"])
        if "cut" in binning["control"]["mt"][args.gof_variable].keys():
            cuts=Cuts(Cut(binning["control"]["mt"][args.gof_variable]["cut"], "binning"))
        else:
            cuts=Cuts()
        mt_categories.append(
            Category(
                args.gof_variable,
                mt,
                cuts,
                variable=score))
    elif "mt" in args.channels:
        for cat in binning["categories"]["mt"]:
            category = Category(
                        cat,
                        mt,
                        Cuts(Cut(binning["categories"]["mt"][cat]["cut"], "category")),
                        variable=Variable(binning["categories"]["mt"][cat]["var"],
                            VariableBinning(binning["categories"]["mt"][cat]["bins"]),
                            expression=binning["categories"]["mt"][cat]["expression"]))
            mt_categories.append(category)

    # yapf: enable
    if "mt" in [args.gof_channel] + args.channels:
        for process, category in product(mt_processes.values(), mt_categories):
            systematics.add(
                Systematic(category=category,
                           process=process,
                           analysis="smhtt",
                           era=era,
                           variation=Nominal(),
                           mass="125"))

    mm_categories = []
    if "mm" in args.channels:
        category = Category("control",
                            mm,
                            Cuts(),
                            variable=Variable("m_vis",
                                              ConstantBinning(1, 50, 150),
                                              "m_vis"))
        mm_categories.append(category)

    if "mm" in args.channels:
        for process, category in product(mm_processes.values(), mm_categories):
            systematics.add(
                Systematic(category=category,
                           process=process,
                           analysis="smhtt",
                           era=era,
                           variation=Nominal(),
                           mass="125"))

    # Shapes variations

    # TODO: Check if prefiring weights are necessary for tau id sf measurement.
    # Prefiring weights
    prefiring_variations = [
        ReplaceWeight("CMS_prefiring_Run2017", "prefireWeight",
                      Weight("prefiringweightup", "prefireWeight"), "Up"),
        ReplaceWeight("CMS_prefiring_Run2017", "prefireWeight",
                      Weight("prefiringweightdown", "prefireWeight"), "Down"),
    ]
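    # ReplaceWeight presumably swaps the nominal "prefireWeight" for the
    # shifted branch when filling the Up/Down histograms.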
    for variation in prefiring_variations:
        for process_nick in [
                "ZTT",
                "ZL",
                "ZJ",
                "W",
                "TTT",
                "TTL",
                "TTJ",
                "VVT",
                "VVJ",
                "VVL",
        ]:
            if "mt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mt_processes[process_nick],
                    channel=mt,
                    era=era)

    # MC tau energy scale
    tau_es_3prong_variations = create_systematic_variations(
        "CMS_scale_mc_t_3prong_Run2017", "tauEsThreeProng", DifferentPipeline)
    tau_es_1prong_variations = create_systematic_variations(
        "CMS_scale_mc_t_1prong_Run2017", "tauEsOneProng", DifferentPipeline)
    tau_es_1prong1pizero_variations = create_systematic_variations(
        "CMS_scale_mc_t_1prong1pizero_Run2017", "tauEsOneProngOnePiZero",
        DifferentPipeline)
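    # Each create_systematic_variations call presumably yields an Up and a
    # Down variation read from dedicated pipelines (DifferentPipeline), here
    # separately for the three tau decay modes.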
    for variation in tau_es_3prong_variations + tau_es_1prong_variations + tau_es_1prong1pizero_variations:
        for process_nick in [
                "ZTT",
                "TTT",
                "TTL",
                "VVL",
                "VVT",  # "FAKES"
        ]:
            if "mt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mt_processes[process_nick],
                    channel=mt,
                    era=era)
    # Tau energy scale
    tau_es_3prong_variations = create_systematic_variations(
        "CMS_scale_t_3prong_Run2017", "tauEsThreeProng", DifferentPipeline)
    tau_es_1prong_variations = create_systematic_variations(
        "CMS_scale_t_1prong_Run2017", "tauEsOneProng", DifferentPipeline)
    tau_es_1prong1pizero_variations = create_systematic_variations(
        "CMS_scale_t_1prong1pizero_Run2017", "tauEsOneProngOnePiZero",
        DifferentPipeline)
    for variation in tau_es_3prong_variations + tau_es_1prong_variations + tau_es_1prong1pizero_variations:
        for process_nick in [
                "ZTT",
                "TTT",
                "TTL",
                "VVT",
                "VVL",
                "EMB",  # "FAKES"
        ]:
            if "mt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mt_processes[process_nick],
                    channel=mt,
                    era=era)

    # Jet energy scale

    # Inclusive JES shapes
    jet_es_variations = []
    # jet_es_variations += create_systematic_variations(
    #     "CMS_scale_j_Run2017", "jecUnc", DifferentPipeline)

    # Split JES shapes
    jet_es_variations += create_systematic_variations(
        "CMS_scale_j_eta0to3_Run2017", "jecUncEta0to3", DifferentPipeline)
    jet_es_variations += create_systematic_variations(
        "CMS_scale_j_eta0to5_Run2017", "jecUncEta0to5", DifferentPipeline)
    jet_es_variations += create_systematic_variations(
        "CMS_scale_j_eta3to5_Run2017", "jecUncEta3to5", DifferentPipeline)
    jet_es_variations += create_systematic_variations(
        "CMS_scale_j_RelativeBal_Run2017", "jecUncRelativeBal",
        DifferentPipeline)
    jet_es_variations += create_systematic_variations(
        "CMS_scale_j_RelativeSample_Run2017", "jecUncRelativeSample",
        DifferentPipeline)

    for variation in jet_es_variations:
        for process_nick in [
                "ZTT",
                "ZL",
                "ZJ",
                "W",
                "TTT",
                "TTL",
                "TTJ",
                "VVT",
                "VVJ",
                "VVL",
        ]:
            if "mt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mt_processes[process_nick],
                    channel=mt,
                    era=era)

    # MET energy scale. Note: only those variations for non-resonant processes are used in the stat. inference
    met_unclustered_variations = create_systematic_variations(
        "CMS_scale_met_unclustered", "metUnclusteredEn", DifferentPipeline)
    for variation in met_unclustered_variations:  # + met_clustered_variations:
        for process_nick in [
                "ZTT",
                "ZL",
                "ZJ",
                "W",
                "TTT",
                "TTL",
                "TTJ",
                "VVT",
                "VVJ",
                "VVL",
        ]:
            if "mt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mt_processes[process_nick],
                    channel=mt,
                    era=era)

    # Recoil correction unc
    recoil_resolution_variations = create_systematic_variations(
        "CMS_htt_boson_reso_met_Run2017", "metRecoilResolution",
        DifferentPipeline)
    recoil_response_variations = create_systematic_variations(
        "CMS_htt_boson_scale_met_Run2017", "metRecoilResponse",
        DifferentPipeline)
    for variation in recoil_resolution_variations + recoil_response_variations:
        for process_nick in ["ZTT", "ZL", "ZJ", "W"]:
            if "mt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mt_processes[process_nick],
                    channel=mt,
                    era=era)

    # Z pt reweighting
    zpt_variations = create_systematic_variations("CMS_htt_dyShape_Run2017",
                                                  "zPtReweightWeight",
                                                  SquareAndRemoveWeight)
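    # SquareAndRemoveWeight presumably builds the Up shape by squaring
    # zPtReweightWeight and the Down shape by removing it.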
    for variation in zpt_variations:
        for process_nick in ["ZTT", "ZL", "ZJ"]:
            if "mt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mt_processes[process_nick],
                    channel=mt,
                    era=era)

    # top pt reweighting
    top_pt_variations = create_systematic_variations("CMS_htt_ttbarShape",
                                                     "topPtReweightWeight",
                                                     SquareAndRemoveWeight)
    for variation in top_pt_variations:
        for process_nick in ["TTT", "TTL", "TTJ"]:
            if "mt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mt_processes[process_nick],
                    channel=mt,
                    era=era)

    # TODO: likely not necessary, to be checked
    # jet to tau fake efficiency
    jet_to_tau_fake_variations = []
    jet_to_tau_fake_variations.append(
        AddWeight("CMS_htt_jetToTauFake_Run2017", "jetToTauFake_weight",
                  Weight("max(1.0-pt_2*0.002, 0.6)", "jetToTauFake_weight"),
                  "Up"))
    jet_to_tau_fake_variations.append(
        AddWeight("CMS_htt_jetToTauFake_Run2017", "jetToTauFake_weight",
                  Weight("min(1.0+pt_2*0.002, 1.4)", "jetToTauFake_weight"),
                  "Down"))
    for variation in jet_to_tau_fake_variations:
        for process_nick in ["ZJ", "TTJ", "W", "VVJ"]:
            if "mt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mt_processes[process_nick],
                    channel=mt,
                    era=era)

    # ZL fakes energy scale
    mu_fake_es_1prong_variations = create_systematic_variations(
        "CMS_ZLShape_mt_1prong_Run2017", "tauMuFakeEsOneProng",
        DifferentPipeline)
    mu_fake_es_1prong1pizero_variations = create_systematic_variations(
        "CMS_ZLShape_mt_1prong1pizero_Run2017", "tauMuFakeEsOneProngPiZeros",
        DifferentPipeline)

    if "mt" in [args.gof_channel] + args.channels:
        for process_nick in ["ZL"]:
            for variation in mu_fake_es_1prong_variations + mu_fake_es_1prong1pizero_variations:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mt_processes[process_nick],
                    channel=mt,
                    era=era)

    # lepton trigger efficiency
    lep_trigger_eff_variations = []
    lep_trigger_eff_variations.append(
        AddWeight(
            "CMS_eff_trigger_mt_Run2017", "trg_mt_eff_weight",
            Weight("(1.0*(pt_1<=25)+1.02*(pt_1>25))", "trg_mt_eff_weight"),
            "Up"))
    lep_trigger_eff_variations.append(
        AddWeight(
            "CMS_eff_trigger_mt_Run2017", "trg_mt_eff_weight",
            Weight("(1.0*(pt_1<=25)+0.98*(pt_1>25))", "trg_mt_eff_weight"),
            "Down"))
    for variation in lep_trigger_eff_variations:
        for process_nick in [
                "ZTT",
                "ZL",
                "ZJ",
                "W",
                "TTT",
                "TTL",
                "TTJ",
                "VVL",
                "VVT",
                "VVJ",
        ]:
            if "mt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mt_processes[process_nick],
                    channel=mt,
                    era=era)
        for process_nick in ["ZLL", "TT", "VV", "W"]:
            if "mm" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mm_processes[process_nick],
                    channel=mm,
                    era=era)

    lep_trigger_eff_variations = []
    lep_trigger_eff_variations.append(
        AddWeight(
            "CMS_eff_trigger_emb_mt_Run2017", "trg_mt_eff_weight",
            Weight("(1.0*(pt_1<=25)+1.02*(pt_1>25))", "trg_mt_eff_weight"),
            "Up"))
    lep_trigger_eff_variations.append(
        AddWeight(
            "CMS_eff_trigger_emb_mt_Run2017", "trg_mt_eff_weight",
            Weight("(1.0*(pt_1<=25)+0.98*(pt_1>25))", "trg_mt_eff_weight"),
            "Down"))
    for variation in lep_trigger_eff_variations:
        for process_nick in ["EMB"]:
            if "mt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mt_processes[process_nick],
                    channel=mt,
                    era=era)
        for process_nick in ["MMEMB"]:
            if "mm" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mm_processes[process_nick],
                    channel=mm,
                    era=era)

    # b tagging
    # btag_eff_variations = create_systematic_variations(
    #     "CMS_htt_eff_b_Run2017", "btagEff", DifferentPipeline)
    # mistag_eff_variations = create_systematic_variations(
    #     "CMS_htt_mistag_b_Run2017", "btagMistag", DifferentPipeline)
    # for variation in btag_eff_variations + mistag_eff_variations:
    #     for process_nick in [
    #             "ZTT", "ZL", "ZJ", "W", "TTT", "TTL", "TTJ", "VVT", "VVJ",
    #             "VVL"
    #     ]:
    #         if "et" in [args.gof_channel] + args.channels:
    #             systematics.add_systematic_variation(
    #                 variation=variation,
    #                 process=et_processes[process_nick],
    #                 channel=et,
    #                 era=era)
    #         if "mt" in [args.gof_channel] + args.channels:
    #             systematics.add_systematic_variation(
    #                 variation=variation,
    #                 process=mt_processes[process_nick],
    #                 channel=mt,
    #                 era=era)
    #         if "tt" in [args.gof_channel] + args.channels:
    #             systematics.add_systematic_variation(
    #                 variation=variation,
    #                 process=tt_processes[process_nick],
    #                 channel=tt,
    #                 era=era)
    #     for process_nick in ["ZTT", "ZL", "W", "TTT", "TTL",  "VVL", "VVT"
    #                         ]:
    #         if "em" in [args.gof_channel] + args.channels:
    #             systematics.add_systematic_variation(
    #                 variation=variation,
    #                 process=em_processes[process_nick],
    #                 channel=em,
    #                 era=era)

    # Embedded event specifics
    # Tau energy scale
    tau_es_3prong_variations = create_systematic_variations(
        "CMS_scale_emb_t_3prong_Run2017", "tauEsThreeProng", DifferentPipeline)
    tau_es_1prong_variations = create_systematic_variations(
        "CMS_scale_emb_t_1prong_Run2017", "tauEsOneProng", DifferentPipeline)
    tau_es_1prong1pizero_variations = create_systematic_variations(
        "CMS_scale_emb_t_1prong1pizero_Run2017", "tauEsOneProngOnePiZero",
        DifferentPipeline)
    for variation in tau_es_3prong_variations + tau_es_1prong_variations + tau_es_1prong1pizero_variations:
        for process_nick in ["EMB"]:  #,  "FAKES"]:
            if "mt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mt_processes[process_nick],
                    channel=mt,
                    era=era)

    mt_decayMode_variations = []
    mt_decayMode_variations.append(
        ReplaceWeight(
            "CMS_3ProngEff_Run2017", "decayMode_SF",
            Weight("embeddedDecayModeWeight_effUp_pi0Nom", "decayMode_SF"),
            "Up"))
    mt_decayMode_variations.append(
        ReplaceWeight(
            "CMS_3ProngEff_Run2017", "decayMode_SF",
            Weight("embeddedDecayModeWeight_effDown_pi0Nom", "decayMode_SF"),
            "Down"))
    mt_decayMode_variations.append(
        ReplaceWeight(
            "CMS_1ProngPi0Eff_Run2017", "decayMode_SF",
            Weight("embeddedDecayModeWeight_effNom_pi0Up", "decayMode_SF"),
            "Up"))
    mt_decayMode_variations.append(
        ReplaceWeight(
            "CMS_1ProngPi0Eff_Run2017", "decayMode_SF",
            Weight("embeddedDecayModeWeight_effNom_pi0Down", "decayMode_SF"),
            "Down"))
    for variation in mt_decayMode_variations:
        for process_nick in ["EMB"]:
            if "mt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mt_processes[process_nick],
                    channel=mt,
                    era=era)
    # 10% of the removed events in the ttbar simulation (ttbar -> real tau tau
    # events) are added to / subtracted from the ZTT shape and used as a
    # systematic.
    tttautau_process_mt = Process(
        "TTT", TTTEstimation(era, directory, mt, friend_directory=[]))
    if "mt" in [args.gof_channel] + args.channels:
        for category in mt_categories:
            mt_processes['ZTTpTTTauTauDown'] = Process(
                "ZTTpTTTauTauDown",
                AddHistogramEstimationMethod(
                    "AddHistogram", "nominal", era, directory, mt,
                    [mt_processes["EMB"], tttautau_process_mt], [1.0, -0.1]))
            systematics.add(
                Systematic(category=category,
                           process=mt_processes['ZTTpTTTauTauDown'],
                           analysis="smhtt",
                           era=era,
                           variation=Relabel("CMS_htt_emb_ttbar_Run2017",
                                             "Down"),
                           mass="125"))

            mt_processes['ZTTpTTTauTauUp'] = Process(
                "ZTTpTTTauTauUp",
                AddHistogramEstimationMethod(
                    "AddHistogram", "nominal", era, directory, mt,
                    [mt_processes["EMB"], tttautau_process_mt], [1.0, 0.1]))
            systematics.add(
                Systematic(category=category,
                           process=mt_processes['ZTTpTTTauTauUp'],
                           analysis="smhtt",
                           era=era,
                           variation=Relabel("CMS_htt_emb_ttbar_Run2017",
                                             "Up"),
                           mass="125"))

    # jetfakes
    fake_factor_variations_mt = []
    for systematic_shift in [
            "ff_qcd{ch}_syst_Run2017{shift}",
            "ff_qcd_dm0_njet0{ch}_stat_Run2017{shift}",
            "ff_qcd_dm0_njet1{ch}_stat_Run2017{shift}",
            #"ff_qcd_dm1_njet0{ch}_stat_Run2017{shift}",
            #"ff_qcd_dm1_njet1{ch}_stat_Run2017{shift}",
            "ff_w_syst_Run2017{shift}",
            "ff_w_dm0_njet0{ch}_stat_Run2017{shift}",
            "ff_w_dm0_njet1{ch}_stat_Run2017{shift}",
            #"ff_w_dm1_njet0{ch}_stat_Run2017{shift}",
            #"ff_w_dm1_njet1{ch}_stat_Run2017{shift}",
            "ff_tt_syst_Run2017{shift}",
            "ff_tt_dm0_njet0_stat_Run2017{shift}",
            "ff_tt_dm0_njet1_stat_Run2017{shift}",
            #"ff_tt_dm1_njet0_stat_Run2017{shift}",
            #"ff_tt_dm1_njet1_stat_Run2017{shift}"
    ]:
        for shift_direction in ["Up", "Down"]:
            fake_factor_variations_mt.append(
                ReplaceWeight(
                    "CMS_%s" % (systematic_shift.format(
                        ch='_mt', shift="").replace("_dm0", "")),
                    "fake_factor",
                    Weight(
                        "ff2_{syst}".format(syst=systematic_shift.format(
                            ch="", shift="_%s" %
                            shift_direction.lower()).replace("_Run2017", "")),
                        "fake_factor"), shift_direction))
    # if "mt" in [args.gof_channel] + args.channels:
    #     for variation in fake_factor_variations_mt:
    #         systematics.add_systematic_variation(
    #             variation=variation,
    #             process=mt_processes["FAKES"],
    #             channel=mt,
    #             era=era)

    # Produce histograms
    logger.info("Start producing shapes.")
    systematics.produce()
    logger.info("Done producing shapes.")
Example #4
def main(args):
    # Container for all distributions to be drawn
    logger.info("Set up shape variations.")
    systematics = Systematics(
        "{}_cutbased_shapes_{}.root".format(args.tag,args.discriminator_variable),
        num_threads=args.num_threads,
        skip_systematic_variations=args.skip_systematic_variations)

    # Era selection
    if "2017" in args.era:
        from shape_producer.estimation_methods_2017 import (
            DataEstimation, ZTTEstimation, ZTTEmbeddedEstimation,
            ZLEstimation, ZJEstimation, TTLEstimation, TTJEstimation,
            TTTEstimation, VVLEstimation, VVTEstimation, VVJEstimation,
            WEstimation, ggHEstimation, qqHEstimation, VHEstimation,
            WHEstimation, ZHEstimation, ttHEstimation,
            QCDEstimation_ABCD_TT_ISO2, QCDEstimation_SStoOS_MTETEM,
            NewFakeEstimationLT, NewFakeEstimationTT, HWWEstimation,
            ggHWWEstimation, qqHWWEstimation, SUSYggHEstimation,
            SUSYbbHEstimation)

        from shape_producer.era import Run2017
        era = Run2017(args.datasets)

    else:
        logger.critical("Era {} is not implemented.".format(args.era))
        raise Exception

    # Channels and processes
    # yapf: disable
    directory = args.directory
    friend_directories = {
        "et" : args.et_friend_directory,
        "mt" : args.mt_friend_directory,
        "tt" : args.tt_friend_directory,
        "em" : args.em_friend_directory,
    }
    ff_friend_directory = args.fake_factor_friend_directory

    channel_dict = {
        "mt" : MTMSSM2017(),
        "et" : ETMSSM2017(),
        "tt" : TTMSSM2017(),
        "em" : EMMSSM2017(),

    }

    susyggH_masses = [100, 110, 120, 130, 140, 180, 200, 250, 300, 350, 400, 450, 600, 700, 800, 900, 1200, 1400, 1500, 1600, 1800, 2000, 2300, 2600, 2900, 3200]
    susybbH_masses = [90, 110, 120, 125, 130, 140, 160, 180, 200, 250, 300, 350, 400, 450, 500, 600, 700, 800, 900, 1000, 1200, 1400, 1800, 2000, 2300, 2600, 3200]

    processes = {
        "mt" : {},
        "et" : {},
        "tt" : {},
        "em" : {},
    }

    for ch in args.channels:

        # common processes
        if args.shape_group == "backgrounds":
            processes[ch]["data"] = Process("data_obs", DataEstimation         (era, directory, channel_dict[ch], friend_directory=friend_directories[ch]))
            processes[ch]["EMB"]  = Process("EMB",      ZTTEmbeddedEstimation  (era, directory, channel_dict[ch], friend_directory=friend_directories[ch]))
            processes[ch]["ZL"]   = Process("ZL",       ZLEstimation           (era, directory, channel_dict[ch], friend_directory=friend_directories[ch]))
            processes[ch]["TTL"]  = Process("TTL",      TTLEstimation          (era, directory, channel_dict[ch], friend_directory=friend_directories[ch]))
            processes[ch]["VVL"]  = Process("VVL",      VVLEstimation          (era, directory, channel_dict[ch], friend_directory=friend_directories[ch]))

            processes[ch]["VH125"]   = Process("VH125",    VHEstimation        (era, directory, channel_dict[ch], friend_directory=friend_directories[ch]))
            processes[ch]["WH125"]   = Process("WH125",    WHEstimation        (era, directory, channel_dict[ch], friend_directory=friend_directories[ch]))
            processes[ch]["ZH125"]   = Process("ZH125",    ZHEstimation        (era, directory, channel_dict[ch], friend_directory=friend_directories[ch]))
            processes[ch]["ttH125"]  = Process("ttH125",   ttHEstimation       (era, directory, channel_dict[ch], friend_directory=friend_directories[ch]))

            processes[ch]["ggHWW125"] = Process("ggHWW125", ggHWWEstimation       (era, directory, channel_dict[ch], friend_directory=friend_directories[ch]))
            processes[ch]["qqHWW125"] = Process("qqHWW125", qqHWWEstimation       (era, directory, channel_dict[ch], friend_directory=friend_directories[ch]))

        # mssm ggH and bbH signals
        if "gg" in args.shape_group:
            for m in susyggH_masses:
                name = args.shape_group + "_" + str(m)
                processes[ch][name] = Process(name, SUSYggHEstimation(era, directory, channel_dict[ch], str(m), args.shape_group.replace("gg",""), friend_directory=friend_directories[ch]))
        if args.shape_group == "bbH":
            for m in susybbH_masses:
                name = "bbH_" + str(m)
                processes[ch][name] = Process(name, SUSYbbHEstimation(era, directory, channel_dict[ch], str(m), friend_directory=friend_directories[ch]))

        if args.shape_group == "sm_signals":
            # stage 0 and stage 1.1 ggh and qqh
            for ggH_htxs in ggHEstimation.htxs_dict:
                processes[ch][ggH_htxs] = Process(ggH_htxs, ggHEstimation(ggH_htxs, era, directory, channel_dict[ch], friend_directory=friend_directories[ch]))
            for qqH_htxs in qqHEstimation.htxs_dict:
                processes[ch][qqH_htxs] = Process(qqH_htxs, qqHEstimation(qqH_htxs, era, directory, channel_dict[ch], friend_directory=friend_directories[ch]))

        # channel-specific processes
        if args.shape_group == "backgrounds":
            if ch in ["mt", "et"]:
                processes[ch]["FAKES"] = Process("jetFakes", NewFakeEstimationLT(era, directory, channel_dict[ch], [processes[ch][process] for process in ["EMB", "ZL", "TTL", "VVL"]], processes[ch]["data"], friend_directory=friend_directories[ch]+[ff_friend_directory]))
            elif ch == "tt":
                processes[ch]["FAKES"] = Process("jetFakes", NewFakeEstimationTT(era, directory, channel_dict[ch], [processes[ch][process] for process in ["EMB", "ZL", "TTL", "VVL"]], processes[ch]["data"], friend_directory=friend_directories[ch]+[ff_friend_directory]))
            elif ch == "em":
                processes[ch]["W"]   = Process("W",   WEstimation(era, directory, channel_dict[ch], friend_directory=friend_directories[ch]))
                processes[ch]["QCD"] = Process("QCD", QCDEstimation_SStoOS_MTETEM(era, directory, channel_dict[ch], [processes[ch][process] for process in ["EMB", "ZL", "W", "VVL", "TTL"]], processes[ch]["data"], extrapolation_factor=1.0, qcd_weight = Weight("em_qcd_extrap_up_Weight","qcd_weight")))

    # Variables and categories
    if sys.version_info.major <= 2 and sys.version_info.minor <= 7 and sys.version_info.micro <= 15:
        binning = yaml.load(open(args.binning))
    else:
        binning = yaml.load(open(args.binning), Loader=yaml.FullLoader)

    # Cut-based analysis shapes
    categories = {
        "mt" : [],
        "et" : [],
        "tt" : [],
        "em" : [],
    }

    for ch in args.channels:
        discriminator = construct_variable(binning, args.discriminator_variable)
        for category in binning["cutbased"][ch]:
            cuts = Cuts(Cut(binning["cutbased"][ch][category], category))
            categories[ch].append(Category(category, channel_dict[ch], cuts, variable=discriminator))
            if category in [ "nobtag", "nobtag_lowmsv"]:
                for subcategory in sorted(binning["stxs_stage1p1_v2"][ch]):
                    stage1p1cuts = copy.deepcopy(cuts)
                    stage1p1cuts.add(Cut(binning["stxs_stage1p1_v2"][ch][subcategory], category + "_" + subcategory))
                    categories[ch].append(Category(category + "_" + subcategory, channel_dict[ch], stage1p1cuts, variable=discriminator))
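    # copy.deepcopy gives every STXS stage-1.1 subcategory its own cut set, so
    # adding the subcategory cut does not mutate the parent category's cuts.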


    # Choice of activated signal processes
    signal_nicks = []

    sm_htt_backgrounds_nicks = ["WH125", "ZH125", "VH125", "ttH125"]
    sm_hww_nicks = ["ggHWW125", "qqHWW125"]
    sm_htt_signals_nicks = [ggH_htxs for ggH_htxs in ggHEstimation.htxs_dict] + [qqH_htxs for qqH_htxs in qqHEstimation.htxs_dict]
    susy_nicks = []
    if "gg" in args.shape_group:
        for m in susyggH_masses:
            susy_nicks.append(args.shape_group + "_" + str(m))
    if args.shape_group == "bbH":
        for m in susybbH_masses:
            susy_nicks.append("bbH_" + str(m))

    if args.shape_group == "backgrounds":
        signal_nicks = sm_htt_backgrounds_nicks + sm_hww_nicks
    elif args.shape_group == "sm_signals":
        signal_nicks = sm_htt_signals_nicks
    else:
        signal_nicks = susy_nicks

    # Nominal histograms
    for ch in args.channels:
        for process, category in product(processes[ch].values(), categories[ch]):
            systematics.add(Systematic(category=category, process=process, analysis="mssmvssm", era=era, variation=Nominal(), mass="125"))

    # Setup shapes variations

    # EMB: 10% of the removed events in the ttbar simulation (ttbar -> real
    # tau tau events) are added to / subtracted from the EMB shape and used as
    # a systematic. The technical procedure differs from the usual systematic
    # variations.
    if args.shape_group == "backgrounds":
        tttautau_process = {}
        for ch in args.channels:
            tttautau_process[ch] = Process("TTT", TTTEstimation(era, directory, channel_dict[ch], friend_directory=friend_directories[ch]))
            processes[ch]['ZTTpTTTauTauDown'] = Process("ZTTpTTTauTauDown", AddHistogramEstimationMethod("AddHistogram", "nominal", era, directory, channel_dict[ch], [processes[ch]["EMB"], tttautau_process[ch]], [1.0, -0.1]))
            processes[ch]['ZTTpTTTauTauUp'] = Process("ZTTpTTTauTauUp", AddHistogramEstimationMethod("AddHistogram", "nominal", era, directory, channel_dict[ch], [processes[ch]["EMB"], tttautau_process[ch]], [1.0, 0.1]))
            for category in categories[ch]:
                for updownvar in ["Down", "Up"]:
                    systematics.add(Systematic(category=category, process=processes[ch]['ZTTpTTTauTau%s'%updownvar], analysis="smhtt", era=era, variation=Relabel("CMS_htt_emb_ttbar_Run2017", updownvar), mass="125"))

    # Prefiring weights
    prefiring_variations = [
        ReplaceWeight("CMS_prefiring_Run2017", "prefireWeight", Weight("prefiringweightup", "prefireWeight"),"Up"),
        ReplaceWeight("CMS_prefiring_Run2017", "prefireWeight", Weight("prefiringweightdown", "prefireWeight"),"Down"),
    ]

    # Split JES shapes
    jet_es_variations = create_systematic_variations("CMS_scale_j_eta0to3_Run2017", "jecUncEta0to3", DifferentPipeline)
    jet_es_variations += create_systematic_variations("CMS_scale_j_eta0to5_Run2017", "jecUncEta0to5", DifferentPipeline)
    jet_es_variations += create_systematic_variations("CMS_scale_j_eta3to5_Run2017", "jecUncEta3to5", DifferentPipeline)
    jet_es_variations += create_systematic_variations("CMS_scale_j_RelativeBal_Run2017", "jecUncRelativeBal", DifferentPipeline)
    jet_es_variations += create_systematic_variations("CMS_scale_j_RelativeSample_Run2017", "jecUncRelativeSample",DifferentPipeline)

    # B-tagging
    btag_eff_variations = create_systematic_variations("CMS_htt_eff_b_Run2017", "btagEff", DifferentPipeline)
    mistag_eff_variations = create_systematic_variations("CMS_htt_mistag_b_Run2017", "btagMistag", DifferentPipeline)

    ## Variations common for all groups (most of the mc-related systematics)
    common_mc_variations = prefiring_variations + btag_eff_variations + mistag_eff_variations + jet_es_variations

    # MET energy scale. Note: only those variations for non-resonant processes are used in the stat. inference
    met_unclustered_variations = create_systematic_variations("CMS_scale_met_unclustered", "metUnclusteredEn", DifferentPipeline)

    # Recoil correction unc, for resonant processes
    recoil_variations = create_systematic_variations( "CMS_htt_boson_reso_met_Run2017", "metRecoilResolution", DifferentPipeline)
    recoil_variations += create_systematic_variations( "CMS_htt_boson_scale_met_Run2017", "metRecoilResponse", DifferentPipeline)

    # Tau energy scale (general, MC-specific & EMB-specific); specific to the
    # mt, et & tt channels
    tau_es_variations = {}

    for unctype in ["", "_mc", "_emb"]:
        tau_es_variations[unctype] = create_systematic_variations("CMS_scale%s_t_3prong_Run2017"%unctype, "tauEsThreeProng", DifferentPipeline)
        tau_es_variations[unctype] += create_systematic_variations("CMS_scale%s_t_1prong_Run2017"%unctype, "tauEsOneProng", DifferentPipeline)
        tau_es_variations[unctype] += create_systematic_variations("CMS_scale%s_t_1prong1pizero_Run2017"%unctype, "tauEsOneProngOnePiZero", DifferentPipeline)

    # Ele energy scale & smearing uncertainties (MC-specific); specific to the
    # et & em channels
    ele_es_variations = create_systematic_variations("CMS_scale_mc_e", "eleScale", DifferentPipeline)
    ele_es_variations += create_systematic_variations("CMS_reso_mc_e", "eleSmear", DifferentPipeline)

    # Z pt reweighting
    zpt_variations = create_systematic_variations("CMS_htt_dyShape_Run2017", "zPtReweightWeight", SquareAndRemoveWeight)

    # top pt reweighting
    top_pt_variations = create_systematic_variations("CMS_htt_ttbarShape", "topPtReweightWeight", SquareAndRemoveWeight)

    # Ele energy scale (EMB-specific); specific to the et & em channels
    ele_es_emb_variations = create_systematic_variations("CMS_scale_emb_e", "eleEs", DifferentPipeline)

    # EMB charged track correction uncertainty (DM-dependent)
    decayMode_variations = []
    decayMode_variations.append(ReplaceWeight("CMS_3ProngEff_Run2017", "decayMode_SF", Weight("embeddedDecayModeWeight_effUp_pi0Nom", "decayMode_SF"), "Up"))
    decayMode_variations.append(ReplaceWeight("CMS_3ProngEff_Run2017", "decayMode_SF", Weight("embeddedDecayModeWeight_effDown_pi0Nom", "decayMode_SF"), "Down"))
    decayMode_variations.append(ReplaceWeight("CMS_1ProngPi0Eff_Run2017", "decayMode_SF", Weight("embeddedDecayModeWeight_effNom_pi0Up", "decayMode_SF"), "Up"))
    decayMode_variations.append(ReplaceWeight("CMS_1ProngPi0Eff_Run2017", "decayMode_SF", Weight("embeddedDecayModeWeight_effNom_pi0Down", "decayMode_SF"), "Down"))

    # QCD for em
    qcd_variations = []
    qcd_variations.append(ReplaceWeight("CMS_htt_qcd_0jet_rate_Run2017", "qcd_weight", Weight("em_qcd_osss_0jet_rateup_Weight*em_qcd_extrap_uncert_Weight", "qcd_weight"), "Up"))
    qcd_variations.append(ReplaceWeight("CMS_htt_qcd_0jet_rate_Run2017", "qcd_weight", Weight("em_qcd_osss_0jet_ratedown_Weight*em_qcd_extrap_uncert_Weight", "qcd_weight"), "Down"))
    qcd_variations.append(ReplaceWeight("CMS_htt_qcd_0jet_shape_Run2017", "qcd_weight", Weight("em_qcd_osss_0jet_shapeup_Weight*em_qcd_extrap_uncert_Weight", "qcd_weight"), "Up"))
    qcd_variations.append(ReplaceWeight("CMS_htt_qcd_0jet_shape_Run2017", "qcd_weight", Weight("em_qcd_osss_0jet_shapedown_Weight*em_qcd_extrap_uncert_Weight", "qcd_weight"), "Down"))
    qcd_variations.append(ReplaceWeight("CMS_htt_qcd_1jet_shape_Run2017", "qcd_weight", Weight("em_qcd_osss_1jet_shapeup_Weight*em_qcd_extrap_uncert_Weight", "qcd_weight"), "Up"))
    qcd_variations.append(ReplaceWeight("CMS_htt_qcd_1jet_shape_Run2017", "qcd_weight", Weight("em_qcd_osss_1jet_shapedown_Weight*em_qcd_extrap_uncert_Weight", "qcd_weight"), "Down"))
    qcd_variations.append(ReplaceWeight("CMS_htt_qcd_iso_Run2017", "qcd_weight", Weight("em_qcd_extrap_up_Weight*em_qcd_extrap_uncert_Weight", "qcd_weight"), "Up"))
    qcd_variations.append(ReplaceWeight("CMS_htt_qcd_iso_Run2017", "qcd_weight", Weight("em_qcd_osss_binned_Weight", "qcd_weight"), "Down"))
    qcd_variations.append(ReplaceWeight("CMS_htt_qcd_iso", "qcd_weight", Weight("em_qcd_extrap_up_Weight*em_qcd_extrap_uncert_Weight", "qcd_weight"), "Up"))
    qcd_variations.append(ReplaceWeight("CMS_htt_qcd_iso", "qcd_weight", Weight("em_qcd_osss_binned_Weight", "qcd_weight"), "Down"))

    # Gluon-fusion WG1 uncertainty scheme
    ggh_variations = []
    for unc in [
            "THU_ggH_Mig01", "THU_ggH_Mig12", "THU_ggH_Mu", "THU_ggH_PT120",
            "THU_ggH_PT60", "THU_ggH_Res", "THU_ggH_VBF2j", "THU_ggH_VBF3j",
            "THU_ggH_qmtop"
    ]:
        ggh_variations.append(AddWeight(unc, "{}_weight".format(unc), Weight("({})".format(unc), "{}_weight".format(unc)), "Up"))
        ggh_variations.append(AddWeight(unc, "{}_weight".format(unc), Weight("(1.0/{})".format(unc), "{}_weight".format(unc)), "Down"))
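    # The Up shape multiplies in the weight stored in the branch named after
    # the uncertainty; the Down shape applies its reciprocal.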

    # ZL fakes energy scale
    fakelep_dict = {"et" : "Ele", "mt" : "Mu"}
    lep_fake_es_variations = {}
    for ch in ["mt", "et"]:
        lep_fake_es_variations[ch] = create_systematic_variations("CMS_ZLShape_%s_1prong_Run2017"%ch, "tau%sFakeEsOneProng"%fakelep_dict[ch], DifferentPipeline)
        lep_fake_es_variations[ch] += create_systematic_variations("CMS_ZLShape_%s_1prong1pizero_Run2017"%ch, "tau%sFakeEsOneProngPiZeros"%fakelep_dict[ch], DifferentPipeline)

    # Lepton trigger efficiency; the same values are used for MC & EMB and for
    # mt & et
    lep_trigger_eff_variations = {}
    for ch in ["mt", "et"]:
        lep_trigger_eff_variations[ch] = {}
        for unctype in ["", "_emb"]:
            lep_trigger_eff_variations[ch][unctype] = []
            lep_trigger_eff_variations[ch][unctype].append(AddWeight("CMS_eff_trigger%s_%s_Run2017"%(unctype, ch), "trg_%s_eff_weight"%ch, Weight("(1.0*(pt_1<=25)+1.02*(pt_1>25))", "trg_%s_eff_weight"%ch), "Up"))
            lep_trigger_eff_variations[ch][unctype].append(AddWeight("CMS_eff_trigger%s_%s_Run2017"%(unctype, ch), "trg_%s_eff_weight"%ch, Weight("(1.0*(pt_1<=25)+0.98*(pt_1>25))", "trg_%s_eff_weight"%ch), "Down"))
            lep_trigger_eff_variations[ch][unctype].append(AddWeight("CMS_eff_xtrigger%s_%s_Run2017"%(unctype, ch), "xtrg_%s_eff_weight"%ch, Weight("(1.054*(pt_1<=25)+1.0*(pt_1>25))", "xtrg_%s_eff_weight"%ch), "Up"))
            lep_trigger_eff_variations[ch][unctype].append(AddWeight("CMS_eff_xtrigger%s_%s_Run2017"%(unctype, ch), "xtrg_%s_eff_weight"%ch, Weight("(0.946*(pt_1<=25)+1.0*(pt_1>25))", "xtrg_%s_eff_weight"%ch), "Down"))

    # jetfakes
    fake_factor_variations = {}
    for ch in ["mt", "et", "tt"]:
        fake_factor_variations[ch] = []
        if ch in ["mt", "et"]:
            for systematic_shift in [
                    "ff_qcd{ch}_syst_Run2017{shift}",
                    "ff_qcd_dm0_njet0{ch}_stat_Run2017{shift}",
                    "ff_qcd_dm0_njet1{ch}_stat_Run2017{shift}",
                    "ff_w_syst_Run2017{shift}",
                    "ff_w_dm0_njet0{ch}_stat_Run2017{shift}",
                    "ff_w_dm0_njet1{ch}_stat_Run2017{shift}",
                    "ff_tt_syst_Run2017{shift}",
                    "ff_tt_dm0_njet0_stat_Run2017{shift}",
                    "ff_tt_dm0_njet1_stat_Run2017{shift}",
            ]:
                for shift_direction in ["Up", "Down"]:
                    fake_factor_variations[ch].append(ReplaceWeight("CMS_%s"%(systematic_shift.format(ch="_"+ch, shift="").replace("_dm0", "")), "fake_factor", Weight("ff2_{syst}".format(syst=systematic_shift.format(ch="", shift="_%s" %shift_direction.lower()).replace("_Run2017", "")), "fake_factor"), shift_direction))
        elif ch == "tt":
            for systematic_shift in [
                    "ff_qcd{ch}_syst_Run2017{shift}",
                    "ff_qcd_dm0_njet0{ch}_stat_Run2017{shift}",
                    "ff_qcd_dm0_njet1{ch}_stat_Run2017{shift}",
                    "ff_w{ch}_syst_Run2017{shift}", "ff_tt{ch}_syst_Run2017{shift}",
                    "ff_w_frac{ch}_syst_Run2017{shift}",
                    "ff_tt_frac{ch}_syst_Run2017{shift}"
            ]:
                for shift_direction in ["Up", "Down"]:
                    fake_factor_variations[ch].append(ReplaceWeight("CMS_%s" % (systematic_shift.format(ch="_"+ch, shift="").replace("_dm0", "")), "fake_factor", Weight("(0.5*ff1_{syst}*(byTightIsolationMVArun2017v2DBoldDMwLT2017_1<0.5)+0.5*ff2_{syst}*(byTightIsolationMVArun2017v2DBoldDMwLT2017_2<0.5))".format(syst=systematic_shift.format(ch="", shift="_%s" % shift_direction.lower()).replace("_Run2017", "")), "fake_factor"), shift_direction))

    ## Group nicks
    mc_nicks = ["ZL", "TTL", "VVL"] + signal_nicks # to be extended with 'W' in em
    boson_mc_nicks = ["ZL"]         + signal_nicks # to be extended with 'W' in em

    ## Add variations to systematics
    for ch in args.channels:

        channel_mc_nicks = mc_nicks + ["W"] if ch == "em" else mc_nicks
        channel_boson_mc_nicks = boson_mc_nicks + ["W"] if ch == "em" else boson_mc_nicks
        if args.shape_group != "backgrounds":
            channel_mc_nicks = signal_nicks
            channel_boson_mc_nicks = signal_nicks
    
        channel_mc_common_variations = list(common_mc_variations)  # copy: "+=" below must not mutate the shared list
        if ch in ["et", "em"]:
            channel_mc_common_variations += ele_es_variations
        if ch in ["et", "mt", "tt"]:
            channel_mc_common_variations += tau_es_variations[""] + tau_es_variations["_mc"]
        if ch in ["et", "mt"]:
            channel_mc_common_variations += lep_trigger_eff_variations[ch][""]

        # variations common across all shape groups
        for variation in channel_mc_common_variations:
            for process_nick in channel_mc_nicks:
                systematics.add_systematic_variation(variation=variation, process=processes[ch][process_nick], channel=channel_dict[ch], era=era)

        for variation in recoil_variations:
            for process_nick in channel_boson_mc_nicks:
                systematics.add_systematic_variation(variation=variation, process=processes[ch][process_nick], channel=channel_dict[ch], era=era)

        # variations relevant for ggH signals in 'sm_signals' shape group
        if args.shape_group == "sm_signals":
            for variation in ggh_variations:
                for process_nick in [nick for nick in signal_nicks if "ggH" in nick and "HWW" not in nick and "ggH_" not in nick]:
                    systematics.add_systematic_variation(variation=variation, process=processes[ch][process_nick], channel=channel_dict[ch], era=era)

        # variations only relevant for the 'backgrounds' shape group
        if args.shape_group == "backgrounds":
            for variation in top_pt_variations:
                systematics.add_systematic_variation(variation=variation, process=processes[ch]["TTL"], channel=channel_dict[ch], era=era)

            for variation in met_unclustered_variations:
                for process_nick in ["TTL", "VVL"]:
                    systematics.add_systematic_variation(variation=variation, process=processes[ch][process_nick], channel=channel_dict[ch], era=era)

            zl_variations = list(zpt_variations)  # copy: "+=" below must not mutate zpt_variations
            if ch in ["et", "mt"]:
                zl_variations += lep_fake_es_variations[ch]
            for variation in zl_variations:
                systematics.add_systematic_variation(variation=variation, process=processes[ch]["ZL"], channel=channel_dict[ch], era=era)

            if ch == "em":
                for variation in qcd_variations:
                    systematics.add_systematic_variation(variation=variation, process=processes[ch]["QCD"], channel=channel_dict[ch], era=era)

            if ch in ["mt","et", "tt"]:
                ff_variations = fake_factor_variations[ch] + tau_es_variations[""] + tau_es_variations["_mc"] + tau_es_variations["_emb"]
                for variation in ff_variations:
                    systematics.add_systematic_variation(variation=variation, process=processes[ch]["FAKES"], channel=channel_dict[ch], era=era)

            emb_variations = []
            if ch in ["mt","et", "tt"]:
                emb_variations += tau_es_variations[""] + tau_es_variations["_emb"] + decayMode_variations
            if ch in ["mt", "et"]:
                emb_variations += lep_trigger_eff_variations[ch]["_emb"]
            if ch in ["et", "em"]:
                emb_variations += ele_es_emb_variations
            for variation in emb_variations:
                systematics.add_systematic_variation(variation=variation, process=processes[ch]["EMB"], channel=channel_dict[ch], era=era)

    # Produce histograms
    logger.info("Start producing shapes.")
    systematics.produce()
    logger.info("Done producing shapes.")
def main(args):
    # Container for all distributions to be drawn
    logger.info("Set up shape variations.")
    systematics = Systematics(
        "fake-factor-application/{}_ff_yields.root".format(args.tag),
        num_threads=args.num_threads)

    # Era selection
    if "2017" in args.era:
        from shape_producer.estimation_methods_2017 import (
            DataEstimation, ZTTEstimation, ZTTEmbeddedEstimation,
            ZLEstimation, ZJEstimation, TTLEstimation, TTJEstimation,
            TTTEstimation, VVLEstimation, VVTEstimation, VVJEstimation,
            WEstimation)
        from shape_producer.era import Run2017
        era = Run2017(args.datasets)
    else:
        logger.critical("Era {} is not implemented.".format(args.era))
        raise Exception

    # Channels and processes
    # yapf: disable
    directory = args.directory
    et_friend_directory = args.et_friend_directory
    mt_friend_directory = args.mt_friend_directory
    tt_friend_directory = args.tt_friend_directory

    mt = MTSM2017()
    mt.cuts.remove("tau_iso")
    mt.cuts.add(Cut("(byTightIsolationMVArun2017v2DBoldDMwLT2017_2<0.5&&byVLooseIsolationMVArun2017v2DBoldDMwLT2017_2>0.5)", "tau_anti_iso"))
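    # The tight-fail && very-loose-pass window selects the anti-isolated
    # application region in which the fake factors are applied.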
    mt_processes = {
        "data"  : Process("data_obs", DataEstimation      (era, directory, mt, friend_directory=mt_friend_directory)),
        "EMB"   : Process("EMB",      ZTTEmbeddedEstimation(era, directory, mt, friend_directory=mt_friend_directory)),
        "ZTT"   : Process("ZTT",      ZTTEstimation       (era, directory, mt, friend_directory=mt_friend_directory)),
        "ZJ"    : Process("ZJ",       ZJEstimation        (era, directory, mt, friend_directory=mt_friend_directory)),
        "ZL"    : Process("ZL",       ZLEstimation        (era, directory, mt, friend_directory=mt_friend_directory)),
        "TTT"   : Process("TTT",      TTTEstimation       (era, directory, mt, friend_directory=mt_friend_directory)),
        "TTJ"   : Process("TTJ",      TTJEstimation       (era, directory, mt, friend_directory=mt_friend_directory)),
        "TTL"   : Process("TTL",      TTLEstimation       (era, directory, mt, friend_directory=mt_friend_directory)),
        "VVT"   : Process("VVT",      VVTEstimation       (era, directory, mt, friend_directory=mt_friend_directory)),
        "VVJ"   : Process("VVJ",      VVJEstimation       (era, directory, mt, friend_directory=mt_friend_directory)),
        "VVL"   : Process("VVL",      VVLEstimation       (era, directory, mt, friend_directory=mt_friend_directory)),
        "W"     : Process("W",        WEstimation         (era, directory, mt, friend_directory=mt_friend_directory))
        }

    et = ETSM2017()
    et.cuts.remove("tau_iso")
    et.cuts.add(Cut("(byTightIsolationMVArun2017v2DBoldDMwLT2017_2<0.5&&byVLooseIsolationMVArun2017v2DBoldDMwLT2017_2>0.5)", "tau_anti_iso"))
    et_processes = {
        "data"  : Process("data_obs", DataEstimation      (era, directory, et, friend_directory=et_friend_directory)),
        "EMB"   : Process("EMB",      ZTTEmbeddedEstimation(era, directory, et, friend_directory=et_friend_directory)),
        "ZTT"   : Process("ZTT",      ZTTEstimation       (era, directory, et, friend_directory=et_friend_directory)),
        "ZJ"    : Process("ZJ",       ZJEstimation        (era, directory, et, friend_directory=et_friend_directory)),
        "ZL"    : Process("ZL",       ZLEstimation        (era, directory, et, friend_directory=et_friend_directory)),
        "TTT"   : Process("TTT",      TTTEstimation       (era, directory, et, friend_directory=et_friend_directory)),
        "TTJ"   : Process("TTJ",      TTJEstimation       (era, directory, et, friend_directory=et_friend_directory)),
        "TTL"   : Process("TTL",      TTLEstimation       (era, directory, et, friend_directory=et_friend_directory)),
        "VVT"   : Process("VVT",      VVTEstimation       (era, directory, et, friend_directory=et_friend_directory)),
        "VVJ"   : Process("VVJ",      VVJEstimation       (era, directory, et, friend_directory=et_friend_directory)),
        "VVL"   : Process("VVL",      VVLEstimation       (era, directory, et, friend_directory=et_friend_directory)),
        "W"     : Process("W",        WEstimation         (era, directory, et, friend_directory=et_friend_directory))
        }

    # In tt, two 'channels' are needed: one anti-isolated region for each tau.
    tt1 = TTSM2017()
    tt1.cuts.remove("tau_1_iso")
    tt1.cuts.add(Cut("(byTightIsolationMVArun2017v2DBoldDMwLT2017_1<0.5&&byVLooseIsolationMVArun2017v2DBoldDMwLT2017_1>0.5)", "tau_1_anti_iso"))
    tt1_processes = {
        "data"  : Process("data_obs", DataEstimation      (era, directory, tt1, friend_directory=tt_friend_directory)),
        "EMB"   : Process("EMB",      ZTTEmbeddedEstimation(era, directory, tt1, friend_directory=tt_friend_directory)),
        "ZTT"   : Process("ZTT",      ZTTEstimation       (era, directory, tt1, friend_directory=tt_friend_directory)),
        "ZJ"    : Process("ZJ",       ZJEstimation        (era, directory, tt1, friend_directory=tt_friend_directory)),
        "ZL"    : Process("ZL",       ZLEstimation        (era, directory, tt1, friend_directory=tt_friend_directory)),
        "TTT"   : Process("TTT",      TTTEstimation       (era, directory, tt1, friend_directory=tt_friend_directory)),
        "TTJ"   : Process("TTJ",      TTJEstimation       (era, directory, tt1, friend_directory=tt_friend_directory)),
        "TTL"   : Process("TTL",      TTLEstimation       (era, directory, tt1, friend_directory=tt_friend_directory)),
        "VVT"   : Process("VVT",      VVTEstimation       (era, directory, tt1, friend_directory=tt_friend_directory)),
        "VVJ"   : Process("VVJ",      VVJEstimation       (era, directory, tt1, friend_directory=tt_friend_directory)),
        "VVL"   : Process("VVL",      VVLEstimation       (era, directory, tt1, friend_directory=tt_friend_directory)),
        "W"     : Process("W",        WEstimation         (era, directory, tt1, friend_directory=tt_friend_directory))
        }
    tt2 = TTSM2017()
    tt2.cuts.remove("tau_2_iso")
    tt2.cuts.add(Cut("(byTightIsolationMVArun2017v2DBoldDMwLT2017_2<0.5&&byVLooseIsolationMVArun2017v2DBoldDMwLT2017_2>0.5)", "tau_2_anti_iso"))
    tt2_processes = {
        "data"  : Process("data_obs", DataEstimation      (era, directory, tt2, friend_directory=tt_friend_directory)),
        "EMB"   : Process("EMB",      ZTTEmbeddedEstimation(era, directory, tt2, friend_directory=tt_friend_directory)),
        "ZTT"   : Process("ZTT",      ZTTEstimation       (era, directory, tt2, friend_directory=tt_friend_directory)),
        "ZJ"    : Process("ZJ",       ZJEstimation        (era, directory, tt2, friend_directory=tt_friend_directory)),
        "ZL"    : Process("ZL",       ZLEstimation        (era, directory, tt2, friend_directory=tt_friend_directory)),
        "TTT"   : Process("TTT",      TTTEstimation       (era, directory, tt2, friend_directory=tt_friend_directory)),
        "TTJ"   : Process("TTJ",      TTJEstimation       (era, directory, tt2, friend_directory=tt_friend_directory)),
        "TTL"   : Process("TTL",      TTLEstimation       (era, directory, tt2, friend_directory=tt_friend_directory)),
        "VVT"   : Process("VVT",      VVTEstimation       (era, directory, tt2, friend_directory=tt_friend_directory)),
        "VVJ"   : Process("VVJ",      VVJEstimation       (era, directory, tt2, friend_directory=tt_friend_directory)),
        "VVL"   : Process("VVL",      VVLEstimation       (era, directory, tt2, friend_directory=tt_friend_directory)),
        "W"     : Process("W",        WEstimation         (era, directory, tt2, friend_directory=tt_friend_directory))
        }

    # Variables and categories
    config = yaml.load(open("fake-factor-application/config.yaml"), Loader=yaml.Loader)
    if args.config not in config:
        logger.critical("Requested config key %s not available in fake-factor-application/config.yaml!" % args.config)
        raise Exception
    config = config[args.config]

    et_categories = []
    # et
    et_categories.append(
        Category(
            "inclusive",
            et,
            Cuts(),
            variable=Variable(args.config, VariableBinning(config["et"]["binning"]), config["et"]["expression"])))
    if not args.only_inclusive:
        for i, label in enumerate(["ggh", "qqh", "ztt", "zll", "w", "tt", "ss", "misc"]):
            et_categories.append(
                Category(
                    label,
                    et,
                    Cuts(
                        Cut("et_max_index=={index}".format(index=i), "exclusive_score")),
                    variable=Variable(args.config, VariableBinning(config["et"]["binning"]), config["et"]["expression"])))
    mt_categories = []
    # mt
    mt_categories.append(
        Category(
            "inclusive",
            mt,
            Cuts(),
            variable=Variable(args.config, VariableBinning(config["mt"]["binning"]), config["mt"]["expression"])))
    if not args.only_inclusive:
        for i, label in enumerate(["ggh", "qqh", "ztt", "zll", "w", "tt", "ss", "misc"]):
            mt_categories.append(
                Category(
                    label,
                    mt,
                    Cuts(
                        Cut("mt_max_index=={index}".format(index=i), "exclusive_score")),
                    variable=Variable(args.config, VariableBinning(config["mt"]["binning"]), config["mt"]["expression"])))
    tt1_categories = []
    tt2_categories = []
    # tt
    tt1_categories.append(
        Category(
            "tt1_inclusive",
            tt1,
            Cuts(),
            variable=Variable(args.config, VariableBinning(config["tt"]["binning"]), config["tt"]["expression"])))
    tt2_categories.append(
        Category(
            "tt2_inclusive",
            tt2,
            Cuts(),
            variable=Variable(args.config, VariableBinning(config["tt"]["binning"]), config["tt"]["expression"])))
    if not args.only_inclusive:
        for i, label in enumerate(["ggh", "qqh", "ztt", "noniso", "misc"]):
            tt1_categories.append(
                Category(
                    "tt1_"+label,
                    tt1,
                    Cuts(
                        Cut("tt_max_index=={index}".format(index=i), "exclusive_score")),
                    variable=Variable(args.config, VariableBinning(config["tt"]["binning"]), config["tt"]["expression"])))
            tt2_categories.append(
                Category(
                    "tt2_"+label,
                    tt2,
                    Cuts(
                        Cut("tt_max_index=={index}".format(index=i), "exclusive_score")),
                    variable=Variable(args.config, VariableBinning(config["tt"]["binning"]), config["tt"]["expression"])))

    # Nominal histograms
    # yapf: enable
    for process, category in product(et_processes.values(), et_categories):
        systematics.add(
            Systematic(category=category,
                       process=process,
                       analysis="smhtt",
                       era=era,
                       variation=Nominal(),
                       mass="125"))

    for process, category in product(mt_processes.values(), mt_categories):
        systematics.add(
            Systematic(category=category,
                       process=process,
                       analysis="smhtt",
                       era=era,
                       variation=Nominal(),
                       mass="125"))

    for process, category in product(tt1_processes.values(), tt1_categories):
        systematics.add(
            Systematic(category=category,
                       process=process,
                       analysis="smhtt",
                       era=era,
                       variation=Nominal(),
                       mass="125"))

    for process, category in product(tt2_processes.values(), tt2_categories):
        systematics.add(
            Systematic(category=category,
                       process=process,
                       analysis="smhtt",
                       era=era,
                       variation=Nominal(),
                       mass="125"))

    # Produce histograms
    logger.info("Start producing shapes.")
    systematics.produce()
    logger.info("Done producing shapes.")
Example #7
class ParSpaceRegion(object):
    def __init__(self, eraName, channelName, bkgName):
        self.meta = {"era": eraName, "channel": channelName, "bkg": bkgName}
        self.era = eraD[eraName]
        self.channel = copy.deepcopy(channelDict[eraName][channelName])

        if self.meta["era"] not in ["2016", "2017"]: raise Exception

        ### Remove the old isolation cuts and add the very loose isolation that is needed to exclude other backgrounds from all regions
        # for cut_ in self.channel.cuts.names:
        #    self.channel.cuts.remove(cut_)
        # for cut_ in cutDB(channelName,"baseline", eraName="2017"):
        #    self.channel.cuts.add(cut_)
        if self.meta["channel"] in ["et", "mt"]:
            self.channel.cuts.remove("tau_iso")
            self.channel.cuts.remove("os")
            ##switch to deeptauID
            self.channel.cuts.get("againstMuonDiscriminator"
                                  ).variable = "byTightDeepTau2017v2p1VSmu_2"
            self.channel.cuts.get("againstElectronDiscriminator"
                                  ).variable = "byVLooseDeepTau2017v2p1VSe_2"
            self.channel.cuts.remove("trg_selection")
            self.channel.cuts.add(
                Cut(
                    "(pt_2>30 && ((trg_singlemuon == 1) || (trg_mutaucross == 1)))",
                    "trg_selection"))
            #self.channel.cuts.add(Cut("byVLooseIsolationMVArun2017v2DBoldDMwLT2017_2>.5","VLooseTauIso"))
            self.channel.cuts.add(
                Cut("byVLooseDeepTau2017v2p1VSjet_2>.5", "VLooseTauIso"))

        if self.meta["channel"] == "tt":
            self.channel.cuts.remove("tau_1_iso")
            self.channel.cuts.remove("tau_2_iso")
            self.channel.cuts.remove("os")
            ##switch to deeptauid
            raise Exception  # tt channel not yet supported here; the cuts below are never reached
            # self.channel.cuts.add(Cut("byVLooseIsolationMVArun2017v2DBoldDMwLT2017_1>.5 &&byVLooseIsolationMVArun2017v2DBoldDMwLT2017_2>.5","VLooseTauIso"))
            self.channel.cuts.add(
                Cut(
                    "byVLooseDeepTau2017v2p1VSjet_1>.5 &&byVLooseDeepTau2017v2p1VSjet_2>.5",
                    "VLooseTauIso"))

        # ###OldFFWeights
        # if self.meta["channel"] in ["et","mt"]:
        #     self.fakeWeightstring="ff2_nom"
        # elif self.meta["channel"]=="tt":
        #     self.fakeWeightstring="(0.5*ff1_nom*(byTightIsolationMVArun2017v2DBoldDMwLT2017_1<0.5)+0.5*ff2_nom*(byTightIsolationMVArun2017v2DBoldDMwLT2017_2<0.5))"

        self.CutString = self.channel.cuts.expand()
        logger.debug(self.CutString)
        self.estimation = DataEstimation(
            era=eraD[eraName],
            directory="/ceph/htautau/" + eraName + "/ntuples/",
            friend_directory=[
                "/ceph/htautau/" + eraName + "/" + ft
                for ft in ["ff_friends/", "mela_friends/", "svfit_friends/"]
            ],
            channel=self.channel)
        self.createRDF()
        if self.meta["bkg"] == "ttbar":
            self.createttbarRDF()
        self.createClosureRDF()
        self.SR = ParSpaceCrop(self.channel,
                               self.meta,
                               self.RDF,
                               signalLikeRegion=True,
                               determinationRegion=False)

        self.AR = ParSpaceCrop(self.channel,
                               self.meta,
                               self.RDF,
                               signalLikeRegion=False,
                               determinationRegion=False)
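        # SR and AR share the baseline selection and differ only in the
        # signalLikeRegion flag; the determination regions below additionally
        # set determinationRegion=True.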

        ## Define the Background and Signal like Determination Region
        if self.meta["bkg"] != "ttbar":
            self.DR_sl = ParSpaceCrop(self.channel,
                                      self.meta,
                                      self.RDF,
                                      signalLikeRegion=True,
                                      determinationRegion=True)
            self.DR_bl = ParSpaceCrop(self.channel,
                                      self.meta,
                                      self.RDF,
                                      signalLikeRegion=False,
                                      determinationRegion=True)
        else:
            self.DR_sl = ParSpaceCrop(self.channel,
                                      self.meta,
                                      self.ttbarRDF,
                                      signalLikeRegion=True,
                                      determinationRegion=True)
            self.DR_bl = ParSpaceCrop(self.channel,
                                      self.meta,
                                      self.ttbarRDF,
                                      signalLikeRegion=False,
                                      determinationRegion=True)

        ## Define RDFs for the closure correction
        self.Closure_sl = ParSpaceCrop(self.channel,
                                       self.meta,
                                       self.ClosureRDF,
                                       signalLikeRegion=True,
                                       determinationRegion=True)
        self.Closure_bl = ParSpaceCrop(self.channel,
                                       self.meta,
                                       self.ClosureRDF,
                                       signalLikeRegion=False,
                                       determinationRegion=True)

    def createRDF(self):
        tree_path = self.channel.name + "_nominal/ntuple"
        self.rdfFilePath = "/ceph/mscham/data/fakefaktorRDFs/{}-{}.root".format(
            self.meta["era"], self.meta["channel"])
        if os.path.isfile(self.rdfFilePath):
            self.RDF = ROOT.RDataFrame(tree_path, self.rdfFilePath)
        else:
            logger.info("Creating new RDF!")
            #exit(1)
            self.chain = ROOT.TChain(tree_path)
            self.chainsD = {}
            dontDelMyChainsL = []
            for i, ntupleFilename in enumerate(self.estimation.get_files()):
                #if i!=0: continue
                #### get the file basename
                filename = os.path.basename(os.path.normpath(ntupleFilename))
                ### instance the chain with selector
                if "ntuple" not in self.chainsD:
                    self.chainsD["ntuple"] = ROOT.TChain(tree_path)
                    logger.info("creating ntuple chain")

                if not os.path.exists(ntupleFilename):
                    logger.fatal("File does not exist: {}".format(path))
                    raise Exception
                logger.info("Adding ntuple:{}".format(ntupleFilename))
                self.chainsD["ntuple"].AddFile(ntupleFilename)

                j = 0
                # Make sure, that friend files are put in the same order together
                for friend in self.estimation.get_friend_files(
                ):  # friend in mela, svfit, ...
                    friendFileName = friend[i]
                    if not os.path.exists(friendFileName):
                        logger.fatal(
                            "File does not exist: {}".format(friendFileName))
                        raise Exception

                    friendVarL = ROOT.RDataFrame(
                        tree_path, str(friendFileName)).GetColumnNames()
                    if "ff2_nom" in friendVarL: friendName = "ff"
                    elif "ME_phi" in friendVarL: friendName = "mela"
                    elif "m_sv" in friendVarL: friendName = "svfit"
                    else:
                        friendName = str(j)
                        j += 1

                    if friendName not in self.chainsD:
                        self.chainsD[friendName] = ROOT.TChain(tree_path)
                        logger.info("creating friend tree chain for " +
                                    friendName)
                    logger.info("Adding {} as {} friend.".format(
                        friendFileName, friendName))
                    self.chainsD[friendName].AddFile(friendFileName)

            ### Collect the friend Chains
            self.chain.Add(self.chainsD["ntuple"])
            for key in self.chainsD.keys():
                if key == "ntuple": continue
                logger.info("Add " + key + " to chain")
                self.chain.AddFriend(self.chainsD[key], key)
            chain_numentries = self.chain.GetEntries()
            if chain_numentries == 0:
                logger.fatal(
                    "Chain (before skimming) does not contain any events.")
                raise Exception
            logger.debug("Found {} events.".format(chain_numentries))
            ### Convert Chain to an RDF
            self.RDF = ROOT.RDataFrame(self.chain)

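            # Persist the assembled dataframe so later runs can skip the
            # chain-building step above.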
            opt = ROOT.ROOT.RDF.RSnapshotOptions()
            opt.fMode = "RECREATE"
            self.RDF.Snapshot(tree_path, self.rdfFilePath, ".*", opt)

        logger.debug("Skim events with cut string: {}".format(self.CutString))
        self.RDF = self.RDF.Filter(self.CutString)
        self.eventCount = self.RDF.Count().GetValue()

        logger.debug("RDF after basic cuts contains {} events.".format(
            self.eventCount))
        self.varL = list(self.RDF.GetColumnNames())

    def createttbarRDF(self):
        tree_path = self.channel.name + "_nominal/ntuple"
        self.rdfttbarFilePath = "/ceph/mscham/data/fakefaktorRDFs/{}-{}-ttbar.root".format(
            self.meta["era"], self.meta["channel"])
        if os.path.isfile(self.rdfttbarFilePath):
            self.ttbarRDF = ROOT.RDataFrame(tree_path, self.rdfttbarFilePath)
        else:
            raise Exception

        logger.debug("Skim ttbar with cut string: {}".format(self.CutString))
        self.ttbarRDF = self.ttbarRDF.Filter(self.CutString)
        self.ttbareventCount = self.ttbarRDF.Count().GetValue()

        logger.debug("ttbarRDF after basic cuts contains {} events.".format(
            self.ttbareventCount))
        self.ttbarvarL = list(self.ttbarRDF.GetColumnNames())

    def createClosureRDF(self):
        tree_path = self.channel.name + "_nominal/ntuple"
        FilePath = "/ceph/mscham/data/fakefaktorRDFs/{}-{}-Closure.root".format(
            self.meta["era"], self.meta["channel"])
        if os.path.isfile(FilePath):
            self.ClosureRDF = ROOT.RDataFrame(tree_path, FilePath)
        else:
            raise Exception

        logger.debug("Skim Closure with cut string: {}".format(self.CutString))
        self.ClosureRDF = self.ClosureRDF.Filter(self.CutString)
        self.ClosureEventCount = self.ClosureRDF.Count().GetValue()

        logger.debug("ClosureRDF after basic cuts contains {} events.".format(
            self.ClosureEventCount))
        self.ClosureVarL = list(self.ClosureRDF.GetColumnNames())
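
# Usage sketch (hypothetical values: "wjets" stands in for any non-ttbar
# bkgName; eraD, channelDict, and the /ceph paths must exist as assumed above):
region = ParSpaceRegion("2017", "mt", "wjets")
logger.debug("Baseline selection keeps {} events.".format(region.eventCount))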