Пример #1
0
def main(args):
    """Print the overlap of two selections in 2016 data for one channel.

    The data ntuples and their friend trees are chained together, and the
    weighted yield of ``m_sv`` is computed after the baseline selection for
    every combination of passing/failing the two cut expressions
    ``args.cut18032`` and ``args.cut16043``. The yields and a set of
    consistency cross-checks are printed to stdout.

    Args:
        args: Parsed command line arguments. Attributes read here:
            ``channel`` ("et", "mt" or "tt"), ``datasets``, ``directory``,
            the ``<channel>_friend_directory`` of the selected channel,
            ``cut18032`` and ``cut16043``.

    Raises:
        ValueError: If ``args.channel`` is not one of the supported channels.
    """
    # Fail fast, and with a readable message, before anything is set up.
    supported_channels = ("et", "mt", "tt")
    if args.channel not in supported_channels:
        raise ValueError(
            "Unsupported channel '{}', expected one of: {}".format(
                args.channel, ", ".join(supported_channels)))

    # Use 2016 dataset
    era = Run2016(args.datasets)

    # Channel (only the friend directory of the selected channel is read)
    if args.channel == "et":
        channel = ETSM2016()
        friend_directory = args.et_friend_directory
    elif args.channel == "mt":
        channel = MTSM2016()
        friend_directory = args.mt_friend_directory
    else:
        channel = TTSM2016()
        friend_directory = args.tt_friend_directory

    # Data estimation
    data = DataEstimation(era,
                          args.directory,
                          channel,
                          friend_directory=friend_directory)
    files = data.get_files()
    cuts = (data.get_cuts() + channel.cuts).expand()
    weights = data.get_weights().extract()

    # Combine all files and the matching friend files
    tree_path = "/{}_nominal/ntuple".format(args.channel)
    tree = ROOT.TChain()
    friend = ROOT.TChain()
    for f in files:
        tree.Add(f + tree_path)
        friendname = os.path.basename(f).replace(".root", "")
        friendpath = os.path.join(friend_directory, friendname,
                                  friendname + ".root")
        friend.Add(friendpath + tree_path)
    tree.AddFriend(friend)

    def weighted_yield(name, in_18032=None, in_16043=None):
        """Return the weighted yield after the baseline cuts.

        ``in_18032`` / ``in_16043`` are 1 (selection passed), 0 (selection
        failed) or None (no requirement on that selection).
        """
        requirements = ""
        if in_18032 is not None:
            requirements += "*(({})=={})".format(args.cut18032, in_18032)
        if in_16043 is not None:
            requirements += "*(({})=={})".format(args.cut16043, in_16043)
        selection = cuts + (requirements + "*({})".format(weights))
        tree.Draw("m_sv>>" + name, selection, "goff")
        # NOTE(review): TH1::Integral takes bin indices; the wide range is
        # presumably meant to include underflow and overflow - confirm.
        return ROOT.gDirectory.Get(name).Integral(-1000, 1000)

    # All events after baseline selection
    all_events = weighted_yield("all_events")
    only_16043 = weighted_yield("only_16043", in_18032=0, in_16043=1)
    all_16043 = weighted_yield("all_16043", in_16043=1)
    only_18032 = weighted_yield("only_18032", in_18032=1, in_16043=0)
    all_18032 = weighted_yield("all_18032", in_18032=1)
    both = weighted_yield("both", in_18032=1, in_16043=1)
    none = weighted_yield("none", in_18032=0, in_16043=0)

    # Print: each cross-check line shows two numbers that should agree
    print("Cross-check: {}, {}".format(both + only_18032 + only_16043 + none,
                                       all_events))
    print("Cross-check: {}, {}".format(all_18032 + only_16043 + none,
                                       all_events))
    print("Cross-check: {}, {}".format(only_18032 + all_16043 + none,
                                       all_events))
    print("Cross-check: {}, {}".format(all_16043, only_16043 + both))
    print("Cross-check: {}, {}".format(all_18032, only_18032 + both))
    print("Cross-check: {}, {}".format(
        all_events - both - only_18032 - only_16043, none))
    print("All events: {}".format(all_events))
    print("In none of both selection: {}".format(none))
    print("In both selections together: {}".format(both))
    print("In at least one selection: {}".format(both + only_18032 +
                                                 only_16043))
    print("Only 16043: {}".format(only_16043))
    print("All 16043: {}".format(all_16043))
    print("Only 18032: {}".format(only_18032))
    print("All 18032: {}".format(all_18032))
def main(args):
    """Book and produce the 2016 tau ID shapes for the mt and mm channels.

    Books the nominal histograms of all mt (and, if requested, mm) processes
    in the configured categories, registers the shape variations on top of
    them and finally triggers the histogram production. The output file is
    named ``<args.tag>_shapes.root``.

    Args:
        args: Parsed command line arguments. Attributes read here: ``tag``,
            ``num_threads``, ``skip_systematic_variations``, ``era``,
            ``datasets``, ``working_point``, ``directory``,
            ``fake_factor_friend_directory``, ``binning``, ``gof_channel``,
            ``gof_variable`` and ``channels``.

    Raises:
        ValueError: If ``args.era`` does not contain "2016".
        KeyError: If ``args.working_point`` is not a known working point.
    """
    # Container for all distributions to be drawn
    logger.info("Set up shape variations.")
    systematics = Systematics(
        "{}_shapes.root".format(args.tag),
        num_threads=args.num_threads,
        skip_systematic_variations=args.skip_systematic_variations)

    # Era selection
    if "2016" not in args.era:
        message = "Era {} is not implemented.".format(args.era)
        logger.critical(message)
        raise ValueError(message)
    from shape_producer.estimation_methods_2016 import (
        DataEstimation, HTTEstimation, ggHEstimation, qqHEstimation,
        VHEstimation, WHEstimation, ZHEstimation, ttHEstimation,
        ZTTEstimation, ZLEstimation, ZJEstimation, WEstimation,
        VVLEstimation, VVTEstimation, VVJEstimation, TTLEstimation,
        TTTEstimation, TTJEstimation, QCDEstimation_SStoOS_MTETEM,
        QCDEstimationTT, ZTTEmbeddedEstimation, FakeEstimationLT,
        NewFakeEstimationLT, FakeEstimationTT, NewFakeEstimationTT,
        DYJetsToLLEstimation, TTEstimation, VVEstimation)
    from shape_producer.era import Run2016
    era = Run2016(args.datasets)

    def add_variations(variations, process_nicks, processes, channel, enabled):
        """Register each variation for each listed process of one channel."""
        if not enabled:
            return
        for variation in variations:
            for process_nick in process_nicks:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=processes[process_nick],
                    channel=channel,
                    era=era)

    def tau_es_variations(prefix):
        """Create the tau energy scale variations of all three decay modes."""
        three_prong = create_systematic_variations(
            prefix + "_3prong_Run2016", "tauEsThreeProng", DifferentPipeline)
        one_prong = create_systematic_variations(
            prefix + "_1prong_Run2016", "tauEsOneProng", DifferentPipeline)
        one_prong_pizero = create_systematic_variations(
            prefix + "_1prong1pizero_Run2016", "tauEsOneProngOnePiZero",
            DifferentPipeline)
        return three_prong + one_prong + one_prong_pizero

    def trigger_eff_variations(name):
        """Create the up/down lepton trigger efficiency weight variations."""
        return [
            AddWeight(name, "trg_mt_eff_weight",
                      Weight("(1.0*(pt_1<=23)+1.02*(pt_1>23))", "trg_mt_eff_weight"), "Up"),
            AddWeight(name, "trg_mt_eff_weight",
                      Weight("(1.0*(pt_1<=23)+0.98*(pt_1>23))", "trg_mt_eff_weight"), "Down"),
        ]

    # Alternative working point dictionary; not used unless selected below.
    wp_dict_mva = {
               "vvloose": "byVVLooseIsolationMVArun2017v2DBoldDMwLT2017_2",
               "vloose": "byVLooseIsolationMVArun2017v2DBoldDMwLT2017_2",
               "loose": "byLooseIsolationMVArun2017v2DBoldDMwLT2017_2",
               "medium": "byMediumIsolationMVArun2017v2DBoldDMwLT2017_2",
               "tight": "byTightIsolationMVArun2017v2DBoldDMwLT2017_2",
               "vtight": "byVTightIsolationMVArun2017v2DBoldDMwLT2017_2",
               "vvtight": "byVVTightIsolationMVArun2017v2DBoldDMwLT2017_2",
               "mm": "0<1",
               }
    wp_dict_deeptau = {
               "vvvloose": "byVVVLooseDeepTau2017v2p1VSjet_2",
               "vvloose": "byVVLooseDeepTau2017v2p1VSjet_2",
               "vloose": "byVLooseDeepTau2017v2p1VSjet_2",
               "loose": "byLooseDeepTau2017v2p1VSjet_2",
               "medium": "byMediumDeepTau2017v2p1VSjet_2",
               "tight": "byTightDeepTau2017v2p1VSjet_2",
               "vtight": "byVTightDeepTau2017v2p1VSjet_2",
               "vvtight": "byVVTightDeepTau2017v2p1VSjet_2",
               "mm": "0<1",
               }
    wp_dict = wp_dict_deeptau

    # NOTE(review): the message says "MVA Tau ID" although the DeepTau
    # dictionary is the one in use - confirm which wording is intended.
    logger.info("Produce shapes for the %s working point of the MVA Tau ID", args.working_point)
    # Channels and processes
    # yapf: disable
    directory = args.directory
    # Only referenced by the disabled fake factor estimations below.
    ff_friend_directory = args.fake_factor_friend_directory
    mt = MTTauID2016()
    # The tau ID discriminator of the chosen working point has to be passed.
    mt.cuts.add(Cut(wp_dict[args.working_point]+">0.5", "tau_iso"))
    mt_processes = {
        "data"  : Process("data_obs", DataEstimation      (era, directory, mt, friend_directory=[])),
        "ZTT"   : Process("ZTT",      ZTTEstimation       (era, directory, mt, friend_directory=[])),
        "EMB"   : Process("EMB",      ZTTEmbeddedEstimation  (era, directory, mt, friend_directory=[])),
        "ZJ"    : Process("ZJ",       ZJEstimation        (era, directory, mt, friend_directory=[])),
        "ZL"    : Process("ZL",       ZLEstimation        (era, directory, mt, friend_directory=[])),
        "TTT"   : Process("TTT",      TTTEstimation       (era, directory, mt, friend_directory=[])),
        "TTJ"   : Process("TTJ",      TTJEstimation       (era, directory, mt, friend_directory=[])),
        "TTL"   : Process("TTL",      TTLEstimation       (era, directory, mt, friend_directory=[])),
        "VVT"   : Process("VVT",      VVTEstimation       (era, directory, mt, friend_directory=[])),
        "VVJ"   : Process("VVJ",      VVJEstimation       (era, directory, mt, friend_directory=[])),
        "VVL"   : Process("VVL",      VVLEstimation       (era, directory, mt, friend_directory=[])),
        "W"     : Process("W",        WEstimation         (era, directory, mt, friend_directory=[])),
        }
    # TODO: Include alternative jet fake estimation.
    # mt_processes["FAKES"] = Process("jetFakes", NewFakeEstimationLT(era, directory, mt, [mt_processes[process] for process in ["EMB", "ZL", "TTL", "VVL"]], mt_processes["data"], friend_directory=mt_friend_directory+[ff_friend_directory]))
    # mt_processes["FAKES"] = Process("jetFakes", NewFakeEstimationLT(era, directory, mt, [mt_processes[process] for process in ["ZTT", "ZL", "TTL", "TTT", "VVL", "VVT"]], mt_processes["data"], friend_directory=mt_friend_directory+[ff_friend_directory]))
    # QCD is estimated once with simulated ZTT and once with embedded events.
    mt_processes["QCD"] = Process("QCD", QCDEstimation_SStoOS_MTETEM(era, directory, mt,
            [mt_processes[process] for process in ["ZTT", "ZL", "ZJ", "TTT", "TTJ", "TTL", "VVT", "VVJ", "VVL", "W"]],
            mt_processes["data"], friend_directory=[], extrapolation_factor=1.17))
    mt_processes["QCDEMB"] = Process("QCDEMB", QCDEstimation_SStoOS_MTETEM(era, directory, mt,
            [mt_processes[process] for process in ["EMB", "ZL", "ZJ", "TTJ", "TTL", "VVJ", "VVL", "W"]],
            mt_processes["data"], friend_directory=[], extrapolation_factor=1.17))
    # TODO: Include Z-> mumu control region.
    mm = MMTauID2016()
    mm_processes = {
        "data"  : Process("data_obs", DataEstimation       (era, directory, mm, friend_directory=[])),
        "ZLL"   : Process("ZLL",      DYJetsToLLEstimation (era, directory, mm, friend_directory=[])),
        "MMEMB" : Process("MMEMB",    ZTTEmbeddedEstimation(era, directory, mm, friend_directory=[])),
        "TT"    : Process("TT",       TTEstimation         (era, directory, mm, friend_directory=[])),
        "VV"    : Process("VV",       VVEstimation         (era, directory, mm, friend_directory=[])),
        "W"     : Process("W",        WEstimation          (era, directory, mm, friend_directory=[])),
        }
    # mm_processes["FAKES"] = None  TODO: Add fake factors or alternative fake rate estimation here
    mm_processes["QCD"] = Process("QCD", QCDEstimation_SStoOS_MTETEM(era, directory, mm,
            [mm_processes[process] for process in ["ZLL", "W", "TT", "VV"]],
            mm_processes["data"], friend_directory=[], extrapolation_factor=1.17))
    mm_processes["QCDEMB"] = Process("QCDEMB", QCDEstimation_SStoOS_MTETEM(era, directory, mm,
            [mm_processes[process] for process in ["MMEMB", "W"]],
            mm_processes["data"], friend_directory=[], extrapolation_factor=1.17))

    # Stage 0 and 1.1 signals for ggH & qqH
    # mt_processes["FAKES"] = Process("jetFakes", NewFakeEstimationLT(era, directory, mt, [mt_processes[process] for process in ["ZTT", "TTT", "VVT", "ZL", "TTL", "VVL"]], mt_processes["data"], friend_directory=mt_friend_directory+[ff_friend_directory]))
    # mt_processes["FAKESEMB"] = Process("jetFakesEMB", NewFakeEstimationLT(era, directory, mt, [mt_processes[process] for process in ["EMB", "ZL", "TTL", "VVL"]], mt_processes["data"], friend_directory=mt_friend_directory+[ff_friend_directory]))
    # Variables and categories
    # NOTE(security): yaml.load without an explicit Loader can construct
    # arbitrary Python objects; only use trusted binning files or switch to
    # yaml.safe_load once the binning files are known to be plain YAML.
    with open(args.binning) as binning_file:
        binning = yaml.load(binning_file)

    mt_categories = []
    # Goodness of fit shapes
    if args.gof_channel == "mt":
        gof_config = binning["control"]["mt"][args.gof_variable]
        score = Variable(
                args.gof_variable,
                VariableBinning(gof_config["bins"]),
                expression=gof_config["expression"])
        # The additional cut on the control variable is optional.
        if "cut" in gof_config:
            cuts = Cuts(Cut(gof_config["cut"], "binning"))
        else:
            cuts = Cuts()
        mt_categories.append(
            Category(
                args.gof_variable,
                mt,
                cuts,
                variable=score))
    elif "mt" in args.channels:
        for cat in binning["categories"]["mt"]:
            cat_config = binning["categories"]["mt"][cat]
            category = Category(
                        cat,
                        mt,
                        Cuts(Cut(cat_config["cut"], "category")),
                        variable=Variable(cat_config["var"],
                            VariableBinning(cat_config["bins"]),
                            expression=cat_config["expression"]))
            mt_categories.append(category)

    # yapf: enable
    # A channel takes part if it is the goodness of fit channel or listed
    # explicitly in the channels.
    selected_channels = [args.gof_channel] + args.channels
    use_mt = "mt" in selected_channels
    use_mm = "mm" in selected_channels

    # Nominal histograms
    if use_mt:
        for process, category in product(mt_processes.values(), mt_categories):
            systematics.add(
                Systematic(
                    category=category,
                    process=process,
                    analysis="smhtt",
                    era=era,
                    variation=Nominal(),
                    mass="125"))

    mm_categories = []
    if "mm" in args.channels:
        mm_categories.append(
            Category(
                "control",
                mm,
                Cuts(),
                variable=Variable("m_vis",
                    ConstantBinning(1, 50, 150),
                    "m_vis")))
        for process, category in product(mm_processes.values(), mm_categories):
            systematics.add(
                    Systematic(
                        category=category,
                        process=process,
                        analysis="smhtt",
                        era=era,
                        variation=Nominal(),
                        mass="125"))

    # Shapes variations

    # Process list shared by several of the variations below.
    simulated_nicks = [
        "ZTT", "ZL", "ZJ", "W", "TTT", "TTL", "TTJ", "VVL", "VVT", "VVJ"
    ]

    # MC tau energy scale
    add_variations(tau_es_variations("CMS_scale_mc_t"),
                   ["ZTT", "TTT", "TTL", "VVL", "VVT"],  # "FAKES"
                   mt_processes, mt, use_mt)

    # Tau energy scale
    add_variations(tau_es_variations("CMS_scale_t"),
                   ["ZTT", "TTT", "TTL", "VVL", "VVT", "EMB"],  # "FAKES"
                   mt_processes, mt, use_mt)

    # Jet energy scale

    # Inclusive JES shapes (disabled):
    # jet_es_variations += create_systematic_variations(
    #     "CMS_scale_j_Run2016", "jecUnc", DifferentPipeline)
    jet_es_variations = []

    # Splitted JES shapes
    jet_es_variations += create_systematic_variations(
        "CMS_scale_j_eta0to3_Run2016", "jecUncEta0to3", DifferentPipeline)
    jet_es_variations += create_systematic_variations(
        "CMS_scale_j_eta0to5_Run2016", "jecUncEta0to5", DifferentPipeline)
    jet_es_variations += create_systematic_variations(
        "CMS_scale_j_eta3to5_Run2016", "jecUncEta3to5", DifferentPipeline)
    jet_es_variations += create_systematic_variations(
        "CMS_scale_j_RelativeBal_Run2016", "jecUncRelativeBal",
        DifferentPipeline)
    add_variations(jet_es_variations, simulated_nicks, mt_processes, mt,
                   use_mt)

    # MET energy scale
    met_unclustered_variations = create_systematic_variations(
        "CMS_scale_met_unclustered", "metUnclusteredEn",
        DifferentPipeline)
    # NOTE: Clustered MET not used anymore in the uncertainty model
    #met_clustered_variations = create_systematic_variations(
    #    "CMS_scale_met_clustered_Run2016", "metJetEn", DifferentPipeline)
    add_variations(met_unclustered_variations,  # + met_clustered_variations
                   simulated_nicks, mt_processes, mt, use_mt)

    # Recoil correction unc
    recoil_resolution_variations = create_systematic_variations(
        "CMS_htt_boson_reso_met_Run2016", "metRecoilResolution",
        DifferentPipeline)
    recoil_response_variations = create_systematic_variations(
        "CMS_htt_boson_scale_met_Run2016", "metRecoilResponse",
        DifferentPipeline)
    add_variations(recoil_resolution_variations + recoil_response_variations,
                   ["ZTT", "ZL", "ZJ", "W"], mt_processes, mt, use_mt)

    # Z pt reweighting
    zpt_variations = create_systematic_variations(
        "CMS_htt_dyShape_Run2016", "zPtReweightWeight", SquareAndRemoveWeight)
    add_variations(zpt_variations, ["ZTT", "ZL", "ZJ"], mt_processes, mt,
                   use_mt)

    # top pt reweighting
    top_pt_variations = create_systematic_variations(
        "CMS_htt_ttbarShape", "topPtReweightWeight",
        SquareAndRemoveWeight)
    add_variations(top_pt_variations, ["TTT", "TTL", "TTJ"], mt_processes, mt,
                   use_mt)

    # jet to tau fake efficiency
    jet_to_tau_fake_variations = [
        AddWeight("CMS_htt_jetToTauFake_Run2016", "jetToTauFake_weight",
                  Weight("max(1.0-pt_2*0.002, 0.6)", "jetToTauFake_weight"), "Up"),
        AddWeight("CMS_htt_jetToTauFake_Run2016", "jetToTauFake_weight",
                  Weight("min(1.0+pt_2*0.002, 1.4)", "jetToTauFake_weight"), "Down"),
    ]
    add_variations(jet_to_tau_fake_variations, ["ZJ", "TTJ", "W", "VVJ"],
                   mt_processes, mt, use_mt)

    # ZL fakes energy scale
    mu_fake_es_1prong_variations = create_systematic_variations(
        "CMS_ZLShape_mt_1prong_Run2016", "tauMuFakeEsOneProng",
        DifferentPipeline)
    mu_fake_es_1prong1pizero_variations = create_systematic_variations(
        "CMS_ZLShape_mt_1prong1pizero_Run2016", "tauMuFakeEsOneProngPiZeros",
        DifferentPipeline)
    add_variations(
        mu_fake_es_1prong_variations + mu_fake_es_1prong1pizero_variations,
        ["ZL"], mt_processes, mt, use_mt)

    # lepton trigger efficiency
    # Registered variation by variation (mt first, then mm) to keep the
    # original registration order.
    for variation in trigger_eff_variations("CMS_eff_trigger_mt_Run2016"):
        add_variations([variation], simulated_nicks, mt_processes, mt, use_mt)
        add_variations([variation], ["ZLL", "TT", "VV", "W"], mm_processes,
                       mm, use_mm)

    for variation in trigger_eff_variations("CMS_eff_trigger_emb_mt_Run2016"):
        add_variations([variation], ["EMB"], mt_processes, mt, use_mt)
        add_variations([variation], ["MMEMB"], mm_processes, mm, use_mm)

    # # Zll reweighting !!! replaced by log normal uncertainties: CMS_eFakeTau_Run2016 15.5%; CMS_mFakeTau_Run2016 27.2%
    # zll_et_weight_variations = []
    # zll_mt_weight_variations = []
    # zll_mt_weight_variations.append(
    #     AddWeight(
    #         "CMS_mFakeTau_Run2016", "mFakeTau_reweight",
    #         Weight(
    #             "(((abs(eta_1) < 0.4)*1.63/1.47) + ((abs(eta_1) >= 0.4 && abs(eta_1) < 0.8)*1.85/1.55) + ((abs(eta_1) >= 0.8 && abs(eta_1) < 1.2)*1.38/1.33) + ((abs(eta_1) >= 1.2 && abs(eta_1) < 1.7)*2.26/1.72) + ((abs(eta_1) >= 1.7 && abs(eta_1) < 2.3)*3.13/2.5) + (abs(eta_1) >= 2.3))",
    #             "mFakeTau_reweight"), "Up"))
    # zll_mt_weight_variations.append(
    #     AddWeight(
    #         "CMS_mFakeTau_Run2016", "mFakeTau_reweight",
    #         Weight(
    #             "(((abs(eta_1) < 0.4)*1.31/1.47) + ((abs(eta_1) >= 0.4 && abs(eta_1) < 0.8)*1.25/1.55) + ((abs(eta_1) >= 0.8 && abs(eta_1) < 1.2)*1.28/1.33) + ((abs(eta_1) >= 1.2 && abs(eta_1) < 1.7)*1.18/1.72) + ((abs(eta_1) >= 1.7 && abs(eta_1) < 2.3)*1.87/2.5) + (abs(eta_1) >= 2.3))",
    #             "mFakeTau_reweight"), "Down"))
    # for variation in zll_mt_weight_variations:
    #     for process_nick in ["ZL"]:
    #         if "mt" in [args.gof_channel] + args.channels:
    #             systematics.add_systematic_variation(
    #                 variation=variation,
    #                 process=mt_processes[process_nick],
    #                 channel=mt,
    #                 era=era)

    # Embedded event specifics
    # Tau energy scale
    add_variations(tau_es_variations("CMS_scale_emb_t"),
                   ["EMB"],  # , "FAKES"
                   mt_processes, mt, use_mt)

    # Decay mode efficiency of embedded events
    mt_decayMode_variations = [
        ReplaceWeight(
            "CMS_3ProngEff_Run2016", "decayMode_SF",
            Weight("embeddedDecayModeWeight_effUp_pi0Nom", "decayMode_SF"),
            "Up"),
        ReplaceWeight(
            "CMS_3ProngEff_Run2016", "decayMode_SF",
            Weight("embeddedDecayModeWeight_effDown_pi0Nom", "decayMode_SF"),
            "Down"),
        ReplaceWeight(
            "CMS_1ProngPi0Eff_Run2016", "decayMode_SF",
            Weight("embeddedDecayModeWeight_effNom_pi0Up", "decayMode_SF"),
            "Up"),
        ReplaceWeight(
            "CMS_1ProngPi0Eff_Run2016", "decayMode_SF",
            Weight("embeddedDecayModeWeight_effNom_pi0Down", "decayMode_SF"),
            "Down"),
    ]
    add_variations(mt_decayMode_variations, ["EMB"], mt_processes, mt, use_mt)

    # 10% removed events in ttbar simulation (ttbar -> real tau tau events) will be added/subtracted to ZTT shape to use as systematic
    tttautau_process_mt = Process(
        "TTT",
        TTTEstimation(
            era, directory, mt, friend_directory=[]))
    if use_mt:
        for category in mt_categories:
            for shift, ttbar_fraction in (("Down", -0.1), ("Up", 0.1)):
                process_nick = "ZTTpTTTauTau" + shift
                # Embedded shape plus/minus the scaled ttbar contribution.
                mt_processes[process_nick] = Process(
                    process_nick,
                    AddHistogramEstimationMethod(
                        "AddHistogram", "nominal", era, directory, mt,
                        [mt_processes["EMB"], tttautau_process_mt],
                        [1.0, ttbar_fraction]))
                systematics.add(
                    Systematic(
                        category=category,
                        process=mt_processes[process_nick],
                        analysis="smhtt",
                        era=era,
                        variation=Relabel("CMS_htt_emb_ttbar_Run2016", shift),
                        mass="125"))

    # Produce histograms
    logger.info("Start producing shapes.")
    systematics.produce()
    logger.info("Done producing shapes.")
def main(args):
    """Count 2016 mm events in bins of visible mass and visible pT.

    One category is booked per (m_vis, ptvis) bin pair and the nominal
    histogram of every process is produced in each of them. The result is
    written to ``counts_zptm_2016.root``.

    Args:
        args: Parsed command line arguments. Attributes read here:
            ``num_threads``, ``datasets`` and ``directory``.
    """
    # Container for all distributions to be drawn
    systematics_mm = Systematics(
        "counts_zptm_2016.root",
        num_threads=args.num_threads,
        find_unique_objects=True)

    # Era
    era = Run2016(args.datasets)

    # Channels and processes
    # yapf: disable
    directory = args.directory

    mm = MM()
    mm_processes = {
        "data"  : Process("data_obs", DataEstimation      (era, directory, mm, friend_directory=[])),
        "ZTT"   : Process("ZTT",      ZTTEstimation       (era, directory, mm, friend_directory=[])),
        "ZL"    : Process("ZL",       ZLEstimation        (era, directory, mm, friend_directory=[])),
        "TTT"   : Process("TTT",      TTTEstimation       (era, directory, mm, friend_directory=[])),
        "TTL"   : Process("TTL",      TTLEstimation       (era, directory, mm, friend_directory=[])),
        "VVT"   : Process("VVT",      VVTEstimation       (era, directory, mm, friend_directory=[])),
        "VVL"   : Process("VVL",      VVLEstimation       (era, directory, mm, friend_directory=[])),
        "W"     : Process("W",        WEstimation         (era, directory, mm, friend_directory=[])),
        }
    # QCD is estimated from data and the processes listed here.
    qcd_inputs = [mm_processes[nick]
                  for nick in ["ZTT", "ZL", "W", "TTT", "TTL", "VVT", "VVL"]]
    mm_processes["QCD"] = Process(
        "QCD",
        QCDEstimation_SStoOS_MTETEM(
            era, directory, mm, qcd_inputs, mm_processes["data"],
            friend_directory=[], extrapolation_factor=2.0))

    # Variables and categories
    m_vis_edges = [50, 100, 200, 500, 1000]
    ptvis_edges = [0, 10, 20, 30, 40, 50, 100, 150, 200, 300, 400, 1000]

    # One category per pair of neighbouring bin edges in both variables;
    # both edges of a bin are excluded by the cut expression.
    mm_categories = []
    for mass_bin, (m_low, m_high) in enumerate(
            zip(m_vis_edges[:-1], m_vis_edges[1:])):
        for pt_bin, (pt_low, pt_high) in enumerate(
                zip(ptvis_edges[:-1], ptvis_edges[1:])):
            name = "m_vis_bin_{}_vs_ptvis_bin_{}".format(mass_bin, pt_bin)
            selection = "(m_vis > {} && m_vis < {}) && (ptvis > {} && ptvis < {})".format(
                m_low, m_high, pt_low, pt_high)
            mm_categories.append(
                Category(
                    name,
                    mm,
                    Cuts(Cut(selection, "zptm_category")),
                    variable=None))

    # Nominal histograms
    for process in mm_processes.values():
        for category in mm_categories:
            #if process.name in ["ZTT","ZLL"]:
            #    process.estimation_method.get_weights().remove("zPtReweightWeight")
            nominal = Systematic(
                category=category,
                process=process,
                analysis="smhtt",
                era=era,
                variation=Nominal(),
                mass="125")
            systematics_mm.add(nominal)

    # Produce histograms
    systematics_mm.produce()
def main(args):
    """Produce nominal control shapes for the 2016 mm channel.

    Output containers are created for the mt, et, tt, em and mm channels,
    but only the mm container is filled and produced; the code for the
    other channels is kept commented out below.

    For every control variable two mm categories are booked: one without
    additional cuts and one named ``<variable>_peak`` restricted to
    ``70 < m_vis < 110``.

    Args:
        args: Parsed command-line arguments. The attributes read here are
            ``num_threads``, ``datasets``, ``directory``,
            ``et_friend_directory``, ``mt_friend_directory``,
            ``tt_friend_directory``, ``em_friend_directory``,
            ``mm_friend_directory``, ``fake_factor_friend_directory``,
            ``binning`` (path of a YAML file) and ``channels``.
    """
    # Container for all distributions to be drawn
    systematics_mt = Systematics("shapes_mt_2016.root",
                                 num_threads=args.num_threads,
                                 find_unique_objects=True)
    systematics_et = Systematics("shapes_et_2016.root",
                                 num_threads=args.num_threads,
                                 find_unique_objects=True)
    systematics_tt = Systematics("shapes_tt_2016.root",
                                 num_threads=args.num_threads,
                                 find_unique_objects=True)
    systematics_em = Systematics("shapes_em_2016.root",
                                 num_threads=args.num_threads,
                                 find_unique_objects=True)
    systematics_mm = Systematics("shapes_mm_2016.root",
                                 num_threads=args.num_threads,
                                 find_unique_objects=True)

    # Era
    era = Run2016(args.datasets)

    # Channels and processes
    # yapf: disable
    directory = args.directory
    et_friend_directory = args.et_friend_directory
    mt_friend_directory = args.mt_friend_directory
    tt_friend_directory = args.tt_friend_directory
    em_friend_directory = args.em_friend_directory
    mm_friend_directory = args.mm_friend_directory

    ff_friend_directory = args.fake_factor_friend_directory

    #mt = MT()
    #mt_processes = {
    #    "data"  : Process("data_obs", DataEstimation      (era, directory, mt, friend_directory=mt_friend_directory)),
    #    "ZTT"   : Process("ZTT",      ZTTEstimation       (era, directory, mt, friend_directory=mt_friend_directory)),
    #    "EMB"   : Process("EMB",      ZTTEmbeddedEstimation  (era, directory, mt, friend_directory=mt_friend_directory)),
    #    "ZJ"    : Process("ZJ",       ZJEstimation        (era, directory, mt, friend_directory=mt_friend_directory)),
    #    "ZL"    : Process("ZL",       ZLEstimation        (era, directory, mt, friend_directory=mt_friend_directory)),
    #    "TTT"   : Process("TTT",      TTTEstimation       (era, directory, mt, friend_directory=mt_friend_directory)),
    #    "TTJ"   : Process("TTJ",      TTJEstimation       (era, directory, mt, friend_directory=mt_friend_directory)),
    #    "TTL"   : Process("TTL",      TTLEstimation       (era, directory, mt, friend_directory=mt_friend_directory)),
    #    "VVT"   : Process("VVT",      VVTEstimation       (era, directory, mt, friend_directory=mt_friend_directory)),
    #    "VVJ"   : Process("VVJ",      VVJEstimation       (era, directory, mt, friend_directory=mt_friend_directory)),
    #    "VVL"   : Process("VVL",      VVLEstimation       (era, directory, mt, friend_directory=mt_friend_directory)),
    #    "W"     : Process("W",        WEstimation         (era, directory, mt, friend_directory=mt_friend_directory)),
    #    "ggH"   : Process("ggH125",   ggHEstimation       ("ggH125", era, directory, mt, friend_directory=mt_friend_directory)),
    #    "qqH"   : Process("qqH125",   qqHEstimation       ("qqH125", era, directory, mt, friend_directory=mt_friend_directory)),
    #    "VH"    : Process("VH125",    VHEstimation        (era, directory, mt, friend_directory=mt_friend_directory)),
    #    "WH"    : Process("WH125",    WHEstimation        (era, directory, mt, friend_directory=mt_friend_directory)),
    #    "ZH"    : Process("ZH125",    ZHEstimation        (era, directory, mt, friend_directory=mt_friend_directory)),
    #    "ttH"   : Process("ttH125",   ttHEstimation       (era, directory, mt, friend_directory=mt_friend_directory)),
    #    }
    #mt_processes["FAKES"] = Process("jetFakes", NewFakeEstimationLT(era, directory, mt, [mt_processes[process] for process in ["ZTT", "ZL", "TTT", "TTL", "VVT", "VVL"]], mt_processes["data"], friend_directory=mt_friend_directory+[ff_friend_directory]))
    #mt_processes["FAKESEMB"] = Process("jetFakesEMB", NewFakeEstimationLT(era, directory, mt, [mt_processes[process] for process in ["EMB", "ZL", "TTL", "VVL"]], mt_processes["data"], friend_directory=mt_friend_directory+[ff_friend_directory]))

    #mt_processes["QCD"] = Process("QCD", QCDEstimation_SStoOS_MTETEM(era, directory, mt,
    #        [mt_processes[process] for process in ["ZTT", "ZL", "ZJ", "W", "TTT", "TTJ", "TTL", "VVT", "VVJ", "VVL"]],
    #        mt_processes["data"], friend_directory=mt_friend_directory, extrapolation_factor=1.00))
    #mt_processes["QCDEMB"] = Process("QCDEMB", QCDEstimation_SStoOS_MTETEM(era, directory, mt,
    #        [mt_processes[process] for process in ["EMB", "ZL", "ZJ", "W", "TTJ", "TTL", "VVJ", "VVL"]],
    #        mt_processes["data"], friend_directory=mt_friend_directory, extrapolation_factor=1.00))


    #et = ET()
    #et_processes = {
    #    "data"  : Process("data_obs", DataEstimation      (era, directory, et, friend_directory=et_friend_directory)),
    #    "ZTT"   : Process("ZTT",      ZTTEstimation       (era, directory, et, friend_directory=et_friend_directory)),
    #    "EMB"   : Process("EMB",      ZTTEmbeddedEstimation  (era, directory, et, friend_directory=et_friend_directory)),
    #    "ZJ"    : Process("ZJ",       ZJEstimation        (era, directory, et, friend_directory=et_friend_directory)),
    #    "ZL"    : Process("ZL",       ZLEstimation        (era, directory, et, friend_directory=et_friend_directory)),
    #    "TTT"   : Process("TTT",      TTTEstimation       (era, directory, et, friend_directory=et_friend_directory)),
    #    "TTJ"   : Process("TTJ",      TTJEstimation       (era, directory, et, friend_directory=et_friend_directory)),
    #    "TTL"   : Process("TTL",      TTLEstimation       (era, directory, et, friend_directory=et_friend_directory)),
    #    "VVT"   : Process("VVT",      VVTEstimation       (era, directory, et, friend_directory=et_friend_directory)),
    #    "VVJ"   : Process("VVJ",      VVJEstimation       (era, directory, et, friend_directory=et_friend_directory)),
    #    "VVL"   : Process("VVL",      VVLEstimation       (era, directory, et, friend_directory=et_friend_directory)),
    #    "W"     : Process("W",        WEstimation         (era, directory, et, friend_directory=et_friend_directory)),
    #    "ggH"   : Process("ggH125",   ggHEstimation       ("ggH125", era, directory, et, friend_directory=et_friend_directory)),
    #    "qqH"   : Process("qqH125",   qqHEstimation       ("qqH125", era, directory, et, friend_directory=et_friend_directory)),
    #    "VH"    : Process("VH125",    VHEstimation        (era, directory, et, friend_directory=et_friend_directory)),
    #    "WH"    : Process("WH125",    WHEstimation        (era, directory, et, friend_directory=et_friend_directory)),
    #    "ZH"    : Process("ZH125",    ZHEstimation        (era, directory, et, friend_directory=et_friend_directory)),
    #    "ttH"   : Process("ttH125",   ttHEstimation       (era, directory, et, friend_directory=et_friend_directory)),
    #    }
    #et_processes["FAKES"] = Process("jetFakes", NewFakeEstimationLT(era, directory, et, [et_processes[process] for process in ["ZTT", "ZL", "TTT", "TTL", "VVT", "VVL"]], et_processes["data"], friend_directory=et_friend_directory+[ff_friend_directory]))
    #et_processes["FAKESEMB"] = Process("jetFakesEMB", NewFakeEstimationLT(era, directory, et, [et_processes[process] for process in ["EMB", "ZL", "TTL", "VVL"]], et_processes["data"], friend_directory=et_friend_directory+[ff_friend_directory]))

    #et_processes["QCD"] = Process("QCD", QCDEstimation_SStoOS_MTETEM(era, directory, et,
    #        [et_processes[process] for process in ["ZTT", "ZL", "ZJ", "W", "TTT", "TTJ", "TTL", "VVT", "VVJ", "VVL"]],
    #        et_processes["data"], friend_directory=et_friend_directory, extrapolation_factor=1.00))
    #et_processes["QCDEMB"] = Process("QCDEMB", QCDEstimation_SStoOS_MTETEM(era, directory, et,
    #        [et_processes[process] for process in ["EMB", "ZL", "ZJ", "W", "TTJ", "TTL", "VVJ", "VVL"]],
    #        et_processes["data"], friend_directory=et_friend_directory, extrapolation_factor=1.00))


    #tt = TT()
    #tt_processes = {
    #    "data"  : Process("data_obs", DataEstimation      (era, directory, tt, friend_directory=tt_friend_directory)),
    #    "ZTT"   : Process("ZTT",      ZTTEstimation       (era, directory, tt, friend_directory=tt_friend_directory)),
    #    "EMB"   : Process("EMB",      ZTTEmbeddedEstimation  (era, directory, tt, friend_directory=tt_friend_directory)),
    #    "ZJ"    : Process("ZJ",       ZJEstimation        (era, directory, tt, friend_directory=tt_friend_directory)),
    #    "ZL"    : Process("ZL",       ZLEstimation        (era, directory, tt, friend_directory=tt_friend_directory)),
    #    "TTT"   : Process("TTT",      TTTEstimation       (era, directory, tt, friend_directory=tt_friend_directory)),
    #    "TTJ"   : Process("TTJ",      TTJEstimation       (era, directory, tt, friend_directory=tt_friend_directory)),
    #    "TTL"   : Process("TTL",      TTLEstimation       (era, directory, tt, friend_directory=tt_friend_directory)),
    #    "VVT"   : Process("VVT",      VVTEstimation       (era, directory, tt, friend_directory=tt_friend_directory)),
    #    "VVJ"   : Process("VVJ",      VVJEstimation       (era, directory, tt, friend_directory=tt_friend_directory)),
    #    "VVL"   : Process("VVL",      VVLEstimation       (era, directory, tt, friend_directory=tt_friend_directory)),
    #    "W"     : Process("W",        WEstimation         (era, directory, tt, friend_directory=tt_friend_directory)),
    #    "ggH"   : Process("ggH125",   ggHEstimation       ("ggH125", era, directory, tt, friend_directory=tt_friend_directory)),
    #    "qqH"   : Process("qqH125",   qqHEstimation       ("qqH125", era, directory, tt, friend_directory=tt_friend_directory)),
    #    "VH"    : Process("VH125",    VHEstimation        (era, directory, tt, friend_directory=tt_friend_directory)),
    #    "WH"    : Process("WH125",    WHEstimation        (era, directory, tt, friend_directory=tt_friend_directory)),
    #    "ZH"    : Process("ZH125",    ZHEstimation        (era, directory, tt, friend_directory=tt_friend_directory)),
    #    "ttH"   : Process("ttH125",   ttHEstimation       (era, directory, tt, friend_directory=tt_friend_directory)),
    #    }
    #tt_processes["FAKESEMB"] = Process("jetFakesEMB", NewFakeEstimationTT(era, directory, tt, [tt_processes[process] for process in ["EMB", "ZL", "TTL", "VVL"]], tt_processes["data"], friend_directory=tt_friend_directory+[ff_friend_directory]))
    #tt_processes["FAKES"] = Process("jetFakes", NewFakeEstimationTT(era, directory, tt, [tt_processes[process] for process in ["ZTT", "ZL", "TTT", "TTL", "VVT", "VVL"]], tt_processes["data"], friend_directory=tt_friend_directory+[ff_friend_directory]))

    #tt_processes["QCD"] = Process("QCD", QCDEstimation_ABCD_TT_ISO2(era, directory, tt,
    #        [tt_processes[process] for process in ["ZTT", "ZL", "ZJ", "W", "TTT", "TTJ", "TTL", "VVT", "VVJ", "VVL"]],
    #        tt_processes["data"], friend_directory=tt_friend_directory))
    #tt_processes["QCDEMB"] = Process("QCDEMB", QCDEstimation_ABCD_TT_ISO2(era, directory, tt,
    #        [tt_processes[process] for process in ["EMB", "ZL", "ZJ", "W", "TTJ", "TTL", "VVJ", "VVL"]],
    #        tt_processes["data"], friend_directory=tt_friend_directory))

    #em = EM()
    #em_processes = {
    #    "data"  : Process("data_obs", DataEstimation      (era, directory, em, friend_directory=em_friend_directory)),
    #    "ZTT"   : Process("ZTT",      ZTTEstimation       (era, directory, em, friend_directory=em_friend_directory)),
    #    "EMB"   : Process("EMB",      ZTTEmbeddedEstimation  (era, directory, em, friend_directory=em_friend_directory)),
    #    "ZL"    : Process("ZL",       ZLEstimation        (era, directory, em, friend_directory=em_friend_directory)),
    #    "TTT"   : Process("TTT",      TTTEstimation       (era, directory, em, friend_directory=em_friend_directory)),
    #    "TTL"   : Process("TTL",      TTLEstimation       (era, directory, em, friend_directory=em_friend_directory)),
    #    "VVT"   : Process("VVT",      VVTEstimation       (era, directory, em, friend_directory=em_friend_directory)),
    #    "VVL"   : Process("VVL",      VVLEstimation       (era, directory, em, friend_directory=em_friend_directory)),
    #    "W"     : Process("W",        WEstimation         (era, directory, em, friend_directory=em_friend_directory)),
    #    "ggH"   : Process("ggH125",   ggHEstimation       ("ggH125", era, directory, em, friend_directory=em_friend_directory)),
    #    "qqH"   : Process("qqH125",   qqHEstimation       ("qqH125", era, directory, em, friend_directory=em_friend_directory)),
    #    "VH"    : Process("VH125",    VHEstimation        (era, directory, em, friend_directory=em_friend_directory)),
    #    "WH"    : Process("WH125",    WHEstimation        (era, directory, em, friend_directory=em_friend_directory)),
    #    "ZH"    : Process("ZH125",    ZHEstimation        (era, directory, em, friend_directory=em_friend_directory)),
    #    "ttH"   : Process("ttH125",   ttHEstimation       (era, directory, em, friend_directory=em_friend_directory)),
    #    }

    #em_processes["QCD"] = Process("QCD", QCDEstimation_SStoOS_MTETEM(era, directory, em, [em_processes[process] for process in ["ZTT", "ZL", "W", "TTT", "VVT", "VVL"]], em_processes["data"], extrapolation_factor=1.0, qcd_weight = Weight("em_qcd_extrap_up_Weight","qcd_weight")))
    #em_processes["QCDEMB"] = Process("QCDEMB", QCDEstimation_SStoOS_MTETEM(era, directory, em, [em_processes[process] for process in ["EMB", "ZL", "W", "VVL"]], em_processes["data"], extrapolation_factor=1.0, qcd_weight = Weight("em_qcd_extrap_up_Weight","qcd_weight")))

    mm = MM()
    mm_processes = {
        "data"  : Process("data_obs", DataEstimation      (era, directory, mm, friend_directory=mm_friend_directory)),
        "ZTT"   : Process("ZTT",      ZTTEstimation       (era, directory, mm, friend_directory=mm_friend_directory)),
        "ZL"    : Process("ZL",       ZLEstimation        (era, directory, mm, friend_directory=mm_friend_directory)),
        "TTT"   : Process("TTT",      TTTEstimation       (era, directory, mm, friend_directory=mm_friend_directory)),
        "TTL"   : Process("TTL",      TTLEstimation       (era, directory, mm, friend_directory=mm_friend_directory)),
        "VVT"   : Process("VVT",      VVTEstimation       (era, directory, mm, friend_directory=mm_friend_directory)),
        "VVL"   : Process("VVL",      VVLEstimation       (era, directory, mm, friend_directory=mm_friend_directory)),
        "W"     : Process("W",        WEstimation         (era, directory, mm, friend_directory=mm_friend_directory)),
        }
    # The QCD estimation is built from the data process and the listed
    # background processes, so it has to be added after the dict exists.
    mm_processes["QCD"] = Process("QCD", QCDEstimation_SStoOS_MTETEM(era, directory, mm,
            [mm_processes[process] for process in ["ZTT", "ZL", "W", "TTT", "TTL", "VVT", "VVL"]],
            mm_processes["data"], friend_directory=mm_friend_directory, extrapolation_factor=2.0))


    # Variables and categories
    # Close the binning file deterministically instead of leaking the handle.
    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary Python objects and is rejected by recent PyYAML releases;
    # consider yaml.safe_load if the binning file only holds plain data.
    with open(args.binning) as binning_file:
        binning = yaml.load(binning_file)

    mt_categories = []
    et_categories = []
    tt_categories = []
    em_categories = []
    mm_categories = []

    variable_names = [
        "m_vis", "ptvis",
    #    "DiTauDeltaR",

    #    "m_sv", "pt_sv", "eta_sv",
    #    "m_sv_puppi", "pt_sv_puppi", "eta_sv_puppi",
    #    "m_fastmtt", "pt_fastmtt", "eta_fastmtt",
    #    "m_fastmtt_puppi", "pt_fastmtt_puppi", "eta_fastmtt_puppi",

    #    "ME_D", "ME_vbf", "ME_z2j_1", "ME_z2j_2", "ME_q2v1", "ME_q2v2", "ME_costheta1", "ME_costheta2", "ME_costhetastar", "ME_phi", "ME_phi1",

        "pt_1", "pt_2", "eta_1", "eta_2",

    #    "mjj", "jdeta", "dijetpt",
        "njets", "jpt_1", "jpt_2", "jeta_1", "jeta_2",
    #    "nbtag", "bpt_1", "bpt_2", "beta_1", "beta_2",

        "met", #"mt_1", "mt_2", "pt_tt", "pZetaMissVis", "pt_ttjj", "mt_tot", "mTdileptonMET",
        "puppimet", #"mt_1_puppi", "mt_2_puppi", "pt_tt_puppi", "pZetaPuppiMissVis", "pt_ttjj_puppi", "mt_tot_puppi", "mTdileptonMET_puppi",
    #    "NNrecoil_pt", "nnmet", "mt_1_nn", "mt_2_nn", "pt_tt_nn", "pZetaNNMissVis", "pt_ttjj_nn", "mt_tot_nn", "mTdileptonMET_nn",

        "metParToZ", "metPerpToZ",
        "puppimetParToZ", "puppimetPerpToZ",
    ]

    #if "mt" in args.channels:
    #    variables = [Variable(v,VariableBinning(binning["control"]["mt"][v]["bins"]), expression=binning["control"]["mt"][v]["expression"]) for v in variable_names]
    #    cuts = Cuts()
    #    for name, var in zip(variable_names, variables):
    #        mt_categories.append(
    #            Category(
    #                name,
    #                mt,
    #                cuts,
    #                variable=var))

    #if "et" in args.channels:
    #    variables = [Variable(v,VariableBinning(binning["control"]["et"][v]["bins"]), expression=binning["control"]["et"][v]["expression"]) for v in variable_names]
    #    cuts = Cuts()
    #    for name, var in zip(variable_names, variables):
    #        et_categories.append(
    #            Category(
    #                name,
    #                et,
    #                cuts,
    #                variable=var))

    #if "tt" in args.channels:
    #    variables = [Variable(v,VariableBinning(binning["control"]["tt"][v]["bins"]), expression=binning["control"]["tt"][v]["expression"]) for v in variable_names]
    #    cuts = Cuts()
    #    for name, var in zip(variable_names, variables):
    #        tt_categories.append(
    #            Category(
    #                name,
    #                tt,
    #                cuts,
    #                variable=var))

    #if "em" in args.channels:
    #    variables = [Variable(v,VariableBinning(binning["control"]["em"][v]["bins"]), expression=binning["control"]["em"][v]["expression"]) for v in variable_names]
    #    cuts = Cuts()
    #    for name, var in zip(variable_names, variables):
    #        em_categories.append(
    #            Category(
    #                name,
    #                em,
    #                cuts,
    #                variable=var))

    if "mm" in args.channels:
        # Binning and expression of each variable come from the
        # "control" -> "mm" section of the binning file.
        variables = [Variable(v,VariableBinning(binning["control"]["mm"][v]["bins"]), expression=binning["control"]["mm"][v]["expression"]) for v in variable_names]
        # Extra view of m_vis with a constant binning that is not taken from
        # the binning file; appended to both lists so that zip() pairs it up.
        variables.append(Variable("m_vis_high",ConstantBinning(19,50.0,1000.0),expression="m_vis"))
        variable_names.append("m_vis_high")
        cuts = Cuts()
        for name, var in zip(variable_names, variables):
            # Category without additional cuts
            mm_categories.append(
                Category(
                    name,
                    mm,
                    cuts,
                    variable=var))
            # Same variable restricted to the 70 < m_vis < 110 window
            mm_categories.append(
                Category(
                    name+"_peak",
                    mm,
                    Cuts(Cut("m_vis > 70 && m_vis < 110","m_vis_peak")),
                    variable=var))

    # Nominal histograms
    #if "mt" in args.channels:
    #    for process, category in product(mt_processes.values(), mt_categories):
    #        systematics_mt.add(
    #            Systematic(
    #                category=category,
    #                process=process,
    #                analysis="smhtt",
    #                era=era,
    #                variation=Nominal(),
    #                mass="125"))

    #if "et" in args.channels:
    #    for process, category in product(et_processes.values(), et_categories):
    #        systematics_et.add(
    #            Systematic(
    #                category=category,
    #                process=process,
    #                analysis="smhtt",
    #                era=era,
    #                variation=Nominal(),
    #                mass="125"))

    #if "tt" in args.channels:
    #    for process, category in product(tt_processes.values(), tt_categories):
    #        systematics_tt.add(
    #            Systematic(
    #                category=category,
    #                process=process,
    #                analysis="smhtt",
    #                era=era,
    #                variation=Nominal(),
    #                mass="125"))

    #if "em" in args.channels:
    #    for process, category in product(em_processes.values(), em_categories):
    #        systematics_em.add(
    #            Systematic(
    #                category=category,
    #                process=process,
    #                analysis="smhtt",
    #                era=era,
    #                variation=Nominal(),
    #                mass="125"))

    if "mm" in args.channels:
        # One nominal systematic per (process, category) combination
        for process, category in product(mm_processes.values(), mm_categories):
            systematics_mm.add(
                Systematic(
                    category=category,
                    process=process,
                    analysis="smhtt",
                    era=era,
                    variation=Nominal(),
                    mass="125"))


    # Produce histograms
    #if "mt" in args.channels: systematics_mt.produce()
    #if "et" in args.channels: systematics_et.produce()
    #if "tt" in args.channels: systematics_tt.produce()
    #if "em" in args.channels: systematics_em.produce()
    if "mm" in args.channels: systematics_mm.produce()
Пример #5
0
def _process_entry(estimation, channel_cuts, additional_cuts, base_path,
                   class_name):
    """Return the config entry describing a single process.

    Args:
        estimation: Estimation method providing ``get_files()``,
            ``get_cuts()`` and ``get_weights()``.
        channel_cuts: Cuts of the channel selection, added to the cuts of
            the estimation method.
        additional_cuts: Further cuts added on top of the channel cuts.
        base_path: Prefix (without trailing slash) stripped from the file
            paths so that they are stored relative to it.
        class_name: Name of the training class assigned to the process.

    Returns:
        dict with the keys ``files``, ``cut_string``, ``weight_string`` and
        ``class``.
    """
    return {
        "files": [
            str(f).replace(base_path + "/", "")
            for f in estimation.get_files()
        ],
        "cut_string": (estimation.get_cuts() + channel_cuts +
                       additional_cuts).expand(),
        "weight_string": estimation.get_weights().extract(),
        "class": class_name,
    }


def main(args):
    """Write the dataset config for the training as a YAML file.

    The config holds the general settings taken from ``args`` and, for the
    channels mt, et and tt, a ``processes`` section with one entry per
    MC-driven process plus a data-driven ``QCD`` entry. For any other
    channel the config is written without a ``processes`` section.

    Args:
        args: Parsed command-line arguments. The attributes read here are
            ``base_path``, ``output_path``, ``output_filename``,
            ``tree_path``, ``event_branch``, ``training_weight_branch``,
            ``era``, ``database``, ``channel`` and ``output_config``.

    Raises:
        Exception: If ``args.era`` does not contain ``"2016"``.
    """
    # Write argparse arguments to YAML config
    logger.debug("Write argparse arguments to YAML config.")
    output_config = {
        "base_path": args.base_path,
        "output_path": args.output_path,
        "output_filename": args.output_filename,
        "tree_path": args.tree_path,
        "event_branch": args.event_branch,
        "training_weight_branch": args.training_weight_branch,
    }

    # Define era
    if "2016" in args.era:
        from shape_producer.estimation_methods_2016 import (
            DataEstimation, HTTEstimation, ggHEstimation, qqHEstimation,
            VHEstimation, ZTTEstimation, ZTTEstimationTT, ZLEstimationMTSM,
            ZLEstimationETSM, ZLEstimationTT, ZJEstimationMT, ZJEstimationET,
            ZJEstimationTT, WEstimationRaw, TTTEstimationMT, TTTEstimationET,
            TTTEstimationTT, TTJEstimationMT, TTJEstimationET,
            TTJEstimationTT, VVEstimation, QCDEstimationMT, QCDEstimationET,
            QCDEstimationTT, ZTTEmbeddedEstimation, TTLEstimationMT,
            TTLEstimationET, TTLEstimationTT, TTTTEstimationMT,
            TTTTEstimationET, EWKWpEstimation, EWKWmEstimation,
            EWKZllEstimation, EWKZnnEstimation)
        from shape_producer.era import Run2016
        era = Run2016(args.database)
    else:
        message = "Era {} is not implemented.".format(args.era)
        logger.fatal(message)
        raise Exception(message)

    ############################################################################

    # NOTE: Define here the mapping of the process estimations to the
    # training classes. The maps also cover EMB, TTL and EWKZnn, whose
    # estimations are currently not written to the config.
    if args.channel in ("mt", "et"):
        classes_map = {
            "ggH": "ggh",
            "qqH": "qqh",
            "ZTT": "ztt",
            "EMB": "ztt",
            "ZL": "zll",
            "ZJ": "zll",
            "TTT": "tt",
            "TTL": "tt",
            "TTJ": "tt",
            "W": "w",
            "EWKWp": "w",
            "EWKWm": "w",
            "VV": "misc",
            "EWKZll": "misc",
            "EWKZnn": "misc"
        }
    else:
        classes_map = {
            "ggH": "ggh",
            "qqH": "qqh",
            "ZTT": "ztt",
            "EMB": "ztt",
            "ZL": "misc",
            "ZJ": "misc",
            "TTT": "misc",
            "TTL": "misc",
            "TTJ": "misc",
            "W": "misc",
            "EWKWp": "misc",
            "EWKWm": "misc",
            "VV": "misc",
            "EWKZll": "misc",
            "EWKZnn": "misc"
        }

    # Channel selection and the estimation methods that differ per channel,
    # in the order (ZTT, ZL, ZJ, TTT, TTJ)
    if args.channel == "mt":
        channel = MTSM()
        channel_specific = (ZTTEstimation, ZLEstimationMTSM, ZJEstimationMT,
                            TTTEstimationMT, TTJEstimationMT)
    elif args.channel == "et":
        channel = ETSM()
        channel_specific = (ZTTEstimation, ZLEstimationETSM, ZJEstimationET,
                            TTTEstimationET, TTJEstimationET)
    elif args.channel == "tt":
        channel = TTSM()
        channel_specific = (ZTTEstimationTT, ZLEstimationTT, ZJEstimationTT,
                            TTTEstimationTT, TTJEstimationTT)
    else:
        # Unknown channels are not an error here: only the general part of
        # the config is written for them.
        channel = None

    if channel is not None:
        # Set up `processes` part of config
        output_config["processes"] = {}

        # Additional cuts
        additional_cuts = Cuts()
        logger.warning(
            "Use additional cuts for {}: %s".format(args.channel),
            additional_cuts.expand())

        # MC-driven processes
        ztt, zl, zj, ttt, ttj = channel_specific
        estimation_classes = [
            ggHEstimation,
            qqHEstimation,
            ztt,
            zl,
            zj,
            ttt,
            ttj,
            WEstimationRaw,
            EWKWpEstimation,
            EWKWmEstimation,
            VVEstimation,
            EWKZllEstimation,
        ]
        # Instantiate all estimation methods before filling the config
        estimations = [
            estimation_class(era, args.base_path, channel)
            for estimation_class in estimation_classes
        ]
        for estimation in estimations:
            output_config["processes"][estimation.name] = _process_entry(
                estimation, channel.cuts, additional_cuts, args.base_path,
                classes_map[estimation.name])

        # Data-driven QCD: data events selected in a control region that is
        # defined on a copy of the channel, so the nominal cuts stay intact
        estimation = DataEstimation(era, args.base_path, channel)
        estimation.name = "QCD"
        channel_qcd = copy.deepcopy(channel)
        if args.channel == "tt":
            # Replace the isolation cut on the second tau: fail the tight
            # working point but pass the loose one
            channel_qcd.cuts.remove("tau_2_iso")
            channel_qcd.cuts.add(
                Cut("byTightIsolationMVArun2v1DBoldDMwLT_2<0.5", "tau_2_iso"))
            channel_qcd.cuts.add(
                Cut("byLooseIsolationMVArun2v1DBoldDMwLT_2>0.5",
                    "tau_2_iso_loose"))
            qcd_class = "noniso"
        else:
            # Same sign selection
            channel_qcd.cuts.get("os").invert()
            qcd_class = "ss"
        output_config["processes"][estimation.name] = _process_entry(
            estimation, channel_qcd.cuts, additional_cuts, args.base_path,
            qcd_class)

    ############################################################################

    # Write output config; the context manager guarantees that the file is
    # flushed and closed once the dump is done
    logger.info("Write config to file: {}".format(args.output_config))
    with open(args.output_config, 'w') as config_file:
        yaml.dump(output_config, config_file, default_flow_style=False)
Пример #6
0
def main(args):
    """Derive percentile bin edges of the maximum score for each class.

    A chain is built from the friend files holding the scores, with the
    ntuple files attached as friends. The chain is skimmed with the data cut
    string, the maximum score of every remaining event is collected per
    class index, and the 0th to 100th percentiles (in steps of 5) of each
    class are written to ``config["analysis"][args.channel]`` in the YAML
    file at ``args.output``.

    Args:
        args: Parsed command line arguments. The attributes read here are
            ``datasets``, ``channel``, ``directory``, ``artus_friends``,
            ``training_config`` and ``output``.

    Raises:
        Exception: If the channel is not known, or if the chain does not
            contain any events before or after the skimming.
    """
    # Define era and channel
    era = Run2016(args.datasets)

    if "et" in args.channel:
        channel = ETSM()
    elif "mt" in args.channel:
        channel = MTSM()
    elif "tt" in args.channel:
        channel = TTSM()
    else:
        logger.fatal("Channel %s not known.", args.channel)
        raise Exception("Channel {} not known.".format(args.channel))
    logger.debug("Use channel %s.", args.channel)

    # Get cut string
    estimation = DataEstimation(era, args.directory, channel)
    cut_string = (estimation.get_cuts() + channel.cuts).expand()
    logger.debug("Data cut string: %s", cut_string)

    # Get chain
    tree_path = "{}_nominal/ntuple".format(args.channel)
    logger.debug("Use tree path %s to get tree.", tree_path)

    files = [str(f) for f in estimation.get_files()]
    chain = ROOT.TChain()
    for i, f in enumerate(files):
        # The friend file is expected at <artus_friends>/<base>/<base>.root
        base = os.path.basename(f).replace(".root", "")
        f_friend = os.path.join(args.artus_friends, base,
                                base + ".root") + "/" + tree_path
        logger.debug("Add file with scores %d: %s", i, f_friend)
        chain.Add(f_friend)
        logger.debug("Add friend with ntuple %d: %s", i, f)
        chain.AddFriend(tree_path, f)

    chain_numentries = chain.GetEntries()
    if not chain_numentries > 0:
        logger.fatal("Chain (before skimming) does not contain any events.")
        raise Exception(
            "Chain (before skimming) does not contain any events.")
    logger.debug("Found %s events before skimming with cut string.",
                 chain_numentries)

    # Skim chain
    chain_skimmed = chain.CopyTree(cut_string)
    chain_skimmed_numentries = chain_skimmed.GetEntries()

    if not chain_skimmed_numentries > 0:
        logger.fatal("Chain (after skimming) does not contain any events.")
        raise Exception("Chain (after skimming) does not contain any events.")
    logger.debug("Found %s events after skimming with cut string.",
                 chain_skimmed_numentries)

    # Calculate binning
    logger.debug("Load classes from config %s.", args.training_config)
    # NOTE(review): yaml.load can construct arbitrary objects; consider
    # yaml.safe_load if the config may come from an untrusted source.
    with open(args.training_config) as training_config_file:
        classes = yaml.load(training_config_file)["classes"]
    logger.debug("Use classes %s.", classes)

    # One list of maximum scores per class, filled by the class index
    scores = [[] for _ in classes]
    max_score_name = args.channel + "_max_score"
    max_index_name = args.channel + "_max_index"
    for event in chain_skimmed:
        max_score = float(getattr(event, max_score_name))
        max_index = int(getattr(event, max_index_name))
        scores[max_index].append(max_score)

    binning = {}
    percentiles = range(0, 105, 5)
    logger.debug("Use percentiles %s for binning.", percentiles)
    for i, name in enumerate(classes):
        logger.debug("Process class %s.", name)
        # Report only the real events, not the two padding values below
        logger.debug("Found %s events in class %s.", len(scores[i]), name)
        # Pad with 1/len(classes) and 1.0 so that the outermost bin edges
        # are always these two values (presumably the smallest and largest
        # possible maximum score -- TODO confirm).
        padded_scores = scores[i] + [1.0 / float(len(classes)), 1.0]
        binning[name] = [
            float(edge) for edge in np.percentile(padded_scores, percentiles)
        ]

    # Write binning to output
    # NOTE(review): same yaml.load caveat as above applies here.
    with open(args.output) as output_file:
        config = yaml.load(output_file)
    config["analysis"][args.channel] = binning
    logger.info("Write binning to %s.", args.output)
    with open(args.output, "w") as output_file:
        yaml.dump(config, output_file)
Пример #7
0
def main(args):
    # Container for all distributions to be drawn
    logger.info("Set up shape variations.")
    systematics = Systematics("{}_shapes.root".format(args.tag),
                              num_threads=args.num_threads)

    # Era selection
    if "2016" in args.era:
        from shape_producer.estimation_methods_2016 import DataEstimation, HTTEstimation, ggHEstimation, ggHEstimation_0J, ggHEstimation_1J, ggHEstimation_GE2J, ggHEstimation_VBFTOPO, qqHEstimation, qqHEstimation_VBFTOPO_JET3VETO, qqHEstimation_VBFTOPO_JET3, qqHEstimation_REST, qqHEstimation_PTJET1_GT200, VHEstimation, ZTTEstimation, ZTTEstimationTT, ZLEstimationMTSM, ZLEstimationETSM, ZLEstimationTT, ZJEstimationMT, ZJEstimationET, ZJEstimationTT, WEstimation, TTTEstimationMT, TTTEstimationET, TTTEstimationTT, TTJEstimationMT, TTJEstimationET, TTJEstimationTT, VVEstimation, EWKZEstimation, QCDEstimationMT, QCDEstimationET, QCDEstimationTT, ZTTEmbeddedEstimation, TTLEstimationMT, TTLEstimationET, TTLEstimationTT, TTTTEstimationMT, TTTTEstimationET
        from shape_producer.era import Run2016
        era = Run2016(args.datasets)
    else:
        logger.critical("Era {} is not implemented.".format(args.era))
        raise Exception

    # Channels and processes
    # yapf: disable
    directory = args.directory
    et_friend_directory = args.et_friend_directory
    mt_friend_directory = args.mt_friend_directory
    tt_friend_directory = args.tt_friend_directory
    mt = MTSM()
    if args.QCD_extrap_fit:
        mt.cuts.remove("muon_iso")
        mt.cuts.add(Cut("(iso_1<0.5)*(iso_1>=0.15)", "muon_iso_loose"))
    if args.embedding:
        mt.cuts.remove("trg_singlemuoncross")
        mt.cuts.add(Cut("(trg_singlemuon==1 && pt_1>23 && pt_2>30)", "trg_singlemuon"))
    mt_processes = {
        "data"  : Process("data_obs", DataEstimation  (era, directory, mt, friend_directory=mt_friend_directory)),
        "HTT"   : Process("HTT",      HTTEstimation   (era, directory, mt, friend_directory=mt_friend_directory)),
        "ggH"   : Process("ggH125",   ggHEstimation   (era, directory, mt, friend_directory=mt_friend_directory)),
        "qqH"   : Process("qqH125",   qqHEstimation   (era, directory, mt, friend_directory=mt_friend_directory)),
        "ggH_0J"               : Process("ggH125_0J",               ggHEstimation_0J              (era, directory, mt, friend_directory=mt_friend_directory)),
        "ggH_1J"               : Process("ggH125_1J",               ggHEstimation_1J              (era, directory, mt, friend_directory=mt_friend_directory)),
        "ggH_GE2J"             : Process("ggH125_GE2J",             ggHEstimation_GE2J            (era, directory, mt, friend_directory=mt_friend_directory)),
        "ggH_VBFTOPO"          : Process("ggH125_VBFTOPO",          ggHEstimation_VBFTOPO         (era, directory, mt, friend_directory=mt_friend_directory)),
        "qqH"                  : Process("qqH125",                  qqHEstimation                 (era, directory, mt, friend_directory=mt_friend_directory)),
        "qqH_VBFTOPO_JET3VETO" : Process("qqH125_VBFTOPO_JET3VETO", qqHEstimation_VBFTOPO_JET3VETO(era, directory, mt, friend_directory=mt_friend_directory)),
        "qqH_VBFTOPO_JET3"     : Process("qqH125_VBFTOPO_JET3",     qqHEstimation_VBFTOPO_JET3    (era, directory, mt, friend_directory=mt_friend_directory)),
        "qqH_REST"             : Process("qqH125_REST",             qqHEstimation_REST            (era, directory, mt, friend_directory=mt_friend_directory)),
        "qqH_PTJET1_GT200"     : Process("qqH125_PTJET1_GT200",     qqHEstimation_PTJET1_GT200    (era, directory, mt, friend_directory=mt_friend_directory)),
        "VH"    : Process("VH125",    VHEstimation    (era, directory, mt, friend_directory=mt_friend_directory)),
        "ZTT"   : Process("ZTT",      ZTTEstimation   (era, directory, mt, friend_directory=mt_friend_directory)),
        "ZL"    : Process("ZL",       ZLEstimationMTSM(era, directory, mt, friend_directory=mt_friend_directory)),
        "ZJ"    : Process("ZJ",       ZJEstimationMT  (era, directory, mt, friend_directory=mt_friend_directory)),
        "W"     : Process("W",        WEstimation     (era, directory, mt, friend_directory=mt_friend_directory)),
        "TTT"   : Process("TTT",      TTTEstimationMT (era, directory, mt, friend_directory=mt_friend_directory)),
        "TTJ"   : Process("TTJ",      TTJEstimationMT (era, directory, mt, friend_directory=mt_friend_directory)),
        "VV"    : Process("VV",       VVEstimation    (era, directory, mt, friend_directory=mt_friend_directory)),
        "EWKZ"  : Process("EWKZ",     EWKZEstimation  (era, directory, mt, friend_directory=mt_friend_directory))
        }
    if args.embedding:
        mt_processes["ZTT"] = Process("ZTT", ZTTEmbeddedEstimation(era, directory, mt, friend_directory=mt_friend_directory))
        mt_processes["TTT"] = Process("TTT", TTLEstimationMT (era, directory, mt, friend_directory=mt_friend_directory))
    mt_processes["QCD"] = Process("QCD", QCDEstimationMT(era, directory, mt, [mt_processes[process] for process in ["ZTT", "ZJ", "ZL", "W", "TTT", "TTJ", "VV", "EWKZ"]], mt_processes["data"], extrapolation_factor=1.17))
    et = ETSM()
    if args.QCD_extrap_fit:
        et.cuts.remove("ele_iso")
        et.cuts.add(Cut("(iso_1<0.5)*(iso_1>=0.1)", "ele_iso_loose"))
    et_processes = {
        "data"  : Process("data_obs", DataEstimation  (era, directory, et, friend_directory=et_friend_directory)),
        "HTT"   : Process("HTT",      HTTEstimation   (era, directory, et, friend_directory=et_friend_directory)),
        "ggH"   : Process("ggH125",   ggHEstimation   (era, directory, et, friend_directory=et_friend_directory)),
        "qqH"   : Process("qqH125",   qqHEstimation   (era, directory, et, friend_directory=et_friend_directory)),
        "ggH_0J"               : Process("ggH125_0J",               ggHEstimation_0J              (era, directory, et, friend_directory=et_friend_directory)),
        "ggH_1J"               : Process("ggH125_1J",               ggHEstimation_1J              (era, directory, et, friend_directory=et_friend_directory)),
        "ggH_GE2J"             : Process("ggH125_GE2J",             ggHEstimation_GE2J            (era, directory, et, friend_directory=et_friend_directory)),
        "ggH_VBFTOPO"          : Process("ggH125_VBFTOPO",          ggHEstimation_VBFTOPO         (era, directory, et, friend_directory=et_friend_directory)),
        "qqH"                  : Process("qqH125",                  qqHEstimation                 (era, directory, et, friend_directory=et_friend_directory)),
        "qqH_VBFTOPO_JET3VETO" : Process("qqH125_VBFTOPO_JET3VETO", qqHEstimation_VBFTOPO_JET3VETO(era, directory, et, friend_directory=et_friend_directory)),
        "qqH_VBFTOPO_JET3"     : Process("qqH125_VBFTOPO_JET3",     qqHEstimation_VBFTOPO_JET3    (era, directory, et, friend_directory=et_friend_directory)),
        "qqH_REST"             : Process("qqH125_REST",             qqHEstimation_REST            (era, directory, et, friend_directory=et_friend_directory)),
        "qqH_PTJET1_GT200"     : Process("qqH125_PTJET1_GT200",     qqHEstimation_PTJET1_GT200    (era, directory, et, friend_directory=et_friend_directory)),
        "VH"    : Process("VH125",    VHEstimation    (era, directory, et, friend_directory=et_friend_directory)),
        "ZTT"   : Process("ZTT",      ZTTEstimation   (era, directory, et, friend_directory=et_friend_directory)),
        "ZL"    : Process("ZL",       ZLEstimationETSM(era, directory, et, friend_directory=et_friend_directory)),
        "ZJ"    : Process("ZJ",       ZJEstimationET  (era, directory, et, friend_directory=et_friend_directory)),
        "W"     : Process("W",        WEstimation     (era, directory, et, friend_directory=et_friend_directory)),
        "TTT"   : Process("TTT",      TTTEstimationET (era, directory, et, friend_directory=et_friend_directory)),
        "TTJ"   : Process("TTJ",      TTJEstimationET (era, directory, et, friend_directory=et_friend_directory)),
        "VV"    : Process("VV",       VVEstimation    (era, directory, et, friend_directory=et_friend_directory)),
        "EWKZ"  : Process("EWKZ",     EWKZEstimation  (era, directory, et, friend_directory=et_friend_directory))
        }
    if args.embedding:
        et_processes["ZTT"] = Process("ZTT", ZTTEmbeddedEstimation(era, directory, et, friend_directory=et_friend_directory))
        et_processes["TTT"] = Process("TTT", TTLEstimationET (era, directory, et, friend_directory=et_friend_directory))
    et_processes["QCD"] = Process("QCD", QCDEstimationET(era, directory, et, [et_processes[process] for process in ["ZTT", "ZJ", "ZL", "W", "TTT", "TTJ", "VV", "EWKZ"]], et_processes["data"], extrapolation_factor=1.16))
    tt = TTSM()
    if args.QCD_extrap_fit:
        tt.cuts.get("os").invert()
    if args.HIG16043:
        tt.cuts.remove("pt_h")
    tt_processes = {
        "data"  : Process("data_obs", DataEstimation (era, directory, tt, friend_directory=tt_friend_directory)),
        "HTT"   : Process("HTT",      HTTEstimation  (era, directory, tt, friend_directory=tt_friend_directory)),
        "ggH"   : Process("ggH125",   ggHEstimation  (era, directory, tt, friend_directory=tt_friend_directory)),
        "qqH"   : Process("qqH125",   qqHEstimation  (era, directory, tt, friend_directory=tt_friend_directory)),
        "ggH_0J"               : Process("ggH125_0J",               ggHEstimation_0J              (era, directory, tt, friend_directory=tt_friend_directory)),
        "ggH_1J"               : Process("ggH125_1J",               ggHEstimation_1J              (era, directory, tt, friend_directory=tt_friend_directory)),
        "ggH_GE2J"             : Process("ggH125_GE2J",             ggHEstimation_GE2J            (era, directory, tt, friend_directory=tt_friend_directory)),
        "ggH_VBFTOPO"          : Process("ggH125_VBFTOPO",          ggHEstimation_VBFTOPO         (era, directory, tt, friend_directory=tt_friend_directory)),
        "qqH"                  : Process("qqH125",                  qqHEstimation                 (era, directory, tt, friend_directory=tt_friend_directory)),
        "qqH_VBFTOPO_JET3VETO" : Process("qqH125_VBFTOPO_JET3VETO", qqHEstimation_VBFTOPO_JET3VETO(era, directory, tt, friend_directory=tt_friend_directory)),
        "qqH_VBFTOPO_JET3"     : Process("qqH125_VBFTOPO_JET3",     qqHEstimation_VBFTOPO_JET3    (era, directory, tt, friend_directory=tt_friend_directory)),
        "qqH_REST"             : Process("qqH125_REST",             qqHEstimation_REST            (era, directory, tt, friend_directory=tt_friend_directory)),
        "qqH_PTJET1_GT200"     : Process("qqH125_PTJET1_GT200",     qqHEstimation_PTJET1_GT200    (era, directory, tt, friend_directory=tt_friend_directory)),
        "VH"    : Process("VH125",    VHEstimation   (era, directory, tt, friend_directory=tt_friend_directory)),
        "ZTT"   : Process("ZTT",      ZTTEstimationTT(era, directory, tt, friend_directory=tt_friend_directory)),
        "ZL"    : Process("ZL",       ZLEstimationTT (era, directory, tt, friend_directory=tt_friend_directory)),
        "ZJ"    : Process("ZJ",       ZJEstimationTT (era, directory, tt, friend_directory=tt_friend_directory)),
        "W"     : Process("W",        WEstimation    (era, directory, tt, friend_directory=tt_friend_directory)),
        "TTT"   : Process("TTT",      TTTEstimationTT(era, directory, tt, friend_directory=tt_friend_directory)),
        "TTJ"   : Process("TTJ",      TTJEstimationTT(era, directory, tt, friend_directory=tt_friend_directory)),
        "VV"    : Process("VV",       VVEstimation   (era, directory, tt, friend_directory=tt_friend_directory)),
        "EWKZ"  : Process("EWKZ",     EWKZEstimation (era, directory, tt, friend_directory=tt_friend_directory)),
        }
    if args.embedding:
        tt_processes["ZTT"] = Process("ZTT", ZTTEmbeddedEstimation(era, directory, tt, friend_directory=tt_friend_directory))
        tt_processes["TTT"] = Process("TTT", TTLEstimationTT (era, directory, tt, friend_directory=tt_friend_directory))

    tt_processes["QCD"] = Process("QCD", QCDEstimationTT(era, directory, tt, [tt_processes[process] for process in ["ZTT", "ZJ", "ZL", "W", "TTT", "TTJ", "VV", "EWKZ"]], tt_processes["data"]))

    # Variables and categories
    binning = yaml.load(open(args.binning))

    et_categories = []
    # HIG16043 shapes
    if "et" in args.channels and args.HIG16043:
        for category in ["0jet", "vbf", "boosted"]:
            variable = Variable(
                    binning["HIG16043"]["et"][category]["variable"],
                    VariableBinning(binning["HIG16043"]["et"][category]["binning"]),
                    expression=binning["HIG16043"]["et"][category]["expression"])
            et_categories.append(
                Category(
                    category,
                    et,
                    Cuts(
                        Cut(binning["HIG16043"]["et"][category]["cut_unrolling"],
                            "et_cut_unrolling_{}".format(category)),
                        Cut(binning["HIG16043"]["et"][category]["cut_category"],
                            "et_cut_category_{}".format(category))
                        ),
                    variable=variable))
    # Analysis shapes
    elif "et" in args.channels:
        for i, label in enumerate(["ggh", "qqh", "ztt", "zll", "w", "tt", "ss", "misc"]):
            score = Variable(
                "et_max_score",
                 VariableBinning(binning["analysis"]["et"][label]))
            et_categories.append(
                Category(
                    label,
                    et,
                    Cuts(
                        Cut("et_max_index=={index}".format(index=i), "exclusive_score")),
                    variable=score))
    # Goodness of fit shapes
    elif "et" == args.gof_channel:
        score = Variable(
                args.gof_variable,
                VariableBinning(binning["gof"]["et"][args.gof_variable]["bins"]),
                expression=binning["gof"]["et"][args.gof_variable]["expression"])
        if "cut" in binning["gof"]["et"][args.gof_variable].keys():
            cuts=Cuts(Cut(binning["gof"]["et"][args.gof_variable]["cut"], "binning"))
        else:
            cuts=Cuts()
        et_categories.append(
            Category(
                args.gof_variable,
                et,
                cuts,
                variable=score))

    mt_categories = []
    # HIG16043 shapes
    if "mt" in args.channels and args.HIG16043:
        for category in ["0jet", "vbf", "boosted"]:
            variable = Variable(
                    binning["HIG16043"]["mt"][category]["variable"],
                    VariableBinning(binning["HIG16043"]["mt"][category]["binning"]),
                    expression=binning["HIG16043"]["mt"][category]["expression"])
            mt_categories.append(
                Category(
                    category,
                    mt,
                    Cuts(
                        Cut(binning["HIG16043"]["mt"][category]["cut_unrolling"],
                            "mt_cut_unrolling_{}".format(category)),
                        Cut(binning["HIG16043"]["mt"][category]["cut_category"],
                            "mt_cut_category_{}".format(category))
                        ),
                    variable=variable))
    # Analysis shapes
    elif "mt" in args.channels:
        for i, label in enumerate(["ggh", "qqh", "ztt", "zll", "w", "tt", "ss", "misc"]):
            score = Variable(
                "mt_max_score",
                 VariableBinning(binning["analysis"]["mt"][label]))
            mt_categories.append(
                Category(
                    label,
                    mt,
                    Cuts(
                        Cut("mt_max_index=={index}".format(index=i), "exclusive_score")),
                    variable=score))
    # Goodness of fit shapes
    elif args.gof_channel == "mt":
        score = Variable(
                args.gof_variable,
                VariableBinning(binning["gof"]["mt"][args.gof_variable]["bins"]),
                expression=binning["gof"]["mt"][args.gof_variable]["expression"])
        if "cut" in binning["gof"]["mt"][args.gof_variable].keys():
            cuts=Cuts(Cut(binning["gof"]["mt"][args.gof_variable]["cut"], "binning"))
        else:
            cuts=Cuts()
        mt_categories.append(
            Category(
                args.gof_variable,
                mt,
                cuts,
                variable=score))

    tt_categories = []
    # HIG16043 shapes
    if "tt" in args.channels and args.HIG16043:
        for category in ["0jet", "vbf", "boosted"]:
            variable = Variable(
                    binning["HIG16043"]["tt"][category]["variable"],
                    VariableBinning(binning["HIG16043"]["tt"][category]["binning"]),
                    expression=binning["HIG16043"]["tt"][category]["expression"])
            tt_categories.append(
                Category(
                    category,
                    tt,
                    Cuts(
                        Cut(binning["HIG16043"]["tt"][category]["cut_unrolling"],
                            "tt_cut_unrolling_{}".format(category)),
                        Cut(binning["HIG16043"]["tt"][category]["cut_category"],
                            "tt_cut_category_{}".format(category))
                        ),
                    variable=variable))
    # Analysis shapes
    elif "tt" in args.channels:
        for i, label in enumerate(["ggh", "qqh", "ztt", "noniso", "misc"]):
            score = Variable(
                "tt_max_score",
                 VariableBinning(binning["analysis"]["tt"][label]))
            tt_categories.append(
                Category(
                    label,
                    tt,
                    Cuts(
                        Cut("tt_max_index=={index}".format(index=i), "exclusive_score")),
                    variable=score))
    # Goodness of fit shapes
    elif args.gof_channel == "tt":
        score = Variable(
                args.gof_variable,
                VariableBinning(binning["gof"]["tt"][args.gof_variable]["bins"]),
                expression=binning["gof"]["tt"][args.gof_variable]["expression"])
        if "cut" in binning["gof"]["tt"][args.gof_variable].keys():
            cuts=Cuts(Cut(binning["gof"]["tt"][args.gof_variable]["cut"], "binning"))
        else:
            cuts=Cuts()
        tt_categories.append(
            Category(
                args.gof_variable,
                tt,
                cuts,
                variable=score))

    # Nominal histograms
    # yapf: enable
    if "et" in [args.gof_channel] + args.channels:
        for process, category in product(et_processes.values(), et_categories):
            systematics.add(
                Systematic(category=category,
                           process=process,
                           analysis="smhtt",
                           era=era,
                           variation=Nominal(),
                           mass="125"))

    if "mt" in [args.gof_channel] + args.channels:
        for process, category in product(mt_processes.values(), mt_categories):
            systematics.add(
                Systematic(category=category,
                           process=process,
                           analysis="smhtt",
                           era=era,
                           variation=Nominal(),
                           mass="125"))
    if "tt" in [args.gof_channel] + args.channels:
        for process, category in product(tt_processes.values(), tt_categories):
            systematics.add(
                Systematic(category=category,
                           process=process,
                           analysis="smhtt",
                           era=era,
                           variation=Nominal(),
                           mass="125"))

    # Shapes variations

    # Tau energy scale
    tau_es_3prong_variations = create_systematic_variations(
        "CMS_scale_t_3prong_13TeV", "tauEsThreeProng", DifferentPipeline)
    tau_es_1prong_variations = create_systematic_variations(
        "CMS_scale_t_1prong_13TeV", "tauEsOneProng", DifferentPipeline)
    tau_es_1prong1pizero_variations = create_systematic_variations(
        "CMS_scale_t_1prong1pizero_13TeV", "tauEsOneProngPiZeros",
        DifferentPipeline)
    for variation in tau_es_3prong_variations + tau_es_1prong_variations + tau_es_1prong1pizero_variations:
        for process_nick in [
                "HTT", "VH", "ggH", "ggH_0J", "ggH_1J", "ggH_GE2J",
                "ggH_VBFTOPO", "qqH", "qqH_VBFTOPO_JET3VETO",
                "qqH_VBFTOPO_JET3", "qqH_REST", "qqH_PTJET1_GT200", "ZTT",
                "TTT", "VV", "EWKZ"
        ]:
            if "et" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=et_processes[process_nick],
                    channel=et,
                    era=era)
            if "mt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mt_processes[process_nick],
                    channel=mt,
                    era=era)
            if "tt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=tt_processes[process_nick],
                    channel=tt,
                    era=era)

    # Jet energy scale
    jet_es_variations = create_systematic_variations("CMS_scale_j_13TeV",
                                                     "jecUnc",
                                                     DifferentPipeline)
    for variation in jet_es_variations:
        for process_nick in [
                "HTT", "VH", "ggH", "ggH_0J", "ggH_1J", "ggH_GE2J",
                "ggH_VBFTOPO", "qqH", "qqH_VBFTOPO_JET3VETO",
                "qqH_VBFTOPO_JET3", "qqH_REST", "qqH_PTJET1_GT200", "ZTT",
                "ZL", "ZJ", "W", "TTT", "TTJ", "VV", "EWKZ"
        ]:
            if args.embedding and process_nick == 'ZTT':
                continue
            if "et" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=et_processes[process_nick],
                    channel=et,
                    era=era)
            if "mt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mt_processes[process_nick],
                    channel=mt,
                    era=era)
            if "tt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=tt_processes[process_nick],
                    channel=tt,
                    era=era)

    # MET energy scale
    met_unclustered_variations = create_systematic_variations(
        "CMS_scale_met_unclustered_13TeV", "metUnclusteredEn",
        DifferentPipeline)
    met_clustered_variations = create_systematic_variations(
        "CMS_scale_met_clustered_13TeV", "metJetEn", DifferentPipeline)
    for variation in met_unclustered_variations + met_clustered_variations:
        for process_nick in [
                "HTT", "VH", "ggH", "ggH_0J", "ggH_1J", "ggH_GE2J",
                "ggH_VBFTOPO", "qqH", "qqH_VBFTOPO_JET3VETO",
                "qqH_VBFTOPO_JET3", "qqH_REST", "qqH_PTJET1_GT200", "ZTT",
                "ZL", "ZJ", "W", "TTT", "TTJ", "VV", "EWKZ"
        ]:
            if args.embedding and process_nick == 'ZTT':
                continue
            if "et" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=et_processes[process_nick],
                    channel=et,
                    era=era)
            if "mt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mt_processes[process_nick],
                    channel=mt,
                    era=era)
            if "tt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=tt_processes[process_nick],
                    channel=tt,
                    era=era)

    # Z pt reweighting
    zpt_variations = create_systematic_variations("CMS_htt_dyShape_13TeV",
                                                  "zPtReweightWeight",
                                                  SquareAndRemoveWeight)
    for variation in zpt_variations:
        for process_nick in ["ZTT", "ZL", "ZJ"]:
            if args.embedding and process_nick == 'ZTT':
                continue
            if "et" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=et_processes[process_nick],
                    channel=et,
                    era=era)
            if "mt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mt_processes[process_nick],
                    channel=mt,
                    era=era)
            if "tt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=tt_processes[process_nick],
                    channel=tt,
                    era=era)

    # top pt reweighting
    top_pt_variations = create_systematic_variations(
        "CMS_htt_ttbarShape_13TeV", "topPtReweightWeight",
        SquareAndRemoveWeight)
    for variation in top_pt_variations:
        for process_nick in ["TTT", "TTJ"]:
            if "et" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=et_processes[process_nick],
                    channel=et,
                    era=era)
            if "mt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mt_processes[process_nick],
                    channel=mt,
                    era=era)
            if "tt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=tt_processes[process_nick],
                    channel=tt,
                    era=era)

    # jet to tau fake efficiency

    jet_to_tau_fake_variations = []
    jet_to_tau_fake_variations.append(
        AddWeight("CMS_htt_jetToTauFake_13TeV", "jetToTauFake_weight",
                  Weight("(1.0+pt_2*0.002)", "jetToTauFake_weight"), "Up"))
    jet_to_tau_fake_variations.append(
        AddWeight("CMS_htt_jetToTauFake_13TeV", "jetToTauFake_weight",
                  Weight("(1.0-pt_2*0.002)", "jetToTauFake_weight"), "Down"))
    for variation in jet_to_tau_fake_variations:
        for process_nick in ["ZJ", "TTJ", "W"]:
            if "et" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=et_processes[process_nick],
                    channel=et,
                    era=era)
            if "mt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mt_processes[process_nick],
                    channel=mt,
                    era=era)
            if "tt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=tt_processes[process_nick],
                    channel=tt,
                    era=era)

    # Zll reweighting
    zll_et_weight_variations = []
    zll_et_weight_variations.append(
        ReplaceWeight(
            "CMS_htt_eFakeTau_1prong_13TeV", "decay_mode_reweight",
            Weight(
                "(((decayMode_2 == 0)*0.98*1.12) + ((decayMode_2 == 1 || decayMode_2 == 2)*1.2) + ((decayMode_2 == 10)*1.0))",
                "decay_mode_reweight"), "Up"))
    zll_et_weight_variations.append(
        ReplaceWeight(
            "CMS_htt_eFakeTau_1prong_13TeV", "decay_mode_reweight",
            Weight(
                "(((decayMode_2 == 0)*0.98*0.88) + ((decayMode_2 == 1 || decayMode_2 == 2)*1.2) + ((decayMode_2 == 10)*1.0))",
                "decay_mode_reweight"), "Down"))
    zll_et_weight_variations.append(
        ReplaceWeight(
            "CMS_htt_eFakeTau_1prong1pizero_13TeV", "decay_mode_reweight",
            Weight(
                "(((decayMode_2 == 0)*0.98) + ((decayMode_2 == 1 || decayMode_2 == 2)*1.2*1.12) + ((decayMode_2 == 10)*1.0))",
                "decay_mode_reweight"), "Up"))
    zll_et_weight_variations.append(
        ReplaceWeight(
            "CMS_htt_eFakeTau_1prong1pizero_13TeV", "decay_mode_reweight",
            Weight(
                "(((decayMode_2 == 0)*0.98) + ((decayMode_2 == 1 || decayMode_2 == 2)*1.2*0.88) + ((decayMode_2 == 10)*1.0))",
                "decay_mode_reweight"), "Down"))
    for variation in zll_et_weight_variations:
        for process_nick in ["ZL"]:
            if "et" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=et_processes[process_nick],
                    channel=et,
                    era=era)
    zll_mt_weight_variations = []
    zll_mt_weight_variations.append(
        ReplaceWeight(
            "CMS_htt_mFakeTau_1prong_13TeV", "decay_mode_reweight",
            Weight(
                "(((decayMode_2 == 0)*0.75*1.25) + ((decayMode_2 == 1 || decayMode_2 == 2)*1.0) + ((decayMode_2 == 10)*1.0))",
                "decay_mode_reweight"), "Up"))
    zll_mt_weight_variations.append(
        ReplaceWeight(
            "CMS_htt_mFakeTau_1prong_13TeV", "decay_mode_reweight",
            Weight(
                "(((decayMode_2 == 0)*0.75*0.75) + ((decayMode_2 == 1 || decayMode_2 == 2)*1.0) + ((decayMode_2 == 10)*1.0))",
                "decay_mode_reweight"), "Down"))
    zll_mt_weight_variations.append(
        ReplaceWeight(
            "CMS_htt_mFakeTau_1prong1pizero_13TeV", "decay_mode_reweight",
            Weight(
                "(((decayMode_2 == 0)*0.75) + ((decayMode_2 == 1 || decayMode_2 == 2)*1.25) + ((decayMode_2 == 10)*1.0))",
                "decay_mode_reweight"), "Up"))
    zll_mt_weight_variations.append(
        ReplaceWeight(
            "CMS_htt_mFakeTau_1prong1pizero_13TeV", "decay_mode_reweight",
            Weight(
                "(((decayMode_2 == 0)*0.75) + ((decayMode_2 == 1 || decayMode_2 == 2)*0.75) + ((decayMode_2 == 10)*1.0))",
                "decay_mode_reweight"), "Down"))
    for variation in zll_mt_weight_variations:
        for process_nick in ["ZL"]:
            if "mt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mt_processes[process_nick],
                    channel=mt,
                    era=era)

    # b tagging
    btag_eff_variations = create_systematic_variations("CMS_htt_eff_b_13TeV",
                                                       "btagEff",
                                                       DifferentPipeline)
    mistag_eff_variations = create_systematic_variations(
        "CMS_htt_mistag_b_13TeV", "btagMistag", DifferentPipeline)
    for variation in btag_eff_variations + mistag_eff_variations:
        for process_nick in [
                "HTT", "VH", "ggH", "ggH_0J", "ggH_1J", "ggH_GE2J",
                "ggH_VBFTOPO", "qqH", "qqH_VBFTOPO_JET3VETO",
                "qqH_VBFTOPO_JET3", "qqH_REST", "qqH_PTJET1_GT200", "ZTT",
                "ZL", "ZJ", "W", "TTT", "TTJ", "VV", "EWKZ"
        ]:
            if args.embedding and process_nick == 'ZTT':
                continue
            if "et" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=et_processes[process_nick],
                    channel=et,
                    era=era)
            if "mt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mt_processes[process_nick],
                    channel=mt,
                    era=era)
            if "tt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=tt_processes[process_nick],
                    channel=tt,
                    era=era)
    if args.embedding:
        # Embedded event specifics

        # 10% removed events in ttbar simulation (ttbar -> real tau tau events) will be added/subtracted to ZTT shape to use as systematic
        tttautau_process_mt = Process(
            "TTTT",
            TTTTEstimationMT(era,
                             directory,
                             mt,
                             friend_directory=mt_friend_directory))
        tttautau_process_et = Process(
            "TTTT",
            TTTTEstimationET(era,
                             directory,
                             et,
                             friend_directory=et_friend_directory))
        tttautau_process_tt = Process(
            "TTTT",
            TTTEstimationTT(era,
                            directory,
                            tt,
                            friend_directory=tt_friend_directory))
        if 'mt' in [args.gof_channel] + args.channels:
            for category in mt_categories:
                mt_processes['ZTTpTTTauTauDown'] = Process(
                    "ZTTpTTTauTauDown",
                    AddHistogramEstimationMethod(
                        "AddHistogram", "nominal", era, directory, mt,
                        [mt_processes["ZTT"], tttautau_process_mt],
                        [1.0, -0.1]))
                systematics.add(
                    Systematic(category=category,
                               process=mt_processes['ZTTpTTTauTauDown'],
                               analysis="smhtt",
                               era=era,
                               variation=Relabel("CMS_htt_emb_ttbar", "Down"),
                               mass="125"))

                mt_processes['ZTTpTTTauTauUp'] = Process(
                    "ZTTpTTTauTauUp",
                    AddHistogramEstimationMethod(
                        "AddHistogram", "nominal", era, directory, mt,
                        [mt_processes["ZTT"], tttautau_process_mt],
                        [1.0, 0.1]))
                systematics.add(
                    Systematic(category=category,
                               process=mt_processes['ZTTpTTTauTauUp'],
                               analysis="smhtt",
                               era=era,
                               variation=Relabel("CMS_htt_emb_ttbar", "Up"),
                               mass="125"))

                #Muon ES uncertainty (needed for smearing due to initial reconstruction)
                muon_es_variations = create_systematic_variations(
                    "CMS_scale_muonES", "muonES", DifferentPipeline)
                for variation in muon_es_variations:
                    for process_nick in ["ZTT"]:
                        if "mt" in [args.gof_channel] + args.channels:
                            systematics.add_systematic_variation(
                                variation=variation,
                                process=mt_processes[process_nick],
                                channel=mt,
                                era=era)

        if 'et' in [args.gof_channel] + args.channels:
            for category in et_categories:
                et_processes['ZTTpTTTauTauDown'] = Process(
                    "ZTTpTTTauTauDown",
                    AddHistogramEstimationMethod(
                        "AddHistogram", "nominal", era, directory, et,
                        [et_processes["ZTT"], tttautau_process_et],
                        [1.0, -0.1]))
                systematics.add(
                    Systematic(category=category,
                               process=et_processes['ZTTpTTTauTauDown'],
                               analysis="smhtt",
                               era=era,
                               variation=Relabel("CMS_htt_emb_ttbar", "Down"),
                               mass="125"))

                et_processes['ZTTpTTTauTauUp'] = Process(
                    "ZTTpTTTauTauUp",
                    AddHistogramEstimationMethod(
                        "AddHistogram", "nominal", era, directory, et,
                        [et_processes["ZTT"], tttautau_process_et],
                        [1.0, 0.1]))
                systematics.add(
                    Systematic(category=category,
                               process=et_processes['ZTTpTTTauTauUp'],
                               analysis="smhtt",
                               era=era,
                               variation=Relabel("CMS_htt_emb_ttbar", "Up"),
                               mass="125"))
        if 'tt' in [args.gof_channel] + args.channels:
            for category in tt_categories:
                tt_processes['ZTTpTTTauTauDown'] = Process(
                    "ZTTpTTTauTauDown",
                    AddHistogramEstimationMethod(
                        "AddHistogram", "nominal", era, directory, tt,
                        [tt_processes["ZTT"], tttautau_process_tt],
                        [1.0, -0.1]))
                systematics.add(
                    Systematic(category=category,
                               process=tt_processes['ZTTpTTTauTauDown'],
                               analysis="smhtt",
                               era=era,
                               variation=Relabel("CMS_htt_emb_ttbar", "Down"),
                               mass="125"))

                tt_processes['ZTTpTTTauTauUp'] = Process(
                    "ZTTpTTTauTauUp",
                    AddHistogramEstimationMethod(
                        "AddHistogram", "nominal", era, directory, tt,
                        [tt_processes["ZTT"], tttautau_process_tt],
                        [1.0, 0.1]))
                systematics.add(
                    Systematic(category=category,
                               process=tt_processes['ZTTpTTTauTauUp'],
                               analysis="smhtt",
                               era=era,
                               variation=Relabel("CMS_htt_emb_ttbar", "Up"),
                               mass="125"))

    # Produce histograms
    logger.info("Start producing shapes.")
    systematics.produce()
    logger.info("Done producing shapes.")
Пример #8
0
def main(args):
    """Compute the goodness-of-fit binning per channel and write it as YAML.

    For every channel in ``em``, ``et``, ``mt`` and ``tt`` the channel
    properties are collected with ``get_properties``, a chain is built with
    ``build_chain``, a 1D binning is derived with ``get_1d_binning`` using
    the percentile list below and extended with ``add_2d_unrolled_binning``.
    The result is stored under ``config["gof"][<channel>]`` and dumped to
    ``args.output``.

    Args:
        args: Parsed command line arguments. The attributes read here are
            ``era``, ``datasets``, ``variables``, ``directory``, ``output``
            and ``em_/et_/mt_/tt_friend_directories``.

    Raises:
        Exception: If ``args.era`` contains neither "2016" nor "2017".
    """
    # Define era. The matched year is remembered once because it keys both
    # the channel lookup and the variable lookup inside the channel loop.
    if "2016" in args.era:
        from shape_producer.era import Run2016
        era = Run2016(args.datasets)
        eraname = "2016"
    elif "2017" in args.era:
        from shape_producer.era import Run2017
        era = Run2017(args.datasets)
        eraname = "2017"
    else:
        message = "Era {} is not implemented.".format(args.era)
        logger.fatal(message)
        raise Exception(message)

    # Load variables
    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary Python objects and is rejected by recent PyYAML releases;
    # consider yaml.safe_load if the variables file is plain data.
    with open(args.variables) as stream:
        variables = yaml.load(stream)["selected_variables"]

    # Define bins and range of binning for variables in enabled channels
    channel_dict = {
        "em": {
            "2016": EMSM2016(),
            "2017": EMSM2017()
        },
        "et": {
            "2016": ETSM2016(),
            "2017": ETSM2017()
        },
        "mt": {
            "2016": MTSM2016(),
            "2017": MTSM2017()
        },
        "tt": {
            "2016": TTSM2016(),
            "2017": TTSM2017()
        },
    }
    friend_directories_dict = {
        "em": args.em_friend_directories,
        "et": args.et_friend_directories,
        "mt": args.mt_friend_directories,
        "tt": args.tt_friend_directories,
    }
    percentiles = [
        1.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 99.0
    ]

    config = {"gof": {}}

    for ch in channel_dict:
        # Get properties
        channel = channel_dict[ch][eraname]
        logger.info("Channel: %s", ch)
        additional_cuts = Cuts()
        logger.warning("Use additional cuts for %s: %s", ch,
                       additional_cuts.expand())
        dict_ = get_properties({}, era, channel, args.directory,
                               additional_cuts)

        # Build chain
        dict_["tree_path"] = "%s_nominal/ntuple" % ch
        chain = build_chain(dict_, friend_directories_dict[ch])

        # The variables config is keyed by the integer year, then by channel
        channel_variables = variables[int(eraname)][ch]

        # Get percentiles and calculate 1d binning
        binning = get_1d_binning(ch, chain, channel_variables, percentiles)

        # Add binning for unrolled 2d distributions
        binning = add_2d_unrolled_binning(channel_variables, binning)

        # Append binning to config
        config["gof"][ch] = binning

    # Write config; the context manager makes sure the file is flushed and
    # closed even if dumping fails.
    logger.info("Write binning config to %s.", args.output)
    with open(args.output, 'w') as stream:
        yaml.dump(config, stream)
Пример #9
0
def main(args):
    """Assemble the training dataset configuration for one channel as YAML.

    The configuration holds the path and branch settings copied from
    ``args`` and a ``processes`` section with one entry per estimation
    method (relative file list, cut string, weight string, training class),
    plus a ``QCD`` entry built from ``DataEstimation`` with a modified
    channel selection. The result is written to ``args.output_config``.

    Args:
        args: Parsed command line arguments. The attributes read here are
            ``base_path``, ``friend_paths``, ``output_path``,
            ``output_filename``, ``tree_path``, ``event_branch``,
            ``training_weight_branch``, ``era``, ``database``, ``channel``,
            ``training_z_estimation_method`` and ``output_config``.

    Raises:
        Exception: If the era or the Z estimation method is not supported.
        KeyError: If ``args.channel`` is not one of mt, et, tt, em.
    """
    # Write argparse arguments to YAML config
    logger.debug("Write argparse arguments to YAML config.")
    output_config = {}
    output_config["base_path"] = args.base_path
    output_config["friend_paths"] = args.friend_paths
    output_config["output_path"] = args.output_path
    output_config["output_filename"] = args.output_filename
    output_config["tree_path"] = args.tree_path
    output_config["event_branch"] = args.event_branch
    output_config["training_weight_branch"] = args.training_weight_branch

    # Define era. The era is matched by substring, so the matched year (and
    # not the raw argument) is used as key for the channel lookup below.
    if "2016" in args.era:
        from shape_producer.estimation_methods_2016 import DataEstimation, ggHEstimation, qqHEstimation, ZTTEstimation, ZLEstimation, ZJEstimation, WEstimation, TTTEstimation, TTJEstimation, ZTTEmbeddedEstimation, TTLEstimation, EWKZEstimation, VVLEstimation, VVJEstimation, VVEstimation, VVTEstimation
        from shape_producer.era import Run2016
        era = Run2016(args.database)
        era_name = "2016"

    elif "2017" in args.era:
        from shape_producer.estimation_methods_2017 import DataEstimation, ZTTEstimation, ZJEstimation, ZLEstimation, TTLEstimation, TTJEstimation, TTTEstimation, VVTEstimation, VVJEstimation, VVLEstimation, WEstimation, ggHEstimation, qqHEstimation, EWKZEstimation, ZTTEmbeddedEstimation

        from shape_producer.era import Run2017
        era = Run2017(args.database)
        era_name = "2017"

    elif "2018" in args.era:
        from shape_producer.estimation_methods_2018 import DataEstimation, ZTTEstimation, ZJEstimation, ZLEstimation, TTLEstimation, TTJEstimation, TTTEstimation, VVTEstimation, VVJEstimation, VVLEstimation, WEstimation, ggHEstimation, qqHEstimation, EWKZEstimation, ZTTEmbeddedEstimation

        from shape_producer.era import Run2018
        era = Run2018(args.database)
        era_name = "2018"
    else:
        message = "Era {} is not implemented.".format(args.era)
        logger.fatal(message)
        raise Exception(message)

    def estimationMethodAndClassMapGenerator():
        """Return ``[classes_map, estimationMethodList]`` for the channel.

        ``classes_map`` maps process names to training class names and
        ``estimationMethodList`` holds the estimation objects whose entries
        are written to the config. Reads ``channel`` from the enclosing
        scope, so it must be called after ``channel`` is assigned.
        """
        ###### common processes
        classes_map = {"ggH": "ggh", "qqH": "qqh", "EWKZ": "misc"}
        estimationMethodList = [
            ggHEstimation("ggH", era, args.base_path, channel),
            qqHEstimation("qqH", era, args.base_path, channel),
            EWKZEstimation(era, args.base_path, channel),
            VVLEstimation(era, args.base_path, channel),
            WEstimation(era, args.base_path, channel)
        ]

        ######## embedding vs. MC for the Z->tautau estimate
        if args.training_z_estimation_method == "emb":
            classes_map["EMB"] = "ztt"
            estimationMethodList.extend(
                [ZTTEmbeddedEstimation(era, args.base_path, channel)])
        elif args.training_z_estimation_method == "mc":
            classes_map["ZTT"] = "ztt"
            estimationMethodList.extend([
                ZTTEstimation(era, args.base_path, channel),
                TTTEstimation(era, args.base_path, channel),
                VVTEstimation(era, args.base_path, channel)
            ])
        else:
            message = (
                "No valid training-z-estimation-method! Options are emb, mc. Argument was {}"
                .format(args.training_z_estimation_method))
            logger.fatal(message)
            raise Exception(message)

        ##### TT*, ZL and ZJ processes
        estimationMethodList.extend([
            TTLEstimation(era, args.base_path, channel),
            ZLEstimation(era, args.base_path, channel)
        ])
        if args.channel == "em":
            # no TTJ and ZJ estimations for em
            classes_map.update({"TTT": "tt", "TTL": "tt", "ZL": "misc"})
        else:
            if args.channel == "tt":
                # less data -> fewer categories for tt
                classes_map.update({
                    "TTT": "misc",
                    "TTL": "misc",
                    "TTJ": "misc",
                    "ZL": "misc",
                    "ZJ": "misc"
                })
            else:
                classes_map.update({
                    "TTT": "tt",
                    "TTL": "tt",
                    "TTJ": "tt",
                    "ZL": "zll",
                    "ZJ": "zll"
                })
            estimationMethodList.extend([
                ZJEstimation(era, args.base_path, channel),
                TTJEstimation(era, args.base_path, channel)
            ])

        ### W: estimation method already included above, only the class
        ### mapping differs between et/mt and the other channels
        if args.channel in ["et", "mt"]:
            classes_map["W"] = "w"
        else:
            classes_map["W"] = "misc"

        ##### VV split into VVT, VVL, VVJ
        # VVL is part of the common processes, VVT of the "emb vs. MC" part
        if args.channel == "em":
            classes_map.update({"VVT": "db", "VVL": "db"})
        else:
            classes_map.update({"VVT": "misc", "VVL": "misc", "VVJ": "misc"})
            estimationMethodList.extend([
                VVJEstimation(era, args.base_path, channel),
            ])

        ### QCD class
        if args.channel == "tt":
            classes_map["QCD"] = "noniso"
        else:
            classes_map["QCD"] = "ss"
        return ([classes_map, estimationMethodList])

    channelDict = {}
    channelDict["2016"] = {
        "mt": MTSM2016(),
        "et": ETSM2016(),
        "tt": TTSM2016(),
        "em": EMSM2016()
    }
    channelDict["2017"] = {
        "mt": MTSM2017(),
        "et": ETSM2017(),
        "tt": TTSM2017(),
        "em": EMSM2017()
    }
    channelDict["2018"] = {
        "mt": MTSM2018(),
        "et": ETSM2018(),
        "tt": TTSM2018(),
        "em": EMSM2018()
    }

    channel = channelDict[era_name][args.channel]

    # Set up `processes` part of config
    output_config["processes"] = {}

    # Additional cuts
    additional_cuts = Cuts()
    logger.warning("Use additional cuts for %s: %s", args.channel,
                   additional_cuts.expand())

    classes_map, estimationMethodList = estimationMethodAndClassMapGenerator()

    # File paths are stored relative to the base path
    base_prefix = args.base_path.rstrip("/") + "/"

    def _process_entry(estimation, channel_cuts):
        """Build the config entry of one process for the given channel cuts."""
        return {
            "files": [
                str(f).replace(base_prefix, "")
                for f in estimation.get_files()
            ],
            "cut_string":
            (estimation.get_cuts() + channel_cuts + additional_cuts).expand(),
            "weight_string":
            estimation.get_weights().extract(),
            "class":
            classes_map[estimation.name]
        }

    ## MC / embedding processes
    for estimation in estimationMethodList:
        output_config["processes"][estimation.name] = _process_entry(
            estimation, channel.cuts)

    # Data-driven QCD: data with a modified copy of the channel selection
    estimation = DataEstimation(era, args.base_path, channel)
    estimation.name = "QCD"
    channel_qcd = copy.deepcopy(channel)

    if args.channel != "tt":
        # invert the opposite-sign ("os") cut, i.e. same sign selection
        channel_qcd.cuts.get("os").invert()
    else:
        # tt: replace the tau_2 isolation cut by "fails tight, passes loose"
        channel_qcd.cuts.remove("tau_2_iso")
        channel_qcd.cuts.add(
            Cut("byTightIsolationMVArun2017v2DBoldDMwLT2017_2<0.5",
                "tau_2_iso"))
        channel_qcd.cuts.add(
            Cut("byLooseIsolationMVArun2017v2DBoldDMwLT2017_2>0.5",
                "tau_2_iso_loose"))

    output_config["processes"][estimation.name] = _process_entry(
        estimation, channel_qcd.cuts)

    #####################################
    # Write output config; the context manager closes the file reliably
    logger.info("Write config to file: {}".format(args.output_config))
    with open(args.output_config, 'w') as stream:
        yaml.dump(output_config, stream, default_flow_style=False)
Пример #10
0
def main(args):
    """Assemble the training dataset configuration for one channel as YAML.

    The configuration holds the path and branch settings copied from
    ``args``, a ``processes`` section with one entry per estimation method
    (relative file list, cut string, weight string, training class) and a
    ``datasets`` list with the two fold files. Jet fakes are described
    either by a ``QCD`` entry (``mc`` method or ``em`` channel) or by an
    ``ff`` entry carrying an additional fake factor weight. The result is
    written to ``args.output_config``.

    Args:
        args: Parsed command line arguments. The attributes read here are
            ``base_path``, ``friend_paths``, ``output_path``,
            ``output_filename``, ``tree_path``, ``event_branch``,
            ``training_weight_branch``, ``era``, ``database``, ``channel``,
            ``training_stxs1p1``, ``training_inclusive``,
            ``training_z_estimation_method``,
            ``training_jetfakes_estimation_method`` and ``output_config``.

    Raises:
        Exception: If the era, the Z estimation method or the jet fake
            estimation method is not supported.
        KeyError: If ``args.channel`` is not one of mt, et, tt, em.
    """
    # Write argparse arguments to YAML config
    logger.debug("Write argparse arguments to YAML config.")
    output_config = {}
    output_config["base_path"] = args.base_path
    output_config["friend_paths"] = args.friend_paths
    output_config["output_path"] = args.output_path
    output_config["output_filename"] = args.output_filename
    output_config["tree_path"] = args.tree_path
    output_config["event_branch"] = args.event_branch
    output_config["training_weight_branch"] = args.training_weight_branch
    logger.debug("Channel %s Era %s", args.channel, args.era)

    # Define era. The era is matched by substring, so the matched year (and
    # not the raw argument) is used as key for the channel lookup below.
    if "2016" in args.era:
        from shape_producer.estimation_methods_2016 import DataEstimation, ggHEstimation, qqHEstimation, \
            ZTTEstimation, ZLEstimation, ZJEstimation, TTTEstimation, TTJEstimation, \
            ZTTEmbeddedEstimation, TTLEstimation, \
            EWKZEstimation, VVLEstimation, VVTEstimation, VVJEstimation, WEstimation

        from shape_producer.era import Run2016
        era = Run2016(args.database)
        era_name = "2016"

    elif "2017" in args.era:
        from shape_producer.estimation_methods_2017 import DataEstimation, ZTTEstimation, ZJEstimation, ZLEstimation, \
            TTLEstimation, TTJEstimation, TTTEstimation, VVTEstimation, VVJEstimation, VVLEstimation, WEstimation, \
            ggHEstimation, qqHEstimation, EWKZEstimation, ZTTEmbeddedEstimation

        from shape_producer.era import Run2017
        era = Run2017(args.database)
        era_name = "2017"

    elif "2018" in args.era:
        from shape_producer.estimation_methods_2018 import DataEstimation, ZTTEstimation, ZJEstimation, ZLEstimation, \
            TTLEstimation, TTJEstimation, TTTEstimation, VVTEstimation, VVJEstimation, VVLEstimation, WEstimation, \
            ggHEstimation, qqHEstimation, EWKZEstimation, ZTTEmbeddedEstimation

        from shape_producer.era import Run2018
        era = Run2018(args.database)
        era_name = "2018"
    else:
        message = "Era {} is not implemented.".format(args.era)
        logger.fatal(message)
        raise Exception(message)

    def estimationMethodAndClassMapGenerator():
        """Return ``[classes_map, estimationMethodList]`` for the setup.

        ``classes_map`` maps process names to training class names and
        ``estimationMethodList`` holds the estimation objects whose entries
        are written to the config. Reads ``channel`` from the enclosing
        scope, so it must be called after ``channel`` is assigned.
        """
        ###### signal processes
        # Ordered (process name, training class) pairs. Keeping name and
        # class in a single table guarantees that every signal estimation
        # created below has a class assigned.
        if args.training_stxs1p1:
            signal_classes = [
                # class1
                ("ggH_GG2H_PTH_GT200125", "ggh_PTHGT200"),
                # class2
                ("ggH_GG2H_0J_PTH_0_10125", "ggh_0J"),
                ("ggH_GG2H_0J_PTH_GT10125", "ggh_0J"),
                # class3
                ("ggH_GG2H_1J_PTH_0_60125", "ggh_1J_PTH0to120"),
                ("ggH_GG2H_1J_PTH_60_120125", "ggh_1J_PTH0to120"),
                # class4
                ("ggH_GG2H_1J_PTH_120_200125", "ggh_1J_PTH120to200"),
                # class5
                ("ggH_GG2H_GE2J_MJJ_0_350_PTH_0_60125", "ggh_2J"),
                ("ggH_GG2H_GE2J_MJJ_0_350_PTH_60_120125", "ggh_2J"),
                ("ggH_GG2H_GE2J_MJJ_0_350_PTH_120_200125", "ggh_2J"),
                # class6
                ("ggH_GG2H_GE2J_MJJ_350_700_PTH_0_200_PTHJJ_0_25125", "vbftopo_lowmjj"),
                ("ggH_GG2H_GE2J_MJJ_350_700_PTH_0_200_PTHJJ_GT25125", "vbftopo_lowmjj"),
                ("qqH_QQ2HQQ_GE2J_MJJ_350_700_PTH_0_200_PTHJJ_0_25125", "vbftopo_lowmjj"),
                ("qqH_QQ2HQQ_GE2J_MJJ_350_700_PTH_0_200_PTHJJ_GT25125", "vbftopo_lowmjj"),
                # class7
                ("ggH_GG2H_GE2J_MJJ_GT700_PTH_0_200_PTHJJ_0_25125", "vbftopo_highmjj"),
                ("ggH_GG2H_GE2J_MJJ_GT700_PTH_0_200_PTHJJ_GT25125", "vbftopo_highmjj"),
                ("qqH_QQ2HQQ_GE2J_MJJ_GT700_PTH_0_200_PTHJJ_0_25125", "vbftopo_highmjj"),
                ("qqH_QQ2HQQ_GE2J_MJJ_GT700_PTH_0_200_PTHJJ_GT25125", "vbftopo_highmjj"),
                # class8
                ("qqH_QQ2HQQ_GE2J_MJJ_0_60125", "qqh_2J"),
                ("qqH_QQ2HQQ_GE2J_MJJ_60_120125", "qqh_2J"),
                ("qqH_QQ2HQQ_GE2J_MJJ_120_350125", "qqh_2J"),
                # class9
                ("qqH_QQ2HQQ_GE2J_MJJ_GT350_PTH_GT200125", "qqh_PTHGT200"),
            ]
        elif args.training_inclusive:
            signal_classes = [("ggH125", "xxh"), ("qqH125", "xxh")]
        else:
            signal_classes = [("ggH125", "ggh"), ("qqH125", "qqh")]

        classes_map = dict(signal_classes)
        # Names starting with "ggH" use ggHEstimation, all others (the
        # "qqH" names) use qqHEstimation.
        estimationMethodList = [
            (ggHEstimation if name.startswith("ggH") else qqHEstimation)(
                name, era, args.base_path, channel)
            for name, _ in signal_classes
        ]

        ###### common background processes
        estimationMethodList.extend([
            EWKZEstimation(era, args.base_path, channel),
            VVLEstimation(era, args.base_path, channel)
        ])
        classes_map["EWKZ"] = "misc"
        ##### TT*, ZL processes
        estimationMethodList.extend([
            TTLEstimation(era, args.base_path, channel),
            ZLEstimation(era, args.base_path, channel)
        ])
        if args.channel == "tt":
            classes_map.update({
                "TTL": "misc",
                "ZL": "misc",
                "VVL": "misc"
            })
        elif args.channel == "em":
            classes_map.update({
                "TTL": "tt",
                "ZL": "misc",
                "VVL": "db"
            })
        else:
            classes_map.update({
                "TTL": "tt",
                "ZL": "zll",
                "VVL": "misc"
            })

        ######## embedding vs. MC for the Z->tautau estimate
        if args.training_z_estimation_method == "emb":
            classes_map["EMB"] = "emb"
            estimationMethodList.extend([
                ZTTEmbeddedEstimation(era, args.base_path, channel)])
        elif args.training_z_estimation_method == "mc":
            classes_map["ZTT"] = "ztt"
            estimationMethodList.extend([
                ZTTEstimation(era, args.base_path, channel),
                TTTEstimation(era, args.base_path, channel),
                VVTEstimation(era, args.base_path, channel)
            ])
            if args.channel == "tt":
                classes_map.update({
                    "TTT": "misc",
                    "VVT": "misc"
                })
            elif args.channel == "em":
                classes_map.update({
                    "TTT": "tt",
                    "VVT": "db"
                })
            else:
                classes_map.update({
                    "TTT": "tt",
                    "VVT": "misc"
                })
        else:
            message = "No valid training-z-estimation-method! Options are emb, mc. Argument was {}".format(
                args.training_z_estimation_method)
            logger.fatal(message)
            raise Exception(message)

        ######## jet fakes: fake factors vs. MC (em always uses MC)
        if args.training_jetfakes_estimation_method == "ff" and args.channel != "em":
            classes_map.update({
                "ff": "ff"
            })
        elif args.training_jetfakes_estimation_method == "mc" or args.channel == "em":
            # less data -> fewer categories for tt
            if args.channel == "tt":
                classes_map.update({
                    "TTJ": "misc",
                    "ZJ": "misc"
                })
            ## no TTJ, ZJ for em
            elif args.channel != "em":
                classes_map.update({
                    "TTJ": "tt",
                    "ZJ": "zll"
                })
            if args.channel != "em":
                classes_map.update({
                    "VVJ": "misc"
                })
                estimationMethodList.extend([
                    VVJEstimation(era, args.base_path, channel),
                    ZJEstimation(era, args.base_path, channel),
                    TTJEstimation(era, args.base_path, channel)
                ])
            ### W
            estimationMethodList.extend([WEstimation(era, args.base_path, channel)])
            if args.channel in ["et", "mt"]:
                classes_map["W"] = "w"
            else:
                classes_map["W"] = "misc"
            ### QCD class
            if args.channel == "tt":
                classes_map["QCD"] = "noniso"
            else:
                classes_map["QCD"] = "ss"
        else:
            message = "No valid training-jetfakes-estimation-method! Options are ff, mc. Argument was {}".format(
                args.training_jetfakes_estimation_method)
            logger.fatal(message)
            raise Exception(message)
        return ([classes_map, estimationMethodList])

    channelDict = {}
    channelDict["2016"] = {"mt": MTSM2016(), "et": ETSM2016(), "tt": TTSM2016(), "em": EMSM2016()}
    channelDict["2017"] = {"mt": MTSM2017(), "et": ETSM2017(), "tt": TTSM2017(), "em": EMSM2017()}
    channelDict["2018"] = {"mt": MTSM2018(), "et": ETSM2018(), "tt": TTSM2018(), "em": EMSM2018()}

    channel = channelDict[era_name][args.channel]

    # Set up `processes` part of config
    output_config["processes"] = {}

    # Additional cuts
    additional_cuts = Cuts()
    logger.warning("Use additional cuts for %s: %s", args.channel,
                   additional_cuts.expand())

    classes_map, estimationMethodList = estimationMethodAndClassMapGenerator()

    # File paths are stored relative to the base path
    base_prefix = args.base_path.rstrip("/") + "/"

    def _process_entry(estimation, cuts, weights):
        """Build the config entry of one process from its cuts and weights."""
        return {
            "files": [
                str(f).replace(base_prefix, "")
                for f in estimation.get_files()
            ],
            "cut_string": cuts.expand(),
            "weight_string": weights.extract(),
            "class": classes_map[estimation.name]
        }

    for estimation in estimationMethodList:
        output_config["processes"][estimation.name] = _process_entry(
            estimation,
            estimation.get_cuts() + channel.cuts + additional_cuts,
            estimation.get_weights())

    if args.training_jetfakes_estimation_method == "mc" or args.channel == "em":
        if args.training_jetfakes_estimation_method == "ff":
            logger.warning("ff+em: using mc for em channel")
        # Data-driven QCD: data with a modified copy of the channel selection
        estimation = DataEstimation(era, args.base_path, channel)
        estimation.name = "QCD"
        channel_qcd = copy.deepcopy(channel)

        if args.channel != "tt":
            # invert the opposite-sign ("os") cut, i.e. same sign selection
            channel_qcd.cuts.get("os").invert()
        else:
            # tt: replace the tau_2 isolation cut by "fails tight, passes medium"
            channel_qcd.cuts.remove("tau_2_iso")
            channel_qcd.cuts.add(
                Cut("byTightDeepTau2017v2p1VSjet_2<0.5", "tau_2_iso"))
            channel_qcd.cuts.add(
                Cut("byMediumDeepTau2017v2p1VSjet_2>0.5", "tau_2_iso_loose"))

        output_config["processes"][estimation.name] = _process_entry(
            estimation,
            estimation.get_cuts() + channel_qcd.cuts + additional_cuts,
            estimation.get_weights())
    else:  ## ff and not em
        estimation = DataEstimation(era, args.base_path, channel)
        estimation.name = "ff"
        aiso = copy.deepcopy(channel)
        if args.channel in ["et", "mt"]:
            # tau fails tight but passes vloose
            aisoCut = Cut(
                "byTightDeepTau2017v2p1VSjet_2<0.5&&byVLooseDeepTau2017v2p1VSjet_2>0.5",
                "tau_aiso")
            fakeWeightstring = "ff2_nom"
            aiso.cuts.remove("tau_iso")
        elif args.channel == "tt":
            # exactly one tau passes tight, the other one fails tight but
            # passes vloose
            aisoCut = Cut(
                "(byTightDeepTau2017v2p1VSjet_2>0.5&&byTightDeepTau2017v2p1VSjet_1<0.5&&byVLooseDeepTau2017v2p1VSjet_1>0.5)||(byTightDeepTau2017v2p1VSjet_1>0.5&&byTightDeepTau2017v2p1VSjet_2<0.5&&byVLooseDeepTau2017v2p1VSjet_2>0.5)",
                "tau_aiso")
            fakeWeightstring = "(0.5*ff1_nom*(byTightDeepTau2017v2p1VSjet_1<0.5)+0.5*ff2_nom*(byTightDeepTau2017v2p1VSjet_2<0.5))"
            aiso.cuts.remove("tau_1_iso")
            aiso.cuts.remove("tau_2_iso")

        aiso.cuts.add(aisoCut)
        additionalWeights = Weights(Weight(fakeWeightstring, "fake_factor"))

        # NOTE(review): unlike the other entries, additional_cuts is not part
        # of this cut string; it is an empty Cuts() here - confirm whether
        # that omission is intended.
        output_config["processes"][estimation.name] = _process_entry(
            estimation,
            estimation.get_cuts() + aiso.cuts,
            estimation.get_weights() + additionalWeights)

    output_config["datasets"] = [args.output_path + "/fold" + fold + "_training_dataset.root" for fold in ["0", "1"]]
    #####################################
    # Write output config; the context manager closes the file reliably
    logger.info("Write config to file: {}".format(args.output_config))
    with open(args.output_config, 'w') as stream:
        yaml.dump(output_config, stream, default_flow_style=False)
Пример #11
0
def main(args):
    """Book and produce nominal histograms in the anti-isolated tau regions.

    For the et, mt and tt channels the tau isolation cut of the nominal 2016
    selection is replaced by an anti-isolation cut (tt gets one selection per
    tau leg).  For every combination of the booked processes and categories a
    nominal ``Systematic`` is added, and the histograms are produced into
    ``fake-factors/<tag>_ff_yields.root``.

    Args:
        args: Parsed command line arguments.  Attributes read here: ``tag``,
            ``num_threads``, ``era``, ``datasets``, ``additional_cuts`` and
            ``additional_friends`` (path templates formatted with the channel
            name), ``directory``, ``et_friend_directory``,
            ``mt_friend_directory``, ``tt_friend_directory`` and ``config``.

    Raises:
        Exception: If the era is not 2016 or if ``args.config`` is not a key
            of ``fake-factors/config.yaml``.
    """
    # Container for all distributions to be drawn
    logger.info("Set up shape variations.")
    systematics = Systematics(
        "fake-factors/{}_ff_yields.root".format(args.tag),
        num_threads=args.num_threads)

    # Era selection
    if "2016" in args.era:
        from shape_producer.estimation_methods_2016 import DataEstimation, HTTEstimation, ggHEstimation, qqHEstimation, VHEstimation, ZTTEstimation, ZLEstimation, ZJEstimation, WEstimation, TTTEstimation, TTJEstimation, TTLEstimation, VVTEstimation, VVJEstimation, VVLEstimation, ZTTEmbeddedEstimation
        from shape_producer.era import Run2016
        era = Run2016(args.datasets)
    else:
        message = "Era {} is not implemented.".format(args.era)
        logger.critical(message)
        raise Exception(message)

    # Channels and processes
    channels = ["et", "mt", "tt"]

    # NOTE: yaml.safe_load replaces the former Loader-less yaml.load, which is
    # rejected by recent PyYAML releases and can construct arbitrary objects on
    # old ones.  The configs read here are assumed to be plain mappings/lists.
    additional_cuts = dict()
    additional_friends = dict()
    for channel in channels:
        with open(args.additional_cuts.format(channel), "r") as stream:
            cut_config = yaml.safe_load(stream)
        additional_cuts[channel] = cut_config["cutstrings"]

        with open(args.additional_friends.format(channel), "r") as stream:
            friend_config = yaml.safe_load(stream)
        additional_friends[channel] = dict(
            zip(friend_config["friend_dirs"], friend_config["friend_aliases"]))

    # yapf: disable
    directory = args.directory

    # Friend directory -> alias; the nominal friend directory has an empty alias.
    friend_directories = {}
    for channel, nominal_friends in (("et", args.et_friend_directory),
                                     ("mt", args.mt_friend_directory),
                                     ("tt", args.tt_friend_directory)):
        friend_directories[channel] = {nominal_friends: ""}
        friend_directories[channel].update(additional_friends[channel])

    # (dictionary key, process name, estimation method), in booking order
    estimation_methods = [
        ("data", "data_obs", DataEstimation),
        ("EMB",  "EMB",      ZTTEmbeddedEstimation),
        ("ZTT",  "ZTT",      ZTTEstimation),
        ("ZL",   "ZL",       ZLEstimation),
        ("ZJ",   "ZJ",       ZJEstimation),
        ("W",    "W",        WEstimation),
        ("TTT",  "TTT",      TTTEstimation),
        ("TTL",  "TTL",      TTLEstimation),
        ("TTJ",  "TTJ",      TTJEstimation),
        ("VVT",  "VVT",      VVTEstimation),
        ("VVL",  "VVL",      VVLEstimation),
        ("VVJ",  "VVJ",      VVJEstimation),
        #("EWKZ", "EWKZ",     EWKZEstimation),
    ]

    anti_iso_template = ("(byTightIsolationMVArun2v1DBoldDMwLT_{index}<0.5"
                         "&&byVLooseIsolationMVArun2v1DBoldDMwLT_{index}>0.5)")

    def anti_isolated_channel(channel_class, channel_name, tau_index, iso_cut, anti_iso_cut):
        # Nominal selection plus additional cuts, with the isolation cut of the
        # given tau leg swapped for the anti-isolation requirement.
        selection = channel_class()
        for cutstring in additional_cuts[channel_name]:
            selection.cuts.add(Cut(cutstring))
        selection.cuts.remove(iso_cut)
        selection.cuts.add(Cut(anti_iso_template.format(index=tau_index), anti_iso_cut))
        return selection

    def book_processes(selection, friend_directory):
        # One Process per estimation method, all sharing the same selection.
        processes = {}
        for key, name, method in estimation_methods:
            processes[key] = Process(name, method(era, directory, selection, friend_directory=friend_directory))
        return processes

    mt = anti_isolated_channel(MTSM2016, "mt", 2, "tau_iso", "tau_anti_iso")
    mt_processes = book_processes(mt, friend_directories["mt"])

    et = anti_isolated_channel(ETSM2016, "et", 2, "tau_iso", "tau_anti_iso")
    et_processes = book_processes(et, friend_directories["et"])

    #in tt two 'channels' are needed: antiisolated region for each tau respectively
    tt1 = anti_isolated_channel(TTSM2016, "tt", 1, "tau_1_iso", "tau_1_anti_iso")
    tt1_processes = book_processes(tt1, friend_directories["tt"])
    tt2 = anti_isolated_channel(TTSM2016, "tt", 2, "tau_2_iso", "tau_2_anti_iso")
    tt2_processes = book_processes(tt2, friend_directories["tt"])

    # Variables and categories
    with open("fake-factors/config.yaml") as stream:
        variable_configs = yaml.safe_load(stream)
    if args.config not in variable_configs:
        message = "Requested config key %s not available in fake-factors/config.yaml!" % args.config
        logger.critical(message)
        raise Exception(message)
    variable_config = variable_configs[args.config]

    def build_categories(selection, channel_name, labels, prefix=""):
        # An inclusive category followed by one category per classifier label;
        # the label position is the value required of <channel>_max_index.
        def make_variable():
            return Variable(args.config,
                            VariableBinning(variable_config[channel_name]["binning"]),
                            variable_config[channel_name]["expression"])

        categories = [Category(prefix + "inclusive", selection, Cuts(), variable=make_variable())]
        for index, label in enumerate(labels):
            categories.append(
                Category(
                    prefix + label,
                    selection,
                    Cuts(
                        Cut("{channel}_max_index=={index}".format(channel=channel_name, index=index), "exclusive_score")),
                    variable=make_variable()))
        return categories

    # Analysis shapes
    lt_labels = ["ggh", "qqh", "ztt", "zll", "w", "tt", "ss", "misc"]
    tt_labels = ["ggh", "qqh", "ztt", "noniso", "misc"]
    et_categories = build_categories(et, "et", lt_labels)
    mt_categories = build_categories(mt, "mt", lt_labels)
    tt1_categories = build_categories(tt1, "tt", tt_labels, prefix="tt1_")
    tt2_categories = build_categories(tt2, "tt", tt_labels, prefix="tt2_")

    # Nominal histograms
    # yapf: enable
    for processes, categories in ((et_processes, et_categories),
                                  (mt_processes, mt_categories),
                                  (tt1_processes, tt1_categories),
                                  (tt2_processes, tt2_categories)):
        for process, category in product(processes.values(), categories):
            systematics.add(
                Systematic(category=category,
                           process=process,
                           analysis="smhtt",
                           era=era,
                           variation=Nominal(),
                           mass="125"))

    # Produce histograms
    logger.info("Start producing shapes.")
    systematics.produce()
    logger.info("Done producing shapes.")
def main(args):
    """Derive the goodness-of-fit binning per channel and write it as YAML.

    For each of the et, mt and tt channels the input chain is built, the 1d
    binning is obtained from ``get_1d_binning`` (called with the 1st and 99th
    percentile and 9 borders), the unrolled 2d binning is added by
    ``add_2d_unrolled_binning``, and the result is stored under
    ``config["gof"][<channel>]``.

    Args:
        args: Parsed command line arguments.  Attributes read here: ``era``,
            ``datasets``, ``variables`` (YAML file with a ``variables`` key),
            ``directory`` and ``output``.

    Raises:
        Exception: If the era is not 2016.
    """
    # Define era
    if "2016" in args.era:
        from shape_producer.era import Run2016
        era = Run2016(args.datasets)
    else:
        message = "Era {} is not implemented.".format(args.era)
        logger.fatal(message)
        raise Exception(message)

    # Load variables
    # NOTE: yaml.safe_load replaces the former Loader-less yaml.load (rejected
    # by recent PyYAML); the file is closed deterministically by the with-block.
    with open(args.variables) as stream:
        variables = yaml.safe_load(stream)["variables"]

    # Define bins and range of binning for variables in enabled channels
    channels = ["et", "mt", "tt"]
    num_borders = 9
    min_percentile = 1.0
    max_percentile = 99.0

    config = {"gof": {}}

    # Same procedure for every enabled channel, processed in the order et, mt, tt
    for name, channel_class in (("et", ETSM), ("mt", MTSM), ("tt", TTSM)):
        if name not in channels:
            continue

        # Get properties
        channel = channel_class()
        logger.info("Channel: %s", name)
        additional_cuts = Cuts()
        logger.warning("Use additional cuts for %s: %s", name,
                       additional_cuts.expand())
        dict_ = get_properties({}, era, channel, args.directory,
                               additional_cuts)

        # Build chain
        dict_["tree_path"] = "{}_nominal/ntuple".format(name)
        chain = build_chain(dict_)

        # Get percentiles and calculate 1d binning
        binning = get_1d_binning(name, chain, variables, min_percentile,
                                 max_percentile, num_borders)

        # Add binning for unrolled 2d distributions
        binning = add_2d_unrolled_binning(variables, binning)

        # Append binning to config
        config["gof"][name] = binning

    # Write config; the with-block guarantees the file is flushed and closed
    logger.info("Write binning config to %s.", args.output)
    with open(args.output, 'w') as stream:
        yaml.dump(config, stream)
def _list_channel_folders(path, channel_name):
    """Return the names of the keys in ROOT file `path` containing "<channel_name>_"."""
    marker = "{}_".format(channel_name)
    root_file = ROOT.TFile(path)
    folders = [
        key.GetName() for key in root_file.GetListOfKeys()
        if marker in key.GetName()
    ]
    root_file.Close()
    return folders


def main(args):
    """Write a YAML map from input file to the channel folders it contains.

    For the requested channel (mt, et or tt) the estimation methods are set
    up, their input files are collected, and for each file the names of the
    keys containing ``"<channel>_"`` are recorded.  Any other channel value
    results in an empty filelist being written.

    Args:
        args: Parsed command line arguments.  Attributes read here:
            ``database``, ``channel``, ``embedding``, ``directory`` and
            ``output``.

    Raises:
        Exception: If one of the input files does not exist.
    """
    # Map of input file path -> list of folder names for the channel
    filelist = {}

    # Set up era
    era = Run2016(args.database)

    logger.debug("Write filelist for channel %s.", args.channel)

    ############################################################################

    # Channel selection and channel-specific estimation methods.  Classes are
    # only referenced inside the branch that needs them, as before.
    channel = None
    if args.channel == "mt":
        channel = MTSM()
        if args.embedding:
            # Fixed: these calls used the undefined name `mt` instead of the
            # channel object created above.
            channel.cuts.remove("trg_singlemuoncross")
            channel.cuts.add(
                Cut("(trg_singlemuon==1 && pt_1>23 && pt_2>30)",
                    "trg_singlemuon"))
        zl_method, zj_method, ttj_method = (ZLEstimationMTSM, ZJEstimationMT,
                                            TTJEstimationMT)
        ttt_method = (TTTNoTauTauEstimationMT
                      if args.embedding else TTTEstimationMT)
    elif args.channel == "et":
        channel = ETSM()
        zl_method, zj_method, ttj_method = (ZLEstimationETSM, ZJEstimationET,
                                            TTJEstimationET)
        ttt_method = (TTTNoTauTauEstimationET
                      if args.embedding else TTTEstimationET)
    elif args.channel == "tt":
        channel = TTSM()
        zl_method, zj_method, ttj_method = (ZLEstimationTT, ZJEstimationTT,
                                            TTJEstimationTT)
        ttt_method = (TTTNoTauTauEstimationTT
                      if args.embedding else TTTEstimationTT)

    ############################################################################

    if channel is not None:
        ztt_method = ZTTEmbeddedEstimation if args.embedding else ZTTEstimation
        methods = [
            ggHEstimation, qqHEstimation, VHEstimation, ztt_method, zl_method,
            zj_method, ttt_method, ttj_method, WEstimationRaw,
            EWKWpEstimation, EWKWmEstimation, VVEstimation, EWKZllEstimation,
            EWKZnnEstimation, DataEstimation
        ]
        # All estimation methods are constructed before any file is inspected
        estimations = [
            method(era, args.directory, channel) for method in methods
        ]

        for estimation in estimations:
            # Get files for estimation method
            logger.debug("Get files for estimation method %s.",
                         estimation.name)
            files = [str(f) for f in estimation.get_files()]

            # Go through files and get folders for channel
            for path in files:
                if not os.path.exists(path):
                    logger.fatal("File does not exist: %s", path)
                    raise Exception("File does not exist: {}".format(path))

                filelist[path] = _list_channel_folders(path, args.channel)

    ############################################################################

    # Write output filelist; the with-block guarantees flush and close
    logger.info("Write filelist to file: {}".format(args.output))
    with open(args.output, 'w') as stream:
        yaml.dump(filelist, stream, default_flow_style=False)
Пример #14
0
def main(args):
    """Write the YAML config describing the processes of a training dataset.

    The config holds the path-related command line arguments and, per
    estimation method (data, ggH125, qqH125, HWW), the input files relative to
    ``args.base_path`` together with the expanded cut string and the extracted
    weight string.

    Args:
        args: Parsed command line arguments.  Attributes read here:
            ``base_path``, ``friend_paths``, ``output_path``,
            ``output_filename``, ``tree_path``, ``event_branch``,
            ``training_weight_branch``, ``era``, ``database``, ``channel``
            and ``output_config``.

    Raises:
        Exception: If the era is neither 2016 nor 2017.
        KeyError: If ``args.channel`` is not one of mt, et, tt, em.
    """
    # Write argparse arguments to YAML config
    logger.debug("Write argparse arguments to YAML config.")
    output_config = {}
    output_config["base_path"] = args.base_path
    output_config["friend_paths"] = args.friend_paths
    output_config["output_path"] = args.output_path
    output_config["output_filename"] = args.output_filename
    output_config["tree_path"] = args.tree_path
    output_config["event_branch"] = args.event_branch
    output_config["training_weight_branch"] = args.training_weight_branch

    # Define era
    if "2016" in args.era:
        # Fixed: DataEstimation was imported only in the 2017 branch.  Since
        # that import makes the name local to this function, the 2016 path
        # failed with an unbound-variable NameError when building the list of
        # estimation methods.
        from shape_producer.estimation_methods_2016 import ggHEstimation, qqHEstimation, HWWEstimation, DataEstimation
        from shape_producer.era import Run2016
        era = Run2016(args.database)
        era_key = "2016"

    elif "2017" in args.era:
        from shape_producer.estimation_methods_2017 import ggHEstimation, qqHEstimation, HWWEstimation, DataEstimation
        from shape_producer.era import Run2017
        era = Run2017(args.database)
        era_key = "2017"
    else:
        message = "Era {} is not implemented.".format(args.era)
        logger.fatal(message)
        raise Exception(message)

    channelDict = {}
    channelDict["2016"] = {
        "mt": MTSM2016(),
        "et": ETSM2016(),
        "tt": TTSM2016(),
        "em": EMSM2016()
    }
    channelDict["2017"] = {
        "mt": MTSM2017(),
        "et": ETSM2017(),
        "tt": TTSM2017(),
        "em": EMSM2017()
    }

    # Index with the key matched above rather than the raw args.era, so that
    # any era string accepted by the substring test also selects a channel.
    channel = channelDict[era_key][args.channel]

    # Set up `processes` part of config
    output_config["processes"] = {}

    # Additional cuts
    additional_cuts = Cuts()
    logger.warning("Use additional cuts for %s: %s", args.channel,
                   additional_cuts.expand())

    estimationMethodList = [
        DataEstimation(era, args.base_path, channel),
        ggHEstimation("ggH125", era, args.base_path, channel),
        qqHEstimation("qqH125", era, args.base_path, channel),
        HWWEstimation(era, args.base_path, channel),
    ]

    # Prefix stripped from the file paths to make them relative to base_path
    base_prefix = args.base_path.rstrip("/") + "/"
    for estimation in estimationMethodList:
        output_config["processes"][estimation.name] = {
            "files": [
                str(f).replace(base_prefix, "")
                for f in estimation.get_files()
            ],
            "cut_string":
            (estimation.get_cuts() + channel.cuts + additional_cuts).expand(),
            "weight_string":
            estimation.get_weights().extract(),
        }

    # Write output config; the with-block guarantees flush and close
    if not os.path.exists(args.output_path):
        os.makedirs(args.output_path)
    logger.info("Write config to file: {}".format(args.output_config))
    with open(args.output_config, 'w') as stream:
        yaml.dump(output_config, stream, default_flow_style=False)
Пример #15
0
def main(args):
    """Set up and produce the cut-based 2016 shapes for one shape group.

    For every channel in ``args.channels`` the function registers the
    processes selected by ``args.shape_group``, one category
    (``args.category``) with the discriminator ``args.discriminator_variable``
    taken from the binning file ``args.binning``, the nominal histograms and
    the systematic variations, and finally calls ``systematics.produce()``.

    Attributes read from ``args``: ``tag``, ``discriminator_variable``,
    ``num_threads``, ``skip_systematic_variations``, ``era``, ``datasets``,
    ``directory``, ``et_friend_directory``, ``mt_friend_directory``,
    ``tt_friend_directory``, ``em_friend_directory``,
    ``fake_factor_friend_directory``, ``channels``, ``shape_group``,
    ``binning`` and ``category``.

    Raises:
        Exception: if ``args.era`` does not contain ``"2016"``.
    """
    # Container for all distributions to be drawn
    logger.info("Set up shape variations.")
    systematics = Systematics(
        "{}_cutbased_shapes_{}.root".format(args.tag,
                                            args.discriminator_variable),
        num_threads=args.num_threads,
        skip_systematic_variations=args.skip_systematic_variations)

    # Era selection
    if "2016" in args.era:
        from shape_producer.estimation_methods_2016 import DataEstimation, ZTTEstimation, ZTTEmbeddedEstimation, ZLEstimation, ZJEstimation, TTTEstimation, TTLEstimation, TTJEstimation, VVTEstimation, VVLEstimation, VVJEstimation, WEstimation, HTTEstimation, ggHEstimation, qqHEstimation, VHEstimation, WHEstimation, ZHEstimation, ttHEstimation, HWWEstimation, ggHWWEstimation, qqHWWEstimation, SUSYggHEstimation, SUSYbbHEstimation, QCDEstimation_SStoOS_MTETEM, QCDEstimationTT, NewFakeEstimationLT, NewFakeEstimationTT
        from shape_producer.era import Run2016
        era = Run2016(args.datasets)
    else:
        message = "Era {} is not implemented.".format(args.era)
        logger.critical(message)
        raise Exception(message)

    # Channels and processes
    # yapf: disable
    directory = args.directory
    friend_directories = {
        "et" : args.et_friend_directory,
        "mt" : args.mt_friend_directory,
        "tt" : args.tt_friend_directory,
        "em" : args.em_friend_directory,
    }
    ff_friend_directory = args.fake_factor_friend_directory

    # Keyed by channel name only (this script handles a single era).
    channel_dict = {
        "et": ETMSSM2016(),
        "mt": MTMSSM2016(),
        "tt": TTMSSM2016(),
        "em": EMMSSM2016(),
    }

    susyggH_masses = [80, 90, 100, 110, 120, 130, 140, 160, 180, 200, 250, 300, 350, 400, 450, 500, 600, 700, 800, 900, 1000, 1200, 1400, 1500, 1600, 1800, 2000, 2300, 2600, 2900, 3200]
    susybbH_masses = [80, 90, 100, 110, 120, 130, 140, 160, 180, 200, 250, 300, 350, 400, 450, 500, 600, 700, 800, 900, 1000, 1200, 1400, 1500, 1600, 1800, 2000, 2300, 2600, 2900, 3200]

    processes = {
        "mt" : {},
        "et" : {},
        "tt" : {},
        "em" : {},
    }

    for ch in args.channels:

        # common processes
        if args.shape_group == "backgrounds":
            # channel_dict is indexed by channel, not by era: indexing it with
            # args.era first would raise a KeyError.
            processes[ch]["data"] = Process("data_obs", DataEstimation         (era, directory, channel_dict[ch], friend_directory=friend_directories[ch]))
            processes[ch]["EMB"]  = Process("EMB",      ZTTEmbeddedEstimation  (era, directory, channel_dict[ch], friend_directory=friend_directories[ch]))
            processes[ch]["ZL"]   = Process("ZL",       ZLEstimation           (era, directory, channel_dict[ch], friend_directory=friend_directories[ch]))
            processes[ch]["TTL"]  = Process("TTL",      TTLEstimation          (era, directory, channel_dict[ch], friend_directory=friend_directories[ch]))
            processes[ch]["VVL"]  = Process("VVL",      VVLEstimation          (era, directory, channel_dict[ch], friend_directory=friend_directories[ch]))

            processes[ch]["VH125"]   = Process("VH125",    VHEstimation        (era, directory, channel_dict[ch], friend_directory=friend_directories[ch]))
            processes[ch]["WH125"]   = Process("WH125",    WHEstimation        (era, directory, channel_dict[ch], friend_directory=friend_directories[ch]))
            processes[ch]["ZH125"]   = Process("ZH125",    ZHEstimation        (era, directory, channel_dict[ch], friend_directory=friend_directories[ch]))
            processes[ch]["ttH125"]  = Process("ttH125",   ttHEstimation       (era, directory, channel_dict[ch], friend_directory=friend_directories[ch]))

            processes[ch]["ggHWW125"] = Process("ggHWW125", ggHWWEstimation       (era, directory, channel_dict[ch], friend_directory=friend_directories[ch]))
            processes[ch]["qqHWW125"] = Process("qqHWW125", qqHWWEstimation       (era, directory, channel_dict[ch], friend_directory=friend_directories[ch]))

        # mssm ggH and bbH signals
        if "gg" in args.shape_group:
            for m in susyggH_masses:
                name = args.shape_group + "_" + str(m)
                processes[ch][name] = Process(name, SUSYggHEstimation(era, directory, channel_dict[ch], str(m), args.shape_group.replace("gg",""), friend_directory=friend_directories[ch]))
        if args.shape_group == "bbH":
            for m in susybbH_masses:
                name = "bbH_" + str(m)
                processes[ch][name] = Process(name, SUSYbbHEstimation(era, directory, channel_dict[ch], str(m), friend_directory=friend_directories[ch]))

        if args.shape_group == "sm_signals":
            # stage 0 and stage 1.1 ggh and qqh
            for ggH_htxs in ggHEstimation.htxs_dict:
                processes[ch][ggH_htxs] = Process(ggH_htxs, ggHEstimation(ggH_htxs, era, directory, channel_dict[ch], friend_directory=[]))  # friend_directories[ch]))
            for qqH_htxs in qqHEstimation.htxs_dict:
                processes[ch][qqH_htxs] = Process(qqH_htxs, qqHEstimation(qqH_htxs, era, directory, channel_dict[ch], friend_directory=[]))  # friend_directories[ch]))

        # channel-specific processes
        if args.shape_group == "backgrounds":
            if ch in ["mt", "et"]:
                processes[ch]["FAKES"] = Process("jetFakes", NewFakeEstimationLT(era, directory, channel_dict[ch], [processes[ch][process] for process in ["EMB", "ZL", "TTL", "VVL"]], processes[ch]["data"], friend_directory=friend_directories[ch]+[ff_friend_directory]))
            elif ch == "tt":
                processes[ch]["FAKES"] = Process("jetFakes", NewFakeEstimationTT(era, directory, channel_dict[ch], [processes[ch][process] for process in ["EMB", "ZL", "TTL", "VVL"]], processes[ch]["data"], friend_directory=friend_directories[ch]+[ff_friend_directory]))
            elif ch == "em":
                processes[ch]["W"]   = Process("W",   WEstimation(era, directory, channel_dict[ch], friend_directory=friend_directories[ch]))
                processes[ch]["QCD"] = Process("QCD", QCDEstimation_SStoOS_MTETEM(era, directory, channel_dict[ch], [processes[ch][process] for process in ["EMB", "ZL", "W", "VVL", "TTL"]], processes[ch]["data"], extrapolation_factor=1.0, qcd_weight = Weight("em_qcd_extrap_up_Weight","qcd_weight")))

    # Variables and categories
    # The binning file is opened in a context manager so the handle is closed
    # once the YAML content has been parsed.
    with open(args.binning) as binning_file:
        if sys.version_info.major <= 2 and sys.version_info.minor <= 7 and sys.version_info.micro <= 15:
            binning = yaml.load(binning_file)
        else:
            binning = yaml.load(binning_file, Loader=yaml.FullLoader)

    # Cut-based analysis shapes
    categories = {
        "mt" : [],
        "et" : [],
        "tt" : [],
        "em" : [],
    }

    for ch in args.channels:
        discriminator = construct_variable(binning, args.discriminator_variable)
        # Get dictionary mapping category name to cut objects.
        cut_dict = create_cut_map(binning, ch)
        # Create full set of cuts from dict and create category using these cuts.
        cuts = Cuts(*cut_dict[args.category])
        categories[ch].append(Category(args.category, channel_dict[ch], cuts, variable=discriminator))


    # Choice of activated signal processes
    signal_nicks = []

    sm_htt_backgrounds_nicks = ["WH125", "ZH125", "VH125", "ttH125"]
    sm_hww_nicks = ["ggHWW125", "qqHWW125"]
    sm_htt_signals_nicks = list(ggHEstimation.htxs_dict) + list(qqHEstimation.htxs_dict)
    susy_nicks = []
    if "gg" in args.shape_group:
        for m in susyggH_masses:
            susy_nicks.append(args.shape_group + "_" + str(m))
    if args.shape_group == "bbH":
        for m in susybbH_masses:
            susy_nicks.append("bbH_" + str(m))

    if args.shape_group == "backgrounds":
        signal_nicks = sm_htt_backgrounds_nicks + sm_hww_nicks
    elif args.shape_group == "sm_signals":
        signal_nicks = sm_htt_signals_nicks
    else:
        signal_nicks = susy_nicks

    # Nominal histograms
    for ch in args.channels:
        for process, category in product(processes[ch].values(), categories[ch]):
            systematics.add(Systematic(category=category, process=process, analysis="mssmvssm", era=era, variation=Nominal(), mass="125"))

    # Setup shapes variations

    # EMB: 10% removed events in ttbar simulation (ttbar -> real tau tau events) will be added/subtracted to ZTT shape to use as systematic
    if args.shape_group == "backgrounds":
        tttautau_process = {}
        for ch in args.channels:
            tttautau_process[ch] = Process("TTT", TTTEstimation(era, directory, channel_dict[ch], friend_directory=friend_directories[ch]))
            processes[ch]['ZTTpTTTauTauDown'] = Process("ZTTpTTTauTauDown", AddHistogramEstimationMethod("AddHistogram", "nominal", era, directory, channel_dict[ch], [processes[ch]["EMB"], tttautau_process[ch]], [1.0, -0.1]))
            processes[ch]['ZTTpTTTauTauUp'] = Process("ZTTpTTTauTauUp", AddHistogramEstimationMethod("AddHistogram", "nominal", era, directory, channel_dict[ch], [processes[ch]["EMB"], tttautau_process[ch]], [1.0, 0.1]))
            for category in categories[ch]:
                for updownvar in ["Down", "Up"]:
                    systematics.add(Systematic(category=category, process=processes[ch]['ZTTpTTTauTau%s'%updownvar], analysis="smhtt", era=era, variation=Relabel("CMS_htt_emb_ttbar_Run2016", updownvar), mass="125"))

    # Prefiring weights
    prefiring_variations = [
        ReplaceWeight("CMS_prefiring_Run2016", "prefireWeight", Weight("prefiringweightup", "prefireWeight"),"Up"),
        ReplaceWeight("CMS_prefiring_Run2016", "prefireWeight", Weight("prefiringweightdown", "prefireWeight"),"Down"),
    ]

    # Split JES shapes
    jet_es_variations = create_systematic_variations("CMS_scale_j_Absolute", "jecUncAbsolute", DifferentPipeline)
    jet_es_variations += create_systematic_variations("CMS_scale_j_Absolute_Run2016", "jecUncAbsoluteYear", DifferentPipeline)
    jet_es_variations += create_systematic_variations("CMS_scale_j_BBEC1", "jecUncBBEC1", DifferentPipeline)
    jet_es_variations += create_systematic_variations("CMS_scale_j_BBEC1_Run2016", "jecUncBBEC1Year", DifferentPipeline)
    jet_es_variations += create_systematic_variations("CMS_scale_j_EC2", "jecUncEC2", DifferentPipeline)
    jet_es_variations += create_systematic_variations("CMS_scale_j_EC2_Run2016", "jecUncEC2Year", DifferentPipeline)
    jet_es_variations += create_systematic_variations("CMS_scale_j_FlavorQCD", "jecUncFlavorQCD", DifferentPipeline)
    jet_es_variations += create_systematic_variations("CMS_scale_j_HF", "jecUncHF", DifferentPipeline)
    jet_es_variations += create_systematic_variations("CMS_scale_j_HF_Run2016", "jecUncHFYear", DifferentPipeline)
    jet_es_variations += create_systematic_variations("CMS_scale_j_RelativeBal", "jecUncRelativeBal", DifferentPipeline)
    jet_es_variations += create_systematic_variations("CMS_scale_j_RelativeSample_Run2016", "jecUncRelativeSampleYear", DifferentPipeline)

    # B-tagging
    btag_eff_variations = create_systematic_variations("CMS_htt_eff_b_Run2016", "btagEff", DifferentPipeline)
    mistag_eff_variations = create_systematic_variations("CMS_htt_mistag_b_Run2016", "btagMistag", DifferentPipeline)

    ## Variations common for all groups (most of the mc-related systematics)
    common_mc_variations = prefiring_variations + btag_eff_variations + mistag_eff_variations + jet_es_variations

    # MET energy scale. Note: only those variations for non-resonant processes are used in the stat. inference
    met_unclustered_variations = create_systematic_variations("CMS_scale_met_unclustered", "metUnclusteredEn", DifferentPipeline)

    # Recoil correction unc, for resonant processes
    recoil_variations = create_systematic_variations("CMS_htt_boson_reso_met_Run2016", "metRecoilResolution", DifferentPipeline)
    recoil_variations += create_systematic_variations("CMS_htt_boson_scale_met_Run2016", "metRecoilResponse", DifferentPipeline)

    # Tau energy scale (general, MC-specific & EMB-specific), it is mt, et & tt specific
    tau_es_variations = {}

    for unctype in ["", "_mc", "_emb"]:
        tau_es_variations[unctype] = create_systematic_variations("CMS_scale%s_t_3prong_Run2016"% (unctype), "tauEsThreeProng", DifferentPipeline)
        tau_es_variations[unctype] += create_systematic_variations("CMS_scale%s_t_3prong1pizero_Run2016"% (unctype), "tauEsThreeProngOnePiZero", DifferentPipeline)
        tau_es_variations[unctype] += create_systematic_variations("CMS_scale%s_t_1prong_Run2016"% (unctype), "tauEsOneProng", DifferentPipeline)
        tau_es_variations[unctype] += create_systematic_variations("CMS_scale%s_t_1prong1pizero_Run2016"% (unctype), "tauEsOneProngOnePiZero", DifferentPipeline)

    # Tau ID variations (general, MC-specific & EMB specific), it is mt, et & tt specific
    # in et and mt one nuisance per pT bin, in tt per dm
    tau_id_variations = {}
    for ch in ["et" , "mt", "tt"]:
        tau_id_variations[ch] = {}
        for unctype in ["", "_emb"]:
            tau_id_variations[ch][unctype] = []
            if ch in ["et", "mt"]:
                # Consecutive entries form the pT bins; the last bin is open-ended ("inf").
                pt = [30, 35, 40, 500, 1000, "inf"]
                for i, ptbin in enumerate(pt[:-1]):
                    bindown = ptbin
                    binup = pt[i+1]
                    if binup == "inf":
                        tau_id_variations[ch][unctype].append(
                                ReplaceWeight("CMS_eff{unctype}_t_{bindown}-{binup}_Run2016".format(unctype=unctype,bindown=bindown, binup=binup), "taubyIsoIdWeight",
                                    Weight("(((pt_2 >= {bindown})*tauIDScaleFactorWeightUp_tight_DeepTau2017v2p1VSjet_2)+((pt_2 < {bindown})*tauIDScaleFactorWeight_tight_DeepTau2017v2p1VSjet_2))".format(bindown=bindown), "taubyIsoIdWeight"), "Up"))
                        tau_id_variations[ch][unctype].append(
                                ReplaceWeight("CMS_eff{unctype}_t_{bindown}-{binup}_Run2016".format(unctype=unctype, bindown=bindown, binup=binup), "taubyIsoIdWeight",
                                    Weight("(((pt_2 >= {bindown})*tauIDScaleFactorWeightDown_tight_DeepTau2017v2p1VSjet_2)+((pt_2 < {bindown})*tauIDScaleFactorWeight_tight_DeepTau2017v2p1VSjet_2))".format(bindown=bindown),"taubyIsoIdWeight"), "Down"))
                    else:
                        tau_id_variations[ch][unctype].append(
                                ReplaceWeight("CMS_eff{unctype}_t_{bindown}-{binup}_Run2016".format(unctype=unctype, bindown=bindown, binup=binup), "taubyIsoIdWeight",
                                    Weight("(((pt_2 >= {bindown} && pt_2 <= {binup})*tauIDScaleFactorWeightUp_tight_DeepTau2017v2p1VSjet_2)+((pt_2 < {bindown} || pt_2 > {binup})*tauIDScaleFactorWeight_tight_DeepTau2017v2p1VSjet_2))".format(bindown=bindown, binup=binup),"taubyIsoIdWeight"), "Up"))
                        tau_id_variations[ch][unctype].append(
                                ReplaceWeight("CMS_eff{unctype}_t_{bindown}-{binup}_Run2016".format(unctype=unctype, bindown=bindown, binup=binup), "taubyIsoIdWeight",
                                    Weight("(((pt_2 >= {bindown} && pt_2 <= {binup})*tauIDScaleFactorWeightDown_tight_DeepTau2017v2p1VSjet_2)+((pt_2 < {bindown} || pt_2 > {binup})*tauIDScaleFactorWeight_tight_DeepTau2017v2p1VSjet_2))".format(bindown=bindown, binup=binup),"taubyIsoIdWeight"), "Down"))
            if ch in ["tt"]:
                # NOTE(review): in the weight strings below `*` binds tighter than `+`,
                # so each expression has the form A + B*C + D rather than
                # (A + B)*(C + D). The strings are kept unchanged here; confirm
                # whether a product of the two per-leg weights was intended.
                for decaymode in [0, 1, 10, 11]:
                    tau_id_variations[ch][unctype].append(
                                ReplaceWeight("CMS_eff{unctype}_t_dm{dm}_Run2016".format(unctype=unctype, dm=decaymode), "taubyIsoIdWeight",
                                    Weight("(((decayMode_1=={dm})*tauIDScaleFactorWeightUp_tight_DeepTau2017v2p1VSjet_1)+((decayMode_1!={dm})*tauIDScaleFactorWeight_tight_DeepTau2017v2p1VSjet_1)*((decayMode_2=={dm})*tauIDScaleFactorWeightUp_tight_DeepTau2017v2p1VSjet_2)+((decayMode_2!={dm})*tauIDScaleFactorWeight_tight_DeepTau2017v2p1VSjet_2))".format(dm=decaymode), "taubyIsoIdWeight"), "Up"))
                    tau_id_variations[ch][unctype].append(
                                ReplaceWeight("CMS_eff{unctype}_t_dm{dm}_Run2016".format(unctype=unctype, dm=decaymode), "taubyIsoIdWeight",
                                    Weight("(((decayMode_1=={dm})*tauIDScaleFactorWeightDown_tight_DeepTau2017v2p1VSjet_1)+((decayMode_1!={dm})*tauIDScaleFactorWeight_tight_DeepTau2017v2p1VSjet_1)*((decayMode_2=={dm})*tauIDScaleFactorWeightDown_tight_DeepTau2017v2p1VSjet_2)+((decayMode_2!={dm})*tauIDScaleFactorWeight_tight_DeepTau2017v2p1VSjet_2))".format(dm=decaymode), "taubyIsoIdWeight"), "Down"))

    # Ele energy scale & smear uncertainties (MC-specific), it is et & em specific
    ele_es_variations = create_systematic_variations("CMS_scale_mc_e", "eleScale", DifferentPipeline)
    ele_es_variations += create_systematic_variations("CMS_reso_mc_e", "eleSmear", DifferentPipeline)
    # Ele energy scale (EMB-specific), it is et & em specific
    ele_es_emb_variations = create_systematic_variations("CMS_scale_emb_e", "eleEs", DifferentPipeline)

    # Z pt reweighting
    zpt_variations = create_systematic_variations("CMS_htt_dyShape_Run2016", "zPtReweightWeight", SquareAndRemoveWeight)

    # top pt reweighting
    top_pt_variations = create_systematic_variations( "CMS_htt_ttbarShape", "topPtReweightWeight", SquareAndRemoveWeight)

    # EMB charged track correction uncertainty (DM-dependent)
    decayMode_variations = []
    decayMode_variations.append(ReplaceWeight("CMS_3ProngEff_Run2016", "decayMode_SF", Weight("embeddedDecayModeWeight_effUp_pi0Nom", "decayMode_SF"), "Up"))
    decayMode_variations.append(ReplaceWeight("CMS_3ProngEff_Run2016", "decayMode_SF", Weight("embeddedDecayModeWeight_effDown_pi0Nom", "decayMode_SF"), "Down"))
    decayMode_variations.append(ReplaceWeight("CMS_1ProngPi0Eff_Run2016", "decayMode_SF", Weight("embeddedDecayModeWeight_effNom_pi0Up", "decayMode_SF"), "Up"))
    decayMode_variations.append(ReplaceWeight("CMS_1ProngPi0Eff_Run2016", "decayMode_SF", Weight("embeddedDecayModeWeight_effNom_pi0Down", "decayMode_SF"), "Down"))

    # QCD for em
    qcd_variations = []
    qcd_variations.append(ReplaceWeight("CMS_htt_qcd_0jet_rate_Run2016", "qcd_weight", Weight("em_qcd_osss_0jet_rateup_Weight", "qcd_weight"), "Up"))
    qcd_variations.append(ReplaceWeight("CMS_htt_qcd_0jet_rate_Run2016", "qcd_weight", Weight("em_qcd_osss_0jet_ratedown_Weight", "qcd_weight"), "Down"))
    qcd_variations.append(ReplaceWeight("CMS_htt_qcd_0jet_shape_Run2016", "qcd_weight", Weight("em_qcd_osss_0jet_shapeup_Weight", "qcd_weight"), "Up"))
    qcd_variations.append(ReplaceWeight("CMS_htt_qcd_0jet_shape_Run2016", "qcd_weight", Weight("em_qcd_osss_0jet_shapedown_Weight", "qcd_weight"), "Down"))
    qcd_variations.append(ReplaceWeight("CMS_htt_qcd_1jet_rate_Run2016", "qcd_weight", Weight("em_qcd_osss_1jet_rateup_Weight", "qcd_weight"), "Up"))
    qcd_variations.append(ReplaceWeight("CMS_htt_qcd_1jet_rate_Run2016", "qcd_weight", Weight("em_qcd_osss_1jet_ratedown_Weight", "qcd_weight"), "Down"))
    qcd_variations.append(ReplaceWeight("CMS_htt_qcd_1jet_shape_Run2016", "qcd_weight", Weight("em_qcd_osss_1jet_shapeup_Weight", "qcd_weight"), "Up"))
    qcd_variations.append(ReplaceWeight("CMS_htt_qcd_1jet_shape_Run2016", "qcd_weight", Weight("em_qcd_osss_1jet_shapedown_Weight", "qcd_weight"), "Down"))
    qcd_variations.append(ReplaceWeight("CMS_htt_qcd_iso_Run2016", "qcd_weight", Weight("em_qcd_extrap_up_Weight", "qcd_weight"), "Up"))
    qcd_variations.append(ReplaceWeight("CMS_htt_qcd_iso_Run2016", "qcd_weight", Weight("em_qcd_extrap_down_Weight", "qcd_weight"), "Down"))
    qcd_variations.append(ReplaceWeight("CMS_htt_qcd_iso", "qcd_weight", Weight("em_qcd_extrap_up_Weight", "qcd_weight"), "Up"))
    qcd_variations.append(ReplaceWeight("CMS_htt_qcd_iso", "qcd_weight", Weight("em_qcd_extrap_down_Weight", "qcd_weight"), "Down"))

    # Gluon-fusion WG1 uncertainty scheme
    ggh_variations = []
    for unc in [
            "THU_ggH_Mig01", "THU_ggH_Mig12", "THU_ggH_Mu", "THU_ggH_PT120",
            "THU_ggH_PT60", "THU_ggH_Res", "THU_ggH_VBF2j", "THU_ggH_VBF3j",
            "THU_ggH_qmtop"
    ]:
        ggh_variations.append(AddWeight(unc, "{}_weight".format(unc), Weight("({})".format(unc), "{}_weight".format(unc)), "Up"))
        ggh_variations.append(AddWeight(unc, "{}_weight".format(unc), Weight("(2.0-{})".format(unc), "{}_weight".format(unc)), "Down"))

    # ZL fakes energy scale
    fakelep_dict = {"et" : "Ele", "mt" : "Mu"}
    lep_fake_es_variations = {}
    for ch in ["mt", "et"]:
        lep_fake_es_variations[ch] = create_systematic_variations("CMS_ZLShape_%s_1prong_Run2016"% (ch), "tau%sFakeEsOneProng"%fakelep_dict[ch], DifferentPipeline)
        lep_fake_es_variations[ch] += create_systematic_variations("CMS_ZLShape_%s_1prong1pizero_Run2016"% (ch), "tau%sFakeEsOneProngPiZeros"%fakelep_dict[ch], DifferentPipeline)

    # Lepton trigger efficiency; the same values for (MC & EMB) and (mt & et)
    # pt_1 thresholds per era and channel used in the trigger weight expressions.
    thresh_dict = {"2016": {"mt": 23., "et": 23.},
                   "2017": {"mt": 25., "et": 28.},
                   "2018": {"mt": 25., "et": 28.}}
    lep_trigger_eff_variations = {}
    for ch in ["mt", "et"]:
        lep_trigger_eff_variations[ch] = {}
        for unctype in ["", "_emb"]:
            lep_trigger_eff_variations[ch][unctype] = []
            lep_trigger_eff_variations[ch][unctype].append(AddWeight("CMS_eff_trigger%s_%s_Run2016"%(unctype, ch), "trg_%s_eff_weight"%ch, Weight("(1.0*(pt_1<={0})+1.02*(pt_1>{0}))".format(thresh_dict[args.era][ch]), "trg_%s_eff_weight"%ch), "Up"))
            lep_trigger_eff_variations[ch][unctype].append(AddWeight("CMS_eff_trigger%s_%s_Run2016"%(unctype, ch), "trg_%s_eff_weight"%ch, Weight("(1.0*(pt_1<={0})+0.98*(pt_1>{0}))".format(thresh_dict[args.era][ch]), "trg_%s_eff_weight"%ch), "Down"))
            lep_trigger_eff_variations[ch][unctype].append(AddWeight("CMS_eff_xtrigger%s_%s_Run2016"%(unctype, ch), "xtrg_%s_eff_weight"%ch, Weight("(1.054*(pt_1<={0})+1.0*(pt_1>{0}))".format(thresh_dict[args.era][ch]), "xtrg_%s_eff_weight"%ch), "Up"))
            lep_trigger_eff_variations[ch][unctype].append(AddWeight("CMS_eff_xtrigger%s_%s_Run2016"%(unctype, ch), "xtrg_%s_eff_weight"%ch, Weight("(0.946*(pt_1<={0})+1.0*(pt_1>{0}))".format(thresh_dict[args.era][ch]), "xtrg_%s_eff_weight"%ch), "Down"))

    # Fake factor uncertainties
    fake_factor_variations = {}
    for ch in ["mt", "et", "tt"]:
        fake_factor_variations[ch] = []
        if ch in ["mt", "et"]:
            for systematic_shift in [
                    "ff_qcd{ch}_syst_Run2016{shift}",
                    "ff_qcd_dm0_njet0{ch}_stat_Run2016{shift}",
                    "ff_qcd_dm0_njet1{ch}_stat_Run2016{shift}",
                    "ff_w_syst_Run2016{shift}",
                    "ff_w_dm0_njet0{ch}_stat_Run2016{shift}",
                    "ff_w_dm0_njet1{ch}_stat_Run2016{shift}",
                    "ff_tt_syst_Run2016{shift}",
                    "ff_tt_dm0_njet0_stat_Run2016{shift}",
                    "ff_tt_dm0_njet1_stat_Run2016{shift}",
            ]:
                for shift_direction in ["Up", "Down"]:
                    fake_factor_variations[ch].append(ReplaceWeight("CMS_%s" % (systematic_shift.format(ch="_"+ch, shift="").replace("_dm0", "")), "fake_factor", Weight("ff2_{syst}".format(syst=systematic_shift.format(ch="", shift="_%s" % shift_direction.lower()).replace("_Run2016", "")), "fake_factor"), shift_direction))
        elif ch == "tt":
            for systematic_shift in [
                    "ff_qcd{ch}_syst_Run2016{shift}",
                    "ff_qcd_dm0_njet0{ch}_stat_Run2016{shift}",
                    "ff_qcd_dm0_njet1{ch}_stat_Run2016{shift}",
                    "ff_w{ch}_syst_Run2016{shift}", "ff_tt{ch}_syst_Run2016{shift}",
                    "ff_w_frac{ch}_syst_Run2016{shift}",
                    "ff_tt_frac{ch}_syst_Run2016{shift}"
            ]:
                for shift_direction in ["Up", "Down"]:
                    fake_factor_variations[ch].append(ReplaceWeight("CMS_%s" % (systematic_shift.format(ch="_"+ch, shift="").replace("_dm0", "")), "fake_factor", Weight("(0.5*ff1_{syst}*(byTightDeepTau2017v2p1VSjet_1<0.5)+0.5*ff2_{syst}*(byTightDeepTau2017v2p1VSjet_2<0.5))".format(syst=systematic_shift.format(ch="", shift="_%s" % shift_direction.lower()).replace("_Run2016", "")), "fake_factor"), shift_direction))

    ## Group nicks
    mc_nicks = ["ZL", "TTL", "VVL"] + signal_nicks # to be extended with 'W' in em
    boson_mc_nicks = ["ZL"]         + signal_nicks # to be extended with 'W' in em

    ## Add variations to systematics
    for ch in args.channels:

        channel_mc_nicks = mc_nicks + ["W"] if ch == "em" else mc_nicks
        channel_boson_mc_nicks = boson_mc_nicks + ["W"] if ch == "em" else boson_mc_nicks
        if args.shape_group != "backgrounds":
            channel_mc_nicks = signal_nicks
            channel_boson_mc_nicks = signal_nicks

        # Work on a copy: the `+=` below extends the list in place, and without
        # the copy the channel-specific variations would accumulate in
        # common_mc_variations and be applied to the channels processed later.
        channel_mc_common_variations = list(common_mc_variations)
        if ch in ["et", "em"]:
            channel_mc_common_variations += ele_es_variations
        if ch in ["et", "mt", "tt"]:
            channel_mc_common_variations += tau_es_variations[""] + tau_es_variations["_mc"] + tau_id_variations[ch][""]
        if ch in ["et", "mt"]:
            channel_mc_common_variations += lep_trigger_eff_variations[ch][""]

        # variations common across all shape groups
        for variation in channel_mc_common_variations:
            for process_nick in channel_mc_nicks:
                systematics.add_systematic_variation(variation=variation, process=processes[ch][process_nick], channel=channel_dict[ch], era=era)

        for variation in recoil_variations:
            for process_nick in channel_boson_mc_nicks:
                systematics.add_systematic_variation(variation=variation, process=processes[ch][process_nick], channel=channel_dict[ch], era=era)

        # variations relevant for ggH signals in 'sm_signals' shape group
        if args.shape_group == "sm_signals":
            for variation in ggh_variations:
                for process_nick in [nick for nick in signal_nicks if "ggH" in nick and "HWW" not in nick and "ggH_" not in nick]:
                    systematics.add_systematic_variation(variation=variation, process=processes[ch][process_nick], channel=channel_dict[ch], era=era)

        # variations only relevant for the 'background' shape group
        if args.shape_group == "backgrounds":
            for variation in top_pt_variations:
                # TODO: Needs to be adapted if one wants to use DY MC or QCD estimation(lt,tt: TTT, TTL, TTJ, em: TTT, TTL)
                systematics.add_systematic_variation(variation=variation, process=processes[ch]["TTL"], channel=channel_dict[ch], era=era)

            for variation in met_unclustered_variations:
                for process_nick in ["TTL", "VVL"]:
                    systematics.add_systematic_variation(variation=variation, process=processes[ch][process_nick], channel=channel_dict[ch], era=era)

            # Copy for the same reason as above: keep zpt_variations free of the
            # channel-specific lepton-fake energy scale variations.
            zl_variations = list(zpt_variations)
            if ch in ["et", "mt"]:
                zl_variations += lep_fake_es_variations[ch]
            # TODO: maybe prepare variations for shape production with DY MC and QCD estimation, then applied to ZTT, ZL and ZJ for lt channels and ZTT and ZL for em channel
            for variation in zl_variations:
                systematics.add_systematic_variation(variation=variation, process=processes[ch]["ZL"], channel=channel_dict[ch], era=era)

            if ch == "em":
                for variation in qcd_variations:
                    systematics.add_systematic_variation(variation=variation ,process=processes[ch]["QCD"], channel=channel_dict[ch], era=era)

            if ch in ["mt","et", "tt"]:
                ff_variations = fake_factor_variations[ch] + tau_es_variations[""] + tau_es_variations["_mc"] + tau_es_variations["_emb"]
                for variation in ff_variations:
                    systematics.add_systematic_variation(variation=variation, process=processes[ch]["FAKES"], channel=channel_dict[ch], era=era)

            emb_variations = []
            if ch in ["mt","et", "tt"]:
                emb_variations += tau_es_variations[""] + tau_es_variations["_emb"] + tau_id_variations[ch]["_emb"] + decayMode_variations
            if ch in ["mt", "et"]:
                emb_variations += lep_trigger_eff_variations[ch]["_emb"]
            if ch in ["et", "em"]:
                emb_variations += ele_es_emb_variations
            for variation in emb_variations:
                systematics.add_systematic_variation(variation=variation, process=processes[ch]["EMB"], channel=channel_dict[ch], era=era)

    # Produce histograms
    logger.info("Start producing shapes.")
    systematics.produce()
    logger.info("Done producing shapes.")
Пример #16
0
def main(args):
    # Container for all distributions to be drawn
    systematics = Systematics("impact_parameter_shapes.root",
                              num_threads=args.num_threads)

    # Era
    era = Run2016(args.datasets)

    # Channels and processes
    # yapf: disable
    directory = args.directory
    ee = EESM()
    ee_processes = {
        "data"  : Process("data_obs", DataEstimation  (era, directory, ee, friend_directory=args.friend_directory)),
        "HTT"   : Process("HTT",      HTTEstimation   (era, directory, ee, friend_directory=args.friend_directory)),
        "ggH"   : Process("ggH",      ggHEstimation   (era, directory, ee, friend_directory=args.friend_directory)),
        "qqH"   : Process("qqH",      qqHEstimation   (era, directory, ee, friend_directory=args.friend_directory)),
        "VH"    : Process("VH",       VHEstimation    (era, directory, ee, friend_directory=args.friend_directory)),
        "ZTT"   : Process("ZTT",      ZTTEstimationLL (era, directory, ee, friend_directory=args.friend_directory)),
        "ZL"    : Process("ZL",       ZLEstimationLL  (era, directory, ee, friend_directory=args.friend_directory)),
        "ZJ"    : Process("ZJ",       ZJEstimationLL  (era, directory, ee, friend_directory=args.friend_directory)),
        "W"     : Process("W",        WEstimation     (era, directory, ee, friend_directory=args.friend_directory)),
        "TT"    : Process("TT",       TTEstimation    (era, directory, ee, friend_directory=args.friend_directory)),
        "VV"    : Process("VV",       VVEstimation    (era, directory, ee, friend_directory=args.friend_directory)),
        "EWK"   : Process("EWK",      EWKEstimation   (era, directory, ee, friend_directory=args.friend_directory))
        }
    ee_processes["QCD"] = Process("QCD", QCDEstimationET(era, directory, ee, [ee_processes[process] for process in ["ZTT", "ZJ", "ZL", "W", "TT", "VV", "EWK"]], ee_processes["data"], extrapolation_factor=1.0))
    ee_processes["MC"] = Process("MC", SumUpEstimationMethod("MC", "nominal", era, directory, ee, [ee_processes[process] for process in ["ZTT", "ZJ", "ZL", "W", "TT", "VV", "EWK", "QCD", "HTT"]]))
    em = EMSM()
    em.cuts.remove("ele_iso")
    em.cuts.remove("muon_iso")
    #em.cuts.remove("diLepMetMt")
    #em.cuts.get("pzeta").invert()
    em_processes = {
        "data"  : Process("data_obs", DataEstimation  (era, directory, em, friend_directory=args.friend_directory)),
        "HTT"   : Process("HTT",      HTTEstimation   (era, directory, em, friend_directory=args.friend_directory)),
        "ggH"   : Process("ggH",      ggHEstimation   (era, directory, em, friend_directory=args.friend_directory)),
        "qqH"   : Process("qqH",      qqHEstimation   (era, directory, em, friend_directory=args.friend_directory)),
        "VH"    : Process("VH",       VHEstimation    (era, directory, em, friend_directory=args.friend_directory)),
        "ZTT"   : Process("ZTT",      ZTTEstimationLL (era, directory, em, friend_directory=args.friend_directory)),
        "ZL"    : Process("ZL",       ZLEstimationLL  (era, directory, em, friend_directory=args.friend_directory)),
        "ZJ"    : Process("ZJ",       ZJEstimationLL  (era, directory, em, friend_directory=args.friend_directory)),
        "W"     : Process("W",        WEstimation     (era, directory, em, friend_directory=args.friend_directory)),
        "TT"    : Process("TT",       TTEstimation    (era, directory, em, friend_directory=args.friend_directory)),
        "VV"    : Process("VV",       VVEstimation    (era, directory, em, friend_directory=args.friend_directory)),
        "EWK"   : Process("EWK",      EWKEstimation   (era, directory, em, friend_directory=args.friend_directory))
        }
    em_processes["QCD"] = Process("QCD", QCDEstimationMT(era, directory, em, [em_processes[process] for process in ["ZTT", "ZJ", "ZL", "W", "TT", "VV", "EWK"]], em_processes["data"], extrapolation_factor=1.0))
    em_processes["MC"] = Process("MC", SumUpEstimationMethod("MC", "nominal", era, directory, em, [em_processes[process] for process in ["ZTT", "ZJ", "ZL", "W", "TT", "VV", "EWK", "QCD", "HTT"]]))
    mm = MMSM()
    mm_processes = {
        "data"  : Process("data_obs", DataEstimation  (era, directory, mm, friend_directory=args.friend_directory)),
        "HTT"   : Process("HTT",      HTTEstimation   (era, directory, mm, friend_directory=args.friend_directory)),
        "ggH"   : Process("ggH",      ggHEstimation   (era, directory, mm, friend_directory=args.friend_directory)),
        "qqH"   : Process("qqH",      qqHEstimation   (era, directory, mm, friend_directory=args.friend_directory)),
        "VH"    : Process("VH",       VHEstimation    (era, directory, mm, friend_directory=args.friend_directory)),
        "ZTT"   : Process("ZTT",      ZTTEstimationLL (era, directory, mm, friend_directory=args.friend_directory)),
        "ZL"    : Process("ZL",       ZLEstimationLL  (era, directory, mm, friend_directory=args.friend_directory)),
        "ZJ"    : Process("ZJ",       ZJEstimationLL  (era, directory, mm, friend_directory=args.friend_directory)),
        "W"     : Process("W",        WEstimation     (era, directory, mm, friend_directory=args.friend_directory)),
        "TT"    : Process("TT",       TTEstimation    (era, directory, mm, friend_directory=args.friend_directory)),
        "VV"    : Process("VV",       VVEstimation    (era, directory, mm, friend_directory=args.friend_directory)),
        "EWK"   : Process("EWK",      EWKEstimation   (era, directory, mm, friend_directory=args.friend_directory))
        }
    mm_processes["QCD"] = Process("QCD", QCDEstimationMT(era, directory, mm, [mm_processes[process] for process in ["ZTT", "ZJ", "ZL", "W", "TT", "VV", "EWK"]], mm_processes["data"], extrapolation_factor=1.0))
    mm_processes["MC"] = Process("MC", SumUpEstimationMethod("MC", "nominal", era, directory, mm, [mm_processes[process] for process in ["ZTT", "ZJ", "ZL", "W", "TT", "VV", "EWK", "QCD", "HTT"]]))
    mt = MTSM()
    mt.cuts.remove("muon_iso")
    mt.cuts.remove("tau_iso")
    mt.cuts.remove("m_t")
    mt_processes = {
        "data"  : Process("data_obs", DataEstimation  (era, directory, mt, friend_directory=args.friend_directory)),
        "HTT"   : Process("HTT",      HTTEstimation   (era, directory, mt, friend_directory=args.friend_directory)),
        "ggH"   : Process("ggH",      ggHEstimation   (era, directory, mt, friend_directory=args.friend_directory)),
        "qqH"   : Process("qqH",      qqHEstimation   (era, directory, mt, friend_directory=args.friend_directory)),
        "VH"    : Process("VH",       VHEstimation    (era, directory, mt, friend_directory=args.friend_directory)),
        "ZTT"   : Process("ZTT",      ZTTEstimation   (era, directory, mt, friend_directory=args.friend_directory)),
        "ZL"    : Process("ZL",       ZLEstimationMTSM(era, directory, mt, friend_directory=args.friend_directory)),
        "ZJ"    : Process("ZJ",       ZJEstimationMT  (era, directory, mt, friend_directory=args.friend_directory)),
        "WT"    : Process("WT",       WTEstimation    (era, directory, mt, friend_directory=args.friend_directory)),
        "WL"    : Process("WL",       WLEstimation    (era, directory, mt, friend_directory=args.friend_directory)),
        "TTT"   : Process("TTT",      TTTEstimationMT (era, directory, mt, friend_directory=args.friend_directory)),
        "TTJ"   : Process("TTJ",      TTJEstimationMT (era, directory, mt, friend_directory=args.friend_directory)),
        "VV"    : Process("VV",       VVEstimation    (era, directory, mt, friend_directory=args.friend_directory)),
        "EWK"   : Process("EWK",      EWKEstimation   (era, directory, mt, friend_directory=args.friend_directory))
        }
    mt_processes["QCD"] = Process("QCD", QCDEstimationMT(era, directory, mt, [mt_processes[process] for process in ["ZTT", "ZJ", "ZL", "WT", "WL", "TTT", "TTJ", "VV", "EWK"]], mt_processes["data"], extrapolation_factor=1.17))
    mt_processes["MC"] = Process("MC", SumUpEstimationMethod("MC", "nominal", era, directory, mt, [mt_processes[process] for process in ["ZTT", "ZJ", "ZL", "WT", "WL", "TTT", "TTJ", "VV", "EWK", "QCD", "HTT"]]))
    et = ETSM()
    et.cuts.remove("ele_iso")
    et.cuts.remove("tau_iso")
    et_processes = {
        "data"  : Process("data_obs", DataEstimation  (era, directory, et, friend_directory=args.friend_directory)),
        "HTT"   : Process("HTT",      HTTEstimation   (era, directory, et, friend_directory=args.friend_directory)),
        "ggH"   : Process("ggH",      ggHEstimation   (era, directory, et, friend_directory=args.friend_directory)),
        "qqH"   : Process("qqH",      qqHEstimation   (era, directory, et, friend_directory=args.friend_directory)),
        "VH"    : Process("VH",       VHEstimation    (era, directory, et, friend_directory=args.friend_directory)),
        "ZTT"   : Process("ZTT",      ZTTEstimation   (era, directory, et, friend_directory=args.friend_directory)),
        "ZL"    : Process("ZL",       ZLEstimationETSM(era, directory, et, friend_directory=args.friend_directory)),
        "ZJ"    : Process("ZJ",       ZJEstimationET  (era, directory, et, friend_directory=args.friend_directory)),
        "WT"    : Process("WT",       WTEstimation    (era, directory, et, friend_directory=args.friend_directory)),
        "WL"    : Process("WL",       WLEstimation    (era, directory, et, friend_directory=args.friend_directory)),
        "TTT"   : Process("TTT",      TTTEstimationET (era, directory, et, friend_directory=args.friend_directory)),
        "TTJ"   : Process("TTJ",      TTJEstimationET (era, directory, et, friend_directory=args.friend_directory)),
        "VV"    : Process("VV",       VVEstimation    (era, directory, et, friend_directory=args.friend_directory)),
        "EWK"   : Process("EWK",      EWKEstimation   (era, directory, et, friend_directory=args.friend_directory))
        }
    et_processes["QCD"] = Process("QCD", QCDEstimationET(era, directory, et, [et_processes[process] for process in ["ZTT", "ZJ", "ZL", "WT", "WL", "TTT", "TTJ", "VV", "EWK"]], et_processes["data"], extrapolation_factor=1.16))
    et_processes["MC"] = Process("MC", SumUpEstimationMethod("MC", "nominal", era, directory, et, [et_processes[process] for process in ["ZTT", "ZJ", "ZL", "WT", "WL", "TTT", "TTJ", "VV", "EWK", "QCD", "HTT"]]))
    tt = TTSM()
    tt.cuts.remove("tau_1_iso")
    #tt.cuts.remove("tau_2_iso")
    tt_processes = {
        "data"  : Process("data_obs", DataEstimation (era, directory, tt, friend_directory=args.friend_directory)),
        "HTT"   : Process("HTT",      HTTEstimation  (era, directory, tt, friend_directory=args.friend_directory)),
        "ggH"   : Process("ggH",      ggHEstimation  (era, directory, tt, friend_directory=args.friend_directory)),
        "qqH"   : Process("qqH",      qqHEstimation  (era, directory, tt, friend_directory=args.friend_directory)),
        "VH"    : Process("VH",       VHEstimation   (era, directory, tt, friend_directory=args.friend_directory)),
        "ZTT"   : Process("ZTT",      ZTTEstimationTT(era, directory, tt, friend_directory=args.friend_directory)),
        "ZL"    : Process("ZL",       ZLEstimationTT (era, directory, tt, friend_directory=args.friend_directory)),
        "ZJ"    : Process("ZJ",       ZJEstimationTT (era, directory, tt, friend_directory=args.friend_directory)),
        "W"     : Process("W",        WEstimation    (era, directory, tt, friend_directory=args.friend_directory)),
        "TTT"   : Process("TTT",      TTTEstimationTT(era, directory, tt, friend_directory=args.friend_directory)),
        "TTJ"   : Process("TTJ",      TTJEstimationTT(era, directory, tt, friend_directory=args.friend_directory)),
        "VV"    : Process("VV",       VVEstimation   (era, directory, tt, friend_directory=args.friend_directory)),
        "EWK"   : Process("EWK",      EWKEstimation  (era, directory, tt, friend_directory=args.friend_directory))
        }
    tt_processes["QCD"] = Process("QCD", QCDEstimationTT(era, directory, tt, [tt_processes[process] for process in ["ZTT", "ZJ", "ZL", "W", "TTT", "TTJ", "VV", "EWK"]], tt_processes["data"]))
    tt_processes["MC"] = Process("MC", SumUpEstimationMethod("MC", "nominal", era, directory, tt, [tt_processes[process] for process in ["ZTT", "ZJ", "ZL", "W", "TTT", "TTJ", "VV", "EWK", "QCD", "HTT"]]))

    # Variables and categories
    #binning_ll_0 = [-0.1, -0.08, -0.06, -0.04, -0.02, -0.015, -0.01, -0.008, -0.006, -0.004, -0.003, -0.002, -0.001, 0.0, 0.001, 0.002, 0.003, 0.004, 0.006, 0.008, 0.01, 0.015, 0.02, 0.04, 0.06, 0.08, 0.1]
    #binning_ll_Z = [-0.15, -0.1, -0.08, -0.06, -0.04, -0.02, -0.015, -0.01, -0.008, -0.006, -0.004, -0.003, -0.002, -0.001, 0.0, 0.001, 0.002, 0.003, 0.004, 0.006, 0.008, 0.01, 0.015, 0.02, 0.04, 0.06, 0.08, 0.1, 0.15]
    #binning_ll_0_raw = [-0.05, -0.04, -0.03, -0.02, -0.015, -0.0125, -0.01, -0.009, -0.008, -0.007, -0.006, -0.005, -0.0045, -0.004, -0.0035, -0.003, -0.0025, -0.002, -0.0015, -0.001, -0.0006, -0.0003, 0.0, 0.0003, 0.0006, 0.001, 0.0015, 0.002, 0.0025, 0.003, 0.0035, 0.004, 0.0045, 0.005, 0.006, 0.007, 0.008, 0.009, 0.01, 0.0125, 0.015, 0.02, 0.03, 0.04, 0.05]
    binning_ll_Z_raw = [0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.0006, 0.0007, 0.0008, 0.001, 0.00125, 0.0015, 0.00175, 0.002, 0.00225, 0.0025, 0.00275, 0.003, 0.00325, 0.0035, 0.00375, 0.004, 0.0045, 0.005, 0.0055, 0.006, 0.0065, 0.007, 0.0075, 0.008, 0.0085, 0.009, 0.0095, 0.01, 0.011, 0.012, 0.013, 0.014, 0.015, 0.016, 0.018, 0.02, 0.0225, 0.025, 0.0275, 0.03, 0.035, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.11, 0.12, 0.13, 0.15, 0.2]
    binning_ll_0_raw = [0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.0006, 0.0007, 0.0008, 0.001, 0.00125, 0.0015, 0.00175, 0.002, 0.00225, 0.0025, 0.00275, 0.003, 0.00325, 0.0035, 0.00375, 0.004, 0.00425, 0.0045, 0.00475, 0.005, 0.0055, 0.006, 0.0065, 0.007, 0.0075, 0.008, 0.0085, 0.009, 0.0095, 0.01, 0.012, 0.014, 0.017, 0.02, 0.025, 0.03, 0.035, 0.04, 0.045]
    binning_ll_Zerr_raw = [0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.8, 2.0, 2.25, 2.5, 2.75, 3.0, 3.25, 3.5, 3.75, 4.0, 4.25, 4.5, 4.75, 5.0, 5.5, 6.0, 7.0, 8.0, 9.0, 10.0, 12.0, 15.0, 20.0, 30.0, 45.0, 70.0, 100.0, 150.0]
    binning_ll_0err_raw = [0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.8, 2.0, 2.25, 2.5, 2.75, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 7.0, 8.0, 9.0, 10.0, 12.0, 15.0, 20.0, 30.0]
    binning_et_Z_raw = [0.001, 0.004, 0.01, 0.02, 0.04, 0.2]
    binning_et_0_raw = [0.0005, 0.002, 0.005, 0.045]
    binning_mt_Z_raw = [0.0003, 0.0006, 0.001, 0.0015, 0.00225, 0.003, 0.004, 0.0055, 0.0075, 0.01, 0.015, 0.02, 0.04, 0.2]
    binning_mt_0_raw = [0.0001, 0.0003, 0.0006, 0.001, 0.00175, 0.0025, 0.005, 0.01, 0.045]
    binning_em_Z_raw = [0.0003, 0.0006, 0.001, 0.0015, 0.00225, 0.003, 0.004, 0.0055, 0.0075, 0.01, 0.015, 0.02, 0.04, 0.08, 0.15, 0.2] # 0.08, 0.15 are only there to keep the splines in a good region
    binning_em_0_raw = [0.0001, 0.0003, 0.0006, 0.001, 0.00175, 0.0025, 0.005, 0.01, 0.02, 0.032, 0.045] #0.02, 0.032 are only to keep splines in good region

    binning_ll_0 = []
    for i in range(len(binning_ll_0_raw)):
        binning_ll_0.append(-1*binning_ll_0_raw[-i-1])
    binning_ll_0 += [0.0] + binning_ll_0_raw
    binning_ll_Z = []
    for i in range(len(binning_ll_Z_raw)):
        binning_ll_Z.append(-1*binning_ll_Z_raw[-i-1])
    binning_ll_Z += [0.0] + binning_ll_Z_raw
    binning_ll_0err = []
    for i in range(len(binning_ll_0err_raw)):
        binning_ll_0err.append(-1*binning_ll_0err_raw[-i-1])
    binning_ll_0err += [0.0] + binning_ll_0err_raw
    binning_ll_Zerr = []
    for i in range(len(binning_ll_Zerr_raw)):
        binning_ll_Zerr.append(-1*binning_ll_Zerr_raw[-i-1])
    binning_ll_Zerr += [0.0] + binning_ll_Zerr_raw
    binning_et_0 = []
    for i in range(len(binning_et_0_raw)):
        binning_et_0.append(-1*binning_et_0_raw[-i-1])
    binning_et_0 += binning_et_0_raw
    binning_et_Z = []
    for i in range(len(binning_et_Z_raw)):
        binning_et_Z.append(-1*binning_et_Z_raw[-i-1])
    binning_et_Z += binning_et_Z_raw
    binning_et_0err = []
    binning_mt_0 = []
    for i in range(len(binning_mt_0_raw)):
        binning_mt_0.append(-1*binning_mt_0_raw[-i-1])
    binning_mt_0 += binning_mt_0_raw
    binning_mt_Z = []
    for i in range(len(binning_mt_Z_raw)):
        binning_mt_Z.append(-1*binning_mt_Z_raw[-i-1])
    binning_mt_Z += [0.0] + binning_mt_Z_raw
    binning_mt_0err = []
    binning_em_0 = []
    for i in range(len(binning_em_0_raw)):
        binning_em_0.append(-1*binning_em_0_raw[-i-1])
    binning_em_0 += binning_em_0_raw
    binning_em_Z = []
    for i in range(len(binning_em_Z_raw)):
        binning_em_Z.append(-1*binning_em_Z_raw[-i-1])
    binning_em_Z += [0.0] + binning_em_Z_raw
    binning_em_0err = []

    binning_tau = [-0.1, -0.08, -0.06, -0.04, -0.02, -0.015, -0.01, -0.008, -0.006, -0.004, -0.002, 0.0, 0.002, 0.004, 0.006, 0.008, 0.01, 0.015, 0.02, 0.04, 0.06, 0.08, 0.1]
    d0_1 = Variable("d0_1", VariableBinning(binning_ll_0))
    d0_1_calib = Variable("d0_1_calib", VariableBinning(binning_ll_0))
    d0_1_calib_all = Variable("d0_1_calib_all", VariableBinning(binning_ll_0))
    d0_te = Variable("d0_1", VariableBinning(binning_mt_0))
    d0_te_calib = Variable("d0_1_calib", VariableBinning(binning_mt_0))
    d0_te_calib_all = Variable("d0_1_calib_all", VariableBinning(binning_mt_0))
    d0_tm = Variable("d0_1", VariableBinning(binning_mt_0))
    d0_tm_calib = Variable("d0_1_calib", VariableBinning(binning_mt_0))
    d0_tm_calib_all = Variable("d0_1_calib_all", VariableBinning(binning_mt_0))
    #d0_1 = Variable("m_vis", VariableBinning([50.+x*5. for x in range(21)]))
    d0_2 = Variable("d0_2", VariableBinning(binning_tau))
    #d0_2 = Variable("m_vis", VariableBinning([50.+x*5. for x in range(21)]))
    dZ_1 = Variable("dZ_1", VariableBinning(binning_ll_Z))
    dZ_1_calib = Variable("dZ_1_calib", VariableBinning(binning_ll_Z))
    dZ_1_calib_all = Variable("dZ_1_calib_all", VariableBinning(binning_ll_Z))
    dZ_te = Variable("dZ_1", VariableBinning(binning_mt_Z))
    dZ_te_calib = Variable("dZ_1_calib", VariableBinning(binning_mt_Z))
    dZ_te_calib_all = Variable("dZ_1_calib_all", VariableBinning(binning_mt_Z))
    dZ_tm = Variable("dZ_1", VariableBinning(binning_mt_Z))
    dZ_tm_calib = Variable("dZ_1_calib", VariableBinning(binning_mt_Z))
    dZ_tm_calib_all = Variable("dZ_1_calib_all", VariableBinning(binning_mt_Z))
    dZ_2 = Variable("dZ_2", VariableBinning(binning_tau))
    DCA0_1 = Variable("DCA0_1", VariableBinning(binning_ll_0err), "d0_1/lep1ErrD0")
    DCAZ_1 = Variable("DCAZ_1", VariableBinning(binning_ll_Zerr), "dZ_1/lep1ErrDz")
    DCA0_2 = Variable("DCA0_2", VariableBinning(binning_ll_0err), "d0_2/lep2ErrD0")
    DCAZ_2 = Variable("DCAZ_2", VariableBinning(binning_ll_Zerr), "dZ_2/lep2ErrDz")
    d0_em_te = Variable("d0_1", VariableBinning(binning_em_0))
    d0_em_te_calib_all = Variable("d0_1_calib_all", VariableBinning(binning_em_0))
    d0_em_tm = Variable("d0_2", VariableBinning(binning_em_0))
    d0_em_tm_calib_all = Variable("d0_2_calib_all", VariableBinning(binning_em_0))
    dZ_em_te = Variable("dZ_1", VariableBinning(binning_em_Z))
    dZ_em_te_calib_all = Variable("dZ_1_calib_all", VariableBinning(binning_em_Z))
    dZ_em_tm = Variable("dZ_2", VariableBinning(binning_em_Z))
    dZ_em_tm_calib_all = Variable("dZ_2_calib_all", VariableBinning(binning_em_Z))

    m_vis = Variable("m_vis", VariableBinning([50.+x*5. for x in range(21)]))
    mT_1 = Variable("mt_1", VariableBinning([0.+x*5. for x in range(41)]))
    mT_2 = Variable("mt_2", VariableBinning([0.+x*5. for x in range(41)]))
    met = Variable("met", VariableBinning([0.+x*5. for x in range(41)]))

    ee_categories = []
    if "ee" in args.channels:
        ee_categories.append(
            Category(
                "m_vis",
                ee,
                Cuts(),
                variable=m_vis))
        ee_categories.append(
            Category(
                "d0_e",
                ee,
                Cuts(
                    Cut("m_vis>80 && m_vis<100", "Zpeak")
                    ),
                variable=d0_1))
        ee_categories.append(
            Category(
                "dZ_e",
                ee,
                Cuts(
                    Cut("m_vis>80 && m_vis<100", "Zpeak")
                    ),
                variable=dZ_1))
        ee_categories.append(
            Category(
                "DCA0_e",
                ee,
                Cuts(
                    Cut("m_vis>80 && m_vis<100", "Zpeak")
                    ),
                variable=DCA0_1))
        ee_categories.append(
            Category(
                "DCAZ_e",
                ee,
                Cuts(
                    Cut("m_vis>80 && m_vis<100", "Zpeak")
                    ),
                variable=DCAZ_1))
        #calibrated
        '''ee_categories.append(
            Category(
                "d0_e",
                ee,
                Cuts(
                    Cut("m_vis>80 && m_vis<100", "Zpeak")
                    ),
                variable=d0_1_calib))
        ee_categories.append(
            Category(
                "dZ_e",
                ee,
                Cuts(
                    Cut("m_vis>80 && m_vis<100", "Zpeak")
                    ),
                variable=dZ_1_calib))
        ee_categories.append(
            Category(
                "d0_e",
                ee,
                Cuts(
                    Cut("m_vis>80 && m_vis<100", "Zpeak")
                    ),
                variable=d0_1_calib_all))
        ee_categories.append(
            Category(
                "dZ_e",
                ee,
                Cuts(
                    Cut("m_vis>80 && m_vis<100", "Zpeak")
                    ),
                variable=dZ_1_calib_all))
    '''
    em_categories = []
    if "em" in args.channels:
        '''em_categories.append(
            Category(
                "m_vis_te",
                em,
                Cuts(
                    Cut("iso_1<0.1", "ele_iso"),
                    Cut("iso_2>0.15&&iso_2<0.25", "muon_antiiso")),
                    #Cut("pZetaMissVis<-50.", "pzetatight")),
                variable=m_vis))
        em_categories.append(
            Category(
                "m_vis_tm",
                em,
                Cuts(
                    Cut("iso_1>0.1&&iso_1<0.2", "ele_antiiso"),
                    Cut("iso_2<0.15", "muon_iso")),
                variable=m_vis))'''
        em_categories.append(
            Category(
                "d0_em_te",
                em,
                Cuts(
                    Cut("iso_1<0.15", "ele_iso"),
                    Cut("iso_2<0.2", "muon_iso"),
                    Cut("m_vis<80", "Zpeak")),
                variable=d0_em_te))
        em_categories.append(
            Category(
                "d0_em_tm",
                em,
                Cuts(
                    Cut("iso_1<0.15", "ele_iso"),
                    Cut("iso_2<0.2", "muon_iso"),
                    Cut("m_vis<80", "Zpeak")),
                variable=d0_em_tm))
        em_categories.append(
            Category(
                "dZ_em_te",
                em,
                Cuts(
                    Cut("iso_1<0.15", "ele_iso"),
                    Cut("iso_2<0.2", "muon_iso"),
                    Cut("m_vis<80", "Zpeak")),
                variable=dZ_em_te))
        em_categories.append(
            Category(
                "dZ_em_tm",
                em,
                Cuts(
                    Cut("iso_1<0.15", "ele_iso"),
                    Cut("iso_2<0.2", "muon_iso"),
                    Cut("m_vis<80", "Zpeak")),
                variable=dZ_em_tm))
        #calibrated
        '''em_categories.append(
            Category(
                "d0_em_te",
                em,
                Cuts(
                    Cut("iso_1<0.15", "ele_iso"),
                    Cut("iso_2<0.2", "muon_iso"),
                    Cut("m_vis<80", "Zpeak")),
                variable=d0_em_te_calib_all))
        em_categories.append(
            Category(
                "d0_em_tm",
                em,
                Cuts(
                    Cut("iso_1<0.15", "ele_iso"),
                    Cut("iso_2<0.2", "muon_iso"),
                    Cut("m_vis<80", "Zpeak")),
                variable=d0_em_tm_calib_all))
        em_categories.append(
            Category(
                "dZ_em_te",
                em,
                Cuts(
                    Cut("iso_1<0.15", "ele_iso"),
                    Cut("iso_2<0.2", "muon_iso"),
                    Cut("m_vis<80", "Zpeak")),
                variable=dZ_em_te_calib_all))
        em_categories.append(
            Category(
                "dZ_em_tm",
                em,
                Cuts(
                    Cut("iso_1<0.15", "ele_iso"),
                    Cut("iso_2<0.2", "muon_iso"),
                    Cut("m_vis<80", "Zpeak")),
                variable=dZ_em_tm_calib_all))
    '''
    mm_categories = []
    if "mm" in args.channels:
        mm_categories.append(
            Category(
                "m_vis",
                mm,
                Cuts(),
                variable=m_vis))
        mm_categories.append(
            Category(
                "d0_m",
                mm,
                Cuts(
                    Cut("m_vis>80 && m_vis<100", "Zpeak")
                    ),
                variable=d0_1))
        mm_categories.append(
            Category(
                "dZ_m",
                mm,
                Cuts(
                    Cut("m_vis>80 && m_vis<100", "Zpeak")
                    ),
                variable=dZ_1))
        mm_categories.append(
            Category(
                "DCA0_m",
                mm,
                Cuts(
                    Cut("m_vis>80 && m_vis<100", "Zpeak")
                    ),
                variable=DCA0_1))
        mm_categories.append(
            Category(
                "DCAZ_m",
                mm,
                Cuts(
                    Cut("m_vis>80 && m_vis<100", "Zpeak")
                    ),
                variable=DCAZ_1))
        #calibrated
        '''mm_categories.append(
            Category(
                "d0_m",
                mm,
                Cuts(
                    Cut("m_vis>80 && m_vis<100", "Zpeak")
                    ),
                variable=d0_1_calib))
        mm_categories.append(
            Category(
                "dZ_m",
                mm,
                Cuts(
                    Cut("m_vis>80 && m_vis<100", "Zpeak")
                    ),
                variable=dZ_1_calib))
        mm_categories.append(
            Category(
                "d0_m",
                mm,
                Cuts(
                    Cut("m_vis>80 && m_vis<100", "Zpeak")
                    ),
                variable=d0_1_calib_all))
        mm_categories.append(
            Category(
                "dZ_m",
                mm,
                Cuts(
                    Cut("m_vis>80 && m_vis<100", "Zpeak")
                    ),
                variable=dZ_1_calib_all))
    '''
    et_categories = []
    if "et" in args.channels:
        et_categories.append(
            Category(
                "d0_te",
                et,
                Cuts(
                    Cut("iso_1<0.1", "ele_iso"),
                    Cut("byTightIsolationMVArun2v1DBoldDMwLT_2>0.5", "tau_iso")),
                variable=d0_te))
        et_categories.append(
            Category(
                "dZ_te",
                et,
                Cuts(
                    Cut("iso_1<0.1", "ele_iso"),
                    Cut("byTightIsolationMVArun2v1DBoldDMwLT_2>0.5", "tau_iso")),
                variable=dZ_te))
        et_categories.append(
            Category(
                "d0_te",
                et,
                Cuts(
                    Cut("iso_1<0.1", "ele_iso"),
                    Cut("byTightIsolationMVArun2v1DBoldDMwLT_2>0.5", "tau_iso")),
                variable=d0_te_calib))
        et_categories.append(
            Category(
                "dZ_te",
                et,
                Cuts(
                    Cut("iso_1<0.1", "ele_iso"),
                    Cut("byTightIsolationMVArun2v1DBoldDMwLT_2>0.5", "tau_iso")),
                variable=dZ_te_calib))
        '''
        et_categories.append(
            Category(
                "m_vis",
                et,
                Cuts(
                    #Cut("m_vis>60 && m_vis<75", "Zpeak"),
                    Cut("nbtag==0", "bveto"),
                    Cut("iso_1<0.1", "ele_iso"),
                    Cut("mt_2<100", "tau_mt"),
                    Cut("abs(eta_2)<1.0", "tau_eta"),
                    Cut("byTightIsolationMVArun2v1DBoldDMwLT_2<0.5 && byMediumIsolationMVArun2v1DBoldDMwLT_2>0.5", "tau_antiiso")),
                variable=m_vis))
        et_categories.append(
            Category(
                "d0_te",
                et,
                Cuts(
                    Cut("m_vis>60 && m_vis<75", "Zpeak"),
                    Cut("nbtag==0", "bveto"),
                    Cut("iso_1<0.1", "ele_iso"),
                    Cut("mt_2<100", "tau_mt"),
                    Cut("abs(eta_2)<1.0", "tau_eta"),
                    Cut("byTightIsolationMVArun2v1DBoldDMwLT_2<0.5 && byMediumIsolationMVArun2v1DBoldDMwLT_2>0.5", "tau_antiiso")),
                variable=d0_te))
        et_categories.append(
            Category(
                "dZ_te",
                et,
                Cuts(
                    Cut("m_vis>60 && m_vis<75", "Zpeak"),
                    Cut("nbtag==0", "bveto"),
                    Cut("iso_1<0.1", "ele_iso"),
                    Cut("mt_2<100", "tau_mt"),
                    Cut("abs(eta_2)<1.0", "tau_eta"),
                    Cut("byTightIsolationMVArun2v1DBoldDMwLT_2<0.5 && byMediumIsolationMVArun2v1DBoldDMwLT_2>0.5", "tau_antiiso")),
                variable=dZ_te))
        et_categories.append(
            Category(
                "d0_t",
                et,
                Cuts(
                    Cut("m_vis>60 && m_vis<75", "Zpeak"),
                    Cut("iso_1>0.1 && iso_1<0.2", "ele_antiiso"),
                    Cut("abs(eta_1)<1.0", "ele_eta"),
                    Cut("byTightIsolationMVArun2v1DBoldDMwLT_2>0.5", "tau_iso")),
                variable=d0_2))
        et_categories.append(
            Category(
                "dZ_t",
                et,
                Cuts(
                    Cut("m_vis>60 && m_vis<75", "Zpeak"),
                    Cut("iso_1>0.1 && iso_1<0.2", "ele_antiiso"),
                    Cut("abs(eta_1)<1.0", "ele_eta"),
                    Cut("byTightIsolationMVArun2v1DBoldDMwLT_2>0.5", "tau_iso")),
                variable=dZ_2))'''
    mt_categories = []
    if "mt" in args.channels:
        mt_categories.append(
            Category(
                "d0_tm",
                mt,
                Cuts(
                    Cut("mt_1<50", "m_t"),
                    Cut("iso_1<0.15", "muon_iso"),
                    Cut("byTightIsolationMVArun2v1DBoldDMwLT_2>0.5", "tau_iso")),
                variable=d0_tm))
        mt_categories.append(
            Category(
                "dZ_tm",
                mt,
                Cuts(
                    Cut("mt_1<50", "m_t"),
                    Cut("iso_1<0.15", "muon_iso"),
                    Cut("byTightIsolationMVArun2v1DBoldDMwLT_2>0.5", "tau_iso")),
                variable=dZ_tm))
        mt_categories.append(
            Category(
                "d0_tm",
                mt,
                Cuts(
                    Cut("mt_1<50", "m_t"),
                    Cut("iso_1<0.15", "muon_iso"),
                    Cut("byTightIsolationMVArun2v1DBoldDMwLT_2>0.5", "tau_iso")),
                variable=d0_tm_calib))
        mt_categories.append(
            Category(
                "dZ_tm",
                mt,
                Cuts(
                    Cut("mt_1<50", "m_t"),
                    Cut("iso_1<0.15", "muon_iso"),
                    Cut("byTightIsolationMVArun2v1DBoldDMwLT_2>0.5", "tau_iso")),
                variable=dZ_tm_calib))
        '''
        mt_categories.append(
            Category(
                "m_vis",
                mt,
                Cuts(
                    #Cut("m_vis>55 && m_vis<75", "Zpeak"),
                    Cut("nbtag==0", "bveto"),
                    Cut("mt_1<50", "m_t"),
                    Cut("iso_1<0.15", "muon_iso"),
                    Cut("mt_2<100", "tau_mt"),
                    Cut("abs(eta_2)<1.0", "tau_eta"),
                    Cut("byTightIsolationMVArun2v1DBoldDMwLT_2<0.5 && byMediumIsolationMVArun2v1DBoldDMwLT_2>0.5", "tau_antiiso")),
                variable=m_vis))
        mt_categories.append(
            Category(
                "d0_tm",
                mt,
                Cuts(
                    Cut("m_vis>55 && m_vis<75", "Zpeak"),
                    Cut("nbtag==0", "bveto"),
                    Cut("mt_1<50", "m_t"),
                    Cut("iso_1<0.15", "muon_iso"),
                    Cut("mt_2<100", "tau_mt"),
                    Cut("abs(eta_2)<1.0", "tau_eta"),
                    Cut("byTightIsolationMVArun2v1DBoldDMwLT_2<0.5 && byMediumIsolationMVArun2v1DBoldDMwLT_2>0.5", "tau_antiiso")),
                variable=d0_tm))
        mt_categories.append(
            Category(
                "dZ_tm",
                mt,
                Cuts(
                    Cut("m_vis>55 && m_vis<75", "Zpeak"),
                    Cut("nbtag==0", "bveto"),
                    Cut("mt_1<50", "m_t"),
                    Cut("iso_1<0.15", "muon_iso"),
                    Cut("mt_2<100", "tau_mt"),
                    Cut("abs(eta_2)<1.0", "tau_eta"),
                    Cut("byTightIsolationMVArun2v1DBoldDMwLT_2<0.5 && byMediumIsolationMVArun2v1DBoldDMwLT_2>0.5", "tau_antiiso")),
                variable=dZ_tm))
        mt_categories.append(
            Category(
                "d0_t",
                mt,
                Cuts(
                    Cut("m_vis>55 && m_vis<70", "Zpeak"),
                    Cut("mt_1<50", "m_t"),
                    Cut("iso_1>0.15 && iso_1<0.25", "muon_antiiso"),
                    Cut("abs(eta_1)<1.0", "muon_eta"),
                    Cut("byTightIsolationMVArun2v1DBoldDMwLT_2>0.5", "tau_iso")),
                variable=d0_2))
        mt_categories.append(
            Category(
                "dZ_t",
                mt,
                Cuts(
                    Cut("m_vis>55 && m_vis<70", "Zpeak"),
                    Cut("mt_1<50", "m_t"),
                    Cut("iso_1>0.15 && iso_1<0.25", "muon_antiiso"),
                    Cut("abs(eta_1)<1.0", "muon_eta"),
                    Cut("byTightIsolationMVArun2v1DBoldDMwLT_2>0.5", "tau_iso")),
                variable=dZ_2))
        mt_categories.append(
            Category(
                "d0_f",
                mt,
                Cuts(
                    #Cut("m_vis>55 && m_vis<70", "antiZpeak"),
                    Cut("mt_1>70 && mt_1 < 100", "m_t"),
                    Cut("iso_1<0.15", "muon_iso"),
                    Cut("byTightIsolationMVArun2v1DBoldDMwLT_2>0.5", "tau_iso")),
                variable=d0_2))
        mt_categories.append(
            Category(
                "dZ_f",
                mt,
                Cuts(
                    Cut("m_vis>95", "antiZpeak"),
                    Cut("mt_1>70 && mt_1 < 100", "m_t"),
                    Cut("iso_1<0.15", "muon_iso"),
                    Cut("byTightIsolationMVArun2v1DBoldDMwLT_2>0.5", "tau_iso")),
                variable=dZ_2))
        mt_categories.append(
            Category(
                "d0_tt",
                mt,
                Cuts(
                    Cut("mt_1>150", "m_t"),
                    Cut("iso_1<0.15", "muon_iso"),
                    Cut("byTightIsolationMVArun2v1DBoldDMwLT_2>0.5", "tau_iso")),
                variable=d0_2))
        mt_categories.append(
            Category(
                "dZ_tt",
                mt,
                Cuts(
                    Cut("mt_1>150", "m_t"),
                    Cut("iso_1<0.15", "muon_iso"),
                    Cut("byTightIsolationMVArun2v1DBoldDMwLT_2>0.5", "tau_iso")),
                variable=dZ_2))'''

    tt_categories = []
    if "tt" in args.channels:
        tt_categories.append(
            Category(
                "d0_t2",
                tt,
                Cuts(
                    Cut("m_vis>60 && m_vis<80", "Zpeak"),
                    Cut("abs(eta_1)<1.0", "eta_tau_1"),
                    Cut("byTightIsolationMVArun2v1DBoldDMwLT_1<0.5 && byLooseIsolationMVArun2v1DBoldDMwLT_1>0.5", "tau_1_antiiso")),
                variable=d0_2))
        tt_categories.append(
            Category(
                "dZ_t2",
                tt,
                Cuts(
                    Cut("m_vis>60 && m_vis<80", "Zpeak"),
                    Cut("abs(eta_1)<1.0", "eta_tau_1"),
                    Cut("byTightIsolationMVArun2v1DBoldDMwLT_1<0.5 && byLooseIsolationMVArun2v1DBoldDMwLT_1>0.5", "tau_1_antiiso")),
                variable=dZ_2))
    # Nominal histograms
    # yapf: enable
    if "ee" in args.channels:
        for process, category in product(ee_processes.values(), ee_categories):
            systematics.add(
                Systematic(category=category,
                           process=process,
                           analysis="smhtt",
                           era=era,
                           variation=Nominal(),
                           mass="125"))
    if "em" in args.channels:
        for process, category in product(em_processes.values(), em_categories):
            systematics.add(
                Systematic(category=category,
                           process=process,
                           analysis="smhtt",
                           era=era,
                           variation=Nominal(),
                           mass="125"))
    if "mm" in args.channels:
        for process, category in product(mm_processes.values(), mm_categories):
            systematics.add(
                Systematic(category=category,
                           process=process,
                           analysis="smhtt",
                           era=era,
                           variation=Nominal(),
                           mass="125"))
    if "et" in args.channels:
        for process, category in product(et_processes.values(), et_categories):
            systematics.add(
                Systematic(category=category,
                           process=process,
                           analysis="smhtt",
                           era=era,
                           variation=Nominal(),
                           mass="125"))
    if "mt" in args.channels:
        for process, category in product(mt_processes.values(), mt_categories):
            systematics.add(
                Systematic(category=category,
                           process=process,
                           analysis="smhtt",
                           era=era,
                           variation=Nominal(),
                           mass="125"))
    if "tt" in args.channels:
        for process, category in product(tt_processes.values(), tt_categories):
            systematics.add(
                Systematic(category=category,
                           process=process,
                           analysis="smhtt",
                           era=era,
                           variation=Nominal(),
                           mass="125"))

    # Produce histograms
    systematics.produce()
Пример #17
0
def _make_channel(channel_name, year):
    """Instantiate the channel object for a channel name and year.

    The channel classes are referenced inside lambdas so that a class name is
    only resolved when that exact (channel, year) combination is requested.

    Args:
        channel_name: Channel identifier, one of "mt", "et", "tt" or "em".
        year: Data-taking year as a string, one of "2016", "2017" or "2018".

    Returns:
        A new channel instance, or ``None`` if the combination is not
        supported.
    """
    factories = {
        ("mt", "2016"): lambda: MTSM2016(),
        ("mt", "2017"): lambda: MTSM2017(),
        ("mt", "2018"): lambda: MTSM2018(),
        ("et", "2016"): lambda: ETSM2016(),
        ("et", "2017"): lambda: ETSM2017(),
        ("et", "2018"): lambda: ETSM2018(),
        ("tt", "2016"): lambda: TTSM2016(),
        ("tt", "2017"): lambda: TTSM2017(),
        ("tt", "2018"): lambda: TTSM2018(),
        ("em", "2016"): lambda: EMSM2016(),
        ("em", "2017"): lambda: EMSM2017(),
        ("em", "2018"): lambda: EMSM2018(),
    }
    factory = factories.get((channel_name, year))
    if factory is None:
        return None
    return factory()


def _list_channel_folders(path, channel_name):
    """Return the names of all keys in a ROOT file that belong to a channel.

    A key is selected when its name contains ``"<channel_name>_"``.

    Args:
        path: Path of the ROOT file to inspect.
        channel_name: Channel identifier used to build the name filter.

    Returns:
        List of matching key names, in the order ROOT reports them.
    """
    pattern = "{}_".format(channel_name)
    root_file = ROOT.TFile(path)
    try:
        return [
            key.GetName() for key in root_file.GetListOfKeys()
            if pattern in key.GetName()
        ]
    finally:
        # Close the file even if reading the key list fails.
        root_file.Close()


def _add_files_to_filelist(filelist, estimations, channel_name):
    """Record the channel folders of every file used by the estimations.

    Args:
        filelist: Dict updated in place; maps file path -> list of folder
            names found in that file.
        estimations: Iterable of estimation method objects providing ``name``
            and ``get_files()``.
        channel_name: Channel identifier used to select the folders.

    Raises:
        Exception: If one of the files reported by an estimation method does
            not exist on disk.
    """
    for estimation in estimations:
        logger.debug("Get files for estimation method %s.", estimation.name)
        for path in [str(f) for f in estimation.get_files()]:
            if not os.path.exists(path):
                logger.fatal("File does not exist: %s", path)
                raise Exception("File does not exist: {}".format(path))
            filelist[path] = _list_channel_folders(path, channel_name)


def main(args):
    """Write a YAML filelist of input ROOT files and their channel folders.

    For the requested era and channel, all estimation methods are
    instantiated, their input files are collected, and for each file the
    names of the keys containing ``"<channel>_"`` are stored. The resulting
    mapping (file path -> list of folder names) is dumped to ``args.output``.

    Args:
        args: Parsed command line arguments. The attributes read here are
            ``era``, ``database``, ``channel``, ``directory`` and ``output``.

    Raises:
        Exception: If the era is not implemented or an input file is missing.
    """
    # Mapping of input file path -> list of folder names for the channel
    filelist = {}

    # Define era
    if "2016" in args.era:
        from shape_producer.estimation_methods_2016 import (
            DataEstimation, ggHEstimation, qqHEstimation, ZTTEstimation,
            ZLEstimation, ZJEstimation, WEstimation, TTTEstimation,
            TTJEstimation, ZTTEmbeddedEstimation, TTLEstimation,
            EWKZEstimation, VVLEstimation, VVJEstimation, VVEstimation,
            VVTEstimation, VHEstimation, EWKWpEstimation, EWKWmEstimation,
            ttHEstimation, ggHWWEstimation, qqHWWEstimation)
        #QCDEstimation_SStoOS_MTETEM, QCDEstimationTT, HTTEstimation,

        from shape_producer.era import Run2016
        era = Run2016(args.database)
    elif "2017" in args.era:
        from shape_producer.estimation_methods_2017 import (
            DataEstimation, ZTTEstimation, ZLEstimation, ZJEstimation,
            TTLEstimation, TTJEstimation, TTTEstimation, VVTEstimation,
            VVJEstimation, VVLEstimation, WEstimation, ggHEstimation,
            qqHEstimation, VHEstimation, EWKZEstimation,
            ZTTEmbeddedEstimation, ttHEstimation)

        from shape_producer.era import Run2017
        era = Run2017(args.database)
    elif "2018" in args.era:
        from shape_producer.estimation_methods_2018 import (
            DataEstimation, ZTTEstimation, ZLEstimation, ZJEstimation,
            TTLEstimation, TTJEstimation, TTTEstimation, VVTEstimation,
            VVJEstimation, VVLEstimation, WEstimation, ggHEstimation,
            qqHEstimation, VHEstimation, EWKZEstimation,
            ZTTEmbeddedEstimation, ttHEstimation)

        from shape_producer.era import Run2018
        era = Run2018(args.database)
    else:
        logger.fatal("Era {} is not implemented.".format(args.era))
        raise Exception("Era {} is not implemented.".format(args.era))

    logger.debug("Write filelist for channel %s in era %s.", args.channel,
                 args.era)

    # These two methods take the process name as an extra first argument.
    process_names = {ggHEstimation: "ggH", qqHEstimation: "qqH"}

    channel_found = False
    # The year is matched as a substring of args.era, one year after the
    # other, exactly like the former per-era/per-channel blocks did.
    for year in ("2016", "2017", "2018"):
        if year not in args.era:
            continue
        channel = _make_channel(args.channel, year)
        if channel is None:
            continue
        channel_found = True

        # Estimation methods in the order in which they are instantiated.
        if year == "2016":
            # TODO include ZTTEmbeddedEstimation again once samples are there
            methods = [
                ggHEstimation, qqHEstimation, ttHEstimation, VHEstimation,
                ZTTEstimation, ZLEstimation, ZJEstimation, TTTEstimation,
                TTLEstimation, TTJEstimation, WEstimation, VVEstimation,
                EWKZEstimation, EWKWpEstimation, EWKWmEstimation,
                DataEstimation, ggHWWEstimation, qqHWWEstimation
            ]
            skipped_in_em = (ZJEstimation, TTJEstimation)
        else:
            methods = [
                ggHEstimation, qqHEstimation, ttHEstimation, VHEstimation,
                ZTTEstimation, ZTTEmbeddedEstimation, ZLEstimation,
                ZJEstimation, TTTEstimation, TTJEstimation, TTLEstimation,
                WEstimation, VVJEstimation, VVTEstimation, VVLEstimation,
                EWKZEstimation, DataEstimation
            ]
            skipped_in_em = (ZJEstimation, TTJEstimation, VVJEstimation)

        # The em channel does not use the ZJ, TTJ and VVJ estimations.
        if args.channel == "em":
            methods = [m for m in methods if m not in skipped_in_em]

        estimations = []
        for method in methods:
            if method in process_names:
                estimations.append(
                    method(process_names[method], era, args.directory,
                           channel))
            else:
                estimations.append(method(era, args.directory, channel))

        _add_files_to_filelist(filelist, estimations, args.channel)

    if not channel_found:
        # Keep writing the (empty) filelist, but make the reason visible.
        logger.warning(
            "Channel %s is not implemented for era %s, filelist is empty.",
            args.channel, args.era)

    # Write output filelist
    logger.info("Write filelist to file: {}".format(args.output))
    with open(args.output, 'w') as output_file:
        yaml.dump(filelist, output_file, default_flow_style=False)
def main(args):
    """Book and produce nominal ZL relative-recoil histograms (2016, mm).

    One category is booked for every (njets bin, genbosonpt bin) pair and
    every recoil variable. Each category additionally requires
    ``70 < m_vis < 110`` and histograms ``-<variable>/genbosonpt`` in 400
    constant-width bins between -20 and 20.

    Args:
        args: Parsed command-line arguments. The attributes read here are
            ``num_threads``, ``datasets``, ``directory`` and ``binning``.
    """
    # Container for all distributions to be drawn
    systematics_mm = Systematics("shapes_mm_recoilunc_2016.root",
                                 num_threads=args.num_threads,
                                 find_unique_objects=True)

    # Era
    era = Run2016(args.datasets)

    # Channels and processes
    # yapf: disable
    directory = args.directory

    mm = MM()
    mm_processes = {
        "ZL"    : Process("ZL",       ZLEstimation        (era, directory, mm, friend_directory=[])),
        }

    # The parsed binning is not used anywhere below. The file is still read
    # so that a missing or malformed binning file aborts the run exactly as
    # before; the context manager makes sure the handle is closed.
    # NOTE(review): yaml.load without an explicit Loader should not be used on
    # untrusted files - consider yaml.safe_load if the binning file is plain
    # data.
    with open(args.binning) as binning_file:
        yaml.load(binning_file)

    # Variables and categories
    mm_categories = []

    variable_bins = {
        "njets" : [0, 1, 2],
        "genbosonpt" : [0, 10, 20, 30, 50],
    }
    variable_names = [
        "recoilParToZ",
        "puppirecoilParToZ",
    ]

    njets_edges = variable_bins["njets"]
    pt_edges = variable_bins["genbosonpt"]
    for njets_bin, njets_edge in enumerate(njets_edges):
        # The last njets bin is inclusive (>=), all others are exact matches.
        if njets_bin == len(njets_edges) - 1:
            category_njets = "njets >= %s" % njets_edge
        else:
            category_njets = "njets == %s" % njets_edge

        for pt_bin, pt_edge in enumerate(pt_edges):
            name = "njets_bin_%s_vs_ptvis_bin_%s" % (njets_bin, pt_bin)
            # The last pt bin is open-ended, all others are (low, high].
            if pt_bin == len(pt_edges) - 1:
                category_pt = "genbosonpt > %s" % pt_edge
            else:
                category_pt = "genbosonpt > %s && genbosonpt <= %s" % (
                    pt_edge, pt_edges[pt_bin + 1])

            # Single-string form prints the same text under Python 2 and 3.
            print("%s %s" % (category_njets, category_pt))

            cuts = Cuts(
                Cut(category_njets, "njets_category"),
                Cut(category_pt, "ptvis_category"),
                Cut("m_vis > 70 && m_vis < 110", "z_peak")
            )
            for v in variable_names:
                mm_categories.append(
                    Category(
                        name,
                        mm,
                        cuts,
                        variable=Variable("relative_%s" % v,
                                          ConstantBinning(400, -20.0, 20.0),
                                          expression="-%s/genbosonpt" % v)))

    # Nominal histograms
    for process, category in product(mm_processes.values(), mm_categories):
        systematics_mm.add(
            Systematic(
                category=category,
                process=process,
                analysis="smhtt",
                era=era,
                variation=Nominal(),
                mass="125"))

    # Produce histograms
    systematics_mm.produce()
def main(args):
    """Book and produce the 2016 mm-channel histograms for the recoil fit.

    Every process (data, the simulated backgrounds and the QCD estimate built
    from them) is booked in three jet-multiplicity categories, each requiring
    ``70 < m_vis < 110``, for every MET projection listed below. Recoil
    resolution and response variations are then added for the ZL process
    before all histograms are produced.

    Args:
        args: Parsed command-line arguments. The attributes read here are
            ``num_threads``, ``datasets``, ``directory`` and
            ``mm_friend_directory``.
    """
    # Container collecting everything that is going to be drawn
    systematics_mm = Systematics("fitrecoil_mm_2016.root",
                                 num_threads=args.num_threads,
                                 find_unique_objects=True)

    era = Run2016(args.datasets)

    # yapf: disable
    base_dir = args.directory
    friends = args.mm_friend_directory
    channel = MM()

    def book(label, estimation_cls):
        # All plain estimations share the same constructor arguments.
        return Process(label, estimation_cls(era, base_dir, channel, friend_directory=friends))

    processes = {
        "data": book("data_obs", DataEstimation),
        "ZTT": book("ZTT", ZTTEstimation),
        "ZL": book("ZL", ZLEstimation),
        "TTT": book("TTT", TTTEstimation),
        "TTL": book("TTL", TTLEstimation),
        "VVT": book("VVT", VVTEstimation),
        "VVL": book("VVL", VVLEstimation),
        "W": book("W", WEstimation),
    }

    # The QCD estimation is handed the other backgrounds and the data process.
    qcd_backgrounds = [
        processes[key]
        for key in ("ZTT", "ZL", "W", "TTT", "TTL", "VVT", "VVL")
    ]
    qcd_estimation = QCDEstimation_SStoOS_MTETEM(
        era, base_dir, channel,
        qcd_backgrounds,
        processes["data"],
        friend_directory=friends,
        extrapolation_factor=2.0)
    processes["QCD"] = Process("QCD", qcd_estimation)

    # Observables: MET projections only. Other candidates (met, metphi,
    # puppimet, puppimetphi and the recoil projections) are not booked.
    observables = [
        Variable(observable, ConstantBinning(25, -100.0, 100.0))
        for observable in (
            "metParToZ", "metPerpToZ",
            "puppimetParToZ", "puppimetPerpToZ",
        )
    ]

    jet_cuts = [
        Cut("njets == 0", "0jet"),
        Cut("njets == 1", "1jet"),
        Cut("njets >= 2", "ge2jet"),
    ]

    # One category per (jet cut, observable); the category takes the name of
    # the jet cut and gets its own Z-peak mass window cut.
    categories = [
        Category(
            jet_cut.name,
            channel,
            Cuts(Cut("m_vis > 70 && m_vis < 110", "m_vis_peak"), jet_cut),
            variable=observable)
        for jet_cut in jet_cuts
        for observable in observables
    ]

    # Nominal shapes for every process in every category
    for proc, cat in product(processes.values(), categories):
        nominal = Systematic(
            category=cat,
            process=proc,
            analysis="smhtt",
            era=era,
            variation=Nominal(),
            mass="125")
        systematics_mm.add(nominal)

    # Recoil correction uncertainties, applied to ZL only
    resolution_shifts = create_systematic_variations(
        "CMS_htt_boson_reso_met_Run2016", "metRecoilResolution",
        DifferentPipeline)
    response_shifts = create_systematic_variations(
        "CMS_htt_boson_scale_met_Run2016", "metRecoilResponse",
        DifferentPipeline)
    zl_process = processes["ZL"]
    for shift in resolution_shifts + response_shifts:
        systematics_mm.add_systematic_variation(
            variation=shift,
            process=zl_process,
            channel=channel,
            era=era)

    # Produce histograms
    systematics_mm.produce()
Пример #20
0
def main(args):
    """Book and produce nominal recoil control histograms (2016, mm).

    Data, the simulated backgrounds and the QCD estimate built from them are
    booked in one category per (njets bin, ptvis bin) pair and per MET
    projection. Each category additionally requires ``70 < m_vis < 110``.
    Bin edges and expressions of the variables are taken from the
    ``control -> mm -> <variable>`` section of the binning file.

    Args:
        args: Parsed command-line arguments. The attributes read here are
            ``num_threads``, ``datasets``, ``directory`` and ``binning``.
    """
    # Container for all distributions to be drawn
    systematics_mm = Systematics("shapes_mm_recoil_2016.root",
                                 num_threads=args.num_threads,
                                 find_unique_objects=True)

    # Era
    era = Run2016(args.datasets)

    # Channels and processes
    # yapf: disable
    directory = args.directory

    # NOTE(review): hard-coded, user-specific absolute path; used as friend
    # directory for ZTT and ZL only. Consider making it configurable.
    zptm_path = "/portal/ekpbms1/home/akhmet/workdir/FriendTreeProductionMain/CMSSW_10_2_14/src/ZPtMReweighting_workdir/ZPtMReweighting_collected/"
    mm = MM()
    mm_processes = {
        "data"  : Process("data_obs", DataEstimation      (era, directory, mm, friend_directory=[])),
        "ZTT"   : Process("ZTT",      ZTTEstimation       (era, directory, mm, friend_directory=[zptm_path])),
        "ZL"    : Process("ZL",       ZLEstimation        (era, directory, mm, friend_directory=[zptm_path])),
        "TTT"   : Process("TTT",      TTTEstimation       (era, directory, mm, friend_directory=[])),
        "TTL"   : Process("TTL",      TTLEstimation       (era, directory, mm, friend_directory=[])),
        "VVT"   : Process("VVT",      VVTEstimation       (era, directory, mm, friend_directory=[])),
        "VVL"   : Process("VVL",      VVLEstimation       (era, directory, mm, friend_directory=[])),
        "W"     : Process("W",        WEstimation         (era, directory, mm, friend_directory=[])),
        }
    mm_processes["QCD"] = Process("QCD", QCDEstimation_SStoOS_MTETEM(era, directory, mm,
            [mm_processes[process] for process in ["ZTT", "ZL", "W", "TTT", "TTL", "VVT", "VVL"]],
            mm_processes["data"], friend_directory=[], extrapolation_factor=2.0))

    # Variables and categories
    # Read the binning inside a context manager so the handle is closed.
    # NOTE(review): yaml.load without an explicit Loader should not be used on
    # untrusted files - consider yaml.safe_load if the binning file is plain
    # data.
    with open(args.binning) as binning_file:
        binning = yaml.load(binning_file)
    mm_categories = []

    variable_bins = {
        "njets" : [0, 1, 2],
        "ptvis" : [0, 10, 20, 30, 50],
    }
    # The recoil projections (recoilParToZ, recoilPerpToZ and their puppi
    # counterparts) are not booked here.
    variable_names = [
        "metParToZ", "metPerpToZ",
        "puppimetParToZ", "puppimetPerpToZ",
    ]

    njets_edges = variable_bins["njets"]
    pt_edges = variable_bins["ptvis"]
    for njets_bin, njets_edge in enumerate(njets_edges):
        # The last njets bin is inclusive (>=), all others are exact matches.
        if njets_bin == len(njets_edges) - 1:
            category_njets = "njets >= %s" % njets_edge
        else:
            category_njets = "njets == %s" % njets_edge

        for pt_bin, pt_edge in enumerate(pt_edges):
            name = "njets_bin_%s_vs_ptvis_bin_%s" % (njets_bin, pt_bin)
            # The last pt bin is open-ended, all others are (low, high].
            if pt_bin == len(pt_edges) - 1:
                category_pt = "ptvis > %s" % pt_edge
            else:
                category_pt = "ptvis > %s && ptvis <= %s" % (
                    pt_edge, pt_edges[pt_bin + 1])

            # Single-string form prints the same text under Python 2 and 3.
            print("%s %s" % (category_njets, category_pt))

            cuts = Cuts(
                Cut(category_njets, "njets_category"),
                Cut(category_pt, "ptvis_category"),
                Cut("m_vis > 70 && m_vis < 110", "z_peak")
            )
            for v in variable_names:
                settings = binning["control"]["mm"][v]
                mm_categories.append(
                    Category(
                        name,
                        mm,
                        cuts,
                        variable=Variable(v,
                                          VariableBinning(settings["bins"]),
                                          expression=settings["expression"])))

    # Nominal histograms
    for process, category in product(mm_processes.values(), mm_categories):
        systematics_mm.add(
            Systematic(
                category=category,
                process=process,
                analysis="smhtt",
                era=era,
                variation=Nominal(),
                mass="125"))

    # Produce histograms
    systematics_mm.produce()
Пример #21
0
    },
    "2017": {
        "mt": MTSM2017(),
        "et": ETSM2017(),
        "tt": TTSM2017(),
        "em": EMSM2017()
    },
    "2018": {
        "mt": MTSM2018(),
        "et": ETSM2018(),
        "tt": TTSM2018(),
        "em": EMSM2018()
    }
}
# Era objects keyed by year string, each constructed from `database`.
eraD = {
    year: era_cls(database)
    for year, era_cls in (
        ("2016", Run2016),
        ("2017", Run2017),
        ("2018", Run2018),
    )
}

from shape_producer.estimation_methods_2017 import DataEstimation, ZTTEstimation, ZJEstimation, ZLEstimation, TTLEstimation, TTJEstimation, TTTEstimation, VVTEstimation, VVJEstimation, VVLEstimation, WEstimation, ggHEstimation, qqHEstimation, EWKZEstimation, ZTTEmbeddedEstimation, NewFakeEstimationTT, NewFakeEstimationLT
from fake_factor_derivation.cuts import cutDB

class ParSpaceRegion(object):
    def __init__(self, eraName, channelName, bkgName):
        self.meta = {"era": eraName, "channel": channelName, "bkg": bkgName}
        self.era = eraD[eraName]
        self.channel = copy.deepcopy(channelDict[eraName][channelName])

        if self.meta["era"] not in ["2016", "2017"]: raise Exception