def model(year, recoil, category):
    """Assemble the rhalphalib fit model for one year, recoil bin and category.

    The model contains a signal region (``sr``) plus seven control regions
    (``wmcr``, ``tmcr``, ``zmcr``, ``wecr``, ``tecr``, ``zecr``, ``gcr``).
    The Z+jets and ttbar yields in the signal region are parametric; every
    other data-driven sample is tied to them through a transfer factor.

    Args:
        year: Data-taking year as a string. It is concatenated into the model
            and channel names, and ``"2018"`` selects the ``EGamma`` dataset
            for the electron and photon control regions.
        recoil: Index applied to the first dense axis of every template; also
            embedded in the names through ``str(recoil)``.
        category: Key into ``category_map``. ``"pass"`` additionally multiplies
            the Z+jets bin yields by ``tf_params``.

    Returns:
        The populated ``rl.Model``.

    NOTE(review): ``hists``, ``category_map``, the ``*BinYields`` /
    ``*TransferFactor`` objects, ``tf_params`` and the nuisance parameters
    (``lumi``, ``trig_met``, ``trig_pho``, ``veto_tau``, ``jec``, ``btag``,
    ``qcdpho_norm``, ``id_pho``) are not defined in this function; they are
    presumably module-level names -- confirm against the rest of the file.
    """

    def template(dictionary, process, systematic, region):
        """Return ``(values, edges, "fjmass")`` for one process and systematic.

        Non-nominal, non-data variations are divided by the nominal integral;
        every non-data template has its non-positive bins replaced by 1e-7.
        """
        histogram = dictionary[region].integrate("process", process)
        nominal = histogram.integrate("systematic", "nominal").values()[()][
            recoil, :, category_map[category]
        ]
        output = nominal
        if "nominal" not in systematic and "data" not in systematic:
            output = np.nan_to_num(
                histogram.integrate("systematic", systematic).values()[()][
                    recoil, :, category_map[category]
                ]
                / nominal.sum()
            )
        if "data" not in systematic:
            # Empty or negative bins are replaced by a tiny positive yield.
            output[output <= 0] = 1e-7
        binning = histogram.integrate("systematic", systematic).axis("fjmass").edges()
        return (output, binning, "fjmass")

    def by_region(group):
        """Split ``group["template"]`` per region, summing over ``gentype``."""
        source = group["template"]
        return {
            str(r): source.integrate("region", r).sum("gentype")
            for r in source.identifiers("region")
        }

    def open_channel(prefix, dataset):
        """Create channel ``prefix + model_id``, register it, set its data."""
        name = prefix + model_id
        channel = rl.Channel(name)
        fit_model.addChannel(channel)
        channel.setObservation(template(data, dataset, "data", prefix))
        return name, channel

    def add_transfer(channel, name, transfer_factor, dependent):
        """Add a background sample tied to ``dependent`` by ``transfer_factor``."""
        sample = rl.TransferFactorSample(
            name, rl.Sample.BACKGROUND, transfer_factor, dependent
        )
        channel.addSample(sample)
        return sample

    model_id = year + category + "recoil" + str(recoil)
    print(model_id)
    fit_model = rl.Model("darkhiggs" + model_id)

    ###
    # Preparing histograms for fit
    ###

    data = by_region(hists["data"])
    background = by_region(hists["bkg"])
    # Fix: the signal dictionary is now built from the signal histogram's own
    # regions; the original looped over the background regions here.
    signal = by_region(hists["sig"])

    # 2018 stores electron and photon data under a single dataset name.
    electron_data = "EGamma" if year == "2018" else "SingleElectron"
    photon_data = "EGamma" if year == "2018" else "SinglePhoton"

    ###
    # R0: Signal region
    ###

    ch_name, sr = open_channel("sr", "MET")

    # Z(->nunu)+jets data-driven model
    sr_zjetsTemplate = template(background, "Z+jets", "nominal", "sr")
    sr_zjetsObservable = rl.Observable("fjmass", sr_zjetsTemplate[1])
    if category == "pass":
        zjets_yields = sr_zjetsBinYields * tf_params
    else:
        zjets_yields = sr_zjetsBinYields * 1.0
    sr_zjets = rl.ParametericSample(
        ch_name + "_zjets", rl.Sample.BACKGROUND, sr_zjetsObservable, zjets_yields
    )
    sr.addSample(sr_zjets)

    # Signal templates: only processes whose name contains "Mhs_50" are used.
    for s in signal["sr"].identifiers("process"):
        if "Mhs_50" not in str(s):
            continue
        sr_signal = rl.TemplateSample(
            ch_name + "_" + str(s),
            rl.Sample.SIGNAL,
            template(signal, s, "nominal", "sr"),
        )
        sr_signal.setParamEffect(lumi, 1.027)
        sr_signal.setParamEffect(trig_met, 1.01)
        sr_signal.setParamEffect(veto_tau, 1.03)
        sr_signal.setParamEffect(jec, 1.05)
        btagUp = template(signal, s, "btagUp", "sr")[0]
        btagDown = template(signal, s, "btagDown", "sr")[0]
        sr_signal.setParamEffect(btag, btagUp, btagDown)
        sr.addSample(sr_signal)

    # W(->lnu)+jets data-driven model, linked to Z+jets (W-Z link)
    sr_wjets = add_transfer(sr, ch_name + "_wjets", sr_wjetsTransferFactor, sr_zjets)

    # top-antitop data-driven model
    sr_ttTemplate = template(background, "TT", "nominal", "sr")
    sr_ttObservable = rl.Observable("fjmass", sr_ttTemplate[1])
    sr_tt = rl.ParametericSample(
        ch_name + "_tt", rl.Sample.BACKGROUND, sr_ttObservable, sr_ttBinYields
    )
    sr.addSample(sr_tt)

    ###
    # R1: Single muon W control region
    ###

    ch_name, wmcr = open_channel("wmcr", "MET")
    add_transfer(wmcr, ch_name + "_wjets", wmcr_wjetsTransferFactor, sr_wjets)
    add_transfer(wmcr, ch_name + "_tt", wmcr_ttTransferFactor, sr_tt)

    ###
    # R2: Single muon top control region
    ###

    ch_name, tmcr = open_channel("tmcr", "MET")
    add_transfer(tmcr, ch_name + "_wjets", tmcr_wjetsTransferFactor, sr_wjets)
    add_transfer(tmcr, ch_name + "_tt", tmcr_ttTransferFactor, sr_tt)

    ###
    # R3: Double muon control region
    ###

    ch_name, zmcr = open_channel("zmcr", "MET")
    add_transfer(zmcr, ch_name + "_dyjets", zmcr_dyjetsTransferFactor, sr_zjets)

    ###
    # R4: Single electron W control region
    ###

    ch_name, wecr = open_channel("wecr", electron_data)
    add_transfer(wecr, ch_name + "_wjets", wecr_wjetsTransferFactor, sr_wjets)
    add_transfer(wecr, ch_name + "_tt", wecr_ttTransferFactor, sr_tt)

    ###
    # R5: Single electron top control region
    ###

    ch_name, tecr = open_channel("tecr", electron_data)
    add_transfer(tecr, ch_name + "_wjets", tecr_wjetsTransferFactor, sr_wjets)
    add_transfer(tecr, ch_name + "_tt", tecr_ttTransferFactor, sr_tt)

    ###
    # R6: Double electron control region
    ###

    ch_name, zecr = open_channel("zecr", electron_data)
    add_transfer(zecr, ch_name + "_dyjets", zecr_dyjetsTransferFactor, sr_zjets)

    ###
    # R7: Single photon control region
    ###

    ch_name, gcr = open_channel("gcr", photon_data)
    add_transfer(gcr, ch_name + "_gjets", gcr_gjetsTransferFactor, sr_zjets)

    # QCD is taken from simulation in the photon region.
    gcr_qcd = rl.TemplateSample(
        ch_name + "_qcdMC",
        rl.Sample.BACKGROUND,
        template(background, "QCD", "nominal", "gcr"),
    )
    gcr_qcd.setParamEffect(lumi, 1.027)
    gcr_qcd.setParamEffect(trig_pho, 1.01)
    gcr_qcd.setParamEffect(veto_tau, 1.03)
    gcr_qcd.setParamEffect(qcdpho_norm, 2.0)
    gcr_qcd.setParamEffect(jec, 1.05)
    gcr_qcd.setParamEffect(id_pho, 1.02)
    gcr.addSample(gcr_qcd)

    # Done, return model
    return fit_model
# Example #2
# 0
def darkhiggs_model(tmpdir,mass,category,year):
    """Build, pickle and render the dark-Higgs recoil fit model.

    Channels are created for the signal region, the four single-lepton
    control regions (``te``, ``tm``, ``we``, ``wm``), the two double-lepton
    control regions (``ze``, ``zm``) and the photon control region. Z(vv),
    W+jets and ttbar float bin-by-bin in the signal region and are carried
    to the control regions by MC-derived transfer factors.

    Args:
        tmpdir: Output directory; converted with ``str()`` before joining.
        mass: Mass-bin label (``'mass0'`` .. ``'mass4'``). It selects the
            recoil binning and the ttbar fractions, and only regions whose
            name contains it are used.
        category: ``'monohs'`` or ``'monojet'``. It selects the recoil binning
            and whether tagger weights use the pass or the fail form.
        year: Year string used to build the input and output file names.

    Returns:
        None. The model is pickled to ``darkhiggsModel<year>.pkl`` inside
        ``tmpdir`` and rendered with ``renderCombine`` into
        ``darkhiggsModel<year>/<mass>``.

    NOTE(review): ``template``, ``load``, ``rl``, ``hist``, ``np``, ``os``,
    ``pickle`` and ``OrderedDict`` are not defined here; they are presumably
    imported or defined at module level -- confirm.
    NOTE(review): the process regrouping below only keeps the listed names,
    so the signal loop's ``'Mono'`` filter may never match -- verify.
    NOTE(review): the pickle file name does not include ``mass`` or
    ``category``; successive calls for one year write the same path.
    """

    model = rl.Model('darkhiggs_'+mass+'_'+category)

    binning_map = {
        'mass0': {
            'monohs' : [250.0, 280.0, 310.0, 340.0, 370.0, 400.0, 430.0, 550.0, 640.0, 740.0, 1250.0],
            'monojet' : [250.0, 280.0, 310.0, 340.0, 370.0, 400.0, 430.0, 470.0, 510.0, 550.0, 590.0, 640.0, 1250.0]
        },
        'mass1': {
            'monohs' : [250.0, 280.0, 310.0, 340.0, 370.0, 400.0, 1250.0],
            'monojet' : [250.0, 280.0, 310.0, 340.0, 370.0, 400.0, 430.0, 470.0, 510.0, 1250.0]
        },
        'mass2': {
            'monohs' : [250.0, 280.0, 310.0, 340.0, 430.0, 1250.0],
            'monojet' : [250.0, 280.0, 310.0, 340.0, 370.0, 400.0, 430.0, 470.0, 510.0, 1250.0]
        },
        'mass3': {
            'monohs' : [250.0, 280.0, 310.0, 340.0, 400.0, 430.0, 470.0, 1250.0],
            'monojet' : [250.0, 280.0, 310.0, 340.0, 370.0, 400.0, 430.0, 470.0, 510.0, 640.0, 1250.0]
        },
        'mass4': {
            'monohs' : [250.0, 280.0, 310.0, 340.0, 370.0, 400.0, 430.0, 470.0, 510.0, 550.0, 1250.0],
            'monojet' : [250.0, 280.0, 310.0, 340.0, 370.0, 400.0, 430.0, 470.0, 510.0, 550.0, 590.0, 640.0, 740.0, 900.0, 1250.0]
        }
    }

    ###
    # Extract histograms from input file
    ###

    hists = load('hists/darkhiggs'+year+'.scaled')

    ###
    # Regrouping histograms: each group collects every process matching
    # "<name>*"
    ###

    process = hist.Cat("process", "Process", sorting='placement')
    cats = ("process",)
    process_map = OrderedDict(
        (name, (name + "*",))
        for name in (
            "Hbb", "DY", "VV", "ST", "TT", "WJets", "ZJets", "GJets",
            "MET", "SingleElectron", "SinglePhoton",
        )
    )

    for key in hists:
        hists[key] = hists[key].group(cats, process, process_map)

    ###
    # Preparing histograms for fit
    ###

    recoil_axis = hist.Bin('recoil', 'Hadronic recoil', binning_map[mass][category])
    recoil = {}
    for r in hists['recoil'].identifiers('region'):
        if mass not in str(r): continue
        # The key is the text before the first underscore of the region name.
        recoil[str(r).split("_")[0]] = hists['recoil'].integrate('region', r).rebin('recoil', recoil_axis)

    def nominal(region, proc):
        """Nominal recoil histogram of one process in one region."""
        return recoil[region].integrate('process', proc).integrate('systematic', 'nominal')

    def observe(channel, region, dataset):
        """Set the channel observation from the given dataset."""
        channel.setObservation(template(nominal(region, dataset), 'recoil'))

    def mc_sample(name, region, proc, sample_type=rl.Sample.BACKGROUND):
        """Template sample taken directly from simulation (not yet added)."""
        return rl.TemplateSample(name, sample_type, template(nominal(region, proc), 'recoil'))

    def floating_sample(ch, label, proc, weights):
        """Return ``(MC template sample, parametric sample)`` for the SR.

        Each bin gets an independent parameter starting at the MC yield and
        bounded by twice the largest MC bin; ``weights`` are applied in order.
        """
        nominal_hist = nominal('sr', proc)
        tpl = template(nominal_hist, 'recoil')
        mc = rl.TemplateSample(ch + '_' + label + 'MC', rl.Sample.BACKGROUND, tpl)
        upper = tpl[0].max() * 2
        yields = np.array([
            rl.IndependentParameter('%s_%s_bin_%d' % (ch, label, i), b, 0, upper)
            for i, b in enumerate(tpl[0])
        ])
        for weight in weights:
            yields = yields * weight
        observable = rl.Observable('recoil', nominal_hist.axis('recoil').edges(overflow='all'))
        sample = rl.ParametericSample(ch + '_' + label, rl.Sample.BACKGROUND, observable, yields)
        return mc, sample

    def add_transfer(channel, ch, label, region, proc, sr_mc, sr_sample):
        """Add ``proc`` to ``channel`` as (CR MC / SR MC) times the SR sample."""
        cr_mc = mc_sample(ch + '_' + label + 'MC', region, proc)
        factor = cr_mc.getExpectation() / sr_mc.getExpectation()
        linked = rl.TransferFactorSample(ch + '_' + label, rl.Sample.BACKGROUND, factor, sr_sample)
        channel.addSample(linked)
        return linked

    def tag_weight(sfxeff, eff):
        """Pass-form weight for 'monohs' categories, fail-form otherwise."""
        if 'monohs' in category:
            return sfxeff / eff
        return (1 - sfxeff) / (1 - eff)

    ###
    # Rate systematics
    ###

    # Luminosity
    lumi = rl.NuisanceParameter('lumi', 'lnN')

    # Cross section of MC-driven processes
    QCDe_Norm = rl.NuisanceParameter('QCDe_Norm', 'lnN')
    QCDmu_Norm = rl.NuisanceParameter('QCDmu_Norm', 'lnN')
    QCDsig_Norm = rl.NuisanceParameter('QCDsig_Norm', 'lnN')
    stop_Norm = rl.NuisanceParameter('stop_Norm', 'lnN')
    VV_Norm = rl.NuisanceParameter('VV_Norm', 'lnN')
    Hbb_Norm = rl.NuisanceParameter('Hbb_Norm', 'lnN')
    dy_Norm = rl.NuisanceParameter('dy_Norm', 'lnN') #only in signal region

    # Lepton/photon ID uncertainties
    id_e = rl.NuisanceParameter('id_e', 'lnN')
    id_mu = rl.NuisanceParameter('id_mu', 'lnN')
    id_pho = rl.NuisanceParameter('id_pho', 'lnN')

    # Electron reco
    reco_e = rl.NuisanceParameter('reco_e', 'lnN')

    # Muon isolation (fix: was registered under the name 'reco_e')
    iso_m = rl.NuisanceParameter('iso_m', 'lnN')

    # Trigger efficiency
    trig_e = rl.NuisanceParameter('trig_e', 'lnN')
    trig_met = rl.NuisanceParameter('trig_met', 'lnN')

    # Tau veto
    veto_tau = rl.NuisanceParameter('veto_tau', 'lnN')

    ###
    # AK4 b-tagging of iso jet 0-tag efficiencies
    # (fix: the original name ``0tag_eff`` is not a valid identifier)
    ###

    zero_tag_eff = {
        'whf': 0.86,
        'wlf': 0.90,
        'zhf': 0.80,
        'zlf': 0.90,
        'ttbqq': 1.,
        'ttqq': 1.,
        'ttother': 1.,
        'stbqq': 1.,
        'stqq':1.,
        'stother': 1.,
        'vvbb': 1,
        'vvqq': 1,
        'vvother': 1,
        'hbb': 1,
        'hother': 1
    }

    ###
    # W/Z/gamma+jets heavy flavor fractions and their corrective k-factors
    ###

    whf_fraction = 0.18
    zhf_fraction = 0.09
    ghf_fraction = 0.12

    # Upper bound 1/fraction keeps k*fraction at or below one.
    whf_k = rl.IndependentParameter('whf_k', 1., 0, 1/whf_fraction)
    zhf_k = rl.IndependentParameter('zhf_k', 1., 0, 1/zhf_fraction)
    ghf_k = rl.IndependentParameter('ghf_k', 1., 0, 1/ghf_fraction)

    ###
    # Varying HF fraction adjusts the overall 0-tag efficiency
    ###

    whf_0tag_eff = zero_tag_eff['whf']
    wlf_0tag_eff = zero_tag_eff['wlf']

    wj_0tag_eff = wlf_0tag_eff*(1 - whf_fraction) + whf_0tag_eff*whf_fraction
    wj_0tag_sfxeff = wlf_0tag_eff*(1 - whf_k*whf_fraction) + whf_0tag_eff*whf_k*whf_fraction

    wjets_0tag_weight = wj_0tag_sfxeff / wj_0tag_eff
    wjets_1tag_weight = (1 - wj_0tag_sfxeff) / (1 - wj_0tag_eff)

    zhf_0tag_eff = zero_tag_eff['zhf']
    zlf_0tag_eff = zero_tag_eff['zlf']

    zj_0tag_eff = zlf_0tag_eff*(1 - zhf_fraction) + zhf_0tag_eff*zhf_fraction
    zj_0tag_sfxeff = zlf_0tag_eff*(1 - zhf_k*zhf_fraction) + zhf_0tag_eff*zhf_k*zhf_fraction

    zjets_0tag_weight = zj_0tag_sfxeff / zj_0tag_eff

    ###
    # Tagger efficiency and scale factor for in-situ calculation
    ###

    whf_deepak15_eff = 0.1
    wlf_deepak15_eff = 0.04

    whf_deepak15_sf = rl.IndependentParameter('whf_deepak15_sf', 1., 0, 1/whf_deepak15_eff)
    wlf_deepak15_sf = rl.IndependentParameter('wlf_deepak15_sf', 1., 0, 1/wlf_deepak15_eff)

    wj_deepak15_sfxeff = wlf_deepak15_sf*wlf_deepak15_eff*(1-whf_k*whf_fraction) + whf_deepak15_sf*whf_deepak15_eff*whf_k*whf_fraction
    wj_deepak15_eff = wlf_deepak15_eff*(1-whf_fraction) + whf_deepak15_eff*whf_fraction

    wjets_deepak15_weight = tag_weight(wj_deepak15_sfxeff, wj_deepak15_eff)

    zhf_deepak15_eff = 0.04
    zlf_deepak15_eff = 0.05

    zhf_deepak15_sf = rl.IndependentParameter('zhf_deepak15_sf', 1., 0, 1/zhf_deepak15_eff)
    zlf_deepak15_sf = rl.IndependentParameter('zlf_deepak15_sf', 1., 0, 1/zlf_deepak15_eff)

    zj_deepak15_sfxeff = zlf_deepak15_sf*zlf_deepak15_eff*(1-zhf_k*zhf_fraction) + zhf_deepak15_sf*zhf_deepak15_eff*zhf_k*zhf_fraction
    zj_deepak15_eff = zlf_deepak15_eff*(1-zhf_fraction) + zhf_deepak15_eff*zhf_fraction

    zjets_deepak15_weight = tag_weight(zj_deepak15_sfxeff, zj_deepak15_eff)

    ghf_deepak15_eff = 0.03
    glf_deepak15_eff = 0.005

    ghf_deepak15_sf = rl.IndependentParameter('ghf_deepak15_sf', 1., 0, 1/ghf_deepak15_eff)
    glf_deepak15_sf = rl.IndependentParameter('glf_deepak15_sf', 1., 0, 1/glf_deepak15_eff)

    gj_deepak15_sfxeff = glf_deepak15_sf*glf_deepak15_eff*(1-ghf_k*ghf_fraction) + ghf_deepak15_sf*ghf_deepak15_eff*ghf_k*ghf_fraction
    gj_deepak15_eff = glf_deepak15_eff*(1-ghf_fraction) + ghf_deepak15_eff*ghf_fraction

    gjets_deepak15_weight = tag_weight(gj_deepak15_sfxeff, gj_deepak15_eff)

    bqq_eff = 0.6
    qq_eff = 0.3
    bb_eff = 0.9
    other_eff = 0.3

    bqq_sf = rl.IndependentParameter('bqq_sf', 1., 0, 1/bqq_eff)
    qq_sf = rl.IndependentParameter('qq_sf', 1., 0, 1/qq_eff)
    # Fix: was registered under the name 'qq_sf'.
    bb_sf = rl.IndependentParameter('bb_sf', 1., 0, 1/bb_eff)
    other_sf = rl.IndependentParameter('other_sf', 1., 0, 1/other_eff)

    tt_bqq_fraction = {
        '0tag': {
            'mass0': 0.04,
            'mass1': 0.06,
            'mass2': 0.11,
            'mass3': 0.19,
            'mass4': 0.6
        },
        '1tag': {
            'mass0': 0.014,
            'mass1': 0.04,
            'mass2': 0.1,
            'mass3': 0.13,
            'mass4': 0.54
        }
    }

    # NOTE(review): identical to tt_bqq_fraction -- confirm this is intended.
    tt_qq_fraction = {
        '0tag': {
            'mass0': 0.04,
            'mass1': 0.06,
            'mass2': 0.11,
            'mass3': 0.19,
            'mass4': 0.6
        },
        '1tag': {
            'mass0': 0.014,
            'mass1': 0.04,
            'mass2': 0.1,
            'mass3': 0.13,
            'mass4': 0.54
        }
    }

    def tt_efficiencies(tag):
        """Return ``(sf-corrected efficiency, MC efficiency)`` for ttbar."""
        f_bqq = tt_bqq_fraction[tag][mass]
        f_qq = tt_qq_fraction[tag][mass]
        sfxeff = bqq_sf*bqq_eff*f_bqq + qq_sf*qq_eff*f_qq + other_sf*other_eff*(1 - f_bqq - f_qq)
        eff = bqq_eff*f_bqq + qq_eff*f_qq + other_eff*(1 - f_bqq - f_qq)
        return sfxeff, eff

    tt_0tag_sfxeff, tt_0tag_eff = tt_efficiencies('0tag')
    tt_1tag_sfxeff, tt_1tag_eff = tt_efficiencies('1tag')

    tt_0tag_weight = tag_weight(tt_0tag_sfxeff, tt_0tag_eff)
    tt_1tag_weight = tag_weight(tt_1tag_sfxeff, tt_1tag_eff)

    ###
    # Shape systematics
    ###

    btag = rl.NuisanceParameter('btag', 'shape') #AK4 btag
    gamma_to_z_ewk = rl.NuisanceParameter('Theory_gamma_z_ewk', 'shape')

    ###
    # Signal region
    ###

    sr_name = 'sr-'+mass+'-'+category
    sr = rl.Channel(sr_name)
    model.addChannel(sr)
    observe(sr, 'sr', 'MET')

    # Z(->nunu)+jets, W(->lnu)+jets and top-antitop data-driven models
    sr_zvvMC, sr_zvv = floating_sample(sr_name, 'zvv', 'ZJets', (zjets_deepak15_weight, zjets_0tag_weight))
    sr.addSample(sr_zvv)

    sr_wjetsMC, sr_wjets = floating_sample(sr_name, 'wjets', 'WJets', (wjets_deepak15_weight, wjets_0tag_weight))
    sr.addSample(sr_wjets)

    # these parameters are large, should probably log-transform them
    sr_ttbarMC, sr_ttbar = floating_sample(sr_name, 'ttbar', 'TT', (tt_0tag_weight,))
    sr.addSample(sr_ttbar)

    # Other MC-driven processes: (process, sample label, normalisation nuisance)
    mc_driven = (
        ('ST', 'singletop', stop_Norm),
        ('DY', 'dy', dy_Norm),
        ('VV', 'diboson', VV_Norm),
        ('Hbb', 'higgs', Hbb_Norm),
    )
    for proc, label, norm in mc_driven:
        sample = mc_sample(sr_name+'_'+label, 'sr', proc)
        sample.setParamEffect(lumi, 1.027)
        sample.setParamEffect(norm, 1.2)
        sample.setParamEffect(trig_met, 1.01)
        sample.setParamEffect(veto_tau, 1.03)
        sr.addSample(sample)

    for signal in recoil['sr'].identifiers('process'):
        if 'Mono' not in str(signal): continue
        sr_dm = mc_sample(sr_name+'_'+str(signal), 'sr', signal, rl.Sample.SIGNAL)
        sr_dm.setParamEffect(lumi, 1.027)
        sr_dm.setParamEffect(trig_met, 1.01)
        sr_dm.setParamEffect(veto_tau, 1.03)
        sr.addSample(sr_dm)

    ###
    # Single Lepton Control Regions
    ###

    cr = {}

    for p in ['t','w']:
        for lep in ['e','m']:
            key = p+lep
            region = key+'cr'
            ch_name = region+'-'+mass+'-'+category
            cr[key] = rl.Channel(ch_name)
            model.addChannel(cr[key])
            observe(cr[key], region, 'SingleElectron' if 'e' in lep else 'MET')

            add_transfer(cr[key], ch_name, 'ttbar', region, 'TT', sr_ttbarMC, sr_ttbar)
            add_transfer(cr[key], ch_name, 'wjets', region, 'WJets', sr_wjetsMC, sr_wjets)

            for proc, label in (('ST', 'singletop'), ('DY', 'dy'), ('VV', 'diboson'), ('Hbb', 'higgs')):
                cr[key].addSample(mc_sample(ch_name+'_'+label, region, proc))

    ###
    # Double Lepton Control Regions
    ###

    for ll in ['ze','zm']:
        region = ll+'cr'
        ch_name = region+'-'+mass+'-'+category
        cr[ll] = rl.Channel(ch_name)
        model.addChannel(cr[ll])
        observe(cr[ll], region, 'SingleElectron' if 'e' in ll else 'MET')

        add_transfer(cr[ll], ch_name, 'dy', region, 'DY', sr_zvvMC, sr_zvv)

        for proc, label in (('TT', 'ttbar'), ('ST', 'singletop'), ('VV', 'diboson'), ('Hbb', 'higgs')):
            cr[ll].addSample(mc_sample(ch_name+'_'+label, region, proc))

    ###
    # Single Photon Control Region
    ###

    ch_name = 'gcr-'+mass+'-'+category
    gcr = rl.Channel(ch_name)
    model.addChannel(gcr)
    observe(gcr, 'gcr', 'SinglePhoton')

    add_transfer(gcr, ch_name, 'gjets', 'gcr', 'GJets', sr_zvvMC, sr_zvv)

    with open(os.path.join(str(tmpdir), 'darkhiggsModel'+year+'.pkl'), "wb") as fout:
        pickle.dump(model, fout)

    model.renderCombine(os.path.join(str(tmpdir), 'darkhiggsModel'+year+'/'+mass))
# Example #3
# 0
def create_datacard(inputfile, carddir, nbins, nMCTF, nDataTF, passBinName,
                    failBinName):
    """Build the pass/fail ``HHModel`` and render it as a combine datacard.

    The function works in two stages:

    1. A QCD-MC-only pass/fail model is fitted with a Bernstein polynomial
       transfer factor (``tf_MCtempl``).  The fit workspace is written to
       ``HHModel_qcdfit.root`` in ``carddir`` unless running under pytest.
    2. The fitted polynomial parameters are decorrelated and multiplied by a
       second Bernstein polynomial (``tf_dataResidual``).  The product links
       the ``pass`` QCD sample to a per-bin parametric ``fail`` QCD sample in
       the final model, which is pickled to ``HHModel.pkl`` and rendered to
       ``HHModel`` inside ``carddir``.

    Args:
        inputfile: forwarded to ``get_hist`` for every histogram lookup.
        carddir: output directory for the workspace, pickle and datacard.
        nbins: number of ``msd`` bins; bins are 10 wide and start at 50.
        nMCTF: value placed in the order tuple of the MC transfer-factor
            ``rl.BernsteinPoly``.
        nDataTF: value placed in the order tuple of the data-residual
            ``rl.BernsteinPoly``.
        passBinName: tag used in the pass-region histogram names.
        failBinName: tag used in the fail-region histogram names.

    Raises:
        RuntimeError: if the QCD MC fit returns a non-zero status.
        ValueError: if data minus the non-QCD expectations is negative in any
            bin of the fail region.
    """
    lumi = rl.NuisanceParameter('CMS_lumi', 'lnN')

    # Observable: bin centres are shifted and scaled by the full msd range
    # before being handed to the polynomials.
    msd_edges = np.linspace(50, nbins * 10.0 + 50.0, nbins + 1)
    msd = rl.Observable('msd', msd_edges)
    msd_centres = msd_edges[:-1] + 0.5 * np.diff(msd_edges)
    msdscaled = (msd_centres - 50.) / (10.0 * nbins)

    # ------------------------------------------------------------------
    # Stage 1: QCD MC pass+fail model, fitted to a polynomial
    # ------------------------------------------------------------------
    qcdmodel = rl.Model('qcdmodel')
    for region, bin_name in (('fail', failBinName), ('pass', passBinName)):
        mc_channel = rl.Channel(region)
        qcdmodel.addChannel(mc_channel)
        # The QCD MC template acts as pseudodata; the last element returned
        # by get_hist is dropped before it is used as the observation.
        qcd_templ = get_hist(inputfile,
                             'histJet2Mass_' + bin_name + '_QCD',
                             obs=msd)
        mc_channel.setObservation(qcd_templ[:-1])

    failCh = qcdmodel['fail']
    passCh = qcdmodel['pass']
    failObs = failCh.getObservation()

    # Inclusive pass/fail ratio of the QCD MC
    qcdeff = passCh.getObservation().sum() / failObs.sum()
    tf_MCtempl = rl.BernsteinPoly("tf_MCtempl", (nMCTF, ), ['msd'],
                                  limits=(0, 10))
    tf_MCtempl_params = qcdeff * tf_MCtempl(msdscaled)

    # One free parameter per msd bin lets the fail-region QCD float around
    # the MC yield.
    mc_qcdparams = np.array([
        rl.IndependentParameter('qcdparam_msdbin%d' % ibin, 0)
        for ibin in range(msd.nbins)
    ])
    mc_sigmascale = 10.
    mc_spread = 1 + mc_sigmascale / np.maximum(1., np.sqrt(failObs))
    mc_scaledparams = failObs * mc_spread**mc_qcdparams
    mc_fail_qcd = rl.ParametericSample('fail_qcd', rl.Sample.BACKGROUND, msd,
                                       mc_scaledparams)
    failCh.addSample(mc_fail_qcd)
    mc_pass_qcd = rl.TransferFactorSample('pass_qcd', rl.Sample.BACKGROUND,
                                          tf_MCtempl_params, mc_fail_qcd)
    passCh.addSample(mc_pass_qcd)

    qcdfit_ws = ROOT.RooWorkspace('qcdfit_ws')
    simpdf, obs = qcdmodel.renderRoofit(qcdfit_ws)
    qcdfit = simpdf.fitTo(
        obs,
        ROOT.RooFit.Extended(True),
        ROOT.RooFit.SumW2Error(True),
        ROOT.RooFit.Strategy(2),
        ROOT.RooFit.Save(),
        ROOT.RooFit.Minimizer('Minuit2', 'migrad'),
        ROOT.RooFit.PrintLevel(-1),
    )
    qcdfit_ws.add(qcdfit)
    # The workspace is saved before the status check so that a failed fit
    # can still be inspected.
    if "pytest" not in sys.modules:
        ws_path = os.path.join(str(carddir), 'HHModel_qcdfit.root')
        qcdfit_ws.writeToFile(ws_path)
    if qcdfit.status() != 0:
        raise RuntimeError('Could not fit qcd')

    # Replace the fitted polynomial parameters by their decorrelated
    # counterparts, then build the full transfer factor.
    fitted_names = [par.name for par in tf_MCtempl.parameters.reshape(-1)]
    decoVector = rl.DecorrelatedNuisanceVector.fromRooFitResult(
        tf_MCtempl.name + '_deco', qcdfit, fitted_names)
    tf_MCtempl.parameters = decoVector.correlated_params.reshape(
        tf_MCtempl.parameters.shape)
    tf_MCtempl_params_final = tf_MCtempl(msdscaled)
    tf_dataResidual = rl.BernsteinPoly("tf_dataResidual", (nDataTF, ), ['msd'],
                                       limits=(0, 10))
    tf_dataResidual_params = tf_dataResidual(msdscaled)
    tf_params = qcdeff * tf_MCtempl_params_final * tf_dataResidual_params

    # ------------------------------------------------------------------
    # Stage 2: the actual fit model
    # ------------------------------------------------------------------
    model = rl.Model("HHModel")
    mc_names = ['TTJets', 'H', 'HH', 'VH', 'ttH', 'others']
    shape_systs = ['JMS', 'JMR', 'BDTMassShape', 'ttJetsCorr']
    for region in ['pass', 'fail']:
        ch = rl.Channel(region)
        model.addChannel(ch)

        # Histogram names carry '_<passBinName>' or '_<failBinName>'
        if region == 'pass':
            bin_tag = '_' + passBinName
        else:
            bin_tag = '_' + failBinName

        templates = {
            name: get_hist(inputfile,
                           'histJet2Mass%s_%s' % (bin_tag, name),
                           obs=msd)
            for name in mc_names + ['QCD', 'Data']
        }

        for sName in mc_names:
            templ = templates[sName]
            # HH is the only signal process
            if sName == 'HH':
                stype = rl.Sample.SIGNAL
            else:
                stype = rl.Sample.BACKGROUND
            sample = rl.TemplateSample(ch.name + '_' + sName, stype, templ)

            # normalisation nuisance and MC statistical uncertainties
            sample.setParamEffect(lumi, 1.027)
            sample.autoMCStats()

            # shape systematics: per-bin up/down ratios to nominal, left at
            # 1 wherever the nominal yield is not positive
            nominal = templ[0]
            for syst in shape_systs:
                valuesUp = get_hist(inputfile,
                                    'histJet2Mass%s_%s_%sUp' %
                                    (bin_tag, sName, syst),
                                    obs=msd)[0]
                valuesDown = get_hist(inputfile,
                                      'histJet2Mass%s_%s_%sDown' %
                                      (bin_tag, sName, syst),
                                      obs=msd)[0]
                effectUp = np.ones_like(nominal)
                effectDown = np.ones_like(nominal)
                for ibin, nom in enumerate(nominal):
                    if nom > 0.:
                        effectUp[ibin] = valuesUp[ibin] / nom
                        effectDown[ibin] = valuesDown[ibin] / nom

                syst_param = rl.NuisanceParameter(syst, 'shape')
                sample.setParamEffect(syst_param, effectUp, effectDown)

            ch.addSample(sample)

        # The observation is the 'Data' histogram of this region
        data_obs = (templates['Data'][0], msd.binning, msd.name)
        ch.setObservation(data_obs)

    failCh = model['fail']
    passCh = model['pass']

    qcdparams = np.array([
        rl.IndependentParameter('qcdparam_msdbin%d' % ibin, 0)
        for ibin in range(msd.nbins)
    ])
    # Initial QCD estimate in the fail region: data minus every sample added
    # so far.  Cast to float first so the in-place subtraction works.
    initial_qcd = failCh.getObservation().astype(float)
    for sample in failCh:
        initial_qcd -= sample.getExpectation(nominal=True)
    if np.any(initial_qcd < 0.):
        raise ValueError("initial_qcd negative for some bins..", initial_qcd)
    sigmascale = 10  # to scale the deviation from initial
    spread = 1 + sigmascale / np.maximum(1., np.sqrt(initial_qcd))
    scaledparams = initial_qcd * spread**qcdparams
    fail_qcd = rl.ParametericSample('fail_qcd', rl.Sample.BACKGROUND, msd,
                                    scaledparams)
    failCh.addSample(fail_qcd)
    pass_qcd = rl.TransferFactorSample('pass_qcd', rl.Sample.BACKGROUND,
                                       tf_params, fail_qcd)
    passCh.addSample(pass_qcd)

    with open(os.path.join(str(carddir), 'HHModel.pkl'), "wb") as fout:
        pickle.dump(model, fout)

    model.renderCombine(os.path.join(str(carddir), 'HHModel'))
示例#4
0
def test_simple():
    """Build a toy two-channel model from random templates and render it.

    Two channels (``pt450to500Fail`` and ``pt450to500Pass``) each receive
    three randomly generated template samples and a Poisson-fluctuated
    observation.  The fail-channel QCD is a per-bin parametric sample seeded
    with "observation minus other samples"; the pass-channel QCD is tied to it
    through a ``(2, 3)`` Bernstein polynomial evaluated at one pt point and
    one rho point per bin.  The model is rendered to ``simplemodel`` and the
    function prints, before and after rendering, whether ``ROOT`` has been
    imported.

    Raises:
        ValueError: if the initial QCD estimate is negative in any bin.
    """
    import sys

    model = rl.Model("testModel")

    jec = rl.NuisanceParameter('CMS_jec', 'shape')
    massScale = rl.NuisanceParameter('CMS_msdScale', 'shape')
    lumi = rl.NuisanceParameter('CMS_lumi', 'lnN')

    # first six edges of a 24-point grid, i.e. five bins
    bins = np.linspace(40, 201, 24)[:6]
    nbins = len(bins) - 1

    for chName in ('pt450to500Fail', 'pt450to500Pass'):
        ch = rl.Channel(chName)
        model.addChannel(ch)

        # running total of the non-QCD templates in this channel
        notqcdsum = np.zeros(nbins)
        for sName in ('zqq', 'wqq', 'hqq'):
            yields = np.random.exponential(5, size=nbins)
            templ = (yields, bins, 'x')
            notqcdsum += yields

            if sName == 'hqq':
                stype = rl.Sample.SIGNAL
            else:
                stype = rl.Sample.BACKGROUND
            sample = rl.TemplateSample(ch.name + '_' + sName, stype, templ)

            # symmetric random shape effect
            sample.setParamEffect(
                jec, np.random.normal(loc=1, scale=0.05, size=nbins))

            # asymmetric up/down shape effect
            sample.setParamEffect(massScale,
                                  np.linspace(0.9, 1.1, nbins),
                                  np.linspace(1.2, 0.8, nbins))

            sample.setParamEffect(lumi, 1.027)

            ch.addSample(sample)

        # make up a data_obs: Poisson throw around the templates plus 50
        ch.setObservation((np.random.poisson(notqcdsum + 50), bins, 'x'))

    # steal observable definition from previous template
    obs = model['pt450to500Fail_wqq'].observable

    fail_channel = model['pt450to500Fail']
    qcdparams = [
        rl.IndependentParameter('qcdparam_bin%d' % ibin, 0)
        for ibin in range(nbins)
    ]
    # Observation is cast to float so the in-place subtraction of the float
    # expectations works.
    initial_qcd = fail_channel.getObservation().astype(float)
    for other in fail_channel:
        initial_qcd -= other.getExpectation(nominal=True)
    if np.any(initial_qcd < 0.):
        raise ValueError("uh-oh")
    sigmascale = 10  # to scale the deviation from initial
    scaledparams = initial_qcd + sigmascale * np.sqrt(initial_qcd) * qcdparams
    fail_sample = rl.ParametericSample('pt450to500Fail_qcd',
                                       rl.Sample.BACKGROUND, obs, scaledparams)
    fail_channel.addSample(fail_sample)

    tf = rl.BernsteinPoly("qcd_pass_rhalphTF", (2, 3), ['pt', 'rho'])
    # suppose the scaled sampling point is 0.02 and the original is 465
    # (first pt bin)
    ptval = 0.02
    # suppose 'bins' is the msd binning; rho = 2*ln(msd/pt) is computed with
    # the msd value 0.3 of the way into each bin
    msdpts = bins[:-1] + 0.3 * np.diff(bins)
    rhovals = 2 * np.log(msdpts / 465.)
    # here we would derive these all at once with a 2D array, and thus the
    # bounds would envelope the whole space
    rhovals = (rhovals - rhovals.min()) / np.ptp(rhovals)
    tf_params = np.array([tf(ptval, rho) for rho in rhovals])
    pass_sample = rl.TransferFactorSample('pt450to500Pass_qcd',
                                          rl.Sample.BACKGROUND, tf_params,
                                          fail_sample)
    model['pt450to500Pass'].addSample(pass_sample)

    print("ROOT used? ", 'ROOT' in sys.modules)
    model.renderCombine("simplemodel")
    print("ROOT used? ", 'ROOT' in sys.modules)
示例#5
0
def rhalphabeth(msdbins):
    """Derive the pass/fail transfer-factor parameters on the ``fjmass`` grid.

    The background histograms in the module-level ``hists`` are regrouped so
    that ``Z+jets`` becomes ``V+jets``.  The nominal ``V+jets`` signal-region
    template is then used as the observation of a pass/fail model that is
    fitted with an order-2 Bernstein polynomial.  The fitted parameters are
    decorrelated and multiplied by a second order-2 "data residual"
    polynomial.

    Args:
        msdbins: bin edges of the ``fjmass`` observable.

    Returns:
        ``qcdeff * tf_MCtempl(msdscaled) * tf_dataResidual(msdscaled)``, one
        entry per mass bin.

    Raises:
        RuntimeError: if the fit returns a non-zero status.
    """
    process = hist.Cat("process", "Process", sorting="placement")
    # Only Z+jets enters the V+jets group; W+jets is currently left out:
    # bkg_map['V+jets'] = (['Z+jets','W+jets'],)
    bkg_map = OrderedDict([("V+jets", (["Z+jets"], ))])
    vjets_hists = {
        key: hists["bkg"][key].group(("process", ), process, bkg_map)
        for key in hists["data"].keys()
    }

    def nominal_vjets():
        # Nominal V+jets histogram in the signal region, summed over
        # gentype and recoil; rebuilt on every call.
        projected = vjets_hists["template"].integrate("region", "sr")
        projected = projected.sum("gentype", "recoil")
        projected = projected.integrate("process", "V+jets")
        return projected.integrate("systematic", "nominal")

    def vjets_template(column):
        # (values, edges, name) template from one column of the nominal
        # values; non-positive bins are replaced by 1e-7.
        # NOTE(review): columns 0/1 are used as fail/pass below, presumably a
        # tagger category axis -- confirm against the histogram definition.
        yields = nominal_vjets().values()[()][:, column]
        yields[yields <= 0] = 1e-7
        return (yields, nominal_vjets().axis("fjmass").edges(), "fjmass")

    # Build qcd MC pass+fail model and fit to polynomial
    qcdmodel = rl.Model("qcdmodel")
    qcdpass, qcdfail = 0.0, 0.0

    # Bin centres of the mass axis
    msds = np.meshgrid(msdbins[:-1] + 0.5 * np.diff(msdbins), indexing="ij")[0]
    msd_root = np.sqrt(msds)
    msds = msd_root * np.sqrt(msds)
    print(msds)
    msdscaled = msds / 300.0
    msd = rl.Observable("fjmass", msdbins)

    failCh = rl.Channel("fail")
    passCh = rl.Channel("pass")
    qcdmodel.addChannel(failCh)
    qcdmodel.addChannel(passCh)

    # mock templates: column 0 feeds the fail channel, column 1 the pass one
    failTempl = vjets_template(0)
    passTempl = vjets_template(1)
    failCh.setObservation(failTempl)
    passCh.setObservation(passTempl)
    qcdfail += failCh.getObservation().sum()
    qcdpass += passCh.getObservation().sum()

    # Inclusive pass/fail ratio normalises the polynomial
    qcdeff = qcdpass / qcdfail
    tf_MCtempl = rl.BernsteinPoly("tf_MCtempl", (2, ), ["fjmass"])
    tf_MCtempl_params = qcdeff * tf_MCtempl(msdscaled)

    failCh = qcdmodel["fail"]
    passCh = qcdmodel["pass"]
    failObs = failCh.getObservation()
    # One free parameter per mass bin for the fail-channel yield
    qcdparams = np.array([
        rl.IndependentParameter("qcdparam_msdbin%d" % ibin, 0)
        for ibin in range(msd.nbins)
    ])
    sigmascale = 10.0
    spread = 1 + sigmascale / np.maximum(1.0, np.sqrt(failObs))
    scaledparams = failObs * spread**qcdparams
    fail_qcd = rl.ParametericSample("fail_qcd", rl.Sample.BACKGROUND, msd,
                                    scaledparams)
    failCh.addSample(fail_qcd)
    print(tf_MCtempl_params)
    pass_qcd = rl.TransferFactorSample("pass_qcd", rl.Sample.BACKGROUND,
                                       tf_MCtempl_params, fail_qcd)
    passCh.addSample(pass_qcd)

    qcdfit_ws = ROOT.RooWorkspace("qcdfit_ws")
    simpdf, obs = qcdmodel.renderRoofit(qcdfit_ws)
    qcdfit = simpdf.fitTo(
        obs,
        ROOT.RooFit.Extended(True),
        ROOT.RooFit.SumW2Error(True),
        ROOT.RooFit.Strategy(2),
        ROOT.RooFit.Save(),
        ROOT.RooFit.Minimizer("Minuit2", "migrad"),
        ROOT.RooFit.PrintLevel(-1),
    )
    qcdfit_ws.add(qcdfit)
    # The workspace is written (outside pytest) before the status check so a
    # failed fit can still be inspected.
    if "pytest" not in sys.modules:
        ws_path = os.path.join("models", "testModel_qcdfit.root")
        qcdfit_ws.writeToFile(ws_path)
    if qcdfit.status() != 0:
        raise RuntimeError("Could not fit qcd")

    # Swap the fitted parameters for their decorrelated counterparts and
    # multiply by the residual polynomial.
    fitted_names = [par.name for par in tf_MCtempl.parameters.reshape(-1)]
    decoVector = rl.DecorrelatedNuisanceVector.fromRooFitResult(
        tf_MCtempl.name + "_deco", qcdfit, fitted_names)
    tf_MCtempl.parameters = decoVector.correlated_params.reshape(
        tf_MCtempl.parameters.shape)
    tf_MCtempl_params_final = tf_MCtempl(msdscaled)
    tf_dataResidual = rl.BernsteinPoly("tf_dataResidual", (2, ), ["fjmass"],
                                       limits=(0, 10))
    tf_dataResidual_params = tf_dataResidual(msdscaled)
    return qcdeff * tf_MCtempl_params_final * tf_dataResidual_params
示例#6
0
def test_rhalphabet(tmpdir):
    """Build the pt-binned rhalphabet model with a muon control region.

    Steps, in order:

    1. Fit a QCD-MC-only pass/fail model (one channel pair per pt bin) with a
       ``(2, 2)`` Bernstein polynomial in (pt, rho) whose initial parameters
       are read from ``initial_vals.csv``.  The fit workspace is written to
       ``testModel_qcdfit.root`` in ``tmpdir``.
    2. Evaluate the fitted polynomial on (pt, msd) and (pt, rho) grids and
       dump the values to ``msdpt.csv`` and ``rhopt.csv`` in the current
       working directory.
    3. Decorrelate the fitted parameters, multiply by a data-residual
       polynomial and build the final ``testModel``: MC templates and data
       per pt bin and region, parametric fail QCD, transfer-factor pass QCD,
       and ttbar efficiency/normalisation scale factors shared with the muon
       control region.
    4. Pickle the model to ``testModel.pkl`` and render it to ``testModel``,
       both inside ``tmpdir``.

    Args:
        tmpdir: output directory (converted with ``str``).

    Raises:
        RuntimeError: if the QCD MC fit returns a non-zero status.
        ValueError: if data minus the MC expectations is negative in any
            fail-channel bin.
    """
    tqqeffSF = rl.IndependentParameter('tqqeffSF', 1., 0, 10)
    tqqnormSF = rl.IndependentParameter('tqqnormSF', 1., 0, 10)

    def tie_ttbar_pass_fail(tqqpass, tqqfail):
        # Attach the shared ttbar scale factors to one pass/fail sample pair.
        # The fail-side efficiency effect uses the nominal pass/fail ratio.
        tqqPF = (tqqpass.getExpectation(nominal=True).sum() /
                 tqqfail.getExpectation(nominal=True).sum())
        tqqpass.setParamEffect(tqqeffSF, 1 * tqqeffSF)
        tqqfail.setParamEffect(tqqeffSF, (1 - tqqeffSF) * tqqPF + 1)
        tqqpass.setParamEffect(tqqnormSF, 1 * tqqnormSF)
        tqqfail.setParamEffect(tqqnormSF, 1 * tqqnormSF)

    ptbins = np.array([450, 500, 550, 600, 675, 800, 1200])
    npt = len(ptbins) - 1
    msdbins = np.linspace(47, 201, 23)
    msd = rl.Observable('msd', msdbins)

    # here we derive these all at once with 2D array
    ptpts, msdpts = np.meshgrid(ptbins[:-1] + 0.3 * np.diff(ptbins),
                                msdbins[:-1] + 0.5 * np.diff(msdbins),
                                indexing='ij')
    rhopts = 2 * np.log(msdpts / ptpts)
    ptscaled = (ptpts - 450.) / (1200. - 450.)
    rhoscaled = (rhopts - (-6)) / ((-2.1) - (-6))
    validbins = (rhoscaled >= 0) & (rhoscaled <= 1)
    rhoscaled[~validbins] = 1  # we will mask these out later

    # ------------------------------------------------------------------
    # Build qcd MC pass+fail model and fit to polynomial
    # ------------------------------------------------------------------
    qcdmodel = rl.Model("qcdmodel")
    qcdpass, qcdfail = 0., 0.
    for ptbin in range(npt):
        failCh = rl.Channel("ptbin%d%s" % (ptbin, 'fail'))
        passCh = rl.Channel("ptbin%d%s" % (ptbin, 'pass'))
        qcdmodel.addChannel(failCh)
        qcdmodel.addChannel(passCh)

        # QCD templates from file
        failTempl = get_template("QCD", 0, ptbin + 1, obs=msd,
                                 syst="nominal")
        passTempl = get_template("QCD", 1, ptbin + 1, obs=msd,
                                 syst="nominal")

        failCh.setObservation(failTempl, read_sumw2=True)
        passCh.setObservation(passTempl, read_sumw2=True)

        # With read_sumw2=True the observation is indexed as (sumw, sumw2)
        # throughout this function, so the yield is the sum of element 0.
        # Iterating over the pair and taking val[0] would only add the first
        # bin of sumw and of sumw2.
        qcdfail += failCh.getObservation()[0].sum()
        qcdpass += passCh.getObservation()[0].sum()

    qcdeff = qcdpass / qcdfail
    print("Inclusive P/F from Monte Carlo = " + str(qcdeff))

    # initial values
    print("Initial fit values read from file initial_vals.csv")
    initial_vals = np.genfromtxt('initial_vals.csv')
    initial_vals = initial_vals.reshape(3, 3)
    print(initial_vals)

    tf_MCtempl = rl.BernsteinPoly("tf_MCtempl", (2, 2), ['pt', 'rho'],
                                  init_params=initial_vals,
                                  limits=(-10, 10))
    tf_MCtempl_params = qcdeff * tf_MCtempl(ptscaled, rhoscaled)
    for ptbin in range(npt):
        failCh = qcdmodel['ptbin%dfail' % ptbin]
        passCh = qcdmodel['ptbin%dpass' % ptbin]
        # Yields only; the sumw2 element of the observation is not needed
        failObs = failCh.getObservation()[0]

        qcdparams = np.array([
            rl.IndependentParameter('qcdparam_ptbin%d_msdbin%d' % (ptbin, i),
                                    0) for i in range(msd.nbins)
        ])
        sigmascale = 10.
        scaledparams = failObs * (
            1 + sigmascale / np.maximum(1., np.sqrt(failObs)))**qcdparams

        fail_qcd = rl.ParametericSample('ptbin%dfail_qcd' % ptbin,
                                        rl.Sample.BACKGROUND, msd,
                                        scaledparams)
        failCh.addSample(fail_qcd)
        pass_qcd = rl.TransferFactorSample('ptbin%dpass_qcd' % ptbin,
                                           rl.Sample.BACKGROUND,
                                           tf_MCtempl_params[ptbin, :],
                                           fail_qcd)
        passCh.addSample(pass_qcd)

        # drop bins outside the rho validity window
        failCh.mask = validbins[ptbin]
        passCh.mask = validbins[ptbin]

    qcdfit_ws = ROOT.RooWorkspace('qcdfit_ws')

    simpdf, obs = qcdmodel.renderRoofit(qcdfit_ws)
    qcdfit = simpdf.fitTo(
        obs,
        ROOT.RooFit.Extended(True),
        ROOT.RooFit.SumW2Error(True),
        ROOT.RooFit.Strategy(2),
        ROOT.RooFit.Save(),
        ROOT.RooFit.Minimizer('Minuit2', 'migrad'),
        ROOT.RooFit.PrintLevel(1),
    )
    qcdfit_ws.add(qcdfit)
    qcdfit_ws.writeToFile(os.path.join(str(tmpdir), 'testModel_qcdfit.root'))

    # Set parameters to fitted values.  This happens before the status check
    # so the values are printed even when the fit failed.
    allparams = dict(zip(qcdfit.nameArray(), qcdfit.valueArray()))
    for p in tf_MCtempl.parameters.reshape(-1):
        p.value = allparams[p.name]
        print(p.name, p.value)

    if qcdfit.status() != 0:
        raise RuntimeError('Could not fit qcd')

    # ------------------------------------------------------------------
    # Dump the fitted transfer factor for plotting
    # ------------------------------------------------------------------
    # arrays for plotting pt vs msd
    pts_plot = np.linspace(450, 1200, 15)
    ptpts_plot, msdpts_plot = np.meshgrid(
        pts_plot[:-1] + 0.5 * np.diff(pts_plot),
        msdbins[:-1] + 0.5 * np.diff(msdbins),
        indexing='ij')
    ptpts_plot_scaled = (ptpts_plot - 450.) / (1200. - 450.)
    rhopts_plot = 2 * np.log(msdpts_plot / ptpts_plot)

    rhopts_plot_scaled = (rhopts_plot - (-6)) / ((-2.1) - (-6))
    validbins_plot = (rhopts_plot_scaled >= 0) & (rhopts_plot_scaled <= 1)

    # keep only grid points inside the rho validity window
    ptpts_plot = ptpts_plot[validbins_plot]
    msdpts_plot = msdpts_plot[validbins_plot]
    ptpts_plot_scaled = ptpts_plot_scaled[validbins_plot]
    rhopts_plot_scaled = rhopts_plot_scaled[validbins_plot]

    tf_MCtempl_vals = tf_MCtempl(ptpts_plot_scaled,
                                 rhopts_plot_scaled,
                                 nominal=True)
    df_msdpt = pd.DataFrame([])
    df_msdpt["msd"] = msdpts_plot.reshape(-1)
    df_msdpt["pt"] = ptpts_plot.reshape(-1)
    df_msdpt["eQCDMC"] = tf_MCtempl_vals.reshape(-1)
    df_msdpt.to_csv("msdpt.csv", header=False)

    # arrays for plotting pt vs rho
    rhos_plot = np.linspace(-6, -2.1, 23)
    ptpts_plot, rhopts_plot = np.meshgrid(
        pts_plot[:-1] + 0.5 * np.diff(pts_plot),
        rhos_plot[:-1] + 0.5 * np.diff(rhos_plot),
        indexing='ij')
    ptpts_plot_scaled = (ptpts_plot - 450.) / (1200. - 450.)
    rhopts_plot_scaled = (rhopts_plot - (-6)) / ((-2.1) - (-6))
    validbins_plot = (rhopts_plot_scaled >= 0) & (rhopts_plot_scaled <= 1)

    ptpts_plot = ptpts_plot[validbins_plot]
    rhopts_plot = rhopts_plot[validbins_plot]
    ptpts_plot_scaled = ptpts_plot_scaled[validbins_plot]
    rhopts_plot_scaled = rhopts_plot_scaled[validbins_plot]

    tf_MCtempl_vals = tf_MCtempl(ptpts_plot_scaled,
                                 rhopts_plot_scaled,
                                 nominal=True)

    df_rhopt = pd.DataFrame([])
    df_rhopt["rho"] = rhopts_plot.reshape(-1)
    df_rhopt["pt"] = ptpts_plot.reshape(-1)
    df_rhopt["eQCDMC"] = tf_MCtempl_vals.reshape(-1)
    df_rhopt.to_csv("rhopt.csv", header=False)

    # ------------------------------------------------------------------
    # Decorrelate the MC fit and build the full transfer factor
    # ------------------------------------------------------------------
    param_names = [p.name for p in tf_MCtempl.parameters.reshape(-1)]
    decoVector = rl.DecorrelatedNuisanceVector.fromRooFitResult(
        tf_MCtempl.name + '_deco', qcdfit, param_names)
    tf_MCtempl.parameters = decoVector.correlated_params.reshape(
        tf_MCtempl.parameters.shape)
    tf_MCtempl_params_final = tf_MCtempl(ptscaled, rhoscaled)
    tf_dataResidual = rl.BernsteinPoly("tf_dataResidual", (2, 2),
                                       ['pt', 'rho'],
                                       limits=(-10, 10))
    tf_dataResidual_params = tf_dataResidual(ptscaled, rhoscaled)
    tf_params = qcdeff * tf_MCtempl_params_final * tf_dataResidual_params

    # ------------------------------------------------------------------
    # build actual fit model now
    # ------------------------------------------------------------------
    model = rl.Model("testModel")

    # QCD is excluded from the MC samples; it is modelled parametrically
    samps = [
        'ggF', 'VBF', 'WH', 'ZH', 'ttH', 'ttbar', 'singlet', 'Zjets', 'Wjets',
        'VV'
    ]
    sigs = ['ggF', 'VBF', 'WH', 'ZH', 'ttH']

    for ptbin in range(npt):
        for region in ['pass', 'fail']:
            ch = rl.Channel("ptbin%d%s" % (ptbin, region))
            model.addChannel(ch)

            isPass = region == 'pass'

            for sName in samps:
                templ = get_template(sName,
                                     isPass,
                                     ptbin + 1,
                                     obs=msd,
                                     syst="nominal")
                stype = rl.Sample.SIGNAL if sName in sigs else rl.Sample.BACKGROUND
                sample = rl.TemplateSample(ch.name + '_' + sName, stype, templ)
                ch.addSample(sample)

            data_obs = get_template("data",
                                    isPass,
                                    ptbin + 1,
                                    obs=msd,
                                    syst="nominal")
            ch.setObservation(data_obs, read_sumw2=True)

            # NOTE: neither the rho-validity mask (validbins[ptbin]) nor the
            # blinding of bins 11-13 is applied to these channels; both were
            # already disabled before this revision.

    for ptbin in range(npt):
        failCh = model['ptbin%dfail' % ptbin]
        passCh = model['ptbin%dpass' % ptbin]

        qcdparams = np.array([
            rl.IndependentParameter('qcdparam_ptbin%d_msdbin%d' % (ptbin, i),
                                    0) for i in range(msd.nbins)
        ])
        # Initial QCD estimate: data yields minus all MC expectations.  Cast
        # to float so the in-place subtraction of float expectations works.
        initial_qcd = failCh.getObservation()[0].astype(float)
        for sample in failCh:
            initial_qcd -= sample.getExpectation(nominal=True)

        if np.any(initial_qcd < 0.):
            raise ValueError("initial_qcd negative for some bins..",
                             initial_qcd)

        sigmascale = 10  # to scale the deviation from initial
        scaledparams = initial_qcd * (
            1 + sigmascale / np.maximum(1., np.sqrt(initial_qcd)))**qcdparams
        fail_qcd = rl.ParametericSample('ptbin%dfail_qcd' % ptbin,
                                        rl.Sample.BACKGROUND, msd,
                                        scaledparams)
        failCh.addSample(fail_qcd)
        pass_qcd = rl.TransferFactorSample('ptbin%dpass_qcd' % ptbin,
                                           rl.Sample.BACKGROUND,
                                           tf_params[ptbin, :], fail_qcd)
        passCh.addSample(pass_qcd)

        tie_ttbar_pass_fail(passCh['ttbar'], failCh['ttbar'])

    # ------------------------------------------------------------------
    # Fill in muon CR
    # ------------------------------------------------------------------
    muon_samps = ['ttbar', 'QCD', 'singlet', 'Zjets', 'Wjets', 'VV']
    for region in ['pass', 'fail']:
        ch = rl.Channel("muonCR%s" % (region, ))
        model.addChannel(ch)

        isPass = region == 'pass'

        for sName in muon_samps:
            templ = get_template_muonCR(sName, isPass, obs=msd)
            sample = rl.TemplateSample(ch.name + '_' + sName,
                                       rl.Sample.BACKGROUND, templ)
            ch.addSample(sample)

        data_obs = get_template_muonCR("muondata", isPass, obs=msd)
        ch.setObservation(data_obs, read_sumw2=True)

    tie_ttbar_pass_fail(model['muonCRpass_ttbar'], model['muonCRfail_ttbar'])

    with open(os.path.join(str(tmpdir), 'testModel.pkl'), "wb") as fout:
        pickle.dump(model, fout)

    model.renderCombine(os.path.join(str(tmpdir), 'testModel'))
示例#7
0
def jet_mass_producer(args,
                      configs=None,
                      MINIMAL_MODEL=False,
                      includeMassScales=True):
    """Build the jet-mass fit model from ROOT templates and render it for combine.

    For every channel/region a ``rl.Channel`` is filled with one
    ``rl.TemplateSample`` per configured sample, the observation is taken
    either from the "Data" histogram or from ``build_pseudo``, and, for
    channels flagged for QCD estimation, the QCD template is replaced by a
    parametric fail-region sample plus a Bernstein-polynomial transfer factor
    into the pass region.  The finished model is written out with
    ``model.renderCombine(model_name)``.

    Args:
        args: not read anywhere in this function.
        configs: configuration dict (required despite the ``None`` default).
            Keys read here:
              * ``channels`` (required): ``{channel_name: channel_config}``.
              * ``histLocation`` (required): path handed to ``ROOT.TFile``.
              * ``gridHistFileName`` (required): handed to
                ``build_mass_scale_variations``.
              * ``ModelName`` (default ``'Jet_Mass_Model'``).
              * ``binning``: ``{selection: [min_msd, max_msd, binwidth]}``,
                overriding the default 26-edge 50..300 binning per selection.
              * ``InitialQCDFit``, ``QCDFailConstant``: enabled only when the
                value is the string ``"True"``.
              * ``Pseudo``: list; when non-empty pseudo data is used, and if
                its first entry contains ``'lumiScale'`` the number after the
                last ``':'`` is used as luminosity scale.
              * ``BernsteinOrders``, ``InitialQCDFitOrders`` (default
                ``[2, 2]``), ``pt_edges`` (default
                ``[500, 550, 600, 675, 800, 1200]``), ``TFSuffix``
                (default ``""``).
            Keys read from each channel config: ``selection``, ``pt_bin``
            (``'<low>to<high>'``), ``samples``, ``signal`` and optionally
            ``variable`` (default ``'mjet'``), ``regions`` (default ``['']``),
            ``additional_bin`` (default ``''``), ``NormUnc``,
            ``QcdEstimation`` (enabled only when equal to ``"True"``).
        MINIMAL_MODEL: when true, no normalisation nuisances are set up and
            every channel's ``samples`` entry is overwritten (in the caller's
            dict) with ``['QCD', 'WJetsMatched']``.
        includeMassScales: when true, the up/down variation histograms of
            every grid nuisance are attached to each template sample.

    Raises:
        ValueError: if ``configs`` is ``None``, or if data minus MC in a fail
            region is negative in at least one bin.
        RuntimeError: if the initial QCD fit returns a non-zero status.
    """
    if configs is None:
        # The default exists only for signature compatibility; fail with a
        # readable message instead of an AttributeError on ``None.get``.
        raise ValueError('jet_mass_producer requires a configs dict')

    def _pt_bin_index(pt_bins, config):
        """Index in ``pt_bins`` of the lower edge named in config['pt_bin']."""
        lower_edge = float(config['pt_bin'].split('to')[0])
        return np.where(pt_bins == lower_edge)[0][0]

    rebin_msd = True
    binnings = {"W": np.linspace(50, 300, 26), "top": np.linspace(50, 300, 26)}
    for selection, bin_info in configs.get('binning', {}).items():
        min_msd, max_msd, binwidth = bin_info[0], bin_info[1], bin_info[2]
        # Only whole bins are kept, so the upper edge may end below max_msd.
        nbins = int(np.floor((max_msd - min_msd) / binwidth))
        binnings[selection] = np.linspace(min_msd,
                                          nbins * binwidth + min_msd,
                                          nbins + 1)

    # channels for combined fit
    channels = configs['channels']
    qcd_estimation_channels = {
        k: v
        for k, v in channels.items()
        if "QcdEstimation" in v and v["QcdEstimation"] == "True"
    }

    print('channels:', channels.keys())

    # getting path of dir with root file from config
    hist_file = ROOT.TFile(configs['histLocation'])

    do_qcd_estimation = len(qcd_estimation_channels) > 0
    do_initial_qcd_fit = (configs.get("InitialQCDFit", "False") == "True")
    qcd_fail_region_constant = (configs.get("QCDFailConstant",
                                            "False") == "True")

    lumi_scale = 1.
    if ('Pseudo' in configs and len(configs['Pseudo']) > 0
            and 'lumiScale' in configs['Pseudo'][0]):
        lumi_scale = float(configs['Pseudo'][0].split(':')[-1])
    # get name from config, or fall back to default
    model_name = configs.get('ModelName', 'Jet_Mass_Model')

    ################
    #QCD Estimation#
    ################
    # Prepare the (pt, rho) grid on which the Bernstein-polynomial transfer
    # factor is evaluated, and optionally pre-fit it to QCD MC.
    if do_qcd_estimation:
        print(
            'Doing some preparations for data driven QCD Estimate (Bernstein TF)'
        )
        bernstein_orders = tuple(configs.get('BernsteinOrders', [2, 2]))
        qcd_model = rl.Model('qcdmodel')
        qcd_pass, qcd_fail = 0., 0.
        qcd_estimation_relevant_selection = 'W'
        for channel_name, config in qcd_estimation_channels.items():
            qcd_estimation_relevant_selection = config['selection']
            msd_bins = binnings[qcd_estimation_relevant_selection]
            fail_ch = rl.Channel(channel_name + 'fail')
            pass_ch = rl.Channel(channel_name + 'pass')
            qcd_model.addChannel(fail_ch)
            qcd_model.addChannel(pass_ch)
            additional_bin = config.get('additional_bin', '')
            fail_hist = hist_file.Get('W_QCD__mjet_' + config['pt_bin'] +
                                      additional_bin + '_fail')
            pass_hist = hist_file.Get('W_QCD__mjet_' + config['pt_bin'] +
                                      additional_bin + '_pass')
            if rebin_msd:
                fail_hist = fail_hist.Rebin(len(msd_bins) - 1, 'msd', msd_bins)
                pass_hist = pass_hist.Rebin(len(msd_bins) - 1, 'msd', msd_bins)
            if lumi_scale != 1.0:
                fail_hist = scale_lumi(fail_hist, lumi_scale)
                pass_hist = scale_lumi(pass_hist, lumi_scale)

            # Both channels get an all-zero signal sample built from a reset
            # clone of the fail histogram.
            empty_hist = fail_hist.Clone()
            empty_hist.Reset()
            signal_fail = rl.TemplateSample(
                channel_name + 'fail' + '_' + 'Signal', rl.Sample.SIGNAL,
                empty_hist)
            fail_ch.addSample(signal_fail)
            signal_pass = rl.TemplateSample(
                channel_name + 'pass' + '_' + 'Signal', rl.Sample.SIGNAL,
                empty_hist)
            pass_ch.addSample(signal_pass)

            fail_ch.setObservation(fail_hist)
            pass_ch.setObservation(pass_hist)
            qcd_fail += fail_ch.getObservation().sum()
            qcd_pass += pass_ch.getObservation().sum()
        # Overall pass/fail ratio of the QCD MC, summed over all channels.
        qcd_eff = qcd_pass / qcd_fail

        pt_edges = configs.get('pt_edges', [500, 550, 600, 675, 800, 1200])
        pt_bins = np.array(pt_edges)
        msd_bins = binnings[qcd_estimation_relevant_selection]
        msd = rl.Observable('msd', msd_bins)

        # Evaluation points for all (pt, msd) bins at once: 30% into each pt
        # bin and at the centre of each msd bin.
        ptpts, msdpts = np.meshgrid(pt_bins[:-1] + 0.3 * np.diff(pt_bins),
                                    msd_bins[:-1] + 0.5 * np.diff(msd_bins),
                                    indexing='ij')
        rhopts = 2 * np.log(msdpts / ptpts)
        # NOTE(review): the pt scaling is hard-coded to 500..1200 although
        # pt_edges is configurable - confirm this is intended.
        ptscaled = (ptpts - 500.) / (1200. - 500.)
        rhoscaled = (rhopts - (-6)) / ((-2.1) - (-6))
        validbins = (rhoscaled >= 0) & (rhoscaled <= 1)
        rhoscaled[~validbins] = 1  # we will mask these out later

        TF_suffix = configs.get('TFSuffix', "")

        if do_initial_qcd_fit:
            initial_qcd_fit_orders = tuple(
                configs.get('InitialQCDFitOrders', [2, 2]))
            if not os.path.exists(model_name):
                os.makedirs(model_name)
            print('QCD eff:', qcd_eff)
            tf_MCtempl = rl.BernsteinPoly(
                "tf_MCtempl_" + model_name + TF_suffix,
                initial_qcd_fit_orders, ['pt', 'rho'],
                init_params=np.ones((initial_qcd_fit_orders[0] + 1,
                                     initial_qcd_fit_orders[1] + 1)),
                limits=(-50, 50))
            tf_MCtempl_params = qcd_eff * tf_MCtempl(ptscaled, rhoscaled)
            # Only the QCD-estimation channels were added to qcd_model above,
            # so only those can be looked up here.
            for channel_name, config in qcd_estimation_channels.items():
                ptbin = _pt_bin_index(pt_bins, config)
                failCh = qcd_model[channel_name + 'fail']
                passCh = qcd_model[channel_name + 'pass']
                failObs = failCh.getObservation()
                if qcd_fail_region_constant:
                    print("Setting QCD parameters in fail region constant")
                qcdparams = np.array([
                    rl.IndependentParameter('qcdparam_' + model_name +
                                            TF_suffix + '_ptbin%d_msdbin%d' %
                                            (ptbin, i),
                                            0,
                                            constant=qcd_fail_region_constant)
                    for i in range(msd.nbins)
                ])

                # Per-bin yield = observation * (1 + k/sqrt(N))**param, with
                # sqrt(N) floored at 1.
                sigmascale = 10.
                scaledparams = failObs * (
                    1 +
                    sigmascale / np.maximum(1., np.sqrt(failObs)))**qcdparams
                fail_qcd = rl.ParametericSample('%sfail_qcd' % channel_name,
                                                rl.Sample.BACKGROUND, msd,
                                                scaledparams)
                failCh.addSample(fail_qcd)
                pass_qcd = rl.TransferFactorSample('%spass_qcd' % channel_name,
                                                   rl.Sample.BACKGROUND,
                                                   tf_MCtempl_params[ptbin, :],
                                                   fail_qcd)
                passCh.addSample(pass_qcd)

                failCh.mask = validbins[ptbin]
                passCh.mask = validbins[ptbin]

            qcd_model.renderCombine(model_name + "/qcdmodel")

            qcdfit_ws = ROOT.RooWorkspace('w')
            simpdf, obs = qcd_model.renderRoofit(qcdfit_ws)
            ROOT.Math.MinimizerOptions.SetDefaultPrecision(1e-18)
            qcdfit = simpdf.fitTo(
                obs,
                ROOT.RooFit.Extended(True),
                ROOT.RooFit.SumW2Error(True),
                ROOT.RooFit.Strategy(1),
                ROOT.RooFit.Save(),
                ROOT.RooFit.Minimizer('Minuit2', 'migrad'),
                ROOT.RooFit.PrintLevel(1),
                ROOT.RooFit.Minos(0))

            qcdfit_ws.add(qcdfit)
            # The workspace is written before the status check, so a failed
            # fit is still available on disk (skipped when running under
            # pytest).
            if "pytest" not in sys.modules:
                qcdfit_ws.writeToFile(model_name + '/qcdfit_' + model_name +
                                      TF_suffix + '.root')
            if qcdfit.status() != 0:
                raise RuntimeError('Could not fit qcd')

            qcd_model.readRooFitResult(qcdfit)

            # Swap the fitted MC transfer-factor parameters for decorrelated
            # nuisances built from the fit result.
            param_names = [p.name for p in tf_MCtempl.parameters.reshape(-1)]
            decoVector = rl.DecorrelatedNuisanceVector.fromRooFitResult(
                tf_MCtempl.name + '_deco', qcdfit, param_names)
            tf_MCtempl.parameters = decoVector.correlated_params.reshape(
                tf_MCtempl.parameters.shape)
            tf_MCtempl_params_final = tf_MCtempl(ptscaled, rhoscaled)
            tf_dataResidual = rl.BernsteinPoly("tf_dataResidual_" +
                                               model_name + TF_suffix,
                                               bernstein_orders, ['pt', 'rho'],
                                               limits=(-50, 50))
            tf_dataResidual_params = tf_dataResidual(ptscaled, rhoscaled)
            tf_params = qcd_eff * tf_MCtempl_params_final * tf_dataResidual_params
        else:
            tf_params = None  # defined after the channels are set up

    # Reading categories of constituent variations for nuisance parameters
    # from gridHist
    grid_nuisances, _ = build_mass_scale_variations(
        configs['gridHistFileName'])

    # setting up rhalphalib roofit model
    model = rl.Model(model_name)

    # setting up nuisances for systematic uncertainties
    print('CMS_lumi', 'lnN')
    lumi = rl.NuisanceParameter('CMS_lumi', 'lnN')
    lumi_effect = 1.027

    # Map sample name -> [nuisance parameter, effect size].  A sample gets the
    # first positive uncertainty whose name is a substring of the sample name;
    # an already registered nuisance with a matching name is reused.
    norm_nuisances = {}
    if not MINIMAL_MODEL:
        for channel_config in channels.values():
            channel_samples = channel_config['samples']
            if 'NormUnc' not in channel_config:
                continue
            norm_uncertainties = channel_config['NormUnc']
            for sample in channel_samples:
                for name, norm_unc in norm_uncertainties.items():
                    nuisance_par = [
                        rl.NuisanceParameter(name + '_normUnc', 'lnN'),
                        norm_unc
                    ]
                    for existing in norm_nuisances.values():
                        if name in existing[0].name:
                            nuisance_par = existing
                    if (norm_unc > 0 and name in sample
                            and sample not in norm_nuisances):
                        norm_nuisances[sample] = nuisance_par

    for channel_name, config in channels.items():
        print('setting up channel:', channel_name)
        uses_qcd_estimation = channel_name in qcd_estimation_channels

        # using hists with /variable/ in their name (default: mjet)
        variable = 'mjet' if 'variable' not in config else config['variable']
        # getting list of samples from config
        if MINIMAL_MODEL:
            config['samples'] = ['QCD', 'WJetsMatched']
        samples = config['samples']
        # for WMass fit there are multiple regions per sample
        regions = [''] if 'regions' not in config else config['regions']

        print('getting template of variable:', variable)
        print('samples:', samples)
        print('regions:', regions)
        msd_bins = binnings[config['selection']]

        for region in regions:
            additional_bin = config.get('additional_bin', '')
            region_suffix = '_' + region if len(region) > 0 else ''
            # Histogram-name template with two slots: the sample name and an
            # optional variation prefix.
            hist_dir = config[
                'selection'] + '_%s__' + variable + '_%s' + config[
                    'pt_bin'] + additional_bin + region_suffix
            print('hist_dir:', hist_dir)
            # setting up channel for fit (name must be unique and can't
            # include any '_')
            region_name = channel_name + region
            ch = rl.Channel(region_name)
            model.addChannel(ch)
            print('rl.Channel:', ch)

            for sample_name in samples:
                # do not include QCD template here, but rather use qcd
                # estimation below
                if uses_qcd_estimation and 'qcd' in sample_name.lower():
                    continue

                # specify if sample is signal or background type
                if sample_name in config['signal']:
                    sample_type = rl.Sample.SIGNAL
                else:
                    sample_type = rl.Sample.BACKGROUND
                sample_hist = hist_file.Get(hist_dir % (sample_name, ""))
                print(hist_dir % (sample_name, ""))
                sample_hist.SetName('msd')

                # rebin hist
                if rebin_msd:
                    sample_hist = sample_hist.Rebin(
                        len(msd_bins) - 1, 'msd', msd_bins)
                if lumi_scale != 1.0:
                    sample_hist = scale_lumi(sample_hist, lumi_scale)

                # setup actual rhalphalib sample
                sample = rl.TemplateSample(ch.name + '_' + sample_name,
                                           sample_type, sample_hist)
                # setting effects of constituent variation nuisances
                # (up/down); the variation histograms are only read when they
                # are actually going to be used.
                if includeMassScales:
                    for grid_nuisance, x, y, category in grid_nuisances:
                        variation = str(x) + '_' + str(y) + '_' + category + '_'
                        hist_up = hist_file.Get(
                            hist_dir % (sample_name, variation) + '__up')
                        hist_down = hist_file.Get(
                            hist_dir % (sample_name, variation) + '__down')

                        # rebin hists
                        if rebin_msd:
                            hist_up = hist_up.Rebin(
                                len(msd_bins) - 1, 'msd', msd_bins)
                            hist_down = hist_down.Rebin(
                                len(msd_bins) - 1, 'msd', msd_bins)
                        if lumi_scale != 1.0:
                            hist_up = scale_lumi(hist_up, lumi_scale)
                            hist_down = scale_lumi(hist_down, lumi_scale)

                        sample.setParamEffect(grid_nuisance, hist_up,
                                              hist_down)
                sample.setParamEffect(lumi, lumi_effect)
                if sample_name in norm_nuisances:
                    sample.setParamEffect(norm_nuisances[sample_name][0],
                                          norm_nuisances[sample_name][1])

                ch.addSample(sample)

            PseudoData = 'Pseudo' in configs and len(configs['Pseudo']) > 0
            if PseudoData:
                data_hist = build_pseudo(samples, hist_file, hist_dir,
                                         configs['Pseudo'], MINIMAL_MODEL)
            else:
                print('using data!!!!!')
                data_hist = hist_file.Get(hist_dir % ("Data", ""))

            if rebin_msd:
                data_hist = data_hist.Rebin(len(msd_bins) - 1, 'msd', msd_bins)
            data_hist.SetName('msd')
            ch.setObservation(data_hist, read_sumw2=PseudoData)
            if uses_qcd_estimation:
                # Drop the msd bins whose rho falls outside the range on
                # which the transfer factor is defined.
                ch.mask = validbins[_pt_bin_index(pt_bins, config)]

    if do_qcd_estimation:
        # QCD TF
        if not do_initial_qcd_fit:
            tf_params = rl.BernsteinPoly('tf_params_' + model_name + TF_suffix,
                                         bernstein_orders, ['pt', 'rho'],
                                         limits=(-50, 50))
            print(
                'Using QCD efficiency (N2-ddt) of %.2f%% to scale initial QCD in pass region'
                % (qcd_eff * 100))
            tf_params = qcd_eff * tf_params(ptscaled, rhoscaled)

        # Same channel selection as everywhere else in this function
        # (QcdEstimation == "True"), so a channel never ends up with both the
        # MC QCD template and the data-driven estimate.
        for channel_name, config in qcd_estimation_channels.items():
            print(channel_name, 'qcd estimation')
            fail_ch = model[channel_name + 'fail']
            pass_ch = model[channel_name + 'pass']
            ptbin = _pt_bin_index(pt_bins, config)
            if qcd_fail_region_constant:
                print("Setting QCD parameters in fail region constant")
            qcd_params = np.array([
                rl.IndependentParameter('qcdparam_' + model_name + TF_suffix +
                                        '_ptbin%i_msdbin%i' % (ptbin, i),
                                        0,
                                        constant=qcd_fail_region_constant)
                for i in range(msd.nbins)
            ])

            # Initial QCD in the fail region = observation minus every
            # template sample already in the channel.  The observation may be
            # a tuple (its first element is the yields) or a plain array.
            fail_obs = fail_ch.getObservation()
            if isinstance(fail_obs, tuple):
                fail_obs = fail_obs[0]
            initial_qcd = fail_obs.astype(float)
            for sample in fail_ch:
                initial_qcd -= sample.getExpectation(nominal=True)
            if np.any(initial_qcd < 0.):
                print('negative bins in initial_qcd in ', channel_name)
                raise ValueError(
                    'inital qcd (fail qcd from data - mc) negative at least one bin'
                )
            sigmascale = 10.
            scaledparams = initial_qcd * (
                1 +
                sigmascale / np.maximum(1., np.sqrt(initial_qcd)))**qcd_params
            fail_qcd = rl.ParametericSample('%sfail_qcd' % channel_name,
                                            rl.Sample.BACKGROUND, msd,
                                            scaledparams)
            fail_ch.addSample(fail_qcd)
            pass_qcd = rl.TransferFactorSample('%spass_qcd' % channel_name,
                                               rl.Sample.BACKGROUND,
                                               tf_params[ptbin, :], fail_qcd)
            pass_ch.addSample(pass_qcd)

    model.renderCombine(model_name)
示例#8
0
def create_datacard(inputfile, carddir, nbins, nMCTF, nDataTF, passBinName, failBinName='fail', add_blinded=False, include_ac=False):

    # open uproot file once
    upfile = uproot.open(inputfile)

    regionPairs = [('SR'+passBinName, 'fit'+failBinName)]  # pass, fail region pairs
    if add_blinded:
        regionPairs += [('pass'+passBinName, failBinName)]  # add sideband region pairs

    regions = [item for t in regionPairs for item in t]  # all regions

    # luminosity unc https://gitlab.cern.ch/hh/naming-conventions#luminosity
    lumi_16 = 36.33
    lumi_17 = 41.48
    lumi_18 = 59.83
    lumi_run2 = lumi_16 + lumi_17 + lumi_18
    lumi_13TeV_2016 = rl.NuisanceParameter('lumi_13TeV_2016', 'lnN')
    lumi_13TeV_2017 = rl.NuisanceParameter('lumi_13TeV_2017', 'lnN')
    lumi_13TeV_2018 = rl.NuisanceParameter('lumi_13TeV_2018', 'lnN')
    lumi_13TeV_correlated = rl.NuisanceParameter('lumi_13TeV_correlated', 'lnN')
    lumi_13TeV_1718 = rl.NuisanceParameter('lumi_13TeV_1718', 'lnN')
    ttbarBin1MCstats = rl.NuisanceParameter('CMS_bbbb_boosted_ggf_ttbarBin1_yieldMCStats', 'lnN')
    PNetHbbScaleFactorssyst = rl.NuisanceParameter('CMS_bbbb_boosted_ggf_PNetHbbScaleFactors_correlated', 'lnN')
    brHbb = rl.NuisanceParameter('BR_hbb', 'lnN')
    pdfqqbar = rl.NuisanceParameter('pdf_Higgs_qqbar', 'lnN')
    pdfttH = rl.NuisanceParameter('pdf_Higgs_ttH', 'lnN')
    pdfggHH = rl.NuisanceParameter('pdf_Higgs_ggHH', 'lnN')
    pdfqqHH = rl.NuisanceParameter('pdf_Higgs_qqHH', 'lnN')
    qcdScaleVH = rl.NuisanceParameter('QCDscale_VH', 'lnN')
    qcdScalettH = rl.NuisanceParameter('QCDscale_ttH', 'lnN')
    qcdScaleqqHH = rl.NuisanceParameter('QCDscale_qqHH', 'lnN')
    alphaS = rl.NuisanceParameter('alpha_s', 'lnN')
    fsrothers = rl.NuisanceParameter('CMS_bbbb_boosted_ggf_ps_fsr_others', 'lnN')
    isrothers = rl.NuisanceParameter('CMS_bbbb_boosted_ggf_ps_isr_others', 'lnN')
    if not include_ac:
        thu_hh = rl.NuisanceParameter('THU_SMHH', 'lnN')

    msdbins = np.linspace(50, nbins*10.0+50.0, nbins+1)
    msd = rl.Observable('msd', msdbins)
    msdpts = msdbins[:-1] + 0.5 * np.diff(msdbins)
    msdscaled = (msdpts - 50.)/(10.0*nbins)

    # Build qcd MC pass+fail model and fit to polynomial
    qcdmodel = rl.Model('qcdmodel')
    qcdpass, qcdfitfail = 0., 0.
    passCh = rl.Channel('passqcdmodel')
    fitfailCh = rl.Channel('fitfailqcdmodel')
    qcdmodel.addChannel(fitfailCh)
    qcdmodel.addChannel(passCh)

    passTempl = get_hist(upfile, 'histJet2MassBlind_'+passBinName+'_QCD', obs=msd)
    fitfailTempl = get_hist(upfile, 'histJet2Massfit_fail_QCD', obs=msd)

    passCh.setObservation(passTempl[:-1])
    fitfailCh.setObservation(fitfailTempl[:-1])
    qcdpass = passCh.getObservation().sum()
    qcdfitfail = fitfailCh.getObservation().sum()

    qcdeffpass = qcdpass / qcdfitfail

    # transfer factor
    tf_dataResidual = rl.BernsteinPoly("CMS_bbbb_boosted_ggf_tf_dataResidual_"+passBinName, (nDataTF,), ['msd'], limits=(-20, 20))
    tf_dataResidual_params = tf_dataResidual(msdscaled)
    tf_params_pass = qcdeffpass * tf_dataResidual_params

    # qcd params
    qcdparams = np.array([rl.IndependentParameter('CMS_bbbb_boosted_ggf_qcdparam_msdbin%d' % i, 0) for i in range(msd.nbins)])

    # dictionary of shape systematics -> name in cards
    systs = OrderedDict([
        ('mHHTHunc', 'CMS_bbbb_boosted_ggf_mHHTHunc'),
        ('FSRPartonShower', 'ps_fsr'),
        ('ISRPartonShower', 'ps_isr'),
        ('ggHHPDFacc', 'CMS_bbbb_boosted_ggf_ggHHPDFacc'),
        ('ggHHQCDacc', 'CMS_bbbb_boosted_ggf_ggHHQCDacc'),
        ('othersQCD', 'CMS_bbbb_boosted_ggf_othersQCD'),
        ('pileupWeight2016', 'CMS_pileup_2016'),
        ('pileupWeight2017', 'CMS_pileup_2017'),
        ('pileupWeight2018', 'CMS_pileup_2018'),
        ('JER2016', 'CMS_res_j_2016'),
        ('JER2017', 'CMS_res_j_2017'),
        ('JER2018', 'CMS_res_j_2018'),
        ('JES_Abs', 'CMS_scale_j_Abs'),
        ('JES_Abs_2016', 'CMS_scale_j_Abs_2016'),
        ('JES_Abs_2017', 'CMS_scale_j_Abs_2017'),
        ('JES_Abs_2018', 'CMS_scale_j_Abs_2018'),
        ('JES_BBEC1', 'CMS_scale_j_BBEC1'),
        ('JES_BBEC1_2016', 'CMS_scale_j_BBEC1_2016'),
        ('JES_BBEC1_2017', 'CMS_scale_j_BBEC1_2017'),
        ('JES_BBEC1_2018', 'CMS_scale_j_BBEC1_2018'),
        ('JES_EC2', 'CMS_scale_j_EC2'),
        ('JES_EC2_2016', 'CMS_scale_j_EC2_2016'),
        ('JES_EC2_2017', 'CMS_scale_j_EC2_2017'),
        ('JES_EC2_2018', 'CMS_scale_j_EC2_2018'),
        ('JES_FlavQCD', 'CMS_scale_j_FlavQCD'),
        ('JES_HF', 'CMS_scale_j_HF'),
        ('JES_HF_2016', 'CMS_scale_j_HF_2016'),
        ('JES_HF_2017', 'CMS_scale_j_HF_2017'),
        ('JES_HF_2018', 'CMS_scale_j_HF_2018'),
        ('JES_RelBal', 'CMS_scale_j_RelBal'),
        ('JES_RelSample_2016', 'CMS_scale_j_RelSample_2016'),
        ('JES_RelSample_2017', 'CMS_scale_j_RelSample_2017'),
        ('JES_RelSample_2018', 'CMS_scale_j_RelSample_2018'),
        ('JMS2016', 'CMS_bbbb_boosted_ggf_jms_2016'),
        ('JMS2017', 'CMS_bbbb_boosted_ggf_jms_2017'),
        ('JMS2018', 'CMS_bbbb_boosted_ggf_jms_2018'),
        ('JMR2016', 'CMS_bbbb_boosted_ggf_jmr_2016'),
        ('JMR2017', 'CMS_bbbb_boosted_ggf_jmr_2017'),
        ('JMR2018', 'CMS_bbbb_boosted_ggf_jmr_2018'),
        ('ttbarBin1Jet2PNetCut', 'CMS_bbbb_boosted_ggf_ttbarBin1Jet2PNetCut'),
        ('ttJetsCorr', 'CMS_bbbb_boosted_ggf_ttJetsCorr'),
        ('BDTShape', 'CMS_bbbb_boosted_ggf_ttJetsBDTShape'),
        ('PNetShape', 'CMS_bbbb_boosted_ggf_ttJetsPNetShape'),
        ('PNetHbbScaleFactors', 'CMS_bbbb_boosted_ggf_PNetHbbScaleFactors_uncorrelated'),
        ('triggerEffSF', 'CMS_bbbb_boosted_ggf_triggerEffSF_uncorrelated'),
        ('trigCorrHH2016', 'CMS_bbbb_boosted_ggf_trigCorrHH2016'),
        ('trigCorrHH2017', 'CMS_bbbb_boosted_ggf_trigCorrHH2017'),
        ('trigCorrHH2018', 'CMS_bbbb_boosted_ggf_trigCorrHH2018'),
    ])

    # build actual fit model now
    model = rl.Model("HHModel")
    for region in regions:
        logging.info('starting region: %s' % region)
        ch = rl.Channel(region)
        model.addChannel(ch)

        if region == 'pass'+passBinName:
            catn = 'Blind_'+passBinName
        elif region == 'SR'+passBinName:
            catn = '_'+passBinName
        elif region == 'fit'+failBinName:
            catn = 'fit_'+failBinName
        else:
            catn = 'Blind_'+failBinName

        # dictionary of name in datacards -> name in ROOT file
        templateNames = OrderedDict([
            ('ttbar', 'histJet2Mass'+catn+'_TTJets'),
            ('VH_hbb', 'histJet2Mass'+catn+'_VH'),
            ('ttH_hbb', 'histJet2Mass'+catn+'_ttH'),
            ('bbbb_boosted_ggf_others', 'histJet2Mass'+catn+'_others'),
            ('bbbb_boosted_ggf_qcd_datadriven', 'histJet2Mass'+catn+'_QCD'),
            ('data', 'histJet2Mass'+catn+'_Data'),
            ('ggHH_kl_1_kt_1_hbbhbb', 'histJet2Mass'+catn+'_ggHH_kl_1_kt_1_boost4b'),
            ('qqHH_CV_1_C2V_1_kl_1_hbbhbb', 'histJet2Mass'+catn+'_qqHH_CV_1_C2V_1_kl_1_boost4b'),
        ])
        ac_signals = OrderedDict()
        if include_ac:
            ac_signals = OrderedDict([
                ('ggHH_kl_2p45_kt_1_hbbhbb', 'histJet2Mass'+catn+'_ggHH_kl_2p45_kt_1_boost4b'),
                ('ggHH_kl_5_kt_1_hbbhbb', 'histJet2Mass'+catn+'_ggHH_kl_5_kt_1_boost4b'),
                ('ggHH_kl_0_kt_1_hbbhbb', 'histJet2Mass'+catn+'_ggHH_kl_0_kt_1_boost4b'),
                ('qqHH_CV_1_C2V_0_kl_1_hbbhbb', 'histJet2Mass'+catn+'_qqHH_CV_1_C2V_0_kl_1_boost4b'),
                ('qqHH_CV_1p5_C2V_1_kl_1_hbbhbb', 'histJet2Mass'+catn+'_qqHH_CV_1p5_C2V_1_kl_1_boost4b'),
                ('qqHH_CV_1_C2V_1_kl_2_hbbhbb', 'histJet2Mass'+catn+'_qqHH_CV_1_C2V_1_kl_2_boost4b'),
                ('qqHH_CV_1_C2V_2_kl_1_hbbhbb', 'histJet2Mass'+catn+'_qqHH_CV_1_C2V_2_kl_1_boost4b'),
                ('qqHH_CV_1_C2V_1_kl_0_hbbhbb', 'histJet2Mass'+catn+'_qqHH_CV_1_C2V_1_kl_0_boost4b'),
                ('qqHH_CV_0p5_C2V_1_kl_1_hbbhbb', 'histJet2Mass'+catn+'_qqHH_CV_0p5_C2V_1_kl_1_boost4b'),
            ])
            templateNames.update(ac_signals)

        templates = {}
        for temp in templateNames:
            templates[temp] = get_hist(upfile, templateNames[temp], obs=msd)

        if adjust_posdef_yields:
            templates_posdef = {}
            # requires python3 and cvxpy
            if sys.version_info.major == 3:
                from bpe import BasisPointExpansion
                from adjust_to_posdef import ggHH_points, qqHH_points, plot_shape
                channel = "_hbbhbb"
                # get qqHH points
                qqHHproc = BasisPointExpansion(3)
                ggHHproc = BasisPointExpansion(2)
                newpts = {}
                newerrs = {}
                for HHproc, HH_points in zip([ggHHproc, qqHHproc], [ggHH_points, qqHH_points]):
                    for name, c in HH_points.items():
                        shape = np.clip(templates[name + channel][0], 0, None)
                        err = np.sqrt(templates[name + channel][3])
                        # set 0 bin error to something non0
                        err[err == 0] = err[err.nonzero()].min()
                        logging.debug(name + channel)
                        logging.debug("shape: {shape}".format(shape=shape))
                        logging.debug("err: {err}".format(err=err))
                        HHproc.add_point(c, shape, err)
                    # fit HH points with SCS
                    HHproc.solve("scs", tol=1e-9)
                    # get new HH points
                    for name, c in HH_points.items():
                        newshape = HHproc(c)
                        shape = templates[name + channel][0]
                        edges = templates[name + channel][1]
                        obs_name = templates[name + channel][2]
                        err = np.sqrt(templates[name + channel][3])
                        # set error to 100% if shape orignally 0 and now not
                        newerr = np.copy(err)
                        newerr[(newshape > 0) & (newerr == 0)] = newshape[(newshape > 0) & (newerr == 0)]
                        templates_posdef[name + channel] = (newshape, edges, obs_name, np.square(newerr))
                        plot_shape(shape, newshape, err, newerr, name+"_"+region)
                        newpts[name + channel] = newshape
                        newerrs[name + channel] = newerr
                np.savez("newshapes_{}.npz".format(region), **newpts)
                np.savez("newerrors_{}.npz".format(region), **newerrs)
            else:
                if not (os.path.exists("newshapes_{}.npz".format(region)) and os.path.exists("newerrors_{}.npz".format(region))):
                    raise RuntimeError("Run script in python3 first to get shapes and errors")
                newpts = dict(np.load("newshapes_{}.npz".format(region)))
                newerrs = dict(np.load("newerrors_{}.npz".format(region)))
                for temp in templateNames:
                    if "HH" in temp:
                        newshape = newpts[temp]
                        newerr = newerrs[temp]
                        edges = templates[temp][1]
                        obs_name = templates[temp][2]
                        templates_posdef[temp] = (newshape, edges, obs_name, np.square(newerr))

        syst_param_array = []
        for syst in systs:
            syst_param_array.append(rl.NuisanceParameter(systs[syst], 'shape'))

        sNames = [proc for proc in templates.keys() if proc not in ['bbbb_boosted_ggf_qcd_datadriven', 'data']]

        for sName in sNames:
            logging.info('get templates for: %s' % sName)
            # get templates
            templ = templates[sName]
            # don't allow them to go negative
            valuesNominal = np.maximum(templ[0], 0.)
            templ = (valuesNominal, templ[1], templ[2], templ[3])
            stype = rl.Sample.SIGNAL if 'HH' in sName else rl.Sample.BACKGROUND
            if adjust_posdef_yields and "HH" in sName:
                # use posdef as nominal, but keep original to get relative changes to systematics
                templ_posdef = templates_posdef[sName]
                sample = rl.TemplateSample(ch.name + '_' + sName, stype, templ_posdef)
            else:
                sample = rl.TemplateSample(ch.name + '_' + sName, stype, templ)
            sample.setParamEffect(lumi_13TeV_2016, 1.01 ** (lumi_16 / lumi_run2))
            sample.setParamEffect(lumi_13TeV_2017, 1.02 ** (lumi_17 / lumi_run2))
            sample.setParamEffect(lumi_13TeV_2018, 1.015 ** (lumi_18 / lumi_run2))
            sample.setParamEffect(
                lumi_13TeV_correlated,
                1.02 ** (lumi_18 / lumi_run2) * 1.009 ** (lumi_17 / lumi_run2) * 1.006 ** (lumi_16 / lumi_run2)
             )
            sample.setParamEffect(
                lumi_13TeV_1718,
                1.006 ** (lumi_17 / lumi_run2) * 1.002 ** (lumi_18 / lumi_run2)
            )
            if not include_ac:
                if sName == "ggHH_kl_1_kt_1_hbbhbb":
                    sample.setParamEffect(thu_hh, 1.0556, 0.7822)

            if sName == "bbbb_boosted_ggf_others":
                if "Bin1" in region:
                    sample.setParamEffect(fsrothers, 1.06, 0.82)
                    sample.setParamEffect(isrothers, 1.05, 0.94)
                elif "Bin2" in region:
                    sample.setParamEffect(fsrothers, 1.02, 0.90)
                    sample.setParamEffect(isrothers, 1.07, 0.93)
                elif "Bin3" in region:
                    sample.setParamEffect(fsrothers, 1.02, 0.91)
                    sample.setParamEffect(isrothers, 1.06, 0.93)
                elif "fail" in region:
                    sample.setParamEffect(fsrothers, 1.05, 0.92)
                    sample.setParamEffect(isrothers, 1.05, 0.94)

            if sName == "ttbar" and "Bin1" in region:
                if region == "passBin1":
                    sample.setParamEffect(ttbarBin1MCstats, 1.215)
                elif region == "SRBin1":
                    sample.setParamEffect(ttbarBin1MCstats, 1.187)

            if ("VH" in sName) or ("ttH" in sName):
                sample.setParamEffect(PNetHbbScaleFactorssyst, 1.04)
            elif "HH" in sName:
                sample.setParamEffect(PNetHbbScaleFactorssyst, 1.0816)

            if "hbbhbb" in sName:
                sample.setParamEffect(brHbb, 1.0248, 0.9748)
            elif "hbb" in sName:
                sample.setParamEffect(brHbb, 1.0124, 0.9874)

            if "ttH" in sName:
                sample.setParamEffect(pdfttH, 1.030)
                sample.setParamEffect(qcdScalettH, 1.058, 0.908)
                sample.setParamEffect(alphaS, 1.020)
            elif "VH" in sName:
                sample.setParamEffect(pdfqqbar, 1.0154)
                sample.setParamEffect(qcdScaleVH, 1.0179, 0.9840)
                sample.setParamEffect(alphaS, 1.009)
            elif "ggHH" in sName:
                sample.setParamEffect(pdfggHH, 1.030)
            elif "qqHH" in sName:
                sample.setParamEffect(pdfqqHH, 1.021)
                sample.setParamEffect(qcdScaleqqHH, 1.0003, 0.9996)

            # shape systematics
            mask = (valuesNominal > 0)
            errorsNominal = np.ones_like(valuesNominal)
            errorsNominal[mask] = 1. + np.sqrt(templ[3][mask])/valuesNominal[mask]

            # set mc stat uncs
            logging.info('setting autoMCStats for %s in %s' % (sName, region))
            logging.debug('nominal   : {nominal}'.format(nominal=valuesNominal))
            logging.debug('error     : {errors}'.format(errors=errorsNominal))
            sample.autoMCStats()

            for isyst, syst in enumerate(systs):
                # negligible uncertainty
                if 'JES_EC2' in syst or 'JES_HF' in syst:
                    continue
                # add some easy skips
                if (sName != 'ttbar') and (syst in ['ttJetsCorr', 'BDTShape', 'PNetShape']):
                    continue
                if ((sName != 'ttbar') or ('Bin1' not in region)) and (syst == 'ttbarBin1Jet2PNetCut'):
                    continue
                if ('ggHH' not in sName) and (syst in ['ggHHPDFacc', 'ggHHQCDacc', 'mHHTHunc']):
                    continue
                if ('others' not in sName) and (syst == 'othersQCD'):
                    continue
                if ('hbb' not in sName) and (syst == 'PNetHbbScaleFactors'):
                    continue
                if ('HH' not in sName) and (syst in ['trigCorrHH2016', 'trigCorrHH2017', 'trigCorrHH2018']):
                    continue
                logging.info('setting shape effect %s for %s in %s' % (syst, sName, region))
                valuesUp = get_hist(upfile, '%s_%sUp' % (templateNames[sName], syst), obs=msd)[0]
                valuesDown = get_hist(upfile, '%s_%sDown' % (templateNames[sName], syst), obs=msd)[0]
                effectUp = np.ones_like(valuesNominal)
                effectDown = np.ones_like(valuesNominal)
                maskUp = (valuesUp >= 0)
                maskDown = (valuesDown >= 0)
                effectUp[mask & maskUp] = valuesUp[mask & maskUp]/valuesNominal[mask & maskUp]
                effectDown[mask & maskDown] = valuesDown[mask & maskDown]/valuesNominal[mask & maskDown]
                # do shape checks
                normUp = np.sum(valuesUp)
                normDown = np.sum(valuesDown)
                normNominal = np.sum(valuesNominal)
                probUp = valuesUp/normUp
                probDown = valuesDown/normDown
                probNominal = valuesNominal/normNominal
                shapeEffectUp = np.sum(np.abs(probUp - probNominal)/(np.abs(probUp)+np.abs(probNominal)))
                shapeEffectDown = np.sum(np.abs(probDown - probNominal)/(np.abs(probDown)+np.abs(probNominal)))
                logger = logging.getLogger("validate_shapes_{}_{}_{}".format(region, sName, syst))
                valid = True
                if np.allclose(effectUp, 1.) and np.allclose(effectDown, 1.):
                    valid = False
                    logger.warning("No shape effect")
                elif np.allclose(effectUp, effectDown):
                    valid = False
                    logger.warning("Up is the same as Down, but different from nominal")
                elif np.allclose(effectUp, 1.) or np.allclose(effectDown, 1.):
                    valid = False
                    logger.warning("Up or Down is the same as nominal (one-sided)")
                elif shapeEffectUp < 0.001 and shapeEffectDown < 0.001:
                    valid = False
                    logger.warning("No genuine shape effect (just norm)")
                elif (normUp > normNominal and normDown > normNominal) or (normUp < normNominal and normDown < normNominal):
                    valid = False
                    logger.warning("Up and Down vary norm in the same direction")
                if valid:
                    logger.info("Shapes are valid")
                logging.debug("nominal   : {nominal}".format(nominal=valuesNominal))
                logging.debug("effectUp  : {effectUp}".format(effectUp=effectUp))
                logging.debug("effectDown: {effectDown}".format(effectDown=effectDown))
                sample.setParamEffect(syst_param_array[isyst], effectUp, effectDown)
            ch.addSample(sample)

        # data observed
        yields = templates['data'][0]
        data_obs = (yields, msd.binning, msd.name)
        ch.setObservation(data_obs)

    for passChName, failChName in regionPairs:
        logging.info('setting transfer factor for pass region %s, fail region %s' % (passChName, failChName))
        failCh = model[failChName]
        passCh = model[passChName]

        # sideband fail
        initial_qcd = failCh.getObservation().astype(float)  # was integer, and numpy complained about subtracting float from it
        for sample in failCh:
            if sample._name in [failChName+"_"+signalName for signalName in ac_signals.keys()]:
                continue
            logging.debug('subtracting %s from qcd' % sample._name)
            initial_qcd -= sample.getExpectation(nominal=True)
        if np.any(initial_qcd < 0.):
            raise ValueError("initial_qcd negative for some bins..", initial_qcd)
        sigmascale = 10  # to scale the deviation from initial
        scaledparams = initial_qcd * (1 + sigmascale/np.maximum(1., np.sqrt(initial_qcd)))**qcdparams

        # add samples
        fail_qcd = rl.ParametericSample(failChName+'_bbbb_boosted_ggf_qcd_datadriven', rl.Sample.BACKGROUND, msd, scaledparams)
        failCh.addSample(fail_qcd)

        pass_qcd = rl.TransferFactorSample(passChName+'_bbbb_boosted_ggf_qcd_datadriven', rl.Sample.BACKGROUND, tf_params_pass, fail_qcd)
        passCh.addSample(pass_qcd)

    with open(os.path.join(str(carddir), 'HHModel.pkl'), "wb") as fout:
        pickle.dump(model, fout, 2)  # use python 2 compatible protocol

    logging.info('rendering combine model')
    model.renderCombine(os.path.join(str(carddir), 'HHModel'))
# Example #9
# 0
def test_rhalphabet(tmpdir):
    """Build, pre-fit and render the mjj-binned rhalphabet test model.

    Steps, in order:

    1. Build ``qcdmodel`` (one pass and one fail channel per mjj bin) from
       the nominal "QCD" templates, fit a ``BernsteinPoly`` pass/fail
       transfer factor to it with RooFit, and write the workspace to
       ``testModel_qcdfit.root``.
    2. Replace the polynomial's parameters with the ``correlated_params`` of
       a ``DecorrelatedNuisanceVector`` built from the fit result, and
       multiply the result by a second polynomial (``tf_dataResidual``).
    3. Build ``testModel`` with a pass and a fail channel per mjj bin,
       attach the systematic effects to every sample, and add the
       parametric fail / transfer-factor pass QCD samples.
    4. Add the ``muonCRpass`` / ``muonCRfail`` channels and tie the ttbar
       samples to the shared ``tqqeffSF`` / ``tqqnormSF`` parameters.
    5. Pickle the model to ``testModel.pkl`` and call ``renderCombine``.

    The file ``initial_vals.csv`` is read from the current working directory.

    Args:
        tmpdir: Output directory; only ``str(tmpdir)`` is used, to build the
            paths of the files written above.

    Raises:
        RuntimeError: If the QCD fit returns a non-zero status.
        ValueError: If data minus the nominal expectations is negative in
            any bin of a fail channel.
    """
    # experimental systematics
    lumi = rl.NuisanceParameter('CMS_lumi', 'lnN')
    jet_trigger = rl.NuisanceParameter('CMS_jet_trigger', 'lnN')
    jes = rl.NuisanceParameter('CMS_jes', 'lnN')
    jer = rl.NuisanceParameter('CMS_jer', 'lnN')
    ues = rl.NuisanceParameter('CMS_ues', 'lnN')
    btagWeight = rl.NuisanceParameter('CMS_btagWeight', 'lnN')
    btagEffStat = rl.NuisanceParameter('CMS_btagEffStat', 'lnN')

    # theory systematics
    pdf_weight = rl.NuisanceParameter('PDF_weight', 'shape')
    scale_ggF = rl.NuisanceParameter('scale_ggF', 'lnN')
    scale_VBF = rl.NuisanceParameter('scale_VBF', 'lnN')
    scale_VH = rl.NuisanceParameter('scale_VH', 'lnN')
    scale_ttH = rl.NuisanceParameter('scale_ttH', 'lnN')
    # NOTE(review): PS_weight is created but never attached to any sample
    # in this function.
    ps_weight = rl.NuisanceParameter('PS_weight', 'shape')  # noqa: F841

    tqqeffSF = rl.IndependentParameter('tqqeffSF', 1., 0, 20)
    tqqnormSF = rl.IndependentParameter('tqqnormSF', 1., 0, 20)

    ptbins = np.array([450, 1200])
    msdbins = np.linspace(47, 201, 23)
    msd = rl.Observable('msd', msdbins)
    mjjbins = np.array([350, 1000, 4000])
    nmjj = len(mjjbins) - 1

    def set_updown_effect(sample, param, sName, isPass, mjjbin, syst,
                          nominal):
        """Set ``param``'s effect on ``sample`` from the ``<syst>Up`` and
        ``<syst>Down`` templates, each passed through ``syst_variation``
        together with ``nominal``."""
        up = syst_variation(
            get_template(sName, isPass, mjjbin + 1, obs=msd,
                         syst=syst + "Up")[0], nominal)
        down = syst_variation(
            get_template(sName, isPass, mjjbin + 1, obs=msd,
                         syst=syst + "Down")[0], nominal)
        sample.setParamEffect(param, up, down)

    def tie_ttbar_scale_factors(tqqpass, tqqfail):
        """Attach the shared ttbar efficiency and normalisation scale
        factors to a pass/fail pair of ttbar samples."""
        tqqPF = tqqpass.getExpectation(
            nominal=True).sum() / tqqfail.getExpectation(nominal=True).sum()
        tqqpass.setParamEffect(tqqeffSF, 1 * tqqeffSF)
        tqqfail.setParamEffect(tqqeffSF, (1 - tqqeffSF) * tqqPF + 1)
        tqqpass.setParamEffect(tqqnormSF, 1 * tqqnormSF)
        tqqfail.setParamEffect(tqqnormSF, 1 * tqqnormSF)

    # here we derive these all at once with 2D array
    ptpts, msdpts = np.meshgrid(ptbins[:-1] + 0.3 * np.diff(ptbins),
                                msdbins[:-1] + 0.5 * np.diff(msdbins),
                                indexing='ij')
    rhopts = 2 * np.log(msdpts / ptpts)
    ptscaled = (ptpts - 450.) / (1200. - 450.)
    rhoscaled = (rhopts - (-6)) / ((-2.1) - (-6))
    validbins = (rhoscaled >= 0) & (rhoscaled <= 1)
    rhoscaled[~validbins] = 1  # we will mask these out later

    # Build qcd MC pass+fail model and fit to polynomial
    qcdmodel = rl.Model("qcdmodel")
    qcdpass, qcdfail = 0., 0.
    for mjjbin in range(nmjj):
        failCh = rl.Channel("mjjbin%d%s" % (mjjbin, 'fail'))
        passCh = rl.Channel("mjjbin%d%s" % (mjjbin, 'pass'))
        qcdmodel.addChannel(failCh)
        qcdmodel.addChannel(passCh)

        # QCD templates from file
        failTempl = get_template("QCD", 0, mjjbin + 1, obs=msd,
                                 syst="nominal")
        passTempl = get_template("QCD", 1, mjjbin + 1, obs=msd,
                                 syst="nominal")

        failCh.setObservation(failTempl, read_sumw2=True)
        passCh.setObservation(passTempl, read_sumw2=True)

        # With read_sumw2=True, element 0 of the observation is the array of
        # bin yields (it is used that way for initial_qcd further down), so
        # the inclusive yield is its sum over all bins.  The previous
        # expression summed only the first entry of each element of the
        # observation instead.
        qcdfail += failCh.getObservation()[0].sum()
        qcdpass += passCh.getObservation()[0].sum()

    qcdeff = qcdpass / qcdfail
    print("Inclusive P/F from Monte Carlo = " + str(qcdeff))

    # initial values
    print("Initial fit values read from file initial_vals.csv")
    initial_vals = np.genfromtxt('initial_vals.csv')
    initial_vals = initial_vals.reshape(1, 3)
    print(initial_vals)

    tf_MCtempl = rl.BernsteinPoly("tf_MCtempl", (0, 2), ['pt', 'rho'],
                                  init_params=initial_vals,
                                  limits=(-20, 20))
    tf_MCtempl_params = qcdeff * tf_MCtempl(ptscaled, rhoscaled)
    for mjjbin in range(nmjj):
        failCh = qcdmodel['mjjbin%dfail' % mjjbin]
        passCh = qcdmodel['mjjbin%dpass' % mjjbin]
        failObs = failCh.getObservation()

        qcdparams = np.array([
            rl.IndependentParameter('qcdparam_mjjbin%d_msdbin%d' % (mjjbin, i),
                                    0) for i in range(msd.nbins)
        ])
        sigmascale = 10.
        # failObs presumably holds (sumw, sumw2) here, so the product has one
        # row per element; row 0 (built from the yields) is the one used.
        scaledparams = failObs * (
            1 + sigmascale / np.maximum(1., np.sqrt(failObs)))**qcdparams

        fail_qcd = rl.ParametericSample('mjjbin%dfail_qcd' % mjjbin,
                                        rl.Sample.BACKGROUND, msd,
                                        scaledparams[0])
        failCh.addSample(fail_qcd)
        pass_qcd = rl.TransferFactorSample('mjjbin%dpass_qcd' % mjjbin,
                                           rl.Sample.BACKGROUND,
                                           tf_MCtempl_params[0, :], fail_qcd)
        passCh.addSample(pass_qcd)

        # drop bins outside rho validity
        failCh.mask = validbins[0]
        passCh.mask = validbins[0]

    qcdfit_ws = ROOT.RooWorkspace('qcdfit_ws')

    simpdf, obs = qcdmodel.renderRoofit(qcdfit_ws)
    qcdfit = simpdf.fitTo(
        obs,
        ROOT.RooFit.Extended(True),
        ROOT.RooFit.SumW2Error(True),
        ROOT.RooFit.Strategy(2),
        ROOT.RooFit.Save(),
        ROOT.RooFit.Minimizer('Minuit2', 'migrad'),
        ROOT.RooFit.PrintLevel(1),
    )
    qcdfit_ws.add(qcdfit)
    qcdfit_ws.writeToFile(os.path.join(str(tmpdir), 'testModel_qcdfit.root'))

    # Set parameters to fitted values
    allparams = dict(zip(qcdfit.nameArray(), qcdfit.valueArray()))
    for p in tf_MCtempl.parameters.reshape(-1):
        p.value = allparams[p.name]

    if qcdfit.status() != 0:
        raise RuntimeError('Could not fit qcd')

    param_names = [p.name for p in tf_MCtempl.parameters.reshape(-1)]
    decoVector = rl.DecorrelatedNuisanceVector.fromRooFitResult(
        tf_MCtempl.name + '_deco', qcdfit, param_names)
    tf_MCtempl.parameters = decoVector.correlated_params.reshape(
        tf_MCtempl.parameters.shape)
    tf_MCtempl_params_final = tf_MCtempl(ptscaled, rhoscaled)
    tf_dataResidual = rl.BernsteinPoly("tf_dataResidual", (0, 2),
                                       ['pt', 'rho'],
                                       limits=(-20, 20))
    tf_dataResidual_params = tf_dataResidual(ptscaled, rhoscaled)
    tf_params = qcdeff * tf_MCtempl_params_final * tf_dataResidual_params

    # build actual fit model now
    model = rl.Model("testModel")

    # exclude QCD from MC samples
    samps = [
        'ggF', 'VBF', 'WH', 'ZH', 'ttH', 'ttbar', 'singlet', 'Zjets', 'Wjets',
        'VV'
    ]
    sigs = ['VBF']

    # (parameter, template systematic prefix) applied to every non-QCD
    # sample, in this order
    common_systs = [
        (jet_trigger, "jet_trigger"),
        (jes, "JES"),
        (jer, "JER"),
        (ues, "UES"),
        (btagWeight, "btagWeight"),
        (btagEffStat, "btagEffStat"),
        (pdf_weight, "PDF_weight"),
    ]
    # per-process scale variations
    # NOTE(review): samps lists 'WH' and 'ZH' but no 'VH', so the "VH" entry
    # is never applied -- confirm whether scale_VH was meant for WH/ZH.
    scale_systs = {
        "ggF": (scale_ggF, "scalevar_7pt"),
        "VBF": (scale_VBF, "scalevar_3pt"),
        "VH": (scale_VH, "scalevar_3pt"),
        "ttH": (scale_ttH, "scalevar_7pt"),
    }

    for mjjbin in range(nmjj):
        for region in ['pass', 'fail']:
            ch = rl.Channel("mjjbin%d%s" % (mjjbin, region))
            model.addChannel(ch)

            isPass = region == 'pass'

            for sName in samps:
                # expectations
                templ = get_template(sName,
                                     isPass,
                                     mjjbin + 1,
                                     obs=msd,
                                     syst="nominal")
                nominal = templ[0]
                stype = rl.Sample.SIGNAL if sName in sigs else rl.Sample.BACKGROUND
                sample = rl.TemplateSample(ch.name + '_' + sName, stype, templ)

                if sName != "QCD":
                    sample.setParamEffect(lumi, 1.027)
                    for param, syst in common_systs:
                        set_updown_effect(sample, param, sName, isPass,
                                          mjjbin, syst, nominal)

                scale_syst = scale_systs.get(sName)
                if scale_syst is not None:
                    set_updown_effect(sample, scale_syst[0], sName, isPass,
                                      mjjbin, scale_syst[1], nominal)

                ch.addSample(sample)

            data_obs = get_template("data",
                                    isPass,
                                    mjjbin + 1,
                                    obs=msd,
                                    syst="nominal")
            ch.setObservation(data_obs, read_sumw2=True)

            # NOTE: no mask is set on these channels; neither the rho
            # validity mask nor a blinding of bins 11, 12, 13 is applied.

    for mjjbin in range(nmjj):
        failCh = model['mjjbin%dfail' % mjjbin]
        passCh = model['mjjbin%dpass' % mjjbin]

        qcdparams = np.array([
            rl.IndependentParameter('qcdparam_mjjbin%d_msdbin%d' % (mjjbin, i),
                                    0) for i in range(msd.nbins)
        ])
        # was integer, and numpy complained about subtracting float from it
        initial_qcd = failCh.getObservation()[0].astype(float)

        # QCD estimate in the fail region: data minus every sample already
        # added to the channel
        for sample in failCh:
            initial_qcd -= sample.getExpectation(nominal=True)

        if np.any(initial_qcd < 0.):
            raise ValueError("initial_qcd negative for some bins..",
                             initial_qcd)

        sigmascale = 10  # to scale the deviation from initial
        scaledparams = initial_qcd * (
            1 + sigmascale / np.maximum(1., np.sqrt(initial_qcd)))**qcdparams
        fail_qcd = rl.ParametericSample('mjjbin%dfail_qcd' % mjjbin,
                                        rl.Sample.BACKGROUND, msd,
                                        scaledparams)
        failCh.addSample(fail_qcd)
        pass_qcd = rl.TransferFactorSample('mjjbin%dpass_qcd' % mjjbin,
                                           rl.Sample.BACKGROUND,
                                           tf_params[0, :], fail_qcd)
        passCh.addSample(pass_qcd)

        tie_ttbar_scale_factors(passCh['ttbar'], failCh['ttbar'])

    # Fill in muon CR
    templates = {}
    samps = ['ttbar', 'QCD', 'singlet', 'Zjets', 'Wjets', 'VV']
    for region in ['pass', 'fail']:
        ch = rl.Channel("muonCR%s" % (region, ))
        model.addChannel(ch)

        isPass = region == 'pass'

        for sName in samps:
            templates[sName] = get_template_muonCR(sName, isPass, obs=msd)

            stype = rl.Sample.BACKGROUND
            sample = rl.TemplateSample(ch.name + '_' + sName, stype,
                                       templates[sName])

            ch.addSample(sample)

        data_obs = get_template_muonCR("muondata", isPass, obs=msd)
        ch.setObservation(data_obs, read_sumw2=True)

    tie_ttbar_scale_factors(model['muonCRpass_ttbar'],
                            model['muonCRfail_ttbar'])

    with open(os.path.join(str(tmpdir), 'testModel.pkl'), "wb") as fout:
        pickle.dump(model, fout)

    model.renderCombine(os.path.join(str(tmpdir), 'testModel'))
# Example #10
# 0
def jet_mass_producer(configs=None):
    """
    Build the jet-mass rhalphalib model described by ``configs`` and render
    it with ``model.renderCombine``.

    configs: configuration dict. Keys read by this function:
      - 'channels' (required): dict mapping channel name -> channel config
      - 'histLocation' (required): path of the ROOT file with the templates
      - 'gridHistFileName' (required): path of the ROOT file holding the
        'grid' and 'categories' histograms used to define the mass-scale
        nuisance parameters
      - 'ModelName' (optional): model name, defaults to 'Jet_Mass_Model'
      - 'Pseudo' (optional): if present, the observation is built with
        ``build_pseudo`` instead of being read from the 'Data' histogram
      -> each channel config is a dict with:
        - 'samples', 'signal', 'NormUnc', 'selection', 'pt_bin' (required)
        - 'variable' (optional, default 'mjet')
        - 'regions' (optional, default [''])
        - 'QcdEstimation' (optional): the data-driven QCD estimate is used
          only when this is exactly the string "True"

    Channels using the QCD estimation are expected to be named '...Pt<edge>'
    (the lower pt edge is parsed from the text after the last 'Pt') and to
    provide the regions 'pass' and 'fail', since the model channels
    '<name>pass' and '<name>fail' are looked up at the end.

    Returns None; the result is written by ``model.renderCombine``.

    Raises:
        TypeError: if ``configs`` is None.
        ValueError: if data minus MC in a fail region is negative in at
            least one bin.
    """
    if configs is None:
        # Fail early with a readable message instead of the opaque
        # "'NoneType' object is not subscriptable" raised further down.
        raise TypeError(
            "jet_mass_producer() requires a 'configs' dict, got None")

    rebin_msd = True
    min_msd, max_msd = (50, 190)
    binwidth = 4
    nbins = int(np.floor((max_msd - min_msd) / binwidth))
    print(nbins)
    msd_bins = np.linspace(min_msd, nbins * binwidth + min_msd, nbins + 1)
    print(msd_bins)

    def uses_qcd_estimation(channel_config):
        """Single definition of 'this channel uses the data-driven QCD'."""
        return ('QcdEstimation' in channel_config
                and channel_config['QcdEstimation'] == 'True')

    def rebin(hist):
        """Rebin a ROOT histogram to msd_bins if rebinning is enabled."""
        if rebin_msd:
            return hist.Rebin(len(msd_bins) - 1, 'msd', msd_bins)
        return hist

    def pt_bin_index(name):
        """Index in pt_bins of the lower pt edge encoded in a channel name."""
        lower_edge = float(name.split('Pt')[-1])
        return np.where(pt_bins == lower_edge)[0][0]

    #channels for combined fit
    channels = configs['channels']
    qcd_estimation_channels = {
        name: channel_config
        for name, channel_config in channels.items()
        if uses_qcd_estimation(channel_config)
    }

    print('channels:', channels.keys())

    #getting path of dir with root file from config
    hist_file = ROOT.TFile(configs['histLocation'])

    do_qcd_estimation = len(qcd_estimation_channels) > 0

    #specify if QCD estimation (using Bernstein-polynomial as TF) should be used
    ################
    #QCD Estimation#
    ################
    # derive pt bins from channel names for the pt,rho grid for the Bernstein-Polynomial
    if do_qcd_estimation:
        # helper model only used to sum the QCD MC yields in pass and fail
        qcd_model = rl.Model('qcd_helper')
        qcd_pass, qcd_fail = 0., 0.
        for channel_name, config in qcd_estimation_channels.items():
            fail_ch = rl.Channel(channel_name + 'fail')
            pass_ch = rl.Channel(channel_name + 'pass')
            qcd_model.addChannel(fail_ch)
            qcd_model.addChannel(pass_ch)
            fail_hist = hist_file.Get('W_QCD__mjet_' + config['pt_bin'] +
                                      '_fail')
            pass_hist = hist_file.Get('W_QCD__mjet_' + config['pt_bin'] +
                                      '_pass')
            fail_hist = rebin(fail_hist)
            pass_hist = rebin(pass_hist)
            fail_ch.setObservation(fail_hist)
            pass_ch.setObservation(pass_hist)
            qcd_fail += fail_ch.getObservation().sum()
            qcd_pass += pass_ch.getObservation().sum()
        qcd_eff = qcd_pass / qcd_fail

        #get all lower edges from channel names
        pt_edges = [
            float(channel.split('Pt')[-1])
            for channel in qcd_estimation_channels
        ]
        #get last upper edge from the 'pt_bin' ("<low>to<high>") of the last channel
        last_channel_prefix = list(qcd_estimation_channels)[-1].split('Pt')[0]
        last_channel = channels[last_channel_prefix + 'Pt%i' % pt_edges[-1]]
        pt_edges.append(float(last_channel['pt_bin'].split('to')[-1]))
        pt_bins = np.array(pt_edges)
        msd = rl.Observable('msd', msd_bins)

        # here we derive these all at once with 2D array
        ptpts, msdpts = np.meshgrid(pt_bins[:-1] + 0.3 * np.diff(pt_bins),
                                    msd_bins[:-1] + 0.5 * np.diff(msd_bins),
                                    indexing='ij')
        rhopts = 2 * np.log(msdpts / ptpts)
        # NOTE(review): the pt range (500, 1200) and rho range (-6, -2.1) are
        # hard-coded here and not derived from pt_bins -- confirm they match
        # the configured channels.
        ptscaled = (ptpts - 500.) / (1200. - 500.)
        rhoscaled = (rhopts - (-6)) / ((-2.1) - (-6))
        validbins = (rhoscaled >= 0) & (rhoscaled <= 1)
        rhoscaled[~validbins] = 1  # we will mask these out later

    #get name from config, or fall back to default
    model_name = configs['ModelName'] if 'ModelName' in configs else 'Jet_Mass_Model'

    #Reading categories of consituent-variations for nuisance paramters from gridHist
    grid_hist_file_name = configs['gridHistFileName']

    print('reading grid for nuisance parameter:')
    grid_hist_file = ROOT.TFile(grid_hist_file_name, 'READ')
    try:
        grid_hist = grid_hist_file.Get('grid')
        # the histogram title encodes the axis names as "x=<name>,y=<name>"
        grid_axes = dict(item.strip().split("=")
                         for item in grid_hist.GetTitle().split(","))
        x_bins = range(grid_hist.GetNbinsX())
        y_bins = range(grid_hist.GetNbinsY())

        categories_hist = grid_hist_file.Get('categories')
        categories_axis_bins = range(1, categories_hist.GetNbinsX() + 1)
        particle_categories = [
            categories_hist.GetXaxis().GetBinLabel(i)
            for i in categories_axis_bins
        ]
    finally:
        # close the grid file even if reading it failed
        grid_hist_file.Close()
    print('used variation categories:', particle_categories)
    print('X: %s , %i bins' % (grid_axes['x'], len(x_bins)))
    print('Y: %s , %i bins' % (grid_axes['y'], len(y_bins)))

    #setting up rhalphalib roofit model
    model = rl.Model(model_name)

    #setting up nuisances correspondig to consituent-variation according to categories from grid
    grid_nuisances = []
    print('adding nuisance paramters:')
    for category in particle_categories:
        for x_bin in x_bins:
            for y_bin in y_bins:
                nuisance_name = 'massScale_%s%i_%s%i_%s' % (
                    grid_axes['x'], x_bin, grid_axes['y'], y_bin, category)
                print(nuisance_name, 'shape')
                grid_nuisances.append([
                    rl.NuisanceParameter(nuisance_name, 'shape'), x_bin,
                    y_bin, category
                ])

    #setting up nuisances for systematic uncertainties
    print('CMS_lumi', 'lnN')
    lumi = rl.NuisanceParameter('CMS_lumi', 'lnN')
    lumi_effect = 1.027

    # sample name -> [nuisance parameter, effect size]
    norm_nuisances = {}
    for channel_config in channels.values():
        for sample in channel_config['samples']:
            norm_uncertainties = channel_config['NormUnc']
            for name, norm_unc in norm_uncertainties.items():
                if not (norm_unc > 0 and name in sample
                        and sample not in norm_nuisances):
                    continue
                # Share an already registered nuisance whose name contains
                # `name` (the last match wins, and its effect size is reused
                # as well); otherwise create a new one.
                matching = [
                    known for known in norm_nuisances.values()
                    if name in known[0].name
                ]
                if matching:
                    nuisance_par = matching[-1]
                else:
                    nuisance_par = [
                        rl.NuisanceParameter(name + '_normUnc', 'lnN'),
                        norm_unc
                    ]
                norm_nuisances[sample] = nuisance_par

    for channel_name, config in channels.items():
        print('setting up channel:', channel_name)

        #using hists with /variable/ in their name (default: Mass, if defined get from config)
        variable = config['variable'] if 'variable' in config else 'mjet'
        #getting list of samples from config
        samples = config['samples']
        #for WMass fit there are multiple regions per sample
        regions = config['regions'] if 'regions' in config else ['']
        channel_uses_qcd = uses_qcd_estimation(config)

        print('getting template of variable:', variable)
        print('samples:', samples)
        print('regions:', regions)

        for region in regions:
            region_suffix = '_' + region if len(region) > 0 else ''
            # template with two '%s' slots: sample name and variation prefix
            hist_dir = config[
                'selection'] + '_%s__' + variable + '_%s' + config[
                    'pt_bin'] + region_suffix
            print('hist_dir:', hist_dir)
            #setting up channel for fit (name must be unique and can't include any '_')
            region_name = channel_name + region
            ch = rl.Channel(region_name)
            model.addChannel(ch)
            print('rl.Channel:', ch)

            for sample_name in samples:
                #do not include QCD template here, but rather use qcd estimation below
                if channel_uses_qcd and 'qcd' in sample_name.lower():
                    continue

                #specify if sample is signal or background type
                if sample_name in config['signal']:
                    sample_type = rl.Sample.SIGNAL
                else:
                    sample_type = rl.Sample.BACKGROUND
                sample_hist = hist_file.Get(hist_dir % (sample_name, ""))
                sample_hist.SetName('msd')

                #rebin hist
                sample_hist = rebin(sample_hist)

                #setup actual rhalphalib sample
                sample = rl.TemplateSample(ch.name + '_' + sample_name,
                                           sample_type, sample_hist)

                #setting effects of constituent variation nuisances (up/down)
                for grid_nuisance, x, y, category in grid_nuisances:
                    variation = str(x) + '_' + str(y) + '_' + category + '_'
                    variation_dir = hist_dir % (sample_name, variation)
                    hist_up = hist_file.Get(variation_dir + '__up')
                    hist_down = hist_file.Get(variation_dir + '__down')

                    #rebin hists
                    hist_up = rebin(hist_up)
                    hist_down = rebin(hist_down)

                    sample.setParamEffect(grid_nuisance, hist_up, hist_down)
                sample.setParamEffect(lumi, lumi_effect)
                if sample_name in norm_nuisances:
                    norm_nuisance, norm_effect = norm_nuisances[sample_name]
                    sample.setParamEffect(norm_nuisance, norm_effect)

                ch.addSample(sample)

            if 'Pseudo' in configs:
                data_hist = build_pseudo(samples, hist_file, hist_dir,
                                         configs['Pseudo'])
            else:
                data_hist = hist_file.Get(hist_dir % ("Data", ""))

            data_hist = rebin(data_hist)
            data_hist.SetName('msd')
            ch.setObservation(data_hist)
            if channel_uses_qcd:
                # mask the msd bins whose rho lies outside the TF range
                mask = validbins[pt_bin_index(channel_name)]
                observation = ch.getObservation().astype(float)
                total_events = np.sum(observation)
                dropped_events = np.sum(observation[~mask])
                percentage = dropped_events / total_events
                print(
                    'dropping due to mask: %.2f events (out of %.2f -> %.2f%%)'
                    % (dropped_events, total_events, percentage * 100))
                ch.mask = mask

    if do_qcd_estimation:
        #QCD TF
        tf_poly = rl.BernsteinPoly('tf_params', (2, 2), ['pt', 'rho'],
                                   limits=(-10, 10))
        print(
            'Using QCD efficiency (N2-ddt) of %.2f%% to scale initial QCD in pass region'
            % (qcd_eff * 100))
        tf_params = qcd_eff * tf_poly(ptscaled, rhoscaled)

        # Iterate over exactly the channels selected for the QCD estimation
        # above, so that the template skip, the mask and this loop always
        # agree on which channels are data-driven.
        for channel_name in qcd_estimation_channels:
            print(channel_name, 'qcd estimation')
            fail_ch = model[channel_name + 'fail']
            pass_ch = model[channel_name + 'pass']
            ptbin = pt_bin_index(channel_name)
            qcd_params = np.array([
                rl.IndependentParameter(
                    'qcdparam_ptbin%i_msdbin%i' % (ptbin, i), 0)
                for i in range(msd.nbins)
            ])
            # initial QCD in the fail region: data minus all other samples
            initial_qcd = fail_ch.getObservation().astype(float)
            for sample in fail_ch:
                initial_qcd -= sample.getExpectation(nominal=True)
            if np.any(initial_qcd < 0.):
                raise ValueError(
                    'inital qcd (fail qcd from data - mc) negative at least one bin'
                )
            sigmascale = 10.
            scaledparams = initial_qcd * (
                1 +
                sigmascale / np.maximum(1., np.sqrt(initial_qcd)))**qcd_params
            fail_qcd = rl.ParametericSample('%sfail_qcd' % channel_name,
                                            rl.Sample.BACKGROUND, msd,
                                            scaledparams)
            fail_ch.addSample(fail_qcd)
            pass_qcd = rl.TransferFactorSample('%spass_qcd' % channel_name,
                                               rl.Sample.BACKGROUND,
                                               tf_params[ptbin, :], fail_qcd)
            pass_ch.addSample(pass_qcd)

    model.renderCombine(model_name)