# Imports required by the model builders below
from collections import OrderedDict
import os
import pickle
import sys

import numpy as np
import pandas as pd
import ROOT
import rhalphalib as rl
from coffea import hist
from coffea.util import load


def model(year, recoil, category):
    def template(dictionary, process, systematic, region):
        histogram = dictionary[region].integrate("process", process)
        nominal = histogram.integrate("systematic", "nominal").values()[()][
            recoil, :, category_map[category]
        ]
        output = nominal
        if "nominal" not in systematic and "data" not in systematic:
            # print('Normalizing',systematic,'histogram of',process,'in region',region)
            output = np.nan_to_num(
                histogram.integrate("systematic", systematic).values()[()][
                    recoil, :, category_map[category]
                ]
                / nominal.sum()
            )
        if "data" not in systematic:
            # print('Removing zeros from',systematic,'histogram of',process,'in region',region)
            output[output <= 0] = 1e-7
        binning = (
            dictionary[region]
            .integrate("process", process)
            .integrate("systematic", systematic)
            .axis("fjmass")
            .edges()
        )
        return (output, binning, "fjmass")

    model_id = year + category + "recoil" + str(recoil)
    print(model_id)
    model = rl.Model("darkhiggs" + model_id)

    data_hists = hists["data"]
    bkg_hists = hists["bkg"]
    signal_hists = hists["sig"]

    ###
    # Preparing histograms for fit
    ###
    data = {}
    for r in data_hists["template"].identifiers("region"):
        data[str(r)] = data_hists["template"].integrate("region", r).sum("gentype")

    background = {}
    for r in bkg_hists["template"].identifiers("region"):
        background[str(r)] = bkg_hists["template"].integrate("region", r).sum("gentype")

    signal = {}
    for r in bkg_hists["template"].identifiers("region"):
        signal[str(r)] = signal_hists["template"].integrate("region", r).sum("gentype")

    ###
    # R0: Signal region
    ###
    ch_name = "sr" + model_id
    sr = rl.Channel(ch_name)
    model.addChannel(sr)

    ###
    # Add data distribution to the channel
    ###
    sr.setObservation(template(data, "MET", "data", "sr"))

    ###
    # Z(->nunu)+jets data-driven model
    ###
    sr_zjetsTemplate = template(background, "Z+jets", "nominal", "sr")
    sr_zjetsObservable = rl.Observable("fjmass", sr_zjetsTemplate[1])
    if category == "pass":
        sr_zjets = rl.ParametericSample(
            ch_name + "_zjets",
            rl.Sample.BACKGROUND,
            sr_zjetsObservable,
            sr_zjetsBinYields * tf_params,
        )
    else:
        sr_zjets = rl.ParametericSample(
            ch_name + "_zjets",
            rl.Sample.BACKGROUND,
            sr_zjetsObservable,
            sr_zjetsBinYields * 1.0,
        )
    sr.addSample(sr_zjets)

    for s in signal["sr"].identifiers("process"):
        # print(str(s))
        if "Mhs_50" not in str(s):
            continue
        sr_signalTemplate = template(signal, s, "nominal", "sr")
        sr_signal = rl.TemplateSample(
            ch_name + "_" + str(s), rl.Sample.SIGNAL, sr_signalTemplate
        )
        sr_signal.setParamEffect(lumi, 1.027)
        sr_signal.setParamEffect(trig_met, 1.01)
        sr_signal.setParamEffect(veto_tau, 1.03)
        sr_signal.setParamEffect(jec, 1.05)
        btagUp = template(signal, s, "btagUp", "sr")[0]
        btagDown = template(signal, s, "btagDown", "sr")[0]
        sr_signal.setParamEffect(btag, btagUp, btagDown)
        sr.addSample(sr_signal)

    ###
    # W(->lnu)+jets data-driven model
    ###
    # Adding W-Z link
    sr_wjets = rl.TransferFactorSample(
        ch_name + "_wjets", rl.Sample.BACKGROUND, sr_wjetsTransferFactor, sr_zjets
    )
    sr.addSample(sr_wjets)

    ###
    # top-antitop data-driven model
    ###
    sr_ttTemplate = template(background, "TT", "nominal", "sr")
    sr_ttObservable = rl.Observable("fjmass", sr_ttTemplate[1])
    sr_tt = rl.ParametericSample(
        ch_name + "_tt", rl.Sample.BACKGROUND, sr_ttObservable, sr_ttBinYields
    )
    sr.addSample(sr_tt)

    ###
    # R1: Single muon W control region
    ###
    ch_name = "wmcr" + model_id
    wmcr = rl.Channel(ch_name)
    model.addChannel(wmcr)

    ###
    # Add data distribution to the channel
    ###
    wmcr.setObservation(template(data, "MET", "data", "wmcr"))

    ###
    # W(->lnu)+jets data-driven model
    ###
    wmcr_wjets = rl.TransferFactorSample(
        ch_name + "_wjets", rl.Sample.BACKGROUND, wmcr_wjetsTransferFactor, sr_wjets
    )
    wmcr.addSample(wmcr_wjets)

    ###
    # top-antitop data-driven model
    ###
    wmcr_tt = rl.TransferFactorSample(
        ch_name + "_tt", rl.Sample.BACKGROUND, wmcr_ttTransferFactor, sr_tt
    )
    wmcr.addSample(wmcr_tt)

    ###
    # R2: Single muon top control region
    ###
    ch_name = "tmcr" + model_id
    tmcr = rl.Channel(ch_name)
    model.addChannel(tmcr)

    ###
    # Add data distribution to the channel
    ###
    tmcr.setObservation(template(data, "MET", "data", "tmcr"))

    ###
    # W(->lnu)+jets data-driven model
    ###
    tmcr_wjets = rl.TransferFactorSample(
        ch_name + "_wjets", rl.Sample.BACKGROUND, tmcr_wjetsTransferFactor, sr_wjets
    )
    tmcr.addSample(tmcr_wjets)

    ###
    # top-antitop data-driven model
    ###
    tmcr_tt = rl.TransferFactorSample(
        ch_name + "_tt", rl.Sample.BACKGROUND, tmcr_ttTransferFactor, sr_tt
    )
    tmcr.addSample(tmcr_tt)

    ###
    # R3: Double muon control region
    ###
    ch_name = "zmcr" + model_id
    zmcr = rl.Channel(ch_name)
    model.addChannel(zmcr)

    ###
    # Add data distribution to the channel
    ###
    zmcr.setObservation(template(data, "MET", "data", "zmcr"))

    zmcr_dyjets = rl.TransferFactorSample(
        ch_name + "_dyjets", rl.Sample.BACKGROUND, zmcr_dyjetsTransferFactor, sr_zjets
    )
    zmcr.addSample(zmcr_dyjets)

    ###
    # R4: Single electron W control region
    ###
    ch_name = "wecr" + model_id
    wecr = rl.Channel(ch_name)
    model.addChannel(wecr)

    ###
    # Add data distribution to the channel
    ###
    if year == "2018":
        wecr.setObservation(template(data, "EGamma", "data", "wecr"))
    else:
        wecr.setObservation(template(data, "SingleElectron", "data", "wecr"))

    ###
    # W(->lnu)+jets data-driven model
    ###
    wecr_wjets = rl.TransferFactorSample(
        ch_name + "_wjets", rl.Sample.BACKGROUND, wecr_wjetsTransferFactor, sr_wjets
    )
    wecr.addSample(wecr_wjets)

    ###
    # top-antitop data-driven model
    ###
    wecr_tt = rl.TransferFactorSample(
        ch_name + "_tt", rl.Sample.BACKGROUND, wecr_ttTransferFactor, sr_tt
    )
    wecr.addSample(wecr_tt)

    ###
    # R5: Single electron top control region
    ###
    ch_name = "tecr" + model_id
    tecr = rl.Channel(ch_name)
    model.addChannel(tecr)

    ###
    # Add data distribution to the channel
    ###
    if year == "2018":
        tecr.setObservation(template(data, "EGamma", "data", "tecr"))
    else:
        tecr.setObservation(template(data, "SingleElectron", "data", "tecr"))

    ###
    # W(->lnu)+jets data-driven model
    ###
    tecr_wjets = rl.TransferFactorSample(
        ch_name + "_wjets", rl.Sample.BACKGROUND, tecr_wjetsTransferFactor, sr_wjets
    )
    tecr.addSample(tecr_wjets)

    ###
    # top-antitop data-driven model
    ###
    tecr_tt = rl.TransferFactorSample(
        ch_name + "_tt", rl.Sample.BACKGROUND, tecr_ttTransferFactor, sr_tt
    )
    tecr.addSample(tecr_tt)

    ###
    # R6: Double electron control region
    ###
    ch_name = "zecr" + model_id
    zecr = rl.Channel(ch_name)
    model.addChannel(zecr)

    ###
    # Add data distribution to the channel
    ###
    if year == "2018":
        zecr.setObservation(template(data, "EGamma", "data", "zecr"))
    else:
        zecr.setObservation(template(data, "SingleElectron", "data", "zecr"))

    zecr_dyjets = rl.TransferFactorSample(
        ch_name + "_dyjets", rl.Sample.BACKGROUND, zecr_dyjetsTransferFactor, sr_zjets
    )
    zecr.addSample(zecr_dyjets)

    ###
    # R7: Single photon control region
    ###
    ch_name = "gcr" + model_id
    gcr = rl.Channel(ch_name)
    model.addChannel(gcr)

    ###
    # Add data distribution to the channel
    ###
    if year == "2018":
        gcr.setObservation(template(data, "EGamma", "data", "gcr"))
    else:
        gcr.setObservation(template(data, "SinglePhoton", "data", "gcr"))

    gcr_gjets = rl.TransferFactorSample(
        ch_name + "_gjets", rl.Sample.BACKGROUND, gcr_gjetsTransferFactor, sr_zjets
    )
    gcr.addSample(gcr_gjets)

    ### We actually need QCD here
    gcr_qcdTemplate = template(background, "QCD", "nominal", "gcr")
    gcr_qcd = rl.TemplateSample(
        ch_name + "_qcdMC", rl.Sample.BACKGROUND, gcr_qcdTemplate
    )
    gcr_qcd.setParamEffect(lumi, 1.027)
    gcr_qcd.setParamEffect(trig_pho, 1.01)
    gcr_qcd.setParamEffect(veto_tau, 1.03)
    gcr_qcd.setParamEffect(qcdpho_norm, 2.0)
    gcr_qcd.setParamEffect(jec, 1.05)
    gcr_qcd.setParamEffect(id_pho, 1.02)
    gcr.addSample(gcr_qcd)

    # Done, return model
    return model
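# ---------------------------------------------------------------------------
# Hypothetical driver sketch (not part of the original source): assuming the
# module-level objects that model() relies on (hists, category_map, the
# nuisance parameters, tf_params, the *TransferFactor and *BinYields arrays)
# are already set up, the per-(recoil, category) models could be built and
# pickled roughly like this.  The names recoil_bins and output_dir are
# illustrative only.
# ---------------------------------------------------------------------------
def build_all_models(year, recoil_bins=4, output_dir="models"):
    os.makedirs(output_dir, exist_ok=True)
    for category in ("pass", "fail"):
        for recoil_bin in range(recoil_bins):
            m = model(year, recoil_bin, category)
            # file name mirrors the model_id convention used inside model()
            fname = "darkhiggs{}{}recoil{}.pkl".format(year, category, recoil_bin)
            with open(os.path.join(output_dir, fname), "wb") as fout:
                pickle.dump(m, fout)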
def darkhiggs_model(tmpdir, mass, category, year):

    model = rl.Model('darkhiggs_'+mass+'_'+category)

    binning_map = {
        'mass0': {
            'monohs': [250.0, 280.0, 310.0, 340.0, 370.0, 400.0, 430.0, 550.0, 640.0, 740.0, 1250.0],
            'monojet': [250.0, 280.0, 310.0, 340.0, 370.0, 400.0, 430.0, 470.0, 510.0, 550.0, 590.0, 640.0, 1250.0]
        },
        'mass1': {
            'monohs': [250.0, 280.0, 310.0, 340.0, 370.0, 400.0, 1250.0],
            'monojet': [250.0, 280.0, 310.0, 340.0, 370.0, 400.0, 430.0, 470.0, 510.0, 1250.0]
        },
        'mass2': {
            'monohs': [250.0, 280.0, 310.0, 340.0, 430.0, 1250.0],
            'monojet': [250.0, 280.0, 310.0, 340.0, 370.0, 400.0, 430.0, 470.0, 510.0, 1250.0]
        },
        'mass3': {
            'monohs': [250.0, 280.0, 310.0, 340.0, 400.0, 430.0, 470.0, 1250.0],
            'monojet': [250.0, 280.0, 310.0, 340.0, 370.0, 400.0, 430.0, 470.0, 510.0, 640.0, 1250.0]
        },
        'mass4': {
            'monohs': [250.0, 280.0, 310.0, 340.0, 370.0, 400.0, 430.0, 470.0, 510.0, 550.0, 1250.0],
            'monojet': [250.0, 280.0, 310.0, 340.0, 370.0, 400.0, 430.0, 470.0, 510.0, 550.0, 590.0, 640.0, 740.0, 900.0, 1250.0]
        }
    }

    ###
    # Extract histograms from input file
    ###
    hists = load('hists/darkhiggs'+year+'.scaled')

    ###
    # Regrouping histograms
    ###
    process = hist.Cat("process", "Process", sorting='placement')
    cats = ("process",)
    process_map = OrderedDict()
    #process_map["Hbb_merged"] = ("Hbb_merged*",)
    #process_map["Hbb_unmerged"] = ("Hbb_unmerged*",)
    process_map["Hbb"] = ("Hbb*",)
    process_map["DY"] = ("DY*",)
    #process_map["VVbb"] = ("VVbb*",)
    #process_map["VV"] = ("VV",)
    process_map["VV"] = ("VV*",)
    #process_map["ST_merged"] = ("ST_merged*",)
    #process_map["ST_unmerged"] = ("ST_unmerged*",)
    process_map["ST"] = ("ST*",)
    #process_map["TT_merged"] = ("TT_merged*",)
    #process_map["TT_unmerged"] = ("TT_unmerged*",)
    process_map["TT"] = ("TT*",)
    process_map["WJets"] = ("WJets*",)
    process_map["ZJets"] = ("ZJets*",)
    process_map["GJets"] = ("GJets*",)
    process_map["MET"] = ("MET*",)
    process_map["SingleElectron"] = ("SingleElectron*",)
    process_map["SinglePhoton"] = ("SinglePhoton*",)
    for key in hists.keys():
        hists[key] = hists[key].group(cats, process, process_map)

    ###
    # Preparing histograms for fit
    ###
    recoil = {}
    for r in hists['recoil'].identifiers('region'):
        #if category not in str(r) or mass not in str(r): continue
        if mass not in str(r):
            continue
        #print(r,category,mass)
        #print('Before rebin',hists['recoil'].integrate('region',r).values(overflow='all'))
        recoil[str(r).split("_")[0]] = hists['recoil'].integrate('region', r).rebin(
            'recoil', hist.Bin('recoil', 'Hadronic recoil', binning_map[mass][category]))
        #print('After rebin',recoil[str(r).split("_")[0]].values(overflow='all'))

    ###
    ###
    # Setting up rate systematics
    ###
    ###

    ###
    # Luminosity
    ###
    lumi = rl.NuisanceParameter('lumi', 'lnN')

    ###
    # MET bin migration
    ###
    #met = rl.NuisanceParameter('met', 'lnN')

    ###
    # Cross section of MC-driven processes
    ###
    QCDe_Norm = rl.NuisanceParameter('QCDe_Norm', 'lnN')
    QCDmu_Norm = rl.NuisanceParameter('QCDmu_Norm', 'lnN')
    QCDsig_Norm = rl.NuisanceParameter('QCDsig_Norm', 'lnN')
    stop_Norm = rl.NuisanceParameter('stop_Norm', 'lnN')
    VV_Norm = rl.NuisanceParameter('VV_Norm', 'lnN')
    Hbb_Norm = rl.NuisanceParameter('Hbb_Norm', 'lnN')
    dy_Norm = rl.NuisanceParameter('dy_Norm', 'lnN')  # only in signal region

    ###
    # Lepton/photon ID uncertainties
    ###
    id_e = rl.NuisanceParameter('id_e', 'lnN')
    id_mu = rl.NuisanceParameter('id_mu', 'lnN')
    id_pho = rl.NuisanceParameter('id_pho', 'lnN')

    ###
    # Electron reco
    ###
    reco_e = rl.NuisanceParameter('reco_e', 'lnN')

    ###
    # Muon isolation
    ###
    iso_m = rl.NuisanceParameter('iso_m', 'lnN')

    ###
    # Trigger efficiency
    ###
    trig_e = rl.NuisanceParameter('trig_e', 'lnN')
    trig_met = rl.NuisanceParameter('trig_met', 'lnN')

    ###
    # DeepAk15 signal scale factor and mistag rate for MC-driven processes
    ###
    #sf_deepAK15 = rl.NuisanceParameter('sf_deepAK15', 'lnN')
    #mistag_deepAK15 = rl.NuisanceParameter('mistag_deepAK15', 'lnN')

    ###
    # Tau veto
    ###
    veto_tau = rl.NuisanceParameter('veto_tau', 'lnN')

    ###
    # AK4 b-tagging of iso jet: 0-tag efficiencies per flavour/category
    ###
    eff_0tag = {
        'whf': 0.86,
        'wlf': 0.90,
        'zhf': 0.80,
        'zlf': 0.90,
        'ttbqq': 1.,
        'ttqq': 1.,
        'ttother': 1.,
        'stbqq': 1.,
        'stqq': 1.,
        'stother': 1.,
        'vvbb': 1,
        'vvqq': 1,
        'vvother': 1,
        'hbb': 1,
        'hother': 1
    }

    ###
    # Defining W/Z/gamma+jets heavy flavor fractions and their corrective k-factors
    ###
    whf_fraction = 0.18
    zhf_fraction = 0.09
    ghf_fraction = 0.12

    whf_k = rl.IndependentParameter('whf_k', 1., 0, 1/whf_fraction)
    zhf_k = rl.IndependentParameter('zhf_k', 1., 0, 1/zhf_fraction)
    ghf_k = rl.IndependentParameter('ghf_k', 1., 0, 1/ghf_fraction)

    ###
    # Taking into account the varying HF fraction to adjust the overall efficiency of ak4 btagging of iso jets
    ###
    whf_0tag_eff = 0.86
    wlf_0tag_eff = 0.90
    wj_0tag_eff = wlf_0tag_eff*(1 - whf_fraction) + whf_0tag_eff*whf_fraction
    wj_0tag_sfxeff = wlf_0tag_eff*(1 - whf_k*whf_fraction) + whf_0tag_eff*whf_k*whf_fraction
    wjets_0tag_weight = wj_0tag_sfxeff / wj_0tag_eff
    wjets_1tag_weight = (1 - wj_0tag_sfxeff) / (1 - wj_0tag_eff)

    zhf_0tag_eff = 0.80
    zlf_0tag_eff = 0.90
    zj_0tag_eff = zlf_0tag_eff*(1 - zhf_fraction) + zhf_0tag_eff*zhf_fraction
    zj_0tag_sfxeff = zlf_0tag_eff*(1 - zhf_k*zhf_fraction) + zhf_0tag_eff*zhf_k*zhf_fraction
    zjets_0tag_weight = zj_0tag_sfxeff / zj_0tag_eff

    ###
    # Setting tagger efficiency and scale factor for in-situ calculation
    ###
    whf_deepak15_eff = 0.1
    wlf_deepak15_eff = 0.04
    whf_deepak15_sf = rl.IndependentParameter('whf_deepak15_sf', 1., 0, 1/whf_deepak15_eff)
    wlf_deepak15_sf = rl.IndependentParameter('wlf_deepak15_sf', 1., 0, 1/wlf_deepak15_eff)
    wj_deepak15_sfxeff = wlf_deepak15_sf*wlf_deepak15_eff*(1-whf_k*whf_fraction) + whf_deepak15_sf*whf_deepak15_eff*whf_k*whf_fraction
    wj_deepak15_eff = wlf_deepak15_eff*(1-whf_fraction) + whf_deepak15_eff*whf_fraction
    wjets_deepak15_weight = (1 - wj_deepak15_sfxeff)/(1 - wj_deepak15_eff)
    if 'monohs' in category:
        wjets_deepak15_weight = wj_deepak15_sfxeff/wj_deepak15_eff

    zhf_deepak15_eff = 0.04
    zlf_deepak15_eff = 0.05
    zhf_deepak15_sf = rl.IndependentParameter('zhf_deepak15_sf', 1., 0, 1/zhf_deepak15_eff)
    zlf_deepak15_sf = rl.IndependentParameter('zlf_deepak15_sf', 1., 0, 1/zlf_deepak15_eff)
    zj_deepak15_sfxeff = zlf_deepak15_sf*zlf_deepak15_eff*(1-zhf_k*zhf_fraction) + zhf_deepak15_sf*zhf_deepak15_eff*zhf_k*zhf_fraction
    zj_deepak15_eff = zlf_deepak15_eff*(1-zhf_fraction) + zhf_deepak15_eff*zhf_fraction
    zjets_deepak15_weight = (1 - zj_deepak15_sfxeff)/(1 - zj_deepak15_eff)
    if 'monohs' in category:
        zjets_deepak15_weight = zj_deepak15_sfxeff/zj_deepak15_eff

    ghf_deepak15_eff = 0.03
    glf_deepak15_eff = 0.005
    ghf_deepak15_sf = rl.IndependentParameter('ghf_deepak15_sf', 1., 0, 1/ghf_deepak15_eff)
    glf_deepak15_sf = rl.IndependentParameter('glf_deepak15_sf', 1., 0, 1/glf_deepak15_eff)
    gj_deepak15_sfxeff = glf_deepak15_sf*glf_deepak15_eff*(1-ghf_k*ghf_fraction) + ghf_deepak15_sf*ghf_deepak15_eff*ghf_k*ghf_fraction
    gj_deepak15_eff = glf_deepak15_eff*(1-ghf_fraction) + ghf_deepak15_eff*ghf_fraction
    gjets_deepak15_weight = (1 - gj_deepak15_sfxeff)/(1 - gj_deepak15_eff)
    if 'monohs' in category:
        gjets_deepak15_weight = gj_deepak15_sfxeff/gj_deepak15_eff

    bqq_eff = 0.6
    qq_eff = 0.3
    bb_eff = 0.9
    other_eff = 0.3
    bqq_sf = rl.IndependentParameter('bqq_sf', 1., 0, 1/bqq_eff)
    qq_sf = rl.IndependentParameter('qq_sf', 1., 0, 1/qq_eff)
    bb_sf = rl.IndependentParameter('bb_sf', 1., 0, 1/bb_eff)
    other_sf = rl.IndependentParameter('other_sf', 1., 0, 1/other_eff)

    tt_bqq_fraction = {
        '0tag': {'mass0': 0.04, 'mass1': 0.06, 'mass2': 0.11, 'mass3': 0.19, 'mass4': 0.6},
        '1tag': {'mass0': 0.014, 'mass1': 0.04, 'mass2': 0.1, 'mass3': 0.13, 'mass4': 0.54}
    }
    tt_qq_fraction = {
        '0tag': {'mass0': 0.04, 'mass1': 0.06, 'mass2': 0.11, 'mass3': 0.19, 'mass4': 0.6},
        '1tag': {'mass0': 0.014, 'mass1': 0.04, 'mass2': 0.1, 'mass3': 0.13, 'mass4': 0.54}
    }

    tt_0tag_sfxeff = bqq_sf*bqq_eff*tt_bqq_fraction['0tag'][mass] + qq_sf*qq_eff*tt_qq_fraction['0tag'][mass] + other_sf*other_eff*(1 - tt_bqq_fraction['0tag'][mass] - tt_qq_fraction['0tag'][mass])
    tt_0tag_eff = bqq_eff*tt_bqq_fraction['0tag'][mass] + qq_eff*tt_qq_fraction['0tag'][mass] + other_eff*(1 - tt_bqq_fraction['0tag'][mass] - tt_qq_fraction['0tag'][mass])
    tt_1tag_sfxeff = bqq_sf*bqq_eff*tt_bqq_fraction['1tag'][mass] + qq_sf*qq_eff*tt_qq_fraction['1tag'][mass] + other_sf*other_eff*(1 - tt_bqq_fraction['1tag'][mass] - tt_qq_fraction['1tag'][mass])
    tt_1tag_eff = bqq_eff*tt_bqq_fraction['1tag'][mass] + qq_eff*tt_qq_fraction['1tag'][mass] + other_eff*(1 - tt_bqq_fraction['1tag'][mass] - tt_qq_fraction['1tag'][mass])

    tt_0tag_weight = (1 - tt_0tag_sfxeff)/(1 - tt_0tag_eff)
    if 'monohs' in category:
        tt_0tag_weight = tt_0tag_sfxeff / tt_0tag_eff
    tt_1tag_weight = (1 - tt_1tag_sfxeff)/(1 - tt_1tag_eff)
    if 'monohs' in category:
        tt_1tag_weight = tt_1tag_sfxeff / tt_1tag_eff

    ###
    ###
    # Shape systematics
    ###
    ###

    ###
    # JEC/JER
    ###
    #jec = rl.NuisanceParameter('jec', 'shape')
    #jer = rl.NuisanceParameter('jer', 'shape')
    btag = rl.NuisanceParameter('btag', 'shape')  # AK4 btag
    gamma_to_z_ewk = rl.NuisanceParameter('Theory_gamma_z_ewk', 'shape')

    ###
    ###
    # Signal region
    ###
    ###
    ch_name = 'sr-'+mass+'-'+category
    sr = rl.Channel(ch_name)
    model.addChannel(sr)

    ###
    # Add data distribution to the channel
    ###
    sr.setObservation(template(recoil['sr'].integrate('process', 'MET').integrate('systematic', 'nominal'), 'recoil'))

    ###
    # Z(->nunu)+jets data-driven model
    ###
    sr_zvvHist = recoil['sr'].integrate('process', 'ZJets').integrate('systematic', 'nominal')
    sr_zvvTemplate = template(sr_zvvHist, 'recoil')
    sr_zvvMC = rl.TemplateSample(ch_name+'_zvvMC', rl.Sample.BACKGROUND, sr_zvvTemplate)
    #sr_zvvMC.setParamEffect(jec, np.random.normal(loc=1, scale=0.01, size=len(sr_zvvHist.axis('recoil').edges(overflow='all'))-1))
    sr_zvvBinYields = np.array([rl.IndependentParameter(ch_name+'_zvv_bin_%d' % i, b, 0, sr_zvvTemplate[0].max()*2) for i, b in enumerate(sr_zvvTemplate[0])])
    sr_zvvBinYields = sr_zvvBinYields * zjets_deepak15_weight * zjets_0tag_weight
    sr_zvvObservable = rl.Observable('recoil', sr_zvvHist.axis('recoil').edges(overflow='all'))
    sr_zvv = rl.ParametericSample(ch_name+'_zvv', rl.Sample.BACKGROUND, sr_zvvObservable, sr_zvvBinYields)
    sr.addSample(sr_zvv)

    ###
    # W(->lnu)+jets data-driven model
    ###
    sr_wjetsHist = recoil['sr'].integrate('process', 'WJets').integrate('systematic', 'nominal')
    sr_wjetsTemplate = template(sr_wjetsHist, 'recoil')
    sr_wjetsMC = rl.TemplateSample(ch_name+'_wjetsMC', rl.Sample.BACKGROUND, sr_wjetsTemplate)
    #sr_wjetsMC.setParamEffect(jec, np.random.normal(loc=1, scale=0.01, size=len(sr_wjetsHist.axis('recoil').edges(overflow='all'))-1))
    sr_wjetsBinYields = np.array([rl.IndependentParameter(ch_name+'_wjets_bin_%d' % i, b, 0, sr_wjetsTemplate[0].max()*2) for i, b in enumerate(sr_wjetsTemplate[0])])
    sr_wjetsBinYields = sr_wjetsBinYields * wjets_deepak15_weight * wjets_0tag_weight
    sr_wjetsObservable = rl.Observable('recoil', sr_wjetsHist.axis('recoil').edges(overflow='all'))
    sr_wjets = rl.ParametericSample(ch_name+'_wjets', rl.Sample.BACKGROUND, sr_wjetsObservable, sr_wjetsBinYields)
    sr.addSample(sr_wjets)

    ###
    # top-antitop data-driven model
    ###
    sr_ttbarHist = recoil['sr'].integrate('process', 'TT').integrate('systematic', 'nominal')
    sr_ttbarTemplate = template(sr_ttbarHist, 'recoil')
    sr_ttbarMC = rl.TemplateSample(ch_name+'_ttbarMC', rl.Sample.BACKGROUND, sr_ttbarTemplate)
    #sr_ttbarMC.setParamEffect(jec, np.random.normal(loc=1, scale=0.01, size=len(sr_ttbarHist.axis('recoil').edges(overflow='all'))-1))
    # these parameters are large, should probably log-transform them
    sr_ttbarBinYields = np.array([rl.IndependentParameter(ch_name+'_ttbar_bin_%d' % i, b, 0, sr_ttbarTemplate[0].max()*2) for i, b in enumerate(sr_ttbarTemplate[0])]) * tt_0tag_weight
    sr_ttbarObservable = rl.Observable('recoil', sr_ttbarHist.axis('recoil').edges(overflow='all'))
    sr_ttbar = rl.ParametericSample(ch_name+'_ttbar', rl.Sample.BACKGROUND, sr_ttbarObservable, sr_ttbarBinYields)
    sr.addSample(sr_ttbar)

    ###
    # Other MC-driven processes
    ###
    sr_singletopHist = recoil['sr'].integrate('process', 'ST').integrate('systematic', 'nominal')
    sr_singletopTemplate = template(sr_singletopHist, 'recoil')
    sr_singletop = rl.TemplateSample(ch_name+'_singletop', rl.Sample.BACKGROUND, sr_singletopTemplate)
    sr_singletop.setParamEffect(lumi, 1.027)
    sr_singletop.setParamEffect(stop_Norm, 1.2)
    sr_singletop.setParamEffect(trig_met, 1.01)
    sr_singletop.setParamEffect(veto_tau, 1.03)
    #sr_singletop.setParamEffect(met, 1.05)
    sr.addSample(sr_singletop)

    sr_dyHist = recoil['sr'].integrate('process', 'DY').integrate('systematic', 'nominal')
    sr_dyTemplate = template(sr_dyHist, 'recoil')
    sr_dy = rl.TemplateSample(ch_name+'_dy', rl.Sample.BACKGROUND, sr_dyTemplate)
    sr_dy.setParamEffect(lumi, 1.027)
    sr_dy.setParamEffect(dy_Norm, 1.2)
    sr_dy.setParamEffect(trig_met, 1.01)
    sr_dy.setParamEffect(veto_tau, 1.03)
    #sr_dy.setParamEffect(met, 1.05)
    sr.addSample(sr_dy)

    sr_dibosonHist = recoil['sr'].integrate('process', 'VV').integrate('systematic', 'nominal')
    sr_dibosonTemplate = template(sr_dibosonHist, 'recoil')
    sr_diboson = rl.TemplateSample(ch_name+'_diboson', rl.Sample.BACKGROUND, sr_dibosonTemplate)
    sr_diboson.setParamEffect(lumi, 1.027)
    sr_diboson.setParamEffect(VV_Norm, 1.2)
    sr_diboson.setParamEffect(trig_met, 1.01)
    sr_diboson.setParamEffect(veto_tau, 1.03)
    #sr_diboson.setParamEffect(met, 1.05)
    sr.addSample(sr_diboson)

    sr_higgsHist = recoil['sr'].integrate('process', 'Hbb').integrate('systematic', 'nominal')
    sr_higgsTemplate = template(sr_higgsHist, 'recoil')
    sr_higgs = rl.TemplateSample(ch_name+'_higgs', rl.Sample.BACKGROUND, sr_higgsTemplate)
    sr_higgs.setParamEffect(lumi, 1.027)
    sr_higgs.setParamEffect(Hbb_Norm, 1.2)
    sr_higgs.setParamEffect(trig_met, 1.01)
    sr_higgs.setParamEffect(veto_tau, 1.03)
    #sr_higgs.setParamEffect(met, 1.05)
    sr.addSample(sr_higgs)

    for signal in recoil['sr'].identifiers('process'):
        if 'Mono' not in str(signal):
            continue
        sr_dmHist = recoil['sr'].integrate('process', signal).integrate('systematic', 'nominal')
        sr_dmTemplate = template(sr_dmHist, 'recoil')
        sr_dm = rl.TemplateSample(ch_name+'_'+str(signal), rl.Sample.SIGNAL, sr_dmTemplate)
        sr_dm.setParamEffect(lumi, 1.027)
        sr_dm.setParamEffect(trig_met, 1.01)
        sr_dm.setParamEffect(veto_tau, 1.03)
        #sr_dm.setParamEffect(met, 1.05)
        sr.addSample(sr_dm)

    ###
    # End of SR
    ###

    ###
    ###
    # Single Lepton Control Regions
    ###
    ###
    cr = {}
    ttbarHist = {}
    ttbarTemplate = {}
    ttbarMC = {}
    ttbarTransferFactor = {}
    ttbar = {}
    wjetsHist = {}
    wjetsTemplate = {}
    wjetsMC = {}
    wjetsTransferFactor = {}
    wjets = {}
    singletopHist = {}
    singletopTemplate = {}
    singletop = {}
    dyHist = {}
    dyTemplate = {}
    dyMC = {}
    dyTransferFactor = {}
    dy = {}
    dibosonHist = {}
    dibosonTemplate = {}
    diboson = {}
    higgsHist = {}
    higgsTemplate = {}
    higgs = {}

    for p in ['t', 'w']:
        for l in ['e', 'm']:
            ch_name = p+l+'cr-'+mass+'-'+category
            cr[p+l] = rl.Channel(ch_name)
            model.addChannel(cr[p+l])
            if 'e' in l:
                cr[p+l].setObservation(template(recoil[p+l+'cr'].integrate('process', 'SingleElectron').integrate('systematic', 'nominal'), 'recoil'))
            else:
                cr[p+l].setObservation(template(recoil[p+l+'cr'].integrate('process', 'MET').integrate('systematic', 'nominal'), 'recoil'))

            ttbarHist[p+l] = recoil[p+l+'cr'].integrate('process', 'TT').integrate('systematic', 'nominal')
            ttbarTemplate[p+l] = template(ttbarHist[p+l], 'recoil')
            ttbarMC[p+l] = rl.TemplateSample(ch_name+'_ttbarMC', rl.Sample.BACKGROUND, ttbarTemplate[p+l])
            #ttbarMC[p+l].setParamEffect(jec, np.random.normal(loc=1, scale=0.05, size=recoil.nbins))
            #ttbarMC[p+l].setParamEffect(ele_id_eff, np.random.normal(loc=1, scale=0.02, size=recoil.nbins), np.random.normal(loc=1, scale=0.02, size=recoil.nbins))
            ttbarTransferFactor[p+l] = ttbarMC[p+l].getExpectation() / sr_ttbarMC.getExpectation()
            ttbar[p+l] = rl.TransferFactorSample(ch_name+'_ttbar', rl.Sample.BACKGROUND, ttbarTransferFactor[p+l], sr_ttbar)
            cr[p+l].addSample(ttbar[p+l])

            wjetsHist[p+l] = recoil[p+l+'cr'].integrate('process', 'WJets').integrate('systematic', 'nominal')
            wjetsTemplate[p+l] = template(wjetsHist[p+l], 'recoil')
            wjetsMC[p+l] = rl.TemplateSample(ch_name+'_wjetsMC', rl.Sample.BACKGROUND, wjetsTemplate[p+l])
            #wjetsMC[p+l].setParamEffect(jec, np.random.normal(loc=1, scale=0.05, size=recoil.nbins))
            #wjetsMC[p+l].setParamEffect(ele_id_eff, np.random.normal(loc=1, scale=0.02, size=recoil.nbins), np.random.normal(loc=1, scale=0.02, size=recoil.nbins))
            wjetsTransferFactor[p+l] = wjetsMC[p+l].getExpectation() / sr_wjetsMC.getExpectation()
            wjets[p+l] = rl.TransferFactorSample(ch_name+'_wjets', rl.Sample.BACKGROUND, wjetsTransferFactor[p+l], sr_wjets)
            cr[p+l].addSample(wjets[p+l])

            singletopHist[p+l] = recoil[p+l+'cr'].integrate('process', 'ST').integrate('systematic', 'nominal')
            singletopTemplate[p+l] = template(singletopHist[p+l], 'recoil')
            singletop[p+l] = rl.TemplateSample(ch_name+'_singletop', rl.Sample.BACKGROUND, singletopTemplate[p+l])
            cr[p+l].addSample(singletop[p+l])

            dyHist[p+l] = recoil[p+l+'cr'].integrate('process', 'DY').integrate('systematic', 'nominal')
            dyTemplate[p+l] = template(dyHist[p+l], 'recoil')
            dy[p+l] = rl.TemplateSample(ch_name+'_dy', rl.Sample.BACKGROUND, dyTemplate[p+l])
            cr[p+l].addSample(dy[p+l])

            dibosonHist[p+l] = recoil[p+l+'cr'].integrate('process', 'VV').integrate('systematic', 'nominal')
            dibosonTemplate[p+l] = template(dibosonHist[p+l], 'recoil')
            diboson[p+l] = rl.TemplateSample(ch_name+'_diboson', rl.Sample.BACKGROUND, dibosonTemplate[p+l])
            cr[p+l].addSample(diboson[p+l])

            higgsHist[p+l] = recoil[p+l+'cr'].integrate('process', 'Hbb').integrate('systematic', 'nominal')
            higgsTemplate[p+l] = template(higgsHist[p+l], 'recoil')
            higgs[p+l] = rl.TemplateSample(ch_name+'_higgs', rl.Sample.BACKGROUND, higgsTemplate[p+l])
            cr[p+l].addSample(higgs[p+l])

    ###
    # End of Single Lepton CR
    ###

    ###
    ###
    # Double Lepton Control Regions
    ###
    ###
    for ll in ['ze', 'zm']:
        ch_name = ll+'cr-'+mass+'-'+category
        cr[ll] = rl.Channel(ch_name)
        model.addChannel(cr[ll])
        if 'e' in ll:
            cr[ll].setObservation(template(recoil[ll+'cr'].integrate('process', 'SingleElectron').integrate('systematic', 'nominal'), 'recoil'))
        else:
            cr[ll].setObservation(template(recoil[ll+'cr'].integrate('process', 'MET').integrate('systematic', 'nominal'), 'recoil'))

        dyHist[ll] = recoil[ll+'cr'].integrate('process', 'DY').integrate('systematic', 'nominal')
        dyTemplate[ll] = template(dyHist[ll], 'recoil')
        dyMC[ll] = rl.TemplateSample(ch_name+'_dyMC', rl.Sample.BACKGROUND, dyTemplate[ll])
        #zllJetsMC.setParamEffect(jec, np.random.normal(loc=1, scale=0.05, size=recoil.nbins))
        #zllJetsMC.setParamEffect(ele_id_eff, np.random.normal(loc=1, scale=0.02, size=recoil.nbins), np.random.normal(loc=1, scale=0.02, size=recoil.nbins))
        dyTransferFactor[ll] = dyMC[ll].getExpectation() / sr_zvvMC.getExpectation()
        dy[ll] = rl.TransferFactorSample(ch_name+'_dy', rl.Sample.BACKGROUND, dyTransferFactor[ll], sr_zvv)
        cr[ll].addSample(dy[ll])

        ttbarHist[ll] = recoil[ll+'cr'].integrate('process', 'TT').integrate('systematic', 'nominal')
        ttbarTemplate[ll] = template(ttbarHist[ll], 'recoil')
        ttbar[ll] = rl.TemplateSample(ch_name+'_ttbar', rl.Sample.BACKGROUND, ttbarTemplate[ll])
        cr[ll].addSample(ttbar[ll])

        singletopHist[ll] = recoil[ll+'cr'].integrate('process', 'ST').integrate('systematic', 'nominal')
        singletopTemplate[ll] = template(singletopHist[ll], 'recoil')
        singletop[ll] = rl.TemplateSample(ch_name+'_singletop', rl.Sample.BACKGROUND, singletopTemplate[ll])
        cr[ll].addSample(singletop[ll])

        dibosonHist[ll] = recoil[ll+'cr'].integrate('process', 'VV').integrate('systematic', 'nominal')
        dibosonTemplate[ll] = template(dibosonHist[ll], 'recoil')
        diboson[ll] = rl.TemplateSample(ch_name+'_diboson', rl.Sample.BACKGROUND, dibosonTemplate[ll])
        cr[ll].addSample(diboson[ll])

        higgsHist[ll] = recoil[ll+'cr'].integrate('process', 'Hbb').integrate('systematic', 'nominal')
        higgsTemplate[ll] = template(higgsHist[ll], 'recoil')
        higgs[ll] = rl.TemplateSample(ch_name+'_higgs', rl.Sample.BACKGROUND, higgsTemplate[ll])
        cr[ll].addSample(higgs[ll])

    ###
    # End of Double Lepton CR
    ###

    ###
    ###
    # Single Photon Control Region
    ###
    ###
    ch_name = 'gcr-'+mass+'-'+category
    gcr = rl.Channel(ch_name)
    model.addChannel(gcr)

    gcr.setObservation(template(recoil['gcr'].integrate('process', 'SinglePhoton').integrate('systematic', 'nominal'), 'recoil'))

    gcr_gjetsHist = recoil['gcr'].integrate('process', 'GJets').integrate('systematic', 'nominal')
    gcr_gjetsTemplate = template(gcr_gjetsHist, 'recoil')
    gcr_gjetsMC = rl.TemplateSample(ch_name+'_gjetsMC', rl.Sample.BACKGROUND, gcr_gjetsTemplate)
    #gcr_gjetsMC.setParamEffect(jec, np.random.normal(loc=1, scale=0.05, size=recoil.nbins))
    #gcr_gjetsMC.setParamEffect(pho_id_eff, np.random.normal(loc=1, scale=0.02, size=recoil.nbins))
    gcr_gjetsTransferFactor = gcr_gjetsMC.getExpectation() / sr_zvvMC.getExpectation()
    gcr_gjets = rl.TransferFactorSample(ch_name+'_gjets', rl.Sample.BACKGROUND, gcr_gjetsTransferFactor, sr_zvv)
    #gammaJets.setParamEffect(gamma_to_z_ewk, np.linspace(1.01, 1.05, recoil.nbins))
    gcr.addSample(gcr_gjets)

    with open(os.path.join(str(tmpdir), 'darkhiggsModel'+year+'.pkl'), "wb") as fout:
        pickle.dump(model, fout)

    model.renderCombine(os.path.join(str(tmpdir), 'darkhiggsModel'+year+'/'+mass))
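# ---------------------------------------------------------------------------
# Assumed helper (not shown in this file): darkhiggs_model() calls
# template(histogram, 'recoil') to turn a 1D coffea histogram into the
# (values, edges, name) tuple that rhalphalib expects.  The sketch below is
# one way to write it that is consistent with the usage above; whether the
# real helper folds overflow in (as the edges(overflow='all') Observables
# suggest) is an assumption.
# ---------------------------------------------------------------------------
def template(histogram, name):
    # bin contents of the fully-integrated histogram, overflow included
    values = histogram.values(overflow='all')[()]
    # bin edges of the named axis, matching the values above
    edges = histogram.axis(name).edges(overflow='all')
    return (values, edges, name)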
def create_datacard(inputfile, carddir, nbins, nMCTF, nDataTF, passBinName, failBinName):

    lumi = rl.NuisanceParameter('CMS_lumi', 'lnN')

    msdbins = np.linspace(50, nbins * 10.0 + 50.0, nbins + 1)
    msd = rl.Observable('msd', msdbins)
    msdpts = msdbins[:-1] + 0.5 * np.diff(msdbins)
    msdscaled = (msdpts - 50.) / (10.0 * nbins)

    # Build qcd MC pass+fail model and fit to polynomial
    qcdmodel = rl.Model('qcdmodel')
    qcdpass, qcdfail = 0., 0.
    failCh = rl.Channel('fail')
    passCh = rl.Channel('pass')
    qcdmodel.addChannel(failCh)
    qcdmodel.addChannel(passCh)

    # pseudodata MC template
    failTempl = get_hist(inputfile, 'histJet2Mass_' + failBinName + '_QCD', obs=msd)
    passTempl = get_hist(inputfile, 'histJet2Mass_' + passBinName + '_QCD', obs=msd)
    failCh.setObservation(failTempl[:-1])
    passCh.setObservation(passTempl[:-1])
    qcdfail = failCh.getObservation().sum()
    qcdpass = passCh.getObservation().sum()

    qcdeff = qcdpass / qcdfail
    tf_MCtempl = rl.BernsteinPoly("tf_MCtempl", (nMCTF,), ['msd'], limits=(0, 10))
    tf_MCtempl_params = qcdeff * tf_MCtempl(msdscaled)

    failCh = qcdmodel['fail']
    passCh = qcdmodel['pass']
    failObs = failCh.getObservation()
    qcdparams = np.array([
        rl.IndependentParameter('qcdparam_msdbin%d' % i, 0)
        for i in range(msd.nbins)
    ])
    sigmascale = 10.
    scaledparams = failObs * (
        1 + sigmascale / np.maximum(1., np.sqrt(failObs)))**qcdparams
    fail_qcd = rl.ParametericSample('fail_qcd', rl.Sample.BACKGROUND, msd, scaledparams)
    failCh.addSample(fail_qcd)
    pass_qcd = rl.TransferFactorSample('pass_qcd', rl.Sample.BACKGROUND, tf_MCtempl_params, fail_qcd)
    passCh.addSample(pass_qcd)

    qcdfit_ws = ROOT.RooWorkspace('qcdfit_ws')
    simpdf, obs = qcdmodel.renderRoofit(qcdfit_ws)
    qcdfit = simpdf.fitTo(
        obs,
        ROOT.RooFit.Extended(True),
        ROOT.RooFit.SumW2Error(True),
        ROOT.RooFit.Strategy(2),
        ROOT.RooFit.Save(),
        ROOT.RooFit.Minimizer('Minuit2', 'migrad'),
        ROOT.RooFit.PrintLevel(-1),
    )
    qcdfit_ws.add(qcdfit)
    if "pytest" not in sys.modules:
        qcdfit_ws.writeToFile(os.path.join(str(carddir), 'HHModel_qcdfit.root'))
    if qcdfit.status() != 0:
        raise RuntimeError('Could not fit qcd')

    param_names = [p.name for p in tf_MCtempl.parameters.reshape(-1)]
    decoVector = rl.DecorrelatedNuisanceVector.fromRooFitResult(
        tf_MCtempl.name + '_deco', qcdfit, param_names)
    tf_MCtempl.parameters = decoVector.correlated_params.reshape(
        tf_MCtempl.parameters.shape)
    tf_MCtempl_params_final = tf_MCtempl(msdscaled)
    tf_dataResidual = rl.BernsteinPoly("tf_dataResidual", (nDataTF,), ['msd'], limits=(0, 10))
    tf_dataResidual_params = tf_dataResidual(msdscaled)
    tf_params = qcdeff * tf_MCtempl_params_final * tf_dataResidual_params

    # build actual fit model now
    model = rl.Model("HHModel")
    for region in ['pass', 'fail']:
        ch = rl.Channel(region)
        model.addChannel(ch)

        isPass = region == 'pass'
        templates = {
            'TTJets': get_hist(inputfile, 'histJet2Mass%s_TTJets' % ('_' + passBinName if isPass else '_' + failBinName), obs=msd),
            'H': get_hist(inputfile, 'histJet2Mass%s_H' % ('_' + passBinName if isPass else '_' + failBinName), obs=msd),
            'HH': get_hist(inputfile, 'histJet2Mass%s_HH' % ('_' + passBinName if isPass else '_' + failBinName), obs=msd),
            'VH': get_hist(inputfile, 'histJet2Mass%s_VH' % ('_' + passBinName if isPass else '_' + failBinName), obs=msd),
            'ttH': get_hist(inputfile, 'histJet2Mass%s_ttH' % ('_' + passBinName if isPass else '_' + failBinName), obs=msd),
            'others': get_hist(inputfile, 'histJet2Mass%s_others' % ('_' + passBinName if isPass else '_' + failBinName), obs=msd),
            'QCD': get_hist(inputfile, 'histJet2Mass%s_QCD' % ('_' + passBinName if isPass else '_' + failBinName), obs=msd),
            'Data': get_hist(inputfile, 'histJet2Mass%s_Data' % ('_' + passBinName if isPass else '_' + failBinName), obs=msd),
        }

        for sName in ['TTJets', 'H', 'HH', 'VH', 'ttH', 'others']:
            # get templates
            templ = templates[sName]
            stype = rl.Sample.SIGNAL if sName == 'HH' else rl.Sample.BACKGROUND
            sample = rl.TemplateSample(ch.name + '_' + sName, stype, templ)

            # set nuisance values
            sample.setParamEffect(lumi, 1.027)

            # set mc stat uncs
            sample.autoMCStats()

            # shape systematics
            valuesNominal = templ[0]
            systs = ['JMS', 'JMR', 'BDTMassShape', 'ttJetsCorr']
            for syst in systs:
                valuesUp = get_hist(inputfile, 'histJet2Mass%s_%s_%sUp' % ('_' + passBinName if isPass else '_' + failBinName, sName, syst), obs=msd)[0]
                valuesDown = get_hist(inputfile, 'histJet2Mass%s_%s_%sDown' % ('_' + passBinName if isPass else '_' + failBinName, sName, syst), obs=msd)[0]
                effectUp = np.ones_like(valuesNominal)
                effectDown = np.ones_like(valuesNominal)
                for i in range(len(valuesNominal)):
                    if valuesNominal[i] > 0.:
                        effectUp[i] = valuesUp[i] / valuesNominal[i]
                        effectDown[i] = valuesDown[i] / valuesNominal[i]
                syst_param = rl.NuisanceParameter(syst, 'shape')
                sample.setParamEffect(syst_param, effectUp, effectDown)

            ch.addSample(sample)

        # set the observation from the Data template
        # (the commented line below would instead build pseudo-data by summing the MC templates)
        #yields = sum(tpl[0] for tpl in templates.values())
        yields = templates['Data'][0]
        data_obs = (yields, msd.binning, msd.name)
        ch.setObservation(data_obs)

    failCh = model['fail']
    passCh = model['pass']

    qcdparams = np.array([
        rl.IndependentParameter('qcdparam_msdbin%d' % i, 0)
        for i in range(msd.nbins)
    ])
    initial_qcd = failCh.getObservation().astype(float)  # was integer, and numpy complained about subtracting float from it
    for sample in failCh:
        initial_qcd -= sample.getExpectation(nominal=True)
    if np.any(initial_qcd < 0.):
        raise ValueError("initial_qcd negative for some bins..", initial_qcd)
    sigmascale = 10  # to scale the deviation from initial
    scaledparams = initial_qcd * (
        1 + sigmascale / np.maximum(1., np.sqrt(initial_qcd)))**qcdparams
    fail_qcd = rl.ParametericSample('fail_qcd', rl.Sample.BACKGROUND, msd, scaledparams)
    failCh.addSample(fail_qcd)
    pass_qcd = rl.TransferFactorSample('pass_qcd', rl.Sample.BACKGROUND, tf_params, fail_qcd)
    passCh.addSample(pass_qcd)

    with open(os.path.join(str(carddir), 'HHModel.pkl'), "wb") as fout:
        pickle.dump(model, fout)

    model.renderCombine(os.path.join(str(carddir), 'HHModel'))
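# ---------------------------------------------------------------------------
# Assumed helper (not defined in this file): create_datacard() expects
# get_hist(inputfile, name, obs=...) to return a template tuple whose first
# entry is the bin contents and whose last entry can be dropped with [:-1]
# (e.g. a sumw2 array).  A minimal sketch with uproot, assuming `inputfile`
# is the path to a ROOT file containing TH1 histograms, could look like this;
# the real helper may read the file differently.
# ---------------------------------------------------------------------------
def get_hist(inputfile, name, obs):
    import uproot

    with uproot.open(inputfile) as f:
        # bin contents of the named TH1 (edges are taken from the observable)
        values, _ = f[name].to_numpy()
        # per-bin variances, returned as the optional sumw2 entry
        sumw2 = f[name].variances()
    return (values, obs.binning, obs.name, sumw2)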
def test_simple():
    model = rl.Model("testModel")

    jec = rl.NuisanceParameter('CMS_jec', 'shape')
    massScale = rl.NuisanceParameter('CMS_msdScale', 'shape')
    lumi = rl.NuisanceParameter('CMS_lumi', 'lnN')

    bins = np.linspace(40, 201, 24)[:6]
    nbins = len(bins) - 1

    for chName in ['pt450to500Fail', 'pt450to500Pass']:
        ch = rl.Channel(chName)
        model.addChannel(ch)
        notqcdsum = np.zeros(nbins)
        for sName in ['zqq', 'wqq', 'hqq']:
            templ = (np.random.exponential(5, size=nbins), bins, 'x')
            notqcdsum += templ[0]
            stype = rl.Sample.SIGNAL if sName == 'hqq' else rl.Sample.BACKGROUND
            sample = rl.TemplateSample(ch.name + '_' + sName, stype, templ)
            jecup_ratio = np.random.normal(loc=1, scale=0.05, size=nbins)
            sample.setParamEffect(jec, jecup_ratio)
            msdUp = np.linspace(0.9, 1.1, nbins)
            msdDn = np.linspace(1.2, 0.8, nbins)
            sample.setParamEffect(massScale, msdUp, msdDn)
            sample.setParamEffect(lumi, 1.027)
            ch.addSample(sample)
        # make up a data_obs
        data_obs = (np.random.poisson(notqcdsum + 50), bins, 'x')
        ch.setObservation(data_obs)

    # steal observable definition from previous template
    obs = model['pt450to500Fail_wqq'].observable

    qcdparams = [
        rl.IndependentParameter('qcdparam_bin%d' % i, 0) for i in range(nbins)
    ]
    initial_qcd = model['pt450to500Fail'].getObservation().astype(float)  # was integer, and numpy complained about subtracting float from it
    for p in model['pt450to500Fail']:
        initial_qcd -= p.getExpectation(nominal=True)
    if np.any(initial_qcd < 0.):
        raise ValueError("uh-oh")
    sigmascale = 10  # to scale the deviation from initial
    scaledparams = initial_qcd + sigmascale * np.sqrt(initial_qcd) * qcdparams
    fail_sample = rl.ParametericSample('pt450to500Fail_qcd', rl.Sample.BACKGROUND, obs, scaledparams)
    model['pt450to500Fail'].addSample(fail_sample)

    tf = rl.BernsteinPoly("qcd_pass_rhalphTF", (2, 3), ['pt', 'rho'])
    # suppose the scaled sampling point is 0.02 and the original is 465 (first pt bin)
    ptval = 0.02
    # suppose 'bins' is the msd binning, here we compute rho = 2*ln(msd/pt) using the msd value 0.3 of the way into the bin
    msdpts = bins[:-1] + 0.3 * np.diff(bins)
    rhovals = 2 * np.log(msdpts / 465.)
    # here we would derive these all at once with 2D array, and thus the bounds would envelope the whole space
    rhovals = (rhovals - rhovals.min()) / np.ptp(rhovals)
    tf_params = np.array([tf(ptval, r) for r in rhovals])
    pass_sample = rl.TransferFactorSample('pt450to500Pass_qcd', rl.Sample.BACKGROUND, tf_params, fail_sample)
    model['pt450to500Pass'].addSample(pass_sample)

    import sys
    print("ROOT used? ", 'ROOT' in sys.modules)
    model.renderCombine("simplemodel")
    print("ROOT used? ", 'ROOT' in sys.modules)
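# ---------------------------------------------------------------------------
# Illustration (not in the original source): the fail-region QCD yields are
# free parameters morphed around an initial estimate.  test_simple() uses an
# additive parametrisation, while the other builders in this file use a
# multiplicative one; the tiny numpy demo below shows what a +/-1 pull of a
# single qcdparam does to one bin in each scheme (plain floats stand in for
# the rhalphalib IndependentParameters).
# ---------------------------------------------------------------------------
def _qcd_morphing_demo(initial=100.0, sigmascale=10.0):
    for pull in (-1.0, 0.0, 1.0):
        # additive scheme: initial + sigmascale * sqrt(initial) * pull
        additive = initial + sigmascale * np.sqrt(initial) * pull
        # multiplicative scheme: initial * (1 + sigmascale / max(1, sqrt(initial)))**pull
        multiplicative = initial * (1 + sigmascale / np.maximum(1.0, np.sqrt(initial))) ** pull
        print("pull=%+.0f  additive=%7.1f  multiplicative=%7.1f" % (pull, additive, multiplicative))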
def rhalphabeth(msdbins):

    process = hist.Cat("process", "Process", sorting="placement")
    cats = ("process",)
    bkg_map = OrderedDict()
    # bkg_map['V+jets'] = (['Z+jets','W+jets'],)
    bkg_map["V+jets"] = (["Z+jets"],)
    vjets_hists = {}
    for key in hists["data"].keys():
        vjets_hists[key] = hists["bkg"][key].group(cats, process, bkg_map)

    # Build qcd MC pass+fail model and fit to polynomial
    qcdmodel = rl.Model("qcdmodel")
    qcdpass, qcdfail = 0.0, 0.0

    msds = np.meshgrid(msdbins[:-1] + 0.5 * np.diff(msdbins), indexing="ij")[0]
    msds = np.sqrt(msds) * np.sqrt(msds)
    print(msds)
    msdscaled = msds / 300.0
    msd = rl.Observable("fjmass", msdbins)

    failCh = rl.Channel("fail")
    passCh = rl.Channel("pass")
    qcdmodel.addChannel(failCh)
    qcdmodel.addChannel(passCh)

    # mock template
    ptnorm = 1
    vjetsHistFail = (
        vjets_hists["template"]
        .integrate("region", "sr")
        .sum("gentype", "recoil")
        .integrate("process", "V+jets")
        .integrate("systematic", "nominal")
        .values()[()][:, 0]
    )
    vjetsHistFail[vjetsHistFail <= 0] = 1e-7
    failTempl = (
        vjetsHistFail,
        vjets_hists["template"]
        .integrate("region", "sr")
        .sum("gentype", "recoil")
        .integrate("process", "V+jets")
        .integrate("systematic", "nominal")
        .axis("fjmass")
        .edges(),
        "fjmass",
    )
    vjetsHistPass = (
        vjets_hists["template"]
        .integrate("region", "sr")
        .sum("gentype", "recoil")
        .integrate("process", "V+jets")
        .integrate("systematic", "nominal")
        .values()[()][:, 1]
    )
    vjetsHistPass[vjetsHistPass <= 0] = 1e-7
    passTempl = (
        vjetsHistPass,
        vjets_hists["template"]
        .integrate("region", "sr")
        .sum("gentype", "recoil")
        .integrate("process", "V+jets")
        .integrate("systematic", "nominal")
        .axis("fjmass")
        .edges(),
        "fjmass",
    )

    failCh.setObservation(failTempl)
    passCh.setObservation(passTempl)
    qcdfail += failCh.getObservation().sum()
    qcdpass += passCh.getObservation().sum()

    qcdeff = qcdpass / qcdfail
    tf_MCtempl = rl.BernsteinPoly("tf_MCtempl", (2,), ["fjmass"])
    tf_MCtempl_params = qcdeff * tf_MCtempl(msdscaled)

    failCh = qcdmodel["fail"]
    passCh = qcdmodel["pass"]
    failObs = failCh.getObservation()

    qcdparams = np.array([
        rl.IndependentParameter("qcdparam_msdbin%d" % i, 0)
        for i in range(msd.nbins)
    ])
    sigmascale = 10.0
    scaledparams = (
        failObs * (1 + sigmascale / np.maximum(1.0, np.sqrt(failObs)))**qcdparams
    )
    fail_qcd = rl.ParametericSample("fail_qcd", rl.Sample.BACKGROUND, msd, scaledparams)
    failCh.addSample(fail_qcd)
    print(tf_MCtempl_params)
    pass_qcd = rl.TransferFactorSample("pass_qcd", rl.Sample.BACKGROUND, tf_MCtempl_params, fail_qcd)
    passCh.addSample(pass_qcd)

    qcdfit_ws = ROOT.RooWorkspace("qcdfit_ws")
    simpdf, obs = qcdmodel.renderRoofit(qcdfit_ws)
    qcdfit = simpdf.fitTo(
        obs,
        ROOT.RooFit.Extended(True),
        ROOT.RooFit.SumW2Error(True),
        ROOT.RooFit.Strategy(2),
        ROOT.RooFit.Save(),
        ROOT.RooFit.Minimizer("Minuit2", "migrad"),
        ROOT.RooFit.PrintLevel(-1),
    )
    qcdfit_ws.add(qcdfit)
    if "pytest" not in sys.modules:
        qcdfit_ws.writeToFile(os.path.join(str("models"), "testModel_qcdfit.root"))
    if qcdfit.status() != 0:
        raise RuntimeError("Could not fit qcd")

    param_names = [p.name for p in tf_MCtempl.parameters.reshape(-1)]
    decoVector = rl.DecorrelatedNuisanceVector.fromRooFitResult(
        tf_MCtempl.name + "_deco", qcdfit, param_names)
    tf_MCtempl.parameters = decoVector.correlated_params.reshape(
        tf_MCtempl.parameters.shape)
    tf_MCtempl_params_final = tf_MCtempl(msdscaled)

    tf_dataResidual = rl.BernsteinPoly("tf_dataResidual", (2,), ["fjmass"], limits=(0, 10))
    tf_dataResidual_params = tf_dataResidual(msdscaled)
    tf_params = qcdeff * tf_MCtempl_params_final * tf_dataResidual_params

    return tf_params
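# ---------------------------------------------------------------------------
# Usage note (assumption about the intended call order): the tf_params array
# returned by rhalphabeth() is what model() multiplies into the signal-region
# Z(vv) yields for the "pass" category (sr_zjetsBinYields * tf_params), so a
# plausible sequence is
#
#   tf_params = rhalphabeth(msdbins)        # msdbins = fjmass binning
#   m = model(year, recoil_bin, "pass")     # picks tf_params up from module scope
#
# with msdbins matching the fjmass edges used to fill hists["bkg"].
# ---------------------------------------------------------------------------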
def test_rhalphabet(tmpdir):
    throwPoisson = True  # False

    # jec = rl.NuisanceParameter('CMS_jec', 'lnN')
    # massScale = rl.NuisanceParameter('CMS_msdScale', 'shape')
    # lumi = rl.NuisanceParameter('CMS_lumi', 'lnN')
    tqqeffSF = rl.IndependentParameter('tqqeffSF', 1., 0, 10)
    tqqnormSF = rl.IndependentParameter('tqqnormSF', 1., 0, 10)

    ptbins = np.array([450, 500, 550, 600, 675, 800, 1200])
    npt = len(ptbins) - 1
    msdbins = np.linspace(47, 201, 23)
    msd = rl.Observable('msd', msdbins)

    # here we derive these all at once with 2D array
    ptpts, msdpts = np.meshgrid(ptbins[:-1] + 0.3 * np.diff(ptbins),
                                msdbins[:-1] + 0.5 * np.diff(msdbins),
                                indexing='ij')
    rhopts = 2 * np.log(msdpts / ptpts)
    ptscaled = (ptpts - 450.) / (1200. - 450.)
    rhoscaled = (rhopts - (-6)) / ((-2.1) - (-6))
    validbins = (rhoscaled >= 0) & (rhoscaled <= 1)
    rhoscaled[~validbins] = 1  # we will mask these out later

    # Build qcd MC pass+fail model and fit to polynomial
    qcdmodel = rl.Model("qcdmodel")
    qcdpass, qcdfail = 0., 0.
    for ptbin in range(npt):
        failCh = rl.Channel("ptbin%d%s" % (ptbin, 'fail'))
        passCh = rl.Channel("ptbin%d%s" % (ptbin, 'pass'))
        qcdmodel.addChannel(failCh)
        qcdmodel.addChannel(passCh)

        # QCD templates from file
        failTempl = get_template("QCD", 0, ptbin + 1, obs=msd, syst="nominal")
        passTempl = get_template("QCD", 1, ptbin + 1, obs=msd, syst="nominal")

        failCh.setObservation(failTempl, read_sumw2=True)
        passCh.setObservation(passTempl, read_sumw2=True)
        qcdfail += sum([val[0] for val in failCh.getObservation()])
        qcdpass += sum([val[0] for val in passCh.getObservation()])

    qcdeff = qcdpass / qcdfail
    print("Inclusive P/F from Monte Carlo = " + str(qcdeff))

    # initial values
    print("Initial fit values read from file initial_vals.csv")
    initial_vals = np.genfromtxt('initial_vals.csv')
    initial_vals = initial_vals.reshape(3, 3)
    print(initial_vals)

    tf_MCtempl = rl.BernsteinPoly("tf_MCtempl", (2, 2), ['pt', 'rho'],
                                  init_params=initial_vals,
                                  limits=(-10, 10))
    tf_MCtempl_params = qcdeff * tf_MCtempl(ptscaled, rhoscaled)
    for ptbin in range(npt):
        failCh = qcdmodel['ptbin%dfail' % ptbin]
        passCh = qcdmodel['ptbin%dpass' % ptbin]
        failObs = failCh.getObservation()
        passObs = passCh.getObservation()

        qcdparams = np.array([
            rl.IndependentParameter('qcdparam_ptbin%d_msdbin%d' % (ptbin, i), 0)
            for i in range(msd.nbins)
        ])
        sigmascale = 10.
        scaledparams = failObs * (
            1 + sigmascale / np.maximum(1., np.sqrt(failObs)))**qcdparams
        fail_qcd = rl.ParametericSample('ptbin%dfail_qcd' % ptbin, rl.Sample.BACKGROUND, msd, scaledparams[0])
        failCh.addSample(fail_qcd)
        pass_qcd = rl.TransferFactorSample('ptbin%dpass_qcd' % ptbin, rl.Sample.BACKGROUND, tf_MCtempl_params[ptbin, :], fail_qcd)
        passCh.addSample(pass_qcd)

        failCh.mask = validbins[ptbin]
        passCh.mask = validbins[ptbin]

    qcdfit_ws = ROOT.RooWorkspace('qcdfit_ws')
    simpdf, obs = qcdmodel.renderRoofit(qcdfit_ws)
    qcdfit = simpdf.fitTo(
        obs,
        ROOT.RooFit.Extended(True),
        ROOT.RooFit.SumW2Error(True),
        ROOT.RooFit.Strategy(2),
        ROOT.RooFit.Save(),
        ROOT.RooFit.Minimizer('Minuit2', 'migrad'),
        ROOT.RooFit.PrintLevel(1),
    )
    qcdfit_ws.add(qcdfit)
    qcdfit_ws.writeToFile(os.path.join(str(tmpdir), 'testModel_qcdfit.root'))

    # Set parameters to fitted values
    allparams = dict(zip(qcdfit.nameArray(), qcdfit.valueArray()))
    for i, p in enumerate(tf_MCtempl.parameters.reshape(-1)):
        p.value = allparams[p.name]
        print(p.name, p.value)
    if qcdfit.status() != 0:
        raise RuntimeError('Could not fit qcd')

    # arrays for plotting pt vs msd
    pts_plot = np.linspace(450, 1200, 15)
    ptpts_plot, msdpts_plot = np.meshgrid(
        pts_plot[:-1] + 0.5 * np.diff(pts_plot),
        msdbins[:-1] + 0.5 * np.diff(msdbins),
        indexing='ij')
    ptpts_plot_scaled = (ptpts_plot - 450.) / (1200. - 450.)
    rhopts_plot = 2 * np.log(msdpts_plot / ptpts_plot)
    rhopts_plot_scaled = (rhopts_plot - (-6)) / ((-2.1) - (-6))
    validbins_plot = (rhopts_plot_scaled >= 0) & (rhopts_plot_scaled <= 1)

    ptpts_plot = ptpts_plot[validbins_plot]
    msdpts_plot = msdpts_plot[validbins_plot]
    ptpts_plot_scaled = ptpts_plot_scaled[validbins_plot]
    rhopts_plot_scaled = rhopts_plot_scaled[validbins_plot]

    tf_MCtempl_vals = tf_MCtempl(ptpts_plot_scaled, rhopts_plot_scaled, nominal=True)
    df_msdpt = pd.DataFrame([])
    df_msdpt["msd"] = msdpts_plot.reshape(-1)
    df_msdpt["pt"] = ptpts_plot.reshape(-1)
    df_msdpt["eQCDMC"] = tf_MCtempl_vals.reshape(-1)
    df_msdpt.to_csv("msdpt.csv", header=False)

    # arrays for plotting pt vs rho
    rhos_plot = np.linspace(-6, -2.1, 23)
    ptpts_plot, rhopts_plot = np.meshgrid(
        pts_plot[:-1] + 0.5 * np.diff(pts_plot),
        rhos_plot[:-1] + 0.5 * np.diff(rhos_plot),
        indexing='ij')
    ptpts_plot_scaled = (ptpts_plot - 450.) / (1200. - 450.)
    rhopts_plot_scaled = (rhopts_plot - (-6)) / ((-2.1) - (-6))
    validbins_plot = (rhopts_plot_scaled >= 0) & (rhopts_plot_scaled <= 1)

    ptpts_plot = ptpts_plot[validbins_plot]
    rhopts_plot = rhopts_plot[validbins_plot]
    ptpts_plot_scaled = ptpts_plot_scaled[validbins_plot]
    rhopts_plot_scaled = rhopts_plot_scaled[validbins_plot]

    tf_MCtempl_vals = tf_MCtempl(ptpts_plot_scaled, rhopts_plot_scaled, nominal=True)
    df_rhopt = pd.DataFrame([])
    df_rhopt["rho"] = rhopts_plot.reshape(-1)
    df_rhopt["pt"] = ptpts_plot.reshape(-1)
    df_rhopt["eQCDMC"] = tf_MCtempl_vals.reshape(-1)
    df_rhopt.to_csv("rhopt.csv", header=False)

    param_names = [p.name for p in tf_MCtempl.parameters.reshape(-1)]
    decoVector = rl.DecorrelatedNuisanceVector.fromRooFitResult(
        tf_MCtempl.name + '_deco', qcdfit, param_names)
    tf_MCtempl.parameters = decoVector.correlated_params.reshape(
        tf_MCtempl.parameters.shape)
    tf_MCtempl_params_final = tf_MCtempl(ptscaled, rhoscaled)

    tf_dataResidual = rl.BernsteinPoly("tf_dataResidual", (2, 2), ['pt', 'rho'], limits=(-10, 10))
    tf_dataResidual_params = tf_dataResidual(ptscaled, rhoscaled)
    tf_params = qcdeff * tf_MCtempl_params_final * tf_dataResidual_params

    # build actual fit model now
    model = rl.Model("testModel")

    # exclude QCD from the MC samples
    samps = [
        'ggF', 'VBF', 'WH', 'ZH', 'ttH', 'ttbar', 'singlet', 'Zjets', 'Wjets', 'VV'
    ]
    sigs = ['ggF', 'VBF', 'WH', 'ZH', 'ttH']

    for ptbin in range(npt):
        for region in ['pass', 'fail']:
            ch = rl.Channel("ptbin%d%s" % (ptbin, region))
            model.addChannel(ch)

            isPass = region == 'pass'
            ptnorm = 1.
            templates = {}
            for sName in samps:
                templates[sName] = get_template(sName, isPass, ptbin + 1, obs=msd, syst="nominal")

                # some mock expectations
                templ = templates[sName]
                stype = rl.Sample.SIGNAL if sName in sigs else rl.Sample.BACKGROUND
                sample = rl.TemplateSample(ch.name + '_' + sName, stype, templ)
                ch.addSample(sample)

            data_obs = get_template("data", isPass, ptbin + 1, obs=msd, syst="nominal")
            ch.setObservation(data_obs, read_sumw2=True)

            # drop bins outside rho validity
            mask = validbins[ptbin]
            # blind bins 11, 12, 13
            # mask[11:14] = False
            # ch.mask = mask

    for ptbin in range(npt):
        failCh = model['ptbin%dfail' % ptbin]
        passCh = model['ptbin%dpass' % ptbin]

        qcdparams = np.array([
            rl.IndependentParameter('qcdparam_ptbin%d_msdbin%d' % (ptbin, i), 0)
            for i in range(msd.nbins)
        ])
        initial_qcd = failCh.getObservation()[0].astype(float)  # was integer, and numpy complained about subtracting float from it
        for sample in failCh:
            initial_qcd -= sample.getExpectation(nominal=True)
        if np.any(initial_qcd < 0.):
            raise ValueError("initial_qcd negative for some bins..", initial_qcd)
        sigmascale = 10  # to scale the deviation from initial
        scaledparams = initial_qcd * (
            1 + sigmascale / np.maximum(1., np.sqrt(initial_qcd)))**qcdparams
        fail_qcd = rl.ParametericSample('ptbin%dfail_qcd' % ptbin, rl.Sample.BACKGROUND, msd, scaledparams)
        failCh.addSample(fail_qcd)
        pass_qcd = rl.TransferFactorSample('ptbin%dpass_qcd' % ptbin, rl.Sample.BACKGROUND, tf_params[ptbin, :], fail_qcd)
        passCh.addSample(pass_qcd)

        tqqpass = passCh['ttbar']
        tqqfail = failCh['ttbar']
        tqqPF = tqqpass.getExpectation(nominal=True).sum() / tqqfail.getExpectation(nominal=True).sum()
        tqqpass.setParamEffect(tqqeffSF, 1 * tqqeffSF)
        tqqfail.setParamEffect(tqqeffSF, (1 - tqqeffSF) * tqqPF + 1)
        tqqpass.setParamEffect(tqqnormSF, 1 * tqqnormSF)
        tqqfail.setParamEffect(tqqnormSF, 1 * tqqnormSF)

    # Fill in muon CR
    templates = {}
    samps = ['ttbar', 'QCD', 'singlet', 'Zjets', 'Wjets', 'VV']
    for region in ['pass', 'fail']:
        ch = rl.Channel("muonCR%s" % (region,))
        model.addChannel(ch)

        isPass = region == 'pass'
        for sName in samps:
            templates[sName] = get_template_muonCR(sName, isPass, obs=msd)
            stype = rl.Sample.BACKGROUND
            sample = rl.TemplateSample(ch.name + '_' + sName, stype, templates[sName])
            ch.addSample(sample)

        data_obs = get_template_muonCR("muondata", isPass, obs=msd)
        ch.setObservation(data_obs, read_sumw2=True)

    tqqpass = model['muonCRpass_ttbar']
    tqqfail = model['muonCRfail_ttbar']
    tqqPF = tqqpass.getExpectation(nominal=True).sum() / tqqfail.getExpectation(nominal=True).sum()
    tqqpass.setParamEffect(tqqeffSF, 1 * tqqeffSF)
    tqqfail.setParamEffect(tqqeffSF, (1 - tqqeffSF) * tqqPF + 1)
    tqqpass.setParamEffect(tqqnormSF, 1 * tqqnormSF)
    tqqfail.setParamEffect(tqqnormSF, 1 * tqqnormSF)

    with open(os.path.join(str(tmpdir), 'testModel.pkl'), "wb") as fout:
        pickle.dump(model, fout)

    model.renderCombine(os.path.join(str(tmpdir), 'testModel'))
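# ---------------------------------------------------------------------------
# Assumed helper (not defined in this file): test_rhalphabet() reads its
# templates through get_template(process, passed, ptbin, obs, syst) and sets
# observations with read_sumw2=True, so the helper presumably returns a
# (values, edges, name, sumw2) tuple.  The sketch below is purely
# illustrative; the file name ("signalregion.root") and the histogram naming
# scheme are assumptions, not the real layout.
# ---------------------------------------------------------------------------
def get_template(process, passed, ptbin, obs, syst="nominal", fname="signalregion.root"):
    import uproot

    region = "pass" if passed else "fail"
    # hypothetical naming convention for the stored TH1s
    hname = "%s_%s_ptbin%d_%s" % (process, region, ptbin, syst)
    with uproot.open(fname) as f:
        values, _ = f[hname].to_numpy()
        sumw2 = f[hname].variances()
    return (values, obs.binning, obs.name, sumw2)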
def jet_mass_producer(args, configs=None, MINIMAL_MODEL=False, includeMassScales=True): """ configs: configuration dict including: ModelName,gridHistFileName,channels,histLocation -> channels: dict with dict for each channels: -> includes histDir,samples,NormUnc,signal,regions,QcdEstimation """ rebin_msd = True binnings = {"W": np.linspace(50, 300, 26), "top": np.linspace(50, 300, 26)} binning_from_config = configs.get('binning', {}) for selection, bin_info in binning_from_config.items(): min_msd, max_msd = (bin_info[0], bin_info[1]) binwidth = bin_info[2] nbins = int(np.floor((max_msd - min_msd) / binwidth)) msd_bins = np.linspace(min_msd, nbins * binwidth + min_msd, nbins + 1) binnings[selection] = msd_bins #channels for combined fit channels = configs['channels'] qcd_estimation_channels = { k: v for k, v in channels.items() if "QcdEstimation" in v and v["QcdEstimation"] == "True" } print('channels:', channels.keys()) #getting path of dir with root file from config hist_file = ROOT.TFile(configs['histLocation']) do_qcd_estimation = len(qcd_estimation_channels) > 0 do_initial_qcd_fit = (configs.get("InitialQCDFit", "False") == "True") qcd_fail_region_constant = (configs.get("QCDFailConstant", "False") == "True") lumi_scale = 1. if ('Pseudo' in configs and len(configs['Pseudo']) > 0 and 'lumiScale' in configs['Pseudo'][0]): lumi_scale = float(configs['Pseudo'][0].split(':')[-1]) model_name = configs.get( 'ModelName', 'Jet_Mass_Model') #get name from config, or fall back to default #specify if QCD estimation (using Bernstein-polynomial as TF) should be used ################ #QCD Estimation# ################ # derive pt bins from channel names for the pt,rho grid for the Bernstein-Polynomial if (do_qcd_estimation): print( 'Doing some preparations for data driven QCD Estimate (Bernstein TF)' ) bernstein_orders = tuple(configs.get('BernsteinOrders', [2, 2])) qcd_model = rl.Model('qcdmodel') qcd_pass, qcd_fail = 0., 0. 
qcd_estimation_relevant_selection = 'W' for channel_name, config in qcd_estimation_channels.items(): qcd_estimation_relevant_selection = config['selection'] msd_bins = binnings[qcd_estimation_relevant_selection] fail_ch = rl.Channel(channel_name + 'fail') pass_ch = rl.Channel(channel_name + 'pass') qcd_model.addChannel(fail_ch) qcd_model.addChannel(pass_ch) additional_bin = config.get('additional_bin', '') fail_hist = hist_file.Get('W_QCD__mjet_' + config['pt_bin'] + additional_bin + '_fail') pass_hist = hist_file.Get('W_QCD__mjet_' + config['pt_bin'] + additional_bin + '_pass') if (rebin_msd > 0): fail_hist = fail_hist.Rebin(len(msd_bins) - 1, 'msd', msd_bins) pass_hist = pass_hist.Rebin(len(msd_bins) - 1, 'msd', msd_bins) if (lumi_scale != 1.0): fail_hist = scale_lumi(fail_hist, lumi_scale) pass_hist = scale_lumi(pass_hist, lumi_scale) empty_hist = fail_hist.Clone() empty_hist.Reset() signal_fail = rl.TemplateSample( channel_name + 'fail' + '_' + 'Signal', rl.Sample.SIGNAL, empty_hist) fail_ch.addSample(signal_fail) signal_pass = rl.TemplateSample( channel_name + 'pass' + '_' + 'Signal', rl.Sample.SIGNAL, empty_hist) pass_ch.addSample(signal_pass) fail_ch.setObservation(fail_hist) pass_ch.setObservation(pass_hist) qcd_fail += fail_ch.getObservation().sum() qcd_pass += pass_ch.getObservation().sum() qcd_eff = qcd_pass / qcd_fail #get all lower edges from channel names # pt_edges = [float(channel.split('Pt')[-1]) for channel in qcd_estimation_channels] # #get last upper edge from name of last channel # pt_edges.append(float(channels[list(qcd_estimation_channels.keys())[-1].split('Pt')[0]+'Pt%i'%pt_edges[-1]]['pt_bin'].split('to')[-1])) pt_edges = configs.get('pt_edges', [500, 550, 600, 675, 800, 1200]) pt_bins = np.array(pt_edges) # pt_bins = np.array([500, 550, 600, 675, 800, 1200]) n_pt = len(pt_bins) - 1 msd_bins = binnings[qcd_estimation_relevant_selection] msd = rl.Observable('msd', msd_bins) # here we derive these all at once with 2D array ptpts, msdpts = np.meshgrid(pt_bins[:-1] + 0.3 * np.diff(pt_bins), msd_bins[:-1] + 0.5 * np.diff(msd_bins), indexing='ij') rhopts = 2 * np.log(msdpts / ptpts) ptscaled = (ptpts - 500.) / (1200. - 500.) 
rhoscaled = (rhopts - (-6)) / ((-2.1) - (-6)) validbins = (rhoscaled >= 0) & (rhoscaled <= 1) rhoscaled[~validbins] = 1 # we will mask these out later TF_suffix = configs.get('TFSuffix', "") if (do_initial_qcd_fit): initial_qcd_fit_orders = tuple( configs.get('InitialQCDFitOrders', [2, 2])) if not os.path.exists(model_name): os.makedirs(model_name) print('QCD eff:', qcd_eff) # tf_MCtempl = rl.BernsteinPoly("tf_MCtempl", initial_qcd_fit_orders, ['pt', 'rho'], init_params = np.ones((initial_qcd_fit_orders[0]+1,initial_qcd_fit_orders[1]+1)), limits=(-1,10)) tf_MCtempl = rl.BernsteinPoly( "tf_MCtempl_" + model_name + TF_suffix, initial_qcd_fit_orders, ['pt', 'rho'], init_params=np.ones((initial_qcd_fit_orders[0] + 1, initial_qcd_fit_orders[1] + 1)), limits=(-50, 50)) tf_MCtempl_params = qcd_eff * tf_MCtempl(ptscaled, rhoscaled) for channel_name, config in channels.items(): # ptbin = np.where(pt_bins==float(channel_name.split('Pt')[-1]))[0][0] ptbin = np.where( pt_bins == float(config['pt_bin'].split('to')[0]))[0][0] failCh = qcd_model[channel_name + 'fail'] passCh = qcd_model[channel_name + 'pass'] failObs = failCh.getObservation() if (qcd_fail_region_constant): print("Setting QCD parameters in fail region constant") qcdparams = np.array([ rl.IndependentParameter('qcdparam_' + model_name + TF_suffix + '_ptbin%d_msdbin%d' % (ptbin, i), 0, constant=qcd_fail_region_constant) for i in range(msd.nbins) ]) sigmascale = 10. scaledparams = failObs * ( 1 + sigmascale / np.maximum(1., np.sqrt(failObs)))**qcdparams fail_qcd = rl.ParametericSample('%sfail_qcd' % channel_name, rl.Sample.BACKGROUND, msd, scaledparams) failCh.addSample(fail_qcd) pass_qcd = rl.TransferFactorSample('%spass_qcd' % channel_name, rl.Sample.BACKGROUND, tf_MCtempl_params[ptbin, :], fail_qcd) passCh.addSample(pass_qcd) failCh.mask = validbins[ptbin] passCh.mask = validbins[ptbin] qcd_model.renderCombine(model_name + "/qcdmodel") qcdfit_ws = ROOT.RooWorkspace('w') simpdf, obs = qcd_model.renderRoofit(qcdfit_ws) ROOT.Math.MinimizerOptions.SetDefaultPrecision(1e-18) # ROOT.Math.MinimizerOptions.SetDefaultMinimizer("Minuit2") # ROOT.Math.MinimizerOptions.SetDefaultTolerance(0.0001) # ROOT.Math.MinimizerOptions.SetDefaultPrecision(-1.0) qcdfit = simpdf.fitTo( obs, ROOT.RooFit.Extended(True), ROOT.RooFit.SumW2Error(True), ROOT.RooFit.Strategy(1), ROOT.RooFit.Save(), ROOT.RooFit.Minimizer('Minuit2', 'migrad'), # ROOT.RooFit.PrintLevel(-1), ROOT.RooFit.PrintLevel(1), ROOT.RooFit.Minos(0)) qcdfit_ws.add(qcdfit) if "pytest" not in sys.modules: qcdfit_ws.writeToFile(model_name + '/qcdfit_' + model_name + TF_suffix + '.root') if qcdfit.status() != 0: raise RuntimeError('Could not fit qcd') qcd_model.readRooFitResult(qcdfit) param_names = [p.name for p in tf_MCtempl.parameters.reshape(-1)] decoVector = rl.DecorrelatedNuisanceVector.fromRooFitResult( tf_MCtempl.name + '_deco', qcdfit, param_names) tf_MCtempl.parameters = decoVector.correlated_params.reshape( tf_MCtempl.parameters.shape) tf_MCtempl_params_final = tf_MCtempl(ptscaled, rhoscaled) tf_dataResidual = rl.BernsteinPoly("tf_dataResidual_" + model_name + TF_suffix, bernstein_orders, ['pt', 'rho'], limits=(-50, 50)) # tf_dataResidual = rl.BernsteinPoly("tf_dataResidual", bernstein_orders, ['pt', 'rho'], limits=(0,10)) tf_dataResidual_params = tf_dataResidual(ptscaled, rhoscaled) tf_params = qcd_eff * tf_MCtempl_params_final * tf_dataResidual_params else: tf_params = None # define later #Reading categories of consituent-variations for nuisance paramters from gridHist grid_nuisances, _ = 
build_mass_scale_variations( configs['gridHistFileName']) #setting up rhalphalib roofit model model = rl.Model(model_name) #setting up nuisances for systematic uncertainties print('CMS_lumi', 'lnN') lumi = rl.NuisanceParameter('CMS_lumi', 'lnN') lumi_effect = 1.027 norm_nuisances = {} for channel_name in channels.keys(): if (MINIMAL_MODEL): break for i, sample in enumerate(channels[channel_name]['samples']): if 'NormUnc' not in channels[channel_name]: continue norm_uncertainties = channels[channel_name]['NormUnc'] for name, norm_unc in norm_uncertainties.items(): nuisance_par = [ rl.NuisanceParameter(name + '_normUnc', 'lnN'), norm_unc ] for k, v in norm_nuisances.items(): if name in v[0].name: nuisance_par = v if norm_unc > 0 and name in sample and sample not in norm_nuisances: norm_nuisances.update({sample: nuisance_par}) for channel_name, config in channels.items(): print('setting up channel:', channel_name) #using hists with /variable/ in their name (default: Mass, if defined get from config) variable = 'mjet' if 'variable' not in config else config['variable'] #getting list of samples from config if MINIMAL_MODEL: config['samples'] = ['QCD', 'WJetsMatched'] samples = config['samples'] #for WMass fit there are multiple regions per sample regions = [''] if 'regions' not in config else config['regions'] print('getting template of variable:', variable) print('samples:', samples) print('regions:', regions) msd_bins = binnings[config['selection']] for region in regions: additional_bin = config.get('additional_bin', '') region_suffix = '_' + region if len(region) > 0 else '' hist_dir = config[ 'selection'] + '_%s__' + variable + '_%s' + config[ 'pt_bin'] + additional_bin + region_suffix print('hist_dir:', hist_dir) #setting up channel for fit (name must be unique and can't include any '_') region_name = channel_name + region ch = rl.Channel(region_name) model.addChannel(ch) print('rl.Channel:', ch) for sample_name in samples: #do not include QCD template here, but rather use qcd estimation below if (('QcdEstimation' in config and config['QcdEstimation'] == 'True') and 'qcd' in sample_name.lower()): continue #specify if sample is signal or background type sample_type = rl.Sample.SIGNAL if sample_name in config[ 'signal'] else rl.Sample.BACKGROUND sample_hist = hist_file.Get(hist_dir % (sample_name, "")) print(hist_dir % (sample_name, "")) sample_hist.SetName('msd') #rebin hist if (rebin_msd > 0): sample_hist = sample_hist.Rebin( len(msd_bins) - 1, 'msd', msd_bins) if (lumi_scale != 1.0): sample_hist = scale_lumi(sample_hist, lumi_scale) #setup actual rhalphalib sample sample = rl.TemplateSample(ch.name + '_' + sample_name, sample_type, sample_hist) #sample.autoMCStats() #setting effects of constituent variation nuisances (up/down) for grid_nuisance, x, y, category in grid_nuisances: hist_up = hist_file.Get(hist_dir % (sample_name, str(x) + '_' + str(y) + '_' + category + '_') + '__up') hist_down = hist_file.Get(hist_dir % (sample_name, str(x) + '_' + str(y) + '_' + category + '_') + '__down') #rebin hists if (rebin_msd > 0): hist_up = hist_up.Rebin( len(msd_bins) - 1, 'msd', msd_bins) hist_down = hist_down.Rebin( len(msd_bins) - 1, 'msd', msd_bins) if (lumi_scale != 1.0): hist_up = scale_lumi(hist_up, lumi_scale) hist_down = scale_lumi(hist_down, lumi_scale) if (includeMassScales): sample.setParamEffect(grid_nuisance, hist_up, hist_down) sample.setParamEffect(lumi, lumi_effect) if sample_name in norm_nuisances.keys(): sample.setParamEffect(norm_nuisances[sample_name][0], 
norm_nuisances[sample_name][1]) ch.addSample(sample) PseudoData = 'Pseudo' in configs and len(configs['Pseudo']) > 0 if PseudoData: data_hist = build_pseudo(samples, hist_file, hist_dir, configs['Pseudo'], MINIMAL_MODEL) else: print('using data!!!!!') data_hist = hist_file.Get(hist_dir % ("Data", "")) if (rebin_msd > 0): data_hist = data_hist.Rebin(len(msd_bins) - 1, 'msd', msd_bins) data_hist.SetName('msd') ch.setObservation(data_hist, read_sumw2=PseudoData) if ('QcdEstimation' in config and config['QcdEstimation'] == 'True'): mask = validbins[np.where( pt_bins == float(config['pt_bin'].split('to')[0]))[0][0]] # dropped_events = np.sum(ch.getObservation().astype(float)[~mask]) # percentage = dropped_events/np.sum(ch.getObservation().astype(float)) # print('dropping due to mask: %.2f events (out of %.2f -> %.2f%%)'%(dropped_events,np.sum(ch.getObservation().astype(float)),percentage*100)) ch.mask = mask if (do_qcd_estimation): #QCD TF if (not do_initial_qcd_fit): tf_params = rl.BernsteinPoly('tf_params_' + model_name + TF_suffix, bernstein_orders, ['pt', 'rho'], limits=(-50, 50)) print( 'Using QCD efficiency (N2-ddt) of %.2f%% to scale initial QCD in pass region' % (qcd_eff * 100)) tf_params = qcd_eff * tf_params(ptscaled, rhoscaled) for channel_name, config in channels.items(): if ('QcdEstimation' not in config or config['QcdEstimation'] == "False"): continue print(channel_name, 'qcd estimation') fail_ch = model[channel_name + 'fail'] pass_ch = model[channel_name + 'pass'] ptbin = np.where( pt_bins == float(config['pt_bin'].split('to')[0]))[0][0] if (qcd_fail_region_constant): print("Setting QCD parameters in fail region constant") qcd_params = np.array([ rl.IndependentParameter('qcdparam_' + model_name + TF_suffix + '_ptbin%i_msdbin%i' % (ptbin, i), 0, constant=qcd_fail_region_constant) for i in range(msd.nbins) ]) initial_qcd = fail_ch.getObservation()[0].astype( float) if isinstance( fail_ch.getObservation(), tuple) else fail_ch.getObservation().astype(float) for sample in fail_ch: initial_qcd -= sample.getExpectation(nominal=True) if np.any(initial_qcd < 0.): initial_qcd = np.where(initial_qcd <= 0., 0, initial_qcd) print('negative bins in initial_qcd in ', channel_name) # continue minimum = np.amin(initial_qcd) initial_qcd = np.where(initial_qcd == 0, minimum, initial_qcd) initial_qcd += abs(minimum) raise ValueError( 'inital qcd (fail qcd from data - mc) negative at least one bin' ) sigmascale = 10. scaledparams = initial_qcd * ( 1 + sigmascale / np.maximum(1., np.sqrt(initial_qcd)))**qcd_params fail_qcd = rl.ParametericSample('%sfail_qcd' % channel_name, rl.Sample.BACKGROUND, msd, scaledparams) fail_ch.addSample(fail_qcd) pass_qcd = rl.TransferFactorSample('%spass_qcd' % channel_name, rl.Sample.BACKGROUND, tf_params[ptbin, :], fail_qcd) pass_ch.addSample(pass_qcd) model.renderCombine(model_name)
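# The model construction above calls a scale_lumi helper that is not defined in this
# file. A minimal sketch of what it is assumed to do (rescale a ROOT TH1 by the given
# luminosity factor); the name and signature come from the calls above, the body is
# an assumption:
def scale_lumi(hist, lumi_scale):
    # clone first so the histogram owned by the input TFile is left untouched
    scaled = hist.Clone(hist.GetName() + '_lumiscaled')
    scaled.Scale(lumi_scale)
    return scaled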
def create_datacard(inputfile, carddir, nbins, nMCTF, nDataTF, passBinName, failBinName='fail', add_blinded=False, include_ac=False): # open uproot file once upfile = uproot.open(inputfile) regionPairs = [('SR'+passBinName, 'fit'+failBinName)] # pass, fail region pairs if add_blinded: regionPairs += [('pass'+passBinName, failBinName)] # add sideband region pairs regions = [item for t in regionPairs for item in t] # all regions # luminosity unc https://gitlab.cern.ch/hh/naming-conventions#luminosity lumi_16 = 36.33 lumi_17 = 41.48 lumi_18 = 59.83 lumi_run2 = lumi_16 + lumi_17 + lumi_18 lumi_13TeV_2016 = rl.NuisanceParameter('lumi_13TeV_2016', 'lnN') lumi_13TeV_2017 = rl.NuisanceParameter('lumi_13TeV_2017', 'lnN') lumi_13TeV_2018 = rl.NuisanceParameter('lumi_13TeV_2018', 'lnN') lumi_13TeV_correlated = rl.NuisanceParameter('lumi_13TeV_correlated', 'lnN') lumi_13TeV_1718 = rl.NuisanceParameter('lumi_13TeV_1718', 'lnN') ttbarBin1MCstats = rl.NuisanceParameter('CMS_bbbb_boosted_ggf_ttbarBin1_yieldMCStats', 'lnN') PNetHbbScaleFactorssyst = rl.NuisanceParameter('CMS_bbbb_boosted_ggf_PNetHbbScaleFactors_correlated', 'lnN') brHbb = rl.NuisanceParameter('BR_hbb', 'lnN') pdfqqbar = rl.NuisanceParameter('pdf_Higgs_qqbar', 'lnN') pdfttH = rl.NuisanceParameter('pdf_Higgs_ttH', 'lnN') pdfggHH = rl.NuisanceParameter('pdf_Higgs_ggHH', 'lnN') pdfqqHH = rl.NuisanceParameter('pdf_Higgs_qqHH', 'lnN') qcdScaleVH = rl.NuisanceParameter('QCDscale_VH', 'lnN') qcdScalettH = rl.NuisanceParameter('QCDscale_ttH', 'lnN') qcdScaleqqHH = rl.NuisanceParameter('QCDscale_qqHH', 'lnN') alphaS = rl.NuisanceParameter('alpha_s', 'lnN') fsrothers = rl.NuisanceParameter('CMS_bbbb_boosted_ggf_ps_fsr_others', 'lnN') isrothers = rl.NuisanceParameter('CMS_bbbb_boosted_ggf_ps_isr_others', 'lnN') if not include_ac: thu_hh = rl.NuisanceParameter('THU_SMHH', 'lnN') msdbins = np.linspace(50, nbins*10.0+50.0, nbins+1) msd = rl.Observable('msd', msdbins) msdpts = msdbins[:-1] + 0.5 * np.diff(msdbins) msdscaled = (msdpts - 50.)/(10.0*nbins) # Build qcd MC pass+fail model and fit to polynomial qcdmodel = rl.Model('qcdmodel') qcdpass, qcdfitfail = 0., 0. 
passCh = rl.Channel('passqcdmodel') fitfailCh = rl.Channel('fitfailqcdmodel') qcdmodel.addChannel(fitfailCh) qcdmodel.addChannel(passCh) passTempl = get_hist(upfile, 'histJet2MassBlind_'+passBinName+'_QCD', obs=msd) fitfailTempl = get_hist(upfile, 'histJet2Massfit_fail_QCD', obs=msd) passCh.setObservation(passTempl[:-1]) fitfailCh.setObservation(fitfailTempl[:-1]) qcdpass = passCh.getObservation().sum() qcdfitfail = fitfailCh.getObservation().sum() qcdeffpass = qcdpass / qcdfitfail # transfer factor tf_dataResidual = rl.BernsteinPoly("CMS_bbbb_boosted_ggf_tf_dataResidual_"+passBinName, (nDataTF,), ['msd'], limits=(-20, 20)) tf_dataResidual_params = tf_dataResidual(msdscaled) tf_params_pass = qcdeffpass * tf_dataResidual_params # qcd params qcdparams = np.array([rl.IndependentParameter('CMS_bbbb_boosted_ggf_qcdparam_msdbin%d' % i, 0) for i in range(msd.nbins)]) # dictionary of shape systematics -> name in cards systs = OrderedDict([ ('mHHTHunc', 'CMS_bbbb_boosted_ggf_mHHTHunc'), ('FSRPartonShower', 'ps_fsr'), ('ISRPartonShower', 'ps_isr'), ('ggHHPDFacc', 'CMS_bbbb_boosted_ggf_ggHHPDFacc'), ('ggHHQCDacc', 'CMS_bbbb_boosted_ggf_ggHHQCDacc'), ('othersQCD', 'CMS_bbbb_boosted_ggf_othersQCD'), ('pileupWeight2016', 'CMS_pileup_2016'), ('pileupWeight2017', 'CMS_pileup_2017'), ('pileupWeight2018', 'CMS_pileup_2018'), ('JER2016', 'CMS_res_j_2016'), ('JER2017', 'CMS_res_j_2017'), ('JER2018', 'CMS_res_j_2018'), ('JES_Abs', 'CMS_scale_j_Abs'), ('JES_Abs_2016', 'CMS_scale_j_Abs_2016'), ('JES_Abs_2017', 'CMS_scale_j_Abs_2017'), ('JES_Abs_2018', 'CMS_scale_j_Abs_2018'), ('JES_BBEC1', 'CMS_scale_j_BBEC1'), ('JES_BBEC1_2016', 'CMS_scale_j_BBEC1_2016'), ('JES_BBEC1_2017', 'CMS_scale_j_BBEC1_2017'), ('JES_BBEC1_2018', 'CMS_scale_j_BBEC1_2018'), ('JES_EC2', 'CMS_scale_j_EC2'), ('JES_EC2_2016', 'CMS_scale_j_EC2_2016'), ('JES_EC2_2017', 'CMS_scale_j_EC2_2017'), ('JES_EC2_2018', 'CMS_scale_j_EC2_2018'), ('JES_FlavQCD', 'CMS_scale_j_FlavQCD'), ('JES_HF', 'CMS_scale_j_HF'), ('JES_HF_2016', 'CMS_scale_j_HF_2016'), ('JES_HF_2017', 'CMS_scale_j_HF_2017'), ('JES_HF_2018', 'CMS_scale_j_HF_2018'), ('JES_RelBal', 'CMS_scale_j_RelBal'), ('JES_RelSample_2016', 'CMS_scale_j_RelSample_2016'), ('JES_RelSample_2017', 'CMS_scale_j_RelSample_2017'), ('JES_RelSample_2018', 'CMS_scale_j_RelSample_2018'), ('JMS2016', 'CMS_bbbb_boosted_ggf_jms_2016'), ('JMS2017', 'CMS_bbbb_boosted_ggf_jms_2017'), ('JMS2018', 'CMS_bbbb_boosted_ggf_jms_2018'), ('JMR2016', 'CMS_bbbb_boosted_ggf_jmr_2016'), ('JMR2017', 'CMS_bbbb_boosted_ggf_jmr_2017'), ('JMR2018', 'CMS_bbbb_boosted_ggf_jmr_2018'), ('ttbarBin1Jet2PNetCut', 'CMS_bbbb_boosted_ggf_ttbarBin1Jet2PNetCut'), ('ttJetsCorr', 'CMS_bbbb_boosted_ggf_ttJetsCorr'), ('BDTShape', 'CMS_bbbb_boosted_ggf_ttJetsBDTShape'), ('PNetShape', 'CMS_bbbb_boosted_ggf_ttJetsPNetShape'), ('PNetHbbScaleFactors', 'CMS_bbbb_boosted_ggf_PNetHbbScaleFactors_uncorrelated'), ('triggerEffSF', 'CMS_bbbb_boosted_ggf_triggerEffSF_uncorrelated'), ('trigCorrHH2016', 'CMS_bbbb_boosted_ggf_trigCorrHH2016'), ('trigCorrHH2017', 'CMS_bbbb_boosted_ggf_trigCorrHH2017'), ('trigCorrHH2018', 'CMS_bbbb_boosted_ggf_trigCorrHH2018'), ]) # build actual fit model now model = rl.Model("HHModel") for region in regions: logging.info('starting region: %s' % region) ch = rl.Channel(region) model.addChannel(ch) if region == 'pass'+passBinName: catn = 'Blind_'+passBinName elif region == 'SR'+passBinName: catn = '_'+passBinName elif region == 'fit'+failBinName: catn = 'fit_'+failBinName else: catn = 'Blind_'+failBinName # dictionary of name in datacards 
-> name in ROOT file templateNames = OrderedDict([ ('ttbar', 'histJet2Mass'+catn+'_TTJets'), ('VH_hbb', 'histJet2Mass'+catn+'_VH'), ('ttH_hbb', 'histJet2Mass'+catn+'_ttH'), ('bbbb_boosted_ggf_others', 'histJet2Mass'+catn+'_others'), ('bbbb_boosted_ggf_qcd_datadriven', 'histJet2Mass'+catn+'_QCD'), ('data', 'histJet2Mass'+catn+'_Data'), ('ggHH_kl_1_kt_1_hbbhbb', 'histJet2Mass'+catn+'_ggHH_kl_1_kt_1_boost4b'), ('qqHH_CV_1_C2V_1_kl_1_hbbhbb', 'histJet2Mass'+catn+'_qqHH_CV_1_C2V_1_kl_1_boost4b'), ]) ac_signals = OrderedDict() if include_ac: ac_signals = OrderedDict([ ('ggHH_kl_2p45_kt_1_hbbhbb', 'histJet2Mass'+catn+'_ggHH_kl_2p45_kt_1_boost4b'), ('ggHH_kl_5_kt_1_hbbhbb', 'histJet2Mass'+catn+'_ggHH_kl_5_kt_1_boost4b'), ('ggHH_kl_0_kt_1_hbbhbb', 'histJet2Mass'+catn+'_ggHH_kl_0_kt_1_boost4b'), ('qqHH_CV_1_C2V_0_kl_1_hbbhbb', 'histJet2Mass'+catn+'_qqHH_CV_1_C2V_0_kl_1_boost4b'), ('qqHH_CV_1p5_C2V_1_kl_1_hbbhbb', 'histJet2Mass'+catn+'_qqHH_CV_1p5_C2V_1_kl_1_boost4b'), ('qqHH_CV_1_C2V_1_kl_2_hbbhbb', 'histJet2Mass'+catn+'_qqHH_CV_1_C2V_1_kl_2_boost4b'), ('qqHH_CV_1_C2V_2_kl_1_hbbhbb', 'histJet2Mass'+catn+'_qqHH_CV_1_C2V_2_kl_1_boost4b'), ('qqHH_CV_1_C2V_1_kl_0_hbbhbb', 'histJet2Mass'+catn+'_qqHH_CV_1_C2V_1_kl_0_boost4b'), ('qqHH_CV_0p5_C2V_1_kl_1_hbbhbb', 'histJet2Mass'+catn+'_qqHH_CV_0p5_C2V_1_kl_1_boost4b'), ]) templateNames.update(ac_signals) templates = {} for temp in templateNames: templates[temp] = get_hist(upfile, templateNames[temp], obs=msd) if adjust_posdef_yields: templates_posdef = {} # requires python3 and cvxpy if sys.version_info.major == 3: from bpe import BasisPointExpansion from adjust_to_posdef import ggHH_points, qqHH_points, plot_shape channel = "_hbbhbb" # get qqHH points qqHHproc = BasisPointExpansion(3) ggHHproc = BasisPointExpansion(2) newpts = {} newerrs = {} for HHproc, HH_points in zip([ggHHproc, qqHHproc], [ggHH_points, qqHH_points]): for name, c in HH_points.items(): shape = np.clip(templates[name + channel][0], 0, None) err = np.sqrt(templates[name + channel][3]) # set 0 bin error to something non0 err[err == 0] = err[err.nonzero()].min() logging.debug(name + channel) logging.debug("shape: {shape}".format(shape=shape)) logging.debug("err: {err}".format(err=err)) HHproc.add_point(c, shape, err) # fit HH points with SCS HHproc.solve("scs", tol=1e-9) # get new HH points for name, c in HH_points.items(): newshape = HHproc(c) shape = templates[name + channel][0] edges = templates[name + channel][1] obs_name = templates[name + channel][2] err = np.sqrt(templates[name + channel][3]) # set error to 100% if shape orignally 0 and now not newerr = np.copy(err) newerr[(newshape > 0) & (newerr == 0)] = newshape[(newshape > 0) & (newerr == 0)] templates_posdef[name + channel] = (newshape, edges, obs_name, np.square(newerr)) plot_shape(shape, newshape, err, newerr, name+"_"+region) newpts[name + channel] = newshape newerrs[name + channel] = newerr np.savez("newshapes_{}.npz".format(region), **newpts) np.savez("newerrors_{}.npz".format(region), **newerrs) else: if not (os.path.exists("newshapes_{}.npz".format(region)) and os.path.exists("newerrors_{}.npz".format(region))): raise RuntimeError("Run script in python3 first to get shapes and errors") newpts = dict(np.load("newshapes_{}.npz".format(region))) newerrs = dict(np.load("newerrors_{}.npz".format(region))) for temp in templateNames: if "HH" in temp: newshape = newpts[temp] newerr = newerrs[temp] edges = templates[temp][1] obs_name = templates[temp][2] templates_posdef[temp] = (newshape, edges, obs_name, np.square(newerr)) 
syst_param_array = [] for syst in systs: syst_param_array.append(rl.NuisanceParameter(systs[syst], 'shape')) sNames = [proc for proc in templates.keys() if proc not in ['bbbb_boosted_ggf_qcd_datadriven', 'data']] for sName in sNames: logging.info('get templates for: %s' % sName) # get templates templ = templates[sName] # don't allow them to go negative valuesNominal = np.maximum(templ[0], 0.) templ = (valuesNominal, templ[1], templ[2], templ[3]) stype = rl.Sample.SIGNAL if 'HH' in sName else rl.Sample.BACKGROUND if adjust_posdef_yields and "HH" in sName: # use posdef as nominal, but keep original to get relative changes to systematics templ_posdef = templates_posdef[sName] sample = rl.TemplateSample(ch.name + '_' + sName, stype, templ_posdef) else: sample = rl.TemplateSample(ch.name + '_' + sName, stype, templ) sample.setParamEffect(lumi_13TeV_2016, 1.01 ** (lumi_16 / lumi_run2)) sample.setParamEffect(lumi_13TeV_2017, 1.02 ** (lumi_17 / lumi_run2)) sample.setParamEffect(lumi_13TeV_2018, 1.015 ** (lumi_18 / lumi_run2)) sample.setParamEffect( lumi_13TeV_correlated, 1.02 ** (lumi_18 / lumi_run2) * 1.009 ** (lumi_17 / lumi_run2) * 1.006 ** (lumi_16 / lumi_run2) ) sample.setParamEffect( lumi_13TeV_1718, 1.006 ** (lumi_17 / lumi_run2) * 1.002 ** (lumi_18 / lumi_run2) ) if not include_ac: if sName == "ggHH_kl_1_kt_1_hbbhbb": sample.setParamEffect(thu_hh, 1.0556, 0.7822) if sName == "bbbb_boosted_ggf_others": if "Bin1" in region: sample.setParamEffect(fsrothers, 1.06, 0.82) sample.setParamEffect(isrothers, 1.05, 0.94) elif "Bin2" in region: sample.setParamEffect(fsrothers, 1.02, 0.90) sample.setParamEffect(isrothers, 1.07, 0.93) elif "Bin3" in region: sample.setParamEffect(fsrothers, 1.02, 0.91) sample.setParamEffect(isrothers, 1.06, 0.93) elif "fail" in region: sample.setParamEffect(fsrothers, 1.05, 0.92) sample.setParamEffect(isrothers, 1.05, 0.94) if sName == "ttbar" and "Bin1" in region: if region == "passBin1": sample.setParamEffect(ttbarBin1MCstats, 1.215) elif region == "SRBin1": sample.setParamEffect(ttbarBin1MCstats, 1.187) if ("VH" in sName) or ("ttH" in sName): sample.setParamEffect(PNetHbbScaleFactorssyst, 1.04) elif "HH" in sName: sample.setParamEffect(PNetHbbScaleFactorssyst, 1.0816) if "hbbhbb" in sName: sample.setParamEffect(brHbb, 1.0248, 0.9748) elif "hbb" in sName: sample.setParamEffect(brHbb, 1.0124, 0.9874) if "ttH" in sName: sample.setParamEffect(pdfttH, 1.030) sample.setParamEffect(qcdScalettH, 1.058, 0.908) sample.setParamEffect(alphaS, 1.020) elif "VH" in sName: sample.setParamEffect(pdfqqbar, 1.0154) sample.setParamEffect(qcdScaleVH, 1.0179, 0.9840) sample.setParamEffect(alphaS, 1.009) elif "ggHH" in sName: sample.setParamEffect(pdfggHH, 1.030) elif "qqHH" in sName: sample.setParamEffect(pdfqqHH, 1.021) sample.setParamEffect(qcdScaleqqHH, 1.0003, 0.9996) # shape systematics mask = (valuesNominal > 0) errorsNominal = np.ones_like(valuesNominal) errorsNominal[mask] = 1. 
+ np.sqrt(templ[3][mask])/valuesNominal[mask] # set mc stat uncs logging.info('setting autoMCStats for %s in %s' % (sName, region)) logging.debug('nominal : {nominal}'.format(nominal=valuesNominal)) logging.debug('error : {errors}'.format(errors=errorsNominal)) sample.autoMCStats() for isyst, syst in enumerate(systs): # negligible uncertainty if 'JES_EC2' in syst or 'JES_HF' in syst: continue # add some easy skips if (sName != 'ttbar') and (syst in ['ttJetsCorr', 'BDTShape', 'PNetShape']): continue if ((sName != 'ttbar') or ('Bin1' not in region)) and (syst == 'ttbarBin1Jet2PNetCut'): continue if ('ggHH' not in sName) and (syst in ['ggHHPDFacc', 'ggHHQCDacc', 'mHHTHunc']): continue if ('others' not in sName) and (syst == 'othersQCD'): continue if ('hbb' not in sName) and (syst == 'PNetHbbScaleFactors'): continue if ('HH' not in sName) and (syst in ['trigCorrHH2016', 'trigCorrHH2017', 'trigCorrHH2018']): continue logging.info('setting shape effect %s for %s in %s' % (syst, sName, region)) valuesUp = get_hist(upfile, '%s_%sUp' % (templateNames[sName], syst), obs=msd)[0] valuesDown = get_hist(upfile, '%s_%sDown' % (templateNames[sName], syst), obs=msd)[0] effectUp = np.ones_like(valuesNominal) effectDown = np.ones_like(valuesNominal) maskUp = (valuesUp >= 0) maskDown = (valuesDown >= 0) effectUp[mask & maskUp] = valuesUp[mask & maskUp]/valuesNominal[mask & maskUp] effectDown[mask & maskDown] = valuesDown[mask & maskDown]/valuesNominal[mask & maskDown] # do shape checks normUp = np.sum(valuesUp) normDown = np.sum(valuesDown) normNominal = np.sum(valuesNominal) probUp = valuesUp/normUp probDown = valuesDown/normDown probNominal = valuesNominal/normNominal shapeEffectUp = np.sum(np.abs(probUp - probNominal)/(np.abs(probUp)+np.abs(probNominal))) shapeEffectDown = np.sum(np.abs(probDown - probNominal)/(np.abs(probDown)+np.abs(probNominal))) logger = logging.getLogger("validate_shapes_{}_{}_{}".format(region, sName, syst)) valid = True if np.allclose(effectUp, 1.) and np.allclose(effectDown, 1.): valid = False logger.warning("No shape effect") elif np.allclose(effectUp, effectDown): valid = False logger.warning("Up is the same as Down, but different from nominal") elif np.allclose(effectUp, 1.) 
or np.allclose(effectDown, 1.): valid = False logger.warning("Up or Down is the same as nominal (one-sided)") elif shapeEffectUp < 0.001 and shapeEffectDown < 0.001: valid = False logger.warning("No genuine shape effect (just norm)") elif (normUp > normNominal and normDown > normNominal) or (normUp < normNominal and normDown < normNominal): valid = False logger.warning("Up and Down vary norm in the same direction") if valid: logger.info("Shapes are valid") logging.debug("nominal : {nominal}".format(nominal=valuesNominal)) logging.debug("effectUp : {effectUp}".format(effectUp=effectUp)) logging.debug("effectDown: {effectDown}".format(effectDown=effectDown)) sample.setParamEffect(syst_param_array[isyst], effectUp, effectDown) ch.addSample(sample) # data observed yields = templates['data'][0] data_obs = (yields, msd.binning, msd.name) ch.setObservation(data_obs) for passChName, failChName in regionPairs: logging.info('setting transfer factor for pass region %s, fail region %s' % (passChName, failChName)) failCh = model[failChName] passCh = model[passChName] # sideband fail initial_qcd = failCh.getObservation().astype(float) # was integer, and numpy complained about subtracting float from it for sample in failCh: if sample._name in [failChName+"_"+signalName for signalName in ac_signals.keys()]: continue logging.debug('subtracting %s from qcd' % sample._name) initial_qcd -= sample.getExpectation(nominal=True) if np.any(initial_qcd < 0.): raise ValueError("initial_qcd negative for some bins..", initial_qcd) sigmascale = 10 # to scale the deviation from initial scaledparams = initial_qcd * (1 + sigmascale/np.maximum(1., np.sqrt(initial_qcd)))**qcdparams # add samples fail_qcd = rl.ParametericSample(failChName+'_bbbb_boosted_ggf_qcd_datadriven', rl.Sample.BACKGROUND, msd, scaledparams) failCh.addSample(fail_qcd) pass_qcd = rl.TransferFactorSample(passChName+'_bbbb_boosted_ggf_qcd_datadriven', rl.Sample.BACKGROUND, tf_params_pass, fail_qcd) passCh.addSample(pass_qcd) with open(os.path.join(str(carddir), 'HHModel.pkl'), "wb") as fout: pickle.dump(model, fout, 2) # use python 2 compatible protocol logging.info('rendering combine model') model.renderCombine(os.path.join(str(carddir), 'HHModel'))
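# create_datacard relies on a get_hist helper that returns templates in the
# (values, edges, observable name, sumw2) form used throughout the function above
# (templ[0] yields, templ[1] edges, templ[3] variances). A minimal sketch under that
# assumption, written against the uproot 4 API; the real helper may differ:
def get_hist(upfile, name, obs):
    hist = upfile[name]
    values, edges = hist.to_numpy()
    sumw2 = hist.variances()
    # check that the stored binning matches the requested rhalphalib observable
    assert len(edges) == obs.nbins + 1
    return (values, edges, obs.name, sumw2)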
def test_rhalphabet(tmpdir):
    throwPoisson = True  # False

    # experimental systematics
    lumi = rl.NuisanceParameter('CMS_lumi', 'lnN')
    jet_trigger = rl.NuisanceParameter('CMS_jet_trigger', 'lnN')
    jes = rl.NuisanceParameter('CMS_jes', 'lnN')
    jer = rl.NuisanceParameter('CMS_jer', 'lnN')
    ues = rl.NuisanceParameter('CMS_ues', 'lnN')
    btagWeight = rl.NuisanceParameter('CMS_btagWeight', 'lnN')
    btagEffStat = rl.NuisanceParameter('CMS_btagEffStat', 'lnN')

    # theory systematics
    pdf_weight = rl.NuisanceParameter('PDF_weight', 'shape')
    scale_ggF = rl.NuisanceParameter('scale_ggF', 'lnN')
    scale_VBF = rl.NuisanceParameter('scale_VBF', 'lnN')
    scale_VH = rl.NuisanceParameter('scale_VH', 'lnN')
    scale_ttH = rl.NuisanceParameter('scale_ttH', 'lnN')
    ps_weight = rl.NuisanceParameter('PS_weight', 'shape')

    tqqeffSF = rl.IndependentParameter('tqqeffSF', 1., 0, 20)
    tqqnormSF = rl.IndependentParameter('tqqnormSF', 1., 0, 20)

    ptbins = np.array([450, 1200])
    npt = len(ptbins) - 1
    msdbins = np.linspace(47, 201, 23)
    msd = rl.Observable('msd', msdbins)
    mjjbins = np.array([350, 1000, 4000])
    nmjj = len(mjjbins) - 1

    # here we derive these all at once with 2D array
    ptpts, msdpts = np.meshgrid(ptbins[:-1] + 0.3 * np.diff(ptbins),
                                msdbins[:-1] + 0.5 * np.diff(msdbins),
                                indexing='ij')
    rhopts = 2 * np.log(msdpts / ptpts)
    ptscaled = (ptpts - 450.) / (1200. - 450.)
    rhoscaled = (rhopts - (-6)) / ((-2.1) - (-6))
    validbins = (rhoscaled >= 0) & (rhoscaled <= 1)
    rhoscaled[~validbins] = 1  # we will mask these out later

    # Build qcd MC pass+fail model and fit to polynomial
    qcdmodel = rl.Model("qcdmodel")
    qcdpass, qcdfail = 0., 0.
    for mjjbin in range(nmjj):
        failCh = rl.Channel("mjjbin%d%s" % (mjjbin, 'fail'))
        passCh = rl.Channel("mjjbin%d%s" % (mjjbin, 'pass'))
        qcdmodel.addChannel(failCh)
        qcdmodel.addChannel(passCh)

        # QCD templates from file
        failTempl = get_template("QCD", 0, mjjbin + 1, obs=msd, syst="nominal")
        passTempl = get_template("QCD", 1, mjjbin + 1, obs=msd, syst="nominal")

        failCh.setObservation(failTempl, read_sumw2=True)
        passCh.setObservation(passTempl, read_sumw2=True)
        qcdfail += sum([val[0] for val in failCh.getObservation()])
        qcdpass += sum([val[0] for val in passCh.getObservation()])

    qcdeff = qcdpass / qcdfail
    print("Inclusive P/F from Monte Carlo = " + str(qcdeff))

    # initial values
    print("Initial fit values read from file initial_vals.csv")
    initial_vals = np.genfromtxt('initial_vals.csv')
    initial_vals = initial_vals.reshape(1, 3)
    print(initial_vals)

    tf_MCtempl = rl.BernsteinPoly("tf_MCtempl", (0, 2), ['pt', 'rho'],
                                  init_params=initial_vals,
                                  limits=(-20, 20))
    tf_MCtempl_params = qcdeff * tf_MCtempl(ptscaled, rhoscaled)

    for mjjbin in range(nmjj):
        failCh = qcdmodel['mjjbin%dfail' % mjjbin]
        passCh = qcdmodel['mjjbin%dpass' % mjjbin]
        failObs = failCh.getObservation()
        passObs = passCh.getObservation()
        qcdparams = np.array([
            rl.IndependentParameter('qcdparam_mjjbin%d_msdbin%d' % (mjjbin, i), 0)
            for i in range(msd.nbins)
        ])
        sigmascale = 10.
        scaledparams = failObs * (
            1 + sigmascale / np.maximum(1., np.sqrt(failObs)))**qcdparams
        fail_qcd = rl.ParametericSample('mjjbin%dfail_qcd' % mjjbin,
                                        rl.Sample.BACKGROUND, msd, scaledparams[0])
        failCh.addSample(fail_qcd)
        pass_qcd = rl.TransferFactorSample('mjjbin%dpass_qcd' % mjjbin,
                                           rl.Sample.BACKGROUND,
                                           tf_MCtempl_params[0, :], fail_qcd)
        passCh.addSample(pass_qcd)

        failCh.mask = validbins[0]
        passCh.mask = validbins[0]

    qcdfit_ws = ROOT.RooWorkspace('qcdfit_ws')
    simpdf, obs = qcdmodel.renderRoofit(qcdfit_ws)
    qcdfit = simpdf.fitTo(
        obs,
        ROOT.RooFit.Extended(True),
        ROOT.RooFit.SumW2Error(True),
        ROOT.RooFit.Strategy(2),
        ROOT.RooFit.Save(),
        ROOT.RooFit.Minimizer('Minuit2', 'migrad'),
        ROOT.RooFit.PrintLevel(1),
    )
    qcdfit_ws.add(qcdfit)
    qcdfit_ws.writeToFile(os.path.join(str(tmpdir), 'testModel_qcdfit.root'))

    # Set parameters to fitted values
    allparams = dict(zip(qcdfit.nameArray(), qcdfit.valueArray()))
    for i, p in enumerate(tf_MCtempl.parameters.reshape(-1)):
        p.value = allparams[p.name]

    if qcdfit.status() != 0:
        raise RuntimeError('Could not fit qcd')

    param_names = [p.name for p in tf_MCtempl.parameters.reshape(-1)]
    decoVector = rl.DecorrelatedNuisanceVector.fromRooFitResult(
        tf_MCtempl.name + '_deco', qcdfit, param_names)
    tf_MCtempl.parameters = decoVector.correlated_params.reshape(
        tf_MCtempl.parameters.shape)
    tf_MCtempl_params_final = tf_MCtempl(ptscaled, rhoscaled)
    tf_dataResidual = rl.BernsteinPoly("tf_dataResidual", (0, 2), ['pt', 'rho'],
                                       limits=(-20, 20))
    tf_dataResidual_params = tf_dataResidual(ptscaled, rhoscaled)
    tf_params = qcdeff * tf_MCtempl_params_final * tf_dataResidual_params

    # build actual fit model now
    model = rl.Model("testModel")

    # exclude QCD from MC samples
    samps = [
        'ggF', 'VBF', 'WH', 'ZH', 'ttH', 'ttbar', 'singlet', 'Zjets', 'Wjets', 'VV'
    ]
    sigs = ['VBF']

    for mjjbin in range(nmjj):
        for region in ['pass', 'fail']:
            ch = rl.Channel("mjjbin%d%s" % (mjjbin, region))
            model.addChannel(ch)
            isPass = region == 'pass'
            mjjnorm = 1.

            templates = {}
            for sName in samps:
                templates[sName] = get_template(sName, isPass, mjjbin + 1, obs=msd, syst="nominal")
                nominal = templates[sName][0]

                # expectations
                templ = templates[sName]
                stype = rl.Sample.SIGNAL if sName in sigs else rl.Sample.BACKGROUND
                sample = rl.TemplateSample(ch.name + '_' + sName, stype, templ)

                if sName != "QCD":
                    sample.setParamEffect(lumi, 1.027)

                    jet_trigger_up = syst_variation(
                        get_template(sName, isPass, mjjbin + 1, obs=msd, syst="jet_triggerUp")[0], nominal)
                    jet_trigger_down = syst_variation(
                        get_template(sName, isPass, mjjbin + 1, obs=msd, syst="jet_triggerDown")[0], nominal)
                    sample.setParamEffect(jet_trigger, jet_trigger_up, jet_trigger_down)

                    jes_up = syst_variation(
                        get_template(sName, isPass, mjjbin + 1, obs=msd, syst="JESUp")[0], nominal)
                    jes_down = syst_variation(
                        get_template(sName, isPass, mjjbin + 1, obs=msd, syst="JESDown")[0], nominal)
                    sample.setParamEffect(jes, jes_up, jes_down)

                    jer_up = syst_variation(
                        get_template(sName, isPass, mjjbin + 1, obs=msd, syst="JERUp")[0], nominal)
                    jer_down = syst_variation(
                        get_template(sName, isPass, mjjbin + 1, obs=msd, syst="JERDown")[0], nominal)
                    sample.setParamEffect(jer, jer_up, jer_down)

                    ues_up = syst_variation(
                        get_template(sName, isPass, mjjbin + 1, obs=msd, syst="UESUp")[0], nominal)
                    ues_down = syst_variation(
                        get_template(sName, isPass, mjjbin + 1, obs=msd, syst="UESDown")[0], nominal)
                    sample.setParamEffect(ues, ues_up, ues_down)

                    btagWeight_up = syst_variation(
                        get_template(sName, isPass, mjjbin + 1, obs=msd, syst="btagWeightUp")[0], nominal)
                    btagWeight_down = syst_variation(
                        get_template(sName, isPass, mjjbin + 1, obs=msd, syst="btagWeightDown")[0], nominal)
                    sample.setParamEffect(btagWeight, btagWeight_up, btagWeight_down)

                    btagEffStat_up = syst_variation(
                        get_template(sName, isPass, mjjbin + 1, obs=msd, syst="btagEffStatUp")[0], nominal)
                    btagEffStat_down = syst_variation(
                        get_template(sName, isPass, mjjbin + 1, obs=msd, syst="btagEffStatDown")[0], nominal)
                    sample.setParamEffect(btagEffStat, btagEffStat_up, btagEffStat_down)

                if sName != "QCD":
                    pdf_weight_up = syst_variation(
                        get_template(sName, isPass, mjjbin + 1, obs=msd, syst="PDF_weightUp")[0], nominal)
                    pdf_weight_down = syst_variation(
                        get_template(sName, isPass, mjjbin + 1, obs=msd, syst="PDF_weightDown")[0], nominal)
                    sample.setParamEffect(pdf_weight, pdf_weight_up, pdf_weight_down)

                if sName == "ggF":
                    scale_up = syst_variation(
                        get_template(sName, isPass, mjjbin + 1, obs=msd, syst="scalevar_7ptUp")[0], nominal)
                    scale_down = syst_variation(
                        get_template(sName, isPass, mjjbin + 1, obs=msd, syst="scalevar_7ptDown")[0], nominal)
                    sample.setParamEffect(scale_ggF, scale_up, scale_down)

                if sName == "VBF":
                    scale_up = syst_variation(
                        get_template(sName, isPass, mjjbin + 1, obs=msd, syst="scalevar_3ptUp")[0], nominal)
                    scale_down = syst_variation(
                        get_template(sName, isPass, mjjbin + 1, obs=msd, syst="scalevar_3ptDown")[0], nominal)
                    sample.setParamEffect(scale_VBF, scale_up, scale_down)

                if sName == "VH":
                    scale_up = syst_variation(
                        get_template(sName, isPass, mjjbin + 1, obs=msd, syst="scalevar_3ptUp")[0], nominal)
                    scale_down = syst_variation(
                        get_template(sName, isPass, mjjbin + 1, obs=msd, syst="scalevar_3ptDown")[0], nominal)
                    sample.setParamEffect(scale_VH, scale_up, scale_down)

                if sName == "ttH":
                    scale_up = syst_variation(
                        get_template(sName, isPass, mjjbin + 1, obs=msd, syst="scalevar_7ptUp")[0], nominal)
                    scale_down = syst_variation(
                        get_template(sName, isPass, mjjbin + 1, obs=msd, syst="scalevar_7ptDown")[0], nominal)
                    sample.setParamEffect(scale_ttH, scale_up, scale_down)

                ch.addSample(sample)

            data_obs = get_template("data", isPass, mjjbin + 1, obs=msd, syst="nominal")
            ch.setObservation(data_obs, read_sumw2=True)

            # drop bins outside rho validity
            mask = validbins[0]
            # blind bins 11, 12, 13
            # mask[11:14] = False
            # ch.mask = mask

    for mjjbin in range(nmjj):
        failCh = model['mjjbin%dfail' % mjjbin]
        passCh = model['mjjbin%dpass' % mjjbin]

        qcdparams = np.array([
            rl.IndependentParameter('qcdparam_mjjbin%d_msdbin%d' % (mjjbin, i), 0)
            for i in range(msd.nbins)
        ])
        initial_qcd = failCh.getObservation()[0].astype(
            float)  # was integer, and numpy complained about subtracting float from it
        for sample in failCh:
            initial_qcd -= sample.getExpectation(nominal=True)
        if np.any(initial_qcd < 0.):
            raise ValueError("initial_qcd negative for some bins..", initial_qcd)
        sigmascale = 10  # to scale the deviation from initial
        scaledparams = initial_qcd * (
            1 + sigmascale / np.maximum(1., np.sqrt(initial_qcd)))**qcdparams
        fail_qcd = rl.ParametericSample('mjjbin%dfail_qcd' % mjjbin,
                                        rl.Sample.BACKGROUND, msd, scaledparams)
        failCh.addSample(fail_qcd)
        pass_qcd = rl.TransferFactorSample('mjjbin%dpass_qcd' % mjjbin,
                                           rl.Sample.BACKGROUND, tf_params[0, :],
                                           fail_qcd)
        passCh.addSample(pass_qcd)

        tqqpass = passCh['ttbar']
        tqqfail = failCh['ttbar']
        tqqPF = tqqpass.getExpectation(
            nominal=True).sum() / tqqfail.getExpectation(nominal=True).sum()
        tqqpass.setParamEffect(tqqeffSF, 1 * tqqeffSF)
        tqqfail.setParamEffect(tqqeffSF, (1 - tqqeffSF) * tqqPF + 1)
        tqqpass.setParamEffect(tqqnormSF, 1 * tqqnormSF)
        tqqfail.setParamEffect(tqqnormSF, 1 * tqqnormSF)

    # Fill in muon CR
    templates = {}
    samps = ['ttbar', 'QCD', 'singlet', 'Zjets', 'Wjets', 'VV']
    for region in ['pass', 'fail']:
        ch = rl.Channel("muonCR%s" % (region, ))
        model.addChannel(ch)
        isPass = region == 'pass'

        for sName in samps:
            templates[sName] = get_template_muonCR(sName, isPass, obs=msd)
            stype = rl.Sample.BACKGROUND
            sample = rl.TemplateSample(ch.name + '_' + sName, stype, templates[sName])
            ch.addSample(sample)

        data_obs = get_template_muonCR("muondata", isPass, obs=msd)
        ch.setObservation(data_obs, read_sumw2=True)

    tqqpass = model['muonCRpass_ttbar']
    tqqfail = model['muonCRfail_ttbar']
    tqqPF = tqqpass.getExpectation(
        nominal=True).sum() / tqqfail.getExpectation(nominal=True).sum()
    tqqpass.setParamEffect(tqqeffSF, 1 * tqqeffSF)
    tqqfail.setParamEffect(tqqeffSF, (1 - tqqeffSF) * tqqPF + 1)
    tqqpass.setParamEffect(tqqnormSF, 1 * tqqnormSF)
    tqqfail.setParamEffect(tqqnormSF, 1 * tqqnormSF)

    with open(os.path.join(str(tmpdir), 'testModel.pkl'), "wb") as fout:
        pickle.dump(model, fout)

    model.renderCombine(os.path.join(str(tmpdir), 'testModel'))
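# test_rhalphabet uses a syst_variation helper (defined elsewhere) to convert a varied
# template into per-bin up/down ratios relative to the nominal yields for setParamEffect.
# A minimal sketch, assuming bins with empty nominal yield default to a ratio of 1
# (that guard is an assumption, not taken from the original helper):
import numpy as np

def syst_variation(varied, nominal):
    # element-wise ratio of varied to nominal yields, 1.0 where the nominal bin is empty
    return np.divide(varied, nominal,
                     out=np.ones_like(nominal, dtype=float),
                     where=nominal > 0)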
def jet_mass_producer(configs=None): """ configs: configuration dict including: ModelName,gridHistFileName,channels,histLocation -> channels: dict with dict for each channels: -> includes histDir,samples,NormUnc,signal,regions,QcdEstimation """ rebin_msd = True # min_msd, max_msd = (50,210) # binwidth = 16 # nbins = int(np.floor((max_msd-min_msd)/binwidth)) # msd_bins = np.linspace(min_msd, nbins*binwidth+min_msd, nbins+1) min_msd, max_msd = (50, 190) binwidth = 4 nbins = int(np.floor((max_msd - min_msd) / binwidth)) print(nbins) msd_bins = np.linspace(min_msd, nbins * binwidth + min_msd, nbins + 1) print(msd_bins) #channels for combined fit channels = configs['channels'] qcd_estimation_channels = { k: v for k, v in channels.items() if "QcdEstimation" in v and v["QcdEstimation"] == "True" } print('channels:', channels.keys()) #getting path of dir with root file from config hist_file = ROOT.TFile(configs['histLocation']) do_qcd_estimation = len(qcd_estimation_channels) > 0 #specify if QCD estimation (using Bernstein-polynomial as TF) should be used ################ #QCD Estimation# ################ # derive pt bins from channel names for the pt,rho grid for the Bernstein-Polynomial if (do_qcd_estimation): # qcd_eff = get_qcd_efficiency(configs['histLocation'], w_channels) qcd_model = rl.Model('qcd_helper') qcd_pass, qcd_fail = 0., 0. for channel_name, config in qcd_estimation_channels.items(): fail_ch = rl.Channel(channel_name + 'fail') pass_ch = rl.Channel(channel_name + 'pass') qcd_model.addChannel(fail_ch) qcd_model.addChannel(pass_ch) fail_hist = hist_file.Get('W_QCD__mjet_' + config['pt_bin'] + '_fail') pass_hist = hist_file.Get('W_QCD__mjet_' + config['pt_bin'] + '_pass') if (rebin_msd > 0): fail_hist = fail_hist.Rebin(len(msd_bins) - 1, 'msd', msd_bins) pass_hist = pass_hist.Rebin(len(msd_bins) - 1, 'msd', msd_bins) fail_ch.setObservation(fail_hist) pass_ch.setObservation(pass_hist) qcd_fail += fail_ch.getObservation().sum() qcd_pass += pass_ch.getObservation().sum() qcd_eff = qcd_pass / qcd_fail #get all lower edges from channel names pt_edges = [ float(channel.split('Pt')[-1]) for channel in qcd_estimation_channels ] #get last upper edge from name of last channel pt_edges.append( float(channels[ list(qcd_estimation_channels.keys())[-1].split('Pt')[0] + 'Pt%i' % pt_edges[-1]]['pt_bin'].split('to')[-1])) pt_bins = np.array(pt_edges) # pt_bins = np.array([500, 550, 600, 675, 800, 1200]) n_pt = len(pt_bins) - 1 msd = rl.Observable('msd', msd_bins) # here we derive these all at once with 2D array ptpts, msdpts = np.meshgrid(pt_bins[:-1] + 0.3 * np.diff(pt_bins), msd_bins[:-1] + 0.5 * np.diff(msd_bins), indexing='ij') rhopts = 2 * np.log(msdpts / ptpts) ptscaled = (ptpts - 500.) / (1200. - 500.) 
rhoscaled = (rhopts - (-6)) / ((-2.1) - (-6)) validbins = (rhoscaled >= 0) & (rhoscaled <= 1) rhoscaled[~validbins] = 1 # we will mask these out later #get name from config, or fall back to default if ('ModelName' in configs): model_name = configs['ModelName'] else: model_name = 'Jet_Mass_Model' #Reading categories of consituent-variations for nuisance paramters from gridHist grid_hist_file_name = configs['gridHistFileName'] print('reading grid for nuisance parameter:') grid_hist_file = ROOT.TFile(grid_hist_file_name, 'READ') grid_hist = grid_hist_file.Get('grid') grid_axes = dict(item.strip().split("=") for item in grid_hist.GetTitle().split(",")) x_bins = range(grid_hist.GetNbinsX()) y_bins = range(grid_hist.GetNbinsY()) categories_hist = grid_hist_file.Get('categories') particle_categories = [] for i in range(1, categories_hist.GetNbinsX() + 1): particle_categories.append(categories_hist.GetXaxis().GetBinLabel(i)) grid_hist_file.Close() print('used variation categories:', particle_categories) print('X: %s , %i bins' % (grid_axes['x'], len(x_bins))) print('Y: %s , %i bins' % (grid_axes['y'], len(y_bins))) #setting up rhalphalib roofit model model = rl.Model(model_name) #setting up nuisances correspondig to consituent-variation according to categories from grid grid_nuisances = [] print('adding nuisance paramters:') for category in particle_categories: for x_bin in x_bins: for y_bin in y_bins: print( 'massScale_%s%i_%s%i_%s' % (grid_axes['x'], x_bin, grid_axes['y'], y_bin, category), 'shape') grid_nuisances.append([ rl.NuisanceParameter( 'massScale_%s%i_%s%i_%s' % (grid_axes['x'], x_bin, grid_axes['y'], y_bin, category), 'shape'), x_bin, y_bin, category ]) #setting up nuisances for systematic uncertainties print('CMS_lumi', 'lnN') lumi = rl.NuisanceParameter('CMS_lumi', 'lnN') lumi_effect = 1.027 norm_nuisances = {} for channel_name in channels.keys(): for i, sample in enumerate(channels[channel_name]['samples']): norm_uncertainties = channels[channel_name]['NormUnc'] for name, norm_unc in norm_uncertainties.items(): nuisance_par = [ rl.NuisanceParameter(name + '_normUnc', 'lnN'), norm_unc ] for k, v in norm_nuisances.items(): if name in v[0].name: nuisance_par = v if norm_unc > 0 and name in sample and sample not in norm_nuisances: norm_nuisances.update({sample: nuisance_par}) for channel_name, config in channels.items(): print('setting up channel:', channel_name) #using hists with /variable/ in their name (default: Mass, if defined get from config) variable = 'mjet' if 'variable' not in config else config['variable'] #getting list of samples from config samples = config['samples'] #for WMass fit there are multiple regions per sample regions = [''] if 'regions' not in config else config['regions'] print('getting template of variable:', variable) print('samples:', samples) print('regions:', regions) for region in regions: region_suffix = '_' + region if len(region) > 0 else '' hist_dir = config[ 'selection'] + '_%s__' + variable + '_%s' + config[ 'pt_bin'] + region_suffix print('hist_dir:', hist_dir) #setting up channel for fit (name must be unique and can't include any '_') region_name = channel_name + region ch = rl.Channel(region_name) model.addChannel(ch) print('rl.Channel:', ch) for sample_name in samples: #do not include QCD template here, but rather use qcd estimation below if (('QcdEstimation' in config and config['QcdEstimation'] == 'True') and 'qcd' in sample_name.lower()): continue #specify if sample is signal or background type sample_type = rl.Sample.SIGNAL if sample_name in 
config[ 'signal'] else rl.Sample.BACKGROUND sample_hist = hist_file.Get(hist_dir % (sample_name, "")) sample_hist.SetName('msd') #rebin hist if (rebin_msd > 0): sample_hist = sample_hist.Rebin( len(msd_bins) - 1, 'msd', msd_bins) #setup actual rhalphalib sample sample = rl.TemplateSample(ch.name + '_' + sample_name, sample_type, sample_hist) #setting effects of constituent variation nuisances (up/down) for grid_nuisance, x, y, category in grid_nuisances: hist_up = hist_file.Get(hist_dir % (sample_name, str(x) + '_' + str(y) + '_' + category + '_') + '__up') hist_down = hist_file.Get(hist_dir % (sample_name, str(x) + '_' + str(y) + '_' + category + '_') + '__down') #rebin hists if (rebin_msd > 0): hist_up = hist_up.Rebin( len(msd_bins) - 1, 'msd', msd_bins) hist_down = hist_down.Rebin( len(msd_bins) - 1, 'msd', msd_bins) sample.setParamEffect(grid_nuisance, hist_up, hist_down) sample.setParamEffect(lumi, lumi_effect) if sample_name in norm_nuisances.keys(): sample.setParamEffect(norm_nuisances[sample_name][0], norm_nuisances[sample_name][1]) ch.addSample(sample) if 'Pseudo' in configs: data_hist = build_pseudo(samples, hist_file, hist_dir, configs['Pseudo']) else: data_hist = hist_file.Get(hist_dir % ("Data", "")) if (rebin_msd > 0): data_hist = data_hist.Rebin(len(msd_bins) - 1, 'msd', msd_bins) data_hist.SetName('msd') ch.setObservation(data_hist) if ('QcdEstimation' in config and config['QcdEstimation'] == 'True'): mask = validbins[np.where( pt_bins == float(channel_name.split('Pt')[-1]))[0][0]] dropped_events = np.sum( ch.getObservation().astype(float)[~mask]) percentage = dropped_events / np.sum( ch.getObservation().astype(float)) print( 'dropping due to mask: %.2f events (out of %.2f -> %.2f%%)' % (dropped_events, np.sum( ch.getObservation().astype(float)), percentage * 100)) ch.mask = mask if (do_qcd_estimation): #QCD TF tf_params = rl.BernsteinPoly('tf_params', (2, 2), ['pt', 'rho'], limits=(-10, 10)) print( 'Using QCD efficiency (N2-ddt) of %.2f%% to scale initial QCD in pass region' % (qcd_eff * 100)) tf_params = qcd_eff * tf_params(ptscaled, rhoscaled) for channel_name, config in channels.items(): if ('QcdEstimation' not in config or config['QcdEstimation'] == "False"): continue print(channel_name, 'qcd estimation') fail_ch = model[channel_name + 'fail'] pass_ch = model[channel_name + 'pass'] ptbin = np.where( pt_bins == float(channel_name.split('Pt')[-1]))[0][0] qcd_params = np.array([ rl.IndependentParameter( 'qcdparam_ptbin%i_msdbin%i' % (ptbin, i), 0) for i in range(msd.nbins) ]) initial_qcd = fail_ch.getObservation().astype(float) for sample in fail_ch: initial_qcd -= sample.getExpectation(nominal=True) if np.any(initial_qcd < 0.): raise ValueError( 'inital qcd (fail qcd from data - mc) negative at least one bin' ) sigmascale = 10. scaledparams = initial_qcd * ( 1 + sigmascale / np.maximum(1., np.sqrt(initial_qcd)))**qcd_params fail_qcd = rl.ParametericSample('%sfail_qcd' % channel_name, rl.Sample.BACKGROUND, msd, scaledparams) fail_ch.addSample(fail_qcd) pass_qcd = rl.TransferFactorSample('%spass_qcd' % channel_name, rl.Sample.BACKGROUND, tf_params[ptbin, :], fail_qcd) pass_ch.addSample(pass_qcd) model.renderCombine(model_name)
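# Both versions of the model builder call a build_pseudo helper to assemble pseudo-data
# when configs['Pseudo'] is set. A minimal sketch, assuming pseudo-data is simply the sum
# of the listed MC templates; how the 'Pseudo' options (and the MINIMAL_MODEL flag used in
# the variant further up) are actually interpreted is an assumption and not shown here:
def build_pseudo(samples, hist_file, hist_dir, pseudo_options, minimal_model=False):
    # pseudo_options and minimal_model are accepted for signature compatibility
    # with the calls above but are not applied in this sketch
    pseudo_data = None
    for sample_name in samples:
        hist = hist_file.Get(hist_dir % (sample_name, ""))
        if pseudo_data is None:
            # clone so the histogram owned by the TFile is not modified in place
            pseudo_data = hist.Clone('pseudo_data')
        else:
            pseudo_data.Add(hist)
    return pseudo_data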