def __init__(self, f_bkg, f_mig, f_eff, f_data, fb = 1.0, regMode = ROOT.TUnfold.kRegModeDerivative, normMode = 0):
    """Set up the functor: keep the inputs and build the TUnfold object.

    Parameters
    ----------
    f_bkg, f_mig, f_eff, f_data :
        Histograms forwarded unchanged to ``getTUnfolder``. ``f_bkg`` and
        ``f_eff`` are additionally stored on the instance.
    fb :
        Stored as ``self.fb``.
    regMode, normMode :
        Forwarded to ``getTUnfolder`` as keyword arguments.
    """
    self.f_bkg = f_bkg
    self.tunfolder_reg = getTUnfolder(f_bkg, f_mig, f_eff, f_data,
                                      regMode = regMode, normMode = normMode)
    self.f_eff = f_eff
    self.fb = fb

    # Keep error-free versions of the background and the efficiency.
    # NOTE(review): H1D(other) is presumably a copy constructor -- confirm.
    for attr_name, source in (("f_bkg_noerr", self.f_bkg),
                              ("f_eff_noerr", self.f_eff)):
        clone = H1D(source)
        for idx in range(len(clone.err)):
            clone.err[idx] = 0
        setattr(self, attr_name, clone)
def getHistogramsFromPkl(fname = "histograms.pkl", direc = "A"):
    """Read the model called *direc* from a pickle stream and derive the efficiency.

    The file is expected to contain consecutive pickled dictionaries, each
    with the keys ``"name"``, ``"truth"``, ``"reco"``, ``"mig"`` and ``"bkg"``.
    Records are read one by one until one whose ``"name"`` equals *direc* is
    found.

    Parameters
    ----------
    fname : path of the pickle file.
    direc : value of the ``"name"`` key of the model to load.

    Returns
    -------
    list
        ``[truth, recoWithFakes, bkg, mig, eff, nrt]`` where ``mig`` is the
        transposed stored matrix, ``nrt = truth - mig.project('x')`` and
        ``eff = 1 - nrt.divideBinomial(truth)``.

    Raises
    ------
    EOFError
        If no record named *direc* exists in the file.
    """
    m = None
    # NOTE: unpickling can execute arbitrary code; only read trusted files.
    with open(fname, 'rb') as inp:
        while m is None:
            try:
                model = pickle.load(inp)
            except EOFError:
                # end of stream reached without finding the model
                break
            if model is None:
                # a pickled None is treated as an end-of-stream marker
                break
            if model["name"] == direc:
                m = model
    if m is None:
        raise EOFError("no model named %r found in %r" % (direc, fname))

    truth = m["truth"]
    recoWithFakes = m["reco"]
    # input assumed to have reco in X axis and truth in Y, so transpose it
    # to the truth in X axis convention
    mig = m["mig"].T()
    # fakes
    bkg = m["bkg"]

    tr_1dtruth = mig.project('x')
    nrt = truth - tr_1dtruth

    # Histogram of ones with zero errors, sharing the binning of nrt.
    nbins = len(nrt.val)
    ones = H1D(np.ones(nbins))
    ones.err = np.zeros(nbins)
    ones.err_up = np.zeros(nbins)
    ones.err_dw = np.zeros(nbins)
    ones.x = copy.deepcopy(nrt.x)
    ones.x_err = copy.deepcopy(nrt.x_err)

    # eff = 1 - (truth events without a reco match) / truth
    eff = ones + nrt.divideBinomial(truth)*(-1.0)
    return [truth, recoWithFakes, bkg, mig, eff, nrt]
def __call__(self, tau, data):
    """Unfold *data* with regularisation strength *tau*.

    The error-free background ``self.f_bkg_noerr`` is subtracted from
    *data*, the result is handed to ``self.tunfolder_reg`` (with ``self.fb``
    as second ``SetInput`` argument), ``DoUnfold(tau)`` is run and the output
    is divided by ``self.f_eff_noerr``.

    Returns the efficiency-corrected unfolded histogram.
    """
    unfolder = self.tunfolder_reg

    reco_input = (data - self.f_bkg_noerr).toROOT("data_minus_bkg_tmp")
    reco_input.SetDirectory(0)
    unfolder.SetInput(reco_input, self.fb)
    unfolder.DoUnfold(tau)

    raw_output = unfolder.GetOutput("tunfold_result_tmp")
    raw_output.SetDirectory(0)
    unfolded = H1D(raw_output)
    corrected = unfolded/self.f_eff_noerr

    # drop the temporary ROOT histograms before returning
    del raw_output
    del reco_input
    return corrected
def getTUnfolder(bkg, mig, eff, data, regMode=None, normMode=None):
    """Build a ``ROOT.TUnfoldDensity`` object loaded with background-subtracted data.

    Parameters
    ----------
    bkg : histogram subtracted from *data* (its errors are zeroed first).
    mig : migration matrix; it is transposed before being given to TUnfold.
    eff : not used here; kept so that the call signature stays unchanged.
    data : reconstructed-level histogram.
    regMode : regularisation mode, ``ROOT.TUnfold.kRegModeDerivative`` if None.
    normMode : constraint mode, ``ROOT.TUnfold.kEConstraintArea`` if None.

    Returns
    -------
    The configured ``ROOT.TUnfoldDensity`` instance.
    """
    if regMode is None:
        regMode = ROOT.TUnfold.kRegModeDerivative
    if normMode is None:
        normMode = ROOT.TUnfold.kEConstraintArea

    # The "no density scaling" enumerator is spelled kDensityModeNone in
    # recent TUnfold releases and kDensityModeeNone in older ones; accept
    # whichever the installed ROOT provides.
    try:
        densityMode = ROOT.TUnfoldDensity.kDensityModeNone
    except AttributeError:
        densityMode = ROOT.TUnfoldDensity.kDensityModeeNone

    tunfolder = ROOT.TUnfoldDensity(mig.T().toROOT("tunfold_mig"),
                                    ROOT.TUnfold.kHistMapOutputVert,
                                    regMode, normMode, densityMode)

    # subtract the background without propagating its uncertainty
    bkg_noerr = H1D(bkg)
    for k in range(len(bkg_noerr.err)):
        bkg_noerr.err[k] = 0
    dataBkgSub = data - bkg_noerr

    # second argument of SetInput is 0 here
    tunfolder.SetInput(dataBkgSub.toROOT("data_minus_bkg"), 0)
    return tunfolder
def getHistograms(fname = "out_ttallhad_psrw_Syst.root", direc = "nominal", variable = "mttCoarse"):
    """Read the unfolding inputs for *variable* from directory *direc* of a ROOT file.

    Every histogram is scaled by the module-level ``luminosity``.

    Returns
    -------
    list
        ``[truth, recoWithFakes, bkg, mig, eff, nrt]`` where
        ``nrt = truth - mig.project('x')`` and
        ``eff = 1 - nrt.divideBinomial(truth)``.
    """
    scale = luminosity
    root_file = ROOT.TFile.Open(fname)

    def fetch(hist_name):
        # look the histogram up inside the requested directory
        return root_file.Get("%s/%s" % (direc, hist_name))

    truth = scale*H1D(fetch("unfoldPart_%s" % variable))
    recoWithFakes = scale*H1D(fetch("unfoldReco_%s_cat2b2HTTmasscut" % variable))
    # input assumed to have reco in X axis and truth in Y, so transpose it
    # to the truth in X axis convention
    mig = scale*H2D(fetch("unfoldMigRecoPart_%s_cat2b2HTTmasscut" % variable)).T()
    # fakes
    # FIXME (from the original author): other backgrounds are not added yet
    bkg = scale*H1D(fetch("unfoldRecoNotPart_%s_cat2b2HTTmasscut" % variable))

    nrt = truth - mig.project('x')

    # unit histogram with zero errors on the binning of nrt
    n_bins = len(nrt.val)
    ones = H1D(np.ones(n_bins))
    ones.err = np.zeros(n_bins)
    ones.x = copy.deepcopy(nrt.x)
    ones.x_err = copy.deepcopy(nrt.x_err)

    eff = ones + nrt.divideBinomial(truth)*(-1.0)
    return [truth, recoWithFakes, bkg, mig, eff, nrt]
def __call__(self, tau, data):
    """Run TUnfold on *data* for the regularisation parameter *tau*.

    Steps: subtract ``self.f_bkg_noerr`` from *data*, pass the result to
    ``self.tunfolder_reg.SetInput`` together with ``self.fb``, call
    ``DoUnfold(tau)``, and divide the output by ``self.f_eff_noerr``.

    Returns the resulting histogram.
    """
    bkg_subtracted = data - self.f_bkg_noerr
    root_input = bkg_subtracted.toROOT("data_minus_bkg_tmp")
    root_input.SetDirectory(0)

    self.tunfolder_reg.SetInput(root_input, self.fb)
    self.tunfolder_reg.DoUnfold(tau)

    root_output = self.tunfolder_reg.GetOutput("tunfold_result_tmp")
    root_output.SetDirectory(0)

    tunfold_result = H1D(root_output) / self.f_eff_noerr

    # release the temporary ROOT objects
    del root_output
    del root_input
    return tunfold_result
def getDAgostini(bkg, mig, eff, data, nIter=1):
    """Unfold *data* with ``ROOT.RooUnfoldBayes`` using *nIter* iterations.

    The ``RooUnfoldResponse`` is built from:
      * reco  = ``mig.project('y') + bkg``
      * truth = ``mig.project('x') / eff``
      * the transposed migration matrix.

    *data* is passed to the unfolder as given (no background is subtracted
    from it here).

    Returns the unfolded result converted to ``H1D``.
    """
    def detached(hist):
        # SetDirectory(0) on the freshly created ROOT histogram
        hist.SetDirectory(0)
        return hist

    reco_hist = detached((mig.project('y') + bkg).toROOT("reco_rp"))
    truth_hist = detached((mig.project('x') / eff).toROOT("truth_p"))
    matrix_hist = detached(mig.T().toROOT("m"))
    response = ROOT.RooUnfoldResponse(reco_hist, truth_hist, matrix_hist)

    data_hist = detached(data.toROOT("dataBkgSub_dagostini"))

    unfolder = ROOT.RooUnfoldBayes(response, data_hist, int(nIter))
    unfolder.SetVerbose(-1)
    unfolded_hist = detached(unfolder.Hreco())

    # same release order as before: unfolder, response, matrix, then result
    del unfolder
    del response
    del matrix_hist
    result = H1D(unfolded_hist)
    del unfolded_hist
    return result
def generateHistograms(fname="histograms.pkl"):
    """Generate toy truth/reco/migration histograms for models A, B, C and pickle them.

    ``Nev * wL`` events are drawn from ``GenerateSample`` and each is filled
    with weight ``1 / wL``. For every model the observable is smeared with a
    Gaussian of width ``O * (a / sqrt(O) + b)`` and accepted with the
    per-bin efficiency ``e``. One dictionary per model (keys ``"name"``,
    ``"mig"``, ``"truth"``, ``"reco"``, ``"bkg"``) is dumped to *fname*; the
    migration matrix is stored transposed.

    Parameters
    ----------
    fname : path of the output pickle file.
    """
    Nev = 400000
    wL = 100.0  # generate wL times more events than we expect in data
    models = ["A", "B", "C"]

    # truth binning: bin centres (xt) and half-widths (xt_err)
    xt = 0.5 * np.exp(np.arange(0, 12, 1) * 0.15)
    xt_err = np.diff(xt) * 0.5
    xt = xt[:-1]
    xt += xt_err
    Nt = len(xt)

    # reco binning: bin centres (xf) and half-widths (xf_err)
    xf = 0.5 * np.exp(np.arange(0, 24, 1) * 0.15 * 0.5)
    xf_err = np.diff(xf) * 0.5
    xf = xf[:-1]
    xf += xf_err
    Nr = len(xf)

    # per-model efficiency (flat in the truth bins) and resolution terms
    e = {"A": [0.40] * Nt, "B": [0.42] * Nt, "C": [0.38] * Nt}
    a = {"A": 0.20, "B": 0.25, "C": 0.15}
    b = {"A": 0.02, "B": 0.03, "C": 0.01}

    gs = GenerateSample(minMass=0.5, sqrts=7)

    truth = {}
    reco = {}
    mig = {}
    bkg = {}
    for direc in models:
        truth[direc] = H1D(np.zeros(Nt))
        truth[direc].x = xt
        truth[direc].x_err = xt_err

        mig[direc] = H2D(np.zeros((Nt, Nr)))
        mig[direc].x = xt
        mig[direc].x_err = xt_err
        mig[direc].y = xf
        mig[direc].y_err = xf_err

        reco[direc] = H1D(np.zeros(Nr))
        reco[direc].x = xf
        reco[direc].x_err = xf_err

        bkg[direc] = H1D(np.zeros(Nr))
        bkg[direc].x = xf
        bkg[direc].x_err = xf_err
        for i in range(Nr):
            bkg[direc].err[i] = 0

    w = 1.0 / wL
    for k in range(int(Nev * wL)):
        O = gs.sample()

        # implement overflow bin
        O_over = O
        if O_over > xt[-1] + xt_err[-1]:
            O_over = xt[-1]

        # guarantee same truth histogram for all
        # do not histogram events below lowest bin (ie: do not plot underflow)
        # we assume this is the boundary of the fiducial region
        # but use those low O events in the migration model, as they can be
        # smeared in
        #
        # bt indexes the efficiency list below. It is reset for every event
        # so that an event below the lowest truth bin never reuses the bin
        # of the previous event (or hits an undefined name on the very first
        # event); the efficiencies are flat, so the lowest bin is used.
        bt = 0
        if O_over > xt[0] - xt_err[0]:
            for direc in models:
                bt = truth[direc].fill(O_over, w)

        # migration model
        for direc in models:
            dm = O * (a[direc] / np.sqrt(O) + b[direc])
            Or = O + np.random.normal(0, dm)
            # if reco-level bin is below lowest bin, reject it
            # this effect is considered in the efficiency later
            if Or < xf[0] - xf_err[0]:
                continue
            # implement overflow bin
            if Or > xf[-1] + xf_err[-1]:
                Or = xf[-1]
            # implement efficiency
            if np.random.uniform(0, 1) > e[direc][bt]:
                continue
            mig[direc].fill(O_over, Or, w)
            reco[direc].fill(Or, w)

    # reco "with fakes" = reco + background, applied once to every model
    # (the background booked above is all zeros)
    for direc in models:
        reco[direc] = reco[direc] + bkg[direc]

    with open(fname, 'wb') as output:
        for direc in models:
            model = {}
            model["name"] = direc
            model["mig"] = mig[direc].T()
            model["truth"] = truth[direc]
            model["reco"] = reco[direc]
            model["bkg"] = bkg[direc]
            pickle.dump(model, output, pickle.HIGHEST_PROTOCOL)
recoWithoutFakes = {} bkg = {} bkg_noerr = {} mig = {} eff = {} eff_noerr = {} nrt = {} truth["A"], recoWithFakes["A"], bkg["A"], mig["A"], eff["A"], nrt["A"] = getHistograms(direc = "A") truth["B"], recoWithFakes["B"], bkg["B"], mig["B"], eff["B"], nrt["B"] = getHistograms(direc = "B") #truth["C"], recoWithFakes["C"], bkg["C"], mig["C"], eff["C"], nrt["C"] = getHistograms("histograms.pkl", "C") for i in recoWithFakes: recoWithoutFakes[i] = mig[i].project("y") bkg_noerr[i] = H1D(bkg[i]) for k in range(0, len(bkg_noerr[i].err)): bkg_noerr[i].err[k] = 0 eff_noerr[i] = H1D(eff[i]) for k in range(0, len(eff_noerr[i].err)): eff_noerr[i].err[k] = 0 # generate perfect fake data data = recoWithFakes["A"] # generate fake data from model pseudo_data = getDataFromModel(bkg["A"], mig["A"], eff["A"]) # functor to unfold class TUnfoldForRegularizationTest:
from Unfolder.ComparisonHelpers import *
from Unfolder.Unfolder import Unfolder
from Unfolder.Histogram import H1D, H2D, plotH1D, plotH2D
from readHistograms import *

# plotting style and output naming
sns.set(context="paper", style="whitegrid", font_scale=1.1)
varname = "observable"
extension = "eps"

# read the inputs for model "A"
truth, recoWithFakes, bkg, mig, eff, nrt = getHistograms(direc="A")

# reco-level distribution without fakes: projection of the migration matrix
recoWithoutFakes = mig.project("y")

# versions of the efficiency and of the background with all errors set to 0
eff_noerr = H1D(eff)
bkg_noerr = H1D(bkg)
for k in range(len(eff_noerr.err)):
    eff_noerr.err[k] = 0
for k in range(len(bkg_noerr.err)):
    bkg_noerr.err[k] = 0

# the fake data is simply the reco-level prediction including fakes
data = recoWithFakes

# unfolding object; a uniform prior is selected
# (setGaussianPrior() and setCurvaturePrior() are the disabled alternatives)
m = Unfolder(bkg, mig, eff, truth)
m.setUniformPrior()
#m.setGaussianPrior()
#m.setCurvaturePrior()
def getHistogramsFromJson(fname = "toyModel/ModelChrisSmallVar.json", direc = "A"):
    """Build the unfolding inputs from a JSON model description.

    The JSON matrix is the response matrix
    ``response(t=i, r=j) = eff(t=i) * P(r=j | t=i)``.
    The migration matrix returned here is that response multiplied by the
    truth content of each truth bin.

    Parameters
    ----------
    fname : path of the JSON file.
    direc : ``"A"`` selects ``["Nominal"]["Mig"]``; any other value selects
        ``["ModelVars"]["resolution"]["Variation"]["Mig"]``.

    Returns
    -------
    list
        ``[truth, recoWithFakes, bkg, mig, eff, nrt]`` with a zero background,
        ``nrt = truth - mig.project('x')`` and
        ``eff = 1 - nrt.divideBinomial(truth)``.
    """
    # read the file through a context manager so the handle is always closed
    with open(fname) as json_file:
        parsed_json = json.load(json_file)

    # initial (real) data --> this actually depends on the model used
    # it is replaced below by the prediction of the selected model
    recoWithFakes = H1D(np.asarray(parsed_json["Data"]))

    # pick the response matrix of the requested model
    if direc == "A":
        resp_values = parsed_json["Nominal"]["Mig"]
    else:
        resp_values = parsed_json["ModelVars"]["resolution"]["Variation"]["Mig"]
    resp = H2D(np.asarray(resp_values, dtype = np.float64))
    resp.err = np.zeros(shape = resp.val.shape)
    resp.err_up = np.zeros(shape = resp.val.shape)
    resp.err_dw = np.zeros(shape = resp.val.shape)

    # input assumed to have reco in Y axis and truth in X from here on
    Ntruth = resp.val.shape[0]
    Nreco = resp.val.shape[1]

    # no background for now
    bkg = H1D(np.zeros(Nreco, dtype = np.float64))

    truth_a = [parsed_json["ModelVars"]["truthbin%d" % i]["InitialValue"]
               for i in range(Ntruth)]
    truth = H1D(np.asarray(truth_a, dtype = np.float64))
    truth.err = np.zeros(shape = truth.val.shape)
    truth.err_up = np.zeros(shape = truth.val.shape)
    truth.err_dw = np.zeros(shape = truth.val.shape)

    # get migration matrix, by multiplying by the truth content
    mig = copy.deepcopy(resp)
    for itruth in range(Ntruth):
        for ireco in range(Nreco):
            mig.val[itruth, ireco] *= truth.val[itruth]

    # reco-level prediction: response folded with the truth distribution
    recoWithFakes_a = []
    for ireco in range(Nreco):
        total = 0
        for itruth in range(Ntruth):
            total += resp.val[itruth, ireco]*truth.val[itruth]
        recoWithFakes_a.append(total)
    recoWithFakes = H1D(np.asarray(recoWithFakes_a, dtype = np.float64))
    for ireco in range(Nreco):
        recoWithFakes.err[ireco] = 0

    tr_1dtruth = mig.project('x')
    tr_1dtruth.err = np.zeros(shape = tr_1dtruth.val.shape)
    tr_1dtruth.err_up = np.zeros(shape = tr_1dtruth.val.shape)
    tr_1dtruth.err_dw = np.zeros(shape = tr_1dtruth.val.shape)

    nrt = truth - tr_1dtruth

    # unit histogram with zero errors on the binning of nrt
    nbins = len(nrt.val)
    ones = H1D(np.ones(nbins))
    ones.err = np.zeros(nbins)
    ones.err_up = np.zeros(nbins)
    ones.err_dw = np.zeros(nbins)
    ones.x = copy.deepcopy(nrt.x)
    ones.x_err = copy.deepcopy(nrt.x_err)

    eff = ones + nrt.divideBinomial(truth)*(-1.0)
    return [truth, recoWithFakes, bkg, mig, eff, nrt]
def scanRegParameter(unfoldFunction, bkg, mig, eff, truth, N=1000, rangeAlpha=np.arange(0.0, 1.0, 1e-3), fname="scanRegParameter.png", fname_chi2="scanRegParameter_chi2.png", fname_norm="scanRegParameter_norm.png"):
    """Scan the regularisation parameter and pick the value with bias chi2 closest to 0.5.

    For every value in *rangeAlpha*, ``getBiasFromToys`` is called with *N*
    toys. Three plots are written through ``plotH1DLines``: the bias and its
    spread (*fname*), the normalisation bias (*fname_norm*) and the bias
    chi2 (*fname_chi2*).

    Returns
    -------
    list
        ``[bestAlpha, bestChi2, bias, bias_std, bias_norm, bias_norm_std]``,
        the last four evaluated at the best scan point.
    """
    import sys

    nPoints = len(rangeAlpha)
    bias = np.zeros(nPoints)
    bias_std = np.zeros(nPoints)
    bias_chi2 = np.zeros(nPoints)
    bias_norm = np.zeros(nPoints)
    bias_norm_std = np.zeros(nPoints)
    bias_syst = np.zeros(nPoints)

    minBias = 1e10
    bestAlpha = 0
    bestChi2 = 0
    bestI = 0
    for i in range(nPoints):
        alpha = rangeAlpha[i]
        print("scanRegParameter: parameter = ", alpha, " / ", rangeAlpha[-1])
        sys.stdout.flush()
        toyResult = getBiasFromToys(unfoldFunction, alpha, N, bkg, mig, eff, truth)
        (bias[i], bias_std[i], bias_chi2[i],
         bias_norm[i], bias_norm_std[i], bias_syst[i]) = toyResult
        print(" -- --> scanRegParameter: parameter = ", alpha, " / ", rangeAlpha[-1], " with chi2 = ", bias_chi2[i], ", mean and std = ", bias[i], bias_std[i])
        # keep the scan point whose chi2 is closest to 0.5
        distance = np.abs(bias_chi2[i] - 0.5)
        if distance < minBias:
            minBias = distance
            bestAlpha = alpha
            bestChi2 = bias_chi2[i]
            bestI = i

    def scanCurve(values, errors):
        # histogram of `values` versus the scanned parameter
        curve = H1D(bias)
        curve.val = values
        curve.err = errors
        curve.x = rangeAlpha
        curve.x_err = np.zeros(nPoints)
        return curve

    fig = plt.figure(figsize=(10, 10))

    # bias and its spread (the syst-only curve is built but not drawn)
    plt_bias = scanCurve(bias, np.zeros(nPoints))
    plt_bias_e = scanCurve(bias_std, np.zeros(nPoints))
    plt_bias_syst = scanCurve(bias_syst, np.zeros(nPoints))
    biasCurves = {}
    biasCurves[r"$E_{\mathrm{bins}}[|E_{\mathrm{toys}}[\mathrm{bias}]|]$"] = plt_bias
    biasCurves[r"$E_{\mathrm{bins}}[\sqrt{\mathrm{Var}_{\mathrm{toys}}[\mathrm{bias}]}]$"] = plt_bias_e
    plotH1DLines(biasCurves, "Regularization parameter", "Bias", "", fname)

    # normalisation bias and its spread
    plt_bias_norm = scanCurve(bias_norm, np.power(bias_norm_std, 2))
    plt_bias_norm_e = scanCurve(bias_norm_std, np.zeros(nPoints))
    normCurves = {}
    normCurves[r"$E_{\mathrm{toys}}[\mathrm{norm. \;\; bias}]$"] = plt_bias_norm
    normCurves[r"$\sqrt{\mathrm{Var}_{\mathrm{toys}}[\mathrm{norm. \;\; bias}]}$"] = plt_bias_norm_e
    plotH1DLines(normCurves, "Regularization parameter", "Normalisation bias", "", fname_norm)

    # bias chi2 together with a constant reference line at 0.5
    plt_bias_chi2 = H1D(bias_chi2)
    plt_bias_chi2.val = bias_chi2
    # error in chi^2 considering errors in the mean of std/sqrt(N)
    chi2Error = np.sqrt(float(len(truth.val)) / float(N))
    plt_bias_chi2.err = np.ones(nPoints) * chi2Error
    plt_bias_chi2.x = rangeAlpha
    plt_bias_chi2.x_err = np.zeros(nPoints)
    plt_cte = H1D(plt_bias_chi2)
    plt_cte.val = 0.5 * np.ones(nPoints)
    plt_cte.err = np.zeros(nPoints)
    chi2Curves = {}
    chi2Curves[r"$E_{\mathrm{bins}}[E_{\mathrm{toys}}[\mathrm{bias}]^2/\mathrm{Var}_{\mathrm{toys}}[\mathrm{bias}]]$"] = plt_bias_chi2
    chi2Curves["0.5"] = plt_cte
    plotH1DLines(chi2Curves, "Regularisation parameter", r"Bias $\mathrm{mean}^2/\mathrm{variance}$", "", fname_chi2)

    return [bestAlpha, bestChi2, bias[bestI], bias_std[bestI],
            bias_norm[bestI], bias_norm_std[bestI]]
def getDataFromModel(bkg, mig, eff):
    """Generate one pseudo-data histogram from the model (bkg, mig, eff).

    The truth distribution is ``mig.project('x') / eff``. The background in
    each reco bin and the number of events in each truth bin are drawn from
    Poisson distributions. Every truth event is kept with probability
    ``eff.val[i]`` and, if kept, assigned to a reco bin according to
    ``P(r=j | t=i) = mig[i, j] / sum_k mig[i, k]``.

    Parameters
    ----------
    bkg : background histogram (negative bin contents are treated as 0).
    mig : migration matrix, truth bins along the first index.
    eff : efficiency histogram, one entry per truth bin.

    Returns
    -------
    Histogram cloned from *bkg* whose ``val`` and ``err`` both hold the
    generated counts.
    """
    truth = mig.project('x') / eff

    # response_noeff = P(r|t) = Mtr / sum_k=1^Nr Mtk
    response_noeff = H2D(mig)
    for i in range(0, mig.shape[0]):  # for each truth bin
        rsum = 0.0
        for j in range(0, mig.shape[1]):  # sum of all reco bins in this truth bin
            rsum += mig.val[i, j]
        for j in range(0, mig.shape[1]):
            response_noeff.val[i, j] = mig.val[i, j] / rsum

    # original bkg histogram is ignored: only used to clone X axis
    data = H1D(bkg)
    nReco = len(bkg.val)

    # simulate background
    for j in range(0, nReco):  # j is the reco bin
        bv = bkg.val[j]
        if bv < 0:
            bv = 0
        # this simulates a counting experiment for the bkg
        bkgCount = np.random.poisson(bv)
        data.val[j] = bkgCount  # overwrite background so that we use a Poisson
        data.err[j] = bkgCount

    for i in range(0, len(truth.val)):  # i is the truth bin
        # this simulates a counting experiment for the truth
        trueCount = np.random.poisson(truth.val[i])

        # cumulative response for bin i: C(k|i) = sum_l=0^k P(r=l|t=i)
        # np.cumsum accumulates left to right, giving the same running sums
        # as an explicit double loop at linear instead of quadratic cost
        C = np.cumsum([response_noeff.val[i, l] for l in range(nReco)],
                      dtype=np.float64)

        # a uniform random number is between 0 and C[0] with prob.
        # response_noeff.val[i, 0], between C[0] and C[1] with prob.
        # response_noeff.val[i, 1], etc.
        for n in range(0, trueCount):
            # simulate efficiency by rejecting events with efficiency eff.val[i]
            if np.random.uniform(0, 1) > eff.val[i]:
                continue
            rn = np.random.uniform(0, 1)
            recoBin = nReco - 1  # default: the last bin
            for k in range(0, nReco):
                if rn >= C[k]:
                    # not yet at the boundary of the cumulative distribution
                    continue
                # first bin whose cumulative boundary lies above rn
                recoBin = k
                break
            data.val[recoBin] += 1
            data.err[recoBin] += 1
    return data
from Unfolder.Unfolder import Unfolder from Unfolder.Histogram import H1D, H2D, plotH1D, plotH2D from readHistograms import * sns.set(context="paper", style="whitegrid", font_scale=2) varname = "observable" extension = "eps" # get histograms from file truth, recoWithFakes, bkg, mig, eff, nrt = getHistograms( "out_ttallhad_psrw_Syst.root", "nominal", "mttAsymm") recoWithoutFakes = mig.project("y") eff_noerr = H1D(eff) for k in range(0, len(eff_noerr.err)): eff_noerr.err[k] = 0 bkg_noerr = H1D(bkg) for k in range(0, len(bkg_noerr.err)): bkg_noerr.err[k] = 0 # generate fake data data = recoWithFakes # Try alternative # Create alternative method for unfolding #tunfolder = getTUnfolder(bkg, mig, eff, data, regMode = ROOT.TUnfold.kRegModeDerivative) tunfolder = getTUnfolder(bkg, mig,