def sb_pval(cls, LLR, LLR_obs, LLRA, nullmu):
    """Compute p-values for the s+b/b test ('musb').

    The test statistic is q = -2 * log(L1/L0).  Its asymptotic distribution
    is taken to be Gaussian with mean and variance derived from the Asimov
    value ``LLRA`` (per the original author's notes: Eq. 73, 74 and 76 of
    arXiv:1007.1727).

    Parameters
    ----------
    cls : unused here (first argument of what is presumably a classmethod).
    LLR : simulated test-statistic values, or None to skip the empirical
        p-value.
    LLR_obs : observed value(s) of the test statistic.
    LLRA : Asimov value of the test statistic; used as the Gaussian mean and,
        multiplied by +/-4, as the variance.
    nullmu : signal strength of the null hypothesis.  A value equal to 0
        selects the background-only branch; anything else selects the
        signal+background branch.

    Returns
    -------
    (apval, epval, Eq, Varq)
        ``apval`` is a 1-d array of asymptotic p-values, or None if every
        entry is NaN.  ``epval`` is the result of ``c.e_pval`` or None when
        ``LLR`` is None.  ``Eq`` and ``Varq`` are the Gaussian mean and
        variance that were used.
    """
    # Compare by value, not identity: `nullmu is 0` is False for 0.0 or a
    # numpy integer zero, which silently selected the wrong branch.
    background_null = (nullmu == 0)

    # The variance carries a hypothesis-dependent sign.
    sign = 1 if background_null else -1
    Eq = LLRA
    Varq = sign * 4 * LLRA

    z = (LLR_obs - Eq) / np.sqrt(Varq)
    if background_null:
        tail = sps.norm.cdf(z)
    else:
        # Integration goes the other way in the signal+background case;
        # sf(z) is 1 - cdf(z) without the cancellation error.
        tail = sps.norm.sf(z)
    apval = np.atleast_1d(tail)
    if np.all(np.isnan(apval)):
        apval = None  # bit easier to work with

    # Empirical p-value.  The signal-like direction is negative, so for the
    # mu=0 null we integrate from -infinity up to LLR_obs, i.e. the reverse
    # of the chi-squared ordering.  For any other null the ordering is as in
    # the chi-squared case.
    if LLR is None:
        epval = None
    elif background_null:
        epval = c.e_pval(LLR, LLR_obs, reverse=True)
    else:
        epval = c.e_pval(LLR, LLR_obs)

    return apval, epval, Eq, Varq
def chi2_pval(cls, LLR, LLR_obs, DOF):
    """Compute p-values for a chi-squared test.

    Parameters
    ----------
    cls : unused here (first argument of what is presumably a classmethod).
    LLR : pre-generated test-statistic samples, or None to compute only the
        asymptotic p-value.
    LLR_obs : observed value(s) of the test statistic.
    DOF : degrees of freedom of the reference chi-squared distribution.

    Returns
    -------
    (apval, epval)
        ``apval`` is a 1-d array of upper-tail chi-squared probabilities, or
        None if every entry is NaN.  ``epval`` is the result of ``c.e_pval``
        or None when ``LLR`` is None.
    """
    # Use the survival function rather than 1 - cdf: the subtraction rounds
    # to exactly 0 for large test statistics, which later turns into an
    # infinite significance.
    apval = np.atleast_1d(sps.chi2.sf(LLR_obs, DOF))
    if np.all(np.isnan(apval)):
        apval = None  # bit easier to work with

    # Empirical p-value from the simulated distribution, when available.
    epval = c.e_pval(LLR, LLR_obs) if LLR is not None else None

    return apval, epval
def do_test(self,model,test,samples=None,extra_null_opt=None,signal=None):
    """Perform the selected likelihood-ratio test.

    Fits (or directly evaluates) the null and full hypotheses on every
    simulated dataset in ``samples`` and on ``self.observed_data``, then
    forms the test statistic -2*(Lmax0 - Lmax).

    Parameters
    ----------
    model : object providing ``logpdf(pars, x)`` and
        ``find_MLE_parallel(options, data, method=..., Nprocesses=...,
        seeds=...)``.
    test : key into ``self.tests``; the entry supplies ``null_options``,
        ``full_options``, ``DOF``, ``null_seeds``, ``full_seeds`` and
        ``diagnostics``.
    samples : simulated datasets (indexed along the first axis), or the
        string ``'no_MC'`` to skip simulation fits and use only the
        observed data.
    extra_null_opt : optional dict of overrides applied to a *copy* of the
        null-hypothesis options for this call only.
    signal : forwarded as the second argument of the seed functions.

    Returns
    -------
    (LLR, LLR_obs, apval, epval, DOF)
        ``LLR`` is the array of simulated test statistics (negative values
        clipped to 0) or None in 'no_MC' mode; ``LLR_obs`` the observed
        test statistic; ``apval`` the asymptotic chi-squared p-value;
        ``epval`` the empirical p-value from ``c.e_pval`` or None without
        simulations.
    """
    print("Fitting experiment {0} in '{1}' test".format(self.name,test))

    spec = self.tests[test]

    # Work on a copy so per-call overrides do not leak into the stored
    # options and contaminate later calls.
    null_opt = dict(spec.null_options)
    if extra_null_opt:
        null_opt.update(extra_null_opt)
    full_opt = spec.full_options
    DOF = spec.DOF

    def split_seeds(entry):
        """Return (seed_function, exact_flag); a bare function means inexact."""
        try:
            func, exact = entry
        except TypeError:
            func, exact = entry, False
        return func, exact

    def fixed_parameters(options):
        """Collect values of parameters whose 'fix_<name>' option is truthy."""
        fixed = {}
        for par in options:
            fixname = "fix_{0}".format(par)
            if fixname in options and options[fixname]:
                fixed[par] = options[par]
        return fixed

    def exact_logl(pmax):
        """Evaluate model.logpdf sample-by-sample at the given parameters."""
        Nsamples = samples.shape[0]
        logl = np.zeros(Nsamples)
        # Loop explicitly so parameters and samples are paired in lock-step,
        # not evaluated as every parameter set against every sample.
        for i, X in enumerate(samples):
            if i % 50 == 0:
                print("\r","Processed {0} of {1} samples... ".format(i,Nsamples), end="")
            pars = {}
            for par, val in pmax.items():
                try:
                    pars[par] = val[i]
                except TypeError:
                    pars[par] = val  # Fixed parameters aren't arrays
            logl[i] = model.logpdf(pars, X)
        return logl

    # Seed functions and fixed parameters are also needed for the
    # observed-data fit, so set them up whether or not we run on simulations.
    nseeds, nexact = split_seeds(spec.null_seeds)
    fseeds, fexact = split_seeds(spec.full_seeds)
    null_fixed_pars = fixed_parameters(null_opt)
    # Previously this looped over the *null* option names by mistake.
    full_fixed_pars = fixed_parameters(full_opt)

    # An ndarray compared to a string with != is evaluated elementwise, so
    # test for the sentinel explicitly.
    skip_mc = isinstance(samples, str) and samples == 'no_MC'

    if not skip_mc:
        Nproc = 3
        print("samples:",samples)
        print("nseeds:", nseeds)
        seeds0 = nseeds(samples,signal)  # null hypothesis fits depend on signal parameters
        seeds = fseeds(samples,signal)   # in mu fit case, the seeds also depend on the signal

        if nexact:
            print("Seeds are exact MLEs for null hypothesis; skipping minimisation")
            pmax0 = seeds0
            pmax0.update(null_fixed_pars)
            Lmax0 = exact_logl(pmax0)
        else:
            print("Fitting null hypothesis...")
            Lmax0, pmax0 = model.find_MLE_parallel(null_opt,samples,method='minuit',
                                                   Nprocesses=Nproc,seeds=seeds0)
        print()

        if fexact:
            pmax = seeds
            pmax.update(full_fixed_pars)
            Lmax = exact_logl(pmax)
        else:
            print("Fitting alternate hypothesis (free signal)...")
            Lmax, pmax = model.find_MLE_parallel(full_opt,samples,method='minuit',
                                                 Nprocesses=Nproc,seeds=seeds)
        print()

        # Run diagnostics functions for this experiment + test
        print("Running extra diagnostic functions")
        dfuncs = spec.diagnostics
        if dfuncs:
            for f in dfuncs:
                f(self, Lmax0, pmax0, seeds0, Lmax, pmax, seeds, samples)

        # Likelihood ratio test statistics
        LLR = -2*(Lmax0 - Lmax)
        # Correct (hopefully) small numerical errors
        LLR[LLR<0] = 0
    else:
        LLR = None

    # Also fit the observed data so we can compute its p-value
    print("Fitting with observed data...")
    odata = self.observed_data
    print("odata:", odata)
    seeds0_obs = nseeds(odata,signal)  # null hypothesis fits depend on signal parameters
    seeds_obs = fseeds(odata,signal)

    if nexact:
        pmax0_obs = seeds0_obs
        pmax0_obs.update(null_fixed_pars)
        Lmax0_obs = model.logpdf(pmax0_obs,odata[0])
    else:
        Lmax0_obs, pmax0_obs = model.find_MLE_parallel(null_opt, odata, method='minuit',
                                                       Nprocesses=1, seeds=seeds0_obs)
    if fexact:
        pmax_obs = seeds_obs
        pmax_obs.update(full_fixed_pars)
        Lmax_obs = model.logpdf(pmax_obs,odata[0])
    else:
        Lmax_obs, pmax_obs = model.find_MLE_parallel(full_opt, odata, method='minuit',
                                                     Nprocesses=1, seeds=seeds_obs)

    # Asymptotic p-value.  sf() avoids the underflow of 1 - cdf() to exactly
    # zero for very large test statistics.
    LLR_obs = -2 * (Lmax0_obs[0] - Lmax_obs[0])
    apval = np.atleast_1d(sps.chi2.sf(LLR_obs, DOF))[0]

    # Empirical p-value (only available when simulations were run)
    if LLR is not None:
        epval = c.e_pval(LLR,LLR_obs)
    else:
        epval = None

    return LLR, LLR_obs, apval, epval, DOF
def run():
    """Run the goodness-of-fit and mu tests for all configured experiments.

    Builds the experiment list (imported modules named in
    ``experiment_definitions`` plus every analysis in ``CBa.analyses``),
    simulates data under each experiment's 'gof' test parameters, optionally
    fits each experiment individually, fits the combined ("monster")
    experiment, writes the test-statistic plots to PNG files and prints a
    summary table.

    The behaviour is steered by flags set inside the function (``tag``,
    ``do_mu``, ``skip_to_mu_mon``, ``do_monster``).  Returns None.

    NOTE(review): the source indentation was lost, so the nesting of the
    stages below is reconstructed from the data flow -- confirm against the
    original layout.
    """
    experiment_definitions = []
    experiment_modules = {
        lib: importlib.import_module("experiments.{0}".format(lib))
        for lib in experiment_definitions
    }

    # Set the null hypothesis for 'gof' tests. This means fixing the
    # non-nuisance parameters in all of the experiments to something.
    # For testing these are matched exactly to observed data; in reality
    # they should come from some model of interest, e.g. a scan best-fit.
    gof_null = {}
    gof_null["top_mass"] = {'loc': 173.34}
    gof_null["alpha_s"] = {'loc': 0.1181}
    gof_null["Z_invisible_width"] = {'loc': 0.2}
    gof_null["Higgs_invisible_width"] = {'BF': 0}

    def Collider_Null(N_SR):
        """Zero-signal null parameters 's_0' ... 's_{N_SR-1}'."""
        return {"s_{0}".format(i): 0 for i in range(N_SR)}

    # The 'mu' hypothesis null parameters should be defined internally by
    # each experiment.

    # Add the ColliderBit analyses; have to do this after the signal
    # hypothesis is set.
    CBexperiments = {}
    for a in CBa.analyses.values():
        signal = Collider_Null(a.N_SR)
        gof_null[a.name] = signal
        CBexperiments[a.name] = a.make_experiment(signal)

    class Empty:
        pass

    # Container that acts like an experiment module
    experiment_modules["ColliderBit_analysis"] = Empty()
    experiment_modules["ColliderBit_analysis"].experiments = CBexperiments

    # Extract all experiments from modules (some define more than one).
    # Not all experiments are used in all tests, so divide them up as needed.
    gof_experiments = []
    mu_experiments = []
    all_experiments = []
    for em in experiment_modules.values():
        for e in em.experiments.values():
            all_experiments += [e]
            if 'gof' in e.tests.keys():
                gof_experiments += [e]
            if 'mu' in e.tests.keys():
                mu_experiments += [e]

    tag = "5e3"
    Nsamples = int(float(tag))

    # With zero samples we ditch the simulations and compute asymptotic
    # results only. This is very fast: only the nuisance parameters have to
    # be fitted under the observed data.
    do_MC = Nsamples != 0

    # Do single-parameter mu scaling fit?
    do_mu = True

    # Skip the per-experiment fits and go straight to the combined mu fit
    skip_to_mu_mon = True

    # Analyse full combined model?
    do_monster = True

    # Dictionary of results
    results = {}

    # The GOF test currently simulates under the background-only hypothesis.
    # Testing the goodness of fit of e.g. a GAMBIT best-fit point would
    # require simulating under that best-fit signal hypothesis instead.

    # Create monster joint experiment
    if do_monster:
        m = Experiment.fromExperimentList(all_experiments)

    def makeplot(ax, tobin, theoryf, log=True, label="", c='r', obs=None,
                 pval=None, qran=None, title=None):
        """Draw a test-statistic histogram, theory curve and observed line.

        ``tobin`` (samples to histogram), ``theoryf`` (callable pdf),
        ``obs`` (observed value), ``pval`` and ``title`` are each skipped
        when None.  ``qran`` is the x-range; defaults to (0, 25).
        """
        print("Generating test statistic plot {0}".format(label))
        ran = (0, 25) if qran is None else qran
        yran = (1e-4, 0.5)
        if tobin is not None:
            # `normed` was removed from numpy.histogram; `density` is its
            # replacement and is identical for equal-width bins.
            n, bins = np.histogram(tobin, bins=50, density=True, range=ran)
            ax.plot(bins[:-1], n, drawstyle='steps-post', label=label, c=c)
            yran = (1e-4, np.max([0.5, np.max(n)]))
        q = np.arange(ran[0], ran[1], 0.01)
        if theoryf is not None:
            ax.plot(q, theoryf(q), c='k')
        ax.set_xlabel("LLR")
        ax.set_ylabel("pdf(LLR)")
        if log:
            ax.set_yscale("log")
        if obs is not None:
            # Draw line for observed value, and show p-value region shaded
            qfill = np.arange(obs, ran[1], 0.01)
            if theoryf is not None:
                ax.fill_between(qfill, 0, theoryf(qfill), lw=0,
                                facecolor=c, alpha=0.2)
            pval_str = ""
            if pval is not None:
                pval_str = " (p={0:.2g})".format(pval)
            ax.axvline(x=obs, lw=2, c=c,
                       label="Observed ({0}){1}".format(label, pval_str))
        ax.set_xlim(ran[0], ran[1])
        ax.set_ylim(yran[0], yran[1])
        if title is not None:
            ax.set_title(title)

    # Simulate data and prepare results dictionaries
    all_samples = []
    for e in gof_experiments:
        print(e.name)
        print("test_pars:", e.tests['gof'].test_pars)
        if do_MC:
            # Just using test parameter values
            all_samples += [
                e.general_model.simulate(Nsamples, e.tests['gof'].test_pars)
            ]
        else:
            all_samples += [[]]
        results[e.name] = {}

    LLR_obs_monster = 0
    if not skip_to_mu_mon:
        # Main loop for fitting experiments
        LLR_monster = 0
        for e, samples in zip(gof_experiments, all_samples):
            # Do fit!
            # Replace this with e.g. prediction from MSSM best fit
            test_parameters = gof_null[e.name]
            LLR, LLR_obs, pval, epval, gofDOF = e.do_gof_test(
                test_parameters, samples)

            # Save LLR for combining (only works if experiments have no
            # common parameters)
            if LLR is not None:
                LLR_monster += LLR
            else:
                LLR_monster = None
            LLR_obs_monster += LLR_obs

            # Plot!
            fig = plt.figure(figsize=(6, 4))
            ax = fig.add_subplot(111)
            # Range for test statistic axis. Draw as far as is equivalent
            # to 5 sigma
            qran = [0, sps.chi2.ppf(sps.chi2.cdf(25, df=1), df=gofDOF)]
            makeplot(ax, LLR, lambda q: sps.chi2.pdf(q, gofDOF), log=True,
                     label='free s', c='g', obs=LLR_obs, pval=pval,
                     qran=qran, title=e.name + " (Nbins={0})".format(gofDOF))
            ax.legend(loc=1, frameon=False, framealpha=0, prop={'size': 10})
            fig.savefig('auto_experiment_{0}_{1}.png'.format(e.name, tag))
            plt.close(fig)

            # Fit mu model
            if do_mu:
                mu_LLR, mu_LLR_obs, mu_pval, mu_epval, muDOF = e.do_mu_test(
                    e.tests['mu'].test_signal, samples)

                # Plot! (muDOF should just be 1)
                fig = plt.figure(figsize=(6, 4))
                ax = fig.add_subplot(111)
                makeplot(ax, mu_LLR, lambda q: sps.chi2.pdf(q, muDOF),
                         log=True, label='mu', c='b', obs=mu_LLR_obs,
                         pval=mu_pval, title=e.name)
                ax.legend(loc=1, frameon=False, framealpha=0,
                          prop={'size': 10})
                fig.savefig('auto_experiment_mu_{0}_{1}.png'.format(
                    e.name, tag))
                plt.close(fig)

            # Store results (significances are one-tailed)
            results[e.name]["LLR_gof_b"] = LLR_obs
            results[e.name]["apval_gof_b"] = pval
            results[e.name]["asignif. gof_b"] = -sps.norm.ppf(pval)
            results[e.name]["DOF"] = gofDOF
            if do_mu:
                results[e.name]["LLR_mu_b"] = mu_LLR_obs
                results[e.name]["apval_mu_b"] = mu_pval
                results[e.name]["asignif. mu_b"] = -sps.norm.ppf(mu_pval)
            if LLR is not None:
                results[e.name]["epval_gof_b"] = epval
                results[e.name]["esignif. gof_b"] = -sps.norm.ppf(epval)
                if do_mu:
                    results[e.name]["epval_mu_b"] = mu_epval
                    results[e.name]["esignif. mu_b"] = -sps.norm.ppf(mu_epval)

        # Plot monster LLR distribution
        fig = plt.figure(figsize=(6, 4))
        ax = fig.add_subplot(111)
        monster_DOF = np.sum([e.DOF for e in gof_experiments])
        # sf() instead of 1 - cdf() so tiny p-values do not round to zero
        monster_pval = np.atleast_1d(
            sps.chi2.sf(LLR_obs_monster, monster_DOF))[0]
        monster_epval = c.e_pval(LLR_monster,
                                 LLR_obs_monster) if do_MC else None
        monster_qran = [
            0, sps.chi2.ppf(sps.chi2.cdf(25, df=1), df=monster_DOF)
        ]
        print("Monster DOF:", monster_DOF)
        print("Monster pval:", monster_pval)
        print("Monster LLR_obs:", LLR_obs_monster)
        makeplot(ax, LLR_monster, lambda q: sps.chi2.pdf(q, monster_DOF),
                 log=True, label='free s', c='g', obs=LLR_obs_monster,
                 pval=monster_pval, qran=monster_qran, title="Monster")
        ax.legend(loc=1, frameon=False, framealpha=0, prop={'size': 10})
        fig.savefig('auto_experiment_monster_{0}.png'.format(tag))
        plt.close(fig)

    # Join all samples
    if do_MC:
        monster_samples = np.concatenate(
            [samp.reshape(Nsamples, 1, -1) for samp in all_samples], axis=-1)
        # Only report the shape when there is an array to report on
        print("monster_samples.shape:", monster_samples.shape)
    else:
        monster_samples = None

    if do_mu and do_monster:
        signal = m.tests['mu'].test_signal
        mu_LLR, mu_LLR_obs, mu_pval, mu_epval, muDOF = m.do_mu_test(
            signal, monster_samples)

        # Plot!
        fig = plt.figure(figsize=(6, 4))
        ax = fig.add_subplot(111)
        makeplot(ax, mu_LLR, lambda q: sps.chi2.pdf(q, 1), log=True,
                 label='mu', c='b', obs=mu_LLR_obs, pval=mu_pval,
                 title="Monster")
        ax.legend(loc=1, frameon=False, framealpha=0, prop={'size': 10})
        fig.savefig('auto_experiment_mu_monster_{0}.png'.format(tag))
        plt.close(fig)

    # Store results for Monster.  The combined GOF quantities only exist
    # when the per-experiment stage actually ran.
    results["Combined"] = {}
    if not skip_to_mu_mon:
        results["Combined"]["LLR_gof_b"] = LLR_obs_monster
        results["Combined"]["apval_gof_b"] = monster_pval
        results["Combined"]["asignif. gof_b"] = -sps.norm.ppf(monster_pval)
        results["Combined"]["DOF"] = monster_DOF
        if do_MC:
            results["Combined"]["epval_gof_b"] = monster_epval
            results["Combined"]["esignif. gof_b"] = -sps.norm.ppf(
                monster_epval)
    if do_mu and do_monster:
        results["Combined"]["LLR_mu_b"] = mu_LLR_obs
        results["Combined"]["apval_mu_b"] = mu_pval
        results["Combined"]["asignif. mu_b"] = -sps.norm.ppf(mu_pval)
        if do_MC:
            results["Combined"]["epval_mu_b"] = mu_epval
            results["Combined"]["esignif. mu_b"] = -sps.norm.ppf(mu_epval)

    # Produce a table of results: convert to a Pandas dataframe with one
    # column per experiment and rows in a fixed order.
    r = pd.DataFrame.from_dict(results)
    order = ['DOF', 'LLR_gof_b', 'apval_gof_b']
    if do_MC:
        order += ['epval_gof_b']
    order += ['asignif. gof_b']
    if do_MC:
        order += ['esignif. gof_b']
    if do_mu:
        order += ['LLR_mu_b', 'apval_mu_b']
    if do_MC and do_mu:
        order += ['epval_mu_b']
    if do_mu:
        order += ['asignif. mu_b']
    if do_MC and do_mu:
        order += ['esignif. mu_b']
    exp_order = [e.name for e in gof_experiments] + ['Combined']
    print(r[exp_order].reindex(order))
t_logl = parmodel.logpdf( { 'mu1': t_mu1.reshape(newshape), 'mu2': t_mu2.reshape(newshape), 'mu3': t_mu3.reshape(newshape) }, obs_data) # Finally, we need the logl of our null hypothesis under each simulated dataset Lmax0 = parmodel.logpdf(null_parameters, null_data)[..., 0] LLR = -2 * (Lmax0 - Lmax) # Also need observed value of test statistic LLR_obs = -2 * (parmodel.logpdf(null_parameters, obs_data) - np.max(t_logl)) # Compare asymptotic to empirical p-values epval = c.e_pval(LLR, LLR_obs[0][0]) apval = 1 - sps.chi2.cdf(LLR_obs[0][0], 3) print("Empirical p-value :", epval) print("Asymptotic p-value:", apval) # Now we can compute our test statistic and plot its distribution! fig = plt.figure(figsize=(6, 4)) ax = fig.add_subplot(111) jtp.plot_teststat(ax, LLR, lambda q: sps.chi2.pdf(q, 3), log=True, c='b', obs=LLR_obs, pval=None,