import numpy as np
import matplotlib.pyplot as plt
import math
import scipy

# Input/output locations and scan configuration.
model_filename = 'run/high_mass_gg_1300_100.json'
hypos_filename = 'run/hypos_high_mass_gg_1300.json'
output_filename = 'samples/high_mass_gg_1300'
poi = 'mu'
poi_init = 1
poi_min, poi_max = -3, 20
ntoys = 10000

# Build the fast model and give it the dataset expected for a POI value of 0.
# (Alternative left by the author: Data(fast_model).load(model_filename).)
fast_model = Model.create(model_filename)
fast_data = Data(fast_model)
fast_data = fast_data.set_expected(fast_model.expected_pars(0))

# Read the list of hypotheses; each entry is a dict that holds at least the POI value.
with open(hypos_filename, 'r') as fd:
    hypo_dicts = json.load(fd)
hypo_mus = [entry[poi] for entry in hypo_dicts]

print(fast_model)
np.random.seed(131071)

# Fill in the asymptotic CL for every hypothesis that does not already carry one.
for hd in hypo_dicts:
    if 'cl' in hd:
        continue
    q_mu = QMu(hd[poi], hd['tmu'], hd['best_fit_val'])
    hd['cl'] = q_mu.asymptotic_cl()
import numpy as np
import matplotlib.pyplot as plt
from fastprof import Model, Data, Parameters, NPMinimizer, OptiMinimizer
import copy

np.set_printoptions(precision=4, suppress=False, linewidth=180)
np.random.seed(4)

# --- Nominal model: generate one dataset at POI = 0.1 and compute tmu there ---
model = Model().load('run/fastprof/HighMass_NW-1700-log200-noDSig.json')
pars0 = model.expected_pars(0.1)
data = model.generate_data(pars0)

opti = OptiMinimizer(data, 0.1, (0, 20))
t = opti.tmu(0.1)
print(opti.hypo_pars)
# Print the approximate, then the exact, closure result at the profiled parameters.
for closure in (model.closure_approx, model.closure_exact):
    print(closure(opti.hypo_pars, data))

# --- Same test on a regularized copy of the model ---
model2 = model.regularize(2)
# Shallow-copy the dataset, then re-point it at the new model with zeroed aux_betas.
data2 = copy.copy(data)
data2.model = model2
data2.aux_betas = np.zeros(model2.nb)

opti2 = OptiMinimizer(data2, 0.1, (0, 20))
t2 = opti2.tmu(0.1)
print(opti2.hypo_pars)
import numpy as np
import matplotlib.pyplot as plt
from fastprof import Model, Data, Parameters, NPMinimizer, OptiMinimizer

# Compare datasets built with the model's `linear_nps` flag off and on,
# after randomizing the gamma parameters around their expected values.
np.random.seed(7)

model = Model().load('fastprof/models/HighMass_NW-700-log500-gammas-only.json')
pars0 = model.expected_pars(5)
pars = model.expected_pars(5)
# Smear each gamma with a unit-width Gaussian centred on its expected value.
pars.gammas = np.random.normal(pars.gammas, 1)
print('Randomized gammas = ', pars.gammas)

# Expected dataset with linear_nps disabled, then enabled (the flag stays enabled afterwards).
model.linear_nps = False
data_exp = Data(model).set_expected(pars)
model.linear_nps = True
data_lin = Data(model).set_expected(pars)

# Profile the NPs at POI = 5 on the dataset built with linear_nps enabled.
npm = NPMinimizer(5, data_lin)
print(npm.profile())
min_pars = npm.min_pars

plt.ion()
plt.show()

fig1 = plt.figure(1)
# FigureCanvasBase.set_window_title was deprecated in Matplotlib 3.4 and later removed;
# the figure manager provides the supported equivalent.
fig1.canvas.manager.set_window_title('True pars, data from linear model')
model.plot(pars, data_lin, residuals=True)
plt.xlim(150, 300)

fig2 = plt.figure(2)
# Profile-NLL scan over the hypotheses in `mus` for a two-bin toy setup.
# NOTE(review): `mus`, `Model`, `NPMinimizer`, `plt` and `InterpolatedUnivariateSpline`
# are expected to be defined earlier in the file (not visible in this fragment).
n = np.array([5, 12])
bkg = np.array([1.0, 10.0])
alpha = np.array([1])
beta = np.array([-1.6])
a = np.array([[0.2], [0.2]])
b = np.array([[0.2], [0.2]])
# The last `n_np` entries of `data` are passed separately from the leading ones below.
# NOTE(review): presumably bin counts followed by auxiliary measurements - confirm.
data = np.array([7, 12, 0, 1])
models = {}
n_np = 2
# One model per tested mu, keyed by the mu value itself.
for mu in mus:
    sig = np.array([1.0, 0]) * mu  # specific to this case!
    models[mu] = Model(np.array(data[:-n_np]), sig, bkg, np.array([data[3]]), np.array([data[2]]), a, b)
nlls = np.zeros(mus.shape[0])
# Re-attach the data to each model and store the value returned by profile_nll.
for i, mu in enumerate(mus):
    model = models[mu]
    model.set_all_data(np.array(data[:-n_np]), np.array([data[3]]), np.array([data[2]]))
    nlls[i] = NPMinimizer().profile_nll(model)
plt.ion()
plt.plot(mus, nlls, 'b')
# Degree-4 interpolating spline through the scan points; the roots of its derivative
# are the critical points of the interpolated curve.
# NOTE(review): k=4 presumably chosen so that the derivative is a cubic spline - confirm.
smooth_nll = InterpolatedUnivariateSpline(mus, nlls, k=4)
cr_pts = smooth_nll.derivative().roots()
x = np.linspace(0, mus[-1], 100)
import numpy as np
import matplotlib.pyplot as plt
from fastprof import Model, ScanSampler, OptiSampler

# Two-bin model ingredients, built in the same order as the keyword arguments below.
sig_yields = np.array([1.0, 0])
bkg_yields = np.array([1.0, 10.0])
alpha_impacts = np.array([[0.2], [0.2]])
beta_impacts = np.array([[0.2], [0.2]])
model = Model(sig=sig_yields, bkg=bkg_yields,
              alphas=['acc_sys'], betas=['bkg_sys'],
              a=alpha_impacts, b=beta_impacts)

gen_mu = 3.7
print('Will generate the following hypothesis: ', gen_mu)
scan_mus = np.linspace(0, 10, 21)
print('Will scan over the following hypotheses: ', scan_mus)

np.random.seed(131071)
# Alternative left by the author: ScanSampler(model, scan_mus).generate(gen_mu, 10000)
sampler = OptiSampler(model, mu0=1, bounds=(0, 20), method='scalar')
dist = sampler.generate(gen_mu, 10000)
dist.sort()

# Histogram of the generated samples, log-scaled on the y axis.
plt.ion()
plt.yscale('log')
plt.hist(dist.samples[:], bins=50)
def run(argv=None):
    """Recompute a raster of full-model results with the fast model and compare the CLs.

    Loads the model and the reference raster (``options.fits_file``), selects a dataset
    (data file, Asimov specification, or the dataset stored in the model file),
    recomputes the raster with the requested test statistic and prints both results.
    Unless ``options.batch_mode`` is set, CL_{s+b} and CL_s are plotted for both
    rasters and saved to ``<output_file>_clsb.pdf`` and ``<output_file>_cls.pdf``.

    Args:
        argv: argument list handed to the parser; ``None`` lets argparse fall back
            to ``sys.argv[1:]``.

    Raises:
        ValueError: missing model or dataset, invalid Asimov specification,
            unknown test statistic, or more than one POI.
    """
    parser = make_parser()
    # Forward argv so that programmatic callers are honoured (it was previously ignored).
    options = parser.parse_args(argv)
    if not options:
        parser.print_help()
        return

    model = Model.create(options.model_file)
    if model is None:
        raise ValueError('No valid model definition found in file %s.' % options.model_file)
    if options.regularize is not None:
        model.set_gamma_regularization(options.regularize)
    if options.cutoff is not None:
        model.cutoff = options.cutoff
    if options.setrange is not None:
        process_setranges(options.setrange, model)

    raster = Raster('data', model=model, filename=options.fits_file)

    # Dataset selection: explicit data file > Asimov specification > model file.
    if options.data_file:
        data = Data(model).load(options.data_file)
        if data is None:
            raise ValueError('No valid dataset definition found in file %s.' % options.data_file)
        print('Using dataset stored in file %s.' % options.data_file)
    elif options.asimov is not None:
        try:
            sets = process_setvals(options.asimov, model)
        except Exception as inst:
            print(inst)
            raise ValueError("ERROR : invalid POI specification string '%s'." % options.asimov) from inst
        data = model.generate_expected(sets)
        print('Using Asimov dataset with parameters %s' % str(sets))
    else:
        print('Using dataset stored in file %s.' % options.model_file)
        data = Data(model).load(options.model_file)

    # Test-statistic calculator; both supported statistics are limited to a single POI.
    if options.test_statistic == 'q~mu':
        if len(raster.pois()) > 1:
            raise ValueError('Currently not supporting more than 1 POI for this operation')
        calc = QMuTildaCalculator(OptiMinimizer(niter=options.iterations).set_pois_from_model(model))
    elif options.test_statistic == 'q_mu':
        if len(raster.pois()) > 1:
            raise ValueError('Currently not supporting more than 1 POI for this operation')
        calc = QMuCalculator(OptiMinimizer(niter=options.iterations).set_pois_from_model(model))
    else:
        raise ValueError('Unknown test statistic %s' % options.test_statistic)

    calc.fill_all_pv(raster)
    faster = calc.recompute_raster(raster, data)
    raster.print(verbosity=options.verbosity, other=faster)
    if options.verbosity > 2:
        print(str(faster))

    # Plot results
    if not options.batch_mode:
        poi = raster.pois()[list(raster.pois())[0]]
        plt.ion()

        def plot_comparison(fig_num, label, key):
            """Draw p-value `key` vs. the POI for the full and fast rasters on figure `fig_num`."""
            fig = plt.figure(fig_num)
            plt.suptitle(label)
            plt.xlabel(model.poi(0).name)
            plt.ylabel(label)
            plt.plot([hypo[poi.name] for hypo in raster.plr_data],
                     [full.pvs[key] for full in raster.plr_data.values()],
                     options.marker + 'r:', label='Full model')
            plt.plot([hypo[poi.name] for hypo in faster.plr_data],
                     [fast.pvs[key] for fast in faster.plr_data.values()],
                     options.marker + 'g-', label='Fast model')
            plt.legend()
            return fig

        fig1 = plot_comparison(1, '$CL_{s+b}$', 'pv')
        fig2 = plot_comparison(2, '$CL_s$', 'cls')
        fig1.savefig(options.output_file + '_clsb.pdf')
        fig2.savefig(options.output_file + '_cls.pdf')
        plt.show()
def run(argv=None):
    """Plot diagnostic histograms from a sampling debug CSV file.

    Figure 1 shows the ``mu_hat``, ``tmu``, ``pv`` and ``nfev`` columns. Figure 2 shows,
    for every ``free_<par>`` column (excluding those ending in ``nll``), the free-fit
    and hypothesis-fit values of the parameter, shifted by the hypothesis value when
    ``options.hypo`` is given. With ``options.reference`` set, reference curves
    (Gaussian, chi2 with 1 d.o.f., flat) are overlaid.

    Args:
        argv: argument list handed to the parser; ``None`` lets argparse fall back
            to ``sys.argv[1:]``.

    Raises:
        ValueError: missing model or invalid ``<filename>:<index>`` hypothesis spec.
    """
    parser = make_parser()
    # Forward argv so that programmatic callers are honoured (it was previously ignored).
    options = parser.parse_args(argv)
    if not options:
        parser.print_help()
        sys.exit(0)

    debug = pd.read_csv(options.filename[0])
    plt.ion()

    # --- Figure 1: fit summary quantities ---
    fig1, ax1 = plt.subplots(2, 2)
    debug.hist('mu_hat', ax=ax1[0, 0], bins=options.nbins)
    debug.hist('tmu', ax=ax1[0, 1], bins=np.linspace(0, options.tmu_range, options.nbins))
    debug.hist('pv', ax=ax1[1, 0], bins=options.nbins)
    debug.hist('nfev', ax=ax1[1, 1])
    if options.log_scale:
        ax1[0, 0].set_yscale('log')
        ax1[0, 1].set_yscale('log')
        ax1[0, 1].set_ylim(bottom=1)
        ax1[1, 0].set_yscale('log')
    if options.reference:
        # Each reference curve is normalized to (number of entries) x (grid spacing).
        mu_hat = debug['mu_hat']
        xx = np.linspace(np.min(mu_hat), np.max(mu_hat), options.nbins)
        yy = mu_hat.shape[0] * (xx[1] - xx[0]) * norm.pdf(xx, np.mean(mu_hat), np.std(mu_hat))
        ax1[0, 0].plot(xx, yy)
        tmu = debug['tmu']
        xx = np.linspace(0, options.tmu_range, options.nbins)
        yy = tmu.shape[0] * (xx[1] - xx[0]) * chi2.pdf(xx, 1)
        ax1[0, 1].plot(xx, yy)
        xx = np.linspace(0, 1, options.nbins)
        yy = np.full(xx.shape, mu_hat.shape[0] * (xx[1] - xx[0]))
        ax1[1, 0].plot(xx, yy)

    # --- Optional hypothesis used to re-centre the NP distributions ---
    if options.hypo != '':
        model = Model.create(options.model_file)
        if model is None:
            raise ValueError('No valid model definition found in file %s.' % options.model_file)
        try:
            filename, index = options.hypo.split(':')
            index = int(index)
            raster = Raster('data', model=model, filename=filename)
            hypo = list(raster.plr_data.keys())[index]
            print('Using hypothesis %s' % str(hypo.dict(pois_only=True)))
        except Exception as inst:
            print(inst)
            raise ValueError(
                'Invalid hypothesis spec, should be in the format <filename>:<index>') from inst

    # --- Figure 2: per-parameter distributions ---
    z = options.np_range
    pars = [col[len('free_'):] for col in debug.columns
            if col.startswith('free_') and not col.endswith('nll')]
    if not pars:
        # No free_<par> columns: plt.subplots(2, 0) would raise, so only show figure 1.
        plt.show()
        return
    # squeeze=False keeps ax2 two-dimensional even for a single parameter,
    # so that ax2[row, i] indexing below always works.
    fig2, ax2 = plt.subplots(2, len(pars), figsize=(15, 5), sharey=True, squeeze=False)
    fig2.subplots_adjust(left=0.05, right=0.98)
    if options.reference:
        xx = np.linspace(-z, z, options.nbins)
        yy = mu_hat.shape[0] * (xx[1] - xx[0]) * norm.pdf(xx, 0, 1)
    np_bins = np.linspace(-z, z, options.nbins)
    for i, par in enumerate(pars):
        ax2[0, i].set_title(par)
        aux_col = 'aux_' + par
        if aux_col in debug.columns:
            # NOTE(review): `x - -aux` adds the aux column to the fitted value; kept
            # exactly as in the original - confirm this is the intended sign convention.
            free_delta = debug['free_' + par] - -debug[aux_col]
            hypo_delta = debug['hypo_' + par] - -debug[aux_col]
        else:
            free_delta = debug['free_' + par]
            hypo_delta = debug['hypo_' + par]
        if options.hypo != '':
            print('Shifting distributions of %s by %g' % (par, hypo[par]))
            # Out-of-place subtraction: `-=` could write through to the DataFrame columns.
            free_delta = free_delta - hypo[par]
            hypo_delta = hypo_delta - hypo[par]
        ax2[0, i].hist(free_delta, bins=np_bins)
        ax2[1, i].hist(hypo_delta, bins=np_bins)
        if options.reference:
            ax2[0, i].plot(xx, yy)
            ax2[1, i].plot(xx, yy)
        if options.log_scale:
            ax2[0, i].set_yscale('log')
            ax2[1, i].set_yscale('log')
    ax2[0, 0].set_ylabel('free fit')
    ax2[1, 0].set_ylabel('hypothesis fit')
    plt.show()
import numpy as np
import matplotlib.pyplot as plt
from fastprof import Model, ScanSampler, OptiSampler

model = Model.create('models/highMass-1164.json')

gen_mu = 3
print('Will generate the following hypothesis: ', gen_mu)

np.random.seed(131071)
# Sampler tested against the expected parameters at POI = 0.1, POI bounded to (0, 2).
sampler = OptiSampler(
    model,
    test_hypo=model.expected_pars(0.1),
    mu0=0.1,
    poi_bounds=(0, 2),
    method='scalar',
)
# Generate 1000 toys and store them under the name 'test'.
dist = sampler.generate(1000)
dist.save('test')
from fastprof import Model
import matplotlib.pyplot as plt

# Load the model and generate one dataset at POI = 10.
model = Model.create('run/high_mass_gg_1300.json')
pars = model.expected_pars(10)
data = model.generate_data(pars)

# (parameter name, variation value, colour) triplets overlaid on the model plot.
np_variations = [
    ('dEff', 5, 'r'),
    ('xi', -10, 'g'),
]

plt.ion()
plt.figure(1)
model.plot(pars, data=data, variations=np_variations)
plt.yscale('log')
def run(argv=None):
    """Fit the fast model to a dataset and optionally test a hypothesis.

    Loads the model, selects a dataset (data file, Asimov specification, or the
    dataset stored in the model file), runs the free fit and prints the best-fit
    parameters. When ``options.hypo`` is given, the profile-likelihood ratio for that
    hypothesis is printed and, for single-POI models, the q~mu statistic and its
    p-values as well. The best-fit model is then plotted together with the data.

    Args:
        argv: argument list handed to the parser; ``None`` lets argparse fall back
            to ``sys.argv[1:]``.

    Raises:
        ValueError: missing model or dataset, or invalid POI specification.
    """
    parser = make_parser()
    # Forward argv so that programmatic callers are honoured (it was previously ignored).
    options = parser.parse_args(argv)
    if not options:
        parser.print_help()
        return

    model = Model.create(options.model_file)
    if model is None:
        raise ValueError('No valid model definition found in file %s.' % options.model_file)
    if options.regularize is not None:
        model.set_gamma_regularization(options.regularize)
    if options.cutoff is not None:
        model.cutoff = options.cutoff
    if options.setrange is not None:
        process_setranges(options.setrange, model)

    # Dataset selection: explicit data file > Asimov specification > model file.
    if options.data_file:
        data = Data(model).load(options.data_file)
        if data is None:
            raise ValueError('No valid dataset definition found in file %s.' % options.data_file)
        print('Using dataset stored in file %s.' % options.data_file)
    elif options.asimov is not None:
        try:
            sets = process_setvals(options.asimov, model)
        except Exception as inst:
            print(inst)
            raise ValueError("ERROR : invalid POI specification string '%s'." % options.asimov) from inst
        data = model.generate_expected(sets)
        print('Using Asimov dataset with parameters %s' % str(sets))
    else:
        data = Data(model).load(options.model_file)

    have_hypo = options.hypo is not None
    if have_hypo:
        try:
            sets = process_setvals(options.hypo, model)
        except Exception as inst:
            print(inst)
            raise ValueError("ERROR : invalid POI specification string '%s'." % options.hypo) from inst
        hypo_pars = model.expected_pars(sets)

    # Free fit
    opti = OptiMinimizer().set_pois_from_model(model)
    min_nll = opti.minimize(data)
    min_pars = opti.min_pars
    print('\n== Best-fit: nll = %g @ at parameter values =' % min_nll)
    print(min_pars)

    if have_hypo:
        tmu = opti.tmu(hypo_pars, data, hypo_pars)
        print('\n== Profile-likelihood ratio tmu = %g for hypothesis' % tmu, hypo_pars.dict(pois_only=True))
        print('-- Profiled NP values :\n' + str(opti.hypo_pars))
        # NOTE(review): the q~mu section needs `hypo_pars`, which is only bound when a
        # hypothesis was given, so it is kept under the same guard.
        if len(model.pois) == 1:
            print('\n== Computing the q~mu test statistic')
            # NOTE(review): the result of this call is not used below; the call is
            # retained in case it has side effects - confirm and drop if not needed.
            asimov = model.generate_expected(0, NPMinimizer(data))
            calc = QMuTildaCalculator(opti)
            plr_data = calc.compute_fast_q(hypo_pars, data)
            print('best-fit %s = % g' % (model.poi(0).name, opti.free_pars.pois[0]))
            print('tmu = % g' % plr_data.test_statistics['tmu'])
            print('q~mu = % g' % plr_data.test_statistics['q~mu'])
            print('pv = % g' % plr_data.pvs['pv'])
            print('cls = % g' % plr_data.pvs['cls'])

    plt.ion()
    plt.figure(1)
    model.plot(min_pars, data=data)
    if options.log_scale:
        plt.yscale('log')
def run(argv=None):
    """Scan tmu over a list of POI hypotheses and report the minimum and 1-sigma interval.

    The hypotheses come from ``options.hypos``, either as a range specification
    ``[poi:]min:max:num[:log]`` (``num + 1`` points, linearly or geometrically spaced)
    or as a '/'-separated list of POI assignments. The tmu values are computed with
    the fast model, the minimum is located, the tmu = 1 crossings are interpolated,
    and tmu is plotted against the POI.

    Args:
        argv: argument list handed to the parser; ``None`` lets argparse fall back
            to ``sys.argv[1:]``.

    Raises:
        ValueError: missing model or dataset, invalid Asimov or hypothesis
            specification, or unknown POI name.
    """
    parser = make_parser()
    # Forward argv so that programmatic callers are honoured (it was previously ignored).
    options = parser.parse_args(argv)
    if not options:
        parser.print_help()
        return

    # Define the model
    model = Model.create(options.model_file)
    if model is None:
        raise ValueError('No valid model definition found in file %s.' % options.model_file)
    if options.regularize is not None:
        model.set_gamma_regularization(options.regularize)
    if options.cutoff is not None:
        model.cutoff = options.cutoff
    if options.setrange is not None:
        process_setranges(options.setrange, model)

    # Define the data
    if options.data_file:
        data = Data(model).load(options.data_file)
        if data is None:
            raise ValueError('No valid dataset definition found in file %s.' % options.data_file)
        print('Using dataset stored in file %s.' % options.data_file)
    elif options.asimov is not None:
        try:
            sets = process_setvals(options.asimov, model)
        except Exception as inst:
            print(inst)
            raise ValueError("ERROR : invalid POI specification string '%s'." % options.asimov) from inst
        data = model.generate_expected(sets)
        print('Using Asimov dataset with parameters %s' % str(sets))
    else:
        data = Data(model).load(options.model_file)

    # Parse the hypothesis values.
    # A membership test is required here: str.find() returns -1 (truthy) when ':' is
    # absent and 0 (falsy) when it is the first character.
    if ':' in options.hypos:
        try:
            hypo_specs = options.hypos.split(':')
            poi_name = None
            if hypo_specs[-1] == 'log':
                # Fields are counted from the end so that an optional leading POI name
                # does not shift them: ..., min, max, num, 'log'.
                hypos = np.geomspace(float(hypo_specs[-4]), float(hypo_specs[-3]),
                                     int(hypo_specs[-2]) + 1)
                if len(hypo_specs) == 5:
                    poi_name = hypo_specs[0]
            else:
                hypos = np.linspace(float(hypo_specs[-3]), float(hypo_specs[-2]),
                                    int(hypo_specs[-1]) + 1)
                if len(hypo_specs) == 4:
                    poi_name = hypo_specs[0]
        except Exception as inst:
            print(inst)
            raise ValueError(
                "Could not parse list of hypothesis values '%s' : expected min:max:num[:log] format"
                % options.hypos) from inst
        if poi_name is not None:
            if poi_name not in model.pois:
                raise ValueError("Unknown POI '%s' in hypothesis definitions" % poi_name)
        else:
            poi_name = model.poi(0).name
        hypo_sets = [{poi_name: hypo} for hypo in hypos]
    else:
        try:
            hypo_sets = [process_setvals(spec, model, match_nps=False)
                         for spec in options.hypos.split('/')]
        except Exception as inst:
            print(inst)
            raise ValueError(
                "Could not parse list of hypothesis values '%s' : expected /-separated list of POI assignments"
                % options.hypos) from inst
        # The scan below is reported along a single POI; use the model's first one.
        poi_name = model.poi(0).name
    hypos = [model.expected_pars(sets) for sets in hypo_sets]

    # Compute the tmu values
    calc = TMuCalculator(OptiMinimizer(niter=options.iterations).set_pois_from_model(model))
    raster = calc.compute_fast_results(hypos, data)
    hypos = [hypo[poi_name] for hypo in raster.plr_data.keys()]
    tmus = [plr_data.test_statistics['tmu'] for plr_data in raster.plr_data.values()]

    # Find the minimal tmu
    min_index = np.argmin(tmus)
    if min_index == 0:
        print('Found minimum at the lower edge of the scan, returning this value')
        min_hypo = hypos[min_index]
    elif min_index == len(tmus) - 1:
        # The last valid index is len(tmus) - 1; comparing against len(tmus) never matched.
        print('Found minimum at the upper edge of the scan, returning this value')
        min_hypo = hypos[min_index]
    else:
        # Interior minimum: refine with a free fit started from the best scan point.
        calc.minimizer.minimize(data, list(raster.plr_data.keys())[min_index])
        min_hypo = calc.minimizer.min_pars[poi_name]

    # Compute the tmu=1 crossings and uncertainties
    crossings = raster.interpolate(hypos, tmus, 1)
    if len(crossings) == 2:
        print('1-sigma interval : %g + %g - %g'
              % (min_hypo, crossings[1] - min_hypo, min_hypo - crossings[0]))

    # Plot the result
    plt.ion()
    plt.figure(1)
    plt.plot(hypos, tmus)
    plt.ylim(0, None)
    plt.xlabel(poi_name)
    plt.ylabel('t_mu(%s)' % poi_name)
import numpy as np
import matplotlib.pyplot as plt
from fastprof import Model, Data, Parameters, NPMinimizer, OptiMinimizer

# The same JSON file provides both the model and the dataset.
filename = 'run/high_mass_gg_1300.json'
model = Model.create(filename)
ws_data = Data(model).load(filename)

# Hand-built dataset left by the author as an alternative:
#   n = np.array([6, 4, 2, 0, 0, 0, 0, 0, 1, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0 ])
#   aux_a = np.array([-1])
#   aux_b = np.array([])
#   data = Data(model, n, aux_a, aux_b)
#   data.set_expected(Parameters(mu))
print(model)

# Free fit
opti = OptiMinimizer(ws_data)
opti.minimize()
min_pars = opti.min_pars
print(min_pars)

# NP profiling at a fixed POI value
mu = 1
print('mu = ', mu)
mini = NPMinimizer(mu, ws_data)
mini.profile()
min1 = mini.min_pars
nll_at_min1 = model.nll(min1, ws_data)
print(min1, nll_at_min1)

# POI value found by the free fit
print('mu = ', min_pars.mu)
# Compare fastprof NP profiling with pyhf's fixed-POI fit for a few POI values.
test_mus = np.linspace(0, 5, 6)
n_np = 2
bkg = np.array([1.0, 10.0])  # specific to this case!
sig = np.array([1.0, 0])  # specific to this case!
a = np.array([[0.2], [0.2]])  # specific to this case!
b = np.array([[0.2], [0.2]])  # specific to this case!
#pyhf_data = np.array([ 6.00000000e+00, 9.00000000e+00, 1.29864346e-01, -5.74496450e-01 ])
pyhf_data = np.array([2, 10, 0, 0])

# Read the pyhf specification; the context manager closes the file handle,
# which the previous json.load(open(...)) left open.
with open('fastprof/models/test1.json', 'r') as spec_file:
    spec = json.load(spec_file)
ws = pyhf.Workspace(spec)
pyhf_model = ws.model()

for mu in test_mus:
    print('-------------------------------------')
    print('Testing the following hypothesis: ', mu)
    model = Model(sig, bkg, alphas=['acc_sys'], betas=['bkg_sys'], a=a, b=b)
    data = Data(model).set_from_pyhf_data(pyhf_data, pyhf_model)
    #print(model)

    # fastprof: profile the NPs at fixed mu
    npm = NPMinimizer(mu, data)
    nll = npm.profile_nll()
    # pyhf: fixed-POI fit on the same data
    pars, val = pyhf.infer.mle.fixed_poi_fit(mu, pyhf_data, pyhf_model, return_fitted_val=True)

    print('fast pars |', npm.min_pars.array())
    print('fast nexp |', model.s_exp(npm.min_pars), model.b_exp(npm.min_pars), model.n_exp(npm.min_pars))
    print(nll)

    # Evaluate the fastprof model at pyhf's fitted parameters (first three entries).
    pyhf_pars = Parameters(pars[0], pars[1], pars[2])
    print('pyhf data |', pyhf_data)
    print('pyhf pars |', pyhf_pars.array())
    print('pyhf nexp |', model.s_exp(pyhf_pars), model.b_exp(pyhf_pars), model.n_exp(pyhf_pars))
    print(model.nll(pyhf_pars, data))
from fastprof import Model, Data, NPMinimizer, OptiMinimizer
import numpy as np

mu = 3.7
print('mu = ', mu)

# Observed counts, then signal (scaled by mu) and background yields.
n = np.array([7, 12])
sig = np.array([1.0, 0]) * mu
bkg = np.array([1.0, 10.0])
# Auxiliary values handed to Data for the two nuisance parameters.
aux_a = np.array([1])
aux_b = np.array([-1.6])
# Per-bin impact arrays for the 'acc_sys' and 'bkg_sys' parameters.
a = np.array([[0.2], [0.2]])
b = np.array([[0.2], [0.2]])

model = Model(sig, bkg, alphas=['acc_sys'], betas=['bkg_sys'], a=a, b=b)
data = Data(model, n, aux_a, aux_b)
print(model)

# Profile at fixed mu; profile() returns three values.
minimizer = NPMinimizer(mu, data)
best_a, best_b, best_c = minimizer.profile()
for label, value in (('hat(a) =', best_a), ('hat(b) =', best_b), ('hat(c) =', best_c)):
    print(label, value)

# Derivative cross-check left disabled by the author:
#   der_a, der_b = fastprof.derivatives(model, best_a, best_b)
#   der_a, der_b = model.derivatives(best_a, best_b)
#   print('dl/da =', der_a)
#   print('dl/db =', der_b)
def run(argv=None):
    """Plot a fast model, optionally with data, residuals and NP variations.

    Loads the model, selects a dataset (data file, Asimov specification, or the
    dataset stored in the model file), chooses the parameter values to plot
    (``options.setval``, optionally profiled; a free fit when only
    ``options.profile`` is set; otherwise all POIs at 0) and draws the model.
    ``options.variations`` is either a single number (that variation is applied
    to every NP, one panel per NP) or a comma-separated list of
    ``var=val[:color]`` items overlaid on figure 1.

    Args:
        argv: argument list handed to the parser; ``None`` lets argparse fall back
            to ``sys.argv[1:]``.

    Raises:
        ValueError: missing model or dataset, or invalid Asimov / POI / axis-range /
            variations specification.
        KeyError: ``options.channel`` is not a channel of the model.
    """
    parser = make_parser()
    # Forward argv so that programmatic callers are honoured (it was previously ignored).
    options = parser.parse_args(argv)
    if not options:
        parser.print_help()
        return

    model = Model.create(options.model_file)
    if model is None:
        raise ValueError('No valid model definition found in file %s.' % options.model_file)
    if options.channel is not None and options.channel not in model.channels():
        raise KeyError('Channel %s not found in model.' % options.channel)

    # Dataset selection: explicit data file > Asimov specification > model file.
    if options.data_file is not None:
        data = Data(model).load(options.data_file)
        if data is None:
            raise ValueError('No valid dataset definition found in file %s.' % options.data_file)
        print('Using dataset stored in file %s.' % options.data_file)
    elif options.asimov is not None:
        try:
            sets = process_setvals(options.asimov, model)
            data = model.generate_expected(sets)
        except Exception as inst:
            print(inst)
            raise ValueError("Cannot define an Asimov dataset from options '%s'." % options.asimov) from inst
        print('Using Asimov dataset with POIs %s.' % str(sets))
    else:
        data = Data(model).load(options.model_file)

    # Parameter values at which the model is drawn.
    if options.setval is not None:
        try:
            poi_dict = process_setvals(options.setval, model, match_nps=False)
        except Exception as inst:
            print(inst)
            raise ValueError("ERROR : invalid POI specification string '%s'." % options.setval) from inst
        pars = model.expected_pars(poi_dict)
        if options.profile:
            mini = OptiMinimizer()
            mini.profile_nps(pars, data)
            print('Minimum: nll = %g @ parameter values : %s' % (mini.min_nll, mini.min_pars))
            pars = mini.min_pars
    elif data is not None and options.profile:
        mini = OptiMinimizer().set_pois_from_model(model)
        mini.minimize(data)
        pars = mini.min_pars
    else:
        pars = model.expected_pars([0] * model.npois)

    # Optional axis ranges, given as "min,max".
    xmin = xmax = ymin = ymax = None
    if options.x_range is not None:
        try:
            xmin, xmax = [float(p) for p in options.x_range.split(',')]
        except Exception as inst:
            print(inst)
            raise ValueError('Invalid X-axis range specification %s, expected x_min,x_max'
                             % options.x_range) from inst
    if options.y_range is not None:
        try:
            ymin, ymax = [float(p) for p in options.y_range.split(',')]
        except Exception as inst:
            print(inst)
            raise ValueError('Invalid Y-axis range specification %s, expected y_min,y_max'
                             % options.y_range) from inst

    # Main plot: a single pad, or model + residuals pads when residuals are requested.
    plt.ion()
    show_partial = options.plot_without is not None or options.plot_alone is not None
    if not options.residuals:
        plt.figure(1, figsize=(8, 8), dpi=96)
        model.plot(pars, data=data, labels=options.variations is None)
        if show_partial:
            model.plot(pars, only=options.plot_alone, exclude=options.plot_without,
                       labels=options.variations is None)
        if options.log_scale:
            plt.yscale('log')
        if xmin is not None:
            plt.xlim(xmin, xmax)
        if ymin is not None:
            plt.ylim(ymin, ymax)
    else:
        fig1, ax1 = plt.subplots(nrows=2, ncols=1, figsize=(8, 8), dpi=96)
        model.plot(pars, data=data, canvas=ax1[0])
        if options.log_scale:
            ax1[0].set_yscale('log')
        if xmin is not None:
            ax1[0].set_xlim(xmin, xmax)
        if ymin is not None:
            ax1[0].set_ylim(ymin, ymax)
        model.plot(pars, data=data, canvas=ax1[1], residuals=options.residuals,
                   labels=options.variations is None)
    if options.output_file is not None:
        plt.savefig(options.output_file)

    # Variations: a single number means "vary every NP by this amount";
    # otherwise a comma-separated list of var=val[:color] items.
    variations = None
    colors_pos = ['purple', 'green', 'darkblue', 'lime']
    colors_neg = ['darkred', 'red', 'orange', 'magenta']
    if options.variations is not None:
        # First try the single-number format
        try:
            var_val = float(options.variations)
            variations = 'all'
        except ValueError:
            # Not a number: fall through to the list format (was a bare `except`).
            pass
        if variations is None:
            variations = []
            try:
                for spec in options.variations.split(','):
                    specfields = spec.split(':')
                    varval = specfields[0]
                    color = specfields[1] if len(specfields) == 2 else None
                    var, val = varval.split('=')
                    try:
                        val = float(val)
                    except ValueError:
                        raise ValueError('Invalid numerical value %s.' % val)
                    if var not in model.nps:
                        raise KeyError('Parameter %s is not defined in the model.' % var)
                    # Default colours cycle through a palette chosen by the sign of the variation.
                    colors = colors_pos if val > 0 else colors_neg
                    if color is None:
                        color = colors[len(variations) % len(colors)]
                    variations.append((var, val, color))
            except Exception as inst:
                print(inst)
                raise ValueError(
                    'Invalid variations specification %s : should be a comma-separated list of var=val[:color] items, or a single number'
                    % options.variations) from inst

    if variations == 'all':
        # One panel per NP on a near-square grid; unused trailing panels are removed.
        n1 = math.ceil(math.sqrt(model.nnps))
        n2 = math.ceil(model.nnps / n1)
        fig_nps, ax_nps = plt.subplots(nrows=n1, ncols=n2, figsize=(18, 12), dpi=96)
        for i in range(len(model.nps), n1 * n2):
            fig_nps.delaxes(ax_nps.flatten()[i])
        for par, ax in zip(model.nps, ax_nps.flatten()):
            par_variations = [(par, var_val, 'r'), (par, -var_val, 'g')]
            model.plot(pars, data=data, variations=par_variations, canvas=ax)
            if show_partial:
                model.plot(pars, variations=par_variations, canvas=ax,
                           only=options.plot_alone, exclude=options.plot_without)
            if options.log_scale:
                ax.set_yscale('log')
            if xmin is not None:
                ax.set_xlim(xmin, xmax)
            if ymin is not None:
                ax.set_ylim(ymin, ymax)
    elif variations is not None:
        plt.figure(1)
        model.plot(pars, variations=variations)
        if show_partial:
            model.plot(pars, variations=variations, only=options.plot_alone,
                       exclude=options.plot_without)
        if options.log_scale:
            plt.yscale('log')

    # NOTE(review): the nesting of this save was lost in the collapsed source; it is
    # kept at function level, so the current figure is saved even when no variations
    # were requested - confirm against the original layout.
    if options.output_file is not None:
        split_name = os.path.splitext(options.output_file)
        plt.savefig(split_name[0] + '_variations' + split_name[1])
def run(argv=None):
    """Plot linearity checks of NP impacts for selected bins of one sample.

    For every bin in ``options.bins`` one figure is produced with one panel per
    nuisance parameter. Each panel shows the relative yield variation stored in the
    validation data, the reference impact points of the sample, and the model
    prediction with ``use_linear_nps`` enabled (dashed red) and disabled (blue,
    unless ``options.no_nli``). With ``options.inversion_plots`` a second panel shows
    minus the squared relative variation for both settings.

    Args:
        argv: argument list handed to the parser; ``None`` lets argparse fall back
            to ``sys.argv[1:]``.

    Raises:
        ValueError: invalid bin or y-range specification, or missing model.
        KeyError: unknown channel or sample.
    """
    parser = make_parser()
    # Forward argv so that programmatic callers are honoured (it was previously ignored).
    options = parser.parse_args(argv)
    if not options:
        parser.print_help()
        sys.exit(0)

    try:
        bins = [int(b) for b in options.bins.split(',')]
    except Exception as inst:
        print(inst)
        raise ValueError('Invalid bin specification %s : the format should be bin1,bin2,...'
                         % options.bins) from inst
    if options.yrange != '':
        try:
            y_min, y_max = [float(p) for p in options.yrange.split(',')]
        except Exception as inst:
            print(inst)
            raise ValueError('Invalid range specification %s, expected y_min,y_max'
                             % options.yrange) from inst

    model = Model().load(options.model_file)
    if model is None:
        raise ValueError('No valid model definition found in file %s.' % options.model_file)
    if options.cutoff is not None:
        model.cutoff = options.cutoff

    # Channel and sample default to the first ones defined in the model.
    if options.channel is not None:
        channel = model.channel(options.channel)
        if not channel:
            raise KeyError('Channel %s not found in model.' % options.channel)
    else:
        channel = list(model.channels.values())[0]
    if options.sample is not None:
        sample = channel.sample(options.sample)
        if not sample:
            raise KeyError('Sample %s not found in channel %s.' % (options.sample, channel.name))
    else:
        sample = list(channel.samples.values())[0]

    # The validation file defaults to <model_file stem>_validation<ext>.
    if options.validation_file is not None:
        validation_file = options.validation_file
    else:
        split_name = os.path.splitext(options.model_file)
        validation_file = split_name[0] + '_validation' + split_name[1]
    data = ValidationData(model, validation_file)
    print('Validating for POI value %s = %g' % (model.poi(0).name, data.poi))

    plt.ion()
    # Near-square grid with one cell per nuisance parameter.
    nplots = model.nnps
    nc = math.ceil(math.sqrt(nplots))
    nr = math.ceil(nplots / nc)
    cont_x = np.linspace(data.points[0], data.points[-1], 100)
    pars = model.expected_pars(data.poi)
    channel_offset = model.channel_offsets[channel.name]
    sample_index = model.sample_indices[sample.name]
    nexp0 = model.n_exp(pars)[sample_index, channel_offset:channel_offset + channel.nbins()]
    ax_vars = []
    ax_invs = []

    def nexp_var(pars, par, x):
        """Yields of the selected sample and channel after setting parameter `par` to `x`."""
        return model.n_exp(pars.set(par, x))[sample_index,
                                             channel_offset:channel_offset + channel.nbins()]

    for b in bins:
        fig = plt.figure(figsize=(8, 8), dpi=96)
        fig.suptitle('Linearity checks for sample %s, bin [%g, %g]'
                     % (sample.name, channel.bins[b]['lo_edge'], channel.bins[b]['hi_edge']))
        gs = gridspec.GridSpec(nrows=nr, ncols=nc, wspace=0.3, hspace=0.3,
                               top=0.9, bottom=0.05, left=0.1, right=0.95)
        for i, par in enumerate(model.nps.keys()):
            cell = gs[i // nc, i % nc]
            if options.inversion_plots:
                # Split the cell vertically: variation plot on top, inversion plot below.
                sgs = gridspec.GridSpecFromSubplotSpec(2, 1, subplot_spec=cell,
                                                       wspace=0.1, hspace=0.1)
            else:
                sgs = [cell]
            # Start from fresh expected parameters for each NP.
            pars = model.expected_pars(data.poi)

            # Relative variation of bin b, and minus its square, for both model settings.
            model.use_linear_nps = True
            vars_lin = [nexp_var(pars, par, x)[b] / nexp0[b] - 1 for x in cont_x]
            rvar_lin = [-((nexp_var(pars, par, x)[b] - nexp0[b]) / nexp0[b])**2 for x in cont_x]
            model.use_linear_nps = False
            vars_nli = [nexp_var(pars, par, x)[b] / nexp0[b] - 1 for x in cont_x]
            rvar_nli = [-((nexp_var(pars, par, x)[b] - nexp0[b]) / nexp0[b])**2 for x in cont_x]

            ax_var = fig.add_subplot(sgs[0])
            ax_var.set_title(par)
            ax_var.plot(data.points,
                        data.variations[channel.name][par][sample_index, b, :] - 1, 'ko')
            # The marker was given twice (fmt 's' and marker='o'); the keyword took
            # precedence, so 'o' alone draws the same points without the Matplotlib warning.
            ax_var.plot([0], [0], 'o', color='purple')
            ax_var.plot(sample.pos_vars[par], sample.pos_imps[par][:, b], 'o', color='red')
            ax_var.plot(sample.neg_vars[par], sample.neg_imps[par][:, b], 'o', color='red')
            ax_var.plot(cont_x, vars_lin, 'r--')
            ax_vars.append(ax_var)
            if not options.no_nli:
                ax_var.plot(cont_x, vars_nli, 'b')
            if options.yrange:
                ax_var.set_ylim(y_min, y_max)
            if options.inversion_plots:
                ax_inv = fig.add_subplot(sgs[1], sharex=ax_var)
                ax_inv.plot(cont_x, rvar_lin, 'r--')
                ax_inv.plot(cont_x, rvar_nli, 'b')
                if options.inv_range:
                    ax_inv.set_ylim(-options.inv_range, 0)
                ax_invs.append(ax_inv)

        # FigureCanvasBase.set_window_title was deprecated in Matplotlib 3.4 and later
        # removed; the figure manager provides the supported equivalent.
        fig.canvas.manager.set_window_title('Linearity checks for sample %s, bin %g'
                                            % (sample.name, b))
        if options.output_file != '':
            if options.output_file is not None:
                # NOTE(review): an explicit output file is reused for every bin, so later
                # bins overwrite earlier ones - confirm this is intended.
                output_file = options.output_file
            else:
                split_name = os.path.splitext(options.model_file)
                output_file = split_name[0] + '-%s-bin_%d.png' % (options.sample, b)
            plt.savefig(output_file)
def run(argv=None):
    """Compute upper limits on the POI from the fast model using sampling distributions.

    Parses a colon-separated list of hypotheses, selects a dataset, computes the
    asymptotic results with the fast model and then, if ``options.ntoys`` is non-zero,
    generates toys under the tested hypothesis (CL_{s+b}) and under the same
    hypothesis with the POI set to 0 (CL_b). Sampling-based p-values, optional
    expected bands and the corresponding limits are computed and printed, plotted
    unless ``options.batch_mode`` is set, and written to ``<output_file>_results.json``.

    Args:
        argv: argument list handed to the parser; ``None`` lets argparse fall back
            to ``sys.argv[1:]``.

    Raises:
        ValueError: missing model or dataset, invalid hypothesis or Asimov
            specification, unknown test statistic, or more than one POI.
    """
    parser = make_parser()
    options = parser.parse_args(argv)
    if not options:
        parser.print_help()
        sys.exit(0)

    model = Model.create(options.model_file)
    if model is None:
        raise ValueError('No valid model definition found in file %s.' % options.model_file)
    if options.regularize is not None:
        model.set_gamma_regularization(options.regularize)
    if options.cutoff is not None:
        model.cutoff = options.cutoff

    try:
        hypos = [Parameters(process_setvals(spec, model), model=model)
                 for spec in options.hypos.split(':')]
    except Exception as inst:
        print(inst)
        raise ValueError(
            "Could not parse list of hypothesis values '%s' : expected colon-separated list of variable assignments"
            % options.hypos) from inst

    # Dataset selection: explicit data file > Asimov specification > model file.
    if options.data_file:
        data = Data(model).load(options.data_file)
        if data is None:
            raise ValueError('No valid dataset definition found in file %s.' % options.data_file)
        print('Using dataset stored in file %s.' % options.data_file)
    elif options.asimov is not None:
        try:
            sets = [v.replace(' ', '').split('=') for v in options.asimov.split(',')]
            data = model.generate_expected(sets)
        except Exception as inst:
            print(inst)
            raise ValueError("Cannot define an Asimov dataset from options '%s'." % options.asimov) from inst
        print('Using Asimov dataset with POIs %s.' % str(sets))
    else:
        data = Data(model).load(options.model_file)
        if data is None:
            # The dataset was read from the model file here, so report that file
            # (the message previously named options.data_file).
            raise ValueError('No valid dataset definition found in file %s.' % options.model_file)
        print('Using dataset stored in file %s.' % options.model_file)

    # Parameter bounds used at generation, given as name:[min]:[max],...
    gen_bounds = []
    if options.bounds:
        bound_specs = options.bounds.split(',')
        try:
            for spec in bound_specs:
                fields = spec.split(':')
                gen_bounds.append(ParBound(fields[0],
                                           float(fields[1]) if fields[1] != '' else None,
                                           float(fields[2]) if fields[2] != '' else None))
        except Exception:
            print('ERROR: could not parse parameter bound specification "%s", expected in the form name1:[min]:[max],name2:[min]:[max],...'
                  % options.bounds)
            raise

    # Test-statistic calculator; both supported statistics are limited to a single POI.
    if options.test_statistic == 'q~mu':
        if len(model.pois) > 1:
            raise ValueError('Currently not supporting more than 1 POI for this operation')
        calc = QMuTildaCalculator(OptiMinimizer().set_pois_from_model(model))
    elif options.test_statistic == 'q_mu':
        if len(model.pois) > 1:
            raise ValueError('Currently not supporting more than 1 POI for this operation')
        calc = QMuCalculator(OptiMinimizer().set_pois_from_model(model))
    else:
        raise ValueError('Unknown test statistic %s' % options.test_statistic)

    # Check the fastprof CLs against the ones in the reference: in principle this should match well,
    # otherwise it means what we generate isn't exactly comparable to the observation, which would be a problem...
    if options.ntoys > 0:
        print('Check CL computed from fast model against those of the full model (a large difference would require to correct the sampling distributions) :')
    faster = calc.compute_fast_results(hypos, data)
    faster.print(verbosity=options.verbosity)
    if options.ntoys == 0:
        return

    if options.seed is not None:
        np.random.seed(options.seed)
    niter = options.iterations
    samplers_clsb = []
    samplers_cl_b = []

    print('Running with POI %s, bounds %s, and %d iteration(s).'
          % (str(calc.minimizer.init_pois.dict(pois_only=True)), str(calc.minimizer.bounds), niter))

    # Two samplers per hypothesis: toys generated at the tested hypothesis (CLs+b)
    # and at the same hypothesis with the first POI set to 0 (CLb).
    for fast_plr_data in faster.plr_data.values():
        test_hypo = fast_plr_data.hypo
        gen_hypo = test_hypo
        tmu_A0 = fast_plr_data.test_statistics['tmu_A0']
        gen0_hypo = gen_hypo.clone().set(model.poi(0).name, 0)
        # NOTE(review): tmu_Amu is given the tmu_A0 value for both samplers, as in the
        # original code - confirm this is intended.
        clsb = OptiSampler(model, test_hypo, print_freq=options.print_freq, bounds=gen_bounds,
                           debug=options.debug, niter=niter, tmu_Amu=tmu_A0, tmu_A0=tmu_A0,
                           gen_hypo=gen_hypo)
        cl_b = OptiSampler(model, test_hypo, print_freq=options.print_freq, bounds=gen_bounds,
                           debug=options.debug, niter=niter, tmu_Amu=tmu_A0, tmu_A0=tmu_A0,
                           gen_hypo=gen0_hypo)
        clsb.minimizer.set_pois_from_model(model)
        cl_b.minimizer.set_pois_from_model(model)
        samplers_clsb.append(clsb)
        samplers_cl_b.append(cl_b)

    opti_samples = CLsSamples(
        Samples(samplers_clsb, options.output_file),
        Samples(samplers_cl_b, options.output_file + '_clb')
    ).generate_and_save(options.ntoys, break_locks=options.break_locks)

    if options.truncate_dist:
        opti_samples.cut(None, options.truncate_dist)

    poi = faster.pois()[list(faster.pois())[0]]

    # Sampling-based p-values, evaluated at the asymptotic 'pv' of each hypothesis.
    for plr_data in faster.plr_data.values():
        plr_data.pvs['sampling_pv'] = opti_samples.clsb.pv(plr_data.hypo, plr_data.pvs['pv'], with_error=True)
        plr_data.pvs['sampling_clb'] = opti_samples.cl_b.pv(plr_data.hypo, plr_data.pvs['pv'], with_error=True)
        plr_data.pvs['sampling_cls'] = opti_samples.pv(plr_data.hypo, plr_data.pvs['pv'], with_error=True)

    if options.bands:
        sampling_bands = opti_samples.bands(options.bands)
        for band in np.linspace(-options.bands, options.bands, 2*options.bands + 1):
            for plr_data, band_point in zip(faster.plr_data.values(), sampling_bands[band]):
                plr_data.pvs['sampling_cls_%+d' % band] = band_point

    def limit(rast, key, description, with_error=False):
        """Print and return the limit where p-value `key` of `rast` crosses 1 - CL.

        With `with_error`, the contour result is indexed as (value, up, down) and
        half the up-down difference is printed as the uncertainty.
        """
        limit_result = rast.contour(key, 1 - options.cl, with_error=with_error)
        limit_value = limit_result if not with_error else limit_result[0]
        error_str = ''
        if with_error:
            if limit_result[1] is not None and limit_result[2] is not None:
                limit_error = (limit_result[1] - limit_result[2]) / 2
            else:
                limit_error = None
            error_str = '+/- %g' % limit_error if limit_error is not None else ''
        if limit_value is not None:
            print(description + ' : UL(%g%%) = %g %s (N = %s)'
                  % (100*options.cl, limit_value, error_str,
                     str(model.n_exp(model.expected_pars(limit_value)).sum(axis=1))))
        return limit_result

    faster.print(keys=['sampling_pv', 'sampling_cls', 'sampling_clb'], verbosity=1)

    limit_asy_full_clsb = limit(faster, 'pv', 'Asymptotics, fast model, CLsb')
    limit_sampling_clsb = limit(faster, 'sampling_pv', 'Sampling , fast model, CLsb', with_error=True)
    limit_asy_full_cls = limit(faster, 'cls', 'Asymptotics, fast model, CLs ')
    limit_sampling_cls = limit(faster, 'sampling_cls', 'Sampling , fast model, CLs ', with_error=True)

    if options.bands:
        limit_sampling_cls_bands = {}
        for band in np.linspace(-options.bands, options.bands, 2*options.bands + 1):
            limit_sampling_cls_bands[band] = limit(
                faster, 'sampling_cls_%+d' % band,
                'Expected limit band, fast model, %+d sigma band' % band)

    # Plot results
    if not options.batch_mode:
        plt.ion()
        poi_values = [hypo[poi.name] for hypo in faster.plr_data]

        fig1 = plt.figure(1)
        plt.suptitle('$CL_{s+b}$')
        plt.xlabel(model.poi(0).name)
        plt.ylabel('$CL_{s+b}$')
        # Shaded band: sampling p-value +/- its uncertainty.
        plt.fill_between(poi_values,
                         [plr_data.pvs['sampling_pv'][0] + plr_data.pvs['sampling_pv'][1] for plr_data in faster.plr_data.values()],
                         [plr_data.pvs['sampling_pv'][0] - plr_data.pvs['sampling_pv'][1] for plr_data in faster.plr_data.values()],
                         facecolor='b', alpha=0.5)
        plt.plot(poi_values, [plr_data.pvs['pv'] for plr_data in faster.plr_data.values()],
                 options.marker + 'r:', label='Asymptotics')
        plt.plot(poi_values, [plr_data.pvs['sampling_pv'][0] for plr_data in faster.plr_data.values()],
                 options.marker + 'b-', label='Sampling')
        plt.legend(loc=1)  # 1 -> upper right
        plt.axhline(y=1 - options.cl, color='k', linestyle='dotted')

        fig2 = plt.figure(2)
        plt.suptitle('$CL_s$')
        plt.xlabel(model.poi(0).name)
        plt.ylabel('$CL_s$')
        if options.bands:
            opti_samples.plot_bands(options.bands)
        plt.fill_between(poi_values,
                         [plr_data.pvs['sampling_cls'][0] + plr_data.pvs['sampling_cls'][1] for plr_data in faster.plr_data.values()],
                         [plr_data.pvs['sampling_cls'][0] - plr_data.pvs['sampling_cls'][1] for plr_data in faster.plr_data.values()],
                         facecolor='b', alpha=0.5)
        plt.plot(poi_values, [plr_data.pvs['cls'] for plr_data in faster.plr_data.values()],
                 options.marker + 'r:', label='Asymptotics')
        plt.plot(poi_values, [plr_data.pvs['sampling_cls'][0] for plr_data in faster.plr_data.values()],
                 options.marker + 'b-', label='Sampling')
        plt.legend(loc=1)  # 1 -> upper right
        plt.axhline(y=1 - options.cl, color='k', linestyle='dotted')
        fig1.savefig(options.output_file + '_clsb.pdf')
        fig2.savefig(options.output_file + '_cls.pdf')
        fig2.savefig(options.output_file + '_cls.png')
        plt.show()

    # Store the limits (with their up/down values where available) as JSON.
    jdict = {}
    jdict['cl'] = options.cl
    jdict['poi_name'] = model.poi(0).name
    jdict['poi_unit'] = model.poi(0).unit
    jdict['limit_sampling_CLs'] = limit_sampling_cls[0]
    jdict['limit_sampling_CLs_up'] = limit_sampling_cls[1]
    jdict['limit_sampling_CLs_dn'] = limit_sampling_cls[2]
    jdict['limit_asymptotics_CLs'] = limit_asy_full_cls
    jdict['limit_sampling_CLsb'] = limit_sampling_clsb[0]
    jdict['limit_sampling_CLsb_up'] = limit_sampling_clsb[1]
    jdict['limit_sampling_CLsb_dn'] = limit_sampling_clsb[2]
    jdict['limit_asymptotics_CLsb'] = limit_asy_full_clsb
    if options.bands:
        for band in np.linspace(-options.bands, options.bands, 2*options.bands + 1):
            jdict['limit_sampling_CLs_expected_band_%+d' % band] = limit_sampling_cls_bands[band]

    with open(options.output_file + '_results.json', 'w') as fd:
        json.dump(jdict, fd, ensure_ascii=True, indent=3)
def run(argv=None):
    """Plot a histogram of sampled values loaded from a NumPy file.

    The samples are read with ``np.load`` from the first positional filename.
    They are histogrammed as-is, or, when ``--t_value`` is ``q_mu`` or
    ``q~mu``, after conversion through the ``asymptotic_ts`` method of the
    object built by the matching calculator's ``make_q``.  Optionally a
    reference curve is overlaid and the figure is saved to a file.

    Args:
        argv: Command-line arguments to parse.  ``None`` (the default) lets
            argparse read ``sys.argv[1:]``.

    Raises:
        ValueError: If the X-axis range or the hypothesis specification cannot
            be interpreted, if no model can be created from the model file, or
            if a test-statistic conversion is requested without a hypothesis.
    """
    parser = make_parser()
    # Forward argv so that programmatic callers can supply their own
    # arguments; parse_args(None) still falls back to sys.argv[1:].
    options = parser.parse_args(argv)

    samples = np.load(options.filename[0])

    # Histogram range: explicit "x_min,x_max" if given, otherwise [0, 1] for
    # raw samples and [-10, 10] when a test-statistic conversion is requested.
    if options.x_range:
        try:
            x_min, x_max = [float(p) for p in options.x_range.split(',')]
        except Exception as inst:
            print(inst)
            raise ValueError(
                'Invalid X-axis range specification %s, expected x_min,x_max'
                % options.x_range) from inst
    elif options.t_value == '':
        x_min, x_max = 0, 1
    else:
        x_min, x_max = -10, 10

    # Optional hypothesis, given as <filename>:<index> into the raster's
    # PLR data entries.
    plr_data = None
    if options.hypo != '':
        model = Model.create(options.model_file)
        if model is None:
            raise ValueError('No valid model definition found in file %s.'
                             % options.model_file)
        try:
            filename, index = options.hypo.split(':')
            index = int(index)
            raster = Raster('data', model=model, filename=filename)
            hypo, plr_data = list(raster.plr_data.items())[index]
            print('Using hypothesis %s' % str(hypo.dict(pois_only=True)))
        except Exception as inst:
            print(inst)
            raise ValueError(
                'Invalid hypothesis spec, should be in the format '
                '<filename>:<index>') from inst

    plt.ion()
    if options.log_scale:
        plt.yscale('log')
    plt.suptitle(options.filename[0])

    # Both supported test statistics follow the same recipe and differ only
    # in the calculator class providing make_q().
    q_calculators = {'q_mu': QMuCalculator, 'q~mu': QMuTildaCalculator}
    q = None
    if options.t_value in q_calculators:
        if plr_data is None:
            raise ValueError(
                'A signal hypothesis must be provided (--hypo option) '
                'to convert to %s values' % options.t_value)
        q = q_calculators[options.t_value].make_q(plr_data)
        values = np.array([q.asymptotic_ts(pv) for pv in samples])
    else:
        values = samples
    plt.hist(values, bins=options.nbins, range=[x_min, x_max])
    plt.show()

    if options.reference:
        edges = np.linspace(x_min, x_max, options.nbins + 1)
        dx = edges[1] - edges[0]
        bin_norm = len(samples)
        if q is not None:
            # Expected entries per bin: sample count times the integral of
            # the asymptotic PDF over the bin.
            yy = [bin_norm * quad(q.asymptotic_pdf, x, x + dx)[0]
                  for x in edges[:-1]]
        else:
            # Constant reference: sample count times the bin width.
            yy = np.full(options.nbins, bin_norm * dx)
        # Draw the reference at the bin centers.
        plt.plot(edges[:-1] + dx / 2, yy)
        # NOTE(review): the flattened source does not show whether this call
        # belongs to the reference branch or to the function body; it is kept
        # with the reference curve here -- confirm against the original layout.
        plt.ylim(1E-1)

    if options.output_file != '':
        plt.savefig(options.output_file)