def generate_q0_via_shape_fit(data, bin_edges, template_params, template_pdf):
    '''Build the q0 statistic from two binned template fits to ``data``.

    ``data`` is histogrammed with ``bin_edges`` and the resulting counts are
    fit twice with a ``Model`` built from ``template_pdf``:

    * once with a copy of ``template_params`` as supplied (signal + bg), and
    * once with a copy in which ``A`` is set to 0 and frozen (bg only).

    In both copies ``n_tot`` is set to ``len(data)``.

    Returns ``2 * (fun_bg_only - fun_signal)``, where ``fun`` is the ``fun``
    attribute of each fit result (treated here as the NLL).  The value is
    not clamped at zero.
    '''
    n_events = len(data)
    counts, bin_edges = np.histogram(data, bin_edges, range=(100, 180))
    # NOTE(review): n_tot is len(data), not counts.sum(); the two can differ
    # if some entries fall outside the histogram edges -- confirm intended.

    def _fit_fun(background_only):
        # Fresh parameter copy per fit so the caller's object is not modified.
        pars = template_params.copy()
        pars.n_tot.value = n_events
        if background_only:
            pars.A.value = 0
            pars.A.vary = False
        model = Model(template_pdf, pars)
        fitter = NLLFitter(model, verbose=False)
        return fitter.fit(counts, calculate_corr=False).fun

    # Signal+bg fit first, then bg-only, matching the original sequence.
    nll_sig = _fit_fun(background_only=False)
    nll_bg = _fit_fun(background_only=True)
    return 2 * (nll_bg - nll_sig)
def generate_q0_via_nll_unbinned_constrained(bg, data, bg_params):
    '''Compute q0 for unbinned ``data`` with the bg shape held fixed.

    Steps performed:

    1. Copy ``bg_params``, freeze every parameter, build a ``bg_pdf`` model
       and run the fitter on the ``bg`` sample.
    2. Evaluate that model's NLL on ``data`` via ``calc_nll(None, data)``.
    3. Build a ``bg_sig_pdf`` model in which only ``C`` floats; ``mu`` and
       ``sigma`` are fixed constants and ``a1``..``a3`` are fixed to the
       values held by the bg parameter copy after step 1.
    4. Fit that model to ``data`` and take the result's ``fun``.

    Returns ``2 * max(bg_nll - bg_sig_nll, 0)``.
    '''
    data = np.asarray(data)
    bg = np.asarray(bg)

    # Background-only model with all parameters frozen.
    frozen_bg = bg_params.copy()
    for name in frozen_bg:
        frozen_bg[name].vary = False
    bg_model = Model(bg_pdf, frozen_bg)
    NLLFitter(bg_model, verbose=False).fit(bg, calculate_corr=False)
    bg_nll = bg_model.calc_nll(None, data)

    # Signal+bg model: tuples are (name, value, vary, min, max, None, None).
    rows = [
        ('C', 0.1, True, 0, 1, None, None),
        ('mu', 125.77, False, 120, 130, None, None),
        ('sigma', 2.775, False, 1, 4, None, None),
    ]
    rows.extend(
        (coef, frozen_bg[coef].value, False, -1, 1, None, None)
        for coef in ('a1', 'a2', 'a3')
    )
    combined = Parameters()
    combined.add_many(*rows)

    bg_sig_model = Model(bg_sig_pdf, combined)
    sig_fit = NLLFitter(bg_sig_model, verbose=False).fit(
        data, calculate_corr=False)
    bg_sig_nll = sig_fit.fun

    # Negative differences are floored at zero.
    return 2 * max(bg_nll - bg_sig_nll, 0)
def generate_initial_params(hgg_bg, hgg_signal, n_sigma):
    '''Select bg/signal ``Mgg`` samples, fit each, and derive block edges.

    ``hgg_bg`` and ``hgg_signal`` must expose an ``Mgg`` column and support
    boolean-mask indexing and slicing (e.g. dataframes).  ``n_sigma`` scales
    the number of signal events requested:
    ``n_sig = int(n_sigma * sqrt(#bg with 118 < Mgg < 133))``.

    Returns ``(bg_result, sig_result, n_bg, n_sig, be_bg, be_sig)`` where the
    results come from ``NLLFitter.fit`` on the bg / signal samples, ``n_bg``
    is the size of the bg sample, ``n_sig`` is the requested signal count,
    and ``be_bg`` / ``be_sig`` are the ``bayesian_blocks`` outputs
    (``p0=0.02``) for each sample.
    '''
    # Background: first (up to) 10000 events with 100 < Mgg < 180.
    bg_window = (hgg_bg.Mgg > 100) & (hgg_bg.Mgg < 180)
    bg_mass = hgg_bg[bg_window][0:10000].Mgg

    # Signal yield is set relative to the bg count under the peak region.
    peak_mask = (118 < bg_mass) & (bg_mass < 133)
    n_bg_under_sig = bg_mass[peak_mask].size
    n_sig = int(n_sigma * np.sqrt(n_bg_under_sig))

    # Signal: first n_sig events with 118 <= Mgg <= 133.
    # n_sig is returned as computed; the slice may hold fewer events if the
    # signal sample is short.
    sig_window = (hgg_signal.Mgg >= 118) & (hgg_signal.Mgg <= 133)
    sig_mass = hgg_signal[sig_window][0:n_sig].Mgg

    data_bg = bg_mass.values
    data_sig = sig_mass.values

    # Fit the background shape (three coefficients, each bounded to [-1, 1]).
    bg_params = Parameters()
    bg_params.add_many(
        *[(coef, 0., True, -1, 1, None, None) for coef in ('a1', 'a2', 'a3')]
    )
    bg_result = NLLFitter(Model(bg_pdf, bg_params)).fit(
        data_bg, calculate_corr=False)

    # Fit the signal shape (mu, sigma).
    sig_params = Parameters()
    sig_params.add_many(
        ('mu', 125, True, 110, 130, None, None),
        ('sigma', 1, True, 1, 5, None, None),
    )
    sig_result = NLLFitter(Model(sig_pdf, sig_params)).fit(data_sig)

    n_bg = len(data_bg)
    be_bg = bayesian_blocks(data_bg, p0=0.02)
    be_sig = bayesian_blocks(data_sig, p0=0.02)
    return bg_result, sig_result, n_bg, n_sig, be_bg, be_sig
def calc_A_unbinned(data, bg_params, sig_params):
    '''Fit the signal fraction of a fixed-shape bg+signal model to ``data``.

    ``sig_params[0:2]`` supply ``mu`` and ``sigma`` and ``bg_params[0:3]``
    supply ``alpha``, ``beta`` and ``gamma``; all five are held fixed.  Only
    ``C`` (start 0.01, bounded to [0, 1]) is left free in the unbinned fit
    of ``bg_sig_pdf``.

    Returns the first element of the fit result's ``x`` (presumably the
    best-fit ``C``, the only floating parameter).  This function performs
    only the best-fit step; any upper-limit scan or averaging over repeated
    runs has to be done by the caller.
    '''
    fixed_values = [
        ('mu', sig_params[0]),
        ('sigma', sig_params[1]),
        ('alpha', bg_params[0]),
        ('beta', bg_params[1]),
        ('gamma', bg_params[2]),
    ]

    # Tuples are (name, value, vary, min, max, None, None).
    params = Parameters()
    params.add_many(
        ('C', 0.01, True, 0, 1, None, None),
        *[(name, value, False, None, None, None, None)
          for name, value in fixed_values]
    )

    fitter = NLLFitter(Model(bg_sig_pdf, params))
    best_fit = fitter.fit(np.asarray(data), calculate_corr=False,
                          verbose=False)
    return best_fit.x[0]
def generate_initial_params(data_bg_mul2, data_bg_mul8, seed=5):
    '''Fit the bg shape, sample toy bg events from it, and build bin edges.

    ``bg_pdf`` is fit to ``data_bg_mul2``.  A ROOT ``TF1`` over
    [2800, 13000] is then built from the fitted parameters and sampled
    ``len(data_bg_mul8)`` times after seeding ``gRandom`` with ``seed``.
    ``bayesian_blocks`` (``p0=0.02``) is run on the sampled events; the
    resulting edges are adjusted so the first edge is 2800 and a final edge
    at 13000 is appended.

    Returns ``(bg_result, n_bg, be_bg)``.
    '''
    # NOTE(review): the starting value for alpha (-18.0808) lies outside its
    # declared bounds (1e-20, 20) -- confirm how Parameters treats this.
    shape_params = Parameters()
    shape_params.add_many(
        ('alpha', -1.80808e+01, True, 1e-20, 20, None, None),
        ('beta', -8.21174e-02, True, -10, -1e-20, None, None),
        ('gamma', 8.06289e-01, True, 1e-20, 10, None, None)
    )
    bg_result = NLLFitter(Model(bg_pdf, shape_params)).fit(
        data_bg_mul2, calculate_corr=False)

    n_bg = len(data_bg_mul8)
    gRandom.SetSeed(seed)

    # Wrap the pdf for ROOT and draw n_bg toy events from the fitted shape.
    root_pdf = functools.partial(bg_pdf, doROOT=True)
    sampler = TF1("tf1_bg_pdf", root_pdf, 2800, 13000, 3)
    sampler.SetParameters(*bg_result.x)
    mc_bg = [sampler.GetRandom() for _ in range(n_bg)]

    # Statement order matters here: nudge the last edge, append the upper
    # limit, and only then pin the first edge to the lower limit.
    be_bg = bayesian_blocks(mc_bg, p0=0.02)
    be_bg[-1] += 0.1
    be_bg = np.append(be_bg, [13000])
    be_bg[0] = 2800

    return bg_result, n_bg, be_bg
def generate_q0_via_nll_unbinned(data, bg_params=None, sig_params=None):
    '''Compute q0 from unbinned bg-only and bg+signal fits to ``data``.

    ``bg_params`` / ``sig_params`` are optional parameter sets.  When one is
    supplied (and truthy) it is copied and every parameter in the copy is
    frozen; a supplied ``sig_params`` holding exactly five entries
    additionally gets a free ``C`` in [0, 1].  When one is omitted (or
    falsy) a default, fully floating parameter set is used instead.

    Returns ``2 * max(bg_nll - bg_sig_nll, 0)``, each NLL being the ``fun``
    of the corresponding fit result.
    '''
    def _frozen_copy(source):
        # Copy so the caller's parameters keep their own ``vary`` flags.
        clone = source.copy()
        for name in clone:
            clone[name].vary = False
        return clone

    def _fit_fun(model):
        fitter = NLLFitter(model, verbose=False)
        return fitter.fit(np.asarray(data), calculate_corr=False).fun

    # Tuples are (name, value, vary, min, max, None, None).
    poly_defaults = [(coef, 0., True, -1, 1, None, None)
                     for coef in ('a1', 'a2', 'a3')]

    if bg_params:
        _bg_params = _frozen_copy(bg_params)
    else:
        _bg_params = Parameters()
        _bg_params.add_many(*poly_defaults)
    bg_model = Model(bg_pdf, _bg_params)

    if sig_params:
        _sig_params = _frozen_copy(sig_params)
        # Five entries means the signal fraction is missing: add it, free.
        if len(_sig_params) == 5:
            _sig_params.add('C', 0.1, True, 0, 1)
    else:
        _sig_params = Parameters()
        _sig_params.add_many(
            ('C', 0.1, True, 0, 1, None, None),
            ('mu', 125, True, 120, 130, None, None),
            ('sigma', 2, True, 1, 4, None, None),
            *poly_defaults
        )
    bg_sig_model = Model(bg_sig_pdf, _sig_params)

    # Background-only fit first, then signal+bg.
    bg_nll = _fit_fun(bg_model)
    bg_sig_nll = _fit_fun(bg_sig_model)

    # Negative differences are floored at zero.
    return 2 * max(bg_nll - bg_sig_nll, 0)
def calc_A_cnc(data, bg_params, sig_params, xlow=2800, cache_true=None,
               cache_fit=None):
    '''Best-fit ``A`` for a single-bin (cut-and-count) template fit.

    The bg and signal pdfs are integrated from ``xlow`` to 13000 to obtain
    the single-bin template contents, and the number of entries in ``data``
    above ``xlow`` (floored at 3) is fit with ``A`` free in [0, 1] and
    ``n_tot`` fixed to ``len(data)``.

    Two dict caches avoid repeated work and are returned so the caller can
    pass them back in on later calls:

    * ``cache_true`` maps ``xlow`` to ``(true_bg, true_sig)``.
    * ``cache_fit`` maps the count above ``xlow`` to the fitted ``A``.

    Returns ``(mle_a, cache_true, cache_fit)``.  This function performs only
    the best-fit step; any upper-limit scan or averaging is up to the caller.
    '''
    if cache_true is None:
        cache_true = {}
    if cache_fit is None:
        cache_fit = {}

    data = np.asarray(data)

    # Template contents: pdf integrals over [xlow, 13000], cached per xlow.
    if xlow not in cache_true:
        bg_integral, _ = integrate.quad(
            functools.partial(bg_pdf, a=bg_params), xlow, 13000)
        sig_integral, _ = integrate.quad(
            functools.partial(sig_pdf, a=sig_params), xlow, 13000)
        cache_true[xlow] = (bg_integral, sig_integral)
    true_bg, true_sig = cache_true[xlow]

    n_pass = len(data[data > xlow])

    # NOTE(review): cache_fit is keyed by the passing count only, not by
    # xlow or len(data) -- confirm callers never share one cache_fit across
    # different xlow values or sample sizes.
    if n_pass in cache_fit and xlow in cache_true:
        return cache_fit[n_pass], cache_true, cache_fit

    template_pdf = template_pdf_wrapper([true_bg], [true_sig], cnc=True)
    template_params = Parameters()
    template_params.add_many(
        ('A', 0.1, True, 0, 1, None, None),
        ('n_tot', len(data), False, None, None, None, None)
    )
    fitter = NLLFitter(Model(template_pdf, template_params))

    # An empty selection is not rejected; the fitted count is floored at 3,
    # while the cache entry stays keyed by the actual count.
    observed = max(n_pass, 3)
    best_fit = fitter.fit(np.asarray([observed]), calculate_corr=False,
                          verbose=False)
    mle_a = best_fit.x[0]
    cache_fit[n_pass] = mle_a

    return mle_a, cache_true, cache_fit
def calc_A_binned(data, bg_mu, sig_mu):
    '''Best-fit ``A`` for a binned template fit with fixed templates.

    ``data`` holds the binned counts; ``bg_mu`` and ``sig_mu`` are passed to
    ``template_pdf_wrapper`` to build the template pdf.  ``A`` floats in
    [0, 1] (start 0.1) while ``n_tot`` is fixed to ``np.sum(data)``.

    Returns the first element of the fit result's ``x`` (presumably the
    best-fit ``A``, the only floating parameter).  This function performs
    only the best-fit step; any upper-limit scan or averaging over repeated
    runs is up to the caller.
    '''
    total_count = np.sum(data)

    # Tuples are (name, value, vary, min, max, None, None).
    pars = Parameters()
    pars.add_many(
        ('A', 0.1, True, 0, 1, None, None),
        ('n_tot', total_count, False, None, None, None, None)
    )

    model = Model(template_pdf_wrapper(bg_mu, sig_mu), pars)
    best_fit = NLLFitter(model).fit(data, calculate_corr=False,
                                    verbose=False)
    return best_fit.x[0]