Пример #1
0
def generate_q0_via_shape_fit(data, bin_edges, template_params, template_pdf):
    '''Compute the q0 statistic from two binned template fits.

    The data are histogrammed, then the template model is fitted twice:
    once with the parameters as supplied, and once with the parameter A
    set to 0 and frozen.  q0 is twice the difference between the minimised
    NLL of the A=0 fit and that of the free fit.

    data            -- sized sequence of event values; its length is written
                       into the n_tot parameter before each fit.
    bin_edges       -- forwarded to np.histogram as the ``bins`` argument.
    template_params -- parameter container offering copy() and the
                       attributes A and n_tot; only copies are modified here.
    template_pdf    -- callable handed to Model.

    Returns 2 * (nll_bg - nll_sig).  The value is not clipped at zero.'''

    n_events = len(data)
    # NOTE(review): range is presumably only relevant when bin_edges is an
    # integer bin count rather than an explicit edge array -- confirm.
    counts, _ = np.histogram(data, bin_edges, range=(100, 180))

    def _minimised_nll(signal_off):
        # Start every fit from a fresh copy so the two fits stay independent.
        pars = template_params.copy()
        pars.n_tot.value = n_events
        if signal_off:
            pars.A.value = 0
            pars.A.vary = False
        fitter = NLLFitter(Model(template_pdf, pars), verbose=False)
        return fitter.fit(counts, calculate_corr=False).fun

    # Free fit first, then the A=0 fit (same order as the two fits are used
    # in the difference below).
    nll_free = _minimised_nll(signal_off=False)
    nll_null = _minimised_nll(signal_off=True)

    return 2 * (nll_null - nll_free)
Пример #2
0
def generate_q0_via_nll_unbinned_constrained(bg, data, bg_params):
    '''Compute q0 with the background shape held at the supplied values.

    The bg-only NLL is evaluated on ``data`` via Model.calc_nll; the
    bg+signal NLL comes from a fit to ``data`` in which only C is free.
    mu, sigma and the background coefficients a1..a3 are fixed.

    bg        -- background sample, converted with np.asarray and passed to
                 the bg-only fitter.
    data      -- sample under test, converted with np.asarray.
    bg_params -- parameter container with entries 'a1', 'a2', 'a3'; a copy
                 is frozen and used, the argument itself is not modified here.

    Returns 2 * max(bg_nll - bg_sig_nll, 0).'''

    data = np.asarray(data)
    bg = np.asarray(bg)

    frozen_bg = bg_params.copy()
    for name in frozen_bg:
        frozen_bg[name].vary = False

    null_model = Model(bg_pdf, frozen_bg)
    # NOTE(review): every parameter is frozen, so this fit presumably cannot
    # move any value -- confirm whether it is needed for fitter/model state.
    NLLFitter(null_model, verbose=False).fit(bg, calculate_corr=False)
    nll_null = null_model.calc_nll(None, data)

    # Tuple layout is presumably (name, value, vary, min, max, expr,
    # brute_step), as in lmfit -- confirm against the Parameters class in use.
    shape_terms = [(key, frozen_bg[key].value, False, -1, 1, None, None)
                   for key in ('a1', 'a2', 'a3')]
    alt_params = Parameters()
    alt_params.add_many(
        ('C', 0.1, True, 0, 1, None, None),
        ('mu', 125.77, False, 120, 130, None, None),
        ('sigma', 2.775, False, 1, 4, None, None),
        *shape_terms)

    alt_fitter = NLLFitter(Model(bg_sig_pdf, alt_params), verbose=False)
    nll_alt = alt_fitter.fit(data, calculate_corr=False).fun

    # Negative differences are clipped so q0 is never below zero.
    return 2 * max(nll_null - nll_alt, 0)
Пример #3
0
def generate_initial_params(hgg_bg, hgg_signal, n_sigma):
    '''Select bg and signal events, fit both samples and bin them.

    hgg_bg     -- background table with an Mgg column; events with
                  100 < Mgg < 180 are kept, capped at the first 10000.
    hgg_signal -- signal table with an Mgg column; events with
                  118 <= Mgg <= 133 are kept, capped at the first n_sig.
    n_sigma    -- scale factor: n_sig = int(n_sigma * sqrt(N)), where N is
                  the number of selected bg events with 118 < Mgg < 133.

    Returns (bg_result, sig_result, n_bg, n_sig, be_bg, be_sig): the two
    fit results, the number of selected bg events, the requested number of
    signal events, and the bayesian_blocks output for each sample
    (presumably bin edges -- confirm).'''

    # Background: mass window first, then cap the sample size.
    bg_window = (hgg_bg.Mgg > 100) & (hgg_bg.Mgg < 180)
    bg_mass = hgg_bg[bg_window][0:10000].Mgg

    # Size the signal injection from the bg yield under the peak region.
    under_peak = (bg_mass > 118) & (bg_mass < 133)
    n_bg_under_sig = bg_mass[under_peak].size
    n_sig = int(n_sigma * np.sqrt(n_bg_under_sig))

    # NOTE(review): if fewer than n_sig signal events pass the window, the
    # returned n_sig exceeds len(data_sig) -- confirm callers tolerate that.
    sig_window = (hgg_signal.Mgg >= 118) & (hgg_signal.Mgg <= 133)
    sig_mass = hgg_signal[sig_window][0:n_sig].Mgg

    data_bg = bg_mass.values
    data_sig = sig_mass.values

    # Background shape fit: three free coefficients, each bounded to [-1, 1].
    bg_params = Parameters()
    bg_params.add_many(*[(coeff, 0., True, -1, 1, None, None)
                         for coeff in ('a1', 'a2', 'a3')])
    bg_result = NLLFitter(Model(bg_pdf, bg_params)).fit(data_bg,
                                                        calculate_corr=False)

    # Signal shape fit: mu and sigma both free.
    sig_params = Parameters()
    sig_params.add_many(
        ('mu', 125, True, 110, 130, None, None),
        ('sigma', 1, True, 1, 5, None, None),
    )
    sig_result = NLLFitter(Model(sig_pdf, sig_params)).fit(data_sig)

    n_bg = len(data_bg)

    be_bg = bayesian_blocks(data_bg, p0=0.02)
    be_sig = bayesian_blocks(data_sig, p0=0.02)

    return bg_result, sig_result, n_bg, n_sig, be_bg, be_sig
Пример #4
0
def calc_A_unbinned(data, bg_params, sig_params):
    '''Return the best-fit signal fraction from an unbinned bg+signal fit.

    Only the parameter C is free (start value 0.01, bounds 0..1); mu, sigma,
    alpha, beta and gamma are fixed to the supplied values.  This function
    performs the maximum-likelihood fit only; no upper-limit scan is done
    here.

    data       -- event values, converted with np.asarray before fitting.
    bg_params  -- indexable; elements 0, 1, 2 are used as alpha, beta, gamma.
    sig_params -- indexable; elements 0, 1 are used as mu, sigma.

    Returns the first element of the fit result's ``x``.'''

    mu, sigma = sig_params[0], sig_params[1]
    alpha, beta, gamma = bg_params[0], bg_params[1], bg_params[2]

    # Fixed shape parameters: vary=False and no bounds.
    fixed_terms = [(name, value, False, None, None, None, None)
                   for name, value in (('mu', mu),
                                       ('sigma', sigma),
                                       ('alpha', alpha),
                                       ('beta', beta),
                                       ('gamma', gamma))]

    params = Parameters()
    params.add_many(('C', 0.01, True, 0, 1, None, None), *fixed_terms)

    fitter = NLLFitter(Model(bg_sig_pdf, params))
    best_fit = fitter.fit(np.asarray(data), calculate_corr=False,
                          verbose=False)

    # C is the first parameter added above; x[0] is presumably its fitted
    # value -- confirm the ordering of the result vector.
    return best_fit.x[0]
Пример #5
0
def generate_initial_params(data_bg_mul2, data_bg_mul8, seed=5):
    '''Fit the background shape, sample toy bg events and derive binning.

    data_bg_mul2 -- sample the background model is fitted to.
    data_bg_mul8 -- only its length is used: it sets how many toy events
                    are drawn.
    seed         -- value passed to gRandom.SetSeed before sampling.

    Returns (bg_result, n_bg, be_bg): the fit result, the number of toy
    events drawn, and the bayesian_blocks output for the toys after its
    first entry is forced to 2800, its last entry shifted by +0.1 and
    13000 appended.'''

    # NOTE(review): assuming the tuple layout is (name, value, vary, min,
    # max, ...), the alpha start value -18.0808 lies outside its own bounds
    # (1e-20, 20) -- confirm which of the two is intended.
    start_values = Parameters()
    start_values.add_many(
        ('alpha', -1.80808e+01, True, 1e-20, 20, None, None),
        ('beta', -8.21174e-02, True, -10, -1e-20, None, None),
        ('gamma', 8.06289e-01, True, 1e-20, 10, None, None)
    )

    fitter = NLLFitter(Model(bg_pdf, start_values))
    bg_result = fitter.fit(data_bg_mul2, calculate_corr=False)

    n_bg = len(data_bg_mul8)

    gRandom.SetSeed(seed)

    # Wrap the fitted pdf in a TF1 over [2800, 13000] with 3 parameters and
    # draw n_bg toys from it.  The partial is bound to a local name so the
    # callable stays referenced while the TF1 is in use.
    root_pdf = functools.partial(bg_pdf, doROOT=True)
    sampler = TF1("tf1_bg_pdf", root_pdf, 2800, 13000, 3)
    sampler.SetParameters(*bg_result.x)
    mc_bg = [sampler.GetRandom() for _ in range(n_bg)]

    # Adjust the block output to span the full range: nudge the last entry
    # up, append the upper limit, then pin the first entry to the lower
    # limit.  The statement order matters when only one entry is returned.
    be_bg = bayesian_blocks(mc_bg, p0=0.02)
    be_bg[-1] += 0.1
    be_bg = np.append(be_bg, [13000])
    be_bg[0] = 2800

    return bg_result, n_bg, be_bg
Пример #6
0
def generate_q0_via_nll_unbinned(data, bg_params=None, sig_params=None):
    '''Compute q0 from a bg-only fit and a bg+signal fit to the same data.

    data       -- event values; converted with np.asarray for each fit.
    bg_params  -- optional parameter container for the bg-only model.  If
                  falsy, a1..a3 start at 0 and float in [-1, 1]; otherwise
                  a copy is used with every parameter frozen.
    sig_params -- optional parameter container for the bg+signal model.  If
                  falsy, C, mu, sigma, a1..a3 all float from defaults;
                  otherwise a copy is used with every parameter frozen, and
                  a free C (0.1, bounds 0..1) is added when the copy holds
                  exactly 5 entries.

    Returns 2 * max(bg_nll - bg_sig_nll, 0).'''

    if bg_params:
        null_params = bg_params.copy()
        for name in null_params:
            null_params[name].vary = False
    else:
        null_params = Parameters()
        null_params.add_many(*[(coeff, 0., True, -1, 1, None, None)
                               for coeff in ('a1', 'a2', 'a3')])

    null_model = Model(bg_pdf, null_params)

    if sig_params:
        alt_params = sig_params.copy()
        for name in alt_params:
            alt_params[name].vary = False

        # NOTE(review): C is appended after the existing entries here but
        # listed first in the default set below -- confirm bg_sig_pdf does
        # not depend on parameter order.
        if len(alt_params) == 5:
            alt_params.add('C', 0.1, True, 0, 1)
    else:
        alt_params = Parameters()
        alt_params.add_many(('C', 0.1, True, 0, 1, None, None),
                            ('mu', 125, True, 120, 130, None, None),
                            ('sigma', 2, True, 1, 4, None, None),
                            ('a1', 0., True, -1, 1, None, None),
                            ('a2', 0., True, -1, 1, None, None),
                            ('a3', 0., True, -1, 1, None, None))

    alt_model = Model(bg_sig_pdf, alt_params)

    # Fit bg-only first, then bg+signal, collecting the minimised NLLs.
    minimised = []
    for model in (null_model, alt_model):
        fitter = NLLFitter(model, verbose=False)
        outcome = fitter.fit(np.asarray(data), calculate_corr=False)
        minimised.append(outcome.fun)
    nll_null, nll_alt = minimised

    # Negative differences are clipped so q0 is never below zero.
    return 2 * max(nll_null - nll_alt, 0)
Пример #7
0
def calc_A_cnc(data, bg_params, sig_params, xlow=2800, cache_true=None, cache_fit=None):
    '''Return the best-fit A for a single-bin (cut-and-count) template fit.

    The bg and signal pdfs are integrated from xlow to 13000 to build a
    one-bin template; the number of events above xlow (raised to at least
    3) is then fitted for A.  This function performs the best-fit step
    only; no upper-limit scan is done here.

    data       -- event values, converted with np.asarray.
    bg_params  -- forwarded to bg_pdf as keyword ``a``.
    sig_params -- forwarded to sig_pdf as keyword ``a``.
    xlow       -- lower cut on the data and lower integration limit.
    cache_true -- optional dict {xlow: (bg_integral, sig_integral)}; a new
                  dict is created when None.
    cache_fit  -- optional dict {n_events_above_xlow: best_fit_A}; a new
                  dict is created when None.

    Returns (mle_a, cache_true, cache_fit); both caches are updated in
    place and handed back so callers can reuse them.'''

    cache_true = {} if cache_true is None else cache_true
    cache_fit = {} if cache_fit is None else cache_fit

    data = np.asarray(data)

    # Integrate the pdfs above the cut once per xlow value.
    if xlow not in cache_true:
        bg_integral, _ = integrate.quad(functools.partial(bg_pdf, a=bg_params), xlow, 13000)
        sig_integral, _ = integrate.quad(functools.partial(sig_pdf, a=sig_params), xlow, 13000)
        cache_true[xlow] = (bg_integral, sig_integral)
    true_bg, true_sig = cache_true[xlow]

    n_pass = len(data[data > xlow])

    # xlow is always present in cache_true by now, so the hit test only
    # needs the event count.
    # NOTE(review): the key ignores xlow and len(data) although the fit
    # depends on both -- confirm caches are never shared across them.
    if n_pass in cache_fit:
        return cache_fit[n_pass], cache_true, cache_fit

    n_tot = len(data)
    template_pdf = template_pdf_wrapper([true_bg], [true_sig], cnc=True)

    template_params = Parameters()
    template_params.add_many(
        ('A', 0.1, True, 0, 1, None, None),
        ('n_tot', n_tot, False, None, None, None, None)
    )

    fitter = NLLFitter(Model(template_pdf, template_params))

    # The fitted count is floored at 3; the cache key stays the raw count.
    observed = np.asarray([max(n_pass, 3)])
    mle_a = fitter.fit(observed, calculate_corr=False, verbose=False).x[0]
    cache_fit[n_pass] = mle_a

    return mle_a, cache_true, cache_fit
Пример #8
0
def calc_A_binned(data, bg_mu, sig_mu):
    '''Return the best-fit A from a binned template fit.

    A template pdf is built from the bg and signal templates and fitted to
    the binned data with A free (start value 0.1, bounds 0..1) and n_tot
    fixed to the sum of the data.  This function performs the best-fit
    step only; no upper-limit scan is done here.

    data   -- binned data; summed with np.sum to obtain n_tot and passed
              unchanged to the fitter.
    bg_mu  -- background template, forwarded to template_pdf_wrapper.
    sig_mu -- signal template, forwarded to template_pdf_wrapper.

    Returns the first element of the fit result's ``x``.'''

    total_count = np.sum(data)
    pdf = template_pdf_wrapper(bg_mu, sig_mu)

    pars = Parameters()
    pars.add_many(
        ('A', 0.1, True, 0, 1, None, None),
        ('n_tot', total_count, False, None, None, None, None)
    )

    fitter = NLLFitter(Model(pdf, pars))
    best_fit = fitter.fit(data, calculate_corr=False, verbose=False)

    # A is the first parameter added above; x[0] is presumably its fitted
    # value -- confirm the ordering of the result vector.
    return best_fit.x[0]