Пример #1
0
def generate_initial_params(hgg_bg, hgg_signal, n_sigma):
    """Fit bg and signal pdf parameters from the input dataframes.

    Takes the bg and signal dataframes plus a sigma value for signal
    injection; returns the fitted pdf parameters describing those
    distributions together with Bayesian-block bin edges for each sample.
    """
    # Select up to 10k bg events in the 100-180 window, then size the
    # injected signal as ~n_sigma times sqrt(bg count under the peak).
    bg_mgg = hgg_bg[(hgg_bg.Mgg > 100) & (hgg_bg.Mgg < 180)][0:10000].Mgg
    in_peak = (118 < bg_mgg) & (bg_mgg < 133)
    n_sig = int(n_sigma * np.sqrt(bg_mgg[in_peak].size))
    sig_mgg = hgg_signal[(hgg_signal.Mgg >= 118) &
                         (hgg_signal.Mgg <= 133)][0:n_sig].Mgg
    data_bg = bg_mgg.values
    data_sig = sig_mgg.values

    # Fit the bg shape: three coefficients, each bounded to [-1, 1].
    bg_model = ff.Model(bg_pdf, ['a1', 'a2', 'a3'])
    bg_model.set_bounds([(-1., 1.), (-1., 1.), (-1., 1.)])
    bg_result = ff.NLLFitter(bg_model, data_bg).fit([0.0, 0.0, 0.0])

    # Fit the signal shape: mean and width within the search window.
    sig_model = ff.Model(sig_pdf, ['mu', 'sigma'])
    sig_model.set_bounds([(110, 130), (1, 5)])
    sig_result = ff.NLLFitter(sig_model, data_sig).fit([120.0, 2])

    n_bg = len(data_bg)

    # Bayesian-block bin edges for each sample.
    be_bg = bayesian_blocks(data_bg, p0=0.02)
    be_sig = bayesian_blocks(data_sig, p0=0.02)

    return bg_result, sig_result, n_bg, n_sig, be_bg, be_sig
Пример #2
0
def generate_initial_params(hgg_bg, hgg_signal, n_sigma):
    '''Input bg and signal dataframes, and a sigma value for signal injection.
    Output parameters for the pdfs that describe those distributions.

    Parameters
    ----------
    hgg_bg : background dataframe with an ``Mgg`` column.
    hgg_signal : signal dataframe with an ``Mgg`` column.
    n_sigma : signal-injection strength; n_sig = n_sigma * sqrt(bg under
        the 118-133 window).

    Returns
    -------
    (bg_result, sig_result, n_bg, n_sig, be_bg, be_sig) : fitted bg and
    signal parameters, sample sizes, and Bayesian-block bin edges.
    '''
    # grab a handful of bg events, and an ~X sigma number of signal events
    hgg_bg_selection = hgg_bg[(hgg_bg.Mgg > 100)
                              & (hgg_bg.Mgg < 180)][0:10000].Mgg
    n_bg_under_sig = hgg_bg_selection[(118 < hgg_bg_selection)
                                      & (hgg_bg_selection < 133)].size
    n_sig = int(n_sigma * np.sqrt(n_bg_under_sig))
    hgg_signal_selection = hgg_signal[(hgg_signal.Mgg >= 118)
                                      & (hgg_signal.Mgg <= 133)][0:n_sig].Mgg
    data_bg = hgg_bg_selection.values
    data_sig = hgg_signal_selection.values

    # fit to the data distributions
    # (add_many tuples: name, value, vary, min, max, expr, brute_step)
    bg_params = Parameters()
    bg_params.add_many(('a1', 0., True, -1, 1, None, None),
                       ('a2', 0., True, -1, 1, None, None),
                       ('a3', 0., True, -1, 1, None, None))

    bg_model = Model(bg_pdf, bg_params)
    bg_fitter = NLLFitter(bg_model)
    bg_result = bg_fitter.fit(data_bg, calculate_corr=False)

    sig_params = Parameters()
    sig_params.add_many(
        ('mu', 125, True, 110, 130, None, None),
        ('sigma', 1, True, 1, 5, None, None),
    )
    sig_model = Model(sig_pdf, sig_params)
    sig_fitter = NLLFitter(sig_model)
    sig_result = sig_fitter.fit(data_sig)

    n_bg = len(data_bg)

    # Bayesian-block bin edges for each sample
    be_bg = bayesian_blocks(data_bg, p0=0.02)
    be_sig = bayesian_blocks(data_sig, p0=0.02)

    return bg_result, sig_result, n_bg, n_sig, be_bg, be_sig
Пример #3
0
def generate_initial_params(data_bg_mul2, data_bg_mul8, seed=5):
    '''Fit the bg pdf to the multiplicity-2 data, then draw a toy bg
    sample of the multiplicity-8 sample's size from the fitted pdf via
    ROOT, and compute Bayesian-block bin edges on that toy sample.

    Returns (bg_result, n_bg, be_bg).
    '''

    # fit to the data distributions
    # NOTE(review): alpha's starting value (-1.80808e+01) lies outside its
    # declared bound (1e-20, 20) -- confirm the intended bound or start value.
    bg_model = ff.Model(bg_pdf, ['alpha', 'beta', 'gamma'])
    bg_model.set_bounds([(1e-20, 20), (-10, -1e-20), (1e-20, 10)])
    bg_fitter = ff.NLLFitter(bg_model, data_bg_mul2)
    bg_result = bg_fitter.fit([-1.80808e+01, -8.21174e-02, 8.06289e-01])
    n_bg = len(data_bg_mul8)

    # seed ROOT's generator so the toy sample is reproducible
    gRandom.SetSeed(seed)

    # Set up bg sampling: wrap the pdf for ROOT (doROOT=True) and draw
    # n_bg toy events over [2800, 13000] with 3 free parameters.
    bg_pdf_ROOT = functools.partial(bg_pdf, doROOT=True)
    tf1_bg_pdf = TF1("tf1_bg_pdf", bg_pdf_ROOT, 2800, 13000, 3)
    tf1_bg_pdf.SetParameters(*bg_result.x)
    mc_bg = [tf1_bg_pdf.GetRandom() for i in range(n_bg)]

    # Bayesian-block edges on the toy sample; pin the edges to the full
    # window: nudge the last edge, append 13000, overwrite the first.
    be_bg = bayesian_blocks(mc_bg, p0=0.02)
    be_bg[-1] += 0.1
    be_bg = np.append(be_bg, [13000])
    be_bg[0] = 2800
    # print be_bg
    # hist(data_bg_mul8, bins=be_bg, scale='binwidth')
    # plt.show()

    return bg_result, n_bg, be_bg
Пример #4
0
def generate_initial_params(data_bg_mul2, data_bg_mul8, seed=5):
    """Fit the bg pdf to the mul2 data and derive Bayesian-block edges.

    Fits on ``data_bg_mul2``, samples ``len(data_bg_mul8)`` toy events
    from the fitted pdf via ROOT, and runs bayesian_blocks on the toys.
    Returns (bg_result, n_bg, be_bg).
    """
    # Fit parameters (name, value, vary, min, max, expr, brute_step).
    # NOTE(review): alpha's starting value (-1.80808e+01) lies outside its
    # declared bounds (1e-20, 20) -- confirm the intended bound or value.
    fit_params = Parameters()
    fit_params.add_many(
        ('alpha', -1.80808e+01, True, 1e-20, 20, None, None),
        ('beta', -8.21174e-02, True, -10, -1e-20, None, None),
        ('gamma', 8.06289e-01, True, 1e-20, 10, None, None)
    )

    # Fit the bg model to the multiplicity-2 distribution.
    fitter = NLLFitter(Model(bg_pdf, fit_params))
    bg_result = fitter.fit(data_bg_mul2, calculate_corr=False)

    n_bg = len(data_bg_mul8)

    # Seed ROOT's generator so the toy sample is reproducible.
    gRandom.SetSeed(seed)

    # Draw n_bg toy events from the fitted pdf over [2800, 13000].
    root_bg_pdf = functools.partial(bg_pdf, doROOT=True)
    toy_gen = TF1("tf1_bg_pdf", root_bg_pdf, 2800, 13000, 3)
    toy_gen.SetParameters(*bg_result.x)
    mc_bg = [toy_gen.GetRandom() for _ in range(n_bg)]

    # Bayesian-block edges on the toy sample, pinned to the full window:
    # nudge the last edge, append 13000, overwrite the first with 2800.
    be_bg = bayesian_blocks(mc_bg, p0=0.02)
    be_bg[-1] += 0.1
    be_bg = np.append(be_bg, [13000])
    be_bg[0] = 2800

    return bg_result, n_bg, be_bg
Пример #5
0
def generateToy():
    """Draw toy events from a linear pdf and compare 10 uniform bins
    against Bayesian-block binning.

    Samples ``nentries`` events from the ROOT TF1 "2*x" on [0, 10],
    plots both normalized histograms, and overlays the true pdf.
    """
    plt.close('all')

    def poly1(x):
        # True pdf normalized over [0, 10]: 2*x/100 integrates to 1.
        return 2 * x / 100

    nentries = 100
    p0 = 0.01
    x = np.arange(0.0, 10, 0.1)
    # Fix both numpy's and ROOT's generators for reproducibility.
    np.random.seed(12345)
    ROOT.gRandom.SetSeed(8675309)
    poly1_gen = TF1("poly1", "2*x", 0, 10)
    my_rands = [poly1_gen.GetRandom() for _ in range(nentries)]

    plt.figure()
    hist(my_rands, bins=10, histtype='stepfilled', alpha=0.2,
         label='10 bins', normed=True)
    bb_edges = bayesian_blocks(my_rands, p0=p0)
    # NOTE(review): this legend label duplicates '10 bins' although the
    # histogram uses Bayesian-block edges -- likely a copy-paste slip.
    hist(my_rands, bins=bb_edges, histtype='stepfilled', alpha=0.2,
         label='10 bins', normed=True)
    plt.plot(x, poly1(x), 'k')
    plt.show()
Пример #6
0
def generateToy():
    """Draw toy events from a linear pdf and compare 10 uniform bins
    against Bayesian-block binning.

    Samples ``nentries`` events from the ROOT TF1 "2*x" on [0, 10],
    plots both normalized histograms, and overlays the true pdf.
    """
    plt.close('all')

    def poly1(x):
        # True pdf normalized over [0, 10]: 2*x/100 integrates to 1.
        return 2 * x / 100

    nentries = 100
    p0 = 0.01
    x = np.arange(0.0, 10, 0.1)
    # Fix both numpy's and ROOT's generators for reproducibility.
    np.random.seed(12345)
    ROOT.gRandom.SetSeed(8675309)
    poly1_gen = TF1("poly1", "2*x", 0, 10)
    # `xrange` is Python 2 only (NameError on Python 3); use `range`.
    my_rands = [poly1_gen.GetRandom() for _ in range(nentries)]

    plt.figure()
    hist(my_rands, bins=10, histtype='stepfilled', alpha=0.2,
         label='10 bins', normed=True)
    bb_edges = bayesian_blocks(my_rands, p0=p0)
    # NOTE(review): this legend label duplicates '10 bins' although the
    # histogram uses Bayesian-block edges -- likely a copy-paste slip.
    hist(my_rands, bins=bb_edges, histtype='stepfilled', alpha=0.2,
         label='10 bins', normed=True)
    plt.plot(x, poly1(x), 'k')
    plt.show()
Пример #7
0
def do_bh_analysis():
    """Run the black-hole ST analysis for the selected ST multiplicities.

    Loads pickled data/MC/signal trees, builds ST samples above a
    threshold, histograms them with Bayesian-block and fixed-width
    binning, and draws data/background ratio plots; returns the dict of
    Bayesian-block edges per ST multiplicity.

    NOTE(review): the bare ``return my_ST_data, my_ST_mc, my_ST_signal``
    inside the ST loop exits on the first iteration, so everything after
    it (histogramming, plotting, ``return all_edges``) is currently
    unreachable -- presumably a leftover debugging early exit; confirm
    intent before relying on this function's return value.
    """

    #set up variables
    plt.close('all')
    normed = True
    log = True
    STs = [2, 3, 4, 5, 6, 7, 8, 9, 10]
    # NOTE(review): the per-multiplicity thresholds are immediately
    # overwritten by a flat 2500 cut on the next line.
    ST_low = [2300, 2300, 2300, 2600, 2600, 2600, 2800, 2800, 2900]
    ST_low = [2500] * 9
    ST_low_dict = dict(list(zip(STs, ST_low)))
    samples = 5000
    seed = 2
    p0 = 0.005
    bg_est = 'data_driven'  #'data_driven','mc','low_ST'
    mode = 'signal_search'  #'no_signal','signal_search','signal_inj','signal_search_inj'

    # validate the hard-coded configuration strings
    if mode not in [
            'no_signal', 'signal_search', 'signal_inj', 'signal_search_inj'
    ]:
        raise KeyError('mode is not allowed!')
    if bg_est not in ['data_driven', 'mc', 'low_ST']:
        raise KeyError('bg_est is not allowed!')

    # number of signal events to inject/search for
    if mode in ['signal_search', 'signal_inj', 'signal_search_inj']:
        signal_num = 10
    else:
        signal_num = 0

    # load the pickled analysis trees (paths relative to this script)
    df_mc = pkl.load(open('../../files/BH/BHTree_mc.p', 'rb'))
    df_signal = pkl.load(open('../../files/BH/BHTree_signal.p', 'rb'))
    df_data = pkl.load(open('../../files/BH/BHTree_data.p', 'rb'))

    # split the MC into one dataframe per generator weight
    weights = df_mc.weightTree.unique()  #[0.27436519,0.0401976,0.01657276]
    df_mc_list = []
    for weight in weights:
        df_mc_list.append(df_mc[np.isclose(df_mc.weightTree, weight)])

    all_edges = {}
    #for ST in range(2,11):
    for ST in [8]:
        # data events passing the ST threshold for this multiplicity
        my_ST_data = df_data[df_data['ST_mul' + str(ST)] > ST_low_dict[ST]][
            'ST_mul' + str(ST)].values
        nentries = len(my_ST_data)
        my_ST_mc = []
        if bg_est == 'low_ST':
            # background template from the multiplicity-2 selection
            my_ST_mc = df_data[df_data['ST_mul2'] > ST_low_dict[ST]][
                df_data['n_multiplicity'] == 2]['ST_mul2'].values
        else:
            df_mc_st_list = [
                df[df['ST_mul' + str(ST)] > ST_low_dict[ST]]['ST_mul' +
                                                             str(ST)]
                for df in df_mc_list
            ]
            if mode in ['signal_search', 'signal_inj', 'signal_search_inj']:
                my_ST_signal = df_signal[
                    df_signal['ST_mul' + str(ST)] > ST_low_dict[ST]]['ST_mul' +
                                                                     str(ST)]

            # weighted resampling of the MC into a single bg template
            samples, rel_weights = find_sample_number(df_mc_st_list, weights)
            for i, mc in enumerate(df_mc_st_list):
                if samples * rel_weights[i] == 0: continue
                my_ST_mc = np.append(
                    my_ST_mc,
                    mc.sample(int(samples * rel_weights[i]),
                              random_state=seed).values)

        print('ST_mult', ST)
        print('   n_data', nentries)
        print('   n_mc', len(my_ST_mc))

        #get the edges from bb, and the normalized bin values (integral of all hists is 1)
        #if signal and inject:
        #    my_ST_data = np.append(my_ST_data,my_ST_signal.

        if mode in ['signal_inj', 'signal_search_inj']:
            # inject signal_num signal events into the data sample
            my_ST_data = np.append(
                my_ST_data,
                my_ST_signal.sample(signal_num, random_state=seed).values)
            nentries += signal_num
        elif mode in ['signal_search']:
            my_ST_signal = my_ST_signal.values
        # NOTE(review): early exit -- all code below is unreachable.
        return my_ST_data, my_ST_mc, my_ST_signal

        print(len(my_ST_data))
        # normalized histograms: Bayesian-block edges and 20 uniform bins
        normed_counts_data, bb_edges = np.histogram(my_ST_data,
                                                    bayesian_blocks(my_ST_data,
                                                                    p0=p0),
                                                    density=True)
        normed_counts_data_nobb, nobb_edges = np.histogram(my_ST_data,
                                                           20,
                                                           density=True)
        normed_counts_mc, _ = np.histogram(my_ST_mc, bb_edges, density=True)
        normed_counts_mc_nobb, _ = np.histogram(my_ST_mc,
                                                nobb_edges,
                                                density=True)
        if mode in ['signal_search', 'signal_search_inj']:
            normed_counts_signal, _ = np.histogram(my_ST_signal,
                                                   bb_edges,
                                                   density=True)
            normed_counts_signal_nobb, _ = np.histogram(my_ST_signal,
                                                        nobb_edges,
                                                        density=True)

        # rescale the values so that the integral of the data hist is = num of entries
        rescaled_counts_data = normed_counts_data * nentries
        rescaled_counts_data_nobb = normed_counts_data_nobb * nentries
        if mode in ['signal_search', 'signal_search_inj']:
            rescaled_counts_mc = normed_counts_mc * (nentries - signal_num)
            rescaled_counts_mc_nobb = normed_counts_mc_nobb * (nentries -
                                                               signal_num)
            rescaled_counts_signal = normed_counts_signal * signal_num
            rescaled_counts_signal_nobb = normed_counts_signal_nobb * signal_num
        else:
            rescaled_counts_mc = normed_counts_mc * (nentries)
            rescaled_counts_mc_nobb = normed_counts_mc_nobb * (nentries)

        # properly calculate the error bars on the data (sqrt(N)/bin width)
        counts_data, _ = np.histogram(my_ST_data, bb_edges)
        counts_data_nobb, _ = np.histogram(my_ST_data, nobb_edges)
        rescaled_err = np.sqrt(counts_data) / (bb_edges[1:] - bb_edges[:-1])
        rescaled_err_nobb = np.sqrt(counts_data_nobb) / (nobb_edges[1:] -
                                                         nobb_edges[:-1])
        err = np.sqrt(counts_data)
        # properly account for the BG error for ratio plot
        counts_bg, _ = np.histogram(my_ST_mc, bb_edges)
        counts_bg_nobb, _ = np.histogram(my_ST_mc, nobb_edges)
        rescaled_err_bg = np.sqrt(counts_bg) / (bb_edges[1:] - bb_edges[:-1])
        rescaled_err_bg_nobb = np.sqrt(counts_bg_nobb) / (nobb_edges[1:] -
                                                          nobb_edges[:-1])

        # draw data/bg ratio plots for both binnings
        if mode in ['signal_search', 'signal_search_inj']:
            make_hist_ratio_blackhole(bb_edges,
                                      rescaled_counts_data,
                                      rescaled_counts_mc,
                                      rescaled_err,
                                      str(ST),
                                      suffix=None,
                                      bg_est=bg_est,
                                      signal=rescaled_counts_signal,
                                      mode=mode)
            make_hist_ratio_blackhole2(nobb_edges,
                                       rescaled_counts_data_nobb,
                                       rescaled_counts_mc_nobb,
                                       rescaled_err_nobb,
                                       str(ST),
                                       suffix='nobb',
                                       bg_est=bg_est,
                                       signal=rescaled_counts_signal_nobb,
                                       mode=mode)
        else:
            make_hist_ratio_blackhole(bb_edges,
                                      rescaled_counts_data,
                                      rescaled_counts_mc,
                                      rescaled_err,
                                      str(ST),
                                      suffix=None,
                                      bg_est=bg_est,
                                      mode=mode)
            make_hist_ratio_blackhole(nobb_edges,
                                      rescaled_counts_data_nobb,
                                      rescaled_counts_mc_nobb,
                                      rescaled_err_nobb,
                                      str(ST),
                                      suffix='nobb',
                                      bg_est=bg_est,
                                      mode=mode)

        plt.show()

        all_edges[ST] = bb_edges

    for key in all_edges:
        print('ST' + str(key), all_edges[key])
    return all_edges
Пример #8
0
    # NOTE(review): scraped fragment -- the enclosing function's `def` (and
    # the definitions of sig_params, n_bg, bg_result, be_bg and the
    # be_*GeV edge arrays) are outside this view; the loop body is also cut
    # short below.
    # Per-signal-model accumulators: one empty list per signal parameter set.
    binned_A_100_mle = [[] for i in range(len(sig_params))]
    binned_A_200_mle = [[] for i in range(len(sig_params))]
    binned_A_400_mle = [[] for i in range(len(sig_params))]
    binned_A_1000_mle = [[] for i in range(len(sig_params))]
    binned_A_2000_mle = [[] for i in range(len(sig_params))]
    cnc_A_mle = [[] for i in range(len(sig_params))]

    # Wrap the signal pdf for ROOT and build a TF1 over [2800, 13000]
    # with 2 free parameters.
    sig_pdf_ROOT = functools.partial(sig_pdf, doROOT=True)
    tf1_sig_pdf = TF1("tf1_sig_pdf", sig_pdf_ROOT, 2800, 13000, 2)

    for i, sig_p in enumerate(tqdm_notebook(sig_params, desc='Signal Model')):

        # Draw a toy signal sample (same size as the bg sample) from the
        # current parameter set and get its Bayesian-block edges.
        n_sig = n_bg
        tf1_sig_pdf.SetParameters(*sig_p)
        mc_sig = [tf1_sig_pdf.GetRandom() for ns in range(n_sig)]
        be_sig = bayesian_blocks(mc_sig, p0=0.02)

        # Expected signal bin content for each candidate binning.
        true_sig_bc_bb = get_true_bin_content(be_bg, sig_pdf, sig_p)
        true_sig_bc_50GeV = get_true_bin_content(be_50GeV, sig_pdf, sig_p)
        true_sig_bc_100GeV = get_true_bin_content(be_100GeV, sig_pdf, sig_p)
        true_sig_bc_200GeV = get_true_bin_content(be_200GeV, sig_pdf, sig_p)
        true_sig_bc_400GeV = get_true_bin_content(be_400GeV, sig_pdf, sig_p)
        true_sig_bc_1000GeV = get_true_bin_content(be_1000GeV, sig_pdf, sig_p)
        true_sig_bc_2000GeV = get_true_bin_content(be_2000GeV, sig_pdf, sig_p)

        # Hybrid binning: union of bg and signal Bayesian-block edges.
        be_hybrid = np.sort(np.unique(np.concatenate([be_bg, be_sig])))

        true_bg_bc_bb_hybrid = get_true_bin_content(be_hybrid, bg_pdf,
                                                    bg_result.x)
        true_sig_bc_bb_hybrid = get_true_bin_content(be_hybrid, sig_pdf, sig_p)
def do_bh_analysis():
    """Run the black-hole ST analysis (Python 2 syntax).

    Loads pickled data/MC/signal trees, builds per-multiplicity ST
    samples above a threshold, histograms them with Bayesian-block and
    fixed-width binning, and draws data/background ratio plots; returns
    the dict of Bayesian-block edges per ST multiplicity.

    NOTE(review): uses Python 2 ``print`` statements, so this example
    will not parse under Python 3.  Also, the bare ``return my_ST_data,
    my_ST_mc, my_ST_signal`` inside the ST loop exits on the first
    iteration, making everything below it unreachable dead code --
    presumably a leftover debugging early exit.
    """

    # set up variables
    plt.close('all')
    normed = True
    log = True
    STs = [2,3,4,5,6,7,8,9,10]
    # NOTE(review): the per-multiplicity thresholds are immediately
    # overwritten by a flat 2500 cut on the next line.
    ST_low = [2300,2300,2300,2600,2600,2600,2800,2800,2900]
    ST_low = [2500]*9
    ST_low_dict = dict(zip(STs,ST_low))
    samples = 5000
    seed = 2
    p0=0.005
    bg_est = 'data_driven' #'data_driven','mc','low_ST'
    mode = 'signal_search' #'no_signal','signal_search','signal_inj','signal_search_inj'

    # validate the hard-coded configuration strings
    if mode not in ['no_signal','signal_search','signal_inj','signal_search_inj']: raise KeyError('mode is not allowed!')
    if bg_est not in ['data_driven','mc','low_ST']: raise KeyError('bg_est is not allowed!')

    # number of signal events to inject/search for
    if mode in ['signal_search','signal_inj','signal_search_inj']:
        signal_num = 10
    else:
        signal_num = 0

    # load the pickled analysis trees (paths relative to this script)
    df_mc = pkl.load(open('../../files/BH/BHTree_mc.p','rb'))
    df_signal = pkl.load(open('../../files/BH/BHTree_signal.p','rb'))
    df_data = pkl.load(open('../../files/BH/BHTree_data.p','rb'))

    # split the MC into one dataframe per generator weight
    weights = df_mc.weightTree.unique()#[0.27436519,0.0401976,0.01657276]
    df_mc_list = []
    for weight in weights:
        df_mc_list.append(df_mc[np.isclose(df_mc.weightTree,weight)])

    all_edges = {}
    #for ST in range(2,11):
    for ST in [8]:
        # data events passing the ST threshold for this multiplicity
        my_ST_data = df_data[df_data['ST_mul'+str(ST)]>ST_low_dict[ST]]['ST_mul'+str(ST)].values
        nentries = len(my_ST_data)
        my_ST_mc = []
        if bg_est == 'low_ST':
            # background template from the multiplicity-2 selection
            my_ST_mc = df_data[df_data['ST_mul2']>ST_low_dict[ST]][df_data['n_multiplicity']==2]['ST_mul2'].values
        else:
            df_mc_st_list = [df[df['ST_mul'+str(ST)]>ST_low_dict[ST]]['ST_mul'+str(ST)] for df in df_mc_list]
            if mode in ['signal_search','signal_inj','signal_search_inj']:
                my_ST_signal = df_signal[df_signal['ST_mul'+str(ST)]>ST_low_dict[ST]]['ST_mul'+str(ST)]

            # weighted resampling of the MC into a single bg template
            samples,rel_weights = find_sample_number(df_mc_st_list,weights)
            for i,mc in enumerate(df_mc_st_list):
                if samples*rel_weights[i]==0: continue
                my_ST_mc = np.append(my_ST_mc, mc.sample(int(samples*rel_weights[i]),random_state=seed).values)

        print 'ST_mult',ST
        print '   n_data',nentries
        print '   n_mc',len(my_ST_mc)

        # get the edges from bb, and the normalized bin values (integral of all hists is 1)
        #if signal and inject:
        #    my_ST_data = np.append(my_ST_data,my_ST_signal.

        if mode in ['signal_inj','signal_search_inj']:
            # inject signal_num signal events into the data sample
            my_ST_data = np.append(my_ST_data, my_ST_signal.sample(signal_num,random_state=seed).values)
            nentries+=signal_num
        elif mode in ['signal_search']:
            my_ST_signal = my_ST_signal.values
        # NOTE(review): early exit -- all code below is unreachable.
        return my_ST_data, my_ST_mc, my_ST_signal

        print len(my_ST_data)
        # normalized histograms: Bayesian-block edges and 20 uniform bins
        normed_counts_data, bb_edges = np.histogram(my_ST_data,bayesian_blocks(my_ST_data,p0=p0), density=True)
        normed_counts_data_nobb, nobb_edges = np.histogram(my_ST_data,20, density=True)
        normed_counts_mc, _= np.histogram(my_ST_mc,bb_edges, density=True)
        normed_counts_mc_nobb, _= np.histogram(my_ST_mc,nobb_edges, density=True)
        if mode in ['signal_search','signal_search_inj']:
            normed_counts_signal, _= np.histogram(my_ST_signal,bb_edges, density=True)
            normed_counts_signal_nobb, _= np.histogram(my_ST_signal,nobb_edges, density=True)

        # rescale the values so that the integral of the data hist is = num of entries
        rescaled_counts_data = normed_counts_data*nentries
        rescaled_counts_data_nobb = normed_counts_data_nobb*nentries
        if mode in ['signal_search','signal_search_inj']:
            rescaled_counts_mc = normed_counts_mc*(nentries-signal_num)
            rescaled_counts_mc_nobb = normed_counts_mc_nobb*(nentries-signal_num)
            rescaled_counts_signal = normed_counts_signal*signal_num
            rescaled_counts_signal_nobb = normed_counts_signal_nobb*signal_num
        else:
            rescaled_counts_mc = normed_counts_mc*(nentries)
            rescaled_counts_mc_nobb = normed_counts_mc_nobb*(nentries)

        # properly calculate the error bars on the data (sqrt(N)/bin width)
        counts_data, _= np.histogram(my_ST_data,bb_edges)
        counts_data_nobb, _= np.histogram(my_ST_data,nobb_edges)
        rescaled_err = np.sqrt(counts_data)/(bb_edges[1:]-bb_edges[:-1])
        rescaled_err_nobb = np.sqrt(counts_data_nobb)/(nobb_edges[1:]-nobb_edges[:-1])
        err = np.sqrt(counts_data)
        # properly account for the BG error for ratio plot
        counts_bg, _= np.histogram(my_ST_mc,bb_edges)
        counts_bg_nobb, _= np.histogram(my_ST_mc,nobb_edges)
        rescaled_err_bg = np.sqrt(counts_bg)/(bb_edges[1:]-bb_edges[:-1])
        rescaled_err_bg_nobb = np.sqrt(counts_bg_nobb)/(nobb_edges[1:]-nobb_edges[:-1])

        # draw data/bg ratio plots for both binnings
        if mode in ['signal_search','signal_search_inj']:
            make_hist_ratio_blackhole(bb_edges, rescaled_counts_data, rescaled_counts_mc, rescaled_err, str(ST), suffix = None, bg_est=bg_est, signal = rescaled_counts_signal, mode = mode)
            make_hist_ratio_blackhole2(nobb_edges, rescaled_counts_data_nobb, rescaled_counts_mc_nobb, rescaled_err_nobb, str(ST), suffix = 'nobb', bg_est=bg_est, signal = rescaled_counts_signal_nobb, mode=mode)
        else:
            make_hist_ratio_blackhole(bb_edges, rescaled_counts_data, rescaled_counts_mc, rescaled_err, str(ST), suffix = None, bg_est=bg_est, mode=mode)
            make_hist_ratio_blackhole(nobb_edges, rescaled_counts_data_nobb, rescaled_counts_mc_nobb, rescaled_err_nobb, str(ST), suffix = 'nobb', bg_est=bg_est, mode=mode)

        plt.show()

        all_edges[ST]=bb_edges

    for key in all_edges:
        print 'ST'+str(key), all_edges[key]
    return all_edges