Example #1
def DST(dst_filenames_path):
    """Load kdst files, sort them by event number and build the Kr event
    object from the S2 energy and charge."""
    dst      = load_dsts(dst_filenames_path, "DST", "Events")
    dst_time = dst.sort_values('event')
    T        = dst_time.time.values
    DT       = time_delta_from_time(T)
    kge      = kr_event(dst, DT, dst.S2e, dst.S2q)
    return dst, DT, kge
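
## Hedged usage sketch for DST above: the directory and glob pattern are
## hypothetical placeholders; load_dsts, as elsewhere in these examples, is
## assumed to accept a list of kdst file paths.
import glob

kdst_files = glob.glob('/data/run_8087/kdst/*.h5')  # hypothetical path
dst, DT, kge = DST(kdst_files)
print(len(dst), 'kdst entries loaded')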
Example #2
def load_data(input_path         : str ,
              input_dsts         : str ,
              file_bootstrap_map : str ,
              ref_histo_file     : str ,
              key_Z_histo        : str ,
              quality_ranges     : dict ) -> Tuple[pd.DataFrame,
                                                   ASectorMap  ,
                                                   ref_hist_container]:
    """
    Reads kdst files and applies basic R cut. Outputs kdst as pd.DataFrame,
    bootstrap map, and reference histograms

    Parameters
    ----------
    input_path : str
        Path to the input map_folder
    input_dsts : str
        Name criteria for the dst to be read
    file_bootstrap_map : str
        Path to the bootstrap map file
    ref_Z_histo_file : str
        Path to the reference histogram file
    quality_ranges : dict
        Dictionary containing ranges for the quality cuts

    Returns
    ----------
    dst_filtered : pd.DataFrame
        Dst containing all the events once filtered
    bootstrap_map : ASectorMap
        Bootstrap map
    ref_hist_container : ref_hist_container
        To be completed
    """

    input_path         = os.path.expandvars(input_path)
    dst_files          = glob.glob(input_path + input_dsts)
    dst_full           = load_dsts(dst_files, "DST", "Events")
    dst_full           = dst_full.sort_values(by=['time'])
    mask_quality       = quality_cut(dst_full, **quality_ranges)
    dst_filtered       = dst_full[mask_quality]

    file_bootstrap_map = os.path.expandvars(file_bootstrap_map)
    bootstrap_map      = read_maps(file_bootstrap_map)

    ref_histo_file     = os.path.expandvars(ref_histo_file)
    z_pd               = pd.read_hdf(ref_histo_file, key=key_Z_histo)
    z_histo            = ref_hist(bin_centres     = z_pd.bin_centres,
                                  bin_entries     = z_pd.bin_entries,
                                  err_bin_entries = z_pd.err_bin_entries)
    ref_histos         = ref_hist_container(Z_dist_hist = z_histo)

    return dst_filtered, bootstrap_map, ref_histos
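
## Hedged usage sketch for load_data above; every path, pattern, key and the
## quality_ranges content are hypothetical placeholders, not values from the
## original analysis.
dst, boot_map, ref_histos = load_data(
    input_path         = '$IC_DATA/kdsts/'           ,  # hypothetical
    input_dsts         = 'kdst_*.h5'                 ,
    file_bootstrap_map = '$IC_DATA/maps/boot_map.h5' ,
    ref_histo_file     = '$IC_DATA/maps/Z_histos.h5' ,
    key_Z_histo        = 'Z_dist'                    ,
    quality_ranges     = {'r_max': 200}              )
print(len(dst), 'events after the quality cuts')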
Example #3
def compare_mc():
    """
    Looks at MC and data for the PMTs and checks scale levels
    at the level of individual PMTs and of the sum.
    Attempts to check values for the relative scaling of the PMTs.
    run as python pmtCompMCData.py <MC pmap base> <data pmap base>
                                   <MC hit dst base> <data hit dst base>
    """

    mc_file_base = sys.argv[1]
    da_file_base = sys.argv[2]
    dst_mc_base = sys.argv[3]
    dst_da_base = sys.argv[4]

    ## Extract the 4-digit run number that follows '/r' in the data file base.
    indx = da_file_base.find('/r') + 2
    run_number = da_file_base[indx:indx + 4]

    mc_sorter = sorter_func(mc_file_base)
    mc_file_list = sorted(glob(mc_file_base + '*.h5'), key=mc_sorter)
    da_sorter = sorter_func(da_file_base)
    da_file_list = sorted(glob(da_file_base + '*.h5'), key=da_sorter)

    mc_hit = load_dsts(glob(dst_mc_base + '*.h5'), 'RECO', 'Events')
    da_hit = load_dsts(glob(dst_da_base + '*.h5'), 'RECO', 'Events')

    ## wf_sum plus raw (pN) and lifetime-corrected (cpN) charge per PMT
    dfcols = [
        'wf_sum', 'p0', 'cp0', 'p1', 'cp1', 'p2', 'cp2', 'p3', 'cp3', 'p4',
        'cp4', 'p5', 'cp5', 'p6', 'cp6', 'p7', 'cp7', 'p8', 'cp8', 'p9', 'cp9',
        'p10', 'cp10', 'p11', 'cp11'
    ]
    ## Relative scale seeds: only [1:] is used to seed the fit below,
    ## entry n+1 pairing with PMT n
    pmt_scales = [
        1, 0.79, 1, 0.80, 0.72, 1.11, 1.03, 0.82, 0.82, 1.03, 0.89, 0.95, 0.82
    ]
    mc_sums = pd.DataFrame(columns=dfcols)
    for fn in mc_file_list:
        print('Reading mc file ', fn)
        pmaps = load_pmaps(fn)
        print('...data got')
        for evt, pmap in pmaps.items():

            if len(pmap.s2s) == 1 and len(pmap.s1s) == 1:
                try:
                    # indexing [0] raises IndexError when the event has no
                    # reconstructed hits, skipping it via the except below
                    mc_hit[mc_hit.event == evt].X.values[0]
                    hx = mc_hit[mc_hit.event == evt].X.values
                    hy = mc_hit[mc_hit.event == evt].Y.values
                    hz = mc_hit[mc_hit.event == evt].Z.values
                    hq = mc_hit[mc_hit.event == evt].Q.values
                    #for s2 in pmap.s2s:
                    s2 = pmap.s2s[0]
                    rs2 = pmf.rebin_peak(s2, 2)
                    p_z = (rs2.times - pmap.s1s[0].time_at_max_energy) / 1000
                    #if hz.shape[0] == len(rs2.times):
                    new_row = [s2.pmts.waveform(x).sum() for x in range(12)]
                    cn_row = [
                        life_correction(hx, hy, hz, hq, p_z,
                                        rs2.pmts.waveform(x))
                        for x in range(12)
                    ]
                    new_row = np.column_stack((new_row, cn_row)).flatten()
                    ## new_row.insert(0, s2.total_energy)
                    new_row = np.insert(new_row, 0, s2.total_energy)

                    mc_sums.loc[len(mc_sums)] = list(new_row)
                except IndexError:
                    continue

    da_sums = pd.DataFrame(columns=dfcols)
    for fn in da_file_list:
        print('Reading data file ', fn)
        pmaps = load_pmaps(fn)
        print('...data got')
        for evt, pmap in pmaps.items():

            if len(pmap.s2s) == 1 and len(pmap.s1s) == 1:
                try:
                    # indexing [0] raises IndexError when the event has no
                    # reconstructed hits, skipping it via the except below
                    da_hit[da_hit.event == evt].X.values[0]
                    hx = da_hit[da_hit.event == evt].X.values
                    hy = da_hit[da_hit.event == evt].Y.values
                    hz = da_hit[da_hit.event == evt].Z.values
                    hq = da_hit[da_hit.event == evt].Q.values
                    #for s2 in pmap.s2s:
                    s2 = pmap.s2s[0]
                    rs2 = pmf.rebin_peak(s2, 1)
                    p_z = (rs2.times - pmap.s1s[0].time_at_max_energy) / 1000
                    #print('Check: ', hz.shape[0], len(rs2.times))
                    #if hz.shape[0] == len(rs2.times):
                    new_row = [s2.pmts.waveform(x).sum() for x in range(12)]
                    cn_row = [
                        life_correction(hx, hy, hz, hq, p_z,
                                        rs2.pmts.waveform(x))
                        for x in range(12)
                    ]
                    new_row = np.column_stack((new_row, cn_row)).flatten()
                    #new_row.insert(0, s2.total_energy)
                    new_row = np.insert(new_row, 0, s2.total_energy)

                    da_sums.loc[len(da_sums)] = list(new_row)
                except IndexError:
                    continue

    ## Apply trigger-like thresholds (in pe) on PMTs 0 and 2
    trg0 = mc_sums['p0'] * pmt_scales[1] > 8835
    trg2 = mc_sums['p2'] * pmt_scales[3] > 7836
    ## Make some plots
    mc_sums[trg0 & trg2].wf_sum.plot.hist(bins=np.linspace(0, 1.2e6, 100),
                                          label='MC',
                                          density=True,
                                          histtype='step')
    da_sums.wf_sum.plot.hist(bins=np.linspace(0, 1.2e6, 100),
                             label='data',
                             density=True,
                             histtype='step')
    plt.title('PMT sum')
    plt.xlabel('Summed PMT charge (pe)')
    plt.yscale('log')
    plt.show()

    ## Attempt big fit. (only lifetime corrected [1::2] done in function)
    efunc = general_chi2(mc_sums.drop('wf_sum', axis=1).values.T)
    ## full_dats = np.apply_along_axis(np.histogram, 1,
    ##                                 da_sums.drop('wf_sum', axis=1).values.T,
    ##                                 bins=np.linspace(0, 120000, 100),
    ##                                 density=True)[:, 0]
    full_dats = np.apply_along_axis(np.histogram, 1,
                                    da_sums.drop('wf_sum', axis=1).values.T[1::2],
                                    bins=np.linspace(0, 120000, 100))[:, 0]
    dat_norms = np.fromiter((s.sum() for s in full_dats), int)
    full_dats = np.concatenate(full_dats)
    errs = np.sqrt(full_dats)
    errs[errs <= 0] = 3
    par_seed = pmt_scales[1:]
    pfit, cov, infodict, msg, ier = leastsq(efunc,
                                            par_seed,
                                            args=(full_dats, errs, dat_norms),
                                            full_output=True)
    print('Fit res: ', pfit, ier, infodict, msg)
    trg0 = mc_sums['p0'] * pfit[1] * pfit[0] > 8835
    trg2 = mc_sums['p2'] * pfit[1] * pfit[2] > 7836
    fig, axes = plt.subplots(nrows=3, ncols=4, figsize=(20, 6))
    #mc_sums['new_sum'] = mc_sums.drop('wf_sum', axis=1).sum(axis=1)
    mc_sums['new_sum'] = mc_sums[dfcols[1::2]].multiply(pfit).sum(axis=1)
    mc_sums['new_csum'] = mc_sums[dfcols[2::2]].multiply(pfit).sum(axis=1)
    da_sums['csum'] = da_sums[dfcols[2::2]].sum(axis=1)
    for cname, ax, p in zip(dfcols[2::2], axes.flatten(), pfit):
        ax.set_title('PMT ' + cname[2:] + ' pe distribution')
        ax.set_xlabel('Photoelectrons')
        ax.set_ylabel('AU')
        mc_sums[trg0 & trg2][cname].multiply(p).plot.hist(ax=ax,
                                                          bins=np.linspace(
                                                              0, 120000, 100),
                                                          label='MC',
                                                          density=True,
                                                          histtype='step')
        da_sums[cname].plot.hist(ax=ax,
                                 bins=np.linspace(0, 120000, 100),
                                 label='data',
                                 density=True,
                                 histtype='step')
        ## if 'p1' == cname:
        ##     mc_vals = mc_sums[trg0 & trg2][cname].values
        ##     da_vals = da_sums[cname].values
        ##     ffunc = simple_pmt1_fit(mc_vals)
        ##     dcv, hbins = np.histogram(da_vals, density=True,
        ##                               bins=np.linspace(0, 120000, 100))
        ##     hbins = shift_to_bin_centers(hbins)
        ##     errs = np.sqrt(dcv)
        ##     errs[errs==0] = 3
        ##     fvals = fitf.fit(ffunc, hbins, dcv, seed=(1), sigma=errs)
        ##     ax.plot(hbins, fvals.fn(hbins), label='fit attempt')
        ##     print('fit result: ', fvals.values, fvals.errors)

        ax.legend()
    plt.tight_layout()
    fig.show()
    plt.show()

    mc_sums[trg0 & trg2].new_sum.plot.hist(bins=np.linspace(0, 1.2e6, 100),
                                           label='MC',
                                           density=True,
                                           histtype='step')
    da_sums.wf_sum.plot.hist(bins=np.linspace(0, 1.2e6, 100),
                             label='data',
                             density=True,
                             histtype='step')
    plt.title('PMT sum')
    plt.xlabel('Summed PMT charge (pe)')
    plt.yscale('log')
    plt.show()

    mc_sums[trg0 & trg2].new_csum.plot.hist(bins=np.linspace(0, 1.2e6, 100),
                                            label='MC',
                                            density=True,
                                            histtype='step')
    da_sums.csum.plot.hist(bins=np.linspace(0, 1.2e6, 100),
                           label='data',
                           density=True,
                           histtype='step')
    plt.title('PMT sum')
    plt.xlabel('Summed PMT charge (pe)')
    plt.yscale('log')
    plt.show()
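
## A minimal sketch of the life_correction helper used above; everything in
## it is an assumption for illustration. It is taken to apply an exponential
## electron-lifetime correction to each rebinned S2 time slice of a PMT
## waveform and return the corrected integral; the hit arrays (hx, hy, hq)
## would allow an XY-dependent lifetime, which this sketch ignores, and the
## lifetime value is a placeholder.
import numpy as np

def life_correction(hx, hy, hz, hq, slice_z, waveform, lifetime=5000.):
    # slice_z: drift times of the S2 slices; weight each slice by
    # exp(t_drift / lifetime) and sum over the waveform.
    return np.sum(waveform * np.exp(np.asarray(slice_z) / lifetime))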
Example #4
def read_dsts(path_to_dsts):
    """Load the event DST and the nS12 summary from all files in a folder."""
    filenames = glob.glob(path_to_dsts + '/*')
    dstdf = load_dsts(filenames, group='DST'  , node='Events')
    nsdf  = load_dsts(filenames, group='Extra', node='nS12'  )
    return nsdf, dstdf
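
## Hedged usage sketch for read_dsts above; the directory is a hypothetical
## placeholder.
nsdf, dstdf = read_dsts('/data/run_8087/kdst')
print(dstdf.event.nunique(), 'events;', len(nsdf), 'nS12 rows')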
Example #5
def load_data(fout, dir_in, run):
    """
    Input path to all dst files
    Return a merged dst
    ------
    Note: In the kdst there are duplicated events because of the
    multiplicity from both s1 and s2
    Note two: dst.event.nunique returns int. (Panda.Series.nunique)
    if used in Panda.Dataframe.nunique returns Series with unique entries
    """

    path        = dir_in + '/' + run + '/kdst/'
    files_all   = [path + f for f in os.listdir(path)
                   if os.path.isfile(os.path.join(path, f))]
    dst         = load_dsts(files_all, "DST", "Events")
    time_run    = dst.time.mean()

    # count the number of unique events: the kdst has one row per S2,
    # so mask out duplicated event numbers
    unique_events = ~dst.event.duplicated()

    num_of_S2s  = np.size         (unique_events)
    num_of_evts = np.count_nonzero(unique_events)

    print(num_of_evts)
    fout.write(f"dst_entries {str(len(dst))}\n")
    fout.write(f"time_run {time_run}\n")
    fout.write(f"s2_tot {num_of_S2s}\n")
    fout.write(f"evt_tot {num_of_evts}\n")

    # compute number of s1 and s2
    df = dst[~dst.time.duplicated()]
    tot_ev = df.event.nunique()
    s1_num = df.nS1.values
    s2_num = df.nS2.values
    fout.write(f"num_of_ev_check {tot_ev}\n")

    # S1 multiplicity efficiencies (0 to 6 S1s) and their uncertainties
    for n in range(7):
        eff = np.count_nonzero(s1_num == n) / tot_ev
        fout.write(f'eff_{n}s1  {eff*100:.5f}\n')
        fout.write(f'eff_{n}s1_u  {error_eff(tot_ev, eff)*100:.5f}\n')

    # S2 multiplicity efficiencies (0 to 8 S2s) and their uncertainties
    for n in range(9):
        eff = np.count_nonzero(s2_num == n) / tot_ev
        fout.write(f'eff_{n}s2  {eff*100:.5f}\n')
        fout.write(f'eff_{n}s2_u  {error_eff(tot_ev, eff)*100:.5f}\n')


    return dst
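
## A minimal sketch of the error_eff helper used above, assuming it returns
## the binomial uncertainty on an efficiency measured with ntot events; the
## actual implementation may differ.
import numpy as np

def error_eff(ntot, eff):
    # binomial standard error on a proportion: sqrt(eff * (1 - eff) / N)
    return np.sqrt(eff * (1.0 - eff) / ntot)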
Example #6
def relative_pmt_response():
    """
    Script which uses pmaps (will be generalised in future to check XY dependence)
    to look at the relative response of the PMTs in Kr events and
    compares to the results on Poisson mu from calibrations
    """

    pmap_file_base = sys.argv[1]
    dst_file_base = sys.argv[2]

    run_number = pmap_file_base.split('/')[2][1:]

    pmt_dats = DB.DataPMT(int(run_number))

    s1hists = {x: [] for x in range(12)}
    s2hists = {x: [] for x in range(12)}
    s1sumh = []
    s2sumh = []
    hitPMTdist = {x: [] for x in range(12)}
    hitPMTZpos = {x: [] for x in range(12)}

    pmap_sorter = sorter_func(pmap_file_base)
    pmap_file_list = sorted(glob(pmap_file_base + '*.h5'), key=pmap_sorter)

    dst_sorter = sorter_func(dst_file_base)
    dst_file_list = sorted(glob(dst_file_base + '*.h5'), key=dst_sorter)

    ## dst_frame = load_dsts(dst_file_list, 'DST', 'Events')
    dst_frame = load_dsts(dst_file_list, 'RECO', 'Events')

    dst_evt_list = dst_frame['event'].unique()

    #for fn in iglob(pmap_file_base + '*.h5'):
    for fn in pmap_file_list:

        ## This version just using pmt databases
        s1df, s2df, _, s1pmtdf, s2pmtdf = load_pmaps_as_df(fn)

        common_evts = np.intersect1d(s1pmtdf['event'].unique(), dst_evt_list)

        for evt in common_evts:
            #for evt in s1pmtdf['event'].unique():
            #evt    = dst_evt_iter[0]
            s1evt = s1pmtdf[s1pmtdf['event'] == evt]
            s2evt = s2pmtdf[s2pmtdf['event'] == evt]
            s1sevt = s1df[s1df['event'] == evt]
            s2sevt = s2df[s2df['event'] == evt]
            hit_evt = dst_frame[dst_frame['event'] == evt]
            ## if hit_evt['nS2'].iloc[0] == 1 and len(s2evt['peak'].unique()) == 1 and len(s1evt['peak'].unique()) == 1:
            if (hit_evt['npeak'].nunique() == 1 and
                len(s2evt['peak'].unique()) == 1 and
                len(s1evt['peak'].unique()) == 1):
                ## Not well defined for multi-S2 events
                hit_x = hit_evt['X'].iloc[0]
                hit_y = hit_evt['Y'].iloc[0]
                hit_z = hit_evt['Z'].iloc[0]
                for peak in s1evt['peak'].unique():
                    s1peak = s1evt[s1evt['peak'] == peak]
                    s1sumh.append(s1sevt[s1sevt['peak'] == peak]['ene'].sum())
                    pmt1Q = s1peak[s1peak['npmt'] == 1]['ene'].sum()
                    for pmt in s1peak['npmt'].unique():
                        pmt_pos = pmt_dats[pmt_dats['SensorID'] == pmt]
                        hitPMTdist[pmt].append(
                            np.sqrt((hit_x - pmt_pos.X.values)**2 +
                                    (hit_y - pmt_pos.Y.values)**2))
                        hitPMTZpos[pmt].append(hit_z)
                        if pmt != 1:
                            s1hists[pmt].append(
                                s1peak[s1peak['npmt'] == pmt]['ene'].sum() /
                                pmt1Q)
                        else:
                            s1hists[pmt].append(pmt1Q)

                for peak in s2evt['peak'].unique():
                    s2peak = s2evt[s2evt['peak'] == peak]
                    s2sumh.append(s2sevt[s2sevt['peak'] == peak]['ene'].sum())
                    if s2sumh[-1] > 4000:  # and s2sumh[-1] < 12000:
                        ## pmt1Q = s2peak[s2peak['npmt'] == 1]['ene'].values[5:-5]
                        pmt1Q = s2peak[s2peak['npmt'] == 1]['ene'].sum()
                        for pmt in s2peak['npmt'].unique():
                            if pmt != 1:
                                ## s2hists[pmt].append(s2peak[s2peak['npmt'] == pmt]['ene'].values[5:-5]/pmt1Q)
                                s2hists[pmt].append(
                                    s2peak[s2peak['npmt'] == pmt]['ene'].sum()
                                    / pmt1Q)
                            else:
                                s2hists[pmt].append(pmt1Q)

            #dst_evt_iter.iternext()

    ## Make the plots
    s1sumh = np.array(s1sumh)
    s2sumh = np.array(s2sumh)
    figs0, axes0 = plt.subplots(nrows=1, ncols=2)
    axes0[0].hist(s1sumh)
    axes0[0].set_title('PMT sum S1 distribution')
    axes0[1].hist(s2sumh)
    axes0[1].set_title('PMT sum S2 distribution')
    plt.tight_layout()
    figs0.show()
    figs0.savefig('SumChargescharge_R' + run_number + '.png')
    figs1, axess1 = plt.subplots(nrows=3, ncols=4, figsize=(20, 6))
    s1pmt1 = np.array(s1hists[1])
    s1bins = np.arange(-2, 4, 0.1)
    s2bins = np.arange(0.4, 1.1, 0.005)
    s1select = (s1sumh > 2) & (s1sumh < 150)
    for (key, val), ax in zip(s1hists.items(), axess1.flatten()):
        if key == 1:
            ax.hist(np.array(val)[s1select], bins=100)
            #ax.scatter(s1sumh[s1select], np.array(val)[s1select])
            ## ax.scatter(np.array(hitPMTdist[key])[s1select], np.array(val)[s1select])
            #ax.scatter(np.array(hitPMTZpos[key])[s1select], np.array(val)[s1select])
            ax.set_title('PMT 1 S1 charge')
            #ax.set_xlabel('integrated charge in PMT sum (pe)')
            #ax.set_xlabel('z pos.')
            #ax.set_ylabel('integrated charge in PMT1 (pe)')
            ax.set_ylabel('AU')
            ax.set_xlabel('integrated charge in PMT1 (pe)')
            z_pos = np.array(hitPMTZpos[key])[s1select]
            q_val = np.array(val)[s1select]
            corr_coef = np.corrcoef(z_pos, q_val)[0, 1]
            print('Sensor ', key, ' correlation coefficient = ', corr_coef)
        else:
            vals, bins, _ = ax.hist(np.array(val)[s1select], bins=s1bins)
            ## ax.scatter(s1pmt1[np.abs(val) < 10], np.array(val)[np.abs(val) < 10])
            #ax.scatter(s1sumh[s1select], np.array(val)[s1select])
            ## ax.scatter(np.array(hitPMTdist[key])[s1select & (np.abs(val) < 10)], np.array(val)[s1select & (np.abs(val) < 10)])
            #ax.scatter(np.array(hitPMTZpos[key])[s1select & (np.abs(val) < 10)], np.array(val)[s1select & (np.abs(val) < 10)])
            ax.set_title('PMT ' + str(key) + ' S1 relative charge')
            #ax.set_xlabel('integrated charge in PMT sum (pe)')
            ## ax.set_xlabel('PMT-hit dist. (mm)')
            #ax.set_xlabel('hit Z pos')
            #ax.set_ylabel('pmt q / pmt1 q')
            ax.set_ylabel('AU')
            ax.set_xlabel('pmt q / pmt1 q')
            ## sh_hits = np.array(hitPMTZpos[key])[s1select].shape
            ## sh_val = np.array(val)[s1select].shape
            ## covar = np.cov(np.array(hitPMTZpos[key])[s1select].reshape(1, sh_hits[0]), np.array(val)[s1select].reshape(1, sh_val[0]))[0, 1]
            ## corr_coef = covar / (np.std(np.array(val)[s1select], ddof=1)*np.std(np.array(hitPMTZpos[key])[s1select], ddof=1))
            s1select2 = s1select & (np.array(val) > 0) & (np.array(val) <= 2)
            print('Sensor ', key, ' mean = ',
                  np.mean(np.array(val)[s1select2]))
            ## limit the fit to bins with at least 100 entries
            useful_bins = np.argwhere(vals >= 100)
            b1 = useful_bins[0][0]
            b2 = useful_bins[-1][0]
            errs = np.sqrt(vals[b1:b2])
            fvals = fitf.fit(fitf.gauss,
                             shift_to_bin_centers(bins)[b1:b2],
                             vals[b1:b2],
                             seed=(vals.sum(), bins[vals.argmax()], 0.1),
                             sigma=errs)
            ax.plot(
                shift_to_bin_centers(bins)[b1:b2],
                fvals.fn(shift_to_bin_centers(bins)[b1:b2]))
            print('Fit S1 ' + str(key), fvals.values, fvals.errors, fvals.chi2)
    plt.tight_layout()
    figs1.show()
    figs1.savefig('s1relativechargeThzoom_R' + run_number + '.png')

    fitVals = {}
    figs2, axess2 = plt.subplots(nrows=3, ncols=4, figsize=(20, 6))
    s2pmt1 = np.array(s2hists[1])
    for (key, val), ax in zip(s2hists.items(), axess2.flatten()):
        if key == 1:
            ax.set_title('PMT 1 S2 charge vs S2 sum charge')
            ax.set_xlabel('S2 pmt sum charge (pe)')
            ax.set_ylabel('integrated charge in PMT1 (pe)')
            ## ax.hist(np.array(val)[(s2sumh>4000) & (s2sumh<12000)], bins=100)
            ##ax.hist(np.concatenate(val), bins=100)
            ## ax.hist(np.array(val)[s2sumh>4000], bins=100)
            ax.scatter(s2sumh[s2sumh > 4000], np.array(val)[s2sumh > 4000])
            #ax.scatter(s2sumh[(s2sumh>4000) & (s2sumh<12000)], np.array(val)[(s2sumh>4000) & (s2sumh<12000)])
            ## ax.scatter(np.array(hitPMTdist[key])[(s2sumh>4000) & (s2sumh<12000)], np.array(val)[(s2sumh>4000) & (s2sumh<12000)])
            #ax.scatter(np.array(hitPMTZpos[key])[(s2sumh>4000) & (s2sumh<12000)], np.array(val)[(s2sumh>4000) & (s2sumh<12000)])
            ## sh_hits = np.array(hitPMTdist[key])[(s2sumh>4000) & (s2sumh<12000)].shape
            ## sh_val = np.array(val)[(s2sumh>4000) & (s2sumh<12000)].shape
            ## covar = np.cov(np.array(hitPMTdist[key])[(s2sumh>4000) & (s2sumh<12000)].reshape(1, sh_hits[0]), np.array(val)[(s2sumh>4000) & (s2sumh<12000)].reshape(1, sh_val[0]))[0, 1]
            ## corr_coef = covar / (np.std(np.array(val)[(s2sumh>4000) & (s2sumh<12000)], ddof=1)*np.std(np.array(hitPMTdist[key])[(s2sumh>4000) & (s2sumh<12000)], ddof=1))
            ## print('Sensor ', key, ' correlation coefficient = ', corr_coef)
        else:
            ax.set_title('PMT ' + str(key) + ' S2 relative charge vs pmt sum')
            ax.set_ylabel('pmt q / pmt1 q')
            ax.set_xlabel('integrated charge in PMT sum (pe)')
            #ax.scatter(s2pmt1[np.abs(val) < 10], np.array(val)[np.abs(val) < 10])
            #ax.scatter(s2sumh[(s2sumh>4000) & (s2sumh<12000)], np.array(val)[(s2sumh>4000) & (s2sumh<12000)])
            ## vals, bins, _ = ax.hist(np.array(val)[(s2sumh>4000) & (s2sumh<12000)], bins=s2bins)
            ## vals, bins, _ = ax.hist(np.concatenate(val), bins=s2bins)
            ## vals, bins, _ = ax.hist(np.array(val)[s2sumh>4000], bins=s2bins)
            ax.scatter(s2sumh[s2sumh > 4000], np.array(val)[s2sumh > 4000])
            ## ax.scatter(np.array(hitPMTdist[key])[(s2sumh>4000) & (s2sumh<12000)], np.array(val)[(s2sumh>4000) & (s2sumh<12000)])
            #ax.scatter(np.array(hitPMTZpos[key])[(s2sumh>4000) & (s2sumh<12000)], np.array(val)[(s2sumh>4000) & (s2sumh<12000)])
            sum_sel = s2sumh[s2sumh > 4000]
            val_sel = np.array(val)[s2sumh > 4000]
            corr_coef = np.corrcoef(sum_sel, val_sel)[0, 1]
            ## sh_hits = np.array(hitPMTdist[key])[(s2sumh>4000) & (s2sumh<12000)].shape
            ## sh_val = np.array(val)[(s2sumh>4000) & (s2sumh<12000)].shape
            ## covar = np.cov(np.array(hitPMTdist[key])[(s2sumh>4000) & (s2sumh<12000)].reshape(1, sh_hits[0]), np.array(val)[(s2sumh>4000) & (s2sumh<12000)].reshape(1, sh_val[0]))[0, 1]
            ## corr_coef = covar / (np.std(np.array(val)[(s2sumh>4000) & (s2sumh<12000)], ddof=1)*np.std(np.array(hitPMTdist[key])[(s2sumh>4000) & (s2sumh<12000)], ddof=1))
            print('Sensor ', key, ' correlation coefficient = ', corr_coef)
            ## limit fit to region with stat error <= 10% Poisson
            ## useful_bins = np.argwhere(vals>=200)
            ## b1 = useful_bins[0][0]
            ## b2 = useful_bins[-1][0]
            ## errs = np.sqrt(vals[b1:b2])
            ## print('Seed check: ', (vals.sum(), bins[vals.argmax()], 0.02))
            ## fvals = fitf.fit(fitf.gauss, shift_to_bin_centers(bins)[b1:b2], vals[b1:b2],
            ##                  seed=(vals.sum(), bins[vals.argmax()], 0.02),
            ##                  sigma=errs, bounds=[(0, 0, 0.00001), (1e10, 2, 3)])
            ## ax.plot(shift_to_bin_centers(bins),
            ##         fitf.gauss(shift_to_bin_centers(bins), *fvals.values))
            ## fitVals[key] = (fvals.values[1], fvals.values[2])
            ## print('Fit PMT '+str(key), fvals.values, fvals.errors, fvals.chi2)
    plt.tight_layout()
    figs2.show()
    figs2.savefig('s2relativechargeThvsSum_R' + run_number + '.png')

    ## figcal, axcal = plt.subplots()
    ## axcal.errorbar(list(fitVals.keys()),
    ##                np.fromiter((x[0] for x in fitVals.values()), np.float),
    ##                yerr=np.fromiter((x[1] for x in fitVals.values()), np.float),
    ##                label='Average response of PMTs to Kr relative to PMT 1')
    ## ## Get the calibration info for comparison.
    ## cal_files = [ fname for fname in sys.argv[3:] ]
    ## read_params = partial(spr, table_name='FIT_pmt_scaled_dark_pedestal',
    ##                       param_names=['poisson_mu'])
    ## ## Assumes ordering, ok?
    ## for i, fn in enumerate(cal_files):
    ##     cal_run = fn.split('_')[1]
    ##     with tb.open_file(fn) as cal_in:
    ##         pmt1Val = 0
    ##         pmt1Err = 0
    ##         cVals = []
    ##         cErrs = []
    ##         for sens, (pars, errs) in read_params(cal_in):
    ##             if sens != 1:
    ##                 cVals.append(pars['poisson_mu'])
    ##                 cErrs.append(errs['poisson_mu'])
    ##             else:
    ##                 pmt1Val = pars['poisson_mu']
    ##                 pmt1Err = errs['poisson_mu']
    ##         normVals = np.array(cVals) / pmt1Val
    ##         normErrs = normVals * np.sqrt(np.power(np.array(cErrs)/np.array(cVals), 2) +
    ##                                       np.power(pmt1Err/pmt1Val, 2))
    ##         axcal.errorbar(list(fitVals.keys()), normVals,
    ##                        yerr=normErrs, label='Calibration '+cal_run)
    ## axcal.legend()
    ## axcal.set_xlabel('PMT sensor ID')
    ## axcal.set_ylabel('Response relative to that of PMT 1')
    ## figcal.show()
    ## figcal.savefig('calPoisKrRelCompStatsFILT.png')
    input('plots good?')  # hold the figures open until confirmed
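
## A minimal sketch of the sorter_func helper used throughout these examples
## to order file lists by their file-number suffix; the exact naming
## convention, and hence how the index is extracted, is an assumption.
import re

def sorter_func(file_base):
    def key(fname):
        # sort on the last integer in the part of the name after file_base
        return int(re.findall(r'\d+', fname.replace(file_base, ''))[-1])
    return key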
Example #7
    # Tail of the mean_and_std helper; its head is missing from this snippet
    # (a hedged sketch is given at the end of this example).
    dct.update({'nevents': len(dfp)})
    return pd.Series(dct)


fnames_cp = list(fnames)
lenfs = len(fnames_cp)
# merge files in sliding windows of 30, stepping by 5
for i in range(30, lenfs, 5):
    fnames_tp = fnames[i - 30:i]
    runs = re.findall(r'\d+', fnames_tp[0])[0]
    rune = re.findall(r'\d+', fnames_tp[-1])[0]
    fout = folder_out + 'runs_' + runs + '_' + rune + '.h5'
    mp_chunk = []
    print(runs, rune)
    for tbname in tablenames:
        data = load_dsts(fnames_tp, 'KDST', tbname)
        xbin, ybin = data.xbin.unique()[0], data.ybin.unique()[0]
        xbins = np.linspace(xbin - 10, xbin + 10, 11)  # 2 mm bins
        ybins = np.linspace(ybin - 10, ybin + 10, 11)
        xcenters = (xbins[1:] + xbins[:-1]) / 2
        ycenters = (ybins[1:] + ybins[:-1]) / 2
        data = data.assign(xbin=pd.cut(data.X, xbins, labels=xcenters),
                           ybin=pd.cut(data.Y, ybins, labels=ycenters))
        # correct the energy columns for lifetime and normalisation
        ecols = [f'E_{i}' for i in range(12)] + ['S2e']
        for col in ecols:
            data[col] = data[col] * np.exp(data.Z / data['lt']) / data['e0cen']

        # find the mean and std of the corrected energies in each (x, y) bin
        means = data.groupby(['xbin', 'ybin']).apply(mean_and_std).reset_index()
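
## A minimal sketch of the mean_and_std helper whose tail appears at the top
## of this example; the column list mirrors the ecols defined above, but the
## exact statistics it computes are an assumption.
import pandas as pd

def mean_and_std(dfp):
    dct = {}
    for col in [f'E_{i}' for i in range(12)] + ['S2e']:
        dct[f'{col}_mean'] = dfp[col].mean()
        dct[f'{col}_std']  = dfp[col].std()
    dct.update({'nevents': len(dfp)})
    return pd.Series(dct)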
Example #8
def generate_pdfs():
    """
    Generate SiPM PDFs using Kr RAW data.
    Multiple types of PDF are generated:
    Full spectrum PDFs : using full buffer
    Z vetoed PDFs      : Vetoing all sipms for regions in z where there is an identified s1 or s2
    1 ring vetoed PDFs : Vetoing sipms within 1 ring distance of a hit.
    2 ring vetoed PDFs : As above for 2 rings
    ...
    """

    pmap_file_base = sys.argv[1]
    hit_file_base = sys.argv[2]
    raw_file_base = sys.argv[3]

    pmap_sorter = sorter_func(pmap_file_base)
    pmap_files = sorted(glob(pmap_file_base + '*.h5'), key=pmap_sorter)
    hit_sorter = sorter_func(hit_file_base)
    hit_files = sorted(glob(hit_file_base + '*.h5'), key=hit_sorter)
    raw_sorter = sorter_func(raw_file_base)
    raw_files = sorted(glob(raw_file_base + '*.h5'), key=raw_sorter)

    ## Details of raw waveforms to aid vetoing (assumes all same in run)
    with tb.open_file(raw_files[0]) as rwf_in:
        sipmrwf = rwf_in.root.RD.sipmrwf[0][0]
        wf_range = np.arange(len(sipmrwf))

    run_no = int(sys.argv[4])
    ## Gains and sensor positions
    sipm_gains = DB.DataSiPM(run_no).adc_to_pes.values
    sipm_xy = DB.DataSiPM(run_no)[['X', 'Y']].values

    ## output
    histbins = np.arange(-10, 300, 0.1)
    bin_centres = shift_to_bin_centers(histbins)
    with tb.open_file('vetoedPDFs_R' + str(run_no) + '.h5', 'w') as pdf_out:
        HIST = partial(hist_writer,
                       pdf_out,
                       group_name='HIST',
                       n_sensors=1792,
                       n_bins=len(bin_centres),
                       bin_centres=bin_centres)
        full_spec = HIST(table_name='full_spec')
        z_vetoed = HIST(table_name='z_vetoed')
        one_ring = HIST(table_name='one_ring_vetoed')
        two_ring = HIST(table_name='two_ring_vetoed')
        thr_ring = HIST(table_name='thr_ring_vetoed')
        inv_thre = HIST(table_name='thr_ring_avetoed')

        ## Hit info
        hit_positions = load_dsts(hit_files, 'DST',
                                  'Events')[['event', 'X', 'Y']].values
        ## Start assuming KR data and Kdst
        ## For each event [evt_no, list tuples start and end veto areas]
        reduced_pulse_info = []
        for pmf in pmap_files:
            #print(pmf)
            sys.stdout.write(pmf + '\n')
            sys.stdout.flush()
            try:
                ## pmap_dict = load_pmaps(pmf)
                s1s, s2s, _, _, _ = load_pmaps_as_df(pmf)
            except (ValueError, tb.exceptions.NoSuchNodeError):
                print("Empty file. Skipping.")
                continue

            ## for key, pmap in pmap_dict.items():
            for evtNo in s1s['event'].unique():
                evtS1 = s1s[s1s['event'] == evtNo]
                evtS2 = s2s[s2s['event'] == evtNo]
                mask_list = []
                ## for s1 in pmap.s1s:
                for is1 in evtS1['peak'].unique():
                    s1 = evtS1[evtS1['peak'] == is1]
                    mask_list.append(
                        (wf_range < s1['time'].iloc[0] / units.mus - 1)
                        | (wf_range > s1['time'].iloc[-1] / units.mus + 1))
                for is2 in evtS2['peak'].unique():
                    s2 = evtS2[evtS2['peak'] == is2]
                    mask_list.append(
                        (wf_range < s2['time'].iloc[0] / units.mus - 2)
                        | (wf_range > s2['time'].iloc[-1] / units.mus + 2))
                reduced_pulse_info.append(
                    [evtNo, np.logical_and.reduce(mask_list)])
        print('masking info stored')
        mask_counter = 0
        pmap_evts = np.fromiter((x[0] for x in reduced_pulse_info), int)
        for rawf in raw_files:
            #print(rawf)
            sys.stdout.write(rawf + '\n')
            sys.stdout.flush()
            if mask_counter >= len(reduced_pulse_info):
                continue
            try:
                ## empty arrays for histograms
                shape = 1792, len(bin_centres)
                hist_full_spec = np.zeros(shape, dtype=int)
                hist_z_vetoed  = np.zeros(shape, dtype=int)
                hist_1_vetoed  = np.zeros(shape, dtype=int)
                hist_2_vetoed  = np.zeros(shape, dtype=int)
                hist_3_vetoed  = np.zeros(shape, dtype=int)
                hist_3_aveto   = np.zeros(shape, dtype=int)
                with tb.open_file(rawf) as raw_in:
                    revent_nos = np.fromiter(
                        (x[0] for x in raw_in.root.Run.events), int)

                    #evt_no = reduced_pulse_info[mask_counter][0]
                    #indx = np.argwhere(revent_nos==evt_no)
                    #print(reduced_pulse_info)
                    #pmap_evts = np.array(reduced_pulse_info)[:, 0]
                    pmap_overlap_indx = np.arange(revent_nos.shape[0])[
                        np.in1d(revent_nos, pmap_evts)]
                    hit_overlap_indx  = np.arange(revent_nos.shape[0])[
                        np.in1d(revent_nos, hit_positions[:, 0])]
                    hit_indcs = np.arange(hit_positions[:, 0].shape[0])[
                        np.in1d(hit_positions[:, 0], revent_nos)]
                    hindx = 0
                    #print(indx, indx[0][0])
                    #while indx.shape[0] != 0:
                    for indx in pmap_overlap_indx:
                        #print(indx[0][0])
                        #rwf = raw_in.root.RD.sipmrwf[indx[0][0]]
                        ## cwf = csf.sipm_processing["subtract_mode_calibrate"](raw_in.root.RD.sipmrwf[indx[0][0]], sipm_gains)
                        cwf = csf.sipm_processing["subtract_mode_calibrate"](
                            raw_in.root.RD.sipmrwf[indx], sipm_gains)

                        hist_full_spec += cf.bin_waveforms(cwf, histbins)
                        z_veto = reduced_pulse_info[mask_counter][1]
                        hist_z_vetoed += cf.bin_waveforms(
                            cwf[:, z_veto], histbins)

                        #dst_indx = np.argwhere(hit_positions[:, 0]==evt_no)
                        #if dst_indx.shape[0] != 0:
                        if indx in hit_overlap_indx:
                            ## hit_p = hit_positions[dst_indx[0][0], 1:]
                            hit_p = hit_positions[hit_indcs[hindx], 1:]
                            hindx += 1
                            hist_1_vetoed += cf.bin_waveforms(
                                ring_veto(cwf, 1, z_veto, hit_p, sipm_xy),
                                histbins)
                            hist_2_vetoed += cf.bin_waveforms(
                                ring_veto(cwf, 2, z_veto, hit_p, sipm_xy),
                                histbins)
                            thrVeto = ring_veto(cwf, 3, z_veto, hit_p, sipm_xy)
                            hist_3_vetoed += cf.bin_waveforms(
                                thrVeto, histbins)
                            hist_3_aveto += cf.bin_waveforms(
                                thrVeto[:, np.invert(z_veto)], histbins)

                        mask_counter += 1
                        #if mask_counter < len(reduced_pulse_info):
                        #    evt_no = reduced_pulse_info[mask_counter][0]
                        #    indx = np.argwhere(revent_nos==evt_no)
                        #else:
                        ## dummy evt_no to definitely give no info
                        #    indx = np.argwhere(revent_nos==-100)
                        #print(indx, indx[0][0])
                    full_spec(hist_full_spec)
                    z_vetoed(hist_z_vetoed)
                    one_ring(hist_1_vetoed)
                    two_ring(hist_2_vetoed)
                    thr_ring(hist_3_vetoed)
                    inv_thre(hist_3_aveto)
            except tb.HDF5ExtError:
                print('corrupt file')
                continue

            ## --- fragment from a different script --------------------------
            ## The lines below are the tail of a compute_psf(df, i) function
            ## (called at module level further down); its definition and the
            ## z-slice loop that defines z_sel, Xrange, Yrange, nbinX, nbinY,
            ## the_db, writer, z and z_step are missing from this snippet.
            hdst = hdst_psf_processing(df[z_sel], [Xrange, Yrange], the_db)
            # Safety check (single-SiPM events are not considered physical)
            hdst = hdst[hdst.nsipm > 1]

            # Loop to extract the PSF in different regions.
            bin_edges = [np.linspace(*rr, [nbinX, nbinY][i] + 1)
                         for i, rr in enumerate([Xrange, Yrange])]
            psf_new, entries_new, bins_new = create_psf(
                (hdst.RelX.values, hdst.RelY.values), hdst.NormQ, bin_edges)

            writer(bins_new[0], bins_new[1], [0], 0., 0., z + z_step / 2,
                   np.asarray([psf_new]).transpose((1, 2, 0)),
                   np.asarray([entries_new]).transpose((1, 2, 0)))


start = int(sys.argv[1])
numb = int(sys.argv[2])
for i in range(start, start + numb):
    thefile = filename.format(i)
    try:
        df = dstio.load_dsts([thefile], 'RECO', 'Events')
        df = df.drop(['Xrms', 'Yrms', 'Qc', 'Ec', 'track_id'],
                     axis='columns').reset_index(drop=True)
    except Exception:
        print(f'File {thefile} not found or corrupted.')
        continue
    compute_psf(df, i)
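
## A minimal sketch of the ring_veto helper assumed in generate_pdfs above;
## every detail is an assumption for illustration. It keeps the full
## (n_sipm, n_samples) shape expected by cf.bin_waveforms by pushing the
## samples of SiPMs within n_rings of the hit position out of the histogram
## range (10 mm pitch assumed). Whether the z veto is applied inside or by
## the caller is ambiguous in the usage above; this sketch leaves it to the
## caller.
import numpy as np

def ring_veto(cwf, n_rings, z_veto, hit_p, sipm_xy, pitch=10.0, dummy=-999.0):
    dist = np.sqrt(np.sum((sipm_xy - hit_p) ** 2, axis=1))
    wf = np.array(cwf, dtype=float)
    wf[dist <= n_rings * pitch] = dummy  # falls below histbins = [-10, 300)
    return wf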