Example #1
    def runtime_cycle(df_row):

        # build the raw file path (it contains {sysn}-style placeholders)
        f_raw = f'{dg.lh5_dir}/{df_row.raw_path}/{df_row.raw_file}'
        f_raw = f_raw.format_map({'sysn': 'geds'})

        # always look for Ge
        f_key = df_row.raw_file.format_map({'sysn': 'geds'})
        if not os.path.exists(f_raw):
            # print(f'no Ge data: {f_key}')
            return pd.Series({'runtime': 0, 'rt_std': 0})

        # for PGT, compare the first three channels (for redundancy)
        rts = []
        ge_groups = sto.ls(f_raw)
        for ge in ge_groups[:3]:
            ts = lh5.load_nda([f_raw], ['timestamp'],
                              ge + '/raw/')['timestamp']
            rts.append(ts[-1])

        # take the largest last-timestamp & compute the channel spread
        runtime = max(rts) / 60  # sec --> min
        rt_std = np.std(np.array(rts))
        # print(f_key, runtime, rt_std)

        return pd.Series({'runtime': runtime, 'rt_std': rt_std})
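A note on the reduction above: the runtime comes from the largest end-of-run timestamp across the first few channels, and the channel-to-channel spread serves as a crude uncertainty. A minimal self-contained sketch of the same arithmetic on invented numbers (no lh5 files needed):

import numpy as np
import pandas as pd

# synthetic stand-in: last timestamps (sec) from three Ge channels of one cycle
last_ts = np.array([3571.2, 3571.9, 3570.8])

runtime = last_ts.max() / 60   # longest channel sets the runtime, in minutes
rt_std = np.std(last_ts)       # channel-to-channel spread, in seconds
print(pd.Series({'runtime': runtime, 'rt_std': rt_std}))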
Example #2
def show_cal_spectrum(dg):
    """
    apply calibration to dsp file
    """
    # get file list and load energy data (numpy array)
    lh5_dir = os.path.expandvars(dg.config['lh5_dir'])
    dsp_list = lh5_dir + dg.file_keys['dsp_path'] + '/' + dg.file_keys[
        'dsp_file']
    edata = lh5.load_nda(dsp_list, ['trapEmax'],
                         'ORSIS3302DecoderForEnergy/dsp')
    rt_min = dg.file_keys['runtime'].sum()
    u_start = dg.file_keys.iloc[0]['startTime']
    t_start = pd.to_datetime(u_start, unit='s')  # pd.Timestamp
    print('Found energy data:', [(et, len(ev)) for et, ev in edata.items()])
    print(f'Runtime (min): {rt_min:.2f}')

    # load calibration from peakfit
    cal_db = db.TinyDB(storage=MemoryStorage)
    with open('ecalDB.json') as f:
        raw_db = json.load(f)
        cal_db.storage.write(raw_db)
    runs = dg.file_keys.run.unique()
    if len(runs) > 1:
        print("sorry, I can't do combined runs yet")
        exit()
    run = runs[0]
    tb = cal_db.table("peakfit_trapEmax").all()
    df_cal = pd.DataFrame(tb)
    df_cal['run'] = df_cal['run'].astype(int)
    df_run = df_cal.loc[df_cal.run == run]
    cal_pars = df_run.iloc[0][['cal0', 'cal1', 'cal2']]

    # compute calibrated energy
    pol = np.poly1d(cal_pars)  # handy numpy polynomial object
    cal_data = pol(edata['trapEmax'])

    # elo, ehi, epb, etype = 0, 3000, 1, 'trapEmax_cal' # gamma region
    elo, ehi, epb, etype = 2500, 8000, 10, 'trapEmax_cal'  # overflow region
    # elo, ehi, epb, etype = 0, 250, 1, 'trapEmax_cal' # low-e region

    hist, bins, _ = pgh.get_hist(cal_data, range=(elo, ehi), dx=epb)

    # normalize by runtime
    hist_rt = np.divide(hist, rt_min * 60)

    plt.plot(np.nan, np.nan, '-w', lw=1, label=f'start: {t_start}')

    plt.plot(bins[1:],
             hist_rt,
             ds='steps',
             c='b',
             lw=1,
             label=f'{etype}, {rt_min:.2f} mins')

    plt.xlabel(etype, ha='right', x=1)
    plt.ylabel('cts / sec', ha='right', y=1)
    plt.legend(loc=1, fontsize=12)
    plt.tight_layout()
    plt.savefig('./plots/CalSpectrum.png')
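One detail worth calling out: np.poly1d takes coefficients in descending powers, so 'cal0' must be the highest-order term for pol(edata['trapEmax']) to be correct; that ordering matches how peakfit_group (Example #6 below) stores cal_pars. A sketch with invented constants:

import numpy as np

# invented quadratic calibration constants; np.poly1d expects descending
# powers, so cal0 is the highest-order coefficient
cal_pars = [1.0e-9, 0.4, 0.1]   # cal0*E**2 + cal1*E + cal2
pol = np.poly1d(cal_pars)

trapEmax = np.array([2500.0, 5000.0, 10000.0])   # synthetic uncalibrated values
print(pol(trapEmax))                             # calibrated energies (keV)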
Example #3
def check_raw_spectrum(dg, config, db_ecal):
    """
    $ ./energy_cal.py -q 'query' --raw
    """
    import h5py

    # load energy data
    lh5_dir = os.path.expandvars(config['lh5_dir'])
    dsp_list = lh5_dir + dg.file_keys['dsp_path'] + '/' + dg.file_keys[
        'dsp_file']
    raw_data = lh5.load_nda(dsp_list, config['rawe'], config['input_table'])
    runtime_min = dg.file_keys['runtime'].sum()

    print('\nShowing raw spectra ...')
    for etype in config['rawe']:
        xlo, xhi, xpb = config['init_vals'][etype]["raw_range"]

        # load energy data for this estimator
        data = raw_data[etype]

        # print columns of table
        file_info = db_ecal.table('_file_info').all()[0]
        tb_in = file_info['input_table']
        with h5py.File(dsp_list.iloc[0], 'r') as hf:
            print("LH5 columns:", list(hf[f'{tb_in}'].keys()))

        # generate histogram
        hist, bins, var = pgh.get_hist(data, range=(xlo, xhi), dx=xpb)
        bins = bins[1:]  # drop the first edge so len(bins) == len(hist) for ds='steps'

        # normalize by runtime
        hist_rt = np.divide(hist, runtime_min * 60)

        print(
            '\nPlease determine the following parameters for ecal config file:\n'
            "  - 'raw_range': Optimal binning, and hi/lo raw energy limits\n"
            "  - 'peakdet_thresh': ~1/2 the height of a target peak\n"
            "  - 'lowe_cut' energy threshold for peak detection")

        print(
            f'\nRaw E: {etype}, {len(data)} cts, runtime: {runtime_min:.2f} min'
        )

        plt.plot(bins, hist_rt, ds='steps', c='b', lw=1, label=etype)
        plt.xlabel(etype, ha='right', x=1)
        plt.ylabel(f'cts/sec, {xpb}/bin', ha='right', y=1)

        if config['batch_mode']:
            plt.savefig('./plots/cal_spec_test.png')
        else:
            plt.show()
        plt.close()
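The h5py block above just lists the column names of the input table. A self-contained version of the same inspection pattern, writing a throwaway file first (file and column names invented):

import h5py
import numpy as np

# build a throwaway LH5-like file: one table group, a few column datasets
with h5py.File('demo.lh5', 'w') as hf:
    tb = hf.create_group('ORSIS3302DecoderForEnergy/dsp')
    for col in ['trapEmax', 'bl', 'timestamp']:
        tb.create_dataset(col, data=np.zeros(10))

# same pattern as above: open read-only and list the column names
with h5py.File('demo.lh5', 'r') as hf:
    print('LH5 columns:', list(hf['ORSIS3302DecoderForEnergy/dsp'].keys()))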
Example #4
def check_timestamps(f_raw):
    """
    fc daq timestamps are in seconds, from beginning of file:
    https://github.com/legend-exp/pygama/blob/master/pygama/io/fcdaq.py#L27
    """
    ts = lh5.load_nda([f_raw], ['timestamp'], 'g024/raw')['timestamp']

    print(ts)
    print(ts.shape)
    print(f'first: {ts[0]}  {min(ts)}  last: {ts[-1]}  {max(ts)}')

    rt = ts[-1] / 60  # runtime in min

    plt.plot(np.arange(len(ts)), ts, '.b', label=f'runtime: {rt:.1f} min')
    plt.xlabel('entry', ha='right', x=1)
    plt.ylabel('timestamp', ha='right', y=1)
    plt.legend()
    plt.savefig('./plots/ts_check.png', dpi=100)
Example #5
def show_raw_spectrum(dg):
    """
    show spectrum w/ onbd energy and trapE
    - get calibration constants for onbd energy and 'trapE' energy
    - TODO: fit each expected peak and get resolution vs energy
    """
    # get file list and load energy data (numpy array)
    lh5_dir = os.path.expandvars(dg.config['lh5_dir'])
    dsp_list = lh5_dir + dg.file_keys['dsp_path'] + '/' + dg.file_keys[
        'dsp_file']
    edata = lh5.load_nda(dsp_list, ['trapEmax'],
                         'ORSIS3302DecoderForEnergy/dsp')
    rt_min = dg.file_keys['runtime'].sum()
    u_start = dg.file_keys.iloc[0]['startTime']
    t_start = pd.to_datetime(u_start, unit='s')  # pd.Timestamp

    print('Found energy data:', [(et, len(ev)) for et, ev in edata.items()])
    print(f'Runtime (min): {rt_min:.2f}')

    elo, ehi, epb, etype = 0, 25000, 10, 'trapEmax'

    ene_uncal = edata[etype]
    hist, bins, _ = pgh.get_hist(ene_uncal, range=(elo, ehi), dx=epb)

    # normalize by runtime
    hist_rt = np.divide(hist, rt_min * 60)

    plt.plot(np.nan, np.nan, '-w', lw=1, label=t_start)

    plt.semilogy(bins[1:],
                 hist_rt,
                 ds='steps',
                 c='b',
                 lw=1,
                 label=f'{etype}, {rt_min:.2f} mins')

    plt.xlabel(etype, ha='right', x=1)
    plt.ylabel('cts / sec', ha='right', y=1)
    plt.legend()
    plt.tight_layout()
    #     plt.show()
    plt.savefig('./plots/uncalSpectrum.png')
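The normalization step divides raw counts by the livetime in seconds to get a rate spectrum. A numpy-only sketch of the same idea (assuming pgh.get_hist is essentially np.histogram with a variance array appended, which is how it is used throughout these examples):

import numpy as np

rng = np.random.default_rng(1)
ene_uncal = rng.exponential(3000, size=100_000)   # synthetic spectrum
rt_min = 42.0                                     # runtime in minutes

elo, ehi, epb = 0, 25000, 10
hist, bins = np.histogram(ene_uncal, bins=np.arange(elo, ehi + epb, epb))

hist_rt = hist / (rt_min * 60)   # counts per second in each bin
print(hist_rt[:5])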
Example #6
def peakfit_group(df_group, config, db_ecal):
    """
    """
    # get list of peaks to look for
    epeaks = config['expected_peaks'] + config['test_peaks']
    epeaks = np.array(sorted(epeaks))

    # right now a lookup by 'run' is hardcoded.
    # in principle the lookup should stay general using the gb_cols,
    # but it's kind of hard to see right now how to write the right db queries

    gb_run = df_group['run'].unique()
    if len(gb_run) > 1:
        print("Multi-run (or other) groupbys aren't supported yet, sorry")
        exit()

    # load data
    lh5_dir = os.path.expandvars(config['lh5_dir'])
    dsp_list = lh5_dir + df_group['dsp_path'] + '/' + df_group['dsp_file']
    raw_data = lh5.load_nda(dsp_list, config['rawe'], config['input_table'])
    runtime_min = df_group['runtime'].sum()

    # loop over energy estimators of interest
    pf_results = {}
    for et in config['rawe']:

        # load first-guess calibration constant from its table in the DB
        db_table = db_ecal.table(f'peakdet_{et}').all()
        df_cal = pd.DataFrame(db_table)
        lin_cal = df_cal.loc[df_cal.run == str(gb_run[0])]['lincal'].values[0]
        cal_data = raw_data[et] * lin_cal


        # compute expected peak locations and widths (fit to Gaussians)
        fit_results = {}
        for ie, epk in enumerate(epeaks):

            # adjust the window.  resolution goes as roughly sqrt(energy)
            window = np.sqrt(epk) * 0.8
            xlo, xhi = epk - window/2, epk + window/2
            nbins = int(window) * 5
            xpb = (xhi-xlo)/nbins
            ibin_bkg = int(nbins * 0.2)

            # get histogram, error, normalize by runtime
            pk_data = cal_data[(cal_data >= xlo) & (cal_data <= xhi)]
            hist, bins, _ = pgh.get_hist(pk_data, range=(xlo, xhi), dx=xpb)
            hist_norm = np.divide(hist, runtime_min * 60)
            hist_var = np.array([np.sqrt(h / (runtime_min * 60)) for h in hist])

            # compute expected peak location and width (simple Gaussian)
            bkg0 = np.mean(hist_norm[:ibin_bkg])
            b, h = bins[1:], hist_norm - bkg0
            imax = np.argmax(h)
            upr_half = b[np.where((b > b[imax]) & (h <= np.amax(h)/2))][0]
            bot_half = b[np.where((b < b[imax]) & (h <= np.amax(h)/2))][-1]
            fwhm = upr_half - bot_half
            sig0 = fwhm / 2.355
            # fit to radford peak: mu, sigma, hstep, htail, tau, bg0, amp
            amp0 = np.amax(h) * fwhm
            hstep = 0.001  # fraction that the step contributes
            htail = 0.1
            tau = 10
            p_init = [bins[imax], sig0, hstep, htail, tau, bkg0, amp0]
            p_fit, p_cov = pgf.fit_hist(pgf.radford_peak, hist_norm, bins,
                                        var=hist_var, guess=p_init)
            fit_func = pgf.radford_peak

            p_err = np.sqrt(np.diag(p_cov))
            
            # goodness of fit (reduced chi-square)
            chisq = []
            for i, hbin in enumerate(hist_norm):
                model = fit_func(b[i], *p_fit)
                diff = (model - hbin)**2 / model
                chisq.append(abs(diff))
            rchisq = sum(chisq) / len(hist_norm)
            # fwhm_err = p_err[1] * 2.355 * e_peak / e_fit

            # collect interesting results for this row.  indices follow the
            # radford_peak parameter order above: mu, sigma, hstep, htail,
            # tau, bg0, amp
            fit_results[ie] = {
                'epk':epk,
                'mu':p_fit[0], 'fwhm':p_fit[1]*2.355, 'sig':p_fit[1],
                'amp':p_fit[6], 'bkg':p_fit[5], 'rchisq':rchisq,
                'mu_raw':p_fit[0] / lin_cal, # <-- this is in terms of raw E
                'mu_unc':p_err[0] / lin_cal
                }
            print('Fit results:', fit_results[ie])

            # diagnostic plot, don't delete
            if config['show_plot']:
                plt.axvline(bins[ibin_bkg], c='m', label='bkg region')
                xfit = np.arange(xlo, xhi, xpb * 0.1)
                plt.plot(xfit, fit_func(xfit, *p_init), '-', c='orange',
                         label='init')
                plt.plot(xfit, fit_func(xfit, *p_fit), '-', c='red',
                         label='fit')
                plt.plot(bins[1:], hist_norm, c='b', lw=1.5, ds='steps')
                plt.xlabel('pass-1 energy (kev)', ha='right', x=1)
                plt.legend(fontsize=12)
                if config['batch_mode']:
                    plt.savefig(f'./plots/fit{ie}_peakfit.png')
                else:
                    plt.show()
                plt.close()
                
        # ----------------------------------------------------------------------
        # compute energy calibration by matrix inversion (thanks Tim and Jason!)

        view_cols = ['epk', 'mu', 'fwhm', 'bkg', 'rchisq', 'mu_raw']
        df_fits = pd.DataFrame(fit_results).T
        print(df_fits[view_cols])

        true_peaks = df_fits['epk']
        raw_peaks, raw_error = df_fits['mu_raw'], df_fits['mu_unc']

        error = raw_error / raw_peaks * true_peaks
        cov = np.diag(error**2)
        weights = np.diag(1 / error**2)

        degree = config['pol_order']
        raw_peaks_matrix = np.zeros((len(raw_peaks), degree+1))
        for i, pk in enumerate(raw_peaks):
            temp_degree = degree
            row = np.array([])
            while temp_degree >= 0:
                row = np.append(row, pk**temp_degree)
                temp_degree -= 1
            raw_peaks_matrix[i] += row
        print(raw_peaks_matrix)

        # perform matrix inversion
        xTWX = np.dot(np.dot(raw_peaks_matrix.T, weights), raw_peaks_matrix)
        xTWY = np.dot(np.dot(raw_peaks_matrix.T, weights), true_peaks)
        if np.linalg.det(xTWX) == 0:
            print("singular matrix, determinant is 0, can't get cal constants")
            exit()
        xTWX_inv = np.linalg.inv(xTWX)

        # get polynomial coefficients and error
        cal_pars = np.dot(xTWX_inv, xTWY)
        cal_errs = np.sqrt(np.diag(xTWX_inv))
        n = len(cal_pars)
        print('Fit:', ' '.join([f'p{i}:{cal_pars[i]:.4e}' for i in range(n)]))
        print('Unc:', ' '.join([f'p{i}:{cal_errs[i]:.4e}' for i in range(n)]))

        # ----------------------------------------------------------------------
        # repeat the peak fit with the calibrated energy (affects widths)

        # compute calibrated energy
        pol = np.poly1d(cal_pars) # handy numpy polynomial object
        cal_data = pol(raw_data[et])

        fit_results = {}
        for ie, epk in enumerate(epeaks):

            # adjust the window.  resolution goes as roughly sqrt(energy)
            window = np.sqrt(epk) * 0.5
            xlo, xhi = epk - window/2, epk + window/2
            nbins = int(window) * 5
            xpb = (xhi-xlo)/nbins
            ibin_bkg = int(nbins * 0.2)

            # get histogram, error, normalize by runtime
            pk_data = cal_data[(cal_data >= xlo) & (cal_data <= xhi)]
            hist, bins, _ = pgh.get_hist(pk_data, range=(xlo, xhi), dx=xpb)
            hist_norm = np.divide(hist, runtime_min * 60)
            hist_var = np.array([np.sqrt(h / (runtime_min * 60)) for h in hist])

            # compute expected peak location and width (simple Gaussian)
            bkg0 = np.mean(hist_norm[:ibin_bkg])
            b, h = bins[1:], hist_norm - bkg0
            imax = np.argmax(h)
            upr_half = b[np.where((b > b[imax]) & (h <= np.amax(h)/2))][0]
            bot_half = b[np.where((b < b[imax]) & (h <= np.amax(h)/2))][-1]
            fwhm = upr_half - bot_half
            sig0 = fwhm / 2.355
            amp0 = np.amax(h) * fwhm
            p_init = [amp0, bins[imax], sig0, bkg0] # a, mu, sigma, bkg
            p_fit, p_cov = pgf.fit_hist(pgf.gauss_bkg, hist_norm, bins,
                                        var=hist_var, guess=p_init)
            p_err = np.sqrt(np.diag(p_cov))

            # save results
            fit_results[ie] = {
                'epk':epk,
                'mu':p_fit[1], 'fwhm':p_fit[2] * 2.355, 'sig':p_fit[2],
                'amp':p_fit[0], 'bkg':p_fit[3],
                }
            print('fit results:', fit_results[ie])

        # consolidate results again
        view_cols = ['epk', 'mu', 'fwhm', 'residual']
        df_fits = pd.DataFrame(fit_results).T

        # compute the difference between lit and measured values
        cal_peaks = pol(raw_peaks)
        df_fits['residual'] = true_peaks - cal_peaks
        print(df_fits[view_cols])

        # fit fwhm vs. energy
        # FWHM(E) = sqrt(A_noise^2 + A_fano^2 * E + A_qcol^2 E^2)
        # Ref: Eq. 3 of https://arxiv.org/abs/1902.02299
        # TODO: fix error handling
        def sqrt_fwhm(x, a_n, a_f, a_c):
            return np.sqrt(a_n**2 + a_f**2 * x + a_c**2 * x**2)
        p_guess = [0.3, 0.05, 0.001]
        p_fit, p_cov = curve_fit(sqrt_fwhm, df_fits['mu'], df_fits['fwhm'],
                                 p0=p_guess)#, sigma = np.sqrt(h), absolute_sigma=True)
        p_err = np.sqrt(np.diag(p_cov))

        if config['show_plot']:

            # show a split figure with calibrated spectrum + used peaks on top,
            # and calib.function and resolution vs. energy on bottom
            fig, (p0, p1) = plt.subplots(2, 1, figsize=(8, 8), sharex=True)
                                         # gridspec_kw={'height_ratios':[2, 1]}))

            # get histogram (cts / keV / d)
            xlo, xhi, xpb = config['cal_range']
            hist, bins, _ = pgh.get_hist(cal_data, range=(xlo, xhi), dx=xpb)
            hist_norm = np.divide(hist, runtime_min * 60 * xpb)

            # show peaks
            cmap = plt.cm.get_cmap('brg', len(df_fits)+1)
            for i, row in df_fits.iterrows():

                # get a pretty label for the isotope
                lbl = config['pks'][str(row['epk'])]
                iso = ''.join(r for r in re.findall('[0-9]+', lbl))
                ele = ''.join(r for r in re.findall('[a-z]', lbl, re.I))
                pk_lbl = r'$^{%s}$%s' % (iso, ele)

                pk_diff = row['epk'] - row['mu']
                p0.axvline(row['epk'], ls='--', c=cmap(i), lw=1,
                           label=f"{pk_lbl} : {row['epk']} {pk_diff:+.3f}")

            p0.semilogy(bins[1:], hist_norm, ds='steps', c='b', lw=1)

            p0.set_ylabel('cts / s / keV', ha='right', y=1)
            p0.legend(loc=3, fontsize=11)

            # TODO: add fwhm errorbar
            x_fit = np.arange(xlo, xhi, xpb)
            y_init = sqrt_fwhm(x_fit, *p_guess)
            p1.plot(x_fit, y_init, '-', lw=1, c='orange', label='guess')

            y_fit = sqrt_fwhm(x_fit, *p_fit)
            a_n, a_f, a_c = p_fit
            fit_label = r'$\sqrt{(%.2f)^2 + (%.3f)^2 E + (%.4f)^2  E^2}$' % (a_n, a_f, a_c)
            p1.plot(x_fit, y_fit, '-r', lw=1, label=f'fit: {fit_label}')

            p1.plot(df_fits['mu'], df_fits['fwhm'], '.b')

            p1.set_xlabel('Energy (keV)', ha='right', x=1)
            p1.set_ylabel('FWHM (keV)', ha='right', y=1)
            p1.legend(fontsize=11)

            if config['batch_mode']:
                plt.savefig('./plots/peakfit.png')
            else:
                plt.show()

        # the order of the polynomial should be in the table name
        pf_results[f'{et}_Anoise'] = p_fit[0]
        pf_results[f'{et}_Afano'] = p_fit[1]
        pf_results[f'{et}_Aqcol'] = p_fit[2]
        for i in range(len(cal_pars)):
            pf_results[f'{et}_cal{i}'] = cal_pars[i]
        for i in range(len(cal_pars)):
            pf_results[f'{et}_unc{i}'] = cal_errs[i]

    return pd.Series(pf_results)
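The matrix-inversion block above is ordinary weighted least squares: with X the matrix of raw peak positions raised to descending powers, W = diag(1/error^2), and Y the true peak energies, the calibration coefficients solve (X^T W X) p = X^T W Y. A self-contained sketch with invented peak positions:

import numpy as np

# invented: three known lines (keV) observed at these raw positions +/- errors
true_peaks = np.array([238.6, 1460.8, 2614.5])
raw_peaks = np.array([330.1, 2021.5, 3617.9])
error = np.array([0.5, 0.3, 0.8])

degree = 1                             # linear calibration: E = p0*raw + p1
X = np.vander(raw_peaks, degree + 1)   # columns: raw**1, raw**0 (descending)
W = np.diag(1 / error**2)

xTWX = X.T @ W @ X
xTWY = X.T @ W @ true_peaks
cal_pars = np.linalg.solve(xTWX, xTWY)   # same as inv(xTWX) @ xTWY
cal_errs = np.sqrt(np.diag(np.linalg.inv(xTWX)))

print(np.poly1d(cal_pars))   # ready for pol(raw_data), as in the code above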
Example #7
def peakdet_group(df_group, config):
    """
    Access all files in this group, load energy histograms, and find the
    "first guess" linear calibration constant.
    Return the value, and a bool indicating success.
    """
    # get file list and load energy data
    lh5_dir = os.path.expandvars(config['lh5_dir'])
    dsp_list = lh5_dir + df_group['dsp_path'] + '/' + df_group['dsp_file']

    edata = lh5.load_nda(dsp_list, config['rawe'], config['input_table'])
    print('Found energy data:', [(et, len(ev)) for et, ev in edata.items()])

    runtime_min = df_group['runtime'].sum()
    print(f'Runtime (min): {runtime_min:.2f}')

    # loop over energy estimators of interest
    pd_results = {}
    for et in config['rawe']:

        # get histogram, error, normalize by runtime, and derivative
        xlo, xhi, xpb = config['init_vals'][et]['raw_range']
        hist, bins, var = pgh.get_hist(edata[et], range=(xlo, xhi), dx=xpb)
        hist_norm = np.divide(hist, runtime_min * 60)
        hist_err = np.array([np.sqrt(hbin / (runtime_min * 60)) for hbin in hist])

        # plt.plot(bins[1:], hist_norm, ds='steps')
        # plt.show()
        # hist_deriv = np.diff(hist_norm)
        # hist_deriv = np.insert(hist_deriv, 0, 0)

        # run peakdet
        pd_thresh = config['init_vals'][et]['peakdet_thresh']
        lowe_cut = config['init_vals'][et]['lowe_cut']
        ctr_bins = (bins[:-1] + bins[1:]) / 2.
        idx = np.where(ctr_bins > lowe_cut)

        maxes, mins = pgc.peakdet(hist_norm[idx], pd_thresh, ctr_bins[idx])
        # maxes, mins = pgc.peakdet(hist_deriv[idx], pd_thresh, ctr_bins[idx])
        if len(maxes) == 0:
            print('Warning: no maxima found, adjust the peakdet threshold')
        # print(maxes) # x (energy) [:,0], y (counts) [:,1]

        # run peak matching
        exp_pks = config['expected_peaks']
        tst_pks = config['test_peaks']
        mode = config['match_mode']
        etol = config['raw_ene_tol']
        lin_cal, mp_success = match_peaks(maxes, exp_pks, tst_pks, mode, etol)

        if config['show_plot']:

            # plot uncalibrated and calibrated energy spectrum, w/ maxima
            fig, (p0, p1) = plt.subplots(2, 1, figsize=(8, 8))

            idx = np.where(bins[1:] > lowe_cut)
            imaxes = [np.where(np.isclose(ctr_bins, x[0]))[0][0] for x in maxes]
            imaxes = np.asarray(imaxes)

            # energy, uncalibrated
            p0.plot(bins[imaxes], hist_norm[imaxes], '.m')
            p0.plot(bins[idx], hist_norm[idx], ds='steps', c='b', lw=1, label=et)
            p0.set_ylabel(f'cts/s, {xpb}/bin', ha='right', y=1)
            p0.set_xlabel(et, ha='right', x=1)

            # energy, with rough calibration
            bins_cal = bins[1:] * lin_cal
            p1.plot(bins_cal, hist_norm, ds='steps', c='b', lw=1,
                    label=f'E = {lin_cal:.3f}*{et}')

            # compute best-guess location of all peaks, assuming rough calibration
            cal_maxes = lin_cal * maxes[:, 0]
            all_pks = np.concatenate((exp_pks, tst_pks))
            raw_guesses = []
            for pk in all_pks:
                imatch = np.isclose(cal_maxes, pk, atol=config['mp_tol'])
                if imatch.any():
                    # print(pk, cal_maxes[imatch], maxes[:,0][imatch])
                    raw_guesses.append([pk, maxes[:,0][imatch][0]])
            rg = np.asarray(raw_guesses)
            rg = rg[rg[:,0].argsort()] # sort by energy

            cmap = plt.cm.get_cmap('jet', len(rg))
            for i, epk in enumerate(rg):
                idx_nearest = (np.abs(bins_cal - epk[0])).argmin()
                cts_nearest = hist_norm[idx_nearest]
                p1.plot(epk[0], cts_nearest, '.', c=cmap(i),
                        label=f'{epk[0]:.1f} keV')

            p1.set_xlabel(f'{et}, pass-1 cal', ha='right', x=1)
            p1.set_ylabel(f'cts/s, {xpb} kev/bin', ha='right', y=1)
            p1.legend(fontsize=10)

            if config['batch_mode']:
                plt.savefig(f'./plots/peakdet_cal_{et}.pdf')
            else:
                plt.show()

        pd_results[f'{et}_lincal'] = lin_cal
        pd_results[f'{et}_lcpass'] = str(mp_success)

    return pd.Series(pd_results)
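pgc.peakdet returns local maxima as (x, y) pairs above a threshold. As a rough stand-in (this is not the pygama implementation), scipy.signal.find_peaks does the same job on a synthetic spectrum, with prominence playing the role of peakdet_thresh:

import numpy as np
from scipy.signal import find_peaks

# synthetic spectrum: two gaussian peaks on a flat background
rng = np.random.default_rng(2)
data = np.concatenate([rng.normal(1200, 5, 5000),
                       rng.normal(2600, 8, 2000),
                       rng.uniform(0, 4000, 20000)])
hist, bins = np.histogram(data, bins=np.arange(0, 4000, 2))
ctr_bins = (bins[:-1] + bins[1:]) / 2

ipk, _ = find_peaks(hist, prominence=100)
maxes = np.column_stack([ctr_bins[ipk], hist[ipk]])   # x (energy), y (counts)
print(maxes)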
Example #8
def plot_dsp(dg):
    """
    create a DataFrame from the dsp files and make some 1d and 2d diagnostic plots.
    
    for reference, current 12/30/20 dsp parameters:
      ['channel', 'timestamp', 'energy', 'bl', 'bl_sig', 'trapEftp',
       'trapEmax', 'triE', 'tp_max', 'tp_0', 'tp_10', 'tp_50', 'tp_80',
       'tp_90', 'A_10', 'AoE', 'dcr_raw', 'dcr_max', 'dcr_ftp', 'hf_max']
    columns added by this code:
      ['run', 'cycle', 'ts_sec', 'ts_glo']
    """
    sto = lh5.Store()

    dsp_name = 'ORSIS3302DecoderForEnergy/dsp'
    wfs_name = 'ORSIS3302DecoderForEnergy/raw/waveform'

    def get_dsp_dfs(df_row):
        """
        grab the dsp df, add some columns, and return it
        """
        f_dsp = dg.lh5_dir + '/' + df_row.dsp_path + '/' + df_row.dsp_file
        if len(f_dsp) > 1:
            print('Error, this part is supposed to only load individual files')
            exit()
        f_dsp = f_dsp.iloc[0]
        run, cyc = df_row.run.iloc[0], df_row.cycle.iloc[0]
        # print(run, cyc, f_dsp)

        # grab the dataframe and add some columns
        tb, nr = sto.read_object(dsp_name, f_dsp)
        df = tb.get_dataframe()
        df['run'] = run
        df['cycle'] = cyc

        # need global timestamp.  just calculate here instead of making hit files
        clock = 100e6  # 100 MHz
        UINT_MAX = 4294967295  # (0xffffffff)
        t_max = UINT_MAX / clock
        ts = df['timestamp'].values / clock
        tdiff = np.diff(ts)
        tdiff = np.insert(tdiff, 0, 0)
        iwrap = np.where(tdiff < 0)
        iloop = np.append(iwrap[0], len(ts))
        ts_new, t_roll = [], 0
        for i, idx in enumerate(iloop):
            ilo = 0 if i == 0 else iwrap[0][i - 1]
            ihi = idx
            ts_block = ts[ilo:ihi]
            t_last = ts[ilo - 1]
            t_diff = t_max - t_last
            ts_new.append(ts_block + t_roll)
            t_roll += t_last + t_diff
        df['ts_sec'] = np.concatenate(ts_new)
        t_start = df_row.startTime.iloc[0]
        df['ts_glo'] = df['ts_sec'] + t_start

        # print(df)
        return df

    # create the multi-cycle DataFrame
    df_dsp = dg.fileDB.groupby(['cycle']).apply(get_dsp_dfs)
    df_dsp.reset_index(inplace=True, drop=True)  # << VERY IMPORTANT!

    print(df_dsp)
    print(df_dsp.columns)

    # 1. 1d energy histogram -- use this to select energy range of interest
    et = 'trapEmax'
    elo, ehi, epb = 0, 10000, 10
    edata = df_dsp.trapEmax.values
    hist, bins, _ = pgh.get_hist(edata, range=(elo, ehi), dx=epb)
    plt.semilogy(bins[1:], hist, ds='steps', c='b', lw=1)
    plt.xlabel(et, ha='right', x=1)
    plt.ylabel('Counts', ha='right', y=1)
    # plt.show()
    plt.savefig('./plots/risingedge_1dspec.pdf')
    plt.cla()

    # 2. 2d histo: show risetime vs. time for wfs in an energy range

    # choose risetime range (usec)
    # rlo, rhi, rpb = 0, 5, 0.1 # run 110 (good)
    rlo, rhi, rpb = 0, 50, 1  # run 111 (bad)

    # select energy range
    elo, ehi, epb = 1500, 1600, 0.5
    df = df_dsp.query(f'trapEmax > {elo} and trapEmax < {ehi}').copy()

    # calculate timestamp range
    t0 = df_dsp.iloc[0]['ts_glo']
    df['ts_adj'] = (df.ts_glo - t0) / 60  # minutes after t0
    tlo, thi, tpb = 0, df.ts_adj.max(), 1

    # compute t50-100 risetime
    df['rt_us'] = (df.tp_max - df.tp_50) / 1e3  # convert ns to us
    # print(df[['tp_max', 'tp_50', 'rt_us']])

    nbx, nby = int((thi - tlo) / tpb), int((rhi - rlo) / rpb)
    plt.hist2d(df['ts_adj'],
               df['rt_us'],
               bins=[nbx, nby],
               range=[[tlo, thi], [rlo, rhi]],
               cmap='jet')

    plt.xlabel('Time (min)', ha='right', x=1)
    plt.ylabel('Rise Time (t50-100), usec', ha='right', y=1)
    # plt.show()
    plt.savefig('./plots/risingedge_2dRisetime.png', dpi=150)
    plt.cla()

    # 3. 1st 10 wfs from energy region selection (requires raw file)
    # this assumes the first file has 10 events
    db = dg.fileDB.iloc[0]
    cyc = db.cycle
    f_raw = dg.lh5_dir + '/' + db.raw_path + '/' + db.raw_file
    f_dsp = dg.lh5_dir + '/' + db.dsp_path + '/' + db.dsp_file

    edata = lh5.load_nda([f_dsp], ['trapEmax'], dsp_name)['trapEmax']
    idx = np.where((edata >= elo) & (edata <= ehi))

    nwfs = 10
    idx_sel = idx[0][:nwfs]
    n_rows = idx_sel[-1] + 1  # read up to this event and stop
    tb_wfs, n_wfs = sto.read_object(wfs_name, f_raw, n_rows=n_rows)

    # grab the 2d numpy array of waveforms
    wfs = tb_wfs['values'].nda[idx_sel, :]

    ts = np.arange(0, len(wfs[0, :-2])) / 1e2  # usec

    for iwf in range(wfs.shape[0]):
        plt.plot(ts, wfs[iwf, :-2], lw=2, alpha=0.5)

    plt.xlabel('Time (us)', ha='right', x=1)
    plt.ylabel('ADC', ha='right', y=1)

    plt.show()
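The ts_sec construction in get_dsp_dfs unwraps the 32-bit timestamp counter, which rolls over every UINT_MAX/clock ≈ 42.9 s. A more compact cumulative form of the same correction, sketched on synthetic timestamps:

import numpy as np

clock = 100e6                  # 100 MHz
t_max = 4294967295 / clock     # ~42.9 s until the uint32 counter wraps

# synthetic timestamps (sec) that wrap twice
ts = np.array([1.0, 20.0, 40.0, 5.0, 30.0, 10.0])

# wherever time runs backwards a rollover occurred; add t_max per wrap
tdiff = np.insert(np.diff(ts), 0, 0)
n_wraps = np.cumsum(tdiff < 0)
ts_sec = ts + n_wraps * t_max
print(ts_sec)   # monotonically increasing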
Example #9
    def analyze_pulser_run(df_row):
        """
        loop over each row of dfp and save the superpulse
        """
        epk, rt, vp, cyc = df_row[['E_keV', 'runtime', 'V_pulser', 'cycle']]
        rt *= 60  # sec
        if epk == 0: return []  # skip the bkg run

        # load pulser energies
        f_dsp = dg.lh5_dir + '/' + df_row.dsp_path + '/' + df_row.dsp_file
        pdata = lh5.load_nda([f_dsp], ['energy'], dsp_name)['energy'] * ecal

        # auto-narrow the window around the max pulser peak in two steps
        elo, ehi, epb = epk - 50, epk + 50, 0.5
        pdata_all = pdata[(pdata > elo) & (pdata < ehi)]
        hp, bp, _ = pgh.get_hist(pdata_all, range=(elo, ehi), dx=epb)
        pctr = bp[np.argmax(hp)]

        plo, phi, ppb = pctr - e_window, pctr + e_window, 0.1
        pdata_pk = pdata[(pdata > plo) & (pdata < phi)]
        hp, bp, _ = pgh.get_hist(pdata_pk, range=(plo, phi), dx=ppb)
        hp_rt = np.divide(hp, rt)
        hp_var = np.array([np.sqrt(h / (rt)) for h in hp])

        # fit a gaussian to get 1 sigma e-values
        ibin_bkg = 50
        bkg0 = np.mean(hp_rt[:ibin_bkg])
        b, h = bp[1:], hp_rt
        imax = np.argmax(h)
        upr_half = b[np.where((b > b[imax]) & (h <= np.amax(h) / 2))][0]
        bot_half = b[np.where((b < b[imax]) & (h <= np.amax(h) / 2))][-1]
        fwhm = upr_half - bot_half
        sig0 = fwhm / 2.355
        amp0 = np.amax(hp_rt) * fwhm
        p_init = [amp0, bp[imax], sig0, bkg0]
        p_fit, p_cov = pgf.fit_hist(pgf.gauss_bkg,
                                    hp_rt,
                                    bp,
                                    var=hp_var,
                                    guess=p_init)
        amp, mu, sigma, bkg = p_fit

        # select events within 1 sigma of the maximum
        # and pull the waveforms from the raw file to make a superpulse.
        idx = np.where((pdata >= mu - sigma) & (pdata <= mu + sigma))
        print(
            f'Pulser at {epk} keV, {len(idx[0])} events.  Limiting to {nwfs}.')
        idx = idx[0][:nwfs]  # always unwrap the np.where tuple; slicing caps at nwfs

        # grab the 2d numpy array of pulser wfs
        n_rows = idx[-1] + 1  # read up to this event and stop
        f_raw = dg.lh5_dir + '/' + df_row.raw_path + '/' + df_row.raw_file
        tb_wfs, n_wfs = sto.read_object(raw_name, f_raw, n_rows=n_rows)
        pwfs = tb_wfs['values'].nda[idx, :]
        # print(idx, len(idx), pwfs.shape, '\n', pwfs)

        # data cleaning step: remove events with outlier baselines
        bl_means = pwfs[:, :500].mean(axis=1)
        bl_mode = mode(bl_means.astype(int))[0][0]
        bl_ctr = np.subtract(bl_means, bl_mode)
        idx_dc = np.where(np.abs(bl_ctr) < bl_thresh)
        pwfs = pwfs[idx_dc[0], :]
        bl_means = bl_means[idx_dc]
        # print(pwfs.shape, bl_means.shape)

        # baseline subtract (transpose so bl_means broadcasts across rows)
        wfs = (pwfs.transpose() - bl_means).transpose()

        # time-align all wfs at their 50% timepoint (tricky!).
        # adapted from pygama/sandbox/old_dsp/[calculators,transforms].py
        # an alternate approach would be to use ProcessingChain here
        wf_maxes = np.amax(wfs, axis=1)
        timepoints = np.argmax(wfs >= wf_maxes[:, None] * tp_align, axis=1)
        wf_idxs = np.zeros([wfs.shape[0], n_pre + n_post], dtype=int)
        row_idxs = np.zeros_like(wf_idxs)
        for i, tp in enumerate(timepoints):
            wf_idxs[i, :] = np.arange(tp - n_pre, tp + n_post)
            row_idxs[i, :] = i
        wfs = wfs[row_idxs, wf_idxs]

        # take the average to get the superpulse
        superpulse = np.mean(wfs, axis=0)

        # normalize all wfs to the superpulse maximum
        wfmax, tmax = np.amax(superpulse), np.argmax(superpulse)
        superpulse = np.divide(superpulse, wfmax)
        wfs = np.divide(wfs, wfmax)

        # -- plot results --
        if show_plots:
            fig, (p0, p1) = plt.subplots(2, figsize=(7, 8))

            # plot fit result (top), and waveforms + superpulse (bottom)
            xfit = np.arange(plo, phi, ppb * 0.1)
            p0.plot(xfit,
                    pgf.gauss_bkg(xfit, *p_init),
                    '-',
                    c='orange',
                    label='init')
            p0.plot(xfit,
                    pgf.gauss_bkg(xfit, *p_fit),
                    '-',
                    c='red',
                    label='fit')

            # plot 1 sigma window
            p0.axvspan(mu - sigma,
                       mu + sigma,
                       color='m',
                       alpha=0.2,
                       label='1 sigma')

            # plot data
            p0.plot(bp[1:],
                    hp_rt,
                    ds='steps',
                    c='k',
                    lw=1,
                    label=f'{vp:.2f} V')
            p0.set_xlabel(f'onboard energy (keV, c={ecal:.2e})',
                          ha='right',
                          x=1)
            p0.set_ylabel('cts / s', ha='right', y=1)
            p0.legend(fontsize=10)

            # plot individ. wfs
            ts = np.arange(0, len(wfs[0, :]))
            for iwf in range(wfs.shape[0]):
                p1.plot(ts, wfs[iwf, :], '-k', lw=2, alpha=0.5)
            p1.plot(np.nan, np.nan, '-k', label=f'wfs, {epk:.0f} keV')

            # plot superpulse
            p1.plot(ts,
                    superpulse,
                    '-r',
                    lw=2,
                    label=f'superpulse, {vp:.2f} V')

            p1.set_xlabel('time (10 ns)', ha='right', x=1)
            p1.set_ylabel('amplitude', ha='right', y=1)
            p1.legend(fontsize=10)
            # plt.show()
            plt.savefig(f'./plots/superpulse_cyc{cyc}.png', dpi=150)
            plt.cla()

        # save the superpulse to our output file
        return superpulse
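The time-alignment step slices each waveform around its 50%-of-maximum crossing before averaging; tp_align, n_pre, and n_post are closure variables of the surrounding script. A synthetic sketch of the same alignment and superpulse average:

import numpy as np

rng = np.random.default_rng(3)
n_wfs, n_samp = 20, 1000
tp_align, n_pre, n_post = 0.5, 100, 200   # assumed values for this sketch

# synthetic step-like pulses with jittered start times and a little noise
wfs = np.zeros((n_wfs, n_samp))
starts = rng.integers(300, 600, n_wfs)
for i, s in enumerate(starts):
    wfs[i, s:] = 1.0
wfs += rng.normal(0, 0.02, wfs.shape)

# align every waveform at its 50% timepoint, then average and normalize
wf_maxes = np.amax(wfs, axis=1)
timepoints = np.argmax(wfs >= wf_maxes[:, None] * tp_align, axis=1)
aligned = np.stack([wf[tp - n_pre : tp + n_post]
                    for wf, tp in zip(wfs, timepoints)])
superpulse = aligned.mean(axis=0)
superpulse /= superpulse.max()
print(superpulse.shape)   # (n_pre + n_post,)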
Example #10
def show_spectra(dfp, dg):
    """
    plot events from each pulser peak on top of a background spectrum run,
    to show where in the spectrum we sampled from.
    let's use the E_keV column to find the pulser peaks.
    need to figure out the proper calibration constant (use onboard energy)
    so load the bkg run and figure out the calibration constant.
    that's the parameter we need for get_superpulses.
    """
    run_diagnostic = False

    f_dsp = dg.lh5_dir + '/' + dfp.dsp_path + '/' + dfp.dsp_file
    f_bkg = f_dsp.iloc[0]  # the background run is row 0 by definition
    print('Background run:', f_bkg)

    # dataframe method - pulls all values from table
    # sto = lh5.Store()
    # tb_data, n_rows = sto.read_object('ORSIS3302DecoderForEnergy/dsp', f_bkg)
    # df_data = tb_data.get_dataframe()

    # load_nda method - just grab onboard energy
    tb_name = 'ORSIS3302DecoderForEnergy/dsp'
    edata = lh5.load_nda([f_bkg], ['energy'], tb_name)['energy']

    # use this flag to figure out the calibration of the 1460 line
    if run_diagnostic:
        elo, ehi, epb = 0, 1e7, 10000
        hist, bins, _ = pgh.get_hist(edata, range=(elo, ehi), dx=epb)
        plt.semilogy(bins[1:], hist, ds='steps', c='b', lw=1)
        plt.show()
        exit()

    ecal = 1460.8 / 2.005e6  # works for pulser dataset 2 (dec 2020)

    elo, ehi, epb = 0, 5000, 10
    hist, bins, _ = pgh.get_hist(edata * ecal, range=(elo, ehi), dx=epb)
    runtime = dfp.iloc[0].runtime * 60  # sec
    hist_rt = np.divide(hist, runtime)
    print(f'bkg runtime: {runtime:.2f} sec')

    cmap = plt.cm.get_cmap('jet', len(dfp))
    for i, df_row in dfp.iterrows():

        epk, rt, vp = df_row[['E_keV', 'runtime', 'V_pulser']]
        rt *= 60  # sec
        if epk == 0: continue  # skip the bkg run

        # draw the expected peak location based on our input table
        plt.axvline(epk, lw=1, alpha=0.5)

        # load pulser data
        f_dsp = dg.lh5_dir + '/' + df_row.dsp_path + '/' + df_row.dsp_file
        pdata = lh5.load_nda([f_dsp], ['energy'], tb_name)['energy'] * ecal

        # take a wide window around where we expect the pulser peak
        pdata = pdata[(pdata > epk - 50) & (pdata < epk + 50)]
        hp, bp, _ = pgh.get_hist(pdata, range=(elo, ehi), dx=epb)
        hp_rt = np.divide(hp, rt)
        plt.semilogy(bp[1:],
                     hp_rt,
                     ds='steps',
                     lw=1,
                     c=cmap(i),
                     label=f'{vp:.2f} V')

    plt.semilogy(bins[1:], hist_rt, ds='steps', c='k', lw=1, label='bkg data')

    plt.xlabel(f'onboard energy (keV, c={ecal:.2e})', ha='right', x=1)
    plt.ylabel('cts / s', ha='right', y=1)
    plt.legend(fontsize=10)
    plt.savefig('./plots/transferfn_peaks.pdf')
    plt.show()
    plt.clf()
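The hard-coded ecal comes from pinning the 1460.8 keV line (K-40) at its observed onboard-energy position, found via the run_diagnostic histogram. A one-point calibration sketch on synthetic data:

import numpy as np

rng = np.random.default_rng(4)
# synthetic onboard energies with a K-40 peak near 2.005e6 ADC
edata = np.concatenate([rng.normal(2.005e6, 5e3, 3000),
                        rng.uniform(0, 1e7, 50000)])

hist, bins = np.histogram(edata, bins=np.arange(0, 1e7, 10000))
peak_adc = bins[np.argmax(hist)]

ecal = 1460.8 / peak_adc   # keV per ADC unit, one-point calibration
print(f'{ecal:.3e}')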
Example #11
#!/usr/bin/env python3
import pandas as pd
import pygama.io.lh5 as lh5

fin = '/global/project/projectdirs/legend/users/gothman/CAGE'
fin += '/dsp/cage_run110_cyc1186_dsp.lh5' # run 110 is cycles 1184--1190

name = 'ORSIS3302DecoderForEnergy/dsp'

energy = lh5.load_nda([fin], ['trapEmax'], name)['trapEmax']
energy = pd.Series(energy)

print(energy, len(energy))
# print(energy.loc[(energy > 3597) & (energy < 3617)])
emask = (energy > 3597) & (energy < 3617)
print(emask.value_counts())
# print(energy.max())


# sto = lh5.Store()
# tb, n = sto.read_object(name, fin)
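Aside: the windowed count at the end can also be written with Series.between; a tiny sketch (inclusive='neither' requires pandas >= 1.3):

import pandas as pd

energy = pd.Series([3500.0, 3600.0, 3605.2, 3610.0, 3700.0])

# equivalent to (energy > 3597) & (energy < 3617)
emask = energy.between(3597, 3617, inclusive='neither')
print(emask.sum(), 'events in window')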