Example No. 1
def show_lowe_wfs():
    """
    standalone function to show very low-energy waveforms after the data-cleaning cut
    """
    f_raw = '/Users/wisecg/Data/OPPI/raw/oppi_run0_cyc2027_raw.lh5'
    f_hit = '/Users/wisecg/Data/OPPI/hit/oppi_run0_cyc2027_hit.lh5'

    tb_name = 'ORSIS3302DecoderForEnergy/raw'
    hit_store = lh5.Store()
    data = hit_store.read_object(tb_name, f_hit)
    df_hit = data.get_dataframe()

    # correct energy_first (inplace) to allow negative values
    df_hit['energy_first'] = df_hit['energy_first'].astype(np.int64)
    efirst = df_hit['energy_first'].values
    idx = np.where(efirst > 4e9)
    eshift = efirst[idx] - 4294967295
    efirst[idx] = eshift

    nwfs = 40
    elo, ehi, epb = 1, 10, 0.1
    blo, bhi = 57700, 58500  # cut values
    etype = 'trapE_cal'  # noise stops @ 35 keV

    idx_lowe = df_hit[etype].loc[(df_hit[etype] > elo) & (df_hit[etype] < ehi)
                                 & (df_hit.bl > blo) & (df_hit.bl < bhi)]
    idx_lowe = idx_lowe.index[:nwfs]
    # print(df_hit.loc[idx_lowe])

    # get phys waveforms, normalized by max value
    i_max = idx_lowe[-1]

    raw_store = lh5.Store()
    data_raw = raw_store.read_object(tb_name,
                                     f_raw,
                                     start_row=0,
                                     n_rows=i_max + 1)

    wfs = data_raw['waveform']['values'].nda
    wfs_lowe = wfs[idx_lowe.values, :]
    ts = np.arange(0, wfs_lowe.shape[1], 1)

    # plot wfs
    for iwf in range(wfs_lowe.shape[0]):
        plt.plot(ts, wfs_lowe[iwf, :], lw=1, alpha=0.5)

    plt.xlabel('time (clock ticks)', ha='right', x=1)
    plt.ylabel('ADC', ha='right', y=1)
    # plt.show()
    plt.savefig('./plots/lowe_wfs.png', dpi=300)
    plt.cla()
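The sign fix above is worth seeing in isolation: 'energy_first' is stored as a 32-bit unsigned value, so small negative excursions wrap around to numbers near 2**32. A minimal sketch on made-up values (the 4e9 threshold and the UINT32_MAX constant are the ones used above):

import numpy as np

# synthetic energy_first values: two small positives and one wrapped negative
efirst = np.array([120, 4294967200, 340], dtype=np.uint64).astype(np.int64)

# entries near UINT32_MAX are really small negatives that wrapped around
idx = np.where(efirst > 4e9)
efirst[idx] = efirst[idx] - 4294967295

print(efirst)  # [120  -95  340]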
Example No. 2
def select_energies(energy_name,
                    range_name,
                    filenames,
                    database,
                    lh5_group='',
                    store=None,
                    verbosity=0):
    """
    """
    if energy_name not in database:
        print(f'no energy {energy_name} in database')
        return None

    if 'ranges' not in database[energy_name]:
        print(f'database["{energy_name}"] missing field "ranges"')
        return None

    if range_name not in database[energy_name]['ranges']:
        print(f'no range {range_name} in database["{energy_name}"]["ranges"]')
        return None

    E_low = database[energy_name]["ranges"][range_name]["E_low"]
    E_high = database[energy_name]["ranges"][range_name]["E_high"]

    print(lh5_group + '/' + energy_name)
    print(filenames)
    #     print(E_low, E_high)
    #     exit()

    if store is None: store = lh5.Store()

    energies, _ = store.read_object(lh5_group + '/' + energy_name,
                                    filenames,
                                    verbosity=verbosity)
    return np.where((energies.nda > E_low) & (energies.nda < E_high))
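A hypothetical usage sketch; the nested database layout below is inferred from the lookups in the function body (the file name and range values are illustrative):

database = {
    'trapE': {
        'ranges': {
            'k40_peak': {'E_low': 1450, 'E_high': 1470},
        }
    }
}

idx = select_energies('trapE', 'k40_peak', ['run0_dsp.lh5'], database,
                      lh5_group='ORSIS3302DecoderForEnergy/dsp')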
Example No. 3
def show_cal_spectrum():
    """
    """
    f_hit = '/Users/wisecg/Data/OPPI/hit/oppi_run0_cyc2027_hit.lh5'
    tb_name = 'ORSIS3302DecoderForEnergy/raw'
    sto = lh5.Store()
    groups = sto.ls(f_hit)
    data = sto.read_object(tb_name, f_hit)
    df_hit = data.get_dataframe()

    print(df_hit)

    # energy in keV
    elo, ehi, epb = 0, 3000, 0.5

    # choose energy estimator
    etype = 'energy_cal'
    # etype = 'trapE_cal'

    hist, bins, _ = pgh.get_hist(df_hit[etype], range=(elo, ehi), dx=epb)
    bins = bins[1:]  # trim zero bin, not needed with ds='steps'

    plt.plot(bins, hist, ds='steps', c='b', lw=2, label=etype)
    plt.xlabel(etype, ha='right', x=1)
    plt.ylabel('Counts', ha='right', y=1)
    plt.legend()
    plt.tight_layout()
    plt.show()
Example No. 4
def main():
    """
    an example of loading an LH5 DSP file and converting to pandas DataFrame.
    """
    # we will probably make this part simpler in the near future
    f = '/Users/wisecg/Data/lh5/hades_I02160A_r1_191021T162944_th_HS2_top_psa_dsp.lh5'
    sto = lh5.Store()
    groups = sto.ls(f)  # the example file only has one group, 'raw'
    data = sto.read_object('raw', f)
    df_dsp = data.get_dataframe()

    # from here, we can use standard pandas to work with data
    print(df_dsp)

    # one example: create uncalibrated energy spectrum,
    # using a pygama helper function to get the histogram

    elo, ehi, epb = 0, 100000, 10
    ene_uncal = df_dsp['trapE']
    hist, bins, _ = pgh.get_hist(ene_uncal, range=(elo, ehi), dx=epb)
    bins = bins[1:]  # trim zero bin, not needed with ds='steps'

    plt.semilogy(bins, hist, ds='steps', c='b', label='trapE')
    plt.xlabel('trapE', ha='right', x=1)
    plt.ylabel('Counts', ha='right', y=1)
    plt.legend()
    plt.tight_layout()
    plt.show()
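If pgh.get_hist is not at hand, the same "steps" spectrum can be drawn with plain numpy; a rough equivalent sketch with fake data (it assumes get_hist returns counts plus the full array of bin edges, which is why these examples trim the first bin edge):

import numpy as np
import matplotlib.pyplot as plt

elo, ehi, epb = 0, 100000, 10
ene_uncal = np.random.exponential(20000, size=10000)  # fake data

hist, bins = np.histogram(ene_uncal, bins=np.arange(elo, ehi + epb, epb))
plt.semilogy(bins[1:], hist, ds='steps', c='b', label='trapE')
plt.legend()
plt.show()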
Example No. 5
def show_raw_spectrum():
    """
    show spectrum with onboard energy and trapE
    - get calibration constants for onboard energy and 'trapE' energy
    - TODO: fit each expected peak and get resolution vs energy
    """
    f_dsp = '/Users/wisecg/Data/OPPI/dsp/oppi_run0_cyc2027_dsp_test.lh5'

    # we will probably make this part simpler in the near future
    sto = lh5.Store()
    groups = sto.ls(f_dsp)
    data = sto.read_object('ORSIS3302DecoderForEnergy/raw', f_dsp)
    df_dsp = data.get_dataframe()

    # from here, we can use standard pandas to work with data
    print(df_dsp)

    # elo, ehi, epb, etype = 0, 8e6, 1000, 'energy'
    # elo, ehi, epb, etype = 0, 8e6, 1000, 'energy' # whole spectrum
    # elo, ehi, epb, etype = 0, 800000, 1000, 'energy' # < 250 keV
    elo, ehi, epb, etype = 0, 10000, 10, 'trapE'

    ene_uncal = df_dsp[etype]
    hist, bins, _ = pgh.get_hist(ene_uncal, range=(elo, ehi), dx=epb)
    bins = bins[1:]  # trim zero bin, not needed with ds='steps'

    plt.plot(bins, hist, ds='steps', c='b', lw=2, label=etype)
    plt.xlabel(etype, ha='right', x=1)
    plt.ylabel('Counts', ha='right', y=1)
    plt.legend()
    plt.tight_layout()
    plt.show()
Example No. 6
def dsp_to_hit():
    """
    save calibrated energies into the dsp file.
    this is a good example of adding a column, reading & writing to an LH5 file.
    """
    f_dsp = '/Users/wisecg/Data/OPPI/dsp/oppi_run0_cyc2027_dsp_test.lh5'
    f_hit = '/Users/wisecg/Data/OPPI/hit/oppi_run0_cyc2027_hit.lh5'
    sto = lh5.Store()
    groups = sto.ls(f_dsp)
    tb_name = 'ORSIS3302DecoderForEnergy/raw'
    data = sto.read_object(tb_name, f_dsp)
    df_dsp = data.get_dataframe()

    # add a new column for each energy estimator of interest
    for etype in ['energy', 'trapE']:
        ecal_name = etype + '_cal'
        pfit = linear_cal(etype)
        df_dsp[ecal_name] = df_dsp[etype] * pfit[0] + pfit[1]

        e_cal_lh5 = lh5.Array(df_dsp[ecal_name].values, attrs={'units': 'keV'})
        data.add_field(ecal_name, e_cal_lh5)

    # write to hit file.  delete if it exists, LH5 overwrite is broken right now
    if os.path.exists(f_hit):
        os.remove(f_hit)
    sto.write_object(data, tb_name, f_hit)
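linear_cal isn't shown here. A plausible sketch of what it might do, assuming a two-point linear fit of uncalibrated peak centroids to known gamma lines (the centroid values below are purely illustrative):

import numpy as np

def linear_cal(etype):
    """Hypothetical two-point calibration: returns (slope, intercept) so that
    energy_keV = slope * raw + intercept, matching the pfit[0]/pfit[1] usage."""
    raw_peaks = {'energy': [2.62e6, 4.70e6], 'trapE': [2805, 5025]}[etype]
    kev_peaks = [1460.8, 2614.5]  # K-40 and Tl-208 lines
    return np.polyfit(raw_peaks, kev_peaks, 1)  # highest order first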
Example No. 7
def get_superpulse(df, dg, cut_str='', nwfs=100, all=False, norm=True):
    """Create a super-pulse from waveforms passing a cut. Waveforms are first baseline-subtracted.
    """
    if all:
        nwfs = len(df.query(cut_str))
        print(f'using all {nwfs} waveforms passing cut')
    else:
        print(f'using first {nwfs} waveforms passing cut')

    idx = df.query(cut_str).index[:nwfs]
    raw_store = lh5.Store()
    tb_name = 'ORSIS3302DecoderForEnergy/raw'
    lh5_dir = dg.lh5_dir
    raw_list = lh5_dir + dg.fileDB['raw_path'] + '/' + dg.fileDB['raw_file']
    # right now lh5.store.read_object() only works for lists, so convert
    # the pandas object to a list first
    raw_list = raw_list.tolist()
    data_raw, nrows = raw_store.read_object(tb_name, raw_list)

    wfs_all = (data_raw['waveform']['values']).nda
    wfs = wfs_all[idx.values, :]
    # baseline subtraction
    bl_means = wfs[:, :800].mean(axis=1)
    wf_blsub = (wfs.transpose() - bl_means).transpose()
    ts = np.arange(wf_blsub.shape[1])  # one index per sample, same length as the superpulse
    super_wf = np.mean(wf_blsub, axis=0)
    wf_max = np.amax(super_wf)
    if norm:
        superpulse = super_wf / wf_max
    else:
        superpulse = super_wf
    return (ts, superpulse)
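A short usage sketch, assuming df and dg are prepared as in the other examples (the cut string is illustrative):

ts, sp = get_superpulse(df, dg, cut_str='trapEmax_cal > 1450 and trapEmax_cal < 1470')
plt.plot(ts, sp, lw=1)
plt.xlabel('time (clock ticks)', ha='right', x=1)
plt.ylabel('normalized ADC', ha='right', y=1)
plt.show()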
Example No. 8
def power_spectrum(dg):
    """
    plot power spectral density for groups of runs.
    note: typical cycle files have ~120,000 wfs.
    """
    import scipy.signal as signal

    view_cols = [
        'runtype', 'run', 'cycle', 'startTime', 'runtime', 'threshold'
    ]

    sto = lh5.Store()
    lh5_dir = os.path.expandvars(dg.config['lh5_dir'])

    # n_wfs = np.inf # np.inf to select all
    n_wfs = int(1e3)
    clk = 100e6  # Hz
    nseg = 3500  # num baseline samples (cage wfs are usually length 8192)

    runs = dg.fileDB['run'].unique()

    # cmap = plt.cm.get_cmap('jet', len(runs))
    # iplt = 0

    def psd_run(df_run):

        run = int(df_run.iloc[0]['run'])
        # print(df_run[view_cols])

        tb_name = 'ORSIS3302DecoderForEnergy/raw'
        raw_list = lh5_dir + df_run['raw_path'] + '/' + df_run['raw_file']

        # for now, just grab wfs from the first cycle file.
        # that should be PLENTY for a power spectrum plot
        f_raw = raw_list.values[0]
        data_raw, n_rows = sto.read_object(tb_name,
                                           f_raw,
                                           start_row=0,
                                           n_rows=n_wfs)
        wfs_all = data_raw['waveform']['values'].nda

        # wfs = wfs_all[idx.values, :] # can slice them by np array
        wfs = wfs_all[:, 0:nseg]  # baseline only (8192 samples in cage)
        print(wfs.shape)

        f, p = signal.welch(wfs, clk, nperseg=nseg)
        ptot = np.sum(p, axis=0)
        y = ptot / wfs.shape[0]
        plt.semilogy(f, y, '-', lw=2, label=f'run {run}')
        # iplt += 1

        # exit()

    dg.fileDB.groupby(['run']).apply(psd_run)

    plt.xlabel('Frequency (Hz)', ha='right', x=0.9)
    plt.ylabel('PSD (ADC^2 / Hz)', ha='right', y=1)
    plt.legend(loc=1)
    plt.savefig('./plots/psd_runs.pdf')
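The core of the PSD computation, stripped down to synthetic white noise so the call signature is easy to see; scipy.signal.welch runs along the last axis, so it returns one periodogram per waveform, which is then averaged:

import numpy as np
import scipy.signal as signal
import matplotlib.pyplot as plt

clk = 100e6  # 100 MHz sampling, as above
wfs = np.random.normal(0, 5, size=(1000, 3500))  # fake baseline segments

f, p = signal.welch(wfs, fs=clk, nperseg=3500)  # p has shape (1000, nfreq)
y = np.sum(p, axis=0) / wfs.shape[0]  # average PSD over waveforms
plt.semilogy(f, y, '-', lw=2)
plt.xlabel('Frequency (Hz)')
plt.ylabel('PSD (ADC^2 / Hz)')
plt.show()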
Example No. 9
def get_runtimes(dg):
    """
    $ ./setup.py --runtime

    Get the Ge runtime of each cycle file (in minutes).
    Add a 'ge_runtime' column to the fileDB.
    Requires the raw LH5 files.
    """
    dg.load_df()
    # dg.fileDB = dg.fileDB[50:55] # debug only

    # reset columns of interest
    new_cols = ['runtime', 'rt_std']
    for col in new_cols:
        if col in dg.fileDB.columns:
            dg.fileDB.drop(col, axis=1, inplace=True)

    sto = lh5.Store()

    t_start = time.time()
    def runtime_cycle(df_row):

        # load raw file path (with {these} in it)
        f_raw = f'{dg.lh5_dir}/{df_row.raw_path}/{df_row.raw_file}'
        f_raw = f_raw.format_map({'sysn':'geds'})

        # always look for Ge
        f_key = df_row.raw_file.format_map({'sysn':'geds'})
        if not os.path.exists(f_raw):
            # print(f'no Ge data: {f_key}')
            return pd.Series({'runtime':0, 'rt_std':0})

        # for PGT, compare the first three channels (for redundancy)
        rts = []
        ge_groups = sto.ls(f_raw)
        for ge in ge_groups[:3]:
            ts = lh5.load_nda([f_raw], ['timestamp'], ge+'/raw/')['timestamp']
            rts.append(ts[-1])

        # take largest value & compute uncertainty
        runtime = max(rts) / 60
        rt_std = np.std(rts)
        # print(f_key, runtime, rt_std)

        return pd.Series({'runtime':runtime, 'rt_std':rt_std})

    # df_tmp = dg.fileDB.apply(runtime_cycle, axis=1)
    dg.fileDB[new_cols] = dg.fileDB.progress_apply(runtime_cycle, axis=1)

    print(f'Done. Time elapsed: {(time.time()-t_start)/60:.2f} mins.')

    # save to fileDB if everything looks OK
    print(dg.fileDB)
    print(dg.fileDB.columns)
    print('FileDB location:', dg.config['fileDB'])
    ans = input('Save new fileDB? (y/n) ')
    if ans.lower() == 'y':
        dg.save_df(dg.config['fileDB'])
Example No. 10
def write_out_garbage(self, filename, group='/', lh5_store=None):
    if lh5_store is None: lh5_store = lh5.Store()
    n_rows = self.garbage_table.loc
    if n_rows == 0: return
    lh5_store.write_object(self.garbage_table,
                           'garbage',
                           filename,
                           group,
                           n_rows=n_rows,
                           append=True)
    self.garbage_table.clear()
Example No. 11
def get_wfs(df, dg, cut_str='', nwfs=10, all=False):
    """Get waveforms passing a cut, baseline-subtracted but not normalized. These are individual waveforms, not superpulses!
    """
    all_nwfs = len(df.query(cut_str))
    print(f'{all_nwfs} passing cuts')

    if all:
        nwfs = all_nwfs
        print(f'using all {nwfs} waveforms passing cut')
    else:
        print(f'using first {nwfs} waveforms passing cut')

    if all_nwfs < nwfs:
        print(f'Fewer than the requested number of waveforms ({nwfs}) pass '
              f'the cuts.\nUsing all {all_nwfs} waveforms passing cut')
        nwfs = all_nwfs

    idx = df.query(cut_str).index[:nwfs]
    raw_store = lh5.Store()
    tb_name = 'ORSIS3302DecoderForEnergy/raw'
    lh5_dir = dg.lh5_dir
    raw_list = lh5_dir + dg.fileDB['raw_path'] + '/' + dg.fileDB['raw_file']
    # right now lh5.store.read_object() only works for lists, so convert
    # the pandas object to a list first
    raw_list = raw_list.tolist()
    data_raw, nrows = raw_store.read_object(tb_name, raw_list)

    wfs_all = (data_raw['waveform']['values']).nda
    wfs = wfs_all[idx.values, :]
    # baseline subtraction
    bl_means = wfs[:, :800].mean(axis=1)
    wf_blsub = (wfs.transpose() - bl_means).transpose()
    ts = np.arange(wf_blsub.shape[1])  # one index per sample, same length as each waveform

    return (ts, wf_blsub)
Example No. 12
def show_groups():
    """
    show example of accessing the names of the HDF5 groups in our LH5 files
    """
    f_raw = '/Users/wisecg/Data/OPPI/raw/oppi_run0_cyc2027_raw.lh5'
    f_dsp = '/Users/wisecg/Data/OPPI/dsp/oppi_run0_cyc2027_dsp_test.lh5'
    f_hit = '/Users/wisecg/Data/OPPI/hit/oppi_run0_cyc2027_hit.lh5'

    # h5py method
    # hf = h5py.File(f_raw)
    # hf = h5py.File(f_dsp)

    # some examples of navigating the groups
    # print(hf.keys())
    # print(hf['ORSIS3302DecoderForEnergy/raw'].keys())
    # print(hf['ORSIS3302DecoderForEnergy/raw/waveform'].keys())
    # exit()

    # lh5 method
    sto = lh5.Store()
    groups = sto.ls(f_dsp)
    data = sto.read_object('ORSIS3302DecoderForEnergy/raw', f_dsp)

    # testing -- make sure data columns all have same shape
    for col in data.keys():
        print(col, data[col].nda.shape)

    # directly access timestamps in a raw file w/o loading all the wfs
    # groups = sto.ls(f_raw, 'ORSIS3302DecoderForEnergy/raw/')
    # data = sto.read_object('ORSIS3302DecoderForEnergy/raw/timestamp', f_raw)
    # ts = data.nda

    # check pandas conversion
    df_dsp = data.get_dataframe()
    print(df_dsp.columns)
    print(df_dsp)
Example No. 13
def get_runtimes(dg):
    """
    Requires DSP files.
    Compute runtime (# minutes in run) and stopTime (unix timestamp) using
    the timestamps in the dsp file.
    """
    write_output = True

    df_keys = pd.read_hdf(dg.config['fileDB'])

    # clear new columns if they exist
    new_cols = ['stopTime', 'runtime']
    for col in new_cols:
        if col in df_keys.columns:
            df_keys.drop(col, axis=1, inplace=True)

    sto = lh5.Store()

    def get_runtime(df_row):

        # load timestamps from dsp file
        f_dsp = dg.lh5_dir + df_row['dsp_path'] + '/' + df_row['dsp_file']
        data = sto.read_object('ORSIS3302DecoderForEnergy/dsp', f_dsp)

        # correct for timestamp rollover
        clock = 100e6  # 100 MHz
        UINT_MAX = 4294967295  # (0xffffffff)
        t_max = UINT_MAX / clock

        # ts = data['timestamp'].nda.astype(np.int64) # must be signed for np.diff
        ts = data['timestamp'].nda / clock  # converts to float

        tdiff = np.diff(ts)
        tdiff = np.insert(tdiff, 0, 0)
        iwrap = np.where(tdiff < 0)
        iloop = np.append(iwrap[0], len(ts))

        ts_new, t_roll = [], 0
        for i, idx in enumerate(iloop):
            ilo = 0 if i == 0 else iwrap[0][i - 1]
            ihi = idx
            ts_block = ts[ilo:ihi]
            t_last = ts[ilo - 1]
            t_diff = t_max - t_last
            ts_new.append(ts_block + t_roll)
            t_roll += t_last + t_diff
        ts_corr = np.concatenate(ts_new)

        # calculate runtime and unix stopTime
        rt = ts_corr[-1] / 60  # minutes
        st = int(np.ceil(df_row['startTime'] + rt * 60))

        return pd.Series({'stopTime': st, 'runtime': rt})

    df_tmp = df_keys.progress_apply(get_runtime, axis=1)
    df_keys[new_cols] = df_tmp

    print(df_keys)

    if write_output:
        df_keys.to_hdf(dg.config['fileDB'], key='file_keys')
        print(f"Wrote output file: {dg.config['fileDB']}")
Example No. 14
def get_runtimes(dg, overwrite=False, batch_mode=False):
    """
    $ ./setup.py --rt

    Compute runtime (# minutes in run) and stopTime (unix timestamp) using
    the timestamps in the DSP file.

    NOTE: Could change this to use the raw file timestamps instead of dsp file,
          but that still makes this function dependent on a processing step.
    NOTE: CAGE uses struck channel 2 (0-indexed)
    """
    print('Scanning DSP files for runtimes ...')

    # load existing fileDB
    dg.load_df()

    # first-time setup
    if 'runtime' not in dg.fileDB.columns or overwrite:
        df_keys = dg.fileDB.copy()
        update_existing = False
        print('Re-scanning entire fileDB')

    elif 'runtime' in dg.fileDB.columns:
        # look for any rows with nans to update
        idx = dg.fileDB.loc[pd.isna(dg.fileDB['runtime']), :].index
        if len(idx) > 0:
            df_keys = dg.fileDB.loc[idx].copy()
            print(f'Found {len(df_keys)} new files without runtime:')
            print(df_keys)
            update_existing = True
        else:
            print('No empty runtime values found.')
            df_keys = pd.DataFrame()  # avoid a NameError in the checks below
            update_existing = False

    if len(df_keys) == 0:
        print('No files to update.  Exiting...')
        exit()

    # clear new columns if they exist
    new_cols = ['stopTime', 'runtime']
    for col in new_cols:
        if col in df_keys.columns:
            df_keys.drop(col, axis=1, inplace=True)

    sto = lh5.Store()

    def get_runtime(df_row):

        # load timestamps from dsp file
        f_dsp = dg.lh5_dir + df_row['dsp_path'] + '/' + df_row['dsp_file']

        if not os.path.exists(f_dsp) and not df_row.skip:
            print(f"Error, file doesn't exist:\n  {f_dsp}")
            print("Warning, proceeding anyway -- this can mess up your fileDB")
            # exit() # careful!
            return pd.Series({'stopTime': 0, 'runtime': 0})
        elif df_row.skip:
            print(f'Skipping cycle file:\n  {f_dsp}')
            return pd.Series({'stopTime': 0, 'runtime': 0})

        data, n_rows = sto.read_object('ORSIS3302DecoderForEnergy/dsp', f_dsp)

        # correct for timestamp rollover
        clock = 100e6  # 100 MHz
        UINT_MAX = 4294967295  # (0xffffffff)
        t_max = UINT_MAX / clock

        # ts = data['timestamp'].nda.astype(np.int64) # must be signed for np.diff
        ts = data['timestamp'].nda / clock  # converts to float

        tdiff = np.diff(ts)
        tdiff = np.insert(tdiff, 0, 0)
        iwrap = np.where(tdiff < 0)
        iloop = np.append(iwrap[0], len(ts))

        ts_new, t_roll = [], 0
        for i, idx in enumerate(iloop):
            ilo = 0 if i == 0 else iwrap[0][i - 1]
            ihi = idx
            ts_block = ts[ilo:ihi]
            t_last = ts[ilo - 1]
            t_diff = t_max - t_last
            ts_new.append(ts_block + t_roll)
            t_roll += t_last + t_diff
        ts_corr = np.concatenate(ts_new)

        # calculate runtime and unix stopTime
        rt = ts_corr[-1] / 60  # minutes
        st = int(np.ceil(df_row['startTime'] + rt * 60))

        return pd.Series({'stopTime': st, 'runtime': rt})

    df_tmp = df_keys.progress_apply(get_runtime, axis=1)
    df_keys[new_cols] = df_tmp

    if update_existing:
        idx = dg.fileDB.loc[pd.isna(dg.fileDB['runtime']), :].index
        dg.fileDB.loc[idx] = df_keys
    else:
        dg.fileDB = df_keys

    dbg_cols = ['run', 'cycle', 'unique_key', 'startTime', 'runtime']
    print(dg.fileDB[dbg_cols])

    print('Ready to save.  This will overwrite any existing fileDB.')
    if not batch_mode:
        ans = input('Save updated fileDB? (y/n):')
        if ans.lower() == 'y':
            # dg.fileDB was already updated above, so just save it
            dg.save_df(os.path.expandvars(dg.config['fileDB']))
            print('fileDB updated.')
    else:
        dg.save_df(os.path.expandvars(dg.config['fileDB']))
        print('fileDB updated.')
Example No. 15
def process_ttree(root_files,
                  raw_file=None,
                  n_max=None,
                  config=None,
                  verbose=False,
                  buffer_size=1024,
                  chans=None,
                  tree_name='MGTree'):
    # Load up the tree (or trees)
    ch = ROOT.TChain(tree_name)
    if isinstance(root_files, str):
        ch.Add(root_files)
    else:
        for root_file in root_files:
            ch.Add(root_file)

    dec = MGDODecoder(buffer_size)
    lh5_st = lh5.Store()
    if not raw_file:
        raw_file = root_files.replace('.root', '.lh5')

    tables = {}  # map from detector channel to output table
    n_tot = 0  # total waveforms
    # loop through MGTEvents in ttree
    for event in ch:
        # loop through waveforms in event
        for i_wf in range(event.event.GetNWaveforms()):
            # Get digitizer data, waveform and auxwaveform (if applicable)
            dd = event.event.GetDigitizerData(i_wf)
            wf = event.event.GetWaveform(i_wf)
            auxwf = event.event.GetAuxWaveform(
                i_wf) if event.event.GetAuxWaveformArrayStatus() else None

            # Get the output table for this channel
            tb = tables.get(dd.GetID(), None)
            if not tb:
                if verbose:
                    print('Create table for channel', dd.GetID())
                tb = dec.get_table(dd, wf, auxwf)
                tables[dd.GetID()] = tb
            i_chan = tb.loc

            dec.read_waveform(tb, dd, wf, auxwf)

            # write table if it is full
            tb.push_row()
            if tb.is_full():
                lh5_st.write_object(tb,
                                    'g{:04d}/raw'.format(dd.GetID()),
                                    raw_file,
                                    n_rows=tb.loc)
                tb.clear()

            n_tot += 1

        # check if we have hit n_wf limit. Note that we always include all WFs in an event, which can result in including a few extra waveforms
        if n_max and n_tot >= n_max:
            break

    # Fill remaining events for each table
    for channel, tb in tables.items():
        if verbose:
            print('Wrote to', 'g{:04d}/raw'.format(channel), 'in', raw_file)
        lh5_st.write_object(tb,
                            'g{:04d}/raw'.format(channel),
                            raw_file,
                            n_rows=tb.loc)
        tb.clear()
Example No. 16
def show_wfs(dg):
    """
    show waveforms in different energy regions.
    use the hit file to select events
    """
    # get file list and load hit data
    lh5_dir = dg.lh5_user_dir  #if user else dg.lh5_dir
    hit_list = lh5_dir + dg.fileDB['hit_path'] + '/' + dg.fileDB['hit_file']
    df_hit = lh5.load_dfs(
        hit_list,
        ['trapEmax', 'trapEmax_cal', 'bl', 'AoE', 'dcr_raw', 'tp_0', 'tp_50'],
        'ORSIS3302DecoderForEnergy/hit')
    # print(df_hit)
    # print(df_hit.columns)

    # settings
    # etype = 'trapEmax'
    etype = 'trapEmax_cal'
    nwfs = 20

    # create new DCR parameter with the linear energy dependence removed
    const = 0.0555
    df_hit['dcr_linoff'] = df_hit['dcr_raw'] + const * df_hit['trapEmax']

    # create 0-50% rise time
    df_hit['tp0_50'] = df_hit['tp_50'] - df_hit['tp_0']

    # elo, ehi, epb = 0, 100, 0.2 # low-e region
    # elo, ehi, epb = 0, 20, 0.2 # noise region
    elo, ehi, epb = 351, 355, 1  # 351 peak, cal
    # elo, ehi, epb = 1452, 1468, 1 # good physics events
    #     elo, ehi, epb = 7100, 7200, 1 # good physics events, uncal
    # elo, ehi, epb = 6175, 6250, 1 # overflow peak
    # elo, ehi, epb = 5000, 5200, 0.2 # lower overflow peak

    # # diagnostic plot
    # hE, xE, vE = pgh.get_hist(df_hit[etype], range=(elo, ehi), dx=epb)
    # plt.plot(xE[1:], hE, c='b', ds='steps')
    # plt.show()
    # exit()

    # select bulk waveforms
    idx = df_hit[etype].loc[(df_hit[etype] >= elo)
                            & (df_hit[etype] <= ehi)].index[:nwfs]

    raw_store = lh5.Store()
    tb_name = 'ORSIS3302DecoderForEnergy/raw'
    lh5_dir = dg.lh5_dir
    raw_list = lh5_dir + dg.fileDB['raw_path'] + '/' + dg.fileDB['raw_file']
    f_raw = raw_list.values[0]  # fixme, only works for one file rn
    data_raw, nrows = raw_store.read_object(tb_name,
                                            f_raw,
                                            start_row=0,
                                            n_rows=idx[-1] + 1)

    bulk_wfs_all = (data_raw['waveform']['values']).nda
    bulk_wfs = bulk_wfs_all[idx.values, :]
    ts = np.arange(0, bulk_wfs.shape[1] - 1, 1)

    # select alpha waveforms
    dlo = 25
    dhi = 200
    tlo = 100
    thi = 400
    blmin = 8500
    blmax = 10000
    alpha_idx = df_hit[etype].loc[(df_hit['dcr_linoff'] > dlo)
                                  & (df_hit['dcr_linoff'] < dhi)
                                  & (df_hit['tp0_50'] > tlo) &
                                  (df_hit['tp0_50'] < thi) &
                                  (df_hit['bl'] > blmin) &
                                  (df_hit['bl'] < blmax)
                                  & (df_hit[etype] < 12000)].index[:nwfs]

    raw_store = lh5.Store()
    tb_name = 'ORSIS3302DecoderForEnergy/raw'
    raw_list = lh5_dir + dg.fileDB['raw_path'] + '/' + dg.fileDB['raw_file']
    f_raw = raw_list.values[0]  # fixme, only works for one file rn
    data_raw, nrows = raw_store.read_object(tb_name,
                                            f_raw,
                                            start_row=0,
                                            n_rows=alpha_idx[-1] + 1)

    alpha_wfs_all = data_raw['waveform']['values'].nda
    alpha_wfs = alpha_wfs_all[alpha_idx.values, :]
    ats = np.arange(0, alpha_wfs.shape[1] - 1, 1)

    # plot wfs
    for iwf in range(bulk_wfs.shape[0]):
        plt.plot(ts,
                 bulk_wfs[iwf, :len(bulk_wfs[iwf]) - 1],
                 lw=1,
                 color='blue',
                 label='Bulk')

    plt.xlabel('time (clock ticks)', ha='right', x=1)
    plt.ylabel('ADC', ha='right', y=1)

    #     # plot alpha wfs
    #     for aiwf in range(alpha_wfs.shape[0]):
    #         plt.plot(ats, alpha_wfs[aiwf,:len(alpha_wfs[aiwf])-1], lw=1, color = 'red', label = 'Alpha')

    #     plt.title('Alpha versus bulk events')
    plt.title('right 351 Wfs run 82')
    plt.xlabel('time (clock ticks)', ha='right', x=1)
    plt.ylabel('ADC', ha='right', y=1)
    plt.xlim(3500, 4500)
    plt.ylim(9100, 10300)
    # plt.legend(loc='upper left')
    # plt.show()
    plt.savefig('./plots/normScan/zoom_350_right_waveforms_run82.png', dpi=300)
Example No. 17
def pole_zero(dg):
    """
    NOTE: I think this result might be wrong, for the CAGE amp it should be
    around 250 usec.  Need to check.
    """
    # load hit data
    lh5_dir = os.path.expandvars(dg.config['lh5_dir'])
    hit_list = lh5_dir + dg.fileDB['hit_path'] + '/' + dg.fileDB['hit_file']
    df_hit = lh5.load_dfs(hit_list, ['trapEmax'],
                          'ORSIS3302DecoderForEnergy/hit')
    df_hit.reset_index(inplace=True)
    rt_min = dg.fileDB['runtime'].sum()
    # print(f'runtime: {rt_min:.2f} min')

    # load waveforms
    etype = 'trapEmax_cal'
    nwfs = 20
    elo, ehi = 1455, 1465

    # select waveforms
    idx = df_hit[etype].loc[(df_hit[etype] >= elo)
                            & (df_hit[etype] <= ehi)].index[:nwfs]
    raw_store = lh5.Store()
    tb_name = 'ORSIS3302DecoderForEnergy/raw'
    raw_list = lh5_dir + dg.fileDB['raw_path'] + '/' + dg.fileDB['raw_file']
    f_raw = raw_list.values[0]  # fixme, only works for one file rn
    data_raw, nrows = raw_store.read_object(tb_name,
                                            f_raw,
                                            start_row=0,
                                            n_rows=idx[-1] + 1)

    wfs_all = data_raw['waveform']['values'].nda
    wfs = wfs_all[idx.values, :]
    df_wfs = pd.DataFrame(wfs)
    # print(df_wfs)

    # simple test function to compute pole-zero constant for a few wfs.
    # the final one should become a dsp processor
    clock = 1e8  # 100 MHz
    istart = 5000
    iwinlo, iwinhi, iwid = 500, 2500, 20  # two-point slope
    # ts = np.arange(istart, df_wfs.shape[1]-1, 1) / 1e3 # usec
    ts = np.arange(0, df_wfs.shape[1] - 1 - istart, 1) / 1e3  # usec

    def get_rc(row):
        # two-point method
        wf = row[istart:-1].values
        wflog = np.log(wf)
        win1 = np.mean(np.log(row[istart + iwinlo:istart + iwinlo + iwid]))
        win2 = np.mean(np.log(row[istart + iwinhi:istart + iwinhi + iwid]))
        slope = (win2 - win1) / (ts[iwinhi] - ts[iwinlo])
        tau = 1 / slope

        # # diagnostic plot: check against expo method
        # guess_tau = 60
        # a = wf.max()
        # expdec = lambda x : a * np.exp(-x / guess_tau)
        # logdec = lambda x : np.log(a * np.exp(-x / guess_tau))
        # slopeway = lambda x: wflog[0] + x / tau
        # plt.plot(ts, wflog, '-r', lw=1)
        # plt.plot(ts, logdec(ts), '-b', lw=1)
        # plt.plot(ts, slopeway(ts), '-k', lw=1)
        # plt.show()
        # exit()

        return tau

    res = df_wfs.apply(get_rc, axis=1)

    tau_avg, tau_std = res.mean(), res.std()
    print(f'average RC decay constant: {tau_avg:.2f} pm {tau_std:.2f}')
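A sanity check of the two-point log-slope method on a synthetic exponential tail; tau here is the ~250 usec value the docstring expects for the CAGE amp, and the sign is flipped so the recovered constant comes out positive:

import numpy as np

tau_true = 250.0  # us, hypothetical
ts = np.linspace(0, 100, 2000)  # us
wf = 5000 * np.exp(-ts / tau_true)  # noiseless falling tail

# average log(wf) in two short windows, then take the slope between them
i1, i2, iwid = 100, 1500, 20
win1 = np.mean(np.log(wf[i1:i1 + iwid]))
win2 = np.mean(np.log(wf[i2:i2 + iwid]))
slope = (win2 - win1) / (ts[i2] - ts[i1])
tau = -1 / slope
print(f'recovered tau: {tau:.1f} us')  # 250.0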
Example No. 18
def dsp_to_hit(df_row, dg=None, verbose=False, overwrite=False, lowE=False):
    """
    Create hit files from dsp files.  This routine is specific to CAGE but could
    be extended & modified in the future to work for multi-channel data (PGT,
    L200, etc.)
    """
    apply_ecal = True
    apply_tscorr = False  # not needed, should be fixed by the jan 30 2021 re-d2r

    f_dsp = f"{dg.config['dsp_input_dir']}/{df_row['dsp_path']}/{df_row['dsp_file']}"
    f_hit = f"{dg.config['hit_output_dir']}/{df_row['hit_path']}/{df_row['hit_file']}"
    # change output directory if in spec_id 2 mode (i.e. low-energy
    # calibration to get the 60 keV peak in the right place)
    if lowE:
        f_hit = f"{dg.config['hit_output_dir']}/{df_row['hit_path']}/lowE/{df_row['hit_file']}"
        print(f'Writing to low-energy hit file: {f_hit}')
    if verbose:
        print('input:', f_dsp)
        print('output:', f_hit)

    if not overwrite and os.path.exists(f_hit):
        print('file exists, overwrite not set, skipping f_hit:\n   ', f_hit)
        return

    # get run and cycle for ecalDB lookup.  also apply run selection
    run, cycle = df_row[['run', 'cycle']].astype(int)
    if df_row.skip:
        print(f'Cycle {cycle} has been marked junk, will not process.')
        return

    # create initial 'hit' DataFrame from dsp data
    hit_store = lh5.Store()
    data, n_rows = hit_store.read_object(dg.config['input_table'], f_dsp)
    df_hit = data.get_dataframe()

    # 1. get energy calibration for this run from peakfit
    if apply_ecal:

        # loading the tinydb this way preserves the in-file text formatting
        cal_db = db.TinyDB(storage=MemoryStorage)
        with open(dg.config['ecaldb']) as f:
            raw_db = json.load(f)
            cal_db.storage.write(raw_db)

        # loop over energy estimators of interest
        for etype in dg.config['rawe']:

            # load ecalDB table
            tb = cal_db.table(f'peakfit_{etype}').all()
            df_cal = pd.DataFrame(tb)
            for col in ['run', 'cyclo', 'cychi']:
                df_cal[col] = df_cal[col].astype(int)

            # load cal constants for this cycle
            que = f'run=={run} and cyclo <= {cycle} <= cychi'
            df_run = df_cal.query(que)
            if len(df_run) != 1:
                print('Warning, non-unique query:', que)
                print(df_run)
                exit()

            # figure out the order of the polynomial from column names
            pols = {}
            for col in [c for c in df_run.columns if 'cal' in c]:
                val = parse('cal{p}', col)
                val = val.named  # convert to dict
                iord = int(val['p'])
                pols[iord] = df_run.iloc[0][f'cal{iord}']

            # get the coefficients in descending order for np.poly1d: p2, p1, p0...
            coeffs = []
            for ord, val in sorted(pols.items()):
                coeffs.append([ord, val])
            coeffs = np.array(coeffs)
            coeffs = coeffs[coeffs[:, 0].argsort()[::-1]]  # 2, 1, 0 ...
            coeffs = coeffs[:, 1]

            # apply the calibration to the dataframe
            pfunc = np.poly1d(coeffs)
            df_hit[f'{etype}_cal'] = pfunc(df_hit[f'{etype}'])

    # 2. compute timestamp rollover correction (specific to struck 3302)
    clock = 100e6  # 100 MHz
    if apply_tscorr:
        UINT_MAX = 4294967295  # (0xffffffff)
        t_max = UINT_MAX / clock
        ts = df_hit['timestamp'].values / clock
        tdiff = np.diff(ts)
        tdiff = np.insert(tdiff, 0, 0)
        iwrap = np.where(tdiff < 0)
        iloop = np.append(iwrap[0], len(ts))
        ts_new, t_roll = [], 0
        for i, idx in enumerate(iloop):
            ilo = 0 if i == 0 else iwrap[0][i - 1]
            ihi = idx
            ts_block = ts[ilo:ihi]
            t_last = ts[ilo - 1]
            t_diff = t_max - t_last
            ts_new.append(ts_block + t_roll)
            t_roll += t_last + t_diff
        df_hit['ts_sec'] = np.concatenate(ts_new)
    else:
        # NOTE: may need to subtract off the 1st value here if we find
        # that the timestamp doesn't reset at cycle boundaries.
        df_hit['ts_sec'] = df_hit['timestamp'].values / clock

    # 3. compute global timestamp
    t_start = df_row['startTime']
    if t_start is not None:
        df_hit['ts_glo'] = df_hit['ts_sec'] + t_start

    # write to LH5 file
    if os.path.exists(f_hit):
        os.remove(f_hit)
    sto = lh5.Store()
    tb_name = dg.config['input_table'].replace('dsp', 'hit')
    tb_lh5 = lh5.Table(size=len(df_hit))

    for col in df_hit.columns:
        tb_lh5.add_field(col, lh5.Array(df_hit[col].values,
                                        attrs={'units': ''}))
        if verbose:
            print(col)

    print(f'Writing table: {tb_name} in file:\n   {f_hit}')
    sto.write_object(tb_lh5, tb_name, f_hit)

    if verbose:
        print('Creating diagnostic plots ...')

        # energy
        xlo, xhi, xpb = 0, 3000, 10
        hist, bins, _ = pgh.get_hist(df_hit['trapEftp_cal'],
                                     range=(xlo, xhi),
                                     dx=xpb)
        plt.semilogy(bins[1:], hist, ds='steps', c='b', lw=1)
        plt.xlabel('Energy (keV)', ha='right', x=1)
        plt.ylabel('Counts', ha='right', y=1)
        plt.savefig('./plots/d2h_etest.png')
        print('saved figure: ./plots/d2h_etest.png')
        plt.cla()

        # timestamp
        xv = np.arange(len(df_hit))
        plt.plot(xv, df_hit['ts_sec'], '.b')
        plt.savefig('./plots/d2h_ttest.png')
        print('saved figure: ./plots/d2h_ttest.png')
        plt.cla()

        # exit, don't create + overwrite a million plots
        print(
            'verbose mode of d2h is meant to look at 1 cycle file, exiting...')
        exit()
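The coefficient-ordering dance in step 1 exists because np.poly1d wants the highest-order coefficient first. A toy check with hypothetical second-order calibration constants gives the same result in one line:

import numpy as np

pols = {0: 1.2, 1: 0.45, 2: -3.0e-9}  # cal0, cal1, cal2 as parsed from ecalDB
coeffs = [pols[p] for p in sorted(pols, reverse=True)]  # [cal2, cal1, cal0]
pfunc = np.poly1d(coeffs)
print(pfunc(np.array([1000.0, 5000.0])))  # cal2*x**2 + cal1*x + cal0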
Example No. 19
def get_resolution():
    """
    """
    # load hit file
    f_hit = '/Users/wisecg/Data/OPPI/hit/oppi_run0_cyc2027_hit.lh5'
    tb_name = 'ORSIS3302DecoderForEnergy/raw'
    sto = lh5.Store()
    groups = sto.ls(f_hit)
    data = sto.read_object(tb_name, f_hit)
    df_hit = data.get_dataframe()

    # load parameters
    e_peak = 1460.8
    etype = 'trapE_cal'
    # etype = 'energy_cal'
    elo, ehi, epb = 1445, 1475, 0.2

    # get histogram
    hE, bins, vE = pgh.get_hist(df_hit[etype], range=(elo, ehi), dx=epb)
    xE = bins[1:]

    # simple numerical width
    i_max = np.argmax(hE)
    h_max = hE[i_max]
    upr_half = xE[(xE > xE[i_max]) & (hE <= h_max / 2)][0]
    bot_half = xE[(xE < xE[i_max]) & (hE >= h_max / 2)][0]
    fwhm = upr_half - bot_half
    sig = fwhm / 2.355

    # # fit to gaussian: amp, mu, sig, bkg
    # amp = h_max * fwhm
    # bg0 = np.mean(hE[:20])
    # x0 = [amp, xE[i_max], sig, bg0]
    # xF, xF_cov = pgf.fit_hist(pgf.gauss_bkg, hE, bins, var=vE, guess=x0)
    # fit_func = pgf.gauss_bkg

    # fit to radford peak: mu, sigma, hstep, htail, tau, bg0, amp
    amp = h_max * fwhm
    hstep = 0.001  # fraction that the step contributes
    htail = 0.1
    tau = 10
    bg0 = np.mean(hE[:20])
    x0 = [xE[i_max], sig, hstep, htail, tau, bg0, amp]
    xF, xF_cov = pgf.fit_hist(pgf.radford_peak, hE, bins, var=vE, guess=x0)
    fit_func = pgf.radford_peak

    xF_err = np.sqrt(np.diag(xF_cov))
    chisq = []
    for i, h in enumerate(hE):
        model = fit_func(xE[i], *xF)
        diff = (model - h)**2 / model
        chisq.append(abs(diff))

    # collect results (for output, should use a dict or DataFrame)
    e_fit = xF[0]
    fwhm_fit = xF[1] * 2.355  #  * e_peak / e_fit
    print(fwhm, fwhm_fit)
    fwhmerr = xF_err[1] * 2.355 * e_peak / e_fit
    rchisq = sum(np.array(chisq) / len(hE))

    # plotting
    plt.plot(xE, hE, ds='steps', c='b', lw=2, label=etype)

    # peak shape
    plt.plot(xE,
             fit_func(xE, *x0),
             '-',
             c='orange',
             alpha=0.5,
             label='init. guess')
    plt.plot(xE, fit_func(xE, *xF), '-r', alpha=0.8, label='peakshape fit')
    plt.plot(np.nan,
             np.nan,
             '-w',
             label=f'mu={e_fit:.1f}, fwhm={fwhm_fit:.2f}')

    plt.xlabel(etype, ha='right', x=1)
    plt.ylabel('Counts', ha='right', y=1)
    plt.legend(loc=2)
    plt.tight_layout()
    # plt.show()
    plt.savefig(f'./plots/resolution_1460_{etype}.pdf')
    plt.cla()
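The "simple numerical width" block can be validated on a pure Gaussian, where FWHM = 2.355 * sigma; a quick sketch with fake data and the same half-max crossing logic:

import numpy as np

mu, sig = 1460.8, 1.2  # fake peak
data = np.random.normal(mu, sig, size=100000)
hE, bins = np.histogram(data, bins=np.arange(1445, 1475, 0.2))
xE = bins[1:]

i_max = np.argmax(hE)
h_max = hE[i_max]
upr_half = xE[(xE > xE[i_max]) & (hE <= h_max / 2)][0]
bot_half = xE[(xE < xE[i_max]) & (hE >= h_max / 2)][0]
print(upr_half - bot_half, 2.355 * sig)  # both ~2.8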
Example No. 20
def show_wfs():
    """
    show low-e waveforms in different energy regions
    """
    f_raw = '/Users/wisecg/Data/OPPI/raw/oppi_run0_cyc2027_raw.lh5'
    f_hit = '/Users/wisecg/Data/OPPI/hit/oppi_run0_cyc2027_hit.lh5'

    # use the hit file to select events
    tb_name = 'ORSIS3302DecoderForEnergy/raw'
    hit_store = lh5.Store()
    data = hit_store.read_object(tb_name, f_hit)
    df_hit = data.get_dataframe()

    # settings
    nwfs = 20
    elo, ehi, epb = 0, 100, 0.2

    # etype = 'energy_cal'  # noise stops @ 18 keV
    # noise_lo, noise_hi, phys_lo, phys_hi = 10, 15, 25, 30

    etype = 'trapE_cal'  # noise stops @ 35 keV
    noise_lo, noise_hi, phys_lo, phys_hi = 25, 30, 40, 45

    # # diagnostic plot
    # hE, bins, vE = pgh.get_hist(df_hit[etype], range=(elo, ehi), dx=epb)
    # xE = bins[1:]
    # plt.plot(xE, hE, c='b', ds='steps')
    # plt.show()
    # exit()

    # select noise and phys events
    idx_noise = df_hit[etype].loc[(df_hit[etype] > noise_lo)
                                  & (df_hit[etype] < noise_hi)].index[:nwfs]

    idx_phys = df_hit[etype].loc[(df_hit[etype] > phys_lo)
                                 & (df_hit[etype] < phys_hi)].index[:nwfs]

    # print(df_hit.loc[idx_noise])
    # print(df_hit.loc[idx_phys])

    # get phys waveforms, normalized by max value
    i_max = max(idx_noise[-1], idx_phys[-1])

    raw_store = lh5.Store()
    data_raw = raw_store.read_object(tb_name,
                                     f_raw,
                                     start_row=0,
                                     n_rows=i_max + 1)

    wfs = data_raw['waveform']['values'].nda
    wfs_noise = wfs[idx_noise.values, :]
    wfs_phys = wfs[idx_phys.values, :]
    ts = np.arange(0, wfs_noise.shape[1], 1)

    # noise wfs
    for iwf in range(wfs_noise.shape[0]):
        plt.plot(ts, wfs_noise[iwf, :], lw=1)

    # # phys wfs
    # for iwf in range(wfs_phys.shape[0]):
    #     plt.plot(ts, wfs_phys[iwf,:], lw=1)

    plt.xlabel('time (clock ticks)', ha='right', x=1)
    plt.ylabel('ADC', ha='right', y=1)
    # plt.show()
    plt.savefig('./plots/noise_wfs.png', dpi=300)
    plt.cla()
Example No. 21
def plot_wfs(run, cycle, etype, user=False, hit=True, cal=True):
    """
    show waveforms in different energy regions.
    use the dsp or hit file to select events
    """
    dg = DataGroup('$CAGE_SW/processing/cage.json', load=True)
    str_query = f'cycle=={cycle} and skip==False'
    dg.fileDB.query(str_query, inplace=True)

    # get runtime, start time, runtype
    runtype_list = np.array(dg.fileDB['runtype'])
    runtype = runtype_list[0]
    rt_min = dg.fileDB['runtime'].sum()
    u_start = dg.fileDB.iloc[0]['startTime']
    t_start = pd.to_datetime(u_start, unit='s')


    # get data and load into df
    lh5_dir = dg.lh5_user_dir if user else dg.lh5_dir
    if cal:
        etype_cal = etype + '_cal'

    if hit:
        print('Using hit files')
        file_list = lh5_dir + dg.fileDB['hit_path'] + '/' + dg.fileDB['hit_file']
        tb_group = 'ORSIS3302DecoderForEnergy/hit'
    else:
        print('Using dsp files')
        file_list = lh5_dir + dg.fileDB['dsp_path'] + '/' + dg.fileDB['dsp_file']
        tb_group = 'ORSIS3302DecoderForEnergy/dsp'

    # the available columns changed after run 117
    if run <= 117:
        cols = ['bl', 'bl_sig', 'A_10', 'AoE', 'ts_sec', 'dcr_raw', 'dcr_ftp',
                'dcr_max', 'tp_0', 'tp_10', 'tp_90', 'tp_50', 'tp_80', 'tp_max']
    else:
        cols = ['bl', 'bl_sig', 'bl_slope', 'lf_max', 'A_10', 'AoE', 'dcr',
                'tp_0', 'tp_10', 'tp_90', 'tp_50', 'tp_80', 'tp_max']
    cols = ([etype, etype_cal] if cal else [etype]) + cols

    df = lh5.load_dfs(file_list, cols, tb_group)


    waveforms = []

    n_eranges = 10  # number of steps between lower and upper energy limits
    nwfs = 50  # number of waveforms to average for superpulse
    emin = 500  # lower energy limit
    emax = 15000  # upper energy limit

    eranges = np.linspace(emin, emax, n_eranges) #set up energy slices
    for e in eranges:
        # get events within 1% of the target energy
        elo = e - (0.01 * e)
        ehi = e + (0.01 * e)
        idx = df[etype].loc[(df[etype] >= elo) & (df[etype] <= ehi)].index[:nwfs]
        raw_store = lh5.Store()
        tb_name = 'ORSIS3302DecoderForEnergy/raw'
        lh5_dir = dg.lh5_dir
        raw_list = lh5_dir + dg.fileDB['raw_path'] + '/' + dg.fileDB['raw_file']
        f_raw = raw_list.values[0] # fixme, only works for one file rn
        data_raw, nrows = raw_store.read_object(tb_name, f_raw, start_row=0, n_rows=idx[-1]+1)

        wfs_all = (data_raw['waveform']['values']).nda
        wfs = wfs_all[idx.values, :]
        # baseline subtraction
        bl_means = wfs[:,:800].mean(axis=1)
        wf_blsub = (wfs.transpose() - bl_means).transpose()
        ts = np.arange(0, wf_blsub.shape[1]-1, 1)
        super_wf = np.mean(wf_blsub, axis=0)
        wf_max = np.amax(super_wf)
        superpulse = np.divide(super_wf, wf_max)
        waveforms.append(superpulse)

    fig, ax = plt.subplots(figsize=(9, 8))

    # set up colorbar to plot waveforms of different energies different colors
    colors = plt.cm.viridis(np.linspace(0, 1, n_eranges))
    c = np.arange(0, n_eranges)
    norm = mpl.colors.Normalize(vmin=c.min(), vmax=c.max())
    cmap = mpl.cm.ScalarMappable(norm=norm, cmap=mpl.cm.jet)
    cmap.set_array([])

    for n in range(n_eranges):
        plt.plot(ts, waveforms[n][:len(waveforms[n])-1], c=cmap.to_rgba(n))

    cb = fig.colorbar(cmap, ticks=list(eranges))
    cb.set_label("Energy", ha = 'right', va='center', rotation=270, fontsize=20)
    cb.ax.tick_params(labelsize=18)

#     plt.xlim(3800, 8000)
#     plt.ylim(0.4, 1.01)
    plt.setp(ax.get_xticklabels(), fontsize=16)
    plt.setp(ax.get_yticklabels(), fontsize=16)
    plt.title(f'Waveforms, {emin}-{emax} trapEftp, {n_eranges} steps', fontsize=20)
    plt.xlabel('clock cycles', fontsize=20)
    plt.savefig(f'./plots/angleScan/waveforms/wfs_fallingEdge_cycle{cycle}.png', dpi=300)
Example No. 22
def process_orca(daq_filename,
                 raw_file_pattern,
                 n_max=np.inf,
                 ch_groups_dict=None,
                 verbose=False,
                 buffer_size=1024):
    """
    convert ORCA DAQ data to "raw" lh5

    ch_groups_dict: keyed by decoder_name
    """
    lh5_store = lh5.Store()

    f_in = open_orca(daq_filename)
    if f_in is None:
        print("Couldn't find the file %s" % daq_filename)
        sys.exit(0)

    # parse the header. save the length so we can jump past it later
    reclen, header_nbytes, header_dict = parse_header(daq_filename)

    # figure out the total size
    SEEK_END = 2
    f_in.seek(0, SEEK_END)
    file_size = float(f_in.tell())
    f_in.seek(0, 0)  # rewind
    file_size_MB = file_size / 1e6
    print("Total file size: {:.3f} MB".format(file_size_MB))
    print("Run number:", get_run_number(header_dict))

    # Build the dict used in the inner loop for passing data packets to decoders
    decoders = {}

    # First build a list of all decoder names that might be in the data
    # This is a dict of names keyed off of data_id
    id2dn_dict = get_id_to_decoder_name_dict(header_dict)
    if verbose:
        print("Data IDs present in ORCA file header are:")
        for data_id in id2dn_dict:
            print(f"    {data_id}: {id2dn_dict[data_id]}")

    # Invert the previous list, to get a list of decoder ids keyed off of
    # decoder names
    dn2id_dict = {name: data_id for data_id, name in id2dn_dict.items()}

    # By default we decode all data for which we have decoders. If the user
    # provides a ch_group_dict, we will only decode data from decoders keyed in
    # the dict.
    decode_all_data = True
    decoders_to_run = dn2id_dict.keys()
    if ch_groups_dict is not None:
        decode_all_data = False
        decoders_to_run = ch_groups_dict.keys()

    # Now get the actual requested decoders
    for sub in OrcaDecoder.__subclasses__():
        decoder = sub()  # instantiate the class
        if decoder.decoder_name in decoders_to_run:
            decoder.dataID = dn2id_dict[decoder.decoder_name]
            decoder.set_header_dict(header_dict)
            decoders[decoder.dataID] = decoder
    if len(decoders) == 0:
        print("No decoders. Exiting...")
        sys.exit(1)
    if verbose:
        print("pygama will run these decoders:")
        for data_id, dec in decoders.items():
            print("   ", dec.decoder_name + ", id =", data_id)

    # Now cull the decoders_to_run list
    new_dtr = []
    for decoder_name in decoders_to_run:
        data_id = dn2id_dict[decoder_name]
        if data_id not in decoders.keys():
            print("warning: no decoder exists for", decoder_name,
                  "... will skip its data.")
        else:
            new_dtr.append(decoder_name)
    decoders_to_run = new_dtr

    # prepare ch groups
    if ch_groups_dict is None:
        ch_groups_dict = {}
        for decoder_name in decoders_to_run:
            ch_groups = create_dummy_ch_group()
            ch_groups_dict[decoder_name] = ch_groups
            grp_path_template = f'{decoder_name}/raw'
            set_outputs(ch_groups,
                        out_file_template=raw_file_pattern,
                        grp_path_template=grp_path_template)
    else:
        for decoder_name, ch_groups in ch_groups_dict.items():
            expand_ch_groups(ch_groups)
            set_outputs(ch_groups,
                        out_file_template=raw_file_pattern,
                        grp_path_template='{system}/{group_name}/raw')

    # Set up tables for data
    ch_tables_dict = {}
    for data_id, dec in decoders.items():
        decoder_name = id2dn_dict[data_id]
        ch_groups = ch_groups_dict[decoder_name]
        ch_tables_dict[data_id] = build_tables(ch_groups, buffer_size, dec)
    max_tbl_size = 0

    # -- scan over raw data --
    print("Beginning daq-to-raw processing ...")

    packet_id = 0  # number of events decoded
    unrecognized_data_ids = []

    # skip the header using reclen from before
    # reclen is in number of longs, and we want to skip a number of bytes
    f_in.seek(reclen * 4)

    n_entries = 0
    unit = "B"
    if n_max < np.inf and n_max > 0:
        n_entries = n_max
        unit = "id"
    else:
        n_entries = file_size
    progress_bar = tqdm_range(0,
                              n_entries,
                              text="Processing",
                              verbose=verbose,
                              unit=unit)
    file_position = 0

    # start scanning
    while (packet_id < n_max and f_in.tell() < file_size):
        packet_id += 1

        try:
            packet, data_id = get_next_packet(f_in)
        except EOFError:
            break
        except Exception as e:
            print("Failed to get the next event ... Exception:", e)
            break

        if decode_all_data and data_id not in decoders:
            if data_id not in unrecognized_data_ids:
                unrecognized_data_ids.append(data_id)
            continue

        if data_id not in decoders: continue
        decoder = decoders[data_id]

        # Clear the tables if the next read could overflow them.
        # Only have to check this when the max table size is within
        # max_n_rows_per_packet of being full.
        if max_tbl_size + decoder.max_n_rows_per_packet() >= buffer_size:
            ch_groups = ch_groups_dict[id2dn_dict[data_id]]
            max_tbl_size = 0
            for group_info in ch_groups.values():
                tbl = group_info['table']
                if tbl.is_full():
                    group_path = group_info['group_path']
                    out_file = group_info['out_file']
                    lh5_store.write_object(tbl,
                                           group_path,
                                           out_file,
                                           n_rows=tbl.loc)
                    tbl.clear()
                if tbl.loc > max_tbl_size: max_tbl_size = tbl.loc
        else: max_tbl_size += decoder.max_n_rows_per_packet()

        tables = ch_tables_dict[data_id]
        decoder.decode_packet(packet, tables, packet_id, header_dict)

        if verbose:
            if n_max < np.inf and n_max > 0:
                update_len = 1
            else:
                update_len = f_in.tell() - file_position
                file_position = f_in.tell()
            update_progress(progress_bar, update_len)

    print("Done. Last packet ID:", packet_id)
    f_in.close()

    # final write to file
    for dec_name, ch_groups in ch_groups_dict.items():
        for group_info in ch_groups.values():
            tbl = group_info['table']
            if tbl.loc == 0: continue
            group_path = group_info['group_path']
            out_file = group_info['out_file']
            lh5_store.write_object(tbl, group_path, out_file, n_rows=tbl.loc)
            print('last write')
            tbl.clear()

    if len(unrecognized_data_ids) > 0:
        print("WARNING, Found the following unknown data IDs:")
        for data_id in unrecognized_data_ids:
            print("  {}: {}".format(data_id, id2dn_dict[data_id]))
        print("hopefully they weren't important!\n")

    print("Wrote RAW File:\n    {}\nFILE INFO:".format(raw_file_pattern))
Example No. 23
def raw_to_dsp(f_raw,
               f_dsp,
               dsp_config,
               lh5_tables=None,
               database=None,
               outputs=None,
               n_max=np.inf,
               overwrite=True,
               buffer_len=3200,
               block_width=16,
               verbose=1):
    """
    Uses the ProcessingChain class.
    The list of processors is specified via a JSON file.
    """
    t_start = time.time()

    if isinstance(dsp_config, str):
        with open(dsp_config, 'r') as config_file:
            dsp_config = json.load(config_file, object_pairs_hook=OrderedDict)

    if not isinstance(dsp_config, dict):
        raise Exception('Error, dsp_config must be a dict')

    raw_store = lh5.Store()
    lh5_file = raw_store.gimme_file(f_raw, 'r')
    if lh5_file is None:
        print(f'raw_to_dsp: input file not found: {f_raw}')
        return
    else:
        print(f'Opened file {f_raw}')

    # if no group is specified, assume we want to decode every table in the file
    if lh5_tables is None:
        lh5_tables = []
        lh5_keys = raw_store.ls(f_raw)

        # sometimes 'raw' is nested, e.g g024/raw
        for tb in lh5_keys:
            if "raw" not in tb:
                tbname = raw_store.ls(lh5_file[tb])[0]
                if "raw" in tbname:
                    tb = tb + '/' + tbname  # g024 + /raw
            lh5_tables.append(tb)

    # make sure every group points to waveforms; if not, remove it.
    # (build a new list -- removing from a list while iterating over it
    # silently skips elements)
    lh5_tables = [tb for tb in lh5_tables if 'raw' in tb]
    if len(lh5_tables) == 0:
        print("Empty lh5_tables, exiting...")
        sys.exit(1)

    # get the database parameters. For now, this will just be a dict in a json
    # file, but eventually we will want to interface with the metadata repo
    if isinstance(database, str):
        with open(database, 'r') as db_file:
            database = json.load(db_file)

    if database and not isinstance(database, dict):
        database = None
        print(
            'database is not a valid json file or dict. Using default db values.'
        )

    # clear existing output files
    if overwrite:
        if os.path.isfile(f_dsp):
            if verbose:
                print('Overwriting existing file:', f_dsp)
            os.remove(f_dsp)

    for tb in lh5_tables:
        # load primary table and build processing chain and output table
        tot_n_rows = raw_store.read_n_rows(tb, f_raw)
        if n_max and n_max < tot_n_rows: tot_n_rows = n_max

        chan_name = tb.split('/')[0]
        db_dict = database.get(chan_name) if database else None
        lh5_in, n_rows_read = raw_store.read_object(tb,
                                                    f_raw,
                                                    start_row=0,
                                                    n_rows=buffer_len)
        pc, mask, tb_out = build_processing_chain(lh5_in, dsp_config, db_dict,
                                                  outputs, verbose,
                                                  block_width)

        print(f'Processing table: {tb} ...')

        for start_row in tqdm_range(0, int(tot_n_rows), buffer_len, verbose):
            lh5_in, n_rows = raw_store.read_object(tb,
                                                   f_raw,
                                                   start_row=start_row,
                                                   n_rows=buffer_len,
                                                   field_mask=mask,
                                                   obj_buf=lh5_in)
            n_rows = min(tot_n_rows - start_row, n_rows)
            try:
                pc.execute(0, n_rows)
            except DSPFatal as e:
                # Update the wf_range to reflect the file position
                e.wf_range = "{}-{}".format(e.wf_range[0] + start_row,
                                            e.wf_range[1] + start_row)
                raise e

            raw_store.write_object(tb_out,
                                   tb.replace('/raw', '/dsp'),
                                   f_dsp,
                                   n_rows=n_rows)

        print(f'Done.  Output written to {f_dsp}')

    # write processing metadata
    dsp_info = lh5.Struct()
    dsp_info.add_field('timestamp', lh5.Scalar(np.uint64(time.time())))
    dsp_info.add_field('python_version', lh5.Scalar(sys.version))
    dsp_info.add_field('numpy_version', lh5.Scalar(np.version.version))
    dsp_info.add_field('h5py_version', lh5.Scalar(h5py.version.version))
    dsp_info.add_field('hdf5_version', lh5.Scalar(h5py.version.hdf5_version))
    dsp_info.add_field('pygama_version', lh5.Scalar(pygama_version))
    dsp_info.add_field('pygama_branch', lh5.Scalar(git.branch))
    dsp_info.add_field('pygama_revision', lh5.Scalar(git.revision))
    dsp_info.add_field('pygama_date', lh5.Scalar(git.commit_date))
    dsp_info.add_field('dsp_config',
                       lh5.Scalar(json.dumps(dsp_config, indent=2)))
    raw_store.write_object(dsp_info, 'dsp_info', f_dsp)

    t_elap = (time.time() - t_start) / 60
    print(f'Done processing.  Time elapsed: {t_elap:.2f} min.')
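
# A minimal usage sketch for raw_to_dsp; the dsp output path and the JSON
# config name here are hypothetical, and the config layout is whatever
# build_processing_chain expects.
f_raw = '/Users/wisecg/Data/OPPI/raw/oppi_run0_cyc2027_raw.lh5'
f_dsp = '/Users/wisecg/Data/OPPI/dsp/oppi_run0_cyc2027_dsp.lh5'
raw_to_dsp(f_raw, f_dsp, 'oppi_dsp.json',
           n_max=5000,  # only process the first 5000 rows of each table
           verbose=1)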
Example No. 24
def data_cleaning():
    """
    using parameters in the hit file, plot 1d and 2d spectra to find cut values.

    columns in file:
        ['trapE', 'bl', 'bl_sig', 'A_10', 'AoE', 'packet_id', 'ievt', 'energy',
        'energy_first', 'timestamp', 'crate', 'card', 'channel', 'energy_cal',
        'trapE_cal']

    note: 'energy_first' is the first value of the energy gate.
    """
    i_plot = 3  # run all plots after this number

    f_hit = '/Users/wisecg/Data/OPPI/hit/oppi_run0_cyc2027_hit.lh5'

    tb_name = 'ORSIS3302DecoderForEnergy/raw'
    hit_store = lh5.Store()
    data = hit_store.read_object(tb_name, f_hit)
    df_hit = data.get_dataframe()

    # get info about df -- 'describe' is very convenient
    dsc = df_hit[['bl', 'bl_sig', 'A_10', 'energy_first',
                  'timestamp']].describe()
    # print(dsc)
    # print(dsc.loc['min','bl'])

    # correct energy_first (inplace) to allow negative values
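    # ('energy_first' is stored as uint32, so negative values wrap around to
    #  just below 2**32; shifting by the uint32 max recovers them)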
    df_hit['energy_first'] = df_hit['energy_first'].astype(np.int64)
    efirst = df_hit['energy_first'].values
    idx = np.where(efirst > 4e9)
    eshift = efirst[idx] - 4294967295
    efirst[idx] = eshift
    # print(df_hit[['energy','energy_first','bl']])

    if i_plot <= 0:
        # bl vs energy

        elo, ehi, epb = 0, 250, 1
        blo, bhi, bpb = 54700, 61400, 100
        nbx = int((ehi - elo) / epb)
        nby = int((bhi - blo) / bpb)

        h = plt.hist2d(df_hit['trapE_cal'],
                       df_hit['bl'],
                       bins=[nbx, nby],
                       range=[[elo, ehi], [blo, bhi]],
                       cmap='jet')

        cb = plt.colorbar(h[3], ax=plt.gca())
        plt.xlabel('trapE_cal', ha='right', x=1)
        plt.ylabel('bl', ha='right', y=1)
        plt.tight_layout()
        # plt.show()
        plt.savefig('./plots/bl_vs_e.png', dpi=300)
        cb.remove()
        plt.cla()

        # make a formal baseline cut from 1d histogram
        hE, bins, vE = pgh.get_hist(df_hit['bl'], range=(blo, bhi), dx=bpb)
        xE = bins[1:]
        plt.semilogy(xE, hE, c='b', ds='steps')

        bl_cut_lo, bl_cut_hi = 57700, 58500
        plt.axvline(bl_cut_lo, c='r', lw=1)
        plt.axvline(bl_cut_hi, c='r', lw=1)

        plt.xlabel('bl', ha='right', x=1)
        plt.ylabel('counts', ha='right', y=1)
        # plt.show()
        plt.savefig('./plots/bl_cut.pdf')
        plt.cla()

    if i_plot <= 1:
        # energy_first vs. E

        flo, fhi, fpb = -565534, 70000, 1000
        elo, ehi, epb = 0, 250, 1

        nbx = int((ehi - elo) / epb)
        nby = int((fhi - flo) / fpb)

        h = plt.hist2d(df_hit['trapE_cal'],
                       df_hit['energy_first'],
                       bins=[nbx, nby],
                       range=[[elo, ehi], [flo, fhi]],
                       cmap='jet',
                       norm=LogNorm())

        cb = plt.colorbar(h[3], ax=plt.gca())
        plt.xlabel('trapE_cal', ha='right', x=1)
        plt.ylabel('energy_first', ha='right', y=1)
        plt.tight_layout()
        # plt.show()
        plt.savefig('./plots/efirst_vs_e.png', dpi=300)
        cb.remove()
        plt.cla()

        # make a formal energy_first cut from a 1d histogram
        flo, fhi, fpb = -20000, 20000, 100
        hE, xE, vE = pgh.get_hist(df_hit['energy_first'],
                                  range=(flo, fhi),
                                  dx=fpb)
        xE = xE[1:]
        plt.semilogy(xE, hE, c='b', ds='steps')

        ef_cut_lo, ef_cut_hi = -5000, 4000
        plt.axvline(ef_cut_lo, c='r', lw=1)
        plt.axvline(ef_cut_hi, c='r', lw=1)

        plt.xlabel('energy_first', ha='right', x=1)
        plt.ylabel('counts', ha='right', y=1)
        # plt.show()
        plt.savefig('./plots/efirst_cut.pdf')
        plt.cla()

    if i_plot <= 3:
        # trapE_cal - energy_cal vs trapE_cal

        # use baseline cut
        df_cut = df_hit.query('bl > 57700 and bl < 58500').copy()

        # add new diffE column
        df_cut['diffE'] = df_cut['trapE_cal'] - df_cut['energy_cal']

        elo, ehi, epb = 0, 3000, 1
        dlo, dhi, dpb = -10, 10, 0.1

        nbx = int((ehi - elo) / epb)
        nby = int((dhi - dlo) / dpb)

        h = plt.hist2d(df_cut['trapE_cal'],
                       df_cut['diffE'],
                       bins=[nbx, nby],
                       range=[[elo, ehi], [dlo, dhi]],
                       cmap='jet',
                       norm=LogNorm())

        plt.xlabel('trapE_cal', ha='right', x=1)
        plt.ylabel('diffE (trap-onbd)', ha='right', y=1)
        plt.tight_layout()
        # plt.show()
        plt.savefig('./plots/diffE.png', dpi=300)
        plt.cla()

    if i_plot <= 4:
        # A_10/trapE_cal vs trapE_cal (A/E vs E)

        # I doubt we want to introduce a pulse shape cut at this point,
        # since I'm tuning on bkg data and we don't know a priori what (if any)
        # features the Kr waveforms will have.  Also, the efficiency as a
        # function of energy would have to be determined, which is hard.
        # So this is just for fun.

        # use baseline cut
        df_cut = df_hit.query('bl > 57700 and bl < 58500').copy()

        # add new A/E column
        df_cut['aoe'] = df_cut['A_10'] / df_cut['trapE_cal']

        # alo, ahi, apb = -1300, 350, 1
        # elo, ehi, epb = 0, 250, 1
        alo, ahi, apb = -0.5, 5, 0.05
        elo, ehi, epb = 0, 50, 0.2

        nbx = int((ehi - elo) / epb)
        nby = int((ahi - alo) / apb)

        h = plt.hist2d(df_cut['trapE_cal'],
                       df_cut['aoe'],
                       bins=[nbx, nby],
                       range=[[elo, ehi], [alo, ahi]],
                       cmap='jet',
                       norm=LogNorm())

        plt.xlabel('trapE_cal', ha='right', x=1)
        plt.ylabel('A/E', ha='right', y=1)
        plt.tight_layout()
        # plt.show()
        plt.savefig('./plots/aoe_vs_e_lowe.png', dpi=300)
        plt.cla()

    if i_plot <= 5:
        # show effect of cuts on energy spectrum

        # baseline cut and efirst cut are very similar
        df_cut = df_hit.query('bl > 57700 and bl < 58500')
        # df_cut = df_hit.query('energy_first > -5000 and energy_first < 4000')

        etype = 'trapE_cal'
        elo, ehi, epb = 0, 250, 0.5

        # no cuts
        h1, x1, v1 = pgh.get_hist(df_hit[etype], range=(elo, ehi), dx=epb)
        x1 = x1[1:]
        plt.plot(x1, h1, c='k', lw=1, ds='steps', label='raw')

        # baseline cut
        h2, x2, v2 = pgh.get_hist(df_cut[etype], range=(elo, ehi), dx=epb)
        plt.plot(x1, h2, c='b', lw=1, ds='steps', label='bl cut')

        plt.xlabel(etype, ha='right', x=1)
        plt.ylabel('counts', ha='right', y=1)
        plt.legend()
        # plt.show()
        plt.savefig('./plots/cut_spectrum.pdf')
        plt.cla()
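
    # the two cuts tuned above are very similar; a quick sketch applying them
    # together (cut values are the ones read off the plots above):
    df_clean = df_hit.query('bl > 57700 and bl < 58500 and '
                            'energy_first > -5000 and energy_first < 4000')
    print(f'{len(df_clean)} / {len(df_hit)} events pass both cuts')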
Example No. 25
def process_flashcam(daq_file,
                     raw_files,
                     n_max,
                     ch_groups_dict=None,
                     verbose=False,
                     buffer_size=8192,
                     chans=None,
                     f_out=''):
    """
    decode FlashCam data, using the fcutils package to handle file access,
    and the FlashCam DataTaker to save the results and write to output.

    `raw_files` can be a string, or a dict with a label for each file:
      `{'geds':'filename_geds.lh5', 'muvt':'filename_muvt.lh5'}`
    """
    import fcutils

    if isinstance(raw_files, str):
        single_output = True
        f_out = raw_files
    elif len(raw_files) == 1:
        single_output = True
        f_out = list(raw_files.values())[0]
    else:
        single_output = False

    fcio = fcutils.fcio(daq_file)

    # set up event decoder
    event_decoder = FlashCamEventDecoder()
    event_decoder.set_file_config(fcio)
    event_tables = {}

    # build ch_groups and set up tables
    ch_groups = None
    if (ch_groups_dict is not None) and ('FlashCamEventDecoder'
                                         in ch_groups_dict):
        # get ch_groups
        ch_groups = ch_groups_dict['FlashCamEventDecoder']
        expand_ch_groups(ch_groups)
    else:
        print('Config not found.  Single-table mode')
        ch_groups = create_dummy_ch_group()

    # set up ch_group-to-output-file-and-group info
    if single_output:
        set_outputs(ch_groups,
                    out_file_template=f_out,
                    grp_path_template='{group_name}/raw')
    else:
        set_outputs(ch_groups,
                    out_file_template=raw_files,
                    grp_path_template='{group_name}/raw')

    # set up tables
    event_tables = build_tables(ch_groups, buffer_size, event_decoder)

    if verbose:
        print('Output group : output file')
        for group_info in ch_groups.values():
            group_path = group_info['group_path']
            out_file = group_info['out_file']
            print(group_path, ':', out_file.split('/')[-1])

    # dictionary with the unique file names as keys
    file_info = dict.fromkeys(
        set(group_info['out_file'] for group_info in ch_groups.values()),
        False)

    # set up status decoder (this is 'auxs' output)
    status_decoder = FlashCamStatusDecoder()
    status_decoder.set_file_config(fcio)
    status_tbl = lh5.Table(buffer_size)
    status_decoder.initialize_lh5_table(status_tbl)
    try:
        status_filename = f_out if single_output else raw_files['auxs']
        config_filename = f_out if single_output else raw_files['auxs']
    except KeyError:
        status_filename = "fcio_status"
        config_filename = "fcio_config"

    # Set up the store
    # TODO: add overwrite capability
    lh5_store = lh5.Store()

    # write fcio_config
    fcio_config = event_decoder.get_file_config_struct()
    lh5_store.write_object(fcio_config, 'fcio_config', config_filename)

    # loop over raw data packets
    i_debug = 0
    packet_id = 0
    rc = 1
    bytes_processed = 0
    bytes_per_loop = 0
    file_size = os.path.getsize(daq_file)
    max_numtraces = 0

    unit = "B"
    n_entries = 0
    if n_max < np.inf and n_max > 0:
        n_entries = n_max
        unit = "id"
    else:
        n_entries = file_size
    progress_bar = tqdm_range(0,
                              int(n_entries),
                              text="Processing",
                              verbose=verbose,
                              unit=unit)
    while rc and packet_id < n_max:
        rc = fcio.get_record()

        # Skip non-interesting records
        # FIXME: push to a buffer of skipped packets?
        if rc == 0 or rc == 1 or rc == 2 or rc == 5: continue

        packet_id += 1

        # Status record
        if rc == 4:
            bytes_per_loop = status_decoder.decode_packet(
                fcio, status_tbl, packet_id)
            bytes_processed += bytes_per_loop
            if status_tbl.is_full():
                lh5_store.write_object(status_tbl,
                                       'fcio_status',
                                       status_filename,
                                       n_rows=status_tbl.size)
                status_tbl.clear()

        # Event or SparseEvent record
        if rc == 3 or rc == 6:
            for group_info in ch_groups.values():
                tbl = group_info['table']
                # Check that the tables are large enough
                # TODO: don't need to check this every event, only if sum(numtraces) >= buffer_size
                if tbl.size < fcio.numtraces and fcio.numtraces > max_numtraces:
                    print('warning: tbl.size =', tbl.size,
                          'but fcio.numtraces =', fcio.numtraces)
                    print('may overflow. suggest increasing tbl.size')
                    max_numtraces = fcio.numtraces
                # Pre-emptively clear tables if it might be necessary
                if tbl.size - tbl.loc < fcio.numtraces:  # might overflow
                    group_path = group_info['group_path']
                    out_file = group_info['out_file']
                    lh5_store.write_object(tbl,
                                           group_path,
                                           out_file,
                                           n_rows=tbl.loc)
                    if out_file in file_info: file_info[out_file] = True
                    tbl.clear()

            # Looks okay: just decode
            bytes_per_loop = event_decoder.decode_packet(
                fcio, event_tables, packet_id)
            bytes_processed += bytes_per_loop

            if verbose:
                update_len = 0
                if n_max < np.inf and n_max > 0:
                    update_len = 1
                else:
                    update_len = bytes_per_loop
                update_progress(progress_bar, update_len)

            # i_debug += 1
            # if i_debug == 10:
            #    print("breaking early")
            #    break # debug, deleteme

    # end of loop, write to file once more
    for group_info in ch_groups.values():
        tbl = group_info['table']
        if tbl.loc != 0:
            group_path = group_info['group_path']
            out_file = group_info['out_file']
            lh5_store.write_object(tbl, group_path, out_file, n_rows=tbl.loc)
            if out_file in file_info: file_info[out_file] = True
            tbl.clear()
    if status_tbl.loc != 0:
        lh5_store.write_object(status_tbl,
                               'fcio_status',
                               status_filename,
                               n_rows=status_tbl.loc)
        status_tbl.clear()

    # alert user to any files not actually saved in the end
    for out_file, is_saved in file_info.items():
        if not is_saved:
            print('Not saving file since no data were found:', out_file)

    if verbose:
        print(packet_id, 'packets decoded')

    if len(event_decoder.skipped_channels) > 0:
        print("Warning - daq_to_raw skipped some channels in file")
        if verbose:
            for ch, n in event_decoder.skipped_channels.items():
                print("  ch", ch, ":", n, "hits")

    return bytes_processed
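
# A usage sketch for process_flashcam; the DAQ file and output names are
# hypothetical. Passing a dict for raw_files routes the 'geds' and 'auxs'
# data to separate output files.
n_bytes = process_flashcam('run0.fcio',
                           {'geds': 'run0_geds.lh5', 'auxs': 'run0_auxs.lh5'},
                           n_max=np.inf,
                           verbose=True)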
Example No. 26
#!/usr/bin/env python3
import numpy as np
import pygama.lh5 as lh5
import matplotlib.pyplot as plt

# show how to correct for timestamp rollover with the Struck 3302,
# and how to calculate the run duration using the dsp file (fastest).

f_dsp = '/Users/wisecg/Data/OPPI/dsp/oppi_run9_cyc2180_dsp.lh5'

sto = lh5.Store()
data = sto.read_object('ORSIS3302DecoderForEnergy/raw', f_dsp)

# correct for timestamp rollover
clock = 100e6 # 100 MHz
UINT_MAX = 4294967295 # (0xffffffff)
t_max = UINT_MAX / clock

# ts = data['timestamp'].nda.astype(np.int64) # has to be signed for np.diff
ts = data['timestamp'].nda / clock # converts to float

tdiff = np.diff(ts)
tdiff = np.insert(tdiff, 0, 0)
iwrap = np.where(tdiff < 0)
iloop = np.append(iwrap[0], len(ts))

# stitch the blocks between rollovers back together
# (same pattern as in dsp_to_hit_cage, Example No. 28 below)
ts_new, t_roll = [], 0
for i, idx in enumerate(iloop):
    ilo = 0 if i == 0 else iwrap[0][i-1]
    ihi = idx
    ts_block = ts[ilo:ihi]
    t_last = ts[ilo - 1]
    t_diff = t_max - t_last
    ts_new.append(ts_block + t_roll)
    t_roll += t_last + t_diff
ts_corr = np.concatenate(ts_new)

# the run duration is then just the last corrected timestamp
print(f'run duration: {ts_corr[-1]:.2f} s')
Example No. 27
    def __init__(self,
                 files_in,
                 lh5_group,
                 dsp_config=None,
                 database=None,
                 n_drawn=1,
                 x_unit='ns',
                 x_lim=None,
                 waveforms='waveform',
                 wf_styles=None,
                 lines=None,
                 legend=None,
                 legend_opts=None,
                 norm=None,
                 align=None,
                 selection=None,
                 buffer_len=128,
                 block_width=8,
                 verbosity=1):
        """Constructor for WaveformBrowser:
        - files_in: name of file or list of names to browse. Can use wildcards
        - lh5_group: name of LH5 group in file to browse
        - dsp_config (optional): name of DSP config json file containing transforms available to draw
        - database (optional): dict with database of processing parameters
        - n_drawn (default 1): number of events to draw simultaneously when calling DrawNext
        - x_unit (default ns): unit for x-axis
        - x_lim (default auto): range of x-values, passed as a tuple
        - waveforms (default 'waveform'): name of wf or list of wf names to draw
        - wf_styles (default None): waveform colors and other style parameters to cycle through when drawing waveforms. Can be given as:
            dict of lists: e.g. {'color':['r', 'g', 'b'], 'linestyle':['-', '--', '.']}
            name of predefined style; see matplotlib.style documentation
            None: use current matplotlib style
          If a single style cycle is given, use for all lines; if a list is given, match to waveforms list.
        - lines (default None): name of parameter or list of parameters to draw hlines and vlines for
        - legend (default None): formatting string and values to include in the legend. This can be a list of values (one for each waveform in waveforms). The values can be given as a tuple whose first entry is a formatting string and subsequent entries are the values to place in the formatting string. When building a formatting string, if a name is given in the {}s, it is assumed to be a parameter from the DSP config file. An example is:
          ("{:0.1f} keV", energy)
        - legend_opts (default None): dict containing kwargs for formatting the legend
        - norm (default None): name of parameter (probably energy) to use to normalize WFs; useful when drawing multiple
        - align (default None): name of time parameter to set as 0 time; useful for aligning multiple waveforms
        - selection (optional): selection of events to draw. Can be either a list of event indices or a numpy array mask (ala pandas).
        - buffer_len (default 128): number of waveforms to keep in memory at a time
        - block_width (default 8): block width for processing chain
        """
        self.verbosity = verbosity

        # data i/o initialization
        self.lh5_st = lh5.Store(keep_open=True)
        if isinstance(files_in, str): files_in = [files_in]

        # Expand wildcards and map out the files
        self.lh5_files = [
            f for f_wc in files_in
            for f in sorted(glob.glob(os.path.expandvars(f_wc)))
        ]
        self.lh5_group = lh5_group
        # file map is cumulative length of files up to file n. By doing searchsorted left, we can get the file for a given wf index
        self.file_map = np.array(
            [self.lh5_st.read_n_rows(lh5_group, f) for f in self.lh5_files],
            'int64')
        np.cumsum(self.file_map, out=self.file_map)

        # Get the input buffer and read the first chunk
        self.lh5_in = self.lh5_st.get_buffer(self.lh5_group, self.lh5_files[0],
                                             buffer_len)
        self.lh5_st.read_object(self.lh5_group,
                                self.lh5_files[0],
                                start_row=0,
                                n_rows=buffer_len,
                                obj_buf=self.lh5_in)
        self.buffer_len = buffer_len
        self.current_file = None
        self.current_chunk = None

        # initialize stuff for iteration
        self.selection = selection
        self.index_it = None
        self.reset()
        self.n_drawn = n_drawn

        # initialize list of objects to draw
        if isinstance(waveforms, str): self.wf_names = [waveforms]
        elif waveforms is None: self.wf_names = []
        else: self.wf_names = list(waveforms)
        self.wf_data = [[] for _ in self.wf_names]

        # wf_styles
        if isinstance(wf_styles, list) or isinstance(wf_styles, tuple):
            self.wf_styles = [None for _ in self.wf_data]
            for i, sty in enumerate(wf_styles):
                if isinstance(sty, str):
                    try:
                        self.wf_styles[i] = plt.style.library[sty][
                            'axes.prop_cycle']
                    except KeyError:
                        self.wf_styles[i] = itertools.repeat(None)
                elif sty is None:
                    self.wf_styles[i] = itertools.repeat(None)
                else:
                    self.wf_styles[i] = cycler(**sty)
        else:
            if isinstance(wf_styles, str):
                try:
                    self.wf_styles = plt.style.library[wf_styles][
                        'axes.prop_cycle']
                except KeyError:
                    self.wf_styles = itertools.repeat(None)
            elif wf_styles is None:
                self.wf_styles = itertools.repeat(None)
            else:
                self.wf_styles = cycler(**wf_styles)

        if lines is None: self.line_names = []
        elif isinstance(lines, list): self.line_names = lines
        elif isinstance(lines, tuple): self.line_names = list(lines)
        else: self.line_names = [lines]
        self.line_data = [[] for _ in self.line_names]

        if legend is None: legend = []
        elif not isinstance(legend, list): legend = [legend]

        # Set up the legend format strings and collect input values
        self.legend_input = []
        self.legend_format = []
        for entry in legend:
            legend_input = []
            legend_format = ''
            if not isinstance(entry, tuple):
                entry = (entry, )

            for val in entry:
                if isinstance(val, str):
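                    # Formatter().parse yields one (literal_text, field_name,
                    # format_spec, conversion) tuple per replacement field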
                    for st, name, form, cv in string.Formatter().parse(val):
                        legend_format += st
                        if name is not None:
                            legend_format += '{'
                            legend_input.append(name)
                            if form is not None and form != '':
                                legend_format += ':' + form
                            if cv is not None and cv != '':
                                legend_format += '!' + cv
                            legend_format += '}'
                else:
                    # find any {}s to fill from the formatter
                    idxs = [
                        i for i, inp in enumerate(legend_input)
                        if isinstance(inp, str) and inp == ''
                    ]
                    if idxs:  # if we found a {}, it's already in the formatter
                        legend_input[idxs[0]] = val
                    else:  # otherwise add to formatter
                        legend_input.append(val)
                        if legend_format != '': legend_format += ', '
                        if isinstance(val, pd.Series):
                            legend_format += val.name + ' = {:.3g}'
                        elif isinstance(val, np.ndarray):
                            legend_format += '{:.3g}'
            self.legend_input.append(legend_input)
            self.legend_format.append(legend_format)

        self.legend_data = [[] for _ in self.legend_input]
        self.legend_kwargs = legend_opts if legend_opts else {}

        self.norm_par = norm
        self.align_par = align

        self.x_unit = units.unit_parser.parse_unit(x_unit)
        self.x_lim = x_lim

        # make processing chain and output buffer
        outputs = self.wf_names + \
                  [name for name in self.line_names if isinstance(name, str)] + \
                  [name for name in self.legend_input  if isinstance(name, str)]
        if isinstance(self.norm_par, str): outputs += [self.norm_par]
        if isinstance(self.align_par, str): outputs += [self.align_par]

        self.proc_chain, self.lh5_out = build_processing_chain(
            self.lh5_in,
            dsp_config,
            db_dict=database,
            outputs=outputs,
            verbosity=self.verbosity,
            block_width=block_width)

        self.fig = None
        self.ax = None
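
# A minimal construction sketch for the browser above (the file path, config
# name, and the 'trapE' legend field are hypothetical):
wb = WaveformBrowser('/Users/wisecg/Data/OPPI/raw/oppi_run0_cyc2027_raw.lh5',
                     'ORSIS3302DecoderForEnergy/raw',
                     dsp_config='oppi_dsp.json',
                     waveforms='waveform',
                     legend='{trapE:0.1f} ADC',
                     n_drawn=5)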
Example No. 28
def dsp_to_hit_cage(f_dsp, f_hit, dg, n_max=None, verbose=False, t_start=None):
    """
    non-general placeholder for creating a pygama 'hit' file.  uses pandas.
    for every file, apply:
    - energy calibration (peakfit results)
    - timestamp correction
    for a more general dsp_to_hit, maybe each function could be given in terms
    of an 'apply' on a dsp dataframe ...
    
    TODO: create entry config['rawe'] with list of energy pars to calibrate, as 
    in energy_cal.py
    """
    rawe = ['trapEmax']

    # create initial 'hit' DataFrame from dsp data
    hit_store = lh5.Store()
    data = hit_store.read_object(dg.config['input_table'], f_dsp)
    df_hit = data.get_dataframe()

    # 1. get energy calibration for this run from peakfit
    cal_db = db.TinyDB(storage=MemoryStorage)
    with open(dg.config['ecaldb']) as f:
        raw_db = json.load(f)
        cal_db.storage.write(raw_db)
    runs = dg.fileDB.run.unique()
    if len(runs) > 1:
        print("sorry, I can't do combined runs yet")
        exit()
    run = runs[0]
    for etype in rawe:
        tb = cal_db.table(f'peakfit_{etype}').all()
        df_cal = pd.DataFrame(tb)
        df_cal['run'] = df_cal['run'].astype(int)
        df_run = df_cal.loc[df_cal.run == run]
        cal_pars = df_run.iloc[0][['cal0', 'cal1', 'cal2']]
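        # note: np.poly1d takes coefficients in decreasing-power order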
        pol = np.poly1d(cal_pars)  # handy numpy polynomial object
        df_hit[f'{etype}_cal'] = pol(df_hit[f'{etype}'])

    # 2. compute timestamp rollover correction (specific to Struck 3302)
    clock = 100e6  # 100 MHz
    UINT_MAX = 4294967295  # (0xffffffff)
    t_max = UINT_MAX / clock
    ts = df_hit['timestamp'].values / clock
    tdiff = np.diff(ts)
    tdiff = np.insert(tdiff, 0, 0)
    iwrap = np.where(tdiff < 0)
    iloop = np.append(iwrap[0], len(ts))
    ts_new, t_roll = [], 0
    for i, idx in enumerate(iloop):
        ilo = 0 if i == 0 else iwrap[0][i - 1]
        ihi = idx
        ts_block = ts[ilo:ihi]
        t_last = ts[ilo - 1]
        t_diff = t_max - t_last
        ts_new.append(ts_block + t_roll)
        t_roll += t_last + t_diff
    df_hit['ts_sec'] = np.concatenate(ts_new)

    # 3. compute global timestamp
    if t_start is not None:
        df_hit['ts_glo'] = df_hit['ts_sec'] + t_start

    # write to LH5 file
    if os.path.exists(f_hit):
        os.remove(f_hit)
    sto = lh5.Store()
    tb_name = dg.config['input_table'].replace('dsp', 'hit')
    tb_lh5 = lh5.Table(size=len(df_hit))

    for col in df_hit.columns:
        tb_lh5.add_field(col, lh5.Array(df_hit[col].values,
                                        attrs={'units': ''}))
        print(col)

    print(f'Writing table: {tb_name} in file:\n   {f_hit}')
    sto.write_object(tb_lh5, tb_name, f_hit)
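
# A read-back sketch to sanity-check the hit file; the path and table name
# below are illustrative (the table name mirrors dg.config['input_table']
# with 'dsp' replaced by 'hit').
sto = lh5.Store()
tb_hit = sto.read_object('ORSIS3302DecoderForEnergy/hit',
                         '/Users/wisecg/Data/OPPI/hit/oppi_run0_cyc2027_hit.lh5')
df_check = tb_hit.get_dataframe()
print(df_check.describe())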
Example No. 29

# set up the figure-of-merit to be computed at each grid point
def fltp_sig_mean(tb_out, verbosity):
    mean = np.average(tb_out['fltp2_sig'].nda)
    if verbosity > 1: print(f'mean: {mean}')
    return mean


# set up the energy selection
energy_name = 'energy'
range_name = '40K_1460'

# loop over detectors
detectors = ['oppi']
store = lh5.Store()
for detector in detectors:
    # get indices for just a selected energy range
    det_db = apdb[detector]
    lh5_group = 'ORSIS3302DecoderForEnergy/raw'
    idx = select_energies(energy_name,
                          range_name,
                          filenames,
                          det_db,
                          lh5_group=lh5_group)

    waveform_name = 'ORSIS3302DecoderForEnergy/raw/waveform/'
    waveforms, _ = store.read_object(waveform_name, filenames, idx=idx)
    print(f'{len(waveforms)} wfs for {detector}')

    # build the table for processing
Example No. 30
def get_superpulses(dfp, dg, f_super):
    """
    calculate average waveforms for each set of pulser data.
    save an output file with the superpulses for further analysis.
    """
    # find this with the show_spectra function above
    # ecal = 1460.8 / 2.005e6 # TODO: find the const for oct 2020
    ecal = 1460.8 / 2.005e6  # works for pulser dataset 2 (dec 2020)

    # more settings
    show_plots = True  # default True
    write_output = True
    nwfs = 1000  # limit number to go fast.  1000 is enough for a good measurement
    tp_align = 0.5  # pct timepoint to align wfs at
    e_window = 20  # plot (in keV) this window around each pulser peak
    n_pre, n_post = 50, 100  # num samples before/after tp_align
    bl_thresh = 10  # allowable baseline ADC deviation

    dsp_name = 'ORSIS3302DecoderForEnergy/dsp'
    raw_name = 'ORSIS3302DecoderForEnergy/raw/waveform'

    sto = lh5.Store()
    t_start = time.time()

    def analyze_pulser_run(df_row):
        """
        loop over each row of dfp and save the superpulse
        """
        epk, rt, vp, cyc = df_row[['E_keV', 'runtime', 'V_pulser', 'cycle']]
        rt *= 60  # sec
        if epk == 0: return []  # skip the bkg run

        # load pulser energies
        f_dsp = dg.lh5_dir + '/' + df_row.dsp_path + '/' + df_row.dsp_file
        pdata = lh5.load_nda([f_dsp], ['energy'], dsp_name)['energy'] * ecal

        # auto-narrow the window around the max pulser peak in two steps
        elo, ehi, epb = epk - 50, epk + 50, 0.5
        pdata_all = pdata[(pdata > elo) & (pdata < ehi)]
        hp, bp, _ = pgh.get_hist(pdata_all, range=(elo, ehi), dx=epb)
        pctr = bp[np.argmax(hp)]

        plo, phi, ppb = pctr - e_window, pctr + e_window, 0.1
        pdata_pk = pdata[(pdata > plo) & (pdata < phi)]
        hp, bp, bpvars = pgh.get_hist(pdata_pk, range=(plo, phi), dx=ppb)
        hp_rt = np.divide(hp, rt)
        hp_var = np.sqrt(hp / rt)

        # fit a gaussian to get 1 sigma e-values
        ibin_bkg = 50
        bkg0 = np.mean(hp_rt[:ibin_bkg])
        b, h = bp[1:], hp_rt
        imax = np.argmax(h)
        upr_half = b[np.where((b > b[imax]) & (h <= np.amax(h) / 2))][0]
        bot_half = b[np.where((b < b[imax]) & (h <= np.amax(h) / 2))][-1]
        fwhm = upr_half - bot_half
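        # for a gaussian, FWHM = 2*sqrt(2*ln2)*sigma ~= 2.355*sigma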
        sig0 = fwhm / 2.355
        amp0 = np.amax(hp_rt) * fwhm

        # 14 July 2021: Joule changed p_init to use the outputs of
        # gauss_mode_width_max(), because the fit wasn't converging with the
        # previous initial guess:
        # p_init = [amp0, bp[imax], sig0, bkg0]

        pars, cov = pgf.gauss_mode_width_max(hp, bp, bpvars, n_bins=50)
        p_init = [pars[2], pars[0], pars[1], 1]
        p_fit, p_cov = pgf.fit_hist(pgf.gauss_bkg,
                                    hp,
                                    bp,
                                    var=hp_var,
                                    guess=p_init)
        amp, mu, sigma, bkg = p_fit

        # select events within 1 sigma of the maximum
        # and pull the waveforms from the raw file to make a superpulse.
        idx = np.where((pdata >= mu - sigma) & (pdata <= mu + sigma))
        print(
            f'Pulser at {epk} keV, {len(idx[0])} events.  Limiting to {nwfs}.')
        idx = idx[0]
        if len(idx) > nwfs:
            idx = idx[:nwfs]

        # grab the 2d numpy array of pulser wfs
        n_rows = idx[-1] + 1  # read up to this event and stop
        f_raw = dg.lh5_dir + '/' + df_row.raw_path + '/' + df_row.raw_file
        tb_wfs, n_wfs = sto.read_object(raw_name, f_raw, n_rows=n_rows)
        pwfs = tb_wfs['values'].nda[idx, :]
        # print(idx, len(idx), pwfs.shape, '\n', pwfs)

        # data cleaning step: remove events with outlier baselines
        bl_means = pwfs[:, :500].mean(axis=1)
        bl_mode = mode(bl_means.astype(int))[0][0]
        bl_ctr = np.subtract(bl_means, bl_mode)
        idx_dc = np.where(np.abs(bl_ctr) < bl_thresh)
        pwfs = pwfs[idx_dc[0], :]
        bl_means = bl_means[idx_dc]
        print(pwfs.shape, bl_means.shape)

        # baseline subtract (transpose so the subtraction broadcasts over the
        # leading waveform dimension, then transpose back)
        wfs = (pwfs.transpose() - bl_means).transpose()

        # NOTE, 15 July 2021: Joule commented this out because it shortens the
        # superpulses to n_pre + n_post = 150 samples instead of the full 8192

        # time-align all wfs at their 50% timepoint (tricky!).
        # adapted from pygama/sandbox/old_dsp/[calculators,transforms].py
        # an alternate approach would be to use ProcessingChain here
        # wf_maxes = np.amax(wfs, axis=1)
        # timepoints = np.argmax(wfs >= wf_maxes[:, None]*tp_align, axis=1)
        # wf_idxs = np.zeros([wfs.shape[0], n_pre + n_post], dtype=int)
        # row_idxs = np.zeros_like(wf_idxs)
        # for i, tp in enumerate(timepoints):
        # wf_idxs[i, :] = np.arange(tp - n_pre, tp + n_post)
        # row_idxs[i, :] = i
        # wfs = wfs[row_idxs, wf_idxs]
        # print(f'len wfs: {len(wfs[1])}')

        # take the average to get the superpulse
        superpulse = np.mean(wfs, axis=0)

        # normalize all wfs to the superpulse maximum
        wfmax, tmax = np.amax(superpulse), np.argmax(superpulse)
        superpulse = np.divide(superpulse, wfmax)
        wfs = np.divide(wfs, wfmax)

        # -- plot results --
        if show_plots:
            fig, (p0, p1) = plt.subplots(2, figsize=(7, 8))

            # plot fit result (top), and waveforms + superpulse (bottom)
            xfit = np.arange(plo, phi, ppb * 0.1)
            p0.plot(xfit,
                    pgf.gauss_bkg(xfit, *p_init),
                    '-',
                    c='orange',
                    label='init')
            p0.plot(xfit,
                    pgf.gauss_bkg(xfit, *p_fit),
                    '-',
                    c='red',
                    label='fit')

            # plot 1 sigma window
            p0.axvspan(mu - sigma,
                       mu + sigma,
                       color='m',
                       alpha=0.2,
                       label='1 sigma')

            # plot data
            p0.plot(bp[1:],
                    hp_rt,
                    ds='steps',
                    c='k',
                    lw=1,
                    label=f'{vp:.2f} V')
            p0.set_xlabel(f'onboard energy (keV, c={ecal:.2e})',
                          ha='right',
                          x=1)
            p0.set_ylabel('cts / s', ha='right', y=1)
            p0.legend(fontsize=10)

            # plot individ. wfs
            ts = np.arange(0, len(wfs[0, :]))
            for iwf in range(wfs.shape[0]):
                p1.plot(ts, wfs[iwf, :], '-k', lw=2, alpha=0.5)
            p1.plot(np.nan, np.nan, '-k', label=f'wfs, {epk:.0f} keV')

            # plot superpulse
            p1.plot(ts,
                    superpulse,
                    '-r',
                    lw=2,
                    label=f'superpulse, {vp:.2f} V')

            p1.set_xlabel('time (10 ns)', ha='right', x=1)
            p1.set_ylabel('amplitude', ha='right', y=1)
            p1.legend(fontsize=10)
            # plt.show()
            plt.savefig(f'./plots/superpulse_cyc{cyc}.png', dpi=150)
            plt.cla()

        # save the superpulse to our output file
        print(f'length of superpulse: {len(superpulse)}')
        return superpulse

    dfp['superpulse'] = dfp.apply(analyze_pulser_run, axis=1)

    # drop the duplicated 'run' row before saving
    dfp = dfp.loc[:, ~dfp.columns.duplicated()]
    # print(dfp.columns)
    print(dfp)

    if write_output:
        print('Saving output file: ', f_super)
        dfp.to_hdf(f_super, key='superpulses')

    t_elap = (time.time() - t_start) / 60
    print(f'Done.  Elapsed: {t_elap:.2f} min.')
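
# Read-back sketch for the superpulse output (the filename is hypothetical;
# pass whatever f_super was used above):
import pandas as pd
dfp = pd.read_hdf('superpulses.h5', key='superpulses')
print(dfp[['V_pulser', 'E_keV', 'superpulse']])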