def show_lowe_wfs():
    """
    Plot the first few very-low-energy waveforms that survive the data
    cleaning (baseline) cut and save the figure to ./plots/lowe_wfs.png.

    Events are selected from the hit file on calibrated energy and baseline,
    then the matching rows are pulled from the raw file.
    """
    raw_path = '/Users/wisecg/Data/OPPI/raw/oppi_run0_cyc2027_raw.lh5'
    hit_path = '/Users/wisecg/Data/OPPI/hit/oppi_run0_cyc2027_hit.lh5'
    table = 'ORSIS3302DecoderForEnergy/raw'

    hits = lh5.Store().read_object(table, hit_path).get_dataframe()

    # widen energy_first to a signed type, then shift the very large values
    # (> 4e9) down so they can become negative
    hits['energy_first'] = hits['energy_first'].astype(np.int64)
    e_first = hits['energy_first'].values
    rolled = np.where(e_first > 4e9)
    e_first[rolled] = e_first[rolled] - 4294967295

    n_show = 40
    e_min, e_max = 1, 10
    bl_min, bl_max = 57700, 58500  # cut values
    ecol = 'trapE_cal'  # noise stops @ 35 keV

    energy = hits[ecol]
    passing = ((energy > e_min) & (energy < e_max)
               & (hits.bl > bl_min) & (hits.bl < bl_max))
    rows = energy.loc[passing].index[:n_show]

    # only read the raw file up to the last selected row
    last_row = rows[-1]
    raw = lh5.Store().read_object(table, raw_path, start_row=0,
                                  n_rows=last_row + 1)
    selected = raw['waveform']['values'].nda[rows.values, :]

    ticks = np.arange(0, selected.shape[1], 1)
    for wf in selected:
        plt.plot(ticks, wf, lw=1, alpha=0.5)

    plt.xlabel('time (clock ticks)', ha='right', x=1)
    plt.ylabel('ADC', ha='right', y=1)
    plt.savefig('./plots/lowe_wfs.png', dpi=300)
    plt.cla()
def select_energies(energy_name, range_name, filenames, database,
                    lh5_group='', store=None, verbosity=0):
    """
    Find the entries of an energy array that lie inside a named energy range.

    The range limits are looked up as
    database[energy_name]['ranges'][range_name]['E_low' / 'E_high'], and the
    energy array is read from `lh5_group + '/' + energy_name` in `filenames`.

    Parameters
    ----------
    energy_name : str
        Key in `database` and name of the energy object inside `lh5_group`.
    range_name : str
        Key in database[energy_name]['ranges'].
    filenames : str or list of str
        Passed straight to `store.read_object`.
    database : dict
        Nested dictionary holding the range definitions.
    lh5_group : str
        Group prefix of the energy object.
    store : object with a `read_object` method, optional
        A new `lh5.Store()` is created when omitted.
    verbosity : int
        Forwarded to `read_object`; values > 0 also print the object name and
        file list.

    Returns
    -------
    tuple of arrays, or None
        Output of `np.where` for E_low < energy < E_high (both exclusive), or
        None when the requested range is not in the database.
    """
    if energy_name not in database:
        print(f'no energy {energy_name} in database')
        return None
    if 'ranges' not in database[energy_name]:
        print(f'database["{energy_name}"] missing field "ranges"')
        return None
    if range_name not in database[energy_name]['ranges']:
        print(f'no range {range_name} in database["{energy_name}"]["ranges"]')
        return None

    limits = database[energy_name]['ranges'][range_name]
    E_low, E_high = limits['E_low'], limits['E_high']

    obj_name = lh5_group + '/' + energy_name
    if verbosity > 0:
        print(obj_name)
        print(filenames)

    if store is None:
        store = lh5.Store()
    # honor the caller's verbosity instead of a hard-coded level
    energies, _ = store.read_object(obj_name, filenames, verbosity=verbosity)
    return np.where((energies.nda > E_low) & (energies.nda < E_high))
def show_cal_spectrum():
    """
    Histogram one calibrated energy estimator from the hit file and show it.
    """
    hit_path = '/Users/wisecg/Data/OPPI/hit/oppi_run0_cyc2027_hit.lh5'
    table = 'ORSIS3302DecoderForEnergy/raw'

    store = lh5.Store()
    store.ls(hit_path)
    hits = store.read_object(table, hit_path).get_dataframe()
    print(hits)

    # energy in keV
    e_range, bin_width = (0, 3000), 0.5

    # choose energy estimator ('energy_cal' or 'trapE_cal')
    estimator = 'energy_cal'

    counts, edges, _ = pgh.get_hist(hits[estimator], range=e_range,
                                    dx=bin_width)
    # drop the first edge so x and y have the same length for ds='steps'
    plt.plot(edges[1:], counts, ds='steps', c='b', lw=2, label=estimator)

    plt.xlabel(estimator, ha='right', x=1)
    plt.ylabel('Counts', ha='right', y=1)
    plt.legend()
    plt.tight_layout()
    plt.show()
def main():
    """
    Example: load an LH5 DSP file, convert it to a pandas DataFrame, and plot
    an uncalibrated 'trapE' spectrum on a log scale.
    """
    dsp_path = '/Users/wisecg/Data/lh5/hades_I02160A_r1_191021T162944_th_HS2_top_psa_dsp.lh5'

    store = lh5.Store()
    store.ls(dsp_path)  # the example file only has one group, 'raw'
    df_dsp = store.read_object('raw', dsp_path).get_dataframe()

    # from here on it is plain pandas
    print(df_dsp)

    # uncalibrated energy spectrum via the pygama histogram helper
    e_range, bin_width = (0, 100000), 10
    counts, edges, _ = pgh.get_hist(df_dsp['trapE'], range=e_range,
                                    dx=bin_width)

    # drop the first edge so x and y have the same length for ds='steps'
    plt.semilogy(edges[1:], counts, ds='steps', c='b', label='trapE')
    plt.xlabel('trapE', ha='right', x=1)
    plt.ylabel('Counts', ha='right', y=1)
    plt.legend()
    plt.tight_layout()
    plt.show()
def show_raw_spectrum():
    """
    Show an uncalibrated spectrum from the DSP file.

    Used to read off calibration constants for the onboard energy and the
    'trapE' estimator.
    TODO: fit each expected peak and get resolution vs energy
    """
    dsp_path = '/Users/wisecg/Data/OPPI/dsp/oppi_run0_cyc2027_dsp_test.lh5'

    store = lh5.Store()
    store.ls(dsp_path)
    df_dsp = store.read_object('ORSIS3302DecoderForEnergy/raw',
                               dsp_path).get_dataframe()
    print(df_dsp)

    # alternative settings used by the author for the onboard 'energy':
    #   (0, 8e6), 1000      whole spectrum
    #   (0, 800000), 1000   < 250 keV
    e_range, bin_width, estimator = (0, 10000), 10, 'trapE'

    counts, edges, _ = pgh.get_hist(df_dsp[estimator], range=e_range,
                                    dx=bin_width)

    # drop the first edge so x and y have the same length for ds='steps'
    plt.plot(edges[1:], counts, ds='steps', c='b', lw=2, label=estimator)
    plt.xlabel(estimator, ha='right', x=1)
    plt.ylabel('Counts', ha='right', y=1)
    plt.legend()
    plt.tight_layout()
    plt.show()
def dsp_to_hit():
    """
    Add calibrated energy columns to the DSP table and write it out as the
    hit file.

    A compact example of reading an LH5 table, adding a field to it, and
    writing it back to disk.
    """
    dsp_path = '/Users/wisecg/Data/OPPI/dsp/oppi_run0_cyc2027_dsp_test.lh5'
    hit_path = '/Users/wisecg/Data/OPPI/hit/oppi_run0_cyc2027_hit.lh5'
    table = 'ORSIS3302DecoderForEnergy/raw'

    store = lh5.Store()
    store.ls(dsp_path)
    tb_dsp = store.read_object(table, dsp_path)
    df_dsp = tb_dsp.get_dataframe()

    # one calibrated column (linear: p0 * E + p1) per energy estimator
    for estimator in ('energy', 'trapE'):
        cal_col = estimator + '_cal'
        pars = linear_cal(estimator)
        df_dsp[cal_col] = df_dsp[estimator] * pars[0] + pars[1]
        tb_dsp.add_field(
            f'{estimator}_cal',
            lh5.Array(df_dsp[cal_col].values, attrs={'units': 'keV'}))

    # remove any existing output first (original note: LH5 overwrite is
    # broken right now)
    if os.path.exists(hit_path):
        os.remove(hit_path)
    store.write_object(tb_dsp, table, hit_path)
def get_superpulse(df, dg, cut_str='', nwfs=100, all=False, norm=True):
    """
    Create a super-pulse (mean waveform) from waveforms passing a cut.

    Waveforms are baseline-subtracted using the mean of their first 800
    samples before averaging.

    Parameters
    ----------
    df : pandas.DataFrame
        Event table; its index is used to pick rows of the raw waveform array.
    dg : object
        Must provide `lh5_dir` and a `fileDB` with 'raw_path' and 'raw_file'.
    cut_str : str
        Expression for `df.query`.  An empty string selects every row.
    nwfs : int
        Number of waveforms to use (the first `nwfs` passing the cut).
    all : bool
        Use every waveform passing the cut; overrides `nwfs`.
    norm : bool
        Divide the super-pulse by its maximum.

    Returns
    -------
    (ts, superpulse)
        `ts` is np.arange(0, n_samples - 1), i.e. one element shorter than
        `superpulse`.
    """
    # DataFrame.query('') raises ValueError, so treat an empty cut as "no
    # cut"; evaluate the query only once
    df_cut = df.query(cut_str) if cut_str else df

    if all:
        nwfs = len(df_cut)
        print(f'using all {nwfs} Waveforms passing cut')
    else:
        print(f'using first {nwfs} waveforms passing cut')

    idx = df_cut.index[:nwfs]

    raw_store = lh5.Store()
    tb_name = 'ORSIS3302DecoderForEnergy/raw'
    raw_list = dg.lh5_dir + dg.fileDB['raw_path'] + '/' + dg.fileDB['raw_file']
    # read_object is given a plain list of file names rather than a Series
    raw_list = raw_list.tolist()
    data_raw, nrows = raw_store.read_object(tb_name, raw_list)

    wfs_all = data_raw['waveform']['values'].nda
    wfs = wfs_all[idx.values, :]

    # baseline subtraction: per-waveform mean of the first 800 samples
    bl_means = wfs[:, :800].mean(axis=1)
    wf_blsub = (wfs.transpose() - bl_means).transpose()

    ts = np.arange(0, wf_blsub.shape[1] - 1, 1)
    super_wf = np.mean(wf_blsub, axis=0)

    if norm:
        superpulse = np.divide(super_wf, np.amax(super_wf))
    else:
        superpulse = super_wf
    return (ts, superpulse)
def power_spectrum(dg):
    """
    Plot the power spectral density of waveform baselines, one curve per run.

    For every run in dg.fileDB, up to `n_wfs` waveforms are read from the
    first raw cycle file of that run, truncated to their first `nseg`
    samples, passed through scipy.signal.welch and averaged.  The overlay is
    saved to ./plots/psd_runs.pdf.

    note. typical cycle files have ~120,000 wfs.
    """
    import scipy.signal as signal

    store = lh5.Store()
    base_dir = os.path.expandvars(dg.config['lh5_dir'])

    n_wfs = int(1e3)  # waveforms read per run
    clk = 100e6  # Hz
    nseg = 3500  # num baseline samples (cage wfs are usually length 8192)

    def psd_run(df_run):
        run = int(df_run.iloc[0]['run'])

        # the first cycle file alone should be PLENTY for a power spectrum
        raw_files = base_dir + df_run['raw_path'] + '/' + df_run['raw_file']
        first_file = raw_files.values[0]

        tb_raw, _ = store.read_object('ORSIS3302DecoderForEnergy/raw',
                                      first_file, start_row=0, n_rows=n_wfs)

        # keep the baseline region only
        baselines = tb_raw['waveform']['values'].nda[:, 0:nseg]
        print(baselines.shape)

        freqs, power = signal.welch(baselines, clk, nperseg=nseg)
        mean_power = np.sum(power, axis=0) / baselines.shape[0]

        plt.semilogy(freqs, mean_power, '-', lw=2, label=f'run {run}')

    dg.fileDB.groupby(['run']).apply(psd_run)

    plt.xlabel('Frequency (Hz)', ha='right', x=0.9)
    plt.ylabel('PSD (ADC^2 / Hz)', ha='right', y=1)
    plt.legend(loc=1)
    plt.savefig('./plots/psd_runs.pdf')
def get_runtimes(dg):
    """
    $ ./setup.py --runtime

    Compute a Ge runtime for each cycle file from the timestamps in its raw
    LH5 file, and store it in the fileDB as the columns 'runtime' and
    'rt_std'.  Requires the raw LH5 files.

    'runtime' is the largest final timestamp among the first three groups in
    the file, divided by 60; 'rt_std' is the standard deviation of those
    final timestamps.  Cycles whose Ge file does not exist get 0 for both.
    NOTE(review): the units of the result depend on the units of the stored
    'timestamp' values -- confirm.

    The user is asked for confirmation before the fileDB is saved.
    """
    dg.load_df()

    # start from a clean slate for the columns written here
    new_cols = ['runtime', 'rt_std']
    stale = [c for c in new_cols if c in dg.fileDB.columns]
    dg.fileDB.drop(columns=stale, inplace=True)

    store = lh5.Store()
    t_begin = time.time()

    def runtime_cycle(df_row):
        # the stored path contains a {sysn} placeholder; always look for Ge
        raw_path = f'{dg.lh5_dir}/{df_row.raw_path}/{df_row.raw_file}'
        raw_path = raw_path.format_map({'sysn':'geds'})

        if not os.path.exists(raw_path):
            return pd.Series({'runtime':0, 'rt_std':0})

        # for PGT, compare the first three channels (for redundancy)
        last_stamps = []
        for grp in store.ls(raw_path)[:3]:
            stamps = lh5.load_nda([raw_path], ['timestamp'],
                                  grp+'/raw/')['timestamp']
            last_stamps.append(stamps[-1])

        # take largest value & compute uncertainty
        return pd.Series({'runtime': max(last_stamps) / 60,
                          'rt_std': np.std(np.array([last_stamps]))})

    dg.fileDB[new_cols] = dg.fileDB.progress_apply(runtime_cycle, axis=1)

    print(f'Done. Time elapsed: {(time.time()-t_begin)/60:.2f} mins.')

    # save to fileDB if everything looks OK
    print(dg.fileDB)
    print(dg.fileDB.columns)
    print('FileDB location:', dg.config['fileDB'])
    if input('Save new fileDB? (y/n) ').lower() == 'y':
        dg.save_df(dg.config['fileDB'])
def write_out_garbage(self, filename, group='/', lh5_store=None):
    """
    Append the filled rows of `self.garbage_table` to `filename` under the
    name 'garbage' in `group`, then clear the table.

    Nothing is written (and the table is left untouched) when the table's
    `loc` is 0.  A new `lh5.Store()` is used when `lh5_store` is not given.
    """
    store = lh5.Store() if lh5_store is None else lh5_store
    rows_filled = self.garbage_table.loc
    if rows_filled != 0:
        store.write_object(self.garbage_table, 'garbage', filename, group,
                           n_rows=rows_filled, append=True)
        self.garbage_table.clear()
def get_wfs(df, dg, cut_str='', nwfs=10, all=False):
    """
    Get waveforms passing a cut, baseline-subtracted but not normalized.

    These are individual waveforms, not superpulses!

    Parameters
    ----------
    df : pandas.DataFrame
        Event table; its index is used to pick rows of the raw waveform array.
    dg : object
        Must provide `lh5_dir` and a `fileDB` with 'raw_path' and 'raw_file'.
    cut_str : str
        Expression for `df.query`.  An empty string selects every row.
    nwfs : int
        Number of waveforms to return (the first `nwfs` passing the cut);
        reduced to the number available if fewer pass.
    all : bool
        Return every waveform passing the cut; overrides `nwfs`.

    Returns
    -------
    (ts, wf_blsub)
        `ts` is np.arange(0, n_samples - 1), i.e. one element shorter than the
        waveforms; `wf_blsub` has one row per selected waveform with the mean
        of its first 800 samples subtracted.
    """
    # DataFrame.query('') raises ValueError, so treat an empty cut as "no
    # cut"; evaluate the query only once
    df_cut = df.query(cut_str) if cut_str else df

    all_nwfs = len(df_cut)
    print(f'{all_nwfs} passing cuts')

    if all:
        nwfs = all_nwfs
        print(f'using all {nwfs} Waveforms passing cut')
    else:
        print(f'using first {nwfs} waveforms passing cut')
        if all_nwfs < nwfs:
            print(
                f'Less than the specified number of waveforms ({nwfs}) passing cuts. \nUsing all {all_nwfs} waveforms passing cut'
            )
            nwfs = all_nwfs

    idx = df_cut.index[:nwfs]

    raw_store = lh5.Store()
    tb_name = 'ORSIS3302DecoderForEnergy/raw'
    raw_list = dg.lh5_dir + dg.fileDB['raw_path'] + '/' + dg.fileDB['raw_file']
    # read_object is given a plain list of file names rather than a Series
    raw_list = raw_list.tolist()
    data_raw, nrows = raw_store.read_object(tb_name, raw_list)

    wfs_all = data_raw['waveform']['values'].nda
    wfs = wfs_all[idx.values, :]

    # baseline subtraction: per-waveform mean of the first 800 samples
    bl_means = wfs[:, :800].mean(axis=1)
    wf_blsub = (wfs.transpose() - bl_means).transpose()

    ts = np.arange(0, wf_blsub.shape[1] - 1, 1)
    return (ts, wf_blsub)
def show_groups():
    """
    Example of listing the HDF5 groups of an LH5 file, checking the shape of
    every column of a table, and converting the table to a DataFrame.
    """
    # companion files of the same cycle, for reference:
    #   /Users/wisecg/Data/OPPI/raw/oppi_run0_cyc2027_raw.lh5
    #   /Users/wisecg/Data/OPPI/hit/oppi_run0_cyc2027_hit.lh5
    dsp_path = '/Users/wisecg/Data/OPPI/dsp/oppi_run0_cyc2027_dsp_test.lh5'

    # lh5 method (h5py.File(path) and .keys() is the lower-level alternative)
    store = lh5.Store()
    store.ls(dsp_path)
    table = store.read_object('ORSIS3302DecoderForEnergy/raw', dsp_path)

    # testing -- make sure data columns all have same shape
    for name in table.keys():
        print(name, table[name].nda.shape)

    # check pandas conversion
    df_dsp = table.get_dataframe()
    print(df_dsp.columns)
    print(df_dsp)
def get_runtimes(dg):
    """
    Requires DSP files.

    Compute runtime (# minutes in run) and stopTime (unix timestamp) for every
    row of the fileDB from the timestamps in its DSP file, and write the
    updated table back to the fileDB HDF5 file under key 'file_keys'.
    """
    write_output = True
    db_path = dg.config['fileDB']
    df_keys = pd.read_hdf(db_path)

    # clear the output columns if they already exist
    new_cols = ['stopTime', 'runtime']
    stale = [c for c in new_cols if c in df_keys.columns]
    df_keys.drop(columns=stale, inplace=True)

    sto = lh5.Store()

    def get_runtime(df_row):
        # load timestamps from dsp file
        f_dsp = dg.lh5_dir + df_row['dsp_path'] + '/' + df_row['dsp_file']
        data = sto.read_object('ORSIS3302DecoderForEnergy/dsp', f_dsp)

        # correct for timestamp rollover
        clock = 100e6  # 100 MHz
        t_max = 4294967295 / clock  # UINT_MAX (0xffffffff) in seconds
        ts = data['timestamp'].nda / clock  # converts to float

        # a negative step marks the first sample after a rollover
        steps = np.insert(np.diff(ts), 0, 0)
        wraps = np.where(steps < 0)[0]
        starts = np.insert(wraps, 0, 0)
        stops = np.append(wraps, len(ts))

        # shift every block between rollovers by the accumulated offset
        t_roll = 0
        blocks = []
        for lo, hi in zip(starts, stops):
            t_last = ts[lo - 1]
            blocks.append(ts[lo:hi] + t_roll)
            t_roll += t_last + (t_max - t_last)
        ts_corr = np.concatenate(blocks)

        # calculate runtime and unix stopTime
        rt = ts_corr[-1] / 60  # minutes
        st = int(np.ceil(df_row['startTime'] + rt * 60))
        return pd.Series({'stopTime': st, 'runtime': rt})

    df_keys[new_cols] = df_keys.progress_apply(get_runtime, axis=1)
    print(df_keys)

    if write_output:
        df_keys.to_hdf(db_path, key='file_keys')
        print(f"Wrote output file: {dg.config['fileDB']}")
def get_runtimes(dg, overwrite=False, batch_mode=False):
    """
    $ ./setup.py --rt

    Compute runtime (# minutes in run) and stopTime (unix timestamp) using the
    timestamps in the DSP file, and store them in the fileDB.

    Parameters
    ----------
    dg : object
        Provides load_df(), save_df(), fileDB, lh5_dir and config['fileDB'].
    overwrite : bool
        Re-scan every row.  Otherwise, if a 'runtime' column already exists,
        only rows whose runtime is NaN are scanned and merged back in.
    batch_mode : bool
        Save without asking for confirmation.

    The process exits when there is nothing to update.  Rows marked `skip`,
    or whose DSP file is missing, get stopTime = runtime = 0.

    NOTE: Could change this to use the raw file timestamps instead of dsp
    file, but that still makes this function dependent on a processing step.
    NOTE: CAGE uses struck channel 2 (0-indexed)
    """
    print('Scanning DSP files for runtimes ...')

    # load existing fileDB
    dg.load_df()

    if 'runtime' not in dg.fileDB.columns or overwrite:
        # first-time setup, or forced re-scan
        df_keys = dg.fileDB.copy()
        update_existing = False
        print('Re-scanning entire fileDB')
    else:
        # look for any rows with nans to update.  df_keys is always defined
        # here (possibly empty) so the "nothing to do" exit below is reached
        # instead of an UnboundLocalError.
        idx = dg.fileDB.loc[pd.isna(dg.fileDB['runtime']), :].index
        df_keys = dg.fileDB.loc[idx].copy()
        update_existing = True
        if len(idx) > 0:
            print(f'Found {len(df_keys)} new files without runtime:')
            print(df_keys)
        else:
            print('No empty runtime values found.')

    if len(df_keys) == 0:
        print('No files to update. Exiting...')
        exit()

    # clear new colums if they exist
    new_cols = ['stopTime', 'runtime']
    for col in new_cols:
        if col in df_keys.columns:
            df_keys.drop(col, axis=1, inplace=True)

    sto = lh5.Store()

    def get_runtime(df_row):
        # load timestamps from dsp file
        f_dsp = dg.lh5_dir + df_row['dsp_path'] + '/' + df_row['dsp_file']

        if not os.path.exists(f_dsp) and not df_row.skip:
            print(f"Error, file doesn't exist:\n {f_dsp}")
            print(
                f"Warning, proceeding anyway -- this can mess up your fileDB")
            return pd.Series({'stopTime': 0, 'runtime': 0})
        elif df_row.skip:
            print(f'Skipping cycle file:\n {f_dsp}')
            return pd.Series({'stopTime': 0, 'runtime': 0})

        data, n_rows = sto.read_object('ORSIS3302DecoderForEnergy/dsp', f_dsp)

        # correct for timestamp rollover
        clock = 100e6  # 100 MHz
        UINT_MAX = 4294967295  # (0xffffffff)
        t_max = UINT_MAX / clock

        ts = data['timestamp'].nda / clock  # converts to float

        # a negative step marks the first sample after a rollover
        tdiff = np.insert(np.diff(ts), 0, 0)
        iwrap = np.where(tdiff < 0)
        iloop = np.append(iwrap[0], len(ts))

        # shift every block between rollovers by the accumulated offset
        ts_new, t_roll = [], 0
        for i, idx in enumerate(iloop):
            ilo = 0 if i == 0 else iwrap[0][i - 1]
            ihi = idx
            ts_block = ts[ilo:ihi]
            t_last = ts[ilo - 1]
            t_diff = t_max - t_last
            ts_new.append(ts_block + t_roll)
            t_roll += t_last + t_diff
        ts_corr = np.concatenate(ts_new)

        # calculate runtime and unix stopTime
        rt = ts_corr[-1] / 60  # minutes
        st = int(np.ceil(df_row['startTime'] + rt * 60))

        return pd.Series({'stopTime': st, 'runtime': rt})

    df_tmp = df_keys.progress_apply(get_runtime, axis=1)
    df_keys[new_cols] = df_tmp

    # after this, dg.fileDB is the complete, updated table
    if update_existing:
        dg.fileDB.loc[df_keys.index] = df_keys
    else:
        dg.fileDB = df_keys

    dbg_cols = ['run', 'cycle', 'unique_key', 'startTime', 'runtime']
    print(dg.fileDB[dbg_cols])

    print('Ready to save. This will overwrite any existing fileDB.')
    save = True
    if not batch_mode:
        ans = input('Save updated fileDB? (y/n):')
        save = ans.lower() == 'y'

    if save:
        # do NOT reassign dg.fileDB = df_keys here: in update mode df_keys
        # holds only the newly scanned rows, and saving it would discard
        # every other row of the fileDB
        dg.save_df(os.path.expandvars(dg.config['fileDB']))
        print('fileDB updated.')
def process_ttree(root_files, raw_file=None, n_max=None, config=None,
                  verbose=False, buffer_size=1024, chans=None,
                  tree_name='MGTree'):
    """
    Convert the waveforms of an MGDO ROOT tree (or chain of trees) into LH5
    tables, one table per digitizer channel, written to 'g{ID:04d}/raw'.

    Parameters
    ----------
    root_files : str or iterable of str
        Input ROOT file(s) added to the TChain.
    raw_file : str, optional
        Output LH5 file.  Defaults to `root_files` with '.root' replaced by
        '.lh5' when a single file name is given; required for a list.
    n_max : int, optional
        Stop after the event in which the total waveform count reaches this
        value.  All waveforms of an event are always kept, so a few more than
        `n_max` may be written.
    config, chans :
        Accepted for interface compatibility; not used by this function.
    verbose : bool
        Print a line when a channel table is created or flushed.
    buffer_size : int
        Passed to MGDODecoder.
    tree_name : str
        Name of the TTree inside the ROOT files.

    Raises
    ------
    ValueError
        If `root_files` is not a single string and `raw_file` is not given.
    """
    single_input = isinstance(root_files, str)
    if not raw_file:
        if not single_input:
            raise ValueError(
                'raw_file must be given when root_files is a list of files')
        raw_file = root_files.replace('.root', '.lh5')

    # Load up the tree (or trees)
    ch = ROOT.TChain(tree_name)
    if single_input:
        ch.Add(root_files)
    else:
        # add each input file (the original referenced an undefined name and
        # added the output file instead)
        for root_file in root_files:
            ch.Add(root_file)

    dec = MGDODecoder(buffer_size)
    lh5_st = lh5.Store()

    tables = {}  # map from detector channel to output table
    n_tot = 0  # total waveforms

    # loop through MGTEvents in ttree
    for event in ch:
        have_aux = event.event.GetAuxWaveformArrayStatus()

        # loop through waveforms in event
        for i_wf in range(event.event.GetNWaveforms()):
            # digitizer data, waveform and aux waveform (if applicable)
            dd = event.event.GetDigitizerData(i_wf)
            wf = event.event.GetWaveform(i_wf)
            auxwf = event.event.GetAuxWaveform(i_wf) if have_aux else None

            # get (or lazily create) the output table for this channel
            chan_id = dd.GetID()
            tb = tables.get(chan_id)
            if tb is None:
                if verbose:
                    print('Create table for channel', chan_id)
                tb = dec.get_table(dd, wf, auxwf)
                tables[chan_id] = tb

            dec.read_waveform(tb, dd, wf, auxwf)
            tb.push_row()

            # write table if it is full
            if tb.is_full():
                lh5_st.write_object(tb, 'g{:04d}/raw'.format(chan_id),
                                    raw_file, n_rows=tb.loc)
                tb.clear()
            n_tot += 1

        # checked per event so that all waveforms of an event are kept
        if n_max and n_tot >= n_max:
            break

    # flush the remaining rows of each table
    for channel, tb in tables.items():
        if verbose:
            print('Wrote to', 'g{:04d}/raw'.format(channel), 'in', raw_file)
        lh5_st.write_object(tb, 'g{:04d}/raw'.format(channel), raw_file,
                            n_rows=tb.loc)
        tb.clear()
def show_wfs(dg):
    """
    Show waveforms from a chosen energy region, using the hit files to select
    events and the first raw file to fetch the waveforms.

    Two selections are made -- "bulk" events in an energy window, and
    "alpha" candidates from DCR / drift-time / baseline cuts -- and both sets
    of waveforms are read.  Only the bulk waveforms are drawn; the figure is
    saved under ./plots/normScan/.
    """
    # hit data comes from the user directory
    hit_files = (dg.lh5_user_dir + dg.fileDB['hit_path'] + '/'
                 + dg.fileDB['hit_file'])
    hit_cols = ['trapEmax', 'trapEmax_cal', 'bl', 'AoE', 'dcr_raw', 'tp_0',
                'tp_50']
    df_hit = lh5.load_dfs(hit_files, hit_cols,
                          'ORSIS3302DecoderForEnergy/hit')

    # settings
    etype = 'trapEmax_cal'
    nwfs = 20

    # derived columns: energy-corrected DCR and 0-50% time difference
    df_hit['dcr_linoff'] = df_hit['dcr_raw'] + 0.0555 * df_hit['trapEmax']
    df_hit['tp0_50'] = df_hit['tp_50'] - df_hit['tp_0']

    # energy window: 351 peak, calibrated
    elo, ehi = 351, 355

    tb_name = 'ORSIS3302DecoderForEnergy/raw'
    raw_files = dg.lh5_dir + dg.fileDB['raw_path'] + '/' + dg.fileDB['raw_file']

    # select bulk waveforms
    energy = df_hit[etype]
    idx = energy.loc[(energy >= elo) & (energy <= ehi)].index[:nwfs]

    f_raw = raw_files.values[0]  # fixme, only works for one file rn
    data_raw, _ = lh5.Store().read_object(tb_name, f_raw, start_row=0,
                                          n_rows=idx[-1] + 1)
    bulk_wfs = data_raw['waveform']['values'].nda[idx.values, :]
    ts = np.arange(0, bulk_wfs.shape[1] - 1, 1)

    # select alpha waveforms
    dcr_lo, dcr_hi = 25, 200
    drift_lo, drift_hi = 100, 400
    bl_lo, bl_hi = 8500, 10000
    is_alpha = ((df_hit['dcr_linoff'] > dcr_lo)
                & (df_hit['dcr_linoff'] < dcr_hi)
                & (df_hit['tp0_50'] > drift_lo)
                & (df_hit['tp0_50'] < drift_hi)
                & (df_hit['bl'] > bl_lo)
                & (df_hit['bl'] < bl_hi)
                & (df_hit[etype] < 12000))
    alpha_idx = df_hit[etype].loc[is_alpha].index[:nwfs]

    f_raw = raw_files.values[0]  # fixme, only works for one file rn
    data_raw, _ = lh5.Store().read_object(tb_name, f_raw, start_row=0,
                                          n_rows=alpha_idx[-1] + 1)
    # read for an optional alpha-vs-bulk overlay; not drawn at present
    alpha_wfs = data_raw['waveform']['values'].nda[alpha_idx.values, :]
    ats = np.arange(0, alpha_wfs.shape[1] - 1, 1)

    # plot bulk wfs (last sample dropped to match the length of ts)
    for wf in bulk_wfs:
        plt.plot(ts, wf[:len(wf) - 1], lw=1, color='blue', label='Bulk')

    plt.title('right 351 Wfs run 82')
    plt.xlabel('time (clock ticks)', ha='right', x=1)
    plt.ylabel('ADC', ha='right', y=1)
    plt.xlim(3500, 4500)
    plt.ylim(9100, 10300)
    plt.savefig('./plots/normScan/zoom_350_right_waveforms_run82.png',
                dpi=300)
def pole_zero(dg):
    """
    Estimate the preamp RC decay constant from the tails of a few waveforms
    in an energy window, using a two-point slope of the log-waveform, and
    print the mean and standard deviation over the selected waveforms.

    NOTE: I think this result might be wrong, for the CAGE amp it should be
    around 250 usec.  Need to check.
    NOTE(review): tau is computed as 1 / slope of log(wf) and the time axis
    is sample_index / 1e3 while `clock` is 100 MHz -- the sign and scale of
    the printed value should be confirmed before it is used.
    """
    # selection settings
    etype = 'trapEmax_cal'
    nwfs = 20
    elo, ehi = 1455, 1465

    # load hit data -- the column the selection is made on must be loaded
    # (loading only 'trapEmax' made df_hit[etype] fail with KeyError)
    lh5_dir = os.path.expandvars(dg.config['lh5_dir'])
    hit_list = lh5_dir + dg.fileDB['hit_path'] + '/' + dg.fileDB['hit_file']
    df_hit = lh5.load_dfs(hit_list, [etype], 'ORSIS3302DecoderForEnergy/hit')
    df_hit.reset_index(inplace=True)

    # select waveforms
    in_window = (df_hit[etype] >= elo) & (df_hit[etype] <= ehi)
    idx = df_hit[etype].loc[in_window].index[:nwfs]
    if len(idx) == 0:
        print(f'no events with {elo} <= {etype} <= {ehi}, nothing to do')
        return

    raw_store = lh5.Store()
    tb_name = 'ORSIS3302DecoderForEnergy/raw'
    raw_list = lh5_dir + dg.fileDB['raw_path'] + '/' + dg.fileDB['raw_file']
    f_raw = raw_list.values[0]  # fixme, only works for one file rn
    res = raw_store.read_object(tb_name, f_raw, start_row=0,
                                n_rows=idx[-1] + 1)
    # read_object returns (object, n_rows) in some pygama versions and the
    # bare object in others; accept either form
    data_raw = res[0] if isinstance(res, tuple) else res

    wfs_all = data_raw['waveform']['values'].nda
    df_wfs = pd.DataFrame(wfs_all[idx.values, :])

    # simple test function to compute pole-zero constant for a few wfs.
    # the final one should become a dsp processor
    istart = 5000  # first sample of the tail region
    iwinlo, iwinhi, iwid = 500, 2500, 20  # two averaging windows in the tail

    ts = np.arange(0, df_wfs.shape[1] - 1 - istart, 1) / 1e3

    def get_rc(row):
        # two-point method: mean log-amplitude in two windows of the tail
        lo = istart + iwinlo
        hi = istart + iwinhi
        win1 = np.mean(np.log(row[lo:lo + iwid]))
        win2 = np.mean(np.log(row[hi:hi + iwid]))
        slope = (win2 - win1) / (ts[iwinhi] - ts[iwinlo])
        return 1 / slope

    res = df_wfs.apply(get_rc, axis=1)
    tau_avg, tau_std = res.mean(), res.std()
    print(f'average RC decay constant: {tau_avg:.2f} pm {tau_std:.2f}')
def dsp_to_hit(df_row, dg=None, verbose=False, overwrite=False, lowE=False):
    """
    Create a hit file from one dsp file.

    This routine is specific to CAGE but could be extended & modified in the
    future to work for multi-channel data (PGT, L200, etc.)

    Steps: apply the polynomial energy calibration stored in the ecalDB for
    this run/cycle to every estimator in dg.config['rawe'], add a timestamp
    in seconds ('ts_sec') and a global timestamp ('ts_glo'), and write all
    columns to the hit LH5 file.

    Parameters
    ----------
    df_row : pandas.Series
        One fileDB row; uses 'dsp_path', 'dsp_file', 'hit_path', 'hit_file',
        'run', 'cycle', 'skip' and 'startTime'.
    dg : object
        Required despite the None default; its `config` dict supplies
        'dsp_input_dir', 'hit_output_dir', 'input_table', 'ecaldb', 'rawe'.
    verbose : bool
        Print progress, make diagnostic plots, then exit the process (meant
        for looking at a single cycle file).
    overwrite : bool
        Re-create the hit file if it already exists.
    lowE : bool
        Write into a 'lowE' sub-directory of the hit path.
    """
    apply_ecal = True
    apply_tscorr = False  # not needed, should be fixed by the jan 30 2021 re-d2r

    f_dsp = f"{dg.config['dsp_input_dir']}/{df_row['dsp_path']}/{df_row['dsp_file']}"
    f_hit = f"{dg.config['hit_output_dir']}/{df_row['hit_path']}/{df_row['hit_file']}"

    # change output directory if in spec_id 2 mode (ie low-energy calibration
    # to get 60 keV in right place)
    if lowE:
        f_hit = f"{dg.config['hit_output_dir']}/{df_row['hit_path']}/lowE/{df_row['hit_file']}"
        print(f'Writing to low-energy hit file: {f_hit}')

    if verbose:
        print('input:', f_dsp)
        print('output:', f_hit)

    if not overwrite and os.path.exists(f_hit):
        # report the file that actually exists (was printing f_dsp)
        print('file exists, overwrite not set, skipping f_hit:\n ', f_hit)
        return

    # get run and cycle for ecalDB lookup. also apply run selection
    run, cycle = df_row[['run', 'cycle']].astype(int)
    if df_row.skip:
        print(f'Cycle {cycle} has been marked junk, will not process.')
        return

    # create initial 'hit' DataFrame from dsp data
    hit_store = lh5.Store()
    data, n_rows = hit_store.read_object(dg.config['input_table'], f_dsp)
    df_hit = data.get_dataframe()

    # 1. get energy calibration for this run from peakfit
    if apply_ecal:

        # loading the tinydb this way preserves the in-file text formatting
        cal_db = db.TinyDB(storage=MemoryStorage)
        with open(dg.config['ecaldb']) as f:
            raw_db = json.load(f)
            cal_db.storage.write(raw_db)

        # loop over energy estimators of interest
        for etype in dg.config['rawe']:

            # load ecalDB table
            tb = cal_db.table(f'peakfit_{etype}').all()
            df_cal = pd.DataFrame(tb)
            for col in ['run', 'cyclo', 'cychi']:
                df_cal[col] = df_cal[col].astype(int)

            # load cal constants for this cycle; exactly one row must match
            que = f'run=={run} and cyclo <= {cycle} <= cychi'
            df_run = df_cal.query(que)
            if len(df_run) != 1:
                print('Warning, non-unique query:', que)
                print(df_run)
                exit()

            # figure out the order of the polynomial from column names
            pols = {}
            for col in [c for c in df_run.columns if 'cal' in c]:
                iord = int(parse('cal{p}', col).named['p'])
                pols[iord] = df_run.iloc[0][f'cal{iord}']

            # np.poly1d wants the highest order first: p2, p1, p0 ...
            coeffs = np.array([pols[k] for k in sorted(pols, reverse=True)],
                              dtype=float)

            # apply the calibration to the dataframe
            pfunc = np.poly1d(coeffs)
            df_hit[f'{etype}_cal'] = pfunc(df_hit[f'{etype}'])

    # 2. compute timestamp rollover correction (specific to struck 3302)
    clock = 100e6  # 100 MHz
    if apply_tscorr:
        UINT_MAX = 4294967295  # (0xffffffff)
        t_max = UINT_MAX / clock
        ts = df_hit['timestamp'].values / clock

        # a negative step marks the first sample after a rollover
        tdiff = np.insert(np.diff(ts), 0, 0)
        iwrap = np.where(tdiff < 0)
        iloop = np.append(iwrap[0], len(ts))

        # shift every block between rollovers by the accumulated offset
        ts_new, t_roll = [], 0
        for i, idx in enumerate(iloop):
            ilo = 0 if i == 0 else iwrap[0][i - 1]
            ihi = idx
            ts_block = ts[ilo:ihi]
            t_last = ts[ilo - 1]
            t_diff = t_max - t_last
            ts_new.append(ts_block + t_roll)
            t_roll += t_last + t_diff
        df_hit['ts_sec'] = np.concatenate(ts_new)
    else:
        # NOTE: may need to subtract off the 1st value here if we find
        # that the timestamp doesn't reset at cycle boundaries.
        df_hit['ts_sec'] = df_hit['timestamp'].values / clock

    # 3. compute global timestamp
    t_start = df_row['startTime']
    if t_start is not None:
        df_hit['ts_glo'] = df_hit['ts_sec'] + t_start

    # write to LH5 file (any existing file is removed first)
    if os.path.exists(f_hit):
        os.remove(f_hit)
    sto = lh5.Store()
    tb_name = dg.config['input_table'].replace('dsp', 'hit')
    tb_lh5 = lh5.Table(size=len(df_hit))

    for col in df_hit.columns:
        tb_lh5.add_field(col,
                         lh5.Array(df_hit[col].values, attrs={'units': ''}))
        if verbose:
            print(col)

    print(f'Writing table: {tb_name} in file:\n {f_hit}')
    sto.write_object(tb_lh5, tb_name, f_hit)

    if verbose:
        print('Creating diagnostic plots ...')

        # energy
        xlo, xhi, xpb = 0, 3000, 10
        hist, bins, _ = pgh.get_hist(df_hit['trapEftp_cal'],
                                     range=(xlo, xhi), dx=xpb)
        plt.semilogy(bins[1:], hist, ds='steps', c='b', lw=1)
        plt.xlabel('Energy (keV)', ha='right', x=1)
        plt.ylabel('Counts', ha='right', y=1)
        plt.savefig('./plots/d2h_etest.png')
        print('saved figure: ./plots/d2h_etest.png')
        plt.cla()

        # timestamp
        xv = np.arange(len(df_hit))
        plt.plot(xv, df_hit['ts_sec'], '.b')
        plt.savefig('./plots/d2h_ttest.png')
        print('saved figure: ./plots/d2h_ttest.png')
        plt.cla()

        # exit, don't create + overwrite a million plots
        print(
            'verbose mode of d2h is meant to look at 1 cycle file, exiting...')
        exit()
def get_resolution():
    """
    Measure the width of the peak in the 1445-1475 window of the calibrated
    spectrum: first numerically (half-maximum crossing on each side), then
    with a pgf.radford_peak fit seeded from the numerical result.  The data,
    initial guess and fit are saved to ./plots/resolution_1460_{etype}.pdf.
    """
    # load hit file
    hit_path = '/Users/wisecg/Data/OPPI/hit/oppi_run0_cyc2027_hit.lh5'
    table = 'ORSIS3302DecoderForEnergy/raw'
    store = lh5.Store()
    store.ls(hit_path)
    hits = store.read_object(table, hit_path).get_dataframe()

    # load parameters
    e_peak = 1460.8
    etype = 'trapE_cal'  # or 'energy_cal'
    window, bin_width = (1445, 1475), 0.2

    # get histogram
    counts, edges, variances = pgh.get_hist(hits[etype], range=window,
                                            dx=bin_width)
    x_bins = edges[1:]

    # simple numerical width
    peak_bin = np.argmax(counts)
    peak_height = counts[peak_bin]
    half = peak_height / 2
    right_side = (x_bins > x_bins[peak_bin]) & (counts <= half)
    left_side = (x_bins < x_bins[peak_bin]) & (counts >= half)
    fwhm = x_bins[right_side][0] - x_bins[left_side][0]
    sig = fwhm / 2.355

    # fit to radford peak: mu, sigma, hstep, htail, tau, bg0, amp
    fit_func = pgf.radford_peak
    guess = [
        x_bins[peak_bin],
        sig,
        0.001,  # hstep: fraction that the step contributes
        0.1,  # htail
        10,  # tau
        np.mean(counts[:20]),  # bg0
        peak_height * fwhm,  # amp
    ]
    pars, cov = pgf.fit_hist(fit_func, counts, edges, var=variances,
                             guess=guess)
    par_err = np.sqrt(np.diag(cov))

    chisq = []
    for x, h in zip(x_bins, counts):
        model = fit_func(x, *pars)
        chisq.append(abs((model - h)**2 / model))

    # collect results (for output, should use a dict or DataFrame)
    e_fit = pars[0]
    fwhm_fit = pars[1] * 2.355
    print(fwhm, fwhm_fit)
    # computed but not reported anywhere yet
    fwhmerr = par_err[1] * 2.355 * e_peak / e_fit
    rchisq = sum(np.array(chisq) / len(counts))

    # plotting
    plt.plot(x_bins, counts, ds='steps', c='b', lw=2, label=etype)

    # peak shape
    plt.plot(x_bins, fit_func(x_bins, *guess), '-', c='orange', alpha=0.5,
             label='init. guess')
    plt.plot(x_bins, fit_func(x_bins, *pars), '-r', alpha=0.8,
             label='peakshape fit')
    # invisible entry, used only to show the fit results in the legend
    plt.plot(np.nan, np.nan, '-w',
             label=f'mu={e_fit:.1f}, fwhm={fwhm_fit:.2f}')

    plt.xlabel(etype, ha='right', x=1)
    plt.ylabel('Counts', ha='right', y=1)
    plt.legend(loc=2)
    plt.tight_layout()
    plt.savefig(f'./plots/resolution_1460_{etype}.pdf')
    plt.cla()
def show_wfs():
    """
    Show low-energy waveforms from the "noise" energy region and save them to
    ./plots/noise_wfs.png.

    Events are selected from the hit file in a noise window and a physics
    window; the raw file is read far enough to cover both selections, and the
    noise waveforms are drawn.
    """
    raw_path = '/Users/wisecg/Data/OPPI/raw/oppi_run0_cyc2027_raw.lh5'
    hit_path = '/Users/wisecg/Data/OPPI/hit/oppi_run0_cyc2027_hit.lh5'

    # use the hit file to select events
    table = 'ORSIS3302DecoderForEnergy/raw'
    hits = lh5.Store().read_object(table, hit_path).get_dataframe()

    # settings
    n_show = 20
    # for 'energy_cal' noise stops @ 18 keV: windows 10-15 and 25-30
    ecol = 'trapE_cal'  # noise stops @ 35 keV
    noise_win = (25, 30)
    phys_win = (40, 45)

    energy = hits[ecol]

    def first_rows(lo, hi):
        # index labels of the first n_show events with lo < energy < hi
        return energy.loc[(energy > lo) & (energy < hi)].index[:n_show]

    # select noise and phys events
    rows_noise = first_rows(*noise_win)
    rows_phys = first_rows(*phys_win)

    # read the raw file up to the last selected row of either selection
    last_row = max(rows_noise[-1], rows_phys[-1])
    raw = lh5.Store().read_object(table, raw_path, start_row=0,
                                  n_rows=last_row + 1)
    wfs = raw['waveform']['values'].nda
    noise_wfs = wfs[rows_noise.values, :]

    ticks = np.arange(0, noise_wfs.shape[1], 1)

    # noise wfs (swap in wfs[rows_phys.values, :] to look at phys wfs)
    for wf in noise_wfs:
        plt.plot(ticks, wf, lw=1)

    plt.xlabel('time (clock ticks)', ha='right', x=1)
    plt.ylabel('ADC', ha='right', y=1)
    plt.savefig('./plots/noise_wfs.png', dpi=300)
    plt.cla()
def _superpulse(wfs, n_baseline=800):
    """Return the baseline-subtracted, peak-normalised mean of `wfs` (one waveform per row)."""
    bl_means = wfs[:, :n_baseline].mean(axis=1)
    mean_wf = (wfs - bl_means[:, np.newaxis]).mean(axis=0)
    return mean_wf / np.amax(mean_wf)


def plot_wfs(run, cycle, etype, user=False, hit=True, cal=True):
    """
    Plot normalised superpulses for a ladder of energy slices.

    Events are selected from the hit or dsp files of one cycle, the matching
    raw waveforms are averaged per energy slice, and the resulting superpulses
    are drawn on one canvas, coloured by slice energy.

    Parameters
    ----------
    run : int
        Run number; selects which set of columns is loaded (run <= 117 uses
        the older column names).
    cycle : int
        Cycle number used to query the file database.
    etype : str
        Name of the energy column used for the event selection.
    user : bool
        Read hit/dsp files from `dg.lh5_user_dir` instead of `dg.lh5_dir`.
    hit : bool
        Select events from hit files if true, otherwise from dsp files.
    cal : bool
        Also load the `<etype>_cal` column.

    Notes
    -----
    Energy slices without any event are reported and skipped. Raw waveforms
    are always read from `dg.lh5_dir`, and only from the first raw file.
    """
    dg = DataGroup('$CAGE_SW/processing/cage.json', load=True)
    dg.fileDB.query(f'cycle=={cycle} and skip==False', inplace=True)

    # locate the hit- or dsp-tier files
    lh5_dir = dg.lh5_user_dir if user else dg.lh5_dir
    tier = 'hit' if hit else 'dsp'
    print(f'Using {tier} files')
    file_list = lh5_dir + dg.fileDB[f'{tier}_path'] + '/' + dg.fileDB[f'{tier}_file']

    # the column set depends only on the run range and on whether calibrated
    # energies were requested; the tier only changes the table name
    cols = [f'{etype}']
    if cal:
        cols.append(f'{etype}_cal')
    if run <= 117:
        cols += ['bl', 'bl_sig', 'A_10', 'AoE', 'ts_sec',
                 'dcr_raw', 'dcr_ftp', 'dcr_max']
    else:
        cols += ['bl', 'bl_sig', 'bl_slope', 'lf_max', 'A_10', 'AoE', 'dcr']
    cols += ['tp_0', 'tp_10', 'tp_90', 'tp_50', 'tp_80', 'tp_max']
    df = lh5.load_dfs(file_list, cols, f'ORSIS3302DecoderForEnergy/{tier}')

    n_eranges = 10  # number of steps between lower and higher energy limits
    nwfs = 50       # number of waveforms to average for superpulse
    emin = 500      # lower energy limit
    emax = 15000    # higher energy limit
    eranges = np.linspace(emin, emax, n_eranges)  # set up energy slices

    # raw-file access does not change between slices
    raw_store = lh5.Store()
    tb_name = 'ORSIS3302DecoderForEnergy/raw'
    raw_list = dg.lh5_dir + dg.fileDB['raw_path'] + '/' + dg.fileDB['raw_file']
    f_raw = raw_list.values[0]  # fixme, only works for one file rn

    superpulses = []  # (slice energy, superpulse) pairs
    for e in eranges:
        # get events within 1% of energy
        elo = e - (0.01 * e)
        ehi = e + (0.01 * e)
        idx = df[etype].loc[(df[etype] >= elo) & (df[etype] <= ehi)].index[:nwfs]
        if len(idx) == 0:
            # idx[-1] below would raise IndexError on an empty selection
            print(f'No events between {elo:.1f} and {ehi:.1f}, skipping this slice')
            continue

        # only read as far as the last selected row
        data_raw, nrows = raw_store.read_object(tb_name, f_raw, start_row=0,
                                                n_rows=idx[-1] + 1)
        wfs = (data_raw['waveform']['values']).nda[idx.values, :]
        superpulses.append((e, _superpulse(wfs)))

    if not superpulses:
        print('No waveforms selected in any energy slice, nothing to plot')
        return

    fig, ax = plt.subplots(figsize=(9, 8))

    # Colour scale in energy units, so that the colorbar ticks (placed at the
    # slice energies) fall inside the normalised range. Because the slices are
    # evenly spaced, each slice gets the same colour as an index-based scale.
    norm = mpl.colors.Normalize(vmin=emin, vmax=emax)
    cmap = mpl.cm.ScalarMappable(norm=norm, cmap=mpl.cm.jet)
    cmap.set_array([])

    for e, wf in superpulses:
        # the last sample of each superpulse is not drawn
        ax.plot(np.arange(len(wf) - 1), wf[:-1], c=cmap.to_rgba(e))

    cb = fig.colorbar(cmap, ax=ax, ticks=list(eranges))
    cb.set_label("Energy", ha='right', va='center', rotation=270, fontsize=20)
    cb.ax.tick_params(labelsize=18)
    # ax.set_xlim(3800, 8000)
    # ax.set_ylim(0.4, 1.01)
    plt.setp(ax.get_xticklabels(), fontsize=16)
    plt.setp(ax.get_yticklabels(), fontsize=16)
    ax.set_title(f'Waveforms, {emin}-{emax} {etype}, {n_eranges} steps', fontsize=20)
    ax.set_xlabel('clock cycles', fontsize=20)
    fig.savefig(f'./plots/angleScan/waveforms/wfs_fallingEdge_cycle{cycle}.png', dpi=300)
def process_orca(daq_filename, raw_file_pattern, n_max=np.inf,
                 ch_groups_dict=None, verbose=False, buffer_size=1024):
    """
    Convert ORCA DAQ data to "raw" lh5.

    Parameters
    ----------
    daq_filename : str
        Path of the ORCA file to decode.
    raw_file_pattern : str
        Output file template handed to `set_outputs`.
    n_max : int or np.inf
        Maximum number of packets to read.
    ch_groups_dict : dict or None
        Channel groups keyed by decoder_name. When given, only data from the
        decoders named in it are decoded; otherwise everything with an
        available decoder is decoded.
    verbose : bool
        Print the decoder setup and show a progress bar.
    buffer_size : int
        Number of rows of the in-memory tables.

    Notes
    -----
    Exits the interpreter (sys.exit) if the file cannot be opened or no
    decoder applies. Decoders requested in `ch_groups_dict` that are not
    listed in the file header are skipped with a warning.
    """
    lh5_store = lh5.Store()

    f_in = open_orca(daq_filename)
    if f_in is None:
        print("Couldn't find the file %s" % daq_filename)
        sys.exit(0)

    # parse the header. save the length so we can jump past it later
    reclen, header_nbytes, header_dict = parse_header(daq_filename)

    # figure out the total size
    SEEK_END = 2
    f_in.seek(0, SEEK_END)
    file_size = float(f_in.tell())
    f_in.seek(0, 0)  # rewind
    file_size_MB = file_size / 1e6
    print("Total file size: {:.3f} MB".format(file_size_MB))
    print("Run number:", get_run_number(header_dict))

    # Build the dict used in the inner loop for passing data packets to decoders
    decoders = {}

    # First build a list of all decoder names that might be in the data
    # This is a dict of names keyed off of data_id
    id2dn_dict = get_id_to_decoder_name_dict(header_dict)
    if verbose:
        print("Data IDs present in ORCA file header are:")
        for data_id in id2dn_dict:
            print(f" {data_id}: {id2dn_dict[data_id]}")

    # Invert the previous list, to get a list of decoder ids keyed off of
    # decoder names
    dn2id_dict = {name: data_id for data_id, name in id2dn_dict.items()}

    # By default we decode all data for which we have decoders. If the user
    # provides a ch_group_dict, we will only decode data from decoders keyed in
    # the dict.
    decode_all_data = True
    decoders_to_run = dn2id_dict.keys()
    if ch_groups_dict is not None:
        decode_all_data = False
        decoders_to_run = ch_groups_dict.keys()

    # Now get the actual requested decoders. A requested decoder that the file
    # header does not list has no data ID and is left out here.
    for sub in OrcaDecoder.__subclasses__():
        decoder = sub()  # instantiate the class
        name = decoder.decoder_name
        if name in decoders_to_run and name in dn2id_dict:
            decoder.dataID = dn2id_dict[name]
            decoder.set_header_dict(header_dict)
            decoders[decoder.dataID] = decoder
    if len(decoders) == 0:
        print("No decoders. Exiting...")
        sys.exit(1)
    if verbose:
        print("pygama will run these decoders:")
        for data_id, dec in decoders.items():
            print(" ", dec.decoder_name + ", id =", data_id)

    # Now cull the decoders_to_run list
    new_dtr = []
    for decoder_name in decoders_to_run:
        data_id = dn2id_dict.get(decoder_name)
        if data_id is None or data_id not in decoders:
            print("warning: no decoder exists for", decoder_name,
                  "... will skip its data.")
        else:
            new_dtr.append(decoder_name)
    decoders_to_run = new_dtr

    # prepare ch groups
    if ch_groups_dict is None:
        ch_groups_dict = {}
        for decoder_name in decoders_to_run:
            ch_groups = create_dummy_ch_group()
            ch_groups_dict[decoder_name] = ch_groups
            grp_path_template = f'{decoder_name}/raw'
            set_outputs(ch_groups, out_file_template=raw_file_pattern,
                        grp_path_template=grp_path_template)
    else:
        for decoder_name, ch_groups in ch_groups_dict.items():
            expand_ch_groups(ch_groups)
            set_outputs(ch_groups, out_file_template=raw_file_pattern,
                        grp_path_template='{system}/{group_name}/raw')

    # Set up tables for data
    ch_tables_dict = {}
    for data_id, dec in decoders.items():
        decoder_name = id2dn_dict[data_id]
        ch_groups = ch_groups_dict[decoder_name]
        ch_tables_dict[data_id] = build_tables(ch_groups, buffer_size, dec)
    max_tbl_size = 0

    # -- scan over raw data --
    print("Beginning daq-to-raw processing ...")

    packet_id = 0  # number of events decoded
    unrecognized_data_ids = []

    # skip the header using reclen from before
    # reclen is in number of longs, and we want to skip a number of bytes
    f_in.seek(reclen * 4)

    count_packets = n_max < np.inf and n_max > 0
    if count_packets:
        n_entries = n_max
        unit = "id"
    else:
        n_entries = file_size
        unit = "B"
    progress_bar = tqdm_range(0, n_entries, text="Processing",
                              verbose=verbose, unit=unit)
    file_position = 0

    # start scanning; the input file is closed even if a decoder raises
    try:
        while packet_id < n_max and f_in.tell() < file_size:
            packet_id += 1

            try:
                packet, data_id = get_next_packet(f_in)
            except EOFError:
                break
            except Exception as e:
                print("Failed to get the next event ... Exception:", e)
                break

            if data_id not in decoders:
                # only report unknown IDs when we were asked to decode everything
                if decode_all_data and data_id not in unrecognized_data_ids:
                    unrecognized_data_ids.append(data_id)
                continue
            decoder = decoders[data_id]

            # Clear the tables if the next read could overflow them.
            # Only have to check this when the max table size is within
            # max_n_rows_per_packet of being full.
            if max_tbl_size + decoder.max_n_rows_per_packet() >= buffer_size:
                ch_groups = ch_groups_dict[id2dn_dict[data_id]]
                max_tbl_size = 0
                for group_info in ch_groups.values():
                    tbl = group_info['table']
                    if tbl.is_full():
                        group_path = group_info['group_path']
                        out_file = group_info['out_file']
                        lh5_store.write_object(tbl, group_path, out_file,
                                               n_rows=tbl.loc)
                        tbl.clear()
                    if tbl.loc > max_tbl_size:
                        max_tbl_size = tbl.loc
            else:
                max_tbl_size += decoder.max_n_rows_per_packet()

            tables = ch_tables_dict[data_id]
            decoder.decode_packet(packet, tables, packet_id, header_dict)

            if verbose:
                if count_packets:
                    update_len = 1
                else:
                    update_len = f_in.tell() - file_position
                    file_position = f_in.tell()
                update_progress(progress_bar, update_len)
    finally:
        f_in.close()

    print("Done. Last packet ID:", packet_id)

    # final write to file
    for dec_name, ch_groups in ch_groups_dict.items():
        for group_info in ch_groups.values():
            # groups of decoders that were skipped above presumably never got
            # a table from build_tables -- nothing to flush for those
            if 'table' not in group_info:
                continue
            tbl = group_info['table']
            if tbl.loc == 0:
                continue
            group_path = group_info['group_path']
            out_file = group_info['out_file']
            lh5_store.write_object(tbl, group_path, out_file, n_rows=tbl.loc)
            print('last write')
            tbl.clear()

    if len(unrecognized_data_ids) > 0:
        print("WARNING, Found the following unknown data IDs:")
        for data_id in unrecognized_data_ids:
            # an ID seen in the stream is not guaranteed to be in the header
            print(" {}: {}".format(data_id, id2dn_dict.get(data_id, 'unknown')))
        print("hopefully they weren't important!\n")

    print("Wrote RAW File:\n {}\nFILE INFO:".format(raw_file_pattern))
def raw_to_dsp(f_raw, f_dsp, dsp_config, lh5_tables=None, database=None,
               outputs=None, n_max=np.inf, overwrite=True, buffer_len=3200,
               block_width=16, verbose=1):
    """
    Run the DSP processing chain over the raw tables of an lh5 file.

    Uses the ProcessingChain class. The list of processors is specified via
    a JSON file.

    Parameters
    ----------
    f_raw : str
        Input raw lh5 file.
    f_dsp : str
        Output dsp lh5 file.
    dsp_config : str or dict
        Processor configuration, either as a dict or as the path of a JSON
        file to load.
    lh5_tables : list of str or None
        Tables to process. If None, every table of the file is considered.
        Entries without 'raw' in their name are ignored. The list passed in
        is not modified.
    database : str, dict or None
        Per-channel parameters, as a dict or the path of a JSON file. Anything
        else is discarded with a message.
    outputs : list of str or None
        Passed through to `build_processing_chain`.
    n_max : int
        Maximum number of rows to process per table.
    overwrite : bool
        Remove an existing `f_dsp` before writing.
    buffer_len, block_width : int
        Read buffer length and processing-chain block width.
    verbose : int
        Verbosity passed to the processing chain and the progress bar.

    Raises
    ------
    Exception
        If `dsp_config` is neither a dict nor a path to a JSON file.
    DSPFatal
        Re-raised from the processing chain, with `wf_range` shifted to the
        position in the file.
    """
    t_start = time.time()

    if isinstance(dsp_config, str):
        with open(dsp_config, 'r') as config_file:
            dsp_config = json.load(config_file, object_pairs_hook=OrderedDict)

    if not isinstance(dsp_config, dict):
        raise Exception('Error, dsp_config must be an dict')

    raw_store = lh5.Store()
    lh5_file = raw_store.gimme_file(f_raw, 'r')
    if lh5_file is None:
        print(f'raw_to_dsp: input file not found: {f_raw}')
        return
    else:
        print(f'Opened file {f_raw}')

    # if no group is specified, assume we want to decode every table in the file
    if lh5_tables is None:
        lh5_tables = []
        lh5_keys = raw_store.ls(f_raw)

        # sometimes 'raw' is nested, e.g g024/raw
        for tb in lh5_keys:
            if "raw" not in tb:
                sub_keys = raw_store.ls(lh5_file[tb])
                if sub_keys and "raw" in sub_keys[0]:
                    tb = tb + '/' + sub_keys[0]  # g024 + /raw
            lh5_tables.append(tb)

    # make sure every group points to waveforms, if not, drop the group.
    # (built as a new list: removing entries from a list while iterating over
    # it skips the element that follows each removal)
    lh5_tables = [tb for tb in lh5_tables if 'raw' in tb]
    if len(lh5_tables) == 0:
        print("Empty lh5_tables, exiting...")
        sys.exit(1)

    # get the database parameters. For now, this will just be a dict in a json
    # file, but eventually we will want to interface with the metadata repo
    if isinstance(database, str):
        with open(database, 'r') as db_file:
            database = json.load(db_file)

    if database and not isinstance(database, dict):
        database = None
        print(
            'database is not a valid json file or dict. Using default db values.'
        )

    # clear existing output files
    if overwrite:
        if os.path.isfile(f_dsp):
            if verbose:
                print('Overwriting existing file:', f_dsp)
            os.remove(f_dsp)

    for tb in lh5_tables:
        # load primary table and build processing chain and output table
        tot_n_rows = raw_store.read_n_rows(tb, f_raw)
        if n_max and n_max < tot_n_rows:
            tot_n_rows = n_max

        chan_name = tb.split('/')[0]
        db_dict = database.get(chan_name) if database else None
        lh5_in, n_rows_read = raw_store.read_object(tb, f_raw, start_row=0,
                                                    n_rows=buffer_len)
        pc, mask, tb_out = build_processing_chain(lh5_in, dsp_config, db_dict,
                                                  outputs, verbose, block_width)

        print(f'Processing table: {tb} ...')
        for start_row in tqdm_range(0, int(tot_n_rows), buffer_len, verbose):
            lh5_in, n_rows = raw_store.read_object(tb, f_raw,
                                                   start_row=start_row,
                                                   n_rows=buffer_len,
                                                   field_mask=mask,
                                                   obj_buf=lh5_in)
            # never process past the requested number of rows
            n_rows = min(tot_n_rows - start_row, n_rows)
            try:
                pc.execute(0, n_rows)
            except DSPFatal as e:
                # Update the wf_range to reflect the file position
                e.wf_range = "{}-{}".format(e.wf_range[0] + start_row,
                                            e.wf_range[1] + start_row)
                raise
            raw_store.write_object(tb_out, tb.replace('/raw', '/dsp'), f_dsp,
                                   n_rows=n_rows)

        print(f'Done. Writing to file {f_dsp}')

    # write processing metadata
    dsp_info = lh5.Struct()
    dsp_info.add_field('timestamp', lh5.Scalar(np.uint64(time.time())))
    dsp_info.add_field('python_version', lh5.Scalar(sys.version))
    dsp_info.add_field('numpy_version', lh5.Scalar(np.version.version))
    dsp_info.add_field('h5py_version', lh5.Scalar(h5py.version.version))
    dsp_info.add_field('hdf5_version', lh5.Scalar(h5py.version.hdf5_version))
    dsp_info.add_field('pygama_version', lh5.Scalar(pygama_version))
    dsp_info.add_field('pygama_branch', lh5.Scalar(git.branch))
    dsp_info.add_field('pygama_revision', lh5.Scalar(git.revision))
    dsp_info.add_field('pygama_date', lh5.Scalar(git.commit_date))
    dsp_info.add_field('dsp_config',
                       lh5.Scalar(json.dumps(dsp_config, indent=2)))
    raw_store.write_object(dsp_info, 'dsp_info', f_dsp)

    t_elap = (time.time() - t_start) / 60
    print(f'Done processing. Time elapsed: {t_elap:.2f} min.')
def data_cleaning():
    """
    using parameters in the hit file, plot 1d and 2d spectra to find cut values.

    columns in file:
        ['trapE', 'bl', 'bl_sig', 'A_10', 'AoE', 'packet_id', 'ievt', 'energy',
        'energy_first', 'timestamp', 'crate', 'card', 'channel', 'energy_cal',
        'trapE_cal']
    note, 'energy_first' from first value of energy gate.

    Each plot section is guarded by `i_plot <= k`, so a section runs when
    `i_plot` is at or below its number. All plots are written under ./plots/.
    """
    i_plot = 3  # sections with a threshold >= this number are run

    # baseline cut, shared by every section that applies it
    bl_cut_lo, bl_cut_hi = 57700, 58500
    bl_cut = f'bl > {bl_cut_lo} and bl < {bl_cut_hi}'

    f_hit = '/Users/wisecg/Data/OPPI/hit/oppi_run0_cyc2027_hit.lh5'
    tb_name = 'ORSIS3302DecoderForEnergy/raw'
    hit_store = lh5.Store()
    data = hit_store.read_object(tb_name, f_hit)
    df_hit = data.get_dataframe()

    # tip: df_hit[['bl', 'bl_sig', 'A_10', 'energy_first', 'timestamp']].describe()
    # gives a quick summary of these columns

    # correct energy_first to allow negative values. The column is rebuilt and
    # assigned back instead of being edited through `.values`, which only
    # works when pandas hands out a writable view of the column.
    # NOTE(review): the offset is 4294967295 (0xffffffff); if energy_first is
    # a wrapped 32-bit signed value the offset should be 2**32 -- confirm.
    efirst = df_hit['energy_first'].to_numpy(dtype=np.int64)
    df_hit['energy_first'] = np.where(efirst > 4e9, efirst - 4294967295, efirst)
    # print(df_hit[['energy','energy_first','bl']])

    if i_plot <= 0:
        # bl vs energy
        elo, ehi, epb = 0, 250, 1
        blo, bhi, bpb = 54700, 61400, 100
        nbx = int((ehi - elo) / epb)
        nby = int((bhi - blo) / bpb)

        h = plt.hist2d(df_hit['trapE_cal'], df_hit['bl'], bins=[nbx, nby],
                       range=[[elo, ehi], [blo, bhi]], cmap='jet')

        cb = plt.colorbar(h[3], ax=plt.gca())
        plt.xlabel('trapE_cal', ha='right', x=1)
        plt.ylabel('bl', ha='right', y=1)
        plt.tight_layout()
        # plt.show()
        plt.savefig('./plots/bl_vs_e.png', dpi=300)
        cb.remove()
        plt.cla()

        # make a formal baseline cut from 1d histogram
        hE, bins, vE = pgh.get_hist(df_hit['bl'], range=(blo, bhi), dx=bpb)
        xE = bins[1:]
        plt.semilogy(xE, hE, c='b', ds='steps')

        plt.axvline(bl_cut_lo, c='r', lw=1)
        plt.axvline(bl_cut_hi, c='r', lw=1)

        plt.xlabel('bl', ha='right', x=1)
        plt.ylabel('counts', ha='right', y=1)
        # plt.show()
        plt.savefig('./plots/bl_cut.pdf')
        plt.cla()

    if i_plot <= 1:
        # energy_first vs. E
        flo, fhi, fpb = -565534, 70000, 1000
        elo, ehi, epb = 0, 250, 1
        nbx = int((ehi - elo) / epb)
        nby = int((fhi - flo) / fpb)

        h = plt.hist2d(df_hit['trapE_cal'], df_hit['energy_first'],
                       bins=[nbx, nby], range=[[elo, ehi], [flo, fhi]],
                       cmap='jet', norm=LogNorm())

        cb = plt.colorbar(h[3], ax=plt.gca())
        plt.xlabel('trapE_cal', ha='right', x=1)
        plt.ylabel('energy_first', ha='right', y=1)
        plt.tight_layout()
        # plt.show()
        plt.savefig('./plots/efirst_vs_e.png', dpi=300)
        cb.remove()
        plt.cla()

        # make a formal energy_first cut from 1d histogram
        flo, fhi, fpb = -20000, 20000, 100
        hE, xE, vE = pgh.get_hist(df_hit['energy_first'], range=(flo, fhi),
                                  dx=fpb)
        xE = xE[1:]
        plt.semilogy(xE, hE, c='b', ds='steps')

        ef_cut_lo, ef_cut_hi = -5000, 4000
        plt.axvline(ef_cut_lo, c='r', lw=1)
        plt.axvline(ef_cut_hi, c='r', lw=1)

        plt.xlabel('energy_first', ha='right', x=1)
        plt.ylabel('counts', ha='right', y=1)
        # plt.show()
        plt.savefig('./plots/efirst_cut.pdf')
        plt.cla()

    if i_plot <= 3:
        # trapE_cal - energy_cal vs trapE_cal

        # use baseline cut
        df_cut = df_hit.query(bl_cut).copy()

        # add new diffE column
        df_cut['diffE'] = df_cut['trapE_cal'] - df_cut['energy_cal']

        elo, ehi, epb = 0, 3000, 1
        dlo, dhi, dpb = -10, 10, 0.1
        nbx = int((ehi - elo) / epb)
        nby = int((dhi - dlo) / dpb)

        h = plt.hist2d(df_cut['trapE_cal'], df_cut['diffE'], bins=[nbx, nby],
                       range=[[elo, ehi], [dlo, dhi]], cmap='jet',
                       norm=LogNorm())

        plt.xlabel('trapE_cal', ha='right', x=1)
        plt.ylabel('diffE (trap-onbd)', ha='right', y=1)
        plt.tight_layout()
        # plt.show()
        plt.savefig('./plots/diffE.png', dpi=300)
        plt.cla()

    if i_plot <= 4:
        # A_10/trapE_cal vs trapE_cal (A/E vs E)

        # i doubt we want to introduce a pulse shape cut at this point,
        # since i'm tuning on bkg data and we don't know a priori what (if any)
        # features the Kr waveforms will have. also, the efficiency as a
        # function of energy would have to be determined, which is hard.
        # so this is just for fun.

        # use baseline cut
        df_cut = df_hit.query(bl_cut).copy()

        # add new A/E column
        df_cut['aoe'] = df_cut['A_10'] / df_cut['trapE_cal']

        # alo, ahi, apb = -1300, 350, 1
        # elo, ehi, epb = 0, 250, 1
        alo, ahi, apb = -0.5, 5, 0.05
        elo, ehi, epb = 0, 50, 0.2
        nbx = int((ehi - elo) / epb)
        nby = int((ahi - alo) / apb)

        h = plt.hist2d(df_cut['trapE_cal'], df_cut['aoe'], bins=[nbx, nby],
                       range=[[elo, ehi], [alo, ahi]], cmap='jet',
                       norm=LogNorm())

        plt.xlabel('trapE_cal', ha='right', x=1)
        plt.ylabel('A/E', ha='right', y=1)
        plt.tight_layout()
        # plt.show()
        plt.savefig('./plots/aoe_vs_e_lowe.png', dpi=300)
        plt.cla()

    if i_plot <= 5:
        # show effect of cuts on energy spectrum

        # baseline cut and efirst cut are very similar
        df_cut = df_hit.query(bl_cut)
        # df_cut = df_hit.query('energy_first > -5000 and energy_first < 4000')

        etype = 'trapE_cal'
        elo, ehi, epb = 0, 250, 0.5

        # no cuts
        h1, x1, v1 = pgh.get_hist(df_hit[etype], range=(elo, ehi), dx=epb)
        x1 = x1[1:]
        plt.plot(x1, h1, c='k', lw=1, ds='steps', label='raw')

        # baseline cut (same range and binning, so it shares the x axis of the
        # uncut histogram)
        h2 = pgh.get_hist(df_cut[etype], range=(elo, ehi), dx=epb)[0]
        plt.plot(x1, h2, c='b', lw=1, ds='steps', label='bl cut')

        plt.xlabel(etype, ha='right', x=1)
        plt.ylabel('counts', ha='right', y=1)
        plt.legend()
        # plt.show()
        plt.savefig('./plots/cut_spectrum.pdf')
        plt.cla()
def process_flashcam(daq_file, raw_files, n_max, ch_groups_dict=None,
                     verbose=False, buffer_size=8192, chans=None, f_out=''):
    """
    decode FlashCam data, using the fcutils package to handle file access,
    and the FlashCam DataTaker to save the results and write to output.

    Parameters
    ----------
    daq_file : str
        FlashCam file to decode.
    raw_files : str or dict
        Output file name, or a dict with a label for each file:
        `{'geds': 'filename_geds.lh5', 'muvt': 'filename_muvt.lh5'}`.
        With several files, status and config records go to the 'auxs' entry
        if there is one, otherwise to files named "fcio_status" and
        "fcio_config".
    n_max : int or np.inf
        Maximum number of packets to decode.
    ch_groups_dict : dict or None
        Channel groups; the 'FlashCamEventDecoder' entry is used if present,
        otherwise a single dummy group is created.
    verbose : bool
        Print the output layout, a progress bar and a summary.
    buffer_size : int
        Number of rows of the in-memory tables.
    chans : unused
    f_out : str
        Ignored when `raw_files` is a string or has a single entry (it is
        replaced by that file name).

    Returns
    -------
    int
        Sum of the byte counts returned by the decoders.
    """
    import fcutils

    if isinstance(raw_files, str):
        single_output = True
        f_out = raw_files
    elif len(raw_files) == 1:
        single_output = True
        f_out = list(raw_files.values())[0]
    else:
        single_output = False

    fcio = fcutils.fcio(daq_file)

    # set up event decoder
    event_decoder = FlashCamEventDecoder()
    event_decoder.set_file_config(fcio)
    event_tables = {}

    # build ch_groups and set up tables
    ch_groups = None
    if (ch_groups_dict is not None) and ('FlashCamEventDecoder' in ch_groups_dict):
        # get ch_groups
        ch_groups = ch_groups_dict['FlashCamEventDecoder']
        expand_ch_groups(ch_groups)
    else:
        print('Config not found. Single-table mode')
        ch_groups = create_dummy_ch_group()

    # set up ch_group-to-output-file-and-group info
    out_template = f_out if single_output else raw_files
    set_outputs(ch_groups, out_file_template=out_template,
                grp_path_template='{group_name}/raw')

    # set up tables
    event_tables = build_tables(ch_groups, buffer_size, event_decoder)

    if verbose:
        print('Output group : output file')
        for group_info in ch_groups.values():
            group_path = group_info['group_path']
            out_file = group_info['out_file']
            print(group_path, ':', out_file.split('/')[-1])

    # dictionary with the unique file names as keys
    file_info = dict.fromkeys(
        set(group_info['out_file'] for group_info in ch_groups.values()),
        False)

    # set up status decoder (this is 'auxs' output)
    status_decoder = FlashCamStatusDecoder()
    status_decoder.set_file_config(fcio)
    status_tbl = lh5.Table(buffer_size)
    status_decoder.initialize_lh5_table(status_tbl)
    try:
        status_filename = f_out if single_output else raw_files['auxs']
        config_filename = f_out if single_output else raw_files['auxs']
    except (KeyError, TypeError, IndexError):
        # no 'auxs' output was configured: fall back to fixed file names
        status_filename = "fcio_status"
        config_filename = "fcio_config"

    # Set up the store
    # TODO: add overwrite capability
    lh5_store = lh5.Store()

    # write fcio_config
    fcio_config = event_decoder.get_file_config_struct()
    lh5_store.write_object(fcio_config, 'fcio_config', config_filename)

    # loop over raw data packets
    packet_id = 0
    rc = 1
    bytes_processed = 0
    bytes_per_loop = 0
    file_size = os.path.getsize(daq_file)
    max_numtraces = 0

    # progress is counted in packets when n_max is a finite positive limit,
    # in bytes otherwise
    count_packets = n_max < np.inf and n_max > 0
    if count_packets:
        n_entries = n_max
        unit = "id"
    else:
        n_entries = file_size
        unit = "B"
    progress_bar = tqdm_range(0, int(n_entries), text="Processing",
                              verbose=verbose, unit=unit)

    while rc and packet_id < n_max:
        rc = fcio.get_record()

        # Skip non-interesting records (rc == 0 also ends the loop)
        # FIXME: push to a buffer of skipped packets?
        if rc in (0, 1, 2, 5):
            continue

        packet_id += 1

        # Status record
        if rc == 4:
            bytes_per_loop = status_decoder.decode_packet(
                fcio, status_tbl, packet_id)
            bytes_processed += bytes_per_loop
            if status_tbl.is_full():
                lh5_store.write_object(status_tbl, 'fcio_status',
                                       status_filename,
                                       n_rows=status_tbl.size)
                status_tbl.clear()

        # Event or SparseEvent record
        if rc in (3, 6):
            for group_info in ch_groups.values():
                tbl = group_info['table']

                # Check that the tables are large enough
                # TODO: don't need to check this every event, only if sum(numtraces) >= buffer_size
                if tbl.size < fcio.numtraces and fcio.numtraces > max_numtraces:
                    print('warning: tbl.size =', tbl.size,
                          'but fcio.numtraces =', fcio.numtraces)
                    print('may overflow. suggest increasing tbl.size')
                    max_numtraces = fcio.numtraces

                # Pre-emptively clear tables if it might be necessary
                if tbl.size - tbl.loc < fcio.numtraces:  # might overflow
                    group_path = group_info['group_path']
                    out_file = group_info['out_file']
                    lh5_store.write_object(tbl, group_path, out_file,
                                           n_rows=tbl.loc)
                    if out_file in file_info:
                        file_info[out_file] = True
                    tbl.clear()

            # Looks okay: just decode
            bytes_per_loop = event_decoder.decode_packet(
                fcio, event_tables, packet_id)
            bytes_processed += bytes_per_loop

        if verbose:
            update_len = 1 if count_packets else bytes_per_loop
            update_progress(progress_bar, update_len)

    # end of loop, write to file once more
    for group_info in ch_groups.values():
        tbl = group_info['table']
        if tbl.loc != 0:
            group_path = group_info['group_path']
            out_file = group_info['out_file']
            lh5_store.write_object(tbl, group_path, out_file, n_rows=tbl.loc)
            if out_file in file_info:
                file_info[out_file] = True
            tbl.clear()
    if status_tbl.loc != 0:
        # NOTE(review): the in-loop flush above writes status rows to group
        # 'fcio_status', this final flush writes to 'stat' -- confirm which
        # name downstream readers expect before unifying them.
        lh5_store.write_object(status_tbl, 'stat', status_filename,
                               n_rows=status_tbl.loc)
        status_tbl.clear()

    # alert user to any files not actually saved in the end
    for out_file, is_saved in file_info.items():
        if not is_saved:
            print('Not saving file since no data were found:', out_file)

    if verbose:
        print(packet_id, 'packets decoded')

    if len(event_decoder.skipped_channels) > 0:
        print("Warning - daq_to_raw skipped some channels in file")
        if verbose:
            for ch, n in event_decoder.skipped_channels.items():
                print(" ch", ch, ":", n, "hits")

    return bytes_processed
#!/usr/bin/env python3 import numpy as np import pygama.lh5 as lh5 import matplotlib.pyplot as plt # show how to correct for timestamp rollover with the struck 3302, # and how to calculate the run duration using the dsp file (fastest). f_dsp = '/Users/wisecg/Data/OPPI/dsp/oppi_run9_cyc2180_dsp.lh5' sto = lh5.Store() data = sto.read_object('ORSIS3302DecoderForEnergy/raw', f_dsp) # correct for timestamp rollover clock = 100e6 # 100 MHz UINT_MAX = 4294967295 # (0xffffffff) t_max = UINT_MAX / clock # ts = data['timestamp'].nda.astype(np.int64) # has to be signed for np.diff ts = data['timestamp'].nda / clock # converts to float tdiff = np.diff(ts) tdiff = np.insert(tdiff, 0 , 0) iwrap = np.where(tdiff < 0) iloop = np.append(iwrap[0], len(ts)) ts_new, t_roll = [], 0 for i, idx in enumerate(iloop): ilo = 0 if i==0 else iwrap[0][i-1] ihi = idx ts_block = ts[ilo:ihi]
def __init__(self, files_in, lh5_group, dsp_config=None, database=None,
             n_drawn=1, x_unit='ns', x_lim=None, waveforms='waveform',
             wf_styles=None, lines=None, legend=None, legend_opts=None,
             norm=None, align=None, selection=None, buffer_len=128,
             block_width=8, verbosity=1):
    """Constructor for WaveformBrowser:
    - files_in: name of file or list of names to browse. Can use wildcards
    - lh5_group: name of LH5 group in file to browse
    - dsp_config (optional): name of DSP config json file containing transforms available to draw
    - database (optional): dict with database of processing parameters
    - n_drawn (default 1): number of events to draw simultaneously when calling DrawNext
    - x_unit (default ns): unit for x-axis
    - x_lim (default auto): range of x-values passed as tuple
    - waveforms (default 'waveform'): name of wf or list of wf names to draw
    - wf_styles (default None): waveform colors and other style parameters to cycle through when drawing waveforms. Can be given as:
        dict of lists: e.g. {'color':['r', 'g', 'b'], 'linestyle':['-', '--', '.']}
        name of predefined style; see matplotlib.style documentation
        None: use current matplotlib style
      If a single style cycle is given, use for all lines; if a list is given, match to waveforms list.
    - lines (default None): name of parameter or list of parameters to draw hlines and vlines for
    - legend (default None): formatting string and values to include in the legend. This can be a list of values (one for each waveform in waveforms). The values can be given as a tuple whose first entry is a formatting string and subsequent entries are the values to place in the formatting string. When building a formatting string, if a name is given in the {}s, it is assumed to be a parameter from the DSP config file. An example is: ("{:0.1f} keV", energy)
    - legend_opts (default None): dict containing kwargs for formatting the legend
    - norm (default None): name of parameter (probably energy) to use to normalize WFs; useful when drawing multiple
    - align (default None): name of time parameter to set as 0 time; useful for aligning multiple waveforms
    - selection (optional): selection of events to draw. Can be either a list of event indices or a numpy array mask (ala pandas).
    - buffer_len (default 128): number of waveforms to keep in memory at a time
    - block_width (default 8): block width for processing chain
    """
    self.verbosity = verbosity

    # data i/o initialization
    self.lh5_st = lh5.Store(keep_open=True)
    if isinstance(files_in, str):
        files_in = [files_in]

    # Expand wildcards and map out the files
    self.lh5_files = [
        f for f_wc in files_in
        for f in sorted(glob.glob(os.path.expandvars(f_wc)))
    ]
    self.lh5_group = lh5_group
    # file map is cumulative length of files up to file n. By doing
    # searchsorted left, we can get the file for a given wf index
    self.file_map = np.array(
        [self.lh5_st.read_n_rows(lh5_group, f) for f in self.lh5_files],
        'int64')
    np.cumsum(self.file_map, out=self.file_map)

    # Get the input buffer and read the first chunk
    self.lh5_in = self.lh5_st.get_buffer(self.lh5_group, self.lh5_files[0],
                                         buffer_len)
    self.lh5_st.read_object(self.lh5_group, self.lh5_files[0], start_row=0,
                            n_rows=buffer_len, obj_buf=self.lh5_in)
    self.buffer_len = buffer_len
    self.current_file = None
    self.current_chunk = None

    # initialize stuff for iteration
    self.selection = selection
    self.index_it = None
    self.reset()
    self.n_drawn = n_drawn

    # initialize list of objects to draw
    if isinstance(waveforms, str):
        self.wf_names = [waveforms]
    elif waveforms is None:
        self.wf_names = []
    else:
        self.wf_names = list(waveforms)
    self.wf_data = [[] for _ in self.wf_names]

    def style_cycle(sty):
        """Turn one style spec (style name, None, or dict of lists) into a cycle."""
        if isinstance(sty, str):
            try:
                return plt.style.library[sty]['axes.prop_cycle']
            except KeyError:
                # unknown style name, or a style without a property cycle
                return itertools.repeat(None)
        if sty is None:
            return itertools.repeat(None)
        return cycler(**sty)

    # wf_styles: either one cycle for everything, or one cycle per waveform
    if isinstance(wf_styles, (list, tuple)):
        self.wf_styles = [None for _ in self.wf_data]
        for i, sty in enumerate(wf_styles):
            self.wf_styles[i] = style_cycle(sty)
    else:
        self.wf_styles = style_cycle(wf_styles)

    if lines is None:
        self.line_names = []
    elif isinstance(lines, list):
        self.line_names = lines
    elif isinstance(lines, tuple):
        self.line_names = list(lines)
    else:
        self.line_names = [lines]
    self.line_data = [[] for _ in self.line_names]

    if legend is None:
        legend = []
    elif not isinstance(legend, list):
        legend = [legend]

    # Set up the legend format strings and collect input values
    self.legend_input = []
    self.legend_format = []
    for entry in legend:
        legend_input = []
        legend_format = ''
        if not isinstance(entry, tuple):
            entry = (entry, )

        for val in entry:
            if isinstance(val, str):
                # Rebuild the format string with anonymous fields; the field
                # names are collected in legend_input instead.
                for st, name, form, cv in string.Formatter().parse(val):
                    legend_format += st
                    if name is not None:
                        legend_format += '{'
                        legend_input.append(name)
                        # a replacement field is "{" [name] ["!" conv]
                        # [":" spec] "}", so the conversion must come first
                        if cv is not None and cv != '':
                            legend_format += '!' + cv
                        if form is not None and form != '':
                            legend_format += ':' + form
                        legend_format += '}'
            else:
                # find any {}s to fill from the formatter
                idxs = [
                    i for i, inp in enumerate(legend_input)
                    if isinstance(inp, str) and inp == ''
                ]
                if idxs:  # if we found a {}. it's already in the formatter
                    legend_input[idxs[0]] = val
                else:  # otherwise add to formatter
                    legend_input.append(val)
                    if legend_format != '':
                        legend_format += ', '
                    if isinstance(val, pd.Series):
                        legend_format += val.name + ' = {:.3g}'
                    elif isinstance(val, np.ndarray):
                        legend_format += '{:.3g}'

        self.legend_input.append(legend_input)
        self.legend_format.append(legend_format)
    self.legend_data = [[] for _ in self.legend_input]

    self.legend_kwargs = legend_opts if legend_opts else {}

    self.norm_par = norm
    self.align_par = align

    self.x_unit = units.unit_parser.parse_unit(x_unit)
    self.x_lim = x_lim

    # make processing chain and output buffer. legend_input holds one list
    # per legend entry, so it is flattened to reach the parameter names;
    # empty names are unfilled {} placeholders, not parameters.
    outputs = self.wf_names + \
        [name for name in self.line_names if isinstance(name, str)] + \
        [name for inputs in self.legend_input for name in inputs
         if isinstance(name, str) and name != '']
    if isinstance(self.norm_par, str):
        outputs += [self.norm_par]
    if isinstance(self.align_par, str):
        outputs += [self.align_par]
    # the same parameter may be requested by several options; ask for it once
    outputs = list(dict.fromkeys(outputs))

    self.proc_chain, self.lh5_out = build_processing_chain(
        self.lh5_in, dsp_config, db_dict=database, outputs=outputs,
        verbosity=self.verbosity, block_width=block_width)

    self.fig = None
    self.ax = None
def _unwrap_struck_timestamps(timestamps, clock=100e6, uint_max=4294967295):
    """
    convert raw clock ticks to seconds and undo counter rollover.

    every time a timestamp is smaller than the one before it, the counter is
    taken to have wrapped, and that entry plus all following entries are
    shifted up by one more `uint_max / clock` seconds.

    Parameters
    ----------
    timestamps : array-like
        raw timestamps in clock ticks, in acquisition order.
    clock : float
        clock frequency in Hz (default 100 MHz).
    uint_max : int
        counter value used for the rollover offset (default 0xffffffff).

    Returns
    -------
    numpy.ndarray
        timestamps in seconds, same length as the input (may be empty).
    """
    # NOTE(review): a counter that wraps after 0xffffffff has a period of
    # 2**32 ticks; the offset kept here from the original code is one tick
    # shorter -- confirm which one is intended.
    ts = np.asarray(timestamps) / clock

    # flag the first entry after each rollover; entry 0 can never be one
    wrapped = np.zeros(len(ts), dtype=bool)
    wrapped[1:] = np.diff(ts) < 0

    # running count of rollovers seen so far = number of offsets to add
    return ts + np.cumsum(wrapped) * (uint_max / clock)


def dsp_to_hit_cage(f_dsp, f_hit, dg, n_max=None, verbose=False, t_start=None):
    """
    non-general placeholder for creating a pygama 'hit' file.  uses pandas.
    for every file, apply:
    - energy calibration (peakfit results)
    - timestamp correction
    for a more general dsp_to_hit, maybe each function could be given in terms
    of an 'apply' on a dsp dataframe ...

    TODO: create entry config['rawe'] with list of energy pars to calibrate,
    as in energy_cal.py

    Parameters
    ----------
    f_dsp : str
        input dsp file, read from the table named by dg.config['input_table'].
    f_hit : str
        output hit file.  an existing file at this path is deleted first.
    dg : object
        must provide `config['input_table']`, `config['ecaldb']` (path to a
        JSON calibration database) and `fileDB.run`.
    n_max : optional
        currently unused.
    verbose : bool
        currently unused.
    t_start : number, optional
        if given, column 'ts_glo' = 'ts_sec' + t_start is added.

    Raises
    ------
    SystemExit
        if `dg.fileDB` contains more than one run (not supported yet).
    """
    rawe = ['trapEmax']

    # create initial 'hit' DataFrame from dsp data
    hit_store = lh5.Store()
    data = hit_store.read_object(dg.config['input_table'], f_dsp)
    df_hit = data.get_dataframe()

    # 1. get energy calibration for this run from peakfit
    cal_db = db.TinyDB(storage=MemoryStorage)
    with open(dg.config['ecaldb']) as f:
        raw_db = json.load(f)
    cal_db.storage.write(raw_db)

    runs = dg.fileDB.run.unique()
    if len(runs) > 1:
        print("sorry, I can't do combined runs yet")
        # the builtin exit() is an interactive-shell helper and reported
        # success (status 0); signal the failure explicitly instead
        raise SystemExit(1)
    run = runs[0]

    for etype in rawe:
        tb = cal_db.table(f'peakfit_{etype}').all()
        df_cal = pd.DataFrame(tb)
        df_cal['run'] = df_cal['run'].astype(int)
        df_run = df_cal.loc[df_cal.run == run]
        # np.poly1d takes the highest-order coefficient first, so this
        # evaluates cal0 * x**2 + cal1 * x + cal2
        cal_pars = df_run.iloc[0][['cal0', 'cal1', 'cal2']]
        pol = np.poly1d(cal_pars)  # handy numpy polynomial object
        df_hit[f'{etype}_cal'] = pol(df_hit[f'{etype}'])

    # 2. compute timestamp rollover correction (specific to struck 3302)
    df_hit['ts_sec'] = _unwrap_struck_timestamps(df_hit['timestamp'].values)

    # 3. compute global timestamp
    if t_start is not None:
        df_hit['ts_glo'] = df_hit['ts_sec'] + t_start

    # write to LH5 file (start from a clean file)
    if os.path.exists(f_hit):
        os.remove(f_hit)
    sto = lh5.Store()
    tb_name = dg.config['input_table'].replace('dsp', 'hit')
    tb_lh5 = lh5.Table(size=len(df_hit))

    for col in df_hit.columns:
        tb_lh5.add_field(col, lh5.Array(df_hit[col].values,
                                        attrs={'units': ''}))
        print(col)

    print(f'Writing table: {tb_name} in file:\n {f_hit}')
    sto.write_object(tb_lh5, tb_name, f_hit)
# figure-of-merit evaluated at every grid point
def fltp_sig_mean(tb_out, verbosity):
    """
    average the 'fltp2_sig' values of the output table.
    the result is also printed when verbosity > 1.
    """
    fom = np.average(tb_out['fltp2_sig'].nda)
    if verbosity > 1:
        print(f'mean: {fom}')
    return fom


# energy selection: which energy field, and which named range of it
energy_name = 'energy'
range_name = '40K_1460'

# process one detector at a time
detectors = ['oppi']
store = lh5.Store()
for detector in detectors:

    # indices of the events that fall inside the selected energy range
    det_db = apdb[detector]
    lh5_group = 'ORSIS3302DecoderForEnergy/raw'
    idx = select_energies(
        energy_name,
        range_name,
        filenames,
        det_db,
        lh5_group=lh5_group,
    )

    # read only the waveforms belonging to those events
    waveform_name = 'ORSIS3302DecoderForEnergy/raw/waveform/'
    waveforms, _ = store.read_object(waveform_name, filenames, idx=idx)
    print(f'{len(waveforms)} wfs for {detector}')

    # build the table for processing
def get_superpulses(dfp, dg, f_super):
    """
    calculate average waveforms for each set of pulser data.
    save an output file with the superpulses for further analysis.

    Parameters
    ----------
    dfp : pandas.DataFrame
        one row per pulser run.  each row must provide 'E_keV', 'runtime',
        'V_pulser', 'cycle', 'dsp_path', 'dsp_file', 'raw_path', 'raw_file'.
        a 'superpulse' column is added to this DataFrame.
    dg : object
        must provide `lh5_dir`, the directory prefix of the dsp/raw files.
    f_super : str
        output file, written with DataFrame.to_hdf under key 'superpulses'.

    Notes
    -----
    one plot per pulser run is saved to ./plots/superpulse_cyc{cycle}.png.
    rows with E_keV == 0 (bkg run), and rows where no event falls inside the
    1 sigma window, get an empty list instead of a superpulse.
    """
    # find this with the show_spectra function above
    # ecal = 1460.8 / 2.005e6 # TODO: find the const for oct 2020
    ecal = 1460.8 / 2.005e6  # works for pulser dataset 2 (dec 2020)

    # more settings
    show_plots = True  # default True
    write_output = True
    nwfs = 1000  # limit number to go fast. 1000 is enough for a good measurement
    tp_align = 0.5  # pct timepoint to align wfs at (alignment disabled below)
    e_window = 20  # plot (in keV) this window around each pulser peak
    n_pre, n_post = 50, 100  # num samples before/after tp_align (disabled below)
    bl_thresh = 10  # allowable baseline ADC deviation

    dsp_name = 'ORSIS3302DecoderForEnergy/dsp'
    raw_name = 'ORSIS3302DecoderForEnergy/raw/waveform'
    sto = lh5.Store()
    t_start = time.time()

    def analyze_pulser_run(df_row):
        """
        loop over each row of dfp and save the superpulse.
        returns the normalized superpulse (1d array), or [] if the row is
        skipped.
        """
        epk, rt, vp, cyc = df_row[['E_keV', 'runtime', 'V_pulser', 'cycle']]
        rt *= 60  # sec
        if epk == 0:
            return []  # skip the bkg run

        # load pulser energies
        f_dsp = dg.lh5_dir + '/' + df_row.dsp_path + '/' + df_row.dsp_file
        pdata = lh5.load_nda([f_dsp], ['energy'], dsp_name)['energy'] * ecal

        # auto-narrow the window around the max pulser peak in two steps
        elo, ehi, epb = epk - 50, epk + 50, 0.5
        pdata_all = pdata[(pdata > elo) & (pdata < ehi)]
        hp, bp, _ = pgh.get_hist(pdata_all, range=(elo, ehi), dx=epb)
        pctr = bp[np.argmax(hp)]

        plo, phi, ppb = pctr - e_window, pctr + e_window, 0.1
        pdata_pk = pdata[(pdata > plo) & (pdata < phi)]
        hp, bp, bpvars = pgh.get_hist(pdata_pk, range=(plo, phi), dx=ppb)
        hp_rt = np.divide(hp, rt)
        hp_var = np.sqrt(hp / rt)

        # fit a gaussian to get 1 sigma e-values.
        # 14 July 2021 Joule changed p_init to use outputs of
        # gauss_mode_width_max() b/c fit wasn't working with the previous
        # FWHM-based initial guess.  that guess is no longer computed here:
        # it was unused, and its half-max bin lookup could raise IndexError.
        pars, _ = pgf.gauss_mode_width_max(hp, bp, bpvars, n_bins=50)
        p_init = [pars[2], pars[0], pars[1], 1]
        p_fit, p_cov = pgf.fit_hist(pgf.gauss_bkg, hp, bp, var=hp_var,
                                    guess=p_init)
        _, mu, sigma, _ = p_fit

        # select events within 1 sigma of the maximum
        # and pull the waveforms from the raw file to make a superpulse.
        # np.where returns a tuple: always unpack it, so idx is a 1d index
        # array whether or not the nwfs limit kicks in.
        idx = np.where((pdata >= mu - sigma) & (pdata <= mu + sigma))[0]
        print(f'Pulser at {epk} keV, {len(idx)} events. Limiting to {nwfs}.')
        if len(idx) == 0:
            print(f'No events in the 1 sigma window for cycle {cyc}, skipping.')
            return []
        idx = idx[:nwfs]

        # grab the 2d numpy array of pulser wfs
        n_rows = idx[-1] + 1  # read up to this event and stop
        f_raw = dg.lh5_dir + '/' + df_row.raw_path + '/' + df_row.raw_file
        tb_wfs, _ = sto.read_object(raw_name, f_raw, n_rows=n_rows)
        pwfs = tb_wfs['values'].nda[idx, :]

        # data cleaning step: remove events with outlier baselines.
        # the baseline is the mean of the first 500 samples of each wf.
        bl_means = pwfs[:, :500].mean(axis=1)
        # mode(...)[0] is a length-1 array or a scalar depending on the
        # scipy version; atleast_1d accepts both
        bl_mode = np.atleast_1d(mode(bl_means.astype(int))[0])[0]
        bl_ok = np.abs(bl_means - bl_mode) < bl_thresh
        pwfs = pwfs[bl_ok, :]
        bl_means = bl_means[bl_ok]
        print(pwfs.shape, bl_means.shape)

        # baseline subtract (trp when leading (not trailing) dim is the same)
        wfs = (pwfs.transpose() - bl_means).transpose()

        # time-alignment at the 50% timepoint is DISABLED (15 July 2021,
        # Joule): it cuts every wf down to n_pre + n_post = 150 samples
        # around its timepoint, so the superpulse no longer has the full
        # record length.  kept for reference.
        # adapted from pygama/sandbox/old_dsp/[calculators,transforms].py
        # an alternate approach would be to use ProcessingChain here
        # wf_maxes = np.amax(wfs, axis=1)
        # timepoints = np.argmax(wfs >= wf_maxes[:, None]*tp_align, axis=1)
        # wf_idxs = np.zeros([wfs.shape[0], n_pre + n_post], dtype=int)
        # row_idxs = np.zeros_like(wf_idxs)
        # for i, tp in enumerate(timepoints):
        #     wf_idxs[i, :] = np.arange(tp - n_pre, tp + n_post)
        #     row_idxs[i, :] = i
        # wfs = wfs[row_idxs, wf_idxs]

        # take the average to get the superpulse
        superpulse = np.mean(wfs, axis=0)

        # normalize all wfs to the superpulse maximum
        wfmax = np.amax(superpulse)
        superpulse = np.divide(superpulse, wfmax)
        wfs = np.divide(wfs, wfmax)

        # -- plot results --
        if show_plots:
            fig, (p0, p1) = plt.subplots(2, figsize=(7, 8))

            # plot fit result (top), and waveforms + superpulse (bottom)
            xfit = np.arange(plo, phi, ppb * 0.1)
            p0.plot(xfit, pgf.gauss_bkg(xfit, *p_init), '-', c='orange',
                    label='init')
            p0.plot(xfit, pgf.gauss_bkg(xfit, *p_fit), '-', c='red',
                    label='fit')

            # plot 1 sigma window
            p0.axvspan(mu - sigma, mu + sigma, color='m', alpha=0.2,
                       label='1 sigma')

            # plot data
            p0.plot(bp[1:], hp_rt, ds='steps', c='k', lw=1,
                    label=f'{vp:.2f} V')
            p0.set_xlabel(f'onboard energy (keV, c={ecal:.2e})',
                          ha='right', x=1)
            p0.set_ylabel('cts / s', ha='right', y=1)
            p0.legend(fontsize=10)

            # plot individ. wfs
            ts = np.arange(0, wfs.shape[1])
            for wf in wfs:
                p1.plot(ts, wf, '-k', lw=2, alpha=0.5)
            p1.plot(np.nan, np.nan, '-k', label=f'wfs, {epk:.0f} keV')

            # plot superpulse
            p1.plot(ts, superpulse, '-r', lw=2,
                    label=f'superpulse, {vp:.2f} V')
            p1.set_xlabel('time (10 ns)', ha='right', x=1)
            p1.set_ylabel('amplitude', ha='right', y=1)
            p1.legend(fontsize=10)

            # plt.show()
            fig.savefig(f'./plots/superpulse_cyc{cyc}.png', dpi=150)
            # a new figure is made for every row: close it so they don't
            # pile up in memory
            plt.close(fig)

        # save the superpulse to our output file
        print(f'length of superpulse: {len(superpulse)}')
        return superpulse

    dfp['superpulse'] = dfp.apply(analyze_pulser_run, axis=1)

    # drop the duplicated 'run' row before saving
    dfp = dfp.loc[:, ~dfp.columns.duplicated()]
    # print(dfp.columns)
    print(dfp)

    if write_output:
        print('Saving output file: ', f_super)
        dfp.to_hdf(f_super, key='superpulses')

    t_elap = (time.time() - t_start) / 60
    print(f'Done. Elapsed: {t_elap:.2f} min.')