def raw_to_dsp(ds, overwrite=False, nevt=None, test=False, verbose=2,
               block=8, group=''):
    """
    Run raw_to_dsp on a set of runs.
    [raw file] ---> [dsp_run{}.lh5] (digital signal processing results)
    """
    for run in ds.runs:
        raw_file = "/lfs/l1/legend/users/dandrea/pygama/pgt/tier1/pgt_longtrace_run0117-20200110-105115-calib_raw.lh5"
        dsp_file = "/lfs/l1/legend/users/dandrea/pygama/pgt/tier2/pgt_longtrace_run0117-20200110-105115-calib_dsp.lh5"
        #raw_file = ds.paths[run]["raw_path"]
        #dsp_file = ds.paths[run]["dsp_path"]
        print("raw_file:", raw_file)
        print("dsp_file:", dsp_file)

        if dsp_file is not None and overwrite is False:
            continue

        if dsp_file is None:
            # declare new file name
            dsp_file = raw_file.replace('raw_', 'dsp_')

        if test:
            print("test mode (dry run), processing raw file:", raw_file)
            continue

        # new LH5 version: open the raw file with h5py and loop over its groups
        lh5_in = lh5.Store()
        #groups = lh5_in.ls(raw_file, group)
        f = h5py.File(raw_file, 'r')
        print("File info:", f.keys())

        for group in f.keys():
            print("Processing: " + raw_file + '/' + group)
            #data = lh5_in.read_object(group, raw_file)
            data = f[group]['raw']
            wf_in = data['waveform']['values'][()]
            dt = data['waveform']['dt'][0] * unit_parser.parse_unit(
                data['waveform']['dt'].attrs['units'])

            # Parameters for DCR calculation
            dcr_trap_int = 200
            dcr_trap_flat = 1000
            dcr_trap_startSample = 1200

            # Set up processing chain
            proc = ProcessingChain(block_width=block, clock_unit=dt,
                                   verbosity=verbose)
            proc.add_input_buffer("wf", wf_in, dtype='float32')

            # Basic filters
            proc.add_processor(mean_stdev, "wf[0:1000]", "bl", "bl_sig")
            proc.add_processor(np.subtract, "wf", "bl", "wf_blsub")
            proc.add_processor(pole_zero, "wf_blsub", 145*us, "wf_pz")
            proc.add_processor(trap_norm, "wf_pz", 10*us, 5*us, "wf_trap")
            proc.add_processor(asymTrapFilter, "wf_pz", 0.05*us, 2*us, 4*us, "wf_atrap")

            # Timepoint calculation
            proc.add_processor(np.argmax, "wf_blsub", 1, "t_max",
                               signature='(n),()->()', types=['fi->i'])
            proc.add_processor(time_point_frac, "wf_blsub", 0.95, "t_max", "tp_95")
            proc.add_processor(time_point_frac, "wf_blsub", 0.8, "t_max", "tp_80")
            proc.add_processor(time_point_frac, "wf_blsub", 0.5, "t_max", "tp_50")
            proc.add_processor(time_point_frac, "wf_blsub", 0.2, "t_max", "tp_20")
            proc.add_processor(time_point_frac, "wf_blsub", 0.05, "t_max", "tp_05")
            proc.add_processor(time_point_thresh, "wf_atrap[0:2000]", 0, "tp_0")

            # Energy calculation
            proc.add_processor(np.amax, "wf_trap", 1, "trapEmax",
                               signature='(n),()->()', types=['fi->f'])
            proc.add_processor(fixed_time_pickoff, "wf_trap", "tp_0+(5*us+9*us)", "trapEftp")
            proc.add_processor(trap_pickoff, "wf_pz", 1.5*us, 0, "tp_0", "ct_corr")

            # Current calculation
            proc.add_processor(avg_current, "wf_pz", 10, "curr(len(wf_pz)-10, f)")
            proc.add_processor(np.amax, "curr", 1, "curr_amp",
                               signature='(n),()->()', types=['fi->f'])
            proc.add_processor(np.divide, "curr_amp", "trapEftp", "aoe")

            # DCR calculation: slope from two 200-sample averages spaced
            # 1000 samples apart, starting 1.5 us after t0
            proc.add_processor(trap_pickoff, "wf_pz", 200, 1000, "tp_0+1.5*us", "dcr_unnorm")
            proc.add_processor(np.divide, "dcr_unnorm", "trapEftp", "dcr")

            # Tail slope. Basically the same as DCR, except with no PZ correction
            proc.add_processor(linear_fit, "wf_blsub[3000:]", "wf_b", "wf_m")
            proc.add_processor(np.divide, "-wf_b", "wf_m", "tail_rc")

            # add ZAC filter energy calculation
            sigma = 10*us
            flat = 1*us
            decay = 160*us
            proc.add_processor(zac_filter, "wf", sigma, flat, decay, "wf_zac(101, f)")
            proc.add_processor(np.amax, "wf_zac", 1, "zacE",
                               signature='(n),()->()', types=['fi->f'])

            # Set up the LH5 output
            lh5_out = lh5.Table(size=proc._buffer_len)
            lh5_out.add_field("zacE", lh5.Array(proc.get_output_buffer("zacE"), attrs={"units":"ADC"}))
            lh5_out.add_field("trapEmax", lh5.Array(proc.get_output_buffer("trapEmax"), attrs={"units":"ADC"}))
            lh5_out.add_field("trapEftp", lh5.Array(proc.get_output_buffer("trapEftp"), attrs={"units":"ADC"}))
            lh5_out.add_field("ct_corr", lh5.Array(proc.get_output_buffer("ct_corr"), attrs={"units":"ADC*ns"}))
            lh5_out.add_field("bl", lh5.Array(proc.get_output_buffer("bl"), attrs={"units":"ADC"}))
            lh5_out.add_field("bl_sig", lh5.Array(proc.get_output_buffer("bl_sig"), attrs={"units":"ADC"}))
            lh5_out.add_field("A", lh5.Array(proc.get_output_buffer("curr_amp"), attrs={"units":"ADC"}))
            lh5_out.add_field("AoE", lh5.Array(proc.get_output_buffer("aoe"), attrs={"units":"ADC"}))
            lh5_out.add_field("dcr", lh5.Array(proc.get_output_buffer("dcr"), attrs={"units":"ADC"}))
            # note: 'tp_max' is currently filled from the 'tp_95' buffer
            lh5_out.add_field("tp_max", lh5.Array(proc.get_output_buffer("tp_95", unit=us), attrs={"units":"us"}))
            lh5_out.add_field("tp_95", lh5.Array(proc.get_output_buffer("tp_95", unit=us), attrs={"units":"us"}))
            lh5_out.add_field("tp_80", lh5.Array(proc.get_output_buffer("tp_80", unit=us), attrs={"units":"us"}))
            lh5_out.add_field("tp_50", lh5.Array(proc.get_output_buffer("tp_50", unit=us), attrs={"units":"us"}))
            lh5_out.add_field("tp_20", lh5.Array(proc.get_output_buffer("tp_20", unit=us), attrs={"units":"us"}))
            lh5_out.add_field("tp_05", lh5.Array(proc.get_output_buffer("tp_05", unit=us), attrs={"units":"us"}))
            lh5_out.add_field("tp_0", lh5.Array(proc.get_output_buffer("tp_0", unit=us), attrs={"units":"us"}))
            lh5_out.add_field("tail_rc", lh5.Array(proc.get_output_buffer("tail_rc", unit=us), attrs={"units":"us"}))

            print("Processing:\n", proc)
            proc.execute()

            #groupname = group[:group.rfind('/')+1]+"data"
            groupname = group + "/data"
            print("Writing to: " + dsp_file + "/" + groupname)
            lh5_in.write_object(lh5_out, groupname, dsp_file)
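
# Usage sketch (hypothetical names): raw_to_dsp expects a pygama DataSet-like
# object whose .runs lists run numbers and whose .paths maps each run to
# "raw_path"/"dsp_path" entries. The constructor arguments below are
# illustrative assumptions, not part of this module.
#
# from pygama import DataSet
# ds = DataSet(run=117, md='runDB.json')   # hypothetical metadata file
# raw_to_dsp(ds, overwrite=True, verbose=2, block=8)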
def optimize_trap(dg):
    """
    Generate a file with grid points to search, and events from the target peak.
    Then run DSP a bunch of times on the small table, and fit the peak w/ the
    peakshape function.
    NOTE: run table-to-table DSP (no file I/O)
    """
    f_peak = './temp_peak.lh5'       # lh5
    f_results = './temp_results.h5'  # pandas
    grp_data, grp_grid = '/optimize_data', '/optimize_grid'

    # epar, elo, ehi, epb = 'energy', 0, 1e7, 10000  # full range
    epar, elo, ehi, epb = 'energy', 3.88e6, 3.92e6, 500  # K40 peak

    show_movie = True
    write_output = True
    n_rows = None  # default None

    with open('opt_trap.json') as f:
        dsp_config = json.load(f, object_pairs_hook=OrderedDict)

    # files to consider. fixme: right now only works with one file
    sto = lh5.Store()
    lh5_dir = os.path.expandvars(dg.config['lh5_dir'])
    raw_list = lh5_dir + dg.fileDB['raw_path'] + '/' + dg.fileDB['raw_file']
    f_raw = raw_list.values[0]
    tb_raw = 'ORSIS3302DecoderForEnergy/raw/'

    # quick check of the energy range
    # ene_raw = sto.read_object(tb_raw+'/'+epar, f_raw).nda
    # hist, bins, var = pgh.get_hist(ene_raw, range=(elo, ehi), dx=epb)
    # plt.plot(bins[1:], hist, ds='steps')
    # plt.show()
    # exit()

    # set grid parameters
    # TODO: jason's suggestions, knowing the expected shape of the noise curve
    # e_rises = np.linspace(-1, 0, sqrt(sqrt(3)))
    # e_rises  # make another list which is 10^pwr of this list
    # np.linspace(log_tau_min, log_tau_max)  # try this too
    e_rises = np.arange(1, 12, 1)
    e_flats = np.arange(1, 6, 1)
    # rc_consts = np.arange(54, 154, 10)  # changing this here messes up DCR

    # -- create the grid search file the first time --
    # NOTE: this makes a linear grid, and is editable by the arrays above.
    # jason also proposed a more active gradient-descent style search
    # like with Brent's method. (https://en.wikipedia.org/wiki/Brent%27s_method)
    if True:  # if not os.path.exists(f_peak):
        print('Recreating grid search file')

        # create the grid file
        # NOTE: save it as an lh5 Table just as an example of writing/reading one
        lists = [e_rises, e_flats]  #, rc_consts]
        prod = list(itertools.product(*lists))  # clint <3 stackoverflow
        df_grid = pd.DataFrame(prod, columns=['rise', 'flat'])  #, 'rc'])
        lh5_grid = {}
        for i, dfcol in df_grid.iteritems():
            lh5_grid[dfcol.name] = lh5.Array(dfcol.values)
        tb_grid = lh5.Table(col_dict=lh5_grid)
        sto.write_object(tb_grid, grp_grid, f_peak)

        # filter events by onboard energy
        ene_raw = sto.read_object(tb_raw + '/' + epar, f_raw).nda
        # hist, bins, var = pgh.get_hist(ene_raw, range=(elo, ehi), dx=epb)
        # plt.plot(bins[1:], hist, ds='steps')
        # plt.show()
        if n_rows is not None:
            ene_raw = ene_raw[:n_rows]
        idx = np.where((ene_raw > elo) & (ene_raw < ehi))

        # create a filtered table with correct waveform and attrs
        # TODO: move this into a function in lh5.py which takes idx as an input
        tb_data, wf_tb_data = lh5.Table(), lh5.Table()

        # read non-wf cols (lh5 Arrays)
        data_raw = sto.read_object(tb_raw, f_raw, n_rows=n_rows)
        for col in data_raw.keys():
            if col == 'waveform':
                continue
            newcol = lh5.Array(data_raw[col].nda[idx], attrs=data_raw[col].attrs)
            tb_data.add_field(col, newcol)

        # handle waveform column (lh5 Table)
        data_wfs = sto.read_object(tb_raw + '/waveform', f_raw, n_rows=n_rows)
        for col in data_wfs.keys():
            attrs = data_wfs[col].attrs
            if isinstance(data_wfs[col], lh5.ArrayOfEqualSizedArrays):
                # idk why i can't put the filtered array into the constructor
                aoesa = lh5.ArrayOfEqualSizedArrays(attrs=attrs, dims=[1, 1])
                aoesa.nda = data_wfs[col].nda[idx]
                newcol = aoesa
            else:
                newcol = lh5.Array(data_wfs[col].nda[idx], attrs=attrs)
            wf_tb_data.add_field(col, newcol)
        tb_data.add_field('waveform', wf_tb_data)
        tb_data.attrs = data_raw.attrs
        sto.write_object(tb_data, grp_data, f_peak)
    else:
        print('Loading peak file. groups:', sto.ls(f_peak))
        tb_grid = sto.read_object(grp_grid, f_peak)
        tb_data = sto.read_object(grp_data, f_peak)  # filtered file
        # tb_data = sto.read_object(tb_raw, f_raw)   # orig file

    df_grid = tb_grid.get_dataframe()

    # check shape of input table
    print('input table attributes:')
    for key in tb_data.keys():
        obj = tb_data[key]
        if isinstance(obj, lh5.Table):
            for key2 in obj.keys():
                obj2 = obj[key2]
                print('  ', key, key2, obj2.nda.shape, obj2.attrs)
        else:
            print('  ', key, obj.nda.shape, obj.attrs)

    # clear new columns if they exist
    new_cols = ['e_fit', 'fwhm_fit', 'rchisq', 'xF_err', 'fwhm_ovr_mean']
    for col in new_cols:
        if col in df_grid.columns:
            df_grid.drop(col, axis=1, inplace=True)

    t_start = time.time()

    def run_dsp(dfrow):
        """
        run dsp on the test file, editing the processor list.
        alternate idea: generate a long list of processors with different names
        """
        # adjust dsp config dictionary
        rise, flat = dfrow
        # dsp_config['processors']['wf_pz']['defaults']['db.pz.tau'] = f'{tau}*us'
        dsp_config['processors']['wf_trap']['args'][1] = f'{rise}*us'
        dsp_config['processors']['wf_trap']['args'][2] = f'{flat}*us'
        # pprint(dsp_config)

        # run dsp
        pc, tb_out = build_processing_chain(tb_data, dsp_config, verbosity=0)
        pc.execute()

        # analyze peak
        e_peak = 1460.
        etype = 'trapEmax'
        elo, ehi, epb = 4000, 4500, 3  # the peak moves around a bunch
        energy = tb_out[etype].nda

        # get histogram
        hE, bins, vE = pgh.get_hist(energy, range=(elo, ehi), dx=epb)
        xE = bins[1:]

        # should I center the max at 1460?

        # simple numerical width
        i_max = np.argmax(hE)
        h_max = hE[i_max]
        upr_half = xE[(xE > xE[i_max]) & (hE <= h_max / 2)][0]
        bot_half = xE[(xE < xE[i_max]) & (hE >= h_max / 2)][0]
        fwhm = upr_half - bot_half
        sig = fwhm / 2.355

        # fit to gaussian: amp, mu, sig, bkg
        fit_func = pgf.gauss_bkg
        amp = h_max * fwhm
        bg0 = np.mean(hE[:20])
        x0 = [amp, xE[i_max], sig, bg0]
        xF, xF_cov = pgf.fit_hist(fit_func, hE, bins, var=vE, guess=x0)

        # collect results
        e_fit = xF[0]
        xF_err = np.sqrt(np.diag(xF_cov))
        e_err = xF  # NOTE: looks like a leftover; probably meant an entry of xF_err
        fwhm_fit = xF[1] * 2.355 * 1460. / e_fit
        fwhm_err = xF_err[2] * 2.355 * 1460. / e_fit

        chisq = []
        for i, h in enumerate(hE):
            model = fit_func(xE[i], *xF)
            diff = (model - h)**2 / model
            chisq.append(abs(diff))
        rchisq = sum(np.array(chisq) / len(hE))
        fwhm_ovr_mean = fwhm_fit / e_fit

        if show_movie:
            plt.plot(xE, hE, ds='steps', c='b', lw=2,
                     label=f'{etype} {rise}--{flat}')

            # peak shape
            plt.plot(xE, fit_func(xE, *x0), '-', c='orange', alpha=0.5,
                     label='init. guess')
            plt.plot(xE, fit_func(xE, *xF), '-r', alpha=0.8,
                     label='peakshape fit')
            plt.plot(np.nan, np.nan, '-w',
                     label=f'mu={e_fit:.1f}, fwhm={fwhm_fit:.2f}')

            plt.xlabel(etype, ha='right', x=1)
            plt.ylabel('Counts', ha='right', y=1)
            plt.legend(loc=2)

            # show a little movie
            plt.show(block=False)
            plt.pause(0.01)
            plt.cla()

        # return results
        # NOTE: the computed fwhm_err above is unused; xF_err[0] is returned instead
        return pd.Series({
            'e_fit': e_fit, 'fwhm_fit': fwhm_fit, 'rchisq': rchisq,
            'fwhm_err': xF_err[0], 'fwhm_ovr_mean': fwhm_ovr_mean
        })

    # df_grid = df_grid[:10]
    df_tmp = df_grid.progress_apply(run_dsp, axis=1)
    df_grid[new_cols] = df_tmp
    # print(df_grid)

    if show_movie:
        plt.close()

    print('elapsed:', time.time() - t_start)

    if write_output:
        df_grid.to_hdf(f_results, key=grp_grid)
        print(f"Wrote output file: {f_results}")
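
# Note on df_grid.progress_apply above: pandas only gains .progress_apply after
# tqdm is registered, typically once at import time. A minimal sketch:
#
# from tqdm import tqdm
# tqdm.pandas()   # adds DataFrame.progress_apply (apply wrapped in a progress bar)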
def process_ds(f_grid, f_opt, f_tier1, d_out, efilter):
    """
    process the windowed raw file 'f_tier1' and create the DSP file 'f_opt'
    """
    print("Grid file:", f_grid)
    df_grid = pd.read_hdf(f_grid)

    if os.path.exists(f_opt):
        os.remove(f_opt)

    if 'corr' in efilter:
        bfilter = efilter.split('corr')[0]
        try:
            df_res = pd.read_hdf(f'{d_out}/{bfilter}_results.h5', key='results')
            print("Extraction of best parameters for", bfilter)
        except:
            print(bfilter, "not optimized")
            return

    # open raw file
    lh5_in = lh5.Store()
    #groups = lh5_in.ls(f_tier1, '*/raw')
    f = h5py.File(f_tier1, 'r')
    #print("File info: ", f.keys())

    t_start = time.time()
    #for group in groups:
    for idx, ged in enumerate(f.keys()):
        if idx == 4:
            diff = time.time() - t_start
            tot = diff / 5 * len(df_grid) / 60
            tot -= diff / 60
            print(f"Estimated remaining time: {tot:.2f} mins")

        print("Detector:", ged)
        #data = lh5_in.read_object(group, f_tier1)
        data = f[ged]['raw']
        wf_in = data['waveform']['values'][()]
        dt = data['waveform']['dt'][0] * unit_parser.parse_unit(
            data['waveform']['dt'].attrs['units'])
        bl_in = data['baseline'][()]  # flashcam baseline values

        # Set up DSP processing chain -- very minimal
        block = 8  # waveforms to process simultaneously
        proc = ProcessingChain(block_width=block, clock_unit=dt, verbosity=False)
        proc.add_input_buffer("wf", wf_in, dtype='float32')
        proc.add_input_buffer("bl", bl_in, dtype='float32')

        wsize = wf_in.shape[1]
        dt0 = data['waveform']['dt'][0] * 0.001

        #proc.add_processor(mean_stdev, "wf[0:1000]", "bl", "bl_sig")
        proc.add_processor(np.subtract, "wf", "bl", "wf_blsub")

        for i, row in df_grid.iterrows():
            if 'corr' in efilter:
                ct_const = row
            if 'trapE' in efilter:
                if 'corr' in efilter:
                    rise, flat, rc = float(df_res['rise'][idx]), float(df_res['flat'][idx]), float(df_res['rc'][idx])
                else:
                    rise, flat, rc = row
                proc.add_processor(pole_zero, "wf_blsub", rc*us, "wf_pz")
                proc.add_processor(trap_norm, "wf_pz", rise*us, flat*us, f"wf_trap_{i}")
                proc.add_processor(asymTrapFilter, "wf_pz", 0.05*us, 4*us, 4*us, "wf_atrap")
                proc.add_processor(time_point_thresh, "wf_pz", 0, "tp_0")
                proc.add_processor(np.amax, f"wf_trap_{i}", 1, f"trapE_{i}",
                                   signature='(n),()->()', types=['fi->f'])
                proc.add_processor(fixed_time_pickoff, f"wf_trap_{i}",
                                   f"tp_0+({rise*us}+{flat*us})", f"trapEftp_{i}")
            if 'zacE' in efilter:
                if 'corr' in efilter:
                    sigma, flat, decay = float(df_res['sigma'][idx]), float(df_res['flat'][idx]), float(df_res['decay'][idx])
                else:
                    sigma, flat, decay = row
                proc.add_processor(zac_filter(wsize, sigma/dt0, flat/dt0, decay/dt0),
                                   "wf", f"wf_zac_{i}(101, f)")
                proc.add_processor(np.amax, f"wf_zac_{i}", 1, f"zacE_{i}",
                                   signature='(n),()->()', types=['fi->f'])
            if 'cuspE' in efilter:
                if 'corr' in efilter:
                    sigma, flat, decay = float(df_res['sigma'][idx]), float(df_res['flat'][idx]), float(df_res['decay'][idx])
                else:
                    sigma, flat, decay = row
                proc.add_processor(cusp_filter(wsize, sigma/dt0, flat/dt0, decay/dt0),
                                   "wf_blsub", f"wf_cusp_{i}(101, f)")
                proc.add_processor(np.amax, f"wf_cusp_{i}", 1, f"cuspE_{i}",
                                   signature='(n),()->()', types=['fi->f'])
            if 'corr' in efilter:
                proc.add_processor(trap_pickoff, "wf_pz", 1.5*us, 0, "tp_0", "ct_corr")
                #proc.add_processor(trap_pickoff, "wf_pz", rise*us, flat*us, "tp_0", "ct_corr")
                proc.add_processor(np.multiply, ct_const, "ct_corr", f"ct_corr_cal_{i}")
                proc.add_processor(np.add, f"ct_corr_cal_{i}", f"{bfilter}_{i}", f"{efilter}_{i}")

        # Set up the LH5 output
        lh5_out = lh5.Table(size=proc._buffer_len)
        for i, row in df_grid.iterrows():
            lh5_out.add_field(f"{efilter}_{i}",
                              lh5.Array(proc.get_output_buffer(f"{efilter}_{i}"),
                                        attrs={"units": "ADC"}))

        print("Processing:\n", proc)
        proc.execute()

        #groupname = group[:group.rfind('/')+1]+"data"
        #groupname = df_key+"/"+group+"/data"
        groupname = ged + "/data"
        print("Writing to: " + f_opt + "/" + groupname)
        lh5_in.write_object(lh5_out, groupname, f_opt)
        print("")

    # list the datasets of the output file
    data_opt = lh5_in.ls(f_opt)
    #data_opt_0 = lh5_in.ls(f_opt, 'opt_0/*')
    data_opt_0 = lh5_in.ls(f_opt, 'g024/data/*')
    diff = time.time() - t_start
    print(f"Time to process: {diff:.2f} s")
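
# Read-back sketch (assumed layout): process_ds writes one column per grid point,
# e.g. 'trapE_0', 'trapE_1', ..., under '<detector>/data'. 'g024' below is an
# example detector key; depending on the pygama version, read_object may also
# return a row count alongside the table.
#
# sto = lh5.Store()
# tb_opt = sto.read_object('g024/data', f_opt)
# energies_0 = tb_opt['trapE_0'].nda   # energies for grid point 0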
def dsp_to_hit_cage(f_dsp, f_hit, dg, n_max=None, verbose=False, t_start=None):
    """
    non-general placeholder for creating a pygama 'hit' file.  uses pandas.
    for every file, apply:
    - energy calibration (peakfit results)
    - timestamp correction
    for a more general dsp_to_hit, maybe each function could be given in terms
    of an 'apply' on a dsp dataframe ...

    TODO: create entry config['rawe'] with list of energy pars to calibrate,
    as in energy_cal.py
    """
    rawe = ['trapEmax']

    # create initial 'hit' DataFrame from dsp data
    hit_store = lh5.Store()
    data = hit_store.read_object(dg.config['input_table'], f_dsp)
    df_hit = data.get_dataframe()

    # 1. get energy calibration for this run from peakfit
    cal_db = db.TinyDB(storage=MemoryStorage)
    with open(dg.config['ecaldb']) as f:
        raw_db = json.load(f)
        cal_db.storage.write(raw_db)
    runs = dg.file_keys.run.unique()
    if len(runs) > 1:
        print("sorry, I can't do combined runs yet")
        exit()
    run = runs[0]
    for etype in rawe:
        tb = cal_db.table(f'peakfit_{etype}').all()
        df_cal = pd.DataFrame(tb)
        df_cal['run'] = df_cal['run'].astype(int)
        df_run = df_cal.loc[df_cal.run == run]
        cal_pars = df_run.iloc[0][['cal0', 'cal1', 'cal2']]
        pol = np.poly1d(cal_pars)  # handy numpy polynomial object
        df_hit[f'{etype}_cal'] = pol(df_hit[f'{etype}'])

    # 2. compute timestamp rollover correction (specific to struck 3302)
    clock = 100e6  # 100 MHz
    UINT_MAX = 4294967295  # (0xffffffff)
    t_max = UINT_MAX / clock
    ts = df_hit['timestamp'].values / clock
    tdiff = np.diff(ts)
    tdiff = np.insert(tdiff, 0, 0)
    iwrap = np.where(tdiff < 0)
    iloop = np.append(iwrap[0], len(ts))
    ts_new, t_roll = [], 0
    for i, idx in enumerate(iloop):
        ilo = 0 if i == 0 else iwrap[0][i - 1]
        ihi = idx
        ts_block = ts[ilo:ihi]
        t_last = ts[ilo - 1]
        t_diff = t_max - t_last
        ts_new.append(ts_block + t_roll)
        t_roll += t_last + t_diff
    df_hit['ts_sec'] = np.concatenate(ts_new)

    # 3. compute global timestamp
    if t_start is not None:
        df_hit['ts_glo'] = df_hit['ts_sec'] + t_start

    # write to LH5 file
    if os.path.exists(f_hit):
        os.remove(f_hit)
    sto = lh5.Store()
    tb_name = dg.config['input_table'].replace('dsp', 'hit')
    tb_lh5 = lh5.Table(size=len(df_hit))
    for col in df_hit.columns:
        tb_lh5.add_field(col, lh5.Array(df_hit[col].values, attrs={'units': ''}))
        print(col)
    print(f'Writing table: {tb_name} in file:\n    {f_hit}')
    sto.write_object(tb_lh5, tb_name, f_hit)
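
# A self-contained sketch (synthetic values) of the rollover correction in
# dsp_to_hit_cage above: a 32-bit counter wraps at UINT_MAX, so raw timestamps
# jump backwards at each wrap; adding one full counter period per wrap restores
# a monotonic time axis. Note that t_last + (t_max - t_last) in the loop above
# always reduces to t_max, so the bookkeeping only decides where segments start.
def _timestamp_rollover_demo():
    clock = 100e6                  # 100 MHz
    t_max = 4294967295 / clock     # ~42.9 s per 32-bit rollover

    # synthetic timestamps (already divided by clock), wrapping twice
    ts = np.array([10., 20., 40., 5., 30., 2., 15.])
    tdiff = np.insert(np.diff(ts), 0, 0)
    iwrap = np.where(tdiff < 0)            # wrap points: backwards jumps
    iloop = np.append(iwrap[0], len(ts))   # segment boundaries

    ts_new, t_roll = [], 0
    for i, idx in enumerate(iloop):
        ilo = 0 if i == 0 else iwrap[0][i - 1]
        ts_new.append(ts[ilo:idx] + t_roll)  # shift this segment up
        t_roll += t_max                      # one counter period per wrap
    return np.concatenate(ts_new)            # monotonically increasing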
def raw_to_dsp(ds, overwrite=False, nevt=None, test=False, verbose=2,
               block=8, group='daqdata'):
    """
    Run raw_to_dsp on a set of runs.
    [raw file] ---> [dsp_run{}.lh5] (digital signal processing results)
    """
    for run in ds.runs:
        raw_file = ds.paths[run]["raw_path"]
        dsp_file = ds.paths[run]["dsp_path"]

        if dsp_file is not None and overwrite is False:
            continue

        if dsp_file is None:
            # declare new file name
            dsp_file = raw_file.replace('raw', 'dsp')

        if test:
            print("test mode (dry run), processing raw file:", raw_file)
            continue

        # new LH5 version
        lh5_in = lh5.Store()
        data = lh5_in.read_object("/ORSIS3302DecoderForEnergy", raw_file)

        wf_in = data['waveform']['values'].nda
        dt = data['waveform']['dt'].nda[0] * unit_parser.parse_unit(
            data['waveform']['dt'].attrs['units'])

        # Parameters for DCR calculation
        dcr_trap_int = 200
        dcr_trap_flat = 1000
        dcr_trap_startSample = 1200

        # Set up processing chain
        proc = ProcessingChain(block_width=block, clock_unit=dt,
                               verbosity=verbose)
        proc.add_input_buffer("wf", wf_in, dtype='float32')

        proc.add_processor(mean_stdev, "wf[0:1000]", "bl", "bl_sig")
        proc.add_processor(np.subtract, "wf", "bl", "wf_blsub")
        proc.add_processor(pole_zero, "wf_blsub", 70 * us, "wf_pz")
        proc.add_processor(asymTrapFilter, "wf_pz", 10 * us, 5 * us, 10 * us,
                           "wf_atrap")
        proc.add_processor(np.amax, "wf_atrap", 1, "atrapE",
                           signature='(n),()->()', types=['fi->f'])
        # proc.add_processor(np.divide, "atrapmax", 10*us, "atrapE")
        proc.add_processor(trap_norm, "wf_pz", 10 * us, 5 * us, "wf_trap")
        proc.add_processor(np.amax, "wf_trap", 1, "trapE",
                           signature='(n),()->()', types=['fi->f'])
        proc.add_processor(avg_current, "wf_pz", 10, "curr")
        proc.add_processor(np.amax, "curr", 1, "A_10",
                           signature='(n),()->()', types=['fi->f'])
        proc.add_processor(np.divide, "A_10", "trapE", "AoE")
        proc.add_processor(trap_pickoff, "wf_pz", dcr_trap_int, dcr_trap_flat,
                           dcr_trap_startSample, "dcr")

        # Set up the LH5 output
        lh5_out = lh5.Table(size=proc._buffer_len)
        lh5_out.add_field(
            "trapE",
            lh5.Array(proc.get_output_buffer("trapE"), attrs={"units": "ADC"}))
        lh5_out.add_field(
            "bl",
            lh5.Array(proc.get_output_buffer("bl"), attrs={"units": "ADC"}))
        lh5_out.add_field(
            "bl_sig",
            lh5.Array(proc.get_output_buffer("bl_sig"), attrs={"units": "ADC"}))
        lh5_out.add_field(
            "A",
            lh5.Array(proc.get_output_buffer("A_10"), attrs={"units": "ADC"}))
        lh5_out.add_field(
            "AoE",
            lh5.Array(proc.get_output_buffer("AoE"), attrs={"units": "ADC"}))
        lh5_out.add_field(
            "dcr",
            lh5.Array(proc.get_output_buffer("dcr"), attrs={"units": "ADC"}))

        print("Processing:\n", proc)
        proc.execute()

        print("Writing to:", dsp_file)
        lh5_in.write_object(lh5_out, "data", dsp_file)
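
# Quick-look sketch: the "data" table written above can be read back into pandas
# for inspection (names assumed; API as used elsewhere in this file).
#
# sto = lh5.Store()
# tb_dsp = sto.read_object("data", dsp_file)
# df_dsp = tb_dsp.get_dataframe()
# df_dsp['AoE'].hist(bins=100)   # e.g. inspect the A/E distribution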
def build_processing_chain(lh5_in, dsp_config, outputs=None, verbosity=1,
                           block_width=8):
    """
    Produces a ProcessingChain object and an lh5 table for output parameters
    from an input lh5 table and a json recipe.

    Returns (proc_chain, lh5_out):
    - proc_chain: ProcessingChain object that is bound to lh5_in and lh5_out;
      all you need to do is handle file i/o for lh5_in/out and run execute
    - lh5_out: output LH5 table

    Required arguments:
    - lh5_in: input LH5 table
    - dsp_config: dict or name of json file containing a recipe for
      constructing the ProcessingChain object produced by this function.
      dsp_config is formatted as a json dict with different processors. It
      should have a dictionary called 'processors', containing dictionaries
      of the following format:
        Key: parameter name: name of parameter produced by the processor.
             can optionally provide multiple, separated by spaces
        Values:
          function (req): name of the gufunc
          module (req): name of the module in which to find the function
          prereqs (req): name of parameters from other processors and from
            input that are required to exist to run this
          args (req): list of arguments for the processor, with variables
            passed by name or value. Names should either be inputs from
            lh5_in, or parameter names for other processors. Names of the
            format db.name will look up the parameter in the metadata.
          kwargs (opt): kwargs used when adding processors to proc_chain
          init_args (opt): args used when initializing a processor that has
            static data (for factory functions)
          defaults (opt): default values for db parameters if not found
          unit (opt): unit to be used for attr in the lh5 file.
      There may also be a list called 'outputs', containing a list of
      parameters to put into lh5_out.

    Optional keyword arguments:
    - outputs: list of parameters to put in the output lh5 table. If None,
      use the parameters in the 'outputs' list from dsp_config
    - verbosity: verbosity level:
        0: Print nothing (except errors...)
        1: Print basic warnings (default)
        2: Print basic debug info
        3: Print friggin' everything!
    - block_width: number of entries to process at once.
    """
    if isinstance(dsp_config, str):
        with open(dsp_config) as f:
            dsp_config = json.load(f)
    elif dsp_config is None:
        dsp_config = {'outputs': [], 'processors': {}}
    else:
        # We don't want to modify the input!
        dsp_config = deepcopy(dsp_config)

    if outputs is None:
        outputs = dsp_config['outputs']

    processors = dsp_config['processors']

    # for processors with multiple outputs, add separate entries to the
    # processor list
    for key in list(processors):
        keys = [k for k in re.split(",| ", key) if k != '']
        if len(keys) > 1:
            for k in keys:
                processors[k] = key

    # Recursive function to crawl through the parameters/processors and get a
    # sequence of unique parameters such that parameters always appear after
    # their dependencies. For parameters that are not produced by the ProcChain
    # (i.e. input/db parameters), add them to the list of leafs.
    # https://www.electricmonk.nl/docs/dependency_resolving_algorithm/dependency_resolving_algorithm.html
    def resolve_dependencies(par, resolved, leafs, unresolved=[]):
        if par in resolved:
            return
        elif par in unresolved:
            raise Exception('Circular references detected: %s' % par)

        # if we don't find a node, this is a leaf
        node = processors.get(par)
        if node is None:
            if par not in leafs:
                leafs.append(par)
            return

        # if it's a string, that means it is part of a processor that returns
        # multiple outputs (see above); in that case, node is a str pointing
        # to the actual node we want
        if isinstance(node, str):
            resolve_dependencies(node, resolved, leafs, unresolved)
            return

        edges = node['prereqs']
        unresolved.append(par)
        for edge in edges:
            resolve_dependencies(edge, resolved, leafs, unresolved)
        resolved.append(par)
        unresolved.remove(par)

    proc_par_list = []   # calculated from processors
    input_par_list = []  # input from file and used for processors
    copy_par_list = []   # copied from input to output
    out_par_list = []
    for out_par in outputs:
        if out_par not in processors:
            copy_par_list.append(out_par)
        else:
            resolve_dependencies(out_par, proc_par_list, input_par_list)
            out_par_list.append(out_par)

    if verbosity > 0:
        print('Processing parameters:', str(proc_par_list))
        print('Required input parameters:', str(input_par_list))
        print('Copied output parameters:', str(copy_par_list))
        print('Processed output parameters:', str(out_par_list))

    proc_chain = ProcessingChain(block_width, lh5_in.size, verbosity=verbosity)

    # Now add all of the input buffers from lh5_in (and also the clk time)
    for input_par in input_par_list:
        buf_in = lh5_in.get(input_par)
        if buf_in is None:
            print("I don't know what to do with " + input_par +
                  ". Building output without it!")
        elif isinstance(buf_in, lh5.Array):
            proc_chain.add_input_buffer(input_par, buf_in.nda)
        elif isinstance(buf_in, lh5.Table):
            # check if this is a waveform table
            if all(key in buf_in for key in ('t0', 'dt', 'values')):
                proc_chain.add_input_buffer(input_par, buf_in['values'].nda,
                                            'float32')
                clk = buf_in['dt'].nda[0] * unit_parser.parse_unit(
                    lh5_in['waveform']['dt'].attrs['units'])
                if proc_chain._clk is not None and proc_chain._clk != clk:
                    print("Somehow you managed to set multiple clock "
                          "frequencies...Using " + str(proc_chain._clk))
                else:
                    proc_chain._clk = clk

    # now add the processors
    for proc_par in proc_par_list:
        recipe = processors[proc_par]
        module = importlib.import_module(recipe['module'])
        func = getattr(module, recipe['function'])
        args = recipe['args']
        for i, arg in enumerate(args):
            if isinstance(arg, str) and arg[0:3] == 'db.':
                # TODO: ADD METADATA LOOKUP!
                args[i] = recipe['defaults'][arg]

        kwargs = recipe.get('kwargs', {})  # might also need metadata lookup here

        # if init_args are defined, parse any strings and then call func
        # as a factory/constructor function
        if 'init_args' in recipe:
            init_args = recipe['init_args']
            for i, arg in enumerate(init_args):
                if isinstance(arg, str):
                    if arg[0:3] == 'db.':
                        # TODO: ADD METADATA LOOKUP!
                        init_args[i] = recipe['defaults'][arg]
                    else:
                        # see if string can be parsed by proc_chain
                        try:
                            init_args[i] = proc_chain.get_variable(arg)
                        except:
                            pass
            if verbosity > 1:
                print("Building function", func.__name__, "from init_args",
                      init_args)
            func = func(*init_args)

        proc_chain.add_processor(func, *args, **kwargs)

    # build the output buffers
    lh5_out = lh5.Table(size=proc_chain._buffer_len)

    # add inputs that are directly copied
    for copy_par in copy_par_list:
        buf_in = lh5_in.get(copy_par)
        if isinstance(buf_in, lh5.Array):
            lh5_out.add_field(copy_par, buf_in)
        elif isinstance(buf_in, lh5.Table):
            # check if this is a waveform table
            if all(key in buf_in for key in ('t0', 'dt', 'values')):
                lh5_out.add_field(copy_par, buf_in['values'])
                clk = buf_in['dt'].nda[0] * unit_parser.parse_unit(
                    lh5_in['waveform']['dt'].attrs['units'])
                if proc_chain._clk is not None and proc_chain._clk != clk:
                    print("Somehow you managed to set multiple clock "
                          "frequencies...Using " + str(proc_chain._clk))
                else:
                    proc_chain._clk = clk
        else:
            print("I don't know what to do with " + copy_par +
                  ". Building output without it!")

    # finally, add the output buffers to lh5_out and the proc chain
    for out_par in out_par_list:
        recipe = processors[out_par]
        # special case for proc with multiple outputs
        if isinstance(recipe, str):
            i = [k for k in re.split(",| ", recipe) if k != ''].index(out_par)
            recipe = processors[recipe]
            unit = recipe['unit'][i]
        else:
            unit = recipe['unit']

        try:
            scale = convert(1, unit_parser.parse_unit(unit), clk)
        except InvalidConversion:
            scale = None

        buf_out = proc_chain.get_output_buffer(out_par, unit=scale)
        lh5_out.add_field(out_par, lh5.Array(buf_out, attrs={"units": unit}))

    return (proc_chain, lh5_out)
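
# A minimal config sketch for build_processing_chain, inferred from the parsing
# logic above; processor names, module paths, and values are illustrative
# assumptions. The "bl, bl_sig" key shows the multi-output convention, and
# "db."-prefixed args would be resolved from a 'defaults' entry.
example_dsp_config = {
    "outputs": ["bl", "bl_sig", "wf_blsub"],
    "processors": {
        "bl, bl_sig": {
            "function": "mean_stdev",            # gufunc name
            "module": "pygama.dsp.processors",   # assumed module path
            "args": ["waveform[0:1000]", "bl", "bl_sig"],
            "prereqs": ["waveform"],
            "unit": ["ADC", "ADC"]               # one unit per output
        },
        "wf_blsub": {
            "function": "subtract",
            "module": "numpy",
            "args": ["waveform", "bl", "wf_blsub"],
            "prereqs": ["waveform", "bl"],
            "unit": "ADC"
        }
    }
}
# proc_chain, lh5_out = build_processing_chain(tb_raw, example_dsp_config)
# proc_chain.execute()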
def load_raw_data_example(f_raw):
    """
    make a plot of the timestamps in a particular channel.
    instead of accessing just the timestamp column, this is an example of
    accessing the entire raw file (including waveforms) with LH5.
    """
    sto = lh5.Store()
    tb_name = 'g024/raw'
    n_rows = 100  # np.inf to read all

    # method 1: call load_nda to pull out only the timestamp column (fast)
    # par_data = lh5.load_nda([f_raw], ['timestamp'], tb_name)
    # pprint(par_data)
    # print(par_data['timestamp'].shape)
    # exit()

    # method 2: read all data, just to give a longer example of what we can access
    # TODO: include an example of slicing/selecting rows with np.where

    # read non-wf cols (lh5 Arrays)
    data_raw, n_tot = sto.read_object(tb_name, f_raw, n_rows=n_rows)

    # declare output table (must specify n_rows for size)
    tb_raw = lh5.Table(size=n_tot)

    for col in data_raw.keys():
        if col in ['waveform', 'tracelist']:
            continue
        # copy all values
        newcol = lh5.Array(data_raw[col].nda, attrs=data_raw[col].attrs)
        # copy a selection (using np.where)
        # newcol = lh5.Array(data_raw[col].nda[idx], attrs=data_raw[col].attrs)
        tb_raw.add_field(col, newcol)

    df_raw = tb_raw.get_dataframe()
    print(df_raw)

    # load waveform column (nested LH5 Table)
    data_wfs, n_tot = sto.read_object(tb_name + '/waveform', f_raw,
                                      n_rows=n_rows)

    tb_wfs = lh5.Table(size=n_tot)
    for col in data_wfs.keys():
        attrs = data_wfs[col].attrs
        if isinstance(data_wfs[col], lh5.ArrayOfEqualSizedArrays):
            # idk why i can't put the filtered array into the constructor
            aoesa = lh5.ArrayOfEqualSizedArrays(attrs=attrs, dims=[1, 1])
            aoesa.nda = data_wfs[col].nda
            # aoesa.nda = data_wfs[col].nda[idx]  # with np.where selection
            newcol = aoesa
        else:
            newcol = lh5.Array(data_wfs[col].nda, attrs=attrs)
            # newcol = lh5.Array(data_wfs[col].nda[idx], attrs=attrs)  # selection
        tb_wfs.add_field(col, newcol)

    # note: this re-adds the last column under 'waveform'; compare optimize_trap
    # above, which adds the whole waveform sub-table as a single field
    tb_wfs.add_field('waveform', newcol)
    tb_wfs.attrs = data_raw.attrs

    # can write to file, to read back in for DSP, etc.
    # sto.write_object(tb_raw, grp_data, f_peak)

    print(tb_wfs)
    print(tb_wfs['waveform'].shape)
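
# Sketch for the TODO above: selecting rows with np.where before copying columns
# (this mirrors the commented '[idx]' lines and the filtered-table code in
# optimize_trap). Assumes a 'timestamp' column exists in data_raw.
#
# idx = np.where(data_raw['timestamp'].nda > 0)   # boolean cut -> row indices
# newcol = lh5.Array(data_raw[col].nda[idx], attrs=data_raw[col].attrs)
# tb_raw.add_field(col, newcol)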