Example #1
def raw_to_dsp(ds, overwrite=False, nevt=None, test=False, verbose=2, block=8, group=''):
    """
    Run raw_to_dsp on a set of runs.
    [raw file] ---> [dsp_run{}.lh5] (digital signal processing results)
    """
    for run in ds.runs:
        raw_file = "/lfs/l1/legend/users/dandrea/pygama/pgt/tier1/pgt_longtrace_run0117-20200110-105115-calib_raw.lh5"
        dsp_file = "/lfs/l1/legend/users/dandrea/pygama/pgt/tier2/pgt_longtrace_run0117-20200110-105115-calib_dsp.lh5"
        #raw_file = ds.paths[run]["raw_path"]
        #dsp_file = ds.paths[run]["dsp_path"]
        print("raw_file: ",raw_file)
        print("dsp_file: ",dsp_file)
        if dsp_file is not None and overwrite is False:
            continue

        if dsp_file is None:
            # declare new file name
            dsp_file = raw_file.replace('raw_', 'dsp_')
            
        if test:
            print("test mode (dry run), processing raw file:", raw_file)
            continue
            
        print("Definition of new LH5 version")
        #f_lh5 = lh5.Store()
        #data = f_lh5.read_object("raw", raw_file)
        #wf_in = data['waveform']['values'].nda
        #dt = data['waveform']['dt'].nda[0] * unit_parser.parse_unit(data['waveform']['dt'].attrs['units'])
        
        lh5_in = lh5.Store()
        #groups = lh5_in.ls(raw_file, group)
        f = h5py.File(raw_file,'r')
        print("File info: ",f.keys())
        for group in f.keys():
            print("Processing: " + raw_file + '/' + group)
            data = f[group]['raw']

            wf_in = data['waveform']['values'][()]
            dt = data['waveform']['dt'][0] * unit_parser.parse_unit(data['waveform']['dt'].attrs['units'])
            
            # Parameters for DCR calculation (the start sample is unused
            # below; the pickoff is pinned to tp_0 + 1.5*us instead)
            dcr_trap_int = 200
            dcr_trap_flat = 1000
            dcr_trap_startSample = 1200
            
            # Set up processing chain
            proc = ProcessingChain(block_width=block, clock_unit=dt, verbosity=verbose)
            proc.add_input_buffer("wf", wf_in, dtype='float32')
            
            # Basic Filters
            proc.add_processor(mean_stdev, "wf[0:1000]", "bl", "bl_sig")
            proc.add_processor(np.subtract, "wf", "bl", "wf_blsub")
            proc.add_processor(pole_zero, "wf_blsub", 145*us, "wf_pz")
            proc.add_processor(trap_norm, "wf_pz", 10*us, 5*us, "wf_trap")
            proc.add_processor(asymTrapFilter, "wf_pz", 0.05*us, 2*us, 4*us, "wf_atrap")
            
            # Timepoint calculation
            proc.add_processor(np.argmax, "wf_blsub", 1, "t_max", signature='(n),()->()', types=['fi->i'])
            proc.add_processor(time_point_frac, "wf_blsub", 0.95, "t_max", "tp_95")
            proc.add_processor(time_point_frac, "wf_blsub", 0.8, "t_max", "tp_80")
            proc.add_processor(time_point_frac, "wf_blsub", 0.5, "t_max", "tp_50")
            proc.add_processor(time_point_frac, "wf_blsub", 0.2, "t_max", "tp_20")
            proc.add_processor(time_point_frac, "wf_blsub", 0.05, "t_max", "tp_05")
            proc.add_processor(time_point_thresh, "wf_atrap[0:2000]", 0, "tp_0")
            
            # Energy calculation
            proc.add_processor(np.amax, "wf_trap", 1, "trapEmax", signature='(n),()->()', types=['fi->f'])
            proc.add_processor(fixed_time_pickoff, "wf_trap", "tp_0+(5*us+9*us)", "trapEftp")
            proc.add_processor(trap_pickoff, "wf_pz", 1.5*us, 0, "tp_0", "ct_corr")
            
            # Current calculation
            proc.add_processor(avg_current, "wf_pz", 10, "curr(len(wf_pz)-10, f)")
            proc.add_processor(np.amax, "curr", 1, "curr_amp", signature='(n),()->()', types=['fi->f'])
            proc.add_processor(np.divide, "curr_amp", "trapEftp", "aoe")

            # DCR calculation: slope between two points 1000 samples apart,
            # each averaged over 200 samples, starting 1.5 us after t0
            proc.add_processor(trap_pickoff, "wf_pz", dcr_trap_int, dcr_trap_flat, "tp_0+1.5*us", "dcr_unnorm")
            proc.add_processor(np.divide, "dcr_unnorm", "trapEftp", "dcr")
            
            # Tail slope. Basically the same as DCR, except with no PZ correction
            proc.add_processor(linear_fit, "wf_blsub[3000:]", "wf_b", "wf_m")
            proc.add_processor(np.divide, "-wf_b", "wf_m", "tail_rc")            
            
            # add ZAC filter energy calculation
            sigma = 10*us
            flat = 1*us
            decay = 160*us
            proc.add_processor(zac_filter, "wf", sigma, flat, decay, "wf_zac(101, f)")
            proc.add_processor(np.amax, "wf_zac", 1, "zacE", signature='(n),()->()', types=['fi->f'])
            
            # Set up the LH5 output
            lh5_out = lh5.Table(size=proc._buffer_len)
            lh5_out.add_field("zacE", lh5.Array(proc.get_output_buffer("zacE"), attrs={"units":"ADC"}))
            lh5_out.add_field("trapEmax", lh5.Array(proc.get_output_buffer("trapEmax"), attrs={"units":"ADC"}))
            lh5_out.add_field("trapEftp", lh5.Array(proc.get_output_buffer("trapEftp"), attrs={"units":"ADC"}))
            lh5_out.add_field("ct_corr", lh5.Array(proc.get_output_buffer("ct_corr"), attrs={"units":"ADC*ns"}))
            lh5_out.add_field("bl", lh5.Array(proc.get_output_buffer("bl"), attrs={"units":"ADC"}))
            lh5_out.add_field("bl_sig", lh5.Array(proc.get_output_buffer("bl_sig"), attrs={"units":"ADC"}))
            lh5_out.add_field("A", lh5.Array(proc.get_output_buffer("curr_amp"), attrs={"units":"ADC"}))
            lh5_out.add_field("AoE", lh5.Array(proc.get_output_buffer("aoe"), attrs={"units":"ADC"}))
            lh5_out.add_field("dcr", lh5.Array(proc.get_output_buffer("dcr"), attrs={"units":"ADC"}))
            
            lh5_out.add_field("tp_max", lh5.Array(proc.get_output_buffer("tp_95", unit=us), attrs={"units":"us"}))
            lh5_out.add_field("tp_95", lh5.Array(proc.get_output_buffer("tp_95", unit=us), attrs={"units":"us"}))
            lh5_out.add_field("tp_80", lh5.Array(proc.get_output_buffer("tp_80", unit=us), attrs={"units":"us"}))
            lh5_out.add_field("tp_50", lh5.Array(proc.get_output_buffer("tp_50", unit=us), attrs={"units":"us"}))
            lh5_out.add_field("tp_20", lh5.Array(proc.get_output_buffer("tp_20", unit=us), attrs={"units":"us"}))
            lh5_out.add_field("tp_05", lh5.Array(proc.get_output_buffer("tp_05", unit=us), attrs={"units":"us"}))
            lh5_out.add_field("tp_0", lh5.Array(proc.get_output_buffer("tp_0", unit=us), attrs={"units":"us"}))
            lh5_out.add_field("tail_rc", lh5.Array(proc.get_output_buffer("tail_rc", unit=us), attrs={"units":"us"}))
            
            print("Processing:\n",proc)
            proc.execute()
            
            groupname = group + "/data"
            print("Writing to: " + dsp_file + "/" + groupname)
            lh5_in.write_object(lh5_out, groupname, dsp_file)
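
# Hedged usage sketch (illustrative, not from the original file): `ds` is
# assumed to be a pygama DataSet whose `paths` dict maps each run to its
# raw/dsp file locations, as used above.
#
#   raw_to_dsp(ds, test=True)                # dry run: just list the files
#   raw_to_dsp(ds, overwrite=True, block=8)  # full processing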
Example #2
def optimize_trap(dg):
    """
    Generate a file with the grid points to search, plus events from the
    target peak. Then run DSP repeatedly on the small table, and fit the
    peak with the peakshape function.
    NOTE: runs table-to-table DSP (no file I/O)
    """
    f_peak = './temp_peak.lh5'  # lh5
    f_results = './temp_results.h5'  # pandas
    grp_data, grp_grid = '/optimize_data', '/optimize_grid'

    # epar, elo, ehi, epb = 'energy', 0, 1e7, 10000 # full range
    epar, elo, ehi, epb = 'energy', 3.88e6, 3.92e6, 500  # K40 peak

    show_movie = True
    write_output = True
    n_rows = None  # default None

    with open('opt_trap.json') as f:
        dsp_config = json.load(f, object_pairs_hook=OrderedDict)

    # files to consider.  fixme: right now only works with one file
    sto = lh5.Store()
    lh5_dir = os.path.expandvars(dg.config['lh5_dir'])
    raw_list = lh5_dir + dg.fileDB['raw_path'] + '/' + dg.fileDB['raw_file']
    f_raw = raw_list.values[0]
    tb_raw = 'ORSIS3302DecoderForEnergy/raw'

    # quick check of the energy range
    # ene_raw = sto.read_object(tb_raw+'/'+epar, f_raw).nda
    # hist, bins, var = pgh.get_hist(ene_raw, range=(elo, ehi), dx=epb)
    # plt.plot(bins[1:], hist, ds='steps')
    # plt.show()
    # exit()

    # set grid parameters
    # TODO: jason's suggestions, knowing the expected shape of the noise curve
    # e_rises = np.linspace(-1, 0, sqrt(sqrt(3)))
    # e_rises # make another list which is 10^pwr of this list
    # np.linspace(log_tau_min, log_tau_max) # try this too
    e_rises = np.arange(1, 12, 1)
    e_flats = np.arange(1, 6, 1)
    # rc_consts = np.arange(54, 154, 10) # changing this here messes up DCR

    # -- create the grid search file the first time --
    # NOTE: this makes a linear grid, and is editable by the arrays above.
    # jason also proposed a more active gradient-descent style search
    # like with Brent's method. (https://en.wikipedia.org/wiki/Brent%27s_method)
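    # A hedged sketch of that idea (illustrative only): for a single
    # parameter, scipy's Brent minimizer could replace the linear grid,
    # e.g. minimizing the fitted FWHM returned by run_dsp (defined below):
    #
    #   from scipy.optimize import minimize_scalar
    #   res = minimize_scalar(lambda rise: run_dsp((rise, 2.5))['fwhm_fit'],
    #                         bracket=(1, 6, 12), method='brent')
    #
    # the fixed flat time (2.5 us) and the bracket values are assumptions.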

    if True:  # change to `not os.path.exists(f_peak)` to reuse an existing file
        print('Recreating grid search file')

        # create the grid file
        # NOTE: save it as an lh5 Table just as an example of writing/reading one
        lists = [e_rises, e_flats]  #, rc_consts]
        prod = list(itertools.product(*lists))  # clint <3 stackoverflow
        df_grid = pd.DataFrame(prod, columns=['rise', 'flat'])  #,'rc'])
        lh5_grid = {}
        for i, dfcol in df_grid.items():
            lh5_grid[dfcol.name] = lh5.Array(dfcol.values)
        tb_grid = lh5.Table(col_dict=lh5_grid)
        sto.write_object(tb_grid, grp_grid, f_peak)

        # filter events by onboard energy
        ene_raw = sto.read_object(tb_raw + '/' + epar, f_raw).nda
        # hist, bins, var = pgh.get_hist(ene_raw, range=(elo, ehi), dx=epb)
        # plt.plot(bins[1:], hist, ds='steps')
        # plt.show()
        if n_rows is not None:
            ene_raw = ene_raw[:n_rows]
        idx = np.where((ene_raw > elo) & (ene_raw < ehi))

        # create a filtered table with correct waveform and attrs
        # TODO: move this into a function in lh5.py which takes idx as an input
        tb_data, wf_tb_data = lh5.Table(), lh5.Table()

        # read non-wf cols (lh5 Arrays)
        data_raw = sto.read_object(tb_raw, f_raw, n_rows=n_rows)
        for col in data_raw.keys():
            if col == 'waveform': continue
            newcol = lh5.Array(data_raw[col].nda[idx],
                               attrs=data_raw[col].attrs)
            tb_data.add_field(col, newcol)

        # handle waveform column (lh5 Table)
        data_wfs = sto.read_object(tb_raw + '/waveform', f_raw, n_rows=n_rows)
        for col in data_wfs.keys():
            attrs = data_wfs[col].attrs
            if isinstance(data_wfs[col], lh5.ArrayOfEqualSizedArrays):
                # idk why i can't put the filtered array into the constructor
                aoesa = lh5.ArrayOfEqualSizedArrays(attrs=attrs, dims=[1, 1])
                aoesa.nda = data_wfs[col].nda[idx]
                newcol = aoesa
            else:
                newcol = lh5.Array(data_wfs[col].nda[idx], attrs=attrs)
            wf_tb_data.add_field(col, newcol)
        tb_data.add_field('waveform', wf_tb_data)
        tb_data.attrs = data_raw.attrs
        sto.write_object(tb_data, grp_data, f_peak)

    else:
        print('Loading peak file. groups:', sto.ls(f_peak))
        tb_grid = sto.read_object(grp_grid, f_peak)
        tb_data = sto.read_object(grp_data, f_peak)  # filtered file
        # tb_data = sto.read_object(tb_raw, f_raw) # orig file
        df_grid = tb_grid.get_dataframe()

    # check shape of input table
    print('input table attributes:')
    for key in tb_data.keys():
        obj = tb_data[key]
        if isinstance(obj, lh5.Table):
            for key2 in obj.keys():
                obj2 = obj[key2]
                print('  ', key, key2, obj2.nda.shape, obj2.attrs)
        else:
            print('  ', key, obj.nda.shape, obj.attrs)

    # clear new columns if they exist
    new_cols = ['e_fit', 'fwhm_fit', 'rchisq', 'fwhm_err', 'fwhm_ovr_mean']
    for col in new_cols:
        if col in df_grid.columns:
            df_grid.drop(col, axis=1, inplace=True)

    t_start = time.time()

    def run_dsp(dfrow):
        """
        run dsp on the test file, editing the processor list
        alternate idea: generate a long list of processors with different names
        """
        # adjust dsp config dictionary
        rise, flat = dfrow
        # dsp_config['processors']['wf_pz']['defaults']['db.pz.tau'] = f'{tau}*us'
        dsp_config['processors']['wf_trap']['args'][1] = f'{rise}*us'
        dsp_config['processors']['wf_trap']['args'][2] = f'{flat}*us'
        # pprint(dsp_config)

        # run dsp
        pc, tb_out = build_processing_chain(tb_data, dsp_config, verbosity=0)
        pc.execute()

        # analyze peak
        e_peak = 1460.
        etype = 'trapEmax'
        elo, ehi, epb = 4000, 4500, 3  # the peak moves around a bunch
        energy = tb_out[etype].nda

        # get histogram
        hE, bins, vE = pgh.get_hist(energy, range=(elo, ehi), dx=epb)
        xE = bins[1:]

        # should I center the max at 1460?

        # simple numerical width
        i_max = np.argmax(hE)
        h_max = hE[i_max]
        upr_half = xE[(xE > xE[i_max]) & (hE <= h_max / 2)][0]
        bot_half = xE[(xE < xE[i_max]) & (hE >= h_max / 2)][0]
        fwhm = upr_half - bot_half
        sig = fwhm / 2.355

        # fit to gaussian: amp, mu, sig, bkg
        fit_func = pgf.gauss_bkg
        amp = h_max * fwhm
        bg0 = np.mean(hE[:20])
        x0 = [amp, xE[i_max], sig, bg0]
        xF, xF_cov = pgf.fit_hist(fit_func, hE, bins, var=vE, guess=x0)

        # collect results. fit parameter order: [amp, mu, sig, bkg]
        e_fit = xF[1]
        xF_err = np.sqrt(np.diag(xF_cov))
        fwhm_fit = xF[2] * 2.355 * 1460. / e_fit
        fwhm_err = xF_err[2] * 2.355 * 1460. / e_fit

        chisq = []
        for i, h in enumerate(hE):
            model = fit_func(xE[i], *xF)
            chisq.append((model - h)**2 / model)
        rchisq = sum(chisq) / len(hE)
        fwhm_ovr_mean = fwhm_fit / e_fit

        if show_movie:

            plt.plot(xE,
                     hE,
                     ds='steps',
                     c='b',
                     lw=2,
                     label=f'{etype} {rise}--{flat}')

            # peak shape
            plt.plot(xE,
                     fit_func(xE, *x0),
                     '-',
                     c='orange',
                     alpha=0.5,
                     label='init. guess')
            plt.plot(xE,
                     fit_func(xE, *xF),
                     '-r',
                     alpha=0.8,
                     label='peakshape fit')
            plt.plot(np.nan,
                     np.nan,
                     '-w',
                     label=f'mu={e_fit:.1f}, fwhm={fwhm_fit:.2f}')

            plt.xlabel(etype, ha='right', x=1)
            plt.ylabel('Counts', ha='right', y=1)
            plt.legend(loc=2)

            # show a little movie
            plt.show(block=False)
            plt.pause(0.01)
            plt.cla()

        # return results
        return pd.Series({
            'e_fit': e_fit,
            'fwhm_fit': fwhm_fit,
            'rchisq': rchisq,
            'fwhm_err': fwhm_err,
            'fwhm_ovr_mean': fwhm_ovr_mean
        })

    # df_grid=df_grid[:10]
    df_tmp = df_grid.progress_apply(run_dsp, axis=1)
    df_grid[new_cols] = df_tmp
    # print(df_grid)

    if show_movie:
        plt.close()

    print('elapsed:', time.time() - t_start)
    if write_output:
        df_grid.to_hdf(f_results, key=grp_grid)
        print(f"Wrote output file: {f_results}")
Example #3
    proc.add_processor(np.amax, "curr", 1, "curr_amp",
                       signature='(n),()->()',
                       types=['fi->f'])
    proc.add_processor(np.divide, "curr_amp", "trapEftp", "aoe")

    # DCR calculation: use slope using 1000 samples apart and averaging 200
    # samples, with the start 1.5 us offset from t0
    proc.add_processor(trap_pickoff, "wf_pz", 200, 1000, "tp_0+1.5*us",
                       "dcr_unnorm")
    proc.add_processor(np.divide, "dcr_unnorm", "trapEftp", "dcr")

    # Tail slope. Basically the same as DCR, except with no PZ correction
    proc.add_processor(linear_fit, "wf_blsub[3000:]", "wf_b", "wf_m")
    proc.add_processor(np.divide, "-wf_b", "wf_m", "tail_rc")

    # Set up the LH5 output
    lh5_out = lh5.Table(size=proc._buffer_len)
    lh5_out.add_field(
        "trapEmax",
        lh5.Array(proc.get_output_buffer("trapEmax"), attrs={"units": "ADC"}))
    lh5_out.add_field(
        "trapEftp",
        lh5.Array(proc.get_output_buffer("trapEftp"), attrs={"units": "ADC"}))
    lh5_out.add_field(
        "ct_corr",
        lh5.Array(proc.get_output_buffer("ct_corr"), attrs={"units": "ADC*ns"}))
    lh5_out.add_field(
        "bl", lh5.Array(proc.get_output_buffer("bl"), attrs={"units": "ADC"}))
    lh5_out.add_field(
        "bl_sig",
        lh5.Array(proc.get_output_buffer("bl_sig"), attrs={"units": "ADC"}))
Example #4
def process_ds(f_grid, f_opt, f_tier1, d_out, efilter):
    """
    process the windowed raw file 'f_tier1' and create the DSP file 'f_opt'
    """
    print("Grid file:",f_grid)
    df_grid = pd.read_hdf(f_grid)
    
    if os.path.exists(f_opt):
        os.remove(f_opt)

    if 'corr' in efilter:
        bfilter = efilter.split('corr')[0]
        try:
            df_res = pd.read_hdf(f'{d_out}/{bfilter}_results.h5', key='results')
            print("Extraction of best parameters for", bfilter)
        except (FileNotFoundError, KeyError):
            print(bfilter, "not optimized")
            return
    
    # open raw file
    lh5_in = lh5.Store()
    f = h5py.File(f_tier1, 'r')

    t_start = time.time()
    for idx, ged in enumerate(f.keys()):
        if idx == 4:
            # after 5 detectors, extrapolate the per-detector time to the rest
            diff = time.time() - t_start
            tot = diff / 5 * (len(f.keys()) - 5) / 60
            print(f"Estimated remaining time: {tot:.2f} mins")
        
        print("Detector:",ged)
        #data = lh5_in.read_object(group, f_tier1)
        data =  f[ged]['raw']
        
        #wf_in = data['waveform']['values'].nda
        #dt = data['waveform']['dt'].nda[0] * unit_parser.parse_unit(data['waveform']['dt'].attrs['units'])
        wf_in = data['waveform']['values'][()]
        dt = data['waveform']['dt'][0] * unit_parser.parse_unit(data['waveform']['dt'].attrs['units'])
        bl_in = data['baseline'][()] #flashcam baseline values
        
        # Set up DSP processing chain -- very minimal
        block = 8  # waveforms to process simultaneously
        proc = ProcessingChain(block_width=block, clock_unit=dt, verbosity=False)
        proc.add_input_buffer("wf", wf_in, dtype='float32')
        proc.add_input_buffer("bl", bl_in, dtype='float32')

        wsize = wf_in.shape[1]
        dt0 = data['waveform']['dt'][0] * 0.001  # sample period in us (assuming dt in ns)
        
        #proc.add_processor(mean_stdev, "wf[0:1000]", "bl", "bl_sig")
        proc.add_processor(np.subtract, "wf", "bl", "wf_blsub")
        for i, row in df_grid.iterrows():
            if 'corr' in efilter:
                ct_const = row
            if 'trapE' in efilter:
                if 'corr' in efilter:
                    rise, flat, rc = float(df_res['rise'][idx]), float(df_res['flat'][idx]), float(df_res['rc'][idx])
                else:
                    rise, flat, rc = row
                proc.add_processor(pole_zero, "wf_blsub", rc*us, "wf_pz")
                proc.add_processor(trap_norm, "wf_pz", rise*us, flat*us, f"wf_trap_{i}")
                proc.add_processor(asymTrapFilter, "wf_pz", 0.05*us, 4*us, 4*us, "wf_atrap")
                proc.add_processor(time_point_thresh, "wf_pz", 0, "tp_0")
                proc.add_processor(np.amax, f"wf_trap_{i}", 1, f"trapE_{i}", signature='(n),()->()', types=['fi->f'])
                proc.add_processor(fixed_time_pickoff, f"wf_trap_{i}", f"tp_0+({rise*us}+{flat*us})", f"trapEftp_{i}")
            if 'zacE' in efilter:
                if 'corr' in efilter:
                    sigma, flat, decay = float(df_res['sigma'][idx]), float(df_res['flat'][idx]), float(df_res['decay'][idx])
                else:
                    sigma, flat, decay = row
                proc.add_processor(zac_filter(wsize, sigma/dt0, flat/dt0, decay/dt0), "wf", f"wf_zac_{i}(101, f)")
                proc.add_processor(np.amax, f"wf_zac_{i}", 1, f"zacE_{i}", signature='(n),()->()', types=['fi->f'])
            if 'cuspE' in efilter:
                if 'corr' in efilter:
                    sigma, flat, decay = float(df_res['sigma'][idx]), float(df_res['flat'][idx]), float(df_res['decay'][idx])
                else:
                    sigma, flat, decay = row
                proc.add_processor(cusp_filter(wsize, sigma/dt0, flat/dt0, decay/dt0), "wf_blsub", f"wf_cusp_{i}(101, f)")
                proc.add_processor(np.amax, f"wf_cusp_{i}", 1, f"cuspE_{i}", signature='(n),()->()', types=['fi->f'])
            if 'corr' in efilter:
                proc.add_processor(trap_pickoff, "wf_pz", 1.5*us, 0, "tp_0", "ct_corr")
                proc.add_processor(np.multiply, ct_const, "ct_corr", f"ct_corr_cal_{i}")
                proc.add_processor(np.add, f"ct_corr_cal_{i}", f"{bfilter}_{i}", f"{efilter}_{i}")
                
        # Set up the LH5 output
        lh5_out = lh5.Table(size=proc._buffer_len)
        for i, row in df_grid.iterrows():
            lh5_out.add_field(f"{efilter}_{i}", lh5.Array(proc.get_output_buffer(f"{efilter}_{i}"), attrs={"units":"ADC"}))
        
        print("Processing:\n",proc)
        proc.execute()
        
        groupname = ged + "/data"
        print("Writing to: " + f_opt + "/" + groupname)
        lh5_in.write_object(lh5_out, groupname, f_opt)
        print("")
    
    # list the datasets of the output file
    data_opt = lh5_in.ls(f_opt)
    data_opt_0 = lh5_in.ls(f_opt, 'g024/data/*')
    diff = time.time() - t_start
    print(f"Time to process: {diff:.2f} s")
Example #5
def dsp_to_hit_cage(f_dsp, f_hit, dg, n_max=None, verbose=False, t_start=None):
    """
    non-general placeholder for creating a pygama 'hit' file.  uses pandas.
    for every file, apply:
    - energy calibration (peakfit results)
    - timestamp correction
    for a more general dsp_to_hit, maybe each function could be given in terms
    of an 'apply' on a dsp dataframe ...
    
    TODO: create entry config['rawe'] with list of energy pars to calibrate, as 
    in energy_cal.py
    """
    rawe = ['trapEmax']

    # create initial 'hit' DataFrame from dsp data
    hit_store = lh5.Store()
    data = hit_store.read_object(dg.config['input_table'], f_dsp)
    df_hit = data.get_dataframe()

    # 1. get energy calibration for this run from peakfit
    cal_db = db.TinyDB(storage=MemoryStorage)
    with open(dg.config['ecaldb']) as f:
        raw_db = json.load(f)
        cal_db.storage.write(raw_db)
    runs = dg.file_keys.run.unique()
    if len(runs) > 1:
        print("sorry, I can't do combined runs yet")
        exit()
    run = runs[0]
    for etype in rawe:
        tb = cal_db.table(f'peakfit_{etype}').all()
        df_cal = pd.DataFrame(tb)
        df_cal['run'] = df_cal['run'].astype(int)
        df_run = df_cal.loc[df_cal.run == run]
        cal_pars = df_run.iloc[0][['cal0', 'cal1', 'cal2']]
        pol = np.poly1d(cal_pars)  # handy numpy polynomial object
        df_hit[f'{etype}_cal'] = pol(df_hit[f'{etype}'])

    # 2. compute timestamp rollover correction (specific to struck 3302)
    clock = 100e6  # 100 MHz
    UINT_MAX = 4294967295  # (0xffffffff)
    t_max = UINT_MAX / clock
    ts = df_hit['timestamp'].values / clock
    tdiff = np.diff(ts)
    tdiff = np.insert(tdiff, 0, 0)
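    # Worked toy example of the wrap detection (values illustrative):
    #   ts    = [42.0, 42.9,  0.3, 1.1]  -> counter wrapped between 42.9 and 0.3
    #   tdiff = [ 0.0,  0.9, -42.6, 0.8] -> np.where(tdiff < 0) flags index 2
    # each wrap then adds t_max to the rolling offset applied below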
    iwrap = np.where(tdiff < 0)
    iloop = np.append(iwrap[0], len(ts))
    ts_new, t_roll = [], 0
    for i, idx in enumerate(iloop):
        ilo = 0 if i == 0 else iwrap[0][i - 1]
        ihi = idx
        ts_block = ts[ilo:ihi]
        t_last = ts[ilo - 1]
        t_diff = t_max - t_last
        ts_new.append(ts_block + t_roll)
        t_roll += t_last + t_diff
    df_hit['ts_sec'] = np.concatenate(ts_new)

    # 3. compute global timestamp
    if t_start is not None:
        df_hit['ts_glo'] = df_hit['ts_sec'] + t_start

    # write to LH5 file
    if os.path.exists(f_hit):
        os.remove(f_hit)
    sto = lh5.Store()
    tb_name = dg.config['input_table'].replace('dsp', 'hit')
    tb_lh5 = lh5.Table(size=len(df_hit))

    for col in df_hit.columns:
        tb_lh5.add_field(col, lh5.Array(df_hit[col].values,
                                        attrs={'units': ''}))
        print(col)

    print(f'Writing table: {tb_name} in file:\n   {f_hit}')
    sto.write_object(tb_lh5, tb_name, f_hit)
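
# Hedged usage sketch (illustrative): `dg` is assumed to be a DataGroup with
# config['input_table'] and config['ecaldb'] set, as used above.
#
#   dsp_to_hit_cage('run42_dsp.lh5', 'run42_hit.lh5', dg, t_start=1.6e9)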
Example #6
def raw_to_dsp(ds,
               overwrite=False,
               nevt=None,
               test=False,
               verbose=2,
               block=8,
               group='daqdata'):
    """
    Run raw_to_dsp on a set of runs.
    [raw file] ---> [dsp_run{}.lh5] (digital signal processing results)
    """
    for run in ds.runs:
        raw_file = ds.paths[run]["raw_path"]
        dsp_file = ds.paths[run]["dsp_path"]

        if dsp_file is not None and overwrite is False:
            continue

        if dsp_file is None:
            # declare new file name
            dsp_file = raw_file.replace('raw', 'dsp')

        if test:
            print("test mode (dry run), processing raw file:", raw_file)
            continue

        # new LH5 version

        lh5_in = lh5.Store()
        data = lh5_in.read_object("/ORSIS3302DecoderForEnergy", raw_file)

        wf_in = data['waveform']['values'].nda
        dt = data['waveform']['dt'].nda[0] * unit_parser.parse_unit(
            data['waveform']['dt'].attrs['units'])

        # Parameters for DCR calculation
        dcr_trap_int = 200
        dcr_trap_flat = 1000
        dcr_trap_startSample = 1200

        # Set up processing chain
        proc = ProcessingChain(block_width=block,
                               clock_unit=dt,
                               verbosity=verbose)
        proc.add_input_buffer("wf", wf_in, dtype='float32')

        proc.add_processor(mean_stdev, "wf[0:1000]", "bl", "bl_sig")
        proc.add_processor(np.subtract, "wf", "bl", "wf_blsub")
        proc.add_processor(pole_zero, "wf_blsub", 70 * us, "wf_pz")

        proc.add_processor(asymTrapFilter, "wf_pz", 10 * us, 5 * us, 10 * us,
                           "wf_atrap")
        proc.add_processor(np.amax,
                           "wf_atrap",
                           1,
                           "atrapE",
                           signature='(n),()->()',
                           types=['fi->f'])

        # proc.add_processor(np.divide, "atrapmax", 10*us, "atrapE")

        proc.add_processor(trap_norm, "wf_pz", 10 * us, 5 * us, "wf_trap")
        proc.add_processor(np.amax,
                           "wf_trap",
                           1,
                           "trapE",
                           signature='(n),()->()',
                           types=['fi->f'])
        proc.add_processor(avg_current, "wf_pz", 10, "curr")
        proc.add_processor(np.amax,
                           "curr",
                           1,
                           "A_10",
                           signature='(n),()->()',
                           types=['fi->f'])
        proc.add_processor(np.divide, "A_10", "trapE", "AoE")
        proc.add_processor(trap_pickoff, "wf_pz", dcr_trap_int, dcr_trap_flat,
                           dcr_trap_startSample, "dcr")

        # Set up the LH5 output
        lh5_out = lh5.Table(size=proc._buffer_len)
        lh5_out.add_field(
            "trapE",
            lh5.Array(proc.get_output_buffer("trapE"), attrs={"units": "ADC"}))

        lh5_out.add_field(
            "bl",
            lh5.Array(proc.get_output_buffer("bl"), attrs={"units": "ADC"}))
        lh5_out.add_field(
            "bl_sig",
            lh5.Array(proc.get_output_buffer("bl_sig"), attrs={"units": "ADC"}))
        lh5_out.add_field(
            "A",
            lh5.Array(proc.get_output_buffer("A_10"), attrs={"units": "ADC"}))
        lh5_out.add_field(
            "AoE",
            lh5.Array(proc.get_output_buffer("AoE"), attrs={"units": "ADC"}))
        lh5_out.add_field(
            "dcr",
            lh5.Array(proc.get_output_buffer("dcr"), attrs={"units": "ADC"}))

        print("Processing:\n", proc)
        proc.execute()

        print("Writing to: ", dsp_file)
        lh5_in.write_object(lh5_out, "data", dsp_file)
Example #7
def build_processing_chain(lh5_in,
                           dsp_config,
                           outputs=None,
                           verbosity=1,
                           block_width=8):
    """
    Produces a ProcessingChain object and an lh5 table for output parameters
    from an input lh5 table and a json recipe.
    
    Returns (proc_chain, lh5_out):
    - proc_chain: ProcessingChain object that is bound to lh5_in and lh5_out;
      all you need to do is handle file i/o for lh5_in/out and run execute
    - lh5_out: output LH5 table
    
    Required arguments:
    - lh5_in: input LH5 table
    - dsp_config: dict or name of a json file containing a recipe for
      constructing the ProcessingChain object produced by this function.
      dsp_config is formatted as a json dict with different processors. It
      should contain a dictionary called 'processors' whose entries have
      the following format:
        Key: parameter name: name of the parameter produced by the processor.
             Multiple names can optionally be provided, separated by spaces.
        Values:
          function (req): name of the gufunc
          module (req): name of the module in which to find the function
          prereqs (req): names of parameters, from other processors and from
            the input, that must exist in order to run this one
          args (req): list of arguments for the processor, with variables
            passed by name or value. Names should either be inputs from
            lh5_in or parameter names from other processors. Names of the
            format db.name will be looked up in the metadata.
          kwargs (opt): kwargs used when adding processors to proc_chain
          init_args (opt): args used when initializing a processor that has
            static data (for factory functions)
          defaults (opt): default values for db parameters if not found
          unit (opt): unit to be used for the attr in the lh5 file.
      There may also be a list called 'outputs', containing the parameters
      to put into lh5_out.
    
    Optional keyword arguments:
    - outputs: list of parameters to put in the output lh5 table. If None,
      use the parameters in the 'outputs' list from config
    - verbosity: verbosity level:
            0: Print nothing (except errors...)
            1: Print basic warnings (default)
            2: Print basic debug info
            3: Print friggin' everything!    
    - block_width: number of entries to process at once.
    """

    if isinstance(dsp_config, str):
        with open(dsp_config) as f:
            dsp_config = json.load(f)
    elif dsp_config is None:
        dsp_config = {'outputs': [], 'processors': {}}
    else:
        # We don't want to modify the input!
        dsp_config = deepcopy(dsp_config)

    if outputs is None:
        outputs = dsp_config['outputs']

    processors = dsp_config['processors']

    # for processors with multiple outputs, add separate entries to the processor list
    for key in list(processors):
        keys = [k for k in re.split(",| ", key) if k != '']
        if len(keys) > 1:
            for k in keys:
                processors[k] = key

    # Recursive function to crawl through the parameters/processors and get
    # a sequence of unique parameters such that parameters always appear after
    # their dependencies. For parameters that are not produced by the ProcChain
    # (i.e. input/db parameters), add them to the list of leafs
    # https://www.electricmonk.nl/docs/dependency_resolving_algorithm/dependency_resolving_algorithm.html
    def resolve_dependencies(par, resolved, leafs, unresolved=None):
        if unresolved is None:  # avoid sharing a mutable default argument
            unresolved = []
        if par in resolved:
            return
        elif par in unresolved:
            raise Exception('Circular reference detected at parameter %s' % par)

        # if we don't find a node, this is a leaf
        node = processors.get(par)
        if node is None:
            if par not in leafs:
                leafs.append(par)
            return

        # if it's a string, it is part of a processor that returns multiple
        # outputs (see above); in that case, node is a str pointing to the
        # actual node we want
        if isinstance(node, str):
            resolve_dependencies(node, resolved, leafs, unresolved)
            return

        edges = node['prereqs']
        unresolved.append(par)
        for edge in edges:
            resolve_dependencies(edge, resolved, leafs, unresolved)
        resolved.append(par)
        unresolved.remove(par)

    proc_par_list = []  # calculated from processors
    input_par_list = []  # input from file and used for processors
    copy_par_list = []  # copied from input to output
    out_par_list = []
    for out_par in outputs:
        if out_par not in processors:
            copy_par_list.append(out_par)
        else:
            resolve_dependencies(out_par, proc_par_list, input_par_list)
            out_par_list.append(out_par)

    if verbosity > 0:
        print('Processing parameters:', str(proc_par_list))
        print('Required input parameters:', str(input_par_list))
        print('Copied output parameters:', str(copy_par_list))
        print('Processed output parameters:', str(out_par_list))

    proc_chain = ProcessingChain(block_width, lh5_in.size, verbosity=verbosity)

    # Now add all of the input buffers from lh5_in (and also the clk time)
    for input_par in input_par_list:
        buf_in = lh5_in.get(input_par)
        if buf_in is None:
            print("I don't know what to do with " + input_par +
                  ". Building output without it!")
        elif isinstance(buf_in, lh5.Array):
            proc_chain.add_input_buffer(input_par, buf_in.nda)
        elif isinstance(buf_in, lh5.Table):
            # check if this is a waveform table
            if all(k in buf_in for k in ('t0', 'dt', 'values')):
                proc_chain.add_input_buffer(input_par, buf_in['values'].nda,
                                            'float32')
                clk = buf_in['dt'].nda[0] * unit_parser.parse_unit(
                    buf_in['dt'].attrs['units'])
                if proc_chain._clk is not None and proc_chain._clk != clk:
                    print("Somehow you managed to set multiple clock "
                          "frequencies... Using " + str(proc_chain._clk))
                else:
                    proc_chain._clk = clk

    # now add the processors
    for proc_par in proc_par_list:
        recipe = processors[proc_par]
        module = importlib.import_module(recipe['module'])
        func = getattr(module, recipe['function'])
        args = recipe['args']
        for i, arg in enumerate(args):
            if isinstance(arg, str) and arg[0:3] == 'db.':
                #TODO: ADD METADATA LOOKUP!
                args[i] = recipe['defaults'][arg]

        kwargs = recipe.get('kwargs', {})  # might also need metadata lookup here

        # if init_args are defined, parse any strings and then call func
        # as a factory/constructor function
        init_args = recipe.get('init_args')
        if init_args is not None:
            for i, arg in enumerate(init_args):
                if isinstance(arg, str):
                    if arg[0:3] == 'db.':
                        #TODO: ADD METADATA LOOKUP!
                        init_args[i] = recipe['defaults'][arg]
                    else:
                        # see if the string can be parsed by proc_chain
                        try:
                            init_args[i] = proc_chain.get_variable(arg)
                        except Exception:
                            pass
            if verbosity > 1:
                print("Building function", func.__name__, "from init_args",
                      init_args)
            func = func(*init_args)
        proc_chain.add_processor(func, *args, **kwargs)

    # build the output buffers
    lh5_out = lh5.Table(size=proc_chain._buffer_len)

    # add inputs that are directly copied
    for copy_par in copy_par_list:
        buf_in = lh5_in.get(copy_par)
        if isinstance(buf_in, lh5.Array):
            lh5_out.add_field(copy_par, buf_in)
        elif isinstance(buf_in, lh5.Table):
            # check if this is a waveform table
            if all(k in buf_in for k in ('t0', 'dt', 'values')):
                lh5_out.add_field(copy_par, buf_in['values'])
                clk = buf_in['dt'].nda[0] * unit_parser.parse_unit(
                    buf_in['dt'].attrs['units'])
                if proc_chain._clk is not None and proc_chain._clk != clk:
                    print("Somehow you managed to set multiple clock "
                          "frequencies... Using " + str(proc_chain._clk))
                else:
                    proc_chain._clk = clk
        else:
            print("I don't know what to do with " + copy_par +
                  ". Building output without it!")

    # finally, add the output buffers to lh5_out and the proc chain
    for out_par in out_par_list:
        recipe = processors[out_par]
        # special case for proc with multiple outputs
        if isinstance(recipe, str):
            i = [k for k in re.split(",| ", recipe) if k != ''].index(out_par)
            recipe = processors[recipe]
            unit = recipe['unit'][i]
        else:
            unit = recipe['unit']

        try:
            scale = convert(1, unit_parser.parse_unit(unit), proc_chain._clk)
        except InvalidConversion:
            scale = None

        buf_out = proc_chain.get_output_buffer(out_par, unit=scale)
        lh5_out.add_field(out_par, lh5.Array(buf_out, attrs={"units": unit}))
    return (proc_chain, lh5_out)
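
# Hedged usage sketch (illustrative; the table and file names are
# assumptions, and some lh5 versions return (table, n_rows) from read_object):
#
#   sto = lh5.Store()
#   tb_raw = sto.read_object('g024/raw', f_raw)
#   proc_chain, tb_out = build_processing_chain(tb_raw, 'dsp_config.json',
#                                               verbosity=1, block_width=16)
#   proc_chain.execute()
#   sto.write_object(tb_out, 'g024/dsp', f_dsp)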
Example #8
def load_raw_data_example(f_raw):
    """
    make a plot of the timestamps in a particular channel.
    instead of accessing just the timestamp column, this is an example
    of accessing the entire raw file (including waveforms) with LH5.
    """
    sto = lh5.Store()

    tb_name = 'g024/raw'

    n_rows = 100  # np.inf to read all

    # method 1: call load_nda to pull out only timestamp column (fast)
    # par_data = lh5.load_nda([f_raw], ['timestamp'], tb_name)
    # pprint(par_data)
    # print(par_data['timestamp'].shape)
    # exit()

    # method 2: read all data, just to give a longer example of what we can access
    # TODO: include an example of slicing/selecting rows with np.where
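    # Hedged sketch for the TODO above (illustrative thresholds): build a row
    # selection with np.where on a column read below, then filter each column
    # with `[idx]` as in the commented lines further down, e.g.
    #
    #   ene = data_raw['energy'].nda          # assumes an 'energy' column
    #   idx = np.where((ene > 1e6) & (ene < 4e6))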

    # read non-wf cols (lh5 Arrays)
    data_raw, n_tot = sto.read_object(tb_name, f_raw, n_rows=n_rows)

    # declare output table (must specify n_rows for size)
    tb_raw = lh5.Table(size=n_tot)

    for col in data_raw.keys():
        if col in ['waveform', 'tracelist']: continue
        # copy all values
        newcol = lh5.Array(data_raw[col].nda, attrs=data_raw[col].attrs)
        # copy a selection (using np.where)
        # newcol = lh5.Array(data_raw[col].nda[idx], attrs=data_raw[col].attrs)
        tb_raw.add_field(col, newcol)

    df_raw = tb_raw.get_dataframe()
    print(df_raw)

    # load waveform column (nested LH5 Table)
    data_wfs, n_tot = sto.read_object(tb_name + '/waveform',
                                      f_raw,
                                      n_rows=n_rows)
    tb_wfs = lh5.Table(size=n_tot)

    for col in data_wfs.keys():
        attrs = data_wfs[col].attrs
        if isinstance(data_wfs[col], lh5.ArrayOfEqualSizedArrays):
            # idk why i can't put the filtered array into the constructor
            aoesa = lh5.ArrayOfEqualSizedArrays(attrs=attrs, dims=[1, 1])
            aoesa.nda = data_wfs[col].nda
            # aoesa.nda = data_wfs[col].nda[idx] # with np.where selection
            newcol = aoesa
        else:
            newcol = lh5.Array(data_wfs[col].nda, attrs=attrs)
            # newcol = lh5.Array(data_wfs[col].nda[idx], attrs=attrs) # selection
        tb_wfs.add_field(col, newcol)

    tb_wfs.attrs = data_raw.attrs

    # can write to file, to read back in for DSP, etc.
    # sto.write_object(tb_raw, grp_data, f_peak)

    print(tb_wfs)
    print(tb_wfs['values'].nda.shape)