def test_tight_coincidence(hits, channel):
    """Cross-check get_tight_coin against a brute-force channel count.

    Peaks are built from the supplied hits; for every peak the number of
    distinct channels with a hit centre inside the coincidence window is
    counted with plain numpy masks and compared to get_tight_coin.

    :param hits: hits to cluster into peaks (modified in place)
    :param channel: channel numbers to assign to the hits
    """
    window_left = 5
    window_right = 5

    hits['area'] = 1
    # In case there are fewer channels than hits (unlikely)
    hits['channel'] = channel[:len(hits)]

    peaks = strax.find_peaks(
        hits,
        adc_to_pe=np.ones(10),
        gap_threshold=10,
        left_extension=0,
        right_extension=0,
        min_channels=1,
        min_area=0)

    # Use the centre of each hit / peak as its "maximum" time
    peak_centres = peaks['time'] + peaks['length'] // 2
    hit_centres = hits['time'] + hits['length'] // 2

    found = get_tight_coin(hit_centres,
                           hits['channel'],
                           peak_centres,
                           window_left,
                           window_right)

    for peak_i, centre in enumerate(peak_centres):
        in_window = ((hit_centres >= (centre - window_left))
                     & (hit_centres <= (centre + window_right)))
        expected = len(np.unique(hits[in_window]['channel']))
        assert expected == found[peak_i], (
            f'Wrong number of tight channel got {found[peak_i]}, '
            f'but expectd {expected}')  # noqa
def find_peak_groups(peaks, gap_threshold,
                     left_extension=0, right_extension=0,
                     max_duration=int(1e9)):
    """Group peaks that lie within gap_threshold of each other.

    :param peaks: Peaks to group
    :param gap_threshold: Minimum gap between peaks
    :param left_extension: Extend groups by this many ns to the left
    :param right_extension: Extend groups by this many ns to the right
    :param max_duration: Maximum group duration, passed on to
        strax.find_peaks
    :return: time, endtime arrays of group boundaries
    """
    # The peakfinder is reused by disguising every peak as a hit with
    # dt = 1; raw peaks cannot be fed in since their dts may differ.
    peak_start = peaks['time']
    pseudo_hits = np.zeros(len(peaks), dtype=strax.hit_dtype)
    pseudo_hits['time'] = peak_start
    # TODO: could this cause int overrun nonsense anywhere?
    pseudo_hits['length'] = peaks['endtime'] - peak_start
    pseudo_hits['dt'] = 1

    finder_options = dict(
        to_pe=np.zeros(1),
        gap_threshold=gap_threshold,
        left_extension=left_extension,
        right_extension=right_extension,
        min_hits=1,
        min_area=0,
        max_duration=max_duration)
    groups = strax.find_peaks(pseudo_hits, **finder_options)

    return groups['time'], strax.endtime(groups)
def test_find_peaks(hits, min_channels, min_area):
    """Check invariants of strax.find_peaks without left/right extension.

    :param hits: hits to cluster (the area field is overwritten with 1)
    :param min_channels: min_channels option for strax.find_peaks
    :param min_area: min_area option for strax.find_peaks
    """
    gap_threshold = 10
    hits['area'] = 1

    peaks = strax.find_peaks(
        hits,
        adc_to_pe=np.ones(1),
        gap_threshold=gap_threshold,
        left_extension=0,
        right_extension=0,
        min_channels=min_channels,
        min_area=min_area)

    n_hits = peaks['n_hits']

    # Basic sanity of the returned peaks
    assert np.all(peaks['length'] > 0)
    assert np.all(n_hits > 0)

    # The requested thresholds must be honoured
    if min_area != 0:
        assert np.all(peaks['area'] >= min_area)
    if min_channels != 1:
        assert np.all(n_hits >= min_channels)
    assert np.all(peaks['max_gap'] < gap_threshold)

    # Without any requirement every hit has to end up in some peak
    no_requirements = (min_area == 0 and min_channels == 1)
    if no_requirements:
        assert np.sum(n_hits) == len(hits)
        assert np.all(strax.fully_contained_in(hits, peaks) > -1)

    # No extensions, so neighbouring peaks are >= gap_threshold apart
    starts = peaks['time']
    ends = starts + peaks['length'] * peaks['dt']
    assert np.all(ends[:-1] + gap_threshold <= starts[1:])

    assert np.all(starts == np.sort(starts)), "Not sorted"
    assert np.all(starts < strax.endtime(peaks)), "Non+ peak length"
def compute(self, records):
    """Turn records into split peaks with computed widths.

    Finds hits above the configured threshold, clusters them into
    peaks, sums the waveforms, splits the peaks and computes their
    widths.

    :param records: records to process
    :return: array of peaks with dtype self.dtype
    """
    cfg = self.config
    to_pe = cfg['to_pe']

    hits = strax.sort_by_time(
        strax.find_hits(records, threshold=cfg['hit_threshold']))

    peaks = strax.find_peaks(
        hits, to_pe,
        result_dtype=self.dtype,
        gap_threshold=cfg['peak_gap_threshold'],
        left_extension=cfg['peak_left_extension'],
        right_extension=cfg['peak_right_extension'],
        min_channels=cfg['peak_min_chan'],
        min_area=cfg['peak_min_area'],
        max_duration=cfg['peak_max_duration'])
    strax.sum_waveform(peaks, records, adc_to_pe=to_pe)

    # Drop peaks with a non-positive dt (strange edge case)
    has_valid_dt = peaks['dt'] > 0
    peaks = peaks[has_valid_dt]

    peaks = strax.split_peaks(
        peaks, records, to_pe,
        min_height=cfg['split_min_height'],
        min_ratio=cfg['split_min_ratio'])
    strax.compute_widths(peaks)
    return peaks
def compute(self, records):
    """Build peaks from records: hitfinding, clustering, summing, splitting.

    :param records: records to process
    :return: array of peaks with dtype self.dtype
    :raises AssertionError: if the 'diagnose_sorting' option is set and
        records or hits are not time-ordered, or peaks overlap
    """
    r = records
    hits = strax.find_hits(r)  # TODO: Duplicate work
    hits = strax.sort_by_time(hits)

    # to_pe is expected to be defined at module level
    peaks = strax.find_peaks(hits, to_pe,
                             result_dtype=self.dtype)
    strax.sum_waveform(peaks, r, to_pe)

    peaks = strax.split_peaks(peaks, r, to_pe)

    strax.compute_widths(peaks)

    if self.config['diagnose_sorting']:
        # np.all on the differences (instead of .min()) stays valid for
        # inputs with fewer than two entries, where np.diff is empty and
        # .min() would raise ValueError.
        assert np.all(np.diff(r['time']) >= 0), "Records not sorted"
        assert np.all(np.diff(hits['time']) >= 0), "Hits not sorted"
        assert np.all(peaks['time'][1:]
                      >= strax.endtime(peaks)[:-1]), "Peaks not disjoint"

    return peaks
def test_sum_waveform(records):
    """Check strax.sum_waveform against a straightforward per-record sum.

    :param records: records from which hits and peaks are built
    """
    n_ch = 100

    record_links = strax.record_links(records)
    hits = strax.find_hits(records, np.ones(n_ch))
    # Integrate exactly the hit region, no extension
    for bound in ('left', 'right'):
        hits[bound + '_integration'] = hits[bound]
    hits = strax.sort_by_time(hits)

    peaks = strax.find_peaks(
        hits, np.ones(n_ch),
        gap_threshold=6,
        left_extension=2,
        right_extension=3,
        min_area=0,
        min_channels=1,
        max_duration=10_000_000)
    strax.sum_waveform(peaks, hits, records, record_links, np.ones(n_ch))

    for peak in peaks:
        # All area measures have to agree
        total_area = peak['area']
        assert total_area >= 0
        assert peak['data'].sum() == total_area
        assert peak['area_per_channel'].sum() == total_area

        # Rebuild the sum waveform by hand from the overlapping records
        n_samples = peak['length']
        expected_wv = np.zeros(n_samples, dtype=np.float32)
        for rec in records:
            (rec_lo, rec_hi), (peak_lo, peak_hi) = strax.overlap_indices(
                rec['time'], rec['length'],
                peak['time'], n_samples)
            expected_wv[peak_lo:peak_hi] += rec['data'][rec_lo:rec_hi]

        assert np.all(peak['data'][:n_samples] == expected_wv)

    # Summing only a selection of peaks must work as well
    strax.sum_waveform(peaks, hits, records, record_links, np.ones(n_ch),
                       select_peaks_indices=np.array([0]))
def compute(self, records):
    """Build peaks from records, ignoring hits in zero-gain channels.

    :param records: records to process
    :return: array of peaks with dtype self.dtype
    :raises AssertionError: if the 'diagnose_sorting' option is set and
        records or hits are not time-ordered, or peaks overlap
    """
    r = records
    hits = strax.find_hits(r)

    # Remove hits in zero-gain channels
    # they should not affect the clustering!
    hits = hits[self.to_pe[hits['channel']] != 0]

    hits = strax.sort_by_time(hits)

    peaks = strax.find_peaks(
        hits, self.to_pe,
        gap_threshold=self.config['peak_gap_threshold'],
        left_extension=self.config['peak_left_extension'],
        right_extension=self.config['peak_right_extension'],
        min_channels=self.config['peak_min_pmts'],
        result_dtype=self.dtype)
    strax.sum_waveform(peaks, r, self.to_pe)

    peaks = strax.split_peaks(
        peaks, r, self.to_pe,
        min_height=self.config['peak_split_min_height'],
        min_ratio=self.config['peak_split_min_ratio'])

    strax.compute_widths(peaks)

    if self.config['diagnose_sorting']:
        # np.all on the differences (instead of .min()) stays valid for
        # inputs with fewer than two entries, where np.diff is empty and
        # .min() would raise ValueError.
        assert np.all(np.diff(r['time']) >= 0), "Records not sorted"
        assert np.all(np.diff(hits['time']) >= 0), "Hits not sorted"
        assert np.all(peaks['time'][1:]
                      >= strax.endtime(peaks)[:-1]), "Peaks not disjoint"

    return peaks
def find_peak_groups(
        peaks,
        gap_threshold,
        left_extension=0,
        right_extension=0,
        max_duration=int(1e9),
):
    """Find the boundaries of groups of peaks separated by gap_threshold.

    :param peaks: Peaks to group
    :param gap_threshold: Minimum gap between peaks
    :param left_extension: Extend groups by this many ns to the left
    :param right_extension: Extend groups by this many ns to the right
    :param max_duration: max duration time of merged peak in ns
    :return: time, endtime arrays of group boundaries
    """
    # Each peak is mocked up as a unit-area "hit" with dt = 1 so that the
    # existing peakfinder can do the grouping. Raw peaks cannot be used
    # directly since they might have different dts.
    peak_start = peaks['time']
    pseudo_hits = np.zeros(len(peaks), dtype=strax.hit_dtype)
    pseudo_hits['time'] = peak_start
    pseudo_hits['length'] = strax.endtime(peaks) - peak_start
    pseudo_hits['dt'] = 1
    pseudo_hits['area'] = 1

    # A non-positive length is probably an int overflow
    lengths_ok = np.all(pseudo_hits['length'] > 0)
    assert lengths_ok, "Attempt to create invalid hit"

    groups = strax.find_peaks(
        pseudo_hits,
        adc_to_pe=np.ones(1),
        gap_threshold=gap_threshold,
        left_extension=left_extension,
        right_extension=right_extension,
        min_channels=1,
        min_area=0,
        max_duration=max_duration)
    return groups['time'], strax.endtime(groups)
def find_peak_groups(peaks, gap_threshold,
                     left_extension=0, right_extension=0,
                     max_duration=int(1e9)):
    """Cluster peaks into groups using the hit-based peakfinder.

    :param peaks: peaks to group; the 'time' and 'endtime' fields are used
    :param gap_threshold: gap_threshold passed to strax.find_peaks
    :param left_extension: left_extension passed to strax.find_peaks
    :param right_extension: right_extension passed to strax.find_peaks
    :param max_duration: max_duration passed to strax.find_peaks
    :return: the array returned by strax.find_peaks for the mocked hits
    """
    # The peakfinder is reused by disguising every peak as a hit with
    # dt = 1; raw peaks cannot be fed in since their dts may differ.
    # TODO: is there no cleaner way?
    peak_start = peaks['time']
    pseudo_hits = np.zeros(len(peaks), dtype=strax.hit_dtype)
    pseudo_hits['time'] = peak_start
    # TODO: could this cause int nonsense?
    pseudo_hits['length'] = peaks['endtime'] - peak_start
    pseudo_hits['dt'] = 1

    finder_options = dict(
        to_pe=np.zeros(1),
        gap_threshold=gap_threshold,
        left_extension=left_extension,
        right_extension=right_extension,
        min_hits=1,
        min_area=0,
        max_duration=max_duration)
    groups = strax.find_peaks(pseudo_hits, **finder_options)

    # TODO: cleanup input of meaningless fields?
    # (e.g. sum waveform)
    return groups
def software_he_veto(records, to_pe, chunk_end,
                     area_threshold=int(1e5),
                     veto_length=int(3e6),
                     veto_res=int(1e3),
                     pass_veto_fraction=0.01,
                     pass_veto_extend=3,
                     max_veto_value=None):
    """Veto veto_length (time in ns) after peaks larger than
    area_threshold (in PE).

    Further large peaks inside the veto regions are still passed:
    We sum the waveform inside the veto region (with time resolution
    veto_res in ns) and pass regions within pass_veto_extend samples
    of samples with amplitude above pass_veto_fraction times the maximum.

    :returns: (preserved records, vetoed records, veto intervals).

    :param records: PMT records
    :param to_pe: ADC to PE conversion factors for the channels in records.
    :param chunk_end: Endtime of chunk to set as maximum ceiling for the
        veto period
    :param area_threshold: Minimum peak area to trigger the veto.
        Note we use a much rougher clustering than in later processing.
    :param veto_length: Time in ns to veto after the peak
    :param veto_res: Resolution of the sum waveform inside the veto region.
        Do not make too large without increasing integer type in some strax
        dtypes...
    :param pass_veto_fraction: fraction of maximum sum waveform amplitude to
        trigger veto passing of further peaks
    :param pass_veto_extend: samples to extend (left and right) the pass veto
        regions.
    :param max_veto_value: if not None, pass peaks that exceed this area
        no matter what.
    :raises ValueError: if veto_res does not fit in a 16-bit integer
    """
    veto_res = int(veto_res)
    if veto_res > np.iinfo(np.int16).max:
        raise ValueError("Veto resolution does not fit 16-bit int")

    # Round the veto length up to a whole number of veto_res bins.
    # The builtin int is used since the np.int alias no longer exists
    # in current numpy releases.
    veto_length = np.ceil(veto_length / veto_res).astype(int) * veto_res
    veto_n = int(veto_length / veto_res) + 1

    # 1. Find large peaks in the data.
    # This will actually return big agglomerations of peaks and their tails
    peaks = strax.find_peaks(
        records, to_pe,
        gap_threshold=1,
        left_extension=0,
        right_extension=0,
        min_channels=100,
        min_area=area_threshold,
        result_dtype=strax.peak_dtype(n_channels=len(to_pe),
                                      n_sum_wv_samples=veto_n))

    # 2a. Set 'candidate regions' at these peaks. These should:
    #  - Have a fixed maximum length (else we can't use the strax
    #    hitfinder on them)
    #  - Never extend beyond the current chunk
    #  - Do not overlap
    veto_start = peaks['time']
    veto_end = np.clip(peaks['time'] + veto_length,
                       None,
                       chunk_end)
    veto_end[:-1] = np.clip(veto_end[:-1], None, veto_start[1:])

    # 2b. Convert these into strax record-like objects
    # Note the waveform is float32 though (it's a summed waveform)
    regions = np.zeros(
        len(veto_start),
        dtype=strax.interval_dtype + [
            ("data", (np.float32, veto_n)),
            ("baseline", np.float32),
            ("baseline_rms", np.float32),
            ("reduction_level", np.int64),
            ("record_i", np.int64),
            ("pulse_length", np.int64),
        ])
    regions['time'] = veto_start
    # NOTE(review): dividing by veto_n (a sample count) rather than
    # veto_res (ns per sample) looks suspicious -- confirm intent.
    regions['length'] = (veto_end - veto_start) // veto_n
    regions['pulse_length'] = veto_n
    regions['dt'] = veto_res

    if not len(regions):
        # No veto anywhere in this data
        return records, records[:0], np.zeros(0, strax.hit_dtype)

    # 3. Find pass_veto regions with big peaks inside the veto regions.
    # For this we compute a rough sum waveform (at low resolution,
    # without looping over the pulse data)
    rough_sum(regions, records, to_pe, veto_n, veto_res)
    if max_veto_value is not None:
        pass_veto = strax.find_hits(regions, min_amplitude=max_veto_value)
    else:
        # Normalise every region to its own maximum
        regions['data'] /= np.max(regions['data'], axis=1)[:, np.newaxis]
        pass_veto = strax.find_hits(regions,
                                    min_amplitude=pass_veto_fraction)

    # 4. Extend these by a few samples and inverse to find veto regions
    regions['data'] = 1
    regions = strax.cut_outside_hits(
        regions,
        pass_veto,
        left_extension=pass_veto_extend,
        right_extension=pass_veto_extend)
    regions['data'] = 1 - regions['data']
    veto = strax.find_hits(regions, min_amplitude=1)
    # Do not remove very tiny regions
    veto = veto[veto['length'] > 2 * pass_veto_extend]

    # 5. Apply the veto and return results
    veto_mask = strax.fully_contained_in(records, veto) == -1
    return tuple(list(mask_and_not(records, veto_mask)) + [veto])
def compute(self, records, start, end):
    """Build peaklets and lone hits from the records of one chunk.

    :param records: records to process
    :param start: start of the chunk; passed to clip_peaklet_times and
        create_outside_peaks_region
    :param end: end of the chunk; passed to clip_peaklet_times and
        create_outside_peaks_region
    :return: dict with the 'peaklets' and 'lone_hits' arrays
    :raises AssertionError: if the 'diagnose_sorting' option is set and
        records or hitlets are not time-ordered, or peaklets overlap
    """
    r = records

    hits = strax.find_hits(r, min_amplitude=self.hit_thresholds)

    # Remove hits in zero-gain channels
    # they should not affect the clustering!
    hits = hits[self.to_pe[hits['channel']] != 0]

    hits = strax.sort_by_time(hits)

    # Use peaklet gap threshold for initial clustering
    # based on gaps between hits
    peaklets = strax.find_peaks(
        hits, self.to_pe,
        gap_threshold=self.config['peaklet_gap_threshold'],
        left_extension=self.config['peak_left_extension'],
        right_extension=self.config['peak_right_extension'],
        min_channels=self.config['peak_min_pmts'],
        result_dtype=self.dtype_for('peaklets'),
        max_duration=self.config['peaklet_max_duration'],
    )

    # Make sure peaklets don't extend out of the chunk boundary
    # This should be very rare in normal data due to the ADC pretrigger
    # window.
    self.clip_peaklet_times(peaklets, start, end)

    # Get hits outside peaklets, and store them separately.
    # fully_contained is OK provided gap_threshold > extension,
    # which is asserted inside strax.find_peaks.
    is_lone_hit = strax.fully_contained_in(hits, peaklets) == -1
    lone_hits = hits[is_lone_hit]
    strax.integrate_lone_hits(
        lone_hits, records, peaklets,
        save_outside_hits=(self.config['peak_left_extension'],
                           self.config['peak_right_extension']),
        n_channels=len(self.to_pe))

    # Compute basic peak properties -- needed before natural breaks
    hits = hits[~is_lone_hit]
    # Define regions outside of peaks such that
    # _find_hit_integration_bounds is not extended beyond a peak.
    outside_peaks = self.create_outside_peaks_region(peaklets, start, end)
    strax.find_hit_integration_bounds(
        hits, outside_peaks, records,
        save_outside_hits=(self.config['peak_left_extension'],
                           self.config['peak_right_extension']),
        n_channels=len(self.to_pe),
        allow_bounds_beyond_records=True,
    )

    # Transform hits to hitlets for naming conventions. A hit refers
    # to the central part above threshold a hitlet to the entire signal
    # including the left and right extension.
    # (We are not going to use the actual hitlet data_type here.)
    hitlets = hits
    del hits

    hitlet_time_shift = ((hitlets['left'] - hitlets['left_integration'])
                         * hitlets['dt'])
    hitlets['time'] = hitlets['time'] - hitlet_time_shift
    hitlets['length'] = (hitlets['right_integration']
                         - hitlets['left_integration'])
    hitlets = strax.sort_by_time(hitlets)
    # Shifting the start times can change the time ordering, so the
    # per-hitlet shift is recomputed from the sorted array to keep it
    # aligned element by element with hitlets.
    hitlet_time_shift = ((hitlets['left'] - hitlets['left_integration'])
                         * hitlets['dt'])

    rlinks = strax.record_links(records)

    strax.sum_waveform(peaklets, hitlets, r, rlinks, self.to_pe)

    strax.compute_widths(peaklets)

    # Split peaks using low-split natural breaks;
    # see https://github.com/XENONnT/straxen/pull/45
    # and https://github.com/AxFoundation/strax/pull/225
    peaklets = strax.split_peaks(
        peaklets, hitlets, r, rlinks, self.to_pe,
        algorithm='natural_breaks',
        threshold=self.natural_breaks_threshold,
        split_low=True,
        filter_wing_width=self.config['peak_split_filter_wing_width'],
        min_area=self.config['peak_split_min_area'],
        do_iterations=self.config['peak_split_iterations'])

    # Saturation correction using non-saturated channels
    # similar method used in pax
    # see https://github.com/XENON1T/pax/pull/712
    # Cases when records is not writeable for unclear reason
    # only see this when loading 1T test data
    # more details on https://numpy.org/doc/stable/reference/generated/numpy.ndarray.flags.html
    if not r['data'].flags.writeable:
        r = r.copy()

    if self.config['saturation_correction_on']:
        peak_list = peak_saturation_correction(
            r, rlinks, peaklets, hitlets, self.to_pe,
            reference_length=self.config['saturation_reference_length'],
            min_reference_length=self.config[
                'saturation_min_reference_length'])

        # Compute the width again for corrected peaks
        strax.compute_widths(peaklets, select_peaks_indices=peak_list)

    # Compute tight coincidence level.
    # Making this a separate plugin would
    # (a) doing hitfinding yet again (or storing hits)
    # (b) increase strax memory usage / max_messages,
    #     possibly due to its currently primitive scheduling.
    hit_max_times = (
        hitlets['time']
        + hitlets['dt'] * hit_max_sample(records, hitlets)
        + hitlet_time_shift  # add time shift again to get correct maximum
    )
    # Sort the times and the channels with the same permutation, so that
    # every maximum time stays paired with the channel it belongs to.
    max_time_order = np.argsort(hit_max_times, kind='mergesort')
    sorted_hit_max_times = hit_max_times[max_time_order]
    sorted_hit_channels = hitlets['channel'][max_time_order]

    peaklet_max_times = (
        peaklets['time']
        + np.argmax(peaklets['data'], axis=1) * peaklets['dt'])
    tight_coincidence_channel = get_tight_coin(
        sorted_hit_max_times,
        sorted_hit_channels,
        peaklet_max_times,
        self.config['tight_coincidence_window_left'],
        self.config['tight_coincidence_window_right'],
        self.channel_range)

    peaklets['tight_coincidence'] = tight_coincidence_channel

    if self.config['diagnose_sorting'] and len(r):
        assert np.diff(r['time']).min(initial=1) >= 0, "Records not sorted"
        assert np.diff(
            hitlets['time']).min(initial=1) >= 0, "Hits/Hitlets not sorted"
        assert np.all(peaklets['time'][1:]
                      >= strax.endtime(peaklets)[:-1]), "Peaks not disjoint"

    # Update nhits of peaklets:
    counts = strax.touching_windows(hitlets, peaklets)
    counts = np.diff(counts, axis=1).flatten()
    peaklets['n_hits'] = counts

    return dict(peaklets=peaklets,
                lone_hits=lone_hits)
def compute(self, records, start, end):
    """Build peaklets and lone hits from the records of one chunk.

    :param records: records to process
    :param start: start of the chunk; passed to clip_peaklet_times
    :param end: end of the chunk; passed to clip_peaklet_times
    :return: dict with the 'peaklets' and 'lone_hits' arrays
    :raises AssertionError: if the 'diagnose_sorting' option is set and
        records or hits are not time-ordered, or peaklets overlap
    """
    r = records

    hits = strax.find_hits(
        r,
        min_amplitude=straxen.hit_min_amplitude(
            self.config['hit_min_amplitude']))

    # Remove hits in zero-gain channels
    # they should not affect the clustering!
    hits = hits[self.to_pe[hits['channel']] != 0]

    hits = strax.sort_by_time(hits)

    # Use peaklet gap threshold for initial clustering
    # based on gaps between hits
    peaklets = strax.find_peaks(
        hits, self.to_pe,
        gap_threshold=self.config['peaklet_gap_threshold'],
        left_extension=self.config['peak_left_extension'],
        right_extension=self.config['peak_right_extension'],
        min_channels=self.config['peak_min_pmts'],
        result_dtype=self.dtype_for('peaklets'))

    # Make sure peaklets don't extend out of the chunk boundary
    # This should be very rare in normal data due to the ADC pretrigger
    # window.
    self.clip_peaklet_times(peaklets, start, end)

    # Get hits outside peaklets, and store them separately.
    # fully_contained is OK provided gap_threshold > extension,
    # which is asserted inside strax.find_peaks.
    lone_hits = hits[strax.fully_contained_in(hits, peaklets) == -1]
    strax.integrate_lone_hits(
        lone_hits, records, peaklets,
        save_outside_hits=(self.config['peak_left_extension'],
                           self.config['peak_right_extension']),
        n_channels=len(self.to_pe))

    # Compute basic peak properties -- needed before natural breaks
    strax.sum_waveform(peaklets, r, self.to_pe)
    strax.compute_widths(peaklets)

    # Split peaks using low-split natural breaks;
    # see https://github.com/XENONnT/straxen/pull/45
    # and https://github.com/AxFoundation/strax/pull/225
    peaklets = strax.split_peaks(
        peaklets, r, self.to_pe,
        algorithm='natural_breaks',
        threshold=self.natural_breaks_threshold,
        split_low=True,
        filter_wing_width=self.config['peak_split_filter_wing_width'],
        min_area=self.config['peak_split_min_area'],
        do_iterations=self.config['peak_split_iterations'])

    # Saturation correction using non-saturated channels
    # similar method used in pax
    # see https://github.com/XENON1T/pax/pull/712
    if self.config['saturation_correction_on']:
        peak_saturation_correction(
            r, peaklets, self.to_pe,
            reference_length=self.config['saturation_reference_length'],
            min_reference_length=self.config[
                'saturation_min_reference_length'])

    # Compute tight coincidence level.
    # Making this a separate plugin would
    # (a) doing hitfinding yet again (or storing hits)
    # (b) increase strax memory usage / max_messages,
    #     possibly due to its currently primitive scheduling.
    hit_max_times = np.sort(
        hits['time']
        + hits['dt'] * hit_max_sample(records, hits))
    peaklet_max_times = (
        peaklets['time']
        + np.argmax(peaklets['data'], axis=1) * peaklets['dt'])
    peaklets['tight_coincidence'] = get_tight_coin(
        hit_max_times,
        peaklet_max_times,
        self.config['tight_coincidence_window_left'],
        self.config['tight_coincidence_window_right'])

    if self.config['diagnose_sorting'] and len(r):
        assert np.diff(r['time']).min(initial=1) >= 0, "Records not sorted"
        assert np.diff(hits['time']).min(initial=1) >= 0, "Hits not sorted"
        assert np.all(peaklets['time'][1:]
                      >= strax.endtime(peaklets)[:-1]), "Peaks not disjoint"

    # Update nhits of peaklets.
    # touching_windows is taken to yield (start, exclusive end) index
    # pairs, so their difference already is the number of touching hits;
    # adding one on top would overcount every peaklet by one hit.
    counts = strax.touching_windows(hits, peaklets)
    counts = np.diff(counts, axis=1).flatten()
    peaklets['n_hits'] = counts

    return dict(peaklets=peaklets,
                lone_hits=lone_hits)
def test_peak_overflow(
        records,
        gap_factor,
        right_extension,
        gap_threshold,
        max_duration,
):
    """Test that dt overflows in peaks are handled correctly.

    The supplied records are copied many times with growing time
    offsets, which yields a very long artificial pulse train. With odd
    peak-finding settings a peak may then become so long that its dt
    field would overflow; the peakfinder is expected to refuse this
    with an assertion instead of writing out nonsense.

    :param records: records
    :param gap_factor: factor multiplied into the time offsets, to more
        quickly arrive at a very long pulse train
    :param right_extension: option for strax.find_peaks
    :param gap_threshold: option for strax.find_peaks
    :param max_duration: max_duration option for strax.find_peaks
    :return: None
    """
    # Left and right need not be tested independently
    left_extension = 0
    total_extension = left_extension + right_extension

    # NB! This is only for before #403, now peaks are int32 so
    # this test would take forever with int32.
    n_peak_samples = np.zeros(0, strax.peak_dtype()).dtype['data'].shape[0]
    magic_overflow_time = np.iinfo(np.int16).max * n_peak_samples

    def always_one(x):
        """Threshold function for natural breaks: one for each entry.

        :param x: any type of array
        :return: ones * len(array)
        """
        return np.ones(len(x))

    r = records
    if not len(r) or len(r['channel']) == 1:
        # Hard to test integer overflow for empty records or with
        # records only from a single channel
        return

    # Repeat the same pulse train of records many times over
    t_max = strax.endtime(r).max()
    print('make buffer')
    stretched = t_max * gap_factor
    span = 1.5 * magic_overflow_time + stretched
    n_repeat = int(span) // int(stretched) + 1
    time_offset = np.linspace(0, span, n_repeat, dtype=np.int64)

    r_buffer = np.tile(r, n_repeat // len(r) + 1)[:len(time_offset)]
    assert len(r_buffer) == len(time_offset)
    r_buffer['time'] = r_buffer['time'] + time_offset
    buffer_duration = strax.endtime(r_buffer[-1]) - r_buffer['time'].min()
    assert buffer_duration > magic_overflow_time
    r = r_buffer.copy()
    del r_buffer
    print(f'Array is {r.nbytes/1e6} MB, good luck')

    # Do peak finding!
    print('Find hits')
    hits = strax.find_hits(r, min_amplitude=0)
    assert len(hits)
    hits = strax.sort_by_time(hits)

    # Dummy to_pe
    to_pe = np.ones(max(r['channel']) + 1)

    extensions_incongruent = not gap_threshold > total_extension
    may_overflow = not (left_extension + max_duration + right_extension
                        < magic_overflow_time)

    try:
        print('Find peaks')
        # Find peaks, we might end up with negative dt here!
        # min_area and min_channels are set such that strax merges
        # whatever it can get its hands on.
        peaks = strax.find_peaks(
            hits, to_pe,
            gap_threshold=gap_threshold,
            left_extension=left_extension,
            right_extension=right_extension,
            max_duration=max_duration,
            min_area=0.,
            min_channels=1,
        )
    except AssertionError as e:
        if extensions_incongruent:
            print('Great, we are getting the assertion statement for the '
                  'incongruent extensions')
            return
        if may_overflow:
            # Ending up here is the ultimate goal of the tests. This
            # means we are hitting github.com/AxFoundation/strax/issues/397
            print('Great, the test worked, we are getting the assertion '
                  'statement for the int overflow')
            return
        # The error is caused by something else, we need to re-raise
        raise e

    print(f'Peaklet array is {peaks.nbytes / 1e6} MB, good luck')
    if len(peaks) == 0:
        print(f'rec length {len(r)}')
    assert len(peaks)
    assert np.all(peaks['dt'] > 0)

    # Double check that this error should have been raised.
    if extensions_incongruent:
        raise ValueError('No assertion error raised! Working with'
                         f'{gap_threshold} {total_extension}')

    # Compute basics
    hits = strax.find_hits(r, np.ones(10000))
    for bound in ('left', 'right'):
        hits[bound + '_integration'] = hits[bound]
    rlinks = strax.record_links(r)
    strax.sum_waveform(peaks, hits, r, rlinks, to_pe)
    strax.compute_widths(peaks)

    try:
        print('Split peaks')
        peaklets = strax.split_peaks(
            peaks, hits, r, rlinks, to_pe,
            algorithm='natural_breaks',
            threshold=always_one,
            split_low=True,
            filter_wing_width=70,
            min_area=0,
            do_iterations=2)
    except AssertionError as e:
        if may_overflow:
            # Ending up here is the ultimate goal of the tests. This
            # means we are hitting github.com/AxFoundation/strax/issues/397
            print('Great, the test worked, we are getting the assertion '
                  'statement for the int overflow')
            raise RuntimeError(
                'We were not properly warned of the imminent peril we are '
                'facing. This error means that the peak_finding is not '
                'protected against integer overflow in the dt field. Where is '
                'our white knight in shining armour to protected from this '
                'imminent doom:\n'
                'github.com/AxFoundation/strax/issues/397') from e
        # We failed for another reason, we need to re-raise
        raise e

    assert len(peaklets)
    assert len(peaklets) <= len(r)
    # Integer overflow will manifest itself here again:
    assert np.all(peaklets['dt'] > 0)
def show_time_range(st, run_id, t0, dt=10):
    """Build an interactive holoviews display of a stretch of raw data.

    The layout holds the summed waveforms of the largest peaks, the top
    and bottom PMT hit patterns and a datashaded channel-vs-time map;
    the peak and PMT panels follow the time range selected in the
    channel map.

    :param st: context whose get_array is used to load 'raw_records'
    :param run_id: run to load from
    :param t0: start of the time range to load, in ns
    :param dt: length of the time range to load, in seconds
    :return: holoviews layout with two columns
    """
    from functools import partial

    import numpy as np
    import pandas as pd
    import holoviews as hv
    from holoviews.operation.datashader import datashade, dynspread
    hv.extension('bokeh')

    import strax

    import gc
    # Somebody thought it was a good idea to call gc.collect explicitly
    # somewhere in holoviews. This makes dynamic PMT maps super slow.
    # Until I trace the offender:
    # NOTE: this replaces gc.collect for the whole process.
    gc.collect = lambda *args, **kwargs: None

    # Custom wheel zoom tool that only zooms in time
    from bokeh.models import WheelZoomTool
    time_zoom = WheelZoomTool(dimensions='width')

    # Get ADC->pe multiplicative conversion factor
    from pax.configuration import load_configuration
    from pax.dsputils import adc_to_pe
    pax_config = load_configuration('XENON1T')["DEFAULT"]
    to_pe = np.array(
        [adc_to_pe(pax_config, ch)
         for ch in range(pax_config['n_channels'])])

    tpc_r = pax_config['tpc_radius']

    # Get locations of PMTs
    r = [dict(x=q['position']['x'],
              y=q['position']['y'],
              i=q['pmt_position'],
              array=q.get('array', 'other'))
         for q in pax_config['pmts']]
    # Margin factor for the axis ranges of the PMT maps
    f = 1.08
    pmt_locs = pd.DataFrame(r)

    # Honour the requested window length (dt in seconds, times in ns);
    # for the default dt this is the same 1e10 ns window as before.
    records = st.get_array(run_id, 'raw_records',
                           time_range=(t0, t0 + int(dt * 1e9)))

    # TODO: don't reprocess, just load...
    hits = strax.find_hits(records)
    peaks = strax.find_peaks(hits, to_pe,
                             gap_threshold=300,
                             min_hits=3,
                             result_dtype=strax.peak_dtype(n_channels=260))
    strax.sum_waveform(peaks, records, to_pe)

    # Integral in pe
    areas = records['data'].sum(axis=1) * to_pe[records['channel']]

    def normalize_time(t):
        """Convert ns to seconds since the start of the first record."""
        return (t - records[0]['time']) / 1e9

    # Create dataframe with record metadata
    df = pd.DataFrame(
        dict(area=areas,
             time=normalize_time(records['time']),
             channel=records['channel']))

    # Convert to holoviews Points
    points = hv.Points(
        df,
        kdims=[
            hv.Dimension('time', label='Time', unit='sec'),
            hv.Dimension('channel', label='PMT number', range=(0, 260))
        ],
        vdims=[
            hv.Dimension('area', label='Area', unit='pe')
        ])

    def pmt_map(t_0, t_1, array='top', **kwargs):
        """Return the hit pattern of one PMT array for [t_0, t_1)."""
        # Compute the PMT pattern (fast)
        ps = points[(t_0 <= points['time']) & (points['time'] < t_1)]
        areas = np.bincount(ps['channel'],
                            weights=ps['area'],
                            minlength=len(pmt_locs))

        # Which PMTs should we include?
        pmt_mask = pmt_locs['array'] == array
        d = pmt_locs[pmt_mask].copy()
        d['area'] = areas[pmt_mask]

        # Convert to holoviews points
        d = hv.Dataset(
            d,
            kdims=[
                hv.Dimension('x', unit='cm',
                             range=(-tpc_r * f, tpc_r * f)),
                hv.Dimension('y', unit='cm',
                             range=(-tpc_r * f, tpc_r * f)),
                hv.Dimension('i', label='PMT number'),
                hv.Dimension('area', label='Area', unit='PE')
            ])

        return d.to(
            hv.Points,
            vdims=['area', 'i'],
            group='PMTPattern',
            label=array.capitalize(),
            **kwargs).opts(
                plot=dict(color_index=2,
                          tools=['hover'],
                          show_grid=False),
                style=dict(size=17,
                           cmap='magma'))

    def pmt_map_range(x_range, array='top', **kwargs):
        """pmt_map wrapper for use in a DynamicMap with streams."""
        if x_range is None:
            x_range = (0, 0)
        return pmt_map(x_range[0], x_range[1], array=array, **kwargs)

    xrange_stream = hv.streams.RangeX(source=points)

    # TODO: weigh by area
    def channel_map():
        """Return the datashaded channel-vs-time map of all records."""
        return dynspread(
            datashade(
                points,
                y_range=(0, 260),
                streams=[xrange_stream])).opts(
                    plot=dict(
                        width=600,
                        tools=[time_zoom, 'xpan'],
                        default_tools=['save', 'pan', 'box_zoom',
                                       'save', 'reset'],
                        show_grid=False))

    def plot_peak(p):
        """Return the sum waveform of a single peak as a step curve."""
        # It's better to plot amplitude /time than per bin, since
        # sampling times are now variable
        y = p['data'][:p['length']] / p['dt']
        t_edges = np.arange(p['length'] + 1, dtype=np.int64)
        t_edges = t_edges * p['dt'] + p['time']
        t_edges = normalize_time(t_edges)

        # Correct step plotting from Knut: every sample contributes its
        # left and its right bin edge with the same amplitude
        t_ = np.zeros(2 * len(y))
        y_ = np.zeros(2 * len(y))
        t_[0::2] = t_edges[0:-1]
        t_[1::2] = t_edges[1::]
        y_[0::2] = y
        y_[1::2] = y

        c = hv.Curve(dict(time=t_, amplitude=y_),
                     kdims=points.kdims[0],
                     vdims=hv.Dimension('amplitude',
                                        label='Amplitude',
                                        unit='PE/ns'),
                     group='PeakSumWaveform')
        return c.opts(
            plot=dict(
                width=600,
                shared_axes=False,
                show_grid=True),
            style=dict(color='b'))

    def peaks_in(t_0, t_1):
        """Return the peaks that overlap the range (t_0, t_1), in sec."""
        return peaks[
            (normalize_time(peaks['time']
                            + peaks['length'] * peaks['dt']) > t_0)
            & (normalize_time(peaks['time']) < t_1)]

    def plot_peaks(t_0, t_1, n_max=10):
        """Overlay the waveforms of the largest peaks in a time range."""
        # Find peaks in this range
        ps = peaks_in(t_0, t_1)

        # Show only the largest n_max peaks
        if len(ps) > n_max:
            areas = ps['area']
            max_area = np.sort(areas)[-n_max]
            ps = ps[areas >= max_area]

        return hv.Overlay(items=[plot_peak(p) for p in ps])

    def plot_peak_range(x_range, **kwargs):
        """plot_peaks wrapper for use in a DynamicMap with streams."""
        if x_range is None:
            x_range = (0, 10)
        return plot_peaks(x_range[0], x_range[1], **kwargs)

    top_map = hv.DynamicMap(partial(pmt_map_range, array='top'),
                            streams=[xrange_stream])
    bot_map = hv.DynamicMap(partial(pmt_map_range, array='bottom'),
                            streams=[xrange_stream])
    waveform = hv.DynamicMap(plot_peak_range, streams=[xrange_stream])
    layout = waveform + top_map + channel_map() + bot_map
    return layout.cols(2)
def software_he_veto(records, to_pe,
                     area_threshold=int(1e5),
                     veto_length=int(3e6),
                     veto_res=int(1e3),
                     pass_veto_fraction=0.01,
                     pass_veto_extend=3):
    """Veto veto_length (time in ns) after peaks larger than
    area_threshold (in PE).

    Further large peaks inside the veto regions are still passed:
    We sum the waveform inside the veto region (with time resolution
    veto_res in ns) and pass regions within pass_veto_extend samples
    of samples with amplitude above pass_veto_fraction times the maximum.

    :returns: (preserved records, vetoed records, veto intervals).

    :param records: PMT records
    :param to_pe: ADC to PE conversion factors for the channels in records.
    :param area_threshold: Minimum peak area to trigger the veto.
        Note we use a much rougher clustering than in later processing.
    :param veto_length: Time in ns to veto after the peak
    :param veto_res: Resolution of the sum waveform inside the veto region.
        Do not make too large without increasing integer type in some strax
        dtypes...
    :param pass_veto_fraction: fraction of maximum sum waveform amplitude to
        trigger veto passing of further peaks
    :param pass_veto_extend: samples to extend (left and right) the pass veto
        regions.
    :raises ValueError: if veto_res does not fit in a 16-bit integer
    """
    veto_res = int(veto_res)
    if veto_res > np.iinfo(np.int16).max:
        raise ValueError("Veto resolution does not fit 16-bit int")

    if not len(records):
        # Nothing to veto; also avoids indexing records[-1] further down
        return records, records[:0], np.zeros(0, strax.hit_dtype)

    # Round the veto length up to a whole number of veto_res bins.
    # The builtin int is used since the np.int alias no longer exists
    # in current numpy releases.
    veto_length = np.ceil(veto_length / veto_res).astype(int) * veto_res
    veto_n = int(veto_length / veto_res) + 1

    # 1. Find large peaks in the data.
    # This will actually return big agglomerations of peaks and their tails
    peaks = strax.find_peaks(
        records, to_pe,
        gap_threshold=1,
        left_extension=0,
        right_extension=0,
        min_channels=100,
        min_area=area_threshold,
        result_dtype=strax.peak_dtype(n_channels=len(to_pe),
                                      n_sum_wv_samples=veto_n))

    # 2. Find initial veto regions around these peaks
    # (with a generous right extension)
    veto_start, veto_end = strax.find_peak_groups(
        peaks,
        gap_threshold=veto_length + 2 * veto_res,
        right_extension=veto_length,
        left_extension=veto_res)
    veto_end = veto_end.clip(0, strax.endtime(records[-1]))
    veto_duration = veto_end - veto_start

    # dtype is like record (since we want to use hitfinding etc)
    # but with float32 waveform
    regions = np.zeros(
        len(veto_start),
        dtype=strax.interval_dtype + [
            ("data", (np.float32, veto_n)),
            ("baseline", np.float32),
            ("reduction_level", np.int64),
            ("record_i", np.int64),
            ("pulse_length", np.int64),
        ])
    regions['time'] = veto_start
    regions['length'] = veto_duration
    regions['pulse_length'] = veto_duration
    regions['dt'] = veto_res

    if not len(regions):
        # No veto anywhere in this data
        return records, records[:0], np.zeros(0, strax.hit_dtype)

    # 3. Find pass_veto regions with big peaks inside the veto regions.
    # For this we compute a rough sum waveform (at low resolution,
    # without looping over the pulse data)
    rough_sum(regions, records, to_pe, veto_n, veto_res)
    # Normalise every region to its own maximum
    regions['data'] /= np.max(regions['data'], axis=1)[:, np.newaxis]
    pass_veto = strax.find_hits(regions, threshold=pass_veto_fraction)

    # 4. Extend these by a few samples and inverse to find veto regions
    regions['data'] = 1
    regions = strax.cut_outside_hits(
        regions,
        pass_veto,
        left_extension=pass_veto_extend,
        right_extension=pass_veto_extend)
    regions['data'] = 1 - regions['data']
    veto = strax.find_hits(regions, threshold=0.5)
    # Do not remove very tiny regions
    veto = veto[veto['length'] > 2 * pass_veto_extend]

    # 5. Apply the veto and return results
    veto_mask = strax.fully_contained_in(records, veto) == -1
    return tuple(list(_mask_and_not(records, veto_mask)) + [veto])