Example #1
import numpy as np
import strax


def test_sum_waveform(records, peak_left, peak_length):
    # Make a single big peak to contain all the records
    n_ch = 100
    peaks = np.zeros(1, strax.peak_dtype(n_ch, n_sum_wv_samples=200))
    p = peaks[0]
    p['time'] = peak_left
    p['length'] = peak_length
    p['dt'] = 1  # 1 ns/sample, matching the records, so the data can be compared sample-by-sample below

    strax.sum_waveform(peaks, records, np.ones(n_ch))

    # Area measures must be consistent
    area = p['area']
    assert area >= 0
    assert p['data'].sum() == area
    assert p['area_per_channel'].sum() == area

    # Create a simple sum waveform
    if not len(records):
        max_sample = 3   # Whatever
    else:
        max_sample = (records['time'] + records['length']).max()
    max_sample = max(max_sample, peak_left + peak_length)
    sum_wv = np.zeros(max_sample + 1, dtype=np.float32)
    for r in records:
        sum_wv[r['time']:r['time'] + r['length']] += r['data'][:r['length']]
    # Select the part inside the peak
    sum_wv = sum_wv[peak_left:peak_left + peak_length]

    assert len(sum_wv) == peak_length
    assert np.all(p['data'][:peak_length] == sum_wv)
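
strax.sum_waveform fills p['data'], p['area'] and p['area_per_channel'] from the records overlapping the peak, which is what the assertions above cross-check. A minimal numpy-only sketch of that accumulation, assuming dt == 1 everywhere (the real function is numba-compiled and also handles waveform downsampling):

import numpy as np

def naive_sum_waveform(peak, records, adc_to_pe):
    """Accumulate record samples into one peak; illustration only."""
    peak['data'][:] = 0
    peak['area_per_channel'][:] = 0
    p0, p1 = peak['time'], peak['time'] + peak['length']
    for r in records:
        ch = r['channel']
        r0, r1 = r['time'], r['time'] + r['length']
        lo, hi = max(r0, p0), min(r1, p1)  # overlap of [r0, r1) and [p0, p1)
        if lo >= hi:
            continue
        w = r['data'][lo - r0:hi - r0] * adc_to_pe[ch]
        peak['data'][lo - p0:hi - p0] += w
        peak['area_per_channel'][ch] += w.sum()
    peak['area'] = peak['area_per_channel'].sum()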
Example #2
    def compute(self, records):
        r = records
        hits = strax.find_hits(r, threshold=self.config['hit_threshold'])
        hits = strax.sort_by_time(hits)

        peaks = strax.find_peaks(
            hits,
            self.config['to_pe'],
            result_dtype=self.dtype,
            gap_threshold=self.config['peak_gap_threshold'],
            left_extension=self.config['peak_left_extension'],
            right_extension=self.config['peak_right_extension'],
            min_channels=self.config['peak_min_chan'],
            min_area=self.config['peak_min_area'],
            max_duration=self.config['peak_max_duration'])
        strax.sum_waveform(peaks, r, adc_to_pe=self.config['to_pe'])
        peaks = peaks[peaks['dt'] > 0]  # removes strange edge case
        peaks = strax.split_peaks(peaks,
                                  r,
                                  self.config['to_pe'],
                                  min_height=self.config['split_min_height'],
                                  min_ratio=self.config['split_min_ratio'])

        strax.compute_widths(peaks)

        return peaks
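
For context, a sketch of the configuration this compute method reads. The option names come from the code above, but every value here is a hypothetical placeholder, not the plugin's real defaults:

import numpy as np

config = dict(
    hit_threshold=15,               # hitfinder threshold (ADC counts)
    to_pe=np.ones(248),             # per-channel ADC -> PE conversion
    peak_gap_threshold=300,         # ns without hits that closes a peak
    peak_left_extension=30,         # ns added before the first hit
    peak_right_extension=30,        # ns added after the last hit
    peak_min_chan=2,                # min number of contributing channels
    peak_min_area=0,                # PE
    peak_max_duration=10_000_000,   # ns
    split_min_height=25,            # PE/ns
    split_min_ratio=4,
)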
Example #3
    def __call__(self, peaks, records, to_pe, data_type,
                 next_ri=None, do_iterations=1, min_area=0, **kwargs):
        if not len(records) or not len(peaks) or not do_iterations:
            return peaks

        # Build the *args tuple for self.find_split_points from kwargs
        # since numba doesn't support **kwargs
        args_options = []
        for k, value in self.find_split_args_defaults:
            if k in kwargs:
                value = kwargs[k]
            if k == 'threshold':
                # The 'threshold' option is a user-specified function
                value = value(peaks)
            args_options.append(value)
        args_options = tuple(args_options)

        # Check for spurious options
        argnames = [k for k, _ in self.find_split_args_defaults]
        for k in kwargs:
            if k not in argnames:
                raise TypeError(f"Unknown argument {k} for {self.__class__}")

        is_split = np.zeros(len(peaks), dtype=np.bool_)

        split_function = {'peaks': self._split_peaks,
                          'hitlets': self._split_hitlets}
        if data_type not in split_function:
            raise ValueError(f'Data_type "{data_type}" is not supported.')

        new_peaks = split_function[data_type](
            # Numba doesn't like self as argument, but it's ok with functions...
            split_finder=self.find_split_points,
            peaks=peaks,
            is_split=is_split,
            orig_dt=records[0]['dt'],
            min_area=min_area,
            args_options=tuple(args_options),
            result_dtype=peaks.dtype)

        if is_split.sum() != 0:
            # Found new peaks: compute basic properties
            if data_type == 'peaks':
                strax.sum_waveform(new_peaks, records, to_pe)
            elif data_type == 'hitlets':
                # Add record fields here
                strax.update_new_hitlets(new_peaks, records, next_ri, to_pe)

            strax.compute_widths(new_peaks)

            # ... and recurse (if needed)
            new_peaks = self(new_peaks, records, to_pe, data_type, next_ri,
                             do_iterations=do_iterations - 1,
                             min_area=min_area, **kwargs)
            peaks = strax.sort_by_time(np.concatenate([peaks[~is_split],
                                                       new_peaks]))

        return peaks
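
The kwargs-to-tuple translation above exists because numba-compiled functions cannot accept **kwargs. A self-contained sketch of the same pattern, with a hypothetical stand-in kernel rather than strax's actual split finder:

import numba
import numpy as np

# Defaults in a fixed order, mirroring find_split_args_defaults above
args_defaults = (('min_height', 25.0), ('min_ratio', 4.0))

@numba.njit
def split_kernel(wv, min_height, min_ratio):
    # Hypothetical stand-in for the compiled split finder
    return (wv > min_height) & (wv > min_ratio)

def call_split_kernel(wv, **kwargs):
    # Translate **kwargs into a positional tuple in the defaults' order
    args = tuple(kwargs.get(k, default) for k, default in args_defaults)
    return split_kernel(wv, *args)

print(call_split_kernel(np.ones(3) * 30., min_height=10.))  # [ True  True  True]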
Example #4
import numpy as np
import strax


def split_peaks(peaks, records, to_pe, min_height=25, min_ratio=4):
    """Return peaks after splitting at prominent sum waveform minima.

    'Prominent' means that, on either side of a split point, local maxima are:
      - larger than minimum + min_height
      - larger than minimum * min_ratio
    (this is related to topographic prominence for mountains)

    min_height is in pe/ns (NOT pe/bin!)
    """
    is_split = np.zeros(len(peaks), dtype=np.bool_)

    new_peaks = _split_peaks(peaks,
                             min_height=min_height,
                             min_ratio=min_ratio,
                             orig_dt=records[0]['dt'],
                             is_split=is_split,
                             result_dtype=peaks.dtype)
    strax.sum_waveform(new_peaks, records, to_pe)
    return strax.sort_by_time(np.concatenate([peaks[~is_split], new_peaks]))
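
A small numpy illustration of the prominence criterion from the docstring; this only tests one candidate split index, it is not strax's split-finding code:

import numpy as np

def is_prominent_split(wv, i, min_height=25, min_ratio=4):
    # Local maxima on both sides of i must exceed wv[i] + min_height
    # and wv[i] * min_ratio
    left_max, right_max = wv[:i].max(), wv[i + 1:].max()
    return all(m > wv[i] + min_height and m > wv[i] * min_ratio
               for m in (left_max, right_max))

wv = np.array([0, 40, 80, 5, 60, 120, 0], dtype=float)
print(is_prominent_split(wv, 3))  # True: 80 and 120 tower over the minimum of 5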
Example #5
    def compute(self, records):
        r = records
        hits = strax.find_hits(r)  # TODO: Duplicate work
        hits = strax.sort_by_time(hits)

        # NB: to_pe is not defined in this snippet; it comes from the
        # enclosing module
        peaks = strax.find_peaks(hits, to_pe, result_dtype=self.dtype)
        strax.sum_waveform(peaks, r, to_pe)

        peaks = strax.split_peaks(peaks, r, to_pe)

        strax.compute_widths(peaks)

        if self.config['diagnose_sorting']:
            assert np.diff(r['time']).min() >= 0, "Records not sorted"
            assert np.diff(hits['time']).min() >= 0, "Hits not sorted"
            assert np.all(peaks['time'][1:] >= strax.endtime(peaks)[:-1]
                          ), "Peaks not disjoint"

        return peaks
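
The diagnose_sorting asserts rely on strax.endtime, which evaluates to time + length * dt. The disjointness check on toy intervals:

import numpy as np

# Hypothetical peak intervals (ns)
time = np.array([0, 100, 250])
length = np.array([10, 10, 5])
dt = np.array([10, 10, 10])
endtime = time + length * dt  # what strax.endtime computes

# Disjoint iff each peak starts at or after the previous one ends
assert np.all(time[1:] >= endtime[:-1]), "Peaks not disjoint"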
Example #6
import numpy as np
import strax


def test_sum_waveform(records):
    # Sum the waveforms of all found peaks and cross-check them
    # against the raw records
    n_ch = 100

    rlinks = strax.record_links(records)
    hits = strax.find_hits(records, np.ones(n_ch))
    hits['left_integration'] = hits['left']
    hits['right_integration'] = hits['right']
    hits = strax.sort_by_time(hits)

    peaks = strax.find_peaks(hits,
                             np.ones(n_ch),
                             gap_threshold=6,
                             left_extension=2,
                             right_extension=3,
                             min_area=0,
                             min_channels=1,
                             max_duration=10_000_000)
    strax.sum_waveform(peaks, hits, records, rlinks, np.ones(n_ch))

    for p in peaks:
        # Area measures must be consistent
        area = p['area']
        assert area >= 0
        assert p['data'].sum() == area
        assert p['area_per_channel'].sum() == area

        sum_wv = np.zeros(p['length'], dtype=np.float32)
        for r in records:
            (rs, re), (ps, pe) = strax.overlap_indices(r['time'], r['length'],
                                                       p['time'], p['length'])
            sum_wv[ps:pe] += r['data'][rs:re]

        assert np.all(p['data'][:p['length']] == sum_wv)

    # Finally, check that we can also sum just a selection of peaks
    strax.sum_waveform(peaks,
                       hits,
                       records,
                       rlinks,
                       np.ones(n_ch),
                       select_peaks_indices=np.array([0]))
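
strax.overlap_indices, used in the cross-check above, maps the overlap of two intervals to index ranges into each interval. A pure-Python sketch of its semantics:

def overlap_indices(a_start, a_length, b_start, b_length):
    # Overlap of [a_start, a_start + a_length) and [b_start, b_start + b_length),
    # expressed as index ranges into each interval; empty ranges if disjoint
    lo = max(a_start, b_start)
    hi = min(a_start + a_length, b_start + b_length)
    if lo >= hi:
        return (0, 0), (0, 0)
    return (lo - a_start, hi - a_start), (lo - b_start, hi - b_start)

# A record at t=5 with 10 samples vs. a peak at t=0 with 8 samples:
print(overlap_indices(5, 10, 0, 8))  # ((0, 3), (5, 8))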
Example #7
    def compute(self, records):
        r = records

        hits = strax.find_hits(r)

        # Remove hits in zero-gain channels
        # they should not affect the clustering!
        hits = hits[self.to_pe[hits['channel']] != 0]

        hits = strax.sort_by_time(hits)

        peaks = strax.find_peaks(
            hits,
            self.to_pe,
            gap_threshold=self.config['peak_gap_threshold'],
            left_extension=self.config['peak_left_extension'],
            right_extension=self.config['peak_right_extension'],
            min_channels=self.config['peak_min_pmts'],
            result_dtype=self.dtype)
        strax.sum_waveform(peaks, r, self.to_pe)

        peaks = strax.split_peaks(
            peaks,
            r,
            self.to_pe,
            min_height=self.config['peak_split_min_height'],
            min_ratio=self.config['peak_split_min_ratio'])

        strax.compute_widths(peaks)

        if self.config['diagnose_sorting']:
            assert np.diff(r['time']).min() >= 0, "Records not sorted"
            assert np.diff(hits['time']).min() >= 0, "Hits not sorted"
            assert np.all(peaks['time'][1:] >= strax.endtime(peaks)[:-1]
                          ), "Peaks not disjoint"

        return peaks
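
The zero-gain filter above is plain numpy fancy indexing: look up each hit's gain by channel and keep the hits whose gain is nonzero. On toy arrays:

import numpy as np

to_pe = np.array([0.005, 0.0, 0.004])  # channel 1 is switched off
hit_channels = np.array([0, 1, 1, 2])

keep = to_pe[hit_channels] != 0
print(hit_channels[keep])  # [0 2]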
Example #8
import numpy as np
import strax


def peak_saturation_correction(
    records,
    rlinks,
    peaks,
    hitlets,
    to_pe,
    reference_length=100,
    min_reference_length=20,
    use_classification=False,
):
    """Correct the area and per pmt area of peaks from saturation
    :param records: Records
    :param rlinks: strax.record_links of corresponding records.
    :param peaks: Peaklets / Peaks
    :param hitlets: Hitlets found in records to build peaks.
        (Hitlets are hits including the left/right extension)
    :param to_pe: adc to PE conversion (length should equal number of PMTs)
    :param reference_length: Maximum number of reference sample used
    to correct saturated samples
    :param min_reference_length: Minimum number of reference sample used
    to correct saturated samples
    :param use_classification: Option of using classification to pick only S2
    """

    if not len(records):
        return
    if not len(peaks):
        return

    # Search for peaks with saturated channels
    mask = peaks['n_saturated_channels'] > 0
    if use_classification:
        mask &= peaks['type'] == 2
    peak_list = np.where(mask)[0]
    # Look up records that touch each peak; _touching_windows is the
    # numba helper behind strax.touching_windows
    record_ranges = _touching_windows(records['time'], strax.endtime(records),
                                      peaks[peak_list]['time'],
                                      strax.endtime(peaks[peak_list]))

    # Create temporary arrays for calculation
    dt = records[0]['dt']
    n_channels = len(peaks[0]['saturated_channel'])
    len_buffer = np.max(peaks['length'] * peaks['dt']) // dt + 1
    max_nrecord = len_buffer // len(records[0]['data']) + 1

    # Buff the sum wf [pe] of non-saturated channels
    b_sumwf = np.zeros(len_buffer, dtype=np.float32)
    # Buff the records 'data' [ADC] in saturated channels
    b_pulse = np.zeros((n_channels, len_buffer), dtype=np.int16)
    # Buff the corresponding record index of saturated channels
    b_index = np.zeros((n_channels, max_nrecord), dtype=np.int64)

    # Main
    for ix, peak_i in enumerate(peak_list):
        # reset buffers
        b_sumwf[:] = 0
        b_pulse[:] = 0
        b_index[:] = -1

        p = peaks[peak_i]
        channel_saturated = p['saturated_channel'] > 0

        for record_i in range(record_ranges[ix][0], record_ranges[ix][1]):
            r = records[record_i]
            r_slice, b_slice = strax.overlap_indices(
                r['time'] // dt, r['length'], p['time'] // dt,
                p['length'] * p['dt'] // dt)

            ch = r['channel']
            if channel_saturated[ch]:
                b_pulse[ch, slice(*b_slice)] += r['data'][slice(*r_slice)]
                b_index[ch, np.argmin(b_index[ch])] = record_i
            else:
                b_sumwf[slice(*b_slice)] += r['data'][slice(*r_slice)] \
                    * to_pe[ch]

        _peak_saturation_correction_inner(channel_saturated, records, p, to_pe,
                                          b_sumwf, b_pulse, b_index,
                                          reference_length,
                                          min_reference_length)

        # Undo the sum-waveform downsampling: express the peak length
        # in units of the record dt again
        peaks[peak_i]['length'] = p['length'] * p['dt'] / dt
        peaks[peak_i]['dt'] = dt

    strax.sum_waveform(peaks, hitlets, records, rlinks, to_pe, peak_list)
    return peak_list
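
The correction itself happens in _peak_saturation_correction_inner (not shown): a saturated channel's pulse is rescaled against the non-saturated sum waveform over a reference region just before saturation. A heavily simplified numpy sketch of that idea, with hypothetical names and without the min_reference_length guard of the real code:

import numpy as np

def correct_saturated_pulse(pulse, sum_wf, saturation_level,
                            reference_length=100):
    """Extrapolate a clipped pulse using the non-saturated sum
    waveform's shape; illustration only, not the straxen code."""
    pulse = pulse.astype(np.float64)
    saturated = pulse >= saturation_level
    if not saturated.any():
        return pulse
    first = np.argmax(saturated)  # index of the first saturated sample
    # Reference region: up to reference_length samples before saturation
    ref = slice(max(0, first - reference_length), first)
    scale = pulse[ref].sum() / max(sum_wf[ref].sum(), 1e-9)
    # Replace clipped samples with the scaled sum-waveform shape
    pulse[saturated] = scale * sum_wf[saturated]
    return pulse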
Example #9
    def compute(self, records, start, end):
        r = records

        hits = strax.find_hits(r, min_amplitude=self.hit_thresholds)

        # Remove hits in zero-gain channels
        # they should not affect the clustering!
        hits = hits[self.to_pe[hits['channel']] != 0]

        hits = strax.sort_by_time(hits)

        # Use peaklet gap threshold for initial clustering
        # based on gaps between hits
        peaklets = strax.find_peaks(
            hits,
            self.to_pe,
            gap_threshold=self.config['peaklet_gap_threshold'],
            left_extension=self.config['peak_left_extension'],
            right_extension=self.config['peak_right_extension'],
            min_channels=self.config['peak_min_pmts'],
            result_dtype=self.dtype_for('peaklets'),
            max_duration=self.config['peaklet_max_duration'],
        )

        # Make sure peaklets don't extend out of the chunk boundary
        # This should be very rare in normal data due to the ADC pretrigger
        # window.
        self.clip_peaklet_times(peaklets, start, end)

        # Get hits outside peaklets, and store them separately.
        # fully_contained is OK provided gap_threshold > extension,
        # which is asserted inside strax.find_peaks.
        is_lone_hit = strax.fully_contained_in(hits, peaklets) == -1
        lone_hits = hits[is_lone_hit]
        strax.integrate_lone_hits(
            lone_hits,
            records,
            peaklets,
            save_outside_hits=(self.config['peak_left_extension'],
                               self.config['peak_right_extension']),
            n_channels=len(self.to_pe))

        # Compute basic peak properties -- needed before natural breaks
        hits = hits[~is_lone_hit]
        # Define regions outside of peaks so that
        # _find_hit_integration_bounds does not extend hits beyond a peak.
        outside_peaks = self.create_outside_peaks_region(peaklets, start, end)
        strax.find_hit_integration_bounds(
            hits,
            outside_peaks,
            records,
            save_outside_hits=(self.config['peak_left_extension'],
                               self.config['peak_right_extension']),
            n_channels=len(self.to_pe),
            allow_bounds_beyond_records=True,
        )

        # Rename hits to hitlets for naming conventions: a hit refers to
        # the central part above threshold, a hitlet to the entire signal
        # including the left and right extensions.
        # (We are not going to use the actual hitlet data_type here.)
        hitlets = hits
        del hits

        hitlet_time_shift = (hitlets['left'] -
                             hitlets['left_integration']) * hitlets['dt']
        hitlets['time'] = hitlets['time'] - hitlet_time_shift
        hitlets['length'] = (hitlets['right_integration'] -
                             hitlets['left_integration'])
        hitlets = strax.sort_by_time(hitlets)
        rlinks = strax.record_links(records)

        strax.sum_waveform(peaklets, hitlets, r, rlinks, self.to_pe)

        strax.compute_widths(peaklets)

        # Split peaks using low-split natural breaks;
        # see https://github.com/XENONnT/straxen/pull/45
        # and https://github.com/AxFoundation/strax/pull/225
        peaklets = strax.split_peaks(
            peaklets,
            hitlets,
            r,
            rlinks,
            self.to_pe,
            algorithm='natural_breaks',
            threshold=self.natural_breaks_threshold,
            split_low=True,
            filter_wing_width=self.config['peak_split_filter_wing_width'],
            min_area=self.config['peak_split_min_area'],
            do_iterations=self.config['peak_split_iterations'])

        # Saturation correction using non-saturated channels
        # similar method used in pax
        # see https://github.com/XENON1T/pax/pull/712
        # Cases when records is not writeable for unclear reason
        # only see this when loading 1T test data
        # more details on https://numpy.org/doc/stable/reference/generated/numpy.ndarray.flags.html
        if not r['data'].flags.writeable:
            r = r.copy()

        if self.config['saturation_correction_on']:
            peak_list = peak_saturation_correction(
                r,
                rlinks,
                peaklets,
                hitlets,
                self.to_pe,
                reference_length=self.config['saturation_reference_length'],
                min_reference_length=self.config['saturation_min_reference_length'])

            # Compute the width again for corrected peaks
            strax.compute_widths(peaklets, select_peaks_indices=peak_list)

        # Compute tight coincidence level.
        # Making this a separate plugin would mean
        # (a) doing hitfinding yet again (or storing hits), and
        # (b) increased strax memory usage / max_messages,
        #     possibly due to its currently primitive scheduling.
        hit_max_times = np.sort(
            hitlets['time'] +
            hitlets['dt'] * hit_max_sample(records, hitlets) +
            hitlet_time_shift  # add time shift again to get correct maximum
        )
        peaklet_max_times = (
            peaklets['time'] +
            np.argmax(peaklets['data'], axis=1) * peaklets['dt'])
        tight_coincidence_channel = get_tight_coin(
            hit_max_times, hitlets['channel'], peaklet_max_times,
            self.config['tight_coincidence_window_left'],
            self.config['tight_coincidence_window_right'], self.channel_range)

        peaklets['tight_coincidence'] = tight_coincidence_channel

        if self.config['diagnose_sorting'] and len(r):
            assert np.diff(r['time']).min(initial=1) >= 0, "Records not sorted"
            assert np.diff(
                hitlets['time']).min(initial=1) >= 0, "Hits/Hitlets not sorted"
            assert np.all(peaklets['time'][1:] >= strax.endtime(peaklets)[:-1]
                          ), "Peaks not disjoint"

        # Update nhits of peaklets:
        counts = strax.touching_windows(hitlets, peaklets)
        counts = np.diff(counts, axis=1).flatten()
        peaklets['n_hits'] = counts

        return dict(peaklets=peaklets, lone_hits=lone_hits)
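
get_tight_coin counts, for each peak, the hits whose maximum sample falls in a small window around the peak's maximum. With hit_max_times sorted, the core of such a count is two searchsorted calls; a simplified version that ignores the per-channel bookkeeping of the real function:

import numpy as np

def tight_coincidence(hit_max_times, peak_max_times, left, right):
    # hit_max_times must be sorted; counts hits within
    # [peak_max - left, peak_max + right] for each peak
    lo = np.searchsorted(hit_max_times, peak_max_times - left, side='left')
    hi = np.searchsorted(hit_max_times, peak_max_times + right, side='right')
    return hi - lo

hits_t = np.array([10, 40, 45, 50, 300])
print(tight_coincidence(hits_t, np.array([45]), left=10, right=10))  # [3]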
Example #10
    def compute(self, records, start, end):
        r = records

        hits = strax.find_hits(r,
                               min_amplitude=straxen.hit_min_amplitude(
                                   self.config['hit_min_amplitude']))

        # Remove hits in zero-gain channels
        # they should not affect the clustering!
        hits = hits[self.to_pe[hits['channel']] != 0]

        hits = strax.sort_by_time(hits)

        # Use peaklet gap threshold for initial clustering
        # based on gaps between hits
        peaklets = strax.find_peaks(
            hits,
            self.to_pe,
            gap_threshold=self.config['peaklet_gap_threshold'],
            left_extension=self.config['peak_left_extension'],
            right_extension=self.config['peak_right_extension'],
            min_channels=self.config['peak_min_pmts'],
            result_dtype=self.dtype_for('peaklets'))

        # Make sure peaklets don't extend out of the chunk boundary
        # This should be very rare in normal data due to the ADC pretrigger
        # window.
        self.clip_peaklet_times(peaklets, start, end)

        # Get hits outside peaklets, and store them separately.
        # fully_contained is OK provided gap_threshold > extension,
        # which is asserted inside strax.find_peaks.
        lone_hits = hits[strax.fully_contained_in(hits, peaklets) == -1]
        strax.integrate_lone_hits(
            lone_hits,
            records,
            peaklets,
            save_outside_hits=(self.config['peak_left_extension'],
                               self.config['peak_right_extension']),
            n_channels=len(self.to_pe))

        # Compute basic peak properties -- needed before natural breaks
        strax.sum_waveform(peaklets, r, self.to_pe)
        strax.compute_widths(peaklets)

        # Split peaks using low-split natural breaks;
        # see https://github.com/XENONnT/straxen/pull/45
        # and https://github.com/AxFoundation/strax/pull/225
        peaklets = strax.split_peaks(
            peaklets,
            r,
            self.to_pe,
            algorithm='natural_breaks',
            threshold=self.natural_breaks_threshold,
            split_low=True,
            filter_wing_width=self.config['peak_split_filter_wing_width'],
            min_area=self.config['peak_split_min_area'],
            do_iterations=self.config['peak_split_iterations'])

        # Saturation correction using non-saturated channels
        # similar method used in pax
        # see https://github.com/XENON1T/pax/pull/712
        if self.config['saturation_correction_on']:
            peak_saturation_correction(
                r,
                peaklets,
                self.to_pe,
                reference_length=self.config['saturation_reference_length'],
                min_reference_length=self.config['saturation_min_reference_length'])

        # Compute tight coincidence level.
        # Making this a separate plugin would mean
        # (a) doing hitfinding yet again (or storing hits), and
        # (b) increased strax memory usage / max_messages,
        #     possibly due to its currently primitive scheduling.
        hit_max_times = np.sort(hits['time'] +
                                hits['dt'] * hit_max_sample(records, hits))
        peaklet_max_times = (
            peaklets['time'] +
            np.argmax(peaklets['data'], axis=1) * peaklets['dt'])
        peaklets['tight_coincidence'] = get_tight_coin(
            hit_max_times, peaklet_max_times,
            self.config['tight_coincidence_window_left'],
            self.config['tight_coincidence_window_right'])

        if self.config['diagnose_sorting'] and len(r):
            assert np.diff(r['time']).min(initial=1) >= 0, "Records not sorted"
            assert np.diff(hits['time']).min(initial=1) >= 0, "Hits not sorted"
            assert np.all(peaklets['time'][1:] >= strax.endtime(peaklets)[:-1]
                          ), "Peaks not disjoint"

        # Update nhits of peaklets:
        counts = strax.touching_windows(hits, peaklets)
        counts = np.diff(counts, axis=1).flatten()
        counts += 1
        peaklets['n_hits'] = counts

        return dict(peaklets=peaklets, lone_hits=lone_hits)
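
strax.touching_windows returns, per peak, the (start, exclusive end) indices of the time-sorted hits touching it, so the np.diff above yields hit counts. The same count can be sketched with two searchsorted calls, assuming hit start and end times are both monotonic:

import numpy as np

def count_touching(hit_times, hit_endtimes, peak_times, peak_endtimes):
    # First hit ending after each peak starts:
    left = np.searchsorted(hit_endtimes, peak_times, side='right')
    # First hit starting at or after each peak ends:
    right = np.searchsorted(hit_times, peak_endtimes, side='left')
    return right - left

ht = np.array([0, 10, 20])
he = ht + 5
print(count_touching(ht, he, np.array([8]), np.array([22])))  # [2]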
Example #11
import numpy as np
import strax


def test_peak_overflow(
    records,
    gap_factor,
    right_extension,
    gap_threshold,
    max_duration,
):
    """
    Test that we handle dt overflows in peaks correctly. To this end, we
        just create some sets of records and copy that set of records
        for a few times. That way we may end up with a very long
        artificial set of hits that can be used in the peak building. By
        setting the peak finding parameters to very strange conditions
        we are able to replicate the behaviour where a peak would become
        so large that it cannot be written out correctly due to integer
        overflow of the dt field,
    :param records: records
    :param gap_factor: to create very extended sets of records, just
        add a factor that can be used to multiply the time field with,
        to more quickly arrive to a very long pulse-train
    :param max_duration: max_duration option for strax.find_peaks
    :param right_extension: option for strax.find_peaks
    :param gap_threshold: option for strax.find_peaks
    :return: None
    """

    # Set this here, no need to test left and right independently
    left_extension = 0
    # Make a single big peak to contain all the records
    peak_dtype = np.zeros(0, strax.peak_dtype()).dtype
    # NB! This targets the pre-#403 situation, when dt was int16; peaks
    # now use an int32 dt, for which this test would take forever.
    magic_overflow_time = np.iinfo(np.int16).max * peak_dtype['data'].shape[0]

    def return_1(x):
        """
        Return 1 for every element of the input; usable as the
            threshold parameter for natural-breaks splitting
        :param x: any type of array
        :return: ones * len(array)
        """
        return np.ones(len(x))

    r = records
    if not len(r) or len(np.unique(r['channel'])) == 1:
        # Hard to test integer overflow for empty records or with
        # records only from a single channel
        return

    # Copy the pulse train of the records. We are going to copy the same
    # set of records many times now.
    t_max = strax.endtime(r).max()
    print('make buffer')
    n_repeat = int(1.5 * magic_overflow_time + t_max * gap_factor) // int(
        t_max * gap_factor) + 1
    time_offset = np.linspace(0,
                              1.5 * magic_overflow_time + t_max * gap_factor,
                              n_repeat,
                              dtype=np.int64)
    r_buffer = np.tile(r, n_repeat // len(r) + 1)[:len(time_offset)]
    assert len(r_buffer) == len(time_offset)
    r_buffer['time'] = r_buffer['time'] + time_offset
    assert strax.endtime(
        r_buffer[-1]) - r_buffer['time'].min() > magic_overflow_time
    r = r_buffer.copy()
    del r_buffer
    print(f'Array is {r.nbytes/1e6} MB, good luck')

    # Do peak finding!
    print('Find hits')
    hits = strax.find_hits(r, min_amplitude=0)
    assert len(hits)
    hits = strax.sort_by_time(hits)

    # Dummy to_pe
    to_pe = np.ones(max(r['channel']) + 1)

    try:
        print('Find peaks')
        # Find peaks, we might end up with negative dt here!
        p = strax.find_peaks(
            hits,
            to_pe,
            gap_threshold=gap_threshold,
            left_extension=left_extension,
            right_extension=right_extension,
            max_duration=max_duration,
            # Due to these settings, we will start merging
            # whatever strax can get its hands on
            min_area=0.,
            min_channels=1,
        )
    except AssertionError as e:
        if not gap_threshold > left_extension + right_extension:
            print('Great, we are getting the assertion statement for the '
                  'incongruent extensions')
            return
        elif not left_extension + max_duration + right_extension < magic_overflow_time:
            # Ending up here is the ultimate goal of the tests. This
            # means we are hitting github.com/AxFoundation/strax/issues/397
            print('Great, the test worked, we are getting the assertion '
                  'statement for the int overflow')
            return
        else:
            # The error is caused by something else, we need to re-raise
            raise e

    print(f'Peaklet array is {p.nbytes / 1e6} MB, good luck')
    if len(p) == 0:
        print(f'rec length {len(r)}')
    assert len(p)
    assert np.all(p['dt'] > 0)

    # Double check that this error should have been raised.
    if not gap_threshold > left_extension + right_extension:
        raise ValueError(f'No assertion error raised! Working with '
                         f'{gap_threshold} {left_extension + right_extension}')

    # Compute basics
    hits = strax.find_hits(r, np.ones(10000))
    hits['left_integration'] = hits['left']
    hits['right_integration'] = hits['right']
    rlinks = strax.record_links(r)
    strax.sum_waveform(p, hits, r, rlinks, to_pe)
    strax.compute_widths(p)

    try:
        print('Split peaks')
        peaklets = strax.split_peaks(p,
                                     hits,
                                     r,
                                     rlinks,
                                     to_pe,
                                     algorithm='natural_breaks',
                                     threshold=return_1,
                                     split_low=True,
                                     filter_wing_width=70,
                                     min_area=0,
                                     do_iterations=2)
    except AssertionError as e:
        if not left_extension + max_duration + right_extension < magic_overflow_time:
            # Ending up here is the ultimate goal of the tests. This
            # means we are hitting github.com/AxFoundation/strax/issues/397
            print('Great, the test worked, we are getting the assertion '
                  'statement for the int overflow')
            raise RuntimeError(
                'We were not properly warned of the imminent peril we are '
                'facing. This error means that the peak_finding is not '
                'protected against integer overflow in the dt field. Where is '
                'our white knight in shining armour to protect us from this '
                'imminent doom:\n'
                'github.com/AxFoundation/strax/issues/397') from e
        # We failed for another reason, we need to re-raise
        raise e

    assert len(peaklets)
    assert len(peaklets) <= len(r)
    # Integer overflow will manifest itself here again:
    assert np.all(peaklets['dt'] > 0)
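
The overflow threshold the test targets follows from the fixed-length sum waveform: peak_dtype's data field holds a fixed number of samples (200 by default), so a peak of duration T ns needs dt = ceil(T / n_samples), and once that exceeds the dt dtype's maximum the peak cannot be written out. Worked numerically for the pre-#403 int16 dt:

import numpy as np

n_samples = 200                           # default length of the peak 'data' field
dt_max = np.iinfo(np.int16).max           # 32767, the pre-#403 dt dtype limit
magic_overflow_time = dt_max * n_samples  # ~6.55 ms

# A peak just past this duration needs a dt beyond int16 and overflows:
duration = magic_overflow_time + 1
needed_dt = int(np.ceil(duration / n_samples))
print(needed_dt > dt_max)  # True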
Example #12
def show_time_range(st, run_id, t0, dt=10):
    from functools import partial

    import numpy as np
    import pandas as pd

    import holoviews as hv
    from holoviews.operation.datashader import datashade, dynspread
    hv.extension('bokeh')

    import strax

    import gc
    # Somebody thought it was a good idea to call gc.collect explicitly somewhere in holoviews
    # This makes dynamic PMT maps super slow
    # Until I trace the offender:
    gc.collect = lambda *args, **kwargs: None

    # Custom wheel zoom tool that only zooms in time
    from bokeh.models import WheelZoomTool
    time_zoom = WheelZoomTool(dimensions='width')

    # Get ADC->pe multiplicative conversion factor
    from pax.configuration import load_configuration
    from pax.dsputils import adc_to_pe
    pax_config = load_configuration('XENON1T')["DEFAULT"]
    to_pe = np.array(
        [adc_to_pe(pax_config, ch) for ch in range(pax_config['n_channels'])])

    tpc_r = pax_config['tpc_radius']

    # Get locations of PMTs
    r = []
    for q in pax_config['pmts']:
        r.append(
            dict(x=q['position']['x'],
                 y=q['position']['y'],
                 i=q['pmt_position'],
                 array=q.get('array', 'other')))
    f = 1.08  # padding factor for the PMT map axis ranges
    pmt_locs = pd.DataFrame(r)

    records = st.get_array(run_id,
                           'raw_records',
                           time_range=(t0, t0 + int(1e10)))

    # TODO: don't reprocess, just load...
    hits = strax.find_hits(records)
    peaks = strax.find_peaks(hits,
                             to_pe,
                             gap_threshold=300,
                             min_hits=3,
                             result_dtype=strax.peak_dtype(n_channels=260))
    strax.sum_waveform(peaks, records, to_pe)
    # Integral in pe
    areas = records['data'].sum(axis=1) * to_pe[records['channel']]

    def normalize_time(t):
        return (t - records[0]['time']) / 1e9

    # Create dataframe with record metadata
    df = pd.DataFrame(
        dict(area=areas,
             time=normalize_time(records['time']),
             channel=records['channel']))

    # Convert to holoviews Points
    points = hv.Points(
        df,
        kdims=[
            hv.Dimension('time', label='Time', unit='sec'),
            hv.Dimension('channel', label='PMT number', range=(0, 260))
        ],
        vdims=[
            hv.Dimension(
                'area',
                label='Area',
                unit='pe',
                # range=(0, 1000)
            )
        ])

    def pmt_map(t_0, t_1, array='top', **kwargs):
        # Compute the PMT pattern (fast)
        ps = points[(t_0 <= points['time']) & (points['time'] < t_1)]
        areas = np.bincount(ps['channel'],
                            weights=ps['area'],
                            minlength=len(pmt_locs))

        # Which PMTs should we include?
        pmt_mask = pmt_locs['array'] == array
        d = pmt_locs[pmt_mask].copy()
        d['area'] = areas[pmt_mask]

        # Convert to holoviews points
        d = hv.Dataset(d,
                       kdims=[
                           hv.Dimension('x',
                                        unit='cm',
                                        range=(-tpc_r * f, tpc_r * f)),
                           hv.Dimension('y',
                                        unit='cm',
                                        range=(-tpc_r * f, tpc_r * f)),
                           hv.Dimension('i', label='PMT number'),
                           hv.Dimension('area', label='Area', unit='PE')
                       ])

        return d.to(hv.Points,
                    vdims=['area', 'i'],
                    group='PMTPattern',
                    label=array.capitalize(),
                    **kwargs).opts(plot=dict(color_index=2,
                                             tools=['hover'],
                                             show_grid=False),
                                   style=dict(size=17, cmap='magma'))

    def pmt_map_range(x_range, array='top', **kwargs):
        # For use in dynamicmap with streams
        if x_range is None:
            x_range = (0, 0)
        return pmt_map(x_range[0], x_range[1], array=array, **kwargs)

    xrange_stream = hv.streams.RangeX(source=points)

    # TODO: weigh by area

    def channel_map():
        return dynspread(
            datashade(
                points, y_range=(0, 260),
                streams=[xrange_stream])).opts(plot=dict(
                    width=600,
                    tools=[time_zoom, 'xpan'],
                    default_tools=['save', 'pan', 'box_zoom', 'reset'],
                    show_grid=False))

    def plot_peak(p):
        # It's better to plot amplitude/time than amplitude/bin, since
        # sampling times are now variable
        y = p['data'][:p['length']] / p['dt']
        t_edges = np.arange(p['length'] + 1, dtype=np.int64)
        t_edges = t_edges * p['dt'] + p['time']
        t_edges = normalize_time(t_edges)

        # Correct step plotting from Knut
        t_ = np.zeros(2 * len(y))
        y_ = np.zeros(2 * len(y))
        t_[0::2] = t_edges[0:-1]
        t_[1::2] = t_edges[1::]
        y_[0::2] = y
        y_[1::2] = y

        c = hv.Curve(dict(time=t_, amplitude=y_),
                     kdims=points.kdims[0],
                     vdims=hv.Dimension('amplitude',
                                        label='Amplitude',
                                        unit='PE/ns'),
                     group='PeakSumWaveform')
        return c.opts(
            plot=dict(  # interpolation='steps-mid',
                # default_tools=['save', 'pan', 'box_zoom', 'save', 'reset'],
                # tools=[time_zoom, 'xpan'],
                width=600,
                shared_axes=False,
                show_grid=True),
            style=dict(color='b')
            # norm=dict(framewise=True)
        )

    def peaks_in(t_0, t_1):
        return peaks[(normalize_time(peaks['time'] +
                                     peaks['length'] * peaks['dt']) > t_0)
                     & (normalize_time(peaks['time']) < t_1)]

    def plot_peaks(t_0, t_1, n_max=10):
        # Find peaks in this range
        ps = peaks_in(t_0, t_1)
        # Show only the largest n_max peaks
        if len(ps) > n_max:
            areas = ps['area']
            max_area = np.sort(areas)[-n_max]
            ps = ps[areas >= max_area]

        return hv.Overlay(items=[plot_peak(p) for p in ps])

    def plot_peak_range(x_range, **kwargs):
        # For use in dynamicmap with streams
        if x_range is None:
            x_range = (0, 10)
        return plot_peaks(x_range[0], x_range[1], **kwargs)

    top_map = hv.DynamicMap(partial(pmt_map_range, array='top'),
                            streams=[xrange_stream])
    bot_map = hv.DynamicMap(partial(pmt_map_range, array='bottom'),
                            streams=[xrange_stream])
    waveform = hv.DynamicMap(plot_peak_range, streams=[xrange_stream])
    layout = waveform + top_map + channel_map() + bot_map
    return layout.cols(2)
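
A hypothetical invocation, assuming a configured strax context with XENON1T raw_records available and pax installed; the storage path, run id and t0 below are placeholders:

import strax

st = strax.Context(storage='./strax_data')  # placeholder storage location
layout = show_time_range(st, run_id='170204_1410', t0=int(1.5e18))
layout  # display in a notebook cell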