Exemplo n.º 1
0
    def test_cwt(self):
        """Exercise ``wavelets.cwt``: identity kernel, output shape, flat kernel."""
        n_samples = 100
        signal = np.sin(np.pi * np.arange(0, n_samples) / 10.0)

        def unit_impulse(_points, _width):
            # Single-sample kernel standing in for a delta function.
            return np.array([1])

        def box_kernel(_points, width):
            # Note: this wavelet isn't defined quite right, but it is good
            # enough for the check below.
            return np.ones(width) / width

        # A delta-function wavelet must return the input data unchanged.
        scales = [1.0]
        transformed = wavelets.cwt(signal, unit_impulse, scales)
        assert_(transformed.shape == (len(scales), n_samples))
        assert_array_almost_equal(signal, transformed.flatten())

        # The output must have one row per width and one column per sample.
        scales = [1, 3, 4, 5, 10]
        transformed = wavelets.cwt(signal, wavelets.ricker, scales)
        assert_(transformed.shape == (len(scales), n_samples))

        # A flat kernel ten times longer than the data is compared against
        # the mean of the data.
        scales = [n_samples * 10]
        transformed = wavelets.cwt(signal, box_kernel, scales)
        assert_array_almost_equal(transformed, np.mean(signal))
Exemplo n.º 2
0
    def test_cwt(self):
        """Check ``wavelets.cwt`` with a delta, the ricker and a flat wavelet."""

        def delta_kernel(_n_points, _scale):
            # One-sample kernel used as a delta function.
            return np.array([1])

        def flat_kernel(_n_points, scale):
            # Note: not a properly defined wavelet, but fine for this test.
            return np.ones(scale) / scale

        sample_count = 100
        samples = np.sin(np.pi * np.arange(0, sample_count) / 10.0)

        # Delta function input gives the same data as output.
        width_list = [1.0]
        result = wavelets.cwt(samples, delta_kernel, width_list)
        assert_(result.shape == (len(width_list), sample_count))
        assert_array_almost_equal(samples, result.flatten())

        # Proper shape on output: (number of widths, number of samples).
        width_list = [1, 3, 4, 5, 10]
        result = wavelets.cwt(samples, wavelets.ricker, width_list)
        assert_(result.shape == (len(width_list), sample_count))

        # Flat kernel far wider than the data: expect the data mean.
        width_list = [sample_count * 10]
        result = wavelets.cwt(samples, flat_kernel, width_list)
        assert_array_almost_equal(result, np.mean(samples))
Exemplo n.º 3
0
def find_peaks(data, widths=(1, 2, 7, 30, 182, 365)):
    '''
    Find the peaks of a time series using the CWT peak-finding algorithm.

    This code is mostly a line by line port of
    scipy.signal.wavelets.find_peaks_cwt. It was ported because the default
    scipy implementation does not return the widths.

    Parameters
    ----------
    data : array like
        the time series in which to find the peaks
    widths : array like
        the candidate widths to test

    Returns
    -------
    A list of triples (peak_volume, peak_width, peak_position), sorted in
    descending order. The volume is the value of data[peak_position]. The
    width is the entry of `widths` at the largest row index found on the
    ridge line of that peak.
    '''
    data = np.asanyarray(data)
    widths = np.asanyarray(widths)

    # These are default values from the scipy port which we based our code on.
    gap_thresh = np.ceil(widths[0])
    max_distances = widths / 4.0

    cwt_dat = cwt(data, ricker, widths)
    ridge_lines = _identify_ridge_lines(cwt_dat, max_distances, gap_thresh)
    # noise_perc=1 filters more noise.
    filtered = _filter_ridge_lines(cwt_dat, ridge_lines,
                                   min_snr=1, noise_perc=1)

    # Each filtered line is of the form [[width indices], [peak positions]].
    candidates = []
    for line in filtered:
        # Internal sanity checks: row indices must be valid indices of widths.
        assert line[0].min() >= 0
        assert line[0].max() < widths.shape[0]

        peak_pos = line[1][0]
        peak_width = widths[line[0].max()]
        candidates.append((data[peak_pos], peak_width, peak_pos))

    return sorted(candidates, reverse=True)
Exemplo n.º 4
0
def find_peaks_cwt(vector, widths, wavelet=None, max_distances=None, gap_thresh=None,
               min_length=None, min_snr=1, noise_perc=10):
    """
    Attempt to find the peaks in the given 1-D array `vector`.

    The general approach is to smooth `vector` by convolving it with
    `wavelet(width)` for each width in `widths`. Relative maxima which appear
    at enough length scales, and with sufficiently high SNR, are accepted.

    Parameters
    ----------
    vector : 1-D ndarray
        Data in which to look for peaks.
    widths : 1-D sequence
        Widths to use for calculating the CWT matrix. In general,
        this range should cover the expected width of peaks of interest.
        Any sequence is accepted; it is converted with ``np.asarray``.
    wavelet : callable, optional
        Wavelet function passed on to `cwt` (see `cwt` for the expected call
        signature). Should be normalized to unit area. Default is the ricker
        wavelet.
    max_distances : 1-D ndarray, optional
        Default `widths`/4. See identify_ridge_lines
    gap_thresh : float, optional
        Default is the first value of `widths`, rounded up
        (``np.ceil(widths[0])``). See identify_ridge_lines
    min_length : int, optional
        Default None. See filter_ridge_lines
    min_snr : float, optional
        Default 1. See filter_ridge_lines
    noise_perc : float, optional
        Default 10. See filter_ridge_lines

    Returns
    -------
    peaks_indices : list
        Sorted list holding, for every ridge line that passed the filter,
        the first location stored on that line.

    Notes
    -----
    This approach was designed for finding sharp peaks among noisy data, however
    with proper parameter selection it should function well for different
    peak shapes.
    The algorithm is as follows:
    1. Perform a continuous wavelet transform on `vector`, for the supplied
    `widths`. This is a convolution of `vector` with `wavelet(width)` for
    each width in `widths`. See `cwt`
    2. Identify "ridge lines" in the cwt matrix. These are relative maxima
    at each row, connected across adjacent rows. See identify_ridge_lines
    3. Filter the ridge_lines using filter_ridge_lines.

    References
    ----------
    Bioinformatics (2006) 22 (17): 2059-2065. doi: 10.1093/bioinformatics/btl355
    http://bioinformatics.oxfordjournals.org/content/22/17/2059.long

    Examples
    --------
    >>> xs = np.arange(0, np.pi, 0.05)
    >>> data = np.sin(xs)
    >>> peakind = find_peaks_cwt(data, np.arange(1,10))
    >>> peakind, xs[peakind],data[peakind]
    ([32], array([ 1.6]), array([ 0.9995736]))
    """
    # `widths` is documented as any 1-D sequence, but a plain list would make
    # `widths / 4.0` below raise TypeError, so coerce it to an array first.
    widths = np.asarray(widths)

    if gap_thresh is None:
        gap_thresh = np.ceil(widths[0])
    if max_distances is None:
        max_distances = widths / 4.0
    if wavelet is None:
        wavelet = ricker

    cwt_dat = cwt(vector, wavelet, widths)
    ridge_lines = _identify_ridge_lines(cwt_dat, max_distances, gap_thresh)
    filtered = _filter_ridge_lines(cwt_dat, ridge_lines, min_length=min_length,
                                   min_snr=min_snr, noise_perc=noise_perc)
    # line[1] holds the locations of a ridge line; its first entry is reported.
    return sorted(line[1][0] for line in filtered)
Exemplo n.º 5
0
def find_peaks_cwt(vector, widths, wavelet=None, max_distances=None, gap_thresh=None,
               min_length=None, min_snr=1, noise_perc=10):
    """
    Attempt to find the peaks in the given 1-D array `vector`.

    The general approach is to smooth `vector` by convolving it with
    `wavelet(width)` for each width in `widths`. Relative maxima which appear
    at enough length scales, and with sufficiently high SNR, are accepted.

    Parameters
    ----------
    vector : 1-D ndarray
        Data in which to look for peaks.
    widths : 1-D sequence
        Widths to use for calculating the CWT matrix. In general,
        this range should cover the expected width of peaks of interest.
        Any sequence is accepted; it is converted with ``np.asarray``.
    wavelet : callable, optional
        Wavelet function passed on to `cwt` (see `cwt` for the expected call
        signature). Should be normalized to unit area. Default is the ricker
        wavelet.
    max_distances : 1-D ndarray, optional
        Default `widths`/4. See identify_ridge_lines
    gap_thresh : float, optional
        Default is the first value of `widths`, rounded up
        (``np.ceil(widths[0])``). See identify_ridge_lines
    min_length : int, optional
        Default None. See filter_ridge_lines
    min_snr : float, optional
        Default 1. See filter_ridge_lines
    noise_perc : float, optional
        Default 10. See filter_ridge_lines

    Returns
    -------
    peaks_indices : list
        Sorted list holding, for every ridge line that passed the filter,
        the first location stored on that line.

    Notes
    -----
    This approach was designed for finding sharp peaks among noisy data, however
    with proper parameter selection it should function well for different
    peak shapes.
    The algorithm is as follows:
    1. Perform a continuous wavelet transform on `vector`, for the supplied
    `widths`. This is a convolution of `vector` with `wavelet(width)` for
    each width in `widths`. See `cwt`
    2. Identify "ridge lines" in the cwt matrix. These are relative maxima
    at each row, connected across adjacent rows. See identify_ridge_lines
    3. Filter the ridge_lines using filter_ridge_lines.

    References
    ----------
    Bioinformatics (2006) 22 (17): 2059-2065. doi: 10.1093/bioinformatics/btl355
    http://bioinformatics.oxfordjournals.org/content/22/17/2059.long

    Examples
    --------
    >>> xs = np.arange(0, np.pi, 0.05)
    >>> data = np.sin(xs)
    >>> peakind = find_peaks_cwt(data, np.arange(1,10))
    >>> peakind, xs[peakind],data[peakind]
    ([32], array([ 1.6]), array([ 0.9995736]))
    """
    # `widths` is documented as any 1-D sequence, but a plain list would make
    # `widths / 4.0` below raise TypeError, so coerce it to an array first.
    widths = np.asarray(widths)

    if gap_thresh is None:
        gap_thresh = np.ceil(widths[0])
    if max_distances is None:
        max_distances = widths / 4.0
    if wavelet is None:
        wavelet = ricker

    cwt_dat = cwt(vector, wavelet, widths)
    ridge_lines = _identify_ridge_lines(cwt_dat, max_distances, gap_thresh)
    filtered = _filter_ridge_lines(cwt_dat, ridge_lines, min_length=min_length,
                                   min_snr=min_snr, noise_perc=noise_perc)
    # line[1] holds the locations of a ridge line; its first entry is reported.
    return sorted(line[1][0] for line in filtered)
Exemplo n.º 6
0
def find_peaks_cwt(vector, widths, wavelet=None, max_distances=None,
                   gap_thresh=None, min_length=None, min_snr=1, noise_perc=10):
    """
    Locate peaks in a 1-D array by means of a wavelet transformation.

    `vector` is smoothed by convolving it with `wavelet(width)` for every
    width in `widths`. Relative maxima that show up at enough length scales
    and with a sufficiently high SNR are reported as peaks.

    Parameters
    ----------
    vector : ndarray
        1-D array in which to find the peaks.
    widths : sequence
        1-D array of widths used to compute the CWT matrix. The range should
        in general cover the expected width of the peaks of interest.
    wavelet : callable, optional
        Takes two parameters and returns a 1-D array to convolve with
        `vector`. The first parameter sets the number of points of the
        returned wavelet array, the second one is the scale (`width`) of the
        wavelet. Should be normalized and symmetric. Defaults to the ricker
        wavelet.
    max_distances : ndarray, optional
        At each row, a ridge line is only connected if the relative max at
        row[n] is within ``max_distances[n]`` from the relative max at
        ``row[n+1]``. Defaults to ``widths/4``.
    gap_thresh : float, optional
        When no relative maximum is found within `max_distances` there is a
        gap. A ridge line is discontinued once more than `gap_thresh` points
        pass without connecting a new relative maximum. Defaults to the
        first value of the widths array, i.e. widths[0].
    min_length : int, optional
        Minimum length a ridge line needs to be acceptable.
        Default is ``cwt.shape[0] / 4``, ie 1/4-th the number of widths.
    min_snr : float, optional
        Minimum SNR ratio. Default 1. The signal is the value of the cwt
        matrix at the shortest length scale (``cwt[0, loc]``); the noise is
        the `noise_perc`th percentile of datapoints contained within a
        window of `window_size` around ``cwt[0, loc]``.
    noise_perc : float, optional
        Percentile of the examined data points below which values are
        considered noise when computing the noise floor. Calculated using
        `stats.scoreatpercentile`. Default is 10.

    Returns
    -------
    peaks_indices : ndarray
        Sorted indices of the locations in `vector` where peaks were found.

    See Also
    --------
    cwt
        Continuous wavelet transform.
    find_peaks
        Find peaks inside a signal based on peak properties.

    Notes
    -----
    The approach was designed for finding sharp peaks among noisy data, but
    with proper parameter selection it should work well for other peak
    shapes too.

    The algorithm is as follows:
     1. Perform a continuous wavelet transform on `vector`, for the supplied
        `widths`. This is a convolution of `vector` with `wavelet(width)` for
        each width in `widths`. See `cwt`
     2. Identify "ridge lines" in the cwt matrix. These are relative maxima
        at each row, connected across adjacent rows. See identify_ridge_lines
     3. Filter the ridge_lines using filter_ridge_lines.

    .. versionadded:: 0.11.0

    References
    ----------
    .. [1] Bioinformatics (2006) 22 (17): 2059-2065.
        :doi:`10.1093/bioinformatics/btl355`
        http://bioinformatics.oxfordjournals.org/content/22/17/2059.long

    Examples
    --------
    >>> from scipy import signal
    >>> xs = np.arange(0, np.pi, 0.05)
    >>> data = np.sin(xs)
    >>> peakind = signal.find_peaks_cwt(data, np.arange(1,10))
    >>> peakind, xs[peakind], data[peakind]
    (array([32]), array([ 1.6]), array([ 0.9995736]))

    """
    widths = np.asarray(widths)

    # Fill in the defaults that depend on `widths`.
    gap_thresh = np.ceil(widths[0]) if gap_thresh is None else gap_thresh
    max_distances = widths / 4.0 if max_distances is None else max_distances
    wavelet = ricker if wavelet is None else wavelet

    transformed = cwt(vector, wavelet, widths)
    candidate_lines = _identify_ridge_lines(transformed, max_distances,
                                            gap_thresh)
    accepted_lines = _filter_ridge_lines(
        transformed,
        candidate_lines,
        min_length=min_length,
        min_snr=min_snr,
        noise_perc=noise_perc,
    )

    # One location per accepted ridge line, returned in ascending order.
    peak_locations = np.asarray([line[1][0] for line in accepted_lines])
    peak_locations.sort()
    return peak_locations
Exemplo n.º 7
0
def find_peaks_cwt(vector,
                   widths,
                   wavelet=None,
                   max_distances=None,
                   gap_thresh=None,
                   min_length=None,
                   min_snr=1,
                   noise_perc=10):
    """
    Attempt to find the peaks in a 1-D array.

    The general approach is to smooth `vector` by convolving it with
    `wavelet(width)` for each width in `widths`. Relative maxima which
    appear at enough length scales, and with sufficiently high SNR, are
    accepted.

    Unlike a plain peak finder, this variant also hands back the
    intermediate ridge lines (see Returns).

    Parameters
    ----------
    vector : ndarray
        1-D array in which to find the peaks.
    widths : sequence
        1-D array of widths to use for calculating the CWT matrix. In general,
        this range should cover the expected width of peaks of interest.
        Any sequence is accepted; it is converted with ``np.asarray``.
    wavelet : callable, optional
        Wavelet function passed on to `cwt` (see `cwt` for the expected call
        signature). Should be normalized to unit area.
        Default is the ricker wavelet.
    max_distances : ndarray, optional
        At each row, a ridge line is only connected if the relative max at
        row[n] is within ``max_distances[n]`` from the relative max at
        ``row[n+1]``.  Default value is ``widths/4``.
    gap_thresh : float, optional
        If a relative maximum is not found within `max_distances`,
        there will be a gap. A ridge line is discontinued if there are more
        than `gap_thresh` points without connecting a new relative maximum.
        Default is the first value of `widths`, rounded up
        (``np.ceil(widths[0])``).
    min_length : int, optional
        Minimum length a ridge line needs to be acceptable.
        Default is ``cwt.shape[0] / 4``, ie 1/4-th the number of widths.
    min_snr : float, optional
        Minimum SNR ratio. Default 1. The signal is the value of
        the cwt matrix at the shortest length scale (``cwt[0, loc]``), the
        noise is the `noise_perc`th percentile of datapoints contained within a
        window of `window_size` around ``cwt[0, loc]``.
    noise_perc : float, optional
        When calculating the noise floor, percentile of data points
        examined below which to consider noise. Calculated using
        `stats.scoreatpercentile`.  Default is 10.

    Returns
    -------
    ridge_lines
        All ridge lines, as returned by ``_identify_ridge_lines``.
    filtered
        The ridge lines kept by ``_filter_ridge_lines``.
    peaks_indices : list
        Sorted list holding, for every kept ridge line, the first location
        stored on that line.

    See Also
    --------
    cwt

    Notes
    -----
    This approach was designed for finding sharp peaks among noisy data,
    however with proper parameter selection it should function well for
    different peak shapes.

    The algorithm is as follows:
     1. Perform a continuous wavelet transform on `vector`, for the supplied
        `widths`. This is a convolution of `vector` with `wavelet(width)` for
        each width in `widths`. See `cwt`
     2. Identify "ridge lines" in the cwt matrix. These are relative maxima
        at each row, connected across adjacent rows. See identify_ridge_lines
     3. Filter the ridge_lines using filter_ridge_lines.

    .. versionadded:: 0.11.0

    References
    ----------
    .. [1] Bioinformatics (2006) 22 (17): 2059-2065.
        doi: 10.1093/bioinformatics/btl355
        http://bioinformatics.oxfordjournals.org/content/22/17/2059.long

    Examples
    --------
    >>> xs = np.arange(0, np.pi, 0.05)
    >>> data = np.sin(xs)
    >>> _, _, peakind = find_peaks_cwt(data, np.arange(1,10))
    >>> peakind, xs[peakind], data[peakind]
    ([32], array([ 1.6]), array([ 0.9995736]))
    """
    # `widths` is documented as a sequence, but a plain list would make
    # `widths / 4.0` below raise TypeError, so coerce it to an array first.
    widths = np.asarray(widths)

    if gap_thresh is None:
        gap_thresh = np.ceil(widths[0])
    if max_distances is None:
        max_distances = widths / 4.0
    if wavelet is None:
        wavelet = ricker

    cwt_dat = cwt(vector, wavelet, widths)
    ridge_lines = _identify_ridge_lines(cwt_dat, max_distances, gap_thresh)
    filtered = _filter_ridge_lines(cwt_dat,
                                   ridge_lines,
                                   min_length=min_length,
                                   min_snr=min_snr,
                                   noise_perc=noise_perc)
    # line[1] holds the locations of a ridge line; its first entry is reported.
    max_locs = [line[1][0] for line in filtered]
    return ridge_lines, filtered, sorted(max_locs)
Exemplo n.º 8
0
def detect_peaks(x: np.ndarray, y: np.ndarray, widths: np.ndarray,
                 min_length: int = 5, max_distance: int = 2,
                 gap_threshold: int = 1, snr: float = 3, min_width: float = 5,
                 max_width: float = 60,
                 estimators: Union[str, _estimator_type] = "default"):
    r"""
    Find peaks in a 1D signal.

    Peaks are detected using a modified version of the algorithm described in
    [DP06]_.

    Parameters
    ----------
    x : sorted array
    y : array of intensities
    widths : array
        Array of widths, in x units. Used as scales to build the wavelet
        array.
    min_length : int
        Minimum number of points in a ridge line.
    max_distance : float
        Maximum x distance between consecutive points in a ridge line, in x
        units.
    gap_threshold : int
        Maximum number of consecutive missing peaks in a ridge line.
    snr : positive number
        Signal-to-noise ratio used to filter peaks. Defined as follows:

        .. math::

            SNR = \frac{peak height - baseline}{noise}

    min_width : positive number
        Minimum width of the peaks
    max_width : positive number
        Maximum width of the peaks
    estimators : str or dict
        How to estimate baseline, noise, peak height, peak width, peak area and
        peak location. If `estimators` is 'cwt', parameters are computed as
        described in [DP06]_. Check the Notes to see how estimations in
        'default' mode are computed or how custom estimators can be used.

    Returns
    -------
    peaks : List of PeakLocation
        Peaks that were kept, sorted by location and rescaled with
        ``PeakLocation.rescale(xu, x)``.
    params : list
        Peak parameters of the peaks that were kept; empty entries are
        dropped. The type of each entry is decided by ``_estimate_params``.

    Notes
    -----
    Peaks are detected using the CWT algorithm described in [DP06]_. The
    optimum scale where each peak is detected is the local maximum at the
    lowest scale in the ridge line. If no local maximum was found, the scale
    with the maximum coefficient is chosen. After finding a peak, the extension
    of the peak is found by finding the nearest local minimum at both sides of
    the peak, using the wavelet coefficients with the best scale. A peak is
    represented then by three indices specifying the peak location, peak start
    and peak end. These three values, together with baseline and noise
    estimations are used to estimate peak parameters. If the mode used is
    'default', the peak parameters are defined as follows:

        baseline :
            A baseline is built using y values where no peak was detected. These
            values are interpolated to build the baseline.
        noise :
            The noise is computed as the standard deviation of the values used
            to build the baseline. To obtain a robust estimation, the median
            absolute deviation of the baseline is used.
        height :
            The height of a peak is computed as the difference between the
            y value and the baseline value at the peak location.
        snr :
            The quotient between the height of the peak and the noise.
        area :
            Area of the peak obtained by integration between the start and
            the end of the peak. The area of the baseline is subtracted.
        width :
            The peak width is computed as the peak extension, that is, the
            difference between the end and the start of the peak.

    After computing these parameters, peaks are filtered based on SNR and peak
    width. Peak overlap between the filtered peaks is analyzed then. Two
    peaks are overlapping if there is superposition in their peak extensions.
    Overlapping peaks are flagged, their extension corrected and corrected peak
    parameters are computed again.

    Custom estimators can be used for noise, baseline, peak height, peak
    location, peak width and peak area:

    .. code-block:: python

            estimators = {"baseline": baseline_func, "noise": noise_func,
                          "height": height_func,  "loc": loc_func,
                          "width": width_func, "area": area_func}

            # x and y are the same array used in the function
            # peaks is a list of PeakLocation instances
            # peak is a single PeakLocation instance

            # baseline must have the same size as x and y
            baseline = baseline_func(x, y, peaks)
            # noise is a positive number
            noise = noise_func(x, y, peaks)
            # peak_parameters are all positive numbers
            # (area and height can be zero)
            height = height_func(x, y, peak, baseline)
            area = area_func(x, y, peak, baseline)
            width = width_func(x, y, peak, baseline)
            loc = loc_func(x, y, peak, baseline)

    References
    ----------

    .. [DP06] Pan Du, Warren A. Kibbe, Simon M. Lin, Improved peak detection in
        mass spectrum by incorporating continuous wavelet transform-based
        pattern matching, Bioinformatics, Volume 22, Issue 17, 1 September 2006,
        Pages 2059–2065, https://doi.org/10.1093/bioinformatics/btl355

    """

    # Convert to uniform sampling
    xu, yu = _resample_data(x, y)

    # convert parameters to number of points
    # NOTE: after this call max_distance is indexed (max_distance[0] below),
    # so it is no longer the scalar received as argument.
    widths, max_distance = \
        _convert_to_points(xu, widths, max_distance)

    # detect peaks in the ridge lines
    w = cwt(yu, ricker, widths)
    ridge_lines = \
        _peak_finding._identify_ridge_lines(w, max_distance, gap_threshold)
    # y_peaks are the local maxima of y and are used to validate peaks
    # y_peaks = find_peaks(yu)[0]
    y_peaks = argrelmax(yu, order=2)[0]
    peaks = _process_ridge_lines(w, y_peaks, ridge_lines, min_length,
                                 max_distance)

    # baseline and noise estimation
    if estimators == "default":
        baseline, noise = baseline_noise_estimation(yu)
    elif estimators == "cwt":
        # no baseline/noise is passed to _estimate_params in 'cwt' mode
        baseline, noise = None, None
    else:
        # custom estimators: a mapping with "baseline" and "noise" callables
        baseline = estimators["baseline"](xu, yu, peaks)
        noise = estimators["noise"](xu, yu, peaks)

    # peak filtering and parameter estimation
    peaks, params = \
        _estimate_params(xu, yu, widths, w, peaks, snr, min_width, max_width,
                         estimators, baseline=baseline, noise=noise)

    # sort peaks based on location
    # (params is reordered with the same permutation so both stay aligned)
    sorted_index = sorted(range(len(peaks)), key=lambda s: peaks[s].loc)
    peaks = [peaks[k] for k in sorted_index]
    params = [params[k] for k in sorted_index]

    # find and correct overlap between consecutive peaks:
    # overlap_index collects peaks whose parameters must be recomputed,
    # rm_index collects peaks that are discarded at the end.
    overlap_index = list()
    rm_index = list()
    for k in range(len(peaks) - 1):
        left, right = peaks[k], peaks[k + 1]
        is_same_peak = right.loc == left.loc
        merge = (right.loc - left.loc) <= max_distance[0]
        has_overlap = left.end > right.start
        if is_same_peak:
            # the boolean adds 0 or 1: the right peak is removed when the left
            # one has the smaller scale, otherwise the left peak is removed
            rm_index.append(k + (left.scale < right.scale))
        elif merge:
            # peaks closer than max_distance[0]: drop the left peak and let
            # the right one take over its start and the midpoint location
            rm_index.append(k)
            right.start = left.start
            right.loc = (left.loc + right.loc) // 2
        elif has_overlap:
            _fix_peak_extension(left, right, yu)
            overlap_index.extend([k, k + 1])
        # remove invalid peaks after the extension was fixed
        # (a peak is invalid when yu at its location is lower than yu at its
        # start or at its end)
        if yu[left.loc] < max(yu[left.start], yu[left.end]):
            rm_index.append(k)

    overlap_peaks = [peaks[x] for x in overlap_index]

    # if there are peaks with overlap, then compute again peak parameters after
    # correction
    if overlap_index:
        _, overlap_params = \
            _estimate_params(xu, yu, widths, w, overlap_peaks, snr, min_width,
                             max_width, estimators, baseline=baseline,
                             noise=noise, append_empty_params=True)
        # replace corrected values in params:
        # an empty entry marks a corrected peak that has to be removed
        for k, param in zip(overlap_index, overlap_params):
            if len(param):
                params[k] = param
            else:
                rm_index.append(k)

    # remove invalid peaks and back scale peaks
    peaks = [p.rescale(xu, x) for (k, p) in enumerate(peaks)
             if k not in rm_index]
    params = [p for k, p in enumerate(params) if (len(p) and k not in rm_index)]

    return peaks, params