def test_cwt(self):
    """Check ``wavelets.cwt`` against a delta, a ricker and a flat wavelet."""
    n_points = 100
    signal = np.sin(np.pi * np.arange(n_points) / 10.0)

    def delta_wavelet(s, t):
        # One-sample kernel of height 1, independent of both arguments.
        return np.array([1])

    def flat_wavelet(l, w):
        # Note: this wavelet isn't defined quite right, but is fine for
        # this test.
        return np.ones(w) / w

    # A delta function as wavelet must give the input data back as output.
    scales = [1.0]
    transformed = wavelets.cwt(signal, delta_wavelet, scales)
    assert_(transformed.shape == (len(scales), n_points))
    assert_array_almost_equal(signal, transformed.flatten())

    # The output must have one row per width and one column per sample.
    scales = [1, 3, 4, 5, 10]
    transformed = wavelets.cwt(signal, wavelets.ricker, scales)
    assert_(transformed.shape == (len(scales), n_points))

    # With a single, very large width the flat wavelet is expected to
    # reproduce the mean of the data everywhere.
    scales = [n_points * 10]
    transformed = wavelets.cwt(signal, flat_wavelet, scales)
    assert_array_almost_equal(transformed, np.mean(signal))
def find_peaks(data, widths=(1, 2, 7, 30, 182, 365)):
    """
    Find the peaks of a time series using the CWTFindPeaks algorithm.

    This code is mostly a line by line port of
    scipy.signal.wavelets.find_peaks_cwt. We had to port that code since the
    default scipy implementation does not return the widths.

    Parameters
    ----------
    data : array like
        The time series in which to find the peaks.
    widths : array like
        The candidate widths to test. The default is an immutable tuple so
        that it cannot be altered between calls.

    Returns
    -------
    A list of triples (peak_volume, peak_width, peak_position), sorted in
    descending tuple order (largest volume first). The volume is the value
    of data[peak_position]. The width is the estimated width of the wavelet
    used to find that peak.
    """
    data = np.asanyarray(data)
    widths = np.asanyarray(widths)

    # These are default values from the scipy port which we based our code on.
    gap_thresh = np.ceil(widths[0])
    max_distances = widths / 4.0

    cwt_dat = cwt(data, ricker, widths)
    ridge_lines = _identify_ridge_lines(cwt_dat, max_distances, gap_thresh)
    # noise_perc=1 filters more noise.
    filtered = _filter_ridge_lines(cwt_dat, ridge_lines,
                                   min_snr=1, noise_perc=1)

    # Each filtered ridge line is of the form
    # [[peak_widths], [peak_positions]], where the first entry holds indices
    # into `widths`.
    candidates = []
    for ridge in filtered:
        width_rows = ridge[0]
        # Internal sanity checks: every width index must address `widths`.
        assert width_rows.min() >= 0
        assert width_rows.max() < widths.shape[0]

        peak_pos = ridge[1][0]
        # The largest width index reached by the ridge line selects the
        # width reported for the peak.
        peak_width = widths[width_rows.max()]
        candidates.append((data[peak_pos], peak_width, peak_pos))

    return sorted(candidates, reverse=True)
def find_peaks_cwt(vector, widths, wavelet=None, max_distances=None,
                   gap_thresh=None, min_length=None, min_snr=1,
                   noise_perc=10):
    """
    Attempt to find the peaks in the given 1-D array `vector`.

    The general approach is to smooth `vector` by convolving it with
    `wavelet(width)` for each width in `widths`. Relative maxima which
    appear at enough length scales, and with sufficiently high SNR, are
    accepted.

    Parameters
    ----------
    vector : 1-D ndarray
    widths : 1-D sequence
        Widths to use for calculating the CWT matrix. In general,
        this range should cover the expected width of peaks of interest.
        Converted with ``np.asarray``, so a plain list or tuple is accepted.
    wavelet : function, optional
        Handed unchanged to `cwt`, which uses it to build the kernels that
        are convolved with `vector`. Should be normalized to unit area.
        Default is the ricker wavelet.
    max_distances : 1-D ndarray, optional
        Default ``widths / 4``. See identify_ridge_lines.
    gap_thresh : float, optional
        Default ``ceil(widths[0])``. See identify_ridge_lines.
    min_length : int, optional
        Default None. See filter_ridge_lines.
    min_snr : float, optional
        Default 1. See filter_ridge_lines.
    noise_perc : float, optional
        Default 10. See filter_ridge_lines.

    Returns
    -------
    list
        Sorted indices of the locations in `vector` where peaks were found.

    Notes
    -----
    This approach was designed for finding sharp peaks among noisy data,
    however with proper parameter selection it should function well for
    different peak shapes.

    The algorithm is as follows:

    1. Perform a continuous wavelet transform on `vector`, for the supplied
       `widths`. This is a convolution of `vector` with `wavelet(width)`
       for each width in `widths`. See `cwt`
    2. Identify "ridge lines" in the cwt matrix. These are relative maxima
       at each row, connected across adjacent rows. See
       identify_ridge_lines
    3. Filter the ridge_lines using filter_ridge_lines.

    References
    ----------
    Bioinformatics (2006) 22 (17): 2059-2065.
    doi: 10.1093/bioinformatics/btl355
    http://bioinformatics.oxfordjournals.org/content/22/17/2059.long

    Examples
    --------
    >>> xs = np.arange(0, np.pi, 0.05)
    >>> data = np.sin(xs)
    >>> peakind = find_peaks_cwt(data, np.arange(1,10))
    >>> peakind, xs[peakind],data[peakind]
    ([32], array([ 1.6]), array([ 0.9995736]))

    """
    # A plain Python sequence does not support ``widths / 4.0``; normalise
    # once so every default below works for lists and tuples as well.
    widths = np.asarray(widths)

    if gap_thresh is None:
        gap_thresh = np.ceil(widths[0])
    if max_distances is None:
        max_distances = widths / 4.0
    if wavelet is None:
        wavelet = ricker

    cwt_dat = cwt(vector, wavelet, widths)
    ridge_lines = _identify_ridge_lines(cwt_dat, max_distances, gap_thresh)
    filtered = _filter_ridge_lines(cwt_dat, ridge_lines,
                                   min_length=min_length,
                                   min_snr=min_snr, noise_perc=noise_perc)

    # The first column entry of each surviving ridge line is the peak index.
    max_locs = [ridge[1][0] for ridge in filtered]
    return sorted(max_locs)
def find_peaks_cwt(vector, widths, wavelet=None, max_distances=None,
                   gap_thresh=None, min_length=None, min_snr=1,
                   noise_perc=10):
    """
    Locate peaks in a 1-D array by means of a wavelet transformation.

    `vector` is smoothed by convolving it with `wavelet(width)` for every
    width in `widths`. Relative maxima that show up at enough length scales
    and with a sufficiently high SNR are reported as peaks.

    Parameters
    ----------
    vector : ndarray
        1-D array in which to find the peaks.
    widths : sequence
        1-D array of widths used to compute the CWT matrix. The range should
        in general cover the expected width of the peaks of interest.
    wavelet : callable, optional
        Takes two parameters and returns a 1-D array to convolve with
        `vector`. The first parameter sets the number of points of the
        returned wavelet array, the second one is the scale (`width`) of
        the wavelet. Should be normalized and symmetric. Defaults to the
        ricker wavelet.
    max_distances : ndarray, optional
        At each row, a ridge line is only connected if the relative max at
        row[n] is within ``max_distances[n]`` from the relative max at
        ``row[n+1]``. Defaults to ``widths/4``.
    gap_thresh : float, optional
        When no relative maximum is found within `max_distances` there is a
        gap. A ridge line is discontinued once more than `gap_thresh`
        points pass without connecting a new relative maximum. Defaults to
        ``ceil(widths[0])``.
    min_length : int, optional
        Minimum length a ridge line needs to be acceptable.
        Default is ``cwt.shape[0] / 4``, ie 1/4-th the number of widths.
    min_snr : float, optional
        Minimum SNR ratio. Default 1. The signal is the value of the cwt
        matrix at the shortest length scale (``cwt[0, loc]``), the noise is
        the `noise_perc`th percentile of datapoints contained within a
        window of `window_size` around ``cwt[0, loc]``.
    noise_perc : float, optional
        Percentile of the examined data points below which values count as
        noise when the noise floor is calculated. Calculated using
        `stats.scoreatpercentile`. Default is 10.

    Returns
    -------
    peaks_indices : ndarray
        Indices of the locations in the `vector` where peaks were found,
        sorted in ascending order.

    See Also
    --------
    cwt
        Continuous wavelet transform.
    find_peaks
        Find peaks inside a signal based on peak properties.

    Notes
    -----
    The approach targets sharp peaks among noisy data, but with proper
    parameter selection it should work well for other peak shapes too.

    The algorithm is as follows:

    1. Perform a continuous wavelet transform on `vector`, for the supplied
       `widths`. This is a convolution of `vector` with `wavelet(width)`
       for each width in `widths`. See `cwt`
    2. Identify "ridge lines" in the cwt matrix. These are relative maxima
       at each row, connected across adjacent rows. See
       identify_ridge_lines
    3. Filter the ridge_lines using filter_ridge_lines.

    .. versionadded:: 0.11.0

    References
    ----------
    .. [1] Bioinformatics (2006) 22 (17): 2059-2065.
        :doi:`10.1093/bioinformatics/btl355`
        http://bioinformatics.oxfordjournals.org/content/22/17/2059.long

    Examples
    --------
    >>> from scipy import signal
    >>> xs = np.arange(0, np.pi, 0.05)
    >>> data = np.sin(xs)
    >>> peakind = signal.find_peaks_cwt(data, np.arange(1,10))
    >>> peakind, xs[peakind], data[peakind]
    ([32], array([ 1.6]), array([ 0.9995736]))

    """
    widths = np.asarray(widths)

    # Fill in whichever optional settings the caller left unspecified.
    wavelet = ricker if wavelet is None else wavelet
    gap_thresh = np.ceil(widths[0]) if gap_thresh is None else gap_thresh
    max_distances = widths / 4.0 if max_distances is None else max_distances

    cwt_matrix = cwt(vector, wavelet, widths)
    ridges = _identify_ridge_lines(cwt_matrix, max_distances, gap_thresh)
    kept = _filter_ridge_lines(cwt_matrix, ridges, min_length=min_length,
                               min_snr=min_snr, noise_perc=noise_perc)

    # The first column entry of every surviving ridge line is a peak index.
    peak_columns = np.asarray([ridge[1][0] for ridge in kept])
    return np.sort(peak_columns)
def find_peaks_cwt(vector, widths, wavelet=None, max_distances=None,
                   gap_thresh=None, min_length=None, min_snr=1,
                   noise_perc=10):
    """
    Attempt to find the peaks in a 1-D array.

    The general approach is to smooth `vector` by convolving it with
    `wavelet(width)` for each width in `widths`. Relative maxima which
    appear at enough length scales, and with sufficiently high SNR, are
    accepted.

    Unlike a plain peak finder, this variant also returns the intermediate
    ridge lines (see Returns).

    Parameters
    ----------
    vector : ndarray
        1-D array in which to find the peaks.
    widths : sequence
        1-D array of widths to use for calculating the CWT matrix. In general,
        this range should cover the expected width of peaks of interest.
        Converted with ``np.asarray``, so a plain list or tuple is accepted.
    wavelet : callable, optional
        Handed unchanged to `cwt`, which uses it to build the kernels that
        are convolved with `vector`. Should be normalized to unit area.
        Default is the ricker wavelet.
    max_distances : ndarray, optional
        At each row, a ridge line is only connected if the relative max at
        row[n] is within ``max_distances[n]`` from the relative max at
        ``row[n+1]``.  Default value is ``widths/4``.
    gap_thresh : float, optional
        If a relative maximum is not found within `max_distances`,
        there will be a gap. A ridge line is discontinued if there are more
        than `gap_thresh` points without connecting a new relative maximum.
        Default is ``ceil(widths[0])``.
    min_length : int, optional
        Minimum length a ridge line needs to be acceptable.
        Default is ``cwt.shape[0] / 4``, ie 1/4-th the number of widths.
    min_snr : float, optional
        Minimum SNR ratio. Default 1. The signal is the value of
        the cwt matrix at the shortest length scale (``cwt[0, loc]``), the
        noise is the `noise_perc`th percentile of datapoints contained within a
        window of `window_size` around ``cwt[0, loc]``.
    noise_perc : float, optional
        When calculating the noise floor, percentile of data points
        examined below which to consider noise. Calculated using
        `stats.scoreatpercentile`.  Default is 10.

    Returns
    -------
    ridge_lines
        All ridge lines, exactly as returned by ``_identify_ridge_lines``.
    filtered
        The ridge lines that were kept by ``_filter_ridge_lines``.
    peaks_indices : list
        Indices of the locations in the `vector` where peaks were found.
        The list is sorted.

    See Also
    --------
    cwt

    Notes
    -----
    This approach was designed for finding sharp peaks among noisy data,
    however with proper parameter selection it should function well for
    different peak shapes.

    The algorithm is as follows:

    1. Perform a continuous wavelet transform on `vector`, for the supplied
       `widths`. This is a convolution of `vector` with `wavelet(width)`
       for each width in `widths`. See `cwt`
    2. Identify "ridge lines" in the cwt matrix. These are relative maxima
       at each row, connected across adjacent rows. See
       identify_ridge_lines
    3. Filter the ridge_lines using filter_ridge_lines.

    .. versionadded:: 0.11.0

    References
    ----------
    .. [1] Bioinformatics (2006) 22 (17): 2059-2065.
        doi: 10.1093/bioinformatics/btl355
        http://bioinformatics.oxfordjournals.org/content/22/17/2059.long

    Examples
    --------
    >>> xs = np.arange(0, np.pi, 0.05)
    >>> data = np.sin(xs)
    >>> ridge_lines, filtered, peakind = find_peaks_cwt(data, np.arange(1,10))
    >>> xs[peakind], data[peakind]
    (array([ 1.6]), array([ 0.9995736]))

    """
    # A plain Python sequence does not support ``widths / 4.0``; normalise
    # once so every default below works for lists and tuples as well.
    widths = np.asarray(widths)

    if gap_thresh is None:
        gap_thresh = np.ceil(widths[0])
    if max_distances is None:
        max_distances = widths / 4.0
    if wavelet is None:
        wavelet = ricker

    cwt_dat = cwt(vector, wavelet, widths)
    ridge_lines = _identify_ridge_lines(cwt_dat, max_distances, gap_thresh)
    filtered = _filter_ridge_lines(cwt_dat, ridge_lines,
                                   min_length=min_length,
                                   min_snr=min_snr, noise_perc=noise_perc)

    # The first column entry of each surviving ridge line is the peak index.
    max_locs = [ridge[1][0] for ridge in filtered]
    return ridge_lines, filtered, sorted(max_locs)
def detect_peaks(x: np.ndarray, y: np.ndarray, widths: np.ndarray,
                 min_length: int = 5, max_distance: int = 2,
                 gap_threshold: int = 1, snr: float = 3, min_width: float = 5,
                 max_width: float = 60,
                 estimators: Union[str, _estimator_type] = "default"):
    r"""
    Find peaks in a 1D signal.

    Peaks are detected using a modified version of the algorithm described in
    [DP06]_.

    Parameters
    ----------
    x : sorted array
    y : array of intensities
    widths : array
        Array of widths, in x units. Used as scales to build the wavelet
        array.
    min_length : int
        Minimum number of points in a ridge line.
    max_distance : float
        Maximum x distance between consecutive points in a ridge line, in x
        units.
    gap_threshold : int
        Maximum number of consecutive missing peaks in a ridge line.
    snr : positive number
        Signal-to-noise ratio used to filter peaks. Defined as follows:

        .. math::

            SNR = \frac{peak height - baseline}{noise}

    min_width : positive number
        Minimum width of the peaks
    max_width : positive number
        Maximum width of the peaks
    estimators : str or dict
        How to estimate baseline, noise, peak height, peak width, peak area
        and peak location. If `estimators` is 'cwt', parameters are computed
        as described in [DP06]_. Check the Notes to see how estimations in
        'default' mode are computed or how custom estimators can be used.

    Returns
    -------
    peaks : List of PeakLocation
        Sorted by peak location.
    params : list
        Parameters estimated for the peaks, in the same order as `peaks`.

    Notes
    -----
    Peaks are detected using the CWT algorithm described in [DP06]_. The
    optimum scale where each peak is detected is the local maximum at the
    lowest scale in the ridge line. If no local maximum was found, the scale
    with the maximum coefficient is chosen. After finding a peak, the
    extension of the peak is found by finding the nearest local minimum at
    both sides of the peak, using the wavelet coefficients with the best
    scale. A peak is represented then by three indices specifying the peak
    location, peak start and peak end. These three values, together with
    baseline and noise estimations are used to estimate peak parameters. If
    the mode used is 'default', the peak parameters are defined as follows:

    baseline :
        A baseline is built using y values where no peak was detected. These
        values are interpolated to build the baseline.
    noise :
        The noise is computed as the standard deviation of the values used to
        build the baseline. To obtain a robust estimation, the median
        absolute deviation of the baseline is used.
    height :
        The height of a peak is computed as the difference between the y
        value and the baseline value at the peak location.
    snr :
        The quotient between the height of the peak and the noise.
    area :
        Area of the peak obtained by integration between the start and
        the end of the peak. The area of the baseline is subtracted.
    width :
        The peak width is computed as the peak extension, that is, the
        difference between the end and the start of the peak.

    After computing these parameters, peaks are filtered based on SNR and
    peak width. Peak overlap between the filtered peaks is analyzed then. Two
    peaks are overlapping if there is superposition in their peak extensions.
    Overlapping peaks are flagged, their extension corrected and corrected
    peak parameters are computed again.

    Custom estimators can be used for noise, baseline, peak height,
    peak location, peak width and peak area:

    .. code-block:: python

            estimators = {"baseline": baseline_func, "noise": noise_func,
                          "height": height_func,  "loc": loc_func,
                          "width": width_func, "area": area_func}

            # x and y are the same array used in the function
            # peaks is a list of PeakLocation instances
            # peak is a single PeakLocation instance

            # baseline must have the same size as x and y
            baseline = baseline_func(x, y, peaks)
            # noise is a positive number
            noise = noise_func(x, y, peaks)
            # peak_parameters are all positive numbers
            # (area and height can be zero)
            height = height_func(x, y, peak, baseline)
            area = area_func(x, y, peak, baseline)
            width = width_func(x, y, peak, baseline)
            loc = loc_func(x, y, peak, baseline)

    References
    ----------
    .. [DP06] Pan Du, Warren A. Kibbe, Simon M. Lin, Improved peak detection
        in mass spectrum by incorporating continuous wavelet transform-based
        pattern matching, Bioinformatics, Volume 22, Issue 17, 1 September
        2006, Pages 2059–2065, https://doi.org/10.1093/bioinformatics/btl355

    """
    # Convert to uniform sampling
    xu, yu = _resample_data(x, y)
    # convert parameters to number of points
    # (from here on `widths` and `max_distance` are the converted values and
    # `max_distance` is indexed like a sequence below)
    widths, max_distance = \
        _convert_to_points(xu, widths, max_distance)
    # detect peaks in the ridge lines
    w = cwt(yu, ricker, widths)
    ridge_lines = \
        _peak_finding._identify_ridge_lines(w, max_distance, gap_threshold)
    # y_peaks are the local maxima of y and are used to validate peaks
    # y_peaks = find_peaks(yu)[0]
    y_peaks = argrelmax(yu, order=2)[0]
    peaks = _process_ridge_lines(w, y_peaks, ridge_lines, min_length,
                                 max_distance)

    # baseline and noise estimation
    # ("cwt" mode passes no baseline/noise on to _estimate_params; any other
    # value is treated as a mapping of user supplied estimator callables)
    if estimators == "default":
        baseline, noise = baseline_noise_estimation(yu)
    elif estimators == "cwt":
        baseline, noise = None, None
    else:
        baseline = estimators["baseline"](xu, yu, peaks)
        noise = estimators["noise"](xu, yu, peaks)

    # peak filtering and parameter estimation
    peaks, params = \
        _estimate_params(xu, yu, widths, w, peaks, snr, min_width,
                         max_width, estimators, baseline=baseline,
                         noise=noise)

    # sort peaks based on location
    # (params is reordered with the same permutation so that both lists stay
    # index-aligned)
    sorted_index = sorted(range(len(peaks)), key=lambda s: peaks[s].loc)
    peaks = [peaks[k] for k in sorted_index]
    params = [params[k] for k in sorted_index]

    # find and correct overlap between consecutive peaks:
    # overlap_index collects the indices whose parameters must be estimated
    # again, rm_index the indices that are dropped from the final result.
    overlap_index = list()
    rm_index = list()
    for k in range(len(peaks) - 1):
        left, right = peaks[k], peaks[k + 1]
        is_same_peak = right.loc == left.loc
        merge = (right.loc - left.loc) <= max_distance[0]
        has_overlap = left.end > right.start
        if is_same_peak:
            # the boolean adds 1 when left.scale < right.scale, so of two
            # peaks at the same location the one with the larger scale is
            # dropped (the left one on a tie)
            rm_index.append(k + (left.scale < right.scale))
        elif merge:
            # the left peak is dropped and absorbed by the right one: its
            # start is taken over and the location moves to the integer
            # midpoint of both locations
            rm_index.append(k)
            right.start = left.start
            right.loc = (left.loc + right.loc) // 2
        elif has_overlap:
            _fix_peak_extension(left, right, yu)
            overlap_index.extend([k, k + 1])
            # remove invalid peaks after the extension was fixed
            # NOTE(review): nesting of this check inside the overlap branch
            # was reconstructed from the comment above - confirm.
            if yu[left.loc] < max(yu[left.start], yu[left.end]):
                rm_index.append(k)

    overlap_peaks = [peaks[x] for x in overlap_index]

    # if there are peaks with overlap, then compute again peak parameters after
    # correction
    if overlap_index:
        _, overlap_params = \
            _estimate_params(xu, yu, widths, w, overlap_peaks, snr, min_width,
                             max_width, estimators, baseline=baseline,
                             noise=noise, append_empty_params=True)
        # replace corrected values in params:
        # (an empty entry marks a corrected peak for removal instead)
        for k, param in zip(overlap_index, overlap_params):
            if len(param):
                params[k] = param
            else:
                rm_index.append(k)

    # remove invalid peaks and back scale peaks
    peaks = [p.rescale(xu, x) for (k, p) in enumerate(peaks)
             if k not in rm_index]
    params = [p for k, p in enumerate(params)
              if (len(p) and k not in rm_index)]
    return peaks, params