Exemplo n.º 1
0
def peak_identification(peaks, width_in_s, width_roll_mean=200,
                        roll_max_peaks_threshold=4.0, fs=16000, nr_ffts_per_s=100,
                        chunk_len_s=60, len_fft=1024, is_ret_roll_max_peaks=False):
    """
    Locate isolated peaks in a 1d correlation trace and express them as frame positions.

    The peak indices are found by find_peak_ind() and each index is then multiplied by
    the frames-per-sample factor reported by stft.calc_nr_frames().

    Parameters:
    -----------
    peaks : ndarray
        1d array containing isolated peaks (as produced by correlation_picking).
    width_in_s : int
        Width in seconds of the interval in which a maximum is searched, i.e. two maxima
        have to be at least width_in_s apart to be registered as separate.
    width_roll_mean : int
        Width used for the rolling mean normalisation; forwarded to find_peak_ind().
    roll_max_peaks_threshold : float
        Factor by which a peak has to exceed the rolling mean to be registered as a valid
        peak; forwarded to find_peak_ind().
    fs : float
        The sample frequency (frames per second) of the data.
    nr_ffts_per_s : int
        Number of ffts per second in the stft.
    chunk_len_s : int
        The length in seconds of each chunked stft.
    len_fft : int
        The length of each fft calculation.
    is_ret_roll_max_peaks : bool
        If true, additionally return roll_max_peaks. Default is False.

    Returns:
    --------
    peak_frame_list : ndarray
        The peak indices scaled by frames_per_sample.
    roll_max_peaks : only returned if is_ret_roll_max_peaks
        Second return value of find_peak_ind(), passed through unchanged.
    """
    ffts_per_chunk = chunk_len_s * nr_ffts_per_s
    frame_info = stft.calc_nr_frames(chunk_len_s, fs, len_fft, ffts_per_chunk)
    _, _, frames_per_sample, sec_per_sample, _ = frame_info

    # Window width expressed in stft samples (seconds / seconds-per-sample).
    width_in_samples = width_in_s / sec_per_sample

    if not is_ret_roll_max_peaks:
        inds = find_peak_ind(peaks, width_in_samples,
                             width_roll_mean=width_roll_mean,
                             roll_max_peaks_threshold=roll_max_peaks_threshold)
        return np.array([idx * frames_per_sample for idx in inds])

    inds, roll_max_peaks = find_peak_ind(peaks, width_in_samples,
                                         width_roll_mean=width_roll_mean,
                                         roll_max_peaks_threshold=roll_max_peaks_threshold,
                                         is_ret_roll_max_peaks=True)
    peak_frame_list = np.array([idx * frames_per_sample for idx in inds])
    return peak_frame_list, roll_max_peaks
Exemplo n.º 2
0
def calc_pattern_correlation_chunked(data,pattern,fs,freq_fft_bins ,chunk_len_s = 45,
                                        len_fft = 1024, nr_ffts_per_s = 100, pattern_len_s = 2):
    """
    Correlate the stft of a timeseries with the stft of a pattern, chunk by chunk.

    For every chunk of the data, each frequency bin in freq_fft_bins is cross-correlated
    (np.correlate, mode='same') with the matching row of the z-scored pattern stft, and the
    per-bin results are summed. The per-chunk sums are concatenated in time order.
    Used for data generation for machine learning as input for peak finding algorithms.

    Parameters
    ----------
    data : 1D ndarray
        Timeseries
    pattern : ndarray
        The timeseries pattern which is used to calculate the correlation with data
    fs : float
        The sample frequency (frames per second) of the data
    freq_fft_bins : list
        The frequency bins (row indices of the stft) used for the correlation between
        pattern and the stft of the data. Must not be empty.
    chunk_len_s  : int
        The length in seconds for each chunked stft.
    len_fft : int
        The length of each fft calculation.
    nr_ffts_per_s : int
        Number of ffts per second in the stft.
    pattern_len_s : int
        Length of the pattern in seconds

    Returns
    -------
    peaks : 1D ndarray
        The concatenated array of the correlation between data and pattern. Empty if
        data is too short for a single chunk to be processed.

    Raises
    ------
    ValueError
        If freq_fft_bins is empty.
    """
    # Materialise the bins once; len()/truthiness of a list is safe even if the caller
    # passed a numpy array.
    bins = list(freq_fft_bins)
    if not bins:
        raise ValueError('freq_fft_bins must contain at least one frequency bin')

    nr_ffts_per_chunk = chunk_len_s * nr_ffts_per_s
    n_frames_chunk, _, _, _, overlap = stft.calc_nr_frames(chunk_len_s, fs, len_fft,
                                                           nr_ffts_per_chunk)
    # -- By giving the overlap, the length of the pattern is not necessarily
    # -- pattern_len_s*nr_ffts_per_s anymore
    pattern = stft.calc_stft(pattern, 0, pattern.shape[0], fs,
                             pattern_len_s * nr_ffts_per_s, overlap=overlap)[0]
    # -- z-score the pattern
    pattern = (pattern - np.mean(pattern)) / np.std(pattern)

    # Collect the per-chunk results and join them once at the end instead of growing an
    # array with hstack on every iteration.
    chunk_correlations = []
    end_frame = 0
    # NOTE(review): this condition stops before the last n_frames_chunk frames, so trailing
    # data (including an exactly fitting final chunk) is never correlated. Kept as in the
    # original; presumably calc_stft needs frames beyond end_frame - confirm.
    while end_frame < data.shape[0] - n_frames_chunk:
        start_frame = end_frame
        end_frame = start_frame + n_frames_chunk
        spectrum = stft.calc_stft(data, start_frame, end_frame, fs, nr_ffts_per_chunk)[0]

        # Sum the correlation over all requested bins. The `old_behavior` keyword is not
        # passed because newer NumPy releases no longer accept it.
        chunk_sum = np.correlate(spectrum[bins[0], :], pattern[bins[0], :], mode='same')
        for i in bins[1:]:
            chunk_sum = chunk_sum + np.correlate(spectrum[i, :], pattern[i, :], mode='same')
        chunk_correlations.append(chunk_sum)

    if not chunk_correlations:
        # No chunk was processed: return an empty result instead of failing on an
        # unbound variable.
        return np.array([])
    return np.concatenate(chunk_correlations)
Exemplo n.º 3
0
def peak_identification(peaks,
                        width_in_s,
                        width_roll_mean=200,
                        roll_max_peaks_threshold=4.0,
                        fs=16000,
                        nr_ffts_per_s=100,
                        chunk_len_s=60,
                        len_fft=1024,
                        is_ret_roll_max_peaks=False):
    """
    Find isolated peaks in a 1d correlation trace and return them as frame positions.

    find_peak_ind() supplies the peak indices; every index is multiplied by the
    frames-per-sample factor obtained from stft.calc_nr_frames().

    Parameters:
    -----------
    peaks : ndarray
        1d array containing isolated peaks (as produced by correlation_picking).
    width_in_s : int
        Width in seconds of the interval in which a maximum is searched, i.e. two maxima
        have to be at least width_in_s apart to be registered as separate.
    width_roll_mean : int
        Width used for the rolling mean normalisation; forwarded to find_peak_ind().
    roll_max_peaks_threshold : float
        Factor by which a peak has to exceed the rolling mean to be registered as a valid
        peak; forwarded to find_peak_ind().
    fs : float
        The sample frequency (frames per second) of the data.
    nr_ffts_per_s : int
        Number of ffts per second in the stft.
    chunk_len_s : int
        The length in seconds of each chunked stft.
    len_fft : int
        The length of each fft calculation.
    is_ret_roll_max_peaks : bool
        If true, additionally return roll_max_peaks. Default is False.

    Returns:
    --------
    peak_frame_list : ndarray
        The peak indices scaled by frames_per_sample.
    roll_max_peaks : only returned if is_ret_roll_max_peaks
        Second return value of find_peak_ind(), passed through unchanged.
    """
    _, _, frames_per_sample, sec_per_sample, _ = stft.calc_nr_frames(
        chunk_len_s, fs, len_fft, chunk_len_s * nr_ffts_per_s)

    # Window width expressed in stft samples (seconds / seconds-per-sample).
    width_in_samples = width_in_s / sec_per_sample

    # Keyword arguments shared by both variants of the find_peak_ind() call.
    finder_kwargs = dict(width_roll_mean=width_roll_mean,
                         roll_max_peaks_threshold=roll_max_peaks_threshold)

    if is_ret_roll_max_peaks:
        inds, roll_max_peaks = find_peak_ind(peaks,
                                             width_in_samples,
                                             is_ret_roll_max_peaks=True,
                                             **finder_kwargs)
        scaled = np.array([idx * frames_per_sample for idx in inds])
        return scaled, roll_max_peaks

    inds = find_peak_ind(peaks, width_in_samples, **finder_kwargs)
    return np.array([idx * frames_per_sample for idx in inds])
Exemplo n.º 4
0
def calc_pattern_correlation_chunked(data,
                                     pattern,
                                     fs,
                                     freq_fft_bins,
                                     chunk_len_s=45,
                                     len_fft=1024,
                                     nr_ffts_per_s=100,
                                     pattern_len_s=2):
    """
    Correlate the stft of a timeseries with the stft of a pattern, chunk by chunk.

    For every chunk of the data, each frequency bin in freq_fft_bins is cross-correlated
    (np.correlate, mode='same') with the matching row of the z-scored pattern stft, and the
    per-bin results are summed. The per-chunk sums are concatenated in time order.
    Used for data generation for machine learning as input for peak finding algorithms.

    Parameters
    ----------
    data : 1D ndarray
        Timeseries
    pattern : ndarray
        The timeseries pattern which is used to calculate the correlation with data
    fs : float
        The sample frequency (frames per second) of the data
    freq_fft_bins : list
        The frequency bins (row indices of the stft) used for the correlation between
        pattern and the stft of the data. Must not be empty.
    chunk_len_s  : int
        The length in seconds for each chunked stft.
    len_fft : int
        The length of each fft calculation.
    nr_ffts_per_s : int
        Number of ffts per second in the stft.
    pattern_len_s : int
        Length of the pattern in seconds

    Returns
    -------
    peaks : 1D ndarray
        The concatenated array of the correlation between data and pattern. Empty if
        data is too short for a single chunk to be processed.

    Raises
    ------
    ValueError
        If freq_fft_bins is empty.
    """
    # Materialise the bins once; len()/truthiness of a list is safe even if the caller
    # passed a numpy array.
    bins = list(freq_fft_bins)
    if not bins:
        raise ValueError('freq_fft_bins must contain at least one frequency bin')

    nr_ffts_per_chunk = chunk_len_s * nr_ffts_per_s
    n_frames_chunk, _, _, _, overlap = stft.calc_nr_frames(
        chunk_len_s, fs, len_fft, nr_ffts_per_chunk)
    # -- By giving the overlap, the length of the pattern is not necessarily
    # -- pattern_len_s*nr_ffts_per_s anymore
    pattern = stft.calc_stft(pattern,
                             0,
                             pattern.shape[0],
                             fs,
                             pattern_len_s * nr_ffts_per_s,
                             overlap=overlap)[0]
    # -- z-score the pattern
    pattern = (pattern - np.mean(pattern)) / np.std(pattern)

    # Collect the per-chunk results and join them once at the end instead of growing an
    # array with hstack on every iteration.
    chunk_correlations = []
    end_frame = 0
    # NOTE(review): this condition stops before the last n_frames_chunk frames, so trailing
    # data (including an exactly fitting final chunk) is never correlated. Kept as in the
    # original; presumably calc_stft needs frames beyond end_frame - confirm.
    while end_frame < data.shape[0] - n_frames_chunk:
        start_frame = end_frame
        end_frame = start_frame + n_frames_chunk
        spectrum = stft.calc_stft(data, start_frame, end_frame, fs,
                                  nr_ffts_per_chunk)[0]

        # Sum the correlation over all requested bins. The `old_behavior` keyword is not
        # passed because newer NumPy releases no longer accept it.
        chunk_sum = np.correlate(spectrum[bins[0], :],
                                 pattern[bins[0], :],
                                 mode='same')
        for i in bins[1:]:
            chunk_sum = chunk_sum + np.correlate(spectrum[i, :],
                                                 pattern[i, :],
                                                 mode='same')
        chunk_correlations.append(chunk_sum)

    if not chunk_correlations:
        # No chunk was processed: return an empty result instead of failing on an
        # unbound variable.
        return np.array([])
    return np.concatenate(chunk_correlations)