def get_fromants(sound, fs=11025):
    """Estimate formant frequencies of one sound frame via LPC root finding.

    The frame is Hamming-windowed, passed through the all-pole filter
    1 / (1 + 0.63 z^-1), and fitted with an LPC polynomial; the angles of
    the polynomial's roots are then scaled to frequencies.

    Args:
        sound: 1-D sequence of audio samples (it is multiplied element-wise
            by a NumPy window).
        fs: Sampling rate in Hz; selects the LPC order and scales root
            angles to Hz.

    Returns:
        List of frequencies in Hz, sorted ascending, one for every LPC root
        whose imaginary part is non-negative.
    """
    # Taper the frame, then run it through 1 / (1 + 0.63 z^-1).
    windowed = sound * np.hamming(len(sound))
    filtered = lfilter([1.], [1., 0.63], windowed)

    # Order = 2 + (sampling rate in whole kHz).  Floor division keeps the
    # order an integer under true division and for float sampling rates.
    ncoeff = 2 + int(fs // 1000)
    A, _, _ = lpc(filtered, ncoeff)

    # Complex roots come in conjugate pairs; keep the upper half plane only.
    rts = [r for r in np.roots(A) if np.imag(r) >= 0]
    angz = np.arctan2(np.imag(rts), np.real(rts))

    # Radians per sample -> Hz.
    return sorted(angz * (fs / (2 * math.pi)))
Exemplo n.º 2
0
def compute_formants(audio_buffer):
    """Estimate formant frequencies and bandwidths of a frame using LPC.

    The sampling rate is fixed at 8000 Hz inside the function.

    Args:
        audio_buffer: 1-D sequence of audio samples (multiplied element-wise
            by a NumPy Hamming window).

    Returns:
        Tuple ``(freqs, bandwidths)``: ``freqs`` is a list of frequencies in
        Hz sorted ascending, ``bandwidths`` is a NumPy array holding, in the
        same order, ``-1/2 * Fs / (2*pi) * ln|root|`` for each retained root.
    """
    Fs = 8000  # sampling frequency in Hz

    # Window the frame and run it through the filter 1 / (1 + 0.63 z^-1).
    windowed = audio_buffer * np.hamming(len(audio_buffer))
    filtered_buffer = lfilter([1], [1., 0.63], windowed)

    # Floor division keeps the LPC order an integer under true division.
    ncoeff = 2 + Fs // 1000
    A, _, _ = lpc(filtered_buffer, ncoeff)

    # Keep one root of every complex-conjugate pair.
    roots = np.asarray([r for r in np.roots(A) if np.imag(r) >= 0])
    angz = np.arctan2(np.imag(roots), np.real(roots))
    unsorted_freqs = angz * (Fs / (2 * math.pi))
    freqs = sorted(unsorted_freqs)

    # Reorder the roots like the frequencies so bandwidths line up with them.
    sorted_roots = roots[np.argsort(unsorted_freqs)]
    bandwidths = -1 / 2. * (Fs / (2 * math.pi)) * np.log(np.abs(sorted_roots))

    return freqs, bandwidths
Exemplo n.º 3
0
def formant(arr):
    """Return LPC-based formant frequency estimates for a recorded frame.

    Args:
        arr: 1-D sequence of recorded samples (multiplied element-wise by a
            NumPy Hamming window). The sampling rate is fixed at 11025 Hz.

    Returns:
        List of frequencies in Hz, sorted ascending, one per LPC root with a
        non-negative imaginary part.
    """
    Fs = 11025

    # Hamming taper followed by the filter 1 / (1 + 0.63 z^-1).
    tapered = arr * numpy.hamming(len(arr))
    filtered = lfilter([1], [1., 0.63], tapered)

    # Number of LPC coefficients.
    n_coeff = int(2 + Fs / 1000)
    A, e, k = lpc(filtered, n_coeff)

    # Roots in the upper half plane (imaginary part >= 0) and their angles.
    upper_half = [root for root in numpy.roots(A) if numpy.imag(root) >= 0]
    angles = numpy.arctan2(numpy.imag(upper_half), numpy.real(upper_half))

    hz_per_radian = Fs / (2 * math.pi)
    for_freq = sorted(angles * hz_per_radian)
    return for_freq
Exemplo n.º 4
0
def get_formants(file_path):
    """Estimate formant frequencies of a WAV file using LPC.

    Example input:
    http://www.linguistics.ucla.edu/people/hayes/103/Charts/VChart/ae.wav

    Args:
        file_path: Path (or file object) accepted by ``wave.open``.

    Returns:
        List of frequencies in Hz, sorted ascending, one per LPC root with a
        non-negative imaginary part.
    """
    # Read everything needed from the file, then release the handle even if
    # reading fails (try/finally also works where Wave_read is not a
    # context manager).
    spf = wave.open(file_path, 'r')
    try:
        Fs = spf.getframerate()
        raw = spf.readframes(-1)
    finally:
        spf.close()

    # Samples are decoded as native 16-bit integers.
    # NOTE(review): assumes 16-bit mono audio; other sample widths or
    # interleaved channels would be misread -- confirm against callers.
    x = numpy.frombuffer(raw, dtype=numpy.int16)

    # Hamming window, then the filter 1 / (1 + 0.63 z^-1).
    x1 = x * numpy.hamming(len(x))
    x1 = lfilter([1], [1., 0.63], x1)

    # Floor division keeps the LPC order an integer under true division.
    ncoeff = 2 + Fs // 1000
    A, _, _ = lpc(x1, ncoeff)

    # Keep one root of every complex-conjugate pair and convert its angle
    # (radians per sample) to Hz.
    rts = [r for r in numpy.roots(A) if numpy.imag(r) >= 0]
    angz = numpy.arctan2(numpy.imag(rts), numpy.real(rts))
    frqs = sorted(angz * (Fs / (2 * math.pi)))

    return frqs
def feature_extraction_lp_group_delay(y, fs=44100, statistics=True, lpgd_params=None, win_params=None):
    """Compute DCT-compressed LP group-delay features for a framed signal.

    The signal is framed with ``segment_axis``, an LPC polynomial is fitted
    per frame, the phase of the inverse LPC spectrum is unwrapped, its
    negative first difference is taken, and a 20-point DCT is applied.
    Intermediate array shapes are printed as debug output.

    Args:
        y: Input signal (array-like supporting ``y + scalar``).
        fs: Sampling rate; not used by the computation.
        statistics: When true, per-column statistics are returned alongside
            the features.
        lpgd_params: Mapping with keys ``'nfft'`` and ``'lp_order'``.
            Required despite the ``None`` default.
        win_params: Mapping with keys ``'win_length'`` and ``'hop_length'``.
            Required despite the ``None`` default.

    Returns:
        ``{'feat': feature_matrix}``, plus a ``'stat'`` dict (``mean``,
        ``std``, ``N``, ``S1``, ``S2`` over axis 0) when ``statistics`` is
        true.
    """
    eps = numpy.spacing(1)

    nfft = lpgd_params['nfft']
    lp_order = lpgd_params['lp_order']

    # Offset the signal by machine epsilon (presumably so all-zero frames do
    # not break the LPC fit -- TODO confirm).
    y = y + eps

    frames = segment_axis(y, win_params['win_length'], win_params['hop_length'])
    print('frames : ' + str(frames.shape))

    a, e, k = lpc(frames, lp_order)
    print('a : ' + str(a.shape))

    # Spectrum of the all-pole model is the reciprocal of the LPC spectrum.
    A = 1 / fft(a, nfft)

    phaseA = numpy.unwrap(numpy.angle(A))
    print('phaseA: ' + str(phaseA.shape))

    # Keep the first half of the bins; floor division keeps the slice bound
    # an integer under true division.
    phaseA = phaseA[:, 0:nfft // 2]
    print('phaseA: ' + str(phaseA.shape))

    # Negative first difference of the phase along the last axis.
    tauA = -1 * numpy.diff(phaseA)
    print('tauA' + str(tauA.shape))

    feature_matrix = dct(tauA, n=20)
    print('fm: ' + str(feature_matrix.shape))

    # Collect into data structure
    if statistics:
        return {
            'feat': feature_matrix,
            'stat': {
                'mean': numpy.mean(feature_matrix, axis=0),
                'std': numpy.std(feature_matrix, axis=0),
                'N': feature_matrix.shape[0],
                'S1': numpy.sum(feature_matrix, axis=0),
                'S2': numpy.sum(feature_matrix ** 2, axis=0),
            }
        }
    else:
        return {
            'feat': feature_matrix}
Exemplo n.º 6
0
def do_lpc(spec, order, error_normal=False):
    """Fit LPC along axis 0 of ``spec`` and return the coefficients.

    Args:
        spec: 2-D array; LPC is computed along axis 0 (one fit per column).
        order: LPC order.
        error_normal: When true, return all ``order + 1`` coefficient rows
            divided by each column's prediction error. Otherwise return the
            coefficients without the first row.

    Returns:
        2-D array of coefficients, shaped as described above.
    """
    coeff, error, _ = lpc(spec, order, axis=0)
    if not error_normal:
        # Drop the first coefficient row.
        return coeff[1:, :]

    # Tile the per-column error over every coefficient row before dividing.
    error_rows = np.repeat(np.reshape(error, (1, len(error))), order + 1, axis=0)
    return coeff / error_rows
def feature_extraction_lp_group_delay(y, fs=44100, statistics=True, lpgd_params=None, win_params=None):
    """Compute DCT-compressed LP group-delay features for a framed signal.

    The signal is framed with ``segment_axis``, an LPC polynomial is fitted
    per frame, the phase of the inverse LPC spectrum is unwrapped, its
    negative first difference is taken, and a 20-point DCT is applied.
    Intermediate array shapes are printed as debug output.

    Args:
        y: Input signal (array-like supporting ``y + scalar``).
        fs: Sampling rate; not used by the computation.
        statistics: When true, per-column statistics are returned alongside
            the features.
        lpgd_params: Mapping with keys ``'nfft'`` and ``'lp_order'``.
            Required despite the ``None`` default.
        win_params: Mapping with keys ``'win_length'`` and ``'hop_length'``.
            Required despite the ``None`` default.

    Returns:
        ``{'feat': feature_matrix}``, plus a ``'stat'`` dict (``mean``,
        ``std``, ``N``, ``S1``, ``S2`` over axis 0) when ``statistics`` is
        true.
    """
    eps = numpy.spacing(1)

    nfft = lpgd_params['nfft']
    lp_order = lpgd_params['lp_order']

    # Offset the signal by machine epsilon (presumably so all-zero frames do
    # not break the LPC fit -- TODO confirm).
    y = y + eps

    frames = segment_axis(y, win_params['win_length'], win_params['hop_length'])
    print('frames : ' + str(frames.shape))

    a, e, k = lpc(frames, lp_order)
    print('a : ' + str(a.shape))

    # Spectrum of the all-pole model is the reciprocal of the LPC spectrum.
    A = 1 / fft(a, nfft)

    phaseA = numpy.unwrap(numpy.angle(A))
    print('phaseA: ' + str(phaseA.shape))

    # Keep the first half of the bins; floor division keeps the slice bound
    # an integer under true division.
    phaseA = phaseA[:, 0:nfft // 2]
    print('phaseA: ' + str(phaseA.shape))

    # Negative first difference of the phase along the last axis.
    tauA = -1 * numpy.diff(phaseA)
    print('tauA' + str(tauA.shape))

    feature_matrix = dct(tauA, n=20)
    print('fm: ' + str(feature_matrix.shape))

    # Collect into data structure
    if statistics:
        return {
            'feat': feature_matrix,
            'stat': {
                'mean': numpy.mean(feature_matrix, axis=0),
                'std': numpy.std(feature_matrix, axis=0),
                'N': feature_matrix.shape[0],
                'S1': numpy.sum(feature_matrix, axis=0),
                'S2': numpy.sum(feature_matrix ** 2, axis=0),
            }
        }
    else:
        return {
            'feat': feature_matrix}
Exemplo n.º 8
0
def lpc_filter(x, lpc_mem):
    """Fit LPC to ``x`` and return coefficients, filter state and residual.

    Args:
        x: 1-D signal frame.
        lpc_mem: Filter state carried over from the previous frame; its
            length sets the LPC order.

    Returns:
        Tuple ``(coeff, lpc_mem, res_frames)``: the full LPC coefficient
        vector (leading coefficient included), the updated filter state to
        pass to the next call, and the prediction residual ``x - estimate``.
    """
    # Linear prediction coefficients [a0, a1, ..., ap].
    coeff = np.asarray(lpc(x, len(lpc_mem))[0])

    # The predictor uses past samples only: x_hat[n] = -sum_{k>=1} a_k x[n-k].
    # The leading tap is therefore zero; keeping -a0 there would fold -x[n]
    # into the "estimate".  The tap count is unchanged, so the state length
    # (and the state values, which depend only on taps 1..p) stay the same.
    predictor = np.concatenate(([0.], -coeff[1:]))

    est_frames, lpc_mem = lfilter(predictor, 1, x, -1, lpc_mem)
    res_frames = x - est_frames

    return coeff, lpc_mem, res_frames
Exemplo n.º 9
0
def LSF(arr):
    """Return the ``poly2lsf`` transform of a frame's LPC polynomial.

    Args:
        arr: 1-D sequence of samples (multiplied element-wise by a NumPy
            Hamming window). The sampling rate is fixed at 11025 Hz.

    Returns:
        Whatever ``poly2lsf`` returns for the frame's LPC coefficients.
    """
    Fs = 11025

    # Hamming taper followed by the filter 1 / (1 + 0.63 z^-1).
    tapered = arr * np.hamming(len(arr))
    filtered = sp.signal.lfilter([1], [1., 0.63], tapered)

    # Number of LPC coefficients.
    n_coeff = int(2 + Fs / 1000)
    A, e, k = lpc(filtered, n_coeff)

    return poly2lsf(A)
	def get_formants(audio, order=1):
		"""
		Calculate the formant frequencies of the audio segment.

		This method was taken from
		http://stackoverflow.com/questions/25107806/estimate-formants-using-lpc-in-python
		and should be confirmed with prof Niesler.

		Open questions from the original author:

		 - Peak picking? Where is this happening?
		 - Why is the HPF necessary?
		 - How do we confirm that this works?

		INPUTS:
		=======
		audio:	List containing audio data
		order:	LPC order. Defaults to 1, the value that was hard-coded
				before. NOTE(review): an earlier comment here said the
				paper used 14th order LPC, and order 1 cannot yield five
				formants -- confirm the intended value.

		OUTPUTS:
		========
		freq:	At most the five lowest root frequencies, sorted ascending

		NOTE(review): the sampling rate ``fs`` is not a parameter; it is
		read from the enclosing scope -- confirm it is defined there.
		"""
		# Apply Hamming window, then the filter 1 / (1 + 0.63 z^-1).
		windowed = audio * np.hamming(len(audio))
		filtered = lfilter([1], [1, 0.63], windowed)

		A, e, k = lpc(filtered, order)

		# Keep one root of every complex-conjugate pair.
		roots = [r for r in np.roots(A) if np.imag(r) >= 0]

		# Root angles need the two-argument arctan2(imag, real).  np.arctan
		# treats a second positional argument as the output buffer, so the
		# real parts were previously ignored.
		angles = np.arctan2(np.imag(roots), np.real(roots))

		# Radians per sample -> Hz.
		freq = sorted(angles * (fs / (2 * math.pi)))

		return freq[0:5]
Exemplo n.º 11
0
    def compare_to_talkbox(self):
        """Check that ``sp.analysis.lpc`` agrees with scikits.talkbox's lpc.

        Returns silently without asserting anything when scikits.talkbox
        cannot be imported. Otherwise both implementations are run at order
        1 on ``generate_recursive_noise()`` and their outputs are compared
        pairwise with an absolute tolerance of 0.01.
        """
        try:
            import scikits.talkbox as tbox
        except ImportError:
            # No reference implementation available: nothing to compare.
            return

        lpc_order = 1
        noise = generate_recursive_noise()
        ours = sp.analysis.lpc(noise, order=lpc_order)
        reference = tbox.lpc(noise, order=lpc_order)

        for got, expected in zip(ours, reference):
            self.assertAlmostEqual(got, expected, delta=0.01)
Exemplo n.º 12
0
def fft(fs, data):
    """Compute the LPC spectral envelope of ``data`` below 5 kHz.

    Despite its name, this evaluates the frequency response of the all-pole
    LPC model (``scipy.signal.freqz``) rather than a Fourier transform.

    Args:
        fs: Sampling rate in Hz.
        data: 1-D signal passed to ``lpc``.

    Returns:
        Dict with ``'x_vector'`` (list of frequencies in Hz below 5000),
        ``'y_vector'`` (matching gains in dB) and ``'labels'`` (axis labels,
        in Polish: "Frequency [Hz]" / "Gain [dB]").
    """
    max_freq = 5000

    # Floor division keeps the LPC order an integer under true division and
    # for float sampling rates.
    ncoeff = 2 + int(fs // 1000)

    a, _, _ = lpc(data, ncoeff)
    w, h = scipy.signal.freqz(1, a, worN=512)
    freqs = fs * w / (2 * np.pi)  # rad/sample -> Hz
    ans = 20 * np.log10(abs(h))   # magnitude in dB

    # freqz returns ascending frequencies, so the kept bins are a prefix and
    # the gains can be truncated to the same length.
    freqs = [freq for freq in freqs if freq < max_freq]
    ans = ans[:len(freqs)]

    labels = {'xlabel': u'Częstotliwość [Hz]', 'ylabel': 'Wzmocnienie [dB]'}
    return {'y_vector': ans, 'x_vector': freqs, 'labels': labels}
Exemplo n.º 13
0
def get_formants(x, Fs):
    """Estimate formants with LPC after resampling the frame to 22050 Hz.

    Args:
        x: 1-D array of samples.
        Fs: Sampling rate of ``x`` in Hz.

    Returns:
        List of frequencies in Hz, sorted ascending, restricted to roots
        with frequency above 90 Hz and bandwidth below 400 Hz. Empty when
        root finding raises ``numpy.linalg.LinAlgError``.
    """
    # Hamming window, then the filter 1 / (1 + 0.63 z^-1).
    N = len(x)
    x1 = x * np.hamming(N)
    x1 = lfilter([1.], [1., 0.63], x1)

    # Resample to a fixed rate; the sample count must be an integer.
    new_Fs = 22050
    new_N = int(np.floor((float(N) * float(new_Fs)) / Fs))
    x1 = resample(x1, new_N, window=None)
    Fs = new_Fs

    # One coefficient per kHz; floor division keeps the order an integer.
    ncoeff = Fs // 1000
    A, e, k = lpc(x1, ncoeff)

    try:
        # Keep one root of every complex-conjugate pair.
        rts = [r for r in np.roots(A) if np.imag(r) >= 0]
    except np.linalg.LinAlgError:
        return []

    # Root angle -> frequency, root magnitude -> bandwidth.  The factor is
    # written as -0.5 because ``-1 / 2`` is -1 under integer division.
    angz = np.arctan2(np.imag(rts), np.real(rts))
    frqs = angz * (Fs / (2 * math.pi))
    bws = -0.5 * (Fs / (2 * np.pi)) * np.log(np.abs(rts))

    # Report in ascending frequency order, dropping implausible candidates.
    return [
        frqs[i] for i in np.argsort(frqs)
        if frqs[i] > 90 and bws[i] < 400
    ]
Exemplo n.º 14
0
def lpcc(arr):
    """Return the first 14 cepstral values derived from a frame's LPC model.

    Args:
        arr: 1-D sequence of samples (multiplied element-wise by a NumPy
            Hamming window). The sampling rate is fixed at 11025 Hz.

    Returns:
        List with the first 14 entries of the 1024-point inverse FFT of the
        log LPC power spectrum (complex values, as produced by ``ifft``).
    """
    Fs = 11025

    # Hamming taper followed by the filter 1 / (1 + 0.63 z^-1).
    tapered = arr * numpy.hamming(len(arr))
    filtered = lfilter([1], [1., 0.63], tapered)

    # Number of LPC coefficients.
    n_coeff = int(2 + Fs / 1000)
    A, e, k = lpc(filtered, n_coeff)

    # Squared magnitude of the 1024-point spectrum of the LPC polynomial.
    magnitude_sq = numpy.square(abs(numpy.fft.fft(A, 1024)))

    # Model power spectrum: prediction error over |A(w)|^2, bin by bin.
    power_spectrum = [e / bin_power for bin_power in magnitude_sq]

    log_power = numpy.log(power_spectrum)
    cepstrum = numpy.fft.ifft(numpy.transpose(log_power), 1024)
    return cepstrum[:14].tolist()
Exemplo n.º 15
0
    def getFormants(frames, sr):
        """Estimate up to four formant values per frame from LPC roots.

        Args:
            frames: 2-D array of frames; ``lpc`` is expected to return one
                coefficient row per frame.
            sr: Sampling rate in Hz.

        Returns:
            NumPy array built from one list per frame, each holding the (at
            most) four smallest scaled positive root angles.
        """
        # Number of LPC coefficients; floor division keeps it an integer
        # under true division and for float sampling rates.
        ncoeff = 2 + int(sr // 1000)
        c = lpc(frames, ncoeff)[0]

        # Build one companion matrix per frame: negated coefficients in the
        # first row, ones on the sub-diagonal below it.
        A = np.diag(np.ones((c.shape[1] - 2,), float), -1)
        cs = -c[:, 1:]
        Z = np.array([np.vstack((cp, A[1:])) for cp in cs])

        # Roots are the eigenvalues of the companion matrices: VERY SLOW.
        eig = np.linalg.eigvals(Z)
        arc = np.arctan2(np.imag(eig), np.real(eig))

        # Scale positive angles and sort ascending.
        # NOTE(review): this factor equals sr / (20 * pi); a plain
        # radians-to-Hz conversion would be sr / (2 * pi) -- confirm the
        # intended units.
        pi2 = 0.05 * sr / np.pi
        formant = [sorted(pi2 * a[a > 0])[:4] for a in arc]

        return np.array(formant)
Exemplo n.º 16
0
    def getFormants(frames, sr):
        """Estimate up to four formant values per frame from LPC roots.

        Args:
            frames: 2-D array of frames; ``lpc`` is expected to return one
                coefficient row per frame.
            sr: Sampling rate in Hz.

        Returns:
            NumPy array built from one list per frame, each holding the (at
            most) four smallest scaled positive root angles.
        """
        # Number of LPC coefficients; floor division keeps it an integer
        # under true division and for float sampling rates.
        ncoeff = 2 + int(sr // 1000)
        c = lpc(frames, ncoeff)[0]

        # Build one companion matrix per frame: negated coefficients in the
        # first row, ones on the sub-diagonal below it.
        A = np.diag(np.ones((c.shape[1] - 2, ), float), -1)
        cs = -c[:, 1:]
        Z = np.array([np.vstack((cp, A[1:])) for cp in cs])

        # Roots are the eigenvalues of the companion matrices: VERY SLOW.
        eig = np.linalg.eigvals(Z)
        arc = np.arctan2(np.imag(eig), np.real(eig))

        # Scale positive angles and sort ascending.
        # NOTE(review): this factor equals sr / (20 * pi); a plain
        # radians-to-Hz conversion would be sr / (2 * pi) -- confirm the
        # intended units.
        pi2 = 0.05 * sr / np.pi
        formant = [sorted(pi2 * a[a > 0])[:4] for a in arc]

        return np.array(formant)
Exemplo n.º 17
0
def formant_freqs(fs, data):
    """Compute the LPC envelope below 5 kHz together with formant cursors.

    Args:
        fs: Sampling rate in Hz.
        data: 1-D signal passed to ``lpc``.

    Returns:
        Dict with ``'x_vector'`` (list of frequencies in Hz below 5000),
        ``'y_vector'`` (matching gains in dB), ``'labels'`` (axis labels, in
        Polish: "Frequency [Hz]" / "Gain [dB]") and ``'cursors'`` (list of
        non-zero root frequencies below 5000 Hz, sorted ascending).
    """
    max_freq = 5000

    # Floor division keeps the LPC order an integer under true division and
    # for float sampling rates.
    ncoeff = 2 + int(fs // 1000)

    a, _, _ = lpc(data, ncoeff)

    # Frequency response of the all-pole model, in Hz and dB.
    w, h = scipy.signal.freqz(1, a, worN=512)
    freqs = fs * w / (2 * np.pi)
    ans = 20 * np.log10(abs(h))

    # Root angles (upper half plane only) converted to Hz.
    rts = [r for r in np.roots(a) if np.imag(r) >= 0]
    angs = np.arctan2(np.imag(rts), np.real(rts))
    formants = sorted(angs * (fs / (2 * math.pi)))

    # Build a real list: ``filter`` would hand callers a one-shot iterator
    # on Python 3.
    formants = [f for f in formants if f != 0 and f < max_freq]

    # freqz returns ascending frequencies, so the kept bins are a prefix.
    freqs = [freq for freq in freqs if freq < max_freq]
    ans = ans[:len(freqs)]

    labels = {'xlabel': u'Częstotliwość [Hz]', 'ylabel': 'Wzmocnienie [dB]'}
    return {'y_vector': ans, 'x_vector': freqs, 'labels': labels, 'cursors': formants}
Exemplo n.º 18
0
def get_formants(x, fs):

    """
    Estimate formants using LPC.

    See:
    http://www.mathworks.com/help/signal/ug/formant-estimation-with-lpc-coefficients.html
    http://www.phon.ucl.ac.uk/courses/spsci/matlab/lect10.html

    Args:
        x: 1-D array of samples. No window is applied in this variant.
        fs: Sampling rate in Hz.

    Returns:
        List of frequencies in Hz, sorted ascending, one per LPC root with a
        strictly positive imaginary part. Empty when ``x`` is all zeros.
    """

    if not np.any(x):
        # All zeroes
        return []

    # Filter 1 / (1 + 0.63 z^-1).
    x1 = lfilter([1.], [1., 0.63], x)

    # Floor division keeps the LPC order an integer under true division and
    # for float sampling rates.
    ncoeff = 2 + int(fs // 1000)
    A, e, k = lpc(x1, ncoeff)

    # Strictly positive imaginary part: drops real roots as well as the
    # lower-half-plane conjugates.
    rts = [r for r in np.roots(A) if np.imag(r) > 0]

    # Root angle (radians per sample) -> Hz.
    angz = np.arctan2(np.imag(rts), np.real(rts))
    frqs = sorted(angz * (fs / (2 * math.pi)))

    return frqs
Exemplo n.º 19
0
def get_formants(dummy):
    """Estimate formant frequencies of a fixed WAV file using LPC.

    The input path is hard-coded. A comparable sample file is
    http://www.linguistics.ucla.edu/people/hayes/103/Charts/VChart/ae.wav

    Args:
        dummy: Ignored.

    Returns:
        List of frequencies in Hz, sorted ascending, one per LPC root with a
        non-negative imaginary part.
    """
    # Read everything needed from the file, then release the handle even if
    # reading fails (try/finally also works where Wave_read is not a
    # context manager).
    spf = wave.open("/home/ponco/devel/mel_cepstral_coeff_neural/vowels/EMartin.wav", 'r')
    try:
        Fs = spf.getframerate()
        raw = spf.readframes(-1)
    finally:
        spf.close()

    # Samples are decoded as native 16-bit integers.
    # NOTE(review): assumes 16-bit mono audio -- confirm for this file.
    x = np.frombuffer(raw, dtype=np.int16)

    # Hamming window, then the filter 1 / (1 + 0.63 z^-1).
    x1 = x * np.hamming(len(x))
    x1 = lfilter([1], [1., 0.63], x1)

    # Floor division keeps the LPC order an integer under true division.
    ncoeff = 2 + Fs // 1000
    A, e, k = lpc(x1, ncoeff)

    # Keep one root of every complex-conjugate pair and convert its angle
    # (radians per sample) to Hz.
    rts = [r for r in np.roots(A) if np.imag(r) >= 0]
    angz = np.arctan2(np.imag(rts), np.real(rts))
    frqs = sorted(angz * (Fs / (2 * math.pi)))

    return frqs
Exemplo n.º 20
0
def stlpc(longSignal, order=10, windowLength=1024, hopsize=512, axis=-1):
    """Compute "Short Time LPC":
          Cut the input signal in frames
          Compute the LPC on each of the frames

    The signal is first filtered with the FIR filter [1, -0.99]; every
    frame is zero-padded to ``windowLength`` and Hamming-windowed before
    the LPC fit.

    Args:
        longSignal: 1-D NumPy array holding the whole signal.
        order: LPC order.
        windowLength: Frame length in samples.
        hopsize: Step between consecutive frame starts, in samples.
        axis: Not used by the computation.

    Returns:
        Tuple ``(STLpc, rootLpc, freqLpc, specFromLpc, sigmaS)`` with one
        column (or entry) per frame: LPC coefficients, complex LPC roots,
        the ``(order - 2) // 2`` lowest positive root frequencies (slots
        without a root keep the initial value 1.0), the ``lpc2spec`` output
        and the LPC prediction error.

    NOTE(review): the sampling rate ``fs`` is not a parameter; it is read
    from the enclosing module scope -- confirm it is defined there.
    """
    lengthSignal = longSignal.size

    # Array dimensions and slice bounds must be integers.
    nbFormantSlots = (order - 2) // 2
    Nb_windows = max(
        0,
        int(np.ceil((lengthSignal - windowLength) / (np.double(hopsize)) + 1.0)))

    STLpc = np.ones([order + 1, Nb_windows])
    rootLpc = np.zeros([order, Nb_windows], dtype=complex)
    freqLpc = np.ones([nbFormantSlots, Nb_windows])
    specFromLpc = np.zeros([windowLength // 2 + 1, Nb_windows])
    sigmaS = np.zeros([Nb_windows, ])

    b_preamp = np.array([1.0, -0.99])
    a_preamp = np.array([1.0])
    longSignalPreamp = scipy.signal.lfilter(b_preamp, a_preamp, longSignal)

    hammingWindow = np.hamming(windowLength)

    for n in range(Nb_windows):
        beginFrame = n * hopsize
        endFrame = min(beginFrame + windowLength, lengthSignal)

        # Start from a fresh zero buffer so a short final frame is truly
        # zero-padded and carries no samples left over from the previous
        # frame.
        currentWindow = np.zeros([windowLength, ])
        currentWindow[:endFrame - beginFrame] = longSignalPreamp[beginFrame:endFrame]
        currentWindow *= hammingWindow

        STLpc[:, n], sigmaS[n], trash = tb.lpc(currentWindow, order)
        specFromLpc[:, n] = lpc2spec(STLpc[:, n], sigmaS[n], fs, windowLength)
        rootLpc[:, n] = np.roots(STLpc[:, n])

        # Positive root angles converted to Hz, lowest first.
        freqLpcTmp = np.angle(rootLpc[:, n]) / (2.0 * np.pi) * fs
        freqLpcTmp = freqLpcTmp[freqLpcTmp > 0.0]
        freqLpcTmp.sort()
        keptFreqs = freqLpcTmp[0:nbFormantSlots]
        freqLpc[0:keptFreqs.size, n] = keptFreqs

    return STLpc, rootLpc, freqLpc, specFromLpc, sigmaS
Exemplo n.º 21
0
def get_formants(x, fs):

    """
    Estimate formants using LPC.

    See:
    http://www.mathworks.com/help/signal/ug/formant-estimation-with-lpc-coefficients.html
    http://www.phon.ucl.ac.uk/courses/spsci/matlab/lect10.html

    Args:
        x: 1-D array of samples. Only a Hamming window is applied; no
            pre-emphasis filter is used in this variant.
        fs: Sampling rate in Hz.

    Returns:
        List of frequencies in Hz, sorted ascending, one per LPC root with a
        strictly positive imaginary part.
    """

    # Apply a Hamming window.
    x1 = x * np.hamming(len(x))

    # Floor division keeps the LPC order an integer under true division and
    # for float sampling rates.
    ncoeff = 2 + int(fs // 1000)
    A, e, k = lpc(x1, ncoeff)

    # Strictly positive imaginary part: drops real roots as well as the
    # lower-half-plane conjugates.
    rts = [r for r in np.roots(A) if np.imag(r) > 0]

    # Root angle (radians per sample) -> Hz.
    angz = np.arctan2(np.imag(rts), np.real(rts))
    frqs = sorted(angz * (fs / (2 * math.pi)))

    return frqs
Exemplo n.º 22
0
def phormants(x, Fs):
    """Estimate formant frequencies of a frame via LPC root finding.

    Args:
        x: 1-D array of samples.
        Fs: Sampling rate in Hz.

    Returns:
        List of frequencies in Hz, sorted ascending, one per LPC root with a
        non-negative imaginary part.
    """
    # Hamming window, then the filter 1 / (1 + 0.63 z^-1).
    x1 = x * numpy.hamming(len(x))
    x1 = lfilter([1], [1., 0.63], x1)

    # Floor division keeps the LPC order an integer under true division and
    # for float sampling rates.
    ncoeff = 2 + int(Fs // 1000)
    A, e, k = lpc(x1, ncoeff)

    # Keep one root of every complex-conjugate pair.
    rts = [r for r in numpy.roots(A) if numpy.imag(r) >= 0]

    # Root angle (radians per sample) -> Hz.
    angz = numpy.arctan2(numpy.imag(rts), numpy.real(rts))
    frqs = sorted(angz * (Fs / (2 * math.pi)))

    return frqs
Exemplo n.º 23
0
def phormants(x, Fs):
    """Estimate formant frequencies of a frame via LPC root finding.

    Args:
        x: 1-D array of samples.
        Fs: Sampling rate in Hz.

    Returns:
        List of frequencies in Hz, sorted ascending, one per LPC root with a
        non-negative imaginary part.
    """
    # Hamming window, then the filter 1 / (1 + 0.63 z^-1).
    x1 = x * numpy.hamming(len(x))
    x1 = lfilter([1], [1., 0.63], x1)

    # Floor division keeps the LPC order an integer under true division and
    # for float sampling rates.
    ncoeff = 2 + int(Fs // 1000)
    A, e, k = lpc(x1, ncoeff)

    # Keep one root of every complex-conjugate pair.
    rts = [r for r in numpy.roots(A) if numpy.imag(r) >= 0]

    # Root angle (radians per sample) -> Hz.
    angz = numpy.arctan2(numpy.imag(rts), numpy.real(rts))
    frqs = sorted(angz * (Fs / (2 * math.pi)))

    return frqs
def get_formants(file_path):
    """Estimate formant frequencies of a WAV file using LPC.

    Example input:
    http://www.linguistics.ucla.edu/people/hayes/103/Charts/VChart/ae.wav

    Args:
        file_path: Path (or file object) accepted by ``wave.open``.

    Returns:
        List of frequencies in Hz, sorted ascending, one per LPC root with a
        non-negative imaginary part.
    """
    # Read everything needed from the file, then release the handle even if
    # reading fails (try/finally also works where Wave_read is not a
    # context manager).
    spf = wave.open(file_path, 'r')
    try:
        Fs = spf.getframerate()
        raw = spf.readframes(-1)
    finally:
        spf.close()

    # Samples are decoded as native 16-bit integers.
    # NOTE(review): assumes 16-bit mono audio; other sample widths or
    # interleaved channels would be misread -- confirm against callers.
    x = numpy.frombuffer(raw, dtype=numpy.int16)

    # Hamming window, then the filter 1 / (1 + 0.63 z^-1).
    x1 = x * numpy.hamming(len(x))
    x1 = lfilter([1], [1., 0.63], x1)

    # Floor division keeps the LPC order an integer under true division.
    ncoeff = 2 + Fs // 1000
    A, e, k = lpc(x1, ncoeff)

    # Keep one root of every complex-conjugate pair and convert its angle
    # (radians per sample) to Hz.
    rts = [r for r in numpy.roots(A) if numpy.imag(r) >= 0]
    angz = numpy.arctan2(numpy.imag(rts), numpy.real(rts))
    frqs = sorted(angz * (Fs / (2 * math.pi)))

    return frqs
Exemplo n.º 25
0
def find_start_and_end_of_hit(vec_x, fs, vec_idx_transients):
    """Locate the start and end sample of a "hit" around given transients.

    This is a partial port of a MATLAB routine; the original MATLAB lines
    are kept as comments next to the statements they were translated to.

    For every index in ``vec_idx_transients`` the function
      1. derives a 5 s window centred on the transient (used for the first
         plot only),
      2. computes the Hilbert envelope and a 32nd-order LPC prediction
         error of the WHOLE signal ``vec_x``,
      3. takes as start the sample nearest to the middle of ``vec_x`` whose
         squared prediction error exceeds 90 % of the maximum,
      4. smooths the envelope with a 300-tap moving average (filtfilt) and
         takes as end the first sample after the start whose smoothed
         envelope is at or below the 30th percentile of the remainder.
    Two matplotlib figures are shown and indices are printed per transient.

    Args:
        vec_x: Signal samples (indexed, sliced and passed to scipy/LPC with
            ``axis=0``).
        fs: Sampling rate in Hz.
        vec_idx_transients: Iterable of transient sample indices.

    Returns:
        Tuple ``(idx_start_transient, idx_end_transient)`` computed for the
        LAST transient only; both names are assigned solely inside the
        loop, so an empty ``vec_idx_transients`` raises at the ``return``.

    NOTE(review): several expressions rely on integer division and on
    float-tolerant indexing (``L_window / 2``, ``(L_vec_x-1) / 2``,
    ``sp.ones(300.)``), and ``plt.hold`` is absent from recent matplotlib
    releases -- confirm the targeted Python/NumPy/matplotlib versions.
    """

    # Overwritten inside the loop; these initial lists are never read.
    idx_start = []
    idx_end = []

    L_x = len(vec_x)

    # ___ some user parameters___
    T_window = 5 # analysis window length in seconds
    L_window = int(math.floor(T_window * fs))
    # Force an even window length so it splits into two equal halves.
    if sp.remainder(L_window, 2) == 1:
        L_window = L_window + 1

    L_half_window = L_window / 2

    for cur_idx_transient in vec_idx_transients:
        # Window centred on the transient, clipped to the signal bounds.
        idx_start = int(cur_idx_transient)- L_half_window
        idx_end = int(cur_idx_transient) + L_half_window

        if idx_start < 0: idx_start = 0
        if idx_end > L_x: idx_end = L_x

        print("idx_start: {}".format(idx_start) )
        print("idx_end: {}".format(idx_end))

        #pdb.set_trace()

        # The windowed excerpt is only plotted; everything below operates
        # on the full signal vec_x.
        cur_vec_x = vec_x[idx_start:idx_end+1]

        vec_envelope = np.abs(sgn.hilbert(vec_x, axis=0))

        plt.figure(2)
        plt.plot(cur_vec_x)
        plt.hold(True)
        plt.plot(vec_envelope)
        plt.legend(['input signal', 'hilbert envelope'])
        plt.show()

        # calculate a threshold
        # MATLAB: if b_debug
        # MATLAB:     tempfig('transient start search');
        # MATLAB:     plot([x vec_envelope]);
        # MATLAB: end

        # Whiten the signal via LPC (MATLAB: lpc(x,32)).  The error value
        # returned by lpc is immediately replaced by the output of
        # filtering vec_x with the LPC polynomial.
        (vec_a, vec_prediction_error, vec_k) = talkbox.lpc(vec_x, 32, axis=0)
        vec_prediction_error = sgn.lfilter(vec_a, 1, vec_x, axis=0)
        # print(vec_prediction_error)

        plt.figure(3)
        plt.plot(vec_prediction_error)
        plt.title('prediction error')
        plt.show()
        # MATLAB: vec_signal_whitened = scipy.filter([0 -a(2:end)], 1, x);
        # MATLAB: vec_signal_diff = x- vec_signal_whitened;

        L_vec_x = len(vec_x)

        # Middle sample of the full signal, floored when it is fractional.
        idx_mid = (L_vec_x-1) / 2;
        if sp.mod(idx_mid, 1) > 0:
            idx_mid = sp.floor(idx_mid)

        vec_prediction_error_power = vec_prediction_error**2

        # Samples above 90 % of the peak error power are onset candidates.
        threshold = 0.9 * np.max(vec_prediction_error_power)

        # Search leftwards and rightwards from the middle.
        # NOTE(review): the "+ 1" on the left and the "idx_mid-1" slice
        # start on the right mirror MATLAB's 1-based find(); with 0-based
        # slices they look off by one -- verify.
        # MATLAB: idx_temp_to_the_left = idx_mid - find(vec_prediction_error_power(idx_mid:-1:1) > threshold, 1, 'first') + 1;
        idx_temp_to_the_left = idx_mid - sp.nonzero(vec_prediction_error_power[idx_mid:0:-1] > threshold)[0] + 1
        # MATLAB: idx_temp_to_the_right = find(vec_prediction_error_power(idx_mid:end) > threshold, 1, 'first') + idx_mid - 1;
        idx_temp_to_the_right = idx_mid+ sp.nonzero(vec_prediction_error_power[idx_mid-1:L_vec_x] > threshold)[0]

        # Pick whichever candidate exists; if both exist, the nearer one
        # (the left one wins ties).
        if len(idx_temp_to_the_left) == 0:
            idx_start = idx_temp_to_the_right[0]

        elif len(idx_temp_to_the_right) == 0:
            idx_start = idx_temp_to_the_left[0]

        else:
            # which one is closer?
            if abs(idx_temp_to_the_left[0] - idx_mid) > abs(idx_temp_to_the_right[0] - idx_mid):
                idx_start = idx_temp_to_the_right[0]
            else:
                idx_start = idx_temp_to_the_left[0]

            #todo: think about this
        # The decay phase is taken to start where the transient starts.
        idx_start_decay = idx_start
        idx_start_transient = idx_start

        # MATLAB: if b_debug
        # MATLAB:     tempfig('transient start search');
        # MATLAB:     hold on; plot(idx_start, vec_envelope(idx_start), 'Marker', 'o', 'MarkerFaceColor', 'r');
        # MATLAB:     plot(vec_signal_whitened, 'g');
        # MATLAB:     hold off;
        # MATLAB:     tempfig('prediction error');
        # MATLAB:     plot(vec_prediction_error_power);
        # MATLAB:     hold on;
        # MATLAB:     line([1 length(vec_prediction_error_power)], repmat(threshold, 1, 2), 'Color', 'r', 'LineWidth', 2); hold off;
        # MATLAB: end

        # Smooth the envelope with a 300-tap moving average applied forward
        # and backward (filtfilt), without edge padding.
        order_filter_smooth = 300.
        vec_b_filter_smooth = 1 / order_filter_smooth * sp.ones(order_filter_smooth,)
        vec_a_filter_smooth = [1]

        # MATLAB: vec_b_smooth = 1/order_smoothing_filter * ones(order_smoothing_filter, 1);
        # MATLAB: vec_a_smooth = 1;
        # MATLAB: x_envelope_smoothed = filtfilt(vec_b_smooth, vec_a_smooth, x_envelope);
        vec_envelope_smoothed = sgn.filtfilt(vec_b_filter_smooth, vec_a_filter_smooth, vec_envelope, padtype=None)
        # MATLAB: tempfig('selected waveform');
        # MATLAB: hold on, plot((0:length(x)-1) / fs, x_envelope_smoothed, 'r'); hold off;

        # try to find the beginning of the decay phase
        # (temp_max is computed but not used afterwards)
        temp_max = np.max(vec_envelope_smoothed, axis=0)
        # MATLAB: temp_max = max(x_envelope_smoothed);
        # MATLAB: plot((idx_start_decay-1) / fs, temp_max, 'Marker', 'o', 'MarkerSize', 15, 'Color', 'g');
        # MATLAB: tempfig('envelope histogram');
        # MATLAB: hist(x_envelope_smoothed, 100);

        # find the end of the decay phase: first sample after the start
        # whose smoothed envelope is at or below the 30th percentile of the
        # remaining envelope.
        threshold = np.percentile(vec_envelope_smoothed[idx_start_decay:L_vec_x], 30)
        # MATLAB: threshold = quantile(x_envelope_smoothed(idx_start_decay:end), 0.3);
        # MATLAB: idx_end_decay = find(x_envelope_smoothed(idx_start_decay:end) <= threshold, 1, 'first') + idx_start_decay-1 ;
        idx_end_decay = sp.nonzero(vec_envelope_smoothed[idx_start_decay:L_vec_x] <= threshold)[0][0] + idx_start_decay
        # MATLAB: plot((idx_end_decay-1) / fs, x_envelope_smoothed(idx_end_decay), 'Marker', 'o', 'MarkerSize', 15, 'Color', 'g');

        # Not ported from MATLAB: exponential decay model used to estimate
        # the decay time.
        # MATLAB: val_start = (x_envelope_smoothed(idx_start_decay));
        # MATLAB: val_end = (x_envelope_smoothed(idx_end_decay));
        # MATLAB: decay_constant = 1*( log(val_end) - log(val_start) ) / (idx_end_decay - idx_start_decay);
        # MATLAB: x_model = (val_start) * exp(1*decay_constant * (0:(idx_end_decay - idx_start_decay))');
        # MATLAB: plot((idx_start_decay:idx_end_decay) / fs, x_model, 'k');
        # MATLAB: tau_decay_ms = -1 / (decay_constant * fs) * 1000

        idx_end_transient = idx_end_decay

        print('idx_start_transient: {}'.format(idx_start_transient))
        print('idx_end_transient: {}'.format(idx_end_transient))

    return (idx_start_transient, idx_end_transient)
Exemplo n.º 26
0
def get_formant_locations_from_raw_long_frame(v_sig, v_pm, nx, fft_len, b_plot=False):
    '''
    Locate formant peaks in a mel-warped LPC envelope of one long frame.

    v_sig:   signal samples.
    v_pm:    sample positions used as frame boundaries (per the notes in the
             original code, epochs / pitch marks from an epoch detector);
             entries nx-2 .. nx+2 must exist.
    nx:      frame index
    fft_len: FFT length.
    b_plot:  when True, plot the mel-warped magnitude against the LPC
             envelope (blocking). Off by default; this replaces a debugger
             breakpoint and an always-on plot that ran on every call.

    Returns the tuple (v_mag_db, v_lpc_mag_mel_db, v_frmnts_bins_mel,
    v_frmnts_gains_db, v_frmnts_bw_mel, v_frm_short_win, shift).
    '''

    # Raw-long frame: from v_pm[nx-2] to v_pm[nx+2], inclusive.
    v_frm_long = v_sig[v_pm[nx-2]:v_pm[nx+2]+1]

    # Asymmetric Hanning-based window sized to both sides of v_pm[nx].
    left_len  = v_pm[nx] - v_pm[nx-2]
    right_len = v_pm[nx+2] - v_pm[nx]
    v_win = la.gen_non_symmetric_win(left_len, right_len, np.hanning, b_norm=False)
    v_frm_long_win = v_frm_long * v_win

    # Spectrum:
    v_mag = la.remove_hermitian_half(np.absolute(np.fft.fft(v_frm_long_win, n=fft_len)[None,:]))[0]
    v_mag_db = la.db(v_mag)

    # Mel warping, then back to a time-domain frame for the LPC fit.
    # Floor division keeps the size argument an integer under true division.
    alpha = 0.50  # 0.55 - 0.60
    v_mag_mel      = la.sp_mel_warp(v_mag[None,:], fft_len // 2, alpha=alpha, in_type=3)[0]
    v_sp_cmplx     = la.build_min_phase_from_mag_spec(v_mag_mel[None,:])[0]
    v_sp_cmplx_ext = la.add_hermitian_half(v_sp_cmplx[None,:], data_type='complex')[0]
    v_frm_long_win_mel = np.fft.ifft(v_sp_cmplx_ext).real

    # Formant extraction -LPC method:--------------------------------------------------
    n_lpc_coeffs = 30 # 40
    v_lpc_mel, v_e, v_refl = lpc(v_frm_long_win_mel, n_lpc_coeffs)

    # LPC envelope in dB, shifted so its mean matches the mel spectrum's.
    v_lpc_mag_mel = lpc_to_mag(v_lpc_mel, fft_len=fft_len)
    v_lpc_mag_mel_db = la.db(v_lpc_mag_mel)
    v_lpc_mag_mel_db = v_lpc_mag_mel_db - np.mean(v_lpc_mag_mel_db) + np.mean(la.db(v_mag_mel))

    v_frmnts_bins_mel, v_frmnts_gains_db = get_formant_locations_from_spec_env(v_lpc_mag_mel_db)

    # Getting bandwidth: valleys are the peaks of the negated envelope,
    # padded with the first and last bin.
    fft_len_half = 1 + fft_len // 2
    v_vall_bins = get_formant_locations_from_spec_env(-v_lpc_mag_mel_db)[0]
    v_vall_bins = np.r_[0, v_vall_bins, fft_len_half-1]

    nfrmnts = v_frmnts_bins_mel.size
    v_frmnts_bw_mel = np.zeros(nfrmnts) - 1.0
    # NOTE(review): assumes valleys and formants interleave, so that
    # v_vall_bins[nx_f] / [nx_f+1] enclose formant nx_f -- verify.
    for nx_f in range(nfrmnts):
        curr_frmnt_bin  = v_frmnts_bins_mel[nx_f]
        curr_vall_l_bin = v_vall_bins[nx_f]
        curr_vall_r_bin = v_vall_bins[nx_f+1]

        # Midpoints between the formant and its neighbouring valleys.
        curr_midp_l = int((curr_frmnt_bin + curr_vall_l_bin) / 2.0)
        curr_midp_r = int((curr_frmnt_bin + curr_vall_r_bin) / 2.0)

        # Protection: a midpoint equal to the formant bin would give a zero
        # denominator below, so fall back to the valley bin itself.
        if curr_midp_l==curr_frmnt_bin:
            curr_midp_l = curr_vall_l_bin
        if curr_midp_r==curr_frmnt_bin:
            curr_midp_r = curr_vall_r_bin

        slope_l = (v_frmnts_gains_db[nx_f] - v_lpc_mag_mel_db[curr_midp_l]) / (curr_frmnt_bin - curr_midp_l).astype(float)
        slope_r = (v_frmnts_gains_db[nx_f] - v_lpc_mag_mel_db[curr_midp_r]) / (curr_frmnt_bin - curr_midp_r).astype(float)

        slope_ave = (slope_l - slope_r) / 2.0

        # Bandwidth measure: reciprocal of the average flank slope.
        v_frmnts_bw_mel[nx_f] = 1.0 / slope_ave

    # Computing frame short:--------------------------------
    left_len_short  = v_pm[nx] - v_pm[nx-1]
    right_len_short = v_pm[nx+1] - v_pm[nx]
    v_win_short = la.gen_non_symmetric_win(left_len_short, right_len_short, np.hanning, b_norm=False)
    v_frm_short = v_sig[v_pm[nx-1]:v_pm[nx+1]+1]
    v_frm_short_win = v_frm_short * v_win_short
    shift = v_pm[nx] - v_pm[nx-1]

    # Formant extraction - True envelope method: not finished.
    #v_true_env_db = la.true_envelope(v_mag_db[None,:], in_type='db', ncoeffs=400, thres_db=0.1)[0]

    if b_plot:
        plt.figure()
        plt.plot(la.db(v_mag_mel))
        plt.plot(v_lpc_mag_mel_db)
        plt.grid()
        plt.show()

    return v_mag_db, v_lpc_mag_mel_db, v_frmnts_bins_mel, v_frmnts_gains_db, v_frmnts_bw_mel, v_frm_short_win, shift
Exemplo n.º 27
0
def get_formant_locations_from_raw_long_frame(v_sig, v_pm, nx, fft_len, b_plot=False):
    '''
    Locate formant peaks in the LPC envelope of one long frame and keep
    those whose bandwidth measure is below a fixed threshold.

    v_sig:   signal samples.
    v_pm:    sample positions used as frame boundaries (per the notes in the
             original code, epochs / pitch marks from an epoch detector);
             entries nx-2 .. nx+2 must exist.
    nx:      frame index
    fft_len: FFT length.
    b_plot:  when True, plot the magnitude spectrum against the LPC
             envelope (blocking). Off by default, matching the previously
             disabled plot.

    Returns the tuple (v_mag_db, v_lpc_mag_db, v_frmnts_bins,
    v_frmnts_gains_db, v_frmnts_bw, v_frm_short_win, shift).
    '''

    # Raw-long frame: from v_pm[nx-2] to v_pm[nx+2], inclusive.
    v_frm_long = v_sig[v_pm[nx-2]:v_pm[nx+2]+1]

    # Asymmetric Hanning-based window sized to both sides of v_pm[nx].
    left_len  = v_pm[nx] - v_pm[nx-2]
    right_len = v_pm[nx+2] - v_pm[nx]
    v_win = la.gen_non_symmetric_win(left_len, right_len, np.hanning, b_norm=False)
    v_frm_long_win = v_frm_long * v_win

    # Spectrum:
    v_mag = np.absolute(np.fft.fft(v_frm_long_win, n=fft_len))
    v_mag_db = la.db(la.remove_hermitian_half(v_mag[None,:])[0])

    # Formant extraction -LPC method:--------------------------------------------------
    v_lpc, v_e, v_refl = lpc(v_frm_long_win, 120)

    # LPC envelope in dB, shifted so its mean matches the spectrum's.
    v_lpc_mag = lpc_to_mag(v_lpc, fft_len=fft_len)
    v_lpc_mag_db = la.db(v_lpc_mag)
    v_lpc_mag_db = v_lpc_mag_db - np.mean(v_lpc_mag_db) + np.mean(v_mag_db)

    v_frmnts_bins, v_frmnts_gains_db = get_formant_locations_from_spec_env(v_lpc_mag_db)

    # Getting bandwidth: valleys are the peaks of the negated envelope,
    # padded with the first and last bin.  Floor division keeps the bin
    # indices integers under true division.
    fft_len_half = 1 + fft_len // 2
    v_vall_bins = get_formant_locations_from_spec_env(-v_lpc_mag_db)[0]
    v_vall_bins = np.r_[0, v_vall_bins, fft_len_half-1]

    nfrmnts = v_frmnts_bins.size
    v_frmnts_bw = np.zeros(nfrmnts) - 1.0
    # NOTE(review): assumes valleys and formants interleave, so that
    # v_vall_bins[nx_f] / [nx_f+1] enclose formant nx_f -- verify.
    for nx_f in range(nfrmnts):
        curr_frmnt_bin  = v_frmnts_bins[nx_f]
        curr_vall_l_bin = v_vall_bins[nx_f]
        curr_vall_r_bin = v_vall_bins[nx_f+1]

        # Midpoints between the formant and its neighbouring valleys.
        curr_midp_l = int((curr_frmnt_bin + curr_vall_l_bin) / 2.0)
        curr_midp_r = int((curr_frmnt_bin + curr_vall_r_bin) / 2.0)

        # Protection: a midpoint equal to the formant bin would give a zero
        # denominator below, so fall back to the valley bin itself.
        if curr_midp_l==curr_frmnt_bin:
            curr_midp_l = curr_vall_l_bin
        if curr_midp_r==curr_frmnt_bin:
            curr_midp_r = curr_vall_r_bin

        slope_l = (v_frmnts_gains_db[nx_f] - v_lpc_mag_db[curr_midp_l]) / (curr_frmnt_bin - curr_midp_l).astype(float)
        slope_r = (v_frmnts_gains_db[nx_f] - v_lpc_mag_db[curr_midp_r]) / (curr_frmnt_bin - curr_midp_r).astype(float)

        slope_ave = (slope_l - slope_r) / 2.0

        # Bandwidth measure: reciprocal of the average flank slope.
        v_frmnts_bw[nx_f] = 1.0 / slope_ave

    # Filtering by bandwidth: one mask, applied to all three arrays.
    bw_thress = 7.0
    v_keep = v_frmnts_bw < bw_thress
    v_frmnts_bins     = v_frmnts_bins[v_keep]
    v_frmnts_gains_db = v_frmnts_gains_db[v_keep]
    v_frmnts_bw       = v_frmnts_bw[v_keep]

    # Computing frame short:--------------------------------
    left_len_short  = v_pm[nx] - v_pm[nx-1]
    right_len_short = v_pm[nx+1] - v_pm[nx]
    v_win_short = la.gen_non_symmetric_win(left_len_short, right_len_short, np.hanning, b_norm=False)
    v_frm_short = v_sig[v_pm[nx-1]:v_pm[nx+1]+1]
    v_frm_short_win = v_frm_short * v_win_short
    shift = v_pm[nx] - v_pm[nx-1]

    # Formant extraction - True envelope method: not finished.
    #v_true_env_db = la.true_envelope(v_mag_db[None,:], in_type='db', ncoeffs=400, thres_db=0.1)[0]

    if b_plot:
        plt.figure()
        plt.plot(v_mag_db)
        plt.plot(v_lpc_mag_db)
        plt.grid()
        plt.show()

    return v_mag_db, v_lpc_mag_db, v_frmnts_bins, v_frmnts_gains_db, v_frmnts_bw, v_frm_short_win, shift
Exemplo n.º 28
0
import numpy as np
from matplotlib import pyplot as plt
from math import pi
from scipy.io import wavfile
from scipy import signal as sig
import das
from scikits.talkbox import lpc

#%% load wav
fs, wav = wavfile.read('audio/glas_aaa.wav')
# Scale by 2**15 using a float divisor: with integer PCM samples an integer
# divisor would floor-divide under Python 2 and wipe out the signal.
wav = wav / 2.0**15
# wav = wav * sig.hamming(wav.size)
f, wav_spec = das.get_spectrum(wav, fs)

#%% get LPC parameters
# Order-25 linear prediction of the whole recording.
a_lp, e, k = lpc(wav, 25)
# Predictor FIR: the leading 0 delays by one sample so each output sample is
# built only from past input samples, weighted by the negated LPC coefficients.
b_inv = np.concatenate(([0], -a_lp[1:]))
wav_est = sig.lfilter(b_inv, 1, wav)
# Prediction residual and its spectrum.
wav_err = wav - wav_est
G = e
f, err_spec = das.get_spectrum(wav_err, fs)

#%% plot
#plt.figure()
#plt.plot(wav)
#plt.plot(est_wav)
#plt.figure()
#plt.plot(err)

#%% LP filter impulse response and transfer function
# 20 ms worth of samples; the shape passed to np.zeros must be an integer.
x = np.zeros(int(.02 * fs))
Exemplo n.º 29
0
def lpc_coeff(sig):
    """Run an order-12 LPC on ``sig`` and return the third value it yields.

    The input is first passed through ``reshapeSignal``.  ``lpc`` is expected
    to return exactly three values; only the last one is handed back
    (presumably the reflection coefficients -- confirm against the ``lpc``
    implementation in use).
    """
    prepared = reshapeSignal(sig)
    # 12 coefficients (n/sizeof window) *k coefficients
    _coeffs, _err, third = lpc(prepared, 12)
    return third
Exemplo n.º 30
0
def fundEstimator(soundIn, fs, t=None, debugFig = 0, maxFund = 1500, minFund = 300, lowFc = 200, highFc = 6000, minSaliency = 0.5):
    """
    Estimates the fundamental frequency and the first formants of a complex sound.

    soundIn is the sound pressure waveform.
    fs is the sampling rate
    t is a vector of time values in s at which the fundamental will be estimated.
      When t is None, a vector spaced by 1 ms covering the whole sound is used.
    The sound must include at least 1024 sample points; otherwise an error
    message is printed and a tuple of seven zeros is returned.

    The optional parameters with defaults are
       debugFig = 0         Set to zero to eliminate figures.
       maxFund = 1500       Maximum fundamental frequency
       minFund = 300        Minimum fundamental frequency
       lowFc = 200          Low frequency cut-off for band-passing the signal prior to auto-correlation.
       highFc = 6000        High frequency cut-off
       minSaliency = 0.5    Threshold in the auto-correlation for minimum saliency - returns NaN for pitch values if saliency is below this number

    Returns the tuple (sal, fund, fund2, form1, form2, form3, soundlen)
           sal     - the time varying pitch saliency: ratio of the selected auto-correlation peak to the zero-lag value
           fund    - the time-varying fundamental in Hz, one value per entry of t (NaN where not estimated)
           fund2   - a second peak in the spectrum - not a multiple of the fundamental, a sign of a second voice
           form1   - the first formant, if it exists
           form2   - the second formant, if it exists
           form3   - the third formant, if it exists
           soundlen - number of analysis windows whose RMS reached at least 10% of the maximum window RMS
                      (the arrays above all have len(t) entries)
    """

    # Band-pass filtering signal prior to auto-correlation
    soundLen = len(soundIn)
    nfilt = 1024
    if soundLen < 1024:
        print 'Error in fundEstimator: sound too short for bandpass filtering, len(soundIn)=%d\n' % soundLen
        return (0, 0, 0, 0, 0, 0, 0)

    # high pass filter the signal
    # NOTE(review): 2*lowFc/fs is an integer division under Python 2 when fs is an int - confirm fs is a float
    # or that true division is enabled in this module.
    highpassFilter = firwin(nfilt-1, 2*lowFc/fs, pass_zero=False)
    padlen = min(soundLen-10, 3*len(highpassFilter))
    soundIn = filtfilt(highpassFilter, [1.0], soundIn, padlen=padlen)

    # low pass filter the signal
    lowpassFilter = firwin(nfilt, 2*highFc/fs)
    padlen = min(soundLen-10, 3*len(lowpassFilter))
    soundIn = filtfilt(lowpassFilter, [1.0], soundIn, padlen=padlen)

    # Plot a spectrogram?
    if debugFig:
        plt.figure(9)
        (tDebug ,freqDebug ,specDebug , rms) = spectrogram(soundIn, fs, 1000.0, 50, min_freq=0, max_freq=10000, nstd=6, log=True, noise_level_db=50, rectify=True)
        plot_spectrogram(tDebug, freqDebug, specDebug)

    # Initializations and useful variables
    if t is None:
        # initialize t to be spaced by 1 ms increments (_si = 1e-3 s)
        sound_dur = len(soundIn) / fs
        _si = 1e-3
        npts = int(sound_dur / _si)
        t = np.arange(npts) * _si

    # One output slot per requested time point
    nt=len(t)
    soundRMS = np.zeros(nt)
    fund = np.zeros(nt)
    fund2 = np.zeros(nt)
    sal = np.zeros(nt)
    form1 = np.zeros(nt)
    form2 = np.zeros(nt)
    form3 = np.zeros(nt)

    #  Calculate the size of the window for the auto-correlation
    alpha = 5                          # Number of sd in the Gaussian window
    winLen = int(np.fix((2.0*alpha/minFund)*fs))  # Length of Gaussian window based on minFund
    if (winLen%2 == 0):  # Make a symmetric window
        winLen += 1

    winLen2 = 2**12+1   # This looks like a good size for LPC - 4097 points

    gt, w = gaussian_window(winLen, alpha)
    gt2, w2 = gaussian_window(winLen2, alpha)
    maxlags = int(2*ceil((float(fs)/minFund)))

    # First calculate the rms in each window
    for it in range(nt):
        tval = t[it]               # Center of window in time
        tind = int(np.fix(tval*fs))    # Center of window in ind
        tstart = tind - (winLen-1)/2
        tend = tind + (winLen-1)/2

        # Clip the window to the start of the sound and shift the Gaussian weights accordingly
        if tstart < 0:
            winstart = - tstart
            tstart = 0
        else:
            winstart = 0

        # Same clipping at the end of the sound
        if tend >= soundLen:
            windend = winLen - (tend-soundLen+1) - 1
            tend = soundLen-1
        else:
            windend = winLen-1

        soundWin = soundIn[tstart:tend]*w[winstart:windend]
        soundRMS[it] = np.std(soundWin)

    soundRMSMax = max(soundRMS)

    # Calculate the auto-correlation in windowed segments and obtain 4 guess values of the fundamental
    # fundCorrGuess - guess from the auto-correlation function
    # fundCorrAmpGuess - guess from the amplitude of the auto-correlation function
    # fundCepGuess - guess from the cepstrum
    # fundStackGuess - guess taken from a fit of the power spectrum with a harmonic stack, using the fundCepGuess as a starting point
    #  Current version uses fundStackGuess as the best estimate...

    # soundlen counts the windows that pass the RMS threshold below (distinct from soundLen, the sample count)
    soundlen = 0
    for it in range(nt):
        # Default every output to NaN; values are filled in only when they can be estimated
        fund[it] = float('nan')
        sal[it] = float('nan')
        fund2[it] = float('nan')
        form1[it] = float('nan')
        form2[it] = float('nan')
        form3[it] = float('nan')

        # Skip windows quieter than 10% of the loudest window
        if (soundRMS[it] < soundRMSMax*0.1):
            continue

        soundlen += 1
        tval = t[it]               # Center of window in time
        tind = int(np.fix(tval*fs))    # Center of window in ind
        tstart = tind - (winLen-1)/2
        tend = tind + (winLen-1)/2

        if tstart < 0:
            winstart = - tstart
            tstart = 0
        else:
            winstart = 0

        if tend >= soundLen:
            windend = winLen - (tend-soundLen+1) - 1
            tend = soundLen-1
        else:
            windend = winLen-1

        # Second, longer window (winLen2 points) used only for the LPC analysis
        tstart2 = tind - (winLen2-1)/2
        tend2 = tind + (winLen2-1)/2

        if tstart2 < 0:
            winstart2 = - tstart2
            tstart2 = 0
        else:
            winstart2 = 0

        if tend2 >= soundLen:
            windend2 = winLen2 - (tend2-soundLen+1) - 1
            tend2 = soundLen-1
        else:
            windend2 = winLen2-1

        soundWin = soundIn[tstart:tend]*w[winstart:windend]

        soundWin2 = soundIn[tstart2:tend2]*w2[winstart2:windend2]

        # Apply LPC to get time-varying formants and one additional guess for the fundamental frequency
        A, E, K = talkbox.lpc(soundWin2, 8)    # 8 degree polynomial
        rts = np.roots(A)          # Find the roots of A
        rts = rts[np.imag(rts)>=0]  # Keep only half of them
        angz = np.arctan2(np.imag(rts),np.real(rts))

        # Calculate the frequencies and the bandwidth of the formants
        frqsFormants = angz*(fs/(2*np.pi))
        indices = np.argsort(frqsFormants)
        # NOTE(review): under Python 2 integer division -1/2 evaluates to -1, not -0.5, which doubles bw -
        # confirm whether true division is enabled in this module.
        bw = -1/2*(fs/(2*np.pi))*np.log(np.abs(rts))

        # Keep formants above 1000 Hz and with bandwidth < 1000
        formants = []
        for kk in indices:
            if ( frqsFormants[kk]>1000 and bw[kk] < 1000):
                formants.append(frqsFormants[kk])
        formants = np.array(formants)

        # formants is in ascending frequency order (indices comes from argsort)
        if len(formants) > 0 :
            form1[it] = formants[0]
        if len(formants) > 1 :
            form2[it] = formants[1]
        if len(formants) > 2 :
            form3[it] = formants[2]

        # Calculate the auto-correlation
        lags = np.arange(-maxlags, maxlags+1, 1)
        autoCorr = correlation_function(soundWin, soundWin, lags)
        ind0 = int(mlab.find(lags == 0))  # need to find lag zero index

        # find peaks
        indPeaksCorr = detect_peaks(autoCorr, mph=max(autoCorr)/10)

        # Eliminate center peak and all peaks too close to middle
        indPeaksCorr = np.delete(indPeaksCorr,mlab.find( (indPeaksCorr-ind0) < fs/maxFund))
        pksCorr = autoCorr[indPeaksCorr]

        # Find max peak
        # NOTE(review): when no peak is found, fundCorrGuess and indMax are not assigned in this iteration;
        # with the default minSaliency (0.5) the 0.1 saliency triggers the `continue` below before they are used.
        if len(pksCorr)==0:
            pitchSaliency = 0.1               # 0.1 goes with the detection of peaks greater than max/10
        else:
            indIndMax = mlab.find(pksCorr == max(pksCorr))[0]
            indMax = indPeaksCorr[indIndMax]
            fundCorrGuess = fs/abs(lags[indMax])
            pitchSaliency = autoCorr[indMax]/autoCorr[ind0]

        sal[it] = pitchSaliency

        # Not salient enough: leave fund/fund2 as NaN for this window
        if sal[it] < minSaliency:
            continue

        # Calculate the envelope of the auto-correlation after rectification
        envCorr = temporal_envelope(autoCorr, fs, cutoff_freq=maxFund, resample_rate=None)
        locsEnvCorr = detect_peaks(envCorr, mph=max(envCorr)/10)
        pksEnvCorr = envCorr[locsEnvCorr]

        # The max peak should be around zero
        # NOTE(review): mlab.find returns an array here (no [0] index is taken); the comparison below
        # presumably relies on it holding a single element - confirm.
        indIndEnvMax = mlab.find(pksEnvCorr == max(pksEnvCorr))

        # Take the first peak not in the middle
        if indIndEnvMax+2 > len(locsEnvCorr):
            fundCorrAmpGuess = fundCorrGuess
            indEnvMax = indMax
        else:
            indEnvMax = locsEnvCorr[indIndEnvMax+1]
            fundCorrAmpGuess = fs/lags[indEnvMax]

        # Calculate power spectrum and cepstrum
        Y = fft(soundWin, n=winLen+1)
        f = (fs/2.0)*(np.array(range((winLen+1)/2+1), dtype=float)/float((winLen+1)/2))
        fhigh = mlab.find(f >= highFc)[0]

        powSound = 20.0*np.log10(np.abs(Y[0:(winLen+1)/2+1]))    # This is the power spectrum
        powSoundGood = powSound[0:fhigh]
        maxPow = max(powSoundGood)
        powSoundGood = powSoundGood - maxPow   # Set zero as the peak amplitude
        powSoundGood[powSoundGood < - 60] = -60

        # Calculate coarse spectral envelope (cubic polynomial fit)
        p = np.polyfit(f[0:fhigh], powSoundGood, 3)
        powAmp = np.polyval(p, f[0:fhigh])

        # Cepstrum
        CY = dct(powSoundGood-powAmp, norm = 'ortho')

        tCY = 2000.0*np.array(range(len(CY)))/fs          # Units of Cepstrum in ms
        fCY = 1000.0/tCY # Corresponding fundamental frequency in Hz.
        lowInd = mlab.find(fCY<lowFc)
        if lowInd.size > 0:
            flowCY = mlab.find(fCY < lowFc)[0]
        else:
            flowCY = fCY.size

        fhighCY = mlab.find(fCY < highFc)[0]

        # Find peak of Cepstrum
        indPk = mlab.find(CY[fhighCY:flowCY] == max(CY[fhighCY:flowCY]))[-1]
        indPk = fhighCY + indPk
        # Center of mass (in frequency) of the positive lobe around the peak:
        # first walk right from the peak, then left
        fmass = 0
        mass = 0
        indTry = indPk
        while (CY[indTry] > 0):
            fmass = fmass + fCY[indTry]*CY[indTry]
            mass = mass + CY[indTry]
            indTry = indTry + 1
            if indTry >= len(CY):
                break

        indTry = indPk - 1
        if (indTry >= 0 ):
            while (CY[indTry] > 0):
                fmass = fmass + fCY[indTry]*CY[indTry]
                mass = mass + CY[indTry]
                indTry = indTry - 1
                if indTry < 0:
                    break

        fGuess = fmass/mass

        if (fGuess == 0  or np.isnan(fGuess) or np.isinf(fGuess) ):              # Failure of cepstral method
            fGuess = fundCorrGuess

        fundCepGuess = fGuess

        # Force fundamental to be bounded: divide (or multiply) the guess by 2, 3, ... until it is in range
        if (fundCepGuess > maxFund ):
            i = 2
            while(fundCepGuess > maxFund):
                fundCepGuess = fGuess/i
                i += 1
        elif (fundCepGuess < minFund):
            i = 2
            while(fundCepGuess < minFund):
                fundCepGuess = fGuess*i
                i += 1

        # Fit Gaussian harmonic stack
        maxPow = max(powSoundGood-powAmp)

        # This is the matlab code...
        # fundFitCep = NonLinearModel.fit(f(1:fhigh)', powSoundGood'-powAmp, @synSpect, [fundCepGuess ones(1,9).*log(maxPow)])
        # modelPowCep = synSpect(double(fundFitCep.Coefficients(:,1)), f(1:fhigh))

        # First fit, started at the cepstral guess
        vars = np.concatenate(([fundCepGuess], np.ones(9)*np.log(maxPow)))
        bout = leastsq(residualSyn, vars, args = (f[0:fhigh], powSoundGood-powAmp))
        modelPowCep = synSpect(bout[0], f[0:fhigh])
        errCep = sum((powSoundGood - powAmp - modelPowCep)**2)

        # Second fit, started at twice the cepstral guess
        vars = np.concatenate(([fundCepGuess*2], np.ones(9)*np.log(maxPow)))
        bout2 = leastsq(residualSyn, vars, args = (f[0:fhigh], powSoundGood-powAmp))
        modelPowCep2 = synSpect(bout2[0], f[0:fhigh])
        errCep2 = sum((powSoundGood - powAmp - modelPowCep2)**2)

        # Keep whichever fit has the smaller squared error
        if errCep2 < errCep:
            bout = bout2
            modelPowCep =  modelPowCep2

        fundStackGuess = bout[0][0]
        if (fundStackGuess > maxFund) or (fundStackGuess < minFund ):
            fundStackGuess = float('nan')


        # A second cepstrum for the second voice
        #     CY2 = dct(powSoundGood-powAmp'- modelPowCep)

        fund[it] = fundStackGuess

        # NOTE(review): f2 is only assigned inside this branch but is read by the debug plot further down.
        if  not np.isnan(fundStackGuess):
            powLeft = powSoundGood- powAmp - modelPowCep
            maxPow2 = max(powLeft)
            f2 = 0
            if ( maxPow2 > maxPow*0.5):    # Possible second peak in central area as indicator of second voice.
                f2 = f[mlab.find(powLeft == maxPow2)]
                if ( f2 > 1000 and f2 < 4000):
                    if (pitchSaliency > minSaliency):
                        fund2[it] = f2

#%     modelPowCorrAmp = synSpect(double(fundFitCorrAmp.Coefficients(:,1)), f(1:fhigh))
#%
#%     errCorr = sum((powSoundGood - powAmp' - modelPowCorr).^2)
#%     errCorrAmp = sum((powSoundGood - powAmp' - modelPowCorrAmp).^2)
#%     errCorrSum = sum((powSoundGood - powAmp' - (modelPowCorr+modelPowCorrAmp) ).^2)
#%
#%     f1 = double(fundFitCorr.Coefficients(1,1))
#%     f2 = double(fundFitCorrAmp.Coefficients(1,1))
#%
#%     if (pitchSaliency > minSaliency)
#%         if (errCorr < errCorrAmp)
#%             fund(it) = f1
#%             if errCorrSum < errCorr
#%                 fund2(it) = f2
#%             end
#%         else
#%             fund(it) = f2
#%             if errCorrSum < errCorrAmp
#%                 fund2(it) = f1
#%             end
#%         end
#%
#%     end

        # Debug figure: waveform, auto-correlation, power spectrum with model, and cepstrum
        if (debugFig ):
            plt.figure(10)
            plt.subplot(4,1,1)
            plt.cla()
            plt.plot(soundWin)
#         f1 = double(fundFitCorr.Coefficients(1,1))
#         f2 = double(fundFitCorrAmp.Coefficients(1,1))
            titleStr = 'Saliency = %.2f Pitch AC = %.2f (Hz)  Pitch ACA = %.2f Pitch C %.2f (Hz)' % (pitchSaliency, fundCorrGuess, fundCorrAmpGuess, fundStackGuess)
            plt.title(titleStr)

            plt.subplot(4,1,2)
            plt.cla()
            plt.plot(1000*(lags/fs), autoCorr)
            plt.plot([1000.*lags[indMax]/fs, 1000*lags[indMax]/fs], [0, autoCorr[ind0]], 'k')
            plt.plot(1000.*lags/fs, envCorr, 'r', linewidth= 2)
            plt.plot([1000*lags[indEnvMax]/fs, 1000*lags[indEnvMax]/fs], [0, autoCorr[ind0]], 'g')
            plt.xlabel('Time (ms)')

            plt.subplot(4,1,3)
            plt.cla()
            plt.plot(f[0:fhigh],powSoundGood)
            plt.axis([0, highFc, -60, 0])
            plt.plot(f[0:fhigh], powAmp, 'b--')
            plt.plot(f[0:fhigh], modelPowCep + powAmp, 'k')
            # plt.plot(f(1:fhigh), modelPowCorrAmp + powAmp', 'g')

            # Mark the first five harmonics of both guesses
            for ih in range(1,6):
                plt.plot([fundCorrGuess*ih, fundCorrGuess*ih], [-60, 0], 'r')
                plt.plot([fundStackGuess*ih, fundStackGuess*ih], [-60, 0], 'k')

            if f2 != 0:
                plt.plot([f2, f2], [-60, 0], 'g')

            plt.xlabel('Frequency (Hz)')
            # title(sprintf('Err1 = %.1f Err2 = %.1f', errCorr, errCorrAmp))

            plt.subplot(4,1,4)
            plt.cla()
            plt.plot(tCY, CY)
#         plot(tCY, CY2, 'k--')
            plt.plot([1000/fundCorrGuess, 1000/fundCorrGuess], [0, max(CY)], 'r')
            plt.plot([1000/fundStackGuess, 1000/fundStackGuess], [0, max(CY)], 'k')

            #%         plot([(pkClosest-1)/fs (pkClosest-1)/fs], [0 max(CY)], 'g')
            #%         if ~isempty(ipk2)
            #%             plot([(pk2-1)/fs (pk2-1)/fs], [0 max(CY)], 'b')
            #%         end
            #%         for ip=1:length(pks)
            #%             plot([(locs(ip)-1)/fs (locs(ip)-1)/fs], [0 pks(ip)/4], 'r')
            #%         end
            plt.axis([0, 1000*np.size(CY)/(2*fs), 0, max(CY)])
            plt.xlabel('Time (ms)')

            plt.pause(1)

    # Fix formants: reassign each per-window formant to the track (1, 2 or 3) whose mean it is closest to.
    meanf1 = np.mean(form1[~np.isnan(form1)])
    meanf2 = np.mean(form2[~np.isnan(form2)])
    meanf3 = np.mean(form3[~np.isnan(form3)])

    # NOTE(review): the form2 and form3 checks below are nested inside the form1 check, so they only run
    # for windows where form1 was not NaN on entry - confirm this is intended.
    for it in range(nt):
        if ~np.isnan(form1[it]):
            df11 = np.abs(form1[it]-meanf1)
            df12 = np.abs(form1[it]-meanf2)
            df13 = np.abs(form1[it]-meanf3)
            # form1 value is closer to another track's mean: move it there (unless that slot
            # already holds a better value) and clear form1
            if df12 < df11:
                if df13 < df12:
                    if ~np.isnan(form3[it]):
                        df33 = np.abs(form3[it]-meanf3)
                        if df13 < df33:
                            form3[it] = form1[it]
                    else:
                      form3[it] = form1[it]
                else:
                    if ~np.isnan(form2[it]):
                        df22 = np.abs(form2[it]-meanf2)
                        if df12 < df22:
                            form2[it] = form1[it]
                    else:
                        form2[it] = form1[it]
                form1[it] = float('nan')
            # Same reassignment for the form2 value
            if ~np.isnan(form2[it]):
                df21 = np.abs(form2[it]-meanf1)
                df22 = np.abs(form2[it]-meanf2)
                df23 = np.abs(form2[it]-meanf3)
                if df21 < df22 :
                    if ~np.isnan(form1[it]):
                        df11 = np.abs(form1[it]-meanf1)
                        if df21 < df11:
                            form1[it] = form2[it]
                    else:
                      form1[it] = form2[it]
                    form2[it] = float('nan')
                elif df23 < df22:
                    if ~np.isnan(form3[it]):
                        df33 = np.abs(form3[it]-meanf3)
                        if df23 < df33:
                            form3[it] = form2[it]
                    else:
                        form3[it] = form2[it]
                    form2[it] = float('nan')
            # Same reassignment for the form3 value
            if ~np.isnan(form3[it]):
                df31 = np.abs(form3[it]-meanf1)
                df32 = np.abs(form3[it]-meanf2)
                df33 = np.abs(form3[it]-meanf3)
                if df32 < df33:
                    if df31 < df32:
                        if ~np.isnan(form1[it]):
                            df11 = np.abs(form1[it]-meanf1)
                            if df31 < df11:
                                form1[it] = form3[it]
                        else:
                            form1[it] = form3[it]
                    else:
                        if ~np.isnan(form2[it]):
                            df22 = np.abs(form2[it]-meanf2)
                            if df32 < df22:
                                form2[it] = form3[it]
                        else:
                            form2[it] = form3[it]
                    form3[it] = float('nan')

    return (sal, fund, fund2, form1, form2, form3, soundlen)
Exemplo n.º 31
0
 def targets_map_fn(indexes):
     """Build one flattened LPC target vector per example selected by ``indexes``.

     ``self._fetch_index(indexes)`` yields (sequence index, example index)
     pairs.  For each pair the example is transposed, run through ``lpc``
     with ``self.lpc_order``, and the first returned array, minus its leading
     entry, is flattened.  The vectors are returned as a list in that order.
     """
     return [
         lpc(self.samples_sequences[seq_pos][ex_pos].T, self.lpc_order)[0][1:].ravel()
         for seq_pos, ex_pos in self._fetch_index(indexes)
     ]
Exemplo n.º 32
0
def calFormants(frame):
    """Estimate five formant frequencies (Hz) for one frame from LPC roots.

    The frame is filtered with the FIR ``[1.0, 0.63]``, an order-8 LPC
    polynomial is fitted, and the polynomial roots with non-negative imaginary
    part are converted to frequencies using the fixed sampling rate
    ``Fs = 7418``.  Candidates are visited from the highest frequency to the
    lowest, and a candidate is kept when its frequency is above 90 Hz and its
    bandwidth is below 400.

    Each bandwidth is computed from the same root as the frequency it is
    compared with.  The previous implementation sorted the frequencies first
    and then indexed the unsorted roots with an argsort of the already-sorted
    list, so the bandwidth test was applied to an unrelated root.

    Parameters
    ----------
    frame : array_like
        Samples of one analysis frame.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(5,)``: the accepted formants in descending frequency
        order, padded at the end with placeholder values (3500 then 3700 when
        exactly three formants were found, 3700 otherwise).

    Raises
    ------
    ValueError
        If more than five candidates pass the thresholds (the result cannot
        be shaped to ``(5,)``).
    """
    Fs = 7418
    preemph = [1.0, 0.63]
    frame = lfilter(preemph, 1, frame)
    A, e, k = lpc(frame, 8)
    # Replace NaN/inf coefficients so numpy.roots does not fail on them.
    A = numpy.nan_to_num(A)

    rts = numpy.roots(A)
    # Complex roots come in conjugate pairs; keep one of each pair.
    rts = rts[numpy.imag(rts) >= 0]

    # Root angle -> frequency in Hz.
    freqs = numpy.arctan2(numpy.imag(rts), numpy.real(rts)) * (Fs / (2 * math.pi))

    # Sort frequencies from high to low and reorder the roots identically, so
    # that freqs[i] and bw[i] always describe the same root.
    order = numpy.argsort(freqs)[::-1]
    freqs = freqs[order]
    rts = rts[order]

    # Explicit -0.5: the literal (-1 / 2) is -1 under Python 2 integer division.
    # NOTE(review): the logarithm is base 10, as in the original code; the
    # usual pole-bandwidth formula uses the natural log - confirm which is meant.
    # A zero-magnitude root yields an infinite bandwidth and is simply rejected
    # by the threshold instead of raising a math domain error.
    with numpy.errstate(divide='ignore'):
        bw = -0.5 * (Fs / (2 * math.pi)) * numpy.log10(numpy.abs(rts))

    formants = [f for f, b in zip(freqs, bw) if f > 90 and b < 400]
    nn = len(formants)

    # Pad up to five entries with the placeholder values used by the original.
    if nn == 3:
        formants.extend([3500, 3700])
    elif nn < 5:
        formants.extend([3700] * (5 - nn))

    form = numpy.array(formants)
    form.shape = (5, )

    return form
Exemplo n.º 33
0
def stlpc(longSignal,
          order=10,
          windowLength=1024,
          hopsize=512,
          samplingrate=16000,
          axis=-1):
    """Compute 'Short Term LPC':
          Cut the input signal in frames
          Compute the LPC on each of the frames (through talkbox)

    The signal is prefixed with ``windowLength // 2`` zeros so that the first
    frame is centred on sample 0, filtered with the FIR ``[1.0, -0.99]``, cut
    into Hamming-windowed frames and analysed frame by frame.

    Parameters
    ----------
    longSignal : 1-D array
        Input samples.
    order : int
        LPC order; ``order + 1`` coefficients are stored per frame.
    windowLength : int
        Frame length in samples.
    hopsize : int
        Step between consecutive frames in samples.
    samplingrate : number
        Sampling rate used to convert root angles to frequencies.
    axis : int
        Unused; kept for interface compatibility.

    Returns
    -------
    STLpc : ndarray, shape (order + 1, nbWindows)
        LPC coefficients of each frame.
    rootLpc : complex ndarray, shape (order, nbWindows)
        Roots of each frame's LPC polynomial.
    freqLpc : ndarray, shape (order // 2, nbWindows)
        Lowest strictly positive root frequencies of each frame, ascending.
        Entries for which a frame has too few positive-frequency roots keep
        their initial value of 1.0.
    sigmaS : ndarray, shape (nbWindows,)
        Second value returned by ``tb.lpc`` for each frame.
    """
    fs = samplingrate
    # adding zeros to have the first frame centered on 0
    # (integer length: np.zeros rejects a float shape):
    data = np.concatenate((np.zeros(int(windowLength // 2)), longSignal))
    lengthSignal = data.size
    # number of windows, and resizing the data,
    # in accordance with stft from sffhmm.py.
    # Cast to int: the count is used as an array dimension and a loop bound.
    nbWindows = int(np.ceil((lengthSignal - windowLength) /
                            (np.double(hopsize)) + 1.0) + 1)
    newLengthSignal = (nbWindows - 1) * hopsize + windowLength
    data = np.concatenate([data, np.zeros([newLengthSignal - lengthSignal])])

    # number of coefficients for the LPC decomposition is `order+1`
    STLpc = np.ones([order + 1, nbWindows])
    # number of formants kept per frame: order / 2 rounded down
    nbFormants = int(order / 2)
    # builtin `complex` instead of the removed `np.complex` alias
    rootLpc = np.zeros([order, nbWindows], dtype=complex)
    freqLpc = np.ones([nbFormants, nbWindows])
    # specFromLpc = np.zeros([windowLength / 2.0 + 1, nbWindows])
    sigmaS = np.zeros([
        nbWindows,
    ])

    # pre-processing the data, amplifying high frequencies:
    b_preamp = np.array([1.0, -0.99])
    a_preamp = np.array([1.0])
    longSignalPreamp = scipy.signal.lfilter(b_preamp, a_preamp, data)

    # the analysis window is the same for every frame
    hammingWindow = np.hamming(windowLength)

    for n in range(nbWindows):
        # getting the desired frame
        beginFrame = n * hopsize
        endFrame = min(beginFrame + windowLength, lengthSignal)
        # Start every frame from zeros.  Reusing one buffer that is windowed
        # in place left already-windowed samples of the previous frame in the
        # tail of a partial frame.
        currentWindow = np.zeros([
            windowLength,
        ])
        if endFrame > beginFrame:
            currentWindow[:endFrame -
                          beginFrame] = longSignalPreamp[beginFrame:endFrame]
        # windowing the frame
        currentWindow *= hammingWindow
        # computing the LPC coefficients
        STLpc[:, n], sigmaS[n], _ = tb.lpc(currentWindow, order)
        # compute the corresponding spectrum - not necessary here
        # specFromLpc[:,n] = lpc2spec(STLpc[:,n], sigmaS[n], fs, windowLength)

        # compute the roots of the polynomial:
        rootLpc[:, n] = np.roots(STLpc[:, n])
        # convert to frequencies
        freqLpcTmp = np.angle(rootLpc[:, n]) / (2.0 * np.pi) * fs
        # keep strictly positive frequencies only (one root per conjugate pair)
        freqLpcTmp = freqLpcTmp[freqLpcTmp > 0.0]
        freqLpcTmp.sort()
        nbMinPositiveRoots = freqLpcTmp[0:nbFormants].size
        freqLpc[0:nbMinPositiveRoots, n] = freqLpcTmp[0:nbFormants]

    return STLpc, rootLpc, freqLpc, sigmaS  #, specFromLpc,
Exemplo n.º 34
0
 def get_lpc(self, data, order=44):
     """Return the first element of talkbox's ``lpc(data, order)`` result.

     talkbox is imported lazily so the dependency is only required when
     this method is actually called.
     """
     from scikits.talkbox import lpc as talkbox_lpc
     return talkbox_lpc(data, order)[0]
Exemplo n.º 35
0
def convert_to_lpc(filename, n_coeff):
    """Load an audio file and return its LPC outputs as one flat array.

    The file is loaded through ``lp.load`` as mono at 16000 Hz, ``lpc`` is run
    with ``n_coeff``, and the first three items of its result are stacked
    horizontally with ``np.hstack``.
    """
    # The loader also returns a rate value, which is not needed afterwards.
    samples, _rate = lp.load(filename, mono=True, sr=16000)
    analysis = lpc(samples, n_coeff)
    first, second, third = analysis[0], analysis[1], analysis[2]
    return np.hstack((first, second, third))
def feature_extraction_gd(y, fs=44100, statistics=True, include_delta=True,
                       include_acceleration=True, lpgd_params=None, win_params=None, delta_params=None, acceleration_params=None):
    """Extract LPC phase-difference features, optionally with delta/acceleration.

    The signal is cut into frames, an LPC polynomial is fitted per frame, and
    the negative first difference of the unwrapped phase of ``1 / FFT(a)``
    (a group-delay-like quantity, stored as ``tauA``) is compressed with a
    20-point DCT.  Delta and acceleration coefficients from
    ``librosa.feature.delta`` are stacked onto the features on request.

    Parameters
    ----------
    y : array
        Input signal.
    fs : int
        Unused; kept for interface compatibility.
    statistics : bool
        When true, the result also carries ``'tauA'`` and a ``'stat'`` dict.
    include_delta : bool
        Append delta coefficients to the feature matrix.
    include_acceleration : bool
        Append acceleration coefficients, computed from the delta
        coefficients.  The deltas are computed internally when
        ``include_delta`` is false; previously that combination raised
        NameError.
    lpgd_params : dict
        Required; must provide ``'nfft'`` and ``'lp_order'``.
    win_params : dict
        Required; must provide ``'win_length'`` and ``'hop_length'``.
    delta_params, acceleration_params : dict or None
        Extra keyword arguments for ``librosa.feature.delta``; ``None`` means
        no extra arguments.

    Returns
    -------
    dict
        ``{'feat': feature_matrix}`` and, when ``statistics`` is true, also
        ``'tauA'`` and ``'stat'`` (mean, std, N, S1, S2 over axis 0).
    """
    eps = numpy.spacing(1)

    nfft = lpgd_params['nfft']
    lp_order = lpgd_params['lp_order']

    # The None defaults cannot be unpacked with ** further down.
    if delta_params is None:
        delta_params = {}
    if acceleration_params is None:
        acceleration_params = {}

    # Offset the signal by machine epsilon before framing, as the original did.
    y = y + eps
    frames = segment_axis(y, win_params['win_length'], win_params['hop_length'])

    a, e, k = lpc(frames, lp_order)

    # Phase of the inverse of the LPC polynomial spectrum.
    A = fft(a, nfft)
    A = 1/A
    phaseA = numpy.unwrap(numpy.angle(A))

    # Keep the first half of the spectrum; floor division keeps the slice
    # index an integer under Python 3 as well.
    phaseA = phaseA[:, 0:nfft // 2]

    tauA = -1 * numpy.diff(phaseA)

    # Repeat the last row once more, as the original did.
    tauA = numpy.vstack((tauA, tauA[-1]))

    feature_matrix = tauA
    feature_matrix = dct(feature_matrix, n=20)
    feature_matrix = feature_matrix.T
    print(feature_matrix.shape)

    # Deltas are always taken from the un-stacked base features.
    base_features = feature_matrix
    feature_delta = None

    if include_delta:
        # Delta coefficients
        feature_delta = librosa.feature.delta(base_features, **delta_params)

        # Add Delta Coefficients to feature matrix
        feature_matrix = numpy.vstack((feature_matrix, feature_delta))

    if include_acceleration:
        # Acceleration needs the deltas even when they are not part of the output.
        if feature_delta is None:
            feature_delta = librosa.feature.delta(base_features, **delta_params)

        # Acceleration coefficients (aka delta-delta)
        feature_delta2 = librosa.feature.delta(feature_delta, order=2, **acceleration_params)

        # Add Acceleration Coefficients to feature matrix
        feature_matrix = numpy.vstack((feature_matrix, feature_delta2))

    feature_matrix = feature_matrix.T
    print('fm: ' + str(feature_matrix.shape))

    # Collect into data structure
    if statistics:
        return {
            'tauA' : tauA,
            'feat': feature_matrix,
            'stat': {
                'mean': numpy.mean(feature_matrix, axis=0),
                'std': numpy.std(feature_matrix, axis=0),
                'N': feature_matrix.shape[0],
                'S1': numpy.sum(feature_matrix, axis=0),
                'S2': numpy.sum(feature_matrix ** 2, axis=0),
            }
        }
    else:
        return {
            'feat': feature_matrix}
Exemplo n.º 37
0
 def get_lpc(self,data,order=44):
     """Run talkbox's ``lpc`` on ``data`` and return the first item of its result.

     The import happens at call time, so talkbox is only needed when this
     method is used.
     """
     from scikits.talkbox import lpc as talkbox_lpc
     lpc_result = talkbox_lpc(data, order)
     return lpc_result[0]
Exemplo n.º 38
0
def _clip_window(tind, winLen, soundLen):
    """Clip an odd-length window centered on sample `tind` to the sound.

    Returns (tstart, tend, winstart, windend): `tstart:tend` slices the sound
    and `winstart:windend` slices the window function.  For a center that
    lies inside the sound both slices have the same length.
    """
    half = (winLen - 1) // 2      # exact, because winLen is odd
    tstart = tind - half
    tend = tind + half

    if tstart < 0:
        winstart = -tstart
        tstart = 0
    else:
        winstart = 0

    if tend >= soundLen:
        windend = winLen - (tend - soundLen + 1) - 1
        tend = soundLen - 1
    else:
        windend = winLen - 1

    return tstart, tend, winstart, windend


def _reassign_formants(form1, form2, form3):
    """Move formant values, in place, towards the track whose mean is closest.

    For every time point a value sitting in one track is compared with the
    means of the three tracks (computed over the non-NaN entries).  When it
    is closer to another track's mean it is moved there, unless that track
    already holds a value at least as close to its own mean, and the
    original slot is set to NaN.
    """
    meanf1 = np.mean(form1[~np.isnan(form1)])
    meanf2 = np.mean(form2[~np.isnan(form2)])
    meanf3 = np.mean(form3[~np.isnan(form3)])

    for it in range(len(form1)):
        # The form2/form3 checks below are only reached when form1 had a
        # value at the start of the iteration (same nesting as before).
        if np.isnan(form1[it]):
            continue

        df11 = np.abs(form1[it]-meanf1)
        df12 = np.abs(form1[it]-meanf2)
        df13 = np.abs(form1[it]-meanf3)
        if df12 < df11:
            if df13 < df12:
                if not np.isnan(form3[it]):
                    df33 = np.abs(form3[it]-meanf3)
                    if df13 < df33:
                        form3[it] = form1[it]
                else:
                    form3[it] = form1[it]
            else:
                if not np.isnan(form2[it]):
                    df22 = np.abs(form2[it]-meanf2)
                    if df12 < df22:
                        form2[it] = form1[it]
                else:
                    form2[it] = form1[it]
            form1[it] = float('nan')

        if not np.isnan(form2[it]):
            df21 = np.abs(form2[it]-meanf1)
            df22 = np.abs(form2[it]-meanf2)
            df23 = np.abs(form2[it]-meanf3)
            if df21 < df22:
                if not np.isnan(form1[it]):
                    df11 = np.abs(form1[it]-meanf1)
                    if df21 < df11:
                        form1[it] = form2[it]
                else:
                    form1[it] = form2[it]
                form2[it] = float('nan')
            elif df23 < df22:
                if not np.isnan(form3[it]):
                    df33 = np.abs(form3[it]-meanf3)
                    if df23 < df33:
                        form3[it] = form2[it]
                else:
                    form3[it] = form2[it]
                form2[it] = float('nan')

        if not np.isnan(form3[it]):
            df31 = np.abs(form3[it]-meanf1)
            df32 = np.abs(form3[it]-meanf2)
            df33 = np.abs(form3[it]-meanf3)
            if df32 < df33:
                if df31 < df32:
                    if not np.isnan(form1[it]):
                        df11 = np.abs(form1[it]-meanf1)
                        if df31 < df11:
                            form1[it] = form3[it]
                    else:
                        form1[it] = form3[it]
                else:
                    if not np.isnan(form2[it]):
                        df22 = np.abs(form2[it]-meanf2)
                        if df32 < df22:
                            form2[it] = form3[it]
                    else:
                        form2[it] = form3[it]
                form3[it] = float('nan')


def fundEstimator(soundIn, fs, t=None, debugFig = 0, maxFund = 1500, minFund = 300, lowFc = 200, highFc = 6000, minSaliency = 0.5):
    """
    Estimates the fundamental frequency of a complex sound.
    soundIn is the sound pressure waveform.
    fs is the sampling rate
    t is a vector of time values in s at which the fundamental will be estimated.
      When omitted, one estimate is made every millisecond.
    The sound must include at least 1024 sample points

    The optional parameter with defaults are
       debugFig = 0         Set to zero to eliminate figures.
       maxFund = 1500       Maximum fundamental frequency
       minFund = 300        Minimum fundamental frequency
       lowFc = 200          Low frequency cut-off for band-passing the signal prior to auto-correlation.
       highFc = 6000        High frequency cut-off
       minSaliency = 0.5    Threshold in the auto-correlation for minimum saliency - returns NaN for pitch values is saliency is below this number

    Returns the tuple (sal, fund, fund2, form1, form2, form3, soundlen)
           sal     - the time varying pitch saliency - the height of the largest auto-correlation peak relative to the zero-lag value
           fund    - the time-varying fundamental in Hz, one value per entry of t
           fund2   - a second peak in the spectrum - not a multiple of the fundamental a sign of a second voice
           form1   - the first formant, if it exists
           form2   - the second formant, if it exists
           form3   - the third formant, if it exists
           soundlen - number of analysis windows whose RMS reached 10% of the largest window RMS
    Entries that could not be estimated are NaN.
    When the sound is shorter than 1024 samples an error message is printed
    and (0, 0, 0, 0, 0, 0, 0) is returned.
    """

    # Band-pass filtering signal prior to auto-correlation
    soundLen = len(soundIn)
    nfilt = 1024
    if soundLen < nfilt:
        print('Error in fundEstimator: sound too short for bandpass filtering, len(soundIn)=%d\n' % soundLen)
        return (0, 0, 0, 0, 0, 0, 0)

    # Float copy of the sampling rate for arithmetic, so that an integer fs
    # never triggers truncating division.  The caller's fs is still what is
    # handed to the external helpers.
    fsHz = float(fs)

    # high pass filter the signal
    highpassFilter = firwin(nfilt-1, 2.0*lowFc/fsHz, pass_zero=False)
    padlen = min(soundLen-10, 3*len(highpassFilter))
    soundIn = filtfilt(highpassFilter, [1.0], soundIn, padlen=padlen)

    # low pass filter the signal
    lowpassFilter = firwin(nfilt, 2.0*highFc/fsHz)
    padlen = min(soundLen-10, 3*len(lowpassFilter))
    soundIn = filtfilt(lowpassFilter, [1.0], soundIn, padlen=padlen)

    # Plot a spectrogram?
    if debugFig:
        plt.figure(9)
        (tDebug ,freqDebug ,specDebug , rms) = spectrogram(soundIn, fs, 1000.0, 50, min_freq=0, max_freq=10000, nstd=6, log=True, noise_level_db=50, rectify=True)
        plot_spectrogram(tDebug, freqDebug, specDebug)

    # Initializations and useful variables
    if t is None:
        # initialize t to be spaced by 1 ms increments
        sound_dur = len(soundIn) / fsHz
        _si = 1e-3
        npts = int(sound_dur / _si)
        t = np.arange(npts) * _si

    nt = len(t)
    soundRMS = np.zeros(nt)
    # Every output starts as NaN and is only filled in where an estimate exists.
    fund = np.full(nt, np.nan)
    fund2 = np.full(nt, np.nan)
    sal = np.full(nt, np.nan)
    form1 = np.full(nt, np.nan)
    form2 = np.full(nt, np.nan)
    form3 = np.full(nt, np.nan)

    #  Calculate the size of the window for the auto-correlation
    alpha = 5                          # Number of sd in the Gaussian window
    winLen = int(np.fix((2.0*alpha/minFund)*fs))  # Length of Gaussian window based on minFund
    if (winLen%2 == 0):  # Make a symmetric window
        winLen += 1

    winLen2 = 2**12+1   # This looks like a good size for LPC - 4097 points

    gt, w = gaussian_window(winLen, alpha)
    gt2, w2 = gaussian_window(winLen2, alpha)
    maxlags = int(2*ceil((float(fs)/minFund)))

    # Quantities that do not depend on the analysis window.
    lags = np.arange(-maxlags, maxlags+1, 1)
    ind0 = maxlags                     # index of the zero lag inside `lags`
    nhalf = (winLen+1)//2              # winLen is odd, so winLen+1 is even
    f = (fs/2.0)*(np.arange(nhalf+1, dtype=float)/float(nhalf))
    fhigh = np.flatnonzero(f >= highFc)[0]

    # First calculate the rms in each window
    for it in range(nt):
        tind = int(np.fix(t[it]*fs))    # Center of window in ind
        tstart, tend, winstart, windend = _clip_window(tind, winLen, soundLen)
        soundRMS[it] = np.std(soundIn[tstart:tend]*w[winstart:windend])

    soundRMSMax = max(soundRMS) if nt > 0 else 0.0

    # Calculate the auto-correlation in windowed segments and obtain 4 guess values of the fundamental
    # fundCorrGuess - guess from the auto-correlation function
    # fundCorrAmpGuess - guess form the amplitude of the auto-correlation function
    # fundCepGuess - guess from the cepstrum
    # fundStackGuess - guess taken from a fit of the power spectrum with a harmonic stack, using the fundCepGuess as a starting point
    #  Current version use fundStackGuess as the best estimate...

    soundlen = 0
    for it in range(nt):
        if (soundRMS[it] < soundRMSMax*0.1):
            continue

        soundlen += 1
        tind = int(np.fix(t[it]*fs))    # Center of window in ind
        tstart, tend, winstart, windend = _clip_window(tind, winLen, soundLen)
        tstart2, tend2, winstart2, windend2 = _clip_window(tind, winLen2, soundLen)

        soundWin = soundIn[tstart:tend]*w[winstart:windend]
        soundWin2 = soundIn[tstart2:tend2]*w2[winstart2:windend2]

        # Apply LPC to get time-varying formants and one additional guess for the fundamental frequency
        A, E, K = talkbox.lpc(soundWin2, 8)    # 8 degree polynomial
        rts = np.roots(A)          # Find the roots of A
        rts = rts[np.imag(rts)>=0]  # Keep only half of them
        angz = np.arctan2(np.imag(rts),np.real(rts))

        # Calculate the frequencies and the bandwidth of the formants
        frqsFormants = angz*(fs/(2*np.pi))
        indices = np.argsort(frqsFormants)
        # -0.5, not -1/2: the latter floors to -1 under Python 2 integer division.
        bw = -0.5*(fs/(2*np.pi))*np.log(np.abs(rts))

        # Keep formants above 1000 Hz and with bandwidth < 1000
        formants = [frqsFormants[kk] for kk in indices
                    if frqsFormants[kk] > 1000 and bw[kk] < 1000]

        if len(formants) > 0 :
            form1[it] = formants[0]
        if len(formants) > 1 :
            form2[it] = formants[1]
        if len(formants) > 2 :
            form3[it] = formants[2]

        # Calculate the auto-correlation
        autoCorr = correlation_function(soundWin, soundWin, lags)

        # find peaks
        indPeaksCorr = detect_peaks(autoCorr, mph=max(autoCorr)/10)

        # Eliminate center peak and all peaks too close to middle
        # (this also drops every peak at a negative lag)
        indPeaksCorr = indPeaksCorr[(indPeaksCorr-ind0) >= fsHz/maxFund]
        pksCorr = autoCorr[indPeaksCorr]

        # Find max peak
        if len(pksCorr)==0:
            # 0.1 goes with the detection of peaks greater than max/10.
            # Without any peak there is no correlation-based guess to fall
            # back on, so the window is skipped whatever minSaliency is.
            sal[it] = 0.1
            continue

        indMax = indPeaksCorr[int(np.argmax(pksCorr))]
        fundCorrGuess = fsHz/abs(lags[indMax])
        pitchSaliency = autoCorr[indMax]/autoCorr[ind0]

        sal[it] = pitchSaliency

        if sal[it] < minSaliency:
            continue

        # Calculate the envelope of the auto-correlation after rectification
        envCorr = temporal_envelope(autoCorr, fs, cutoff_freq=maxFund, resample_rate=None)
        locsEnvCorr = detect_peaks(envCorr, mph=max(envCorr)/10)
        pksEnvCorr = envCorr[locsEnvCorr]

        # The max peak should be around zero; take the first peak after it
        if len(pksEnvCorr) > 0:
            indIndEnvMax = int(np.argmax(pksEnvCorr))
        else:
            indIndEnvMax = -1
        if indIndEnvMax < 0 or indIndEnvMax+2 > len(locsEnvCorr):
            fundCorrAmpGuess = fundCorrGuess
            indEnvMax = indMax
        else:
            indEnvMax = locsEnvCorr[indIndEnvMax+1]
            fundCorrAmpGuess = fsHz/lags[indEnvMax]

        # Calculate power spectrum and cepstrum
        Y = fft(soundWin, n=winLen+1)

        powSound = 20.0*np.log10(np.abs(Y[0:nhalf+1]))    # This is the power spectrum
        powSoundGood = powSound[0:fhigh]
        maxPow = max(powSoundGood)
        powSoundGood = powSoundGood - maxPow   # Set zero as the peak amplitude
        powSoundGood[powSoundGood < - 60] = -60

        # Calculate coarse spectral enveloppe
        p = np.polyfit(f[0:fhigh], powSoundGood, 3)
        powAmp = np.polyval(p, f[0:fhigh])
        specResidual = powSoundGood - powAmp

        # Cepstrum
        CY = dct(specResidual, norm = 'ortho')

        tCY = 2000.0*np.arange(len(CY))/fs          # Units of Cepstrum in ms
        with np.errstate(divide='ignore'):
            fCY = 1000.0/tCY # Corresponding fundamental frequency in Hz (inf at index 0).
        lowInd = np.flatnonzero(fCY < lowFc)
        if lowInd.size > 0:
            flowCY = lowInd[0]
        else:
            flowCY = fCY.size

        fhighCY = np.flatnonzero(fCY < highFc)[0]

        # Find peak of Cepstrum (last one in case of ties)
        cepBand = CY[fhighCY:flowCY]
        indPk = fhighCY + np.flatnonzero(cepBand == max(cepBand))[-1]

        # Center of mass of the positive lobe surrounding the peak
        fmass = 0.0
        mass = 0.0
        indTry = indPk
        while indTry < len(CY) and CY[indTry] > 0:
            fmass = fmass + fCY[indTry]*CY[indTry]
            mass = mass + CY[indTry]
            indTry = indTry + 1

        indTry = indPk - 1
        while indTry >= 0 and CY[indTry] > 0:
            fmass = fmass + fCY[indTry]*CY[indTry]
            mass = mass + CY[indTry]
            indTry = indTry - 1

        # An empty lobe (non-positive peak) counts as a cepstral failure.
        fGuess = fmass/mass if mass > 0 else float('nan')

        if (fGuess == 0  or np.isnan(fGuess) or np.isinf(fGuess) ):              # Failure of cepstral method
            fGuess = fundCorrGuess

        fundCepGuess = fGuess

        # Force fundamendal to be bounded
        if (fundCepGuess > maxFund ):
            i = 2
            while(fundCepGuess > maxFund):
                fundCepGuess = fGuess/i
                i += 1
        elif (fundCepGuess < minFund):
            i = 2
            while(fundCepGuess < minFund):
                fundCepGuess = fGuess*i
                i += 1

        # Fit Gaussian harmonic stack, once starting from the cepstral guess
        # and once from twice that value; keep the fit with the lower error.
        maxPow = max(specResidual)

        initParams = np.concatenate(([fundCepGuess], np.ones(9)*np.log(maxPow)))
        bout = leastsq(residualSyn, initParams, args = (f[0:fhigh], specResidual))
        modelPowCep = synSpect(bout[0], f[0:fhigh])
        errCep = np.sum((specResidual - modelPowCep)**2)

        initParams = np.concatenate(([fundCepGuess*2], np.ones(9)*np.log(maxPow)))
        bout2 = leastsq(residualSyn, initParams, args = (f[0:fhigh], specResidual))
        modelPowCep2 = synSpect(bout2[0], f[0:fhigh])
        errCep2 = np.sum((specResidual - modelPowCep2)**2)

        if errCep2 < errCep:
            bout = bout2
            modelPowCep =  modelPowCep2

        fundStackGuess = bout[0][0]
        if (fundStackGuess > maxFund) or (fundStackGuess < minFund ):
            fundStackGuess = float('nan')

        fund[it] = fundStackGuess

        # f2 stays 0 unless a second peak is found; the debug plot reads it.
        f2 = 0
        if  not np.isnan(fundStackGuess):
            powLeft = specResidual - modelPowCep
            maxPow2 = max(powLeft)
            if ( maxPow2 > maxPow*0.5):    # Possible second peak in central area as indicator of second voice.
                f2 = f[int(np.argmax(powLeft))]
                if ( f2 > 1000 and f2 < 4000):
                    if (pitchSaliency > minSaliency):
                        fund2[it] = f2

        if (debugFig ):
            lagsMs = 1000.0*lags/fsHz      # lag axis in ms

            plt.figure(10)
            plt.subplot(4,1,1)
            plt.cla()
            plt.plot(soundWin)
            titleStr = 'Saliency = %.2f Pitch AC = %.2f (Hz)  Pitch ACA = %.2f Pitch C %.2f (Hz)' % (pitchSaliency, fundCorrGuess, fundCorrAmpGuess, fundStackGuess)
            plt.title(titleStr)

            plt.subplot(4,1,2)
            plt.cla()
            plt.plot(lagsMs, autoCorr)
            plt.plot([lagsMs[indMax], lagsMs[indMax]], [0, autoCorr[ind0]], 'k')
            plt.plot(lagsMs, envCorr, 'r', linewidth= 2)
            plt.plot([lagsMs[indEnvMax], lagsMs[indEnvMax]], [0, autoCorr[ind0]], 'g')
            plt.xlabel('Time (ms)')

            plt.subplot(4,1,3)
            plt.cla()
            plt.plot(f[0:fhigh],powSoundGood)
            plt.axis([0, highFc, -60, 0])
            plt.plot(f[0:fhigh], powAmp, 'b--')
            plt.plot(f[0:fhigh], modelPowCep + powAmp, 'k')

            for ih in range(1,6):
                plt.plot([fundCorrGuess*ih, fundCorrGuess*ih], [-60, 0], 'r')
                plt.plot([fundStackGuess*ih, fundStackGuess*ih], [-60, 0], 'k')

            if f2 != 0:
                plt.plot([f2, f2], [-60, 0], 'g')

            plt.xlabel('Frequency (Hz)')

            plt.subplot(4,1,4)
            plt.cla()
            plt.plot(tCY, CY)
            plt.plot([1000/fundCorrGuess, 1000/fundCorrGuess], [0, max(CY)], 'r')
            plt.plot([1000/fundStackGuess, 1000/fundStackGuess], [0, max(CY)], 'k')
            plt.axis([0, 1000.0*np.size(CY)/(2*fsHz), 0, max(CY)])
            plt.xlabel('Time (ms)')

            plt.pause(1)

    # Fix formants.
    _reassign_formants(form1, form2, form3)

    return (sal, fund, fund2, form1, form2, form3, soundlen)
Exemplo n.º 39
0
def stlpc(longSignal,
          order=10,
          windowLength=1024,
          hopsize=512,
          samplingrate=16000,
          axis=-1):
    """Compute 'Short Term LPC':
          Cut the input signal in frames
          Compute the LPC on each of the frames (through talkbox)

    Parameters:
      longSignal    1-D signal to analyse
      order         LPC order; each frame yields `order + 1` coefficients
      windowLength  frame length in samples
      hopsize       step between successive frames in samples
      samplingrate  sampling rate in Hz, used to convert root angles to Hz
      axis          unused, kept for interface compatibility

    Returns (STLpc, rootLpc, freqLpc, sigmaS), one column/entry per frame:
      STLpc    (order + 1, nbWindows) LPC coefficients
      rootLpc  (order, nbWindows) complex roots of the LPC polynomial
      freqLpc  (order // 2, nbWindows) lowest positive root frequencies in
               Hz, sorted ascending; slots without a root keep the value 1
      sigmaS   (nbWindows,) second value returned by talkbox's lpc
    Frames that contain only zeros are not analysed and keep the initial
    values (ones for STLpc and freqLpc, zeros for rootLpc and sigmaS).
    """
    fs = samplingrate
    # adding zeros to have the first frame centered on 0:
    data = np.concatenate((np.zeros(windowLength // 2),
                           longSignal))
    lengthSignal = data.size
    # number of windows, and resizing the data,
    # in accordance with stft from sffhmm.py.
    # Kept as an int: it is used as an array shape and as a loop bound.
    nbWindows = int(np.ceil((lengthSignal - windowLength) /
                            (np.double(hopsize)) + 1.0)) + 1
    nbWindows = max(nbWindows, 1)   # very short inputs still get one frame
    newLengthSignal = (nbWindows - 1) * hopsize + windowLength
    data = np.concatenate([data,
                           np.zeros([newLengthSignal - lengthSignal])])

    # number of coefficients for the LPC decomposition is `order+1`
    STLpc = np.ones([order + 1, nbWindows])
    # at most `order / 2` formants: complex roots come in conjugate pairs
    # and only the positive-frequency member of each pair is kept
    nbFormants = int(order / 2)
    rootLpc = np.zeros([order, nbWindows], dtype=complex)
    freqLpc = np.ones([nbFormants, nbWindows])
    sigmaS = np.zeros([nbWindows, ])

    # pre-processing the data, amplifying high frequencies:
    b_preamp=np.array([1.0,-0.99])
    a_preamp=np.array([1.0])
    longSignalPreamp = scipy.signal.lfilter(b_preamp,a_preamp,data)

    hammingWindow = np.hamming(windowLength)

    for n in range(nbWindows):
        # getting the desired frame; samples past the (unpadded) signal
        # end are left at zero
        beginFrame = n * hopsize
        endFrame = min(beginFrame + windowLength, lengthSignal)
        frameLength = max(endFrame - beginFrame, 0)
        # A fresh buffer per frame: reusing one buffer would leave the
        # previous frame's already-windowed samples in the tail of a
        # shorter frame.
        currentWindow = np.zeros([windowLength,])
        currentWindow[:frameLength] = longSignalPreamp[
            beginFrame:beginFrame + frameLength]
        # windowing the frame
        currentWindow *= hammingWindow

        if not np.any(currentWindow):
            # nothing to model in an all-zero frame; keep the placeholders
            continue

        # computing the LPC coefficients
        STLpc[:,n], sigmaS[n], _ = tb.lpc(currentWindow, order)

        # compute the roots of the polynomial:
        rootLpc[:,n] = np.roots(STLpc[:,n])
        # convert to frequencies
        freqLpcTmp = np.angle(rootLpc[:,n]) / (2.0 * np.pi) * fs
        freqLpcTmp = freqLpcTmp[freqLpcTmp>0.0]
        freqLpcTmp.sort()
        nbMinPositiveRoots = freqLpcTmp[0:nbFormants].size
        freqLpc[0:nbMinPositiveRoots,n] = freqLpcTmp[0:nbFormants]

    return STLpc, rootLpc, freqLpc, sigmaS #, specFromLpc,
Exemplo n.º 40
0
    def _compute_formants(self, audio_buffer, sampling_rate=8000):
        """
        Computes the frequencies of formants of the window of audio data, along with their bandwidths.

        A formant is a frequency band over which there is a concentration of energy.
        They correspond to tones produced by the vocal tract and are therefore often
        used to characterize vowels, which have distinct frequencies. In the task of
        speaker identification, it can be used to characterize a person's speech
        patterns.

        This implementation is based on the Matlab tutorial on Estimating Formants using
        LPC (Linear Predictive Coding) Coefficients:
        http://www.mathworks.com/help/signal/ug/formant-estimation-with-lpc-coefficients.html.

        Help porting this to Python was found here :
        http://stackoverflow.com/questions/25107806/estimate-formants-using-lpc-in-python.

        Parameters:
            audio_buffer   1-D window of audio samples
            sampling_rate  sampling frequency of `audio_buffer` in Hz (default 8000)

        Returns:
            (freqs, bandwidths): `freqs` is a list of candidate formant
            frequencies in Hz in ascending order, `bandwidths` is an array
            with the bandwidth in Hz of each of them, in the same order.
        """

        # Taper the window with a Hamming window to smooth out the
        # discontinuities at its edges
        # (https://en.wikipedia.org/wiki/Window_function).
        N = len(audio_buffer)
        Fs = sampling_rate
        hamming_window = np.hamming(N)
        window = audio_buffer * hamming_window

        # Filter with the all-pole filter 1 / (1 + 0.63 z^-1) used by the
        # Matlab tutorial before the LPC analysis.
        filtered_buffer = lfilter([1], [1., 0.63], window)

        # LPC models the vocal tract as an all-pole filter 1/A(z) and
        # estimates the coefficients of A from the signal.
        # Rule of thumb: the LPC order is 2 more than the sampling frequency
        # in kHz.  Integer division keeps the order an int on Python 3 too.
        ncoeff = 2 + int(Fs // 1000)
        A, e, k = lpc(filtered_buffer, ncoeff)

        # The roots of A are the poles of the model.  Complex roots come in
        # conjugate pairs, so only the half with a non-negative imaginary
        # part is kept.
        roots = np.roots(A)
        roots = [r for r in roots if np.imag(r) >= 0]

        # The angle of a root is its frequency in rad/sample.
        angz = np.arctan2(np.imag(roots), np.real(roots))

        # Convert the angular frequencies from rad/sample to Hz.
        unsorted_freqs = angz * (Fs / (2 * math.pi))

        # Sort once and reuse the permutation so that frequencies and
        # bandwidths are returned in the same (ascending frequency) order.
        indices = np.argsort(unsorted_freqs)
        freqs = list(unsorted_freqs[indices])
        sorted_roots = np.asarray(roots)[indices]

        # The distance of a root from the unit circle gives the bandwidth:
        # the closer to the circle, the narrower the resonance.
        bandwidths = -1 / 2. * (Fs /
                                (2 * math.pi)) * np.log(np.abs(sorted_roots))

        if self.debug:
            print("Identified {} formants.".format(len(freqs)))

        return freqs, bandwidths
def feature_extraction_gd(y,
                          fs=44100,
                          statistics=True,
                          include_delta=True,
                          include_acceleration=True,
                          lpgd_params=None,
                          win_params=None,
                          delta_params=None,
                          acceleration_params=None):
    """Extract DCT-compressed group-delay features of the LPC spectrum.

    The signal is cut into frames, an LPC polynomial is fitted to every
    frame, and the negated first difference of the unwrapped phase of
    1 / FFT(lpc coefficients) is taken over the first nfft/2 bins.  The
    first 20 DCT coefficients of that curve form the static features.

    Parameters:
        y                     audio signal
        fs                    sampling rate; not used by this function
        statistics            when True, also return summary statistics
        include_delta         append delta coefficients
        include_acceleration  append acceleration coefficients
        lpgd_params           dict with keys 'nfft' and 'lp_order' (required)
        win_params            dict with keys 'win_length' and 'hop_length'
                              (required)
        delta_params          keyword arguments for librosa.feature.delta
                              (None means none)
        acceleration_params   extra keyword arguments for the acceleration
                              call to librosa.feature.delta (None means none)

    Returns:
        A dict with 'feat' (one row per frame).  When `statistics` is True
        it also holds 'tauA' and 'stat' with the per-column 'mean', 'std',
        'S1' (sum), 'S2' (sum of squares) and the row count 'N'.
    """
    eps = numpy.spacing(1)

    nfft = lpgd_params['nfft']
    lp_order = lpgd_params['lp_order']

    # Offset the signal by machine epsilon before the LPC analysis.
    y = y + eps
    frames = segment_axis(y, win_params['win_length'],
                          win_params['hop_length'])

    a, _, _ = lpc(frames, lp_order)

    A = fft(a, nfft)
    A = 1 / A
    phaseA = numpy.unwrap(numpy.angle(A))

    # Integer division: a float slice bound is an error on Python 3.
    phaseA = phaseA[:, 0:nfft // 2]

    tauA = -1 * numpy.diff(phaseA)

    # NOTE(review): this appends a copy of the last row; confirm that
    # padding the row axis (rather than the differenced axis) is intended.
    tauA = numpy.vstack((tauA, tauA[-1]))

    static_features = dct(tauA, n=20).T
    feature_matrix = static_features
    print(feature_matrix.shape)

    feature_delta = None
    if include_delta:
        # Delta coefficients
        feature_delta = librosa.feature.delta(static_features,
                                              **(delta_params or {}))

        # Add Delta Coefficients to feature matrix
        feature_matrix = numpy.vstack((feature_matrix, feature_delta))

    if include_acceleration:
        if feature_delta is None:
            # The acceleration is derived from the deltas, so they are
            # needed even when they are not part of the output.
            feature_delta = librosa.feature.delta(static_features,
                                                  **(delta_params or {}))

        # Acceleration coefficients (aka delta delta)
        feature_delta2 = librosa.feature.delta(feature_delta,
                                               order=2,
                                               **(acceleration_params or {}))

        # Add Acceleration Coefficients to feature matrix
        feature_matrix = numpy.vstack((feature_matrix, feature_delta2))

    feature_matrix = feature_matrix.T
    print('fm: ' + str(feature_matrix.shape))

    # Collect into data structure
    if statistics:
        return {
            'tauA': tauA,
            'feat': feature_matrix,
            'stat': {
                'mean': numpy.mean(feature_matrix, axis=0),
                'std': numpy.std(feature_matrix, axis=0),
                'N': feature_matrix.shape[0],
                'S1': numpy.sum(feature_matrix, axis=0),
                'S2': numpy.sum(feature_matrix**2, axis=0),
            }
        }
    else:
        return {'feat': feature_matrix}