def f0Twm(x, fs, w, N, H, t, minf0, maxf0, f0et):
    """Detect the fundamental frequency of a sound, frame by frame, with TWM.

    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size (minimum 512), H: hop size in samples (must be positive),
    t: threshold in negative dB,
    minf0: minimum f0 frequency in Hz, maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5)

    Returns f0: 1-D float array holding the value returned by
    UF.f0DetectionTwm for every analysed frame. The array is empty when
    the input is too short to hold a single frame.

    Raises ValueError when H is not positive, because the frame pointer
    would never reach the end of the sound.
    """
    if H <= 0:
        raise ValueError("H (hop size) must be a positive number of samples")

    # Floor division keeps every size usable as an array index, whatever
    # division semantics the interpreter applies to "/".
    hN = N // 2                          # size of positive spectrum
    hM1 = (w.size + 1) // 2              # half analysis window size by rounding
    hM2 = w.size // 2                    # half analysis window size by floor

    x = np.append(np.zeros(hM2), x)      # zeros at the beginning center the first window at sample 0
    x = np.append(x, np.zeros(hM1))      # zeros at the end allow analysing the last sample
    pin = hM1                            # sound pointer, placed in the middle of the analysis window
    pend = x.size - hM1                  # last sample to start a frame
    w = w / sum(w)                       # normalize analysis window

    f0_frames = []                       # collected in a list; converted to an array once at the end
    f0stable = 0                         # last f0 considered stable (0 means "none")
    while pin < pend:
        x1 = x[pin - hM1:pin + hM2]                            # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                         # compute dft
        ploc = UF.peakDetection(mX, hN, t)                     # detect peak locations
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)    # refine peak values
        ipfreq = fs * iploc / N                                # peak locations in Hz
        f0t = UF.f0DetectionTwm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0

        # An f0 becomes "stable" when there was none and a positive one is
        # found, or when it stays within a fifth of the previous stable value.
        if (f0stable == 0 and f0t > 0) or (
                f0stable > 0 and np.abs(f0stable - f0t) < f0stable / 5.0):
            f0stable = f0t
        else:
            f0stable = 0

        f0_frames.append(f0t)
        pin += H                                               # advance sound pointer
    return np.array(f0_frames, dtype=float)
def harmonicModel(x, fs, w, N, t, nH, minf0, maxf0, f0et):
    """Analyse and resynthesise a sound with the sinusoidal harmonic model.

    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size (minimum 512), t: threshold in negative dB,
    nH: maximum number of harmonics,
    minf0: minimum f0 frequency in Hz, maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5)

    Returns y: output sound array, with the zero padding that is added
    internally around the input removed again.
    """
    # Floor division keeps every size usable as an array index, whatever
    # division semantics the interpreter applies to "/".
    hN = N // 2                          # size of positive spectrum
    hM1 = (w.size + 1) // 2              # half analysis window size by rounding
    hM2 = w.size // 2                    # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)      # zeros at the beginning center the first window at sample 0
    x = np.append(x, np.zeros(hM1))      # zeros at the end allow analysing the last sample

    Ns = 512                             # FFT size for synthesis (even)
    H = Ns // 4                          # hop size used for analysis and synthesis
    hNs = Ns // 2                        # half synthesis FFT size
    pin = max(hNs, hM1)                  # sound pointer, placed in the middle of the analysis window
    pend = x.size - max(hNs, hM1)        # last sample to start a frame
    yh = np.zeros(Ns)                    # output sound frame
    y = np.zeros(x.size)                 # output array
    w = w / sum(w)                       # normalize analysis window

    # Overlap-add window: a triangular window of 2*H samples divided by the
    # normalized Blackman-Harris window, zero outside the central 2*H samples.
    sw = np.zeros(Ns)
    ow = triang(2 * H)
    bh = blackmanharris(Ns)
    bh = bh / sum(bh)
    sw[hNs - H:hNs + H] = ow / bh[hNs - H:hNs + H]

    hfreqp = []                          # harmonic frequencies of the previous frame
    f0stable = 0                         # last f0 considered stable (0 means "none")
    while pin < pend:
        # -----analysis-----
        x1 = x[pin - hM1:pin + hM2]                            # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                         # compute dft
        ploc = UF.peakDetection(mX, hN, t)                     # detect peak locations
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)    # refine peak values
        ipfreq = fs * iploc / N                                # peak locations in Hz
        f0t = UF.f0DetectionTwm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0

        # An f0 becomes "stable" when there was none and a positive one is
        # found, or when it stays within a fifth of the previous stable value.
        if (f0stable == 0 and f0t > 0) or (
                f0stable > 0 and np.abs(f0stable - f0t) < f0stable / 5.0):
            f0stable = f0t
        else:
            f0stable = 0

        hfreq, hmag, hphase = UF.harmonicDetection(
            ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs)       # find harmonics
        hfreqp = hfreq

        # -----synthesis-----
        Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)      # generate spec sines
        fftbuffer = np.real(ifft(Yh))                          # inverse FFT
        yh[:hNs - 1] = fftbuffer[hNs + 1:]                     # undo zero-phase window
        yh[hNs - 1:] = fftbuffer[:hNs + 1]
        y[pin - hNs:pin + hNs] += sw * yh                      # overlap-add
        pin += H                                               # advance sound pointer

    # Drop the hM2 samples prepended and the hM1 samples appended to x above.
    return y[hM2:y.size - hM1]
def harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope=0.01, minSineDur=0.02):
    """Analyse a sound with the sinusoidal harmonic model.

    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size (minimum 512), H: hop size in samples (must be positive),
    t: threshold in negative dB, nH: maximum number of harmonics,
    minf0: minimum f0 frequency in Hz, maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5),
    harmDevSlope: slope of harmonic deviation,
    minSineDur: minimum length of harmonics

    Returns xhfreq, xhmag, xhphase: harmonic frequencies, magnitudes and
    phases, one row per analysed frame. Only xhfreq is passed through
    UF.cleaningSineTracks before being returned.

    Raises ValueError when H is not positive, or when the input is too
    short for a single analysis frame.
    """
    if H <= 0:
        raise ValueError("H (hop size) must be a positive number of samples")

    # Floor division keeps every size usable as an array index, whatever
    # division semantics the interpreter applies to "/".
    hN = N // 2                          # size of positive spectrum
    hM1 = (w.size + 1) // 2              # half analysis window size by rounding
    hM2 = w.size // 2                    # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)      # zeros at the beginning center the first window at sample 0
    x = np.append(x, np.zeros(hM2))      # zeros at the end (hM2 samples here) to analyse the last sample
    pin = hM1                            # sound pointer, placed in the middle of the analysis window
    pend = x.size - hM1                  # last sample to start a frame (inclusive in the loop below)
    w = w / sum(w)                       # normalize analysis window

    hfreqp = []                          # harmonic frequencies of the previous frame
    f0stable = 0                         # last f0 considered stable (0 means "none")
    # Frames are collected in lists and stacked once after the loop.
    freq_frames = []
    mag_frames = []
    phase_frames = []
    while pin <= pend:
        x1 = x[pin - hM1:pin + hM2]                            # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                         # compute dft
        ploc = UF.peakDetection(mX, hN, t)                     # detect peak locations
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)    # refine peak values
        ipfreq = fs * iploc / N                                # peak locations in Hz
        f0t = UF.f0DetectionTwm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0

        # An f0 becomes "stable" when there was none and a positive one is
        # found, or when it stays within a fifth of the previous stable value.
        if (f0stable == 0 and f0t > 0) or (
                f0stable > 0 and np.abs(f0stable - f0t) < f0stable / 5.0):
            f0stable = f0t
        else:
            f0stable = 0

        hfreq, hmag, hphase = UF.harmonicDetection(
            ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs, harmDevSlope)  # find harmonics
        hfreqp = hfreq

        # Store copies so that each row is a snapshot of this frame.
        freq_frames.append(np.array(hfreq))
        mag_frames.append(np.array(hmag))
        phase_frames.append(np.array(hphase))
        pin += H                                               # advance sound pointer

    if not freq_frames:
        raise ValueError("input sound is too short to analyse a single frame")

    xhfreq = np.vstack(freq_frames)
    xhmag = np.vstack(mag_frames)
    xhphase = np.vstack(phase_frames)
    # minSineDur is given in seconds; it is converted to a number of frames here.
    xhfreq = UF.cleaningSineTracks(xhfreq, round(fs * minSineDur / H))
    return xhfreq, xhmag, xhphase
def hprModel(x, fs, w, N, t, nH, minf0, maxf0, f0et):
    """Analyse and resynthesise a sound with the harmonic plus residual model.

    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size (minimum 512), t: threshold in negative dB,
    nH: maximum number of harmonics,
    minf0: minimum f0 frequency in Hz, maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5)

    Returns y: output sound, yh: harmonic component, xr: residual component.
    All three arrays have the same length as x, and y is yh + xr.
    """
    # Floor division keeps every size usable as an array index, whatever
    # division semantics the interpreter applies to "/".
    hN = N // 2                          # size of positive spectrum
    hM1 = (w.size + 1) // 2              # half analysis window size by rounding
    hM2 = w.size // 2                    # half analysis window size by floor
    Ns = 512                             # FFT size for synthesis (even)
    H = Ns // 4                          # hop size used for analysis and synthesis
    hNs = Ns // 2                        # half synthesis FFT size

    # The residual frame starts at ri = pin - hNs - 1. Starting the pointer
    # at hNs + 1 at the earliest keeps ri >= 0; a start of -1 would select an
    # empty slice of x. Positions are unchanged whenever hM1 > hNs.
    pin = max(hNs + 1, hM1)              # sound pointer, placed in the middle of the analysis window
    pend = x.size - max(hNs, hM1)        # last sample to start a frame
    yhw = np.zeros(Ns)                   # harmonic output frame
    xrw = np.zeros(Ns)                   # residual output frame
    yh = np.zeros(x.size)                # harmonic output array
    xr = np.zeros(x.size)                # residual output array
    w = w / sum(w)                       # normalize analysis window

    # Overlap-add window: a triangular window of 2*H samples divided by the
    # normalized Blackman-Harris window, zero outside the central 2*H samples.
    sw = np.zeros(Ns)
    ow = triang(2 * H)
    bh = blackmanharris(Ns)
    bh = bh / sum(bh)
    wr = bh                              # window for residual analysis
    sw[hNs - H:hNs + H] = ow / bh[hNs - H:hNs + H]

    hfreqp = []                          # harmonic frequencies of the previous frame
    f0stable = 0                         # last f0 considered stable (0 means "none")
    while pin < pend:
        # -----analysis-----
        x1 = x[pin - hM1:pin + hM2]                            # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                         # compute dft
        ploc = UF.peakDetection(mX, hN, t)                     # find peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)    # refine peak values
        ipfreq = fs * iploc / N                                # peak locations in Hz
        f0t = UF.f0DetectionTwm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0

        # An f0 becomes "stable" when there was none and a positive one is
        # found, or when it stays within a fifth of the previous stable value.
        if (f0stable == 0 and f0t > 0) or (
                f0stable > 0 and np.abs(f0stable - f0t) < f0stable / 5.0):
            f0stable = f0t
        else:
            f0stable = 0

        hfreq, hmag, hphase = UF.harmonicDetection(
            ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs)       # find harmonics
        hfreqp = hfreq

        ri = pin - hNs - 1                                     # input sound pointer for residual analysis
        xw2 = x[ri:ri + Ns] * wr                               # window the input sound
        fftbuffer = np.zeros(Ns)
        fftbuffer[:hNs] = xw2[hNs:]                            # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xw2[:hNs]
        X2 = fft(fftbuffer)                                    # FFT for residual analysis

        # -----synthesis-----
        Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)      # generate sines
        Xr = X2 - Yh                                           # residual complex spectrum

        fftbuffer = np.real(ifft(Yh))                          # inverse FFT of harmonic spectrum
        yhw[:hNs - 1] = fftbuffer[hNs + 1:]                    # undo zero-phase window
        yhw[hNs - 1:] = fftbuffer[:hNs + 1]

        fftbuffer = np.real(ifft(Xr))                          # inverse FFT of residual spectrum
        xrw[:hNs - 1] = fftbuffer[hNs + 1:]                    # undo zero-phase window
        xrw[hNs - 1:] = fftbuffer[:hNs + 1]

        yh[ri:ri + Ns] += sw * yhw                             # overlap-add for sines
        xr[ri:ri + Ns] += sw * xrw                             # overlap-add for residual
        pin += H                                               # advance sound pointer

    y = yh + xr                                                # sum of harmonic and residual components
    return y, yh, xr
def hpsModel(x, fs, w, N, t, nH, minf0, maxf0, f0et, stocf):
    """Analyse and resynthesise a sound with the harmonic plus stochastic model.

    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size (minimum 512), t: threshold in negative dB,
    nH: maximum number of harmonics,
    minf0: minimum f0 frequency in Hz, maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5),
    stocf: decimation factor of mag spectrum for stochastic analysis

    Returns y: output sound, yh: harmonic component, yst: stochastic
    component. All three arrays have the same length as x, and y is yh + yst.
    The stochastic component uses random phases (np.random.rand), so it
    differs from call to call unless the NumPy random state is fixed.
    """
    # Floor division keeps every size usable as an array index, whatever
    # division semantics the interpreter applies to "/".
    hN = N // 2                          # size of positive spectrum
    hM1 = (w.size + 1) // 2              # half analysis window size by rounding
    hM2 = w.size // 2                    # half analysis window size by floor
    Ns = 512                             # FFT size for synthesis (even)
    H = Ns // 4                          # hop size used for analysis and synthesis
    hNs = Ns // 2                        # half synthesis FFT size

    # The residual frame starts at ri = pin - hNs - 1. Starting the pointer
    # at hNs + 1 at the earliest keeps ri >= 0; a start of -1 would select an
    # empty slice of x. Positions are unchanged whenever hM1 > hNs.
    pin = max(hNs + 1, hM1)              # sound pointer, placed in the middle of the analysis window
    pend = x.size - max(hNs, hM1)        # last sample to start a frame
    yhw = np.zeros(Ns)                   # harmonic output frame
    ystw = np.zeros(Ns)                  # stochastic output frame
    yh = np.zeros(x.size)                # harmonic output array
    yst = np.zeros(x.size)               # stochastic output array
    w = w / sum(w)                       # normalize analysis window

    # Overlap-add window for the harmonic component: a triangular window of
    # 2*H samples divided by the normalized Blackman-Harris window.
    sw = np.zeros(Ns)
    ow = triang(2 * H)
    bh = blackmanharris(Ns)
    bh = bh / sum(bh)
    wr = bh                              # window for residual analysis
    sw[hNs - H:hNs + H] = ow / bh[hNs - H:hNs + H]
    sws = H * hanning(Ns) / 2            # synthesis window for stochastic component

    hfreqp = []                          # harmonic frequencies of the previous frame
    f0stable = 0                         # last f0 considered stable (0 means "none")
    while pin < pend:
        # -----analysis-----
        x1 = x[pin - hM1:pin + hM2]                            # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                         # compute dft
        ploc = UF.peakDetection(mX, hN, t)                     # find peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)    # refine peak values
        ipfreq = fs * iploc / N                                # convert peak locations to Hz
        f0t = UF.f0DetectionTwm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0

        # An f0 becomes "stable" when there was none and a positive one is
        # found, or when it stays within a fifth of the previous stable value.
        if (f0stable == 0 and f0t > 0) or (
                f0stable > 0 and np.abs(f0stable - f0t) < f0stable / 5.0):
            f0stable = f0t
        else:
            f0stable = 0

        hfreq, hmag, hphase = UF.harmonicDetection(
            ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs)       # find harmonics
        hfreqp = hfreq

        ri = pin - hNs - 1                                     # input sound pointer for residual analysis
        xw2 = x[ri:ri + Ns] * wr                               # window the input sound
        fftbuffer = np.zeros(Ns)
        fftbuffer[:hNs] = xw2[hNs:]                            # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xw2[:hNs]
        X2 = fft(fftbuffer)                                    # FFT for residual analysis

        # -----synthesis-----
        Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)      # spec sines of harmonic component
        Xr = X2 - Yh                                           # residual complex spectrum
        mXr = 20 * np.log10(abs(Xr[:hNs]))                     # magnitude spectrum of residual (dB)
        # Decimate the magnitude spectrum; clamping at -200 dB avoids -Inf.
        # The sample count handed to resample has to be an integer.
        mXrenv = resample(np.maximum(-200, mXr), int(mXr.size * stocf))
        mYst = resample(mXrenv, hNs)                           # interpolate to original size
        mYst = 10 ** (mYst / 20)                               # dB to linear magnitude
        pYst = 2 * np.pi * np.random.rand(hNs)                 # random phase values
        Yst = np.zeros(Ns, dtype=complex)
        Yst[:hNs] = mYst * np.exp(1j * pYst)                   # positive frequencies
        Yst[hNs + 1:] = mYst[:0:-1] * np.exp(-1j * pYst[:0:-1])  # negative frequencies

        fftbuffer = np.real(ifft(Yh))                          # inverse FFT of harmonic spectrum
        yhw[:hNs - 1] = fftbuffer[hNs + 1:]                    # undo zero-phase window
        yhw[hNs - 1:] = fftbuffer[:hNs + 1]

        fftbuffer = np.real(ifft(Yst))                         # inverse FFT of stochastic spectrum
        ystw[:hNs - 1] = fftbuffer[hNs + 1:]                   # undo zero-phase window
        ystw[hNs - 1:] = fftbuffer[:hNs + 1]

        yh[ri:ri + Ns] += sw * yhw                             # overlap-add for sines
        yst[ri:ri + Ns] += sws * ystw                          # overlap-add for stochastic
        pin += H                                               # advance sound pointer

    y = yh + yst                                               # sum of harmonic and stochastic components
    return y, yh, yst
# Harmonic / residual analysis of a single frame around sample `pos`.
# x, pos, hM1, hM2, w, N, t, nH and fs are not set in this fragment and are
# expected to be defined earlier in the script.
minf0 = 420
maxf0 = 460
f0et = 5
minSineDur = .1
harmDevSlope = 0.01
Ns = 512
H = Ns // 4
stocf = .2

# Floor division keeps the slice bounds and bin counts below integral,
# whatever division semantics the interpreter applies to "/".
x1 = x[pos - hM1:pos + hM2]                       # frame for the harmonic analysis
x2 = x[pos - Ns // 2 - 1:pos + Ns // 2 - 1]       # Ns-sample frame for the residual analysis
mX, pX = DFT.dftAnal(x1, w, N)
ploc = UF.peakDetection(mX, N // 2, t)
iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
ipfreq = fs * iploc / N                           # peak locations in Hz
f0 = UF.f0DetectionTwm(ipfreq, ipmag, f0et, minf0, maxf0)
hfreqp = []
hfreq, hmag, hphase = UF.harmonicDetection(ipfreq, ipmag, ipphase, f0, nH, hfreqp, fs, harmDevSlope)
Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)
mYh = 20 * np.log10(abs(Yh[:Ns // 2]))            # magnitude (dB) of the harmonic spectrum
bh = blackmanharris(Ns)
X2 = fft(fftshift(x2 * bh / sum(bh)))             # spectrum of the windowed input frame
Xr = X2 - Yh                                      # residual: input spectrum minus harmonic spectrum
mXr = 20 * np.log10(abs(Xr[:Ns // 2]))            # magnitude (dB) of the residual spectrum
# Decimate the mag spectrum; the sample count handed to resample has to be an integer.
mYst = resample(np.maximum(-200, mXr), int(mXr.size * stocf))

maxplotfreq = 8000.0
plt.figure(1, figsize=(9, 7))
plt.subplot(2,1,1)
binFreq = (fs/2.0)*np.arange(mX.size)/(mX.size)
# Harmonic / residual analysis of a single frame around sample `pos`.
# x, pos, hM1, hM2, w, N, t, nH and fs are not set in this fragment and are
# expected to be defined earlier in the script.
minf0 = 420
maxf0 = 460
f0et = 5
minSineDur = .1
harmDevSlope = 0.01
Ns = 512
H = Ns // 4
stocf = .2

# Floor division keeps the slice bounds and bin counts below integral,
# whatever division semantics the interpreter applies to "/".
x1 = x[pos - hM1:pos + hM2]                       # frame for the harmonic analysis
x2 = x[pos - Ns // 2 - 1:pos + Ns // 2 - 1]       # Ns-sample frame for the residual analysis
mX, pX = DFT.dftAnal(x1, w, N)
ploc = UF.peakDetection(mX, N // 2, t)
iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
ipfreq = fs * iploc / N                           # peak locations in Hz
f0 = UF.f0DetectionTwm(ipfreq, ipmag, f0et, minf0, maxf0)
hfreqp = []
hfreq, hmag, hphase = UF.harmonicDetection(ipfreq, ipmag, ipphase, f0, nH, hfreqp, fs, harmDevSlope)
Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)
mYh = 20 * np.log10(abs(Yh[:Ns // 2]))            # magnitude (dB) of the harmonic spectrum
bh = blackmanharris(Ns)
X2 = fft(fftshift(x2 * bh / sum(bh)))             # spectrum of the windowed input frame
Xr = X2 - Yh                                      # residual: input spectrum minus harmonic spectrum
mXr = 20 * np.log10(abs(Xr[:Ns // 2]))            # magnitude (dB) of the residual spectrum
# Decimate the mag spectrum; the sample count handed to resample has to be an integer.
mYst = resample(np.maximum(-200, mXr), int(mXr.size * stocf))

maxplotfreq = 8000.0
plt.figure(1, figsize=(9, 7))
plt.subplot(2, 1, 1)