def sineModelSynth(tfreq, tmag, tphase, N, H, fs):
    """
    Overlap-add synthesis of a sound from per-frame sinusoidal tracks
    tfreq, tmag, tphase: frequencies, magnitudes and phases of the sinusoids,
        indexed [frame, sinusoid]; when tphase is empty (size 0) the phases
        are generated by propagating the frequencies from random start values
    N: synthesis FFT size, H: hop size, fs: sampling rate
    returns y: output array sound
    """
    half = N // 2                                    # half of the synthesis FFT size
    nframes = tfreq.shape[0]                         # one synthesis frame per row of tfreq
    y = np.zeros(H * (nframes + 3))                  # output buffer

    # synthesis window: triangular overlap window divided by the normalized
    # blackman-harris window, non-zero only on the central 2*H samples
    lo, hi = half - H, half + H
    bhnorm = blackmanharris(N)
    bhnorm = bhnorm / sum(bhnorm)
    synwin = np.zeros(N)
    synwin[lo:hi] = triang(2 * H) / bhnorm[lo:hi]

    prevfreq = tfreq[0, :]                           # frequencies of the previous frame
    phases = 2 * np.pi * np.random.rand(tfreq[0, :].size)   # random initial phases
    have_phases = tphase.size > 0
    for l in range(nframes):
        curfreq = tfreq[l, :]
        if have_phases:
            phases = tphase[l, :]                    # use the supplied phases
        else:
            # advance each phase by the mean of the previous and current frequency over one hop
            phases += (np.pi * (prevfreq + curfreq) / fs) * H
        Y = UF.genSpecSines(curfreq, tmag[l, :], phases, N, fs)   # spectrum of the sines
        prevfreq = curfreq
        phases = phases % (2 * np.pi)                # keep phases inside [0, 2*pi)
        frame = np.real(fftshift(ifft(Y)))           # time-domain frame, centered
        start = l * H                                # write position of this frame
        y[start:start + N] += synwin * frame         # windowed overlap-add

    y = np.delete(y, range(half))                    # drop half a window at the start
    y = np.delete(y, range(y.size - half, y.size))   # drop half a window at the end
    return y
def sineModelSynth(tfreq, tmag, tphase, N, H, fs):
    """
    Synthesis of a sound using the sinusoidal model
    tfreq,tmag,tphase: frequencies, magnitudes and phases of sinusoids
        (indexed [frame, sinusoid]); an empty tphase makes the function
        generate phases by propagation from random initial values
    N: synthesis FFT size, H: hop size, fs: sampling rate
    returns y: output array sound
    """
    # Integer division: hN is used as a slice bound and in range(), which
    # reject the float that N/2 produces on Python 3.
    hN = N // 2                                             # half of FFT size for synthesis
    L = tfreq.shape[0]                                      # number of frames
    pout = 0                                                # initialize output sound pointer
    ysize = H * (L + 3)                                     # output sound size
    y = np.zeros(ysize)                                     # initialize output array
    sw = np.zeros(N)                                        # initialize synthesis window
    ow = triang(2 * H)                                      # triangular window
    sw[hN - H:hN + H] = ow                                  # add triangular window
    bh = blackmanharris(N)                                  # blackmanharris window
    bh = bh / sum(bh)                                       # normalized blackmanharris window
    sw[hN - H:hN + H] = sw[hN - H:hN + H] / bh[hN - H:hN + H]   # normalized synthesis window
    lastytfreq = tfreq[0, :]                                # initialize synthesis frequencies
    ytphase = 2 * np.pi * np.random.rand(tfreq[0, :].size)  # initialize synthesis phases
    for l in range(L):                                      # iterate over all frames
        if tphase.size > 0:                                 # phases were supplied: use them
            ytphase = tphase[l, :]
        else:                                               # no phases: propagate them
            ytphase += (np.pi * (lastytfreq + tfreq[l, :]) / fs) * H
        Y = UF.genSpecSines(tfreq[l, :], tmag[l, :], ytphase, N, fs)   # generate sines in the spectrum
        lastytfreq = tfreq[l, :]                            # save frequency for phase propagation
        ytphase = ytphase % (2 * np.pi)                     # make phase inside 2*pi
        yw = np.real(fftshift(ifft(Y)))                     # compute inverse FFT
        y[pout:pout + N] += sw * yw                         # overlap-add and apply a synthesis window
        pout += H                                           # advance sound pointer
    y = np.delete(y, range(hN))                             # delete half of first window
    y = np.delete(y, range(y.size - hN, y.size))            # delete half of the last window
    return y
def stochasticResidualAnal(x, N, H, sfreq, smag, sphase, fs, stocf):
    """
    Subtract sinusoids from a sound and approximate the residual with an envelope
    x: input sound, N: fft size, H: hop-size
    sfreq, smag, sphase: sinusoidal frequencies, magnitudes and phases
        (indexed [frame, sinusoid])
    fs: sampling rate; stocf: stochastic factor, used in the approximation
    returns stocEnv: stochastic approximation of residual, one row per frame
        with int((N//2) * stocf) columns; with zero frames an array with zero
        rows is returned
    """
    hN = N // 2                                   # half of fft size
    x = np.append(np.zeros(hN), x)                # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hN))                # add zeros at the end to analyze last sample
    bh = blackmanharris(N)                        # analysis window applied to each input frame
    w = bh / sum(bh)                              # normalize the window
    L = sfreq.shape[0]                            # number of frames, this works if no sines
    # resample() needs an integer sample count; the residual magnitude
    # spectrum always has hN bins, so the envelope size is fixed up front.
    envSize = int(hN * stocf)
    stocEnv = np.zeros((L, envSize))              # preallocated instead of vstack-ing per frame
    pin = 0
    for l in range(L):
        xw = x[pin:pin + N] * w                   # window the input sound
        X = fft(fftshift(xw))                     # compute FFT
        Yh = UF_C.genSpecSines(N * sfreq[l, :] / fs, smag[l, :], sphase[l, :], N)   # generate spec sines
        Xr = X - Yh                               # subtract sines from original spectrum
        mXr = 20 * np.log10(abs(Xr[:hN]))         # magnitude spectrum of residual (dB)
        # floor the spectrum at -200 dB (also replaces -inf from log10(0)), then decimate it
        stocEnv[l, :] = resample(np.maximum(-200, mXr), envSize)
        pin += H                                  # advance sound pointer
    return stocEnv
def sinewaveSynth(freq, mag, N, H, fs):
    """
    Synthesis of a time-varying sinusoid
    freq, mag: frequency and magnitude of the sinusoid, one value per frame
    N: synthesis FFT size, H: hop size, fs: sampling rate
    returns y: output array sound

    The phase is not an input: it starts at 0 and is propagated from frame
    to frame using the mean of the previous and current frequency.
    """
    # Integer division: hN is used as a slice bound and in range(), which
    # reject the float that N/2 produces on Python 3.
    hN = N // 2                                             # half of FFT size for synthesis
    L = freq.size                                           # number of frames
    pout = 0                                                # initialize output sound pointer
    ysize = H * (L + 3)                                     # output sound size
    y = np.zeros(ysize)                                     # initialize output array
    sw = np.zeros(N)                                        # initialize synthesis window
    ow = triang(2 * H)                                      # triangular window
    sw[hN - H:hN + H] = ow                                  # add triangular window
    bh = blackmanharris(N)                                  # blackmanharris window
    bh = bh / sum(bh)                                       # normalized blackmanharris window
    sw[hN - H:hN + H] = sw[hN - H:hN + H] / bh[hN - H:hN + H]   # normalized synthesis window
    lastfreq = freq[0]                                      # initialize synthesis frequency
    phase = 0                                               # initialize synthesis phase
    for l in range(L):                                      # iterate over all frames
        phase += (np.pi * (lastfreq + freq[l]) / fs) * H    # propagate phase
        Y = UF.genSpecSines(freq[l], mag[l], phase, N, fs)  # generate sine in the spectrum
        lastfreq = freq[l]                                  # save frequency for phase propagation
        yw = np.real(fftshift(ifft(Y)))                     # compute inverse FFT
        y[pout:pout + N] += sw * yw                         # overlap-add and apply a synthesis window
        pout += H                                           # advance sound pointer
    y = np.delete(y, range(hN))                             # delete half of first window
    y = np.delete(y, range(y.size - hN, y.size))            # delete half of the last window
    return y
def harmonicModel(x, fs, w, N, t, nH, minf0, maxf0, f0et):
    """
    Analysis/synthesis of a sound using the sinusoidal harmonic model
    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size (minimum 512), t: threshold in negative dB,
    nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz,
    maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5),
    returns y: output array sound (same length as x)
    """
    hM1 = int(math.floor((w.size + 1) / 2))          # half analysis window size by rounding
    hM2 = int(math.floor(w.size / 2))                # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)                  # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM1))                  # add zeros at the end to analyze last sample
    Ns = 512                                         # FFT size for synthesis (even)
    # Integer division: H and hNs feed slice bounds and the sound pointer,
    # which must be ints on Python 3.
    H = Ns // 4                                      # hop size used for analysis and synthesis
    hNs = Ns // 2                                    # half of synthesis FFT size
    pin = max(hNs, hM1)                              # init sound pointer in middle of anal window
    pend = x.size - max(hNs, hM1)                    # last sample to start a frame
    yh = np.zeros(Ns)                                # initialize output sound frame
    y = np.zeros(x.size)                             # initialize output array
    w = w / sum(w)                                   # normalize analysis window
    sw = np.zeros(Ns)                                # initialize synthesis window
    ow = triang(2 * H)                               # overlapping window
    sw[hNs - H:hNs + H] = ow
    bh = blackmanharris(Ns)                          # synthesis window
    bh = bh / sum(bh)                                # normalize synthesis window
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]   # window for overlap-add
    hfreqp = []                                      # harmonic frequencies of the previous frame
    f0t = 0
    f0stable = 0
    while pin < pend:
        # -----analysis-----
        x1 = x[pin - hM1:pin + hM2]                  # select frame
        mX, pX = DFT.dftAnal(x1, w, N)               # compute dft
        ploc = UF.peakDetection(mX, t)               # detect peak locations
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)   # refine peak values
        ipfreq = fs * iploc / N
        f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)   # find f0
        # consider f0 stable if it is the first one found, or if it is
        # within 20% of the previous stable one
        if ((f0stable == 0) & (f0t > 0)) \
                or ((f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0)):
            f0stable = f0t
        else:
            f0stable = 0
        hfreq, hmag, hphase = harmonicDetection(ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs)   # find harmonics
        hfreqp = hfreq
        # -----synthesis-----
        Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)   # generate spec sines
        fftbuffer = np.real(ifft(Yh))                # inverse FFT
        yh[:hNs - 1] = fftbuffer[hNs + 1:]           # undo zero-phase window
        yh[hNs - 1:] = fftbuffer[:hNs + 1]
        y[pin - hNs:pin + hNs] += sw * yh            # overlap-add
        pin += H                                     # advance sound pointer
    y = np.delete(y, range(hM2))                     # remove the zeros prepended to x
    y = np.delete(y, range(y.size - hM1, y.size))    # remove the zeros appended to x
    return y
def harmonicModel(x, fs, w, N, t, nH, minf0, maxf0, f0et):
    """
    Analysis/synthesis of a sound using the sinusoidal harmonic model
    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size (minimum 512), t: threshold in negative dB,
    nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz,
    maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5),
    returns y: output array sound (same length as x)
    """
    hM1 = int(math.floor((w.size + 1) / 2))          # half analysis window size by rounding
    hM2 = int(math.floor(w.size / 2))                # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)                  # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM1))                  # add zeros at the end to analyze last sample
    Ns = 512                                         # FFT size for synthesis (even)
    # Integer division keeps H, hNs and the sound pointer integral, so no
    # int() casts are needed at the slices (and the triangular window is no
    # longer passed through int(), which fails for an array).
    H = Ns // 4                                      # hop size used for analysis and synthesis
    hNs = Ns // 2                                    # half of synthesis FFT size
    pin = max(hNs, hM1)                              # init sound pointer in middle of anal window
    pend = x.size - max(hNs, hM1)                    # last sample to start a frame
    yh = np.zeros(Ns)                                # initialize output sound frame
    y = np.zeros(x.size)                             # initialize output array
    w = w / sum(w)                                   # normalize analysis window
    sw = np.zeros(Ns)                                # initialize synthesis window
    ow = triang(2 * H)                               # overlapping window
    sw[hNs - H:hNs + H] = ow
    bh = blackmanharris(Ns)                          # synthesis window
    bh = bh / sum(bh)                                # normalize synthesis window
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]   # window for overlap-add
    hfreqp = []                                      # harmonic frequencies of the previous frame
    f0t = 0
    f0stable = 0
    while pin < pend:
        # -----analysis-----
        x1 = x[pin - hM1:pin + hM2]                  # select frame
        mX, pX = DFT.dftAnal(x1, w, N)               # compute dft
        ploc = UF.peakDetection(mX, t)               # detect peak locations
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)   # refine peak values
        ipfreq = fs * iploc / N
        f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)   # find f0
        # consider f0 stable if it is the first one found, or if it is
        # within 20% of the previous stable one
        if ((f0stable == 0) & (f0t > 0)) \
                or ((f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0)):
            f0stable = f0t
        else:
            f0stable = 0
        hfreq, hmag, hphase = harmonicDetection(ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs)   # find harmonics
        hfreqp = hfreq
        # -----synthesis-----
        Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)   # generate spec sines
        fftbuffer = np.real(ifft(Yh))                # inverse FFT
        yh[:hNs - 1] = fftbuffer[hNs + 1:]           # undo zero-phase window
        yh[hNs - 1:] = fftbuffer[:hNs + 1]
        y[pin - hNs:pin + hNs] += sw * yh            # overlap-add
        pin += H                                     # advance sound pointer
    y = np.delete(y, range(hM2))                     # remove the zeros prepended to x
    y = np.delete(y, range(y.size - hM1, y.size))    # remove the zeros appended to x
    return y
def sprModel(x, fs, w, N, t):
    """
    Analysis/synthesis of a sound using the sinusoidal plus residual model, one frame at a time
    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size (minimum 512), t: threshold in negative dB,
    returns y: output sound, ys: sinusoidal component, xr: residual component
    """
    hM1 = int(math.floor((w.size + 1) / 2))          # half analysis window size by rounding
    hM2 = int(math.floor(w.size / 2))                # half analysis window size by floor
    Ns = 512                                         # FFT size for synthesis (even)
    # Integer division: H and hNs feed slice bounds and the sound pointer,
    # which must be ints on Python 3.
    H = Ns // 4                                      # hop size used for analysis and synthesis
    hNs = Ns // 2                                    # half of synthesis FFT size
    pin = max(hNs, hM1)                              # initialize sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)                    # last sample to start a frame
    ysw = np.zeros(Ns)                               # sinusoidal output frame
    xrw = np.zeros(Ns)                               # residual output frame
    ys = np.zeros(x.size)                            # sinusoidal output array
    xr = np.zeros(x.size)                            # residual output array
    w = w / sum(w)                                   # normalize analysis window
    sw = np.zeros(Ns)
    ow = triang(2 * H)                               # overlapping window
    sw[hNs - H:hNs + H] = ow
    bh = blackmanharris(Ns)                          # synthesis window
    bh = bh / sum(bh)                                # normalize synthesis window
    wr = bh                                          # window for residual
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]
    while pin < pend:
        # -----analysis-----
        x1 = x[pin - hM1:pin + hM2]                  # select frame
        mX, pX = DFT.dftAnal(x1, w, N)               # compute dft
        ploc = UF.peakDetection(mX, t)               # find peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)   # refine peak values (was computed twice)
        ipfreq = fs * iploc / float(N)               # convert peak locations to Hertz
        # NOTE(review): when hM1 <= hNs the first frame has ri == -1, and
        # x[-1:Ns-1] is then an empty slice for inputs longer than Ns --
        # confirm callers always use an analysis window longer than Ns.
        ri = pin - hNs - 1                           # input sound pointer for residual analysis
        xw2 = x[ri:ri + Ns] * wr                     # window the input sound
        fftbuffer = np.zeros(Ns)                     # reset buffer
        fftbuffer[:hNs] = xw2[hNs:]                  # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xw2[:hNs]
        X2 = fft(fftbuffer)                          # compute FFT for residual analysis
        # -----synthesis-----
        Ys = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs)   # generate spec of sinusoidal component
        Xr = X2 - Ys                                 # get the residual complex spectrum
        fftbuffer = np.real(ifft(Ys))                # inverse FFT of sinusoidal spectrum
        ysw[:hNs - 1] = fftbuffer[hNs + 1:]          # undo zero-phase window
        ysw[hNs - 1:] = fftbuffer[:hNs + 1]
        fftbuffer = np.real(ifft(Xr))                # inverse FFT of residual spectrum
        xrw[:hNs - 1] = fftbuffer[hNs + 1:]          # undo zero-phase window
        xrw[hNs - 1:] = fftbuffer[:hNs + 1]
        ys[ri:ri + Ns] += sw * ysw                   # overlap-add for sines
        xr[ri:ri + Ns] += sw * xrw                   # overlap-add for residual
        pin += H                                     # advance sound pointer
    y = ys + xr                                      # sum of sinusoidal and residual components
    return y, ys, xr
def genSpecSines(ipfreq, ipmag, ipphase, N, fs):
    """
    Build the complex spectrum of a set of sinusoids through the C routine
    ipfreq, ipmag, ipphase: sine peaks frequencies, magnitudes and phases
    N: size of the complex spectrum to generate; fs: sampling frequency
    returns Y: generated complex spectrum of sines
    """
    # frequencies are rescaled by N/fs before being handed to UF_C.genSpecSines
    scaled_freq = N * ipfreq / float(fs)
    return UF_C.genSpecSines(scaled_freq, ipmag, ipphase, N)
def popMode(mX: np.ndarray, pX: np.ndarray = None, N: int = 4096, H: int = 1024,
            fs: int = 44100, debug: int = 0) -> tuple:
    """
    Return the parameters of the first partial found in the spectrogram, then
    remove that partial from mX (mX is modified in place).

    - mX: the input spectrogram in linear scale and with dftFrames as rows.
    - pX: phase spectrum, 0 angle phase if None.
    - N: fftsize
    - H: hopsample
    - fs: frequency sample
    - debug: shows the spectrograms plot

    Returns a 4-tuple (magnitude in dB, decay * H / fs, frequency as
    bin * fs / N, phase) of the seam's peak, or (0, 0, 0, 0) when
    backward_integration reports no decay.
    """
    if pX is None:
        pX = np.full(mX.shape, 0)                    # phases are 0 if there is no pX in input
    seam = findVerticalSeam(mX)
    # NOTE(review): argPeakMax is an index into `seam` but is used below as a
    # row index of mX/pX -- this presumably relies on seam[k][0] == k; confirm.
    argPeakMax = np.argmax([mX[i][j] for i, j in seam])
    decay, short_seam = backward_integration(seam, mX)
    if not decay:
        return (0,) * 4
    if debug:                                        # shows the seam on the spectrogram
        tmX = np.full(mX.shape, np.nan)              # mX like matrix only for plot purpose
        for i, j in short_seam:
            tmX[i][j] = 1.0
        TS.stft_plot(mX, N, H, fs, show=False)
        TS.stft_plot(tmX, N, H, fs, show=True, mask=True)
    freq_bin, freq_mag, freq_phase = UF.peakInterp(mX[argPeakMax], pX[argPeakMax], seam[argPeakMax][1])
    for i, j in short_seam:
        if mX[i][j] < 1e-05:
            continue
        f_bin, f_mag, _ = UF.peakInterp(mX[i], pX[i], j)
        if f_mag < 1e-05:
            f_mag = 1e-05
        f_mag = 20 * np.log10(f_mag) + 2             # convert magnitude to dB (genSpecSines expect dB magnitudes)
        f_bin = f_bin * fs / N                       # convert y or j-th column to frequency
        # Use the caller's fs (not a hard-coded 44100) so the Hz value computed
        # above maps back to the same bin for any sampling rate.
        y = UF.genSpecSines(f_bin, f_mag, pX[i][j], N, fs)   # spectrum frame for this frequency and phase
        y = abs(y)[:N // 2 + 1]                      # positive half of the spectrum
        mX[i] = mX[i] - y                            # subtract the generated frequencies
        mX[i][mX[i] <= 1e-06] = 1e-06                # giving 10**(-120dB/20) to each element <=0 in linear scale
    if debug == 1:
        TS.stft_plot(mX, N, H, fs, show=True)
    # convert and return all values
    return 20 * np.log10(freq_mag), decay * H / fs, freq_bin * fs / N, freq_phase
def hpsModelSynth(hfreq, hmag, hphase, mYst, N, H, fs):
    """
    Synthesis of a sound using the harmonic plus stochastic model
    hfreq: harmonic frequencies, hmag: harmonic amplitudes,
    hphase: harmonic phases (indexed [frame, harmonic]); when empty the phases
        are propagated from random initial values
    mYst: stochastic envelope, one row per frame
    N: synthesis FFT size, H: hop size, fs: sampling rate
    returns y: output sound, yh: harmonic component, yst: stochastic component
    """
    # Integer division: hN is used as a slice bound and as a sample count for
    # resample() and np.random.rand(), all of which need an int on Python 3.
    hN = N // 2                                      # half of FFT size for synthesis
    L = hfreq[:, 0].size                             # number of frames
    nH = hfreq[0, :].size                            # number of harmonics
    pout = 0                                         # initialize output sound pointer
    ysize = H * (L + 4)                              # output sound size
    yhw = np.zeros(N)                                # harmonic output frame
    ysw = np.zeros(N)                                # stochastic output frame
    yh = np.zeros(ysize)                             # harmonic output array
    yst = np.zeros(ysize)                            # stochastic output array
    sw = np.zeros(N)
    ow = triang(2 * H)                               # overlapping window
    sw[hN - H:hN + H] = ow
    bh = blackmanharris(N)                           # synthesis window
    bh = bh / sum(bh)                                # normalize synthesis window
    sw[hN - H:hN + H] = sw[hN - H:hN + H] / bh[hN - H:hN + H]   # synthesis window for harmonic component
    sws = H * hanning(N) / 2                         # synthesis window for stochastic component
    lastyhfreq = hfreq[0, :]                         # initialize synthesis harmonic frequencies
    yhphase = 2 * np.pi * np.random.rand(nH)         # initialize synthesis harmonic phases
    for l in range(L):
        yhfreq = hfreq[l, :]                         # synthesis harmonics frequencies
        yhmag = hmag[l, :]                           # synthesis harmonic amplitudes
        mYrenv = mYst[l, :]                          # synthesis residual envelope
        if hphase.size > 0:
            yhphase = hphase[l, :]
        else:
            yhphase += (np.pi * (lastyhfreq + yhfreq) / fs) * H   # propagate phases
        lastyhfreq = yhfreq
        Yh = UF.genSpecSines(yhfreq, yhmag, yhphase, N, fs)   # generate spec sines
        mYs = resample(mYrenv, hN)                   # interpolate to original size
        mYs = 10 ** (mYs / 20)                       # dB to linear magnitude
        pYs = 2 * np.pi * np.random.rand(hN)         # generate phase random values
        Ys = np.zeros(N, dtype=complex)
        Ys[:hN] = mYs * np.exp(1j * pYs)             # generate positive freq.
        Ys[hN + 1:] = mYs[:0:-1] * np.exp(-1j * pYs[:0:-1])   # generate negative freq.
        fftbuffer = np.real(ifft(Yh))                # inverse FFT of harm spectrum
        yhw[:hN - 1] = fftbuffer[hN + 1:]            # undo zero-phase window
        yhw[hN - 1:] = fftbuffer[:hN + 1]
        fftbuffer = np.real(ifft(Ys))                # inverse FFT of stochastic approximation spectrum
        ysw[:hN - 1] = fftbuffer[hN + 1:]            # undo zero-phase window
        ysw[hN - 1:] = fftbuffer[:hN + 1]
        yh[pout:pout + N] += sw * yhw                # overlap-add for sines
        yst[pout:pout + N] += sws * ysw              # overlap-add for stoch
        pout += H                                    # advance sound pointer
    y = yh + yst                                     # sum harmonic and stochastic components
    return y, yh, yst
def sinesynth(freqs: np.ndarray, mag: int = 0, N: int = 4096, H: int = 1024,
              T: float = 1.0, fs: int = 44100) -> tuple:
    """
    Build the spectrogram of a set of decaying sinusoids with UF.genSpecSines().

    :param freqs: numpy array of frequencies
    :param mag: magnitude in dB of the first dftFrame; a fixed (negative) dB
        step is added to it after every frame
    :param N: fftsize
    :param H: hopsize
    :param T: duration of the sound in seconds
    :param fs: frequency sample
    :return: (xmX, xpX) -- magnitude spectrogram in dB and unwrapped phase
        spectrogram, one row per dftFrame, positive frequencies only
    """
    phase_tol = 1e-14                                # below this, real/imag parts are treated as 0
    n_frames = int(T * fs / H)                       # number of dftFrames in the output
    db_step = 20 * np.log10((1 / np.e)) / (fs / H) * 2.05   # dB added to the magnitude per dftFrame
    n_pos = N // 2 + 1                               # size of positive spectrum
    n_sines = freqs.shape[0]
    zero_phases = np.array([0] * n_sines)            # every sine keeps a constant phase of 0

    level = mag
    mag_frames = []
    phase_frames = []
    for _ in range(n_frames):
        levels = np.array([level] * n_sines)         # same dB magnitude for every sine
        # only the fftsize N is used, i.e. the window size is taken to be N
        X = UF.genSpecSines(freqs, levels, zero_phases, N, fs)
        pos = X[:n_pos]                              # positive-frequency part (view into X)

        lin = abs(pos)
        lin[lin < 1e-06] = 1e-06                     # floor before taking the log
        mag_frames.append(20 * np.log10(lin))        # magnitude spectrum in dB

        # zero out numerically tiny components so the phase is not noise
        pos.real[np.abs(pos.real) < phase_tol] = 0.0
        pos.imag[np.abs(pos.imag) < phase_tol] = 0.0
        phase_frames.append(np.unwrap(np.angle(pos)))   # unwrapped phase spectrum

        level += db_step
    return np.array(mag_frames), np.array(phase_frames)
def sineModelMultiRes(x, fs, wList, NList, t, BList):
    """
    Analysis/synthesis of a sound using the sinusoidal model with one analysis
    resolution per frequency band, without sine tracking
    x: input array sound, fs: sampling rate,
    wList: analysis windows, NList: sizes of the complex spectra,
    t: threshold in negative dB,
    BList: frequency bands as (min, max) pairs in Hz; band i keeps the peaks
        with min <= frequency < max found with wList[i] / NList[i]
    Only the first three entries of wList, NList and BList are used.
    returns y: output array sound
    """
    # -----synthesis params init-----
    Ns = 512                                         # FFT size for synthesis (even)
    # Integer division: H and hNs feed slice bounds and the sound pointer,
    # which must be ints on Python 3.
    H = Ns // 4                                      # hop size used for analysis and synthesis
    hNs = Ns // 2                                    # half of synthesis FFT size
    yw = np.zeros(Ns)                                # initialize output sound frame
    y = np.zeros(x.size)                             # initialize output array
    sw = np.zeros(Ns)                                # initialize synthesis window
    ow = triang(2 * H)                               # triangular window
    sw[hNs - H:hNs + H] = ow                         # add triangular window
    bh = blackmanharris(Ns)                          # blackmanharris window
    bh = bh / sum(bh)                                # normalized blackmanharris window
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]   # normalized synthesis window
    for i in range(3):
        # -----analysis params init-----
        w = wList[i]
        N = NList[i]
        Bmin = BList[i][0]
        Bmax = BList[i][1]
        hM1 = int(math.floor((w.size + 1) / 2))      # half analysis window size by rounding
        hM2 = int(math.floor(w.size / 2))            # half analysis window size by floor
        pin = max(hNs, hM1)                          # init sound pointer in middle of anal window
        pend = x.size - max(hNs, hM1)                # last sample to start a frame
        w = w / sum(w)                               # normalize analysis window
        while pin < pend:                            # while input sound pointer is within sound
            # -----analysis-----
            x1 = x[pin - hM1:pin + hM2]              # select frame
            mX, pX = DFT.dftAnal(x1, w, N)           # compute dft
            ploc = UF.peakDetection(mX, t)           # detect locations of peaks
            iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)   # refine peak values by interpolation
            ipfreq = fs * iploc / float(N)           # convert peak locations to Hertz
            inBand = np.logical_and(ipfreq >= Bmin, ipfreq < Bmax)   # peaks belonging to this band
            ipmag = ipmag[inBand]
            ipphase = ipphase[inBand]
            ipfreq = ipfreq[inBand]
            # -----synthesis-----
            Y = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs)   # generate sines in the spectrum
            fftbuffer = np.real(ifft(Y))             # compute inverse FFT
            yw[:hNs - 1] = fftbuffer[hNs + 1:]       # undo zero-phase window
            yw[hNs - 1:] = fftbuffer[:hNs + 1]
            y[pin - hNs:pin + hNs] += sw * yw        # overlap-add and apply a synthesis window
            pin += H                                 # advance sound pointer
    return y
def synth_rs(xr, f, m, p, recType, outDir, name, N=4096, H=128, fs=44100):
    """
    Synthesize sinusoids frame by frame, add the signal xr and write the sum to a wav file
    xr: signal added sample-by-sample to the synthesized sinusoids
        (presumably the residual -- confirm with the caller)
    f, m, p: frequencies, magnitudes and phases of the sinusoids, indexed
        [frame, sinusoid]; when p is empty the phases are propagated from
        random initial values
    recType, outDir: sub-directories of the hard-coded output root the
        process changes into before writing
    name: output file name without the '.wav' extension
    N: synthesis FFT size, H: hop size, fs: sampling rate
    returns yrs: the written signal (sinusoids + xr)

    Side effects: changes the current working directory and writes name + '.wav' there.
    """
    hN = N // 2                                      # half of FFT size for synthesis
    L = f.shape[0]                                   # number of frames
    pout = 0                                         # initialize output sound pointer
    ysize = H * (L + 3)                              # output sound size
    y = np.zeros(ysize)                              # initialize output array
    sw = np.zeros(N)                                 # initialize synthesis window
    ow = triang(2 * H)                               # triangular window
    sw[hN - H:hN + H] = ow                           # add triangular window
    bh = blackmanharris(N)                           # blackmanharris window
    bh = bh / sum(bh)                                # normalized blackmanharris window
    sw[hN - H:hN + H] = sw[hN - H:hN + H] / bh[hN - H:hN + H]   # normalized synthesis window
    lastytfreq = f[0, :]                             # initialize synthesis frequencies
    ytphase = 2 * np.pi * np.random.rand(f[0, :].size)   # initialize synthesis phases
    for l in range(L):
        if pout > y.shape[0] - N:                    # stop once a full frame no longer fits in y
            break
        if p.size > 0:                               # phases were supplied: use them
            ytphase = p[l, :]
        else:                                        # no phases: propagate them
            ytphase += (np.pi * (lastytfreq + f[l, :]) / fs) * H
        Y = UF.genSpecSines(f[l, :], m[l, :], ytphase, N, fs)   # generate sines in the spectrum
        Y[np.isnan(Y)] = 0                           # discard NaN bins before the inverse FFT
        lastytfreq = f[l, :]                         # save frequency for phase propagation
        yw = np.real(fftshift(ifft(Y)))              # compute inverse FFT
        y[pout:pout + N] += sw * yw                  # overlap-add and apply a synthesis window
        pout += H                                    # advance sound pointer
    y = np.delete(y, range(hN))                      # delete half of first window
    y = np.delete(y, range(y.size - hN, y.size))     # delete half of the last window

    # Bring both signals to the same length by dropping leading samples of
    # the longer one. (The y branch used to slice with Sxr - Sy, a negative
    # index that kept only Sy - Sxr samples and made the sum below fail.)
    Sy = y.shape[0]
    Sxr = xr.shape[0]
    if Sy > Sxr:
        y = y[Sy - Sxr:]
    elif Sy < Sxr:
        xr = xr[Sxr - Sy:]
    yrs = y + xr

    # NOTE(review): hard-coded, user-specific output root and a process-wide
    # chdir; kept because callers may rely on the working directory afterwards.
    os.chdir('/home/tgoodall/sms-tools/software/models/Overtone_Arrays/' + recType + '/' + outDir)
    outputFile = name + '.wav'
    UF.wavwrite(yrs, fs, outputFile)
    return yrs
def sineModelMultiRes(x, fs, w, N, t, B):
    """
    Analysis/synthesis of a sound using the sinusoidal model, without sine tracking
    x: input array sound, fs: sampling rate,
    w: array of analysis windows, N: array of sizes of complex spectrum,
    t: threshold in negative dB,
    B: array of frequency bands as (lo, hi) pairs in Hz; only the peaks with
        lo <= frequency < hi found with w[i] / N[i] are synthesized
    returns y: output array sound
    """
    hM1 = [int(math.floor((_w.size + 1) / 2)) for _w in w]   # half analysis window(s) size by rounding
    hM2 = [int(math.floor(_w.size / 2)) for _w in w]         # half analysis window(s) size by floor
    Ns = 512                                         # FFT size for synthesis (even)
    # Integer division: H and hNs feed slice bounds and the sound pointer,
    # which must be ints on Python 3.
    H = Ns // 4                                      # hop size used for analysis and synthesis
    hNs = Ns // 2                                    # half of synthesis FFT size
    pin = max(hNs, max(hM1))                         # init sound pointer in middle of anal window
    pend = x.size - max(hNs, max(hM1))               # last sample to start a frame
    yw = np.zeros(Ns)                                # initialize output sound frame
    y = np.zeros(x.size)                             # initialize output array
    w = [_w / sum(_w) for _w in w]                   # normalize analysis window(s)
    sw = np.zeros(Ns)                                # initialize synthesis window
    ow = triang(2 * H)                               # triangular window
    sw[hNs - H:hNs + H] = ow                         # add triangular window
    bh = blackmanharris(Ns)                          # blackmanharris window
    bh = bh / sum(bh)                                # normalized blackmanharris window
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]   # normalized synthesis window
    while pin < pend:                                # while input sound pointer is within sound
        # -----analysis-----
        ipmag = ipphase = ipfreq = np.array([])      # initialize the synthesis arrays
        for i in range(0, len(w)):                   # for each window, use some loop variables ('_' prefix)
            _hM1, _hM2, _w, _N, _B = (hM1[i], hM2[i], w[i], N[i], B[i])
            x1 = x[pin - _hM1:pin + _hM2]            # select frame
            mX, pX = DFT.dftAnal(x1, _w, _N)         # compute dft
            ploc = UF.peakDetection(mX, t)           # detect locations of peaks
            iploc, _ipmag, _ipphase = UF.peakInterp(mX, pX, ploc)   # refine peak values by interpolation
            _ipfreq = fs * iploc / float(_N)         # convert peak locations to Hertz
            lo, hi = (_B[0], _B[1])                  # low/high from band tuples [..(lo, hi)..]
            # Select the in-band peaks instead of multiplying by a 0/1 mask:
            # zeroing an out-of-band peak would leave a sinusoid at 0 Hz with
            # magnitude value 0 in the synthesis list rather than removing it.
            mask = (_ipfreq >= lo) & (_ipfreq < hi)
            ipmag = np.append(ipmag, _ipmag[mask])
            ipphase = np.append(ipphase, _ipphase[mask])
            ipfreq = np.append(ipfreq, _ipfreq[mask])
        # -----synthesis-----
        Y = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs)   # generate sines in the spectrum
        fftbuffer = np.real(ifft(Y))                 # compute inverse FFT
        yw[:hNs - 1] = fftbuffer[hNs + 1:]           # undo zero-phase window
        yw[hNs - 1:] = fftbuffer[:hNs + 1]
        y[pin - hNs:pin + hNs] += sw * yw            # overlap-add and apply a synthesis window
        pin += H                                     # advance sound pointer
    return y
def sineModel(x, fs, w, N, t):
    """
    Analysis/synthesis of a sound using the sinusoidal model, without sine tracking
    x: input array sound, fs: sampling rate, w: analysis window,
    N: size of complex spectrum, t: threshold in negative dB
    returns y: output array sound
    """
    hM1 = int(math.floor((w.size + 1) / 2))          # half analysis window size by rounding
    hM2 = int(math.floor(w.size / 2))                # half analysis window size by floor
    Ns = 512                                         # FFT size for synthesis (even)
    # Integer division: H and hNs feed slice bounds and the sound pointer,
    # which must be ints on Python 3.
    H = Ns // 4                                      # hop size used for analysis and synthesis
    hNs = Ns // 2                                    # half of synthesis FFT size
    pin = max(hNs, hM1)                              # init sound pointer in middle of anal window
    pend = x.size - max(hNs, hM1)                    # last sample to start a frame
    yw = np.zeros(Ns)                                # initialize output sound frame
    y = np.zeros(x.size)                             # initialize output array
    w = w / sum(w)                                   # normalize analysis window
    sw = np.zeros(Ns)                                # initialize synthesis window
    ow = triang(2 * H)                               # triangular window
    sw[hNs - H:hNs + H] = ow                         # add triangular window
    bh = blackmanharris(Ns)                          # blackmanharris window
    bh = bh / sum(bh)                                # normalized blackmanharris window
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]   # normalized synthesis window
    while pin < pend:                                # while input sound pointer is within sound
        # -----analysis-----
        x1 = x[pin - hM1:pin + hM2]                  # select frame
        mX, pX = DFT.dftAnal(x1, w, N)               # compute dft
        ploc = UF.peakDetection(mX, t)               # detect locations of peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)   # refine peak values by interpolation
        ipfreq = fs * iploc / float(N)               # convert peak locations to Hertz
        # -----synthesis-----
        Y = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs)   # generate sines in the spectrum
        fftbuffer = np.real(ifft(Y))                 # compute inverse FFT
        yw[:hNs - 1] = fftbuffer[hNs + 1:]           # undo zero-phase window
        yw[hNs - 1:] = fftbuffer[:hNs + 1]
        y[pin - hNs:pin + hNs] += sw * yw            # overlap-add and apply a synthesis window
        pin += H                                     # advance sound pointer
    return y
def sineModel(x, fs, w, N, t):
    """
    Analysis/synthesis of a sound using the sinusoidal model
    x: input array sound, fs: sampling rate, w: analysis window,
    N: size of complex spectrum, t: threshold in negative dB
    returns y: output array sound
    """
    # Integer division throughout: these values feed slice bounds and the
    # sound pointer, which must be ints on Python 3.
    hN = N // 2                                      # size of positive spectrum
    hM1 = int(math.floor((w.size + 1) / 2))          # half analysis window size by rounding
    hM2 = int(math.floor(w.size / 2))                # half analysis window size by floor
    Ns = 512                                         # FFT size for synthesis (even)
    H = Ns // 4                                      # hop size used for analysis and synthesis
    hNs = Ns // 2                                    # half of synthesis FFT size
    pin = max(hNs, hM1)                              # init sound pointer in middle of anal window
    pend = x.size - max(hNs, hM1)                    # last sample to start a frame
    yw = np.zeros(Ns)                                # initialize output sound frame
    y = np.zeros(x.size)                             # initialize output array
    w = w / sum(w)                                   # normalize analysis window
    sw = np.zeros(Ns)                                # initialize synthesis window
    ow = triang(2 * H)                               # triangular window
    sw[hNs - H:hNs + H] = ow                         # add triangular window
    bh = blackmanharris(Ns)                          # blackmanharris window
    bh = bh / sum(bh)                                # normalized blackmanharris window
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]   # normalized synthesis window
    while pin < pend:                                # while input sound pointer is within sound
        # -----analysis-----
        x1 = x[pin - hM1:pin + hM2]                  # select frame
        mX, pX = DFT.dftAnal(x1, w, N)               # compute dft
        ploc = UF.peakDetection(mX, hN, t)           # detect locations of peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)   # refine peak values by interpolation
        # -----synthesis-----
        # Peak locations are bins of the size-N analysis FFT, so the frequency
        # in Hz is fs*iploc/N. The earlier fs*(iploc*Ns/N)/N applied an extra
        # Ns/N factor and was only right when N == Ns.
        ipfreq = fs * iploc / float(N)               # convert peak locations to Hertz
        Y = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs)   # generate sines in the spectrum
        fftbuffer = np.real(ifft(Y))                 # compute inverse FFT
        yw[:hNs - 1] = fftbuffer[hNs + 1:]           # undo zero-phase window
        yw[hNs - 1:] = fftbuffer[:hNs + 1]
        y[pin - hNs:pin + hNs] += sw * yw            # overlap-add and apply a synthesis window
        pin += H                                     # advance sound pointer
    return y
def sineSubtraction(x, N, H, sfreq, smag, sphase, fs):
    """
    Subtract sinusoids from a sound
    x: input sound, N: fft-size, H: hop-size
    sfreq, smag, sphase: sinusoidal frequencies, magnitudes and phases (one row per frame)
    fs: sampling rate
    returns xr: residual sound
    """

    half = N // 2                                           # half of fft size
    pad = np.zeros(half)
    # pad both ends so the first window is centered at sample 0 and the last sample is analyzed
    padded = np.concatenate((pad, np.ravel(x), pad))
    win = blackmanharris(N)                                 # analysis window
    win = win / sum(win)                                    # normalized
    centre = slice(half - H, half + H)
    synth_win = np.zeros(N)                                 # synthesis window: triangular / analysis window
    synth_win[centre] = triang(2 * H) / win[centre]
    n_frames = sfreq.shape[0]                               # number of frames
    resid = np.zeros(padded.size)                           # overlap-add buffer
    for frame in range(n_frames):
        start = frame * H                                   # frame start in the padded signal
        stop = start + N
        spectrum = fft(fftshift(padded[start:stop] * win))  # spectrum of the windowed input
        sines = UF_C.genSpecSines(N * sfreq[frame, :] / fs, smag[frame, :],
                                  sphase[frame, :], N)      # spectrum of the sinusoids
        residual_frame = np.real(fftshift(ifft(spectrum - sines)))
        resid[start:stop] += residual_frame * synth_win     # overlap-add
    # drop the half windows that were added at both ends
    return resid[half:resid.size - half].copy()
sw = np.zeros(N) ow = triang(2 * H) sw[hN-H:hN+H] = ow bh = blackmanharris(N) bh = bh / sum(bh) sw[hN-H:hN+H] = sw[hN-H:hN+H] / bh[hN-H:hN+H] lastytfreq = tfreq[0,:] ytphase = 2*np.pi*np.random.rand(tfreq[0,:].size) for l in range(L): # iterate over all frames if (tphase.size > 0): ytphase = tphase[l, :] else: # propogate phases ytphase += (np.pi * (lastytfreq + tfreq[l, :])/fs) * H Y = UF.genSpecSines(tfreq[l,:], tmag[l, :], ytphase, N, fs) lastytfreq = tfreq[l,:] # save frequency for phase propogation ytphase = ytphase % (2*np.pi) yw = np.real(fftshift(ifft(Y))) # overlap add and apply a synthesis window y[pout:pout+N] += sw * yw pout += H # delete half of first window y = np.delete(y, range(hN)) # delete half of last window y = np.delete(y, range(y.size-hN, y.size)) return y inputFile = '../../sounds/oboe-A4.wav' window = 'hamming'
def sineModelMultiRes(x, fs, multi_w, multi_N, t, multi_B):
    """
    Analysis/synthesis of a sound using the multi-resolution sinusoidal model, without sine tracking
    x: input array sound, fs: sampling rate,
    multi_w: analysis windows (one per band), multi_N: FFT sizes (one per band),
    t: threshold in negative dB,
    multi_B: upper frequency edge in Hz of each band; band i keeps peaks with
             multi_B[i-1] <= f < multi_B[i] (band 0 has no lower edge)
    returns y: output array sound (same length as x)
    """

    bands = range(len(multi_B))                                      # to iterate over bands
    multi_w_size = np.array([multi_w[i].size for i in bands])
    multi_hM1 = np.floor((multi_w_size + 1) / 2.0).astype(int)       # half analysis window sizes by rounding
    multi_hM2 = np.floor(multi_w_size / 2.0).astype(int)             # half analysis window sizes by floor
    # Integer (floor) division: these values are used as slice indices and
    # true division would turn them (and multi_pin) into floats on Python 3.
    Ns = 512                                                         # FFT size for synthesis (even)
    H = Ns // 4                                                      # hop size used for analysis and synthesis
    hNs = Ns // 2                                                    # half of synthesis FFT size
    # NOTE(review): every band gets its own start pointer, so bands with
    # different window sizes are analyzed around different samples while the
    # output frame is placed at band 0's pointer. Kept as in the original.
    multi_pin = np.maximum(hNs, multi_hM1)                           # init sound pointers in middle of analysis windows
    multi_pend = x.size - multi_pin                                  # last samples to start a frame
    yw_combined = np.zeros(Ns)                                       # output sound frame
    y_combined = np.zeros(x.size)                                    # output array
    multi_w = [multi_w[i] / sum(multi_w[i]) for i in bands]          # normalize analysis windows
    bh = blackmanharris(Ns)                                          # blackmanharris window
    bh = bh / sum(bh)                                                # normalized blackmanharris window
    sw = np.zeros(Ns)                                                # synthesis window: triangular / blackmanharris
    sw[hNs - H:hNs + H] = triang(2 * H) / bh[hNs - H:hNs + H]
    while (multi_pin < multi_pend).all():                            # while all pointers are within sound
        #-----analysis-----
        sel_freq, sel_mag, sel_phase = [], [], []
        for i in bands:
            x1 = x[multi_pin[i] - multi_hM1[i]:multi_pin[i] + multi_hM2[i]]   # select frame
            mX, pX = DFT.dftAnal(x1, multi_w[i], multi_N[i])         # compute dft
            ploc = UF.peakDetection(mX, t)                           # detect locations of peaks
            iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)      # refine peak values by interpolation
            ipfreq = np.asarray(fs * iploc / float(multi_N[i]))      # convert peak locations to Hertz
            in_band = ipfreq < multi_B[i]                            # keep only the peaks inside band i
            if i > 0:
                in_band &= ipfreq >= multi_B[i - 1]
            sel_freq.append(ipfreq[in_band])
            sel_mag.append(np.asarray(ipmag)[in_band])
            sel_phase.append(np.asarray(ipphase)[in_band])
        ipfreq_combined = np.concatenate(sel_freq)                   # peaks of all bands, in band order
        ipmag_combined = np.concatenate(sel_mag)
        ipphase_combined = np.concatenate(sel_phase)
        #-----synthesis-----
        Y_combined = UF.genSpecSines(ipfreq_combined, ipmag_combined,
                                     ipphase_combined, Ns, fs)       # generate sines in the spectrum
        fftbuffer_combined = np.real(ifft(Y_combined))               # compute inverse FFT
        yw_combined[:hNs - 1] = fftbuffer_combined[hNs + 1:]         # undo zero-phase window
        yw_combined[hNs - 1:] = fftbuffer_combined[:hNs + 1]
        y_combined[multi_pin[0] - hNs:multi_pin[0] + hNs] += sw * yw_combined   # overlap-add and apply a synthesis window
        multi_pin += H                                               # advance sound pointers
    return y_combined
# Script excerpt: build the spectrum of three sinusoids with UF.genSpecSines
# and plot its magnitude and phase.
import stft as STFT
import sineModel as SM
import utilFunctions as UF

Ns = 256                                  # FFT size passed to genSpecSines
hNs = Ns//2                               # half of the FFT size
yw = np.zeros(Ns)                         # not used within this excerpt
fs = 44100                                # sampling rate
freqs = np.array([1000.0, 4000.0, 8000.0])   # sinusoid frequencies in Hz (see plot title)
amps = np.array([.6, .4, .6])             # sinusoid amplitudes
phases = ([0.5, 1.2, 2.3])                # sinusoid phases in radians (see plot title)
yploc = Ns*freqs/fs                       # frequencies expressed as bin positions; not used within this excerpt
ypmag = 20*np.log10(amps/2.0)             # half of each amplitude, in dB
ypphase = phases
Y = UF.genSpecSines(freqs, ypmag, ypphase, Ns, fs)   # spectrum of the sinusoids
mY = 20*np.log10(abs(Y[:hNs]))            # magnitude in dB of the first half of the spectrum
pY = np.unwrap(np.angle(Y[:hNs]))         # unwrapped phase of the first half of the spectrum
y= fftshift(ifft(Y))*sum(blackmanharris(Ns))   # inverse FFT, shifted and scaled by the sum of the blackmanharris window

plt.figure(1, figsize=(9, 5))
plt.subplot(3,1,1)
plt.plot(fs*np.arange(Ns/2)/Ns, mY, 'r', lw=1.5)     # magnitude against frequency in Hz
plt.axis([0, fs/2.0,-100,0])
plt.title("mY, freqs (Hz) = 1000, 4000, 8000; amps = .6, .4, .6")

plt.subplot(3,1,2)
pY[pY==0]= np.nan                         # replace exact-zero phase values with NaN (presumably so they are not drawn)
plt.plot(fs*np.arange(Ns/2)/Ns, pY, 'c', lw=1.5)     # phase against frequency in Hz
plt.axis([0, fs/2.0,-.01,3.0])
plt.title("pY, phases (radians) = .5, 1.2, 2.3")
def sineModelMultiRes(x, fs, w1, w2, w3, N1, N2, N3, t, B1, B2, B3):
    """
    Analysis/synthesis of a sound using the multi resolution sinusoidal model, without sine tracking
    x: input array sound, fs: sampling rate,
    w1, w2 & w3: analysis windows, N1, N2 & N3: sizes of complex spectrum,
    t: threshold in negative dB,
    B1, B2 & B3: upper frequency limit of the band analyzed with each window
    returns y: output array sound (sum of the three band-limited resyntheses)
    """
    import dftModel as DFT
    import utilFunctions as UF  # sms-tool https://github.com/MTG/sms-tools
    import numpy as np

    windows = [w1, w2, w3]
    fft_sizes = [N1, N2, N3]
    # first and last peak bin kept for each band
    first_bin = [0, np.floor(B1 * N2 / fs), np.floor(B2 * N3 / fs)]
    last_bin = [np.ceil(B1 * N1 / fs), np.ceil(B2 * N2 / fs), np.ceil(B3 * N3 / fs)]
    total = np.zeros(len(x))                                     # sum of all band outputs

    for band in range(3):
        nfft = fft_sizes[band]                                   # analysis and synthesis FFT size of this band
        half_up = int(math.floor((windows[band].size + 1) / 2))  # half analysis window size by rounding
        half_down = int(math.floor(windows[band].size / 2))      # half analysis window size by floor
        hop = nfft // 4                                          # hop size used for analysis and synthesis
        half_fft = nfft // 2                                     # half of synthesis FFT size
        pos = max(half_fft, half_up)                             # sound pointer, middle of first analysis window
        stop = x.size - max(half_fft, half_up)                   # last sample to start a frame
        frame_out = np.zeros(nfft)                               # output sound frame
        band_out = np.zeros(x.size)                              # output of this band
        windows[band] = windows[band] / sum(windows[band])       # normalize analysis window
        centre = slice(half_fft - hop, half_fft + hop)
        norm_bh = blackmanharris(nfft)
        norm_bh = norm_bh / sum(norm_bh)                         # normalized blackmanharris window
        synth_win = np.zeros(nfft)                               # synthesis window: triangular / blackmanharris
        synth_win[centre] = triang(2 * hop)
        synth_win[centre] = synth_win[centre] / norm_bh[centre]

        while pos < stop:                                        # while sound pointer is within sound
            # -----analysis-----
            segment = x[pos - half_up:pos + half_down]           # select frame
            mX, pX = DFT.dftAnal(segment, windows[band], nfft)   # compute dft
            peaks = UF.peakDetection(mX, t)                      # detect locations of peaks
            keep = (peaks >= first_bin[band]) & (peaks <= last_bin[band])
            peaks = peaks[keep]                                  # drop peaks outside this band
            iploc, ipmag, ipphase = UF.peakInterp(mX, pX, peaks) # refine peak values by interpolation
            ipfreq = fs * iploc / float(nfft)                    # convert peak locations to Hertz
            # -----synthesis-----
            Y = UF.genSpecSines(ipfreq, ipmag, ipphase, nfft, fs)   # generate sines in the spectrum
            time_frame = np.real(ifft(Y))                        # compute inverse FFT
            frame_out[:half_fft - 1] = time_frame[half_fft + 1:] # undo zero-phase window
            frame_out[half_fft - 1:] = time_frame[:half_fft + 1]
            band_out[pos - half_fft:pos + half_fft] += synth_win * frame_out   # overlap-add with synthesis window
            pos += hop                                           # advance sound pointer

        total = total + band_out                                 # accumulate this band

    return total
# Script excerpt: analyze one frame of x around sample `pos`, synthesize the
# spectrum of its harmonics and subtract it from the spectrum of the frame.
# x, pos, hM1, hM2, w, N, M, t, fs, f0et, minf0, maxf0 and nH are defined
# outside this excerpt.
maxnpeaksTwm = 5
minSineDur = .1
harmDevSlope = 0.01
Ns = 512                                  # FFT size used for the synthesized and residual spectra
H = Ns//4                                 # hop size
x1 = x[pos-hM1:pos+hM2]                   # frame for the sinusoidal analysis
x2 = x[pos-Ns//2-1:pos+Ns//2-1]           # Ns-sample frame for the residual analysis
mX, pX = DFT.dftAnal(x1, w, N)            # magnitude and phase spectrum of the frame
ploc = UF.peakDetection(mX, t)            # peak locations above threshold t
iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)   # refined peak values
ipfreq = fs*iploc/N                       # peak locations converted to Hz
f0 = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0)      # fundamental frequency estimate
hfreqp = []                               # empty list passed to harmonicDetection in the hfreqp position
hfreq, hmag, hphase = HM.harmonicDetection(ipfreq, ipmag, ipphase, f0, nH, hfreqp, fs, harmDevSlope)
Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)     # spectrum of the harmonics
mYh = 20 * np.log10(abs(Yh[:Ns//2]))      # its magnitude in dB (first half)
pYh = np.unwrap(np.angle(Yh[:Ns//2]))     # its unwrapped phase (first half)
bh=blackmanharris(Ns)
X2 = fft(fftshift(x2*bh/sum(bh)))         # spectrum of x2 windowed with the normalized blackmanharris window
Xr = X2-Yh                                # residual spectrum
mXr = 20 * np.log10(abs(Xr[:Ns//2]))      # residual magnitude in dB (first half)
pXr = np.unwrap(np.angle(Xr[:Ns//2]))     # residual unwrapped phase (first half)
xrw = np.real(fftshift(ifft(Xr))) * H * 2 # residual frame in the time domain, scaled by 2*H
yhw = np.real(fftshift(ifft(Yh))) * H * 2 # harmonic frame in the time domain, scaled by 2*H

maxplotfreq = 8000.0
plt.figure(1, figsize=(9, 7))
plt.subplot(3,2,1)
plt.plot(np.arange(M), x[pos-hM1:pos+hM2]*w, lw=1.5)  # windowed input frame
# Script excerpt: analyze one frame of x around sample `pin`, subtract the
# synthesized harmonic spectrum and compute a decimated envelope of the residual.
# x, pin, M, N, t, fs, f0et, minf0, maxf0, nH, harmDevSlope and stocf are
# defined outside this excerpt.
w = get_window('blackman', M)             # analysis window of size M
hM1 = int(math.floor((M + 1) / 2))        # half analysis window size by rounding
hM2 = int(math.floor(M / 2))              # half analysis window size by floor
x1 = x[pin - hM1:pin + hM2]               # frame for the sinusoidal analysis
mX, pX = DFT.dftAnal(x1, w, N)            # magnitude and phase spectrum of the frame
ploc = UF.peakDetection(mX, t)            # peak locations above threshold t
iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)   # refined peak values
ipfreq = fs * iploc / N                   # peak locations converted to Hz
f0 = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, 0)   # fundamental frequency estimate
hfreq, hmag, hphase = HM.harmonicDetection(ipfreq, ipmag, ipphase, f0, nH, [], fs, harmDevSlope)
Ns = 512                                  # FFT size for the synthesized and residual spectra
hNs = 256                                 # half of Ns
Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)     # Yh is the complete spectrum of the harmonic component
# NOTE(review): this residual window is 'blackman'; confirm it matches the
# window that genSpecSines uses for its lobes, since the two spectra are
# subtracted below.
wr = get_window('blackman', Ns)
xw2 = x[pin - hNs - 1:pin + hNs - 1] * wr / sum(wr)   # Ns windowed samples around the pointer
# swap the two halves of the frame (zero-phase windowing)
fftbuffer = np.zeros(Ns)
fftbuffer[:hNs] = xw2[hNs:]
fftbuffer[hNs:] = xw2[:hNs]
X2 = fft(fftbuffer)                       # spectrum of the windowed input frame
Xr = X2 - Yh                              # residual spectrum
mXr = 20 * np.log10(abs(Xr[:hNs]))        # residual magnitude in dB (first half)
mXrenv = resample(np.maximum(-200, mXr), int(mXr.size * stocf))   # floor at -200 dB, then resample to size*stocf points
stocEnv = resample(mXrenv, hNs)           # envelope resampled back to hNs points
2)) # zero-phase windowing is essential here, for correct subtraction hM2 = int(math.floor(M / 2)) x1 = x[pin - hM1:pin + hM2] mX, pX = DFT.dftAnal(x1, w, N) ploc = UF.peakDetection(mX, t) iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc) ipfreq = fs * iploc / N # convert to Hz f0 = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, 0) # find best candidate for f0 hfreq, hmag, hphase = HM.harmonicDetection(ipfreq, ipmag, ipphase, f0, nH, [], fs, harmDevSlope) Ns = 512 hNs = 256 # Ns / 2 Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs) # uses blackman harris lobe # Yh is the complete spectrum of the harmonic components wr = get_window('blackmanharris', Ns) # must use same window and size as harmonic spectrum xw2 = x[pin - hNs - 1:pin + hNs - 1] * wr / sum(wr) fftbuffer = np.zeros(Ns) fftbuffer[:hNs] = xw2[hNs:] # zero-phase windowing fftbuffer[hNs:] = xw2[:hNs] X2 = fft(fftbuffer) Xr = X2 - Yh mXr = 20 * np.log10(abs(Xr[:hNs])) mXrenv = resample(np.maximum(-200, mXr), mXr.size * stocf) stocEnv = resample(mXrenv, hNs)
def sineModelMultiRes(x, fs, windows, fftSizes, t, bands):
    """
    Analysis/synthesis of a sound using a multi-resolution sinusoidal model, without sine tracking
    x: input array sound, fs: sampling frequency,
    windows: analysis windows (one per band), fftSizes: sizes of complex spectrum (one per band),
    t: threshold in negative dB,
    bands: upper frequency edge in Hz of each band; a band starts where the previous one ends
    returns y: output array sound (same length as x)
    """

    # Resynthesis values
    Ns = 512                                                # FFT size for synthesis (even)
    H = Ns // 4                                             # hop size used for analysis and synthesis
    hNs = Ns // 2                                           # half of synthesis FFT size
    yw = np.zeros(Ns)                                       # output sound frame
    x = np.array(x)                                         # convert input to numpy array
    out = np.zeros(x.size)                                  # all bands are overlap-added into one buffer

    bh = blackmanharris(Ns)                                 # blackmanharris window
    bh = bh / sum(bh)                                       # normalized blackmanharris window
    sw = np.zeros(Ns)                                       # synthesis window: triangular / blackmanharris
    sw[hNs - H:hNs + H] = triang(2 * H) / bh[hNs - H:hNs + H]

    lowEdge = 0.0                                           # lower frequency edge of the current band
    for window, N, highEdge in zip(windows, fftSizes, bands):
        #-----variable inits-----
        hM1 = (window.size + 1) // 2                        # half analysis window size by rounding
        hM2 = window.size // 2                              # half analysis window size by floor
        pin = max(hNs, hM1)                                 # first frame center
        pend = x.size - pin                                 # last sample to start a frame
        w = window / sum(window)                            # normalized analysis window

        # Bin limits of this band (inclusive), as integers so they can be
        # compared with the peak locations.
        binCutoffDownLimit = int(np.ceil(lowEdge * N / fs))
        binCutoffUpLimit = int(np.ceil(highEdge * N / fs)) - 1
        lowEdge = highEdge

        while pin < pend:
            #-----analysis-----
            x1 = x[pin - hM1:pin + hM2]                     # frame with the current window size
            mX, pX = DFT.dftAnal(x1, w, N)                  # spectrum with the band's window and FFT size
            ploc = UF.peakDetection(mX, t)                  # detect locations of peaks
            # Keep only the peaks of the current band. The original built a
            # masked copy of mX (with the two slices inverted and float
            # limits) but then detected peaks on the unmasked spectrum, so
            # every band resynthesized the whole spectrum.
            ploc = ploc[(ploc >= binCutoffDownLimit) & (ploc <= binCutoffUpLimit)]
            iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)   # refine peak values by interpolation
            ipfreq = fs * iploc / float(N)                  # convert peak locations to Hertz

            #-----synthesis-----
            Y = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs)   # generate sines in the spectrum
            fftbuffer = np.real(ifft(Y))                    # compute inverse FFT
            yw[:hNs - 1] = fftbuffer[hNs + 1:]              # undo zero-phase window
            yw[hNs - 1:] = fftbuffer[:hNs + 1]
            out[pin - hNs:pin + hNs] += sw * yw             # overlap-add and apply a synthesis window
            pin += H                                        # advance sound pointer

    # The bands no longer overlap in frequency, so their sum needs no extra
    # scaling (the original multiplied by 0.3 to compensate for adding three
    # full-band resyntheses).
    return out
from scipy.fftpack import ifft
from scipy.signal import blackmanharris, triang, get_window

# Script excerpt: analyze one frame of a sound, resynthesize it with
# genSpecSines and apply the synthesis window.
(fs, x) = UF.wavread('../../sounds/oboe-A4.wav')
Ns = 512                                  # FFT size (even)
# Integer (floor) division and int(): these values are used as slice indices,
# which must be integers.
hNs = Ns // 2                             # half of the FFT size
H = Ns // 4                               # hop size
M = 511                                   # analysis window size
t = -70.0                                 # peak detection threshold in dB
w = get_window('hamming', M)
x1 = x[int(0.8 * fs):int(0.8 * fs) + M]   # M samples starting 0.8 seconds into the sound

mX, pX = DFT.dftAnal(x1, w, Ns)           # magnitude and phase spectrum
ploc = UF.peakDetection(mX, t)            # peak locations above threshold
iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)   # refined peak values
ipfreq = fs * iploc / float(Ns)           # peak locations converted to Hertz
Y = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs)   # spectrum of the sinusoids
y = np.real(ifft(Y))                      # time-domain frame (zero-phase order)

# synthesis window = triangle window / blackman-harris window
sw = np.zeros(Ns)
ow = triang(Ns // 2)                      # triangular window of 2*H samples
sw[hNs - H:hNs + H] = ow
bh = blackmanharris(Ns)
bh = bh / sum(bh)                         # normalized blackman-harris window
sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]

yw = np.zeros(Ns)
yw[:hNs - 1] = y[hNs + 1:]                # undo zero-phase window
yw[hNs - 1:] = y[:hNs + 1]
yw *= sw                                  # apply the synthesis window
def sineModelMultiRes(x, fs, w, N, t, B):
    """
    Analysis/synthesis of a sound using the sinusoidal model, without sine tracking,
    analyzing three frequency bands with different resolutions
    x: input array sound, fs: sampling rate,
    w: [w1, w2, w3] analysis windows of the three bands,
    N: [N1, N2, N3] sizes of complex spectrum of the three bands,
    t: threshold in negative dB,
    B: [Ba, Bb] band boundaries in Hz: band 1 is 0 <= f < Ba, band 2 is
       Ba <= f < Bb and band 3 is Bb <= f < fs/2
    returns y: output array sound (same length as x)
    """

    hM1s = [(aw.size + 1) // 2 for aw in w]                 # half analysis window sizes by rounding
    hM2s = [aw.size // 2 for aw in w]                       # half analysis window sizes by floor
    Ns = 512                                                # FFT size for synthesis (even)
    H = Ns // 4                                             # hop size used for analysis and synthesis
    hNs = Ns // 2                                           # half of synthesis FFT size
    pin = max(hNs, max(hM1s))                               # init sound pointer so the largest window fits
    pend = x.size - pin                                     # last sample to start a frame
    yw = np.zeros(Ns)                                       # output sound frame
    y = np.zeros(x.size)                                    # output array
    w = [aw / sum(aw) for aw in w]                          # normalize analysis windows
    bh = blackmanharris(Ns)                                 # blackmanharris window
    bh = bh / sum(bh)                                       # normalized blackmanharris window
    sw = np.zeros(Ns)                                       # synthesis window: triangular / blackmanharris
    sw[hNs - H:hNs + H] = triang(2 * H) / bh[hNs - H:hNs + H]
    # The last band ends at the Nyquist frequency of the input; the original
    # hard-coded 22050 Hz, which is only right for fs = 44100.
    edges = [(0, B[0]), (B[0], B[1]), (B[1], fs / 2.0)]
    while pin < pend:                                       # while input sound pointer is within sound
        #-----analysis----- once per band, keeping only the peaks inside the band
        bandFreq, bandMag, bandPhase = [], [], []
        for i in range(3):                                  # three bands are mandated
            x1 = x[pin - hM1s[i]:pin + hM2s[i]]             # select frame
            mX, pX = DFT.dftAnal(x1, w[i], N[i])            # compute dft
            ploc = UF.peakDetection(mX, t)                  # detect locations of peaks
            iploc, ipmagt, ipphaset = UF.peakInterp(mX, pX, ploc)   # refine peak values by interpolation
            ipfreqt = np.asarray(fs * iploc / float(N[i]))  # convert peak locations to Hertz
            inBand = (ipfreqt >= edges[i][0]) & (ipfreqt < edges[i][1])
            bandFreq.append(ipfreqt[inBand])
            bandMag.append(np.asarray(ipmagt)[inBand])
            bandPhase.append(np.asarray(ipphaset)[inBand])
        # combine the peaks of the three bands into a single analysis
        ipfreqC = np.concatenate(bandFreq)
        ipmagC = np.concatenate(bandMag)
        ipphaseC = np.concatenate(bandPhase)
        #-----synthesis-----
        Y = UF.genSpecSines(ipfreqC, ipmagC, ipphaseC, Ns, fs)   # generate sines in the spectrum
        fftbuffer = np.real(ifft(Y))                        # compute inverse FFT
        yw[:hNs - 1] = fftbuffer[hNs + 1:]                  # undo zero-phase window
        yw[hNs - 1:] = fftbuffer[:hNs + 1]
        y[pin - hNs:pin + hNs] += sw * yw                   # overlap-add and apply a synthesis window
        pin += H                                            # advance sound pointer
    return y
def sineModelMultiRes(x, fs, multi_w, multi_N, t, multi_B):
    """
    Analysis/synthesis of a sound using the multi-resolution sinusoidal model, without sine tracking
    x: input array sound, fs: sampling rate,
    multi_w: analysis windows (one per band), multi_N: FFT sizes (one per band),
    t: threshold in negative dB,
    multi_B: upper frequency edge in Hz of each band; band i keeps peaks with
             multi_B[i-1] <= f < multi_B[i] (band 0 has no lower edge)
    returns y: output array sound (same length as x)
    """

    bands = range(len(multi_B))  # to iterate over bands
    multi_w_size = np.array([multi_w[i].size for i in bands])
    # half analysis window sizes, by rounding and by floor
    multi_hM1 = np.floor((multi_w_size + 1) / 2.0).astype(int)
    multi_hM2 = np.floor(multi_w_size / 2.0).astype(int)

    # Floor division keeps the hop and half-size integral; with true division
    # they become floats on Python 3 and cannot be used as slice indices.
    Ns = 512       # FFT size for synthesis (even)
    H = Ns // 4    # hop size used for analysis and synthesis
    hNs = Ns // 2  # half of synthesis FFT size

    # NOTE(review): each band has its own start pointer, so bands with
    # different window sizes are analyzed around different samples while the
    # output frame is placed at band 0's pointer. Kept as in the original.
    multi_pin = np.maximum(hNs, multi_hM1)  # sound pointers, middle of analysis windows
    multi_pend = x.size - multi_pin         # last samples to start a frame

    yw_combined = np.zeros(Ns)     # output sound frame
    y_combined = np.zeros(x.size)  # output array
    multi_w = [multi_w[i] / sum(multi_w[i]) for i in bands]  # normalize analysis windows

    # synthesis window: triangular window divided by normalized blackmanharris
    bh = blackmanharris(Ns)
    bh = bh / sum(bh)
    sw = np.zeros(Ns)
    sw[hNs - H:hNs + H] = triang(2 * H) / bh[hNs - H:hNs + H]

    while (multi_pin < multi_pend).all():  # while all pointers are within sound
        #-----analysis-----
        kept_freq = []
        kept_mag = []
        kept_phase = []
        for i in bands:
            lo = multi_pin[i] - multi_hM1[i]
            hi = multi_pin[i] + multi_hM2[i]
            mXi, pXi = DFT.dftAnal(x[lo:hi], multi_w[i], multi_N[i])  # compute dft
            ploci = UF.peakDetection(mXi, t)                          # detect locations of peaks
            iploci, ipmagi, ipphasei = UF.peakInterp(mXi, pXi, ploci) # refine peak values
            ipfreqi = np.asarray(fs * iploci / float(multi_N[i]))     # peak locations in Hertz

            # boolean mask of the peaks that fall inside band i
            mask = ipfreqi < multi_B[i]
            if i > 0:
                mask &= ipfreqi >= multi_B[i - 1]
            kept_freq.append(ipfreqi[mask])
            kept_mag.append(np.asarray(ipmagi)[mask])
            kept_phase.append(np.asarray(ipphasei)[mask])

        # peaks of all bands, concatenated in band order
        ipfreq_combined = np.concatenate(kept_freq)
        ipmag_combined = np.concatenate(kept_mag)
        ipphase_combined = np.concatenate(kept_phase)

        #-----synthesis-----
        Y_combined = UF.genSpecSines(ipfreq_combined, ipmag_combined,
                                     ipphase_combined, Ns, fs)  # generate sines in the spectrum
        fftbuffer_combined = np.real(ifft(Y_combined))          # compute inverse FFT
        yw_combined[:hNs - 1] = fftbuffer_combined[hNs + 1:]    # undo zero-phase window
        yw_combined[hNs - 1:] = fftbuffer_combined[:hNs + 1]
        # overlap-add and apply a synthesis window
        y_combined[multi_pin[0] - hNs:multi_pin[0] + hNs] += sw * yw_combined
        multi_pin += H  # advance sound pointers
    return y_combined
def sineModelMultiRes_combined(x, fs, multi_w, multi_N, t, multi_B):
    """
    Analysis/synthesis of a sound using the sinusoidal model, without sine tracking,
    computing the single-resolution and the multi-resolution result side by side
    x: input array sound, fs: sampling rate,
    multi_w: analysis windows (one per band), multi_N: FFT sizes (one per band),
    t: threshold in negative dB,
    multi_B: upper frequency edge in Hz of each band; band i keeps peaks with
             multi_B[i-1] <= f < multi_B[i] (band 0 has no lower edge)
    returns y: output of the single-resolution model using multi_w[0] and multi_N[0],
            y_combined: output of the multi-resolution model
    """

    bands = range(len(multi_B))                                  # to iterate over bands

    # single-resolution reference uses the first window / FFT size
    w = multi_w[0]
    N = multi_N[0]
    hM1 = (w.size + 1) // 2                                      # half analysis window size by rounding
    hM2 = w.size // 2                                            # half analysis window size by floor

    multi_w_size = np.array([multi_w[i].size for i in bands])
    multi_hM1 = np.floor((multi_w_size + 1) / 2.0).astype(int)   # half analysis window sizes by rounding
    multi_hM2 = np.floor(multi_w_size / 2.0).astype(int)         # half analysis window sizes by floor

    # Integer (floor) division: these values are used as slice indices and
    # true division would turn them into floats on Python 3.
    Ns = 512                                                     # FFT size for synthesis (even)
    H = Ns // 4                                                  # hop size used for analysis and synthesis
    hNs = Ns // 2                                                # half of synthesis FFT size

    pin = max(hNs, hM1)                                          # sound pointer of the reference model
    pend = x.size - pin                                          # last sample to start a frame
    multi_pin = np.maximum(hNs, multi_hM1)                       # sound pointers of the bands
    multi_pend = x.size - multi_pin                              # last samples to start a frame

    yw = np.zeros(Ns)                                            # output frame, reference model
    y = np.zeros(x.size)                                         # output array, reference model
    yw_combined = np.zeros(Ns)                                   # output frame, multi-resolution model
    y_combined = np.zeros(x.size)                                # output array, multi-resolution model

    w = w / sum(w)                                               # normalize analysis window
    multi_w = [multi_w[i] / sum(multi_w[i]) for i in bands]      # normalize analysis windows

    bh = blackmanharris(Ns)                                      # blackmanharris window
    bh = bh / sum(bh)                                            # normalized blackmanharris window
    sw = np.zeros(Ns)                                            # synthesis window: triangular / blackmanharris
    sw[hNs - H:hNs + H] = triang(2 * H) / bh[hNs - H:hNs + H]

    while pin < pend and (multi_pin < multi_pend).all():         # while all pointers are within sound
        #-----analysis, reference model-----
        x1 = x[pin - hM1:pin + hM2]                              # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                           # compute dft
        ploc = UF.peakDetection(mX, t)                           # detect locations of peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)      # refine peak values by interpolation
        ipfreq = fs * iploc / float(N)                           # convert peak locations to Hertz

        #-----analysis, multi-resolution model-----
        sel_freq, sel_mag, sel_phase = [], [], []
        for i in bands:
            xi = x[multi_pin[i] - multi_hM1[i]:multi_pin[i] + multi_hM2[i]]   # select frame
            mXi, pXi = DFT.dftAnal(xi, multi_w[i], multi_N[i])   # compute dft
            ploci = UF.peakDetection(mXi, t)                     # detect locations of peaks
            iploci, ipmagi, ipphasei = UF.peakInterp(mXi, pXi, ploci)   # refine peak values
            ipfreqi = np.asarray(fs * iploci / float(multi_N[i]))       # peak locations in Hertz
            in_band = ipfreqi < multi_B[i]                       # keep only the peaks inside band i
            if i > 0:
                in_band &= ipfreqi >= multi_B[i - 1]
            sel_freq.append(ipfreqi[in_band])
            sel_mag.append(np.asarray(ipmagi)[in_band])
            sel_phase.append(np.asarray(ipphasei)[in_band])
        ipfreq_combined = np.concatenate(sel_freq)               # peaks of all bands, in band order
        ipmag_combined = np.concatenate(sel_mag)
        ipphase_combined = np.concatenate(sel_phase)

        #-----synthesis, reference model-----
        Y = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs)      # generate sines in the spectrum
        fftbuffer = np.real(ifft(Y))                             # compute inverse FFT
        yw[:hNs - 1] = fftbuffer[hNs + 1:]                       # undo zero-phase window
        yw[hNs - 1:] = fftbuffer[:hNs + 1]
        y[pin - hNs:pin + hNs] += sw * yw                        # overlap-add and apply a synthesis window

        #-----synthesis, multi-resolution model-----
        Y_combined = UF.genSpecSines(ipfreq_combined, ipmag_combined,
                                     ipphase_combined, Ns, fs)   # generate sines in the spectrum
        fftbuffer_combined = np.real(ifft(Y_combined))           # compute inverse FFT
        yw_combined[:hNs - 1] = fftbuffer_combined[hNs + 1:]     # undo zero-phase window
        yw_combined[hNs - 1:] = fftbuffer_combined[:hNs + 1]
        # Overlap-add at the same position as the reference frame. The
        # original advanced pin first, which wrote every combined frame one
        # hop late and overran the buffer on the last frames.
        y_combined[pin - hNs:pin + hNs] += sw * yw_combined

        pin += H                                                 # advance sound pointers
        multi_pin += H
    return y, y_combined
def sineModelMultiRes(x, fs, w_seq, N_seq, t, B_seq):
    """
    Analysis/synthesis of a sound using the sinusoidal model, without sine tracking
    using multi-resolution approach.
    x: input array sound, fs: sample rate
    w_seq: sequence of analysis windows
    N_seq: sequence of sizes of complex spectrum, one per window
    t: threshold in negative dB
    B_seq: sequence of frequency bands, one per window, represented as (min Hz, max Hz) tuples;
           a peak is kept when min <= f < max
    returns y: output array sound (same length as x)
    """
    assert len(w_seq) == len(
        N_seq), "w_seq and N_seq must be sequences of the same size"
    assert len(w_seq) == len(
        B_seq), "w_seq and B_seq must be sequences of the same size"

    # Each analysis frame is as long as the largest window, while the first
    # frame position is derived from the smallest window.
    min_window_size = min(item.size for item in w_seq)
    max_window_size = max(item.size for item in w_seq)
    logger.debug("min_window_size {}".format(min_window_size))
    logger.debug("max_window_size {}".format(max_window_size))

    hM1 = (min_window_size + 1) // 2      # half of the smallest window size, by rounding

    # Integer (floor) division: these values are used as slice indices and
    # true division would turn them into floats on Python 3.
    Ns = 512                              # FFT size for synthesis (even)
    H = Ns // 4                           # hop size used for analysis and synthesis
    hNs = Ns // 2                         # half of synthesis FFT size
    pin = max(hNs, hM1)                   # init sound pointer
    pend = x.size - pin                   # last sample to start a frame
    yw = np.zeros(Ns)                     # output sound frame
    y = np.zeros(x.size)                  # output array

    bh = blackmanharris(Ns)               # blackmanharris window
    bh = bh / sum(bh)                     # normalized blackmanharris window
    sw = np.zeros(Ns)                     # synthesis window: triangular / blackmanharris
    sw[hNs - H:hNs + H] = triang(2 * H) / bh[hNs - H:hNs + H]

    # Normalize into a new list: the original wrote the normalized windows
    # back into w_seq, changing the caller's list (and failing on a tuple).
    windows = [w / sum(w) for w in w_seq]

    logger.debug("Hop size {}".format(H))

    while pin < pend:                     # while input sound pointer is within sound
        # -----analysis-----
        # The frame of audio to analyse must be as wide as the largest window
        x1 = get_frame(x, pin, max_window_size)

        # For each band, analyse with its FFT size and window and keep only
        # the detected peaks whose frequency falls inside the band.
        kept_mag, kept_phase, kept_freq = [], [], []
        for w, N, (freq_min, freq_max) in zip(windows, N_seq, B_seq):
            _, ipmag, ipphase, ipfreq = analysis(x1, fs, w, N, t)
            ipfreq = np.asarray(ipfreq)
            in_band = (ipfreq >= freq_min) & (ipfreq < freq_max)
            kept_mag.append(np.asarray(ipmag)[in_band])
            kept_phase.append(np.asarray(ipphase)[in_band])
            kept_freq.append(ipfreq[in_band])

        # Aggregate the peaks of all bands into a single set of values.
        if kept_freq:
            final_ipmag = np.concatenate(kept_mag)
            final_ipphase = np.concatenate(kept_phase)
            final_ipfreq = np.concatenate(kept_freq)
        else:
            final_ipmag = np.array([])
            final_ipphase = np.array([])
            final_ipfreq = np.array([])

        # -----synthesis-----
        Y = UF.genSpecSines(final_ipfreq, final_ipmag, final_ipphase, Ns,
                            fs)           # generate sines in the spectrum
        fftbuffer = np.real(ifft(Y))      # compute inverse FFT
        yw[:hNs - 1] = fftbuffer[hNs + 1:]   # undo zero-phase window
        yw[hNs - 1:] = fftbuffer[:hNs + 1]
        y[pin - hNs:pin + hNs] += sw * yw    # overlap-add and apply a synthesis window
        pin += H                          # advance sound pointer

    return y
def spsModel(x, fs, w, N, t, stocf):
    """
    Analysis/synthesis of a sound using the sinusoidal plus stochastic model
    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size (minimum 512), t: threshold in negative dB,
    stocf: decimation factor of mag spectrum for stochastic analysis
    returns y: output sound, ys: sinusoidal component, yst: stochastic component
    """
    hM1 = (w.size + 1) // 2                          # half analysis window size by rounding
    hM2 = w.size // 2                                # half analysis window size by floor
    Ns = 512                                         # FFT size for synthesis (even)
    H = Ns // 4                                      # hop size (integer: used as slice index)
    hNs = Ns // 2                                    # half of synthesis FFT size
    pin = max(hNs, hM1)                              # sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)                    # last sample to start a frame
    ysw = np.zeros(Ns)                               # sinusoidal output frame
    ystw = np.zeros(Ns)                              # stochastic output frame
    ys = np.zeros(x.size)                            # sinusoidal output array
    yst = np.zeros(x.size)                           # stochastic output array
    w = w / sum(w)                                   # normalize analysis window

    sw = np.zeros(Ns)
    sw[hNs - H:hNs + H] = triang(2 * H)              # overlapping window
    bh = blackmanharris(Ns)                          # synthesis window
    bh = bh / sum(bh)                                # normalize synthesis window
    wr = bh                                          # window for residual
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]
    sws = H * hanning(Ns) / 2                        # synthesis window for stochastic

    while pin < pend:
        # -----analysis-----
        x1 = x[pin - hM1:pin + hM2]                  # select frame
        mX, pX = DFT.dftAnal(x1, w, N)               # compute dft
        ploc = UF.peakDetection(mX, t)               # find peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values (once)
        ipfreq = fs * iploc / float(N)               # convert peak locations to Hertz

        ri = pin - hNs - 1                           # input sound pointer for residual analysis
        xw2 = x[ri:ri + Ns] * wr                     # window the input sound
        fftbuffer = np.zeros(Ns)
        fftbuffer[:hNs] = xw2[hNs:]                  # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xw2[:hNs]
        X2 = fft(fftbuffer)                          # FFT for residual analysis

        # -----synthesis-----
        Ys = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs)  # spectrum of sinusoidal component
        Xr = X2 - Ys                                 # residual complex spectrum
        mXr = 20 * np.log10(abs(Xr[:hNs]))           # magnitude spectrum of residual
        # decimate the magnitude spectrum (clipped at -200 dB to avoid -Inf);
        # resample needs an integer number of output samples
        mXrenv = resample(np.maximum(-200, mXr), int(mXr.size * stocf))
        stocEnv = resample(mXrenv, hNs)              # interpolate to original size
        pYst = 2 * np.pi * np.random.rand(hNs)       # random phase values
        Yst = np.zeros(Ns, dtype=complex)
        Yst[:hNs] = 10 ** (stocEnv / 20) * np.exp(1j * pYst)                     # positive freq.
        Yst[hNs + 1:] = 10 ** (stocEnv[:0:-1] / 20) * np.exp(-1j * pYst[:0:-1])  # negative freq.

        fftbuffer = np.real(ifft(Ys))                # inverse FFT of sinusoidal spectrum
        ysw[:hNs - 1] = fftbuffer[hNs + 1:]          # undo zero-phase window
        ysw[hNs - 1:] = fftbuffer[:hNs + 1]

        fftbuffer = np.real(ifft(Yst))               # inverse FFT of stochastic spectrum
        ystw[:hNs - 1] = fftbuffer[hNs + 1:]         # undo zero-phase window
        ystw[hNs - 1:] = fftbuffer[:hNs + 1]

        ys[ri:ri + Ns] += sw * ysw                   # overlap-add for sines
        yst[ri:ri + Ns] += sws * ystw                # overlap-add for stochastic
        pin += H                                     # advance sound pointer

    y = ys + yst                                     # sum of sinusoidal and stochastic components
    return y, ys, yst
def hprModel_2(x, fs, w, N, t, nH, hfreq, hmag, hphase, outVocalURI, outbackGrURI):
    """
    Harmonic plus residual separation of a sound from precomputed harmonic tracks.
    Both components are written to wav files.
    x: input sound, fs: sampling rate,
    w: analysis window (only its size is used, to place the first frame),
    N, t, nH: unused, kept for interface compatibility,
    hfreq, hmag, hphase: per-frame harmonic frequencies, magnitudes and phases
                         (indexed as [frame, :]),
    outVocalURI: path of the wav file receiving the harmonic component,
    outbackGrURI: path of the wav file receiving the residual component
    returns yh: harmonic component, xr: residual component
    """
    hM1 = (w.size + 1) // 2                          # half analysis window size by rounding
    Ns = 512                                         # FFT size for synthesis (even)
    H = Ns // 4                                      # hop size (integer: used as slice index)
    hNs = Ns // 2                                    # half of synthesis FFT size
    pin = max(hNs, hM1)                              # sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)                    # last sample to start a frame
    yhw = np.zeros(Ns)                               # harmonic output frame
    xrw = np.zeros(Ns)                               # residual output frame
    yh = np.zeros(x.size)                            # harmonic output array
    xr = np.zeros(x.size)                            # residual output array

    sw = np.zeros(Ns)
    sw[hNs - H:hNs + H] = triang(2 * H)              # overlapping window
    bh = blackmanharris(Ns)                          # synthesis window
    bh = bh / sum(bh)                                # normalize synthesis window
    wr = bh                                          # window for residual
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]

    i = 0                                            # index of the current harmonic frame
    while pin < pend and i < len(hfreq):
        # -----analysis-----
        ri = pin - hNs - 1                           # input sound pointer for residual analysis
        xw2 = x[ri:ri + Ns] * wr                     # window the input sound
        fftbuffer = np.zeros(Ns)
        fftbuffer[:hNs] = xw2[hNs:]                  # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xw2[:hNs]
        X2 = fft(fftbuffer)                          # FFT of input signal for residual analysis

        # -----synthesis-----
        Yh = UF.genSpecSines(hfreq[i, :], hmag[i, :], hphase[i, :], Ns, fs)  # generate sines
        # soft masking; softMask is defined elsewhere in this file and is
        # used here as returning (harmonic spectrum, residual spectrum)
        Yh, Xr = softMask(X2, Yh, i)

        fftbuffer = np.real(ifft(Yh))                # inverse FFT of harmonic spectrum
        yhw[:hNs - 1] = fftbuffer[hNs + 1:]          # undo zero-phase window
        yhw[hNs - 1:] = fftbuffer[:hNs + 1]

        fftbuffer = np.real(ifft(Xr))                # inverse FFT of residual spectrum
        xrw[:hNs - 1] = fftbuffer[hNs + 1:]          # undo zero-phase window
        xrw[hNs - 1:] = fftbuffer[:hNs + 1]

        yh[ri:ri + Ns] += sw * yhw                   # overlap-add for sines
        xr[ri:ri + Ns] += sw * xrw                   # overlap-add for residual
        pin += H                                     # advance sound pointer
        i += 1                                       # advance harmonic frame

    UF.wavwrite(yh, fs, outVocalURI)
    print('written file ' + outVocalURI)
    UF.wavwrite(xr, fs, outbackGrURI)
    print('written file ' + outbackGrURI)
    return yh, xr
def sineModel_MultiRes(x, fs, w1, w2, w3, N1, N2, N3, t, B1, B2, B3):
    """
    Week 10, Project: A multi-resolution sinusoidal model
    Analysis/synthesis of a sound using the sinusoidal model, without sine tracking,
    using a multi-resolution sine model
    x: input array sound, fs: sampling rate,
    w1, w2, w3: analysis windows of the 3 frequency bands,
    N1, N2, N3: size of the 3 complex spectra,
    t: threshold in negative dB,
    B1, B2, B3: upper band edges (Hz) of the 3 frequency bands:
        (0, B1)  is the first band (0 Hz itself is excluded),
        [B1, B2) is the second band,
        [B2, B3) is the third band
    returns y: output array sound
    """
    windows = (w1, w2, w3)
    fft_sizes = (N1, N2, N3)
    lower_edges = (0, B1, B2)
    upper_edges = (B1, B2, B3)
    half_up = [(w.size + 1) // 2 for w in windows]   # half window sizes by rounding
    half_dn = [w.size // 2 for w in windows]         # half window sizes by floor

    Ns = 512                                         # FFT size for synthesis (even)
    H = Ns // 4                                      # hop size used for analysis and synthesis
    hNs = Ns // 2                                    # half of synthesis FFT size
    pin = max([hNs] + half_up)                       # pointer in middle of biggest window
    pend = x.size - pin                              # last sample to start a frame
    yw = np.zeros(Ns)                                # output sound frame
    y = np.zeros(x.size)                             # output array

    # The normalized windows are the ones handed to the analysis; previously
    # they were computed and then ignored in favour of the raw windows.
    norm_windows = [w / sum(w) for w in windows]

    sw = np.zeros(Ns)                                # synthesis window
    sw[hNs - H:hNs + H] = triang(2 * H)              # triangular window
    bh = blackmanharris(Ns)                          # blackmanharris window
    bh = bh / sum(bh)                                # normalized blackmanharris window
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]

    while pin < pend:                                # while sound pointer is within sound
        # -----analysis-----
        # three frames of different length, all centred on the same pin
        band_freq, band_mag, band_phase = [], [], []
        for b in range(3):
            frame = x[pin - half_up[b]:pin + half_dn[b]]
            mX, pX = DFT.dftAnal(frame, norm_windows[b], fft_sizes[b])
            ploc = UF.peakDetection(mX, t)           # detect locations of peaks
            iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine by interpolation
            ipfreq = fs * iploc / float(fft_sizes[b])            # peak locations in Hertz
            # keep only the peaks inside this band; the first band is open at 0 Hz
            if b == 0:
                above = ipfreq > lower_edges[b]
            else:
                above = ipfreq >= lower_edges[b]
            keep = np.logical_and(above, ipfreq < upper_edges[b])
            band_freq.append(ipfreq[keep])
            band_mag.append(ipmag[keep])
            band_phase.append(ipphase[keep])
        ipfreq = np.concatenate(band_freq)
        ipmag = np.concatenate(band_mag)
        ipphase = np.concatenate(band_phase)

        # -----synthesis-----
        Y = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs)  # generate sines in the spectrum
        fftbuffer = np.real(ifft(Y))                 # compute inverse FFT
        yw[:hNs - 1] = fftbuffer[hNs + 1:]           # undo zero-phase window
        yw[hNs - 1:] = fftbuffer[:hNs + 1]
        y[pin - hNs:pin + hNs] += sw * yw            # overlap-add with synthesis window
        pin += H                                     # advance sound pointer
    return y
# Script: analyse one frame of an oboe sound, resynthesize its sinusoids and
# plot the synthesized frame against the blackman-harris window.
(fs, x) = UF.wavread('../../../sounds/oboe-A4.wav')
M = 601                                              # analysis window size
w = np.blackman(M)
N = 1024                                             # analysis FFT size
hN = N // 2                                          # integer halves: used as indices below
Ns = 512                                             # synthesis FFT size
hNs = Ns // 2
H = Ns // 4                                          # hop size
pin = 5000                                           # first sample of the analysed frame
t = -70                                              # peak threshold in dB
x1 = x[pin:pin + w.size]
mX, pX = DFT.dftAnal(x1, w, N)
# NOTE(review): this call passes three arguments (mX, hN, t) whereas the other
# UF.peakDetection calls in this file pass (mX, t) -- confirm the intended API.
ploc = UF.peakDetection(mX, hN, t)
iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
freqs = iploc * fs / N                               # peak locations in Hertz
Y = UF.genSpecSines(freqs, ipmag, ipphase, Ns, fs)
mY = 20 * np.log10(abs(Y[:hNs]))                     # magnitude spectrum in dB
pY = np.unwrap(np.angle(Y[:hNs]))                    # unwrapped phase spectrum
# keep the real part only, as the model functions in this file do, so that
# max() and the plot operate on real samples
y = np.real(fftshift(ifft(Y))) * sum(blackmanharris(Ns))

# synthesis window: triangular window divided by the normalized blackman-harris
sw = np.zeros(Ns)
ow = triang(2 * H)
sw[hNs - H:hNs + H] = ow
bh = blackmanharris(Ns)
bh = bh / sum(bh)
sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]

plt.figure(1, figsize=(9, 6))
plt.subplot(3, 1, 1)
plt.plot(np.arange(-hNs, hNs), y, 'b', lw=1.5)
plt.plot(np.arange(-hNs, hNs), max(y) * bh / max(bh), 'k', alpha=.5, lw=1.5)
def sineModelMultiRes(x, fs, w_vec, window, N_vec, t, B_vec):
    """
    Multi-resolution analysis/synthesis of a sound using the sinusoidal model,
    without sine tracking. Three bands are analysed with their own window and
    FFT size and synthesized into three separate outputs.
    x: input array sound, fs: sampling rate,
    w_vec: the three analysis window sizes (low, mid, upper band),
    window: window type name handed to get_window,
    N_vec: the three FFT sizes (low, mid, upper band),
    t: threshold in negative dB,
    B_vec: band limits in Hz; B_vec[0] is the upper limit of the low band and
           B_vec[1] the upper limit of the mid band (the upper band has no
           upper limit, so B_vec[2] is not used)
    returns y1, y2, y3: output sounds of the low, mid and upper band
    """
    Ns = 512                                         # FFT size for synthesis (even)
    H = Ns // 4                                      # hop size (integer: used as slice index)
    hNs = Ns // 2                                    # half of synthesis FFT size

    windows = [get_window(window, w_vec[b]) for b in range(3)]
    half_up = [(w.size + 1) // 2 for w in windows]   # half window sizes by rounding
    half_dn = [w.size // 2 for w in windows]         # half window sizes by floor
    windows = [w / sum(w) for w in windows]          # normalize analysis windows

    # start far enough into the sound for the biggest window to fit
    pin = max([hNs] + half_up)
    pend = x.size - pin                              # last sample to start a frame

    yw = np.zeros(Ns)                                # output sound frame
    outputs = [np.zeros(x.size) for _ in range(3)]   # one output array per band

    sw = np.zeros(Ns)                                # synthesis window
    sw[hNs - H:hNs + H] = triang(2 * H)              # triangular window
    bh = blackmanharris(Ns)                          # blackmanharris window
    bh = bh / sum(bh)                                # normalized blackmanharris window
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]

    band_low = B_vec[0]                              # upper limit of the low band
    band_mid = B_vec[1]                              # upper limit of the mid band
    # (exclusive lower limit, inclusive upper limit) per band; None = unbounded
    limits = [(None, band_low), (band_low, band_mid), (band_mid, None)]

    while pin < pend:                                # while sound pointer is within sound
        for b in range(3):
            # -----analysis-----
            # Every frame is centred on pin: the measured phases refer to the
            # frame centre, which must match the centre of the synthesis frame.
            frame = x[pin - half_up[b]:pin + half_dn[b]]
            N = N_vec[b]
            mX, pX = DFT.dftAnal(frame, windows[b], N)
            ploc = UF.peakDetection(mX, t)           # detect locations of peaks
            pfreq = fs * ploc / float(N)             # coarse frequency for band limiting

            # Drop peaks outside the band. Bin 0 is always dropped as well,
            # because 0 used to be the "discarded" marker for peak locations.
            lo, hi = limits[b]
            keep = ploc != 0
            if lo is not None:
                keep &= pfreq > lo
            if hi is not None:
                keep &= pfreq <= hi
            ploc = ploc[keep]

            iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine by interpolation
            ipfreq = fs * iploc / float(N)           # convert peak locations to Hertz

            # -----synthesis-----
            Y = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs)  # sines in the spectrum
            fftbuffer = np.real(ifft(Y))             # compute inverse FFT
            yw[:hNs - 1] = fftbuffer[hNs + 1:]       # undo zero-phase window
            yw[hNs - 1:] = fftbuffer[:hNs + 1]
            outputs[b][pin - hNs:pin + hNs] += sw * yw   # overlap-add with synthesis window
        pin += H                                     # advance sound pointer
    return outputs[0], outputs[1], outputs[2]
# Script fragment: harmonic analysis of one frame, subtraction of the harmonics
# from the frame spectrum and plot of the residual with its smoothed envelope.
# M, N, x, pin, t, fs, f0et, minf0, maxf0, nH, harmDevSlope and stocf are
# expected to be defined before this point.
w = get_window('blackman', M)
hM1 = int(math.floor((M+1)/2))                       # half analysis window size by rounding
hM2 = int(math.floor(M/2))                           # half analysis window size by floor
x1 = x[pin-hM1:pin+hM2]                              # frame centred on pin
mX, pX = DFT.dftAnal(x1, w, N)
ploc = UF.peakDetection(mX, t)
iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
ipfreq = fs*iploc/N                                  # peak locations in Hertz
f0 = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, 0)
hfreq, hmag, hphase = HM.harmonicDetection(ipfreq, ipmag, ipphase, f0, nH, [], fs, harmDevSlope)
Ns = 512                                             # synthesis FFT size
hNs = 256
Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)    # spectrum of the harmonics
wr = get_window('blackmanharris', Ns)
xw2 = x[pin-hNs-1:pin+hNs-1] * wr / sum(wr)          # windowed input for residual analysis
fftbuffer = np.zeros(Ns)
fftbuffer[:hNs] = xw2[hNs:]                          # zero-phase window in fftbuffer
fftbuffer[hNs:] = xw2[:hNs]
X2 = fft(fftbuffer)
Xr = X2 - Yh                                         # residual complex spectrum
mXr = 20 * np.log10(abs(Xr[:hNs]))                   # converting to a dB scale
# resample needs an integer number of output samples
mXrenv = resample(np.maximum(-200, mXr), int(mXr.size*stocf))
stocEnv = resample(mXrenv, hNs)                      # back to the original size
plt.plot(mXr)
plt.plot(stocEnv)                                    # smooth version
def sineModelOriginal(x, fs, w, N, t):
    """
    Analysis/synthesis of a sound using the sinusoidal model, without sine tracking.
    Prints debug statistics about the last processed frame and writes the result
    to "./OutFile(sineModel_OriginalAnalysis).wav".
    x: input array sound, fs: sampling rate, w: analysis window,
    N: size of complex spectrum, t: threshold in negative dB
    returns y: output array sound
    """
    hM1 = (w.size + 1) // 2                          # half analysis window size by rounding
    hM2 = w.size // 2                                # half analysis window size by floor
    Ns = 512                                         # FFT size for synthesis (even)
    H = Ns // 4                                      # hop size (integer: used as slice index)
    hNs = Ns // 2                                    # half of synthesis FFT size
    pin = max(hNs, hM1)                              # sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)                    # last sample to start a frame
    yw = np.zeros(Ns)                                # output sound frame
    y = np.zeros(x.size)                             # output array
    w = w / sum(w)                                   # normalize analysis window

    sw = np.zeros(Ns)                                # synthesis window
    sw[hNs - H:hNs + H] = triang(2 * H)              # triangular window
    bh = blackmanharris(Ns)                          # blackmanharris window
    bh = bh / sum(bh)                                # normalized blackmanharris window
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]

    # Defaults so the statistics below can be printed even when the sound is
    # too short for a single frame.
    ipfreq = np.array([])
    ipmag = np.array([])
    ipphase = np.array([])
    frame = 0                                        # number of processed frames

    while pin < pend:                                # while sound pointer is within sound
        # -----analysis-----
        x1 = x[pin - hM1:pin + hM2]                  # select frame centred on pin
        mX, pX = DFT.dftAnal(x1, w, N)               # compute dft
        ploc = UF.peakDetection(mX, t)               # detect locations of peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine by interpolation
        ipfreq = fs * iploc / float(N)               # convert peak locations to Hertz

        # -----synthesis-----
        Y = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs)  # generate sines in the spectrum
        fftbuffer = np.real(ifft(Y))                 # compute inverse FFT
        yw[:hNs - 1] = fftbuffer[hNs + 1:]           # undo zero-phase window
        yw[hNs - 1:] = fftbuffer[:hNs + 1]
        y[pin - hNs:pin + hNs] += sw * yw            # overlap-add with synthesis window
        pin += H                                     # advance sound pointer (H is fixed)
        frame += 1

    print("\n\n----- STATISTICS FOR THE LAST FRAME (for debug purposes, etc) -----\n")
    print("\n")
    print("Synthesizing the following frequencies for the last frame:")
    print(ipfreq)
    print("Synthesizing the following magnitudes for the last frame:")
    print(ipmag)
    print("Synthesizing the following phases for the last frame:")
    print(ipphase)
    print("\n")
    print("Total frames analyzed/synthesized:")
    print(frame)
    print("\n\n")

    UF.wavwrite(y, fs, "./OutFile(sineModel_OriginalAnalysis).wav")
    return y
def hprModel(x, fs, w, N, t, nH, minf0, maxf0, f0et):
    """
    Analysis/synthesis of a sound using the harmonic plus residual model
    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size (minimum 512), t: threshold in negative dB,
    nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz,
    maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5)
    returns y: output sound, yh: harmonic component, xr: residual component
    """
    hM1 = (w.size + 1) // 2                          # half analysis window size by rounding
    hM2 = w.size // 2                                # half analysis window size by floor
    Ns = 512                                         # FFT size for synthesis (even)
    H = Ns // 4                                      # hop size (integer: used as slice index)
    hNs = Ns // 2                                    # half of synthesis FFT size
    pin = max(hNs, hM1)                              # sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)                    # last sample to start a frame
    yhw = np.zeros(Ns)                               # harmonic output frame
    xrw = np.zeros(Ns)                               # residual output frame
    yh = np.zeros(x.size)                            # harmonic output array
    xr = np.zeros(x.size)                            # residual output array
    w = w / sum(w)                                   # normalize analysis window

    sw = np.zeros(Ns)
    sw[hNs - H:hNs + H] = triang(2 * H)              # overlapping window
    bh = blackmanharris(Ns)                          # synthesis window
    bh = bh / sum(bh)                                # normalize synthesis window
    wr = bh                                          # window for residual
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]

    hfreqp = []                                      # harmonic frequencies of previous frame
    f0stable = 0                                     # last stable f0 (0 = none)
    while pin < pend:
        # -----analysis-----
        x1 = x[pin - hM1:pin + hM2]                  # select frame
        mX, pX = DFT.dftAnal(x1, w, N)               # compute dft
        ploc = UF.peakDetection(mX, t)               # find peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values
        ipfreq = fs * iploc / float(N)               # convert locations to Hz
        f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        # f0 is stable when it is the first one found, or when it stays within
        # one fifth of the previous stable value
        first_f0 = (f0stable == 0) and (f0t > 0)
        close_f0 = (f0stable > 0) and (np.abs(f0stable - f0t) < f0stable / 5.0)
        f0stable = f0t if (first_f0 or close_f0) else 0
        hfreq, hmag, hphase = HM.harmonicDetection(
            ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs)     # find harmonics
        hfreqp = hfreq

        ri = pin - hNs - 1                           # input sound pointer for residual analysis
        xw2 = x[ri:ri + Ns] * wr                     # window the input sound
        fftbuffer = np.zeros(Ns)
        fftbuffer[:hNs] = xw2[hNs:]                  # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xw2[:hNs]
        X2 = fft(fftbuffer)                          # FFT of input signal for residual analysis

        # -----synthesis-----
        Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)    # generate sines
        Xr = X2 - Yh                                 # residual complex spectrum

        fftbuffer = np.real(ifft(Yh))                # inverse FFT of harmonic spectrum
        yhw[:hNs - 1] = fftbuffer[hNs + 1:]          # undo zero-phase window
        yhw[hNs - 1:] = fftbuffer[:hNs + 1]

        fftbuffer = np.real(ifft(Xr))                # inverse FFT of residual spectrum
        xrw[:hNs - 1] = fftbuffer[hNs + 1:]          # undo zero-phase window
        xrw[hNs - 1:] = fftbuffer[:hNs + 1]

        yh[ri:ri + Ns] += sw * yhw                   # overlap-add for sines
        xr[ri:ri + Ns] += sw * xrw                   # overlap-add for residual
        pin += H                                     # advance sound pointer

    y = yh + xr                                      # sum of harmonic and residual components
    return y, yh, xr
def sineModel_MultiRes(x, fs, w1, w2, w3, N1, N2, N3, t, B1, B2, B3):
    """
    MultiResolution analysis/synthesis of a sound using the sinusoidal model,
    without sine tracking
    x: input array sound, fs: sampling rate,
    [w1,w2,w3]: 3 analysis windows, [N1,N2,N3]: 3 sizes of complex spectrum,
    t: threshold in negative dB,
    [B1,B2,B3]: upper edges (Hz) of the 3 frequency bands; the bands are
                [0, B1], (B1, B2] and (B2, B3]
    returns y: output array sound
    """
    fft_sizes = (N1, N2, N3)
    raw_windows = (w1, w2, w3)
    half_up = [(w.size + 1) // 2 for w in raw_windows]   # half window sizes by rounding
    half_dn = [w.size // 2 for w in raw_windows]         # half window sizes by floor
    windows = [w / sum(w) for w in raw_windows]          # normalized analysis windows
    # (inclusive?, lower edge, upper edge): only the first band includes its lower edge
    bands = [(True, 0, B1), (False, B1, B2), (False, B2, B3)]

    Ns = 512                                         # FFT size for synthesis (even)
    H = Ns // 4                                      # hop size (integer: used as slice index)
    hNs = Ns // 2                                    # half of synthesis FFT size
    pin = max([hNs] + half_up)                       # pointer in middle of biggest window
    pend = x.size - pin                              # last sample to start a frame
    yw = np.zeros(Ns)                                # output sound frame
    y = np.zeros(x.size)                             # output array

    sw = np.zeros(Ns)                                # synthesis window
    sw[hNs - H:hNs + H] = triang(2 * H)              # triangular window
    bh = blackmanharris(Ns)                          # blackmanharris window
    bh = bh / sum(bh)                                # normalized blackmanharris window
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]

    while pin < pend:                                # while sound pointer is within sound
        # -----analysis-----
        # frames of different sizes, all centred on pin
        kept_freq = [np.array([])]                   # float seeds keep the result dtype float
        kept_mag = [np.array([])]
        kept_phase = [np.array([])]
        for b, (closed_below, lo, hi) in enumerate(bands):
            frame = x[pin - half_up[b]:pin + half_dn[b]]
            mX, pX = DFT.dftAnal(frame, windows[b], fft_sizes[b])
            ploc = UF.peakDetection(mX, t)           # detect locations of peaks
            iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine by interpolation
            ipfreq = fs * iploc / float(fft_sizes[b])            # peak locations in Hertz
            # select the peaks that belong to this frequency band
            above = (ipfreq >= lo) if closed_below else (ipfreq > lo)
            keep = above & (ipfreq <= hi)
            kept_freq.append(ipfreq[keep])
            kept_mag.append(ipmag[keep])
            kept_phase.append(ipphase[keep])
        finalfreq = np.concatenate(kept_freq)
        finalmag = np.concatenate(kept_mag)
        finalphase = np.concatenate(kept_phase)

        # -----synthesis-----
        Y = UF.genSpecSines(finalfreq, finalmag, finalphase, Ns, fs)  # sines in the spectrum
        fftbuffer = np.real(ifft(Y))                 # compute inverse FFT
        yw[:hNs - 1] = fftbuffer[hNs + 1:]           # undo zero-phase window
        yw[hNs - 1:] = fftbuffer[:hNs + 1]
        y[pin - hNs:pin + hNs] += sw * yw            # overlap-add with synthesis window
        pin += H                                     # advance sound pointer
    return y
def spsModel(x, fs, w, N, t, stocf):
    """
    Analysis/synthesis of a sound using the sinusoidal plus stochastic model
    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size (minimum 512), t: threshold in negative dB,
    stocf: decimation factor of mag spectrum for stochastic analysis
    returns y: output sound, ys: sinusoidal component, yst: stochastic component
    """
    hM1 = (w.size + 1) // 2                          # half analysis window size by rounding
    hM2 = w.size // 2                                # half analysis window size by floor
    Ns = 512                                         # FFT size for synthesis (even)
    H = Ns // 4                                      # hop size used for analysis and synthesis
    hNs = Ns // 2                                    # half of synthesis FFT size
    pin = max(hNs, hM1)                              # sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)                    # last sample to start a frame
    ysw = np.zeros(Ns)                               # sinusoidal output frame
    ystw = np.zeros(Ns)                              # stochastic output frame
    ys = np.zeros(x.size)                            # sinusoidal output array
    yst = np.zeros(x.size)                           # stochastic output array
    w = w / sum(w)                                   # normalize analysis window

    sw = np.zeros(Ns)
    sw[hNs - H:hNs + H] = triang(2 * H)              # overlapping window
    bh = blackmanharris(Ns)                          # synthesis window
    bh = bh / sum(bh)                                # normalize synthesis window
    wr = bh                                          # window for residual
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]
    sws = H * hanning(Ns) / 2                        # synthesis window for stochastic

    while pin < pend:
        # -----analysis-----
        x1 = x[pin - hM1:pin + hM2]                  # select frame
        mX, pX = DFT.dftAnal(x1, w, N)               # compute dft
        ploc = UF.peakDetection(mX, t)               # find peaks
        # refine peak values; a second identical call whose result simply
        # overwrote this one has been dropped
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        ipfreq = fs * iploc / float(N)               # convert peak locations to Hertz

        ri = pin - hNs - 1                           # input sound pointer for residual analysis
        xw2 = x[ri:ri + Ns] * wr                     # window the input sound
        fftbuffer = np.zeros(Ns)
        fftbuffer[:hNs] = xw2[hNs:]                  # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xw2[:hNs]
        X2 = fft(fftbuffer)                          # FFT for residual analysis

        # -----synthesis-----
        Ys = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs)  # spectrum of sinusoidal component
        Xr = X2 - Ys                                 # residual complex spectrum
        mXr = 20 * np.log10(abs(Xr[:hNs]))           # magnitude spectrum of residual
        # decimate the magnitude spectrum (clipped at -200 dB to avoid -Inf);
        # resample needs an integer number of output samples
        mXrenv = resample(np.maximum(-200, mXr), int(mXr.size * stocf))
        stocEnv = resample(mXrenv, hNs)              # interpolate to original size
        pYst = 2 * np.pi * np.random.rand(hNs)       # random phase values
        Yst = np.zeros(Ns, dtype=complex)
        Yst[:hNs] = 10 ** (stocEnv / 20) * np.exp(1j * pYst)                     # positive freq.
        Yst[hNs + 1:] = 10 ** (stocEnv[:0:-1] / 20) * np.exp(-1j * pYst[:0:-1])  # negative freq.

        fftbuffer = np.real(ifft(Ys))                # inverse FFT of sinusoidal spectrum
        ysw[:hNs - 1] = fftbuffer[hNs + 1:]          # undo zero-phase window
        ysw[hNs - 1:] = fftbuffer[:hNs + 1]

        fftbuffer = np.real(ifft(Yst))               # inverse FFT of stochastic spectrum
        ystw[:hNs - 1] = fftbuffer[hNs + 1:]         # undo zero-phase window
        ystw[hNs - 1:] = fftbuffer[:hNs + 1]

        ys[ri:ri + Ns] += sw * ysw                   # overlap-add for sines
        yst[ri:ri + Ns] += sws * ystw                # overlap-add for stochastic
        pin += H                                     # advance sound pointer

    y = ys + yst                                     # sum of sinusoidal and stochastic components
    return y, ys, yst
def hpsModel(x, fs, w, N, t, nH, minf0, maxf0, f0et, stocf):
    """
    Analysis/synthesis of a sound using the harmonic plus stochastic model,
    one frame at a time, no harmonic tracking
    x: input sound; fs: sampling rate, w: analysis window;
    N: FFT size (minimum 512), t: threshold in negative dB,
    nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz;
    maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5);
    stocf: decimation factor of mag spectrum for stochastic analysis
    returns y: output sound, yh: harmonic component, yst: stochastic component
    """
    hM1 = (w.size + 1) // 2                          # half analysis window size by rounding
    hM2 = w.size // 2                                # half analysis window size by floor
    Ns = 512                                         # FFT size for synthesis (even)
    H = Ns // 4                                      # hop size (integer: used as slice index)
    hNs = Ns // 2                                    # half of synthesis FFT size
    pin = max(hNs, hM1)                              # sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)                    # last sample to start a frame
    yhw = np.zeros(Ns)                               # harmonic output frame
    ystw = np.zeros(Ns)                              # stochastic output frame
    yh = np.zeros(x.size)                            # harmonic output array
    yst = np.zeros(x.size)                           # stochastic output array
    w = w / sum(w)                                   # normalize analysis window

    sw = np.zeros(Ns)
    sw[hNs - H:hNs + H] = triang(2 * H)              # overlapping window
    bh = blackmanharris(Ns)                          # synthesis window
    bh = bh / sum(bh)                                # normalize synthesis window
    wr = bh                                          # window for residual
    # synthesis window for harmonic component
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]
    sws = H * hanning(Ns) / 2                        # synthesis window for stochastic

    hfreqp = []                                      # harmonic frequencies of previous frame
    f0stable = 0                                     # last stable f0 (0 = none)
    while pin < pend:
        # -----analysis-----
        x1 = x[pin - hM1:pin + hM2]                  # select frame
        mX, pX = DFT.dftAnal(x1, w, N)               # compute dft
        ploc = UF.peakDetection(mX, t)               # find peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values
        ipfreq = fs * iploc / float(N)               # convert peak locations to Hz
        f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        # f0 is stable when it is the first one found, or when it stays within
        # one fifth of the previous stable value
        first_f0 = (f0stable == 0) and (f0t > 0)
        close_f0 = (f0stable > 0) and (np.abs(f0stable - f0t) < f0stable / 5.0)
        f0stable = f0t if (first_f0 or close_f0) else 0
        hfreq, hmag, hphase = HM.harmonicDetection(
            ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs)     # find harmonics
        hfreqp = hfreq

        ri = pin - hNs - 1                           # input sound pointer for residual analysis
        xw2 = x[ri:ri + Ns] * wr                     # window the input sound
        fftbuffer = np.zeros(Ns)
        fftbuffer[:hNs] = xw2[hNs:]                  # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xw2[:hNs]
        X2 = fft(fftbuffer)                          # FFT for residual analysis

        # -----synthesis-----
        Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)    # spectrum of harmonic component
        Xr = X2 - Yh                                 # residual complex spectrum
        mXr = 20 * np.log10(abs(Xr[:hNs]))           # magnitude spectrum of residual
        # decimate the magnitude spectrum (clipped at -200 dB to avoid -Inf);
        # resample needs an integer number of output samples
        mXrenv = resample(np.maximum(-200, mXr), int(mXr.size * stocf))
        stocEnv = resample(mXrenv, hNs)              # interpolate to original size
        pYst = 2 * np.pi * np.random.rand(hNs)       # random phase values
        Yst = np.zeros(Ns, dtype=complex)
        Yst[:hNs] = 10 ** (stocEnv / 20) * np.exp(1j * pYst)                     # positive freq.
        Yst[hNs + 1:] = 10 ** (stocEnv[:0:-1] / 20) * np.exp(-1j * pYst[:0:-1])  # negative freq.

        fftbuffer = np.real(ifft(Yh))                # inverse FFT of harmonic spectrum
        yhw[:hNs - 1] = fftbuffer[hNs + 1:]          # undo zero-phase window
        yhw[hNs - 1:] = fftbuffer[:hNs + 1]

        fftbuffer = np.real(ifft(Yst))               # inverse FFT of stochastic spectrum
        ystw[:hNs - 1] = fftbuffer[hNs + 1:]         # undo zero-phase window
        ystw[hNs - 1:] = fftbuffer[:hNs + 1]

        yh[ri:ri + Ns] += sw * yhw                   # overlap-add for sines
        yst[ri:ri + Ns] += sws * ystw                # overlap-add for stochastic
        pin += H                                     # advance sound pointer

    y = yh + yst                                     # sum of harmonic and stochastic components
    return y, yh, yst
def sineModelMultiRes_combined(x, fs, multi_w, multi_N, t, multi_B):
    """
    Analysis/synthesis of a sound using the sinusoidal model, without sine tracking,
    computed both with a single resolution and with one resolution per frequency band
    x: input array sound, fs: sampling rate,
    multi_w: list of analysis windows (one per band), multi_N: list of FFT sizes (one per band),
    t: threshold in negative dB,
    multi_B: band limits in Hz; band i keeps the peaks with multi_B[i-1] <= f < multi_B[i]
             (band 0 has no lower limit)
    returns y: output of the single-resolution model that uses multi_w[0] and multi_N[0],
            y_combined: output synthesized from the peaks each band selected
    """
    # single-resolution reference uses the first window / FFT size
    w = multi_w[0]
    N = multi_N[0]
    bands = range(len(multi_B))                          # to iterate over bands

    hM1 = (w.size + 1) // 2                              # half analysis window size by rounding
    hM2 = w.size // 2                                    # half analysis window size by floor
    multi_w_size = np.array([multi_w[i].size for i in bands], dtype=int)
    multi_hM1 = (multi_w_size + 1) // 2                  # per-band half window size by rounding
    multi_hM2 = multi_w_size // 2                        # per-band half window size by floor

    Ns = 512                                             # FFT size for synthesis (even)
    H = Ns // 4                                          # hop size; integer so it can be used in slices
    hNs = Ns // 2                                        # half of synthesis FFT size

    pin = max(hNs, hM1)                                  # init sound pointer in middle of anal window
    pend = x.size - max(hNs, hM1)                        # last sample to start a frame
    # NOTE(review): every band keeps its own pointer, so bands whose windows differ in
    # size are analyzed around different samples of x; confirm this is intended.
    multi_pin = np.maximum(hNs, multi_hM1)
    multi_pend = x.size - multi_pin

    yw = np.zeros(Ns)                                    # output sound frame (single resolution)
    y = np.zeros(x.size)                                 # output array (single resolution)
    yw_combined = np.zeros(Ns)                           # output sound frame (combined)
    y_combined = np.zeros(x.size)                        # output array (combined)

    w = w / sum(w)                                       # normalize analysis window
    multi_w = [multi_w[i] / sum(multi_w[i]) for i in bands]

    sw = np.zeros(Ns)                                    # initialize synthesis window
    ow = triang(2 * H)                                   # triangular window
    sw[hNs - H:hNs + H] = ow                             # add triangular window
    bh = blackmanharris(Ns)                              # blackmanharris window
    bh = bh / sum(bh)                                    # normalized blackmanharris window
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]  # normalized synthesis window

    while pin < pend and (multi_pin < multi_pend).all():  # while input sound pointers are within sound
        #-----analysis (single resolution)-----
        x1 = x[pin - hM1:pin + hM2]                      # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                   # compute dft
        ploc = UF.peakDetection(mX, t)                   # detect locations of peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values by interpolation
        ipfreq = fs * iploc / float(N)                   # convert peak locations to Hertz

        #-----analysis (one resolution per band)-----
        freq_parts, mag_parts, phase_parts = [], [], []
        for i in bands:
            start = multi_pin[i] - multi_hM1[i]
            stop = multi_pin[i] + multi_hM2[i]
            mXi, pXi = DFT.dftAnal(x[start:stop], multi_w[i], multi_N[i])
            ploci = UF.peakDetection(mXi, t)
            iploci, ipmagi, ipphasei = UF.peakInterp(mXi, pXi, ploci)
            ipfreqi = np.asarray(fs * iploci / float(multi_N[i]))
            in_band = ipfreqi < multi_B[i]               # keep only the peaks of this band
            if i > 0:
                in_band &= ipfreqi >= multi_B[i - 1]
            freq_parts.append(ipfreqi[in_band])
            mag_parts.append(np.asarray(ipmagi)[in_band])
            phase_parts.append(np.asarray(ipphasei)[in_band])
        if freq_parts:
            ipfreq_combined = np.concatenate(freq_parts)
            ipmag_combined = np.concatenate(mag_parts)
            ipphase_combined = np.concatenate(phase_parts)
        else:                                            # no bands at all: nothing to synthesize
            ipfreq_combined = np.zeros(0)
            ipmag_combined = np.zeros(0)
            ipphase_combined = np.zeros(0)

        #-----synthesis-----
        Y = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs)  # generate sines in the spectrum
        fftbuffer = np.real(ifft(Y))                     # compute inverse FFT
        yw[:hNs - 1] = fftbuffer[hNs + 1:]               # undo zero-phase window
        yw[hNs - 1:] = fftbuffer[:hNs + 1]
        y[pin - hNs:pin + hNs] += sw * yw                # overlap-add and apply a synthesis window

        Y_combined = UF.genSpecSines(ipfreq_combined, ipmag_combined, ipphase_combined, Ns, fs)
        fftbuffer_combined = np.real(ifft(Y_combined))   # compute inverse FFT
        yw_combined[:hNs - 1] = fftbuffer_combined[hNs + 1:]  # undo zero-phase window
        yw_combined[hNs - 1:] = fftbuffer_combined[:hNs + 1]
        # overlap-add at the same position as y; advancing pin first would write the
        # frame one hop late and could run past the end of the output array
        y_combined[pin - hNs:pin + hNs] += sw * yw_combined

        pin += H                                         # advance sound pointers
        multi_pin += H

    return y, y_combined
def sineModelMultiRes(x, fs, Ns, W, M, N, B, T):
    """
    Analysis/synthesis of a sound using the multi-resolution sinusoidal model, without sine tracking
    x: input array sound, fs: sampling frequency, Ns: FFT size for synthesis,
    W: array of analysis window types, M: array of analysis windows sizes,
    N: array of sizes of complex spectrums,
    B: array of frequency bands separators (ascending order of frequency, number of bands == B.size + 1),
    T: array of peak detection thresholds in negative dB.
    returns y: output array sound
    raises ValueError when W, N, B and T do not describe the same number of resolutions
    """
    nResolutions = W.size
    if (nResolutions != N.size) or (nResolutions != B.size + 1) or (nResolutions != T.size):
        raise ValueError('Parameters W,N,B,T shall have compatible sizes')

    H = Ns // 4                                          # hop size; integer so it can be used in slices
    hNs = Ns // 2                                        # half of synthesis FFT size
    yw = np.zeros(Ns)                                    # initialize output sound frame
    y = np.zeros(x.size)                                 # initialize output array
    sw = np.zeros(Ns)                                    # initialize synthesis window
    ow = triang(2 * H)                                   # triangular window
    sw[hNs - H:hNs + H] = ow                             # add triangular window
    bh = blackmanharris(Ns)                              # blackmanharris window
    bh = bh / sum(bh)                                    # normalized blackmanharris window
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]  # normalized synthesis window

    # lists (not map objects) so they can be indexed and reused on every frame
    HM1 = [(int(m) + 1) // 2 for m in M]                 # half analysis windows sizes by rounding
    HM2 = [int(m) // 2 for m in M]                       # half analysis windows sizes by floor
    pin = max(hNs, max(HM1))                             # init sound pointer in the middle of largest window
    pend = x.size - pin                                  # last sample to start a frame

    # the windows do not change between frames, so build and normalize them once
    windows = []
    for k in range(nResolutions):
        w = get_window(W[k], int(M[k]))
        windows.append(w / sum(w))

    # band k keeps the peaks with bandLow[k] <= f < bandHigh[k]
    bandLow = [0.0] + [B[k] for k in range(B.size)]      # first frequency range starts from zero
    bandHigh = [B[k] for k in range(B.size)] + [fs / 2.0]  # last frequency range ends at fs/2

    # peaks of different resolutions closer than this are treated as one peak reported twice;
    # as before, the threshold is derived from the FFT size of the last resolution
    freqDiffThreshold = (fs * 6) / float(N[nResolutions - 1])

    while pin < pend:                                    # while input sound pointer is within sound
        #-----multi-resolution spectrum calculation-----
        freqParts, magParts, phaseParts, sizeParts = [], [], [], []
        for k in range(nResolutions):
            x1 = x[pin - HM1[k]:pin + HM2[k]]            # select frame
            mX, pX = DFT.dftAnal(x1, windows[k], N[k])   # compute dft
            ploc = UF.peakDetection(mX, T[k])            # detect locations of peaks
            iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values by interpolation
            ipfreq = np.asarray(fs * iploc / float(N[k]))  # convert peak locations to Hertz
            inBand = (ipfreq >= bandLow[k]) & (ipfreq < bandHigh[k])  # peaks inside the assigned band
            freqParts.append(ipfreq[inBand])
            magParts.append(np.asarray(ipmag)[inBand])
            phaseParts.append(np.asarray(ipphase)[inBand])
            sizeParts.append(np.full(np.count_nonzero(inBand), M[k]))
        combinedIPFreq = np.concatenate(freqParts)
        combinedIPMag = np.concatenate(magParts)
        combinedIPhase = np.concatenate(phaseParts)
        windowSizeAttribution = np.concatenate(sizeParts)

        # Smooth out "double-reported" peaks close to the division frequencies of the frequency ranges
        smoothedIPFreq, smoothedIPMag, smoothedIPhase = [], [], []
        nPeaks = combinedIPFreq.size
        i = 0
        while i < nPeaks:
            isDoubleReport = (
                i + 1 < nPeaks
                and abs(combinedIPFreq[i] - combinedIPFreq[i + 1]) < freqDiffThreshold
                and windowSizeAttribution[i] != windowSizeAttribution[i + 1])
            if isDoubleReport:                           # replace both peaks by their average
                smoothedIPFreq.append((combinedIPFreq[i] + combinedIPFreq[i + 1]) / 2.0)
                smoothedIPMag.append((combinedIPMag[i] + combinedIPMag[i + 1]) / 2.0)
                smoothedIPhase.append((combinedIPhase[i] + combinedIPhase[i + 1]) / 2.0)
                i += 2
            else:                                        # keep the peak; this also covers the last peak,
                smoothedIPFreq.append(combinedIPFreq[i])  # which is therefore never added twice
                smoothedIPMag.append(combinedIPMag[i])
                smoothedIPhase.append(combinedIPhase[i])
                i += 1

        #-----synthesis-----
        Y = UF.genSpecSines(np.array(smoothedIPFreq), np.array(smoothedIPMag),
                            np.array(smoothedIPhase), Ns, fs)  # generate sines in the spectrum
        fftbuffer = np.real(ifft(Y))                     # compute inverse FFT
        yw[:hNs - 1] = fftbuffer[hNs + 1:]               # undo zero-phase window
        yw[hNs - 1:] = fftbuffer[:hNs + 1]
        y[pin - hNs:pin + hNs] += sw * yw                # overlap-add and apply a synthesis window
        pin += H                                         # advance sound pointer

    return y
def hprModel(x, fs, w, N, t, nH, minf0, maxf0, f0et):
    """
    Analysis/synthesis of a sound using the harmonic plus residual model
    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size (minimum 512), t: threshold in negative dB,
    nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz,
    maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5)
    returns y: output sound, yh: harmonic component, xr: residual component
    (all three have x.size samples; they stay all-zero when x is too short for one frame)
    """
    hM1 = (w.size + 1) // 2                              # half analysis window size by rounding
    hM2 = w.size // 2                                    # half analysis window size by floor
    Ns = 512                                             # FFT size for synthesis (even)
    H = Ns // 4                                          # hop size; integer so it can be used in slices
    hNs = Ns // 2                                        # half of synthesis FFT size
    pin = max(hNs, hM1)                                  # initialize sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)                        # last sample to start a frame
    yhw = np.zeros(Ns)                                   # harmonic output frame
    xrw = np.zeros(Ns)                                   # residual output frame
    yh = np.zeros(x.size)                                # harmonic output array
    xr = np.zeros(x.size)                                # residual output array
    w = w / sum(w)                                       # normalize analysis window
    sw = np.zeros(Ns)
    ow = triang(2 * H)                                   # overlapping window
    sw[hNs - H:hNs + H] = ow
    bh = blackmanharris(Ns)                              # synthesis window
    bh = bh / sum(bh)                                    # normalize synthesis window
    wr = bh                                              # window for residual
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]
    hfreqp = []                                          # harmonic frequencies of the previous frame
    f0stable = 0
    while pin < pend:
        #-----analysis-----
        x1 = x[pin - hM1:pin + hM2]                      # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                   # compute dft
        ploc = UF.peakDetection(mX, t)                   # find peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values
        ipfreq = fs * iploc / N                          # convert locations to Hz
        f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        if (f0stable == 0 and f0t > 0) \
                or (f0stable > 0 and np.abs(f0stable - f0t) < f0stable / 5.0):
            f0stable = f0t                               # consider a stable f0 if it is close to the previous one
        else:
            f0stable = 0
        hfreq, hmag, hphase = HM.harmonicDetection(ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs)  # find harmonics
        hfreqp = hfreq
        # NOTE(review): when hM1 <= hNs the first frame has ri == -1, which makes the
        # slice below empty and the multiplication fail; confirm callers always use w.size > Ns.
        ri = pin - hNs - 1                               # input sound pointer for residual analysis
        xw2 = x[ri:ri + Ns] * wr                         # window the input sound
        fftbuffer = np.zeros(Ns)
        fftbuffer[:hNs] = xw2[hNs:]                      # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xw2[:hNs]
        X2 = fft(fftbuffer)                              # compute FFT of input signal for residual analysis
        #-----synthesis-----
        Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)    # generate sines
        Xr = X2 - Yh                                     # get the residual complex spectrum

        fftbuffer = np.real(ifft(Yh))                    # inverse FFT of harmonic spectrum
        yhw[:hNs - 1] = fftbuffer[hNs + 1:]              # undo zero-phase window
        yhw[hNs - 1:] = fftbuffer[:hNs + 1]

        fftbuffer = np.real(ifft(Xr))                    # inverse FFT of residual spectrum
        xrw[:hNs - 1] = fftbuffer[hNs + 1:]              # undo zero-phase window
        xrw[hNs - 1:] = fftbuffer[:hNs + 1]

        yh[ri:ri + Ns] += sw * yhw                       # overlap-add for sines
        xr[ri:ri + Ns] += sw * xrw                       # overlap-add for residual
        pin += H                                         # advance sound pointer

    y = yh + xr                                          # sum of harmonic and residual components
    return y, yh, xr
# Script: generate the spectrum of three sinusoids with UF.genSpecSines and
# plot its magnitude and phase in the first two rows of a 3-row figure.
import stft as STFT
import sineModel as SM
import utilFunctions as UF

Ns = 256                                   # FFT size passed to genSpecSines
hNs = Ns // 2                              # half of the FFT size (spectrum part that is plotted)
yw = np.zeros(Ns)                          # frame buffer of Ns samples (not used below)
fs = 44100                                 # value passed to genSpecSines as fs and used to scale the frequency axis
freqs = np.array([1000.0, 4000.0, 8000.0]) # frequencies of the sinusoids (Hz, per the plot title)
amps = np.array([.6, .4, .6])              # amplitudes of the sinusoids
phases = ([0.5, 1.2, 2.3])                 # phases of the sinusoids (radians, per the plot title)
yploc = Ns * freqs / fs                    # frequencies expressed in FFT bins (not used below)
ypmag = 20 * np.log10(amps / 2.0)          # half amplitudes converted to dB
ypphase = phases
Y = UF.genSpecSines(freqs, ypmag, ypphase, Ns, fs)  # spectrum of the three sinusoids
mY = 20 * np.log10(abs(Y[:hNs]))           # magnitude in dB of the first half of the spectrum
pY = np.unwrap(np.angle(Y[:hNs]))          # unwrapped phase of the first half of the spectrum
y = fftshift(ifft(Y)) * sum(blackmanharris(Ns))  # inverse FFT, re-centered and scaled by the window sum

plt.figure(1, figsize=(9, 5))
plt.subplot(3, 1, 1)                       # first row: magnitude spectrum
plt.plot(fs * np.arange(Ns / 2) / Ns, mY, 'r', lw=1.5)
plt.axis([0, fs / 2.0, -100, 0])
plt.title("mY, freqs (Hz) = 1000, 4000, 8000; amps = .6, .4, .6")
plt.subplot(3, 1, 2)                       # second row: phase spectrum
pY[pY == 0] = np.nan                       # replace exact zeros by NaN before plotting
plt.plot(fs * np.arange(Ns / 2) / Ns, pY, 'c', lw=1.5)
plt.axis([0, fs / 2.0, -.01, 3.0])
plt.title("pY, phases (radians) = .5, 1.2, 2.3")
def sineModelMultiRes(x, f, windows, Nsizes, fBands, t):
    '''
    Multi-resolution sinusoidal analysis/synthesis, without sine tracking.
    For each band
    1. compute the DFT of every frame with the window and FFT size of that band
    2. compute the sinusoid peaks of the DFT
    3. keep the peaks whose frequency belongs to the band and synthesize them
    The synthesized bands are added into a single output.
    x: input array sound, f: sampling rate,
    windows: analysis windows [w1, w2, w3] (arrays),
    Nsizes: FFT sizes [N1, N2, N3], t: threshold in negative dB
    fBands: frequency bands [(Bmin1, Bmax1), (Bmin2, Bmax2), (Bmin3, Bmax3)];
            a band keeps the peaks with Bmin <= frequency < Bmax
    returns y: output array sound
    raises ValueError when the arguments have different lengths, an FFT size is
    not an integer power of two, or a window is bigger than its FFT size
    '''
    import numbers                                       # local import: only needed for the validation below

    fs = f                                               # the body works with the usual name
    nBands = len(windows)
    if len(Nsizes) != nBands or len(fBands) != nBands:
        raise ValueError("windows, Nsizes and fBands must have the same length")
    # integer check first: the power-of-two test uses bit operations
    if not all(isinstance(item, numbers.Integral) for item in Nsizes):
        raise ValueError("All FFT sizes must be ints")
    if not all(item > 0 and (item & (item - 1)) == 0 for item in Nsizes):
        raise ValueError("All FFT size (N) must be a power of 2")
    if any(w.size > N for w, N in zip(windows, Nsizes)):
        raise ValueError("Window size (M) is bigger than FFT size")

    # global synthesis params
    Ns = 512                                             # FFT size for synthesis (even)
    H = Ns // 4                                          # Hop size used for analysis and synthesis
    hNs = Ns // 2                                        # half of synthesis FFT size
    yw = np.zeros(Ns)                                    # initialize output sound frame
    y = np.zeros(x.size)                                 # initialize output array
    sw = np.zeros(Ns)                                    # initialize synthesis window
    ow = triang(2 * H)                                   # triangular window
    sw[hNs - H:hNs + H] = ow                             # add triangular window
    bh = blackmanharris(Ns)                              # blackmanharris window
    bh = bh / sum(bh)                                    # normalized blackmanharris window
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]  # normalized synthesis window

    for w, N, (Bmin, Bmax) in zip(windows, Nsizes, fBands):
        hM1 = (w.size + 1) // 2                          # half analysis window size by rounding
        hM2 = w.size // 2                                # half analysis window size by floor
        pin = max(hNs, hM1)                              # init sound pointer in middle of anal window
        pend = x.size - max(hNs, hM1)                    # last sample to start a frame
        w = w / sum(w)                                   # normalize analysis window
        while pin < pend:                                # while input sound pointer is within sound
            #-----analysis-----
            x1 = x[pin - hM1:pin + hM2]                  # select frame
            mX, pX = DFT.dftAnal(x1, w, N)               # compute dft
            ploc = UF.peakDetection(mX, t)               # detect locations of peaks
            iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values by interpolation
            ipfreq = fs * iploc / float(N)               # convert peak locations to Hertz
            inBand = np.logical_and(ipfreq >= Bmin, ipfreq < Bmax)  # peaks that belong to this band
            #-----synthesis-----
            Y = UF.genSpecSines(ipfreq[inBand], ipmag[inBand], ipphase[inBand], Ns, fs)
            fftbuffer = np.real(ifft(Y))                 # compute inverse FFT
            yw[:hNs - 1] = fftbuffer[hNs + 1:]           # undo zero-phase window
            yw[hNs - 1:] = fftbuffer[:hNs + 1]
            y[pin - hNs:pin + hNs] += sw * yw            # overlap-add and apply a synthesis window
            pin += H                                     # advance sound pointer

    return y
def loudnessHarmonics (fileName, dumpFile = False):
    """
    Compute the loudness of the harmonic component of an audio file, frame by frame
    fileName: path of the audio file to analyze,
    dumpFile: when True, also save the result as '<fileName without its last 4 characters>-loudnessHarmonics.csv'
    returns loudnessData: array with one row per frame and two columns, time stamp in seconds and loudness
    """
    # make the sms-tools models importable
    path2SmsTools = '../../sms-tools-master'
    path2Models = os.path.join(path2SmsTools, 'software/models')
    sys.path.append(path2Models)

    import utilFunctions as UF
    import harmonicModel as HM
    import dftModel as DFT

    # Computing predominant melody
    H = 128
    M = 2048
    fs = 44100
    guessUnvoiced = True
    MELODIA = ess.PredominantMelody(guessUnvoiced=guessUnvoiced,
                                    frameSize=M,
                                    hopSize=H)
    audio = ess.MonoLoader(filename = fileName, sampleRate=fs) ()
    audioEL = ess.EqualLoudness() (audio)
    pitch = MELODIA(audioEL)[0]

    # Computing loudness including harmonics
    LOUDNESS = ess.Loudness()
    winAnalysis = 'hann'
    t = -80
    harmLoudness = []

    ## Synthesis
    nH = 15
    x = audioEL
    w = get_window(winAnalysis, M)
    hM1 = (w.size + 1) // 2                              # half analysis window size by rounding
    hM2 = w.size // 2                                    # half analysis window size by floor
    Ns = 4 * H                                           # FFT size for synthesis
    hNs = Ns // 2                                        # integer so it can be used in slices
    startApp = max(hNs, hM1)
    pin = startApp
    # NOTE(review): pend is computed before x is padded, so the last 2*startApp
    # samples of the padded signal are never analyzed; confirm this is intended.
    pend = x.size - startApp
    x = np.append(np.zeros(startApp), x)
    x = np.append(x, np.zeros(startApp))
    N = 2 * M
    yh = np.zeros(Ns)                                    # output sound frame
    y = np.zeros(x.size)                                 # output array
    w = w / sum(w)                                       # normalize analysis window
    sw = np.zeros(Ns)
    ow = triang(2 * H)
    sw[hNs-H:hNs+H] = ow
    bh = blackmanharris(Ns)
    bh = bh / sum(bh)
    sw[hNs-H:hNs+H] = sw[hNs-H:hNs+H] / bh[hNs-H:hNs+H]
    hfreqp = []
    f0stable = 0
    cnt = 0                                              # index of the current frame in the pitch track
    while pin < pend:
        x1 = x[pin-hM1:pin+hM2]
        mX, pX = DFT.dftAnal(x1, w, N)
        ploc = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        ipfreq = fs * iploc / float(N)
        f0t = pitch[cnt]                                 # f0 comes from the melody track, not from the peaks
        if (f0stable == 0 and f0t > 0) \
                or (f0stable > 0 and np.abs(f0stable - f0t) < f0stable / 5.0):
            f0stable = f0t
        else:
            f0stable = 0
        hfreq, hmag, hphase = HM.harmonicDetection(ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs)
        hfreqp = hfreq

        Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)
        fftbuffer = np.real(ifft(Yh))
        yh[:hNs-1] = fftbuffer[hNs+1:]                   # undo zero-phase window
        yh[hNs-1:] = fftbuffer[:hNs+1]
        yh_frame = sw*yh
        y[pin-hNs:pin+hNs] += yh_frame
        pin += H
        cnt += 1
        harmLoudness.append(LOUDNESS(yh_frame.tolist()))

    harmLoudness = np.array(harmLoudness)
    timeStamps = np.arange(harmLoudness.size) * H / float(fs)
    loudnessData = np.column_stack((timeStamps, harmLoudness))

    # Dumping a csv file
    if dumpFile:
        np.savetxt(fileName[:-4] + '-loudnessHarmonics.csv', loudnessData, delimiter=',')

    return loudnessData
def hpsModel(x, fs, w, N, t, nH, minf0, maxf0, f0et, stocf):
    """
    Analysis/synthesis of a sound using the harmonic plus stochastic model,
    one frame at a time, no harmonic tracking
    x: input sound; fs: sampling rate, w: analysis window; N: FFT size (minimum 512),
    t: threshold in negative dB, nH: maximum number of harmonics,
    minf0: minimum f0 frequency in Hz; maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5);
    stocf: decimation factor of mag spectrum for stochastic analysis
    returns y: output sound, yh: harmonic component, yst: stochastic component
    (all three have x.size samples; they stay all-zero when x is too short for one frame)
    """
    hM1 = (w.size + 1) // 2                              # half analysis window size by rounding
    hM2 = w.size // 2                                    # half analysis window size by floor
    Ns = 512                                             # FFT size for synthesis (even)
    H = Ns // 4                                          # hop size; integer division so it can be used in slices
    hNs = Ns // 2                                        # half of synthesis FFT size (integer for the same reason)
    pin = max(hNs, hM1)                                  # initialize sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)                        # last sample to start a frame
    yhw = np.zeros(Ns)                                   # harmonic output frame
    ystw = np.zeros(Ns)                                  # stochastic output frame
    yh = np.zeros(x.size)                                # harmonic output array
    yst = np.zeros(x.size)                               # stochastic output array
    w = w / sum(w)                                       # normalize analysis window
    sw = np.zeros(Ns)
    ow = triang(2 * H)                                   # overlapping window
    sw[hNs - H:hNs + H] = ow
    bh = blackmanharris(Ns)                              # synthesis window
    bh = bh / sum(bh)                                    # normalize synthesis window
    wr = bh                                              # window for residual
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]  # synthesis window for harmonic component
    sws = H * hanning(Ns) / 2                            # synthesis window for stochastic
    hfreqp = []                                          # harmonic frequencies of the previous frame
    f0stable = 0
    while pin < pend:
        #-----analysis-----
        x1 = x[pin - hM1:pin + hM2]                      # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                   # compute dft
        ploc = UF.peakDetection(mX, t)                   # find peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values
        ipfreq = fs * iploc / N                          # convert peak locations to Hz
        f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        if (f0stable == 0 and f0t > 0) \
                or (f0stable > 0 and np.abs(f0stable - f0t) < f0stable / 5.0):
            f0stable = f0t                               # consider a stable f0 if it is close to the previous one
        else:
            f0stable = 0
        hfreq, hmag, hphase = HM.harmonicDetection(ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs)  # find harmonics
        hfreqp = hfreq
        # NOTE(review): when hM1 <= hNs the first frame has ri == -1, which makes the
        # slice below empty and the multiplication fail; confirm callers always use w.size > Ns.
        ri = pin - hNs - 1                               # input sound pointer for residual analysis
        xw2 = x[ri:ri + Ns] * wr                         # window the input sound
        fftbuffer = np.zeros(Ns)
        fftbuffer[:hNs] = xw2[hNs:]                      # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xw2[:hNs]
        X2 = fft(fftbuffer)                              # compute FFT for residual analysis
        #-----synthesis-----
        Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)    # generate spec sines of harmonic component
        Xr = X2 - Yh                                     # get the residual complex spectrum
        mXr = 20 * np.log10(abs(Xr[:hNs]))               # magnitude spectrum of residual
        # resample needs an integer number of samples; int() truncates like the old implicit cast
        mXrenv = resample(np.maximum(-200, mXr), int(mXr.size * stocf))  # decimate and avoid -Inf
        stocEnv = resample(mXrenv, hNs)                  # interpolate to original size
        pYst = 2 * np.pi * np.random.rand(hNs)           # generate phase random values
        Yst = np.zeros(Ns, dtype=complex)
        Yst[:hNs] = 10**(stocEnv / 20) * np.exp(1j * pYst)                       # generate positive freq.
        Yst[hNs + 1:] = 10**(stocEnv[:0:-1] / 20) * np.exp(-1j * pYst[:0:-1])    # generate negative freq.

        fftbuffer = np.real(ifft(Yh))                    # inverse FFT of harmonic spectrum
        yhw[:hNs - 1] = fftbuffer[hNs + 1:]              # undo zero-phase window
        yhw[hNs - 1:] = fftbuffer[:hNs + 1]

        fftbuffer = np.real(ifft(Yst))                   # inverse FFT of stochastic spectrum
        ystw[:hNs - 1] = fftbuffer[hNs + 1:]             # undo zero-phase window
        ystw[hNs - 1:] = fftbuffer[:hNs + 1]

        yh[ri:ri + Ns] += sw * yhw                       # overlap-add for sines
        yst[ri:ri + Ns] += sws * ystw                    # overlap-add for stochastic
        pin += H                                         # advance sound pointer

    y = yh + yst                                         # sum of harmonic and stochastic components
    return y, yh, yst
def synthesis(self, harmonicsOutputFilename = None, residualOutputFilename = None):
    """
    Synthesize the harmonic and the residual components of self.audio, using
    self.pitch as the f0 of every frame, and write both as wav files
    harmonicsOutputFilename: output file of the harmonics
        (default: self.inputFilename without its last 4 characters + '-harmonics.wav'),
    residualOutputFilename: output file of the residual
        (default: self.inputFilename without its last 4 characters + '-residual.wav')
    returns (yho, xro): harmonic and residual sounds, also stored in self.yho and self.xro;
    returns None, after printing a message, when self.pitch is empty or has fewer
    frames than self.mX
    """
    if len(self.pitch) == 0:
        print('please do getMelody at first, then do saveMelody.')
        return

    if len(self.pitch) < len(self.mX):
        print('please make sure that the pitch track belongs to the loaded audio, and they are equal length.')
        return

    if len(self.pitch) > len(self.mX):
        print('pitch track has more frames than audio file, we will cut frames surplus in pitch track.')
        self.pitch = self.pitch[:len(self.mX)]

    if harmonicsOutputFilename is None:
        harmonicsOutputFilename = self.inputFilename[:-4] + '-harmonics.wav'
    if residualOutputFilename is None:
        residualOutputFilename = self.inputFilename[:-4] + '-residual.wav'

    #----- synthesis code-----
    H = self.hopSize
    M = self.frameSize
    N = 2*self.frameSize
    fs = self.fs

    t = -60                                              # threshold peak detection
    devRatio = 10
    nH = 15
    x = self.audio
    winAnalysis = 'hann'
    w = get_window(winAnalysis, M)
    hM1 = (w.size + 1) // 2                              # half analysis window size by rounding
    hM2 = w.size // 2                                    # half analysis window size by floor
    Ns = 4*H                                             # FFT size for synthesis (even)
    hNs = Ns // 2                                        # integer so it can be used in slices
    startApp = max(hNs, hM1)                             # init sound pointer in middle of anal window
    pin = startApp
    # NOTE(review): pend is computed before x is padded, so the last 2*startApp
    # samples of the padded signal are never analyzed; confirm this is intended.
    pend = x.size - startApp                             # last sample to start a frame
    x = np.append(np.zeros(startApp), x)                 # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(startApp))                 # add zeros at the end to analyze last sample
    yh = np.zeros(Ns)                                    # harmonics output sound frame
    yho = np.zeros(x.size)                               # output array harmonics
    xr = np.zeros(Ns)                                    # residual output sound frame
    xro = np.zeros(x.size)                               # output array residual
    w = w / sum(w)                                       # normalize analysis window
    sw = np.zeros(Ns)                                    # initialize synthesis window
    ow = triang(2*H)                                     # overlapping window
    sw[hNs-H:hNs+H] = ow
    bh = blackmanharris(Ns)                              # synthesis window
    bh = bh / sum(bh)                                    # normalize synthesis window
    sw[hNs-H:hNs+H] = sw[hNs-H:hNs+H] / bh[hNs-H:hNs+H]  # window for overlap-add
    hfreqp = []
    f0stable = 0
    cnt = 0                                              # index of the current frame in the pitch track

    print('synthesizing ... ...')
    while pin < pend:
        #-----analysis-----
        x1 = x[pin-hM1:pin+hM2]                          # select frame
        x2 = x[pin-hNs-1:pin+hNs-1]                      # frame used for the residual
        mX, pX = DFT.dftAnal(x1, w, N)                   # compute dft
        ploc = UF.peakDetection(mX, t)                   # detect peak locations
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values
        ipfreq = fs * iploc / float(N)                   # convert peak locations to Hz
        f0t = self.pitch[cnt]                            # f0 comes from the pitch track
        if (f0stable == 0 and f0t > 0) \
                or (f0stable > 0 and np.abs(f0stable - f0t) < f0stable / 5.0):
            f0stable = f0t                               # consider a stable f0 if it is close to the previous one
        else:
            f0stable = 0
        hfreq, hmag, hphase = HM.harmonicDetection(ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs, devRatio)  # find harmonics
        hfreqp = hfreq

        #-----synthesis-----
        #-----harmonics-----
        Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)    # generate spec sines
        fftbuffer = np.real(ifft(Yh))                    # inverse FFT
        yh[:hNs-1] = fftbuffer[hNs+1:]                   # undo zero-phase window
        yh[hNs-1:] = fftbuffer[:hNs+1]
        yho[pin-hNs:pin+hNs] += sw*yh                    # overlap-add

        #-----residual-----
        X2 = fft(fftshift(x2*bh))
        Xr = X2 - Yh
        fftbuffer = np.real(ifft(Xr))                    # inverse FFT
        xr[:hNs-1] = fftbuffer[hNs+1:]                   # undo zero-phase window
        xr[hNs-1:] = fftbuffer[:hNs+1]
        xro[pin-hNs:pin+hNs] += sw*xr                    # overlap-add

        pin += H                                         # advance sound pointer
        cnt += 1

    yho = np.delete(yho, range(startApp))                        # delete the zeros added at the beginning
    yho = np.delete(yho, range(yho.size-startApp, yho.size))     # delete the zeros added at the end
    xro = np.delete(xro, range(startApp))                        # delete the zeros added at the beginning
    xro = np.delete(xro, range(xro.size-startApp, xro.size))     # delete the zeros added at the end

    UF.wavwrite(yho, fs, harmonicsOutputFilename)
    UF.wavwrite(xro, fs, residualOutputFilename)

    print('synthesis done, harmonics file is saved at :' + harmonicsOutputFilename + '\n' +
          'residual file is saved at :' + residualOutputFilename + '\n')

    self.yho = yho
    self.xro = xro

    return (yho, xro)
def sineModelMultiRes(x, fs, w1, w2, w3, N1, N2, N3, t, B1, B2, B3):
    """
    Analysis/synthesis of a sound using the sinusoidal model with three analysis
    resolutions, without sine tracking
    x: input array sound, fs: sampling rate,
    w1, w2, w3: analysis windows of the three bands, N1, N2, N3: FFT sizes of the three bands,
    t: threshold in negative dB,
    B1, B2, B3: band limits passed to selectPeaks (B3 is limited to 22050)
    returns y: output array sound, which is also written to
    "./OutFile(sineModel_MultiresAnalysis).wav"
    Prints the analysis parameters and the peaks of the last frame for debugging.
    """
    if B3 > 22050:                                       # same limit as before (44100 / 2)
        B3 = 22050

    Ns = 512                                             # FFT size for synthesis (even)
    H = Ns // 4                                          # hop size; integer so it can be used in slices
    hNs = Ns // 2                                        # half of synthesis FFT size

    windows = (w1, w2, w3)
    fftSizes = (N1, N2, N3)
    halfRound = [(w.size + 1) // 2 for w in windows]     # half analysis window sizes by rounding
    halfFloor = [w.size // 2 for w in windows]           # half analysis window sizes by floor

    # ------ report the analysis values of the three frequency bands --------
    for band in range(3):
        pinBand = max(hNs, halfRound[band])              # sound pointer in middle of this band's window
        pendBand = x.size - pinBand                      # last sample to start a frame for this band
        print("\nThe analysis parameters for the band #%d:" % (band + 1))
        print("N%d" % (band + 1))
        print(fftSizes[band])
        print("Half analysis window size by rounding (hM%d1):" % (band + 1))
        print(halfRound[band])
        print("pin %d:" % (band + 1))
        print(pinBand)
        print("pend %d:" % (band + 1))
        print(pendBand)
    print("\n")
    windows = [w / sum(w) for w in windows]              # normalize analysis windows

    #----- The synthesis parameters -----
    # NOTE(review): the frame positions depend only on the first window; a larger
    # w2 or w3 would be sliced outside of x near the borders - confirm w1 is the largest.
    hM1 = halfRound[0]
    hM2 = halfFloor[0]
    print("hM1, hM2:")
    print("%s %s" % (hM1, hM2))
    pin = max(hNs, hM1)                                  # init sound pointer in middle of anal window
    pend = x.size - max(hNs, hM1)                        # last sample to start a frame
    print("pin, pend:")
    print(pin)
    print(pend)

    yw = np.zeros(Ns)                                    # initialize output sound frame
    y = np.zeros(x.size)                                 # initialize output array
    sw = np.zeros(Ns)                                    # initialize synthesis window
    ow = triang(2 * H)                                   # triangular window
    sw[hNs - H:hNs + H] = ow                             # add triangular window
    bh = blackmanharris(Ns)                              # blackmanharris window
    bh = bh / sum(bh)                                    # normalized blackmanharris window
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]  # normalized synthesis window

    # last-frame values reported after the loop; empty when x is too short for one frame
    ipfreq1 = ipmag1 = ipphase1 = np.array([])
    ipfreq2 = ipmag2 = ipphase2 = np.array([])
    ipfreq3 = ipmag3 = ipphase3 = np.array([])
    ipfreq = ipmag = ipphase = np.array([])

    #------- Analyze the sound "x" in the 3 frequency bands and synthesize the sound "y" -----------
    frame = 0
    while pin < pend:                                    # while input sound pointer is within sound
        peaks = []
        for band in range(3):
            # copy only the frame (instead of the whole sound) before handing it to the analysis
            xb = x[pin - halfRound[band]:pin + halfFloor[band]].copy()
            mXb, pXb = DFT.dftAnal(xb, windows[band], fftSizes[band])  # compute dft
            plocb = UF.peakDetection(mXb, t)             # detect locations of peaks
            iplocb, ipmagb, ipphaseb = UF.peakInterp(mXb, pXb, plocb)  # refine peak values by interpolation
            ipfreqb = fs * iplocb / float(fftSizes[band])  # convert peak locations to Hertz
            peaks.append((ipfreqb, ipmagb, ipphaseb))
        (ipfreq1, ipmag1, ipphase1), (ipfreq2, ipmag2, ipphase2), (ipfreq3, ipmag3, ipphase3) = peaks

        ipfreq, ipmag, ipphase = selectPeaks(ipfreq1, ipmag1, ipphase1,
                                             ipfreq2, ipmag2, ipphase2,
                                             ipfreq3, ipmag3, ipphase3,
                                             B1, B2, B3)

        Y = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs)  # generate sines in the spectrum
        fftbuffer = np.real(ifft(Y))                     # compute inverse FFT
        yw[:hNs - 1] = fftbuffer[hNs + 1:]               # undo zero-phase window
        yw[hNs - 1:] = fftbuffer[:hNs + 1]
        y[pin - hNs:pin + hNs] += sw * yw                # overlap-add and apply a synthesis window
        pin += H                                         # advance sound pointer
        frame += 1

    print("\n\n----- STATISTICS FOR THE LAST FRAME (for debug purposes, etc) -----\n")
    print("The last frame frequencies values:")
    print(ipfreq1)
    print(ipfreq2)
    print(ipfreq3)
    print("The last frame magnitudes values:")
    print(ipmag1)
    print(ipmag2)
    print(ipmag3)
    print("The last frame phases values:")
    print(ipphase1)
    print(ipphase2)
    print(ipphase3)
    print("\n")
    print("Synthesizing the following frequencies for the last frame:")
    print(ipfreq)
    print("Synthesizing the following magnitudes for the last frame:")
    print(ipmag)
    print("Synthesizing the following phases for the last frame:")
    print(ipphase)
    print("\n")
    print("Total frames analyzed/synthesized:")
    print(frame)
    print("\n\n")

    UF.wavwrite(y, fs, "./OutFile(sineModel_MultiresAnalysis).wav")
    return y
def spsModel(x, fs, w, N, t, stocf):
    """
    Analysis/synthesis of a sound using the sinusoidal plus stochastic model
    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size (minimum 512), t: threshold in negative dB,
    stocf: decimation factor of mag spectrum for stochastic analysis
    returns y: output sound, ys: sinusoidal component, yst: stochastic component
    """
    # Floor division keeps every size/index an int under both Python 2 and
    # Python 3 (true division would yield floats that cannot index arrays).
    hN = N // 2                                   # size of positive spectrum
    hM1 = (w.size + 1) // 2                       # half analysis window size by rounding
    hM2 = w.size // 2                             # half analysis window size by floor
    Ns = 512                                      # FFT size for synthesis (even)
    H = Ns // 4                                   # hop size used for analysis and synthesis
    hNs = Ns // 2                                 # half of synthesis FFT size
    # The residual frame starts at pin-hNs-1, so the first pin must be at
    # least hNs+1; otherwise that start index is -1, the slice comes back
    # empty and the windowing multiplication fails.
    pin = max(hNs + 1, hM1)                       # first frame centre
    pend = x.size - max(hNs, hM1)                 # last sample to start a frame
    ysw = np.zeros(Ns)                            # sinusoidal output frame
    ystw = np.zeros(Ns)                           # stochastic output frame
    ys = np.zeros(x.size)                         # sinusoidal output signal
    yst = np.zeros(x.size)                        # stochastic output signal
    w = w / sum(w)                                # normalize analysis window
    sw = np.zeros(Ns)                             # synthesis window
    ow = triang(2 * H)                            # overlapping (triangular) window
    sw[hNs-H:hNs+H] = ow
    bh = blackmanharris(Ns)                       # blackman-harris window
    bh = bh / sum(bh)                             # normalized blackman-harris window
    wr = bh                                       # window for residual analysis
    sw[hNs-H:hNs+H] = sw[hNs-H:hNs+H] / bh[hNs-H:hNs+H]

    # Loop-invariant quantities of the stochastic approximation.
    stocSize = int(hNs * stocf)                   # decimated envelope length (must be an int)
    fc = 1 + int(round(500.0 / fs * Ns))          # 500 Hz to bin location
    hpRamp = (np.arange(fc) / float(fc - 1)) ** 2  # quadratic ramp used as high pass filter

    while pin < pend:
        #-----analysis-----
        xw = x[pin-hM1:pin+hM2] * w               # window the input sound
        fftbuffer = np.zeros(N)                   # reset buffer
        fftbuffer[:hM1] = xw[hM2:]                # zero-phase window in fftbuffer
        fftbuffer[N-hM2:] = xw[:hM2]
        X = fft(fftbuffer)                        # compute FFT
        mX = 20 * np.log10(abs(X[:hN]))           # magnitude spectrum of positive frequencies
        # NOTE(review): other call sites in this file use the two-argument
        # form UF.peakDetection(mX, t); confirm which signature UF provides.
        ploc = UF.peakDetection(mX, hN, t)
        pX = np.unwrap(np.angle(X[:hN]))          # unwrapped phase spect. of positive freq.
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values
        # Peak locations are in analysis-FFT bins (size N); convert them to
        # Hertz once.  The previous code first rescaled the locations to
        # synthesis bins and then divided by N again, which shifted every
        # sinusoid by a factor Ns/N whenever N != Ns.
        ipfreq = fs * iploc / float(N)
        ri = pin - hNs - 1                        # input sound pointer for residual analysis
        xr = x[ri:ri+Ns] * wr                     # window the input sound
        fftbuffer = np.zeros(Ns)                  # reset buffer
        fftbuffer[:hNs] = xr[hNs:]                # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xr[:hNs]
        Xr = fft(fftbuffer)                       # compute FFT for residual analysis

        #-----synthesis-----
        Ys = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs)  # spectrum of sinusoidal component
        Yr = Xr - Ys                              # residual complex spectrum
        mYr = 20 * np.log10(abs(Yr[:hNs]))        # magnitude spectrum of residual
        mYrenv = resample(np.maximum(-200, mYr), stocSize)  # decimate the envelope and avoid -Inf
        mYst = resample(mYrenv, hNs)              # interpolate to original size
        mYst = 10 ** (mYst / 20)                  # dB to linear magnitude
        mYst[:fc] *= hpRamp                       # high pass filter the stochastic component
        pYst = 2 * np.pi * np.random.rand(hNs)    # generate phase random values
        Yst = np.zeros(Ns, dtype=complex)
        Yst[:hNs] = mYst * np.exp(1j * pYst)      # generate positive freq.
        Yst[hNs+1:] = mYst[:0:-1] * np.exp(-1j * pYst[:0:-1])  # generate negative freq.

        fftbuffer = np.real(ifft(Ys))             # inverse FFT of sinusoidal spectrum
        ysw[:hNs-1] = fftbuffer[hNs+1:]           # undo zero-phase window
        ysw[hNs-1:] = fftbuffer[:hNs+1]

        fftbuffer = np.real(ifft(Yst))            # inverse FFT of stochastic spectrum
        ystw[:hNs-1] = fftbuffer[hNs+1:]          # undo zero-phase window
        ystw[hNs-1:] = fftbuffer[:hNs+1]

        ys[ri:ri+Ns] += sw * ysw                  # overlap-add for sines
        yst[ri:ri+Ns] += sw * ystw                # overlap-add for stochastic part
        pin += H                                  # advance sound pointer

    y = ys + yst                                  # sum of sinusoidal and stochastic components
    return y, ys, yst
# Analyze one frame of an oboe note with the sinusoidal model, synthesize its
# spectrum from the detected peaks, and plot the input frame and its spectrum.
(fs, x) = UF.wavread("../../../sounds/oboe-A4.wav")

# ----- analysis / synthesis parameters -----
M = 601                      # analysis window size
w = np.blackman(M)           # analysis window
N = 1024                     # analysis FFT size
hN = N // 2                  # floor division: used as an axis limit / bin count
Ns = 512                     # synthesis FFT size
hNs = Ns // 2                # floor division: used as a slice bound below
pin = 5000                   # first sample of the analyzed frame
t = -70                      # peak detection threshold

# ----- analysis of a single frame -----
x1 = x[pin : pin + w.size]
mX, pX = DFT.dftAnal(x1, w, N)
ploc = UF.peakDetection(mX, t)
iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
freqs = iploc * fs / N       # peak locations (bins of the size-N FFT) to Hertz

# ----- synthesis of the sinusoidal spectrum -----
Y = UF.genSpecSines(freqs, ipmag, ipphase, Ns, fs)
mY = 20 * np.log10(abs(Y[:hNs]))
pY = np.unwrap(np.angle(Y[:hNs]))
y = fftshift(ifft(Y)) * sum(blackmanharris(Ns))

# ----- plots -----
plt.figure(1, figsize=(9, 6))

# input frame, time axis centred on the window
plt.subplot(4, 1, 1)
plt.plot(np.arange(-M // 2, M // 2), x1, "b", lw=1.5)
plt.axis([-M // 2, M // 2, min(x1), max(x1)])
plt.title("x (oboe-A4.wav), M = 601")

# magnitude spectrum with the interpolated peaks marked
plt.subplot(4, 1, 2)
plt.plot(np.arange(mX.size), mX, "r", lw=1.5)
plt.plot(iploc, ipmag, marker="x", color="b", linestyle="", markeredgewidth=1.5)
plt.axis([0, hN, -90, max(mX) + 2])
def sineModelMultiRes(x, fs, w, N, t, b):
    """
    Analysis/synthesis of a sound using the sinusoidal model, without sine tracking
    x: input array sound, fs: sampling rate, w: array of analysis windows,
    N: array of sizes of complex spectrum, t: threshold in negative dB,
    b: array of bandwidths' right borders
    returns y: output array sound

    Every frame is analyzed once per entry of w/N/b.  From the analysis made
    with window i only the peaks with frequency in [b[i-1], b[i]) are kept
    (the lower border of the first band is 0), and the union of the kept
    peaks is synthesized.
    """
    # Floor division keeps the sizes integers under both Python 2 and
    # Python 3; they are used as slice bounds below.
    Ns = 512                                      # FFT size for synthesis (even)
    H = Ns // 4                                   # hop size used for analysis and synthesis
    hNs = Ns // 2                                 # half of synthesis FFT size
    yw = np.zeros(Ns)                             # initialize output sound frame
    y = np.zeros(x.size)                          # initialize output array
    sw = np.zeros(Ns)                             # initialize synthesis window
    ow = triang(2 * H)                            # triangular window
    sw[hNs-H:hNs+H] = ow                          # add triangular window
    bh = blackmanharris(Ns)                       # blackmanharris window
    bh = bh / sum(bh)                             # normalized blackmanharris window
    sw[hNs-H:hNs+H] = sw[hNs-H:hNs+H] / bh[hNs-H:hNs+H]  # normalized synthesis window

    # Per-window analysis parameters; pin/pend are tightened so that a frame
    # fits inside x for every analysis window and for the synthesis frame.
    winParams = []
    pin = 0
    pend = x.size - 1
    for winNum, win in enumerate(w):
        fftN = N[winNum]
        bandFreq = b[winNum]
        hM1 = (win.size + 1) // 2                 # half analysis window size by rounding
        hM2 = win.size // 2                       # half analysis window size by floor
        pin = max(pin, max(hNs, hM1))             # init sound pointer in middle of anal window
        pend = min(pend, x.size - pin)            # last sample to start a frame
        win = win / sum(win)                      # normalize analysis window
        winParams.append({'hM1': hM1, 'hM2': hM2, 'w': win, 'N': fftN, 'b': bandFreq})

    while pin < pend:                             # while input sound pointer is within sound
        #-----analysis-----
        prevFreq = 0.0                            # left border of the current band
        ipfreq = []
        ipmag = []
        ipphase = []
        for wp in winParams:
            x1 = x[pin-wp['hM1']:pin+wp['hM2']]   # select frame
            mX, pX = DFT.dftAnal(x1, wp['w'], wp['N'])  # compute dft
            plocw = UF.peakDetection(mX, t)       # detect locations of peaks
            iplocw, ipmagw, ipphasew = UF.peakInterp(mX, pX, plocw)  # refine peak values by interpolation
            ipfreqw = fs * iplocw / float(wp['N'])  # convert peak locations to Hertz
            for freq, mag, phase in zip(ipfreqw, ipmagw, ipphasew):
                if freq < prevFreq:               # below this window's band
                    continue
                if freq >= wp['b']:               # reached the band's right border: stop scanning
                    break
                ipfreq.append(freq)
                ipmag.append(mag)
                ipphase.append(phase)
            prevFreq = wp['b']                    # next band starts where this one ends
        ipfreq = np.array(ipfreq)
        ipmag = np.array(ipmag)
        ipphase = np.array(ipphase)

        #-----synthesis-----
        Y = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs)  # generate sines in the spectrum
        fftbuffer = np.real(ifft(Y))              # compute inverse FFT
        yw[:hNs-1] = fftbuffer[hNs+1:]            # undo zero-phase window
        yw[hNs-1:] = fftbuffer[:hNs+1]
        y[pin-hNs:pin+hNs] += sw * yw             # overlap-add and apply a synthesis window
        pin += H                                  # advance sound pointer
    return y