def stochasticModelSynth(stocEnv, H, N):
    """
    Stochastic synthesis of a sound.

    Every frame of the envelope is interpolated to N//2 + 1 bins, combined
    with uniformly random phases, mirrored into a conjugate-symmetric
    spectrum, inverse transformed, windowed and overlap-added at hop H.

    stocEnv: stochastic envelope, 2-D array with one row per frame (dB magnitudes)
    H: hop size in samples
    N: fft size, must be a power of two
    returns y: output sound

    Raises ValueError if N is not a power of two.

    Note: the output buffer holds H*(L+3) samples for L frames, so a frame of
    N samples only fits the overlap-add when N <= 4*H.
    """
    if not UF.isPower2(N):                         # raise error if N not a power of two
        raise ValueError("N is not a power of two")
    # Integer division: these values are used as array sizes and slice bounds,
    # which must be ints under Python 3.
    hN = N // 2 + 1                                # positive size of fft
    No2 = N // 2                                   # half of N
    L = stocEnv[:, 0].size                         # number of frames
    ysize = H * (L + 3)                            # output sound size
    y = np.zeros(ysize)                            # initialize output array
    ws = 2 * hanning(N)                            # synthesis window
    pout = 0                                       # output sound pointer
    for l in range(L):
        mY = resample(stocEnv[l, :], hN)           # interpolate to original size
        pY = 2 * np.pi * np.random.rand(hN)        # generate phase random values
        Y = np.zeros(N, dtype=complex)             # initialize synthesis spectrum
        Y[:hN] = 10 ** (mY / 20) * np.exp(1j * pY)                      # positive freq.
        Y[hN:] = 10 ** (mY[-2:0:-1] / 20) * np.exp(-1j * pY[-2:0:-1])   # negative freq. (mirror)
        fftbuffer = np.real(ifft(Y))               # inverse FFT
        y[pout:pout + N] += ws * fftbuffer         # overlap-add
        pout += H
    y = np.delete(y, range(No2))                   # delete half of first window
    y = np.delete(y, range(y.size - No2, y.size))  # delete half of the last window
    return y
def dftAnal(x, w, N):
    """
    Analysis of a signal using the discrete Fourier transform.

    x: input signal (same length as w)
    w: analysis window
    N: FFT size, a power of two not smaller than the window size
    returns mX, pX: magnitude spectrum in dB and unwrapped phase spectrum,
        both covering the N//2 + 1 non-negative frequency bins

    Raises ValueError if N is not a power of two or if the window is longer
    than N.

    NOTE(review): relies on a module-level threshold `tol` that is defined
    outside this function.
    """
    if not UF.isPower2(N):                         # raise error if N not a power of two
        raise ValueError("FFT size (N) is not a power of 2")
    if w.size > N:                                 # raise error if window size bigger than fft size
        raise ValueError("Window size (M) is bigger than FFT size")

    hN = N // 2 + 1                                # size of positive spectrum, it includes sample 0
    hM1 = (w.size + 1) // 2                        # half analysis window size by rounding
    hM2 = w.size // 2                              # half analysis window size by floor
    fftbuffer = np.zeros(N)                        # initialize buffer for FFT
    w = w / sum(w)                                 # normalize analysis window
    xw = x * w                                     # window the input sound
    fftbuffer[:hM1] = xw[hM2:]                     # zero-phase window in fftbuffer
    # Index from N - hM2 rather than -hM2: when hM2 == 0 (one-sample window)
    # "-0" would address the whole buffer and the assignment would fail.
    fftbuffer[N - hM2:] = xw[:hM2]
    X = fft(fftbuffer)                             # compute FFT
    posX = X[:hN]                                  # view on the positive side of the spectrum
    absX = abs(posX)                               # compute absolute value of positive side
    eps = np.finfo(float).eps
    absX[absX < eps] = eps                         # if zeros add epsilon to handle log
    mX = 20 * np.log10(absX)                       # magnitude spectrum of positive frequencies in dB
    # For the phase calculation, zero out tiny real/imaginary parts so numerical
    # noise does not produce arbitrary angles (writes through the view into X).
    posX.real[np.abs(posX.real) < tol] = 0.0
    posX.imag[np.abs(posX.imag) < tol] = 0.0
    pX = np.unwrap(np.angle(posX))                 # unwrapped phase spectrum of positive frequencies
    return mX, pX
def dft_anal(x, w, N):
    """
    Analysis of a signal using the discrete Fourier transform.

    x: input signal (same length as w)
    w: analysis window
    N: FFT size, a power of two not smaller than the window size
    returns mX, pX: magnitude spectrum in dB and unwrapped phase spectrum,
        both covering the N//2 + 1 non-negative frequency bins

    Raises ValueError if N is not a power of two or if the window is longer
    than N.

    NOTE(review): relies on a module-level threshold `tol` that is defined
    outside this function.
    """
    if not utilFunctions.isPower2(N):  # raise error if N not a power of two
        raise ValueError("FFT size (N) is not a power of 2")
    if w.size > N:  # raise error if window size bigger than fft size
        raise ValueError("Window size (M) is bigger than FFT size")

    # Integer division: hN is used as a slice bound and must be an int.
    hN = N // 2 + 1  # size of positive spectrum, it includes sample 0
    hM1 = (w.size + 1) // 2  # half analysis window size by rounding
    hM2 = w.size // 2  # half analysis window size by floor
    fftbuffer = np.zeros(N)  # initialize buffer for FFT
    w = w / sum(w)  # normalize analysis window
    xw = x * w  # window the input sound
    fftbuffer[:hM1] = xw[hM2:]  # zero-phase window in fftbuffer
    # Index from N - hM2 so that hM2 == 0 selects an empty tail instead of the
    # whole buffer (which "-0" would do).
    fftbuffer[N - hM2:] = xw[:hM2]
    X = fft(fftbuffer)  # compute FFT
    posX = X[:hN]  # view on the positive side of the spectrum
    absX = abs(posX)  # compute absolute value of positive side
    eps = np.finfo(float).eps
    absX[absX < eps] = eps  # if zeros add epsilon to handle log
    mX = 20 * np.log10(absX)  # magnitude spectrum of positive frequencies in dB
    # For the phase calculation set the small values to 0 (writes into X via the view).
    posX.real[np.abs(posX.real) < tol] = 0.0
    posX.imag[np.abs(posX.imag) < tol] = 0.0
    pX = np.unwrap(np.angle(posX))  # unwrapped phase spectrum of positive frequencies
    return mX, pX
def stochasticModelSynth(stocEnv, H, N):
    """
    Stochastic synthesis of a sound.

    Rebuilds a noise-like signal from a per-frame spectral envelope: each
    envelope row is resampled to the positive-frequency size, given random
    phases, mirrored to the negative frequencies, inverse transformed and
    overlap-added with a scaled Hanning window.

    stocEnv: stochastic envelope (one row per frame, magnitudes in dB)
    H: hop size
    N: fft size (power of two)
    returns y: output sound

    Raises ValueError if N is not a power of two.
    """
    if not UF.isPower2(N):
        raise ValueError("N is not a power of two")

    half = N // 2                                   # half of N
    nbins = half + 1                                # positive size of fft
    nframes = stocEnv[:, 0].size                    # number of frames
    out = np.zeros(H * (nframes + 3))               # output accumulator
    window = 2 * hanning(N)                         # synthesis window

    for frame in range(nframes):
        start = frame * H                           # write position of this frame
        mag_db = resample(stocEnv[frame, :], nbins) # interpolate to original size
        phase = 2 * np.pi * np.random.rand(nbins)   # random phases in [0, 2*pi)
        mirror_mag = mag_db[-2:0:-1]                # bins nbins-2 .. 1, reversed
        mirror_phase = phase[-2:0:-1]

        spectrum = np.zeros(N, dtype=complex)
        spectrum[:nbins] = 10**(mag_db / 20) * np.exp(1j * phase)
        spectrum[nbins:] = 10**(mirror_mag / 20) * np.exp(-1j * mirror_phase)

        out[start:start + N] += window * np.real(ifft(spectrum))   # overlap-add

    # Drop the half windows that stick out at both ends.
    out = np.delete(out, range(half))
    return np.delete(out, range(out.size - half, out.size))
def dftSynth(mX, pX, M):
    """
    Synthesis of a signal using the discrete Fourier transform.

    mX: magnitude spectrum in dB (positive frequencies, N/2 + 1 bins)
    pX: phase spectrum (same size as mX)
    M: window size, i.e. number of output samples
    returns y: output signal of M samples

    Raises ValueError if the FFT size implied by mX, (mX.size - 1) * 2, is
    not a power of two.
    """
    hN = mX.size  # size of positive spectrum, it includes sample 0
    N = (hN - 1) * 2  # FFT size
    if not UF.isPower2(N):  # raise error if N not a power of two, thus mX is wrong
        raise ValueError("size of mX is not (N/2)+1")

    hM1 = (M + 1) // 2  # half analysis window size by rounding
    hM2 = M // 2  # half analysis window size by floor
    y = np.zeros(M)  # initialize output array
    Y = np.zeros(N, dtype=complex)  # clean output spectrum
    Y[:hN] = 10**(mX / 20) * np.exp(1j * pX)  # generate positive frequencies
    Y[hN:] = 10**(mX[-2:0:-1] / 20) * np.exp(
        -1j * pX[-2:0:-1])  # generate negative frequencies (mirrored, conjugated)
    fftbuffer = np.real(ifft(Y))  # compute inverse FFT
    # Undo the zero-phase windowing. The tail is indexed from N - hM2 so that
    # hM2 == 0 (M == 1) yields an empty slice instead of the whole buffer.
    y[:hM2] = fftbuffer[N - hM2:]
    y[hM2:] = fftbuffer[:hM1]
    return y
def stochasticModel(x, H, N, stocf):
    """
    Stochastic analysis/synthesis of a sound, one frame at a time.

    For each frame the dB magnitude spectrum is decimated to a coarse
    envelope, interpolated back, combined with random phases and
    overlap-added into the output.

    x: input array sound
    H: hop size (must be positive)
    N: fft size (power of two)
    stocf: decimation factor of mag spectrum for stochastic analysis,
        bigger than 0, maximum of 1
    returns y: output sound

    Raises ValueError for a decimation factor that is too small or above 1,
    a non-positive hop size, or an FFT size that is not a power of two.
    """
    half = N // 2          # half of N
    nbins = half + 1       # positive size of fft

    # Parameter validation (order matters for which message is reported).
    if nbins * stocf < 3:
        raise ValueError("Stochastic decimation factor too small")
    if stocf > 1:
        raise ValueError("Stochastic decimation factor above 1")
    if H <= 0:
        raise ValueError("Hop size (H) smaller or equal to 0")
    if not UF.isPower2(N):
        raise ValueError("FFT size (N) is not a power of 2")

    win = hanning(N)       # analysis/synthesis window
    pad = np.zeros(half)
    # Pad half a window on both sides so the first frame is centred on
    # sample 0 and the last sample is still analysed.
    x = np.append(np.append(pad, x), pad)
    last = x.size - half   # last sample to start a frame
    out = np.zeros(x.size)

    center = half          # frame centre, starts in the middle of the first window
    while center <= last:
        lo, hi = center - half, center + half

        # ----- analysis -----
        spectrum = fft(x[lo:hi] * win)
        mag_db = 20 * np.log10(abs(spectrum[:nbins]))
        envelope = resample(np.maximum(-200, mag_db), int(nbins * stocf))

        # ----- synthesis -----
        mag_interp = resample(envelope, nbins)           # back to original size
        phase = 2 * np.pi * np.random.rand(nbins)        # random phases
        synth = np.zeros(N, dtype=complex)
        synth[:nbins] = 10**(mag_interp / 20) * np.exp(1j * phase)
        synth[nbins:] = 10**(mag_interp[-2:0:-1] / 20) * np.exp(
            -1j * phase[-2:0:-1])
        out[lo:hi] += win * np.real(ifft(synth))         # overlap-add

        center += H

    # Remove the padding that was added at both ends.
    out = np.delete(out, range(half))
    return np.delete(out, range(out.size - half, out.size))
def stochasticModel(x, H, N, stocf):
    """
    Stochastic analysis/synthesis of a sound, one frame at a time.

    x: input array sound
    H: hop size (must be positive)
    N: fft size (power of two)
    stocf: decimation factor of mag spectrum for stochastic analysis,
        bigger than 0, maximum of 1
    returns y: output sound

    Raises ValueError for a decimation factor that is too small or above 1,
    a non-positive hop size, or an FFT size that is not a power of two.
    """
    # Integer division: both values are used as array sizes and slice bounds,
    # which must be ints under Python 3.
    hN = N // 2 + 1                                          # positive size of fft
    No2 = N // 2                                             # half of N
    if hN * stocf < 3:                                       # decimation factor too small
        raise ValueError("Stochastic decimation factor too small")
    if stocf > 1:                                            # decimation factor too big
        raise ValueError("Stochastic decimation factor above 1")
    if H <= 0:                                               # hop size 0 or negative
        raise ValueError("Hop size (H) smaller or equal to 0")
    if not UF.isPower2(N):                                   # N not a power of two
        raise ValueError("FFT size (N) is not a power of 2")

    w = hanning(N)                                           # analysis/synthesis window
    x = np.append(np.zeros(No2), x)                          # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(No2))                          # add zeros at the end to analyze last sample
    pin = No2                                                # initialize sound pointer in middle of analysis window
    pend = x.size - No2                                      # last sample to start a frame
    y = np.zeros(x.size)                                     # initialize output array
    envSize = int(hN * stocf)                                # resample needs an integer sample count
    while pin <= pend:
        #-----analysis-----
        xw = x[pin - No2:pin + No2] * w                      # window the input sound
        X = fft(xw)                                          # compute FFT
        mX = 20 * np.log10(abs(X[:hN]))                      # magnitude spectrum of positive frequencies
        stocEnv = resample(np.maximum(-200, mX), envSize)    # decimate the mag spectrum (floored at -200 dB)
        #-----synthesis-----
        mY = resample(stocEnv, hN)                           # interpolate to original size
        pY = 2 * np.pi * np.random.rand(hN)                  # generate phase random values
        Y = np.zeros(N, dtype=complex)
        Y[:hN] = 10 ** (mY / 20) * np.exp(1j * pY)                      # generate positive freq.
        Y[hN:] = 10 ** (mY[-2:0:-1] / 20) * np.exp(-1j * pY[-2:0:-1])   # generate negative freq.
        fftbuffer = np.real(ifft(Y))                         # inverse FFT
        y[pin - No2:pin + No2] += w * fftbuffer              # overlap-add
        pin += H                                             # advance sound pointer
    y = np.delete(y, range(No2))                             # delete half of first window which was added
    y = np.delete(y, range(y.size - No2, y.size))            # delete half of last window which was added
    return y
def stochasticModelAnal(x, H, N, stocf):
    """
    Stochastic analysis of a sound.

    x: input array sound
    H: hop size (must be positive)
    N: fftsize (power of two)
    stocf: decimation factor of mag spectrum for stochastic analysis,
        bigger than 0, maximum of 1
    returns stocEnv: stochastic envelope, 2-D array with one row per frame
        and int(stocf * (N//2 + 1)) columns of dB magnitudes

    Raises ValueError for a decimation factor that is too small or above 1,
    a non-positive hop size, or an FFT size that is not a power of two.
    """
    hN = N // 2 + 1  # positive size of fft
    No2 = N // 2  # half of N
    if hN * stocf < 3:  # raise exception if decimation factor too small
        raise ValueError("Stochastic decimation factor too small")
    if stocf > 1:  # raise exception if decimation factor too big
        raise ValueError("Stochastic decimation factor above 1")
    if H <= 0:  # raise error if hop size 0 or negative
        raise ValueError("Hop size (H) smaller or equal to 0")
    if not UF.isPower2(N):  # raise error if N not a power of two
        raise ValueError("FFT size (N) is not a power of 2")

    w = hanning(N)  # analysis window
    x = np.append(np.zeros(No2), x)  # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(No2))  # add zeros at the end to analyze last sample
    pin = No2  # initialize sound pointer in middle of analysis window
    pend = x.size - No2  # last sample to start a frame
    envSize = int(stocf * hN)  # number of envelope points kept per frame
    # Collect the frames in a list and stack once at the end; growing the
    # array with vstack on every frame copies all previous frames each time.
    frames = []
    while pin <= pend:
        xw = x[pin - No2:pin + No2] * w  # window the input sound
        X = fft(xw)  # compute FFT
        mX = 20 * np.log10(abs(X[:hN]))  # magnitude spectrum of positive frequencies
        # Floor at -200 dB (this also clamps the -inf produced by log10(0)),
        # then decimate the mag spectrum.
        frames.append(resample(np.maximum(-200, mX), envSize))
        pin += H  # advance sound pointer
    stocEnv = np.array(frames)
    return stocEnv
def dftModel(x, w, N):
    """
    Analysis/synthesis of a signal using the discrete Fourier transform.

    The input frame is windowed, arranged in a zero-phase buffer,
    transformed to dB magnitude and unwrapped phase, and then resynthesized
    from those two spectra.

    x: input signal (same length as w)
    w: analysis window
    N: FFT size, a power of two not smaller than the window size
    returns y: output signal, same length as x (the windowed input rebuilt
        from its spectrum)

    Raises ValueError if N is not a power of two or if the window is longer
    than N.
    """
    if not UF.isPower2(N):  # raise error if N not a power of two
        raise ValueError("FFT size (N) is not a power of 2")
    if w.size > N:  # raise error if window size bigger than fft size
        raise ValueError("Window size (M) is bigger than FFT size")
    if np.all(x == 0):  # if input array is zeros return empty output
        return np.zeros(x.size)

    # Integer division: hN is used as a slice bound and must be an int.
    hN = N // 2 + 1  # size of positive spectrum, it includes sample 0
    hM1 = (w.size + 1) // 2  # half analysis window size by rounding
    hM2 = w.size // 2  # half analysis window size by floor
    fftbuffer = np.zeros(N)  # initialize buffer for FFT
    y = np.zeros(x.size)  # initialize output array

    #----analysis--------
    xw = x * w  # window the input sound
    fftbuffer[:hM1] = xw[hM2:]  # zero-phase window in fftbuffer
    # Tail indexed from N - hM2 so that hM2 == 0 gives an empty slice rather
    # than the whole buffer (which "-0" would select).
    fftbuffer[N - hM2:] = xw[:hM2]
    X = fft(fftbuffer)  # compute FFT
    absX = abs(X[:hN])  # compute absolute value of positive side
    eps = np.finfo(float).eps
    absX[absX < eps] = eps  # if zeros add epsilon to handle log
    mX = 20 * np.log10(absX)  # magnitude spectrum of positive frequencies in dB
    pX = np.unwrap(np.angle(X[:hN]))  # unwrapped phase spectrum of positive frequencies

    #-----synthesis-----
    Y = np.zeros(N, dtype=complex)  # clean output spectrum
    Y[:hN] = 10**(mX / 20) * np.exp(1j * pX)  # generate positive frequencies
    Y[hN:] = 10**(mX[-2:0:-1] / 20) * np.exp(
        -1j * pX[-2:0:-1])  # generate negative frequencies
    fftbuffer = np.real(ifft(Y))  # compute inverse FFT
    y[:hM2] = fftbuffer[N - hM2:]  # undo zero-phase window
    y[hM2:] = fftbuffer[:hM1]
    return y
def stochastic_model_anal(x, H, N, stocf):
    """
    Stochastic analysis of a sound.

    x: input array sound
    H: hop size (must be positive)
    N: fftsize (power of two)
    stocf: decimation factor of mag spectrum for stochastic analysis,
        bigger than 0, maximum of 1
    returns stocEnv: stochastic envelope, 2-D array with one row per frame
        and int(stocf * (N//2 + 1)) columns of dB magnitudes

    Raises ValueError for a decimation factor that is too small or above 1,
    a non-positive hop size, or an FFT size that is not a power of two.
    """
    # Integer division: both values are used as array sizes and slice bounds,
    # which must be ints under Python 3.
    hN = N // 2 + 1  # positive size of fft
    No2 = N // 2  # half of N
    if hN * stocf < 3:  # raise exception if decimation factor too small
        raise ValueError("Stochastic decimation factor too small")
    if stocf > 1:  # raise exception if decimation factor too big
        raise ValueError("Stochastic decimation factor above 1")
    if H <= 0:  # raise error if hop size 0 or negative
        raise ValueError("Hop size (H) smaller or equal to 0")
    if not UF.isPower2(N):  # raise error if N not a power of two
        raise ValueError("FFT size (N) is not a power of 2")

    w = hanning(N)  # analysis window
    x = np.append(np.zeros(No2), x)  # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(No2))  # add zeros at the end to analyze last sample
    pin = No2  # initialize sound pointer in middle of analysis window
    pend = x.size - No2  # last sample to start a frame
    env_size = int(stocf * hN)  # resample needs an integer sample count
    # Gather frames in a list and stack once; vstack inside the loop would
    # re-copy every earlier frame on each iteration.
    frames = []
    while pin <= pend:
        xw = x[pin - No2:pin + No2] * w  # window the input sound
        X = fft(xw)  # compute FFT
        mX = 20 * np.log10(abs(X[:hN]))  # magnitude spectrum of positive frequencies
        frames.append(resample(np.maximum(-200, mX), env_size))  # floor at -200 dB, then decimate
        pin += H  # advance sound pointer
    stocEnv = np.array(frames)
    return stocEnv
def dft_synth(mX, pX, M):
    """
    Synthesis of a signal using the discrete Fourier transform.

    mX: magnitude spectrum in dB (positive frequencies, N/2 + 1 bins)
    pX: phase spectrum (same size as mX)
    M: window size, i.e. number of output samples
    returns y: output signal of M samples

    Raises ValueError if the FFT size implied by mX, (mX.size - 1) * 2, is
    not a power of two.
    """
    hN = mX.size  # size of positive spectrum, it includes sample 0
    N = (hN - 1) * 2  # FFT size
    if not utilFunctions.isPower2(N):  # raise error if N not a power of two, thus mX is wrong
        raise ValueError("size of mX is not (N/2)+1")

    hM1 = (M + 1) // 2  # half analysis window size by rounding
    hM2 = M // 2  # half analysis window size by floor
    y = np.zeros(M)  # initialize output array
    Y = np.zeros(N, dtype=complex)  # clean output spectrum
    Y[:hN] = 10 ** (mX / 20) * np.exp(1j * pX)  # generate positive frequencies
    Y[hN:] = 10 ** (mX[-2:0:-1] / 20) * np.exp(-1j * pX[-2:0:-1])  # generate negative frequencies
    fftbuffer = np.real(ifft(Y))  # compute inverse FFT
    # Undo the zero-phase windowing. Indexing the tail from N - hM2 keeps the
    # slice empty when hM2 == 0 (M == 1); "-0" would select the whole buffer.
    y[:hM2] = fftbuffer[N - hM2:]
    y[hM2:] = fftbuffer[:hM1]
    return y
def dftModel(x, w, N):
    """
    Analysis/synthesis of a signal using the discrete Fourier transform.

    x: input signal (same length as w)
    w: analysis window
    N: FFT size, a power of two not smaller than the window size
    returns y: output signal, same length as x (the windowed input rebuilt
        from its dB magnitude and unwrapped phase spectra)

    Raises ValueError if N is not a power of two or if the window is longer
    than N.
    """
    if not UF.isPower2(N):                         # raise error if N not a power of two
        raise ValueError("FFT size (N) is not a power of 2")
    if w.size > N:                                 # raise error if window size bigger than fft size
        raise ValueError("Window size (M) is bigger than FFT size")
    if np.all(x == 0):                             # if input array is zeros return empty output
        return np.zeros(x.size)

    # Integer division: hN is used as a slice bound and must be an int.
    hN = N // 2 + 1                                # size of positive spectrum, it includes sample 0
    hM1 = (w.size + 1) // 2                        # half analysis window size by rounding
    hM2 = w.size // 2                              # half analysis window size by floor
    fftbuffer = np.zeros(N)                        # initialize buffer for FFT
    y = np.zeros(x.size)                           # initialize output array

    #----analysis--------
    xw = x * w                                     # window the input sound
    fftbuffer[:hM1] = xw[hM2:]                     # zero-phase window in fftbuffer
    fftbuffer[N - hM2:] = xw[:hM2]                 # N - hM2 (not -hM2) stays correct when hM2 == 0
    X = fft(fftbuffer)                             # compute FFT
    absX = abs(X[:hN])                             # compute absolute value of positive side
    eps = np.finfo(float).eps
    absX[absX < eps] = eps                         # if zeros add epsilon to handle log
    mX = 20 * np.log10(absX)                       # magnitude spectrum of positive frequencies in dB
    pX = np.unwrap(np.angle(X[:hN]))               # unwrapped phase spectrum of positive frequencies

    #-----synthesis-----
    Y = np.zeros(N, dtype=complex)                 # clean output spectrum
    Y[:hN] = 10 ** (mX / 20) * np.exp(1j * pX)                      # generate positive frequencies
    Y[hN:] = 10 ** (mX[-2:0:-1] / 20) * np.exp(-1j * pX[-2:0:-1])   # generate negative frequencies
    fftbuffer = np.real(ifft(Y))                   # compute inverse FFT
    y[:hM2] = fftbuffer[N - hM2:]                  # undo zero-phase window
    y[hM2:] = fftbuffer[:hM1]
    return y
def sineModelMultiRes(x, f, windows, Nsizes, fBands, t): ''' For each audio frame 1. compute three different DFTs with three different window sizes (which are input parameters) 2. compute the sinusoid peaks for each of the DFTs. 3. choose the peaks from these three DFTs depending on the band to which the frequency of the peak belongs x: input array sound, windows: analysis windows [w1, w2, w3] Nsizes: FFT sizes[N1, N2, N3], t: threshold in negative dB fBands: frequency band edges [B1, B2, B3] ''' # raise error if N not a power of two if not all(UF.isPower2(item) for item in Ns): raise ValueError("All FFT size (N) must be a power of 2") # raise error if window size bigger than fft size if (w.size > N): raise ValueError("Window size (M) is bigger than FFT size") # Check if all sizes in Window are ints if not all(isinstance(item, int) for item in windows): raise ValueError("All window sizes must be ints") # Check if all FFT sizes are ints if not all(isinstance(item, int) for item in Ns): raise ValueError("All FFT sizes must be ints") # Check if all freq bands are ints if not all(isinstance(item, int) for item in Ns): raise ValueError("All freq Bands must be ints") # global synthsis params Ns = 512 # FFT size for synthesis (even) H = Ns//4 # Hop size used for analysis and synthesis hNs = Ns//2 # half of synthesis FFT size yw = np.zeros(Ns) # initialize output sound frame y = np.zeros(x.size) # initialize output array sw = np.zeros(Ns) # initialize synthesis window ow = triang(2*H) # triangular window sw[hNs-H:hNs+H] = ow # add triangular window bh = blackmanharris(Ns) # blackmanharris window bh = bh / sum(bh) # normalized blackmanharris window sw[hNs-H:hNs+H] = sw[hNs-H:hNs+H] / bh[hNs-H:hNs+H] # normalized synthesis window for i in range(3): w = windows[i] N = Nsizes[i] Bmin, Bmax = fBands[i] hM1 = int(math.floor((w.size+1)/2)) # half analysis window size by rounding hM2 = int(math.floor(w.size/2)) pin = max(hNs, hM1) # init sound pointer in middle of anal window pend 
= x.size - max(hNs, hM1) # last sample to start a frame w = w / sum(w) # normalize analysis window while pin<pend: # while input sound pointer is within sound #-----analysis----- x1 = x[pin-hM1:pin+hM2] # select frame mX, pX = DFT.dftAnal(x1, w, N) # compute dft ploc = UF.peakDetection(mX, t) # detect locations of peaks iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc) # refine peak values by interpolation ipfreq = fs*iploc/float(N) # convert peak locations to Hertz # Filter indexes within the frequency bands Bmin and Bmax ipmag = ipmag[np.logical_and(ipfreq >= Bmin, ipfreq < Bmax)] ipphase = ipphase[np.logical_and(ipfreq >= Bmin, ipfreq < Bmax)] ipfreq = ipfreq[np.logical_and(ipfreq >= Bmin, ipfreq < Bmax)] #-----synthesis----- Y = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs) # generate sines in the spectrum fftbuffer = np.real(ifft(Y)) # compute inverse FFT yw[:hNs-1] = fftbuffer[hNs+1:] # undo zero-phase window yw[hNs-1:] = fftbuffer[:hNs+1] y[pin-hNs:pin+hNs] += sw*yw # overlap-add and apply a synthesis window pin += H