def main(inputFile='../../sounds/ocean.wav', H=256, stocf=.1):
    """
    Analyze a sound with the stochastic model, resynthesize it, save it and plot the result
    inputFile: input sound file (monophonic with sampling rate of 44100)
    H: hop size
    stocf: decimation factor used for the stochastic approximation
    The output sound is written to 'output_sounds/<input name>_stochasticModel.wav';
    nothing is returned
    """

    # --------- computation -----------------

    # read input sound
    (fs, x) = UF.wavread(inputFile)

    # compute stochastic model
    mYst = STM.stochasticModelAnal(x, H, stocf)

    # synthesize sound from stochastic model
    y = STM.stochasticModelSynth(mYst, H)

    # the last four characters of the file name (the '.wav' extension) are dropped
    outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_stochasticModel.wav'

    # write output sound
    UF.wavwrite(y, fs, outputFile)

    # --------- plotting --------------------

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # plot the input sound
    plt.subplot(3,1,1)
    plt.plot(np.arange(x.size)/float(fs), x)
    plt.axis([0, x.size/float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot stochastic representation
    plt.subplot(3,1,2)
    numFrames = int(mYst[:,0].size)
    # take the number of bins from the envelope itself: stocf*H is in general
    # fractional, so np.arange(stocf*H) need not have as many entries as the
    # envelope has columns, and pcolormesh rejects inconsistent sizes
    numBins = int(mYst[0,:].size)
    frmTime = H*np.arange(numFrames)/float(fs)
    binFreq = np.arange(numBins)*float(fs)/(stocf*2*H)
    plt.pcolormesh(frmTime, binFreq, np.transpose(mYst))
    plt.autoscale(tight=True)
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('stochastic approximation')

    # plot the output sound
    plt.subplot(3,1,3)
    plt.plot(np.arange(y.size)/float(fs), y)
    plt.axis([0, y.size/float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')

    plt.tight_layout()
    plt.show()
def extractHarmSpec(inputFile='../../sounds/ocean.wav', H=256, N=512, stocf=.1):
    """
    Analyze a sound with the stochastic model, resynthesize it, save it and plot the result
    inputFile: input sound file (monophonic with sampling rate of 44100)
    H: hop size, N: fft size
    stocf: decimation factor used for the stochastic approximation (bigger than 0, maximum 1)
    The output sound is written to 'output_sounds/<input name>_stochasticModel.wav';
    nothing is returned
    NOTE(review): despite its name, this function only runs the stochastic model;
    no harmonic spectrum is extracted here
    """

    # read input sound
    (fs, x) = UF.wavread(inputFile)

    # compute stochastic model
    stocEnv = STM.stochasticModelAnal(x, H, N, stocf)

    # synthesize sound from stochastic model
    y = STM.stochasticModelSynth(stocEnv, H, N)

    # the last four characters of the file name (the '.wav' extension) are dropped
    outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_stochasticModel.wav'

    # write output sound
    UF.wavwrite(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot stochastic representation
    plt.subplot(3, 1, 2)
    numFrames = int(stocEnv[:, 0].size)
    # take the number of bins from the envelope itself: stocf*N/2 is in general
    # fractional, so np.arange(stocf*N/2) need not have as many entries as the
    # envelope has columns, and pcolormesh rejects inconsistent sizes
    numBins = int(stocEnv[0, :].size)
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = np.arange(numBins) * float(fs) / (stocf * N)
    plt.pcolormesh(frmTime, binFreq, np.transpose(stocEnv))
    plt.autoscale(tight=True)
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('stochastic approximation')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')

    plt.tight_layout()
    plt.show()
def hpsModelSynth(hfreq, hmag, hphase, stocEnv, N, H, fs):
    """
    Synthesis of a sound using the harmonic plus stochastic model
    hfreq, hmag, hphase: harmonic frequencies, amplitudes and phases
    stocEnv: stochastic envelope
    N: synthesis FFT size used for the harmonics; H: hop size; fs: sampling rate
    returns y: output sound, yh: harmonic component, yst: stochastic component
    """

    # harmonic part, synthesized by the sinusoidal model
    harmonic = SM.sineModelSynth(hfreq, hmag, hphase, N, H, fs)

    # stochastic residual; its third argument is fixed to 2*H rather than N
    stochastic = STM.stochasticModelSynth(stocEnv, H, H*2)

    # the two components may differ in length, so only the common part is summed
    common = min(harmonic.size, stochastic.size)
    mix = harmonic[:common] + stochastic[:common]

    return mix, harmonic, stochastic
def spsModelSynth(tfreq, tmag, tphase, stocEnv, N, H, fs):
    """
    Synthesis of a sound using the sinusoidal plus stochastic model
    tfreq, tmag, tphase: sinusoidal frequencies, amplitudes and phases
    stocEnv: stochastic envelope
    N: synthesis FFT size used for the sinusoids; H: hop size; fs: sampling rate
    returns y: output sound, ys: sinusoidal component, yst: stochastic component
    """

    # sinusoidal part
    sinusoids = SM.sineModelSynth(tfreq, tmag, tphase, N, H, fs)

    # stochastic residual; its third argument is fixed to 2*H rather than N
    stochastic = STM.stochasticModelSynth(stocEnv, H, H*2)

    # the two components may differ in length, so only the common part is summed
    common = min(sinusoids.size, stochastic.size)
    mix = sinusoids[:common] + stochastic[:common]

    return mix, sinusoids, stochastic
def main(inputFile='../../sounds/ocean.wav', H=256, N=512, stocf=.1):
    """
    Analyze a sound with the stochastic model, resynthesize it, save it and plot the result
    inputFile: input sound file (monophonic with sampling rate of 44100)
    H: hop size, N: fft size
    stocf: decimation factor used for the stochastic approximation (bigger than 0, maximum 1)
    The output sound is written to 'output_sounds/<input name>_stochasticModel.wav';
    the figure is shown without blocking and nothing is returned
    """

    # read input sound
    (fs, x) = UF.wavread(inputFile)

    # compute stochastic model
    stocEnv = STM.stochasticModelAnal(x, H, N, stocf)

    # synthesize sound from stochastic model
    y = STM.stochasticModelSynth(stocEnv, H, N)

    # the last four characters of the file name (the '.wav' extension) are dropped
    outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_stochasticModel.wav'

    # write output sound
    UF.wavwrite(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # plot the input sound
    plt.subplot(3,1,1)
    plt.plot(np.arange(x.size)/float(fs), x)
    plt.axis([0, x.size/float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot stochastic representation
    plt.subplot(3,1,2)
    numFrames = int(stocEnv[:,0].size)
    # take the number of bins from the envelope itself: stocf*(N/2+1) is in
    # general fractional, so np.arange of it need not have as many entries as
    # the envelope has columns, and pcolormesh rejects inconsistent sizes
    numBins = int(stocEnv[0,:].size)
    frmTime = H*np.arange(numFrames)/float(fs)
    binFreq = np.arange(numBins)*float(fs)/(stocf*N)
    plt.pcolormesh(frmTime, binFreq, np.transpose(stocEnv))
    plt.autoscale(tight=True)
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('stochastic approximation')

    # plot the output sound
    plt.subplot(3,1,3)
    plt.plot(np.arange(y.size)/float(fs), y)
    plt.axis([0, y.size/float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')

    plt.tight_layout()
    # non-blocking so the caller keeps control after the figure is drawn
    plt.show(block=False)
def spsModelSynth(tfreq, tmag, tphase, stocEnv, N, H, fs):
    """
    Synthesis of a sound using the sinusoidal plus stochastic model
    tfreq, tmag, tphase: sinusoidal frequencies, amplitudes and phases
    stocEnv: stochastic envelope
    N: synthesis FFT size (used for both components); H: hop size; fs: sampling rate
    returns y: output sound, ys: sinusoidal component, yst: stochastic component
    """

    # sinusoidal part
    sinusoids = SM.sineModelSynth(tfreq, tmag, tphase, N, H, fs)

    # stochastic residual, synthesized with the same FFT size N
    stochastic = STM.stochasticModelSynth(stocEnv, H, N)

    # the two components may differ in length, so only the common part is summed
    common = min(sinusoids.size, stochastic.size)
    mix = sinusoids[:common] + stochastic[:common]

    return mix, sinusoids, stochastic
def main(inputFile='../../sounds/ocean.wav', H=256, N=512, stocf=.1):
    """
    Analyze a sound with the stochastic model, resynthesize it and save the result
    inputFile: input sound file (monophonic with sampling rate of 44100)
    H: hop size, N: fft size
    stocf: decimation factor used for the stochastic approximation (bigger than 0, maximum 1)
    returns x: input sound, fs: sampling rate, stocEnv: stochastic envelope, y: output sound
    """

    # load the input and run analysis followed by synthesis
    fs, x = UF.wavread(inputFile)
    stocEnv = STM.stochasticModelAnal(x, H, N, stocf)
    y = STM.stochasticModelSynth(stocEnv, H, N)

    # output name: input file name without its last four characters ('.wav')
    baseName = os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, 'output_sounds/' + baseName + '_stochasticModel.wav')

    return x, fs, stocEnv, y
def extractHarmSpec(inputFile='../../sounds/rain.wav', stocf=0.1, timeScaling=np.array([0, 0, 1, 2])):
    """
    function to perform a time scaling using the stochastic model
    inputFile: name of input sound file
    stocf: decimation factor used for the stochastic approximation
    timeScaling: time scaling factors, in time-value pairs
    The output sound is written to
    'output_sounds/<input name>_stochasticModelTransformation.wav'; nothing is returned
    NOTE(review): despite its name, this function only runs the stochastic model;
    no harmonic spectrum is extracted here
    """

    # hop size
    H = 128

    # read input sound
    (fs, x) = UF.wavread(inputFile)

    # perform stochastic analysis
    mYst = STC.stochasticModelAnal(x, H, H * 2, stocf)

    # perform time scaling of stochastic representation
    ystocEnv = STCT.stochasticTimeScale(mYst, timeScaling)

    # synthesize output sound
    y = STC.stochasticModelSynth(ystocEnv, H, H * 2)

    # write output sound
    outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_stochasticModelTransformation.wav'
    UF.wavwrite(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # plot the input sound
    plt.subplot(4, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot stochastic representation
    plt.subplot(4, 1, 2)
    numFrames = int(mYst[:, 0].size)
    # take the number of bins from the envelope itself: stocf*H is in general
    # fractional, so np.arange(stocf*H) need not have as many entries as the
    # envelope has columns, and pcolormesh rejects inconsistent sizes
    numBins = int(mYst[0, :].size)
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = np.arange(numBins) * float(fs) / (stocf * 2 * H)
    plt.pcolormesh(frmTime, binFreq, np.transpose(mYst))
    plt.autoscale(tight=True)
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('stochastic approximation')

    # plot modified stochastic representation
    plt.subplot(4, 1, 3)
    numFrames = int(ystocEnv[:, 0].size)
    numBins = int(ystocEnv[0, :].size)
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = np.arange(numBins) * float(fs) / (stocf * 2 * H)
    plt.pcolormesh(frmTime, binFreq, np.transpose(ystocEnv))
    plt.autoscale(tight=True)
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('modified stochastic approximation')

    # plot the output sound
    plt.subplot(4, 1, 4)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')

    plt.tight_layout()
    plt.show()
def main(
    inputFile="../../sounds/bendir.wav",
    window="hamming",
    M=2001,
    N=2048,
    t=-80,
    minSineDur=0.02,
    maxnSines=150,
    freqDevOffset=10,
    freqDevSlope=0.001,
    stocf=0.2,
):
    """
    Analysis/synthesis of a sound with the sinusoidal plus stochastic model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    stocf: decimation factor used for the stochastic approximation
    Writes the sinusoidal, stochastic and summed sounds to 'output_sounds/' and
    shows a three-panel figure; nothing is returned
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # --------- computation -----------------

    # read input sound
    (fs, x) = UF.wavread(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # perform sinusoidal analysis
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

    # subtract sinusoids from original sound
    xr = UF.sineSubtraction(x, Ns, H, tfreq, tmag, tphase, fs)

    # compute stochastic model of residual
    mYst = STM.stochasticModelAnal(xr, H, stocf)

    # synthesize sinusoids
    ys = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

    # synthesize stochastic component
    yst = STM.stochasticModelSynth(mYst, H)

    # sum sinusoids and stochastic (only over their common length)
    y = yst[: min(yst.size, ys.size)] + ys[: min(yst.size, ys.size)]

    # output sound file (monophonic with sampling rate of 44100)
    outputFileSines = "output_sounds/" + os.path.basename(inputFile)[:-4] + "_spsModel_sines.wav"
    outputFileStochastic = "output_sounds/" + os.path.basename(inputFile)[:-4] + "_spsModel_stochastic.wav"
    outputFile = "output_sounds/" + os.path.basename(inputFile)[:-4] + "_spsModel.wav"

    # write sounds files for sinusoidal, residual, and the sum
    UF.wavwrite(ys, fs, outputFileSines)
    UF.wavwrite(yst, fs, outputFileStochastic)
    UF.wavwrite(y, fs, outputFile)

    # --------- plotting --------------------

    # plot stochastic component
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 10000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel("amplitude")
    plt.xlabel("time (sec)")
    plt.title("input sound: x")

    plt.subplot(3, 1, 2)
    numFrames = int(mYst[:, 0].size)
    sizeEnv = int(mYst[0, :].size)
    frmTime = H * np.arange(numFrames) / float(fs)
    # number of envelope bins up to maxplotfreq; it must be an integer because
    # it is used as a slice bound (a float bound raises TypeError), and it is
    # capped at sizeEnv so the frequency axis never outgrows the data
    numPlotBins = min(sizeEnv, int(sizeEnv * maxplotfreq / (0.5 * fs)) + 1)
    # one frequency per plotted bin, so axis and data always have matching sizes
    binFreq = (0.5 * fs) * np.arange(numPlotBins) / sizeEnv
    plt.pcolormesh(frmTime, binFreq, np.transpose(mYst[:, :numPlotBins]))
    plt.autoscale(tight=True)

    # plot sinusoidal frequencies on top of stochastic component
    sines = tfreq * np.less(tfreq, maxplotfreq)
    # zeroed entries are turned into NaN so they are not drawn
    sines[sines == 0] = np.nan
    numFrames = int(sines[:, 0].size)
    frmTime = H * np.arange(numFrames) / float(fs)
    plt.plot(frmTime, sines, color="k", ms=3, alpha=1)
    plt.xlabel("time(s)")
    plt.ylabel("Frequency(Hz)")
    plt.autoscale(tight=True)
    plt.title("sinusoidal + stochastic spectrogram")

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel("amplitude")
    plt.xlabel("time (sec)")
    plt.title("output sound: y")

    plt.tight_layout()
    plt.show()
# make the project's model modules importable relative to this file
sys.path.append(
    os.path.join(os.path.dirname(os.path.realpath(__file__)),
                 '../../../software/models/'))
import utilFunctions as UF
import stochasticModel as STM
import stft as STFT

# input sound and analysis settings
(fs, x) = UF.wavread('../../../sounds/ocean.wav')
w = np.hamming(512)
N = 512
H = 256
stocf = .1

# stochastic analysis/synthesis and STFT of the same sound
mYst = STM.stochasticModelAnal(x, H, N, stocf)
y = STM.stochasticModelSynth(mYst, H, N)
mX, pX = STFT.stftAnal(x, w, N, H)

plt.figure(1, figsize=(9, 7))

# first panel: input waveform against time in seconds
plt.subplot(4, 1, 1)
plt.plot(np.arange(x.size) / float(fs), x, 'b')
plt.title('x (ocean.wav)')
plt.axis([0, x.size / float(fs), min(x), max(x)])

# second panel: STFT magnitude; both axes are sized from mX itself
plt.subplot(4, 1, 2)
numFrames = int(mX.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = np.arange(mX.shape[1]) * float(fs) / N
plt.pcolormesh(frmTime, binFreq, np.transpose(mX))
plt.title('mX; M=512, N=512, H=256')
plt.autoscale(tight=True)
# make the project's model modules importable relative to this file
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../models/"))
import stochasticModel as STC
import utilFunctions as UF


def stochasticTimeScale(stocEnv, timeScaling):
    """
    Time scaling of the stochastic representation of a sound
    stocEnv: stochastic envelope (2-D array, one row per frame)
    timeScaling: scaling factors, in time-value pairs
    returns ystocEnv: stochastic envelope (always 2-D; at least one frame,
    and the first output frame is always the first input frame)
    """

    L = stocEnv[:, 0].size  # number of input frames
    outL = int(L * timeScaling[-1] / timeScaling[-2])  # number of synthesis frames

    # mapping from normalized output time to normalized input time
    timeScalingEnv = interp1d(timeScaling[::2] / timeScaling[-2], timeScaling[1::2] / timeScaling[-1])

    # fractional input-frame position for every output frame
    indexes = (L - 1) * timeScalingEnv(np.arange(outL) / float(outL))

    # integer frame indexes: entry 0 stays 0 (first output frame is same than
    # input), the rest are rounded to the nearest input frame. Explicit integer
    # conversion avoids indexing with a float, and picking all rows at once
    # replaces the repeated np.vstack, which recopied the output on every frame.
    frameIdx = np.zeros(max(outL, 1), dtype=int)
    frameIdx[1:] = np.round(indexes[1:]).astype(int)

    return stocEnv[frameIdx, :]


if __name__ == "__main__":
    (fs, x) = UF.wavread(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../sounds/rain.wav"))
    H = 256
    stocf = 0.2
    # NOTE(review): called here without an fft-size argument; confirm this
    # matches the signature of the stochasticModel module actually in use
    mYst = STC.stochasticModelAnal(x, H, stocf)
    # time-value pairs: time 0 maps to 0, time 1 maps to 2
    timeScaling = np.array([0, 0, 1, 2])
    ystocEnv = stochasticTimeScale(mYst, timeScaling)
    y = STC.stochasticModelSynth(ystocEnv, H)
    UF.play(y, fs)
def main (inputFile='../../sounds/rain.wav', stocf=0.1, timeScaling = np.array([0, 0, 1, 2])):
    """
    function to perform a time scaling using the stochastic model
    inputFile: name of input sound file
    stocf: decimation factor used for the stochastic approximation
    timeScaling: time scaling factors, in time-value pairs
    The output sound is written to
    'output_sounds/<input name>_stochasticModelTransformation.wav'; nothing is returned
    """

    # hop size
    H = 128

    # read input sound
    (fs, x) = UF.wavread(inputFile)

    # perform stochastic analysis
    mYst = STC.stochasticModelAnal(x, H, H*2, stocf)

    # perform time scaling of stochastic representation
    ystocEnv = STCT.stochasticTimeScale(mYst, timeScaling)

    # synthesize output sound
    y = STC.stochasticModelSynth(ystocEnv, H, H*2)

    # write output sound
    outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_stochasticModelTransformation.wav'
    UF.wavwrite(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # plot the input sound
    plt.subplot(4,1,1)
    plt.plot(np.arange(x.size)/float(fs), x)
    plt.axis([0, x.size/float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot stochastic representation
    plt.subplot(4,1,2)
    numFrames = int(mYst[:,0].size)
    # take the number of bins from the envelope itself: stocf*H is in general
    # fractional, so np.arange(stocf*H) need not have as many entries as the
    # envelope has columns, and pcolormesh rejects inconsistent sizes
    numBins = int(mYst[0,:].size)
    frmTime = H*np.arange(numFrames)/float(fs)
    binFreq = np.arange(numBins)*float(fs)/(stocf*2*H)
    plt.pcolormesh(frmTime, binFreq, np.transpose(mYst))
    plt.autoscale(tight=True)
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('stochastic approximation')

    # plot modified stochastic representation
    plt.subplot(4,1,3)
    numFrames = int(ystocEnv[:,0].size)
    numBins = int(ystocEnv[0,:].size)
    frmTime = H*np.arange(numFrames)/float(fs)
    binFreq = np.arange(numBins)*float(fs)/(stocf*2*H)
    plt.pcolormesh(frmTime, binFreq, np.transpose(ystocEnv))
    plt.autoscale(tight=True)
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('modified stochastic approximation')

    # plot the output sound
    plt.subplot(4,1,4)
    plt.plot(np.arange(y.size)/float(fs), y)
    plt.axis([0, y.size/float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')

    plt.tight_layout()
    plt.show()
import time
import sys, os

# make the project's model modules importable relative to this file
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../../software/models/'))

import utilFunctions as UF
import stochasticModel as STM
import stft as STFT

# input sound and analysis settings
(fs, x) = UF.wavread('../../../sounds/ocean.wav')
w = np.hamming(512)
N = 512
H = 256
stocf = .1

# stochastic analysis/synthesis and STFT of the same sound
mYst = STM.stochasticModelAnal(x, H, stocf)
y = STM.stochasticModelSynth(mYst, H)
mX, pX = STFT.stftAnal(x, fs, w, N, H)

plt.figure(1, figsize=(9, 7))

# first panel: input waveform against time in seconds
plt.subplot(411)
plt.plot(np.arange(x.size)/float(fs), x,'b')
plt.title('x (ocean.wav)')
plt.axis([0,x.size/float(fs),min(x),max(x)])

# second panel: STFT magnitude
plt.subplot(412)
numFrames = int(mX[:,0].size)
frmTime = H*np.arange(numFrames)/float(fs)
# size the frequency axis from mX rather than assuming N/2 bins, so the axis
# always matches the spectrogram's column count (same bin spacing fs/N)
binFreq = np.arange(mX[0,:].size)*float(fs)/N
plt.pcolormesh(frmTime, binFreq, np.transpose(mX))
plt.title('mX; M=512, N=512, H=256')
import sys, os

# make the project's model modules importable relative to this file
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../software/models/'))

import utilFunctions as UF
import stochasticModel as STM

# Read input sound
(fs, x) = UF.wavread('../sounds/ocean.wav')

# Compute stochastic model
H = 128
# third argument of the analysis and synthesis calls; it was previously
# written as H*2 in the analysis call and as an undefined name N in the
# synthesis call, which raised NameError
N = H*2
stocf = .2
stocEnv = STM.stochasticModelAnal(x, H, N, stocf)

# Synthesize sound from stochastic model
y = STM.stochasticModelSynth(stocEnv, H, N)