def hprModelAnal(x, fs, w, N, H, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope):
    """Analyze a sound with the harmonic plus residual model.

    x: input sound
    fs: sampling rate
    w: analysis window
    N: FFT size
    H: hop size, forwarded to both the harmonic analysis and the subtraction
    t: threshold in negative dB
    minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics
    minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound
    f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics
        have higher allowed deviation

    returns harmonic frequencies, magnitudes and phases, followed by the
    residual signal
    """
    # FFT size handed to the sinusoid subtraction (fixed at 512 here)
    subtractionFftSize = 512

    # harmonic analysis of the input
    harmFreqs, harmMags, harmPhases = HM.harmonicModelAnal(
        x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur
    )

    # residual = input minus the detected harmonics
    residual = UF.sineSubtraction(
        x, subtractionFftSize, H, harmFreqs, harmMags, harmPhases, fs
    )
    return harmFreqs, harmMags, harmPhases, residual
def sprModelAnal(x, fs, w, N, H, t, minSineDur, maxnSines, freqDevOffset, freqDevSlope):
    """Analyze a sound with the sinusoidal plus residual model.

    x: input sound
    fs: sampling rate
    w: analysis window
    N: FFT size
    H: hop size, forwarded to both the sinusoidal analysis and the subtraction
    t: threshold in negative dB
    minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame
        to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have
        bigger deviation

    returns the sinusoidal track frequencies, magnitudes and phases,
    followed by the residual signal
    """
    # FFT size handed to the sinusoid subtraction (fixed at 512 here)
    subtractionFftSize = 512

    # sinusoidal analysis of the input
    trackFreqs, trackMags, trackPhases = SM.sineModelAnal(
        x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope
    )

    # residual = input minus the detected sinusoids
    residual = UF.sineSubtraction(
        x, subtractionFftSize, H, trackFreqs, trackMags, trackPhases, fs
    )
    return trackFreqs, trackMags, trackPhases, residual
def hpsModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur, Ns, stocf):
    """Analyze a sound with the harmonic plus stochastic model.

    x: input sound
    fs: sampling rate
    w: analysis window
    N: FFT size
    H: hop size; the stochastic analysis is also called with H and H*2
    t: threshold in negative dB
    nH: maximum number of harmonics
    minf0: minimum f0 frequency in Hz
    maxf0: maximum f0 frequency in Hz
    f0et: error threshold in the f0 detection (ex: 5)
    harmDevSlope: slope of harmonic deviation
    minSineDur: minimum length of harmonics
    Ns: FFT size handed to the sinusoid subtraction
    stocf: last argument of the stochastic analysis
        (presumably the decimation factor of the approximation -- confirm)

    returns harmonic frequencies, magnitudes and phases, followed by the
    stochastic envelope of the residual
    """
    # step 1: harmonic analysis
    harmFreqs, harmMags, harmPhases = HM.harmonicModelAnal(
        x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur
    )

    # step 2: remove the harmonics from the original sound
    residual = UF.sineSubtraction(x, Ns, H, harmFreqs, harmMags, harmPhases, fs)

    # step 3: stochastic analysis of what is left
    stochasticEnvelope = STM.stochasticModelAnal(residual, H, H * 2, stocf)

    return harmFreqs, harmMags, harmPhases, stochasticEnvelope
def spsModelAnal(x, fs, w, N, H, t, minSineDur, maxnSines, freqDevOffset, freqDevSlope, stocf):
    """Analyze a sound with the sinusoidal plus stochastic model.

    x: input sound
    fs: sampling rate
    w: analysis window
    N: FFT size
    H: hop size; the stochastic analysis is also called with H and H*2
    t: threshold in negative dB
    minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame
        to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have
        bigger deviation
    stocf: decimation factor used for the stochastic approximation

    returns the sinusoidal track frequencies, magnitudes and phases,
    followed by the stochastic envelope of the residual
    """
    # FFT size handed to the sinusoid subtraction (fixed at 512 here)
    subtractionFftSize = 512

    # sinusoidal analysis of the input
    trackFreqs, trackMags, trackPhases = SM.sineModelAnal(
        x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope
    )

    # residual = input minus the detected sinusoids
    residual = UF.sineSubtraction(
        x, subtractionFftSize, H, trackFreqs, trackMags, trackPhases, fs
    )

    # stochastic model of the residual
    stochasticEnvelope = STM.stochasticModelAnal(residual, H, H * 2, stocf)

    return trackFreqs, trackMags, trackPhases, stochasticEnvelope
def spsModelAnal(x, fs, w, N, H, t, minSineDur, maxnSines, freqDevOffset, freqDevSlope, stocf):
    """Analyze a sound with the sinusoidal plus stochastic model.

    This variant passes the analysis FFT size N both to the sinusoid
    subtraction and to the stochastic analysis.

    NOTE(review): a function with this same name is defined earlier in this
    file; if both live in one module, this later definition is the one in
    effect.

    x: input sound
    fs: sampling rate
    w: analysis window
    N: FFT size
    H: hop size
    t: threshold in negative dB
    minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame
        to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have
        bigger deviation
    stocf: decimation factor used for the stochastic approximation

    returns the sinusoidal track frequencies, magnitudes and phases,
    followed by the stochastic envelope of the residual
    """
    # sinusoidal analysis of the input
    trackFreqs, trackMags, trackPhases = SM.sineModelAnal(
        x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope
    )

    # residual = input minus the detected sinusoids, subtracted with FFT size N
    residual = UF.sineSubtraction(x, N, H, trackFreqs, trackMags, trackPhases, fs)

    # stochastic model of the residual, also computed with N
    stochasticEnvelope = STM.stochasticModelAnal(residual, H, N, stocf)

    return trackFreqs, trackMags, trackPhases, stochasticEnvelope
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import hamming, hanning, triang, blackmanharris, resample
import math
import sys, os, time

# make the project's model modules importable relative to this script
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../software/models/'))

import stft as STFT
import utilFunctions as UF
import harmonicModel as HM

# Script: harmonic analysis of a cello recording, harmonic resynthesis,
# and computation of the residual by subtracting the harmonics.

# read the input sound
(fs, x) = UF.wavread(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../sounds/cello-double.wav'))

# analysis parameters
w = np.blackman(3501)    # analysis window
N = 2048*2               # FFT size used in the analysis
t = -100                 # magnitude threshold
nH = 100                 # maximum number of harmonics
minf0 = 140              # lower bound for the fundamental
maxf0 = 150              # upper bound for the fundamental
f0et = 10                # error threshold of the f0 detection
minSineDur = .2          # minimum duration of harmonic tracks
harmDevSlope = 0.001     # slope of the allowed harmonic deviation

# synthesis parameters
Ns = 512                 # FFT size used in synthesis / subtraction
# Floor division keeps the hop size an int; with true division `Ns/4`
# evaluates to the float 128.0, which is not usable as a sample count/index.
H = Ns // 4

# harmonic analysis, resynthesis of the harmonics, and residual
hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur)
y = HM.harmonicModelSynth(hfreq, hmag, hphase, Ns, H, fs)
xr = UF.sineSubtraction(x, Ns, H, hfreq, hmag, hphase, fs)
# NOTE(review): the `return` below is the tail of a definition whose start is
# not visible in this part of the file; it is kept untouched.
return y, ys, xr


# test the subtraction of sines
if __name__ == '__main__':
    # read the test sound, located relative to this file
    (fs, x) = UF.wavread(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../sounds/bendir.wav'))

    # analysis parameters
    w = np.hamming(2001)
    N = 2048
    H = 128
    t = -100
    minSineDur = .02
    maxnSines = 200
    freqDevOffset = 10
    freqDevSlope = 0.001

    # sinusoidal analysis, then subtraction of the sinusoids (called with N)
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)
    xr = UF.sineSubtraction(x, N, H, tfreq, tmag, tphase, fs)

    # spectrogram of the residual: window and FFT size are both H*2
    mXr, pXr = STFT.stftAnal(xr, fs, hamming(H*2), H*2, H)

    # resynthesize the sinusoids with a separate FFT size Ns
    Ns = 512
    ys = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

    # plot the residual magnitude spectrogram
    plt.figure(1, figsize=(9.5, 7))
    numFrames = int(mXr[:,0].size)
    frmTime = H*np.arange(numFrames)/float(fs)
    # NOTE(review): binFreq has H entries; if stftAnal returns H+1 bins for an
    # FFT size of H*2, this will not match mXr's second dimension -- confirm.
    binFreq = np.arange(H)*float(fs)/(H*2)
    plt.pcolormesh(frmTime, binFreq, np.transpose(mXr))
    plt.autoscale(tight=True)

    # overlay the sinusoidal tracks; zeros become NaN so they are not drawn
    # (this modifies tfreq in place)
    tfreq[tfreq==0] = np.nan
    numFrames = int(tfreq[:,0].size)
    frmTime = H*np.arange(numFrames)/float(fs)
    plt.plot(frmTime, tfreq, color='k', ms=3, alpha=1)
def main(inputFile='../../sounds/bendir.wav', window='hamming', M=2001, N=2048, t=-80,
         minSineDur=0.02, maxnSines=150, freqDevOffset=10, freqDevSlope=0.001):
    """Run the sinusoidal plus residual model on a sound file, write and plot the results.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation

    Writes three files under 'output_sounds/' (sinusoids, residual, and their
    sum) and shows a three-panel figure.
    """
    synthFftSize = 512   # size of fft used in synthesis
    H = 128              # hop size (has to be 1/4 of the synthesis fft size)

    # ---------------- computation ----------------
    fs, x = UF.wavread(inputFile)
    sampleRate = float(fs)
    w = get_window(window, M)

    # sinusoidal analysis
    tfreq, tmag, tphase = SM.sineModelAnal(
        x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope
    )

    # residual: original minus sinusoids (subtraction is called with N)
    xr = UF.sineSubtraction(x, N, H, tfreq, tmag, tphase, fs)

    # spectrogram of the residual
    mXr, pXr = STFT.stftAnal(xr, fs, w, N, H)

    # resynthesized sinusoids
    ys = SM.sineModelSynth(tfreq, tmag, tphase, synthFftSize, H, fs)

    # sum of sinusoids and residual, truncated to the shorter of the two
    commonLen = min(xr.size, ys.size)
    y = xr[:commonLen] + ys[:commonLen]

    # output names are derived from the input name minus its last 4 characters
    stem = os.path.basename(inputFile)[:-4]

    def outPath(suffix):
        return 'output_sounds/' + stem + suffix

    sinesPath = outPath('_sprModel_sines.wav')
    residualPath = outPath('_sprModel_residual.wav')
    sumPath = outPath('_sprModel.wav')

    # write sound files for sinusoidal, residual, and the sum
    UF.wavwrite(ys, fs, sinesPath)
    UF.wavwrite(xr, fs, residualPath)
    UF.wavwrite(y, fs, sumPath)

    # ---------------- plotting ----------------
    plt.figure(figsize=(12, 9))
    maxplotfreq = 5000.0   # frequency range to plot

    # panel 1: input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / sampleRate, x)
    plt.axis([0, x.size / sampleRate, min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # panel 2: magnitude spectrogram of the residual up to maxplotfreq
    plt.subplot(3, 1, 2)
    maxplotbin = int(N * maxplotfreq / fs)
    frameCount = int(mXr[:, 0].size)
    frameTimes = H * np.arange(frameCount) / sampleRate
    binFreqs = np.arange(maxplotbin + 1) * sampleRate / N
    plt.pcolormesh(frameTimes, binFreqs, np.transpose(mXr[:, :maxplotbin + 1]))
    plt.autoscale(tight=True)

    # sinusoidal tracks on top; values at/above maxplotfreq or <= 0 are hidden as NaN
    tracks = tfreq * np.less(tfreq, maxplotfreq)
    tracks[tracks <= 0] = np.nan
    plt.plot(frameTimes, tracks, color='k')
    plt.title('sinusoidal tracks + residual spectrogram')
    plt.autoscale(tight=True)

    # panel 3: output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / sampleRate, y)
    plt.axis([0, y.size / sampleRate, min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show()
def main(
    inputFile="../../sounds/sax-phrase.wav",
    window="blackman",
    M=601,
    N=1024,
    t=-100,
    minSineDur=0.1,
    nH=100,
    minf0=350,
    maxf0=700,
    f0et=5,
    harmDevSlope=0.01,
):
    """Run the harmonic plus residual model on a sound file, write and plot the results.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics
    minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound
    f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation

    Writes three files under "output_sounds/" (harmonics, residual, and their
    sum) and shows a three-panel figure.
    """
    synthFftSize = 512   # size of fft used in synthesis
    H = 128              # hop size (has to be 1/4 of the synthesis fft size)

    # ---------------- computation ----------------
    fs, x = UF.wavread(inputFile)
    sampleRate = float(fs)
    w = get_window(window, M)

    # harmonic analysis
    hfreq, hmag, hphase = HM.harmonicModelAnal(
        x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur
    )

    # residual: original minus harmonics
    xr = UF.sineSubtraction(x, synthFftSize, H, hfreq, hmag, hphase, fs)

    # spectrogram of the residual
    mXr, pXr = STFT.stftAnal(xr, fs, w, N, H)

    # resynthesized harmonic component
    yh = SM.sineModelSynth(hfreq, hmag, hphase, synthFftSize, H, fs)

    # sum of harmonics and residual, truncated to the shorter of the two
    commonLen = min(xr.size, yh.size)
    y = xr[:commonLen] + yh[:commonLen]

    # output names are derived from the input name minus its last 4 characters;
    # write sound files for harmonics, residual, and the sum (in that order)
    prefix = "output_sounds/" + os.path.basename(inputFile)[:-4]
    outputs = (
        (yh, prefix + "_hprModel_sines.wav"),
        (xr, prefix + "_hprModel_residual.wav"),
        (y, prefix + "_hprModel.wav"),
    )
    for signal, path in outputs:
        UF.wavwrite(signal, fs, path)

    # ---------------- plotting ----------------
    plt.figure(figsize=(12, 9))
    maxplotfreq = 5000.0   # frequency range to plot

    # panel 1: input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / sampleRate, x)
    plt.axis([0, x.size / sampleRate, min(x), max(x)])
    plt.ylabel("amplitude")
    plt.xlabel("time (sec)")
    plt.title("input sound: x")

    # panel 2: magnitude spectrogram of the residual up to maxplotfreq
    plt.subplot(3, 1, 2)
    maxplotbin = int(N * maxplotfreq / fs)
    spectrogramTimes = H * np.arange(int(mXr[:, 0].size)) / sampleRate
    binFreqs = np.arange(maxplotbin + 1) * sampleRate / N
    plt.pcolormesh(spectrogramTimes, binFreqs, np.transpose(mXr[:, : maxplotbin + 1]))
    plt.autoscale(tight=True)

    # harmonic tracks on top; values at/above maxplotfreq or equal to 0 are hidden as NaN
    harms = hfreq * np.less(hfreq, maxplotfreq)
    harms[harms == 0] = np.nan
    trackTimes = H * np.arange(int(harms[:, 0].size)) / sampleRate
    plt.plot(trackTimes, harms, color="k", ms=3, alpha=1)
    plt.xlabel("time(s)")
    plt.ylabel("frequency(Hz)")
    plt.autoscale(tight=True)
    plt.title("harmonics + residual spectrogram")

    # panel 3: output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / sampleRate, y)
    plt.axis([0, y.size / sampleRate, min(y), max(y)])
    plt.ylabel("amplitude")
    plt.xlabel("time (sec)")
    plt.title("output sound: y")

    plt.tight_layout()
    plt.show()
def main(
    inputFile="../../sounds/bendir.wav",
    window="hamming",
    M=2001,
    N=2048,
    t=-80,
    minSineDur=0.02,
    maxnSines=150,
    freqDevOffset=10,
    freqDevSlope=0.001,
    stocf=0.2,
):
    """Run the sinusoidal plus stochastic model on a sound file, write and plot the results.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    stocf: decimation factor used for the stochastic approximation

    Writes three files under "output_sounds/" (sinusoids, stochastic
    component, and their sum) and shows a three-panel figure.
    """
    # ------- synthesis parameters -------------------
    # size of fft used in synthesis
    Ns = 512
    # hop size (has to be 1/4 of Ns)
    H = 128

    # --------- computation -----------------
    # read input sound
    (fs, x) = UF.wavread(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # perform sinusoidal analysis
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

    # subtract sinusoids from original sound
    xr = UF.sineSubtraction(x, Ns, H, tfreq, tmag, tphase, fs)

    # compute stochastic model of residual
    # NOTE(review): other callers in this file pass an extra FFT-size argument
    # (xr, H, H*2, stocf); confirm which signature the STM module in use has.
    mYst = STM.stochasticModelAnal(xr, H, stocf)

    # synthesize sinusoids
    ys = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

    # synthesize stochastic component
    yst = STM.stochasticModelSynth(mYst, H)

    # sum sinusoids and stochastic, truncated to the shorter of the two
    commonLen = min(yst.size, ys.size)
    y = yst[:commonLen] + ys[:commonLen]

    # output sound files (names derived from the input name minus its last 4 characters)
    baseName = os.path.basename(inputFile)[:-4]
    outputFileSines = "output_sounds/" + baseName + "_spsModel_sines.wav"
    outputFileStochastic = "output_sounds/" + baseName + "_spsModel_stochastic.wav"
    outputFile = "output_sounds/" + baseName + "_spsModel.wav"

    # write sound files for sinusoidal, stochastic, and the sum
    UF.wavwrite(ys, fs, outputFileSines)
    UF.wavwrite(yst, fs, outputFileStochastic)
    UF.wavwrite(y, fs, outputFile)

    # --------- plotting --------------------
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 10000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel("amplitude")
    plt.xlabel("time (sec)")
    plt.title("input sound: x")

    # plot the stochastic component up to maxplotfreq
    plt.subplot(3, 1, 2)
    numFrames = int(mYst[:, 0].size)
    sizeEnv = int(mYst[0, :].size)
    frmTime = H * np.arange(numFrames) / float(fs)
    # The number of envelope bins to show must be an int: a float slice bound
    # raises TypeError in NumPy.
    numPlotBins = int(sizeEnv * maxplotfreq / (0.5 * fs)) + 1
    envToPlot = mYst[:, :numPlotBins]
    # Build the frequency axis from the slice actually plotted so that its
    # length always matches the data handed to pcolormesh.
    binFreq = (0.5 * fs) * np.arange(envToPlot.shape[1]) / sizeEnv
    plt.pcolormesh(frmTime, binFreq, np.transpose(envToPlot))
    plt.autoscale(tight=True)

    # plot sinusoidal frequencies on top of stochastic component;
    # values at/above maxplotfreq or equal to 0 are hidden as NaN
    sines = tfreq * np.less(tfreq, maxplotfreq)
    sines[sines == 0] = np.nan
    numFrames = int(sines[:, 0].size)
    frmTime = H * np.arange(numFrames) / float(fs)
    plt.plot(frmTime, sines, color="k", ms=3, alpha=1)
    plt.xlabel("time(s)")
    plt.ylabel("Frequency(Hz)")
    plt.autoscale(tight=True)
    plt.title("sinusoidal + stochastic spectrogram")

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel("amplitude")
    plt.xlabel("time (sec)")
    plt.title("output sound: y")

    plt.tight_layout()
    plt.show()
# Script fragment: harmonic analysis, residual computation, and the first
# subplot (input spectrogram); x, fs and w are defined before this point.

# analysis parameters
N = 1024             # FFT size used in the analysis
t = -100             # magnitude threshold
nH = 40              # maximum number of harmonics
minf0 = 420          # lower bound for the fundamental
maxf0 = 460          # upper bound for the fundamental
f0et = 5             # error threshold of the f0 detection
maxnpeaksTwm = 5
minSineDur = .1      # minimum duration of harmonic tracks
harmDevSlope = 0.01  # slope of the allowed harmonic deviation

# synthesis parameters
Ns = 512             # FFT size used for the subtraction and residual spectrogram
# Floor division keeps the hop size an int; with true division `Ns / 4`
# evaluates to the float 128.0, which is not usable as a sample count/index.
H = Ns // 4

# spectrogram of the input, harmonic analysis, residual and its spectrogram
mX, pX = STFT.stftAnal(x, fs, w, N, H)
hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur)
xr = UF.sineSubtraction(x, Ns, H, hfreq, hmag, hphase, fs)
mXr, pXr = STFT.stftAnal(xr, fs, hamming(Ns), Ns, H)

# frequency range to plot
maxplotfreq = 5000.0

plt.figure(1, figsize=(9, 7))

# input magnitude spectrogram up to maxplotfreq
plt.subplot(221)
numFrames = int(mX[:, 0].size)
frmTime = H * np.arange(numFrames) / float(fs)
# Highest bin to show, as an int: a float slice bound raises TypeError in NumPy.
maxplotbin = int(N * maxplotfreq / fs)
binFreq = fs * np.arange(maxplotbin + 1) / N
plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:, :maxplotbin + 1]))
plt.autoscale(tight=True)

# harmonic tracks; values at/above maxplotfreq or equal to 0 are hidden as NaN
harms = hfreq * np.less(hfreq, maxplotfreq)
harms[harms == 0] = np.nan