def sineODF(file='../../../../../audioDSP_course/assignments/sms-tools/sounds/piano.wav'):
    """
    Analyze a sound with the sinusoidal model and draw the estimated
    frequency tracks on top of its magnitude spectrogram.

    file (string): wav file including the path
    returns fTrackEst: the frequency tracks returned by SM.sineModelAnal

    The plot is drawn on the current matplotlib figure; plt.show() is not
    called here.
    """
    fs, x = UF.wavread(file)

    # analysis parameters
    M = 1024                                     # window size in samples
    H = int(M / 3)                               # hop size in samples
    t = -80.0                                    # peak threshold (presumably negative dB -- TODO confirm)
    window = 'blackman'                          # window type
    fftSize = int(pow(2, np.ceil(np.log2(M))))   # smallest power of two >= M
    N = fftSize
    maxnSines = 10                               # maximum simultaneous sines
    minSineDur = 0.1                             # minimal duration of sines
    freqDevOffset = 30                           # frequency deviation at 0 Hz
    freqDevSlope = 0.001                         # slope increase of the frequency deviation

    w = get_window(window, M)                    # analysis window
    tStamps = genTimeStamps(len(x), M, fs, H)    # time axis used for the plots

    # only the frequency tracks are needed; magnitudes and phases are dropped
    fTrackEst, _, _ = SM.sineModelAnal(x, fs, w, fftSize, H, t, maxnSines,
                                       minSineDur, freqDevOffset, freqDevSlope)

    # plotting
    mX, _ = stft.stftAnal(x, fs, w, fftSize, H)
    maxplotfreq = 1500.0
    binFreq = fs * np.arange(N * maxplotfreq / fs) / N
    # a slice bound has to be an integer: a float bound raises TypeError
    lastBin = int(N * maxplotfreq / fs + 1)
    plt.pcolormesh(tStamps, binFreq, np.transpose(mX[:, :lastBin]), cmap='hot_r')
    plt.plot(tStamps, fTrackEst, color='y', linewidth=2.0)
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (Hz)')
    plt.autoscale(tight=True)
    return fTrackEst
def timeStretchAudio(inputAudio, outputAudio, outputDuration, writeOutput=1):
    """
    Time-stretch a sound file through sinusoidal analysis/resynthesis.

    inputAudio (string): path of the sound file to read
    outputAudio (string): path of the sound file to write (used when writeOutput == 1)
    outputDuration (float): output time mapped to input time 0.4 in the time-scaling envelope
    writeOutput (int): 1 writes the result to outputAudio and returns None,
                       any other value returns (y, fs, nChannel) instead

    Only the first channel of a multichannel input is processed, so the
    synthesized signal y is always mono.
    """
    originalWav = Sndfile(inputAudio, 'r')
    try:
        x = originalWav.read_frames(originalWav.nframes)
        fs = originalWav.samplerate
        nChannel = originalWav.channels
        fileFormat = originalWav.format
    finally:
        # the input handle is not needed once its data and metadata are read
        originalWav.close()

    print(fs)

    if nChannel > 1:
        # assumes read_frames returns one row per frame and one column per
        # channel (scikits.audiolab layout -- TODO confirm); x[0] would be
        # the first frame, not the first channel
        x = x[:, 0]

    w = np.hamming(801)
    N = 2048
    t = -90
    minSineDur = .005
    maxnSines = 150
    freqDevOffset = 20
    freqDevSlope = 0.02
    Ns = 512
    H = Ns // 4           # integer hop size also under true division

    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur,
                                           freqDevOffset, freqDevSlope)

    # pairs of (input time, output time)
    timeScale = np.array([0, 0, .4, outputDuration])
    ytfreq, ytmag = trans.sineTimeScaling(tfreq, tmag, timeScale)
    # synthesis without the original phases
    y = SM.sineModelSynth(ytfreq, ytmag, np.array([]), Ns, H, fs)

    if writeOutput == 1:
        # y holds a single channel, so the output file is opened as mono
        outputWav = Sndfile(outputAudio, 'w', fileFormat, 1, fs)
        try:
            outputWav.write_frames(y)
        finally:
            outputWav.close()
    else:
        return y, fs, nChannel
def chirpTracker(inputFile='../../sounds/chirp-150-190-linear.wav'):
    """
    Track the two components of a chirp with the sinusoidal model, print the
    mean estimation error and plot estimated against true frequency tracks.

    Input:
        inputFile (string) = wav file including the path
    Output:
        M (int) = Window length
        H (int) = hop size in samples
        tStamps (numpy array) = A Kx1 numpy array of time stamps at which the frequency components were estimated
        fTrackEst (numpy array) = A Kx2 numpy array of estimated frequency values, one row per time frame, one column per component
        fTrackTrue (numpy array) = A Kx2 numpy array of true frequency values, one row per time frame, one column per component
        K is the number of frames
    """
    # Analysis parameters: Modify values of the parameters marked XX
    M = 3300                                    # Window size in samples

    ### Go through the code below and understand it, do not modify anything ###
    H = 128                                     # Hop size in samples
    N = int(pow(2, np.ceil(np.log2(M))))        # FFT Size, power of 2 larger than M
    t = -80.0                                   # threshold
    window = 'blackman'                         # Window type
    maxnSines = 2                               # Maximum number of sinusoids at any time frame
    minSineDur = 0.0                            # minimum duration set to zero to not do tracking
    freqDevOffset = 30                          # minimum frequency deviation at 0Hz
    freqDevSlope = 0.001                        # slope increase of minimum frequency deviation

    fs, x = UF.wavread(inputFile)               # read input sound
    w = get_window(window, M)                   # Compute analysis window
    tStamps = genTimeStamps(x.size, M, fs, H)   # Generate the tStamps to return

    # analyze the sound with the sinusoidal model
    fTrackEst, mTrackEst, pTrackEst = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur,
                                                       freqDevOffset, freqDevSlope)
    fTrackTrue = genTrueFreqTracks(tStamps)     # Generate the true frequency tracks

    # Compute mean estimation error. tailF frames at the beginning and end
    # are not used to compute the error
    tailF = 20
    meanErr = np.mean(np.abs(fTrackTrue[tailF:-tailF, :] - fTrackEst[tailF:-tailF, :]), axis=0)
    # print() with one argument behaves the same on Python 2 and Python 3
    print("Mean estimation error = " + str(meanErr) + ' Hz')

    # Plot the estimated and true frequency tracks
    mX, pX = stft.stftAnal(x, w, N, H)
    maxplotfreq = 1500.0
    binFreq = fs * np.arange(N * maxplotfreq / fs) / N
    # slice bounds must be integers; the float expression raises TypeError
    lastBin = int(N * maxplotfreq / fs + 1)
    plt.pcolormesh(tStamps, binFreq, np.transpose(mX[:, :lastBin]), cmap='hot_r')
    plt.plot(tStamps, fTrackTrue, 'o-', color='c', linewidth=3.0)
    plt.plot(tStamps, fTrackEst, color='y', linewidth=2.0)
    plt.legend(('True f1', 'True f2', 'Estimated f1', 'Estimated f2'))
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (Hz)')
    plt.autoscale(tight=True)
    plt.show()
    return M, H, tStamps, fTrackEst, fTrackTrue  # Output returned
def mainlobeTracker(inputFile='../../sounds/sines-440-602-hRange.wav'):
    """
    Track two sinusoids with the sinusoidal model for a given window type and
    threshold, print the mean estimation error and plot the tracks.

    Input:
        inputFile (string): wav file including the path
    Output:
        window (string): The window type used for analysis
        t (float) = peak picking threshold (negative dB)
        tStamps (numpy array) = A Kx1 numpy array of time stamps at which the frequency components were estimated
        fTrackEst = A Kx2 numpy array of estimated frequency values, one row per time frame, one column per component
        fTrackTrue = A Kx2 numpy array of true frequency values, one row per time frame, one column per component
    """
    # Analysis parameters: Modify values of the parameters marked XX
    window = 'blackman'                         # Window type
    t = -80                                     # threshold (negative dB)

    ### Go through the code below and understand it, do not modify anything ###
    M = 2047                                    # Window size
    N = 4096                                    # FFT Size
    H = 128                                     # Hop size in samples
    maxnSines = 2
    minSineDur = 0.02
    freqDevOffset = 10
    freqDevSlope = 0.001

    # read input sound
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)                   # Compute analysis window
    tStamps = genTimeStamps(x.size, M, fs, H)   # Generate the tStamps to return

    # analyze the sound with the sinusoidal model
    fTrackEst, mTrackEst, pTrackEst = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur,
                                                       freqDevOffset, freqDevSlope)
    fTrackTrue = genTrueFreqTracks(tStamps)     # Generate the true frequency tracks

    # Compute mean estimation error. tailF frames at the beginning and end
    # are not used to compute the error
    tailF = 20
    meanErr = np.mean(np.abs(fTrackTrue[tailF:-tailF, :] - fTrackEst[tailF:-tailF, :]), axis=0)
    # print() with one argument behaves the same on Python 2 and Python 3
    print("Mean estimation error = " + str(meanErr) + ' Hz')

    # Plot the estimated and true frequency tracks
    mX, pX = stft.stftAnal(x, w, N, H)
    maxplotfreq = 900.0
    binFreq = fs * np.arange(N * maxplotfreq / fs) / N
    # slice bounds must be integers; the float expression raises TypeError
    lastBin = int(N * maxplotfreq / fs + 1)
    plt.pcolormesh(tStamps, binFreq, np.transpose(mX[:, :lastBin]), cmap='hot_r')
    plt.plot(tStamps, fTrackTrue, 'o-', color='c', linewidth=3.0)
    plt.plot(tStamps, fTrackEst, color='y', linewidth=2.0)
    plt.legend(('True f1', 'True f2', 'Estimated f1', 'Estimated f2'))
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (Hz)')
    plt.autoscale(tight=True)
    plt.show()
    return window, float(t), tStamps, fTrackEst, fTrackTrue  # Output returned
def mainlobeTracker(inputFile = '../sms-tools/sounds/sines-440-602-hRange.wav'):
    """
    Track two sinusoids with the sinusoidal model for a given window type and
    threshold, print the mean estimation error and plot the tracks.
    plt.show() is not called here.

    Input:
        inputFile (string): wav file including the path
    Output:
        window (string): The window type used for analysis
        t (float) = peak picking threshold (negative dB)
        tStamps (numpy array) = A Kx1 numpy array of time stamps at which the frequency components were estimated
        fTrackEst = A Kx2 numpy array of estimated frequency values, one row per time frame, one column per component
        fTrackTrue = A Kx2 numpy array of true frequency values, one row per time frame, one column per component
    """
    # Analysis parameters: Modify values of the parameters marked XX
    window = 'blackman'                         # Window type
    t = -67                                     # threshold (negative dB)
    # Recorded experiments:
    #   window = blackman && t >= -67:        Mean estimation error = [ 0.01060268 1.58192485] Hz
    #   window = blackman harris && t >= -61: Mean estimation error = [ 0.01060268 1.58192485] Hz
    #   others failed

    ### Go through the code below and understand it, do not modify anything ###
    M = 2047                                    # Window size
    N = 4096                                    # FFT Size
    H = 128                                     # Hop size in samples
    maxnSines = 2
    minSineDur = 0.02
    freqDevOffset = 10
    freqDevSlope = 0.001

    # read input sound
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)                   # Compute analysis window
    tStamps = genTimeStamps(x.size, M, fs, H)   # Generate the tStamps to return

    # analyze the sound with the sinusoidal model
    fTrackEst, mTrackEst, pTrackEst = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur,
                                                       freqDevOffset, freqDevSlope)
    fTrackTrue = genTrueFreqTracks(tStamps)     # Generate the true frequency tracks

    # Compute mean estimation error. tailF frames at the beginning and end
    # are not used to compute the error
    tailF = 20
    meanErr = np.mean(np.abs(fTrackTrue[tailF:-tailF, :] - fTrackEst[tailF:-tailF, :]), axis=0)
    print("Mean estimation error = " + str(meanErr) + ' Hz')   # Print the error to terminal

    # Plot the estimated and true frequency tracks
    mX, pX = stft.stftAnal(x, w, N, H)
    maxplotfreq = 900.0
    binFreq = fs * np.arange(N * maxplotfreq / fs) / N
    # builtin int(): the np.int alias no longer exists in current NumPy
    lastBin = int(N * maxplotfreq / fs + 1)
    plt.pcolormesh(tStamps, binFreq, np.transpose(mX[:, :lastBin]), cmap='hot_r')
    plt.plot(tStamps, fTrackTrue, 'o-', color='c', linewidth=3.0)
    plt.plot(tStamps, fTrackEst, color='y', linewidth=2.0)
    plt.legend(('True f1', 'True f2', 'Estimated f1', 'Estimated f2'))
    plt.title('frequency detection: Window = ' + window + '& t = ' + str(t))
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (Hz)')
    plt.autoscale(tight=True)
    return window, float(t), tStamps, fTrackEst, fTrackTrue  # Output returned
def chirpTracker(inputFile='../sms-tools/sounds/chirp-150-190-linear.wav'):
    """
    Estimate the two frequency tracks of a chirp with the sinusoidal model,
    report the mean absolute error against the true tracks and plot both
    over the magnitude spectrogram.

    Input:
        inputFile (string) = wav file including the path
    Output:
        M (int) = Window length
        H (int) = hop size in samples
        tStamps (numpy array) = A Kx1 numpy array of time stamps at which the frequency components were estimated
        fTrackEst (numpy array) = A Kx2 numpy array of estimated frequency values, one row per time frame, one column per component
        fTrackTrue (numpy array) = A Kx2 numpy array of true frequency values, one row per time frame, one column per component
        K is the number of frames
    """
    # --- analysis parameters ---
    M = 3298                                  # window size in samples
    H = 128                                   # hop size in samples
    N = int(pow(2, np.ceil(np.log2(M))))      # FFT size: power of two not smaller than M
    t = -80.0                                 # threshold
    window = 'blackman'                       # window type
    maxnSines = 2                             # at most two sinusoids per frame
    minSineDur = 0.0                          # zero so that no tracking is done
    freqDevOffset = 30                        # minimum frequency deviation at 0Hz
    freqDevSlope = 0.001                      # slope increase of minimum frequency deviation

    # --- input sound, window and time axis ---
    fs, x = UF.wavread(inputFile)
    analysisWindow = get_window(window, M)
    tStamps = genTimeStamps(x.size, M, fs, H)

    # --- sinusoidal analysis and reference tracks ---
    fTrackEst, mTrackEst, pTrackEst = SM.sineModelAnal(
        x, fs, analysisWindow, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)
    fTrackTrue = genTrueFreqTracks(tStamps)

    # --- mean error, leaving out skipFrames frames at each end ---
    skipFrames = 20
    trueCore = fTrackTrue[skipFrames:-skipFrames, :]
    estCore = fTrackEst[skipFrames:-skipFrames, :]
    meanError = np.mean(np.abs(trueCore - estCore), axis=0)
    print("Mean estimation error = " + str(meanError) + ' Hz')

    # --- spectrogram with both sets of tracks on top ---
    magSpec, phaseSpec = stft.stftAnal(x, analysisWindow, N, H)
    maxplotfreq = 1500.0
    binFreq = fs * np.arange(N * maxplotfreq / fs) / N
    shownBins = int(N * maxplotfreq / fs + 1)
    plt.pcolormesh(tStamps, binFreq, np.transpose(magSpec[:, :shownBins]), cmap='hot_r')
    plt.plot(tStamps, fTrackTrue, 'o-', color='c', linewidth=3.0)
    plt.plot(tStamps, fTrackEst, color='y', linewidth=2.0)
    plt.legend(('True f1', 'True f2', 'Estimated f1', 'Estimated f2'))
    plt.title('True and estimated frequency, windowsize = ' + str(M))
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (Hz)')
    plt.autoscale(tight=True)
    plt.show()

    return M, H, tStamps, fTrackEst, fTrackTrue
def find_chirp_end_ms_sinusoidal_model(input_path):
    """
    Estimate the end time of a chirp, in milliseconds, from a sinusoidal analysis.

    input_path (string): wav file including the path
    returns: time of the frame holding the highest tracked frequency, in ms,
             plus a fixed correction derived from maxFreq

    The frame chosen is the first one whose largest track frequency is
    strictly greater than that of every earlier frame (and greater than 0).
    """
    # analysis settings
    window = 'blackmanharris'
    M = 401
    N = 2048
    H = 50
    t = -90
    minSineDur = 0.5
    maxnSines = 30
    freqDevOffset = 3
    freqDevSlope = 0.000000
    minFreq = 100
    maxFreq = 950

    (fs, x) = UF.wavread(input_path)
    w = get_window(window, M)
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur,
                                           freqDevOffset, freqDevSlope, minFreq, maxFreq)

    # time of every analysis frame, in seconds
    numFrames = int(tfreq[:, 0].size)
    frmTime = H * np.arange(numFrames) / float(fs)

    # clamp non-positive entries to zero (modifies tfreq in place)
    tfreq[tfreq <= 0] = 0

    # search for the frame with the highest frequency
    best_freq = 0
    best_frame = 0
    for frame_idx, frame in enumerate(tfreq):
        frame_peak = frame[np.argmax(frame)]
        if frame_peak > best_freq:
            best_freq = frame_peak
            best_frame = frame_idx

    end_of_chirp_ms = frmTime[best_frame] * 1000

    # calculate the amount of missing frequency at the
    # end of the chirp to add that time back in
    missingFreq = 1000 - maxFreq
    # NOTE(review): with Python 2 integer division 14 / 1000 is 0 and this
    # correction vanishes -- confirm which interpreter is intended
    time_fix_ms = 14 / 1000 * missingFreq * 1000
    return end_of_chirp_ms + time_fix_ms
def mainlobeTracker(inputFile="../../sounds/sines-440-602-hRange.wav"):
    """
    Track two sinusoids with the sinusoidal model for a given window type and
    threshold, print the mean estimation error and plot the tracks.
    plt.show() is not called here.

    Input:
        inputFile (string): wav file including the path
    Output:
        window (string): The window type used for analysis
        t (float) = peak picking threshold (negative dB)
        tStamps (numpy array) = A Kx1 numpy array of time stamps at which the frequency components were estimated
        fTrackEst = A Kx2 numpy array of estimated frequency values, one row per time frame, one column per component
        fTrackTrue = A Kx2 numpy array of true frequency values, one row per time frame, one column per component
    """
    # Analysis parameters: Modify values of the parameters marked XX
    window = "blackmanharris"                   # Window type
    t = -93.0                                   # threshold (negative dB)

    ### Go through the code below and understand it, do not modify anything ###
    M = 2047                                    # Window size
    N = 4096                                    # FFT Size
    H = 128                                     # Hop size in samples
    maxnSines = 2
    minSineDur = 0.02
    freqDevOffset = 10
    freqDevSlope = 0.001

    # read input sound
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)                   # Compute analysis window
    tStamps = genTimeStamps(x.size, M, fs, H)   # Generate the tStamps to return

    # analyze the sound with the sinusoidal model
    fTrackEst, mTrackEst, pTrackEst = SM.sineModelAnal(
        x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope
    )
    fTrackTrue = genTrueFreqTracks(tStamps)     # Generate the true frequency tracks

    # Compute mean estimation error. tailF (10) frames at the beginning and
    # end are not used to compute the error
    tailF = 10
    meanErr = np.mean(np.abs(fTrackTrue[tailF:-tailF, :] - fTrackEst[tailF:-tailF, :]), axis=0)
    # print() with one argument behaves the same on Python 2 and Python 3
    print("Mean estimation error = " + str(meanErr) + " Hz")

    # Plot the estimated and true frequency tracks
    mX, pX = stft.stftAnal(x, fs, w, N, H)
    maxplotfreq = 900.0
    binFreq = fs * np.arange(N * maxplotfreq / fs) / N
    # slice bounds must be integers; the float expression raises TypeError
    lastBin = int(N * maxplotfreq / fs + 1)
    plt.pcolormesh(tStamps, binFreq, np.transpose(mX[:, :lastBin]), cmap="hot_r")
    plt.plot(tStamps, fTrackTrue, "o-", color="c", linewidth=3.0)
    plt.plot(tStamps, fTrackEst, color="y", linewidth=2.0)
    plt.legend(("True f1", "True f2", "Estimated f1", "Estimated f2"))
    plt.xlabel("Time (s)")
    plt.ylabel("Frequency (Hz)")
    plt.autoscale(tight=True)
    return window, t, tStamps, fTrackEst, fTrackTrue  # Output returned
def sprModelAnal(x, fs, w, N, H, t, minSineDur, maxnSines, freqDevOffset, freqDevSlope):
    """
    Sinusoidal plus residual analysis of a sound
    x: input sound, fs: sampling rate, w: analysis window; N: FFT size, H: hop size,
    t: threshold in negative dB, minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    returns tfreq, tmag, tphase: sinusoidal frequencies, magnitudes and phases from SM.sineModelAnal;
            xr: residual signal from UF.sineSubtraction
    """
    subtractionFftSize = 512   # FFT size handed to the sinusoid subtraction

    # sinusoidal analysis (maxnSines comes before minSineDur in that call)
    sineTracks = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur,
                                  freqDevOffset, freqDevSlope)
    tfreq, tmag, tphase = sineTracks

    # residual: original sound minus the analyzed sinusoids
    xr = UF.sineSubtraction(x, subtractionFftSize, H, tfreq, tmag, tphase, fs)
    return tfreq, tmag, tphase, xr
def main(inputFile='../../sounds/bendir.wav', window='hamming', M=2001, N=2048, t=-80,
         minSineDur=0.02, maxnSines=150, freqDevOffset=10, freqDevSlope=0.001):
    """
    Analyze a sound with the sinusoidal model, resynthesize it and write the result
    to 'output_sounds/<input name without its last 4 characters>_sineModel.wav'
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    returns x: input sound, fs: sampling rate, tfreq: sinusoidal frequencies, y: synthesized sound
    """
    Ns = 512   # size of fft used in synthesis
    H = 128    # hop size (has to be 1/4 of Ns)

    # input sound and analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # analysis followed by synthesis with the analyzed phases
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur,
                                           freqDevOffset, freqDevSlope)
    y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

    # the output name drops the last four characters of the input name (e.g. '.wav')
    baseName = os.path.basename(inputFile)[:-4]
    outputFile = 'output_sounds/' + baseName + '_sineModel.wav'
    UF.wavwrite(y, fs, outputFile)

    return x, fs, tfreq, y
def spsModelAnal(x, fs, w, N, H, t, minSineDur, maxnSines, freqDevOffset, freqDevSlope, stocf):
    """
    Sinusoidal plus stochastic analysis of a sound
    x: input sound, fs: sampling rate, w: analysis window; N: FFT size, H: hop size,
    t: threshold in negative dB, minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    stocf: decimation factor used for the stochastic approximation
    returns tfreq, tmag, tphase: sinusoidal frequencies, magnitudes and phases from SM.sineModelAnal;
            stocEnv: stochastic model of the residual from STM.stochasticModelAnal
    """
    subtractionFftSize = 512   # FFT size handed to the sinusoid subtraction

    # sinusoidal analysis (maxnSines comes before minSineDur in that call)
    sineTracks = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur,
                                  freqDevOffset, freqDevSlope)
    tfreq, tmag, tphase = sineTracks

    # residual: original sound minus the analyzed sinusoids
    residual = UF.sineSubtraction(x, subtractionFftSize, H, tfreq, tmag, tphase, fs)

    # stochastic model of the residual, analyzed with an FFT size of twice the hop
    stocEnv = STM.stochasticModelAnal(residual, H, H * 2, stocf)
    return tfreq, tmag, tphase, stocEnv
def sine_model_analysis(self, window_size=2047, fft_size=4096, hop_size=150, threshold_db=-80,
                        min_sine_dur=0.15, max_sines=15):
    """
    Run the STFT and the sinusoidal model on self.signal and store the results.

    window_size: length of the Blackman analysis window
    fft_size: FFT size; hop_size: hop size
    threshold_db: peak threshold passed to SM.sineModelAnal
    min_sine_dur: minimum track duration passed to SM.sineModelAnal
    max_sines: maximum number of sinusoids passed to SM.sineModelAnal

    The settings are stored on the instance, the STFT output goes to
    self.stft_magnitudes / self.stft_phases, the sinusoidal tracks to
    self.frequencies / self.magnitudes / self.phases, and finally
    self.compute_lines() is called.
    """
    analysis_window = np.blackman(window_size)

    # keep the settings of this run on the instance
    self.fft_size = fft_size
    self.window_size = window_size
    self.hop_size = hop_size
    self.threshold_db = threshold_db
    self.min_sine_dur = min_sine_dur
    self.max_sines = max_sines

    # short-time Fourier transform of the signal
    stft_output = STFT.stftAnal(self.signal, analysis_window, fft_size, hop_size)
    self.stft_magnitudes, self.stft_phases = stft_output

    # sinusoidal tracks of the signal
    sine_output = SM.sineModelAnal(self.signal, self.sample_rate, analysis_window,
                                   fft_size, hop_size, threshold_db, max_sines, min_sine_dur)
    self.frequencies, self.magnitudes, self.phases = sine_output

    self.compute_lines()
def spsModelAnal(x, fs, w, N, H, t, minSineDur, maxnSines, freqDevOffset, freqDevSlope, stocf):
    """
    Sinusoidal plus stochastic analysis of a sound; this variant uses the analysis
    FFT size N both for the sinusoid subtraction and for the stochastic analysis
    x: input sound, fs: sampling rate, w: analysis window; N: FFT size, H: hop size,
    t: threshold in negative dB, minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    stocf: decimation factor used for the stochastic approximation
    returns tfreq, tmag, tphase: sinusoidal frequencies, magnitudes and phases from SM.sineModelAnal;
            stocEnv: stochastic model of the residual from STM.stochasticModelAnal
    """
    # sinusoidal analysis (maxnSines comes before minSineDur in that call)
    sineTracks = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur,
                                  freqDevOffset, freqDevSlope)
    tfreq, tmag, tphase = sineTracks

    # residual: original sound minus the analyzed sinusoids, with FFT size N
    residual = UF.sineSubtraction(x, N, H, tfreq, tmag, tphase, fs)

    # stochastic model of the residual, also with FFT size N
    stocEnv = STM.stochasticModelAnal(residual, H, N, stocf)
    return tfreq, tmag, tphase, stocEnv
def main(inputFile='../../sounds/bendir.wav', window='hamming', M=2001, N=2048, t=-80,
         minSineDur=0.02, maxnSines=150, freqDevOffset=10, freqDevSlope=0.001):
    """
    Sinusoidal analysis/synthesis of a sound file; the synthesized sound is written
    to 'output_sounds/<input name without its last 4 characters>_sineModel.wav'
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    returns x: input sound, fs: sampling rate, tfreq: sinusoidal frequencies, y: synthesized sound
    """
    # synthesis settings: fft size and hop size (has to be 1/4 of Ns)
    Ns, H = 512, 128

    fs, x = UF.wavread(inputFile)     # input sound
    w = get_window(window, M)         # analysis window

    # sinusoidal representation of the sound
    tfreq, tmag, tphase = SM.sineModelAnal(
        x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

    # sound synthesized back from that representation
    y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

    # write the synthesized sound next to the other outputs
    nameStem = os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, 'output_sounds/' + nameStem + '_sineModel.wav')

    return x, fs, tfreq, y
def exploreSineModel(inputFile='../sms-tools/sounds/multisines.wav'):
    """
    Analyze and resynthesize a sound with the sinusoidal model, write the result to
    '<input name without its last 4 characters>_sineModel.wav' in the working
    directory and show input, sinusoidal tracks and output in one figure.

    Input:
        inputFile (string) = wav file including the path
    Output:
        return True
    Discuss on the forum!
    """
    # analysis parameters
    window = 'hamming'       # Window type
    M = 3001                 # Window size in sample
    N = 4096                 # FFT Size
    t = -80                  # Threshold
    minSineDur = 0.02        # minimum duration of a sinusoid
    maxnSines = 15           # Maximum number of sinusoids at any time frame
    freqDevOffset = 10       # minimum frequency deviation at 0Hz
    freqDevSlope = 0.001     # slope increase of minimum frequency deviation

    # synthesis parameters
    Ns = 512                 # size of fft used in synthesis
    H = 128                  # hop size (has to be 1/4 of Ns)

    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # analysis, synthesis and output file
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur,
                                           freqDevOffset, freqDevSlope)
    y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)
    outputFile = os.path.basename(inputFile)[:-4] + '_sineModel.wav'
    UF.wavwrite(y, fs, outputFile)

    plt.figure(figsize=(12, 9))
    maxplotfreq = 5000.0     # frequency range to plot

    # top: input sound
    inputDuration = x.size / float(fs)
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, inputDuration, min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # middle: sinusoidal frequencies (only when there is at least one track)
    plt.subplot(3, 1, 2)
    if tfreq.shape[1] > 0:
        frameTimes = H * np.arange(tfreq.shape[0]) / float(fs)
        tfreq[tfreq <= 0] = np.nan      # non-positive values are not drawn (in-place)
        plt.plot(frameTimes, tfreq)
        plt.axis([0, inputDuration, 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # bottom: output sound
    outputDuration = y.size / float(fs)
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, outputDuration, min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show()
    return True
import stft as STFT
import sineModel as SM
import utilFunctions as UF

# Plot the frequency-derivative of the phase spectrogram of bendir.wav with
# the sinusoidal tracks drawn on top.

# the sound is located relative to this script
scriptDir = os.path.dirname(os.path.realpath(__file__))
(fs, x) = UF.wavread(os.path.join(scriptDir, "../../../sounds/bendir.wav"))

# analysis settings
w = np.hamming(2001)
N = 2048
H = 200
t = -80
minSineDur = 0.02
maxnSines = 150
freqDevOffset = 10
freqDevSlope = 0.001

# STFT and sinusoidal analysis of the same sound
mX, pX = STFT.stftAnal(x, fs, w, N, H)
tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur,
                                       freqDevOffset, freqDevSlope)

plt.figure(1, figsize=(9.5, 7))

# plot axes, limited to maxplotfreq
maxplotfreq = 800.0
maxplotbin = int(N * maxplotfreq / fs)
numFrames = int(mX[:, 0].size)
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = np.arange(maxplotbin + 1) * float(fs) / N

# difference of the phase between neighbouring bins
phaseDiff = np.diff(pX[:, : maxplotbin + 1], axis=1)
plt.pcolormesh(frmTime, binFreq, np.transpose(phaseDiff))
plt.autoscale(tight=True)

# tracks at or above maxplotfreq become 0 and are then hidden as NaN
tracks = tfreq * np.less(tfreq, maxplotfreq)
tracks[tracks <= 0] = np.nan
plt.plot(frmTime, tracks, color="k", lw=1.5)
plt.autoscale(tight=True)
plt.title("pX + sinusoidal tracks (bendir.wav)")
def analysis(inputFile='../../sounds/mridangam.wav', window='hamming', M=801, N=2048, t=-90,
             minSineDur=0.01, maxnSines=150, freqDevOffset=20, freqDevSlope=0.02):
    """
    Analyze a sound with the sine model, write a resynthesis made without the
    original phases and plot input, sinusoidal tracks and output
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    returns inputFile: input file name; fs: sampling rate of input file,
            tfreq, tmag: sinusoidal frequencies and magnitudes
    """
    Ns = 512   # size of fft used in synthesis
    H = 128    # hop size (has to be 1/4 of Ns)

    (fs, x) = UF.wavread(inputFile)
    w = get_window(window, M)

    # sine model of the whole sound
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur,
                                           freqDevOffset, freqDevSlope)

    # synthesis without the original phases (an empty phase array is passed)
    y = SM.sineModelSynth(tfreq, tmag, np.array([]), Ns, H, fs)

    # output sound file (monophonic with sampling rate of 44100)
    baseName = os.path.basename(inputFile)[:-4]
    outputFile = 'output_sounds/' + baseName + '_sineModel.wav'
    UF.wavwrite(y, fs, outputFile)

    plt.figure(figsize=(12, 9))
    maxplotfreq = 5000.0   # frequency range to plot

    # top: input sound
    inputDuration = x.size / float(fs)
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, inputDuration, min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # middle: sinusoidal frequencies, drawn from a copy so tfreq is returned untouched
    if tfreq.shape[1] > 0:
        plt.subplot(3, 1, 2)
        tracks = np.copy(tfreq)
        tracks = tracks * np.less(tracks, maxplotfreq)   # zero out tracks at or above maxplotfreq
        tracks[tracks <= 0] = np.nan                     # and hide everything non-positive
        frameCount = int(tracks[:, 0].size)
        frameTimes = H * np.arange(frameCount) / float(fs)
        plt.plot(frameTimes, tracks)
        plt.axis([0, inputDuration, 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # bottom: output sound
    outputDuration = y.size / float(fs)
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, outputDuration, min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show(block=False)
    return inputFile, fs, tfreq, tmag
def main(inputFile='../../sounds/bendir.wav', window='hamming', M=2001, N=2048, t=-80,
         minSineDur=0.02, maxnSines=150, freqDevOffset=10, freqDevSlope=0.001):
    """
    Sinusoidal analysis/synthesis of a sound file with plots; the synthesized sound is
    written to 'output_sounds/<input name without its last 4 characters>_sineModel.wav'
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    Nothing is returned; the figure is shown without blocking.
    """
    Ns = 512   # size of fft used in synthesis
    H = 128    # hop size (has to be 1/4 of Ns)

    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # analysis, then synthesis with the analyzed phases
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur,
                                           freqDevOffset, freqDevSlope)
    y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

    # write the synthesized sound
    baseName = os.path.basename(inputFile)[:-4]
    outputFile = 'output_sounds/' + baseName + '_sineModel.wav'
    UF.wavwrite(y, fs, outputFile)

    plt.figure(figsize=(12, 9))
    maxplotfreq = 5000.0   # frequency range to plot

    # top: input sound
    inputDuration = x.size / float(fs)
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, inputDuration, min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # middle: sinusoidal frequencies (only when there is at least one track)
    plt.subplot(3, 1, 2)
    if tfreq.shape[1] > 0:
        frameTimes = H * np.arange(tfreq.shape[0]) / float(fs)
        tfreq[tfreq <= 0] = np.nan      # non-positive values are not drawn (in-place)
        plt.plot(frameTimes, tfreq)
        plt.axis([0, inputDuration, 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # bottom: output sound
    outputDuration = y.size / float(fs)
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, outputDuration, min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show(block=False)
# Plot the magnitude spectrogram of flute-A4.wav with the sinusoidal tracks
# drawn on top.

# make the model modules importable and locate the sound, both relative to this script
scriptDir = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(scriptDir, '../../../software/models/'))
import stft as STFT
import sineModel as SM
import utilFunctions as UF

(fs, x) = UF.wavread(os.path.join(scriptDir, '../../../sounds/flute-A4.wav'))

# analysis settings
w = np.blackman(601)
N = 1024
H = 150
t = -80
minSineDur = .1
maxnSines = 150

# STFT and sinusoidal analysis of the same sound
mX, pX = STFT.stftAnal(x, fs, w, N, H)
tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur)

plt.figure(1, figsize=(9.5, 5))

# plot axes, limited to maxplotfreq
maxplotfreq = 5000.0
maxplotbin = int(N * maxplotfreq / fs)
numFrames = int(mX[:, 0].size)
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = np.arange(maxplotbin + 1) * float(fs) / N

plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:, :maxplotbin + 1]))
plt.autoscale(tight=True)

# tracks at or above maxplotfreq become 0 and are then hidden as NaN
tracks = tfreq * np.less(tfreq, maxplotfreq)
tracks[tracks <= 0] = np.nan
plt.plot(frmTime, tracks, color='k', lw=1.5)
plt.autoscale(tight=True)
plt.title('mX + sinusoidal tracks (flute-A4.wav)')
import utilFunctions as UF
import sineTransformations as SMT

# Time-scale mridangam.wav with the sinusoidal model and plot the result.
# NOTE(review): STFT and SM are used below but not imported in this visible
# part -- presumably imported earlier in the file; confirm.

(fs, x) = UF.wavread('../../../sounds/mridangam.wav')

# analysis settings
w = np.hamming(801)
N = 2048
t = -90
minSineDur = .005
maxnSines = 150
freqDevOffset = 20
freqDevSlope = 0.02
Ns = 512
H = Ns // 4    # integer hop size; Ns / 4 is a float under true division

mX, pX = STFT.stftAnal(x, fs, w, N, H)
tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur,
                                       freqDevOffset, freqDevSlope)

# time-scaling envelope as pairs of (input time, output time)
timeScale = np.array([.01, .0, .03, .03, .335, .4, .355, .42, .671, .8, .691, .82,
                      .858, 1.2, .878, 1.22, 1.185, 1.6, 1.205, 1.62, 1.497, 2.0,
                      1.517, 2.02, 1.686, 2.4, 1.706, 2.42, 1.978, 2.8])
ytfreq, ytmag = SMT.sineTimeScaling(tfreq, tmag, timeScale)

# synthesis without the original phases, followed by an STFT of the result
y = SM.sineModelSynth(ytfreq, ytmag, np.array([]), Ns, H, fs)
mY, pY = STFT.stftAnal(y, fs, w, N, H)

plt.figure(1, figsize=(12, 9))
maxplotfreq = 4000.0

# first panel: input sound
plt.subplot(4, 1, 1)
plt.plot(np.arange(x.size) / float(fs), x, 'b')
plt.axis([0, x.size / float(fs), min(x), max(x)])
plt.title('x (mridangam.wav)')

# second panel: time axis of the analysis frames
plt.subplot(4, 1, 2)
numFrames = int(tfreq[:, 0].size)
frmTime = H * np.arange(numFrames) / float(fs)
import stft as STFT
import sineModel as SM
import utilFunctions as UF

# Time-scale the first 1.49 seconds of mridangam.wav by re-reading the
# analysis frames of the sinusoidal model along a time-scaling envelope.

(fs, x) = UF.wavread('../../../sounds/mridangam.wav')
x1 = x[:int(1.49 * fs)]

# analysis settings
w = np.hamming(801)
N = 2048
t = -90
minSineDur = .005
maxnSines = 150
freqDevOffset = 20
freqDevSlope = 0.02
Ns = 512
H = Ns // 4

sfreq, smag, sphase = SM.sineModelAnal(x1, fs, w, N, H, t, maxnSines, minSineDur,
                                       freqDevOffset, freqDevSlope)

# time-scaling envelope as pairs of (input time, output time)
timeScale = np.array([.01, .0, .03, .03, .335, .8, .355, .82, .671, 1.0, .691, 1.02,
                      .858, 1.1, .878, 1.12, 1.185, 1.8, 1.205, 1.82, 1.49, 2.0])

L = sfreq[:, 0].size                                           # number of input frames
maxInTime = max(timeScale[::2])                                # maximum value used as input times
maxOutTime = max(timeScale[1::2])                              # maximum value used in output times
outL = int(L * maxOutTime / maxInTime)                         # number of output frames
inFrames = L * timeScale[::2] / maxInTime                      # input time values in frames
outFrames = outL * timeScale[1::2] / maxOutTime                # output time values in frames
timeScalingEnv = interp1d(outFrames, inFrames, fill_value=0)   # interpolation function
indexes = timeScalingEnv(np.arange(outL))                      # generate frame indexes for the output

# One fancy-index lookup per array instead of growing the result with
# np.vstack once per frame (which copies the whole array every time).
# The rounding is the same int(round(...)) as before; the results are
# always 2-D, one row per output frame.
frameIndexes = [int(round(l)) for l in indexes]
ysfreq = sfreq[frameIndexes, :]
ysmag = smag[frameIndexes, :]
def sms_analysis_from_file(input_filename, maxNumSines=SMS.maxNumSines):
    """Load an audio file and run the sinusoidal-model analysis on it.

    The file is read with ``librosa.load`` and passed to ``SM.sineModelAnal``
    together with the analysis settings stored on ``SMS``; only the maximum
    number of sinusoids can be overridden per call.

    input_filename: path handed to ``librosa.load``
    maxNumSines: value forwarded as the max-sinusoids argument
        (defaults to ``SMS.maxNumSines``)
    returns (tfreq, tmag, tphase, Fs): the three arrays produced by
        ``SM.sineModelAnal`` plus the sampling rate reported by librosa
    """
    samples, Fs = librosa.load(input_filename)
    analysis_result = SM.sineModelAnal(
        samples,
        Fs,
        SMS.w,
        SMS.N_FFT,
        SMS.H,
        SMS.t,
        maxNumSines,
        SMS.minSineDur,
        SMS.freqDevOffset,
        SMS.freqDevSlope,
    )
    tfreq, tmag, tphase = analysis_result
    return (tfreq, tmag, tphase, Fs)
def main(
    inputFile="../../sounds/bendir.wav",
    window="hamming",
    M=2001,
    N=2048,
    t=-80,
    minSineDur=0.02,
    maxnSines=150,
    freqDevOffset=10,
    freqDevSlope=0.001,
    stocf=0.2,
):
    """Analyze/synthesize a sound with the sinusoidal plus stochastic model.

    Writes three wav files (sinusoids, stochastic component and their sum)
    under ``output_sounds/`` and shows a three-panel figure.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    stocf: decimation factor used for the stochastic approximation
    """
    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # --------- computation -----------------

    # read input sound
    (fs, x) = UF.wavread(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # perform sinusoidal analysis
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

    # subtract sinusoids from original sound
    xr = UF.sineSubtraction(x, Ns, H, tfreq, tmag, tphase, fs)

    # compute stochastic model of residual
    mYst = STM.stochasticModelAnal(xr, H, stocf)

    # synthesize sinusoids
    ys = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

    # synthesize stochastic component
    yst = STM.stochasticModelSynth(mYst, H)

    # sum sinusoids and stochastic, truncated to the shorter of the two
    outSize = min(yst.size, ys.size)
    y = yst[:outSize] + ys[:outSize]

    # output sound files (monophonic with sampling rate of 44100);
    # the last four characters of the input name are dropped as the extension
    baseName = "output_sounds/" + os.path.basename(inputFile)[:-4]
    outputFileSines = baseName + "_spsModel_sines.wav"
    outputFileStochastic = baseName + "_spsModel_stochastic.wav"
    outputFile = baseName + "_spsModel.wav"

    # write sounds files for sinusoidal, residual, and the sum
    UF.wavwrite(ys, fs, outputFileSines)
    UF.wavwrite(yst, fs, outputFileStochastic)
    UF.wavwrite(y, fs, outputFile)

    # --------- plotting --------------------

    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 10000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel("amplitude")
    plt.xlabel("time (sec)")
    plt.title("input sound: x")

    # plot the stochastic component
    plt.subplot(3, 1, 2)
    numFrames = int(mYst[:, 0].size)
    sizeEnv = int(mYst[0, :].size)
    frmTime = H * np.arange(numFrames) / float(fs)
    # Slice bounds must be integers, so the highest plotted column is computed
    # once as an int and clamped to the columns that exist; binFreq is built
    # from the same count so both pcolormesh arguments always agree.
    maxplotbin = min(int(sizeEnv * maxplotfreq / (0.5 * fs)), sizeEnv - 1)
    binFreq = (0.5 * fs) * np.arange(maxplotbin + 1) / sizeEnv
    plt.pcolormesh(frmTime, binFreq, np.transpose(mYst[:, : maxplotbin + 1]))
    plt.autoscale(tight=True)

    # plot sinusoidal frequencies on top of stochastic component;
    # values outside the plotted range become NaN so they are not drawn
    sines = tfreq * np.less(tfreq, maxplotfreq)
    sines[sines == 0] = np.nan
    numFrames = int(sines[:, 0].size)
    frmTime = H * np.arange(numFrames) / float(fs)
    plt.plot(frmTime, sines, color="k", ms=3, alpha=1)
    plt.xlabel("time(s)")
    plt.ylabel("Frequency(Hz)")
    plt.autoscale(tight=True)
    plt.title("sinusoidal + stochastic spectrogram")

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel("amplitude")
    plt.xlabel("time (sec)")
    plt.title("output sound: y")

    plt.tight_layout()
    plt.show()
def analysis(inputFile='../../sounds/mridangam.wav', window='hamming', M=801, N=2048, t=-90,
             minSineDur=0.01, maxnSines=150, freqDevOffset=20, freqDevSlope=0.02):
    """
    Run the sine model on a sound, resynthesize it without phases and plot the result.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    returns inputFile: input file name; fs: sampling rate of input file,
            tfreq, tmag: sinusoidal frequencies and magnitudes
    """
    Ns = 512     # size of fft used in synthesis
    H = 128      # hop size (has to be 1/4 of Ns)

    def plotWaveform(row, samples, heading):
        # one amplitude-vs-time panel of the 3-row figure
        plt.subplot(3, 1, row)
        plt.plot(np.arange(samples.size)/float(fs), samples)
        plt.axis([0, samples.size/float(fs), min(samples), max(samples)])
        plt.ylabel('amplitude')
        plt.xlabel('time (sec)')
        plt.title(heading)

    # read the sound and build the analysis window
    (fs, x) = UF.wavread(inputFile)
    w = get_window(window, M)

    # sine model of the whole sound
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur,
                                           freqDevOffset, freqDevSlope)

    # resynthesis from frequencies and magnitudes only (empty phase array)
    y = SM.sineModelSynth(tfreq, tmag, np.array([]), Ns, H, fs)

    # write the resynthesized sound next to the other outputs
    outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_sineModel.wav'
    UF.wavwrite(y, fs, outputFile)

    plt.figure(figsize=(12, 9))
    maxplotfreq = 5000.0     # frequency range to plot

    plotWaveform(1, x, 'input sound: x')

    # sinusoidal tracks, only when the analysis returned at least one track
    if tfreq.shape[1] > 0:
        plt.subplot(3, 1, 2)
        # zero out values at or above maxplotfreq, then blank everything <= 0
        tracks = tfreq * np.less(tfreq, maxplotfreq)
        tracks[tracks <= 0] = np.nan
        frameCount = int(tracks[:, 0].size)
        frmTime = H*np.arange(frameCount)/float(fs)
        plt.plot(frmTime, tracks)
        plt.axis([0, x.size/float(fs), 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    plotWaveform(3, y, 'output sound: y')

    plt.tight_layout()
    plt.show(block=False)

    return inputFile, fs, tfreq, tmag
def exploreSineModel(inputFile='../../sounds/multiSines.wav'):
    """
    Analyze and resynthesize a sound with the sinusoidal model, report the SNR
    between input and resynthesis, write the output and error sounds and plot them.

    Input:
            inputFile (string) = wav file including the path
    Output:
            return True

    Side effects: writes '<name>_sineModel.wav' and '<name>_sineModel_error.wav'
    in the current directory, prints the SNR and shows a figure.
    """
    window = 'blackmanharris'   # Window type
    M = 3529                    # Window size in sample
    N = 4096                    # FFT Size
    t = -50                     # Threshold
    minSineDur = 0.01           # minimum duration of a sinusoid
    maxnSines = 15              # Maximum number of sinusoids at any time frame
    freqDevOffset = 10          # minimum frequency deviation at 0Hz
    freqDevSlope = 0.001        # slope increase of minimum frequency deviation
    Ns = 512                    # size of fft used in synthesis
    # hop size (has to be 1/4 of Ns); integer division keeps it an int,
    # true division would yield the float 128.0 under Python 3
    H = Ns // 4

    fs, x = UF.wavread(inputFile)   # read input sound
    w = get_window(window, M)       # compute analysis window

    # analyze the sound with the sinusoidal model
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

    # synthesize the output sound from the sinusoidal representation
    y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

    # output sound file name
    outputFile = os.path.basename(inputFile)[:-4] + '_sineModel.wav'

    # write the synthesized sound obtained from the sinusoidal synthesis
    UF.wavwrite(y, fs, outputFile)

    # SNR calculation over the samples both signals have in common, so the
    # subtraction is valid whichever of x and y is the shorter one
    common = min(len(x), len(y))
    x1 = x[:common]
    error = x1 - y[:common]
    e_signal = calculate_energy(x1)
    e_error = calculate_energy(error)
    snr = calculate_snr(e_signal, e_error)
    print("SNR {}".format(snr))

    errorFile = os.path.basename(inputFile)[:-4] + '_sineModel_error.wav'
    UF.wavwrite(error, fs, errorFile)

    # create figure to show plots
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3,1,1)
    plt.plot(np.arange(x.size)/float(fs), x)
    plt.axis([0, x.size/float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the sinusoidal frequencies
    plt.subplot(3,1,2)
    if (tfreq.shape[1] > 0):
        numFrames = tfreq.shape[0]
        frmTime = H*np.arange(numFrames)/float(fs)
        tfreq[tfreq<=0] = np.nan    # blank non-positive entries so they are not drawn
        plt.plot(frmTime, tfreq)
        plt.axis([0, x.size/float(fs), 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # plot the output sound with the absolute error on top
    plt.subplot(3,1,3)
    plt.plot(np.arange(y.size)/float(fs), y)
    plt.plot(np.arange(error.size)/float(fs), abs(error))  # error
    plt.axis([0, y.size/float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show()
    return True
def main(inputFile='../../sounds/bendir.wav', window='hamming', M=2001, N=2048, t=-80,
         minSineDur=0.02, maxnSines=150, freqDevOffset=10, freqDevSlope=0.001):
    """
    Split a sound into sinusoids plus residual, write the three resulting
    sounds under 'output_sounds/' and show a three-panel figure.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    """
    Ns = 512     # size of fft used in synthesis
    H = 128      # hop size (has to be 1/4 of Ns)

    def drawSound(row, samples, heading):
        # amplitude-vs-time panel in the given row of the 3-row figure
        plt.subplot(3, 1, row)
        plt.plot(np.arange(samples.size)/float(fs), samples)
        plt.axis([0, samples.size/float(fs), min(samples), max(samples)])
        plt.ylabel('amplitude')
        plt.xlabel('time (sec)')
        plt.title(heading)

    # --------- computation -----------------

    # input sound and analysis window
    (fs, x) = UF.wavread(inputFile)
    w = get_window(window, M)

    # sinusoidal analysis
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur,
                                           freqDevOffset, freqDevSlope)

    # residual = original minus sinusoids
    # NOTE(review): the analysis FFT size N (not the synthesis size Ns) is
    # passed to sineSubtraction here - confirm this is intended.
    xr = UF.sineSubtraction(x, N, H, tfreq, tmag, tphase, fs)

    # spectrogram of the residual
    mXr, pXr = STFT.stftAnal(xr, fs, w, N, H)

    # sinusoidal component
    ys = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

    # sinusoids + residual, truncated to the shorter of the two
    commonSize = min(xr.size, ys.size)
    y = xr[:commonSize] + ys[:commonSize]

    # output sound files (monophonic with sampling rate of 44100)
    stem = 'output_sounds/' + os.path.basename(inputFile)[:-4]
    outputFileSines = stem + '_sprModel_sines.wav'
    outputFileResidual = stem + '_sprModel_residual.wav'
    outputFile = stem + '_sprModel.wav'

    # write sinusoidal, residual, and the sum
    UF.wavwrite(ys, fs, outputFileSines)
    UF.wavwrite(xr, fs, outputFileResidual)
    UF.wavwrite(y, fs, outputFile)

    # --------- plotting --------------------

    plt.figure(figsize=(12, 9))
    maxplotfreq = 5000.0     # frequency range to plot

    drawSound(1, x, 'input sound: x')

    # magnitude spectrogram of the residual, up to maxplotfreq
    plt.subplot(3, 1, 2)
    maxplotbin = int(N*maxplotfreq/fs)
    frameCount = int(mXr[:, 0].size)
    frmTime = H*np.arange(frameCount)/float(fs)
    binFreq = np.arange(maxplotbin+1)*float(fs)/N
    residualSpec = np.transpose(mXr[:, :maxplotbin+1])
    plt.pcolormesh(frmTime, binFreq, residualSpec)
    plt.autoscale(tight=True)

    # sinusoidal tracks on top; values >= maxplotfreq or <= 0 are blanked
    tracks = tfreq*np.less(tfreq, maxplotfreq)
    tracks[tracks <= 0] = np.nan
    plt.plot(frmTime, tracks, color='k')
    plt.title('sinusoidal tracks + residual spectrogram')
    plt.autoscale(tight=True)

    drawSound(3, y, 'output sound: y')

    plt.tight_layout()
    plt.show()
import stft as STFT
import sineModel as SM
import utilFunctions as UF

# Analyze the first 1.49 s of mridangam.wav with the sinusoidal model and
# compute the input-frame index of every time-scaled output frame.
(fs, x) = UF.wavread('../../../sounds/mridangam.wav')
x1 = x[:int(1.49 * fs)]

# analysis / synthesis parameters
w = np.hamming(801)
N = 2048
t = -90
minSineDur = .005
maxnSines = 150
freqDevOffset = 20
freqDevSlope = 0.02
Ns = 512
# Integer division: H is a hop size in samples; true division would make it
# a float (128.0) under Python 3.
H = Ns // 4
sfreq, smag, sphase = SM.sineModelAnal(x1, fs, w, N, H, t, maxnSines,
                                       minSineDur, freqDevOffset, freqDevSlope)

# interleaved (input time, output time) pairs: even positions are read as
# input times and odd positions as output times below
timeScale = np.array([
    .01, .0, .03, .03, .335, .8, .355, .82, .671, 1.0, .691, 1.02, .858, 1.1,
    .878, 1.12, 1.185, 1.8, 1.205, 1.82, 1.49, 2.0
])

L = sfreq[:, 0].size  # number of input frames
maxInTime = max(timeScale[::2])  # maximum value used as input times
maxOutTime = max(timeScale[1::2])  # maximum value used in output times
outL = int(L * maxOutTime / maxInTime)  # number of output frames
inFrames = L * timeScale[::2] / maxInTime  # input time values in frames
outFrames = outL * timeScale[1::2] / maxOutTime  # output time values in frames
timeScalingEnv = interp1d(outFrames, inFrames,
                          fill_value=0)  # interpolation function
indexes = timeScalingEnv(
    np.arange(outL))  # generate frame indexes for the output

# An array index must be an integer, so the rounded value is converted explicitly.
ysfreq = sfreq[int(round(indexes[0])), :]  # first output frame
def exploreSineModel(inputFile='multiSines.wav'):
    """
    Analyze a sound with the sinusoidal model, resynthesize it, write the
    result to '<name>_sineModel.wav' in the current directory and plot
    input, sinusoidal tracks and output.

    Input:
            inputFile (string) = wav file including the path
    Output:
            return True
    """
    # analysis settings
    window = 'hamming'       # Window type
    M = 2001                 # Window size in sample
    N = 2048                 # FFT Size
    t = -80                  # Threshold
    minSineDur = 0.02        # minimum duration of a sinusoid
    maxnSines = 150          # Maximum number of sinusoids at any time frame
    freqDevOffset = 10       # minimum frequency deviation at 0Hz
    freqDevSlope = 0.001     # slope increase of minimum frequency deviation
    # synthesis settings
    Ns = 512                 # size of fft used in synthesis
    H = 128                  # hop size (has to be 1/4 of Ns)

    def showSignal(row, samples, heading):
        # amplitude-vs-time panel in the given row of the 3-row figure
        plt.subplot(3, 1, row)
        plt.plot(np.arange(samples.size) / float(fs), samples)
        plt.axis([0, samples.size / float(fs), min(samples), max(samples)])
        plt.ylabel('amplitude')
        plt.xlabel('time (sec)')
        plt.title(heading)

    fs, x = UF.wavread(inputFile)    # input sound
    w = get_window(window, M)        # analysis window

    # sinusoidal analysis followed by resynthesis with the analyzed phases
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur,
                                           freqDevOffset, freqDevSlope)
    y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

    # store the resynthesized sound
    outputFile = os.path.basename(inputFile)[:-4] + '_sineModel.wav'
    UF.wavwrite(y, fs, outputFile)

    plt.figure(figsize=(12, 9))
    maxplotfreq = 5000.0             # frequency range to plot

    showSignal(1, x, 'input sound: x')

    # sinusoidal tracks; the panel stays empty when no track was found
    plt.subplot(3, 1, 2)
    if tfreq.shape[1] > 0:
        frameCount = tfreq.shape[0]
        frmTime = H * np.arange(frameCount) / float(fs)
        tfreq[tfreq <= 0] = np.nan   # blank non-positive entries (in place)
        plt.plot(frmTime, tfreq)
        plt.axis([0, x.size / float(fs), 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    showSignal(3, y, 'output sound: y')

    plt.tight_layout()
    plt.show()
    return True
import stft as STFT
import sineModel as SM
import utilFunctions as UF

# Plot the magnitude spectrogram of flute-A4.wav with its sinusoidal tracks
# drawn on top. The sound is located relative to this script's directory.
scriptDir = os.path.dirname(os.path.realpath(__file__))
(fs, x) = UF.wavread(os.path.join(scriptDir, '../../../sounds/flute-A4.wav'))

# analysis parameters
w = np.blackman(601)     # analysis window
N = 1024                 # FFT size
H = 150                  # hop size
t = -80                  # peak threshold passed to sineModelAnal
minSineDur = .1
maxnSines = 150

# spectrogram and sinusoidal tracks
mX, pX = STFT.stftAnal(x, fs, w, N, H)
tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur)

plt.figure(1, figsize=(9.5, 5))
maxplotfreq = 5000.0

# spectrogram restricted to the bins up to maxplotfreq
maxplotbin = int(N * maxplotfreq / fs)
numFrames = int(mX[:, 0].size)
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = np.arange(maxplotbin + 1) * float(fs) / N
visibleSpec = np.transpose(mX[:, :maxplotbin + 1])
plt.pcolormesh(frmTime, binFreq, visibleSpec)
plt.autoscale(tight=True)

# tracks on top: values at or above maxplotfreq are zeroed, then everything
# <= 0 is replaced by NaN so it is not drawn
tracks = tfreq * np.less(tfreq, maxplotfreq)
tracks[tracks <= 0] = np.nan
plt.plot(frmTime, tracks, color='k', lw=1.5)
plt.autoscale(tight=True)
plt.title('mX + sinusoidal tracks (flute-A4.wav)')
def main(inputFile='../../sounds/bendir.wav',
         window='hamming',
         M=2001,
         N=2048,
         t=-80,
         minSineDur=0.02,
         maxnSines=150,
         freqDevOffset=10,
         freqDevSlope=0.001):
    """
    Analyze a sound with the sinusoidal model, resynthesize it, write the
    result to 'output_sounds/<name>_sineModel.wav' and plot input, tracks and output.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    """
    Ns = 512     # size of fft used in synthesis
    H = 128      # hop size (has to be 1/4 of Ns)

    def plotSound(row, samples, heading):
        # amplitude-vs-time panel in the given row of the 3-row figure
        plt.subplot(3, 1, row)
        plt.plot(np.arange(samples.size) / float(fs), samples)
        plt.axis([0, samples.size / float(fs), min(samples), max(samples)])
        plt.ylabel('amplitude')
        plt.xlabel('time (sec)')
        plt.title(heading)

    # input sound and analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # sinusoidal analysis followed by resynthesis with the analyzed phases
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines,
                                           minSineDur, freqDevOffset,
                                           freqDevSlope)
    y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

    # store the resynthesized sound
    stem = os.path.basename(inputFile)[:-4]
    outputFile = 'output_sounds/' + stem + '_sineModel.wav'
    UF.wavwrite(y, fs, outputFile)

    plt.figure(figsize=(12, 9))
    maxplotfreq = 5000.0     # frequency range to plot

    plotSound(1, x, 'input sound: x')

    # sinusoidal tracks; the panel stays empty when no track was found
    plt.subplot(3, 1, 2)
    if tfreq.shape[1] > 0:
        frameCount = tfreq.shape[0]
        frmTime = H * np.arange(frameCount) / float(fs)
        tfreq[tfreq <= 0] = np.nan   # blank non-positive entries (in place)
        plt.plot(frmTime, tfreq)
        plt.axis([0, x.size / float(fs), 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    plotSound(3, y, 'output sound: y')

    plt.tight_layout()
    plt.show(block=False)