def timeStretchAudio(inputAudio, outputAudio, outputDuration, writeOutput=1):
    """
    Time-stretch a sound file using the sinusoidal model.

    The input is analysed with SM.sineModelAnal, the tracks are time-scaled with
    trans.sineTimeScaling and the result is synthesized with SM.sineModelSynth
    using an empty phase array.

    inputAudio: path of the sound file to read
    outputAudio: path of the sound file to write (only used when writeOutput == 1)
    outputDuration: output time (in the time-value pairs passed to
        sineTimeScaling) that input time 0.4 is mapped to
    writeOutput: 1 writes the result to outputAudio and returns None;
        any other value skips the file and returns the result
    returns (only when writeOutput != 1) y: synthesized sound, fs: sampling rate,
        nChannel: number of channels of the input file
    """
    originalWav = Sndfile(inputAudio, 'r')
    try:
        x = originalWav.read_frames(originalWav.nframes)
        fs = originalWav.samplerate
        nChannel = originalWav.channels
        fileFormat = originalWav.format
    finally:
        # the input handle was previously left open
        originalWav.close()

    print(fs)

    if nChannel > 1:
        # keep the first channel only; assumes read_frames returns an array of
        # shape (nframes, nchannels) -- TODO confirm against the audio backend
        x = x[:, 0]

    # analysis parameters
    w = np.hamming(801)
    N = 2048
    t = -90
    minSineDur = .005
    maxnSines = 150
    freqDevOffset = 20
    freqDevSlope = 0.02
    Ns = 512
    H = Ns // 4    # integer hop size on both Python 2 and Python 3

    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

    # NOTE(review): the input anchor 0.4 is hard-coded; it looks like it should
    # be the analysed input duration -- confirm before changing
    timeScale = np.array([0, 0, .4, outputDuration])
    ytfreq, ytmag = trans.sineTimeScaling(tfreq, tmag, timeScale)
    y = SM.sineModelSynth(ytfreq, ytmag, np.array([]), Ns, H, fs)

    if writeOutput == 1:
        outputWav = Sndfile(outputAudio, 'w', fileFormat, nChannel, fs)
        try:
            outputWav.write_frames(y)
        finally:
            outputWav.close()
    else:
        return y, fs, nChannel
def estimate(inputFile='a7q2-harmonic.wav', window='blackman', M=2101, N=4096, t=-90, minSineDur=0.1,
             nH=50, minf0=100, maxf0=200, f0et=5, harmDevSlope=0.01):
    """
    Run a harmonic analysis/synthesis round trip and report two figures.

    The sound is analysed with HM.harmonicModelAnal, its f0 is detected with
    HM.f0Detection and the harmonics are resynthesized with SM.sineModelSynth.

    inputFile: input sound file
    window: analysis window type, passed to get_window
    M: analysis window size; N: fft size
    t: magnitude threshold; minSineDur: minimum track duration
    nH: maximum number of harmonics
    minf0, maxf0: f0 search range; f0et: f0 detection error threshold
    harmDevSlope: allowed deviation slope of the harmonic tracks
    returns diff: sum of absolute sample differences between the input and the
        resynthesis over their common length; std: standard deviation of f0
    """
    Ns = 512    # size of fft used in synthesis
    H = 128     # hop size

    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur)
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)
    y = SM.sineModelSynth(hfreq, hmag, hphase, Ns, H, fs)

    # compare only the samples both signals have
    size = min(x.size, y.size)
    diff = np.sum(np.abs(x[:size] - y[:size]))
    std = np.std(f0)

    # single-argument print call: same output on Python 2, valid on Python 3
    print("diff:{0} & std:{1}, M={2} N={3} t={4} minSineDur={5} nH={6} min/max={7}/{8} f0et={9} harmDevSlope={10}"
          .format(diff, std, M, N, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope))
    return diff, std
def transformation_synthesis(inputFile, fs, hfreq, hmag, freqScaling = np.array([0, 2.0, 1, .3]),
                             freqStretching = np.array([0, 1, 1, 1.5]), timbrePreservation = 1,
                             timeScaling = np.array([0, .0, .671, .671, 1.978, 1.978+1.0])):
    """
    Transform harmonic analysis data, synthesize the sound, save it and plot it.

    inputFile: name of input file (its base name is reused for the output file)
    fs: sampling rate of input file
    hfreq, hmag: harmonic frequencies and magnitudes
    freqScaling: frequency scaling factors, in time-value pairs
    freqStretching: frequency stretching factors, in time-value pairs
    timbrePreservation: 1 preserves original timbre, 0 it does not
    timeScaling: time scaling factors, in time-value pairs
    """
    fftSize = 512    # size of fft used in synthesis
    hop = 128        # hop size (has to be 1/4 of fftSize)

    # frequency scaling of the harmonics, then time scaling of the result
    scaledFreq, scaledMag = HT.harmonicFreqScaling(hfreq, hmag, freqScaling, freqStretching, timbrePreservation, fs)
    scaledFreq, scaledMag = ST.sineTimeScaling(scaledFreq, scaledMag, timeScaling)

    # synthesis with an empty phase array
    y = SM.sineModelSynth(scaledFreq, scaledMag, np.array([]), fftSize, hop, fs)

    # write output sound
    stem = os.path.basename(inputFile)[:-4]
    outputFile = 'output_sounds/' + stem + '_harmonicModelTransformation.wav'
    UF.wavwrite(y, fs, outputFile)

    # --------- plotting --------------------
    plt.figure(figsize=(12, 6))
    maxplotfreq = 15000.0

    # upper panel: transformed tracks, with out-of-range values blanked as NaN
    plt.subplot(2, 1, 1)
    belowLimit = np.less(scaledFreq, maxplotfreq)
    tracks = scaledFreq * belowLimit
    tracks[tracks <= 0] = np.nan
    numFrames = int(tracks[:, 0].size)
    frameTimes = hop * np.arange(numFrames) / float(fs)
    plt.plot(frameTimes, tracks, color='k')
    plt.title('transformed harmonic tracks')
    plt.autoscale(tight=True)

    # lower panel: output sound
    plt.subplot(2, 1, 2)
    sampleTimes = np.arange(y.size) / float(fs)
    plt.plot(sampleTimes, y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show()
def transformation_synthesis(inputFile, fs, tfreq, tmag, freqScaling = np.array([0, 2.0, 1, .3]),
                             timeScaling = np.array([0, .0, .671, .671, 1.978, 1.978+1.0])):
    """
    Scale sinusoidal tracks in frequency and time, synthesize, save and plot.

    inputFile: name of input file (its base name is reused for the output file)
    fs: sampling rate of input file
    tfreq, tmag: sinusoidal frequencies and magnitudes
    freqScaling: frequency scaling factors, in time-value pairs
    timeScaling: time scaling factors, in time-value pairs
    """
    fftSize = 512    # size of fft used in synthesis
    hop = 128        # hop size (has to be 1/4 of fftSize)

    # frequency scaling first, then time scaling of the scaled tracks
    scaledFreq = ST.sineFreqScaling(tfreq, freqScaling)
    scaledFreq, scaledMag = ST.sineTimeScaling(scaledFreq, tmag, timeScaling)

    # synthesis with an empty phase array
    y = SM.sineModelSynth(scaledFreq, scaledMag, np.array([]), fftSize, hop, fs)

    # write output sound
    stem = os.path.basename(inputFile)[:-4]
    outputFile = 'output_sounds/' + stem + '_sineModelTransformation.wav'
    UF.wavwrite(y, fs, outputFile)

    plt.figure(figsize=(12, 6))
    maxplotfreq = 15000.0

    # upper panel: only drawn when there is at least one track column
    if scaledFreq.shape[1] > 0:
        plt.subplot(2, 1, 1)
        tracks = scaledFreq * np.less(scaledFreq, maxplotfreq)
        tracks[tracks <= 0] = np.nan
        numFrames = tracks.shape[0]
        frameTimes = hop * np.arange(numFrames) / float(fs)
        plt.plot(frameTimes, tracks)
        plt.title('transformed sinusoidal tracks')
        plt.autoscale(tight=True)

    # lower panel: output sound
    plt.subplot(2, 1, 2)
    sampleTimes = np.arange(y.size) / float(fs)
    plt.plot(sampleTimes, y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show()
def sms_synth_to_file(output_filename, tfreq, tmag, tphase, Fs):
    """
    Synthesize audio from frequencies, magnitudes and phases and write it to a file.

    The synthesis uses the FFT size and hop size stored on SMS (SMS.Ns, SMS.H).
    Returns y: the synthesized audio as a float32 array
    """
    synthesized = SM.sineModelSynth(tfreq, tmag, tphase, SMS.Ns, SMS.H, Fs)
    y = np.asarray(synthesized, dtype='float32')
    librosa.output.write_wav(output_filename, y, Fs)
    return y
def hprModelSynth(hfreq, hmag, hphase, xr, N, H, fs):
    """
    Synthesis of a sound using the harmonic plus residual model
    hfreq, hmag, hphase: harmonic frequencies, magnitudes and phases; xr: residual signal
    N: synthesis FFT size; H: hop size, fs: sampling rate
    returns y: output sound, yh: harmonic component
    """
    # synthesize the harmonics
    yh = SM.sineModelSynth(hfreq, hmag, hphase, N, H, fs)
    # add harmonics and residual over the samples both signals share
    common = min(yh.size, xr.size)
    y = yh[:common] + xr[:common]
    return y, yh
def hpsModelSynth(hfreq, hmag, hphase, stocEnv, N, H, fs):
    """
    Synthesis of a sound using the harmonic plus stochastic model
    hfreq, hmag, hphase: harmonic frequencies, magnitudes and phases; stocEnv: stochastic envelope
    N: synthesis FFT size; H: hop size, fs: sampling rate
    returns y: output sound, yh: harmonic component, yst: stochastic component
    """
    # synthesize the harmonics
    yh = SM.sineModelSynth(hfreq, hmag, hphase, N, H, fs)
    # synthesize the stochastic residual
    yst = STM.stochasticModelSynth(stocEnv, H, H*2)
    # add both components over the samples they share
    common = min(yh.size, yst.size)
    y = yh[:common] + yst[:common]
    return y, yh, yst
def spsModelSynth(tfreq, tmag, tphase, stocEnv, N, H, fs):
    """
    Synthesis of a sound using the sinusoidal plus stochastic model
    tfreq, tmag, tphase: sinusoidal frequencies, amplitudes and phases; stocEnv: stochastic envelope
    N: synthesis FFT size; H: hop size, fs: sampling rate
    returns y: output sound, ys: sinusoidal component, yst: stochastic component
    """
    # synthesize the sinusoids
    ys = SM.sineModelSynth(tfreq, tmag, tphase, N, H, fs)
    # synthesize the stochastic residual
    yst = STM.stochasticModelSynth(stocEnv, H, H*2)
    # add both components over the samples they share
    common = min(ys.size, yst.size)
    y = ys[:common] + yst[:common]
    return y, ys, yst
def sprModelSynth(tfreq, tmag, tphase, xr, N, H, fs):
    """
    Synthesis of a sound using the sinusoidal plus residual model
    tfreq, tmag, tphase: sinusoidal frequencies, amplitudes and phases; xr: residual signal
    N: synthesis FFT size; H: hop size, fs: sampling rate
    returns y: output sound, ys: sinusoidal component
    """
    # synthesize the sinusoids
    ys = SM.sineModelSynth(tfreq, tmag, tphase, N, H, fs)
    # add sinusoids and residual over the samples both signals share
    common = min(ys.size, xr.size)
    y = ys[:common] + xr[:common]
    return y, ys
def main(inputFile='../../sounds/vignesh.wav', window='blackman', M=1201, N=2048, t=-90,
         minSineDur=0.1, nH=100, minf0=130, maxf0=300, f0et=7, harmDevSlope=0.01):
    """
    Harmonic-model analysis of a sound followed by resynthesis to a wav file.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics could have higher allowed deviation
    returns x: input sound, fs: sampling rate, hfreq: harmonic frequencies, y: synthesized sound
    """
    fftSize = 512    # size of fft used in synthesis
    hop = 128        # hop size (has to be 1/4 of fftSize)

    # read the sound and build the analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # detect the harmonics and resynthesize them with their phases
    hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, hop, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur)
    y = SM.sineModelSynth(hfreq, hmag, hphase, fftSize, hop, fs)

    # write the resynthesis next to the other output sounds
    stem = os.path.basename(inputFile)[:-4]
    outputFile = 'output_sounds/' + stem + '_harmonicModel.wav'
    UF.wavwrite(y, fs, outputFile)

    return x, fs, hfreq, y
def spsModelSynth(tfreq, tmag, tphase, stocEnv, N, H, fs):
    """
    Synthesis of a sound using the sinusoidal plus stochastic model
    tfreq, tmag, tphase: sinusoidal frequencies, amplitudes and phases; stocEnv: stochastic envelope
    N: synthesis FFT size (also passed as the third argument of the stochastic synthesis)
    H: hop size, fs: sampling rate
    returns y: output sound, ys: sinusoidal component, yst: stochastic component
    """
    # synthesize the sinusoids
    ys = SM.sineModelSynth(tfreq, tmag, tphase, N, H, fs)
    # synthesize the stochastic residual
    yst = STM.stochasticModelSynth(stocEnv, H, N)
    # add both components over the samples they share
    common = min(ys.size, yst.size)
    y = ys[:common] + yst[:common]
    return y, ys, yst
def resynthesize(hfreq, hmag, hphase, fs, hopSizeMelodia, URIOutputFile):
    """
    Synthesize the harmonics and write the result to a wav file.

    hfreq, hmag, hphase: harmonic frequencies, magnitudes and phases
    fs: sampling rate
    hopSizeMelodia: hop size; the synthesis FFT size is four times this value
    URIOutputFile: path of the sound file to write
    returns y: the synthesized sound
    """
    Ns = 4 * hopSizeMelodia
    y = SM.sineModelSynth(hfreq, hmag, hphase, Ns, hopSizeMelodia, fs)

    # write the sound resulting from harmonic analysis
    UF.wavwrite(y, fs, URIOutputFile)
    # single-argument print call: same output on Python 2, valid on Python 3
    print('written file ' + URIOutputFile)

    return y
def main(inputFile='../../sounds/bendir.wav', window='hamming', M=2001, N=2048, t=-80,
         minSineDur=0.02, maxnSines=150, freqDevOffset=10, freqDevSlope=0.001):
    """
    Sinusoidal-model analysis of a sound followed by resynthesis to a wav file.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    returns x: input sound, fs: sampling rate, tfreq: sinusoidal frequencies, y: synthesized sound
    """
    fftSize = 512    # size of fft used in synthesis
    hop = 128        # hop size (has to be 1/4 of fftSize)

    # read the sound and build the analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # analyze with the sinusoidal model and resynthesize with the phases
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, hop, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)
    y = SM.sineModelSynth(tfreq, tmag, tphase, fftSize, hop, fs)

    # write the resynthesis next to the other output sounds
    stem = os.path.basename(inputFile)[:-4]
    outputFile = 'output_sounds/' + stem + '_sineModel.wav'
    UF.wavwrite(y, fs, outputFile)

    return x, fs, tfreq, y
def harmonic_magnitudes_to_audio(hfreqs, magns, phases, options):
    '''
    Synthesize the audio of a contour from its harmonics and collect the
    per-frame spectra.

    Params:
    hfreqs - harmonic frequencies of the contour, one entry per frame
    magns - harmonic magnitudes of the contour, one entry per frame
    phases - harmonic phases of the contour, one entry per frame
    options - object providing Fs and windowsizeInSamples

    return:
    out_audio_contour - audio of the harmonics, synthesized by SM.sineModelSynth
    spectrum values accumulated in the pool under the 'spectrum' key
    '''
    pool = Pool()

    run_sine_model_synth = SineModelSynth(hopSize=512, sampleRate=options.Fs)
    run_ifft = IFFT(size=options.windowsizeInSamples)
    run_overl = OverlapAdd(frameSize=options.windowsizeInSamples, hopSize=512,
                           gain=1. / options.windowsizeInSamples)

    # Only the spectra are kept from the per-frame pass. The per-frame audio was
    # previously appended to an array that was then overwritten by the synthesis
    # below, so that accumulation has been dropped.
    for hfreq, hmag, hphase in zip(hfreqs, magns, phases):
        spectrum, _ = harmonics_to_audio(hfreq, hmag, hphase, run_sine_model_synth, run_ifft, run_overl)
        pool.add('spectrum', spectrum)

    # NOTE(review): FFT size 512, hop 128 and rate 44100 are hard-coded here while
    # the per-frame pass uses hop 512 and options.Fs -- confirm this is intended
    out_audio_contour = SM.sineModelSynth(hfreqs, magns, phases, 512, 128, 44100)

    return out_audio_contour, pool['spectrum']
def main(inputFile='../../sounds/bendir.wav', window='hamming', M=2001, N=2048, t=-80,
         minSineDur=0.02, maxnSines=150, freqDevOffset=10, freqDevSlope=0.001):
    """
    Analyze a sound with the sinusoidal model, resynthesize it and save the result.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    returns x: input sound, fs: sampling rate, tfreq: sinusoidal frequencies, y: synthesized sound
    """
    synthSize, hopSize = 512, 128    # synthesis fft size and hop size (1/4 of it)

    fs, x = UF.wavread(inputFile)
    analysisWindow = get_window(window, M)

    # sinusoidal analysis of the whole sound
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, analysisWindow, N, hopSize, t, maxnSines,
                                           minSineDur, freqDevOffset, freqDevSlope)

    # resynthesis from the sinusoidal representation, phases included
    y = SM.sineModelSynth(tfreq, tmag, tphase, synthSize, hopSize, fs)

    # output file name is derived from the input base name
    baseName = os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, 'output_sounds/' + baseName + '_sineModel.wav')

    return x, fs, tfreq, y
def main(inputFile='../../sounds/vignesh.wav', window='blackman', M=1201, N=2048, t=-90,
         minSineDur=0.1, nH=100, minf0=130, maxf0=300, f0et=7, harmDevSlope=0.01):
    """
    Analyze a sound with the harmonic model, resynthesize it and save the result.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics could have higher allowed deviation
    returns x: input sound, fs: sampling rate, hfreq: harmonic frequencies, y: synthesized sound
    """
    synthSize, hopSize = 512, 128    # synthesis fft size and hop size (1/4 of it)

    fs, x = UF.wavread(inputFile)
    analysisWindow = get_window(window, M)

    # harmonic analysis of the whole sound
    hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, analysisWindow, N, hopSize, t, nH,
                                               minf0, maxf0, f0et, harmDevSlope, minSineDur)

    # resynthesis of the harmonics, phases included
    y = SM.sineModelSynth(hfreq, hmag, hphase, synthSize, hopSize, fs)

    # output file name is derived from the input base name
    baseName = os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, 'output_sounds/' + baseName + '_harmonicModel.wav')

    return x, fs, hfreq, y
def analysis(inputFile='../../sounds/vignesh.wav', window='blackman', M=1201, N=2048, t=-90,
             minSineDur=0.1, nH=100, minf0=130, maxf0=300, f0et=7, harmDevSlope=0.01):
    """
    Analyze a sound with the harmonic model, save a phase-less resynthesis and
    plot the input, the spectrogram with harmonic tracks, and the output.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics
    minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound
    f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
    returns inputFile: input file name; fs: sampling rate of input file,
            hfreq, hmag: harmonic frequencies and magnitudes
    """
    fftSize = 512    # size of fft used in synthesis
    hop = 128        # hop size (has to be 1/4 of fftSize)

    # read the sound and build the analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # magnitude and phase spectrogram of the input sound
    mX, pX = STFT.stftAnal(x, fs, w, N, hop)

    # harmonic model of the whole sound
    hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, hop, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur)

    # synthesis without the original phases (empty phase array)
    y = SM.sineModelSynth(hfreq, hmag, np.array([]), fftSize, hop, fs)

    # write the synthesized sound
    stem = os.path.basename(inputFile)[:-4]
    outputFile = 'output_sounds/' + stem + '_harmonicModel.wav'
    UF.wavwrite(y, fs, outputFile)

    # --------- plotting --------------------
    plt.figure(figsize=(12, 9))
    maxplotfreq = 5000.0
    sampleRate = float(fs)

    # panel 1: input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / sampleRate, x)
    plt.axis([0, x.size / sampleRate, min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # panel 2: magnitude spectrogram up to maxplotfreq
    plt.subplot(3, 1, 2)
    maxplotbin = int(N * maxplotfreq / fs)
    numFrames = int(mX[:, 0].size)
    frameTimes = hop * np.arange(numFrames) / sampleRate
    binFreq = np.arange(maxplotbin + 1) * sampleRate / N
    visibleBins = mX[:, :maxplotbin + 1]
    plt.pcolormesh(frameTimes, binFreq, np.transpose(visibleBins))
    plt.autoscale(tight=True)

    # harmonic tracks on top of the spectrogram, out-of-range values blanked as NaN
    tracks = hfreq * np.less(hfreq, maxplotfreq)
    tracks[tracks <= 0] = np.nan
    plt.plot(frameTimes, tracks, color='k')
    plt.title('magnitude spectrogram + harmonic tracks')
    plt.autoscale(tight=True)

    # panel 3: output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / sampleRate, y)
    plt.axis([0, y.size / sampleRate, min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show(block=False)

    return inputFile, fs, hfreq, hmag
# Script: analyze mridangam.wav with the sinusoidal model, time-scale the tracks
# with a fixed list of time-value pairs, resynthesize, and start plotting the
# input sound and its sinusoidal tracks.
(fs, x) = UF.wavread('../../../sounds/mridangam.wav')

# analysis parameters
w = np.hamming(801)
N = 2048
t = -90
minSineDur = .005
maxnSines = 150
freqDevOffset = 20
freqDevSlope = 0.02
Ns = 512
H = Ns // 4    # floor division keeps the hop size an integer on Python 3 as well

# spectrogram and sinusoidal tracks of the input
mX, pX = STFT.stftAnal(x, fs, w, N, H)
tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

# time scaling factors, in time-value pairs
timeScale = np.array([.01, .0, .03, .03, .335, .4, .355, .42, .671, .8, .691, .82, .858, 1.2, .878, 1.22,
                      1.185, 1.6, 1.205, 1.62, 1.497, 2.0, 1.517, 2.02, 1.686, 2.4, 1.706, 2.42, 1.978, 2.8])
ytfreq, ytmag = SMT.sineTimeScaling(tfreq, tmag, timeScale)

# synthesis with an empty phase array, then spectrogram of the output
y = SM.sineModelSynth(ytfreq, ytmag, np.array([]), Ns, H, fs)
mY, pY = STFT.stftAnal(y, fs, w, N, H)

plt.figure(1, figsize=(12, 9))
maxplotfreq = 4000.0

# panel 1: input sound
plt.subplot(4,1,1)
plt.plot(np.arange(x.size)/float(fs), x, 'b')
plt.axis([0,x.size/float(fs),min(x),max(x)])
plt.title('x (mridangam.wav)')

# panel 2: input sinusoidal tracks, out-of-range values blanked as NaN
plt.subplot(4,1,2)
numFrames = int(tfreq[:,0].size)
frmTime = H*np.arange(numFrames)/float(fs)
tracks = tfreq*np.less(tfreq, maxplotfreq)
tracks[tracks<=0] = np.nan
plt.plot(frmTime, tracks, color='k', lw=1)
def analysis(inputFile='../../sounds/vignesh.wav', window='blackman', M=1201, N=2048, t=-90,
             minSineDur=0.1, nH=100, minf0=130, maxf0=300, f0et=7, harmDevSlope=0.01):
    """
    Harmonic-model analysis of a sound, with a saved phase-less resynthesis and
    a three-panel plot (input, harmonic tracks, output).

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics
    minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound
    f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
    returns inputFile: input file name; fs: sampling rate of input file,
            hfreq, hmag: harmonic frequencies and magnitudes
    """
    fftSize = 512    # size of fft used in synthesis
    hop = 128        # hop size (has to be 1/4 of fftSize)

    # read the sound and build the analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # harmonic model of the whole sound
    hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, hop, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur)

    # synthesis without the original phases (empty phase array)
    y = SM.sineModelSynth(hfreq, hmag, np.array([]), fftSize, hop, fs)

    # write the synthesized sound
    stem = os.path.basename(inputFile)[:-4]
    outputFile = 'output_sounds/' + stem + '_harmonicModel.wav'
    UF.wavwrite(y, fs, outputFile)

    plt.figure(figsize=(12, 9))
    maxplotfreq = 5000.0
    sampleRate = float(fs)
    inputDuration = x.size / sampleRate

    # panel 1: input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / sampleRate, x)
    plt.axis([0, inputDuration, min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # panel 2: harmonic tracks, only drawn when there is at least one column
    if hfreq.shape[1] > 0:
        plt.subplot(3, 1, 2)
        # work on a copy so the returned hfreq is not modified
        tracks = np.copy(hfreq)
        tracks[tracks <= 0] = np.nan
        frameTimes = hop * np.arange(tracks.shape[0]) / sampleRate
        plt.plot(frameTimes, tracks)
        plt.axis([0, inputDuration, 0, maxplotfreq])
        plt.title('frequencies of harmonic tracks')

    # panel 3: output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / sampleRate, y)
    plt.axis([0, y.size / sampleRate, min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show(block=False)

    return inputFile, fs, hfreq, hmag
def analysis(inputFile='../../sounds/mridangam.wav', window='hamming', M=801, N=2048, t=-90,
             minSineDur=0.01, maxnSines=150, freqDevOffset=20, freqDevSlope=0.02):
    """
    Sine-model analysis of a sound, with a saved phase-less resynthesis and
    a three-panel plot (input, sinusoidal tracks, output).

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    returns inputFile: input file name; fs: sampling rate of input file,
            tfreq, tmag: sinusoidal frequencies and magnitudes
    """
    fftSize = 512    # size of fft used in synthesis
    hop = 128        # hop size (has to be 1/4 of fftSize)

    # read the sound and build the analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # sine model of the whole sound
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, hop, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

    # synthesis without the original phases (empty phase array)
    y = SM.sineModelSynth(tfreq, tmag, np.array([]), fftSize, hop, fs)

    # write the synthesized sound
    stem = os.path.basename(inputFile)[:-4]
    outputFile = 'output_sounds/' + stem + '_sineModel.wav'
    UF.wavwrite(y, fs, outputFile)

    plt.figure(figsize=(12, 9))
    maxplotfreq = 5000.0
    sampleRate = float(fs)
    inputDuration = x.size / sampleRate

    # panel 1: input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / sampleRate, x)
    plt.axis([0, inputDuration, min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # panel 2: sinusoidal tracks, only drawn when there is at least one column
    if tfreq.shape[1] > 0:
        plt.subplot(3, 1, 2)
        # the product is a new array, so the returned tfreq is not modified
        tracks = tfreq * np.less(tfreq, maxplotfreq)
        tracks[tracks <= 0] = np.nan
        frameTimes = hop * np.arange(tracks.shape[0]) / sampleRate
        plt.plot(frameTimes, tracks)
        plt.axis([0, inputDuration, 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # panel 3: output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / sampleRate, y)
    plt.axis([0, y.size / sampleRate, min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show(block=False)

    return inputFile, fs, tfreq, tmag
def analysis(inputFile='../../sounds/vignesh.wav', window='blackman', M=1201, N=2048, t=-90,
             minSineDur=0.1, nH=100, minf0=130, maxf0=300, f0et=7, harmDevSlope=0.01):
    """
    Analyze a sound with the harmonic model, save a resynthesis made without
    the original phases, and plot the input, the harmonic tracks and the output.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics
    minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound
    f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
    returns inputFile: input file name; fs: sampling rate of input file,
            hfreq, hmag: harmonic frequencies and magnitudes
    """
    synthSize, hopSize = 512, 128    # synthesis fft size and hop size (1/4 of it)

    fs, x = UF.wavread(inputFile)
    analysisWindow = get_window(window, M)

    # harmonic model of the whole sound
    hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, analysisWindow, N, hopSize, t, nH,
                                               minf0, maxf0, f0et, harmDevSlope, minSineDur)

    # synthesis with an empty phase array
    y = SM.sineModelSynth(hfreq, hmag, np.array([]), synthSize, hopSize, fs)

    # output file name is derived from the input base name
    baseName = os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, 'output_sounds/' + baseName + '_harmonicModel.wav')

    plt.figure(figsize=(12, 9))
    maxplotfreq = 5000.0
    rate = float(fs)

    # top: input sound
    plt.subplot(3, 1, 1)
    inputTimes = np.arange(x.size) / rate
    plt.plot(inputTimes, x)
    plt.axis([0, x.size / rate, min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # middle: harmonic tracks, skipped when the analysis has no columns
    if hfreq.shape[1] > 0:
        plt.subplot(3, 1, 2)
        # copy first so the returned hfreq keeps its values
        tracks = np.copy(hfreq)
        numFrames = tracks.shape[0]
        tracks[tracks <= 0] = np.nan
        trackTimes = hopSize * np.arange(numFrames) / rate
        plt.plot(trackTimes, tracks)
        plt.axis([0, x.size / rate, 0, maxplotfreq])
        plt.title('frequencies of harmonic tracks')

    # bottom: output sound
    plt.subplot(3, 1, 3)
    outputTimes = np.arange(y.size) / rate
    plt.plot(outputTimes, y)
    plt.axis([0, y.size / rate, min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show(block=False)

    return inputFile, fs, hfreq, hmag
def morph_samepitch_lsf(audio_inp1, audio_inp2, alpha, f0, params, params_ceps):
    """
    Timbre morphing between two sounds of the same pitch by linearly interpolating
    the LSF representation of the true envelope (obtained from its LPC/cepstral
    representation).

    Both sounds are analysed with hprModelAnal. For every frame the two envelopes
    are estimated, converted cepstrum -> LPC -> LSF, interpolated, converted back,
    and sampled at the multiples of f0 to obtain the output magnitudes. The result
    is synthesized with sineModelSynth.

    Parameters
    ----------
    audio_inp1 : np.array
        Numpy array containing the first audio signal, in the time domain
    audio_inp2 : np.array
        Numpy array containing the second audio signal, in the time domain
    alpha : float
        Interpolation factor (0 <= alpha <= 1), alpha*audio1 + (1 - alpha)*audio2
    f0 : float
        Fundamental frequency (used to place the output harmonics)
    params : dict
        Parameter dictionary for the sine model, containing the following keys
            - fs : integer
                Sampling rate of the audio
            - W : integer
                Window size (also passed to sineModelSynth as its size argument)
            - N : integer
                FFT size (multiple of 2)
            - H : integer
                Hop size
            - t : float
                Threshold for sinusoidal detection in dB
            - maxnSines : integer
                Number of sinusoids to detect (passed as nH to hprModelAnal)
    params_ceps : dict
        Parameter dictionary for the true envelope estimation, containing the following keys
            - thresh : float
                Threshold (in dB) for the true envelope estimation
            - ceps_coeffs : integer
                Number of cepstral coefficients to keep in the true envelope estimation
            - num_iters : integer
                Upper bound on number of iterations (if no convergence)

    Returns
    -------
    audio_morphed : np.array
        The morphed audio in the time domain
    """
    # Unpack the analysis and envelope-estimation parameters
    fs = params['fs']
    W = params['W']
    N = params['N']
    H = params['H']
    t = params['t']
    maxnSines = params['maxnSines']
    thresh = params_ceps['thresh']
    ceps_coeffs = params_ceps['ceps_coeffs']
    num_iters = params_ceps['num_iters']

    w = windows.hann(W)

    # Analyse both inputs; only the first two returned values (frequencies and
    # magnitudes) are used
    F1,M1,_,_ = hprModelAnal(x = audio_inp1, fs = fs, w = w, N = N, H = H, t = t, nH = maxnSines, minSineDur = 0.02, minf0 = 10, maxf0 = 400, f0et = 5, harmDevSlope = 0.01)
    F2,M2,_,_ = hprModelAnal(x = audio_inp2, fs = fs, w = w, N = N, H = H, t = t, nH = maxnSines, minSineDur = 0.02, minf0 = 10, maxf0 = 400, f0et = 5, harmDevSlope = 0.01)

    # Defining the frequency matrix as multiples of the fundamental; its shape
    # follows whichever analysis has fewer rows
    new_F= np.zeros_like(F1 if F1.shape[0] < F2.shape[0] else F2)
    for i in range(new_F.shape[1]):
        new_F[:,i] = (i+1)*f0

    # Defining the magnitude matrix, again shaped like the analysis with fewer rows
    new_M = np.zeros_like(M1 if M1.shape[0] < M2.shape[0] else M2)

    for i in range(new_M.shape[0]):
        # Performing the envelope interpolation framewise (normalized log: the
        # magnitude is divided by 20 here and multiplied by 20 at the end of the loop)
        f1 = interpolate.interp1d(F1[i,:],M1[i,:]/20,kind = 'linear',fill_value = -100, bounds_error=False)
        f2 = interpolate.interp1d(F2[i,:],M2[i,:]/20,kind = 'linear',fill_value = -100, bounds_error=False)
        # Frequency bins: N points from 0 to fs/2
        fbins = np.linspace(0,fs/2,N)
        finp1 = f1(fbins)
        finp2 = f2(fbins)
        specenv1,_,_ = fe.calc_true_envelope_spectral(finp1,N,thresh,ceps_coeffs,num_iters)
        specenv2,_,_ = fe.calc_true_envelope_spectral(finp2,N,thresh,ceps_coeffs,num_iters)

        # Obtain the cepstral representation of the true envelopes
        cc_te_1 = np.real(np.fft.ifft(specenv1))
        cc_te_2 = np.real(np.fft.ifft(specenv2))

        # Define number of LPC (LSF) coefficients to keep
        # Cannot keep all, as precision error causes the coefficients to blow up
        L = 60
        # Obtaining the LPC representation from the cepstral representation
        lpc_cc_te_1 = fe.cc_to_lpc(cc_te_1,L)
        lpc_cc_te_2 = fe.cc_to_lpc(cc_te_2,L)

        # Obtain the LSF representation from the LPC
        lsf_lpc_cc_te_1 = fe.lpc_to_lsf(lpc_cc_te_1)
        lsf_lpc_cc_te_2 = fe.lpc_to_lsf(lpc_cc_te_2)

        # Interpolate the LSF and convert LSF back to LPC
        lsf_interp = alpha*lsf_lpc_cc_te_1 + (1 - alpha)*lsf_lpc_cc_te_2
        lpc_interp = fe.lsf_to_lpc(lsf_interp)

        # Reconvert LPCs to CCs
        cc_interp = fe.lpc_to_cc(lpc_interp,L + 1 ,L)
        # Pad with zeros up to length N (done to reduce number of computations)
        cc_interp = np.pad(cc_interp,[0 , N - len(cc_interp)],mode = 'constant',constant_values=(0, 0))

        # Flip and append the array to give a real frequency signal to the fft input
        cc_interp = np.concatenate((cc_interp[:N//2],np.flip(cc_interp[1:N//2 + 1])))

        # Interpolating the zeroth coefficient separately (it represents the gain/power of the signals)
        cc_interp[0] = alpha*cc_te_1[0] + (1 - alpha)*cc_te_2[0]
        specenv = np.real(np.fft.fft(cc_interp))

        # Sample the interpolated envelope at the output harmonic frequencies;
        # frequencies outside the interpolation range get the fill value -10
        fp = interpolate.interp1d(fbins[:N//2 + 1],specenv[:N//2 + 1],kind = 'linear',fill_value = -10, bounds_error=False)
        new_M[i,:] = 20*fp(new_F[i,:])

    # Synthesize from the new frequencies and magnitudes with an empty phase array
    audio_morphed = sineModelSynth(new_F, new_M, np.empty([0,0]), W, H, fs)

    return audio_morphed
def transformation_synthesis(inputFile, fs, tfreq, tmag, freqScaling=np.array([0, 2.0, 1, .3]),
                             timeScaling=np.array([0, .0, .671, .671, 1.978, 1.978 + 1.0])):
    """
    Apply frequency and time scaling to sinusoidal tracks, synthesize, save and plot.

    inputFile: name of the input file (only used to derive the output file name)
    fs: sampling rate of the input file
    tfreq, tmag: sinusoidal frequencies and magnitudes from the analysis step
    freqScaling: frequency scaling factors, in time-value pairs
    timeScaling: time scaling factors, in time-value pairs

    The result is written to 'output_sounds/<input stem>_sineModelTransformation.wav'
    and shown in a non-blocking figure; nothing is returned.
    """
    synth_fft_size = 512
    hop = 128  # has to be 1/4 of the synthesis fft size

    # frequency scaling first, then time scaling of the already scaled tracks
    scaled_freq = ST.sineFreqScaling(tfreq, freqScaling)
    scaled_freq, scaled_mag = ST.sineTimeScaling(scaled_freq, tmag, timeScaling)

    # synthesis without phases (empty phase array)
    y = SM.sineModelSynth(scaled_freq, scaled_mag, np.array([]), synth_fft_size, hop, fs)

    # the last four characters of the file name are dropped as the extension
    stem = os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, 'output_sounds/' + stem + '_sineModelTransformation.wav')

    plt.figure(figsize=(12, 6))
    max_plot_freq = 15000.0
    rate = float(fs)

    # upper panel: transformed tracks, only when there is at least one track
    if scaled_freq.shape[1] > 0:
        plt.subplot(2, 1, 1)
        # zero out everything at or above the plot limit, then hide zeros as NaN
        visible = scaled_freq * np.less(scaled_freq, max_plot_freq)
        visible[visible <= 0] = np.nan
        frame_times = hop * np.arange(int(visible[:, 0].size)) / rate
        plt.plot(frame_times, visible)
        plt.title('transformed sinusoidal tracks')
        plt.autoscale(tight=True)

    # lower panel: output waveform
    plt.subplot(2, 1, 2)
    plt.plot(np.arange(y.size) / rate, y)
    plt.axis([0, y.size / rate, min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show(block=False)
def main(inputFile='../../sounds/bendir.wav', window='hamming', M=2001, N=2048, t=-80,
         minSineDur=0.02, maxnSines=150, freqDevOffset=10, freqDevSlope=0.001):
    """
    Analyze a sound with the sinusoidal model, resynthesize it, save and plot it.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation

    The synthesized sound goes to 'output_sounds/<input stem>_sineModel.wav'.
    """
    synth_fft_size = 512
    hop = 128  # has to be 1/4 of the synthesis fft size

    fs, x = UF.wavread(inputFile)
    analysis_window = get_window(window, M)

    # analysis followed by synthesis from the analysed tracks (phases included)
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, analysis_window, N, hop, t, maxnSines,
                                           minSineDur, freqDevOffset, freqDevSlope)
    y = SM.sineModelSynth(tfreq, tmag, tphase, synth_fft_size, hop, fs)

    # the last four characters of the file name are dropped as the extension
    stem = os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, 'output_sounds/' + stem + '_sineModel.wav')

    plt.figure(figsize=(12, 9))
    max_plot_freq = 5000.0
    rate = float(fs)
    input_seconds = x.size / rate

    # panel 1: input waveform
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / rate, x)
    plt.axis([0, input_seconds, min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # panel 2: sinusoidal tracks (left empty when no track was found)
    plt.subplot(3, 1, 2)
    if tfreq.shape[1] > 0:
        frame_times = hop * np.arange(tfreq.shape[0]) / rate
        # non-positive frequencies mark inactive tracks; NaN keeps them off the plot
        tfreq[tfreq <= 0] = np.nan
        plt.plot(frame_times, tfreq)
        plt.axis([0, input_seconds, 0, max_plot_freq])
        plt.title('frequencies of sinusoidal tracks')

    # panel 3: output waveform
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / rate, y)
    plt.axis([0, y.size / rate, min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show(block=False)
# Harmonic analysis / sinusoidal resynthesis of vignesh.wav, followed by plots of
# the input waveform and of the detected harmonic tracks.
(fs, x) = UF.wavread('../../../sounds/vignesh.wav')
w = np.blackman(1201)
N = 2048
t = -90
nH = 100
minf0 = 130
maxf0 = 300
f0et = 7
Ns = 512
# Floor division: the hop size is a sample count and must stay an int.
# `Ns / 4` is the float 128.0 under Python 3 (and the same int under Python 2).
H = Ns // 4
minSineDur = .1
harmDevSlope = 0.01

hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur)
y = SM.sineModelSynth(hfreq, hmag, hphase, Ns, H, fs)

# time stamp (in seconds) of every analysis frame
numFrames = int(hfreq[:, 0].size)
frmTime = H * np.arange(numFrames) / float(fs)

plt.figure(1, figsize=(9, 7))

# input waveform
plt.subplot(3, 1, 1)
plt.plot(np.arange(x.size) / float(fs), x, 'b')
plt.axis([0, x.size / float(fs), min(x), max(x)])
plt.title('x (vignesh.wav)')

# harmonic tracks; yhfreq is an alias of hfreq (not a copy), so the zero
# entries are replaced by NaN in hfreq itself, which hides them in the plot
plt.subplot(3, 1, 2)
yhfreq = hfreq
yhfreq[hfreq == 0] = np.nan
plt.plot(frmTime, hfreq, lw=1.2)
def main(
    inputFile="../../sounds/bendir.wav",
    window="hamming",
    M=2001,
    N=2048,
    t=-80,
    minSineDur=0.02,
    maxnSines=150,
    freqDevOffset=10,
    freqDevSlope=0.001,
    stocf=0.2,
):
    """
    Analysis/synthesis of a sound with the sinusoidal plus stochastic model.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    stocf: decimation factor used for the stochastic approximation

    Writes three files into 'output_sounds/' (sinusoidal part, stochastic part
    and their sum) and shows a blocking figure. Nothing is returned.
    """
    # size of fft used in synthesis
    Ns = 512
    # hop size (has to be 1/4 of Ns)
    H = 128

    # --------- computation -----------------
    (fs, x) = UF.wavread(inputFile)
    w = get_window(window, M)

    # sinusoidal analysis
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

    # subtract sinusoids from the original sound and model the residual stochastically
    xr = UF.sineSubtraction(x, Ns, H, tfreq, tmag, tphase, fs)
    mYst = STM.stochasticModelAnal(xr, H, stocf)

    # synthesize both components
    ys = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)
    yst = STM.stochasticModelSynth(mYst, H)

    # sum sinusoids and stochastic, truncated to the shorter of the two
    common = min(yst.size, ys.size)
    y = yst[:common] + ys[:common]

    # output sound files (monophonic with sampling rate of 44100);
    # the last four characters of the file name are dropped as the extension
    stem = os.path.basename(inputFile)[:-4]
    outputFileSines = "output_sounds/" + stem + "_spsModel_sines.wav"
    outputFileStochastic = "output_sounds/" + stem + "_spsModel_stochastic.wav"
    outputFile = "output_sounds/" + stem + "_spsModel.wav"

    UF.wavwrite(ys, fs, outputFileSines)
    UF.wavwrite(yst, fs, outputFileStochastic)
    UF.wavwrite(y, fs, outputFile)

    # --------- plotting --------------------
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 10000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel("amplitude")
    plt.xlabel("time (sec)")
    plt.title("input sound: x")

    # plot the stochastic envelope up to maxplotfreq
    plt.subplot(3, 1, 2)
    numFrames = int(mYst[:, 0].size)
    sizeEnv = int(mYst[0, :].size)
    frmTime = H * np.arange(numFrames) / float(fs)
    # Slice bounds must be integers (a float bound raises TypeError), and the
    # bin count is capped at the envelope size so the frequency axis always has
    # exactly as many entries as there are plotted rows.
    maxplotbin = min(int(sizeEnv * maxplotfreq / (0.5 * fs)) + 1, sizeEnv)
    binFreq = (0.5 * fs) * np.arange(maxplotbin) / sizeEnv
    plt.pcolormesh(frmTime, binFreq, np.transpose(mYst[:, :maxplotbin]))
    plt.autoscale(tight=True)

    # plot sinusoidal frequencies on top of the stochastic component;
    # entries at or above maxplotfreq become 0 and all zeros are hidden as NaN
    sines = tfreq * np.less(tfreq, maxplotfreq)
    sines[sines == 0] = np.nan
    numFrames = int(sines[:, 0].size)
    frmTime = H * np.arange(numFrames) / float(fs)
    plt.plot(frmTime, sines, color="k", ms=3, alpha=1)
    plt.xlabel("time(s)")
    plt.ylabel("Frequency(Hz)")
    plt.autoscale(tight=True)
    plt.title("sinusoidal + stochastic spectrogram")

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel("amplitude")
    plt.xlabel("time (sec)")
    plt.title("output sound: y")

    plt.tight_layout()
    plt.show()
def pitch_shift_te(audio_inp, params, factor, choice_recon, params_ceps):
    """
    Shift the pitch of a sound by a scalar factor, resampling the harmonic
    magnitudes from the true envelope of each frame's spectrum.

    Parameters
    ----------
    audio_inp : np.array
        Numpy array containing the audio signal, in the time domain
    params : dict
        Parameter dictionary for the sine model, containing the following keys
            - fs : integer
                Sampling rate of the audio
            - W : integer
                Window size(number of frames)
            - N : integer
                FFT size(multiple of 2)
            - H : integer
                Hop size
            - t : float
                Threshold for sinusoidal detection in dB
            - maxnSines : integer
                Number of sinusoids to detect
    factor : float
        Shift factor for the pitch. New pitch = f * (old pitch)
    choice_recon : 0 or 1
        If 0, returns only the sinusoidal reconstruction
        If 1, adds the original residue as well to the sinusoidal
    params_ceps : dict
        Parameter dictionary for the true envelope estimation, containing the following keys
            - thresh : float
                Threshold(in dB) for the true envelope estimation
            - ceps_coeffs : integer
                Number of cepstral coefficients to keep in the true envelope estimation
            - num_iters : integer
                Upper bound on number of iterations(if no convergence)

    Returns
    -------
    audio_transformed : np.array
        Returns the transformed signal in the time domain
    residue : np.array
        Residue of the original signal
    """
    fs = params['fs']
    W = params['W']
    N = params['N']
    H = params['H']
    t = params['t']
    maxnSines = params['maxnSines']

    thresh = params_ceps['thresh']
    ceps_coeffs = params_ceps['ceps_coeffs']
    num_iters = params_ceps['num_iters']

    analysis_window = windows.hann(W)
    F, M, _, R = hprModelAnal(x=audio_inp, fs=fs, w=analysis_window, N=N, H=H, t=t,
                              nH=maxnSines, minSineDur=0.1, minf0=10, maxf0=1000,
                              f0et=5, harmDevSlope=0.01)

    scaled_F = factor * F
    # new_M is the same array object as M: rows are overwritten in place below
    new_M = M

    # frequency grid on which each frame's sparse harmonic spectrum is resampled
    fbins = np.linspace(0, fs/2, N)
    half = N//2 + 1

    for frame, harmonic_freqs in enumerate(F):
        # magnitudes are divided by 20 before the envelope estimation
        # and multiplied back by 20 after resampling
        sparse_spectrum = interpolate.interp1d(harmonic_freqs, M[frame, :]/20, kind='linear',
                                               fill_value=-5, bounds_error=False)
        dense_spectrum = sparse_spectrum(fbins)
        specenv, _, _ = fe.calc_true_envelope_spectral(dense_spectrum, N, thresh, ceps_coeffs, num_iters)

        # interpolating function over the envelope, evaluated at the shifted harmonics
        # NOTE(review): only the first N//2 + 1 points of fbins are paired with the
        # envelope -- confirm this matches the envelope's frequency axis
        envelope = interpolate.interp1d(fbins[:half], specenv[:half], kind='linear',
                                        fill_value='extrapolate', bounds_error=False)
        new_M[frame, :] = 20*envelope(scaled_F[frame, :])

    if choice_recon == 0:
        audio_transformed = sineModelSynth(scaled_F, new_M, np.empty([0, 0]), W, H, fs)
    else:
        audio_transformed = hprModelSynth(scaled_F, new_M, np.empty([0, 0]), R, W, H, fs)[0]

    return audio_transformed, R
# Sinusoidal-model analysis of `inputFile` followed by a resynthesis that does
# NOT use the analysed phases; the result is written to 'test2.wav' and plotted
# on top of the input.
# NOTE(review): `inputFile` is not assigned in this fragment -- it must already
# be defined by the surrounding code.
window = 'hamming'
M = 1001
N = 2048
t = -100
minSineDur = 0.01
maxnSines = 150
freqDevOffset = 30
freqDevSlope = 0.02
Ns = 512
H = 128
fs, x = UF.wavread(inputFile)
w = get_window(window, M)
tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)
# Alternative that reuses the analysed phases:
# y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)
# An empty phase array is passed instead: demonstration of recreated phases.
y = SM.sineModelSynth(tfreq, tmag, np.array([]), Ns, H, fs)
UF.wavwrite(y, fs, 'test2.wav')
import matplotlib.pyplot as plt
# input and resynthesis overlaid, sample index on the x axis
plt.plot(x)
plt.plot(y)
plt.show()
# test the subtraction of sines if __name__ == '__main__': (fs, x) = UF.wavread(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../sounds/bendir.wav')) w = np.hamming(2001) N = 2048 H = 128 t = -100 minSineDur = .02 maxnSines = 200 freqDevOffset = 10 freqDevSlope = 0.001 tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope) xr = UF.sineSubtraction(x, N, H, tfreq, tmag, tphase, fs) mXr, pXr = STFT.stftAnal(xr, fs, hamming(H*2), H*2, H) Ns = 512 ys = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs) plt.figure(1, figsize=(9.5, 7)) numFrames = int(mXr[:,0].size) frmTime = H*np.arange(numFrames)/float(fs) binFreq = np.arange(H)*float(fs)/(H*2) plt.pcolormesh(frmTime, binFreq, np.transpose(mXr)) plt.autoscale(tight=True) tfreq[tfreq==0] = np.nan numFrames = int(tfreq[:,0].size) frmTime = H*np.arange(numFrames)/float(fs) plt.plot(frmTime, tfreq, color='k', ms=3, alpha=1) plt.xlabel('Time(s)') plt.ylabel('Frequency(Hz)') plt.autoscale(tight=True)
# finish and save the figure built by the preceding statements
plt.tight_layout()
plt.savefig("cello-phrase-spectrogram.png")

# compute the F0 and the harmonics
t = -97
minf0 = 310
maxf0 = 450
f0et = 4
nH = 70
harmDevSlope = 0.01
Ns = H * 4
minSineDur = 0.3
hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur)

# copy of the harmonic frequencies with every harmonic but the first zeroed,
# so that only the fundamental is synthesized (scaled by 4)
hfreqt = copy.copy(hfreq)
hfreqt[:, 1:] = 0
yf0 = 4 * SM.sineModelSynth(hfreqt, hmag, hphase, Ns, H, fs)
yh = SM.sineModelSynth(hfreq, hmag, hphase, Ns, H, fs)
UF.wavwrite(yf0, fs, "cello-phrase-f0.wav")
UF.wavwrite(yh, fs, "cello-phrase-harmonics.wav")

# plot the F0 on top of the spectrogram
plt.figure(3, figsize=(16, 4.5))
maxplotfreq = 5000.0
harms = hfreq * np.less(hfreq, maxplotfreq)
# frames whose first harmonic is 0 are blanked out entirely
harms[harms[:, 0] == 0] = np.nan
numFrames = int(mX[:, 0].size)
frmTime = H * np.arange(numFrames) / float(fs)
# Slice bounds must be integers (a float bound raises TypeError); the
# frequency axis is built from the same bin count so both always agree.
maxplotbin = int(N * maxplotfreq / fs)
binFreq = float(fs) * np.arange(maxplotbin + 1) / N
plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:, : maxplotbin + 1]))
plt.plot(frmTime, harms[:, 0], linewidth=3, color="0")
plt.xlabel("time (sec)")
def exploreSineModel(inputFile='../sms-tools/sounds/multisines.wav'):
    """
    Run a sinusoidal-model analysis/synthesis of a sound with fixed parameters.

    Input:
            inputFile (string) = wav file including the path
    Output:
            return True

    The synthesized sound is written to '<input stem>_sineModel.wav' in the
    current directory and a three-panel figure is shown (blocking).
    """
    # analysis settings
    window = 'hamming'       # window type
    M = 3001                 # window size in samples
    N = 4096                 # FFT size
    t = -80                  # threshold
    minSineDur = 0.02        # minimum duration of a sinusoid
    maxnSines = 15           # maximum number of sinusoids at any time frame
    freqDevOffset = 10       # minimum frequency deviation at 0Hz
    freqDevSlope = 0.001     # slope increase of minimum frequency deviation

    # synthesis settings
    synth_fft_size = 512
    hop = 128                # has to be 1/4 of the synthesis fft size

    fs, x = UF.wavread(inputFile)
    analysis_window = get_window(window, M)

    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, analysis_window, N, hop, t, maxnSines,
                                           minSineDur, freqDevOffset, freqDevSlope)
    y = SM.sineModelSynth(tfreq, tmag, tphase, synth_fft_size, hop, fs)

    # output name: input file name without its last four characters
    outputFile = os.path.basename(inputFile)[:-4] + '_sineModel.wav'
    UF.wavwrite(y, fs, outputFile)

    plt.figure(figsize=(12, 9))
    max_plot_freq = 5000.0
    rate = float(fs)
    input_seconds = x.size / rate

    # input waveform
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / rate, x)
    plt.axis([0, input_seconds, min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # sinusoidal tracks, only drawn when at least one track exists
    plt.subplot(3, 1, 2)
    has_tracks = tfreq.shape[1] > 0
    if has_tracks:
        frame_times = hop * np.arange(tfreq.shape[0]) / rate
        tfreq[tfreq <= 0] = np.nan  # hide inactive (non-positive) entries
        plt.plot(frame_times, tfreq)
        plt.axis([0, input_seconds, 0, max_plot_freq])
        plt.title('frequencies of sinusoidal tracks')

    # output waveform
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / rate, y)
    plt.axis([0, y.size / rate, min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show()
    return True
def morph_samepitch_cc(audio_inp1, audio_inp2, alpha, f0, params, params_ceps):
    """
    Morph the timbre of two sounds of the same pitch by linearly interpolating
    the cepstral representation of their true envelopes.

    Parameters
    ----------
    audio_inp1 : np.array
        Numpy array containing the first audio signal, in the time domain
    audio_inp2 : np.array
        Numpy array containing the second audio signal, in the time domain
    alpha : float
        Interpolation factor(0 <= alpha <= 1), alpha*audio1 + (1 - alpha)*audio2
    f0 : float
        Fundamental Frequency(to reconstruct harmonics)
    params : dict
        Parameter dictionary for the sine model, containing the following keys
            - fs : integer
                Sampling rate of the audio
            - W : integer
                Window size(number of frames)
            - N : integer
                FFT size(multiple of 2)
            - H : integer
                Hop size
            - t : float
                Threshold for sinusoidal detection in dB
            - maxnSines : integer
                Number of sinusoids to detect
    params_ceps : dict
        Parameter dictionary for the true envelope estimation, containing the following keys
            - thresh : float
                Threshold(in dB) for the true envelope estimation
            - ceps_coeffs : integer
                Number of cepstral coefficients to keep in the true envelope estimation
            - num_iters : integer
                Upper bound on number of iterations(if no convergence)

    Returns
    -------
    audio_morphed : np.array
        Returns the morphed audio in the time domain
    """
    fs = params['fs']
    W = params['W']
    N = params['N']
    H = params['H']
    t = params['t']
    maxnSines = params['maxnSines']

    thresh = params_ceps['thresh']
    ceps_coeffs = params_ceps['ceps_coeffs']
    num_iters = params_ceps['num_iters']

    analysis_window = windows.hann(W)

    # both sounds are analysed with identical settings
    F1, M1, _, _ = hprModelAnal(x=audio_inp1, fs=fs, w=analysis_window, N=N, H=H, t=t,
                                nH=maxnSines, minSineDur=0.02, minf0=10, maxf0=400,
                                f0et=5, harmDevSlope=0.01)
    F2, M2, _, _ = hprModelAnal(x=audio_inp2, fs=fs, w=analysis_window, N=N, H=H, t=t,
                                nH=maxnSines, minSineDur=0.02, minf0=10, maxf0=400,
                                f0et=5, harmDevSlope=0.01)

    # output matrices take the shape of the analysis with fewer frames
    # (on a tie the second sound's matrices are used as the template)
    shorter_F = F1 if F1.shape[0] < F2.shape[0] else F2
    shorter_M = M1 if M1.shape[0] < M2.shape[0] else M2
    new_F = np.zeros_like(shorter_F)
    new_M = np.zeros_like(shorter_M)

    # frequency matrix: column k holds the (k+1)-th multiple of f0
    for harmonic in range(new_F.shape[1]):
        new_F[:, harmonic] = (harmonic + 1)*f0

    # frequency grid on which the sparse harmonic spectra are resampled
    fbins = np.linspace(0, fs/2, N)
    half = N//2 + 1

    for frame in range(new_M.shape[0]):
        # magnitudes are divided by 20 before the envelope estimation
        resample1 = interpolate.interp1d(F1[frame, :], M1[frame, :]/20, kind='linear',
                                         fill_value=-100, bounds_error=False)
        resample2 = interpolate.interp1d(F2[frame, :], M2[frame, :]/20, kind='linear',
                                         fill_value=-100, bounds_error=False)
        dense1 = resample1(fbins)
        dense2 = resample2(fbins)

        specenv1, _, _ = fe.calc_true_envelope_spectral(dense1, N, thresh, ceps_coeffs, num_iters)
        specenv2, _, _ = fe.calc_true_envelope_spectral(dense2, N, thresh, ceps_coeffs, num_iters)

        # cepstral representation of the two true envelopes
        cc_te_1 = np.real(np.fft.ifft(specenv1))
        cc_te_2 = np.real(np.fft.ifft(specenv2))

        # linear interpolation in the cepstral domain, then back to an envelope
        cc_interp = alpha*cc_te_1 + (1 - alpha)*cc_te_2
        specenv = np.real(np.fft.fft(cc_interp))

        # sample the morphed envelope at the harmonic frequencies (and undo the /20)
        envelope = interpolate.interp1d(fbins[:half], specenv[:half], kind='linear',
                                        fill_value=-10, bounds_error=False)
        new_M[frame, :] = 20*envelope(new_F[frame, :])

    audio_morphed = sineModelSynth(new_F, new_M, np.empty([0, 0]), W, H, fs)
    return audio_morphed
def main(inputFile='../../sounds/bendir.wav', window='hamming', M=2001, N=2048, t=-80,
         minSineDur=0.02, maxnSines=150, freqDevOffset=10, freqDevSlope=0.001):
    """
    Sinusoidal-model analysis and resynthesis of a sound file, with plots.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation

    Output is saved as 'output_sounds/<input stem>_sineModel.wav'; the figure
    is shown without blocking.
    """
    Ns, H = 512, 128  # synthesis fft size and hop size (hop has to be Ns/4)

    # read the sound and build the analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # analysis, then synthesis from the resulting tracks
    tfreq, tmag, tphase = SM.sineModelAnal(
        x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)
    y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

    # file name: 'output_sounds/' + name without its last four characters + suffix
    outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_sineModel.wav'
    UF.wavwrite(y, fs, outputFile)

    plt.figure(figsize=(12, 9))
    maxplotfreq = 5000.0
    sample_rate = float(fs)
    x_duration = x.size / sample_rate
    y_duration = y.size / sample_rate

    # top: input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / sample_rate, x)
    plt.axis([0, x_duration, min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # middle: sinusoidal frequencies (skipped when there are no tracks)
    plt.subplot(3, 1, 2)
    if tfreq.shape[1] > 0:
        track_times = H * np.arange(tfreq.shape[0]) / sample_rate
        # in-place: non-positive entries become NaN so they are not drawn
        tfreq[tfreq <= 0] = np.nan
        plt.plot(track_times, tfreq)
        plt.axis([0, x_duration, 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # bottom: output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / sample_rate, y)
    plt.axis([0, y_duration, min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show(block=False)
def residue_lpc(audio_inp, params, lpc_order):
    """
    Obtain the LPC envelope of the residual spectrum frame by frame, then
    generate a new residual by inverting that representation with random phase.

    Parameters
    ----------
    audio_inp : np.array
        Numpy array containing the audio signal, in the time domain
    params : dict
        Parameter dictionary for the sine model, containing the following keys
            - fs : Sampling rate of the audio
            - W : Window size(number of frames)
            - N : FFT size(multiple of 2)
            - H : Hop size
            - t : Threshold for sinusoidal detection in dB
            - maxnSines : Number of sinusoids to detect
    lpc_order : integer
        Number of coefficients in the LPC representation

    Returns
    -------
    res_transformed : np.array
        The transformed residue (LPC envelope approximation) in the time
        domain, scaled so that its absolute peak is 1. An all-zero result is
        returned unscaled instead of being divided by zero.

    Notes
    -----
    The phases are drawn with np.random.rand, so the output is not
    deterministic unless the global NumPy random state is seeded.
    """
    fs = params['fs']
    W = params['W']
    N = params['N']
    H = params['H']
    t = params['t']
    maxnSines = params['maxnSines']

    w = windows.hann(W)

    # Only the residual is needed; the harmonic part is not resynthesized
    # because nothing in this function uses it.
    _, _, _, R = hprModelAnal(x=audio_inp, fs=fs, w=w, N=N, H=H, t=t, nH=maxnSines,
                              minSineDur=0.02, minf0=10, maxf0=400, f0et=5,
                              harmDevSlope=0.01)

    # Normalize the residue before analysis (throws a np zero error otherwise)
    # NOTE(review): nf is 0 for a silent residual, which makes R NaN -- confirm
    # whether callers can pass such input.
    nf = np.max(np.abs(R))
    R = R/nf

    # residual spectral approximations (LPC envelope), one entry per frame
    xmX = []
    for frame in ess.FrameGenerator(R.astype('float32'), W, H):
        # zero-pad each frame from the window size up to the FFT size
        inp = np.pad(frame, [0, N - W], mode='constant', constant_values=(0, 0))
        env_frame = fe.lpc_envelope(inp, lpc_order, fs, len(inp)//2 + 1)
        xmX.append(env_frame)
    xmX = np.array(xmX)

    # uniformly random phase in [0, 2*pi) for every bin of every frame
    XpX = 2*np.pi*np.random.rand(xmX.shape[0], xmX.shape[1])

    # Obtain the audio from the above representation
    res_transformed = stftSynth(xmX, XpX, W, H)*nf

    # Peak-normalize the result; skip the division when the peak is zero
    peak = np.max(np.abs(res_transformed))
    if peak > 0:
        res_transformed = res_transformed/peak

    return res_transformed
def recon_samples_ls(matrix_ceps_coeffs, midi_pitch, params, f_ref=440, choice_f=0):
    """
    Synthesize audio from a sequence of cepstral frames: each frame's spectral
    envelope is rebuilt from its cepstrum, sampled at the harmonics of the
    frame's pitch, and the harmonics are synthesized with the sine model.

    Note : The input should be in log dB (log|X|)

    Inputs
    ------
    matrix_ceps_coeffs : np.ndarray
        Matrix whose columns depict the cepstral frames(sequential)
    midi_pitch : number, or sequence of numbers (0 < midi_pitch < 128)
        Pitch at each time frame (one entry per column of the matrix above).
        A single number (int, float or numpy scalar) is used for all frames.
        Lists, tuples and arrays are all accepted.
    params : dict
        Parameter dictionary for the harmonic reconstruction, containing the following keys
            - fs : integer
                Sampling rate of the audio
            - W : integer
                Window size(number of frames)
            - N : integer
                FFT size(multiple of 2)
            - H : integer
                Hop size
            - nH : integer
                Number of harmonics to synthesize
    f_ref : float
        Reference frequency for MIDI(440 Hz by default)
    choice_f : 0 or 1(0 by default)
        If 0, will accept MIDI pitch and convert it to Hz
        If 1, will accept and use pitch directly in Hz

    Returns
    -------
    audio_recon : np.array
        The reconstructed audio in the time domain

    Raises
    ------
    IndexError
        If fewer pitch values than cepstral frames are supplied.
    """
    fs = params['fs']
    W = params['W']
    N = params['N']
    H = params['H']
    nH = params['nH']

    num_frames = matrix_ceps_coeffs.shape[1]

    # Convert to a float array so that Python lists support the arithmetic
    # below, and broadcast any scalar pitch to every frame.
    pitch = np.asarray(midi_pitch, dtype=float)
    if pitch.ndim == 0:
        pitch = np.zeros(num_frames) + pitch
    if pitch.shape[0] < num_frames:
        raise IndexError(
            "midi_pitch has %d entries but there are %d cepstral frames"
            % (pitch.shape[0], num_frames))
    # extra trailing pitch values are ignored
    pitch = pitch[:num_frames]

    if choice_f == 0:
        # Convert MIDI to Hz
        f0 = f_ref * (2**((pitch - 69) / 12.0))
    else:
        f0 = pitch

    # F[j, i] = (i + 1) * f0[j]: harmonic i+1 of frame j
    F = f0[:, np.newaxis] * np.arange(1, nH + 1)
    M = np.zeros((num_frames, nH))

    bin_index = np.arange(N)

    # Sample the frequencies from the envelope at each instant
    for i in range(num_frames):
        ceps_current = matrix_ceps_coeffs[:, i]

        # Pad with zeros up to the FFT size
        cc_real = np.pad(ceps_current, [0, N - len(ceps_current)],
                         mode='constant', constant_values=(0, 0))
        # Flip and append the array to give a real frequency signal to the fft input
        cc_real = np.concatenate(
            (cc_real[:N // 2], np.flip(cc_real[1:N // 2 + 1])))
        cc_real[0] = ceps_current[0]

        # Obtain the Envelope from the cepstrum
        specenv = np.real(np.fft.fft(cc_real))

        # the envelope is indexed by FFT bin, so frequencies are mapped to
        # fractional bins with (f / fs) * N; the factor 20 is applied on output
        fp = interpolate.interp1d(bin_index, specenv, kind='linear',
                                  fill_value='extrapolate', bounds_error=False)
        M[i, :] = 20 * fp((F[i, :] / fs) * N)

    audio_recon = sineModelSynth(F, M, np.empty([0, 0]), W, H, fs)
    return audio_recon
def pitch_shifting(audio_inp, params, factor, choice, choice_recon):
    """
    Shift the pitch of a sound by a scalar factor, optionally interpolating the
    harmonic magnitudes frame by frame to preserve the timbre, and optionally
    adding the original residue back.

    Parameters
    ----------
    audio_inp : np.array
        Numpy array containing the audio signal, in the time domain
    params : dict
        Parameter dictionary for the sine model, containing the following keys
            - fs : Sampling rate of the audio
            - W : Window size(number of frames)
            - N : FFT size(multiple of 2)
            - H : Hop size
            - t : Threshold for sinusoidal detection in dB
            - maxnSines : Number of sinusoids to detect
    factor : float
        Shift factor for the pitch. New pitch = f * (old pitch)
    choice : 0 or 1
        If 0, simply shifts the pitch without amplitude interpolation
        If 1, performs amplitude interpolation framewise to preserve timbre
    choice_recon : 0 or 1
        If 0, returns only the sinusoidal reconstruction
        If 1, adds the original residue as well to the sinusoidal

    Returns
    -------
    audio_transformed : np.array
        Returns the transformed signal in the time domain
    Residue : np.array
        The residue of the signal
    """
    fs = params['fs']
    W = params['W']
    N = params['N']
    H = params['H']
    t = params['t']
    maxnSines = params['maxnSines']

    analysis_window = windows.hann(W)
    F, M, _, R = hprModelAnal(x=audio_inp, fs=fs, w=analysis_window, N=N, H=H, t=t,
                              nH=maxnSines, minSineDur=0.02, minf0=10, maxf0=400,
                              f0et=5, harmDevSlope=0.01)

    scaled_F = factor*F
    # new_M is the same array object as M; with interpolation enabled its rows
    # are overwritten in place
    new_M = M

    if choice != 0:
        for frame, (orig_freqs, shifted_freqs) in enumerate(zip(F, scaled_F)):
            # magnitude-vs-frequency curve of this frame, evaluated at the
            # shifted frequencies; anything outside the original range gets -100
            magnitude_at = interpolate.interp1d(orig_freqs, M[frame, :], kind='linear',
                                                fill_value=-100, bounds_error=False)
            new_M[frame, :] = magnitude_at(shifted_freqs)

    if choice_recon == 0:
        audio_transformed = sineModelSynth(scaled_F, new_M, np.empty([0, 0]), W, H, fs)
    else:
        audio_transformed = hprModelSynth(scaled_F, new_M, np.empty([0, 0]), R, W, H, fs)[0]

    return audio_transformed, R
def main(inputFile='../../sounds/bendir.wav', window='hamming', M=2001, N=2048, t=-80,
         minSineDur=0.02, maxnSines=150, freqDevOffset=10, freqDevSlope=0.001):
    """
    Analysis/synthesis of a sound with the sinusoidal plus residual model.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation

    Three files are written into 'output_sounds/' (sinusoids, residual, and
    their sum) and a blocking three-panel figure is shown.
    """
    synth_fft_size = 512
    hop = 128  # has to be 1/4 of the synthesis fft size

    # --------- computation -----------------
    (fs, x) = UF.wavread(inputFile)
    analysis_window = get_window(window, M)

    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, analysis_window, N, hop, t, maxnSines,
                                           minSineDur, freqDevOffset, freqDevSlope)

    # residual = original minus the sinusoids, plus its magnitude spectrogram
    xr = UF.sineSubtraction(x, N, hop, tfreq, tmag, tphase, fs)
    mXr, pXr = STFT.stftAnal(xr, fs, analysis_window, N, hop)

    # sinusoidal part and the sum of both parts (cut to the shorter one)
    ys = SM.sineModelSynth(tfreq, tmag, tphase, synth_fft_size, hop, fs)
    overlap = min(xr.size, ys.size)
    y = xr[:overlap]+ys[:overlap]

    # output names: input file name without its last four characters + suffix
    stem = os.path.basename(inputFile)[:-4]
    outputFileSines = 'output_sounds/' + stem + '_sprModel_sines.wav'
    outputFileResidual = 'output_sounds/' + stem + '_sprModel_residual.wav'
    outputFile = 'output_sounds/' + stem + '_sprModel.wav'

    UF.wavwrite(ys, fs, outputFileSines)
    UF.wavwrite(xr, fs, outputFileResidual)
    UF.wavwrite(y, fs, outputFile)

    # --------- plotting --------------------
    plt.figure(figsize=(12, 9))
    max_plot_freq = 5000.0
    rate = float(fs)

    # input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size)/rate, x)
    plt.axis([0, x.size/rate, min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # magnitude spectrogram of the residual, limited to max_plot_freq
    plt.subplot(3, 1, 2)
    last_bin = int(N*max_plot_freq/fs)
    frame_times = hop*np.arange(int(mXr[:, 0].size))/rate
    bin_freqs = np.arange(last_bin+1)*rate/N
    plt.pcolormesh(frame_times, bin_freqs, np.transpose(mXr[:, :last_bin+1]))
    plt.autoscale(tight=True)

    # sinusoidal tracks on top; values outside (0, max_plot_freq) are hidden
    tracks = tfreq*np.less(tfreq, max_plot_freq)
    tracks[tracks <= 0] = np.nan
    plt.plot(frame_times, tracks, color='k')
    plt.title('sinusoidal tracks + residual spectrogram')
    plt.autoscale(tight=True)

    # output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size)/rate, y)
    plt.axis([0, y.size/rate, min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show()
def exploreSineModel(inputFile='multiSines.wav'):
    """
    Analyze a sound with the sinusoidal model, resynthesize it, write the result and plot it.

    Input:
        inputFile (string) = wav file including the path
    Output:
        return True
    """
    # analysis parameters
    window = 'hamming'     # window type
    M = 2001               # window size in samples
    N = 2048               # FFT size
    t = -80                # threshold
    minSineDur = 0.02      # minimum duration of a sinusoid
    maxnSines = 150        # maximum number of sinusoids at any time frame
    freqDevOffset = 10     # minimum frequency deviation at 0Hz
    freqDevSlope = 0.001   # slope increase of minimum frequency deviation

    # synthesis parameters
    Ns = 512               # size of fft used in synthesis
    H = 128                # hop size (has to be 1/4 of Ns)

    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # sinusoidal analysis followed by synthesis from the tracked sinusoids
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)
    y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

    # the synthesized sound is written next to the working directory, named after the input
    UF.wavwrite(y, fs, os.path.basename(inputFile)[:-4] + '_sineModel.wav')

    def plot_waveform(row, signal, title):
        # time-domain plot of one signal in the given row of the 3x1 figure
        plt.subplot(3, 1, row)
        plt.plot(np.arange(signal.size) / float(fs), signal)
        plt.axis([0, signal.size / float(fs), min(signal), max(signal)])
        plt.ylabel('amplitude')
        plt.xlabel('time (sec)')
        plt.title(title)

    plt.figure(figsize=(12, 9))
    maxplotfreq = 5000.0   # frequency range to plot

    plot_waveform(1, x, 'input sound: x')

    # sinusoidal frequencies (only when at least one track column exists)
    plt.subplot(3, 1, 2)
    if tfreq.shape[1] > 0:
        frmTime = H * np.arange(tfreq.shape[0]) / float(fs)
        tfreq[tfreq <= 0] = np.nan   # in-place: empty track slots become NaN so they are not drawn
        plt.plot(frmTime, tfreq)
        plt.axis([0, x.size / float(fs), 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    plot_waveform(3, y, 'output sound: y')

    plt.tight_layout()
    plt.show()
    return True
# Harmonic analysis/synthesis of vignesh.wav followed by plotting of the input and harmonic tracks.
(fs, x) = UF.wavread("../../../sounds/vignesh.wav")
w = np.blackman(1201)
N = 2048
t = -90
nH = 100
minf0 = 130
maxf0 = 300
f0et = 7
Ns = 512
# floor division keeps the hop size an integer under Python 3 as well (Ns / 4 would be 128.0)
H = Ns // 4
minSineDur = 0.1
harmDevSlope = 0.01

hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur)
y = SM.sineModelSynth(hfreq, hmag, hphase, Ns, H, fs)

numFrames = int(hfreq[:, 0].size)
frmTime = H * np.arange(numFrames) / float(fs)

plt.figure(1, figsize=(9, 7))

# input sound
plt.subplot(3, 1, 1)
plt.plot(np.arange(x.size) / float(fs), x, "b")
plt.axis([0, x.size / float(fs), min(x), max(x)])
plt.title("x (vignesh.wav)")

# harmonic tracks
plt.subplot(3, 1, 2)
# NOTE: yhfreq is the same array object as hfreq, so the NaN masking below also changes hfreq,
# which is the array actually plotted.
yhfreq = hfreq
yhfreq[hfreq == 0] = np.nan
plt.plot(frmTime, hfreq, lw=1.2)
def pitch_shifting_harmonic(audio_inp, params, params_ceps, factor, choice, choice_recon, f0):
    """
    Shifts the pitch by the scalar factor given as the input. Assumes the sound is harmonic and
    hence uses only the amplitudes sampled at multiples of the fundamental frequency.
    Note : Will only perform well for harmonic/sustained sounds.

    Depending on the choice, performs interpolation to preserve the timbre when shifting the pitch.
    Also returns the sound with or without the original residue added.

    Parameters
    ----------
    audio_inp : np.array
        Numpy array containing the audio signal, in the time domain
    params : dict
        Parameter dictionary for the sine model containing the following keys
            - fs : Sampling rate of the audio
            - W : Window size
            - N : FFT size
            - H : Hop size
            - t : Threshold for sinusoidal detection in dB
            - maxnSines : Number of sinusoids to detect
    params_ceps : dict
        Parameter dictionary for the true envelope estimation containing the following keys
        (all three are read regardless of `choice`)
            - thresh : Threshold for the true envelope estimation
            - ceps_coeffs : Number of cepstral coefficients to keep
            - num_iters : Upper bound on the number of iterations
    factor : float
        Shift factor for the pitch. New pitch = factor * (old pitch)
    choice : 0,1,2
        If 0, simply shifts the pitch without amplitude interpolation
        If 1, performs amplitude interpolation framewise to preserve timbre
        Any other value (documented as 2), uses the true envelope of the amplitude spectrum
        to sample the points from
    choice_recon : 0 or 1
        If 0, returns only the sinusoidal reconstruction
        Otherwise, adds the original residue as well to the sinusoidal
    f0 : Hz
        The fundamental frequency of the note

    Returns
    -------
    audio_transformed : np.array
        Returns the transformed signal in the time domain
    """
    fs = params['fs']
    W = params['W']
    N = params['N']
    H = params['H']
    t = params['t']
    maxnSines = params['maxnSines']

    thresh = params_ceps['thresh']
    ceps_coeffs = params_ceps['ceps_coeffs']
    num_iters = params_ceps['num_iters']

    w = windows.hann(W)

    F, M, P, R = hprModelAnal(x=audio_inp, fs=fs, w=w, N=N, H=H, t=t, nH=maxnSines, minSineDur=0.02,
                              minf0=10, maxf0=1000, f0et=5, harmDevSlope=0.01)

    # Ideal harmonic grid: column i holds the (i+1)-th multiple of f0 for every frame
    new_F = np.zeros_like(F)
    for i in range(F.shape[1]):
        new_F[:, i] = (i + 1) * f0

    scaled_F = factor * new_F

    # Work on a copy so the analysis magnitudes M are never overwritten through an alias
    new_M = M.copy()

    if choice == 1:
        for i in range(F.shape[0]):
            # Performing the envelope interpolation framewise
            f = interpolate.interp1d(F[i, :], M[i, :], kind='linear', fill_value=-100, bounds_error=False)
            new_M[i, :] = f(scaled_F[i, :])
    elif choice != 0:
        # Frequency bins (identical for every frame, so computed once)
        fbins = np.linspace(0, fs / 2, 2 * N)
        for i in range(F.shape[0]):
            # Performing the envelope interpolation framewise (normalized log, dividing the magnitude by 20)
            f = interpolate.interp1d(F[i, :], M[i, :] / 20, kind='linear', fill_value=-5, bounds_error=False)
            finp = f(fbins)
            specenv, _, _ = fe.calc_true_envelope_spectral(finp, N, thresh, ceps_coeffs, num_iters)
            # Once the spectral envelope is obtained, define an interpolating function based on it
            fp = interpolate.interp1d(fbins[:N // 2 + 1], specenv[:N // 2 + 1], kind='linear',
                                      fill_value='extrapolate', bounds_error=False)
            new_M[i, :] = 20 * fp(scaled_F[i, :])

    if choice_recon == 0:
        audio_transformed = sineModelSynth(scaled_F, new_M, np.empty([0, 0]), W, H, fs)
    else:
        audio_transformed = hprModelSynth(scaled_F, new_M, np.empty([0, 0]), R, W, H, fs)[0]

    return audio_transformed
def sustain_sound_gen(audio_inp, params, params_ceps, f0, rwl, alpha):
    """
    Re-synthesizes the input audio using a random walk starting from the middle frame of the audio.

    Parameters
    ----------
    audio_inp : np.array
        Numpy array containing the audio signal, in the time domain
    params : dict
        Parameter dictionary for the sine model containing the following keys
            - fs : integer
                Sampling rate of the audio
            - W : integer
                Window size
            - N : integer
                FFT size
            - H : integer
                Hop size
            - t : float
                Threshold for sinusoidal detection in dB
            - maxnSines : integer
                Number of sinusoids to detect
    params_ceps : dict
        Parameter dictionary for the true envelope estimation containing the following keys
            - thresh : float
                Threshold (in dB) for the true envelope estimation
            - ceps_coeffs : integer
                Number of cepstral coefficients to keep in the true envelope estimation
            - num_iters : integer
                Upper bound on number of iterations (if no convergence)
    f0 : float
        Fundamental frequency (or pitch) of the note
    rwl : integer
        Step, in frames, taken by the random walk at every output frame
    alpha : float (0 < alpha < 1)
        Closeness to the current envelope (for continuity of the spectral frames during reconstruction)

    Returns
    -------
    audio_transformed : np.array
        Returns the transformed signal in the time domain
    """
    fs = params['fs']
    W = params['W']
    N = params['N']
    H = params['H']
    t = params['t']
    maxnSines = params['maxnSines']

    thresh = params_ceps['thresh']
    ceps_coeffs = params_ceps['ceps_coeffs']
    num_iters = params_ceps['num_iters']

    w = windows.hann(W)

    F, M, P, R = hprModelAnal(x=audio_inp, fs=fs, w=w, N=N, H=H, t=t, nH=maxnSines, minSineDur=0.02,
                              minf0=10, maxf0=1000, f0et=5, harmDevSlope=0.01)

    # Ideal harmonic grid: column i holds the (i+1)-th multiple of f0 for every frame
    new_F = np.zeros_like(F)
    for i in range(F.shape[1]):
        new_F[:, i] = (i + 1) * f0

    # Separate output array: the random walk keeps reading rows of M, so the analysis magnitudes
    # must not be overwritten while the output frames are being filled in.
    new_M = np.zeros_like(M)

    # Frequency bins (identical for every frame, so computed once)
    fbins = np.linspace(0, fs / 2, N)
    last_frame = M.shape[0] - 1

    def frame_envelope(frame):
        # True envelope of one analysis frame, from its magnitudes (divided by 20) interpolated onto fbins
        f = interpolate.interp1d(F[frame, :], M[frame, :] / 20, kind='linear', fill_value=-5,
                                 bounds_error=False)
        specenv, _, _ = fe.calc_true_envelope_spectral(f(fbins), N, thresh, ceps_coeffs, num_iters)
        return specenv

    # Initial parameters for random walk: start from the middle frame
    current_frame = F.shape[0] // 2
    specenv_at = frame_envelope(current_frame)

    for i in range(M.shape[0]):
        # Random walk update, clamped to the valid frame range
        current_frame = current_frame + random.choice([-rwl, rwl])
        current_frame = max(0, min(current_frame, last_frame))

        specenv_new = frame_envelope(current_frame)

        # Blend the running envelope with the new one; the closer alpha is to 1,
        # the less the envelope changes from its current value
        specenv_at = alpha * specenv_at + (1 - alpha) * specenv_new

        # Sample the blended envelope at the harmonic frequencies of the output frame
        fp = interpolate.interp1d(fbins[:N // 2 + 1], specenv_at[:N // 2 + 1], kind='linear',
                                  fill_value='extrapolate', bounds_error=False)
        new_M[i, :] = 20 * fp(new_F[i, :])

    # Reconstruction of the sound ignoring the residual
    audio_transformed = sineModelSynth(new_F, new_M, np.empty([0, 0]), W, H, fs)

    return audio_transformed
os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../../sounds/bendir.wav')) x1 = x[0:50000] w = np.blackman(2001) N = 2048 H = 500 t = -90 minSineDur = .01 maxnSines = 150 freqDevOffset = 20 freqDevSlope = 0.02 Ns = 512 H = Ns / 4 tfreq, tmag, tphase = SM.sineModelAnal(x1, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope) y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs) numFrames = int(tfreq[:, 0].size) frmTime = H * np.arange(numFrames) / float(fs) maxplotfreq = 3000.0 plt.figure(1, figsize=(9, 7)) plt.subplot(3, 1, 1) plt.plot(np.arange(x1.size) / float(fs), x1, 'b', lw=1.5) plt.axis([0, x1.size / float(fs), min(x1), max(x1)]) plt.title('x (bendir.wav)') plt.subplot(3, 1, 2) tracks = tfreq * np.less(tfreq, maxplotfreq) tracks[tracks <= 0] = np.nan
def sineModelMultiRes(inputFile="../../sounds/orchestra.wav",
                      windows=(signal.blackman(4095), signal.hamming(2047), np.hamming(1023)),
                      Ns=(4096, 2048, 1024), Bs=(1000, 5000, 22050), t=-80, minSineDur=0.02,
                      maxnSines=150, freqDevOffset=10, freqDevSlope=0.001, PlotIt=True):
    """
    Analyze a sound with the multi-resolution sinusoidal model, resynthesize it, write the result
    and optionally plot it.

    inputFile: input sound file
    windows, Ns, Bs: per-band settings handed unchanged to sineModelMultiResAnal
        (presumably analysis windows, fft sizes and band limits - confirm against that function)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset, freqDevSlope: frequency deviation settings passed to the analysis
    PlotIt: when false, return right after writing the output file

    The sum of absolute sample differences between input and output is printed, and the
    synthesized sound is written to '<input name>_sineModelMulti.wav' in the working directory.
    """
    sN = 512      # size of fft used in synthesis
    H = sN // 4   # hop size; floor division keeps it an integer under Python 3 as well

    (fs, x) = UF.wavread(inputFile)
    tfreq, tmag, tphase = sineModelMultiResAnal(x, fs, windows, Ns, Bs, H, t, minSineDur, maxnSines,
                                                freqDevOffset, freqDevSlope)
    y = SM.sineModelSynth(tfreq, tmag, tphase, sN, H, fs)

    # calculate diff between x & y over their common length
    diffLength = min([x.size, y.size])
    diff = np.abs(x[:diffLength] - y[:diffLength])
    print("diff {0}".format(np.sum(diff)))

    outputFile = os.path.basename(inputFile)[:-4] + '_sineModelMulti.wav'
    UF.wavwrite(y, fs, outputFile)

    if not PlotIt:
        return

    plt.figure(figsize=(12, 9))
    maxplotfreq = 10000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size)/float(fs), x)
    plt.axis([0, x.size/float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the sinusoidal frequencies
    plt.subplot(3, 1, 2)
    if tfreq.shape[1] > 0:
        numFrames = tfreq.shape[0]
        frmTime = H*np.arange(numFrames)/float(fs)
        tfreq[tfreq <= 0] = np.nan   # in-place: empty track slots become NaN so they are not drawn
        plt.ylabel('frequency (Hz)')
        plt.xlabel('time (sec)')
        plt.plot(frmTime, tfreq)
        plt.axis([0, x.size/float(fs), 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size)/float(fs), y)
    plt.axis([0, y.size/float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show()
# Sinusoidal analysis of mridangam.wav, time scaling of the tracks, resynthesis and plotting.
(fs, x) = UF.wavread('../../../sounds/mridangam.wav')
w = np.hamming(801)
N = 2048
t = -90
minSineDur = .005
maxnSines = 150
freqDevOffset = 20
freqDevSlope = 0.02
Ns = 512
# floor division keeps the hop size an integer under Python 3 as well (Ns/4 would be 128.0)
H = Ns // 4

mX, pX = STFT.stftAnal(x, w, N, H)
tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

# flat array handed to sineTimeScaling, laid out two values per row
timeScale = np.array([.01, .0,
                      .03, .03,
                      .335, .4,
                      .355, .42,
                      .671, .8,
                      .691, .82,
                      .858, 1.2,
                      .878, 1.22,
                      1.185, 1.6,
                      1.205, 1.62,
                      1.497, 2.0,
                      1.517, 2.02,
                      1.686, 2.4,
                      1.706, 2.42,
                      1.978, 2.8])
ytfreq, ytmag = SMT.sineTimeScaling(tfreq, tmag, timeScale)
y = SM.sineModelSynth(ytfreq, ytmag, np.array([]), Ns, H, fs)
mY, pY = STFT.stftAnal(y, w, N, H)

plt.figure(1, figsize=(12, 9))
maxplotfreq = 4000.0

# input sound
plt.subplot(4, 1, 1)
plt.plot(np.arange(x.size)/float(fs), x, 'b')
plt.axis([0, x.size/float(fs), min(x), max(x)])
plt.title('x (mridangam.wav)')

# sinusoidal tracks of the input, limited to maxplotfreq
plt.subplot(4, 1, 2)
numFrames = int(tfreq[:, 0].size)
frmTime = H*np.arange(numFrames)/float(fs)
tracks = tfreq*np.less(tfreq, maxplotfreq)
tracks[tracks <= 0] = np.nan
plt.plot(frmTime, tracks, color='k', lw=1)
def main(
    inputFile="../../sounds/sax-phrase.wav",
    window="blackman",
    M=601,
    N=1024,
    t=-100,
    minSineDur=0.1,
    nH=100,
    minf0=350,
    maxf0=700,
    f0et=5,
    harmDevSlope=0.01,
):
    """
    Run a harmonic plus residual analysis/synthesis of a sound, write the results and plot them.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics
    minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound
    f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation

    Three wav files (harmonics, residual and their sum) are written under "output_sounds/".
    """
    Ns = 512   # size of fft used in synthesis
    H = 128    # hop size (has to be 1/4 of Ns)

    # --------- computation -----------------
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # harmonic tracks, the residual left after removing them, and the residual spectrogram
    hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur)
    xr = UF.sineSubtraction(x, Ns, H, hfreq, hmag, hphase, fs)
    mXr, pXr = STFT.stftAnal(xr, fs, w, N, H)

    # resynthesize the harmonics and add the residual over their common length
    yh = SM.sineModelSynth(hfreq, hmag, hphase, Ns, H, fs)
    common = min(xr.size, yh.size)
    y = xr[:common] + yh[:common]

    # output sound files (monophonic with sampling rate of 44100)
    stem = "output_sounds/" + os.path.basename(inputFile)[:-4]
    UF.wavwrite(yh, fs, stem + "_hprModel_sines.wav")
    UF.wavwrite(xr, fs, stem + "_hprModel_residual.wav")
    UF.wavwrite(y, fs, stem + "_hprModel.wav")

    # --------- plotting --------------------
    def plot_waveform(row, signal, title):
        # time-domain plot of one signal in the given row of the 3x1 figure
        plt.subplot(3, 1, row)
        plt.plot(np.arange(signal.size) / float(fs), signal)
        plt.axis([0, signal.size / float(fs), min(signal), max(signal)])
        plt.ylabel("amplitude")
        plt.xlabel("time (sec)")
        plt.title(title)

    plt.figure(figsize=(12, 9))
    maxplotfreq = 5000.0   # frequency range to plot

    plot_waveform(1, x, "input sound: x")

    # magnitude spectrogram of the residual, limited to maxplotfreq
    plt.subplot(3, 1, 2)
    maxplotbin = int(N * maxplotfreq / fs)
    specTime = H * np.arange(int(mXr[:, 0].size)) / float(fs)
    binFreq = np.arange(maxplotbin + 1) * float(fs) / N
    plt.pcolormesh(specTime, binFreq, np.transpose(mXr[:, : maxplotbin + 1]))
    plt.autoscale(tight=True)

    # harmonic frequencies on top of the residual spectrogram, with their own time axis
    harms = hfreq * np.less(hfreq, maxplotfreq)
    harms[harms == 0] = np.nan
    harmTime = H * np.arange(int(harms[:, 0].size)) / float(fs)
    plt.plot(harmTime, harms, color="k", ms=3, alpha=1)
    plt.xlabel("time(s)")
    plt.ylabel("frequency(Hz)")
    plt.autoscale(tight=True)
    plt.title("harmonics + residual spectrogram")

    plot_waveform(3, y, "output sound: y")

    plt.tight_layout()
    plt.show()
def analysis(inputFile='../../sounds/mridangam.wav', window='hamming', M=801, N=2048, t=-90,
             minSineDur=0.01, maxnSines=150, freqDevOffset=20, freqDevSlope=0.02):
    """
    Analyze a sound with the sine model, write a phase-less resynthesis and plot the result.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    returns inputFile: input file name; fs: sampling rate of input file,
            tfreq, tmag: sinusoidal frequencies and magnitudes
    """
    Ns = 512   # size of fft used in synthesis
    H = 128    # hop size (has to be 1/4 of Ns)

    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # sine model of the whole sound, then synthesis without the original phases
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)
    y = SM.sineModelSynth(tfreq, tmag, np.array([]), Ns, H, fs)

    # output sound file (monophonic with sampling rate of 44100)
    UF.wavwrite(y, fs, 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_sineModel.wav')

    def plot_waveform(row, signal, title):
        # time-domain plot of one signal in the given row of the 3x1 figure
        plt.subplot(3, 1, row)
        plt.plot(np.arange(signal.size) / float(fs), signal)
        plt.axis([0, signal.size / float(fs), min(signal), max(signal)])
        plt.ylabel('amplitude')
        plt.xlabel('time (sec)')
        plt.title(title)

    plt.figure(figsize=(12, 9))
    maxplotfreq = 5000.0   # frequency range to plot

    plot_waveform(1, x, 'input sound: x')

    # sinusoidal frequencies; a copy is masked so the returned tfreq stays untouched
    if tfreq.shape[1] > 0:
        plt.subplot(3, 1, 2)
        tracks = np.copy(tfreq)
        tracks = tracks * np.less(tracks, maxplotfreq)
        tracks[tracks <= 0] = np.nan
        frmTime = H * np.arange(int(tracks[:, 0].size)) / float(fs)
        plt.plot(frmTime, tracks)
        plt.axis([0, x.size / float(fs), 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    plot_waveform(3, y, 'output sound: y')

    plt.tight_layout()
    plt.show(block=False)   # non-blocking so the analysis values can be returned right away

    return inputFile, fs, tfreq, tmag
def exploreSineModel(inputFile='../../sounds/multiSines.wav'):
    """
    Analyze a sound with the sinusoidal model, resynthesize it, report the SNR of the
    resynthesis, write the synthesized and error signals and plot the result.

    Input:
        inputFile (string) = wav file including the path
    Output:
        return True
    """
    window = 'blackmanharris'   # window type
    M = 3529                    # window size in samples
    N = 4096                    # FFT size
    t = -50                     # threshold
    minSineDur = 0.01           # minimum duration of a sinusoid
    maxnSines = 15              # maximum number of sinusoids at any time frame
    freqDevOffset = 10          # minimum frequency deviation at 0Hz
    freqDevSlope = 0.001        # slope increase of minimum frequency deviation
    Ns = 512                    # size of fft used in synthesis
    # hop size (has to be 1/4 of Ns); floor division keeps it an integer under Python 3 as well
    H = Ns // 4

    fs, x = UF.wavread(inputFile)   # read input sound
    w = get_window(window, M)       # compute analysis window

    # analyze the sound with the sinusoidal model
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

    # synthesize the output sound from the sinusoidal representation
    y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

    # write the synthesized sound obtained from the sinusoidal synthesis
    outputFile = os.path.basename(inputFile)[:-4] + '_sineModel.wav'
    UF.wavwrite(y, fs, outputFile)

    # SNR calculation over the common length of input and output, so the subtraction
    # is well defined whichever of the two signals is longer
    common = min(len(x), len(y))
    x1 = x[:common]
    error = x1 - y[:common]
    e_signal = calculate_energy(x1)
    e_error = calculate_energy(error)
    snr = calculate_snr(e_signal, e_error)
    print("SNR {}".format(snr))

    errorFile = os.path.basename(inputFile)[:-4] + '_sineModel_error.wav'
    UF.wavwrite(error, fs, errorFile)

    # create figure to show plots
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size)/float(fs), x)
    plt.axis([0, x.size/float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the sinusoidal frequencies
    plt.subplot(3, 1, 2)
    if tfreq.shape[1] > 0:
        numFrames = tfreq.shape[0]
        frmTime = H*np.arange(numFrames)/float(fs)
        tfreq[tfreq <= 0] = np.nan   # in-place: empty track slots become NaN so they are not drawn
        plt.plot(frmTime, tfreq)
        plt.axis([0, x.size/float(fs), 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # plot the output sound together with the absolute error
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size)/float(fs), y)
    plt.plot(np.arange(common)/float(fs), abs(error))
    plt.axis([0, y.size/float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show()
    return True
zp = np.pad(ceps_current, [0, params['N'] - len(ceps_current)], mode='constant', constant_values=(0, 0)) zp = np.concatenate( (zp[:params['N'] // 2], np.flip(zp[1:params['N'] // 2 + 1]))) zp[0] = ceps_current[0] # Obtain the Envelope from the cepstrum specenv = np.real(np.fft.fft(zp)) fbins = np.linspace(0, params['fs'], params['N']) fp = interpolate.interp1d(np.arange(params['N']), specenv, kind='linear', fill_value='extrapolate', bounds_error=False) new_M[j, :] = 20 * fp((new_F[j, :] / params['fs']) * params['N']) # zp = np.pad(frame,[0,params['N'] - len(frame)],mode = 'constant',constant_values=(0, 0)) # zp = np.concatenate((zp[:params['N']//2],np.flip(zp[1:params['N']//2 + 1]))) # specenv = np.real(np.fft.fft(zp)) # # print(fbins[:params['N']//2 + 1]) # # print(specenv[:params['N']//2 + 1]) # fp = interpolate.interp1d(np.arange(params['N']//2),specenv[:params['N']//2],kind = 'linear',fill_value = 'extrapolate', bounds_error=False) # new_M[j,:] = 20*fp((new_F[j,:]/params['fs'])*params['N']) arecon = sineModelSynth(new_F, new_M, np.empty([0, 0]), params['W'], params['H'], params['fs']) write(filename=dir_dump + str(k) + '_recon_param.wav', rate=params['fs'], data=arecon.astype('float32'))
yhfreq[l,ind_valid] = yhfreq[l,ind_valid] * freqScaling[l] return yhfreq, yhmag if __name__ == '__main__': (fs, x) = UF.wavread(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../sounds/soprano-E4.wav')) w = np.blackman(801) N = 1024 t = -90 nH = 100 minf0 = 250 maxf0 = 400 f0et = 8 minSineDur = .1 harmDevSlope = 0.01 Ns = 512 H = Ns/4 hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur) freqScaling = np.array([0, 3, 1, .5]) freqStretching = np.array([]) timbrePreservation = 1 hfreqt, hmagt = harmonicFreqScaling(hfreq, hmag, freqScaling, freqStretching, timbrePreservation, fs) timeScaling = np.array([0, 0, 1, .5, 2, 4]) hfreqt, hmagt = ST.sineTimeScaling(hfreq, hmag, timeScaling) yh = SM.sineModelSynth(hfreqt, hmagt, np.array([]), Ns, H, fs) UF.play(yh, fs)