def transformation_synthesis(inputFile1, fs, hfreq1, hmag1, stocEnv1, inputFile2, hfreq2, hmag2, stocEnv2, hfreqIntp=np.array([0, 0, .1, 0, .9, 1, 1, 1]), hmagIntp=np.array([0, 0, .1, 0, .9, 1, 1, 1]), stocIntp=np.array([0, 0, .1, 0, .9, 1, 1, 1])): """ Transform the analysis values returned by the analysis function and synthesize the sound inputFile1: name of input file 1 fs: sampling rate of input file 1 hfreq1, hmag1, stocEnv1: hps representation of sound 1 inputFile2: name of input file 2 hfreq2, hmag2, stocEnv2: hps representation of sound 2 hfreqIntp: interpolation factor between the harmonic frequencies of the two sounds, 0 is sound 1 and 1 is sound 2 (time,value pairs) hmagIntp: interpolation factor between the harmonic magnitudes of the two sounds, 0 is sound 1 and 1 is sound 2 (time,value pairs) stocIntp: interpolation factor between the stochastic representation of the two sounds, 0 is sound 1 and 1 is sound 2 (time,value pairs) """ # size of fft used in synthesis Ns = 512 # hop size (has to be 1/4 of Ns) H = 128 # morph the two sounds yhfreq, yhmag, ystocEnv = HPST.hpsMorph(hfreq1, hmag1, stocEnv1, hfreq2, hmag2, stocEnv2, hfreqIntp, hmagIntp, stocIntp) # synthesis y, yh, yst = HPS.hpsModelSynth(yhfreq, yhmag, np.array([]), ystocEnv, Ns, H, fs) # write output sound outputFile = 'output_sounds/' + os.path.basename( inputFile1)[:-4] + '_hpsMorph.wav' UF.wavwrite(y, fs, outputFile)
def main(inputFile='../../sounds/sax-phrase-short.wav', window='blackman', M=601, N=1024, t=-100, minSineDur=0.1, nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01, stocf=0.1): """ inputFile: input sound file (monophonic with sampling rate of 44100) window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris) M: analysis window size; N: fft size (power of two, bigger or equal than M) t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation stocf: decimation factor used for the stochastic approximation """ # size of fft used in synthesis Ns = 512 # hop size (has to be 1/4 of Ns) H = 128 # read input sound (fs, x) = UF.wavread(inputFile) # compute analysis window w = get_window(window, M) # compute the harmonic plus stochastic model of the whole sound hfreq, hmag, hphase, stocEnv = HPS.hpsModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur, Ns, stocf) # synthesize a sound from the harmonic plus stochastic representation y, yh, yst = HPS.hpsModelSynth(hfreq, hmag, hphase, stocEnv, Ns, H, fs) # output sound file (monophonic with sampling rate of 44100) outputFileSines = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hpsModel_sines.wav' outputFileStochastic = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hpsModel_stochastic.wav' outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hpsModel.wav' # write sounds files for harmonics, stochastic, and the sum UF.wavwrite(yh, fs, outputFileSines) UF.wavwrite(yst, fs, outputFileStochastic) UF.wavwrite(y, fs, outputFile) return x, fs, hfreq, stocEnv, y
def analysis(inputFile='../../sounds/sax-phrase-short.wav', window='blackman', M=601, N=1024, t=-100, minSineDur=0.1, nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01, stocf=0.1): """ Analyze a sound with the harmonic plus stochastic model inputFile: input sound file (monophonic with sampling rate of 44100) window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris) M: analysis window size N: fft size (power of two, bigger or equal than M) t: magnitude threshold of spectral peaks minSineDur: minimum duration of sinusoidal tracks nH: maximum number of harmonics minf0: minimum fundamental frequency in sound maxf0: maximum fundamental frequency in sound f0et: maximum error accepted in f0 detection algorithm harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation stocf: decimation factor used for the stochastic approximation returns inputFile: input file name; fs: sampling rate of input file, hfreq, hmag: harmonic frequencies, magnitude; mYst: stochastic residual """ # size of fft used in synthesis Ns = 512 # hop size (has to be 1/4 of Ns) H = 128 # read input sound (fs, x) = UF.wavread(inputFile) # compute analysis window w = get_window(window, M) # compute the harmonic plus stochastic model of the whole sound hfreq, hmag, hphase, mYst = HPS.hpsModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur, Ns, stocf) # synthesize the harmonic plus stochastic model without original phases y, yh, yst = HPS.hpsModelSynth(hfreq, hmag, np.array([]), mYst, Ns, H, fs) # write output sound outputFile = 'output_sounds/' + os.path.basename( inputFile)[:-4] + '_hpsModel.wav' UF.wavwrite(y, fs, outputFile) # create figure to plot plt.figure(figsize=(12, 9)) # frequency range to plot maxplotfreq = 15000.0 # plot the input sound plt.subplot(3, 1, 1) plt.plot(np.arange(x.size) / float(fs), x) plt.axis([0, x.size / float(fs), min(x), max(x)]) plt.ylabel('amplitude') plt.xlabel('time (sec)') plt.title('input sound: x') # plot spectrogram stochastic compoment plt.subplot(3, 1, 2) numFrames = int(mYst[:, 0].size) sizeEnv = int(mYst[0, :].size) frmTime = H * np.arange(numFrames) / float(fs) binFreq = (.5 * fs) * np.arange(sizeEnv * maxplotfreq / (.5 * fs)) / sizeEnv plt.pcolormesh( frmTime, binFreq, np.transpose(mYst[:, :int(sizeEnv * maxplotfreq / (.5 * fs)) + 1])) plt.autoscale(tight=True) # plot harmonic on top of stochastic spectrogram if (hfreq.shape[1] > 0): harms = hfreq * np.less(hfreq, maxplotfreq) harms[harms == 0] = np.nan numFrames = int(harms[:, 0].size) frmTime = H * np.arange(numFrames) / float(fs) plt.plot(frmTime, harms, color='k', ms=3, alpha=1) plt.xlabel('time (sec)') plt.ylabel('frequency (Hz)') plt.autoscale(tight=True) plt.title('harmonics + stochastic spectrogram') # plot the output sound plt.subplot(3, 1, 3) plt.plot(np.arange(y.size) / float(fs), y) plt.axis([0, y.size / float(fs), min(y), max(y)]) plt.ylabel('amplitude') plt.xlabel('time (sec)') plt.title('output sound: y') plt.tight_layout() plt.show(block=False) return inputFile, fs, hfreq, hmag, mYst
def transformation_synthesis( inputFile, fs, hfreq, hmag, mYst, freqScaling=np.array([0, 1.2, 2.01, 1.2, 2.679, .7, 3.146, .7]), freqStretching=np.array([0, 1, 2.01, 1, 2.679, 1.5, 3.146, 1.5]), timbrePreservation=1, timeScaling=np.array([0, 0, 2.138, 2.138 - 1.0, 3.146, 3.146])): """ transform the analysis values returned by the analysis function and synthesize the sound inputFile: name of input file fs: sampling rate of input file hfreq, hmag: harmonic frequencies and magnitudes mYst: stochastic residual freqScaling: frequency scaling factors, in time-value pairs (value of 1 no scaling) freqStretching: frequency stretching factors, in time-value pairs (value of 1 no stretching) timbrePreservation: 1 preserves original timbre, 0 it does not timeScaling: time scaling factors, in time-value pairs """ # size of fft used in synthesis Ns = 512 # hop size (has to be 1/4 of Ns) H = 128 # frequency scaling of the harmonics hfreqt, hmagt = HT.harmonicFreqScaling(hfreq, hmag, freqScaling, freqStretching, timbrePreservation, fs) # time scaling the sound yhfreq, yhmag, ystocEnv = HPST.hpsTimeScale(hfreqt, hmagt, mYst, timeScaling) # synthesis from the trasformed hps representation y, yh, yst = HPS.hpsModelSynth(yhfreq, yhmag, np.array([]), ystocEnv, Ns, H, fs) # write output sound outputFile = 'output_sounds/' + os.path.basename( inputFile)[:-4] + '_hpsModelTransformation.wav' UF.wavwrite(y, fs, outputFile) # create figure to plot plt.figure(figsize=(12, 6)) # frequency range to plot maxplotfreq = 15000.0 # plot spectrogram of transformed stochastic compoment plt.subplot(2, 1, 1) numFrames = int(ystocEnv[:, 0].size) sizeEnv = int(ystocEnv[0, :].size) frmTime = H * np.arange(numFrames) / float(fs) binFreq = (.5 * fs) * np.arange(sizeEnv * maxplotfreq / (.5 * fs)) / sizeEnv plt.pcolormesh( frmTime, binFreq, np.transpose(ystocEnv[:, :int(sizeEnv * maxplotfreq / (.5 * fs)) + 1])) plt.autoscale(tight=True) # plot transformed harmonic on top of stochastic spectrogram if (yhfreq.shape[1] > 0): harms = yhfreq * np.less(yhfreq, maxplotfreq) harms[harms == 0] = np.nan numFrames = int(harms[:, 0].size) frmTime = H * np.arange(numFrames) / float(fs) plt.plot(frmTime, harms, color='k', ms=3, alpha=1) plt.xlabel('time (sec)') plt.ylabel('frequency (Hz)') plt.autoscale(tight=True) plt.title('harmonics + stochastic spectrogram') # plot the output sound plt.subplot(2, 1, 2) plt.plot(np.arange(y.size) / float(fs), y) plt.axis([0, y.size / float(fs), min(y), max(y)]) plt.ylabel('amplitude') plt.xlabel('time (sec)') plt.title('output sound: y') plt.tight_layout() plt.show()
def analysis(inputFile='../../sounds/sax-phrase-short.wav', window='blackman', M=601, N=1024, t=-100, minSineDur=0.1, nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01, stocf=0.1): """ Analyze a sound with the harmonic plus stochastic model inputFile: input sound file (monophonic with sampling rate of 44100) window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris) M: analysis window size N: fft size (power of two, bigger or equal than M) t: magnitude threshold of spectral peaks minSineDur: minimum duration of sinusoidal tracks nH: maximum number of harmonics minf0: minimum fundamental frequency in sound maxf0: maximum fundamental frequency in sound f0et: maximum error accepted in f0 detection algorithm harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation stocf: decimation factor used for the stochastic approximation returns inputFile: input file name; fs: sampling rate of input file, hfreq, hmag: harmonic frequencies, magnitude; mYst: stochastic residual """ # size of fft used in synthesis Ns = 512 # hop size (has to be 1/4 of Ns) H = 128 # read input sound (fs, x) = UF.wavread(inputFile) # compute analysis window w = get_window(window, M) # compute the harmonic plus stochastic model of the whole sound hfreq, hmag, hphase, mYst = HPS.hpsModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur, Ns, stocf) # synthesize the harmonic plus stochastic model without original phases y, yh, yst = HPS.hpsModelSynth(hfreq, hmag, np.array([]), mYst, Ns, H, fs) # write output sound outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hpsModel.wav' UF.wavwrite(y,fs, outputFile) # create figure to plot plt.figure(figsize=(12, 9)) # frequency range to plot maxplotfreq = 15000.0 # plot the input sound plt.subplot(3,1,1) plt.plot(np.arange(x.size)/float(fs), x) plt.axis([0, x.size/float(fs), min(x), max(x)]) plt.ylabel('amplitude') plt.xlabel('time (sec)') plt.title('input sound: x') # plot spectrogram stochastic compoment plt.subplot(3,1,2) numFrames = int(mYst[:,0].size) sizeEnv = int(mYst[0,:].size) frmTime = H*np.arange(numFrames)/float(fs) binFreq = (.5*fs)*np.arange(sizeEnv*maxplotfreq/(.5*fs))/sizeEnv plt.pcolormesh(frmTime, binFreq, np.transpose(mYst[:,:sizeEnv*maxplotfreq/(.5*fs)+1])) plt.autoscale(tight=True) # plot harmonic on top of stochastic spectrogram if (hfreq.shape[1] > 0): harms = hfreq*np.less(hfreq,maxplotfreq) harms[harms==0] = np.nan numFrames = int(harms[:,0].size) frmTime = H*np.arange(numFrames)/float(fs) plt.plot(frmTime, harms, color='k', ms=3, alpha=1) plt.xlabel('time (sec)') plt.ylabel('frequency (Hz)') plt.autoscale(tight=True) plt.title('harmonics + stochastic spectrogram') # plot the output sound plt.subplot(3,1,3) plt.plot(np.arange(y.size)/float(fs), y) plt.axis([0, y.size/float(fs), min(y), max(y)]) plt.ylabel('amplitude') plt.xlabel('time (sec)') plt.title('output sound: y') plt.tight_layout() plt.show(block=False) return inputFile, fs, hfreq, hmag, mYst
def transformation_synthesis(inputFile, fs, hfreq, hmag, mYst, freqScaling = np.array([0, 1.2, 2.01, 1.2, 2.679, .7, 3.146, .7]), freqStretching = np.array([0, 1, 2.01, 1, 2.679, 1.5, 3.146, 1.5]), timbrePreservation = 1, timeScaling = np.array([0, 0, 2.138, 2.138-1.0, 3.146, 3.146])): """ transform the analysis values returned by the analysis function and synthesize the sound inputFile: name of input file fs: sampling rate of input file hfreq, hmag: harmonic frequencies and magnitudes mYst: stochastic residual freqScaling: frequency scaling factors, in time-value pairs (value of 1 no scaling) freqStretching: frequency stretching factors, in time-value pairs (value of 1 no stretching) timbrePreservation: 1 preserves original timbre, 0 it does not timeScaling: time scaling factors, in time-value pairs """ # size of fft used in synthesis Ns = 512 # hop size (has to be 1/4 of Ns) H = 128 # frequency scaling of the harmonics hfreqt, hmagt = HT.harmonicFreqScaling(hfreq, hmag, freqScaling, freqStretching, timbrePreservation, fs) # time scaling the sound yhfreq, yhmag, ystocEnv = HPST.hpsTimeScale(hfreqt, hmagt, mYst, timeScaling) # synthesis from the trasformed hps representation y, yh, yst = HPS.hpsModelSynth(yhfreq, yhmag, np.array([]), ystocEnv, Ns, H, fs) # write output sound outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hpsModelTransformation.wav' UF.wavwrite(y,fs, outputFile) # create figure to plot plt.figure(figsize=(12, 6)) # frequency range to plot maxplotfreq = 15000.0 # plot spectrogram of transformed stochastic compoment plt.subplot(2,1,1) numFrames = int(ystocEnv[:,0].size) sizeEnv = int(ystocEnv[0,:].size) frmTime = H*np.arange(numFrames)/float(fs) binFreq = (.5*fs)*np.arange(sizeEnv*maxplotfreq/(.5*fs))/sizeEnv plt.pcolormesh(frmTime, binFreq, np.transpose(ystocEnv[:,:sizeEnv*maxplotfreq/(.5*fs)+1])) plt.autoscale(tight=True) # plot transformed harmonic on top of stochastic spectrogram if (yhfreq.shape[1] > 0): harms = yhfreq*np.less(yhfreq,maxplotfreq) harms[harms==0] = np.nan numFrames = int(harms[:,0].size) frmTime = H*np.arange(numFrames)/float(fs) plt.plot(frmTime, harms, color='k', ms=3, alpha=1) plt.xlabel('time (sec)') plt.ylabel('frequency (Hz)') plt.autoscale(tight=True) plt.title('harmonics + stochastic spectrogram') # plot the output sound plt.subplot(2,1,2) plt.plot(np.arange(y.size)/float(fs), y) plt.axis([0, y.size/float(fs), min(y), max(y)]) plt.ylabel('amplitude') plt.xlabel('time (sec)') plt.title('output sound: y') plt.tight_layout() plt.show()
frmTime = H * np.arange(numFrames) / float(fs) binFreq = fs * np.arange(N * maxplotfreq / fs) / N plt.pcolormesh(frmTime, binFreq, np.transpose(mXr[:, : N * maxplotfreq / fs + 1])) plt.xlabel("time (sec)") plt.ylabel("frequency (Hz)") plt.title("spectrogram of residual") plt.autoscale(tight=True) plt.tight_layout() plt.savefig("cello-phrase-residual.png") # compute the harmonic plus stochastic model stocf = 0.5 hfreq, hmag, hphase, mYst = HPS.hpsModelAnal( x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur, Ns, stocf ) y, yh, yst = HPS.hpsModelSynth(hfreq, hmag, np.array([]), mYst, Ns, H, fs) UF.wavwrite(yst, fs, "cello-phrase-stochastic.wav") UF.wavwrite(yh, fs, "cello-phrase-harmonic.wav") UF.wavwrite(y, fs, "cello-phrase-synthesis.wav") # plot the spectrogram of the stochastic component plt.figure(6, figsize=(16, 4.5)) maxplotfreq = 5000.0 numFrames = int(mYst[:, 0].size) Nst = 2 * int(mYst[0, :].size) lastbin = int(Nst * maxplotfreq / fs) maxplotfreq = fs * (lastbin - 1) / Nst frmTime = H * np.arange(numFrames) / float(fs) binFreq = fs * np.arange(lastbin) / Nst plt.pcolormesh(frmTime, binFreq, np.transpose(mYst[:, :lastbin])) plt.xlabel("time (sec)")
Ns = 512 H = 128 (fs1, x1) = UF.wavread(inputFile1) (fs2, x2) = UF.wavread(inputFile2) w1 = get_window(window1, M1) w2 = get_window(window2, M2) hfreq1, hmag1, hphase1, stocEnv1 = HPS.hpsModelAnal(x1, fs1, w1, N1, H, t1, nH, minf01, maxf01, f0et1, harmDevSlope1, minSineDur1, Ns, stocf) hfreq2, hmag2, hphase2, stocEnv2 = HPS.hpsModelAnal(x2, fs2, w2, N2, H, t2, nH, minf02, maxf02, f0et2, harmDevSlope2, minSineDur2, Ns, stocf) hfreqIntp = np.array([0, 0, .1, 0, .9, 1, 1, 1]) hmagIntp = np.array([0, 0, .1, 0, .9, 1, 1, 1]) stocIntp = np.array([0, 0, .1, 0, .9, 1, 1, 1]) yhfreq, yhmag, ystocEnv = HPST.hpsMorph(hfreq1, hmag1, stocEnv1, hfreq2, hmag2, stocEnv2, hfreqIntp, hmagIntp, stocIntp) y, yh, yst = HPS.hpsModelSynth(yhfreq, yhmag, np.array([]), ystocEnv, Ns, H, fs1) UF.wavwrite(y,fs1, 'hps-morph-total.wav') plt.figure(figsize=(12, 9)) # frequency range to plot maxplotfreq = 15000.0 # plot spectrogram stochastic component of sound 1 plt.subplot(3,1,1) numFrames = int(stocEnv1[:,0].size) sizeEnv = int(stocEnv1[0,:].size) frmTime = H*np.arange(numFrames)/float(fs1) binFreq = (.5*fs1)*np.arange(sizeEnv*maxplotfreq/(.5*fs1))/sizeEnv plt.pcolormesh(frmTime, binFreq, np.transpose(stocEnv1[:,:int(sizeEnv*maxplotfreq/(.5*fs1)+1)]))
def transformation_synthesis(inputFile1, fs, hfreq1, hmag1, stocEnv1, inputFile2, hfreq2, hmag2, stocEnv2, hfreqIntp=np.array([0, 0, .1, 0, .9, 1, 1, 1]), hmagIntp=np.array([0, 0, .1, 0, .9, 1, 1, 1]), stocIntp=np.array([0, 0, .1, 0, .9, 1, 1, 1])): """ Transform the analysis values returned by the analysis function and synthesize the sound inputFile1: name of input file 1 fs: sampling rate of input file 1 hfreq1, hmag1, stocEnv1: hps representation of sound 1 inputFile2: name of input file 2 hfreq2, hmag2, stocEnv2: hps representation of sound 2 hfreqIntp: interpolation factor between the harmonic frequencies of the two sounds, 0 is sound 1 and 1 is sound 2 (time,value pairs) hmagIntp: interpolation factor between the harmonic magnitudes of the two sounds, 0 is sound 1 and 1 is sound 2 (time,value pairs) stocIntp: interpolation factor between the stochastic representation of the two sounds, 0 is sound 1 and 1 is sound 2 (time,value pairs) """ # size of fft used in synthesis Ns = 512 # hop size (has to be 1/4 of Ns) H = 128 # morph the two sounds yhfreq, yhmag, ystocEnv = HPST.hpsMorph(hfreq1, hmag1, stocEnv1, hfreq2, hmag2, stocEnv2, hfreqIntp, hmagIntp, stocIntp) # synthesis y, yh, yst = HPS.hpsModelSynth(yhfreq, yhmag, np.array([]), ystocEnv, Ns, H, fs) # write output sound outputFile = 'output_sounds/' + os.path.basename( inputFile1)[:-4] + '_hpsMorph.wav' UF.wavwrite(y, fs, outputFile) # create figure to plot plt.figure(figsize=(12, 9)) # frequency range to plot maxplotfreq = 15000.0 # plot spectrogram of transformed stochastic compoment plt.subplot(2, 1, 1) numFrames = int(ystocEnv[:, 0].size) sizeEnv = int(ystocEnv[0, :].size) frmTime = H * np.arange(numFrames) / float(fs) binFreq = (.5 * fs) * np.arange(sizeEnv * maxplotfreq / (.5 * fs)) / sizeEnv plt.pcolormesh( frmTime, binFreq, np.transpose(ystocEnv[:, :int(sizeEnv * maxplotfreq / (.5 * fs)) + 1])) plt.autoscale(tight=True) # plot transformed harmonic on top of stochastic spectrogram if (yhfreq.shape[1] > 0): harms = np.copy(yhfreq) harms = harms * np.less(harms, maxplotfreq) harms[harms == 0] = np.nan numFrames = int(harms[:, 0].size) frmTime = H * np.arange(numFrames) / float(fs) plt.plot(frmTime, harms, color='k', ms=3, alpha=1) plt.xlabel('time (sec)') plt.ylabel('frequency (Hz)') plt.autoscale(tight=True) plt.title('harmonics + stochastic spectrogram') # plot the output sound plt.subplot(2, 1, 2) plt.plot(np.arange(y.size) / float(fs), y) plt.axis([0, y.size / float(fs), min(y), max(y)]) plt.ylabel('amplitude') plt.xlabel('time (sec)') plt.title('output sound: y') plt.tight_layout() plt.show()
def transformation_synthesis(inputFile1, fs, hfreq1, hmag1, stocEnv1, inputFile2, hfreq2, hmag2, stocEnv2, hfreqIntp = np.array([0, 0, .1, 0, .9, 1, 1, 1]), hmagIntp = np.array([0, 0, .1, 0, .9, 1, 1, 1]), stocIntp = np.array([0, 0, .1, 0, .9, 1, 1, 1])): """ Transform the analysis values returned by the analysis function and synthesize the sound inputFile1: name of input file 1 fs: sampling rate of input file 1 hfreq1, hmag1, stocEnv1: hps representation of sound 1 inputFile2: name of input file 2 hfreq2, hmag2, stocEnv2: hps representation of sound 2 hfreqIntp: interpolation factor between the harmonic frequencies of the two sounds, 0 is sound 1 and 1 is sound 2 (time,value pairs) hmagIntp: interpolation factor between the harmonic magnitudes of the two sounds, 0 is sound 1 and 1 is sound 2 (time,value pairs) stocIntp: interpolation factor between the stochastic representation of the two sounds, 0 is sound 1 and 1 is sound 2 (time,value pairs) """ # size of fft used in synthesis Ns = 512 # hop size (has to be 1/4 of Ns) H = 128 # morph the two sounds yhfreq, yhmag, ystocEnv = HPST.hpsMorph(hfreq1, hmag1, stocEnv1, hfreq2, hmag2, stocEnv2, hfreqIntp, hmagIntp, stocIntp) # synthesis y, yh, yst = HPS.hpsModelSynth(yhfreq, yhmag, np.array([]), ystocEnv, Ns, H, fs) # write output sound outputFile = 'output_sounds/' + os.path.basename(inputFile1)[:-4] + '_hpsMorph.wav' UF.wavwrite(y, fs, outputFile) # create figure to plot plt.figure(figsize=(12, 9)) # frequency range to plot maxplotfreq = 15000.0 # plot spectrogram of transformed stochastic compoment plt.subplot(2,1,1) numFrames = int(ystocEnv[:,0].size) sizeEnv = int(ystocEnv[0,:].size) frmTime = H*np.arange(numFrames)/float(fs) binFreq = (.5*fs)*np.arange(sizeEnv*maxplotfreq/(.5*fs))/sizeEnv plt.pcolormesh(frmTime, binFreq, np.transpose(ystocEnv[:,:sizeEnv*maxplotfreq/(.5*fs)+1])) plt.autoscale(tight=True) # plot transformed harmonic on top of stochastic spectrogram if (yhfreq.shape[1] > 0): harms = np.copy(yhfreq) harms = harms*np.less(harms,maxplotfreq) harms[harms==0] = np.nan numFrames = int(harms[:,0].size) frmTime = H*np.arange(numFrames)/float(fs) plt.plot(frmTime, harms, color='k', ms=3, alpha=1) plt.xlabel('time (sec)') plt.ylabel('frequency (Hz)') plt.autoscale(tight=True) plt.title('harmonics + stochastic spectrogram') # plot the output sound plt.subplot(2,1,2) plt.plot(np.arange(y.size)/float(fs), y) plt.axis([0, y.size/float(fs), min(y), max(y)]) plt.ylabel('amplitude') plt.xlabel('time (sec)') plt.title('output sound: y') plt.tight_layout() plt.show(block=False)
w = np.blackman(601) N = 1024 t = -100 nH = 100 minf0 = 350 maxf0 = 700 f0et = 5 minSineDur = .1 harmDevSlope = 0.01 Ns = 512 H = Ns / 4 stocf = .2 hfreq, hmag, hphase, mYst = HPS.hpsModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur, Ns, stocf) y, yh, yst = HPS.hpsModelSynth(hfreq, hmag, hphase, mYst, Ns, H, fs) maxplotfreq = 10000.0 plt.figure(1, figsize=(9, 7)) plt.subplot(311) plt.plot(np.arange(x.size) / float(fs), x, 'b') plt.autoscale(tight=True) plt.title('x (sax-phrase-short.wav)') plt.subplot(312) numFrames = int(mYst[:, 0].size) sizeEnv = int(mYst[0, :].size) frmTime = H * np.arange(numFrames) / float(fs) binFreq = (.5 * fs) * np.arange(sizeEnv * maxplotfreq / (.5 * fs)) / sizeEnv plt.pcolormesh(frmTime, binFreq,
def transformation_synthesis_stereo(inputFile, fs, hfreq, hmag, mYst, freqScaling = np.array([0, 1.2, 2.01, 1.2, 2.679, .7, 3.146, .7]), freqStretching = np.array([0, 1, 2.01, 1, 2.679, 1.5, 3.146, 1.5]), timbrePreservation = 1, timeScaling = np.array([0, 0, 2.138, 2.138-1.0, 3.146, 3.146]), inputSound=[]): """ transform the analysis values returned by the analysis function and synthesize the sound inputFile: name of input file fs: sampling rate of input file hfreq, hmag: harmonic frequencies and magnitudes mYst: stochastic residual freqScaling: tuple (for L and R channels) of arrays of frequency scaling factors, in time-value pairs (value of 1 no scaling) freqStretching: tuple (for L and R channels) of arrays of frequency stretching factors, in time-value pairs (value of 1 no stretching) timbrePreservation: tuple (for L and R channels) of arrays of 1 preserves original timbre, 0 it does not timeScaling: tuple (for L and R channels) of arrays of time scaling factors, in time-value pairs inputSound: original sound, used for auto-attenuation of the output sound """ # size of fft used in synthesis Ns = 512 # hop size (has to be 1/4 of Ns) H = 128 # frequency scaling of the harmonics hfreqtLeft , hmagtLeft = HT.harmonicFreqScaling(hfreq, hmag, freqScaling[0], freqStretching[0], timbrePreservation[0], fs) hfreqtRight, hmagtRight = HT.harmonicFreqScaling(hfreq, hmag, freqScaling[1], freqStretching[1], timbrePreservation[1], fs) # time scaling the sound yhfreqLeft , yhmagLeft , ystocEnvLeft = HPST.hpsTimeScale(hfreqtLeft , hmagtLeft , mYst, timeScaling[0]) yhfreqRight, yhmagRight, ystocEnvRight = HPST.hpsTimeScale(hfreqtRight, hmagtRight, mYst, timeScaling[1]) # synthesis from the trasformed hps representation yLeft, yhLeft, ystLeft = HPS.hpsModelSynth(yhfreqLeft , yhmagLeft , np.array([]), ystocEnvLeft , Ns, H, fs) yRight, yhRight, ystRight = HPS.hpsModelSynth(yhfreqRight, yhmagRight, np.array([]), ystocEnvRight, Ns, H, fs) # write output sound outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hpsModelTransformation.wav' UF.wavwriteStereo(yLeft, yRight, fs, outputFile, inputSound) # create figure to plot plt.figure(figsize=(12, 12)) # frequency range to plot maxplotfreq = 15000.0 def plotTransformedSignals(channelName,subplot1,subplot2,yhfreq,ystocEnv,y): # plot spectrogram of transformed stochastic compoment plt.subplot(subplot1) numFrames = int(ystocEnv[:,0].size) sizeEnv = int(ystocEnv[0,:].size) frmTime = H*np.arange(numFrames)/float(fs) binFreq = (.5*fs)*np.arange(sizeEnv*maxplotfreq/(.5*fs))/sizeEnv plt.pcolormesh(frmTime, binFreq, np.transpose(ystocEnv[:,:sizeEnv*maxplotfreq/(.5*fs)+1])) plt.autoscale(tight=True) # plot transformed harmonic on top of stochastic spectrogram if (yhfreq.shape[1] > 0): harms = yhfreq*np.less(yhfreq,maxplotfreq) harms[harms==0] = np.nan numFrames = int(harms[:,0].size) frmTime = H*np.arange(numFrames)/float(fs) plt.plot(frmTime, harms, color='k', ms=3, alpha=1) plt.xlabel('time (sec)') plt.ylabel('frequency (Hz)') plt.autoscale(tight=True) plt.title('harmonics + stochastic spectrogram (' + channelName + ')') # plot the output sound plt.subplot(subplot2) plt.plot(np.arange(y.size)/float(fs), y) plt.axis([0, y.size/float(fs), min(y), max(y)]) plt.ylabel('amplitude') plt.xlabel('time (sec)') plt.title('output sound: y (' + channelName + ')') plotTransformedSignals('Left' , 411, 413, yhfreqLeft, ystocEnvLeft, yLeft) plotTransformedSignals('Right', 412, 414, yhfreqRight, ystocEnvRight, yRight) plt.tight_layout() plt.show()
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../software/models/')) import hpsModel as HPS import utilFunctions as UF if __name__ == '__main__': (fs, x) = UF.wavread(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../sounds/sax-phrase.wav')) w = np.blackman(601) N = 1024 t = -100 nH = 100 minf0 = 350 maxf0 = 700 f0et = 5 maxnpeaksTwm = 5 minSineDur = .1 harmDevSlope = 0.01 Ns = 512 H = Ns/4 stocf = .2 hfreq, hmag, hphase, mYst = HPS.hpsModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur, Ns, stocf) y, yh, yst = HPS.hpsModelSynth(hfreq, hmag, hphase, mYst, Ns, H, fs) # UF.play(y, fs) # UF.play(yh, fs) # UF.play(yst, fs) UF.wavwrite(y,fs,'sax-phrase-total-synthesis.wav') UF.wavwrite(yh,fs,'sax-phrase-harmonic-component.wav') UF.wavwrite(yst,fs,'sax-phrase-stochastic-component.wav')
def main( inputFile="../../sounds/sax-phrase-short.wav", window="blackman", M=601, N=1024, t=-100, minSineDur=0.1, nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01, stocf=0.1, ): """ inputFile: input sound file (monophonic with sampling rate of 44100) window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris) M: analysis window size; N: fft size (power of two, bigger or equal than M) t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation stocf: decimation factor used for the stochastic approximation """ # size of fft used in synthesis Ns = 512 # hop size (has to be 1/4 of Ns) H = 128 # read input sound (fs, x) = UF.wavread(inputFile) # compute analysis window w = get_window(window, M) # compute the harmonic plus stochastic model of the whole sound hfreq, hmag, hphase, stocEnv = HPS.hpsModelAnal( x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur, Ns, stocf ) # synthesize a sound from the harmonic plus stochastic representation y, yh, yst = HPS.hpsModelSynth(hfreq, hmag, hphase, stocEnv, Ns, H, fs) # output sound file (monophonic with sampling rate of 44100) outputFileSines = "output_sounds/" + os.path.basename(inputFile)[:-4] + "_hpsModel_sines.wav" outputFileStochastic = "output_sounds/" + os.path.basename(inputFile)[:-4] + "_hpsModel_stochastic.wav" outputFile = "output_sounds/" + os.path.basename(inputFile)[:-4] + "_hpsModel.wav" # write sounds files for harmonics, stochastic, and the sum UF.wavwrite(yh, fs, outputFileSines) UF.wavwrite(yst, fs, outputFileStochastic) UF.wavwrite(y, fs, outputFile) # create figure to plot plt.figure(figsize=(12, 9)) # frequency range to plot maxplotfreq = 15000.0 # plot the input sound plt.subplot(3, 1, 1) plt.plot(np.arange(x.size) / float(fs), x) plt.axis([0, x.size / float(fs), min(x), max(x)]) plt.ylabel("amplitude") plt.xlabel("time (sec)") plt.title("input sound: x") # plot spectrogram stochastic component plt.subplot(3, 1, 2) numFrames = int(stocEnv[:, 0].size) sizeEnv = int(stocEnv[0, :].size) frmTime = H * np.arange(numFrames) / float(fs) binFreq = (0.5 * fs) * np.arange(sizeEnv * maxplotfreq / (0.5 * fs)) / sizeEnv plt.pcolormesh(frmTime, binFreq, np.transpose(stocEnv[:, : sizeEnv * maxplotfreq / (0.5 * fs) + 1])) plt.autoscale(tight=True) # plot harmonic on top of stochastic spectrogram if hfreq.shape[1] > 0: harms = hfreq * np.less(hfreq, maxplotfreq) harms[harms == 0] = np.nan numFrames = harms.shape[0] frmTime = H * np.arange(numFrames) / float(fs) plt.plot(frmTime, harms, color="k", ms=3, alpha=1) plt.xlabel("time (sec)") plt.ylabel("frequency (Hz)") plt.autoscale(tight=True) plt.title("harmonics + stochastic spectrogram") # plot the output sound plt.subplot(3, 1, 3) plt.plot(np.arange(y.size) / float(fs), y) plt.axis([0, y.size / float(fs), min(y), max(y)]) plt.ylabel("amplitude") plt.xlabel("time (sec)") plt.title("output sound: y") plt.tight_layout() plt.show(block=False)