def test_reconstruct_sound():
    fs, x = audio.read_wav(sound_path("sax-phrase-short.wav"))
    window_size, fft_size, hop_size = 4001, 4096, 2048
    window = get_window('hamming', window_size)
    xtfreq, xtmag, xtphase = harmonic.from_audio(
        x, fs, window, fft_size, hop_size,
        t=-80, nH=20, minf0=100, maxf0=2000, f0et=5,
        harmDevSlope=0.01, minSineDur=.02)
    x_reconstructed = sine.to_audio(xtfreq, xtmag, xtphase, fft_size, hop_size, fs)

    assert 138746 == len(x)

    expected_frame_count = int(math.ceil(float(len(x)) / hop_size))
    assert expected_frame_count == len(xtfreq)
    assert expected_frame_count == len(xtmag)
    assert expected_frame_count == len(xtphase)

    assert xtfreq.shape[1] <= 100

    # statistics of the model for regression testing
    # without explicitly storing the whole data
    assert np.allclose(1738.618043903208, xtfreq.mean())
    assert np.allclose(-64.939768348945279, xtmag.mean())
    assert np.allclose(1.6687005886001871, xtphase.mean())

    # TODO: this is completely off, it should be equal to len(x)!
    assert 69 * 2048 == len(x_reconstructed)

    assert np.allclose(0.036941947007791701, rmse(x, x_reconstructed[:len(x)]))
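# The rmse helper used by these regression tests is not defined in this
# section; a minimal sketch, assuming it computes the root-mean-square error
# between two aligned signals of equal length (np is numpy, imported below):
def rmse(x, y):
    """Root-mean-square error between two equal-length signals."""
    return np.sqrt(np.mean((x - y) ** 2))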
def test_reconstruct_sound():
    fs, x = audio.read_wav(sound_path("sax-phrase-short.wav"))
    window_size, fft_size, hop_size = 4001, 4096, 2048
    window = get_window('hamming', window_size)
    mag_spectrogram, phase_spectrogram = stft.from_audio(x, window, fft_size, hop_size)
    x_reconstructed = stft.to_audio(mag_spectrogram, phase_spectrogram,
                                    window_size, hop_size)

    assert 138746 == len(x)

    expected_frame_count = int(math.ceil(float(len(x)) / hop_size))
    assert expected_frame_count == len(mag_spectrogram)
    assert expected_frame_count == len(phase_spectrogram)

    # statistics of the spectrogram for regression testing
    # without explicitly storing the whole data
    assert np.allclose(-102.86187076588583, np.mean(mag_spectrogram))
    assert np.allclose(11.368333745102881, np.mean(phase_spectrogram))

    # TODO: should be the same as len(x)
    assert expected_frame_count * hop_size == len(x_reconstructed)

    assert np.allclose(0.0014030089623073237, rmse(x, x_reconstructed[:len(x)]))
def test_reconstruct_sound():
    fs, x = audio.read_wav(sound_path("sax-phrase-short.wav"))
    window_size, fft_size, hop_size = 4001, 4096, 2048
    window = get_window('hamming', window_size)
    xtfreq, xtmag, xtphase = sine.from_audio(
        x, fs, window, fft_size, hop_size,
        t=-80, maxnSines=100, minSineDur=.01,
        freqDevOffset=20, freqDevSlope=0.01)
    x_reconstructed = sine.to_audio(xtfreq, xtmag, xtphase, fft_size, hop_size, fs)

    assert 138746 == len(x)

    expected_frame_count = int(math.ceil(float(len(x)) / hop_size))
    assert expected_frame_count == len(xtfreq)
    assert expected_frame_count == len(xtmag)
    assert expected_frame_count == len(xtphase)

    assert xtfreq.shape[1] <= 100

    # statistics of the model for regression testing
    # without explicitly storing the whole data
    assert np.allclose(945.892990545, xtfreq.mean())
    assert np.allclose(-30.3138495002, xtmag.mean())
    assert np.allclose(1.34449391701, xtphase.mean())

    # TODO: this is completely off, it should be equal to len(x)!
    assert 69 * 2048 == len(x_reconstructed)

    assert np.allclose(0.010812475879315771, rmse(x, x_reconstructed[:len(x)]))
def main(inputFile=demo_sound_path('ocean.wav'), H=256, N=512, stocf=.1,
         interactive=True, plotFile=False):
    """
    inputFile: input sound file (monophonic with sampling rate of 44100)
    H: hop size, N: fft size
    stocf: decimation factor used for the stochastic approximation
           (bigger than 0, maximum 1)
    """
    # read input sound
    (fs, x) = audio.read_wav(inputFile)

    # compute stochastic model
    stocEnv = stochastic.from_audio(x, H, N, stocf)

    # synthesize sound from stochastic model
    y = stochastic.to_audio(stocEnv, H, N)

    outputFile = 'output_sounds/' + strip_file(inputFile) + '_stochasticModel.wav'

    # write output sound
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot stochastic representation
    plt.subplot(3, 1, 2)
    numFrames = int(stocEnv.shape[0])
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = np.arange(stocf * (N / 2 + 1)) * float(fs) / (stocf * N)
    plt.pcolormesh(frmTime, binFreq, np.transpose(stocEnv))
    plt.autoscale(tight=True)
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('stochastic approximation')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_stochastic_model.png' % files.strip_file(inputFile))
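# A hypothetical non-interactive driver for the entry point above; the same
# pattern applies to the other main() functions in this section. It assumes
# the output_sounds/ and output_plots/ directories already exist:
if __name__ == '__main__':
    main(interactive=False, plotFile=True)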
def test_reconstruct_sound():
    fs, x = audio.read_wav(sound_path("sax-phrase-short.wav"))
    window_size, fft_size, hop_size = 2001, 2048, 128
    window = get_window('hamming', window_size)

    # fix the random seed for reproducibility
    np.random.seed(42)

    xtfreq, xtmag, xtphase, stocEnv = hps.from_audio(
        x, fs, window, fft_size, hop_size,
        t=-80, minSineDur=.02, nH=20, minf0=100, maxf0=2000, f0et=5,
        harmDevSlope=0.01, Ns=512, stocf=0.5)
    x_reconstructed, x_sine, x_stochastic = hps.to_audio(
        xtfreq, xtmag, xtphase, stocEnv, 512, hop_size, fs)

    assert 138746 == len(x)

    expected_frame_count = int(math.ceil(float(len(x)) / hop_size))
    assert expected_frame_count == len(xtfreq)
    assert expected_frame_count == len(xtmag)
    assert expected_frame_count == len(xtphase)

    assert xtfreq.shape[1] <= 100

    # statistics of the model for regression testing
    # without explicitly storing the whole data
    assert np.allclose(1731.8324721982437, xtfreq.mean())
    assert np.allclose(-69.877742948220671, xtmag.mean())
    assert np.allclose(1.8019294703328628, xtphase.mean())

    # TODO: this is completely off, it should be equal to len(x)!
    assert 1083 * 128 == len(x_reconstructed)
    assert 1083 * 128 == len(x_sine)
    # TODO: even worse, the stochastic part is two frames longer still
    assert 1085 * 128 == len(x_stochastic)

    assert np.allclose(0.038065851967889502, rmse(x[:len(x_reconstructed)], x_reconstructed))
    assert np.allclose(0.025543282494159769, rmse(x[:len(x_reconstructed)], x_sine))
    assert np.allclose(
        0.097999320671614418,
        rmse(x[:len(x_reconstructed)], x_stochastic[:len(x_reconstructed)]))
    assert np.allclose(
        0.0,
        rmse(x_sine + x_stochastic[:len(x_reconstructed)], x_reconstructed))
def test_reconstruct_sound():
    fs, x = audio.read_wav(sound_path("sax-phrase-short.wav"))
    window_size, fft_size, hop_size = 2001, 2048, 128
    window = get_window('hamming', window_size)

    # fix the random seed for reproducibility
    np.random.seed(42)

    xtfreq, xtmag, xtphase, stocEnv = sps.from_audio(
        x, fs, window, fft_size, hop_size,
        t=-80, maxnSines=100, minSineDur=.01,
        freqDevOffset=20, freqDevSlope=0.01, stocf=0.5)
    x_reconstructed, x_sine, x_stochastic = sps.to_audio(
        xtfreq, xtmag, xtphase, stocEnv, 512, hop_size, fs)

    assert 138746 == len(x)

    expected_frame_count = int(math.ceil(float(len(x)) / hop_size))
    assert expected_frame_count == len(xtfreq)
    assert expected_frame_count == len(xtmag)
    assert expected_frame_count == len(xtphase)

    assert xtfreq.shape[1] <= 100

    # statistics of the model for regression testing
    # without explicitly storing the whole data
    assert np.allclose(799.3384358567838, xtfreq.mean())
    assert np.allclose(-24.080251067421795, xtmag.mean())
    assert np.allclose(1.0900513921895467, xtphase.mean())

    # TODO: this is completely off, it should be equal to len(x)!
    assert 1083 * 128 == len(x_reconstructed)
    assert 1083 * 128 == len(x_sine)
    # TODO: even worse, the stochastic part is two frames longer still
    assert 1085 * 128 == len(x_stochastic)

    assert np.allclose(0.0061891379818097133, rmse(x[:len(x_reconstructed)], x_reconstructed))
    assert np.allclose(0.0043912712540510645, rmse(x[:len(x_reconstructed)], x_sine))
    assert np.allclose(
        0.093780097561056638,
        rmse(x[:len(x_reconstructed)], x_stochastic[:len(x_reconstructed)]))
    assert np.allclose(
        0.0,
        rmse(x_sine + x_stochastic[:len(x_reconstructed)], x_reconstructed))
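# The tests above share a preamble that is not shown in this section; a sketch
# of what they rely on, with module paths inferred from the calls in the test
# bodies and sound_path() assumed to resolve files in a test sound folder
# (the rmse helper is sketched after the first test above):
import math
import os

import numpy as np
from scipy.signal import get_window

from smst.models import harmonic, hps, sine, sps, stft
from smst.utils import audio


def sound_path(name):
    # assumed helper: resolve a test sound relative to this file
    return os.path.join(os.path.dirname(__file__), 'sounds', name)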
    pX = np.angle(X[:hN])
    # -----synthesis-----
    mY = resample(mXenv, hN)  # interpolate to original size
    pY = 2 * np.pi * np.random.rand(hN)  # generate random phase values
    Y = np.zeros(N, dtype=complex)
    Y[:hN] = 10**(mY / 20) * np.exp(1j * pY)  # generate positive freq.
    Y[hN:] = 10**(mY[-2:0:-1] / 20) * np.exp(-1j * pY[-2:0:-1])  # generate negative freq.
    fftbuffer = np.real(ifft(Y))  # inverse FFT
    y = fftbuffer * N / 2
    return mX, pX, mY, pY, y


# example call of the stochasticModelFrame function
if __name__ == '__main__':
    (fs, x) = audio.read_wav('../../../sounds/ocean.wav')
    w = np.hanning(1024)
    N = 1024
    stocf = 0.2
    maxFreq = 10000.0
    lastbin = N * maxFreq / fs
    first = 1000
    last = first + w.size
    mX, pX, mY, pY, y = stochasticModelFrame(x[first:last], w, N, stocf)

    plt.figure(1, figsize=(9, 7))
    plt.subplot(4, 1, 1)
    plt.plot(np.arange(first, last) / float(fs), x[first:last])
    plt.axis([
        first / float(fs), last / float(fs), min(x[first:last]),
import math

# matplotlib without any blocking GUI
import matplotlib as mpl

mpl.use('Agg')

import matplotlib.pyplot as plt
import numpy as np
from scipy.signal import resample

from smst.utils import audio
from smst.models import dft

(fs, x1) = audio.read_wav('../../../sounds/orchestra.wav')
(fs, x2) = audio.read_wav('../../../sounds/speech-male.wav')
w1 = np.hamming(1024)
N1 = 1024
H1 = 256
w2 = np.hamming(1024)
N2 = 1024
smoothf = .1
balancef = .7

M1 = w1.size  # size of analysis window
hM1_1 = int(math.floor((M1 + 1) / 2))  # half analysis window size by rounding
hM1_2 = int(math.floor(M1 / 2))  # half analysis window size by floor
M2 = w2.size  # size of analysis window
hM2_1 = int(math.floor((M2 + 1) / 2))  # half analysis window size by rounding
hM2_2 = int(math.floor(M2 / 2))  # half analysis window size by floor
loc1 = 14843
loc2 = 9294
# matplotlib without any blocking GUI
import matplotlib as mpl

mpl.use('Agg')

import matplotlib.pyplot as plt
import numpy as np

from smst.utils import audio
from smst.models import sine, stft

(fs, x) = audio.read_wav('../../../sounds/flute-A4.wav')
w = np.blackman(601)
N = 1024
H = 150
t = -80
minSineDur = .1
maxnSines = 150
mX, pX = stft.from_audio(x, w, N, H)
tfreq, tmag, tphase = sine.from_audio(x, fs, w, N, H, t, maxnSines, minSineDur)

plt.figure(1, figsize=(9.5, 5))
maxplotfreq = 5000.0
maxplotbin = int(N * maxplotfreq / fs)
numFrames = int(mX.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = np.arange(maxplotbin + 1) * float(fs) / N
plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:, :maxplotbin + 1]))
plt.autoscale(tight=True)

tracks = tfreq * np.less(tfreq, maxplotfreq)
tracks[tracks <= 0] = np.nan
def main(inputFile=demo_sound_path('bendir.wav'), window='hamming', M=2001,
         N=2048, t=-80, minSineDur=0.02, maxnSines=150, freqDevOffset=10,
         freqDevSlope=0.001, interactive=True, plotFile=False):
    """
    Perform analysis/synthesis using the sinusoidal model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    """
    # size of fft used in synthesis
    Ns = 512
    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    fs, x = audio.read_wav(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # analyze the sound with the sinusoidal model
    tfreq, tmag, tphase = sine.from_audio(x, fs, w, N, H, t, maxnSines,
                                          minSineDur, freqDevOffset, freqDevSlope)

    # synthesize the output sound from the sinusoidal representation
    y = sine.to_audio(tfreq, tmag, tphase, Ns, H, fs)

    # output sound file name
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_sineModel.wav'

    # write the synthesized sound obtained from the sinusoidal synthesis
    audio.write_wav(y, fs, outputFile)

    # create figure to show plots
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the sinusoidal frequencies
    plt.subplot(3, 1, 2)
    if (tfreq.shape[1] > 0):
        numFrames = tfreq.shape[0]
        frmTime = H * np.arange(numFrames) / float(fs)
        tfreq[tfreq <= 0] = np.nan
        plt.plot(frmTime, tfreq)
        plt.axis([0, x.size / float(fs), 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_sine_model.png' % files.strip_file(inputFile))
# matplotlib without any blocking GUI
import matplotlib as mpl

mpl.use('Agg')

import matplotlib.pyplot as plt
import numpy as np

from smst.utils import audio
from smst.models import sine, stft

plt.figure(1, figsize=(9, 7))

plt.subplot(211)
(fs, x) = audio.read_wav('../../../sounds/vibraphone-C6.wav')
w = np.blackman(401)
N = 512
H = 100
t = -100
minSineDur = .02
maxnSines = 150
freqDevOffset = 20
freqDevSlope = 0.01
mX, pX = stft.from_audio(x, w, N, H)
tfreq, tmag, tphase = sine.from_audio(x, fs, w, N, H, t, maxnSines, minSineDur,
                                      freqDevOffset, freqDevSlope)

maxplotfreq = 10000.0
maxplotbin = int(N * maxplotfreq / fs)
numFrames = int(mX.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = np.arange(maxplotbin + 1) * float(fs) / N
import essentia.standard as ess
# matplotlib without any blocking GUI
import matplotlib as mpl

mpl.use('Agg')

import matplotlib.pyplot as plt
import numpy as np

from smst.utils import audio

(fs, x) = audio.read_wav('../../../sounds/piano.wav')
start = 13860
M = 800
xp = x[start:start + M] / float(max(x[start:start + M]))
r = ess.AutoCorrelation(normalization='standard')(xp)
r = r / max(r)
peaks = ess.PeakDetection(threshold=.11, interpolate=False, minPosition=.01)(r)

plt.figure(1, figsize=(9, 7))
plt.subplot(211)
plt.plot(np.arange(M) / float(fs), xp, lw=1.5)
plt.axis([0, (M - 1) / float(fs), min(xp), max(xp)])
plt.xlabel('time (sec)')
plt.ylabel('amplitude')
plt.title('x (piano.wav)')

plt.subplot(212)
plt.plot(np.arange(M) / float(fs), r, 'r', lw=1.5)
plt.plot(peaks[0] * (M - 1) / float(fs), peaks[1], 'x',
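# essentia's PeakDetection returns positions normalized to [0, 1] over the
# input array (the plot above already rescales them by M - 1), so the first
# detected peak of the autocorrelation can be turned into a rough f0 estimate;
# a sketch, assuming the first peak corresponds to the true pitch period:
lag = peaks[0][0] * (M - 1)  # peak position converted back to a lag in samples
f0 = fs / lag                # pitch period in samples -> fundamental in Hz
print('estimated f0: %.1f Hz' % f0)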
inputFile2 = '../../../sounds/soprano-E4.wav'
window2 = 'blackman'
M2 = 901
N2 = 1024
t2 = -100
minSineDur2 = 0.05
minf02 = 250
maxf02 = 500
f0et2 = 10
harmDevSlope2 = 0.01

Ns = 512
H = 128

(fs1, x1) = audio.read_wav(inputFile1)
(fs2, x2) = audio.read_wav(inputFile2)
w1 = get_window(window1, M1)
w2 = get_window(window2, M2)
hfreq1, hmag1, hphase1, stocEnv1 = hps.from_audio(x1, fs1, w1, N1, H, t1, nH,
                                                  minf01, maxf01, f0et1,
                                                  harmDevSlope1, minSineDur1,
                                                  Ns, stocf)
hfreq2, hmag2, hphase2, stocEnv2 = hps.from_audio(x2, fs2, w2, N2, H, t2, nH,
                                                  minf02, maxf02, f0et2,
                                                  harmDevSlope2, minSineDur2,
                                                  Ns, stocf)
hfreqIntp = np.array([0, .5, 1, .5])
hmagIntp = np.array([0, .5, 1, .5])
stocIntp = np.array([0, .5, 1, .5])
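# The *Intp arrays are (normalized-time, interpolation-factor) pairs, where a
# factor of 0 keeps sound 1 and 1 keeps sound 2; here all three stay at 0.5.
# A sketch of how such an envelope could be sampled per frame and used to
# cross-fade the harmonic magnitudes (illustrative only; the actual morphing
# routine is not shown in this section):
L = min(hmag1.shape[0], hmag2.shape[0])  # number of common frames
framePos = np.arange(L) / float(L - 1)   # normalized frame times in [0, 1]
factors = np.interp(framePos, hmagIntp[::2], hmagIntp[1::2])[:, np.newaxis]
ymag = (1 - factors) * hmag1[:L] + factors * hmag2[:L]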
def main(inputFile=demo_sound_path('piano.wav'), window='blackman', M=511,
         N=1024, time=.2, interactive=True, plotFile=False):
    """
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (choice of rectangular, hanning, hamming,
            blackman, blackmanharris)
    M: analysis window size (odd integer value)
    N: fft size (power of two, bigger or equal than M)
    time: time to start analysis (in seconds)
    """
    # read input sound (monophonic with sampling rate of 44100)
    fs, x = audio.read_wav(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # get a fragment of the input sound of size M
    sample = int(time * fs)
    if (sample + M >= x.size or sample < 0):
        # raise error if time outside of sound
        raise ValueError("Time outside sound boundaries")
    x_frame = x[sample:sample + M]

    # compute the dft of the sound fragment
    mX, pX = dft.from_audio(x_frame, w, N)

    # compute the inverse dft of the spectrum
    y = dft.to_audio(mX, pX, w.size) * sum(w)

    # create figure
    plt.figure(figsize=(12, 9))

    # plot the sound fragment
    plt.subplot(4, 1, 1)
    plt.plot(time + np.arange(M) / float(fs), x_frame)
    plt.axis([time, time + M / float(fs), min(x_frame), max(x_frame)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the magnitude spectrum
    plt.subplot(4, 1, 2)
    plt.plot(float(fs) * np.arange(mX.size) / float(N), mX, 'r')
    plt.axis([0, fs / 2.0, min(mX), max(mX)])
    plt.title('magnitude spectrum: mX')
    plt.ylabel('amplitude (dB)')
    plt.xlabel('frequency (Hz)')

    # plot the phase spectrum
    plt.subplot(4, 1, 3)
    plt.plot(float(fs) * np.arange(pX.size) / float(N), pX, 'c')
    plt.axis([0, fs / 2.0, min(pX), max(pX)])
    plt.title('phase spectrum: pX')
    plt.ylabel('phase (radians)')
    plt.xlabel('frequency (Hz)')

    # plot the sound resulting from the inverse dft
    plt.subplot(4, 1, 4)
    plt.plot(time + np.arange(M) / float(fs), y)
    plt.axis([time, time + M / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_dft_model.png' % files.strip_file(inputFile))
# matplotlib without any blocking GUI
import matplotlib as mpl

mpl.use('Agg')

import matplotlib.pyplot as plt
import numpy as np

from smst.utils import audio, peaks
from smst.models import dft

(fs, x) = audio.read_wav('../../../sounds/sine-440-490.wav')
w = np.hamming(3529)
N = 32768
hN = N // 2
t = -20
pin = 4850
x1 = x[pin:pin + w.size]
mX1, pX1 = dft.from_audio(x1, w, N)
ploc = peaks.find_peaks(mX1, t)
pmag = mX1[ploc]
iploc, ipmag, ipphase = peaks.interpolate_peaks(mX1, pX1, ploc)

plt.figure(1, figsize=(9, 6))
plt.subplot(311)
plt.plot(fs * np.arange(pX1.size) / float(N), pX1, 'c', lw=1.5)
plt.plot(fs * iploc / N, ipphase, marker='x', color='b', alpha=1, linestyle='',
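# peaks.interpolate_peaks presumably refines the integer peak bins; the
# classic parabolic refinement such functions are based on can be sketched by
# fitting a parabola through each peak bin and its two neighbours:
lval, pval, rval = mX1[ploc - 1], mX1[ploc], mX1[ploc + 1]
iploc_sketch = ploc + 0.5 * (lval - rval) / (lval - 2 * pval + rval)
ipmag_sketch = pval - 0.25 * (lval - rval) * (iploc_sketch - ploc)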
# matplotlib without any blocking GUI
import matplotlib as mpl

mpl.use('Agg')

import matplotlib.pyplot as plt
import numpy as np

from smst.utils import audio
from smst.models import harmonic, sine

(fs, x) = audio.read_wav('../../../sounds/vignesh.wav')
w = np.blackman(1201)
N = 2048
t = -90
nH = 100
minf0 = 130
maxf0 = 300
f0et = 7
Ns = 512
H = Ns // 4
minSineDur = .1
harmDevSlope = 0.01
hfreq, hmag, hphase = harmonic.from_audio(x, fs, w, N, H, t, nH, minf0, maxf0,
                                          f0et, harmDevSlope, minSineDur)
y = sine.to_audio(hfreq, hmag, hphase, Ns, H, fs)
numFrames = int(hfreq.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)

plt.figure(1, figsize=(9, 7))
# matplotlib without any blocking GUI
import matplotlib as mpl

mpl.use('Agg')

import matplotlib.pyplot as plt
import numpy as np

from smst.utils import audio
from smst.models import dft

(fs, x) = audio.read_wav('../../../sounds/sine-440.wav')
M = 400
x1 = x[2000:2000 + M]
N = 2048
hM = int(M / 2.0)
w = np.hamming(M)
mX, pX = dft.from_audio(x1, w, N)
freqaxis = fs * np.arange(0, mX.size) / float(N)
taxis = np.arange(N) / float(fs)

plt.figure(1, figsize=(9.5, 7))
plt.subplot(3, 1, 1)
plt.plot(np.arange(M) / float(fs), x1, 'b', lw=1.5)
plt.axis([0, (M - 1) / float(fs), min(x1) - .1, max(x1) + .1])
plt.title('x (sine-440.wav)')

plt.subplot(3, 1, 2)
plt.plot(freqaxis, mX, 'r', lw=1.5)
plt.axis([0, fs / 10, -80, max(mX) + 1])
plt.title('mX')
def analysis(inputFile=demo_sound_path('sax-phrase-short.wav'), window='blackman',
             M=601, N=1024, t=-100, minSineDur=0.1, nH=100, minf0=350, maxf0=700,
             f0et=5, harmDevSlope=0.01, stocf=0.1, interactive=True, plotFile=False):
    """
    Analyze a sound with the harmonic plus stochastic model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics
    minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound
    f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
    stocf: decimation factor used for the stochastic approximation
    returns inputFile: input file name; fs: sampling rate of input file,
            hfreq, hmag: harmonic frequencies, magnitude; mYst: stochastic residual
    """
    # size of fft used in synthesis
    Ns = 512
    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    (fs, x) = audio.read_wav(inputFile)
    # compute analysis window
    w = get_window(window, M)
    # compute the harmonic plus stochastic model of the whole sound
    hfreq, hmag, hphase, mYst = hps.from_audio(x, fs, w, N, H, t, nH, minf0,
                                               maxf0, f0et, harmDevSlope,
                                               minSineDur, Ns, stocf)
    # synthesize the harmonic plus stochastic model without original phases
    y, yh, yst = hps.to_audio(hfreq, hmag, np.array([]), mYst, Ns, H, fs)

    # write output sound
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_hpsModel.wav'
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 15000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot spectrogram of the stochastic component
    plt.subplot(3, 1, 2)
    numFrames = int(mYst.shape[0])
    sizeEnv = int(mYst.shape[1])
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = (.5 * fs) * np.arange(sizeEnv * maxplotfreq / (.5 * fs)) / sizeEnv
    plt.pcolormesh(frmTime, binFreq,
                   np.transpose(mYst[:, :int(sizeEnv * maxplotfreq / (.5 * fs)) + 1]))
    plt.autoscale(tight=True)

    # plot harmonics on top of stochastic spectrogram
    if (hfreq.shape[1] > 0):
        harms = hfreq * np.less(hfreq, maxplotfreq)
        harms[harms == 0] = np.nan
        numFrames = int(harms.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
        plt.xlabel('time (sec)')
        plt.ylabel('frequency (Hz)')
        plt.autoscale(tight=True)
        plt.title('harmonics + stochastic spectrogram')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show(block=False)
    if plotFile:
        plt.savefig('output_plots/%s_hps_transformation_analysis.png' %
                    files.strip_file(inputFile))

    return inputFile, fs, hfreq, hmag, mYst
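# A hypothetical driver for the analysis step above; the returned values are
# what a subsequent hps transformation/synthesis step would consume:
if __name__ == '__main__':
    inputFile, fs, hfreq, hmag, mYst = analysis(interactive=False)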
import math

# matplotlib without any blocking GUI
import matplotlib as mpl

mpl.use('Agg')

import matplotlib.pyplot as plt
import numpy as np

from smst.utils import audio
from smst.models import dft

(fs, x) = audio.read_wav('../../../sounds/violin-B3.wav')
w = np.hamming(1024)
N = 1024
pin = 5000
hM1 = int(math.floor((w.size + 1) / 2))
hM2 = int(math.floor(w.size / 2))
x1 = x[pin - hM1:pin + hM2]
mX, pX = dft.from_audio(x1, w, N)

plt.figure(1, figsize=(9.5, 5))
plt.subplot(311)
plt.plot(np.arange(-hM1, hM2), x1, lw=1.5)
plt.axis([-hM1, hM2, min(x1), max(x1)])
plt.ylabel('amplitude')
plt.title('x (violin-B3.wav)')

plt.subplot(3, 1, 2)
plt.plot(np.arange(mX.size), mX, 'r', lw=1.5)
plt.axis([0, mX.size, -90, max(mX)])
plt.title('magnitude spectrum: mX = 20*log10(abs(X))')
window = 'blackman'
M = 601
N = 1024
t = -100
minSineDur = 0.1
nH = 100
minf0 = 350
maxf0 = 700
f0et = 5
harmDevSlope = 0.01
stocf = 0.1
Ns = 512
H = 128

(fs, x) = audio.read_wav(inputFile)
w = get_window(window, M)
hfreq, hmag, hphase, mYst = hps.from_audio(x, fs, w, N, H, t, nH, minf0, maxf0,
                                           f0et, harmDevSlope, minSineDur, Ns,
                                           stocf)
timeScaling = np.array([0, 0, 2.138, 2.138 - 1.5, 3.146, 3.146])
yhfreq, yhmag, ystocEnv = hps.scale_time(hfreq, hmag, mYst, timeScaling)
y, yh, yst = hps.to_audio(yhfreq, yhmag, np.array([]), ystocEnv, Ns, H, fs)
audio.write_wav(y, fs, 'hps-transformation.wav')

plt.figure(figsize=(12, 9))
maxplotfreq = 14900.0

# plot the input sound
import essentia.standard as ess
# matplotlib without any blocking GUI
import matplotlib as mpl

mpl.use('Agg')

import matplotlib.pyplot as plt
import numpy as np
from scipy.fftpack import fft
from scipy.signal import hamming

from smst.utils import audio

lpc = ess.LPC(order=14)
N = 512
(fs, x) = audio.read_wav('../../../sounds/soprano-E4.wav')
first = 20000
last = first + N
x1 = x[first:last]
X = fft(hamming(N) * x1)
mX = 20 * np.log10(abs(X[:N // 2]))

coeff = lpc(x1)
Y = fft(coeff[0], N)
mY = 20 * np.log10(abs(Y[:N // 2]))

plt.figure(1, figsize=(9, 5))
plt.subplot(2, 1, 1)
plt.plot(np.arange(first, last) / float(fs), x[first:last], 'b', lw=1.5)
plt.axis([
    first / float(fs), last / float(fs), min(x[first:last]),
# matplotlib without any blocking GUI
import matplotlib as mpl

mpl.use('Agg')

import matplotlib.pyplot as plt
import numpy as np

from smst.utils import audio
from smst.models import hps

(fs, x) = audio.read_wav('../../../sounds/sax-phrase-short.wav')
w = np.blackman(601)
N = 1024
t = -100
nH = 100
minf0 = 350
maxf0 = 700
f0et = 5
minSineDur = .1
harmDevSlope = 0.01
Ns = 512
H = Ns // 4
stocf = .2
hfreq, hmag, hphase, mYst = hps.from_audio(x, fs, w, N, H, t, nH, minf0, maxf0,
                                           f0et, harmDevSlope, minSineDur, Ns,
                                           stocf)
y, yh, yst = hps.to_audio(hfreq, hmag, hphase, mYst, Ns, H, fs)

maxplotfreq = 10000.0
plt.figure(1, figsize=(9, 7))
# matplotlib without any blocking GUI
import matplotlib as mpl

mpl.use('Agg')

import matplotlib.pyplot as plt
import numpy as np
from scipy.interpolate import interp1d

from smst.utils import audio
from smst.models import sine

(fs, x) = audio.read_wav('../../../sounds/mridangam.wav')
x1 = x[:int(1.49 * fs)]
w = np.hamming(801)
N = 2048
t = -90
minSineDur = .005
maxnSines = 150
freqDevOffset = 20
freqDevSlope = 0.02
Ns = 512
H = Ns // 4
sfreq, smag, sphase = sine.from_audio(x1, fs, w, N, H, t, maxnSines,
                                      minSineDur, freqDevOffset, freqDevSlope)
timeScale = np.array([
    .01, .0, .03, .03, .335, .8, .355, .82, .671, 1.0, .691, 1.02,
    .858, 1.1, .878, 1.12, 1.185, 1.8, 1.205, 1.82, 1.49, 2.0
])
L = sfreq.shape[0]  # number of input frames
maxInTime = max(timeScale[::2])    # maximum value used as input time
maxOutTime = max(timeScale[1::2])  # maximum value used as output time
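# A sketch of how the (input time, output time) pairs in timeScale can be
# mapped to a fractional input-frame index for every output frame, following
# the usual breakpoint-interpolation approach (the variable names below are
# illustrative, not from this section):
outL = int(L * maxOutTime / maxInTime)           # number of output frames
inFrames = (L - 1) * timeScale[::2] / maxInTime  # input breakpoints in frames
outFrames = outL * timeScale[1::2] / maxOutTime  # output breakpoints in frames
timeScalingEnv = interp1d(outFrames, inFrames, fill_value=0)
indexes = timeScalingEnv(np.arange(outL))        # input frame index per output frame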
def main(inputFile=demo_sound_path('sax-phrase-short.wav'), window='blackman',
         M=601, N=1024, t=-100, minSineDur=0.1, nH=100, minf0=350, maxf0=700,
         f0et=5, harmDevSlope=0.01, interactive=True, plotFile=False):
    """
    Perform analysis/synthesis using the harmonic plus residual model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
    """
    # size of fft used in synthesis
    Ns = 512
    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    (fs, x) = audio.read_wav(inputFile)
    # compute analysis window
    w = get_window(window, M)
    # find harmonics and residual
    hfreq, hmag, hphase, xr = hpr.from_audio(x, fs, w, N, H, t, minSineDur, nH,
                                             minf0, maxf0, f0et, harmDevSlope)
    # compute spectrogram of residual
    mXr, pXr = stft.from_audio(xr, w, N, H)
    # synthesize hpr model
    y, yh = hpr.to_audio(hfreq, hmag, hphase, xr, Ns, H, fs)

    # output sound file (monophonic with sampling rate of 44100)
    baseFileName = files.strip_file(inputFile)
    outputFileSines, outputFileResidual, outputFile = [
        'output_sounds/%s_hprModel%s.wav' % (baseFileName, i)
        for i in ('_sines', '_residual', '')
    ]

    # write sound files for harmonics, residual, and the sum
    audio.write_wav(yh, fs, outputFileSines)
    audio.write_wav(xr, fs, outputFileResidual)
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the magnitude spectrogram of residual
    plt.subplot(3, 1, 2)
    maxplotbin = int(N * maxplotfreq / fs)
    numFrames = int(mXr.shape[0])
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = np.arange(maxplotbin + 1) * float(fs) / N
    plt.pcolormesh(frmTime, binFreq, np.transpose(mXr[:, :maxplotbin + 1]))
    plt.autoscale(tight=True)

    # plot harmonic frequencies on residual spectrogram
    if (hfreq.shape[1] > 0):
        harms = hfreq * np.less(hfreq, maxplotfreq)
        harms[harms == 0] = np.nan
        numFrames = int(harms.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
        plt.xlabel('time (sec)')
        plt.ylabel('frequency (Hz)')
        plt.autoscale(tight=True)
        plt.title('harmonics + residual spectrogram')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_hpr_model.png' % files.strip_file(inputFile))
def main(inputFile=demo_sound_path('rain.wav'), stocf=0.1,
         timeScaling=np.array([0, 0, 1, 2]), interactive=True, plotFile=False):
    """
    function to perform a time scaling using the stochastic model
    inputFile: name of input sound file
    stocf: decimation factor used for the stochastic approximation
    timeScaling: time scaling factors, in time-value pairs
    """
    # hop size
    H = 128

    # read input sound
    (fs, x) = audio.read_wav(inputFile)

    # perform stochastic analysis
    mYst = stochastic.from_audio(x, H, H * 2, stocf)

    # perform time scaling of stochastic representation
    ystocEnv = stochastic.scale_time(mYst, timeScaling)

    # synthesize output sound
    y = stochastic.to_audio(ystocEnv, H, H * 2)

    # write output sound
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_stochasticModelTransformation.wav'
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # plot the input sound
    plt.subplot(4, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot stochastic representation
    plt.subplot(4, 1, 2)
    numFrames = int(mYst.shape[0])
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = np.arange(stocf * H) * float(fs) / (stocf * 2 * H)
    plt.pcolormesh(frmTime, binFreq, np.transpose(mYst))
    plt.autoscale(tight=True)
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('stochastic approximation')

    # plot modified stochastic representation
    plt.subplot(4, 1, 3)
    numFrames = int(ystocEnv.shape[0])
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = np.arange(stocf * H) * float(fs) / (stocf * 2 * H)
    plt.pcolormesh(frmTime, binFreq, np.transpose(ystocEnv))
    plt.autoscale(tight=True)
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('modified stochastic approximation')

    # plot the output sound
    plt.subplot(4, 1, 4)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_stochastic_transformation.png' %
                    files.strip_file(inputFile))
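# timeScaling is a flat array of (input time, output time) breakpoints: the
# default [0, 0, 1, 2] maps input second 0..1 onto output seconds 0..2, a
# uniform 2x slowdown. A hypothetical non-uniform curve, assuming the input is
# at least 1 s long: keep the first half second as is, then stretch the next
# half second to twice its length:
nonUniformScaling = np.array([0, 0, 0.5, 0.5, 1.0, 1.5])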
def main(inputFile1=demo_sound_path('ocean.wav'),
         inputFile2=demo_sound_path('speech-male.wav'),
         window1='hamming', window2='hamming', M1=1024, M2=1024,
         N1=1024, N2=1024, H1=256, smoothf=.5, balancef=0.2,
         interactive=True, plotFile=False):
    """
    Function to perform a morph between two sounds
    inputFile1: name of input sound file to be used as source
    inputFile2: name of input sound file to be used as filter
    window1 and window2: windows for both files
    M1 and M2: window sizes for both files
    N1 and N2: fft sizes for both sounds
    H1: hop size for sound 1 (the one for sound 2 is computed automatically)
    smoothf: smoothing factor to be applied to the magnitude spectrum of sound 2 before morphing
    balancef: balance factor between both sounds, 0 is sound 1 and 1 is sound 2
    """
    # read input sounds
    (fs, x1) = audio.read_wav(inputFile1)
    (fs, x2) = audio.read_wav(inputFile2)

    # compute analysis windows
    w1 = get_window(window1, M1)
    w2 = get_window(window2, M2)

    # perform morphing
    y = stft.morph(x1, x2, fs, w1, N1, w2, N2, H1, smoothf, balancef)

    # compute the magnitude and phase spectrogram of input sound (for plotting)
    mX1, pX1 = stft.from_audio(x1, w1, N1, H1)

    # compute the magnitude and phase spectrogram of output sound (for plotting)
    mY, pY = stft.from_audio(y, w1, N1, H1)

    # write output sound
    outputFile = 'output_sounds/' + os.path.basename(inputFile1)[:-4] + '_stftMorph.wav'
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 10000.0

    # plot sound 1
    plt.subplot(4, 1, 1)
    plt.plot(np.arange(x1.size) / float(fs), x1)
    plt.axis([0, x1.size / float(fs), min(x1), max(x1)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot magnitude spectrogram of sound 1
    plt.subplot(4, 1, 2)
    numFrames = int(mX1.shape[0])
    frmTime = H1 * np.arange(numFrames) / float(fs)
    binFreq = fs * np.arange(N1 * maxplotfreq / fs) / N1
    plt.pcolormesh(frmTime, binFreq,
                   np.transpose(mX1[:, :int(N1 * maxplotfreq / fs) + 1]))
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('magnitude spectrogram of x')
    plt.autoscale(tight=True)

    # plot magnitude spectrogram of morphed sound
    plt.subplot(4, 1, 3)
    numFrames = int(mY.shape[0])
    frmTime = H1 * np.arange(numFrames) / float(fs)
    binFreq = fs * np.arange(N1 * maxplotfreq / fs) / N1
    plt.pcolormesh(frmTime, binFreq,
                   np.transpose(mY[:, :int(N1 * maxplotfreq / fs) + 1]))
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('magnitude spectrogram of y')
    plt.autoscale(tight=True)

    # plot the morphed sound
    plt.subplot(4, 1, 4)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_%s_stft_morph.png' %
                    (files.strip_file(inputFile1), files.strip_file(inputFile2)))
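# The per-frame core of stft.morph can be sketched as follows (an assumption
# about its behaviour, not the actual implementation): the magnitude envelope
# of sound 2 is smoothed by resampling it down by smoothf and back up, then
# blended with sound 1's magnitudes, while sound 1's phases are kept:
from scipy.signal import resample


def morph_frame(mX1_frame, mX2_frame, smoothf, balancef):
    # smooth sound 2's magnitude spectrum, then blend both in the dB domain
    mX2_smooth = resample(np.maximum(-200, mX2_frame),
                          int(mX2_frame.size * smoothf))
    return (balancef * resample(mX2_smooth, mX1_frame.size) +
            (1 - balancef) * mX1_frame)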
# matplotlib without any blocking GUI
import matplotlib as mpl

mpl.use('Agg')

import matplotlib.pyplot as plt
import numpy as np

from smst.utils import audio
from smst.models import dft

(fs, x) = audio.read_wav('../../../sounds/oboe-A4.wav')
w = np.hamming(401)
N = 1024
pin = 5000
x1 = x[pin:pin + w.size]
mX, pX = dft.from_audio(x1, w, N)

plt.figure(1, figsize=(9.5, 7))
plt.subplot(311)
plt.plot(np.arange(pin, pin + w.size) / float(fs), x1, 'b', lw=1.5)
plt.axis([pin / float(fs), (pin + w.size) / float(fs), min(x1), max(x1)])
plt.title('x (oboe-A4.wav), M=401')

plt.subplot(3, 1, 2)
plt.plot(fs * np.arange(mX.size) / float(N), mX, 'r', lw=1.5)
plt.axis([0, 8000, -80, max(mX)])
plt.title('mX; Hamming window, N=1024')

plt.subplot(3, 1, 3)
plt.plot(fs * np.arange(mX.size) / float(N), pX, 'c', lw=1.5)
plt.axis([0, 8000, -12, 15])
def analysis(inputFile1=demo_sound_path('violin-B3.wav'), window1='blackman',
             M1=1001, N1=1024, t1=-100, minSineDur1=0.05, nH=60, minf01=200,
             maxf01=300, f0et1=10, harmDevSlope1=0.01, stocf=0.1,
             inputFile2=demo_sound_path('soprano-E4.wav'), window2='blackman',
             M2=901, N2=1024, t2=-100, minSineDur2=0.05, minf02=250, maxf02=500,
             f0et2=10, harmDevSlope2=0.01, interactive=True, plotFile=False):
    """
    Analyze two sounds with the harmonic plus stochastic model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics
    minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound
    f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
    stocf: decimation factor used for the stochastic approximation
    returns inputFile: input file name; fs: sampling rate of input file,
            hfreq, hmag: harmonic frequencies, magnitude; stocEnv: stochastic residual
    """
    # size of fft used in synthesis
    Ns = 512
    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sounds
    (fs1, x1) = audio.read_wav(inputFile1)
    (fs2, x2) = audio.read_wav(inputFile2)

    # compute analysis windows
    w1 = get_window(window1, M1)
    w2 = get_window(window2, M2)

    # compute the harmonic plus stochastic models
    hfreq1, hmag1, hphase1, stocEnv1 = hps.from_audio(x1, fs1, w1, N1, H, t1,
                                                      nH, minf01, maxf01, f0et1,
                                                      harmDevSlope1, minSineDur1,
                                                      Ns, stocf)
    hfreq2, hmag2, hphase2, stocEnv2 = hps.from_audio(x2, fs2, w2, N2, H, t2,
                                                      nH, minf02, maxf02, f0et2,
                                                      harmDevSlope2, minSineDur2,
                                                      Ns, stocf)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 15000.0

    # plot spectrogram of stochastic component of sound 1
    plt.subplot(2, 1, 1)
    numFrames = int(stocEnv1.shape[0])
    sizeEnv = int(stocEnv1.shape[1])
    frmTime = H * np.arange(numFrames) / float(fs1)
    binFreq = (.5 * fs1) * np.arange(sizeEnv * maxplotfreq / (.5 * fs1)) / sizeEnv
    plt.pcolormesh(
        frmTime, binFreq,
        np.transpose(stocEnv1[:, :int(sizeEnv * maxplotfreq / (.5 * fs1)) + 1]))
    plt.autoscale(tight=True)

    # plot harmonics on top of stochastic spectrogram of sound 1
    if (hfreq1.shape[1] > 0):
        harms = np.copy(hfreq1)
        harms = harms * np.less(harms, maxplotfreq)
        harms[harms == 0] = np.nan
        numFrames = int(harms.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs1)
        plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
        plt.xlabel('time (sec)')
        plt.ylabel('frequency (Hz)')
        plt.autoscale(tight=True)
        plt.title('harmonics + stochastic spectrogram of sound 1')

    # plot spectrogram of stochastic component of sound 2
    plt.subplot(2, 1, 2)
    numFrames = int(stocEnv2.shape[0])
    sizeEnv = int(stocEnv2.shape[1])
    frmTime = H * np.arange(numFrames) / float(fs2)
    binFreq = (.5 * fs2) * np.arange(sizeEnv * maxplotfreq / (.5 * fs2)) / sizeEnv
    plt.pcolormesh(
        frmTime, binFreq,
        np.transpose(stocEnv2[:, :int(sizeEnv * maxplotfreq / (.5 * fs2)) + 1]))
    plt.autoscale(tight=True)

    # plot harmonics on top of stochastic spectrogram of sound 2
    if (hfreq2.shape[1] > 0):
        harms = np.copy(hfreq2)
        harms = harms * np.less(harms, maxplotfreq)
        harms[harms == 0] = np.nan
        numFrames = int(harms.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs2)
        plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
        plt.xlabel('time (sec)')
        plt.ylabel('frequency (Hz)')
        plt.autoscale(tight=True)
        plt.title('harmonics + stochastic spectrogram of sound 2')

    plt.tight_layout()

    if interactive:
        plt.show(block=False)
    if plotFile:
        plt.savefig('output_plots/%s_%s_hps_morph_analysis.png' %
                    (files.strip_file(inputFile1), files.strip_file(inputFile2)))

    return (inputFile1, fs1, hfreq1, hmag1, stocEnv1,
            inputFile2, hfreq2, hmag2, stocEnv2)
def analysis(inputFile=demo_sound_path('mridangam.wav'), window='hamming',
             M=801, N=2048, t=-90, minSineDur=0.01, maxnSines=150,
             freqDevOffset=20, freqDevSlope=0.02, interactive=True,
             plotFile=False):
    """
    Analyze a sound with the sine model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    returns inputFile: input file name; fs: sampling rate of input file,
            tfreq, tmag: sinusoidal frequencies and magnitudes
    """
    # size of fft used in synthesis
    Ns = 512
    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    (fs, x) = audio.read_wav(inputFile)
    # compute analysis window
    w = get_window(window, M)
    # compute the sine model of the whole sound
    tfreq, tmag, tphase = sine.from_audio(x, fs, w, N, H, t, maxnSines,
                                          minSineDur, freqDevOffset, freqDevSlope)
    # synthesize the sines without original phases
    y = sine.to_audio(tfreq, tmag, np.array([]), Ns, H, fs)

    # output sound file (monophonic with sampling rate of 44100)
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_sineModel.wav'
    # write the sound resulting from the inverse stft
    audio.write_wav(y, fs, outputFile)

    # create figure to show plots
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the sinusoidal frequencies
    if (tfreq.shape[1] > 0):
        plt.subplot(3, 1, 2)
        tracks = np.copy(tfreq)
        tracks = tracks * np.less(tracks, maxplotfreq)
        tracks[tracks <= 0] = np.nan
        numFrames = int(tracks.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, tracks)
        plt.axis([0, x.size / float(fs), 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show(block=False)
    if plotFile:
        plt.savefig('output_plots/%s_sine_transformation_analysis.png' %
                    files.strip_file(inputFile))

    return inputFile, fs, tfreq, tmag
# matplotlib without any blocking GUI
import matplotlib as mpl

mpl.use('Agg')

import matplotlib.pyplot as plt
import numpy as np

from smst.utils import audio
from smst.models import sine, stft

(fs, x) = audio.read_wav('../../../sounds/bendir.wav')
w = np.hamming(2001)
N = 2048
H = 200
t = -80
minSineDur = .02
maxnSines = 150
freqDevOffset = 10
freqDevSlope = 0.001
mX, pX = stft.from_audio(x, w, N, H)
tfreq, tmag, tphase = sine.from_audio(x, fs, w, N, H, t, maxnSines, minSineDur,
                                      freqDevOffset, freqDevSlope)

plt.figure(1, figsize=(9.5, 7))
maxplotfreq = 800.0
maxplotbin = int(N * maxplotfreq / fs)
numFrames = int(mX.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = np.arange(maxplotbin + 1) * float(fs) / N
plt.pcolormesh(frmTime, binFreq,
               np.transpose(np.diff(pX[:, :maxplotbin + 1], axis=1)))