Example #1
def test_reconstruct_sound():
    fs, x = audio.read_wav(sound_path("sax-phrase-short.wav"))

    window_size, fft_size, hop_size = 4001, 4096, 2048
    window = get_window('hamming', window_size)

    xtfreq, xtmag, xtphase = harmonic.from_audio(
        x, fs, window, fft_size, hop_size,
        t=-80, nH=20, minf0=100, maxf0=2000, f0et=5, harmDevSlope=0.01, minSineDur=.02)
    x_reconstructed = sine.to_audio(xtfreq, xtmag, xtphase, fft_size, hop_size, fs)

    assert 138746 == len(x)

    expected_frame_count = int(math.ceil(float(len(x)) / hop_size))
    assert expected_frame_count == len(xtfreq)
    assert expected_frame_count == len(xtmag)
    assert expected_frame_count == len(xtphase)

    assert xtfreq.shape[1] <= 100

    # statistics of the model for regression testing without explicitly storing the whole data
    assert np.allclose(1738.618043903208, xtfreq.mean())
    assert np.allclose(-64.939768348945279, xtmag.mean())
    assert np.allclose(1.6687005886001871, xtphase.mean())

    # TODO: this is completely off, it should be equal to len(x)!
    assert 69 * 2048 == len(x_reconstructed)

    assert np.allclose(0.036941947007791701, rmse(x, x_reconstructed[:len(x)]))
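# Note: the tests in these examples call an rmse() helper that is not
# shown in any snippet. A minimal sketch of what it presumably computes
# (root-mean-square error between two equally long signals):
import numpy as np

def rmse(a, b):
    return np.sqrt(np.mean((a - b) ** 2))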
Example #2
def test_reconstruct_sound():
    fs, x = audio.read_wav(sound_path("sax-phrase-short.wav"))

    window_size, fft_size, hop_size = 4001, 4096, 2048
    window = get_window('hamming', window_size)

    mag_spectrogram, phase_spectrogram = stft.from_audio(
        x, window, fft_size, hop_size)
    x_reconstructed = stft.to_audio(mag_spectrogram, phase_spectrogram,
                                    window_size, hop_size)

    assert 138746 == len(x)

    expected_frame_count = int(math.ceil(float(len(x)) / hop_size))
    assert expected_frame_count == len(mag_spectrogram)
    assert expected_frame_count == len(phase_spectrogram)

    # statistics of the spectrogram for regression testing without explicitly storing the whole data
    assert np.allclose(-102.86187076588583, np.mean(mag_spectrogram))
    assert np.allclose(11.368333745102881, np.mean(phase_spectrogram))

    # TODO: should be the same as len(x)
    assert expected_frame_count * hop_size == len(x_reconstructed)

    assert np.allclose(0.0014030089623073237, rmse(x,
                                                   x_reconstructed[:len(x)]))
Example #3
def test_reconstruct_sound():
    fs, x = audio.read_wav(sound_path("sax-phrase-short.wav"))

    window_size, fft_size, hop_size = 4001, 4096, 2048
    window = get_window('hamming', window_size)

    xtfreq, xtmag, xtphase = sine.from_audio(
        x, fs, window, fft_size, hop_size,
        t=-80, maxnSines=100, minSineDur=.01, freqDevOffset=20, freqDevSlope=0.01)
    x_reconstructed = sine.to_audio(xtfreq, xtmag, xtphase, fft_size, hop_size, fs)

    assert 138746 == len(x)

    expected_frame_count = int(math.ceil(float(len(x)) / hop_size))
    assert expected_frame_count == len(xtfreq)
    assert expected_frame_count == len(xtmag)
    assert expected_frame_count == len(xtphase)

    assert xtfreq.shape[1] <= 100

    # statistics of the model for regression testing without explicitly storing the whole data
    assert np.allclose(945.892990545, xtfreq.mean())
    assert np.allclose(-30.3138495002, xtmag.mean())
    assert np.allclose(1.34449391701, xtphase.mean())

    # TODO: this is completely off, it should be equal to len(x)!
    assert 69 * 2048 == len(x_reconstructed)

    assert np.allclose(0.010812475879315771, rmse(x, x_reconstructed[:len(x)]))
Example #4
def main(inputFile=demo_sound_path('ocean.wav'), H=256, N=512, stocf=.1,
         interactive=True, plotFile=False):
    """
    inputFile: input sound file (monophonic with sampling rate of 44100)
    H: hop size, N: fft size
    stocf: decimation factor used for the stochastic approximation (greater than 0, at most 1)
    """

    # read input sound
    (fs, x) = audio.read_wav(inputFile)

    # compute stochastic model
    stocEnv = stochastic.from_audio(x, H, N, stocf)

    # synthesize sound from stochastic model
    y = stochastic.to_audio(stocEnv, H, N)

    outputFile = 'output_sounds/' + strip_file(inputFile) + '_stochasticModel.wav'

    # write output sound
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot stochastic representation
    plt.subplot(3, 1, 2)
    numFrames = int(stocEnv.shape[0])
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = np.arange(stocf * (N / 2 + 1)) * float(fs) / (stocf * N)
    plt.pcolormesh(frmTime, binFreq, np.transpose(stocEnv))
    plt.autoscale(tight=True)
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('stochastic approximation')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_stochastic_model.png' % files.strip_file(inputFile))
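# Hypothetical usage of the demo above: run it non-interactively and save
# the plot instead of opening a window (all parameters as defined in main):
if __name__ == '__main__':
    main(H=256, N=512, stocf=.1, interactive=False, plotFile=True)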
Example #5
def test_reconstruct_sound():
    fs, x = audio.read_wav(sound_path("sax-phrase-short.wav"))

    window_size, fft_size, hop_size = 2001, 2048, 128
    window = get_window('hamming', window_size)

    # fix the random seed for reproducibility
    np.random.seed(42)

    xtfreq, xtmag, xtphase, stocEnv = hps.from_audio(x,
                                                     fs,
                                                     window,
                                                     fft_size,
                                                     hop_size,
                                                     t=-80,
                                                     minSineDur=.02,
                                                     nH=20,
                                                     minf0=100,
                                                     maxf0=2000,
                                                     f0et=5,
                                                     harmDevSlope=0.01,
                                                     Ns=512,
                                                     stocf=0.5)
    x_reconstructed, x_sine, x_stochastic = hps.to_audio(
        xtfreq, xtmag, xtphase, stocEnv, 512, hop_size, fs)

    assert 138746 == len(x)

    expected_frame_count = int(math.ceil(float(len(x)) / hop_size))
    assert expected_frame_count == len(xtfreq)
    assert expected_frame_count == len(xtmag)
    assert expected_frame_count == len(xtphase)

    assert xtfreq.shape[1] <= 100

    # statistics of the model for regression testing without explicitly storing the whole data
    assert np.allclose(1731.8324721982437, xtfreq.mean())
    assert np.allclose(-69.877742948220671, xtmag.mean())
    assert np.allclose(1.8019294703328628, xtphase.mean())

    # TODO: this is completely off, it should be equal to len(x)!
    assert 1083 * 128 == len(x_reconstructed)
    assert 1083 * 128 == len(x_sine)
    # TODO: worse still - the stochastic component has two extra frames (1085 vs 1083)
    assert 1085 * 128 == len(x_stochastic)

    assert np.allclose(0.038065851967889502,
                       rmse(x[:len(x_reconstructed)], x_reconstructed))
    assert np.allclose(0.025543282494159769,
                       rmse(x[:len(x_reconstructed)], x_sine))
    assert np.allclose(
        0.097999320671614418,
        rmse(x[:len(x_reconstructed)], x_stochastic[:len(x_reconstructed)]))
    assert np.allclose(
        0.0, rmse(x_sine + x_stochastic[:len(x_reconstructed)],
                  x_reconstructed))
Example #6
def test_reconstruct_sound():
    fs, x = audio.read_wav(sound_path("sax-phrase-short.wav"))

    window_size, fft_size, hop_size = 2001, 2048, 128
    window = get_window('hamming', window_size)

    # fix the random seed for reproducibility
    np.random.seed(42)

    xtfreq, xtmag, xtphase, stocEnv = sps.from_audio(x,
                                                     fs,
                                                     window,
                                                     fft_size,
                                                     hop_size,
                                                     t=-80,
                                                     maxnSines=100,
                                                     minSineDur=.01,
                                                     freqDevOffset=20,
                                                     freqDevSlope=0.01,
                                                     stocf=0.5)
    x_reconstructed, x_sine, x_stochastic = sps.to_audio(
        xtfreq, xtmag, xtphase, stocEnv, 512, hop_size, fs)

    assert 138746 == len(x)

    expected_frame_count = int(math.ceil(float(len(x)) / hop_size))
    assert expected_frame_count == len(xtfreq)
    assert expected_frame_count == len(xtmag)
    assert expected_frame_count == len(xtphase)

    assert xtfreq.shape[1] <= 100

    # statistics of the model for regression testing without explicitly storing the whole data
    assert np.allclose(799.3384358567838, xtfreq.mean())
    assert np.allclose(-24.080251067421795, xtmag.mean())
    assert np.allclose(1.0900513921895467, xtphase.mean())

    # TODO: this is completely off, it should be equal to len(x)!
    assert 1083 * 128 == len(x_reconstructed)
    assert 1083 * 128 == len(x_sine)
    # TODO: worse still - the stochastic component has two extra frames (1085 vs 1083)
    assert 1085 * 128 == len(x_stochastic)

    assert np.allclose(0.0061891379818097133,
                       rmse(x[:len(x_reconstructed)], x_reconstructed))
    assert np.allclose(0.0043912712540510645,
                       rmse(x[:len(x_reconstructed)], x_sine))
    assert np.allclose(
        0.093780097561056638,
        rmse(x[:len(x_reconstructed)], x_stochastic[:len(x_reconstructed)]))
    assert np.allclose(
        0.0, rmse(x_sine + x_stochastic[:len(x_reconstructed)],
                  x_reconstructed))
Example #7
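# NOTE: this snippet begins in the middle of stochasticModelFrame. The
# missing imports and the analysis half presumably look like the sketch
# below, reconstructed from the synthesis half that follows; treat it as
# an assumption, not the original code.
import numpy as np
import matplotlib.pyplot as plt
from scipy.fftpack import fft, ifft
from scipy.signal import resample

from smst.utils import audio


def stochasticModelFrame(x, w, N, stocf):
    # x: frame of the input sound, w: analysis window, N: FFT size,
    # stocf: decimation factor for the stochastic approximation
    hN = N // 2 + 1  # size of positive spectrum
    w = w / sum(w)  # normalize analysis window
    # -----analysis-----
    xw = x * w  # window the input sound
    X = fft(xw, N)  # compute FFT
    mX = 20 * np.log10(abs(X[:hN]))  # magnitude spectrum of positive frequencies
    mXenv = resample(np.maximum(-200, mX), int(stocf * mX.size))  # decimated envelope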
    pX = np.angle(X[:hN])
    # -----synthesis-----
    mY = resample(mXenv, hN)  # interpolate to original size
    pY = 2 * np.pi * np.random.rand(hN)  # generate random phase values
    Y = np.zeros(N, dtype=complex)
    Y[:hN] = 10**(mY / 20) * np.exp(1j * pY)  # generate positive freq.
    Y[hN:] = 10**(mY[-2:0:-1] / 20) * np.exp(
        -1j * pY[-2:0:-1])  # generate negative freq.
    fftbuffer = np.real(ifft(Y))  # inverse FFT
    y = fftbuffer * N / 2
    return mX, pX, mY, pY, y


# example call of stochasticModel function
if __name__ == '__main__':
    (fs, x) = audio.read_wav('../../../sounds/ocean.wav')
    w = np.hanning(1024)
    N = 1024
    stocf = 0.2
    maxFreq = 10000.0
    lastbin = N * maxFreq / fs
    first = 1000
    last = first + w.size
    mX, pX, mY, pY, y = stochasticModelFrame(x[first:last], w, N, stocf)

    plt.figure(1, figsize=(9, 7))
    plt.subplot(4, 1, 1)
    plt.plot(np.arange(first, last) / float(fs), x[first:last])
    plt.axis([
        first / float(fs), last / float(fs),
        min(x[first:last]),
        max(x[first:last])
    ])
Example #8
import math
# matplotlib without any blocking GUI
import matplotlib as mpl

mpl.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
from scipy.signal import resample

from smst.utils import audio
from smst.models import dft

(fs, x1) = audio.read_wav('../../../sounds/orchestra.wav')
(fs, x2) = audio.read_wav('../../../sounds/speech-male.wav')
w1 = np.hamming(1024)
N1 = 1024
H1 = 256
w2 = np.hamming(1024)
N2 = 1024
smoothf = .1
balancef = .7

M1 = w1.size  # size of analysis window
hM1_1 = int(math.floor((M1 + 1) / 2))  # half analysis window size by rounding
hM1_2 = int(math.floor(M1 / 2))  # half analysis window size by floor
M2 = w2.size  # size of analysis window
hM2_1 = int(math.floor((M2 + 1) / 2))  # half analysis window size by rounding
hM2_2 = int(math.floor(M2 / 2))  # half analysis window size by floor
loc1 = 14843
loc2 = 9294
Example #9
# matplotlib without any blocking GUI
import matplotlib as mpl

mpl.use('Agg')
import matplotlib.pyplot as plt
import numpy as np

from smst.utils import audio
from smst.models import sine, stft

(fs, x) = audio.read_wav('../../../sounds/flute-A4.wav')
w = np.blackman(601)
N = 1024
H = 150
t = -80
minSineDur = .1
maxnSines = 150
mX, pX = stft.from_audio(x, w, N, H)
tfreq, tmag, tphase = sine.from_audio(x, fs, w, N, H, t, maxnSines, minSineDur)

plt.figure(1, figsize=(9.5, 5))
maxplotfreq = 5000.0
maxplotbin = int(N * maxplotfreq / fs)
numFrames = int(mX.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = np.arange(maxplotbin + 1) * float(fs) / N
plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:, :maxplotbin + 1]))
plt.autoscale(tight=True)

tracks = tfreq * np.less(tfreq, maxplotfreq)
tracks[tracks <= 0] = np.nan
Example #10
def main(inputFile=demo_sound_path('bendir.wav'), window='hamming', M=2001, N=2048, t=-80, minSineDur=0.02,
         maxnSines=150, freqDevOffset=10, freqDevSlope=0.001,
         interactive=True, plotFile=False):
    """
    Perform analysis/synthesis using the sinusoidal model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, greater than or equal to M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    fs, x = audio.read_wav(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # analyze the sound with the sinusoidal model
    tfreq, tmag, tphase = sine.from_audio(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

    # synthesize the output sound from the sinusoidal representation
    y = sine.to_audio(tfreq, tmag, tphase, Ns, H, fs)

    # output sound file name
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_sineModel.wav'

    # write the synthesized sound obtained from the sinusoidal synthesis
    audio.write_wav(y, fs, outputFile)

    # create figure to show plots
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the sinusoidal frequencies
    plt.subplot(3, 1, 2)
    if (tfreq.shape[1] > 0):
        numFrames = tfreq.shape[0]
        frmTime = H * np.arange(numFrames) / float(fs)
        tfreq[tfreq <= 0] = np.nan
        plt.plot(frmTime, tfreq)
        plt.axis([0, x.size / float(fs), 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_sine_model.png' % files.strip_file(inputFile))
Example #11
# matplotlib without any blocking GUI
import matplotlib as mpl

mpl.use('Agg')
import matplotlib.pyplot as plt
import numpy as np

from smst.utils import audio
from smst.models import sine, stft

plt.figure(1, figsize=(9, 7))

plt.subplot(211)
(fs, x) = audio.read_wav('../../../sounds/vibraphone-C6.wav')
w = np.blackman(401)
N = 512
H = 100
t = -100
minSineDur = .02
maxnSines = 150
freqDevOffset = 20
freqDevSlope = 0.01
mX, pX = stft.from_audio(x, w, N, H)
tfreq, tmag, tphase = sine.from_audio(x, fs, w, N, H, t, maxnSines, minSineDur,
                                      freqDevOffset, freqDevSlope)

maxplotfreq = 10000.0
maxplotbin = int(N * maxplotfreq / fs)
numFrames = int(mX.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = np.arange(maxplotbin + 1) * float(fs) / N
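# The snippet is cut off here; presumably the spectrogram plot follows,
# as in the other examples that build frmTime and binFreq this way:
plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:, :maxplotbin + 1]))
plt.autoscale(tight=True)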
Example #12
import essentia.standard as ess
# matplotlib without any blocking GUI
import matplotlib as mpl

mpl.use('Agg')
import matplotlib.pyplot as plt
import numpy as np

from smst.utils import audio

(fs, x) = audio.read_wav('../../../sounds/piano.wav')
start = 13860
M = 800
xp = x[start:start + M] / float(max(x[start:start + M]))
r = ess.AutoCorrelation(normalization='standard')(xp)
r = r / max(r)
peaks = ess.PeakDetection(threshold=.11, interpolate=False, minPosition=.01)(r)

plt.figure(1, figsize=(9, 7))
plt.subplot(211)
plt.plot(np.arange(M) / float(fs), xp, lw=1.5)
plt.axis([0, (M - 1) / float(fs), min(xp), max(xp)])
plt.xlabel('time (sec)')
plt.ylabel('amplitude')
plt.title('x (piano.wav)')

plt.subplot(212)
plt.plot(np.arange(M) / float(fs), r, 'r', lw=1.5)
plt.plot(peaks[0] * (M - 1) / float(fs),
         peaks[1],
         'x',
Example #13
inputFile2 = '../../../sounds/soprano-E4.wav'
window2 = 'blackman'
M2 = 901
N2 = 1024
t2 = -100
minSineDur2 = 0.05
minf02 = 250
maxf02 = 500
f0et2 = 10
harmDevSlope2 = 0.01

Ns = 512
H = 128

(fs1, x1) = audio.read_wav(inputFile1)
(fs2, x2) = audio.read_wav(inputFile2)
w1 = get_window(window1, M1)
w2 = get_window(window2, M2)
hfreq1, hmag1, hphase1, stocEnv1 = hps.from_audio(x1, fs1, w1, N1, H, t1, nH,
                                                  minf01, maxf01, f0et1,
                                                  harmDevSlope1, minSineDur1,
                                                  Ns, stocf)
hfreq2, hmag2, hphase2, stocEnv2 = hps.from_audio(x2, fs2, w2, N2, H, t2, nH,
                                                  minf02, maxf02, f0et2,
                                                  harmDevSlope2, minSineDur2,
                                                  Ns, stocf)

hfreqIntp = np.array([0, .5, 1, .5])
hmagIntp = np.array([0, .5, 1, .5])
stocIntp = np.array([0, .5, 1, .5])
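# The snippet ends with the interpolation envelopes; presumably the two
# models are then morphed and resynthesized. A sketch, assuming smst
# exposes hps.morph with the same argument order as sms-tools' hpsMorph:
yhfreq, yhmag, ystocEnv = hps.morph(hfreq1, hmag1, stocEnv1,
                                    hfreq2, hmag2, stocEnv2,
                                    hfreqIntp, hmagIntp, stocIntp)
y, yh, yst = hps.to_audio(yhfreq, yhmag, np.array([]), ystocEnv, Ns, H, fs1)
audio.write_wav(y, fs1, 'hps-morph.wav')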
Example #14
def main(inputFile=demo_sound_path('piano.wav'),
         window='blackman',
         M=511,
         N=1024,
         time=.2,
         interactive=True,
         plotFile=False):
    """
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (choice of rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size (odd integer value)
    N: fft size (power of two, greater than or equal to M)
    time: time to start analysis (in seconds)
    """

    # read input sound (monophonic with sampling rate of 44100)
    fs, x = audio.read_wav(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # get a fragment of the input sound of size M
    sample = int(time * fs)
    if (sample + M >= x.size
            or sample < 0):  # raise error if time outside of sound
        raise ValueError("Time outside sound boundaries")
    x_frame = x[sample:sample + M]

    # compute the dft of the sound fragment
    mX, pX = dft.from_audio(x_frame, w, N)

    # compute the inverse dft of the spectrum
    y = dft.to_audio(mX, pX, w.size) * sum(w)

    # create figure
    plt.figure(figsize=(12, 9))

    # plot the sound fragment
    plt.subplot(4, 1, 1)
    plt.plot(time + np.arange(M) / float(fs), x_frame)
    plt.axis([time, time + M / float(fs), min(x_frame), max(x_frame)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the magnitude spectrum
    plt.subplot(4, 1, 2)
    plt.plot(float(fs) * np.arange(mX.size) / float(N), mX, 'r')
    plt.axis([0, fs / 2.0, min(mX), max(mX)])
    plt.title('magnitude spectrum: mX')
    plt.ylabel('amplitude (dB)')
    plt.xlabel('frequency (Hz)')

    # plot the phase spectrum
    plt.subplot(4, 1, 3)
    plt.plot(float(fs) * np.arange(pX.size) / float(N), pX, 'c')
    plt.axis([0, fs / 2.0, min(pX), max(pX)])
    plt.title('phase spectrum: pX')
    plt.ylabel('phase (radians)')
    plt.xlabel('frequency (Hz)')

    # plot the sound resulting from the inverse dft
    plt.subplot(4, 1, 4)
    plt.plot(time + np.arange(M) / float(fs), y)
    plt.axis([time, time + M / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_dft_model.png' %
                    files.strip_file(inputFile))
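# Hypothetical usage: analyze the frame at t = 0.2 s and save the plots
# rather than opening a window (parameters as defined in main above):
if __name__ == '__main__':
    main(time=.2, interactive=False, plotFile=True)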
Example #15
# matplotlib without any blocking GUI
import matplotlib as mpl

mpl.use('Agg')
import matplotlib.pyplot as plt
import numpy as np

from smst.utils import audio, peaks
from smst.models import dft

(fs, x) = audio.read_wav('../../../sounds/sine-440-490.wav')
w = np.hamming(3529)
N = 32768
hN = N // 2
t = -20
pin = 4850
x1 = x[pin:pin + w.size]
mX1, pX1 = dft.from_audio(x1, w, N)
ploc = peaks.find_peaks(mX1, t)
pmag = mX1[ploc]
iploc, ipmag, ipphase = peaks.interpolate_peaks(mX1, pX1, ploc)

plt.figure(1, figsize=(9, 6))
plt.subplot(311)
plt.plot(fs * np.arange(pX1.size) / float(N), pX1, 'c', lw=1.5)
plt.plot(fs * iploc / N,
         ipphase,
         marker='x',
         color='b',
         alpha=1,
         linestyle='')
Example #16
# matplotlib without any blocking GUI
import matplotlib as mpl

mpl.use('Agg')
import matplotlib.pyplot as plt
import numpy as np

from smst.utils import audio
from smst.models import harmonic, sine

(fs, x) = audio.read_wav('../../../sounds/vignesh.wav')
w = np.blackman(1201)
N = 2048
t = -90
nH = 100
minf0 = 130
maxf0 = 300
f0et = 7
Ns = 512
H = Ns // 4
minSineDur = .1
harmDevSlope = 0.01
hfreq, hmag, hphase = harmonic.from_audio(x, fs, w, N, H, t, nH, minf0, maxf0,
                                          f0et, harmDevSlope, minSineDur)
y = sine.to_audio(hfreq, hmag, hphase, Ns, H, fs)

numFrames = int(hfreq.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)

plt.figure(1, figsize=(9, 7))
Example #17
# matplotlib without any blocking GUI
import matplotlib as mpl

mpl.use('Agg')
import matplotlib.pyplot as plt
import numpy as np

from smst.utils import audio
from smst.models import dft

(fs, x) = audio.read_wav('../../../sounds/sine-440.wav')
M = 400
x1 = x[2000:2000 + M]
N = 2048
hM = int(M / 2.0)
w = np.hamming(M)
mX, pX = dft.from_audio(x1, w, N)
freqaxis = fs * np.arange(0, mX.size) / float(N)
taxis = np.arange(N) / float(fs)

plt.figure(1, figsize=(9.5, 7))

plt.subplot(3, 1, 1)
plt.plot(np.arange(M) / float(fs), x1, 'b', lw=1.5)
plt.axis([0, (M - 1) / float(fs), min(x1) - .1, max(x1) + .1])
plt.title('x (sine-440.wav)')

plt.subplot(3, 1, 2)
plt.plot(freqaxis, mX, 'r', lw=1.5)
plt.axis([0, fs / 10, -80, max(mX) + 1])
plt.title('mX')
Example #18
def analysis(inputFile=demo_sound_path('sax-phrase-short.wav'), window='blackman', M=601, N=1024, t=-100,
             minSineDur=0.1, nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01, stocf=0.1,
             interactive=True, plotFile=False):
    """
    Analyze a sound with the harmonic plus stochastic model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, greater than or equal to M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics
    minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound
    f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
    stocf: decimation factor used for the stochastic approximation
    returns inputFile: input file name; fs: sampling rate of input file,
            hfreq, hmag: harmonic frequencies, magnitude; mYst: stochastic residual
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    (fs, x) = audio.read_wav(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # compute the harmonic plus stochastic model of the whole sound
    hfreq, hmag, hphase, mYst = hps.from_audio(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur, Ns,
                                              stocf)

    # synthesize the harmonic plus stochastic model without original phases
    y, yh, yst = hps.to_audio(hfreq, hmag, np.array([]), mYst, Ns, H, fs)

    # write output sound
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_hpsModel.wav'
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 15000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot spectrogram of the stochastic component
    plt.subplot(3, 1, 2)
    numFrames = int(mYst.shape[0])
    sizeEnv = int(mYst.shape[1])
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = (.5 * fs) * np.arange(sizeEnv * maxplotfreq / (.5 * fs)) / sizeEnv
    plt.pcolormesh(frmTime, binFreq, np.transpose(mYst[:, :int(sizeEnv * maxplotfreq / (.5 * fs)) + 1]))
    plt.autoscale(tight=True)

    # plot harmonic on top of stochastic spectrogram
    if (hfreq.shape[1] > 0):
        harms = hfreq * np.less(hfreq, maxplotfreq)
        harms[harms == 0] = np.nan
        numFrames = int(harms.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
        plt.xlabel('time (sec)')
        plt.ylabel('frequency (Hz)')
        plt.autoscale(tight=True)
        plt.title('harmonics + stochastic spectrogram')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show(block=False)
    if plotFile:
        plt.savefig('output_plots/%s_hps_transformation_analysis.png' % files.strip_file(inputFile))

    return inputFile, fs, hfreq, hmag, mYst
Example #19
import math
# matplotlib without any blocking GUI
import matplotlib as mpl

mpl.use('Agg')
import matplotlib.pyplot as plt
import numpy as np

from smst.utils import audio
from smst.models import dft

(fs, x) = audio.read_wav('../../../sounds/violin-B3.wav')
w = np.hamming(1024)
N = 1024
pin = 5000
hM1 = int(math.floor((w.size + 1) / 2))
hM2 = int(math.floor(w.size / 2))
x1 = x[pin - hM1:pin + hM2]
mX, pX = dft.from_audio(x1, w, N)

plt.figure(1, figsize=(9.5, 5))
plt.subplot(311)
plt.plot(np.arange(-hM1, hM2), x1, lw=1.5)
plt.axis([-hM1, hM2, min(x1), max(x1)])
plt.ylabel('amplitude')
plt.title('x (violin-B3.wav)')

plt.subplot(3, 1, 2)
plt.plot(np.arange(mX.size), mX, 'r', lw=1.5)
plt.axis([0, mX.size, -90, max(mX)])
plt.title('magnitude spectrum: mX = 20*log10(abs(X))')
Example #20
window = 'blackman'
M = 601
N = 1024
t = -100
minSineDur = 0.1
nH = 100
minf0 = 350
maxf0 = 700
f0et = 5
harmDevSlope = 0.01
stocf = 0.1

Ns = 512
H = 128

(fs, x) = audio.read_wav(inputFile)
w = get_window(window, M)
hfreq, hmag, hphase, mYst = hps.from_audio(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur, Ns,
                                          stocf)
timeScaling = np.array([0, 0, 2.138, 2.138 - 1.5, 3.146, 3.146])  # (input time, output time) pairs
yhfreq, yhmag, ystocEnv = hps.scale_time(hfreq, hmag, mYst, timeScaling)

y, yh, yst = hps.to_audio(yhfreq, yhmag, np.array([]), ystocEnv, Ns, H, fs)

audio.write_wav(y, fs, 'hps-transformation.wav')

plt.figure(figsize=(12, 9))

maxplotfreq = 14900.0

# plot the input sound
Example #21
import essentia.standard as ess
# matplotlib without any blocking GUI
import matplotlib as mpl

mpl.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
from scipy.fftpack import fft
from scipy.signal import hamming

from smst.utils import audio

lpc = ess.LPC(order=14)
N = 512
(fs, x) = audio.read_wav('../../../sounds/soprano-E4.wav')
first = 20000
last = first + N
x1 = x[first:last]
X = fft(hamming(N) * x1)
mX = 20 * np.log10(abs(X[:N // 2]))

coeff = lpc(x1)
Y = fft(coeff[0], N)
mY = 20 * np.log10(abs(Y[:N // 2]))

plt.figure(1, figsize=(9, 5))
plt.subplot(2, 1, 1)
plt.plot(np.arange(first, last) / float(fs), x[first:last], 'b', lw=1.5)
plt.axis([
    first / float(fs), last / float(fs),
    min(x[first:last]),
    max(x[first:last])
])
Example #22
# matplotlib without any blocking GUI
import matplotlib as mpl

mpl.use('Agg')
import matplotlib.pyplot as plt
import numpy as np

from smst.utils import audio
from smst.models import hps

(fs, x) = audio.read_wav('../../../sounds/sax-phrase-short.wav')
w = np.blackman(601)
N = 1024
t = -100
nH = 100
minf0 = 350
maxf0 = 700
f0et = 5
minSineDur = .1
harmDevSlope = 0.01
Ns = 512
H = Ns // 4
stocf = .2
hfreq, hmag, hphase, mYst = hps.from_audio(x, fs, w, N, H, t, nH, minf0, maxf0,
                                           f0et, harmDevSlope, minSineDur, Ns,
                                           stocf)
y, yh, yst = hps.to_audio(hfreq, hmag, hphase, mYst, Ns, H, fs)

maxplotfreq = 10000.0
plt.figure(1, figsize=(9, 7))
Example #23
# matplotlib without any blocking GUI
import matplotlib as mpl

mpl.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
from scipy.interpolate import interp1d

from smst.utils import audio
from smst.models import sine

(fs, x) = audio.read_wav('../../../sounds/mridangam.wav')
x1 = x[:int(1.49 * fs)]
w = np.hamming(801)
N = 2048
t = -90
minSineDur = .005
maxnSines = 150
freqDevOffset = 20
freqDevSlope = 0.02
Ns = 512
H = Ns // 4
sfreq, smag, sphase = sine.from_audio(x1, fs, w, N, H, t, maxnSines,
                                      minSineDur, freqDevOffset, freqDevSlope)
timeScale = np.array([
    .01, .0, .03, .03, .335, .8, .355, .82, .671, 1.0, .691, 1.02, .858, 1.1,
    .878, 1.12, 1.185, 1.8, 1.205, 1.82, 1.49, 2.0
])
L = sfreq.shape[0]  # number of input frames
maxInTime = max(timeScale[::2])  # maximum value used as input times
maxOutTime = max(timeScale[1::2])  # maximum value used in output times
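# The snippet is cut off here; presumably the tracks are warped to the new
# timing and resynthesized, e.g. (assuming smst exposes sine.scale_time,
# analogous to the hps.scale_time used in other examples):
ysfreq, ysmag = sine.scale_time(sfreq, smag, timeScale)
y = sine.to_audio(ysfreq, ysmag, np.array([]), Ns, H, fs)
audio.write_wav(y, fs, 'mridangam-time-scaled.wav')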
Example #24
def main(inputFile=demo_sound_path('sax-phrase-short.wav'),
         window='blackman',
         M=601,
         N=1024,
         t=-100,
         minSineDur=0.1,
         nH=100,
         minf0=350,
         maxf0=700,
         f0et=5,
         harmDevSlope=0.01,
         interactive=True,
         plotFile=False):
    """
    Perform analysis/synthesis using the harmonic plus residual model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, greater than or equal to M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    (fs, x) = audio.read_wav(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # find harmonics and residual
    hfreq, hmag, hphase, xr = hpr.from_audio(x, fs, w, N, H, t, minSineDur, nH,
                                             minf0, maxf0, f0et, harmDevSlope)

    # compute spectrogram of residual
    mXr, pXr = stft.from_audio(xr, w, N, H)

    # synthesize hpr model
    y, yh = hpr.to_audio(hfreq, hmag, hphase, xr, Ns, H, fs)

    # output sound file (monophonic with sampling rate of 44100)
    baseFileName = files.strip_file(inputFile)
    outputFileSines, outputFileResidual, outputFile = [
        'output_sounds/%s_hprModel%s.wav' % (baseFileName, i)
        for i in ('_sines', '_residual', '')
    ]

    # write sounds files for harmonics, residual, and the sum
    audio.write_wav(yh, fs, outputFileSines)
    audio.write_wav(xr, fs, outputFileResidual)
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the magnitude spectrogram of residual
    plt.subplot(3, 1, 2)
    maxplotbin = int(N * maxplotfreq / fs)
    numFrames = int(mXr.shape[0])
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = np.arange(maxplotbin + 1) * float(fs) / N
    plt.pcolormesh(frmTime, binFreq, np.transpose(mXr[:, :maxplotbin + 1]))
    plt.autoscale(tight=True)

    # plot harmonic frequencies on residual spectrogram
    if (hfreq.shape[1] > 0):
        harms = hfreq * np.less(hfreq, maxplotfreq)
        harms[harms == 0] = np.nan
        numFrames = int(harms.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
        plt.xlabel('time (sec)')
        plt.ylabel('frequency (Hz)')
        plt.autoscale(tight=True)
        plt.title('harmonics + residual spectrogram')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_hpr_model.png' %
                    files.strip_file(inputFile))
Example #25
def main(inputFile=demo_sound_path('rain.wav'), stocf=0.1, timeScaling=np.array([0, 0, 1, 2]),
         interactive=True, plotFile=False):
    """
    function to perform a time scaling using the stochastic model
    inputFile: name of input sound file
    stocf: decimation factor used for the stochastic approximation
    timeScaling: time scaling factors, in time-value pairs
    """

    # hop size
    H = 128

    # read input sound
    (fs, x) = audio.read_wav(inputFile)

    # perform stochastic analysis
    mYst = stochastic.from_audio(x, H, H * 2, stocf)

    # perform time scaling of stochastic representation
    ystocEnv = stochastic.scale_time(mYst, timeScaling)

    # synthesize output sound
    y = stochastic.to_audio(ystocEnv, H, H * 2)

    # write output sound
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_stochasticModelTransformation.wav'
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # plot the input sound
    plt.subplot(4, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot stochastic representation
    plt.subplot(4, 1, 2)
    numFrames = int(mYst.shape[0])
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = np.arange(stocf * H) * float(fs) / (stocf * 2 * H)
    plt.pcolormesh(frmTime, binFreq, np.transpose(mYst))
    plt.autoscale(tight=True)
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('stochastic approximation')

    # plot modified stochastic representation
    plt.subplot(4, 1, 3)
    numFrames = int(ystocEnv.shape[0])
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = np.arange(stocf * H) * float(fs) / (stocf * 2 * H)
    plt.pcolormesh(frmTime, binFreq, np.transpose(ystocEnv))
    plt.autoscale(tight=True)
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('modified stochastic approximation')

    # plot the output sound
    plt.subplot(4, 1, 4)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_stochastic_transformation.png' % files.strip_file(inputFile))
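# Hypothetical usage: timeScaling is a flat array of (input time, output
# time) pairs, so the default [0, 0, 1, 2] stretches the first second of
# input over two seconds of output:
if __name__ == '__main__':
    main(timeScaling=np.array([0, 0, 1, 2]), interactive=False, plotFile=True)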
Example #26
def main(inputFile1=demo_sound_path('ocean.wav'),
         inputFile2=demo_sound_path('speech-male.wav'),
         window1='hamming',
         window2='hamming',
         M1=1024,
         M2=1024,
         N1=1024,
         N2=1024,
         H1=256,
         smoothf=.5,
         balancef=0.2,
         interactive=True,
         plotFile=False):
    """
    Function to perform a morph between two sounds
    inputFile1: name of input sound file to be used as source
    inputFile2: name of input sound file to be used as filter
    window1 and window2: windows for both files
    M1 and M2: window sizes for both files
    N1 and N2: fft sizes for both sounds
    H1: hop size for sound 1 (the one for sound 2 is computed automatically)
    smoothf: smoothing factor to be applied to the magnitude spectrum of sound 2 before morphing
    balancef: balance factor between both sounds, 0 is sound 1 and 1 is sound 2
    """

    # read input sounds
    (fs, x1) = audio.read_wav(inputFile1)
    (fs, x2) = audio.read_wav(inputFile2)

    # compute analysis windows
    w1 = get_window(window1, M1)
    w2 = get_window(window2, M2)

    # perform morphing
    y = stft.morph(x1, x2, fs, w1, N1, w2, N2, H1, smoothf, balancef)

    # compute the magnitude and phase spectrogram of input sound (for plotting)
    mX1, pX1 = stft.from_audio(x1, w1, N1, H1)

    # compute the magnitude and phase spectrogram of output sound (for plotting)
    mY, pY = stft.from_audio(y, w1, N1, H1)

    # write output sound
    outputFile = 'output_sounds/' + os.path.basename(
        inputFile1)[:-4] + '_stftMorph.wav'
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 10000.0

    # plot sound 1
    plt.subplot(4, 1, 1)
    plt.plot(np.arange(x1.size) / float(fs), x1)
    plt.axis([0, x1.size / float(fs), min(x1), max(x1)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot magnitude spectrogram of sound 1
    plt.subplot(4, 1, 2)
    numFrames = int(mX1.shape[0])
    frmTime = H1 * np.arange(numFrames) / float(fs)
    binFreq = fs * np.arange(N1 * maxplotfreq / fs) / N1
    plt.pcolormesh(frmTime, binFreq,
                   np.transpose(mX1[:, :int(N1 * maxplotfreq / fs) + 1]))
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('magnitude spectrogram of x')
    plt.autoscale(tight=True)

    # plot magnitude spectrogram of morphed sound
    plt.subplot(4, 1, 3)
    numFrames = int(mY.shape[0])
    frmTime = H1 * np.arange(numFrames) / float(fs)
    binFreq = fs * np.arange(N1 * maxplotfreq / fs) / N1
    plt.pcolormesh(frmTime, binFreq,
                   np.transpose(mY[:, :int(N1 * maxplotfreq / fs) + 1]))
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('magnitude spectrogram of y')
    plt.autoscale(tight=True)

    # plot the morphed sound
    plt.subplot(4, 1, 4)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig(
            'output_plots/%s_%s_stft_morph.png' %
            (files.strip_file(inputFile1), files.strip_file(inputFile2)))
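# Hypothetical usage: morph ocean.wav (source) with speech-male.wav
# (filter); per the docstring, balancef=.7 weights sound 2 more heavily:
if __name__ == '__main__':
    main(smoothf=.5, balancef=.7, interactive=False, plotFile=True)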
Example #27
# matplotlib without any blocking GUI
import matplotlib as mpl

mpl.use('Agg')
import matplotlib.pyplot as plt
import numpy as np

from smst.utils import audio
from smst.models import dft

(fs, x) = audio.read_wav('../../../sounds/oboe-A4.wav')
w = np.hamming(401)
N = 1024
pin = 5000
x1 = x[pin:pin + w.size]
mX, pX = dft.from_audio(x1, w, N)

plt.figure(1, figsize=(9.5, 7))
plt.subplot(311)
plt.plot(np.arange(pin, pin + w.size) / float(fs), x1, 'b', lw=1.5)
plt.axis([pin / float(fs), (pin + w.size) / float(fs), min(x1), max(x1)])
plt.title('x (oboe-A4.wav), M=401')

plt.subplot(3, 1, 2)
plt.plot(fs * np.arange(mX.size) / float(N), mX, 'r', lw=1.5)
plt.axis([0, 8000, -80, max(mX)])
plt.title('mX; Hamming window, N=1024')

plt.subplot(3, 1, 3)
plt.plot(fs * np.arange(mX.size) / float(N), pX, 'c', lw=1.5)
plt.axis([0, 8000, -12, 15])
Example #28
def analysis(inputFile1=demo_sound_path('violin-B3.wav'),
             window1='blackman',
             M1=1001,
             N1=1024,
             t1=-100,
             minSineDur1=0.05,
             nH=60,
             minf01=200,
             maxf01=300,
             f0et1=10,
             harmDevSlope1=0.01,
             stocf=0.1,
             inputFile2=demo_sound_path('soprano-E4.wav'),
             window2='blackman',
             M2=901,
             N2=1024,
             t2=-100,
             minSineDur2=0.05,
             minf02=250,
             maxf02=500,
             f0et2=10,
             harmDevSlope2=0.01,
             interactive=True,
             plotFile=False):
    """
    Analyze two sounds with the harmonic plus stochastic model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, greater than or equal to M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics
    minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound
    f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
    stocf: decimation factor used for the stochastic approximation
    returns inputFile: input file name; fs: sampling rate of input file,
            hfreq, hmag: harmonic frequencies, magnitude; stocEnv: stochastic residual
    """

    # size of fft used in synthesis
    Ns = 512
    # hop size (has to be 1/4 of Ns)
    H = 128
    # read input sounds
    (fs1, x1) = audio.read_wav(inputFile1)
    (fs2, x2) = audio.read_wav(inputFile2)
    # compute analysis windows
    w1 = get_window(window1, M1)
    w2 = get_window(window2, M2)
    # compute the harmonic plus stochastic models
    hfreq1, hmag1, hphase1, stocEnv1 = hps.from_audio(x1, fs1, w1, N1, H, t1,
                                                      nH, minf01, maxf01,
                                                      f0et1, harmDevSlope1,
                                                      minSineDur1, Ns, stocf)
    hfreq2, hmag2, hphase2, stocEnv2 = hps.from_audio(x2, fs2, w2, N2, H, t2,
                                                      nH, minf02, maxf02,
                                                      f0et2, harmDevSlope2,
                                                      minSineDur2, Ns, stocf)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 15000.0

    # plot spectrogram stochastic component of sound 1
    plt.subplot(2, 1, 1)
    numFrames = int(stocEnv1.shape[0])
    sizeEnv = int(stocEnv1.shape[1])
    frmTime = H * np.arange(numFrames) / float(fs1)
    binFreq = (.5 * fs1) * np.arange(sizeEnv * maxplotfreq /
                                     (.5 * fs1)) / sizeEnv
    plt.pcolormesh(
        frmTime, binFreq,
        np.transpose(stocEnv1[:, :int(sizeEnv * maxplotfreq / (.5 * fs1)) + 1]))
    plt.autoscale(tight=True)

    # plot harmonic on top of stochastic spectrogram of sound 1
    if (hfreq1.shape[1] > 0):
        harms = np.copy(hfreq1)
        harms = harms * np.less(harms, maxplotfreq)
        harms[harms == 0] = np.nan
        numFrames = int(harms.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs1)
        plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
        plt.xlabel('time (sec)')
        plt.ylabel('frequency (Hz)')
        plt.autoscale(tight=True)
        plt.title('harmonics + stochastic spectrogram of sound 1')

    # plot spectrogram stochastic component of sound 2
    plt.subplot(2, 1, 2)
    numFrames = int(stocEnv2.shape[0])
    sizeEnv = int(stocEnv2.shape[1])
    frmTime = H * np.arange(numFrames) / float(fs2)
    binFreq = (.5 * fs2) * np.arange(sizeEnv * maxplotfreq /
                                     (.5 * fs2)) / sizeEnv
    plt.pcolormesh(
        frmTime, binFreq,
        np.transpose(stocEnv2[:, :int(sizeEnv * maxplotfreq / (.5 * fs2)) + 1]))
    plt.autoscale(tight=True)

    # plot harmonic on top of stochastic spectrogram of sound 2
    if (hfreq2.shape[1] > 0):
        harms = np.copy(hfreq2)
        harms = harms * np.less(harms, maxplotfreq)
        harms[harms == 0] = np.nan
        numFrames = int(harms.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs2)
        plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
        plt.xlabel('time (sec)')
        plt.ylabel('frequency (Hz)')
        plt.autoscale(tight=True)
        plt.title('harmonics + stochastic spectrogram of sound 2')

    plt.tight_layout()

    if interactive:
        plt.show(block=False)
    if plotFile:
        plt.savefig(
            'output_plots/%s_%s_hps_morph_analysis.png' %
            (files.strip_file(inputFile1), files.strip_file(inputFile2)))

    return inputFile1, fs1, hfreq1, hmag1, stocEnv1, inputFile2, hfreq2, hmag2, stocEnv2
Example #29
def analysis(inputFile=demo_sound_path('mridangam.wav'),
             window='hamming',
             M=801,
             N=2048,
             t=-90,
             minSineDur=0.01,
             maxnSines=150,
             freqDevOffset=20,
             freqDevSlope=0.02,
             interactive=True,
             plotFile=False):
    """
    Analyze a sound with the sine model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, greater than or equal to M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    returns inputFile: input file name; fs: sampling rate of input file,
            tfreq, tmag: sinusoidal frequencies and magnitudes
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    (fs, x) = audio.read_wav(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # compute the sine model of the whole sound
    tfreq, tmag, tphase = sine.from_audio(x, fs, w, N, H, t, maxnSines,
                                          minSineDur, freqDevOffset,
                                          freqDevSlope)

    # synthesize the sines without original phases
    y = sine.to_audio(tfreq, tmag, np.array([]), Ns, H, fs)

    # output sound file (monophonic with sampling rate of 44100)
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_sineModel.wav'

    # write the sound resulting from the inverse stft
    audio.write_wav(y, fs, outputFile)

    # create figure to show plots
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the sinusoidal frequencies
    if (tfreq.shape[1] > 0):
        plt.subplot(3, 1, 2)
        tracks = np.copy(tfreq)
        tracks = tracks * np.less(tracks, maxplotfreq)
        tracks[tracks <= 0] = np.nan
        numFrames = int(tracks.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, tracks)
        plt.axis([0, x.size / float(fs), 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show(block=False)
    if plotFile:
        plt.savefig('output_plots/%s_sine_transformation_analysis.png' %
                    files.strip_file(inputFile))

    return inputFile, fs, tfreq, tmag
Example #30
# matplotlib without any blocking GUI
import matplotlib as mpl

mpl.use('Agg')
import matplotlib.pyplot as plt
import numpy as np

from smst.utils import audio
from smst.models import sine, stft

(fs, x) = audio.read_wav('../../../sounds/bendir.wav')
w = np.hamming(2001)
N = 2048
H = 200
t = -80
minSineDur = .02
maxnSines = 150
freqDevOffset = 10
freqDevSlope = 0.001
mX, pX = stft.from_audio(x, w, N, H)
tfreq, tmag, tphase = sine.from_audio(x, fs, w, N, H, t, maxnSines, minSineDur,
                                      freqDevOffset, freqDevSlope)

plt.figure(1, figsize=(9.5, 7))
maxplotfreq = 800.0
maxplotbin = int(N * maxplotfreq / fs)
numFrames = int(mX.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = np.arange(maxplotbin + 1) * float(fs) / N
plt.pcolormesh(frmTime, binFreq,
               np.transpose(np.diff(pX[:, :maxplotbin + 1], axis=1)))