Пример #1
0
def writeExampleFiles():
    """
    A convenience function: writes out example files, some of them with optimal
    parameters found by exploreSineModelMultiRes().

    Two sounds are each processed with two parameter sets through
    Best().calculateAndUpdate(); every result is written next to its input
    file with the '.wav' extension replaced by '_optimizedSineModel.wav' or
    '_nonOptimizedSineModel.wav'.  The output path is printed before each
    file is written.
    """
    orchestra = '../../sounds/orchestra.wav'
    strings = '../../sounds/121061__thirsk__160-link-strings-2-mono.wav'

    # Parameter sets as (W, M, N, B, T).
    # NOTE(review): presumably window types, window sizes, FFT sizes, band
    # edges (Hz) and peak thresholds (dB) -- confirm against
    # Best.calculateAndUpdate().
    singleBand = (['blackmanharris'], [1001], [4096], [], [-90])
    threeBands = (['hamming', 'hamming', 'hamming'],
                  [3001, 1501, 751],
                  [16384, 8192, 4096],
                  [2756.25, 5512.5],
                  [-90, -90, -90])
    Ns = 512

    # (input sound, parameter set, output suffix), in the order the files
    # are produced.
    jobs = (
        (orchestra, singleBand, '_optimizedSineModel.wav'),
        (strings, threeBands, '_optimizedSineModel.wav'),
        (orchestra, threeBands, '_nonOptimizedSineModel.wav'),
        (strings, singleBand, '_nonOptimizedSineModel.wav'),
    )

    for inputFile, parameters, suffix in jobs:
        fs, x = UF.wavread(inputFile)
        W, M, N, B, T = [np.array(p) for p in parameters]
        best = Best()  # fresh instance per file, as each run is independent
        y = best.calculateAndUpdate(x, fs, Ns, W, M, N, B, T)
        outputFile = inputFile[:-4] + suffix
        # Single-argument call form prints the same text on Python 2 and 3.
        print('-> ' + outputFile)
        UF.wavwrite(y, fs, outputFile)
Пример #2
0
def computeSNR(inputFile, window, M, N, H):
    """
    Input:
            inputFile (string): wav file name including the path 
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming, 
                    blackman, blackmanharris)
            M (integer): analysis window length (odd positive integer)
            N (integer): fft size (power of two, > M)
            H (integer): hop size for the stft computation
    Output:
            The function should return a python tuple of both the SNR values (SNR1, SNR2)
            SNR1 and SNR2 are floats.

    SNR1 is computed over the whole signal; SNR2 over the signal with M
    samples removed from each end.  The noise is the difference between the
    output of stft.stft() and the input.
    """
    def energy(sig):
        # Sum of squared samples.
        return np.sum(np.power(sig, 2))

    def snrDb(sig, err):
        return 10 * np.log10(energy(sig) / energy(err))

    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)
    xsyn = stft.stft(x, fs, w, N, H)
    noise = np.subtract(xsyn, x)

    SNR1 = snrDb(x, noise)

    # Trim exactly M samples per edge (the previous M+1 / -M-1 bounds dropped
    # one sample too many on each side).
    SNR2 = snrDb(x[M:len(x) - M], noise[M:len(noise) - M])

    return SNR1, SNR2
Пример #3
0
def computeODF(inputFile, window, M, N, H):
    """
    Compute an onset detection function (ODF) in two frequency bands.

    Inputs:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (rectangular, triangular, hanning, hamming,
                blackman, blackmanharris)
            M (integer): analysis window size (odd integer value)
            N (integer): fft size (power of two, greater than or equal to M)
            H (integer): hop size for the STFT computation
    Output:
            A numpy array with two columns:
            ODF[:,0]: ODF computed in band 0 < f < 3000 Hz
            ODF[:,1]: ODF computed in band 3000 < f < 10000 Hz
    """
    fs, x = UF.wavread(inputFile)
    analysisWindow = get_window(window, M)

    # Only the first value returned by stftAnal is used; it is converted
    # below as a dB magnitude into linear amplitude.
    magnitudeDb = stft.stftAnal(x, fs, analysisWindow, N, H)[0]
    linearMag = 10 ** (magnitudeDb / 20.0)

    # Last bin index at or below each band edge.
    lowEdge = int(N * 3000.0 / fs)
    highEdge = int(N * 10000.0 / fs)

    # Bin 0 is excluded from the low band; the bands do not overlap.
    lowBandOdf = odf(linearMag[:, 1:lowEdge + 1])
    highBandOdf = odf(linearMag[:, lowEdge + 1:highEdge + 1])

    return np.column_stack((lowBandOdf, highBandOdf))
Пример #4
0
def sineModelOriginalTest1(inputFile, M):
    """
    Run sineModelOriginal() (the non-multiresolution model) on a sound file.

    inputFile: path of the wav file to analyse
    M: analysis window size; the FFT size is the smallest power of two >= M

    Prints M and the derived FFT size, then returns whatever
    sineModelOriginal(x, fs, w, N, t) returns, using a 'blackman' window and
    a threshold of -80.0.
    """
    # Single-argument print calls behave identically on Python 2 and 3.
    print("\n\n\n###############  RUN THE ORIGINAL TEST (without multiresolution)  ###############\n")

    print("M: ")
    print(M)

    N1 = int(pow(2, np.ceil(np.log2(M))))      # FFT size: smallest power of 2 >= M

    print("N1: ")
    print(N1)

    t = -80.0                                   # threshold

    fs, x = UF.wavread(inputFile)               # read input sound

    w1 = get_window('blackman', M)              # compute analysis window

    return sineModelOriginal(x, fs, w1, N1, t)
Пример #5
0
def phaseFlux(inFile, window, M, bins, H, passes, th, inhibTh, inhibRel, plot=False):
    """
    Compute a phase-based onset curve for a sound file.

    inFile: path of the wav file
    window: window type passed to get_window()
    M: window length in samples
    bins: FFT size passed to UF.fft(); also the number of columns kept per frame
    H: hop size in samples
    passes: number of passes; one row of the result per pass
    th: a frame is marked as an onset when sum(derv2 * mX) / bins exceeds th
    inhibTh: bins whose second phase difference exceeds this value are inhibited
    inhibRel: number of frames over which an inhibited bin is scaled
    plot: when True, plot -transient(x, 10) on the current matplotlib axes

    Returns normalise(onsets), where onsets has shape (passes, len(x)) and is
    non-zero only at the first sample of frames marked as onsets.

    NOTE(review): normalise, transient, zeropad, nd_unwrapPhase and
    nd_derivative are defined outside this block; the descriptions above
    assume they do what their names suggest.
    """
    fs, x = UF.wavread(inFile)  # read file
    x = normalise(x)            # normalise
    if plot:
        plt.plot(-transient(x, 10))

    N = len(x)                  # length of file
    numFrames = N // H          # explicit floor division: same on Python 2 and 3
    win = get_window(window, M) # create window

    # STFT, laid out as X[frame][bin]; the last frames are zero-padded up to
    # the window length.
    X = np.zeros((numFrames, bins), dtype='complex')
    for n in range(numFrames):
        Xpart = x[n * H:n * H + M]
        if len(Xpart) < len(win):
            Xpart = zeropad(Xpart, len(win))
        X[n] = UF.fft(Xpart * win, bins)    # gets auto zerophased and padded

    mX = normalise(np.abs(X))       # magnitude
    pX = normalise(np.angle(X))     # phase

    pX_uw = nd_unwrapPhase(pX, 0)   # unwrap phase

    derv1 = nd_derivative(pX_uw, 0)
    derv2 = nd_derivative(derv1, 0)

    # Per-bin gain applied to the magnitudes; starts at unity everywhere.
    binmul = np.ones((numFrames, bins), dtype='float')

    # Zero-initialised so samples that are never marked read as 0 rather than
    # whatever happened to be in freshly allocated memory.
    onsets = np.zeros((passes, N), dtype='float')
    for p in range(passes):
        for n in range(numFrames):
            val = np.sum(derv2[n] * mX[n])
            if val / bins > th:
                onsets[p][n * H] = val
                for k in range(bins):
                    if derv2[n][k] > inhibTh:
                        for m in range(inhibRel):
                            # Stop before writing past the last frame.
                            if n + m >= numFrames:
                                break
                            # NOTE(review): with integer inhibRel this is 0
                            # for every m under Python 2 integer division and
                            # a 0..1 ramp under Python 3 -- confirm intent.
                            binmul[n + m][k] = m / inhibRel

            mX *= binmul
    return normalise(onsets)
Пример #6
0
def computeSNR(inputFile, window, M, N, H):
    """
    Estimate the SNR of an STFT analysis/synthesis round trip.

    Input:
            inputFile (string): wav file name including the path
            window (string): analysis window type (rectangular, triangular, hanning, hamming,
                    blackman, blackmanharris)
            M (integer): analysis window length (odd positive integer)
            N (integer): fft size (power of two, > M)
            H (integer): hop size for the stft computation
    Output:
            A python tuple (SNR1, SNR2) of floats.

    Energies are measured on magnitude spectrograms (converted from dB) of
    the input and of the noise.  SNR1 uses the noise over the whole signal;
    SNR2 uses the noise with one window length trimmed from each end, still
    relative to the energy of the full input spectrogram.
    """
    def spectralEnergy(magDb):
        linear = 10 ** (magDb / 20)
        return np.sum(linear ** 2)

    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    mX, pX = STFT.stftAnal(x, fs, w, N, H)
    resynth = STFT.stftSynth(mX, pX, M, H)[:x.size]   # align length with x

    noiseFull = x - resynth
    noiseInner = x[w.size:-w.size] - resynth[w.size:-w.size]

    mNoiseFull, _ = STFT.stftAnal(noiseFull, fs, w, N, H)
    mNoiseInner, _ = STFT.stftAnal(noiseInner, fs, w, N, H)

    snr1 = 10 * np.log10(spectralEnergy(mX) / spectralEnergy(mNoiseFull))
    snr2 = 10 * np.log10(spectralEnergy(mX) / spectralEnergy(mNoiseInner))

    return snr1, snr2
Пример #7
0
def minFreqEstErr(inputFile, f):
    """
    Find the smallest window size of the form 100*k + 1 for which the
    estimated sinusoid frequency is within 0.05 Hz of f.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst = estimated frequency of the sinusoid (Hz), as returned by the
                   peak interpolation scaled to Hz
            M (int) = window size
            N (int) = FFT size (smallest power of two >= M)
    """
    # analysis parameters
    window = 'blackman'
    t = -40

    fs, x = UF.wavread(inputFile)
    centre = len(x) // 2

    k = 1
    while True:
        M = 100 * k + 1
        start = centre - M // 2            # segment of M samples around the centre
        W = get_window(window, M)
        N = int(2 ** np.ceil(np.log2(M)))
        mX, pX = DFT.dftAnal(x[start: start + M], W, N)
        ploc = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        fEst = iploc * fs / N
        if not (np.abs(f - fEst) > 0.05):  # Hz
            break
        k += 1
    return (fEst, M, N)
Пример #8
0
def computeSNR(inputFile, window, M, N, H):
    """
    Compute the SNR of the stft.stft() output against the input signal.

    Input:
            inputFile (string): wav file name including the path
            window (string): analysis window type (rectangular, triangular, hanning, hamming,
                    blackman, blackmanharris)
            M (integer): analysis window length (odd positive integer)
            N (integer): fft size (power of two, > M)
            H (integer): hop size for the stft computation
    Output:
            A python tuple (SNR1, SNR2) of floats: SNR1 over the whole signal,
            SNR2 over samples M .. x.size-M only.
    """
    def sumOfSquares(sig):
        return np.sum( abs(sig)**2 )

    fs, x = UF.wavread(inputFile)
    analysisWindow = get_window(window, M)

    resynth = stft.stft(x, fs, analysisWindow, N, H)
    residual = np.array(x - resynth)

    # Interior region with M samples removed from both ends.
    interior = slice(M, x.size - M)

    SNR1 = 10 * np.log10(sumOfSquares(x) / sumOfSquares(residual))
    SNR2 = 10 * np.log10(sumOfSquares(x[interior]) / sumOfSquares(residual[interior]))

    return (SNR1, SNR2)
Пример #9
0
def computeEngEnv(inputFile, window, M, N, H):
    """
    Compute per-frame energy envelopes in two frequency bands.

    Inputs:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (choice of rectangular, triangular, hanning, 
                hamming, blackman, blackmanharris)
            M (integer): analysis window size (odd positive integer)
            N (integer): FFT size (power of 2, such that N > M)
            H (integer): hop size for the stft computation
    Output:
            The function should return a numpy array engEnv with shape Kx2, K = Number of frames
            containing energy envelop of the signal in decibles (dB) scale
            engEnv[:,0]: Energy envelope in band 0 < f < 3000 Hz (in dB)
            engEnv[:,1]: Energy envelope in band 3000 < f < 10000 Hz (in dB)

    Bin frequencies are derived from the sampling rate read from the file
    rather than a fixed 44100, and are compared as floats so bins lying just
    above a band edge are not truncated onto it.
    """
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    mX, pX = stft.stftAnal(x, w, N, H)
    linMag = pow(10, mX / 20.)          # dB magnitude -> linear amplitude
    power = linMag * linMag

    # Frequency in Hz of every bin column.
    binFreq = np.arange(power.shape[1]) * float(fs) / N
    lowBand = (binFreq > 0) & (binFreq < 3000)
    highBand = (binFreq > 3000) & (binFreq < 10000)

    band_energy = np.zeros((len(mX), 2))
    band_energy[:, 0] = np.sum(power[:, lowBand], axis=1)
    band_energy[:, 1] = np.sum(power[:, highBand], axis=1)

    return 10.0 * np.log10(band_energy)
Пример #10
0
def computeModel(inputFile, B, M, window = 'hanning', t = -90):
    """
    Run SMMR.sineModelMultiRes() on a sound, save input and output, and plot both.

    inputFile: path of the wav file to process
    B: per-band values; one analysis window is built per entry, and the FFT
       sizes are the powers of two at or above each entry
    M: per-band window sizes (indexed in step with B)
    window: window type passed to get_window()
    t: threshold passed to sineModelMultiRes()

    Writes a copy of the input and the synthesized sound into
    'output_sounds/' and shows a plot of both signals.  Returns nothing.
    """
    numBands = len(B)

    fs, x = UF.wavread(inputFile)

    w = []
    for band in range(numBands):
        w.append(get_window(window, M[band]))
    N = (2**np.ceil(np.log2(B))).astype(int)

    y_combined = SMMR.sineModelMultiRes(x, fs, w, N, t, B)

    # output sound file names
    baseName = os.path.basename(inputFile)
    inputCopyPath = 'output_sounds/' + baseName
    synthPath = 'output_sounds/' + baseName[:-4] + '_sineModelMultiRes.wav'

    # write the input copy, then the sound obtained from the sinusoidal synthesis
    UF.wavwrite(x, fs, inputCopyPath)
    UF.wavwrite(y_combined, fs, synthPath)

    plt.figure()
    for signal in (x, y_combined):
        plt.plot(signal)
    plt.show()
Пример #11
0
def computeSNR(inputFile, window, M, N, H):
    """
    Compute the SNR of the stft.stft() output against the input signal.

    Input:
            inputFile (string): wav file name including the path
            window (string): analysis window type (rectangular, triangular, hanning, hamming,
                    blackman, blackmanharris)
            M (integer): analysis window length (odd positive integer)
            N (integer): fft size (power of two, > M)
            H (integer): hop size for the stft computation
    Output:
            A python tuple (SNR1, SNR2) of floats: the first over the whole
            signal, the second with M samples removed from each end.
    """
    def toDb(signalEnergy, noiseEnergy):
        return 10 * np.log10(signalEnergy / noiseEnergy)

    fs, x = UF.wavread(inputFile)
    analysisWindow = get_window(window, M)
    xrec = stft.stft(x, fs, analysisWindow, N, H)

    # energy() comes from outside this function.
    fullSignal = energy(x)
    innerSignal = energy(x[M:-M])
    fullNoise = energy(x-xrec)
    innerNoise = energy((x-xrec)[M:-M])

    return toDb(fullSignal, fullNoise), toDb(innerSignal, innerNoise)
Пример #12
0
def computeSNR(inputFile, window, M, N, H):
    """
    Compute the SNR of the stft.stft() output against the input signal.

    Input:
            inputFile (string): wav file name including the path 
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming, 
                    blackman, blackmanharris)
            M (integer): analysis window length (odd positive integer)
            N (integer): fft size (power of two, > M)
            H (integer): hop size for the stft computation
    Output:
            The function should return a python tuple of both the SNR values (SNR1, SNR2)
            SNR1 and SNR2 are floats.

    SNR1 covers the whole signal; SNR2 covers the signal with M samples
    removed from each end.
    """
    def energy(sig):
        # Sum of squared magnitudes.
        return np.sum(np.abs(sig) ** 2)

    w = get_window(window, M)  # get the window
    (fs, x) = UF.wavread(inputFile)
    # x: input sound, w: analysis window, N: FFT size, H: hop size
    # returns y: output sound
    y = stft.stft(x, fs, w, N, H)

    # The noise is the sample-wise difference between input and output.
    # (Subtracting absolute values, as before, hides the error wherever the
    # two signals differ in sign.)
    noise = x - y[:x.size]

    # Same M-sample trim at both ends.
    inner = slice(M, x.size - M)

    SNR1 = 10 * np.log10(energy(x) / energy(noise))
    SNR2 = 10 * np.log10(energy(x[inner]) / energy(noise[inner]))
    return SNR1, SNR2
Пример #13
0
def computeEngEnv(inputFile, window, M, N, H):
    """
    Compute per-frame energy envelopes in two frequency bands.

    Inputs:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (choice of rectangular, triangular, hanning, 
                hamming, blackman, blackmanharris)
            M (integer): analysis window size (odd positive integer)
            N (integer): FFT size (power of 2, such that N > M)
            H (integer): hop size for the stft computation
    Output:
            The function should return a numpy array engEnv with shape Kx2, K = Number of frames
            containing energy envelop of the signal in decibles (dB) scale
            engEnv[:,0]: Energy envelope in band 0 < f < 3000 Hz (in dB)
            engEnv[:,1]: Energy envelope in band 3000 < f < 10000 Hz (in dB)

    The band edges are read from the names fLow2, fHigh1 and fHigh2, which
    are not defined in this function and must exist in the enclosing scope.
    NOTE(review): presumably 3000, 3000 and 10000 Hz per the docstring above
    -- confirm.
    """
    def lastBinBelow(freq):
        # Largest bin index k with k*fs/N strictly below freq.  Closed form
        # of the former incremental search, which never terminated when
        # N*freq/fs was an integer (or <= 1).
        return int(np.ceil(N * freq / float(fs))) - 1

    def bandEnergyDb(magDb):
        # dB magnitudes -> linear, then energy per frame, back in dB.
        linear = 10 ** (magDb / 20.0)
        return 10.0 * np.log10(np.sum(linear ** 2, axis=1))

    (fs, x) = UF.wavread(inputFile)
    w = get_window(window, M)
    (xmX, xpX) = stft.stftAnal(x, fs, w, N, H)

    kLow1 = 0
    kLow2 = lastBinBelow(fLow2)
    kHigh1 = lastBinBelow(fHigh1)
    kHigh2 = lastBinBelow(fHigh2)

    nHops = int(xmX.shape[0])
    out = np.zeros((nHops, 2))

    # Each band spans the bins strictly above its lower edge index up to and
    # including its upper edge index.
    out[:, 0] = bandEnergyDb(xmX[:, kLow1 + 1:kLow2 + 1])
    out[:, 1] = bandEnergyDb(xmX[:, kHigh1 + 1:kHigh2 + 1])

    return out
Пример #14
0
def computeSNR(inputFile, window, M, N, H):
    """
    Compute two SNR values for the stft.stft() output of a sound.

    Input:
            inputFile (string): wav file name including the path
            window (string): analysis window type (rectangular, triangular, hanning, hamming,
                    blackman, blackmanharris)
            M (integer): analysis window length (odd positive integer)
            N (integer): fft size (power of two, > M)
            H (integer): hop size for the stft computation
    Output:
            A python tuple (SNR1, SNR2) of floats: SNR1 over the complete
            signal, SNR2 with M samples sliced off each end.

    Note that the signal energy is taken from the stft output y, not from
    the input x.
    """
    def ratioDb(sig, err):
        return 10 * np.log10((sig**2).sum()/(err**2).sum())

    # read from the file
    FS, x = UF.wavread(inputFile)

    analysisWindow = get_window(window, M)
    # do a stft computation
    y = stft.stft(x, FS, analysisWindow, N, H)
    diff = y - x

    # SNR over the complete signal, then over the sliced signal
    SNR1 = ratioDb(y, diff)
    SNR2 = ratioDb(y[M:-M], diff[M:-M])

    return (SNR1, SNR2)
Пример #15
0
def computeODF(inputFile, window, M, N, H):
    """
    Compute an onset detection function (ODF) in two frequency bands.

    Inputs:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming, 
                blackman, blackmanharris)
            M (integer): analysis window size (odd integer value)
            N (integer): fft size (power of two, bigger or equal than than M)
            H (integer): hop size for the STFT computation
    Output:
            The function should return a numpy array with two columns, where the first column is the ODF 
            computed on the low frequency band and the second column is the ODF computed on the high 
            frequency band.
            ODF[:,0]: ODF computed in band 0 < f < 3000 Hz 
            ODF[:,1]: ODF computed in band 3000 < f < 10000 Hz

    Each ODF value is the frame-to-frame increase of the band energy in dB,
    with non-positive changes set to 0.  The first frame is compared against
    a previous energy of 0.0 dB.
    """
    def bandOdf(magDb):
        # Band energy per frame in dB.
        envDb = 10 * np.log10(np.sum(np.square(10 ** (magDb / 20.0)), axis=1))
        # Energy of the preceding frame (0.0 before the first frame).
        prevDb = np.zeros(envDb.shape)
        prevDb[1:] = envDb[:-1]
        rise = envDb - prevDb
        rise[rise <= 0.0] = 0.0     # half-wave rectification
        return rise

    windowing = get_window(window, M)

    (fs, x) = UF.wavread(inputFile)

    mX, pX = stft.stftAnal(x, fs, windowing, N, H)

    # Slice bounds must be integers; np.floor / np.ceil return floats.
    bin3000 = int(np.floor(3000.0*N/fs))
    bin10000 = int(np.floor(10000.0*N/fs))
    bin3000up = int(np.ceil(3000.0*N/fs))

    ODF = np.zeros((mX.shape[0], 2))
    ODF[:, 0] = bandOdf(mX[:, 1:bin3000+1])             # bin 0 excluded
    ODF[:, 1] = bandOdf(mX[:, bin3000up:bin10000+1])

    return ODF
Пример #16
0
def computeSNR(inputFile, window, M, N, H):
    """
    Compute the SNR of the stft() output against the input signal.

    Input:
            inputFile (string): wav file name including the path
            window (string): analysis window type (rectangular, triangular, hanning, hamming,
                    blackman, blackmanharris)
            M (integer): analysis window length (odd positive integer)
            N (integer): fft size (power of two, > M)
            H (integer): hop size for the stft computation
    Output:
            A python tuple (SNR1, SNR2) of floats: SNR1 over the whole signal,
            SNR2 with M samples removed from each end.
    """
    def snrDb(reference, estimate):
        signalEnergy = sum(reference**2)
        noiseEnergy = sum((reference-estimate)**2)
        return 10.0*np.log10(signalEnergy/noiseEnergy)

    x = UF.wavread(inputFile)[1]        # only the samples are needed
    analysisWindow = get_window(window, M)

    # stft() is called with 1.0 in place of the sampling rate.
    xSynth = stft(x, 1.0, analysisWindow, N, H)

    SNR1 = snrDb(x, xSynth)
    SNR2 = snrDb(x[M:len(x)-M], xSynth[M:len(xSynth)-M])

    return (SNR1,SNR2)
Пример #17
0
def getJawaab(ipFile = '../dataset/testInputs/testInput_1.wav', ipulsePos = getPulsePosFromAnn('../dataset/testInputs/testInput_1.csv'), strokeModels = None, oFile = './tablaOutput.wav', randomFlag = 1):
    """
    Generate a stroke sequence in response to an input audio file.

    ipFile: path of the input wav file
    ipulsePos: pulse positions of the input; their median spacing is used as
               the pulse period
    strokeModels: trained stroke models; when None nothing is generated
    oFile: output wav path, or None to skip audio rendering
    randomFlag: 1 -> genRandomComposition(), otherwise genSimilarComposition()
                using the inverse covariance matrix of the models

    Returns (opulsePos, strokeSeq, tStamps, oFile).  When strokeModels is
    None, a message is printed and all four values are None.
    """
    # If the models are not built, report it and bail out.  (The previous
    # code assigned `ts` but returned `tStamps`, raising NameError here.)
    if strokeModels is None:
        print("Train models first before calling getJawaab() ...")
        return None, None, None, None

    print("Getting jawaab...")
    pulsePeriod = np.median(np.diff(ipulsePos))
    print(pulsePeriod)
    fss, audioIn = UF.wavread(ipFile)
    pieceDur = len(audioIn)/params.Fs
    if randomFlag == 1:
        strokeSeq, tStamps, opulsePos = genRandomComposition(pulsePeriod, pieceDur = pieceDur, strokeModels = strokeModels)
    else:
        invCmat = getInvCovarianceMatrix(strokeModels)
        strokeSeq, tStamps, opulsePos = genSimilarComposition(pulsePeriod, pieceDur = pieceDur, strokeModels = strokeModels, iAudioFile = ipFile, iPos = ipulsePos,invC = invCmat)
    print(strokeSeq)
    print(tStamps)
    print(opulsePos)
    if oFile is not None:
        audio = genAudioFromStrokeSeq(strokeModels,strokeSeq,tStamps)
        # The 0.01 added to the peak keeps the scaled signal below full scale.
        audio = audio/(np.max(audio) + 0.01)
        UF.wavwrite(audio, params.Fs, oFile)
    return opulsePos, strokeSeq, tStamps, oFile
Пример #18
0
def main(inputFile = '../../sounds/piano.wav', window = 'hamming', M = 1024, N = 1024, H = 512):
    """
    Analyse a sound with the STFT, resynthesize it, and save the result.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    H: hop size (at least 1/2 of analysis window size to have good overlap-add)

    Returns (x, fs, mX, pX, y): the input samples, sampling rate, the two
    spectrograms returned by stftAnal, and the resynthesized samples.
    """
    # load the sound and build the analysis window
    fs, x = UF.wavread(inputFile)
    analysisWindow = get_window(window, M)

    # forward transform followed by the inverse transform
    mX, pX = STFT.stftAnal(x, fs, analysisWindow, N, H)
    y = STFT.stftSynth(mX, pX, M, H)

    # the result goes to output_sounds/<input name without its last 4 chars>_stft.wav
    stem = os.path.basename(inputFile)[:-4]
    outputFile = 'output_sounds/' + stem + '_stft.wav'
    UF.wavwrite(y, fs, outputFile)

    return x, fs, mX, pX, y
Пример #19
0
def sineODF(file='../../../../../audioDSP_course/assignments/sms-tools/sounds/piano.wav'):
    """
    Track sinusoids in a sound and plot them over its magnitude spectrogram.

    file: path of the wav file to analyse

    Draws the spectrogram up to 1500 Hz on the current matplotlib axes with
    the estimated frequency tracks on top, and returns the frequency tracks
    from SM.sineModelAnal().
    """
    fs, x = UF.wavread(file)

    # set params:
    M = 1024    # window size
    H = int(M/3)     # hop size
    t = -80.0   # threshold
    window = 'blackman' # window type
    fftSize = int(pow(2, np.ceil(np.log2(M))))  # size of FFT
    N = fftSize
    maxnSines = 10      # maximum simultaneous sines
    minSineDur = 0.1    # minimal duration of sines
    freqDevOffset = 30  # frequency deviation at 0Hz
    freqDevSlope = 0.001    # slope increase of min freq dev.

    w = get_window(window, M)    # get analysis window
    tStamps = genTimeStamps(len(x), M, fs, H)    # frame time stamps
    fTrackEst, mTrackEst, pTreckEst = SM.sineModelAnal(x, fs, w, fftSize, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

    # Not used by the plot below; call kept as in the original.
    fTrackTrue = genTrueFreqTracks(tStamps)

    # plotting:
    mX, pX = stft.stftAnal(x, fs, w, fftSize, H)
    maxplotfreq = 1500.0
    binFreq = fs*np.arange(N*maxplotfreq/fs)/N
    # Slice bounds must be integers; the expression is a float.
    numPlotBins = int(N*maxplotfreq/fs + 1)
    plt.pcolormesh(tStamps, binFreq, np.transpose(mX[:,:numPlotBins]),cmap = 'hot_r')
    plt.plot(tStamps, fTrackEst, color = 'y', linewidth=2.0)
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (Hz)')
    plt.autoscale(tight=True)
    return fTrackEst
Пример #20
0
def computeEngEnv(inputFile, window, M, N, H):
    """
    Compute and plot per-frame energy envelopes in two frequency bands.

    Inputs:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (rectangular, triangular, hanning, hamming,
                blackman, blackmanharris)
            M (integer): analysis window size (odd positive integer)
            N (integer): FFT size (power of 2, such that N > M)
            H (integer): hop size for the stft computation
    Output:
            A numpy array engEnv with shape Kx2, K = number of frames, holding
            the energy envelope of the signal in decibels (dB)
            engEnv[:,0]: Energy envelope in band 0 < f < 3000 Hz (in dB)
            engEnv[:,1]: Energy envelope in band 3000 < f < 10000 Hz (in dB)

    As a side effect a figure with the spectrogram and both envelopes is
    shown with plt.show().
    """
    def bandEnergyDb(magDb):
        linear = 10 ** (magDb / 20)
        return 10 * np.log10(np.sum(linear ** 2, axis=1))

    (fs, x) = UF.wavread(inputFile)
    # First bin at or above each band edge.
    border_bin = int(np.ceil(float(3000) * N / fs))
    max_bin = int(np.ceil(float(10000) * N / fs))
    w = get_window(window, M)

    mX, pX = STFT.stftAnal(x, fs, w, N, H)

    # Bin 0 is left out of the low band; the high band stops before max_bin.
    e_low = bandEnergyDb(mX[:, 1:border_bin])
    e_high = bandEnergyDb(mX[:, border_bin:max_bin])

    envs = np.vstack((e_low, e_high)).T

    # draw graph
    frameCount = mX.shape[0]
    frmTime = H*np.arange(frameCount)/float(fs)
    binFreq = np.arange(mX.shape[1])*float(fs)/N

    plt.figure(1, figsize=(9.5, 6))

    # top panel: spectrogram
    plt.subplot(211)
    plt.pcolormesh(frmTime, binFreq, np.transpose(mX))
    plt.title('mX ({0}), M={1}, N={2}, H={3}'.format(inputFile, M, N, H))
    plt.autoscale(tight=True)

    # bottom panel: the two envelopes
    plt.subplot(212)
    for envelope, colour, name in ((e_low, "blue", "row"), (e_high, "red", "high")):
        plt.plot(frmTime, envelope, color=colour, label=name)
    plt.title('Energy of Envelopes')
    plt.autoscale(tight=True)

    plt.tight_layout()
    plt.show()

    return envs
Пример #21
0
def readAudio(inputFile='../../sounds/piano.wav'):
    """
    Return samples 50000 .. 50009 of a wav file.

    inputFile: path of the wav file

    The sms-tools models directory is added to sys.path so that
    utilFunctions can be imported; the input path is printed before reading.
    """
    modelsDir = '/home/vagrant/sms-tools/software/models'
    # Avoid appending the same entry again on every call.
    if modelsDir not in sys.path:
        sys.path.append(modelsDir)
    import utilFunctions as UF

    print("Input File: ", inputFile)
    (fs, x) = UF.wavread(inputFile)
    y = x[50000:50010]
    return y
Пример #22
0
def testModuleLive(inputFile = '../dataset/testInputs/testInput_3.wav', pulsePos = getPulsePosFromAnn('../dataset/testInputs/testInput_3.csv')):
    """
    Run getJawaabLive() on a wav file and render the resulting strokes.

    inputFile: path of the input wav file
    pulsePos: pulse positions; one tenth of their median spacing is passed
              to getJawaabLive() as the input pulse period

    Uses the module-level strokeModelsG to synthesize audio.  Returns
    (testFeatFull, audioOut, strokeSeq, strokeTime, strokeAmp, opulsePer).
    """
    global strokeModelsG
    ipulsePer = np.median(np.diff(pulsePos))/10
    fss, ipAudio = UF.wavread(inputFile)
    # Call form prints the same text on Python 2 and 3.
    print("Analysing input...")
    testFeatFull, strokeSeq, strokeTime, strokeAmp, opulsePer = getJawaabLive(ipAudio, ipulsePer)
    audioOut = genAudioFromStrokeSeq(strokeModelsG,strokeSeq,strokeAmp,strokeTime)
    return testFeatFull, audioOut, strokeSeq, strokeTime, strokeAmp, opulsePer
Пример #23
0
def minMaxAudio(inputFile):
    """
    Find the extreme sample values of a wav file.

    Input:
        inputFile: file name of the wav file (including path)
    Output:
        A tuple (min_val, max_val) with the minimum and the maximum value of
        the audio samples.
    """
    # wavread() returns a pair; the samples are its second element.
    samples = wavread(inputFile)[1]
    lowest = samples.min()
    highest = samples.max()
    return (lowest, highest)
def main(inputFile='../../sounds/ocean.wav', H=256, stocf=.1):
    """
    Analyse a sound with the stochastic model, resynthesize it, save and plot it.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    H: hop size
    stocf: decimation factor used for the stochastic approximation

    Writes the synthesized sound into 'output_sounds/' and shows a figure
    with the input, the stochastic representation and the output.
    """
    # --------- computation -----------------

    (fs, x) = UF.wavread(inputFile)                  # input sound
    mYst = STM.stochasticModelAnal(x, H, stocf)      # stochastic model
    y = STM.stochasticModelSynth(mYst, H)            # sound from the model

    baseName = os.path.basename(inputFile)[:-4]
    outputFile = 'output_sounds/' + baseName + '_stochasticModel.wav'
    UF.wavwrite(y, fs, outputFile)

    # --------- plotting --------------------

    sampleRate = float(fs)
    plt.figure(figsize=(12, 9))

    # top: the input sound
    plt.subplot(3,1,1)
    plt.plot(np.arange(x.size)/sampleRate, x)
    plt.axis([0, x.size/sampleRate, min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # middle: the stochastic representation
    plt.subplot(3,1,2)
    numFrames = int(mYst[:,0].size)
    frmTime = H*np.arange(numFrames)/sampleRate
    binFreq = np.arange(stocf*H)*sampleRate/(stocf*2*H)
    plt.pcolormesh(frmTime, binFreq, np.transpose(mYst))
    plt.autoscale(tight=True)
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('stochastic approximation')

    # bottom: the output sound
    plt.subplot(3,1,3)
    plt.plot(np.arange(y.size)/sampleRate, y)
    plt.axis([0, y.size/sampleRate, min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')

    plt.tight_layout()
    plt.show()
Пример #25
0
def readAudio(inputFile):
    """
    Read ten consecutive samples from a wav file.

    Input:
        inputFile: the path to the wav file
    Output:
        A numpy array that contains 10 samples of the audio, taken from
        sample index 50000 up to and including 50009.
    """
    _, samples = wavread(inputFile)
    return samples[50000:50010]
Пример #26
0
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file with hopSamples() and write the result to 'test.wav'.

    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)

    The output file is written with the sampling rate read from the input.
    """
    sampleRate, samples = wavread(inputFile)
    wavwrite(hopSamples(samples, M), sampleRate, 'test.wav')
Пример #27
0
def minMaxAudio(inputFile):
    """
    Find the extreme sample values of a wav file.

    Input:
        inputFile: file name of the wav file (including path)
    Output:
        A tuple (min_val, max_val) with the minimum and the maximum value of
        the audio samples.
    """
    _, samples = wavread(inputFile)
    minVal = min(samples)
    maxVal = max(samples)
    return (minVal, maxVal)
Пример #28
0
def readAudio(inputFile):
    """
    Read ten consecutive samples from a wav file.

    Input:
        inputFile: the path to the wav file
    Output:
        A numpy array that contains 10 samples of the audio (indices
        50000 to 50009).
    """
    sampleRate, samples = wavread(inputFile)
    excerpt = slice(50000, 50010)
    return samples[excerpt]
Пример #29
0
def segmentStableNotesRegions(inputFile = '../../sounds/sax-phrase-short.wav', stdThsld=10, minNoteDur=0.1,
                              winStable = 3, window='hamming', M=1024, N=2048, H=256, f0et=5.0, t=-100,
                              minf0=310, maxf0=650):
    """
    Function to segment the stable note regions in an audio signal
    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour
        minNoteDur (float): minimum allowed segment length (note duration)
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): Numpy array containing starting and ending frame indices of every
                               segment.
    """
    fs, x = UF.wavread(inputFile)                               #reading inputFile
    w  = get_window(window, M)                                  #obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  #estimating F0

    # 1. convert f0 values from Hz to cents relative to 55 Hz; values below eps
    #    are clamped first so that the logarithm stays finite
    f0[f0 < eps] = eps
    f0_cents = 1200 * np.log2(f0 / 55.0)

    # 2. standard deviation of the last winStable samples, one value per frame
    #    from index winStable - 1 onwards. A list comprehension (rather than
    #    map) yields a real sequence on both Python 2 and Python 3.
    numFrames = len(f0_cents)
    sds = np.array([np.std(f0_cents[i + 1 - winStable:i + 1])
                    for i in range(winStable - 1, numFrames)])

    # 3. frames whose deviation is below the threshold; the offset converts
    #    positions in sds back to frame indices
    stableF0Indices = winStable - 1 + np.where(sds < stdThsld)[0]

    # 4. group consecutive stable frames into runs
    runs = groupConsecutiveRuns(stableF0Indices)

    # 5. drop runs shorter than minNoteDur and keep only [first, last] frame of each
    minNoteDurFrames = int(minNoteDur * fs / H)
    segments = np.array([[run[0], run[-1]] for run in runs
                         if len(run) >= minNoteDurFrames])

    return segments
Пример #30
0
def segmentStableNotesRegions(inputFile = '../../sounds/sax-phrase-short.wav', stdThsld=10, minNoteDur=0.1,
                              winStable = 3, window='hamming', M=1024, N=2048, H=256, f0et=5.0, t=-100,
                              minf0=310, maxf0=650):
    """
    Function to segment the stable note regions in an audio signal
    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour (in cents)
        minNoteDur (float): minimum allowed segment length (note duration)
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): Numpy array containing starting and ending frame indexes of every segment.
    """
    fs, x = UF.wavread(inputFile)                               #reading inputFile
    w  = get_window(window, M)                                  #obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  #estimating F0

    # 1. convert f0 values from Hz to cents relative to 55 Hz. Frames where f0
    #    is 0 become -inf; the standard deviation of a window containing them
    #    is nan, which never compares below the threshold.
    f0Cents = 1200. * np.log2(f0 / 55.)

    # 2./3. frame i is stable when the standard deviation of the winStable
    #       frames preceding it is below the threshold (the builtin bool is
    #       used because the np.bool alias no longer exists in current NumPy)
    stdBelowTh = np.zeros(np.shape(f0), dtype=bool)
    for i in range(winStable, len(f0)):
        stdBelowTh[i] = np.std(f0Cents[i-winStable:i]) < stdThsld

    # 4. group consecutive stable frames; the stored bounds are shifted by -1
    #    because the window that produced flag i ends at frame i - 1
    segments = []
    currSeg = []
    for i in range(winStable, len(f0)):
        if stdBelowTh[i]:
            currSeg.append(i)
        elif currSeg:
            segments.append([currSeg[0]-1, currSeg[-1]-1])
            currSeg = []
    # a run that is still open at the last frame would otherwise be lost
    if currSeg:
        segments.append([currSeg[0]-1, currSeg[-1]-1])

    # 5. remove segments shorter than minNoteDur (expressed in frames)
    minNoteDurFrames = 1.*fs*minNoteDur/H
    segments = np.array([seg for seg in segments
                         if seg[1] - seg[0] >= minNoteDurFrames])

    return segments
Пример #31
0
def main(inputFile='../../sounds/sax-phrase-short.wav', window='blackman', M=601, N=1024, t=-100, 
	minSineDur=0.1, nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01, stocf=0.1):
	"""
	Analyze a sound with the harmonic plus stochastic model, resynthesize it,
	write the three resulting sounds under output_sounds/ and plot the result.

	inputFile: input sound file (monophonic with sampling rate of 44100)
	window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
	M: analysis window size; N: fft size (power of two, bigger or equal than M)
	t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
	nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound
	maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm
	harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
	stocf: decimation factor used for the stochastic approximation
	"""

	# size of fft used in synthesis
	Ns = 512

	# hop size (has to be 1/4 of Ns)
	H = 128

	# read input sound
	(fs, x) = UF.wavread(inputFile)

	# compute analysis window
	w = get_window(window, M)

	# compute the harmonic plus stochastic model of the whole sound
	hfreq, hmag, hphase, stocEnv = HPS.hpsModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur, Ns, stocf)

	# synthesize a sound from the harmonic plus stochastic representation
	y, yh, yst = HPS.hpsModelSynth(hfreq, hmag, hphase, stocEnv, Ns, H, fs)

	# output sound files (monophonic with sampling rate of 44100); the last four
	# characters of the input name (the '.wav' extension) are dropped
	outputFileSines = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hpsModel_sines.wav'
	outputFileStochastic = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hpsModel_stochastic.wav'
	outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hpsModel.wav'

	# write sounds files for harmonics, stochastic, and the sum
	UF.wavwrite(yh, fs, outputFileSines)
	UF.wavwrite(yst, fs, outputFileStochastic)
	UF.wavwrite(y, fs, outputFile)

	# create figure to plot
	plt.figure(figsize=(12, 9))

	# frequency range to plot
	maxplotfreq = 15000.0

	# plot the input sound
	plt.subplot(3,1,1)
	plt.plot(np.arange(x.size)/float(fs), x)
	plt.axis([0, x.size/float(fs), min(x), max(x)])
	plt.ylabel('amplitude')
	plt.xlabel('time (sec)')
	plt.title('input sound: x')

	# plot spectrogram stochastic component
	plt.subplot(3,1,2)
	numFrames = int(stocEnv[:,0].size)
	sizeEnv = int(stocEnv[0,:].size)
	frmTime = H*np.arange(numFrames)/float(fs)
	# highest envelope bin to draw: it must be an integer because it is used as
	# a slice bound, and it is clamped to the last available envelope bin
	maxplotbin = min(int(sizeEnv*maxplotfreq/(.5*fs)), sizeEnv-1)
	# the frequency axis is built from the same bin count as the plotted slice
	binFreq = (.5*fs)*np.arange(maxplotbin+1)/sizeEnv
	plt.pcolormesh(frmTime, binFreq, np.transpose(stocEnv[:,:maxplotbin+1]))
	plt.autoscale(tight=True)

	# plot harmonic on top of stochastic spectrogram
	if (hfreq.shape[1] > 0):
		# frequencies at or above maxplotfreq are zeroed, and zeros are turned
		# into nan so that they are not drawn
		harms = hfreq*np.less(hfreq,maxplotfreq)
		harms[harms==0] = np.nan
		numFrames = harms.shape[0]
		frmTime = H*np.arange(numFrames)/float(fs)
		plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
		plt.xlabel('time (sec)')
		plt.ylabel('frequency (Hz)')
		plt.autoscale(tight=True)
		plt.title('harmonics + stochastic spectrogram')

	# plot the output sound
	plt.subplot(3,1,3)
	plt.plot(np.arange(y.size)/float(fs), y)
	plt.axis([0, y.size/float(fs), min(y), max(y)])
	plt.ylabel('amplitude')
	plt.xlabel('time (sec)')
	plt.title('output sound: y')

	plt.tight_layout()
	plt.show()
Пример #32
0
	mXenv = resample(np.maximum(-200, mX), mX.size*stocf)  # decimate the mag spectrum     
	pX = np.angle(X[:hN])
	#-----synthesis-----
	mY = resample(mXenv, hN)                               # interpolate to original size
	pY = 2*np.pi*np.random.rand(hN)                        # generate phase random values
	Y = np.zeros(N, dtype = complex)
	Y[:hN] = 10**(mY/20) * np.exp(1j*pY)                   # generate positive freq.
	Y[hN:] = 10**(mY[-2:0:-1]/20) * np.exp(-1j*pY[-2:0:-1]) # generate negative freq.
	fftbuffer = np.real( ifft(Y) )                         # inverse FFT
	y = fftbuffer*N/2                                  
	return mX, pX, mY, pY, y
		
		
# example call of the stochasticModelFrame function on one frame of ocean.wav
if __name__ == '__main__':
	# read the sound; fs is used below as the sampling rate for the time axis
	(fs, x) = UF.wavread('../../../sounds/ocean.wav')
	# analysis window; its size also fixes the length of the analysed fragment
	w = np.hanning(1024)
	# FFT size passed to stochasticModelFrame
	N = 1024
	# factor passed to stochasticModelFrame as stocf
	stocf = 0.2
	# highest frequency of interest and the (fractional) bin it falls on;
	# lastbin is not used in the visible part of this script
	maxFreq = 10000.0
	lastbin = N*maxFreq/fs
	# sample range of the fragment: w.size samples starting at sample 1000
	first = 1000
	last = first+w.size
	mX, pX, mY, pY, y = stochasticModelFrame(x[first:last], w, N, stocf)
	
	# first of four stacked plots: the input fragment against time in seconds
	plt.figure(1, figsize=(9, 7))
	plt.subplot(4,1,1)
	plt.plot(np.arange(first, last)/float(fs), x[first:last])
	plt.axis([first/float(fs), last/float(fs), min(x[first:last]), max(x[first:last])])
	plt.title('x (ocean.wav)')
	# second plot slot; the statements that fill it are not part of this excerpt
	plt.subplot(4,1,2)
# Script: harmonic plus residual analysis of a flute sound, followed by a
# frequency scaling of the harmonics and resynthesis.

# analysis parameters
inputFile = '../../../sounds/flute-A4.wav'
window = 'blackman'     # analysis window type
M = 801                 # analysis window size
N = 2048                # fft size
t = -90                 # magnitude threshold of spectral peaks
minSineDur = 0.1        # minimum duration of sinusoidal tracks
nH = 40                 # maximum number of harmonics
minf0 = 350             # minimum fundamental frequency
maxf0 = 700             # maximum fundamental frequency
f0et = 8                # maximum error accepted in f0 detection
harmDevSlope = 0.1      # allowed deviation of harmonic tracks
Ns = 512                # size of fft used in synthesis
H = 128                 # hop size

# read the sound and build the analysis window
(fs, x) = UF.wavread(inputFile)
w = get_window(window, M)
# harmonic plus residual analysis: harmonic frequencies, magnitudes, phases
# and the residual signal xr
hfreq, hmag, hphase, xr = HPR.hprModelAnal(x, fs, w, N, H, t, minSineDur, nH,
                                           minf0, maxf0, f0et, harmDevSlope)

# magnitude and phase spectrogram of the residual
# NOTE(review): other snippets call stftAnal without the fs argument - confirm
# which signature the imported stft module expects
mXr, pXr = STFT.stftAnal(xr, fs, w, N, H)

# transformation parameters handed to harmonicFreqScaling
# presumably flat lists of (time, factor) pairs - verify against HT
freqScaling = np.array([0, 1.5, 1, 1.5])
freqStretching = np.array([0, 1.1, 1, 1.1])
timbrePreservation = 1

# transformed harmonic frequencies and magnitudes
hfreqt, hmagt = HT.harmonicFreqScaling(hfreq, hmag, freqScaling,
                                       freqStretching, timbrePreservation, fs)

# resynthesis from the transformed harmonics and the original residual; an
# empty array is passed in the phase argument position
y, yh = HPR.hprModelSynth(hfreqt, hmagt, np.array([]), xr, Ns, H, fs)
Пример #34
0
import numpy as np
import time, os, sys
from scipy.signal import hamming, resample
import matplotlib.pyplot as plt

sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../../software/models/'))
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../../software/transformations/'))

import dftModel as DFT
import utilFunctions as UF
import stftTransformations as STFTT
import stochasticModel as STOC
import math
import stft as STFT

# Script: morph two sounds with the STFT and compute spectrograms of the
# first input and of the morphed output.

# read both sounds; fs ends up holding the rate returned for the second file
(fs, x1) = UF.wavread('../../../sounds/orchestra.wav')
(fs, x2) = UF.wavread('../../../sounds/speech-male.wav')
# analysis window, fft size and hop size used for the first sound
w1 = np.hamming(1024)
N1 = 1024
H1 = 256
# analysis window and fft size used for the second sound
w2 = np.hamming(1024)
N2 = 1024
# smoothing and balance factors handed to stftMorph
smoothf = .2
balancef = 0.5
y = STFTT.stftMorph(x1, x2, fs, w1, N1, w2, N2, H1, smoothf, balancef)
# L: number of whole hops of size H1 in x1; H2: hop size that splits x2 into
# that same number of frames
L = int(x1.size/H1)
H2 = int(x2.size/L)
# stochastic analysis of the second sound, with smoothf as the last argument
mX2 = STOC.stochasticModelAnal(x2,H2,H2*2, smoothf)
# spectrograms of the first input and of the morphed output
mX,pX = STFT.stftAnal(x1, fs, w1, N1, H1)
mY,pY = STFT.stftAnal(y, fs, w1, N1, H1)
# upper frequency limit; not used in the visible part of this script
maxplotfreq = 10000.0
Пример #35
0
def estimateInharmonicity(inputFile='../../sounds/piano.wav',
                          t1=0.1,
                          t2=0.5,
                          window='hamming',
                          M=2048,
                          N=2048,
                          H=128,
                          f0et=5.0,
                          t=-90,
                          minf0=130,
                          maxf0=180,
                          nH=10):
    """
    Function to estimate the extent of inharmonicity present in a sound
    Input:
        inputFile (string): wav file including the path
        t1 (float): start time of the segment considered for computing inharmonicity
        t2 (float): end time of the segment considered for computing inharmonicity
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
        nH (integer): number of harmonics considered for computing inharmonicity
    Output:
        meanInharm (float or np.float): mean inharmonicity over all the frames between the time interval
                                        t1 and t2. 0.0 is returned when the interval contains no frame.
    """
    # 0. Read the audio file and obtain an analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # 1. Use harmonic model to compute the harmonic frequencies (the
    #    magnitudes and phases it also returns are not needed here)
    xhfreq, _, _ = HM.harmonicModelAnal(x,
                                        fs,
                                        w,
                                        N,
                                        H,
                                        t,
                                        nH,
                                        minf0,
                                        maxf0,
                                        f0et,
                                        harmDevSlope=0.01,
                                        minSineDur=0.0)

    # 2. Extract the time segment in which the inharmonicity is computed:
    #    frames interval_start .. interval_end - 1
    interval_start = int(math.ceil(t1 * fs / float(H)))
    interval_end = int(math.ceil(t2 * fs / float(H)))

    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)

    f0_slice = f0[interval_start:interval_end]
    sliced = xhfreq[interval_start:interval_end]

    # one inharmonicity value per frame (not per frame-and-harmonic)
    numFrames = sliced.shape[0]
    if numFrames == 0:
        return 0.0

    # 3. Compute the mean inharmonicity of the segment.
    #    Column j of a frame is compared with (j + 1) * f0; the first column
    #    is skipped, so the harmonic numbers run from 2 upwards.
    harmNums = np.arange(2, sliced.shape[1] + 1, dtype=float)
    inharmon = np.zeros(numFrames)
    for index, harmonics in enumerate(sliced):
        deviation = np.abs(harmonics[1:] - harmNums * f0_slice[index])
        inharmon[index] = np.sum(deviation / harmNums) / float(nH)

    # average over exactly the frames that were summed
    mean_inharmon = np.sum(inharmon) / numFrames

    return mean_inharmon
Пример #36
0
def sineModelAnalEnhanced(
        inputFile='../../sounds/sines-440-602-transient.wav'):
    """
    Track two frequency components frame by frame, keeping only spectral
    peaks selected by selectFlatPhasePeak, and plot the estimated tracks on
    top of the spectrogram.

    Input:
           inputFile (string): wav file including the path
    Output:
           tStamps: A Kx1 numpy array of time stamps at which the frequency components were estimated
           tfreq: A Kx2 numpy array of frequency values, one column per component
                  (a frame without exactly two selected peaks holds [0.0, 0.0])
    """
    phaseDevThres = 1e-2  # Allowed deviation in phase
    M = 2047  # window size
    N = 4096  # FFT size
    t = -80  # threshold in negative dB
    H = 128  # hop-size
    window = 'blackman'  # window type
    fs, x = UF.wavread(inputFile)  # Read input file
    w = get_window(window, M)  # Get the window
    hM1 = (w.size + 1) // 2  # half analysis window size by rounding
    hM2 = w.size // 2  # half analysis window size by floor
    # add zeros at beginning to center first window at sample 0
    x = np.append(np.zeros(hM2), x)
    # add zeros at the end to analyze last sample
    x = np.append(x, np.zeros(hM2))
    pin = hM1  # initialize sound pointer in middle of analysis window
    pend = x.size - hM1  # last sample to start a frame
    tStamps = np.arange(pin, pend, H) / float(fs)  # Generate time stamps
    w = w / sum(w)  # normalize analysis window
    frameFreqs = []  # one pair of frequencies per analysed frame
    while pin < pend:  # while input sound pointer is within sound
        x1 = x[pin - hM1:pin + hM2]  # select frame
        mX, pX = SM.DFT.dftAnal(x1, w, N)  # compute dft
        ploc = UF.peakDetection(mX, t)  # detect locations of peaks
        ###### CODE DIFFERENT FROM sineModelAnal() #########
        # Phase based mainlobe tracking
        plocSelMask = np.zeros(len(ploc))
        for pindex, p in enumerate(ploc):
            if 2 < p < len(pX) - 2:
                # Select the peak if the phase spectrum around the peak is flat
                if selectFlatPhasePeak(pX, p, phaseDevThres):
                    plocSelMask[pindex] = 1
            else:
                # Peaks at either end of the spectrum are kept without the check
                plocSelMask[pindex] = 1
        plocSel = ploc[plocSelMask.nonzero()[0]]  # Select the ones chosen
        if len(plocSel) != 2:
            # Ignoring frames that don't return two selected peaks
            ipfreq = [0.0, 0.0]
        else:
            # Only selected peaks are refined by interpolation
            iploc, ipmag, ipphase = UF.peakInterp(mX, pX, plocSel)
            ipfreq = fs * iploc / float(N)  # convert peak locations to Hertz
        ###### CODE DIFFERENT FROM sineModelAnal() #########
        frameFreqs.append(ipfreq)
        pin += H
    # stack all frames at once instead of growing an array frame by frame
    tfreq = np.array(frameFreqs)
    # Plot the estimated frequency tracks
    # NOTE(review): at this point x is the zero-padded signal and w the
    # normalized window - confirm this is what the spectrogram should use
    mX, pX = stft.stftAnal(x, w, N, H)
    maxplotfreq = 1500.0
    # highest bin to draw; an integer because it is used as a slice bound
    maxplotbin = int(N * maxplotfreq / fs)
    binFreq = float(fs) * np.arange(maxplotbin + 1) / N
    numFrames = int(mX[:, 0].size)
    frmTime = H * np.arange(numFrames) / float(fs)
    plt.pcolormesh(frmTime,
                   binFreq,
                   np.transpose(mX[:, :maxplotbin + 1]),
                   cmap='hot_r')
    plt.plot(tStamps, tfreq[:, 0], color='y', linewidth=2.0)
    plt.plot(tStamps, tfreq[:, 1], color='c', linewidth=2.0)
    plt.legend(('Estimated f1', 'Estimated f2'))
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (Hz)')
    plt.autoscale(tight=True)
    plt.show()
    return tStamps, tfreq
Пример #37
0
import numpy as np
import matplotlib.pyplot as plt
import sys, os, time

sys.path.append(
    os.path.join(os.path.dirname(os.path.realpath(__file__)),
                 '../../../software/models/'))

import stft as STFT
import sineModel as SM
import utilFunctions as UF

# Script: STFT and sinusoidal analysis of a flute sound, plus the axes needed
# to plot the spectrogram up to maxplotfreq.

# read the sound, located relative to this file
(fs, x) = UF.wavread(
    os.path.join(os.path.dirname(os.path.realpath(__file__)),
                 '../../../sounds/flute-A4.wav'))
w = np.blackman(601)    # analysis window
N = 1024                # fft size
H = 150                 # hop size
t = -80                 # magnitude threshold passed to the sinusoidal analysis
minSineDur = .1         # minimum duration of sinusoidal tracks
maxnSines = 150         # maximum number of parallel sinusoids
# magnitude and phase spectrogram
mX, pX = STFT.stftAnal(x, w, N, H)
# sinusoidal tracks: frequencies, magnitudes and phases
tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines,
                                       minSineDur)

plt.figure(1, figsize=(9.5, 5))
# plot limits: the highest frequency to show and the bin it falls on
maxplotfreq = 5000.0
maxplotbin = int(N * maxplotfreq / fs)
# time of every frame in seconds and frequency of every plotted bin in Hz
numFrames = int(mX[:, 0].size)
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = np.arange(maxplotbin + 1) * float(fs) / N
def estimateInharmonicity(inputFile='../../sounds/piano.wav',
                          t1=0.1,
                          t2=0.5,
                          window='hamming',
                          M=2048,
                          N=2048,
                          H=128,
                          f0et=5.0,
                          t=-90,
                          minf0=130,
                          maxf0=180,
                          nH=10):
    """
    Function to estimate the extent of inharmonicity present in a sound
    Input:
        inputFile (string): wav file including the path
        t1 (float): start time of the segment considered for computing inharmonicity
        t2 (float): end time of the segment considered for computing inharmonicity
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
        nH (integer): number of harmonics considered for computing inharmonicity
    Output:
        meanInharm (float or np.float): mean inharmonicity over all the frames between the time interval
                                        t1 and t2. 0.0 is returned when the interval contains no frame.
    """
    # 0. Read the audio file and obtain an analysis window
    fs, x = UF.wavread(inputFile)  # reading inputFile
    w = get_window(window, M)  # obtaining analysis window

    # 1. Use harmonic model to compute the harmonic frequencies (the
    #    magnitudes and phases it also returns are not needed here)
    xhfreq, _, _ = HM.harmonicModelAnal(x,
                                        fs,
                                        w,
                                        N,
                                        H,
                                        t,
                                        nH,
                                        minf0,
                                        maxf0,
                                        f0et,
                                        harmDevSlope=0.01,
                                        minSineDur=0.0)

    # 2. Extract the time segment in which the inharmonicity is computed:
    #    frames lt1 .. lt2 - 1
    lt1 = int(np.ceil(fs * t1 / float(H)))
    lt2 = int(np.floor(fs * t2 / float(H)))
    xSeg = xhfreq[lt1:lt2]

    # an empty interval would otherwise end in a division by zero
    numFrames = xSeg.shape[0]
    if numFrames == 0:
        return 0.0

    # 3. Compute the mean inharmonicity of the segment.
    #    Column r is compared with (r + 1) times column 0 of the same frame.
    #    Column 0 itself is skipped and so are columns holding no frequency
    #    (value <= 0); a frame without any such column contributes zero.
    harmNums = np.arange(2, xSeg.shape[1] + 1, dtype=float)
    frameInharm = np.zeros(numFrames)
    for frame in range(numFrames):
        upper = xSeg[frame, 1:]
        present = upper > 0.0
        deviation = np.abs(upper[present] - harmNums[present] * xSeg[frame, 0])
        frameInharm[frame] = np.sum(deviation / harmNums[present]) / float(nH)

    # average over exactly the frames that were summed
    meanInharm = np.sum(frameInharm) / numFrames

    return meanInharm
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import hamming, triang, blackmanharris
import math
import sys, os, functools, time
sys.path.append(
    os.path.join(os.path.dirname(os.path.realpath(__file__)),
                 '../../../software/models/'))

import dftModel as DFT
import utilFunctions as UF

# Script: detect and interpolate the spectral peaks of one frame of an oboe
# sound and start plotting its magnitude spectrum.

(fs, x) = UF.wavread('../../../sounds/oboe-A4.wav')
N = 512 * 2             # fft size
M = 511                 # analysis window size
t = -60                 # magnitude threshold used for peak detection
w = np.hamming(M)
# first sample of the analysed frame; cast to int because it is a slice index
start = int(.8 * fs)
# half sizes, kept as integers on both Python 2 and Python 3
hN = N // 2
hM = (M + 1) // 2

# spectrum of the M-sample frame starting at `start`
x1 = x[start:start + M]
mX, pX = DFT.dftAnal(x1, w, N)
# peak bins, their interpolated location / magnitude / phase, and the raw
# magnitudes at the detected bins
ploc = UF.peakDetection(mX, t)
iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
pmag = mX[ploc]
# frequency in Hz of every bin of mX
freqaxis = fs * np.arange(mX.size) / float(N)

plt.figure(1, figsize=(9, 6))
plt.subplot(2, 1, 1)
plt.plot(freqaxis, mX, 'r', lw=1.5)
Пример #40
0
def main(inputFile='../../sounds/bendir.wav',
         window='hamming',
         M=2001,
         N=2048,
         t=-80,
         minSineDur=0.02,
         maxnSines=150,
         freqDevOffset=10,
         freqDevSlope=0.001):
    """
    Run the sinusoidal plus residual model on a sound, write the sinusoidal,
    residual and summed sounds under output_sounds/ and plot the result.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    """

    # synthesis fft size and hop size (the hop has to be 1/4 of Ns)
    Ns = 512
    H = 128

    # input sound and analysis window
    (fs, x) = UF.wavread(inputFile)
    w = get_window(window, M)

    # sinusoidal plus residual analysis
    tfreq, tmag, tphase, xr = SPR.sprModelAnal(x, fs, w, N, H, t, minSineDur,
                                               maxnSines, freqDevOffset,
                                               freqDevSlope)

    # spectrogram of the residual
    mXr, pXr = STFT.stftAnal(xr, w, N, H)

    # sum of sinusoids and residual, and the sinusoids alone
    y, ys = SPR.sprModelSynth(tfreq, tmag, tphase, xr, Ns, H, fs)

    # common prefix of the three output files: the input name without its
    # last four characters, placed under output_sounds/
    outputPrefix = 'output_sounds/' + os.path.basename(inputFile)[:-4]
    sinesPath = outputPrefix + '_sprModel_sines.wav'
    residualPath = outputPrefix + '_sprModel_residual.wav'
    sumPath = outputPrefix + '_sprModel.wav'

    # write the sinusoidal, residual and summed sounds
    UF.wavwrite(ys, fs, sinesPath)
    UF.wavwrite(xr, fs, residualPath)
    UF.wavwrite(y, fs, sumPath)

    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # top: the input sound
    plt.subplot(3, 1, 1)
    inputTime = np.arange(x.size) / float(fs)
    plt.plot(inputTime, x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # middle: magnitude spectrogram of the residual up to maxplotfreq
    plt.subplot(3, 1, 2)
    maxplotbin = int(N * maxplotfreq / fs)
    frameCount = int(mXr[:, 0].size)
    frmTime = H * np.arange(frameCount) / float(fs)
    binFreq = np.arange(maxplotbin + 1) * float(fs) / N
    residualSpec = np.transpose(mXr[:, :maxplotbin + 1])
    plt.pcolormesh(frmTime, binFreq, residualSpec)
    plt.autoscale(tight=True)

    # sinusoidal tracks drawn over the spectrogram: values at or above
    # maxplotfreq are zeroed, and non-positive values become nan (not drawn)
    if tfreq.shape[1] > 0:
        visibleTracks = tfreq * (tfreq < maxplotfreq)
        visibleTracks[visibleTracks <= 0] = np.nan
        plt.plot(frmTime, visibleTracks, color='k')
        plt.title('sinusoidal tracks + residual spectrogram')
        plt.autoscale(tight=True)

    # bottom: the output sound
    plt.subplot(3, 1, 3)
    outputTime = np.arange(y.size) / float(fs)
    plt.plot(outputTime, y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.ion()
    plt.show()
Пример #41
0
import numpy as np
import sys, os, math
from scipy.fftpack import fft

sys.path.append(
    os.path.join(os.path.dirname(os.path.realpath(__file__)),
                 '../software/models/'))

import utilFunctions as UF

# Script: spectrum of one hamming-windowed frame of a soprano sound.

# frame size and its two halves (hM1 is the larger one when M is odd)
M = 501
hM1 = int(math.floor((M + 1) / 2))
hM2 = int(math.floor(M / 2))

# M samples starting at sample 5000, multiplied by a hamming window
(fs, x) = UF.wavread('../sounds/soprano-E4.wav')
x1 = x[5000:5000 + M] * np.hamming(M)

# FFT buffer of N samples: the second half of the frame goes to the start of
# the buffer and the first half to its end; the samples in between stay zero
N = 1024
fftbuffer = np.zeros(N)
fftbuffer[:hM1] = x1[hM2:]
fftbuffer[N - hM2:] = x1[:hM2]

# spectrum, magnitude in dB and unwrapped phase
X = fft(fftbuffer)
mX = 20 * np.log10(abs(X))
pX = np.unwrap(np.angle(X))
Пример #42
0
def main(inputFile='../../sounds/piano.wav',
         window='blackman',
         M=511,
         N=1024,
         time=.2):
    """
    Compute the DFT of one fragment of a sound, resynthesize the fragment
    with the inverse DFT and plot input, magnitude, phase and output.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (choice of rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size (odd integer value)
    N: fft size (power of two, bigger or equal than M)
    time: time to start analysis (in seconds)

    Raises ValueError when the fragment starts before the sound or reaches
    its end.
    """

    # input sound (monophonic with sampling rate of 44100) and analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # fragment of M samples starting at the requested time
    firstSample = int(time * fs)
    if firstSample < 0 or firstSample + M >= x.size:
        raise ValueError("Time outside sound boundaries")
    fragment = x[firstSample:firstSample + M]

    # dft of the fragment, then its inverse scaled by the window sum
    mX, pX = DFT.dftAnal(fragment, w, N)
    y = DFT.dftSynth(mX, pX, w.size) * sum(w)

    # axes shared by the plots below
    fragmentTime = time + np.arange(M) / float(fs)
    timeLimits = [time, time + M / float(fs)]
    freqLimits = [0, fs / 2.0]

    plt.figure(figsize=(12, 9))

    # 1: the sound fragment
    plt.subplot(4, 1, 1)
    plt.plot(fragmentTime, fragment)
    plt.axis(timeLimits + [min(fragment), max(fragment)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # 2: the magnitude spectrum
    plt.subplot(4, 1, 2)
    magFreq = float(fs) * np.arange(mX.size) / float(N)
    plt.plot(magFreq, mX, 'r')
    plt.axis(freqLimits + [min(mX), max(mX)])
    plt.title('magnitude spectrum: mX')
    plt.ylabel('amplitude (dB)')
    plt.xlabel('frequency (Hz)')

    # 3: the phase spectrum
    plt.subplot(4, 1, 3)
    phaseFreq = float(fs) * np.arange(pX.size) / float(N)
    plt.plot(phaseFreq, pX, 'c')
    plt.axis(freqLimits + [min(pX), max(pX)])
    plt.title('phase spectrum: pX')
    plt.ylabel('phase (radians)')
    plt.xlabel('frequency (Hz)')

    # 4: the sound resulting from the inverse dft
    plt.subplot(4, 1, 4)
    plt.plot(fragmentTime, y)
    plt.axis(timeLimits + [min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.ion()
    plt.show()
import numpy as np
import time, os, sys
import matplotlib.pyplot as plt
sys.path.append(
    os.path.join(os.path.dirname(os.path.realpath(__file__)),
                 '../../../software/models/'))
sys.path.append(
    os.path.join(os.path.dirname(os.path.realpath(__file__)),
                 '../../../software/transformations/'))
import utilFunctions as UF
import stftTransformations as STFTT
import stft as STFT

# Script: filter a sound in the STFT domain and compute the spectrograms of
# the input and of the filtered output.

(fs, x) = UF.wavread('../../../sounds/orchestra.wav')
w = np.hamming(2048)    # analysis window
N = 2048                # fft size
H = 512                 # hop size
# design the filter with a hanning window: -60 in every bin, except for a
# hanning-shaped bump reaching about +5 that starts at the 500 Hz bin and
# spans the number of bins covering 2000 Hz
# (presumably gains in dB - verify against STFTT.stftFiltering)
startBin = int(N * 500.0 / fs)
nBins = int(N * 2000.0 / fs)
bandpass = (np.hanning(nBins) * 65.0) - 60
# integer division keeps the array size an int on Python 2 and Python 3
filt = np.zeros(N // 2) - 60
filt[startBin:startBin + nBins] = bandpass
y = STFTT.stftFiltering(x, fs, w, N, H, filt)
# spectrograms of the input and of the filtered sound
mX, pX = STFT.stftAnal(x, fs, w, N, H)
mY, pY = STFT.stftAnal(y, fs, w, N, H)

plt.figure(1, figsize=(12, 9))
plt.subplot(311)
# time of every frame in seconds
numFrames = int(mX[:, 0].size)
frmTime = H * np.arange(numFrames) / float(fs)
Пример #44
0
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import get_window
import sys, os
sys.path.append(
    os.path.join(os.path.dirname(os.path.realpath(__file__)),
                 "/home/pvardanis/sms-tools/software/models/"))
import dftModel as DFT
import utilFunctions as UF

# Script: plot the magnitude spectrum of one frame of a 440 Hz sine file and
# mark its interpolated spectral peaks.

# NOTE(review): absolute, machine-specific path
(fs, x) = UF.wavread("/home/pvardanis/sms-tools/sounds/sine-440.wav")
M = 501     # analysis window size
N = 2048    # fft size
t = -20  # magnitude threshold used for peak detection
w = get_window('hamming', M)
# M samples starting 0.8 seconds into the sound
x1 = x[int(.8 * fs):int(.8 * fs + M)]
mX, pX = DFT.dftAnal(x1, w, N)
# peak bins, their interpolated location / magnitude / phase, and the raw
# magnitudes at the detected bins
ploc = UF.peakDetection(mX, t)
iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
pmag = mX[ploc]

# frequency in Hz of bins 0 .. N/2
freqaxis = fs * np.arange(N / 2 + 1) / float(N)
plt.plot(freqaxis, mX)
# interpolated peaks drawn as unconnected 'x' markers
plt.plot(fs * iploc / float(N), ipmag, marker='x', linestyle='')

plt.show()
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import hamming, triang, blackmanharris
import math
from scipy.fftpack import fft, ifft, fftshift
import sys, os, functools, time

sys.path.append(
    os.path.join(os.path.dirname(os.path.realpath(__file__)),
                 '../../../software/models/'))
import dftModel as DFT
import utilFunctions as UF
import harmonicModel as HM

# Analysis window: length, the two half-lengths, and the samples themselves.
M = 601
hM1 = (M + 1) // 2
hM2 = M // 2
w = np.hamming(M)

# FFT sizes and hop size (a quarter of Ns).
N = 1024
Ns = 512
H = Ns // 4

# Remaining settings; presumably the peak threshold and the f0/harmonic
# tracking parameters of the harmonic model -- confirm against the code that
# consumes them.
t = -100
nH = 40
minf0 = 420
maxf0 = 460
f0et = 5
maxnpeaksTwm = 5
minSineDur = .1
harmDevSlope = 0.01

# Read the sound and pick the sample position 0.8 seconds in.
(fs, x) = UF.wavread('../../../sounds/flute-A4.wav')
pos = int(.8 * fs)
Пример #46
0
def extractHarmSpec(inputFile='../../sounds/piano.wav',
                    window='hamming',
                    M=1024,
                    N=1024,
                    H=512):
    """
    Analysis/synthesis using the STFT: analyze a sound, resynthesize it,
    write the result to disk and plot input, spectrograms and output.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (choice of rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    H: hop size (at least 1/2 of analysis window size to have good overlap-add)

    The resynthesized sound is written to
    'output_sounds/<input base name>_stft.wav'. Nothing is returned.
    """

    # read input sound (monophonic with sampling rate of 44100)
    fs, x = UF.wavread(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # compute the magnitude and phase spectrogram
    mX, pX = STFT.stftAnal(x, fs, w, N, H)

    # perform the inverse stft
    y = STFT.stftSynth(mX, pX, M, H)

    # output sound file (monophonic with sampling rate of 44100)
    outputFile = 'output_sounds/' + os.path.basename(
        inputFile)[:-4] + '_stft.wav'

    # write the sound resulting from the inverse stft
    UF.wavwrite(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # Number of spectrum bins to display. It must be an integer to be usable
    # as a slice bound, and it is capped at the number of bins available so
    # the frequency axis always matches the plotted columns.
    maxBin = min(int(N * maxplotfreq / fs) + 1, mX.shape[1])

    # time of every frame (seconds) and frequency of every plotted bin (Hz)
    numFrames = int(mX[:, 0].size)
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = fs * np.arange(maxBin) / float(N)

    # plot the input sound
    plt.subplot(4, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot magnitude spectrogram
    plt.subplot(4, 1, 2)
    plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:, :maxBin]))
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('magnitude spectrogram')
    plt.autoscale(tight=True)

    # plot the phase spectrogram (difference between neighbouring bins)
    plt.subplot(4, 1, 3)
    plt.pcolormesh(frmTime, binFreq,
                   np.transpose(np.diff(pX[:, :maxBin], axis=1)))
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('phase spectrogram (derivative)')
    plt.autoscale(tight=True)

    # plot the output sound
    plt.subplot(4, 1, 4)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show()
Пример #47
0
def main(inputFile1='../../sounds/ocean.wav',
         inputFile2='../../sounds/speech-male.wav',
         window1='hamming',
         window2='hamming',
         M1=1024,
         M2=1024,
         N1=1024,
         N2=1024,
         H1=256,
         smoothf=.5,
         balancef=0.2):
    """
    Morph two sounds with the STFT, write the result and plot it.

    inputFile1: name of input sound file to be used as source
    inputFile2: name of input sound file to be used as filter
    window1 and window2: windows for both files
    M1 and M2: window sizes for both files
    N1 and N2: fft sizes for both sounds
    H1: hop size for sound 1 (the one for sound 2 is computed automatically)
    smoothf: smoothing factor to be applied to magnitude spectrum of sound 2 before morphing
    balancef: balance factor between both sounds, 0 is sound 1 and 1 is sound 2

    The morphed sound is written to
    'output_sounds/<inputFile1 base name>_stftMorph.wav'.
    """

    # load the two sounds; fs keeps the rate reported for the second file
    (fs, x1) = UF.wavread(inputFile1)
    (fs, x2) = UF.wavread(inputFile2)

    # analysis windows for each sound
    w1 = get_window(window1, M1)
    w2 = get_window(window2, M2)

    # morph, then analyze the source and the result for the plots
    y = STFTT.stftMorph(x1, x2, fs, w1, N1, w2, N2, H1, smoothf, balancef)
    mX1, pX1 = STFT.stftAnal(x1, w1, N1, H1)
    mY, pY = STFT.stftAnal(y, w1, N1, H1)

    # store the morphed sound
    baseName = os.path.basename(inputFile1)[:-4]
    outputFile = 'output_sounds/' + baseName + '_stftMorph.wav'
    UF.wavwrite(y, fs, outputFile)

    plt.figure(figsize=(12, 9))

    # upper frequency limit of the spectrogram plots
    maxplotfreq = 10000.0

    def drawWaveform(row, signal, title):
        # time-domain plot of one signal in the given subplot row
        plt.subplot(4, 1, row)
        plt.plot(np.arange(signal.size) / float(fs), signal)
        plt.axis([0, signal.size / float(fs), min(signal), max(signal)])
        plt.ylabel('amplitude')
        plt.xlabel('time (sec)')
        plt.title(title)

    def drawSpectrogram(row, mag, title):
        # magnitude spectrogram up to maxplotfreq in the given subplot row
        plt.subplot(4, 1, row)
        numFrames = int(mag[:, 0].size)
        frmTime = H1 * np.arange(numFrames) / float(fs)
        binFreq = fs * np.arange(N1 * maxplotfreq / fs) / N1
        shown = mag[:, :int(N1 * maxplotfreq / fs) + 1]
        plt.pcolormesh(frmTime, binFreq, np.transpose(shown))
        plt.xlabel('time (sec)')
        plt.ylabel('frequency (Hz)')
        plt.title(title)
        plt.autoscale(tight=True)

    drawWaveform(1, x1, 'input sound: x')
    drawSpectrogram(2, mX1, 'magnitude spectrogram of x')
    drawSpectrogram(3, mY, 'magnitude spectrogram of y')
    drawWaveform(4, y, 'output sound: y')

    plt.tight_layout()
    plt.show()
Пример #48
0
def computeODF(inputFile, window, M, N, H):
    """
    Compute and plot an energy-based onset detection function in two bands.

    Inputs:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                blackman, blackmanharris)
            M (integer): analysis window size (odd integer value)
            N (integer): fft size (power of two, bigger or equal than than M)
            H (integer): hop size for the STFT computation
    Output:
            A numpy array with one row per frame and two columns:
            ODF[:,0]: ODF computed in band 0 < f < 3000 Hz
            ODF[:,1]: ODF computed in band 3000 < f < 10000 Hz
            Each value is the frame-to-frame increase of the band energy in dB; decreases are
            stored as 0, and so is the first frame.
    """

    # read the sound and compute its magnitude spectrogram (dB)
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)
    mX, pX = stft.stftAnal(x, w, N, H)

    # band limits expressed as (fractional) bin numbers: bin = (f * N) / fs
    edge3000 = (3000.0 * N) / fs
    edge10000 = (10000.0 * N) / fs

    # bins strictly inside each band
    bandBins = (
        np.arange(1, int(np.ceil(edge3000))),
        np.arange(int(np.floor(edge3000)) + 1, int(np.ceil(edge10000))),
    )

    # energy envelope in dB of every band, frame by frame
    nFrames = mX[:, 0].size
    engEnv = np.zeros((nFrames, 2))
    for col, bins in enumerate(bandBins):
        for i in range(nFrames):
            linear = 10.0**(np.take(mX[i, :], bins) / 20)  # dB to linear
            engEnv[i, col] = 10 * np.log10(sum(linear**2))  # energy in dB

    # onset detection function: keep only the energy increases
    ODF = np.zeros((nFrames, 2))
    rise = engEnv[1:] - engEnv[:-1]
    ODF[1:] = np.where(rise > 0.0, rise, 0.0)

    # ---- plot the spectrogram and the two detection functions

    plt.figure(1, figsize=(9.5, 6))
    settings = ', M=' + str(M) + ', N=' + str(N) + ', H=' + str(H) + ''

    numFrames = int(mX[:, 0].size)
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = np.arange(N / 2 + 1) * float(fs) / N

    plt.subplot(211)
    plt.pcolormesh(frmTime, binFreq, np.transpose(mX))
    plt.title('mX (' + inputFile + ')' + settings)
    plt.autoscale(tight=True)

    plt.subplot(212)
    for col, (name, colour) in enumerate((('ODF low', 'blue'),
                                          ('ODF high', 'green'))):
        plt.bar(frmTime,
                ODF[:, col],
                width=frmTime / numFrames,
                label=name,
                color=colour)

    plt.title('ODF low and high (' + inputFile + ')' + settings)
    plt.autoscale(tight=True)

    plt.tight_layout()
    plt.legend()
    plt.grid(True)
    plt.show()

    return ODF
Пример #49
0
def segment_stable_notes_monophonic(
        inputFile='../../sounds/sax-phrase-short.wav',
        stdThsld=10,
        minNoteDur=0.1,
        winStable=3,
        window='hamming',
        M=1024,
        N=2048,
        H=256,
        f0et=5.0,
        t=-100,
        minf0=310,
        maxf0=650):
    """
    Function to segment the stable note regions in an audio signal
    Input:
        inputFile (string): wav file including the path
        stdThsld (float): threshold for detecting stable regions in the f0 contour (in cents)
        minNoteDur (float): minimum allowed segment length (note duration)
        winStable (integer): number of samples used for computing standard deviation
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
    Output:
        segments (np.ndarray): array of shape (number of segments, 2) holding the starting and
            ending frame indexes (both inclusive) of every segment; shape (0, 2) when no
            segment is found.
    """

    fs, x = UF.wavread(inputFile)  # reading inputFile
    w = get_window(window, M)  # obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  # estimating F0

    # 1. convert f0 values from Hz to Cents; values below eps are raised to
    #    eps first so that the logarithm stays finite

    f0[f0 < eps] = eps
    tuning = 55.0  # A4=440 Hz -> tuning=A1=55 Hz
    cent_f0 = 1200 * np.log2(f0 / tuning)

    # 2. standard deviation of the last winStable samples; entry k of this
    #    array covers frames k .. k + winStable - 1

    std_winStable = np.array([
        np.std(cent_f0[last - winStable:last])
        for last in range(winStable, cent_f0.size + 1)
    ])

    # 3. frames whose standard deviation is below the threshold; the offset
    #    turns a position in std_winStable into the index of the last frame
    #    of its window

    stable_frames = np.where(std_winStable < stdThsld)[0] + (winStable - 1)
    if stable_frames.size == 0:
        return np.zeros((0, 2), dtype=int)

    # 4. group consecutive stable frames: a new segment starts after every
    #    jump larger than one frame

    jumps = np.where(np.diff(stable_frames) != 1)[0]
    starts = stable_frames[np.concatenate(([0], jumps + 1))]
    ends = stable_frames[np.concatenate((jumps, [stable_frames.size - 1]))]

    # 5. segment filtering: drop the segments shorter than minNoteDur, which
    #    is converted from seconds to a number of frames

    samples_minNoteDur = int(minNoteDur * fs / H)
    long_enough = (ends - starts) >= samples_minNoteDur
    segments = np.column_stack((starts, ends))[long_enough]

    # plotSpectogramF0Segments(x, fs, w, N, H, f0, segments)  # Plot spectrogram and F0 if needed

    return segments
Пример #50
0
def stft_onset(inputFile, window, M, N, H, freq_thresholds, show_plot=False, debug=False):
    """
    Compute an energy-based onset detection function (ODF) per frequency band.

    Inputs:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                blackman, blackmanharris)
            M (integer): analysis window size (odd integer value)
            N (integer): fft size (power of two, bigger or equal than than M)
            H (integer): hop size for the STFT computation
            freq_thresholds (list): Contains frequency tuples (initial, end) of the
            different chunks, these frequencies are excluded from the chunk.
            show_plot (boolean): enable/disable spectrogram, energy envelope and onset function visualization
            debug (boolean): enable/disable debug messages during execution

    Output:
            A numpy array with one row per frame and one column per band returned by
            dsp_toolbox.slice_spectrum (an empty array when there is no band). Each value is the
            frame-to-frame increase of the band energy in dB, with decreases replaced by 0 and a
            0 in the first frame. With the thresholds (0, 3000) and (3000, 10000):
            ODF[:,0]: ODF computed in band 0 < f < 3000 Hz
            ODF[:,1]: ODF computed in band 3000 < f < 10000 Hz
    """

    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)
    dBX, pX = stft.stftAnal(x, w, N, H)

    # magnitude spectrogram converted from dB to linear
    X = 10**(dBX/20.0)
    dft_size = X[0, :].size
    if debug:
        print("Spectrogram shape = {}".format(X.shape))

    band_index = dsp_toolbox.slice_spectrum(X, freq_thresholds, fs, N, dft_size)
    if debug:
        print("band_index elements = {}".format(len(band_index)))

    energy_columns = []
    onset_columns = []
    for band in band_index:
        energy = dsp_toolbox.energy(X[:, band], axis=-1)
        energy[energy < eps] = eps  # keep the logarithm finite
        db_energy = 10*np.log10(energy)
        odf_band = db_energy[1:] - db_energy[:-1]
        odf_band[odf_band < 0] = 0  # Half wave rectification
        # add extra sample in the beginning to match with energy envelope size
        odf_band = np.insert(odf_band, 0, 0)
        energy_columns.append(db_energy)
        onset_columns.append(odf_band)

    # One column per band. Stacking the bands along the rows instead would
    # produce a single long column and hide every band but the first from the
    # plots below.
    if energy_columns:
        energy_result = np.column_stack(energy_columns)
        onset_result = np.column_stack(onset_columns)
    else:
        energy_result = np.array([])
        onset_result = np.array([])

    if debug:
        print("energy_result shape = {}".format(energy_result.shape))
        print("onset_result shape = {}".format(onset_result.shape))

    if show_plot:
        plt.figure(1, figsize=(9.5, 6))

        plt.subplot(311)
        numFrames = int(dBX[:, 0].size)
        frmTime = H*np.arange(numFrames)/float(fs)
        binFreq = np.arange(N/2+1)*float(fs)/N
        plt.pcolormesh(frmTime, binFreq, np.transpose(dBX))
        plt.title('Spectrogram')
        plt.autoscale(tight=True)

        plt.subplot(312)
        for i, column in enumerate(energy_columns):
            plt.plot(column, label='band {}'.format(i))
        plt.title('Energy envelopes')
        plt.legend()
        plt.autoscale(tight=True)
        plt.show(block=False)

        plt.subplot(313)
        for i, column in enumerate(onset_columns):
            plt.plot(column, label='band {}'.format(i))
        plt.title('Onset detection function')
        plt.legend()
        plt.autoscale(tight=True)
        plt.show(block=False)

    return onset_result
from scipy.interpolate import interp1d

sys.path.append(
    os.path.join(os.path.dirname(os.path.realpath(__file__)),
                 '../../../software/models/'))
sys.path.append(
    os.path.join(os.path.dirname(os.path.realpath(__file__)),
                 '../../../software/transformations/'))

import sineModel as SM
import stft as STFT
import sineModel as SM
import utilFunctions as UF
import sineTransformations as SMT

# Analysis window, FFT sizes and hop size (a quarter of Ns).
w = np.hamming(801)
N = 2048
Ns = 512
H = Ns // 4

# Settings handed to the sinusoidal analysis below.
t = -90
minSineDur = .005
maxnSines = 150
freqDevOffset = 20
freqDevSlope = 0.02

# Read the sound, then compute its spectrogram and its sinusoidal tracks.
(fs, x) = UF.wavread('../../../sounds/mridangam.wav')
mX, pX = STFT.stftAnal(x, w, N, H)
tfreq, tmag, tphase = SM.sineModelAnal(
    x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)
timeScale = np.array([
    .01, .0, .03, .03, .335, .4, .355, .42, .671, .8, .691, .82, .858, 1.2,
    .878, 1.22, 1.185, 1.6, 1.205, 1.62, 1.497, 2.0, 1.517, 2.02, 1.686, 2.4,
Пример #52
0
def main(inputFile='../../sounds/bendir.wav', window='hamming', M=2001, N=2048, t=-80, minSineDur=0.02,
         maxnSines=150, freqDevOffset=10, freqDevSlope=0.001, stocf=0.2):
    """
    Analysis/synthesis with the sinusoidal plus stochastic model; writes the
    three resulting sounds and plots input, model and output.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    stocf: decimation factor used for the stochastic approximation

    Output files, all under 'output_sounds/' and named after the input file:
    '<name>_spsModel_sines.wav', '<name>_spsModel_stochastic.wav' and
    '<name>_spsModel.wav'. Nothing is returned.
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    (fs, x) = UF.wavread(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # perform sinusoidal+stochastic analysis
    tfreq, tmag, tphase, stocEnv = SPS.spsModelAnal(x, fs, w, N, H, t, minSineDur, maxnSines, freqDevOffset, freqDevSlope, stocf)

    # synthesize sinusoidal+stochastic model
    y, ys, yst = SPS.spsModelSynth(tfreq, tmag, tphase, stocEnv, Ns, H, fs)

    # output sound files (monophonic with sampling rate of 44100)
    baseName = os.path.basename(inputFile)[:-4]
    outputFileSines = 'output_sounds/' + baseName + '_spsModel_sines.wav'
    outputFileStochastic = 'output_sounds/' + baseName + '_spsModel_stochastic.wav'
    outputFile = 'output_sounds/' + baseName + '_spsModel.wav'

    # write sounds files for sinusoidal, residual, and the sum
    UF.wavwrite(ys, fs, outputFileSines)
    UF.wavwrite(yst, fs, outputFileStochastic)
    UF.wavwrite(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 10000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size)/float(fs), x)
    plt.axis([0, x.size/float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the stochastic envelope up to maxplotfreq
    plt.subplot(3, 1, 2)
    numFrames = int(stocEnv[:, 0].size)
    sizeEnv = int(stocEnv[0, :].size)
    frmTime = H*np.arange(numFrames)/float(fs)
    # Number of envelope columns to display. It must be an integer to be
    # usable as a slice bound, and it is capped at the envelope size so the
    # frequency axis always matches the plotted columns.
    maxBin = min(int(sizeEnv*maxplotfreq/(.5*fs)) + 1, sizeEnv)
    binFreq = (.5*fs)*np.arange(maxBin)/sizeEnv
    plt.pcolormesh(frmTime, binFreq, np.transpose(stocEnv[:, :maxBin]))
    plt.autoscale(tight=True)

    # plot sinusoidal frequencies on top of stochastic component
    if (tfreq.shape[1] > 0):
        # zero out the frequencies above the plotted range, then hide every
        # zero (NaN values are not drawn)
        sines = tfreq*np.less(tfreq, maxplotfreq)
        sines[sines == 0] = np.nan
        numFrames = int(sines[:, 0].size)
        frmTime = H*np.arange(numFrames)/float(fs)
        plt.plot(frmTime, sines, color='k', ms=3, alpha=1)
        plt.xlabel('time(s)')
        plt.ylabel('Frequency(Hz)')
        plt.autoscale(tight=True)
        plt.title('sinusoidal + stochastic spectrogram')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size)/float(fs), y)
    plt.axis([0, y.size/float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.ion()
    plt.show()
def analysis(inputFile='../../sounds/vignesh.wav',
             window='blackman',
             M=1201,
             N=2048,
             t=-90,
             minSineDur=0.1,
             nH=100,
             minf0=130,
             maxf0=300,
             f0et=7,
             harmDevSlope=0.01):
    """
    Analyze a sound with the harmonic model, resynthesize it without the
    original phases, write the result and plot it.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics
    minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound
    f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
    returns inputFile: input file name; fs: sampling rate of input file;
            hfreq, hmag: harmonic frequencies and magnitudes
    The synthesized sound is written to
    'output_sounds/<input base name>_harmonicModel.wav'.
    """

    Ns = 512  # fft size used in synthesis
    H = 128  # hop size (has to be 1/4 of Ns)

    # load the sound and build the analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # harmonic analysis of the whole sound
    hfreq, hmag, hphase = HM.harmonicModelAnal(
        x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur)

    # resynthesis from frequencies and magnitudes only (empty phase array)
    y = SM.sineModelSynth(hfreq, hmag, np.array([]), Ns, H, fs)

    # store the synthesized sound
    baseName = os.path.basename(inputFile)[:-4]
    outputFile = 'output_sounds/' + baseName + '_harmonicModel.wav'
    UF.wavwrite(y, fs, outputFile)

    plt.figure(figsize=(12, 9))

    # upper frequency limit of the harmonic-track plot
    maxplotfreq = 5000.0

    def drawWaveform(row, signal, title):
        # time-domain plot of one signal in the given subplot row
        plt.subplot(3, 1, row)
        plt.plot(np.arange(signal.size) / float(fs), signal)
        plt.axis([0, signal.size / float(fs), min(signal), max(signal)])
        plt.ylabel('amplitude')
        plt.xlabel('time (sec)')
        plt.title(title)

    drawWaveform(1, x, 'input sound: x')

    if (hfreq.shape[1] > 0):
        plt.subplot(3, 1, 2)
        # work on a copy so the returned hfreq keeps its values; the
        # non-positive entries become NaN, which is not drawn
        tracks = np.copy(hfreq)
        frmTime = H * np.arange(tracks.shape[0]) / float(fs)
        tracks[tracks <= 0] = np.nan
        plt.plot(frmTime, tracks)
        plt.axis([0, x.size / float(fs), 0, maxplotfreq])
        plt.title('frequencies of harmonic tracks')

    drawWaveform(3, y, 'output sound: y')

    plt.tight_layout()
    plt.show(block=False)

    return inputFile, fs, hfreq, hmag
Пример #54
0
import numpy as np
from scipy.signal import get_window, resample
from scipy.fftpack import fft
import sys, os, math
sys.path.append(
    os.path.join(os.path.dirname(os.path.realpath(__file__)),
                 '../../software/models/'))
import utilFunctions as UF
import dftModel as DFT

# Read the two sounds; fs keeps the rate reported for the second file.
fs, x1 = UF.wavread('../../sounds/rain.wav')
fs, x2 = UF.wavread('../../sounds/soprano-E4.wav')

# Take one windowed frame of M samples from each sound.
M = N = 512
w = get_window('hanning', M)
x1w = x1[10000:10000 + M] * w
x2w = x2[10000:10000 + M] * w

# NOTE(review): the frames are multiplied by w above and w is also passed to
# dftAnal -- confirm that applying the window in both places is intended.
mX1, pX1 = DFT.dftAnal(x1w, w, N)
mX2, pX2 = DFT.dftAnal(x2w, w, N)

# Smooth the second magnitude spectrum: floor it at -200 dB, resample it down
# to a fraction smoothf of its size, then back up to N // 2 + 1 points.
# resample() needs an integer number of samples, hence int() and //.
smoothf = 0.2
mX2smooth1 = resample(np.maximum(-200.0, mX2), int(mX2.size * smoothf))
mX2smooth2 = resample(mX2smooth1, N // 2 + 1)

# Weighted mix of the smoothed spectrum of sound 2 and the spectrum of
# sound 1.
balancef = 0.5
mY = balancef * mX2smooth2 + (1.0 - balancef) * mX1

# Synthesize one frame using the phases of sound 1.
y = DFT.dftSynth(mY, pX1, N) * sum(w)

import matplotlib.pyplot as plt
def estimateF0(inputFile='../../sounds/cello-double-2.wav'):
    """
    Estimate the fundamental frequency (f0) contour of a sound, synthesize it
    as a sinusoid and draw it on top of the spectrogram.

    Input:
        inputFile (string): wav file including the path
    Output:
        f0 (numpy array): array of the estimated fundamental frequency (f0) values, set to 0
            before the frame at 0.5 seconds and from the frame at 4.0 seconds on
    Side effects: writes 'synthF0Contour.wav' and shows the plot (or saves it as
        'f0_over_Spectrogram.png' when the local plot flag is not 1).
    """

    # analysis parameters
    window = 'blackman'
    M = 21000
    N = 8192 * 4
    t = -70
    f0et = 5
    minf0, maxf0 = 140, 210
    H = 256  # fixed hop size

    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # estimate f0 and keep it only between 0.5 and 4.0 seconds
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)
    startFrame = int(np.floor(0.5 * fs / H))
    endFrame = int(np.ceil(4.0 * fs / H))
    f0[endFrame:] = 0
    f0[:startFrame] = 0

    # synthesize the contour and store it
    y = UF.sinewaveSynth(f0, 0.8, H, fs)
    UF.wavwrite(y, fs, 'synthF0Contour.wav')

    # plot settings
    maxplotfreq = 500.0  # upper frequency limit of the plot
    fontSize = 16
    plot = 1  # 1 shows the figure, anything else saves it to disk

    fig = plt.figure()
    ax = fig.add_subplot(111)

    # spectrogram with the analysis parameters, cut at maxplotfreq and
    # transposed so that rows are bins and columns are frames
    mX, pX = stft.stftAnal(x, w, N, H)
    lastBin = int(N * (maxplotfreq / fs)) + 1
    mX = np.transpose(mX[:, :lastBin])

    timeStamps = np.arange(mX.shape[1]) * H / float(fs)
    binFreqs = np.arange(mX.shape[0]) * fs / float(N)

    plt.pcolormesh(timeStamps, binFreqs, mX)
    plt.plot(timeStamps, f0, color='k', linewidth=1.5)
    # vertical lines at the limits of the region that keeps its f0
    for limit in (0.5, 4.0):
        plt.plot([limit, limit], [0, maxplotfreq], color='b', linewidth=1.5)

    plt.autoscale(tight=True)
    plt.ylabel('Frequency (Hz)', fontsize=fontSize)
    plt.xlabel('Time (s)', fontsize=fontSize)
    plt.legend(('f0', ))

    # make the plot half as tall as it is wide
    xLim = ax.get_xlim()
    yLim = ax.get_ylim()
    ax.set_aspect((xLim[1] - xLim[0]) / (2.0 * (yLim[1] - yLim[0])))

    if plot != 1:
        fig.tight_layout()
        fig.savefig('f0_over_Spectrogram.png', dpi=150, bbox_inches='tight')
    else:
        plt.autoscale(tight=True)
        plt.show()

    return f0
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import hamming, triang, blackmanharris
import math
import sys, os, functools, time
sys.path.append(
    os.path.join(os.path.dirname(os.path.realpath(__file__)),
                 '../../../software/models/'))

import dftModel as DFT
import utilFunctions as UF

# Read the sound and set the analysis window and FFT size.
(fs, x) = UF.wavread('../../../sounds/piano.wav')
M = 1100
w = np.blackman(M)
N = 2048
# Centre of the analyzed fragment, 0.3 seconds in. It must be an integer to be
# usable as a slice bound; .3 * fs is a float.
pin = int(.3 * fs)
# The two halves of the window size; together they add up to w.size.
hM1 = int(math.floor((w.size + 1) / 2))
hM2 = int(math.floor(w.size / 2))
x1 = x[pin - hM1:pin + hM2]
mX, pX = DFT.dftAnal(x1, w, N)

plt.figure(1, figsize=(9, 7))
# fragment of the sound, with time measured from its centre
plt.subplot(311)
plt.plot(np.arange(-hM1, hM2) / float(fs), x1, lw=1.5)
plt.axis([-hM1 / float(fs), hM2 / float(fs), min(x1), max(x1)])
plt.title('x (piano.wav)')

# magnitude spectrum against frequency in Hz, shown up to fs / 4
plt.subplot(3, 1, 2)
plt.plot(fs * np.arange(mX.size) / float(N), mX, 'r', lw=1.5)
plt.axis([0, fs / 4, -90, max(mX)])
def main(inputFile='../../sounds/Dark Guitar String.wav',
         window='blackmanharris',
         M=3001,
         N=4096,
         t=-100,
         minSineDur=0.02,
         maxnSines=30,
         freqDevOffset=10,
         freqDevSlope=0.001):
    """
    Perform analysis/synthesis using the sinusoidal model, write the
    synthesized sound and plot input, sinusoidal tracks and output.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    returns tfreq, tmag, tphase: frequencies, magnitudes and phases of the sinusoidal tracks,
            exactly as produced by the analysis
    The synthesized sound is written to
    'output_sounds/<input base name>_sineModel_test.wav'.
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    fs, x = UF.wavread(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # analyze the sound with the sinusoidal model
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines,
                                           minSineDur, freqDevOffset,
                                           freqDevSlope)

    # synthesize the output sound from the sinusoidal representation
    y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

    # output sound file name
    outputFile = 'output_sounds/' + os.path.basename(
        inputFile)[:-4] + '_sineModel_test.wav'

    # write the synthesized sound obtained from the sinusoidal synthesis
    UF.wavwrite(y, fs, outputFile)

    # create figure to show plots
    plt.figure(figsize=(9, 6))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the sinusoidal frequencies
    plt.subplot(3, 1, 2)
    if (tfreq.shape[1] > 0):
        numFrames = tfreq.shape[0]
        frmTime = H * np.arange(numFrames) / float(fs)
        # Hide the non-positive frequencies by turning them into NaN (which
        # is not drawn). This is done on a copy: tfreq is returned to the
        # caller and must keep the values produced by the analysis.
        tracks = np.copy(tfreq)
        tracks[tracks <= 0] = np.nan
        plt.plot(frmTime, tracks)
        plt.axis([0, x.size / float(fs), 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.ion()
    plt.show()
    return tfreq, tmag, tphase
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import hamming, triang, blackmanharris
import math
import sys, os, functools, time

sys.path.append(
    os.path.join(os.path.dirname(os.path.realpath(__file__)),
                 '../../../software/models/'))

import dftModel as DFT
import utilFunctions as UF

# Read the sound and set the analysis window and FFT size.
(fs, x) = UF.wavread('../../../sounds/carnatic.wav')
# Centre of the analyzed fragment, 1.4 seconds in. It must be an integer to be
# usable as a slice bound; 1.4 * fs is a float.
pin = int(1.4 * fs)
w = np.blackman(1601)
N = 4096
# The two halves of the window size; together they add up to w.size.
hM1 = int(math.floor((w.size + 1) / 2))
hM2 = int(math.floor(w.size / 2))
x1 = x[pin - hM1:pin + hM2]
mX, pX = DFT.dftAnal(x1, w, N)

plt.figure(1, figsize=(9, 7))
# fragment of the sound, with time measured from its centre
plt.subplot(311)
plt.plot(np.arange(-hM1, hM2) / float(fs), x1, lw=1.5)
plt.axis([-hM1 / float(fs), hM2 / float(fs), min(x1), max(x1)])
plt.title('x (carnatic.wav)')

# magnitude spectrum against frequency in Hz, shown up to fs / 4
plt.subplot(3, 1, 2)
plt.plot(fs * np.arange(mX.size) / float(N), mX, 'r', lw=1.5)
plt.axis([0, fs / 4, -100, max(mX)])
Пример #59
0
import sys, os

sys.path.append(
    os.path.join(os.path.dirname(os.path.realpath(__file__)),
                 '../../software/models'))
import dftModel as DFT
import utilFunctions as UF

# NOTE(review): np, plt and get_window are used below but are not imported in
# the visible part of this snippet (get_window is presumably
# scipy.signal.get_window) -- confirm the imports above this script.

# Alternative settings for sine-440.wav, kept for reference:
# (fs, x) = UF.wavread('../../sounds/sine-440.wav')
# M = 501
# N = 512
# N = 2048  # better freq resolution for sine-440.wav

# For sine-440-490.wav
(fs, x) = UF.wavread('../../sounds/sine-440-490.wav')
M = 3528  # M = 4 bins * 44100 / (490-440)
N = 4096  # N > M

t = -20  # magnitude threshold passed to UF.peakDetection
w = get_window('hamming', M)

# Analyse M samples starting 0.8 s into the sound. The start is converted to
# an integer sample index because float slice bounds are rejected by current
# NumPy.
start = int(0.8 * fs)
x1 = x[start:start + M]
mX, pX = DFT.dftAnal(x1, w, N)

# Locations (bin indices) of the peaks found in mX and their values.
ploc = UF.peakDetection(mX, t)
pmag = mX[ploc]

# Frequency in Hz of each of the N/2 + 1 bins; integer division keeps the
# bin count an int under both Python 2 and Python 3.
freqaxis = fs * np.arange(N // 2 + 1) / float(N)
plt.plot(freqaxis, mX)
plt.plot(fs * ploc / float(N), pmag, marker='x', linestyle='')

# quadratic interpolation:
Пример #60
0
def _plotHarmonicsOverStochastic(row, soundNumber, fs, H, hfreq, stocEnv, maxplotfreq):
    """
    Draw one row of the 2x1 figure: stochastic envelope as a spectrogram with the harmonic tracks on top
    row: subplot row to draw into (1 or 2)
    soundNumber: number of the sound, used in the plot title
    fs: sampling rate of the sound
    H: hop size used in the analysis
    hfreq: harmonic frequencies, indexed as [frame, harmonic]
    stocEnv: stochastic envelope, indexed as [frame, envelope bin]
    maxplotfreq: highest frequency to display
    """
    plt.subplot(2, 1, row)
    numFrames = int(stocEnv[:, 0].size)
    sizeEnv = int(stocEnv[0, :].size)
    frmTime = H * np.arange(numFrames) / float(fs)

    # The envelope bins cover 0 .. fs/2. Derive the number of displayed bins
    # once and use it for both the frequency axis and the slice, clamped to
    # the bins that actually exist; otherwise the two lengths can disagree
    # (exact-integer ratio, or maxplotfreq above fs/2) and pcolormesh
    # rejects the mismatched dimensions.
    numBins = min(sizeEnv, int(sizeEnv * maxplotfreq / (.5 * fs)) + 1)
    binFreq = (.5 * fs) * np.arange(numBins) / sizeEnv
    plt.pcolormesh(frmTime, binFreq, np.transpose(stocEnv[:, :numBins]))
    plt.autoscale(tight=True)

    # plot harmonics on top of the stochastic spectrogram
    if hfreq.shape[1] > 0:
        harms = np.copy(hfreq)
        # zero out everything at or above maxplotfreq, then turn zeros into
        # NaN so that those points are not drawn
        harms = harms * np.less(harms, maxplotfreq)
        harms[harms == 0] = np.nan
        numFrames = int(harms[:, 0].size)
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
        plt.xlabel('time (sec)')
        plt.ylabel('frequency (Hz)')
        plt.autoscale(tight=True)
        plt.title('harmonics + stochastic spectrogram of sound %d' % soundNumber)


def analysis(inputFile1='../../sounds/violin-B3.wav', window1='blackman', M1=1001, N1=1024, t1=-100,
    minSineDur1=0.05, nH=60, minf01=200, maxf01=300, f0et1=10, harmDevSlope1=0.01, stocf=0.1,
    inputFile2='../../sounds/soprano-E4.wav', window2='blackman', M2=901, N2=1024, t2=-100,
    minSineDur2=0.05, minf02=250, maxf02=500, f0et2=10, harmDevSlope2=0.01):
    """
    Analyze two sounds with the harmonic plus stochastic model and plot the results
    Parameters ending in 1 apply to the first sound, those ending in 2 to the second;
    nH and stocf are shared by both analyses.
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics
    minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound
    f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
    stocf: decimation factor used for the stochastic approximation
    returns the tuple (inputFile1, fs1, hfreq1, hmag1, stocEnv1, inputFile2, hfreq2, hmag2, stocEnv2):
            inputFile: input file name; fs1: sampling rate of the first input file
            (the sampling rate of the second file is not returned);
            hfreq, hmag: harmonic frequencies, magnitude; stocEnv: stochastic residual
    """

    # size of fft used in synthesis
    Ns = 512
    # hop size (has to be 1/4 of Ns)
    H = 128
    # frequency range to plot
    maxplotfreq = 15000.0

    # read input sounds
    (fs1, x1) = UF.wavread(inputFile1)
    (fs2, x2) = UF.wavread(inputFile2)

    # compute analysis windows
    w1 = get_window(window1, M1)
    w2 = get_window(window2, M2)

    # compute the harmonic plus stochastic models (the phases are not used further)
    hfreq1, hmag1, hphase1, stocEnv1 = HPS.hpsModelAnal(x1, fs1, w1, N1, H, t1, nH, minf01, maxf01, f0et1, harmDevSlope1, minSineDur1, Ns, stocf)
    hfreq2, hmag2, hphase2, stocEnv2 = HPS.hpsModelAnal(x2, fs2, w2, N2, H, t2, nH, minf02, maxf02, f0et2, harmDevSlope2, minSineDur2, Ns, stocf)

    # one figure, one row per sound
    plt.figure(figsize=(12, 9))
    _plotHarmonicsOverStochastic(1, 1, fs1, H, hfreq1, stocEnv1, maxplotfreq)
    _plotHarmonicsOverStochastic(2, 2, fs2, H, hfreq2, stocEnv2, maxplotfreq)

    plt.tight_layout()
    # non-blocking so that the caller gets the analysis results back immediately
    plt.show(block=False)

    return inputFile1, fs1, hfreq1, hmag1, stocEnv1, inputFile2, hfreq2, hmag2, stocEnv2