示例#1
0
def minFreqEstErr(inputFile, f):
    """
    Find the smallest window size M = 100*k + 1 (k = 1, 2, ...) for which the
    frequency estimated from the first interpolated spectral peak is within
    0.05 Hz of the known frequency f.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
    """
    # analysis parameters:
    window = 'blackman'
    t = -40

    # NOTE(review): wavread and dftAnal are called unqualified, as in the
    # original; they are assumed to be imported into this module's namespace.
    fs, s = wavread(inputFile)
    center = int(fs // 2)           # frames are centred on this sample index
    N = 128
    k = 1
    while True:
        M = 100 * k + 1
        while N < M:                # FFT size: power of two not smaller than M
            N *= 2

        w = get_window(window, M)
        hm1 = M // 2
        hm2 = M // 2 + 1            # hm1 + hm2 == M for odd M
        frame = s[center - hm1:center + hm2]

        X, Xf = dftAnal(frame, w, N)
        peak = UF.peakDetection(X, t)
        iploc, ipmag, ipphase = UF.peakInterp(X, Xf, peak)

        # Only the first detected peak is used; a frame with no peak above
        # the threshold simply moves on to the next window size.
        if len(iploc) > 0:
            fest = float(iploc[0]) * fs / N
            if abs(f - fest) <= 0.05:
                return fest, M, N
        k += 1
示例#2
0
def minFreqEstErr(inputFile, f):
    """
    Search window sizes M = 100*k + 1 until the frequency estimated from the
    first interpolated spectral peak is less than 0.05 Hz away from f.

    The search starts at k = floor(floor(fs / f) / 100), clamped to at least 1
    so that the window never degenerates to a single sample.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
    """
    # analysis parameters:
    window = 'blackman'
    t = -40

    (fs, x) = UF.wavread(inputFile)
    center = int(.5 * fs)                       # sample index at 0.5 s
    k = max(1, int(np.floor(fs / f)) // 100)    # k must be a positive integer
    ferror = 1
    while ferror >= 0.05:
        M = 100 * k + 1
        Ns = int(2**(np.ceil(np.log2(M))))
        w = get_window(window, M)
        # Floor division keeps the slice bounds integral on Python 3 as well;
        # the two halves add up to exactly M samples.
        x1 = x[center - M // 2:center + (M + 1) // 2]
        mX, pX = DFT.dftAnal(x1, w, Ns)
        ploc = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        if len(iploc) > 0:
            # Compare scalars: only the first peak is the estimate.
            fEst = fs * iploc[0] / float(Ns)
            ferror = abs(fEst - f)
        k += 1

    return float(fEst), M, Ns
示例#3
0
def minFreqEstErr(inputFile, f):
    """
    Try window sizes 101, 201, ..., 4901 and stop at the first one whose
    first-peak frequency estimate is less than 0.05 Hz away from f.  If none
    qualifies, the values of the last attempt are returned.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
    """
    # analysis parameters:
    window = 'blackman'
    t = -40

    (fs, x) = UF.wavread(inputFile)
    mid = .5 * fs                                   # centre of the frame, in samples

    for M in range(101, 4902, 100):
        N = 2**int(np.ceil(np.log2(M)))             # power of two not below M
        first = int(mid - ((M - 1) / 2))
        last = int(mid + ((M + 1) / 2))
        mX, pX = DFT.dftAnal(x[first:last], get_window(window, M), N)
        peaks = UF.peakDetection(mX, t)
        (locs, mags, phases) = UF.peakInterp(mX, pX, peaks)
        fEst = (locs[0] / N) * fs                   # bin position -> Hz
        if abs(fEst - f) < 0.05:
            return fEst, M, N

    # No window met the tolerance: report the last attempt.
    return fEst, M, N
示例#4
0
def minFreqEstErr(inputFile, f):
    """
    Try window sizes M = 100*i + 1 for i = 1..99 and return the first one whose
    first-peak frequency estimate is within 0.05 Hz of f.

    Each frame starts at the sample located 0.5 s into the file (it is not
    centred on it).  Returns None implicitly when no window size qualifies.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
    """
    # analysis parameters:
    window = 'blackman'
    t = -40
    fs, x = UF.wavread(inputFile)
    # Derive the start index from the file's own sampling rate rather than
    # assuming 44100 Hz.
    startidx = int(fs * 0.5)
    for i in range(1, 100):
        M = i * 100 + 1
        N = 1 << M.bit_length()     # smallest power of two strictly above M
        windf = get_window(window, M)
        mx, px = DFT.dftAnal(x[startidx:startidx + M], windf, N)
        ploc = UF.peakDetection(mx, t)
        iploc, ipmag, ipphase = UF.peakInterp(mx, px, ploc)
        if len(iploc) == 0:
            continue                # no peak above threshold for this window
        freq = iploc[0] * fs / N
        if abs(freq - f) <= 0.05:
            return (freq, M, N)
示例#5
0
def analysis(x, fs, w, N, t):
    """Analyse one windowed frame taken around the middle of x.

    x: input samples; fs: sampling rate; w: analysis window;
    N: FFT size; t: peak detection threshold.
    Returns (iploc, ipmag, ipphase, ipfreq): the interpolated peak locations,
    magnitudes and phases, plus the locations converted to Hz.
    """
    win_len = w.size
    half_up = int(math.floor((win_len + 1) / 2))    # half window, rounded up
    half_down = int(math.floor(win_len / 2))        # half window, rounded down
    centre = int((len(x) + 1) / 2)                  # frame is centred here

    frame = x[centre - half_up:centre + half_down]
    mX, pX = DFT.dftAnal(frame, w, N)
    iploc, ipmag, ipphase = UF.peakInterp(mX, pX, UF.peakDetection(mX, t))

    # Scale bin positions by fs / N to express them in Hz.
    return iploc, ipmag, ipphase, fs * iploc / float(N)
示例#6
0
def minFreqEstErr(inputFile, f):
    """
    Try window sizes M = 100*i + 1 for i = 1..24 and stop at the first one
    whose first-peak frequency estimate is less than 0.05 Hz away from f.

    If the tolerance is never met, the values of the last attempt are
    returned; fEst is None when no frame produced any peak at all.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
    """
    # analysis parameters:
    window = 'blackman'
    t = -40

    (fs, x) = UF.wavread(inputFile)
    center = int(0.5 * fs)      # integer index: float slice bounds are rejected by NumPy
    fEst = None

    for i in range(1, 25):
        M = 100 * i + 1
        hM = M // 2
        N = 1 << (M - 1).bit_length()       # smallest power of two >= M
        w = get_window(window, M)
        x1 = x[center - hM - 1:center + hM]  # 2*hM + 1 == M samples
        mX, pX = DFT.dftAnal(x1, w, N)
        ploc = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        # Explicit length test: the truth value of an array is ambiguous.
        if len(iploc) > 0:
            fEst = fs * iploc[0] / float(N)
            if abs(f - fEst) < 0.05:
                break

    return fEst, M, N
示例#7
0
def f0Twm(x, fs, w, N, H, t, minf0, maxf0, f0et):
    """
    Frame-by-frame fundamental frequency detection using the twm algorithm.

    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size (minimum 512), t: threshold in negative dB,
    H: hop size between frames,
    minf0: minimum f0 frequency in Hz, maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5),
    returns f0: one fundamental frequency value per analysed frame
    """
    half_spectrum = N/2                             # size of positive spectrum
    half_up = int(math.floor((w.size+1)/2))         # half window, rounded up
    half_down = int(math.floor(w.size/2))           # half window, rounded down

    # Zero-pad both ends so the first and last samples can be frame centres.
    x = np.append(np.zeros(half_down), x)
    x = np.append(x, np.zeros(half_up))

    pin = half_up                                   # centre of the current frame
    pend = x.size - half_up                         # frames are centred strictly before this
    scratch = np.zeros(N)                           # FFT-sized buffer (not read in this function)
    w = w / sum(w)                                  # unit-sum analysis window

    f0 = []
    f0stable = 0
    while pin < pend:
        frame = x[pin - half_up:pin + half_down]
        mX, pX = DFT.dftAnal(frame, w, N)
        ploc = UF.peakDetection(mX, half_spectrum, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        ipfreq = fs * iploc/N                       # peak locations in Hz
        f0t = UF.f0DetectionTwm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)

        # A candidate counts as stable when it is the first positive value
        # after an unstable frame, or when it stays within 20% of the
        # previous stable value.
        accepted = ((f0stable == 0) & (f0t > 0)) \
            or ((f0stable > 0) & (np.abs(f0stable - f0t) < f0stable/5.0))
        f0stable = f0t if accepted else 0

        f0 = np.append(f0, f0t)
        pin += H                                    # move to the next frame
    return f0
示例#8
0
文件: A5Part1.py 项目: Jee-Bee/ASPFMA
def minFreqEstErr(inputFile, f):
    """
    Search window sizes M = 100*k + 1 (k = 1, 2, ...) on a frame taken around
    the middle of the file until the first interpolated peak is within
    0.05 Hz of f.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst = Estimated peak frequencies (Hz), as the array computed from
                   the interpolated peak locations; the first element is the
                   estimate that was compared against f
            M (int) = Window size
            N (int) = FFT size
    """
    # analysis parameters:
    window = 'blackman'
    t = -40
    fs, x = UF.wavread(inputFile)
    x_half = len(x) // 2
    k = 1
    while True:
        M = 100 * k + 1
        start = x_half - M // 2
        W = get_window(window, M)
        N = int(2 ** np.ceil(np.log2(M)))
        mX, pX = DFT.dftAnal(x[start:start + M], W, N)
        ploc = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        fEst = iploc * fs / N
        # Test the first peak as a scalar: using the whole array as a loop
        # condition is ambiguous with several peaks, and an empty result
        # must not be mistaken for convergence.
        if len(fEst) > 0 and np.abs(f - fEst[0]) <= 0.05:
            return (fEst, M, N)
        k += 1
示例#9
0
def minFreqEstErr(inputFile='sine-440.wav', f=440):
    """
    Search window sizes M = 100*k + 1 until the first interpolated peak is
    less than 0.05 Hz away from f.  Each frame starts at the sample located
    0.5 s into the file.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
    """

    window = 'blackman'
    t = -40
    (fs, x) = UF.wavread(inputFile)
    start = int(.5 * fs)    # integer index: float slice bounds are rejected by NumPy
    # NOTE(review): the search starts at k = 11, so smaller windows are never
    # tried; confirm this is intended for inputs other than the default.
    k = 11
    N = 2

    while True:
        M = 100 * k + 1
        while N < M:        # FFT size: power of two not smaller than M
            N = N * 2
        w = get_window(window, M)
        x1 = x[start:start + M]
        mX, pX = DFT.dftAnal(x1, w, N)
        ploc = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        if len(iploc) > 0:
            fEst = fs * iploc[0] / float(N)
            if abs(fEst - f) < 0.05:
                break
        k += 1

    return (fEst, M, N)
示例#10
0
文件: A5Part1.py 项目: akkeh/SOGM_jr3
def check_k(fr1, fr2, fs, k, window):
    """
    Check whether window size M = 100*k + 1 estimates every test frequency
    fr1, fr1 + 1, ... (fr2 - fr1 values) to within 0.05 Hz.

    For each frequency a sinusoid of 441000 samples is synthesised and a
    frame of M samples taken around its middle is analysed; the first
    interpolated peak is the estimate.  Progress is printed per frequency.

    Returns 0 as soon as one frequency exceeds the tolerance, 3 otherwise.
    """
    # Everything that depends only on k is computed once, outside the loop.
    M = 100 * k + 1
    N = 1
    while N < M:                    # smallest power of two >= M
        N *= 2
    w = get_window(window, M)
    t = np.arange(441000.0)
    # Floor division keeps the slice bounds integral on Python 3.
    l_h = len(t) // 2 - M // 2 + 1
    h_h = l_h + M

    for fr in fr1 + np.arange(fr2 - fr1):
        x = np.sin(2.0 * np.pi * fr * t / fs)
        x_cnk = x[l_h:h_h]
        (mX, pX) = DFT.dftAnal(x_cnk, w, N)
        p_loc = UF.peakDetection(mX, -40)
        p_int = UF.peakInterp(mX, pX, p_loc)
        peak = p_int[0] * (fs / float(N))   # interpolated locations -> Hz
        p = peak[0]
        if abs(p - fr) > 0.05:
            print("fr: ", fr, " error: ", abs(p - fr))
            return 0
        print("fr: ", fr, " checked")

    return 3
示例#11
0
def minFreqEstErr(inputFile, f):
    """
    Try window sizes 101, 201, ..., 1901 and return the first one whose
    first-peak frequency estimate is less than 0.05 Hz away from f.
    Returns None when no window size in that range qualifies.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
    """
    # analysis parameters:
    window = 'blackman'
    t = -40

    (fs, x) = UF.wavread(inputFile)
    middle = int(.5 * fs)                               # sample index at 0.5 s

    for M in range(101, 1902, 100):
        half = int(math.floor(M / 2))
        N = np.power(2, math.ceil(np.log2(M)))          # power of two not below M
        w = get_window(window, M)

        segment = x[middle - half - 1:middle + half]    # 2*half + 1 == M samples
        mX, pX = DFT.dftAnal(segment, w, N)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, UF.peakDetection(mX, t))
        fEst = fs * iploc[0] / float(N)
        if abs(f - fEst) < 0.05:
            return fEst, M, N

    return None
示例#12
0
def minFreqEstErr(inputFile, f):
    """
    Try window sizes M = 100*k + 1 for k = 1..99 and return the first one
    whose first-peak frequency estimate is less than 0.05 Hz away from f.
    Returns None when no window size in that range qualifies.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
    """
    # analysis parameters:
    window = 'blackman'
    t = -40

    fs, x = UF.wavread(inputFile)
    xCenter = int(0.5 * fs)
    for k in range(1, 100):
        M = 100 * k + 1
        hM = M // 2         # floor division: slice bounds must be integers
        w = get_window(window, M)
        N = int(pow(2, np.ceil(np.log2(M))))
        x2 = x[xCenter - hM:xCenter + hM + 1]
        mX, pX = DFT.dftAnal(x2, w, N)
        ploc = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        if len(iploc) == 0:
            continue        # nothing above threshold: try a longer window
        # Compare the first peak only; testing the whole array in an `if`
        # is ambiguous when several peaks are detected.
        fEst = fs * iploc[0] / float(N)
        if np.abs(fEst - f) < 0.05:
            return fEst, M, N
    return None
def harmonicModel(x, fs, w, N, t, nH, minf0, maxf0, f0et):
    """
    Analysis/synthesis of a sound using the sinusoidal harmonic model
    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size (minimum 512), t: threshold in negative dB,
    nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz,
    maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5),
    returns y: output array sound
    """

    hM1 = (w.size + 1) // 2                     # half analysis window size, rounded up
    hM2 = w.size // 2                           # half analysis window size, rounded down
    x = np.append(np.zeros(hM2), x)             # zeros at the start so the first window is centred on sample 0
    x = np.append(x, np.zeros(hM1))             # zeros at the end so the last sample is analysed
    Ns = 512                                    # FFT size for synthesis (even)
    # Floor division keeps the hop and half sizes integral on Python 3,
    # where they are used as slice bounds and as a window length.
    H = Ns // 4                                 # hop size used for analysis and synthesis
    hNs = Ns // 2
    pin = max(hNs, hM1)                         # sound pointer, centre of the current frame
    pend = x.size - max(hNs, hM1)               # frames are centred strictly before this
    yh = np.zeros(Ns)                           # output sound frame
    y = np.zeros(x.size)                        # output array
    w = w / sum(w)                              # normalize analysis window
    sw = np.zeros(Ns)                           # synthesis window
    ow = triang(2 * H)                          # overlapping window
    sw[hNs - H:hNs + H] = ow
    bh = blackmanharris(Ns)
    bh = bh / sum(bh)                           # normalized blackman-harris window
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]   # window for overlap-add
    hfreqp = []                                 # harmonic frequencies of the previous frame
    f0t = 0
    f0stable = 0
    while pin < pend:
        # -----analysis-----
        x1 = x[pin - hM1:pin + hM2]                             # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                          # compute dft
        ploc = UF.peakDetection(mX, t)                          # detect peak locations
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)     # refine peak values
        ipfreq = fs * iploc / N                                 # peak locations in Hz
        f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        if ((f0stable == 0) & (f0t > 0)) \
                or ((f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0)):
            f0stable = f0t                      # stable if close to the previous stable value
        else:
            f0stable = 0
        hfreq, hmag, hphase = harmonicDetection(ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs)  # find harmonics
        hfreqp = hfreq
        # -----synthesis-----
        Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)       # generate spec sines
        fftbuffer = np.real(ifft(Yh))                           # inverse FFT
        yh[:hNs - 1] = fftbuffer[hNs + 1:]                      # undo zero-phase window
        yh[hNs - 1:] = fftbuffer[:hNs + 1]
        y[pin - hNs:pin + hNs] += sw * yh                       # overlap-add
        pin += H                                                # advance sound pointer
    y = np.delete(y, range(hM2))                    # drop the zeros that were prepended
    y = np.delete(y, range(y.size - hM1, y.size))   # drop the zeros that were appended
    return y
示例#14
0
def harmonicModel(x, fs, w, N, t, nH, minf0, maxf0, f0et):
    """
    Analysis/synthesis of a sound using the sinusoidal harmonic model
    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size (minimum 512), t: threshold in negative dB,
    nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz,
    maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5),
    returns y: output array sound
    """

    hM1 = (w.size + 1) // 2                     # half analysis window size, rounded up
    hM2 = w.size // 2                           # half analysis window size, rounded down
    x = np.append(np.zeros(hM2), x)             # zeros at the start so the first window is centred on sample 0
    x = np.append(x, np.zeros(hM1))             # zeros at the end so the last sample is analysed
    Ns = 512                                    # FFT size for synthesis (even)
    # H and hNs are made integers once, here, instead of wrapping individual
    # slice bounds in int(): pin, the overlap-add slice and the window length
    # all depend on them.
    H = Ns // 4                                 # hop size used for analysis and synthesis
    hNs = Ns // 2
    pin = max(hNs, hM1)                         # sound pointer, centre of the current frame
    pend = x.size - max(hNs, hM1)               # frames are centred strictly before this
    yh = np.zeros(Ns)                           # output sound frame
    y = np.zeros(x.size)                        # output array
    w = w / sum(w)                              # normalize analysis window
    sw = np.zeros(Ns)                           # synthesis window
    ow = triang(2 * H)                          # overlapping window
    sw[hNs - H:hNs + H] = ow                    # assign the array itself: int() of an array raises
    bh = blackmanharris(Ns)
    bh = bh / sum(bh)                           # normalized blackman-harris window
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]   # window for overlap-add
    hfreqp = []                                 # harmonic frequencies of the previous frame
    f0t = 0
    f0stable = 0
    while pin < pend:
        # -----analysis-----
        x1 = x[pin - hM1:pin + hM2]                             # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                          # compute dft
        ploc = UF.peakDetection(mX, t)                          # detect peak locations
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)     # refine peak values
        ipfreq = fs * iploc / N                                 # peak locations in Hz
        f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        if ((f0stable == 0) & (f0t > 0)) \
                or ((f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0)):
            f0stable = f0t                      # stable if close to the previous stable value
        else:
            f0stable = 0
        hfreq, hmag, hphase = harmonicDetection(ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs)  # find harmonics
        hfreqp = hfreq
        # -----synthesis-----
        Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)       # generate spec sines
        fftbuffer = np.real(ifft(Yh))                           # inverse FFT
        yh[:hNs - 1] = fftbuffer[hNs + 1:]                      # undo zero-phase window
        yh[hNs - 1:] = fftbuffer[:hNs + 1]
        y[pin - hNs:pin + hNs] += sw * yh                       # overlap-add
        pin += H                                                # advance sound pointer
    y = np.delete(y, range(hM2))                    # drop the zeros that were prepended
    y = np.delete(y, range(y.size - hM1, y.size))   # drop the zeros that were appended
    return y
示例#15
0
def sprModel(x, fs, w, N, t):
    """
    Analysis/synthesis of a sound using the sinusoidal plus residual model, one frame at a time
    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size (minimum 512), t: threshold in negative dB,
    returns y: output sound, ys: sinusoidal component, xr: residual component
    """

    hM1 = (w.size + 1) // 2                     # half analysis window size, rounded up
    hM2 = w.size // 2                           # half analysis window size, rounded down
    Ns = 512                                    # FFT size for synthesis (even)
    # Floor division keeps the hop and half sizes integral on Python 3,
    # where they are used as slice bounds and as a window length.
    H = Ns // 4                                 # hop size used for analysis and synthesis
    hNs = Ns // 2
    pin = max(hNs, hM1)                         # sound pointer, centre of the current frame
    pend = x.size - max(hNs, hM1)               # frames are centred strictly before this
    ysw = np.zeros(Ns)                          # sinusoidal output frame
    xrw = np.zeros(Ns)                          # residual output frame
    ys = np.zeros(x.size)                       # sinusoidal output array
    xr = np.zeros(x.size)                       # residual output array
    w = w / sum(w)                              # normalize analysis window
    sw = np.zeros(Ns)                           # synthesis window
    ow = triang(2 * H)                          # overlapping window
    sw[hNs - H:hNs + H] = ow
    bh = blackmanharris(Ns)
    bh = bh / sum(bh)                           # normalized blackman-harris window
    wr = bh                                     # window for residual
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]
    while pin < pend:
        # -----analysis-----
        x1 = x[pin - hM1:pin + hM2]                             # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                          # compute dft
        ploc = UF.peakDetection(mX, t)                          # find peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)     # refine peak values
        ipfreq = fs * iploc / float(N)                          # convert peak locations to Hertz
        # NOTE(review): when hM1 <= hNs the first frame has pin == hNs, so ri
        # is -1 and the slice below does not span Ns samples - confirm that
        # callers always pass a window longer than Ns.
        ri = pin - hNs - 1                                      # input sound pointer for residual analysis
        xw2 = x[ri:ri + Ns] * wr                                # window the input sound
        fftbuffer = np.zeros(Ns)
        fftbuffer[:hNs] = xw2[hNs:]                             # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xw2[:hNs]
        X2 = fft(fftbuffer)                                     # compute FFT for residual analysis
        # -----synthesis-----
        Ys = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs)    # spectrum of the sinusoidal component
        Xr = X2 - Ys                                            # residual complex spectrum
        fftbuffer = np.real(ifft(Ys))                           # inverse FFT of sinusoidal spectrum
        ysw[:hNs - 1] = fftbuffer[hNs + 1:]                     # undo zero-phase window
        ysw[hNs - 1:] = fftbuffer[:hNs + 1]
        fftbuffer = np.real(ifft(Xr))                           # inverse FFT of residual spectrum
        xrw[:hNs - 1] = fftbuffer[hNs + 1:]                     # undo zero-phase window
        xrw[hNs - 1:] = fftbuffer[:hNs + 1]
        ys[ri:ri + Ns] += sw * ysw                              # overlap-add for sines
        xr[ri:ri + Ns] += sw * xrw                              # overlap-add for residual
        pin += H                                                # advance sound pointer
    y = ys + xr                                 # sum of sinusoidal and residual components
    return y, ys, xr
示例#16
0
 def peak(m, n):
     """Estimate the frequency of the first spectral peak for window size m and FFT size n.

     Reads fs, x, window and t from the enclosing scope.  The frame holds m
     samples around the sample located 0.5 s into x.
     Returns (fest, ploc, mX, pX): the estimate in Hz, the detected peak
     locations, and the two spectra returned by DFT.dftAnal.
     """
     # Integer centre and floor division: float slice bounds are rejected
     # by NumPy and `/` yields floats on Python 3.
     hfs = int(fs * 0.5)
     x1 = x[hfs - m // 2:hfs + (m + 1) // 2]
     w = get_window(window, m)
     mX, pX = DFT.dftAnal(x1, w, n)

     ploc = UF.peakDetection(mX, t)
     iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
     fest = fs * iploc[0] / n
     return fest, ploc, mX, pX
示例#17
0
def minFreqEstErr(inputFile, f):
    """
    Search window sizes M = 100*k + 1 (k = 1, 2, ...) until the frequency of
    the first interpolated peak is less than 0.05 Hz away from f.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
    """
    # analysis parameters:
    window = 'blackman'
    t = -40

    # read file
    fs, x = UF.wavread(inputFile)
    center = int(0.5 * fs)          # frames are centred 0.5 s into the file

    # initial error, must be higher than MIN_ERROR so the loop runs
    est_error = 1.0
    MIN_ERROR = 0.05
    k = 1
    # iterate over the allowed window sizes to find M and N
    while est_error >= MIN_ERROR:

        M = 100 * k + 1

        # N is a power of two not smaller than M
        N = 2**int(np.ceil(np.log2(M)))

        # M samples around the centre
        x1 = x[center - M // 2:center + (M + 1) // 2]

        w = get_window(window, M)
        mX, pX = DFT.dftAnal(x1, w, N)
        ploc = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)

        # frequency of the first peak, in Hz
        ipfreq = iploc[0] * fs / N
        est_error = np.abs(ipfreq - f)

        k += 1

    # Return the estimated frequency, as documented, not the error.
    return ipfreq, M, N
示例#18
0
def run_one_estimate(x, fs, M, window=DEFAULT_WINDOW, t=DEFAULT_THRESHOLD):
    """Analyse M samples around the middle of x and estimate peak frequencies.

    The FFT size comes from min_power_2(M).  Returns the tuple
    (mX, pX, ploc, iploc, ipmag, ipphase, fEst, N), where fEst is the
    interpolated peak locations scaled by fs / N.
    """
    first = int(len(x) / 2) - int(M / 2)    # frame start; the frame holds M samples
    N = min_power_2(M)
    segment = x[first:first + M]
    mX, pX = DFT.dftAnal(segment, get_window(window, M), N)
    ploc = UF.peakDetection(mX, t)
    iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
    return (mX, pX, ploc, iploc, ipmag, ipphase, iploc * fs / N, N)
示例#19
0
def f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et):
    """
    Fundamental frequency detection of a sound using twm algorithm
    x: input sound; fs: sampling rate; w: analysis window;
    N: FFT size; H: hop size; t: threshold in negative dB,
    minf0: minimum f0 frequency in Hz, maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5),
    returns f0: one fundamental frequency value per analysed frame
    Raises ValueError when minf0 < 0, maxf0 >= 10000 or H <= 0.
    """
    # Argument validation, in the same order as the checks are documented.
    if minf0 < 0:
        raise ValueError(
            "Minumum fundamental frequency (minf0) smaller than 0")
    if maxf0 >= 10000:
        raise ValueError(
            "Maximum fundamental frequency (maxf0) bigger than 10000Hz")
    if H <= 0:
        raise ValueError("Hop size (H) smaller or equal to 0")

    half_up = int(math.floor((w.size + 1) / 2))     # half window, rounded up
    half_down = int(math.floor(w.size / 2))         # half window, rounded down

    # Zero-pad both ends so the first and last samples can be frame centres.
    x = np.append(np.zeros(half_down), x)
    x = np.append(x, np.zeros(half_up))

    pin = half_up                   # centre of the current frame
    pend = x.size - half_up         # frames are centred strictly before this
    scratch = np.zeros(N)           # FFT-sized buffer (not read in this function)
    w = w / sum(w)                  # unit-sum analysis window

    f0 = []                         # one entry per frame
    f0stable = 0                    # last value accepted as stable (0 = none)
    while pin < pend:
        frame = x[pin - half_up:pin + half_down]
        mX, pX = DFT.dftAnal(frame, w, N)
        ploc = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        ipfreq = fs * iploc / N     # peak locations in Hz
        f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)

        # Accept the candidate as stable when it is the first positive value
        # after an unstable frame, or within 20% of the previous stable value.
        accepted = ((f0stable == 0) & (f0t > 0)) \
            or ((f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0))
        f0stable = f0t if accepted else 0

        f0 = np.append(f0, f0t)
        pin += H                    # move to the next frame
    return f0
def minFreqEstErr(inputFile, f):
    """
    Search window sizes M = 100*k + 1 (k = 1, 2, ...) until the frequency of
    the first interpolated peak is within 0.05 Hz of f.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
    """
    # analysis parameters:
    window = 'blackman'
    t = -40

    # read the file
    fs, s = UF.wavread(inputFile)
    center = int(0.5 * fs)      # integer index: float slice bounds are rejected by NumPy

    k = 1
    # The loop always analyses at least one window, so M and FFT_size are
    # defined at the return whatever the value of f.
    while True:
        # window size for this iteration
        M = 100 * k + 1

        # FFT size: next power of two above M
        exponent = int(np.log2(M)) + 1
        FFT_size = 2**exponent
        df = float(fs) / FFT_size       # width of one FFT bin in Hz

        # M samples centred on `center`
        s_sliced = s[center - M // 2:center + M // 2 + 1]

        w = get_window(window, M)
        mX, pX = DFT.dftAnal(s_sliced, w, FFT_size)

        peak_locations = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, peak_locations)

        # A frame without any peak just moves on to the next window size.
        if len(iploc) > 0:
            fEst = iploc[0] * df
            if abs(fEst - f) <= 0.05:
                break

        k += 1

    return (fEst, M, FFT_size)
示例#21
0
def harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope=0.01, minSineDur=0.02):
    """
    Harmonic-model analysis of a sound, one frame every H samples.

    x: input sound; fs: sampling rate; w: analysis window; N: FFT size (minimum 512);
    H: hop size; t: threshold in negative dB; nH: maximum number of harmonics;
    minf0, maxf0: minimum and maximum f0 frequency in Hz;
    f0et: error threshold in the f0 detection (ex: 5);
    harmDevSlope: slope of harmonic deviation; minSineDur: minimum length of harmonics
    returns xhfreq, xhmag, xhphase: harmonic frequencies, magnitudes and phases,
    stacked with one row per analysed frame
    raises ValueError if minSineDur is negative
    """
    if minSineDur < 0:
        raise ValueError("Minimum duration of sine tracks smaller than 0")

    hN = N / 2  # size of positive spectrum (not used further down)
    halfRound = int(math.floor((w.size + 1) / 2))  # half window size, rounded
    halfFloor = int(math.floor(w.size / 2))  # half window size, floored
    padding = np.zeros(halfFloor)
    # zero-pad both ends so the first window is centered at sample 0
    # and the last sample can still be analysed
    x = np.append(np.append(padding, x), padding)
    firstCenter = halfRound  # center of the first frame
    lastCenter = x.size - halfRound  # last position at which a frame may start
    fftbuffer = np.zeros(N)  # FFT buffer (allocated as in the original, unused here)
    w = w / sum(w)  # normalize analysis window
    prevHarmFreqs = []  # harmonic frequencies of the previous frame
    f0t = 0  # f0 of the current frame
    f0stable = 0  # last f0 considered stable (0 = none)
    center = firstCenter
    while center <= lastCenter:
        frame = x[center - halfRound:center + halfFloor]
        mX, pX = DFT.dftAnal(frame, w, N)
        peakBins = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, peakBins)
        ipfreq = fs * iploc / N  # peak locations in Hz
        f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)
        # an f0 is stable when it is the first positive one after an unstable
        # frame, or when it stays within a fifth of the previous stable f0
        if f0stable == 0:
            keepStable = f0t > 0
        elif f0stable > 0:
            keepStable = np.abs(f0stable - f0t) < f0stable / 5.0
        else:
            keepStable = False
        f0stable = f0t if keepStable else 0
        hfreq, hmag, hphase = harmonicDetection(
            ipfreq, ipmag, ipphase, f0t, nH, prevHarmFreqs, fs, harmDevSlope)
        prevHarmFreqs = hfreq
        freqRow = np.array([hfreq])
        magRow = np.array([hmag])
        phaseRow = np.array([hphase])
        if center == firstCenter:
            # first frame: start the output arrays
            xhfreq, xhmag, xhphase = freqRow, magRow, phaseRow
        else:
            # later frames: append one row to each output array
            xhfreq = np.vstack((xhfreq, freqRow))
            xhmag = np.vstack((xhmag, magRow))
            xhphase = np.vstack((xhphase, phaseRow))
        center += H
    # delete tracks shorter than minSineDur
    xhfreq = SM.cleaningSineTracks(xhfreq, round(fs * minSineDur / H))
    return xhfreq, xhmag, xhphase
示例#22
0
def sineModelMultiRes(x, fs, wList, NList, t, BList):
    """
    Analysis/synthesis of a sound using the sinusoidal model, without sine tracking,
    analysing each frequency band with its own window and FFT size.

    x: input array sound, fs: sampling rate,
    wList: analysis windows, one per band, NList: FFT sizes, one per band,
    t: threshold in negative dB,
    BList: (Bmin, Bmax) limits in Hz, one pair per band; band i keeps the peaks
           with Bmin <= frequency < Bmax
    returns y: output array sound (all zeros when x is too short for a single frame)
    """

    # -----synthesis params init-----
    Ns = 512  # FFT size for synthesis (even)
    # integer division: H and hNs are used as slice bounds and window lengths,
    # so they must stay integers under true division as well
    H = Ns // 4  # hop size used for analysis and synthesis
    hNs = Ns // 2  # half of synthesis FFT size
    yw = np.zeros(Ns)  # output sound frame
    y = np.zeros(x.size)  # output array
    sw = np.zeros(Ns)  # synthesis window
    ow = triang(2 * H)  # triangular window
    sw[hNs - H:hNs + H] = ow  # add triangular window
    bh = blackmanharris(Ns)  # blackmanharris window
    bh = bh / sum(bh)  # normalized blackmanharris window
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]  # normalized synthesis window

    # one full pass over the sound per band; every pass is overlap-added into y
    for i in range(len(wList)):
        # -----analysis params init-----
        w = wList[i]
        N = NList[i]
        Bmin = BList[i][0]
        Bmax = BList[i][1]
        hM1 = int(math.floor((w.size + 1) / 2))  # half analysis window size by rounding
        hM2 = int(math.floor(w.size / 2))  # half analysis window size by floor
        pin = max(hNs, hM1)  # init sound pointer in middle of anal window
        pend = x.size - max(hNs, hM1)  # last sample to start a frame
        w = w / sum(w)  # normalize analysis window
        while pin < pend:  # while input sound pointer is within sound
            # -----analysis-----
            x1 = x[pin - hM1:pin + hM2]  # select frame
            mX, pX = DFT.dftAnal(x1, w, N)  # compute dft
            ploc = UF.peakDetection(mX, t)  # detect locations of peaks
            iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values by interpolation
            ipfreq = fs * iploc / float(N)  # convert peak locations to Hertz
            inband = np.logical_and(ipfreq >= Bmin, ipfreq < Bmax)  # peaks belonging to this band
            ipmag = ipmag[inband]
            ipphase = ipphase[inband]
            ipfreq = ipfreq[inband]
            # -----synthesis-----
            Y = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs)  # generate sines in the spectrum
            fftbuffer = np.real(ifft(Y))  # compute inverse FFT
            yw[:hNs - 1] = fftbuffer[hNs + 1:]  # undo zero-phase window
            yw[hNs - 1:] = fftbuffer[:hNs + 1]
            y[pin - hNs:pin + hNs] += sw * yw  # overlap-add and apply a synthesis window
            pin += H  # advance sound pointer

    return y
示例#23
0
def minFreqEstErr(inputFile, f):
    """
    Find the smallest window size M = 100*k + 1 (k = 1, 2, ...) for which the
    estimated frequency of the sinusoid is within 0.05 Hz of f.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
    Raises:
            ValueError if the frame around 0.5 s no longer fits in the signal
            before the estimation error reaches 0.05 Hz
    """
    # analysis parameters:
    window = 'blackman'
    t = -40  # -40dB magnitude threshold for peak picking

    (fs, x) = UF.wavread(inputFile)

    def smallest_power(M):
        # smallest power of two (at least 2) that is not below M
        Np = 2
        while Np < M:
            Np *= 2
        return Np

    k = 1  # k is the minimum positive integer for which the fEst error <= 0.05Hz

    while True:
        M = 100 * k + 1
        before = M // 2 + 1  # samples taken before the 0.5 s mark
        after = M // 2  # samples taken from the 0.5 s mark onwards
        start = int(.5 * fs - before)
        stop = int(.5 * fs + after)
        if start < 0 or stop > len(x):
            raise ValueError(
                "Signal too short: no window up to M = %d reaches an error of 0.05 Hz" % M)

        x1 = x[start:stop]  # a single frame around the middle of the input signal
        w = get_window(window, M)
        N = smallest_power(M)
        mX, pX = DFT.dftAnal(x1, w, N)  # get the magnitude and phase spectrum
        ploc = UF.peakDetection(mX, t)  # get peak locations
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        ipfreq = iploc * fs / float(N)  # get frequency values of peaks

        # Compare a single scalar: the strongest detected peak. Comparing the
        # whole array fails with zero or several peaks.
        if len(ipfreq) > 0:
            fEst = ipfreq[np.argmax(ipmag)]
            if abs(fEst - f) <= 0.05:
                return float(fEst), int(M), int(N)

        k += 1  # try the next possible window size
示例#24
0
    def proc_frame(self, frame):
        """
        Append new samples to the internal buffer and analyse every hop that fits.

        frame: samples appended to self.frames

        For each analysis position the magnitude and phase spectra are stored in
        self.magnitudes / self.phases, the detected f0 is appended to
        self.fundamentals and written to self.fundamentals_file together with
        self.cur_time. Afterwards the stored results are trimmed to the last
        self.MAX_BUF entries and the sample buffer to the last self.fs samples.
        """
        self.frames = np.append(self.frames, frame)
        pend = self.frames.size - self.hM1  # last position at which a frame may start
        f0t = 0  # f0 of the current frame
        f0stable = 0  # last f0 considered stable; restarts at 0 on every call

        while self.pin < pend:
            # select frame
            x1 = self.frames[self.pin - self.hM1:self.pin + self.hM2]
            # compute dft
            mX, pX = DFT.dftAnal(x1, self.w, self.N)
            if self.pin == self.hM1:
                self.magnitudes = mX
                self.phases = pX
            else:
                self.magnitudes = np.vstack((self.magnitudes, mX))
                # stack the phase spectrum here; the previous code stacked mX,
                # which filled self.phases with magnitudes
                self.phases = np.vstack((self.phases, pX))

            # detect peak locations
            ploc = UF.peakDetection(mX, self.t)
            # refine peak values
            iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
            # convert locations to Hz
            ipfreq = self.fs * iploc / self.N
            # find f0
            f0t = UF.f0Twm(ipfreq, ipmag, self.f0et,
                           self.minf0, self.maxf0, f0stable)
            if ((f0stable == 0) & (f0t > 0)) \
                    or ((f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0)):
                # consider a stable f0 if it is close to the previous one
                f0stable = f0t
            else:
                f0stable = 0

            self.fundamentals = np.append(self.fundamentals, f0t)
            self.fundamentals_file.write('%f\t%f\n' % (self.cur_time, f0t))

            self.pin += self.H
            self.cur_time += 1.0 * self.H / self.fs

        # keep only the most recent MAX_BUF analysis results
        if self.fundamentals.shape[0] > self.MAX_BUF:
            self.fundamentals = self.fundamentals[-self.MAX_BUF:]
            self.magnitudes = self.magnitudes[-self.MAX_BUF:]
            self.phases = self.phases[-self.MAX_BUF:]

        # keep only the last self.fs samples and shift the pointer accordingly
        if self.frames.shape[0] > self.fs:
            self.pin -= self.frames.shape[0] - self.fs
            self.frames = self.frames[-self.fs:]
示例#25
0
def sineModelAnal(x, fs, w, N, H, t, maxnSines = 100, minSineDur=.01, freqDevOffset=20, freqDevSlope=0.01):
    """
    Sinusoidal analysis of a sound with sine tracking, one frame every H samples.

    x: input array sound, fs: sampling rate, w: analysis window, N: size of complex spectrum,
    H: hop-size, t: threshold in negative dB
    maxnSines: maximum number of sines per frame, minSineDur: minimum duration of sines in seconds
    freqDevOffset: minimum frequency deviation at 0Hz, freqDevSlope: slope increase of minimum frequency deviation
    returns xtfreq, xtmag, xtphase: frequencies, magnitudes and phases of sinusoidal tracks
    raises ValueError if minSineDur is negative
    """
    if minSineDur < 0:
        raise ValueError("Minimum duration of sine tracks smaller than 0")

    halfRound = int(math.floor((w.size + 1) / 2))  # half window size, rounded
    halfFloor = int(math.floor(w.size / 2))  # half window size, floored
    x = np.append(np.zeros(halfFloor), x)  # zeros in front: first window centered at sample 0
    x = np.append(x, np.zeros(halfFloor))  # zeros at the end: last sample gets analysed
    start = halfRound  # center of the first frame
    stop = x.size - halfRound  # last position at which a frame may start
    w = w / sum(w)  # normalize analysis window
    tfreq = np.array([])  # track frequencies carried over from the previous frame
    pos = start
    while pos < stop:
        frame = x[pos - halfRound:pos + halfFloor]
        mX, pX = DFT.dftAnal(frame, w, N)
        ploc = UF.peakDetection(mX, t)
        pmag = mX[ploc]  # magnitude of the peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        ipfreq = fs * iploc / float(N)  # peak locations in Hertz
        # perform sinusoidal tracking by adding peaks to trajectories
        tfreq, tmag, tphase = sineTracking(ipfreq, ipmag, ipphase, tfreq, freqDevOffset, freqDevSlope)
        # limit the number of tracks to maxnSines
        tfreq, tmag, tphase = [np.resize(trk, min(maxnSines, trk.size))
                               for trk in (tfreq, tmag, tphase)]
        # copy each track set into a fixed-length, zero-filled row
        frameRows = []
        for trk in (tfreq, tmag, tphase):
            row = np.zeros(maxnSines)
            row[:trk.size] = trk
            frameRows.append(row)
        jtfreq, jtmag, jtphase = frameRows
        if pos == start:
            # first frame initializes the output sine tracks
            xtfreq, xtmag, xtphase = jtfreq, jtmag, jtphase
        else:
            # remaining frames are appended as new rows
            xtfreq = np.vstack((xtfreq, jtfreq))
            xtmag = np.vstack((xtmag, jtmag))
            xtphase = np.vstack((xtphase, jtphase))
        pos += H
    # delete sine tracks shorter than minSineDur
    xtfreq = cleaningSineTracks(xtfreq, round(fs * minSineDur / H))
    return xtfreq, xtmag, xtphase
示例#26
0
def sineModelAnal(x, fs, w, N, H, t, maxnSines = 100, minSineDur=.01, freqDevOffset=20, freqDevSlope=0.01):
    """
    Analyse a sound with the sinusoidal model and track the sines across frames.

    x: input array sound, fs: sampling rate, w: analysis window, N: size of complex spectrum,
    H: hop-size, t: threshold in negative dB
    maxnSines: maximum number of sines per frame, minSineDur: minimum duration of sines in seconds
    freqDevOffset: minimum frequency deviation at 0Hz, freqDevSlope: slope increase of minimum frequency deviation
    returns xtfreq, xtmag, xtphase: frequencies, magnitudes and phases of sinusoidal tracks
    raises ValueError if minSineDur is negative
    """
    if minSineDur < 0:
        raise ValueError("Minimum duration of sine tracks smaller than 0")

    def padded(track):
        # zero-filled row of length maxnSines holding the track values
        row = np.zeros(maxnSines)
        row[:track.size] = track
        return row

    upperHalf = int(math.floor((w.size + 1) / 2))  # half analysis window size by rounding
    lowerHalf = int(math.floor(w.size / 2))  # half analysis window size by floor
    x = np.append(np.zeros(lowerHalf), x)  # center the first window at sample 0
    x = np.append(x, np.zeros(lowerHalf))  # allow the last sample to be analysed
    cursor = upperHalf  # sound pointer, starts in the middle of the first window
    limit = x.size - upperHalf  # last sample to start a frame
    w = w / sum(w)  # normalize analysis window
    tfreq = np.array([])
    isFirst = lambda position: position == upperHalf
    while cursor < limit:
        segment = x[cursor - upperHalf:cursor + lowerHalf]
        mX, pX = DFT.dftAnal(segment, w, N)  # compute dft
        ploc = UF.peakDetection(mX, t)  # detect locations of peaks
        pmag = mX[ploc]  # get the magnitude of the peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values by interpolation
        ipfreq = fs * iploc / float(N)  # convert peak locations to Hertz
        # perform sinusoidal tracking by adding peaks to trajectories
        tfreq, tmag, tphase = sineTracking(ipfreq, ipmag, ipphase, tfreq, freqDevOffset, freqDevSlope)
        # limit number of tracks to maxnSines
        tfreq = np.resize(tfreq, min(maxnSines, tfreq.size))
        tmag = np.resize(tmag, min(maxnSines, tmag.size))
        tphase = np.resize(tphase, min(maxnSines, tphase.size))
        jtfreq = padded(tfreq)
        jtmag = padded(tmag)
        jtphase = padded(tphase)
        if isFirst(cursor):
            # first frame: the rows become the output arrays
            xtfreq = jtfreq
            xtmag = jtmag
            xtphase = jtphase
        else:
            # later frames: stack the rows under the existing output
            xtfreq = np.vstack((xtfreq, jtfreq))
            xtmag = np.vstack((xtmag, jtmag))
            xtphase = np.vstack((xtphase, jtphase))
        cursor += H
    # delete sine tracks shorter than minSineDur
    xtfreq = cleaningSineTracks(xtfreq, round(fs * minSineDur / H))
    return xtfreq, xtmag, xtphase
示例#27
0
def harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope=0.01, minSineDur=.02):
    """
    Analyse a sound frame by frame with the sinusoidal harmonic model.

    x: input sound; fs: sampling rate, w: analysis window; N: FFT size (minimum 512);
    H: hop size; t: threshold in negative dB; nH: maximum number of harmonics;
    minf0: minimum f0 frequency in Hz; maxf0: maximum f0 frequency in Hz;
    f0et: error threshold in the f0 detection (ex: 5);
    harmDevSlope: slope of harmonic deviation; minSineDur: minimum length of harmonics
    returns xhfreq, xhmag, xhphase: harmonic frequencies, magnitudes and phases
    raises ValueError if minSineDur is negative
    """
    if minSineDur < 0:
        raise ValueError("Minimum duration of sine tracks smaller than 0")

    hN = N / 2  # size of positive spectrum
    upperHalf = int(math.floor((w.size + 1) / 2))  # half analysis window size by rounding
    lowerHalf = int(math.floor(w.size / 2))  # half analysis window size by floor
    x = np.append(np.zeros(lowerHalf), x)  # center the first window at sample 0
    x = np.append(x, np.zeros(lowerHalf))  # allow the last sample to be analysed
    cursor = upperHalf  # sound pointer, starts in the middle of the first window
    limit = x.size - upperHalf  # last sample to start a frame
    fftbuffer = np.zeros(N)  # buffer for FFT
    w = w / sum(w)  # normalize analysis window
    hfreqp = []  # harmonic frequencies of the previous frame
    f0t = 0  # f0 track
    f0stable = 0  # f0 stable
    while cursor <= limit:
        segment = x[cursor - upperHalf:cursor + lowerHalf]  # select frame
        mX, pX = DFT.dftAnal(segment, w, N)  # compute dft
        ploc = UF.peakDetection(mX, t)  # detect peak locations
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values
        ipfreq = fs * iploc / N  # convert locations to Hz
        f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        # a new f0 is accepted as stable either right after an unstable frame
        # (any positive f0) or when it is close to the previous stable one
        startsTrack = (f0stable == 0) and (f0t > 0)
        continuesTrack = (f0stable > 0) and (np.abs(f0stable - f0t) < f0stable / 5.0)
        if startsTrack or continuesTrack:
            f0stable = f0t
        else:
            f0stable = 0
        hfreq, hmag, hphase = harmonicDetection(ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs, harmDevSlope)  # find harmonics
        hfreqp = hfreq
        if cursor != upperHalf:
            # frames after the first one: append a row to each output array
            xhfreq = np.vstack((xhfreq, np.array([hfreq])))
            xhmag = np.vstack((xhmag, np.array([hmag])))
            xhphase = np.vstack((xhphase, np.array([hphase])))
        else:
            # first frame: create the output arrays
            xhfreq = np.array([hfreq])
            xhmag = np.array([hmag])
            xhphase = np.array([hphase])
        cursor += H  # advance sound pointer
    # delete tracks shorter than minSineDur
    xhfreq = SM.cleaningSineTracks(xhfreq, round(fs * minSineDur / H))
    return xhfreq, xhmag, xhphase
示例#28
0
def minFreqEstErr(inputFile, f):
    """
    Find the smallest window size M = 100*k + 1 (k = 1, 2, ...) for which the
    estimated frequency of the sinusoid is less than 0.05 Hz away from f.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
    Raises:
            ValueError if the frame around 0.5 s no longer fits in the signal
            before the estimation error drops below 0.05 Hz
    """
    # analysis parameters:
    window = 'blackman'
    t = -40

    # the file is read once; it does not change between iterations
    fs, x = UF.wavread(inputFile)

    k = 0
    while True:
        k += 1
        M = 100*k + 1
        N = nextPow2(M)
        w = get_window(window, M)

        # Slice bounds must be integers. Truncating both bounds keeps exactly
        # M samples around the 0.5 s mark.
        start = int(0.5*fs - M/2.0)
        stop = int(0.5*fs + M/2.0)
        if start < 0 or stop > len(x):
            raise ValueError(
                "Signal too short: no window up to M = %d reaches an error below 0.05 Hz" % M)
        x1 = x[start:stop]

        mX, pX = DFT.dftAnal(x1, w, N)
        ploc = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        fEst = iploc * fs / N  # peak locations converted to Hz

        # accept the first detected peak once it is close enough to f;
        # a frame without any peak simply moves on to the next window size
        if len(fEst) >= 1 and abs(f - fEst[0]) < 0.05:
            return (fEst[0], M, N)
示例#29
0
def sineModelMultiRes(x, fs, w, N, t, B):
    """
    Analysis/synthesis of a sound using the sinusoidal model, without sine tracking.
    Every frame is analysed once per band with that band's window and FFT size, and
    the in-band peaks of all bands are synthesized together.

    x: input array sound, fs: sampling rate,
    w: array of analysis windows, N: array of sizes of complex spectrum,
    t: threshold in negative dB, B: array of frequency bands as (low, high) pairs in Hz;
       band i keeps the peaks with low <= frequency < high
    returns y: output array sound (all zeros when x is too short for a single frame)
    """

    hM1 = [int(math.floor((_w.size + 1) / 2)) for _w in w]  # half analysis window(s) size by rounding
    hM2 = [int(math.floor(_w.size / 2)) for _w in w]  # half analysis window(s) size by floor
    Ns = 512  # FFT size for synthesis (even)
    # integer division: H and hNs are used as slice bounds and window lengths,
    # so they must stay integers under true division as well
    H = Ns // 4  # hop size used for analysis and synthesis
    hNs = Ns // 2  # half of synthesis FFT size
    pin = max(hNs, max(hM1))  # init sound pointer in middle of anal window
    pend = x.size - max(hNs, max(hM1))  # last sample to start a frame
    yw = np.zeros(Ns)  # output sound frame
    y = np.zeros(x.size)  # output array
    w = [_w / sum(_w) for _w in w]  # normalize analysis window(s)
    sw = np.zeros(Ns)  # synthesis window
    ow = triang(2 * H)  # triangular window
    sw[hNs - H:hNs + H] = ow  # add triangular window
    bh = blackmanharris(Ns)  # blackmanharris window
    bh = bh / sum(bh)  # normalized blackmanharris window
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]  # normalized synthesis window
    while pin < pend:  # while input sound pointer is within sound
        # -----analysis-----
        ipmag = ipphase = ipfreq = np.array([])  # peaks collected from all bands
        for i in range(len(w)):
            x1 = x[pin - hM1[i]:pin + hM2[i]]  # select frame
            mX, pX = DFT.dftAnal(x1, w[i], N[i])  # compute dft
            ploc = UF.peakDetection(mX, t)  # detect locations of peaks
            iploc, _ipmag, _ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values by interpolation
            _ipfreq = fs * iploc / float(N[i])  # convert peak locations to Hertz
            lo, hi = B[i][0], B[i][1]  # low/high limits of this band
            inband = np.logical_and(_ipfreq >= lo, _ipfreq < hi)
            # Drop out-of-band peaks instead of multiplying them by zero: a
            # zeroed peak is still passed on as a 0 Hz component with 0 dB
            # magnitude, which only stays silent if the synthesizer ignores it.
            ipmag = np.append(ipmag, _ipmag[inband])
            ipphase = np.append(ipphase, _ipphase[inband])
            ipfreq = np.append(ipfreq, _ipfreq[inband])
        # -----synthesis-----
        Y = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs)  # generate sines in the spectrum
        fftbuffer = np.real(ifft(Y))  # compute inverse FFT
        yw[:hNs - 1] = fftbuffer[hNs + 1:]  # undo zero-phase window
        yw[hNs - 1:] = fftbuffer[:hNs + 1]
        y[pin - hNs:pin + hNs] += sw * yw  # overlap-add and apply a synthesis window
        pin += H  # advance sound pointer
    return y
def test():
    """
    Search for the smallest window size M = 100*k + 1 whose frequency estimate
    is within 0.05 Hz for every integer frequency from 100 to 7999 Hz.

    For each k the sines produced by generateSine(f) are analysed around the
    0.5 s mark (fs = 44100) with a blackman window. The estimation error of
    every attempt is printed, followed by the final fEst, k, M and N.
    """
    window = 'blackman'
    t = -40
    fs = 44100
    center = int(.5 * fs)  # integer sample index of the 0.5 s mark
    k = 1
    while True:
        M = (100 * k) + 1
        half = M // 2  # integer half window, usable as a slice offset

        N = int(pow(2, np.ceil(np.log2(M))))  # next power of two not below M

        w = get_window(window, M)

        matched = False
        for f in np.arange(100, 8000):
            # NOTE(review): generateSine is defined elsewhere; presumably it
            # returns a sinusoid of f Hz sampled at fs - confirm
            x = generateSine(f)

            x1 = x[center - half:center + half + 1]  # M samples centered at 0.5 s

            mX, pX = DFT.dftAnal(x1, w, N)
            ploc = UF.peakDetection(mX, t)

            (iploc, ipmag, ipphase) = UF.peakInterp(mX, pX, ploc)

            # the interpolated peak locations are summed into one bin position
            fEst = (fs * float(np.sum(iploc))) / float(N)
            esterror = np.abs(fEst - f)
            print(esterror)

            if esterror > 0.05:
                # one frequency outside the tolerance rejects this window size
                matched = False
                break
            matched = True

        if matched:
            break
        k += 1

    print(fEst)
    print(k)
    print(M)
    print(N)
示例#31
0
def minFreqEstErr(inputFile, f):
    """
    Find the smallest window size M = 100*k + 1 (k = 1, 2, ...) for which the
    estimated frequency of the sinusoid is less than 0.05 Hz away from f.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz); 0.0 when no
                           tried window size reaches the required accuracy
            M (int) = Window size
            N (int) = FFT size
    """
    # analysis parameters:
    window = 'blackman'
    t = -40
    thresholdForError = 0.05

    # read file
    fs, x = UF.wavread(inputFile)
    # sample index that is 0.5 seconds into the sound; the frame is centered on it
    timeStamp = 0.5
    binAtTimeStamp = int(timeStamp * fs)

    # Defaults for the fallback return: the minimum window size. M is set as
    # well so the fallback cannot hit an unbound name.
    M = N = 100 * 1 + 1
    fEst = 0

    # integer range keeps M an integer (np.arange with a float bound yields floats)
    for k in range(1, (x.size - 1) // 100):
        candidate = 100 * k + 1
        half = candidate // 2

        # stop once the centered frame no longer fits inside the signal
        if binAtTimeStamp - half < 0 or binAtTimeStamp + half + 1 > x.size:
            break
        M = candidate

        # M samples of x centered around timeStamp
        x1 = x[binAtTimeStamp - half:binAtTimeStamp + half + 1]

        w = get_window(window, M)
        N = next_power_of_2(M)  # FFT size as a power of 2, and greater than M
        mX, pX = DFT.dftAnal(x1, w, N)
        ploc = UF.peakDetection(mX, t)
        iploc, ipmag, _ = UF.peakInterp(mX, pX, ploc)
        peaksInHz = iploc * fs / float(N)

        if len(peaksInHz) == 0:
            continue  # nothing above the threshold for this window size

        # compare one scalar (the strongest peak) rather than the whole array
        peakInHz = peaksInHz[np.argmax(ipmag)]
        if abs(peakInHz - f) < thresholdForError:
            return float(peakInHz), int(M), int(N)

    return float(fEst), int(M), int(N)
示例#32
0
def sineModel(x, fs, w, N, t):
    """
    Analysis/synthesis of a sound using the sinusoidal model, without sine tracking

    x: input array sound, fs: sampling rate, w: analysis window,
    N: size of complex spectrum, t: threshold in negative dB
    returns y: output array sound (all zeros when x is too short for a single frame)
    """

    hM1 = int(math.floor((w.size + 1) / 2))  # half analysis window size by rounding
    hM2 = int(math.floor(w.size / 2))  # half analysis window size by floor
    Ns = 512  # FFT size for synthesis (even)
    # integer division: H and hNs are used as slice bounds and window lengths,
    # so they must stay integers under true division as well
    H = Ns // 4  # hop size used for analysis and synthesis
    hNs = Ns // 2  # half of synthesis FFT size
    pin = max(hNs, hM1)  # init sound pointer in middle of anal window
    pend = x.size - max(hNs, hM1)  # last sample to start a frame
    yw = np.zeros(Ns)  # output sound frame
    y = np.zeros(x.size)  # output array
    w = w / sum(w)  # normalize analysis window
    sw = np.zeros(Ns)  # synthesis window
    ow = triang(2 * H)  # triangular window
    sw[hNs - H:hNs + H] = ow  # add triangular window
    bh = blackmanharris(Ns)  # blackmanharris window
    bh = bh / sum(bh)  # normalized blackmanharris window
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]  # normalized synthesis window
    while pin < pend:  # while input sound pointer is within sound
        # -----analysis-----
        x1 = x[pin - hM1:pin + hM2]  # select frame
        mX, pX = DFT.dftAnal(x1, w, N)  # compute dft
        ploc = UF.peakDetection(mX, t)  # detect locations of peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values by interpolation
        ipfreq = fs * iploc / float(N)  # convert peak locations to Hertz
        # -----synthesis-----
        Y = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs)  # generate sines in the spectrum
        fftbuffer = np.real(ifft(Y))  # compute inverse FFT
        yw[:hNs - 1] = fftbuffer[hNs + 1:]  # undo zero-phase window
        yw[hNs - 1:] = fftbuffer[:hNs + 1]
        y[pin - hNs:pin + hNs] += sw * yw  # overlap-add and apply a synthesis window
        pin += H  # advance sound pointer
    return y
示例#33
0
def sineModelAnal(x, fs, w, N, H, t, maxnSines = 100, minSineDur=.01, freqDevOffset=20, freqDevSlope=0.01):
    """
    Analysis of a sound using the sinusoidal model

    x: input array sound, fs: sampling rate, w: analysis window,
    N: size of complex spectrum, H: hop-size, t: threshold in negative dB
    maxnSines: maximum number of sines per frame
    minSineDur: minimum duration of sines in seconds
    freqDevOffset: minimum frequency deviation at 0Hz
    freqDevSlope: slope increase of minimum frequency deviation
    returns xtfreq, xtmag, xtphase: frequencies, magnitudes and phases of sinusoids
    """
    hN = N/2  # size of positive spectrum, handed to the peak detector below
    upperHalf = int(math.floor((w.size + 1) / 2))  # half analysis window size by rounding
    lowerHalf = int(math.floor(w.size / 2))  # half analysis window size by floor
    x = np.append(np.zeros(lowerHalf), x)  # center the first window at sample 0
    x = np.append(x, np.zeros(lowerHalf))  # allow the last sample to be analysed
    cursor = upperHalf  # sound pointer, starts in the middle of the first window
    limit = x.size - upperHalf  # last sample to start a frame
    w = w / sum(w)  # normalize analysis window
    tfreq = np.array([])
    while cursor < limit:
        segment = x[cursor - upperHalf:cursor + lowerHalf]  # select frame
        mX, pX = DFT.dftAnal(segment, w, N)  # compute dft
        ploc = UF.peakDetection(mX, hN, t)  # detect locations of peaks
        pmag = mX[ploc]  # get the magnitude of the peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values by interpolation
        ipfreq = fs * iploc / float(N)  # peak locations in Hertz
        tfreq, tmag, tphase = UF.sineTracking(ipfreq, ipmag, ipphase, tfreq, freqDevOffset, freqDevSlope)
        # keep at most maxnSines tracks
        tfreq, tmag, tphase = [np.resize(trk, min(maxnSines, trk.size))
                               for trk in (tfreq, tmag, tphase)]
        # copy each track set into a fixed-length, zero-filled row
        frameRows = []
        for trk in (tfreq, tmag, tphase):
            row = np.zeros(maxnSines)
            row[:trk.size] = trk
            frameRows.append(row)
        jtfreq, jtmag, jtphase = frameRows
        if cursor == upperHalf:
            # first frame: the rows become the output arrays
            xtfreq, xtmag, xtphase = jtfreq, jtmag, jtphase
        else:
            # later frames: stack the rows under the existing output
            xtfreq = np.vstack((xtfreq, jtfreq))
            xtmag = np.vstack((xtmag, jtmag))
            xtphase = np.vstack((xtphase, jtphase))
        cursor += H
    xtfreq = UF.cleaningSineTracks(xtfreq, round(fs * minSineDur / H))
    return xtfreq, xtmag, xtphase
示例#34
0
文件: A6Part4.py 项目: Jee-Bee/ASPFMA
def f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et):
    """
    Fundamental frequency detection of a sound using the TWM algorithm.

    x: input sound; fs: sampling rate; w: analysis window;
    N: FFT size; H: hop size in samples; t: threshold in negative dB;
    minf0: minimum f0 frequency in Hz; maxf0: maximum f0 frequency in Hz;
    f0et: error threshold in the f0 detection (ex: 5).
    returns f0: 1-D float numpy array, one entry appended per analysed frame
                (an empty array when the input is too short for any frame)
    raises ValueError if minf0 < 0, maxf0 >= 10000 or H <= 0
    """
    if (minf0 < 0):                                            # a negative minimum f0 is rejected
        raise ValueError("Minumum fundamental frequency (minf0) smaller than 0")

    if (maxf0 >= 10000):                                       # hard upper limit of 10 kHz on maxf0
        raise ValueError("Maximum fundamental frequency (maxf0) bigger than 10000Hz")

    if (H <= 0):                                               # a non-positive hop would never advance
        raise ValueError("Hop size (H) smaller or equal to 0")

    hM1 = (w.size + 1) // 2                                    # half analysis window size by rounding
    hM2 = w.size // 2                                          # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)                            # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM1))                            # add zeros at the end to analyze last sample
    pin = hM1                                                  # sound pointer in the middle of the analysis window
    pend = x.size - hM1                                        # last sample to start a frame
    w = w / sum(w)                                             # normalize analysis window
    f0 = []                                                    # per-frame results, converted to an array once at the end
    f0stable = 0                                               # last f0 considered stable (0 = none)
    while pin < pend:
        x1 = x[pin-hM1:pin+hM2]                                # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                         # compute dft
        ploc = UF.peakDetection(mX, t)                         # detect peak locations
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)    # refine peak values
        ipfreq = fs * iploc/N                                  # convert locations to Hz
        # NOTE(review): f0Twm is called unqualified (not UF.f0Twm); it must be
        # defined or imported elsewhere in the module - confirm.
        f0t = f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        if ((f0stable==0)&(f0t>0)) \
                or ((f0stable>0)&(np.abs(f0stable-f0t)<f0stable/5.0)):
            f0stable = f0t                                     # stable if it is within 20% of the previous stable f0
        else:
            f0stable = 0
        f0.append(f0t)                                         # O(1) append instead of re-allocating with np.append
        pin += H                                               # advance sound pointer
    return np.array(f0, dtype=float)
def minFreqEstErr(inputFile, f):
    """
    Estimate the frequency of a sinusoid with a fixed window size (k = 21).

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
    Side effect: prints the absolute estimation error |fEst - f|.
    """
    # analysis parameters:
    window = 'blackman'
    t = -40

    (fs, x) = UF.wavread(inputFile)

    k = 21                                  # fixed multiplier; the window size is 100*k + 1
    M = (100*k) + 1
    N = int(pow(2, np.ceil(np.log2(M))))    # smallest power of two >= M

    w = get_window(window, M)

    # Slice bounds must be integers: float indices are rejected by current NumPy.
    center = int(.5*fs)                     # frame is centred 0.5 s into the sound
    hM = M // 2
    x1 = x[center-hM:center+hM+1]

    mX, pX = DFT.dftAnal(x1, w, N)
    ploc = UF.peakDetection(mX, t)
    (iploc, ipmag, ipphase) = UF.peakInterp(mX, pX, ploc)

    # NOTE(review): summing iploc is only meaningful when exactly one peak is
    # detected; with several peaks the estimate is the sum of their locations.
    fEst = (fs * float(np.sum(iploc))) / float(N)

    esterror = np.abs(fEst - f)
    print(esterror)

    return (fEst, M, N)
示例#36
0
文件: A5Part1.py 项目: akkeh/SOGM_jr3
def minFreqEstErr(inputFile, f):
    """
    Estimate the frequency of the sinusoid in inputFile.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
                        (not read by this implementation; k is chosen through check_k)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
    Side effect: prints fs, every rejected k, M, N and the signal/chunk lengths.
    """
    # analysis parameters:
    window = 'blackman'
    t = -40

    (fs, x) = UF.wavread(inputFile)
    print(fs)

    # Increase k until check_k reports at least 2.
    # NOTE(review): check_k is defined elsewhere; its return value is
    # presumably a count of frequencies estimated within tolerance - confirm.
    k = 1
    while check_k(100, 2000, fs, k, window) < 2:
        print(k)
        k += 1

    M = 100*k + 1
    # The single-string forms below emit the same text as the former
    # Python 2 statements (print "M: ", M) and also run on Python 3.
    print("M:  {}".format(M))

    # smallest power of two that is >= M
    i = 0
    while (2**i) < M:
        i += 1
    N = 2**i
    print("N:  {}".format(N))
    print("fs:  {}".format(fs))
    print("length:  {}".format(len(x)))

    # Integer (floor) division keeps the slice bounds integral on Python 3 too.
    center = len(x) // 2
    h = M // 2
    x_cnk = x[center-h:center+h+1]          # M samples centred on the middle of the sound
    print("chunk:  {}".format(len(x_cnk)))

    w = get_window(window, M)
    (mX, pX) = DFT.dftAnal(x_cnk, w, N)
    p_loc = UF.peakDetection(mX, t)
    p_int = UF.peakInterp(mX, pX, p_loc)
    peak = p_int[0]*(fs/float(N))           # interpolated peak locations converted to Hz
    return (peak[0], M, N)
示例#37
0
def minFreqEstErr(inputFile, f):
    """
    Find the smallest window size M = 100*k + 1 (k >= 2) whose frequency
    estimate is within 0.05 Hz of f.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
    """
    # analysis parameters:
    window = 'blackman'
    t = -40

    (fs, x) = UF.wavread(inputFile)
    # Slice bounds must be integers: float indices are rejected by current NumPy.
    center = int(fs * 0.5)                  # frame is centred 0.5 s into the sound

    k = 2
    while True:                             # no upper bound: runs until the tolerance is met
        M = 100 * k + 1
        N = smallest_power_of_2_greater_than(M)
        hM = M // 2
        x1 = x[center - hM:center + hM + 1]
        w = get_window(window, M)
        mX, pX = DFT.dftAnal(x1, w, N)
        ploc = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        fsin = iploc[0] * fs / float(N)     # first interpolated peak, in Hz
        if abs(fsin - f) < 0.05:
            return fsin, M, N
        k = k + 1
示例#38
0
def minFreqEstErr(inputFile, f):
    """
    Scan window sizes M = 100*k + 1 for k = 5..24 and keep the one whose
    frequency estimate is closest to f.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst = Estimated frequency (Hz), as computed from the interpolated
                   peak locations returned by UF.peakInterp
            M (int) = Window size
            N (int) = FFT size
    Side effect: prints one line per candidate and the final selection.
    """
    # analysis parameters:
    window = 'blackman'
    t = -40

    ### Your code here
    fs, x = UF.wavread(inputFile)
    frame_start = int(0.5 * fs)                     # frames start 0.5 s into the sound
    for k in range(5, 25):
        M = k * 100 + 1
        # first exponent in 8..12 giving a power of two above M (12 if none does)
        exponent = next((e for e in range(8, 13) if 2**e > M), 12)
        N = 2**exponent
        w = get_window(window, M)
        x1 = x[frame_start:int(0.5 * fs + M)]
        mX, pX = DFT.dftAnal(x1, w, N)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, UF.peakDetection(mX, t))

        peak_hz = fs * iploc / float(N)             # peak locations in Hz
        f_err = abs(f - peak_hz)
        print(M, N, peak_hz, f_err)
        if k == 5:                                  # the first candidate seeds the running minimum
            cur_min = f_err
        if cur_min >= f_err:                        # ties go to the later (larger) window
            fEst, find_M, find_N, cur_min = peak_hz, M, N, f_err
    print(fEst, find_M, find_N)
    return (fEst, find_M, find_N)
示例#39
0
def minFreqEstErr(inputFile, f):
    """
    Grow the analysis window (M = 101, 201, ...) until the estimated
    frequency is within 0.05 Hz of f or M reaches the length of the sound.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the signal (Hz)
    Output:
            (freq, M, N) = last frequency estimate (Hz), window size and FFT
            size; freq and N keep their initial value 0 when no frame was
            analysed.
    """
    # analysis parameters:
    window = 'blackman'
    t = -40

    (fs, x) = UF.wavread(inputFile)

    mid = int(x.size / 2)                   # frames are centred on the middle of the sound
    tolerance = 0.05
    M, N, freq = 101, 0, 0

    while M < x.size:
        w = get_window(window, M)
        half_width = int(M / 2)
        frame = x[mid - half_width:mid + half_width + 1]

        N = nextPow(M)
        mX, pX = DFT.dftAnal(frame, w, N)
        # iploc: interpolated peak locations, ipmag/ipphase: their magnitudes/phases
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, UF.peakDetection(mX, t))

        freq = float(iploc) * float(fs) / N
        if abs(f - freq) < tolerance:
            break

        M += 100                            # next window size of the form 100*k + 1

    return (freq, M, N)
    def dftAnal(p, w, N, B):
        """
        Analyse one frame centred at sample p and return the peaks of band B.

        p: frame centre (index into x); w: analysis window; N: FFT size;
        B: upper frequency limit (Hz) of the band, must be an element of Bs.
        x, fs, t and Bs are read from the enclosing scope.
        returns (ipfreq, ipmag, ipphase): frequencies (Hz), magnitudes and
        phases of the interpolated peaks whose bin lies in
        (lower_bin, upper_bin].
        """
        hM1 = (w.size + 1) // 2                 # half analysis window size by rounding
        hM2 = w.size // 2                       # half analysis window size by floor
        x1 = x[p-hM1:p+hM2]                     # select frame
        rw = w / sum(w)                         # normalized analysis window
        mX, pX = DFT.dftAnal(x1, rw, N)

        # Band limits in bins: from the previous band edge (bin 1 for the
        # first band) up to the edge B of this band.
        upperIndex = Bs.index(B)
        lower_bin = 1
        if upperIndex > 0:
            lower_bin = int(np.ceil(float(Bs[upperIndex-1])*N/fs))
        upper_bin = int(np.ceil(float(B)*N/fs))

        ploc = UF.peakDetection(mX, t)
        # keep only the peaks that fall inside this band
        ploc = ploc[np.logical_and(ploc > lower_bin, ploc <= upper_bin)]
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        ipfreq = fs*iploc/float(N)              # convert peak locations to Hertz

        return (ipfreq, ipmag, ipphase)
示例#41
0
	def dftAnalMultiRes(p, w, N, B):
		"""
		Analyse one frame centred at sample p and return the peaks of band B.

		p: frame centre (index into x); w: analysis window; N: FFT size;
		B: upper frequency limit (Hz) of the band, must be an element of Bs.
		x, fs, t and Bs are read from the enclosing scope.
		returns (ipfreq, ipmag, ipphase): frequencies (Hz), magnitudes and
		phases of the interpolated peaks whose bin lies in
		(lower_bin, upper_bin].
		"""
		hM1 = (w.size + 1) // 2                         # half analysis window size by rounding
		hM2 = w.size // 2                               # half analysis window size by floor
		x1 = x[p-hM1:p+hM2]                             # select frame around the requested centre p
		rw = w / sum(w)                                 # normalize analysis window
		mX, pX = DFT.dftAnal(x1, rw, N)                 # compute dft with the normalized window

		upper_index = Bs.index(B)
		if upper_index > 0:
			lower_bin = int(np.ceil(float(Bs[upper_index-1]) * N / fs))
		else:
			lower_bin = 1                                   # first band starts just above bin 1
		upper_bin = int(np.ceil(float(B) * N / fs))

		ploc = UF.peakDetection(mX, t)                        # detect locations of peaks
		ploc = ploc[np.logical_and(ploc>lower_bin, ploc<=upper_bin)] # choose the peaks in band
		iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)   # refine peak values by interpolation
		ipfreq = fs*iploc/float(N)                            # convert peak locations to Hertz

		return (ipfreq, ipmag, ipphase)
示例#42
0
def minFreqEstErr(inputFile, f):
    """
    Find the smallest window size M = 100*k + 1 (k >= 1) whose frequency
    estimate is within 0.05 Hz of f.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
    Side effect: prints the error of every attempt and a final summary line.
    """
    # analysis parameters:
    window = 'blackman'
    t = -40

    fs, x = UF.wavread(inputFile)
    # Slice bounds must be integers: float indices are rejected by current NumPy.
    center = int(0.5 * fs)                  # frame is centred 0.5 s into the sound

    k = 1
    while True:                             # no upper bound: runs until the tolerance is met
        M = 100 * k + 1
        N = int(2 ** (math.floor(np.log2(M)) + 1))      # next power of two above M
        w = get_window(window, M)

        hM = (M + 1) // 2                   # M must be odd
        x1 = x[center - hM:center + hM - 1]             # M samples
        mX, pX = DFT.dftAnal(x1, w, N)
        ploc = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        ipfreq = fs * iploc / float(N)
        fEst = ipfreq[0]                    # first interpolated peak, in Hz
        fEstError = abs(fEst - f)
        print("fEstError {0:.3f}".format(fEstError))
        if fEstError < 0.05:
            break
        k += 1

    print("fEst {}, M {}, N {}, frequency estimation error {:.3f}".format(fEst, M, N, fEstError))
    return fEst, M, N
示例#43
0
def minFreqEstErr(inputFile, f):
    """
    Grow the window size M = 100*k + 1 until the frequency of the first
    detected peak is within 0.05 Hz of f (or M exceeds the sound length).

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
    """
    # analysis parameters:
    window = 'blackman'
    tol = -40

    (fs, x) = UF.wavread(inputFile)

    nMax = len(x)
    nMid = nMax // 2                        # frames are centred on the middle of the sound

    k = 0
    M = 1
    fEst = -1.0                             # sentinel so that the first comparison fails
    # fEst is kept as a scalar (first peak), so the condition is an ordinary
    # boolean test rather than an element-wise '&' on an array.
    while M <= nMax and abs(f - fEst) > 0.05:
        k += 1
        M = 100*k + 1

        # Slice bounds must be integers: float indices are rejected by current NumPy.
        hM = M // 2
        xSubSet = x[nMid-hM:nMid+hM+1]
        w = get_window(window, M)

        N = int(2**np.ceil(np.log(float(M))/np.log(2)))     # smallest power of two >= M

        (mX, pX) = DFT.dftAnal(xSubSet, w, N)
        ploc = UF.peakDetection(mX, tol)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)

        fEst = fs * iploc[0] / float(N)     # first interpolated peak, in Hz

    return (fEst, M, N)
示例#44
0
def sineModel(x, fs, w, N, t):
    """
    Analysis/synthesis of a sound using the sinusoidal model.

    x: input array sound, fs: sampling rate, w: analysis window,
    N: size of complex spectrum, t: threshold in negative dB
    returns y: output array sound (same length as x)
    """
    # Floor division keeps every size an int, so they stay usable as slice
    # bounds when '/' is true division.
    hN = N // 2                                             # size of positive spectrum
    hM1 = (w.size + 1) // 2                                 # half analysis window size by rounding
    hM2 = w.size // 2                                       # half analysis window size by floor
    Ns = 512                                                # FFT size for synthesis (even)
    H = Ns // 4                                             # hop size used for analysis and synthesis
    hNs = Ns // 2                                           # half of synthesis FFT size
    pin = max(hNs, hM1)                                     # init sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)                           # last sample to start a frame
    yw = np.zeros(Ns)                                       # output sound frame
    y = np.zeros(x.size)                                    # output array
    w = w / sum(w)                                          # normalize analysis window
    sw = np.zeros(Ns)                                       # synthesis window
    ow = triang(2*H)                                        # triangular window
    sw[hNs-H:hNs+H] = ow                                    # add triangular window
    bh = blackmanharris(Ns)                                 # blackmanharris window
    bh = bh / sum(bh)                                       # normalized blackmanharris window
    sw[hNs-H:hNs+H] = sw[hNs-H:hNs+H] / bh[hNs-H:hNs+H]     # normalized synthesis window
    while pin < pend:                                       # while input sound pointer is within sound
        #-----analysis-----
        x1 = x[pin-hM1:pin+hM2]                             # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                      # compute dft
        # NOTE(review): three-argument peakDetection(mX, hN, t) is kept as
        # found; other callers use peakDetection(mX, t) - confirm against the
        # utilFunctions version in use.
        ploc = UF.peakDetection(mX, hN, t)                  # detect locations of peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc) # refine peak values by interpolation
        #-----synthesis-----
        # genSpecSines is given frequencies in Hz together with fs, so the
        # analysis bin locations are converted once (bin * fs / N).
        ipfreq = fs*iploc/float(N)
        Y = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs) # generate sines in the spectrum
        fftbuffer = np.real(ifft(Y))                        # compute inverse FFT
        yw[:hNs-1] = fftbuffer[hNs+1:]                      # undo zero-phase window
        yw[hNs-1:] = fftbuffer[:hNs+1]
        y[pin-hNs:pin+hNs] += sw*yw                         # overlap-add and apply a synthesis window
        pin += H                                            # advance sound pointer
    return y
示例#45
0
文件: A5Part1.py 项目: akkeh/SOGM_jr3
def findk(fr, fs, window, startk):
    """
    Search for a window multiplier k (window size M = 100*k + 1) for which a
    synthetic sinusoid of frequency fr is estimated to within 0.05 Hz.

    fr: frequency of the test sinusoid (Hz); fs: sampling rate (Hz);
    window: window type passed to get_window;
    startk: accepted for interface compatibility but not read - the search
            always starts at k = 1.
    returns (k, err): k as left by the loop and err = |estimate - fr|.
    NOTE(review): k is incremented after every attempt, so the returned k is
    one more than the multiplier that met the tolerance - confirm callers
    expect this.
    """
    # The test signal does not depend on k: synthesise it once, not per pass.
    n = np.arange(441000.0)
    x = np.sin(2.0*np.pi * fr * n / fs)
    mid = len(x) // 2

    k = 1
    p = 0
    while abs(p-fr) > 0.05:
        M = 100*k+1
        # smallest power of two that is >= M
        i = 0
        while (2**i) < M:
            i += 1
        N = 2**i
        # Floor division keeps the slice bounds integral.
        l_h = mid - M // 2 + 1
        x_cnk = x[l_h:l_h + M]                  # M samples around the middle of the signal
        w = get_window(window, M)
        (mX, pX) = DFT.dftAnal(x_cnk, w, N)
        p_loc = UF.peakDetection(mX, -40)
        p_int = UF.peakInterp(mX, pX, p_loc)
        peak = p_int[0]*(fs/float(N))           # interpolated peak locations converted to Hz
        p = peak[0]
        k += 1
    return k, abs(p-fr)
示例#46
0
def minFreqEstErr(inputFile, f):
    """
    Try window sizes M = 100*k + 1 for k = 1..99 and stop at the first one
    whose frequency estimate is within 0.05 Hz of f.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
    If no k meets the tolerance, the values of the last attempt (k = 99) are
    returned.
    """
    # analysis parameters:
    window = 'blackman'
    t = -40

    (fs, x) = UF.wavread(inputFile)  # read in the inputFile
    Ns = 2**np.arange(24)  # List of possible FFT sizes
    error = 0.05  # allowable frequency error in Hz
    center = x.size // 2  # floor division keeps the slice bounds integral

    for k in range(1, 100):
        M = 100 * k + 1
        w = get_window(window, M)  # get the window
        hM1 = (M + 1) // 2  # half analysis window size by rounding
        hM2 = M // 2  # half analysis window size by floor
        frame = x[center - hM2:center + hM1]  # M samples around the middle of the sound
        N = int(Ns[np.where(Ns > M)[0][0]])  # smallest power of two larger than M, as a plain int
        (mX, pX) = DFT.dftAnal(frame, w, N)  # Calculate the dft
        ploc = UF.peakDetection(mX, t)  # Get peak locations
        (iploc, ipmag, ipphase) = UF.peakInterp(
            mX, pX, ploc)  # parabolic interpolation to find peak values
        fEst = fs * iploc[0] / N
        if abs(fEst - f) <= error:
            break

    return (fEst, M, N)
示例#47
0
def minFreqEstErr(inputFile, f):
    """
    Increase the window size M = 100*k + 1 (k = 1, 2, ...) until the
    frequency of the first detected peak is no more than 0.05 Hz away from f.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
    """
    # analysis parameters:
    window = 'blackman'
    t = -40
    max_error = 0.05

    fs, x = UF.wavread(inputFile)
    center = 0.5 * x.size                   # frames are centred on the middle of the sound

    k = 0
    while True:
        k += 1
        M = k * 100 + 1
        fragment = x[int(center - M / 2):int(center + M / 2 + 1)]
        w = get_window(window, M, False)    # third argument False: symmetric window
        N = int(np.power(2, np.ceil(np.log2(M))))  # smallest power of two >= M

        mX, pX = DFT.dftAnal(fragment, w, N)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, UF.peakDetection(mX, t))
        locBinToHz = iploc[0] * fs / N      # first interpolated peak, in Hz
        if not np.abs(f - locBinToHz) > max_error:
            break

    return locBinToHz, int(M), int(N)
示例#48
0
def time2Freq(x, fs, w, N, pinFirst, hopSizeMelodia, t):
    '''
    Fourier transform, peak thresholding and peak interpolation for a single
    analysis window.

    x: input sound; fs: sampling rate; w: analysis window; N: FFT size;
    pinFirst: position (samples) of the first frame;
    hopSizeMelodia: hop size (samples); the analysed frame is the one
                    300 hops after pinFirst;
    t: peak detection threshold (dB).
    returns mX, iploc, ipmag, ipphase: magnitude spectrum and the
    interpolated peak locations, magnitudes and phases.
    Side effects: prints the frame position and calls visualize().
    '''

    ###################
    ## prepare params
    hM1 = (w.size + 1) // 2  # half analysis window size by rounding
    hM2 = w.size // 2  # half analysis window size by floor
    x = np.append(
        np.zeros(hM2),
        x)  # add zeros at beginning to center first window at sample 0
    x = np.append(x,
                  np.zeros(hM2))  # add zeros at the end to analyze last sample
    pin = pinFirst + 300 * hopSizeMelodia  # fixed frame: 300 hops after pinFirst

    ########################
    # process one window
    # Function-call form prints the same text as the former print statement.
    # NOTE(review): pin / fs truncates to whole seconds under Python 2 when
    # both operands are ints - confirm which is intended.
    print("at time {}".format(pin / fs))

    x1 = x[pin - hM1:pin + hM2]  # select frame
    mX, pX = DFT.dftAnal(x1, w, N)  # compute dft

    ploc = UF.peakDetection(mX, t)  # detect peak locations

    iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values

    # optional
    visualize(N, mX, pin, fs, ploc, iploc, ipmag, ipphase)

    return mX, iploc, ipmag, ipphase
示例#49
0
def sineModelMultiRes(x, fs, multi_w, multi_N, t, multi_B):
    """
    Analysis/synthesis of a sound using the sinusoidal model with one
    analysis resolution per frequency band, without sine tracking.

    x: input array sound, fs: sampling rate,
    multi_w: analysis windows (one per band), multi_N: FFT sizes (one per band),
    t: threshold in negative dB,
    multi_B: upper frequency limit in Hz of each band, in ascending order;
             band i keeps the peaks with multi_B[i-1] <= f < multi_B[i]
             (no lower limit for band 0).
    returns y: output array sound (same length as x)
    raises ValueError if no band is given
    """
    if len(multi_B) == 0:
        raise ValueError("at least one frequency band is required")

    bands = range(len(multi_B))                             # to iterate over bands

    multi_w_size = np.array([multi_w[i].size for i in bands])
    multi_hM1 = (multi_w_size + 1) // 2                     # half analysis window sizes by rounding
    multi_hM2 = multi_w_size // 2                           # half analysis window sizes by floor

    # Floor division keeps the sizes integral so they remain valid slice
    # bounds and can be added in place to the integer pointer array.
    Ns = 512                                                # FFT size for synthesis (even)
    H = Ns // 4                                             # hop size used for analysis and synthesis
    hNs = Ns // 2                                           # half of synthesis FFT size

    multi_pin = np.maximum(hNs, multi_hM1)                  # per-band sound pointer in middle of analysis window
    multi_pend = x.size - multi_pin                         # per-band last sample to start a frame

    yw_combined = np.zeros(Ns)                              # output sound frame
    y_combined = np.zeros(x.size)                           # output array

    multi_w = [multi_w[i] / sum(multi_w[i]) for i in bands] # normalize analysis windows

    sw = np.zeros(Ns)                                       # synthesis window
    ow = triang(2*H)                                        # triangular window
    sw[hNs-H:hNs+H] = ow                                    # add triangular window
    bh = blackmanharris(Ns)                                 # blackmanharris window
    bh = bh / sum(bh)                                       # normalized blackmanharris window
    sw[hNs-H:hNs+H] = sw[hNs-H:hNs+H] / bh[hNs-H:hNs+H]     # normalized synthesis window

    while (multi_pin < multi_pend).all():                   # while every band pointer is within sound
        #-----analysis-----
        band_freq, band_mag, band_phase = [], [], []
        for i in bands:
            x1 = x[multi_pin[i] - multi_hM1[i]:multi_pin[i] + multi_hM2[i]]    # select frame
            mX, pX = DFT.dftAnal(x1, multi_w[i], multi_N[i])                   # compute dft
            ploc = UF.peakDetection(mX, t)                                     # detect locations of peaks
            iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)                # refine peak values by interpolation
            ipfreq = fs*iploc/float(multi_N[i])                                # convert peak locations to Hertz

            # keep only the peaks that belong to this band
            in_band = ipfreq < multi_B[i]
            if i > 0:
                in_band &= ipfreq >= multi_B[i-1]
            band_freq.append(ipfreq[in_band])
            band_mag.append(ipmag[in_band])
            band_phase.append(ipphase[in_band])

        # peaks of all bands, in band order and in detection order within a band
        ipfreq_combined = np.concatenate(band_freq)
        ipmag_combined = np.concatenate(band_mag)
        ipphase_combined = np.concatenate(band_phase)

        #-----synthesis-----
        Y_combined = UF.genSpecSines(ipfreq_combined, ipmag_combined, ipphase_combined, Ns, fs)   # generate sines in the spectrum
        fftbuffer_combined = np.real(ifft(Y_combined))                          # compute inverse FFT
        yw_combined[:hNs-1] = fftbuffer_combined[hNs+1:]                        # undo zero-phase window
        yw_combined[hNs-1:] = fftbuffer_combined[:hNs+1]
        y_combined[multi_pin[0]-hNs:multi_pin[0]+hNs] += sw*yw_combined         # overlap-add at the first band's pointer
        multi_pin += H                                                          # advance all sound pointers

    return y_combined
示例#50
0
def hprModel(x, fs, w, N, t, nH, minf0, maxf0, f0et):
    """
    Analysis/synthesis of a sound using the harmonic plus residual model.

    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size (minimum 512), t: threshold in negative dB,
    nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz,
    maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5)
    returns y: output sound, yh: harmonic component, xr: residual component
    """
    # Floor division keeps every size an int, so they stay usable as slice
    # bounds when '/' is true division.
    hM1 = (w.size + 1) // 2                                     # half analysis window size by rounding
    hM2 = w.size // 2                                           # half analysis window size by floor
    Ns = 512                                                    # FFT size for synthesis (even)
    H = Ns // 4                                                 # hop size used for analysis and synthesis
    hNs = Ns // 2                                               # half of synthesis FFT size
    pin = max(hNs, hM1)                                         # sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)                               # last sample to start a frame
    yhw = np.zeros(Ns)                                          # harmonic output frame
    xrw = np.zeros(Ns)                                          # residual output frame
    yh = np.zeros(x.size)                                       # harmonic output array
    xr = np.zeros(x.size)                                       # residual output array
    w = w / sum(w)                                              # normalize analysis window
    sw = np.zeros(Ns)
    ow = triang(2*H)                                            # overlapping window
    sw[hNs-H:hNs+H] = ow
    bh = blackmanharris(Ns)                                     # synthesis window
    bh = bh / sum(bh)                                           # normalize synthesis window
    wr = bh                                                     # window for residual
    sw[hNs-H:hNs+H] = sw[hNs-H:hNs+H] / bh[hNs-H:hNs+H]
    hfreqp = []                                                 # harmonic frequencies of the previous frame
    f0stable = 0                                                # last f0 considered stable (0 = none)
    while pin < pend:
        #-----analysis-----
        x1 = x[pin-hM1:pin+hM2]                                 # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                          # compute dft
        ploc = UF.peakDetection(mX, t)                          # find peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)     # refine peak values
        ipfreq = fs * iploc/N                                   # convert locations to Hz
        f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        if ((f0stable==0)&(f0t>0)) \
                or ((f0stable>0)&(np.abs(f0stable-f0t)<f0stable/5.0)):
            f0stable = f0t                                      # stable if it is within 20% of the previous stable f0
        else:
            f0stable = 0
        hfreq, hmag, hphase = HM.harmonicDetection(ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs)  # find harmonics
        hfreqp = hfreq
        # NOTE(review): when hM1 <= hNs the first frame has ri = -1, which
        # makes the slice below empty - confirm callers always pass a window
        # longer than Ns.
        ri = pin-hNs-1                                          # input sound pointer for residual analysis
        xw2 = x[ri:ri+Ns]*wr                                    # window the input sound
        fftbuffer = np.zeros(Ns)                                # reset buffer
        fftbuffer[:hNs] = xw2[hNs:]                             # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xw2[:hNs]
        X2 = fft(fftbuffer)                                     # FFT of input signal for residual analysis
        #-----synthesis-----
        Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)       # generate sines
        Xr = X2-Yh                                              # residual complex spectrum
        fftbuffer = np.real(ifft(Yh))                           # inverse FFT of harmonic spectrum
        yhw[:hNs-1] = fftbuffer[hNs+1:]                         # undo zero-phase window
        yhw[hNs-1:] = fftbuffer[:hNs+1]
        fftbuffer = np.real(ifft(Xr))                           # inverse FFT of residual spectrum
        xrw[:hNs-1] = fftbuffer[hNs+1:]                         # undo zero-phase window
        xrw[hNs-1:] = fftbuffer[:hNs+1]
        yh[ri:ri+Ns] += sw*yhw                                  # overlap-add for sines
        xr[ri:ri+Ns] += sw*xrw                                  # overlap-add for residual
        pin += H                                                # advance sound pointer
    y = yh+xr                                                   # sum of harmonic and residual components
    return y, yh, xr
示例#51
0
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import get_window
import sys, os
sys.path.append(
    os.path.join(os.path.dirname(os.path.dirname(sys.path[0])), 'software',
                 'models'))
import dftModel as DFT
import utilFunctions as UF

# Script: analyse one frame of sine-440.wav and plot its magnitude spectrum
# with the interpolated spectral peaks marked on top.
fs, x = UF.wavread('../../sounds/sine-440.wav')
M = 501                                   # analysis window size (samples)
N = 512 * 4                               # FFT size
t = -20                                   # peak detection threshold
w = get_window('hamming', M)
x1 = x[int(.8 * fs):int(.8 * fs + M)]     # M samples starting 0.8 s into the sound
mX, pX = DFT.dftAnal(x1, w, N)            # magnitude and phase spectra of the frame
ploc = UF.peakDetection(mX, t)            # peak locations above the threshold
iploc, ipmag, iphase = UF.peakInterp(mX, pX, ploc)  # interpolated location, magnitude, phase
pmag = mX[ploc]                           # magnitudes at the uninterpolated peak bins (not plotted)

freqaxis = fs * np.arange(N / 2 + 1) / float(N)     # frequency in Hz of each spectrum bin
plt.plot(freqaxis, mX)
plt.plot(fs * iploc / float(N), ipmag, marker='x', linestyle='')  # peaks drawn as 'x' markers
plt.show()
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../../software/models/"))
import dftModel as DFT
import utilFunctions as UF

# Script: analyse one frame of oboe-A4.wav, resynthesise its sinusoidal peaks
# with a smaller synthesis FFT and start plotting the result.
(fs, x) = UF.wavread("../../../sounds/oboe-A4.wav")
M = 601                                   # analysis window size (samples)
w = np.blackman(M)
N = 1024                                  # analysis FFT size
hN = N // 2                               # floor division: must stay an int
Ns = 512                                  # synthesis FFT size
hNs = Ns // 2                             # used as a slice bound below, so it must be an int
pin = 5000                                # first sample of the analysed frame
t = -70                                   # peak detection threshold
x1 = x[pin : pin + w.size]
mX, pX = DFT.dftAnal(x1, w, N)
ploc = UF.peakDetection(mX, t)
iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
freqs = iploc * fs / N                    # interpolated peak locations in Hz
Y = UF.genSpecSines(freqs, ipmag, ipphase, Ns, fs)
mY = 20 * np.log10(abs(Y[:hNs]))          # magnitude (dB) of the first half of the synthesised spectrum
pY = np.unwrap(np.angle(Y[:hNs]))         # unwrapped phase of the first half
y = fftshift(ifft(Y)) * sum(blackmanharris(Ns))

plt.figure(1, figsize=(9, 6))

plt.subplot(4, 1, 1)
plt.plot(np.arange(-M / 2, M / 2), x1, "b", lw=1.5)
plt.axis([-M / 2, M / 2, min(x1), max(x1)])
plt.title("x (oboe-A4.wav), M = 601")

plt.subplot(4, 1, 2)
示例#53
0
def sineModelAnalEnhanced(
        inputFile='../../sounds/sines-440-602-transient.wav'):
    """
    Estimate two sinusoidal frequency tracks of a sound frame by frame,
    keeping only the spectral peaks accepted by selectFlatPhasePeak(), and
    plot the estimated tracks over the spectrogram.

    Input:
           inputFile (string): wav file including the path
    Output:
           tStamps: A Kx1 numpy array of time stamps at which the frequency components were estimated
           tfreq: A Kx2 numpy array of frequency values, one column per component;
                  frames that do not yield exactly two selected peaks hold [0.0, 0.0]
    """
    phaseDevThres = 1e-2  # Allowed deviation in phase
    M = 2047  # window size
    N = 4096  # FFT size
    t = -80  # threshold in negative dB
    H = 128  # hop-size
    window = 'blackman'  # window type
    fs, x = UF.wavread(inputFile)  # Read input file
    w = get_window(window, M)  # Get the window
    hM1 = (w.size + 1) // 2  # half analysis window size by rounding
    hM2 = w.size // 2  # half analysis window size by floor
    x = np.append(
        np.zeros(hM2),
        x)  # add zeros at beginning to center first window at sample 0
    x = np.append(x,
                  np.zeros(hM2))  # add zeros at the end to analyze last sample
    pend = x.size - hM1  # last sample to start a frame
    frameCenters = np.arange(hM1, pend, H)  # sound pointer of every frame
    tStamps = frameCenters / float(fs)  # Generate time stamps
    w = w / sum(w)  # normalize analysis window
    # One row per frame, allocated up front: the result is always K x 2 (even
    # for zero or one frame) and rows default to the "ignored frame" value.
    tfreq = np.zeros((frameCenters.size, 2))
    for frameIdx, pin in enumerate(frameCenters):
        x1 = x[pin - hM1:pin + hM2]  # select frame
        mX, pX = SM.DFT.dftAnal(x1, w, N)  # compute dft
        ploc = UF.peakDetection(mX, t)  # detect locations of peaks
        ###### CODE DIFFERENT FROM sineModelAnal() #########
        # Phase based mainlobe tracking: peaks at either end of the spectrum
        # are kept unconditionally, the others only if the phase spectrum
        # around them is flat.
        plocSelMask = np.ones(len(ploc), dtype=bool)
        for pindex, p in enumerate(ploc):
            if 2 < p < (len(pX) - 2):
                plocSelMask[pindex] = bool(
                    selectFlatPhasePeak(pX, p, phaseDevThres))
        plocSel = ploc[plocSelMask]  # Select the ones chosen
        if len(plocSel) == 2:  # frames without exactly two peaks stay at 0.0
            iploc, ipmag, ipphase = UF.peakInterp(
                mX, pX, plocSel
            )  # Only selected peaks to refine peak values by interpolation
            tfreq[frameIdx] = fs * iploc / float(
                N)  # convert peak locations to Hertz
        ###### CODE DIFFERENT FROM sineModelAnal() #########
    # Plot the estimated frequency tracks
    # NOTE(review): another copy of this function in the file calls
    # stft.stftAnal(x, fs, w, N, H); confirm which signature the imported
    # stft module actually has.
    mX, pX = stft.stftAnal(x, w, N, H)
    maxplotfreq = 1500.0
    # Integer number of bins to show (slice bounds must be integers), never
    # more than the spectrogram actually has.
    nPlotBins = min(int(N * maxplotfreq / fs) + 1, mX.shape[1])
    binFreq = fs * np.arange(nPlotBins) / float(N)
    numFrames = int(mX.shape[0])
    frmTime = H * np.arange(numFrames) / float(fs)
    plt.pcolormesh(frmTime,
                   binFreq,
                   np.transpose(mX[:, :nPlotBins]),
                   cmap='hot_r')
    plt.plot(tStamps, tfreq[:, 0], color='y', linewidth=2.0)
    plt.plot(tStamps, tfreq[:, 1], color='c', linewidth=2.0)
    plt.legend(('Estimated f1', 'Estimated f2'))
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (Hz)')
    plt.autoscale(tight=True)
    return tStamps, tfreq
示例#54
0
def sineModelMultiRes(x, fs, Ns, W, M, N, B, T):
    """
    Analysis/synthesis of a sound using the multi-resolution sinusoidal model, without sine tracking
    x:  input array sound,
    fs: sampling frequency,
    Ns: FFT size for synthesis,
    W:  array of analysis window types,
    M:  array of analysis windows sizes,
    N:  array of sizes of complex spectrums,
    B:  array of frequency bands separators (ascending order of frequency, number of bands == B.size + 1),
    T:  array of peak detection thresholds in negative dB.
    returns y: output array sound (all zeros when x is too short to hold one frame)
    raises ValueError when the per-resolution parameters have incompatible sizes
    """

    nResolutions = W.size
    if (nResolutions != N.size) or (nResolutions != B.size + 1) or (nResolutions != T.size):
        raise ValueError('Parameters W,N,B,T shall have compatible sizes')
    if len(M) < nResolutions:
        raise ValueError('Parameter M shall hold one window size per resolution')

    # Integer division: these values are used as slice bounds.
    H = Ns // 4                                             # Hop size used for analysis and synthesis
    hNs = Ns // 2                                           # half of synthesis FFT size
    yw = np.zeros(Ns)                                       # initialize output sound frame
    y = np.zeros(x.size)                                    # initialize output array
    sw = np.zeros(Ns)                                       # initialize synthesis window
    ow = triang(2*H)                                        # triangular window
    sw[hNs-H:hNs+H] = ow                                    # add triangular window
    bh = blackmanharris(Ns)                                 # blackmanharris window
    bh = bh / sum(bh)                                       # normalized blackmanharris window
    sw[hNs-H:hNs+H] = sw[hNs-H:hNs+H] / bh[hNs-H:hNs+H]     # normalized synthesis window

    # Real lists (not lazy map objects) because they are indexed per band.
    HM1 = [int((m + 1) // 2) for m in M]                    # half analysis windows sizes by rounding
    HM2 = [int(m // 2) for m in M]                          # half analysis windows sizes by floor
    pin = max(hNs, max(HM1))                                # init sound pointers in the middle of largest window
    pend = x.size - pin                                     # last samples to start a frame

    # The analysis windows do not depend on the frame: build them once.
    windows = []
    for k in range(nResolutions):
        w = get_window(W[k], M[k])
        windows.append(w / sum(w))                          # normalize analysis window

    # Two peaks closer than this, found with different windows, are taken as
    # one peak reported twice; it is derived from the last band's FFT size.
    freqDiffThreshold = (fs*6)/float(N[nResolutions-1])

    while pin < pend:                                       # while input sound pointer is within sound

        bandFreqs, bandMags, bandPhases, bandWinSizes = [], [], [], []

        #-----multi-resolution spectrum calculation-----
        for k in range(nResolutions):
            n = N[k]
            #-----analysis-----
            x1 = x[pin-HM1[k]:pin+HM2[k]]                         # select frame
            mX, pX = DFT.dftAnal(x1, windows[k], n)               # compute dft
            ploc = UF.peakDetection(mX, T[k])                     # detect locations of peaks
            iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)   # refine peak values by interpolation
            ipfreq = fs*iploc/float(n)                            # convert peak locations to Hertz

            f0 = 0.0 if k == 0 else B[k-1]                        # first frequency range starts from zero
            f1 = fs / 2.0 if k == B.size else B[k]                # last frequency range ends at fs/2

            inBand = (f0 <= ipfreq) & (ipfreq < f1)               # peaks inside the assigned frequency band
            bandFreqs.append(ipfreq[inBand])
            bandMags.append(ipmag[inBand])
            bandPhases.append(ipphase[inBand])
            bandWinSizes.append(np.full(np.count_nonzero(inBand), M[k]))

        smoothedIPFreq, smoothedIPMag, smoothedIPhase = _smoothBandEdgePeaks(
            np.concatenate(bandFreqs), np.concatenate(bandMags),
            np.concatenate(bandPhases), np.concatenate(bandWinSizes),
            freqDiffThreshold)

        #-----synthesis-----
        if smoothedIPFreq.size > 0:                           # a frame without peaks adds nothing to the output
            Y = UF.genSpecSines(smoothedIPFreq, smoothedIPMag, smoothedIPhase, Ns, fs)   # generate sines in the spectrum
            fftbuffer = np.real(ifft(Y))                      # compute inverse FFT
            yw[:hNs-1] = fftbuffer[hNs+1:]                    # undo zero-phase window
            yw[hNs-1:] = fftbuffer[:hNs+1]
            y[pin-hNs:pin+hNs] += sw*yw                       # overlap-add and apply a synthesis window
        pin += H                                              # advance sound pointer

    return y


def _smoothBandEdgePeaks(freqs, mags, phases, winSizes, freqDiffThreshold):
    """
    Smooth out "double-reported" peaks close to the division frequencies of the frequency ranges.
    Two consecutive peaks closer than freqDiffThreshold that were found with
    different window sizes are replaced by one peak holding their averaged
    frequency, magnitude and phase; every other peak is copied exactly once.
    returns the smoothed (frequencies, magnitudes, phases) as numpy arrays
    """
    outFreq, outMag, outPhase = [], [], []
    nPeaks = freqs.size
    l = 0
    while l < nPeaks:
        isDoubleReport = (l + 1 < nPeaks
                          and abs(freqs[l] - freqs[l+1]) < freqDiffThreshold
                          and winSizes[l] != winSizes[l+1])
        if isDoubleReport:
            outFreq.append((freqs[l] + freqs[l+1])/2.0)
            outMag.append((mags[l] + mags[l+1])/2.0)
            outPhase.append((phases[l] + phases[l+1])/2.0)
            l += 2                                            # both peaks are consumed by the merge
        else:
            outFreq.append(freqs[l])
            outMag.append(mags[l])
            outPhase.append(phases[l])
            l += 1
    return np.array(outFreq), np.array(outMag), np.array(outPhase)
import matplotlib.pyplot as plt
from scipy.signal import hamming, triang, blackmanharris
import sys, os, functools, time
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../../software/models/'))
import dftModel as DFT
import utilFunctions as UF

# Analyse one frame of the two-sine sound and plot its phase spectrum with
# the interpolated peaks marked on top.
(fs, x) = UF.wavread('../../../sounds/sine-440+490.wav')
w = np.hamming(3529)
# NOTE(review): 16084*2 is not a power of two; possibly meant 16384*2 - confirm.
N = 16084*2
hN = N//2  # integer bin count (true division would give a float)
t = -20
pin = 4850
x1 = x[pin:pin+w.size]
mX1, pX1 = DFT.dftAnal(x1, w, N)
# NOTE(review): other examples in this file call UF.peakDetection(mX, t)
# with two arguments; confirm which signature the imported module has.
ploc = UF.peakDetection(mX1, hN, t)
pmag = mX1[ploc] 
iploc, ipmag, ipphase = UF.peakInterp(mX1, pX1, ploc)

plt.figure(1, figsize=(9, 6))
plt.subplot(311)
plt.plot(fs*np.arange(0,N/2)/float(N), pX1, 'c', lw=1.5)
plt.plot(fs * iploc / N, ipphase, marker='x', color='b', alpha=1, linestyle='', markeredgewidth=1.5) 
plt.axis([200, 1000, 50, 180])
plt.title('pX + peaks (sine-440+490.wav)')


# Second sound: only its analysis parameters are set up in this fragment.
(fs, x) = UF.wavread('../../../sounds/vibraphone-C6.wav')
w = np.blackman(401)
N = 1024
hN = N//2
示例#56
0
def hpsModel(x, fs, w, N, t, nH, minf0, maxf0, f0et, stocf):
	"""
	Analysis/synthesis of a sound using the harmonic plus stochastic model, one frame at a time, no harmonic tracking
	x: input sound; fs: sampling rate, w: analysis window; N: FFT size (minimum 512), t: threshold in negative dB,
	nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz; maxf0: maximum f0 frequency in Hz,
	f0et: error threshold in the f0 detection (ex: 5); stocf: decimation factor of mag spectrum for stochastic analysis
	returns y: output sound, yh: harmonic component, yst: stochastic component
	(when x is too short to hold one frame the three outputs are all-zero arrays of x.size samples)
	"""

	# Integer division throughout: these values are used as slice bounds.
	hM1 = (w.size+1)//2                                    # half analysis window size by rounding
	hM2 = w.size//2                                        # half analysis window size by floor
	Ns = 512                                               # FFT size for synthesis (even)
	H = Ns//4                                              # Hop size used for analysis and synthesis
	hNs = Ns//2
	# Start at hNs+1 at least: the residual frame begins at pin-hNs-1, and a
	# negative start would select an empty slice of x.
	pin = max(hNs+1, hM1)                                  # initialize sound pointer in middle of analysis window
	pend = x.size - max(hNs, hM1)                          # last sample to start a frame
	yhw = np.zeros(Ns)                                     # initialize output sound frame
	ystw = np.zeros(Ns)                                    # initialize output sound frame
	yh = np.zeros(x.size)                                  # initialize output array
	yst = np.zeros(x.size)                                 # initialize output array
	w = w / sum(w)                                         # normalize analysis window
	sw = np.zeros(Ns)
	ow = triang(2*H)                                       # overlapping window
	sw[hNs-H:hNs+H] = ow
	bh = blackmanharris(Ns)                                # synthesis window
	bh = bh / sum(bh)                                      # normalize synthesis window
	wr = bh                                                # window for residual
	sw[hNs-H:hNs+H] = sw[hNs-H:hNs+H] / bh[hNs-H:hNs+H]    # synthesis window for harmonic component
	sws = H*hanning(Ns)/2                                  # synthesis window for stochastic
	nStocCoefs = int(hNs*stocf)                            # size of the decimated residual envelope (must be an integer)
	hfreqp = []
	f0stable = 0
	while pin<pend:
		#-----analysis-----
		x1 = x[pin-hM1:pin+hM2]                              # select frame
		mX, pX = DFT.dftAnal(x1, w, N)                       # compute dft
		ploc = UF.peakDetection(mX, t)                       # find peaks
		iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values
		ipfreq = fs * iploc/N                                # convert peak locations to Hz
		f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
		startsTrack = (f0stable == 0) and (f0t > 0)
		staysClose = (f0stable > 0) and (np.abs(f0stable-f0t) < f0stable/5.0)
		if startsTrack or staysClose:
			f0stable = f0t                                     # consider a stable f0 if it is close to the previous one
		else:
			f0stable = 0
		hfreq, hmag, hphase = HM.harmonicDetection(ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs) # find harmonics
		hfreqp = hfreq
		ri = pin-hNs-1                                       # input sound pointer for residual analysis
		xw2 = x[ri:ri+Ns]*wr                                 # window the input sound
		fftbuffer = np.zeros(Ns)                             # reset buffer
		fftbuffer[:hNs] = xw2[hNs:]                          # zero-phase window in fftbuffer
		fftbuffer[hNs:] = xw2[:hNs]
		X2 = fft(fftbuffer)                                  # compute FFT for residual analysis
		#-----synthesis-----
		Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)    # generate spec sines of harmonic component
		Xr = X2-Yh                                           # get the residual complex spectrum
		mXr = 20 * np.log10(abs(Xr[:hNs]))                   # magnitude spectrum of residual
		mXrenv = resample(np.maximum(-200, mXr), nStocCoefs) # decimate the magnitude spectrum and avoid -Inf
		stocEnv = resample(mXrenv, hNs)                      # interpolate to original size
		pYst = 2*np.pi*np.random.rand(hNs)                   # generate phase random values
		Yst = np.zeros(Ns, dtype = complex)
		Yst[:hNs] = 10**(stocEnv/20) * np.exp(1j*pYst)       # generate positive freq.
		Yst[hNs+1:] = 10**(stocEnv[:0:-1]/20) * np.exp(-1j*pYst[:0:-1])  # generate negative freq.

		fftbuffer = np.real(ifft(Yh))                         # inverse FFT of harmonic spectrum
		yhw[:hNs-1] = fftbuffer[hNs+1:]                       # undo zero-phase window
		yhw[hNs-1:] = fftbuffer[:hNs+1]

		fftbuffer = np.real(ifft(Yst))                        # inverse FFT of stochastic spectrum
		ystw[:hNs-1] = fftbuffer[hNs+1:]                      # undo zero-phase window
		ystw[hNs-1:] = fftbuffer[:hNs+1]

		yh[ri:ri+Ns] += sw*yhw                                # overlap-add for sines
		yst[ri:ri+Ns] += sws*ystw                             # overlap-add for stochastic
		pin += H                                              # advance sound pointer

	y = yh+yst                                              # sum of harmonic and stochastic components
	return y, yh, yst
def hpsModel(x, fs, w, N, t, nH, minf0, maxf0, f0et, stocf):
    """
    Analysis/synthesis of a sound using the harmonic plus stochastic model, one frame at a time, no harmonic tracking
    x: input sound; fs: sampling rate, w: analysis window; N: FFT size (minimum 512), t: threshold in negative dB,
    nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz; maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5); stocf: decimation factor of mag spectrum for stochastic analysis
    returns y: output sound, yh: harmonic component, yst: stochastic component
    (when x is too short to hold one frame the three outputs are all-zero arrays of x.size samples)
    """

    # Integer division throughout: these values are used as slice bounds.
    hM1 = (w.size + 1) // 2  # half analysis window size by rounding
    hM2 = w.size // 2  # half analysis window size by floor
    Ns = 512  # FFT size for synthesis (even)
    H = Ns // 4  # Hop size used for analysis and synthesis
    hNs = Ns // 2
    # Start at hNs + 1 at least: the residual frame begins at pin - hNs - 1,
    # and a negative start would select an empty slice of x.
    pin = max(hNs + 1,
              hM1)  # initialize sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)  # last sample to start a frame
    yhw = np.zeros(Ns)  # initialize output sound frame
    ystw = np.zeros(Ns)  # initialize output sound frame
    yh = np.zeros(x.size)  # initialize output array
    yst = np.zeros(x.size)  # initialize output array
    w = w / sum(w)  # normalize analysis window
    sw = np.zeros(Ns)
    ow = triang(2 * H)  # overlapping window
    sw[hNs - H:hNs + H] = ow
    bh = blackmanharris(Ns)  # synthesis window
    bh = bh / sum(bh)  # normalize synthesis window
    wr = bh  # window for residual
    center = slice(hNs - H, hNs + H)
    sw[center] = sw[center] / bh[center]  # synthesis window for harmonic component
    sws = H * hanning(Ns) / 2  # synthesis window for stochastic
    nStocCoefs = int(hNs * stocf)  # size of the decimated residual envelope (must be an integer)
    hfreqp = []
    f0stable = 0
    while pin < pend:
        #-----analysis-----
        x1 = x[pin - hM1:pin + hM2]  # select frame
        mX, pX = DFT.dftAnal(x1, w, N)  # compute dft
        ploc = UF.peakDetection(mX, t)  # find peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX,
                                              ploc)  # refine peak values
        ipfreq = fs * iploc / N  # convert peak locations to Hz
        f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        startsTrack = (f0stable == 0) and (f0t > 0)
        staysClose = (f0stable > 0) and (np.abs(f0stable - f0t) < f0stable / 5.0)
        if startsTrack or staysClose:
            f0stable = f0t  # consider a stable f0 if it is close to the previous one
        else:
            f0stable = 0
        hfreq, hmag, hphase = HM.harmonicDetection(ipfreq, ipmag, ipphase, f0t,
                                                   nH, hfreqp,
                                                   fs)  # find harmonics
        hfreqp = hfreq
        ri = pin - hNs - 1  # input sound pointer for residual analysis
        xw2 = x[ri:ri + Ns] * wr  # window the input sound
        fftbuffer = np.zeros(Ns)  # reset buffer
        fftbuffer[:hNs] = xw2[hNs:]  # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xw2[:hNs]
        X2 = fft(fftbuffer)  # compute FFT for residual analysis
        #-----synthesis-----
        Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns,
                             fs)  # generate spec sines of harmonic component
        Xr = X2 - Yh  # get the residual complex spectrum
        mXr = 20 * np.log10(abs(Xr[:hNs]))  # magnitude spectrum of residual
        mXrenv = resample(
            np.maximum(-200, mXr),
            nStocCoefs)  # decimate the magnitude spectrum and avoid -Inf
        stocEnv = resample(mXrenv, hNs)  # interpolate to original size
        pYst = 2 * np.pi * np.random.rand(hNs)  # generate phase random values
        Yst = np.zeros(Ns, dtype=complex)
        Yst[:hNs] = 10**(stocEnv / 20) * np.exp(
            1j * pYst)  # generate positive freq.
        Yst[hNs + 1:] = 10**(stocEnv[:0:-1] / 20) * np.exp(
            -1j * pYst[:0:-1])  # generate negative freq.

        fftbuffer = np.real(ifft(Yh))  # inverse FFT of harmonic spectrum
        yhw[:hNs - 1] = fftbuffer[hNs + 1:]  # undo zero-phase window
        yhw[hNs - 1:] = fftbuffer[:hNs + 1]

        fftbuffer = np.real(ifft(Yst))  # inverse FFT of stochastic spectrum
        ystw[:hNs - 1] = fftbuffer[hNs + 1:]  # undo zero-phase window
        ystw[hNs - 1:] = fftbuffer[:hNs + 1]

        yh[ri:ri + Ns] += sw * yhw  # overlap-add for sines
        yst[ri:ri + Ns] += sws * ystw  # overlap-add for stochastic
        pin += H  # advance sound pointer

    y = yh + yst  # sum of harmonic and stochastic components
    return y, yh, yst
示例#58
0
def sineModelAnalEnhanced(inputFile= '../../sounds/sines-440-602-transient.wav'):
    """
    Estimate two sinusoidal frequency tracks of a sound frame by frame,
    keeping only the spectral peaks accepted by selectFlatPhasePeak(), and
    plot the estimated tracks over the spectrogram.

    Input:
           inputFile (string): wav file including the path
    Output:
           tStamps: A Kx1 numpy array of time stamps at which the frequency components were estimated
           tfreq: A Kx2 numpy array of frequency values, one column per component;
                  frames that do not yield exactly two selected peaks hold [0.0, 0.0]
    """
    phaseDevThres = 1e-2                                   # Allowed deviation in phase
    M = 2047                                               # window size
    N = 4096                                               # FFT size 
    t = -80                                                # threshold in negative dB
    H = 128                                                # hop-size
    window='blackman'                                      # window type
    fs, x = UF.wavread(inputFile)                          # Read input file
    w = get_window(window, M)                              # Get the window
    hM1 = (w.size+1)//2                                    # half analysis window size by rounding
    hM2 = w.size//2                                        # half analysis window size by floor
    x = np.append(np.zeros(hM2),x)                         # add zeros at beginning to center first window at sample 0
    x = np.append(x,np.zeros(hM2))                         # add zeros at the end to analyze last sample
    pend = x.size - hM1                                    # last sample to start a frame
    frameCenters = np.arange(hM1,pend,H)                   # sound pointer of every frame
    tStamps = frameCenters/float(fs)                       # Generate time stamps
    w = w / sum(w)                                         # normalize analysis window
    # One row per frame, allocated up front: the result is always K x 2 (even
    # for zero or one frame) and rows default to the "ignored frame" value.
    tfreq = np.zeros((frameCenters.size, 2))
    for frameIdx, pin in enumerate(frameCenters):
        x1 = x[pin-hM1:pin+hM2]                            # select frame
        mX, pX = SM.DFT.dftAnal(x1, w, N)                  # compute dft
        ploc = UF.peakDetection(mX, t)                     # detect locations of peaks
        ###### CODE DIFFERENT FROM sineModelAnal() #########
        # Phase based mainlobe tracking: peaks at either end of the spectrum
        # are kept unconditionally, the others only if the phase spectrum
        # around them is flat.
        plocSelMask = np.ones(len(ploc), dtype=bool)
        for pindex, p in enumerate(ploc):
            if 2 < p < (len(pX) - 2):
                plocSelMask[pindex] = bool(selectFlatPhasePeak(pX, p, phaseDevThres))
        plocSel = ploc[plocSelMask]                            # Select the ones chosen
        if len(plocSel) == 2:                                  # frames without exactly two peaks stay at 0.0
            iploc, ipmag, ipphase = UF.peakInterp(mX, pX, plocSel) # Only selected peaks to refine peak values by interpolation
            tfreq[frameIdx] = fs*iploc/float(N)                    # convert peak locations to Hertz
        ###### CODE DIFFERENT FROM sineModelAnal() #########
    # Plot the estimated frequency tracks
    # NOTE(review): another copy of this function in the file calls
    # stft.stftAnal(x, w, N, H); confirm which signature the imported
    # stft module actually has.
    mX, pX = stft.stftAnal(x, fs, w, N, H)
    maxplotfreq = 1500.0
    # Integer number of bins to show (slice bounds must be integers), never
    # more than the spectrogram actually has.
    nPlotBins = min(int(N*maxplotfreq/fs) + 1, mX.shape[1])
    binFreq = fs*np.arange(nPlotBins)/float(N)
    numFrames = int(mX.shape[0])
    frmTime = H*np.arange(numFrames)/float(fs) 
    plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:,:nPlotBins]), cmap='hot_r')
    plt.plot(tStamps,tfreq[:,0], color = 'y', linewidth=2.0)
    plt.plot(tStamps,tfreq[:,1], color = 'c', linewidth=2.0)
    plt.legend(('Estimated f1', 'Estimated f2'))
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (Hz)')
    plt.autoscale(tight=True)
    return tStamps, tfreq