Пример #1
0
def stft(x, w, N, H):
    """
    Short-time Fourier transform round trip: analyse a sound frame by frame
    and immediately resynthesize it by overlap-add.

    x: input sound, w: analysis window, N: FFT size, H: hop size
    returns y: output sound (the zero padding added for analysis is removed)
    raises ValueError: when H is zero or negative
    """

    if H <= 0:  # a non-positive hop would never advance through the sound
        raise ValueError("Hop size (H) smaller or equal to 0")

    win_len = w.size  # number of samples in the analysis window
    half_up = (win_len + 1) // 2  # half window size, rounded up
    half_down = win_len // 2  # half window size, rounded down

    # pad the front so the first window is centred on sample 0 and the back
    # so the last sample is still covered by a frame
    x = np.concatenate((np.zeros(half_down), np.ravel(x), np.zeros(half_up)))
    padded_len = x.size

    unit_w = w / sum(w)  # window scaled so that its samples sum to one
    y = np.zeros(padded_len)  # overlap-add accumulator

    centre = half_up  # centre of the current frame
    last_centre = padded_len - half_up  # last allowed frame centre
    while centre <= last_centre:
        begin = centre - half_up
        end = centre + half_down
        # -----analysis-----
        mX, pX = dftModel.dft_anal(x[begin:end], unit_w, N)
        # -----synthesis-----
        frame = dftModel.dft_synth(mX, pX, win_len)
        y[begin:end] += H * frame  # overlap-add the resynthesized frame
        centre += H

    # strip the padding that was added before the analysis
    return y[half_down:padded_len - half_up].copy()
Пример #2
0
def stft_anal(x, w, N, H):
    """
    Analysis of a sound using the short-time Fourier transform

    x: input array sound, w: analysis window, N: FFT size, H: hop size
    returns xmX, xpX: magnitude and phase spectra, one row per analysed frame;
        both are None when the padded sound is too short to hold one frame
    raises ValueError: when H is zero or negative
    """

    if H <= 0:  # raise error if hop size 0 or negative
        raise ValueError("Hop size (H) smaller or equal to 0")

    M = w.size  # size of analysis window
    hM1 = (M + 1) // 2  # half analysis window size by rounding
    hM2 = M // 2  # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)  # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM2))  # add zeros at the end to analyze last sample
    pin = hM1  # initialize sound pointer in middle of analysis window
    pend = x.size - hM1  # last sample to start a frame
    w = w / sum(w)  # normalize analysis window

    # Collect the per-frame spectra in lists and stack them once at the end;
    # stacking inside the loop would copy every earlier frame on each pass.
    mag_frames = []
    phase_frames = []
    while pin <= pend:  # while sound pointer is not past the last frame position
        x1 = x[pin - hM1:pin + hM2]  # select one frame of input sound
        mX, pX = dftModel.dft_anal(x1, w, N)  # compute dft
        mag_frames.append(mX)
        phase_frames.append(pX)
        pin += H  # advance sound pointer

    if not mag_frames:  # no frame fitted: keep the historical (None, None) result
        return None, None
    return np.array(mag_frames), np.array(phase_frames)
Пример #3
0
def sine_model_anal(x, fs, w, N, H, t, maxnSines=100, minSineDur=.01, freqDevOffset=20, freqDevSlope=0.01):
    """
    Analysis of a sound using the sinusoidal model with sine tracking

    x: input array sound, fs: sampling rate, w: analysis window, N: size of complex spectrum,
    H: hop-size, t: threshold in negative dB
    maxnSines: maximum number of sines per frame, minSineDur: minimum duration of sines in seconds
    freqDevOffset: minimum frequency deviation at 0Hz, freqDevSlope: slope increase of minimum frequency deviation
    returns xtfreq, xtmag, xtphase: frequencies, magnitudes and phases of sinusoidal tracks,
        each a 2-D array with one row per frame and maxnSines columns (zero rows when the
        sound is too short to hold a single frame)
    raises ValueError: when minSineDur is negative or H is zero or negative
    """

    if minSineDur < 0:  # raise error if minSineDur is smaller than 0
        raise ValueError("Minimum duration of sine tracks smaller than 0")

    if H <= 0:  # a non-positive hop would never advance the sound pointer
        raise ValueError("Hop size (H) smaller or equal to 0")

    hM1 = (w.size + 1) // 2  # half analysis window size by rounding
    hM2 = w.size // 2  # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)  # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM2))  # add zeros at the end to analyze last sample
    pin = hM1  # initialize sound pointer in middle of analysis window
    pend = x.size - hM1  # last sample to start a frame
    w = w / sum(w)  # normalize analysis window
    tfreq = np.array([])  # track frequencies of the previous frame (none yet)

    # Per-frame rows are gathered in lists and stacked once after the loop;
    # this also gives every frame, including the first, the same row layout.
    freq_rows = []
    mag_rows = []
    phase_rows = []
    while pin < pend:  # while input sound pointer is within sound
        x1 = x[pin - hM1:pin + hM2]  # select frame
        mX, pX = dftModel.dft_anal(x1, w, N)  # compute dft
        ploc = utilFunctions.peakDetection(mX, t)  # detect locations of peaks
        iploc, ipmag, ipphase = utilFunctions.peakInterp(mX, pX, ploc)  # refine peak values by interpolation
        ipfreq = fs * iploc / float(N)  # convert peak locations to Hertz
        # perform sinusoidal tracking by adding peaks to trajectories
        tfreq, tmag, tphase = sine_tracking(ipfreq, ipmag, ipphase, tfreq, freqDevOffset, freqDevSlope)
        tfreq = np.resize(tfreq, min(maxnSines, tfreq.size))  # limit number of tracks to maxnSines
        tmag = np.resize(tmag, min(maxnSines, tmag.size))  # limit number of tracks to maxnSines
        tphase = np.resize(tphase, min(maxnSines, tphase.size))  # limit number of tracks to maxnSines

        # copy the tracks into fixed-width rows, zero-filled past the last track
        jtfreq = np.zeros(maxnSines)
        jtmag = np.zeros(maxnSines)
        jtphase = np.zeros(maxnSines)
        jtfreq[:tfreq.size] = tfreq  # save track frequencies
        jtmag[:tmag.size] = tmag  # save track magnitudes
        jtphase[:tphase.size] = tphase  # save track phases
        freq_rows.append(jtfreq)
        mag_rows.append(jtmag)
        phase_rows.append(jtphase)
        pin += H  # advance sound pointer

    if not freq_rows:  # sound too short for a single frame: nothing to track or clean
        return (np.zeros((0, maxnSines)), np.zeros((0, maxnSines)), np.zeros((0, maxnSines)))

    xtfreq = np.array(freq_rows)
    xtmag = np.array(mag_rows)
    xtphase = np.array(phase_rows)
    # delete sine tracks shorter than minSineDur (converted from seconds to frames)
    xtfreq = clean_sine_tracks(xtfreq, round(fs * minSineDur / H))
    return xtfreq, xtmag, xtphase
Пример #4
0
def harmonic_model_anal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope=0.01, minSineDur=0.02):
    """
    Analysis of a sound using the sinusoidal harmonic model

    x: input sound; fs: sampling rate, w: analysis window; N: FFT size (minimum 512);
    H: hop size; t: threshold in negative dB,
    nH: maximum number of harmonics;  minf0: minimum f0 frequency in Hz,
    maxf0: maximum f0 frequency in Hz; f0et: error threshold in the f0 detection (ex: 5),
    harmDevSlope: slope of harmonic deviation; minSineDur: minimum length of harmonics
    returns xhfreq, xhmag, xhphase: harmonic frequencies, magnitudes and phases,
        one row per analysed frame (zero rows when the sound is too short for one frame)
    raises ValueError: when minSineDur is negative or H is zero or negative
    """

    if minSineDur < 0:  # raise exception if minSineDur is smaller than 0
        raise ValueError("Minimum duration of sine tracks smaller than 0")

    if H <= 0:  # a non-positive hop would never advance the sound pointer
        raise ValueError("Hop size (H) smaller or equal to 0")

    hM1 = (w.size + 1) // 2  # half analysis window size by rounding
    hM2 = w.size // 2  # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)  # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM2))  # add zeros at the end to analyze last sample
    pin = hM1  # init sound pointer in middle of anal window
    pend = x.size - hM1  # last sample to start a frame
    w = w / sum(w)  # normalize analysis window
    hfreqp = []  # initialize harmonic frequencies of previous frame
    f0stable = 0  # initialize f0 stable

    # Per-frame results are gathered in lists and stacked once after the loop,
    # instead of re-copying all earlier frames on every iteration.
    freq_rows = []
    mag_rows = []
    phase_rows = []
    while pin <= pend:
        x1 = x[pin - hM1 : pin + hM2]  # select frame
        mX, pX = dftModel.dft_anal(x1, w, N)  # compute dft
        ploc = utilFunctions.peakDetection(mX, t)  # detect peak locations
        iploc, ipmag, ipphase = utilFunctions.peakInterp(mX, pX, ploc)  # refine peak values
        ipfreq = fs * iploc / N  # convert locations to Hz
        f0t = utilFunctions.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0

        # f0 is stable when a first candidate appears, or when the new
        # candidate stays within 20% of the previous stable value
        found_first = f0stable == 0 and f0t > 0
        stays_close = f0stable > 0 and np.abs(f0stable - f0t) < f0stable / 5.0
        f0stable = f0t if (found_first or stays_close) else 0

        hfreq, hmag, hphase = harmonic_detection(
            ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs, harmDevSlope
        )  # find harmonics
        hfreqp = hfreq  # remembered for the next frame's detection
        freq_rows.append(hfreq)
        mag_rows.append(hmag)
        phase_rows.append(hphase)
        pin += H  # advance sound pointer

    if not freq_rows:
        # Sound too short for a single frame: nothing to detect or clean.
        # NOTE(review): width nH assumes harmonic_detection yields nH values per frame - confirm.
        return np.zeros((0, nH)), np.zeros((0, nH)), np.zeros((0, nH))

    xhfreq = np.array(freq_rows)
    xhmag = np.array(mag_rows)
    xhphase = np.array(phase_rows)
    xhfreq = sineModel.clean_sine_tracks(xhfreq, round(fs * minSineDur / H))  # delete tracks shorter than minSineDur
    return xhfreq, xhmag, xhphase
Пример #5
0
def f0_detection(x, fs, w, N, H, t, minf0, maxf0, f0et):
    """
    Fundamental frequency detection of a sound using twm algorithm

    x: input sound; fs: sampling rate; w: analysis window;
    N: FFT size; H: hop size; t: threshold in negative dB,
    minf0: minimum f0 frequency in Hz, maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5),
    returns f0: 1-D array with one fundamental frequency value per analysed frame
        (empty array when the sound is too short to hold a single frame)
    raises ValueError: when minf0 is negative, maxf0 is 10000 or more,
        or H is zero or negative
    """
    if minf0 < 0:  # raise exception if minf0 is smaller than 0
        raise ValueError("Minumum fundamental frequency (minf0) smaller than 0")

    if maxf0 >= 10000:  # raise exception if maxf0 reaches the fixed 10000 Hz ceiling
        raise ValueError("Maximum fundamental frequency (maxf0) bigger than 10000Hz")

    if H <= 0:  # raise error if hop size 0 or negative
        raise ValueError("Hop size (H) smaller or equal to 0")

    hM1 = (w.size + 1) // 2  # half analysis window size by rounding
    hM2 = w.size // 2  # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)  # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM1))  # add zeros at the end to analyze last sample
    pin = hM1  # init sound pointer in middle of anal window
    pend = x.size - hM1  # last sample to start a frame
    w = w / sum(w)  # normalize analysis window
    f0_values = []  # per-frame f0 estimates, converted to an array on return
    f0stable = 0  # initialize f0 stable
    while pin < pend:
        x1 = x[pin - hM1 : pin + hM2]  # select frame
        mX, pX = dftModel.dft_anal(x1, w, N)  # compute dft
        ploc = utilFunctions.peakDetection(mX, t)  # detect peak locations
        iploc, ipmag, ipphase = utilFunctions.peakInterp(mX, pX, ploc)  # refine peak values
        ipfreq = fs * iploc / N  # convert locations to Hz
        f0t = utilFunctions.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0

        # f0 is stable when a first candidate appears, or when the new
        # candidate stays within 20% of the previous stable value
        found_first = f0stable == 0 and f0t > 0
        stays_close = f0stable > 0 and np.abs(f0stable - f0t) < f0stable / 5.0
        f0stable = f0t if (found_first or stays_close) else 0

        f0_values.append(f0t)  # add f0 to output
        pin += H  # advance sound pointer

    # Always hand back a flat float array, even when no frame was analysed.
    return np.asarray(f0_values, dtype=float).ravel()