Пример #1
0
def test_erb_space():
    """Check that the new ``erb_space`` agrees with the old one and time both.

    Both implementations are called with the same band edges (100 and 11025)
    and the same number of points. The elapsed time of each call is printed,
    then the results are compared with ``np.allclose``.
    """
    lo = 100
    hi = 11025
    num = 100

    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short durations; time.time() follows the wall clock and can
    # jump if the system time is adjusted.
    start = time.perf_counter()
    old_space = oldfilt.erb_space(lo, hi, num=num)
    old_elapsed = time.perf_counter() - start

    start = time.perf_counter()
    new_space = newfilt.erb_space(lo, hi, num=num, __test=True)
    new_elapsed = time.perf_counter() - start

    print(
        f'Old method took {old_elapsed} seconds. New method took {new_elapsed} seconds.'
    )
    # The new result is converted with its .get() method before comparing
    # (presumably a device array being copied to the host — confirm).
    assert np.allclose(new_space.get(), old_space)
Пример #2
0
    def get_gtf_kernel(self):
        """Build the gain-normalised gammatone impulse-response kernel.

        Centre frequencies are obtained from ``gt_filters.erb_space`` and
        stored on ``self.cfs``. For each band the impulse response
        ``t**(N - 1) * cos(2*pi*cf*t) / exp(2*pi*b*t)`` is evaluated at
        ``self.filter_len`` sample times spaced ``1 / self.fs`` apart. The
        responses are then reversed along the sample axis and each band is
        divided by the peak magnitude of its FFT.

        Returns:
            2-D array with ``self.n_band`` columns. It has
            ``self.filter_len`` rows, or twice that many (zero rows
            appended) when ``self.is_padd`` is truthy.
        """
        centre_freqs = gt_filters.erb_space(self.cf_low, self.cf_high,
                                            self.n_band)
        self.cfs = centre_freqs

        t = np.arange(0, self.filter_len, 1) / self.fs
        responses = np.zeros((self.filter_len, self.n_band),
                             dtype=np.float32)

        # Constants of the ERB bandwidth formula and the filter order.
        ear_q = 9.26449
        min_bw = 24.7
        erb_order = 1
        gt_order = 4

        # The polynomial rise term is the same for every band.
        rise = t**(gt_order - 1)

        for band in range(self.n_band):
            cf = centre_freqs[band]
            erb = ((cf / ear_q)**erb_order +
                   min_bw**erb_order)**(1 / erb_order)
            bandwidth = 1.019 * erb
            carrier = np.cos(2 * np.pi * cf * t)
            decay = np.exp(2 * np.pi * bandwidth * t)
            responses[:, band] = rise * carrier / decay

        # Normalise each band by the largest magnitude in its spectrum.
        peak_gain = np.abs(np.fft.fft(responses, axis=0)).max(axis=0)
        normalised = np.flipud(responses) / peak_gain

        if not self.is_padd:
            return normalised

        padding = np.zeros((self.filter_len, self.n_band))
        return np.concatenate((normalised, padding), axis=0)
Пример #3
0
def compare_cfs():
    """Plot the centre frequencies of two gammatone implementations.

    Centre frequencies are taken from a ``gtf_proposed`` object and from
    ``gtf_ref.erb_space`` (reversed). Both sets are drawn as error-bar
    curves whose bar length is half the bandwidth returned by
    ``gtf_obj.cal_bw``. The figure is written to
    ``images/validate/cfs_n<n_band>.png``.
    """
    # NOTE(review): with fs = 1e3 the Nyquist frequency (500) lies below
    # freq_high = 7000 — confirm this is intended.
    fs = 1e3
    n_band = 16
    freq_low = 70
    freq_high = 7000

    gtf_obj = gtf_proposed(fs,
                           cf_low=freq_low,
                           freq_high=freq_high,
                           n_band=n_band)
    cfs_proposed = gtf_obj.cfs
    bws_proposed = gtf_obj.cal_bw(cfs_proposed)

    reference = gtf_ref.erb_space(low_freq=freq_low,
                                  high_freq=freq_high,
                                  num=n_band)
    cfs_ref = reference[::-1]
    bws_ref = gtf_obj.cal_bw(cfs_ref)

    fig, ax = plt.subplots(1, 1)

    # The second curve is shifted slightly along x so the bars do not
    # sit exactly on top of each other.
    curves = (
        (np.arange(n_band), cfs_proposed, bws_proposed, 'Todd'),
        (np.arange(n_band) + n_band / 100, cfs_ref, bws_ref, 'Detly'),
    )
    for x_pos, cfs, bws, label in curves:
        ax.errorbar(x_pos, cfs, yerr=bws / 2, linewidth=2, label=label)

    ax.set_xlabel('freq_band')
    ax.set_ylabel('freq(Hz)')
    ax.legend()
    fig.savefig(f'images/validate/cfs_n{n_band}.png', dpi=100)
Пример #4
0
def fft_weights(nfft, fs, nfilts, width, fmin, fmax, maxlen):
    """
    Generate a matrix of weights to combine FFT bins into Gammatone bins.

    :param nfft: the source FFT size
    :param fs: sampling rate (Hz)
    :param nfilts: the number of output bands required
    :param width: the constant width of each band in Bark; forwarded to
                  ``filters.make_erb_filters``
    :param fmin: lower limit of frequencies (Hz)
    :param fmax: upper limit of frequencies (Hz)
    :param maxlen: number of bins to truncate the rows to; converted with
                   ``int()`` so a float such as ``nfft / 2 + 1`` is accepted

    :return: a tuple `weights`, `gain` with the calculated weight matrices and
             gain vectors

    Note about `maxlen` parameter: While wts has nfft columns, the second half
    are all zero. Hence, aud spectrum is::

        fft2gammatonemx(nfft,sr)*abs(fft(xincols,nfft))

    `maxlen` truncates the rows to this many bins.

    | (c) 2004-2009 Dan Ellis [email protected]  based on rastamat/audspec.m
    | (c) 2012 Jason Heeris (Python implementation)
    """
    # Points on the unit circle for FFT bins 0 .. nfft // 2, as a row vector.
    n_bins = nfft // 2 + 1
    ucirc = np.exp(1j * 2 * np.pi * np.arange(0, n_bins) / nfft)[None, ...]

    # Common ERB filter code factored out; [::-1] reverses the order in
    # which erb_space returns the centre frequencies.
    cf_array = filters.erb_space(fmin, fmax, nfilts)[::-1]

    # After the transpose each unpacked name is one coefficient per filter.
    _, A11, A12, A13, A14, _, _, _, B2, gain = (filters.make_erb_filters(
        fs, cf_array, width).T)

    # Column vectors, so they broadcast against the row vector ucirc.
    A11, A12, A13, A14 = (coef[..., None] for coef in (A11, A12, A13, A14))

    r = np.sqrt(B2)
    theta = 2 * np.pi * cf_array / fs
    pole = (r * np.exp(1j * theta))[..., None]

    GTord = 4

    weights = np.zeros((nfilts, nfft))

    # Only the first n_bins columns are filled; the rest stay zero.
    weights[:, 0:ucirc.shape[1]] = (np.abs(ucirc + A11 * fs) *
                                    np.abs(ucirc + A12 * fs) *
                                    np.abs(ucirc + A13 * fs) *
                                    np.abs(ucirc + A14 * fs) *
                                    np.abs(fs * (pole - ucirc) *
                                           (pole.conj() - ucirc))**(-GTord) /
                                    gain[..., None])

    # Slice bounds must be integers; callers may pass a float bin count.
    weights = weights[:, 0:int(maxlen)]

    return weights, gain
Пример #5
0
    def __init__(self,
                 f_lo,
                 f_hi,
                 num_chan,
                 f_s,
                 filt_type='gammatone',
                 bounding=True):
        """Set up the channel centre frequencies, bandwidths, filters and FDLs.

        Args:
            f_lo: lower frequency limit of the channel bank.
            f_hi: upper frequency limit of the channel bank.
            num_chan: number of channels.
            f_s: sampling frequency; its reciprocal is stored as ``dt``.
            filt_type: ``'gammatone'`` selects ERB-spaced channels with
                coefficients from ``gtf.make_erb_filters``. Any other value
                selects log-spaced channels with second-order Bessel
                band-pass filters.
            bounding: passed through to every ``FDL``.
        """
        # Basic parameters and placeholders.
        self.f_s = f_s
        self.dt = 1. / f_s
        self.num_chan = num_chan
        self.chunks = []
        self.processed = False
        self.filt_type = filt_type
        self.bounding = bounding

        if filt_type != 'gammatone':
            # Log-spaced centre frequencies between f_lo and f_hi.
            self.f_c = np.logspace(np.log10(f_lo), np.log10(f_hi), num_chan)

            # Relative width of a band running from f_c * 2**(-1/6) to
            # f_c * 2**(1/6); the bandwidth is floored at 100.0.
            sixth_root = 2.**(1. / 6.)
            c = sixth_root - 1 / sixth_root
            self.bw = [max(100.0, f_c * c) for f_c in self.f_c]
            print(self.f_c)

            # One band-pass filter per channel.
            self.a = []
            self.b = []
            for k in range(self.num_chan):
                half_bw = 0.5 * self.bw[k]
                # The lower band edge is never allowed below 15.0.
                lower_edge = max(self.f_c[k] - half_bw, 15.0)
                upper_edge = self.f_c[k] + half_bw
                band = np.array([lower_edge, upper_edge]) * (2 / f_s)
                b, a = dsp.bessel(2, band, btype='bandpass')
                self.a.append(a)
                self.b.append(b)
        else:
            # ERB-spaced centre frequencies, reversed from the order in
            # which erb_space returns them.
            self.f_c = np.flip(gtf.erb_space(f_lo, f_hi, num=num_chan))
            self.erb_coefs = gtf.make_erb_filters(f_s, self.f_c)
            self.bw = [self.erb_calc(f) for f in self.f_c]

        # One FDL per channel.
        self.fdl = [
            FDL(self.f_c[k], self.bw[k], self.f_s, bounding=self.bounding)
            for k in range(self.num_chan)
        ]
Пример #6
0
def apply_gammatone(data, sample_frequency, nb_channels=20, low_cf=20,
                    window_time=0.5, overlap_time=0.1,
                    compression=None, accurate=True):
    """Return the response of a gammatone filterbank to data

    Calculate a spectrogram-like time frequency magnitude array based
    on gammatone subband filters. The waveform ``data`` (at sample
    rate ``sample_frequency``) is passed through a multi-channel
    gammatone auditory model filterbank, with lowest frequency
    ``low_cf`` and highest frequency ``sample_frequency`` / 2. The
    outputs of each band then have their energy integrated over
    windows of ``window_time`` seconds, advancing by ``overlap_time``
    secs for successive columns. The energy is then optionally
    compressed by log10 or cubic root. These magnitudes are returned
    as a nonnegative real matrix with ``nb_channels`` rows (except
    for log compression, where values in dB can be negative).

    Parameters:
    -----------

    data (float numpy array): 1D input data to be processed

    sample_frequency (int): sample frequency of the data in Hz

    nb_channels (int): number of frequency channels in the filterbank

    low_cf (float): lowest center frequency of the filterbank in Hz

    window_time (float): integration time of the window in seconds

    overlap_time (float): time step between two successive windows in
        seconds; forwarded as the fourth positional argument of the
        gammatone routine (its hop time)

    compression (string): compression method to use on energy, choose
        None to disable compression, 'log' for 20*np.log10(X) or
        'cubic' for X**(1/3), default is None. Any other value leaves
        the output uncompressed and logs a warning.

    accurate (bool): use the full filterbank approach instead of the
        weighted FFT approximation. This is much slower, and uses a
        lot of memory, but is more accurate. Default is True.

    Returns:
    --------

    output (float numpy array): 2D filterbank response to the input
        data, flipped along its first axis so that rows run from the
        lowest to the highest center frequency

    center_frequencies (float numpy array): center frequencies of each
        channel in Hz, in increasing order.

    """
    import gammatone.gtgram
    import gammatone.fftweight
    from gammatone.filters import erb_space

    log = logging.getLogger('prosolia')

    # choose real gammatones or FFT approximation
    gtgram = (gammatone.gtgram.gtgram if accurate
              else gammatone.fftweight.fft_gtgram)

    log.debug(
        'computing gammatone filterbank on %s channels, %s compression%s',
        nb_channels, compression, ', accurate' if accurate else '')

    # get the center frequencies in increasing order
    center_frequencies = erb_space(
        low_cf, sample_frequency/2, nb_channels)[::-1]

    # get the filterbank output (with increasing frequencies)
    output = np.flipud(gtgram(
        data,
        sample_frequency,
        window_time,
        overlap_time,
        nb_channels,
        low_cf))

    # compress the output
    compress = {'log': lambda X: 20 * np.log10(X),
                'cubic': lambda X: X ** (1./3)}
    if compression is not None:
        compressor = compress.get(compression)
        if compressor is None:
            # Previously an unknown name was ignored without any trace;
            # keep returning the uncompressed output but make it visible.
            log.warning(
                'unknown compression %r, output left uncompressed '
                '(expected one of %s or None)',
                compression, sorted(compress))
        else:
            output = compressor(output)

    return output, center_frequencies
Пример #7
0
def fft_weights(
    nfft,
    fs,
    nfilts,
    width,
    fmin,
    fmax,
    maxlen):
    """
    Generate a matrix of weights to combine FFT bins into Gammatone bins.

    :param nfft: the source FFT size
    :param fs: sampling rate (Hz)
    :param nfilts: the number of output bands required
    :param width: the constant width of each band in Bark; forwarded to
                  ``filters.make_erb_filters``
    :param fmin: lower limit of frequencies (Hz)
    :param fmax: upper limit of frequencies (Hz)
    :param maxlen: number of bins to truncate the rows to (converted with
                   ``int()``)

    :return: a tuple `weights`, `gain` with the calculated weight matrices and
             gain vectors

    Note about `maxlen` parameter: While wts has nfft columns, the second half
    are all zero. Hence, aud spectrum is::

        fft2gammatonemx(nfft,sr)*abs(fft(xincols,nfft))

    `maxlen` truncates the rows to this many bins.

    | (c) 2004-2009 Dan Ellis [email protected]  based on rastamat/audspec.m
    | (c) 2012 Jason Heeris (Python implementation)
    """
    # Integer division keeps the bin count at nfft // 2 + 1. A float stop
    # (nfft / 2 + 1) makes np.arange return one element too many when nfft
    # is odd, e.g. nfft = 7 gives arange(0, 4.5) -> 5 bins instead of 4.
    n_bins = nfft // 2 + 1
    ucirc = np.exp(1j * 2 * np.pi * np.arange(0, n_bins) / nfft)[None, ...]

    # Common ERB filter code factored out; [::-1] reverses the order in
    # which erb_space returns the centre frequencies.
    cf_array = filters.erb_space(fmin, fmax, nfilts)[::-1]

    # After the transpose each unpacked name is one coefficient per filter.
    _, A11, A12, A13, A14, _, _, _, B2, gain = (
        filters.make_erb_filters(fs, cf_array, width).T
    )

    # Column vectors, so they broadcast against the row vector ucirc.
    A11, A12, A13, A14 = (coef[..., None] for coef in (A11, A12, A13, A14))

    r = np.sqrt(B2)
    theta = 2 * np.pi * cf_array / fs
    pole = (r * np.exp(1j * theta))[..., None]

    GTord = 4

    weights = np.zeros((nfilts, nfft))

    # Only the first n_bins columns are filled; the rest stay zero.
    weights[:, 0:ucirc.shape[1]] = (
          np.abs(ucirc + A11 * fs) * np.abs(ucirc + A12 * fs)
        * np.abs(ucirc + A13 * fs) * np.abs(ucirc + A14 * fs)
        * np.abs(fs * (pole - ucirc) * (pole.conj() - ucirc)) ** (-GTord)
        / gain[..., None]
    )

    weights = weights[:, 0:int(maxlen)]

    return weights, gain
Пример #8
0
import numpy as np
import gammatone.filters as gtf
import matplotlib.pyplot as plt

# Build the input: a sum of cosines at the first num_h multiples of f0.
f_s = 44100
dt = 1 / f_s
dur = 0.1
t = np.arange(0, dur, dt)
num_h = 8
f0 = 220.0
omega0 = 2 * np.pi * f0  # angular frequency of the fundamental
in_sig = np.zeros_like(t)
for p in range(1, num_h + 1):
    in_sig += np.cos(omega0 * p * t)

# Run the signal through an ERB-spaced gammatone filterbank.
erb_freqs = gtf.erb_space(100.0, 4000.0, num=100)
print(erb_freqs)
erb_coefs = gtf.make_erb_filters(f_s, erb_freqs)
filted = gtf.erb_filterbank(in_sig, erb_coefs)

# Draw every channel output in black, each shifted down by one unit
# relative to the previous one so the traces are stacked.
for k, channel in enumerate(filted):
    plt.plot(t, channel + 100 - k, color='k')

plt.show()
Пример #9
0
# Analysis settings. lowf/highf are the frequency limits handed to
# gt.erb_space below.
lowf = 60  # Hz
highf = 2100  # Hz
d = 0.89  # amount to remove detected signal from residual
# NOTE(review): `time` would shadow the stdlib `time` module if this file
# imports it — confirm.
time = 100  # time to start analyzing audio
delta_time = 100  # time between analysis frames
notes = np.zeros(6, dtype=float)

# The object returned by wave.read is indexed as (sample rate, samples).
input_signal = wave.read('16kHz_acTuned.wav')
fs = input_signal[0]
T = 1 / fs
audio = np.asarray(input_signal[1])

# NOTE(review): `time` is not advanced anywhere in the visible part of this
# loop; presumably it is stepped (by delta_time?) further down — confirm.
while time < (len(audio) - fs * time / 1000):
    trim_audio = clip_audio(audio, time)

    # Gammatone filterbank: centre frequencies, coefficients, then one
    # filtered signal per channel.
    # NOTE(review): num_freqs is defined outside this view. The first two
    # calls use no value that changes inside the visible loop body and look
    # hoistable — confirm before moving them.
    center_freqs = gt.erb_space(lowf, highf, num_freqs)
    filt_coefs = gt.make_erb_filters(fs, center_freqs)
    channels = gt.erb_filterbank(trim_audio, filt_coefs)

    # Work buffers: one row per channel, twice as wide as a channel signal
    # (presumably the helpers below return double-length output — TODO
    # confirm). np.empty leaves them uninitialised until assigned.
    process_chan = np.empty(
        [len(channels), len(channels[0]) * 2], dtype=complex)
    mag_chan = np.empty_like(process_chan, dtype=float)

    # Cochlea simulation, applied channel by channel: compression,
    # half-wave rectification, then butter_lfilter with a cutoff of
    # 1.5 times the channel's centre frequency.
    for idx in range(len(channels)):
        process_chan[idx, :] = compression(channels[idx, :])
        process_chan[idx, :] = half_wave_rectification(process_chan[idx, :])
        low_cutoff = center_freqs[idx] * 1.5
        process_chan[idx, :] = butter_lfilter(process_chan[idx, :], low_cutoff,
                                              fs)
        # Magnitude spectrum of the processed channel.
        mag_chan[idx, :] = np.absolute(np.fft.fft(process_chan[idx, :]))
Пример #10
0
# Zero every pitch estimate whose strength is not above 0.99.
idcs = strengths > 0.99
pitches *= idcs

fig = plt.figure()
skip = 10  # label every 10th centre frequency on the x axes

# Right-hand panel: pitches against template centre frequency.
ax1 = fig.add_subplot(1, 2, 2)
ax1.set_xscale("log")
ax1.stem(CFs, pitches, basefmt=" ")
ax1.set_xticks(list(CFs[::skip]))
ax1.grid("on", axis='y')
ax1.set_xlabel("CF of Adapative Template", size=16)
ax1.set_ylabel("Phase-locked firing rate (Hz)", size=16)
ax1.get_xaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter())
ax1.tick_params(axis='both', which='both', labelsize=12)

# Place-rate profile: root of the summed squares of each gammatone
# channel's output, scaled so the largest channel equals 1.
f_c = gtf.erb_space(100., 1600., num=100)
erb_coefs = gtf.make_erb_filters(f_s, f_c)
filt_channels = gtf.erb_filterbank(in_sig, erb_coefs)
channel_power = np.zeros(len(filt_channels))
for k, channel in enumerate(filt_channels):
    channel_power[k] = np.sqrt(np.dot(channel, channel))
channel_power /= np.max(channel_power)

# Left-hand panel: normalised channel power against centre frequency.
ax2 = fig.add_subplot(1, 2, 1)
ax2.set_xscale("log")
ax2.set_xticks(list(f_c[::skip]))
ax2.stem(f_c, channel_power, basefmt=" ")
ax2.grid("on", axis='y')
ax2.set_xlabel("CF of Auditory Channel", size=16)
ax2.set_ylabel("Normalized Power/Firing Rate", size=16)
ax2.get_xaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter())
Пример #11
0
## Signal for SAC approach
# in_sig is the sum of cosines at multiples 2..num_h of f0 (the fundamental
# itself, p = 1, is not included). in_sig_mt is the same sum except that the
# component with p == mistuned_h has its phase argument scaled by
# `mistuning`.
in_sig = np.zeros_like(t)
in_sig_mt = np.zeros_like(t)
for p in range(2, num_h + 1):
    if p == mistuned_h:
        in_sig_mt += np.cos(2 * np.pi * f0 * p * t * mistuning)
    else:
        in_sig_mt += np.cos(2 * np.pi * f0 * p * t)
    in_sig += np.cos(2 * np.pi * f0 * p * t)
# Scale each signal by its largest sample value (np.max, not the largest
# absolute value).
in_sig /= np.max(in_sig)
in_sig_mt /= np.max(in_sig_mt)

################################################################################
# SAC Peak Picking
################################################################################
# Filter both signals with the same ERB-spaced gammatone filterbank.
f_c = gtf.erb_space(20., 5000., num=num_channels)
erb_coefs = gtf.make_erb_filters(f_s, f_c)
filt_channels = gtf.erb_filterbank(in_sig, erb_coefs)
filt_channels_mt = gtf.erb_filterbank(in_sig_mt, erb_coefs)
# Per-channel autocorrelation buffers plus summary buffers of length w_size.
# NOTE(review): summary_ac / summary_ac_mt are not written in the visible
# part of the script; presumably they are accumulated further down — confirm.
ac_channels = np.zeros((num_channels, w_size))
summary_ac = np.zeros(w_size, dtype=float)
ac_channels_mt = np.zeros((num_channels, w_size))
summary_ac_mt = np.zeros(w_size, dtype=float)

# Correlate the last w_size samples of each channel with themselves;
# mode='same' is passed so the result fits a row of length w_size.
# (`dsp` is presumably scipy.signal — confirm against the imports.)
for k in range(num_channels):
    ac_channels[k, :] = dsp.correlate(filt_channels[k, -w_size:],
                                      filt_channels[k, -w_size:],
                                      mode='same')
    ac_channels_mt[k, :] = dsp.correlate(filt_channels_mt[k, -w_size:],
                                         filt_channels_mt[k, -w_size:],
                                         mode='same')