def test_erb_space():
    """Compare the new ``erb_space`` against the old one and print timings.

    Both implementations are asked for 100 values between ``lo = 100`` and
    ``hi = 11025``. The wall-clock duration of each call is printed, and the
    two results must agree within ``np.allclose`` tolerance.
    """
    lo, hi = 100, 11025

    start = time.time()
    reference = oldfilt.erb_space(lo, hi, num=100)
    after_old = time.time()
    candidate = newfilt.erb_space(lo, hi, num=100, __test=True)
    after_new = time.time()

    print(
        f'Old method took {after_old - start} seconds. New method took {after_new - after_old} seconds.'
    )
    # The new implementation's result is converted with ``.get()`` before it
    # is compared with the old result.
    assert np.allclose(candidate.get(), reference)
def get_gtf_kernel(self):
    """Build the time-reversed, gain-normalised gammatone impulse responses.

    Centre frequencies come from ``gt_filters.erb_space`` and are stored on
    ``self.cfs``. One impulse response of ``self.filter_len`` samples is
    generated per band, each band is divided by the peak magnitude of its
    FFT, and the result is flipped along the sample axis.

    Returns:
        Array with one column per band. When ``self.is_padd`` is true,
        ``self.filter_len`` rows of zeros are appended below the responses;
        otherwise the normalised responses are returned as they are.
    """
    ear_q = 9.26449
    min_bw = 24.7
    order = 1
    gt_order = 4

    center_freqs = gt_filters.erb_space(self.cf_low, self.cf_high,
                                        self.n_band)
    self.cfs = center_freqs

    t = np.arange(0, self.filter_len, 1) / self.fs
    # The polynomial rise term does not depend on the band.
    rise = t**(gt_order - 1)

    responses = np.zeros((self.filter_len, self.n_band), dtype=np.float32)
    for band in range(self.n_band):
        cf = center_freqs[band]
        erb = ((cf / ear_q)**order + min_bw**order)**(1 / order)
        b = 1.019 * erb
        carrier = np.cos(2 * np.pi * cf * t)
        decay = np.exp(2 * np.pi * b * t)
        responses[:, band] = rise * carrier / decay

    # Peak of each band's magnitude spectrum, used to normalise that band.
    peak_gain = np.abs(np.fft.fft(responses, axis=0)).max(axis=0)
    normalised = np.flipud(responses) / peak_gain

    if not self.is_padd:
        return normalised

    zero_tail = np.zeros((self.filter_len, self.n_band))
    return np.concatenate((normalised, zero_tail), axis=0)
def compare_cfs():
    """Plot proposed versus reference centre frequencies with bandwidth bars.

    A ``gtf_proposed`` object is built for 16 bands and its centre
    frequencies are compared with the reversed output of
    ``gtf_ref.erb_space``. Bandwidths for both sets come from the proposed
    object's ``cal_bw``. The figure is written to
    ``images/validate/cfs_n{n_band}.png``.
    """
    fs = 1e3
    n_band = 16
    freq_low = 70
    freq_high = 7000

    gtf_obj = gtf_proposed(fs,
                           cf_low=freq_low,
                           freq_high=freq_high,
                           n_band=n_band)
    cfs_proposed = gtf_obj.cfs
    bws_proposed = gtf_obj.cal_bw(cfs_proposed)

    cfs_ref = gtf_ref.erb_space(low_freq=freq_low,
                                high_freq=freq_high,
                                num=n_band)[::-1]
    bws_ref = gtf_obj.cal_bw(cfs_ref)

    fig, ax = plt.subplots(1, 1)
    # The reference series is shifted slightly on the x axis so that the two
    # sets of error bars do not overlap.
    series = (
        (0, cfs_proposed, bws_proposed, 'Todd'),
        (n_band / 100, cfs_ref, bws_ref, 'Detly'),
    )
    for x_shift, cfs, bws, label in series:
        ax.errorbar(np.arange(n_band) + x_shift,
                    cfs,
                    yerr=bws / 2,
                    linewidth=2,
                    label=label)

    ax.set_xlabel('freq_band')
    ax.set_ylabel('freq(Hz)')
    ax.legend()
    fig.savefig(f'images/validate/cfs_n{n_band}.png', dpi=100)
def fft_weights(nfft, fs, nfilts, width, fmin, fmax, maxlen):
    """
    Generate a matrix of weights to combine FFT bins into Gammatone bins.

    :param nfft: the source FFT size
    :param fs: sampling rate (Hz)
    :param nfilts: the number of output bands required
    :param width: the constant width of each band in Bark
    :param fmin: lower limit of frequencies (Hz)
    :param fmax: upper limit of frequencies (Hz)
    :param maxlen: number of bins to truncate the rows to. Non-integer
        values (for example ``nfft / 2 + 1``) are truncated with ``int``;
        ``None`` keeps all ``nfft`` columns.

    :return: a tuple `weights`, `gain` with the calculated weight matrices
        and gain vectors

    Note about `maxlen` parameter: While wts has nfft columns, the second
    half are all zero. Hence, aud spectrum is::

        fft2gammatonemx(nfft,sr)*abs(fft(xincols,nfft))

    `maxlen` truncates the rows to this many bins.

    | (c) 2004-2009 Dan Ellis [email protected] based on rastamat/audspec.m
    | (c) 2012 Jason Heeris (Python implementation)
    """
    # Points on the upper half of the unit circle, one per retained FFT bin.
    n_bins = int(nfft / 2 + 1)
    ucirc = np.exp(1j * 2 * np.pi * np.arange(0, n_bins) / nfft)[None, ...]

    # Common ERB filter code factored out
    cf_array = filters.erb_space(fmin, fmax, nfilts)[::-1]

    _, A11, A12, A13, A14, _, _, _, B2, gain = (
        filters.make_erb_filters(fs, cf_array, width).T
    )

    # Column vectors so they broadcast against the (1, n_bins) unit circle.
    A11, A12, A13, A14 = (A11[..., None], A12[..., None], A13[..., None],
                          A14[..., None])

    r = np.sqrt(B2)
    theta = 2 * np.pi * cf_array / fs
    pole = (r * np.exp(1j * theta))[..., None]

    GTord = 4

    weights = np.zeros((nfilts, nfft))
    weights[:, 0:ucirc.shape[1]] = (
        np.abs(ucirc + A11 * fs) * np.abs(ucirc + A12 * fs) *
        np.abs(ucirc + A13 * fs) * np.abs(ucirc + A14 * fs) *
        np.abs(fs * (pole - ucirc) * (pole.conj() - ucirc))**(-GTord) /
        gain[..., None])

    # Slice bounds must be integers; callers may pass a float such as
    # ``nfft / 2 + 1``. ``None`` is left alone so it still means "no limit".
    if maxlen is not None:
        maxlen = int(maxlen)
    weights = weights[:, 0:maxlen]

    return weights, gain
def __init__(self,
             f_lo,
             f_hi,
             num_chan,
             f_s,
             filt_type='gammatone',
             bounding=True):
    """Set up channel centre frequencies, bandwidths, filters and FDLs.

    Args:
        f_lo: lower frequency bound of the channel range.
        f_hi: upper frequency bound of the channel range.
        num_chan: number of channels to create.
        f_s: sampling rate; ``self.dt`` is set to ``1 / f_s``.
        filt_type: ``'gammatone'`` spaces the channels with
            ``gtf.erb_space`` and builds ERB filter coefficients; any other
            value spaces them logarithmically between ``f_lo`` and ``f_hi``.
        bounding: forwarded unchanged to every ``FDL``.
    """
    # basic parameters and placeholders
    self.f_s = f_s
    self.dt = 1. / f_s
    self.num_chan = num_chan
    self.chunks = []
    self.processed = False
    self.filt_type = filt_type
    self.bounding = bounding

    if filt_type == 'gammatone':
        self.f_c = gtf.erb_space(f_lo, f_hi, num=num_chan)
        # reverse the order returned by erb_space
        self.f_c = np.flip(self.f_c)
        self.erb_coefs = gtf.make_erb_filters(f_s, self.f_c)
        self.bw = [self.erb_calc(f) for f in self.f_c]
        # for k, f in enumerate(self.f_c):
        #     print("Freq:\t", f, "BW: \t", self.bw[k])
    else:
        # calculate frequencies and bandwidths of channels
        self.f_c = np.logspace(np.log10(f_lo), np.log10(f_hi), num_chan)
        c = 2.**(1. / 6.) - 1 / (2.**(1. / 6.))  # bw multiplier
        # bandwidth is proportional to the centre frequency, floored at 100.0
        self.bw = [max(100.0, f_c * c) for f_c in self.f_c]

    # NOTE(review): the nesting of this print and of the Bessel setup below
    # (inside the ``else`` branch vs. after the if/else) could not be
    # recovered from the flattened source - confirm against the original.
    print(self.f_c)

    # Set up filter coefficients for each channel
    self.a = []
    self.b = []
    for k in range(self.num_chan):
        # 2nd-order Bessel band-pass spanning f_c +/- bw / 2, with the lower
        # edge clamped to 15.0; edges are scaled by 2 / f_s before the call.
        b, a = dsp.bessel(2,
                          np.array([
                              max(self.f_c[k] - 0.5 * self.bw[k], 15.0),
                              self.f_c[k] + 0.5 * self.bw[k]
                          ]) * (2 / f_s),
                          btype='bandpass')
        self.a.append(a)
        self.b.append(b)

    # Set up FDLs for each channel
    self.fdl = [
        FDL(self.f_c[k], self.bw[k], self.f_s, bounding=self.bounding)
        for k in range(self.num_chan)
    ]
def apply_gammatone(data, sample_frequency, nb_channels=20, low_cf=20,
                    window_time=0.5, overlap_time=0.1,
                    compression=None, accurate=True):
    """Return the response of a gammatone filterbank to data

    Compute a spectrogram-like time/frequency magnitude array from
    gammatone subband filters. ``data`` (sampled at ``sample_frequency``)
    is passed to a multi-channel gammatone filterbank whose lowest
    frequency is ``low_cf`` and whose highest frequency is
    ``sample_frequency`` / 2. Band energies are integrated over windows of
    ``window_time`` seconds, advancing by ``overlap_time`` seconds for
    successive columns, and are then optionally compressed.

    Parameters:
    -----------

    data (float numpy array): 1D input data to be processed

    sample_frequency (int): sample frequency of the data in Hz

    nb_channels (int): number of frequency channels in the filterbank

    low_cf (float): lowest center frequency of the filterbank in Hz

    window_time (float): integration time of the window in seconds

    overlap_time (float): time in seconds by which the window advances
        between successive columns

    compression (string): compression applied to the energy: 'log' for
        20*np.log10(X), 'cubic' for X**(1/3). Any other value, including
        the default None, leaves the output uncompressed.

    accurate (bool): use the full filterbank approach instead of the
        weighted FFT approximation. This is much slower, and uses a lot
        of memory, but is more accurate. Default is True.

    Returns:
    --------

    output (float numpy array): 2D filterbank response to the input
        data, flipped along axis 0 so that channels are in increasing
        frequency order.
        NOTE(review): the flip implies axis 0 is the channel axis -
        confirm against callers that expect time on axis 0.

    center_frequencies (float numpy array): center frequencies of each
        channel in Hz, in increasing order.

    """
    import gammatone.gtgram
    import gammatone.fftweight
    from gammatone.filters import erb_space

    # choose real gammatones or FFT approximation
    if accurate:
        gtgram = gammatone.gtgram.gtgram
    else:
        gtgram = gammatone.fftweight.fft_gtgram

    logging.getLogger('prosolia').debug(
        'computing gammatone filterbank on %s channels, %s compression%s',
        nb_channels, compression, ', accurate' if accurate else '')

    # erb_space output is reversed to get increasing center frequencies
    center_frequencies = erb_space(
        low_cf, sample_frequency/2, nb_channels)[::-1]

    # flip the filterbank output to match the increasing frequency order
    response = gtgram(
        data, sample_frequency, window_time, overlap_time,
        nb_channels, low_cf)
    output = np.flipud(response)

    # compress the output; unknown methods leave it untouched
    compressors = {'log': lambda X: 20 * np.log10(X),
                   'cubic': lambda X: X ** (1./3)}
    transform = compressors.get(compression)
    if transform is not None:
        output = transform(output)

    return output, center_frequencies
def fft_weights(
        nfft,
        fs,
        nfilts,
        width,
        fmin,
        fmax,
        maxlen):
    """
    Generate a matrix of weights to combine FFT bins into Gammatone bins.

    :param nfft: the source FFT size
    :param fs: sampling rate (Hz)
    :param nfilts: the number of output bands required
    :param width: the constant width of each band in Bark
    :param fmin: lower limit of frequencies (Hz)
    :param fmax: upper limit of frequencies (Hz)
    :param maxlen: number of bins to truncate the rows to (converted with
        ``int`` before slicing)

    :return: a tuple `weights`, `gain` with the calculated weight matrices
        and gain vectors

    Note about `maxlen` parameter: While wts has nfft columns, the second
    half are all zero. Hence, aud spectrum is::

        fft2gammatonemx(nfft,sr)*abs(fft(xincols,nfft))

    `maxlen` truncates the rows to this many bins.

    | (c) 2004-2009 Dan Ellis [email protected] based on rastamat/audspec.m
    | (c) 2012 Jason Heeris (Python implementation)
    """
    gammatone_order = 4

    # Points on the unit circle for the retained FFT bins, as a row vector.
    bin_index = np.arange(0, nfft / 2 + 1)
    unit_circle = np.exp(1j * 2 * np.pi * bin_index / nfft)[None, ...]
    n_used = unit_circle.shape[1]

    # Common ERB filter code factored out
    cf_array = filters.erb_space(fmin, fmax, nfilts)[::-1]
    coefs = filters.make_erb_filters(fs, cf_array, width).T
    _, a11, a12, a13, a14, _, _, _, b2, gain = coefs

    # Column vectors so they broadcast against the row of unit-circle points.
    z1, z2, z3, z4 = (col[..., np.newaxis] for col in (a11, a12, a13, a14))
    gain_col = gain[..., np.newaxis]

    radius = np.sqrt(b2)
    angle = 2 * np.pi * cf_array / fs
    pole = (radius * np.exp(1j * angle))[..., np.newaxis]

    zero_term = (
        np.abs(unit_circle + z1 * fs) * np.abs(unit_circle + z2 * fs)
        * np.abs(unit_circle + z3 * fs) * np.abs(unit_circle + z4 * fs)
    )
    pole_term = np.abs(
        fs * (pole - unit_circle) * (pole.conj() - unit_circle)
    ) ** (-gammatone_order)

    weights = np.zeros((nfilts, nfft))
    weights[:, :n_used] = zero_term * pole_term / gain_col

    return weights[:, :int(maxlen)], gain
import numpy as np
import gammatone.filters as gtf
import matplotlib.pyplot as plt

# ---------------------------------------------------------------------------
# Test signal: sum of cosines at the first ``num_h`` multiples of ``f0``
# ---------------------------------------------------------------------------
f_s = 44100
dt = 1 / f_s
dur = 0.1
t = np.arange(0, dur, dt)

num_h = 8
f0 = 220.0
in_sig = np.zeros_like(t)
for p in range(1, num_h + 1):
    partial = np.cos(2 * np.pi * f0 * p * t)
    in_sig += partial

# ---------------------------------------------------------------------------
# Gammatone filterbank: 100 centre frequencies from erb_space(100, 4000)
# ---------------------------------------------------------------------------
erb_freqs = gtf.erb_space(100.0, 4000.0, num=100)
print(erb_freqs)
erb_coefs = gtf.make_erb_filters(f_s, erb_freqs)
filted = gtf.erb_filterbank(in_sig, erb_coefs)

# Draw every channel as a black trace, each one unit lower than the previous
# so the traces are stacked instead of overlapping.
for k in range(len(filted)):
    channel = filted[k]
    plt.plot(t, channel + 100 - k, color='k')

plt.show()
lowf = 60 #Hz highf = 2100 #Hz d = 0.89 #amount to remove detected signal from residual time = 100 #time to start analyzing audio delta_time = 100 # time between analysis frames notes = np.zeros(6, dtype=float) input_signal = wave.read('16kHz_acTuned.wav') fs = input_signal[0] T = 1 / fs audio = np.asarray(input_signal[1]) while time < (len(audio) - fs * time / 1000): trim_audio = clip_audio(audio, time) center_freqs = gt.erb_space(lowf, highf, num_freqs) filt_coefs = gt.make_erb_filters(fs, center_freqs) channels = gt.erb_filterbank(trim_audio, filt_coefs) process_chan = np.empty( [len(channels), len(channels[0]) * 2], dtype=complex) mag_chan = np.empty_like(process_chan, dtype=float) #Cochlea simulation for idx in range(len(channels)): process_chan[idx, :] = compression(channels[idx, :]) process_chan[idx, :] = half_wave_rectification(process_chan[idx, :]) low_cutoff = center_freqs[idx] * 1.5 process_chan[idx, :] = butter_lfilter(process_chan[idx, :], low_cutoff, fs) mag_chan[idx, :] = np.absolute(np.fft.fft(process_chan[idx, :]))
# Zero out every pitch estimate whose strength is not above 0.99.
idcs = strengths > 0.99
pitches *= idcs

fig = plt.figure()

# Right-hand panel: the masked pitch values against template CF.
ax1 = fig.add_subplot(1, 2, 2)
ax1.set_xscale("log")
ax1.stem(CFs, pitches, basefmt=" ")
skip = 10  # label every 10th centre frequency on the x axis
ax1.set_xticks(list(CFs[::skip]))
ax1.grid("on", axis='y')
ax1.set_xlabel("CF of Adapative Template", size=16)
ax1.set_ylabel("Phase-locked firing rate (Hz)", size=16)
ax1.get_xaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter())
ax1.tick_params(axis='both', which='both', labelsize=12)

## place-rate profile, place-rate coding
f_c = gtf.erb_space(100., 1600., num=100)
erb_coefs = gtf.make_erb_filters(f_s, f_c)
filt_channels = gtf.erb_filterbank(in_sig, erb_coefs)

# Euclidean norm of every channel, scaled so the largest value is 1.
channel_power = np.zeros(len(filt_channels))
for k, _ in enumerate(channel_power):
    channel_power[k] = np.sqrt(np.dot(filt_channels[k], filt_channels[k]))
channel_power /= np.max(channel_power)

# Left-hand panel: normalised channel power against auditory-channel CF.
ax2 = fig.add_subplot(1, 2, 1)
ax2.set_xscale("log")
ax2.set_xticks(list(f_c[::skip]))
ax2.stem(f_c, channel_power, basefmt=" ")
ax2.grid("on", axis='y')
ax2.set_xlabel("CF of Auditory Channel", size=16)
ax2.set_ylabel("Normalized Power/Firing Rate", size=16)
ax2.get_xaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter())
## Signal for SAC approach in_sig = np.zeros_like(t) in_sig_mt = np.zeros_like(t) for p in range(2, num_h + 1): if p == mistuned_h: in_sig_mt += np.cos(2 * np.pi * f0 * p * t * mistuning) else: in_sig_mt += np.cos(2 * np.pi * f0 * p * t) in_sig += np.cos(2 * np.pi * f0 * p * t) in_sig /= np.max(in_sig) in_sig_mt /= np.max(in_sig_mt) ################################################################################ # SAC Peak Picking ################################################################################ f_c = gtf.erb_space(20., 5000., num=num_channels) erb_coefs = gtf.make_erb_filters(f_s, f_c) filt_channels = gtf.erb_filterbank(in_sig, erb_coefs) filt_channels_mt = gtf.erb_filterbank(in_sig_mt, erb_coefs) ac_channels = np.zeros((num_channels, w_size)) summary_ac = np.zeros(w_size, dtype=float) ac_channels_mt = np.zeros((num_channels, w_size)) summary_ac_mt = np.zeros(w_size, dtype=float) for k in range(num_channels): ac_channels[k, :] = dsp.correlate(filt_channels[k, -w_size:], filt_channels[k, -w_size:], mode='same') ac_channels_mt[k, :] = dsp.correlate(filt_channels_mt[k, -w_size:], filt_channels_mt[k, -w_size:], mode='same')