def spectrogram_audio(audio, n_bands=32, sfreq=44100., filt_kind='nsl',
                      freq_spacing='erb', fmin=170, fmax=7000, **kws_spec):
    '''Extract a (roughly) auditory-system spectrogram from a sound.

    This is a thin dispatcher: it builds the filterbank center frequencies
    with `create_center_frequencies` and then hands the sound to one of two
    cochlear models.

    Parameters
    ----------
    audio : array, shape (n_times,)
        The input sound.
    n_bands : int, default=32
        The number of frequency bands requested from
        `create_center_frequencies`.
    sfreq : float, default=44100.
        The sampling frequency of `audio`. For 'drnl' it is converted to
        ``float(sfreq) * Hz``; for 'nsl' it is passed to `spectrogram_nsl`
        unchanged.
    filt_kind : one of ['drnl', 'nsl']
        How to extract the spectrogram. Options mean:
            drnl : run `hears.DRNL` (``type='human'``) on the sound and
                   transpose its output. The original author notes that it
                   seems to be unstable for high frequencies (>5000); see
                   the brian.hears documentation.
            nsl : call `spectrogram_nsl`, an implementation of the wav2aud
                  function of the NSL toolbox.
    freq_spacing : string ['erb', 'log']
        What frequency spacing to use; forwarded to
        `create_center_frequencies` as ``kind``.
    fmin, fmax : numbers
        The frequency range forwarded to `create_center_frequencies`.
    kws_spec : dictionary
        Keywords forwarded to the spectrogram function (`hears.DRNL` or
        `spectrogram_nsl`).

    Returns
    -------
    spec : array
        The extracted audio spectrogram, expected to be shaped
        (n_frequencies, n_times).
    freqs : array
        The center frequencies returned by `create_center_frequencies`.

    Raises
    ------
    ValueError
        If `filt_kind` is not 'drnl' or 'nsl'.
    '''
    # Reject unknown kinds before doing any work; previously this fell
    # through both branches and failed on the unbound `spec` at return.
    if filt_kind not in ('drnl', 'nsl'):
        raise ValueError(
            "filt_kind must be 'drnl' or 'nsl', got {0!r}".format(filt_kind))

    # Auditory filterbank + amplitude extraction
    cfreqs = create_center_frequencies(fmin, fmax, n_bands, kind=freq_spacing)
    if filt_kind == 'drnl':
        sfreq = float(sfreq) * Hz
        snd = hears.Sound(audio, samplerate=sfreq)
        spec = hears.DRNL(snd, cfreqs, type='human', **kws_spec).process()
        # Transpose so the layout agrees with what spectrogram_nsl returns.
        spec = spec.T
    else:
        spec = spectrogram_nsl(audio, sfreq, cfreqs, **kws_spec)
    return spec, cfreqs
def extract_nsl_spectrogram(sig, Fs, cfs):
    '''Implements a version of the "wav2aud" function in the NSL toolbox.

    Uses Brian hears to chain most of the computations. This is effectively
    what it does:
        1. Gammatone filterbank at provided cfs (erbspace recommended)
        2. Half-wave rectification
        3. Low-pass filtering at 2Khz
        4. First-order derivative across frequencies (via
           `sigp.roll_and_subtract`), to sharpen output
        5. Half-wave rectification #2 (requested with ``hwr=True``)
        6. An exponentially-weighted moving average over time

    INPUTS
    --------
    sig : array
        The auditory signals we'll use to extract. Should be time x feats,
        or 1-d
    Fs : float, int
        The sampling rate of the signal
    cfs : list of floats, ints
        The center frequencies that we'll use for initial filtering.

    OUTPUTS
    --------
    out : array, [tpts, len(cfs)]
        The auditory spectrogram of the signal
    '''
    # Keep the plain number around: the smoothing half-life below is a
    # sample count and should not carry Brian's Hz unit.
    rate = float(Fs)
    Fs = rate * Hz
    snd = hears.Sound(sig, samplerate=Fs)

    # Cochlear model
    snd_filt = hears.Gammatone(snd, cfs)

    # Hair cell stages
    clp = lambda x: np.clip(x, 0, np.inf)
    snd_hwr = hears.FunctionFilterbank(snd_filt, clp)
    snd_lpf = hears.LowPass(snd_hwr, 2000)

    # Lateral inhibitory network
    rands = lambda x: sigp.roll_and_subtract(x, hwr=True)
    snd_lin = hears.FunctionFilterbank(snd_lpf, rands)

    # Initial processing
    out = snd_lin.process()

    # Time integration.
    # Time constant is 8ms, which we approximate with halfwidth of 12
    half_pt = (12. / 1000) * rate
    # `pd.stats.moments.ewma` no longer exists in pandas; `.ewm(...).mean()`
    # is its replacement and smooths down the time axis (axis 0).
    out = pd.DataFrame(out).ewm(halflife=half_pt).mean().values
    return out
def spectrogram_nsl(sig, sfreq, cfs, comp_kind='exp', comp_fac=3):
    '''Extract a cochlear / mid-brain spectrogram.

    A version of the "wav2aud" function in the NSL toolbox, built by
    chaining Brian hears filterbanks. The stages, in order:
        1. Gammatone filterbank at the provided cfs (erbspace recommended)
        2. Half-wave rectification (clip at zero)
        3. Non-linear compression via `compress_signal`
        4. Low-pass filtering at 2Khz
        5. Lateral inhibition via `roll_and_subtract` with ``hwr=True``
        6. Leaky integration of each channel via `leaky_integrate`
           (``time_const=8``)

    A progress message is printed before each stage.

    Parameters
    ----------
    sig : numpy array, shape (n_times,)
        The auditory waveform
    sfreq : int
        The sampling frequency of the sound waveform
    cfs : array, shape (n_freqs,)
        The center frequencies to be extracted
    comp_kind : string
        The kind of compression to use. See `compress_signal`
    comp_fac : int
        The compression factor to pass to `compress_signal`.

    Returns
    -------
    spec : array
        The processed filterbank output, transposed; expected to be shaped
        (n_frequencies, n_times). Only the spectrogram is returned, not the
        center frequencies.
    '''
    def _half_wave_rectify(x):
        return np.clip(x, 0, np.inf)

    def _compress(x):
        return compress_signal(x, comp_kind, comp_fac)

    def _lateral_inhibition(x):
        return roll_and_subtract(x, hwr=True)

    sfreq = float(sfreq) * Hz
    sound = hears.Sound(sig, samplerate=sfreq)

    # ---- Cochlear model
    print('Pulling frequencies with cochlear model')
    cochlea = hears.Gammatone(sound, cfs)

    # ---- Hair cell stages
    print('Half-wave rectification')
    rectified = hears.FunctionFilterbank(cochlea, _half_wave_rectify)

    print('Non-linear compression and low-pass filter')
    compressed = hears.FunctionFilterbank(rectified, _compress)
    low_passed = hears.LowPass(compressed, 2000)

    # ---- Lateral inhibitory network
    print('Lateral inhibitory network')
    inhibited = hears.FunctionFilterbank(low_passed, _lateral_inhibition)

    # Run the whole chain
    spec = inhibited.process()

    # ---- Time integration, one channel (column) at a time
    print('leaky integration')
    rate = float(sfreq)
    n_channels = spec.shape[1]
    for band in range(n_channels):
        spec[:, band] = leaky_integrate(spec[:, band], time_const=8,
                                        sfreq=rate)
    return spec.T
import brian.hears as bh
import numpy as np

from .utils import hz2mel, mel2hz

# NB! Although the dummy sound is never used, it must be first set
# because Brian Hears isn't really designed for online sounds, which
# NengoSound is. So, we set this then immediately swap it.
dummy_sound = bh.Sound(np.zeros(1))


def erbspace(low, high, n_freq):
    """Sample ERB distribution; low and high in Hz.

    Takes `n_freq` linearly spaced points between `low` and `high`,
    converts them to kHz, and evaluates the quadratic
    ``6.23 * f**2 + 93.39 * f + 28.52`` at each point.
    """
    khz = np.linspace(low, high, n_freq) * 0.001  # original f in kHz
    return 6.23 * np.square(khz) + 93.39 * khz + 28.52


def melspace(low, high, n_freq):
    """Return `n_freq` frequencies evenly spaced on the mel scale.

    The endpoints `low` and `high` are mapped with `hz2mel`, sampled
    linearly, and mapped back with `mel2hz`.
    """
    mel_low = hz2mel(low)
    mel_high = hz2mel(high)
    mel_points = np.linspace(mel_low, mel_high, n_freq)
    return mel2hz(mel_points)


def rectify(filterbank, scale=3):
    """Half wave rectify and scale.

    Wraps `filterbank` in a `bh.FunctionFilterbank` that clips negative
    values to zero and multiplies the result by `scale`.
    """
    def _half_wave(x, scale=scale):
        return scale * np.clip(x, 0, np.inf)

    hair_cells = bh.FunctionFilterbank(filterbank, _half_wave)
    hair_cells.cached_buffer_end = 0  # Fails if we don't do this...
    return hair_cells
def spectrogram_audio(audio, n_bands=32, sfreq=44100, sig_fac=.1,
                      compression='log', low_p_cut=None, lin=True, n_jobs=3,
                      filt_kind='nsl', freq_kind='erb', Flo=170, Fhi=7000,
                      amp='atonce'):
    '''Extract a (roughly) auditory-system spectrogram from a sound.

    This is loosely based on the NSL toolbox. The function builds the
    filterbank center frequencies with `create_center_frequencies` and then
    delegates to one of two cochlear models.

    Parameters
    ----------
    audio : array, shape (n_times,)
        The input sound.
    n_bands : int, default=32
        The number of frequency bands in our filter
    sfreq : int, default=44100
        The sampling frequency of `audio`. For 'drnl' it is converted to
        ``float(sfreq) * Hz``; for 'nsl' it is passed on unchanged.
    filt_kind : one of ['drnl', 'nsl']
        How to extract the spectrogram. Options mean:
            drnl : run `hears.DRNL` (``type='human'``) on the sound. The
                   original author notes that it seems to be unstable for
                   high frequencies (>5000); see the brian.hears
                   documentation.
            nsl : call `spectrogram_nsl`, an implementation of the wav2aud
                  function in the NSL toolbox.
    freq_kind : string ['erb', 'log']
        What frequency spacing to use; forwarded to
        `create_center_frequencies` as ``kind``.
    Flo, Fhi : numbers
        The frequency range forwarded to `create_center_frequencies`.
    sig_fac, compression, low_p_cut, lin, n_jobs, amp
        Accepted so existing callers keep working, but not read anywhere in
        this function.

    Returns
    -------
    spec : array
        The spectrogram exactly as produced by the selected model.
        NOTE(review): the 'drnl' result is not transposed here, so its
        orientation may differ from the 'nsl' result -- confirm.
    cfs : array
        The center frequencies returned by `create_center_frequencies`.

    Raises
    ------
    ValueError
        If `filt_kind` is not 'drnl' or 'nsl'.
    '''
    # Fail loudly on an unknown kind instead of silently returning None.
    if filt_kind not in ('drnl', 'nsl'):
        raise ValueError(
            "filt_kind must be 'drnl' or 'nsl', got {0!r}".format(filt_kind))

    # Auditory filterbank + amplitude extraction
    print('Running filterbank with {0} filters'.format(n_bands))
    # The frequencies were previously stored under a different name than the
    # one read below, so every call ended in a NameError.
    cfs = create_center_frequencies(Flo, Fhi, n_bands, kind=freq_kind)
    if filt_kind == 'drnl':
        sfreq = float(sfreq) * Hz
        snd = hears.Sound(audio, samplerate=sfreq)
        spec = hears.DRNL(snd, cfs, type='human').process()
    else:
        spec = spectrogram_nsl(audio, sfreq, cfs)
    return spec, cfs