def signals2logmel(signals, sample_rate, num_mel_bins):
    """Convert raw signals into normalized log-scale Mel spectrograms.

    The input is first decimated by keeping every third sample along
    axis 1, and the sample rate is integer-divided by 3 to match.  A
    squared-magnitude STFT (25 ms window, 10 ms step, 512-point FFT) is
    then projected onto ``num_mel_bins`` Mel bins, log-compressed, and
    passed through ``cmvn`` along axis 1.

    Args:
        signals: Array/tensor indexed as ``signals[:, ::3]``, i.e. at
            least 2-D; presumably (batch, samples) -- TODO confirm.
        sample_rate: Sample rate of ``signals`` before decimation.
        num_mel_bins: Number of Mel bins forwarded to
            ``audio.linear_to_mel``.

    Returns:
        The result of ``cmvn`` applied to the log-Mel spectrogram.
    """
    decimation = 3
    # Plain subsampling: no low-pass filter is applied before dropping samples.
    signals = signals[:, ::decimation]
    sample_rate = sample_rate // decimation

    window_len = audio.ms_to_frames(sample_rate, 25)
    window_hop = audio.ms_to_frames(sample_rate, 10)

    stft = tf.signal.stft(signals, window_len, window_hop, fft_length=512)
    power_spec = tf.math.square(tf.math.abs(stft))

    # NOTE(review): fmax is set to the full (decimated) sample rate, not
    # sample_rate / 2 -- confirm this is what audio.linear_to_mel expects.
    mel_spec = audio.linear_to_mel(
        power_spec,
        sample_rate,
        num_mel_bins=num_mel_bins,
        fmax=tf.cast(sample_rate, tf.float32),
    )

    # Small offset keeps the logarithm finite where the Mel energy is zero.
    log_mel = tf.math.log(1e-6 + mel_spec)
    return cmvn(log_mel, axis=1)
def test_spectrograms(self):
    """Check ``audio.spectrograms`` output for NaNs and expected shape.

    For every file in ``audiofiles`` the test sweeps frame lengths of
    20..100 ms (step 20) and FFT sizes 256..2048, skipping combinations
    where the FFT size is smaller than the frame length in samples.  The
    frame step is always half the frame length (integer milliseconds).
    """
    fft_sizes = (256, 512, 1024, 2048)
    for wav_path in audiofiles:
        signal, rate = audio.read_wav(wav_path)
        for frame_ms in range(20, 101, 20):
            for fft_size in fft_sizes:
                # An FFT shorter than the analysis window is not tested.
                if fft_size < audio.ms_to_frames(rate, frame_ms):
                    continue
                hop_ms = frame_ms // 2
                # Add a leading axis of size 1, then take element 0 of the result.
                batch = np.expand_dims(signal, 0)
                batch_spec = audio.spectrograms(
                    batch,
                    rate,
                    frame_length_ms=frame_ms,
                    frame_step_ms=hop_ms,
                    fft_length=fft_size,
                )
                powspec = batch_spec[0]
                assert not np.isnan(powspec.numpy()).any()
                hop_samples = audio.ms_to_frames(rate, hop_ms)
                assert powspec.shape[0] == signal.shape[0] // hop_samples - 1
                assert powspec.shape[1] == fft_size // 2 + 1
def test_ms_to_frames(self):
    """Check ``audio.ms_to_frames`` against ``(rate // 1000) * ms``.

    Sweeps sample rates 1000..59000 Hz in 1 kHz steps and durations
    1..4901 ms in 100 ms steps.
    """
    for rate in range(1000, 60000, 1000):
        samples_per_ms = rate // 1000
        for duration_ms in range(1, 5000, 100):
            expected = samples_per_ms * duration_ms
            actual = audio.ms_to_frames(rate, duration_ms).numpy()
            assert actual == expected