def test_filterbank(device):
    """Exercise ``Filterbank``: tracing, the -100 dB floor and batch consistency.

    Arguments
    ---------
    device : str or torch.device
        Device on which the module and all test tensors are placed.
    """
    from speechbrain.processing.features import Filterbank

    filterbank = Filterbank().to(device)
    spec_shape = [10, 101, 201]

    # The module has to survive tracing on an all-ones input.
    ones_spec = torch.ones(spec_shape, device=device)
    assert torch.jit.trace(filterbank, ones_spec)

    # Check amin (-100 dB): an all-zero input must come out as exactly -100.
    silent_out = filterbank(torch.zeros(spec_shape, device=device))
    assert torch.equal(silent_out, torch.ones_like(silent_out) * -100)

    # Check top_db: the dB conversion of a single zero is also -100.
    zero_amp = torch.zeros([1, 1, 1], device=device)
    db_floor = torch.Tensor([[[-100]]]).to(device)
    assert torch.equal(filterbank._amplitude_to_DB(zero_amp), db_floor)

    # Making sure independent computation gives same results
    # as the batch computation
    loud = torch.rand([1, 101, 201], device=device) * 10
    quiet = torch.rand([1, 101, 201], device=device)
    stacked = torch.cat([loud, quiet], dim=0)

    solo_loud = filterbank(loud)
    solo_quiet = filterbank(quiet)
    batched = filterbank(stacked)

    assert (solo_loud[0] - batched[0]).abs().sum() < 8e-05
    assert (solo_quiet[0] - batched[1]).abs().sum() < 8e-05
def __init__(
    self,
    deltas=False,
    context=False,
    requires_grad=False,
    sample_rate=16000,
    n_fft=400,
    n_mels=40,
    filter_shape="triangular",
    param_change_factor=1.0,
    param_rand_factor=0.0,
    left_frames=5,
    right_frames=5,
):
    """Build the STFT, filterbank, delta and context-window sub-modules.

    Arguments
    ---------
    deltas : bool
        Stored on ``self.deltas``; not otherwise used in this constructor
        (presumably read by ``forward`` -- confirm).
    context : bool
        Stored on ``self.context``; not otherwise used in this constructor
        (presumably read by ``forward`` -- confirm).
    requires_grad : bool
        Stored on ``self.requires_grad``. The filterbank is created with
        ``freeze=not requires_grad``.
    sample_rate : int
        Passed to both the STFT and the filterbank; the filterbank's
        ``f_max`` is set to ``sample_rate / 2``.
    n_fft : int
        Passed to both the STFT and the filterbank.
    n_mels : int
        Number of filters requested from the filterbank, and the
        ``input_size`` of the ``Deltas`` module.
    filter_shape : str
        Forwarded unchanged to the filterbank.
    param_change_factor : float
        Forwarded unchanged to the filterbank.
    param_rand_factor : float
        Forwarded unchanged to the filterbank.
    left_frames : int
        Forwarded to ``ContextWindow``.
    right_frames : int
        Forwarded to ``ContextWindow``.
    """
    super().__init__()
    self.deltas = deltas
    self.context = context
    self.requires_grad = requires_grad

    self.compute_STFT = STFT(sample_rate=sample_rate, n_fft=n_fft)
    self.compute_fbanks = Filterbank(
        # The filterbank must be told the same sample rate as the STFT;
        # without it a non-default ``sample_rate`` would only change
        # ``f_max`` while the filters keep the filterbank's default rate.
        sample_rate=sample_rate,
        n_fft=n_fft,
        n_mels=n_mels,
        f_min=0,
        f_max=sample_rate / 2,
        freeze=not requires_grad,
        filter_shape=filter_shape,
        param_change_factor=param_change_factor,
        param_rand_factor=param_rand_factor,
    )
    self.compute_deltas = Deltas(input_size=n_mels)
    self.context_window = ContextWindow(
        left_frames=left_frames,
        right_frames=right_frames,
    )
def test_filterbank():
    """Check that a default ``Filterbank`` can be traced by ``torch.jit.trace``."""
    from speechbrain.processing.features import Filterbank

    filterbank = Filterbank()
    example_spec = torch.ones([10, 101, 201])

    traced = torch.jit.trace(filterbank, example_spec)
    assert traced
def test_features_multimic(device):
    """Check that a multi-channel input gives the same features per channel.

    The same random input is fed once as a 3-D tensor and once duplicated
    along a new trailing axis; the first slice of the second result must
    match the first result.

    Arguments
    ---------
    device : str or torch.device
        Device on which the module and all test tensors are placed.
    """
    from speechbrain.processing.features import Filterbank

    compute_fbanks = Filterbank().to(device)

    inputs = torch.rand([10, 101, 201], device=device)
    output = compute_fbanks(inputs)

    # Duplicate the input along a new last axis and keep only the first
    # slice of the result for comparison.
    inputs_ch2 = torch.stack((inputs, inputs), -1)
    output_ch2 = compute_fbanks(inputs_ch2)[..., 0]

    # Compare element-wise: a signed sum of differences lets positive and
    # negative errors cancel and passes trivially whenever the net
    # difference is negative, so it cannot detect a real mismatch.
    assert torch.allclose(output, output_ch2, atol=1e-05)
def __init__(
    self,
    deltas=True,
    context=True,
    requires_grad=False,
    sample_rate=16000,
    f_min=0,
    f_max=None,
    n_fft=400,
    n_mels=23,
    n_mfcc=20,
    filter_shape="triangular",
    param_change_factor=1.0,
    param_rand_factor=0.0,
    left_frames=5,
    right_frames=5,
    win_length=25,
    hop_length=10,
):
    """Build the STFT, filterbank, DCT, delta and context-window sub-modules.

    Arguments
    ---------
    deltas : bool
        Stored on ``self.deltas``; not otherwise used in this constructor
        (presumably read by ``forward`` -- confirm).
    context : bool
        Stored on ``self.context``; not otherwise used in this constructor
        (presumably read by ``forward`` -- confirm).
    requires_grad : bool
        Stored on ``self.requires_grad``. The filterbank is created with
        ``freeze=not requires_grad``.
    sample_rate : int
        Passed to both the STFT and the filterbank.
    f_min : int
        Forwarded to the filterbank.
    f_max : int or None
        Forwarded to the filterbank; ``None`` is replaced by
        ``sample_rate / 2``.
    n_fft : int
        Passed to both the STFT and the filterbank.
    n_mels : int
        Number of filters requested from the filterbank, and the
        ``input_size`` of the DCT.
    n_mfcc : int
        ``n_out`` of the DCT and ``input_size`` of the ``Deltas`` module.
    filter_shape : str
        Forwarded unchanged to the filterbank.
    param_change_factor : float
        Forwarded unchanged to the filterbank.
    param_rand_factor : float
        Forwarded unchanged to the filterbank.
    left_frames : int
        Forwarded to ``ContextWindow``.
    right_frames : int
        Forwarded to ``ContextWindow``.
    win_length : float
        Forwarded to the STFT (units not visible here; presumably
        milliseconds -- confirm against ``STFT``).
    hop_length : float
        Forwarded to the STFT (units not visible here; presumably
        milliseconds -- confirm against ``STFT``).
    """
    super().__init__()

    self.deltas = deltas
    self.context = context
    self.requires_grad = requires_grad

    # Without an explicit upper frequency, use half the sample rate.
    upper_freq = sample_rate / 2 if f_max is None else f_max

    stft_options = dict(
        sample_rate=sample_rate,
        n_fft=n_fft,
        win_length=win_length,
        hop_length=hop_length,
    )
    fbank_options = dict(
        sample_rate=sample_rate,
        n_fft=n_fft,
        n_mels=n_mels,
        f_min=f_min,
        f_max=upper_freq,
        freeze=not requires_grad,
        filter_shape=filter_shape,
        param_change_factor=param_change_factor,
        param_rand_factor=param_rand_factor,
    )

    # Sub-modules are created in the same order as before so that module
    # registration order is unchanged.
    self.compute_STFT = STFT(**stft_options)
    self.compute_fbanks = Filterbank(**fbank_options)
    self.compute_dct = DCT(input_size=n_mels, n_out=n_mfcc)
    self.compute_deltas = Deltas(input_size=n_mfcc)
    self.context_window = ContextWindow(
        left_frames=left_frames, right_frames=right_frames
    )