def test_crop_time():
    """Check that CropTime produces spectrograms of the requested length.

    For targets of 1, 2 and 5, a spectrogram is built from a test tensor
    created with ``seconds=3`` and passed through ``CropTime``. The rounded
    duration of the output must equal the target, and the output width must
    match the proportionally scaled input width (within ``eps=1.01``).
    """
    for target_seconds in (1, 2, 5):
        to_spec = AudioToSpec.from_cfg(AudioConfig.Voice())
        waveform = test_audio_tensor(seconds=3)
        # NOTE(review): the factor of 1000 suggests CropTime expects
        # milliseconds -- confirm against CropTime's signature.
        cropper = CropTime(target_seconds * 1000)
        spec_in, spec_out = apply_transform(cropper, to_spec(waveform))

        _test_eq(target_seconds, round(spec_out.duration))

        # Width should scale by the same ratio as the duration.
        expected_width = int((target_seconds / spec_in.duration) * spec_in.width)
        _test_close(spec_out.width, expected_width, eps=1.01)
def test_mask_time():
    """Exercise MaskTimeGPU with a mean fill and with a constant fill.

    ``fastaudio.augment.functional.region_mask`` is patched so the masked
    time columns are known in advance:

    * With ``mask_val=None`` the first 10 columns must equal the per-channel
      mean of those columns, and that mean must not appear anywhere else.
    * With ``num_masks=2`` and an explicit ``mask_val`` the first and last 10
      columns must equal the value, and nothing in between may equal it.
    """
    channels, freq_bins, frames = 2, 120, 80
    masked = 10  # number of time columns covered by each patched mask
    smallest, largest = 5, 7
    fill_value = 10  # Use a value not in the original spectrogram

    random_spec = AudioSpectrogram(torch.rand([channels, freq_bins, frames]))
    ramp = torch.linspace(0, 1, frames).view(1, 1, frames)
    gradient_spec = AudioSpectrogram(ramp.repeat([channels, freq_bins, 1]))
    ones = torch.ones_like(random_spec)

    head_mask = [1] * masked + [0] * (frames - masked)
    tail_mask = [0] * (frames - masked) + [1] * masked

    # --- Patching with the mean -------------------------------------------
    single_mask = torch.BoolTensor([head_mask])
    with patch(
        "fastaudio.augment.functional.region_mask",
        side_effect=[single_mask],
    ):
        mean_masker = MaskTimeGPU(
            min_size=smallest, max_size=largest, mask_val=None
        )
        # A gradient input guarantees the mean never occurs outside the mask.
        inp, out = apply_transform(mean_masker, gradient_spec)
        region_mean = inp[..., :masked].mean(dim=(-2, -1))
        channelwise_mean = region_mean.reshape(-1, 1, 1)
        _test_close(out[..., :masked], channelwise_mean * ones[..., :masked])
        outside_hits = out[..., masked:] == channelwise_mean
        assert not outside_hits.any(), out == channelwise_mean

    # --- Multiple masks, patching with a fixed value ------------------------
    double_mask = torch.BoolTensor([head_mask, tail_mask])
    with patch(
        "fastaudio.augment.functional.region_mask",
        side_effect=[double_mask],
    ):
        value_masker = MaskTimeGPU(
            min_size=smallest,
            num_masks=2,
            max_size=largest,
            mask_val=fill_value,
        )
        inp, out = apply_transform(value_masker, random_spec)
        tail_start = frames - masked
        _test_eq(out[..., :masked], fill_value * ones[..., :masked])
        _test_eq(out[..., tail_start:], fill_value * ones[..., tail_start:])
        matches = out[..., masked:tail_start] == fill_value
        assert not matches.any(), matches
def test_upsample(audio):
    """Check that resampling to a random rate scales the sample count.

    Ten target sample rates are drawn uniformly from ``[16000, 72000]``.
    After ``Resample`` is applied, the resulting ``nsamples`` must be within
    ``eps=1.1`` of the original count scaled by the rate ratio.

    This can take a while depending on the target sample rate.

    NOTE(review): this function used to be defined twice in a row with the
    same body; the second definition silently replaced the first, so the two
    have been merged into this single definition.

    Args:
        audio: audio object exposing ``nsamples`` and ``sr`` -- presumably a
            pytest fixture; confirm against the test configuration.
    """
    for _ in range(10):
        random_sr = random.randint(16000, 72000)
        random_upsample = Resample(random_sr)(audio)
        num_samples = random_upsample.nsamples
        # Expected length: original count divided by the old/new rate ratio.
        expected_samples = abs(audio.nsamples // (audio.sr / random_sr))
        _test_close(num_samples, expected_samples, eps=1.1)