def test_crop_time_after_padding():
    """Crop a spectrogram to longer than its own duration using trailing zero padding.

    The requested length is the spectrogram's duration plus 5 (scaled by 1000,
    presumably milliseconds), so the transform has to extend the input rather
    than trim it.
    """
    sg_orig = test_audio_tensor()
    a2s = AudioToSpec.from_cfg(AudioConfig.Voice())
    sg = a2s(sg_orig)
    crop_time = CropTime((sg.duration + 5) * 1000, pad_mode=AudioPadType.Zeros_After)
    # Transform a clone so `sg` itself stays available for comparison.
    _inp, out = apply_transform(crop_time, sg.clone())
    _test_ne(sg.duration, sg_orig.duration)
    # Assert on the transform's output as well: without this the test would
    # still pass if CropTime returned its input untouched.
    _test_ne(sg.width, out.width)
def test_crop_time():
    """Crop a 3-second clip to several target lengths and check duration and width."""
    for target_seconds in (1, 2, 5):
        to_spec = AudioToSpec.from_cfg(AudioConfig.Voice())
        clip = test_audio_tensor(seconds=3)
        cropper = CropTime(target_seconds * 1000)
        before, after = apply_transform(cropper, to_spec(clip))

        # Output duration should round to the requested number of seconds.
        _test_eq(target_seconds, round(after.duration))

        # Output width should scale with the duration ratio, within eps.
        actual_width = after.width
        expected_width = int((target_seconds / before.duration) * before.width)
        _test_close(actual_width, expected_width, eps=1.01)
def test_resize_int():
    """TfmResize given a single int should yield a (side, side) trailing shape."""
    side = 224
    resizer = TfmResize(side)
    clip = test_audio_tensor()
    to_spec = AudioToSpec.from_cfg(AudioConfig.Voice())
    spectrogram = to_spec(clip)
    _, resized = apply_transform(resizer, spectrogram)
    expected_shape = torch.Size([side, side])
    _test_eq(resized.shape[1:], expected_shape)
def test_crop_time_repeat_padding():
    """Check repeat padding when cropping to a multiple of the clip's duration."""
    n_repeats = 3
    clip = test_audio_tensor()
    # Target length is n_repeats times the clip's own duration, scaled by 1000.
    target_length = n_repeats * 1000 * clip.duration
    repeat_crop = CropTime(target_length, pad_mode=AudioPadType.Repeat)
    to_spec = AudioToSpec.from_cfg(AudioConfig.Voice())
    spectrogram = to_spec(clip)
    before, after = apply_transform(repeat_crop, spectrogram)
    # The returned input keeps the original width; the output does not.
    _test_eq(before.width, spectrogram.width)
    _test_ne(spectrogram.width, after.width)
def test_delta_single_channel():
    """Delta on single-channel audio triples the channel count and keeps the other dims.

    nchannels for a spectrogram is how many channels its original audio had.

    This test has its own name because the file also defines
    ``test_delta_channels`` further down; sharing that name would rebind it at
    import time and this test would never be collected.
    """
    delta = Delta()
    audio = test_audio_tensor(channels=1)
    a2s = AudioToSpec.from_cfg(AudioConfig.Voice())
    sg = a2s(audio)
    inp, out = apply_transform(delta, sg)
    _test_eq(out.nchannels, inp.nchannels * 3)
    _test_eq(out.shape[1:], inp.shape[1:])
    # The first two output channels must not be identical.
    _test_ne(out[0], out[1])
def test_mask_freq():
    """Apply a MaskTime with random size, start and value, then check the masked slice.

    NOTE(review): the name says "freq", but the body builds a MaskTime and
    inspects the last axis -- confirm which mask this test was meant to cover.
    """
    # Draw size, start and fill value, in that order, each from [1, 50].
    size = random.randint(1, 50)
    start = random.randint(1, 50)
    val = random.randint(1, 50)

    masker = MaskTime(size=size, start=start, val=val)
    clip = test_audio_tensor()
    to_spec = AudioToSpec.from_cfg(AudioConfig.Voice())
    spectrogram = to_spec(clip)
    before, after = apply_transform(masker, spectrogram)

    # Every element in the masked span of the last axis should equal `val`.
    stop = start + size
    masked_region = after[:, :, start:stop]
    expected_region = val * torch.ones_like(before)[:, :, start:stop]
    _test_eq(masked_region, expected_region)
def test_delta_channels():
    """DeltaGPU on two-channel audio triples the channel count and keeps the last two dims.

    nchannels for a spectrogram is how many channels its original audio had.
    """
    delta_tfm = DeltaGPU()
    # Use two channels so the multi-channel path is exercised explicitly.
    stereo = test_audio_tensor(channels=2)
    to_spec = AudioToSpec.from_cfg(AudioConfig.Voice())
    spectrogram = to_spec(stereo)
    before, after = apply_transform(delta_tfm, spectrogram)

    _test_eq(after.nchannels, before.nchannels * 3)
    _test_eq(after.shape[-2:], before.shape[-2:])

    # Each listed pair of output channels must differ from one another.
    channel_pairs = ((0, 2), (1, 3), (0, 4), (1, 5), (2, 4), (3, 5))
    for left, right in channel_pairs:
        assert not torch.allclose(after[left], after[right])