def test_crop_time_after_padding():
    "Cropping longer than the clip with Zeros_After padding should pad the sg."
    raw = test_audio_tensor()
    to_spec = AudioToSpec.from_cfg(AudioConfig.Voice())
    spec = to_spec(raw)
    tfm = CropTime((spec.duration + 5) * 1000, pad_mode=AudioPadType.Zeros_After)
    inp, out = apply_transform(tfm, spec.clone())
    # NOTE(review): this asserts on the pre-transform spectrogram vs the raw
    # audio and never inspects `out` — looks like it may have been meant to
    # compare `out.duration`; confirm intent before changing.
    _test_ne(spec.duration, raw.duration)
def test_crop_time():
    "Cropping to N seconds yields a sg of that duration and proportional width."
    for secs in (1, 2, 5):
        to_spec = AudioToSpec.from_cfg(AudioConfig.Voice())
        audio = test_audio_tensor(seconds=3)
        inp, out = apply_transform(CropTime(secs * 1000), to_spec(audio))
        _test_eq(secs, round(out.duration))
        # Width should shrink in the same ratio as the duration did.
        _test_close(out.width, int((secs / inp.duration) * inp.width), eps=1.01)
def test_resize_int():
    "Passing an int to TfmResize resizes the sg to a square of that size."
    target = 224
    tfm = TfmResize(target)
    spec = AudioToSpec.from_cfg(AudioConfig.Voice())(test_audio_tensor())
    inp, out = apply_transform(tfm, spec)
    _test_eq(out.shape[1:], torch.Size([target, target]))
def test_crop_time_repeat_padding():
    "Test that repeat padding works when cropping time"
    n_repeats = 3
    audio = test_audio_tensor()
    tfm = CropTime(n_repeats * 1000 * audio.duration, pad_mode=AudioPadType.Repeat)
    spec = AudioToSpec.from_cfg(AudioConfig.Voice())(audio)
    inp, out = apply_transform(tfm, spec)
    # The input sg is left untouched, while the repeated output grows wider.
    _test_eq(inp.width, spec.width)
    _test_ne(spec.width, out.width)
def test_delta_channels():
    " nchannels for a spectrogram is how many channels its original audio had "
    tfm = Delta()
    spec = AudioToSpec.from_cfg(AudioConfig.Voice())(test_audio_tensor(channels=1))
    inp, out = apply_transform(tfm, spec)
    # Delta stacks original + delta + delta-delta, tripling the channel count
    # while leaving the per-channel shape alone.
    _test_eq(out.nchannels, inp.nchannels * 3)
    _test_eq(out.shape[1:], inp.shape[1:])
    _test_ne(out[0], out[1])
def test_load_audio_with_basic_config():
    """
    Check that n_fft and hop_length from the config are applied and stored
    in the resulting spectrogram's settings.
    """
    cfg = AudioConfig.BasicSpectrogram(n_fft=2000, hop_length=155)
    to_spec = AudioToSpec.from_cfg(cfg)
    audio = test_audio_tensor()
    sg = to_spec(audio)
    assert sg.n_fft == cfg.n_fft
    # Width is one frame per hop, plus one for the final (partial) frame.
    assert sg.width == int(audio.nsamples / cfg.hop_length) + 1
def test_mask_freq():
    # NOTE(review): despite the name, this builds a MaskTime and checks the
    # last (time) axis — either the name or the transform looks wrong; verify
    # against any sibling `test_mask_time` before renaming.
    # Draw a random mask size, start position and fill value.
    size, start, val = [random.randint(1, 50) for _ in range(3)]
    tfm = MaskTime(size=size, start=start, val=val)
    sg = AudioToSpec.from_cfg(AudioConfig.Voice())(test_audio_tensor())
    inp, out = apply_transform(tfm, sg)
    # The masked band along the final axis must be filled with `val`.
    _test_eq(
        out[:, :, start:start + size],
        val * torch.ones_like(inp)[:, :, start:start + size],
    )
def test_load_audio_with_basic_config_from_file():
    """
    Load a real file and check that n_fft and hop_length are passed via the
    config and stored in the sg settings.

    Renamed from `test_load_audio_with_basic_config`: that name is already
    defined earlier in this file, so a second definition would shadow the
    first and pytest would only collect one of the two tests.
    """
    p = untar_data(URLs.SAMPLE_SPEAKERS10)
    f = p / "train/f0001_us_f0001_00001.wav"
    oa = OpenAudio([f])
    cfg = AudioConfig.BasicSpectrogram(n_fft=2000, hop_length=155)
    a2sg = AudioToSpec.from_cfg(cfg)
    audio = oa(0)  # load once instead of decoding the file twice
    sg = a2sg(audio)
    assert sg.n_fft == cfg.n_fft
    # Width is one frame per hop, plus one for the final (partial) frame.
    assert sg.width == int(audio.nsamples / cfg.hop_length) + 1
def test_delta_gpu_channels():
    "DeltaGPU triples the channel count (original + delta + delta-delta)."
    # Renamed from `test_delta_channels`: that name is already defined earlier
    # in this file, so a redefinition would shadow it and pytest would only
    # collect one of the two tests.
    tfm = DeltaGPU()
    # Explicitly check more than one channel
    audio = test_audio_tensor(channels=2)
    sg = AudioToSpec.from_cfg(AudioConfig.Voice())(audio)
    inp, out = apply_transform(tfm, sg)
    _test_eq(out.nchannels, inp.nchannels * 3)
    _test_eq(out.shape[-2:], inp.shape[-2:])
    # Each original/delta/delta-delta channel pairing must actually differ.
    for i1, i2 in [(0, 2), (1, 3), (0, 4), (1, 5), (2, 4), (3, 5)]:
        assert not torch.allclose(out[i1], out[i2])
def test_basic_config():
    "Make sure mel setting is passed down and is false for normal spectro"
    cfg = AudioConfig.BasicSpectrogram()
    # `is False` instead of `== False` (PEP 8 E712): assert the flag really is
    # the boolean False, not merely a falsy value like 0.
    assert cfg.mel is False
def test_show_spectrogram():
    "Calling .show() on an MFCC spectrogram should not raise."
    to_mfcc = AudioToMFCC.from_cfg(AudioConfig.BasicMFCC())
    sg = to_mfcc(test_audio_tensor())
    sg.show()
def test_mfcc_transform():
    "The MFCC transform should produce a rank-3 output."
    sg = AudioToMFCC.from_cfg(AudioConfig.BasicMFCC())(test_audio_tensor())
    assert len(sg.shape) == 3
def test_sg_roll():
    "SGRoll must change the spectrogram."
    tfm = SGRoll()
    spec = AudioToSpec.from_cfg(AudioConfig.BasicSpectrogram())(test_audio_tensor())
    inp, out = apply_transform(tfm, spec)
    _test_ne(inp, out)
def test_signal_shift_on_sg():
    "SignalShifter applied to a spectrogram must change it."
    spec = AudioToSpec.from_cfg(AudioConfig.BasicSpectrogram())(test_audio_tensor())
    inp, out = apply_transform(SignalShifter(1, 1), spec)
    _test_ne(inp, out)