def test_saved_spectrogram_keeps_metadata():
    """Round-trip a spectrogram through ``torch.save`` / ``torch.load``.

    The reloaded object must expose the same ``sr`` attribute as the audio
    the spectrogram was computed from.
    """
    # NOTE(review): the original comment said this shares an issue with the
    # preceding test; that test is not visible from here, so confirm there.
    audio = test_audio_tensor()
    db_mel_factory = SpectrogramTransformer(mel=True, to_db=True)
    to_spec = db_mel_factory(f_max=20000, n_mels=137)
    spectrogram = to_spec(audio)

    with TemporaryFile("wb+") as handle:
        torch.save(spectrogram, handle)
        # Rewind to the beginning of the file so the load reads what was
        # just written.
        handle.seek(0, 0)
        restored = torch.load(handle)

    assert restored.sr == audio.sr
def test_spectrograms_right_side_up():
    """Check that a very low-frequency tone peaks in the first rows.

    A 5 Hz cosine is converted to a dB mel spectrogram; the row holding the
    maximum most often must have an index below 2.
    """
    rate = 16000
    db_mel_factory = SpectrogramTransformer(mel=True, to_db=True)
    to_spec = db_mel_factory(
        sample_rate=rate,
        n_fft=1024,
        win_length=1024,
        hop_length=512,
        f_min=0.0,
        f_max=20000,
        pad=0,
        n_mels=137,
    )

    # One second of a 5 Hz cosine at half amplitude.
    timeline = torch.arange(0, 1.0, 1.0 / rate)
    tone = 0.5 * torch.cos(2 * math.pi * 5 * timeline)
    audio = AudioTensor(tone[None], rate)

    spectrogram = to_spec(audio)

    # Index of the maximum along dim 1 for every remaining position, then the
    # most frequent of those indices.
    peak_rows = spectrogram.max(dim=1).indices
    max_row = peak_rows.mode().values.item()

    assert max_row < 2
def test_crop_time_with_pipeline(ex_files):
    """Compare the two orderings of cropping and spectrogram conversion.

    Spectrogram -> ``CropTime`` and ``ResizeSignal`` -> spectrogram must
    produce outputs of the same ``width`` for the first four items.
    """
    loader = OpenAudio(ex_files)
    # Random target duration (presumably milliseconds -- confirm against
    # CropTime / ResizeSignal).
    target_duration = random.randint(1000, 5000)
    db_mel_factory = SpectrogramTransformer(mel=True, to_db=True)

    crop_after_spec = Pipeline(
        [loader, db_mel_factory(hop_length=128), CropTime(target_duration)]
    )
    resize_before_spec = Pipeline(
        [loader, ResizeSignal(target_duration), db_mel_factory(hop_length=128)]
    )

    for idx in range(4):
        cropped_width = crop_after_spec(idx).width
        resized_width = resize_before_spec(idx).width
        _test_eq(cropped_width, resized_width)
def test_load_audio():
    """Verify metadata on the sample audio and on spectrograms built from it.

    Checks the audio tensor's attributes first, then a spectrogram built with
    explicit ``f_max`` / ``n_mels``, and finally a second spectrogram that
    relies on ``MelSpectrogram`` defaults with a custom hop length.
    """
    audio = test_audio_tensor()
    db_mel_factory = SpectrogramTransformer(mel=True, to_db=True)
    spec = db_mel_factory(f_max=20000, n_mels=137)(audio)

    # --- the audio item itself ---
    # Exact type comparison is kept on purpose (not isinstance).
    assert type(audio.data) == torch.Tensor
    assert audio.sr == 16000
    assert audio.nchannels == 1
    assert audio.nsamples == 32000
    assert audio.duration == 2

    # --- spectrogram built with explicit settings ---
    assert spec.f_max == 20000
    assert spec.hop_length == 512
    assert spec.sr == audio.sr
    assert spec.mel
    assert spec.to_db
    assert spec.nchannels == 1
    assert spec.height == 137
    assert spec.n_mels == spec.height
    assert spec.width == 63

    # Default values declared in MelSpectrogram's signature.
    mel_params = inspect.signature(MelSpectrogram).parameters
    defaults = {name: param.default for name, param in mel_params.items()}

    # --- spectrogram relying on defaults, with a custom hop length ---
    custom_hop = 345
    spec = db_mel_factory(f_max=20000, hop_length=custom_hop)(audio)
    assert spec.n_mels == defaults["n_mels"]
    assert spec.n_fft == 1024
    assert spec.shape[1] == spec.n_mels
    assert spec.hop_length == custom_hop

    # The spectrogram and the audio should have the same duration; both are
    # computed on the fly because transforms can change their duration.
    _close(spec.duration, audio.duration, eps=0.1)