def test_amplitude_to_DB_reversible(self, shape):
    """Amplitude/power -> dB -> amplitude/power must reproduce the input.

    Runs the round trip for an arbitrary tensor `shape`; this implicitly
    also tests `DB_to_amplitude`.
    """
    amin = 1e-10
    ref = 1.0
    db_mult = math.log10(max(amin, ref))

    torch.manual_seed(0)
    spec = torch.rand(*shape, dtype=self.dtype, device=self.device) * 200

    # Amplitude scale: forward multiplier 20, inverse exponent 0.5.
    db = F.amplitude_to_DB(spec, 20., amin, db_mult, top_db=None)
    roundtrip = F.DB_to_amplitude(db, ref, 0.5)
    self.assertEqual(roundtrip, spec, atol=5e-5, rtol=1e-5)

    # Power scale: forward multiplier 10, inverse exponent 1.
    db = F.amplitude_to_DB(spec, 10., amin, db_mult, top_db=None)
    roundtrip = F.DB_to_amplitude(db, ref, 1.)
    self.assertEqual(roundtrip, spec)
def test_DB_to_amplitude(self):
    """`DB_to_amplitude` inverts `amplitude_to_DB` for waveforms and spectrograms."""
    # Make some noise and its spectrogram.
    x = torch.rand(1000)
    spec = torchaudio.transforms.Spectrogram()(x)

    amin = 1e-10
    ref = 1.0
    db_multiplier = math.log10(max(amin, ref))

    # Amplitude scale: forward multiplier 20, inverse exponent 0.5.
    multiplier, power = 20., 0.5

    # Waveform amplitude -> DB -> amplitude
    db = F.amplitude_to_DB(torch.abs(x), multiplier, amin, db_multiplier, top_db=None)
    self.assertEqual(F.DB_to_amplitude(db, ref, power), torch.abs(x), atol=5e-5, rtol=1e-5)

    # Spectrogram amplitude -> DB -> amplitude
    db = F.amplitude_to_DB(spec, multiplier, amin, db_multiplier, top_db=None)
    self.assertEqual(F.DB_to_amplitude(db, ref, power), spec, atol=5e-5, rtol=1e-5)

    # Power scale: forward multiplier 10, inverse exponent 1.
    multiplier, power = 10., 1.

    # Waveform power -> DB -> power
    db = F.amplitude_to_DB(x, multiplier, amin, db_multiplier, top_db=None)
    self.assertEqual(F.DB_to_amplitude(db, ref, power), torch.abs(x), atol=5e-5, rtol=1e-5)

    # Spectrogram power -> DB -> power
    db = F.amplitude_to_DB(spec, multiplier, amin, db_multiplier, top_db=None)
    self.assertEqual(F.DB_to_amplitude(db, ref, power), spec, atol=5e-5, rtol=1e-5)
def db_to_amplitude(spectrogram: Tensor, hp: HParams) -> Tensor:
    r"""Wrapper around torchaudio.functional.DB_to_amplitude().

    Args:
        spectrogram (Tensor): spectrogram in the decibel scale.
            Shape: [B, FREQ, FRAMES] or [B, N_MELS, FRAMES] if it is a melspectrogram.
        hp (HParams): parameters. Parameters needed are power
            (read from ``hp.audio.spectrogram_type``).

    Returns:
        spectrogram (Tensor): spectrogram in power/amplitude scale.
            Shape: [B, FREQ, FRAMES] or [B, N_MELS, FRAMES] if it is a melspectrogram.
    """
    # NOTE: `assert` is stripped under `python -O`; kept (rather than raising
    # ValueError) so the exception type seen by callers is unchanged.
    assert spectrogram.dim() == 3, \
        "Dimensions of spectrogram should be 3: [B, FREQ, FRAMES] or [B, N_MELS, FRAMES], " \
        "but found {}".format(spectrogram.dim())
    # DB_to_amplitude computes ref * 10 ** (db * power): power=1 inverts a
    # power-scale spectrogram, power=0.5 an amplitude-scale one.
    power_exp = 1 if hp.audio.spectrogram_type == 'power' else 0.5
    return F.DB_to_amplitude(spectrogram, ref=1, power=power_exp)
def __getitem__(self, idx):
    """Return the tensors for global frame `idx`.

    Returns a 5-tuple: (embedding, spectrogram, preprocessed audio,
    source file name, frame index within that file).
    """
    # Wrap the index when a fixed dataset length is configured (-1 means use all frames).
    if self.num_frames != -1:
        idx = idx % self.num_frames
    # Map the global frame index onto a file via the per-file cumulative frame
    # counts. NOTE(review): binarySearch is defined elsewhere in this file; it
    # appears to return the bracketing positions in `cumulative_sum` — confirm
    # its exact contract before modifying this logic.
    low_index, high_index = binarySearch(self.cumulative_sum, idx+1)
    file_name = self.df.iloc[high_index]['file_name']
    # The same base file name indexes all three per-frame arrays.
    emb_path = os.path.join(self.embeddings_dir, file_name)
    spec_path = os.path.join(self.spectrograms_dir, file_name)
    audio_prep_path = os.path.join(self.audio_pred_dir, file_name)
    # Offset of the requested frame within the selected file; the first file
    # needs no cumulative-sum subtraction.
    if low_index == 0 and high_index == 0:
        frame_idx = idx
    else:
        frame_idx = idx - self.cumulative_sum[low_index]
    with open(emb_path, 'rb') as f:
        emb = np.load(f)
    with open(spec_path, 'rb') as f:
        spec = np.load(f)
    with open(audio_prep_path, 'rb') as f:
        audio_prep = np.load(f)
    emb_tensor = torch.from_numpy(emb[frame_idx]).float()
    # Permute (2, 0, 1): spectrogram frames presumably stored HWC, converted
    # to CHW for torch — TODO confirm against the writer of these .npy files.
    spec_tensor = torch.from_numpy(spec[frame_idx]).float().permute(2, 0, 1)
    audio_prep_tensor = torch.from_numpy(audio_prep[frame_idx]).float()
    # Optional per-dimension standardisation of the embedding.
    if self.emb_means is not None and self.emb_stds is not None:
        emb_tensor = (
            emb_tensor - torch.tensor(self.emb_means)
        ) / torch.tensor(self.emb_stds)
    if self.return_amp is True:
        # Convert the dB-scale spectrogram back to amplitude scale
        # (power=0.5, i.e. 10 ** (db * 0.5)), then optionally standardise.
        spec_tensor_amp = F.DB_to_amplitude(x = spec_tensor, ref = 1, power = 0.5)
        if self.spec_means is not None and self.spec_stds is not None:
            spec_tensor_amp = (
                spec_tensor_amp - torch.tensor(self.spec_means)
            ) / torch.tensor(self.spec_stds)
        spec_tensor_amp = spec_tensor_amp.float()
        return emb_tensor, spec_tensor_amp, audio_prep_tensor, file_name, torch.tensor(frame_idx)
    else:
        return emb_tensor, spec_tensor, audio_prep_tensor, file_name, torch.tensor(frame_idx)
def func(tensor):
    """Map a power-scale dB tensor back to power scale: ref=1, power=1."""
    return F.DB_to_amplitude(tensor, 1., 1.)