Example #1
0
    def test_amplitude_to_DB_reversible(self, shape):
        """Round trip between amplitude/power and dB should return the original for various shapes.

        This implicitly also tests `DB_to_amplitude`.
        """
        amplitude_mult = 20.
        power_mult = 10.
        amin = 1e-10
        ref = 1.0
        # log10 of the reference level; max() guards against ref < amin.
        db_mult = math.log10(max(amin, ref))

        torch.manual_seed(0)
        spec = torch.rand(*shape, dtype=self.dtype, device=self.device) * 200

        # Spectrogram amplitude -> DB -> amplitude
        # (power=0.5 inverts the 20 * log10 amplitude scaling)
        db = F.amplitude_to_DB(spec,
                               amplitude_mult,
                               amin,
                               db_mult,
                               top_db=None)
        x2 = F.DB_to_amplitude(db, ref, 0.5)

        self.assertEqual(x2, spec, atol=5e-5, rtol=1e-5)

        # Spectrogram power -> DB -> power
        # (power=1. inverts the 10 * log10 power scaling)
        db = F.amplitude_to_DB(spec, power_mult, amin, db_mult, top_db=None)
        x2 = F.DB_to_amplitude(db, ref, 1.)

        # Use the same float tolerances as the amplitude round trip above;
        # an exact comparison can fail due to log/exp rounding error.
        self.assertEqual(x2, spec, atol=5e-5, rtol=1e-5)
Example #2
0
    def test_DB_to_amplitude(self):
        """Check that DB_to_amplitude inverts amplitude_to_DB for waveforms and spectrograms."""
        # Make some noise
        x = torch.rand(1000)
        spec = torchaudio.transforms.Spectrogram()(x)

        amin = 1e-10
        ref = 1.0
        db_multiplier = math.log10(max(amin, ref))

        def _round_trip(signal, multiplier, power):
            # signal (amplitude or power scale) -> dB -> back again
            db = F.amplitude_to_DB(signal, multiplier, amin, db_multiplier, top_db=None)
            return F.DB_to_amplitude(db, ref, power)

        # Amplitude scale: 20 * log10 forward, power=0.5 inverse.
        multiplier = 20.
        power = 0.5

        # Waveform amplitude -> DB -> amplitude
        x2 = _round_trip(torch.abs(x), multiplier, power)
        self.assertEqual(x2, torch.abs(x), atol=5e-5, rtol=1e-5)

        # Spectrogram amplitude -> DB -> amplitude
        x2 = _round_trip(spec, multiplier, power)
        self.assertEqual(x2, spec, atol=5e-5, rtol=1e-5)

        # Power scale: 10 * log10 forward, power=1. inverse.
        multiplier = 10.
        power = 1.

        # Waveform power -> DB -> power
        x2 = _round_trip(x, multiplier, power)
        self.assertEqual(x2, torch.abs(x), atol=5e-5, rtol=1e-5)

        # Spectrogram power -> DB -> power
        x2 = _round_trip(spec, multiplier, power)
        self.assertEqual(x2, spec, atol=5e-5, rtol=1e-5)
Example #3
0
def db_to_amplitude(spectrogram: Tensor, hp: HParams) -> Tensor:
    r"""Wrapper around torchaudio.functional.DB_to_amplitude().

    Args:
        spectrogram (Tensor): spectrogram in the decibel scale.
            Shape: [B, FREQ, FRAMES] or [B, N_MELS, FRAMES] if it is a melspectrogram.
        hp (HParams): parameters. Parameters needed are audio.spectrogram_type.

    Returns:
        spectrogram (Tensor): spectrogram in power/amplitude scale.
            Shape: [B, FREQ, FRAMES] or [B, N_MELS, FRAMES] if it is a melspectrogram.

    Raises:
        AssertionError: if ``spectrogram`` is not a 3-dimensional tensor.
    """
    assert len(spectrogram.size()) == 3, \
        "Dimensions of spectrogram should be 3: [B, FREQ, FRAMES] or [B, N_MELS, FRAMES], " \
        "but found {}".format(len(spectrogram.size()))

    # power_exp calculated according to torchaudio.functional.DB_to_amplitude docs:
    # power=1 inverts a power (10*log10) dB scale, power=0.5 an amplitude (20*log10) one.
    power_exp = 1 if hp.audio.spectrogram_type == 'power' else 0.5
    return F.DB_to_amplitude(spectrogram, ref=1, power=power_exp)
 def __getitem__(self, idx):
     """Fetch one dataset frame.

     Returns a 5-tuple: (embedding tensor, spectrogram tensor,
     preprocessed-audio tensor, source file name, local frame index).
     """
     
     if self.num_frames != -1:
         # -1 means "use every frame"; otherwise wrap the index so any idx is valid.
         idx = idx % self.num_frames
     
     # Locate which file holds global frame idx in the cumulative frame counts.
     # NOTE(review): assumes binarySearch returns the bracketing (low, high)
     # positions in self.cumulative_sum — confirm against its definition.
     low_index, high_index = binarySearch(self.cumulative_sum, idx+1)
     file_name = self.df.iloc[high_index]['file_name']
     # The same file name exists in all three data directories.
     emb_path = os.path.join(self.embeddings_dir, file_name)        
     spec_path = os.path.join(self.spectrograms_dir, file_name)
     audio_prep_path = os.path.join(self.audio_pred_dir, file_name)
     
     if low_index == 0 and high_index == 0:
         # First file: the global index is already the local frame index.
         frame_idx = idx
     else:
         # Offset by the total number of frames in all preceding files.
         frame_idx = idx - self.cumulative_sum[low_index]
         
     with open(emb_path, 'rb') as f:
         emb = np.load(f)
     with open(spec_path, 'rb') as f:
         spec = np.load(f)
     with open(audio_prep_path, 'rb') as f:
         audio_prep = np.load(f)
     
     emb_tensor = torch.from_numpy(emb[frame_idx]).float()
     # Move the last axis first (e.g. HWC -> CHW); assumes 3-D spectrogram
     # frames — TODO confirm the stored layout.
     spec_tensor = torch.from_numpy(spec[frame_idx]).float().permute(2, 0, 1)
     audio_prep_tensor = torch.from_numpy(audio_prep[frame_idx]).float()
     
     if self.emb_means is not None and self.emb_stds is not None:
         # Standardize the embedding with precomputed statistics.
         emb_tensor = ( emb_tensor - torch.tensor(self.emb_means) ) / torch.tensor(self.emb_stds)
     
             
     if self.return_amp is True:
         # Stored spectrograms are in dB; convert back to amplitude
         # (power=0.5 inverts a 20*log10 amplitude scale).
         spec_tensor_amp = F.DB_to_amplitude(x = spec_tensor, ref = 1, power = 0.5)
         
         if self.spec_means is not None and self.spec_stds is not None:
             # Standardize the amplitude spectrogram with precomputed statistics.
             spec_tensor_amp = ( spec_tensor_amp - torch.tensor(self.spec_means) ) / torch.tensor(self.spec_stds)
             spec_tensor_amp = spec_tensor_amp.float()
         
         return emb_tensor, spec_tensor_amp, audio_prep_tensor, file_name, torch.tensor(frame_idx)
     else:
         return emb_tensor, spec_tensor, audio_prep_tensor, file_name, torch.tensor(frame_idx)
 def func(tensor):
     """Convert a dB-scaled tensor back to the linear power scale (ref=1, power=1)."""
     reference = 1.
     exponent = 1.
     converted = F.DB_to_amplitude(tensor, reference, exponent)
     return converted