示例#1
0
    def test_batch_pitch(self):
        """Pitch detection must agree between single and batched input.

        Loads the waveform stored at ``self.test_filepath``, runs
        ``F.detect_pitch_frequency`` once on that waveform alone and once on
        a batch built from three copies of it, and checks that the batched
        output has the same shape as, and is close to, three stacked copies
        of the single-waveform output.
        """
        single, rate = torchaudio.load(self.test_filepath)
        copies = 3

        # Reference path: transform the lone waveform, then tile the result.
        reference = F.detect_pitch_frequency(single, rate)
        reference = reference.unsqueeze(0).repeat(copies, 1, 1)

        # Path under test: tile the waveform first, then transform the batch.
        batch = single.unsqueeze(0).repeat(copies, 1, 1)
        result = F.detect_pitch_frequency(batch, rate)

        # Report both shapes on mismatch so the failure is self-explanatory.
        shapes = (result.shape, reference.shape)
        self.assertTrue(shapes[0] == shapes[1], shapes)
        self.assertTrue(torch.allclose(result, reference))
示例#2
0
    def test_detect_pitch_frequency_pitch(self, frequency):
        """Detected pitch of a pure sinusoid must stay close to ``frequency``.

        Builds a sinusoid at ``frequency`` with ``get_sinusoid`` (sample rate
        44100, ``duration=5``), runs ``F.detect_pitch_frequency`` on it and
        fails if any returned value differs from ``frequency`` by more than 1.

        Args:
            frequency: Target frequency of the generated sinusoid; also the
                value every detected pitch is compared against.
        """
        rate = 44100
        tolerance = 1
        sine = get_sinusoid(frequency=frequency, sample_rate=rate, duration=5)

        detected = F.detect_pitch_frequency(sine, rate)

        # Count the estimates that stray from the target by more than the
        # tolerance; the test passes only when that count is zero.
        deviation = (detected - frequency).abs()
        outliers = (deviation > tolerance).sum()
        self.assertFalse(outliers)
    dct_type=2,
    norm='ortho')

plot_spectrogram(mfcc_librosa)

# Summarise how far the two MFCC results are apart: element-wise squared
# difference, averaged into a single Python float, then printed.
squared_difference = torch.square(mfcc - mfcc_librosa)
mse = squared_difference.mean().item()
print('Mean Square Difference: ', mse)

######################################################################
# Pitch
# -----
#

# Obtain a waveform and its sample rate from the ``get_speech_sample`` helper
# (defined elsewhere in this file).
waveform, sample_rate = get_speech_sample()

# Estimate the pitch of the waveform, then hand the waveform, sample rate and
# pitch to ``plot_pitch`` and the waveform and sample rate to ``play_audio``
# (both defined elsewhere; presumably they render a plot and an audio
# widget -- confirm against their definitions).
pitch = F.detect_pitch_frequency(waveform, sample_rate)
plot_pitch(waveform, sample_rate, pitch)
play_audio(waveform, sample_rate)

######################################################################
# Kaldi Pitch (beta)
# ------------------
#
# Kaldi Pitch feature [1] is a pitch detection mechanism tuned for automatic
# speech recognition (ASR) applications. This is a beta feature in ``torchaudio``,
# and it is available only in ``functional``.
#
# 1. A pitch extraction algorithm tuned for automatic speech recognition
#
#    P. Ghahremani, B. BabaAli, D. Povey, K. Riedhammer, J. Trmal and S.
#    Khudanpur
 def func(tensor):
     # Pitch detection on ``tensor`` with the sample rate fixed at 44100.
     rate = 44100
     detected = F.detect_pitch_frequency(tensor, rate)
     return detected