def test_read_wav_internal(self):
    """Check the Kaldi wave object built from the mono WAV fixture.

    The fixture at ``self.mono_wav`` is loaded with ``wavfile.read``
    (memory-mapping disabled) and converted with
    ``feat.read_wav_kaldi_internal``. The resulting data matrix must have
    exactly one row and as many columns as the loaded array has entries
    along its first axis.
    """
    sample_rate, samples = wavfile.read(self.mono_wav, mmap=False)
    kaldi_wave = feat.read_wav_kaldi_internal(samples, sample_rate)

    # One row in the data matrix corresponds to one channel.
    channel_count = kaldi_wave.data().num_rows
    self.assertEqual(channel_count, 1)

    # Every sample read from disk must be present in the data matrix.
    expected_samples = samples.shape[0]
    self.assertEqual(expected_samples, kaldi_wave.data().num_cols)
Exemplo n.º 2
0
def get_ppg(wav_path, deps):
    """Compute the full PPG sequence for the audio stored in a WAV file.

    Args:
        wav_path: Path to the ``*.wav`` file to read.
        deps: Object exposing ``nnet`` and ``lda`` attributes; the original
            notes describe it as a ``DependenciesPPG()`` instance.

    Returns:
        The value produced by ``ppg.compute_full_ppg_wrapper`` for the
        loaded audio.

    Note:
        The module is expected to provide ``wavfile``
        (``from scipy.io import wavfile``) and ``feat``
        (``from common import feat``).
    """
    sample_rate, samples = wavfile.read(wav_path)
    kaldi_wave = feat.read_wav_kaldi_internal(samples, sample_rate)
    # The trailing 10 is forwarded unchanged to the wrapper
    # (presumably the frame shift in ms -- TODO confirm).
    return ppg.compute_full_ppg_wrapper(kaldi_wave, deps.nnet, deps.lda, 10)
    def get_monophone_ppg(self) -> ndarray:
        """Compute, store, and return the monophone ppg of this utterance.

        Requires a non-empty waveform (``self.wav``), a non-negative sampling
        frequency (``self.fs``), and a frame shift (``self.kaldi_shift``) of
        at least 1. The result is also kept on ``self.monophone_ppg``.

        Returns:
            The monophone ppgs in numpy ndarray format.

        Raises:
            ValueError: If ``self.kaldi_shift`` is below 1, if ``self.wav`` is
                empty, or if ``self.fs`` is negative.
        """
        if self.kaldi_shift < 1:  # ms
            # Interpolate explicitly: ValueError does not apply %-formatting
            # to extra positional arguments the way logging calls do, so the
            # previous form produced a tuple-looking message.
            raise ValueError('Invalid frame kaldi frame shift parameter %s.'
                             % (self.kaldi_shift,))
        # NOTE(review): fs == 0 passes this check; confirm whether a zero
        # sampling frequency should be rejected as well.
        if self.wav.size == 0 or self.fs < 0:
            raise ValueError('To perform alignment, the object must contain '
                             'valid speech data and sampling frequency.')

        wav_kaldi = read_wav_kaldi_internal(self.wav, self.fs)
        # A fresh dependency bundle is built on every call.
        ppg_deps = ppg.DependenciesPPG()
        self.monophone_ppg = ppg.compute_monophone_ppg(wav_kaldi, ppg_deps.nnet,
                                                       ppg_deps.lda,
                                                       ppg_deps.monophone_trans,
                                                       self.kaldi_shift)
        return self.monophone_ppg
Exemplo n.º 4
0
def get_ppg(wav_path, deps):
    """Load a WAV file and return its full PPG sequence.

    Args:
        wav_path: Path handed to ``wavfile.read``.
        deps: Object whose ``nnet`` and ``lda`` attributes are forwarded to
            ``ppg.compute_full_ppg_wrapper``.

    Returns:
        The result of ``ppg.compute_full_ppg_wrapper`` for the loaded audio.
    """
    rate, signal = wavfile.read(wav_path)
    kaldi_wave = feat.read_wav_kaldi_internal(signal, rate)
    # The literal 10 is forwarded as the wrapper's last positional argument.
    return ppg.compute_full_ppg_wrapper(kaldi_wave, deps.nnet, deps.lda, 10)
Exemplo n.º 5
0
def get_ppg(wav, fs):
    """Return the full PPG sequence for an in-memory waveform.

    Args:
        wav: Waveform samples, passed to ``feat.read_wav_kaldi_internal``.
        fs: Sampling frequency that accompanies ``wav``.

    Returns:
        The result of ``compute_full_ppg_wrapper`` for the converted
        waveform.

    Note:
        Uses the module-level ``ppg_deps`` (for ``nnet`` and ``lda``) and
        ``mel_window_step`` rather than taking them as parameters.
    """
    kaldi_wave = feat.read_wav_kaldi_internal(wav, fs)
    return compute_full_ppg_wrapper(
        kaldi_wave,
        ppg_deps.nnet,
        ppg_deps.lda,
        mel_window_step,
    )