def extract_fbank_features(
    waveform,
    sample_rate: int,
    output_path: Optional[Path] = None,
    n_mel_bins: int = 80,
    overwrite: bool = False,
):
    """Compute filterbank features for a waveform, optionally saving them.

    The features come from ``_get_kaldi_fbank``; when that returns ``None``
    ``_get_torchaudio_fbank`` is tried instead.

    Args:
        waveform: Audio samples. Must support multiplication by a scalar and
            ``.squeeze().numpy()`` (presumably a torch tensor in [-1, 1] --
            confirm against callers).
        sample_rate: Sampling rate forwarded to the fbank backends.
        output_path: Where to store the features with ``np.save``. ``None``
            means "return the features instead of saving". A ``str`` is
            accepted as well as a ``Path``.
        n_mel_bins: Number of mel bins forwarded to the fbank backends.
        overwrite: Recompute and rewrite the file even if it already exists.

    Returns:
        The feature array when ``output_path`` is ``None``; otherwise
        ``None`` (either the file already existed or it was just written).

    Raises:
        ImportError: If both backends return ``None``.
    """
    save_path = None
    if output_path is not None:
        save_path = Path(output_path)
        # np.save() appends ".npy" when the name lacks it, so resolve the
        # real on-disk name first; otherwise the existence check below would
        # look at a different file than the one that gets written and
        # ``overwrite=False`` would never take effect.
        posix_path = save_path.as_posix()
        if not posix_path.endswith(".npy"):
            save_path = Path(posix_path + ".npy")
        if save_path.is_file() and not overwrite:
            return None

    # Kaldi compliance: 16-bit signed integers
    _waveform = waveform * (2**15)
    _waveform = _waveform.squeeze().numpy()

    features = _get_kaldi_fbank(_waveform, sample_rate, n_mel_bins)
    if features is None:
        # First backend unavailable: fall back to the second one.
        features = _get_torchaudio_fbank(_waveform, sample_rate, n_mel_bins)
    if features is None:
        raise ImportError(
            "Please install pyKaldi or torchaudio to enable fbank feature extraction"
        )

    if save_path is None:
        return features
    np.save(save_path.as_posix(), features)
    return None
def extract_fbank_features(
    waveform,
    sample_rate,
    output_path=None,
    n_mel_bins=80,
    apply_utterance_cmvn=True,
    overwrite=False,
):
    """Compute filterbank features for a waveform, optionally saving them.

    The features come from ``_get_kaldi_fbank``; when that returns ``None``
    ``_get_torchaudio_fbank`` is tried instead. If ``apply_utterance_cmvn``
    is set, the result is passed through
    ``UtteranceCMVN(norm_means=True, norm_vars=True)`` before being saved or
    returned.

    Args:
        waveform: Audio samples. Must support multiplication by a scalar and
            ``.squeeze().numpy()`` (presumably a torch tensor in [-1, 1] --
            confirm against callers).
        sample_rate: Sampling rate forwarded to the fbank backends.
        output_path: Path (``str`` or path-like) to store the features with
            ``np.save``. ``None`` means "return the features instead".
        n_mel_bins: Number of mel bins forwarded to the fbank backends.
        apply_utterance_cmvn: Whether to run ``UtteranceCMVN`` on the
            extracted features.
        overwrite: Recompute and rewrite the file even if it already exists.

    Returns:
        The feature array when ``output_path`` is ``None``; otherwise
        ``None`` (either the file already existed or it was just written).

    Raises:
        ImportError: If both backends return ``None``.
    """
    save_path = None
    if output_path is not None:
        save_path = str(output_path)
        # np.save() appends ".npy" when the name lacks it, so resolve the
        # real on-disk name first; otherwise the existence check below would
        # look at a different file than the one that gets written and
        # ``overwrite=False`` would never take effect.
        if not save_path.endswith(".npy"):
            save_path += ".npy"
        if op.exists(save_path) and not overwrite:
            return None

    # Kaldi compliance: 16-bit signed integers
    _waveform = waveform * (2**15)
    _waveform = _waveform.squeeze().numpy()

    features = _get_kaldi_fbank(_waveform, sample_rate, n_mel_bins)
    if features is None:
        # First backend unavailable: fall back to the second one.
        features = _get_torchaudio_fbank(_waveform, sample_rate, n_mel_bins)
    if features is None:
        raise ImportError("Please install pyKaldi or torchaudio to enable "
                          "online filterbank feature extraction")

    if apply_utterance_cmvn:
        cmvn = UtteranceCMVN(norm_means=True, norm_vars=True)
        features = cmvn(features)

    if save_path is None:
        return features
    np.save(save_path, features)
    return None