def _preprocess_audio(audio_file_path, audio_featurizer, normalize):
  """Load the audio file and compute spectrogram feature."""
  data, _ = soundfile.read(audio_file_path)
  feature = featurizer.compute_spectrogram_feature(
      data, audio_featurizer.sample_rate, audio_featurizer.stride_ms,
      audio_featurizer.window_ms)
  # Feature normalization
  if normalize:
    feature = _normalize_audio_feature(feature)

  # Adding Channel dimension for conv2D input.
  feature = np.expand_dims(feature, axis=2)
  return feature
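

# The call above uses featurizer.compute_spectrogram_feature, which is assumed
# to come from a companion featurizer module that is not shown in this section.
# The helper below is a minimal, hypothetical sketch of what such a routine
# could look like (Hanning-windowed log power spectrogram with frames of
# window_ms taken every stride_ms), not the project's actual implementation.
# It assumes `import numpy as np`, consistent with the np.expand_dims call
# above.
def _compute_spectrogram_feature_sketch(samples, sample_rate, stride_ms=10.0,
                                        window_ms=20.0, eps=1e-14):
  """Sketch: framewise log power spectrogram, shape [num_frames, num_bins]."""
  window_size = int(0.001 * sample_rate * window_ms)
  stride_size = int(0.001 * sample_rate * stride_ms)
  num_frames = 1 + (len(samples) - window_size) // stride_size
  weighting = np.hanning(window_size)
  # Slice overlapping frames and apply the window before the FFT.
  frames = np.stack([
      samples[i * stride_size:i * stride_size + window_size] * weighting
      for i in range(num_frames)
  ])
  power = np.abs(np.fft.rfft(frames, axis=1)) ** 2
  return np.log(power + eps)


# Hypothetical usage: one second of 16 kHz audio yields roughly 99 frames of
# 161 frequency bins.
# feature = _compute_spectrogram_feature_sketch(
#     np.random.uniform(-1, 1, 16000).astype(np.float32), 16000)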
def _preprocess_audio(audio_file_path, audio_sample_rate, audio_featurizer,
                      normalize):
  """Load the audio file in memory and compute spectrogram feature."""
  tf.logging.info(
      "Extracting spectrogram feature for {}".format(audio_file_path))
  sample_rate, data = wavfile.read(audio_file_path)
  # Fail fast if the file's sample rate differs from the expected one.
  assert sample_rate == audio_sample_rate
  # Convert integer PCM samples to floats in [-1, 1].
  if data.dtype not in [np.float32, np.float64]:
    data = data.astype(np.float32) / np.iinfo(data.dtype).max
  feature = featurizer.compute_spectrogram_feature(
      data, audio_featurizer.frame_length, audio_featurizer.frame_step,
      audio_featurizer.fft_length)
  if normalize:
    feature = _normalize_audio_feature(feature)
  return feature
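

# Both variants call _normalize_audio_feature, which is not defined in this
# section. The body below is a minimal sketch under the assumption that
# normalization means zero mean and unit variance per frequency bin over time
# (again assuming `import numpy as np`, as in the surrounding code).
def _normalize_audio_feature(audio_feature):
  """Sketch: normalize each feature dimension to zero mean, unit variance."""
  mean = np.mean(audio_feature, axis=0)
  std = np.std(audio_feature, axis=0)
  # Small epsilon guards against division by zero for silent bins.
  return (audio_feature - mean) / (std + 1e-6)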