def get_denoised_audio(noisyAudio):
    """Run the denoising model on a noisy signal and rebuild the audio.

    The STFT of ``noisyAudio`` is split into magnitude and phase. Only the
    magnitude, standardized with its global mean and standard deviation, is
    fed to ``model``; the noisy phase and the normalization statistics are
    handed to ``revert_features_to_audio`` for reconstruction.

    ``windowLength``, ``overlap``, ``sr``, ``model``,
    ``prepare_input_features`` and ``revert_features_to_audio`` are not
    parameters; they are looked up in the enclosing scope.

    Args:
        noisyAudio: Audio signal passed unchanged to ``FeatureExtractor``
            (presumably a 1-D waveform sampled at ``sr`` -- TODO confirm
            against ``FeatureExtractor``).

    Returns:
        Whatever ``revert_features_to_audio`` returns for the model output.
    """
    noise_feature_extractor = FeatureExtractor(
        noisyAudio, windowLength=windowLength, overlap=overlap, sample_rate=sr)
    noise_stft_features = noise_feature_extractor.get_stft_spectrogram()

    # Paper: Besides, spectral phase was not used in the training phase.
    # At reconstruction, noisy spectral phase was used instead to
    # perform inverse STFT and recover human speech.
    noisy_phase = np.angle(noise_stft_features)
    noise_stft_features = np.abs(noise_stft_features)

    mean = np.mean(noise_stft_features)
    std = np.std(noise_stft_features)
    if std == 0:
        # An all-zero (digitally silent) spectrogram has zero spread; dividing
        # by it would produce NaN features. Use a unit scale instead, the same
        # convention StandardScaler applies to zero-variance features.
        std = 1.0
    noise_stft_features = (noise_stft_features - mean) / std

    predictors = prepare_input_features(noise_stft_features)
    # (d0, d1, d2) -> (d0, d1, 1, d2) -> (d2, d0, d1, 1): insert a singleton
    # axis, then move the last axis to the front before predicting.
    predictors = np.reshape(
        predictors,
        (predictors.shape[0], predictors.shape[1], 1, predictors.shape[2]))
    predictors = np.transpose(predictors, (3, 0, 1, 2)).astype(np.float32)

    predicted_stft = model.predict(predictors)

    # The same mean/std used for normalization are passed along so the
    # prediction can be mapped back to the original magnitude range.
    return revert_features_to_audio(
        predicted_stft, noisy_phase, noise_feature_extractor, mean, std)
def parallel_audio_processing(self, clean_filename):
    """Turn one clean recording into a scaled (noise, clean, phase) triple.

    The clean file is read at ``self.sample_rate``, stripped of silent
    frames and randomly cropped to ``self.audio_max_duration``. Its STFT is
    split into phase and magnitude, and a noise magnitude is derived from the
    clean magnitude by ``self._gen_noise_stft``. A ``StandardScaler`` is
    fitted on the noise magnitude and the same fitted scaler is then applied
    to the clean magnitude.

    Args:
        clean_filename: Path handed to ``read_audio``.

    Returns:
        Tuple ``(noise_magnitude, clean_magnitude, clean_phase)`` where both
        magnitudes have been passed through the scaler.
    """
    waveform, _ = read_audio(clean_filename, self.sample_rate)

    # Drop silent frames first, then keep a random fixed-duration excerpt.
    waveform = self._remove_silent_frames(waveform)
    waveform = self._audio_random_crop(
        waveform, duration=self.audio_max_duration)

    # STFT of the clean excerpt, split into phase and magnitude.
    extractor = FeatureExtractor(
        waveform,
        windowLength=self.window_length,
        overlap=self.overlap,
        sample_rate=self.sample_rate,
    )
    spectrogram = extractor.get_stft_spectrogram()
    clean_phase = np.angle(spectrogram)
    clean_magnitude = np.abs(spectrogram)

    # The noise magnitude is generated from the clean magnitude.
    noise_magnitude = self._gen_noise_stft(clean_magnitude, 0)

    # Fit on the noise magnitude only; the clean magnitude reuses those
    # statistics. copy=False asks the scaler to avoid copying its input.
    scaler = StandardScaler(copy=False, with_mean=True, with_std=True)
    scaled_noise = scaler.fit_transform(noise_magnitude)
    scaled_clean = scaler.transform(clean_magnitude)

    return scaled_noise, scaled_clean, clean_phase
def parallel_audio_processing(self, clean_filename):
    """Build one scaled (noisy, clean, noisy-phase) training triple.

    A clean recording and a sampled noise recording are both read at
    ``self.sample_rate`` and stripped of silent frames. The clean signal is
    randomly cropped to ``self.audio_max_duration`` and combined with the
    noise by ``self._add_noise_to_clean_audio``. STFTs of the noisy mixture
    and of the clean crop are split into magnitude and phase, the clean
    magnitude is adjusted by ``self._phase_aware_scaling``, and a
    ``StandardScaler`` fitted on the noisy magnitude is applied to both
    magnitudes.

    Args:
        clean_filename: Path handed to ``read_audio``.

    Returns:
        Tuple ``(noise_magnitude, clean_magnitude, noise_phase)``; the first
        and last entries come from the noisy mixture, and both magnitudes
        have been passed through the scaler.
    """
    speech, _ = read_audio(clean_filename, self.sample_rate)
    speech = self._remove_silent_frames(speech)

    # Pick a noise recording and strip its silent frames as well.
    noise_path = self._sample_noise_filename()
    noise, _ = read_audio(noise_path, self.sample_rate)
    noise = self._remove_silent_frames(noise)

    # Crop the clean signal before combining it with the noise.
    speech = self._audio_random_crop(speech, duration=self.audio_max_duration)
    noisy = self._add_noise_to_clean_audio(speech, noise)

    # Both spectrograms are computed with the same STFT settings.
    stft_settings = {
        "windowLength": self.window_length,
        "overlap": self.overlap,
        "sample_rate": self.sample_rate,
    }

    noisy_spectrogram = FeatureExtractor(
        noisy, **stft_settings).get_stft_spectrogram()
    noise_phase = np.angle(noisy_spectrogram)
    noise_magnitude = np.abs(noisy_spectrogram)

    clean_spectrogram = FeatureExtractor(
        speech, **stft_settings).get_stft_spectrogram()
    clean_phase = np.angle(clean_spectrogram)
    clean_magnitude = self._phase_aware_scaling(
        np.abs(clean_spectrogram), clean_phase, noise_phase)

    # Fit on the noisy magnitude only; the clean magnitude reuses those
    # statistics. copy=False asks the scaler to avoid copying its input.
    scaler = StandardScaler(copy=False, with_mean=True, with_std=True)
    noise_magnitude = scaler.fit_transform(noise_magnitude)
    clean_magnitude = scaler.transform(clean_magnitude)

    return noise_magnitude, clean_magnitude, noise_phase
return noise_stft_mag_features, clean_stft_magnitude, noise_stft_phase train_dataset = tf.data.TFRecordDataset([train_tfrecords_filenames]) train_dataset = train_dataset.map(tf_record_parser) train_dataset = train_dataset.repeat(1) train_dataset = train_dataset.batch(1000) train_dataset = train_dataset.prefetch( buffer_size=tf.data.experimental.AUTOTUNE) window_length = 256 overlap = 64 sr = 16000 feature_extractor = FeatureExtractor(None, windowLength=window_length, overlap=overlap, sample_rate=sr) def revert_features_to_audio(features, phase, cleanMean=None, cleanStd=None): # scale the outputs back to the original range if cleanMean and cleanStd: features = cleanStd * features + cleanMean phase = np.transpose(phase, (1, 0)) features = np.squeeze(features) # features = librosa.db_to_amplitude(features) # features = librosa.db_to_power(features) features = features * np.exp( 1j * phase) # that fixes the abs() operation previously done
# Load weights into `model` from 'denoiser_cnn_log_mel_generator.h5' under
# mozilla_basepath.
model.load_weights(
    os.path.join(mozilla_basepath, 'denoiser_cnn_log_mel_generator.h5'))

# Read one clip from under mozilla_basepath at rate `fs` and print its value
# range. NOTE: `sr` is bound here and rebound by the second read_audio call.
cleanAudio, sr = read_audio(os.path.join(mozilla_basepath, 'clips',
                                         'common_voice_en_16526.mp3'),
                            sample_rate=fs)
print("Min:", np.min(cleanAudio), "Max:", np.max(cleanAudio))

# Read one clip from under urbansound_basepath at rate `fs` and print its
# value range.
noiseAudio, sr = read_audio(os.path.join(urbansound_basepath, 'audio', 'fold10',
                                         '7913-3-0-0.wav'),
                            sample_rate=fs)
print("Min:", np.min(noiseAudio), "Max:", np.max(noiseAudio))

# Absolute value of the first clip's STFT spectrogram, printed as a range.
cleanAudioFeatureExtractor = FeatureExtractor(cleanAudio,
                                              windowLength=windowLength,
                                              overlap=overlap,
                                              sample_rate=sr)
stft_features = cleanAudioFeatureExtractor.get_stft_spectrogram()
stft_features = np.abs(stft_features)
print("Min:", np.min(stft_features), "Max:", np.max(stft_features))

# Combine the two clips with add_noise_to_clean_audio and compute the STFT
# spectrogram of the result (presumably clean speech plus noise -- confirm
# against add_noise_to_clean_audio).
noisyAudio = add_noise_to_clean_audio(cleanAudio, noiseAudio)
noiseAudioFeatureExtractor = FeatureExtractor(noisyAudio,
                                              windowLength=windowLength,
                                              overlap=overlap,
                                              sample_rate=sr)
noise_stft_features = noiseAudioFeatureExtractor.get_stft_spectrogram()


def revert_features_to_audio2(features, phase, cleanMean=None,