示例#1
0
def get_denoised_audio(noisyAudio):
    """Run the module-level ``model`` on a noisy signal and rebuild the audio.

    The STFT of ``noisyAudio`` is split into phase and magnitude. Only the
    magnitude, standardized with its own global mean and standard deviation,
    is fed to the model; the noisy phase is handed to
    ``revert_features_to_audio`` together with those statistics.

    Args:
        noisyAudio: Signal passed as-is to ``FeatureExtractor``.

    Returns:
        Whatever ``revert_features_to_audio`` returns for the prediction.
    """
    extractor = FeatureExtractor(noisyAudio,
                                 windowLength=windowLength,
                                 overlap=overlap,
                                 sample_rate=sr)
    spectrogram = extractor.get_stft_spectrogram()

    # Per the referenced paper, spectral phase is not used for training; the
    # noisy phase is reused at reconstruction time for the inverse STFT.
    phase = np.angle(spectrogram)
    magnitude = np.abs(spectrogram)

    # Standardize with statistics taken over the entire magnitude array.
    mean = np.mean(magnitude)
    std = np.std(magnitude)
    normalized = (magnitude - mean) / std

    windows = prepare_input_features(normalized)

    # Insert a singleton axis at position 2, then bring the last axis to the
    # front (presumably the sample axis the model batches over -- confirm
    # against prepare_input_features).
    expanded_shape = (windows.shape[0], windows.shape[1], 1, windows.shape[2])
    model_input = np.transpose(np.reshape(windows, expanded_shape),
                               (3, 0, 1, 2)).astype(np.float32)

    prediction = model.predict(model_input)
    return revert_features_to_audio(prediction, phase, extractor, mean, std)
示例#2
0
    def parallel_audio_processing(self, clean_filename):
        """Build standardized noisy/clean magnitude features from one clean file.

        The recording is loaded at ``self.sample_rate``, has its silent frames
        removed, is randomly cropped to ``self.audio_max_duration`` and is
        converted to an STFT spectrogram. The noisy magnitude is derived from
        the clean magnitude by ``self._gen_noise_stft``; both magnitudes are
        then standardized with statistics fitted on the noisy one.

        Args:
            clean_filename: Path handed to ``read_audio``.

        Returns:
            Tuple ``(noise_magnitude, clean_magnitude, clean_phase)``.
        """
        waveform, _ = read_audio(clean_filename, self.sample_rate)

        # Drop silence first, then take a random fixed-size snippet.
        waveform = self._remove_silent_frames(waveform)
        waveform = self._audio_random_crop(waveform,
                                           duration=self.audio_max_duration)

        extractor = FeatureExtractor(waveform,
                                     windowLength=self.window_length,
                                     overlap=self.overlap,
                                     sample_rate=self.sample_rate)
        stft = extractor.get_stft_spectrogram()

        # Split the complex spectrogram into phase and magnitude.
        phase = np.angle(stft)
        clean_mag = np.abs(stft)

        # Noise is generated directly in the magnitude domain.
        noisy_mag = self._gen_noise_stft(clean_mag, 0)

        # The scaler is fitted on the noisy magnitude only, so the clean target
        # is expressed in the noisy input's statistics.
        normalizer = StandardScaler(copy=False, with_mean=True, with_std=True)
        noisy_mag = normalizer.fit_transform(noisy_mag)
        clean_mag = normalizer.transform(clean_mag)

        return noisy_mag, clean_mag, phase
    def parallel_audio_processing(self, clean_filename):
        """Mix one clean file with a sampled noise file and extract features.

        Both recordings are loaded at ``self.sample_rate`` and stripped of
        silent frames. The clean audio is randomly cropped to
        ``self.audio_max_duration`` and mixed with the noise. STFT magnitudes
        of the mixture and of the clean crop are computed, the clean magnitude
        is adjusted by ``self._phase_aware_scaling``, and both are standardized
        with statistics fitted on the noisy magnitude.

        Args:
            clean_filename: Path handed to ``read_audio``.

        Returns:
            Tuple ``(noise_magnitude, clean_magnitude, noise_phase)``.
        """

        def stft_of(signal):
            # Same extractor configuration for the mixture and the clean crop.
            extractor = FeatureExtractor(signal,
                                         windowLength=self.window_length,
                                         overlap=self.overlap,
                                         sample_rate=self.sample_rate)
            return extractor.get_stft_spectrogram()

        speech, _ = read_audio(clean_filename, self.sample_rate)
        speech = self._remove_silent_frames(speech)

        # Pick a noise recording and give it the same silence removal.
        noise_path = self._sample_noise_filename()
        noise, _ = read_audio(noise_path, self.sample_rate)
        noise = self._remove_silent_frames(noise)

        # Random fixed-size snippet of the clean audio, then add the noise.
        speech = self._audio_random_crop(speech,
                                         duration=self.audio_max_duration)
        mixture = self._add_noise_to_clean_audio(speech, noise)

        # Noisy input: phase (radians) and magnitude of the mixture STFT.
        noisy_stft = stft_of(mixture)
        noisy_phase = np.angle(noisy_stft)
        noisy_mag = np.abs(noisy_stft)

        # Clean target: magnitude adjusted using both phases.
        clean_stft = stft_of(speech)
        clean_phase = np.angle(clean_stft)
        clean_mag = self._phase_aware_scaling(np.abs(clean_stft), clean_phase,
                                              noisy_phase)

        # Fit on the noisy magnitude only; reuse those statistics for the target.
        normalizer = StandardScaler(copy=False, with_mean=True, with_std=True)
        noisy_mag = normalizer.fit_transform(noisy_mag)
        clean_mag = normalizer.transform(clean_mag)

        return noisy_mag, clean_mag, noisy_phase
示例#4
0
    return noise_stft_mag_features, clean_stft_magnitude, noise_stft_phase


# Input pipeline: parse every TFRecord, make a single pass over the data,
# group records into batches of 1000 and let tf.data tune the prefetch depth.
train_dataset = (
    tf.data.TFRecordDataset([train_tfrecords_filenames])
    .map(tf_record_parser)
    .repeat(1)
    .batch(1000)
    .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
)

# STFT settings handed to FeatureExtractor below.
window_length, overlap, sr = 256, 64, 16000

# Extractor created without an audio signal (None is passed in its place).
feature_extractor = FeatureExtractor(None,
                                     windowLength=window_length,
                                     overlap=overlap,
                                     sample_rate=sr)


def revert_features_to_audio(features, phase, cleanMean=None, cleanStd=None):
    """Undo normalization and re-attach phase to predicted magnitudes.

    Args:
        features: Predicted magnitude features; singleton axes are squeezed
            away before the phase is applied.
        phase: 2-D phase array in radians. It is transposed, so it must have
            the transposed layout of the squeezed ``features``.
        cleanMean: Optional mean used to normalize the features. Scalar or
            array broadcastable against ``features``.
        cleanStd: Optional standard deviation paired with ``cleanMean``.

    NOTE(review): the visible body ends after the complex spectrogram is
    formed and returns nothing; the remainder of the function is not shown.
    """
    # Scale the outputs back to the original range. Compare against None
    # explicitly: a truthiness test would skip a legitimate mean of 0 and
    # raises ValueError for multi-element NumPy arrays.
    if cleanMean is not None and cleanStd is not None:
        features = cleanStd * features + cleanMean

    phase = np.transpose(phase, (1, 0))
    features = np.squeeze(features)

    # features = librosa.db_to_amplitude(features)
    # features = librosa.db_to_power(features)

    # Rebuild complex values from magnitude and phase; this undoes the abs()
    # applied to the spectrogram earlier in the pipeline.
    features = features * np.exp(1j * phase)
示例#5
0
    model.load_weights(
        os.path.join(mozilla_basepath, 'denoiser_cnn_log_mel_generator.h5'))

    cleanAudio, sr = read_audio(os.path.join(mozilla_basepath, 'clips',
                                             'common_voice_en_16526.mp3'),
                                sample_rate=fs)
    print("Min:", np.min(cleanAudio), "Max:", np.max(cleanAudio))

    noiseAudio, sr = read_audio(os.path.join(urbansound_basepath, 'audio',
                                             'fold10', '7913-3-0-0.wav'),
                                sample_rate=fs)
    print("Min:", np.min(noiseAudio), "Max:", np.max(noiseAudio))

    cleanAudioFeatureExtractor = FeatureExtractor(cleanAudio,
                                                  windowLength=windowLength,
                                                  overlap=overlap,
                                                  sample_rate=sr)
    stft_features = cleanAudioFeatureExtractor.get_stft_spectrogram()
    stft_features = np.abs(stft_features)
    print("Min:", np.min(stft_features), "Max:", np.max(stft_features))

    noisyAudio = add_noise_to_clean_audio(cleanAudio, noiseAudio)
    noiseAudioFeatureExtractor = FeatureExtractor(noisyAudio,
                                                  windowLength=windowLength,
                                                  overlap=overlap,
                                                  sample_rate=sr)
    noise_stft_features = noiseAudioFeatureExtractor.get_stft_spectrogram()

    def revert_features_to_audio2(features,
                                  phase,
                                  cleanMean=None,