Exemplo n.º 1
0
def reduce_noise(source_file,
                 output_file=None,
                 initial_noise=30,
                 window_size=0,
                 noise_threshold=0.15):
    """Run ``logmmse_from_file`` on ``source_file``.

    When ``output_file`` is falsy (``None`` or an empty string) the source
    path itself is passed as the output path.  ``initial_noise``,
    ``window_size`` and ``noise_threshold`` are forwarded to
    ``logmmse_from_file`` unchanged.  Nothing is returned.
    """
    destination = output_file if output_file else source_file
    options = {
        'initial_noise': initial_noise,
        'window_size': window_size,
        'noise_threshold': noise_threshold,
    }
    logmmse_from_file(source_file, output_file=destination, **options)
def prediction(weights_dir, model_name, input_dir, output_dir, sample_rate, frame_length, hop_length_frame, n_fft,
               hop_length_fft):
    """Denoise every file found in ``input_dir`` with a saved model.

    The model architecture is read from ``weights_dir + model_name + '.json'``
    and its weights from ``weights_dir + model_name + '.h5'``.  For each file
    in ``input_dir`` three files are written to ``output_dir``:

    * ``Remastered-<file>`` -- output of ``logmmse.logmmse_from_file``;
    * ``Final-<file>``      -- the remastered spectrogram minus the model
      prediction, converted back to audio and multiplied by 10;
    * ``Noise-<file>``      -- the model prediction alone, converted back to
      audio.

    All paths are built by plain string concatenation, so ``weights_dir``,
    ``input_dir`` and ``output_dir`` must already end with a path separator.

    Args:
        weights_dir: Prefix prepended to ``model_name`` for the model files.
        model_name: Base name (without extension) of the ``.json``/``.h5`` pair.
        input_dir: Directory whose entries are processed.
        output_dir: Prefix for the three output files per input.
        sample_rate: Passed to ``dt.audio_files_to_file`` and to the WAV writer.
        frame_length: Passed to the framing / reconstruction helpers in ``dt``.
        hop_length_frame: Passed to ``dt.audio_list_to_numpy``.
        n_fft: FFT size; the spectrogram dimension is ``int(n_fft / 2) + 1``.
        hop_length_fft: Passed to the spectrogram helpers in ``dt``.
    """
    # The context manager closes the handle even when read() raises.
    with open(weights_dir + model_name + '.json', 'r') as json_file:
        loaded_model_json = json_file.read()
    loaded_model = model_from_json(loaded_model_json)
    loaded_model.load_weights(weights_dir + model_name + '.h5')
    print("Loaded model from disk!")

    # Depends only on n_fft, so compute it once rather than per file.
    dim_square_spec = int(n_fft / 2) + 1

    for file_name in os.listdir(input_dir):
        remastered_name = 'Remastered-' + file_name

        # First pass: write the logmmse output, then load it back as the
        # signal the network will work on.
        logmmse.logmmse_from_file(input_file=input_dir + file_name, output_file=output_dir + remastered_name)
        audio_file = dt.audio_files_to_file(output_dir, remastered_name, sample_rate)

        audio = dt.audio_list_to_numpy([audio_file], frame_length, hop_length_frame)

        m_amp_db_audio, m_pha_audio = dt.audio_numpy_to_matrix_spectrogram(audio, dim_square_spec, n_fft,
                                                                           hop_length_fft)

        # Append a trailing axis of size 1 before handing the batch to the model.
        x_in = dt.scaled_in(m_amp_db_audio)
        x_in = x_in.reshape(x_in.shape[0], x_in.shape[1], x_in.shape[2], 1)
        x_pred = loaded_model.predict(x_in)
        inv_sca_x_pred = dt.inv_scaled_out(x_pred)

        # The prediction is subtracted from the input spectrogram, i.e. the
        # model output is treated as the noise estimate.
        predicted_noise = inv_sca_x_pred[:, :, :, 0]
        x_denoise = m_amp_db_audio - predicted_noise

        # NOTE(review): librosa.output.write_wav is not available in
        # librosa >= 0.8 -- confirm the pinned librosa version.
        audio_denoise_recons = dt.matrix_spectrogram_to_numpy_audio(x_denoise, m_pha_audio, frame_length,
                                                                    hop_length_fft)
        nb_samples = audio_denoise_recons.shape[0]
        # Flatten the frames into one row and apply the fixed gain of 10.
        denoise_long = audio_denoise_recons.reshape(1, nb_samples * frame_length) * 10
        librosa.output.write_wav(output_dir + 'Final-' + file_name, denoise_long[0, :], sample_rate)

        noise_recons = dt.matrix_spectrogram_to_numpy_audio(predicted_noise, m_pha_audio, frame_length,
                                                            hop_length_fft)
        nb_samples = noise_recons.shape[0]
        noise_long = noise_recons.reshape(1, nb_samples * frame_length)
        librosa.output.write_wav(output_dir + 'Noise-' + file_name, noise_long[0, :], sample_rate)
Exemplo n.º 3
0
def get_command_recording():
    """Record audio from ``audio_stream`` into a temporary WAV file.

    Reads ``int(sample_rate / frames_per_buffer * DURATION)`` buffers of
    ``frames_per_buffer`` frames each, writes them as mono 16-bit PCM, then
    runs ``logmmse_from_file`` with the same path as input and output.

    Returns:
        Path of the temporary ``.wav`` file.  The file is not deleted here;
        removing it is left to the caller.
    """
    import os

    fd, file_name = tempfile.mkstemp(".wav", prefix="input_command")
    # mkstemp hands back an already-open OS-level descriptor.  The file is
    # reopened by name below, so close the descriptor instead of leaking it.
    os.close(fd)

    # The context manager closes the WAV writer even if a stream read fails.
    with wave.open(file_name, "wb") as wave_file:
        wave_file.setnchannels(1)
        wave_file.setsampwidth(pa.get_sample_size(pyaudio.paInt16))
        wave_file.setframerate(sample_rate)
        nframes = int(sample_rate / frames_per_buffer * DURATION)
        for _ in range(nframes):
            wave_file.writeframes(audio_stream.read(frames_per_buffer))

    logmmse_from_file(file_name, file_name)
    return file_name
Exemplo n.º 4
0
def reconstruct_audio(signal_filename, output_filename, low_cutoff,
                      high_cutoff):
    """Rebuild a WAV file from a saved signal array and write a denoised copy.

    Steps:
      1. Load ``arr_0`` from ``signal_filename`` (must be 3-D), keep every
         second entry along axis 1, drop the first entry of the last axis and
         merge the two leading axes.
      2. Combine the rows via ``combine`` using a band-pass pre-filter that is
         50 Hz wider than the requested band on both sides.
      3. Band-pass the combined signal to ``(low_cutoff, high_cutoff)``.
      4. Normalise to the int16 range and write ``output_filename``.
      5. Run ``logmmse_from_file`` on that file and write the result next to
         it as ``<output stem>-denoised.wav``.

    Args:
        signal_filename: Path readable by ``np.load`` that contains ``arr_0``.
        output_filename: Destination WAV path.
        low_cutoff: Lower band edge in Hz.
        high_cutoff: Upper band edge in Hz.
    """
    import os

    sample_rate = 2200  # Hz; shared by the filter design and both WAV files
    filter_order = 11

    data = np.load(signal_filename)
    arr = data['arr_0'][:, ::2, 1:].astype(np.float64)
    arr = arr.reshape(-1, arr.shape[2])

    # Design the wide pre-filter once; the original rebuilt it on every call.
    wide_sos = signal.butter(filter_order,
                             (low_cutoff - 50, high_cutoff + 50),
                             'bandpass',
                             fs=sample_rate,
                             output='sos')

    def prefilter(x):
        """Apply the wide band-pass filter to one signal."""
        return signal.sosfilt(wide_sos, x)

    combined_signal = combine(arr, 4, prefilter)

    sos = signal.butter(filter_order, (low_cutoff, high_cutoff),
                        'bandpass',
                        fs=sample_rate,
                        output='sos')
    filtered = signal.sosfilt(sos, combined_signal)

    # Normalise by the peak magnitude (not the signed maximum) so a dominant
    # negative peak cannot exceed the int16 range; a silent signal stays
    # silent instead of dividing by zero.
    peak = np.max(np.abs(filtered))
    if peak > 0:
        scaled = 32767 * filtered / peak
    else:
        scaled = np.zeros_like(filtered)
    # Clip BEFORE casting: casting first would wrap out-of-range values.
    wav_res = np.clip(scaled, -32767, 32767).astype('i2')
    wavfile.write(output_filename, sample_rate, wav_res)

    # splitext only looks at the final path component and tolerates names
    # that have no extension at all.
    denoised_filename = os.path.splitext(output_filename)[0] + '-denoised.wav'
    # Assumes logmmse_from_file returns the denoised samples -- TODO confirm.
    output = logmmse_from_file(output_filename)
    wavfile.write(denoised_filename, sample_rate, output)
Exemplo n.º 5
0
def reducenoise():
    """Run ``logmmse_from_file`` on the module-level ``voicewave`` path.

    The output path is the module-level ``voicerd``; the remaining settings
    are fixed (initial_noise=60, window_size=0, noise_threshold=0.15).
    Nothing is returned.
    """
    settings = {
        'output_file': voicerd,
        'initial_noise': 60,
        'window_size': 0,
        'noise_threshold': 0.15,
    }
    logmmse_from_file(voicewave, **settings)
# Evaluation sweep: for each entry of `multipliers`, set the SNR from its
# first element and run EPOCHS rounds that compare the 'sys_model' prediction
# against the logmmse output.
for j in multipliers:

    def get_audio():
        """Return one noisy-speech sample drawn from the 'dev' set."""
        return randomizer.get_noisy_speech(_set='dev')

    set_snr(j[0])
    for i in range(EPOCHS):
        # Presumably to_write=True makes generate_sample write the mixture
        # that is read below -- TODO confirm.
        hm, bg, mfcc_feature = generate_sample(
            _n_filt=NFILT,
            _winlen=WINLEN,
            _winstep=WINSTEP,
            _winfunc=np.hamming,
            _generator=get_audio,
            to_write=True,
        )

        # Both paths are resolved relative to this file's directory.
        logmmse_from_file(
            os.path.join(os.path.dirname(__file__), '../sample_test/mixed.wav'),
            os.path.join(os.path.dirname(__file__), '../sample_test/logmmse.wav'),
        )

        sequence_feature = np.array(
            to_sequence_with_stride(
                mfcc_feature, left_pad=PAD_L, right_pad=PAD_R))

        # sys model
        prediction = predict(sequence_feature, 'sys_model')
        estimate = get_estimate(
            bg,
            prediction,
            _winlen=WINLEN,
            _winstep=WINSTEP,
            _winfunc=np.hamming,
        )
        write_wav('sys_model.wav', estimate.astype(np.float32))

        # NOTE(review): these names carry no directory, unlike the logmmse
        # output path above -- confirm how evaluate() resolves them.
        evaluate(estimated_sources=['sys_model.wav', 'logmmse.wav'])