def prediction(weights_path, name_model, audio_dir_prediction,
               dir_save_prediction, audio_input_prediction,
               audio_output_prediction, sample_rate, min_duration,
               frame_length, hop_length_frame, n_fft, hop_length_fft):
    """Denoise noisy audio with a pretrained network and save the result.

    Loads a model architecture (JSON) and its weights (H5) from
    ``weights_path``, converts the input audio files to fixed-length
    frames, predicts the noise spectrogram, subtracts it from the noisy
    spectrogram, reconstructs the waveform and writes it as a 16-bit PCM
    WAV file to ``dir_save_prediction + audio_output_prediction``.

    Args:
        weights_path: Directory containing ``<name_model>.json`` and
            ``<name_model>.h5``.
        name_model: Base name of the model files (no extension).
        audio_dir_prediction: Directory holding the noisy input files.
        dir_save_prediction: Directory (or path prefix) for the output.
        audio_input_prediction: List of input audio file names.
        audio_output_prediction: Output WAV file name.
        sample_rate: Audio sample rate in Hz.
        min_duration: Minimum duration (s) for an input file to be used.
        frame_length: Number of samples per analysis frame.
        hop_length_frame: Hop between consecutive frames, in samples.
        n_fft: FFT size for the spectrograms.
        hop_length_fft: STFT hop length.

    Returns:
        None. The denoised audio is written to disk as a side effect.
    """
    # librosa.output.write_wav was removed in librosa >= 0.8; soundfile
    # is used for writing instead (import kept function-local, matching
    # the original code's style).
    import soundfile as sf

    # Load the architecture JSON; 'with' guarantees the handle is closed
    # even if read() raises (the original leaked it on error).
    with open(weights_path + '/' + name_model + '.json', 'r') as json_file:
        loaded_model_json = json_file.read()
    loaded_model = model_from_json(loaded_model_json)
    # Load the trained weights into the reconstructed model.
    loaded_model.load_weights(weights_path + '/' + name_model + '.h5')
    print("Loaded model from disk")

    # Read the noisy audio files and slice them into frames (numpy array).
    audio = audio_files_to_numpy(audio_dir_prediction, audio_input_prediction,
                                 sample_rate, frame_length, hop_length_frame,
                                 min_duration)

    # Side of the square spectrogram: n_fft/2 + 1 frequency bins.
    dim_square_spec = int(n_fft / 2) + 1
    print(dim_square_spec)

    # Amplitude (dB) and phase spectrograms of every frame.
    m_amp_db_audio, m_pha_audio = numpy_audio_to_matrix_spectrogram(
        audio, dim_square_spec, n_fft, hop_length_fft)

    # Global scaling so the network input is roughly in [-1, 1].
    X_in = scaled_in(m_amp_db_audio)
    # Add the trailing channel axis expected by the conv network.
    X_in = X_in.reshape(X_in.shape[0], X_in.shape[1], X_in.shape[2], 1)
    # The network predicts the NOISE component of the spectrogram.
    X_pred = loaded_model.predict(X_in)
    # Undo the output scaling applied during training.
    inv_sca_X_pred = inv_scaled_ou(X_pred)
    # Spectral subtraction: noisy amplitude minus predicted noise.
    X_denoise = m_amp_db_audio - inv_sca_X_pred[:, :, :, 0]

    # Reconstruct the time-domain audio from the denoised amplitude and
    # the original phase.
    audio_denoise_recons = matrix_spectrogram_to_numpy_audio(
        X_denoise, m_pha_audio, frame_length, hop_length_fft)
    # Number of reconstructed frames.
    nb_samples = audio_denoise_recons.shape[0]
    # Concatenate all frames into one signal; the *10 gain compensates
    # for amplitude lost during reconstruction — presumably tuned by
    # ear, TODO confirm against the training pipeline.
    denoise_long = audio_denoise_recons.reshape(1,
                                                nb_samples * frame_length) * 10
    sf.write(dir_save_prediction + audio_output_prediction, denoise_long[0, :],
             sample_rate, 'PCM_16')
# --- Example #2 (scraped separator "示例#2", vote count 0; kept as a comment so the file parses) ---
def prediction(weights_path, model, audio_input_prediction, sample_rate,
               min_duration, frame_length, hop_length_frame, n_fft,
               hop_length_fft):
    """Denoise noisy audio with a pretrained network and return the signal.

    Unlike the file-writing variant, this version receives an already
    constructed ``model``, loads the best checkpoint weights into it and
    RETURNS the denoised waveform instead of saving it (the original
    docstring incorrectly claimed it saved to disk).

    Args:
        weights_path: Directory containing ``model_best.h5``.
        model: A compiled Keras-style model whose architecture matches
            the stored weights.
        audio_input_prediction: List of input audio file paths; looked
            up relative to the current directory (empty dir prefix).
        sample_rate: Audio sample rate in Hz.
        min_duration: Minimum duration (s) for an input file to be used.
        frame_length: Number of samples per analysis frame.
        hop_length_frame: Hop between consecutive frames, in samples.
        n_fft: FFT size for the spectrograms.
        hop_length_fft: STFT hop length.

    Returns:
        ``[denoised_signal, sample_rate]`` where ``denoised_signal`` is
        a 1-D numpy array.
    """
    loaded_model = model
    # NOTE(review): checkpoint name is hardcoded; callers cannot select
    # a different weights file without renaming it on disk.
    loaded_model.load_weights(weights_path + '/' + 'model_best.h5')
    print("Loaded model from disk")

    # Input paths are taken as given (no directory prefix).
    audio_dir_prediction = ""
    # Read the noisy audio files and slice them into frames (numpy array).
    audio = audio_files_to_numpy(audio_dir_prediction, audio_input_prediction,
                                 sample_rate, frame_length, hop_length_frame,
                                 min_duration)

    # Side of the square spectrogram: n_fft/2 + 1 frequency bins.
    dim_square_spec = int(n_fft / 2) + 1
    print(dim_square_spec)

    # Amplitude (dB) and phase spectrograms of every frame.
    m_amp_db_audio, m_pha_audio = numpy_audio_to_matrix_spectrogram(
        audio, dim_square_spec, n_fft, hop_length_fft)

    # Global scaling so the network input is roughly in [-1, 1].
    X_in = scaled_in(m_amp_db_audio)
    # Add the trailing channel axis expected by the conv network.
    X_in = X_in.reshape(X_in.shape[0], X_in.shape[1], X_in.shape[2], 1)
    # The network predicts the NOISE component of the spectrogram.
    X_pred = loaded_model.predict(X_in)
    # Undo the output scaling applied during training.
    inv_sca_X_pred = inv_scaled_ou(X_pred)
    # Spectral subtraction: noisy amplitude minus predicted noise.
    X_denoise = m_amp_db_audio - inv_sca_X_pred[:, :, :, 0]

    # Reconstruct the time-domain audio from the denoised amplitude and
    # the original phase. (Leftover debug prints removed.)
    audio_denoise_recons = matrix_spectrogram_to_numpy_audio(
        X_denoise, m_pha_audio, frame_length, hop_length_fft)
    # Number of reconstructed frames.
    nb_samples = audio_denoise_recons.shape[0]
    # Concatenate all frames into one signal; the *10 gain compensates
    # for amplitude lost during reconstruction — presumably tuned by
    # ear, TODO confirm against the training pipeline.
    denoise_long = audio_denoise_recons.reshape(1,
                                                nb_samples * frame_length) * 10
    return [denoise_long[0, :], sample_rate]
def decode(weights_path=args.weights_folder,
           name_model=args.name_model,
           audio_dir_prediction=args.audio_dir_prediction,
           dir_save_prediction=args.dir_save_prediction,
           audio_input_prediction=args.audio_input_prediction,
           audio_output_prediction=args.audio_output_prediction,
           sample_rate=args.sample_rate,
           min_duration=args.min_duration,
           frame_length=args.frame_length,
           hop_length_frame=args.hop_length_frame,
           n_fft=args.n_fft,
           hop_length_fft=args.hop_length_fft):
    """Decode a compressed noise representation and save denoised audio.

    Loads a ``ConvAutoEncoder``, decodes the compressed array stored in
    ``aaa.npy`` back into a noise spectrogram, subtracts it from the
    noisy input's spectrogram, reconstructs the waveform and writes it
    to ``dir_save_prediction + audio_output_prediction``.

    NOTE(review): all defaults are read from the module-level ``args``
    at import time, so later changes to ``args`` do not affect them.
    NOTE(review): the compressed-data path ``'aaa.npy'`` is hardcoded —
    confirm this matches what the encoding step writes.

    Returns:
        None. The decoded audio is written to disk as a side effect.
    """
    # librosa.output.write_wav was removed in librosa >= 0.8; write with
    # soundfile instead, matching the saving code used elsewhere in this
    # file.
    import soundfile as sf

    loaded_model = ConvAutoEncoder(weights_path=weights_path)
    loaded_model.load_weights()
    loaded_model.info()
    print("Loaded model from disk")

    # Read the noisy audio files and slice them into frames (numpy array).
    audio = audio_files_to_numpy(audio_dir_prediction, audio_input_prediction,
                                 sample_rate, frame_length, hop_length_frame,
                                 min_duration)
    # Side of the square spectrogram: n_fft/2 + 1 frequency bins.
    dim_square_spec = int(n_fft / 2) + 1
    # Amplitude (dB) and phase spectrograms of every frame.
    m_amp_db_audio, m_pha_audio = numpy_audio_to_matrix_spectrogram(
        audio, dim_square_spec, n_fft, hop_length_fft)

    # Load the compressed representation and decode it back to a
    # (scaled) noise spectrogram.
    data_compress = np.load('aaa.npy')
    print(data_compress.shape)
    decoded = loaded_model.decode(data_compress)
    # Undo the output scaling applied during training.
    inv_sca_X_pred = inv_scaled_ou(decoded)
    # Spectral subtraction: noisy amplitude minus decoded noise.
    X_denoise = m_amp_db_audio - inv_sca_X_pred[:, :, :, 0]

    # Reconstruct the time-domain audio from the denoised amplitude and
    # the original phase. (Leftover debug prints removed.)
    audio_denoise_recons = matrix_spectrogram_to_numpy_audio(
        X_denoise, m_pha_audio, frame_length, hop_length_fft)
    # Number of reconstructed frames.
    nb_samples = audio_denoise_recons.shape[0]
    # Concatenate all frames into one signal; the *10 gain compensates
    # for amplitude lost during reconstruction — presumably tuned by
    # ear, TODO confirm against the training pipeline.
    denoise_long = audio_denoise_recons.reshape(1,
                                                nb_samples * frame_length) * 10
    sf.write(dir_save_prediction + audio_output_prediction,
             denoise_long[0, :], sample_rate, 'PCM_16')
    print('saved audio decoded file in:',
          dir_save_prediction + audio_output_prediction)
# Module-level pipeline run used as a fixture for the tests below.
# NOTE(review): `n_fft`, `audio`, `loaded_model` and `frame_length` are
# defined earlier in the file (not visible in this chunk) — this block
# assumes they are already in scope; verify against the full file.
hop_length_fft = 63

# Side of the square spectrogram: n_fft/2 + 1 frequency bins.
dim_square_spec = int(n_fft / 2) + 1

# Create Amplitude and phase of the sounds
m_amp_db_audio, m_pha_audio = numpy_audio_to_matrix_spectrogram(
    audio, dim_square_spec, n_fft, hop_length_fft)

#global scaling to have distribution -1/1
X_in = scaled_in(m_amp_db_audio)
#Reshape for prediction (add the trailing channel axis the network expects)
X_in = X_in.reshape(X_in.shape[0], X_in.shape[1], X_in.shape[2], 1)
#Prediction using loaded network — the model predicts the NOISE spectrogram
X_pred = loaded_model.predict(X_in)
#Rescale back the noise model (undo the training-time output scaling)
inv_sca_X_pred = inv_scaled_ou(X_pred)
#Remove noise model from noisy speech (spectral subtraction)
X_denoise = m_amp_db_audio - inv_sca_X_pred[:, :, :, 0]
#Reconstruct audio from denoised spectrogram and phase
audio_denoise_recons = matrix_spectrogram_to_numpy_audio(
    X_denoise, m_pha_audio, frame_length, hop_length_fft)
#Number of frames
nb_samples = audio_denoise_recons.shape[0]


def test_dimensions_spectrogram():
    """Check that every spectrogram dimension matches the expected size."""
    assert dim_square_spec == 128
    observed_dims = (
        m_amp_db_audio.shape[1],
        m_amp_db_audio.shape[2],
        X_denoise.shape[1],
    )
    for dim in observed_dims:
        assert dim_square_spec == dim