Python numpy_audio_to_matrix_spectrogram примеры, data_tools.numpy_audio_to_matrix_spectrogram Python примеры использования

Пример #1

0

Показать файл

Файл: prediction_denoise.py Проект: Tollanador/Speech-enhancement

def prediction(weights_path, name_model, audio_dir_prediction,
               dir_save_prediction, audio_input_prediction,
               audio_output_prediction, sample_rate, min_duration,
               frame_length, hop_length_frame, n_fft, hop_length_fft):
    """Denoise audio with a pretrained model and write the result to disk.

    The model architecture is read from ``<weights_path>/<name_model>.json``
    and its weights from ``<weights_path>/<name_model>.h5``. The input audio
    is converted to amplitude/phase spectrograms, the network output is
    subtracted from the input amplitude, and the reconstructed waveform is
    written to ``dir_save_prediction + audio_output_prediction`` as 16-bit
    PCM.

    Args:
        weights_path: Directory holding the ``.json`` and ``.h5`` files.
        name_model: Base name (without extension) of the model files.
        audio_dir_prediction: Forwarded to ``audio_files_to_numpy``.
        dir_save_prediction: Prefix of the output path (plain string
            concatenation, so it must end with a path separator).
        audio_input_prediction: Forwarded to ``audio_files_to_numpy``.
        audio_output_prediction: Output file name appended to
            ``dir_save_prediction``.
        sample_rate: Sample rate used for loading and for the written file.
        min_duration: Forwarded to ``audio_files_to_numpy``.
        frame_length: Length of one audio frame; also used to flatten the
            reconstructed frames.
        hop_length_frame: Forwarded to ``audio_files_to_numpy``.
        n_fft: FFT size; the spectrogram dimension is ``int(n_fft / 2) + 1``.
        hop_length_fft: Hop length forwarded to the spectrogram helpers.

    Returns:
        None. The denoised audio is written to disk.
    """
    # Read the model architecture. The context manager guarantees the file
    # handle is released even if reading raises.
    with open(weights_path + '/' + name_model + '.json', 'r') as json_file:
        loaded_model_json = json_file.read()
    loaded_model = model_from_json(loaded_model_json)
    # Load the trained weights into the freshly built model
    loaded_model.load_weights(weights_path + '/' + name_model + '.h5')
    print("Loaded model from disk")

    # Load the input audio and convert it to a numpy array of frames
    audio = audio_files_to_numpy(audio_dir_prediction, audio_input_prediction,
                                 sample_rate, frame_length, hop_length_frame,
                                 min_duration)

    # Dimension of the (square) spectrogram
    dim_square_spec = int(n_fft / 2) + 1
    print(dim_square_spec)

    # Amplitude (dB) and phase spectrograms of the input frames
    m_amp_db_audio, m_pha_audio = numpy_audio_to_matrix_spectrogram(
        audio, dim_square_spec, n_fft, hop_length_fft)

    # Global input scaling (presumably to roughly [-1, 1]; see scaled_in)
    X_in = scaled_in(m_amp_db_audio)
    # Add a trailing channel axis for the network
    X_in = X_in.reshape(X_in.shape[0], X_in.shape[1], X_in.shape[2], 1)
    X_pred = loaded_model.predict(X_in)
    # Undo the output scaling of the network prediction
    inv_sca_X_pred = inv_scaled_ou(X_pred)
    # The prediction is treated as a noise estimate and subtracted from the
    # noisy amplitude spectrogram
    X_denoise = m_amp_db_audio - inv_sca_X_pred[:, :, :, 0]

    # Diagnostic output kept from the original implementation
    print(X_denoise.shape)
    print(m_pha_audio.shape)
    print(frame_length)
    print(hop_length_fft)

    # Rebuild time-domain frames from the denoised amplitude and the
    # original phase
    audio_denoise_recons = matrix_spectrogram_to_numpy_audio(
        X_denoise, m_pha_audio, frame_length, hop_length_fft)
    nb_samples = audio_denoise_recons.shape[0]
    # Concatenate all frames into one long signal; the factor 10 is a fixed
    # gain applied before writing
    denoise_long = audio_denoise_recons.reshape(1,
                                                nb_samples * frame_length) * 10

    # librosa.output.write_wav is no longer available in recent librosa
    # releases, so the file is written with soundfile instead.
    import soundfile as sf
    sf.write(dir_save_prediction + audio_output_prediction, denoise_long[0, :],
             sample_rate, 'PCM_16')

Пример #2

0

Показать файл

def prediction(weights_path, model, audio_input_prediction, sample_rate,
               min_duration, frame_length, hop_length_frame, n_fft,
               hop_length_fft):
    """Denoise audio with ``model`` and return the resulting waveform.

    Weights are loaded into ``model`` from ``<weights_path>/model_best.h5``.
    The audio is loaded with an empty directory prefix, so
    ``audio_input_prediction`` is handed to ``audio_files_to_numpy`` as is.

    Returns:
        A two-element list ``[waveform, sample_rate]`` where ``waveform`` is
        the flattened, denoised signal multiplied by a fixed gain of 10.
        Nothing is written to disk.
    """
    # The caller's model object is used directly; its weights are replaced
    model.load_weights(weights_path + '/model_best.h5')
    print("Loaded model from disk")

    # Empty directory prefix: file entries are used as given
    frames = audio_files_to_numpy("", audio_input_prediction, sample_rate,
                                  frame_length, hop_length_frame,
                                  min_duration)

    # Dimension of the (square) spectrogram
    spec_dim = int(n_fft / 2) + 1
    print(spec_dim)

    # Amplitude (dB) and phase spectrograms of the input frames
    amp_db, phase = numpy_audio_to_matrix_spectrogram(
        frames, spec_dim, n_fft, hop_length_fft)

    # Scale the input, then add a trailing channel axis for the network
    scaled = scaled_in(amp_db)
    n_items, n_rows, n_cols = scaled.shape[0], scaled.shape[1], scaled.shape[2]
    net_input = scaled.reshape(n_items, n_rows, n_cols, 1)

    # Predict, undo the output scaling and subtract the estimate from the
    # noisy amplitude spectrogram
    noise_estimate = inv_scaled_ou(model.predict(net_input))
    denoised_amp = amp_db - noise_estimate[:, :, :, 0]

    # Diagnostic output
    print(denoised_amp.shape)
    print(phase.shape)
    print(frame_length)
    print(hop_length_fft)

    # Back to the time domain using the original phase
    rebuilt = matrix_spectrogram_to_numpy_audio(
        denoised_amp, phase, frame_length, hop_length_fft)

    # Concatenate all frames into a single row and apply the fixed gain
    frame_count = rebuilt.shape[0]
    waveform = rebuilt.reshape(1, frame_count * frame_length) * 10
    return [waveform[0, :], sample_rate]

Пример #3

0

Показать файл

Файл: decode.py Проект: dangvansam98/speech-enhancement-flask

def decode(weights_path=args.weights_folder,
           name_model=args.name_model,
           audio_dir_prediction=args.audio_dir_prediction,
           dir_save_prediction=args.dir_save_prediction,
           audio_input_prediction=args.audio_input_prediction,
           audio_output_prediction=args.audio_output_prediction,
           sample_rate=args.sample_rate,
           min_duration=args.min_duration,
           frame_length=args.frame_length,
           hop_length_frame=args.hop_length_frame,
           n_fft=args.n_fft,
           hop_length_fft=args.hop_length_fft):
    """Decode the stored latent array ``aaa.npy`` and write denoised audio.

    A ``ConvAutoEncoder`` is built from ``weights_path``, the compressed
    representation is read from ``aaa.npy`` in the working directory and
    decoded, and the decoded output is subtracted from the amplitude
    spectrogram of the reference audio. The reconstructed signal is written
    to ``dir_save_prediction + audio_output_prediction``.

    All defaults come from the module-level ``args`` object and are bound
    when the function is defined. ``name_model`` is accepted but not used.
    """
    autoencoder = ConvAutoEncoder(weights_path=weights_path)
    autoencoder.load_weights()
    autoencoder.info()
    print("Loaded model from disk")

    # Reference audio: supplies the amplitude to subtract from and the phase
    # used for reconstruction
    frames = audio_files_to_numpy(audio_dir_prediction, audio_input_prediction,
                                  sample_rate, frame_length, hop_length_frame,
                                  min_duration)
    spec_dim = int(n_fft / 2) + 1
    amp_db, phase = numpy_audio_to_matrix_spectrogram(
        frames, spec_dim, n_fft, hop_length_fft)

    # Latent representation previously saved to disk
    latent = np.load('aaa.npy')
    print(latent.shape)

    # Decode, undo the output scaling, and subtract from the amplitude
    noise_estimate = inv_scaled_ou(autoencoder.decode(latent))
    denoised_amp = amp_db - noise_estimate[:, :, :, 0]

    # Diagnostic output
    print(denoised_amp.shape)
    print(phase.shape)
    print(frame_length)
    print(hop_length_fft)

    # Back to the time domain using the reference phase
    rebuilt = matrix_spectrogram_to_numpy_audio(
        denoised_amp, phase, frame_length, hop_length_fft)

    # Concatenate all frames into a single row and apply a fixed gain of 10
    frame_count = rebuilt.shape[0]
    waveform = rebuilt.reshape(1, frame_count * frame_length) * 10

    # NOTE(review): librosa.output appears to be unavailable in recent
    # librosa releases - confirm the pinned version before running.
    output_path = dir_save_prediction + audio_output_prediction
    librosa.output.write_wav(output_path, waveform[0, :], sample_rate)
    print('saved audio decoded file in:',
          dir_save_prediction + audio_output_prediction)

Пример #4

0

Показать файл

def prediction(weights_path, name_model, audio_dir_prediction,
               audio_input_prediction, sample_rate, min_duration, frame_length,
               hop_length_frame, n_fft, hop_length_fft, mode):
    """Run the model on one audio input and print two summary statistics.

    The model is loaded from ``<weights_path>/<name_model>.h5``. The audio is
    framed, converted to an amplitude spectrogram, scaled and fed to the
    network. Each frame of the audio array is then overwritten with the
    prediction for that frame, and the mean over all frames is printed
    together with the mean absolute spectrogram amplitude.

    Returns:
        None. Nothing is written to disk; results are only printed.
    """
    network = load_model(weights_path + '/' + name_model + '.h5')
    print("Loaded model from disk")

    # Load the audio input and split it into frames
    frames = audio_file_to_numpy(audio_dir_prediction,
                                 str(audio_input_prediction), sample_rate,
                                 frame_length, hop_length_frame, min_duration,
                                 mode)

    # Dimension of the (square) spectrogram
    spec_dim = int(n_fft / 2) + 1
    print("dim_square_spec:{}".format(spec_dim))

    # Only the amplitude is used below; the phase is discarded
    amp_db, _phase = numpy_audio_to_matrix_spectrogram(
        frames, spec_dim, n_fft, hop_length_fft)

    # Scale the input, then add a trailing channel axis for the network
    scaled = scaled_in(amp_db)
    net_input = scaled.reshape(scaled.shape[0], scaled.shape[1],
                               scaled.shape[2], 1)
    scores = network.predict(net_input)

    # Overwrite every frame in place with the prediction for that frame
    # (zeroing the samples first, then broadcasting the prediction in)
    frame_count = frames.shape[0]
    for row in range(frame_count):
        frames[row, :] = frames[row, :] * 0 + scores[row]

    # Concatenate all frames into a single row
    flattened = frames.reshape(1, frame_count * frame_length)

    # Mean absolute amplitude, then mean of the per-frame predictions
    print("声音的强度{:.2f}".format(np.mean(np.abs(amp_db))))
    print("漏水的概率{:.2f}".format(np.mean(flattened)))

Пример #5

0

Показать файл

Файл: encode.py Проект: dangvansam98/speech-enhancement-flask

def encode(weights_path=args.weights_folder,
           name_model=args.name_model,
           audio_dir_prediction=args.audio_dir_prediction,
           dir_save_prediction=args.dir_save_prediction,
           audio_input_prediction=args.audio_input_prediction,
           audio_output_prediction=args.audio_output_prediction,
           sample_rate=args.sample_rate,
           min_duration=args.min_duration,
           frame_length=args.frame_length,
           hop_length_frame=args.hop_length_frame,
           n_fft=args.n_fft,
           hop_length_fft=args.hop_length_fft):
    """Encode an audio input with the autoencoder and save the result.

    A ``ConvAutoEncoder`` is built from ``weights_path``; the audio is
    framed, converted to an amplitude spectrogram, scaled and passed to
    ``encode``. The encoded array is stored as ``aaa.npy`` in the current
    working directory.

    All defaults come from the module-level ``args`` object and are bound
    when the function is defined. ``name_model``, ``dir_save_prediction``
    and ``audio_output_prediction`` are accepted but not used.

    Returns:
        None. The encoding is written to ``aaa.npy``.
    """
    loaded_model = ConvAutoEncoder(weights_path=weights_path)
    loaded_model.load_weights()
    loaded_model.info()
    print("Loaded model from:", weights_path)

    # Load the input audio and convert it to a numpy array of frames
    audio = audio_files_to_numpy(audio_dir_prediction, audio_input_prediction,
                                 sample_rate, frame_length, hop_length_frame,
                                 min_duration)
    # Dimension of the (square) spectrogram
    dim_square_spec = int(n_fft / 2) + 1
    # Only the amplitude is encoded; the phase is not used here
    m_amp_db_audio, m_pha_audio = numpy_audio_to_matrix_spectrogram(
        audio, dim_square_spec, n_fft, hop_length_fft)

    # Scale the input, then add a trailing channel axis for the network
    X_in = scaled_in(m_amp_db_audio)
    X_in = X_in.reshape(X_in.shape[0], X_in.shape[1], X_in.shape[2], 1)
    encoded = loaded_model.encode(X_in)

    # Fixed: the original used '.' instead of ',' here, which raised
    # AttributeError on the string literal before anything was saved.
    print('encoded.shape:', encoded.shape)
    # np.save appends the '.npy' extension
    np.save('aaa', encoded)
    print('encoded file:', audio_dir_prediction + str(audio_input_prediction))
    print('save to: aaa.npy')

Пример #6

0

Показать файл

Файл: prepare_data.py Проект: OlgaFomin/Speech-enhancement

def create_data(noise_dir, voice_dir, path_save_time_serie, path_save_sound,
                path_save_spectrogram, sample_rate, min_duration, frame_length,
                hop_length_frame, hop_length_frame_noise, nb_samples, n_fft,
                hop_length_fft):
    """Build a denoising training set from clean voices and noises.

    Every file listed in ``noise_dir`` and ``voice_dir`` (except the macOS
    ``.DS_Store`` entry) is loaded and framed. ``blend_noise_randomly`` then
    yields six signal sets: voice, noise, noisy voice, extra noisy voice,
    extra noise and total noise. For each set the function writes

    * a long ``.wav`` file under ``path_save_sound`` for listening checks,
    * the time series (``*_timeserie.npy``) under ``path_save_time_serie``,
    * the amplitude (``*_amp_db.npy``) and phase (``*_pha_db.npy``)
      spectrograms under ``path_save_spectrogram``.

    The three ``path_save_*`` arguments are used as plain string prefixes, so
    they must end with a path separator.

    Returns:
        None. All results are written to disk.
    """
    noise_names = os.listdir(noise_dir)
    voice_names = os.listdir(voice_dir)
    # Drop the macOS metadata entry when present
    for names in (noise_names, voice_names):
        if '.DS_Store' in names:
            names.remove('.DS_Store')

    # Load and frame the audio (noise uses its own hop length)
    noise = audio_files_to_numpy(noise_dir, noise_names, sample_rate,
                                 frame_length, hop_length_frame_noise,
                                 min_duration)
    voice = audio_files_to_numpy(voice_dir, voice_names, sample_rate,
                                 frame_length, hop_length_frame, min_duration)

    # Blend clean voices with randomly selected noises
    (prod_voice, prod_noise, prod_noisy_voice, prod_extra_noisy_voice,
     prod_extra_noise, prod_total_noise) = blend_noise_randomly(
         voice, noise, nb_samples, frame_length)

    # Long concatenated audio files for quality control
    # NOTE(review): librosa.output appears to be unavailable in recent
    # librosa releases - confirm the pinned version before running.
    flat_length = nb_samples * frame_length
    wav_exports = (
        ('noisy_voice_long.wav', prod_noisy_voice),
        ('voice_long.wav', prod_voice),
        ('noise_long.wav', prod_noise),
        ('extra_noisy_voice_long.wav', prod_extra_noisy_voice),
        ('extra_noise_long.wav', prod_extra_noise),
        ('total_noise_long.wav', prod_total_noise),
    )
    for wav_name, frames in wav_exports:
        flattened = frames.reshape(1, flat_length)
        librosa.output.write_wav(path_save_sound + wav_name, flattened[0, :],
                                 sample_rate)

    # Dimension of the (square) spectrogram
    dim_square_spec = int(n_fft / 2) + 1

    # Amplitude and phase spectrograms for every signal set
    signals = (
        ('voice', prod_voice),
        ('noise', prod_noise),
        ('noisy_voice', prod_noisy_voice),
        ('extra_noise', prod_extra_noise),
        ('total_noise', prod_total_noise),
        ('extra_noisy_voice', prod_extra_noisy_voice),
    )
    waveforms = dict(signals)
    amplitudes = {}
    phases = {}
    for label, frames in signals:
        amplitudes[label], phases[label] = numpy_audio_to_matrix_spectrogram(
            frames, dim_square_spec, n_fft, hop_length_fft)

    # Save to disk: the base sets first, then the additional noise sets
    save_groups = (
        ('voice', 'noise', 'noisy_voice'),
        ('extra_noise', 'total_noise', 'extra_noisy_voice'),
    )
    for group in save_groups:
        for label in group:
            np.save(path_save_time_serie + label + '_timeserie',
                    waveforms[label])
        for label in group:
            np.save(path_save_spectrogram + label + '_amp_db',
                    amplitudes[label])
        for label in group:
            np.save(path_save_spectrogram + label + '_pha_db', phases[label])

Пример #7

0

Показать файл

def create_data(noise_dir, voice_dir, path_save_time_serie, path_save_sound,
                path_save_spectrogram, sample_rate, min_duration, frame_length,
                hop_length_frame, hop_length_frame_noise, nb_samples, n_fft,
                hop_length_fft):
    """Build a denoising training set from clean voices and noises.

    ``noise_dir`` and ``voice_dir`` are walked recursively for ``.mp3``,
    ``.wav`` and ``.flac`` files. Noise files are kept only when the second
    dash-separated field of the file name equals ``'3'``; names without a
    dash are skipped. The selected audio is framed, blended by
    ``blend_noise_randomly`` and saved as long ``.wav`` files, time series
    and amplitude/phase spectrograms.

    The three ``path_save_*`` arguments are used as plain string prefixes, so
    they must end with a path separator.

    Returns:
        None. All results are written to disk.
    """
    audio_extensions = ('.mp3', '.wav', '.flac')

    list_noise_files = []
    for root, _dirs, files in os.walk(noise_dir):
        for file_name in files:
            if not file_name.endswith(audio_extensions):
                continue
            name_fields = file_name.split('-')
            # Guard against names without a dash: indexing field 1
            # unconditionally raised IndexError and aborted the whole run.
            if len(name_fields) > 1 and name_fields[1] == '3':
                list_noise_files.append(os.path.join(root, file_name))

    list_voice_files = [
        os.path.join(root, file_name)
        for root, _dirs, files in os.walk(voice_dir)
        for file_name in files
        if file_name.endswith(audio_extensions)
    ]

    # Report how many files were selected
    print(len(list_voice_files))
    print(len(list_noise_files))

    # Load and frame the audio (noise uses its own hop length).
    # NOTE(review): the lists hold paths already joined with the walked
    # root - confirm audio_files_to_numpy copes with that next to the
    # directory argument.
    noise = audio_files_to_numpy(noise_dir, list_noise_files, sample_rate,
                                 frame_length, hop_length_frame_noise,
                                 min_duration)

    voice = audio_files_to_numpy(voice_dir, list_voice_files, sample_rate,
                                 frame_length, hop_length_frame, min_duration)

    # Blend clean voices with randomly selected noises
    prod_voice, prod_noise, prod_noisy_voice = blend_noise_randomly(
        voice, noise, nb_samples, frame_length)

    # Long concatenated audio files for quality control
    # NOTE(review): librosa.output appears to be unavailable in recent
    # librosa releases - confirm the pinned version before running.
    noisy_voice_long = prod_noisy_voice.reshape(1, nb_samples * frame_length)
    librosa.output.write_wav(path_save_sound + 'noisy_voice_long.wav',
                             noisy_voice_long[0, :], sample_rate)
    voice_long = prod_voice.reshape(1, nb_samples * frame_length)
    librosa.output.write_wav(path_save_sound + 'voice_long.wav',
                             voice_long[0, :], sample_rate)
    noise_long = prod_noise.reshape(1, nb_samples * frame_length)
    librosa.output.write_wav(path_save_sound + 'noise_long.wav',
                             noise_long[0, :], sample_rate)

    # Dimension of the (square) spectrogram
    dim_square_spec = int(n_fft / 2) + 1

    # Amplitude and phase spectrograms of each signal set
    m_amp_db_voice, m_pha_voice = numpy_audio_to_matrix_spectrogram(
        prod_voice, dim_square_spec, n_fft, hop_length_fft)
    m_amp_db_noise, m_pha_noise = numpy_audio_to_matrix_spectrogram(
        prod_noise, dim_square_spec, n_fft, hop_length_fft)
    m_amp_db_noisy_voice, m_pha_noisy_voice = numpy_audio_to_matrix_spectrogram(
        prod_noisy_voice, dim_square_spec, n_fft, hop_length_fft)

    # Time series
    np.save(path_save_time_serie + 'voice_timeserie', prod_voice)
    np.save(path_save_time_serie + 'noise_timeserie', prod_noise)
    np.save(path_save_time_serie + 'noisy_voice_timeserie', prod_noisy_voice)

    # Amplitude spectrograms
    np.save(path_save_spectrogram + 'voice_amp_db', m_amp_db_voice)
    np.save(path_save_spectrogram + 'noise_amp_db', m_amp_db_noise)
    np.save(path_save_spectrogram + 'noisy_voice_amp_db', m_amp_db_noisy_voice)

    # Phase spectrograms
    np.save(path_save_spectrogram + 'voice_pha_db', m_pha_voice)
    np.save(path_save_spectrogram + 'noise_pha_db', m_pha_noise)
    np.save(path_save_spectrogram + 'noisy_voice_pha_db', m_pha_noisy_voice)

Пример #8

0

Показать файл

def create_data(noise_dir, voice_dir, path_save_time_serie, path_save_sound,
                path_save_spectrogram, sample_rate, min_duration, frame_length,
                hop_length_frame, hop_length_frame_noise, nb_samples, n_fft,
                hop_length_fft, mode):
    """Build numbered training files from clean voices and noises.

    The file lists of ``noise_dir`` and ``voice_dir`` (minus ``.DS_Store``)
    are partitioned into index groups; the number of groups is fixed to 1,
    so a single noise/voice combination is processed. For every combination
    the audio is framed, blended by ``blend_noise_randomly`` and written out
    with a running numeric suffix:

    * ``/noisy_voice_long<i>.wav``, ``/voice_long<i>.wav`` and
      ``/noise_long<i>.wav`` under ``path_save_sound``,
    * ``/voice_amp_db<i>``, ``/noise_amp_db<i>``, ``/voice_pha_db<i>`` and
      ``/noise_pha_db<i>`` under ``path_save_spectrogram``.

    ``path_save_time_serie`` is accepted but nothing is written there.

    Returns:
        None. All results are written to disk.
    """
    alllist_noise_files = os.listdir(noise_dir)
    alllist_voice_files = os.listdir(voice_dir)
    # Drop the macOS metadata entry when present (lists are edited in place)
    for names in (alllist_noise_files, alllist_voice_files):
        if '.DS_Store' in names:
            names.remove('.DS_Store')

    voice_count = len(alllist_voice_files)
    noise_count = len(alllist_noise_files)

    # Number of index groups each file list is split into
    n_groups = 1

    def partition_indices(total):
        """Return one index list per group, using the inclusive bounds below."""
        groups = []
        for k in range(0, n_groups):
            lower = k / n_groups * total
            upper = ((k + n_groups) / n_groups) * total
            groups.append(
                [idx for idx in range(total) if lower <= idx <= upper])
        return groups

    voice_id_groups = partition_indices(voice_count)
    noise_id_groups = partition_indices(noise_count)

    # Running suffix appended to every output file name
    ii = 0
    for noise_ids in noise_id_groups:
        list_noise_files = [alllist_noise_files[idx] for idx in noise_ids]

        for voice_ids in voice_id_groups:
            list_voice_files = [alllist_voice_files[idx] for idx in voice_ids]
            suffix = str(ii)

            # Load and frame the audio; voice uses ten times its hop length
            noise = audio_files_to_numpy(noise_dir, list_noise_files,
                                         sample_rate, frame_length,
                                         hop_length_frame_noise * 1,
                                         min_duration, mode)
            voice = audio_files_to_numpy(voice_dir, list_voice_files,
                                         sample_rate, frame_length,
                                         hop_length_frame * 10, min_duration,
                                         mode)

            # Blend clean voices with randomly selected noises
            prod_voice, prod_noise, prod_noisy_voice = blend_noise_randomly(
                voice, noise, nb_samples, frame_length, 1)

            # Long concatenated audio files for quality control
            flat_length = nb_samples * frame_length
            wav_exports = (
                ('/noisy_voice_long', prod_noisy_voice),
                ('/voice_long', prod_voice),
                ('/noise_long', prod_noise),
            )
            for stem, frames in wav_exports:
                flattened = frames.reshape(1, flat_length)
                sf.write(path_save_sound + stem + suffix + '.wav',
                         flattened[0, :], sample_rate)

            # Dimension of the (square) spectrogram
            dim_square_spec = int(n_fft / 2) + 1

            # Amplitude and phase spectrograms; the noisy-voice pair is
            # computed but not saved
            amp_voice, pha_voice = numpy_audio_to_matrix_spectrogram(
                prod_voice, dim_square_spec, n_fft, hop_length_fft)
            amp_noise, pha_noise = numpy_audio_to_matrix_spectrogram(
                prod_noise, dim_square_spec, n_fft, hop_length_fft)
            _amp_noisy, _pha_noisy = numpy_audio_to_matrix_spectrogram(
                prod_noisy_voice, dim_square_spec, n_fft, hop_length_fft)

            # Save the spectrograms for training
            spectrogram_exports = (
                ('/voice_amp_db', amp_voice),
                ('/noise_amp_db', amp_noise),
                ('/voice_pha_db', pha_voice),
                ('/noise_pha_db', pha_noise),
            )
            for stem, matrix in spectrogram_exports:
                np.save(path_save_spectrogram + stem + suffix, matrix)

            ii += 1

Пример #9

0

Показать файл

Файл: test_prediction.py Проект: shivam1423/Speech-Enhancement

# Script excerpt: denoise the input audio with an already loaded model.
# Names such as audio_dir_prediction, sample_rate, frame_length and
# loaded_model are expected to be defined earlier in the script.

# Hop length for noise files (noise is split into several windows).
# Not used in the statements below.
hop_length_frame_noise = 5000

# Load the input audio and convert it to a numpy array of frames
audio = audio_files_to_numpy(audio_dir_prediction, audio_input_prediction,
                             sample_rate, frame_length, hop_length_frame,
                             min_duration)

# n_fft and hop_length_fft are chosen to obtain square spectrograms
n_fft = 255
hop_length_fft = 63

# With n_fft = 255 this evaluates to 128
dim_square_spec = int(n_fft / 2) + 1

# Amplitude (dB) and phase spectrograms of the input frames
m_amp_db_audio, m_pha_audio = numpy_audio_to_matrix_spectrogram(
    audio, dim_square_spec, n_fft, hop_length_fft)

# Global input scaling (presumably to roughly [-1, 1]; see scaled_in)
X_in = scaled_in(m_amp_db_audio)
# Add a trailing channel axis for the network
X_in = X_in.reshape(X_in.shape[0], X_in.shape[1], X_in.shape[2], 1)
# Prediction using the loaded network
X_pred = loaded_model.predict(X_in)
# Undo the output scaling of the prediction
inv_sca_X_pred = inv_scaled_ou(X_pred)
# Subtract the prediction (treated as a noise estimate) from the noisy
# amplitude spectrogram
X_denoise = m_amp_db_audio - inv_sca_X_pred[:, :, :, 0]
# Rebuild time-domain frames from the denoised amplitude and original phase
audio_denoise_recons = matrix_spectrogram_to_numpy_audio(
    X_denoise, m_pha_audio, frame_length, hop_length_fft)
#Number of frames

Python numpy_audio_to_matrix_spectrogram примеры использования