示例#1
0
def single_normal():
    """Estimate masks for one recording, beamform it, and write the results.

    Steps, in order:

    1. Load channels ``range(1, 3)`` from ``args.data_directory`` at
       ``fs=16000`` via ``get_audio_nochime``.
    2. Compute the STFT and feed its magnitude to ``model.calc_masks`` to get
       noise and speech masks.
    3. Take the median of each mask over axis 1 and pass them to
       ``gev_wrapper_on_masks`` to obtain the beamformed spectrum.
    4. Build a masked "noise" spectrum, take its median over axis 1, and write
       both the noise estimate and the beamformed signal as wav files named
       after ``args.exNum``.
    5. Print the accumulated I/O, network and beamformer timings.

    The function takes no arguments and returns nothing; it relies on ``args``
    and ``model`` being available in the enclosing scope.
    """
    t_io = 0
    t_net = 0
    t_beamform = 0
    # Number of leading samples dropped from the synthesized outputs.
    context_samples = 0

    # Timed as I/O: reading the input channels.
    with Timer() as t:
        audio_data = get_audio_nochime(args.data_directory,
                                       ch_range=range(1, 3),
                                       fs=16000)
        print("audio_data: ", audio_data.shape, end="\n")
    t_io += t.msecs

    # presumably (frames, channels, bins) after the transpose -- TODO confirm
    Y = stft(audio_data, time_dim=1).transpose((1, 0, 2))

    # Unit-magnitude phase term. Bins whose magnitude is exactly zero (e.g.
    # digital silence) would otherwise yield 0/0 = NaN, which then propagates
    # through the median and the inverse STFT into the written noise file.
    # Those bins get a unit phasor instead; they still contribute zero energy
    # because Y itself is zero there.
    Y_mag = np.abs(Y)
    Y_phase = np.divide(Y, Y_mag, out=np.ones_like(Y), where=Y_mag > 0)
    print("Y: ", Y.shape, "Y_phase: ", Y_phase.shape, end="\n")

    # The network consumes the float32 magnitude spectrum.
    Y_var = Variable(Y_mag.astype(np.float32))

    # Timed as network: mask estimation plus moving the results to the CPU.
    with Timer() as t:
        N_masks, X_masks = model.calc_masks(Y_var)
        print("N_masks: ", N_masks.shape, end="\n")
        N_masks.to_cpu()
        X_masks.to_cpu()
    t_net += t.msecs

    # Timed as beamforming: mask pooling, noise estimate and GEV beamformer.
    with Timer() as t:
        # Collapse the masks over axis 1 with a median.
        N_mask = np.median(N_masks.data, axis=1)
        X_mask = np.median(X_masks.data, axis=1)

        # NOTE(review): Y is already complex, so multiplying by Y_phase again
        # applies the phase a second time. ``N_masks.data * np.abs(Y) *
        # Y_phase`` may have been intended -- confirm before changing, since
        # it alters the written noise file.
        Noise = np.multiply(N_masks.data, Y)
        Noise = np.multiply(Noise, Y_phase)
        Noise = np.median(Noise, axis=1)

        Y_hat = gev_wrapper_on_masks(Y, N_mask, X_mask)
    t_beamform += t.msecs

    # Timed as I/O: writing the noise estimate and the beamformed signal.
    with Timer() as t:
        audiowrite(
            istft(Noise)[context_samples:],
            "/media/hipo/lento/workspace/BeamSaber/tools/enhancement/gev/PublicFOMLSA/sample/{}_noise.wav"
            .format(args.exNum), 16000, True, True)
        audiowrite(
            istft(Y_hat)[context_samples:],
            "/media/hipo/lento/workspace/BeamSaber/tools/enhancement/gev/PublicFOMLSA/sample/{}_gev.wav"
            .format(args.exNum), 16000, True, True)
    t_io += t.msecs

    # Timer reports milliseconds; the summary is printed in seconds.
    print(
        'Timings: I/O: {:.2f}s | Net: {:.2f}s | Beamformer: {:.2f}s | Total: {:.2f}s'
        .format(t_io / 1000, t_net / 1000, t_beamform / 1000,
                ((t_io + t_net + t_beamform) / 1000)))
示例#2
0
                cur_line[0], cur_line[1], cur_line[2])
    t_io += t.msecs
    Y = stft(audio_data, time_dim=1).transpose((1, 0, 2))
    Y_var = Variable(np.abs(Y).astype(np.float32))
    if args.gpu >= 0:
        Y_var.to_gpu(args.gpu)
    with Timer() as t:
        N_masks, X_masks = model.calc_masks(Y_var)
        N_masks.to_cpu()
        X_masks.to_cpu()
    t_net += t.msecs

    with Timer() as t:
        N_mask = np.median(N_masks.data, axis=1)
        X_mask = np.median(X_masks.data, axis=1)
        Y_hat = gev_wrapper_on_masks(Y, N_mask, X_mask)
    t_beamform += t.msecs

    if scenario == 'simu':
        wsj_name = cur_line.split('/')[-1].split('_')[1]
        spk = cur_line.split('/')[-1].split('_')[0]
        env = cur_line.split('/')[-1].split('_')[-1]

    elif scenario == 'real':
        wsj_name = cur_line[3]
        spk = cur_line[0].split('/')[-1].split('_')[0]
        env = cur_line[0].split('/')[-1].split('_')[-1]

    filename = os.path.join(args.output_dir,
                            '{}05_{}_{}'.format(stage, env.lower(), scenario),
                            '{}_{}_{}.wav'.format(spk, wsj_name, env.upper()))
示例#3
0
文件: beamform.py 项目: fgnt/nn-gev
                    cur_line[0], cur_line[1], cur_line[2])
    t_io += t.msecs
    Y = stft(audio_data, time_dim=1).transpose((1, 0, 2))
    Y_var = Variable(np.abs(Y).astype(np.float32), True)
    if args.gpu >= 0:
        Y_var.to_gpu(args.gpu)
    with Timer() as t:
        N_masks, X_masks = model.calc_masks(Y_var)
        N_masks.to_cpu()
        X_masks.to_cpu()
    t_net += t.msecs

    with Timer() as t:
        N_mask = np.median(N_masks.data, axis=1)
        X_mask = np.median(X_masks.data, axis=1)
        Y_hat = gev_wrapper_on_masks(Y, N_mask, X_mask)
    t_beamform += t.msecs

    if scenario == 'simu':
        wsj_name = cur_line.split('/')[-1].split('_')[1]
        spk = cur_line.split('/')[-1].split('_')[0]
        env = cur_line.split('/')[-1].split('_')[-1]
    elif scenario == 'real':
        wsj_name = cur_line[3]
        spk = cur_line[0].split('/')[-1].split('_')[0]
        env = cur_line[0].split('/')[-1].split('_')[-1]

    filename = os.path.join(
            args.output_dir,
            '{}05_{}_{}'.format(stage, env.lower(), scenario),
            '{}_{}_{}.wav'.format(spk, wsj_name, env.upper())
示例#4
0
    N_masks, X_masks = model.calc_masks(Y_var)
    N_masks.to_cpu()
    X_masks.to_cpu()
t_net += t.msecs

with Timer() as t:
    N_mask = np.median(N_masks.data, axis=1)
    X_mask = np.median(X_masks.data, axis=1)
    print("Y: ",
          Y.shape,
          "N_mask: ",
          N_mask.shape,
          "X_mask: ",
          X_mask.shape,
          end="\n")
    Y_hat = gev_wrapper_on_masks(Y, N_mask, X_mask)
    # audiowrite(istft(Y_hat), "new_dataset_result/2m_feedback_.wav", 48000, True, True)
t_beamform += t.msecs

# second pass beamforming
# second_channel = audioread('AUDIO_RECORDING.CH2.wav', sample_rate=48000)
second_channel = audioread('new_dataset/2m/2m_pub_new.CH5.wav',
                           sample_rate=48000)
second_channel = np.expand_dims(second_channel, axis=0)
print("second_size", second_channel.shape, end="\n")

second_channel = stft(second_channel, time_dim=1).transpose((1, 0, 2))
print("Y_hat: ", Y_hat.shape, "second_size", second_channel.shape, end="\n")

Y_hat = np.expand_dims(Y_hat, axis=1)
Y_var_second = Variable(np.abs(Y_hat).astype(np.float32), True)
示例#5
0
    t_io += t.msecs
    Y = stft(audio_data, time_dim=1).transpose((1, 0, 2))
    Y_var = Variable(np.abs(Y).astype(np.float32), True)
    if args.gpu >= 0:
        Y_var.to_gpu(args.gpu)
    with Timer() as t:
        N_masks, X_masks = model.calc_masks(Y_var)
        N_masks.to_cpu()
        X_masks.to_cpu()
    t_net += t.msecs

    with Timer() as t:
        data_tmp = X_masks.data
        N_mask = np.median(N_masks.data, axis=1)
        X_mask = np.median(X_masks.data, axis=1)
        Y_hat = gev_wrapper_on_masks(Y, N_mask, X_mask, output_setup,
                                     corr_info)

        #N_mask = N_masks.data
        #X_mask = X_masks.data
        #Y_hat = mcmf_wrapper_on_masks(Y, N_mask, X_mask, output_setup, corr_info)
    t_beamform += t.msecs

    # the spliter in Win '\' and Linux '/'
    if scenario == 'simu':
        wsj_name = cur_line.split('\\')[-1].split('_')[1]
        spk = cur_line.split('\\')[-1].split('_')[0]
        env = cur_line.split('\\')[-1].split('_')[-1]
    elif scenario == 'real':
        wsj_name = cur_line[3]
        spk = cur_line[0].split('\\')[-1].split('_')[0]
        env = cur_line[0].split('\\')[-1].split('_')[-1]