def single_normal():
    # audio_data = get_audio_nochime('data/new_dataset/216m/2m_pub_new', ch_range=range(1, 9), fs=16000)
    # noise_data = get_audio_nochime('data/new_dataset/blstm_noise/noise_124', ch_range=range(1, 9), fs=16000)
    # audio_data = get_audio_nochime(args.data_directory, ch_range=range(1, 3), fs=16000)
    t_io = 0
    t_net = 0
    t_beamform = 0

    # time the audio I/O
    with Timer() as t:
        audio_data = get_audio_nochime(args.data_directory, ch_range=range(1, 3), fs=16000)
        context_samples = 0
        print("audio_data: ", audio_data.shape)
        # for i in range(0, 8):
        #     print(audio_data[i][1])
    t_io += t.msecs

    Y = stft(audio_data, time_dim=1).transpose((1, 0, 2))
    # N = stft(noise_data, time_dim=1).transpose((1, 0, 2))
    Y_phase = np.divide(Y, abs(Y))
    print("Y: ", Y.shape, "Y_phase: ", Y_phase.shape)
    # Wrapping Y in a chainer Variable (or not) makes no difference to the result.
    Y_var = Variable(np.abs(Y).astype(np.float32))
    # N_var = Variable(np.abs(N).astype(np.float32), True)
    # blstm_noise = Variable(np.abs(blstm_noise).astype(np.float32), True)

    # mask estimation
    with Timer() as t:
        N_masks, X_masks = model.calc_masks(Y_var)
        # Noise_masks = model.calc_mask_noise(N_var)
        print("N_masks: ", N_masks.shape)
        N_masks.to_cpu()
        X_masks.to_cpu()
    t_net += t.msecs
    # Noise_masks.to_cpu()

    # beamforming
    with Timer() as t:
        N_mask = np.median(N_masks.data, axis=1)
        X_mask = np.median(X_masks.data, axis=1)
        # Noise_mask = np.median(Noise_masks.data, axis=1)

        # signal = audioread('data/new_dataset/216m/2m_pub_new' + '.CH{}.wav'.format(ch), sample_rate=16000)
        # noise = audioread('data/new_dataset/gevnoise/gevnoise' + '.CH{}.wav'.format(ch), sample_rate=16000)
        # signal_ = stft(signal)
        # noise_ = stft(noise)
        # signal_phase = np.divide(signal, abs(signal_))
        # noise_masks = model.calc_mask_noise(noise_)
        # noise_to = np.multiply(noise_masks.data, signal_)
        # noise_to = np.multiply(noise_to, signal_phase)
        # audiowrite(istft(noise_to)[context_samples:],
        #            "/home/hipo/workspace/BeamSaber/result/noise/noise_to_.CH{}.wav".format(ch), 16000, True, True)

        # reconstruct the noise estimate from the noise mask and the noisy phase
        Noise = np.multiply(N_masks.data, Y)
        Noise = np.multiply(Noise, Y_phase)
        # Y_phase_med = np.median(Y_phase, axis=1)
        # print(Noise.shape)
        # for ch in range(0, 8):
        #     audiowrite(istft(Noise[:, ch, :])[context_samples:],
        #                "/home/hipo/workspace/BeamSaber/result/noise/2mnoise_.CH{}.wav".format(ch), 16000, True, True)
        Noise = np.median(Noise, axis=1)
        # print("N_mask: ", N_mask.shape, "X_mask: ", X_mask.shape, "Y_phase: ", Y_phase.shape)
        Y_hat = gev_wrapper_on_masks(Y, N_mask, X_mask)
        # print(Y_hat.shape)
        # print("Noise: ", Noise.shape)
    t_beamform += t.msecs

    # write the noise estimate and the GEV-enhanced signal
    with Timer() as t:
        audiowrite(istft(Noise)[context_samples:],
                   "/media/hipo/lento/workspace/BeamSaber/tools/enhancement/gev/PublicFOMLSA/sample/{}_noise.wav".format(args.exNum),
                   16000, True, True)
        audiowrite(istft(Y_hat)[context_samples:],
                   "/media/hipo/lento/workspace/BeamSaber/tools/enhancement/gev/PublicFOMLSA/sample/{}_gev.wav".format(args.exNum),
                   16000, True, True)
    t_io += t.msecs

    print('Timings: I/O: {:.2f}s | Net: {:.2f}s | Beamformer: {:.2f}s | Total: {:.2f}s'.format(
        t_io / 1000, t_net / 1000, t_beamform / 1000, (t_io + t_net + t_beamform) / 1000))
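# Hedged usage sketch (not part of the original source).  single_normal() relies on
# module-level `args` and `model` objects that this excerpt never defines; the wiring
# below only illustrates what it appears to assume.  The argument names data_directory
# and exNum occur in the listing above; the mask-estimator class and checkpoint file
# name are assumptions.
if __name__ == "__main__":
    import argparse
    from chainer import serializers

    parser = argparse.ArgumentParser()
    parser.add_argument('data_directory', help='prefix of the multi-channel .CH{n}.wav recordings')
    parser.add_argument('exNum', help='tag used to name the enhanced output files')
    args = parser.parse_args()

    model = BLSTMMaskEstimator()                         # assumed mask-estimator class
    serializers.load_hdf5('mask_estimator.hdf5', model)  # assumed checkpoint file

    single_normal()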
# Fragment from the per-utterance body of the CHiME batch enhancement loop.
with Timer() as t:
    # The opening of this call is truncated in the excerpt; the loader name and return
    # values below are assumptions modelled on the CHiME-style loader for the 'real'
    # scenario, which takes these three fields of cur_line.
    audio_data, context_samples = get_audio_data_with_context(
        cur_line[0], cur_line[1], cur_line[2])
t_io += t.msecs

Y = stft(audio_data, time_dim=1).transpose((1, 0, 2))
Y_var = Variable(np.abs(Y).astype(np.float32))
if args.gpu >= 0:
    Y_var.to_gpu(args.gpu)

with Timer() as t:
    N_masks, X_masks = model.calc_masks(Y_var)
    N_masks.to_cpu()
    X_masks.to_cpu()
t_net += t.msecs

with Timer() as t:
    N_mask = np.median(N_masks.data, axis=1)
    X_mask = np.median(X_masks.data, axis=1)
    Y_hat = gev_wrapper_on_masks(Y, N_mask, X_mask)
t_beamform += t.msecs

# recover speaker, utterance and environment from the utterance id
if scenario == 'simu':
    wsj_name = cur_line.split('/')[-1].split('_')[1]
    spk = cur_line.split('/')[-1].split('_')[0]
    env = cur_line.split('/')[-1].split('_')[-1]
elif scenario == 'real':
    wsj_name = cur_line[3]
    spk = cur_line[0].split('/')[-1].split('_')[0]
    env = cur_line[0].split('/')[-1].split('_')[-1]

filename = os.path.join(args.output_dir,
                        '{}05_{}_{}'.format(stage, env.lower(), scenario),
                        '{}_{}_{}.wav'.format(spk, wsj_name, env.upper()))
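# Worked example (values assumed, not from the source): with args.output_dir = 'enhanced',
# stage = 'dt', env = 'BUS', scenario = 'real', spk = 'F01' and wsj_name = '22GC010X',
# the join above produces
#     enhanced/dt05_bus_real/F01_22GC010X_BUS.wav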
# Variant of the same loop body (old-Chainer volatile Variable for inference).
with Timer() as t:
    # As above, the opening of this call is truncated in the excerpt; the loader name
    # and return values are assumptions modelled on the CHiME-style loader.
    audio_data, context_samples = get_audio_data_with_context(
        cur_line[0], cur_line[1], cur_line[2])
t_io += t.msecs

Y = stft(audio_data, time_dim=1).transpose((1, 0, 2))
Y_var = Variable(np.abs(Y).astype(np.float32), True)
if args.gpu >= 0:
    Y_var.to_gpu(args.gpu)

with Timer() as t:
    N_masks, X_masks = model.calc_masks(Y_var)
    N_masks.to_cpu()
    X_masks.to_cpu()
t_net += t.msecs

with Timer() as t:
    N_mask = np.median(N_masks.data, axis=1)
    X_mask = np.median(X_masks.data, axis=1)
    Y_hat = gev_wrapper_on_masks(Y, N_mask, X_mask)
t_beamform += t.msecs

if scenario == 'simu':
    wsj_name = cur_line.split('/')[-1].split('_')[1]
    spk = cur_line.split('/')[-1].split('_')[0]
    env = cur_line.split('/')[-1].split('_')[-1]
elif scenario == 'real':
    wsj_name = cur_line[3]
    spk = cur_line[0].split('/')[-1].split('_')[0]
    env = cur_line[0].split('/')[-1].split('_')[-1]

filename = os.path.join(
    args.output_dir,
    '{}05_{}_{}'.format(stage, env.lower(), scenario),
    '{}_{}_{}.wav'.format(spk, wsj_name, env.upper()))  # closing parenthesis restored
# Fragment with a second beamforming pass: the first GEV output is wrapped as a new
# network input (Y_var_second), apparently for another mask-estimation step.
with Timer() as t:  # opening of this timing block is truncated in the excerpt
    N_masks, X_masks = model.calc_masks(Y_var)
    N_masks.to_cpu()
    X_masks.to_cpu()
t_net += t.msecs

with Timer() as t:
    N_mask = np.median(N_masks.data, axis=1)
    X_mask = np.median(X_masks.data, axis=1)
    print("Y: ", Y.shape, "N_mask: ", N_mask.shape, "X_mask: ", X_mask.shape)
    Y_hat = gev_wrapper_on_masks(Y, N_mask, X_mask)
    # audiowrite(istft(Y_hat), "new_dataset_result/2m_feedback_.wav", 48000, True, True)
t_beamform += t.msecs

# second-pass beamforming
# second_channel = audioread('AUDIO_RECORDING.CH2.wav', sample_rate=48000)
second_channel = audioread('new_dataset/2m/2m_pub_new.CH5.wav', sample_rate=48000)
second_channel = np.expand_dims(second_channel, axis=0)
print("second_size", second_channel.shape)
second_channel = stft(second_channel, time_dim=1).transpose((1, 0, 2))
print("Y_hat: ", Y_hat.shape, "second_size", second_channel.shape)
Y_hat = np.expand_dims(Y_hat, axis=1)
Y_var_second = Variable(np.abs(Y_hat).astype(np.float32), True)
# Variant of the loop body using the extended GEV wrapper (output_setup / corr_info)
# and Windows-style path separators.  The I/O timing block that fills `t` is not
# shown in the excerpt.
t_io += t.msecs

Y = stft(audio_data, time_dim=1).transpose((1, 0, 2))
Y_var = Variable(np.abs(Y).astype(np.float32), True)
if args.gpu >= 0:
    Y_var.to_gpu(args.gpu)

with Timer() as t:
    N_masks, X_masks = model.calc_masks(Y_var)
    N_masks.to_cpu()
    X_masks.to_cpu()
t_net += t.msecs

with Timer() as t:
    data_tmp = X_masks.data
    N_mask = np.median(N_masks.data, axis=1)
    X_mask = np.median(X_masks.data, axis=1)
    Y_hat = gev_wrapper_on_masks(Y, N_mask, X_mask, output_setup, corr_info)
    # N_mask = N_masks.data
    # X_mask = X_masks.data
    # Y_hat = mcmf_wrapper_on_masks(Y, N_mask, X_mask, output_setup, corr_info)
t_beamform += t.msecs

# the path separator is '\' on Windows and '/' on Linux
if scenario == 'simu':
    wsj_name = cur_line.split('\\')[-1].split('_')[1]
    spk = cur_line.split('\\')[-1].split('_')[0]
    env = cur_line.split('\\')[-1].split('_')[-1]
elif scenario == 'real':
    wsj_name = cur_line[3]
    spk = cur_line[0].split('\\')[-1].split('_')[0]
    env = cur_line[0].split('\\')[-1].split('_')[-1]
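# Sketch (not in the original): the hard-coded '\' vs '/' split above can be made
# platform-independent by splitting on either separator.  `parse_utt_id` is a
# hypothetical helper; `cur_line` is the same CHiME-style utterance id used above.
import re

def parse_utt_id(cur_line):
    """Return (spk, wsj_name, env) from an id such as 'path/to/F01_22GC010X_BUS',
    regardless of whether the path uses Windows or Linux separators."""
    base = re.split(r'[\\/]', cur_line)[-1]   # last path component for either separator
    parts = base.split('_')
    return parts[0], parts[1], parts[-1]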