ch_range=range(1, 9), fs=48000) # audio_data = get_audio_nochime('new_dataset/new_audio/AUDIO_RECORDING', ch_range=range(1, 9), fs=49000) # calculate the time for load the audio files t_io += t.msecs # change the audio files into frequency domain Y = stft(audio_data, time_dim=1).transpose((1, 0, 2)) print(audio_data.shape, type(audio_data)) Y_var = Variable(np.abs(Y).astype(np.float32), True) # mask estimation with Timer() as t: N_masks, X_masks = model.calc_masks(Y_var) N_masks.to_cpu() X_masks.to_cpu() t_net += t.msecs with Timer() as t: N_mask = np.median(N_masks.data, axis=1) X_mask = np.median(X_masks.data, axis=1) print("Y: ", Y.shape, "N_mask: ", N_mask.shape, "X_mask: ", X_mask.shape, end="\n") Y_hat = gev_wrapper_on_masks(Y, N_mask, X_mask)
# NOTE(review): the line above has lost its newlines and starts mid-call (the
# opening of the call that receives ch_range/fs is not visible here). As
# written, everything after the first '#' is parsed as a comment, so only the
# call tail is live code. Restore the original line breaks before running.
#
# Statements visible in the collapsed text, in order:
#   1. t.msecs is added to t_io.
#   2. Y = stft(audio_data, time_dim=1) with its axes transposed to (1, 0, 2);
#      audio_data's shape and type are printed.
#   3. The float32 magnitude of Y is wrapped in Variable(..., True).
#      The meaning of the second positional argument is not shown here;
#      presumably a legacy framework flag -- TODO confirm.
#   4. model.calc_masks(Y_var) returns N_masks and X_masks; to_cpu() is called
#      on both, and t.msecs from that timer is added to t_net.
#   5. N_mask / X_mask are np.median over axis 1 of the masks' .data; the
#      shapes of Y, N_mask and X_mask are printed.
#   6. Y_hat = gev_wrapper_on_masks(Y, N_mask, X_mask).
#
# NOTE(review): the commented-out alternative call passes fs=49000 while the
# live call tail passes fs=48000 -- confirm which sample rate is intended.
# NOTE(review): the line below has lost its newlines. Because it begins with
# '#', the interpreter treats the whole line as a single comment and none of
# it executes. It also stops at a bare "elif scenario == 'real':" whose body
# is not visible. Restore the original line breaks before use.
#
# Statements visible in the collapsed text, per cur_line in tqdm(flist):
#   1. Load audio inside a Timer: for scenario 'simu',
#      get_audio_data(cur_line) with context_samples = 0; for 'real',
#      get_audio_data_with_context(cur_line[0], cur_line[1], cur_line[2]).
#      t.msecs is added to t_io.
#   2. Y = stft(audio_data, time_dim=1) with its axes transposed to (1, 0, 2);
#      the float32 magnitude of Y is wrapped in a Variable, and
#      Y_var.to_gpu(args.gpu) is called when args.gpu >= 0.
#   3. model.calc_masks(Y_var) returns N_masks and X_masks; to_cpu() is called
#      on both, and t.msecs from that timer is added to t_net.
#   4. N_mask / X_mask are np.median over axis 1 of the masks' .data, then
#      Y_hat_dicts = bf_wrapper_on_masks(Y, N_mask, X_mask,
#      beamformers=beamformers); t.msecs is added to t_beamform.
#   5. For 'simu', the last '/'-separated component of cur_line is split on
#      '_': field 0 -> spk, field 1 -> wsj_name, last field -> env.
# Beamform loop for cur_line in tqdm(flist): with Timer() as t: if scenario == 'simu': audio_data = get_audio_data(cur_line) context_samples = 0 elif scenario == 'real': audio_data, context_samples = get_audio_data_with_context( cur_line[0], cur_line[1], cur_line[2]) t_io += t.msecs Y = stft(audio_data, time_dim=1).transpose((1, 0, 2)) Y_var = Variable(np.abs(Y).astype(np.float32)) if args.gpu >= 0: Y_var.to_gpu(args.gpu) with Timer() as t: N_masks, X_masks = model.calc_masks(Y_var) N_masks.to_cpu() X_masks.to_cpu() t_net += t.msecs with Timer() as t: N_mask = np.median(N_masks.data, axis=1) X_mask = np.median(X_masks.data, axis=1) Y_hat_dicts = bf_wrapper_on_masks(Y, N_mask, X_mask, beamformers=beamformers) t_beamform += t.msecs if scenario == 'simu': wsj_name = cur_line.split('/')[-1].split('_')[1] spk = cur_line.split('/')[-1].split('_')[0] env = cur_line.split('/')[-1].split('_')[-1] elif scenario == 'real':
# NOTE(review): the line below has lost its newlines. Because it begins with
# '#', the interpreter treats the whole line as a single comment and none of
# it executes. It also stops at a bare "elif scenario == 'real':" whose body
# is not visible. Restore the original line breaks before use.
#
# Statements visible in the collapsed text, per cur_line in tqdm(flist):
#   1. Load audio inside a Timer: for scenario 'simu',
#      get_audio_data(cur_line) with context_samples = 0; for 'real',
#      get_audio_data_with_context(cur_line[0], cur_line[1], cur_line[2]).
#      t.msecs is added to t_io.
#   2. Y = stft(audio_data, time_dim=1) with its axes transposed to (1, 0, 2);
#      the float32 magnitude of Y is wrapped in Variable(..., True), and
#      Y_var.to_gpu(args.gpu) is called when args.gpu >= 0. The meaning of
#      the second positional argument is not shown here; presumably a legacy
#      framework flag -- TODO confirm.
#   3. model.calc_masks(Y_var) returns N_masks and X_masks; to_cpu() is called
#      on both, and t.msecs from that timer is added to t_net.
#   4. N_mask / X_mask are np.median over axis 1 of the masks' .data, then
#      Y_hat = gev_wrapper_on_masks(Y, N_mask, X_mask); t.msecs is added to
#      t_beamform.
#   5. For 'simu', the last '/'-separated component of cur_line is split on
#      '_': field 0 -> spk, field 1 -> wsj_name, last field -> env.
# Beamform loop for cur_line in tqdm(flist): with Timer() as t: if scenario == 'simu': audio_data = get_audio_data(cur_line) context_samples = 0 elif scenario == 'real': audio_data, context_samples = get_audio_data_with_context( cur_line[0], cur_line[1], cur_line[2]) t_io += t.msecs Y = stft(audio_data, time_dim=1).transpose((1, 0, 2)) Y_var = Variable(np.abs(Y).astype(np.float32), True) if args.gpu >= 0: Y_var.to_gpu(args.gpu) with Timer() as t: N_masks, X_masks = model.calc_masks(Y_var) N_masks.to_cpu() X_masks.to_cpu() t_net += t.msecs with Timer() as t: N_mask = np.median(N_masks.data, axis=1) X_mask = np.median(X_masks.data, axis=1) Y_hat = gev_wrapper_on_masks(Y, N_mask, X_mask) t_beamform += t.msecs if scenario == 'simu': wsj_name = cur_line.split('/')[-1].split('_')[1] spk = cur_line.split('/')[-1].split('_')[0] env = cur_line.split('/')[-1].split('_')[-1] elif scenario == 'real':