def main(conf):
    """Evaluate the saved checkpoint on the simulated online test set.

    Loads ``best_model.ckpt`` from ``conf["exp_dir"]`` into a ``TasNet``,
    runs the model once per source on every item of the test set, and prints
    the median of (output SDR - input SDR) over all items and sources.

    Args:
        conf: Configuration mapping. Reads ``conf["exp_dir"]`` and
            ``conf["train_conf"]["net"]["n_src"]``. ``conf["use_gpu"]`` is
            overwritten with ``False`` (the mapping is mutated in place).
    """
    model_path = os.path.join(conf["exp_dir"], "best_model.ckpt")
    pretrain = torch.load(model_path, map_location="cpu")
    model = TasNet()
    model.load_state_dict(pretrain)

    # NOTE(review): GPU use is force-disabled here, which makes the branch
    # below unreachable. Kept as-is because it may be deliberate; confirm.
    conf["use_gpu"] = False
    # Handle device placement
    if conf["use_gpu"]:
        model.cuda()
    model_device = next(model.parameters()).device

    test_set = OnlineSimulationDataset(
        vctk_audio, ms_snsd, 48, simulation_config_test, truncator,
        "./test_online", 50)
    n_src = conf['train_conf']['net']['n_src']

    input_sdr_list = []
    output_sdr_list = []
    model.eval()
    # Scope gradient disabling to the evaluation loop so that autograd is
    # restored for the caller once evaluation finishes (or raises).
    with torch.no_grad():
        for item_idx in tqdm(range(len(test_set))):
            sample = test_set[item_idx]
            mix = sample[0]
            fusion = Prep(sample)
            mix = np.expand_dims(mix, axis=0)  # 1 * channel * length
            mix = torch.from_numpy(mix).to(model_device).float()
            ref = torch.tensor(sample[3] * MAX_INT16,
                               dtype=torch.float32, device=model_device)

            # Forward the network on the mixture, once per source.
            est_list = [model(mix, fusion[i]) for i in range(n_src)]
            spks = torch.cat(est_list, dim=1)
            ref = center_trim(ref, spks).transpose(1, 0)

            spks = spks.detach().cpu().numpy().squeeze()
            ref = ref.detach().cpu().numpy()
            # First channel of the mixture, rescaled like the reference, as a
            # NumPy array so compute_sdr gets the same array type for both
            # the input and the output measurement.
            mix_scaled = mix[0, 0, :].detach().cpu().numpy() * MAX_INT16

            for src_idx, samps in enumerate(spks):
                input_sdr_list.append(
                    compute_sdr(ref[0, src_idx], mix_scaled))
                output_sdr_list.append(
                    compute_sdr(ref[0, src_idx], samps * MAX_INT16))

    input_sdr_array = np.array(input_sdr_list)
    output_sdr_array = np.array(output_sdr_list)
    result = np.median(output_sdr_array - input_sdr_array)
    print("The SNR: " + str(result))
def get_sdr(self, fusion_list, mix_list, ref_list):
    """Return the median SDR improvement of ``self.nnet`` over the given items.

    For every item the network is run once per source (``n_spks`` times, a
    name resolved outside this method), the estimates are concatenated, and
    ``compute_sdr`` is evaluated for both the raw mixture (input SDR) and
    the estimate (output SDR) against the reference. The median of
    (output SDR - input SDR) is printed and returned.

    Puts ``self.nnet`` into eval mode; the mode is not restored afterwards.

    Args:
        fusion_list: Per-item sequence; ``fusion_list[k][i]`` is passed as
            the second argument of ``self.nnet`` for source ``i``.
        mix_list: Per-item mixtures as NumPy arrays (converted with
            ``th.from_numpy`` after a leading axis is added).
        ref_list: Per-item references; each is multiplied by ``MAX_INT16``.

    Returns:
        The value of ``np.median(output_sdr - input_sdr)``.
    """
    input_sdr_list = []
    output_sdr_list = []
    with th.no_grad():
        self.nnet.eval()
        for item_idx, fusion in enumerate(fusion_list):
            mix = np.expand_dims(mix_list[item_idx], axis=0)  # 1 * channel * length
            mix = th.from_numpy(mix).to(device=self.device).float()
            ref = th.tensor(ref_list[item_idx] * MAX_INT16,
                            dtype=th.float32, device=self.device)

            # Forward the network on the mixture, once per source.
            est_list = [self.nnet(mix, fusion[i]) for i in range(n_spks)]
            spks = th.cat(est_list, dim=1)
            ref = center_trim(ref, spks).transpose(1, 0)

            spks = spks.detach().cpu().numpy().squeeze()
            ref = ref.detach().cpu().numpy()
            # First channel of the mixture, rescaled like the reference, as a
            # NumPy array so compute_sdr never receives a (possibly
            # non-CPU) torch tensor.
            mix_scaled = mix[0, 0, :].detach().cpu().numpy() * MAX_INT16

            for src_idx, samps in enumerate(spks):
                input_sdr_list.append(
                    compute_sdr(ref[0, src_idx], mix_scaled))
                output_sdr_list.append(
                    compute_sdr(ref[0, src_idx], samps * MAX_INT16))

    input_sdr_array = np.array(input_sdr_list)
    output_sdr_array = np.array(output_sdr_list)
    result = np.median(output_sdr_array - input_sdr_array)
    print("The SNR: " + str(result))
    return result
for i, batch in enumerate(dl2): IDX = batch.numpy() x = stft_mix_test[IDX] vocal_results, accom_results = cass.test(x) b_r.append(vocal_results) c_r.append(accom_results) b_r = np.concatenate(b_r, axis=0) c_r = np.concatenate(c_r, axis=0) results = [b_r, c_r] # compute error vocal_error = compute_lx_error(stft_vocal_test, results[0], stft_mix_test_pha) accom_error = compute_lx_error(stft_accom_test, results[1], stft_mix_test_pha) vocal_sdr, vocal_std, vocal_med, vocal_min, vocal_max = compute_sdr( stft_vocal_test, results[0], stft_mix_test_pha, stft_mix_test) accom_sdr, accom_std, accom_med, accom_min, accom_max = compute_sdr( stft_accom_test, results[1], stft_mix_test_pha, stft_mix_test) curr_error = 0.5 * (vocal_error + accom_error) curr_sdr = 0.5 * (vocal_sdr + accom_sdr) print(10 * "=") print("Epoch {}, vocal error: {:.4f}, accom error: {:.4f}".format( j, vocal_error, accom_error)) print("Curr Best: {:.4f}, Avg Error: {:.4f}, Avg sdr: {:.4f}".format( best_result, 0.5 * (vocal_error + accom_error), 0.5 * (vocal_sdr + accom_sdr))) print(" vocal sdr: {:.4f}, accom sdr: {:.4f}".format( vocal_sdr, accom_sdr)) print("STD: vocal sdr: {:.4f}, accom sdr: {:.4f}".format( vocal_std, accom_std)) print("MED: vocal sdr: {:.4f}, accom sdr: {:.4f}".format(
a = stft_bass[IDX] b = stft_sax[IDX] c = stft_clar[IDX] d = stft_vio[IDX] enc_losses, dec_losses, dis_losses = cass.train(x, [a, b, c, d]) results = cass.test(stft_mix_test) # compute error bass_error = compute_lx_error(stft_bass_test, results[0], stft_mix_test_pha) sax_error = compute_lx_error(stft_sax_test, results[1], stft_mix_test_pha) clar_error = compute_lx_error(stft_clar_test, results[2], stft_mix_test_pha) vio_error = compute_lx_error(stft_vio_test, results[3], stft_mix_test_pha) bass_sdr, bass_std, bass_med, bass_min, bass_max = compute_sdr( stft_bass_test, results[0], stft_mix_test_pha, stft_mix_test) sax_sdr, sax_std, sax_med, sax_min, sax_max = compute_sdr( stft_sax_test, results[1], stft_mix_test_pha, stft_mix_test) clar_sdr, clar_std, clar_med, clar_min, clar_max = compute_sdr( stft_clar_test, results[2], stft_mix_test_pha, stft_mix_test) vio_sdr, vio_std, vio_med, vio_min, vio_max = compute_sdr( stft_vio_test, results[3], stft_mix_test_pha, stft_mix_test) curr_error = 0.25 * (bass_error + sax_error + clar_error + vio_error) print(10 * "=") print( "Epoch {}, Bass error: {:.4f}, Sax error: {:.4f}, Clar error: {:.4f}, Vio error: {:.4f}" .format(j, bass_error, sax_error, clar_error, vio_error)) print("Curr Best: {:.4f}, Avg Error: {:.4f}, Avg sdr: {:.4f}".format( best_result, curr_error, 0.25 * (bass_sdr + sax_sdr + clar_sdr + vio_sdr))) print(