def bss_eval_global(wavs_mono, wavs_src1, wavs_src2, wavs_src1_pred, wavs_src2_pred):
    assert len(wavs_mono) == len(wavs_src1) == len(wavs_src2) == len(wavs_src1_pred) == len(wavs_src2_pred)
    gnsdr = np.zeros(2)
    gsir = np.zeros(2)
    gsar = np.zeros(2)
    frames_total = 0
    for wav_mono, wav_src1, wav_src2, wav_src1_pred, wav_src2_pred in zip(
            wavs_mono, wavs_src1, wavs_src2, wavs_src1_pred, wavs_src2_pred):
        # Crop references and mixture to the prediction length.
        len_cropped = wav_src1_pred.shape[-1]
        wav_mono_cropped = wav_mono[:len_cropped]
        wav_src1_cropped = wav_src1[:len_cropped]
        wav_src2_cropped = wav_src2[:len_cropped]
        sdr, sir, sar, _ = bss_eval_sources(
            reference_sources=np.asarray([wav_src1_cropped, wav_src2_cropped]),
            estimated_sources=np.asarray([wav_src1_pred, wav_src2_pred]),
            compute_permutation=False)
        sdr_mono, _, _, _ = bss_eval_sources(
            reference_sources=np.asarray([wav_src1_cropped, wav_src2_cropped]),
            estimated_sources=np.asarray([wav_mono_cropped, wav_mono_cropped]),
            compute_permutation=False)
        # NSDR = SDR of the estimate minus SDR of the raw mixture; accumulate
        # all metrics weighted by clip length for a global average.
        nsdr = sdr - sdr_mono
        gnsdr += len_cropped * nsdr
        gsir += len_cropped * sir
        gsar += len_cropped * sar
        frames_total += len_cropped
    gnsdr /= frames_total
    gsir /= frames_total
    gsar /= frames_total
    return gnsdr, gsir, gsar
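# A minimal usage sketch for the list-based bss_eval_global above. The random
# arrays below are placeholders (not real audio), and the only assumed imports
# are numpy and mir_eval.separation.bss_eval_sources; the same calling pattern
# applies to the batched variants that follow.
import numpy as np
from mir_eval.separation import bss_eval_sources

rng = np.random.RandomState(0)
src1 = [rng.randn(16000) for _ in range(3)]         # ground-truth source 1 per clip
src2 = [rng.randn(16000) for _ in range(3)]         # ground-truth source 2 per clip
mono = [a + b for a, b in zip(src1, src2)]          # mono mixtures
pred1 = [s + 0.1 * rng.randn(16000) for s in src1]  # noisy estimates of source 1
pred2 = [s + 0.1 * rng.randn(16000) for s in src2]  # noisy estimates of source 2

gnsdr, gsir, gsar = bss_eval_global(mono, src1, src2, pred1, pred2)
print(gnsdr, gsir, gsar)  # length-weighted global NSDR / SIR / SAR, one value per source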
def bss_eval_global(mixed_wav, src1_wav, src2_wav, pred_src1_wav, pred_src2_wav, n):
    len_cropped = pred_src1_wav.shape[-1]
    src1_wav = src1_wav[:, :len_cropped]
    src2_wav = src2_wav[:, :len_cropped]
    mixed_wav = mixed_wav[:, :len_cropped]
    gnsdr, gsir, gsar = np.zeros(2), np.zeros(2), np.zeros(2)
    total_len = 0
    for i in range(n):
        # Skip (near-)silent references; bss_eval_sources rejects all-zero sources.
        if np.sum(np.abs(src1_wav[i])) < 1e-10 or np.sum(np.abs(src2_wav[i])) < 1e-10:
            continue
        sdr, sir, sar, _ = bss_eval_sources(
            np.array([src1_wav[i], src2_wav[i]]),
            np.array([pred_src1_wav[i], pred_src2_wav[i]]), False)
        sdr_mixed, _, _, _ = bss_eval_sources(
            np.array([src1_wav[i], src2_wav[i]]),
            np.array([mixed_wav[i], mixed_wav[i]]), False)
        nsdr = sdr - sdr_mixed
        gnsdr += len_cropped * nsdr
        gsir += len_cropped * sir
        gsar += len_cropped * sar
        total_len += len_cropped
    gnsdr /= total_len
    gsir /= total_len
    gsar /= total_len
    return gnsdr, gsir, gsar
def bss_eval_global(wavs_mono, wavs_src1, wavs_src2, wavs_src1_pred, wavs_src2_pred):
    print(len(wavs_mono), len(wavs_src1), len(wavs_src2), len(wavs_src1_pred), len(wavs_src2_pred))
    assert len(wavs_mono) == len(wavs_src1) == len(wavs_src2) == len(wavs_src1_pred) == len(wavs_src2_pred)
    gnsdr = np.zeros(2)
    gsir = np.zeros(2)
    gsar = np.zeros(2)
    frames_total = 0
    step = 1
    for wav_mono, wav_src1, wav_src2, wav_src1_pred, wav_src2_pred in zip(
            wavs_mono, wavs_src1, wavs_src2, wavs_src1_pred, wavs_src2_pred):
        len_cropped = wav_src1_pred.shape[-1]
        wav_mono_cropped = wav_mono[:len_cropped]
        wav_src1_cropped = wav_src1[:len_cropped]
        wav_src2_cropped = wav_src2[:len_cropped]
        sdr, sir, sar, _ = bss_eval_sources(
            reference_sources=np.asarray([wav_src1_cropped, wav_src2_cropped]),
            estimated_sources=np.asarray([wav_src1_pred, wav_src2_pred]),
            compute_permutation=False)
        sdr_mono, _, _, _ = bss_eval_sources(
            reference_sources=np.asarray([wav_src1_cropped, wav_src2_cropped]),
            estimated_sources=np.asarray([wav_mono_cropped, wav_mono_cropped]),
            compute_permutation=False)
        nsdr = sdr - sdr_mono
        gnsdr += len_cropped * nsdr
        gsir += len_cropped * sir
        gsar += len_cropped * sar
        frames_total += len_cropped
        print("{}/{}\n".format(step, len(wavs_mono)))
        step += 1
    gnsdr /= frames_total
    gsir /= frames_total
    gsar /= frames_total
    return gnsdr, gsir, gsar
def bss_eval_global(mixed_wav, src1_wav, src2_wav, pred_src1_wav, pred_src2_wav):
    len_cropped = pred_src1_wav.shape[-1]
    src1_wav = src1_wav[:, :len_cropped]
    src2_wav = src2_wav[:, :len_cropped]
    mixed_wav = mixed_wav[:, :len_cropped]
    gnsdr = np.zeros(2)
    gsir = np.zeros(2)
    gsar = np.zeros(2)
    total_len = 0
    for i in range(EvalConfig.NUM_EVAL):
        sdr, sir, sar, _ = bss_eval_sources(
            np.array([src1_wav[i], src2_wav[i]]),
            np.array([pred_src1_wav[i], pred_src2_wav[i]]), False)
        sdr_mixed, _, _, _ = bss_eval_sources(
            np.array([src1_wav[i], src2_wav[i]]),
            np.array([mixed_wav[i], mixed_wav[i]]), False)
        nsdr = sdr - sdr_mixed
        gnsdr += len_cropped * nsdr
        gsir += len_cropped * sir
        gsar += len_cropped * sar
        total_len += len_cropped
    gnsdr /= total_len
    gsir /= total_len
    gsar /= total_len
    return gnsdr, gsir, gsar
def SDR(est, egs, mix):
    '''
    Calculate the SDR improvement (NSDR) of the estimate over the mixture.
    est: network-generated audio
    egs: ground truth
    mix: mixture audio
    '''
    sdr, _, _, _ = bss_eval_sources(egs, est)
    mix_sdr, _, _, _ = bss_eval_sources(egs, mix)
    return float(sdr - mix_sdr)
def SDR(est, egs, mix):
    '''
    Calculate the SDR improvement (NSDR), torch-tensor variant.
    est: network-generated audio
    egs: ground truth
    mix: mixture audio
    '''
    length = est.numpy().shape[0]
    sdr, _, _, _ = bss_eval_sources(egs.numpy()[:length], est.numpy()[:length])
    mix_sdr, _, _, _ = bss_eval_sources(egs.numpy()[:length], mix.numpy()[:length])
    return float(sdr - mix_sdr)
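# A small sketch of calling the tensor variant of SDR above; it assumes CPU
# torch tensors (`.numpy()` fails on CUDA tensors without `.cpu()`) and uses
# random placeholder signals. The returned value is an NSDR: the estimate's
# SDR against the ground truth minus the mixture's SDR against the same truth.
import torch

egs = torch.randn(16000)              # placeholder ground truth
mix = egs + torch.randn(16000)        # placeholder mixture
est = egs + 0.1 * torch.randn(16000)  # placeholder network output
print(SDR(est, egs, mix))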
def separation_metrics(pred_left, pred_right, gt_left, gt_right, mix):
    if audio_empty(gt_left) or audio_empty(gt_right) or audio_empty(pred_right) \
            or audio_empty(pred_left) or audio_empty(mix):
        print("----------- Empty -----------")
        return None
    sdr, sir, sar, _ = bss_eval_sources(np.asarray([gt_left, gt_right]),
                                        np.asarray([pred_left, pred_right]), False)
    sdr_mix, _, _, _ = bss_eval_sources(np.asarray([gt_left, gt_right]),
                                        np.asarray([mix, mix]), False)
    return sdr.mean(), sir.mean(), sar.mean(), sdr_mix.mean()
def cal_SDR_improve(clean, direct, enhance):
    """
    Calculate SDR1: enhanced signal against clean.
    Calculate SDR2: direct signal against clean.
    Return SDR1 - SDR2 (the improvement).
    """
    import sys
    sys.path.append('/home/cjf/workspace/201903_dereverLocEnhance/mir_eval_master/')
    from mir_eval import separation as sep
    SDR1, SIR1, SAR1, perm1 = sep.bss_eval_sources(clean, enhance, False)
    SDR2, SIR2, SAR2, perm2 = sep.bss_eval_sources(clean, direct, False)
    return SDR1 - SDR2
def bss_eval(mixed_wav, src1_wav, src2_wav, pred_src1_wav, pred_src2_wav):
    # Use `length` rather than shadowing the built-in len().
    length = pred_src1_wav.shape[0]
    src1_wav = src1_wav[:length]
    src2_wav = src2_wav[:length]
    mixed_wav = mixed_wav[:length]
    sdr, sir, sar, _ = bss_eval_sources(
        np.array([src1_wav, src2_wav]),
        np.array([pred_src1_wav, pred_src2_wav]),
        compute_permutation=True)
    sdr_mixed, _, _, _ = bss_eval_sources(
        np.array([src1_wav, src2_wav]),
        np.array([mixed_wav, mixed_wav]),
        compute_permutation=True)
    nsdr = sdr - sdr_mixed
    return nsdr, sir, sar, length
def cal_sdri(src_ref, src_est, mix):
    """Calculate Source-to-Distortion Ratio improvement (SDRi).

    NOTE: bss_eval_sources is very slow.

    Args:
        src_ref: numpy.ndarray, [C, T]
        src_est: numpy.ndarray, [C, T], reordered by best PIT permutation
        mix: numpy.ndarray, [T]
    Returns:
        average SDRi over both sources
    """
    src_anchor = np.stack([mix, mix], axis=0)
    sdr, sir, sar, popt = bss_eval_sources(src_ref, src_est)
    sdr0, sir0, sar0, popt0 = bss_eval_sources(src_ref, src_anchor)
    avg_sdri = ((sdr[0] - sdr0[0]) + (sdr[1] - sdr0[1])) / 2
    return avg_sdri
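# A hedged example for cal_sdri, assuming the estimates are already in the
# best PIT order; mir_eval expects [n_sources, n_samples] arrays here and
# the signals are synthetic placeholders.
import numpy as np

rng = np.random.RandomState(1)
ref = rng.randn(2, 8000)               # [C=2, T] reference sources
mix = ref.sum(axis=0)                  # [T] mixture
est = ref + 0.05 * rng.randn(2, 8000)  # [C=2, T] estimates, PIT-ordered
print(cal_sdri(ref, est, mix))         # average SDR improvement over the mixture anchor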
def sdr_sir_sar(gt_audio, sep_audio, sed_y, inside_only):
    """gt_audio, sep_audio shape: (n_channels, n_samples)"""
    if inside_only:
        # Evaluate only the segment where the SED labels are active.
        n_step = cfg.n_step
        active_locts = np.where(sed_y == 1)[0]
        onset = int(round(active_locts[0] * n_step))
        offset = int(round((active_locts[-1] + 1) * n_step))
        in_gt_audio = gt_audio[:, onset:offset]
        in_sep_audio = sep_audio[:, onset:offset]
        sdr, sir, sar, perm = bss_eval_sources(in_gt_audio, in_sep_audio,
                                               compute_permutation=False)
    else:
        sdr, sir, sar, perm = bss_eval_sources(gt_audio, sep_audio,
                                               compute_permutation=False)
    return sdr, sir, sar
def validate(audio, model, embedder, testloader, writer, epoch):
    model.eval()
    criterion = nn.MSELoss()
    with torch.no_grad():
        for i, batch in enumerate(testloader):
            dvec_mel, target_wav, mixed_wav, target_mag, mixed_mag, mixed_phase = batch[0]

            dvec_mel = dvec_mel.cuda()
            target_mag = target_mag.unsqueeze(0).cuda()
            mixed_mag = mixed_mag.unsqueeze(0).cuda()

            dvec = embedder(dvec_mel)
            dvec = dvec.unsqueeze(0)
            est_mask = model(mixed_mag, dvec)
            est_mag = est_mask * mixed_mag
            test_loss = criterion(est_mag, target_mag).item()

            mixed_mag = mixed_mag[0].cpu().detach().numpy()
            target_mag = target_mag[0].cpu().detach().numpy()
            est_mag = est_mag[0].cpu().detach().numpy()
            est_wav = audio.spec2wav(est_mag, mixed_phase)
            est_mask = est_mask[0].cpu().detach().numpy()

            test_sdr_avg = bss_eval_sources(target_wav, est_wav, False)[0][0]
            test_loss_avg = test_loss

            writer.log_evaluation_data(mixed_wav, target_wav, est_wav,
                                       mixed_mag.T, target_mag.T, est_mag.T,
                                       est_mask.T, (epoch - 1), i)
            writer.log_evaluation_avg(test_loss_avg, test_sdr_avg, (epoch - 1))
            # Evaluate a single batch only.
            break
def test(epoch):
    # testing data
    model.eval()
    start_time = time.time()
    with torch.no_grad():
        avesdr = 0
        numSongs = 0
        sdrmedian = np.zeros(50)
        for iloader, xtrain, ytrain in loadtest:
            iloader = iloader.item()
            listofpred0 = []
            cnt, aveloss = 0, 0
            for ind in range(0, xtrain.shape[-1] - sampleSize, sampleSize):
                if xtrain[0, 0, ind:ind + sampleSize].shape[0] < sampleSize:
                    break
                output = model(xtrain[:, :, ind:ind + sampleSize].to(device))
                listofpred0.append(output.reshape(-1).cpu().numpy())
                loss = criterion(output, ytrain[:, :, ind:ind + sampleSize].to(device))
                cnt += 1
                aveloss += float(loss)
            aveloss /= cnt
            print('loss for test:{},num{},epoch{}'.format(aveloss, iloader, epoch))
            ans0 = mu_law_decode(np.concatenate(listofpred0))
            if iloader >= 150:
                sdr = bss_eval_sources(
                    mu_law_decode(ytrain[0, 0, :ans0.shape[0]].cpu().numpy()), ans0)
                avesdr += sdr[0][0]
                sdrmedian[iloader - 150] = sdr[0][0]
                # print('each ele of median', sdrmedian[iloader - 150], iloader - 150)
                numSongs += 1
            if iloader > 160:
                continue
            if not os.path.exists('vsCorpus/'):
                os.makedirs('vsCorpus/')
            sf.write(savemusic.format(iloader), ans0, sample_rate)
            print('test stored done', np.round(time.time() - start_time))
        print('sdr mean:', avesdr / numSongs)
        print('sdr median:', np.median(sdrmedian))
def validate(audio, model, embedder, testloader, writer, step):
    model.eval()
    criterion = nn.MSELoss()
    with torch.no_grad():
        for batch in testloader:
            dvec_mel, target_wav, mixed_wav, target_mag, mixed_mag, mixed_phase = batch[0]

            dvec_mel = dvec_mel.cuda()
            target_mag = target_mag.unsqueeze(0).cuda()
            mixed_mag = mixed_mag.unsqueeze(0).cuda()

            dvec = embedder(dvec_mel)
            dvec = dvec.unsqueeze(0)
            est_mask = model(mixed_mag, dvec)
            est_mag = est_mask * mixed_mag
            test_loss = criterion(target_mag, est_mag).item()

            mixed_mag = mixed_mag[0].cpu().detach().numpy()
            target_mag = target_mag[0].cpu().detach().numpy()
            est_mag = est_mag[0].cpu().detach().numpy()
            est_wav = audio.spec2wav(est_mag, mixed_phase)
            est_mask = est_mask[0].cpu().detach().numpy()

            sdr = bss_eval_sources(target_wav, est_wav, False)[0][0]
            writer.log_evaluation(test_loss, sdr, mixed_wav, target_wav, est_wav,
                                  mixed_mag.T, target_mag.T, est_mag.T,
                                  est_mask.T, step)
            # Evaluate a single batch only.
            break
    model.train()
def test_preprocessed_data(net_type):
    from pystoi import stoi
    import pesq
    from mir_eval.separation import bss_eval_sources

    path = 'preprocessed_test_data_' + net_type + '/'
    if os.path.isdir(path):
        files = [f for f in os.listdir(path) if f.endswith('.npy')]
        sdr_a = []
        pesq_a = []
        stoi_a = []
        processed = 0
        for i, f in enumerate(files):
            signals = np.load(path + f)
            clean_speech = signals[:, 0]
            recovered_speech = signals[:, 1]
            if np.any(clean_speech) and np.any(recovered_speech):
                PESQ = pesq.pesq(dsp.audio_fs, clean_speech, recovered_speech, 'wb')
                STOI = stoi(clean_speech, recovered_speech, dsp.audio_fs, extended=False)
                SDR, sir, sar, perm = bss_eval_sources(clean_speech, recovered_speech)
                sdr_a.append(SDR[0])
                pesq_a.append(PESQ)
                stoi_a.append(STOI)
                processed += 1
            end = '\r' if i < len(files) - 1 else '\n'
            print('[Metric computation: {}% complete]'.format(100.0 * (i + 1) / len(files)), end=end)
        metrics = np.array([sdr_a, pesq_a, stoi_a]).T
        np.save(net_type + '_metrics.npy', metrics)
        print("Finished pre-processed testing of net '{}': {} of {} files were "
              "processed into {}_metrics.npy".format(net_type, processed, len(files), net_type))
    else:
        print("Error: preprocessed data for the model not found")
def run(args):
    sep_reader = AudioReader(args.sep_scp)
    ref_reader = AudioReader(args.ref_scp)
    utt_snr = open(args.per_utt, "w") if args.per_utt else None
    utt_ali = open(args.utt_ali, "w") if args.utt_ali else None
    reporter = Report(args.spk2class)
    # sep: N x S
    for key, sep in sep_reader:
        # ref: N x S
        ref = ref_reader[key]
        # keep same shape
        nsamps = min(sep.shape[-1], ref.shape[-1])
        sdr, _, _, ali = bss_eval_sources(ref[:, :nsamps], sep[:, :nsamps])
        sdr = np.mean(sdr)
        reporter.add(key, sdr)
        if utt_snr:
            utt_snr.write("{}\t{:.2f}\n".format(key, sdr))
        if utt_ali:
            ali_str = " ".join(map(str, ali))
            utt_ali.write(f"{key}\t{ali_str}\n")
    reporter.report()
    if utt_snr:
        utt_snr.close()
    if utt_ali:
        utt_ali.close()
def sdr_batch_eval(target_sources, noisy_sources, estimated_sources,
                   sample_rate=16e3, step_size=10, sequence_lengths=None):
    sdr_list = []
    sir_list = []
    sar_list = []
    n_samples_frame = int(step_size / 1e3 * sample_rate)
    for i, (target, noisy, estimated) in enumerate(
            zip(target_sources, noisy_sources, estimated_sources)):
        if sequence_lengths is not None:
            target = target[:sequence_lengths[i] * n_samples_frame]
            noisy = noisy[:len(target)]
            estimated = estimated[:len(target)]
        # Skip evaluation if the estimated source is an all-zero vector
        if np.any(estimated):
            ref_sources = np.vstack([target, noisy])
            est_sources = np.vstack([estimated, np.ones_like(estimated)])
            sdr, sir, sar, _ = bss_eval_sources(ref_sources, est_sources,
                                                compute_permutation=False)
            sdr_list.append(sdr[0])
            sir_list.append(sir[0])
            sar_list.append(sar[0])
    return np.array(sdr_list), np.array(sir_list), np.array(sar_list)
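# A toy-data sketch of sdr_batch_eval above. The all-ones row stacked into the
# estimates simply pairs with the noisy reference so that, with permutation
# search disabled, index 0 of each metric corresponds to the target source.
# The signals are synthetic placeholders.
import numpy as np

rng = np.random.RandomState(2)
targets = [rng.randn(16000) for _ in range(2)]
noisy = [t + rng.randn(16000) for t in targets]
estimates = [t + 0.1 * rng.randn(16000) for t in targets]
sdr, sir, sar = sdr_batch_eval(targets, noisy, estimates)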
def audio_to_bsseval(s1hats, s2hats, s1s, s2s):
    bss_evals = []
    bss_evals_paris = []
    for i, (s1hat, s2hat, s1, s2) in enumerate(zip(s1hats, s2hats, s1s, s2s)):
        print('Computing bssevals for mixture {}'.format(i))
        sourcehat_mat = np.concatenate([s1hat.reshape(1, -1), s2hat.reshape(1, -1)], 0)
        source_mat = np.concatenate([s1.reshape(1, -1), s2.reshape(1, -1)], 0)
        Nhat, N = sourcehat_mat.shape[1], source_mat.shape[1]
        Nmin = min([N, Nhat])
        bss_evals.append(
            mevalsep.bss_eval_sources(source_mat[:, :Nmin], sourcehat_mat[:, :Nmin]))
        bss_evals_paris.append([
            tu.bss_eval(sourcehat_mat[0, :Nmin], 0, source_mat[:, :Nmin]),
            tu.bss_eval(sourcehat_mat[1, :Nmin], 1, source_mat[:, :Nmin]),
        ])
    print(bss_evals)
    print(bss_evals_paris)
    return bss_evals
def sdr_batch_eval_ss(target_source, estimated_source, sample_rate=16e3,
                      step_size=10, sequence_lengths=None):
    """Single-source version of the SDR, SIR and SAR computation."""
    sdr_list = []
    sir_list = []
    sar_list = []
    n_samples_frame = int(step_size / 1e3 * sample_rate)
    for i, (target, estimated) in enumerate(zip(target_source, estimated_source)):
        if sequence_lengths is not None:
            target = target[:sequence_lengths[i] * n_samples_frame]
            estimated = estimated[:len(target)]
        # Skip evaluation if the estimated source is an all-zero vector
        if np.any(estimated):
            sdr, sir, sar, _ = bss_eval_sources(np.array([target]),
                                                np.array([estimated]),
                                                compute_permutation=False)
            sdr_list.append(sdr[0])
            sir_list.append(sir[0])
            sar_list.append(sar[0])
    return np.array(sdr_list), np.array(sir_list), np.array(sar_list)
def convergence_callback(Y, X, n_targets, SDR, SIR, eval_time, ref, framesize,
                         win_s, algo_name):
    t_in = time.perf_counter()

    # projection back
    z = projection_back(Y, X[:, :, 0])
    Y = Y * np.conj(z[None, :, :])

    from mir_eval.separation import bss_eval_sources

    if Y.shape[2] == 1:
        y = pra.transform.synthesis(Y[:, :, 0], framesize, hop, win=win_s)[:, None]
    else:
        y = pra.transform.synthesis(Y, framesize, hop, win=win_s)

    if algo_name not in parameters["overdet_algos"]:
        new_ord = np.argsort(np.std(y, axis=0))[::-1]
        y = y[:, new_ord]

    m = np.minimum(y.shape[0] - hop, ref.shape[1])
    synth[:n_targets, :m, 0] = y[hop:m + hop, :n_targets].T
    synth[n_targets, :m, 0] = y[hop:m + hop, 0]

    sdr, sir, sar, perm = bss_eval_sources(ref[:n_targets + 1, :m, 0], synth[:, :m, 0])
    SDR.append(sdr[:n_targets].tolist())
    SIR.append(sir[:n_targets].tolist())

    t_out = time.perf_counter()
    eval_time.append(t_out - t_in)
def convergence_callback(Y, **kwargs):
    global SDR, SIR, ref
    t_enter = time.perf_counter()
    from mir_eval.separation import bss_eval_sources

    # projection back
    z = projection_back(Y, X_mics[:, :, 0])
    Y = Y.copy() * np.conj(z[None, :, :])

    if Y.shape[2] == 1:
        y = pra.transform.synthesis(Y[:, :, 0], framesize, hop, win=win_s)[:, None]
    else:
        y = pra.transform.synthesis(Y, framesize, hop, win=win_s)
    y = y[framesize - hop:, :].astype(np.float64)

    if args.algo != "blinkiva":
        new_ord = np.argsort(np.std(y, axis=0))[::-1]
        y = y[:, new_ord]

    m = np.minimum(y.shape[0], ref.shape[1])
    sdr, sir, sar, perm = bss_eval_sources(ref[:, :m], y[:m, [0, 0]].T)
    SDR.append(sdr)
    SIR.append(sir)
    t_exit = time.perf_counter()
    eval_time.append(t_exit - t_enter)
def add_eval_summary(summary_writer, step, before_loss, after_loss, linear_loss,
                     loss, sample_rate, mixed_wav, target_wav, predicted_wav,
                     mixed_linear_img, target_linear_img, predicted_linear_img):
    sdr = bss_eval_sources(target_wav, predicted_wav, False)[0][0]

    summary_writer.add_scalar('eval_before_loss', before_loss, step)
    summary_writer.add_scalar('eval_after_loss', after_loss, step)
    summary_writer.add_scalar('eval_linear_loss', linear_loss, step)
    summary_writer.add_scalar('eval_loss', loss, step)
    summary_writer.add_scalar('SDR', sdr, step)

    summary_writer.add_audio('mixed_wav', mixed_wav, step, sample_rate)
    summary_writer.add_audio('target_wav', target_wav, step, sample_rate)
    summary_writer.add_audio('predicted_wav', predicted_wav, step, sample_rate)

    summary_writer.add_image('mixed_spectrogram', mixed_linear_img, step, dataformats='HWC')
    summary_writer.add_image('target_spectrogram', target_linear_img, step, dataformats='HWC')
    summary_writer.add_image('predicted_spectrogram', predicted_linear_img, step, dataformats='HWC')
    summary_writer.flush()
def convergence_callback(Y, **kwargs):
    global SDR, SIR, ref
    from mir_eval.separation import bss_eval_sources

    if Y.shape[2] == 1:
        y = pra.transform.synthesis(Y[:, :, 0], framesize, framesize // 2, win=win_s)[:, None]
    else:
        y = pra.transform.synthesis(Y, framesize, framesize // 2, win=win_s)

    if args.algo != "blinkiva":
        new_ord = np.argsort(np.std(y, axis=0))[::-1]
        y = y[:, new_ord]

    m = np.minimum(y.shape[0] - framesize // 2, ref.shape[1])
    sdr, sir, sar, perm = bss_eval_sources(
        ref[:n_sources_target, :m, 0],
        y[framesize // 2:m + framesize // 2, :n_sources_target].T,
    )
    SDR.append(sdr)
    SIR.append(sir)
def convergence_callback(Y):
    global SDR, SIR
    from mir_eval.separation import bss_eval_sources
    ref = np.moveaxis(separate_recordings, 1, 2)
    y = pra.transform.synthesis(Y, L, L, zp_back=L // 2, zp_front=L // 2).T
    sdr, sir, sar, perm = bss_eval_sources(ref[:, :y.shape[1] - L // 2, 0],
                                           y[:, L // 2:ref.shape[1] + L // 2])
    SDR.append(sdr)
    SIR.append(sir)
def bss_eval_sdr(src1_wav, pred_src1_wav):
    len_cropped = pred_src1_wav.shape[0]
    src1_wav = src1_wav[:len_cropped]
    sdr, _, _, _ = bss_eval_sources(src1_wav, pred_src1_wav,
                                    compute_permutation=True)
    return sdr
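# A quick sanity check for the single-source bss_eval_sdr above; mir_eval
# accepts 1-D arrays as a single reference/estimate pair, so sdr is a
# length-1 array. The signals are synthetic placeholders.
import numpy as np

rng = np.random.RandomState(3)
ref = rng.randn(16000)
est = ref + 0.1 * rng.randn(16000)
print(bss_eval_sdr(ref, est))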
def convergence_callback(Y):
    global SDR, SIR
    from mir_eval.separation import bss_eval_sources
    ref = np.moveaxis(separate_recordings, 1, 2)
    y = np.array([
        pra.istft(Y[:, :, ch], L, L, transform=np.fft.irfft,
                  zp_front=L // 2, zp_back=L // 2)
        for ch in range(Y.shape[2])
    ])
    sdr, sir, sar, perm = bss_eval_sources(ref[:, :y.shape[1] - L // 2, 0],
                                           y[:, L // 2:ref.shape[1] + L // 2])
    SDR.append(sdr)
    SIR.append(sir)
def bss_eval(mixed_wav, src1_wav, src2_wav, pred_src1_wav, pred_src2_wav):
    import numpy as np
    from mir_eval.separation import bss_eval_sources
    n = pred_src1_wav.shape[0]
    src1_wav = src1_wav[:n]
    src2_wav = src2_wav[:n]
    mixed_wav = mixed_wav[:n]
    sdr, sir, sar, _ = bss_eval_sources(
        np.array([src1_wav, src2_wav]),
        np.array([pred_src1_wav, pred_src2_wav]),
        compute_permutation=True)
    sdr_mixed, _, _, _ = bss_eval_sources(
        np.array([src1_wav, src2_wav]),
        np.array([mixed_wav, mixed_wav]),
        compute_permutation=True)
    # sdr, sir, sar, _ = bss_eval_sources(src2_wav, pred_src2_wav, False)
    # sdr_mixed, _, _, _ = bss_eval_sources(src2_wav, mixed_wav, False)
    nsdr = sdr - sdr_mixed
    return nsdr, sir, sar, n
def evaluate(reference, estimated):
    estimated = np.vstack([x.audio_data[0, :] for x in estimated])
    reference = np.vstack([x.audio_data[0, :] for x in reference])
    print(estimated.shape[-1], reference.shape[-1])
    print(estimated.shape[-1] - reference.shape[-1])
    L = min(estimated.shape[-1], reference.shape[-1])
    estimated = estimated[:, 0:L]
    reference = reference[:, 0:L]
    return bss_eval_sources(reference, estimated, compute_permutation=False)
def cal_SDRi(src_ref, src_est, mix):
    """Calculate Source-to-Distortion Ratio improvement (SDRi).

    NOTE: bss_eval_sources is very slow.

    Args:
        src_ref: numpy.ndarray, [C, T]
        src_est: numpy.ndarray, [C, T], reordered by best PIT permutation
        mix: numpy.ndarray, [T]
    Returns:
        average_SDRi
    """
    # src_anchor = np.stack([mix, mix], axis=0)
    src_anchor = mix  # NOTE: this is target speaker separation, so the mixture itself is the anchor
    sdr, sir, sar, popt = bss_eval_sources(src_ref, src_est)
    sdr0, sir0, sar0, popt0 = bss_eval_sources(src_ref, src_anchor)
    # avg_SDRi = ((sdr[0] - sdr0[0]) + (sdr[1] - sdr0[1])) / 2
    avg_SDRi = sdr - sdr0  # NOTE: single target speaker, so no two-source average
    # print("SDRi1: {0:.2f}, SDRi2: {1:.2f}".format(sdr[0] - sdr0[0], sdr[1] - sdr0[1]))
    return avg_SDRi
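# A hedged example for the target-speaker cal_SDRi above: with a single 1-D
# reference and the raw mixture as anchor, sdr - sdr0 is a length-1 array
# rather than a two-speaker average. All signals are synthetic placeholders.
import numpy as np

rng = np.random.RandomState(4)
target = rng.randn(8000)
mix = target + rng.randn(8000)        # target plus an interferer
est = target + 0.1 * rng.randn(8000)  # estimate of the target speaker
print(cal_SDRi(target, est, mix))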
def calc_sdr(w_real, w_pred):
    n_real, = w_real.shape
    n_pred, = w_pred.shape
    n = min(n_real, n_pred)
    w_real, w_pred = w_real[:n], w_pred[:n]
    sdr, _, _, _ = bss_eval_sources(w_real, w_pred, compute_permutation=True)
    return sdr
def bss_eval_sdr(src_list, pred_src_list):
    from mir_eval.separation import bss_eval_sources
    len_cropped = pred_src_list.shape[-1]
    src_list = src_list[:, :len_cropped]
    sdr, sir, sar, _ = bss_eval_sources(src_list, pred_src_list,
                                        compute_permutation=True)
    return sdr, sir, sar
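# A sketch of the multi-source bss_eval_sdr above with compute_permutation=True:
# mir_eval scores every source ordering and reports metrics under the best one,
# which matters when the separator's output order is arbitrary. Synthetic data.
import numpy as np

rng = np.random.RandomState(5)
refs = rng.randn(2, 16000)
preds = refs[::-1] + 0.1 * rng.randn(2, 16000)  # estimates in swapped order
sdr, sir, sar = bss_eval_sdr(refs, preds)
print(sdr)  # the permutation search re-aligns the swapped estimates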