def get_metrics(truth, estimates):
    """Score one estimated signal against its reference with museval (ver.4) BSS Eval.

    Both inputs receive a new leading axis and a new trailing axis before
    they are passed to ``bss_eval_images``.
    NOTE(review): presumably both are 1-D sample arrays of equal length, so
    the reshaped arrays read as (source, sample, channel) -- confirm with callers.

    ``bss_eval_sources`` is intentionally not used here because the library
    documentation does not recommend it.
    [Ref] J. Le Roux et al., "SDR - half-baked or well done?" (2018)
    [URL] https://arxiv.org/pdf/1811.02508.pdf

    Args:
        truth: reference (ground-truth) signal.
        estimates: estimated signal to be scored against ``truth``.

    Returns:
        Tuple ``(sdr, isr, sir, sar, perm)`` holding the ``[0, 0]`` entry of
        each array returned by ``bss_eval_images``.
    """
    # None is np.newaxis: wrap the sample axis with one axis on each side.
    reference = truth[None, :, None]
    estimate = estimates[None, :, None]

    sdr, isr, sir, sar, perm = bss_eval_images(reference, estimate)

    # Only the first entry of every returned array is reported.
    return tuple(scores[0, 0] for scores in (sdr, isr, sir, sar, perm))
def permute_metric(
        name: str,
        ref: np.ndarray,
        est: np.ndarray,
        compute_permutation: bool = False,
        fs: Optional[int] = None) -> Union[float, Tuple[float, list]]:
    """
    Evaluate SiSNR/PESQ/STOI/SDR in permutation/non-permutation mode
    Args:
        name: metric name, one of "sisnr", "pesq", "stoi" or "sdr"
        ref: array, reference signal (N x S or S, ground truth)
        est: array, enhanced/separated signal (N x S or S)
        compute_permutation: return permutation order or not
        fs: sample rate of the audio
    Return:
        "sisnr"/"pesq"/"stoi": whatever _permute_eval returns for the
        matching scorer.
        "sdr": (mean SDR, permutation order as a list) if
        compute_permutation is set, otherwise the single value sdr[0, 0].
    Raises:
        ValueError: if name is not one of the supported metrics
    """
    if name == "sdr":
        # A single (1-D) signal is promoted to a one-source batch.
        if ref.ndim == 1:
            ref = ref[None, :]
            est = est[None, :]
        # bss_eval_images gets a trailing axis on both inputs and is always
        # run with the permutation search enabled, whatever the caller asked.
        sdr, _, _, _, ali = bss_eval_images(ref[..., None],
                                            est[..., None],
                                            compute_permutation=True)
        if not compute_permutation:
            return sdr[0, 0]
        return sdr.mean(), ali[:, 0].tolist()

    # The remaining metrics share one evaluation path and differ only in the
    # scoring function handed to _permute_eval.
    if name == "sisnr":
        scorer = aps_sisnr
    elif name == "pesq":
        scorer = aps_pesq
    elif name == "stoi":
        scorer = aps_stoi
    else:
        raise ValueError(f"Unknown name of the metric: {name}")

    return _permute_eval(scorer,
                         ref,
                         est,
                         compute_permutation=compute_permutation,
                         fs=fs)
#Audio pre-processing (project helper); it hands back the signal and the
#sampling rate that every later step works with
data, Fs = pre_processing(data, Fs, down_sam)

#Save the input signal, then compute its STFT (complex STFT amplitude)
print("Original sound")
sf.write("./result/Original_sound.wav", data, Fs)
Y, Fs, freqs, times = get_STFT(data, Fs, frame_length, frame_shift)

#Update the NMF basements and weights
H, U, P, loss = get_complexNMF(Y, num_iter, num_base, loss_func, spa_order)

#Rebuild the spectrogram from the factors and go back to the time domain
X = get_rec(H, U, P)
rec_wav, Fs = get_invSTFT(X, Fs, frame_length, frame_shift)
#The inverse STFT carries a residual tail due to zero padding, so cut the
#result back to the length of the input signal
rec_wav = rec_wav[:int(data.shape[0])]
print("Reconstructed sound")
sf.write("./result/Reconstructed_sound.wav", rec_wav, Fs)

#Decompose the basements (project helper)
get_decompose(H, U, P, num_base, Fs, frame_length, frame_shift)

#Compute the SDR by bss_eval from museval library ver.4.
#Both signals get a new leading and a new trailing axis first (None is
#np.newaxis). bss_eval_sources is deliberately not used: it is not
#recommended by the documentation.
data = data[None, :, None]
rec_wav = rec_wav[None, :, None]
sdr, isr, sir, sar, perm = bss_eval_images(data, rec_wav)
print("SDR: {:.3f} [dB]".format(sdr[0, 0]))

#Display the graph (project helper)
display_graph(Y, X, times, freqs, loss_func, num_iter)
#Debug aid, kept disabled:
#print("The weight vector: {}".format(np.sort(E_w)[::-1]))

#Approximate the spectrogram using only the entries selected by valid_M:
#each selected row of E_u is scaled by its weight in E_w before the product
X = E_h[:, valid_M] @ (E_w[valid_M, None] * E_u[valid_M, :])

#In the case of power spectrogram, take the square root of both the
#original and the approximation
if spec_type == "pow":
    Y = np.sqrt(Y)
    X = np.sqrt(X)

#Phase recovery: the same phase factor is applied to both spectrograms
phase_factor = np.exp(1j*arg)
Y = Y * phase_factor
X = X * phase_factor

#Inverse STFT of the original spectrogram. The output carries a residual
#tail due to zero padding, so it is cut to the length of the input data.
original_wav, Fs = get_invSTFT(Y, Fs, frame_length, frame_shift)
original_wav = original_wav[:int(data.shape[0])]
sf.write("./log/original.wav", original_wav, Fs)

#Inverse STFT of the approximated spectrogram, trimmed the same way
rec_wav, Fs = get_invSTFT(X, Fs, frame_length, frame_shift)
rec_wav = rec_wav[:int(data.shape[0])]
sf.write("./log/approximated.wav", rec_wav, Fs)

#Compute the SDR by bss_eval from museval library ver.4.
#Both signals get a new leading and a new trailing axis first (None is
#np.newaxis). bss_eval_sources is deliberately not used: it is not
#recommended by the documentation.
original_wav = original_wav[None, :, None]
rec_wav = rec_wav[None, :, None]
sdr, isr, sir, sar, perm = bss_eval_images(original_wav, rec_wav)
print("SDR: {:.3f} [dB]".format(sdr[0, 0]))

#Display the graph (project helper)
display_graph(Y, X, times, freqs, bound, M_list)