def get_metrics(truth, estimates):
    """Score one estimate against its reference with ``bss_eval_images``.

    Both inputs receive a new leading axis and a new axis after their first
    axis before being handed to ``bss_eval_images`` (museval library ver.4).
    Presumably both are 1-D waveforms, so the call sees a single source with
    a single channel -- TODO confirm against callers.

    ``bss_eval_sources`` is intentionally not used, since it is NOT
    recommended by the documentation.
    [Ref] J. Le Roux et al., "SDR - half-baked or well done?" (2018)
    [URL] https://arxiv.org/pdf/1811.02508.pdf

    Returns:
        Tuple ``(sdr, isr, sir, sar, perm)`` holding element ``[0, 0]`` of
        each array returned by ``bss_eval_images``.
    """
    new_axis = np.newaxis
    reference = truth[new_axis, :, new_axis]
    estimated = estimates[new_axis, :, new_axis]

    sdr, isr, sir, sar, perm = bss_eval_images(reference, estimated)

    # Only the first entry of every returned array is reported.
    return tuple(metric[0, 0] for metric in (sdr, isr, sir, sar, perm))
Example #2
0
File: sse.py Project: yt752/aps
def permute_metric(
        name: str,
        ref: np.ndarray,
        est: np.ndarray,
        compute_permutation: bool = False,
        fs: Optional[int] = None) -> Union[float, Tuple[float, list]]:
    """
    Computation of SiSNR/PESQ/STOI/SDR in permutation/non-permutation mode
    Args:
        name: metric name, one of "sisnr", "pesq", "stoi" or "sdr"
        ref: array, reference signal (N x S or S, ground truth)
        est: array, enhanced/separated signal (N x S or S)
        compute_permutation: return permutation order or not
        fs: sample rate of the audio (forwarded to _permute_eval,
            not used by the "sdr" metric)
    Return:
        the metric value, or the tuple (metric value, permutation order)
        when compute_permutation is set
    Raises:
        ValueError: if name is not one of the known metrics
    """
    if name == "sdr":
        # bss_eval_images gets an extra trailing axis on both signals, so a
        # single S-shaped signal is first promoted to 1 x S
        if ref.ndim == 1:
            ref, est = ref[None, :], est[None, :]
        # The permutation search is always enabled here; compute_permutation
        # only decides whether the resulting order is handed back
        sdr, _, _, _, ali = bss_eval_images(ref[..., None],
                                            est[..., None],
                                            compute_permutation=True)
        # Report the same value (mean over all entries of sdr) in both modes.
        # Previously the non-permutation mode returned sdr[0, 0] only, i.e.
        # it ignored every source but the first one. For a single source the
        # mean equals sdr[0, 0] (assuming sdr is 1 x 1 there), so the result
        # is unchanged in that case.
        avg_sdr = sdr.mean()
        if compute_permutation:
            return avg_sdr, ali[:, 0].tolist()
        return avg_sdr

    # The remaining metrics share one evaluation routine and differ only in
    # the per-signal scoring function
    if name == "sisnr":
        eval_func = aps_sisnr
    elif name == "pesq":
        eval_func = aps_pesq
    elif name == "stoi":
        eval_func = aps_stoi
    else:
        raise ValueError(f"Unknown name of the metric: {name}")
    return _permute_eval(eval_func,
                         ref,
                         est,
                         compute_permutation=compute_permutation,
                         fs=fs)
Example #3
0
 
 # Script fragment: pre-process the audio, compute its STFT, run the complex
 # NMF update, rebuild a waveform from the result and print the SDR of the
 # reconstruction against the pre-processed input.
 # NOTE(review): data, Fs, down_sam, frame_length, frame_shift, num_iter,
 # num_base, loss_func and spa_order are defined outside this fragment.

 # Call my function for audio pre-processing (rebinds data and Fs)
 data, Fs = pre_processing(data, Fs, down_sam)
 
 # Save the pre-processed input, then call my function for getting the STFT
 # (complex STFT amplitude)
 print("Original sound")
 sf.write("./result/Original_sound.wav", data, Fs)
 Y, Fs, freqs, times = get_STFT(data, Fs, frame_length, frame_shift)
 
 # Call my function for updating NMF basements and weights
 H, U, P, loss = get_complexNMF(Y, num_iter, num_base, loss_func, spa_order)
 
 # Call my functions for rebuilding the spectrogram and getting the inverse STFT
 X = get_rec(H, U, P)
 rec_wav, Fs = get_invSTFT(X, Fs, frame_length, frame_shift)
 rec_wav = rec_wav[: int(data.shape[0])] # the inverse STFT includes a residual part due to zero padding, so cut back to the input length
 print("Reconstructed sound")
 sf.write("./result/Reconstructed_sound.wav", rec_wav, Fs)
 
 # Call my function for decomposing basements
 get_decompose(H, U, P, num_base, Fs, frame_length, frame_shift)
 
 # Compute the SDR by bss_eval from museval library ver.4.
 # Both signals get a new leading axis and a new axis after their first axis;
 # note that data and rec_wav are rebound to these expanded arrays from here on.
 data = data[np.newaxis, :, np.newaxis]
 rec_wav = rec_wav[np.newaxis, :, np.newaxis]
 sdr, isr, sir, sar, perm = bss_eval_images(data, rec_wav)
 #sdr, sir, sar, perm = bss_eval_sources(truth, data) # Not recommended by documentation
 print("SDR: {:.3f} [dB]".format(sdr[0, 0]))
 
 # Call my function for displaying graph
 display_graph(Y, X, times, freqs, loss_func, num_iter)
Example #4
0
 # Script fragment: build the approximated spectrogram X, attach the phase
 # `arg` to both Y and X, convert both back to waveforms, save them and print
 # the SDR of the approximation against the resynthesised original.
 # NOTE(review): E_h, E_w, E_u, valid_M, Y, arg, spec_type, data, Fs,
 # frame_length, frame_shift, times, freqs, bound and M_list are defined
 # outside this fragment.
 #print("The weight vector: {}".format(np.sort(E_w)[::-1]))
 # Combine only the components selected by valid_M: columns of E_h times the
 # matching rows of E_u, each row scaled by its entry of E_w
 X = E_h[:, valid_M] @ (E_w[valid_M, np.newaxis] * E_u[valid_M, :])
 
 # In the case of power spectrogram, take the element-wise square root of both
 # (presumably to get back to amplitude -- TODO confirm)
 if spec_type == "pow":
     Y = np.sqrt(Y)
     X = np.sqrt(X)
 
 # Phase recovery: the same phase `arg` is applied to both Y and X
 Y = Y * np.exp(1j*arg)
 X = X * np.exp(1j*arg)
 
 # Call my function for getting inverse STFT (original spectrogram Y)
 original_wav, Fs = get_invSTFT(Y, Fs, frame_length, frame_shift)
 original_wav = original_wav[: int(data.shape[0])] # the inverse STFT includes a residual part due to zero padding, so cut back to the input length
 sf.write("./log/original.wav", original_wav, Fs)
 
 # Call my function for getting inverse STFT (approximated spectrogram X)
 rec_wav, Fs = get_invSTFT(X, Fs, frame_length, frame_shift)
 rec_wav = rec_wav[: int(data.shape[0])] # the inverse STFT includes a residual part due to zero padding, so cut back to the input length
 sf.write("./log/approximated.wav", rec_wav, Fs)
 
 # Compute the SDR by bss_eval from museval library ver.4.
 # Both signals get a new leading axis and a new axis after their first axis;
 # note that original_wav and rec_wav are rebound to these expanded arrays.
 original_wav = original_wav[np.newaxis, :, np.newaxis]
 rec_wav = rec_wav[np.newaxis, :, np.newaxis]
 sdr, isr, sir, sar, perm = bss_eval_images(original_wav, rec_wav)
 #sdr, sir, sar, perm = bss_eval_sources(truth, data) # Not recommended by documentation
 print("SDR: {:.3f} [dB]".format(sdr[0, 0]))
 
 # Call my function for displaying graph
 display_graph(Y, X, times, freqs, bound, M_list)