import numpy as np
from mir_eval.separation import bss_eval_sources


def bss_eval_global(wavs_mono, wavs_src1, wavs_src2, wavs_src1_pred, wavs_src2_pred):

    assert len(wavs_mono) == len(wavs_src1) == len(wavs_src2) == len(wavs_src1_pred) == len(wavs_src2_pred)

    gnsdr = np.zeros(2)
    gsir = np.zeros(2)
    gsar = np.zeros(2)
    frames_total = 0

    for wav_mono, wav_src1, wav_src2, wav_src1_pred, wav_src2_pred in zip(wavs_mono, wavs_src1, wavs_src2, wavs_src1_pred, wavs_src2_pred):
        len_cropped = wav_src1_pred.shape[-1]
        wav_mono_cropped = wav_mono[:len_cropped]
        wav_src1_cropped = wav_src1[:len_cropped]
        wav_src2_cropped = wav_src2[:len_cropped]

        sdr, sir, sar, _ = bss_eval_sources(
            reference_sources=np.asarray([wav_src1_cropped, wav_src2_cropped]),
            estimated_sources=np.asarray([wav_src1_pred, wav_src2_pred]),
            compute_permutation=False)
        sdr_mono, _, _, _ = bss_eval_sources(
            reference_sources=np.asarray([wav_src1_cropped, wav_src2_cropped]),
            estimated_sources=np.asarray([wav_mono_cropped, wav_mono_cropped]),
            compute_permutation=False)

        nsdr = sdr - sdr_mono
        gnsdr += len_cropped * nsdr
        gsir += len_cropped * sir
        gsar += len_cropped * sar
        frames_total += len_cropped

    gnsdr = gnsdr / frames_total
    gsir = gsir / frames_total
    gsar = gsar / frames_total

    return gnsdr, gsir, gsar
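For orientation, a minimal usage sketch of the list-based helper above on synthetic one-second signals. NSDR here is the SDR of the estimate minus the SDR obtained when the raw mixture is used as the estimate, averaged across items with per-item length as the weight. The sine/noise sources and the 8 kHz rate are placeholder assumptions, not from the original code:

import numpy as np

sr = 8000
t = np.arange(sr) / sr
wavs_src1 = [np.sin(2 * np.pi * 440 * t)]                      # ground-truth source 1
wavs_src2 = [0.1 * np.random.randn(sr)]                        # ground-truth source 2
wavs_mono = [wavs_src1[0] + wavs_src2[0]]                      # mixture
wavs_src1_pred = [wavs_src1[0] + 0.01 * np.random.randn(sr)]   # imperfect estimates
wavs_src2_pred = [wavs_src2[0] + 0.01 * np.random.randn(sr)]

gnsdr, gsir, gsar = bss_eval_global(wavs_mono, wavs_src1, wavs_src2,
                                    wavs_src1_pred, wavs_src2_pred)
print(gnsdr, gsir, gsar)  # per-source NSDR/SIR/SAR, weighted by length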
Example #2
def bss_eval_global(mixed_wav, src1_wav, src2_wav, pred_src1_wav,
                    pred_src2_wav, n):
    len_cropped = pred_src1_wav.shape[-1]
    src1_wav = src1_wav[:, :len_cropped]
    src2_wav = src2_wav[:, :len_cropped]
    mixed_wav = mixed_wav[:, :len_cropped]
    gnsdr, gsir, gsar = np.zeros(2), np.zeros(2), np.zeros(2)
    total_len = 0
    for i in range(n):
        # skip items where either reference source is (near-)silent,
        # since bss_eval is undefined for all-zero references
        if (np.sum(np.abs(src1_wav[i])) < 1e-10 or
                np.sum(np.abs(src2_wav[i])) < 1e-10):
            continue
        sdr, sir, sar, _ = bss_eval_sources(
            np.array([src1_wav[i], src2_wav[i]]),
            np.array([pred_src1_wav[i], pred_src2_wav[i]]), False)
        sdr_mixed, _, _, _ = bss_eval_sources(
            np.array([src1_wav[i], src2_wav[i]]),
            np.array([mixed_wav[i], mixed_wav[i]]), False)
        nsdr = sdr - sdr_mixed
        gnsdr += len_cropped * nsdr
        gsir += len_cropped * sir
        gsar += len_cropped * sar
        total_len += len_cropped
    gnsdr = gnsdr / total_len
    gsir = gsir / total_len
    gsar = gsar / total_len
    return gnsdr, gsir, gsar
Example #3
def bss_eval_global(wavs_mono, wavs_src1, wavs_src2, wavs_src1_pred, wavs_src2_pred):
    print(len(wavs_mono), len(wavs_src1), len(wavs_src2), len(wavs_src1_pred), len(wavs_src2_pred))
    assert len(wavs_mono) == len(wavs_src1) == len(wavs_src2) == len(wavs_src1_pred) == len(wavs_src2_pred)

    gnsdr = np.zeros(2)
    gsir = np.zeros(2)
    gsar = np.zeros(2)
    frames_total = 0
    step = 1
    for wav_mono, wav_src1, wav_src2, wav_src1_pred, wav_src2_pred in zip(wavs_mono, wavs_src1, wavs_src2, wavs_src1_pred, wavs_src2_pred):
        len_cropped = wav_src1_pred.shape[-1]
        wav_mono_cropped = wav_mono[:len_cropped]
        wav_src1_cropped = wav_src1[:len_cropped]
        wav_src2_cropped = wav_src2[:len_cropped]

        sdr, sir, sar, _ = bss_eval_sources(
            reference_sources=np.asarray([wav_src1_cropped, wav_src2_cropped]),
            estimated_sources=np.asarray([wav_src1_pred, wav_src2_pred]),
            compute_permutation=False)
        sdr_mono, _, _, _ = bss_eval_sources(
            reference_sources=np.asarray([wav_src1_cropped, wav_src2_cropped]),
            estimated_sources=np.asarray([wav_mono_cropped, wav_mono_cropped]),
            compute_permutation=False)

        nsdr = sdr - sdr_mono
        gnsdr += len_cropped * nsdr
        gsir += len_cropped * sir
        gsar += len_cropped * sar
        frames_total += len_cropped
        print("{}/{}\n".format(step, len(wavs_mono)))
        step += 1

    gnsdr = gnsdr / frames_total
    gsir = gsir / frames_total
    gsar = gsar / frames_total

    return gnsdr, gsir, gsar
Example #4
def bss_eval_global(mixed_wav, src1_wav, src2_wav, pred_src1_wav,
                    pred_src2_wav):
    len_cropped = pred_src1_wav.shape[-1]
    src1_wav = src1_wav[:, :len_cropped]
    src2_wav = src2_wav[:, :len_cropped]
    mixed_wav = mixed_wav[:, :len_cropped]
    gnsdr = np.zeros(2)
    gsir = np.zeros(2)
    gsar = np.zeros(2)
    total_len = 0
    for i in range(EvalConfig.NUM_EVAL):
        sdr, sir, sar, _ = bss_eval_sources(
            np.array([src1_wav[i], src2_wav[i]]),
            np.array([pred_src1_wav[i], pred_src2_wav[i]]), False)
        sdr_mixed, _, _, _ = bss_eval_sources(
            np.array([src1_wav[i], src2_wav[i]]),
            np.array([mixed_wav[i], mixed_wav[i]]), False)
        nsdr = sdr - sdr_mixed
        gnsdr += len_cropped * nsdr
        gsir += len_cropped * sir
        gsar += len_cropped * sar
        total_len += len_cropped
    gnsdr = gnsdr / total_len
    gsir = gsir / total_len
    gsar = gsar / total_len
    return gnsdr, gsir, gsar
Example #5
def SDR(est, egs, mix):
    '''
        Calculate the SDR improvement of the estimate over the mixture.
        est: network-generated (estimated) audio
        egs: ground-truth source
        mix: mixture audio
    '''
    sdr, _, _, _ = bss_eval_sources(egs, est)
    mix_sdr, _, _, _ = bss_eval_sources(egs, mix)
    return float(sdr - mix_sdr)
Example #6
def SDR(est, egs, mix):
    '''
        Calculate the SDR improvement of the estimate over the mixture.
        est: network-generated (estimated) audio
        egs: ground-truth source
        mix: mixture audio
    '''
    length = est.numpy().shape[0]
    sdr, _, _, _ = bss_eval_sources(egs.numpy()[:length], est.numpy()[:length])
    mix_sdr, _, _, _ = bss_eval_sources(egs.numpy()[:length], mix.numpy()[:length])
    return float(sdr-mix_sdr)
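A hedged sketch of calling the tensor-based variant above; it assumes PyTorch CPU tensors (so .numpy() works) and uses random placeholder audio rather than real recordings:

import torch

est = torch.randn(16000)               # network output (placeholder)
egs = torch.randn(16000)               # ground truth (placeholder)
mix = egs + 0.5 * torch.randn(16000)   # mixture (placeholder)

print(SDR(est, egs, mix))  # SDR gain of the estimate over the raw mixture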
Example #7
def separation_metrics(pred_left, pred_right, gt_left, gt_right, mix):
    if audio_empty(gt_left) or audio_empty(gt_right) or audio_empty(
            pred_right) or audio_empty(pred_left) or audio_empty(mix):
        print("----------- Empty -----------")
        return None
    sdr, sir, sar, _ = bss_eval_sources(np.asarray([gt_left, gt_right]),
                                        np.asarray([pred_left, pred_right]),
                                        False)
    sdr_mix, _, _, _ = bss_eval_sources(np.asarray([gt_left, gt_right]),
                                        np.asarray([mix, mix]), False)

    return sdr.mean(), sir.mean(), sar.mean(), sdr_mix.mean()
Example #8
def cal_SDR_improve(clean, direct, enhance):
    """
    calculate a SDR1: direct to clean
    calculate a SDR2: enhance to clean
    return :SDR2 - SDR1 (improvement)
    """
    import sys
    sys.path.append('/home/cjf/workspace/201903_dereverLocEnhance/mir_eval_master/')
    from mir_eval import separation as sep

    SDR1, SIR1, SAR1, perm1 = sep.bss_eval_sources(clean, enhance, False)
    SDR2, SIR2, SAR2, perm2 = sep.bss_eval_sources(clean, direct, False)
    return SDR1 - SDR2
Example #9
def bss_eval(mixed_wav, src1_wav, src2_wav, pred_src1_wav, pred_src2_wav):
    n = pred_src1_wav.shape[0]  # renamed from `len` to avoid shadowing the built-in
    src1_wav = src1_wav[:n]
    src2_wav = src2_wav[:n]
    mixed_wav = mixed_wav[:n]
    sdr, sir, sar, _ = bss_eval_sources(np.array([src1_wav, src2_wav]),
                                        np.array([pred_src1_wav, pred_src2_wav]),
                                        compute_permutation=True)
    sdr_mixed, _, _, _ = bss_eval_sources(np.array([src1_wav, src2_wav]),
                                          np.array([mixed_wav, mixed_wav]),
                                          compute_permutation=True)
    nsdr = sdr - sdr_mixed
    return nsdr, sir, sar, n
Example #10
def cal_sdri(src_ref, src_est, mix):
    """Calculate Source-to-Distortion Ratio improvement (SDRi).
    NOTE: bss_eval_sources is very very slow.
    Args:
        src_ref: numpy.ndarray, [C, T]
        src_est: numpy.ndarray, [C, T], reordered by best PIT permutation
        mix: numpy.ndarray, [T]
    Returns:
        average_SDRi
    """
    src_anchor = np.stack([mix, mix], axis=0)
    sdr, sir, sar, popt = bss_eval_sources(src_ref, src_est)
    sdr0, sir0, sar0, popt0 = bss_eval_sources(src_ref, src_anchor)
    avg_sdri = ((sdr[0] - sdr0[0]) + (sdr[1] - sdr0[1])) / 2
    return avg_sdri
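A small sketch of driving `cal_sdri`, assuming two reference speakers stacked as [C, T] and estimates already ordered by the best PIT permutation; all arrays are synthetic placeholders:

import numpy as np

T = 16000
src_ref = np.random.randn(2, T)                   # two reference speakers, [C, T]
mix = src_ref.sum(axis=0)                         # their mixture, [T]
src_est = src_ref + 0.1 * np.random.randn(2, T)   # PIT-ordered estimates

print(cal_sdri(src_ref, src_est, mix))  # average SDR improvement over the mixture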
Example #11
def sdr_sir_sar(gt_audio, sep_audio, sed_y, inside_only):
    """gt_audio, sep_audio.shape: (n_channels, n_samples)
    """
    if inside_only:
        n_step = cfg.n_step
        active_locts = np.where(sed_y==1)[0]
        onset = int(round(active_locts[0] * n_step))
        offset = int(round((active_locts[-1] + 1) * n_step))
        in_gt_audio = gt_audio[:, onset : offset]
        in_sep_audio = sep_audio[:, onset : offset]
        (sdr, sir, sar, perm) = bss_eval_sources(in_gt_audio, in_sep_audio, compute_permutation=False)
        return sdr, sir, sar
    else:
        (sdr, sir, sar, perm) = bss_eval_sources(gt_audio, sep_audio, compute_permutation=False)
        return sdr, sir, sar
Example #12
def validate(audio, model, embedder, testloader, writer, epoch):
    model.eval()
    criterion = nn.MSELoss()
    with torch.no_grad():
        for i, batch in enumerate(testloader):
            dvec_mel, target_wav, mixed_wav, target_mag, mixed_mag, mixed_phase = batch[0]

            dvec_mel = dvec_mel.cuda()
            target_mag = target_mag.unsqueeze(0).cuda()
            mixed_mag = mixed_mag.unsqueeze(0).cuda()

            dvec = embedder(dvec_mel)
            dvec = dvec.unsqueeze(0)
            est_mask = model(mixed_mag, dvec)
            est_mag = est_mask * mixed_mag

            test_loss = criterion(est_mag, target_mag).item()

            mixed_mag = mixed_mag[0].cpu().detach().numpy()
            target_mag = target_mag[0].cpu().detach().numpy()
            est_mag = est_mag[0].cpu().detach().numpy()
            est_wav = audio.spec2wav(est_mag, mixed_phase)
            est_mask = est_mask[0].cpu().detach().numpy()

            test_sdr_avg = bss_eval_sources(target_wav, est_wav, False)[0][0]
            test_loss_avg = test_loss
            writer.log_evaluation_data(mixed_wav, target_wav, est_wav,
                                       mixed_mag.T, target_mag.T, est_mag.T,
                                       est_mask.T, (epoch - 1), i)
            writer.log_evaluation_avg(test_loss_avg, test_sdr_avg, (epoch - 1))
            break
Example #13
def test(epoch):  # testing data
    model.eval()
    start_time = time.time()
    with torch.no_grad():
        avesdr = 0
        numSongs = 0
        sdrmedian = np.zeros(50)
        for iloader, xtrain, ytrain in loadtest:
            iloader = iloader.item()
            listofpred0 = []
            cnt, aveloss = 0, 0
            for ind in range(0, xtrain.shape[-1] - sampleSize, sampleSize):
                if xtrain[0, 0, ind:ind + sampleSize].shape[0] < sampleSize:
                    break
                output = model(xtrain[:, :, ind:ind + sampleSize].to(device))
                listofpred0.append(output.reshape(-1).cpu().numpy())
                loss = criterion(output, ytrain[:, :, ind:ind + sampleSize].to(device))
                cnt += 1
                aveloss += float(loss)
            aveloss /= cnt
            print('loss for test:{},num{},epoch{}'.format(aveloss, iloader, epoch))
            ans0 = mu_law_decode(np.concatenate(listofpred0))
            if iloader >= 150:
                sdr = bss_eval_sources(mu_law_decode(ytrain[0, 0, :ans0.shape[0]].cpu().numpy()), ans0)
                avesdr += sdr[0][0]
                sdrmedian[iloader - 150] = sdr[0][0]
                # print('each ele of median', sdrmedian[iloader - 150], iloader - 150)
                numSongs += 1
            if iloader > 160:
                continue
            if not os.path.exists('vsCorpus/'):
                os.makedirs('vsCorpus/')
            sf.write(savemusic.format(iloader), ans0, sample_rate)
            print('test stored done', np.round(time.time() - start_time))
        print('sdr mean:', avesdr / numSongs)
        print('sdr median:', np.median(sdrmedian))
Example #14
def validate(audio, model, embedder, testloader, writer, step):
    model.eval()

    criterion = nn.MSELoss()
    with torch.no_grad():
        for batch in testloader:
            dvec_mel, target_wav, mixed_wav, target_mag, mixed_mag, mixed_phase = batch[0]

            dvec_mel = dvec_mel.cuda()
            target_mag = target_mag.unsqueeze(0).cuda()
            mixed_mag = mixed_mag.unsqueeze(0).cuda()

            dvec = embedder(dvec_mel)
            dvec = dvec.unsqueeze(0)
            est_mask = model(mixed_mag, dvec)
            est_mag = est_mask * mixed_mag
            test_loss = criterion(target_mag, est_mag).item()

            mixed_mag = mixed_mag[0].cpu().detach().numpy()
            target_mag = target_mag[0].cpu().detach().numpy()
            est_mag = est_mag[0].cpu().detach().numpy()
            est_wav = audio.spec2wav(est_mag, mixed_phase)
            est_mask = est_mask[0].cpu().detach().numpy()

            sdr = bss_eval_sources(target_wav, est_wav, False)[0][0]
            writer.log_evaluation(test_loss, sdr, mixed_wav, target_wav,
                                  est_wav, mixed_mag.T, target_mag.T,
                                  est_mag.T, est_mask.T, step)
            break

    model.train()
Example #15
def test_preprocessed_data(net_type):
    from pystoi import stoi
    import pesq
    from mir_eval.separation import bss_eval_sources
    path = 'preprocessed_test_data_' + net_type + '/'
    if os.path.isdir(path):
        files = [f for f in os.listdir(path) if f.endswith('.npy')]
        sdr_a = []
        pesq_a = []
        stoi_a = []
        processed = 0
        for i, f in enumerate(files):
            signals = np.load(path + f)
            clean_speech = signals[:,0]
            recovered_speech = signals[:,1]
            if np.any(clean_speech) and np.any(recovered_speech):
                PESQ = pesq.pesq(dsp.audio_fs, clean_speech, recovered_speech, 'wb')
                STOI = stoi(clean_speech, recovered_speech, dsp.audio_fs, extended=False)
                SDR, sir, sar, perm = bss_eval_sources(clean_speech, recovered_speech)
                sdr_a.append(SDR[0])
                pesq_a.append(PESQ)
                stoi_a.append(STOI)
                processed += 1
                if i < len(files)-1:
                    print('[Metric computation: {}% complete]'.format(100.0*(i+1)/len(files)), end='\r')
                else:
                    print('[Metric computation: {}% complete]'.format(100.0*(i+1)/len(files)), end='\n')
        metrics = np.array([sdr_a, pesq_a, stoi_a]).T
        np.save(net_type + '_metrics.npy', metrics)
        print("Finished pre-processed testing of net '{}', {} files out of {} were processed into {}_metrics.npy".format(net_type, processed, len(files), net_type))
    else:
        print("Error: Preprocessed data for the model not found")
Example #16
def run(args):

    sep_reader = AudioReader(args.sep_scp)
    ref_reader = AudioReader(args.ref_scp)
    utt_snr = open(args.per_utt, "w") if args.per_utt else None
    utt_ali = open(args.utt_ali, "w") if args.utt_ali else None
    reporter = Report(args.spk2class)
    # sep: N x S
    for key, sep in sep_reader:
        # ref: N x S
        ref = ref_reader[key]
        # keep same shape
        nsamps = min(sep.shape[-1], ref.shape[-1])
        sdr, _, _, ali = bss_eval_sources(ref[:, :nsamps], sep[:, :nsamps])
        sdr = np.mean(sdr)
        reporter.add(key, sdr)
        if utt_snr:
            utt_snr.write("{}\t{:.2f}\n".format(key, sdr))
        if utt_ali:
            ali_str = " ".join(map(str, ali))
            utt_ali.write(f"{key}\t{ali_str}\n")
    reporter.report()
    if utt_snr:
        utt_snr.close()
    if utt_ali:
        utt_ali.close()
Example #17
def sdr_batch_eval(target_sources,
                   noisy_sources,
                   estimated_sources,
                   sample_rate=16e3,
                   step_size=10,
                   sequence_lengths=None):
    sdr_list = []
    sir_list = []
    sar_list = []

    n_samples_frame = int(step_size / 1e3 * sample_rate)
    for i, (target, noisy, estimated) in enumerate(
            zip(target_sources, noisy_sources, estimated_sources)):
        if sequence_lengths is not None:
            target = target[:sequence_lengths[i] * n_samples_frame]
            noisy = noisy[:len(target)]
            estimated = estimated[:len(target)]

        # Skip evaluation if the estimated source is an all-zero vector
        if np.any(estimated):
            ref_sources = np.vstack([target, noisy])
            # pair the real estimate with an all-ones dummy row so that
            # bss_eval_sources sees as many estimates as references
            est_sources = np.vstack([estimated, np.ones_like(estimated)])
            sdr, sir, sar, _ = bss_eval_sources(ref_sources,
                                                est_sources,
                                                compute_permutation=False)
            sdr_list.append(sdr[0])
            sir_list.append(sir[0])
            sar_list.append(sar[0])

    return np.array(sdr_list), np.array(sir_list), np.array(sar_list)
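For reference, a sketch of calling the batch helper above with per-utterance lengths given in 10 ms frames at 16 kHz; the batch size and lengths are made-up values:

import numpy as np

batch, T = 4, 32000
targets = np.random.randn(batch, T)
noisy = targets + np.random.randn(batch, T)
estimates = targets + 0.1 * np.random.randn(batch, T)
seq_lens = [200, 150, 180, 120]  # per-utterance lengths, in 10 ms frames

sdrs, sirs, sars = sdr_batch_eval(targets, noisy, estimates,
                                  sample_rate=16e3, step_size=10,
                                  sequence_lengths=seq_lens)
print(sdrs.mean(), sirs.mean(), sars.mean())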
Example #18
def audio_to_bsseval(s1hats, s2hats, s1s, s2s):
    bss_evals = []
    bss_evals_paris = []
    for i, (s1hat, s2hat, s1, s2) in enumerate(zip(s1hats, s2hats, s1s, s2s)):

        print('Computing bssevals for mixture {}'.format(i))

        sourcehat_mat = np.concatenate(
            [s1hat.reshape(1, -1), s2hat.reshape(1, -1)], 0)
        source_mat = np.concatenate([s1.reshape(1, -1), s2.reshape(1, -1)], 0)

        Nhat, N = sourcehat_mat.shape[1], source_mat.shape[1]
        Nmin = min([N, Nhat])

        bss_evals.append(
            mevalsep.bss_eval_sources(source_mat[:, :Nmin],
                                      sourcehat_mat[:, :Nmin]))
        bss_evals_paris.append([
            tu.bss_eval(sourcehat_mat[0, :Nmin], 0, source_mat[:, :Nmin]),
            tu.bss_eval(sourcehat_mat[1, :Nmin], 1, source_mat[:, :Nmin])
        ])
        print(bss_evals)
        print(bss_evals_paris)

    return bss_evals
Example #19
def sdr_batch_eval_ss(target_source,
                      estimated_source,
                      sample_rate=16e3,
                      step_size=10,
                      sequence_lengths=None):
    """
    Single source version of SDR, SIR and SDR computation
    """
    sdr_list = []
    sir_list = []
    sar_list = []

    n_samples_frame = int(step_size / 1e3 * sample_rate)
    for i, (target,
            estimated) in enumerate(zip(target_source, estimated_source)):
        if sequence_lengths is not None:
            target = target[:sequence_lengths[i] * n_samples_frame]
            estimated = estimated[:len(target)]

        # Skip evaluation if the estimated source is an all-zero vector
        if np.any(estimated):
            sdr, sir, sar, _ = bss_eval_sources(np.array([target]),
                                                np.array([estimated]),
                                                compute_permutation=False)
            sdr_list.append(sdr[0])
            sir_list.append(sir[0])
            sar_list.append(sar[0])

    return np.array(sdr_list), np.array(sir_list), np.array(sar_list)
Example #20
    def convergence_callback(Y, X, n_targets, SDR, SIR, eval_time, ref,
                             framesize, win_s, algo_name):
        t_in = time.perf_counter()

        # projection back
        z = projection_back(Y, X[:, :, 0])
        Y = Y * np.conj(z[None, :, :])

        from mir_eval.separation import bss_eval_sources

        if Y.shape[2] == 1:
            y = pra.transform.synthesis(Y[:, :, 0], framesize, hop,
                                        win=win_s)[:, None]
        else:
            y = pra.transform.synthesis(Y, framesize, hop, win=win_s)

        if algo_name not in parameters["overdet_algos"]:
            new_ord = np.argsort(np.std(y, axis=0))[::-1]
            y = y[:, new_ord]

        m = np.minimum(y.shape[0] - hop, ref.shape[1])

        synth[:n_targets, :m, 0] = y[hop:m + hop, :n_targets].T
        synth[n_targets, :m, 0] = y[hop:m + hop, 0]

        sdr, sir, sar, perm = bss_eval_sources(ref[:n_targets + 1, :m, 0],
                                               synth[:, :m, 0])
        SDR.append(sdr[:n_targets].tolist())
        SIR.append(sir[:n_targets].tolist())

        t_out = time.perf_counter()
        eval_time.append(t_out - t_in)
Example #21
    def convergence_callback(Y, **kwargs):
        global SDR, SIR, ref

        t_enter = time.perf_counter()

        from mir_eval.separation import bss_eval_sources

        # projection back
        z = projection_back(Y, X_mics[:, :, 0])
        Y = Y.copy() * np.conj(z[None, :, :])

        if Y.shape[2] == 1:
            y = pra.transform.synthesis(Y[:, :, 0], framesize, hop, win=win_s)[:, None]
        else:
            y = pra.transform.synthesis(Y, framesize, hop, win=win_s)
        y = y[framesize - hop :, :].astype(np.float64)

        if args.algo != "blinkiva":
            new_ord = np.argsort(np.std(y, axis=0))[::-1]
            y = y[:, new_ord]

        m = np.minimum(y.shape[0], ref.shape[1])
        sdr, sir, sar, perm = bss_eval_sources(ref[:, :m], y[:m, [0, 0]].T)
        SDR.append(sdr)
        SIR.append(sir)

        t_exit = time.perf_counter()
        eval_time.append(t_exit - t_enter)
Example #22
def add_eval_summary(summary_writer, step, before_loss, after_loss,
                     linear_loss, loss, sample_rate, mixed_wav, target_wav,
                     predicted_wav, mixed_linear_img, target_linear_img,
                     predicted_linear_img):
    sdr = bss_eval_sources(target_wav, predicted_wav, False)[0][0]

    summary_writer.add_scalar('eval_before_loss', before_loss, step)
    summary_writer.add_scalar('eval_after_loss', after_loss, step)
    summary_writer.add_scalar('eval_linear_loss', linear_loss, step)
    summary_writer.add_scalar('eval_loss', loss, step)
    summary_writer.add_scalar('SDR', sdr, step)

    summary_writer.add_audio('mixed_wav', mixed_wav, step, sample_rate)
    summary_writer.add_audio('target_wav', target_wav, step, sample_rate)
    summary_writer.add_audio('predicted_wav', predicted_wav, step, sample_rate)

    summary_writer.add_image('mixed_spectrogram',
                             mixed_linear_img,
                             step,
                             dataformats='HWC')
    summary_writer.add_image('target_spectrogram',
                             target_linear_img,
                             step,
                             dataformats='HWC')
    summary_writer.add_image('predicted_spectrogram',
                             predicted_linear_img,
                             step,
                             dataformats='HWC')
    summary_writer.flush()
Example #23
    def convergence_callback(Y, **kwargs):
        global SDR, SIR, ref
        from mir_eval.separation import bss_eval_sources

        if Y.shape[2] == 1:
            y = pra.transform.synthesis(Y[:, :, 0],
                                        framesize,
                                        framesize // 2,
                                        win=win_s)[:, None]
        else:
            y = pra.transform.synthesis(Y,
                                        framesize,
                                        framesize // 2,
                                        win=win_s)

        if args.algo != "blinkiva":
            new_ord = np.argsort(np.std(y, axis=0))[::-1]
            y = y[:, new_ord]

        m = np.minimum(y.shape[0] - framesize // 2, ref.shape[1])
        sdr, sir, sar, perm = bss_eval_sources(
            ref[:n_sources_target, :m, 0],
            y[framesize // 2:m + framesize // 2, :n_sources_target].T,
        )
        SDR.append(sdr)
        SIR.append(sir)
Example #24
def convergence_callback(Y):
    global SDR, SIR
    from mir_eval.separation import bss_eval_sources
    ref = np.moveaxis(separate_recordings, 1, 2)
    y = pra.transform.synthesis(Y, L, L, zp_back=L // 2, zp_front=L // 2).T
    sdr, sir, sar, perm = bss_eval_sources(ref[:, :y.shape[1] - L // 2, 0],
                                           y[:, L // 2:ref.shape[1] + L // 2])
    SDR.append(sdr)
    SIR.append(sir)
Example #25
def bss_eval_sdr(src1_wav, pred_src1_wav):
    len_cropped = pred_src1_wav.shape[0]
    src1_wav = src1_wav[:len_cropped]

    sdr, _, _, _ = bss_eval_sources(src1_wav,
                                    pred_src1_wav,
                                    compute_permutation=True)
    return sdr
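And a one-line usage sketch for the single-pair helper above, with a synthetic reference/estimate pair standing in for real audio:

import numpy as np

ref = np.random.randn(16000)
est = ref + 0.05 * np.random.randn(16000)
print(bss_eval_sdr(ref, est))  # SDR of the estimate against the cropped reference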
Example #26
def convergence_callback(Y):
    global SDR, SIR
    from mir_eval.separation import bss_eval_sources
    ref = np.moveaxis(separate_recordings, 1, 2)
    y = np.array([pra.istft(Y[:, :, ch], L, L, transform=np.fft.irfft,
                            zp_front=L // 2, zp_back=L // 2)
                  for ch in range(Y.shape[2])])
    sdr, sir, sar, perm = bss_eval_sources(ref[:, :y.shape[1] - L // 2, 0],
                                           y[:, L // 2:ref.shape[1] + L // 2])
    SDR.append(sdr)
    SIR.append(sir)
Example #27
def bss_eval(mixed_wav, src1_wav, src2_wav, pred_src1_wav, pred_src2_wav):
    import numpy as np
    from mir_eval.separation import bss_eval_sources
    n = pred_src1_wav.shape[0]
    src1_wav = src1_wav[:n]
    src2_wav = src2_wav[:n]
    mixed_wav = mixed_wav[:n]
    sdr, sir, sar, _ = bss_eval_sources(np.array([src1_wav, src2_wav]),
                                        np.array(
                                            [pred_src1_wav, pred_src2_wav]),
                                        compute_permutation=True)
    sdr_mixed, _, _, _ = bss_eval_sources(np.array([src1_wav, src2_wav]),
                                          np.array([mixed_wav, mixed_wav]),
                                          compute_permutation=True)
    # sdr, sir, sar, _ = bss_eval_sources(src2_wav,pred_src2_wav, False)
    # sdr_mixed, _, _, _ = bss_eval_sources(src2_wav,mixed_wav, False)
    nsdr = sdr - sdr_mixed
    return nsdr, sir, sar, n
Example #28
def evaluate(reference, estimated):
    estimated = np.vstack([x.audio_data[0, :] for x in estimated])
    reference = np.vstack([x.audio_data[0, :] for x in reference])
    print(estimated.shape[-1], reference.shape[-1])
    print(estimated.shape[-1] - reference.shape[-1])
    L = min(estimated.shape[-1], reference.shape[-1])
    estimated = estimated[:, 0:L]
    reference = reference[:, 0:L]
    return bss_eval_sources(reference, estimated, compute_permutation=False)
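The `evaluate` helper above expects objects exposing an `audio_data` array of shape (channels, samples), as nussl's AudioSignal does; here is a sketch with a minimal stand-in class (the namedtuple is purely illustrative, not part of any library):

import numpy as np
from collections import namedtuple

Sig = namedtuple('Sig', 'audio_data')  # stand-in for an AudioSignal-like object

reference = [Sig(np.random.randn(1, 16000)) for _ in range(2)]
estimated = [Sig(s.audio_data + 0.1 * np.random.randn(1, 16000)) for s in reference]
print(evaluate(reference, estimated))  # (sdr, sir, sar, perm), one entry per source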
Example #29
def cal_SDRi(src_ref, src_est, mix):
    """Calculate Source-to-Distortion Ratio improvement (SDRi).
    NOTE: bss_eval_sources is very very slow.
    Args:
        src_ref: numpy.ndarray, [C, T]
        src_est: numpy.ndarray, [C, T], reordered by best PIT permutation
        mix: numpy.ndarray, [T]
    Returns:
        average_SDRi
    """
    # src_anchor = np.stack([mix, mix], axis=0)
    src_anchor = mix  # NOTE: target-speaker separation, so the mixture itself is the anchor
    sdr, sir, sar, popt = bss_eval_sources(src_ref, src_est)
    sdr0, sir0, sar0, popt0 = bss_eval_sources(src_ref, src_anchor)
    # avg_SDRi = ((sdr[0]-sdr0[0]) + (sdr[1]-sdr0[1])) / 2
    avg_SDRi = sdr - sdr0  # single target source, so no per-speaker averaging
    # print("SDRi1: {0:.2f}, SDRi2: {1:.2f}".format(sdr[0]-sdr0[0], sdr[1]-sdr0[1]))
    return avg_SDRi
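A sketch for the target-speaker variant above, where the mixture itself serves as the anchor and there is a single reference row; the shapes follow the code, the values are placeholders:

import numpy as np

T = 16000
src_ref = np.random.randn(1, T)                   # single target speaker, [1, T]
mix = src_ref[0] + np.random.randn(T)             # observed mixture, [T]
src_est = src_ref + 0.1 * np.random.randn(1, T)   # separator output

print(cal_SDRi(src_ref, src_est, mix))  # SDR improvement for the target speaker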
Example #30
def calc_sdr(w_real, w_pred):
    n_real, = w_real.shape
    n_pred, = w_pred.shape
    n = min(n_real, n_pred)

    w_real, w_pred = w_real[:n], w_pred[:n]

    sdr, _, _, _ = bss_eval_sources(w_real, w_pred, compute_permutation=True)
    return sdr
Example #31
def bss_eval_sdr(src_list, pred_src_list):
    from mir_eval.separation import bss_eval_sources
    len_cropped = pred_src_list.shape[-1]
    src_list = src_list[:, :len_cropped]

    sdr, sir, sar, _ = bss_eval_sources(src_list,
                                        pred_src_list,
                                        compute_permutation=True)
    return sdr, sir, sar