def eval(ref_name, enh_name, nsy_name, results):
    try:
        utt_id = ref_name.split('/')[-1]
        ref, sr = audioread(ref_name)
        enh, sr = audioread(enh_name)
        nsy, sr = audioread(nsy_name)
        # Trim all signals to the shorter of reference/enhanced length.
        enh_len = enh.shape[0]
        ref_len = ref.shape[0]
        if enh_len > ref_len:
            enh = enh[:ref_len]
        else:
            ref = ref[:enh_len]
            nsy = nsy[:enh_len]
        ref_score = pesq(ref, nsy, sr)
        enh_score = pesq(ref, enh, sr)
        ref_stoi = stoi(ref, nsy, sr, extended=False)
        enh_stoi = stoi(ref, enh, sr, extended=False)
        ref_sisdr = si_sdr(nsy, ref)
        enh_sisdr = si_sdr(enh, ref)
        ref_sdr = sdr(nsy, ref)
        enh_sdr = sdr(enh, ref)
    except Exception as e:
        # Skip this utterance entirely if any score failed, otherwise the
        # append below would reference undefined variables.
        print(e)
        return
    results.append([
        utt_id,
        {
            'pesq': [ref_score, enh_score],
            'stoi': [ref_stoi, enh_stoi],
            'si_sdr': [ref_sisdr, enh_sisdr],
            'sdr': [ref_sdr, enh_sdr],
        }
    ])
def eval(self, audio_est, audio_ref):
    x_est, fs_est = sf.read(audio_est)
    x_ref, fs_ref = sf.read(audio_ref)
    # Align the two signals to the same length.
    len_x = np.min([len(x_est), len(x_ref)])
    x_est = x_est[:len_x]
    x_ref = x_ref[:len_x]
    # x_ref = x_ref / np.max(np.abs(x_ref))

    if fs_est != fs_ref:
        raise ValueError(
            'Sampling rate differs between estimated audio and reference audio')

    if self.metric == 'rmse':
        return compute_rmse(x_est, x_ref)
    elif self.metric == 'pesq':
        return pesq(x_ref, x_est, fs_est)
    elif self.metric == 'stoi':
        return stoi(x_ref, x_est, fs_est, extended=False)
    elif self.metric == 'estoi':
        return stoi(x_ref, x_est, fs_est, extended=True)
    elif self.metric == 'all':
        score_rmse = compute_rmse(x_est, x_ref)
        score_pesq = pesq(x_ref, x_est, fs_est)
        score_stoi = stoi(x_ref, x_est, fs_est, extended=False)
        return score_rmse, score_pesq, score_stoi
    else:
        raise ValueError('Evaluation only supports: rmse, pesq, (e)stoi, all')
def calc_true_pesq(self, x, y, s, mask, fs=16000):
    scores = []
    for i in range(x.shape[0]):
        ind = int(torch.sum(mask[i]))
        x_i = x[i][:, :ind]
        y_i = y[i][:, :ind]
        s_i = s[i][:, :ind]

        x_i = torch.istft(x_i, n_fft=512, win_length=512, hop_length=128,
                          normalized=True).detach().cpu().numpy()
        y_i = torch.istft(y_i, n_fft=512, win_length=512, hop_length=128,
                          normalized=True).detach().cpu().numpy()
        s_i = torch.istft(s_i, n_fft=512, win_length=512, hop_length=128,
                          normalized=True).detach().cpu().numpy()

        score_x = pesq(s_i, x_i, fs)
        score_y = pesq(s_i, y_i, fs)
        score_s = pesq(s_i, s_i, fs)

        del x_i
        del y_i
        del s_i

        scores.append([score_x, score_y, score_s])
    return torch.tensor(scores)
def eval_pesq(predicted_path, noisy_test, clean_test, out_path,
              img_path='/u/anakuzne/data/snr0_test_img/', fs=16000):
    '''
    Params:
        predicted: stfts predicted by the model
        noisy_test: stfts of the noisy mixture
        clean_test: clean unmixed test signals
    '''
    noisy_ref = os.listdir(noisy_test)      # .wav
    clean_ref = collect_paths(clean_test)   # .wav
    predicted = os.listdir(predicted_path)  # .npy
    imag = os.listdir(img_path)             # .npy

    scores_clean_ref = []
    scores_noisy_ref = []

    print('Calculating PESQ...')
    for p in tqdm(clean_ref):
        clean_r, sr = librosa.load(p, mono=True)
        clean_r = librosa.core.resample(clean_r, sr, 16000)
        fname = p.split('/')[-2] + '_' + p.split('/')[-1].split('.')[0] + '.npy'

        ind = predicted.index(fname)
        pred = np.load(predicted_path + predicted[ind])
        ind = imag.index(fname)
        imag_num = pad(np.load(img_path + imag[ind]), 1339)
        pred = pred + imag_num
        pred = librosa.istft(pred, hop_length=256, win_length=512)
        pred = pred[:clean_r.shape[0]]

        # Compare to clean signal
        score_clean = pesq(clean_r, pred, fs)
        scores_clean_ref.append(score_clean)

        # Compare to degraded signal
        ind = noisy_ref.index(fname)
        pred = np.load(predicted_path + predicted[ind])
        noisy_r = np.load(noisy_test + noisy_ref[ind])
        noisy_r = librosa.istft(noisy_r, hop_length=256, win_length=512)
        pred = librosa.istft(pred, hop_length=256, win_length=512)
        pred = pred[:noisy_r.shape[0]]
        score_noisy = pesq(noisy_r, pred, fs)
        scores_noisy_ref.append(score_noisy)

    wav_names = [n.split('/')[-1] for n in clean_ref]
    data = {
        'fname': wav_names,
        'PESQ_clean_ref': scores_clean_ref,
        'PESQ_noisy_ref': scores_noisy_ref
    }
    df = pd.DataFrame(data, columns=['fname', 'PESQ_clean_ref', 'PESQ_noisy_ref'])
    df.to_csv(out_path + 'PESQ.csv')
def getPESQ(name, clean_speech_dir, noise_num):
    clean_waves_dir = 'exp/data_for_ac/' + clean_speech_dir
    clean_list = os.listdir(clean_waves_dir)
    clean_dir_list = [os.path.join(clean_waves_dir, clean_file) for clean_file in clean_list]
    clean_dir_list.sort()

    enhanced_waves_dir = 'exp/data_for_ac/' + name + '/enhanced_wav'
    enhanced_list = os.listdir(enhanced_waves_dir)
    enhanced_dir_list = [os.path.join(enhanced_waves_dir, enhanced_file) for enhanced_file in enhanced_list]
    enhanced_dir_list.sort()

    mixed_waves_dir = 'exp/data_for_ac/' + name + '/mixed_wav'
    mixed_list = os.listdir(mixed_waves_dir)
    mixed_dir_list = [os.path.join(mixed_waves_dir, mixed_file) for mixed_file in mixed_list]
    mixed_dir_list.sort()

    # Repeat each clean file once per noise condition so the lists line up.
    clean_dir_list_long = []
    for clean_dir in clean_dir_list:
        for i in range(noise_num):
            clean_dir_list_long.append(clean_dir)

    avg_score_raw = 0.0
    avg_score_en = 0.0
    avg_score_imp = 0.0
    i = 0
    for clean_wave, enhanced_wave, mixed_wave in zip(clean_dir_list_long,
                                                     enhanced_dir_list,
                                                     mixed_dir_list):
        ref, sr = sf.read(clean_wave)
        mixed, sr = sf.read(mixed_wave)
        enhanced, sr = sf.read(enhanced_wave)
        # spec = spectrum_tool.magnitude_spectrum_librosa_stft(enhanced, 512, 256)
        # angle = spectrum_tool.phase_spectrum_librosa_stft(enhanced, 512, 256)
        # spec = spec ** 1.3
        # enhanced = spectrum_tool.librosa_istft(spec * np.exp(angle * 1j), 512, 256)
        # score_raw = pesq(ref / np.max(np.abs(ref)), mixed / np.max(np.abs(mixed)), sr)
        # score_en = pesq(ref / np.max(np.abs(ref)), enhanced / np.max(np.abs(enhanced)), sr)
        score_raw = pesq(ref, mixed, sr)
        score_en = pesq(ref, enhanced, sr)
        print(str(i % noise_num + 1) + "_score_raw, score_en:", score_raw, score_en)
        i += 1
        avg_score_raw += score_raw
        avg_score_en += score_en
        avg_score_imp += (score_en - score_raw)

    avg_score_raw /= len(clean_dir_list_long)
    avg_score_en /= len(clean_dir_list_long)
    avg_score_imp /= len(clean_dir_list_long)
    print('avg_score_raw: %f,\navg_score_en: %f,\nimp: %f' %
          (avg_score_raw, avg_score_en, avg_score_imp))
def separate_sample(sess, model, config, mix, c1, c2):
    batch_size = config['training']['batch_size']
    n_channel = config['training']['n_output']
    n_speaker = config['training']['n_speaker']
    stride = config['model']['hop']

    num_output_samples = mix.shape[0]
    num_fragments = int(np.ceil(num_output_samples / model.input_length))
    target_field_length = model.input_length
    target_padding = 0
    num_batches = int(np.ceil(num_fragments / batch_size))
    output = [[] for _ in range(n_channel)]
    num_pad_values = 0
    fragment_i = 0

    # pad input mixture to 10x, since stride is 10
    num_pad_values = stride - mix.shape[0] % stride
    mix = np.pad(mix, (0, num_pad_values), mode='constant', constant_values=0)

    output, = sess.run(fetches=[model.data_out],
                       feed_dict={model.mix_input: np.expand_dims(mix, 0)})[0]
    output = np.array(output)
    if num_pad_values != 0:
        output = output[:, :-num_pad_values]
        mix = mix[:-num_pad_values]

    clean_wav = np.array([c1, c2])
    perms = np.array(list(itertools.permutations(range(n_channel), n_speaker)))
    perms_onehot = (np.arange(perms.max() + 1) == perms[..., None]).astype(int)
    cross_sdr = signal_to_distortion_ratio(np.expand_dims(np.array([c1, c2]), 1),
                                           np.expand_dims(output, 0))
    loss_sets = np.einsum('ij,pij->p', cross_sdr, perms_onehot)
    best_perm = perms[np.argmax(loss_sets)]
    pit_output = output[best_perm]

    # SDR
    _sdr, _sir, _sar, _perm = mir_eval.separation.bss_eval_sources(clean_wav, pit_output)

    # SISNR
    clean_wav_norm = clean_wav - np.mean(clean_wav, axis=-1, keepdims=True)
    pit_output_norm = pit_output - np.mean(pit_output, axis=-1, keepdims=True)
    _sisnr = signal_to_distortion_ratio(clean_wav_norm, pit_output_norm)

    # PESQ
    _pesq = [pesq(clean_wav[0], pit_output[0], 8000),
             pesq(clean_wav[1], pit_output[1], 8000)]

    perm_output = np.expand_dims(best_perm, -1).tolist()
    return _sdr, _sisnr, _pesq, perm_output
def evaluation(ref, est, mix):
    """
    Wrapper function for evaluating the output of a NN. Metrics are PESQ, STOI, eSTOI and SSR.

    :param ref: Path to the original (reference) file.
    :param est: Path to the estimated file.
    :param mix: Path to the mixture file.
    :return: Prints PESQ, STOI, eSTOI and SSR metric values to stdout.
    """
    file_ref = ref
    file_est = est
    file_mix = mix

    reference_sources, sr_r = librosa.load(file_ref, sr=None)
    estimated_sources, sr_e = librosa.load(file_est, sr=None)
    mix_sources, sr_m = librosa.load(file_mix, sr=None)

    if sr_r != 16000 or sr_e != 16000 or sr_m != 16000:
        print("\nResampling at 16k...")
        ref_16k = librosa.resample(reference_sources, sr_r, 16000)
        est_16k = librosa.resample(estimated_sources, sr_e, 16000)
        mix_16k = librosa.resample(mix_sources, sr_m, 16000)
    else:
        ref_16k = reference_sources
        est_16k = estimated_sources
        mix_16k = mix_sources

    # All signals are at 16 kHz from here on.
    pesq_score = round(pesq(ref_16k, est_16k, 16000), 3)
    stoi_score = round(stoi(ref_16k, est_16k, 16000, extended=False), 2)
    estoi_score = round(stoi(ref_16k, est_16k, 16000, extended=True), 2)
    ssr_score = round(SSR(est_16k, mix_16k), 3)

    print("PESQ\t STOI\t eSTOI\t SSR")
    print(pesq_score, "\t", stoi_score, "\t", estoi_score, "\t", ssr_score)
def PESQ(ref_wav, deg_wav):
    # reference wav
    # degraded wav
    sr = 16000
    # tfl = tempfile.NamedTemporaryFile()
    # ref_tfl = tfl.name + '_ref.wav'
    # deg_tfl = tfl.name + '_deg.wav'
    # # if ref_wav.max() <= 1:
    # #     ref_wav = np.array(denormalize_wave_minmax(ref_wav), dtype=np.int16)
    # # if deg_wav.max() <= 1:
    # #     deg_wav = np.array(denormalize_wave_minmax(deg_wav), dtype=np.int16)
    # # wavfile.write(ref_tfl, 16000, ref_wav)
    # # wavfile.write(deg_tfl, 16000, deg_wav)
    # sf.write(ref_tfl, ref_wav, sr, subtype='PCM_16')
    # sf.write(deg_tfl, deg_wav, sr, subtype='PCM_16')
    # curr_dir = os.getcwd()
    # Write both to tmp files and then eval with pesqmain
    # try:
    #     p = run(['pesqmain'.format(curr_dir),
    #              ref_tfl, deg_tfl, '+' + str(sr), '+wb'],
    #             stdout=PIPE,
    #             encoding='ascii')
    #     res_line = p.stdout.split('\n')[-2]
    #     results = re.split('\s+', res_line)
    #     return results[-1]
    # except FileNotFoundError:
    #     print('pesqmain not found! Please add it to your PATH')
    score = pesq(ref_wav, deg_wav, sr)
    return score
def calc_metrics(loader, actor, device):
    pesq_all = []
    stoi_all = []

    for batch in loader:
        x = batch["noisy"].unsqueeze(1).to(device)
        t = batch["clean"].unsqueeze(1).to(device)
        m = batch["mask"].to(device)

        out_r, out_i = actor(x)
        out_r = torch.transpose(out_r, 1, 2)
        out_i = torch.transpose(out_i, 1, 2)

        y = predict(x.squeeze(1), (out_r, out_i))
        t = t.squeeze()
        m = m.squeeze()
        #print("Y:", y.shape)
        #source, targets, preds = inverse(t, y, m, x)
        targets, preds = inverse(t, y, m, x)

        for j in range(len(targets)):
            curr_pesq = pesq(targets[j].detach().cpu().numpy(),
                             preds[j].detach().cpu().numpy(), 16000)
            curr_stoi = stoi(targets[j].detach().cpu().numpy(),
                             preds[j].detach().cpu().numpy(), 16000)
            pesq_all.append(curr_pesq)
            stoi_all.append(curr_stoi)

    PESQ = torch.mean(torch.tensor(pesq_all))
    STOI = torch.mean(torch.tensor(stoi_all))
    return PESQ, STOI
def cal_score(clean, enhanced):
    clean = clean / abs(clean).max()
    enhanced = enhanced / abs(enhanced).max()
    s_stoi = stoi(clean, enhanced, 16000)
    s_pesq = pesq(clean, enhanced, 16000)
    return round(s_pesq, 5), round(s_stoi, 5)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--results_dir', type=str, required=True)
    parser.add_argument('--audio_sampling_rate', type=int, default=16000)
    args = parser.parse_args()

    audio1, _ = librosa.load(os.path.join(args.results_dir, 'audio1_separated.wav'),
                             sr=args.audio_sampling_rate)
    audio2, _ = librosa.load(os.path.join(args.results_dir, 'audio2_separated.wav'),
                             sr=args.audio_sampling_rate)
    audio1_gt, _ = librosa.load(os.path.join(args.results_dir, 'audio1.wav'),
                                sr=args.audio_sampling_rate)
    audio2_gt, _ = librosa.load(os.path.join(args.results_dir, 'audio2.wav'),
                                sr=args.audio_sampling_rate)
    audio_mix, _ = librosa.load(os.path.join(args.results_dir, 'audio_mixed.wav'),
                                sr=args.audio_sampling_rate)

    # SDR, SIR, SAR
    sdr, sir, sar = getSeparationMetrics(audio1, audio2, audio1_gt, audio2_gt)
    sdr_mixed, _, _ = getSeparationMetrics(audio_mix, audio_mix, audio1_gt, audio2_gt)

    # PESQ
    pesq_score1 = pesq(audio1, audio1_gt, args.audio_sampling_rate)
    pesq_score2 = pesq(audio2, audio2_gt, args.audio_sampling_rate)
    pesq_score = (pesq_score1 + pesq_score2) / 2

    # STOI
    stoi_score1 = stoi(audio1_gt, audio1, args.audio_sampling_rate, extended=False)
    stoi_score2 = stoi(audio2_gt, audio2, args.audio_sampling_rate, extended=False)
    stoi_score = (stoi_score1 + stoi_score2) / 2

    output_file = open(os.path.join(args.results_dir, 'eval.txt'), 'w')
    output_file.write("%3f %3f %3f %3f %3f %3f %3f" %
                      (sdr, sdr_mixed, sdr - sdr_mixed, sir, sar, pesq_score, stoi_score))
    output_file.close()
def forward(self, clean, enhanced):
    assert len(clean) == len(enhanced)
    clean, enhanced = np.array(clean), np.array(enhanced)
    scores = []
    for c, e in zip(clean, enhanced):
        q = pesq(c.detach().cpu(), e.detach().cpu(), self.sr)
        # Normalise from the PESQ range [-0.5, 4.5] to [0, 1]: (q - min) / (max - min)
        q = (q + 0.5) / (4.5 + 0.5)
        scores.append(q)
    return torch.tensor(scores, device=self._device)
def compute_PESQ(clean_signal, noisy_signal, sr=16000): """计算PESQ Args: clean_signal:纯净语音信号 noisy_signal:带噪语音信号 sr: 采样率 """ return pesq(clean_signal, noisy_signal, sr)
def calculate_pesq(real, fake, sr):
    # PESQ only works at sampling rates of 8000 or 16000 Hz, so resample first.
    try:
        if sr != 8000:
            real = librosa.resample(real, sr, 8000)
            fake = librosa.resample(fake, sr, 8000)
        return pesq(real, fake, 8000)
    except Exception:
        return 0.0
def inference(clean_path, noisy_path, model_path, out_path):
    device = torch.device("cuda:1")

    model = Actor()
    model = nn.DataParallel(model, device_ids=[1, 2])
    model.load_state_dict(torch.load(model_path + 'actor_best.pth'))
    model = model.to(device)

    dataset = Data(clean_path, noisy_path, mode='Test')
    loader = data.DataLoader(dataset, batch_size=32, shuffle=False,
                             collate_fn=collate_custom)

    fnames = os.listdir(noisy_path)
    print("Num files:", len(fnames))

    pesq_all = []
    stoi_all = []
    fcount = 0

    for batch in tqdm(loader):
        x = batch["noisy"].unsqueeze(1).to(device)
        t = batch["clean"].unsqueeze(1).to(device)
        m = batch["mask"].to(device)

        out_r, out_i = model(x)
        out_r = torch.transpose(out_r, 1, 2)
        out_i = torch.transpose(out_i, 1, 2)

        y = predict(x.squeeze(1), (out_r, out_i))
        t = t.squeeze()
        m = m.squeeze()
        x = x.squeeze()

        source, targets, preds = inverse(t, y, m, x)

        for j in range(len(targets)):
            t_j = targets[j].detach().cpu().numpy()
            p_j = preds[j].detach().cpu().numpy()
            p_j = 10 * (p_j / np.linalg.norm(p_j))

            curr_pesq = pesq(t_j, p_j, 16000)
            curr_stoi = stoi(t_j, p_j, 16000)
            pesq_all.append(curr_pesq)
            stoi_all.append(curr_stoi)

            try:
                sf.write(os.path.join(out_path, fnames[fcount]), p_j, 16000)
            except IndexError:
                print("Fcount:", fcount, len(fnames))
            fcount += 1

    PESQ = torch.mean(torch.tensor(pesq_all))
    STOI = torch.mean(torch.tensor(stoi_all))
    print("PESQ: ", PESQ, "STOI: ", STOI)

    with open(os.path.join(model_path, 'test_scores.txt'), 'w') as fo:
        fo.write("Avg PESQ: " + str(float(PESQ)) + " Avg STOI: " + str(float(STOI)))
def __call__(self, a, b):
    """
    :param a: time-domain signal
    :param b: time-domain signal
    :return: PESQ score
    """
    assert len(a.shape) == 1
    assert len(a) == len(b)
    score = pesq(a, b, self.sr)
    return score
def pesq_by_inout(in_file, out_file):
    sr, ref = read(in_file)
    sr, deg = read(out_file)
    # Keep only the first channel of multi-channel files.
    if len(ref.shape) >= 2:
        ref = ref[:, 0]
    if len(deg.shape) >= 2:
        deg = deg[:, 0]
    # Zero-pad the reference to the degraded length (assumes deg is not shorter than ref).
    ref = np.pad(ref, (0, len(deg) - len(ref)), "constant", constant_values=0)
    return pesq(ref, deg, sr)
def compute_PESQ(clean_signal, noisy_signal, sr=16000): """ 使用 pypesq 计算 pesq 评价指标。 Notes: pypesq 是 PESQ 官方代码(C 语言)的 wrapper。官方代码在某些语音上会报错,而其报错时直接调用了 exit() 函数,直接导致 Python 脚本停止运行,亦无法捕获异常,实在过于粗鲁。 为了防止 Python 脚本被打断,这里使用子进程执行 PESQ 评价指标的计算,设置子进程的超时。 设置子进程超时的方法:https://stackoverflow.com/a/29378611 Returns: 当语音可以计算 pesq score 时,返回 pesq score,否则返回 -1 """ return pesq(clean_signal, noisy_signal, sr)
def pesq_metric(y_hat, y_true):
    with torch.no_grad():
        y_hat = y_hat.cpu().data.numpy()
        y = y_true.cpu().data.numpy()

        sum_pesq = 0
        for i in range(len(y)):
            sum_pesq += pesq(y[i], y_hat[i], SAMPLE_RATE)

        sum_pesq /= len(y)
        return sum_pesq
def eval(ref_name, enh_name, nsy_name, results):
    try:
        utt_id = ref_name.split('/')[-1]
        ref, sr = audioread(ref_name)
        enh, sr = audioread(enh_name)
        nsy, sr = audioread(nsy_name)
        ref_score = pesq(ref, nsy, sr)
        enh_score = pesq(ref, enh, sr)
        ref_stoi = stoi(ref, nsy, sr, extended=False)
        enh_stoi = stoi(ref, enh, sr, extended=False)
        ref_sdr = si_sdr(nsy, ref)
        enh_sdr = si_sdr(enh, ref)
    except Exception as e:
        # Skip this utterance if any metric failed, otherwise the append
        # below would reference undefined variables.
        print(e)
        return
    results.append([utt_id, {'pesq': [ref_score, enh_score],
                             'stoi': [ref_stoi, enh_stoi],
                             'si_sdr': [ref_sdr, enh_sdr]}])
def pesq_score(self, ref_audio: 'Audio') -> float:
    """
    PESQ score for speech audio.

    Args:
        ref_audio: Reference audio of type `Audio`.

    Returns:
        float -- PESQ score.
    """
    assert ref_audio.sr == self.sr, 'Sample rates should be the same'
    return pypesq.pesq(ref_audio.wave, self.wave, self.sr)
def pesq_metric(y_hat, bd):
    # PESQ
    with torch.no_grad():
        y_hat = y_hat.cpu().numpy()
        y = bd['y'].cpu().numpy()  # target signal

        total = 0
        for i in range(len(y)):
            total += pesq(y[i, 0], y_hat[i, 0], SAMPLE_RATE)

        total /= len(y)
        return torch.tensor(total)
def _eval(batch, metrics, including='output', sample_rate=8000, use_pypesq=False):
    if use_pypesq:
        metrics = [m for m in metrics if m != 'pesq']
    has_estoi = False
    if 'estoi' in metrics:
        metrics = [m for m in metrics if m != 'estoi']
        has_estoi = True
    has_wer = False
    if 'wer' in metrics:
        metrics = [m for m in metrics if m != 'wer']
        has_wer = True

    mix = batch['mix']
    clean = batch['clean']
    estimate = batch['enh']
    snr = batch['snr']

    res = get_metrics(mix.numpy(), clean.numpy(), estimate.numpy(),
                      sample_rate=sample_rate, metrics_list=metrics, including=including)
    if use_pypesq:
        res['pesq'] = pesq(clean.flatten(), estimate.flatten(), sample_rate)
    if has_estoi:
        res['estoi'] = stoi(clean.flatten(), estimate.flatten(), sample_rate, extended=True)
    if has_wer:
        res['wer'] = jiwer.wer(batch['clean_text'], batch['transcription'],
                               truth_transform=_wer_trans,
                               hypothesis_transform=_wer_trans)
    if including == 'input':
        for m in metrics:
            res[m] = res['input_' + m]
            del res['input_' + m]
    res['snr'] = snr[0].item()
    return res
def generate_curriculum(clean_path, noisy_path, model_path):
    fnames = os.listdir(clean_path)
    d = {"fname": [], "pesq": []}

    for fname in tqdm(fnames):
        wav_noisy, sr = librosa.core.load(os.path.join(noisy_path, fname), sr=16000)
        wav_clean, sr = librosa.core.load(os.path.join(clean_path, fname), sr=16000)
        score = pesq(wav_clean, wav_noisy, sr)
        d["fname"].append(fname)
        d["pesq"].append(score)

    df = pd.DataFrame.from_dict(d)
    df = df.sort_values(by=['pesq'])
    df.to_csv(os.path.join(model_path, "train_sort.tsv"), sep='\t')
def loss_pesq(step, source, estimate_source):
    source = source.cpu()
    estimate_source = estimate_source.cpu()
    score = 0
    for i in range(source.shape[0]):
        score = score + pesq(source[i], estimate_source[i], sr)
        torchaudio.save(os.path.join(dns_home, 'predict_folder',
                                     test[step * batch_size + i]),
                        estimate_source[i].unsqueeze(0), sample_rate=16000)
    return score / source.shape[0]
def cal_PESQi(src_ref, src_est, mix):
    """Calculate the PESQ improvement (PESQi) of the estimates over the mixture.

    Args:
        src_ref: numpy.ndarray, [C, T]
        src_est: numpy.ndarray, [C, T], reordered by best PIT permutation
        mix: numpy.ndarray, [T]
    Returns:
        average PESQ of the estimates, average PESQ of the mixture, average PESQi
    """
    num = 0
    new_pesq = 0
    orig_pesq = 0
    avg_PESQi = 0
    for ref, est in zip(src_ref, src_est):
        num = num + 1
        new_pesq_out = pesq(ref, est, 8000)
        new_pesq = new_pesq + new_pesq_out
        orig_pesq_out = pesq(ref, mix, 8000)
        orig_pesq = orig_pesq + orig_pesq_out
        avg_PESQi = avg_PESQi + (new_pesq_out - orig_pesq_out)
    return new_pesq / num, orig_pesq / num, avg_PESQi / num
def get_pesq(filepath_1, filepath_2):
    '''
    Arguments:
        filepath_1: original clean .wav file path
        filepath_2: generated clean .wav file path
    Returns:
        PESQ score and the clean signal power (dB)
    '''
    clean_wave, clean_sample_rate = librosa.load(filepath_1, sr=16000)
    cleaned_wave, cleaned_sample_rate = librosa.load(filepath_2, sr=16000)
    signal_power = 20 * log10(np.mean(np.square(clean_wave)))
    score = pesq(clean_wave, cleaned_wave, clean_sample_rate)
    return score, signal_power
def main():
    parser = argparse.ArgumentParser(description='Calculate performance index')
    parser.add_argument('--test_mix_folder', default='../test-mix-2-babble', type=str,
                        help='test-set-mix')
    parser.add_argument('--test_clean_folder', default='../test-clean-2-babble', type=str,
                        help='test-set-clean')
    parser.add_argument('--enhanced_folder', default='../test-result', type=str,
                        help='test-set-enhanced')
    opt = parser.parse_args()

    MIX_FOLDER = opt.test_mix_folder
    CLEAN_FOLDER = opt.test_clean_folder
    ENHANCED_FOLDER = opt.enhanced_folder

    pesqs = []
    stois = []
    for cleanfile in os.listdir(CLEAN_FOLDER):
        mixfile = cleanfile.replace('clean', 'mix')
        enhancedfile = 'enhanced_' + mixfile

        cleanfile = os.path.join(CLEAN_FOLDER, cleanfile)
        mixfile = os.path.join(MIX_FOLDER, mixfile)
        enhancedfile = os.path.join(ENHANCED_FOLDER, enhancedfile)

        ref, sr1 = librosa.load(cleanfile, 16000)
        #deg_mix, sr2 = librosa.load(mixfile, 16000)
        deg_enh, sr3 = librosa.load(enhancedfile, 16000)

        #pesq1 = pesq.pesq(ref, deg_mix)
        pesq2 = pesq.pesq(ref, deg_enh[:len(ref)])
        #print("pesq:", pesq1, " --> ", pesq2)
        pesqs.append(pesq2)

        #stoi1 = stoi(ref, deg_mix, fs_sig=16000)
        stoi2 = stoi(ref, deg_enh[:len(ref)], fs_sig=16000)
        #print("stoi:", stoi1, " --> ", stoi2)
        stois.append(stoi2)

    print('Epesq:', np.mean(pesqs), "Estoi:", np.mean(stois))
def sample_main():
    sr = 16000
    wav_list = ['data/1.wav', 'data/2.wav', 'data/3.wav', 'data/4.wav', 'data/5.wav']
    src, mixed = mix_audio(wav_list)

    from sklearn.decomposition import FastICA
    W = FastICA(n_components=len(wav_list))
    S = W.fit_transform(mixed)

    # Write separated audio into files.
    for i in range(len(wav_list)):
        output_name = "result/%d.wav" % (i + 1)
        wavfile.write(output_name, sr, S[:, i])

    # Calculate SDR, SIR, SAR.
    from mir_eval.separation import bss_eval_sources
    sdr, sir, sar, _ = bss_eval_sources(S.T, src.T)  # shape = (channels, samples)
    print("SDR: ", sdr)  # np.array(channels,)
    print("SIR: ", sir)
    print("SAR: ", sar)

    # PESQ expects 1-D signals, so score each channel separately.
    # Note: FastICA returns sources in arbitrary order, so channel i of S may not match src[:, i].
    from pypesq import pesq
    pesq_scores = [pesq(src[:, i], S[:, i], fs=sr) for i in range(len(wav_list))]
    print("PESQ: ", pesq_scores)