def test(model, device, noisy_path, clean_path, enhance_path, score_path, args):
    """Score every noisy test file and append the mean PESQ/STOI to the CSV.

    The model is moved to ``device`` and switched to eval mode, then
    ``write_score`` is called once per file returned by
    ``getfilename(noisy_path)``. Each call appends one row to ``score_path``;
    afterwards the file is read back and an ``Average`` row is appended.

    :param model: model passed through to ``write_score``; no checkpoint is
        loaded here, the model is used as given.
    :param device: target device for ``model.to``.
    :param noisy_path: location handed to ``getfilename``.
    :param clean_path: location handed to ``get_cleanwav_dic``.
    :param enhance_path: output root forwarded to ``write_score``.
    :param score_path: CSV file to (re)create; an existing file is removed.
    :param args: not used by this function.
    """
    model = model.to(device)
    model.eval()

    # load data
    test_files = np.array(getfilename(noisy_path))
    c_dict = get_cleanwav_dic(clean_path)

    # open score file: start from a fresh file with only the header row
    if os.path.exists(score_path):
        os.remove(score_path)
    # project helper; presumably makes sure the target directory exists -- confirm
    check_folder(score_path)
    print('Save PESQ&STOI results to:', score_path)
    with open(score_path, 'a') as f:
        f.write('Filename,PESQ,STOI\n')

    print('Testing...')
    # A bare ``torch.no_grad()`` statement has no effect; it only disables
    # gradient tracking when used as a context manager (or decorator).
    with torch.no_grad():
        for test_file in tqdm(test_files):
            write_score(model, device, test_file, c_dict, enhance_path, score_path)

    # Read the per-file rows back and append their column means.
    data = pd.read_csv(score_path)
    pesq_mean = data['PESQ'].to_numpy().astype('float').mean()
    stoi_mean = data['STOI'].to_numpy().astype('float').mean()
    with open(score_path, 'a') as f:
        f.write(','.join(('Average', str(pesq_mean), str(stoi_mean))) + '\n')
def save_checkpoint(epoch, model, optimizer, best_loss, model_path):
    """Serialize the training state to ``model_path`` with ``torch.save``.

    The saved object is a dict with the keys ``epoch``, ``model``,
    ``optimizer`` and ``best_loss``; ``model`` and ``optimizer`` hold the
    respective ``state_dict()`` results.

    :param epoch: value stored under ``'epoch'``.
    :param model: object providing ``state_dict()``.
    :param optimizer: object providing ``state_dict()``.
    :param best_loss: value stored under ``'best_loss'``.
    :param model_path: destination file passed to ``torch.save``.
    """
    snapshot = dict(
        epoch=epoch,
        model=model.state_dict(),
        optimizer=optimizer.state_dict(),
        best_loss=best_loss,
    )
    # project helper; presumably prepares the destination directory -- confirm
    check_folder(model_path)
    torch.save(snapshot, model_path)
def write_score(model, device, test_file, c_dict, enhance_path, ilen, y,
                score_path, asr_result, TMHINT=None):
    """Score one test file and append the result row to ``score_path``.

    When ``asr_result`` is not ``None`` the selected spectrogram
    (``'enhanced'`` -> SE model output, ``'noisy'`` -> noisy input, anything
    else -> clean reference) is fed through ``model.Fbank`` and
    ``model.ASRmodel``; the row written is
    ``file,PESQ,STOI,<second value returned by ASRmodel>``.
    When ``asr_result`` is ``None`` the SE model output is scored, the row
    ``file,PESQ,STOI`` is written and the enhanced waveform is saved under
    ``enhance_path``.

    :param model: object exposing ``SEmodel``, ``ASRmodel`` and ``Fbank``.
    :param device: device the ASR inputs are moved to.
    :param test_file: path of the noisy file (``'/'``-separated string).
    :param c_dict: clean-wave lookup forwarded to ``prepare_test``.
    :param enhance_path: root folder for enhanced waveforms.
    :param ilen: ASR input forwarded to ``ASRmodel``; ``.to`` and
        ``.unsqueeze`` are called on it.
    :param y: ASR input forwarded to ``ASRmodel``; ``.to`` and ``.unsqueeze``
        are called on it.
    :param score_path: CSV file opened in append mode.
    :param asr_result: ``None``, ``'enhanced'``, ``'noisy'`` or any other
        value (treated as "clean").
    :param TMHINT: forwarded unchanged to ``prepare_test``.
    """
    n_spec, n_phase, n_len, c_wav, c_spec, c_phase, n_folder = prepare_test(
        test_file, c_dict, device, TMHINT)

    # [Yo] Change prediction
    # Identity check: ``None`` is a singleton, so ``is not`` is the correct test.
    if asr_result is not None:
        # Get ASR prediction results
        Fbank = model.Fbank()
        model.ASRmodel.report_cer = True
        model.ASRmodel.report_wer = True

        if asr_result == 'enhanced':
            spec = model.SEmodel(n_spec)
            phase = n_phase
        elif asr_result == 'noisy':
            spec = n_spec
            phase = n_phase
        else:
            spec = c_spec
            phase = c_phase

        fbank = Fbank.forward(spec)
        fbank, ilen, y = fbank.to(device), ilen.to(device), y.to(device)
        _asr_loss, asr_cer = model.ASRmodel(fbank, ilen.unsqueeze(0), y.unsqueeze(0))

        spec = spec.cpu().detach().numpy()
        recon_wav = recons_spec_phase(spec.squeeze().transpose(), phase, n_len)

        # cal score
        s_pesq, s_stoi = cal_score(c_wav, recon_wav)
        with open(score_path, 'a') as f:
            f.write(f'{test_file},{s_pesq},{s_stoi},{asr_cer}\n')
    else:
        enhanced_spec = model.SEmodel(n_spec).cpu().detach().numpy()
        enhanced = recons_spec_phase(enhanced_spec.squeeze().transpose(), n_phase, n_len)

        # cal score
        s_pesq, s_stoi = cal_score(c_wav, enhanced)
        with open(score_path, 'a') as f:
            f.write(f'{test_file},{s_pesq},{s_stoi}\n')

        # write enhanced waveform
        # NOTE(review): kept inside this branch because ``enhanced`` is only
        # defined here -- confirm this matches the intended nesting.
        out_path = f"{enhance_path}/{n_folder+'/'+test_file.split('/')[-1]}"
        check_folder(out_path)
        # ``maxv`` is a module-level constant defined outside this function.
        audiowrite(out_path, 16000, (enhanced * maxv).astype(np.int16))
def write_score(model, device, test_file, c_dict, enhance_path, score_path, tr_bol=False):
    """Enhance one noisy file, append its PESQ/STOI row and save the waveform.

    :param model: object exposing ``SEmodel``.
    :param device: forwarded to ``prepare_test``.
    :param test_file: path of the noisy file (``'/'``-separated string).
    :param c_dict: clean-wave lookup forwarded to ``prepare_test``.
    :param enhance_path: root folder the enhanced waveform is written under.
    :param score_path: CSV file opened in append mode.
    :param tr_bol: not used by this function.
    """
    noisy_input, noisy_phase, wav_len, clean_wav, sub_folder = prepare_test(
        test_file, c_dict, device)

    # [Yo] Change prediction
    se_out = model.SEmodel(noisy_input)
    se_out = se_out.cpu().detach().numpy()
    enhanced = recons_spec_phase(se_out.squeeze().transpose(), noisy_phase, wav_len)

    # cal score
    pesq_val, stoi_val = cal_score(clean_wav, enhanced)
    with open(score_path, 'a') as score_file:
        score_file.write(f'{test_file},{pesq_val},{stoi_val}\n')

    # write enhanced waveform under <enhance_path>/<sub_folder>/<file name>
    wav_name = test_file.split('/')[-1]
    out_path = f"{enhance_path}/{sub_folder + '/' + wav_name}"
    check_folder(out_path)
    # ``maxv`` is a module-level constant defined outside this function.
    scaled = (enhanced * maxv).astype(np.int16)
    audiowrite(out_path, 16000, scaled)
def format_output_file(self, website_name):
    """Rewrite ``settings["FEED_URI"]`` to a dated folder and timestamped file.

    Does nothing when ``"FEED_URI"`` is not present in ``self.settings``.
    Otherwise the new URI is
    ``<parent of FEED_URI>_<YYYY-MM-DD>/listing_<website_name>_<timestamp>.jl``
    where ``<timestamp>`` is formatted with ``self.utc_format``.

    :param website_name: name embedded in the output file name.
    """
    if "FEED_URI" not in self.settings:
        return

    # Sample the clock once so the folder date and the file timestamp can
    # never disagree (two separate reads could straddle midnight UTC).
    now = datetime.utcnow()

    file_name = f"listing_{website_name}_{now.strftime(self.utc_format)}.jl"
    dated_parent = (
        str(Path(self.settings["FEED_URI"]).parent) + "_" + now.strftime("%Y-%m-%d")
    )
    # project helper; its return value is used as the output folder path
    output_folder = sc.check_folder(dated_parent)
    self.settings["FEED_URI"] = str(Path(output_folder) / file_name)
def get_model_folder(self):
    """
    Return a new folder for the trained model.

    The preferred location is ``<output_path>/<name>/<UTC date>``. When that
    directory already exists, ``<output_path>/<name>/<n>_<UTC date>`` is used
    instead, with ``n`` the smallest non-negative integer whose folder does
    not exist yet.

    :return: (str) folder to save the model
    """
    today_dt = str(datetime.utcnow().date())
    parent = os.path.join(self.model_config.output_path, self.name)

    def numbered(count):
        # "<count>_<date>" sibling of the plain dated folder
        return os.path.join(parent, '{0}_{1}'.format(count, today_dt))

    # NOTE(review): if sc.check_folder itself creates the directory, the
    # plain dated folder is never returned from here -- confirm.
    model_folder = sc.check_folder(os.path.join(parent, today_dt))
    if not os.path.isdir(model_folder):
        os.mkdir(model_folder)
        return model_folder

    # Get next nonexistent folder
    tmp_count = 0
    while os.path.isdir(numbered(tmp_count)):
        tmp_count += 1
    model_folder = numbered(tmp_count)
    os.mkdir(model_folder)
    return model_folder
def parseStore(self, company, n_reviews: int = 10):
    """Scrape reviews for every store URL registered under ``company``.

    For each URL in ``STORES[company]`` the scraper is asked to sort by date.
    On success (return value ``0``) reviews are fetched in batches and
    appended as JSON lines to a timestamped file in the output folder until
    at least ``n_reviews`` have been counted. On failure the URL is appended
    to ``error.log``.

    :param company: key into ``STORES``.
    :param n_reviews: review count at which fetching for a URL stops.
    :raises Exception: when ``company`` is not a key of ``STORES``.
    """
    if company not in STORES:
        raise Exception(f"Company '{company}' not found!")

    print(f"[*] Scraping '{company}'...")
    output_folder = Path(sc.check_folder("output"))
    store_urls = STORES[company]

    for idx, store_url in enumerate(store_urls):
        print(f"[*] Scraping now from '{store_url}'!")
        print(f"{idx+1} out of {len(store_urls)} links...")

        error = self.scraper.sort_by_date(store_url)
        if error != 0:
            print(
                f"[*] Could not scrape link '{store_url}'. Link will be stored @error.log for further retries."
            )
            with Path("error.log").open("a", encoding="utf-8") as log:
                log.write(str(store_url) + "\n")
            continue

        fetched = 0
        stamp = datetime.utcnow().strftime('%Y-%m-%d-T%H-%M-%SZ')
        file = output_folder / f"gbusiness_{company}_{stamp}.jl"
        while fetched < n_reviews:
            reviews = self.scraper.enrich_reviews(
                self.scraper.get_reviews(fetched), store_url, company)
            with file.open("a", encoding="utf-8") as out:
                for review in reviews:
                    out.write(json.dumps(review) + "\n")
            # An empty batch still advances the counter (by 100) so the loop
            # cannot spin forever on a store with no further reviews.
            fetched += len(reviews) or 100
def get_tensorboard(self):
    """
    Build the TensorBoard keras callback.

    Logs go to the ``tensorboard`` sub-folder of ``self.model_folder`` (the
    path is passed through ``sc.check_folder`` first); histograms are
    disabled (``histogram_freq=0``).

    :return: TB callback
    """
    log_dir = os.path.join(self.model_folder, "tensorboard")
    log_dir = sc.check_folder(log_dir)
    callback = TensorBoard(log_dir=log_dir, histogram_freq=0)
    return callback
def check_args(args):
    """Validate ``args`` and pass its output folders through ``util.check_folder``.

    ``args.name`` is appended to ``checkpoint_dir`` and ``result_dir`` in
    place. Two assertions follow: ``sample_num`` modulo its own square root
    must be zero, and ``input_dir`` must exist.

    :param args: namespace with ``name``, ``checkpoint_dir``, ``result_dir``,
        ``sample_num``, ``input_dir``, ``tfr_dir`` and ``log_dir``.
    :return: the same ``args`` object, mutated.
    """
    # Per-run sub-locations: suffix the run name onto the base paths.
    for base_attr in ('checkpoint_dir', 'result_dir'):
        setattr(args, base_attr, getattr(args, base_attr) + args.name)

    assert args.sample_num % math.sqrt(args.sample_num) == 0
    assert os.path.exists(args.input_dir)

    for folder_attr in ('tfr_dir', 'checkpoint_dir', 'result_dir', 'log_dir'):
        util.check_folder(getattr(args, folder_attr))
    return args
def test(model, device, noisy_path, clean_path, asr_dict, enhance_path, score_path, args):
    """Score every noisy test file (with ASR inputs) and append the averages.

    The model is moved to ``device`` and put in eval mode. For each test file
    the matching ``asr_dict`` entry supplies ``ilen`` and ``y`` for
    ``write_score``, which appends one row to the score file. Afterwards the
    file is read back and an ``Average`` row with mean PESQ and STOI is
    appended.

    :param model: model forwarded to ``write_score``.
    :param device: target device for ``model.to``.
    :param noisy_path: location handed to ``getfilename(..., "test")``.
    :param clean_path: location handed to ``get_cleanwav_dic``.
    :param asr_dict: mapping from file stem (name without ``.wav``) to a
        sequence whose first two items are ``ilen`` and ``y``.
    :param enhance_path: output root forwarded to ``write_score``.
    :param score_path: CSV path; ``.csv`` is replaced by ``_wer.csv`` while
        the local ``google_asr`` flag is true.
    :param args: namespace providing ``test_num``, ``corpus`` and
        ``asr_result``.
    """
    model = model.to(device)
    model.eval()

    # load data (optionally limited to the first ``args.test_num`` files)
    test_files = getfilename(noisy_path, "test")
    if args.test_num is not None:
        test_files = test_files[:args.test_num]
    test_files = np.array(test_files)
    c_dict = get_cleanwav_dic(clean_path, args.corpus)

    # open score file
    google_asr = True
    if google_asr:
        score_path = score_path.replace(".csv", "_wer.csv")
    if os.path.exists(score_path):
        os.remove(score_path)
    # project helper; presumably makes sure the target directory exists -- confirm
    check_folder(score_path)

    if google_asr:
        print('Save WER results to:', score_path)
        header = 'Filename,PESQ,STOI,WER,CleanWER\n'
    else:
        print('Save PESQ&STOI results to:', score_path)
        header = 'Filename,PESQ,STOI\n'
    with open(score_path, 'a') as f:
        f.write(header)

    print('Testing...')
    # A bare ``torch.no_grad()`` statement has no effect; it only disables
    # gradient tracking when used as a context manager (or decorator).
    with torch.no_grad():
        for test_file in tqdm(test_files):
            name = test_file.split('/')[-1].replace('.wav', '')
            asr_entry = asr_dict[name]
            ilen, y = asr_entry[0], asr_entry[1]
            write_score(model, device, test_file, c_dict, enhance_path, ilen, y,
                        score_path, args.asr_result, args.corpus)

    # NOTE(review): the Average row has three fields even when the header
    # declares five columns -- confirm downstream readers accept that.
    data = pd.read_csv(score_path)
    pesq_mean = data['PESQ'].to_numpy().astype('float').mean()
    stoi_mean = data['STOI'].to_numpy().astype('float').mean()
    with open(score_path, 'a') as f:
        f.write(','.join(('Average', str(pesq_mean), str(stoi_mean))) + '\n')
def __init__(self, configfile):
    """Configure logging and initialise the instance's attributes.

    The log file is ``<log_path>/<self.name>_<time()>_train.log`` where
    ``log_path`` comes from ``configfile["TRAINING"]`` (passed through
    ``sc.check_folder``). Logging is configured at DEBUG level.

    :param configfile: mapping with a ``"TRAINING"`` section containing
        ``'log_path'``; also handed to ``self.get_config``.
    """
    log_dir = sc.check_folder(configfile["TRAINING"]['log_path'])
    log_file = os.path.join(log_dir, f'{self.name}_{time()}_train.log')
    sc.configLog(log_path=log_file, mode=logging.DEBUG)

    self.configfile = configfile
    self.model_config = self.get_config(configfile)

    # Attributes left unset (None) by the constructor.
    self.encoder = self.embedder = self.files_handler = None
    self.model = self.test_set = self.model_folder = None
# Script entry: run training or testing; on Ctrl-C save a checkpoint and exit.
# NOTE(review): model, epoch, best_loss, optimizer, device, loader, writer,
# asr_dict and args are not defined in this fragment -- presumably set up
# earlier in the script; confirm.
try:
    if args.mode == 'train':
        if args.corpus == "TMHINT_DYS":
            # --adim, default=384, type=int, "Number of attention transformation dimensions"
            # For this corpus the optimizer is replaced by the one returned
            # from get_std_opt, built from the SE model's warmup/lr settings.
            optimizer = get_std_opt(
                model.SEmodel.parameters(), 384,
                model.SEmodel.args.transformer_warmup_steps,
                model.SEmodel.args.transformer_lr)
        train(model, args.epochs, epoch, best_loss, optimizer, device, loader,
              writer, args.model_path, args)
    # mode=="test"
    else:
        test(model, device, args.test_noisy, args.test_clean, asr_dict,
             args.enhance_path, args.score_path, args)
except KeyboardInterrupt:
    # Save the current training state before leaving.
    # NOTE(review): within this fragment `optimizer` is only assigned for
    # TMHINT_DYS; if it is not defined elsewhere, an interrupt in test mode
    # would fail here -- confirm.
    state_dict = {
        'epoch': epoch,
        'model': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'best_loss': best_loss
    }
    check_folder(args.checkpoint_path)
    torch.save(state_dict, args.checkpoint_path)
    print('Saved interrupt')
    # Try a normal exit first; if SystemExit is raised, terminate the process
    # immediately with os._exit.
    try:
        sys.exit(0)
    except SystemExit:
        os._exit(0)
def save_params(self, folder):
    """Write ``self.params`` to ``<folder>/params.txt``.

    The file is overwritten and contains the line ``Trained parameters: ``
    followed by ``str(self.params)``.

    :param folder: destination folder (passed through ``sc.check_folder``).
    """
    sc.check_folder(folder)
    params_file = os.path.join(folder, 'params.txt')
    with open(params_file, 'w') as out:
        out.write("Trained parameters: \n")
        out.write(str(self.params))
def unpack_config(self, configfile):
    """Copy every entry of ``configfile["TRAINING"]`` onto ``self``.

    Each key becomes an attribute of the same name. The ``output_path``
    value is first passed through ``sc.check_folder`` and the returned value
    is stored instead.

    :param configfile: mapping with a ``"TRAINING"`` section supporting
        ``.items()``.
    """
    training_section = configfile["TRAINING"]
    for option, raw_value in training_section.items():
        resolved = sc.check_folder(raw_value) if option == 'output_path' else raw_value
        setattr(self, option, resolved)
def check_args(args):
    """Pass the checkpoint and result folders through ``util.check_folder``.

    :param args: namespace with ``checkpoint_dir`` and ``result_dir``.
    :return: the same ``args`` object, unchanged.
    """
    for folder_attr in ('checkpoint_dir', 'result_dir'):
        util.check_folder(getattr(args, folder_attr))
    return args