def validate(self, name):
    """Run the model on one noisy validation sample, log audio to wandb,
    and checkpoint the model weights.

    Parameters
    ----------
    name : str or int
        Tag used in output file names (e.g. current step/epoch).
    """
    try:
        noisy, _ = next(self.val_iter)
    except (AttributeError, StopIteration):
        # AttributeError: first call, self.val_iter does not exist yet.
        # StopIteration: iterator exhausted — restart from the dataloader.
        # (Was a bare `except:`, which also swallowed KeyboardInterrupt
        # and real bugs.)
        self.val_iter = iter(self.train_dataloader)
        noisy, _ = next(self.val_iter)

    noisy_path = os.path.join(self.output_dir, "noisy_{}.wav".format(name))
    audio_utils.write_wav(noisy_path, noisy)
    wandb.log({
        "noisy": [wandb.Audio(noisy_path, caption="noisy", sample_rate=16000)]
    })

    with torch.no_grad():
        # Zero-mean the input; the mean is added back to the output below.
        m = noisy.mean()
        noisy = noisy - m
        # Wrap in a list to add a batch dimension before moving to device.
        noisy = torch.tensor([noisy]).to(self.device)
        out = self.model(noisy)

    clean = np.squeeze(out.cpu().detach().numpy())
    clean = clean + m  # restore the removed DC offset

    clean_path = os.path.join(self.output_dir, "clean_{}.wav".format(name))
    audio_utils.write_wav(clean_path, clean)
    wandb.log({
        "clean": [wandb.Audio(clean_path, caption="clean", sample_rate=16000)]
    })

    # Checkpoint the weights alongside the audio samples.
    torch.save(self.model.state_dict(),
               os.path.join(self.output_dir, "{}.pth".format(name)))
def valid(self, step):
    """Denoise one randomly chosen noisy file and log the noisy, clean,
    and residual-noise audio to wandb.

    Parameters
    ----------
    step : int
        Training step used to tag the output file names.
    """
    self.model.eval()

    # Pick a random validation file.
    index = random.randint(0, len(self.noisy_files) - 1)
    noisy = dataloader.read_wav(self.noisy_files[index])

    noisy_path = os.path.join(self.output_dir, "noisy_{}.wav".format(step))
    # FIX: pass self.sr so the noisy sample is written at the same sample
    # rate as the clean/noise outputs below (it was omitted here only).
    write_wav(noisy_path, noisy, self.sr)
    wandb.log({"noisy": [wandb.Audio(noisy_path, caption="noisy",
                                     sample_rate=self.sr)]})

    # Add a batch dimension and run the separator; it returns the clean
    # estimate and the residual noise.
    noisy = torch.tensor([noisy], dtype=torch.float32, device=self.device)
    with torch.no_grad():
        clean, noise = self.model(noisy)

    clean = np.squeeze(clean.cpu().detach().numpy())
    clean_path = os.path.join(self.output_dir, "clean_{}.wav".format(step))
    write_wav(clean_path, clean, self.sr)
    wandb.log({"clean": [wandb.Audio(clean_path, caption="clean",
                                     sample_rate=self.sr)]})

    noise = np.squeeze(noise.cpu().detach().numpy())
    noise_path = os.path.join(self.output_dir, "noise_{}.wav".format(step))
    write_wav(noise_path, noise, self.sr)
    wandb.log({"noise": [wandb.Audio(noise_path, caption="noise",
                                     sample_rate=self.sr)]})
def compute_feature(bg_batch, hm_batch, _n_filt=N_FILT, _winlen=WINLEN,
                    _winstep=WINLEN // 2, to_write=False,
                    _winfunc=lambda x: np.ones((x,))):
    """Compute MFCC features for the background (mixed) signal.

    Optionally writes the mixed signal, the original clean signal, and an
    ideal-ratio-mask (IRM) estimate to disk for inspection.

    Parameters
    ----------
    bg_batch : array-like
        Mixed/background audio samples.
    hm_batch : array-like
        Clean/target audio samples (same length as ``bg_batch``).
    _n_filt, _winlen, _winstep :
        Filterbank size and window length/step (seconds) for the MFCC.
    to_write : bool
        When True, dump ``mixed.wav`` / ``original.wav`` / ``ideal.wav``.
    _winfunc : callable
        Per-frame window function; defaults to a rectangular window.

    Returns
    -------
    MFCC features of the background signal (first value returned by
    ``mfcc``).
    """
    nfft = int(SAMPLING_RATE * _winlen)
    bg_mfcc, bg_mfcc_energy = mfcc(bg_batch, winlen=_winlen, winstep=_winstep,
                                   numcep=_n_filt, nfft=nfft, nfilt=_n_filt,
                                   preemph=0, cb=compute_gain, ceplifter=0,
                                   appendEnergy=False, winfunc=_winfunc)
    _, hm_mfcc_energy = mfcc(hm_batch, winlen=_winlen, winstep=_winstep,
                             numcep=_n_filt, nfft=nfft, nfilt=_n_filt,
                             preemph=0, cb=compute_gain, ceplifter=0,
                             appendEnergy=False, winfunc=_winfunc)

    # Ideal Ratio Mask applied signal: mask = clip(sqrt(clean/mixed), 0, 1).
    # NOTE(review): a zero bg_mfcc_energy bin would divide by zero here —
    # presumably the mfcc energy is floored upstream; confirm.
    estimate = get_estimate(bg_batch,
                            np.clip(np.sqrt((hm_mfcc_energy / bg_mfcc_energy)), 0, 1),
                            _winlen=_winlen, _winfunc=_winfunc,
                            _winstep=_winstep)

    if to_write:
        # (Removed leftover debug `print('tes')`.)
        write_wav('mixed.wav', bg_batch.astype(np.float32))
        write_wav('original.wav', hm_batch.astype(np.float32))
        write_wav('ideal.wav', estimate.astype(np.float32))

    return bg_mfcc
# NOTE(review): this fragment begins mid-`if`; the opening branch (likely
# `if os.path.isdir(inp): noisy_files = ...`) is outside this view.
elif os.path.isfile(inp):
    # A single input file was given.
    noisy_files = [inp]
else:
    raise Exception("given input file/directory does not exist please check path")
if len(noisy_files) == 0:
    raise Exception("folder does not contain files of format {}".format(args.format))
# Per-file processing times, for the summary printed at the end.
time_counter = []
for f in noisy_files:
    audio = read_wav(f, sr)
    start = time.time()
    with torch.no_grad():
        # Add a batch dimension and run the denoiser.
        raw = torch.tensor([audio], dtype=torch.float32, device=device)
        out = denoiser(raw)
        out = np.squeeze(out.detach().cpu().numpy())
        out = out  # NOTE(review): redundant self-assignment, no effect
    t = time.time() - start
    time_counter.append(t)
    print("processed file {} in {:.2f} secs".format(os.path.basename(f), t))
    # Write the denoised audio next to a "_clean" suffixed name.
    write_wav(os.path.join(audio_out, os.path.basename(f).split(".")[0] + "_clean.wav"), out, sr)
    if args.asr:
        # Optionally transcribe the ORIGINAL noisy file and dump JSON.
        transcript = ASR(f)
        with open(os.path.join(trans_out, os.path.basename(f).split(".")[0] + ".json"), 'w') as outfile:
            json.dump(transcript, outfile)
print("Time stats: min: {:.2f} mean: {:.2f} max: {:.2f}".format(min(time_counter), sum(time_counter)/len(time_counter), max(time_counter)))