Пример #1
0
    def validate(self, name):
        """Denoise one batch, log the audio to wandb and save a checkpoint.

        One ``(noisy, _)`` item is drawn from ``self.train_dataloader`` through
        the persistent iterator ``self.val_iter``. The noisy input and the model
        output are written to ``self.output_dir`` as ``noisy_<name>.wav`` and
        ``clean_<name>.wav``, both files are logged to wandb, and the model's
        ``state_dict`` is saved as ``<name>.pth`` in the same directory.

        Args:
            name: Tag inserted into the output file names (e.g. a step number).
        """
        # Create the iterator on first use, and restart it only when it is
        # exhausted. Any other failure (data loading error, Ctrl-C, ...) is
        # allowed to propagate instead of being silently retried.
        if getattr(self, "val_iter", None) is None:
            self.val_iter = iter(self.train_dataloader)
        try:
            noisy, _ = next(self.val_iter)
        except StopIteration:
            self.val_iter = iter(self.train_dataloader)
            noisy, _ = next(self.val_iter)

        noisy_path = os.path.join(self.output_dir,
                                  "noisy_{}.wav".format(name))
        audio_utils.write_wav(noisy_path, noisy)
        wandb.log({
            "noisy": [
                wandb.Audio(noisy_path, caption="noisy", sample_rate=16000)
            ]
        })

        with torch.no_grad():
            # The mean is removed before the forward pass and added back to
            # the model output below.
            m = noisy.mean()
            noisy = (noisy - m)
            # NOTE(review): wrapping in a list adds a leading dimension;
            # presumably the model expects a batch axis -- confirm.
            noisy = torch.tensor([noisy]).to(self.device)
            out = self.model(noisy)

        clean = np.squeeze(out.cpu().detach().numpy())
        clean = clean + m

        clean_path = os.path.join(self.output_dir,
                                  "clean_{}.wav".format(name))
        audio_utils.write_wav(clean_path, clean)
        wandb.log({
            "clean": [
                wandb.Audio(clean_path, caption="clean", sample_rate=16000)
            ]
        })

        torch.save(self.model.state_dict(),
                   os.path.join(self.output_dir, "{}.pth".format(name)))
Пример #2
0
    def valid(self, step):
        """Denoise one randomly chosen noisy file and log the results to wandb.

        A file is picked at random from ``self.noisy_files``, passed through
        ``self.model`` (which returns a ``(clean, noise)`` pair), and the noisy
        input plus both outputs are written to ``self.output_dir`` as
        ``noisy_<step>.wav``, ``clean_<step>.wav`` and ``noise_<step>.wav``.
        Each file is also logged to wandb under the matching key.

        Note: the model is switched to eval mode and is not switched back
        here; callers that continue training must call ``model.train()``.

        Args:
            step: Tag inserted into the output file names.
        """

        def _write_and_log(tag, samples):
            # Write one clip at the configured sample rate and log it to wandb.
            path = os.path.join(self.output_dir,
                                "{}_{}.wav".format(tag, step))
            write_wav(path, samples, self.sr)
            wandb.log({
                tag: [wandb.Audio(path, caption=tag, sample_rate=self.sr)]
            })

        self.model.eval()
        index = random.randint(0, len(self.noisy_files) - 1)
        noisy = dataloader.read_wav(self.noisy_files[index])
        # The noisy clip is written with self.sr as well, so all three files
        # share the sample rate they are logged with.
        _write_and_log("noisy", noisy)

        # Wrapping in a list adds a leading dimension before the forward pass.
        noisy = torch.tensor([noisy], dtype=torch.float32, device=self.device)
        with torch.no_grad():
            clean, noise = self.model(noisy)

        _write_and_log("clean", np.squeeze(clean.cpu().detach().numpy()))
        _write_and_log("noise", np.squeeze(noise.cpu().detach().numpy()))
Пример #3
0
def compute_feature(bg_batch,
                    hm_batch,
                    _n_filt=N_FILT,
                    _winlen=WINLEN,
                    _winstep=WINLEN // 2,
                    to_write=False,
                    _winfunc=lambda x: np.ones((x, ))):
    """Return the first ``mfcc`` output for ``bg_batch``.

    ``mfcc`` is run with identical settings on ``bg_batch`` and ``hm_batch``.
    The second value it returns for each signal is used to build a ratio
    mask ``clip(sqrt(hm / bg), 0, 1)``, which ``get_estimate`` applies to
    ``bg_batch``. The masked estimate is only used when ``to_write`` is set,
    in which case the mixed, original and estimated signals are written to
    ``mixed.wav``, ``original.wav`` and ``ideal.wav`` in the working directory.

    Args:
        bg_batch: Signal the features are computed for (written as "mixed").
        hm_batch: Reference signal (written as "original").
        _n_filt: Passed to ``mfcc`` as both ``numcep`` and ``nfilt``.
        _winlen: Window length; also sets ``nfft = int(SAMPLING_RATE * _winlen)``.
        _winstep: Window step passed to ``mfcc`` and ``get_estimate``.
        to_write: When true, dump the three signals as float32 wav files.
        _winfunc: Window function; the default is a rectangular window.

    Returns:
        The first element returned by ``mfcc`` for ``bg_batch``.
    """
    # NOTE(review): if WINLEN is expressed in seconds (as the nfft computation
    # suggests), the floor division in the `_winstep` default yields 0.0 for
    # any WINLEN < 2 -- confirm whether `/` was intended.
    shared_mfcc_args = dict(
        winlen=_winlen,
        winstep=_winstep,
        numcep=_n_filt,
        nfft=int(SAMPLING_RATE * _winlen),
        nfilt=_n_filt,
        preemph=0,
        cb=compute_gain,
        ceplifter=0,
        appendEnergy=False,
        winfunc=_winfunc,
    )
    bg_mfcc, bg_mfcc_energy = mfcc(bg_batch, **shared_mfcc_args)
    _, hm_mfcc_energy = mfcc(hm_batch, **shared_mfcc_args)

    # Ideal Ratio Mask, limited to the range [0, 1].
    ratio_mask = np.clip(np.sqrt(hm_mfcc_energy / bg_mfcc_energy), 0, 1)
    estimate = get_estimate(bg_batch,
                            ratio_mask,
                            _winlen=_winlen,
                            _winfunc=_winfunc,
                            _winstep=_winstep)

    if to_write:
        print('tes')
        outputs = (('mixed.wav', bg_batch),
                   ('original.wav', hm_batch),
                   ('ideal.wav', estimate))
        for filename, signal in outputs:
            write_wav(filename, signal.astype(np.float32))

    return bg_mfcc
Пример #4
0
    elif os.path.isfile(inp):
        noisy_files = [inp]
    else:
        raise Exception("given input file/directory does not exist please check path")

    if len(noisy_files) == 0:
        raise Exception("folder does not contain files of format {}".format(args.format))
    
    time_counter = []

    for f in noisy_files:
        audio = read_wav(f, sr)

        start = time.time()
        with torch.no_grad():
            raw = torch.tensor([audio], dtype=torch.float32, device=device)
            out = denoiser(raw)
            out = np.squeeze(out.detach().cpu().numpy())
            out = out

        t = time.time() - start
        time_counter.append(t)
        print("processed file {} in {:.2f} secs".format(os.path.basename(f), t))
        write_wav(os.path.join(audio_out, os.path.basename(f).split(".")[0] + "_clean.wav"), out, sr)
        if args.asr:
            transcript = ASR(f)
            with open(os.path.join(trans_out, os.path.basename(f).split(".")[0] + ".json"), 'w') as outfile:
                json.dump(transcript, outfile)

    print("Time stats: min: {:.2f} mean: {:.2f} max: {:.2f}".format(min(time_counter), sum(time_counter)/len(time_counter), max(time_counter)))