def reconstruct_audio(self, description, irm=None, mask=None, idx=None, test=False):
    """Reconstruct audio estimates and write them to disk, or report NMSE.

    The output path of each file is ``self.y_wavfiles[j]`` with the substring
    ``'scaled'`` replaced by ``'enhanced_<description>'``; missing parent
    directories are created.

    Args:
        description: Tag inserted into the output file name (see above).
        irm: Optional mask array. For ``idx`` of ``None`` or a list it is only
            used when ``mask`` is also given, and the slice
            ``irm[j, :np.sum(mask[j, :]), :].T`` is passed to
            ``self.reconstruct_x``. For a scalar ``idx`` it is passed whole.
        mask: Optional array; ``np.sum(mask[j, :])`` selects how many leading
            entries of ``irm[j]`` are kept (presumably a per-frame validity
            mask -- confirm against the caller).
        idx: ``None`` processes every file in ``self.x_wavfiles``; a ``list``
            processes exactly those indices; anything else is treated as a
            single index.
        test: Only honoured when ``idx`` is a list. When true, nothing is
            written; the NMSE between the wav files on disk and the
            reconstructions is printed instead.

    Returns:
        None.
    """

    def estimate(j):
        # Plain reconstruction unless both the mask values and the validity
        # mask are available.
        if irm is None or mask is None:
            return self.reconstruct_x(j)
        return self.reconstruct_x(j, mask=irm[j, :np.sum(mask[j, :]), :].T)

    def write_enhanced(j, yest):
        wavfile_enhanced = self.y_wavfiles[j].replace(
            'scaled', 'enhanced_%s' % description)
        out_dir = os.path.dirname(wavfile_enhanced)
        # A bare file name has an empty dirname, and os.makedirs('') raises.
        if out_dir and not os.path.exists(out_dir):
            os.makedirs(out_dir)
        # Second argument is presumably the sampling rate in Hz (16 kHz).
        util.wavwrite(wavfile_enhanced, 16e3, yest)

    def report_nmse(j, yest):
        # NOTE(review): reconstruct_y is now called only here, because its
        # result is used for nothing else; this assumes it has no side
        # effects the write path relies on.
        y = self.reconstruct_y(j)
        y_orig = util.wavread(self.y_wavfiles[j])[0:1, :]
        x = util.wavread(self.x_wavfiles[j])[0:1, :]
        # Trim the reconstructions when they are longer than the originals.
        if yest.shape[1] > x.shape[1]:
            yest = yest[:, :x.shape[1]]
        if y.shape[1] > y_orig.shape[1]:
            y = y[:, :y_orig.shape[1]]
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("For file %d, NMSE between original x and yest is %e" % (
            j, np.mean((x - yest)**2) / np.mean(x**2)))
        print("For file %d, NMSE between original y_orig and y is %e" % (
            j, np.mean((y_orig - y)**2) / np.mean(y_orig**2)))

    if idx is None:
        for j in range(len(self.x_wavfiles)):
            write_enhanced(j, estimate(j))
    elif isinstance(idx, list):
        for j in idx:
            yest = estimate(j)
            if test:
                report_nmse(j, yest)
            else:
                write_enhanced(j, yest)
    else:
        # Scalar index: irm (if any) is handed over unsliced, and `mask` is
        # not consulted.
        if irm is None:
            yest = self.reconstruct_x(idx)
        else:
            yest = self.reconstruct_x(idx, mask=irm)
        write_enhanced(idx, yest)
    return
def main(argv):
    """Rebuild reference and predicted audio from a saved prediction file.

    Command line: ``recon_timitpred.py -s <savefile> -o <output folder>``.

    The savefile is a pickle holding at least ``'xgen'`` (predicted STFT
    log-magnitudes) and, for normalized runs, ``'stats'``. For every utterance
    two files are written into the output folder: ``est<N>.wav`` (prediction
    combined with the reference phase) and ``ref<N>.wav`` (reference).

    Args:
        argv: Command-line arguments without the program name.

    Raises:
        SystemExit: With code 2 on a bad option or when ``-s`` / ``-o`` is
            missing; with no code after ``-h``.
    """
    usage = 'recon_timitpred.py -s <savefile> -o <output folder>'
    savefile = ''
    outputfolder = ''
    try:
        # The long options are accepted by the parser but not acted upon.
        opts, _ = getopt.getopt(argv, "hs:o:", ["ifile=", "ofile="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt == '-s':
            savefile = arg
        elif opt == '-o':
            outputfolder = arg
    # Fail early with the usage text instead of a late IOError/OSError.
    if not savefile or not outputfolder:
        print(usage)
        sys.exit(2)
    print('%s %s' % ('Savefile is ', savefile))
    print('%s %s' % ('Output folder is ', outputfolder))

    # load reference data
    print("Loading TIMIT test data...")
    test_xdata, test_mask = load_TIMIT_test_xdata(savefile)
    if 'trainNoSA' not in savefile:
        # Keep every second entry along axis 1, starting at index 1.
        test_mask = test_mask[:, 1::2, :]
        test_xdata = test_xdata[:, 1::2, :]

    # load results file that contains predicted STFT log-magnitudes
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(savefile, "rb") as results_file:
        results_eval = cPickle.load(results_file)
    best_xgen = np.asarray(results_eval['xgen'])
    best_xgen = best_xgen[:test_xdata.shape[0], :, :]

    # undo data normalization
    normalize_str = ''
    if '_normalizeMeanVarGlobal' in savefile:
        normalize_str = '_normalizeMeanVarGlobal'
    elif '_normalizeVarGlobal' in savefile:
        normalize_str = '_normalizeVarGlobal'
    if 'Var' in normalize_str:
        best_xgen_std = results_eval['stats']['eval_xdata_stats']['std']
        best_xgen = best_xgen * (
            np.float32(1e-7)
            + np.float32(np.sqrt(2)) * np.tile(best_xgen_std, (1, 1, 2)))
    if 'Mean' in normalize_str:
        best_xgen_mean = results_eval['stats']['eval_xdata_stats']['mean']
        best_xgen = best_xgen + best_xgen_mean

    # build complex-valued STFTs of reference and predicted
    npred = 1
    # The last axis is read as n_freq real parts followed by the imaginary
    # parts.
    n_freq = 129
    test_xdata_c = (test_xdata[:, :, :n_freq]
                    + np.complex64(1j) * test_xdata[:, :, n_freq:])
    test_xdata_a = np.concatenate(
        [np.real(test_xdata_c), np.imag(test_xdata_c)], axis=2)
    # Predictions are 10*log10(power); convert back to magnitude and zero out
    # masked positions.
    magsq = test_mask[:, :, 0:1] * (10 ** (best_xgen / 10.0))
    best_xgen_mag = np.sqrt(magsq)
    # Pair the prediction at position t with the reference phase at t + npred
    # along axis 0.
    best_xgen_c = best_xgen_mag[:-npred, :, :] * np.exp(
        np.complex64(1j) * np.angle(test_xdata_c[npred:, :, :]))
    best_xgen_complete = np.concatenate(
        [np.real(best_xgen_c), np.imag(best_xgen_c)], axis=2)

    n_utt = best_xgen.shape[1]
    print("Reconstructing audio...")
    # Loop-invariant: create the output folder once, up front.
    if not os.path.exists(outputfolder):
        os.makedirs(outputfolder)
    for uidx in range(n_utt):
        Tcur = int(np.sum(test_mask[:, uidx, 0]))
        # Integer indexing already drops the utterance axis, so a plain
        # transpose also works for a one-frame slice.
        test_xdata_cur = test_xdata_a[0:Tcur, uidx, :].T
        # iAugSTFT arguments kept as in the original call; their meaning is
        # defined in util.
        test_xdata_r = np.squeeze(util.iAugSTFT(test_xdata_cur, 129, 1, 1))
        # append first frames of reference, otherwise reconstruction has
        # artifacts:
        best_xgen_cur = best_xgen_complete[0:Tcur - npred, uidx, :].T
        best_xgen_cur = np.concatenate(
            [test_xdata_cur[:, 0:npred].astype(np.float32), best_xgen_cur],
            axis=1)
        best_xgen_r = np.squeeze(util.iAugSTFT(best_xgen_cur, 129, 1, 1))
        util.wavwrite(os.path.join(outputfolder, 'est%d.wav' % uidx),
                      np.float32(8000.0), best_xgen_r)
        util.wavwrite(os.path.join(outputfolder, 'ref%d.wav' % uidx),
                      np.float32(8000.0), test_xdata_r)
# Read the recording named by the first command-line argument.
x, fs = util.wavread(sys.argv[1])

# Average over the last axis to obtain a single channel.
x = np.mean(x, axis=-1)

# STFT of the mono signal, split into float32 magnitude and phase.
S = util.stft_real(x, blockSize=blockSize, hopSize=hopSize)
magnitude, angle = (
    part.astype(np.float32) for part in (np.abs(S), np.angle(S))
)

# Build the network and restore the pretrained weights. The map_location
# callback hands every storage back unchanged.
model = Model_fcn.ModelSingleStep(blockSize)
pretrained_weights = torch.load(
    "Modelfcn.pt", map_location=lambda storage, loc: storage)
model.load_state_dict(pretrained_weights)

# Inference only: put the model into evaluation mode.
model.eval()

# Let the model process the magnitude spectrogram.
spectro_pred = model.process(magnitude)

# Recombine the model output with the original phase and invert the STFT.
complex_spectrum = spectro_pred * np.exp(1j * angle)
y = util.istft_real(complex_spectrum, blockSize=blockSize, hopSize=hopSize)

# Write the result to the path given as second command-line argument.
util.wavwrite(sys.argv[2], y, fs)
# Build the network and restore the weights stored under 'state_dict' in the
# checkpoint. The map_location callback hands every storage back unchanged.
model = Model.ModelSingleStep(blockSize)
checkpoint = torch.load("savedModel_RNN_best.pt",
                        map_location=lambda storage, loc: storage)
model.load_state_dict(checkpoint['state_dict'])

# Inference only: put the model into evaluation mode.
model.eval()

# Add a leading axis of size 1 in front of the 2-D magnitude array before
# handing it to the model.
magnitude = torch.from_numpy(
    magnitude.reshape((1, magnitude.shape[0], magnitude.shape[1])))
with torch.no_grad():
    # Call the module instance rather than .forward() so that any registered
    # hooks run as well.
    magnitude_masked = model(magnitude)
magnitude_masked = magnitude_masked.numpy()

# Recombine the model output with the original phase and invert the STFT.
y = util.istft_real(magnitude_masked * np.exp(1j * angle),
                    blockSize=blockSize,
                    hopSize=hopSize)

# Save the transposed result.
util.wavwrite(output_path, y.T, fs)