예제 #1
0
    def reconstruct_audio(self,
                          description,
                          irm=None,
                          mask=None,
                          idx=None,
                          test=False):
        """Reconstruct enhanced audio and write it to disk (or report NMSE).

        The output path for file ``j`` is ``self.y_wavfiles[j]`` with
        ``'scaled'`` replaced by ``'enhanced_<description>'``; a missing
        parent directory is created before writing.

        Args:
            description: tag inserted into the output path.
            irm: optional mask data forwarded to ``self.reconstruct_x``.
                In the batch modes (``idx`` is ``None`` or a list) it is
                sliced as ``irm[j, :n, :]`` and transposed; for a single
                ``idx`` it is passed through unchanged.
            mask: optional array; ``sum(mask[j, :])`` gives the number ``n``
                of leading ``irm`` rows kept in the batch modes.  It is not
                consulted when ``idx`` is a single index.
            idx: ``None`` to process every entry of ``self.x_wavfiles``, a
                list of file indices, or a single file index.
            test: only honoured when ``idx`` is a list.  Instead of writing
                files, print the normalized mean squared error between the
                waveforms read from disk and their reconstructions.

        Returns:
            None.
        """

        def masked_estimate(j):
            # Batch modes apply a mask only when both irm and mask are given.
            if irm is None or mask is None:
                return self.reconstruct_x(j)
            # int() keeps the slice bound valid when the mask holds floats.
            n_frames = int(np.sum(mask[j, :]))
            return self.reconstruct_x(j, mask=irm[j, :n_frames, :].T)

        def write_enhanced(j, yest):
            target = self.y_wavfiles[j].replace(
                'scaled', 'enhanced_%s' % description)
            folder = os.path.dirname(target)
            # dirname is '' for a bare file name and os.makedirs('') raises,
            # so only create a directory when there is one to create.
            if folder and not os.path.isdir(folder):
                try:
                    os.makedirs(folder)
                except OSError:
                    # Tolerate the directory appearing concurrently.
                    if not os.path.isdir(folder):
                        raise
            util.wavwrite(target, 16e3, yest)

        def report_nmse(j, yest):
            # The target-side reconstruction is only needed for this report.
            y = self.reconstruct_y(j)
            y_orig = util.wavread(self.y_wavfiles[j])[0:1, :]
            x = util.wavread(self.x_wavfiles[j])[0:1, :]
            # Trim reconstructions that are longer than the stored signals
            # (slicing is a no-op when they are not longer).
            yest = yest[:, :x.shape[1]]
            y = y[:, :y_orig.shape[1]]
            print("For file %d, NMSE between original x and yest is %e" % (
                j, np.mean((x - yest)**2) / np.mean(x**2)))
            print("For file %d, NMSE between original y_orig and y is %e" % (
                j, np.mean((y_orig - y)**2) / np.mean(y_orig**2)))

        if idx is None:
            for j in range(len(self.x_wavfiles)):
                write_enhanced(j, masked_estimate(j))
        elif isinstance(idx, list):
            for j in idx:
                yest = masked_estimate(j)
                if test:
                    report_nmse(j, yest)
                else:
                    write_enhanced(j, yest)
        else:
            # Single index: irm, when given, is used as-is for this file.
            if irm is None:
                yest = self.reconstruct_x(idx)
            else:
                yest = self.reconstruct_x(idx, mask=irm)
            write_enhanced(idx, yest)
예제 #2
0
def main(argv):
    """Reconstruct reference and predicted audio from a results pickle.

    Parses ``-s <savefile>`` and ``-o <output folder>`` from *argv*, loads
    the reference test data and the predicted log-magnitudes stored in the
    savefile, undoes the global normalization named in the savefile path,
    combines each predicted magnitude with the phase of the reference frame
    ``npred`` steps later, and writes ``est<N>.wav`` / ``ref<N>.wav`` for
    every utterance into the output folder.

    Args:
        argv: command-line arguments without the program name.

    Raises:
        SystemExit: status 2 on an unknown option or when ``-s`` or ``-o``
            is missing; ``-h`` prints the usage and exits normally.
    """
    usage = 'recon_timitpred.py -s <savefile> -o <output folder>'
    savefile = ''
    outputfolder = ''
    try:
        # NOTE(review): the long options --ifile/--ofile are accepted by the
        # parser but never handled below; kept so existing calls still parse.
        opts, _ = getopt.getopt(argv, "hs:o:", ["ifile=", "ofile="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt == '-s':
            savefile = arg
        elif opt == '-o':
            outputfolder = arg
    # The double space reproduces the output of the two-argument print.
    print('Savefile is  %s' % savefile)
    print('Output folder is  %s' % outputfolder)
    if not savefile or not outputfolder:
        # Without -o the files would target '/est<N>.wav' and
        # os.makedirs('') raises; fail early with the usage text instead.
        print(usage)
        sys.exit(2)

    # load reference data
    print("Loading TIMIT test data...")
    test_xdata, test_mask = load_TIMIT_test_xdata(savefile)
    if 'trainNoSA' not in savefile:
        # keep every second utterance (axis 1), starting at index 1
        test_mask = test_mask[:, 1::2, :]
        test_xdata = test_xdata[:, 1::2, :]

    # load results file that contains predicted STFT log-magnitudes
    # SECURITY NOTE: unpickling executes arbitrary code; only load savefiles
    # from a trusted source.
    with open(savefile, "rb") as results_file:
        results_eval = cPickle.load(results_file)
    best_xgen = np.asarray(results_eval['xgen'])
    best_xgen = best_xgen[:test_xdata.shape[0], :, :]

    # undo data normalization; the scheme is encoded in the savefile path
    undo_mean = '_normalizeMeanVarGlobal' in savefile
    undo_var = undo_mean or '_normalizeVarGlobal' in savefile
    if undo_var:
        eval_stats = results_eval['stats']['eval_xdata_stats']
        best_xgen = best_xgen * (
            np.float32(1e-7)
            + np.float32(np.sqrt(2)) * np.tile(eval_stats['std'], (1, 1, 2)))
        if undo_mean:
            best_xgen = best_xgen + eval_stats['mean']

    # build complex-valued STFTs of reference and predicted
    npred = 1
    # the first 129 columns of the last axis hold the real part, the
    # remaining columns the imaginary part
    test_xdata_c = (test_xdata[:, :, :129]
                    + np.complex64(1j) * test_xdata[:, :, 129:])
    test_xdata_a = np.concatenate(
        [np.real(test_xdata_c), np.imag(test_xdata_c)], axis=2)

    # predicted dB values -> magnitudes, zeroed where the mask is zero
    best_xgen_mag = np.sqrt(test_mask[:, :, 0:1] * (10 ** (best_xgen / 10.0)))
    # prediction at step t is paired with the reference phase at t + npred
    best_xgen_c = best_xgen_mag[:-npred, :, :] * np.exp(
        np.complex64(1j) * np.angle(test_xdata_c[npred:, :, :]))
    best_xgen_complete = np.concatenate(
        [np.real(best_xgen_c), np.imag(best_xgen_c)], axis=2)

    # create the output folder once instead of checking on every iteration
    if not os.path.isdir(outputfolder):
        os.makedirs(outputfolder)

    n_utt = best_xgen.shape[1]
    print("Reconstructing audio...")
    for uidx in range(n_utt):
        # number of valid frames of this utterance according to the mask
        Tcur = int(np.sum(test_mask[:, uidx, 0]))
        test_xdata_cur = np.transpose(
            np.squeeze(test_xdata_a[0:Tcur, uidx, :]), (1, 0))
        test_xdata_r = np.squeeze(util.iAugSTFT(test_xdata_cur, 129, 1, 1))
        # prepend first frames of reference, otherwise reconstruction has
        # artifacts:
        best_xgen_cur = np.transpose(
            np.squeeze(best_xgen_complete[0:Tcur - npred, uidx, :]), (1, 0))
        best_xgen_cur = np.concatenate(
            [test_xdata_cur[:, 0:npred].astype(np.float32), best_xgen_cur],
            axis=1)
        best_xgen_r = np.squeeze(util.iAugSTFT(best_xgen_cur, 129, 1, 1))
        util.wavwrite(os.path.join(outputfolder, 'est%d.wav' % uidx),
                      np.float32(8000.0), best_xgen_r)
        util.wavwrite(os.path.join(outputfolder, 'ref%d.wav' % uidx),
                      np.float32(8000.0), test_xdata_r)
    # NOTE(review): from here on the code reads sys.argv[1] / sys.argv[2]
    # directly and uses blockSize / hopSize, which are not defined in the
    # visible lines above -- this looks like a separate snippet whose own
    # header is missing; confirm before relying on it.
    # Read the input file; the result is unpacked into x and fs
    # (presumably samples and sample rate -- verify against util.wavread).
    x, fs = util.wavread(sys.argv[1])
    # Downmix to a single channel by averaging over the last axis.
    x = np.mean(x, axis=-1)
    # Perform the STFT of the downmixed signal.
    S = util.stft_real(x, blockSize=blockSize, hopSize=hopSize)
    # Split the spectrum into float32 magnitude and phase.
    magnitude = np.abs(S).astype(np.float32)
    angle = np.angle(S).astype(np.float32)

    # Initialize the model.
    model = Model_fcn.ModelSingleStep(blockSize)
    # Load the pretrained model; per the torch.load documentation this
    # identity map_location loads all tensors onto the CPU.
    model.load_state_dict(
        torch.load("Modelfcn.pt", map_location=lambda storage, loc: storage))
    # Switch to eval mode.
    model.eval()

    ###################################
    # Run your model here to obtain a mask
    ###################################
    # presumably returns a magnitude spectrogram shaped like `magnitude`
    # (it is multiplied with the phase term below) -- TODO confirm
    spectro_pred = model.process(magnitude)

    ###################################

    # Perform reconstruction: combine the model output with the phase of the
    # input spectrum and invert the STFT.
    y = util.istft_real(spectro_pred * np.exp(1j * angle),
                        blockSize=blockSize,
                        hopSize=hopSize)

    # Save the result.
    # NOTE(review): the argument order here is (path, data, fs) while other
    # wavwrite calls in this file pass (path, fs, data) -- confirm which
    # util module this snippet targets.
    util.wavwrite(sys.argv[2], y, fs)
예제 #4
0
        # NOTE(review): fragment of a larger definition -- blockSize, hopSize,
        # magnitude, angle, fs and output_path are set outside the visible
        # lines.
        # Initialize the model.
        model = Model.ModelSingleStep(blockSize)

        # Load the pretrained checkpoint; per the torch.load documentation
        # this identity map_location loads all tensors onto the CPU.
        checkpoint = torch.load("savedModel_RNN_best.pt",
                                map_location=lambda storage, loc: storage)
        # The weights are read from the checkpoint's 'state_dict' entry.
        model.load_state_dict(checkpoint['state_dict'])

        # Switch to eval mode.
        model.eval()
        # Disabled alternative for building the input tensor:
        # magnitude = torch.Tensor(magnitude).view(1, magnitude.shape[0], magnitude.shape[1])

        ###################################
        # Run your model here to obtain a mask
        ###################################
        # Add a leading axis of size 1 and wrap the array as a tensor.
        magnitude = torch.from_numpy(
            magnitude.reshape((1, magnitude.shape[0], magnitude.shape[1])))
        # Inference only: run the forward pass without gradient tracking and
        # convert the output back to a NumPy array.
        with torch.no_grad():
            magnitude_masked = model.forward(magnitude)
            magnitude_masked = magnitude_masked.numpy()
        ###################################

        # Perform reconstruction: combine the model output with the phase in
        # `angle` and invert the STFT.
        # NOTE(review): assumes the model output broadcasts against `angle`
        # -- TODO confirm shapes.
        y = util.istft_real(magnitude_masked * np.exp(1j * angle),
                            blockSize=blockSize,
                            hopSize=hopSize)

        # Save the result (the signal is transposed before writing).
        util.wavwrite(output_path, y.T, fs)