def analysisf(
        fwav,
        shift=0.005,
        dftlen=4096,
        inf0txt_file=None,
        f0_min=60,
        f0_max=600,
        f0_file=None,
        f0_log=False,
        inf0bin_file=None,  # input f0 file in binary
        spec_file=None,
        spec_order=None,  # Mel-cepstral order for compressing the
                          # spectrum (typically 59; None: no compression)
        pdd_file=None,
        pdd_order=None,  # Mel-cepstral order for compressing PDD
                         # spectrum (typically 59; None: no compression)
        nm_file=None,
        nm_nbbnds=None,  # Number of mel-bands in the compressed mask
                         # (None: no compression)
        verbose=1):
    """Analyse a waveform file and write the requested features to disk.

    Each feature is computed only if its output file is requested, and is
    written as raw float32 values with ``ndarray.tofile``.

    Args:
        fwav: Path of the input waveform, read with ``sp.wavread``.
        shift: Time shift forwarded to the f0 post-processing and to the
            spectral analysis (reported in seconds in the log line).
        dftlen: DFT length forwarded to the spectrum, PDD and band
            compression routines.
        inf0txt_file: Optional input f0 file, loaded with ``np.loadtxt``.
        f0_min, f0_max: f0 limits forwarded to ``analysis_f0postproc``
            (reported in Hz in the log line).
        f0_file: Output file for the second column of the f0 matrix.
        f0_log: If true, the natural log of the f0 values is written.
        inf0bin_file: Optional input f0 file read as raw float32. When both
            input f0 files are given, this one overrides the text one.
        spec_file: Output file for the spectrogram.
        spec_order: Mel-cepstral order for compressing the spectrum
            (typically 59; None: no compression).
        pdd_file: Output file for the PDD.
        pdd_order: Mel-cepstral order for compressing the PDD
            (typically 59; None: no compression).
        nm_file: Output file for the noise mask.
        nm_nbbnds: Number of mel-bands in the compressed mask
            (None or 0: no compression).
        verbose: > 0 prints progress messages; > 2 additionally calls
            ``plot_features`` on the computed features.

    Returns:
        None. All results are written to the requested files.
    """
    wav, fs, _ = sp.wavread(fwav)

    if verbose > 0:
        print(
            'PM Analysis (dur={:.3f}s, fs={}Hz, f0 in [{},{}]Hz, shift={}s, dftlen={})'
            .format(len(wav) / float(fs), fs, f0_min, f0_max, shift, dftlen))

    # Optional externally provided f0 curve (None lets the post-processing
    # step deal with the absence of input f0).
    f0s = None
    if inf0txt_file:
        f0s = np.loadtxt(inf0txt_file)
    # read input f0 file in float32 (ljuvela)
    # NOTE(review): np.fromfile returns a 1-D array whereas the code below
    # indexes f0s[:, 1]; presumably analysis_f0postproc returns a 2-D matrix
    # in every case -- verify.
    if inf0bin_file:
        f0s = np.fromfile(inf0bin_file, dtype=np.float32)

    f0s = analysis_f0postproc(wav, fs, f0s, f0_min=f0_min, f0_max=f0_max,
                              shift=shift, verbose=verbose)

    if f0_file:
        f0_values = f0s[:, 1]
        if verbose > 0:
            print(' Output F0 {} in: {}'.format(f0_values.shape, f0_file))
        if f0_log:
            f0_values = np.log(f0_values)
        f0_values.astype(np.float32).tofile(f0_file)

    SPEC = None
    if spec_file:
        SPEC = analysis_spec(wav, fs, f0s, shift=shift, dftlen=dftlen,
                             verbose=verbose)
        if spec_order is not None:
            SPEC = sp.spec2mcep(SPEC, sp.bark_alpha(fs), order=spec_order)
        if verbose > 0:
            print(' Output Spectrogram size={} in: {}'.format(
                SPEC.shape, spec_file))
        SPEC.astype(np.float32).tofile(spec_file)

    # pdd_raw always holds the uncompressed PDD; PDD is the version that is
    # written out (and plotted), which may be mel-cepstrally compressed.
    PDD = None
    pdd_raw = None
    if pdd_file or nm_file:
        pdd_raw = analysis_pdd(wav, fs, f0s, dftlen=dftlen, verbose=verbose)
        PDD = pdd_raw

    if pdd_file:
        if pdd_order is not None:
            # If asked, compress PDD. Work on a copy so that the raw PDD
            # stays available for the noise mask computation below.
            PDD = pdd_raw.copy()
            PDD[PDD < 0.001] = 0.001  # From COVAREP
            PDD = sp.spec2mcep(PDD, sp.bark_alpha(fs), pdd_order)
        if verbose > 0:
            print(' Output PDD size={} in: {}'.format(PDD.shape, pdd_file))
        PDD.astype(np.float32).tofile(pdd_file)

    NM = None
    if nm_file:
        # Use the uncompressed PDD: previously the mel-cepstral coefficients
        # were passed here whenever pdd_file and pdd_order were both set.
        # (analysis_nm presumably expects the output of analysis_pdd -- verify.)
        NM = analysis_nm(wav, fs, f0s, pdd_raw, verbose=verbose)
        if nm_nbbnds:
            # If asked, compress the noise mask using a number of mel bands
            NM = sp.linbnd2fwbnd(NM, fs, dftlen, nm_nbbnds)
        if verbose > 0:
            print(' Output Noise Mask size={} in: {}'.format(
                NM.shape, nm_file))
        NM.astype(np.float32).tofile(nm_file)

    if verbose > 2:
        plot_features(wav=wav, fs=fs, f0s=f0s, SPEC=SPEC, PDD=PDD, NM=NM)
def _ensure_parent_dir(fpath):
    """Create the directory that will contain `fpath` if it is missing."""
    dirname = os.path.dirname(fpath)
    if dirname != '' and not os.path.isdir(dirname):
        # makedirs instead of mkdir so that nested output paths work too.
        os.makedirs(dirname)


def analysisf(
        fwav,
        shift=0.005,
        dftlen=4096,
        finf0txt=None,
        f0estimator='REAPER',
        f0_min=60,
        f0_max=600,
        ff0=None,
        f0_log=False,
        finf0bin=None,  # input f0 file in binary
        fspec=None,
        spec_mceporder=None,  # Mel-cepstral order for compressing the spectrogram (typically 59; None: no compression)
        spec_fwceporder=None,  # Frequency warped cepstral order (very similar to above, just faster and less precise) (typically 59; None: no compression)
        spec_nbfwbnds=None,  # Number of mel-bands in the compressed half log spectrogram (None: no compression)
        spec_nblinlogbnds=None,  # Number of linear-bands in the compressed half log spectrogram (None: no compression)
        fpdd=None,
        pdd_mceporder=None,  # Mel-cepstral order for compressing PDD spectrogram (typically 59; None: no compression)
        fnm=None,
        nm_nbfwbnds=None,  # Number of mel-bands in the compressed noise mask (None: no compression)
        preproc_fs=None,  # Resample the waveform
        preproc_hp=None,  # Cut-off of high-pass filter (e.g. 20Hz)
        verbose=1):
    """Analyse a waveform file and write the requested features to disk.

    The waveform is optionally resampled and high-pass filtered first. Each
    feature is computed only if its output file is requested, and is written
    as raw float32 values with ``ndarray.tofile``. Missing output
    directories are created.

    Args:
        fwav: Path of the input waveform, read with ``sp.wavread``.
        shift: Time shift forwarded to the f0 post-processing and to the
            spectral analysis (reported in seconds in the log line).
        dftlen: DFT length forwarded to the spectrum, PDD and band
            compression routines.
        finf0txt: Optional input f0 file, loaded with ``np.loadtxt``.
        f0estimator: Name of the f0 estimator, forwarded to
            ``analysis_f0postproc``.
        f0_min, f0_max: f0 limits forwarded to ``analysis_f0postproc``
            (reported in Hz in the log line).
        ff0: Output file for the second column of the f0 matrix.
        f0_log: If true, the natural log of the f0 values is written.
        finf0bin: Optional input f0 file read as raw float32. When both
            input f0 files are given, this one overrides the text one.
        fspec: Output file for the spectrogram.
        spec_mceporder: Mel-cepstral order for compressing the spectrogram
            (typically 59; None: no compression).
        spec_fwceporder: Frequency warped cepstral order (typically 59;
            None: no compression).
        spec_nbfwbnds: Number of mel-bands in the compressed half log
            spectrogram (None: no compression).
        spec_nblinlogbnds: When not None, the log amplitude spectrogram is
            written. NOTE(review): the value itself is not used by this
            function.
        fpdd: Output file for the PDD.
        pdd_mceporder: Mel-cepstral order for compressing the PDD
            (typically 59; None: no compression).
        fnm: Output file for the noise mask.
        nm_nbfwbnds: Number of mel-bands in the compressed noise mask
            (None or 0: no compression).
        preproc_fs: If given and different from the file's sampling
            frequency, the waveform is resampled to it.
        preproc_hp: If given, cut-off in Hz of a 4th order Butterworth
            high-pass filter applied forward and backward (``filtfilt``).
        verbose: > 0 prints progress messages; > 2 additionally calls
            ``plot_features`` on the uncompressed features.

    Returns:
        None. All results are written to the requested files.
    """
    wav, fs, _ = sp.wavread(fwav)

    if verbose > 0:
        print(
            'PML Analysis (dur={:.3f}s, fs={}Hz, f0 in [{},{}]Hz, shift={}s, dftlen={})'
            .format(len(wav) / float(fs), fs, f0_min, f0_max, shift, dftlen))

    if (preproc_fs is not None) and (preproc_fs != fs):
        if verbose > 0:
            print(
                ' Resampling the waveform (new fs={}Hz)'.format(preproc_fs))
        wav = sp.resample(wav, fs, preproc_fs, method=2, deterministic=True)
        fs = preproc_fs

    if preproc_hp is not None:
        if verbose > 0:
            print(' High-pass filter the waveform (cutt-off={}Hz)'.format(
                preproc_hp))
        # scipy expects the critical frequency normalised by the Nyquist
        # frequency (fs/2). The previous expression divided by fs/0.5
        # (i.e. 2*fs), which put the cut-off four times too low.
        b, a = sig.butter(4, preproc_hp / (0.5 * fs), btype='high')
        wav = sig.filtfilt(b, a, wav)

    # Optional externally provided f0 curve.
    f0s = None
    if finf0txt:
        f0s = np.loadtxt(finf0txt)
    # read input f0 file in float32 (ljuvela)
    # NOTE(review): np.fromfile returns a 1-D array whereas the code below
    # indexes f0s[:, 1]; presumably analysis_f0postproc returns a 2-D matrix
    # in every case -- verify.
    if finf0bin:
        f0s = np.fromfile(finf0bin, dtype=np.float32)

    f0s = analysis_f0postproc(wav, fs, f0s, f0_min=f0_min, f0_max=f0_max,
                              shift=shift, f0estimator=f0estimator,
                              verbose=verbose)

    # Uncompressed copies kept for plotting only (verbose > 2). They default
    # to None so that plot_features can be called even when a feature was
    # not requested (SPECori/PDDori used to be unbound in that case).
    SPECori = None
    PDDori = None
    NMori = None
    if verbose > 2:
        f0sori = f0s.copy()

    if ff0:
        f0_values = f0s[:, 1]
        if verbose > 0:
            print(' Output F0 {} in: {}'.format(f0_values.shape, ff0))
        if f0_log:
            f0_values = np.log(f0_values)
        _ensure_parent_dir(ff0)
        f0_values.astype(np.float32).tofile(ff0)

    SPEC = None
    if fspec:
        SPEC = analysis_spec(wav, fs, f0s, shift=shift, dftlen=dftlen,
                             verbose=verbose)
        if verbose > 2:
            SPECori = SPEC.copy()

        # The compressions below are applied in sequence if several are
        # requested at once.
        if spec_mceporder is not None:  # pragma: no cover
            # Cannot test this because it needs SPTK
            SPEC = sp.spec2mcep(SPEC, sp.bark_alpha(fs), order=spec_mceporder)
        if spec_fwceporder is not None:
            SPEC = sp.loghspec2fwcep(np.log(abs(SPEC)), fs,
                                     order=spec_fwceporder)
        if spec_nbfwbnds is not None:
            SPEC = sp.linbnd2fwbnd(np.log(abs(SPEC)), fs, dftlen,
                                   spec_nbfwbnds)
        if spec_nblinlogbnds is not None:
            SPEC = np.log(abs(SPEC))

        if verbose > 0:
            print(' Output Spectrogram size={} in: {}'.format(
                SPEC.shape, fspec))
        _ensure_parent_dir(fspec)
        SPEC.astype(np.float32).tofile(fspec)

    # pdd_raw always holds the uncompressed PDD; PDD is the version that is
    # written out, which may be mel-cepstrally compressed.
    PDD = None
    pdd_raw = None
    if fpdd or fnm:
        pdd_raw = analysis_pdd(wav, fs, f0s, dftlen=dftlen, verbose=verbose)
        PDD = pdd_raw
        if verbose > 2:
            PDDori = pdd_raw.copy()

    if fpdd:
        if pdd_mceporder is not None:  # pragma: no cover
            # Cannot test this because it needs SPTK
            # If asked, compress PDD. Work on a copy so that the raw PDD
            # stays available for the noise mask computation below.
            PDD = pdd_raw.copy()
            PDD[PDD < 0.001] = 0.001  # From COVAREP
            PDD = sp.spec2mcep(PDD, sp.bark_alpha(fs), pdd_mceporder)
        if verbose > 0:
            print(' Output PDD size={} in: {}'.format(PDD.shape, fpdd))
        _ensure_parent_dir(fpdd)
        PDD.astype(np.float32).tofile(fpdd)

    NM = None
    if fnm:
        # Use the uncompressed PDD: previously the mel-cepstral coefficients
        # were passed here whenever fpdd and pdd_mceporder were both set.
        # (analysis_nm presumably expects the output of analysis_pdd -- verify.)
        NM = analysis_nm(wav, fs, f0s, pdd_raw, verbose=verbose)
        if verbose > 2:
            NMori = NM.copy()
        if nm_nbfwbnds:
            # If asked, compress the noise mask using a number of mel bands
            NM = sp.linbnd2fwbnd(NM, fs, dftlen, nm_nbfwbnds)
        if verbose > 0:
            print(' Output Noise Mask size={} in: {}'.format(NM.shape, fnm))
        _ensure_parent_dir(fnm)
        NM.astype(np.float32).tofile(fnm)

    if verbose > 2:
        plot_features(wav=wav, fs=fs, f0s=f0sori, SPEC=SPECori, PDD=PDDori,
                      NM=NMori)  # pragma: no cover
# Make the author's local copy of the sigproc package importable.
sys.path.append('/home/degottex/Research/CUED/Code')
import sigproc as sp

if __name__ == "__main__":
    # Command-line tool: read a raw float32 spectrum file, compress its log
    # amplitude into frequency warped bands and write the result as raw
    # float32.
    parser = argparse.ArgumentParser()
    parser.add_argument("specfile", default=None,
                        help="Input spectrum file")
    parser.add_argument("--dftlen", default=4096, type=int,
                        help="DFT size for the input spectrum")
    parser.add_argument("--fs", default=16000, type=int,
                        help="Sampling frequency[Hz]")
    parser.add_argument("--nbbands", type=int,
                        help="Number of bands in the warped spectral representation")
    parser.add_argument("bndfwspecfile", default=None,
                        help="Output frequency warped spectrum file")
    # Unrecognised command-line arguments are tolerated and ignored.
    args, _unknown = parser.parse_known_args()

    # One row per frame, dftlen/2+1 bins per row.
    nbbins = int(args.dftlen / 2) + 1
    SPEC = np.fromfile(args.specfile, dtype=np.float32)
    SPEC = SPEC.reshape((-1, nbbins))

    logspec = np.log(SPEC)
    FWSPEC = sp.linbnd2fwbnd(logspec, args.fs, args.dftlen, args.nbbands)
    FWSPEC.astype('float32').tofile(args.bndfwspecfile)

    if 0:
        # Disabled debugging aid: resynthesise the linear spectrum from the
        # warped bands and display it below the original one.
        shift = 0.005
        SPECR = np.exp(sp.fwbnd2linbnd(FWSPEC, args.fs, args.dftlen))

        import matplotlib.pyplot as plt
        plt.ion()
        ts = shift*np.arange(SPEC.shape[0])
        for position, spectrum in ((211, SPEC), (212, SPECR)):
            plt.subplot(position)
            plt.imshow(sp.mag2db(spectrum).T, origin='lower', aspect='auto',
                       interpolation='none', cmap='jet',
                       extent=[0.0, ts[-1], 0.0, args.fs/2])
        from IPython.core.debugger import Pdb; Pdb().set_trace()
def analysisf(
        fwav,
        shift=0.005,
        dftlen=4096,
        inf0txt_file=None,
        f0_min=60,
        f0_max=600,
        f0_file=None,
        spec_file=None,
        spec_order=None,  # Mel-cepstral order for compressing the
                          # spectrum (typically 59; None: no compression)
        pdd_file=None,
        pdd_order=None,  # Mel-cepstral order for compressing PDD
                         # spectrum (typically 59; None: no compression)
        nm_file=None,
        nm_nbbnds=None,  # Number of mel-bands in the compressed mask
                         # (None: no compression)
        verbose=1):
    """Analyse a waveform file and write the requested features to disk.

    Each feature is computed only if its output file is requested, and is
    written as raw float32 values with ``ndarray.tofile``.

    Args:
        fwav: Path of the input waveform, read with ``sp.wavread``.
        shift: Time shift forwarded to the f0 post-processing and to the
            spectral analysis (reported in seconds in the log line).
        dftlen: DFT length forwarded to the spectrum, PDD and band
            compression routines.
        inf0txt_file: Optional input f0 file, loaded with ``np.loadtxt``.
        f0_min, f0_max: f0 limits forwarded to ``analysis_f0postproc``
            (reported in Hz in the log line).
        f0_file: Output file for the second column of the f0 matrix.
        spec_file: Output file for the spectrogram.
        spec_order: Mel-cepstral order for compressing the spectrum
            (typically 59; None: no compression).
        pdd_file: Output file for the PDD.
        pdd_order: Mel-cepstral order for compressing the PDD
            (typically 59; None: no compression).
        nm_file: Output file for the noise mask.
        nm_nbbnds: Number of mel-bands in the compressed mask
            (None or 0: no compression). The compressed mask is forced
            back to binary values.
        verbose: > 0 prints progress messages; > 2 additionally calls
            ``plot_features`` on the computed features.

    Returns:
        None. All results are written to the requested files.
    """
    wav, fs, _ = sp.wavread(fwav)

    if verbose > 0:
        print('PM Analysis (dur={:.3f}s, fs={}Hz, f0 in [{},{}]Hz, shift={}s, dftlen={})'
              .format(len(wav)/float(fs), fs, f0_min, f0_max, shift, dftlen))

    # Optional externally provided f0 curve.
    f0s = None
    if inf0txt_file:
        f0s = np.loadtxt(inf0txt_file)

    f0s = analysis_f0postproc(wav, fs, f0s, f0_min=f0_min, f0_max=f0_max,
                              shift=shift, verbose=verbose)

    if f0_file:
        f0_values = f0s[:, 1]
        if verbose > 0:
            print(' Output F0 {} in: {}'.format(f0_values.shape, f0_file))
        f0_values.astype(np.float32).tofile(f0_file)

    SPEC = None
    if spec_file:
        SPEC = analysis_spec(wav, fs, f0s, shift=shift, dftlen=dftlen,
                             verbose=verbose)
        if spec_order is not None:
            SPEC = sp.spec2mcep(SPEC, sp.bark_alpha(fs), order=spec_order)
        if verbose > 0:
            print(' Output Spectrogram size={} in: {}'.format(SPEC.shape, spec_file))
        SPEC.astype(np.float32).tofile(spec_file)

    # pdd_raw always holds the uncompressed PDD; PDD is the version that is
    # written out (and plotted), which may be mel-cepstrally compressed.
    PDD = None
    pdd_raw = None
    if pdd_file or nm_file:
        pdd_raw = analysis_pdd(wav, fs, f0s, dftlen=dftlen, verbose=verbose)
        PDD = pdd_raw

    if pdd_file:
        if pdd_order is not None:
            # If asked, compress PDD. Work on a copy so that the raw PDD
            # stays available for the noise mask computation below.
            PDD = pdd_raw.copy()
            PDD[PDD < 0.001] = 0.001  # From COVAREP
            PDD = sp.spec2mcep(PDD, sp.bark_alpha(fs), pdd_order)
        if verbose > 0:
            print(' Output PDD size={} in: {}'.format(PDD.shape, pdd_file))
        PDD.astype(np.float32).tofile(pdd_file)

    NM = None
    if nm_file:
        # Use the uncompressed PDD: previously the mel-cepstral coefficients
        # were passed here whenever pdd_file and pdd_order were both set.
        # (analysis_nm presumably expects the output of analysis_pdd -- verify.)
        NM = analysis_nm(wav, fs, f0s, pdd_raw, verbose=verbose)
        if nm_nbbnds:
            # If asked, compress the noise mask using a number of mel bands
            NM = sp.linbnd2fwbnd(NM, fs, dftlen, nm_nbbnds)
            # Need to force to binary values because we don't use ambiguous
            # values, we use the binary version at synthesis time.
            NM[NM >= 0.5] = 1.0
            NM[NM < 0.5] = 0.0
        if verbose > 0:
            print(' Output Noise Mask size={} in: {}'.format(NM.shape, nm_file))
        NM.astype(np.float32).tofile(nm_file)

    if verbose > 2:
        plot_features(wav=wav, fs=fs, f0s=f0s, SPEC=SPEC, PDD=PDD, NM=NM)