def analysisf(
        fwav,
        shift=0.005,
        dftlen=4096,
        inf0txt_file=None,
        f0_min=60,
        f0_max=600,
        f0_file=None,
        f0_log=False,
        inf0bin_file=None,  # input f0 file in binary
        spec_file=None,
        spec_order=None,  # Mel-cepstral order for compressing the
                          # spectrum (typically 59; None: no compression)
        pdd_file=None,
        pdd_order=None,  # Mel-cepstral order for compressing PDD
                         # spectrum (typically 59; None: no compression)
        nm_file=None,
        nm_nbbnds=None,  # Number of mel-bands in the compressed mask
                         # (None: no compression)
        verbose=1):
    """Analyse a waveform file and write the requested features to files.

    The waveform is read with sp.wavread; each feature is computed only if
    its output file is given, and is written as raw float32 (ndarray.tofile).

    Args:
        fwav: Path of the input waveform.
        shift: Time shift in seconds, forwarded to the f0 post-processing and
            the spectral analysis.
        dftlen: DFT length forwarded to the spectral/PDD analysis and to the
            noise mask band compression.
        inf0txt_file: Optional input f0 file, loaded with np.loadtxt.
        f0_min, f0_max: f0 search range in Hz, forwarded to
            analysis_f0postproc.
        f0_file: Output file for the second column of the post-processed f0
            array.
        f0_log: If true, the natural logarithm of the f0 values is written.
        inf0bin_file: Optional input f0 file in raw float32; when given it
            takes precedence over inf0txt_file.
        spec_file: Output file for the spectrogram.
        spec_order: If not None, the spectrogram is compressed with
            sp.spec2mcep using this order.
        pdd_file: Output file for the PDD.
        pdd_order: If not None, the written PDD is compressed with
            sp.spec2mcep using this order.
        nm_file: Output file for the noise mask.
        nm_nbbnds: If truthy, the noise mask is compressed with
            sp.linbnd2fwbnd into this number of bands.
        verbose: Messages are printed when > 0; plot_features is called
            when > 2.

    Returns:
        None. All results are written to the given files.
    """
    wav, fs, _ = sp.wavread(fwav)

    if verbose > 0:
        print('PM Analysis (dur={:.3f}s, fs={}Hz, f0 in [{},{}]Hz, shift={}s, dftlen={})'
              .format(len(wav) / float(fs), fs, f0_min, f0_max, shift, dftlen))

    f0s = None
    if inf0txt_file:
        f0s = np.loadtxt(inf0txt_file)
    # read input f0 file in float32 (ljuvela)
    if inf0bin_file:
        f0s = np.fromfile(inf0bin_file, dtype=np.float32)
    f0s = analysis_f0postproc(wav, fs, f0s, f0_min=f0_min, f0_max=f0_max,
                              shift=shift, verbose=verbose)

    if f0_file:
        f0_values = f0s[:, 1]
        if verbose > 0:
            print(' Output F0 {} in: {}'.format(f0_values.shape, f0_file))
        if f0_log:
            f0_values = np.log(f0_values)
        f0_values.astype(np.float32).tofile(f0_file)

    SPEC = None
    if spec_file:
        SPEC = analysis_spec(wav, fs, f0s, shift=shift, dftlen=dftlen,
                             verbose=verbose)
        if spec_order is not None:
            SPEC = sp.spec2mcep(SPEC, sp.bark_alpha(fs), order=spec_order)
        if verbose > 0:
            print(' Output Spectrogram size={} in: {}'.format(
                SPEC.shape, spec_file))
        SPEC.astype(np.float32).tofile(spec_file)

    PDD = None
    if pdd_file or nm_file:
        PDD = analysis_pdd(wav, fs, f0s, dftlen=dftlen, verbose=verbose)

    # Keep a handle on the uncompressed PDD: the noise mask below has to be
    # derived from it and not from its mel-cepstral compression.
    PDDraw = PDD
    if pdd_file:
        if pdd_order is not None:
            # If asked, compress PDD (on a copy, so PDDraw stays untouched)
            PDD = PDDraw.copy()
            PDD[PDD < 0.001] = 0.001  # From COVAREP
            PDD = sp.spec2mcep(PDD, sp.bark_alpha(fs), pdd_order)
        if verbose > 0:
            print(' Output PDD size={} in: {}'.format(PDD.shape, pdd_file))
        PDD.astype(np.float32).tofile(pdd_file)

    NM = None
    if nm_file:
        NM = analysis_nm(wav, fs, f0s, PDDraw, verbose=verbose)
        # If asked, compress NM
        if nm_nbbnds:
            # If asked, compress the noise mask using a number of mel bands
            NM = sp.linbnd2fwbnd(NM, fs, dftlen, nm_nbbnds)
        if verbose > 0:
            print(' Output Noise Mask size={} in: {}'.format(
                NM.shape, nm_file))
        NM.astype(np.float32).tofile(nm_file)

    if verbose > 2:
        plot_features(wav=wav, fs=fs, f0s=f0s, SPEC=SPEC, PDD=PDD, NM=NM)
def _ensure_parent_dir(fpath):
    """Create the directory that will hold `fpath`, unless it already exists."""
    dirpath = os.path.dirname(fpath)
    if dirpath != '' and not os.path.isdir(dirpath):
        # makedirs also creates missing intermediate directories, whereas
        # mkdir fails as soon as the parent of dirpath does not exist.
        os.makedirs(dirpath)


def analysisf(
        fwav,
        shift=0.005,
        dftlen=4096,
        finf0txt=None,
        f0estimator='REAPER',
        f0_min=60,
        f0_max=600,
        ff0=None,
        f0_log=False,
        finf0bin=None,  # input f0 file in binary
        fspec=None,
        spec_mceporder=None,  # Mel-cepstral order for compressing the spectrogram (typically 59; None: no compression)
        spec_fwceporder=None,  # Frequency warped cepstral order (very similar to above, just faster and less precise) (typically 59; None: no compression)
        spec_nbfwbnds=None,  # Number of mel-bands in the compressed half log spectrogram (None: no compression)
        spec_nblinlogbnds=None,  # Number of linear-bands in the compressed half log spectrogram (None: no compression)
        fpdd=None,
        pdd_mceporder=None,  # Mel-cepstral order for compressing PDD spectrogram (typically 59; None: no compression)
        fnm=None,
        nm_nbfwbnds=None,  # Number of mel-bands in the compressed noise mask (None: no compression)
        preproc_fs=None,  # Resample the waveform
        preproc_hp=None,  # Cut-off of high-pass filter (e.g. 20Hz)
        verbose=1):
    """Analyse a waveform file and write the requested features to files.

    The waveform is read with sp.wavread, optionally resampled and high-pass
    filtered, then each feature whose output file is given is computed and
    written as raw float32 (ndarray.tofile). Missing output directories are
    created.

    Args:
        fwav: Path of the input waveform.
        shift: Time shift in seconds, forwarded to the f0 post-processing and
            the spectral analysis.
        dftlen: DFT length forwarded to the spectral/PDD analysis and to the
            band compressions.
        finf0txt: Optional input f0 file, loaded with np.loadtxt.
        f0estimator: Name of the f0 estimator, forwarded to
            analysis_f0postproc.
        f0_min, f0_max: f0 search range in Hz, forwarded to
            analysis_f0postproc.
        ff0: Output file for the second column of the post-processed f0
            array.
        f0_log: If true, the natural logarithm of the f0 values is written.
        finf0bin: Optional input f0 file in raw float32; when given it takes
            precedence over finf0txt.
        fspec: Output file for the spectrogram.
        spec_mceporder, spec_fwceporder, spec_nbfwbnds, spec_nblinlogbnds:
            Optional spectrogram compressions, applied in this order for each
            one that is not None (presumably only one is meant to be set).
            NOTE(review): the value of spec_nblinlogbnds is not used here;
            setting it only makes the log amplitude be written.
        fpdd: Output file for the PDD.
        pdd_mceporder: If not None, the written PDD is compressed with
            sp.spec2mcep using this order.
        fnm: Output file for the noise mask.
        nm_nbfwbnds: If truthy, the noise mask is compressed with
            sp.linbnd2fwbnd into this number of bands.
        preproc_fs: If not None and different from the file's sampling
            frequency, the waveform is resampled to it first.
        preproc_hp: If not None, cut-off in Hz of a 4th order Butterworth
            high-pass filter applied forward-backward (filtfilt).
        verbose: Messages are printed when > 0; plot_features is called
            when > 2.

    Returns:
        None. All results are written to the given files.
    """
    wav, fs, _ = sp.wavread(fwav)

    if verbose > 0:
        print('PML Analysis (dur={:.3f}s, fs={}Hz, f0 in [{},{}]Hz, shift={}s, dftlen={})'
              .format(len(wav) / float(fs), fs, f0_min, f0_max, shift, dftlen))

    if (preproc_fs is not None) and (preproc_fs != fs):
        if verbose > 0:
            print(' Resampling the waveform (new fs={}Hz)'.format(preproc_fs))
        wav = sp.resample(wav, fs, preproc_fs, method=2, deterministic=True)
        fs = preproc_fs

    if preproc_hp is not None:
        if verbose > 0:
            print(' High-pass filter the waveform (cutt-off={}Hz)'.format(
                preproc_hp))
        # The digital cut-off is expressed relative to the Nyquist frequency
        # (fs/2). Dividing by fs/0.5 (= 2*fs) placed the cut-off four times
        # lower than the requested value.
        b, a = sig.butter(4, preproc_hp / (0.5 * fs), btype='high')
        wav = sig.filtfilt(b, a, wav)

    # Untouched copies handed to plot_features. They all start as None so the
    # final plot call cannot hit an undefined name when a feature is skipped.
    f0sori = None
    SPECori = None
    PDDori = None
    NMori = None

    f0s = None
    if finf0txt:
        f0s = np.loadtxt(finf0txt)
    # read input f0 file in float32 (ljuvela)
    if finf0bin:
        f0s = np.fromfile(finf0bin, dtype=np.float32)
    f0s = analysis_f0postproc(wav, fs, f0s, f0_min=f0_min, f0_max=f0_max,
                              shift=shift, f0estimator=f0estimator,
                              verbose=verbose)
    if verbose > 2:
        f0sori = f0s.copy()

    if ff0:
        f0_values = f0s[:, 1]
        if verbose > 0:
            print(' Output F0 {} in: {}'.format(f0_values.shape, ff0))
        if f0_log:
            f0_values = np.log(f0_values)
        _ensure_parent_dir(ff0)
        f0_values.astype(np.float32).tofile(ff0)

    SPEC = None
    if fspec:
        SPEC = analysis_spec(wav, fs, f0s, shift=shift, dftlen=dftlen,
                             verbose=verbose)
        if verbose > 2:
            SPECori = SPEC.copy()

        if spec_mceporder is not None:  # pragma: no cover
            # Cannot test this because it needs SPTK
            SPEC = sp.spec2mcep(SPEC, sp.bark_alpha(fs), order=spec_mceporder)
        if spec_fwceporder is not None:
            SPEC = sp.loghspec2fwcep(np.log(abs(SPEC)), fs,
                                     order=spec_fwceporder)
        if spec_nbfwbnds is not None:
            SPEC = sp.linbnd2fwbnd(np.log(abs(SPEC)), fs, dftlen,
                                   spec_nbfwbnds)
        if spec_nblinlogbnds is not None:
            SPEC = np.log(abs(SPEC))

        if verbose > 0:
            print(' Output Spectrogram size={} in: {}'.format(
                SPEC.shape, fspec))
        _ensure_parent_dir(fspec)
        SPEC.astype(np.float32).tofile(fspec)

    PDD = None
    if fpdd or fnm:
        PDD = analysis_pdd(wav, fs, f0s, dftlen=dftlen, verbose=verbose)
        if verbose > 2:
            PDDori = PDD.copy()

    if fpdd:
        # PDD itself stays uncompressed: the noise mask below has to be
        # derived from it and not from its mel-cepstral compression.
        PDDout = PDD
        if pdd_mceporder is not None:  # pragma: no cover
            # Cannot test this because it needs SPTK
            # If asked, compress PDD (on a copy)
            PDDout = PDD.copy()
            PDDout[PDDout < 0.001] = 0.001  # From COVAREP
            PDDout = sp.spec2mcep(PDDout, sp.bark_alpha(fs), pdd_mceporder)
        if verbose > 0:
            print(' Output PDD size={} in: {}'.format(PDDout.shape, fpdd))
        _ensure_parent_dir(fpdd)
        PDDout.astype(np.float32).tofile(fpdd)

    NM = None
    if fnm:
        NM = analysis_nm(wav, fs, f0s, PDD, verbose=verbose)
        if verbose > 2:
            NMori = NM.copy()
        # If asked, compress NM
        if nm_nbfwbnds:
            # If asked, compress the noise mask using a number of mel bands
            NM = sp.linbnd2fwbnd(NM, fs, dftlen, nm_nbfwbnds)
        if verbose > 0:
            print(' Output Noise Mask size={} in: {}'.format(NM.shape, fnm))
        _ensure_parent_dir(fnm)
        NM.astype(np.float32).tofile(fnm)

    if verbose > 2:
        plot_features(wav=wav, fs=fs, f0s=f0sori, SPEC=SPECori, PDD=PDDori,
                      NM=NMori)  # pragma: no cover
def test_smoke_analysis_synthesis(cls):
    """Smoke test of the analysis, f0 post-processing and synthesis entry points.

    Only checks that the calls run on a single test file; no output value is
    asserted.
    """
    fname = filenames[filename_totest]  # Just with one file for smoke test
    verbose = 1

    from lib import pulsemodel
    import sigproc as sp

    # Settings shared by all the f0 post-processing calls below
    postproc_opts = dict(f0_min=75, f0_max=800, shift=0.010, verbose=verbose)

    wav, fs, _ = sp.wavread('test/' + fname)

    # Analysis with all the default arguments
    f0s, SPEC, PDD, NM = pulsemodel.analysis(wav, fs)

    # f0 post-processing fed with: zeros only, then the f0 values only
    _ = pulsemodel.analysis_f0postproc(
        wav, fs, f0s=np.zeros(f0s[:, 1].shape), **postproc_opts)
    _ = pulsemodel.analysis_f0postproc(
        wav, fs, f0s=f0s[:, 1], **postproc_opts)

    # ... then with the first column replaced by sorted random values drawn
    # between its first and last entries
    col0_first = f0s[0, 0]
    col0_last = f0s[-1, 0]
    nonunif0s = f0s.copy()
    nonunif0s[:, 0] = np.sort(
        np.random.rand(f0s.shape[0]) * (col0_last - col0_first) + col0_first)
    _ = pulsemodel.analysis_f0postproc(
        wav, fs, f0s=nonunif0s, **postproc_opts)

    # ... and finally without any input f0
    f0s = pulsemodel.analysis_f0postproc(wav, fs, **postproc_opts)

    # Full analysis, reusing the f0 obtained just above
    f0s, SPEC, PDD, NM = pulsemodel.analysis(
        wav, fs, f0s=f0s, f0_min=60, f0_max=600, shift=0.005, dftlen=4096,
        verbose=verbose)

    # Synthesis without and with the analysed noise mask
    nbsamples = len(wav)
    _ = pulsemodel.synthesize(fs, f0s, SPEC, wavlen=nbsamples)
    _ = pulsemodel.synthesize(fs, f0s, SPEC, NM=NM, wavlen=nbsamples)

    # Synthesis with a noise mask rebuilt by thresholding PDD at 0.75
    NM = PDD.copy()
    NM[NM > 0.75] = 1
    NM[NM <= 0.75] = 0
    _ = pulsemodel.synthesize(fs, f0s, SPEC, NM=NM, wavlen=nbsamples)

    # Same again, with the optional synthesis arguments set
    extra_opts = dict(
        ener_multT0=True,
        nm_cont=True,
        nm_lowpasswinlen=13,
        hp_f0coef=0.25,
        antipreechohwindur=0.002,
        pp_f0_rmsteps=True,
        pp_f0_smooth=0.100,
        pp_atten1stharminsilences=-25,
        verbose=verbose,
    )
    _ = pulsemodel.synthesize(fs, f0s, SPEC, NM=NM, wavlen=nbsamples,
                              **extra_opts)
def analysisf(
        fwav,
        shift=0.005,
        dftlen=4096,
        inf0txt_file=None,
        f0_min=60,
        f0_max=600,
        f0_file=None,
        spec_file=None,
        spec_order=None,  # Mel-cepstral order for compressing the
                          # spectrum (typically 59; None: no compression)
        pdd_file=None,
        pdd_order=None,  # Mel-cepstral order for compressing PDD
                         # spectrum (typically 59; None: no compression)
        nm_file=None,
        nm_nbbnds=None,  # Number of mel-bands in the compressed mask
                         # (None: no compression)
        verbose=1):
    """Analyse a waveform file and write the requested features to files.

    The waveform is read with sp.wavread; each feature is computed only if
    its output file is given, and is written as raw float32 (ndarray.tofile).

    Args:
        fwav: Path of the input waveform.
        shift: Time shift in seconds, forwarded to the f0 post-processing and
            the spectral analysis.
        dftlen: DFT length forwarded to the spectral/PDD analysis and to the
            noise mask band compression.
        inf0txt_file: Optional input f0 file, loaded with np.loadtxt.
        f0_min, f0_max: f0 search range in Hz, forwarded to
            analysis_f0postproc.
        f0_file: Output file for the second column of the post-processed f0
            array.
        spec_file: Output file for the spectrogram.
        spec_order: If not None, the spectrogram is compressed with
            sp.spec2mcep using this order.
        pdd_file: Output file for the PDD.
        pdd_order: If not None, the written PDD is compressed with
            sp.spec2mcep using this order.
        nm_file: Output file for the noise mask.
        nm_nbbnds: If truthy, the noise mask is compressed with
            sp.linbnd2fwbnd into this number of bands and forced back to
            binary values.
        verbose: Messages are printed when > 0; plot_features is called
            when > 2.

    Returns:
        None. All results are written to the given files.
    """
    wav, fs, _ = sp.wavread(fwav)

    if verbose > 0:
        print('PM Analysis (dur={:.3f}s, fs={}Hz, f0 in [{},{}]Hz, shift={}s, dftlen={})'.format(
            len(wav) / float(fs), fs, f0_min, f0_max, shift, dftlen))

    f0s = None
    if inf0txt_file:
        f0s = np.loadtxt(inf0txt_file)
    f0s = analysis_f0postproc(wav, fs, f0s, f0_min=f0_min, f0_max=f0_max,
                              shift=shift, verbose=verbose)

    if f0_file:
        if verbose > 0:
            print(' Output F0 {} in: {}'.format(f0s[:, 1].shape, f0_file))
        f0s[:, 1].astype(np.float32).tofile(f0_file)

    SPEC = None
    if spec_file:
        SPEC = analysis_spec(wav, fs, f0s, shift=shift, dftlen=dftlen,
                             verbose=verbose)
        if spec_order is not None:
            SPEC = sp.spec2mcep(SPEC, sp.bark_alpha(fs), order=spec_order)
        if verbose > 0:
            print(' Output Spectrogram size={} in: {}'.format(
                SPEC.shape, spec_file))
        SPEC.astype(np.float32).tofile(spec_file)

    PDD = None
    if pdd_file or nm_file:
        PDD = analysis_pdd(wav, fs, f0s, dftlen=dftlen, verbose=verbose)

    # Keep a handle on the uncompressed PDD: the noise mask below has to be
    # derived from it and not from its mel-cepstral compression.
    PDDraw = PDD
    if pdd_file:
        if pdd_order is not None:
            # If asked, compress PDD (on a copy, so PDDraw stays untouched)
            PDD = PDDraw.copy()
            PDD[PDD < 0.001] = 0.001  # From COVAREP
            PDD = sp.spec2mcep(PDD, sp.bark_alpha(fs), pdd_order)
        if verbose > 0:
            print(' Output PDD size={} in: {}'.format(PDD.shape, pdd_file))
        PDD.astype(np.float32).tofile(pdd_file)

    NM = None
    if nm_file:
        NM = analysis_nm(wav, fs, f0s, PDDraw, verbose=verbose)
        # If asked, compress NM
        if nm_nbbnds:
            # If asked, compress the noise mask using a number of mel bands
            NM = sp.linbnd2fwbnd(NM, fs, dftlen, nm_nbbnds)
            # Need to force to binary values because we don't use ambiguous
            # values, we use the binary version at synthesis time.
            NM[NM >= 0.5] = 1.0
            NM[NM < 0.5] = 0.0
        if verbose > 0:
            print(' Output Noise Mask size={} in: {}'.format(
                NM.shape, nm_file))
        NM.astype(np.float32).tofile(nm_file)

    if verbose > 2:
        plot_features(wav=wav, fs=fs, f0s=f0s, SPEC=SPEC, PDD=PDD, NM=NM)
#!/usr/bin/python
'''
Copyright(C) 2016 Engineering Department, University of Cambridge, UK.

License
   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at
     http://www.apache.org/licenses/LICENSE-2.0
   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.

Author
    Gilles Degottex <*****@*****.**>
'''

import sys

import sigproc as sp
# Load the submodule explicitly so that sp.interfaces is available below.
import sigproc.interfaces

if __name__ == "__main__":
    # Usage: <script> file.wav
    # The given file is read, passed through sv56demo and overwritten in
    # place with the same encoding.
    if len(sys.argv) < 2:
        sys.exit('Usage: {} <file.wav>'.format(sys.argv[0]))

    print('Normalise {}'.format(sys.argv[1]))
    wav, fs, enc = sp.wavread(sys.argv[1])
    # The second returned value is not needed here.
    # The previous call went through the name `lib`, which is never imported.
    wav, _ = sp.interfaces.sv56demo(wav, fs)
    sp.wavwrite(sys.argv[1], wav, fs, enc)