示例#1
0
def analysisf(
    fwav,
    shift=0.005,
    dftlen=4096,
    inf0txt_file=None,
    f0_min=60,
    f0_max=600,
    f0_file=None,
    f0_log=False,
    inf0bin_file=None,
    spec_file=None,
    spec_order=None,
    pdd_file=None,
    pdd_order=None,
    nm_file=None,
    nm_nbbnds=None,
    verbose=1):
    """Analyse a waveform file and write the requested features to disk.

    Each feature is computed and written (as raw float32, via `tofile`)
    only when its output path is given.

    Args:
        fwav: Path of the waveform file, read with `sp.wavread`.
        shift: Passed to the F0 post-processing and spectrum analysis.
        dftlen: Passed to the spectrum/PDD analysis and to the noise mask
            band compression.
        inf0txt_file: Optional input F0 file, read with `np.loadtxt`.
        f0_min, f0_max: F0 limits passed to the F0 post-processing.
        f0_file: Output path for the second column of the F0 array.
        f0_log: If true, the natural log of the F0 values is written.
        inf0bin_file: Optional input F0 file read as raw float32; when
            given it takes precedence over `inf0txt_file`.
        spec_file: Output path for the spectrogram.
        spec_order: Mel-cepstral order for compressing the spectrum
            (typically 59; None: no compression).
        pdd_file: Output path for the PDD.
        pdd_order: Mel-cepstral order for compressing the PDD
            (typically 59; None: no compression).
        nm_file: Output path for the noise mask.
        nm_nbbnds: Number of mel-bands in the compressed mask
            (None or 0: no compression).
        verbose: > 0 prints progress messages; > 2 also calls
            `plot_features`.
    """
    signal, rate, _ = sp.wavread(fwav)

    if verbose > 0:
        duration = len(signal) / float(rate)
        header = 'PM Analysis (dur={:.3f}s, fs={}Hz, f0 in [{},{}]Hz, shift={}s, dftlen={})'
        print(header.format(duration, rate, f0_min, f0_max, shift, dftlen))

    # Optional externally supplied F0; the binary file overrides the text one.
    f0_input = np.loadtxt(inf0txt_file) if inf0txt_file else None
    if inf0bin_file:
        f0_input = np.fromfile(inf0bin_file, dtype=np.float32)

    f0_track = analysis_f0postproc(signal, rate, f0_input,
                                   f0_min=f0_min, f0_max=f0_max,
                                   shift=shift, verbose=verbose)

    if f0_file:
        f0_column = f0_track[:, 1]
        if verbose > 0:
            print('    Output F0 {} in: {}'.format(f0_column.shape, f0_file))
        f0_out = np.log(f0_column) if f0_log else f0_column
        f0_out.astype(np.float32).tofile(f0_file)

    spectrum = None
    if spec_file:
        spectrum = analysis_spec(signal, rate, f0_track,
                                 shift=shift, dftlen=dftlen, verbose=verbose)
        if spec_order is not None:
            spectrum = sp.spec2mcep(spectrum, sp.bark_alpha(rate),
                                    order=spec_order)
        if verbose > 0:
            print('    Output Spectrogram size={} in: {}'.format(
                spectrum.shape, spec_file))
        spectrum.astype(np.float32).tofile(spec_file)

    # The PDD is needed for its own output and as input of the noise mask.
    pdd = None
    if pdd_file or nm_file:
        pdd = analysis_pdd(signal, rate, f0_track, dftlen=dftlen,
                           verbose=verbose)

    if pdd_file:
        if pdd_order is not None:
            # Floor the values before the cepstral compression (from COVAREP).
            pdd[pdd < 0.001] = 0.001
            pdd = sp.spec2mcep(pdd, sp.bark_alpha(rate), pdd_order)
        if verbose > 0:
            print('    Output PDD size={} in: {}'.format(pdd.shape, pdd_file))
        pdd.astype(np.float32).tofile(pdd_file)

    noise_mask = None
    if nm_file:
        noise_mask = analysis_nm(signal, rate, f0_track, pdd, verbose=verbose)
        if nm_nbbnds:
            # Compress the noise mask onto the requested number of bands.
            noise_mask = sp.linbnd2fwbnd(noise_mask, rate, dftlen, nm_nbbnds)
        if verbose > 0:
            print('    Output Noise Mask size={} in: {}'.format(
                noise_mask.shape, nm_file))
        noise_mask.astype(np.float32).tofile(nm_file)

    if verbose > 2:
        plot_features(wav=signal, fs=rate, f0s=f0_track,
                      SPEC=spectrum, PDD=pdd, NM=noise_mask)
示例#2
0
def _ensure_parent_dir(fpath):
    """Create the directory that will contain `fpath` if it is missing."""
    dirpath = os.path.dirname(fpath)
    if dirpath != '' and not os.path.isdir(dirpath):
        # makedirs (unlike mkdir) also creates missing intermediate levels.
        os.makedirs(dirpath)


def analysisf(
        fwav,
        shift=0.005,
        dftlen=4096,
        finf0txt=None,
        f0estimator='REAPER',
        f0_min=60,
        f0_max=600,
        ff0=None,
        f0_log=False,
        finf0bin=None,  # input f0 file in binary
        fspec=None,
        spec_mceporder=None,  # Mel-cepstral order for compressing the spectrogram (typically 59; None: no compression)
        spec_fwceporder=None,  # Frequency warped cepstral order (very similar to above, just faster and less precise) (typically 59; None: no compression)
        spec_nbfwbnds=None,  # Number of mel-bands in the compressed half log spectrogram (None: no compression)
        spec_nblinlogbnds=None,  # Number of linear-bands in the compressed half log spectrogram (None: no compression)
        fpdd=None,
        pdd_mceporder=None,  # Mel-cepstral order for compressing PDD spectrogram (typically 59; None: no compression)
        fnm=None,
        nm_nbfwbnds=None,  # Number of mel-bands in the compressed noise mask (None: no compression)
        preproc_fs=None,  # Resample the waveform
        preproc_hp=None,  # Cut-off of high-pass filter (e.g. 20Hz)
        verbose=1):
    """Analyse a waveform file and write the requested features to disk.

    The waveform is optionally resampled and high-pass filtered first.
    Each feature (F0, spectrogram, PDD, noise mask) is computed and written
    as raw float32 only when its output path is given; the directory of
    each output path is created when it does not exist.

    Args:
        fwav: Path of the waveform file, read with `sp.wavread`.
        shift: Passed to the F0 post-processing and spectrum analysis.
        dftlen: Passed to the spectrum/PDD analysis and band compressions.
        finf0txt: Optional input F0 file, read with `np.loadtxt`.
        f0estimator: Passed to `analysis_f0postproc`.
        f0_min, f0_max: F0 limits passed to the F0 post-processing.
        ff0: Output path for the second column of the F0 array.
        f0_log: If true, the natural log of the F0 values is written.
        finf0bin: Optional input F0 file read as raw float32; when given
            it takes precedence over `finf0txt`.
        fspec: Output path for the spectrogram.
        spec_mceporder, spec_fwceporder, spec_nbfwbnds, spec_nblinlogbnds:
            Optional spectrogram compressions (None: disabled). They are
            applied in this order when several are set.
        fpdd: Output path for the PDD.
        pdd_mceporder: Mel-cepstral order for compressing the PDD.
        fnm: Output path for the noise mask.
        nm_nbfwbnds: Number of bands of the compressed noise mask
            (None or 0: no compression).
        preproc_fs: New sampling frequency; resampling is skipped when it
            is None or equal to the file's sampling frequency.
        preproc_hp: Cut-off in Hz of a 4th-order Butterworth high-pass
            filter applied forward and backward (None: no filtering).
        verbose: > 0 prints progress messages; > 2 also calls
            `plot_features` with the uncompressed features.
    """
    wav, fs, _ = sp.wavread(fwav)

    if verbose > 0:
        print(
            'PML Analysis (dur={:.3f}s, fs={}Hz, f0 in [{},{}]Hz, shift={}s, dftlen={})'
            .format(len(wav) / float(fs), fs, f0_min, f0_max, shift, dftlen))

    if (preproc_fs is not None) and (preproc_fs != fs):
        if verbose > 0:
            print(
                '    Resampling the waveform (new fs={}Hz)'.format(preproc_fs))
        wav = sp.resample(wav, fs, preproc_fs, method=2, deterministic=True)
        fs = preproc_fs

    if preproc_hp is not None:
        if verbose > 0:
            print('    High-pass filter the waveform (cutt-off={}Hz)'.format(
                preproc_hp))
        # butter() expects the cut-off normalised by the Nyquist frequency
        # (fs/2). The previous `fs / 0.5` divided by 2*fs instead, which
        # made the effective cut-off four times lower than requested.
        b, a = sig.butter(4, preproc_hp / (0.5 * fs), btype='high')
        wav = sig.filtfilt(b, a, wav)

    f0s = None
    if finf0txt:
        f0s = np.loadtxt(finf0txt)

    # read input f0 file in float32 (ljuvela)
    if finf0bin:
        f0s = np.fromfile(finf0bin, dtype=np.float32)

    f0s = analysis_f0postproc(wav,
                              fs,
                              f0s,
                              f0_min=f0_min,
                              f0_max=f0_max,
                              shift=shift,
                              f0estimator=f0estimator,
                              verbose=verbose)

    # Uncompressed copies kept for plotting. They all default to None so
    # that plot_features() can be called whichever outputs were requested
    # (previously SPECori/PDDori could be unbound -> NameError).
    keep_originals = verbose > 2
    f0sori = f0s.copy() if keep_originals else None
    SPECori = None
    PDDori = None
    NMori = None

    if ff0:
        f0_values = f0s[:, 1]
        if verbose > 0:
            print('    Output F0 {} in: {}'.format(f0_values.shape, ff0))
        if f0_log: f0_values = np.log(f0_values)
        _ensure_parent_dir(ff0)
        f0_values.astype(np.float32).tofile(ff0)

    SPEC = None
    if fspec:
        SPEC = analysis_spec(wav,
                             fs,
                             f0s,
                             shift=shift,
                             dftlen=dftlen,
                             verbose=verbose)
        if keep_originals: SPECori = SPEC.copy()
        if spec_mceporder is not None:  # pragma: no cover
            # Cannot test this because it needs SPTK
            SPEC = sp.spec2mcep(SPEC, sp.bark_alpha(fs), order=spec_mceporder)
        if spec_fwceporder is not None:
            SPEC = sp.loghspec2fwcep(np.log(abs(SPEC)),
                                     fs,
                                     order=spec_fwceporder)
        if spec_nbfwbnds is not None:
            SPEC = sp.linbnd2fwbnd(np.log(abs(SPEC)), fs, dftlen,
                                   spec_nbfwbnds)
        if spec_nblinlogbnds is not None:
            # NOTE(review): the value of spec_nblinlogbnds is not used, only
            # the log-amplitude is taken -- confirm that no band reduction
            # is missing here.
            SPEC = np.log(abs(SPEC))
        if verbose > 0:
            print('    Output Spectrogram size={} in: {}'.format(
                SPEC.shape, fspec))
        _ensure_parent_dir(fspec)
        SPEC.astype(np.float32).tofile(fspec)

    # The PDD is needed for its own output and as input of the noise mask.
    PDD = None
    if fpdd or fnm:
        PDD = analysis_pdd(wav, fs, f0s, dftlen=dftlen, verbose=verbose)
        if keep_originals: PDDori = PDD.copy()

    if fpdd:
        if pdd_mceporder is not None:  # pragma: no cover
            # Cannot test this because it needs SPTK
            # If asked, compress PDD
            PDD[PDD < 0.001] = 0.001  # From COVAREP
            PDD = sp.spec2mcep(PDD, sp.bark_alpha(fs), pdd_mceporder)
        if verbose > 0:
            print('    Output PDD size={} in: {}'.format(PDD.shape, fpdd))
        _ensure_parent_dir(fpdd)
        PDD.astype(np.float32).tofile(fpdd)

    NM = None
    if fnm:
        # NOTE(review): when pdd_mceporder is set together with fpdd, PDD is
        # the compressed version at this point -- confirm that analysis_nm
        # is meant to receive it rather than the uncompressed PDD.
        NM = analysis_nm(wav, fs, f0s, PDD, verbose=verbose)
        if keep_originals: NMori = NM.copy()
        # If asked, compress NM
        if nm_nbfwbnds:
            # If asked, compress the noise mask using a number of mel bands
            NM = sp.linbnd2fwbnd(NM, fs, dftlen, nm_nbfwbnds)
        if verbose > 0:
            print('    Output Noise Mask size={} in: {}'.format(NM.shape, fnm))
        _ensure_parent_dir(fnm)
        NM.astype(np.float32).tofile(fnm)

    if keep_originals:
        plot_features(wav=wav,
                      fs=fs,
                      f0s=f0sori,
                      SPEC=SPECori,
                      PDD=PDDori,
                      NM=NMori)  # pragma: no cover
示例#3
0
    def test_smoke_analysis_synthesis(cls):
        """Smoke test: call analysis, F0 post-processing and synthesis once
        with several argument combinations. Results are not checked; the
        test only verifies that the calls run through."""
        fname = filenames[filename_totest]  # Just with one file for smoke test

        from lib import pulsemodel
        import sigproc as sp

        wav, fs, _ = sp.wavread('test/' + fname)

        # Analysis with all default options.
        f0s, SPEC, PDD, NM = pulsemodel.analysis(wav, fs)

        # Options shared by the F0 post-processing calls below.
        f0_opts = dict(f0_min=75, f0_max=800, shift=0.010, verbose=1)

        # F0 given as an all-zero vector.
        zero_f0 = np.zeros(f0s[:, 1].shape)
        _ = pulsemodel.analysis_f0postproc(wav, fs, f0s=zero_f0, **f0_opts)

        # F0 given as a vector of values only (no time column).
        _ = pulsemodel.analysis_f0postproc(wav, fs, f0s=f0s[:, 1], **f0_opts)

        # F0 given with random (sorted) time instants within the original
        # time span.
        t_first = f0s[0, 0]
        t_last = f0s[-1, 0]
        rand_times = np.random.rand(f0s.shape[0]) * (t_last - t_first) + t_first
        nonunif0s = f0s.copy()
        nonunif0s[:, 0] = rand_times
        nonunif0s[:, 0] = np.sort(nonunif0s[:, 0])
        _ = pulsemodel.analysis_f0postproc(wav, fs, f0s=nonunif0s, **f0_opts)

        # No F0 given at all.
        f0s = pulsemodel.analysis_f0postproc(wav, fs, **f0_opts)

        # Full analysis reusing the F0 obtained above.
        verbose = f0_opts['verbose']
        f0s, SPEC, PDD, NM = pulsemodel.analysis(wav,
                                                 fs,
                                                 f0s=f0s,
                                                 f0_min=60,
                                                 f0_max=600,
                                                 shift=0.005,
                                                 dftlen=4096,
                                                 verbose=verbose)

        nsamples = len(wav)

        # Synthesis without, then with, the analysed noise mask.
        _ = pulsemodel.synthesize(fs, f0s, SPEC, wavlen=nsamples)
        _ = pulsemodel.synthesize(fs, f0s, SPEC, NM=NM, wavlen=nsamples)

        # Synthesis with a binary mask obtained by thresholding the PDD.
        NM = PDD.copy()
        NM[NM > 0.75] = 1
        NM[NM <= 0.75] = 0
        _ = pulsemodel.synthesize(fs, f0s, SPEC, NM=NM, wavlen=nsamples)

        # Synthesis with the optional arguments set explicitly.
        extra_opts = dict(ener_multT0=True,
                          nm_cont=True,
                          nm_lowpasswinlen=13,
                          hp_f0coef=0.25,
                          antipreechohwindur=0.002,
                          pp_f0_rmsteps=True,
                          pp_f0_smooth=0.100,
                          pp_atten1stharminsilences=-25,
                          verbose=verbose)
        _ = pulsemodel.synthesize(fs, f0s, SPEC, NM=NM, wavlen=nsamples,
                                  **extra_opts)
示例#4
0
def analysisf(fwav,
              shift=0.005,
              dftlen=4096,
              inf0txt_file=None,
              f0_min=60,
              f0_max=600,
              f0_file=None,
              spec_file=None,
              spec_order=None,
              pdd_file=None,
              pdd_order=None,
              nm_file=None,
              nm_nbbnds=None,
              verbose=1):
    """Analyse a waveform file and write the requested features to disk.

    Each feature is computed and written (as raw float32, via `tofile`)
    only when its output path is given.

    Args:
        fwav: Path of the waveform file, read with `sp.wavread`.
        shift: Passed to the F0 post-processing and spectrum analysis.
        dftlen: Passed to the spectrum/PDD analysis and to the noise mask
            band compression.
        inf0txt_file: Optional input F0 file, read with `np.loadtxt`.
        f0_min, f0_max: F0 limits passed to the F0 post-processing.
        f0_file: Output path for the second column of the F0 array.
        spec_file: Output path for the spectrogram.
        spec_order: Mel-cepstral order for compressing the spectrum
            (typically 59; None: no compression).
        pdd_file: Output path for the PDD.
        pdd_order: Mel-cepstral order for compressing the PDD
            (typically 59; None: no compression).
        nm_file: Output path for the noise mask.
        nm_nbbnds: Number of mel-bands in the compressed mask
            (None or 0: no compression). A compressed mask is forced back
            to binary values.
        verbose: > 0 prints progress messages; > 2 also calls
            `plot_features`.
    """
    signal, rate, _ = sp.wavread(fwav)

    if verbose > 0:
        duration = len(signal) / float(rate)
        header = 'PM Analysis (dur={:.3f}s, fs={}Hz, f0 in [{},{}]Hz, shift={}s, dftlen={})'
        print(header.format(duration, rate, f0_min, f0_max, shift, dftlen))

    # Optional externally supplied F0.
    f0_input = np.loadtxt(inf0txt_file) if inf0txt_file else None

    f0_track = analysis_f0postproc(signal, rate, f0_input,
                                   f0_min=f0_min, f0_max=f0_max,
                                   shift=shift, verbose=verbose)

    if f0_file:
        if verbose > 0:
            print('    Output F0 {} in: {}'.format(f0_track[:, 1].shape,
                                                   f0_file))
        f0_track[:, 1].astype(np.float32).tofile(f0_file)

    spectrum = None
    if spec_file:
        spectrum = analysis_spec(signal, rate, f0_track,
                                 shift=shift, dftlen=dftlen, verbose=verbose)
        if spec_order is not None:
            spectrum = sp.spec2mcep(spectrum, sp.bark_alpha(rate),
                                    order=spec_order)
        if verbose > 0:
            print('    Output Spectrogram size={} in: {}'.format(
                spectrum.shape, spec_file))
        spectrum.astype(np.float32).tofile(spec_file)

    # The PDD is needed for its own output and as input of the noise mask.
    pdd = None
    if pdd_file or nm_file:
        pdd = analysis_pdd(signal, rate, f0_track, dftlen=dftlen,
                           verbose=verbose)

    if pdd_file:
        if pdd_order is not None:
            # Floor the values before the cepstral compression (from COVAREP).
            pdd[pdd < 0.001] = 0.001
            pdd = sp.spec2mcep(pdd, sp.bark_alpha(rate), pdd_order)
        if verbose > 0:
            print('    Output PDD size={} in: {}'.format(pdd.shape, pdd_file))
        pdd.astype(np.float32).tofile(pdd_file)

    noise_mask = None
    if nm_file:
        noise_mask = analysis_nm(signal, rate, f0_track, pdd, verbose=verbose)
        if nm_nbbnds:
            # Compress the noise mask onto the requested number of bands.
            noise_mask = sp.linbnd2fwbnd(noise_mask, rate, dftlen, nm_nbbnds)
            # The compression yields in-between values; force them back to
            # binary because only the binary version is used at synthesis
            # time.
            noise_mask[noise_mask >= 0.5] = 1.0
            noise_mask[noise_mask < 0.5] = 0.0
        if verbose > 0:
            print('    Output Noise Mask size={} in: {}'.format(
                noise_mask.shape, nm_file))
        noise_mask.astype(np.float32).tofile(nm_file)

    if verbose > 2:
        plot_features(wav=signal, fs=rate, f0s=f0_track,
                      SPEC=spectrum, PDD=pdd, NM=noise_mask)
示例#5
0
#!/usr/bin/python
'''
Copyright(C) 2016 Engineering Department, University of Cambridge, UK.

License
   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.

Author
    Gilles Degottex <*****@*****.**>
'''

import sys

import sigproc as sp

if __name__ == "__main__":
    # Normalise the waveform file given as first argument, in place.
    #
    # The previous call went through `lib.sigproc.interfaces`, but `lib` is
    # never imported in this script (NameError). The submodule is imported
    # here and reached through the `sp` alias instead.
    # NOTE(review): assumes `sv56demo` is exposed by `sigproc.interfaces`,
    # as the original dotted path suggests -- confirm.
    import sigproc.interfaces

    print('Normalise {}'.format(sys.argv[1]))
    wav, fs, enc = sp.wavread(sys.argv[1])
    # The second returned value is not used.
    wav, _ = sp.interfaces.sv56demo(wav, fs)
    # Overwrite the input file, keeping its sampling rate and encoding.
    sp.wavwrite(sys.argv[1], wav, fs, enc)