Exemplo n.º 1
0
def synthesizef(fs,
                shift=0.005,
                dftlen=4096,
                ff0=None,
                flf0=None,
                fspec=None,
                fmcep=None,
                fpdd=None,
                fnm=None,
                fbndnm=None,
                fsyn=None,
                verbose=1):
    '''
    Call the synthesis from python using file inputs and outputs.

    Every feature file is read with np.fromfile as a flat float32 array;
    the 2-D features are then reshaped to one row per f0 value. When
    several alternative files are given for the same feature, the one
    tested last wins (flf0 over ff0, fmcep over fspec, fbndnm over fnm
    over fpdd).

    Parameters:
        fs: sampling frequency value, forwarded to the sp helpers, to
            synthesize() and to sp.wavwrite().
        shift: time step between two consecutive f0 values; used to build
            the time column of the f0 curve.
        dftlen: DFT length forwarded to sp.mcep2spec / sp.fwbnd2linbnd.
        ff0: file of f0 values.
        flf0: file of log-f0 values; strictly positive entries are
            exponentiated, the others are kept as they are.
        fspec: file holding the spectrogram.
        fmcep: file holding mel-cepstral coefficients, converted with
            sp.mcep2spec.
        fpdd: file holding the PDD; it is turned into a noise mask by
            thresholding at 0.75.
        fnm: file holding the noise mask.
        fbndnm: file holding a band-compressed noise mask, expanded with
            sp.fwbnd2linbnd and then binarised at 0.5.
        fsyn: if given, the synthesized waveform is written to this file.
        verbose: verbosity level forwarded to synthesize() and
            sp.wavwrite().

    Returns:
        The waveform returned by synthesize().

    Raises:
        ValueError: if no f0 input (ff0/flf0) or no spectral input
            (fspec/fmcep) is provided.
    '''
    # --- f0 curve ---------------------------------------------------------
    f0 = None
    if ff0:
        f0 = np.fromfile(ff0, dtype=np.float32)
    if flf0:
        f0 = np.fromfile(flf0, dtype=np.float32)
        positive = f0 > 0
        # Only strictly positive log values are mapped back to linear scale
        f0[positive] = np.exp(f0[positive])
    if f0 is None:
        raise ValueError('An f0 input file is required (ff0 or flf0).')

    nbframes = len(f0)
    ts = (shift) * np.arange(nbframes)
    f0s = np.vstack((ts, f0)).T  # One row per frame: [time, f0]

    # --- Amplitude spectral envelope --------------------------------------
    SPEC = None
    if fspec:
        SPEC = np.fromfile(fspec, dtype=np.float32)
        SPEC = SPEC.reshape((nbframes, -1))
    if fmcep:
        SPEC = np.fromfile(fmcep, dtype=np.float32)
        SPEC = SPEC.reshape((nbframes, -1))
        SPEC = sp.mcep2spec(SPEC, sp.bark_alpha(fs), dftlen)
    if SPEC is None:
        raise ValueError('A spectral input file is required (fspec or fmcep).')

    # --- Noise mask (optional) --------------------------------------------
    # Default to None so that synthesize() is still called properly when no
    # noise-related file is given.
    NM = None
    if fpdd:
        PDD = np.fromfile(fpdd, dtype=np.float32)
        PDD = PDD.reshape((nbframes, -1))
        thresh = 0.75  # DegottexG2015jhmpd
        NM = PDD.copy()
        # Values exactly equal to the threshold are left untouched
        NM[PDD < thresh] = 0.0
        NM[PDD > thresh] = 1.0
    if fnm:
        NM = np.fromfile(fnm, dtype=np.float32)
        NM = NM.reshape((nbframes, -1))
    if fbndnm:
        BNDNM = np.fromfile(fbndnm, dtype=np.float32)
        BNDNM = BNDNM.reshape((nbframes, -1))
        NM = sp.fwbnd2linbnd(BNDNM, fs, dftlen)
        # Force the expanded mask back to binary values
        NM[NM <= 0.5] = 0.0
        NM[NM > 0.5] = 1.0

    syn = synthesize(fs, f0s, SPEC, NM=NM, verbose=verbose)
    if fsyn:
        sp.wavwrite(fsyn, syn, fs, norm_abs=True, verbose=verbose)

    return syn
Exemplo n.º 2
0
def analysisf(
    fwav,
    shift=0.005,
    dftlen=4096,
    inf0txt_file=None,
    f0_min=60,
    f0_max=600,
    f0_file=None,
    f0_log=False,
    inf0bin_file=None,  # input f0 file in binary
    spec_file=None,
    spec_order=None,  # Mel-cepstral order for compressing the
    # spectrum (typically 59; None: no compression)
    pdd_file=None,
    pdd_order=None,  # Mel-cepstral order for compressing PDD
    # spectrum (typically 59; None: no compression)
    nm_file=None,
    nm_nbbnds=None,  # Number of mel-bands in the compressed mask
    # (None: no compression)
    verbose=1):
    '''
    Analyse a waveform file and write the requested features to files.

    Each feature is computed only if its output file is given, and is
    written as raw float32 using ndarray.tofile.

    Parameters:
        fwav: input waveform file, read with sp.wavread.
        shift: time shift forwarded to the f0 post-processing and to the
            spectral analysis.
        dftlen: DFT length forwarded to the spectral, PDD and mask steps.
        inf0txt_file: optional input f0 file, loaded with np.loadtxt.
        f0_min, f0_max: f0 limits forwarded to analysis_f0postproc.
        f0_file: output file for the second column of the f0 curve.
        f0_log: if True, the natural log of the f0 values is written.
        inf0bin_file: optional input f0 file in float32 binary; when
            given, it takes precedence over inf0txt_file.
        spec_file: output file for the spectrogram.
        spec_order: if not None, the spectrogram is compressed with
            sp.spec2mcep using this order before being written.
        pdd_file: output file for the PDD.
        pdd_order: if not None, the PDD is floored at 0.001 and compressed
            with sp.spec2mcep using this order before being written.
        nm_file: output file for the noise mask.
        nm_nbbnds: if set, the noise mask is compressed with
            sp.linbnd2fwbnd using this number of bands before being
            written.
        verbose: verbosity level; > 0 prints progress, > 2 also calls
            plot_features.

    Returns:
        None.
    '''
    wav, fs, _ = sp.wavread(fwav)

    if verbose > 0:
        print(
            'PM Analysis (dur={:.3f}s, fs={}Hz, f0 in [{},{}]Hz, shift={}s, dftlen={})'
            .format(len(wav) / float(fs), fs, f0_min, f0_max, shift, dftlen))

    f0s = None
    if inf0txt_file:
        f0s = np.loadtxt(inf0txt_file)

    # read input f0 file in float32 (ljuvela)
    if inf0bin_file:
        f0s = np.fromfile(inf0bin_file, dtype=np.float32)

    f0s = analysis_f0postproc(wav,
                              fs,
                              f0s,
                              f0_min=f0_min,
                              f0_max=f0_max,
                              shift=shift,
                              verbose=verbose)

    if f0_file:
        f0_values = f0s[:, 1]
        if verbose > 0:
            print('    Output F0 {} in: {}'.format(f0_values.shape, f0_file))
        if f0_log:
            f0_values = np.log(f0_values)
        f0_values.astype(np.float32).tofile(f0_file)

    # For each feature, the raw analysis result (SPEC, PDD, NM) is kept
    # separate from the possibly compressed version written to disk, so that
    # later steps never receive a compressed representation by accident.
    SPEC = None
    if spec_file:
        SPEC = analysis_spec(wav,
                             fs,
                             f0s,
                             shift=shift,
                             dftlen=dftlen,
                             verbose=verbose)
        spec_out = SPEC
        if spec_order is not None:
            spec_out = sp.spec2mcep(SPEC, sp.bark_alpha(fs), order=spec_order)
        if verbose > 0:
            print('    Output Spectrogram size={} in: {}'.format(
                spec_out.shape, spec_file))
        spec_out.astype(np.float32).tofile(spec_file)

    PDD = None
    if pdd_file or nm_file:
        PDD = analysis_pdd(wav, fs, f0s, dftlen=dftlen, verbose=verbose)

    if pdd_file:
        pdd_out = PDD
        if pdd_order is not None:
            # If asked, compress PDD (on a copy: the raw PDD is still needed
            # below for the noise mask)
            pdd_out = PDD.copy()
            pdd_out[pdd_out < 0.001] = 0.001  # From COVAREP
            pdd_out = sp.spec2mcep(pdd_out, sp.bark_alpha(fs), pdd_order)
        if verbose > 0:
            print('    Output PDD size={} in: {}'.format(
                pdd_out.shape, pdd_file))
        pdd_out.astype(np.float32).tofile(pdd_file)

    NM = None
    if nm_file:
        # The mask is estimated from the raw PDD, never from its cepstral
        # compression
        NM = analysis_nm(wav, fs, f0s, PDD, verbose=verbose)
        nm_out = NM
        if nm_nbbnds:
            # If asked, compress the noise mask using a number of mel bands
            nm_out = sp.linbnd2fwbnd(NM, fs, dftlen, nm_nbbnds)
        if verbose > 0:
            print('    Output Noise Mask size={} in: {}'.format(
                nm_out.shape, nm_file))
        nm_out.astype(np.float32).tofile(nm_file)

    if verbose > 2:
        # Plot the uncompressed features
        plot_features(wav=wav, fs=fs, f0s=f0s, SPEC=SPEC, PDD=PDD, NM=NM)
Exemplo n.º 3
0
def analysisf(
        fwav,
        shift=0.005,
        dftlen=4096,
        finf0txt=None,
        f0estimator='REAPER',
        f0_min=60,
        f0_max=600,
        ff0=None,
        f0_log=False,
        finf0bin=None,  # input f0 file in binary
        fspec=None,
        spec_mceporder=None,  # Mel-cepstral order for compressing the spectrogram (typically 59; None: no compression)
        spec_fwceporder=None,  # Frequency warped cepstral order (very similar to above, just faster and less precise) (typically 59; None: no compression)
        spec_nbfwbnds=None,  # Number of mel-bands in the compressed half log spectrogram (None: no compression)
        spec_nblinlogbnds=None,  # Number of linear-bands in the compressed half log spectrogram (None: no compression)
        fpdd=None,
        pdd_mceporder=None,  # Mel-cepstral order for compressing PDD spectrogram (typically 59; None: no compression)
        fnm=None,
        nm_nbfwbnds=None,  # Number of mel-bands in the compressed noise mask (None: no compression)
        preproc_fs=None,  # Resample the waveform
        preproc_hp=None,  # Cut-off of high-pass filter (e.g. 20Hz)
        verbose=1):
    '''
    Analyse a waveform file and write the requested features to files.

    Each feature is computed only if its output file is given, and is
    written as raw float32 using ndarray.tofile. The directory of each
    output file is created when it does not exist yet.

    Parameters:
        fwav: input waveform file, read with sp.wavread.
        shift: time shift forwarded to the f0 post-processing and to the
            spectral analysis.
        dftlen: DFT length forwarded to the spectral, PDD and mask steps.
        finf0txt: optional input f0 file, loaded with np.loadtxt.
        f0estimator: estimator name forwarded to analysis_f0postproc.
        f0_min, f0_max: f0 limits forwarded to analysis_f0postproc.
        ff0: output file for the second column of the f0 curve.
        f0_log: if True, the natural log of the f0 values is written.
        finf0bin: optional input f0 file in float32 binary; when given,
            it takes precedence over finf0txt.
        fspec: output file for the spectrogram.
        spec_mceporder, spec_fwceporder, spec_nbfwbnds, spec_nblinlogbnds:
            optional compressions of the written spectrogram; each one
            that is not None is applied, in this order.
            NOTE(review): spec_nblinlogbnds only triggers a log of the
            spectrogram here; its value is not used otherwise.
        fpdd: output file for the PDD.
        pdd_mceporder: if not None, the PDD is floored at 0.001 and
            compressed with sp.spec2mcep before being written.
        fnm: output file for the noise mask.
        nm_nbfwbnds: if set, the noise mask is compressed with
            sp.linbnd2fwbnd before being written.
        preproc_fs: if not None and different from the file's sampling
            frequency, the waveform is resampled to it first.
        preproc_hp: if not None, cut-off (Hz) of a 4th order Butterworth
            high-pass applied forward and backward with filtfilt.
        verbose: verbosity level; > 0 prints progress, > 2 also calls
            plot_features on the uncompressed features.

    Returns:
        None.
    '''
    wav, fs, _ = sp.wavread(fwav)

    if verbose > 0:
        print(
            'PML Analysis (dur={:.3f}s, fs={}Hz, f0 in [{},{}]Hz, shift={}s, dftlen={})'
            .format(len(wav) / float(fs), fs, f0_min, f0_max, shift, dftlen))

    if (preproc_fs is not None) and (preproc_fs != fs):
        if verbose > 0:
            print(
                '    Resampling the waveform (new fs={}Hz)'.format(preproc_fs))
        wav = sp.resample(wav, fs, preproc_fs, method=2, deterministic=True)
        fs = preproc_fs

    if preproc_hp is not None:
        if verbose > 0:
            print('    High-pass filter the waveform (cutt-off={}Hz)'.format(
                preproc_hp))
        # scipy.signal.butter expects the critical frequency normalised by
        # the Nyquist frequency, i.e. half the sampling frequency.
        b, a = sig.butter(4, preproc_hp / (0.5 * fs), btype='high')
        wav = sig.filtfilt(b, a, wav)

    f0s = None
    if finf0txt:
        f0s = np.loadtxt(finf0txt)

    # read input f0 file in float32 (ljuvela)
    if finf0bin:
        f0s = np.fromfile(finf0bin, dtype=np.float32)

    f0s = analysis_f0postproc(wav,
                              fs,
                              f0s,
                              f0_min=f0_min,
                              f0_max=f0_max,
                              shift=shift,
                              f0estimator=f0estimator,
                              verbose=verbose)

    # Copies of the uncompressed features, kept only for plotting. They all
    # default to None so that plotting works whichever outputs are requested.
    f0sori = f0s.copy() if verbose > 2 else None
    SPECori = None
    PDDori = None
    NMori = None

    if ff0:
        f0_values = f0s[:, 1]
        if verbose > 0:
            print('    Output F0 {} in: {}'.format(f0_values.shape, ff0))
        if f0_log:
            f0_values = np.log(f0_values)
        _analysisf_ensure_parent_dir(ff0)
        f0_values.astype(np.float32).tofile(ff0)

    SPEC = None
    if fspec:
        SPEC = analysis_spec(wav,
                             fs,
                             f0s,
                             shift=shift,
                             dftlen=dftlen,
                             verbose=verbose)
        if verbose > 2:
            SPECori = SPEC.copy()
        if spec_mceporder is not None:  # pragma: no cover
            # Cannot test this because it needs SPTK
            SPEC = sp.spec2mcep(SPEC, sp.bark_alpha(fs), order=spec_mceporder)
        if spec_fwceporder is not None:
            SPEC = sp.loghspec2fwcep(np.log(abs(SPEC)),
                                     fs,
                                     order=spec_fwceporder)
        if spec_nbfwbnds is not None:
            SPEC = sp.linbnd2fwbnd(np.log(abs(SPEC)), fs, dftlen,
                                   spec_nbfwbnds)
        if spec_nblinlogbnds is not None:
            SPEC = np.log(abs(SPEC))
        if verbose > 0:
            print('    Output Spectrogram size={} in: {}'.format(
                SPEC.shape, fspec))
        _analysisf_ensure_parent_dir(fspec)
        SPEC.astype(np.float32).tofile(fspec)

    PDD = None
    if fpdd or fnm:
        PDD = analysis_pdd(wav, fs, f0s, dftlen=dftlen, verbose=verbose)
        if verbose > 2:
            PDDori = PDD.copy()

    if fpdd:
        # The raw PDD stays in PDD (needed below for the noise mask); only
        # the written version is compressed.
        pdd_out = PDD
        if pdd_mceporder is not None:  # pragma: no cover
            # Cannot test this because it needs SPTK
            # If asked, compress PDD
            pdd_out = PDD.copy()
            pdd_out[pdd_out < 0.001] = 0.001  # From COVAREP
            pdd_out = sp.spec2mcep(pdd_out, sp.bark_alpha(fs), pdd_mceporder)
        if verbose > 0:
            print('    Output PDD size={} in: {}'.format(pdd_out.shape, fpdd))
        _analysisf_ensure_parent_dir(fpdd)
        pdd_out.astype(np.float32).tofile(fpdd)

    NM = None
    if fnm:
        # The mask is estimated from the raw PDD, never from its cepstral
        # compression
        NM = analysis_nm(wav, fs, f0s, PDD, verbose=verbose)
        if verbose > 2:
            NMori = NM.copy()
        # If asked, compress NM
        if nm_nbfwbnds:
            # If asked, compress the noise mask using a number of mel bands
            NM = sp.linbnd2fwbnd(NM, fs, dftlen, nm_nbfwbnds)
        if verbose > 0:
            print('    Output Noise Mask size={} in: {}'.format(NM.shape, fnm))
        _analysisf_ensure_parent_dir(fnm)
        NM.astype(np.float32).tofile(fnm)

    if verbose > 2:
        plot_features(wav=wav,
                      fs=fs,
                      f0s=f0sori,
                      SPEC=SPECori,
                      PDD=PDDori,
                      NM=NMori)  # pragma: no cover


def _analysisf_ensure_parent_dir(fpath):
    '''Create the directory containing fpath, with missing parents, if needed.'''
    dirpath = os.path.dirname(fpath)
    if dirpath != '' and (not os.path.isdir(dirpath)):
        os.makedirs(dirpath)
Exemplo n.º 4
0
def analysisf(fwav
    , shift=0.005
    , dftlen=4096
    , inf0txt_file=None, f0_min=60, f0_max=600, f0_file=None
    , spec_file=None, spec_order=None # Mel-cepstral order for compressing the
                            # spectrum (typically 59; None: no compression)
    , pdd_file=None, pdd_order=None   # Mel-cepstral order for compressing PDD
                            # spectrum (typically 59; None: no compression)
    , nm_file=None, nm_nbbnds=None  # Number of mel-bands in the compressed mask
                            # (None: no compression)
    , verbose=1):
    '''
    Analyse a waveform file and write the requested features to files.

    Each feature is computed only if its output file is given, and is
    written as raw float32 using ndarray.tofile.

    Parameters:
        fwav: input waveform file, read with sp.wavread.
        shift: time shift forwarded to the f0 post-processing and to the
            spectral analysis.
        dftlen: DFT length forwarded to the spectral, PDD and mask steps.
        inf0txt_file: optional input f0 file, loaded with np.loadtxt.
        f0_min, f0_max: f0 limits forwarded to analysis_f0postproc.
        f0_file: output file for the second column of the f0 curve.
        spec_file: output file for the spectrogram.
        spec_order: if not None, the spectrogram is compressed with
            sp.spec2mcep using this order before being written.
        pdd_file: output file for the PDD.
        pdd_order: if not None, the PDD is floored at 0.001 and compressed
            with sp.spec2mcep using this order before being written.
        nm_file: output file for the noise mask.
        nm_nbbnds: if set, the noise mask is compressed with
            sp.linbnd2fwbnd using this number of bands and forced back to
            binary values before being written.
        verbose: verbosity level; > 0 prints progress, > 2 also calls
            plot_features.

    Returns:
        None.
    '''
    wav, fs, _ = sp.wavread(fwav)

    if verbose>0: print('PM Analysis (dur={:.3f}s, fs={}Hz, f0 in [{},{}]Hz, shift={}s, dftlen={})'.format(len(wav)/float(fs), fs, f0_min, f0_max, shift, dftlen))

    f0s = None
    if inf0txt_file:
        f0s = np.loadtxt(inf0txt_file)

    f0s = analysis_f0postproc(wav, fs, f0s, f0_min=f0_min, f0_max=f0_max, shift=shift, verbose=verbose)

    if f0_file:
        f0_values = f0s[:,1]
        if verbose>0: print('    Output F0 {} in: {}'.format(f0_values.shape, f0_file))
        f0_values.astype(np.float32).tofile(f0_file)

    # For each feature, the raw analysis result (SPEC, PDD, NM) is kept
    # separate from the possibly compressed version written to disk, so that
    # later steps never receive a compressed representation by accident.
    SPEC = None
    if spec_file:
        SPEC = analysis_spec(wav, fs, f0s, shift=shift, dftlen=dftlen, verbose=verbose)
        spec_out = SPEC
        if spec_order is not None:
            spec_out = sp.spec2mcep(SPEC, sp.bark_alpha(fs), order=spec_order)
        if verbose>0: print('    Output Spectrogram size={} in: {}'.format(spec_out.shape, spec_file))
        spec_out.astype(np.float32).tofile(spec_file)

    PDD = None
    if pdd_file or nm_file:
        PDD = analysis_pdd(wav, fs, f0s, dftlen=dftlen, verbose=verbose)

    if pdd_file:
        pdd_out = PDD
        if pdd_order is not None:
            # If asked, compress PDD (on a copy: the raw PDD is still needed
            # below for the noise mask)
            pdd_out = PDD.copy()
            pdd_out[pdd_out<0.001] = 0.001 # From COVAREP
            pdd_out = sp.spec2mcep(pdd_out, sp.bark_alpha(fs), pdd_order)
        if verbose>0: print('    Output PDD size={} in: {}'.format(pdd_out.shape, pdd_file))
        pdd_out.astype(np.float32).tofile(pdd_file)

    NM = None
    if nm_file:
        # The mask is estimated from the raw PDD, never from its cepstral
        # compression
        NM = analysis_nm(wav, fs, f0s, PDD, verbose=verbose)
        nm_out = NM
        # If asked, compress NM
        if nm_nbbnds:
            # If asked, compress the noise mask using a number of mel bands
            nm_out = sp.linbnd2fwbnd(NM, fs, dftlen, nm_nbbnds)
            # Need to force to binary values because we don't use ambiguous values,
            # we use the binary version at synthesis time.
            nm_out[nm_out>=0.5] = 1.0
            nm_out[nm_out<0.5] = 0.0
        if verbose>0: print('    Output Noise Mask size={} in: {}'.format(nm_out.shape, nm_file))
        nm_out.astype(np.float32).tofile(nm_file)

    if verbose>2:
        # Plot the uncompressed features
        plot_features(wav=wav, fs=fs, f0s=f0s, SPEC=SPEC, PDD=PDD, NM=NM)
Exemplo n.º 5
0
def synthesizef(fs,
                shift=0.005,
                dftlen=4096,
                ff0=None,
                flf0=None,
                fspec=None,
                flspec=None,
                ffwlspec=None,
                ffwcep=None,
                fmcep=None,
                fpdd=None,
                fmpdd=None,
                fnm=None,
                ffwnm=None,
                nm_cont=False,
                fsyn=None,
                verbose=1):
    '''
    Call the synthesis from python using file inputs and outputs.

    Every feature file is read with np.fromfile as a flat float32 array;
    the 2-D features are then reshaped to one row per f0 value. When
    several alternative files are given for the same feature, the one
    tested last wins (e.g. flf0 over ff0, fmcep over the other spectral
    inputs, ffwnm over fnm over fmpdd over fpdd).

    Parameters:
        fs: sampling frequency value, forwarded to the sp helpers, to
            synthesize() and to sp.wavwrite().
        shift: time step between two consecutive f0 values; used to build
            the time column of the f0 curve.
        dftlen: DFT length forwarded to the sp decompression helpers.
        ff0: file of f0 values.
        flf0: file of log-f0 values; strictly positive entries are
            exponentiated, the others are kept as they are.
        fspec: file holding the spectrogram.
        flspec: file holding the log spectrogram (exponentiated here).
        ffwlspec: file holding a band-compressed log spectrogram, expanded
            with sp.fwbnd2linbnd (smooth=True) and exponentiated.
        ffwcep: file holding frequency warped cepstral coefficients,
            converted with sp.fwcep2loghspec and exponentiated.
        fmcep: file holding mel-cepstral coefficients, converted with
            sp.mcep2spec.
        fpdd: file holding the PDD; it is turned into a noise mask by
            thresholding at 0.75.
        fmpdd: file holding the mel-cepstral compressed PDD, converted
            with sp.mcep2spec and thresholded the same way.
        fnm: file holding the noise mask.
        ffwnm: file holding a band-compressed noise mask, expanded with
            sp.fwbnd2linbnd.
        nm_cont: forwarded as is to synthesize().
        fsyn: if given, the synthesized waveform is written to this file.
        verbose: verbosity level forwarded to synthesize() and
            sp.wavwrite().

    Returns:
        The waveform returned by synthesize().

    Raises:
        ValueError: if no f0 input (ff0/flf0) or no spectral input
            (fspec/flspec/ffwlspec/ffwcep/fmcep) is provided.
    '''
    # --- f0 curve ---------------------------------------------------------
    f0 = None
    if ff0:
        f0 = np.fromfile(ff0, dtype=np.float32)
    if flf0:
        f0 = np.fromfile(flf0, dtype=np.float32)
        positive = f0 > 0
        # Only strictly positive log values are mapped back to linear scale
        f0[positive] = np.exp(f0[positive])
    if f0 is None:
        raise ValueError('An f0 input file is required (ff0 or flf0).')

    nbframes = len(f0)
    ts = (shift) * np.arange(nbframes)
    f0s = np.vstack((ts, f0)).T  # One row per frame: [time, f0]

    # --- Amplitude spectral envelope --------------------------------------
    SPEC = None
    if fspec:
        SPEC = np.fromfile(fspec, dtype=np.float32)
        SPEC = SPEC.reshape((nbframes, -1))
    if flspec:
        SPEC = np.fromfile(flspec, dtype=np.float32)
        SPEC = np.exp(SPEC.reshape((nbframes, -1)))
    if ffwlspec:
        FWLSPEC = np.fromfile(ffwlspec, dtype=np.float32)
        FWLSPEC = FWLSPEC.reshape((nbframes, -1))
        SPEC = np.exp(sp.fwbnd2linbnd(FWLSPEC, fs, dftlen, smooth=True))
    if ffwcep:
        FWCEP = np.fromfile(ffwcep, dtype=np.float32)
        FWCEP = FWCEP.reshape((nbframes, -1))
        SPEC = np.exp(sp.fwcep2loghspec(FWCEP, fs, dftlen))
    if fmcep:  # pragma: no cover
        # Cannot test this because it needs SPTK
        MCEP = np.fromfile(fmcep, dtype=np.float32)
        MCEP = MCEP.reshape((nbframes, -1))
        SPEC = sp.mcep2spec(MCEP, sp.bark_alpha(fs), dftlen)
    if SPEC is None:
        raise ValueError('A spectral input file is required '
                         '(fspec, flspec, ffwlspec, ffwcep or fmcep).')

    # --- Noise mask (optional) --------------------------------------------
    NM = None
    pdd_thresh = 0.75  # For this value, see:
    # G. Degottex and D. Erro, "A uniform phase representation for the harmonic model in speech synthesis applications," EURASIP, Journal on Audio, Speech, and Music Processing - Special Issue: Models of Speech - In Search of Better Representations, vol. 2014, iss. 1, p. 38, 2014.
    PDD = None
    if fpdd:
        PDD = np.fromfile(fpdd, dtype=np.float32)
        PDD = PDD.reshape((nbframes, -1))
    if fmpdd:  # pragma: no cover
        # Cannot test this because it needs SPTK
        MPDD = np.fromfile(fmpdd, dtype=np.float32)
        MPDD = MPDD.reshape((nbframes, -1))
        PDD = sp.mcep2spec(MPDD, sp.bark_alpha(fs), dftlen)
    if PDD is not None:
        # Binarise the PDD; values exactly equal to the threshold are left
        # untouched.
        NM = PDD.copy()
        NM[PDD < pdd_thresh] = 0.0
        NM[PDD > pdd_thresh] = 1.0

    if fnm:
        NM = np.fromfile(fnm, dtype=np.float32)
        NM = NM.reshape((nbframes, -1))
    if ffwnm:
        FWNM = np.fromfile(ffwnm, dtype=np.float32)
        FWNM = FWNM.reshape((nbframes, -1))
        NM = sp.fwbnd2linbnd(FWNM, fs, dftlen)

    syn = synthesize(fs, f0s, SPEC, NM=NM, nm_cont=nm_cont, verbose=verbose)
    if fsyn:
        sp.wavwrite(fsyn, syn, fs, norm_max_ifneeded=True, verbose=verbose)

    return syn