Пример #1
0
    def test_GMM_train_and_convert(self):
        """Train a full-covariance GMM and convert features with it.

        A GMM is trained on random joint data, its parameters are loaded
        into a ``GMMConvertor``, and a static+delta input is converted with
        both the ``mlpg`` and the ``mmse`` conversion types.  Every converted
        output must have the same shape as the static input.
        """
        jnt = np.random.rand(100, 20)
        gmm_tr = GMMTrainer(n_mix=4, n_iter=100, covtype='full')
        gmm_tr.train(jnt)

        data = np.random.rand(200, 5)
        # static features with their delta features appended column-wise
        sddata = np.c_[data, delta(data)]
        gmm_cv = GMMConvertor(n_mix=4, covtype='full', gmmmode=None)
        gmm_cv.open_from_param(gmm_tr.param)

        # Assert inside the loop so that the output of every conversion type
        # is verified, not only the one that happens to be computed last.
        for cvtype in ('mlpg', 'mmse'):
            odata = gmm_cv.convert(sddata, cvtype=cvtype)
            assert data.shape == odata.shape
Пример #2
0
    def test_BlockDiagonalGMM(self):
        """Train a block-diagonal GMM and convert features with it.

        The GMM is trained with ``covtype='block_diag'`` on random joint
        data, its parameters are loaded into a ``GMMConvertor``, and a
        static+delta input is converted with both the ``mlpg`` and the
        ``mmse`` conversion types.  Every converted output must have the
        same shape as the static input.
        """
        jnt = np.random.rand(1000, 20)
        gmm_tr = GMMTrainer(n_mix=32, n_iter=10, covtype='block_diag')
        gmm_tr.train(jnt)

        data = np.random.rand(200, 5)
        sddata = static_delta(data)
        # NOTE(review): the trainer uses `block_diag` while the convertor is
        # created with `full` -- presumably intentional; confirm.
        gmm_cv = GMMConvertor(n_mix=32, covtype='full', gmmmode=None)
        gmm_cv.open_from_param(gmm_tr.param)

        # Assert inside the loop so that the output of every conversion type
        # is verified, not only the one that happens to be computed last.
        for cvtype in ('mlpg', 'mmse'):
            odata = gmm_cv.convert(sddata, cvtype=cvtype)
            assert data.shape == odata.shape
Пример #3
0
    def test_GMM_train_and_convert(self):
        """Check GMM training followed by `mlpg` and `mmse` conversion.

        Trains a full-covariance GMM on random joint data, loads the trained
        parameters into a ``GMMConvertor`` and verifies that each conversion
        type returns an array shaped like the static input.
        """
        jnt = np.random.rand(100, 20)
        gmm_tr = GMMTrainer(n_mix=4, n_iter=100, covtype='full')
        gmm_tr.train(jnt)

        data = np.random.rand(200, 5)
        # static features with their delta features appended column-wise
        sddata = np.c_[data, delta(data)]
        gmm_cv = GMMConvertor(
            n_mix=4, covtype='full', gmmmode=None)
        gmm_cv.open_from_param(gmm_tr.param)

        # Keep the two results in separate names so that neither one is
        # discarded before its shape has been checked.
        mlpg_odata = gmm_cv.convert(sddata, cvtype='mlpg')
        assert data.shape == mlpg_odata.shape

        mmse_odata = gmm_cv.convert(sddata, cvtype='mmse')
        assert data.shape == mmse_odata.shape
Пример #4
0
def feature_conversion(pconf, org_mceps, gmm, gmmmode=None):
    """Conversion of mel-cepstrums

    For every mel-cepstrum the 0th column is kept as is, the remaining
    columns are converted by the GMM, and both parts are joined again.

    Parameters
    ---------
    pconf : PairYAML,
        Class of PairYAML; `GMM_mcep_n_mix`, `GMM_mcep_covtype` and
        `GMM_mcep_cvtype` are read from it
    org_mceps : list, shape (`num_mceps`)
        List of mel-cepstrums (2-D arrays)
    gmm : object with a `param` attribute,
        Trained GMM whose `param` is loaded into the convertor
    gmmmode : str, optional
        Flag to parameter generation technique
        `None` : Normal VC
        `diff` : Differential VC
        Default set to `None`

    Returns
    ---------
    cv_mceps : list , shape(`num_mceps`)
        List of converted mel-cepstrums

    Raises
    ---------
    ValueError
        If `gmmmode` is neither `None` nor `diff`

    """
    # Validate once, before any model is built or any conversion is run, so
    # that an unsupported mode fails immediately (also for an empty list).
    if gmmmode is not None and gmmmode != 'diff':
        raise ValueError('gmmmode must be `None` or `diff`.')

    cvgmm = GMMConvertor(
        n_mix=pconf.GMM_mcep_n_mix,
        covtype=pconf.GMM_mcep_covtype,
        gmmmode=gmmmode,
    )
    cvgmm.open_from_param(gmm.param)

    sd = 1  # start dimension to convert
    cv_mceps = []
    for mcep in org_mceps:
        converted = cvgmm.convert(static_delta(mcep[:, sd:]),
                                  cvtype=pconf.GMM_mcep_cvtype)
        # put the untouched 0th column back in front of the converted part
        cvmcep = np.c_[mcep[:, 0], converted]
        if gmmmode == 'diff':
            # the converted columns are added onto the original ones
            cvmcep[:, sd:] += mcep[:, sd:]
        cv_mceps.append(cvmcep)

    return cv_mceps
Пример #5
0
def feature_conversion(pconf, org_mceps, gmm, gmmmode=None):
    """Conversion of mel-cepstrums

    The 0th column of each mel-cepstrum is passed through unchanged while
    the columns from index 1 on are converted with the GMM.

    Parameters
    ---------
    pconf : PairYAML,
        Class of PairYAML; `GMM_mcep_n_mix`, `GMM_mcep_covtype` and
        `GMM_mcep_cvtype` are read from it
    org_mceps : list, shape (`num_mceps`)
        List of mel-cepstrums (2-D arrays)
    gmm : object with a `param` attribute,
        Trained GMM whose `param` is loaded into the convertor
    gmmmode : str, optional
        Flag to parameter generation technique
        `None` : Normal VC
        `diff` : Differential VC
        Default set to `None`

    Returns
    ---------
    cv_mceps : list , shape(`num_mceps`)
        List of converted mel-cepstrums

    Raises
    ---------
    ValueError
        If `gmmmode` is neither `None` nor `diff`

    """
    # Reject unsupported modes up front instead of after the first (costly)
    # conversion; this also covers the case of an empty `org_mceps`.
    is_diff = gmmmode == 'diff'
    if gmmmode is not None and not is_diff:
        raise ValueError('gmmmode must be `None` or `diff`.')

    cvgmm = GMMConvertor(n_mix=pconf.GMM_mcep_n_mix,
                         covtype=pconf.GMM_mcep_covtype,
                         gmmmode=gmmmode,
                         )
    cvgmm.open_from_param(gmm.param)

    sd = 1  # start dimension to convert
    cv_mceps = []
    for mcep in org_mceps:
        cvmcep = cvgmm.convert(static_delta(mcep[:, sd:]),
                               cvtype=pconf.GMM_mcep_cvtype)
        # re-attach the unconverted 0th column
        cvmcep = np.c_[mcep[:, 0], cvmcep]
        if is_diff:
            # the converted columns are added onto the original ones
            cvmcep[:, sd:] += mcep[:, sd:]
        cv_mceps.append(cvmcep)

    return cv_mceps
Пример #6
0
    def test_GMM_train_and_convert(self):
        """Check GMM training, conversion and single-path training.

        First a full-covariance GMM is trained on random joint data and used
        to convert a static+delta input with the ``mlpg`` and ``mmse``
        conversion types; each output must be shaped like the static input.
        Then, after ``estimate_responsibility``, ``train_singlepath`` is run
        on two inputs of different width and the resulting mixture weights
        must agree.
        """
        jnt = np.random.rand(100, 20)
        gmm_tr = GMMTrainer(n_mix=4, n_iter=100, covtype='full')
        gmm_tr.train(jnt)

        data = np.random.rand(200, 5)
        # static features with their delta features appended column-wise
        sddata = np.c_[data, delta(data)]
        gmm_cv = GMMConvertor(
            n_mix=4, covtype='full', gmmmode=None)
        gmm_cv.open_from_param(gmm_tr.param)

        # Assert inside the loop so that the output of every conversion type
        # is verified, not only the one that happens to be computed last.
        for cvtype in ('mlpg', 'mmse'):
            odata = gmm_cv.convert(sddata, cvtype=cvtype)
            assert data.shape == odata.shape

        # test for singlepath
        Ajnt = np.random.rand(100, 120)
        Bjnt = np.random.rand(100, 140)
        gmm_tr.estimate_responsibility(jnt)
        Aparam = gmm_tr.train_singlepath(Ajnt)
        Bparam = gmm_tr.train_singlepath(Bjnt)
        assert np.allclose(Aparam.weights_, Bparam.weights_)
Пример #7
0
def main(*argv):
    """Convert the utterances of an evaluation list and write the waveforms.

    Reads the speaker and pair configuration, the GMM for mel-cepstrum
    conversion and the F0/GV statistics, then for every entry of the
    evaluation list analyzes the waveform, converts F0 and mel-cepstrum,
    applies the GV postfilter, synthesizes the result and writes it below
    ``<pair_dir>/test`` as ``*_VC.wav`` (normal VC) or ``*_DIFFVC.wav``
    (differential VC).

    Parameters
    ---------
    *argv : str
        Command-line arguments; ``sys.argv[1:]`` is used when none are given

    Raises
    ---------
    ValueError
        If ``--gmmmode`` is given and is not ``diff``

    """
    argv = argv if argv else sys.argv[1:]
    # Options for python
    description = 'estimate joint feature of source and target speakers'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-gmmmode',
                        '--gmmmode',
                        type=str,
                        default=None,
                        help='mode of the GMM [None, diff, or intra]')
    parser.add_argument('org', type=str, help='Original speaker')
    parser.add_argument('tar', type=str, help='Target speaker')
    parser.add_argument('org_yml',
                        type=str,
                        help='Yml file of the original speaker')
    parser.add_argument('pair_yml',
                        type=str,
                        help='Yml file of the speaker pair')
    parser.add_argument('eval_list_file',
                        type=str,
                        help='List file for evaluation')
    parser.add_argument('wav_dir',
                        type=str,
                        help='Directory path of source spekaer')
    parser.add_argument('pair_dir',
                        type=str,
                        help='Directory path of pair directory')
    args = parser.parse_args(argv)

    # Only normal VC (`None`) and differential VC (`diff`) have a synthesis
    # path below.  Reject every other mode here rather than failing later on
    # an unbound `wav`/`wavpath` after the first file has been analyzed.
    diff_mode = args.gmmmode == 'diff'
    if args.gmmmode is not None and not diff_mode:
        raise ValueError(
            'gmmmode must be `None` or `diff` for conversion, got {!r}.'
            .format(args.gmmmode))

    # read parameters from speaker yml
    sconf = SpeakerYML(args.org_yml)
    pconf = PairYML(args.pair_yml)

    # read GMM for mcep
    mcepgmmpath = os.path.join(args.pair_dir, 'model/GMM_mcep.pkl')
    mcepgmm = GMMConvertor(
        n_mix=pconf.GMM_mcep_n_mix,
        covtype=pconf.GMM_mcep_covtype,
        gmmmode=args.gmmmode,
    )
    # NOTE: joblib.load unpickles the file; only load trusted model files.
    param = joblib.load(mcepgmmpath)
    mcepgmm.open_from_param(param)
    print("GMM for mcep conversion mode: {}".format(args.gmmmode))

    # read F0 statistics
    stats_dir = os.path.join(args.pair_dir, 'stats')
    orgstatspath = os.path.join(stats_dir, args.org + '.h5')
    orgstats_h5 = HDF5(orgstatspath, mode='r')
    orgf0stats = orgstats_h5.read(ext='f0stats')
    orgstats_h5.close()

    # read F0 and GV statistics for target
    tarstatspath = os.path.join(stats_dir, args.tar + '.h5')
    tarstats_h5 = HDF5(tarstatspath, mode='r')
    tarf0stats = tarstats_h5.read(ext='f0stats')
    targvstats = tarstats_h5.read(ext='gv')
    tarstats_h5.close()

    # read GV statistics for converted mcep
    cvgvstatspath = os.path.join(args.pair_dir, 'model', 'cvgv.h5')
    cvgvstats_h5 = HDF5(cvgvstatspath, mode='r')
    cvgvstats = cvgvstats_h5.read(ext='cvgv')
    diffcvgvstats = cvgvstats_h5.read(ext='diffcvgv')
    cvgvstats_h5.close()

    mcepgv = GV()
    f0stats = F0statistics()

    # construct FeatureExtractor class
    feat = FeatureExtractor(analyzer=sconf.analyzer,
                            fs=sconf.wav_fs,
                            fftl=sconf.wav_fftl,
                            shiftms=sconf.wav_shiftms,
                            minf0=sconf.f0_minf0,
                            maxf0=sconf.f0_maxf0)

    # construct Synthesizer class
    synthesizer = Synthesizer(fs=sconf.wav_fs,
                              fftl=sconf.wav_fftl,
                              shiftms=sconf.wav_shiftms)

    # test directory
    test_dir = os.path.join(args.pair_dir, 'test')
    os.makedirs(os.path.join(test_dir, args.org), exist_ok=True)

    # conversion in each evaluation file
    with open(args.eval_list_file, 'r') as fp:
        for line in fp:
            # open wav file
            f = line.rstrip()
            wavf = os.path.join(args.wav_dir, f + '.wav')
            fs, x = wavfile.read(wavf)
            # `np.float` was removed from NumPy; `np.float64` is the same
            # dtype the old alias resolved to.
            x = x.astype(np.float64)
            x = low_cut_filter(x, fs, cutoff=70)
            assert fs == sconf.wav_fs

            # analyze F0, mcep, and ap
            f0, spc, ap = feat.analyze(x)
            mcep = feat.mcep(dim=sconf.mcep_dim, alpha=sconf.mcep_alpha)
            mcep_0th = mcep[:, 0]

            # convert F0
            cvf0 = f0stats.convert(f0, orgf0stats, tarf0stats)

            # convert mcep (0th column is kept, the rest is converted)
            cvmcep_wopow = mcepgmm.convert(static_delta(mcep[:, 1:]),
                                           cvtype=pconf.GMM_mcep_cvtype)
            cvmcep = np.c_[mcep_0th, cvmcep_wopow]

            if diff_mode:
                # synthesis DIFFVC w/ GV
                cvmcep[:, 0] = 0.0
                cvmcep_wGV = mcepgv.postfilter(mcep + cvmcep,
                                               targvstats,
                                               cvgvstats=diffcvgvstats,
                                               alpha=pconf.GV_morph_coeff,
                                               startdim=1) - mcep
                wav = synthesizer.synthesis_diff(
                    x,
                    cvmcep_wGV,
                    rmcep=mcep,
                    alpha=sconf.mcep_alpha,
                )
                wavpath = os.path.join(test_dir, f + '_DIFFVC.wav')
            else:
                # synthesis VC w/ GV
                cvmcep_wGV = mcepgv.postfilter(cvmcep,
                                               targvstats,
                                               cvgvstats=cvgvstats,
                                               alpha=pconf.GV_morph_coeff,
                                               startdim=1)
                wav = synthesizer.synthesis(
                    cvf0,
                    cvmcep_wGV,
                    ap,
                    rmcep=mcep,
                    alpha=sconf.mcep_alpha,
                )
                wavpath = os.path.join(test_dir, f + '_VC.wav')

            # write waveform
            # NOTE(review): this creates a directory named after the
            # utterance itself, while the waveform is written next to it --
            # confirm whether the parent directory of `wavpath` was meant.
            os.makedirs(os.path.join(test_dir, f), exist_ok=True)

            # clip to the int16 range before the cast to avoid wrap-around
            wav = np.clip(wav, -32768, 32767)
            wavfile.write(wavpath, fs, wav.astype(np.int16))
            print(wavpath)
Пример #8
0
# Remaining command-line options; both threshold options share the default
# taken from `base_acoustic_param.threshold_db`.
parser.add_argument('--threshold_db1', type=float, default=base_acoustic_param.threshold_db)
parser.add_argument('--threshold_db2', type=float, default=base_acoustic_param.threshold_db)
parser.add_argument('--dtype', type=str, default='int64')
# `nargs='+'` accepts one or more feature names
parser.add_argument('--ignore_feature', nargs='+', default=('feature1', 'feature2'))
parser.add_argument('--enable_overwrite', action='store_true')
arguments = parser.parse_args()

# read parameters from the two speaker yml files and the pair yml file
sconf1 = SpeakerYML(arguments.org_yml)
sconf2 = SpeakerYML(arguments.tar_yml)
pconf = PairYML(arguments.pair_yml)

# read GMM for mcep (normal VC mode: gmmmode=None)
mcepgmm = GMMConvertor(
    n_mix=pconf.GMM_mcep_n_mix,
    covtype=pconf.GMM_mcep_covtype,
    gmmmode=None,
)
# NOTE: joblib.load unpickles the file; only load trusted model files.
param = joblib.load(arguments.gmm)
mcepgmm.open_from_param(param)

# construct FeatureExtractor class for the first speaker configuration
feat1 = FeatureExtractor(
    analyzer=sconf1.analyzer,
    fs=sconf1.wav_fs,
    fftl=sconf1.wav_fftl,
    shiftms=sconf1.wav_shiftms,
    minf0=sconf1.f0_minf0,
    maxf0=sconf1.f0_maxf0,
)
feat2 = FeatureExtractor(