示例#1
0
def main(*argv):
    """Estimate F0 and GV statistics of one speaker and save them to HDF5.

    Features are read with ``read_feats`` from ``<pair_dir>/h5`` using the
    given list file, and the estimated statistics are saved into
    ``<pair_dir>/stats/<speaker>.h5`` under the ``f0stats`` and ``gv``
    extensions.

    Args:
        *argv: Command-line style arguments (``speaker list_file pair_dir``).
            When none are given, ``sys.argv[1:]`` is parsed instead.
    """
    argv = argv if argv else sys.argv[1:]
    # Options for python
    # NOTE(review): the description text looks copied from the joint-feature
    # script; it is left unchanged because it is user-facing output.
    description = 'estimate joint feature of source and target speakers'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('speaker', type=str, help='Input speaker label')
    parser.add_argument('list_file',
                        type=str,
                        help='List file of the input speaker')
    parser.add_argument('pair_dir',
                        type=str,
                        help='Statistics directory of the speaker')
    args = parser.parse_args(argv)

    # open h5 files
    h5_dir = os.path.join(args.pair_dir, 'h5')
    statspath = os.path.join(args.pair_dir, 'stats', args.speaker + '.h5')
    h5 = HDF5(statspath, mode='a')

    # Close the statistics file even when reading or estimation fails, so
    # the handle is never leaked.
    try:
        # estimate and save F0 statistics
        f0stats = F0statistics()
        f0s = read_feats(args.list_file, h5_dir, ext='f0')
        f0stats = f0stats.estimate(f0s)
        h5.save(f0stats, ext='f0stats')
        print("f0stats save into " + statspath)

        # estimate and save GV statistics of the speaker's mcep
        gv = GV()
        mceps = read_feats(args.list_file, h5_dir, ext='mcep')
        gvstats = gv.estimate(mceps)
        h5.save(gvstats, ext='gv')
        print("gvstats save into " + statspath)
    finally:
        h5.close()
def main(*argv):
    """Estimate F0 and GV statistics of one speaker and write them to HDF5.

    Features are read with ``read_feats`` from ``<pair_dir>/h5`` using the
    given list file. The statistics file ``<pair_dir>/stats/<speaker>.h5``
    is opened in ``'w'`` mode and receives the ``f0stats`` and ``gv``
    entries.

    Args:
        *argv: Command-line style arguments (``speaker list_file pair_dir``).
            When none are given, ``sys.argv[1:]`` is parsed instead.
    """
    argv = argv if argv else sys.argv[1:]
    # Options for python
    description = 'estimate joint feature of source and target speakers'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('speaker', type=str,
                        help='Input speaker label')
    parser.add_argument('list_file', type=str,
                        help='List file of the input speaker')
    parser.add_argument('pair_dir', type=str,
                        help='Statistics directory of the speaker')
    args = parser.parse_args(argv)

    # open h5 files
    h5_dir = os.path.join(args.pair_dir, 'h5')
    statspath = os.path.join(args.pair_dir, 'stats', args.speaker + '.h5')
    h5 = HDF5(statspath, mode='w')

    # try/finally guarantees the output file is closed on every exit path,
    # including failures inside read_feats or the estimators.
    try:
        # estimate and save F0 statistics
        f0stats = F0statistics()
        f0s = read_feats(args.list_file, h5_dir, ext='f0')
        f0stats = f0stats.estimate(f0s)
        h5.save(f0stats, ext='f0stats')
        print("f0stats save into " + statspath)

        # estimate and save GV statistics of the speaker's mcep
        gv = GV()
        mceps = read_feats(args.list_file, h5_dir, ext='mcep')
        gvstats = gv.estimate(mceps)
        h5.save(gvstats, ext='gv')
        print("gvstats save into " + statspath)
    finally:
        h5.close()
示例#3
0
def main(*argv):
    """Train the mcep GMM and estimate GV statistics of converted features.

    Steps performed:
      1. Read the joint feature vector (``jnt``) from
         ``<pair_dir>/jnt/it<jnt_n_iter + 1>_jnt.h5``.
      2. Train a GMM on it and dump its parameters to
         ``<pair_dir>/model/GMM.pkl``.
      3. Convert the original speaker's mcep (normal and ``'diff'`` mode),
         estimate GV statistics of both, and save them to
         ``<pair_dir>/model/cvgv.h5`` as ``cvgv`` and ``diffcvgv``.

    Args:
        *argv: Command-line style arguments
            (``org_list_file pair_yml pair_dir``). When none are given,
            ``sys.argv[1:]`` is parsed instead.
    """
    argv = argv if argv else sys.argv[1:]
    # Options for python
    description = 'Train GMM and converted GV statistics'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('org_list_file',
                        type=str,
                        help='List file of original speaker')
    parser.add_argument('pair_yml',
                        type=str,
                        help='Yml file of the speaker pair')
    parser.add_argument('pair_dir',
                        type=str,
                        help='Directory path of h5 files')
    args = parser.parse_args(argv)

    # read pair-dependent yml file
    pconf = PairYML(args.pair_yml)

    # read joint feature vector; the input file is closed as soon as the
    # data has been read instead of staying open for the whole run
    jntf = os.path.join(args.pair_dir, 'jnt',
                        'it' + str(pconf.jnt_n_iter + 1) + '_jnt.h5')
    jnth5 = HDF5(jntf, mode='r')
    try:
        jnt = jnth5.read(ext='jnt')
    finally:
        jnth5.close()

    # train GMM for mcep using joint feature vector
    gmm = GMMTrainer(n_mix=pconf.GMM_mcep_n_mix,
                     n_iter=pconf.GMM_mcep_n_iter,
                     covtype=pconf.GMM_mcep_covtype)
    gmm.train(jnt)

    # save GMM (exist_ok avoids the check-then-create race)
    gmm_dir = os.path.join(args.pair_dir, 'model')
    os.makedirs(gmm_dir, exist_ok=True)
    gmmpath = os.path.join(gmm_dir, 'GMM.pkl')
    joblib.dump(gmm.param, gmmpath)
    print("Conversion model save into " + gmmpath)

    # calculate GV statistics of converted feature
    h5_dir = os.path.join(args.pair_dir, 'h5')
    org_mceps = read_feats(args.org_list_file, h5_dir, ext='mcep')

    cv_mceps = feature_conversion(pconf, org_mceps, gmm, gmmmode=None)
    diffcv_mceps = feature_conversion(pconf, org_mceps, gmm, gmmmode='diff')

    gv = GV()
    cvgvstats = gv.estimate(cv_mceps)
    diffcvgvstats = gv.estimate(diffcv_mceps)

    # save the statistics; the file is closed explicitly so the written
    # data is not left to interpreter shutdown
    statspath = os.path.join(gmm_dir, 'cvgv.h5')
    cvgvh5 = HDF5(statspath, mode='w')
    try:
        cvgvh5.save(cvgvstats, ext='cvgv')
        cvgvh5.save(diffcvgvstats, ext='diffcvgv')
    finally:
        cvgvh5.close()
    print("Converted gvstats save into " + statspath)
示例#4
0
    def test_GVstatistics(self):
        """Check GV estimation and postfiltering on random two-column data.

        The postfilter must keep the number of frames, and with
        ``alpha=0.0`` it must return the input values unchanged.
        """
        estimator = GV()
        # three random training matrices built from 100, 200, and 300 values
        training = [
            np.random.rand(100 * k).reshape(100 * k // 2, 2)
            for k in range(1, 4)
        ]
        stats = estimator.estimate(training)

        n_values = 100 * 5
        target = np.random.rand(n_values).reshape(n_values // 2, 2)

        # default postfilter: only the frame count is checked
        filtered = estimator.postfilter(target, stats, startdim=0)
        assert target.shape[0] == filtered.shape[0]

        # alpha=0.0: output is expected to equal the input element-wise
        untouched = estimator.postfilter(target, stats, alpha=0.0, startdim=0)
        assert np.all(target == untouched)
示例#5
0
    def test_GVstatistics(self):
        """Exercise ``GV.estimate`` and ``GV.postfilter`` with random input.

        Asserts that postfiltering preserves the frame count and that
        ``alpha=0.0`` leaves the data identical to the input.
        """
        gv_model = GV()

        # build the training list one matrix at a time (50, 100, 150 rows)
        samples = []
        for n_rows in (50, 100, 150):
            flat = np.random.rand(2 * n_rows)
            samples.append(flat.reshape(n_rows, 2))
        gv_stats = gv_model.estimate(samples)

        flat_eval = np.random.rand(500)
        eval_data = flat_eval.reshape(250, 2)

        result = gv_model.postfilter(eval_data, gv_stats, startdim=0)
        assert eval_data.shape[0] == result.shape[0]

        result = gv_model.postfilter(eval_data, gv_stats,
                                     alpha=0.0, startdim=0)
        assert np.all(eval_data == result)
示例#6
0
def main(*argv):
    """Convert the evaluation utterances of the original speaker with a GMM.

    For each non-empty entry of ``eval_list_file`` the waveform
    ``<wav_dir>/<entry>.wav`` is analyzed, its F0 and mel-cepstrum are
    converted, the GV postfilter is applied, and the synthesized waveform is
    written below ``<pair_dir>/test`` as ``<entry>_VC.wav`` (``--gmmmode``
    unset) or ``<entry>_DIFFVC.wav`` (``--gmmmode diff``).

    Args:
        *argv: Command-line style arguments; ``sys.argv[1:]`` is parsed when
            none are given.

    Raises:
        ValueError: If ``--gmmmode`` is neither unset nor ``'diff'`` when an
            utterance is about to be synthesized.
    """
    argv = argv if argv else sys.argv[1:]
    # Options for python
    description = 'estimate joint feature of source and target speakers'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-gmmmode',
                        '--gmmmode',
                        type=str,
                        default=None,
                        help='mode of the GMM [None, diff, or intra]')
    parser.add_argument('org', type=str, help='Original speaker')
    parser.add_argument('tar', type=str, help='Target speaker')
    parser.add_argument('org_yml',
                        type=str,
                        help='Yml file of the original speaker')
    parser.add_argument('pair_yml',
                        type=str,
                        help='Yml file of the speaker pair')
    parser.add_argument('eval_list_file',
                        type=str,
                        help='List file for evaluation')
    parser.add_argument('wav_dir',
                        type=str,
                        help='Directory path of source spekaer')
    parser.add_argument('pair_dir',
                        type=str,
                        help='Directory path of pair directory')
    args = parser.parse_args(argv)

    # read parameters from speaker yml
    sconf = SpeakerYML(args.org_yml)
    pconf = PairYML(args.pair_yml)

    # read GMM for mcep
    mcepgmmpath = os.path.join(args.pair_dir, 'model/GMM_mcep.pkl')
    mcepgmm = GMMConvertor(
        n_mix=pconf.GMM_mcep_n_mix,
        covtype=pconf.GMM_mcep_covtype,
        gmmmode=args.gmmmode,
    )
    param = joblib.load(mcepgmmpath)
    mcepgmm.open_from_param(param)
    print("GMM for mcep conversion mode: {}".format(args.gmmmode))

    # read F0 statistics
    stats_dir = os.path.join(args.pair_dir, 'stats')
    orgstatspath = os.path.join(stats_dir, args.org + '.h5')
    orgstats_h5 = HDF5(orgstatspath, mode='r')
    orgf0stats = orgstats_h5.read(ext='f0stats')
    orgstats_h5.close()

    # read F0 and GV statistics for target
    tarstatspath = os.path.join(stats_dir, args.tar + '.h5')
    tarstats_h5 = HDF5(tarstatspath, mode='r')
    tarf0stats = tarstats_h5.read(ext='f0stats')
    targvstats = tarstats_h5.read(ext='gv')
    tarstats_h5.close()

    # read GV statistics for converted mcep
    cvgvstatspath = os.path.join(args.pair_dir, 'model', 'cvgv.h5')
    cvgvstats_h5 = HDF5(cvgvstatspath, mode='r')
    cvgvstats = cvgvstats_h5.read(ext='cvgv')
    diffcvgvstats = cvgvstats_h5.read(ext='diffcvgv')
    cvgvstats_h5.close()

    mcepgv = GV()
    f0stats = F0statistics()

    # construct FeatureExtractor class
    feat = FeatureExtractor(analyzer=sconf.analyzer,
                            fs=sconf.wav_fs,
                            fftl=sconf.wav_fftl,
                            shiftms=sconf.wav_shiftms,
                            minf0=sconf.f0_minf0,
                            maxf0=sconf.f0_maxf0)

    # construct Synthesizer class
    synthesizer = Synthesizer(fs=sconf.wav_fs,
                              fftl=sconf.wav_fftl,
                              shiftms=sconf.wav_shiftms)

    # test directory
    test_dir = os.path.join(args.pair_dir, 'test')
    os.makedirs(os.path.join(test_dir, args.org), exist_ok=True)

    # conversion in each evaluation file
    with open(args.eval_list_file, 'r') as fp:
        for line in fp:
            f = line.rstrip()
            if not f:
                # a blank line would otherwise resolve to '<wav_dir>/.wav'
                continue

            # open wav file
            wavf = os.path.join(args.wav_dir, f + '.wav')
            fs, x = wavfile.read(wavf)
            assert fs == sconf.wav_fs
            # np.float was only an alias of the builtin float and has been
            # removed from NumPy; np.float64 is the same dtype
            x = x.astype(np.float64)
            x = low_cut_filter(x, fs, cutoff=70)

            # analyze F0, mcep, and ap
            f0, spc, ap = feat.analyze(x)
            mcep = feat.mcep(dim=sconf.mcep_dim, alpha=sconf.mcep_alpha)
            mcep_0th = mcep[:, 0]

            # convert F0
            cvf0 = f0stats.convert(f0, orgf0stats, tarf0stats)

            # convert mcep (0th coefficient is excluded from the GMM input
            # and re-attached afterwards)
            cvmcep_wopow = mcepgmm.convert(static_delta(mcep[:, 1:]),
                                           cvtype=pconf.GMM_mcep_cvtype)
            cvmcep = np.c_[mcep_0th, cvmcep_wopow]

            if args.gmmmode is None:
                # synthesis VC w/ GV
                cvmcep_wGV = mcepgv.postfilter(cvmcep,
                                               targvstats,
                                               cvgvstats=cvgvstats,
                                               alpha=pconf.GV_morph_coeff,
                                               startdim=1)
                wav = synthesizer.synthesis(
                    cvf0,
                    cvmcep_wGV,
                    ap,
                    rmcep=mcep,
                    alpha=sconf.mcep_alpha,
                )
                wavpath = os.path.join(test_dir, f + '_VC.wav')
            elif args.gmmmode == 'diff':
                # synthesis DIFFVC w/ GV: the postfilter is applied to
                # mcep + differential, then mcep is subtracted again
                cvmcep[:, 0] = 0.0
                cvmcep_wGV = mcepgv.postfilter(mcep + cvmcep,
                                               targvstats,
                                               cvgvstats=diffcvgvstats,
                                               alpha=pconf.GV_morph_coeff,
                                               startdim=1) - mcep
                wav = synthesizer.synthesis_diff(
                    x,
                    cvmcep_wGV,
                    rmcep=mcep,
                    alpha=sconf.mcep_alpha,
                )
                wavpath = os.path.join(test_dir, f + '_DIFFVC.wav')
            else:
                # Previously any other mode fell through both branches and
                # failed later with an unbound (or stale) wav/wavpath.
                raise ValueError(
                    "unsupported gmmmode for synthesis: {!r}".format(
                        args.gmmmode))

            # write waveform; make sure the directory that will actually
            # contain the output file exists
            os.makedirs(os.path.dirname(wavpath), exist_ok=True)

            wav = np.clip(wav, -32768, 32767)
            wavfile.write(wavpath, fs, wav.astype(np.int16))
            print(wavpath)
示例#7
0
def main(*argv):
    """Train mcep and codeap GMMs and estimate GV statistics after conversion.

    Steps performed:
      1. Read the joint features (``mcep`` and ``codeap``) from
         ``<pair_dir>/jnt/it<jnt_n_iter + 1>_jnt.h5``.
      2. Train one GMM per feature and dump the parameters to
         ``<pair_dir>/model/GMM_mcep.pkl`` and ``GMM_codeap.pkl``.
      3. Convert the original speaker's mcep (normal and ``'diff'`` mode)
         with the mcep GMM, estimate GV statistics of both, and save them to
         ``<pair_dir>/model/cvgv.h5`` as ``cvgv`` and ``diffcvgv``.

    Args:
        *argv: Command-line style arguments
            (``org_list_file pair_yml pair_dir``). When none are given,
            ``sys.argv[1:]`` is parsed instead.
    """
    argv = argv if argv else sys.argv[1:]
    # Options for python
    description = 'Train GMM and converted GV statistics'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('org_list_file', type=str,
                        help='List file of original speaker')
    parser.add_argument('pair_yml', type=str,
                        help='Yml file of the speaker pair')
    parser.add_argument('pair_dir', type=str,
                        help='Directory path of h5 files')
    args = parser.parse_args(argv)

    # read pair-dependent yml file
    pconf = PairYML(args.pair_yml)

    # read joint feature vectors; the input file is closed right after both
    # entries are read, even if a read fails
    jntf = os.path.join(args.pair_dir, 'jnt',
                        'it' + str(pconf.jnt_n_iter + 1) + '_jnt.h5')
    jnth5 = HDF5(jntf, mode='r')
    try:
        jnt = jnth5.read(ext='mcep')
        jnt_codeap = jnth5.read(ext='codeap')
    finally:
        jnth5.close()

    # train GMM for mcep using joint feature vector
    gmm = GMMTrainer(n_mix=pconf.GMM_mcep_n_mix,
                     n_iter=pconf.GMM_mcep_n_iter,
                     covtype=pconf.GMM_mcep_covtype)
    gmm.train(jnt)

    # train GMM for codeap using joint feature vector
    gmm_codeap = GMMTrainer(n_mix=pconf.GMM_codeap_n_mix,
                            n_iter=pconf.GMM_codeap_n_iter,
                            covtype=pconf.GMM_codeap_covtype)
    gmm_codeap.train(jnt_codeap)

    # save GMM (exist_ok avoids the check-then-create race)
    gmm_dir = os.path.join(args.pair_dir, 'model')
    os.makedirs(gmm_dir, exist_ok=True)
    gmmpath = os.path.join(gmm_dir, 'GMM_mcep.pkl')
    joblib.dump(gmm.param, gmmpath)
    print("Conversion model for mcep save into " + gmmpath)

    gmmpath_codeap = os.path.join(gmm_dir, 'GMM_codeap.pkl')
    joblib.dump(gmm_codeap.param, gmmpath_codeap)
    print("Conversion model for codeap save into " + gmmpath_codeap)

    # calculate GV statistics of converted feature
    h5_dir = os.path.join(args.pair_dir, 'h5')
    org_mceps = read_feats(args.org_list_file, h5_dir, ext='mcep')

    cv_mceps = feature_conversion(pconf, org_mceps, gmm, gmmmode=None)
    diffcv_mceps = feature_conversion(pconf, org_mceps, gmm, gmmmode='diff')

    gv = GV()
    cvgvstats = gv.estimate(cv_mceps)
    diffcvgvstats = gv.estimate(diffcv_mceps)

    # save the statistics; the file is closed explicitly so the written
    # data is not left to interpreter shutdown
    statspath = os.path.join(gmm_dir, 'cvgv.h5')
    cvgvh5 = HDF5(statspath, mode='w')
    try:
        cvgvh5.save(cvgvstats, ext='cvgv')
        cvgvh5.save(diffcvgvstats, ext='diffcvgv')
    finally:
        cvgvh5.close()
    print("Converted gvstats save into " + statspath)