def main(*argv):
    """Train the conversion GMM and estimate GV statistics of converted mcep.

    The joint feature vector is read from
    ``pair_dir/jnt/it{jnt_n_iter + 1}_jnt.h5``, a GMM is trained on it and its
    parameters are dumped to ``pair_dir/model/GMM.pkl``.  The mcep features of
    the original speaker are then converted (normal and ``diff`` mode) and the
    GV statistics of both results are written to ``pair_dir/model/cvgv.h5``.

    Parameters
    ---------
    *argv : str,
        Command-line arguments ``org_list_file pair_yml pair_dir``.
        ``sys.argv[1:]`` is used when no argument is given.
    """
    argv = argv if argv else sys.argv[1:]
    # Options for python
    description = 'Train GMM and converted GV statistics'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('org_list_file', type=str,
                        help='List file of original speaker')
    parser.add_argument('pair_yml', type=str,
                        help='Yml file of the speaker pair')
    parser.add_argument('pair_dir', type=str,
                        help='Directory path of h5 files')
    args = parser.parse_args(argv)

    # read pair-dependent yml file
    pconf = PairYML(args.pair_yml)

    # read joint feature vector; close the handle even when reading fails
    jntf = os.path.join(args.pair_dir, 'jnt',
                        'it' + str(pconf.jnt_n_iter + 1) + '_jnt.h5')
    jnth5 = HDF5(jntf, mode='r')
    try:
        jnt = jnth5.read(ext='jnt')
    finally:
        jnth5.close()

    # train GMM for mcep using joint feature vector
    gmm = GMMTrainer(n_mix=pconf.GMM_mcep_n_mix,
                     n_iter=pconf.GMM_mcep_n_iter,
                     covtype=pconf.GMM_mcep_covtype)
    gmm.train(jnt)

    # save GMM
    gmm_dir = os.path.join(args.pair_dir, 'model')
    os.makedirs(gmm_dir, exist_ok=True)
    gmmpath = os.path.join(gmm_dir, 'GMM.pkl')
    joblib.dump(gmm.param, gmmpath)
    print("Conversion model save into " + gmmpath)

    # calculate GV statistics of converted feature
    h5_dir = os.path.join(args.pair_dir, 'h5')
    org_mceps = read_feats(args.org_list_file, h5_dir, ext='mcep')
    cv_mceps = feature_conversion(pconf, org_mceps, gmm, gmmmode=None)
    diffcv_mceps = feature_conversion(pconf, org_mceps, gmm, gmmmode='diff')
    gv = GV()
    cvgvstats = gv.estimate(cv_mceps)
    diffcvgvstats = gv.estimate(diffcv_mceps)

    # write both statistics and always release the output file
    statspath = os.path.join(gmm_dir, 'cvgv.h5')
    cvgvh5 = HDF5(statspath, mode='w')
    try:
        cvgvh5.save(cvgvstats, ext='cvgv')
        cvgvh5.save(diffcvgvstats, ext='diffcvgv')
    finally:
        cvgvh5.close()
    print("Converted gvstats save into " + statspath)
def read_feats(listf, h5dir, ext='mcep'):
    """Read one feature array per entry of a list file.

    Every non-blank line of ``listf`` names a file ``h5dir/<line>.h5``; the
    array stored under ``ext`` is read from each of them.

    Parameters
    ---------
    listf : str,
        Path of list file
    h5dir : str,
        Path of hdf5 directory
    ext : str,
        `mcep` : mel-cepstrum
        `f0` : F0

    Returns
    ---------
    datalist : list of arrays
        One entry per non-blank line, in list-file order.

    """
    datalist = []
    with open(listf, 'r') as fp:
        for line in fp:
            f = line.rstrip()
            if not f:
                # a blank line would otherwise resolve to "<h5dir>/.h5"
                continue
            h5f = os.path.join(h5dir, f + '.h5')
            h5 = HDF5(h5f, mode='r')
            try:
                datalist.append(h5.read(ext))
            finally:
                # release the handle even when the read fails
                h5.close()

    return datalist
def _save_hdf5(self, h5f):
    """Save every entry of ``self.feats`` into the HDF5 file ``h5f``.

    The file is opened in ``"a"`` mode.  Each value is stored under its
    dictionary key as ``ext``; values whose dtype is ``np.float64`` are
    down-cast to ``np.float32`` before saving.

    Parameters
    ---------
    h5f : str,
        Path of the hdf5 file
    """
    h5 = HDF5(h5f, "a")
    try:
        for k, v in self.feats.items():
            if v.dtype == np.float64:
                v = v.astype(np.float32)
            h5.save(v, ext=k)
    finally:
        # close the file even if one of the saves raises
        h5.close()
def main(*argv):
    """Estimate F0 and GV statistics of one speaker and save them.

    ``f0`` and ``mcep`` features of the files in ``list_file`` are read from
    ``pair_dir/h5`` and the estimated statistics are stored in
    ``pair_dir/stats/<speaker>.h5`` under ``f0stats`` and ``gv``.

    Parameters
    ---------
    *argv : str,
        Command-line arguments ``speaker list_file pair_dir``.
        ``sys.argv[1:]`` is used when no argument is given.
    """
    argv = argv if argv else sys.argv[1:]
    # Options for python
    # NOTE(review): the description text does not match what this script
    # does (it looks copied from the joint-feature script) - confirm.
    description = 'estimate joint feature of source and target speakers'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('speaker', type=str,
                        help='Input speaker label')
    parser.add_argument('list_file', type=str,
                        help='List file of the input speaker')
    parser.add_argument('pair_dir', type=str,
                        help='Statistics directory of the speaker')
    args = parser.parse_args(argv)

    # open h5 files
    h5_dir = os.path.join(args.pair_dir, 'h5')
    statspath = os.path.join(args.pair_dir, 'stats', args.speaker + '.h5')
    h5 = HDF5(statspath, mode='a')
    try:
        # estimate and save F0 statistics
        f0stats = F0statistics()
        f0s = read_feats(args.list_file, h5_dir, ext='f0')
        f0stats = f0stats.estimate(f0s)
        h5.save(f0stats, ext='f0stats')
        print("f0stats save into " + statspath)

        # estimate and save GV of original and target speakers
        gv = GV()
        mceps = read_feats(args.list_file, h5_dir, ext='mcep')
        gvstats = gv.estimate(mceps)
        h5.save(gvstats, ext='gv')
        print("gvstats save into " + statspath)
    finally:
        # the statistics file is released even if an estimation step fails
        h5.close()
def read_ppg_feats(s_listf, t_listf, h5dir, ext='mcep',
                   source="BDL_1.27", target="RRBI_16k",
                   source_post="/home/anurag/kaldi/egs/librispeech/s5/post_source/",
                   target_post="/home/anurag/kaldi/egs/librispeech/s5/post_target/"):
    """Read source/target features and pick target rows by minimum ``KL`` score.

    For every line of ``s_listf`` the source feature is read from
    ``h5dir/<line>.h5``.  The target entry name is derived by replacing
    ``source`` with ``target`` in that line.  Two text matrices named
    ``post.<last 12 characters of the target entry>.ark`` are then loaded from
    ``source_post`` and ``target_post``; for each source row, the target row
    with the smallest ``KL(source_row, target_row)`` is appended to the target
    list.

    Parameters
    ---------
    s_listf : str,
        Path of the list file of the source speaker
    t_listf : str,
        Not used by this function; kept for interface compatibility
    h5dir : str,
        Path of hdf5 directory
    ext : str,
        Name of the feature to read from the hdf5 files
    source : str,
        Substring of a list entry that identifies the source speaker
    target : str,
        Replacement for ``source`` that yields the target entry
    source_post : str,
        Directory of the ``post.*.ark`` files of the source speaker
    target_post : str,
        Directory of the ``post.*.ark`` files of the target speaker

    Returns
    ---------
    s_mcep, t_mcep : list, list
        Lists built for the LAST list entry only (both empty when the list
        file has no entries).
    """
    s_datalist = []
    t_datalist = []
    # Defined up front so that an empty list file no longer raises
    # UnboundLocalError at the return statement.
    s_mcep = []
    t_mcep = []
    with open(s_listf, 'r') as fp:
        for line in fp:
            s_mcep = []
            t_mcep = []

            # source feature
            f = line.rstrip()
            h5 = HDF5(os.path.join(h5dir, f + '.h5'), mode='r')
            try:
                s_mcep.append(h5.read(ext))
            finally:
                h5.close()

            # target feature: same entry with the speaker tag swapped
            f = line.replace(source, target).rstrip()
            h5 = HDF5(os.path.join(h5dir, f + '.h5'), mode='r')
            try:
                t_mcep.append(h5.read(ext))
            finally:
                h5.close()

            # the matrix file name is keyed on the last 12 characters of the
            # target entry
            f = "post." + f[-12:] + ".ark"
            print(f)
            s_post = np.loadtxt(os.path.join(source_post, f))
            t_post = np.loadtxt(os.path.join(target_post, f))

            # for each source row keep the target row with the lowest score
            # (first one wins on ties, as list.index does)
            for s_row in s_post:
                score = [KL(s_row, t_row) for t_row in t_post]
                t_mcep.append(t_post[score.index(min(score))])

            s_datalist.append(s_mcep)
            t_datalist.append(t_mcep)

    print(len(s_datalist))
    print(len(t_datalist))
    # NOTE(review): only the lists of the last entry are returned although
    # s_datalist / t_datalist are accumulated above - confirm with callers
    # whether the accumulated lists were meant to be returned.
    return s_mcep, t_mcep
def main(*argv):
    """Extract acoustic features of one speaker and write analysis-synthesis wavs.

    For every entry of ``list_file`` whose ``pair_dir/h5/<entry>.h5`` does not
    exist yet (or always, with ``--overwrite``), the wav file is analyzed, the
    ``f0``, ``mcep``, ``npow`` and ``codeap`` features are saved into that
    hdf5 file, and a waveform re-synthesized from F0, mcep and ap is written
    to ``pair_dir/anasyn/<entry>.wav``.

    Parameters
    ---------
    *argv : str,
        Command-line arguments
        ``[--overwrite] speaker ymlf list_file wav_dir pair_dir``.
        ``sys.argv[1:]`` is used when no argument is given.
    """
    # argparse must not see the program name, hence the [1:]; passing the
    # full sys.argv shifted every positional argument by one.
    argv = argv if argv else sys.argv[1:]
    # Options for python
    dcp = 'Extract aoucstic features for the speaker'
    parser = argparse.ArgumentParser(description=dcp)
    parser.add_argument('--overwrite', default=False, action='store_true',
                        help='Overwrite h5 file')
    parser.add_argument('speaker', type=str,
                        help='Input speaker label')
    parser.add_argument('ymlf', type=str,
                        help='Yml file of the input speaker')
    parser.add_argument('list_file', type=str,
                        help='List file of the input speaker')
    parser.add_argument('wav_dir', type=str,
                        help='Wav file directory of the speaker')
    parser.add_argument('pair_dir', type=str,
                        help='Directory of the speaker pair')
    args = parser.parse_args(argv)

    # read parameters from speaker yml
    sconf = SpeakerYML(args.ymlf)
    h5_dir = os.path.join(args.pair_dir, 'h5')
    anasyn_dir = os.path.join(args.pair_dir, 'anasyn')
    os.makedirs(os.path.join(h5_dir, args.speaker), exist_ok=True)
    os.makedirs(os.path.join(anasyn_dir, args.speaker), exist_ok=True)

    # construct FeatureExtractor class
    feat = FeatureExtractor(analyzer=sconf.analyzer,
                            fs=sconf.wav_fs,
                            fftl=sconf.wav_fftl,
                            shiftms=sconf.wav_shiftms,
                            minf0=sconf.f0_minf0,
                            maxf0=sconf.f0_maxf0)

    # construct Synthesizer class
    synthesizer = Synthesizer(fs=sconf.wav_fs,
                              fftl=sconf.wav_fftl,
                              shiftms=sconf.wav_shiftms)

    # open list file
    with open(args.list_file, 'r') as fp:
        for line in fp:
            f = line.rstrip()
            h5f = os.path.join(h5_dir, f + '.h5')

            if os.path.exists(h5f) and not args.overwrite:
                print("Acoustic features already exist: " + h5f)
                continue

            wavf = os.path.join(args.wav_dir, f + '.wav')
            fs, x = wavfile.read(wavf)
            # np.float was removed from NumPy; float64 is what it aliased
            x = np.array(x, dtype=np.float64)
            x = low_cut_filter(x, fs, cutoff=70)
            assert fs == sconf.wav_fs

            print("Extract acoustic features: " + wavf)

            # analyze F0, spc, and ap
            f0, spc, ap = feat.analyze(x)
            mcep = feat.mcep(dim=sconf.mcep_dim, alpha=sconf.mcep_alpha)
            npow = feat.npow()
            codeap = feat.codeap()

            # save features into a hdf5 file (spc and ap are not stored)
            h5 = HDF5(h5f, mode='w')
            try:
                h5.save(f0, ext='f0')
                h5.save(mcep, ext='mcep')
                h5.save(npow, ext='npow')
                h5.save(codeap, ext='codeap')
            finally:
                h5.close()

            # analysis/synthesis using F0, mcep, and ap
            wav = synthesizer.synthesis(f0,
                                        mcep,
                                        ap,
                                        alpha=sconf.mcep_alpha,
                                        )
            # clip to the int16 range before the cast below
            wav = np.clip(wav, -32768, 32767)
            anasynf = os.path.join(anasyn_dir, f + '.wav')
            wavfile.write(anasynf, fs, np.array(wav, dtype=np.int16))
def main(*argv):
    """Convert the evaluation utterances of the original speaker with a GMM.

    The GMM parameters are loaded from ``pair_dir/model/GMM_mcep.pkl`` and
    the F0/GV statistics from ``pair_dir/stats`` and
    ``pair_dir/model/cvgv.h5``.  For each entry of ``eval_list_file`` the wav
    is analyzed, F0 and mcep are converted, the GV postfilter is applied and
    the waveform is written to ``pair_dir/test/<entry>_VC.wav``
    (``gmmmode`` is ``None``) or ``pair_dir/test/<entry>_DIFFVC.wav``
    (``gmmmode`` is ``'diff'``).

    Parameters
    ---------
    *argv : str,
        Command-line arguments ``[--gmmmode MODE] org tar org_yml pair_yml
        eval_list_file wav_dir pair_dir``.
        ``sys.argv[1:]`` is used when no argument is given.

    Raises
    ---------
    ValueError
        If ``gmmmode`` is neither ``None`` nor ``'diff'``; no synthesis path
        exists for any other mode in this script.
    """
    argv = argv if argv else sys.argv[1:]
    # Options for python
    description = 'estimate joint feature of source and target speakers'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-gmmmode', '--gmmmode', type=str, default=None,
                        help='mode of the GMM [None, diff, or intra]')
    parser.add_argument('org', type=str,
                        help='Original speaker')
    parser.add_argument('tar', type=str,
                        help='Target speaker')
    parser.add_argument('org_yml', type=str,
                        help='Yml file of the original speaker')
    parser.add_argument('pair_yml', type=str,
                        help='Yml file of the speaker pair')
    parser.add_argument('eval_list_file', type=str,
                        help='List file for evaluation')
    parser.add_argument('wav_dir', type=str,
                        help='Directory path of source spekaer')
    parser.add_argument('pair_dir', type=str,
                        help='Directory path of pair directory')
    args = parser.parse_args(argv)

    # read parameters from speaker yml
    sconf = SpeakerYML(args.org_yml)
    pconf = PairYML(args.pair_yml)

    # read GMM for mcep
    mcepgmmpath = os.path.join(args.pair_dir, 'model/GMM_mcep.pkl')
    mcepgmm = GMMConvertor(n_mix=pconf.GMM_mcep_n_mix,
                           covtype=pconf.GMM_mcep_covtype,
                           gmmmode=args.gmmmode,
                           )
    param = joblib.load(mcepgmmpath)
    mcepgmm.open_from_param(param)
    print("GMM for mcep conversion mode: {}".format(args.gmmmode))

    # read F0 statistics
    stats_dir = os.path.join(args.pair_dir, 'stats')
    orgstatspath = os.path.join(stats_dir, args.org + '.h5')
    orgstats_h5 = HDF5(orgstatspath, mode='r')
    try:
        orgf0stats = orgstats_h5.read(ext='f0stats')
    finally:
        orgstats_h5.close()

    # read F0 and GV statistics for target
    tarstatspath = os.path.join(stats_dir, args.tar + '.h5')
    tarstats_h5 = HDF5(tarstatspath, mode='r')
    try:
        tarf0stats = tarstats_h5.read(ext='f0stats')
        targvstats = tarstats_h5.read(ext='gv')
    finally:
        tarstats_h5.close()

    # read GV statistics for converted mcep
    cvgvstatspath = os.path.join(args.pair_dir, 'model', 'cvgv.h5')
    cvgvstats_h5 = HDF5(cvgvstatspath, mode='r')
    try:
        cvgvstats = cvgvstats_h5.read(ext='cvgv')
        diffcvgvstats = cvgvstats_h5.read(ext='diffcvgv')
    finally:
        cvgvstats_h5.close()

    mcepgv = GV()
    f0stats = F0statistics()

    # construct FeatureExtractor class
    feat = FeatureExtractor(analyzer=sconf.analyzer,
                            fs=sconf.wav_fs,
                            fftl=sconf.wav_fftl,
                            shiftms=sconf.wav_shiftms,
                            minf0=sconf.f0_minf0,
                            maxf0=sconf.f0_maxf0)

    # construct Synthesizer class
    synthesizer = Synthesizer(fs=sconf.wav_fs,
                              fftl=sconf.wav_fftl,
                              shiftms=sconf.wav_shiftms)

    # test directory
    test_dir = os.path.join(args.pair_dir, 'test')
    os.makedirs(os.path.join(test_dir, args.org), exist_ok=True)

    # conversion in each evaluation file
    with open(args.eval_list_file, 'r') as fp:
        for line in fp:
            # open wav file
            f = line.rstrip()
            wavf = os.path.join(args.wav_dir, f + '.wav')
            fs, x = wavfile.read(wavf)
            # np.float was removed from NumPy; float64 is what it aliased
            x = x.astype(np.float64)
            x = low_cut_filter(x, fs, cutoff=70)
            assert fs == sconf.wav_fs

            # analyze F0, mcep, and ap
            f0, spc, ap = feat.analyze(x)
            mcep = feat.mcep(dim=sconf.mcep_dim, alpha=sconf.mcep_alpha)
            mcep_0th = mcep[:, 0]

            # convert F0
            cvf0 = f0stats.convert(f0, orgf0stats, tarf0stats)

            # convert mcep; the 0-th coefficient is excluded from the
            # conversion and re-attached from the input afterwards
            cvmcep_wopow = mcepgmm.convert(static_delta(mcep[:, 1:]),
                                           cvtype=pconf.GMM_mcep_cvtype)
            cvmcep = np.c_[mcep_0th, cvmcep_wopow]

            if args.gmmmode is None:
                # synthesis VC w/ GV
                cvmcep_wGV = mcepgv.postfilter(cvmcep,
                                               targvstats,
                                               cvgvstats=cvgvstats,
                                               alpha=pconf.GV_morph_coeff,
                                               startdim=1)
                wav = synthesizer.synthesis(cvf0,
                                            cvmcep_wGV,
                                            ap,
                                            rmcep=mcep,
                                            alpha=sconf.mcep_alpha,
                                            )
                wavpath = os.path.join(test_dir, f + '_VC.wav')
            elif args.gmmmode == 'diff':
                # synthesis DIFFVC w/ GV
                cvmcep[:, 0] = 0.0
                cvmcep_wGV = mcepgv.postfilter(mcep + cvmcep,
                                               targvstats,
                                               cvgvstats=diffcvgvstats,
                                               alpha=pconf.GV_morph_coeff,
                                               startdim=1) - mcep
                wav = synthesizer.synthesis_diff(x,
                                                 cvmcep_wGV,
                                                 rmcep=mcep,
                                                 alpha=sconf.mcep_alpha,
                                                 )
                wavpath = os.path.join(test_dir, f + '_DIFFVC.wav')
            else:
                # Without this branch `wav` was unbound (or stale from the
                # previous utterance) for any other mode.
                raise ValueError(
                    "unsupported gmmmode for synthesis: {}".format(
                        args.gmmmode))

            # write waveform; make sure the directory that will actually
            # hold the file exists (not a directory named after the entry)
            os.makedirs(os.path.dirname(wavpath), exist_ok=True)
            wav = np.clip(wav, -32768, 32767)
            wavfile.write(wavpath, fs, wav.astype(np.int16))
            print(wavpath)
def feat2hdf5(mlfb, hdf5, ext="feats"):
    """Save ``mlfb`` under ``ext`` into an ``.h5`` file next to ``hdf5``.

    The output path is the parent directory of ``hdf5`` joined with the stem
    of ``hdf5`` plus ``".h5"``, i.e. the last suffix of ``hdf5`` is replaced
    by ``.h5``.  The file is opened in ``"a"`` mode.

    Parameters
    ---------
    mlfb : object,
        Feature passed as-is to ``HDF5.save``
    hdf5 : str or path-like,
        Path from which the output file name is derived
    ext : str,
        Name under which the feature is saved
    """
    src = Path(hdf5)
    h5f = src.parent / (str(src.stem) + ".h5")
    h5 = HDF5(str(h5f), "a")
    try:
        h5.save(mlfb, ext=ext)
    finally:
        # close the file even if the save raises
        h5.close()
def _save_hdf5(self, h5f):
    """Save every entry of ``self.feats`` into the HDF5 file ``h5f``.

    The file is opened in ``"a"`` mode and each value is stored under its
    dictionary key as ``ext``.

    Parameters
    ---------
    h5f : str,
        Path of the hdf5 file
    """
    h5 = HDF5(h5f, "a")
    try:
        for k, v in self.feats.items():
            h5.save(v, ext=k)
    finally:
        # close the file even if one of the saves raises
        h5.close()
def main(*argv):
    """Iteratively estimate the joint feature vector of a speaker pair.

    The first pass aligns original and target mcep (without the 0-th
    dimension) directly.  Every following pass trains a GMM on the current
    joint feature, converts the original mcep with it and re-aligns using the
    converted feature.  After the last pass the joint feature, the last
    trained GMM and the per-file warping functions are saved under
    ``pair_dir/jnt``, ``pair_dir/GMM`` and ``pair_dir/twf`` with the prefix
    ``it<final iteration number>``.

    Parameters
    ---------
    *argv : str,
        Command-line arguments
        ``pair_yml org_list_file tar_list_file pair_dir``.
        ``sys.argv[1:]`` is used when no argument is given.

    Raises
    ---------
    ValueError
        If the list files contain no entry.
    """
    argv = argv if argv else sys.argv[1:]
    # Options for python
    description = 'estimate joint feature of source and target speakers'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('pair_yml', type=str,
                        help='Yml file of the speaker pair')
    parser.add_argument('org_list_file', type=str,
                        help='List file of original speaker')
    parser.add_argument('tar_list_file', type=str,
                        help='List file of target speaker')
    parser.add_argument('pair_dir', type=str,
                        help='Directory path of h5 files')
    args = parser.parse_args(argv)

    # read pair-dependent yml file
    pconf = PairYML(args.pair_yml)

    # read source and target features from HDF file
    h5_dir = os.path.join(args.pair_dir, 'h5')
    org_mceps = read_feats(args.org_list_file, h5_dir, ext='mcep')
    org_npows = read_feats(args.org_list_file, h5_dir, ext='npow')
    tar_mceps = read_feats(args.tar_list_file, h5_dir, ext='mcep')
    tar_npows = read_feats(args.tar_list_file, h5_dir, ext='npow')
    assert len(org_mceps) == len(tar_mceps)
    assert len(org_npows) == len(tar_npows)
    assert len(org_mceps) == len(org_npows)

    itnum = 1
    sd = 1  # start dimension for alignment of mcep
    num_files = len(org_mceps)
    if num_files == 0:
        raise ValueError('no feature file is listed in ' + args.org_list_file)

    print('{}-th joint feature extraction starts.'.format(itnum))

    # first iteration
    # The warping functions of this pass are kept so that they can still be
    # saved when no refinement pass runs (jnt_n_iter == 1).
    trgmm = None
    twfs = []
    jnt_parts = []
    for i in range(num_files):
        jdata, twf, mcd = get_aligned_jointdata(org_mceps[i][:, sd:],
                                                org_npows[i],
                                                tar_mceps[i][:, sd:],
                                                tar_npows[i])
        print('distortion [dB] for {}-th file: {}'.format(i + 1, mcd))
        jnt_parts.append(jdata)
        twfs.append(twf)
    # stack all files once instead of re-copying the array per file
    jnt = np.concatenate(jnt_parts, axis=0)
    itnum += 1

    # second through final iteration
    while itnum < pconf.jnt_n_iter + 1:
        print('{}-th joint feature extraction starts.'.format(itnum))
        # train GMM
        trgmm = GMMTrainer(n_mix=pconf.GMM_mcep_n_mix,
                           n_iter=pconf.GMM_mcep_n_iter,
                           covtype=pconf.GMM_mcep_covtype)
        trgmm.train(jnt)
        cvgmm = GMMConvertor(n_mix=pconf.GMM_mcep_n_mix,
                             covtype=pconf.GMM_mcep_covtype)
        cvgmm.open_from_param(trgmm.param)

        twfs = []
        jnt_parts = []
        for i in range(num_files):
            cvmcep = cvgmm.convert(static_delta(org_mceps[i][:, sd:]),
                                   cvtype=pconf.GMM_mcep_cvtype)
            jdata, twf, mcd = get_aligned_jointdata(org_mceps[i][:, sd:],
                                                    org_npows[i],
                                                    tar_mceps[i][:, sd:],
                                                    tar_npows[i],
                                                    cvdata=cvmcep)
            print('distortion [dB] for {}-th file: {}'.format(i + 1, mcd))
            jnt_parts.append(jdata)
            twfs.append(twf)
        jnt = np.concatenate(jnt_parts, axis=0)

        itnum += 1

    # save joint feature vector
    jnt_dir = os.path.join(args.pair_dir, 'jnt')
    os.makedirs(jnt_dir, exist_ok=True)
    jntpath = os.path.join(jnt_dir, 'it' + str(itnum) + '_jnt.h5')
    jnth5 = HDF5(jntpath, mode='w')
    try:
        jnth5.save(jnt, ext='jnt')
    finally:
        jnth5.close()

    # save GMM (only exists when at least one refinement pass was run)
    if trgmm is not None:
        gmm_dir = os.path.join(args.pair_dir, 'GMM')
        os.makedirs(gmm_dir, exist_ok=True)
        gmmpath = os.path.join(gmm_dir, 'it' + str(itnum) + '_gmm.pkl')
        joblib.dump(trgmm.param, gmmpath)

    # save twf
    twf_dir = os.path.join(args.pair_dir, 'twf')
    os.makedirs(twf_dir, exist_ok=True)
    with open(args.org_list_file, 'r') as fp:
        for line, twf in zip(fp, twfs):
            f = os.path.basename(line.rstrip())
            twfpath = os.path.join(
                twf_dir, 'it' + str(itnum) + '_' + f + '.h5')
            twfh5 = HDF5(twfpath, mode='w')
            try:
                twfh5.save(twf, ext='twf')
            finally:
                twfh5.close()
def main(*argv):
    """Build and save joint feature vectors (mcep and codeap) of a speaker pair.

    The mcep features of both speakers are aligned with
    ``align_feature_vectors``; the resulting warping functions are then
    reused to align the codeap features.  Both joint features are stored in
    ``pair_dir/jnt/it<jnt_n_iter>_jnt.h5`` and each warping function in
    ``pair_dir/twf/it<jnt_n_iter>_<entry>.h5``.

    Parameters
    ---------
    *argv : str,
        Command-line arguments ``org_yml tar_yml pair_yml org_list_file
        tar_list_file pair_dir``.
        ``sys.argv[1:]`` is used when no argument is given.
    """
    if not argv:
        argv = sys.argv[1:]

    # Options for python
    parser = argparse.ArgumentParser(
        description='estimate joint feature of source and target speakers')
    positional = (
        ('org_yml', 'Yml file of the original speaker'),
        ('tar_yml', 'Yml file of the target speaker'),
        ('pair_yml', 'Yml file of the speaker pair'),
        ('org_list_file', 'List file of original speaker'),
        ('tar_list_file', 'List file of target speaker'),
        ('pair_dir', 'Directory path of h5 files'),
    )
    for arg_name, arg_help in positional:
        parser.add_argument(arg_name, type=str, help=arg_help)
    args = parser.parse_args(argv)

    # speaker-dependent and pair-dependent configuration
    oconf = SpeakerYML(args.org_yml)
    tconf = SpeakerYML(args.tar_yml)
    pconf = PairYML(args.pair_yml)

    # load the features of both speakers from the HDF files
    h5_dir = os.path.join(args.pair_dir, 'h5')
    org_mceps = read_feats(args.org_list_file, h5_dir, ext='mcep')
    org_npows = read_feats(args.org_list_file, h5_dir, ext='npow')
    tar_mceps = read_feats(args.tar_list_file, h5_dir, ext='mcep')
    tar_npows = read_feats(args.tar_list_file, h5_dir, ext='npow')
    assert len(org_mceps) == len(tar_mceps)
    assert len(org_npows) == len(tar_npows)
    assert len(org_mceps) == len(org_npows)

    # dtw between original and target w/o 0th and silence
    print('## Alignment mcep w/o 0-th and silence ##')
    jmceps, twfs = align_feature_vectors(
        org_mceps,
        org_npows,
        tar_mceps,
        tar_npows,
        pconf,
        opow=oconf.power_threshold,
        tpow=tconf.power_threshold,
        itnum=pconf.jnt_n_iter,
        sd=1,
    )
    jnt_mcep = transform_jnt(jmceps)

    # joint feature for codeap, aligned with the warping functions above
    print('## Alignment codeap using given twf ##')
    org_codeaps = read_feats(args.org_list_file, h5_dir, ext='codeap')
    tar_codeaps = read_feats(args.tar_list_file, h5_dir, ext='codeap')
    jcodeaps = []
    for idx, org_codeap in enumerate(org_codeaps):
        # only the joint feature of the returned triple is needed
        jcodeap, _, _ = get_alignment(
            org_codeap,
            org_npows[idx],
            tar_codeaps[idx],
            tar_npows[idx],
            opow=oconf.power_threshold,
            tpow=tconf.power_threshold,
            given_twf=twfs[idx],
        )
        jcodeaps.append(jcodeap)
    jnt_codeap = transform_jnt(jcodeaps)

    # save joint feature vectors
    jnt_dir = os.path.join(args.pair_dir, 'jnt')
    os.makedirs(jnt_dir, exist_ok=True)
    jntpath = os.path.join(jnt_dir,
                           'it' + str(pconf.jnt_n_iter) + '_jnt.h5')
    jnth5 = HDF5(jntpath, mode='a')
    for joint, joint_ext in ((jnt_mcep, 'mcep'), (jnt_codeap, 'codeap')):
        jnth5.save(joint, ext=joint_ext)
    jnth5.close()

    # save twfs, one file per entry of the original list
    twf_dir = os.path.join(args.pair_dir, 'twf')
    os.makedirs(twf_dir, exist_ok=True)
    with open(args.org_list_file, 'r') as list_fp:
        for entry, twf in zip(list_fp, twfs):
            stem = os.path.basename(entry.rstrip())
            twf_name = 'it' + str(pconf.jnt_n_iter) + '_' + stem + '.h5'
            twfh5 = HDF5(os.path.join(twf_dir, twf_name), mode='a')
            twfh5.save(twf, ext='twf')
            twfh5.close()
def main():
    """Extract acoustic features for the ``converted`` or ``target`` file set.

    The set is selected by the module-level ``args.file`` (``'con'`` or
    ``'tar'``).  For every entry of ``./list/<set>.list`` without an existing
    ``./<set>/h5f/<entry>.h5``, the wav ``./<set>/wav/<entry>.wav`` is
    analyzed, the ``f0``, ``mcep``, ``npow`` and ``codeap`` features are
    saved, and a waveform re-synthesized from F0, mcep and ap is written to
    ``./<set>/anasyn/<entry>.wav``.

    Raises
    ---------
    ValueError
        If ``args.file`` is neither ``'con'`` nor ``'tar'``.
    """
    if args.file == 'con':
        subset = 'converted'
    elif args.file == 'tar':
        subset = 'target'
    else:
        raise ValueError("The file is incorrect")

    feat = FeatureExtractor(analyzer='world',
                            fs=22050,
                            fftl=1024,
                            shiftms=5,
                            minf0=args.minf0,
                            maxf0=args.maxf0)

    # construct Synthesizer class
    synthesizer = Synthesizer(fs=22050,
                              fftl=1024,
                              shiftms=5)

    # open list file
    with open('./list/' + subset + '.list', 'r') as fp:
        for line in fp:
            f = line.rstrip()
            h5f = os.path.join('./' + subset + '/h5f/', f + '.h5')

            if os.path.exists(h5f):
                print("Acoustic features already exist: " + h5f)
                continue

            wavf = os.path.join('./' + subset + '/wav/', f + '.wav')
            fs, x = wavfile.read(wavf)
            # np.float was removed from NumPy; float64 is what it aliased
            x = np.array(x, dtype=np.float64)
            x = low_cut_filter(x, fs, cutoff=70)

            print("Extract acoustic features: " + wavf)

            # analyze F0, spc, and ap
            f0, spc, ap = feat.analyze(x)
            mcep = feat.mcep(dim=34, alpha=0.544)
            npow = feat.npow()
            codeap = feat.codeap()

            # save features into a hdf5 file (spc and ap are not stored)
            h5 = HDF5(h5f, mode='w')
            try:
                h5.save(f0, ext='f0')
                h5.save(mcep, ext='mcep')
                h5.save(npow, ext='npow')
                h5.save(codeap, ext='codeap')
            finally:
                h5.close()

            # analysis/synthesis using F0, mcep, and ap
            wav = synthesizer.synthesis(f0,
                                        mcep,
                                        ap,
                                        alpha=0.544,
                                        )
            # clip to the int16 range before the cast below
            wav = np.clip(wav, -32768, 32767)
            anasynf = os.path.join('./' + subset + '/anasyn/', f + '.wav')
            wavfile.write(anasynf, fs, np.array(wav, dtype=np.int16))