def test(params):
    """Evaluate a trained G2P model on a lexicon and write a per-word report.

    The encodings are loaded from ``params.model + '.encodings'`` and the
    network from ``params.model + '-bestAcc.network'``. Every entry of the
    dataset at ``params.input`` is transcribed and one line per entry is
    written to ``params.output``::

        word<TAB>gold phonemes<TAB>predicted phonemes[***]

    Each phoneme is followed by a single space and ``***`` marks entries
    whose prediction differs from the gold transcription. Progress (in 5%
    steps) and the final word-level accuracy are written to stdout.

    Args:
        params: object exposing ``model``, ``input`` and ``output``
            attributes (as read by this function).
    """
    from io_modules.encodings import Encodings
    from io_modules.dataset import LTSDataset
    encodings = Encodings()
    encodings.load(params.model + '.encodings')
    testset = LTSDataset(params.input)
    from models.g2p import G2P
    model = G2P(encodings)
    model.load(params.model + '-bestAcc.network')

    total = len(testset.entries)
    correct = 0
    last_proc = 0
    # The context manager guarantees the report is flushed and closed even
    # when transcription fails part-way through the test set.
    with open(params.output, 'w') as f:
        for index, entry in enumerate(testset.entries, 1):
            curr_proc = int(index * 100 / total)
            if curr_proc % 5 == 0:
                # Catch up on every 5% mark passed since the last report.
                while last_proc < curr_proc:
                    last_proc += 5
                    sys.stdout.write(str(last_proc) + ' ')
                    sys.stdout.flush()

            p_transcription = model.transcribe(entry.word)
            ok = p_transcription == entry.transcription
            if ok:
                correct += 1

            gold = ''.join(phon + ' ' for phon in entry.transcription)
            predicted = ''.join(phon + ' ' for phon in p_transcription)
            marker = '' if ok else '***'
            f.write(entry.word + '\t' + gold + '\t' + predicted + marker + '\n')

    sys.stdout.write('done\n')
    # An empty test set has no meaningful accuracy; report 0.0 instead of
    # failing with ZeroDivisionError after the report has been written.
    accuracy = float(correct) / total if total else 0.0
    sys.stdout.write('Word level accuracy is ' + str(accuracy) + '\n')
def convert(params):
    """Replace the text of every ``.txt`` file in a folder with its phonemes.

    The G2P model is loaded from ``params.model + '.encodings'`` and
    ``params.model + '-bestAcc.network'``. For each regular file in
    ``params.input`` whose name ends in ``.txt``, the first line is read,
    split on whitespace, every word is transcribed, and the file is
    overwritten in place with the resulting phonemes, each followed by a
    single space.

    NOTE: only the first line of each file is read; any further lines are
    discarded when the file is rewritten.

    Args:
        params: object exposing ``model`` and ``input`` attributes (as read
            by this function).
    """
    from io_modules.encodings import Encodings
    encodings = Encodings()
    encodings.load(params.model + '.encodings')
    from models.g2p import G2P
    model = G2P(encodings)
    model.load(params.model + '-bestAcc.network')
    from os import listdir
    from os.path import isfile, join

    # Match on the extension: a substring test for 'txt' would also select
    # (and then overwrite) unrelated files such as 'txt_notes.wav'.
    text_files = [
        name for name in listdir(params.input)
        if isfile(join(params.input, name)) and name.endswith('.txt')
    ]

    for file_name in text_files:
        print(file_name)
        path = join(params.input, file_name)
        with open(path, 'r') as src:
            line = src.readline()

        # split() without arguments drops the trailing newline and the empty
        # tokens produced by repeated spaces, so only real words reach the
        # model.
        phones = []
        for word in line.split():
            phones.extend(model.transcribe(word))

        # Transcribe before reopening for writing: opening with 'w' truncates
        # the file, so a failure during transcription must not happen while
        # the original content is already gone.
        with open(path, 'w') as dst:
            dst.write(''.join(phone + ' ' for phone in phones))
def train(params):
    """Train a G2P model on the lexicons referenced by *params*.

    Loads the training and development datasets from ``params.train_file``
    and ``params.dev_file``, builds the encodings from the training set,
    reports dataset and inventory sizes on stdout, and hands everything to
    ``G2PTrainer.start_training`` together with ``params.model_base``,
    ``params.batch_size`` and ``params.patience``.
    """
    from io_modules.encodings import Encodings
    from io_modules.dataset import LTSDataset

    def report(prefix, count, suffix):
        # Emit "<prefix><count><suffix>" on stdout.
        sys.stdout.write(prefix + str(count) + suffix)

    sys.stdout.write('Loading datasets...\n')
    train_data = LTSDataset(params.train_file)
    dev_data = LTSDataset(params.dev_file)
    report('Trainset has ', len(train_data.entries), ' entries\n')
    report('Devset has ', len(dev_data.entries), ' entries\n')

    # Encodings are derived from the training set only.
    enc = Encodings()
    enc.update_encodings(train_data)
    report('Found ', len(enc.char2int), ' characters\n')
    report('Found ', len(enc.phoneme2int), ' phonemes\n')

    # Model and trainer modules are imported only once the data is ready,
    # mirroring the point at which the original pulled them in.
    from trainers.g2p import G2PTrainer
    from models.g2p import G2P
    g2p_model = G2P(enc)
    g2p_trainer = G2PTrainer()
    g2p_trainer.start_training(
        g2p_model,
        enc,
        train_data,
        dev_data,
        params.model_base,
        params.batch_size,
        params.patience,
    )
elif not params.txt_file: print("Input file is mandatory") elif not params.output_file: print("Output file is mandatory") memory = int(params.memory) # for compatibility we have to add this paramater params.learning_rate = 0.0001 dynet_config.set(mem=memory, random_seed=9) if params.gpu: dynet_config.set_gpu() if params.g2p is not None: from models.g2p import G2P from io_modules.encodings import Encodings g2p_encodings = Encodings() g2p_encodings.load(params.g2p + '.encodings') g2p = G2P(g2p_encodings) g2p.load(params.g2p + '-bestAcc.network') if exists(params.g2p + '.lexicon'): g2p.load_lexicon(params.g2p + '.lexicon') else: g2p = None synthesize(params.speaker, params.txt_file, params.output_file, params, g2p=g2p)
def phase_1_prepare_corpus(params):
    """Build the processed train/dev corpus from raw ``.txt``/``.wav`` pairs.

    Every base name in ``params.train_folder`` (and ``params.dev_folder``,
    when it is not ``None``) that has both a ``.txt`` and a ``.wav`` file is
    processed into ``data/processed/train`` or ``data/processed/dev``:

    * ``.lab``      - copied if present, otherwise created with
      ``create_lab_file`` (using the optional G2P model);
    * ``.txt``      - copied;
    * ``.orig.wav`` - the audio as returned by ``DatasetIO.read_wave`` for
      ``params.target_sample_rate``;
    * ``.mgc``      - the mel spectrogram written with ``array2file``;
    * ``.png``      - the spectrogram rendered with ``render_spectrogram``.

    When ``params.prefix`` is not ``None`` the outputs are named
    ``<prefix>_<NNNNN>``, with a counter that starts at 1 and keeps running
    from the training files into the development files; otherwise the
    original base name is kept.

    Args:
        params: object exposing ``train_folder``, ``dev_folder``, ``g2p``,
            ``prefix``, ``speaker``, ``target_sample_rate`` and
            ``mgc_order`` attributes (as read by this function).
    """
    from os import listdir
    from os.path import isfile, join
    from os.path import exists
    from shutil import copyfile

    def _list_files(folder):
        # Names of the regular files located directly inside ``folder``.
        return [f for f in listdir(folder) if isfile(join(folder, f))]

    def _load_g2p(model_base):
        # Load the G2P model stored under ``model_base`` plus its optional lexicon.
        from models.g2p import G2P
        from io_modules.encodings import Encodings
        g2p_encodings = Encodings()
        g2p_encodings.load(model_base + '.encodings')
        g2p_model = G2P(g2p_encodings)
        g2p_model.load(model_base + '-bestAcc.network')
        if exists(model_base + '.lexicon'):
            g2p_model.load_lexicon(model_base + '.lexicon')
        return g2p_model

    def _valid_base_names(folder, file_names):
        # Unique base names (first-seen order) with both a .txt and a .wav in ``folder``.
        seen = set()
        base_names = []
        for file_name in file_names:
            # Strips the last four characters, i.e. assumes a three-letter
            # extension such as '.txt' or '.wav'.
            base_name = file_name[:-4]
            if base_name in seen:
                continue
            if exists(join(folder, base_name + '.txt')) and exists(
                    join(folder, base_name + '.wav')):
                seen.add(base_name)
                base_names.append(base_name)
        return base_names

    train_files_tmp = _list_files(params.train_folder)
    if params.dev_folder is not None:
        dev_files_tmp = _list_files(params.dev_folder)
    else:
        dev_files_tmp = []

    g2p = _load_g2p(params.g2p) if params.g2p is not None else None

    sys.stdout.write("Scanning training files...")
    sys.stdout.flush()
    train_files = _valid_base_names(params.train_folder, train_files_tmp)
    sys.stdout.write(" found " + str(len(train_files)) +
                     " valid training files\n")

    sys.stdout.write("Scanning development files...")
    sys.stdout.flush()
    dev_files = _valid_base_names(params.dev_folder, dev_files_tmp)
    sys.stdout.write(" found " + str(len(dev_files)) +
                     " valid development files\n")

    from io_modules.dataset import DatasetIO
    from io_modules.vocoder import MelVocoder
    dio = DatasetIO()
    vocoder = MelVocoder()

    def _process_split(base_names, src_folder, dst_folder, numbering_offset):
        # Convert one split into ``dst_folder``; return the updated running file count.
        total = len(base_names)
        for index, base_name in enumerate(base_names, 1):
            sys.stdout.write("\r\tprocessing file " + str(index) + "/" +
                             str(total))
            sys.stdout.flush()

            # All outputs of one utterance share the same target stem.
            if params.prefix is not None:
                tgt_base = params.prefix + "_{:05d}".format(
                    numbering_offset + index)
            else:
                tgt_base = base_name

            txt_path = join(src_folder, base_name + '.txt')
            lab_path = join(src_folder, base_name + '.lab')
            tgt_lab_path = join(dst_folder, tgt_base + '.lab')

            # LAB - copy or create
            if exists(lab_path):
                copyfile(lab_path, tgt_lab_path)
            else:
                create_lab_file(txt_path,
                                tgt_lab_path,
                                speaker_name=params.speaker,
                                g2p=g2p)
            # TXT
            copyfile(txt_path, join(dst_folder, tgt_base + '.txt'))
            # WAVE
            data, sample_rate = dio.read_wave(
                join(src_folder, base_name + '.wav'),
                sample_rate=params.target_sample_rate)
            mgc = vocoder.melspectrogram(data,
                                         sample_rate=params.target_sample_rate,
                                         num_mels=params.mgc_order)
            # SPECT
            render_spectrogram(mgc, join(dst_folder, tgt_base + '.png'))
            dio.write_wave(join(dst_folder, tgt_base + '.orig.wav'), data,
                           sample_rate)
            array2file(mgc, join(dst_folder, tgt_base + '.mgc'))

        sys.stdout.write('\n')
        return numbering_offset + total

    # The counter keeps running across both splits so that prefixed file
    # names never collide between train and dev.
    total_files = _process_split(train_files, params.train_folder,
                                 'data/processed/train', 0)
    _process_split(dev_files, params.dev_folder, 'data/processed/dev',
                   total_files)