def synthesize(speaker, input_file, output_file, params):
    """Synthesize speech from a label file using the RNN encoder + RNN vocoder.

    NOTE(review): a second ``synthesize`` is defined later in this module
    (parallel-vocoder variant); at import time that later definition shadows
    this one — confirm which entry point callers actually need.

    Args:
        speaker: speaker identifier forwarded to ``create_lab_input``.
        input_file: path to the input label file.
        output_file: path of the output wave file; a spectrogram PNG is
            written alongside it as ``output_file + '.png'``.
        params: runtime configuration (uses ``temperature`` and
            ``target_sample_rate``).
    """
    print("[Encoding]")
    # Fix: dropped unused local imports (Dataset, trainers.encoder.Trainer).
    from io_modules.dataset import Encodings
    from models.encoder import Encoder
    encodings = Encodings()
    encodings.load('data/models/encoder.encodings')
    encoder = Encoder(params, encodings, runtime=True)
    encoder.load('data/models/rnn_encoder')
    seq = create_lab_input(input_file, speaker)
    # Encoder produces the mel-generalized cepstrum frames plus attention.
    mgc, att = encoder.generate(seq)
    _render_spectrogram(mgc, output_file + '.png')
    print("[Vocoding]")
    from models.vocoder import Vocoder
    vocoder = Vocoder(params, runtime=True)
    vocoder.load('data/models/rnn_vocoder')
    import time
    start = time.time()
    signal = vocoder.synthesize(mgc, batch_size=1000, temperature=params.temperature)
    stop = time.time()
    sys.stdout.write(" execution time=" + str(stop - start))
    sys.stdout.write('\n')
    sys.stdout.flush()
    from io_modules.dataset import DatasetIO
    dio = DatasetIO()
    # Decode the 16-bit (discretized) network output back to PCM samples.
    enc = dio.b16_dec(signal, discreete=True)
    dio.write_wave(output_file, enc, params.target_sample_rate)
def phase_4_train_pvocoder(params):
    """Train the parallel vocoder, distilling from a pretrained wavenet vocoder."""
    from io_modules.dataset import Dataset
    from models.vocoder import Vocoder
    from models.vocoder import ParallelVocoder
    from trainers.vocoder import Trainer

    # The parallel vocoder is built on top of a frozen wavenet teacher.
    teacher = Vocoder(params)
    sys.stdout.write('Loading wavenet vocoder\n')
    teacher.load('data/models/nn_vocoder')
    student = ParallelVocoder(params, teacher)

    if params.resume:
        sys.stdout.write('Resuming from previous checkpoint\n')
        student.load('data/models/pnn_vocoder')

    train_data = Dataset("data/processed/train")
    dev_data = Dataset("data/processed/dev")
    sys.stdout.write('Found ' + str(len(train_data.files)) + ' training files and ' +
                     str(len(dev_data.files)) + ' development files\n')

    coach = Trainer(student, train_data, dev_data,
                    target_output_path='data/models/pnn_vocoder')
    coach.start_training(20, params.batch_size, params.target_sample_rate,
                         params=params)
def load_vocoder(params, base_path='data/models'):
    """Load the parallel vocoder (and its underlying wavenet) from *base_path*."""
    from models.vocoder import ParallelVocoder
    from models.vocoder import Vocoder

    base = Vocoder(params)
    base.load('%s/nn_vocoder' % base_path)

    parallel = ParallelVocoder(params, vocoder=base)
    parallel.load('%s/pnn_vocoder' % base_path)
    return parallel
def phase_5_test_vocoder(params):
    """Run the trained RNN vocoder over the dev set, sampling audio at T=0.8."""
    from io_modules.dataset import Dataset
    from models.vocoder import Vocoder
    from trainers.vocoder import Trainer

    model = Vocoder(params, runtime=True)
    model.load('data/models/rnn')

    train_data = Dataset("data/processed/train")
    dev_data = Dataset("data/processed/dev")
    sys.stdout.write('Found ' + str(len(train_data.files)) + ' training files and ' +
                     str(len(dev_data.files)) + ' development files\n')

    runner = Trainer(model, train_data, dev_data)
    runner.synth_devset(params.batch_size,
                        target_sample_rate=params.target_sample_rate,
                        sample=True,
                        temperature=0.8)
def phase_2_train_vocoder(params):
    """Train the RNN vocoder, optionally resuming from 'data/models/rnn'."""
    from io_modules.dataset import Dataset
    from models.vocoder import Vocoder
    from trainers.vocoder import Trainer

    model = Vocoder(params)
    if params.resume:
        sys.stdout.write('Resuming from previous checkpoint\n')
        model.load('data/models/rnn')

    train_data = Dataset("data/processed/train")
    dev_data = Dataset("data/processed/dev")
    sys.stdout.write('Found ' + str(len(train_data.files)) + ' training files and ' +
                     str(len(dev_data.files)) + ' development files\n')

    coach = Trainer(model, train_data, dev_data)
    coach.start_training(20, params.batch_size, params.target_sample_rate)
def synthesize(speaker, input_file, output_file, params):
    """Synthesize speech from a label file using the RNN encoder + parallel vocoder.

    NOTE(review): this redefines ``synthesize`` (an earlier RNN-vocoder variant
    exists in this module); this later definition is the one visible at import
    time — confirm the earlier one is intentionally dead.

    Args:
        speaker: speaker identifier forwarded to ``create_lab_input``.
        input_file: path to the input label file.
        output_file: path of the output wave file; a spectrogram PNG is
            written alongside it as ``output_file + '.png'``.
        params: runtime configuration (uses ``batch_size`` and
            ``target_sample_rate``).
    """
    from models.vocoder import device
    print(device)
    print("[Encoding]")
    # Fix: dropped unused local imports (Dataset, trainers.encoder.Trainer).
    from io_modules.dataset import Encodings
    from models.encoder import Encoder
    encodings = Encodings()
    encodings.load('data/models/encoder.encodings')
    encoder = Encoder(params, encodings, runtime=True)
    encoder.load('data/models/rnn_encoder')
    seq = create_lab_input(input_file, speaker)
    # Encoder produces the mel-generalized cepstrum frames plus attention.
    mgc, att = encoder.generate(seq)
    _render_spectrogram(mgc, output_file + '.png')
    print("[Vocoding]")
    from models.vocoder import ParallelVocoder
    from models.vocoder import Vocoder
    vocoder = Vocoder(params)
    vocoder.load('data/models/nn_vocoder')
    pvocoder = ParallelVocoder(params, vocoder=vocoder)
    pvocoder.load('data/models/pnn_vocoder')
    import time
    start = time.time()
    import torch
    # Inference only — disable autograd bookkeeping.
    with torch.no_grad():
        signal = pvocoder.synthesize(mgc, batch_size=params.batch_size)
    stop = time.time()
    sys.stdout.write(" execution time=" + str(stop - start))
    sys.stdout.write('\n')
    sys.stdout.flush()
    from io_modules.dataset import DatasetIO
    dio = DatasetIO()
    # Scale int16-range samples to [-1, 1] before writing.
    dio.write_wave(output_file, signal / 32768.0, params.target_sample_rate,
                   dtype=signal.dtype)
def phase_7_train_sparse(params):
    """Continue vocoder training with sparse LSTM cells (sparsification phase).

    Always loads the sparse checkpoint before training — this phase assumes a
    prior model exists at 'data/models/rnn_vocoder_sparse'.
    """
    sys.stdout.write("Starting sparsification for VanillaLSTM\n")
    from io_modules.dataset import Dataset
    from models.vocoder import Vocoder
    from trainers.vocoder import Trainer

    model = Vocoder(params, use_sparse_lstm=True)
    sys.stdout.write('Resuming from previous checkpoint\n')
    model.load('data/models/rnn_vocoder_sparse')

    sys.stdout.write("Reading datasets\n")
    sys.stdout.flush()
    train_data = Dataset("data/processed/train")
    dev_data = Dataset("data/processed/dev")
    sys.stdout.write('Found ' + str(len(train_data.files)) + ' training files and ' +
                     str(len(dev_data.files)) + ' development files\n')
    sys.stdout.flush()

    coach = Trainer(model, train_data, dev_data)
    coach.start_training(20, params.batch_size, params.target_sample_rate,
                         params=params)