def synthesize(speaker, input_file, output_file, params):
    """Run the full text-to-speech pipeline for one input file.

    Encodes *input_file* (a lab file, per create_lab_input) into an MGC
    spectrogram with the RNN encoder, renders a PNG of the spectrogram,
    then vocodes the MGC into a waveform written to *output_file*.

    Args:
        speaker: speaker identifier forwarded to create_lab_input.
        input_file: path to the phonetic/lab input file.
        output_file: path of the WAV to write; a '<output_file>.png'
            spectrogram image is written alongside it.
        params: runtime configuration; this function reads
            params.temperature and params.target_sample_rate.

    Side effects: loads models from data/models/, writes two files,
    and prints progress/timing to stdout.
    """
    # NOTE(review): the original also imported Dataset and Trainer (the
    # latter twice, from trainers.encoder and trainers.vocoder, the second
    # shadowing the first); none were used, so they are removed here.
    print("[Encoding]")
    from io_modules.dataset import Encodings
    from models.encoder import Encoder
    encodings = Encodings()
    encodings.load('data/models/encoder.encodings')
    encoder = Encoder(params, encodings, runtime=True)
    encoder.load('data/models/rnn_encoder')
    seq = create_lab_input(input_file, speaker)
    mgc, att = encoder.generate(seq)
    _render_spectrogram(mgc, output_file + '.png')

    print("[Vocoding]")
    from models.vocoder import Vocoder
    vocoder = Vocoder(params, runtime=True)
    vocoder.load('data/models/rnn_vocoder')
    import time
    start = time.time()
    signal = vocoder.synthesize(mgc, batch_size=1000, temperature=params.temperature)
    stop = time.time()
    sys.stdout.write(" execution time=" + str(stop - start))
    sys.stdout.write('\n')
    sys.stdout.flush()

    # Decode the vocoder output from 16-bit representation and write the WAV.
    from io_modules.dataset import DatasetIO
    dio = DatasetIO()
    enc = dio.b16_dec(signal, discreete=True)
    dio.write_wave(output_file, enc, params.target_sample_rate)
def synth_devset(self, batch_size, target_sample_rate, sample=True, temperature=1.0):
    """Vocode the first five devset files and write the resulting WAVs.

    For each of the first 5 files in self.devset.files, loads the
    precomputed '<file>.mgc.npy' spectrogram, synthesizes audio with
    self.vocoder, decodes it (u-law or 16-bit depending on
    self.use_ulaw), and writes 'data/output/<basename>.wav'.

    Args:
        batch_size: batch size forwarded to self.vocoder.synthesize.
        target_sample_rate: sample rate of the written WAV files.
        sample: forwarded to the vocoder (presumably toggles sampling
            vs. deterministic decoding — confirm against Vocoder).
        temperature: sampling temperature forwarded to the vocoder.

    Side effects: writes WAV files under data/output/ and prints
    progress/timing to stdout.
    """
    import time  # hoisted out of the loop (original re-imported per file)
    sys.stdout.write('\tSynthesizing devset\n')
    file_index = 1
    # 'path' avoids shadowing the builtin 'file' used by the original.
    for path in self.devset.files[:5]:
        sys.stdout.write("\t\t" + str(file_index) + "/" + str(len(self.devset.files)) + " processing file " + path)
        sys.stdout.flush()
        file_index += 1

        mgc_file = path + ".mgc.npy"
        mgc = np.load(mgc_file)

        start = time.time()
        synth = self.vocoder.synthesize(mgc, batch_size, sample=sample, temperature=temperature)
        stop = time.time()
        sys.stdout.write(" execution time=" + str(stop - start))
        sys.stdout.write('\n')
        sys.stdout.flush()

        # Decode according to the trainer's configured encoding scheme.
        dio = DatasetIO()
        if self.use_ulaw:
            enc = dio.ulaw_decode(synth, discreete=True)
        else:
            enc = dio.b16_dec(synth, discreete=True)
        output_file = 'data/output/' + path[path.rfind('/') + 1:] + '.wav'
        dio.write_wave(output_file, enc, target_sample_rate)