示例#1
0
def synthesize(speaker, input_file, output_file, params):
    """Run the encoder and the vocoder on one input file and write a wave file.

    The input sequence is built with ``create_lab_input(input_file, speaker)``
    and fed to the encoder loaded from ``data/models/rnn_encoder``.  The
    encoder output is handed to ``_render_spectrogram`` (written to
    ``output_file + '.png'``) and then to the vocoder loaded from
    ``data/models/rnn_vocoder``.  The vocoder output is decoded with
    ``DatasetIO.b16_dec`` and written to ``output_file``.

    Args:
        speaker: Forwarded to ``create_lab_input`` together with
            ``input_file``.
        input_file: Path of the input passed to ``create_lab_input``.
        output_file: Path of the wave file to write; the spectrogram image
            goes to ``output_file + '.png'``.
        params: Settings object forwarded to the ``Encoder`` and ``Vocoder``
            constructors.  This function itself reads ``params.temperature``
            and ``params.target_sample_rate``.
    """
    print("[Encoding]")
    # Project imports stay function-local, as in the rest of this function,
    # so the model modules are only loaded when synthesis is requested.
    from io_modules.dataset import DatasetIO, Encodings
    from models.encoder import Encoder
    encodings = Encodings()
    encodings.load('data/models/encoder.encodings')
    encoder = Encoder(params, encodings, runtime=True)
    encoder.load('data/models/rnn_encoder')

    seq = create_lab_input(input_file, speaker)
    # generate() returns a pair; only the first element is used here.
    mgc, _ = encoder.generate(seq)
    _render_spectrogram(mgc, output_file + '.png')

    print("[Vocoding]")
    from models.vocoder import Vocoder
    vocoder = Vocoder(params, runtime=True)
    vocoder.load('data/models/rnn_vocoder')

    import time
    start = time.time()
    signal = vocoder.synthesize(mgc,
                                batch_size=1000,
                                temperature=params.temperature)
    elapsed = time.time() - start
    sys.stdout.write(" execution time=" + str(elapsed) + '\n')
    sys.stdout.flush()

    dio = DatasetIO()
    enc = dio.b16_dec(signal, discreete=True)
    dio.write_wave(output_file, enc, params.target_sample_rate)
示例#2
0
    def synth_devset(self,
                     batch_size,
                     target_sample_rate,
                     sample=True,
                     temperature=1.0,
                     max_files=5):
        """Synthesize the first development-set files and write them as waves.

        For each selected entry of ``self.devset.files`` the array stored in
        ``<entry>.mgc.npy`` is loaded, passed to ``self.vocoder.synthesize``,
        decoded with ``DatasetIO.ulaw_decode`` (when ``self.use_ulaw`` is
        truthy) or ``DatasetIO.b16_dec`` (otherwise), and written to
        ``data/output/<basename of entry>.wav``.  Progress and the time spent
        in the vocoder are written to stdout.

        Args:
            batch_size: Forwarded to ``self.vocoder.synthesize``.
            target_sample_rate: Sample rate passed to ``DatasetIO.write_wave``.
            sample: Forwarded to ``self.vocoder.synthesize``.
            temperature: Forwarded to ``self.vocoder.synthesize``.
            max_files: Upper bound on how many files from the start of
                ``self.devset.files`` are synthesized.  ``None`` processes
                every file.  Defaults to 5.
        """
        import os
        import time

        sys.stdout.write('\tSynthesizing devset\n')
        # Slice once so the total shown in the progress line is the number of
        # files that will actually be processed, not the whole devset size.
        dev_files = self.devset.files[:max_files]
        total = len(dev_files)
        for file_index, path in enumerate(dev_files, 1):
            sys.stdout.write("\t\t" + str(file_index) + "/" + str(total) +
                             " processing file " + path)
            sys.stdout.flush()
            mgc = np.load(path + ".mgc.npy")

            start = time.time()
            synth = self.vocoder.synthesize(mgc,
                                            batch_size,
                                            sample=sample,
                                            temperature=temperature)
            elapsed = time.time() - start
            sys.stdout.write(" execution time=" + str(elapsed) + '\n')
            sys.stdout.flush()

            dio = DatasetIO()
            if self.use_ulaw:
                enc = dio.ulaw_decode(synth, discreete=True)
            else:
                enc = dio.b16_dec(synth, discreete=True)
            # Only the file name is kept; all outputs land in data/output/.
            output_file = 'data/output/' + os.path.basename(path) + '.wav'
            dio.write_wave(output_file, enc, target_sample_rate)