示例#1
0
def synthesize(speaker, input_file, output_file, params):
    """Run the encoder and the vocoder on ``input_file`` and write a wave file.

    Steps, in order:
      1. Load the encodings and the encoder from ``data/models`` and call
         ``encoder.generate`` on the sequence built by ``create_lab_input``.
      2. Pass the encoder output to ``_render_spectrogram`` together with
         ``output_file + '.png'``.
      3. Load the vocoder from ``data/models/rnn_vocoder``, synthesize the
         signal and report the elapsed wall-clock time on stdout.
      4. Decode the signal with ``DatasetIO.b16_dec`` and write it to
         ``output_file`` using ``params.target_sample_rate``.

    Args:
        speaker: Forwarded to ``create_lab_input``.
        input_file: Forwarded to ``create_lab_input``.
        output_file: Destination handed to ``DatasetIO.write_wave``; the same
            string with a ``.png`` suffix is handed to
            ``_render_spectrogram``.
        params: Configuration object. ``temperature`` and
            ``target_sample_rate`` are read here; the whole object is also
            passed to ``Encoder`` and ``Vocoder``.
    """
    print("[Encoding]")
    # Only the modules needed for inference are imported here; the trainer
    # classes are not used by synthesis.
    from io_modules.dataset import Encodings
    from models.encoder import Encoder
    encodings = Encodings()
    encodings.load('data/models/encoder.encodings')
    encoder = Encoder(params, encodings, runtime=True)
    encoder.load('data/models/rnn_encoder')

    seq = create_lab_input(input_file, speaker)
    # generate() returns two values; only the first one is used below.
    mgc, _ = encoder.generate(seq)
    _render_spectrogram(mgc, output_file + '.png')

    print("[Vocoding]")
    from models.vocoder import Vocoder
    vocoder = Vocoder(params, runtime=True)
    vocoder.load('data/models/rnn_vocoder')

    import time
    start = time.time()
    signal = vocoder.synthesize(mgc,
                                batch_size=1000,
                                temperature=params.temperature)
    stop = time.time()
    sys.stdout.write(" execution time=" + str(stop - start))
    sys.stdout.write('\n')
    sys.stdout.flush()

    from io_modules.dataset import DatasetIO
    dio = DatasetIO()
    # NOTE(review): `discreete` is the keyword spelling used by the original
    # call to b16_dec; keep it unless the callee is renamed.
    enc = dio.b16_dec(signal, discreete=True)
    dio.write_wave(output_file, enc, params.target_sample_rate)
示例#2
0
def load_encoder(params, base_path='data/models'):
    """Return an ``Encoder`` in runtime mode with its stored state loaded.

    Args:
        params: Passed unchanged to the ``Encoder`` constructor.
        base_path: Directory prefix for ``encoder.encodings`` and
            ``rnn_encoder``. Defaults to ``'data/models'``.

    Returns:
        The ``Encoder`` instance after ``load`` has been called on it.
    """
    from models.encoder import Encoder
    from io_modules.dataset import Encodings

    symbol_tables = Encodings()
    encodings_path = '%s/encoder.encodings' % base_path
    symbol_tables.load(encodings_path)

    model = Encoder(params, symbol_tables, runtime=True)
    checkpoint_path = '%s/rnn_encoder' % base_path
    model.load(checkpoint_path)
    return model
示例#3
0
    def phase_3_train_encoder(params):
        """Build (or reload) the encodings and start encoder training.

        On a fresh run every ``<file>.lab`` of the training set is read and
        each of its entries is fed to ``Encodings.update``; the result is
        stored in ``data/models/encoder.encodings``. When ``params.resume``
        is set, the stored encodings and the ``data/models/rnn_encoder``
        checkpoint are loaded instead.

        Args:
            params: Run configuration. ``resume``, ``no_guided_attention``
                and ``no_bounds`` are read here (the latter two only to
                print an informational message); the whole object is also
                passed to ``Encoder`` and ``Trainer.start_training``.
        """
        from io_modules.dataset import Dataset
        from io_modules.dataset import Encodings
        from models.encoder import Encoder
        from trainers.encoder import Trainer

        trainset = Dataset("data/processed/train")
        devset = Dataset("data/processed/dev")
        total = len(trainset.files)
        sys.stdout.write('Found ' + str(total) +
                         ' training files and ' + str(len(devset.files)) +
                         ' development files\n')

        encodings = Encodings()
        if not params.resume:
            # The reader is loop-invariant: import and build it once instead
            # of once per training file.
            from io_modules.dataset import DatasetIO
            dio = DatasetIO()
            for count, train_file in enumerate(trainset.files, 1):
                if count % 100 == 0:
                    sys.stdout.write('\r' + str(count) + '/' +
                                     str(total) +
                                     ' processed files')
                    sys.stdout.flush()
                for entry in dio.read_lab(train_file + ".lab"):
                    encodings.update(entry)
            # After the loop every file has been processed, so the final
            # progress line reads total/total.
            sys.stdout.write('\r' + str(total) + '/' +
                             str(total) + ' processed files\n')
            sys.stdout.write('Found ' + str(len(encodings.char2int)) +
                             ' unique symbols, ' +
                             str(len(encodings.context2int)) +
                             ' unique features and ' +
                             str(len(encodings.speaker2int)) +
                             ' unique speakers\n')
            encodings.store('data/models/encoder.encodings')
        else:
            encodings.load('data/models/encoder.encodings')

        # When resuming, runtime mode is used to avoid orthonormal
        # initialization (the weights are loaded from the checkpoint below).
        runtime = bool(params.resume)
        encoder = Encoder(params, encodings, runtime=runtime)
        if params.resume:
            sys.stdout.write('Resuming from previous checkpoint\n')
            encoder.load('data/models/rnn_encoder')
        if params.no_guided_attention:
            sys.stdout.write('Disabling guided attention\n')
        if params.no_bounds:
            sys.stdout.write(
                'Using internal stopping condition for synthesis\n')
        trainer = Trainer(encoder, trainset, devset)
        # NOTE(review): the meaning of 10 and 1000 is defined by
        # Trainer.start_training and is not visible here; confirm.
        trainer.start_training(10, 1000, params)
示例#4
0
def synthesize(speaker, input_file, output_file, params):
    """Run the encoder and the parallel vocoder and write a wave file.

    Steps, in order:
      1. Print the ``device`` exported by ``models.vocoder``.
      2. Load the encodings and the encoder from ``data/models`` and call
         ``encoder.generate`` on the sequence built by ``create_lab_input``.
      3. Pass the encoder output to ``_render_spectrogram`` together with
         ``output_file + '.png'``.
      4. Load ``Vocoder`` (``data/models/nn_vocoder``) and the
         ``ParallelVocoder`` wrapping it (``data/models/pnn_vocoder``),
         synthesize under ``torch.no_grad()`` and report the elapsed
         wall-clock time on stdout.
      5. Write the signal with ``DatasetIO.write_wave`` using
         ``params.target_sample_rate``.

    Args:
        speaker: Forwarded to ``create_lab_input``.
        input_file: Forwarded to ``create_lab_input``.
        output_file: Destination handed to ``DatasetIO.write_wave``; the same
            string with a ``.png`` suffix is handed to
            ``_render_spectrogram``.
        params: Configuration object. ``batch_size`` and
            ``target_sample_rate`` are read here; the whole object is also
            passed to ``Encoder``, ``Vocoder`` and ``ParallelVocoder``.
    """
    from models.vocoder import device
    print(device)

    print("[Encoding]")
    # Only the modules needed for inference are imported here; the trainer
    # classes are not used by synthesis.
    from io_modules.dataset import Encodings
    from models.encoder import Encoder
    encodings = Encodings()
    encodings.load('data/models/encoder.encodings')
    encoder = Encoder(params, encodings, runtime=True)
    encoder.load('data/models/rnn_encoder')

    seq = create_lab_input(input_file, speaker)
    # generate() returns two values; only the first one is used below.
    mgc, _ = encoder.generate(seq)
    _render_spectrogram(mgc, output_file + '.png')

    print("[Vocoding]")
    from models.vocoder import ParallelVocoder
    from models.vocoder import Vocoder
    vocoder = Vocoder(params)
    vocoder.load('data/models/nn_vocoder')
    pvocoder = ParallelVocoder(params, vocoder=vocoder)
    pvocoder.load('data/models/pnn_vocoder')

    # Import before starting the clock so the reported time covers the
    # synthesis call only, not module import.
    import time
    import torch
    start = time.time()
    with torch.no_grad():
        signal = pvocoder.synthesize(mgc, batch_size=params.batch_size)
    stop = time.time()
    sys.stdout.write(" execution time=" + str(stop - start))
    sys.stdout.write('\n')
    sys.stdout.flush()

    from io_modules.dataset import DatasetIO
    dio = DatasetIO()
    # NOTE(review): the samples are scaled by 1/32768 while `dtype` is taken
    # from the unscaled array; confirm this is what write_wave expects.
    dio.write_wave(output_file, signal / 32768.0, params.target_sample_rate, dtype=signal.dtype)
示例#5
0
    elif not params.txt_file:
        print("Input file is mandatory")
    elif not params.output_file:
        print("Output file is mandatory")

    memory = int(params.memory)
    # for compatibility we have to add this paramater
    params.learning_rate = 0.0001
    dynet_config.set(mem=memory, random_seed=9)
    if params.gpu:
        dynet_config.set_gpu()

    if params.g2p is not None:
        from models.g2p import G2P
        from io_modules.encodings import Encodings

        g2p_encodings = Encodings()
        g2p_encodings.load(params.g2p + '.encodings')
        g2p = G2P(g2p_encodings)
        g2p.load(params.g2p + '-bestAcc.network')
        if exists(params.g2p + '.lexicon'):
            g2p.load_lexicon(params.g2p + '.lexicon')
    else:
        g2p = None

    synthesize(params.speaker,
               params.txt_file,
               params.output_file,
               params,
               g2p=g2p)