Example #1
def get_forward_model(model_path):
    """Build a ForwardTacotron from the hyperparameters in hp and load its
    weights from model_path. Requires torch, ForwardTacotron, symbols and the
    configured hparams module (hp) to be importable."""
    device = torch.device('cuda')
    model = ForwardTacotron(embed_dims=hp.forward_embed_dims,
                            num_chars=len(symbols),
                            durpred_rnn_dims=hp.forward_durpred_rnn_dims,
                            durpred_conv_dims=hp.forward_durpred_conv_dims,
                            rnn_dim=hp.forward_rnn_dims,
                            postnet_k=hp.forward_postnet_K,
                            postnet_dims=hp.forward_postnet_dims,
                            prenet_k=hp.forward_prenet_K,
                            prenet_dims=hp.forward_prenet_dims,
                            highways=hp.forward_num_highways,
                            dropout=hp.forward_dropout,
                            n_mels=hp.num_mels).to(device)
    model.load(model_path)
    return model
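A minimal usage sketch for the helper above. The import paths, the checkpoint path and the generate() call mirror the ForwardTacotron repository's layout, but they are assumptions here rather than part of the snippet, so adjust them to the installed version.

import torch
from utils import hparams as hp                        # assumed repo layout
from utils.text import text_to_sequence, clean_text    # assumed repo layout

hp.configure('hparams.py')  # hyperparameters must be configured before the model is built
model = get_forward_model('checkpoints/forward_latest.pt')  # hypothetical checkpoint path
model.eval()

with torch.no_grad():
    seq = text_to_sequence(clean_text('Hello world.'))
    seq = torch.as_tensor(seq, dtype=torch.long, device='cuda').unsqueeze(0)  # the model lives on CUDA above
    # generate() and its (mel, postnet mel, durations) outputs are an assumption
    # based on the ForwardTacotron repo; check the installed version.
    _, mel_post, _ = model.generate(seq, alpha=1.0)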
Example #2
        pitch_rnn_dims=hp.forward_pitch_rnn_dims,
        pitch_conv_dims=hp.forward_pitch_conv_dims,
        pitch_dropout=hp.forward_pitch_dropout,
        pitch_emb_dims=hp.forward_pitch_emb_dims,
        pitch_proj_dropout=hp.forward_pitch_proj_dropout,
        rnn_dim=hp.forward_rnn_dims,
        postnet_k=hp.forward_postnet_K,
        postnet_dims=hp.forward_postnet_dims,
        prenet_k=hp.forward_prenet_K,
        prenet_dims=hp.forward_prenet_dims,
        highways=hp.forward_num_highways,
        dropout=hp.forward_dropout,
        n_mels=hp.num_mels).to(device)

    # Use an explicitly supplied checkpoint if given, otherwise fall back to the
    # latest saved forward-model weights.
    tts_load_path = tts_weights if tts_weights else paths.forward_latest_weights
    tts_model.load(tts_load_path)

    if input_text:
        # Single utterance supplied directly.
        text = clean_text(input_text.strip())
        inputs = [text_to_sequence(text)]
    else:
        # Otherwise synthesise every line of sentences.txt.
        with open('sentences.txt') as f:
            inputs = [clean_text(line.strip()) for line in f]
        inputs = [text_to_sequence(t) for t in inputs]

    # Checkpoint step counts, reported in thousands for the summary table.
    tts_k = tts_model.get_step() // 1000

    if args.vocoder == 'wavernn':
        voc_k = voc_model.get_step() // 1000
        simple_table([('Forward Tacotron', str(tts_k) + 'k'),
                      ('Vocoder Type', 'WaveRNN'),
Example #3
def main():
    # Requires: argparse, os, torch, the repo's hparams module (hp), symbols,
    # Paths, text_to_sequence, ForwardTacotron, and the DurationPredictor /
    # Tacotron ONNX-export wrappers used below.
    # Parse arguments
    parser = argparse.ArgumentParser(description='TTS Generator')

    parser.add_argument(
        '--tts_weights',
        type=str,
        help='[string/path] Load in different FastSpeech weights')

    parser.add_argument('--hp_file',
                        metavar='FILE',
                        default='hparams.py',
                        help='The file to use for the hyperparameters')
    parser.add_argument(
        '--alpha',
        type=float,
        default=1.,
        help='Parameter for controlling length regulator for speedup '
        'or slow-down of generated speech, e.g. alpha=2.0 is double-time')

    args = parser.parse_args()

    hp.configure(args.hp_file)

    # Directory for the exported ONNX graphs.
    os.makedirs('onnx', exist_ok=True)

    input_text = "the forms of printed letters should be beautiful, and that their arrangement on the page should be reasonable and a help to the shapeliness of the letters themselves."
    tts_weights = args.tts_weights

    paths = Paths(hp.data_path, hp.voc_model_id, hp.tts_model_id)

    device = torch.device('cpu')
    print('Using device:', device)

    print('\nInitialising Forward TTS Model...\n')
    tts_model = ForwardTacotron(embed_dims=hp.forward_embed_dims,
                                num_chars=len(symbols),
                                durpred_rnn_dims=hp.forward_durpred_rnn_dims,
                                durpred_conv_dims=hp.forward_durpred_conv_dims,
                                rnn_dim=hp.forward_rnn_dims,
                                postnet_k=hp.forward_postnet_K,
                                postnet_dims=hp.forward_postnet_dims,
                                prenet_k=hp.forward_prenet_K,
                                prenet_dims=hp.forward_prenet_dims,
                                highways=hp.forward_num_highways,
                                dropout=hp.forward_dropout,
                                n_mels=hp.num_mels).to(device)

    tts_load_path = tts_weights or paths.forward_latest_weights
    tts_model.load(tts_load_path)

    # Split the network into two exportable graphs: a duration-predicting encoder
    # and a mel-generating decoder (wrapper classes around the loaded model).
    encoder = DurationPredictor(tts_model)
    decoder = Tacotron(tts_model)

    tts_model.eval()
    encoder.eval()
    decoder.eval()

    opset_version = 10

    with torch.no_grad():
        input_seq = text_to_sequence(input_text.strip(), hp.tts_cleaner_names)
        input_seq = torch.as_tensor(input_seq, dtype=torch.long,
                                    device=device).unsqueeze(0)
        # First step: predict a duration for each input symbol.
        torch.onnx.export(encoder,
                          input_seq,
                          "./onnx/forward_tacotron_duration_prediction.onnx",
                          opset_version=opset_version,
                          do_constant_folding=True,
                          input_names=["input_seq"],
                          output_names=["embeddings", "duration"])

        x, durations = encoder(input_seq)
        # Second step: expand the encoder outputs by the predicted durations
        # (length regulator).
        x = encoder.lr(x, durations)
        # Third step: generate the mel spectrogram from the expanded sequence.
        torch.onnx.export(decoder,
                          x,
                          "./onnx/forward_tacotron_regression.onnx",
                          opset_version=opset_version,
                          do_constant_folding=True,
                          input_names=["data"],
                          output_names=["mel"])

    print('Done!')
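The two exported graphs can then be chained outside PyTorch. Below is a minimal sketch using onnxruntime and NumPy: the length-regulator step between the graphs is reproduced with np.repeat, and the rounding/dtype handling of the exported duration output, the example token ids and the provider choice are assumptions. Note that the exports above do not set dynamic_axes, so the resulting graphs may be fixed to the traced sequence length.

import numpy as np
import onnxruntime as ort

duration_sess = ort.InferenceSession(
    'onnx/forward_tacotron_duration_prediction.onnx', providers=['CPUExecutionProvider'])
regression_sess = ort.InferenceSession(
    'onnx/forward_tacotron_regression.onnx', providers=['CPUExecutionProvider'])

# Hypothetical pre-tokenised input; in practice this comes from text_to_sequence().
input_seq = np.array([[12, 5, 33, 42, 7]], dtype=np.int64)

# First graph: encoder embeddings plus a predicted duration per input symbol.
embeddings, durations = duration_sess.run(None, {'input_seq': input_seq})

# Length regulator outside the graph: repeat each embedding by its duration
# (flattening and rounding here are assumptions about the exported output shape).
repeats = np.clip(np.round(durations.reshape(-1)), 1, None).astype(np.int64)
expanded = np.repeat(embeddings, repeats, axis=1).astype(np.float32)

# Second graph: regress the expanded sequence to a mel spectrogram.
mel = regression_sess.run(None, {'data': expanded})[0]
print(mel.shape)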