def test_taco():
    """Smoke-test the Tacotron forward pass, teacher-forced and greedy.

    Builds a tiny two-utterance batch, runs the model with decoder targets
    (teacher forcing) and checks the attention matrix shape, then runs the
    model again without targets to exercise greedy decoding.
    """
    B, T_out, D_out = 2, 400, 80
    r = 5
    T_encoder = T_out // r

    texts = ["Thank you very much.", "Hello"]
    # FIX: np.int was deprecated in NumPy 1.20 and removed in 1.24;
    # use the explicit np.int64 dtype instead.
    seqs = [
        np.array(text_to_sequence(t, ["english_cleaners"]), dtype=np.int64)
        for t in texts
    ]
    input_lengths = np.array([len(s) for s in seqs])
    max_len = np.max(input_lengths)
    # Right-pad every sequence to the batch maximum so they stack.
    seqs = np.array([_pad(s, max_len) for s in seqs])

    x = torch.LongTensor(seqs)
    y = torch.rand(B, T_out, D_out)
    x = Variable(x)
    y = Variable(y)

    model = Tacotron(n_vocab=len(symbols), r=r)

    print("Encoder input shape: ", x.size())
    print("Decoder input shape: ", y.size())

    # Teacher-forced forward pass (decoder targets supplied).
    a, b, c = model(x, y, input_lengths=input_lengths)
    print("Mel shape:", a.size())
    print("Linear shape:", b.size())
    print("Attention shape:", c.size())
    assert c.size() == (B, T_encoder, max_len)

    # Test greedy decoding (no decoder targets supplied).
    a, b, c = model(x, input_lengths=input_lengths)
# -----------------------------------------------------------------
# Model: Tacotron conditioned on character embeddings.
# -----------------------------------------------------------------
model = Tacotron(
    n_vocab=len(symbols),
    embedding_dim=256,
    mel_dim=hparams.num_mels,
    linear_dim=hparams.num_freq,
    r=hparams.outputs_per_step,
    padding_idx=hparams.padding_idx,
    use_memory_mask=hparams.use_memory_mask,
)

optimizer = optim.Adam(
    model.parameters(),
    lr=hparams.initial_learning_rate,
    betas=(hparams.adam_beta1, hparams.adam_beta2),
    weight_decay=hparams.weight_decay,
)

# Dataset / DataLoader setup.
dataset = PyTorchDataset(X, Mel, Y)
data_loader = data_utils.DataLoader(
    dataset,
    batch_size=hparams.batch_size,
    num_workers=hparams.num_workers,
    shuffle=True,
    collate_fn=collate_fn,
    pin_memory=hparams.pin_memory,
)

# Resume training state when a checkpoint was supplied.
if checkpoint_path:
    print("Load checkpoint from: {}".format(checkpoint_path))
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint["state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer"])
D -= hp.mask_nth_mgc_for_adv_loss if hp.discriminator_linguistic_condition: D = D + X_data_min.shape[-1] hp.discriminator_params["in_dim"] = D dataset = PyTorchDataset(X, Mel, Y) data_loader = data_utils.DataLoader( dataset, batch_size=hparams.batch_size, num_workers=hparams.num_workers, shuffle=True, collate_fn=collate_fn, pin_memory=hparams.pin_memory) # Models model_g = Tacotron(n_vocab = len(symbols), embedding_dim = 256, mel_dim = hparams.num_mels, linear_dim = hparams.num_freq, r = hparams.outputs_per_step, padding_idx = hparams.padding_idx, use_memory_mask = hparams.use_memory_mask, ) model_d = getattr(gantts_models, hp.discriminator)(**hp.discriminator_params) print("Generator:", model_g) print("Discriminator:", model_d) if use_cuda: model_g, model_d = model_g.cuda(), model_d.cuda() # Optimizers optimizer_g = optim.Adam( model.parameters(), lr = hparams.initial_learning_rate, betas = ( hparams.adam_beta1, hparams.adam_beta2 ), weight_decay = hparams.weight_decay ) optimizer_d = getattr(optim, hp.optimizer_d)(model_d.parameters(),