# --- Deep Voice 3 training setup -------------------------------------------
# Builds, in order: the batch sampler, the dataset/DataLoader pair, the dv3
# model (optionally moved to the GPU), its Adam optimizer, and the
# SummaryWriter used for logging.  All inputs (Mel, X, Y, hparams, dv3_preset,
# checkpoint_dv3, use_cuda, collate_fn, frontend) come from the enclosing
# scope.

# Prepare sampler
# The sampler is driven by the frame lengths exposed by the Mel data source.
frame_lengths = Mel.file_data_source.frame_lengths
sampler = PartialyRandomizedSimilarTimeLengthSampler(
    frame_lengths, batch_size=hparams.batch_size)

# Dataset and Dataloader setup
dataset = PyTorchDataset(X, Mel, Y)
data_loader_dv3 = data_utils.DataLoader(
    dataset,
    batch_size=hparams.batch_size,
    num_workers=hparams.num_workers,
    sampler=sampler,
    collate_fn=collate_fn,
    pin_memory=hparams.pin_memory)
print("dataloader for dv3 prepared")

# Point dv3's training module at the text frontend named in hparams before
# the model is built.
dv3.train._frontend = getattr(frontend, hparams.frontend)
dv3_model = build_deepvoice_3(dv3_preset, checkpoint_dv3)
print("Built dv3!")

if use_cuda:
    dv3_model = dv3_model.cuda()

# Only the parameters returned by get_trainable_parameters() are handed to
# Adam; every hyper-parameter is read from hparams.
dv3_optimizer = optim.Adam(
    dv3_model.get_trainable_parameters(),
    lr=hparams.initial_learning_rate,
    betas=(hparams.adam_beta1, hparams.adam_beta2),
    eps=hparams.adam_eps,
    weight_decay=hparams.weight_decay)

# Timestamped event directory; spaces in the timestamp become underscores.
log_event_path = "log/run-test" + str(datetime.now()).replace(" ", "_")
print("Log event path for dv3: {}".format(log_event_path))
writer_dv3 = SummaryWriter(log_dir=log_event_path)

# ENCODER
#if i%8000=0:
#scheduler.step()


def download_file(file_name=None):
    """Trigger a download of ``file_name`` through Google Colab's file helper.

    The ``google.colab`` import is function-scoped, so the package is only
    required when this function is actually called.

    Args:
        file_name: Path handed unchanged to ``google.colab.files.download``.
    """
    from google.colab import files as colab_files

    colab_files.download(file_name)


batch_size = 64


if __name__ == "__main__":
    # Load Deep Voice 3
    # Pre Trained Model
    dv3_model = build_deepvoice_3(True)

    all_speakers = get_cloned_voices()
    print("Cloning Texts are produced")

    speaker_embed = get_speaker_embeddings(dv3_model)
    # encoder = build_encoder()
    # NOTE(review): the message below is printed even though the encoder
    # construction above is commented out.
    print("Encoder is built!")

    speech_data = Speech_Dataset(all_speakers, speaker_embed)
    #for i in range(5):
    #    sample = speech_data[i]
    #    print(sample[0].shape, sample[1].shape)
    #    print(sample[0], sample[1])