def train_and_eval(rank, n_gpus, hps):
  global global_step
  if rank == 0:
    logger = utils.get_logger(hps.model_dir)
    logger.info(hps)
    utils.check_git_hash(hps.model_dir)
    writer = SummaryWriter(log_dir=hps.model_dir)
    writer_eval = SummaryWriter(log_dir=os.path.join(hps.model_dir, "eval"))

  dist.init_process_group(backend='nccl', init_method='env://',
      world_size=n_gpus, rank=rank)
  torch.manual_seed(hps.train.seed)
  torch.cuda.set_device(rank)

  train_dataset = TextMelLoader(hps.data.training_files, hps.data)
  train_sampler = torch.utils.data.distributed.DistributedSampler(
      train_dataset, num_replicas=n_gpus, rank=rank, shuffle=True)
  collate_fn = TextMelCollate(1)
  # shuffle=False because the DistributedSampler already shuffles per epoch.
  train_loader = DataLoader(train_dataset, num_workers=8, shuffle=False,
      batch_size=hps.train.batch_size, pin_memory=True,
      drop_last=True, collate_fn=collate_fn, sampler=train_sampler)
  if rank == 0:
    val_dataset = TextMelLoader(hps.data.validation_files, hps.data)
    val_loader = DataLoader(val_dataset, num_workers=8, shuffle=False,
        batch_size=hps.train.batch_size, pin_memory=True,
        drop_last=True, collate_fn=collate_fn)

  generator = models.FlowGenerator(
      n_vocab=len(symbols),
      out_channels=hps.data.n_mel_channels,
      **hps.model).cuda(rank)
  optimizer_g = commons.Adam(generator.parameters(),
      scheduler=hps.train.scheduler, dim_model=hps.model.hidden_channels,
      warmup_steps=hps.train.warmup_steps, lr=hps.train.learning_rate,
      betas=hps.train.betas, eps=hps.train.eps)
  if hps.train.fp16_run:
    generator, optimizer_g._optim = amp.initialize(
        generator, optimizer_g._optim, opt_level="O1")
  generator = DDP(generator)

  epoch_str = 1
  global_step = 0
  try:
    # Resume from the latest checkpoint and restore the LR-schedule position.
    _, _, _, epoch_str = utils.load_checkpoint(
        utils.latest_checkpoint_path(hps.model_dir, "G_*.pth"),
        generator, optimizer_g)
    epoch_str += 1
    optimizer_g.step_num = (epoch_str - 1) * len(train_loader)
    optimizer_g._update_learning_rate()
    global_step = (epoch_str - 1) * len(train_loader)
  except Exception:
    # No checkpoint to resume from; optionally start from DDI-initialized weights.
    if hps.train.ddi and os.path.isfile(os.path.join(hps.model_dir, "ddi_G.pth")):
      _ = utils.load_checkpoint(
          os.path.join(hps.model_dir, "ddi_G.pth"), generator, optimizer_g)

  for epoch in range(epoch_str, hps.train.epochs + 1):
    if rank == 0:
      train(rank, epoch, hps, generator, optimizer_g, train_loader, logger, writer)
      evaluate(rank, epoch, hps, generator, optimizer_g, val_loader, logger, writer_eval)
      if epoch % 50 == 0:
        utils.save_checkpoint(generator, optimizer_g, hps.train.learning_rate, epoch,
            os.path.join(hps.model_dir, "G_{}.pth".format(epoch)))
    else:
      train(rank, epoch, hps, generator, optimizer_g, train_loader, None, None)
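# A minimal launcher sketch for train_and_eval, assuming the usual
# one-process-per-GPU setup implied by init_method='env://' above. The
# function name run_distributed and the rendezvous address/port values are
# placeholders, not part of this repo.
def run_distributed():
  assert torch.cuda.is_available(), "Distributed training requires CUDA."
  n_gpus = torch.cuda.device_count()
  os.environ['MASTER_ADDR'] = 'localhost'  # placeholder rendezvous address
  os.environ['MASTER_PORT'] = '29500'      # placeholder rendezvous port
  hps = utils.get_hparams()
  # Spawns n_gpus copies of train_and_eval; each process receives its rank as
  # the first argument, matching the (rank, n_gpus, hps) signature.
  torch.multiprocessing.spawn(train_and_eval, nprocs=n_gpus, args=(n_gpus, hps))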
def main():
  hps = utils.get_hparams()
  logger = utils.get_logger(hps.model_dir)
  logger.info(hps)
  utils.check_git_hash(hps.model_dir)
  torch.manual_seed(hps.train.seed)

  train_dataset = TextMelLoader(hps.data.training_files, hps.data)
  collate_fn = TextMelCollate(1)
  train_loader = DataLoader(train_dataset, num_workers=8, shuffle=True,
      batch_size=hps.train.batch_size, pin_memory=True,
      drop_last=True, collate_fn=collate_fn)

  generator = FlowGenerator_DDI(
      speaker_dim=hps.model.speaker_embedding,
      n_vocab=len(symbols),
      out_channels=hps.data.n_mel_channels,
      **hps.model).cuda()
  optimizer_g = commons.Adam(generator.parameters(),
      scheduler=hps.train.scheduler, dim_model=hps.model.hidden_channels,
      warmup_steps=hps.train.warmup_steps, lr=hps.train.learning_rate,
      betas=hps.train.betas, eps=hps.train.eps)

  generator.train()
  # A single forward pass is enough to trigger data-dependent initialization.
  for batch_idx, (x, x_lengths, y, y_lengths, speaker_embedding) in enumerate(train_loader):
    x, x_lengths = x.cuda(), x_lengths.cuda()
    y, y_lengths = y.cuda(), y_lengths.cuda()
    speaker_embedding = speaker_embedding.cuda()
    _ = generator(x, x_lengths, speaker_embedding, y, y_lengths, gen=False)
    break

  utils.save_checkpoint(generator, optimizer_g, hps.train.learning_rate, 0,
      os.path.join(hps.model_dir, "ddi_G.pth"))
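# Why the single forward pass with gen=False matters: FlowGenerator_DDI runs
# data-dependent initialization, where normalization layers set their
# parameters from the first batch's statistics before the weights are saved.
# A rough sketch of the idea (hypothetical ActNormSketch; the repo's actual
# ActNorm implementation may differ):
class ActNormSketch(torch.nn.Module):
  def __init__(self, channels):
    super().__init__()
    self.logs = torch.nn.Parameter(torch.zeros(1, channels, 1))
    self.bias = torch.nn.Parameter(torch.zeros(1, channels, 1))
    self.initialized = False

  def forward(self, x):  # x: [batch, channels, time]
    if self.training and not self.initialized:
      # Set scale/bias so the first batch comes out ~zero-mean, unit-variance.
      with torch.no_grad():
        mean = x.mean(dim=(0, 2), keepdim=True)
        std = x.std(dim=(0, 2), keepdim=True) + 1e-6
        self.bias.copy_(-mean / std)
        self.logs.copy_(-torch.log(std))
      self.initialized = True
    return x * torch.exp(self.logs) + self.bias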
def main():
  hps = utils.get_hparams()
  logger = utils.get_logger(hps.model_dir)
  logger.info(hps)
  utils.check_git_hash(hps.model_dir)
  torch.manual_seed(hps.train.seed)

  train_dataset = TextMelLoader(hps.data.training_files, hps.data)
  collate_fn = TextMelCollate(1)
  train_loader = DataLoader(train_dataset, num_workers=8, shuffle=True,
      batch_size=hps.train.batch_size, pin_memory=True,
      drop_last=True, collate_fn=collate_fn)

  generator = FlowGenerator_DDI(
      len(symbols),
      out_channels=hps.data.n_mel_channels,
      **hps.model).cuda()
  optimizer_g = commons.Adam(generator.parameters(),
      scheduler=hps.train.scheduler, dim_model=hps.model.hidden_channels,
      warmup_steps=hps.train.warmup_steps, lr=hps.train.learning_rate,
      betas=hps.train.betas, eps=hps.train.eps)

  generator.train()
  # A single forward pass is enough to trigger data-dependent initialization.
  for batch_idx, (x, x_lengths, y, y_lengths) in enumerate(train_loader):
    x, x_lengths = x.cuda(), x_lengths.cuda()
    y, y_lengths = y.cuda(), y_lengths.cuda()
    _ = generator(x, x_lengths, y, y_lengths, gen=False)
    break

  # Check for a pretrained checkpoint and load it without an optimizer.
  pretrained_checkpoint_path = os.path.join(hps.model_dir, "pretrained.pth")
  if os.path.isfile(pretrained_checkpoint_path):
    logger.info("Loading pretrained checkpoint: %s" % pretrained_checkpoint_path)
    model, optimizer, learning_rate, iteration = utils.load_checkpoint(
        pretrained_checkpoint_path, generator)
    utils.save_checkpoint(model, optimizer_g, hps.train.learning_rate, 0,
        os.path.join(hps.model_dir, "ddi_G.pth"))
  else:
    utils.save_checkpoint(generator, optimizer_g, hps.train.learning_rate, 0,
        os.path.join(hps.model_dir, "ddi_G.pth"))
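# Assumed workflow, inferred from the ddi_G.pth handoff between these scripts:
# run the init above once so data-dependent statistics are baked into
# ddi_G.pth, then launch training with hps.train.ddi enabled so that
# train_and_eval falls back to those weights when no G_*.pth checkpoint
# exists yet. Script names and config paths below are placeholders:
#
#   python init.py -c configs/base.json -m base_model
#   python train.py -c configs/base.json -m base_model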