def get_loss_function(loss_function, sigma=1.0):
    if loss_function == 'Tacotron2':
        loss = Tacotron2Loss()
    elif loss_function == 'WaveGlow':
        loss = WaveGlowLoss(sigma=sigma)
    else:
        raise NotImplementedError(
            "unknown loss function requested: {}".format(loss_function))
    loss.cuda()
    return loss
def get_loss_function(loss_function, **kw):
    if loss_function == 'Tacotron2':
        loss = Tacotron2Loss()
    elif loss_function == 'WaveGlow':
        loss = WaveGlowLoss(**kw)
    elif loss_function == 'FastPitch':
        loss = FastPitchLoss(**kw)
    else:
        raise NotImplementedError(
            "unknown loss function requested: {}".format(loss_function))
    return loss.cuda()
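# Usage sketch (an assumption, not part of the repository): both variants of
# get_loss_function return a CUDA-resident loss module, so a GPU must be
# available at call time.
#
#     criterion = get_loss_function('Tacotron2')
#     criterion = get_loss_function('WaveGlow', sigma=1.0)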
def __init__(self, hparams):
    super(Tacotron2, self).__init__()
    self.hparams = hparams
    self.mask_padding = hparams.mask_padding
    self.fp16_run = hparams.fp16_run
    self.n_mel_channels = hparams.n_mel_channels
    self.n_frames_per_step = hparams.n_frames_per_step
    self.embedding = nn.Embedding(hparams.n_symbols,
                                  hparams.symbols_embedding_dim)
    std = sqrt(2.0 / (hparams.n_symbols + hparams.symbols_embedding_dim))
    val = sqrt(3.0) * std  # uniform bounds for std
    self.embedding.weight.data.uniform_(-val, val)
    self.encoder = Encoder(hparams)
    self.decoder = Decoder(hparams)
    self.postnet = Postnet(hparams)
    self.criterion = Tacotron2Loss()
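# The manual bounds above reproduce Xavier/Glorot uniform initialization:
# std = sqrt(2 / (fan_in + fan_out)) and val = sqrt(3) * std. A minimal sketch
# of the equivalent built-in call; the embedding sizes below are illustrative,
# not taken from hparams.

import torch.nn as nn

embedding = nn.Embedding(148, 512)
# xavier_uniform_ with gain=1.0 samples U(-val, val) with the same val as the
# hand-computed bounds in __init__ above.
nn.init.xavier_uniform_(embedding.weight)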
def main():
    parser = argparse.ArgumentParser(description='PyTorch Tacotron 2 Training')
    parser = parse_args(parser)
    args, _ = parser.parse_known_args()

    LOGGER.set_model_name("Tacotron2_PyT")
    LOGGER.set_backends([
        dllg.StdOutBackend(log_file=None,
                           logging_scope=dllg.TRAIN_ITER_SCOPE,
                           iteration_interval=1),
        dllg.JsonBackend(log_file=os.path.join(args.output_directory, args.log_file)
                         if args.rank == 0 else None,
                         logging_scope=dllg.TRAIN_ITER_SCOPE,
                         iteration_interval=1)
    ])
    LOGGER.timed_block_start("run")
    LOGGER.register_metric(tags.TRAIN_ITERATION_LOSS,
                           metric_scope=dllg.TRAIN_ITER_SCOPE)
    LOGGER.register_metric("iter_time", metric_scope=dllg.TRAIN_ITER_SCOPE)
    LOGGER.register_metric("epoch_time", metric_scope=dllg.EPOCH_SCOPE)
    LOGGER.register_metric("run_time", metric_scope=dllg.RUN_SCOPE)
    LOGGER.register_metric("val_iter_loss", metric_scope=dllg.EPOCH_SCOPE)
    LOGGER.register_metric("train_epoch_frames/sec", metric_scope=dllg.EPOCH_SCOPE)
    LOGGER.register_metric("train_epoch_avg_frames/sec", metric_scope=dllg.EPOCH_SCOPE)
    LOGGER.register_metric("train_epoch_avg_loss", metric_scope=dllg.EPOCH_SCOPE)

    log_hardware()

    parser = parse_tacotron2_args(parser)
    args = parser.parse_args()
    log_args(args)

    torch.backends.cudnn.enabled = args.cudnn_enabled
    torch.backends.cudnn.benchmark = args.cudnn_benchmark

    distributed_run = args.world_size > 1
    if distributed_run:
        init_distributed(args, args.world_size, args.rank, args.group_name)

    os.makedirs(args.output_directory, exist_ok=True)

    LOGGER.log(key=tags.RUN_START)
    run_start_time = time.time()

    model = get_tacotron2_model(args, len(args.training_anchor_dirs),
                                is_training=True)

    if not args.amp_run and distributed_run:
        model = DDP(model)

    model.restore_checkpoint(
        os.path.join(args.output_directory, args.latest_checkpoint_file))

    optimizer = torch.optim.Adam(model.parameters(), lr=args.init_lr,
                                 weight_decay=args.weight_decay)

    if args.amp_run:
        model, optimizer = amp.initialize(model, optimizer, opt_level="O1")
        if distributed_run:
            model = DDP(model)

    criterion = Tacotron2Loss()

    collate_fn = TextMelCollate(args)
    train_dataset = TextMelDataset(args, args.training_anchor_dirs)
    train_loader = DataLoader(train_dataset, num_workers=2, shuffle=False,
                              batch_size=args.batch_size // len(args.training_anchor_dirs),
                              pin_memory=False, drop_last=True, collate_fn=collate_fn)
    # valate_dataset = TextMelDataset(args, args.validation_anchor_dirs)

    model.train()

    elapsed_epochs = model.get_elapsed_epochs()
    epochs = args.epochs - elapsed_epochs
    iteration = elapsed_epochs * len(train_loader)

    LOGGER.log(key=tags.TRAIN_LOOP)

    for epoch in range(1, epochs + 1):
        LOGGER.epoch_start()
        epoch_start_time = time.time()
        epoch += elapsed_epochs
        LOGGER.log(key=tags.TRAIN_EPOCH_START, value=epoch)

        # used to calculate avg frames/sec over epoch
        reduced_num_frames_epoch = 0

        # used to calculate avg loss over epoch
        train_epoch_avg_loss = 0.0
        train_epoch_avg_frames_per_sec = 0.0
        num_iters = 0

        adjust_learning_rate(optimizer, epoch, args)

        for i, batch in enumerate(train_loader):
            print(f"Batch: {i}/{len(train_loader)} epoch {epoch}")
            LOGGER.iteration_start()
            iter_start_time = time.time()
            LOGGER.log(key=tags.TRAIN_ITER_START, value=i)
            # start = time.perf_counter()

            optimizer.zero_grad()
            x, y, num_frames = batch_to_gpu(batch)

            y_pred = model(x)
            loss = criterion(y_pred, y)

            if distributed_run:
                reduced_loss = reduce_tensor(loss.data, args.world_size).item()
                reduced_num_frames = reduce_tensor(num_frames.data, 1).item()
            else:
                reduced_loss = loss.item()
                reduced_num_frames = num_frames.item()

            if np.isnan(reduced_loss):
                raise Exception("loss is NaN")
Exception("loss is NaN") LOGGER.log(key=tags.TRAIN_ITERATION_LOSS, value=reduced_loss) train_epoch_avg_loss += reduced_loss num_iters += 1 # accumulate number of frames processed in this epoch reduced_num_frames_epoch += reduced_num_frames if args.amp_run: with amp.scale_loss(loss, optimizer) as scaled_loss: scaled_loss.backward() grad_norm = torch.nn.utils.clip_grad_norm_( amp.master_params(optimizer), args.grad_clip_thresh) else: loss.backward() grad_norm = torch.nn.utils.clip_grad_norm_( model.parameters(), args.grad_clip_thresh) optimizer.step() iteration += 1 LOGGER.log(key=tags.TRAIN_ITER_STOP, value=i) iter_stop_time = time.time() iter_time = iter_stop_time - iter_start_time frames_per_sec = reduced_num_frames / iter_time train_epoch_avg_frames_per_sec += frames_per_sec LOGGER.log(key="train_iter_frames/sec", value=frames_per_sec) LOGGER.log(key="iter_time", value=iter_time) LOGGER.iteration_stop() LOGGER.log(key=tags.TRAIN_EPOCH_STOP, value=epoch) epoch_stop_time = time.time() epoch_time = epoch_stop_time - epoch_start_time LOGGER.log(key="train_epoch_frames/sec", value=(reduced_num_frames_epoch / epoch_time)) LOGGER.log(key="train_epoch_avg_frames/sec", value=(train_epoch_avg_frames_per_sec / num_iters if num_iters > 0 else 0.0)) LOGGER.log(key="train_epoch_avg_loss", value=(train_epoch_avg_loss / num_iters if num_iters > 0 else 0.0)) LOGGER.log(key="epoch_time", value=epoch_time) LOGGER.log(key=tags.EVAL_START, value=epoch) # validate(model, criterion, valate_dataset, iteration, collate_fn, distributed_run, args) LOGGER.log(key=tags.EVAL_STOP, value=epoch) # Store latest checkpoint in each epoch model.elapse_epoch() checkpoint_path = os.path.join(args.output_directory, args.latest_checkpoint_file) torch.save(model.state_dict(), checkpoint_path) # Plot alignemnt if epoch % args.epochs_per_alignment == 0 and args.rank == 0: alignments = y_pred[3].data.numpy() index = np.random.randint(len(alignments)) plot_alignment( alignments[index].transpose(0, 1), # [enc_step, dec_step] os.path.join(args.output_directory, f"align_{epoch:04d}_{iteration}.png"), info= f"{datetime.now().strftime('%Y-%m-%d %H:%M')} Epoch={epoch:04d} Iteration={iteration} Average loss={train_epoch_avg_loss/num_iters:.5f}" ) # Save checkpoint if epoch % args.epochs_per_checkpoint == 0 and args.rank == 0: checkpoint_path = os.path.join(args.output_directory, f"checkpoint_{epoch:04d}.pt") print( f"Saving model and optimizer state at epoch {epoch:04d} to {checkpoint_path}" ) torch.save(model.state_dict(), checkpoint_path) # Save evaluation # save_sample(model, args.tacotron2_checkpoint, args.phrase_path, # os.path.join(args.output_directory, f"sample_{epoch:04d}_{iteration}.wav"), args.sampling_rate) LOGGER.epoch_stop() run_stop_time = time.time() run_time = run_stop_time - run_start_time LOGGER.log(key="run_time", value=run_time) LOGGER.log(key=tags.RUN_FINAL) print("training time", run_stop_time - run_start_time) LOGGER.timed_block_stop("run") if args.rank == 0: LOGGER.finish()
def train(output_directory, log_directory, checkpoint_path, warm_start,
          n_gpus, rank, group_name, hparams):
    """Training and validation logging results to tensorboard and stdout

    Params
    ------
    output_directory (string): directory to save checkpoints
    log_directory (string): directory to save tensorboard logs
    checkpoint_path (string): checkpoint path
    n_gpus (int): number of gpus
    rank (int): rank of current gpu
    hparams (object): comma separated list of "name=value" pairs.
    """
    if hparams.distributed_run:
        init_distributed(hparams, n_gpus, rank, group_name)

    seed_everything(hparams.seed)

    train_dataloader, valid_dataloader = prepare_dataloaders(hparams)
    hparams.n_symbols = len(train_dataloader.dataset.tokenizer.id2token)

    model = load_model(hparams)
    learning_rate = hparams.learning_rate
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate,
                                 weight_decay=hparams.weight_decay)

    if hparams.fp16_run:
        from apex import amp
        model, optimizer = amp.initialize(model, optimizer, opt_level='O2')

    if hparams.distributed_run:
        model = apply_gradient_allreduce(model)

    criterion = Tacotron2Loss()

    logger = prepare_directories_and_logger(output_directory, log_directory, rank)

    # Load checkpoint if one exists
    iteration = 0
    epoch_offset = 0
    if checkpoint_path is not None:
        if warm_start:
            model = warm_start_model(checkpoint_path, model, hparams.ignore_layers)
        else:
            model, optimizer, _learning_rate, iteration = load_checkpoint(
                checkpoint_path, model, optimizer)
            if hparams.use_saved_learning_rate:
                learning_rate = _learning_rate
            iteration += 1  # next iteration is iteration + 1
            epoch_offset = max(0, int(iteration / len(train_dataloader)))

    patience = 0
    val_losses = []
    is_overflow = False

    model.train()
    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, hparams.epochs):
        print("Epoch: {}".format(epoch))
        tqdm_bar = tqdm(enumerate(train_dataloader), total=len(train_dataloader))
        for i, batch_dict in tqdm_bar:
            batch_dict = to_device_dict(batch_dict, device=hparams.device)
            start = time.perf_counter()
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate

            model.zero_grad()
            y_pred = model(batch_dict)

            loss = criterion(y_pred, batch_dict['y'])
            if hparams.distributed_run:
                reduced_loss = reduce_tensor(loss.data, n_gpus).item()
            else:
                reduced_loss = loss.item()

            if hparams.fp16_run:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            if hparams.fp16_run:
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    amp.master_params(optimizer), hparams.grad_clip_thresh)
                is_overflow = math.isnan(grad_norm)
            else:
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    model.parameters(), hparams.grad_clip_thresh)

            optimizer.step()

            if not is_overflow and rank == 0:
                duration = time.perf_counter() - start
                tqdm_bar.set_postfix_str(
                    "Train loss {} {:.6f} Grad Norm {:.6f} {:.2f}s/it".format(
                        iteration, reduced_loss, grad_norm, duration))
                logger.log_training(reduced_loss, grad_norm, learning_rate,
                                    duration, iteration)

            iteration += 1

            if not is_overflow and (iteration % hparams.iters_per_checkpoint == 0):
                val_loss = validate(model, valid_dataloader, criterion, iteration,
                                    n_gpus, logger, hparams.distributed_run, rank,
                                    hparams.device)
                val_losses.append(val_loss)
                if rank == 0:
                    checkpoint_path = os.path.join(
                        output_directory, "checkpoint_{}".format(iteration))
                    save_checkpoint(model, optimizer, learning_rate, iteration,
                                    checkpoint_path)

                if hparams.lr_reduce:
                    if val_losses[-hparams.lr_reduce['patience']:][0] < val_losses[-1]:
                        patience += 1
                    else:
                        patience = 0
                    if patience >= hparams.lr_reduce['patience']:
                        for g in optimizer.param_groups:
                            g['lr'] = g['lr'] / hparams.lr_reduce['divisor']
                        patience = 0
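# The manual patience counter above implements a reduce-on-plateau policy by
# hand. A minimal sketch of the same idea using PyTorch's built-in scheduler;
# the factor/patience values below are illustrative placeholders, not values
# taken from hparams.lr_reduce.

import torch
from torch.optim.lr_scheduler import ReduceLROnPlateau

params = [torch.nn.Parameter(torch.zeros(1))]  # stand-in for model.parameters()
optimizer = torch.optim.Adam(params, lr=1e-3)
# patience ~ hparams.lr_reduce['patience'], factor ~ 1 / hparams.lr_reduce['divisor']
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)

# Called once per validation pass with the current validation loss:
# scheduler.step(val_loss)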
def train(output_directory, log_directory, checkpoint_path, warm_start,
          n_gpus, rank, group_name, hparams):
    """Training and validation logging results to tensorboard and stdout

    Params
    ------
    output_directory (string): directory to save checkpoints
    log_directory (string): directory to save tensorboard logs
    checkpoint_path (string): checkpoint path
    n_gpus (int): number of gpus
    rank (int): rank of current gpu
    hparams (object): comma separated list of "name=value" pairs.
    """
    if hparams.distributed_run:
        init_distributed(hparams, n_gpus, rank, group_name)

    torch.manual_seed(hparams.seed)
    torch.cuda.manual_seed(hparams.seed)

    model = load_model(hparams)
    learning_rate = hparams.learning_rate
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate,
                                 weight_decay=hparams.weight_decay)

    if hparams.fp16_run:
        from apex import amp
        model, optimizer = amp.initialize(model, optimizer, opt_level='O2')

    if hparams.distributed_run:
        model = apply_gradient_allreduce(model)

    criterion = Tacotron2Loss()

    logger = prepare_directories_and_logger(output_directory, log_directory, rank)

    train_loader, valset, collate_fn = prepare_dataloaders(hparams)

    # Load checkpoint if one exists
    iteration = 0
    epoch_offset = 0
    val_loss_best = 1e20
    if checkpoint_path is not None:
        if warm_start:
            model = warm_start_model(checkpoint_path, model, hparams.ignore_layers)
        else:
            model, optimizer, _learning_rate, iteration = load_checkpoint(
                checkpoint_path, model, optimizer)
            if hparams.use_saved_learning_rate:
                learning_rate = _learning_rate
            iteration += 1  # next iteration is iteration + 1
            epoch_offset = max(0, int(iteration / len(train_loader)))

    model.train()
    is_overflow = False
    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, hparams.epochs):
        print("Epoch: {}".format(epoch))
        for i, batch in enumerate(train_loader):
            start = time.perf_counter()
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate

            model.zero_grad()
            x, y, speaker_embeddings = model.parse_batch(batch)
            y_pred = model(x, speaker_embeddings)

            loss = criterion(y_pred, y)
            if hparams.distributed_run:
                reduced_loss = reduce_tensor(loss.data, n_gpus).item()
            else:
                reduced_loss = loss.item()

            if hparams.fp16_run:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            if hparams.fp16_run:
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    amp.master_params(optimizer), hparams.grad_clip_thresh)
                is_overflow = math.isnan(grad_norm)
            else:
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    model.parameters(), hparams.grad_clip_thresh)

            optimizer.step()

            if not is_overflow and rank == 0:
                duration = time.perf_counter() - start
                print("Train loss {} {:.6f} Grad Norm {:.6f} {:.2f}s/it".format(
                    iteration, reduced_loss, grad_norm, duration))
                logger.log_training(reduced_loss, grad_norm, learning_rate,
                                    duration, iteration)

            iteration += 1

            if not is_overflow and (iteration % hparams.iters_per_checkpoint == 0):
                val_loss = validate(model, criterion, valset, iteration,
                                    hparams.batch_size, n_gpus, collate_fn, logger,
                                    hparams.distributed_run, rank)
                if rank == 0:
                    checkpoint_path = os.path.join(
                        output_directory, "checkpoint_{}".format(iteration))
                    save_checkpoint(model, optimizer, learning_rate, iteration,
                                    checkpoint_path)
                    if val_loss_best > val_loss:
                        print("Best validation loss improved from {:6f} to {:6f}. "
                              "Save checkpoint".format(val_loss_best, val_loss))
                        val_loss_best = val_loss
                        checkpoint_path = os.path.join(output_directory, "best_val")
                        save_checkpoint(model, optimizer, learning_rate, iteration,
                                        checkpoint_path)
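# Both train() variants rely on save_checkpoint / load_checkpoint helpers that
# are not shown in this section. A minimal sketch of a conventional checkpoint
# format matching the call sites above (model, optimizer, learning_rate,
# iteration); this is an assumption about the helpers, not the repository's
# exact implementation.

import torch

def save_checkpoint(model, optimizer, learning_rate, iteration, filepath):
    # Persist everything needed to resume training from this point.
    torch.save({'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'learning_rate': learning_rate,
                'iteration': iteration}, filepath)

def load_checkpoint(checkpoint_path, model, optimizer):
    # Restore weights and optimizer state; return values in the order the
    # call sites above unpack them.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    return model, optimizer, checkpoint['learning_rate'], checkpoint['iteration']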