def train(model_G, criterion_G, optimizer_G, model_D, criterion_D, optimizer_D, scheduler_G, scheduler_D, ap, global_step, epoch): data_loader = setup_loader(ap, is_val=False, verbose=(epoch == 0)) model_G.train() model_D.train() epoch_time = 0 keep_avg = KeepAverage() if use_cuda: batch_n_iter = int( len(data_loader.dataset) / (c.batch_size * num_gpus)) else: batch_n_iter = int(len(data_loader.dataset) / c.batch_size) end_time = time.time() c_logger.print_train_start() for num_iter, data in enumerate(data_loader): start_time = time.time() # format data c_G, y_G, c_D, y_D = format_data(data) loader_time = time.time() - end_time global_step += 1 ############################## # GENERATOR ############################## # generator pass y_hat = model_G(c_G) y_hat_sub = None y_G_sub = None y_hat_vis = y_hat # for visualization # PQMF formatting if y_hat.shape[1] > 1: y_hat_sub = y_hat y_hat = model_G.pqmf_synthesis(y_hat) y_hat_vis = y_hat y_G_sub = model_G.pqmf_analysis(y_G) scores_fake, feats_fake, feats_real = None, None, None if global_step > c.steps_to_start_discriminator: # run D with or without cond. features if len(signature(model_D.forward).parameters) == 2: D_out_fake = model_D(y_hat, c_G) else: D_out_fake = model_D(y_hat) D_out_real = None if c.use_feat_match_loss: with torch.no_grad(): D_out_real = model_D(y_G) # format D outputs if isinstance(D_out_fake, tuple): scores_fake, feats_fake = D_out_fake if D_out_real is None: feats_real = None else: _, feats_real = D_out_real else: scores_fake = D_out_fake # compute losses loss_G_dict = criterion_G(y_hat, y_G, scores_fake, feats_fake, feats_real, y_hat_sub, y_G_sub) loss_G = loss_G_dict['G_loss'] # optimizer generator optimizer_G.zero_grad() loss_G.backward() if c.gen_clip_grad > 0: torch.nn.utils.clip_grad_norm_(model_G.parameters(), c.gen_clip_grad) optimizer_G.step() if scheduler_G is not None: scheduler_G.step() loss_dict = dict() for key, value in loss_G_dict.items(): if isinstance(value, int): loss_dict[key] = value else: loss_dict[key] = value.item() ############################## # DISCRIMINATOR ############################## if global_step >= c.steps_to_start_discriminator: # discriminator pass with torch.no_grad(): y_hat = model_G(c_D) # PQMF formatting if y_hat.shape[1] > 1: y_hat = model_G.pqmf_synthesis(y_hat) # run D with or without cond. features if len(signature(model_D.forward).parameters) == 2: D_out_fake = model_D(y_hat.detach(), c_D) D_out_real = model_D(y_D, c_D) else: D_out_fake = model_D(y_hat.detach()) D_out_real = model_D(y_D) # format D outputs if isinstance(D_out_fake, tuple): scores_fake, feats_fake = D_out_fake if D_out_real is None: scores_real, feats_real = None, None else: scores_real, feats_real = D_out_real else: scores_fake = D_out_fake scores_real = D_out_real # compute losses loss_D_dict = criterion_D(scores_fake, scores_real) loss_D = loss_D_dict['D_loss'] # optimizer discriminator optimizer_D.zero_grad() loss_D.backward() if c.disc_clip_grad > 0: torch.nn.utils.clip_grad_norm_(model_D.parameters(), c.disc_clip_grad) optimizer_D.step() if scheduler_D is not None: scheduler_D.step() for key, value in loss_D_dict.items(): if isinstance(value, (int, float)): loss_dict[key] = value else: loss_dict[key] = value.item() step_time = time.time() - start_time epoch_time += step_time # get current learning rates current_lr_G = list(optimizer_G.param_groups)[0]['lr'] current_lr_D = list(optimizer_D.param_groups)[0]['lr'] # update avg stats update_train_values = dict() for key, value in loss_dict.items(): update_train_values['avg_' + key] = value update_train_values['avg_loader_time'] = loader_time update_train_values['avg_step_time'] = step_time keep_avg.update_values(update_train_values) # print training stats if global_step % c.print_step == 0: log_dict = { 'step_time': [step_time, 2], 'loader_time': [loader_time, 4], "current_lr_G": current_lr_G, "current_lr_D": current_lr_D } c_logger.print_train_step(batch_n_iter, num_iter, global_step, log_dict, loss_dict, keep_avg.avg_values) if args.rank == 0: # plot step stats if global_step % 10 == 0: iter_stats = { "lr_G": current_lr_G, "lr_D": current_lr_D, "step_time": step_time } iter_stats.update(loss_dict) tb_logger.tb_train_iter_stats(global_step, iter_stats) # save checkpoint if global_step % c.save_step == 0: if c.checkpoint: # save model save_checkpoint(model_G, optimizer_G, scheduler_G, model_D, optimizer_D, scheduler_D, global_step, epoch, OUT_PATH, model_losses=loss_dict) # compute spectrograms figures = plot_results(y_hat_vis, y_G, ap, global_step, 'train') tb_logger.tb_train_figures(global_step, figures) # Sample audio sample_voice = y_hat_vis[0].squeeze(0).detach().cpu().numpy() tb_logger.tb_train_audios(global_step, {'train/audio': sample_voice}, c.audio["sample_rate"]) end_time = time.time() # print epoch stats c_logger.print_train_epoch_end(global_step, epoch, epoch_time, keep_avg) # Plot Training Epoch Stats epoch_stats = {"epoch_time": epoch_time} epoch_stats.update(keep_avg.avg_values) if args.rank == 0: tb_logger.tb_train_epoch_stats(global_step, epoch_stats) # TODO: plot model stats # if c.tb_model_param_stats: # tb_logger.tb_model_weights(model, global_step) return keep_avg.avg_values, global_step
def train(model, criterion, optimizer, scheduler, scaler, ap, global_step, epoch): data_loader = setup_loader(ap, is_val=False, verbose=(epoch == 0)) model.train() epoch_time = 0 keep_avg = KeepAverage() if use_cuda: batch_n_iter = int( len(data_loader.dataset) / (c.batch_size * num_gpus)) else: batch_n_iter = int(len(data_loader.dataset) / c.batch_size) end_time = time.time() c_logger.print_train_start() # setup noise schedule noise_schedule = c['train_noise_schedule'] betas = np.linspace(noise_schedule['min_val'], noise_schedule['max_val'], noise_schedule['num_steps']) if hasattr(model, 'module'): model.module.compute_noise_level(betas) else: model.compute_noise_level(betas) for num_iter, data in enumerate(data_loader): start_time = time.time() # format data m, x = format_data(data) loader_time = time.time() - end_time global_step += 1 with torch.cuda.amp.autocast(enabled=c.mixed_precision): # compute noisy input if hasattr(model, 'module'): noise, x_noisy, noise_scale = model.module.compute_y_n(x) else: noise, x_noisy, noise_scale = model.compute_y_n(x) # forward pass noise_hat = model(x_noisy, m, noise_scale) # compute losses loss = criterion(noise, noise_hat) loss_wavegrad_dict = {'wavegrad_loss':loss} # check nan loss if torch.isnan(loss).any(): raise RuntimeError(f'Detected NaN loss at step {global_step}.') optimizer.zero_grad() # backward pass with loss scaling if c.mixed_precision: scaler.scale(loss).backward() scaler.unscale_(optimizer) grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), c.clip_grad) scaler.step(optimizer) scaler.update() else: loss.backward() grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), c.clip_grad) optimizer.step() # schedule update if scheduler is not None: scheduler.step() # disconnect loss values loss_dict = dict() for key, value in loss_wavegrad_dict.items(): if isinstance(value, int): loss_dict[key] = value else: loss_dict[key] = value.item() # epoch/step timing step_time = time.time() - start_time epoch_time += step_time # get current learning rates current_lr = list(optimizer.param_groups)[0]['lr'] # update avg stats update_train_values = dict() for key, value in loss_dict.items(): update_train_values['avg_' + key] = value update_train_values['avg_loader_time'] = loader_time update_train_values['avg_step_time'] = step_time keep_avg.update_values(update_train_values) # print training stats if global_step % c.print_step == 0: log_dict = { 'step_time': [step_time, 2], 'loader_time': [loader_time, 4], "current_lr": current_lr, "grad_norm": grad_norm.item() } c_logger.print_train_step(batch_n_iter, num_iter, global_step, log_dict, loss_dict, keep_avg.avg_values) if args.rank == 0: # plot step stats if global_step % 10 == 0: iter_stats = { "lr": current_lr, "grad_norm": grad_norm.item(), "step_time": step_time } iter_stats.update(loss_dict) tb_logger.tb_train_iter_stats(global_step, iter_stats) # save checkpoint if global_step % c.save_step == 0: if c.checkpoint: # save model save_checkpoint(model, optimizer, scheduler, None, None, None, global_step, epoch, OUT_PATH, model_losses=loss_dict, scaler=scaler.state_dict() if c.mixed_precision else None) end_time = time.time() # print epoch stats c_logger.print_train_epoch_end(global_step, epoch, epoch_time, keep_avg) # Plot Training Epoch Stats epoch_stats = {"epoch_time": epoch_time} epoch_stats.update(keep_avg.avg_values) if args.rank == 0: tb_logger.tb_train_epoch_stats(global_step, epoch_stats) # TODO: plot model stats if c.tb_model_param_stats and args.rank == 0: tb_logger.tb_model_weights(model, global_step) return keep_avg.avg_values, global_step
def train(model, optimizer, criterion, scheduler, scaler, ap, global_step, epoch): # create train loader data_loader = setup_loader(ap, is_val=False, verbose=(epoch == 0)) model.train() epoch_time = 0 keep_avg = KeepAverage() if use_cuda: batch_n_iter = int( len(data_loader.dataset) / (c.batch_size * num_gpus)) else: batch_n_iter = int(len(data_loader.dataset) / c.batch_size) end_time = time.time() c_logger.print_train_start() # train loop for num_iter, data in enumerate(data_loader): start_time = time.time() x_input, mels, y_coarse = format_data(data) loader_time = time.time() - end_time global_step += 1 optimizer.zero_grad() if c.mixed_precision: # mixed precision training with torch.cuda.amp.autocast(): y_hat = model(x_input, mels) if isinstance(model.mode, int): y_hat = y_hat.transpose(1, 2).unsqueeze(-1) else: y_coarse = y_coarse.float() y_coarse = y_coarse.unsqueeze(-1) # compute losses loss = criterion(y_hat, y_coarse) scaler.scale(loss).backward() scaler.unscale_(optimizer) if c.grad_clip > 0: torch.nn.utils.clip_grad_norm_(model.parameters(), c.grad_clip) scaler.step(optimizer) scaler.update() else: # full precision training y_hat = model(x_input, mels) if isinstance(model.mode, int): y_hat = y_hat.transpose(1, 2).unsqueeze(-1) else: y_coarse = y_coarse.float() y_coarse = y_coarse.unsqueeze(-1) # compute losses loss = criterion(y_hat, y_coarse) if loss.item() is None: raise RuntimeError(" [!] None loss. Exiting ...") loss.backward() if c.grad_clip > 0: torch.nn.utils.clip_grad_norm_(model.parameters(), c.grad_clip) optimizer.step() if scheduler is not None: scheduler.step() # get the current learning rate cur_lr = list(optimizer.param_groups)[0]["lr"] step_time = time.time() - start_time epoch_time += step_time update_train_values = dict() loss_dict = dict() loss_dict["model_loss"] = loss.item() for key, value in loss_dict.items(): update_train_values["avg_" + key] = value update_train_values["avg_loader_time"] = loader_time update_train_values["avg_step_time"] = step_time keep_avg.update_values(update_train_values) # print training stats if global_step % c.print_step == 0: log_dict = { "step_time": [step_time, 2], "loader_time": [loader_time, 4], "current_lr": cur_lr, } c_logger.print_train_step( batch_n_iter, num_iter, global_step, log_dict, loss_dict, keep_avg.avg_values, ) # plot step stats if global_step % 10 == 0: iter_stats = {"lr": cur_lr, "step_time": step_time} iter_stats.update(loss_dict) tb_logger.tb_train_iter_stats(global_step, iter_stats) # save checkpoint if global_step % c.save_step == 0: if c.checkpoint: # save model save_checkpoint( model, optimizer, scheduler, None, None, None, global_step, epoch, OUT_PATH, model_losses=loss_dict, scaler=scaler.state_dict() if c.mixed_precision else None) # synthesize a full voice rand_idx = random.randrange(0, len(train_data)) wav_path = train_data[rand_idx] if not isinstance( train_data[rand_idx], (tuple, list)) else train_data[rand_idx][0] wav = ap.load_wav(wav_path) ground_mel = ap.melspectrogram(wav) sample_wav = model.generate(ground_mel, c.batched, c.target_samples, c.overlap_samples, use_cuda) predict_mel = ap.melspectrogram(sample_wav) # compute spectrograms figures = { "train/ground_truth": plot_spectrogram(ground_mel.T), "train/prediction": plot_spectrogram(predict_mel.T) } tb_logger.tb_train_figures(global_step, figures) # Sample audio tb_logger.tb_train_audios(global_step, {"train/audio": sample_wav}, c.audio["sample_rate"]) end_time = time.time() # print epoch stats c_logger.print_train_epoch_end(global_step, epoch, epoch_time, keep_avg) # Plot Training Epoch Stats epoch_stats = {"epoch_time": epoch_time} epoch_stats.update(keep_avg.avg_values) tb_logger.tb_train_epoch_stats(global_step, epoch_stats) # TODO: plot model stats # if c.tb_model_param_stats: # tb_logger.tb_model_weights(model, global_step) return keep_avg.avg_values, global_step