Пример #1
0
def train(model_G, criterion_G, optimizer_G, model_D, criterion_D, optimizer_D,
          scheduler_G, scheduler_D, ap, global_step, epoch):
    """Run one training epoch for a generator/discriminator model pair.

    For every batch the generator is updated first. Discriminator outputs are
    fed to the generator loss only once ``global_step`` is greater than
    ``c.steps_to_start_discriminator``; the discriminator itself is updated
    from ``global_step >= c.steps_to_start_discriminator`` onward, on the
    separate ``(c_D, y_D)`` pair returned by ``format_data``.

    The function relies on module-level names that are not parameters:
    ``c`` (config), ``use_cuda``, ``num_gpus``, ``args``, ``c_logger``,
    ``tb_logger``, ``OUT_PATH`` and several helper functions.

    Args:
        model_G: generator, called as ``model_G(cond)``. Its
            ``pqmf_synthesis`` / ``pqmf_analysis`` methods are used when
            dimension 1 of its output is larger than 1.
        criterion_G: generator loss; must return a dict with key ``'G_loss'``.
        optimizer_G: optimizer stepping the generator parameters.
        model_D: discriminator. It is called with the conditioning input as
            a second argument when its ``forward`` takes exactly two
            parameters, otherwise with the signal only.
        criterion_D: discriminator loss; must return a dict with key
            ``'D_loss'``.
        optimizer_D: optimizer stepping the discriminator parameters.
        scheduler_G: generator LR scheduler, or ``None``.
        scheduler_D: discriminator LR scheduler, or ``None``.
        ap: object forwarded to ``setup_loader`` and ``plot_results``.
        global_step: step counter on entry; incremented once per batch.
        epoch: epoch index (used for verbosity, logging and checkpoints).

    Returns:
        tuple: ``(keep_avg.avg_values, global_step)`` with the running
        averages collected during the epoch and the updated step counter.
    """
    data_loader = setup_loader(ap, is_val=False, verbose=(epoch == 0))
    model_G.train()
    model_D.train()
    epoch_time = 0
    keep_avg = KeepAverage()
    if use_cuda:
        batch_n_iter = int(
            len(data_loader.dataset) / (c.batch_size * num_gpus))
    else:
        batch_n_iter = int(len(data_loader.dataset) / c.batch_size)
    # The discriminator's call convention does not change between steps, so
    # inspect its signature once instead of twice per iteration.
    d_is_conditional = len(signature(model_D.forward).parameters) == 2
    end_time = time.time()
    c_logger.print_train_start()
    for num_iter, data in enumerate(data_loader):
        start_time = time.time()

        # format data
        c_G, y_G, c_D, y_D = format_data(data)
        loader_time = time.time() - end_time

        global_step += 1

        ##############################
        # GENERATOR
        ##############################

        # generator pass
        y_hat = model_G(c_G)
        y_hat_sub = None
        y_G_sub = None
        y_hat_vis = y_hat  # for visualization

        # PQMF formatting
        if y_hat.shape[1] > 1:
            y_hat_sub = y_hat
            y_hat = model_G.pqmf_synthesis(y_hat)
            y_hat_vis = y_hat
            y_G_sub = model_G.pqmf_analysis(y_G)

        scores_fake, feats_fake, feats_real = None, None, None
        if global_step > c.steps_to_start_discriminator:

            # run D with or without cond. features
            if d_is_conditional:
                D_out_fake = model_D(y_hat, c_G)
            else:
                D_out_fake = model_D(y_hat)
            D_out_real = None

            if c.use_feat_match_loss:
                # Real features are only a target for feature matching, so no
                # gradient is needed. The real pass must use the same call
                # convention (with or without conditioning) as the fake pass.
                with torch.no_grad():
                    if d_is_conditional:
                        D_out_real = model_D(y_G, c_G)
                    else:
                        D_out_real = model_D(y_G)

            # format D outputs
            if isinstance(D_out_fake, tuple):
                scores_fake, feats_fake = D_out_fake
                if D_out_real is not None:
                    _, feats_real = D_out_real
            else:
                scores_fake = D_out_fake

        # compute losses
        loss_G_dict = criterion_G(y_hat, y_G, scores_fake, feats_fake,
                                  feats_real, y_hat_sub, y_G_sub)
        loss_G = loss_G_dict['G_loss']

        # optimizer generator
        optimizer_G.zero_grad()
        loss_G.backward()
        if c.gen_clip_grad > 0:
            torch.nn.utils.clip_grad_norm_(model_G.parameters(),
                                           c.gen_clip_grad)
        optimizer_G.step()
        if scheduler_G is not None:
            scheduler_G.step()

        # Convert loss entries to plain Python numbers. Entries that already
        # are int/float are kept as they are (same rule as for the
        # discriminator losses below); everything else is expected to expose
        # ``.item()``.
        loss_dict = {
            key: value if isinstance(value, (int, float)) else value.item()
            for key, value in loss_G_dict.items()
        }

        ##############################
        # DISCRIMINATOR
        ##############################
        if global_step >= c.steps_to_start_discriminator:
            # discriminator pass: regenerate from the D-specific conditioning
            # without tracking gradients through the generator
            with torch.no_grad():
                y_hat = model_G(c_D)

            # PQMF formatting
            if y_hat.shape[1] > 1:
                y_hat = model_G.pqmf_synthesis(y_hat)

            # run D with or without cond. features
            if d_is_conditional:
                D_out_fake = model_D(y_hat.detach(), c_D)
                D_out_real = model_D(y_D, c_D)
            else:
                D_out_fake = model_D(y_hat.detach())
                D_out_real = model_D(y_D)

            # format D outputs; only the scores are used by criterion_D
            if isinstance(D_out_fake, tuple):
                scores_fake, _ = D_out_fake
                scores_real, _ = D_out_real
            else:
                scores_fake = D_out_fake
                scores_real = D_out_real

            # compute losses
            loss_D_dict = criterion_D(scores_fake, scores_real)
            loss_D = loss_D_dict['D_loss']

            # optimizer discriminator
            optimizer_D.zero_grad()
            loss_D.backward()
            if c.disc_clip_grad > 0:
                torch.nn.utils.clip_grad_norm_(model_D.parameters(),
                                               c.disc_clip_grad)
            optimizer_D.step()
            if scheduler_D is not None:
                scheduler_D.step()

            for key, value in loss_D_dict.items():
                if isinstance(value, (int, float)):
                    loss_dict[key] = value
                else:
                    loss_dict[key] = value.item()

        step_time = time.time() - start_time
        epoch_time += step_time

        # get current learning rates
        current_lr_G = list(optimizer_G.param_groups)[0]['lr']
        current_lr_D = list(optimizer_D.param_groups)[0]['lr']

        # update avg stats
        update_train_values = {
            'avg_' + key: value for key, value in loss_dict.items()
        }
        update_train_values['avg_loader_time'] = loader_time
        update_train_values['avg_step_time'] = step_time
        keep_avg.update_values(update_train_values)

        # print training stats
        if global_step % c.print_step == 0:
            log_dict = {
                'step_time': [step_time, 2],
                'loader_time': [loader_time, 4],
                "current_lr_G": current_lr_G,
                "current_lr_D": current_lr_D
            }
            c_logger.print_train_step(batch_n_iter, num_iter, global_step,
                                      log_dict, loss_dict, keep_avg.avg_values)

        # logging to the tb_logger and checkpointing happen on rank 0 only
        if args.rank == 0:
            # plot step stats
            if global_step % 10 == 0:
                iter_stats = {
                    "lr_G": current_lr_G,
                    "lr_D": current_lr_D,
                    "step_time": step_time
                }
                iter_stats.update(loss_dict)
                tb_logger.tb_train_iter_stats(global_step, iter_stats)

            # save checkpoint
            if global_step % c.save_step == 0:
                if c.checkpoint:
                    # save model
                    save_checkpoint(model_G,
                                    optimizer_G,
                                    scheduler_G,
                                    model_D,
                                    optimizer_D,
                                    scheduler_D,
                                    global_step,
                                    epoch,
                                    OUT_PATH,
                                    model_losses=loss_dict)

                # compute spectrograms from the generator-pass output
                figures = plot_results(y_hat_vis, y_G, ap, global_step,
                                       'train')
                tb_logger.tb_train_figures(global_step, figures)

                # Sample audio: first item of the generator-pass output
                sample_voice = y_hat_vis[0].squeeze(0).detach().cpu().numpy()
                tb_logger.tb_train_audios(global_step,
                                          {'train/audio': sample_voice},
                                          c.audio["sample_rate"])
        # loader time of the next batch is measured from here
        end_time = time.time()

    # print epoch stats
    c_logger.print_train_epoch_end(global_step, epoch, epoch_time, keep_avg)

    # Plot Training Epoch Stats
    epoch_stats = {"epoch_time": epoch_time}
    epoch_stats.update(keep_avg.avg_values)
    if args.rank == 0:
        tb_logger.tb_train_epoch_stats(global_step, epoch_stats)
    # TODO: plot model stats (tb_logger.tb_model_weights) when
    # c.tb_model_param_stats is set
    return keep_avg.avg_values, global_step
Пример #2
0
def train(model, criterion, optimizer,
          scheduler, scaler, ap, global_step, epoch):
    """Run one training epoch of a noise-prediction model.

    Before the loop the training noise schedule from
    ``c['train_noise_schedule']`` is installed on the model. For each batch a
    noisy version of the target is produced by ``compute_y_n``, the model
    predicts the noise and ``criterion(noise, noise_hat)`` gives the loss.
    Gradients are always clipped with ``c.clip_grad``; when
    ``c.mixed_precision`` is set the backward/step goes through ``scaler``.

    Module-level names used here (not parameters): ``c``, ``use_cuda``,
    ``num_gpus``, ``args``, ``c_logger``, ``tb_logger``, ``OUT_PATH``.

    Args:
        model: network to train; may be a wrapper exposing the real model as
            ``.module``.
        criterion: callable returning the loss from ``(noise, noise_hat)``.
        optimizer: optimizer for ``model``.
        scheduler: LR scheduler or ``None``.
        scaler: gradient scaler, used only when ``c.mixed_precision`` is set.
        ap: object forwarded to ``setup_loader``.
        global_step: step counter on entry; incremented once per batch.
        epoch: epoch index.

    Returns:
        tuple: ``(keep_avg.avg_values, global_step)``.

    Raises:
        RuntimeError: if the loss contains NaN.
    """
    data_loader = setup_loader(ap, is_val=False, verbose=(epoch == 0))
    model.train()
    epoch_time = 0
    keep_avg = KeepAverage()
    # number of samples consumed per step across all devices
    samples_per_step = c.batch_size * num_gpus if use_cuda else c.batch_size
    batch_n_iter = int(len(data_loader.dataset) / samples_per_step)
    end_time = time.time()
    c_logger.print_train_start()

    # setup noise schedule
    schedule_cfg = c['train_noise_schedule']
    betas = np.linspace(schedule_cfg['min_val'],
                        schedule_cfg['max_val'],
                        schedule_cfg['num_steps'])
    # model-specific methods live on the wrapped module when a wrapper
    # exposing ``.module`` is in use
    core_model = model.module if hasattr(model, 'module') else model
    core_model.compute_noise_level(betas)

    for num_iter, data in enumerate(data_loader):
        start_time = time.time()

        # format data
        m, x = format_data(data)
        loader_time = time.time() - end_time

        global_step += 1

        with torch.cuda.amp.autocast(enabled=c.mixed_precision):
            # compute noisy input
            noise, x_noisy, noise_scale = core_model.compute_y_n(x)
            # forward pass
            noise_hat = model(x_noisy, m, noise_scale)
            # compute losses
            loss = criterion(noise, noise_hat)
        loss_wavegrad_dict = {'wavegrad_loss': loss}

        # abort on NaN loss
        if torch.isnan(loss).any():
            raise RuntimeError(f'Detected NaN loss at step {global_step}.')

        optimizer.zero_grad()

        if not c.mixed_precision:
            # plain backward pass
            loss.backward()
            grad_norm = torch.nn.utils.clip_grad_norm_(
                model.parameters(), c.clip_grad)
            optimizer.step()
        else:
            # backward pass with loss scaling; gradients are unscaled before
            # clipping so the threshold applies to the true gradient norm
            scaler.scale(loss).backward()
            scaler.unscale_(optimizer)
            grad_norm = torch.nn.utils.clip_grad_norm_(
                model.parameters(), c.clip_grad)
            scaler.step(optimizer)
            scaler.update()

        # schedule update
        if scheduler is not None:
            scheduler.step()

        # disconnect loss values
        loss_dict = {
            name: (val if isinstance(val, int) else val.item())
            for name, val in loss_wavegrad_dict.items()
        }

        # epoch/step timing
        step_time = time.time() - start_time
        epoch_time += step_time

        # get current learning rates
        current_lr = list(optimizer.param_groups)[0]['lr']

        # update avg stats
        update_train_values = {
            'avg_' + name: val for name, val in loss_dict.items()
        }
        update_train_values['avg_loader_time'] = loader_time
        update_train_values['avg_step_time'] = step_time
        keep_avg.update_values(update_train_values)

        # print training stats
        if global_step % c.print_step == 0:
            log_dict = {
                'step_time': [step_time, 2],
                'loader_time': [loader_time, 4],
                "current_lr": current_lr,
                "grad_norm": grad_norm.item()
            }
            c_logger.print_train_step(batch_n_iter, num_iter, global_step,
                                      log_dict, loss_dict, keep_avg.avg_values)

        if args.rank == 0:
            # plot step stats
            if global_step % 10 == 0:
                iter_stats = {
                    "lr": current_lr,
                    "grad_norm": grad_norm.item(),
                    "step_time": step_time
                }
                iter_stats.update(loss_dict)
                tb_logger.tb_train_iter_stats(global_step, iter_stats)

            # save checkpoint
            if global_step % c.save_step == 0 and c.checkpoint:
                scaler_state = (scaler.state_dict()
                                if c.mixed_precision else None)
                save_checkpoint(model,
                                optimizer,
                                scheduler,
                                None,
                                None,
                                None,
                                global_step,
                                epoch,
                                OUT_PATH,
                                model_losses=loss_dict,
                                scaler=scaler_state)

        # loader time of the next batch is measured from here
        end_time = time.time()

    # print epoch stats
    c_logger.print_train_epoch_end(global_step, epoch, epoch_time, keep_avg)

    # Plot Training Epoch Stats
    epoch_stats = {"epoch_time": epoch_time}
    epoch_stats.update(keep_avg.avg_values)
    if args.rank == 0:
        tb_logger.tb_train_epoch_stats(global_step, epoch_stats)
    # plot model parameter stats
    if c.tb_model_param_stats and args.rank == 0:
        tb_logger.tb_model_weights(model, global_step)
    return keep_avg.avg_values, global_step
Пример #3
0
def _forward_loss(model, criterion, x_input, mels, y_coarse):
    """Run the forward pass and return the training loss for one batch.

    When ``model.mode`` is an int the prediction is transposed on dims
    (1, 2) and given a trailing axis; otherwise the target is cast to float.
    The target always receives a trailing axis before the criterion is
    applied.
    """
    y_hat = model(x_input, mels)
    if isinstance(model.mode, int):
        y_hat = y_hat.transpose(1, 2).unsqueeze(-1)
    else:
        y_coarse = y_coarse.float()
    y_coarse = y_coarse.unsqueeze(-1)
    return criterion(y_hat, y_coarse)


def train(model, optimizer, criterion, scheduler, scaler, ap, global_step,
          epoch):
    """Run one training epoch, with optional mixed-precision training.

    Every ``c.save_step`` steps a checkpoint is written (if ``c.checkpoint``)
    and a random training file is re-synthesized with ``model.generate`` so
    that spectrogram figures and an audio sample can be logged.

    Module-level names used here (not parameters): ``c``, ``use_cuda``,
    ``num_gpus``, ``train_data``, ``c_logger``, ``tb_logger``, ``OUT_PATH``.

    Args:
        model: network to train; must expose ``mode`` and ``generate``.
        optimizer: optimizer for ``model``.
        criterion: callable returning the loss from ``(y_hat, y_coarse)``.
        scheduler: LR scheduler or ``None``.
        scaler: gradient scaler, used only when ``c.mixed_precision`` is set.
        ap: object forwarded to ``setup_loader``; also provides ``load_wav``
            and ``melspectrogram`` for the logged sample.
        global_step: step counter on entry; incremented once per batch.
        epoch: epoch index.

    Returns:
        tuple: ``(keep_avg.avg_values, global_step)``.

    Raises:
        RuntimeError: if the loss is NaN on the full-precision path.
    """
    # create train loader
    data_loader = setup_loader(ap, is_val=False, verbose=(epoch == 0))
    model.train()
    epoch_time = 0
    keep_avg = KeepAverage()
    if use_cuda:
        batch_n_iter = int(
            len(data_loader.dataset) / (c.batch_size * num_gpus))
    else:
        batch_n_iter = int(len(data_loader.dataset) / c.batch_size)
    end_time = time.time()
    c_logger.print_train_start()
    # train loop
    for num_iter, data in enumerate(data_loader):
        start_time = time.time()
        x_input, mels, y_coarse = format_data(data)
        loader_time = time.time() - end_time
        global_step += 1

        optimizer.zero_grad()

        if c.mixed_precision:
            # mixed precision training
            with torch.cuda.amp.autocast():
                loss = _forward_loss(model, criterion, x_input, mels,
                                     y_coarse)
            scaler.scale(loss).backward()
            # unscale first so clipping sees the true gradient magnitudes
            scaler.unscale_(optimizer)
            if c.grad_clip > 0:
                torch.nn.utils.clip_grad_norm_(model.parameters(), c.grad_clip)
            scaler.step(optimizer)
            scaler.update()
        else:
            # full precision training
            loss = _forward_loss(model, criterion, x_input, mels, y_coarse)
            # Abort before a NaN loss can corrupt the weights. The previous
            # guard compared ``loss.item()`` with ``None`` and never fired.
            # NOTE(review): the mixed-precision path is left unguarded as in
            # the original; GradScaler.step is documented to skip steps with
            # non-finite gradients - confirm this is the desired policy.
            if torch.isnan(loss).any():
                raise RuntimeError(
                    f'Detected NaN loss at step {global_step}.')
            loss.backward()
            if c.grad_clip > 0:
                torch.nn.utils.clip_grad_norm_(model.parameters(), c.grad_clip)
            optimizer.step()

        if scheduler is not None:
            scheduler.step()

        # get the current learning rate
        cur_lr = list(optimizer.param_groups)[0]["lr"]

        step_time = time.time() - start_time
        epoch_time += step_time

        loss_dict = {"model_loss": loss.item()}
        update_train_values = {
            "avg_" + key: value for key, value in loss_dict.items()
        }
        update_train_values["avg_loader_time"] = loader_time
        update_train_values["avg_step_time"] = step_time
        keep_avg.update_values(update_train_values)

        # print training stats
        if global_step % c.print_step == 0:
            log_dict = {
                "step_time": [step_time, 2],
                "loader_time": [loader_time, 4],
                "current_lr": cur_lr,
            }
            c_logger.print_train_step(
                batch_n_iter,
                num_iter,
                global_step,
                log_dict,
                loss_dict,
                keep_avg.avg_values,
            )

        # plot step stats
        if global_step % 10 == 0:
            iter_stats = {"lr": cur_lr, "step_time": step_time}
            iter_stats.update(loss_dict)
            tb_logger.tb_train_iter_stats(global_step, iter_stats)

        # save checkpoint
        if global_step % c.save_step == 0:
            if c.checkpoint:
                # save model
                save_checkpoint(
                    model,
                    optimizer,
                    scheduler,
                    None,
                    None,
                    None,
                    global_step,
                    epoch,
                    OUT_PATH,
                    model_losses=loss_dict,
                    scaler=scaler.state_dict() if c.mixed_precision else None)

            # synthesize a full voice from a random training item; an item
            # is either the wav path itself or a tuple/list whose first
            # element is the wav path
            rand_idx = random.randrange(0, len(train_data))
            item = train_data[rand_idx]
            wav_path = item[0] if isinstance(item, (tuple, list)) else item
            wav = ap.load_wav(wav_path)
            ground_mel = ap.melspectrogram(wav)
            sample_wav = model.generate(ground_mel, c.batched,
                                        c.target_samples, c.overlap_samples,
                                        use_cuda)
            predict_mel = ap.melspectrogram(sample_wav)

            # compute spectrograms
            figures = {
                "train/ground_truth": plot_spectrogram(ground_mel.T),
                "train/prediction": plot_spectrogram(predict_mel.T)
            }
            tb_logger.tb_train_figures(global_step, figures)

            # Sample audio
            tb_logger.tb_train_audios(global_step, {"train/audio": sample_wav},
                                      c.audio["sample_rate"])
        # loader time of the next batch is measured from here
        end_time = time.time()

    # print epoch stats
    c_logger.print_train_epoch_end(global_step, epoch, epoch_time, keep_avg)

    # Plot Training Epoch Stats
    epoch_stats = {"epoch_time": epoch_time}
    epoch_stats.update(keep_avg.avg_values)
    tb_logger.tb_train_epoch_stats(global_step, epoch_stats)
    # TODO: plot model stats (tb_logger.tb_model_weights) when
    # c.tb_model_param_stats is set
    return keep_avg.avg_values, global_step