Example 1
def compute_validation_loss(model, criterion, valset, collate_fn, batch_size,
                            n_gpus):
    model.eval()
    with torch.no_grad():
        val_sampler = DistributedSampler(valset) if n_gpus > 1 else None
        val_loader = DataLoader(valset,
                                sampler=val_sampler,
                                num_workers=1,
                                shuffle=False,
                                batch_size=batch_size,
                                pin_memory=False,
                                collate_fn=collate_fn)

        val_loss, val_loss_nll, val_loss_gate = 0.0, 0.0, 0.0
        for i, batch in enumerate(val_loader):
            mel, speaker_vecs, text, in_lens, out_lens, gate_target, attn_prior = batch
            mel = mel.cuda()
            speaker_vecs = speaker_vecs.cuda()
            text = text.cuda()
            in_lens = in_lens.cuda()
            out_lens = out_lens.cuda()
            gate_target = gate_target.cuda()
            attn_prior = attn_prior.cuda() if valset.use_attn_prior else None
            z, log_s_list, gate_pred, attn, mean, log_var, prob = model(
                mel, speaker_vecs, text, in_lens, out_lens, attn_prior)

            loss_nll, loss_gate = criterion(
                (z, log_s_list, gate_pred, mean, log_var, prob), gate_target,
                out_lens)
            loss = loss_nll + loss_gate

            if n_gpus > 1:
                reduced_val_loss = reduce_tensor(loss.data, n_gpus).item()
                reduced_val_loss_nll = reduce_tensor(loss_nll.data,
                                                     n_gpus).item()
                reduced_val_loss_gate = reduce_tensor(loss_gate.data,
                                                      n_gpus).item()
            else:
                reduced_val_loss = loss.item()
                reduced_val_loss_nll = loss_nll.item()
                reduced_val_loss_gate = loss_gate.item()
            val_loss += reduced_val_loss
            val_loss_nll += reduced_val_loss_nll
            val_loss_gate += reduced_val_loss_gate
        val_loss = val_loss / (i + 1)
        val_loss_nll = val_loss_nll / (i + 1)
        val_loss_gate = val_loss_gate / (i + 1)

    print("Mean {}\nLogVar {}\nProb {}".format(mean, log_var, prob))
    model.train()
    return val_loss, val_loss_nll, val_loss_gate, attn, gate_pred, gate_target
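Every example on this page calls a `reduce_tensor` helper that is imported elsewhere. A minimal sketch, assuming the usual all-reduce-and-average implementation found in the NVIDIA Tacotron2/WaveGlow distributed utilities; where exactly it lives is an assumption:

import torch
import torch.distributed as dist

def reduce_tensor(tensor, n_gpus):
    # Sum the value across all ranks, then divide by the number of GPUs so
    # every rank ends up holding the mean (requires an initialised process group).
    rt = tensor.clone()
    dist.all_reduce(rt, op=dist.ReduceOp.SUM)
    rt /= n_gpus
    return rt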
Example 2
def validation(model, data_loader, criterion, rank, world_size, epoch,
               num_epoch):
    model.eval()
    summary_loss = AverageMeter()

    if rank == 0:
        pbar = tqdm(total=len(data_loader), unit="batch")
        pbar.set_description(f"Epoch[{epoch+1}/{num_epoch}].Val")

    with torch.no_grad():
        for imgs, labels in data_loader:
            batch_size = imgs.shape[0]

            imgs = imgs.to(rank)
            labels = labels.to(rank)
            output = model(imgs)
            loss = criterion(output, labels)

            reduced_loss = reduce_tensor(loss, world_size)
            summary_loss.update(reduced_loss.detach().item(), batch_size)

            if rank == 0:
                pbar.set_postfix({"loss": summary_loss.avg})
                pbar.update(1)

    return summary_loss
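Examples 2 and 6 also assume an `AverageMeter` helper. A minimal sketch of a sample-count-weighted running average, which is what the `update(value, batch_size)` / `.avg` usage implies:

class AverageMeter:
    # Tracks a running average weighted by the number of samples in each update.
    def __init__(self):
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, value, n=1):
        self.sum += value * n
        self.count += n
        self.avg = self.sum / self.count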
Example 3
    def Train_Step(self, features):
        loss_dict = {}

        features = features.to(self.device, non_blocking=True)

        with torch.cuda.amp.autocast(enabled=self.hp.Use_Mixed_Precision):
            embeddings = self.model(features)
            loss_dict['Embedding'] = self.criterion(
                embeddings, self.hp.Train.Batch.Train.Pattern_per_Speaker)

        self.optimizer.zero_grad()
        self.scaler.scale(loss_dict['Embedding']).backward()
        if self.hp.Train.Gradient_Norm > 0.0:
            self.scaler.unscale_(self.optimizer)
            torch.nn.utils.clip_grad_norm_(
                parameters=self.model.parameters(),
                max_norm=self.hp.Train.Gradient_Norm)

        self.scaler.step(self.optimizer)
        self.scaler.update()
        self.scheduler.step()
        self.steps += 1
        self.tqdm.update(1)

        for tag, loss in loss_dict.items():
            if self.num_gpus > 1:
                loss = reduce_tensor(loss.data, self.num_gpus).item()
            else:
                loss = loss.item()
            self.scalar_dict['Train']['Loss/{}'.format(tag)] += loss
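The `Train_Step` above relies on a `self.scaler` created during trainer setup. A plausible initialisation sketch using the standard `torch.cuda.amp.GradScaler` API; the hyper-parameter attribute name mirrors the example and the surrounding constructor is hypothetical:

import torch

class Trainer:  # hypothetical constructor fragment showing where the scaler comes from
    def __init__(self, hp):
        self.hp = hp
        # With enabled=False the scaler is a no-op, so the same Train_Step code
        # path works whether or not mixed precision is turned on.
        self.scaler = torch.cuda.amp.GradScaler(enabled=hp.Use_Mixed_Precision)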
Example 4
def validate(model, criterion, valset, iteration, batch_size, n_gpus,
             collate_fn, logger, rank):
    """Handles all the validation scoring and printing"""

    model.eval()

    with torch.no_grad():
        val_sampler = DistributedSampler(valset) if n_gpus > 1 else None
        val_loader = DataLoader(valset,
                                sampler=val_sampler,
                                num_workers=1,
                                shuffle=False,
                                batch_size=batch_size,
                                pin_memory=False,
                                collate_fn=collate_fn)

        val_loss = 0.0

        for i, batch in enumerate(val_loader):
            x, y = model.parse_batch(batch)
            y_pred = model(x)
            loss = criterion(y_pred, y)
            reduced_val_loss = reduce_tensor(loss.data, n_gpus).item() \
                if n_gpus > 1 else loss.item()
            val_loss += reduced_val_loss
        val_loss = val_loss / (i + 1)

    model.train()

    if rank == 0:
        print("Validation loss {}: {:9f}  ".format(iteration,
                                                   reduced_val_loss))
        logger.log_validation(reduced_val_loss, model, y, y_pred, iteration)

    return val_loss
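Example 4 leans on `model.parse_batch`, which is not shown. In Tacotron2-style trainers this helper moves the collated batch onto the GPU and splits it into an input tuple `x` and a target tuple `y`; a generic sketch under that assumption, with illustrative field names rather than the original signature:

def parse_batch(batch):
    # Hypothetical splitter: model inputs go into x, training targets into y.
    text_padded, input_lengths, mel_padded, gate_padded, output_lengths = batch
    text_padded = text_padded.cuda().long()
    input_lengths = input_lengths.cuda().long()
    mel_padded = mel_padded.cuda().float()
    gate_padded = gate_padded.cuda().float()
    output_lengths = output_lengths.cuda().long()
    x = (text_padded, input_lengths, mel_padded, output_lengths)
    y = (mel_padded, gate_padded)
    return x, y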
Example 5
    def Evaluation_Step(self, tokens, notes, durations, token_lengths,
                        features, feature_lengths):
        loss_dict = {}
        tokens = tokens.to(self.device, non_blocking=True)
        notes = notes.to(self.device, non_blocking=True)
        durations = durations.to(self.device, non_blocking=True)
        token_lengths = token_lengths.to(self.device, non_blocking=True)
        features = features.to(self.device, non_blocking=True)
        feature_lengths = feature_lengths.to(self.device, non_blocking=True)

        pre_features, post_features, alignments = self.model(
            tokens=tokens,
            notes=notes,
            durations=durations,
            token_lengths=token_lengths,
            features=features,
            feature_lengths=feature_lengths,
            is_training=True)

        loss_dict['Pre'] = self.criterion_dict['MSE'](pre_features, features)
        loss_dict['Post'] = self.criterion_dict['MSE'](post_features, features)
        loss_dict['Guided_Attention'] = self.criterion_dict['GAL'](
            alignments, feature_lengths, token_lengths)
        loss_dict['Total'] = loss_dict['Pre'] + loss_dict['Post'] + loss_dict[
            'Guided_Attention']

        for tag, loss in loss_dict.items():
            if self.num_gpus > 1:
                loss = reduce_tensor(loss.data, self.num_gpus).item()
            else:
                loss = loss.item()
            self.scalar_dict['Evaluation']['Loss/{}'.format(tag)] += loss

        return pre_features, post_features, alignments
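Examples 5 and 8 use a `'GAL'` criterion, i.e. a guided attention loss. A sketch following the common formulation from Tachibana et al., which penalises attention mass far from the text/feature diagonal; the masking and reduction details of the original criterion_dict entry are assumptions:

import torch

def guided_attention_loss(alignments, feature_lengths, token_lengths, g=0.2):
    # alignments: [B, T_feature, T_token] soft attention weights.
    B, T, N = alignments.size()
    device = alignments.device
    t = torch.arange(T, device=device, dtype=torch.float32).unsqueeze(1)  # [T, 1]
    n = torch.arange(N, device=device, dtype=torch.float32).unsqueeze(0)  # [1, N]
    loss = alignments.new_zeros(())
    for b in range(B):
        T_b, N_b = int(feature_lengths[b]), int(token_lengths[b])
        # Weight is ~0 near the diagonal and grows towards 1 far away from it.
        w = 1.0 - torch.exp(-((n / N_b - t / T_b) ** 2) / (2.0 * g ** 2))
        loss = loss + (alignments[b, :T_b, :N_b] * w[:T_b, :N_b]).mean()
    return loss / B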
Example 6
def train_one_epoch(model, train_loader, optimizer, criterion, rank,
                    world_size, epoch, num_epoch):
    model.train()
    summary_loss = AverageMeter()

    if rank == 0:
        pbar = tqdm(total=len(train_loader), unit="batch")
        pbar.set_description(f"Epoch[{epoch+1}/{num_epoch}].Train")

    for imgs, labels in train_loader:
        batch_size = imgs.shape[0]

        imgs = imgs.to(rank)
        labels = labels.to(rank)
        output = model(imgs)
        loss = criterion(output, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        reduced_loss = reduce_tensor(loss, world_size)
        summary_loss.update(reduced_loss.detach().item(), batch_size)

        if rank == 0:
            pbar.set_postfix({"loss": summary_loss.avg})
            pbar.update(1)

    return summary_loss
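Examples 2 and 6 assume the caller has already initialised the process group, wrapped the model, and built distributed data loaders. A minimal single-node driver sketch for `torchrun`; `MyModel` and `make_loaders` are placeholders, not part of the original code:

import os
import torch
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP

def main():
    dist.init_process_group("nccl")          # torchrun supplies RANK/WORLD_SIZE
    rank = int(os.environ["LOCAL_RANK"])     # one GPU per process on a single node
    world_size = dist.get_world_size()
    torch.cuda.set_device(rank)

    model = DDP(MyModel().to(rank), device_ids=[rank])          # hypothetical model
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    criterion = torch.nn.CrossEntropyLoss()
    train_loader, val_loader = make_loaders(rank, world_size)   # hypothetical loaders

    num_epoch = 10
    for epoch in range(num_epoch):
        train_loader.sampler.set_epoch(epoch)  # reshuffle the DistributedSampler
        train_one_epoch(model, train_loader, optimizer, criterion,
                        rank, world_size, epoch, num_epoch)
        validation(model, val_loader, criterion, rank, world_size,
                   epoch, num_epoch)
    dist.destroy_process_group()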
Example 7
    def Evaluation_Step(self, features):
        loss_dict = {}

        features = features.to(self.device, non_blocking=True)

        embeddings = self.model(features)
        loss_dict['Embedding'] = self.criterion(
            embeddings, self.hp.Train.Batch.Eval.Pattern_per_Speaker)

        for tag, loss in loss_dict.items():
            if self.num_gpus > 1:
                loss = reduce_tensor(loss.data, self.num_gpus).item()
            else:
                loss = loss.item()
            self.scalar_dict['Evaluation']['Loss/{}'.format(tag)] += loss
Example 8
    def Train_Step(self, tokens, notes, durations, token_lengths, features,
                   feature_lengths):
        loss_dict = {}
        tokens = tokens.to(self.device, non_blocking=True)
        notes = notes.to(self.device, non_blocking=True)
        durations = durations.to(self.device, non_blocking=True)
        token_lengths = token_lengths.to(self.device, non_blocking=True)
        features = features.to(self.device, non_blocking=True)
        feature_lengths = feature_lengths.to(self.device, non_blocking=True)

        with torch.cuda.amp.autocast(enabled=self.hp.Use_Mixed_Precision):
            pre_features, post_features, alignments = self.model(
                tokens=tokens,
                notes=notes,
                durations=durations,
                token_lengths=token_lengths,
                features=features,
                feature_lengths=feature_lengths,
                is_training=True)

            loss_dict['Pre'] = self.criterion_dict['MSE'](pre_features,
                                                          features)
            loss_dict['Post'] = self.criterion_dict['MSE'](post_features,
                                                           features)
            loss_dict['Guided_Attention'] = self.criterion_dict['GAL'](
                alignments, feature_lengths, token_lengths)
            loss_dict['Total'] = loss_dict['Pre'] + loss_dict[
                'Post'] + loss_dict['Guided_Attention']

        self.optimizer.zero_grad()
        self.scaler.scale(loss_dict['Total']).backward()
        if self.hp.Train.Gradient_Norm > 0.0:
            self.scaler.unscale_(self.optimizer)
            torch.nn.utils.clip_grad_norm_(
                parameters=self.model.parameters(),
                max_norm=self.hp.Train.Gradient_Norm)

        self.scaler.step(self.optimizer)
        self.scaler.update()
        self.scheduler.step()
        self.steps += 1
        self.tqdm.update(1)

        for tag, loss in loss_dict.items():
            if self.num_gpus > 1:
                loss = reduce_tensor(loss.data, self.num_gpus).item()
            else:
                loss = loss.item()
            self.scalar_dict['Train']['Loss/{}'.format(tag)] += loss
Example 9
def eval(eval_loader,
         model,
         criterion,
         num_gpus,
         start_time,
         epoch,
         use_multi_speaker=False):
    print("[%s] start evaluation" %
          datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S"))
    with torch.no_grad():
        model.eval()
        total_loss = 0.
        for i, batch in enumerate(eval_loader):
            model.zero_grad()

            if use_multi_speaker:
                mel, audio, spk_embed_or_id = batch
                spk_embed_or_id = torch.autograd.Variable(
                    spk_embed_or_id.cuda())
            else:
                mel, audio = batch
            mel = torch.autograd.Variable(mel.cuda())
            audio = torch.autograd.Variable(audio.cuda())

            if use_multi_speaker:
                outputs = model((mel, audio, spk_embed_or_id))
            else:
                outputs = model((mel, audio))

            loss = criterion(outputs)
            if num_gpus > 1:
                reduced_loss = reduce_tensor(loss.data, num_gpus).item()
            else:
                reduced_loss = loss.item()

            total_loss += reduced_loss
            if i > 0 and i % 100 == 0:
                elapsed = datetime.datetime.now() - start_time
                print("[{}][els: {}] eval {}/{} steps:\t{:.9f}".format(
                    datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S"),
                    elapsed, i, len(eval_loader), reduced_loss))
    elapsed = datetime.datetime.now() - start_time
    print("[{}][els: {}] {} epoch :\t eval avg loss {:.9f}".format(
        datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S"), elapsed, epoch,
        total_loss / len(eval_loader)))
Example 10
def validate(model, criterion, testset, iteration, batch_size, num_gpus,
             logger):
    model.eval()
    with torch.no_grad():
        print("validation {}:".format(iteration))

        batch_size = min(batch_size, int(len(testset) / num_gpus))

        test_sampler = DistributedSampler(testset) if num_gpus > 1 else None
        test_loader = DataLoader(testset,
                                 num_workers=1,
                                 shuffle=False,
                                 sampler=test_sampler,
                                 batch_size=batch_size,
                                 pin_memory=False,
                                 drop_last=True)

        # # why is this necessary??
        # for j, test_batch in enumerate(test_loader):
        #     print("\twtfbatch loaded, {} of {}".format(j + 1, len(test_loader)))
        #     mel, audio = test_batch
        #     print("\twtfbatch done, {} of {}: {} {}".format(j + 1, len(test_loader), mel.size(), audio.size()))

        val_loss = 0.0
        for j, test_batch in enumerate(test_loader):
            print("\tval batch loaded, {} of {}".format(
                j + 1, len(test_loader)))
            mel, audio = test_batch
            mel = torch.autograd.Variable(mel.cuda())
            audio = torch.autograd.Variable(audio.cuda())
            outputs = model((mel, audio))

            loss = criterion(outputs)
            if num_gpus > 1:
                reduced_val_loss = reduce_tensor(loss.data, num_gpus).item()
            else:
                reduced_val_loss = loss.item()
            val_loss += reduced_val_loss
            print("\tval batch done, {} of {}: {:.9f}".format(
                j + 1, len(test_loader), reduced_val_loss))
        val_loss = val_loss / len(test_loader)
        print("val loss: {}:\t{:.9f}".format(iteration, val_loss))
        if logger:
            logger.add_scalar('test_loss', val_loss, iteration)
    model.train()
Example 11
def train(num_gpus,
          rank,
          group_name,
          output_directory,
          epochs,
          learning_rate,
          sigma,
          iters_per_checkpoint,
          batch_size,
          seed,
          fp16_run,
          checkpoint_path,
          with_tensorboard,
          num_workers=4):
    print("num_workers", num_workers)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # =====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    # =====END:   ADDED FOR DISTRIBUTED======

    criterion = WaveGlowLoss(sigma)
    model = WaveGlow(**waveglow_config).cuda()

    # =====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
    # =====END:   ADDED FOR DISTRIBUTED======

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = StepLR(optimizer, step_size=1, gamma=0.96)

    if fp16_run:
        from apex import amp
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')

    # Load checkpoint if one exists
    iteration = 0
    if checkpoint_path != "":
        model, optimizer, iteration = load_checkpoint(checkpoint_path, model,
                                                      optimizer)
        iteration += 1  # next iteration is iteration + 1

    trainset = Mel2Samp(**data_config)
    evalset = Mel2Samp(**eval_data_config)
    # =====START: ADDED FOR DISTRIBUTED======
    train_sampler = DistributedSampler(trainset) if num_gpus > 1 else None
    eval_sampler = DistributedSampler(evalset) if num_gpus > 1 else None
    # =====END:   ADDED FOR DISTRIBUTED======
    train_loader = DataLoader(trainset,
                              num_workers=num_workers,
                              shuffle=False,
                              sampler=train_sampler,
                              batch_size=batch_size,
                              pin_memory=False,
                              drop_last=True)
    eval_loader = DataLoader(evalset,
                             num_workers=num_workers,
                             shuffle=False,
                             sampler=eval_sampler,
                             batch_size=batch_size,
                             pin_memory=False,
                             drop_last=True)

    # Get shared output_directory ready
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)

    if with_tensorboard and rank == 0:
        from tensorboardX import SummaryWriter
        logger = SummaryWriter(os.path.join(output_directory, 'logs'))

    epoch_offset = max(1, int(iteration / len(train_loader)))
    start_time = datetime.datetime.now()
    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, epochs):
        print('Epoch:', epoch, 'LR:', scheduler.get_lr())
        elapsed = datetime.datetime.now() - start_time
        print("Epoch: [{}][els: {}] {}".format(
            datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S"), elapsed,
            epoch))
        model.train()
        total_loss = 0.
        for i, batch in enumerate(train_loader):
            model.zero_grad()

            if waveglow_config["multi_speaker_config"]["use_multi_speaker"]:
                mel, audio, spk_embed_or_id = batch
                spk_embed_or_id = torch.autograd.Variable(
                    spk_embed_or_id.cuda())
            else:
                mel, audio = batch
            mel = torch.autograd.Variable(mel.cuda())
            audio = torch.autograd.Variable(audio.cuda())

            if waveglow_config["multi_speaker_config"]["use_multi_speaker"]:
                outputs = model((mel, audio, spk_embed_or_id))
            else:
                outputs = model((mel, audio))

            loss = criterion(outputs)
            if num_gpus > 1:
                reduced_loss = reduce_tensor(loss.data, num_gpus).item()
            else:
                reduced_loss = loss.item()

            if fp16_run:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            optimizer.step()
            total_loss += reduced_loss
            if i > 0 and i % 10 == 0:
                elapsed = datetime.datetime.now() - start_time
                print(
                    "[{}][els: {}] epoch {},total steps{}, {}/{} steps:\t{:.9f}"
                    .format(
                        datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S"),
                        elapsed, epoch, iteration, i, len(train_loader),
                        reduced_loss))
            if with_tensorboard and rank == 0:
                logger.add_scalar('training_loss', reduced_loss,
                                  i + len(train_loader) * epoch)

            if (iteration % iters_per_checkpoint == 0):
                if rank == 0:
                    checkpoint_path = "{}/waveglow_{}".format(
                        output_directory, iteration)
                    save_checkpoint(model, optimizer, learning_rate, iteration,
                                    checkpoint_path)

            iteration += 1
        elapsed = datetime.datetime.now() - start_time
        print("[{}][els: {}] {} epoch :\tavg loss {:.9f}".format(
            datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S"), elapsed,
            epoch, total_loss / len(train_loader)))
        scheduler.step()
        eval.eval(eval_loader, model, criterion, num_gpus, start_time, epoch,
                  waveglow_config["multi_speaker_config"]["use_multi_speaker"])
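The WaveGlow training loops call `save_checkpoint` and `load_checkpoint` without showing them. A minimal sketch matching the three-value `load_checkpoint` used in Examples 11, 13, 15, and 16; the exact dictionary keys are assumptions:

import torch

def save_checkpoint(model, optimizer, learning_rate, iteration, filepath):
    print("Saving model and optimizer state at iteration {} to {}".format(
        iteration, filepath))
    torch.save({'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'learning_rate': learning_rate,
                'iteration': iteration}, filepath)

def load_checkpoint(checkpoint_path, model, optimizer):
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(checkpoint['model'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    return model, optimizer, checkpoint['iteration']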
Example 12
def train(num_gpus, rank, group_name, output_directory, epochs, learning_rate,
          sigma, loss_empthasis, iters_per_checkpoint, batch_size, seed, fp16_run,
          checkpoint_path, with_tensorboard, logdirname, datedlogdir, warm_start=False, optimizer='ADAM', start_zero=False):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    #=====END:   ADDED FOR DISTRIBUTED======
    
    global WaveGlow
    global WaveGlowLoss
    
    ax = True # this is **really** bad coding practice :D
    if ax:
        from efficient_model_ax import WaveGlow
        from efficient_loss import WaveGlowLoss
    else:
        if waveglow_config["yoyo"]: # efficient_mode # TODO: Add to Config File
            from efficient_model import WaveGlow
            from efficient_loss import WaveGlowLoss
        else:
            from glow import WaveGlow, WaveGlowLoss
    
    criterion = WaveGlowLoss(sigma, loss_empthasis)
    model = WaveGlow(**waveglow_config).cuda()
    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
    #=====END:   ADDED FOR DISTRIBUTED======
    STFTs = [STFT.TacotronSTFT(filter_length=window,
                                 hop_length=data_config['hop_length'],
                                 win_length=window,
                                 sampling_rate=data_config['sampling_rate'],
                                 n_mel_channels=160,
                                 mel_fmin=0, mel_fmax=16000) for window in data_config['validation_windows']]
    
    loader_STFT = STFT.TacotronSTFT(filter_length=data_config['filter_length'],
                                 hop_length=data_config['hop_length'],
                                 win_length=data_config['win_length'],
                                 sampling_rate=data_config['sampling_rate'],
                                 n_mel_channels=data_config['n_mel_channels'] if 'n_mel_channels' in data_config.keys() else 160,
                                 mel_fmin=data_config['mel_fmin'], mel_fmax=data_config['mel_fmax'])
    
    #optimizer = "Adam"
    optimizer = optimizer.lower()
    optimizer_fused = bool( 0 ) # use Apex fused optimizer, should be identical to normal but slightly faster and only works on RTX cards
    if optimizer_fused:
        from apex import optimizers as apexopt
        if optimizer == "adam":
            optimizer = apexopt.FusedAdam(model.parameters(), lr=learning_rate)
        elif optimizer == "lamb":
            optimizer = apexopt.FusedLAMB(model.parameters(), lr=learning_rate, max_grad_norm=200)
    else:
        if optimizer == "adam":
            optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
        elif optimizer == "lamb":
            from lamb import Lamb as optLAMB
            optimizer = optLAMB(model.parameters(), lr=learning_rate)
            #import torch_optimizer as optim
            #optimizer = optim.Lamb(model.parameters(), lr=learning_rate)
            #raise# PyTorch doesn't currently include LAMB optimizer.
    
    if fp16_run:
        global amp
        from apex import amp
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
    else:
        amp = None
    
    ## LEARNING RATE SCHEDULER
    if True:
        from torch.optim.lr_scheduler import ReduceLROnPlateau
        min_lr = 1e-8
        factor = 0.1**(1/5) # amount to scale the LR by on Validation Loss plateau
        scheduler = ReduceLROnPlateau(optimizer, 'min', factor=factor, patience=20, cooldown=2, min_lr=min_lr, verbose=True, threshold=0.0001, threshold_mode='abs')
        print("ReduceLROnPlateau used as Learning Rate Scheduler.")
    else: scheduler=False
    
    # Load checkpoint if one exists
    iteration = 0
    if checkpoint_path != "":
        model, optimizer, iteration, scheduler = load_checkpoint(checkpoint_path, model,
                                                      optimizer, scheduler, fp16_run, warm_start=warm_start)
        iteration += 1  # next iteration is iteration + 1
    if start_zero:
        iteration = 0
    
    trainset = Mel2Samp(**data_config, check_files=True)
    speaker_lookup = trainset.speaker_ids
    # =====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        train_sampler = DistributedSampler(trainset, shuffle=True)
        shuffle = False
    else:
        train_sampler = None
        shuffle = True
    # =====END:   ADDED FOR DISTRIBUTED======
    train_loader = DataLoader(trainset, num_workers=3, shuffle=shuffle,
                              sampler=train_sampler,
                              batch_size=batch_size,
                              pin_memory=False,
                              drop_last=True)
    
    # Get shared output_directory ready
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)
    
    if with_tensorboard and rank == 0:
        from tensorboardX import SummaryWriter
        if datedlogdir:
            timestr = time.strftime("%Y_%m_%d-%H_%M_%S")
            log_directory = os.path.join(output_directory, logdirname, timestr)
        else:
            log_directory = os.path.join(output_directory, logdirname)
        logger = SummaryWriter(log_directory)
    
    moving_average = int(min(len(train_loader), 100)) # average loss over entire Epoch
    rolling_sum = StreamingMovingAverage(moving_average)
    start_time = time.time()
    start_time_iter = time.time()
    start_time_dekaiter = time.time()
    model.train()
    
    # best (averaged) training loss
    if os.path.exists(os.path.join(output_directory, "best_model")+".txt"):
        best_model_loss = float(str(open(os.path.join(output_directory, "best_model")+".txt", "r", encoding="utf-8").read()).split("\n")[0])
    else:
        best_model_loss = -6.20
    
    # best (validation) MSE on inferred spectrogram.
    if os.path.exists(os.path.join(output_directory, "best_val_model")+".txt"):
        best_MSE = float(str(open(os.path.join(output_directory, "best_val_model")+".txt", "r", encoding="utf-8").read()).split("\n")[0])
    else:
        best_MSE = 9e9
    
    epoch_offset = max(0, int(iteration / len(train_loader)))
    
    pytorch_total_params = sum(p.numel() for p in model.parameters())
    print("{:,} total parameters in model".format(pytorch_total_params))
    pytorch_total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print("{:,} trainable parameters.".format(pytorch_total_params))
    
    print(f"Segment Length: {data_config['segment_length']:,}\nBatch Size: {batch_size:,}\nNumber of GPUs: {num_gpus:,}\nSamples/Iter: {data_config['segment_length']*batch_size*num_gpus:,}")
    
    training = True
    while training:
        try:
            if rank == 0:
                epochs_iterator = tqdm(range(epoch_offset, epochs), initial=epoch_offset, total=epochs, smoothing=0.01, desc="Epoch", position=1, unit="epoch")
            else:
                epochs_iterator = range(epoch_offset, epochs)
            # ================ MAIN TRAINING LOOP! ===================
            for epoch in epochs_iterator:
                print(f"Epoch: {epoch}")
                if num_gpus > 1:
                    train_sampler.set_epoch(epoch)
                
                if rank == 0:
                    iters_iterator = tqdm(enumerate(train_loader), desc=" Iter", smoothing=0, total=len(train_loader), position=0, unit="iter", leave=True)
                else:
                    iters_iterator = enumerate(train_loader)
                for i, batch in iters_iterator:
                    # run external code every iter, allows the run to be adjusted without restarts
                    if (i==0 or iteration % param_interval == 0):
                        try:
                            with open("run_every_epoch.py") as f:
                                internal_text = str(f.read())
                                if len(internal_text) > 0:
                                    #code = compile(internal_text, "run_every_epoch.py", 'exec')
                                    ldict = {'iteration': iteration, 'seconds_elapsed': time.time()-start_time}
                                    exec(internal_text, globals(), ldict)
                                else:
                                    print("No Custom code found, continuing without changes.")
                        except Exception as ex:
                            print(f"Custom code FAILED to run!\n{ex}")
                        globals().update(ldict)
                        locals().update(ldict)
                        if show_live_params:
                            print(internal_text)
                    if not iteration % 50: # check actual learning rate every 50 iters (because I sometimes see learning_rate variable go out-of-sync with real LR)
                        learning_rate = optimizer.param_groups[0]['lr']
                    # Learning Rate Schedule
                    if custom_lr:
                        old_lr = learning_rate
                        if iteration < warmup_start:
                            learning_rate = warmup_start_lr
                        elif iteration < warmup_end:
                            learning_rate = (iteration-warmup_start)*((A_+C_)-warmup_start_lr)/(warmup_end-warmup_start) + warmup_start_lr # learning rate increases from warmup_start_lr to A_ linearly over (warmup_end-warmup_start) iterations.
                        else:
                            if iteration < decay_start:
                                learning_rate = A_ + C_
                            else:
                                iteration_adjusted = iteration - decay_start
                                learning_rate = (A_*(e**(-iteration_adjusted/B_))) + C_
                        assert learning_rate > -1e-8, "Negative Learning Rate."
                        if old_lr != learning_rate:
                            for param_group in optimizer.param_groups:
                                param_group['lr'] = learning_rate
                    else:
                        scheduler.patience = scheduler_patience
                        scheduler.cooldown = scheduler_cooldown
                        if override_scheduler_last_lr:
                            scheduler._last_lr = override_scheduler_last_lr
                        if override_scheduler_best:
                            scheduler.best = override_scheduler_best
                        if override_scheduler_last_lr or override_scheduler_best:
                            print("scheduler._last_lr =", scheduler._last_lr, "scheduler.best =", scheduler.best, "  |", end='')
                    model.zero_grad()
                    mel, audio, speaker_ids = batch
                    mel = torch.autograd.Variable(mel.cuda(non_blocking=True))
                    audio = torch.autograd.Variable(audio.cuda(non_blocking=True))
                    speaker_ids = speaker_ids.cuda(non_blocking=True).long().squeeze(1)
                    outputs = model(mel, audio, speaker_ids)
                    
                    loss = criterion(outputs)
                    if num_gpus > 1:
                        reduced_loss = reduce_tensor(loss.data, num_gpus).item()
                    else:
                        reduced_loss = loss.item()
                    
                    if fp16_run:
                        with amp.scale_loss(loss, optimizer) as scaled_loss:
                            scaled_loss.backward()
                    else:
                        loss.backward()
                    
                    if (reduced_loss > LossExplosionThreshold) or (math.isnan(reduced_loss)):
                        model.zero_grad()
                        raise LossExplosion(f"\nLOSS EXPLOSION EXCEPTION ON RANK {rank}: Loss reached {reduced_loss} during iteration {iteration}.\n\n\n")
                    
                    if use_grad_clip:
                        if fp16_run:
                            grad_norm = torch.nn.utils.clip_grad_norm_(
                                amp.master_params(optimizer), grad_clip_thresh)
                        else:
                            grad_norm = torch.nn.utils.clip_grad_norm_(
                                model.parameters(), grad_clip_thresh)
                        if type(grad_norm) == torch.Tensor:
                            grad_norm = grad_norm.item()
                        is_overflow = math.isinf(grad_norm) or math.isnan(grad_norm)
                    else: is_overflow = False; grad_norm=0.00001
                    
                    optimizer.step()
                    if not is_overflow and rank == 0:
                        # get current Loss Scale of first optimizer
                        loss_scale = amp._amp_state.loss_scalers[0]._loss_scale if fp16_run else 32768
                        
                        if with_tensorboard:
                            if (iteration % 100000 == 0):
                                # plot distribution of parameters
                                for tag, value in model.named_parameters():
                                    tag = tag.replace('.', '/')
                                    logger.add_histogram(tag, value.data.cpu().numpy(), iteration)
                            logger.add_scalar('training_loss', reduced_loss, iteration)
                            logger.add_scalar('training_loss_samples', reduced_loss, iteration*batch_size)
                            if (iteration % 20 == 0):
                                logger.add_scalar('learning.rate', learning_rate, iteration)
                            if (iteration % 10 == 0):
                                logger.add_scalar('duration', ((time.time() - start_time_dekaiter)/10), iteration)
                        
                        average_loss = rolling_sum.process(reduced_loss)
                        if (iteration % 10 == 0):
                            tqdm.write("{} {}:  {:.3f} {:.3f}  {:.3f} {:08.3F} {:.8f}LR ({:.8f} Effective)  {:.2f}s/iter {:.4f}s/item".format(time.strftime("%H:%M:%S"), iteration, reduced_loss, average_loss, best_MSE, round(grad_norm,3), learning_rate, min((grad_clip_thresh/grad_norm)*learning_rate,learning_rate), (time.time() - start_time_dekaiter)/10, ((time.time() - start_time_dekaiter)/10)/(batch_size*num_gpus)))
                            start_time_dekaiter = time.time()
                        else:
                            tqdm.write("{} {}:  {:.3f} {:.3f}  {:.3f} {:08.3F} {:.8f}LR ({:.8f} Effective) {}LS".format(time.strftime("%H:%M:%S"), iteration, reduced_loss, average_loss, best_MSE, round(grad_norm,3), learning_rate, min((grad_clip_thresh/grad_norm)*learning_rate,learning_rate), loss_scale))
                        start_time_iter = time.time()
                    
                    if rank == 0 and (len(rolling_sum.values) > moving_average-2):
                        if (average_loss+best_model_margin) < best_model_loss:
                            checkpoint_path = os.path.join(output_directory, "best_model")
                            try:
                                save_checkpoint(model, optimizer, learning_rate, iteration, amp, scheduler, speaker_lookup,
                                            checkpoint_path)
                            except KeyboardInterrupt: # Avoid corrupting the model.
                                save_checkpoint(model, optimizer, learning_rate, iteration, amp, scheduler, speaker_lookup,
                                            checkpoint_path)
                            text_file = open((f"{checkpoint_path}.txt"), "w", encoding="utf-8")
                            text_file.write(str(average_loss)+"\n"+str(iteration))
                            text_file.close()
                            best_model_loss = average_loss  # only update the best loss when the rolling average beats it by best_model_margin.
                    if rank == 0 and iteration > 0 and ((iteration % iters_per_checkpoint == 0) or (os.path.exists(save_file_check_path))):
                        checkpoint_path = f"{output_directory}/waveglow_{iteration}"
                        save_checkpoint(model, optimizer, learning_rate, iteration, amp, scheduler, speaker_lookup,
                                        checkpoint_path)
                        if (os.path.exists(save_file_check_path)):
                            os.remove(save_file_check_path)
                    
                    if (iteration % validation_interval == 0):
                        if rank == 0:
                            MSE, MAE = validate(model, loader_STFT, STFTs, logger, iteration, data_config['validation_files'], speaker_lookup, sigma, output_directory, data_config)
                            if scheduler:
                                MSE = torch.tensor(MSE, device='cuda')
                                if num_gpus > 1:
                                    broadcast(MSE, 0)
                                scheduler.step(MSE.item())
                                if MSE < best_MSE:
                                    checkpoint_path = os.path.join(output_directory, "best_val_model")
                                    try:
                                        save_checkpoint(model, optimizer, learning_rate, iteration, amp, scheduler, speaker_lookup,
                                                    checkpoint_path)
                                    except KeyboardInterrupt: # Avoid corrupting the model.
                                        save_checkpoint(model, optimizer, learning_rate, iteration, amp, scheduler, speaker_lookup,
                                                    checkpoint_path)
                                    text_file = open((f"{checkpoint_path}.txt"), "w", encoding="utf-8")
                                    text_file.write(str(MSE.item())+"\n"+str(iteration))
                                    text_file.close()
                                    best_MSE = MSE.item()  # only update when the validation MSE improves on the best so far.
                        else:
                            if scheduler:
                                MSE = torch.zeros(1, device='cuda')
                                broadcast(MSE, 0)
                                scheduler.step(MSE.item())
                        learning_rate = optimizer.param_groups[0]['lr'] #check actual learning rate (because I sometimes see learning_rate variable go out-of-sync with real LR)
                    iteration += 1
            training = False # exit the While loop
        
        except LossExplosion as ex: # print Exception and continue from checkpoint. (turns out it takes < 4 seconds to restart like this, f*****g awesome)
            print(ex) # print Loss
            checkpoint_path = os.path.join(output_directory, "best_model")
            assert os.path.exists(checkpoint_path), "best_model must exist for automatic restarts"
            
            # clearing VRAM for load checkpoint
            audio = mel = speaker_ids = loss = None
            torch.cuda.empty_cache()
            
            model.eval()
            model, optimizer, iteration, scheduler = load_checkpoint(checkpoint_path, model, optimizer, scheduler, fp16_run)
            learning_rate = optimizer.param_groups[0]['lr']
            epoch_offset = max(0, int(iteration / len(train_loader)))
            model.train()
            iteration += 1
            pass # and continue training.
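Example 12 also relies on a `StreamingMovingAverage` helper. A sketch of a fixed-window rolling mean consistent with the `rolling_sum.process(reduced_loss)` and `rolling_sum.values` usage above:

class StreamingMovingAverage:
    # Rolling mean over the most recent window_size values.
    def __init__(self, window_size):
        self.window_size = window_size
        self.values = []
        self.sum = 0.0

    def process(self, value):
        self.values.append(value)
        self.sum += value
        if len(self.values) > self.window_size:
            self.sum -= self.values.pop(0)
        return self.sum / len(self.values)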
Example 13
def train(num_gpus, rank, group_name, output_directory, epochs, learning_rate,
          sigma, iters_per_checkpoint, batch_size, seed, fp16_run,
          checkpoint_path, with_tensorboard):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    #=====END:   ADDED FOR DISTRIBUTED======

    criterion = WaveGlowLoss(sigma)
    model = WaveGlow(**waveglow_config).cuda()

    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
    #=====END:   ADDED FOR DISTRIBUTED======

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    if fp16_run:
        from apex import amp
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')

    # Load checkpoint if one exists
    iteration = 0
    if checkpoint_path != "":
        model, optimizer, iteration = load_checkpoint(checkpoint_path, model,
                                                      optimizer)
        iteration += 1  # next iteration is iteration + 1

    trainset = Mel2Samp(data_config['training_files'],
                        data_config['segment_length'],
                        data_config['filter_length'],
                        data_config['hop_length'],
                        data_config['win_length'],
                        data_config['sampling_rate'],
                        data_config['mel_fmin'],
                        data_config['mel_fmax'],
                        debug=False)

    if 'testing_files' in data_config:
        testset = Mel2Samp(data_config['testing_files'],
                           data_config['segment_length'],
                           data_config['filter_length'],
                           data_config['hop_length'],
                           data_config['win_length'],
                           data_config['sampling_rate'],
                           data_config['mel_fmin'],
                           data_config['mel_fmax'],
                           debug=True)
    else:
        testset = None

    # =====START: ADDED FOR DISTRIBUTED======
    train_sampler = DistributedSampler(trainset) if num_gpus > 1 else None
    # =====END:   ADDED FOR DISTRIBUTED======
    train_loader = DataLoader(trainset,
                              num_workers=1,
                              shuffle=False,
                              sampler=train_sampler,
                              batch_size=batch_size,
                              pin_memory=False,
                              drop_last=True)

    # Get shared output_directory ready
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)

    if with_tensorboard and rank == 0:
        from tensorboardX import SummaryWriter
        logger = SummaryWriter(os.path.join(output_directory, 'logs'))
    else:
        logger = None

    model.train()
    epoch_offset = max(0, int(iteration / len(train_loader)))
    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, epochs):
        print("Epoch: {}".format(epoch))
        for i, batch in enumerate(train_loader):
            start = time.perf_counter()

            model.zero_grad()

            print("train batch loaded, {} ({} of {})".format(
                iteration, i, len(train_loader)))
            mel, audio = batch
            mel = torch.autograd.Variable(mel.cuda())
            audio = torch.autograd.Variable(audio.cuda())
            outputs = model((mel, audio))

            loss = criterion(outputs)
            if num_gpus > 1:
                reduced_loss = reduce_tensor(loss.data, num_gpus).item()
            else:
                reduced_loss = loss.item()

            if fp16_run:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            is_overflow = False
            if fp16_run:
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    amp.master_params(optimizer), 1.0)
                is_overflow = math.isnan(grad_norm)

            optimizer.step()

            duration = time.perf_counter() - start

            print(
                "train batch done, {} ({} of {}): {:.9f} (took {:.2f})".format(
                    iteration, i, len(train_loader), reduced_loss, duration))

            if logger:
                logger.add_scalar('training_loss', reduced_loss,
                                  i + len(train_loader) * epoch)
                logger.add_scalar('duration', duration,
                                  i + len(train_loader) * epoch)

            if testset and not is_overflow and (iteration %
                                                iters_per_checkpoint == 0):
                validate(model, criterion, testset, iteration, batch_size,
                         num_gpus, logger)

                if rank == 0:
                    rotate_checkpoints(output_directory)
                    checkpoint_path = "{}/waveglow_{}".format(
                        output_directory, iteration)
                    save_checkpoint(model, optimizer, learning_rate, iteration,
                                    checkpoint_path)

            iteration += 1
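Several of the training entry points call `init_distributed(rank, num_gpus, group_name, **dist_config)` before building the model. A sketch of what that setup typically does; the `dist_backend`/`dist_url` keys mirror the call sites and are otherwise assumptions:

import torch
import torch.distributed as dist

def init_distributed(rank, num_gpus, group_name, dist_backend="nccl",
                     dist_url="tcp://localhost:54321"):
    assert torch.cuda.is_available(), "Distributed training requires CUDA."
    # Pin this process to one GPU before creating the process group.
    torch.cuda.set_device(rank % torch.cuda.device_count())
    # group_name is accepted for compatibility with the call sites; the modern
    # process-group API identifies the job through init_method instead.
    dist.init_process_group(backend=dist_backend, init_method=dist_url,
                            world_size=num_gpus, rank=rank)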
Example 14
    def forward(self, pred_audio, gt_audio, amp, model, optimizer, optimizer_d,
                num_gpus, use_grad_clip,
                grad_clip_thresh):  # optional cond input
        """
        pred_audio: [B, T]
        gt_audio: [B, T]
        """
        metrics = {}
        pred_spect = self.MRS.get_mel(pred_audio)
        pred_spect = dynamic_range_compression(
            pred_spect)  # linear -> log magnitudes
        gt_spec = self.MRS.get_mel(gt_audio)
        gt_spec = dynamic_range_compression(
            gt_spec)  # linear -> log magnitudes
        if self.stage >= 2:
            real_labels = torch.zeros(gt_audio.shape[0],
                                      device=gt_audio.device,
                                      dtype=gt_audio.dtype)  # [B]
            fake_labels = torch.ones(gt_audio.shape[0],
                                     device=gt_audio.device,
                                     dtype=gt_audio.dtype)  # [B]

            if False:  # (optional) mask frequencies that humans can't hear in STFT
                pred_spect *= self.mask
                gt_spec *= self.mask

            #############################
            ###    Generator Stuff    ###
            #############################
            mel_fake_pred_fakeness = self.discriminatorS(
                pred_spect)  # [B] predict fakeness of generated spectrogram
            wav_fake_pred_fakeness = self.discriminatorW(
                pred_audio)  # [B] predict fakeness of generated audio
            fake_pred_fakeness = (
                mel_fake_pred_fakeness + wav_fake_pred_fakeness
            ).sigmoid()  # sum the two fakeness logits and squash to (0.0, 1.0)
            # [B] -> [] loss to decrease the predicted fakeness of generated samples
            loss = nn.BCELoss()(fake_pred_fakeness, real_labels)

            metrics['g_train_loss'] = reduce_tensor(
                loss.data, num_gpus).item() if num_gpus > 1 else loss.item()

            if amp is not None:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            if use_grad_clip:
                if amp is not None:
                    grad_norm = torch.nn.utils.clip_grad_norm_(
                        amp.master_params(optimizer), grad_clip_thresh)
                else:
                    grad_norm = torch.nn.utils.clip_grad_norm_(
                        model.parameters(), grad_clip_thresh)
                if type(grad_norm) == torch.Tensor:
                    grad_norm = grad_norm.item()
                metrics['is_overflow'] = math.isinf(grad_norm) or math.isnan(
                    grad_norm)
                if not metrics['is_overflow']:
                    metrics['grad_norm'] = grad_norm
            else:
                metrics['is_overflow'] = False
                grad_norm = 1e-6

            optimizer.step()

            #############################
            ###  Discriminator Stuff  ###
            #############################
            optimizer_d.zero_grad()

            mel_real_pred_fakeness = self.discriminatorS(
                gt_spec)  # [B] predict fakeness of real spectrogram
            wav_real_pred_fakeness = self.discriminatorW(
                gt_audio)  # [B] predict fakeness of real audio
            real_pred_fakeness = (
                mel_real_pred_fakeness + wav_real_pred_fakeness
            ).sigmoid()  # sum the two fakeness logits and squash to (0.0, 1.0)
            real_d_loss = nn.BCELoss()(
                real_pred_fakeness, real_labels
            )  # [B] -> [] loss to decrease discriminated fakeness of real samples

            # [B] predict fakeness of generated spectrogram
            mel_fake_pred_fakeness = self.discriminatorS(pred_spect.detach())
            wav_fake_pred_fakeness = self.discriminatorW(
                pred_audio.detach())  # [B] predict fakeness of generated audio
            fake_pred_fakeness = (
                mel_fake_pred_fakeness + wav_fake_pred_fakeness
            ).sigmoid()  # sum the two fakeness logits and squash to (0.0, 1.0)
            fake_d_loss = nn.BCELoss()(
                fake_pred_fakeness, fake_labels
            )  # [B] -> [] loss to increase discriminated fakeness of fake samples

            d_loss = (real_d_loss + fake_d_loss) / 2
            metrics['d_train_loss'] = reduce_tensor(
                d_loss.data,
                num_gpus).item() if num_gpus > 1 else d_loss.item()

            if amp is not None:
                with amp.scale_loss(d_loss, optimizer_d) as scaled_d_loss:
                    scaled_d_loss.backward()
            else:
                d_loss.backward()

            if use_grad_clip:
                if amp is not None:
                    grad_norm = torch.nn.utils.clip_grad_norm_(
                        amp.master_params(optimizer_d), grad_clip_thresh)
                else:
                    grad_norm = torch.nn.utils.clip_grad_norm_(
                        self.parameters(), grad_clip_thresh)
                if type(grad_norm) == torch.Tensor:
                    grad_norm = grad_norm.item()
                metrics['is_overflow'] = math.isinf(grad_norm) or math.isnan(
                    grad_norm)
                if not metrics['is_overflow']:
                    metrics['grad_norm'] = grad_norm
            else:
                metrics['is_overflow'] = False
                grad_norm = 1e-6

            optimizer_d.step()
        else:
            loss = F.l1_loss(pred_spect, gt_spec)
            loss += F.l1_loss(pred_audio, gt_audio)

            metrics['train_loss'] = reduce_tensor(
                loss.data, num_gpus).item() if num_gpus > 1 else loss.item()

            if amp is not None:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            if use_grad_clip:
                if amp is not None:
                    grad_norm = torch.nn.utils.clip_grad_norm_(
                        amp.master_params(optimizer), grad_clip_thresh)
                else:
                    grad_norm = torch.nn.utils.clip_grad_norm_(
                        model.parameters(), grad_clip_thresh)
                if type(grad_norm) == torch.Tensor:
                    grad_norm = grad_norm.item()
                metrics['is_overflow'] = math.isinf(grad_norm) or math.isnan(
                    grad_norm)
                if not metrics['is_overflow']:
                    metrics['grad_norm'] = grad_norm
            else:
                metrics['is_overflow'] = False
                grad_norm = 0.00001

            optimizer.step()
        return metrics
Example 15
def train(num_gpus, rank, group_name, output_directory, epochs, learning_rate,
          sigma, iters_per_checkpoint, batch_size, seed, fp16_run,
          checkpoint_path, with_tensorboard, weight_sharing, optimizer_type,
          dataloader_type):

    ws = weight_sharing
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    #=====END:   ADDED FOR DISTRIBUTED======

    criterion = WaveGlowLoss(sigma)
    model = WaveGlow(**waveglow_config).cuda()

    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
    #=====END:   ADDED FOR DISTRIBUTED======

    optimizer_type = optimizer_type.lower()
    if optimizer_type == "sgd":
        optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
    elif optimizer_type == "adam":
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    else:
        print("Unsupported optimizer: %s. Aborting." % optimizer_type)
        return None

    if fp16_run:
        from apex import amp
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')

    # Load checkpoint if one exists
    iteration = 0
    if checkpoint_path != "":
        model, optimizer, iteration = load_checkpoint(checkpoint_path, model,
                                                      optimizer)
        iteration += 1  # next iteration is iteration + 1

    dataloader_type = dataloader_type.lower()
    if dataloader_type == "vanilla":
        trainset = Mel2Samp(**data_config)
    elif dataloader_type == "split":
        trainset = Mel2SampSplit(**data_config)
    else:
        print("Unsupported dataloader type: %s. Aborting." % dataloader_type)
        return None

    # =====START: ADDED FOR DISTRIBUTED======
    train_sampler = DistributedSampler(trainset) if num_gpus > 1 else None
    # =====END:   ADDED FOR DISTRIBUTED======
    train_loader = DataLoader(trainset,
                              num_workers=1,
                              shuffle=(num_gpus == 1),
                              sampler=train_sampler,
                              batch_size=batch_size,
                              pin_memory=False,
                              drop_last=True)

    # Get shared output_directory ready
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)

    name = "waveglow_ws%d_%s_%s_batch%d" % (ws, optimizer_type,
                                            dataloader_type, batch_size)

    if learning_rate != 1e-4:
        name = name + "_lr{:.0e}".format(learning_rate)

    if num_gpus > 1:
        name = name + "_x%d" % num_gpus

    if with_tensorboard and rank == 0:
        from tensorboardX import SummaryWriter
        logger = SummaryWriter(os.path.join("./logs", name))

    model.train()
    epoch_offset = max(0, int(iteration / len(train_loader)))
    # ================ MAIN TRAINING LOOP! ===================
    stime2 = None
    for epoch in range(epoch_offset, epochs):
        print("Epoch: {}".format(epoch))
        stime = time()
        for i, batch in enumerate(train_loader):
            model.zero_grad()

            mel, audio = batch
            mel = torch.autograd.Variable(mel.cuda())
            audio = torch.autograd.Variable(audio.cuda())
            outputs = model((mel, audio))

            loss = criterion(outputs)
            if num_gpus > 1:
                reduced_loss = reduce_tensor(loss.data, num_gpus).item()
            else:
                reduced_loss = loss.item()

            if fp16_run:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            optimizer.step()

            if (iteration % 100 == 0):
                if stime2 is not None:
                    tot_time2 = time() - stime2
                    print("{}:\t{:.9f}, time: {}".format(
                        iteration, reduced_loss, int(tot_time2)))
                stime2 = time()
            if with_tensorboard and rank == 0:
                logger.add_scalar('training_loss', reduced_loss,
                                  i + len(train_loader) * epoch)

            if (iteration % iters_per_checkpoint == 0):
                if rank == 0:
                    checkpoint_path = "{}/waveglow_{}_{}".format(
                        output_directory, name, iteration)
                    save_checkpoint(model, optimizer, learning_rate, iteration,
                                    checkpoint_path)

            iteration += 1
        tot_time = time() - stime
        print("Epoch %d completed. Time: %d seconds" % (epoch, int(tot_time)))
Example No. 16
def train(num_gpus, rank, group_name, output_directory, epochs, learning_rate,
          sigma, iters_per_checkpoint, batch_size, seed, checkpoint_path):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    #=====END:   ADDED FOR DISTRIBUTED======

    criterion = WaveGlowLoss(sigma)
    model = WaveGlow(**waveglow_config).cuda()

    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
    #=====END:   ADDED FOR DISTRIBUTED======

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Load checkpoint if one exists
    iteration = 0
    print("checkpoint path", checkpoint_path)
    #model = warm_load_checkpoint(checkpoint_path, model)
    model, optimizer, iteration = load_checkpoint(checkpoint_path, model,
                                                  optimizer)
    iteration += 1
    trainset = Mel2Samp(**data_config)
    # =====START: ADDED FOR DISTRIBUTED======
    train_sampler = DistributedSampler(trainset) if num_gpus > 1 else None
    # =====END:   ADDED FOR DISTRIBUTED======
    train_loader = DataLoader(trainset,
                              num_workers=1,
                              shuffle=True,
                              sampler=train_sampler,
                              batch_size=batch_size,
                              pin_memory=False,
                              drop_last=True)

    # Get shared output_directory ready
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)

    model.train()
    epoch_offset = max(0, int(iteration / len(train_loader)))
    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, epochs):
        print("Epoch: {}".format(epoch))
        for i, batch in enumerate(train_loader):
            model.zero_grad()

            mel, audio = batch
            mel = torch.autograd.Variable(mel.cuda())
            audio = torch.autograd.Variable(audio.cuda())
            outputs = model((mel, audio))

            loss = criterion(outputs)
            if num_gpus > 1:
                reduced_loss = reduce_tensor(loss.data, num_gpus).item()
            else:
                reduced_loss = loss.item()
            loss.backward()
            optimizer.step()
            if (iteration % iters_per_checkpoint == 0):
                print("{}:\t{:.9f}".format(iteration, reduced_loss))
                checkpoint_path = "{}/waveglow".format(output_directory)
                save_checkpoint(model, optimizer, learning_rate, iteration,
                                checkpoint_path)
            iteration += 1
Example No. 17
def train(num_gpus, rank, group_name, output_directory, epochs, init_lr,
          final_lr, sigma, epochs_per_checkpoint, batch_size, seed, fp16_run,
          checkpoint_path, with_tensorboard):
    os.makedirs(output_directory, exist_ok=True)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    #=====END:   ADDED FOR DISTRIBUTED======

    criterion = WaveGlowLoss(sigma)
    model = WaveGlow(**waveglow_config).cuda()

    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
    #=====END:   ADDED FOR DISTRIBUTED======

    optimizer = torch.optim.Adam(model.parameters(), lr=init_lr)

    if fp16_run:
        from apex import amp
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')

    # Load checkpoint if one exists
    epoch_offset = 1
    if checkpoint_path != "":
        model, optimizer, epoch_offset = load_checkpoint(
            checkpoint_path, model, optimizer)
        epoch_offset += 1  # next epoch is epoch_offset + 1

    trainset = Mel2Samp(**data_config)
    # =====START: ADDED FOR DISTRIBUTED======
    train_sampler = DistributedSampler(trainset) if num_gpus > 1 else None
    # =====END:   ADDED FOR DISTRIBUTED======
    train_loader = DataLoader(trainset,
                              num_workers=8,
                              shuffle=False,
                              sampler=train_sampler,
                              batch_size=batch_size,
                              pin_memory=False,
                              drop_last=True)

    # Get shared output_directory ready
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)

    if with_tensorboard and rank == 0:
        from tensorboardX import SummaryWriter
        logger = SummaryWriter(os.path.join(output_directory, 'logs'))

    model.train()
    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, epochs + 1):
        print(f'Epoch: {epoch}')
        adjust_learning_rate(optimizer, epoch, init_lr, final_lr, epochs)

        for i, batch in enumerate(tqdm.tqdm(train_loader)):
            optimizer.zero_grad()

            batch = model.pre_process(batch)
            outputs = model(batch)

            loss = criterion(outputs)
            if num_gpus > 1:
                reduced_loss = reduce_tensor(loss.data, num_gpus).item()
            else:
                reduced_loss = loss.item()

            if fp16_run:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            optimizer.step()

            if with_tensorboard and rank == 0:
                logger.add_scalar('training_loss', reduced_loss,
                                  i + 1 + len(train_loader) * epoch)

        if epoch % epochs_per_checkpoint == 0:
            if rank == 0:
                # Keep only one checkpoint
                last_chkpt = os.path.join(
                    output_directory,
                    f'waveglow_{epoch - epochs_per_checkpoint:06d}.pt')
                if os.path.exists(last_chkpt):
                    os.remove(last_chkpt)

                checkpoint_path = os.path.join(output_directory,
                                               f'waveglow_{epoch:06d}.pt')
                save_checkpoint(model, optimizer, epoch, checkpoint_path)
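The loop above adjusts the learning rate once per epoch through an `adjust_learning_rate` helper that is not included. A plausible sketch, assuming a simple linear interpolation from `init_lr` to `final_lr` over the run (the real schedule may differ), is:

def adjust_learning_rate(optimizer, epoch, init_lr, final_lr, epochs):
    # Hypothetical schedule: epoch 1 uses init_lr, the last epoch uses
    # final_lr, and intermediate epochs are linearly interpolated.
    t = (epoch - 1) / max(1, epochs - 1)
    lr = init_lr + t * (final_lr - init_lr)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr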
Example No. 18
def train(num_gpus, rank, group_name, output_directory, epochs, learning_rate,
          iters_per_checkpoint, batch_size, seed, checkpoint_path):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    #=====END:   ADDED FOR DISTRIBUTED======

    criterion = CrossEntropyLoss()
    model = WaveNet(**wavenet_config).cuda()

    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
    #=====END:   ADDED FOR DISTRIBUTED======

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Load checkpoint if one exists
    iteration = 0
    if checkpoint_path != "":
        model, optimizer, iteration = load_checkpoint(checkpoint_path, model,
                                                      optimizer)
        iteration += 1  # next iteration is iteration + 1

    #trainset = Mel2SampOnehot(**data_config)
    trainset = DeepMels(**data_config)
    # =====START: ADDED FOR DISTRIBUTED======
    train_sampler = DistributedSampler(trainset) if num_gpus > 1 else None
    # =====END:   ADDED FOR DISTRIBUTED======
    train_loader = DataLoader(trainset,
                              num_workers=1,
                              shuffle=False,
                              sampler=train_sampler,
                              batch_size=batch_size,
                              pin_memory=False,
                              drop_last=True)

    # Get shared output_directory ready
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)

    model.train()
    epoch_offset = max(0, int(iteration / len(train_loader)))
    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, epochs):
        total_loss = 0
        print("Epoch: {}".format(epoch))
        for i, batch in enumerate(train_loader):
            model.zero_grad()

            x, y = batch
            x = to_gpu(x).float()
            y = to_gpu(y)
            x = (x, y)  # auto-regressive takes outputs as inputs
            y_pred = model(x)
            loss = criterion(y_pred, y)
            if num_gpus > 1:
                reduced_loss = reduce_tensor(loss.data, num_gpus).item()
            else:
                reduced_loss = loss.item()
            loss.backward()
            optimizer.step()

            total_loss += reduced_loss

            if (iteration % iters_per_checkpoint == 0):
                if rank == 0:
                    checkpoint_path = "{}/wavenet_{}".format(
                        output_directory, iteration)
                    save_checkpoint(model, optimizer, learning_rate, iteration,
                                    checkpoint_path)

            iteration += 1
        print("epoch:{}, total epoch loss:{}".format(epoch, total_loss))
Example No. 19
def train(num_gpus,
          rank,
          group_name,
          device,
          output_directory,
          epochs,
          learning_rate,
          iters_per_checkpoint,
          batch_size,
          seed,
          checkpoint_path,
          use_scheduled_sampling=False,
          use_wavenet_autoencoder=True,
          use_variational_autoencoder=False,
          diversity_scale=0.005,
          use_logistic_mixtures=False,
          n_mixtures=3,
          audio_hz=16000,
          midi_hz=250,
          aggressive_loss_threshold=3.0,
          encoder_error_thresh=0.0):

    assert use_wavenet_autoencoder is True

    if num_gpus > 1:
        device = init_distributed(rank, num_gpus, group_name, **dist_config)
    device = torch.device(device)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    if use_logistic_mixtures:
        sampler = DML.SampleDiscretizedMixLogistics()
        criterion = DML.DiscretizedMixLogisticLoss()
    else:
        sampler = utils.CategoricalSampler()
        criterion = CrossEntropyLoss()

    model = WavenetAutoencoder(wavenet_config, cond_wavenet_config,
                               use_variational_autoencoder).to(device)
    if use_variational_autoencoder:
        diversity_loss = L2DiversityLoss()

    if num_gpus > 1:
        model = apply_gradient_allreduce(model)

    if use_scheduled_sampling:
        scheduled_sampler = ScheduledSamplerWithPatience(
            model, sampler, **scheduled_sampler_config)

    encoder_optimizer = torch.optim.Adam(model.encoder_wavenet.parameters(),
                                         lr=learning_rate)
    decoder_optimizer = torch.optim.Adam(model.wavenet.parameters(),
                                         lr=learning_rate)

    # Train state params
    aggressive = True
    train_encoder = False

    # Load checkpoint if one exists
    iteration = 0
    if checkpoint_path != "":
        model, encoder_optimizer, decoder_optimizer, aggressive, iteration = load_checkpoint(
            checkpoint_path, model, encoder_optimizer, decoder_optimizer)
        iteration += 1

    # Dataloader
    trainset = MaestroDataloader(**data_config)
    if num_gpus > 1:
        train_sampler = DistributedSampler(trainset)
    else:
        train_sampler = None
    train_loader = DataLoader(trainset,
                              num_workers=1,
                              shuffle=False,
                              sampler=train_sampler,
                              batch_size=batch_size,
                              pin_memory=False,
                              drop_last=True)

    # Get shared output_directory ready for distributed
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)

    # Initialize training variables
    epoch_offset = max(0, int(iteration / len(train_loader)))
    start_iter = iteration

    loss_idx = 0
    loss_sum = 0
    prev_loss = 999999999

    print("output directory: " + output_directory)

    # write loss to csv file
    loss_writer = DictWriter(open(output_directory + "/train.csv",
                                  'w',
                                  newline=''),
                             fieldnames=['iteration', 'loss'])
    loss_writer.writeheader()

    signal_writer = DictWriter(open(output_directory + "/signal.csv",
                                    "w",
                                    newline=''),
                               fieldnames=[
                                   'iteration', 'cosim', 'p-dist',
                                   'forwardMagnitude', 'midiMagnitude'
                               ])
    signal_writer.writeheader()

    model.train()
    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, epochs):
        print("Epoch: {}".format(epoch))
        for i, batch in enumerate(train_loader):
            model.zero_grad()

            x, y = batch

            x = as_variable(x, device)
            y = as_variable(y, device)
            y_true = y.clone()

            if use_scheduled_sampling:
                y = scheduled_sampler(x, y)

            y_preds = model((x, y))

            if use_wavenet_autoencoder:
                q_bar = y_preds[1]
                y_preds = y_preds[0]

            loss = criterion(y_preds, y_true)
            if use_variational_autoencoder:
                div_loss = diversity_loss(q_bar)
                loss = loss + (diversity_scale * div_loss)
            if num_gpus > 1:
                reduced_loss = reduce_tensor(loss.data, num_gpus).item()
            else:
                reduced_loss = loss.data.item()
            loss.backward()

            if aggressive and train_encoder:
                encoder_optimizer.step()
                print("Encoder step")

            elif aggressive:
                decoder_optimizer.step()
                print("Decoder step")

            else:  # normal training
                encoder_optimizer.step()
                decoder_optimizer.step()

            print("total loss:     {}:\t{:.9f}".format(iteration,
                                                       reduced_loss))
            if use_variational_autoencoder:
                print("    diversity loss: {:.9f}".format(div_loss))

            if use_scheduled_sampling:
                scheduled_sampler.update(reduced_loss)

            # record running average of loss
            loss_sum += reduced_loss
            loss_idx += 1
            if (iteration % 10 == 0):
                loss_avg = loss_sum / loss_idx
                print("floating avg of 10: " + str(loss_avg))
                #loss_writer.writerow({"iteration": str(i),
                #                     "loss": str(reduced_loss)})

                if aggressive and loss_avg < aggressive_loss_threshold:
                    aggressive = False
                elif aggressive and train_encoder and loss_avg >= (
                        prev_loss + encoder_error_thresh):
                    train_encoder = False
                elif aggressive:
                    train_encoder = True

                prev_loss = loss_avg
                loss_sum = 0
                loss_idx = 0

            # save model
            if (iteration % iters_per_checkpoint == 0):
                if rank == 0:
                    checkpoint_path = "{}/wavenet_{}".format(
                        output_directory, iteration)
                    save_checkpoint_autoencoder(model, device,
                                                use_variational_autoencoder,
                                                encoder_optimizer,
                                                decoder_optimizer, aggressive,
                                                learning_rate, iteration,
                                                checkpoint_path)

            iteration += 1
            del loss
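The autoencoder example above opens `train.csv` through a `DictWriter` and writes the header, but the `writerow` call inside the loop is commented out, so nothing is ever logged. A small self-contained sketch of the intended logging (with a context manager so the file is flushed and closed) might look like:

from csv import DictWriter

with open("train.csv", "w", newline="") as f:
    loss_writer = DictWriter(f, fieldnames=["iteration", "loss"])
    loss_writer.writeheader()
    # Inside the training loop one row would be written per logging step.
    loss_writer.writerow({"iteration": 0, "loss": 1.234})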
Example No. 20
def train(num_gpus,
          rank,
          group_name,
          output_directory,
          epochs,
          learning_rate,
          sigma,
          iters_per_checkpoint,
          batch_size,
          seed,
          fp16_run,
          checkpoint_path,
          with_tensorboard,
          num_workers=2):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    #=====END:   ADDED FOR DISTRIBUTED======

    criterion = WaveGlowLoss(sigma)
    model = WaveGlow(**waveglow_config).cuda()

    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
    #=====END:   ADDED FOR DISTRIBUTED======

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    if fp16_run:
        from apex import amp
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')

    # Load checkpoint if one exists
    iteration = 0
    if checkpoint_path != "":
        model, optimizer, iteration = load_checkpoint(checkpoint_path, model,
                                                      optimizer)
        iteration += 1  # next iteration is iteration + 1

    # HACK: setup separate training and eval sets
    training_files = data_config['training_files']
    eval_files = data_config['eval_files']
    del data_config['training_files']
    del data_config['eval_files']
    data_config['audio_files'] = training_files
    trainset = Mel2Samp(**data_config)
    data_config['audio_files'] = eval_files
    evalset = Mel2Samp(**data_config)

    # =====START: ADDED FOR DISTRIBUTED======
    train_sampler = DistributedSampler(trainset) if num_gpus > 1 else None
    eval_sampler = DistributedSampler(evalset) if num_gpus > 1 else None
    # =====END:   ADDED FOR DISTRIBUTED======

    print("Creating dataloaders with " + str(num_workers) + " workers")
    train_loader = DataLoader(trainset,
                              num_workers=num_workers,
                              shuffle=True,
                              sampler=train_sampler,
                              batch_size=batch_size,
                              pin_memory=False,
                              drop_last=True)
    eval_loader = DataLoader(evalset,
                             num_workers=num_workers,
                             shuffle=True,
                             sampler=eval_sampler,
                             batch_size=batch_size,
                             pin_memory=False,
                             drop_last=True)

    # Get shared output_directory ready
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)

    if with_tensorboard and rank == 0:
        from tensorboardX import SummaryWriter
        logger_train = SummaryWriter(
            os.path.join(output_directory, 'logs', 'train'))
        logger_eval = SummaryWriter(
            os.path.join(output_directory, 'logs', 'eval'))

    epoch_offset = max(0, int(iteration / len(train_loader)))
    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, epochs):
        model.train()
        with tqdm(total=len(train_loader)) as train_pbar:
            for i, batch in enumerate(train_loader):
                model.zero_grad()

                mel, audio = batch
                mel = torch.autograd.Variable(mel.cuda())
                audio = torch.autograd.Variable(audio.cuda())
                outputs = model((mel, audio))

                loss = criterion(outputs)
                if num_gpus > 1:
                    reduced_loss = reduce_tensor(loss.data, num_gpus).item()
                else:
                    reduced_loss = loss.item()

                if fp16_run:
                    with amp.scale_loss(loss, optimizer) as scaled_loss:
                        scaled_loss.backward()
                else:
                    loss.backward()

                optimizer.step()

                train_pbar.set_description(
                    "Epoch {} Iter {} Loss {:.3f}".format(
                        epoch, iteration, reduced_loss))
                if with_tensorboard and rank == 0 and iteration % 10 == 0:
                    logger_train.add_scalar('loss', reduced_loss,
                                            i + len(train_loader) * epoch)
                    # adding logging for GPU utilization and memory usage
                    gpu_memory_used, gpu_utilization = get_gpu_stats()
                    k = 'gpu' + str(0)
                    logger_train.add_scalar(k + '/memory', gpu_memory_used,
                                            iteration)
                    logger_train.add_scalar(k + '/load', gpu_utilization,
                                            iteration)
                    logger_train.flush()

                if (iteration % iters_per_checkpoint == 0):
                    if rank == 0:
                        checkpoint_path = "{}/waveglow_{}".format(
                            output_directory, iteration)
                        save_checkpoint(model, optimizer, learning_rate,
                                        iteration, checkpoint_path)

                iteration += 1
                train_pbar.update(1)

        # Eval
        model.eval()
        torch.cuda.empty_cache()

        with torch.no_grad():
            tensorboard_mel, tensorboard_audio = None, None
            loss_accum = []
            with tqdm(total=len(eval_loader)) as eval_pbar:
                for i, batch in enumerate(eval_loader):
                    model.zero_grad()
                    mel, audio = batch
                    mel = torch.autograd.Variable(mel.cuda())
                    audio = torch.autograd.Variable(audio.cuda())
                    outputs = model((mel, audio))
                    loss = criterion(outputs).item()
                    loss_accum.append(loss)
                    eval_pbar.set_description("Epoch {} Eval {:.3f}".format(
                        epoch, loss))
                    outputs = None

                    # use the first batch for tensorboard audio samples
                    if i == 0:
                        tensorboard_mel = mel
                        tensorboard_audio = audio
                    eval_pbar.update(1)

            if with_tensorboard and rank == 0:
                loss_avg = statistics.mean(loss_accum)
                tqdm.write("Epoch {} Eval AVG {}".format(epoch, loss_avg))
                logger_eval.add_scalar('loss', loss_avg, iteration)

            # log audio samples to tensorboard
            tensorboard_audio_generated = model.infer(tensorboard_mel)
            for i in range(0, 5):
                ta = tensorboard_audio[i].cpu().numpy()
                tag = tensorboard_audio_generated[i].cpu().numpy()
                logger_eval.add_audio("sample " + str(i) + "/orig",
                                      ta,
                                      epoch,
                                      sample_rate=data_config['sampling_rate'])
                logger_eval.add_audio("sample " + str(i) + "/gen",
                                      tag,
                                      epoch,
                                      sample_rate=data_config['sampling_rate'])
            logger_eval.flush()
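The TensorBoard logging above calls a `get_gpu_stats()` helper that is not shown. A hypothetical stand-in based on `pynvml` is sketched below; the function name, return order, and units are assumptions chosen to match how the two values are logged, not the original implementation.

import pynvml

def get_gpu_stats(device_index=0):
    # Assumed behaviour: return (memory used in MiB, GPU utilization in %)
    # for one device, queried through NVML.
    pynvml.nvmlInit()
    handle = pynvml.nvmlDeviceGetHandleByIndex(device_index)
    mem = pynvml.nvmlDeviceGetMemoryInfo(handle)
    util = pynvml.nvmlDeviceGetUtilizationRates(handle)
    pynvml.nvmlShutdown()
    return mem.used / (1024 ** 2), util.gpu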
Example No. 21
def train(num_gpus, rank, group_name, prj_name, run_name, output_directory,
          epochs, learning_rate, sigma, iters_per_checkpoint, batch_size, seed,
          fp16_run, grad_clip_thresh, checkpoint_path, pretrained_path,
          with_tensorboard, with_wandb):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    #=====END:   ADDED FOR DISTRIBUTED======

    criterion = WaveGlowLoss(sigma)
    model = WaveGlow(**waveglow_config).cuda()

    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
    #=====END:   ADDED FOR DISTRIBUTED======

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    if fp16_run:
        from apex import amp
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')

    # Load checkpoint if one exists
    iteration = 0
    if checkpoint_path != "":
        model, optimizer, iteration = load_checkpoint(checkpoint_path, model,
                                                      optimizer)
        iteration += 1  # next iteration is iteration + 1

    if pretrained_path != "":
        model = load_pretrained(pretrained_path, model)

    trainset = Mel2Samp(**data_config)
    # =====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        train_sampler = DistributedSampler(trainset)
        shuffle_at_dataloader = False
    else:
        train_sampler = None
        shuffle_at_dataloader = True
    # =====END:   ADDED FOR DISTRIBUTED======
    train_loader = DataLoader(trainset,
                              num_workers=1,
                              shuffle=shuffle_at_dataloader,
                              sampler=train_sampler,
                              batch_size=batch_size,
                              pin_memory=False,
                              drop_last=True)

    # Get shared output_directory ready
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)

    if with_tensorboard and rank == 0:
        from tensorboardX import SummaryWriter
        logger = SummaryWriter(os.path.join(output_directory, 'logs'))

    model.train()
    epoch_offset = max(0, int(iteration / len(train_loader)))
    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, epochs):
        print("Epoch: {}".format(epoch))
        for i, batch in enumerate(train_loader):
            iter_start = time.perf_counter()

            float_epoch = float(iteration) / len(train_loader)

            model.zero_grad()

            mel, audio = batch
            mel = torch.autograd.Variable(mel.cuda())
            audio = torch.autograd.Variable(audio.cuda())
            outputs = model((mel, audio))

            loss, etc = criterion(outputs)
            (z_L2_normalized, neg_log_s_total, neg_log_det_W_total) = etc
            if num_gpus > 1:
                reduced_loss = reduce_tensor(loss.data, num_gpus).item()
            else:
                reduced_loss = loss.item()

            if fp16_run:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            is_overflow = False
            if fp16_run:
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    amp.master_params(optimizer), grad_clip_thresh)
                is_overflow = math.isnan(grad_norm)
                if not is_overflow:
                    clipped_grad_norm = get_clip_grad_norm(
                        grad_norm, grad_clip_thresh)
            else:
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    model.parameters(), grad_clip_thresh)
                clipped_grad_norm = get_clip_grad_norm(grad_norm,
                                                       grad_clip_thresh)

            optimizer.step()
            iter_duration = time.perf_counter() - iter_start

            print("{}:\t{:.9f}".format(iteration, reduced_loss))
            if with_tensorboard and rank == 0:
                logger.add_scalar('training_loss', reduced_loss,
                                  i + len(train_loader) * epoch)

            if with_wandb and rank == 0:
                wandb.log(
                    {
                        'iteration': iteration,
                        'epoch': float_epoch,
                        'iter_duration': iter_duration,
                        'training_loss': reduced_loss,
                        'training_loss/z_L2_normalized': z_L2_normalized,
                        'training_loss/neg_log_s_total': neg_log_s_total,
                        'training_loss/neg_log_det_W_total':
                        neg_log_det_W_total,
                    },
                    step=iteration)
                if not is_overflow:
                    wandb.log(
                        {
                            'grad_norm': grad_norm,
                            'clipped_grad_norm': clipped_grad_norm,
                        },
                        step=iteration)

            if (iteration % iters_per_checkpoint == 0):
                if rank == 0:
                    checkpoint_path = "{}/{}/{}/waveglow_{}".format(
                        output_directory, prj_name, run_name, iteration)
                    save_checkpoint(model, optimizer, learning_rate, iteration,
                                    checkpoint_path)

            iteration += 1
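This variant logs both the raw gradient norm and a `clipped_grad_norm` produced by a `get_clip_grad_norm` helper that is not shown. Since `clip_grad_norm_` returns the norm measured before clipping, a plausible (purely hypothetical) implementation simply caps that value at the threshold:

def get_clip_grad_norm(grad_norm, grad_clip_thresh):
    # Hypothetical: the norm actually in effect after clipping is the raw
    # norm capped at the clipping threshold.
    return min(float(grad_norm), grad_clip_thresh)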
Example No. 22
def train(num_gpus, rank, group_name, output_directory, epochs, learning_rate,
          iters_per_checkpoint, iters_per_eval, batch_size, seed, checkpoint_path, log_dir, ema_decay=0.9999):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    #=====END:   ADDED FOR DISTRIBUTED======

    if train_data_config["no_chunks"]:
        criterion = MaskedCrossEntropyLoss()
    else:
        criterion = CrossEntropyLoss()
    model = WaveNet(**wavenet_config).cuda()
    ema = ExponentialMovingAverage(ema_decay)
    for name, param in model.named_parameters():
        if param.requires_grad:
            ema.register(name, param.data)

    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
    #=====END:   ADDED FOR DISTRIBUTED======

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = StepLR(optimizer, step_size=200000, gamma=0.5)

    # Load checkpoint if one exists
    iteration = 0
    if checkpoint_path != "":
        model, optimizer, scheduler, iteration, ema = load_checkpoint(checkpoint_path, model,
                                                                      optimizer, scheduler, ema)
        iteration += 1  # next iteration is iteration + 1

    trainset = Mel2SampOnehot(audio_config=audio_config, verbose=True, **train_data_config)
    validset = Mel2SampOnehot(audio_config=audio_config, verbose=False, **valid_data_config)
    # =====START: ADDED FOR DISTRIBUTED======
    train_sampler = DistributedSampler(trainset) if num_gpus > 1 else None
    valid_sampler = DistributedSampler(validset) if num_gpus > 1 else None
    # =====END:   ADDED FOR DISTRIBUTED======
    print(train_data_config)
    if train_data_config["no_chunks"]:
        collate_fn = utils.collate_fn
    else:
        collate_fn = torch.utils.data.dataloader.default_collate
    train_loader = DataLoader(trainset, num_workers=1, shuffle=False,
                              collate_fn=collate_fn,
                              sampler=train_sampler,
                              batch_size=batch_size,
                              pin_memory=True,
                              drop_last=True)
    valid_loader = DataLoader(validset, num_workers=1, shuffle=False,
                              sampler=valid_sampler, batch_size=1, pin_memory=True)
    # Get shared output_directory ready
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)
    
    model.train()
    epoch_offset = max(0, int(iteration / len(train_loader)))
    writer = SummaryWriter(log_dir)
    print("Checkpoints writing to: {}".format(log_dir))
    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, epochs):
        print("Epoch: {}".format(epoch))
        for i, batch in enumerate(train_loader):
            if low_memory:
                torch.cuda.empty_cache()
            scheduler.step()
            model.zero_grad()

            if train_data_config["no_chunks"]:
                x, y, seq_lens = batch
                seq_lens = to_gpu(seq_lens)
            else:
                x, y = batch
            x = to_gpu(x).float()
            y = to_gpu(y)
            x = (x, y)  # auto-regressive takes outputs as inputs
            y_pred = model(x)
            if train_data_config["no_chunks"]:
                loss = criterion(y_pred, y, seq_lens)
            else:
                loss = criterion(y_pred, y)
            if num_gpus > 1:
                reduced_loss = reduce_tensor(loss.data, num_gpus).item()
            else:
                reduced_loss = loss.item()
            loss.backward()
            optimizer.step()

            for name, param in model.named_parameters():
                if name in ema.shadow:
                    ema.update(name, param.data)

            print("{}:\t{:.9f}".format(iteration, reduced_loss))
            if rank == 0:
                writer.add_scalar('loss', reduced_loss, iteration)
            if (iteration % iters_per_checkpoint == 0 and iteration):
                if rank == 0:
                    checkpoint_path = "{}/wavenet_{}".format(
                        output_directory, iteration)
                    save_checkpoint(model, optimizer, scheduler, learning_rate, iteration,
                                    checkpoint_path, ema, wavenet_config)
            if (iteration % iters_per_eval == 0 and iteration > 0 and not config["no_validation"]):
                if low_memory:
                    torch.cuda.empty_cache()
                if rank == 0:
                    model_eval = nv_wavenet.NVWaveNet(**(model.export_weights()))
                    for j, valid_batch in enumerate(valid_loader):
                        mel, audio = valid_batch
                        mel = to_gpu(mel).float()
                        cond_input = model.get_cond_input(mel)
                        predicted_audio = model_eval.infer(cond_input, nv_wavenet.Impl.AUTO)
                        predicted_audio = utils.mu_law_decode_numpy(predicted_audio[0, :].cpu().numpy(), 256)
                        writer.add_audio("valid/predicted_audio_{}".format(j),
                                         predicted_audio,
                                         iteration,
                                         22050)
                        audio = utils.mu_law_decode_numpy(audio[0, :].cpu().numpy(), 256)
                        writer.add_audio("valid_true/audio_{}".format(j),
                                         audio,
                                         iteration,
                                         22050)
                        if low_memory:
                            torch.cuda.empty_cache()
            iteration += 1
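The WaveNet example above maintains an exponential moving average of the weights through an `ExponentialMovingAverage` object with `register`, `update`, and a `shadow` dict, but the class itself is not included. The sketch below is a minimal version consistent with those calls; the interface is inferred from the example rather than taken from the original code.

class ExponentialMovingAverage:
    def __init__(self, decay):
        self.decay = decay
        self.shadow = {}

    def register(self, name, val):
        # Start the shadow copy from the current parameter value.
        self.shadow[name] = val.clone()

    def update(self, name, val):
        # Decay the shadow copy towards the latest parameter value.
        new_average = self.decay * self.shadow[name] + (1.0 - self.decay) * val
        self.shadow[name] = new_average.clone()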
Example No. 23
def train(num_gpus, rank, group_name, output_directory, log_directory,
          checkpoint_path):
    # Get device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    torch.manual_seed(hp.seed)
    torch.cuda.manual_seed(hp.seed)

    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    #=====END:   ADDED FOR DISTRIBUTED======

    criterion = WaveGlowLoss(hp.sigma)
    model = WaveGlow().cuda()

    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
    #=====END:   ADDED FOR DISTRIBUTED======

    learning_rate = hp.learning_rate
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    if hp.fp16_run:
        from apex import amp
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')

    # Load checkpoint if one exists
    iteration = 0
    if checkpoint_path:
        model, optimizer, iteration = load_checkpoint(checkpoint_path, model,
                                                      optimizer)
        iteration += 1  # next iteration is iteration + 1

    # Get dataset
    dataset = FastSpeechDataset()

    # Get training loader
    print("Get Training Loader")
    training_loader = DataLoader(dataset,
                                 batch_size=hp.batch_size,
                                 shuffle=True,
                                 collate_fn=collate_fn,
                                 drop_last=True,
                                 num_workers=cpu_count())

    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)

    if hp.with_tensorboard and rank == 0:
        logger = prepare_directories_and_logger(output_directory,
                                                log_directory)

    model = model.train()
    epoch_offset = max(0, int(iteration / len(training_loader)))
    beta = hp.batch_size
    print("Total Epochs: {}".format(hp.epochs))
    print("Batch Size: {}".format(hp.batch_size))

    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, hp.epochs):
        print("Epoch: {}".format(epoch))
        for i, data_of_batch in enumerate(training_loader):
            model.zero_grad()

            if not hp.pre_target:
                # Prepare Data
                src_seq = data_of_batch["texts"]
                src_pos = data_of_batch["pos"]
                mel_tgt = data_of_batch["mels"]

                src_seq = torch.from_numpy(src_seq).long().to(device)
                src_pos = torch.from_numpy(src_pos).long().to(device)
                mel_tgt = torch.from_numpy(mel_tgt).float().to(device)
                alignment_target = get_alignment(src_seq,
                                                 tacotron2).float().to(device)
                # For Data Parallel
                mel_max_len = mel_tgt.size(1)
            else:
                # Prepare Data
                src_seq = data_of_batch["texts"]
                src_pos = data_of_batch["pos"]
                mel_tgt = data_of_batch["mels"]
                alignment_target = data_of_batch["alignment"]

                src_seq = torch.from_numpy(src_seq).long().to(device)
                src_pos = torch.from_numpy(src_pos).long().to(device)
                mel_tgt = torch.from_numpy(mel_tgt).float().to(device)
                alignment_target = torch.from_numpy(
                    alignment_target).float().to(device)
                # For Data Parallel
                mel_max_len = mel_tgt.size(1)

            outputs = model(src_seq, src_pos, mel_tgt, mel_max_len,
                            alignment_target)
            _, _, _, duration_predictor = outputs
            mel_tgt = mel_tgt.transpose(1, 2)
            max_like, dur_loss = criterion(outputs, alignment_target, mel_tgt)
            if beta > 1 and iteration % 10000 == 0:
                beta = beta // 2
            loss = max_like + dur_loss

            if num_gpus > 1:
                reduced_loss = reduce_tensor(loss.data, num_gpus).item()
            else:
                reduced_loss = loss.item()

            if hp.fp16_run:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            #grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), hp.grad_clip_thresh)

            optimizer.step()

            print("{}:\t{:.9f}".format(iteration, reduced_loss))
            if hp.with_tensorboard and rank == 0:
                logger.log_training(reduced_loss, dur_loss, learning_rate,
                                    iteration)

            if (iteration % hp.save_step == 0):
                if rank == 0:
                    # logger.log_alignment(model, mel_predict, mel_tgt, iteration)
                    checkpoint_path = "{}/TTSglow_{}".format(
                        output_directory, iteration)
                    save_checkpoint(model, optimizer, learning_rate, iteration,
                                    checkpoint_path)

            iteration += 1
Example No. 24
def compute_validation_loss(model, criterion, valset, batch_size, n_gpus,
                            apply_ctc):
    model.eval()
    with torch.no_grad():
        collate_fn = DataCollate(n_frames_per_step=1,
                                 use_attn_prior=valset.use_attn_prior)
        val_sampler = DistributedSampler(valset) if n_gpus > 1 else None
        val_loader = DataLoader(valset,
                                sampler=val_sampler,
                                num_workers=1,
                                shuffle=False,
                                batch_size=batch_size,
                                pin_memory=False,
                                collate_fn=collate_fn)

        val_loss, val_loss_nll, val_loss_gate = 0.0, 0.0, 0.0
        val_loss_ctc = 0.0
        n_batches = len(val_loader)
        for i, batch in enumerate(val_loader):
            (mel, spk_ids, txt, in_lens, out_lens, gate_target,
             attn_prior) = batch
            mel, spk_ids, txt = mel.cuda(), spk_ids.cuda(), txt.cuda()
            in_lens, out_lens = in_lens.cuda(), out_lens.cuda()
            gate_target = gate_target.cuda()
            attn_prior = attn_prior.cuda() if attn_prior is not None else None
            (z, log_s_list, gate_pred, attn, attn_logprob, mean, log_var,
             prob) = model(mel, spk_ids, txt, in_lens, out_lens, attn_prior)

            loss_nll, loss_gate, loss_ctc = criterion(
                (z, log_s_list, gate_pred, attn, attn_logprob, mean, log_var,
                 prob),
                gate_target,
                in_lens,
                out_lens,
                is_validation=True)
            loss = loss_nll + loss_gate

            if apply_ctc:
                loss += loss_ctc * criterion.ctc_loss_weight

            if n_gpus > 1:
                reduced_val_loss = reduce_tensor(loss.data, n_gpus).item()
                reduced_val_loss_nll = reduce_tensor(loss_nll.data,
                                                     n_gpus).item()
                reduced_val_loss_gate = reduce_tensor(loss_gate.data,
                                                      n_gpus).item()
                reduced_val_loss_ctc = reduce_tensor(loss_ctc.data,
                                                     n_gpus).item()
            else:
                reduced_val_loss = loss.item()
                reduced_val_loss_nll = loss_nll.item()
                reduced_val_loss_gate = loss_gate.item()
                reduced_val_loss_ctc = loss_ctc.item()
            val_loss += reduced_val_loss
            val_loss_nll += reduced_val_loss_nll
            val_loss_gate += reduced_val_loss_gate
            val_loss_ctc += reduced_val_loss_ctc

        val_loss = val_loss / n_batches
        val_loss_nll = val_loss_nll / n_batches
        val_loss_gate = val_loss_gate / n_batches
        val_loss_ctc = val_loss_ctc / n_batches

    print("Mean {}\nLogVar {}\nProb {}".format(mean, log_var, prob))
    model.train()
    return (val_loss, val_loss_nll, val_loss_gate, val_loss_ctc, attn,
            gate_pred, gate_target)
Example No. 25
def train(n_gpus,
          rank,
          output_directory,
          epochs,
          optim_algo,
          learning_rate,
          weight_decay,
          sigma,
          iters_per_checkpoint,
          batch_size,
          seed,
          checkpoint_path,
          ignore_layers,
          include_layers,
          finetune_layers,
          warmstart_checkpoint_path,
          with_tensorboard,
          grad_clip_val,
          fp16_run,
          tensorboard_path=None):
    fp16_run = bool(fp16_run)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    if n_gpus > 1:
        init_distributed(rank, n_gpus, **dist_config)

    criterion = FlowtronLoss(sigma, bool(model_config['n_components']),
                             bool(model_config['use_gate_layer']))
    model = Flowtron(**model_config).cuda()

    if len(finetune_layers):
        for name, param in model.named_parameters():
            if name in finetune_layers:
                param.requires_grad = True
            else:
                param.requires_grad = False

    print("Initializing %s optimizer" % (optim_algo))
    if optim_algo == 'Adam':
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=learning_rate,
                                     weight_decay=weight_decay)
    elif optim_algo == 'RAdam':
        optimizer = RAdam(model.parameters(),
                          lr=learning_rate,
                          weight_decay=weight_decay)
    else:
        print("Unrecognized optimizer %s!" % (optim_algo))
        exit(1)

    # Load checkpoint if one exists
    iteration = 0
    if warmstart_checkpoint_path != "":
        model = warmstart(warmstart_checkpoint_path, model)

    if checkpoint_path != "":
        model, optimizer, iteration = load_checkpoint(checkpoint_path, model,
                                                      optimizer, ignore_layers)
        iteration += 1  # next iteration is iteration + 1

    if n_gpus > 1:
        model = apply_gradient_allreduce(model)
    print(model)
    scaler = amp.GradScaler(enabled=fp16_run)

    train_loader, valset, collate_fn = prepare_dataloaders(
        data_config, n_gpus, batch_size)

    # Get shared output_directory ready
    if rank == 0 and not os.path.isdir(output_directory):
        os.makedirs(output_directory)
        os.chmod(output_directory, 0o775)
        print("Output directory", output_directory)

    if with_tensorboard and rank == 0:
        tboard_out_path = tensorboard_path
        if tensorboard_path is None:
            tboard_out_path = os.path.join(output_directory, "logs/run1")
        print("Setting up Tensorboard log in %s" % (tboard_out_path))
        logger = FlowtronLogger(tboard_out_path)

    # force set the learning rate to what is specified
    for param_group in optimizer.param_groups:
        param_group['lr'] = learning_rate

    model.train()
    epoch_offset = max(0, int(iteration / len(train_loader)))

    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, epochs):
        print("Epoch: {}".format(epoch))
        for batch in train_loader:
            model.zero_grad()

            mel, speaker_vecs, text, in_lens, out_lens, gate_target, attn_prior = batch
            mel, speaker_vecs, text = mel.cuda(), speaker_vecs.cuda(
            ), text.cuda()
            in_lens, out_lens, gate_target = in_lens.cuda(), out_lens.cuda(
            ), gate_target.cuda()
            attn_prior = attn_prior.cuda() if valset.use_attn_prior else None
            with amp.autocast(enabled=fp16_run):
                z, log_s_list, gate_pred, attn, mean, log_var, prob = model(
                    mel, speaker_vecs, text, in_lens, out_lens, attn_prior)

                loss_nll, loss_gate = criterion(
                    (z, log_s_list, gate_pred, mean, log_var, prob),
                    gate_target, out_lens)
                loss = loss_nll + loss_gate

            if n_gpus > 1:
                reduced_loss = reduce_tensor(loss.data, n_gpus).item()
                reduced_gate_loss = reduce_tensor(loss_gate.data,
                                                  n_gpus).item()
                reduced_nll_loss = reduce_tensor(loss_nll.data, n_gpus).item()
            else:
                reduced_loss = loss.item()
                reduced_gate_loss = loss_gate.item()
                reduced_nll_loss = loss_nll.item()

            scaler.scale(loss).backward()
            if grad_clip_val > 0:
                scaler.unscale_(optimizer)
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               grad_clip_val)

            scaler.step(optimizer)
            scaler.update()

            if rank == 0:
                print("{}:\t{:.9f}".format(iteration, reduced_loss),
                      flush=True)

            if with_tensorboard and rank == 0:
                logger.add_scalar('training_loss', reduced_loss, iteration)
                logger.add_scalar('training_loss_gate', reduced_gate_loss,
                                  iteration)
                logger.add_scalar('training_loss_nll', reduced_nll_loss,
                                  iteration)
                logger.add_scalar('learning_rate', learning_rate, iteration)

            if iteration % iters_per_checkpoint == 0:
                val_loss, val_loss_nll, val_loss_gate, attns, gate_pred, gate_target = compute_validation_loss(
                    model, criterion, valset, collate_fn, batch_size, n_gpus)
                if rank == 0:
                    print("Validation loss {}: {:9f}  ".format(
                        iteration, val_loss))
                    if with_tensorboard:
                        logger.log_validation(val_loss, val_loss_nll,
                                              val_loss_gate, attns, gate_pred,
                                              gate_target, iteration)

                    checkpoint_path = "{}/model_{}".format(
                        output_directory, iteration)
                    save_checkpoint(model, optimizer, learning_rate, iteration,
                                    checkpoint_path)

            iteration += 1
Example No. 26
def train(n_gpus, rank, output_directory, epochs, learning_rate, weight_decay,
          sigma, iters_per_checkpoint, batch_size, seed, checkpoint_path,
          ignore_layers, include_layers, warmstart_checkpoint_path,
          with_tensorboard, fp16_run):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    if n_gpus > 1:
        init_distributed(rank, n_gpus, **dist_config)

    criterion = FlowtronLoss(sigma, bool(model_config['n_components']),
                             model_config['use_gate_layer'])
    model = Flowtron(**model_config).cuda()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=learning_rate,
                                 weight_decay=weight_decay)

    # Load checkpoint if one exists
    iteration = 0
    if warmstart_checkpoint_path != "":
        model = warmstart(warmstart_checkpoint_path, model)

    if checkpoint_path != "":
        model, optimizer, iteration = load_checkpoint(checkpoint_path, model,
                                                      optimizer, ignore_layers)
        iteration += 1  # next iteration is iteration + 1

    if n_gpus > 1:
        model = apply_gradient_allreduce(model)
    print(model)
    if fp16_run:
        from apex import amp
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')

    train_loader, valset, collate_fn = prepare_dataloaders(
        data_config, n_gpus, batch_size)

    # Get shared output_directory ready
    if rank == 0 and not os.path.isdir(output_directory):
        os.makedirs(output_directory)
        os.chmod(output_directory, 0o775)
    print("output directory", output_directory)

    if with_tensorboard and rank == 0:
        logger = FlowtronLogger(os.path.join(output_directory, 'logs'))

    model.train()
    epoch_offset = max(0, int(iteration / len(train_loader)))
    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, epochs):
        print("Epoch: {}".format(epoch))
        for batch in train_loader:
            model.zero_grad()

            mel, speaker_vecs, text, in_lens, out_lens, gate_target = batch
            mel, speaker_vecs, text = mel.cuda(), speaker_vecs.cuda(
            ), text.cuda()
            in_lens, out_lens, gate_target = in_lens.cuda(), out_lens.cuda(
            ), gate_target.cuda()

            z, log_s_list, gate_pred, attn, mean, log_var, prob = model(
                mel, speaker_vecs, text, in_lens, out_lens)
            loss = criterion((z, log_s_list, gate_pred, mean, log_var, prob),
                             gate_target, out_lens)

            if n_gpus > 1:
                reduced_loss = reduce_tensor(loss.data, n_gpus).item()
            else:
                reduced_loss = loss.item()

            if fp16_run:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()
            optimizer.step()

            if rank == 0:
                print("{}:\t{:.9f}".format(iteration, reduced_loss),
                      flush=True)

            if with_tensorboard and rank == 0:
                logger.add_scalar('training_loss', reduced_loss, iteration)
                logger.add_scalar('learning_rate', learning_rate, iteration)

            if (iteration % iters_per_checkpoint == 0):
                val_loss, attns, gate_pred, gate_target = compute_validation_loss(
                    model, criterion, valset, collate_fn, batch_size, n_gpus)
                if rank == 0:
                    print("Validation loss {}: {:9f}  ".format(
                        iteration, val_loss))
                    if with_tensorboard:
                        logger.log_validation(val_loss, attns, gate_pred,
                                              gate_target, iteration)

                    checkpoint_path = "{}/model_{}".format(
                        output_directory, iteration)
                    save_checkpoint(model, optimizer, learning_rate, iteration,
                                    checkpoint_path)

            iteration += 1
Example No. 27
def train(n_gpus, rank, group_name):
    if n_gpus > 1:
        if rank == 0: print('Synchronizing distributed flow...')
        init_distributed(rank, n_gpus, group_name, config['dist_config'])

    torch.manual_seed(config['seed'])
    torch.cuda.manual_seed(config['seed'])

    if rank == 0: print('Initializing model, optimizer and loss...')
    model = Tacotron2(config).cuda()
    criterion = Tacotron2Loss()
    learning_rate = config['learning_rate']
    optimizer = torch.optim.Adam(params=model.parameters(),
                                 lr=learning_rate,
                                 weight_decay=config['weight_decay'])
    if config['fp16_run']:
        if rank == 0: print('Using FP16...')
        from apex import amp
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')

    if rank == 0: print('Preparing dirs, data loaders and logger...')
    logger = prepare_directories_and_logger(config['output_directory'],
                                            config['log_directory'], rank)
    train_loader, valset, collate_fn = prepare_dataloaders(
        config['training_files'], config['validation_files'],
        config['n_frames_per_step'], n_gpus)

    iteration = 0
    epoch_offset = 0
    if config['warm_up_checkpoint'] is not None:
        if rank == 0:
            print('Loading checkpoint from {}...'.format(
                config['warm_up_checkpoint']))

        model = load_checkpoint(config['warm_up_checkpoint'], model, optimizer)

        iteration += 1  # next iteration is iteration + 1
        epoch_offset = max(0, int(iteration / len(train_loader)))

    model.compress_factorize(config=config['compress_config'])
    model.train()

    # Main training loop
    for epoch in range(epoch_offset, config['epochs']):
        print("Epoch: {}".format(epoch))
        for _, batch in enumerate(train_loader):
            start = time.perf_counter()
            x, y = model.parse_batch(batch)
            y_pred = model(x)

            loss = criterion(y_pred, y)
            if n_gpus > 1:
                reduced_loss = reduce_tensor(loss.data, n_gpus).item()
            else:
                reduced_loss = loss.item()
            if config['fp16_run']:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            if iteration % config['iters_per_grad_acc'] == 0:
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    model.parameters(), config['grad_clip_thresh'])

                optimizer.step()
                model.zero_grad()

                if rank == 0:
                    duration = time.perf_counter() - start
                    print("Train loss {} {:.6f} Grad Norm {:.6f} {:.2f}s/it".
                          format(iteration, reduced_loss, grad_norm, duration))
                    logger.log_training(reduced_loss, grad_norm, learning_rate,
                                        duration, iteration)

            if iteration % config['iters_per_validation'] == 0:
                validate(model, criterion, valset, iteration,
                         config['batch_size'], n_gpus, collate_fn, logger,
                         rank)

            if iteration % config['iters_per_checkpoint'] == 0:
                if rank == 0:
                    checkpoint_path = os.path.join(
                        config['output_directory'],
                        "checkpoint_{}".format(iteration))
                    save_checkpoint(model, optimizer, iteration,
                                    checkpoint_path)

            iteration += 1
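
The iters_per_grad_acc branch above steps the optimizer only every few iterations, i.e. it accumulates gradients across batches. When doing that, the per-batch loss is usually divided by the accumulation factor so the accumulated gradient approximates a single larger batch; this example does not scale it. A self-contained sketch of the usual pattern (the toy model, data and constants are illustrative, not from this repo):

import torch
import torch.nn as nn

model = nn.Linear(10, 1)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MSELoss()
accum_steps = 4          # hypothetical stand-in for iters_per_grad_acc
grad_clip_thresh = 1.0

optimizer.zero_grad()
for step in range(16):
    x, y = torch.randn(8, 10), torch.randn(8, 1)
    # Scale the loss so the summed gradients average over the window.
    loss = criterion(model(x), y) / accum_steps
    loss.backward()
    if (step + 1) % accum_steps == 0:
        torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip_thresh)
        optimizer.step()
        optimizer.zero_grad()
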
Exemplo n.º 28
0
def train(num_gpus, rank, group_name, output_directory, log_directory,
          checkpoint_path, hparams):
    torch.manual_seed(hparams.seed)
    torch.cuda.manual_seed(hparams.seed)

    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    #=====END:   ADDED FOR DISTRIBUTED======

    criterion = WaveGlowLoss(hparams.sigma)
    model = WaveGlow(hparams).cuda()

    Taco2 = load_pretrained_taco('tacotron2.pt', hparams)

    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
    #=====END:   ADDED FOR DISTRIBUTED======

    learning_rate = hparams.learning_rate
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    if hparams.fp16_run:
        from apex import amp
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')

    # Load checkpoint if one exists
    iteration = 0
    if checkpoint_path:
        model, optimizer, iteration = load_checkpoint(checkpoint_path, model,
                                                      optimizer)
        iteration += 1  # next iteration is iteration + 1

    trainset = TextMelLoader(hparams.training_files, hparams)
    collate_fn = TextMelCollate()
    # =====START: ADDED FOR DISTRIBUTED======
    train_sampler = DistributedSampler(trainset) if num_gpus > 1 else None
    # =====END:   ADDED FOR DISTRIBUTED======
    batch_size = hparams.batch_size
    train_loader = DataLoader(trainset,
                              num_workers=0,
                              shuffle=False,
                              sampler=train_sampler,
                              batch_size=batch_size,
                              pin_memory=False,
                              drop_last=True,
                              collate_fn=collate_fn)

    # Get shared output_directory ready

    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)

    if hparams.with_tensorboard and rank == 0:
        logger = prepare_directories_and_logger(output_directory,
                                                log_directory)

    model.train()
    epoch_offset = max(0, int(iteration / len(train_loader)))
    print("Total Epochs: {}".format(hparams.epochs))
    print("Batch Size: {}".format(hparams.batch_size))
    print("learning rate: {}".format(hparams.learning_rate))
    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, hparams.epochs):
        print("Epoch: {}".format(epoch))
        for i, batch in enumerate(train_loader):
            model.zero_grad()

            text_padded, input_lengths, mel_padded, max_len, output_lengths = parse_batch(
                batch)
            with torch.no_grad():
                enc_outputs, alignments = Taco2(
                    (text_padded, input_lengths, mel_padded, max_len,
                     output_lengths))

            # mel_padded = mel_padded.transpose(1, 2)
            # mel_padded = mel_padded / torch.abs(mel_padded).max().item()
            mel_pos = torch.arange(1000)
            mel_pos = to_gpu(mel_pos).long().unsqueeze(0)
            mel_pos = mel_pos.expand(hparams.batch_size, -1)
            src_pos = torch.arange(hparams.n_position)
            src_pos = to_gpu(src_pos).long().unsqueeze(0)
            src_pos = src_pos.expand(hparams.batch_size, -1)

            mel_padded = (mel_padded + 5) / 10

            z, log_s_list, log_det_w_list, dec_enc_attn = model(
                mel_padded, enc_outputs, mel_pos, src_pos, input_lengths)
            outputs = (z, log_s_list, log_det_w_list, dec_enc_attn)
            loss = criterion(outputs, alignments)
            if num_gpus > 1:
                reduced_loss = reduce_tensor(loss.data, num_gpus).item()
            else:
                reduced_loss = loss.item()

            if hparams.fp16_run:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            grad_norm = torch.nn.utils.clip_grad_norm_(
                model.parameters(), hparams.grad_clip_thresh)
            optimizer.step()

            print("{}:\t{:.9f}".format(iteration, reduced_loss))
            if hparams.with_tensorboard and rank == 0:
                logger.log_training(reduced_loss, grad_norm, learning_rate,
                                    iteration)

            if (iteration % hparams.iters_per_checkpoint == 0):
                if rank == 0:
                    mel_predict, test_attn = model.test(
                        mel_padded, enc_outputs, mel_pos, src_pos,
                        input_lengths)
                    if hparams.with_tensorboard:
                        # logger is only created when tensorboard logging is on
                        logger.log_alignment(model, dec_enc_attn, alignments,
                                             mel_padded, mel_predict,
                                             test_attn, iteration)
                    checkpoint_path = "{}/waveglow_{}".format(
                        output_directory, iteration)
                    save_checkpoint(model, optimizer, learning_rate, iteration,
                                    checkpoint_path)

            iteration += 1
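
Several of these loops rely on the external apex package for FP16 (amp.initialize / amp.scale_loss). If apex is unavailable, the same pattern can be expressed with PyTorch's native AMP; a sketch assuming a CUDA device, with a toy module standing in for the actual model:

import torch
import torch.nn as nn

device = 'cuda'
model = nn.Linear(80, 80).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
scaler = torch.cuda.amp.GradScaler()

for _ in range(10):
    mel = torch.randn(4, 80, device=device)
    optimizer.zero_grad()
    with torch.cuda.amp.autocast():
        loss = model(mel).pow(2).mean()
    scaler.scale(loss).backward()    # scaled backward, like amp.scale_loss
    scaler.unscale_(optimizer)       # unscale before gradient clipping
    torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
    scaler.step(optimizer)
    scaler.update()
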
Exemplo n.º 29
0
def train(num_gpus, rank, group_name, output_directory, epochs, learning_rate,
          sigma, iters_per_checkpoint, batch_size, seed, fp16_run,
          checkpoint_path, with_tensorboard):
    torch.manual_seed(seed)
    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    #=====END:   ADDED FOR DISTRIBUTED======

    criterion = WaveGlowLoss(sigma)
    model = WaveGlow(**waveglow_config).cpu()

    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
    #=====END:   ADDED FOR DISTRIBUTED======

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    if fp16_run:
        from apex import amp
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')

    # Load checkpoint if one exists
    iteration = 0
    if checkpoint_path != "":
        model, optimizer, iteration = load_checkpoint(checkpoint_path, model,
                                                      optimizer)
        iteration += 1  # next iteration is iteration + 1

    trainset = Mel2Samp(**data_config)
    # =====START: ADDED FOR DISTRIBUTED======
    train_sampler = DistributedSampler(trainset) if num_gpus > 1 else None
    # =====END:   ADDED FOR DISTRIBUTED======
    train_loader = DataLoader(trainset, num_workers=1, shuffle=False,
                              sampler=train_sampler,
                              batch_size=batch_size,
                              pin_memory=False,
                              drop_last=True)

    # Get shared output_directory ready
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)

    if with_tensorboard and rank == 0:
        from tensorboardX import SummaryWriter
        logger = SummaryWriter(os.path.join(output_directory, 'logs'))

    model.train()
    epoch_offset = max(0, int(iteration / len(train_loader)))
    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, epochs):
        print("Epoch: {}".format(epoch))
        for i, batch in enumerate(train_loader):
            model.zero_grad()

            mel, audio = batch
            mel = mel.cpu()
            audio = audio.cpu()
            outputs = model((mel, audio))

            loss = criterion(outputs)
            if num_gpus > 1:
                reduced_loss = reduce_tensor(loss.data, num_gpus).item()
            else:
                reduced_loss = loss.item()

            if fp16_run:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            optimizer.step()

            print("{}:\t{:.9f}".format(iteration, reduced_loss))
            if with_tensorboard and rank == 0:
                logger.add_scalar('training_loss', reduced_loss, i + len(train_loader) * epoch)

            if (iteration % iters_per_checkpoint == 0):
                if rank == 0:
                    checkpoint_path = "{}/waveglow_{}".format(
                        output_directory, iteration)
                    save_checkpoint(model, optimizer, learning_rate, iteration,
                                    checkpoint_path)

            iteration += 1
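
save_checkpoint and load_checkpoint are used throughout but never shown. A plausible sketch of the pair, matching the call sites above (the actual helpers in these repos may save the whole module object or extra fields):

import torch


def save_checkpoint(model, optimizer, learning_rate, iteration, filepath):
    # Persist everything needed to resume: weights, optimizer state,
    # the learning rate and the iteration counter.
    print("Saving model and optimizer state at iteration {} to {}".format(
        iteration, filepath))
    torch.save({'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'learning_rate': learning_rate,
                'iteration': iteration}, filepath)


def load_checkpoint(checkpoint_path, model, optimizer):
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(checkpoint['model'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    return model, optimizer, checkpoint['iteration']
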
Exemplo n.º 30
0
def train(num_gpus, rank, group_name, output_directory, epochs, learning_rate,
          sigma, batch_size, seed, checkpoint_path, hparams):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # =====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    # =====END:   ADDED FOR DISTRIBUTED======

    criterion = WaveGlowLoss(sigma)
    if num_gpus >= 1:
        model = WaveGlow(**waveglow_config, hparams=hparams).cuda()
    else:
        model = WaveGlow(**waveglow_config, hparams=hparams)

    # =====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
    # =====END:   ADDED FOR DISTRIBUTED======

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Load checkpoint if one exists
    iteration, eval_iteration = 0, 0

    if checkpoint_path != "":
        model, optimizer, iteration, eval_iteration = load_checkpoint(checkpoint_path, model, optimizer)
        iteration += 1  # next iteration is iteration + 1
        eval_iteration += 1
    # trainset = Mel2Samp(**data_config)

    trainset = TextMelLoader(
        audiopaths_and_text='./filelists/ljs_audio_text_train_filelist.txt', hparams=hparams)
    testset = TextMelLoader(
        audiopaths_and_text='./filelists/ljs_audio_text_test_filelist.txt', hparams=hparams)


    collate_fn = TextMelCollate(hparams, fixed_length=True)
    # =====START: ADDED FOR DISTRIBUTED======
    train_sampler = DistributedSampler(trainset) if num_gpus > 1 else None
    # =====END:   ADDED FOR DISTRIBUTED======
    train_loader = DataLoader(trainset, num_workers=1,
                              collate_fn=collate_fn,
                              shuffle=False,
                              sampler=train_sampler,
                              batch_size=batch_size,
                              pin_memory=False,
                              drop_last=True)
    # The test loader needs its own sampler; reusing train_sampler would
    # index testset with indices drawn from trainset.
    test_sampler = DistributedSampler(testset) if num_gpus > 1 else None
    test_loader = DataLoader(testset, num_workers=1,
                             collate_fn=collate_fn,
                             shuffle=False,
                             sampler=test_sampler,
                             batch_size=batch_size,
                             pin_memory=False,
                             drop_last=True)
    log_path = os.path.join(output_directory, 'log-event')
    os.makedirs(log_path, exist_ok=True)
    logger = WaveGlowLogger(log_path)
    # Get shared output_directory ready
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)

    model.train()
    tacotron2 = Tacotron2(hparams)
    batch_parser = tacotron2.parse_batch
    # we use tacotron-2's pipeline
    epoch_offset = max(0, int(iteration / len(train_loader)))
    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, epochs):
        print("Epoch: {}".format(epoch))
        model.train()
        for i, batch in enumerate(train_loader):
            model.zero_grad()
            x, y = batch_parser(batch)
            text_padded, input_lengths, mel_padded, max_len, output_lengths = x
            # print(text_padded.size(), mel_padded.size())
            mel_padded, gate_padded = y
            outputs = model((text_padded, mel_padded))

            loss = criterion(outputs)
            logger.log_loss('train/loss', loss, iteration)
            if num_gpus > 1:
                reduced_loss = reduce_tensor(loss.data, num_gpus).item()
            else:
                reduced_loss = loss.item()
            loss.backward()
            optimizer.step()

            print("{}:\t{:.9f}".format(iteration, reduced_loss))
            iteration += 1

        # model.eval()
        # for i, batch in enumerate(test_loader):
        #     x, y = batch_parser(batch)
        #     text_padded, input_lengths, mel_padded, max_len, output_lengths = x
        #     mel_padded, gate_padded = y
        #     outputs = model((text_padded, mel_padded))
        #     loss = criterion(outputs)
        #     logger.log_loss('eval/loss', loss, iteration)
        #     eval_iteration += 1

        if rank == 0:
            checkpoint_path = "{}/waveglow_epoch_{}".format(output_directory, epoch)
            save_checkpoint(model, optimizer, learning_rate, iteration, eval_iteration, checkpoint_path,
                            hparams=hparams)
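
A last note on the data pipeline shared by these examples: every loop builds a DistributedSampler and passes shuffle=False to the DataLoader, but none of them calls sampler.set_epoch(epoch), so each epoch replays the same ordering. A small standalone sketch of the usual fix (toy dataset; num_replicas and rank are pinned so it runs without init_process_group):

import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.distributed import DistributedSampler

dataset = TensorDataset(torch.randn(128, 80))
sampler = DistributedSampler(dataset, num_replicas=1, rank=0)  # shuffles by default
loader = DataLoader(dataset, sampler=sampler, batch_size=8, shuffle=False)

for epoch in range(3):
    sampler.set_epoch(epoch)   # reseed so every epoch sees a different ordering
    for (mel,) in loader:
        pass                   # training step would go here
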