Example #1
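# Shared setup assumed by the examples below. This is a sketch: the module
# paths follow a typical WaveRNN/Tacotron-style repo layout and the helpers
# are reconstructed from how the snippets use them, not confirmed sources.
import time

import numpy as np
import torch
import torch.nn.functional as F

from models.tacotron import Tacotron                     # assumed path
from utils import hparams as hp                          # assumed path
from utils.checkpoints import save_checkpoint            # assumed path
from utils.display import (stream, save_attention,       # assumed path
                           save_spectrogram)
from utils.distribution import data_parallel_workaround  # assumed path
from utils.paths import Paths                            # assumed path


def np_now(x):
    # Assumed helper: move a tensor to host memory as a NumPy array
    return x.detach().cpu().numpy()


def pad2d_nonzero(x, max_y, max_x):
    # Assumed helper: pad a 2-D array to (max_y, max_x) with -1 so padding
    # can later be masked out with torch.ne(t, -1)
    return np.pad(x, ((0, max_y - x.shape[0]), (0, max_x - x.shape[1])),
                  constant_values=-1.0)


def pad2d_zero(x, max_y, max_x):
    # Assumed helper: pad a 2-D array to (max_y, max_x) with zeros
    return np.pad(x, ((0, max_y - x.shape[0]), (0, max_x - x.shape[1])))
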
def tts_train_loop(paths: Paths, model: Tacotron, optimizer, train_set, lr,
                   train_steps, attn_example):
    device = next(
        model.parameters()).device  # use same device as model parameters

    for g in optimizer.param_groups:
        g['lr'] = lr

    total_iters = len(train_set)
    epochs = train_steps // total_iters + 1

    for e in range(1, epochs + 1):

        start = time.time()
        running_loss = 0

        # Perform 1 epoch
        for i, (x, m, ids, _) in enumerate(train_set, 1):

            x, m = x.to(device), m.to(device)

            # Parallelize model onto GPUs using a workaround for a Python bug
            if device.type == 'cuda' and torch.cuda.device_count() > 1:
                m1_hat, m2_hat, attention = data_parallel_workaround(
                    model, x, m)
            else:
                m1_hat, m2_hat, attention = model(x, m)

            m1_loss = F.l1_loss(m1_hat, m)
            m2_loss = F.l1_loss(m2_hat, m)

            loss = m1_loss + m2_loss

            optimizer.zero_grad()
            loss.backward()
            if hp.tts_clip_grad_norm is not None:
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    model.parameters(), hp.tts_clip_grad_norm)
                if np.isnan(grad_norm):
                    print('grad_norm was NaN!')

            optimizer.step()

            running_loss += loss.item()
            avg_loss = running_loss / i

            speed = i / (time.time() - start)

            step = model.get_step()
            k = step // 1000

            if step % hp.tts_checkpoint_every == 0:
                ckpt_name = f'taco_step{k}K'
                save_checkpoint('tts',
                                paths,
                                model,
                                optimizer,
                                name=ckpt_name,
                                is_silent=True)

            if attn_example in ids:
                idx = ids.index(attn_example)
                save_attention(np_now(attention[idx][:, :160]),
                               paths.tts_attention / f'{step}')
                save_spectrogram(np_now(m2_hat[idx]),
                                 paths.tts_mel_plot / f'{step}', 600)

            msg = f'| Epoch: {e}/{epochs} ({i}/{total_iters}) | Loss: {avg_loss:.4f} | {speed:.2f} steps/s | Step: {k}k | '
            stream(msg)

        # Must save latest optimizer state to ensure that resuming training
        # doesn't produce artifacts
        save_checkpoint('tts', paths, model, optimizer, is_silent=True)
        model.log(paths.tts_log, msg)
        print(' ')
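# A quick check of the scheduling arithmetic above (standalone; the numbers
# are invented): with 500 batches per epoch and 10_000 requested training
# steps, epochs = train_steps // total_iters + 1 gives 21 epochs.
assert 10_000 // 500 + 1 == 21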
Example #2
def tts_train_loop_af_offline(paths: Paths,
                              model: Tacotron,
                              optimizer,
                              train_set,
                              lr,
                              train_steps,
                              attn_example,
                              hp=None):

    def smooth(d, eps=1e-10):
        # Mix in a little uniform mass so no entry is exactly zero, which
        # keeps log() finite in the KL-divergence variant noted below.
        u = 1.0 / float(d.size()[2])
        return eps * u + (1 - eps) * d

    device = next(
        model.parameters()).device  # use same device as model parameters

    for g in optimizer.param_groups:
        g['lr'] = lr

    total_iters = len(train_set)
    epochs = train_steps // total_iters + 1

    for e in range(1, epochs + 1):

        start = time.time()
        running_loss_out, running_loss_attn = 0, 0

        # Perform 1 epoch
        for i, (x, m, ids, _, attn_ref) in enumerate(train_set, 1):

            x, m, attn_ref = x.to(device), m.to(device), attn_ref.to(device)

            # Parallelize model onto GPUs using a workaround for a Python bug
            if device.type == 'cuda' and torch.cuda.device_count() > 1:
                m1_hat, m2_hat, attention = data_parallel_workaround(
                    model, x, m, False, attn_ref)
            else:
                m1_hat, m2_hat, attention = model(x,
                                                  m,
                                                  generate_gta=False,
                                                  attn_ref=attn_ref)

            m1_loss = F.l1_loss(m1_hat, m)
            m2_loss = F.l1_loss(m2_hat, m)
            # Alternative: KL divergence between the smoothed attention maps,
            # e.g. F.kl_div(torch.log(smooth(attention)), smooth(attn_ref),
            #               reduction='mean')  # or 'batchmean'
            attn_loss = F.l1_loss(smooth(attention), smooth(attn_ref))

            loss_out = m1_loss + m2_loss
            loss_attn = attn_loss * hp.attn_loss_coeff
            loss = loss_out + loss_attn

            optimizer.zero_grad()
            loss.backward()
            if hp.tts_clip_grad_norm is not None:
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    model.parameters(), hp.tts_clip_grad_norm)
                if np.isnan(grad_norm):
                    print('grad_norm was NaN!')

            optimizer.step()

            running_loss_out += loss_out.item()
            avg_loss_out = running_loss_out / i
            running_loss_attn += loss_attn.item()
            avg_loss_attn = running_loss_attn / i

            speed = i / (time.time() - start)

            step = model.get_step()
            k = step // 1000

            if step % hp.tts_checkpoint_every == 0:
                ckpt_name = f'taco_step{k}K'
                save_checkpoint('tts',
                                paths,
                                model,
                                optimizer,
                                name=ckpt_name,
                                is_silent=True)

            if attn_example in ids:
                idx = ids.index(attn_example)
                save_attention(np_now(attn_ref[idx][:, :160]),
                               paths.tts_attention / f'{step}_tf')
                save_attention(np_now(attention[idx][:, :160]),
                               paths.tts_attention / f'{step}_af')
                save_spectrogram(np_now(m2_hat[idx]),
                                 paths.tts_mel_plot / f'{step}', 600)

            msg = f'| Epoch: {e}/{epochs} ({i}/{total_iters}) | Loss_out: {avg_loss_out:#.4}; Output_attn: {avg_loss_attn:#.4} | {speed:#.2} steps/s | Step: {k}k | '
            stream(msg)

        # Must save latest optimizer state to ensure that resuming training
        # doesn't produce artifacts
        save_checkpoint('tts', paths, model, optimizer, is_silent=True)
        model.log(paths.tts_log, msg)
        print(' ')
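# Illustration (separate from the example above; shapes are invented): the
# smoothed attention losses from Example #2 on dummy tensors.
attn = torch.softmax(torch.rand(2, 5, 8), dim=-1)      # predicted attention
attn_ref = torch.softmax(torch.rand(2, 5, 8), dim=-1)  # reference attention

def _smooth(d, eps=1e-10):
    u = 1.0 / float(d.size()[2])
    return eps * u + (1 - eps) * d

l1_variant = F.l1_loss(_smooth(attn), _smooth(attn_ref))
kl_variant = F.kl_div(torch.log(_smooth(attn)), _smooth(attn_ref),
                      reduction='batchmean')

Example #3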
def tts_train_loop(paths: Paths, model: Tacotron, optimizer, train_set, lr,
                   train_steps, attn_example):
    device = next(
        model.parameters()).device  # use same device as model parameters

    for g in optimizer.param_groups:
        g['lr'] = lr

    total_iters = len(train_set)
    epochs = train_steps // total_iters + 1

    for e in range(1, epochs + 1):

        start = time.time()
        running_loss = 0

        # Perform 1 epoch
        for i, (x, m, ids, _, att_guides) in enumerate(train_set, 1):

            x, m = x.to(device), m.to(device)

            # Parallelize model onto GPUs using a workaround for a Python bug
            if device.type == 'cuda' and torch.cuda.device_count() > 1:
                m1_hat, m2_hat, attention, r = data_parallel_workaround(
                    model, x, m)
            else:
                m1_hat, m2_hat, attention, r = model(x, m)

            orig_attention = attention
            # Reduce each guide by the model's reduction factor r, keeping
            # every r-th decoder frame, then regroup into one guide per item
            n = int(len(att_guides[0]) / r)
            ga = [a[t] for a in att_guides for t in range(0, len(a), r)]

            assert n == len(attention[0])
            guided_attention = [ga[k:k + n] for k in range(0, len(ga), n)]

            # NOTE: the NumPy round trip below detaches the attention from the
            # autograd graph, so the attention loss contributes no gradients;
            # only the two L1 mel losses train the model.
            attention = np_now(attention)
            attention = [
                pad2d_nonzero(x, n, len(att_guides[0][0])) for x in attention
            ]

            guided_attention = torch.tensor(guided_attention).to(device)
            attention = torch.tensor(attention).to(device)

            # Create an attention mask: padded positions were filled with -1
            attention_masks = torch.ne(attention, -1).type(torch.FloatTensor)
            attention_masks = attention_masks.to(device)

            multiply = torch.abs(
                attention * guided_attention) * attention_masks

            attention_loss = torch.sum(multiply)

            mask_sum = torch.sum(attention_masks)

            attention_loss /= mask_sum

            m1_loss = F.l1_loss(m1_hat, m)
            m2_loss = F.l1_loss(m2_hat, m)

            #print("mask sum", mask_sum)
            #print("attention loss", attention_loss)
            #print("m losses", m1_loss, m2_loss)
            prev_loss = m1_loss + m2_loss
            #print("prev loss", prev_loss)
            loss = m1_loss + m2_loss + attention_loss
            #print("loss + att", loss)

            optimizer.zero_grad()
            loss.backward()
            if hp.tts_clip_grad_norm is not None:
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    model.parameters(), hp.tts_clip_grad_norm)
                if np.isnan(grad_norm):
                    print('grad_norm was NaN!')

            optimizer.step()

            running_loss += loss.item()
            avg_loss = running_loss / i

            speed = i / (time.time() - start)

            step = model.get_step()
            k = step // 1000

            if step % hp.tts_checkpoint_every == 0:
                ckpt_name = f'taco_step{k}K'
                save_checkpoint('tts',
                                paths,
                                model,
                                optimizer,
                                name=ckpt_name,
                                is_silent=True)

            if attn_example in ids:
                idx = ids.index(attn_example)
                save_attention(np_now(orig_attention[idx][:, :160]),
                               paths.tts_attention / f'{step}')
                save_spectrogram(np_now(m2_hat[idx]),
                                 paths.tts_mel_plot / f'{step}', 600)

            msg = f'| Epoch: {e}/{epochs} ({i}/{total_iters}) | Loss: {avg_loss:#.4} | {speed:#.2} steps/s | Step: {k}k | '
            stream(msg)

        # Must save latest optimizer state to ensure that resuming training
        # doesn't produce artifacts
        save_checkpoint('tts', paths, model, optimizer, is_silent=True)
        model.log(paths.tts_log, msg)
        print(' ')
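# Illustration (separate from the example above; shapes are invented): the
# masked guided-attention loss from Example #3 on dummy tensors.
attn = torch.rand(2, 5, 8)              # (batch, decoder frames, encoder steps)
guide = torch.rand(2, 5, 8)             # precomputed guide, same shape
guide[1, 3:] = -1                       # pretend item 1 carries -1 padding

mask = torch.ne(guide, -1).float()      # 1 on real positions, 0 on padding
guided_loss = torch.sum(torch.abs(attn * guide) * mask) / torch.sum(mask)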
Example #4
def tts_train_loop(paths: Paths, model: Tacotron, optimizer, train_set, lr,
                   train_steps, attn_example, max_y, max_x):
    device = next(
        model.parameters()).device  # use same device as model parameters

    for g in optimizer.param_groups:
        g['lr'] = lr

    total_iters = len(train_set)
    epochs = train_steps // total_iters + 1

    for e in range(1, epochs + 1):

        start = time.time()
        running_loss = 0

        # Perform 1 epoch
        for i, (x, m, ids, _, padded_att_guides) in enumerate(train_set, 1):

            x, m = x.to(device), m.to(device)

            # Parallelize model onto GPUs using a workaround for a Python bug
            if device.type == 'cuda' and torch.cuda.device_count() > 1:
                m1_hat, m2_hat, attention, r = data_parallel_workaround(
                    model, x, m)
            else:
                m1_hat, m2_hat, attention, r = model(x, m)

            reduced_guides = []

            # Load each item's precomputed guide and keep every r-th frame to
            # match the model's reduction factor
            att_guide_path = hp.attention_path
            for j, item_id in enumerate(ids):
                att = np.load(f'{att_guide_path}/{item_id}.npy')
                reduced = att[0::r]

                pred_attention = attention[j]
                n_frames = pred_attention.shape[0]
                n_phones = pred_attention.shape[-1]

                padded_guides = pad2d_nonzero(reduced, n_frames, n_phones)
                reduced_guides.append(padded_guides)

            reduced_guides = torch.tensor(reduced_guides)
            # Padded positions were filled with -1; mask them out
            mask = torch.ne(reduced_guides, -1).type(torch.FloatTensor)

            # Re-pad the guides with zeros so padded positions contribute
            # nothing to the squared error below
            padded_guides = [
                pad2d_zero(x, n_frames, n_phones) for x in reduced_guides
            ]
            padded_guides = torch.tensor(padded_guides).to(device)
            attention = attention.to(device)
            mask = mask.to(device)
            attention = attention * mask
            print("guide att shape", att.shape)
            print(att)

            print("reduced guide", padded_guides.shape)

            #   print("attention size",n_frames, n_phones)
            print("mask", mask.shape)
            print(mask)

            print(padded_guides.shape, attention.shape, mask.shape)

            print(attention)
            print(padded_guides)

            # attention was masked above and the guides are zero-padded, so
            # padded positions contribute zero error
            multiply = torch.pow((attention - padded_guides), 2)

            attention_loss = torch.sum(multiply)
            mask_sum1 = torch.sum(mask)
            attention_loss /= mask_sum1

            m1_loss = F.l1_loss(m1_hat, m)
            m2_loss = F.l1_loss(m2_hat, m)

            loss = m1_loss + m2_loss + attention_loss
            optimizer.zero_grad()
            loss.backward()
            if hp.tts_clip_grad_norm is not None:
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    model.parameters(), hp.tts_clip_grad_norm)
                if np.isnan(grad_norm):
                    print('grad_norm was NaN!')

            optimizer.step()

            running_loss += loss.item()
            avg_loss = running_loss / i

            speed = i / (time.time() - start)

            step = model.get_step()
            k = step // 1000

            if step % hp.tts_checkpoint_every == 0:
                ckpt_name = f'taco_step{k}K'
                save_checkpoint('tts',
                                paths,
                                model,
                                optimizer,
                                name=ckpt_name,
                                is_silent=True)

            if attn_example in ids:
                idx = ids.index(attn_example)
                save_attention(np_now(attention[idx][:, :160]),
                               paths.tts_attention / f'{step}')
                save_spectrogram(np_now(m2_hat[idx]),
                                 paths.tts_mel_plot / f'{step}', 600)

            msg = f'| Epoch: {e}/{epochs} ({i}/{total_iters}) | Loss: {avg_loss:#.4} | {speed:#.2} steps/s | Step: {k}k | '
            stream(msg)

        # Must save latest optimizer state to ensure that resuming training
        # doesn't produce artifacts
        save_checkpoint('tts', paths, model, optimizer, is_silent=True)
        model.log(paths.tts_log, msg)
        print(' ')
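# Illustration (separate from the example above; shapes are invented): the
# masked squared-error attention loss from Example #4 on dummy tensors.
attn = torch.rand(2, 5, 8)
guide = torch.rand(2, 5, 8)
guide[1, 3:] = -1                       # -1 marks padding, as in pad2d_nonzero

mask = torch.ne(guide, -1).float()
guide_zeroed = guide * mask             # same effect as re-padding with zeros
mse_loss = torch.sum((attn * mask - guide_zeroed) ** 2) / torch.sum(mask)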