def main():
    """Synthesize spectrograms and audio from paired PPG inputs.

    Reads ``ppgs_paths`` (one ``ppg_path|findA_ppg_path|speaker_id`` record
    per line), runs the multi-speaker DCBHG checkpoint on both the original
    and the "findA" PPG, and writes .wav / .npy / .png artifacts for each
    into ``Multi_log_dir``.
    """

    def _dump(idx, suffix, mel, spec, mel_audio, spec_audio):
        # Persist audio, raw arrays and spectrogram plots for one prediction.
        # suffix is '' for the original PPG and '_findA' for the findA PPG,
        # reproducing the original "{idx}_sample_mel{suffix}.*" naming.
        write_wav(
            os.path.join(Multi_log_dir,
                         "{}_sample_mel{}.wav".format(idx, suffix)),
            mel_audio)
        write_wav(
            os.path.join(Multi_log_dir,
                         "{}_sample_spec{}.wav".format(idx, suffix)),
            spec_audio)
        np.save(
            os.path.join(Multi_log_dir,
                         "{}_sample_mel{}.npy".format(idx, suffix)),
            mel)
        np.save(
            os.path.join(Multi_log_dir,
                         "{}_sample_spec{}.npy".format(idx, suffix)),
            spec)
        draw_spec(
            os.path.join(Multi_log_dir,
                         "{}_sample_mel{}.png".format(idx, suffix)),
            mel)
        draw_spec(
            os.path.join(Multi_log_dir,
                         "{}_sample_spec{}.png".format(idx, suffix)),
            spec)

    with torch.no_grad():
        model = DCBHG()
        model = tts_load(model=model, ckpt_path=ckpt_path_Multi)

        # 'with' ensures the list file is closed (original leaked the handle).
        with open(ppgs_paths, 'r') as f:
            ppgs_list = [line.strip() for line in f]

        for idx, record in tqdm(enumerate(ppgs_list)):
            ppg_path, findA_ppg_path, speaker_id = record.split('|')
            ppg = np.load(ppg_path)
            findA_ppg = np.load(findA_ppg_path)
            # Both PPG matrices must have PPG_DIM features per frame.
            assert ppg.shape[1] == PPG_DIM and findA_ppg.shape[1] == PPG_DIM

            speaker_id = int(speaker_id)
            mel_pred, spec_pred, mel_pred_audio, spec_pred_audio = tts_predict(
                model, ppg, speaker_id)
            findA_mel_pred, findA_spec_pred, findA_mel_pred_audio, findA_spec_pred_audio = tts_predict(
                model, findA_ppg, speaker_id)

            _dump(idx, '', mel_pred, spec_pred,
                  mel_pred_audio, spec_pred_audio)
            _dump(idx, '_findA', findA_mel_pred, findA_spec_pred,
                  findA_mel_pred_audio, findA_spec_pred_audio)
# ---- Пример #2 (Example #2: scraped-snippet separator; original junk lines broke Python syntax) ----
use_cuda = torch.cuda.is_available()
# A CUDA device is mandatory here. Raise instead of `assert use_cuda is True`:
# asserts are stripped under `python -O`, and `is True` is an anti-idiom.
if not use_cuda:
    raise RuntimeError('CUDA is required but torch.cuda.is_available() is False')


# Hyperparameters and paths.
STARTED_DATESTRING = "{0:%Y-%m-%dT%H-%M-%S}".format(datetime.now())
ckpt_path_DataBakerCN = '/datapool/home/hujk17/ppg_decode_spec_5ms_sch_DataBakerCN/restoreANDvalitation_DataBakerCN_log_dir/2020-10-15T21-03-07/ckpt_model/checkpoint_step000031800.pth'

ppgs_paths = 'inference_ppgs_path_list.txt'
DataBakerCN_log_dir = os.path.join('inference_DataBakerCN_log_dir', STARTED_DATESTRING)
# makedirs(exist_ok=True) already tolerates an existing directory,
# so the previous `if os.path.exists(...) is False:` guard was redundant.
os.makedirs(DataBakerCN_log_dir, exist_ok=True)

# Global model instance (weights are loaded later via tts_load).
model = DCBHG()

def tts_load(model, ckpt_path):
    """Load checkpoint weights from *ckpt_path* into *model*.

    Moves the model to the GPU when ``use_cuda`` is set and switches it to
    eval mode. Returns the (possibly CUDA-resident) model.
    """
    # map_location keeps GPU-saved checkpoints loadable on CPU-only hosts;
    # without it torch.load raises when the tensors were serialized on CUDA.
    ckpt_load = torch.load(ckpt_path,
                           map_location=None if use_cuda else 'cpu')
    model.load_state_dict(ckpt_load["state_dict"])
    if use_cuda:
        model = model.cuda()
    model.eval()
    return model


def tts_predict(model, ppg):
    """Prepare a PPG array and run model inference.

    NOTE(review): this definition appears truncated by the snippet scrape —
    no forward pass or return statement is visible here.
    """
    # Original comment said "prepare input data and move it to the GPU",
    # but no .cuda()/.to(device) call is visible in this truncated view —
    # only a batch axis is added and the tensor is cast to float.
    ppg = Variable(torch.from_numpy(ppg)).unsqueeze(0).float()
    print(ppg.size())
    print(ppg.shape)
# ---- Пример #3 (Example #3: scraped-snippet separator; original junk lines broke Python syntax) ----
def main():
  """Train the multi-speaker DCBHG model (PPG -> mel + linear spectrogram).

  Relies on module-level configuration (TRAIN_FILE, BATCH_SIZE, device,
  Multi_log_dir, ...) defined elsewhere in this file.
  """
  # Load training / validation datasets.
  now_dataset_train = MultiDataset(TRAIN_FILE)
  now_train_torch_dataloader = DataLoader(now_dataset_train, batch_size=BATCH_SIZE, num_workers=num_workers, shuffle=True, drop_last=True)

  now_dataset_validation = MultiDataset(VALIDATION_FILE)
  now_validation_torch_loader = DataLoader(now_dataset_validation, batch_size=BATCH_SIZE, num_workers=num_workers, shuffle=True)


  # Build the model on the GPU and set up TensorBoard logging.
  model = DCBHG().to(device)
  writer = SummaryWriter(log_dir=Multi_log_dir)


  # Adam with a fixed lr (LEARNING_RATE, 0.0003 per the original note);
  # unclear whether a schedule is needed — left fixed.
  optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)


  global_step = 0
  global_epoch = 0
  # Optionally resume from a checkpoint, restoring step/epoch counters.
  if restore_ckpt_path_Multi is not None:
    model, optimizer, _step, _epoch = load_checkpoint(restore_ckpt_path_Multi, model, optimizer)
    global_step = _step
    global_epoch = _epoch


  # Alternative losses kept for reference; plain L1 (as in Kuaishou's setup)
  # is what is actually used.
  # cross_entropy_loss = nn.CrossEntropyLoss()
  # criterion = nn.MSELoss()
  # l1_loss = nn.NLLLoss()
  # from kuaishou
  my_l1_loss = nn.L1Loss()




  # Start training.
  print('Start Training...')

  model.train()
  while global_epoch < nepochs:
      running_loss = 0.0
      for _step, (ppgs, mels, specs, lengths, id_speakers) in enumerate(now_train_torch_dataloader):
          start_time = time.time()
          # Begin a batch: clear gradients, then move tensors to the GPU.
          optimizer.zero_grad()

          ppgs = ppgs.to(device)
          mels = mels.to(device)
          specs = specs.to(device)
          id_speakers = id_speakers.to(device).long()
          ppgs, mels, specs = Variable(ppgs).float(), Variable(mels).float(), Variable(specs).float()
          # Speaker ids must be integral (hence .long() above).
          # print('id_speaker type', id_speakers.type())


          # Forward pass: predict mel and linear spectrograms for the batch.
          mels_pred, specs_pred = model(ppgs, id_speakers)


          # Per-utterance L1 over the valid (unpadded) frames only.
          # TODO: verify whether Tacotron really uses two plain L1 losses.
          loss = 0.0
          for i in range(BATCH_SIZE):
            mel_loss = my_l1_loss(mels_pred[i, :lengths[i], :], mels[i, :lengths[i], :])
            spec_loss = my_l1_loss(specs_pred[i, :lengths[i], :], specs[i, :lengths[i], :])
            loss += (mel_loss + spec_loss)
          loss = loss / BATCH_SIZE
          print('Steps', global_step, 'Training Loss:', loss.item(), 'Time Use: ', time.time() - start_time)
          writer.add_scalar("loss", float(loss.item()), global_step)
          running_loss += loss.item()


          # Backprop, clip gradients, and apply the optimizer step.
          loss.backward()
          if clip_thresh > 0:
            _grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), clip_thresh)  # returned norm unused
          optimizer.step()


          # Periodically save a checkpoint (weights + optimizer + counters).
          if global_step > 0 and global_step % CKPT_EVERY == 0:
            checkpoint_path = os.path.join(Multi_model_dir, "checkpoint_step{:09d}.pth".format(global_step))
            torch.save({
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict(),
                "global_step": global_step,
                "global_epoch": global_epoch,
            }, checkpoint_path)

          # Periodically evaluate on the validation set — handled entirely
          # inside validate().
          if global_step > 0 and global_step % VALIDATION_EVERY == 0:
            validate(model=model, criterion=my_l1_loss, validation_torch_loader=now_validation_torch_loader, now_steps=global_step, writer=writer)

          # Batch done; advance the global step counter.
          global_step += 1

      # Epoch-level statistics.
      averaged_loss = running_loss / (len(now_train_torch_dataloader))
      writer.add_scalar('epochLoss', averaged_loss, global_epoch)
      global_epoch += 1
# ---- Пример #4 (Example #4: scraped-snippet separator; original junk lines broke Python syntax) ----
def main():
  """Train the single-speaker DCBHG model on the LJSpeech dataset.

  Relies on module-level configuration (BATCH_SIZE, device, nepochs,
  ljspeech_log_dir, ...) and module-level counters global_step/global_epoch.
  """
  # Load the training dataset.
  now_dataset = ljspeechDtaset()
  now_torch_dataloader = DataLoader(now_dataset, batch_size=BATCH_SIZE, num_workers=num_workers, shuffle=True, drop_last=True)


  # Build the model on the GPU and set up TensorBoard logging.
  model = DCBHG().to(device)
  writer = SummaryWriter(log_dir=ljspeech_log_dir)


  # Adam with a fixed lr (LEARNING_RATE, 0.0003 per the original note);
  # unclear whether a schedule is needed — left fixed.
  optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)


  # Alternative losses kept for reference; plain L1 (as in Kuaishou's setup)
  # is what is actually used.
  # cross_entropy_loss = nn.CrossEntropyLoss()
  # criterion = nn.MSELoss()
  # l1_loss = nn.NLLLoss()
  # from kuaishou
  my_l1_loss = nn.L1Loss()


  # Start training; the printed string means "start training".
  print('开始训练')
  global global_step, global_epoch
  model.train()

  while global_epoch < nepochs:
      running_loss = 0.0
      for _step, (ppgs, mels, specs, lengths) in tqdm(enumerate(now_torch_dataloader)):
          # Begin a batch: clear gradients, then move tensors to the GPU.
          optimizer.zero_grad()

          ppgs = ppgs.to(device)
          mels = mels.to(device)
          specs = specs.to(device)
          print('before type:', type(ppgs), type(mels), type(specs))
          ppgs, mels, specs = Variable(ppgs).float(), Variable(mels).float(), Variable(specs).float()
          print('after type:', type(ppgs), type(mels), type(specs))


          # Forward pass: predict mel and linear spectrograms for the batch.
          mels_pred, specs_pred = model(ppgs)


          # Per-utterance L1 over the valid (unpadded) frames only.
          # TODO: verify whether Tacotron really uses two plain L1 losses.
          loss = 0.0
          for i in range(BATCH_SIZE):
            mel_loss = my_l1_loss(mels_pred[i, :lengths[i], :], mels[i, :lengths[i], :])
            spec_loss = my_l1_loss(specs_pred[i, :lengths[i], :], specs[i, :lengths[i], :])
            loss += (mel_loss + spec_loss)
          loss = loss / BATCH_SIZE
          print("Check Loss:", loss)
          writer.add_scalar("loss", float(loss.item()), global_step)
          running_loss += loss.item()  # accumulate for the per-epoch average


          # Backprop, clip gradients, and apply the optimizer step.
          loss.backward()
          if clip_thresh > 0:
            _grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), clip_thresh)  # returned norm unused
          optimizer.step()


          # Periodically save a checkpoint and a sample reconstruction
          # (audio generated from the first utterance of the batch).
          if global_step > 0 and global_step % CKPT_EVERY == 0:
            checkpoint_path = os.path.join(ljspeech_model_dir, "checkpoint_step{:09d}.pth".format(global_step))
            torch.save({
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict(),
                "global_step": global_step,
                "global_epoch": global_epoch,
            }, checkpoint_path)
            eval_model_generate(specs[0].cpu().data.numpy(), specs_pred[0].cpu().data.numpy(), lengths[0], ljspeech_log_dir, global_step)


          # Batch done; advance the global step counter.
          global_step += 1

      # Epoch-level statistics.
      averaged_loss = running_loss / (len(now_torch_dataloader))
      writer.add_scalar("loss (per epoch)", averaged_loss, global_epoch)
      global_epoch += 1