def main():
    """Batch PPG-to-speech inference for (ppg, findA_ppg, speaker) triples.

    Reads ``ppgs_paths`` — one ``ppg_path|findA_ppg_path|speaker_id`` record
    per line — synthesizes mel/linear spectrograms and audio for both the
    original PPG and its ``findA`` counterpart with the same speaker id, and
    dumps wav/npy/png artifacts into ``Multi_log_dir``.
    """
    with torch.no_grad():  # pure inference: no autograd bookkeeping needed
        model = DCBHG()
        model = tts_load(model=model, ckpt_path=ckpt_path_Multi)
        # Fix: read the list inside a context manager instead of leaking the
        # file handle (the original called open() and never closed it).
        with open(ppgs_paths, 'r') as ppgs_file:
            ppgs_list = [line.strip() for line in ppgs_file]
        for idx, record in tqdm(enumerate(ppgs_list)):
            ppg_path, findA_ppg_path, speaker_id = record.split('|')
            ppg = np.load(ppg_path)
            findA_ppg = np.load(findA_ppg_path)
            # Both PPG matrices are expected to be (frames, PPG_DIM).
            assert ppg.shape[1] == PPG_DIM and findA_ppg.shape[1] == PPG_DIM
            speaker_id = int(speaker_id)
            mel_pred, spec_pred, mel_pred_audio, spec_pred_audio = tts_predict(
                model, ppg, speaker_id)
            findA_mel_pred, findA_spec_pred, findA_mel_pred_audio, findA_spec_pred_audio = tts_predict(
                model, findA_ppg, speaker_id)
            # NOTE(review): a consistency-error comparison
            # (consistencyError_fromWav on both audios, written to
            # CE_fromWav_compare_path) was commented out upstream and is
            # intentionally omitted here.
            _dump_prediction(idx, '', mel_pred, spec_pred,
                             mel_pred_audio, spec_pred_audio)
            _dump_prediction(idx, '_findA', findA_mel_pred, findA_spec_pred,
                             findA_mel_pred_audio, findA_spec_pred_audio)


def _dump_prediction(idx, suffix, mel_pred, spec_pred, mel_audio, spec_audio):
    """Write the wav/npy/png artifacts for one prediction into Multi_log_dir.

    ``suffix`` is '' for the plain prediction and '_findA' for the findA one;
    the produced filenames are identical to the originals
    ("{idx}_sample_mel{suffix}.wav", etc.).
    """
    write_wav(os.path.join(Multi_log_dir,
                           "{}_sample_mel{}.wav".format(idx, suffix)), mel_audio)
    write_wav(os.path.join(Multi_log_dir,
                           "{}_sample_spec{}.wav".format(idx, suffix)), spec_audio)
    np.save(os.path.join(Multi_log_dir,
                         "{}_sample_mel{}.npy".format(idx, suffix)), mel_pred)
    np.save(os.path.join(Multi_log_dir,
                         "{}_sample_spec{}.npy".format(idx, suffix)), spec_pred)
    draw_spec(os.path.join(Multi_log_dir,
                           "{}_sample_mel{}.png".format(idx, suffix)), mel_pred)
    draw_spec(os.path.join(Multi_log_dir,
                           "{}_sample_spec{}.png".format(idx, suffix)), spec_pred)
use_cuda = torch.cuda.is_available() assert use_cuda is True # 超参数和路径 STARTED_DATESTRING = "{0:%Y-%m-%dT%H-%M-%S}".format(datetime.now()) ckpt_path_DataBakerCN = '/datapool/home/hujk17/ppg_decode_spec_5ms_sch_DataBakerCN/restoreANDvalitation_DataBakerCN_log_dir/2020-10-15T21-03-07/ckpt_model/checkpoint_step000031800.pth' ppgs_paths = 'inference_ppgs_path_list.txt' DataBakerCN_log_dir = os.path.join('inference_DataBakerCN_log_dir', STARTED_DATESTRING) if os.path.exists(DataBakerCN_log_dir) is False: os.makedirs(DataBakerCN_log_dir, exist_ok=True) # 全局变量 model = DCBHG() def tts_load(model, ckpt_path): ckpt_load = torch.load(ckpt_path) model.load_state_dict(ckpt_load["state_dict"]) if use_cuda: model = model.cuda() model.eval() return model def tts_predict(model, ppg): # 准备输入的数据并转换到GPU ppg = Variable(torch.from_numpy(ppg)).unsqueeze(0).float() print(ppg.size()) print(ppg.shape)
def main():
    """Train the multi-speaker DCBHG PPG->spectrogram model.

    Builds train/validation loaders, optionally restores a checkpoint, then
    runs an L1-loss training loop with periodic checkpointing and validation.
    """
    # Data loading / preparation.
    now_dataset_train = MultiDataset(TRAIN_FILE)
    now_train_torch_dataloader = DataLoader(now_dataset_train, batch_size=BATCH_SIZE, num_workers=num_workers, shuffle=True, drop_last=True)
    now_dataset_validation = MultiDataset(VALIDATION_FILE)
    now_validation_torch_loader = DataLoader(now_dataset_validation, batch_size=BATCH_SIZE, num_workers=num_workers, shuffle=True)
    # Build the model on the GPU; also set up the TensorBoard writer here.
    model = DCBHG().to(device)
    writer = SummaryWriter(log_dir=Multi_log_dir)
    # Optimizer with a fixed lr = LEARNING_RATE (no schedule for now;
    # original author was unsure whether a variable lr is needed).
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    global_step = 0
    global_epoch = 0
    # Optionally resume training from a checkpoint, restoring step/epoch.
    if restore_ckpt_path_Multi is not None:
        model, optimizer, _step, _epoch = load_checkpoint(restore_ckpt_path_Multi, model, optimizer)
        global_step = _step
        global_epoch = _epoch
    # optimize classification
    # cross_entropy_loss = nn.CrossEntropyLoss()
    # criterion = nn.MSELoss()
    # l1_loss = nn.NLLLoss()
    # from kuaishou
    my_l1_loss = nn.L1Loss()
    # Start training.
    print('Start Training...')
    model.train()
    while global_epoch < nepochs:
        running_loss = 0.0
        for _step, (ppgs, mels, specs, lengths, id_speakers) in enumerate(now_train_torch_dataloader):
            start_time = time.time()
            # Start of batch: clear gradients, move data to the GPU.
            optimizer.zero_grad()
            ppgs = ppgs.to(device)
            mels = mels.to(device)
            specs = specs.to(device)
            # Speaker ids must be integer (long) for the embedding lookup.
            id_speakers = id_speakers.to(device).long()
            ppgs, mels, specs = Variable(ppgs).float(), Variable(mels).float(), Variable(specs).float()
            # id_speaker needs to be an integer type
            # print('id_speaker type', id_speakers.type())
            # Forward pass: predict mel and linear spectrograms together.
            mels_pred, specs_pred = model(ppgs, id_speakers)
            # Compute the loss from the predictions; masked per sample by its
            # true length so padding is excluded. (Original note: Tacotron
            # may not use two plain L1 losses — revisit. TODO)
            loss = 0.0
            for i in range(BATCH_SIZE):
                mel_loss = my_l1_loss(mels_pred[i, :lengths[i], :], mels[i, :lengths[i], :])
                spec_loss = my_l1_loss(specs_pred[i, :lengths[i], :], specs[i, :lengths[i], :])
                loss += (mel_loss + spec_loss)
            loss = loss / BATCH_SIZE
            print('Steps', global_step, 'Training Loss:', loss.item(), 'Time Use: ', time.time() - start_time)
            writer.add_scalar("loss", float(loss.item()), global_step)
            running_loss += loss.item()
            # Backprop and apply the gradient step (with optional clipping).
            loss.backward()
            if clip_thresh > 0:
                # Return value (the grad norm) intentionally unused.
                _grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), clip_thresh)
            optimizer.step()
            # Periodically save a checkpoint.
            if global_step > 0 and global_step % CKPT_EVERY == 0:
                checkpoint_path = os.path.join(Multi_model_dir, "checkpoint_step{:09d}.pth".format(global_step))
                torch.save({
                    "state_dict": model.state_dict(),
                    "optimizer": optimizer.state_dict(),
                    "global_step": global_step,
                    "global_epoch": global_epoch,
                }, checkpoint_path)
            # Periodically evaluate on the validation set (all handled inside
            # validate(); this matters a lot for tracking progress).
            if global_step > 0 and global_step % VALIDATION_EVERY == 0:
                validate(model=model, criterion=my_l1_loss, validation_torch_loader=now_validation_torch_loader, now_steps=global_step, writer=writer)
            # End of this batch: advance the step counter.
            global_step += 1
        # Per-epoch statistics: mean of the per-step losses.
        averaged_loss = running_loss / (len(now_train_torch_dataloader))
        writer.add_scalar('epochLoss', averaged_loss, global_epoch)
        global_epoch += 1
def main():
    """Train the single-speaker (LJSpeech) DCBHG PPG->spectrogram model.

    Runs an L1-loss training loop over the LJSpeech dataset with periodic
    checkpointing; at each checkpoint it also renders an evaluation sample
    via ``eval_model_generate``.
    """
    # Data loading / preparation.
    now_dataset = ljspeechDtaset()
    now_torch_dataloader = DataLoader(now_dataset, batch_size=BATCH_SIZE, num_workers=num_workers, shuffle=True, drop_last=True)
    # Build the model on the GPU; also set up the TensorBoard writer here.
    model = DCBHG().to(device)
    writer = SummaryWriter(log_dir=ljspeech_log_dir)
    # Optimizer with a fixed lr = LEARNING_RATE (no schedule for now).
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    # optimize classification
    # cross_entropy_loss = nn.CrossEntropyLoss()
    # criterion = nn.MSELoss()
    # l1_loss = nn.NLLLoss()
    # from kuaishou
    my_l1_loss = nn.L1Loss()
    # Start training ("开始训练"); step/epoch counters live at module level.
    print('开始训练')
    global global_step, global_epoch
    model.train()
    while global_epoch < nepochs:
        running_loss = 0.0
        for _step, (ppgs, mels, specs, lengths) in tqdm(enumerate(now_torch_dataloader)):
            # Start of batch: clear gradients, move data to the GPU.
            optimizer.zero_grad()
            ppgs = ppgs.to(device)
            mels = mels.to(device)
            specs = specs.to(device)
            # Fix: dropped the leftover per-batch debug prints of
            # type(ppgs)/type(mels)/type(specs) before and after the
            # Variable conversion — they flooded stdout every iteration.
            ppgs, mels, specs = Variable(ppgs).float(), Variable(mels).float(), Variable(specs).float()
            # Forward pass: predict mel and linear spectrograms together.
            mels_pred, specs_pred = model(ppgs)
            # Per-sample L1 loss masked by true length so padding is excluded.
            # (Original note: Tacotron may not use two plain L1 losses —
            # revisit. TODO)
            loss = 0.0
            for i in range(BATCH_SIZE):
                mel_loss = my_l1_loss(mels_pred[i, :lengths[i], :], mels[i, :lengths[i], :])
                spec_loss = my_l1_loss(specs_pred[i, :lengths[i], :], specs[i, :lengths[i], :])
                loss += (mel_loss + spec_loss)
            loss = loss / BATCH_SIZE
            print("Check Loss:", loss)
            writer.add_scalar("loss", float(loss.item()), global_step)
            # Accumulate for the epoch-average loss.
            running_loss += loss.item()
            # Backprop and apply the gradient step (with optional clipping).
            loss.backward()
            if clip_thresh > 0:
                # Return value (the grad norm) intentionally unused.
                _grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), clip_thresh)
            optimizer.step()
            # Periodically save a checkpoint and render an audio/spec sample
            # at the same step.
            if global_step > 0 and global_step % CKPT_EVERY == 0:
                checkpoint_path = os.path.join(ljspeech_model_dir, "checkpoint_step{:09d}.pth".format(global_step))
                torch.save({
                    "state_dict": model.state_dict(),
                    "optimizer": optimizer.state_dict(),
                    "global_step": global_step,
                    "global_epoch": global_epoch,
                }, checkpoint_path)
                eval_model_generate(specs[0].cpu().data.numpy(), specs_pred[0].cpu().data.numpy(), lengths[0], ljspeech_log_dir, global_step)
            # End of this batch: advance the step counter.
            global_step += 1
        # Per-epoch statistics: mean of the per-step losses.
        averaged_loss = running_loss / (len(now_torch_dataloader))
        writer.add_scalar("loss (per epoch)", averaged_loss, global_epoch)
        global_epoch += 1