def sample_training(self, output, iteration):
    """Log images and best-effort audio for the index-0 item of ``output``.

    Args:
        output: Indexable model output. ``output[0][0]`` is logged as the mel
            spectrogram, ``output[1][0]`` as the postnet mel spectrogram and
            ``output[2][0]`` (transposed) as the alignment.
        iteration: Step value forwarded to every ``add_image`` /
            ``add_audio`` call.

    Audio logging is best-effort: if inverting a spectrogram or writing the
    audio raises an ``Exception``, a warning with the traceback is logged and
    the call returns normally. ``KeyboardInterrupt`` / ``SystemExit`` are no
    longer swallowed.
    """
    import logging  # function-scope import keeps this block self-contained

    mel_outputs = to_arr(output[0][0])
    mel_outputs_postnet = to_arr(output[1][0])
    alignments = to_arr(output[2][0]).T

    # plot alignment, mel and postnet output
    self.add_image("alignment", plot_alignment_to_numpy(alignments), iteration)
    self.add_image("mel_outputs", plot_spectrogram_to_numpy(mel_outputs), iteration)
    self.add_image(
        "mel_outputs_postnet",
        plot_spectrogram_to_numpy(mel_outputs_postnet),
        iteration,
    )

    # save audio (spectrogram inversion sometimes fails, hence the guard)
    try:
        wav = inv_melspectrogram(mel_outputs)
        # Peak-normalize in place; the 0.01 floor keeps the divisor away from 0.
        wav /= max(0.01, np.max(np.abs(wav)))
        wav_postnet = inv_melspectrogram(mel_outputs_postnet)
        wav_postnet /= max(0.01, np.max(np.abs(wav_postnet)))
        self.add_audio('pred', wav, iteration, hps.sample_rate)
        self.add_audio('pred_postnet', wav_postnet, iteration, hps.sample_rate)
    except Exception:
        # Keep training alive, but leave a trace instead of failing silently.
        logging.getLogger(__name__).warning(
            "Skipping audio sample at iteration %s", iteration, exc_info=True
        )
def sample_training(self, output, target, iteration):
    """Log ``*_test`` images and audio for the index-0 item of a prediction.

    Args:
        output: Indexable model output; ``output[0][0]`` (mel),
            ``output[1][0]`` (postnet mel) and ``output[2][0]`` (alignment,
            transposed before plotting) are read.
        target: Indexable target; ``target[0][0]`` is read as the target mel.
        iteration: Step value forwarded to every ``add_image`` /
            ``add_audio`` call.

    The waveforms are logged exactly as returned by ``inv_mel_spectrogram``;
    no peak normalization is applied here.
    """
    mel_pred = to_arr(output[0][0])
    mel_ref = to_arr(target[0][0])
    mel_post = to_arr(output[1][0])
    attention = to_arr(output[2][0]).T

    # plot alignment, mel and postnet output
    image_specs = (
        ("alignment_test", plot_alignment_to_numpy, attention),
        ("mel_outputs_test", plot_spectrogram_to_numpy, mel_pred),
        ("mel_outputs_postnet_test", plot_spectrogram_to_numpy, mel_post),
        ("mel_target_test", plot_spectrogram_to_numpy, mel_ref),
    )
    for tag, render, data in image_specs:
        self.add_image(tag, render(data), iteration)

    # save audio: invert all three spectrograms first, then log them
    audio_specs = (
        ('pred_test', mel_pred),
        ('pred_postnet_test', mel_post),
        ('target_test', mel_ref),
    )
    clips = [(tag, inv_mel_spectrogram(mel, hps)) for tag, mel in audio_specs]
    for tag, clip in clips:
        self.add_audio(tag, clip, iteration, hps.sample_rate)
def audio(output, pth):
    """Invert both mel outputs to waveforms and save them as wav files.

    Args:
        output: Three-element sequence; the first two elements are the mel
            and postnet mel outputs, of which only index 0 is used. The third
            element is ignored.
        pth: Path prefix. The mel waveform is written to ``pth + '.wav'`` and
            the postnet waveform to ``pth + '_post.wav'``.
    """
    mel, mel_post, _ = output

    # Both inversions run before anything is written to disk.
    plain_wav = inv_melspectrogram(to_arr(mel[0]))
    post_wav = inv_melspectrogram(to_arr(mel_post[0]))

    plain_path = pth + '.wav'
    save_wav(plain_wav, plain_path)
    post_path = pth + '_post.wav'
    save_wav(post_wav, post_path)

    print('wav save to:', plain_path)
    print('postnet_wav save to:', post_path)
def sample_training(self, real, pred, iteration):
    """Log a peak-normalized real/predicted audio pair.

    Args:
        real: Reference signal, converted with ``to_arr``.
        pred: Predicted signal, converted with ``to_arr``.
        iteration: Step value forwarded to ``add_audio``.

    Both arrays are divided in place by ``max(0.01, peak)``, where ``peak`` is
    the largest absolute sample value.
    """
    real_wav = to_arr(real)
    pred_wav = to_arr(pred)

    # save audio (the 0.01 floor keeps the divisor away from zero)
    real_peak = np.max(np.abs(real_wav))
    real_wav /= max(0.01, real_peak)
    pred_peak = np.max(np.abs(pred_wav))
    pred_wav /= max(0.01, pred_peak)

    sample_rate_tags = (('real', real_wav), ('pred', pred_wav))
    for tag, signal in sample_rate_tags:
        self.add_audio(tag, signal, iteration, hps.sample_rate)
def infer(model, src_pth):
    """Run ``model.infer`` on the mel spectrogram of one wav file.

    Args:
        model: Object exposing ``infer(mel)``; element 0 of its result is
            used.
        src_pth: Path handed to ``load_wav`` (called with ``seg=False``).

    Returns:
        A two-element list: the loaded source audio and the model result
        converted with ``to_arr``.
    """
    src = load_wav(src_pth, seg=False)
    mel_np = melspectrogram(src).astype(np.float32)

    # Wrapping in a list adds a leading dimension of size 1.
    # NOTE(review): building a tensor from a list holding an ndarray may hit a
    # slow path in recent PyTorch versions -- confirm before changing.
    batch = mode(torch.Tensor([mel_np]))

    with torch.no_grad():
        prediction = model.infer(batch)[0]

    return [src, to_arr(prediction)]
def infer(wav_path, text, model):
    """Run teacher-forced inference and return the postnet mel spectrogram.

    Args:
        wav_path: Path of the wav file whose mel spectrogram is fed to
            ``model.teacher_infer``.
        text: Input text, converted with ``text_to_sequence`` using
            ``hps.text_cleaners``.
        model: Object exposing ``teacher_infer(sequence, mel)`` returning four
            values, the second being the postnet mel output.

    Returns:
        An array built from the index-0 postnet output. When the input frame
        count is not a multiple of ``hps.n_frames_per_step``, the trailing
        ``r`` remainder frames are not passed to the model; the result is the
        input mel array with all but those last ``r`` frames overwritten by
        the prediction.
    """
    sequence = text_to_sequence(text, hps.text_cleaners)
    sequence = to_var(torch.IntTensor(sequence)[None, :]).long()
    mel = melspectrogram(load_wav(wav_path))
    mel_in = to_var(torch.Tensor([mel]))

    # Drop trailing frames so the frame count divides n_frames_per_step.
    r = mel_in.shape[2] % hps.n_frames_per_step
    if r != 0:
        mel_in = mel_in[:, :, :-r]

    # The inputs are duplicated into a batch of two; only the index-0 result
    # is used below (the reason for the duplication is not visible here).
    sequence = torch.cat([sequence, sequence], 0)
    mel_in = torch.cat([mel_in, mel_in], 0)

    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        _, mel_outputs_postnet, _, _ = model.teacher_infer(sequence, mel_in)

    predicted = to_arr(mel_outputs_postnet[0])
    if r == 0:
        return predicted

    # Reuse the local input mel so the untouched remainder frames are kept.
    mel[:, :-r] = predicted
    return mel
def log_training_vid(self, output, target, reduced_loss, grad_norm, learning_rate, iteration):
    """Log training scalars, images and audio for one step.

    Args:
        output: Indexable model output; ``output[0][0]`` (mel),
            ``output[1][0]`` (postnet mel) and ``output[3][0]`` (alignment,
            transposed before plotting) are read.
        target: Indexable target; ``target[0][0]`` is read as the target mel.
        reduced_loss: Iterable of exactly four values, in order: mel loss,
            post mel loss, L1 loss, gate loss.
        grad_norm: Value logged under ``grad.norm``.
        learning_rate: Value logged under ``learning.rate``.
        iteration: Step value forwarded to every logging call.
    """
    mel_loss, mel_loss_post, l1_loss, gate_loss = reduced_loss
    scalars = (
        ("training.mel_loss", mel_loss),
        ("training.mel_loss_post", mel_loss_post),
        ("training.l1_loss", l1_loss),
        ("training.gate_loss", gate_loss),
        ("grad.norm", grad_norm),
        ("learning.rate", learning_rate),
    )
    for tag, value in scalars:
        self.add_scalar(tag, value, iteration)

    mel_pred = to_arr(output[0][0])
    mel_ref = to_arr(target[0][0])
    mel_post = to_arr(output[1][0])
    attention = to_arr(output[3][0]).T

    # plot alignment, mel and postnet output
    images = (
        ("alignment", plot_alignment_to_numpy, attention),
        ("mel_outputs", plot_spectrogram_to_numpy, mel_pred),
        ("mel_outputs_postnet", plot_spectrogram_to_numpy, mel_post),
        ("mel_target", plot_spectrogram_to_numpy, mel_ref),
    )
    for tag, render, data in images:
        self.add_image(tag, render(data), iteration)

    # save audio: invert and rescale every clip first, then log them all
    # NOTE(review): clips are scaled in place to a peak of 32767; if
    # ``add_audio`` is TensorBoard's, it may expect values in [-1, 1] -- confirm.
    clips = []
    for tag, mel in (('pred', mel_pred), ('pred_postnet', mel_post), ('target', mel_ref)):
        clip = inv_mel_spectrogram(mel, hps)
        clip *= 32767 / max(0.01, np.max(np.abs(clip)))
        clips.append((tag, clip))

    for tag, clip in clips:
        self.add_audio(tag, clip, iteration, hps.sample_rate)
def save_mel(output, pth):
    """Save the transposed index-0 mel output to ``pth + '.npy'``.

    Args:
        output: Three-element sequence; only the first element is used. The
            second (postnet) and third elements are ignored.
        pth: Path prefix; ``'.npy'`` is appended.
    """
    mel_outputs, _, _ = output
    destination = pth + '.npy'
    mel = to_arr(mel_outputs[0])
    np.save(destination, mel.T)
def audio(output, pth):
    """Invert the index-0 postnet mel output and save it to ``pth + '.wav'``.

    Args:
        output: Three-element sequence; only the second element (postnet mel
            output) is used.
        pth: Path prefix; ``'.wav'`` is appended.
    """
    _, postnet_mel, _ = output
    postnet_arr = to_arr(postnet_mel[0])
    waveform = inv_melspectrogram(postnet_arr)
    save_wav(waveform, pth + '.wav')
def plot(output, pth):
    """Plot the index-0 mel, postnet mel and alignment, saved as ``pth + '.png'``.

    Args:
        output: Three-element sequence of mel outputs, postnet mel outputs and
            alignments; index 0 of each is plotted, the alignment transposed.
        pth: Path prefix; ``'.png'`` is appended.
    """
    mel_outputs, mel_outputs_postnet, alignments = output
    mel_panel = to_arr(mel_outputs[0])
    post_panel = to_arr(mel_outputs_postnet[0])
    align_panel = to_arr(alignments[0]).T
    plot_data((mel_panel, post_panel, align_panel))
    # Saves whatever figure is current after plot_data returns.
    plt.savefig(pth + '.png')