Example #1
    def log_attn_ws(self,
                    attn_ws,
                    ilens,
                    olens,
                    iteration,
                    name=None,
                    num=None):
        # Log one attention-weight figure per attention module, cropped to the
        # unpadded output/input lengths of the first sample in the batch.
        for w in attn_ws:
            for k in w.keys():
                att = w[k].transpose(1, 2)[0][:olens[0], :ilens[0]].cpu().data
                if name is not None:
                    self.add_figure(name + '_' + str(num) + '_' + k,
                                    plot_alignment_to_numpy(att), iteration)
                else:
                    self.add_figure(k, plot_alignment_to_numpy(att), iteration)
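These examples are methods of a TensorBoard logger object; the snippets do not show the class itself. A minimal sketch of such a wrapper, assuming torch.utils.tensorboard.SummaryWriter as the base class (the class name below is invented for illustration):

import torch
from torch.utils.tensorboard import SummaryWriter

class TTSLogger(SummaryWriter):
    # Hypothetical wrapper: add_figure/add_scalar/add_histogram/add_image/add_audio
    # used in the examples are inherited from SummaryWriter.
    def __init__(self, log_dir):
        super(TTSLogger, self).__init__(log_dir)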
Example #2
    def log_validation(self, reduced_loss, model, y, y_pred, iteration):
        self.add_scalar("validation.loss", reduced_loss, iteration)
        _, mel_outputs, gate_outputs, alignments = y_pred
        mel_targets, gate_targets = y

        # plot distribution of parameters
        for tag, value in model.named_parameters():
            tag = tag.replace('.', '/')
            self.add_histogram(tag, value.data.cpu().numpy(), iteration)

        # plot alignment, mel target and predicted, gate target and predicted
        idx = random.randint(0, alignments.size(0) - 1)
        self.add_image("alignment",
                       plot_alignment_to_numpy(
                           alignments[idx].data.cpu().numpy().T),
                       iteration,
                       dataformats='HWC')
        self.add_image("mel_target",
                       plot_spectrogram_to_numpy(
                           mel_targets[idx].data.cpu().numpy()),
                       iteration,
                       dataformats='HWC')
        self.add_image("mel_predicted",
                       plot_spectrogram_to_numpy(
                           mel_outputs[idx].data.cpu().numpy()),
                       iteration,
                       dataformats='HWC')
        self.add_image("gate",
                       plot_gate_outputs_to_numpy(
                           gate_targets[idx].data.cpu().numpy(),
                           torch.sigmoid(
                               gate_outputs[idx]).data.cpu().numpy()),
                       iteration,
                       dataformats='HWC')
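The plot_alignment_to_numpy / plot_spectrogram_to_numpy / plot_gate_outputs_to_numpy helpers are not shown in these examples. Given the dataformats='HWC' argument, they presumably render a matplotlib figure and return it as an H x W x C uint8 array. A minimal sketch of that idea for the alignment case (an assumption, not the repository's actual helper):

import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

def plot_alignment_to_numpy(alignment):
    # Draw the attention matrix and return the rendered canvas as an HWC uint8 array.
    fig, ax = plt.subplots(figsize=(6, 4))
    im = ax.imshow(alignment, aspect='auto', origin='lower', interpolation='none')
    fig.colorbar(im, ax=ax)
    ax.set_xlabel('Decoder timestep')
    ax.set_ylabel('Encoder timestep')
    fig.canvas.draw()
    data = np.asarray(fig.canvas.buffer_rgba())[:, :, :3].copy()
    plt.close(fig)
    return data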
Example #3
    def log(self, y, y_pred, idx, iteration):
        _, mel_outputs, gate_outputs, alignments = y_pred
        mel_targets, gate_targets = y

        self.add_image("alignment",
                       plot_alignment_to_numpy(
                           alignments[idx].data.cpu().numpy().T),
                       iteration,
                       dataformats='HWC')
        self.add_image("mel_target",
                       plot_spectrogram_to_numpy(
                           mel_targets[idx].data.cpu().numpy()),
                       iteration,
                       dataformats='HWC')
        self.add_image("mel_predicted",
                       plot_spectrogram_to_numpy(
                           mel_outputs[idx].data.cpu().numpy()),
                       iteration,
                       dataformats='HWC')
        self.add_image("gate",
                       plot_gate_outputs_to_numpy(
                           gate_targets[idx].data.cpu().numpy(),
                           torch.sigmoid(
                               gate_outputs[idx]).data.cpu().numpy()),
                       iteration,
                       dataformats='HWC')
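Example #3 is the same visualization block as Example #2 with the scalar and histogram bookkeeping stripped out; the sample index idx is chosen by the caller instead. A hypothetical call site (logger and the batch variables are assumed names):

import random

# pick one sample of the batch to visualize; y_pred[3] holds the alignments
idx = random.randint(0, y_pred[3].size(0) - 1)
logger.log(y, y_pred, idx, iteration)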
Example #4
    def sample_training(self, output, iteration):
        mel_outputs = to_arr(output[0][0])
        mel_outputs_postnet = to_arr(output[1][0])
        alignments = to_arr(output[2][0]).T

        # plot alignment, mel and postnet output
        self.add_image("alignment", plot_alignment_to_numpy(alignments),
                       iteration)
        self.add_image("mel_outputs", plot_spectrogram_to_numpy(mel_outputs),
                       iteration)
        self.add_image("mel_outputs_postnet",
                       plot_spectrogram_to_numpy(mel_outputs_postnet),
                       iteration)

        # save audio
        try:  # spectrogram inversion occasionally fails; skip audio logging if so
            wav = inv_melspectrogram(mel_outputs)
            wav /= max(0.01, np.max(np.abs(wav)))
            wav_postnet = inv_melspectrogram(mel_outputs_postnet)
            wav_postnet /= max(0.01, np.max(np.abs(wav_postnet)))
            self.add_audio('pred', wav, iteration, hps.sample_rate)
            self.add_audio('pred_postnet', wav_postnet, iteration,
                           hps.sample_rate)
        except Exception:
            pass
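Examples #4 to #6 use a to_arr helper to turn tensors into NumPy arrays before plotting. A plausible definition, assuming it only detaches, moves to CPU, and casts (an assumption about the actual helper):

import numpy as np

def to_arr(var):
    # Detach from the autograd graph, move to CPU, and convert to float32 NumPy.
    return var.cpu().detach().numpy().astype(np.float32)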
Example #5
    def sample_training(self, output, target, iteration):
        mel_outputs = to_arr(output[0][0])
        mel_target = to_arr(target[0][0])
        mel_outputs_postnet = to_arr(output[1][0])
        alignments = to_arr(output[2][0]).T

        # plot alignment, mel and postnet output
        self.add_image("alignment_test", plot_alignment_to_numpy(alignments),
                       iteration)
        self.add_image("mel_outputs_test",
                       plot_spectrogram_to_numpy(mel_outputs), iteration)
        self.add_image("mel_outputs_postnet_test",
                       plot_spectrogram_to_numpy(mel_outputs_postnet),
                       iteration)
        self.add_image("mel_target_test",
                       plot_spectrogram_to_numpy(mel_target), iteration)

        # save audio (no amplitude normalization is applied in this variant)
        wav = inv_mel_spectrogram(mel_outputs, hps)
        wav_postnet = inv_mel_spectrogram(mel_outputs_postnet, hps)
        wav_target = inv_mel_spectrogram(mel_target, hps)
        self.add_audio('pred_test', wav, iteration, hps.sample_rate)
        self.add_audio('pred_postnet_test', wav_postnet, iteration,
                       hps.sample_rate)
        self.add_audio('target_test', wav_target, iteration, hps.sample_rate)
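Unlike Example #6, this variant logs the inverted waveforms without any amplitude scaling. If the underlying writer is torch.utils.tensorboard's SummaryWriter, add_audio expects float samples roughly in [-1, 1], so a peak normalization such as the following may be worth applying first (a sketch, not part of the original code):

# peak-normalize to [-1, 1], guarding against near-silent output
wav = wav / max(0.01, np.max(np.abs(wav)))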
Example #6
    def log_training_vid(self, output, target, reduced_loss, grad_norm,
                         learning_rate, iteration):
        mel_loss, mel_loss_post, l1_loss, gate_loss = reduced_loss
        self.add_scalar("training.mel_loss", mel_loss, iteration)
        self.add_scalar("training.mel_loss_post", mel_loss_post, iteration)
        self.add_scalar("training.l1_loss", l1_loss, iteration)
        self.add_scalar("training.gate_loss", gate_loss, iteration)
        self.add_scalar("grad.norm", grad_norm, iteration)
        self.add_scalar("learning.rate", learning_rate, iteration)

        mel_outputs = to_arr(output[0][0])
        mel_target = to_arr(target[0][0])
        mel_outputs_postnet = to_arr(output[1][0])
        alignments = to_arr(output[3][0]).T

        # plot alignment, mel and postnet output
        self.add_image("alignment", plot_alignment_to_numpy(alignments),
                       iteration)
        self.add_image("mel_outputs", plot_spectrogram_to_numpy(mel_outputs),
                       iteration)
        self.add_image("mel_outputs_postnet",
                       plot_spectrogram_to_numpy(mel_outputs_postnet),
                       iteration)
        self.add_image("mel_target", plot_spectrogram_to_numpy(mel_target),
                       iteration)

        # save audio, peak-normalized and scaled to the int16 amplitude range
        wav = inv_mel_spectrogram(mel_outputs, hps)
        wav *= 32767 / max(0.01, np.max(np.abs(wav)))
        wav_postnet = inv_mel_spectrogram(mel_outputs_postnet, hps)
        wav_postnet *= 32767 / max(0.01, np.max(np.abs(wav_postnet)))
        wav_target = inv_mel_spectrogram(mel_target, hps)
        wav_target *= 32767 / max(0.01, np.max(np.abs(wav_target)))
        self.add_audio('pred', wav, iteration, hps.sample_rate)
        self.add_audio('pred_postnet', wav_postnet, iteration, hps.sample_rate)
        self.add_audio('target', wav_target, iteration, hps.sample_rate)
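A hypothetical call site for this method inside a training loop; logger, hps.iters_per_log, and the individual loss names are assumptions, not part of the example:

# hypothetical training-loop excerpt
if iteration % hps.iters_per_log == 0:
    logger.log_training_vid(y_pred, y,
                            (mel_loss, mel_loss_post, l1_loss, gate_loss),
                            grad_norm, learning_rate, iteration)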