Пример #1
0
    def sample_training(self, output, iteration):
        """Log spectrogram images and reconstructed audio for one sample.

        Takes the first item of each element of ``output`` (``output[0][0]``,
        ``output[1][0]``, ``output[2][0]``) and writes an alignment image,
        two spectrogram images and two audio clips through ``self.add_image``
        / ``self.add_audio``.

        Args:
            output: indexable model output; element 0 is used as the mel
                prediction, element 1 as the postnet mel prediction and
                element 2 as the alignment (transposed before plotting).
            iteration: step value forwarded to every ``add_*`` call.
        """
        mel_outputs = to_arr(output[0][0])
        mel_outputs_postnet = to_arr(output[1][0])
        alignments = to_arr(output[2][0]).T

        # plot alignment, mel and postnet output
        self.add_image("alignment", plot_alignment_to_numpy(alignments),
                       iteration)
        self.add_image("mel_outputs", plot_spectrogram_to_numpy(mel_outputs),
                       iteration)
        self.add_image("mel_outputs_postnet",
                       plot_spectrogram_to_numpy(mel_outputs_postnet),
                       iteration)

        # Audio logging is best-effort: spectrogram inversion sometimes fails
        # and must not abort the caller. Only ``Exception`` is caught so that
        # KeyboardInterrupt / SystemExit still propagate, and the failure is
        # reported instead of being dropped silently.
        try:
            wav = inv_melspectrogram(mel_outputs)
            # Peak-normalise; the 0.01 floor avoids dividing by ~0 on silence.
            wav /= max(0.01, np.max(np.abs(wav)))
            wav_postnet = inv_melspectrogram(mel_outputs_postnet)
            wav_postnet /= max(0.01, np.max(np.abs(wav_postnet)))
            self.add_audio('pred', wav, iteration, hps.sample_rate)
            self.add_audio('pred_postnet', wav_postnet, iteration,
                           hps.sample_rate)
        except Exception:
            import logging

            logging.getLogger(__name__).warning(
                "Skipping audio samples at iteration %s", iteration,
                exc_info=True)
Пример #2
0
    def sample_training(self, output, target, iteration):
        """Log test-time images and audio for prediction and ground truth.

        Uses the first item of ``output[0]``, ``output[1]``, ``output[2]`` and
        ``target[0]``. Writes four images (alignment plus three spectrograms)
        and three audio clips, all tagged with a ``_test`` suffix.

        No peak normalisation is applied to the waveforms before logging, and
        errors raised while inverting the spectrograms are not caught.

        Args:
            output: indexable model output (mel, postnet mel, alignment).
            target: indexable ground truth; ``target[0][0]`` is the mel.
            iteration: step value forwarded to every ``add_*`` call.
        """
        predicted_mel = to_arr(output[0][0])
        reference_mel = to_arr(target[0][0])
        postnet_mel = to_arr(output[1][0])
        attention = to_arr(output[2][0]).T

        # Images: alignment first, then one spectrogram per mel.
        self.add_image("alignment_test", plot_alignment_to_numpy(attention),
                       iteration)
        spectrogram_tags = (
            ("mel_outputs_test", predicted_mel),
            ("mel_outputs_postnet_test", postnet_mel),
            ("mel_target_test", reference_mel),
        )
        for image_tag, mel in spectrogram_tags:
            self.add_image(image_tag, plot_spectrogram_to_numpy(mel),
                           iteration)

        # Audio: invert every spectrogram before anything is logged, so a
        # failing inversion leaves no partial set of clips behind.
        audio_sources = (
            ('pred_test', predicted_mel),
            ('pred_postnet_test', postnet_mel),
            ('target_test', reference_mel),
        )
        clips = [(audio_tag, inv_mel_spectrogram(mel, hps))
                 for audio_tag, mel in audio_sources]
        for audio_tag, waveform in clips:
            self.add_audio(audio_tag, waveform, iteration, hps.sample_rate)
Пример #3
0
def audio(output, pth):
    """Invert both predicted spectrograms and save them as wav files.

    ``output`` is unpacked into exactly three elements; the first item of the
    first two (mel and postnet mel) is converted with ``to_arr`` and inverted
    to a waveform. Files are written to ``pth + '.wav'`` and
    ``pth + '_post.wav'``, and both destinations are printed.

    Args:
        output: 3-element iterable ``(mel_outputs, mel_outputs_postnet, _)``.
        pth: output path prefix (no extension).
    """
    raw_mels, postnet_mels, _ = output

    raw_wave = inv_melspectrogram(to_arr(raw_mels[0]))
    postnet_wave = inv_melspectrogram(to_arr(postnet_mels[0]))

    raw_file = pth + '.wav'
    postnet_file = pth + '_post.wav'
    save_wav(raw_wave, raw_file)
    save_wav(postnet_wave, postnet_file)

    print('wav save to:', raw_file)
    print('postnet_wav save to:', postnet_file)
Пример #4
0
    def sample_training(self, real, pred, iteration):
        """Log a peak-normalised real/predicted audio pair.

        Both inputs are converted with ``to_arr``, divided by their absolute
        peak (floored at 0.01 so near-silent clips are not blown up) and
        written via ``self.add_audio`` under the tags ``'real'`` and
        ``'pred'`` at ``hps.sample_rate``.

        Args:
            real: reference waveform, in whatever form ``to_arr`` accepts.
            pred: predicted waveform, same form as ``real``.
            iteration: step value forwarded to ``add_audio``.
        """
        real = to_arr(real)
        pred = to_arr(pred)

        # Normalise out-of-place rather than with ``/=``.
        # NOTE(review): to_arr is not visible here; if it returns a view onto
        # the caller's tensor memory, an in-place divide would silently
        # rescale the caller's data. Out-of-place division also works when
        # the array has an integer dtype, where ``/=`` raises.
        real = real / max(0.01, np.max(np.abs(real)))
        pred = pred / max(0.01, np.max(np.abs(pred)))
        self.add_audio('real', real, iteration, hps.sample_rate)
        self.add_audio('pred', pred, iteration, hps.sample_rate)
Пример #5
0
def infer(model, src_pth):
    """Run ``model.infer`` on the mel spectrogram of a source recording.

    The wav at ``src_pth`` is loaded with ``load_wav(..., seg=False)``,
    converted to a float32 mel spectrogram, given a leading batch axis of
    size 1 and passed through ``mode`` before inference. Inference runs with
    autograd disabled.

    Args:
        model: object exposing ``infer(mel)``; element 0 of its result is
            used.
        src_pth: path of the source wav file.

    Returns:
        A two-element list ``[src, result]``: the loaded source audio and the
        first inference output converted with ``to_arr``.
    """
    src = load_wav(src_pth, seg=False)
    mel = melspectrogram(src).astype(np.float32)
    # Build the (1, ...) batch directly from the array. Wrapping the ndarray
    # in a Python list and calling torch.Tensor on it is slow and triggers a
    # PyTorch warning. The array is already float32, so the tensor dtype is
    # float32 as before.
    mel = mode(torch.from_numpy(mel).unsqueeze(0))
    with torch.no_grad():
        res = model.infer(mel)[0]
    return [src, to_arr(res)]
Пример #6
0
def infer(wav_path, text, model):
    """Run teacher-forced inference and return the postnet mel spectrogram.

    The text is converted to a symbol sequence and the wav at ``wav_path`` to
    a mel spectrogram. The mel's last axis is trimmed to a multiple of
    ``hps.n_frames_per_step`` before being fed to ``model.teacher_infer``.

    Args:
        wav_path: path of the reference wav file.
        text: input text, cleaned with ``hps.text_cleaners``.
        model: object exposing ``teacher_infer(sequence, mel)`` returning four
            values, the second being the postnet mel output.

    Returns:
        The postnet mel of the first batch item as converted by ``to_arr``.
        When frames had to be trimmed, the result is the original mel array
        with all but its last ``r`` frames overwritten by the prediction, so
        the trailing ``r`` frames keep the reference values.
    """
    sequence = text_to_sequence(text, hps.text_cleaners)
    sequence = to_var(torch.IntTensor(sequence)[None, :]).long()
    mel = melspectrogram(load_wav(wav_path))
    mel_in = to_var(torch.Tensor([mel]))
    # Number of trailing frames that do not fill a whole decoder step.
    r = mel_in.shape[2] % hps.n_frames_per_step
    if r != 0:
        mel_in = mel_in[:, :, :-r]
    # The single example is duplicated into a batch of two.
    # NOTE(review): presumably the model cannot handle batch size 1 — confirm.
    sequence = torch.cat([sequence, sequence], 0)
    mel_in = torch.cat([mel_in, mel_in], 0)
    # Inference only: disable autograd so no graph is built for this pass.
    with torch.no_grad():
        _, mel_outputs_postnet, _, _ = model.teacher_infer(sequence, mel_in)
    ret = mel
    if r != 0:
        # Writes into ``mel`` in place; the last ``r`` frames stay untouched.
        ret[:, :-r] = to_arr(mel_outputs_postnet[0])
    else:
        ret = to_arr(mel_outputs_postnet[0])
    return ret
Пример #7
0
    def log_training_vid(self, output, target, reduced_loss, grad_norm,
                         learning_rate, iteration):
        """Log training scalars, spectrogram images and audio for one step.

        Args:
            output: indexable model output; elements 0, 1 and 3 are used as
                mel, postnet mel and alignment (first batch item of each).
            target: indexable ground truth; ``target[0][0]`` is the mel.
            reduced_loss: iterable of exactly four values, in order
                mel loss, postnet mel loss, L1 loss, gate loss.
            grad_norm: value logged as ``grad.norm``.
            learning_rate: value logged as ``learning.rate``.
            iteration: step value forwarded to every ``add_*`` call.
        """
        mel_loss, mel_loss_post, l1_loss, gate_loss = reduced_loss
        scalar_entries = (
            ("training.mel_loss", mel_loss),
            ("training.mel_loss_post", mel_loss_post),
            ("training.l1_loss", l1_loss),
            ("training.gate_loss", gate_loss),
            ("grad.norm", grad_norm),
            ("learning.rate", learning_rate),
        )
        for scalar_tag, scalar_value in scalar_entries:
            self.add_scalar(scalar_tag, scalar_value, iteration)

        predicted_mel = to_arr(output[0][0])
        reference_mel = to_arr(target[0][0])
        postnet_mel = to_arr(output[1][0])
        attention = to_arr(output[3][0]).T

        # Images: alignment first, then one spectrogram per mel.
        self.add_image("alignment", plot_alignment_to_numpy(attention),
                       iteration)
        for image_tag, mel in (("mel_outputs", predicted_mel),
                               ("mel_outputs_postnet", postnet_mel),
                               ("mel_target", reference_mel)):
            self.add_image(image_tag, plot_spectrogram_to_numpy(mel),
                           iteration)

        # Audio: invert and rescale every spectrogram before logging any clip.
        # Each waveform is scaled in place so its absolute peak becomes 32767
        # (the 0.01 floor guards against near-silent output).
        # NOTE(review): if add_audio expects samples in [-1, 1], this scaling
        # will clip — confirm against the writer class, which is not visible.
        clips = []
        for audio_tag, mel in (('pred', predicted_mel),
                               ('pred_postnet', postnet_mel),
                               ('target', reference_mel)):
            waveform = inv_mel_spectrogram(mel, hps)
            waveform *= 32767 / max(0.01, np.max(np.abs(waveform)))
            clips.append((audio_tag, waveform))

        for audio_tag, waveform in clips:
            self.add_audio(audio_tag, waveform, iteration, hps.sample_rate)
Пример #8
0
def save_mel(output, pth):
    """Save the first predicted mel spectrogram, transposed, as ``.npy``.

    ``output`` is unpacked into exactly three elements; only the first (the
    pre-postnet mel outputs) is used. The array is written to
    ``pth + '.npy'``.

    Args:
        output: 3-element iterable ``(mel_outputs, mel_outputs_postnet, _)``.
        pth: output path prefix (no extension).
    """
    raw_mels, _postnet_mels, _ = output
    destination = pth + '.npy'
    transposed_mel = to_arr(raw_mels[0]).T
    np.save(destination, transposed_mel)
Пример #9
0
def audio(output, pth):
    """Invert the postnet mel spectrogram and save it as ``pth + '.wav'``.

    ``output`` is unpacked into exactly three elements; only the postnet mel
    (second element, first batch item) is converted to audio.

    Args:
        output: 3-element iterable ``(mel_outputs, mel_outputs_postnet, _)``.
        pth: output path prefix (no extension).
    """
    _raw_mels, postnet_mels, _ = output
    postnet_mel = to_arr(postnet_mels[0])
    waveform = inv_melspectrogram(postnet_mel)
    save_wav(waveform, pth + '.wav')
Пример #10
0
def plot(output, pth):
    """Plot mel, postnet mel and alignment, then save to ``pth + '.png'``.

    The first batch item of each of the three outputs is converted with
    ``to_arr``; the alignment is transposed. The three arrays are handed to
    ``plot_data`` as one tuple and the current pyplot figure is saved.

    Args:
        output: 3-element iterable
            ``(mel_outputs, mel_outputs_postnet, alignments)``.
        pth: output path prefix (no extension).
    """
    raw_mels, postnet_mels, attention = output
    panels = (
        to_arr(raw_mels[0]),
        to_arr(postnet_mels[0]),
        to_arr(attention[0]).T,
    )
    plot_data(panels)
    plt.savefig(pth + '.png')