Exemplo n.º 1
0
    # Every 5th iteration, report the training scalars to the logger -- but
    # only when the configured loss is "BEGAN". No else branch is visible in
    # this snippet, so other loss settings log nothing at this point.
    if iteration % 5 == 0:
        if hp.loss == "BEGAN":
            # k and convergence are logged next to the GAN / discriminator /
            # cycle losses (presumably BEGAN's balance term and convergence
            # measure -- TODO confirm against where they are computed).
            logger.log_training(iteration = iteration, loss_gan = loss_gan, 
            loss_dis = loss_dis, loss_cycle = loss_cycle, k = k, convergence = convergence)

    # Every 50th iteration: write checkpoints, then turn the current batch's
    # features back into audio arrays for the logging code that follows.
    if (iteration % 50 == 0):

        # m with its optimizer is stored under .../gen, dis_high with its
        # optimizer under .../dis, both below the run's checkpoint path.
        save_checkpoint(m, opt, iteration, f'checkpoint/{args.checkpoint_path}/gen')
        save_checkpoint(dis_high, opt_dis, iteration, f'checkpoint/{args.checkpoint_path}/dis')

        
        # random.randint includes both endpoints, hence the "- 1": idx is a
        # valid position along dim 0 of fake_singing (presumably the batch
        # dimension -- TODO confirm). idx is not used in the visible lines.
        idx = random.randint(0, fake_singing.size(0) - 1)

        # Disabled: de-normalisation with mean/std before inversion.
        #mel = (mel * std) +mean
        #z = (z * std) + mean
        # Invert features to audio and convert each result to a NumPy array
        # (detached, moved to CPU). Singing, real and generated, goes through
        # melblock; speech goes through its own vocoder_speech.
        real_audio = melblock.inverse(singing).detach().cpu().numpy()
        fake_audio = melblock.inverse(fake_singing).detach().cpu().numpy()
        real_speech_audio = vocoder_speech.inverse(speech).detach().cpu().numpy()
        # Disabled: re-normalisation with mean/std after inversion.
        #mel = (mel -mean)/ std
        #z = (z - mean ) / std
        # The bare string below is an expression statement used as an inline
        # note about the logger's accepted value types; it has no effect.
        """
        logger work like this:
            logger only accept image, audio ,scalars type.
            and the type of them is :
            
            scalars : int
            image : tensor
            audio : ndarray
        
        More details can see logger/logger.py
        """
Exemplo n.º 2
0
    # Reduce the loss to its mean before handing it to the optimiser step.
    loss_sp2sing = loss_sp2sing.mean()

    # OptimStep is defined outside this snippet. It receives a list with one
    # (model, optimizer, loss, flag) tuple plus the value 3; the meaning of
    # the False flag and of 3 is not visible here -- TODO confirm.
    OptimStep([(m, opt, loss_sp2sing, False)], 3)

    # Every 50th iteration: log the loss, checkpoint m, and reconstruct audio
    # for one real/generated pair.
    if (iteration % 50 == 0):
        # NOTE(review): the value is logged under the key "loss_sing2sp"
        # although the variable is loss_sp2sing -- confirm the key is intended.
        logger.log_training(iteration=iteration, loss_sing2sp=loss_sp2sing)
        save_checkpoint(m, opt, iteration,
                        f'checkpoint/{args.checkpoint_path}/gen')

        # Always preview the first item of the batch.
        idx = 0
        # Clamp generated values into [-10000, 1.8] and rebind the name, so
        # everything below (including later logging) sees the clamped tensor.
        fake_song_padded = torch.clamp(fake_song_padded, -10000, 1.8)
        # Slice idx:idx + 1 instead of indexing so the leading dimension is
        # kept (with length 1) for the reconstruction calls.
        singing_melgan = fake_song_padded[idx:idx + 1]
        singing_real_melgan = song_padded[idx:idx + 1]

        if args.feat_type == "mel":
            # "mel" features: invert with the vocoder, convert the result to
            # a NumPy array, then take entry 0 along the leading axis.
            fake_singing_audio = vocoder.inverse(
                singing_melgan).detach().cpu().numpy()[0]
            real_singing_audio = vocoder.inverse(
                singing_real_melgan).detach().cpu().numpy()[0]
        else:

            # Any other feat_type: convert entry 0 to NumPy first and pass it
            # to gl_rec (presumably a Griffin-Lim reconstruction -- confirm).
            fake_singing_audio = gl_rec(
                singing_melgan[0].detach().cpu().numpy())
            real_singing_audio = gl_rec(
                singing_real_melgan[0].detach().cpu().numpy())
        logger.log_validation(
            iteration=iteration,
            mel_train_singing=("image", plot_spectrogram_to_numpy(),
                               song_padded[idx]),
            mel_train_singing_generate=("image", plot_spectrogram_to_numpy(),
                                        fake_song_padded[idx]),