Exemplo n.º 1
# Optional resume step: when requested on the command line, rebind the model,
# optimizer and iteration counter to whatever load_checkpoint returns for the
# 'gen' and 'dis' sub-folders of checkpoint/<args.checkpoint_path>/.
# NOTE(review): `== True` only matches values that compare equal to True;
# presumably args.load_checkpoint is a plain bool flag — confirm.
if args.load_checkpoint==True:
    # 'gen' checkpoint -> m / opt (presumably the generator and its optimizer).
    m, opt, iteration = load_checkpoint(f'checkpoint/{args.checkpoint_path}/gen', m, opt)       
    # 'dis' checkpoint -> dis_high / opt_dis. `iteration` is assigned a second
    # time here, so the value from the 'dis' checkpoint overwrites the one
    # just read from the 'gen' checkpoint.
    dis_high, opt_dis, iteration = load_checkpoint(f'checkpoint/{args.checkpoint_path}/dis', dis_high, opt_dis)

    In general, we preprocess the data into .npy files and put them in a
    specific folder; the dataloader then loads those .npy files.

    In this example, however, I show how to transform audio into an STFT
    and a mel-spectrogram with a torch.nn.Module (MelSpectrogram).

# Mel front-end setup. The MelSpectrogram-based variant is left commented out;
# the active one is a MelVocoder built from the path below.
#melblock = MelSpectrogram(hp).cuda()
melblock = MelVocoder(path = "vocoder/melgan-neurips/scripts/logs/NUS")
# Second vocoder, obtained through torch.hub from the 'load_melgan' entry
# point of the descriptinc/melgan-neurips repository.
vocoder_speech = torch.hub.load('descriptinc/melgan-neurips', 'load_melgan')
import random
# Endless batch loop; no exit condition is visible in this excerpt.
while True:
    # Pull the next batch from each iterator and move it to the GPU.
    speech = next(inf_iterator_tr_speech).cuda()
    singing = next(inf_iterator_tr_sing).cuda()
    # random.random() is in [0.0, 1.0), so the stretch factor is in [1.0, 2.0).
    scale = 1 + random.random()
    # Stretch `speech` by `scale` with nearest-neighbour interpolation.
    # Despite the "_2x" name, the factor is random rather than exactly 2.
    speech_2x= F.interpolate(speech, scale_factor=scale, mode='nearest')
    #mel = (mel-mean)/std
    # Crop both tensors along dim 2 to the shorter of the two lengths.
    # `singing` is cropped first, so the second min() sees its new length.
    singing = singing[:,:,:min(speech_2x.size(2), singing.size(2))]
    speech_2x = speech_2x[:,:,:min(speech_2x.size(2), singing.size(2))]

    # Reflect-pad the right end of the last dim up to a multiple of 8.
    # Because of the "+1", a length that is already a multiple of 8 still
    # receives a full 8 extra frames (pad amount is always in 1..8).
    singing = F.pad(singing,(0,(singing.size(2)//8+1)*8 - singing.size(2)), 'reflect')
    speech_2x = F.pad(speech_2x,(0,(speech_2x.size(2)//8+1)*8 - speech_2x.size(2)), 'reflect')
Exemplo n.º 2
from logger import Logger
from logger_utils import prepare_directories_and_logger
from plotting_utils import plot_spectrogram_to_numpy

from optim_step import *
from save_and_load import save_checkpoint, load_checkpoint
from torch.nn import functional as F
from mel2wav.interface import MelVocoder

#vocoder = torch.hub.load('descriptinc/melgan-neurips', 'load_melgan')
vocoder = MelVocoder(

#vocoder = torch.hub.load('descriptinc/melgan-neurips', 'load_melgan')

def train_(args, model, opt, latent_loss_weight, criterion, loader, epochs,
           inf_iterator_test, logger, iteration):

    for epoch in range(epochs):
        mse_sum = 0
        mse_n = 0

        for i, (audio, pitch) in enumerate(loader):

            audio = audio.cuda().float()
Exemplo n.º 3
# Build `dis_high` (presumably the discriminator, going by the config name)
# from model_config/<hp.config_dis>/1.yaml, move it to the GPU, and create an
# Adam optimizer over its parameters with learning rate 1e-4.
dis_high = ymp.construct_model(f"model_config/{hp.config_dis}/1.yaml")
dis_high = dis_high.cuda()
opt_dis = optim.Adam(dis_high.parameters(), lr=1e-4)

# The iteration counter starts at 0 unless a checkpoint is loaded below.
iteration = 0
# NOTE(review): `== True` only matches values that compare equal to True;
# presumably args.load_checkpoint is a plain bool flag — confirm.
if args.load_checkpoint == True:
    # Rebind m / opt / iteration from the 'gen' checkpoint folder. No matching
    # load for dis_high / opt_dis is visible in this excerpt.
    m, opt, iteration = load_checkpoint(
        f'checkpoint/{args.checkpoint_path}/gen', m, opt)

    ### Vocoder block ###
    MelGan vocoder, vocoder/modules
# Vocoder instance built from the path below.
vocoder = MelVocoder(path="vocoder/melgan-neurips/scripts/logs/NUS")

# Endless batch loop; no exit condition is visible in this excerpt.
while True:

    # NOTE(review): because of the backslash continuations, the next three
    # lines parse as ONE chained assignment whose right-hand side is a 3-tuple.
    # Unpacking it into the first (4-name) target list raises ValueError, and
    # the names are read before anything visible binds them. The right-hand
    # side of the first statement (presumably a batch fetch) appears to be
    # missing — restore it from the original script.
    song_padded, read_padded, pitch_padded, read_real = \
    song_padded, read_padded, pitch_padded = \
    song_padded.float().cuda(), read_padded.float().cuda(), pitch_padded.long().cuda()

    # Trim the last axis so its length (taken from dim 2) is a multiple of 8.
    song_padded = song_padded[..., :song_padded.size(2) // 8 * 8]
    read_padded = read_padded[..., :read_padded.size(2) // 8 * 8]
    # NOTE(review): pitch_padded is cut using read_padded's (already trimmed)
    # length rather than its own or song_padded's — confirm this is intended.
    pitch_padded = pitch_padded[..., :read_padded.size(2) // 8 * 8]

    factor = 16