import random

# Optionally resume the generator and the discriminator (each with its
# optimizer) from the checkpoint directory named on the command line.
# `iteration` ends up holding the value returned by the discriminator load,
# because that call runs last.
if args.load_checkpoint == True:
    ckpt_root = f'checkpoint/{args.checkpoint_path}'
    m, opt, iteration = load_checkpoint(f'{ckpt_root}/gen', m, opt)
    dis_high, opt_dis, iteration = load_checkpoint(
        f'{ckpt_root}/dis', dis_high, opt_dis)

###########################################################
# In general, data is preprocessed to .npy files stored in a dedicated
# folder, and the dataloader reads those .npy files.
# This example instead shows how to transform audio into an STFT /
# mel-spectrogram with a torch.nn.Module (MelSpectrogram).
###########################################################
# melblock = MelSpectrogram(hp).cuda()
melblock = MelVocoder(path="vocoder/melgan-neurips/scripts/logs/NUS")
# Second vocoder, obtained through torch.hub from the melgan-neurips repo.
vocoder_speech = torch.hub.load('descriptinc/melgan-neurips', 'load_melgan')

while True:
    # Pull the next speech and singing batches and move them to the GPU.
    speech = next(inf_iterator_tr_speech).cuda()
    singing = next(inf_iterator_tr_sing).cuda()

    # Stretch the speech batch by a random factor drawn from [1, 2).
    scale = 1 + random.random()
    speech_2x = F.interpolate(speech, scale_factor=scale, mode='nearest')
    # mel = (mel-mean)/std

    # Crop both batches along dim 2 to the shorter of the two lengths.
    common_len = min(speech_2x.size(2), singing.size(2))
    singing = singing[:, :, :common_len]
    speech_2x = speech_2x[:, :, :common_len]

    # Reflect-pad the end of the last dim up to the next multiple of 8.
    # NOTE(review): a length that is already a multiple of 8 still receives
    # 8 extra frames (pad_len is never 0) -- confirm this is intended.
    pad_len = 8 - common_len % 8
    singing = F.pad(singing, (0, pad_len), 'reflect')
    speech_2x = F.pad(speech_2x, (0, pad_len), 'reflect')
# Extend the import search path before importing the helper modules below.
# Presumably logger, logger_utils and plotting_utils live in ../logger --
# TODO confirm. The relative entries are resolved against the current working
# directory, so the script has to be launched from the expected folder.
sys.path.append('../logger')
from logger import Logger
from logger_utils import prepare_directories_and_logger
from plotting_utils import plot_spectrogram_to_numpy
# Presumably optim_step and save_and_load live in ../utils -- TODO confirm.
sys.path.append('../utils')
from optim_step import *
from save_and_load import save_checkpoint, load_checkpoint
from torch.nn import functional as F
# NOTE(review): machine-specific absolute path; the mel2wav import below
# presumably depends on it -- confirm before running elsewhere.
sys.path.append('/home/ericwudayi/AiVocal/SkipVQVC/vocoder/melgan-neurips')
from mel2wav.interface import MelVocoder
# Alternative kept for reference: load the vocoder through torch.hub.
#vocoder = torch.hub.load('descriptinc/melgan-neurips', 'load_melgan')
# Module-level vocoder built from a machine-specific absolute path.
vocoder = MelVocoder( path= "/home/ericwudayi/AiVocal/ai_singing/vocoder/melgan-neurips/scripts/logs/NUS" )
#vocoder = torch.hub.load('descriptinc/melgan-neurips', 'load_melgan')
def train_(args, model, opt, latent_loss_weight, criterion, loader, epochs, inf_iterator_test, logger, iteration):
    """Iterate over ``loader`` for ``epochs`` epochs.

    ``loader`` must yield ``(audio, pitch)`` pairs; each ``audio`` batch is
    moved to the GPU and cast to float before use.

    NOTE(review): the roles of ``args``, ``model``, ``opt``,
    ``latent_loss_weight``, ``criterion``, ``inf_iterator_test``, ``logger``
    and ``iteration`` should be confirmed against the rest of the body.
    """
    for epoch in range(epochs):
        # Per-epoch accumulators, reset at the start of every epoch.
        mse_sum = 0
        mse_n = 0
        for i, (audio, pitch) in enumerate(loader):
            audio = audio.cuda().float()
# Build the discriminator from its YAML description, place it on the GPU and
# give it its own Adam optimizer.
dis_high = ymp.construct_model(f"model_config/{hp.config_dis}/1.yaml").cuda()
opt_dis = optim.Adam(dis_high.parameters(), lr=1e-4)

# Start from iteration 0 unless a generator checkpoint is restored.
# NOTE(review): only the generator and its optimizer are restored in this
# section; no discriminator checkpoint is loaded here -- confirm intended.
iteration = 0
if args.load_checkpoint == True:
    gen_ckpt = f'checkpoint/{args.checkpoint_path}/gen'
    m, opt, iteration = load_checkpoint(gen_ckpt, m, opt)

##########################################################
# Vocoder block: MelGAN vocoder, see vocoder/modules.
vocoder = MelVocoder(path="vocoder/melgan-neurips/scripts/logs/NUS")
#######################################################################

while True:
    # Fetch the next training batch (a 4-tuple) from the endless iterator.
    (song_padded, read_padded,
     pitch_padded, read_real) = next(inf_iterator_tr_speech)

    # Spectrogram-like inputs become float tensors on the GPU, the pitch
    # tensor becomes a long tensor on the GPU; read_real is left untouched.
    song_padded = song_padded.float().cuda()
    read_padded = read_padded.float().cuda()
    pitch_padded = pitch_padded.long().cuda()

    # Trim the last axis down to a multiple of 8, using each tensor's dim-2
    # length rounded down.
    song_len = song_padded.size(2) // 8 * 8
    read_len = read_padded.size(2) // 8 * 8
    song_padded = song_padded[..., :song_len]
    read_padded = read_padded[..., :read_len]
    # NOTE(review): pitch is cropped with the *read* length, not its own or
    # the song length -- confirm which signal pitch is aligned with.
    pitch_padded = pitch_padded[..., :read_len]

    factor = 16