def load_model(hparams, dev='gpu'):
    """Build a Tacotron2 model on the requested device.

    Args:
        hparams: Hyper-parameter object handed to ``Tacotron2``. Its
            ``fp16_run`` and ``distributed_run`` attributes are read here.
        dev: ``'gpu'`` to move the model to CUDA, ``'cpu'`` to keep it as
            constructed.

    Returns:
        The model, wrapped by ``apply_gradient_allreduce`` when
        ``hparams.distributed_run`` is truthy.

    Raises:
        ValueError: If ``dev`` is neither ``'gpu'`` nor ``'cpu'``.
    """
    # Validate first: an unknown value used to leave ``model`` unbound and
    # only surfaced later as an UnboundLocalError.
    if dev not in ('gpu', 'cpu'):
        raise ValueError("dev must be 'gpu' or 'cpu', got %r" % (dev,))

    model = Tacotron2(hparams)
    if dev == 'gpu':
        model = model.cuda()

    if hparams.fp16_run:
        # Use the most negative float16 value as the attention mask fill.
        model.decoder.attention_layer.score_mask_value = finfo('float16').min

    if hparams.distributed_run:
        model = apply_gradient_allreduce(model)

    return model
def load_model(hparams):
    """Build a Tacotron2 model, placing it on CUDA when available.

    Args:
        hparams: Hyper-parameter object handed to ``Tacotron2``. Its
            ``fp16_run`` and ``distributed_run`` attributes are read here.

    Returns:
        The model, wrapped by ``apply_gradient_allreduce`` when
        ``hparams.distributed_run`` is truthy.
    """
    has_cuda = torch.cuda.is_available()
    net = Tacotron2(hparams)
    net = net.cuda() if has_cuda else net.to("cpu")

    if hparams.fp16_run:
        # Use the most negative float16 value as the attention mask fill.
        net.decoder.attention_layer.score_mask_value = finfo('float16').min

    if hparams.distributed_run:
        net = apply_gradient_allreduce(net)

    return net
def load(self, t_checkpoint_path, v_checkpoint_path, t_config_path=None,
         v_config_path=None, model_name='tacotron'):
    """Load a text-to-mel model and the MelGAN vocoder.

    The text-to-mel backend is chosen from the checkpoint file name: a path
    ending in ``.pt`` selects the 'nvidia' Tacotron2 path, anything else the
    'coqui' path. ``model_name`` is accepted but not read; ``self.model_name``
    is set from the checkpoint suffix instead.

    Args:
        t_checkpoint_path: Text-to-mel checkpoint path.
        v_checkpoint_path: MelGAN checkpoint path.
        t_config_path: TTS config path, used on the 'coqui' path.
        v_config_path: Vocoder config path, used on the 'coqui' path.
        model_name: Unused.
    """
    is_nvidia = t_checkpoint_path.endswith('.pt')

    if is_nvidia:
        self.model_name = 'nvidia'
        print('Constructing model: %s' % self.model_name)

        # Default hyper-parameters, then weights from the checkpoint.
        hparams = create_hparams()
        self.model = Tacotron2(hparams)
        checkpoint = torch.load(t_checkpoint_path, map_location='cpu')
        self.model.load_state_dict(checkpoint['state_dict'])
        self.model.eval()
    else:
        self.model_name = 'coqui'
        print('Constructing model: %s' % self.model_name)

        # TTS config, model and audio processor.
        self.tts_config = load_config(t_config_path)
        self.tts_model = setup_tts_model(config=self.tts_config)
        self.tts_model.load_checkpoint(self.tts_config, t_checkpoint_path,
                                       eval=True)
        self.ap = AudioProcessor(verbose=False, **self.tts_config.audio)

        # Vocoder config and audio processor.
        # NOTE(review): assumed to belong to the 'coqui' branch because
        # v_config_path defaults to None -- confirm against the original
        # layout.
        vocoder_config = load_config(v_config_path)
        self.vocoder_ap = AudioProcessor(verbose=False,
                                         **vocoder_config.audio)

    # NeurIPS MelGAN for mel-to-audio synthesis, loaded for both backends.
    self.vocoder = torch.hub.load('descriptinc/melgan-neurips', 'load_melgan')
    melgan_state = torch.load(v_checkpoint_path, map_location='cpu')
    self.vocoder.mel2wav.load_state_dict(melgan_state)
def T2LoadModel(self, T2Model, Cleaners):
    """Load Tacotron2 weights from ``T2Model`` when that file exists.

    Does nothing when ``T2Model`` is not an existing file. Otherwise builds
    hyper-parameters from fixed value groups plus the currently selected
    entry of ``self.AudioParameters``, stores ``Cleaners`` on the instance,
    and loads the checkpoint into a CUDA, eval-mode, half-precision model.

    Args:
        T2Model: Path to the Tacotron2 checkpoint.
        Cleaners: Passed through as the last experiment-data value and kept
            as ``self.Cleaners``.
    """
    if not os.path.isfile(T2Model):
        return

    # Positional groups expected by create_hparams, in call order.
    experiment_data = [500, 500, True, False, False, "", "", Cleaners]
    encoder_decoder = [5, 3, 512, 1024, 256, 1000, 0.5, 0.1, 0.1]
    attention_location = [1024, 128, 32, 31]
    mel_processing = [512, 5, 5]
    optimization = [False, True, 1 * pow(10, -3), 1 * pow(10, -6), 8]
    audio = self.AudioParameters[self.currentAudioParameters]

    hparams = create_hparams(
        experiment_data,
        audio,
        encoder_decoder,
        attention_location,
        mel_processing,
        optimization,
    )

    self.Cleaners = Cleaners
    self.model = Tacotron2(hparams).cuda()
    checkpoint = torch.load(T2Model)
    self.model.load_state_dict(checkpoint['state_dict'])
    self._ = self.model.cuda().eval().half()
def load_model(hparams):
    """Build a CUDA Tacotron2 model, optionally converted to half precision.

    Args:
        hparams: Hyper-parameter object handed to ``Tacotron2``; its
            ``fp16_run`` attribute is read here.

    Returns:
        The model. When ``hparams.fp16_run`` is truthy it has been passed
        through ``half()`` and ``batchnorm_to_float``.
    """
    net = Tacotron2(hparams).cuda()
    if not hparams.fp16_run:
        return net

    net = batchnorm_to_float(net.half())
    # Use the most negative float16 value as the attention mask fill.
    fp16_floor = float(finfo('float16').min)
    net.decoder.attention_layer.score_mask_value = fp16_floor
    return net
def load_model(hparams):
    """Build a CUDA model, choosing the class from ``hparams.reverse``.

    Args:
        hparams: Hyper-parameter object. ``reverse`` selects ``TacotronAsr``
            over ``Tacotron2``; ``distributed_run`` enables gradient
            all-reduce wrapping.

    Returns:
        The constructed (and possibly wrapped) model.
    """
    model_cls = TacotronAsr if hparams.reverse else Tacotron2
    net = model_cls(hparams).cuda()

    if hparams.distributed_run:
        net = apply_gradient_allreduce(net)

    return net
def initiate_model(hparams):
    """Create a CUDA Tacotron2 model configured from ``hparams``.

    Args:
        hparams: Hyper-parameter object handed to ``Tacotron2``. Its
            ``fp16_run`` and ``distributed_run`` attributes are read here.

    Returns:
        The model, wrapped by ``apply_gradient_allreduce`` when
        ``hparams.distributed_run`` is truthy.
    """
    net = Tacotron2(hparams).cuda()

    if hparams.fp16_run:
        # Use the most negative float16 value as the attention mask fill.
        fp16_floor = finfo('float16').min
        net.decoder.attention_layer.score_mask_value = fp16_floor

    return apply_gradient_allreduce(net) if hparams.distributed_run else net
def load_model(hparams, use_cuda=True):
    """Build a Tacotron2 model on CUDA or CPU.

    Args:
        hparams: Hyper-parameter object handed to ``Tacotron2``. Its
            ``fp16_run`` and ``distributed_run`` attributes are read here.
        use_cuda: Truthy selects the ``'cuda'`` device, otherwise ``'cpu'``.

    Returns:
        The model, wrapped by ``apply_gradient_allreduce`` when
        ``hparams.distributed_run`` is truthy.
    """
    if use_cuda:
        target = torch.device('cuda')
    else:
        target = torch.device('cpu')

    net = Tacotron2(hparams).to(target)

    if hparams.fp16_run:
        # Use the most negative float16 value as the attention mask fill.
        net.decoder.attention_layer.score_mask_value = finfo('float16').min

    if hparams.distributed_run:
        net = apply_gradient_allreduce(net)

    return net
def load_model(hparams):
    """Build a CUDA Tacotron2 model and prime its residual encoder.

    ``after_optim_step()`` is called once on the decoder's residual encoder
    immediately after construction.

    Args:
        hparams: Hyper-parameter object handed to ``Tacotron2``. Its
            ``fp16_run`` and ``distributed_run`` attributes are read here.

    Returns:
        The model, wrapped by ``apply_gradient_allreduce`` when
        ``hparams.distributed_run`` is truthy.
    """
    net = Tacotron2(hparams).cuda()
    decoder = net.decoder
    decoder.residual_encoder.after_optim_step()

    if hparams.fp16_run:
        # Use the most negative float16 value as the attention mask fill.
        decoder.attention_layer.score_mask_value = finfo('float16').min

    if hparams.distributed_run:
        net = apply_gradient_allreduce(net)

    return net
def load_model(self):
    """Build a Tacotron2 model from ``self.hparams``.

    The model is not moved to any device here.

    Returns:
        The model, wrapped by ``apply_gradient_allreduce`` when
        ``self.hparams.distributed_run`` is truthy.
    """
    hparams = self.hparams
    net = Tacotron2(hparams)

    if hparams.fp16_run:
        # Use the most negative float16 value as the attention mask fill.
        net.decoder.attention_layer.score_mask_value = finfo('float16').min

    if hparams.distributed_run:
        net = apply_gradient_allreduce(net)

    return net
def load_model(hparams, symbols):
    """Build a CUDA Tacotron2 model sized for ``symbols``.

    Prints the number of symbols, then passes that count to ``Tacotron2`` as
    its second argument.

    Args:
        hparams: Hyper-parameter object. Its ``fp16_run`` and
            ``distributed_run`` attributes are read here.
        symbols: Sized collection; only its length is used.

    Returns:
        The model, wrapped by ``apply_gradient_allreduce`` when
        ``hparams.distributed_run`` is truthy.
    """
    n_symbols = len(symbols)
    print(n_symbols)

    net = Tacotron2(hparams, n_symbols).cuda()

    if hparams.fp16_run:
        # Use the most negative float16 value as the attention mask fill.
        net.decoder.attention_layer.score_mask_value = finfo('float16').min

    if hparams.distributed_run:
        net = apply_gradient_allreduce(net)

    return net
def load_model(hparams):
    """Build a CUDA Tacotron2 model with optional fp16 and parallel wrapping.

    Args:
        hparams: Hyper-parameter object. ``fp16_run`` enables half precision
            (followed by ``batchnorm_to_float``); ``distributed_run`` selects
            ``DistributedDataParallel``.

    Returns:
        The model. Without ``distributed_run`` it is wrapped in
        ``DataParallel`` when more than one CUDA device is visible.
    """
    net = Tacotron2(hparams).cuda()

    if hparams.fp16_run:
        net = batchnorm_to_float(net.half())

    if hparams.distributed_run:
        return DistributedDataParallel(net)
    if torch.cuda.device_count() > 1:
        return DataParallel(net)
    return net
def init_model(tacotron_cp, speaker_encode_cp):
    """Create a Tacotron2 model in eval mode from a checkpoint.

    Args:
        tacotron_cp: Path to a checkpoint containing a ``'state_dict'`` entry.
        speaker_encode_cp: Value interpolated into a ``speaker_encoder=...``
            override string for ``create_hparams``. When ``None``, ``None``
            is passed instead of an override string.

    Returns:
        The loaded model, switched to eval mode.
    """
    if speaker_encode_cp is not None:
        override = f'speaker_encoder={speaker_encode_cp}'
    else:
        override = None

    hparams = create_hparams('tacotron', override)

    net = Tacotron2(hparams)
    checkpoint = torch.load(tacotron_cp)
    net.load_state_dict(checkpoint['state_dict'])
    net.eval()
    return net
def __init__(self, hparams, tacotron2_path, waveglow_path):
    """Set default synthesis settings and load both models.

    Args:
        hparams: Hyper-parameter object handed to ``Tacotron2``.
        tacotron2_path: Forwarded to ``self.load_models``.
        waveglow_path: Forwarded to ``self.load_models``.
    """
    # Tacotron2 is created on the CPU. The WaveGlow model and denoiser start
    # as None before load_models() runs.
    self.Tacotron2Model = Tacotron2(hparams).cpu()
    self.WaveglowModel = None
    self.Denoiser = None

    # Default synthesis settings.
    self.WaveglowSigma = 0.800
    self.UseDenoiser = False
    self.DenoiserStrength = 0.01

    self.load_models(tacotron2_path, waveglow_path)
def load_model(hparams):
    """Build a CUDA Tacotron2 model with optional fp16 and all-reduce.

    Args:
        hparams: Hyper-parameter object. ``fp16_run`` enables half precision
            (followed by ``batchnorm_to_float``); ``distributed_run`` enables
            ``apply_gradient_allreduce``.

    Returns:
        The constructed (and possibly converted or wrapped) model.
    """
    net = Tacotron2(hparams).cuda()

    if hparams.fp16_run:
        net = batchnorm_to_float(net.half())
        # Use the most negative float16 value as the attention mask fill.
        fp16_floor = float(finfo('float16').min)
        net.decoder.attention_layer.score_mask_value = fp16_floor

    return apply_gradient_allreduce(net) if hparams.distributed_run else net
def load_model(hparams):
    """Build a CUDA Tacotron2 model with optional fp16 and parallel wrapping.

    Args:
        hparams: Hyper-parameter object. ``fp16_run`` enables half precision
            (followed by ``batchnorm_to_float``); ``distributed_run`` selects
            ``DistributedDataParallel``.

    Returns:
        The model. Without ``distributed_run`` it is wrapped in
        ``DataParallel`` when more than one CUDA device is visible.
    """
    net = Tacotron2(hparams).cuda()

    if hparams.fp16_run:
        net = batchnorm_to_float(net.half())
        # Use the most negative float16 value as the attention mask fill.
        fp16_floor = float(finfo('float16').min)
        net.decoder.attention_layer.score_mask_value = fp16_floor

    if hparams.distributed_run:
        return DistributedDataParallel(net)
    if torch.cuda.device_count() > 1:
        return DataParallel(net)
    return net
def log_audio(model: Tacotron2, iteration: int, logger: Tacotron2Logger,
              waveglow):
    """Synthesize a fixed test sentence and log the audio.

    The sentence is converted with ``text_to_sequence`` using
    ``'english_cleaners'``, run through ``model.inference`` and then
    ``waveglow.infer``; the first item of the result is handed to
    ``logger.add_audio``.

    Args:
        model: Tacotron2 model providing ``inference``.
        iteration: Passed to the logger as ``global_step``.
        logger: Logger providing ``add_audio``.
        waveglow: Vocoder providing ``infer``.
    """
    text = "Does it work yet?"
    sequence = array(text_to_sequence(text, ['english_cleaners']))[None, :]
    # torch.autograd.Variable is a deprecated no-op wrapper; tensors are used
    # directly.
    sequence = torch.from_numpy(sequence).cuda().long()

    # Both inference passes run without autograd: nothing here is
    # back-propagated, so tracking gradients through Tacotron2 only costs
    # memory.
    with torch.no_grad():
        _, mel_outputs_postnet, _, _ = model.inference(sequence)
        audio = waveglow.infer(mel_outputs_postnet, sigma=0.666)

    # ``hparams`` is resolved from the enclosing module scope.
    logger.add_audio(text, audio[0].data.cpu(), global_step=iteration,
                     sample_rate=hparams.sampling_rate)
def log_audio(model: Tacotron2, iteration: int, logger: Tacotron2Logger,
              waveglow, inference_batch, text_encoded, mel):
    """Log one synthesized audio clip per emotion id 0..3.

    A teacher-forced forward pass over ``inference_batch`` provides the
    alignment ("rhythm"), which is then reused by
    ``model.inference_noattention`` for each emotion id.

    Args:
        model: Tacotron2 model providing ``parse_batch``, ``forward`` and
            ``inference_noattention``.
        iteration: Passed to the logger as ``global_step``.
        logger: Logger providing ``add_audio``.
        waveglow: Vocoder providing ``infer``.
        inference_batch: Batch handed to ``model.parse_batch``.
        text_encoded: First element of the no-attention inference input.
        mel: Second element of the no-attention inference input.
    """
    # Source data -> rhythm, using Tacotron2 as a forced aligner.
    inputs, _ = model.parse_batch(inference_batch)
    with torch.no_grad():
        _, _, _, rhythm = model.forward(inputs)
    rhythm = rhythm.permute(1, 0, 2)

    for emotion in range(4):
        emotion_id = torch.LongTensor([emotion]).cuda()
        inference_input = (text_encoded, mel, emotion_id, rhythm)

        # NOTE(review): the vocoder call is assumed to sit inside the
        # no_grad block -- confirm against the original layout.
        with torch.no_grad():
            _, mel_outputs_postnet, _, _ = model.inference_noattention(
                inference_input)
            audio = waveglow.infer(mel_outputs_postnet, sigma=0.8)

        # ``hparams`` is resolved from the enclosing module scope.
        logger.add_audio(f"Emotion {str(emotion)}", audio[0].data.cpu(),
                         global_step=iteration,
                         sample_rate=hparams.sampling_rate)
def load_inference_model(hparams, name, ckpt_step, cuda=True):
    """Load a trained model from ``./output-<name>/checkpoint_<ckpt_step>``.

    Args:
        hparams: Hyper-parameter object. ``hparams.model == 'tacotron2'``
            selects ``Tacotron2``; any other value selects ``Transformer``.
        name: Experiment name used in the checkpoint directory.
        ckpt_step: Step number used in the checkpoint file name.
        cuda: When truthy, the model is moved to CUDA after loading.

    Returns:
        The model in eval mode.
    """
    ckpt_path = './output-{}/checkpoint_{}'.format(name, ckpt_step)

    # Deserialize onto the CPU first so a checkpoint saved from a GPU run can
    # still be opened on a CPU-only host. The model is moved to CUDA below
    # when requested.
    raw_state = torch.load(ckpt_path, map_location='cpu')['state_dict']

    # Drop a leading "module." component (presumably left by a
    # DataParallel-style wrapper). Matching on the dotted prefix keeps keys
    # such as "module_list.0.weight" intact.
    prefix = 'module.'
    state = {
        (key[len(prefix):] if key.startswith(prefix) else key): value
        for key, value in raw_state.items()
    }

    if hparams.model == 'tacotron2':
        model = Tacotron2(hparams)
    else:
        model = Transformer(hparams)
    model.load_state_dict(state)

    if cuda:
        model = model.cuda()
    model.eval()
    return model
def load_tacotron():
    """Load the Tacotron2 model described by the module-level ``hparams``.

    Reads ``hparams.is_cuda`` and ``hparams.tacotron_path``. On the CUDA path
    the model is moved to the GPU, switched to eval mode and converted to
    half precision; otherwise it is only switched to eval mode.

    Returns:
        A ``(hparams, tacotron)`` tuple.
    """
    # Decide once. The device used to be chosen with ``== True`` while the
    # later move to CUDA used plain truthiness, so the two could disagree for
    # truthy non-bool values.
    use_cuda = bool(hparams.is_cuda)
    device = "cuda" if use_cuda else "cpu"

    tacotron = Tacotron2(hparams)
    checkpoint = torch.load(hparams.tacotron_path,
                            map_location=torch.device(device))
    tacotron.load_state_dict(checkpoint['state_dict'])

    if use_cuda:
        tacotron.cuda().eval().half()
    else:
        tacotron.eval()

    return hparams, tacotron
def load(self, t_checkpoint_path, v_checkpoint_path, model_name='tacotron'):
    """Load Tacotron2 and the MelGAN vocoder from checkpoints onto the CPU.

    Args:
        t_checkpoint_path: Tacotron2 checkpoint containing ``'state_dict'``.
        v_checkpoint_path: MelGAN checkpoint loaded into
            ``self.vocoder.mel2wav``.
        model_name: Only used in the progress message.
    """
    print('Constructing model: %s' % model_name)

    # Default hyper-parameters, then weights from the checkpoint.
    hparams = create_hparams()
    self.model = Tacotron2(hparams)
    tacotron_ckpt = torch.load(t_checkpoint_path, map_location='cpu')
    self.model.load_state_dict(tacotron_ckpt['state_dict'])
    self.model.eval()

    # NeurIPS MelGAN for mel-to-audio synthesis.
    self.vocoder = torch.hub.load('descriptinc/melgan-neurips', 'load_melgan')
    melgan_state = torch.load(v_checkpoint_path, map_location='cpu')
    self.vocoder.mel2wav.load_state_dict(melgan_state)
def load_model(hparams, device="cuda"):
    """Build the model selected by ``hparams.model_type`` on ``device``.

    Supported types are ``"tacotron2"``, ``"forwardtacotron"`` and
    ``"durationtacotron2"``. The returned model carries a
    ``requires_durations`` flag, which is False only for ``"tacotron2"``.

    Args:
        hparams: Hyper-parameter object. ``model_type``, ``fp16_run`` and
            ``distributed_run`` are read here, plus ``n_symbols`` and
            ``n_mel_channels`` for ``"forwardtacotron"``.
        device: Target passed to ``.to()``.

    Returns:
        The model, wrapped by ``apply_gradient_allreduce`` when
        ``hparams.distributed_run`` is truthy.

    Raises:
        ValueError: If ``hparams.model_type`` is not a supported type.
    """
    model_type = hparams.model_type

    if model_type == "tacotron2":
        model = Tacotron2(hparams).to(device)
        model.requires_durations = False
    elif model_type == "forwardtacotron":
        model = ForwardTacotron(hparams,
                                num_chars=hparams.n_symbols,
                                n_mels=hparams.n_mel_channels).to(device)
        model.requires_durations = True
    elif model_type == "durationtacotron2":
        model = DurationTacotron2().to(device)
        model.requires_durations = True
    else:
        # An unknown type used to fall through and fail later with an
        # UnboundLocalError on ``model``.
        raise ValueError("unsupported model_type: %r" % (model_type,))

    if hparams.fp16_run:
        # Use the most negative float16 value as the attention mask fill.
        model.decoder.attention_layer.score_mask_value = finfo('float16').min

    if hparams.distributed_run:
        model = apply_gradient_allreduce(model)

    return model
def load(self, tacotron_model, waveglow_model):
    """Load Tacotron2 and WaveGlow onto CUDA in eval mode.

    Appends the ``tacotron2`` project directory and its ``waveglow/``
    sub-directory to ``sys.path`` before the checkpoints are read.

    Args:
        tacotron_model: Tacotron2 checkpoint containing ``'state_dict'``.
        waveglow_model: WaveGlow checkpoint containing ``'model'``.
    """
    # Make the project sources importable.
    self.project_name = 'tacotron2'
    for extra_path in (self.project_name,
                       join(self.project_name, 'waveglow/')):
        sys.path.append(extra_path)

    # Tacotron2 with a few overridden hyper-parameters.
    hparams = create_hparams()
    hparams.sampling_rate = 22050
    hparams.max_decoder_steps = 1000
    hparams.fp16_run = True
    self.hparams = hparams

    self.tacotron = Tacotron2(hparams)
    tacotron_ckpt = torch.load(tacotron_model)
    self.tacotron.load_state_dict(tacotron_ckpt['state_dict'])
    self.tacotron.cuda().eval()

    # WaveGlow: strip weight norm, then force its convinv layers to float.
    self.waveglow = torch.load(waveglow_model)['model']
    self.waveglow = self.waveglow.remove_weightnorm(self.waveglow)
    self.waveglow.cuda().eval()
    for conv in self.waveglow.convinv:
        conv.float()
from model import Tacotron2
from loss_function import Tacotron2Loss

# Smoke test: plot one mel spectrogram, then push a single batch through
# Tacotron2 and print its loss.
hparams = create_hparams()
text_loader = TextMelLoader(hparams.training_lst, hparams)
collate_fn = TextMelCollate(hparams.n_frames_per_step)

# Save the first example's mel as an image.
# (Original note: mel.shape is 80 * frame_num.)
text, mel = text_loader[0]
plt.matshow(mel, origin='lower')
plt.colorbar()
plt.savefig('mel_demo.png')

train_loader = torch.utils.data.DataLoader(
    text_loader,
    num_workers=1,
    shuffle=False,
    batch_size=3,
    pin_memory=False,
    drop_last=True,
    collate_fn=collate_fn,
)
print(len(train_loader))

tacotron = Tacotron2(hparams)
criterion = Tacotron2Loss()

# Only the first batch is evaluated.
for batch in train_loader:
    (text_padded, text_alignment_padded, input_lengths, mel_padded,
     alignments, alignments_weights_padded, output_lengths) = batch
    max_len = torch.max(input_lengths.data).item()

    x = (text_padded, input_lengths, mel_padded, max_len, output_lengths)
    y = (mel_padded, alignments, alignments_weights_padded,
         text_alignment_padded)

    y_pred = tacotron(x)
    print(criterion(y_pred, y))
    break
def main(args):
    """Train Tacotron2 on ``DNNDataset`` with text-file loss logging.

    Resumes from ``checkpoint_<restore_step>.pth.tar`` under
    ``hp.checkpoint_path`` when it can be loaded, otherwise starts fresh.
    Per-step losses are appended to text files under ``logger/``; a summary
    is printed every ``hp.log_step`` steps and a checkpoint is written every
    ``hp.save_step`` steps.

    Args:
        args: Parsed command-line arguments. ``restore_step``,
            ``frozen_learning_rate`` and ``learning_rate_frozen`` are read.
    """
    # Get device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Define model
    model = nn.DataParallel(Tacotron2(hp)).to(device)
    print("Model Has Been Defined")
    num_param = utils.get_param_num(model)
    print('Number of TTS Parameters:', num_param)

    # Get dataset
    dataset = DNNDataset()

    # Optimizer and loss
    optimizer = torch.optim.Adam(model.parameters(),
                                 betas=(0.9, 0.98),
                                 eps=1e-9)
    scheduled_optim = ScheduledOptim(optimizer,
                                     hp.encoder_embedding_dim,
                                     hp.n_warm_up_step,
                                     args.restore_step)
    criterion = DNNLoss().to(device)
    print("Defined Optimizer and Loss Function.")

    # Load checkpoint if exists. Restoring stays best-effort, but only
    # ordinary errors fall back to a fresh run; KeyboardInterrupt and
    # SystemExit are no longer swallowed.
    try:
        checkpoint = torch.load(os.path.join(
            hp.checkpoint_path, 'checkpoint_%d.pth.tar' % args.restore_step))
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("\n---Model Restored at Step %d---\n" % args.restore_step)
    except Exception:
        print("\n---Start New Training---\n")
        if not os.path.exists(hp.checkpoint_path):
            os.mkdir(hp.checkpoint_path)

    # Init logger
    # NOTE(review): hp.logger_path is created here, but the files below are
    # opened under the literal "logger" directory -- confirm they match.
    if not os.path.exists(hp.logger_path):
        os.mkdir(hp.logger_path)

    # Timing state. time.clock() was removed in Python 3.8, so
    # time.perf_counter() is used instead.
    step_times = np.array([])
    run_start = time.perf_counter()

    # Training
    model = model.train()

    for epoch in range(hp.epochs):
        # Each loader item is a list of real batches (iterated as ``db``),
        # hence the squared batch size.
        training_loader = DataLoader(dataset,
                                     batch_size=hp.batch_size**2,
                                     shuffle=True,
                                     collate_fn=collate_fn,
                                     drop_last=True,
                                     num_workers=0)
        total_step = hp.epochs * len(training_loader) * hp.batch_size

        for i, batchs in enumerate(training_loader):
            for j, db in enumerate(batchs):
                start_time = time.perf_counter()

                current_step = i * hp.batch_size + j + args.restore_step + \
                    epoch * len(training_loader) * hp.batch_size + 1

                # Init
                scheduled_optim.zero_grad()

                # Get Data
                character = torch.from_numpy(db["text"]).long().to(device)
                mel_target = torch.from_numpy(db["mel_target"]).transpose(1, 2)
                mel_target = mel_target.float().to(device)
                src_pos = torch.from_numpy(db["src_pos"]).long().to(device)
                mel_pos = torch.from_numpy(db["mel_pos"]).long().to(device)
                input_lengths = torch.max(src_pos, -1)[0]
                output_lengths = torch.max(mel_pos, -1)[0]
                stop_token = torch.from_numpy(db["stop_token"])
                stop_token = stop_token.float().to(device)

                # Forward
                batch = (character, input_lengths, mel_target, stop_token,
                         output_lengths)
                x, y = model.module.parse_batch(batch)
                y_ = model(x)

                # Cal Loss
                mel_loss, mel_postnet_loss, stop_pred_loss = criterion(y_, y)
                total_loss = mel_loss + mel_postnet_loss + stop_pred_loss

                # Logger. All losses are converted to floats; the stop loss
                # used to be logged as a tensor repr.
                t_l = total_loss.item()
                m_l = mel_loss.item()
                m_p_l = mel_postnet_loss.item()
                s_l = stop_pred_loss.item()

                with open(os.path.join("logger", "total_loss.txt"), "a") as f_total_loss:
                    f_total_loss.write(str(t_l)+"\n")

                with open(os.path.join("logger", "mel_loss.txt"), "a") as f_mel_loss:
                    f_mel_loss.write(str(m_l)+"\n")

                with open(os.path.join("logger", "mel_postnet_loss.txt"), "a") as f_mel_postnet_loss:
                    f_mel_postnet_loss.write(str(m_p_l)+"\n")

                # The stop-prediction loss is written to duration_loss.txt.
                with open(os.path.join("logger", "duration_loss.txt"), "a") as f_d_loss:
                    f_d_loss.write(str(s_l)+"\n")

                # Backward
                total_loss.backward()

                # Clipping gradients to avoid gradient explosion
                nn.utils.clip_grad_norm_(
                    model.parameters(), hp.grad_clip_thresh)

                # Update weights
                if args.frozen_learning_rate:
                    scheduled_optim.step_and_update_lr_frozen(
                        args.learning_rate_frozen)
                else:
                    scheduled_optim.step_and_update_lr()

                # Print
                if current_step % hp.log_step == 0:
                    now = time.perf_counter()

                    str1 = "Epoch [{}/{}], Step [{}/{}]:".format(
                        epoch+1, hp.epochs, current_step, total_step)
                    str2 = "Mel Loss: {:.4f}, Mel PostNet Loss: {:.4f}, ".format(m_l, m_p_l) + \
                        "Stop Prediction Loss: {:.4f};".format(s_l)
                    str3 = "Current Learning Rate is {:.6f}.".format(
                        scheduled_optim.get_learning_rate())
                    str4 = "Time Used: {:.3f}s, Estimated Time Remaining: {:.3f}s.".format(
                        (now-run_start), (total_step-current_step)*np.mean(step_times))

                    print("\n" + str1)
                    print(str2)
                    print(str3)
                    print(str4)

                    with open(os.path.join("logger", "logger.txt"), "a") as f_logger:
                        f_logger.write(str1 + "\n")
                        f_logger.write(str2 + "\n")
                        f_logger.write(str3 + "\n")
                        f_logger.write(str4 + "\n")
                        f_logger.write("\n")

                if current_step % hp.save_step == 0:
                    torch.save(
                        {'model': model.state_dict(),
                         'optimizer': optimizer.state_dict()},
                        os.path.join(hp.checkpoint_path,
                                     'checkpoint_%d.pth.tar' % current_step))
                    print("save model at step %d ..." % current_step)

                end_time = time.perf_counter()
                step_times = np.append(step_times, end_time - start_time)
                # Once the buffer reaches hp.clear_Time samples, collapse it
                # to its mean so it cannot grow without bound.
                if len(step_times) == hp.clear_Time:
                    step_times = np.array([np.mean(step_times)])
def main(args):
    """Train Tacotron2 from a pre-loaded buffer with text-file loss logging.

    Resumes from ``checkpoint_<restore_step>.pth.tar`` under
    ``hp.checkpoint_path`` when it can be loaded, otherwise starts fresh.
    Per-step losses are appended to text files under ``logger/``; a summary
    is printed every ``hp.log_step`` steps and a checkpoint is written every
    ``hp.save_step`` steps.

    Args:
        args: Parsed command-line arguments. ``restore_step``,
            ``frozen_learning_rate`` and ``learning_rate_frozen`` are read.
    """
    # Get device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Define model
    print("Use Tacotron2")
    model = nn.DataParallel(Tacotron2(hp)).to(device)
    print("Model Has Been Defined")
    num_param = utils.get_param_num(model)
    print('Number of TTS Parameters:', num_param)

    # Get buffer
    print("Load data to buffer")
    buffer = get_data_to_buffer()

    # Optimizer and loss
    optimizer = torch.optim.Adam(model.parameters(),
                                 betas=(0.9, 0.98),
                                 eps=1e-9)
    scheduled_optim = ScheduledOptim(optimizer,
                                     hp.decoder_rnn_dim,
                                     hp.n_warm_up_step,
                                     args.restore_step)
    tts_loss = DNNLoss().to(device)
    print("Defined Optimizer and Loss Function.")

    # Load checkpoint if exists. Restoring stays best-effort, but only
    # ordinary errors fall back to a fresh run; KeyboardInterrupt and
    # SystemExit are no longer swallowed.
    try:
        checkpoint = torch.load(
            os.path.join(hp.checkpoint_path,
                         'checkpoint_%d.pth.tar' % args.restore_step))
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("\n---Model Restored at Step %d---\n" % args.restore_step)
    except Exception:
        print("\n---Start New Training---\n")
        if not os.path.exists(hp.checkpoint_path):
            os.mkdir(hp.checkpoint_path)

    # Init logger
    # NOTE(review): hp.logger_path is created here, but the files below are
    # opened under the literal "logger" directory -- confirm they match.
    if not os.path.exists(hp.logger_path):
        os.mkdir(hp.logger_path)

    # Get dataset
    dataset = BufferDataset(buffer)

    # Each loader item is a list of real batches (iterated as ``db``), hence
    # the expanded batch size.
    training_loader = DataLoader(dataset,
                                 batch_size=hp.batch_expand_size * hp.batch_size,
                                 shuffle=True,
                                 collate_fn=collate_fn_tensor,
                                 drop_last=True,
                                 num_workers=0)
    total_step = hp.epochs * len(training_loader) * hp.batch_expand_size

    # Timing state
    step_times = np.array([])
    run_start = time.perf_counter()

    # Training
    model = model.train()

    for epoch in range(hp.epochs):
        for i, batchs in enumerate(training_loader):
            # real batch start here
            for j, db in enumerate(batchs):
                start_time = time.perf_counter()

                current_step = i * hp.batch_expand_size + j + args.restore_step + \
                    epoch * len(training_loader) * hp.batch_expand_size + 1

                # Init
                scheduled_optim.zero_grad()

                # Get Data
                character = db["text"].long().to(device)
                mel_target = db["mel_target"].float().to(device)
                mel_pos = db["mel_pos"].long().to(device)
                src_pos = db["src_pos"].long().to(device)
                max_mel_len = db["mel_max_len"]

                mel_target = mel_target.contiguous().transpose(1, 2)
                src_length = torch.max(src_pos, -1)[0]
                mel_length = torch.max(mel_pos, -1)[0]

                # Gate target: 1.0 where mel_pos is 0, shifted left by one
                # position with a trailing 1.0 appended.
                gate_target = mel_pos.eq(0).float()
                gate_target = gate_target[:, 1:]
                gate_target = F.pad(gate_target, (0, 1, 0, 0), value=1.)

                # Forward
                inputs = (character, src_length, mel_target, max_mel_len,
                          mel_length)
                mel_output, mel_output_postnet, gate_output = model(inputs)

                # Cal Loss
                mel_loss, mel_postnet_loss, gate_loss \
                    = tts_loss(mel_output, mel_output_postnet, gate_output,
                               mel_target, gate_target)
                total_loss = mel_loss + mel_postnet_loss + gate_loss

                # Logger
                t_l = total_loss.item()
                m_l = mel_loss.item()
                m_p_l = mel_postnet_loss.item()
                g_l = gate_loss.item()

                with open(os.path.join("logger", "total_loss.txt"), "a") as f_total_loss:
                    f_total_loss.write(str(t_l) + "\n")

                with open(os.path.join("logger", "mel_loss.txt"), "a") as f_mel_loss:
                    f_mel_loss.write(str(m_l) + "\n")

                with open(os.path.join("logger", "mel_postnet_loss.txt"), "a") as f_mel_postnet_loss:
                    f_mel_postnet_loss.write(str(m_p_l) + "\n")

                with open(os.path.join("logger", "gate_loss.txt"), "a") as f_g_loss:
                    f_g_loss.write(str(g_l) + "\n")

                # Backward
                total_loss.backward()

                # Clipping gradients to avoid gradient explosion
                nn.utils.clip_grad_norm_(model.parameters(),
                                         hp.grad_clip_thresh)

                # Update weights
                if args.frozen_learning_rate:
                    scheduled_optim.step_and_update_lr_frozen(
                        args.learning_rate_frozen)
                else:
                    scheduled_optim.step_and_update_lr()

                # Print
                if current_step % hp.log_step == 0:
                    now = time.perf_counter()

                    str1 = "Epoch [{}/{}], Step [{}/{}]:"\
                        .format(epoch + 1, hp.epochs, current_step, total_step)
                    str2 = "Mel Loss: {:.4f}, Mel PostNet Loss: {:.4f}, Gate Loss: {:.4f};".format(
                        m_l, m_p_l, g_l)
                    str3 = "Current Learning Rate is {:.6f}."\
                        .format(scheduled_optim.get_learning_rate())
                    str4 = "Time Used: {:.3f}s, Estimated Time Remaining: {:.3f}s."\
                        .format((now-run_start),
                                (total_step-current_step) * np.mean(step_times, dtype=np.float32))

                    print("\n" + str1)
                    print(str2)
                    print(str3)
                    print(str4)

                    with open(os.path.join("logger", "logger.txt"), "a") as f_logger:
                        f_logger.write(str1 + "\n")
                        f_logger.write(str2 + "\n")
                        f_logger.write(str3 + "\n")
                        f_logger.write(str4 + "\n")
                        f_logger.write("\n")

                if current_step % hp.save_step == 0:
                    torch.save(
                        {
                            'model': model.state_dict(),
                            'optimizer': optimizer.state_dict()
                        },
                        os.path.join(hp.checkpoint_path,
                                     'checkpoint_%d.pth.tar' % current_step))
                    print("save model at step %d ..." % current_step)

                end_time = time.perf_counter()
                step_times = np.append(step_times, end_time - start_time)
                # Once the buffer reaches hp.clear_Time samples, collapse it
                # to its mean so it cannot grow without bound.
                if len(step_times) == hp.clear_Time:
                    step_times = np.array([np.mean(step_times)])
BATCH_SIZE = 8 torch.backends.cudnn.deterministic = True random.seed(42) np.random.seed(42) torch.manual_seed(42) if torch.cuda.is_available(): torch.cuda.manual_seed_all(42) from src.dataset import load_dataset dataloader_train, dataloader_val = load_dataset(featurizer, BATCH_SIZE) from model import Tacotron2 generator = Tacotron2(n_mels=80, n_frames=1).to(device) from math import exp, log optimizer = optim.Adam(generator.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-6, weight_decay=1e-6) lambda1 = lambda step: exp(log(0.01)*min(15000, step)/15000) scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=[lambda1]) import os os.environ['CUDA_LAUNCH_BLOCKING'] = "1" import wandb wandb.init( project="DLA_HW4", config={ "lstm_dropout": 0.1, "n_mels": 80,
def train(num_gpus, rank, group_name, output_directory, epochs, learning_rate,
          sigma, batch_size, seed, checkpoint_path, hparams):
    """Train WaveGlow on batches parsed with Tacotron2's ``parse_batch``.

    Args:
        num_gpus: Number of GPUs. Values above 1 enable distributed training;
            0 keeps the model off CUDA.
        rank: Process rank. Only rank 0 prepares the output directory and
            writes checkpoints.
        group_name: Forwarded to ``init_distributed``.
        output_directory: Where logs (``log-event``) and checkpoints go.
        epochs: Exclusive upper bound of the epoch loop.
        learning_rate: Adam learning rate.
        sigma: Forwarded to ``WaveGlowLoss``.
        batch_size: Batch size for both data loaders.
        seed: Seed for the torch CPU and CUDA RNGs.
        checkpoint_path: Checkpoint to resume from, or ``""`` to start fresh.
        hparams: Hyper-parameter object shared by the model, the datasets and
            the collate function.
    """
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # =====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    # =====END:   ADDED FOR DISTRIBUTED======

    criterion = WaveGlowLoss(sigma)
    model = WaveGlow(**waveglow_config, hparams=hparams)
    if num_gpus >= 1:
        model = model.cuda()

    # =====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
    # =====END:   ADDED FOR DISTRIBUTED======

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Load checkpoint if one exists
    iteration, eval_iteration = 0, 0
    if checkpoint_path != "":
        model, optimizer, iteration, eval_iteration = load_checkpoint(
            checkpoint_path, model, optimizer)
        iteration += 1  # next iteration is iteration + 1
        eval_iteration += 1

    trainset = TextMelLoader(
        audiopaths_and_text='./filelists/ljs_audio_text_train_filelist.txt',
        hparams=hparams)
    testset = TextMelLoader(
        audiopaths_and_text='./filelists/ljs_audio_text_test_filelist.txt',
        hparams=hparams)
    collate_fn = TextMelCollate(hparams, fixed_length=True)

    # =====START: ADDED FOR DISTRIBUTED======
    # Each dataset gets its own sampler. The test loader used to reuse the
    # sampler built over ``trainset``, so it would have drawn training-set
    # indices against the test set.
    distributed = num_gpus > 1
    train_sampler = DistributedSampler(trainset) if distributed else None
    test_sampler = DistributedSampler(testset) if distributed else None
    # =====END:   ADDED FOR DISTRIBUTED======

    train_loader = DataLoader(trainset, num_workers=1, collate_fn=collate_fn,
                              shuffle=False,
                              sampler=train_sampler,
                              batch_size=batch_size,
                              pin_memory=False,
                              drop_last=True)

    # Only referenced by the disabled evaluation pass below.
    test_loader = DataLoader(testset, num_workers=1, collate_fn=collate_fn,
                             shuffle=False,
                             sampler=test_sampler,
                             batch_size=batch_size,
                             pin_memory=False,
                             drop_last=True)

    log_path = os.path.join(output_directory, 'log-event')
    os.makedirs(log_path, exist_ok=True)
    logger = WaveGlowLogger(log_path)

    # Get shared output_directory ready
    # NOTE(review): makedirs(log_path) above already creates
    # output_directory, so this branch (and its chmod) looks unreachable --
    # confirm intent.
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)

    model.train()

    # A Tacotron2 instance is built only to reuse its batch parsing.
    tacotron2 = Tacotron2(hparams)
    batch_parser = tacotron2.parse_batch

    # we use tacotron-2's pipeline
    epoch_offset = max(0, int(iteration / len(train_loader)))

    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, epochs):
        print("Epoch: {}".format(epoch))
        model.train()
        for i, batch in enumerate(train_loader):
            model.zero_grad()

            x, y = batch_parser(batch)
            text_padded, input_lengths, mel_padded, max_len, output_lengths = x
            mel_padded, gate_padded = y
            outputs = model((text_padded, mel_padded))

            loss = criterion(outputs)
            logger.log_loss('train/loss', loss, iteration)
            if num_gpus > 1:
                reduced_loss = reduce_tensor(loss.data, num_gpus).item()
            else:
                reduced_loss = loss.item()
            loss.backward()
            optimizer.step()

            print("{}:\t{:.9f}".format(iteration, reduced_loss))
            iteration += 1

        # Evaluation pass, currently disabled:
        # model.eval()
        # for i, batch in enumerate(test_loader):
        #     x, y = batch_parser(batch)
        #     text_padded, input_lengths, mel_padded, max_len, output_lengths = x
        #     mel_padded, gate_padded = y
        #     outputs = model((text_padded, mel_padded))
        #     loss = criterion(outputs)
        #     logger.log_loss('eval/loss', loss, iteration)
        #     eval_iteration += 1

        # One checkpoint per epoch, written by rank 0 only.
        if rank == 0:
            checkpoint_path = "{}/waveglow_epoch_{}".format(output_directory, epoch)
            save_checkpoint(model, optimizer, learning_rate, iteration,
                            eval_iteration, checkpoint_path, hparams=hparams)
def load_model(hparams):
    """Return a new Tacotron2 model moved to CUDA.

    Args:
        hparams: Hyper-parameter object handed to ``Tacotron2``.
    """
    return Tacotron2(hparams).cuda()
help='pre-trained model') parser.add_argument('--warm_start', action='store_true', help='load model weights only, ignore specified layers') parser.add_argument('--hparams', type=str, required=False, help='comma separated name=value pairs') parser.add_argument('--freeze_encoder', type=str, default=False, help='freeze encoder for transfer learning') args = parser.parse_args() hps = create_hparams(args.hparams) model = Tacotron2(hps).to(device) # If warm-start: need to specify a valid path for pre-trained model if path.exists(args.pre_trained) and args.warm_start: model = warm_start_model(args.pre_trained, model, hps.ignore_layers) # Case Freeze Encoder if args.freeze_encoder: model.freeze_encoder() # Load training csv (data_train, nms_data) = load_csv(hps.nm_csv_train, hps) # Manage types of sampling for batch selection: (BatchSampler or OrderedSampler) sampler_train = OrderedSampler(model, data_train,