def prepare_directories_and_logger(output_directory, log_directory, rank):
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        logger = Tacotron2Logger(log_directory)
    else:
        logger = None
    return logger
def train(args):
    # build model
    model = Tacotron2()
    mode(model, True)  # project helper: places the model on the device configured in hps
    optimizer = torch.optim.Adam(model.parameters(), lr=hps.lr,
                                 betas=hps.betas, eps=hps.eps,
                                 weight_decay=hps.weight_decay)
    criterion = Tacotron2Loss()

    # load checkpoint
    iteration = 1
    if args.ckpt_pth != '':
        model, optimizer, iteration = load_checkpoint(args.ckpt_pth, model, optimizer)
        iteration += 1  # next iteration is iteration + 1

    # get scheduler
    if hps.sch:
        lr_lambda = lambda step: hps.sch_step**0.5 * min((step+1)*hps.sch_step**-1.5,
                                                         (step+1)**-0.5)
        if args.ckpt_pth != '':
            scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda,
                                                          last_epoch=iteration)
        else:
            scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)

    # make dataset
    train_loader = prepare_dataloaders(args.data_dir)

    # get logger ready
    if args.log_dir != '':
        if not os.path.isdir(args.log_dir):
            os.makedirs(args.log_dir)
            os.chmod(args.log_dir, 0o775)
        logger = Tacotron2Logger(args.log_dir)

    # get ckpt_dir ready
    if args.ckpt_dir != '' and not os.path.isdir(args.ckpt_dir):
        os.makedirs(args.ckpt_dir)
        os.chmod(args.ckpt_dir, 0o775)

    model.train()
    # ================ MAIN TRAINING LOOP! ===================
    while iteration <= hps.max_iter:
        for batch in train_loader:
            if iteration > hps.max_iter:
                break
            start = time.perf_counter()
            x, y = model.parse_batch(batch)
            y_pred = model(x)

            # loss
            loss, item = criterion(y_pred, y, iteration)

            # zero grad
            model.zero_grad()

            # backward, grad_norm, and update
            loss.backward()
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                       hps.grad_clip_thresh)
            optimizer.step()
            if hps.sch:
                scheduler.step()

            # info
            dur = time.perf_counter() - start
            print('Iter: {} Loss: {:.2e} Grad Norm: {:.2e} {:.1f}s/it'.format(
                iteration, item, grad_norm, dur))

            # log
            if args.log_dir != '' and (iteration % hps.iters_per_log == 0):
                learning_rate = optimizer.param_groups[0]['lr']
                logger.log_training(item, grad_norm, learning_rate, iteration)

            # sample
            if args.log_dir != '' and (iteration % hps.iters_per_sample == 0):
                model.eval()
                output = infer(hps.eg_text, model)
                model.train()
                logger.sample_training(output, iteration)

            # save ckpt
            if args.ckpt_dir != '' and (iteration % hps.iters_per_ckpt == 0):
                ckpt_pth = os.path.join(args.ckpt_dir, 'ckpt_{}'.format(iteration))
                save_checkpoint(model, optimizer, iteration, ckpt_pth)

            iteration += 1

    if args.log_dir != '':
        logger.close()
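# The lr_lambda above is the Noam/Transformer warmup schedule written as a
# multiplier on hps.lr: it grows linearly for hps.sch_step steps, peaks at 1.0
# exactly when step == hps.sch_step, then decays as 1/sqrt(step). A minimal
# sketch of its shape follows; the warmup value of 4000 is only an
# illustrative stand-in for hps.sch_step, not a value taken from the config.
warmup = 4000
noam = lambda step: warmup**0.5 * min((step + 1) * warmup**-1.5, (step + 1)**-0.5)
for step in (0, 1999, 3999, 7999):
    print(step + 1, round(noam(step), 6))
# multipliers: 0.00025 at step 1, 0.5 at 2000, 1.0 at 4000 (peak), ~0.707 at 8000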
from synthesizer.utils import ValueWindow, data_parallel_workaround
from synthesizer.utils.plot import plot_spectrogram
from synthesizer.utils.symbols import symbols
from synthesizer.utils.text import sequence_to_text
from vocoder.display import *
from datetime import datetime
import numpy as np
import torch
from pathlib import Path
import sys
import time
from utils.logger import Tacotron2Logger
from preprocess_dataset.gl import write_wav, mel2wav
import os

# module-level logger instance
good_logger = Tacotron2Logger('./log_dir')


def np_now(x: torch.Tensor):
    return x.detach().cpu().numpy()


def time_string():
    return datetime.now().strftime("%Y-%m-%d %H:%M")


def train(run_id: str, metadata_fpath: str, models_dir: str, save_every: int,
          backup_every: int, force_restart: bool, hparams):
    models_dir = Path(models_dir)
    models_dir.mkdir(exist_ok=True)

    model_dir = models_dir.joinpath(run_id)
    plot_dir = model_dir.joinpath("plots")
def prepare_directories_and_logger(log_directory):
    if not os.path.isdir(log_directory):
        os.makedirs(log_directory)
    logger = Tacotron2Logger(log_directory)
    return logger
def train(output_directory, log_directory, checkpoint_path, warm_start, use_amp,
          name, hparams):
    """Training and validation logging results to tensorboard and stdout

    Params
    ------
    output_directory (string): directory to save checkpoints
    log_directory (string): directory to save tensorboard logs
    checkpoint_path (string): checkpoint path to resume from (or None)
    warm_start (bool): load model weights only, ignoring layers in hparams.ignore_layers
    use_amp (bool): enable automatic mixed precision training
    name (string): run name used in checkpoint filenames
    hparams (object): hyperparameter container
    """
    torch.manual_seed(hparams.seed)
    torch.cuda.manual_seed(hparams.seed)

    if use_amp:
        print("Automatic Mixed Precision Training")
        scaler = amp.GradScaler()

    n_symbols = len(symbols)
    model = PPSpeech(hparams, n_symbols).cuda()
    learning_rate = hparams.learning_rate
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate,
                                 weight_decay=hparams.weight_decay)

    criterion = Tacotron2Loss()

    logger = Tacotron2Logger(log_directory)

    train_loader, valset, collate_fn = prepare_dataloaders(hparams)

    # Load checkpoint if one exists
    iteration = 0
    epoch_offset = 0
    if checkpoint_path is not None:
        if warm_start:
            model = warm_start_model(checkpoint_path, model, hparams.ignore_layers)
        else:
            model, optimizer, _learning_rate, iteration = load_checkpoint(
                checkpoint_path, model, optimizer)
            if hparams.use_saved_learning_rate:
                learning_rate = _learning_rate
            iteration += 1  # next iteration is iteration + 1
            epoch_offset = max(0, int(iteration / len(train_loader)))

    model.train()
    is_overflow = False  # kept for compatibility; never set inside this loop
    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, hparams.epochs):
        print("Epoch: {}".format(epoch))
        pbar = tqdm.tqdm(train_loader, desc='Loading train data')
        for data in pbar:
            start = time.perf_counter()
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate

            model.zero_grad()
            x, y = model.parse_batch(data)

            if use_amp:
                with amp.autocast():
                    y_pred = model(x)
                    loss = criterion(y_pred, y)
                scaler.scale(loss).backward()
                scaler.unscale_(optimizer)
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    model.parameters(), hparams.grad_clip_thresh)
                scaler.step(optimizer)
                scaler.update()
            else:
                y_pred = model(x)
                loss = criterion(y_pred, y)
                loss.backward()
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    model.parameters(), hparams.grad_clip_thresh)
                optimizer.step()

            reduced_loss = loss.item()

            if not is_overflow and (iteration % hparams.summary_interval == 0):
                duration = time.perf_counter() - start
                pbar.set_description(
                    " Loss %.04f | Grad Norm %.04f | step %d"
                    % (reduced_loss, grad_norm, iteration))
                logger.log_training(reduced_loss, grad_norm, learning_rate,
                                    duration, iteration)

            if not is_overflow and (iteration % hparams.iters_per_checkpoint == 0):
                # `vocoder` is expected to be available at module scope
                validate(model, criterion, valset, iteration,
                         hparams.batch_size, collate_fn, logger, vocoder)
                checkpoint_path = os.path.join(
                    output_directory,
                    "checkpoint_{}_{}.pyt".format(name, iteration))
                save_checkpoint(model, optimizer, learning_rate, iteration,
                                checkpoint_path)

            iteration += 1
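# Minimal, self-contained sketch of the AMP pattern used in the use_amp branch
# above: forward pass and loss under autocast, backward on the scaled loss,
# unscale before gradient clipping, then scaler.step()/update(). The toy
# Linear model, random tensors, and clip value 1.0 are placeholders, not part
# of the original training code; the scaler is disabled when no GPU is present.
import torch
from torch import nn
from torch.cuda import amp

use_cuda = torch.cuda.is_available()
device = "cuda" if use_cuda else "cpu"
model = nn.Linear(80, 80).to(device)       # stand-in for the real acoustic model
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scaler = amp.GradScaler(enabled=use_cuda)  # becomes a pass-through on CPU

x = torch.randn(16, 80, device=device)
y = torch.randn(16, 80, device=device)

optimizer.zero_grad()
with amp.autocast(enabled=use_cuda):       # mixed-precision forward + loss
    loss = nn.functional.mse_loss(model(x), y)
scaler.scale(loss).backward()              # backward on the scaled loss
scaler.unscale_(optimizer)                 # unscale so clipping sees true gradients
grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
scaler.step(optimizer)                     # skips the update if gradients overflowed
scaler.update()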