def main():
    """
    Call train.py as a new process per visible GPU and pass command arguments.

    Parses ``--restore_path``, ``--config_path`` and ``--data_path`` from the
    command line, creates the experiment folder from the loaded config and
    starts one ``python3 train.py ...`` subprocess for every device reported
    by ``torch.cuda.device_count()``. Rank 0 inherits this process' stdout;
    every other rank writes its stdout to
    ``<OUT_PATH>/process_stdout/process_<rank>.log``. The function blocks
    until all child processes have exited.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--restore_path',
        type=str,
        help='Folder path to checkpoints',
        default='')
    parser.add_argument(
        '--config_path',
        type=str,
        help='path to config file for training',
    )
    parser.add_argument(
        '--data_path', type=str, help='dataset path.', default='')
    args = parser.parse_args()

    CONFIG = load_config(args.config_path)
    OUT_PATH = create_experiment_folder(CONFIG.output_path, CONFIG.run_name,
                                        True)
    stdout_path = os.path.join(OUT_PATH, "process_stdout/")

    num_gpus = torch.cuda.device_count()
    # every worker receives the same group id so they can be tied together
    group_id = time.strftime("%Y_%m_%d-%H%M%S")

    # arguments shared by all train.py workers; the rank flag is appended
    # per process below instead of being written into a fixed list index.
    base_command = [
        'train.py',
        '--restore_path={}'.format(args.restore_path),
        '--config_path={}'.format(args.config_path),
        '--group_id=group_{}'.format(group_id),
        '--data_path={}'.format(args.data_path),
        '--output_path={}'.format(OUT_PATH),
    ]

    if not os.path.isdir(stdout_path):
        os.makedirs(stdout_path)
        os.chmod(stdout_path, 0o775)

    # run processes
    processes = []
    log_files = []
    try:
        for rank in range(num_gpus):
            my_env = os.environ.copy()
            my_env["PYTHON_EGG_CACHE"] = "/tmp/tmp{}".format(rank)
            command = base_command + ['--rank={}'.format(rank)]
            stdout = None
            if rank != 0:
                stdout = open(
                    os.path.join(stdout_path,
                                 "process_{}.log".format(rank)), "w")
                log_files.append(stdout)
            p = subprocess.Popen(
                ['python3'] + command, stdout=stdout, env=my_env)
            processes.append(p)
            print(command)

        for p in processes:
            p.wait()
    finally:
        # the children hold their own copies of the descriptors; release
        # the parent's handles so they are not leaked.
        for log_file in log_files:
            log_file.close()
def main(args):
    """
    Call train.py (or find_lr.py) as a new process per GPU and pass arguments.

    Args:
        args: parsed command line namespace. The attributes read here are
            ``config_path``, ``output_path``, ``restore_path``, ``data_path``,
            ``lr_find`` and, when ``lr_find`` is set, ``init_lr`` and
            ``end_lr``.

    One ``python3 <script> ...`` subprocess is started for every device
    reported by ``torch.cuda.device_count()``. Rank 0 inherits this process'
    stdout; every other rank writes its stdout to
    ``<OUT_PATH>/process_stdout/process_<rank>.log``. The function blocks
    until all child processes have exited.
    """
    CONFIG = load_config(args.config_path)
    if args.output_path == "":
        # NOTE(review): `_` is not assigned inside this function; it is
        # presumably a module-level path (other revisions set it to the
        # directory of this file) -- confirm it exists at module scope.
        OUT_PATH = os.path.join(_, CONFIG.output_path)
    else:
        OUT_PATH = args.output_path

    OUT_PATH = create_experiment_folder(OUT_PATH, CONFIG.model_name)
    stdout_path = os.path.join(OUT_PATH, "process_stdout/")

    num_gpus = torch.cuda.device_count()
    # every worker receives the same group id so they can be tied together
    group_id = time.strftime("%Y_%m_%d-%H%M%S")

    # arguments shared by both target scripts
    script = 'find_lr.py' if args.lr_find else 'train.py'
    base_command = [
        script,
        '--restore_path={}'.format(args.restore_path),
        '--config_path={}'.format(args.config_path),
        '--group_id=group_{}'.format(group_id),
        '--data_path={}'.format(args.data_path),
        '--output_path={}'.format(OUT_PATH),
    ]
    if args.lr_find:
        base_command.append('--init_lr={}'.format(args.init_lr))
        base_command.append('--end_lr={}'.format(args.end_lr))

    if not os.path.isdir(stdout_path):
        os.makedirs(stdout_path)
        os.chmod(stdout_path, 0o775)

    # run processes
    processes = []
    log_files = []
    try:
        for rank in range(num_gpus):
            my_env = os.environ.copy()
            my_env["PYTHON_EGG_CACHE"] = "/tmp/tmp{}".format(rank)
            # The rank flag always goes last. Writing it to a fixed index
            # (previously 6) replaced '--init_lr=...' in lr_find mode.
            command = base_command + ['--rank={}'.format(rank)]
            stdout = None
            if rank != 0:
                stdout = open(
                    os.path.join(stdout_path,
                                 "process_{}.log".format(rank)), "w")
                log_files.append(stdout)
            p = subprocess.Popen(
                ['python3'] + command, stdout=stdout, env=my_env)
            processes.append(p)
            print(command)

        for p in processes:
            p.wait()
    finally:
        # the children hold their own copies of the descriptors; release
        # the parent's handles so they are not leaked.
        for log_file in log_files:
            log_file.close()
def main(args):
    """Train a Tacotron model on LJSpeech and log progress to tensorboard.

    Args:
        args: parsed command line namespace. The attributes read here are
            ``config_path``, ``restore_path`` and ``restore_step``
            (``restore_step`` is also added to the global step counter, so it
            is expected to be an int, 0 when not restoring).

    Side effects visible in this function: creates an experiment folder,
    copies the config into it, pickles the config to ``/tmp/<pid>_tts``,
    installs a SIGINT handler that removes the experiment folder, writes
    tensorboard events, and saves checkpoints / the best model through
    ``save_checkpoint`` and ``save_best_model``.
    """
    # setup output paths and read configs
    c = load_config(args.config_path)
    _ = os.path.dirname(os.path.realpath(__file__))
    OUT_PATH = os.path.join(_, c.output_path)
    OUT_PATH = create_experiment_folder(OUT_PATH)
    CHECKPOINT_PATH = os.path.join(OUT_PATH, 'checkpoints')
    shutil.copyfile(args.config_path, os.path.join(OUT_PATH, 'config.json'))

    # save config to tmp place to be loaded by subsequent modules.
    file_name = str(os.getpid())
    tmp_path = os.path.join("/tmp/", file_name + '_tts')
    with open(tmp_path, "wb") as tmp_file:
        pickle.dump(c, tmp_file)

    # setup tensorboard
    LOG_DIR = OUT_PATH
    tb = SummaryWriter(LOG_DIR)

    # Ctrl+C handler to remove empty experiment folder
    def signal_handler(signum, frame):
        print(" !! Pressed Ctrl+C !!")
        remove_experiment_folder(OUT_PATH)
        sys.exit(1)

    signal.signal(signal.SIGINT, signal_handler)

    # Setup the dataset
    dataset = LJSpeechDataset(os.path.join(c.data_path, 'metadata.csv'),
                              os.path.join(c.data_path, 'wavs'),
                              c.r,
                              c.sample_rate,
                              c.text_cleaner,
                              c.num_mels,
                              c.min_level_db,
                              c.frame_shift_ms,
                              c.frame_length_ms,
                              c.preemphasis,
                              c.ref_level_db,
                              c.num_freq,
                              c.power
                              )

    dataloader = DataLoader(dataset, batch_size=c.batch_size,
                            shuffle=True, collate_fn=dataset.collate_fn,
                            drop_last=True, num_workers=c.num_loader_workers)

    # setup the model
    model = Tacotron(c.embedding_size,
                     c.hidden_size,
                     c.num_mels,
                     c.num_freq,
                     c.r)

    # plot model on tensorboard
    # TODO: onnx does not support RNN fully yet, so the graph export is not
    # wired up; the dummy input is fetched but currently unused.
    dummy_input = dataset.get_dummy_data()

    if use_cuda:
        model = nn.DataParallel(model.cuda())

    optimizer = optim.Adam(model.parameters(), lr=c.lr)

    # Initialise before the restore branch so a restored value survives;
    # previously it was reset to inf after the checkpoint had been read.
    best_loss = float('inf')

    if args.restore_step:
        checkpoint = torch.load(os.path.join(
            args.restore_path, 'checkpoint_%d.pth.tar' % args.restore_step))
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("\n > Model restored from step %d\n" % args.restore_step)
        best_loss = checkpoint['linear_loss']
    else:
        print("\n > Starting a new training")

    num_params = count_parameters(model)
    print(" | > Model has {} parameters".format(num_params))

    model = model.train()

    if not os.path.exists(CHECKPOINT_PATH):
        os.mkdir(CHECKPOINT_PATH)

    if use_cuda:
        criterion = nn.L1Loss().cuda()
    else:
        criterion = nn.L1Loss()

    # number of linear-spectrogram bins that get an extra loss term
    n_priority_freq = int(3000 / (c.sample_rate * 0.5) * c.num_freq)

    epoch_time = 0
    for epoch in range(0, c.epochs):

        print("\n | > Epoch {}/{}".format(epoch, c.epochs))
        progbar = Progbar(len(dataset) / c.batch_size)

        for num_iter, data in enumerate(dataloader):
            start_time = time.time()

            text_input = data[0]
            text_lengths = data[1]
            linear_input = data[2]
            mel_input = data[3]

            # the resume offset is carried by args.restore_step
            current_step = num_iter + args.restore_step + \
                epoch * len(dataloader) + 1

            # setup lr
            current_lr = lr_decay(c.lr, current_step)
            for params_group in optimizer.param_groups:
                params_group['lr'] = current_lr

            optimizer.zero_grad()

            # convert inputs to variables
            text_input_var = Variable(text_input)
            mel_spec_var = Variable(mel_input)
            # NOTE(review): volatile=True on the target is kept as-is;
            # confirm it is intended for the installed torch version.
            linear_spec_var = Variable(linear_input, volatile=True)

            # sort sequence by length.
            # TODO: might be unnecessary
            sorted_lengths, indices = torch.sort(
                text_lengths.view(-1), dim=0, descending=True)
            input_lengths_var = Variable(sorted_lengths.long())

            text_input_var = text_input_var[indices]
            mel_spec_var = mel_spec_var[indices]
            linear_spec_var = linear_spec_var[indices]

            if use_cuda:
                text_input_var = text_input_var.cuda()
                mel_spec_var = mel_spec_var.cuda()
                linear_spec_var = linear_spec_var.cuda()
                # only move the lengths to the GPU when one is in use; they
                # were previously built as a CUDA tensor unconditionally.
                input_lengths_var = input_lengths_var.cuda()

            mel_output, linear_output, alignments =\
                model.forward(text_input_var, mel_spec_var,
                              input_lengths=input_lengths_var)

            mel_loss = criterion(mel_output, mel_spec_var)
            linear_loss = 0.5 * criterion(linear_output, linear_spec_var) \
                + 0.5 * criterion(linear_output[:, :, :n_priority_freq],
                                  linear_spec_var[:, :, :n_priority_freq])
            loss = mel_loss + linear_loss

            loss.backward()
            grad_norm = nn.utils.clip_grad_norm(model.parameters(), 1.)  ## TODO: maybe no need
            optimizer.step()

            step_time = time.time() - start_time
            epoch_time += step_time

            progbar.update(num_iter+1, values=[('total_loss', loss.data[0]),
                                               ('linear_loss', linear_loss.data[0]),
                                               ('mel_loss', mel_loss.data[0]),
                                               ('grad_norm', grad_norm)])

            # Plot Learning Stats
            tb.add_scalar('Loss/TotalLoss', loss.data[0], current_step)
            tb.add_scalar('Loss/LinearLoss', linear_loss.data[0],
                          current_step)
            tb.add_scalar('Loss/MelLoss', mel_loss.data[0], current_step)
            tb.add_scalar('Params/LearningRate', optimizer.param_groups[0]['lr'],
                          current_step)
            tb.add_scalar('Params/GradNorm', grad_norm, current_step)
            tb.add_scalar('Time/StepTime', step_time, current_step)

            align_img = alignments[0].data.cpu().numpy()
            align_img = plot_alignment(align_img)
            tb.add_image('Attn/Alignment', align_img, current_step)

            if current_step % c.save_step == 0:
                if c.checkpoint:
                    # save model
                    save_checkpoint(model, optimizer, linear_loss.data[0],
                                    OUT_PATH, current_step, epoch)

                # Diagnostic visualizations
                const_spec = linear_output[0].data.cpu().numpy()
                gt_spec = linear_spec_var[0].data.cpu().numpy()

                const_spec = plot_spectrogram(const_spec, dataset.ap)
                gt_spec = plot_spectrogram(gt_spec, dataset.ap)
                tb.add_image('Spec/Reconstruction', const_spec, current_step)
                tb.add_image('Spec/GroundTruth', gt_spec, current_step)

                align_img = alignments[0].data.cpu().numpy()
                align_img = plot_alignment(align_img)
                tb.add_image('Attn/Alignment', align_img, current_step)

                # Sample audio
                audio_signal = linear_output[0].data.cpu().numpy()
                dataset.ap.griffin_lim_iters = 60
                audio_signal = dataset.ap.inv_spectrogram(audio_signal.T)
                try:
                    tb.add_audio('SampleAudio', audio_signal, current_step,
                                 sample_rate=c.sample_rate)
                except Exception:
                    # Best effort: a failed audio summary must not stop
                    # training. Not a bare except, so the SystemExit raised
                    # by the Ctrl+C handler still propagates.
                    print("\n > Error at audio signal on TB!!")
                    print(audio_signal.max())
                    print(audio_signal.min())

        # average loss after the epoch
        avg_epoch_loss = np.mean(
            progbar.sum_values['linear_loss'][0] /
            max(1, progbar.sum_values['linear_loss'][1]))
        best_loss = save_best_model(model, optimizer, avg_epoch_loss,
                                    best_loss, OUT_PATH,
                                    current_step, epoch)

        tb.add_scalar('Time/EpochTime', epoch_time, epoch)
        epoch_time = 0
help='DISTRIBUTED: process group id.') args = parser.parse_args() # setup output paths and read configs c = load_config(args.config_path) _ = os.path.dirname(os.path.realpath(__file__)) if args.data_path != '': c.data_path = args.data_path if args.output_path == '': OUT_PATH = os.path.join(_, c.output_path) else: OUT_PATH = args.output_path if args.group_id == '' and args.output_folder == '': OUT_PATH = create_experiment_folder(OUT_PATH, c.run_name, args.debug) else: OUT_PATH = os.path.join(OUT_PATH, args.output_folder) AUDIO_PATH = os.path.join(OUT_PATH, 'test_audios') if args.rank == 0: os.makedirs(AUDIO_PATH, exist_ok=True) new_fields = {} if args.restore_path: new_fields["restore_path"] = args.restore_path new_fields["github_branch"] = get_git_branch() copy_config_file(args.config_path, os.path.join(OUT_PATH, 'config.json'), new_fields) os.chmod(AUDIO_PATH, 0o775) os.chmod(OUT_PATH, 0o775)
parser.add_argument('--restore_path', type=str, help='Folder path to checkpoints', default=0) parser.add_argument( '--config_path', type=str, help='path to config file for training', ) args = parser.parse_args() # setup output paths and read configs c = load_config(args.config_path) _ = os.path.dirname(os.path.realpath(__file__)) OUT_PATH = os.path.join(_, c.output_path) OUT_PATH = create_experiment_folder(OUT_PATH) CHECKPOINT_PATH = os.path.join(OUT_PATH, 'checkpoints') shutil.copyfile(args.config_path, os.path.join(OUT_PATH, 'config.json')) parser.add_argument('--finetine_path', type=str) # save config to tmp place to be loaded by subsequent modules. file_name = str(os.getpid()) tmp_path = os.path.join("/tmp/", file_name + '_tts') pickle.dump(c, open(tmp_path, "wb")) # setup tensorboard LOG_DIR = OUT_PATH tb = SummaryWriter(LOG_DIR) def signal_handler(signal, frame):
# Module-level setup: seed, CLI parsing, experiment folder and tensorboard.
torch.manual_seed(1)
use_cuda = torch.cuda.is_available()

parser = argparse.ArgumentParser()
# NOTE(review): default=0 is an int although type=str; argparse does not
# convert non-string defaults, so args.restore_path is 0 when omitted.
parser.add_argument('--restore_path', type=str,
                    help='Folder path to checkpoints', default=0)
parser.add_argument('--config_path', type=str,
                    help='path to config file for training',)
args = parser.parse_args()

# setup output paths and read configs
c = load_config(args.config_path)
_ = os.path.dirname(os.path.realpath(__file__))
OUT_PATH = os.path.join(_, c.output_path)
OUT_PATH = create_experiment_folder(OUT_PATH, c.model_name)
CHECKPOINT_PATH = os.path.join(OUT_PATH, 'checkpoints')
shutil.copyfile(args.config_path, os.path.join(OUT_PATH, 'config.json'))

# NOTE(review): this option is registered after parse_args() has already
# run, so it is never parsed into `args`; the name also looks like a typo
# of "finetune". Left untouched to keep the command line interface as is.
parser.add_argument('--finetine_path', type=str)

# save config to tmp place to be loaded by subsequent modules.
file_name = str(os.getpid())
tmp_path = os.path.join("/tmp/", file_name+'_tts')
with open(tmp_path, "wb") as _tmp_config_file:
    pickle.dump(c, _tmp_config_file)

# setup tensorboard
LOG_DIR = OUT_PATH
tb = SummaryWriter(LOG_DIR)

# n_priority_freq is only defined when the config enables priority_freq
if c.priority_freq:
    n_priority_freq = int(3000 / (c.sample_rate * 0.5) * c.num_freq)
def main(args):
    """Train a Tacotron model on LJSpeech and save periodic checkpoints.

    Args:
        args: parsed command line namespace. The attributes read here are
            ``config_path`` and ``restore_step`` (``restore_step`` is added
            to the global step counter, so it is expected to be an int).

    Side effects visible in this function: creates an experiment folder,
    copies the config into it, installs a SIGINT handler that removes the
    experiment folder, and writes ``checkpoint_<step>.pth.tar`` files every
    ``c.save_step`` steps.

    Raises:
        TypeError: when the zero frame cannot be concatenated in front of a
            mel batch (the underlying error is chained as the cause).
    """
    # setup output paths and read configs
    c = load_config(args.config_path)
    _ = os.path.dirname(os.path.realpath(__file__))
    OUT_PATH = os.path.join(_, c.output_path)
    OUT_PATH = create_experiment_folder(OUT_PATH)
    CHECKPOINT_PATH = os.path.join(OUT_PATH, 'checkpoints')
    shutil.copyfile(args.config_path, os.path.join(OUT_PATH, 'config.json'))

    # Ctrl+C handler to remove empty experiment folder
    def signal_handler(signum, frame):
        print(" !! Pressed Ctrl+C !!")
        remove_experiment_folder(OUT_PATH)
        sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)

    dataset = LJSpeechDataset(os.path.join(c.data_path, 'metadata.csv'),
                              os.path.join(c.data_path, 'wavs'),
                              c.r, c.sample_rate, c.text_cleaner)

    model = Tacotron(c.embedding_size,
                     c.hidden_size,
                     c.num_mels,
                     c.num_freq,
                     c.r)
    if use_cuda:
        model = nn.DataParallel(model.cuda())

    optimizer = optim.Adam(model.parameters(), lr=c.lr)

    # Restoring is best effort: any failure falls back to a fresh run.
    # NOTE(review): checkpoints are loaded from CHECKPOINT_PATH but written
    # to OUT_PATH further down -- confirm which directory is intended.
    try:
        checkpoint = torch.load(
            os.path.join(CHECKPOINT_PATH,
                         'checkpoint_%d.pth.tar' % args.restore_step))
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("\n > Model restored from step %d\n" % args.restore_step)
    except Exception:
        # not a bare except, so Ctrl+C / SystemExit are no longer swallowed
        print("\n > Starting a new training\n")

    model = model.train()

    if not os.path.exists(CHECKPOINT_PATH):
        os.mkdir(CHECKPOINT_PATH)

    if use_cuda:
        criterion = nn.L1Loss().cuda()
    else:
        criterion = nn.L1Loss()

    n_priority_freq = int(3000 / (c.sample_rate * 0.5) * c.num_freq)

    # The loader does not depend on the epoch, so build it once;
    # shuffle=True still reshuffles every time it is iterated.
    dataloader = DataLoader(dataset, batch_size=c.batch_size,
                            shuffle=True, collate_fn=dataset.collate_fn,
                            drop_last=True, num_workers=32)

    for epoch in range(c.epochs):
        progbar = Progbar(len(dataset) / c.batch_size)

        for i, data in enumerate(dataloader):
            text_input = data[0]
            magnitude_input = data[1]
            mel_input = data[2]

            current_step = i + args.restore_step + epoch * len(dataloader) + 1

            optimizer.zero_grad()

            # replace the first mel frame of every sample with zeros
            try:
                mel_input = np.concatenate(
                    (np.zeros([c.batch_size, 1, c.num_mels],
                              dtype=np.float32),
                     mel_input[:, 1:, :]), axis=1)
            except Exception as err:
                raise TypeError("not same dimension") from err

            # build the variables on the CPU and move them to the GPU in
            # one place instead of duplicating the construction per device
            text_input_var = Variable(
                torch.from_numpy(text_input).type(torch.LongTensor),
                requires_grad=False)
            mel_input_var = Variable(
                torch.from_numpy(mel_input).type(torch.FloatTensor),
                requires_grad=False)
            mel_spec_var = Variable(
                torch.from_numpy(mel_input).type(torch.FloatTensor),
                requires_grad=False)
            linear_spec_var = Variable(
                torch.from_numpy(magnitude_input).type(torch.FloatTensor),
                requires_grad=False)

            if use_cuda:
                text_input_var = text_input_var.cuda()
                mel_input_var = mel_input_var.cuda()
                mel_spec_var = mel_spec_var.cuda()
                linear_spec_var = linear_spec_var.cuda()

            mel_output, linear_output, alignments =\
                model.forward(text_input_var, mel_input_var)

            mel_loss = criterion(mel_output, mel_spec_var)
            linear_loss = torch.abs(linear_output - linear_spec_var)
            # NOTE(review): the priority slice is taken on axis 1 here;
            # confirm that axis 1 is the frequency axis of linear_output.
            linear_loss = 0.5 * \
                torch.mean(linear_loss) + 0.5 * \
                torch.mean(linear_loss[:, :n_priority_freq, :])
            loss = mel_loss + linear_loss
            if use_cuda:
                # previously unconditional, which fails without a GPU
                loss = loss.cuda()

            loss.backward()
            nn.utils.clip_grad_norm(model.parameters(), 1.)
            optimizer.step()

            progbar.update(i, values=[('total_loss', loss.data[0]),
                                      ('linear_loss', linear_loss.data[0]),
                                      ('mel_loss', mel_loss.data[0])])

            if current_step % c.save_step == 0:
                checkpoint_path = 'checkpoint_{}.pth.tar'.format(current_step)
                checkpoint_path = os.path.join(OUT_PATH, checkpoint_path)
                save_checkpoint(
                    {
                        'model': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'step': current_step,
                        'total_loss': loss.data[0],
                        'linear_loss': linear_loss.data[0],
                        'mel_loss': mel_loss.data[0],
                        'date': datetime.date.today().strftime("%B %d, %Y")
                    },
                    checkpoint_path)
                print(" > Checkpoint is saved : {}".format(checkpoint_path))

            if current_step in c.decay_step:
                optimizer = adjust_learning_rate(optimizer, current_step)
args.output_path = args.continue_path args.config_path = os.path.join(args.continue_path, 'config.json') list_of_files = glob.glob( args.continue_path + "/*.pth.tar") # * means all if need specific format then *.csv latest_model_file = max(list_of_files, key=os.path.getctime) args.restore_path = latest_model_file print(f" > Training continues for {args.restore_path}") # setup output paths and read configs c = load_config(args.config_path) _ = os.path.dirname(os.path.realpath(__file__)) OUT_PATH = args.continue_path if args.continue_path == '': OUT_PATH = create_experiment_folder(c.output_path, c.run_name, args.debug) AUDIO_PATH = os.path.join(OUT_PATH, 'test_audios') if args.rank == 0: os.makedirs(AUDIO_PATH, exist_ok=True) new_fields = {} if args.restore_path: new_fields["restore_path"] = args.restore_path new_fields["github_branch"] = get_git_branch() copy_config_file(args.config_path, os.path.join(OUT_PATH, 'config.json'), new_fields) os.chmod(AUDIO_PATH, 0o775) os.chmod(OUT_PATH, 0o775) if args.rank == 0: