def main():
    start_epoch = 1
    model = Loop(args)
    model.cuda()

    if args.checkpoint != '':
        checkpoint_args_path = os.path.dirname(args.checkpoint) + '/args.pth'
        checkpoint_args = torch.load(checkpoint_args_path)
        start_epoch = checkpoint_args[3]
        model.load_state_dict(torch.load(args.checkpoint))

    criterion = MaskedMSE().cuda()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Keep track of losses
    train_losses = []
    eval_losses = []
    best_eval = float('inf')

    # Begin!
    for epoch in range(start_epoch, start_epoch + args.epochs):
        train(model, criterion, optimizer, epoch, train_losses)
        eval_loss = evaluate(model, criterion, epoch, eval_losses)

        if eval_loss < best_eval:
            torch.save(model.state_dict(), '%s/bestmodel.pth' % args.expName)
            best_eval = eval_loss

        torch.save(model.state_dict(), '%s/lastmodel.pth' % args.expName)
        torch.save([args, train_losses, eval_losses, epoch],
                   '%s/args.pth' % args.expName)
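# MaskedMSE is used as the criterion throughout these scripts but is not
# defined in this file. Below is a minimal PyTorch sketch of such a masked
# MSE, assuming the mask is derived from per-sequence lengths; the actual
# implementation in the repo may differ.

import torch
import torch.nn as nn


class MaskedMSE(nn.Module):
    """Mean squared error that ignores padded timesteps (hypothetical sketch).

    Expects output/target of shape (batch, time, feats) and a 1-D tensor
    holding the number of valid timesteps per sequence.
    """

    def forward(self, output, target, lengths):
        # Mask is 1 for valid timesteps and 0 for padding.
        max_len = target.size(1)
        mask = (torch.arange(max_len, device=target.device)[None, :]
                < lengths[:, None]).float()
        mask = mask.unsqueeze(-1).expand_as(target)
        # Average the squared error over the valid entries only.
        return ((output - target) ** 2 * mask).sum() / mask.sum()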
def main():
    args = init()
    checkpoint = args.checkpoint

    # Recover the arguments the model was trained with and evaluate with
    # those, rather than the freshly parsed ones.
    checkpoint_args_path = os.path.dirname(checkpoint) + '/args.pth'
    train_args = torch.load(checkpoint_args_path)[0]
    train_args.noise = 0
    train_args.checkpoint = checkpoint
    args_to_use = train_args
    print(args_to_use)

    model = Loop(args_to_use)
    model.cuda()
    model.load_state_dict(
        torch.load(args_to_use.checkpoint,
                   map_location=lambda storage, loc: storage))

    criterion = MaskedMSE().cuda()
    loader = get_loader(args.data, args.max_seq_len, args.batch_size, args.nspk)

    eval_loss = evaluate(model, loader, criterion)
    print(eval_loss)
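# Note on the args.pth layout: the training script above saves it as
# torch.save([args, train_losses, eval_losses, epoch], ...), so index 0
# recovers the training args and index 3 the last finished epoch, e.g.:
#
#     saved = torch.load(checkpoint_args_path)
#     train_args, last_epoch = saved[0], saved[3]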
def eval_loss(checkpoint='models/vctk/bestmodel.pth', data='data/vctk',
              max_seq_len=1000, nspk=22, gpu=0, batch_size=64, seed=1):
    torch.cuda.set_device(gpu)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    print(checkpoint)
    print(os.getcwd())

    # Recover the training args saved next to the checkpoint and reuse
    # them for evaluation, with noise disabled.
    checkpoint_args_path = os.path.dirname(checkpoint) + '/args.pth'
    train_args = torch.load(checkpoint_args_path)[0]
    train_args.noise = 0
    train_args.checkpoint = checkpoint
    args_to_use = train_args
    print(args_to_use)

    model = Loop(args_to_use)
    model.cuda()
    model.load_state_dict(
        torch.load(args_to_use.checkpoint,
                   map_location=lambda storage, loc: storage))

    criterion = MaskedMSE().cuda()
    loader = get_loader(data, max_seq_len, batch_size, nspk)

    eval_loss, my_eval_loss, loss_workings = evaluate(model, loader, criterion)
    print(eval_loss)
    print(my_eval_loss)
    return eval_loss, loss_workings
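# Example invocation with the defaults above (assumes a trained VCTK model
# at models/vctk/bestmodel.pth; the paths are illustrative):

if __name__ == '__main__':
    loss, workings = eval_loss(checkpoint='models/vctk/bestmodel.pth',
                               data='data/vctk', gpu=0)
    print('Evaluation loss: %.4f' % loss)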
def main():
    cuda_available = torch.cuda.is_available()
    train_params, dataset_params = get_e_arguments()

    net = TimeStretch()
    epoch_trained = 0
    if train_params['restore_model']:
        net = load_model(net, train_params['restore_dir'],
                         train_params['restore_model'])
        if net is None:
            print("Initialize network and train from scratch.")
            net = TimeStretch()

    train_loader, X_val_var, y_val_var, L_test = audio_data_loader(
        **dataset_params)

    if not cuda_available:
        warnings.warn(
            "CUDA is not available; cannot train the model on multiple GPUs.")
    else:
        # Remove train_params["device_ids"] for single-GPU training.
        if train_params["device_ids"]:
            batch_size = dataset_params["batch_size"]
            num_gpu = len(train_params["device_ids"])
            assert batch_size % num_gpu == 0
            net = nn.DataParallel(net, device_ids=train_params['device_ids'])
        torch.backends.cudnn.benchmark = True
        net = net.cuda()

    criterion = MaskedMSE()  # alternative: nn.MSELoss()
    optimizer = get_optimizer(net, train_params['optimizer'],
                              train_params['learning_rate'],
                              train_params['momentum'])
    if cuda_available:
        criterion = criterion.cuda()

    if not os.path.exists(train_params['log_dir']):
        os.makedirs(train_params['log_dir'])
    if not os.path.exists(train_params['restore_dir']):
        os.makedirs(train_params['restore_dir'])

    train_loss_log_file = open(
        train_params['log_dir'] + 'train_loss_log_e.log', 'a')
    test_loss_log_file = open(
        train_params['log_dir'] + 'test_loss_log_e.log', 'a')

    # Log the start of training.
    timestamp = str(datetime.now())
    train_loss_log_file.write('Training started at ' + timestamp + ' !!!\n')
    train_loss_log_file.flush()

    # Keep track of losses
    train_losses = []
    eval_losses = []
    best_eval = float('inf')

    # Begin!
    for epoch in range(train_params['num_epochs']):
        train(net, criterion, optimizer, train_losses, train_params,
              train_loss_log_file, train_loader, cuda_available)
        eval_loss = evaluate(net, criterion, epoch, eval_losses, X_val_var,
                             y_val_var, L_test, test_loss_log_file,
                             cuda_available)

        # if eval_loss < best_eval:
        #     save_model(net, 1, train_params['restore_dir'])
        #     torch.save(net.state_dict(),
        #                train_params['restore_dir'] + 'bestmodel.pth')
        #     best_eval = eval_loss

        if epoch % train_params['check_point_every'] == 0:
            save_model(net, epoch_trained + epoch + 1,
                       train_params['restore_dir'])
            torch.save([train_losses, eval_losses, epoch],
                       train_params['restore_dir'] + 'data_params')

    # Log the end of training.
    timestamp = str(datetime.now())
    train_loss_log_file.write('Training ended at ' + timestamp + ' !!!\n')
    train_loss_log_file.flush()

    train_loss_log_file.close()
    test_loss_log_file.close()
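# get_optimizer is not defined in this file. A minimal sketch of what it
# plausibly does, dispatching on the optimizer name from train_params
# (an assumption; the real helper may support more optimizers):

import torch.optim as optim


def get_optimizer(net, optimizer_name, learning_rate, momentum):
    # Map a string name to a configured torch.optim optimizer.
    if optimizer_name == 'adam':
        return optim.Adam(net.parameters(), lr=learning_rate)
    if optimizer_name == 'sgd':
        return optim.SGD(net.parameters(), lr=learning_rate,
                         momentum=momentum)
    raise ValueError('Unsupported optimizer: %s' % optimizer_name)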
def main():
    # Load datasets.
    train_dataset_path = os.path.join(args.data, 'numpy_features')
    train = NpzFolder(train_dataset_path)
    train.remove_too_long_seq(args.max_seq_len)
    train_loader = Dataset_Iter(train, batch_size=args.batch_size)
    train_loader.shuffle()

    valid_dataset_path = os.path.join(args.data, 'numpy_features_valid')
    valid = NpzFolder(valid_dataset_path)
    valid_loader = Dataset_Iter(valid, batch_size=args.batch_size)
    valid_loader.shuffle()

    # Build the TensorFlow graph.
    input0 = tf.placeholder(tf.int64, [None, None])    # phoneme sequences
    input1 = tf.placeholder(tf.float32, [None])        # sentence lengths
    speaker = tf.placeholder(tf.int32, [None, 1])      # speaker identity
    target0 = tf.placeholder(tf.float32, [None, None, 63])  # acoustic targets
    target1 = tf.placeholder(tf.float32, [None])       # target lengths
    start = tf.placeholder(tf.bool, shape=(), name='start_new_batch')
    train_flag = tf.placeholder(tf.bool, shape=(), name='train_flag')

    model = Loop(args)

    # Define loss and optimizer.
    output, attns = model.forward(input0, speaker, target0, start, train_flag)
    loss_op = MaskedMSE(output, target0, target1)
    optimizer = tf.train.AdamOptimizer(learning_rate=args.lr)
    train_op, clip_flag = gradient_check_and_clip(loss_op, optimizer,
                                                  args.clip_grad,
                                                  args.ignore_grad)
    merged = tf.summary.merge_all()

    # Initialize the variables (i.e. assign their default values).
    init = tf.global_variables_initializer()

    # Add ops to save and restore all the variables.
    saver = tf.train.Saver(global_variable_list)

    load_model = args.checkpoint != ''
    save_model = True
    best_eval = float('inf')
    sess_idx = 0
    train_losses = []
    valid_losses = []

    with tf.Session() as sess:
        train_writer = tf.summary.FileWriter(
            "%s/%s/train" % (args.outpath, expName), sess.graph)
        valid_writer = tf.summary.FileWriter(
            "%s/%s/valid" % (args.outpath, expName), sess.graph)

        # Run the initializer, then optionally restore variables from disk.
        sess.run(init)
        if load_model:
            saver.restore(sess, args.checkpoint)
            print("Model restored from file: %s" % args.checkpoint)

        for epoch in range(args.epochs):
            train_enum = tqdm(train_loader,
                              desc='Train epoch %d' % epoch,
                              total=ceil_on_division(len(train_loader),
                                                     args.batch_size))

            # Train data
            for batch_ind in train_enum:
                batch_loss_list = []
                (srcBatch, srcLengths), (tgtBatch, tgtLengths), full_spkr = \
                    make_a_batch(train_loader.dataset, batch_ind)
                # Split long sequences for truncated BPTT.
                batch_iter = TBPTTIter((srcBatch, srcLengths),
                                       (tgtBatch, tgtLengths),
                                       full_spkr, args.seq_len)

                for (srcBatch, srcLengths), (tgtBatch, tgtLengths), spkr, start2 in batch_iter:
                    loss, _, clip_flag1, summary = sess.run(
                        [loss_op, train_op, clip_flag, merged],
                        feed_dict={
                            input0: srcBatch,
                            speaker: spkr,
                            target0: tgtBatch,
                            target1: tgtLengths,
                            start: start2,
                            train_flag: True
                        })
                    train_writer.add_summary(summary, sess_idx)
                    sess_idx += 1
                    if not clip_flag1:
                        batch_loss_list.append(loss)
                    else:
                        # If too many '-' appear, gradients are exploding.
                        print('-')

                train_losses.append(batch_loss_list)
                if len(batch_loss_list) != 0:
                    batch_loss = sum(batch_loss_list) / len(batch_loss_list)
                    batch_loss_list.append(batch_loss)
                else:
                    batch_loss = -1.
                train_enum.set_description('Train (loss %.2f) epoch %d'
                                           % (batch_loss, epoch))
                train_enum.update(srcBatch.shape[0])

            # Validate data
            valid_enum = tqdm(valid_loader,
                              desc='Validating epoch %d' % epoch,
                              total=ceil_on_division(len(valid_loader),
                                                     args.batch_size))
            batch_loss_list = []
            for batch_ind in valid_enum:
                (srcBatch, srcLengths), (tgtBatch, tgtLengths), full_spkr = \
                    make_a_batch(valid_loader.dataset, batch_ind)
                loss, summary = sess.run(
                    [loss_op, merged],
                    feed_dict={
                        input0: srcBatch,
                        speaker: full_spkr,
                        target0: tgtBatch,
                        target1: tgtLengths,
                        start: True,
                        train_flag: False
                    })
                batch_loss_list.append(loss)
                valid_writer.add_summary(summary, sess_idx)
                sess_idx += 1
                valid_enum.set_description('Validating (loss %.2f) epoch %d'
                                           % (loss, epoch))

            if len(batch_loss_list) != 0:
                valid_losses.append(batch_loss_list)
                valid_loss = sum(batch_loss_list) / len(batch_loss_list)
            else:
                valid_loss = float('inf')

            if valid_loss < best_eval and save_model:
                best_eval = valid_loss
                save_path = saver.save(sess,
                                       "%s/bestmodel.ckpt" % args.expName)
                print("NEW BEST MODEL! Saved in file: %s" % save_path)

            print('Final validation loss for epoch %d is: %.2f'
                  % (epoch, valid_loss))
            train_loader.shuffle()
            valid_loader.shuffle()

        if save_model:
            save_path = saver.save(sess, "%s/model.ckpt" % args.expName)
            print("Model saved in file: %s" % save_path)

        train_writer.close()
        valid_writer.close()
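# ceil_on_division is referenced above but not defined in this file. It is
# presumably integer ceiling division, used to turn a dataset size into a
# batch count. A one-line sketch (an assumption, not the repo's code):

def ceil_on_division(numerator, denominator):
    # Ceiling division for positive integers without going through floats.
    return (numerator + denominator - 1) // denominator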
def main():
    start_epoch = 1
    model = Loop(args)
    model.cuda()

    if args.checkpoint != '':
        checkpoint_args_path = os.path.dirname(args.checkpoint) + '/args.pth'
        checkpoint_args = torch.load(checkpoint_args_path)
        start_epoch = checkpoint_args[3]
        model.load_state_dict(
            torch.load(args.checkpoint,
                       map_location=lambda storage, loc: storage))

    criterion = MaskedMSE().cuda()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Keep track of losses
    train_losses = []
    eval_losses = []
    best_eval = float('inf')
    training_monitor = TrainingMonitor(file=args.expNameRaw,
                                       exp_name=args.expNameRaw,
                                       b_append=True,
                                       path='training_logs')

    # Begin!
    for epoch in range(start_epoch, start_epoch + args.epochs):
        # Train the model.
        train(model, criterion, optimizer, epoch, train_losses)

        # Evaluate on the validation set.
        eval_loss = evaluate(model, criterion, epoch, eval_losses)

        # Save a checkpoint every epoch, so evaluation metrics can be
        # computed across the whole training curve later on.
        torch.save(model.state_dict(),
                   '%s/epoch_%d.pth' % (args.expName, epoch))
        torch.save([args, train_losses, eval_losses, epoch],
                   '%s/args.pth' % args.expName)

        if eval_loss < best_eval:
            # If this is the best model yet, save it as 'bestmodel'.
            torch.save(model.state_dict(), '%s/bestmodel.pth' % args.expName)
            best_eval = eval_loss

        # Also keep a running copy of 'lastmodel'.
        torch.save(model.state_dict(), '%s/lastmodel.pth' % args.expName)
        torch.save([args, train_losses, eval_losses, epoch],
                   '%s/args.pth' % args.expName)

        # Evaluate on a randomised subset of the training set.
        if epoch % args.eval_epochs == 0:
            train_eval_loader = ec.get_training_data_for_eval(
                data=args.data, len_valid=len(valid_loader.dataset))
            # metrics must be a tuple: ('loss') is just the string 'loss'.
            train_loss, _, _, _ = ec.evaluate(model=model,
                                              criterion=criterion,
                                              epoch=epoch,
                                              loader=train_eval_loader,
                                              metrics=('loss',))
        else:
            train_loss = None

        # Store loss metrics.
        training_monitor.insert(epoch=epoch,
                                valid_loss=eval_loss,
                                train_loss=train_loss)
        training_monitor.write()
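# TrainingMonitor is used above via insert()/write() but not defined in this
# file. A minimal sketch under the assumption that it buffers one row of
# per-epoch metrics and appends them to a CSV log (the real class may
# behave differently):

import csv
import os


class TrainingMonitor(object):
    def __init__(self, file, exp_name, b_append=True, path='training_logs'):
        self.exp_name = exp_name
        self.rows = []
        if not os.path.isdir(path):
            os.makedirs(path)
        self.fname = os.path.join(path, file + '.csv')
        if not b_append and os.path.exists(self.fname):
            os.remove(self.fname)

    def insert(self, **metrics):
        # Buffer one row of per-epoch metrics.
        self.rows.append(metrics)

    def write(self):
        # Flush buffered rows to the CSV log, writing a header for new files.
        if not self.rows:
            return
        new_file = not os.path.exists(self.fname)
        with open(self.fname, 'a') as f:
            writer = csv.DictWriter(f, fieldnames=sorted(self.rows[0]))
            if new_file:
                writer.writeheader()
            writer.writerows(self.rows)
        self.rows = []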