import os
from collections import OrderedDict

import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torchvision.transforms as transforms

# Project-local helpers (TrainOptions, TrainHistory, Checkpoint, Visualizer,
# model, ImageLoader, train, validate) come from this repo's own modules;
# their import paths are omitted here.


def main():
    opt = TrainOptions().parse()
    train_history = TrainHistory()
    checkpoint = Checkpoint(opt)
    visualizer = Visualizer(opt)
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_ids
    cudnn.benchmark = True

    """build graph"""
    net = model.CreateNet(opt)

    """optimizer"""
    optimizer = model.CreateAdamOptimizer(opt, net)
    # net = torch.nn.DataParallel(net).cuda()
    net.cuda()

    """optionally resume from a checkpoint"""
    checkpoint.load_checkpoint(net, optimizer, train_history)

    """load data"""
    train_list = os.path.join(opt.data_dir, opt.train_list)
    train_loader = torch.utils.data.DataLoader(
        ImageLoader(train_list, transforms.ToTensor(), is_train=True),
        batch_size=opt.bs, shuffle=True,
        num_workers=opt.nThreads, pin_memory=True)

    val_list = os.path.join(opt.data_dir, opt.val_list)
    val_loader = torch.utils.data.DataLoader(
        ImageLoader(val_list, transforms.ToTensor(), is_train=False),
        batch_size=opt.bs, shuffle=False,
        num_workers=opt.nThreads, pin_memory=True)

    """training and validation"""
    for epoch in range(opt.resume_epoch, opt.nEpochs):
        model.AdjustLR(opt, optimizer, epoch)

        # train for one epoch
        train_loss_det, train_loss_reg, train_loss = \
            train(train_loader, net, optimizer, epoch, visualizer)

        # evaluate on validation set
        val_loss_det, val_loss_reg, val_loss, det_rmse, reg_rmse = \
            validate(val_loader, net, epoch, visualizer, is_show=False)

        # update training history
        e = OrderedDict([('epoch', epoch)])
        lr = OrderedDict([('lr', opt.lr)])
        loss = OrderedDict([('train_loss_det', train_loss_det),
                            ('train_loss_reg', train_loss_reg),
                            ('val_loss_det', val_loss_det),
                            ('val_loss_reg', val_loss_reg)])
        rmse = OrderedDict([('det_rmse', det_rmse),
                            ('reg_rmse', reg_rmse)])
        train_history.update(e, lr, loss, rmse)
        checkpoint.save_checkpoint(net, optimizer, train_history)
        visualizer.plot_train_history(train_history)

        # re-run validation with visualization when this epoch is the best
        if train_history.is_best:
            visualizer.imgpts_win_id = 4
            validate(val_loader, net, epoch, visualizer, is_show=True)
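# The loop above calls model.AdjustLR, which is defined elsewhere in the repo.
# Below is a minimal sketch of a typical step-decay scheduler for comparison;
# the function name and the gamma/step defaults are assumptions for
# illustration, not this repo's actual API.
def adjust_lr_sketch(opt, optimizer, epoch, gamma=0.1, step=30):
    """Hypothetical step decay: lr = opt.lr * gamma ** (epoch // step)."""
    lr = opt.lr * (gamma ** (epoch // step))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr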
def main():
    opt = TrainOptions().parse()
    train_history = TrainHistory()
    checkpoint = Checkpoint()
    visualizer = Visualizer(opt)
    exp_dir = os.path.join(opt.exp_dir, opt.exp_id)
    log_name = opt.vis_env + 'log.txt'
    visualizer.log_name = os.path.join(exp_dir, log_name)
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_id

    # MPII has 16 annotated joints
    num_classes = 16
    net = create_cu_net(neck_size=4, growth_rate=32, init_chan_num=128,
                        num_classes=num_classes, layer_num=opt.layer_num,
                        max_link=1, inter_loss_num=opt.layer_num)
    net = torch.nn.DataParallel(net).cuda()

    global quan_op
    quan_op = QuanOp(net)
    optimizer = torch.optim.RMSprop(net.parameters(), lr=opt.lr, alpha=0.99,
                                    eps=1e-8, momentum=0, weight_decay=0)

    """optionally resume from a checkpoint"""
    checkpoint.save_prefix = exp_dir + '/'
    if opt.resume_prefix != '':
        checkpoint.load_prefix = os.path.join(exp_dir,
                                              opt.resume_prefix)[0:-1]
        checkpoint.load_checkpoint(net, optimizer, train_history)
        opt.lr = optimizer.param_groups[0]['lr']
        resume_log = True
    else:
        resume_log = False
    print('save prefix:', checkpoint.save_prefix)

    """load data"""
    train_loader = torch.utils.data.DataLoader(
        MPII('dataset/mpii-hr-lsp-normalizer.json', '/bigdata1/zt53/data',
             is_train=True),
        batch_size=opt.bs, shuffle=True,
        num_workers=opt.nThreads, pin_memory=True)
    val_loader = torch.utils.data.DataLoader(
        MPII('dataset/mpii-hr-lsp-normalizer.json', '/bigdata1/zt53/data',
             is_train=False),
        batch_size=opt.bs, shuffle=False,
        num_workers=opt.nThreads, pin_memory=True)
    print(type(optimizer))

    # joints used for evaluation: range(16) minus (6, 7, 8, 9, 12, 13)
    idx = [0, 1, 2, 3, 4, 5, 10, 11, 14, 15]
    logger = Logger(os.path.join(opt.exp_dir, opt.exp_id,
                                 'training-summary.txt'),
                    title='training-summary', resume=resume_log)
    logger.set_names(
        ['Epoch', 'LR', 'Train Loss', 'Val Loss', 'Train Acc', 'Val Acc'])

    if not opt.is_train:
        visualizer.log_path = os.path.join(opt.exp_dir, opt.exp_id,
                                           'val_log.txt')
        val_loss, val_pckh, predictions = validate(
            val_loader, net, train_history.epoch[-1]['epoch'],
            visualizer, idx, joint_flip_index, num_classes)
        checkpoint.save_preds(predictions)
        return

    """training and validation"""
    start_epoch = 0
    if opt.resume_prefix != '':
        start_epoch = train_history.epoch[-1]['epoch'] + 1
    for epoch in range(start_epoch, opt.nEpochs):
        adjust_lr(opt, optimizer, epoch)

        # train for one epoch
        train_loss, train_pckh = train(train_loader, net, optimizer, epoch,
                                       visualizer, idx, opt)

        # evaluate on validation set
        val_loss, val_pckh, predictions = validate(
            val_loader, net, epoch, visualizer, idx,
            joint_flip_index, num_classes)

        # update training history
        e = OrderedDict([('epoch', epoch)])
        lr = OrderedDict([('lr', optimizer.param_groups[0]['lr'])])
        loss = OrderedDict([('train_loss', train_loss),
                            ('val_loss', val_loss)])
        pckh = OrderedDict([('val_pckh', val_pckh)])
        train_history.update(e, lr, loss, pckh)
        checkpoint.save_checkpoint(net, optimizer, train_history, predictions)
        logger.append([epoch, optimizer.param_groups[0]['lr'], train_loss,
                       val_loss, train_pckh, val_pckh])
    logger.close()
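# The train/validate helpers above report PCKh, which is not defined in this
# file. Below is a minimal sketch of PCKh@0.5 under the common convention:
# a predicted joint counts as correct when its distance to the ground truth
# is at most half the head-segment length. The array shapes and the function
# name are assumptions for illustration, not this repo's exact interface.
import numpy as np

def pckh_sketch(preds, gts, head_sizes, idx, thr=0.5):
    """preds, gts: (N, K, 2) arrays; head_sizes: (N,); idx: joints to score."""
    dists = np.linalg.norm(preds - gts, axis=2)   # (N, K) pixel distances
    norm = dists / head_sizes[:, None]            # normalize per image
    correct = norm[:, idx] <= thr                 # within thr * head size
    return correct.mean()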
def main():
    opt = TrainOptions().parse()
    train_history = TrainHistory()
    checkpoint = Checkpoint(opt)
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_id

    """Architecture"""
    net = MakeLinearModel(1024, 16)
    net = torch.nn.DataParallel(net).cuda()
    checkpoint.load_checkpoint(net, train_history, '/best-single.pth.tar')

    """Uploading Mean and SD"""
    # mean and sd of 2d poses in the training dataset, stored as text files
    path_to_data = '.../multi-view-pose-estimation/dataset/'

    def load_stat(file_name, to_gpu):
        # `.cuda(async=True)` is a syntax error in Python 3;
        # non_blocking=True is the modern equivalent, and tensors created
        # with torch.from_numpy already have requires_grad=False
        arr = np.loadtxt(path_to_data + file_name).astype('float32')
        tensor = torch.from_numpy(arr)
        return tensor.cuda(non_blocking=True) if to_gpu else tensor

    Mean_2D = load_stat('Mean_2D.txt', to_gpu=False)
    Mean_Delta = load_stat('Mean_Delta.txt', to_gpu=True)
    Mean_3D = load_stat('Mean_3D.txt', to_gpu=True)
    SD_2D = load_stat('SD_2D.txt', to_gpu=False)
    SD_Delta = load_stat('SD_Delta.txt', to_gpu=True)
    SD_3D = load_stat('SD_3D.txt', to_gpu=True)

    """Loading Data"""
    train_list = 'train_list_4view.txt'
    train_loader = torch.utils.data.DataLoader(
        data4view.PtsList(train_list, is_train=True),
        batch_size=opt.bs, shuffle=True,
        num_workers=opt.nThreads, pin_memory=True)

    val_list = 'valid_list_4view.txt'
    val_loader = torch.utils.data.DataLoader(
        data4view.PtsList(val_list, is_train=False),
        batch_size=opt.bs, shuffle=False,
        num_workers=opt.nThreads, pin_memory=True)

    demo_list = 'demo_list_4view.txt'
    demo_loader = torch.utils.data.DataLoader(
        data4view.PtsList(demo_list, is_train=False),
        batch_size=1, shuffle=False,
        num_workers=opt.nThreads, pin_memory=True)

    """Optimizer"""
    optimizer = torch.optim.Adam(net.parameters(), lr=opt.lr,
                                 betas=(0.9, 0.999), weight_decay=0)

    """Validation"""
    # evaluate_mode / demo_mode are flags defined elsewhere in this file
    if evaluate_mode:
        # evaluate on validation set
        checkpoint.load_checkpoint(net, train_history, '/best-multi.pth.tar')
        val_loss, val_pckh = validate(val_loader, net, Mean_2D, Mean_Delta,
                                      Mean_3D, SD_2D, SD_Delta, SD_3D, 0, opt)
        return

    """Demo"""
    if demo_mode:
        # grab a random batch to visualize
        checkpoint.load_checkpoint(net, train_history, '/best-multi.pth.tar')
        demo(demo_loader, net, Mean_2D, Mean_Delta, Mean_3D,
             SD_2D, SD_Delta, SD_3D, 0, opt)
        return

    """Training"""
    for epoch in range(0, opt.nEpochs):
        adjust_learning_rate(optimizer, epoch, opt.lr)

        # train for one epoch
        train_loss = train(train_loader, net, Mean_2D, Mean_Delta, Mean_3D,
                           SD_2D, SD_Delta, SD_3D, optimizer, epoch, opt)

        # evaluate on validation set
        val_loss, val_pckh = validate(val_loader, net, Mean_2D, Mean_Delta,
                                      Mean_3D, SD_2D, SD_Delta, SD_3D,
                                      epoch, opt)

        # update training history
        e = OrderedDict([('epoch', epoch)])
        lr = OrderedDict([('lr', opt.lr)])
        loss = OrderedDict([('train_loss', train_loss),
                            ('val_loss', val_loss)])
        pckh = OrderedDict([('val_pckh', val_pckh)])
        train_history.update(e, lr, loss, pckh)
        checkpoint.save_checkpoint(net, train_history, 'best-multi.pth.tar')
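# The train/validate/demo helpers consume the Mean_* / SD_* tensors loaded
# above, presumably to whiten the 2D inputs and un-whiten the 3D outputs of
# the linear model. A minimal sketch of that convention follows; how the
# statistics are applied is an assumption for illustration, not this repo's
# confirmed code path.
def normalize_2d(pose_2d, Mean_2D, SD_2D):
    # standard-score the flattened 2D pose before feeding the linear model
    return (pose_2d - Mean_2D) / SD_2D

def denormalize_3d(pred_norm, Mean_3D, SD_3D):
    # map the normalized network output back to 3D coordinates
    return pred_norm * SD_3D + Mean_3D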