def main(args):
    """Train the keypoint network and write a checkpoint after every epoch.

    Builds the model selected by ``cfg.model``, optionally restores model and
    optimizer state from ``args.resume``, then trains from
    ``args.start_epoch`` up to ``args.epochs``. One row per epoch
    (epoch, learning rate, training loss) is appended to ``log.txt`` inside
    ``args.checkpoint``.

    If ``args.resume`` is given but the file does not exist, a message is
    printed and training starts from scratch with a fresh log (previously the
    logger was left undefined and the run crashed after the first epoch).

    :param args: Parsed command-line options. Reads ``checkpoint``, ``resume``,
        ``start_epoch``, ``epochs``, ``num_gpus`` and ``workers``;
        ``start_epoch`` is overwritten when a checkpoint is loaded.
    """
    # NOTE(review): the run is pinned to GPU index 3 - confirm this is intended.
    os.environ['CUDA_VISIBLE_DEVICES'] = '3'

    # create checkpoint dir
    if not isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # create model
    model = network.__dict__[cfg.model](cfg.output_shape, cfg.num_class, pretrained=False)
    model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion1 = torch.nn.MSELoss().cuda()  # for Global loss
    criterion2 = torch.nn.MSELoss(reduce=False).cuda()  # for refine loss
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=cfg.lr,
                                 weight_decay=cfg.weight_decay)

    log_path = join(args.checkpoint, 'log.txt')
    resumed = False
    if args.resume:
        if isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['state_dict'])
            args.start_epoch = checkpoint['epoch']
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
            resumed = True
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if resumed:
        logger = Logger(log_path, resume=True)
    else:
        # Fresh run, or the requested checkpoint was missing: start a new log
        # so that ``logger`` is always bound before the training loop.
        logger = Logger(log_path)
        logger.set_names(['Epoch', 'LR', 'Train Loss'])

    cudnn.benchmark = True
    # 4 bytes per parameter -> size in MB
    print(' Total params: %.2fMB' % (sum(p.numel() for p in model.parameters())/(1024*1024)*4))

    train_loader = torch.utils.data.DataLoader(
        MscocoMulti(cfg),
        batch_size=cfg.batch_size*args.num_gpus, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    for epoch in range(args.start_epoch, args.epochs):
        lr = adjust_learning_rate(optimizer, epoch, cfg.lr_dec_epoch, cfg.lr_gamma)
        print('\nEpoch: %d | LR: %.8f' % (epoch + 1, lr))

        # train for one epoch
        train_loss = train(train_loader, model, [criterion1, criterion2], optimizer)
        print('train_loss: ', train_loss)

        # append logger file
        logger.append([epoch + 1, lr, train_loss])

        save_model({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }, checkpoint=args.checkpoint)

    logger.close()
def main(args):
    """Run rotation-augmented prediction on the validation set and score it with COCO.

    Predictions are collected batch by batch, dumped to
    ``<args.result>/result.json`` and evaluated with ``COCOeval`` using the
    ``keypoints`` IoU type against ``cfg.ori_gt_path``.

    :param args: Parsed command-line options. Reads ``checkpoint``, ``test``,
        ``batch``, ``num_gpus``, ``workers`` and ``result``.
    """
    # Alternative loader kept for reference:
    # model = load_flattened_model_val(args.checkpoint, args.test)
    model = load_model_val(args.checkpoint, args.test)

    loader_kwargs = dict(
        batch_size=args.batch * args.num_gpus,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True,
    )
    test_loader = torch.utils.data.DataLoader(MscocoMulti(cfg, train=False), **loader_kwargs)

    print('testing...')
    full_result = []
    for batch_idx, (inputs, meta) in tqdm(enumerate(test_loader)):
        # Plain (non-rotated) variant:
        # full_result += Predict.predict_val(model, inputs, meta)
        full_result.extend(PredictWithRotation.predict_val(model, inputs, meta, 10))
        # NOTE(review): stops after batch index 100, so only the first 101
        # batches are predicted - confirm this cap is intentional.
        if batch_idx == 100:
            break

    out_dir = args.result
    if not isdir(out_dir):
        mkdir_p(out_dir)
    result_file = os.path.join(out_dir, 'result.json')
    with open(result_file, 'w') as handle:
        json.dump(full_result, handle)

    # evaluate on COCO
    ground_truth = COCO(cfg.ori_gt_path)
    detections = ground_truth.loadRes(result_file)
    coco_eval = COCOeval(ground_truth, detections, iouType='keypoints')
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
def _save_preds(dataset, data_output_dir):
    """
    Serialise the predictions to ``<data_output_dir>/3dposes`` with ``torch.save``.

    :param dataset: The PyTorch Dataset object of predictions
    :param data_output_dir: Directory that receives the file; created when missing
    """
    directory_missing = not isdir(data_output_dir)
    if directory_missing:
        mkdir_p(data_output_dir)

    target_path = data_output_dir + "/3dposes"
    torch.save(dataset, target_path)
def visualize_2d_overlay_3d_gt_3d_pred(options):
    """
    Same as visualize_2d_and_3d, but adds a ground truth visualization also.

    Images in the output from left to right are:
    1. original image with 2D pose overlayed
    2. 3D ground truth
    3. 3D prediction visualization

    Options that should be included:
    options.img_dir: the directory for the image
    options.twod_pose_estimations: a PyTorch file containing 2D pose estimations. Assumes the format of a dict,
        keyed by filenames
    options.threed_pose_ground_truths: a PyTorch file containing 3D pose ground truths
    options.threed_pose_estimations: a PyTorch file containing the 3D pose estimations. Assumes the format of a
        dict, keyed by filenames
    options.output_dir: a directory to output each visualization to

    Images in ``img_dir`` that have no entry in the 2D predictions are skipped.

    :param options: Options for the visualizations, defined in options.py. (Including defaults).
    """
    # Load the predictions and unpack options
    img_dir = options.img_dir
    twod_pose_preds = torch.load(options.twod_pose_estimations)
    threed_pose_ground_truths = torch.load(options.threed_pose_ground_truths)
    threed_pose_preds = torch.load(options.threed_pose_estimations)
    output_dir = options.output_dir

    # Make dir for output if it doesnt exist
    if not isdir(output_dir):
        mkdir_p(output_dir)

    i = 0
    total = len(twod_pose_preds)

    # Produce a visualization for each input image, outputting to 'output_dir' with the same image name as input
    for filename in os.listdir(img_dir):
        if not filename.endswith((".jpg", ".png")):
            continue

        # The prediction dicts are indexed by the bare filename below, so the
        # membership test must use the same key (it used the joined path before,
        # which never matched filename-keyed predictions).
        if filename not in twod_pose_preds:
            continue

        # Only read the image once we know it will be visualized
        img = scipy.misc.imread(os.path.join(img_dir, filename))

        twod_overlay = viz_2d_overlay(img, twod_pose_preds[filename])
        threed_gt_viz = viz_3d_pose(threed_pose_ground_truths[filename].numpy())
        threed_pose_viz = viz_3d_pose(threed_pose_preds[filename].numpy())

        final_img = _pack_images([twod_overlay, threed_gt_viz, threed_pose_viz])
        scipy.misc.imsave(os.path.join(output_dir, filename), final_img)

        # progress (counts visualized images, matching ``total`` = number of predictions)
        if i % 100 == 0:
            print("Visualized " + str(i) + " out of " + str(total))
        i += 1
def _save_preds(pred_2d, pred_3d, gt_2d, gt_3d, metas, data_output_dir):
    """
    Save predictions, ground truths and metadata as five separate files.

    The files ``2dpreds``, ``3dpreds``, ``2dgt``, ``3dgt`` and ``metas`` are
    written into ``data_output_dir`` (created when missing) via ``torch.save``.

    :param pred_2d: Object stored as ``2dpreds``
    :param pred_3d: Object stored as ``3dpreds``
    :param gt_2d: Object stored as ``2dgt``
    :param gt_3d: Object stored as ``3dgt``
    :param metas: Object stored as ``metas``
    :param data_output_dir: Directory that receives the files
    """
    if not isdir(data_output_dir):
        mkdir_p(data_output_dir)

    # (payload, path) pairs, written in the same order as before
    artefacts = (
        (pred_2d, data_output_dir + "/2dpreds"),
        (pred_3d, data_output_dir + "/3dpreds"),
        (gt_2d, data_output_dir + "/2dgt"),
        (gt_3d, data_output_dir + "/3dgt"),
        (metas, data_output_dir + "/metas"),
    )
    for payload, destination in artefacts:
        torch.save(payload, destination)
def graph_PCKh_scores(options):
    """
    Script that takes a list of predictions and plots the PCKh curves and saves the figure to a file

    One figure is written per key of the curves returned by
    ``compute_PCKh_curve`` (named ``graph_<key>_PCKh.jpg``); every figure
    overlays one line per model over thresholds 0.00 .. 0.49.

    Required options:
    options.prediction_files - a space seperated list of prediction files (output as part of the model checkpointing)
    options.model_names - a space seperated list of model names, used in the figure
    options.output_dir - specifies a directory to save the graph as an image as

    :param options: Options for the evaluation, defined in options.py. (Including defaults).
    :raises ValueError: If the number of prediction files and model names differ.
    """
    pred_files = options.prediction_files
    model_names = options.model_names
    if len(pred_files) != len(model_names):
        # ValueError is still an Exception, so existing handlers keep working
        raise ValueError("Options must be the same length")

    if not isdir(options.output_dir):
        mkdir_p(options.output_dir)

    curves = {
        name: compute_PCKh_curve(pred_file, name)
        for pred_file, name in zip(pred_files, model_names)
    }

    # Threshold axis is identical for every figure, so build it once
    thresholds = np.arange(0.0, 0.5, 0.01)

    for key in curves[model_names[0]]:
        fig = plt.figure(figsize=(10.0, 10.0))
        for model in model_names:
            plt.plot(thresholds, curves[model][key], label=model)
        plt.legend()
        plt.xlabel("Threshold")
        plt.ylabel("% joints correct")
        plt.title("PCKh curves")

        # convert fig to a numpy array
        # see: https://stackoverflow.com/questions/7821518/matplotlib-save-plot-to-numpy-array
        fig.canvas.draw()
        # np.frombuffer replaces the deprecated binary mode of np.fromstring
        data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
        data = data.reshape(fig.canvas.get_width_height()[::-1] + (3, ))

        # avoid unecessary memory consumption
        plt.close(fig)

        # save
        filename = os.path.join(options.output_dir, "graph_{joint}_PCKh.jpg".format(joint=key))
        scipy.misc.imsave(filename, data)
def _save_preds(twod_predictions, threed_predictions, data_output_dir):
    """
    Write the 2D and 3D predictions to ``2dpreds`` and ``3dpreds`` inside ``data_output_dir``.

    :param twod_predictions: The map object of 2D predictions that we wish to save
    :param threed_predictions: The map object of 3D predictions that we wish to save
    :param data_output_dir: Directory that receives both files; created when missing
    """
    if not isdir(data_output_dir):
        mkdir_p(data_output_dir)

    twod_path = data_output_dir + "/2dpreds"
    threed_path = data_output_dir + "/3dpreds"

    # Serialise with PyTorch, 2D first
    torch.save(twod_predictions, twod_path)
    torch.save(threed_predictions, threed_path)
def visualize_2d_overlay(options):
    """
    Overlay the 2D pose predictions on every image of a directory.

    For each ``.jpg``/``.png`` file in ``options.img_dir`` that has an entry in
    the 2D predictions, the image with its pose overlay is written to
    ``options.output_dir`` under the same filename. Only the 2D overlay is
    produced here.

    Options that should be included:
    options.img_dir: the directory for the image
    options.twod_pose_estimations: a PyTorch file containing 2D pose estimations. Assumed to be a dict
        keyed by filenames
    options.output_dir: a directory to output each visualization to

    :param options: Options for the visualizations, defined in options.py. (Including defaults).
    """
    source_dir = options.img_dir
    pose_by_filename = torch.load(options.twod_pose_estimations)
    target_dir = options.output_dir

    # Create the output directory on first use
    if not isdir(target_dir):
        mkdir_p(target_dir)

    image_suffixes = (".jpg", ".png")
    i = 0
    total = len(os.listdir(source_dir))

    for filename in os.listdir(source_dir):
        if filename.endswith(image_suffixes):
            # Images without a prediction are skipped (and not counted)
            if filename not in pose_by_filename:
                continue
            image = scipy.misc.imread(os.path.join(source_dir, filename))
            overlay = viz_2d_overlay(image, pose_by_filename[filename])
            scipy.misc.imsave(os.path.join(target_dir, filename), overlay)

        # progress
        # NOTE(review): assumes the counter advances once per directory entry,
        # consistent with ``total`` being the directory size - confirm.
        if i % 100 == 0:
            print("Visualized " + str(i) + " out of " + str(total))
        i += 1
def visualize_2d_pred_3d_gt_3d_pred(options):
    """
    Visualize the 2D and 3D pose estimations on matplotlib axes. This is just an interface for twod_threed's
    visualizations

    For every sequence in the 2D ground truths, at most the first 100 frames
    are rendered. Each output image packs, left to right: the 2D ground truth,
    the 3D ground truth and the 3D prediction, and is saved as
    ``<key>_<frame>.jpg`` in the output directory.

    Options that should be included:
    options.twod_pose_ground_truths: a PyTorch file containing 2D pose ground truths.
    options.threed_pose_ground_truths: a PyTorch file containing 3D pose ground truths.
    options.threed_pose_estimations: a PyTorch file containing 3D pose estimations.
    options.output_dir: A directory to output each visualization to

    :param options: Options for the visualizations, defined in options.py. (Including defaults).
    """
    # Unpack options
    twod_pose_ground_truths = torch.load(options.twod_pose_ground_truths)
    # The 3D ground truths were referenced below but never loaded
    threed_pose_ground_truths = torch.load(options.threed_pose_ground_truths)
    threed_pose_preds = torch.load(options.threed_pose_estimations)
    output_dir = options.output_dir

    # Make dir for output if it doesnt exist
    if not isdir(output_dir):
        mkdir_p(output_dir)

    i = 0
    total = len(twod_pose_ground_truths)

    # Loop through each pose (each item in the dict is an array (in time) of 2d poses
    for k2d in twod_pose_ground_truths:
        # 3D ground truths use a different key; predictions are indexed by the 2D key
        k3d = get_3d_key_from_2d_key(k2d)
        num_frames = min(len(twod_pose_ground_truths[k2d]), 100)
        for t in range(num_frames):
            twod_gt_viz = viz_2d_pose(twod_pose_ground_truths[k2d][t])
            threed_gt_viz = viz_3d_pose(threed_pose_ground_truths[k3d][t])
            threed_pred_viz = viz_3d_pose(threed_pose_preds[k2d][t].numpy())

            final_img = _pack_images([twod_gt_viz, threed_gt_viz, threed_pred_viz])
            scipy.misc.imsave(os.path.join(output_dir, str(k2d)+"_"+str(t)+".jpg"), final_img)

        # progress (one tick per sequence, matching ``total``)
        if i % 100 == 0:
            print("Visualized " + str(i) + " out of " + str(total))
        i += 1
def main(args):
    """Predict 24 keypoints per test image, write them to CSV and score the file.

    The refine-branch heatmaps are (optionally) averaged with the prediction
    on the horizontally flipped input. For every keypoint channel the peak of
    the blurred heatmap is located, nudged a quarter pixel towards the second
    highest response and mapped back to original image coordinates using the
    crop box in ``meta['augmentation_details']``.

    Each CSV row is ``[imgid, category, "x_y_1", ...]``. The evaluator is run
    on the file that was just written (previously it read ``result9.csv``,
    which this function never produces).

    :param args: Parsed command-line options. Reads ``batch``, ``num_gpus``,
        ``workers``, ``flip`` and ``result``.
    """
    num_keypoints = 24
    border = 10   # padding added around the heatmap before blurring
    delta = 0.25  # sub-pixel shift towards the second-highest response

    # create model
    model = network.__dict__[cfg.model](cfg.output_shape, cfg.num_class, pretrained=False)
    model = torch.nn.DataParallel(model).cuda()

    test_loader = torch.utils.data.DataLoader(
        MscocoMulti(cfg, train=False),
        batch_size=args.batch * args.num_gpus, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # load training weights
    # NOTE(review): the checkpoint path is hard-coded; the args-based variant is
    # checkpoint_file = os.path.join(args.checkpoint, args.test + '.pth.tar')
    checkpoint_file = os.path.join('model', 'checkpoint', 'epoch9checkpoint.pth.tar')
    checkpoint = torch.load(checkpoint_file)
    model.load_state_dict(checkpoint['state_dict'])
    print("=> loaded checkpoint '{}' (epoch {})".format(checkpoint_file, checkpoint['epoch']))

    # change to evaluation mode
    model.eval()

    print('testing...')
    full_result = []
    for _, (inputs, meta) in tqdm(enumerate(test_loader)):
        with torch.no_grad():
            input_var = torch.autograd.Variable(inputs.cuda())
            if args.flip:
                flip_inputs = inputs.clone()
                for k, finp in enumerate(flip_inputs):
                    finp = im_to_numpy(finp)
                    finp = cv2.flip(finp, 1)
                    flip_inputs[k] = im_to_torch(finp)
                flip_input_var = torch.autograd.Variable(flip_inputs.cuda())

            # compute output
            global_outputs, refine_output = model(input_var)
            score_map = refine_output.data.cpu().numpy()

            if args.flip:
                flip_global_outputs, flip_output = model(flip_input_var)
                flip_score_map = flip_output.data.cpu().numpy()

                for k, fscore in enumerate(flip_score_map):
                    # Flip the heatmaps back, then swap left/right channels
                    fscore = fscore.transpose((1, 2, 0))
                    fscore = cv2.flip(fscore, 1)
                    fscore = list(fscore.transpose((2, 0, 1)))
                    for (q, w) in cfg.symmetry:
                        fscore[q], fscore[w] = fscore[w], fscore[q]
                    fscore = np.array(fscore)
                    score_map[k] += fscore
                    score_map[k] /= 2

            details = meta['augmentation_details']
            for b in range(inputs.size(0)):
                imgid = meta['imgid'][b]
                category = meta['category'][b]
                single_result = []

                single_map = score_map[b]
                for p in range(num_keypoints):
                    single_map[p] /= np.amax(single_map[p])
                    dr = np.zeros((cfg.output_shape[0] + 2 * border,
                                   cfg.output_shape[1] + 2 * border))
                    dr[border:-border, border:-border] = single_map[p].copy()
                    dr = cv2.GaussianBlur(dr, (21, 21), 0)

                    # highest response
                    lb = dr.argmax()
                    y, x = np.unravel_index(lb, dr.shape)
                    # second-highest response, expressed as an offset from the first
                    dr[y, x] = 0
                    lb = dr.argmax()
                    py, px = np.unravel_index(lb, dr.shape)
                    y -= border
                    x -= border
                    py -= border + y
                    px -= border + x
                    ln = (px ** 2 + py ** 2) ** 0.5
                    if ln > 1e-3:
                        x += delta * px / ln
                        y += delta * py / ln
                    x = max(0, min(x, cfg.output_shape[1] - 1))
                    y = max(0, min(y, cfg.output_shape[0] - 1))

                    # heatmap coordinates -> original image coordinates
                    resy = float((4 * y + 2) / cfg.data_shape[0] * (details[b][3] - details[b][1]) + details[b][1])
                    resx = float((4 * x + 2) / cfg.data_shape[1] * (details[b][2] - details[b][0]) + details[b][0])
                    single_result.extend([resx, resy, 1])

                if single_result:
                    result = [imgid, category]
                    # single_result is a flat list of (x, y, visibility) triples
                    for j in range(0, len(single_result), 3):
                        result.append(str(int(single_result[j])) + '_' + str(int(single_result[j + 1])) + '_1')
                    full_result.append(result)

    result_path = args.result
    if not isdir(result_path):
        mkdir_p(result_path)
    result_file = os.path.join(result_path, 'result.csv')
    with open(result_file, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerows(full_result)

    # Score the file written above
    Evaluator = FaiKeypoint2018Evaluator(userAnswerFile=result_file,
                                         standardAnswerFile="fashionAI_key_points_test_a_answer_20180426.csv")
    score = Evaluator.evaluate()
    print(score)
    Evaluator.writerror(result_path=os.path.join(result_path, "toperror1.csv"))
# Initialise the weights of ``net`` with ``weight_init``.
net.apply(weight_init)

# To start from pretrained weights instead:
# net.load_state_dict(torch.load('path/to/pretrain.pth'))

# Wrap the network for multi-GPU execution when more than one device is visible.
gpu_count = torch.cuda.device_count()
if gpu_count > 1:
    print("Let's use", gpu_count, "GPUs!")
    net = nn.DataParallel(net)
net.to(device)

# Optimizer: SGD with Nesterov momentum.
# Alternatives that were tried:
# optimizer = optim.Adam(net.parameters(), lr=opt.lr, weight_decay=opt.weight_decay)
# optimizer = optim.RMSprop(net.parameters(), lr=opt.lr, momentum=opt.momentum, weight_decay=opt.weight_decay)
optimizer = optim.SGD(
    net.parameters(),
    lr=opt.lr,
    momentum=opt.momentum,
    weight_decay=opt.wd,
    nesterov=True,
)

# Learning-rate schedule: multiply the rate by 0.1 every 10 scheduler steps.
# Alternatives that were tried:
# scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=opt.gamma)
# scheduler = optim.lr_scheduler.LambdaLR(optimizer, lambda epoch: 0.95**epoch)
# scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[30,80], gamma=0.1)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

dataloaders = get_dataloaders(batch_size=opt.batch_size, num_workers=opt.workers)

# Make sure the output directory exists before training starts.
# opt.output = f'{opt.output}'
if not isdir(opt.output):
    mkdir_p(opt.output)

# train model
train_model(net, dataloaders, optimizer, scheduler, num_epochs=opt.epoch)
def _main_regression(opt):
    """
    Train and evaluate the 3D baseline (2D-to-3D regression), one epoch at a time.

    Per epoch the model is trained with ``_train`` and evaluated with
    ``_test``; losses and the validation error go to TensorBoard and to
    ``log.txt``. A "last" checkpoint is written every epoch, plus a "best"
    checkpoint whenever the test error improves.

    :param opt: Options object. Reads, among others, ``lr``, ``checkpoint_dir``,
        ``tb_dir``, ``exp``, ``load``, ``resume``, ``action`` and ``epochs``.
    """
    start_epoch, err_best, glob_step = 0, 1000, 0
    lr_now = opt.lr

    # Persist the options next to the checkpoints
    log.save_options(opt, opt.checkpoint_dir)

    # TensorBoard writer
    writer = SummaryWriter(log_dir="%s/2d3d_h36m_%s_tb_log" % (opt.tb_dir, opt.exp))

    # Model, loss and optimizer
    print(">>> creating model")
    model = LinearModel(dataset_normalized_input=opt.dataset_normalization).cuda()
    model.apply(weight_init)
    param_count = sum(p.numel() for p in model.parameters())
    print(">>> total params: {:.2f}M".format(param_count / 1000000.0))
    criterion = nn.MSELoss(size_average=True).cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)

    # Restore training state from a checkpoint when requested
    if opt.load:
        print(">>> loading ckpt from '{}'".format(opt.load))
        ckpt = torch.load(opt.load)
        start_epoch = ckpt['epoch']
        err_best = ckpt['err']
        glob_step = ckpt['step']
        lr_now = ckpt['lr']
        model.load_state_dict(ckpt['state_dict'])
        optimizer.load_state_dict(ckpt['optimizer'])
        print(">>> ckpt loaded (epoch: {} | err: {})".format(start_epoch, err_best))

    # Text log: append when resuming, otherwise start a new one with a header
    log_file = os.path.join(opt.checkpoint_dir, 'log.txt')
    if opt.resume:
        logger = log.Logger(log_file, resume=True)
    else:
        logger = log.Logger(log_file)
        logger.set_names(['epoch', 'lr', 'loss_train', 'loss_test', 'err_test'])

    # list of action(s)
    actions = misc.define_actions(opt.action)
    print(">>> actions to use (total: {}):".format(len(actions)))
    pprint(actions, indent=4)
    print(">>>")

    # data loading
    print(">>> loading data")
    train_dataset, train_loader, test_loader = _make_torch_data_loaders(opt, actions)
    stat_3d = train_dataset.get_stat_3d()
    print(">>> data loaded !")

    cudnn.benchmark = True
    ckpt_dir = "%s/2d3d_h36m_%s" % (opt.checkpoint_dir, opt.exp)

    for epoch in range(start_epoch, opt.epochs):
        print('==========================')
        print('>>> epoch: {} | lr: {:.5f}'.format(epoch + 1, lr_now))

        # One pass over the training data, then evaluation
        glob_step, lr_now, loss_train = _train(
            train_loader, model, criterion, optimizer, writer,
            lr_init=opt.lr, lr_now=lr_now, glob_step=glob_step,
            lr_decay=opt.lr_decay, gamma=opt.lr_gamma,
            no_grad_clipping=opt.no_grad_clipping, grad_clip=opt.grad_clip,
            tb_log_freq=opt.tb_log_freq, use_horovod=opt.use_horovod)
        loss_test, err_test = _test(test_loader, model, criterion,
                                    opt.dataset_normalization,
                                    procrustes=opt.procrustes)

        # TensorBoard summaries
        writer.add_scalars('data/epoch/loss',
                           {'train_loss': loss_train, 'test_loss': loss_test},
                           epoch)
        writer.add_scalar('data/epoch/validation_error', err_test, epoch)

        # Text log
        logger.append([epoch + 1, lr_now, loss_train, loss_test, err_test],
                      ['int', 'float', 'float', 'float', 'float'])

        # Checkpointing
        if not isdir(ckpt_dir):
            mkdir_p(ckpt_dir)

        is_best = err_test < err_best
        err_best = min(err_test, err_best)
        snapshot = {
            'epoch': epoch + 1,
            'lr': lr_now,
            'step': glob_step,
            'err': err_best,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        if is_best:
            log.save_ckpt(snapshot, ckpt_path=ckpt_dir, is_best=True)
        # The most recent state is saved every epoch, best or not
        log.save_ckpt(snapshot, ckpt_path=ckpt_dir, is_best=False)

    logger.close()
    writer.close()
def main(args):
    """Detect keypoints for chair/bed/sofa images and dump them to ``result.json``.

    The refine-branch heatmaps are (optionally) averaged with the prediction
    on the horizontally flipped input. For each sample the heatmap channel
    matching its class is normalised, peaks are extracted with
    ``parseHeatmap`` and mapped back to original image coordinates using the
    crop box in ``meta['augmentation_details']``.

    :param args: Parsed command-line options. Reads ``batch``, ``num_gpus``,
        ``workers``, ``checkpoint``, ``test``, ``flip`` and ``result``.
    :raises ValueError: If a sample's class is not one of chair/bed/sofa.
        Previously the channel index was left unbound (or silently reused
        from the previous sample) in that case.
    """
    # heatmap channel used for each object class
    class_to_channel = {'chair': 0, 'bed': 1, 'sofa': 2}
    border = 9  # offset subtracted from the peak coordinates returned by parseHeatmap

    # create model
    model = network.__dict__[cfg.model](cfg.output_shape, cfg.num_class, pretrained=False)
    model = torch.nn.DataParallel(model).cuda()

    test_loader = torch.utils.data.DataLoader(
        MscocoMulti(cfg, train=False),
        batch_size=args.batch * args.num_gpus, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # load training weights
    checkpoint_file = os.path.join(args.checkpoint, args.test + '.pth.tar')
    checkpoint = torch.load(checkpoint_file)
    model.load_state_dict(checkpoint['state_dict'])
    print("=> loaded checkpoint '{}' (epoch {})".format(
        checkpoint_file, checkpoint['epoch']))

    # change to evaluation mode
    model.eval()

    print('testing...')
    full_result = []
    for _, (inputs, meta) in tqdm(enumerate(test_loader)):
        with torch.no_grad():
            input_var = torch.autograd.Variable(inputs.cuda())
            if args.flip:
                flip_inputs = inputs.clone()
                for k, finp in enumerate(flip_inputs):
                    finp = im_to_numpy(finp)
                    finp = cv2.flip(finp, 1)
                    flip_inputs[k] = im_to_torch(finp)
                flip_input_var = torch.autograd.Variable(flip_inputs.cuda())

            # compute output
            global_outputs, refine_output = model(input_var)
            score_map = refine_output.data.cpu().numpy()

            if args.flip:
                flip_global_outputs, flip_output = model(flip_input_var)
                flip_score_map = flip_output.data.cpu().numpy()

                for k, fscore in enumerate(flip_score_map):
                    # Flip the heatmaps back and average with the direct prediction.
                    # No left/right channel swap here: channels are object
                    # classes, and the cfg.symmetry swap was disabled.
                    fscore = fscore.transpose((1, 2, 0))
                    fscore = cv2.flip(fscore, 1)
                    fscore = np.array(list(fscore.transpose((2, 0, 1))))
                    score_map[k] += fscore
                    score_map[k] /= 2

            ids = meta['imgID'].numpy()
            imgclass = meta['class']
            details = meta['augmentation_details']
            for b in range(inputs.size(0)):
                single_result = []
                single_map = score_map[b]

                if imgclass[b] not in class_to_channel:
                    raise ValueError(
                        "unknown image class '{}' for image id {}".format(
                            imgclass[b], int(ids[b])))
                c = class_to_channel[imgclass[b]]

                single_map[c] /= np.amax(single_map[c])
                # ps[0] holds row (height) indices, ps[1] column (width) indices
                ps = parseHeatmap(single_map[c], thresh=0.20)
                for k in range(len(ps[0])):
                    row = ps[0][k] - border
                    col = ps[1][k] - border
                    # heatmap coordinates -> original image coordinates
                    resy = float((4 * row + 2) / cfg.data_shape[0] * (details[b][3] - details[b][1]) + details[b][1])
                    resx = float((4 * col + 2) / cfg.data_shape[1] * (details[b][2] - details[b][0]) + details[b][0])
                    single_result.extend([resx, resy, 1])

                if single_result:
                    full_result.append({
                        'image_id': int(ids[b]),
                        'class': imgclass[b],
                        'keypoints': single_result,
                    })

    result_path = args.result
    if not isdir(result_path):
        mkdir_p(result_path)
    result_file = os.path.join(result_path, 'result.json')
    with open(result_file, 'w') as wf:
        json.dump(full_result, wf)
def main():
    """Train the HRNet pose model with AdaBound and checkpoint every epoch.

    Parses the command line, builds the model named by
    ``cfg_hrnet.MODEL.NAME``, optionally restores model and optimizer state
    from ``args.resume``, then trains from ``args.start_epoch`` up to
    ``args.epochs``. One row per epoch is appended to ``log.txt`` inside
    ``args.checkpoint``.

    If ``args.resume`` is given but the file does not exist, a message is
    printed and training starts from scratch with a fresh log (previously the
    logger was left undefined and the run crashed after the first epoch).
    """
    args = parse_args()
    update_config(cfg_hrnet, args)

    # create checkpoint dir
    if not isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # create model
    # NOTE(review): eval() on a config-supplied name - only safe while the
    # config is trusted; consider getattr(models, NAME).get_pose_net.
    model = eval('models.' + cfg_hrnet.MODEL.NAME + '.get_pose_net')(
        cfg_hrnet, is_train=True)
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    # input geometry (used by the optional net_vision(model, args) display)
    args.channels = 3
    args.height = cfg.data_shape[0]
    args.width = cfg.data_shape[1]

    # define loss function (criterion) and optimizer
    criterion = torch.nn.MSELoss(reduction='mean').cuda()
    optimizer = AdaBound(model.parameters(),
                         lr=cfg.lr,
                         weight_decay=cfg.weight_decay)

    log_path = join(args.checkpoint, 'log.txt')
    resumed = False
    if args.resume:
        if isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['state_dict'])
            args.start_epoch = checkpoint['epoch']
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            resumed = True
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if resumed:
        logger = Logger(log_path, resume=True)
    else:
        # Fresh run, or the requested checkpoint was missing: start a new log
        # so that ``logger`` is always bound before the training loop.
        logger = Logger(log_path)
        logger.set_names(['Epoch', 'LR', 'Train Loss'])

    cudnn.benchmark = True
    torch.backends.cudnn.enabled = True
    # 4 bytes per parameter -> size in MB
    print(' Total params: %.2fMB' %
          (sum(p.numel() for p in model.parameters()) / (1024 * 1024) * 4))

    # Loader runs with DataLoader defaults; shuffle/num_workers/pin_memory
    # were deliberately left disabled in the original call.
    train_loader = torch.utils.data.DataLoader(
        KPloader(cfg),
        batch_size=cfg.batch_size * len(args.gpus))

    for epoch in range(args.start_epoch, args.epochs):
        lr = adjust_learning_rate(optimizer, epoch, cfg.lr_dec_epoch, cfg.lr_gamma)
        print('\nEpoch: %d | LR: %.8f' % (epoch + 1, lr))

        # train for one epoch
        train_loss = train(train_loader, model, criterion, optimizer)
        print('train_loss: ', train_loss)

        # append logger file
        logger.append([epoch + 1, lr, train_loss])

        save_model(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            },
            checkpoint=args.checkpoint)

    logger.close()
def main():
    """Train the CPN model with AdaBound, saving a CPU checkpoint every epoch.

    Parses the command line, builds the model selected by ``cfg.model`` and,
    when ``args.resume`` points to an existing file, restores the weights,
    start epoch and learning rate from it. Otherwise training starts from
    ``cfg.lr`` with a fresh log. (Previously the fresh-start branch was
    unreachable behind ``if 1:``, so a run without a valid checkpoint failed
    on an undefined ``lr``/``logger``.)

    Each epoch writes ``epoch<N>checkpoint.ckpt`` into ``args.checkpoint``
    containing the unwrapped (non-DataParallel) weights moved to CPU, the
    epoch number and the learning rate.
    """
    args = parse_args()

    # create checkpoint dir
    if not isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # create model
    model = network.__dict__[cfg.model](cfg.channel_settings,
                                        cfg.output_shape,
                                        cfg.num_class,
                                        pretrained=True)

    # input geometry (used by the optional net_vision(model, args) display)
    args.channels = 3
    args.height = cfg.data_shape[0]
    args.width = cfg.data_shape[1]

    log_path = join(args.checkpoint, 'log.txt')
    if args.resume and isfile(args.resume):
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        # Weights are loaded before the DataParallel wrap below, matching the
        # unwrapped state dict that this function saves.
        model.load_state_dict(checkpoint['state_dict'])
        args.start_epoch = checkpoint['epoch']
        lr = checkpoint['lr']
        print("=> loaded checkpoint '{}' (epoch {})".format(
            args.resume, checkpoint['epoch']))
        logger = Logger(log_path, resume=True)
    else:
        if args.resume:
            print("=> no checkpoint found at '{}'".format(args.resume))
        # assumes parse_args() supplies a default args.start_epoch - TODO confirm
        lr = cfg.lr
        logger = Logger(log_path)
        logger.set_names(['Epoch', 'LR', 'Train Loss'])

    # define loss function (criterion) and optimizer
    criterion1 = torch.nn.MSELoss().cuda()  # for Global loss
    criterion2 = torch.nn.MSELoss(reduce=False).cuda()  # for refine loss

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    cudnn.benchmark = True
    torch.backends.cudnn.enabled = True
    # 4 bytes per parameter -> size in MB
    print(' Total params: %.2fMB' %
          (sum(p.numel() for p in model.parameters()) / (1024 * 1024) * 4))

    # Loader runs with DataLoader defaults; shuffle/num_workers/pin_memory
    # were deliberately left disabled in the original call.
    train_loader = torch.utils.data.DataLoader(
        KPloader(cfg),
        batch_size=cfg.batch_size * len(args.gpus))

    optimizer = AdaBound(model.parameters(),
                         lr=lr,
                         weight_decay=cfg.weight_decay)

    for epoch in range(args.start_epoch, args.epochs):
        lr = adjust_learning_rate(optimizer, epoch, cfg.lr_dec_epoch, cfg.lr_gamma)
        print('\nEpoch: %d | LR: %.8f' % (epoch + 1, lr))

        # train for one epoch
        train_loss = train(train_loader, model, [criterion1, criterion2], optimizer)
        print('train_loss: ', train_loss)

        # append logger file
        logger.append([epoch + 1, lr, train_loss])

        # Save the unwrapped weights on CPU. Values are replaced in place so
        # the state dict object (and anything attached to it) is preserved.
        state_dict = model.module.state_dict()
        for key in state_dict.keys():
            state_dict[key] = state_dict[key].cpu()
        checkpoint_path = os.path.join(
            args.checkpoint, "epoch" + str(epoch + 1) + "checkpoint.ckpt")
        torch.save({
            'epoch': epoch + 1,
            'state_dict': state_dict,
            'lr': lr,
        }, checkpoint_path)
        print("=> Save model done! the path: ", checkpoint_path)

    logger.close()
def test(test_loader, model):
    """Predict 17 keypoints per sample, write ``result/result.json`` and run COCO evaluation.

    The refine-branch heatmaps are averaged with the prediction on the
    horizontally flipped input. For every keypoint channel the peak of the
    blurred heatmap is located, nudged a quarter pixel towards the second
    highest response and mapped back to original image coordinates using the
    crop box in ``meta['augmentation_details']``. Each result's score is the
    detection score times the mean of the per-keypoint heatmap values.

    :param test_loader: Iterable yielding ``(inputs, meta)`` batches.
    :param model: Network returning ``(global_outputs, refine_output)``.
    """
    model.eval()

    print('testing...')
    full_result = []
    flip = True
    for _, (inputs, meta) in tqdm(enumerate(test_loader)):
        with torch.no_grad():
            input_var = torch.autograd.Variable(inputs.cuda())
            if flip:
                # Build the mirrored batch sample by sample
                flip_inputs = inputs.clone()
                for idx, sample in enumerate(flip_inputs):
                    mirrored = cv2.flip(im_to_numpy(sample), 1)
                    flip_inputs[idx] = im_to_torch(mirrored)
                flip_input_var = torch.autograd.Variable(flip_inputs.cuda())

            # compute output
            global_outputs, refine_output = model(input_var)
            score_map = refine_output.data.cpu().numpy()

            if flip:
                flip_global_outputs, flip_output = model(flip_input_var)
                flip_score_map = flip_output.data.cpu().numpy()

                for idx, fscore in enumerate(flip_score_map):
                    # Mirror the heatmaps back, swap left/right joints, then average
                    fscore = cv2.flip(fscore.transpose((1, 2, 0)), 1)
                    channels = list(fscore.transpose((2, 0, 1)))
                    for (q, w) in test_cfg.symmetry:
                        channels[q], channels[w] = channels[w], channels[q]
                    score_map[idx] += np.array(channels)
                    score_map[idx] /= 2

            ids = meta['imgID'].numpy()
            det_scores = meta['det_scores']
            for b in range(inputs.size(0)):
                details = meta['augmentation_details']
                single_result = []

                single_map = score_map[b]
                # Copy taken before normalisation; read back for the keypoint score
                r0 = single_map.copy()
                r0 /= 255
                r0 += 0.5
                v_score = np.zeros(17)
                for p in range(17):
                    single_map[p] /= np.amax(single_map[p])
                    border = 10
                    padded_shape = (test_cfg.output_shape[0] + 2 * border,
                                    test_cfg.output_shape[1] + 2 * border)
                    dr = np.zeros(padded_shape)
                    dr[border:-border, border:-border] = single_map[p].copy()
                    dr = cv2.GaussianBlur(dr, (21, 21), 0)

                    # highest response
                    y, x = np.unravel_index(dr.argmax(), dr.shape)
                    # second-highest response, as an offset from the first
                    dr[y, x] = 0
                    py, px = np.unravel_index(dr.argmax(), dr.shape)
                    y -= border
                    x -= border
                    py -= border + y
                    px -= border + x
                    ln = (px**2 + py**2)**0.5
                    delta = 0.25
                    if ln > 1e-3:
                        x += delta * px / ln
                        y += delta * py / ln
                    x = max(0, min(x, test_cfg.output_shape[1] - 1))
                    y = max(0, min(y, test_cfg.output_shape[0] - 1))

                    # heatmap coordinates -> original image coordinates
                    resy = float((4 * y + 2) / test_cfg.data_shape[0] *
                                 (details[b][3] - details[b][1]) + details[b][1])
                    resx = float((4 * x + 2) / test_cfg.data_shape[1] *
                                 (details[b][2] - details[b][0]) + details[b][0])
                    v_score[p] = float(r0[p, int(round(y) + 1e-10),
                                          int(round(x) + 1e-10)])
                    single_result.extend([resx, resy, 1])

                if single_result:
                    full_result.append({
                        'image_id': int(ids[b]),
                        'category_id': 1,
                        'keypoints': single_result,
                        'score': float(det_scores[b]) * v_score.mean(),
                    })

    out_dir = 'result'
    if not isdir(out_dir):
        mkdir_p(out_dir)
    result_file = os.path.join(out_dir, 'result.json')
    with open(result_file, 'w') as handle:
        json.dump(full_result, handle)

    # evaluate on COCO
    ground_truth = COCO(test_cfg.ori_gt_path)
    detections = ground_truth.loadRes(result_file)
    coco_eval = COCOeval(ground_truth, detections, iouType='keypoints')
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
def visualize_saliency_and_prob_maps(options, skeleton_overlay=False):
    """
    Visualize 2D joint predictions together with their saliency maps.

    For each joint one row of images is produced:
        Original Image, Joint Prediction, Saliency Map, Overlayed Saliency Map
    The rows of all joints are stacked into one big column per input image,
    which is saved under ``options.output_dir`` using the image's filename.

    Options that should be included:
        options.img_dir: directory for the image(s)
        options.load: specifies the location of the saved (hourglass) model
        options.output_dir: directory the visualization images are written to
        options.use_max_for_saliency_map: differentiate the max (instead of
            the sum) of each joint's score map
        <any other model specific options you specified for training,
         e.g. --use_layer_norm, or --stacks 4>

    :param options: The options used to specify where to load images from and where to save the output etc
    :param skeleton_overlay: If we should overlay the image with a skeleton
    :return: Nothing
    """
    upsample_4x4 = torch.nn.Upsample(scale_factor=4)

    # If the output directory doesn't exist, make it
    if not isdir(options.output_dir):
        mkdir_p(options.output_dir)

    # Load model (use helper from StackedHourglass' run.py)
    model, dataset = _load_model_and_dataset(options.load, options.img_dir, options)

    num_images = len(dataset)
    for i in range(num_images):
        # Progress
        print("At " + str(i) + " out of " + str(num_images) + ".")

        # Get the image, the ground truths and data about the image map
        inputs, _targets, meta = dataset[i]
        filename = dataset.anno[dataset.train[i]]['img_paths']

        # The input must require a gradient so that the saliency
        # (d score / d input) can be read back from inputs_var.grad.
        inputs_var = torch.autograd.Variable(inputs.unsqueeze(0))
        inputs_var.requires_grad_()

        # A single forward pass serves both the joint predictions and the
        # saliency computation; only the last entry of the output is used.
        output = model(inputs_var.cuda())
        final_scores = output[-1].cpu()

        # final_preds receives a detached copy so it can never touch the
        # tensor the gradients are computed from.
        score_map = final_scores.detach().clone()
        joint_preds = final_preds(score_map, [meta['center']], [meta['scale']], [64, 64]).squeeze()

        # "inputs" is color normalized, so undo that on a copy and move the
        # channel axis last, scaling by 255 for display.
        original_image = inputs.clone()
        color_denormalize(original_image, dataset.mean, dataset.std)
        original_image = original_image.numpy().transpose(1, 2, 0) * 255.0

        # Skeleton overlay on the image as stored on disk.
        # NOTE(review): scipy.misc.imread/imsave were removed in SciPy 1.2;
        # this function needs an older SciPy or a port to another image lib.
        abs_filename = os.path.join(options.img_dir, filename)
        img = scipy.misc.imread(abs_filename)
        twod_overlay = viz_2d_overlay(img, joint_preds)

        # Drop the batch axis for per-joint indexing; the upsampled copy is
        # only used for display.
        scores = final_scores.squeeze()
        scores_upsampled = upsample_4x4(scores.unsqueeze(0)).squeeze()

        # Saliency map is the gradient of the scores with respect to the
        # input image. We want to do this one joint at a time.
        packed_joint_imgs = []
        for joint in range(model.num_classes):
            joint_scores = scores[joint]
            if options.use_max_for_saliency_map:
                objective = torch.max(joint_scores)
            else:
                objective = torch.sum(joint_scores)
            # The graph is reused for every joint, hence retain_graph.
            objective.backward(retain_graph=True)
            saliency = torch.sum(inputs_var.grad.abs().squeeze(), dim=0)

            # Zero out any gradients (for next iteration)
            inputs_var.grad.zero_()
            model.zero_grad()

            # Create the images to stack
            joint_prob_distr_image = _heatmap_from_prob_scores(scores_upsampled[joint].detach().numpy())
            saliency_image = _heatmap_from_prob_scores(saliency.numpy(), colormap=plt.cm.hot)
            saliency_overlay = _overlay_saliency(original_image, saliency.numpy())

            # Stack these images in a row
            imgs = []
            if skeleton_overlay and twod_overlay is not None:
                imgs.append(twod_overlay)
            imgs.extend([original_image, joint_prob_distr_image, saliency_image, saliency_overlay])
            packed_joint_imgs.append(_pack_images(imgs))

        # Stack images in a column and save
        final_visualization = _pack_images_col(packed_joint_imgs)
        output_filename = os.path.join(options.output_dir, filename)
        scipy.misc.imsave(output_filename, final_visualization)
def main(args):
    """
    Main training loop for training a stacked hourglass model on MPII dataset.

    Builds the hourglass network (and the joint-visibility head), optionally
    restores a checkpoint from ``args.load``, then either runs a single
    evaluation pass (``args.evaluate``) or trains for
    ``args.start_epoch .. args.epochs``, logging to ``log.txt`` and
    tensorboard and saving a checkpoint after every epoch.

    :param args: Command line arguments.
    :raises Exception: if ``args.load`` is given but is not an existing file.
    """
    global best_acc

    # create checkpoint dir
    if not isdir(args.checkpoint_dir):
        mkdir_p(args.checkpoint_dir)

    # create model
    print("==> creating model '{}', stacks={}, blocks={}".format(
        args.arch, args.stacks, args.blocks))
    model = HourglassNet(num_stacks=args.stacks,
                         num_blocks=args.blocks,
                         num_classes=args.num_classes,
                         batch_norm_momentum=args.batch_norm_momentum,
                         use_layer_norm=args.use_layer_norm,
                         width=256,
                         height=256)
    joint_visibility_model = JointVisibilityNet(hourglass_stacks=args.stacks)

    # scale weights
    if args.scale_weight_factor != 1.0:
        model.scale_weights_(args.scale_weight_factor)

    # setup horovod and model for parallel execution
    if args.use_horovod:
        hvd.init()
        torch.cuda.set_device(hvd.local_rank())
        # The learning rate is scaled by the number of horovod workers.
        args.lr *= hvd.size()
        model.cuda()
    else:
        model = model.cuda()
    if args.predict_joint_visibility:
        joint_visibility_model = joint_visibility_model.cuda()

    # define loss function (criterion) and optimizer
    criterion = torch.nn.MSELoss(size_average=True).cuda()
    joint_visibility_criterion = None
    if args.predict_joint_visibility:
        joint_visibility_criterion = torch.nn.BCEWithLogitsLoss()

    # One parameter group per network. This list used to be overwritten with
    # model.parameters() right after being built, so the joint-visibility
    # head was never updated by the optimizer.
    # NOTE(review): checkpoints written with --predict_joint_visibility
    # before this fix hold a single optimizer param group and will not load
    # into the two-group optimizer.
    params = [{'params': model.parameters(), 'lr': args.lr}]
    named_parameters = list(model.named_parameters())
    if args.predict_joint_visibility:
        params.append({
            'params': joint_visibility_model.parameters(),
            'lr': args.lr
        })
        # horovod requires a name for every parameter the optimizer owns.
        named_parameters.extend(
            ('joint_visibility.' + name, param)
            for name, param in joint_visibility_model.named_parameters())

    if not args.use_amsprop:
        optimizer = torch.optim.RMSprop(params,
                                        lr=args.lr,
                                        momentum=args.momentum,
                                        weight_decay=args.weight_decay)
    else:
        optimizer = torch.optim.Adam(params,
                                     lr=args.lr,
                                     weight_decay=args.weight_decay,
                                     amsgrad=True)
    if args.use_horovod:
        optimizer = hvd.DistributedOptimizer(
            optimizer, named_parameters=named_parameters)

    # Create a tensorboard writer
    writer = SummaryWriter(log_dir="%s/hourglass_mpii_%s_tb_log" %
                           (args.tb_dir, args.exp))

    # optionally resume from a checkpoint
    title = 'mpii-' + args.arch
    if args.load:
        if not isfile(args.load):
            raise Exception("=> no checkpoint found at '{}'".format(args.load))

        print("=> loading checkpoint '{}'".format(args.load))
        checkpoint = torch.load(args.load)

        # remove old usage of data parallel (used to be wrapped around model)
        # TODO: remove this when no old models used this
        prefix = "module."
        state_dict = {
            (key[len(prefix):] if key.startswith(prefix) else key): value
            for key, value in checkpoint['state_dict'].items()
        }

        # restore state
        args.start_epoch = checkpoint['epoch']
        best_acc = checkpoint['best_acc']
        model.load_state_dict(state_dict)
        if args.predict_joint_visibility:
            joint_visibility_model.load_state_dict(
                checkpoint['joint_visibility_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("=> loaded checkpoint '{}' (epoch {})".format(
            args.load, checkpoint['epoch']))
        logger = Logger(join(args.checkpoint_dir, 'log.txt'),
                        title=title,
                        resume=True)
    else:
        logger = Logger(join(args.checkpoint_dir, 'log.txt'), title=title)
        logger.set_names(
            ['Epoch', 'LR', 'Train Loss', 'Val Loss', 'Train Acc', 'Val Acc'])

    cudnn.benchmark = True
    print(' Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))

    # Data loading code
    train_dataset, train_loader, val_loader = _make_torch_data_loaders(args)

    if args.evaluate:
        print('\nEvaluation only')
        # Same call form as the per-epoch validation below; the previous
        # 6-argument / 3-result form did not match it.
        # NOTE(review): validate() is defined outside this block -- confirm
        # the signature.
        (_, _, _, _, _, _, predictions) = validate(
            val_loader, model, joint_visibility_model, criterion,
            joint_visibility_criterion, args.num_classes, args.debug,
            args.flip, args.use_horovod, args.use_train_mode_to_eval,
            args.predict_joint_visibility)
        save_pred(predictions, checkpoint=args.checkpoint_dir)
        writer.close()
        return

    lr = args.lr
    for epoch in range(args.start_epoch, args.epochs):
        lr = adjust_learning_rate(optimizer, epoch, lr, args.schedule,
                                  args.gamma)
        print('\nEpoch: %d | LR: %.8f' % (epoch + 1, lr))

        # decay sigma
        if args.sigma_decay > 0:
            train_loader.dataset.sigma *= args.sigma_decay
            val_loader.dataset.sigma *= args.sigma_decay

        # train for one epoch
        train_loss, train_acc, joint_visibility_loss, joint_visibility_acc = train(
            train_loader,
            model=model,
            joint_visibility_model=joint_visibility_model,
            criterion=criterion,
            num_joints=args.num_classes,
            joint_visibility_criterion=joint_visibility_criterion,
            optimizer=optimizer,
            epoch=epoch,
            writer=writer,
            lr=lr,
            debug=args.debug,
            flip=args.flip,
            remove_intermediate_supervision=args.remove_intermediate_supervision,
            tb_freq=args.tb_log_freq,
            no_grad_clipping=args.no_grad_clipping,
            grad_clip=args.grad_clip,
            use_horovod=args.use_horovod,
            predict_joint_visibility=args.predict_joint_visibility,
            predict_joint_loss_coeff=args.joint_visibility_loss_coeff)

        # evaluate on validation set
        (valid_loss, valid_acc_PCK, valid_acc_PCKh, valid_acc_PCKh_per_joint,
         valid_joint_visibility_loss, valid_joint_visibility_acc,
         predictions) = validate(
             val_loader, model, joint_visibility_model, criterion,
             joint_visibility_criterion, args.num_classes, args.debug,
             args.flip, args.use_horovod, args.use_train_mode_to_eval,
             args.predict_joint_visibility)

        # append logger file, and write to tensorboard summaries
        writer.add_scalars('data/epoch/losses_wrt_epochs', {
            'train_loss': train_loss,
            'test_lost': valid_loss
        }, epoch)
        writer.add_scalar('data/epoch/train_accuracy_PCK', train_acc, epoch)
        writer.add_scalar('data/epoch/test_accuracy_PCK', valid_acc_PCK, epoch)
        writer.add_scalar('data/epoch/test_accuracy_PCKh', valid_acc_PCKh,
                          epoch)
        for key in valid_acc_PCKh_per_joint:
            writer.add_scalar(
                'per_joint_data/epoch/test_accuracy_PCKh_%s' % key,
                valid_acc_PCKh_per_joint[key], epoch)
        logger.append(
            [epoch + 1, lr, train_loss, valid_loss, train_acc, valid_acc_PCK])
        if args.predict_joint_visibility:
            writer.add_scalars(
                'joint_visibility/epoch/loss', {
                    'train': joint_visibility_loss,
                    'test_lost': valid_joint_visibility_loss
                }, epoch)
            writer.add_scalars(
                'joint_visibility/epoch/acc', {
                    'train': joint_visibility_acc,
                    'test_lost': valid_joint_visibility_acc
                }, epoch)

        # remember best acc and save checkpoint
        model_specific_checkpoint_dir = "%s/hourglass_mpii_%s" % (
            args.checkpoint_dir, args.exp)
        if not isdir(model_specific_checkpoint_dir):
            mkdir_p(model_specific_checkpoint_dir)

        is_best = valid_acc_PCK > best_acc
        best_acc = max(valid_acc_PCK, best_acc)
        mean, stddev = train_dataset.get_mean_stddev()
        checkpoint = {
            'epoch': epoch + 1,
            'arch': args.arch,
            'state_dict': model.state_dict(),
            'best_acc': best_acc,
            'optimizer': optimizer.state_dict(),
            'mean': mean,
            'stddev': stddev,
        }
        if args.predict_joint_visibility:
            checkpoint['joint_visibility_state_dict'] = (
                joint_visibility_model.state_dict())
        save_checkpoint(checkpoint,
                        predictions,
                        is_best,
                        checkpoint=model_specific_checkpoint_dir)

    writer.close()
    logger.close()
def viz_orthog_transform(options):
    """
    Visualize one dataset sample under several orthogonal re-orientations.

    Each row of the saved image shows the 2D ground truth, the 3D ground
    truth and the model's 3D prediction. The first row uses the sample
    without augmentation; from the second row on the dataset's orthogonal
    data augmentation probability is set to 1.0.

    Options that should be included:
        options.data_dir: the directory for the dataset of poses
        options.load: checkpoint file for the model
        options.index: the index into the dataset to visualize
        options.num_orientations: the number of re-orientations to make
        options.dataset_normalization: if the network was trained with dataset normalizations
        options.z_rotations_only: forwarded to the dataset constructor
        options.output_dir: the directory to output the visualization image

    :param options: Options for the visualization, defined in options.py.
    :return: Nothing
    """
    # Read everything needed from the options up front
    dataset_dir = options.data_dir
    checkpoint_path = options.load
    sample_index = options.index
    row_count = options.num_orientations
    normalized = options.dataset_normalization

    # Make sure there is somewhere to write to
    if not isdir(options.output_dir):
        mkdir_p(options.output_dir)

    # Dataset starts with the orthogonal augmentation switched off
    dataset = Human36mDataset(dataset_path=dataset_dir,
                              orthogonal_data_augmentation_prob=0.0,
                              z_rotations_only=options.z_rotations_only,
                              dataset_normalization=normalized)

    # Restore the network and put it in eval mode
    model = LinearModel(dataset_normalized_input=normalized).cuda()
    model.load_state_dict(torch.load(checkpoint_path)['state_dict'])
    model.eval()

    rows = []
    for row in range(row_count):
        _, _, gt_2d, gt_3d, meta = dataset[sample_index]

        # The model expects a batch, so feed a pseudo batch of size 1
        net_input = torch.Tensor(gt_2d).view((1, -1)).cuda()
        pred_3d = model(net_input).cpu().detach().numpy()

        # unNormalizeData works on batches: add the batch axis, take entry 0
        gt_2d_unnorm = data_utils.unNormalizeData(
            np.expand_dims(gt_2d, axis=0), meta, normalized, is_2d=True)[0]
        gt_3d_unnorm = data_utils.unNormalizeData(
            np.expand_dims(gt_3d, axis=0), meta, normalized)[0]
        pred_3d_unnorm = data_utils.unNormalizeData(pred_3d, meta, normalized)[0]

        # Render the three panels of this row
        panel_2d = viz_2d_pose(gt_2d_unnorm)
        panel_3d_gt = viz_3d_pose(gt_3d_unnorm)
        panel_3d_pred = viz_3d_pose(pred_3d_unnorm)

        # After the un-augmented first row, always augment
        if row == 0:
            dataset.orthogonal_data_augmentation_prob = 1.0

        rows.append(_pack_images([panel_2d, panel_3d_gt, panel_3d_pred]))

    # Stack the rows and write the result
    stacked = _pack_images_col(rows)
    target = os.path.join(options.output_dir, "{index}.jpg".format(index=sample_index))
    scipy.misc.imsave(target, stacked)
def _mirror_batch(inputs):
    """Return a horizontally mirrored copy of every image in the batch."""
    mirrored = inputs.clone()
    for idx, image in enumerate(mirrored):
        mirrored[idx] = im_to_torch(cv2.flip(im_to_numpy(image), 1))
    return mirrored


def _fuse_flip_scores(score_map, flip_score_map, symmetry):
    """Average ``score_map`` in place with the mirrored-pass heatmaps.

    Every mirrored heatmap stack is flipped back and the channel pairs in
    ``symmetry`` are exchanged before it is averaged in.
    """
    for idx, fscore in enumerate(flip_score_map):
        # cv2.flip runs on a channels-last view; the layout is then restored.
        fscore = cv2.flip(fscore.transpose((1, 2, 0)), 1)
        channels = list(fscore.transpose((2, 0, 1)))
        for q, w in symmetry:
            channels[q], channels[w] = channels[w], channels[q]
        score_map[idx] += np.array(channels)
        score_map[idx] /= 2


def _refine_peak(heatmap, output_shape, border=10, delta=0.25):
    """Return the (x, y) heatmap position of one joint.

    The heatmap is zero-padded by ``border`` and Gaussian-blurred; the
    maximum is then moved ``delta`` pixels towards the second largest
    response and clamped to the heatmap bounds.
    """
    padded = np.zeros((output_shape[0] + 2 * border,
                       output_shape[1] + 2 * border))
    padded[border:-border, border:-border] = heatmap.copy()
    padded = cv2.GaussianBlur(padded, (21, 21), 0)

    y, x = np.unravel_index(padded.argmax(), padded.shape)
    # Zero the maximum so that the next argmax returns the runner-up.
    padded[y, x] = 0
    py, px = np.unravel_index(padded.argmax(), padded.shape)

    y -= border
    x -= border
    # (px, py) turns into the offset from the peak to the runner-up.
    py -= border + y
    px -= border + x
    ln = (px ** 2 + py ** 2) ** 0.5
    if ln > 1e-3:
        x += delta * px / ln
        y += delta * py / ln
    x = max(0, min(x, output_shape[1] - 1))
    y = max(0, min(y, output_shape[0] - 1))
    return x, y


def main(args):
    """Evaluate a trained checkpoint on the test split with COCO keypoints.

    Loads ``<args.checkpoint>/<args.test>.pth.tar`` into the network named
    by ``cfg.model``, predicts 17 keypoints per detection (optionally
    averaging with a mirrored pass when ``args.flip`` is set), writes the
    detections to ``<args.result>/result.json`` and runs the COCO keypoint
    evaluation against ``cfg.ori_gt_path``.

    :param args: Command line arguments; reads ``checkpoint``, ``test``,
        ``batch``, ``num_gpus``, ``workers``, ``flip`` and ``result``.
    :return: Nothing
    """
    # create model
    model = network.__dict__[cfg.model](cfg.output_shape,
                                        cfg.num_class,
                                        pretrained=False)
    model = torch.nn.DataParallel(model).cuda()

    test_loader = torch.utils.data.DataLoader(MscocoMulti(cfg, train=False),
                                              batch_size=args.batch * args.num_gpus,
                                              shuffle=False,
                                              num_workers=args.workers,
                                              pin_memory=True)

    # load training weights
    checkpoint_file = os.path.join(args.checkpoint, args.test + '.pth.tar')
    checkpoint = torch.load(checkpoint_file)
    model.load_state_dict(checkpoint['state_dict'])
    print("=> loaded checkpoint '{}' (epoch {})".format(
        checkpoint_file, checkpoint['epoch']))

    # change to evaluation mode
    model.eval()

    print('testing...')
    full_result = []
    num_joints = 17
    for _, (inputs, meta) in tqdm(enumerate(test_loader)):
        with torch.no_grad():
            _, refine_output = model(inputs.cuda())
            score_map = refine_output.detach().cpu().numpy()

            if args.flip:
                _, flip_output = model(_mirror_batch(inputs).cuda())
                _fuse_flip_scores(score_map,
                                  flip_output.detach().cpu().numpy(),
                                  cfg.symmetry)

        ids = meta['imgID'].numpy()
        det_scores = meta['det_scores']
        details = meta['augmentation_details']

        for b in range(inputs.size(0)):
            single_map = score_map[b]
            # Confidence values come from a copy taken before the per-joint
            # normalisation below changes single_map in place.
            r0 = single_map.copy()
            r0 /= 255
            r0 += 0.5

            v_score = np.zeros(num_joints)
            single_result = []
            for p in range(num_joints):
                single_map[p] /= np.amax(single_map[p])
                x, y = _refine_peak(single_map[p], cfg.output_shape)

                # Map heatmap coordinates back to the source image through
                # the box stored in details[b] -- presumably
                # (x1, y1, x2, y2); verify against the dataset class.
                resy = float((4 * y + 2) / cfg.data_shape[0] *
                             (details[b][3] - details[b][1]) + details[b][1])
                resx = float((4 * x + 2) / cfg.data_shape[1] *
                             (details[b][2] - details[b][0]) + details[b][0])
                v_score[p] = float(r0[p, int(round(y) + 1e-10),
                                      int(round(x) + 1e-10)])
                single_result.extend([resx, resy, 1])

            if single_result:
                full_result.append({
                    'image_id': int(ids[b]),
                    'category_id': 1,
                    'keypoints': single_result,
                    'score': float(det_scores[b]) * v_score.mean(),
                })

    result_path = args.result
    if not isdir(result_path):
        mkdir_p(result_path)
    result_file = os.path.join(result_path, 'result.json')
    with open(result_file, 'w') as wf:
        json.dump(full_result, wf)

    # evaluate on COCO
    eval_gt = COCO(cfg.ori_gt_path)
    eval_dt = eval_gt.loadRes(result_file)
    cocoEval = COCOeval(eval_gt, eval_dt, iouType='keypoints')
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()
def main(args):
    """Train the network named by ``cfg.model`` on ``MscocoMulti_double_only``.

    Runs on CUDA when available, otherwise on the CPU. After every epoch the
    training loss is appended to ``<args.checkpoint>/log.txt`` and a
    checkpoint is saved; tensorboard scalars are exported to ``./test.json``
    once training has finished.

    :param args: Command line arguments; reads ``checkpoint``, ``resume``,
        ``start_epoch``, ``epochs``, ``num_gpus`` and ``workers``.
    :raises FileNotFoundError: if ``args.resume`` is set but
        ``<args.checkpoint>/<args.resume>.pth.tar`` does not exist.
    :return: Nothing
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)
    writer = SummaryWriter(cfg.tensorboard_path)

    # create checkpoint dir
    counter = 0
    if not isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # create model
    model = network.__dict__[cfg.model](cfg.output_shape,
                                        cfg.num_class,
                                        pretrained=True)
    model = torch.nn.DataParallel(model).to(device)

    # define loss function (criterion) and optimizer
    criterion_bce = torch.nn.BCELoss().to(device)
    criterion_abs = torch.nn.L1Loss().to(device)

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=cfg.lr,
                                 weight_decay=cfg.weight_decay)

    if args.resume:
        print(args.resume)
        checkpoint_file_resume = os.path.join(args.checkpoint,
                                              args.resume + '.pth.tar')
        if not isfile(checkpoint_file_resume):
            # Fail fast: continuing here used to leave `logger` unbound and
            # crash with a NameError only after the first epoch had run.
            raise FileNotFoundError("=> no checkpoint found at '{}'".format(
                checkpoint_file_resume))

        print("=> loading checkpoint '{}'".format(checkpoint_file_resume))
        # map_location keeps resuming usable on the CPU fallback device.
        checkpoint = torch.load(checkpoint_file_resume, map_location=device)
        model.load_state_dict(checkpoint['state_dict'])
        args.start_epoch = checkpoint['epoch']
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file_resume, checkpoint['epoch']))
        logger = Logger(join(args.checkpoint, 'log.txt'), resume=True)
    else:
        logger = Logger(join(args.checkpoint, 'log.txt'))
        logger.set_names(['Epoch', 'LR', 'Train Loss'])

    cudnn.benchmark = True
    # 4 bytes per parameter, reported in MB
    print(' Total params: %.2fMB' %
          (sum(p.numel() for p in model.parameters()) / (1024 * 1024) * 4))

    train_loader = torch.utils.data.DataLoader(
        MscocoMulti_double_only(cfg),
        batch_size=cfg.batch_size * args.num_gpus,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    for epoch in range(args.start_epoch, args.epochs):
        lr = adjust_learning_rate(optimizer, epoch, cfg.lr_dec_epoch,
                                  cfg.lr_gamma)
        print('\nEpoch: %d | LR: %.8f' % (epoch + 1, lr))

        # train for one epoch; `counter` is passed in and handed back by
        # train() so it carries over between epochs
        train_loss, counter = train(train_loader, model,
                                    [criterion_abs, criterion_bce], writer,
                                    counter, optimizer, device)
        print('train_loss: ', train_loss)

        # append logger file
        logger.append([epoch + 1, lr, train_loss])

        save_model(
            {
                'epoch': epoch + 1,
                'info': cfg.info,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            },
            checkpoint=args.checkpoint)

    writer.export_scalars_to_json("./test.json")
    writer.close()

    logger.close()