def run(datapath, benchmark, backbone, thres, alpha, hyperpixel,
        logpath, args, beamsearch=False, model=None, dataloader=None):
    r"""Runs Semantic Correspondence as an Optimal Transport Problem"""

    # 1. Logging initialization
    if not os.path.isdir('logs'):
        os.mkdir('logs')
    if not beamsearch:
        logfile = 'logs/{}_{}_{}_{}_exp{}-{}_e{}_m{}_{}_{}'.format(
            benchmark, backbone, args.split, args.sim, args.exp1, args.exp2,
            args.eps, args.classmap, args.cam, args.hyperpixel)
        print(logfile)
        util.init_logger(logfile)
        util.log_args(args)

    # 2. Evaluation benchmark initialization
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if dataloader is None:
        download.download_dataset(os.path.abspath(datapath), benchmark)
        split = args.split
        dset = download.load_dataset(benchmark, datapath, thres, device, split, args.cam)
        dataloader = DataLoader(dset, batch_size=1, num_workers=0)

    # 3. Model initialization
    if model is None:
        model = scot_CAM.SCOT_CAM(backbone, hyperpixel, benchmark, device, args.cam)
    else:
        model.hyperpixel_ids = util.parse_hyperpixel(hyperpixel)

    # 4. Evaluator initialization
    evaluator = evaluation.Evaluator(benchmark, device)

    zero_pcks = 0
    srcpt_list = []
    trgpt_list = []
    time_list = []
    PCK_list = []
    for idx, data in enumerate(dataloader):
        threshold = 0.0

        # a) Retrieve images and adjust their sizes to avoid large numbers of hyperpixels
        data['src_img'], data['src_kps'], data['src_intratio'] = util.resize(
            data['src_img'], data['src_kps'][0])
        data['trg_img'], data['trg_kps'], data['trg_intratio'] = util.resize(
            data['trg_img'], data['trg_kps'][0])
        src_size = data['src_img'].size()
        trg_size = data['trg_img'].size()

        if len(args.cam) > 0:
            data['src_mask'] = util.resize_mask(data['src_mask'], src_size)
            data['trg_mask'] = util.resize_mask(data['trg_mask'], trg_size)
            data['src_bbox'] = util.get_bbox_mask(data['src_mask'], thres=threshold).to(device)
            data['trg_bbox'] = util.get_bbox_mask(data['trg_mask'], thres=threshold).to(device)
        else:
            data['src_mask'] = None
            data['trg_mask'] = None

        data['alpha'] = alpha
        tic = time.time()

        # b) Feed a pair of images to Hyperpixel Flow model
        with torch.no_grad():
            confidence_ts, src_box, trg_box = model(
                data['src_img'], data['trg_img'], args.sim, args.exp1,
                args.exp2, args.eps, args.classmap, data['src_bbox'],
                data['trg_bbox'], data['src_mask'], data['trg_mask'], backbone)
            conf, trg_indices = torch.max(confidence_ts, dim=1)
            unique, inv = torch.unique(trg_indices, sorted=False, return_inverse=True)
            trgpt_list.append(len(unique))
            srcpt_list.append(len(confidence_ts))

        # c) Predict key-points & evaluate performance
        prd_kps = geometry.predict_kps(src_box, trg_box, data['src_kps'], confidence_ts)
        toc = time.time()
        time_list.append(toc - tic)
        pair_pck = evaluator.evaluate(prd_kps, data)
        PCK_list.append(pair_pck)
        if pair_pck == 0:
            zero_pcks += 1

        # d) Log results
        if not beamsearch:
            evaluator.log_result(idx, data=data)

    if beamsearch:
        return (sum(evaluator.eval_buf['pck']) / len(evaluator.eval_buf['pck'])) * 100.
    else:
        logging.info('source points: ' + str(sum(srcpt_list) * 1.0 / len(srcpt_list)))
        logging.info('target points: ' + str(sum(trgpt_list) * 1.0 / len(trgpt_list)))
        logging.info('avg running time: ' + str(sum(time_list) / len(time_list)))
        evaluator.log_result(len(dset), data=None, average=True)
        logging.info('total number of 0.00-PCK image pairs: ' + str(zero_pcks))
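
# Usage sketch (not part of the original script): run() above expects an
# argparse-style namespace alongside the positional arguments. Every value
# below is an illustrative assumption, not a repo default. It is wrapped in
# a function so importing this module stays side-effect free.
def _example_scot_eval():
    from argparse import Namespace
    args = Namespace(datapath='../Datasets_SCOT', dataset='pfpascal',
                     backbone='resnet101', thres='bbox', alpha=0.05,
                     hyperpixel='(2,22,24,25,27,28,29)', logpath='scot',
                     split='test', sim='OTGeo', exp1=1.0, exp2=0.5,
                     eps=0.05, classmap=1, cam='')
    run(datapath=args.datapath, benchmark=args.dataset, backbone=args.backbone,
        thres=args.thres, alpha=args.alpha, hyperpixel=args.hyperpixel,
        logpath=args.logpath, args=args, beamsearch=False)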
def run(datapath, benchmark, backbone, thres, alpha, hyperpixel,
        logpath, args, model=None, dataloader=None):
    r"""Runs Semantic Correspondence as an Optimal Transport Problem on the TSS benchmark"""

    # 1. Logging initialization
    if not os.path.isdir('logs'):
        os.mkdir('logs')
    logfile = 'logs/{}_{}_{}_{}_exp{}-{}_e{}_m{}_{}_{}'.format(
        benchmark, backbone, args.split, args.sim, args.exp1, args.exp2,
        args.eps, args.classmap, args.cam, args.hyperpixel)
    print(logfile)
    util.init_logger(logfile)
    util.log_args(args)

    # 2. Evaluation benchmark initialization
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if dataloader is None:
        csv_file = 'test_pairs_tss.csv'
        dataset = TSSDataset(csv_file=os.path.join(datapath, benchmark, csv_file),
                             dataset_path=os.path.join(datapath, benchmark))
        dataloader = DataLoader(dataset, batch_size=1, num_workers=0)

    # 3. Model initialization
    if model is None:
        model = scot_CAM.SCOT_CAM(backbone, hyperpixel, benchmark, device, args.cam)
    else:
        model.hyperpixel_ids = util.parse_hyperpixel(hyperpixel)

    time_list = []
    for idx, data in enumerate(dataloader):
        # a) Retrieve images and adjust their sizes to avoid large numbers of hyperpixels
        if args.classmap in [0, 1]:
            src_img, src_size, src_size2, src_ratio = util.resize_TSS(data['src_img'])
            trg_img, trg_size, trg_size2, trg_ratio = util.resize_TSS(data['trg_img'])
            src_mask, trg_mask, src_bbox, trg_bbox = None, None, None, None

        data['alpha'] = alpha

        # b) Feed a pair of images to Hyperpixel Flow model
        with torch.no_grad():
            # target meshgrids --> source matching points
            confidence_ts, trg_box, src_box = model(
                trg_img, src_img, args.sim, args.exp1, args.exp2, args.eps,
                args.classmap, trg_bbox, src_bbox, trg_mask, src_mask, backbone)

        # c) Build image grids and write flow to files
        h_tgt = int(trg_size[1].data.cpu().numpy())
        w_tgt = int(trg_size[2].data.cpu().numpy())
        grid_x_np, grid_y_np = np.meshgrid(range(1, w_tgt + 1), range(1, h_tgt + 1))
        grid_x = torch.tensor(grid_x_np).view(1, -1).cuda()
        grid_y = torch.tensor(grid_y_np).view(1, -1).cuda()
        trg_kps = torch.cat((grid_x, grid_y), 0).type(torch.cuda.FloatTensor)  # 2 x (h*w)
        trg_kps *= trg_ratio

        # Predict correspondences in chunks of 10,000 points to bound memory use
        n_points = trg_kps.size(1)
        n_itr = int(n_points / 10000)
        prd_kps = torch.zeros_like(trg_kps).to(trg_kps.device)
        for i in range(0, n_itr + 1):
            s = i * 10000
            t = min(n_points, (i + 1) * 10000)
            if s >= t:
                break
            trg_part = trg_kps[:, s:t].contiguous().clone()
            prd_part = geometry.predict_kps(trg_box, src_box, trg_part, confidence_ts)
            prd_kps[:, s:t] = prd_part

        def pointsToGrid(x, h_tgt=h_tgt, w_tgt=w_tgt):
            return x.contiguous().view(1, 2, h_tgt, w_tgt).transpose(1, 2).transpose(2, 3)

        prd_grid = pointsToGrid(prd_kps).squeeze(0)  # h x w x 2
        prd_grid /= src_ratio
        disp_x = prd_grid[:, :, 0].data.cpu().numpy() - grid_x_np
        disp_y = prd_grid[:, :, 1].data.cpu().numpy() - grid_y_np
        flow = np.concatenate((np.expand_dims(disp_x, 2), np.expand_dims(disp_y, 2)), 2)
        flow_path = os.path.join(args.datapath, 'TSS/result_CAM', data['flow_path'][0])
        create_file_path(flow_path)
        write_flo_file(flow, flow_path)

        print(idx + 1, '/', len(dataset))
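
# Reference sketch (assumption: write_flo_file above follows the standard
# Middlebury .flo layout). A .flo file stores the magic float 202021.25,
# then int32 width and height, then h*w*2 float32 values interleaved as
# (u, v) per pixel. This hypothetical reader is handy for sanity-checking
# the flow files written by the loop above.
def _read_flo_file_sketch(path):
    import numpy as np
    with open(path, 'rb') as f:
        magic = np.fromfile(f, np.float32, count=1)[0]
        assert magic == 202021.25, 'invalid .flo magic number'
        w = int(np.fromfile(f, np.int32, count=1)[0])
        h = int(np.fromfile(f, np.int32, count=1)[0])
        data = np.fromfile(f, np.float32, count=2 * w * h)
    return data.reshape(h, w, 2)  # per-pixel (dx, dy) displacements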
                        help='exponential factor on final OT scores')
    parser.add_argument('--eps', type=float, default=0.05,
                        help='epsilon for Sinkhorn regularization')
    parser.add_argument('--classmap', type=int, default=1,
                        help='class activation map: 0 for none, 1 for using CAM')
    parser.add_argument('--cam', type=str, default='',
                        help='activation map folder, empty for end-to-end computation')
    args = parser.parse_args()

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    run(datapath=args.datapath, benchmark=args.dataset, backbone=args.backbone,
        thres=args.thres, alpha=args.alpha, hyperpixel=args.hyperpixel,
        logpath=args.logpath, args=args, beamsearch=False)

    util.log_args(args)
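
# Example invocation (illustrative; the script name and flag values are
# assumptions, not taken from the repo):
#   python evaluate_map_CAM.py --dataset pfpascal --backbone resnet101 \
#       --classmap 1 --cam '' --gpu 0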
def run(datapath, benchmark, backbone, thres, alpha, hyperpixel,
        logpath, beamsearch, model=None, dataloader=None, visualize=False):
    r"""Runs Hyperpixel Flow framework"""

    # 1. Logging initialization
    if not os.path.isdir('logs'):
        os.mkdir('logs')
    if not beamsearch:
        cur_datetime = datetime.datetime.now().__format__('_%m%d_%H%M%S')
        logfile = os.path.join('logs', logpath + cur_datetime + '.log')
        util.init_logger(logfile)
        util.log_args(args)
        if visualize:
            os.mkdir(logfile + 'vis')

    # 2. Evaluation benchmark initialization
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if dataloader is None:
        download.download_dataset(os.path.abspath(datapath), benchmark)
        split = 'val' if beamsearch else 'test'
        dset = download.load_dataset(benchmark, datapath, thres, device, split)
        dataloader = DataLoader(dset, batch_size=1, num_workers=0)

    # 3. Model initialization
    if model is None:
        model = hpflow.HyperpixelFlow(backbone, hyperpixel, benchmark, device)
    else:
        model.hyperpixel_ids = util.parse_hyperpixel(hyperpixel)

    # 4. Evaluator initialization
    evaluator = evaluation.Evaluator(benchmark, device)

    for idx, data in enumerate(dataloader):
        # a) Retrieve images and adjust their sizes to avoid large numbers of hyperpixels
        data['src_img'], data['src_kps'], data['src_intratio'] = util.resize(
            data['src_img'], data['src_kps'][0])
        data['trg_img'], data['trg_kps'], data['trg_intratio'] = util.resize(
            data['trg_img'], data['trg_kps'][0])
        data['alpha'] = alpha

        # b) Feed a pair of images to Hyperpixel Flow model
        with torch.no_grad():
            confidence_ts, src_box, trg_box = model(data['src_img'], data['trg_img'])

        # c) Predict key-points & evaluate performance
        prd_kps = geometry.predict_kps(src_box, trg_box, data['src_kps'], confidence_ts)
        evaluator.evaluate(prd_kps, data)

        # d) Log results
        if not beamsearch:
            evaluator.log_result(idx, data=data)
        if visualize:
            vispath = os.path.join(
                logfile + 'vis',
                '%03d_%s_%s' % (idx, data['src_imname'][0], data['trg_imname'][0]))
            util.visualize_prediction(
                data['src_kps'].t().cpu(), prd_kps.t().cpu(),
                data['src_img'], data['trg_img'], vispath)

    if beamsearch:
        return (sum(evaluator.eval_buf['pck']) / len(evaluator.eval_buf['pck'])) * 100.
    else:
        evaluator.log_result(len(dset), data=None, average=True)
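
# Sketch of a layer-selection loop (an assumption, simplified from the beam
# search the framework uses for choosing hyperpixel layers): run() above
# returns validation PCK when beamsearch=True, so candidate layer sets can be
# scored directly. This greedy forward selection is a stand-in, not the repo's
# algorithm, and the '(l1,l2,...)' string format is assumed to match what
# util.parse_hyperpixel expects.
def _greedy_layer_search(datapath, benchmark, backbone, thres, alpha, logpath,
                         candidate_layers=range(17)):
    best_set, best_pck = [], 0.0
    while True:
        best_layer, best_gain = None, best_pck
        for layer in candidate_layers:
            if layer in best_set:
                continue
            trial = sorted(best_set + [layer])
            hyperpixel = '(' + ','.join(str(l) for l in trial) + ')'
            pck = run(datapath, benchmark, backbone, thres, alpha,
                      hyperpixel, logpath, beamsearch=True)
            if pck > best_gain:
                best_layer, best_gain = layer, pck
        if best_layer is None:
            break  # no layer improves validation PCK; stop
        best_set = sorted(best_set + [best_layer])
        best_pck = best_gain
    return best_set, best_pck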