def __init__(self, use_log_polar=True, do_cuda=True):
    """Load the pre-trained HardNet descriptor model (log-polar or cartesian).

    Args:
        use_log_polar: if True, use the log-polar (PTN) config file;
            otherwise the cartesian (STN) one.
        do_cuda: request GPU extraction; falls back to CPU when CUDA is
            not available.
    """
    print('Using LogpolarFeature2D')
    self.model_base_path = config.cfg.root_folder + '/thirdparty/logpolar/'
    if use_log_polar:
        config_path = os.path.join(self.model_base_path, 'configs', 'init_one_example_ptn_96.yml')
        if kVerbose:
            print('-- Using log-polar model')
    else:
        config_path = os.path.join(self.model_base_path, 'configs', 'init_one_example_stn_16.yml')
        if kVerbose:
            print('-- Using cartesian model')
    cfg.merge_from_file(config_path)
    # N.B.: this must stay here, after cfg.merge_from_file()
    self.model_weights_path = self.model_base_path + cfg.TEST.MODEL_WEIGHTS
    if kVerbose2:
        print('model_weights_path:', self.model_weights_path)

    os.environ["CUDA_VISIBLE_DEVICES"] = str(0)
    torch.cuda.manual_seed_all(cfg.TRAINING.SEED)
    torch.backends.cudnn.deterministic = True

    # Logical `and` (the original used bitwise `&`) to combine the caller's
    # request with actual CUDA availability.
    self.do_cuda = do_cuda and torch.cuda.is_available()
    print('cuda:', self.do_cuda)
    device = torch.device("cuda:0" if self.do_cuda else "cpu")
    self.device = device
    torch.set_grad_enabled(False)

    print('==> Loading pre-trained network.')
    self.model = HardNet(transform=cfg.TEST.TRANSFORMER,
                         coords=cfg.TEST.COORDS,
                         patch_size=cfg.TEST.IMAGE_SIZE,
                         scale=cfg.TEST.SCALE,
                         is_desc256=cfg.TEST.IS_DESC_256,
                         orientCorrect=cfg.TEST.ORIENT_CORRECTION)
    # map_location lets a GPU-trained checkpoint load on a CPU-only machine.
    self.checkpoint = torch.load(self.model_weights_path, map_location=device)
    self.model.load_state_dict(self.checkpoint['state_dict'])
    if self.do_cuda:
        self.model.cuda()
        print('Extracting on GPU')
    else:
        print('Extracting on CPU')
        # BUGFIX: the original read `model.cpu()` where `model` was an
        # undefined name (NameError on the CPU path).
        self.model = self.model.cpu()
    self.model.eval()
    print('==> Successfully loaded pre-trained network.')
help="path to config file", type=str) parser.add_argument("--amos_dataset", default="dl/AMOS/Handpicked_v3_png/", help="path to config file", type=str) parser.add_argument("opts", help="Modify config options using the command-line", default=None, nargs=argparse.REMAINDER) args = parser.parse_args() if args.config_file != "": cfg.merge_from_file(args.config_file) cfg.merge_from_list(args.opts) if not cfg.TRAINING.NO_CUDA: torch.cuda.manual_seed_all(cfg.TRAINING.SEED) torch.backends.cudnn.deterministic = True device = torch.device("cuda" if torch.cuda.is_available() else "cpu") amos_dataset = AMOSDataset(args.amos_dataset, cfg.TRAINING.PAD_TO) model = HardNet(transform=cfg.TEST.TRANSFORMER, coords=cfg.TEST.COORDS, patch_size=cfg.TEST.IMAGE_SIZE, scale=cfg.TEST.SCALE, is_desc256=cfg.TEST.IS_DESC_256,
# --- Training-script setup: CLI parsing, config merging, and seeding ---
parser.add_argument("--config_file", default=config_path,
                    help="path to config file", type=str)
# Trailing CLI tokens are forwarded to the yacs config as key/value overrides.
parser.add_argument("opts", help="Modify config options using the command-line",
                    default=None, nargs=argparse.REMAINDER)
args = parser.parse_args()

# Number of workers in a distributed launch (torch.distributed sets
# WORLD_SIZE); defaults to 1 for a plain single-process run.
num_gpus = int(
    os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1

# Merge the YAML config first, then apply the command-line overrides on top.
if args.config_file != "":
    cfg.merge_from_file(args.config_file)
cfg.merge_from_list(args.opts)
# NOTE(review): presumably creates/records output dirs and returns the
# updated config -- helper is defined elsewhere, verify there.
cfg = create_logging_directories(cfg)

if not cfg.TRAINING.NO_CUDA:
    # cudnn.benchmark = True
    torch.cuda.manual_seed_all(cfg.TRAINING.SEED)
    torch.backends.cudnn.deterministic = True

# set random seeds
random.seed(cfg.TRAINING.SEED)
torch.manual_seed(cfg.TRAINING.SEED)
np.random.seed(cfg.TRAINING.SEED)
def extract_descriptors(input_filename, output_filename, use_log_polar,
                        num_keypoints, verbose):
    """Detect SIFT keypoints in an image and describe them with HardNet.

    Writes two datasets to an HDF5 file at `output_filename`:
    `keypoints` (N x 4: x, y, scale, angle) and `descriptors` (N x D).

    Args:
        input_filename: path of the image to process (read as grayscale).
        output_filename: path of the HDF5 file to create.
        use_log_polar: if True use the log-polar (PTN) config, otherwise
            the cartesian (STN) one.
        num_keypoints: maximum number of SIFT keypoints to detect.
        verbose: print progress and timing information.

    Raises:
        RuntimeError: if the (possibly resized) image still exceeds the
            model's padding size `cfg.TEST.PAD_TO`.
    """
    # Setup
    ROOT = os.getcwd()
    if use_log_polar:
        config_path = os.path.join(ROOT, 'configs', 'init_one_example_ptn_96.yml')
        if verbose:
            print('-- Using log-polar models')
    else:
        config_path = os.path.join(ROOT, 'configs', 'init_one_example_stn_16.yml')
        if verbose:
            print('-- Using cartesian models')
    cfg.merge_from_file(config_path)

    os.environ["CUDA_VISIBLE_DEVICES"] = str(0)
    torch.cuda.manual_seed_all(cfg.TRAINING.SEED)
    torch.backends.cudnn.deterministic = True
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if verbose:
        if torch.cuda.is_available():
            print('-- Using GPU')
        else:
            print('-- Using CPU')

    # Extract SIFT keypoints
    img = cv2.imread(input_filename, cv2.IMREAD_GRAYSCALE)

    # A safe image size is ~1000px on the largest dimension
    # To extract features on larger images you might want to increase the padding
    max_size = 1024
    # BUGFIX: assign h, w unconditionally -- the original set them only
    # inside the resize branch, so small images hit a NameError at the
    # verbose padding message below.
    h, w = img.shape
    if any([s > max_size for s in img.shape]):
        # BUGFIX: cv2.resize's third *positional* argument is `dst`, not
        # the interpolation mode -- it must be passed by keyword.
        if h > w:
            img = cv2.resize(img, (int(w * max_size / h), max_size),
                             interpolation=cv2.INTER_CUBIC)
        elif w > h:
            img = cv2.resize(img, (max_size, int(h * max_size / w)),
                             interpolation=cv2.INTER_CUBIC)
        h, w = img.shape

    # get keypoints, scale and locations from SIFT or another detector
    sift = cv2.xfeatures2d.SIFT_create(num_keypoints)
    keypoints = sift.detect(img, None)
    pts = np.array([kp.pt for kp in keypoints])
    scales = np.array([kp.size for kp in keypoints])
    oris = np.array([kp.angle for kp in keypoints])

    # Mirror-pad the image to avoid boundary effects
    if any([s > cfg.TEST.PAD_TO for s in img.shape[:2]]):
        raise RuntimeError(
            "Image exceeds acceptable size ({}x{}), please downsample".format(
                cfg.TEST.PAD_TO, cfg.TEST.PAD_TO))
    fillHeight = cfg.TEST.PAD_TO - img.shape[0]
    fillWidth = cfg.TEST.PAD_TO - img.shape[1]
    padLeft = int(np.round(fillWidth / 2))
    padRight = int(fillWidth - padLeft)
    padUp = int(np.round(fillHeight / 2))
    padDown = int(fillHeight - padUp)
    img = np.pad(img, pad_width=((padUp, padDown),
                                 (padLeft, padRight)), mode='reflect')
    if verbose:
        print('-- Padding image from {}x{} to {}x{}'.format(
            h, w, img.shape[0], img.shape[1]))

    # Normalize keypoint locations to [-1, 1] in the padded image frame
    kp_norm = []
    for i, p in enumerate(pts):
        _p = 2 * np.array([(p[0] + padLeft) / (cfg.TEST.PAD_TO),
                           (p[1] + padUp) / (cfg.TEST.PAD_TO)]) - 1
        kp_norm.append(_p)
    theta = [
        torch.from_numpy(np.array(kp_norm)).float().squeeze(),
        torch.from_numpy(scales).float(),
        torch.from_numpy(np.array([np.deg2rad(o) for o in oris])).float()
    ]

    # Instantiate the model
    t = time()
    model = HardNet(transform=cfg.TEST.TRANSFORMER,
                    coords=cfg.TEST.COORDS,
                    patch_size=cfg.TEST.IMAGE_SIZE,
                    scale=cfg.TEST.SCALE,
                    is_desc256=cfg.TEST.IS_DESC_256,
                    orientCorrect=cfg.TEST.ORIENT_CORRECTION)

    # Load weights; map_location lets a GPU-trained checkpoint load on a
    # CPU-only machine.
    model.load_state_dict(
        torch.load(cfg.TEST.MODEL_WEIGHTS, map_location=device)['state_dict'])
    model.eval()
    model.to(device)
    if verbose:
        print('-- Instantiated model in {:0.2f} sec.'.format(time() - t))

    # Extract descriptors
    imgs, img_keypoints = torch.from_numpy(img).unsqueeze(0).to(device), \
        [theta[0].to(device), theta[1].to(device), theta[2].to(device)]
    t = time()
    descriptors, patches = model({input_filename: imgs}, img_keypoints,
                                 [input_filename] * len(img_keypoints[0]))
    if verbose:
        print('-- Computed {} descriptors in {:0.2f} sec.'.format(
            descriptors.shape[0], time() - t))

    keypoints_array = np.concatenate(
        [pts, scales[..., None], oris[..., None]], axis=1)
    t = time()
    with h5py.File(output_filename, 'w') as f:
        f['keypoints'] = keypoints_array
        f['descriptors'] = descriptors.cpu().detach().numpy()
    print('-- Saved {} descriptors in {:0.2f} sec.'.format(
        descriptors.shape[0], time() - t))