class DCFNetTraker(object):
    """Stateful wrapper that runs a DCFNet model frame by frame on one target.

    The constructor crops a padded template around the initial rectangle and
    feeds it to ``net.update``; every ``track`` call crops one search patch per
    configured scale, picks the scale/location with the highest penalised
    response, moves and resizes the target accordingly, and then updates the
    model with a fresh crop.
    """

    def __init__(self, im, init_rect, config=None, gpu=False):
        """Build the network and initialise the model from the first frame.

        Args:
            im: First frame. Only ``im.shape[:2]`` is read here; the frame is
                otherwise passed straight to ``crop_chw``.
            init_rect: Initial target rectangle, converted with
                ``rect1_2_cxy_wh`` (the ``track`` return comment marks the
                rectangle convention as 1-indexed).
            config: Tracker configuration. ``None`` (the default) creates a
                fresh ``TrackerConfig()`` for this tracker instead of sharing
                one default instance between all trackers.
            gpu: When true, the network and all input tensors are moved to
                CUDA; when false everything stays on the CPU.
        """
        if config is None:
            config = TrackerConfig()
        self.gpu = gpu
        self.config = config
        self.net = DCFNet(config)
        self.net.load_param(config.feature_path)
        self.net.eval()
        if gpu:
            self.net.cuda()

        # confine results
        target_pos, target_sz = rect1_2_cxy_wh(init_rect)
        self.min_sz = np.maximum(config.min_scale_factor * target_sz, 4)
        # NOTE(review): track() treats element 0 of position/size as the
        # column (x) axis, while im.shape[:2] is presumably (rows, cols);
        # the axes compared here may be swapped - confirm the layout of `im`.
        self.max_sz = np.minimum(im.shape[:2], config.max_scale_factor * target_sz)

        # crop template
        window_sz = target_sz * (1 + config.padding)
        bbox = cxy_wh_2_bbox(target_pos, window_sz)
        patch = crop_chw(im, bbox, self.config.crop_sz)

        target = patch - config.net_average_image
        self.net.update(self._to_tensor(np.expand_dims(target, axis=0)))
        self.target_pos, self.target_sz = target_pos, target_sz
        # Reusable buffer holding one crop per scale, refilled on every track().
        self.patch_crop = np.zeros(
            (config.num_scale, patch.shape[0], patch.shape[1], patch.shape[2]),
            np.float32)  # buff

    def _to_tensor(self, array):
        """Wrap ``array`` in a ``torch.Tensor`` on the device chosen by ``self.gpu``."""
        tensor = torch.Tensor(array)
        # Only touch CUDA when the tracker was asked to run on the GPU, so a
        # CPU-only tracker never requires a CUDA runtime.
        return tensor.cuda() if self.gpu else tensor

    def track(self, im):
        """Locate the target in ``im``, update the model, and return its box.

        Args:
            im: Current frame, passed to ``crop_chw`` for every crop.

        Returns:
            The result of ``cxy_wh_2_rect1(target_pos, target_sz)`` for the
            updated target state (1-indexed rectangle).
        """
        cfg = self.config
        padding_scale = 1 + cfg.padding

        for i in range(cfg.num_scale):  # crop multi-scale search region
            window_sz = self.target_sz * (cfg.scale_factor[i] * padding_scale)
            bbox = cxy_wh_2_bbox(self.target_pos, window_sz)
            self.patch_crop[i, :] = crop_chw(im, bbox, cfg.crop_sz)

        search = self.patch_crop - cfg.net_average_image
        response = self.net(self._to_tensor(search))

        # Best response per scale, weighted by the per-scale penalty.
        peak, idx = torch.max(response.view(cfg.num_scale, -1), 1)
        peak = peak.data.cpu().numpy() * cfg.scale_penalties
        best_scale = np.argmax(peak)

        # int() pulls the flat index out of the (possibly CUDA) tensor so
        # NumPy receives a plain Python integer.
        r_max, c_max = np.unravel_index(int(idx[best_scale]), cfg.net_input_size)

        # Peaks in the far half of the response map are mapped to negative
        # displacements.
        if r_max > cfg.net_input_size[0] / 2:
            r_max = r_max - cfg.net_input_size[0]
        if c_max > cfg.net_input_size[1] / 2:
            c_max = c_max - cfg.net_input_size[1]

        # Convert the displacement from response-map cells to image units of
        # the winning scale's window, then clamp the new target size.
        window_sz = self.target_sz * (cfg.scale_factor[best_scale] * padding_scale)
        self.target_pos = (self.target_pos
                           + np.array([c_max, r_max]) * window_sz / cfg.net_input_size)
        self.target_sz = np.minimum(
            np.maximum(window_sz / padding_scale, self.min_sz), self.max_sz)

        # model update
        window_sz = self.target_sz * padding_scale
        bbox = cxy_wh_2_bbox(self.target_pos, window_sz)
        patch = crop_chw(im, bbox, cfg.crop_sz)
        target = patch - cfg.net_average_image
        self.net.update(self._to_tensor(np.expand_dims(target, axis=0)),
                        lr=cfg.interp_factor)

        return cxy_wh_2_rect1(self.target_pos, self.target_sz)  # 1-index
args = parser.parse_args() dataset = args.dataset base_path = join('dataset', dataset) json_path = join('dataset', dataset + '.json') annos = json.load(open(json_path, 'r')) videos = sorted(annos.keys()) use_gpu = True visualization = False # default parameter and load feature extractor network config = TrackerConfig() net = DCFNet(config) net.load_param(args.model) net.eval().cuda() speed = [] # loop videos for video_id, video in enumerate(videos): # run without resetting video_path_name = annos[video]['name'] init_rect = np.array(annos[video]['init_rect']).astype(np.float) image_files = [ join(base_path, video_path_name, 'img', im_f) for im_f in annos[video]['image_files'] ] n_images = len(image_files) tic = time.time() # time start target_pos, target_sz = rect1_2_cxy_wh(
# Resolve the dataset locations and load the per-video annotations.
dataset = args.dataset
base_path = join('dataset', dataset)
json_path = join('dataset', dataset + '.json')
# Use a context manager so the annotation file handle is closed right away.
with open(json_path, 'r') as json_file:
    annos = json.load(json_file)
videos = sorted(annos.keys())

use_gpu = False
visualization = False

# default parameter and load feature extractor network
config = TrackerConfig()
net = DCFNet(config)
net.load_param(args.model)
net.eval()
if use_gpu:
    # Only move the network to CUDA when GPU execution is requested.
    net.cuda()

speed = []
# loop videos
for video_id, video in enumerate(videos):  # run without resetting
    video_path_name = annos[video]['name']
    # np.float was removed from NumPy; np.float64 is the dtype it aliased.
    init_rect = np.array(annos[video]['init_rect']).astype(np.float64)
    image_files = [
        join(base_path, video_path_name, 'img', im_f)
        for im_f in annos[video]['image_files']
    ]
    n_images = len(image_files)
    tic = time.time()  # time start
    target_pos, target_sz = rect1_2_cxy_wh(init_rect)  # OTB label is 1-indexed