def track(self, state, im):
    """Run one tracking step on frame ``im`` and write the result into ``state``.

    The search-region size is derived from the previous target size, a crop
    around the previous target position is passed to ``self.update``, and the
    returned position / size are clamped before being stored.

    Args:
        state: tracker state dict. Reads 'p', 'net', 'avg_chans', 'window',
            'target_pos', 'target_sz', 'im_w' and 'im_h'.
        im: current frame, forwarded to ``get_subwindow_tracking``.

    Returns:
        The same ``state`` object with 'target_pos', 'target_sz' and 'score'
        replaced by the new prediction.
    """
    p, net, avg_chans, window = (
        state[key] for key in ('p', 'net', 'avg_chans', 'window'))
    last_pos = state['target_pos']
    last_sz = state['target_sz']

    # Template side length with context padding, and the factor that maps it
    # onto the exemplar resolution.
    context = p.context_amount * sum(last_sz)
    s_z = np.sqrt((last_sz[1] + context) * (last_sz[0] + context))
    scale_z = p.exemplar_size / s_z

    # Grow the template region by the exemplar/instance margin on both sides.
    margin = (p.instance_size - p.exemplar_size) / 2
    s_x = s_z + 2 * (margin / scale_z)

    # extract scaled crops for search region x at previous target position
    crop = get_subwindow_tracking(im, last_pos, p.instance_size,
                                  python2round(s_x), avg_chans)
    x_crop = Variable(crop.unsqueeze(0))

    new_pos, new_sz, score = self.update(net, x_crop.cuda(), last_pos,
                                         last_sz * scale_z, window, scale_z, p)

    # Index 0 is bounded by im_w and index 1 by im_h; sizes never drop below 10.
    limits = (state['im_w'], state['im_h'])
    for axis, limit in enumerate(limits):
        new_pos[axis] = max(0, min(limit, new_pos[axis]))
    for axis, limit in enumerate(limits):
        new_sz[axis] = max(10, min(limit, new_sz[axis]))

    state.update({'target_pos': new_pos, 'target_sz': new_sz, 'score': score})
    return state
def track(self, state, im):
    """Advance the tracker by one frame using the head named in ``state["arch"]``.

    Increments ``self.frame_num``, remembers the frame in ``self.curr_frame``,
    builds the search crop around the previous target position and hands it
    to ``self.update`` ("SiamRPNRes22") or ``self.update_stage12_mean``
    ("CascadedSiamRPNRes22"). The prediction is clamped and stored in
    ``state``.

    Args:
        state: tracker state dict. Reads 'p', 'net', 'avg_chans', 'window',
            'target_pos', 'target_sz', 'arch', 'im_w' and 'im_h'.
        im: current frame.

    Returns:
        The same ``state`` object with 'target_pos', 'target_sz' and 'score'
        updated.

    Raises:
        NotImplementedError: ``state["arch"]`` is not one of the two
            supported architecture names.
    """
    self.frame_num += 1
    self.curr_frame = im

    p, net, avg_chans, window = (
        state[key] for key in ('p', 'net', 'avg_chans', 'window'))
    last_pos, last_sz = state['target_pos'], state['target_sz']

    # Context-padded template size and its scale relative to the exemplar.
    context = p.context_amount * sum(last_sz)
    s_z = np.sqrt((last_sz[1] + context) * (last_sz[0] + context))
    scale_z = p.exemplar_size / s_z
    margin = (p.instance_size - p.exemplar_size) / 2
    s_x = s_z + 2 * (margin / scale_z)

    # extract scaled crops for search region x at previous target position
    crop = get_subwindow_tracking(im, last_pos, p.instance_size,
                                  python2round(s_x), avg_chans)
    x_crop = Variable(crop.unsqueeze(0))

    # Pick the prediction routine only after the crop exists, so an
    # unsupported architecture fails at the same point as before.
    if state["arch"] == "SiamRPNRes22":
        predict = self.update
    elif state["arch"] == "CascadedSiamRPNRes22":
        predict = self.update_stage12_mean
    else:
        raise NotImplementedError

    new_pos, new_sz, score = predict(net, x_crop.cuda(), last_pos,
                                     last_sz * scale_z, window, scale_z, p)

    # Index 0 is bounded by im_w and index 1 by im_h; sizes never drop below 10.
    limits = (state['im_w'], state['im_h'])
    for axis, limit in enumerate(limits):
        new_pos[axis] = max(0, min(limit, new_pos[axis]))
    for axis, limit in enumerate(limits):
        new_sz[axis] = max(10, min(limit, new_sz[axis]))

    state.update({'target_pos': new_pos, 'target_sz': new_sz, 'score': score})
    return state
def CGACD_track(state, im):
    """Run one CGACD tracking step on ``im`` and store the result in ``state``.

    Sizes come from the module-level ``cfg.track`` settings. The search crop
    is converted from channel-last numpy layout to a float tensor with a
    leading batch axis before being given to ``tracker_eval``.

    Args:
        state: tracker state dict. Reads 'net', 'avg_chans', 'window',
            'target_pos', 'target_sz', 'template_bbox', 'im_w' and 'im_h'.
        im: current frame.

    Returns:
        The same ``state`` object with 'target_pos', 'target_sz' and
        'best_score' updated.
    """
    net, avg_chans, window = (
        state[key] for key in ('net', 'avg_chans', 'window'))
    last_pos, last_sz = state['target_pos'], state['target_sz']
    template_bbox = state['template_bbox']
    track_cfg = cfg.track

    # Context-padded template size, then the matching search-region size.
    context = track_cfg.contex_amount * sum(last_sz)
    s_z = np.sqrt((last_sz[1] + context) * (last_sz[0] + context))
    scale_z = track_cfg.template_size / s_z
    s_x = s_z * (track_cfg.search_size / track_cfg.template_size)

    # extract scaled crops for search region x at previous target position
    patch = get_subwindow_tracking(im, last_pos, track_cfg.search_size,
                                   round(s_x), avg_chans)
    chw = np.transpose(patch, (2, 0, 1))  # move the last axis to the front
    x_crop = torch.from_numpy(chw).float().unsqueeze(0)

    new_pos, new_sz, best_score = tracker_eval(
        net, x_crop.cuda(), last_pos, template_bbox,
        last_sz * scale_z, window, scale_z)

    # Index 0 is bounded by im_w and index 1 by im_h; sizes never drop below 10.
    limits = (state['im_w'], state['im_h'])
    for axis, limit in enumerate(limits):
        new_pos[axis] = max(0, min(limit, new_pos[axis]))
    for axis, limit in enumerate(limits):
        new_sz[axis] = max(10, min(limit, new_sz[axis]))

    state.update({'target_pos': new_pos, 'target_sz': new_sz,
                  'best_score': best_score})
    return state
def track(self, state, im, online_score=None, gt=None, name=None):
    """Run one tracking step and store box, score, mask and polygon in ``state``.

    Args:
        state: tracker state dict. Reads 'p', 'net', 'avg_chans', 'window',
            'target_pos', 'target_sz', 'im_w' and 'im_h'.
        im: current frame; a copy is kept in ``self.im_ori``.
        online_score: optional score tensor; when given it is squeezed and
            converted to a numpy array stored in ``self.online_score``.
        gt: optional ground truth, stored unchanged in ``self.gt``.
        name: optional '/'-separated image path, used only when
            ``self.debug`` is truthy to choose the debug output directory.

    Returns:
        The same ``state`` object with 'target_pos', 'target_sz',
        'cls_score', 'mask', 'mask_ori', 'polygon' and 'p' set.
    """
    p = state['p']
    net = state['net']
    avg_chans = state['avg_chans']
    window = state['window']
    target_pos = state['target_pos']
    target_sz = state['target_sz']

    self.im_ori = im.copy()
    self.gt = gt

    if online_score is not None:
        self.online_score = online_score.squeeze().cpu().data.numpy()
    else:
        self.online_score = None

    # debug: choose (and create) the directory for debug output
    if self.debug:
        if name is not None:
            parts = name.split('/')
            # A bare file name has no parent component; fall back to the
            # default folder instead of raising IndexError.
            temp = parts[-2] if len(parts) > 1 else 'oceanplus'
        else:
            name = 'temp.jpg'
            temp = 'oceanplus'

        self.name = name
        self.save_dir = join('debug', temp)
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(self.save_dir, exist_ok=True)

    # Context-padded template size and its scale relative to the exemplar.
    hc_z = target_sz[1] + p.context_amount * sum(target_sz)
    wc_z = target_sz[0] + p.context_amount * sum(target_sz)
    s_z = np.sqrt(wc_z * hc_z)
    scale_z = p.exemplar_size / s_z
    d_search = (p.instance_size - p.exemplar_size) / 2  # slightly different from rpn++
    pad = d_search / scale_z
    s_x = s_z + 2 * pad

    x_crop, self.crop_info = get_subwindow_tracking(
        im, target_pos, p.instance_size, python2round(s_x), avg_chans)
    x_crop = x_crop.unsqueeze(0)

    results = self.update(net, x_crop.cuda(), target_pos,
                          target_sz * scale_z, window, scale_z, p)
    target_pos = results['target_pos']
    target_sz = results['target_sz']

    # Index 0 is bounded by im_w and index 1 by im_h; sizes never drop below 10.
    target_pos[0] = max(0, min(state['im_w'], target_pos[0]))
    target_pos[1] = max(0, min(state['im_h'], target_pos[1]))
    target_sz[0] = max(10, min(state['im_w'], target_sz[0]))
    target_sz[1] = max(10, min(state['im_h'], target_sz[1]))

    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    state['cls_score'] = results['cls_score']
    state['mask'] = results['mask']
    state['mask_ori'] = results['mask_ori']
    state['polygon'] = results['polygon']
    state['p'] = p
    return state
def init(self, im, target_pos, target_sz, model, hp=None):
    """Build the initial tracking state from the first frame.

    When ``hp`` is falsy and ``self.info.epoch_test`` is falsy, benchmark
    hyper-parameters are loaded from a YAML file selected by dataset family
    and architecture name. The template crop is then registered with the
    model and the score window is prepared.

    Args:
        im: first frame; ``im.shape[0]`` / ``im.shape[1]`` are stored as
            'im_h' / 'im_w'.
        target_pos: initial target position, stored unchanged.
        target_sz: initial target size, stored unchanged.
        model: network exposing ``template``.
        hp: optional hyper-parameters; here it only disables YAML loading.

    Returns:
        dict with keys 'im_h', 'im_w', 'p', 'net', 'avg_chans', 'window',
        'target_pos' and 'target_sz'.
    """
    im_h, im_w = im.shape[0], im.shape[1]
    p = RPNConfig()

    # single test
    if not (hp or self.info.epoch_test):
        family = [tag for tag in ['OTB', 'VOT'] if tag in self.info.dataset]
        yaml_path = './experiments/test/{0}/{1}.yaml'.format(
            family[0], self.info.arch)
        cfg_benchmark = load_yaml(yaml_path)[self.info.dataset]
        p.update(cfg_benchmark)
        p.renew()

        # for vot17 or vot18: from siamrpn released
        if '2017' in self.info.dataset:
            area_ratio = (target_sz[0] * target_sz[1]) / float(im_h * im_w)
            # Targets below 0.4% of the frame get the larger search size.
            p.instance_size = 287 if area_ratio < 0.004 else 271
            p.renew()

    net = model
    p.anchor = generate_anchor(p.total_stride, p.scales, p.ratios, p.score_size)
    avg_chans = np.mean(im, axis=(0, 1))

    # Context-padded template side length.
    context = p.context_amount * sum(target_sz)
    s_z = python2round(
        np.sqrt((target_sz[0] + context) * (target_sz[1] + context)))

    z_crop = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z,
                                    avg_chans)
    net.template(Variable(z_crop.unsqueeze(0)).cuda())

    if p.windowing == 'cosine':
        taper = np.hanning(p.score_size)
        window = np.outer(taper, taper)  # (score_size, score_size)
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))
    # Replicate per anchor: (anchor_num, score_size, score_size).
    window = np.repeat(np.expand_dims(window, axis=0), p.anchor_num, axis=0)

    return {
        'im_h': im_h,
        'im_w': im_w,
        'p': p,
        'net': net,
        'avg_chans': avg_chans,
        'window': window,
        'target_pos': target_pos,
        'target_sz': target_sz,
    }
def track(self, state, im, updatenet):
    """Track one frame over a scale pyramid, then refresh the template.

    ``self.update`` returns the new position and the index of the selected
    scale. The search size ``p.s_x`` and the target size are blended towards
    that scale with rate ``p.scale_lr``. A new template feature is then
    computed at the updated position, fused by ``updatenet`` with the stored
    'z_0' and 'z_f' features, and installed via ``net.kernel``.

    Args:
        state: tracker state dict. Reads 'p', 'net', 'avg_chans', 'window',
            'target_pos', 'target_sz', 'z_0', 'z_f', 'im_w' and 'im_h'.
        im: current frame.
        updatenet: callable taking the concatenated features and the initial
            feature, returning the fused template feature.

    Returns:
        The same ``state`` object with 'z_f', 'net', 'target_pos',
        'target_sz' and 'p' updated.
    """
    p, net, avg_chans, window = (
        state[key] for key in ('p', 'net', 'avg_chans', 'window'))
    target_pos, target_sz = state['target_pos'], state['target_sz']

    # Template size is computed from the size BEFORE this frame's update and
    # reused below for the new template crop.
    context = p.context_amount * sum(target_sz)
    s_z = np.sqrt((target_sz[1] + context) * (target_sz[0] + context))

    # Candidate search sizes and target sizes, one entry per scale.
    scaled_instance = p.s_x * p.scales
    scaled_sz0 = target_sz[0] * p.scales
    scaled_sz1 = target_sz[1] * p.scales

    pyramid = make_scale_pyramid(im, target_pos, scaled_instance,
                                 p.instance_size, avg_chans)
    x_crops = Variable(pyramid)
    target_pos, new_scale = self.update(net, p.s_x, x_crops.cuda(),
                                        target_pos, window, p)

    # scale damping and saturation
    keep = 1 - p.scale_lr
    blended_s_x = keep * p.s_x + p.scale_lr * scaled_instance[new_scale]
    p.s_x = max(p.min_s_x, min(p.max_s_x, blended_s_x))
    target_sz = [
        keep * target_sz[0] + p.scale_lr * scaled_sz0[new_scale],
        keep * target_sz[1] + p.scale_lr * scaled_sz1[new_scale],
    ]

    # Index 0 is bounded by im_w and index 1 by im_h; sizes never drop below 10.
    limits = (state['im_w'], state['im_h'])
    for axis, limit in enumerate(limits):
        target_pos[axis] = max(0, min(limit, target_pos[axis]))
    for axis, limit in enumerate(limits):
        target_sz[axis] = max(10, min(limit, target_sz[axis]))

    # Template update: new feature + stored initial / previous features.
    crop = get_subwindow_tracking(im, target_pos, p.exemplar_size,
                                  round(s_z), avg_chans)
    z_crop = Variable(crop.unsqueeze(0))
    z_f = net.feature_extractor(z_crop.cuda())
    stacked = torch.cat(
        (Variable(state['z_0']).cuda(), Variable(state['z_f']).cuda(), z_f), 1)
    init_inp = Variable(state['z_0']).cuda()
    fused = updatenet(stacked, init_inp)
    net.kernel(fused)

    state['z_f'] = fused.cpu().data
    state['net'] = net
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    state['p'] = p
    return state
def track(self, state, im, online_score=None, gt=None):
    """Run one tracking step on ``im`` and store the new box in ``state``.

    Args:
        state: tracker state dict. Reads 'p', 'net', 'avg_chans', 'window',
            'target_pos', 'target_sz', 'im_w' and 'im_h'.
        im: current frame.
        online_score: optional score tensor; when given it is squeezed and
            stored as a numpy array in ``self.online_score``, otherwise that
            attribute is set to ``None``.
        gt: accepted for interface compatibility; not used in this method.

    Returns:
        The same ``state`` object with 'target_pos', 'target_sz' and 'p' set.
    """
    p, net, avg_chans, window = (
        state[key] for key in ('p', 'net', 'avg_chans', 'window'))
    last_pos, last_sz = state['target_pos'], state['target_sz']

    self.online_score = (
        None if online_score is None
        else online_score.squeeze().cpu().data.numpy())

    # Context-padded template size and its scale relative to the exemplar.
    context = p.context_amount * sum(last_sz)
    s_z = np.sqrt((last_sz[0] + context) * (last_sz[1] + context))
    scale_z = p.exemplar_size / s_z
    margin = (p.instance_size - p.exemplar_size) / 2  # slightly different from rpn++
    s_x = s_z + 2 * (margin / scale_z)

    crop, _ = get_subwindow_tracking(im, last_pos, p.instance_size,
                                     python2round(s_x), avg_chans)
    x_crop = crop.unsqueeze(0)

    new_pos, new_sz, _ = self.update(net, x_crop.cuda(), last_pos,
                                     last_sz * scale_z, window, scale_z, p)

    # Index 0 is bounded by im_w and index 1 by im_h; sizes never drop below 10.
    limits = (state['im_w'], state['im_h'])
    for axis, limit in enumerate(limits):
        new_pos[axis] = max(0, min(limit, new_pos[axis]))
    for axis, limit in enumerate(limits):
        new_sz[axis] = max(10, min(limit, new_sz[axis]))

    state.update({'target_pos': new_pos, 'target_sz': new_sz, 'p': p})
    return state
def CGACD_init(im, target_pos, target_sz, net):
    """Create the CGACD tracking state from the first frame.

    Crops the template with context padding, computes the target box inside
    the template crop (centred on ``template_size // 2``), registers the
    template with ``net`` and prepares a flattened score window.

    Args:
        im: first frame; ``im.shape[0]`` / ``im.shape[1]`` are stored as
            'im_h' / 'im_w'.
        target_pos: initial target position, stored unchanged.
        target_sz: initial target size, stored unchanged.
        net: network exposing ``template``.

    Returns:
        dict with keys 'im_h', 'im_w', 'net', 'avg_chans', 'window',
        'target_pos', 'target_sz' and 'template_bbox'.
    """
    im_h, im_w = im.shape[0], im.shape[1]
    avg_chans = np.mean(im, axis=(0, 1))
    track_cfg = cfg.track

    # Context-padded template side length.
    context = track_cfg.contex_amount * sum(target_sz)
    s_z = round(np.sqrt((target_sz[0] + context) * (target_sz[1] + context)))

    # initialize the exemplar
    z_crop = get_subwindow_tracking(im, target_pos, track_cfg.template_size,
                                    s_z, avg_chans)

    # Target box in template-crop coordinates: [x1, y1, x2, y2] about the centre.
    scale_z = track_cfg.template_size / s_z
    half_w = target_sz[0] * scale_z * 0.5
    half_h = target_sz[1] * scale_z * 0.5
    cx = cy = track_cfg.template_size // 2
    template_bbox = [cx - half_w, cy - half_h, cx + half_w, cy + half_h]

    chw = np.transpose(z_crop, (2, 0, 1))  # move the last axis to the front
    z = torch.from_numpy(chw).float().unsqueeze(0)
    net.template(z.cuda())

    if track_cfg.windowing == 'cosine':
        taper = np.hanning(track_cfg.response_size)
        window = np.outer(taper, taper)
    elif track_cfg.windowing == 'uniform':
        window = np.ones((track_cfg.response_size, track_cfg.response_size))
    window = window.flatten()

    return {
        'im_h': im_h,
        'im_w': im_w,
        'net': net,
        'avg_chans': avg_chans,
        'window': window,
        'target_pos': target_pos,
        'target_sz': target_sz,
        'template_bbox': template_bbox,
    }
def init(self, im, target_pos, target_sz, model, hp=None):
    """Build the initial tracking state from the first frame.

    Hyper-parameters come from the benchmark YAML file (first branch) and/or
    from ``hp``. In both cases the search size is switched between the
    'big_sz' and 'small_sz' entries depending on how much of the frame the
    target covers.

    Args:
        im: first frame; ``im.shape[0]`` / ``im.shape[1]`` are stored as
            'im_h' / 'im_w'.
        target_pos: initial target position, stored unchanged.
        target_sz: initial target size, stored unchanged.
        model: network exposing ``template``.
        hp: optional hyper-parameter mapping; must contain 'big_sz' and
            'small_sz' when given.

    Returns:
        dict with keys 'im_h', 'im_w', 'p', 'net', 'avg_chans', 'window',
        'target_pos' and 'target_sz'.

    Raises:
        ValueError: ``p.windowing`` is neither 'cosine' nor 'uniform'.
    """
    state = dict()
    p = DAGConfig()

    state['im_h'] = im.shape[0]
    state['im_w'] = im.shape[1]

    # NOTE(review): this branch runs when epoch_test is TRUTHY (and hp is
    # falsy); confirm that is intended, kept as in the original.
    if not hp and self.info.epoch_test:
        prefix = [x for x in ['OTB', 'VOT'] if x in self.info.dataset]
        if not prefix:
            prefix = [self.info.dataset]
        absPath = os.path.abspath(os.path.dirname(__file__))
        yname = 'DAG.yaml'
        yamlPath = os.path.join(
            absPath, '../../experiments/test/{0}/'.format(prefix[0]), yname)
        cfg = load_yaml(yamlPath)
        if self.online:
            cfg_benchmark = cfg[self.info.dataset + 'ON']
        else:
            cfg_benchmark = cfg[self.info.dataset]
        p.update(cfg_benchmark)
        p.renew()

        # Targets below 0.4% of the frame area use the larger search size.
        area_ratio = (target_sz[0] * target_sz[1]) / float(
            state['im_h'] * state['im_w'])
        if area_ratio < 0.004:
            p.instance_size = cfg_benchmark['big_sz']
        else:
            p.instance_size = cfg_benchmark['small_sz']
        p.renew()

    if hp:
        p.update(hp)
        p.renew()

        area_ratio = (target_sz[0] * target_sz[1]) / float(
            state['im_h'] * state['im_w'])
        if area_ratio < 0.004:
            p.instance_size = hp['big_sz']
        else:
            p.instance_size = hp['small_sz']
        p.renew()

    if self.trt:
        print(
            '====> TRT version testing: only support 255 input, the hyper-param is random <===='
        )
        p.instance_size = 255
        p.renew()

    self.grids(p)

    net = model

    # Context-padded template side length.
    wc_z = target_sz[0] + p.context_amount * sum(target_sz)
    hc_z = target_sz[1] + p.context_amount * sum(target_sz)
    s_z = round(np.sqrt(wc_z * hc_z))

    avg_chans = np.mean(im, axis=(0, 1))
    z_crop, _ = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z,
                                       avg_chans)

    z = z_crop.unsqueeze(0)
    net.template(z.cuda())

    if p.windowing == 'cosine':
        window = np.outer(np.hanning(p.score_size),
                          np.hanning(p.score_size))  # (score_size, score_size)
    elif p.windowing == 'uniform':
        # np.ones takes the shape as ONE tuple; a second positional argument
        # is interpreted as dtype and made the old call raise TypeError.
        side = int(p.score_size)
        window = np.ones((side, side))
    else:
        raise ValueError(
            "unsupported windowing mode: {!r}".format(p.windowing))

    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz

    return state
def init(self, im, target_pos, target_sz, model, hp=None):
    """Build the initial SiamFC-style tracking state, including template features.

    Besides the usual state entries, the template feature is stored twice
    ('z_0' as the initial feature, 'z_f' as the current one) so a template
    update step can read both later.

    Args:
        im: first frame; ``im.shape[0]`` / ``im.shape[1]`` are stored as
            'im_h' / 'im_w'.
        target_pos: initial target position, stored unchanged.
        target_sz: initial target size, stored unchanged.
        model: network exposing ``feature_extractor``, ``template`` and
            ``kernel``.
        hp: optional hyper-parameter mapping applied on top of the defaults.

    Returns:
        dict with keys 'p', 'net', 'avg_chans', 'window', 'target_pos',
        'target_sz', 'z_0', 'z_f', 'im_h' and 'im_w'.

    Raises:
        ValueError: ``p.windowing`` is neither 'cosine' nor 'uniform'.
    """
    state = dict()
    p = FCConfig()

    # single test
    if not hp and not self.info.epoch_test:
        prefix = [x for x in ['OTB', 'VOT'] if x in self.info.dataset]
        cfg = load_yaml('./experiments/test/{0}/{1}.yaml'.format(
            prefix[0], self.info.arch))
        cfg_benchmark = cfg[self.info.dataset]
        p.update(cfg_benchmark)
        p.renew()

    # param tune
    if hp:
        p.update(hp)
        p.renew()

    net = model

    avg_chans = np.mean(im, axis=(0, 1))

    # Context-padded template side length and its scale to the exemplar.
    wc_z = target_sz[0] + p.context_amount * sum(target_sz)
    hc_z = target_sz[1] + p.context_amount * sum(target_sz)
    s_z = round(np.sqrt(wc_z * hc_z))
    scale_z = p.exemplar_size / s_z

    z_crop = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z,
                                    avg_chans)

    # The original wrapped the crop and ran feature_extractor twice on the
    # same input; one pass is kept.
    # NOTE(review): assumes feature_extractor is deterministic here - confirm.
    z = Variable(z_crop.unsqueeze(0))
    z_f = net.feature_extractor(z.cuda())
    net.template(z.cuda())

    # Search-region size and its allowed range, stored on the config.
    d_search = (p.instance_size - p.exemplar_size) / 2
    pad = d_search / scale_z
    s_x = s_z + 2 * pad
    min_s_x = 0.2 * s_x
    max_s_x = 5 * s_x

    s_x_serise = {'s_x': s_x, 'min_s_x': min_s_x, 'max_s_x': max_s_x}
    p.update(s_x_serise)

    net.kernel(z_f)

    side = int(p.score_size) * int(p.response_up)
    if p.windowing == 'cosine':
        window = np.outer(np.hanning(side), np.hanning(side))
    elif p.windowing == 'uniform':
        # np.ones takes the shape as ONE tuple; a second positional argument
        # is interpreted as dtype and made the old call raise TypeError.
        window = np.ones((side, side))
    else:
        raise ValueError(
            "unsupported windowing mode: {!r}".format(p.windowing))
    window /= window.sum()  # normalise so the window sums to 1

    # Scale factors scale_step ** (i - num_scale // 2) for i in 0..num_scale-1.
    p.scales = p.scale_step ** (
        np.arange(p.num_scale) - np.ceil(p.num_scale // 2))

    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    state['z_0'] = z_f.cpu().data
    state['z_f'] = z_f.cpu().data
    state['im_h'] = im.shape[0]
    state['im_w'] = im.shape[1]
    return state
def init(self, im, target_pos, target_sz, model, hp=None, online=False, mask=None, debug=False):
    """Build the initial OceanPlus tracking state from the first frame and mask.

    Benchmark hyper-parameters are always loaded from 'OceanPlus.yaml'
    (falling back to the 'VOT2020' entry for unknown datasets). The template
    image crop and a binarised mask crop are registered with the model.

    Args:
        im: first frame; ``im.shape[0]`` / ``im.shape[1]`` are stored as
            'im_h' / 'im_w' and on ``self.imh`` / ``self.imw``.
        target_pos: initial target position, stored unchanged.
        target_sz: initial target size, stored unchanged.
        model: network exposing ``template`` (and ``update_lambda`` /
            ``update_iter`` when the matching ``hp`` keys are present).
        hp: optional hyper-parameter mapping applied on top of the YAML
            values.
        online: accepted for interface compatibility; not used here.
        mask: initial target mask, passed to ``get_subwindow_tracking_mask``.
        debug: stored in ``self.debug``.

    Returns:
        dict with keys 'im_h', 'im_w', 'p', 'net', 'avg_chans', 'window',
        'target_pos' and 'target_sz'.

    Raises:
        ValueError: ``p.windowing`` is neither 'cosine' nor 'uniform'.
    """
    state = dict()
    p = AdaConfig()

    self.debug = debug

    state['im_h'] = im.shape[0]
    state['im_w'] = im.shape[1]
    self.imh = state['im_h']
    self.imw = state['im_w']

    # single test
    # Originally guarded by `if not hp and not self.info.epoch_test:`, which
    # had been replaced by `if True:`; the YAML values are always loaded.
    absPath = os.path.abspath(os.path.dirname(__file__))
    yname = 'OceanPlus.yaml'
    yamlPath = os.path.join(absPath, '../../experiments/test/VOT/', yname)
    cfg = load_yaml(yamlPath)

    if self.info.dataset not in cfg.keys():
        print('[*] unsupported benchmark, use VOT2020 hyper-parameters (not optimal)')
        cfg_benchmark = cfg['VOT2020']
    else:
        cfg_benchmark = cfg[self.info.dataset]
    p.update(cfg_benchmark)
    p.renew()

    # Targets below 0.4% of the frame area use the larger search size.
    area_ratio = (target_sz[0] * target_sz[1]) / float(
        state['im_h'] * state['im_w'])
    if area_ratio < 0.004:
        p.instance_size = cfg_benchmark['big_sz']
    else:
        p.instance_size = cfg_benchmark['small_sz']
    p.renew()

    self.grids(p)  # self.grid_to_search_x, self.grid_to_search_y

    net = model

    # param tune
    if hp:
        p.update(hp)
        # NOTE(review): each pair is read with hp[...] when EITHER key is
        # present, so supplying only one of the two raises KeyError.
        if 'lambda_u' in hp or 'lambda_s' in hp:
            net.update_lambda(hp['lambda_u'], hp['lambda_s'])
        if 'iter1' in hp or 'iter2' in hp:
            net.update_iter(hp['iter1'], hp['iter2'])

        print('======= hyper-parameters: pk: {:.3f}, wi: {:.2f}, lr: {:.2f} ======='.format(p.penalty_k, p.window_influence, p.lr))

    # Context-padded template side length.
    wc_z = target_sz[0] + p.context_amount * sum(target_sz)
    hc_z = target_sz[1] + p.context_amount * sum(target_sz)
    s_z = round(np.sqrt(wc_z * hc_z))

    avg_chans = np.mean(im, axis=(0, 1))
    z_crop, _ = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z,
                                       avg_chans)

    # Crop the mask with the same geometry and binarise it at 0.5.
    mask_crop, _ = get_subwindow_tracking_mask(mask, target_pos,
                                               p.exemplar_size, s_z,
                                               out_mode=None)
    mask_crop = (mask_crop > 0.5).astype(np.uint8)
    mask_crop = torch.from_numpy(mask_crop)

    z = z_crop.unsqueeze(0)
    net.template(z.cuda(), mask_crop.unsqueeze(0).cuda())

    if p.windowing == 'cosine':
        window = np.outer(np.hanning(p.score_size),
                          np.hanning(p.score_size))  # (score_size, score_size)
    elif p.windowing == 'uniform':
        # np.ones takes the shape as ONE tuple; a second positional argument
        # is interpreted as dtype and made the old call raise TypeError.
        side = int(p.score_size)
        window = np.ones((side, side))
    else:
        raise ValueError(
            "unsupported windowing mode: {!r}".format(p.windowing))

    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz

    self.p = p
    self.debug_on_crop = False
    self.debug_on_ori = False
    self.save_mask = False  # save all mask results
    self.mask_ratio = False
    self.update_template = True

    # Either debug flag forces debug mode on, regardless of the argument.
    if self.debug_on_ori or self.debug_on_crop:
        print('Warning: debuging...')
        print('Warning: turning off debugging mode after this process')
        self.debug = True

    return state
def init(self, im, target_pos, target_sz, model, hp=None):
    """Build the initial SiamFC-style tracking state from the first frame.

    Args:
        im: first frame; ``im.shape[0]`` / ``im.shape[1]`` are stored as
            'im_h' / 'im_w'.
        target_pos: initial target position, stored unchanged.
        target_sz: initial target size, stored unchanged.
        model: network exposing ``template``.
        hp: optional hyper-parameter mapping applied on top of the defaults.

    Returns:
        dict with keys 'p', 'net', 'avg_chans', 'window', 'target_pos',
        'target_sz', 'im_h' and 'im_w'.

    Raises:
        ValueError: ``p.windowing`` is neither 'cosine' nor 'uniform'.
    """
    state = dict()
    p = FCConfig()

    # single test
    if not hp and not self.info.epoch_test:
        prefix = [x for x in ['OTB', 'VOT'] if x in self.info.dataset]
        absPath = os.path.abspath(os.path.dirname(__file__))
        yname = 'SiamDW.yaml'
        yamlPath = os.path.join(
            absPath, '../../experiments/test/{0}/'.format(prefix[0]), yname)
        cfg = load_yaml(yamlPath)
        cfg_benchmark = cfg[self.info.dataset]
        p.update(cfg_benchmark)
        p.renew()

    # param tune
    if hp:
        p.update(hp)
        p.renew()
        print(
            '======= hyper-parameters: scale_step: {}, scale_penalty: {}, scale_lr: {} ======='
            .format(p.scale_step, p.scale_penalty, p.scale_lr))

    net = model

    avg_chans = np.mean(im, axis=(0, 1))

    # Context-padded template side length and its scale to the exemplar.
    wc_z = target_sz[0] + p.context_amount * sum(target_sz)
    hc_z = target_sz[1] + p.context_amount * sum(target_sz)
    s_z = round(np.sqrt(wc_z * hc_z))
    scale_z = p.exemplar_size / s_z

    z_crop, _ = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z,
                                       avg_chans)

    # Search-region size and its allowed range, stored on the config.
    d_search = (p.instance_size - p.exemplar_size) / 2
    pad = d_search / scale_z
    s_x = s_z + 2 * pad
    min_s_x = 0.2 * s_x
    max_s_x = 5 * s_x

    s_x_serise = {'s_x': s_x, 'min_s_x': min_s_x, 'max_s_x': max_s_x}
    p.update(s_x_serise)

    z = Variable(z_crop.unsqueeze(0))
    net.template(z.cuda())

    side = int(p.score_size) * int(p.response_up)
    if p.windowing == 'cosine':
        window = np.outer(np.hanning(side), np.hanning(side))
    elif p.windowing == 'uniform':
        # np.ones takes the shape as ONE tuple; a second positional argument
        # is interpreted as dtype and made the old call raise TypeError.
        window = np.ones((side, side))
    else:
        raise ValueError(
            "unsupported windowing mode: {!r}".format(p.windowing))
    window /= window.sum()  # normalise so the window sums to 1

    # Scale factors scale_step ** (i - num_scale // 2) for i in 0..num_scale-1.
    p.scales = p.scale_step ** (
        np.arange(p.num_scale) - np.ceil(p.num_scale // 2))

    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    state['im_h'] = im.shape[0]
    state['im_w'] = im.shape[1]
    return state
def init(self, im, target_pos, target_sz, model, hp=None):
    """Build the initial Ocean tracking state from the first frame.

    Hyper-parameters come from 'Ocean.yaml' (when ``hp`` is falsy and
    ``self.info.epoch_test`` is falsy) or from ``hp``. In both cases the
    search size is switched between 'big_sz' and 'small_sz' depending on how
    much of the frame the target covers.

    Args:
        im: first frame; ``im.shape[0]`` / ``im.shape[1]`` are stored as
            'im_h' / 'im_w'.
        target_pos: initial target position, stored unchanged.
        target_sz: initial target size, stored unchanged.
        model: network exposing ``template``.
        hp: optional hyper-parameter mapping; must contain 'big_sz' and
            'small_sz' when given.

    Returns:
        dict with keys 'im_h', 'im_w', 'p', 'net', 'avg_chans', 'window',
        'target_pos' and 'target_sz'.

    Raises:
        ValueError: ``p.windowing`` is neither 'cosine' nor 'uniform'.
    """
    state = dict()
    p = OceanConfig()

    state['im_h'] = im.shape[0]
    state['im_w'] = im.shape[1]

    # single test
    if not hp and not self.info.epoch_test:
        prefix = [
            x for x in ['OTB', 'VOT', 'GOT10K', 'LASOT']
            if x in self.info.dataset
        ]
        if not prefix:
            prefix = [self.info.dataset]
        absPath = os.path.abspath(os.path.dirname(__file__))
        yname = 'Ocean.yaml'
        yamlPath = os.path.join(
            absPath, '../../experiments/test/{0}/'.format(prefix[0]), yname)
        cfg = load_yaml(yamlPath)
        if self.online:
            cfg_benchmark = cfg[self.info.dataset + 'ON']
        else:
            cfg_benchmark = cfg[self.info.dataset]
        p.update(cfg_benchmark)
        p.renew()

        # Targets below 0.4% of the frame area use the larger search size.
        area_ratio = (target_sz[0] * target_sz[1]) / float(
            state['im_h'] * state['im_w'])
        if area_ratio < 0.004:
            p.instance_size = cfg_benchmark['big_sz']
        else:
            p.instance_size = cfg_benchmark['small_sz']
        p.renew()

    # param tune
    if hp:
        p.update(hp)
        p.renew()

        # for small object (from DaSiamRPN released)
        area_ratio = (target_sz[0] * target_sz[1]) / float(
            state['im_h'] * state['im_w'])
        if area_ratio < 0.004:
            p.instance_size = hp['big_sz']
        else:
            p.instance_size = hp['small_sz']
        p.renew()

    if self.trt:
        print(
            '====> TRT version testing: only support 255 input, the hyper-param is random <===='
        )
        p.instance_size = 255
        p.renew()

    self.grids(p)  # self.grid_to_search_x, self.grid_to_search_y

    net = model

    # Context-padded template side length.
    wc_z = target_sz[0] + p.context_amount * sum(target_sz)
    hc_z = target_sz[1] + p.context_amount * sum(target_sz)
    s_z = round(np.sqrt(wc_z * hc_z))

    avg_chans = np.mean(im, axis=(0, 1))
    z_crop, _ = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z,
                                       avg_chans)

    z = z_crop.unsqueeze(0)
    net.template(z.cuda())

    if p.windowing == 'cosine':
        window = np.outer(np.hanning(p.score_size),
                          np.hanning(p.score_size))  # (score_size, score_size)
    elif p.windowing == 'uniform':
        # np.ones takes the shape as ONE tuple; a second positional argument
        # is interpreted as dtype and made the old call raise TypeError.
        side = int(p.score_size)
        window = np.ones((side, side))
    else:
        raise ValueError(
            "unsupported windowing mode: {!r}".format(p.windowing))

    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz

    return state
def init(self, im, target_pos, target_sz, model, hp=None):
    """Build the initial tracking state and, optionally, an online classifier.

    Resets the per-sequence counters on ``self``, loads hyper-parameters
    (YAML file or ``hp``), registers the template crop with ``model`` and
    prepares a per-anchor score window. When ``cfg.TRACK.USE_CLASSIFIER`` is
    truthy a ``BaseClassifier`` is created and initialised on a larger crop.

    Args:
        im: first frame; ``im.shape[0]`` / ``im.shape[1]`` are stored as
            'im_h' / 'im_w'.
        target_pos: initial target position, stored unchanged.
        target_sz: initial target size, stored unchanged.
        model: network exposing ``template`` (and ``template_short_term``
            when ``cfg.TRACK.TEMPLATE_UPDATE`` is truthy).
        hp: optional hyper-parameter mapping; must contain 'big_sz' and
            'small_sz' when given.

    Returns:
        dict with keys 'im_h', 'im_w', 'p', 'net', 'avg_chans', 'window',
        'target_pos' and 'target_sz'.
    """
    # NOTE(review): the source arrived with its line structure collapsed; the
    # nesting below is reconstructed from data flow - confirm against the
    # original file, especially the classifier section.
    self.frame_num = 1
    self.temp_max = 0
    self.lost_count = 0
    state = dict()
    state['im_h'] = im.shape[0]
    state['im_w'] = im.shape[1]
    p = RPNConfig()
    # single test
    if not hp and not self.info.epoch_test:
        prefix = [x for x in ['OTB', 'VOT'] if x in self.info.dataset]
        # The YAML content is bound to `cfg_`, so the `cfg` used further down
        # is a different (module-level) object.
        cfg_ = load_yaml('../experiments/test/{0}/{1}.yaml'.format(prefix[0], self.info.arch))
        cfg_benchmark = cfg_[self.info.dataset]
        p.update(cfg_benchmark)
        p.renew()
        # for vot17 or vot18: from siamrpn released
        if '2017' in self.info.dataset:
            # Targets below 0.4% of the frame area get the larger search size.
            if ((target_sz[0] * target_sz[1]) / float(state['im_h'] * state['im_w'])) < 0.004:
                p.instance_size = 287
                p.renew()
            else:
                p.instance_size = 271
                p.renew()
    # param tune
    if hp:
        p.update(hp)
        p.renew()
        # for small object (from DaSiamRPN released)
        if ((target_sz[0] * target_sz[1]) / float(state['im_h'] * state['im_w'])) < 0.004:
            p.instance_size = hp['big_sz']
            p.renew()
        else:
            p.instance_size = hp['small_sz']
            p.renew()
    net = model
    p.anchor = generate_anchor(p.total_stride, p.scales, p.ratios, p.score_size)
    avg_chans = np.mean(im, axis=(0, 1))
    # Context-padded template side length.
    wc_z = target_sz[0] + p.context_amount * sum(target_sz)
    hc_z = target_sz[1] + p.context_amount * sum(target_sz)
    s_z = python2round(np.sqrt(wc_z * hc_z))
    z_crop = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z, avg_chans)
    z = Variable(z_crop.unsqueeze(0))
    net.template(z.cuda())
    if p.windowing == 'cosine':
        window = np.outer(np.hanning(p.score_size), np.hanning(p.score_size))  # (score_size, score_size)
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))
    # Replicate the window once per anchor: (anchor_num, score_size, score_size).
    window = np.expand_dims(window, axis=0)
    window = np.repeat(window, p.anchor_num, axis=0)
    if cfg.TRACK.USE_CLASSIFIER:
        # atom = ATOM().cuda().eval()
        # checkpoint_dict = torch.load("/home/zhuyi/Code/CRPN_511/atom_pretrain.pth")
        # atom.load_state_dict(checkpoint_dict["net"],strict=False)
        self.classifier = BaseClassifier(net)
        self.center_pos = np.array(target_pos)
        self.size = np.array(target_sz)
        # Box as [left, top, width, height] derived from centre and size.
        bbox = [target_pos[0]-(target_sz[0]-1)/2,target_pos[1]-(target_sz[1]-1)/2,target_sz[0],target_sz[1]]
        #bbox = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
        if cfg.TRACK.TEMPLATE_UPDATE:
            with torch.no_grad():
                # NOTE(review): self.z_crop is not assigned anywhere in this
                # method (only the local z_crop / z are) - verify it is set
                # elsewhere before this call.
                net.template_short_term(self.z_crop)
        # Classifier crop is twice the configured instance size.
        s_xx = s_z * (cfg.TRACK.INSTANCE_SIZE * 2 / cfg.TRACK.EXEMPLAR_SIZE)
        x_crop = get_subwindow_tracking(im, self.center_pos, cfg.TRACK.INSTANCE_SIZE * 2, round(s_xx), avg_chans)
        x_crop =x_crop.unsqueeze(0)
        self.classifier.initialize(x_crop.type(torch.FloatTensor), bbox)
    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    return state