import numpy as np
import torch
from torch.autograd import Variable

from utils import get_subwindow_tracking  # crop-and-pad helper from the repo
# TrackerConfig, generate_anchor and tracker_eval are defined elsewhere in
# this same module.


def SiamRPN_track(state, im):
    # Unpack the network and tracker parameters.
    p = state['p']
    net = state['net']
    avg_chans = state['avg_chans']
    window = state['window']
    # Target position and size estimated in the previous frame.
    target_pos = state['target_pos']
    target_sz = state['target_sz']

    # Update the search region: this frame's search area is derived from the
    # previous frame's result. (The indices look swapped relative to
    # SiamRPN_init, but only the product enters s_z, so the result is the same.)
    wc_z = target_sz[1] + p.context_amount * sum(target_sz)
    hc_z = target_sz[0] + p.context_amount * sum(target_sz)
    s_z = np.sqrt(wc_z * hc_z)
    # Scale factor from image coordinates to exemplar coordinates.
    scale_z = p.exemplar_size / s_z
    # Grow the exemplar region out to the instance (search) region.
    d_search = (p.instance_size - p.exemplar_size) / 2
    pad = d_search / scale_z
    s_x = s_z + 2 * pad

    # Extract scaled crops for search region x at previous target position.
    x_crop = Variable(
        get_subwindow_tracking(im, target_pos, p.instance_size,
                               round(s_x), avg_chans).unsqueeze(0))

    # Predict this frame's result: target_pos (position), target_sz (size) and
    # score (confidence); target_sz * scale_z is the target size expressed in
    # search-region coordinates.
    if torch.cuda.is_available():
        target_pos, target_sz, score = tracker_eval(
            net, x_crop.cuda(), target_pos, target_sz * scale_z, window, scale_z, p)
    else:
        target_pos, target_sz, score = tracker_eval(
            net, x_crop, target_pos, target_sz * scale_z, window, scale_z, p)

    # Clamp the prediction to the image bounds and a 10 px minimum size.
    target_pos[0] = max(0, min(state['im_w'], target_pos[0]))
    target_pos[1] = max(0, min(state['im_h'], target_pos[1]))
    target_sz[0] = max(10, min(state['im_w'], target_sz[0]))
    target_sz[1] = max(10, min(state['im_h'], target_sz[1]))

    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    state['score'] = score
    return state
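To make the scale bookkeeping above concrete, here is the same arithmetic for a hypothetical 100 × 50 px target with the defaults context_amount = 0.5, exemplar_size = 127 and instance_size = 271. This is a worked sketch, not part of the original code:

import numpy as np

w, h = 100.0, 50.0                   # hypothetical previous-frame target size
context_amount = 0.5                 # default p.context_amount
wc_z = h + context_amount * (w + h)  # 50 + 75  = 125
hc_z = w + context_amount * (w + h)  # 100 + 75 = 175
s_z = np.sqrt(wc_z * hc_z)           # ~147.9 px: exemplar crop side in the image
scale_z = 127 / s_z                  # ~0.86: exemplar px per image px
d_search = (271 - 127) / 2           # 72 px margin in instance coordinates
pad = d_search / scale_z             # ~83.9 px margin back in image coordinates
s_x = s_z + 2 * pad                  # ~315.6 px: search crop side in the image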
def SiamRPN_init(im, target_pos, target_sz, net):
    # State dictionary carrying everything the tracker needs between frames.
    state = dict()
    # Tracker hyper-parameters, overridden by the network's own config.
    p = TrackerConfig()
    p.update(net.cfg)
    # Record the frame size.
    state['im_h'] = im.shape[0]
    state['im_w'] = im.shape[1]

    if p.adaptive:  # defaults to True
        # Target tiny relative to the frame -> use a bigger search region.
        if ((target_sz[0] * target_sz[1]) / float(state['im_h'] * state['im_w'])) < 0.004:
            p.instance_size = 287  # small object, big search region
        else:
            p.instance_size = 271
        # Recompute score_size for the chosen instance_size, matching the
        # formula in TrackerConfig (int() guards against float division).
        p.score_size = int((p.instance_size - p.exemplar_size) / p.total_stride + 1)

    # Generate the anchor grid (scales x ratios at every score-map cell).
    p.anchor = generate_anchor(p.total_stride, p.scales, p.ratios, p.score_size)

    # Per-channel image mean, used to pad crops that extend past the frame.
    avg_chans = np.mean(im, axis=(0, 1))

    # Add context around the target (context_amount defaults to 0.5);
    # target_sz holds the template width and height, added up by sum().
    wc_z = target_sz[0] + p.context_amount * sum(target_sz)
    hc_z = target_sz[1] + p.context_amount * sum(target_sz)
    # Round to an integer crop size.
    s_z = round(np.sqrt(wc_z * hc_z))

    # Initialize the exemplar (target template).
    z_crop = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z, avg_chans)
    # Add a batch dimension.
    z = Variable(z_crop.unsqueeze(0))
    # Push the template through the network, on GPU if available.
    if torch.cuda.is_available():
        net.temple(z.cuda())
    else:
        net.temple(z)

    # Penalty window over the score map, tiled once per anchor.
    if p.windowing == 'cosine':
        window = np.outer(np.hanning(p.score_size), np.hanning(p.score_size))
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))
    window = np.tile(window.flatten(), p.anchor_num)

    # Save everything SiamRPN_track needs, returned as a dictionary.
    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    return state
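For context, a minimal end-to-end driver could look like the sketch below. It is not part of the original code: the network class SiamRPNBIG and the weight file name are assumptions modelled on the DaSiamRPN repo, the video path is a placeholder, and the first-frame box is drawn by hand.

if __name__ == '__main__':
    import cv2
    from net import SiamRPNBIG  # assumed network definition from the repo

    net = SiamRPNBIG()
    net.load_state_dict(torch.load('SiamRPNBIG.model'))  # assumed weight file
    net.eval()
    if torch.cuda.is_available():
        net = net.cuda()

    cap = cv2.VideoCapture('video.mp4')               # placeholder input
    ok, frame = cap.read()
    x, y, w, h = cv2.selectROI('init', frame, False)  # draw first-frame box
    target_pos = np.array([x + w / 2, y + h / 2])     # box centre (cx, cy)
    target_sz = np.array([w, h], dtype=float)
    state = SiamRPN_init(frame, target_pos, target_sz, net)

    while True:
        ok, frame = cap.read()
        if not ok:
            break
        state = SiamRPN_track(state, frame)
        cx, cy = state['target_pos']
        tw, th = state['target_sz']
        cv2.rectangle(frame, (int(cx - tw / 2), int(cy - th / 2)),
                      (int(cx + tw / 2), int(cy + th / 2)), (0, 255, 0), 2)
        cv2.imshow('SiamRPN', frame)
        if cv2.waitKey(1) == 27:  # Esc quits
            break
    cap.release()
    cv2.destroyAllWindows()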