import numpy as np
from torch.autograd import Variable

# TrackerConfig, generate_anchor and get_subwindow_tracking are SiamMask helpers
# assumed to be imported from the surrounding project.


def siamese_init(im, target_pos, target_sz, model, hp=None, device='cpu'):
    """
    Initialize the tracker and build the state dict from the target information.
    :param im: image currently being processed
    :param target_pos: target position
    :param target_sz: target size
    :param model: trained network model
    :param hp: hyper-parameters
    :param device: device to run on
    :return: the tracker's state dict
    """
    # Initialize the state dict
    state = dict()
    # Record the image height and width
    state['im_h'] = im.shape[0]
    state['im_w'] = im.shape[1]

    # Configure the tracker parameters
    p = TrackerConfig()
    # Update the parameters from hp and the model anchors
    p.update(hp, model.anchors)
    # Refresh the derived parameters
    p.renew()

    # Get the network model
    net = model
    # Update the tracker parameters from the network, mainly the anchors
    p.scales = model.anchors['scales']
    p.ratios = model.anchors['ratios']
    p.anchor_num = model.anchor_num
    # Generate the anchors
    p.anchor = generate_anchor(model.anchors, p.score_size)
    # Per-channel mean of the image
    avg_chans = np.mean(im, axis=(0, 1))

    # From the configured context amount, compute the width, height and side
    # length of the exemplar input z
    wc_z = target_sz[0] + p.context_amount * sum(target_sz)
    hc_z = target_sz[1] + p.context_amount * sum(target_sz)
    s_z = round(np.sqrt(wc_z * hc_z))

    # initialize the exemplar
    z_crop = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z, avg_chans)

    # Wrap in a Variable so PyTorch can backpropagate through it
    z = Variable(z_crop.unsqueeze(0))
    # Run the dedicated template branch
    net.template(z.to(device))

    # Select the penalty window
    if p.windowing == 'cosine':
        # Build a cosine window as the outer product of two Hanning windows
        window = np.outer(np.hanning(p.score_size), np.hanning(p.score_size))
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))
    # Every anchor gets its own copy of the penalty window
    window = np.tile(window.flatten(), p.anchor_num)

    # Store everything in the state dict
    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    return state
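# Example usage of siamese_init above: a minimal sketch. `siammask` and `cfg`
# are placeholders for a loaded SiamMask model and its config dict, which this
# snippet does not construct; the target box values are hypothetical.
def example_init(frame, siammask, cfg):
    import numpy as np
    x, y, w, h = 100, 80, 60, 40                   # hypothetical (x, y, w, h) box
    target_pos = np.array([x + w / 2, y + h / 2])  # center of the box
    target_sz = np.array([w, h])                   # width and height
    # Build the tracker state from the first frame
    return siamese_init(frame, target_pos, target_sz, siammask,
                        cfg.get('hp'), device='cpu')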
def siamese_init(im, target_pos, target_sz, model, hp=None, device='cpu'):
    state = dict()
    state['im_h'] = im.shape[0]
    state['im_w'] = im.shape[1]

    p = TrackerConfig()
    p.update(hp, model.anchors)
    p.renew()

    net = model
    p.scales = model.anchors['scales']
    p.ratios = model.anchors['ratios']
    p.anchor_num = model.anchor_num
    p.anchor = generate_anchor(model.anchors, p.score_size)
    avg_chans = np.mean(im, axis=(0, 1))

    wc_z = target_sz[0] + p.context_amount * sum(target_sz)
    hc_z = target_sz[1] + p.context_amount * sum(target_sz)
    s_z = round(np.sqrt(wc_z * hc_z))

    # initialize the exemplar
    z_crop = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z, avg_chans)

    z = Variable(z_crop.unsqueeze(0))
    # The network stores the features (self.zf) that result from passing the
    # patch z through the siamese backbone
    net.template(z.to(device))

    if p.windowing == 'cosine':
        window = np.outer(np.hanning(p.score_size), np.hanning(p.score_size))
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))
    window = np.tile(window.flatten(), p.anchor_num)

    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    return state
def siamese_init(im, target_pos, target_sz, model, hp=None, device='cpu'):
    state = dict()
    state['im_h'] = im.shape[0]
    state['im_w'] = im.shape[1]

    p = TrackerConfig()
    p.update(hp, model.anchors)
    p.renew()

    net = model
    p.scales = model.anchors['scales']
    p.ratios = model.anchors['ratios']
    p.anchor_num = model.anchor_num
    p.anchor = generate_anchor(model.anchors, p.score_size)
    avg_chans = np.mean(im, axis=(0, 1))

    wc_z = target_sz[0] + p.context_amount * sum(target_sz)
    hc_z = target_sz[1] + p.context_amount * sum(target_sz)
    s_z = round(np.sqrt(wc_z * hc_z))

    # initialize the exemplar
    z_crop = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z, avg_chans)

    z = Variable(z_crop.unsqueeze(0))
    net.template(z.to(device))

    if p.windowing == 'cosine':
        # the outer product yields a matrix; the inner product yields a single number
        window = np.outer(np.hanning(p.score_size), np.hanning(p.score_size))
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))
    window = np.tile(window.flatten(), p.anchor_num)

    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    return state
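# A small demonstration of the outer-product comment above: the cosine window is
# the outer product of two 1-D Hanning windows (a matrix), while their inner
# product would be a single scalar. score_size=5 and anchor_num=3 are toy values
# chosen purely for illustration.
def demo_cosine_window():
    import numpy as np
    score_size, anchor_num = 5, 3
    w1d = np.hanning(score_size)                   # shape (5,), peaks at the center
    window = np.outer(w1d, w1d)                    # shape (5, 5): a matrix
    scalar = np.inner(w1d, w1d)                    # a 0-d result, by contrast
    tiled = np.tile(window.flatten(), anchor_num)  # one copy of the window per anchor
    assert window.shape == (score_size, score_size)
    assert np.ndim(scalar) == 0
    assert tiled.shape == (score_size * score_size * anchor_num,)
    return tiled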
def siamese_init(im, model, hp=None, device='cpu', targets=None, detector=None):
    # Restrict the (optional) ImageAI detector to cars and people. Currently
    # unused, because the per-target detection calls below are disabled.
    custom_objects = detector.CustomObjects(car=True, person=True)
    state = dict()
    state['im_h'] = im.shape[0]
    state['im_w'] = im.shape[1]
    p = TrackerConfig()
    p.update(hp, model.anchors)
    p.renew()
    net = model
    p.scales = model.anchors['scales']
    p.ratios = model.anchors['ratios']
    p.anchor_num = model.anchor_num
    p.anchor = generate_anchor(model.anchors, p.score_size)
    avg_chans = np.mean(im, axis=(0, 1))

    # Compute the context-enlarged exemplar crop size for each target
    for target in targets:
        wc_z = target["target_sz"][0] + p.context_amount * sum(target["target_sz"])
        hc_z = target["target_sz"][1] + p.context_amount * sum(target["target_sz"])
        target["s_z"] = round(np.sqrt(wc_z * hc_z))

    # initialize the exemplars (this variant of get_subwindow_tracking crops all
    # targets at once and stores each patch under target["im_to_torch"])
    targets = get_subwindow_tracking(im, p.exemplar_size, avg_chans, targets=targets)

    for target in targets:
        # Per-target object detection (detector.detectCustomObjectsFromImage)
        # was disabled here in the original code.
        z = Variable(target["im_to_torch"].unsqueeze(0))
        target["zf"] = net.template(z.to(device))
        del target["im_to_torch"]

    if p.windowing == 'cosine':
        window = np.outer(np.hanning(p.score_size), np.hanning(p.score_size))
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))
    window = np.tile(window.flatten(), p.anchor_num)

    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state["targets"] = targets
    state["detector"] = detector
    return state
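# The multi-target variant above expects `targets` to be a list of dicts. This
# is a minimal sketch of how such a list might be prepared: "target_pos" and
# "target_sz" are the keys the function reads, while "s_z", "im_to_torch" and
# "zf" are filled in during initialization.
def make_targets(boxes):
    import numpy as np
    targets = []
    for x, y, w, h in boxes:  # axis-aligned boxes, e.g. from a detector
        targets.append({
            "target_pos": np.array([x + w / 2, y + h / 2]),  # box center
            "target_sz": np.array([w, h]),                   # box width/height
        })
    return targets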
class SingleTracker(object):
    def __init__(self, config_path, model_path):
        args = TrackArgs()
        args.config = config_path
        args.resume = model_path
        cfg = load_config(args)
        if args.arch == 'Custom':
            from custom import Custom
            self.model = Custom(anchors=cfg['anchors'])
        else:
            # The original called parser.error(), but no parser exists in this scope
            raise ValueError('invalid architecture: {}'.format(args.arch))
        if args.resume:
            assert isfile(args.resume), '{} is not a valid file'.format(args.resume)
            self.model = load_pretrain(self.model, args.resume)
        self.model.eval()
        self.device = torch.device('cuda' if (torch.cuda.is_available() and not args.cpu) else 'cpu')
        self.model = self.model.to(self.device)

        # ############### Dangerous: shared tracker configuration ###############
        self.p = TrackerConfig()
        self.p.update(cfg['hp'] if 'hp' in cfg.keys() else None, self.model.anchors)
        self.p.renew()
        self.p.scales = self.model.anchors['scales']
        self.p.ratios = self.model.anchors['ratios']
        self.p.anchor_num = self.model.anchor_num
        self.p.anchor = generate_anchor(self.model.anchors, self.p.score_size)
        if self.p.windowing == 'cosine':
            self.window = np.outer(np.hanning(self.p.score_size), np.hanning(self.p.score_size))
        elif self.p.windowing == 'uniform':
            self.window = np.ones((self.p.score_size, self.p.score_size))
        self.window = np.tile(self.window.flatten(), self.p.anchor_num)
        # ########################################################################

    def get_examplar_feature(self, img, target_pos, target_sz):
        avg_chans = np.mean(img, axis=(0, 1))
        wc_z = target_sz[0] + self.p.context_amount * sum(target_sz)
        hc_z = target_sz[1] + self.p.context_amount * sum(target_sz)
        s_z = round(np.sqrt(wc_z * hc_z))
        # initialize the exemplar
        examplar = get_subwindow_tracking(img, target_pos, self.p.exemplar_size, s_z, avg_chans)
        z = Variable(examplar.unsqueeze(0))
        return self.model.template(z.to(self.device))

    def siamese_track(self, img, target_pos, target_sz, examplar_feature, debug=False,
                      mask_enable=True, refine_enable=True):
        avg_chans = np.mean(img, axis=(0, 1))
        im_h = img.shape[0]
        im_w = img.shape[1]
        wc_x = target_sz[0] + self.p.context_amount * sum(target_sz)
        hc_x = target_sz[1] + self.p.context_amount * sum(target_sz)
        s_x = np.sqrt(wc_x * hc_x)
        # The original author's version,
        #   scale_x = self.p.exemplar_size / s_x
        #   d_search = (self.p.instance_size - self.p.exemplar_size) / 2
        #   pad = d_search / scale_x
        #   s_x = s_x + 2 * pad
        # simplifies to the three lines below.
        scale_x = self.p.exemplar_size / s_x
        s_x = self.p.instance_size / self.p.exemplar_size * s_x
        crop_box = [target_pos[0] - round(s_x) / 2, target_pos[1] - round(s_x) / 2,
                    round(s_x), round(s_x)]

        # extract scaled crops for search region x at previous target position
        x_crop = Variable(get_subwindow_tracking(img, target_pos, self.p.instance_size,
                                                 round(s_x), avg_chans).unsqueeze(0))

        if mask_enable:
            score, delta, mask = self.model.track_mask(examplar_feature, x_crop.to(self.device))
        else:
            score, delta = self.model.track(examplar_feature, x_crop.to(self.device))

        delta = delta.permute(1, 2, 3, 0).contiguous().view(4, -1).data.cpu().numpy()
        score = F.softmax(score.permute(1, 2, 3, 0).contiguous().view(2, -1).permute(1, 0),
                          dim=1).data[:, 1].cpu().numpy()

        # decode the regression deltas into anchor-relative boxes
        delta[0, :] = delta[0, :] * self.p.anchor[:, 2] + self.p.anchor[:, 0]
        delta[1, :] = delta[1, :] * self.p.anchor[:, 3] + self.p.anchor[:, 1]
        delta[2, :] = np.exp(delta[2, :]) * self.p.anchor[:, 2]
        delta[3, :] = np.exp(delta[3, :]) * self.p.anchor[:, 3]

        def change(r):
            return np.maximum(r, 1. / r)

        def sz(w, h):
            pad = (w + h) * 0.5
            sz2 = (w + pad) * (h + pad)
            return np.sqrt(sz2)

        def sz_wh(wh):
            pad = (wh[0] + wh[1]) * 0.5
            sz2 = (wh[0] + pad) * (wh[1] + pad)
            return np.sqrt(sz2)

        # size penalty
        target_sz_in_crop = target_sz * scale_x
        s_c = change(sz(delta[2, :], delta[3, :]) / (sz_wh(target_sz_in_crop)))  # scale penalty
        r_c = change((target_sz_in_crop[0] / target_sz_in_crop[1]) / (delta[2, :] / delta[3, :]))  # ratio penalty

        penalty = np.exp(-(r_c * s_c - 1) * self.p.penalty_k)
        pscore = penalty * score

        # cos window (motion model)
        pscore = pscore * (1 - self.p.window_influence) + self.window * self.p.window_influence
        best_pscore_id = np.argmax(pscore)

        pred_in_crop = delta[:, best_pscore_id] / scale_x
        lr = penalty[best_pscore_id] * score[best_pscore_id] * self.p.lr  # lr for OTB

        res_x = pred_in_crop[0] + target_pos[0]
        res_y = pred_in_crop[1] + target_pos[1]
        res_w = target_sz[0] * (1 - lr) + pred_in_crop[2] * lr
        res_h = target_sz[1] * (1 - lr) + pred_in_crop[3] * lr

        target_pos = np.array([res_x, res_y])
        target_sz = np.array([res_w, res_h])

        # for Mask Branch
        if mask_enable:
            best_pscore_id_mask = np.unravel_index(best_pscore_id,
                                                   (5, self.p.score_size, self.p.score_size))
            delta_x, delta_y = best_pscore_id_mask[2], best_pscore_id_mask[1]

            if refine_enable:
                mask = self.model.track_refine((delta_y, delta_x)).to(self.device).sigmoid().squeeze().view(
                    self.p.out_size, self.p.out_size).cpu().data.numpy()
            else:
                mask = mask[0, :, delta_y, delta_x].sigmoid(). \
                    squeeze().view(self.p.out_size, self.p.out_size).cpu().data.numpy()

            def crop_back(image, bbox, out_sz, padding=-1):
                a = (out_sz[0] - 1) / bbox[2]
                b = (out_sz[1] - 1) / bbox[3]
                c = -a * bbox[0]
                d = -b * bbox[1]
                mapping = np.array([[a, 0, c],
                                    [0, b, d]]).astype(float)  # np.float is deprecated
                crop = cv2.warpAffine(image, mapping, (out_sz[0], out_sz[1]),
                                      flags=cv2.INTER_LINEAR,
                                      borderMode=cv2.BORDER_CONSTANT,
                                      borderValue=padding)
                return crop

            s = crop_box[2] / self.p.instance_size
            sub_box = [crop_box[0] + (delta_x - self.p.base_size / 2) * self.p.total_stride * s,
                       crop_box[1] + (delta_y - self.p.base_size / 2) * self.p.total_stride * s,
                       s * self.p.exemplar_size,
                       s * self.p.exemplar_size]
            s = self.p.out_size / sub_box[2]
            back_box = [-sub_box[0] * s, -sub_box[1] * s, im_w * s, im_h * s]
            mask_in_img = crop_back(mask, back_box, (im_w, im_h))
            target_mask = (mask_in_img > self.p.seg_thr).astype(np.uint8)
            # OpenCV 4 dropped the extra return value of findContours
            if cv2.__version__[-5] == '4':
                contours, _ = cv2.findContours(target_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
            else:
                _, contours, _ = cv2.findContours(target_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
            cnt_area = [cv2.contourArea(cnt) for cnt in contours]
            if len(contours) != 0 and np.max(cnt_area) > 100:
                contour = contours[np.argmax(cnt_area)]  # use the max-area polygon
                polygon = contour.reshape(-1, 2)
                prbox = cv2.boxPoints(cv2.minAreaRect(polygon))  # rotated rectangle
                rbox_in_img = prbox
            else:  # empty mask
                location = cxy_wh_2_rect(target_pos, target_sz)
                rbox_in_img = np.array([[location[0], location[1]],
                                        [location[0] + location[2], location[1]],
                                        [location[0] + location[2], location[1] + location[3]],
                                        [location[0], location[1] + location[3]]])

        target_pos[0] = max(0, min(im_w, target_pos[0]))
        target_pos[1] = max(0, min(im_h, target_pos[1]))
        target_sz[0] = max(10, min(im_w, target_sz[0]))
        target_sz[1] = max(10, min(im_h, target_sz[1]))

        score = score[best_pscore_id]
        mask = mask_in_img if mask_enable else []
        return target_pos, target_sz, score, mask
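# A sketch of driving SingleTracker over a video, assuming a list of OpenCV
# frames and the config/model paths taken by __init__. The exemplar feature is
# computed once on the first frame and reused for every call to siamese_track.
def run_single_tracker(frames, config_path, model_path, init_pos, init_sz):
    tracker = SingleTracker(config_path, model_path)
    zf = tracker.get_examplar_feature(frames[0], init_pos, init_sz)
    target_pos, target_sz = init_pos, init_sz
    results = []
    for img in frames[1:]:
        target_pos, target_sz, score, mask = tracker.siamese_track(
            img, target_pos, target_sz, zf, mask_enable=True, refine_enable=True)
        results.append((target_pos.copy(), target_sz.copy(), score, mask))
    return results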
def siamese_init(im, search_shape, target_pos, target_sz, model, hp=None, device='cpu'):
    """
    Generate anchors, run the template branch on the exemplar image, and set up the window.
    :param im: exemplar image (already cropped; resized to exemplar_size below)
    :param search_shape: (h, w) of the search image
    :param target_pos: selected target position
    :param target_sz: selected target size
    :param model: SiamMask model
    :param hp: hyper parameters
    :param device: device to run on
    :return: tracker state dict
    """
    state = dict()
    state['im_h'] = search_shape[0]
    state['im_w'] = search_shape[1]
    p = TrackerConfig()
    p.update(hp, model.anchors)
    p.renew()

    net = model
    p.scales = model.anchors['scales']
    p.ratios = model.anchors['ratios']
    p.anchor_num = model.anchor_num
    p.anchor = generate_anchor(model.anchors, p.score_size)  # anchor shape: (25*25*5, 4) --> (3125, 4)
    avg_chans = np.mean(im, axis=(0, 1))

    # In the original version the exemplar was cropped from the full frame:
    #   wc_z = target_sz[0] + p.context_amount * sum(target_sz)
    #   hc_z = target_sz[1] + p.context_amount * sum(target_sz)
    #   s_z = round(np.sqrt(wc_z * hc_z))  # crop size = sqrt((w+(w+h)/2)*(h+(w+h)/2))
    #   im_patch = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z, avg_chans, out_mode="numpy")
    # Here, im is already the template patch, so it is only resized.
    im_patch = cv2.resize(im, (p.exemplar_size, p.exemplar_size))
    # Debug visualization of the template crop
    cv2.imshow('crop_template', im_patch)
    cv2.waitKey(0)
    z_crop = im_to_torch(im_patch)
    z = Variable(z_crop.unsqueeze(0))
    net.template(z.to(device))

    if p.windowing == 'cosine':
        window = np.outer(np.hanning(p.score_size), np.hanning(p.score_size))
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))
    window = np.tile(window.flatten(), p.anchor_num)

    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    return state
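# For reference, a minimal sketch of im_to_torch consistent with how it is used
# above. SiamMask's helper converts an HWC image array into a CHW float tensor;
# this is an assumed reimplementation for illustration, not the project's exact code.
def im_to_torch_sketch(img):
    import numpy as np
    import torch
    chw = np.ascontiguousarray(np.transpose(img, (2, 0, 1)))  # HWC -> CHW
    return torch.from_numpy(chw).float()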
def siamese_init(im, target_pos, target_sz, model, hp=None, device='cpu'):
    # target_pos and target_sz come from the axis-aligned version of the ground-truth box
    state = dict()
    state['im_h'] = im.shape[0]
    state['im_w'] = im.shape[1]

    p = TrackerConfig()  # configure the parameters
    p.update(hp, model.anchors)  # update p from hp and model.anchors, i.e. from config_vot.json
    p.renew()  # p.score_size = 25

    net = model
    p.scales = model.anchors['scales']  # attribute of SiamMask, the parent class of Custom
    p.ratios = model.anchors['ratios']
    p.anchor_num = model.anchor_num  # 5 on the VOT dataset
    # generate_anchor builds the anchors;
    # p.anchor.shape = (p.anchor_num * p.score_size * p.score_size, 4)
    p.anchor = generate_anchor(model.anchors, p.score_size)
    avg_chans = np.mean(im, axis=(0, 1))  # per-channel (B, G, R) mean of the single image, shape (3,)

    # Image preprocessing: enlarge the target box by the context amount so the
    # crop includes some context information. p.context_amount = 0.5
    wc_z = target_sz[0] + p.context_amount * sum(target_sz)  # wc_z = w + p.context_amount * (w + h)
    hc_z = target_sz[1] + p.context_amount * sum(target_sz)  # hc_z = h + p.context_amount * (w + h)
    # Enlarge the box by roughly a factor of 2 and, centered on the object,
    # crop a square patch with side length s_z
    s_z = round(np.sqrt(wc_z * hc_z))  # round() rounds half to even: round(2.5) = 2, round(2.51) = 3

    # initialize the exemplar
    # z_crop is a tensor<(3, 127, 127), float32, cpu>;
    # p.exemplar_size (the input z size defined in TrackerConfig) is 127
    z_crop = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z, avg_chans)

    # unsqueeze adds a batch dimension: tensor<(1, 3, 127, 127), float32, cpu>
    z = Variable(z_crop.unsqueeze(0))
    # move z to the device and extract its features, i.e. the ResNet-50 output
    net.template(z.to(device))

    if p.windowing == 'cosine':  # default
        # outer product: ndarray of shape (p.score_size, p.score_size), i.e. <(25, 25), float64>
        window = np.outer(np.hanning(p.score_size), np.hanning(p.score_size))
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))
    # np.tile repeats window.flatten() p.anchor_num times along the first axis;
    # the result is ndarray<(3125,), float64> with p.anchor_num = 5
    window = np.tile(window.flatten(), p.anchor_num)

    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos  # unchanged from the input
    state['target_sz'] = target_sz  # unchanged from the input
    return state
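# A worked example of the context-enlarged crop size computed above, using
# p.context_amount = 0.5 and a hypothetical 100x50 target:
#   wc_z = 100 + 0.5 * (100 + 50) = 175
#   hc_z = 50  + 0.5 * (100 + 50) = 125
#   s_z  = round(sqrt(175 * 125)) = round(147.9...) = 148
# so the square exemplar crop is roughly twice the linear size of the target.
def demo_crop_size(w=100.0, h=50.0, context_amount=0.5):
    import numpy as np
    wc_z = w + context_amount * (w + h)
    hc_z = h + context_amount * (w + h)
    return round(np.sqrt(wc_z * hc_z))  # 148 for the defaults above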