def __call__(self, gt_bbox, neg=False):
    """Build anchor classification / regression targets for one box.

    Args:
        gt_bbox: ground-truth box in corner form (x1, y1, x2, y2).
        neg: if True, the sample is treated as a negative pair and only
            negative labels in a window around the box centre are emitted.

    Returns:
        Tuple ``(gt_cls, gt_delta, delta_weight)``:

        * ``gt_cls`` -- int64 array of shape
          (anchor_num, out_size, out_size) holding 1 for positives, 0 for
          negatives and -1 for anchors that were left unlabelled.
        * ``gt_delta`` -- regression targets; all zeros with shape
          (4, anchor_num, out_size, out_size) for negative pairs, otherwise
          whatever ``bbox2delta`` returns for all anchors.
        * ``delta_weight`` -- float32 array of shape
          (anchor_num, out_size, out_size); non-zero only at the selected
          positive anchors.
    """
    anchor_num = self.anchor_generator.anchor_num
    out_size = self.out_size
    grid_shape = (anchor_num, out_size, out_size)

    gt_cls = -1 * np.ones(grid_shape, dtype=np.int64)
    gt_delta = np.zeros((4,) + grid_shape, dtype=np.float32)
    delta_weight = np.zeros(grid_shape, dtype=np.float32)

    def subsample(idx, keep_num):
        # Randomly keep at most keep_num rows of an (N, 3) index array.
        if len(idx) > keep_num:
            np.random.shuffle(idx)
            idx = idx[:keep_num, :]
        return idx

    gt_cx, gt_cy, _, _ = corner2center(gt_bbox)

    if neg:
        # Negative pair: label a 7x7 window (clipped to the map) around the
        # grid cell of the box centre as negative, then subsample it.
        half_search = cfg.TRAIN.SEARCH_SIZE // 2
        cx = out_size // 2 + int(
            np.ceil((gt_cx - half_search) / cfg.ANCHOR.STRIDE + 0.5))
        cy = out_size // 2 + int(
            np.ceil((gt_cy - half_search) / cfg.ANCHOR.STRIDE + 0.5))
        left, right = max(0, cx - 3), min(out_size, cx + 4)
        top, bottom = max(0, cy - 3), min(out_size, cy + 4)
        gt_cls[:, top:bottom, left:right] = 0

        neg_idx = np.vstack(np.where(gt_cls == 0)).transpose()
        neg_idx = subsample(neg_idx, cfg.TRAIN.NEG_NUM)
        gt_cls[:] = -1
        gt_cls[neg_idx[:, 0], neg_idx[:, 1], neg_idx[:, 2]] = 0
        return gt_cls, gt_delta, delta_weight

    # NOTE: the shape of all_anchors and gt_bbox are different, need broadcast.
    iou = calc_iou(self.all_anchors, gt_bbox)
    pos_idx = np.vstack(np.where(iou > cfg.TRAIN.THRESH_HIGH)).transpose()
    neg_idx = np.vstack(np.where(iou < cfg.TRAIN.THRESH_LOW)).transpose()

    pos_idx = subsample(pos_idx, cfg.TRAIN.POS_NUM)
    # Normalise by the number of positives actually kept, so the weights sum
    # to ~1 even when the positive set was subsampled.
    pos_num = len(pos_idx)
    gt_cls[pos_idx[:, 0], pos_idx[:, 1], pos_idx[:, 2]] = 1
    delta_weight[pos_idx[:, 0], pos_idx[:, 1], pos_idx[:, 2]] = \
        1 / (pos_num + 1e-6)

    neg_idx = subsample(neg_idx, cfg.TRAIN.TOTAL_NUM - cfg.TRAIN.POS_NUM)
    gt_cls[neg_idx[:, 0], neg_idx[:, 1], neg_idx[:, 2]] = 0

    gt_delta = bbox2delta(self.all_anchors, gt_bbox)
    return gt_cls, gt_delta, delta_weight
def track(self, img):
    """Locate the target in ``img`` and update the tracker state.

    A search window is cropped around the previous position, the model is
    run on it, the anchor predictions are decoded, re-ranked with a
    scale/ratio penalty blended with ``self.window``, and the winning box is
    used to move the position and smooth the size.

    Args:
        img: current frame; ``img.shape[1]`` and ``img.shape[0]`` are passed
            to ``_clip_bbox`` as the clipping bounds.

    Returns:
        dict with 'bbox' (the value returned by ``_clip_bbox``) and 'score'
        (the un-penalised score of the selected prediction).
    """
    prev_size = self.bbox_size
    exemplar_extent = self._size_z(prev_size)
    scale_z = cfg.TRACK.EXAMPLAR_SIZE / exemplar_extent
    search_extent = self._size_x(prev_size)

    patch = self.get_subwindow(img, self.bbox_pos, cfg.TRACK.INSTANCE_SIZE,
                               search_extent, self.channel_average)
    batch = patch[np.newaxis, :].astype(np.float32)
    net_input = torch.from_numpy(batch).permute(0, 3, 1, 2).cuda()

    cls_out, loc_out = self.model.track(net_input)
    score = self._convert_score(cls_out)

    loc_dims = loc_out.size()
    loc_out = loc_out.reshape(4, self.anchor_generator.anchor_num,
                              loc_dims[2], loc_dims[3])
    corners = delta2bbox(self.all_anchor, loc_out)
    corners = corners.transpose((1, 2, 3, 0)).reshape((-1, 4))  # x1,y1,x2,y2
    candidates = corner2center(corners)  # cx,cy,w,h

    def symmetric(ratio):
        # A change by factor r or 1/r is penalised equally.
        return np.maximum(ratio, 1 / ratio)

    def padded_extent(w, h):
        pad = 0.5 * (w + h)
        return np.sqrt((w + pad) * (h + pad))

    cand_w = candidates[:, 2]
    cand_h = candidates[:, 3]
    ratio_change = symmetric(
        (prev_size[0] / prev_size[1]) / (cand_w / cand_h))
    scale_change = symmetric(
        padded_extent(self.bbox_size[0] * scale_z,
                      self.bbox_size[1] * scale_z)
        / padded_extent(cand_w, cand_h))
    penalty = np.exp(
        -(ratio_change * scale_change - 1) * cfg.TRACK.PENALTY_K)

    influence = cfg.TRACK.WINDOW_INFLUENCE
    pscore = penalty * score
    pscore = pscore * (1 - influence) + self.window * influence

    best_idx = np.argmax(pscore)
    best = candidates[best_idx, :]

    # Re-centre on the search window, then undo the crop scaling.
    half_instance = cfg.TRACK.INSTANCE_SIZE // 2
    best[0] -= half_instance
    best[1] -= half_instance
    best = best / scale_z

    cx = best[0] + self.bbox_pos[0]
    cy = best[1] + self.bbox_pos[1]

    # Size is blended with the previous size; confident picks move it more.
    lr = penalty[best_idx] * score[best_idx] * cfg.TRACK.LR
    w = self.bbox_size[0] * (1 - lr) + lr * best[2]
    h = self.bbox_size[1] * (1 - lr) + lr * best[3]

    result = self._clip_bbox(cx, cy, w, h, img.shape[1], img.shape[0])

    # update
    self.bbox_pos = result[0:2]
    self.bbox_size = result[2:4]
    return {'bbox': result, 'score': score[best_idx]}
def _shift_scale_aug(self, image, bbox, crop_bbox, size):
    """Randomly rescale and shift the crop window, then crop the image.

    Args:
        image: array whose first two dimensions are (height, width).
        bbox: target box exposing ``x1, y1, x2, y2`` attributes.
        crop_bbox: crop window in corner form.
        size: forwarded unchanged to ``_crop_roi``.

    Returns:
        ``(image, bbox)``: the result of ``_crop_roi`` and the target box
        re-expressed relative to the (possibly rescaled) crop window.
    """
    img_h, img_w = image.shape[:2]

    # adjust crop bounding box
    center = corner2center(crop_bbox)
    if self.scale:
        scale_x = 1.0 + Augmentation.random() * self.scale
        scale_y = 1.0 + Augmentation.random() * self.scale
        # Never let the scaled window exceed the image extent.
        scale_x = min(scale_x, float(img_w) / center.w)
        scale_y = min(scale_y, float(img_h) / center.h)
        center = Center(center.x, center.y,
                        center.w * scale_x, center.h * scale_y)

    window = center2corner(center)
    if self.shift:
        dx = Augmentation.random() * self.shift
        dy = Augmentation.random() * self.shift

        left, top, right, bottom = window

        # Clamp the shift so the window stays inside the image.
        dx = max(-left, min(img_w - 1 - right, dx))
        dy = max(-top, min(img_h - 1 - bottom, dy))

        window = Corner(left + dx, top + dy, right + dx, bottom + dy)

    # adjust target bounding box
    off_x = window.x1
    off_y = window.y1
    bbox = Corner(bbox.x1 - off_x, bbox.y1 - off_y,
                  bbox.x2 - off_x, bbox.y2 - off_y)

    if self.scale:
        bbox = Corner(bbox.x1 / scale_x, bbox.y1 / scale_y,
                      bbox.x2 / scale_x, bbox.y2 / scale_y)

    image = self._crop_roi(image, window, size)
    return image, bbox
def generate_all_anchors(self, im_c, size):
    """Lay the base anchors out over a ``size`` x ``size`` grid.

    Args:
        im_c: image center; used to position the first grid cell.
        size: number of grid positions along each axis.

    Returns:
        False if ``im_c`` and ``size`` match the cached values (nothing is
        recomputed); True after ``self.all_anchors`` has been rebuilt as a
        ``(corner_form, center_form)`` pair of float32 arrays.
    """
    if self.image_center == im_c and self.size == size:
        return False
    self.image_center = im_c
    self.size = size

    # Offset of the first grid cell so the grid is centred on im_c.
    origin = im_c - size // 2 * self.stride
    shifted = self.anchors + np.array([origin] * 4, dtype=np.float32)

    count = self.anchor_num
    x1, y1, x2, y2 = (shifted[:, k].reshape(count, 1, 1) for k in range(4))
    cx, cy, w, h = corner2center([x1, y1, x2, y2])

    steps = np.arange(0, size)
    cx = cx + steps.reshape(1, 1, -1) * self.stride
    cy = cy + steps.reshape(1, -1, 1) * self.stride

    # broadcast
    grid = np.zeros((count, size, size), dtype=np.float32)
    cx, cy, w, h = (value + grid for value in (cx, cy, w, h))
    x1, y1, x2, y2 = center2corner([cx, cy, w, h])

    corner_form = np.stack([x1, y1, x2, y2]).astype(np.float32)
    center_form = np.stack([cx, cy, w, h]).astype(np.float32)
    self.all_anchors = (corner_form, center_form)
    return True
def track(self, img):
    """Track one frame, refreshing the sample memory and the model filter.

    Works like a plain tracking step (crop, forward pass, decode, penalise,
    pick the best candidate) and additionally:

    * replaces the lowest-scoring memory entry with the current sample when
      the best penalised score exceeds ``cfg.META.UPDATE_THRESH``;
    * calls ``self.model.meta_train`` on the stored samples whenever
      ``self.track_frame`` is a multiple of ``cfg.META.UPDATE_FREQ``.

    Args:
        img: current frame; ``img.shape[1]`` and ``img.shape[0]`` are passed
            to ``_clip_bbox`` as the clipping bounds.

    Returns:
        dict with 'bbox' (the value returned by ``_clip_bbox``) and 'score'
        (the un-penalised score of the selected prediction).
    """
    prev_size = self.bbox_size
    exemplar_extent = self._size_z(prev_size)
    scale_z = cfg.TRACK.EXAMPLAR_SIZE / exemplar_extent
    search_extent = self._size_x(prev_size)

    patch = self.get_subwindow(img, self.bbox_pos, cfg.TRACK.INSTANCE_SIZE,
                               search_extent, self.channel_average)
    batch = patch[np.newaxis, :].astype(np.float32)
    net_input = torch.from_numpy(batch).permute(0, 3, 1, 2).cuda()

    cls_out, loc_out = self.model.track(net_input)
    score = self._convert_score(cls_out)

    loc_dims = loc_out.size()
    loc_out = loc_out.reshape(4, self.anchor_generator.anchor_num,
                              loc_dims[2], loc_dims[3])
    corners = delta2bbox(self.all_anchor, loc_out)
    corners = corners.transpose((1, 2, 3, 0)).reshape((-1, 4))  # x1,y1,x2,y2
    candidates = corner2center(corners)  # cx,cy,w,h

    def symmetric(ratio):
        # A change by factor r or 1/r is penalised equally.
        return np.maximum(ratio, 1 / ratio)

    def padded_extent(w, h):
        pad = 0.5 * (w + h)
        return np.sqrt((w + pad) * (h + pad))

    cand_w = candidates[:, 2]
    cand_h = candidates[:, 3]
    ratio_change = symmetric(
        (prev_size[0] / prev_size[1]) / (cand_w / cand_h))
    scale_change = symmetric(
        padded_extent(self.bbox_size[0] * scale_z,
                      self.bbox_size[1] * scale_z)
        / padded_extent(cand_w, cand_h))
    penalty = np.exp(
        -(ratio_change * scale_change - 1) * cfg.TRACK.PENALTY_K)

    influence = cfg.TRACK.WINDOW_INFLUENCE
    pscore = penalty * score
    pscore = pscore * (1 - influence) + self.window * influence

    best_idx = np.argmax(pscore)
    best = candidates[best_idx, :]
    best_score = pscore[best_idx]

    # update memory: the weakest stored sample makes room for this one
    if best_score > cfg.META.UPDATE_THRESH:
        weakest = np.argmin(self.score_mem)
        for memory in (self.search_mem, self.bbox_mem, self.score_mem):
            del memory[weakest]
        self.search_mem.append(patch)
        # NOTE(review): ``best`` is in the cx,cy,w,h form produced by
        # corner2center above -- confirm anchor_target expects that layout.
        self.bbox_mem.append(best.tolist())
        self.score_mem.append(best_score)

    # update filter
    if self.track_frame % cfg.META.UPDATE_FREQ == 0:
        targets = [self.anchor_target(box) for box in self.bbox_mem]
        cls_list, loc_list, weight_list = zip(*targets)
        gt_cls = torch.from_numpy(np.stack(cls_list)).cuda()
        gt_loc = torch.from_numpy(np.stack(loc_list)).cuda()
        gt_loc_weight = torch.from_numpy(np.stack(weight_list)).cuda()

        stacked = np.stack(self.search_mem).astype(np.float32)
        searches = torch.from_numpy(
            stacked.transpose((0, 3, 1, 2))).cuda()

        self.model.meta_train(self.examplars, searches, gt_cls, gt_loc,
                              gt_loc_weight)

    # update track state
    # Re-centre on the search window, then undo the crop scaling.
    half_instance = cfg.TRACK.INSTANCE_SIZE // 2
    best[0] -= half_instance
    best[1] -= half_instance
    best = best / scale_z

    cx = best[0] + self.bbox_pos[0]
    cy = best[1] + self.bbox_pos[1]

    # Size is blended with the previous size; confident picks move it more.
    lr = penalty[best_idx] * score[best_idx] * cfg.TRACK.LR
    w = self.bbox_size[0] * (1 - lr) + lr * best[2]
    h = self.bbox_size[1] * (1 - lr) + lr * best[3]

    result = self._clip_bbox(cx, cy, w, h, img.shape[1], img.shape[0])

    self.bbox_pos = result[0:2]
    self.bbox_size = result[2:4]
    self.track_frame += 1
    return {'bbox': result, 'score': score[best_idx]}
def __call__(self, target, size, neg=False):
    """Compute per-anchor training targets for one ground-truth box.

    Args:
        target: ground-truth box, converted with ``corner2center``.
        size: side length of the square output grid.
        neg: if True, only negative labels in a window around the target
            centre are produced and the regression outputs stay zero.

    Returns:
        Tuple ``(cls, delta, delta_weight, overlap)``:

        * ``cls`` -- int64, shape (anchor_num, size, size); -1 ignore,
          0 negative, 1 positive.
        * ``delta`` -- float32, shape (4, anchor_num, size, size).
        * ``delta_weight`` -- float32, shape (anchor_num, size, size);
          non-zero only at the selected positives.
        * ``overlap`` -- result of ``IoU`` against all anchors, or zeros of
          shape (anchor_num, size, size) for negative pairs.
    """
    anchor_num = len(cfg.ANCHOR.RATIOS) * len(cfg.ANCHOR.SCALES)
    grid = (anchor_num, size, size)

    # -1 ignore 0 negative 1 positive
    cls = np.full(grid, -1, dtype=np.int64)
    delta = np.zeros((4,) + grid, dtype=np.float32)
    delta_weight = np.zeros(grid, dtype=np.float32)

    def pick(indices, limit=16):
        # Randomly keep at most `limit` entries of an np.where-style tuple;
        # returns the (possibly reduced) tuple and how many were kept.
        total = indices[0].shape[0]
        if total <= limit:
            return indices, total
        order = np.arange(total)
        np.random.shuffle(order)
        chosen = order[:limit]
        return tuple(axis[chosen] for axis in indices), limit

    tcx, tcy, tw, th = corner2center(target)

    if neg:
        half_search = cfg.TRAIN.SEARCH_SIZE // 2
        stride = cfg.ANCHOR.STRIDE
        col = size // 2 + int(np.ceil((tcx - half_search) / stride + 0.5))
        row = size // 2 + int(np.ceil((tcy - half_search) / stride + 0.5))

        # 7x7 window around the target's grid cell, clipped to the map.
        left, right = max(0, col - 3), min(size, col + 4)
        top, bottom = max(0, row - 3), min(size, row + 4)
        cls[:, top:bottom, left:right] = 0

        kept_neg, _ = pick(np.where(cls == 0), cfg.TRAIN.NEG_NUM)
        cls[:] = -1
        cls[kept_neg] = 0

        overlap = np.zeros(grid, dtype=np.float32)
        return cls, delta, delta_weight, overlap

    corners = self.anchors.all_anchors[0]
    centers = self.anchors.all_anchors[1]
    x1, y1, x2, y2 = corners[0], corners[1], corners[2], corners[3]
    cx, cy, w, h = centers[0], centers[1], centers[2], centers[3]

    # Offsets are normalised by anchor size; sizes are log-ratios.
    delta[0] = (tcx - cx) / w
    delta[1] = (tcy - cy) / h
    delta[2] = np.log(tw / w)
    delta[3] = np.log(th / h)

    overlap = IoU([x1, y1, x2, y2], target)

    pos_all = np.where(overlap > cfg.TRAIN.THR_HIGH)
    neg_all = np.where(overlap < cfg.TRAIN.THR_LOW)

    pos, pos_num = pick(pos_all, cfg.TRAIN.POS_NUM)
    kept_neg, _ = pick(neg_all, cfg.TRAIN.TOTAL_NUM - cfg.TRAIN.POS_NUM)

    cls[pos] = 1
    delta_weight[pos] = 1. / (pos_num + 1e-6)

    cls[kept_neg] = 0
    return cls, delta, delta_weight, overlap