示例#1
0
    def __call__(self, gt_bbox, neg=False):  # corner x1,y1,x2,y2
        """Build per-anchor training targets for one ground-truth box.

        Args:
            gt_bbox: ground-truth box in corner form (x1, y1, x2, y2).
            neg: when true, only negative labels are emitted, sampled from a
                window of the score map around the box centre; no positives
                and no regression targets are produced.

        Returns:
            Tuple ``(gt_cls, gt_delta, delta_weight)``:
              * ``gt_cls``: int64 array (anchor_num, out_size, out_size),
                initialised to -1, set to 0 at sampled negatives and 1 at
                sampled positives.
              * ``gt_delta``: zeros of shape (4, anchor_num, out_size,
                out_size) when ``neg`` is true, otherwise the result of
                ``bbox2delta(self.all_anchors, gt_bbox)``.
              * ``delta_weight``: float32 array (anchor_num, out_size,
                out_size); ``1 / number_of_kept_positives`` at positive
                anchors and 0 elsewhere.
        """
        anchor_num = self.anchor_generator.anchor_num
        out_size = self.out_size
        gt_cls = -1 * np.ones((anchor_num, out_size, out_size),
                              dtype=np.int64)
        gt_delta = np.zeros((4, anchor_num, out_size, out_size),
                            dtype=np.float32)
        delta_weight = np.zeros((anchor_num, out_size, out_size),
                                dtype=np.float32)

        def subsample(idx, keep_num):
            # Randomly keep at most keep_num rows of an (N, 3) index array.
            if len(idx) > keep_num:
                np.random.shuffle(idx)
                idx = idx[:keep_num, :]
            return idx

        if neg:
            gt_cx, gt_cy, _, _ = corner2center(gt_bbox)
            # Map the box centre from search-image pixels to a score-map cell.
            cx = out_size // 2 + int(
                np.ceil((gt_cx - cfg.TRAIN.SEARCH_SIZE // 2) /
                        cfg.ANCHOR.STRIDE + 0.5))
            cy = out_size // 2 + int(
                np.ceil((gt_cy - cfg.TRAIN.SEARCH_SIZE // 2) /
                        cfg.ANCHOR.STRIDE + 0.5))
            # Up to 7x7 cells around that cell, clipped to the score map.
            left = max(0, cx - 3)
            right = min(out_size, cx + 4)
            top = max(0, cy - 3)
            bottom = min(out_size, cy + 4)
            gt_cls[:, top:bottom, left:right] = 0
            neg_idx = np.vstack(np.where(gt_cls == 0)).transpose()
            neg_idx = subsample(neg_idx, cfg.TRAIN.NEG_NUM)
            # Reset, then mark only the sampled negatives.
            gt_cls[:] = -1
            gt_cls[neg_idx[:, 0], neg_idx[:, 1], neg_idx[:, 2]] = 0
            return gt_cls, gt_delta, delta_weight

        # NOTE: the shape of all_anchors and gt_bbox are different, need broadcast.
        # The indexing below assumes iou has shape
        # (anchor_num, out_size, out_size) -- TODO confirm against calc_iou.
        iou = calc_iou(self.all_anchors, gt_bbox)

        pos_idx = np.vstack(np.where(iou > cfg.TRAIN.THRESH_HIGH)).transpose()
        neg_idx = np.vstack(np.where(iou < cfg.TRAIN.THRESH_LOW)).transpose()

        pos_idx = subsample(pos_idx, cfg.TRAIN.POS_NUM)
        # Count positives AFTER subsampling so the regression weights of the
        # anchors that are actually kept sum to ~1.
        pos_num = len(pos_idx)
        gt_cls[pos_idx[:, 0], pos_idx[:, 1], pos_idx[:, 2]] = 1
        delta_weight[pos_idx[:, 0], pos_idx[:, 1],
                     pos_idx[:, 2]] = 1 / (pos_num + 1e-6)

        neg_idx = subsample(neg_idx, cfg.TRAIN.TOTAL_NUM - cfg.TRAIN.POS_NUM)
        gt_cls[neg_idx[:, 0], neg_idx[:, 1], neg_idx[:, 2]] = 0

        gt_delta = bbox2delta(self.all_anchors, gt_bbox)
        return gt_cls, gt_delta, delta_weight
示例#2
0
    def track(self, img):
        """Locate the target in ``img`` and update the stored tracker state.

        A search window is cropped around ``self.bbox_pos``, passed through
        ``self.model.track``, and the decoded boxes are re-ranked with a
        ratio/scale penalty blended with ``self.window``.  The winning box is
        mapped back to image coordinates, its size is blended with the
        previous size, and the result is clipped by ``self._clip_bbox``.

        Returns:
            dict with the clipped box under ``'bbox'`` and the unpenalised
            score of the selected prediction under ``'score'``.
        """
        prev_size = self.bbox_size
        size_z = self._size_z(prev_size)
        scale_z = cfg.TRACK.EXAMPLAR_SIZE / size_z
        size_x = self._size_x(prev_size)
        search = self.get_subwindow(img, self.bbox_pos,
                                    cfg.TRACK.INSTANCE_SIZE, size_x,
                                    self.channel_average)

        # HWC image -> 1xCxHxW float tensor on the GPU.
        batch = search[np.newaxis, :].astype(np.float32)
        net_input = torch.from_numpy(batch).permute(0, 3, 1, 2).cuda()
        cls, loc = self.model.track(net_input)
        score = self._convert_score(cls)

        feat_h, feat_w = loc.size()[2], loc.size()[3]
        loc = loc.reshape(4, self.anchor_generator.anchor_num, feat_h, feat_w)
        boxes = delta2bbox(self.all_anchor, loc)
        boxes = boxes.transpose((1, 2, 3, 0)).reshape((-1, 4))  # x1,y1,x2,y2
        boxes = corner2center(boxes)  # cx,cy,w,h

        def symmetric_ratio(r):
            # Same value whether the quantity grew or shrank by factor r.
            return np.maximum(r, 1 / r)

        def padded_size(w, h):
            # Geometric mean of the sides after padding each by (w + h) / 2.
            pad = 0.5 * (w + h)
            return np.sqrt((w + pad) * (h + pad))

        pred_w = boxes[:, 2]
        pred_h = boxes[:, 3]
        ratio_change = symmetric_ratio(
            (prev_size[0] / prev_size[1]) / (pred_w / pred_h))
        scale_change = symmetric_ratio(
            padded_size(self.bbox_size[0] * scale_z,
                        self.bbox_size[1] * scale_z) /
            padded_size(pred_w, pred_h))
        penalty = np.exp(
            -(ratio_change * scale_change - 1) * cfg.TRACK.PENALTY_K)

        influence = cfg.TRACK.WINDOW_INFLUENCE
        pscore = penalty * score
        pscore = pscore * (1 - influence) + self.window * influence
        best_idx = np.argmax(pscore)

        # Shift the winner so it is relative to the search-window centre,
        # then rescale by 1 / scale_z.
        half_instance = cfg.TRACK.INSTANCE_SIZE // 2
        best_bbox = boxes[best_idx, :]
        best_bbox[0] -= half_instance
        best_bbox[1] -= half_instance
        best_bbox = best_bbox / scale_z

        cx = best_bbox[0] + self.bbox_pos[0]
        cy = best_bbox[1] + self.bbox_pos[1]
        # Blend old and new size; the rate grows with the penalised score.
        lr = penalty[best_idx] * score[best_idx] * cfg.TRACK.LR
        w = self.bbox_size[0] * (1 - lr) + lr * best_bbox[2]
        h = self.bbox_size[1] * (1 - lr) + lr * best_bbox[3]

        im_h, im_w = img.shape[0], img.shape[1]
        clipped = self._clip_bbox(cx, cy, w, h, im_w, im_h)

        # update
        self.bbox_pos = clipped[0:2]
        self.bbox_size = clipped[2:4]

        return {'bbox': clipped, 'score': score[best_idx]}
示例#3
0
    def _shift_scale_aug(self, image, bbox, crop_bbox, size):
        """Randomly rescale and shift the crop window, then crop ``image``.

        Args:
            image: array whose first two dimensions are (height, width).
            bbox: target box with ``x1, y1, x2, y2`` attributes.
            crop_bbox: crop window in corner form.
            size: passed through to ``self._crop_roi``.

        Returns:
            Tuple ``(image, bbox)``: the crop produced by ``self._crop_roi``
            and the target box re-expressed relative to the crop window
            (and divided by the scale factors when scaling is enabled).
        """
        img_h, img_w = image.shape[:2]

        # adjust crop bounding box
        center = corner2center(crop_bbox)
        if self.scale:
            scale_x = 1.0 + Augmentation.random() * self.scale
            scale_y = 1.0 + Augmentation.random() * self.scale
            # Cap the factors so the scaled crop is no larger than the image.
            scale_x = min(scale_x, float(img_w) / center.w)
            scale_y = min(scale_y, float(img_h) / center.h)
            center = Center(center.x, center.y,
                            center.w * scale_x, center.h * scale_y)
        crop_bbox = center2corner(center)

        if self.shift:
            shift_x = Augmentation.random() * self.shift
            shift_y = Augmentation.random() * self.shift

            left, top, right, bottom = crop_bbox

            # Clamp the shift against the image borders.
            shift_x = max(-left, min(img_w - 1 - right, shift_x))
            shift_y = max(-top, min(img_h - 1 - bottom, shift_y))

            crop_bbox = Corner(left + shift_x, top + shift_y,
                               right + shift_x, bottom + shift_y)

        # adjust target bounding box
        origin_x = crop_bbox.x1
        origin_y = crop_bbox.y1
        bbox = Corner(bbox.x1 - origin_x, bbox.y1 - origin_y,
                      bbox.x2 - origin_x, bbox.y2 - origin_y)

        if self.scale:
            bbox = Corner(bbox.x1 / scale_x, bbox.y1 / scale_y,
                          bbox.x2 / scale_x, bbox.y2 / scale_y)

        cropped = self._crop_roi(image, crop_bbox, size)
        return cropped, bbox
示例#4
0
    def generate_all_anchors(self, im_c, size):
        """Tile the base anchors over a ``size`` x ``size`` grid.

        Args:
            im_c: image center
            size: image size

        Returns:
            False when ``im_c`` and ``size`` match the values already cached
            on the instance (nothing is recomputed); True after
            ``self.all_anchors`` has been rebuilt as a pair of float32
            arrays: corner form (x1, y1, x2, y2) and centre form
            (cx, cy, w, h), each stacked along a new first axis.
        """
        if self.image_center == im_c and self.size == size:
            return False
        self.image_center, self.size = im_c, size

        # Offset applied to all four coordinates of every base anchor.
        origin = im_c - size // 2 * self.stride
        shifted = self.anchors + np.array([origin] * 4, dtype=np.float32)

        # One (anchor_num, 1, 1) column per corner coordinate.
        corners = [shifted[:, k].reshape(self.anchor_num, 1, 1)
                   for k in range(4)]
        cx, cy, w, h = corner2center(corners)

        # Per-cell displacement: x varies along the last axis, y along the
        # middle axis.
        steps = np.arange(0, size)
        cx = cx + steps.reshape(1, 1, -1) * self.stride
        cy = cy + steps.reshape(1, -1, 1) * self.stride

        # broadcast
        zero = np.zeros((self.anchor_num, size, size), dtype=np.float32)
        cx, cy, w, h = [v + zero for v in (cx, cy, w, h)]
        x1, y1, x2, y2 = center2corner([cx, cy, w, h])

        corner_form = np.stack([x1, y1, x2, y2]).astype(np.float32)
        center_form = np.stack([cx, cy, w, h]).astype(np.float32)
        self.all_anchors = (corner_form, center_form)
        return True
示例#5
0
    def track(self, img):
        """Track the target in ``img``, maintaining a sample memory.

        Besides the usual crop / predict / re-rank / smooth steps, this
        variant:
          * replaces the lowest-scoring memory entry with the current search
            crop and best box when the penalised score exceeds
            ``cfg.META.UPDATE_THRESH``;
          * every ``cfg.META.UPDATE_FREQ`` frames builds targets from the
            stored boxes and calls ``self.model.meta_train``.

        Returns:
            dict with the clipped box under ``'bbox'`` and the unpenalised
            score of the selected prediction under ``'score'``.
        """
        prev_size = self.bbox_size
        size_z = self._size_z(prev_size)
        scale_z = cfg.TRACK.EXAMPLAR_SIZE / size_z
        size_x = self._size_x(prev_size)
        search = self.get_subwindow(img, self.bbox_pos,
                                    cfg.TRACK.INSTANCE_SIZE, size_x,
                                    self.channel_average)

        # HWC image -> 1xCxHxW float tensor on the GPU.
        batch = search[np.newaxis, :].astype(np.float32)
        net_input = torch.from_numpy(batch).permute(0, 3, 1, 2).cuda()
        cls, loc = self.model.track(net_input)
        score = self._convert_score(cls)

        feat_h, feat_w = loc.size()[2], loc.size()[3]
        loc = loc.reshape(4, self.anchor_generator.anchor_num, feat_h, feat_w)
        boxes = delta2bbox(self.all_anchor, loc)
        boxes = boxes.transpose((1, 2, 3, 0)).reshape((-1, 4))  # x1,y1,x2,y2
        boxes = corner2center(boxes)  # cx,cy,w,h

        def symmetric_ratio(r):
            # Same value whether the quantity grew or shrank by factor r.
            return np.maximum(r, 1 / r)

        def padded_size(w, h):
            # Geometric mean of the sides after padding each by (w + h) / 2.
            pad = 0.5 * (w + h)
            return np.sqrt((w + pad) * (h + pad))

        pred_w = boxes[:, 2]
        pred_h = boxes[:, 3]
        ratio_change = symmetric_ratio(
            (prev_size[0] / prev_size[1]) / (pred_w / pred_h))
        scale_change = symmetric_ratio(
            padded_size(self.bbox_size[0] * scale_z,
                        self.bbox_size[1] * scale_z) /
            padded_size(pred_w, pred_h))
        penalty = np.exp(
            -(ratio_change * scale_change - 1) * cfg.TRACK.PENALTY_K)

        influence = cfg.TRACK.WINDOW_INFLUENCE
        pscore = penalty * score
        pscore = pscore * (1 - influence) + self.window * influence
        best_idx = np.argmax(pscore)
        best_bbox = boxes[best_idx, :]
        best_score = pscore[best_idx]

        # update memory
        if best_score > cfg.META.UPDATE_THRESH:
            # Evict the weakest stored sample from all three parallel lists.
            weakest = np.argmin(self.score_mem)
            for memory in (self.search_mem, self.bbox_mem, self.score_mem):
                del memory[weakest]
            self.search_mem.append(search)
            # Stored before the in-place shift below, i.e. still in
            # search-window coordinates.
            self.bbox_mem.append(best_bbox.tolist())
            self.score_mem.append(best_score)

        # update filter
        if self.track_frame % cfg.META.UPDATE_FREQ == 0:
            # NOTE(review): the stored boxes are in (cx, cy, w, h) form here;
            # confirm that anchor_target expects that layout.
            targets = [self.anchor_target(box) for box in self.bbox_mem]
            cls_list, loc_list, weight_list = zip(*targets)
            gt_cls = torch.from_numpy(np.stack(cls_list)).cuda()
            gt_loc = torch.from_numpy(np.stack(loc_list)).cuda()
            gt_loc_weight = torch.from_numpy(np.stack(weight_list)).cuda()

            stacked = np.stack(self.search_mem).astype(np.float32)
            searches = torch.from_numpy(
                stacked.transpose((0, 3, 1, 2))).cuda()

            self.model.meta_train(self.examplars, searches, gt_cls, gt_loc,
                                  gt_loc_weight)

        # update track state
        half_instance = cfg.TRACK.INSTANCE_SIZE // 2
        best_bbox[0] -= half_instance
        best_bbox[1] -= half_instance
        best_bbox = best_bbox / scale_z

        cx = best_bbox[0] + self.bbox_pos[0]
        cy = best_bbox[1] + self.bbox_pos[1]
        # Blend old and new size; the rate grows with the penalised score.
        lr = penalty[best_idx] * score[best_idx] * cfg.TRACK.LR
        w = self.bbox_size[0] * (1 - lr) + lr * best_bbox[2]
        h = self.bbox_size[1] * (1 - lr) + lr * best_bbox[3]

        im_h, im_w = img.shape[0], img.shape[1]
        clipped = self._clip_bbox(cx, cy, w, h, im_w, im_h)
        self.bbox_pos = clipped[0:2]
        self.bbox_size = clipped[2:4]
        self.track_frame += 1

        return {'bbox': clipped, 'score': score[best_idx]}
示例#6
0
    def __call__(self, target, size, neg=False):
        """Build per-anchor labels, regression targets and weights.

        Args:
            target: ground-truth box in corner form.
            size: side length of the (square) output grid.
            neg: when true, emit only negatives sampled from a window around
                the target centre; regression targets and overlap stay zero.

        Returns:
            Tuple ``(cls, delta, delta_weight, overlap)``.  ``cls`` holds
            -1 (ignore), 0 (negative) or 1 (positive) per anchor.
        """
        anchor_num = len(cfg.ANCHOR.RATIOS) * len(cfg.ANCHOR.SCALES)
        grid_shape = (anchor_num, size, size)

        # -1 ignore 0 negative 1 positive
        cls = -1 * np.ones(grid_shape, dtype=np.int64)
        delta = np.zeros((4,) + grid_shape, dtype=np.float32)
        delta_weight = np.zeros(grid_shape, dtype=np.float32)

        def subsample(position, keep_num=16):
            # position is an index tuple as returned by np.where; keep at
            # most keep_num randomly chosen entries and report how many.
            total = position[0].shape[0]
            if total <= keep_num:
                return position, total
            order = np.arange(total)
            np.random.shuffle(order)
            chosen = order[:keep_num]
            return tuple(axis[chosen] for axis in position), keep_num

        tcx, tcy, tw, th = corner2center(target)

        if neg:
            half_search = cfg.TRAIN.SEARCH_SIZE // 2
            # Grid cell corresponding to the target centre.
            col = size // 2 + int(
                np.ceil((tcx - half_search) / cfg.ANCHOR.STRIDE + 0.5))
            row = size // 2 + int(
                np.ceil((tcy - half_search) / cfg.ANCHOR.STRIDE + 0.5))
            # Up to 7x7 cells around it, clipped to the grid.
            left, right = max(0, col - 3), min(size, col + 4)
            top, bottom = max(0, row - 3), min(size, row + 4)
            cls[:, top:bottom, left:right] = 0

            neg_where, _ = subsample(np.where(cls == 0), cfg.TRAIN.NEG_NUM)
            cls[:] = -1
            cls[neg_where] = 0

            overlap = np.zeros(grid_shape, dtype=np.float32)
            return cls, delta, delta_weight, overlap

        anchor_box, anchor_center = (self.anchors.all_anchors[0],
                                     self.anchors.all_anchors[1])
        x1, y1, x2, y2 = (anchor_box[k] for k in range(4))
        cx, cy, w, h = (anchor_center[k] for k in range(4))

        # Offsets normalised by anchor size; log-ratio for width/height.
        delta[0] = (tcx - cx) / w
        delta[1] = (tcy - cy) / h
        delta[2] = np.log(tw / w)
        delta[3] = np.log(th / h)

        overlap = IoU([x1, y1, x2, y2], target)

        pos_where = np.where(overlap > cfg.TRAIN.THR_HIGH)
        neg_where = np.where(overlap < cfg.TRAIN.THR_LOW)

        pos_where, pos_num = subsample(pos_where, cfg.TRAIN.POS_NUM)
        neg_where, _ = subsample(neg_where,
                                 cfg.TRAIN.TOTAL_NUM - cfg.TRAIN.POS_NUM)

        cls[pos_where] = 1
        delta_weight[pos_where] = 1. / (pos_num + 1e-6)

        cls[neg_where] = 0
        return cls, delta, delta_weight, overlap