Пример #1
0
    def simple_test_ope(self, img, frame_id, gt_bboxes):
        """Run a single tracking step under the OPE test mode.

        On frame 0 the tracker state (``self.memo``) is rebuilt from the
        ground truth box; on every other frame the stored state is advanced
        by ``self.track``.

        Args:
            img (Tensor): of shape (1, C, H, W) encoding input image.
            frame_id (int): the id of current frame in the video.
            gt_bboxes (list[Tensor]): list of ground truth bboxes for each
                image with shape (1, 4) in [tl_x, tl_y, br_x, br_y] format or
                shape (1, 8) in [x1, y1, x2, y2, x3, y3, x4, y4].

        Returns:
            bbox_pred (Tensor): in [tl_x, tl_y, br_x, br_y] format.
            best_score (Tensor): the tracking bbox confidence in range [0,1],
                and the score of initial frame is -1.
        """
        if frame_id != 0:
            # Regular frame: advance the tracker from the remembered state.
            score, tracked_bbox = self.track(img, self.memo.bbox,
                                             self.memo.z_feat,
                                             self.memo.avg_channel)
            self.memo.bbox = tracked_bbox
        else:
            # First frame: start a fresh memo seeded with the ground truth.
            init_gt = gt_bboxes[0][0]
            self.memo = Dict()
            self.memo.bbox = quad2bbox(init_gt)
            template_feat, avg_channel = self.init(img, self.memo.bbox)
            self.memo.z_feat = template_feat
            self.memo.avg_channel = avg_channel
            # The initial frame has no tracking confidence.
            score = -1.

        return bbox_cxcywh_to_xyxy(self.memo.bbox), score
Пример #2
0
def test_quad2bbox():
    """Check that ``quad2bbox`` returns strictly positive values.

    Five 8-value quadrilaterals are built: columns 0, 1, 3 and 6 receive
    random integers in [1, 10) and columns 2, 4, 5 and 7 receive random
    integers in [10, 20), so every coordinate is positive.
    """
    num_quads = 5
    low_cols = torch.tensor([0, 1, 3, 6], dtype=torch.long)
    high_cols = torch.tensor([2, 4, 5, 7], dtype=torch.long)
    sample_shape = (num_quads, 4)

    quad = torch.zeros((num_quads, 8), dtype=torch.float)
    # Draw the low coordinates first, then the high ones.
    low_vals = torch.randint(1, 10, sample_shape, dtype=torch.float)
    quad[:, low_cols] = low_vals
    high_vals = torch.randint(10, 20, sample_shape, dtype=torch.float)
    quad[:, high_cols] = high_vals

    converted = quad2bbox(quad)
    assert (converted > 0).all()
Пример #3
0
    def simple_test_vot(self, img, frame_id, gt_bboxes, img_metas=None):
        """Test using VOT test mode.

        The tracker is (re-)initialized on the frame whose id equals
        ``self.init_frame_id`` (reset to 0 on frame 0). When the tracked
        region no longer overlaps the ground truth, ``self.init_frame_id`` is
        moved 5 frames ahead and the frames in between are skipped.

        Args:
            img (Tensor): of shape (1, C, H, W) encoding input image.
            frame_id (int): the id of current frame in the video.
            gt_bboxes (list[Tensor]): list of ground truth bboxes for each
                image with shape (1, 4) in [tl_x, tl_y, br_x, br_y] format or
                shape (1, 8) in [x1, y1, x2, y2, x3, y3, x4, y4].
            img_metas (list[dict]): list of image information dict where each
                dict has: 'img_shape', 'scale_factor', 'flip', and may also
                contain 'filename', 'ori_shape', 'pad_shape', and
                'img_norm_cfg'. For details on the values of these keys see
                `mmtrack/datasets/pipelines/formatting.py:VideoCollect`.
                Only ``img_metas[0]['img_shape']`` is read here; if it is
                missing a warning is emitted and the overlap is computed
                without image bounds.

        Returns:
            bbox_pred (Tensor): in [tl_x, tl_y, br_x, br_y] format in the
                normal tracking state. Otherwise a one-element tensor holding
                a state code: 1 for initialization, 0 for a frame skipped
                after a failure, 2 for a tracking failure.
            best_score (Tensor | float): the tracking bbox confidence in
                range [0,1]; -1 on initialization and skipped frames.
        """
        # Local import: the file-level import block is not guaranteed to
        # provide ``warnings``.
        import warnings

        if frame_id == 0:
            self.init_frame_id = 0
        if self.init_frame_id == frame_id:
            # initialization
            gt_bboxes = gt_bboxes[0][0]
            self.memo = Dict()
            self.memo.bbox = quad2bbox(gt_bboxes)
            self.memo.z_feat, self.memo.avg_channel = self.init(
                img, self.memo.bbox)
            # 1 denotes the initialization state
            bbox_pred = img.new_tensor([1.])
            best_score = -1.
        elif self.init_frame_id > frame_id:
            # 0 denotes unknown state, namely the skipping frame after failure
            bbox_pred = img.new_tensor([0.])
            best_score = -1.
        else:
            # normal tracking state
            best_score, self.memo.bbox = self.track(img, self.memo.bbox,
                                                    self.memo.z_feat,
                                                    self.memo.avg_channel)
            # convert bbox to region
            track_bbox = bbox_cxcywh_to_x1y1wh(self.memo.bbox).cpu().numpy()
            track_region = bbox2region(track_bbox)
            gt_bbox = gt_bboxes[0][0]
            # A 4-value ground truth is an xyxy box and is converted first;
            # any other length is handed to bbox2region unchanged
            # (presumably the 8-value polygon form -- confirm with caller).
            if len(gt_bbox) == 4:
                gt_bbox = bbox_xyxy_to_x1y1wh(gt_bbox)
            gt_region = bbox2region(gt_bbox.cpu().numpy())

            if img_metas is not None and 'img_shape' in img_metas[0]:
                image_shape = img_metas[0]['img_shape']
                # img_shape is indexed as (h, w, ...); bounds are (w, h).
                image_wh = (image_shape[1], image_shape[0])
            else:
                image_wh = None
                # Actually emit the warning; a bare ``Warning(...)``
                # expression only builds an exception object and drops it.
                warnings.warn(
                    'image shape are need when calculating bbox overlap')
            overlap = calculate_region_overlap(track_region,
                                               gt_region,
                                               bounds=image_wh)
            if overlap <= 0:
                # tracking failure: schedule re-initialization 5 frames
                # later; the frames before it hit the skipping branch above.
                self.init_frame_id = frame_id + 5
                # 2 denotes the failure state
                bbox_pred = img.new_tensor([2.])
            else:
                bbox_pred = bbox_cxcywh_to_xyxy(self.memo.bbox)

        return bbox_pred, best_score