Example #1
    def get_modified_target(self, x, bbox_gt):
        """
        Returns modified ground truth bounding box 
        for rescaled images
        Return shape: (8,)
        """

        bbox_gt = bbox_gt[np.newaxis, :]
        bbox = x[2][np.newaxis, :]
        y_gt = get_min_max_bbox(bbox_gt)
        y = get_min_max_bbox(bbox)
        size = np.array([y[:, 2], y[:, 3]]).transpose()       # (N, 2) of (w, h)
        w_z = size[:, 0] + CONTEXT_AMOUNT * np.sum(size, 1)   # context-padded width
        h_z = size[:, 1] + CONTEXT_AMOUNT * np.sum(size, 1)   # context-padded height
        s_z = np.sqrt(w_z * h_z)                              # square context side
        scale_z = NEW_EXEMPLAR_SIZE / s_z                     # image -> crop zoom
        y_gt -= y                                             # offset from template box
        y_gt = y_gt * scale_z                                 # rescale to crop pixels

        y_gt[:, 0] += NEW_INSTANCE_SIZE / 2    # re-centre on the search crop
        y_gt[:, 1] += NEW_INSTANCE_SIZE / 2
        y_gt[:, 2] += NEW_EXEMPLAR_SIZE        # sizes relative to the exemplar
        y_gt[:, 3] += NEW_EXEMPLAR_SIZE
        y_gt = get_region_from_center(y_gt)
        return y_gt[0]
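
For context: the crop scale used above is the usual SiamRPN-style recipe, pad width and height by CONTEXT_AMOUNT times their sum, take the geometric mean as the context-square side, and divide the exemplar size by it. A minimal standalone sketch, assuming CONTEXT_AMOUNT = 0.5 and NEW_EXEMPLAR_SIZE = 127 (assumed values; the project takes them from its own config):

import numpy as np

# Assumed constants: 0.5 and 127 are the common SiamRPN-style defaults.
CONTEXT_AMOUNT = 0.5
NEW_EXEMPLAR_SIZE = 127

def exemplar_scale(size):
    """size: (N, 2) array of (w, h) -> per-sample zoom factor scale_z."""
    w_z = size[:, 0] + CONTEXT_AMOUNT * size.sum(axis=1)  # context-padded width
    h_z = size[:, 1] + CONTEXT_AMOUNT * size.sum(axis=1)  # context-padded height
    s_z = np.sqrt(w_z * h_z)          # side of the square context region
    return NEW_EXEMPLAR_SIZE / s_z    # image pixels -> exemplar-crop pixels

print(exemplar_scale(np.array([[64.0, 32.0]])))  # -> [1.3417...]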
Example #2
    def init(self, imgs, bbox):
        """
        args:
            imgs(np.ndarray): batch of BGR image
            batch of bbox: (x, y, w, h) bbox
        """
        bbox = get_min_max_bbox(bbox)
        bbox = cxy_wh_2_rect(bbox)

        self.center_pos = np.array([bbox[:, 0]+(bbox[:, 2])/2.0,
                                    bbox[:, 1]+(bbox[:, 3])/2.0])
        self.center_pos = self.center_pos.transpose()
        self.size = np.array([bbox[:, 2], bbox[:, 3]])
        self.size = self.size.transpose()

        w_z = self.size[:, 0] + CONTEXT_AMOUNT * np.sum(self.size, 1)
        h_z = self.size[:, 1] + CONTEXT_AMOUNT * np.sum(self.size, 1)
        s_z = np.round(np.sqrt(w_z * h_z))
        self.channel_average = []
        for img in imgs:
            self.channel_average.append(np.mean(img, axis=(0, 1)))
        self.channel_average = np.array(self.channel_average)
        z_crop = []
        for i, img in enumerate(imgs):
            z_crop.append(self.get_subwindow(img, self.center_pos[i],
                                    EXEMPLAR_SIZE,
                                    s_z[i], 
                                    self.channel_average[i], 
                                    ind=0))
        z_crop = torch.cat(z_crop)
        self.model.template(z_crop)
        self.cnt = 0
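
The per-image channel means computed in init become the padding colour whenever get_subwindow has to crop past a frame border. That step in isolation, with a made-up batch (plain NumPy, no project code):

import numpy as np

# A fake batch of two BGR frames, just to show the shapes involved.
imgs = [np.random.randint(0, 256, (240, 320, 3), dtype=np.uint8)
        for _ in range(2)]

# Mean over the spatial axes gives one (B, G, R) triple per image.
channel_average = np.array([np.mean(img, axis=(0, 1)) for img in imgs])
print(channel_average.shape)  # (2, 3)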
Example #3
def transform_to_gt(x, y):
    img_t = x[0]
    img_i = x[1]
    bbox_t = x[2][np.newaxis, :]
    bbox_t = get_min_max_bbox(bbox_t)
    bbox_t = cxy_wh_2_rect(bbox_t)
    y = get_min_max_bbox(y[np.newaxis, :])[0]

    center_pos = np.array([
        bbox_t[0, 0] + (bbox_t[0, 2] - 1) / 2.0,
        bbox_t[0, 1] + (bbox_t[0, 3] - 1) / 2.0
    ])
    size = np.array([bbox_t[0, 2], bbox_t[0, 3]])

    # calculate z crop size
    w_z = size[0] + CONTEXT_AMOUNT * np.sum(size)
    h_z = size[1] + CONTEXT_AMOUNT * np.sum(size)
    s_z = np.round(np.sqrt(w_z * h_z))
    scale_z = EXEMPLAR_SIZE / s_z
    y[0] -= INSTANCE_SIZE / 2    # undo the crop-centre offset
    y[1] -= INSTANCE_SIZE / 2
    y[2] -= EXEMPLAR_SIZE        # undo the size offset
    y[3] -= EXEMPLAR_SIZE

    y = y / scale_z   # crop pixels -> image pixels

    cx = y[0] + center_pos[0]
    cy = y[1] + center_pos[1]
    # smooth the box size with the transition learning rate
    width = size[0] * (1 - TRANSITION_LR) + (size[0] + y[2]) * TRANSITION_LR
    height = size[1] * (1 - TRANSITION_LR) + (size[1] + y[3]) * TRANSITION_LR
    cx, cy, width, height = _bbox_clip(cx, cy, width, height, img_i.shape[:2])

    bbox = np.array([cx - width / 2, cy - height / 2, width, height])
    quad_num = get_region_from_corner(bbox[np.newaxis, :])[0]
    return quad_num
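
The width/height update near the end is plain exponential smoothing between the previous size and the newly predicted one. A one-line illustration, assuming TRANSITION_LR = 0.25 (an assumed value; the real rate comes from the project's config):

TRANSITION_LR = 0.25  # assumed; the project defines its own constant

def smooth_size(prev, delta, lr=TRANSITION_LR):
    """Blend the previous size with the newly predicted size (prev + delta)."""
    return prev * (1 - lr) + (prev + delta) * lr

print(smooth_size(100.0, 20.0))  # 105.0: only a quarter of the jump is applied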
Example #4
    def get_modified_target(self, x, bbox):
        """
        Map a ground-truth box into the rescaled crop's frame
        (same transform as Example #1, but with the base constants).
        Return shape: (8,)
        """
        bbox = bbox[np.newaxis, :]
        bbox0 = x[2][np.newaxis, :]
        y0 = get_min_max_bbox(bbox0)

        size = np.array([y0[:, 2], y0[:, 3]])
        size = size.transpose()
        w_z = size[:, 0] + CONTEXT_AMOUNT * np.sum(size, 1)
        h_z = size[:, 1] + CONTEXT_AMOUNT * np.sum(size, 1)
        s_z = np.sqrt(w_z * h_z)
        scale_z = EXEMPLAR_SIZE / s_z

        y = get_min_max_bbox(bbox)
        y -= y0
        y = y * scale_z

        y[:, 0] += (INSTANCE_SIZE / 2)
        y[:, 1] += (INSTANCE_SIZE / 2)
        y[:, 2] += (EXEMPLAR_SIZE)
        y[:, 3] += (EXEMPLAR_SIZE)
        y = get_region_from_center(y)
        return y[0]
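
get_region_from_center is not shown on this page; judging from the (8,) return shape, it presumably expands a (cx, cy, w, h) box into an 8-value corner quad. A hypothetical stand-in under that assumption (the corner order may differ from the real helper):

import numpy as np

def region_from_center(boxes):
    """boxes: (N, 4) of (cx, cy, w, h) -> (N, 8) corner quads.
    Hypothetical stand-in for get_region_from_center."""
    cx, cy, w, h = boxes.T
    x0, x1 = cx - w / 2, cx + w / 2
    y0, y1 = cy - h / 2, cy + h / 2
    # corners listed clockwise from the top-left
    return np.stack([x0, y0, x1, y0, x1, y1, x0, y1], axis=1)

print(region_from_center(np.array([[10.0, 10.0, 4.0, 2.0]])))
# [[ 8.  9. 12.  9. 12. 11.  8. 11.]]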
Example #5
def visualise_transformed_data_point(x, y):
    img_t = x[0]
    img_i = x[1]
    bbox_t = x[2][np.newaxis, :]
    bbox_t = get_min_max_bbox(bbox_t)
    bbox_t = cxy_wh_2_rect(bbox_t)

    center_pos = np.array([
        bbox_t[0, 0] + (bbox_t[0, 2] - 1) / 2.0,
        bbox_t[0, 1] + (bbox_t[0, 3] - 1) / 2.0
    ])
    size = np.array([bbox_t[0, 2], bbox_t[0, 3]])

    # calculate z crop size
    w_z = size[0] + CONTEXT_AMOUNT * np.sum(size)
    h_z = size[1] + CONTEXT_AMOUNT * np.sum(size)
    s_z = np.round(np.sqrt(w_z * h_z))
    scale_z = EXEMPLAR_SIZE / s_z
    s_x = np.round(s_z * (INSTANCE_SIZE / EXEMPLAR_SIZE))

    # print("Track centre = ", center_pos)
    channel_average = np.mean(img_t, axis=(0, 1))
    # get crop
    # cv2.imwrite('/home/sudeep/Desktop/img1.jpg', img)
    # print(img.shape)
    # print("Init centre = ", center_pos)
    img_t = get_subwindow(img_t, center_pos, INSTANCE_SIZE, s_x,
                          channel_average)
    sz = EXEMPLAR_SIZE
    sx = INSTANCE_SIZE
    centre = np.array([(sx / 2.0), (sx / 2.0)])
    xmin = centre[0] - (sz / 2.0)
    xmax = centre[0] + (sz / 2.0)
    t_quad = np.array([xmin, xmax, xmin, xmin, xmax, xmin, xmax,
                       xmax])  # exemplar corner quad (inclusive bounds)
    img_i = get_subwindow(img_i, center_pos, INSTANCE_SIZE, s_x,
                          channel_average)
    i_quad = y
    imgt_box = draw_bbox(img_t, t_quad)
    imgi_box = draw_bbox(img_i, i_quad)
    vis = np.concatenate((imgt_box, imgi_box), axis=1)
    return vis
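
draw_bbox is an external helper; a plausible OpenCV sketch of what it does with these 8-value quads (hypothetical implementation, only cv2.polylines is assumed):

import numpy as np
import cv2

def draw_quad(img, quad, color=(0, 255, 0)):
    """Draw an (x1, y1, ..., x4, y4) quad; hypothetical draw_bbox stand-in."""
    pts = np.asarray(quad, dtype=np.int32).reshape(-1, 1, 2)
    out = img.copy()
    cv2.polylines(out, [pts], isClosed=True, color=color, thickness=2)
    return out

canvas = np.zeros((255, 255, 3), dtype=np.uint8)
quad = np.array([64, 64, 191, 64, 191, 191, 64, 191])
vis = np.concatenate((draw_quad(canvas, quad), canvas), axis=1)  # side by side
print(vis.shape)  # (255, 510, 3)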
Example #6
    def track(self, imgs):
        """
        args:
            img(np.ndarray): Batch of BGR image
        return:
            bbox(list):[x, y, width, height]
        """

        w_z = self.size[:, 0] + CONTEXT_AMOUNT * np.sum(self.size, 1)
        h_z = self.size[:, 1] + CONTEXT_AMOUNT * np.sum(self.size, 1)
        s_z = np.sqrt(w_z * h_z)

        scale_z = EXEMPLAR_SIZE / s_z
        s_x = s_z * (INSTANCE_SIZE / EXEMPLAR_SIZE)

        x_crop = []
        for i, img in enumerate(imgs):
            x_crop.append(
                self.get_subwindow(img,
                                   self.center_pos[i],
                                   INSTANCE_SIZE,
                                   np.round(s_x)[i],
                                   self.channel_average[i],
                                   ind=1))
        x_crop = torch.cat(x_crop)

        self.cnt += 1

        outputs = self.model(x_crop)
        x_crop = img_to_numpy(x_crop[0])
        bbox_lkt = []
        bbox_rescaled = []
        for i in range(len(outputs[0])):
            bbox1 = tensor_to_numpy(outputs[0][i])
            bbox_lkt.append(bbox1)
            bbox = get_min_max_bbox(bbox1)
            bbox[:, 0] -= (INSTANCE_SIZE / 2)
            bbox[:, 1] -= (INSTANCE_SIZE / 2)
            bbox[:, 2] -= (EXEMPLAR_SIZE)
            bbox[:, 3] -= (EXEMPLAR_SIZE)

            bbox = bbox / scale_z[:, np.newaxis]

            cx = bbox[:, 0] + self.center_pos[:, 0]
            cy = bbox[:, 1] + self.center_pos[:, 1]
            width = self.size[:, 0] * (1 - TRANSITION_LR) + (
                self.size[:, 0] + bbox[:, 2]) * TRANSITION_LR
            height = self.size[:, 1] * (1 - TRANSITION_LR) + (
                self.size[:, 1] + bbox[:, 3]) * TRANSITION_LR
            shapes = []
            for img in imgs:
                shapes.append(img.shape[:2])
            shapes = np.array(shapes)
            cx, cy, width, height = self._bbox_clip(cx, cy, width, height,
                                                    shapes)

            bbox = np.array([cx - width / 2, cy - height / 2, width,
                             height]).transpose()
            bbox = get_region_from_corner(bbox)
            bbox_rescaled.append(bbox)
            if i == len(outputs[0]) - 1:   # last output updates the tracker state
                self.center_pos = np.array([cx, cy]).transpose()
                self.size = np.array([width, height]).transpose()

        return (bbox_rescaled, ) + outputs[1:] + (x_crop, bbox_lkt)
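
self._bbox_clip keeps the smoothed boxes inside their frames. A vectorised sketch of what it plausibly does, assuming the common pysot behaviour of clamping the centre to the image bounds and each side to at least 10 px (both assumptions):

import numpy as np

def bbox_clip(cx, cy, width, height, shapes):
    """Clamp centres and sizes per sample; shapes is (N, 2) of (rows, cols).
    Assumed to mirror pysot's _bbox_clip, including the 10 px minimum side."""
    cx = np.clip(cx, 0, shapes[:, 1])
    cy = np.clip(cy, 0, shapes[:, 0])
    width = np.clip(width, 10, shapes[:, 1])
    height = np.clip(height, 10, shapes[:, 0])
    return cx, cy, width, height

shapes = np.array([[240, 320]])
print(bbox_clip(np.array([350.0]), np.array([-5.0]),
                np.array([400.0]), np.array([4.0]), shapes))
# (array([320.]), array([0.]), array([320.]), array([10.]))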