Example #1
    def track_feature(self, im, target_pos, target_sz, avg_chans=None):
        r"""
        Extract target image feature for tracker
        :param im: image frame
        :param target_pos: target position (x, y)
        :param target_sz: target size (w, h)
        :param avg_chans: channel mean values
        :return: f_z, feature of the target image;
            im_z_crop, cropped patch of the target image;
            avg_chans, channel averages
        """
        if avg_chans is None:
            avg_chans = np.mean(im, axis=(0, 1))

        z_size = self._hyper_params['z_size']
        context_amount = self._hyper_params['context_amount']

        im_z_crop, _ = get_crop(
            im,
            target_pos,
            target_sz,
            z_size,
            avg_chans=avg_chans,
            context_amount=context_amount,
            func_get_subwindow=get_subwindow_tracking,
        )
        phase = self._hyper_params['phase_init']
        with torch.no_grad():
            f_z = self._tracker(imarray_to_tensor(im_z_crop).to(self.device),
                                phase=phase)[0]

        return f_z, im_z_crop, avg_chans
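
A minimal sketch of the crop-region geometry behind get_crop, assuming it follows the standard SiamFC context-padding rule (template_crop_size and its values here are illustrative, not the repository's API):

    import numpy as np

    def template_crop_size(target_sz, context_amount=0.5):
        # SiamFC-style context padding: extend the box by
        # context_amount * (w + h), then take the geometric mean
        # of the padded sides
        w, h = target_sz
        pad = context_amount * (w + h)
        return np.sqrt((w + pad) * (h + pad))

    print(template_crop_size((80.0, 60.0)))  # ~139.6, region side in frame pixels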
Example #2
    def feature(self, im, target_pos, target_sz, avg_chans=None):
        r"""
        Extract feature
        :param im: initial frame
        :param target_pos: target position (x, y)
        :param target_sz: target size (w, h)
        :param avg_chans: channel mean values
        :return: features, im_z_crop (cropped template patch), avg_chans
        """
        if avg_chans is None:
            avg_chans = np.mean(im, axis=(0, 1))

        z_size = self._hyper_params['z_size']
        context_amount = self._hyper_params['context_amount']

        im_z_crop, _ = get_crop(
            im,
            target_pos,
            target_sz,
            z_size,
            avg_chans=avg_chans,
            context_amount=context_amount,
            func_get_subwindow=get_subwindow_tracking,
        )
        phase = self._hyper_params['phase_init']
        with torch.no_grad():
            features = self.model(imarray_to_tensor(im_z_crop).to(self.device),
                                  phase=phase)

        return features, im_z_crop, avg_chans
    def init(self, im, state):
        r"""Initialize tracker
            Internal target state representation: self._state['state'] = (target_pos, target_sz)
        
        Arguments
        ---------
        im : np.array
            initial frame image
        state
            target state on initial frame (bbox in case of SOT), format: xywh
        """
        self.frame_num = 1
        self.temp_max = 0
        rect = state  # bbox in xywh format is given for initialization in case of tracking
        box = xywh2cxywh(rect)
        target_pos, target_sz = box[:2], box[2:]

        self._state['im_h'] = im.shape[0]
        self._state['im_w'] = im.shape[1]

        # extract template feature
        features, im_z_crop, avg_chans, im_z_crop_t = self.feature(
            im, target_pos, target_sz)

        score_size = self._hyper_params['score_size']
        if self._hyper_params['windowing'] == 'cosine':
            window = np.outer(np.hanning(score_size), np.hanning(score_size))
            window = window.reshape(-1)
        elif self._hyper_params['windowing'] == 'uniform':
            window = np.ones((score_size, score_size))
        else:
            window = np.ones((score_size, score_size))

        self._state['z_crop'] = im_z_crop
        self._state['z0_crop'] = im_z_crop_t
        with torch.no_grad():
            self._model.instance(im_z_crop_t)
        self._state['avg_chans'] = avg_chans
        self._state['features'] = features
        self._state['window'] = window
        self._state['state'] = (target_pos, target_sz)
        # init online classifier
        z_size = self._hyper_params['z_size']
        x_size = self._hyper_params['x_size']
        context_amount = self._hyper_params['context_amount']
        init_im_crop, scale_x = get_crop(
            im,
            target_pos,
            target_sz,
            z_size,
            x_size=x_size * 2,
            avg_chans=avg_chans,
            context_amount=context_amount,
            func_get_subwindow=get_subwindow_tracking,
        )
        init_x_crop_t = imarray_to_tensor(init_im_crop)
        self.online_classifier.initialize(init_x_crop_t, state)
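
The cosine window built in init is the standard center prior of SiamFC-family trackers. A minimal sketch of how such a window is typically blended into the penalized score (window_influence is a hypothetical hyper-parameter name here):

    import numpy as np

    score_size = 17  # illustrative value of the 'score_size' hyper-parameter
    window = np.outer(np.hanning(score_size), np.hanning(score_size)).reshape(-1)

    window_influence = 0.21
    pscore = np.random.rand(score_size * score_size)  # stand-in for real scores
    # re-weight scores toward the previous target center
    pscore = pscore * (1 - window_influence) + window * window_influence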
Example #4
    def feature(self, im: np.ndarray, target_pos, target_sz, avg_chans=None):
        """Extract feature

        Parameters
        ----------
        im : np.array
            initial frame
        target_pos : array-like
            target position (x, y)
        target_sz : array-like
            target size (w, h)
        avg_chans : array-like, optional
            channel mean values, (B, G, R), by default None

        Returns
        -------
        tuple
            (features, im_z_crop, avg_chans): template features, the cropped
            template patch, and the channel mean values used
        """
        if avg_chans is None:
            avg_chans = np.mean(im, axis=(0, 1))

        z_size = self._hyper_params['z_size']
        context_amount = self._hyper_params['context_amount']

        im_z_crop, _ = get_crop(
            im,
            target_pos,
            target_sz,
            z_size,
            avg_chans=avg_chans,
            context_amount=context_amount,
            func_get_subwindow=get_subwindow_tracking,
        )
        phase = self._hyper_params['phase_init']
        with torch.no_grad():
            features = self._model(imarray_to_tensor(im_z_crop).to(
                self.device),
                                   phase=phase)

        return features, im_z_crop, avg_chans
Example #5
    def global_modeling(self):
        """
        always runs after seg4vos, takes newly predicted filtered image,
        extracts high-level feature and updates the global feature based on confidence score

        """
        filtered_image = self._state['filtered_image']  # shape: (129,129,3)
        with torch.no_grad():
            deep_feature = self._segmenter(imarray_to_tensor(filtered_image).to(
                self.device),
                                           phase='global_feature')[0]

        seg_global_feature = self._state['seg_global_feature']
        seg_init_feature = self._state['seg_init_feature']
        u = self._hyper_params['seg_ema_u']
        s = self._hyper_params['seg_ema_s']
        conf_score = self._state['conf_score']

        u = u * conf_score
        seg_global_feature = seg_global_feature * (1 - u) + deep_feature * u
        gml_feature = seg_global_feature * s + seg_init_feature * (1 - s)

        self._state['seg_global_feature'] = seg_global_feature
        self._state['gml_feature'] = gml_feature
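
The update above is a confidence-weighted exponential moving average that stays anchored to the init-frame feature. A numeric sketch with toy values:

    import numpy as np

    seg_global_feature = np.full((2, 2), 0.4)  # running global feature
    seg_init_feature = np.full((2, 2), 1.0)    # init-frame anchor
    deep_feature = np.full((2, 2), 0.8)        # feature of the new filtered image
    u, s, conf_score = 0.5, 0.5, 0.9           # toy hyper-parameters and confidence

    u_eff = u * conf_score  # low segmentation confidence slows the update
    seg_global_feature = seg_global_feature * (1 - u_eff) + deep_feature * u_eff
    gml_feature = seg_global_feature * s + seg_init_feature * (1 - s)
    print(gml_feature)  # 0.58 * 0.5 + 1.0 * 0.5 = 0.79 everywhere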
    def track(self,
              im_x,
              target_pos,
              target_sz,
              features,
              update_state=False,
              **kwargs):
        if 'avg_chans' in kwargs:
            avg_chans = kwargs['avg_chans']
        else:
            avg_chans = self._state['avg_chans']

        z_size = self._hyper_params['z_size']
        x_size = self._hyper_params['x_size']
        if self._state["lost_target"]:
            x_size = self._hyper_params["lost_search_size"]
        context_amount = self._hyper_params['context_amount']
        phase_track = self._hyper_params['phase_track']
        im_x_crop, scale_x = get_crop(
            im_x,
            target_pos,
            target_sz,
            z_size,
            x_size=x_size,
            avg_chans=avg_chans,
            context_amount=context_amount,
            func_get_subwindow=get_subwindow_tracking,
        )
        self._state["scale_x"] = deepcopy(scale_x)
        with torch.no_grad():
            score, box, cls, ctr, extra = self._model(
                imarray_to_tensor(im_x_crop).to(self.device),
                *features,
                phase=phase_track)
        if self._hyper_params["corr_fea_output"]:
            self._state["corr_fea"] = extra["corr_fea"]

        box = tensor_to_numpy(box[0])
        score = tensor_to_numpy(score[0])[:, 0]
        cls = tensor_to_numpy(cls[0])
        ctr = tensor_to_numpy(ctr[0])
        box_wh = xyxy2cxywh(box)

        # score post-processing
        best_pscore_id, pscore, penalty = self._postprocess_score(
            score, box_wh, target_sz, scale_x)
        # box post-processing
        new_target_pos, new_target_sz = self._postprocess_box(
            best_pscore_id, score, box_wh, target_pos, target_sz, scale_x,
            x_size, penalty)

        if self.debug:
            box = self._cvt_box_crop2frame(box_wh, target_pos, x_size, scale_x)

        # restrict new_target_pos & new_target_sz
        new_target_pos, new_target_sz = self._restrict_box(
            new_target_pos, new_target_sz)

        # record basic mid-level info
        self._state['x_crop'] = im_x_crop
        bbox_pred_in_crop = np.rint(box[best_pscore_id]).astype(int)
        self._state['bbox_pred_in_crop'] = bbox_pred_in_crop
        # record optional mid-level info
        if update_state:
            self._state['score'] = score
            self._state['pscore'] = pscore[best_pscore_id]
            self._state['all_box'] = box
            self._state['cls'] = cls
            self._state['ctr'] = ctr
        if pscore[best_pscore_id] > self._hyper_params["lost_score_th_high"]:
            self._state["lost_target"] = False
        elif pscore[best_pscore_id] < self._hyper_params["lost_score_th_low"]:
            self._state["lost_target"] = True
            logger.info("lost target")

        return new_target_pos, new_target_sz
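
The lost_score_th_high / lost_score_th_low pair implements two-threshold hysteresis, so a score hovering between the thresholds cannot flip the lost flag back and forth. The same logic in isolation:

    def update_lost_flag(lost, pscore, th_high, th_low):
        # hysteresis: change state only on a clear signal
        if pscore > th_high:
            return False
        if pscore < th_low:
            return True
        return lost  # in the dead band, keep the previous state

    assert update_lost_flag(False, 0.2, th_high=0.6, th_low=0.3) is True
    assert update_lost_flag(True, 0.5, th_high=0.6, th_low=0.3) is True  # dead band
    assert update_lost_flag(True, 0.7, th_high=0.6, th_low=0.3) is False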
Example #7
    def track(self,
              im_x,
              target_pos,
              target_sz,
              features,
              update_state=False,
              **kwargs):
        if 'avg_chans' in kwargs:
            avg_chans = kwargs['avg_chans']
        else:
            avg_chans = self._state['avg_chans']

        z_size = self._hyper_params['z_size']
        x_size = self._hyper_params['x_size']
        context_amount = self._hyper_params['context_amount']
        phase_track = self._hyper_params['phase_track']
        im_x_crop, scale_x = get_crop(
            im_x,
            target_pos,
            target_sz,
            z_size,
            x_size=x_size,
            avg_chans=avg_chans,
            context_amount=context_amount,
            func_get_subwindow=get_subwindow_tracking,
        )

        # process batch of templates
        score_list = []
        box_list = []
        cls_list = []
        ctr_list = []
        fms_x = None
        for ith in range(self._hyper_params['mem_len']):
            if fms_x is None:
                with torch.no_grad():
                    score, box, cls, ctr, extra = self._model(
                        imarray_to_tensor(im_x_crop).to(self.device),
                        *(features[ith]),
                        phase=phase_track)
                fms_x = [extra['c_x'], extra['r_x']]
            else:
                with torch.no_grad():
                    score, box, cls, ctr, extra = self._model(
                        *(features[ith]),
                        fms_x[0],
                        fms_x[1],
                        phase=phase_track)
            box = tensor_to_numpy(box[0])
            score = tensor_to_numpy(score[0])[:, 0]
            cls = tensor_to_numpy(cls[0])[:, 0]
            ctr = tensor_to_numpy(ctr[0])[:, 0]
            # append to list
            box_list.append(box)
            score_list.append(score)
            cls_list.append(cls)
            ctr_list.append(ctr)

        # fusion
        if self._hyper_params['mem_len'] > 1:
            score = score_list[0] * (1-self._hyper_params['mem_coef']) + \
                    np.stack(score_list[1:], axis=0).mean(axis=0) * self._hyper_params['mem_coef']
        else:
            # single template
            score = score_list[0]
        box = box_list[0]
        box_wh = xyxy2cxywh(box)

        # score post-processing
        best_pscore_id, pscore, penalty = self._postprocess_score(
            score, box_wh, target_sz, scale_x)
        # box post-processing
        new_target_pos, new_target_sz = self._postprocess_box(
            best_pscore_id, score, box_wh, target_pos, target_sz, scale_x,
            x_size, penalty)

        if self.debug:
            box = self._cvt_box_crop2frame(box_wh, target_pos, x_size, scale_x)

        # restrict new_target_pos & new_target_sz
        new_target_pos, new_target_sz = self._restrict_box(
            new_target_pos, new_target_sz)

        # record basic mid-level info
        self._state['x_crop'] = im_x_crop
        bbox_pred_in_crop = np.rint(box[best_pscore_id]).astype(int)
        self._state['bbox_pred_in_crop'] = bbox_pred_in_crop
        # record optional mid-level info
        if update_state:
            self._state['score'] = score
            self._state['pscore'] = pscore
            self._state['all_box'] = box
            self._state['cls'] = cls
            self._state['ctr'] = ctr

        return new_target_pos, new_target_sz
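
The fusion step weights the init-frame template against the mean of the memory templates. A toy sketch of the same arithmetic:

    import numpy as np

    mem_coef = 0.3  # illustrative value of the 'mem_coef' hyper-parameter
    score_list = [np.array([0.9, 0.1]),   # init-frame template
                  np.array([0.6, 0.4]),   # memory template 1
                  np.array([0.8, 0.2])]   # memory template 2

    fused = score_list[0] * (1 - mem_coef) + \
            np.stack(score_list[1:], axis=0).mean(axis=0) * mem_coef
    print(fused)  # [0.84 0.16]: the init-frame template keeps the dominant weight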
Example #8
    def track(self,
              im_x,
              target_pos,
              target_sz,
              features,
              update_state=False,
              **kwargs):
        if 'avg_chans' in kwargs:
            avg_chans = kwargs['avg_chans']
        else:
            avg_chans = self._state['avg_chans']

        z_size = self._hyper_params['z_size']
        x_size = self._hyper_params['x_size']
        context_amount = self._hyper_params['context_amount']
        phase_track = self._hyper_params['phase_track']
        im_x_crop, scale_x = get_crop(
            im_x,
            target_pos,
            target_sz,
            z_size,
            x_size=x_size,
            avg_chans=avg_chans,
            context_amount=context_amount,
            func_get_subwindow=get_subwindow_tracking,
        )
        # store crop information
        self._state["crop_info"] = dict(
            target_pos=target_pos,
            target_sz=target_sz,
            scale_x=scale_x,
            avg_chans=avg_chans,
        )
        with torch.no_grad():
            score, box, cls, ctr, *args = self._model(
                imarray_to_tensor(im_x_crop).to(self.device),
                *features,
                phase=phase_track)

        box = tensor_to_numpy(box[0])
        score = tensor_to_numpy(score[0])[:, 0]
        cls = tensor_to_numpy(cls[0])
        ctr = tensor_to_numpy(ctr[0])
        box_wh = xyxy2cxywh(box)

        # score post-processing
        best_pscore_id, pscore, penalty = self._postprocess_score(
            score, box_wh, target_sz, scale_x)
        # box post-processing
        new_target_pos, new_target_sz = self._postprocess_box(
            best_pscore_id, score, box_wh, target_pos, target_sz, scale_x,
            x_size, penalty)

        if self.debug:
            box = self._cvt_box_crop2frame(box_wh, target_pos, x_size, scale_x)

        # restrict new_target_pos & new_target_sz
        # new_target_pos, new_target_sz = self._restrict_box(
        #     new_target_pos, new_target_sz)

        # record basic mid-level info
        self._state['x_crop'] = im_x_crop
        # bbox_pred_in_crop = np.rint(box[best_pscore_id]).astype(np.int)
        bbox_pred_in_crop = box[best_pscore_id]
        self._state['bbox_pred_in_crop'] = bbox_pred_in_crop
        self._state['bbox_pred_in_frame'] = bbox_pred_in_crop

        # record optional mid-level info
        if update_state:
            self._state['score'] = score
            self._state['pscore'] = pscore
            self._state['all_box'] = box
            self._state['cls'] = cls
            self._state['ctr'] = ctr

        return new_target_pos, new_target_sz
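
Note that bbox_pred_in_frame above is stored in crop coordinates. Mapping a cxywh box from the search crop back to the full frame usually looks like the sketch below (a hypothetical reconstruction of _cvt_box_crop2frame, not the repository's exact code):

    import numpy as np

    def cvt_box_crop2frame(box_in_crop, target_pos, x_size, scale_x):
        # assumes the search crop of side x_size is centered on target_pos
        # and scaled by scale_x; rounding details may differ
        x = box_in_crop[..., 0] / scale_x + target_pos[0] - (x_size // 2) / scale_x
        y = box_in_crop[..., 1] / scale_x + target_pos[1] - (x_size // 2) / scale_x
        w = box_in_crop[..., 2] / scale_x
        h = box_in_crop[..., 3] / scale_x
        return np.stack([x, y, w, h], axis=-1)

    print(cvt_box_crop2frame(np.array([[150., 150., 40., 30.]]),
                             target_pos=np.array([320., 240.]),
                             x_size=303, scale_x=1.0))  # [[319. 239. 40. 30.]]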
Example #9
    def joint_segmentation(self, im_x, target_pos, target_sz, corr_feature,
                           gml_feature, **kwargs):
        r"""
        segment the current frame for VOS
        crop image => segmentation => parameter update

        :param im_x: current image
        :param target_pos: target position (x, y)
        :param target_sz: target size (w, h)
        :param corr_feature: correlation feature produced by the siamese encoder
        :param gml_feature: global feature produced by the global modeling loop
        :return: pred_mask, mask prediction on the saliency image patch;
            pred_mask_b, binary mask prediction on the saliency image patch
        """

        if 'avg_chans' in kwargs:
            avg_chans = kwargs['avg_chans']
        else:
            avg_chans = self._state['avg_chans']

        # crop image for saliency encoder
        saliency_image, scale_seg = get_crop(
            im_x,
            target_pos,
            target_sz,
            z_size=self._hyper_params["z_size"],
            output_size=self._hyper_params["saliency_image_size"],
            x_size=self._hyper_params["saliency_image_field"],
            avg_chans=avg_chans,
            context_amount=self._hyper_params["context_amount"],
            func_get_subwindow=get_subwindow_tracking,
        )
        self._state["scale_x"] = scale_seg
        # mask prediction
        pred_mask = self._segmenter(imarray_to_tensor(saliency_image).to(
            self.device),
                                    corr_feature,
                                    gml_feature,
                                    phase='segment')[0]  #tensor(1,1,257,257)

        pred_mask = tensor_to_numpy(pred_mask[0]).transpose(
            (1, 2, 0))  #np (257,257,1)

        # post processing
        mask_filter = (pred_mask >
                       self._hyper_params['mask_filter_thresh']).astype(
                           np.uint8)
        pred_mask_b = (pred_mask >
                       self._hyper_params['mask_pred_thresh']).astype(np.uint8)

        if self._hyper_params['save_patch']:
            mask_red = np.zeros_like(saliency_image)
            mask_red[:, :, 0] = mask_filter[:, :, 0] * 255
            masked_image = saliency_image * 0.5 + mask_red * 0.5
            self._state['patch_prediction'] = masked_image

        filtered_image = saliency_image * mask_filter
        filtered_image = cv2.resize(filtered_image,
                                    (self._hyper_params["GMP_image_size"],
                                     self._hyper_params["GMP_image_size"]))
        self._state['filtered_image'] = filtered_image

        if pred_mask_b.sum() > 0:
            conf_score = (pred_mask * pred_mask_b).sum() / pred_mask_b.sum()
        else:
            conf_score = 0
        self._state['conf_score'] = conf_score
        mask_in_full_image = self._mask_back(
            pred_mask,
            size=self._hyper_params["saliency_image_size"],
            region=self._hyper_params["saliency_image_field"])
        self._state['mask_in_full_image'] = mask_in_full_image
        track_score = self._tracker.get_track_score()
        if track_score < self._hyper_params["track_failed_score_th"]:
            self._state['mask_in_full_image'] *= 0
        return pred_mask, pred_mask_b
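
conf_score above is the mean predicted foreground probability inside the binarized mask (and 0 when the mask is empty). A toy sketch:

    import numpy as np

    pred_mask = np.array([[0.9, 0.2],
                          [0.7, 0.1]])
    pred_mask_b = (pred_mask > 0.5).astype(np.uint8)  # mask_pred_thresh = 0.5

    if pred_mask_b.sum() > 0:
        conf_score = (pred_mask * pred_mask_b).sum() / pred_mask_b.sum()
    else:
        conf_score = 0
    print(conf_score)  # (0.9 + 0.7) / 2 = 0.8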
Example #10
    def init(self, im, state, init_mask):
        """
        initialize the whole pipeline :
        tracker init => global modeling loop init

        :param im: init frame
        :param state: bbox in xywh format
        :param init_mask: binary mask of target object in shape (h,w)
        """

        #========== SiamFC++ init ==============
        self._tracker.init(im, state)
        avg_chans = self._tracker.get_avg_chans()
        self._state['avg_chans'] = avg_chans

        rect = state  # bbox in xywh format is given for initialization in case of tracking
        box = xywh2cxywh(rect)
        target_pos, target_sz = box[:2], box[2:]
        self._state['state'] = (target_pos, target_sz)
        self._state['im_h'] = im.shape[0]
        self._state['im_w'] = im.shape[1]

        # ========== Global Modeling Loop init ==============
        init_image, _ = get_crop(
            im,
            target_pos,
            target_sz,
            z_size=self._hyper_params["z_size"],
            x_size=self._hyper_params["GMP_image_size"],
            avg_chans=avg_chans,
            context_amount=self._hyper_params["context_amount"],
            func_get_subwindow=get_subwindow_tracking,
        )
        init_mask_c3 = np.stack([init_mask, init_mask, init_mask],
                                -1).astype(np.uint8)
        init_mask_crop_c3, _ = get_crop(
            init_mask_c3,
            target_pos,
            target_sz,
            z_size=self._hyper_params["z_size"],
            x_size=self._hyper_params["GMP_image_size"],
            avg_chans=avg_chans * 0,
            context_amount=self._hyper_params["context_amount"],
            func_get_subwindow=get_subwindow_tracking,
        )
        init_mask_crop = init_mask_crop_c3[:, :, 0]
        init_mask_crop = (init_mask_crop >
                          self._hyper_params['mask_filter_thresh']).astype(
                              np.uint8)
        init_mask_crop = np.expand_dims(init_mask_crop,
                                        axis=-1)  #shape: (129,129,1)
        filtered_image = init_mask_crop * init_image
        self._state['filtered_image'] = filtered_image  #shape: (129,129,3)

        with torch.no_grad():
            deep_feature = self._segmenter(imarray_to_tensor(filtered_image).to(
                self.device),
                                           phase='global_feature')[0]

        self._state['seg_init_feature'] = deep_feature  #shape : (1,256,5,5)
        self._state['seg_global_feature'] = deep_feature
        self._state['gml_feature'] = deep_feature
        self._state['conf_score'] = 1
    def track(self,
              im_x,
              target_pos,
              target_sz,
              features,
              update_state=False,
              **kwargs):
        if 'avg_chans' in kwargs:
            avg_chans = kwargs['avg_chans']
        else:
            avg_chans = self._state['avg_chans']

        z_size = self._hyper_params['z_size']
        x_size = self._hyper_params['x_size']
        context_amount = self._hyper_params['context_amount']
        phase_track = self._hyper_params['phase_track']
        im_x_crop, scale_x = get_crop(
            im_x,
            target_pos,
            target_sz,
            z_size,
            x_size=x_size,
            avg_chans=avg_chans,
            context_amount=context_amount,
            func_get_subwindow=get_subwindow_tracking,
        )
        self._state["scale_x"] = deepcopy(scale_x)
        with torch.no_grad():
            score, box, cls, ctr, extra = self._model(
                imarray_to_tensor(im_x_crop).to(self.device),
                *features,
                phase=phase_track)
        if self._hyper_params["corr_fea_output"]:
            self._state["corr_fea"] = extra["corr_fea"]

        box = tensor_to_numpy(box[0])
        score = tensor_to_numpy(score[0])[:, 0]
        cls = tensor_to_numpy(cls[0])
        ctr = tensor_to_numpy(ctr[0])

        def normalize(score):
            score = (score - np.min(score)) / (np.max(score) - np.min(score))
            return score

        flag, s = self.online_classifier.track()
        if flag == 'not_found':
            self.lost_count += 1
        else:
            self.lost_count = 0

        confidence = s.detach().cpu().numpy()
        offset = (confidence.shape[0] -
                  self._hyper_params["score_size"]) // 2
        if offset > 0:
            confidence = confidence[offset:-offset, offset:-offset]
        confidence = normalize(confidence).flatten()
        box_wh = xyxy2cxywh(box)

        # score post-processing
        best_pscore_id, pscore, penalty = self._postprocess_score(
            score, confidence, box_wh, target_sz, scale_x)
        if self._hyper_params["debug_show"]:
            bbox_in_crop = box[best_pscore_id, :]
            bbox_in_crop = tuple(map(int, bbox_in_crop))
            show_im_patch = im_x_crop.copy()
            cv2.rectangle(show_im_patch, bbox_in_crop[:2], bbox_in_crop[2:],
                          (0, 255, 0), 2)
            cv2.imshow("pred in crop", show_im_patch)
            # offline score
            score_mat = score.reshape(self._hyper_params["score_size"],
                                      self._hyper_params["score_size"])
            score_mat = (255 * score_mat).astype(np.uint8)
            score_map = cv2.applyColorMap(score_mat, cv2.COLORMAP_JET)
            cv2.imshow("offline score", score_map)
            score_mat = confidence.reshape(self._hyper_params["score_size"],
                                           self._hyper_params["score_size"])
            score_mat = (255 * score_mat).astype(np.uint8)
            score_map = cv2.applyColorMap(score_mat, cv2.COLORMAP_JET)
            cv2.imshow("online score", score_map)
            cv2.waitKey()

        # box post-processing
        new_target_pos, new_target_sz = self._postprocess_box(
            best_pscore_id, score, box_wh, target_pos, target_sz, scale_x,
            x_size, penalty)

        if self.debug:
            box = self._cvt_box_crop2frame(box_wh, target_pos, x_size, scale_x)

        # restrict new_target_pos & new_target_sz
        new_target_pos, new_target_sz = self._restrict_box(
            new_target_pos, new_target_sz)

        # record basic mid-level info
        self._state['x_crop'] = im_x_crop
        bbox_pred_in_crop = np.rint(box[best_pscore_id]).astype(int)
        self._state['bbox_pred_in_crop'] = bbox_pred_in_crop
        self.online_classifier.update(
            np.concatenate([new_target_pos, new_target_sz], axis=0),
            self.scale_z, flag)
        # record optional mid-level info
        if update_state:
            self._state['score'] = score
            self._state['pscore'] = pscore[best_pscore_id]
            self._state['all_box'] = box
            self._state['cls'] = cls
            self._state['ctr'] = ctr

        return new_target_pos, new_target_sz
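
The inner normalize divides by (max - min), which is zero for a constant confidence map. A guarded variant (an assumption about the desired fallback, not the repository's code):

    import numpy as np

    def normalize_safe(score, eps=1e-12):
        # min-max normalization with a guard for the degenerate
        # case max == min, which would otherwise divide by zero
        rng = np.max(score) - np.min(score)
        return (score - np.min(score)) / (rng + eps)

    print(normalize_safe(np.array([1.0, 1.0, 1.0])))  # [0. 0. 0.] instead of NaN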
Example #12
    def track4vos(self,
                  im_x,
                  target_pos,
                  target_sz,
                  f_z,
                  update_state=False,
                  **kwargs):
        r"""
        similarity encoder with regression head
        returns regressed bbox and correlation feature

        :param im_x: current frame
        :param target_pos: target position (x, y)
        :param target_sz: target size (w, h)
        :param f_z: target feature
        :return: new_target_pos, new_target_sz, corr_feature
        """

        if 'avg_chans' in kwargs:
            avg_chans = kwargs['avg_chans']
        else:
            avg_chans = self._state['avg_chans']

        z_size = self._hyper_params['z_size']
        x_size = self._hyper_params['x_size']
        context_amount = self._hyper_params['context_amount']
        phase_track = self._hyper_params['phase_track']
        im_x_crop, scale_x = get_crop(
            im_x,
            target_pos,
            target_sz,
            z_size,
            x_size=x_size,
            avg_chans=avg_chans,
            context_amount=context_amount,
            func_get_subwindow=get_subwindow_tracking,
        )
        with torch.no_grad():
            score, box, cls, ctr, corr_feature = self._tracker(
                imarray_to_tensor(im_x_crop).to(self.device),
                f_z,
                phase=phase_track)

        box = tensor_to_numpy(box[0])
        score = tensor_to_numpy(score[0])[:, 0]
        cls = tensor_to_numpy(cls[0])
        ctr = tensor_to_numpy(ctr[0])
        box_wh = xyxy2cxywh(box)

        # score post-processing
        best_pscore_id, pscore, penalty = self._postprocess_score(
            score, box_wh, target_sz, scale_x)
        # box post-processing
        new_target_pos, new_target_sz = self._postprocess_box(
            best_pscore_id, score, box_wh, target_pos, target_sz, scale_x,
            x_size, penalty)

        if self.debug:
            box = self._cvt_box_crop2frame(box_wh, target_pos, x_size, scale_x)

        # restrict new_target_pos & new_target_sz
        new_target_pos, new_target_sz = self._restrict_box(
            new_target_pos, new_target_sz)

        # record basic mid-level info
        self._state['x_crop'] = im_x_crop
        bbox_pred_in_crop = np.rint(box[best_pscore_id]).astype(int)
        self._state['bbox_pred_in_crop'] = bbox_pred_in_crop
        self._state['current_state'] = (target_pos, target_sz)
        self._state['scale_x'] = scale_x

        # record optional mid-level info
        if update_state:
            self._state['score'] = score
            self._state['pscore'] = pscore
            self._state['all_box'] = box
            self._state['cls'] = cls
            self._state['ctr'] = ctr

        return new_target_pos, new_target_sz, corr_feature
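
_postprocess_score is not shown in these examples; in SiamFC++-style trackers it applies a scale/ratio penalty of roughly the following form (a sketch under that assumption; penalty_k is an illustrative value):

    import numpy as np

    def size_penalty(box_wh, target_sz, scale_x, penalty_k=0.04):
        # down-weight boxes whose size or aspect ratio deviates from the
        # previous target state (measured in crop coordinates)
        def change(r):
            return np.maximum(r, 1.0 / r)

        def sz(w, h):
            pad = (w + h) * 0.5
            return np.sqrt((w + pad) * (h + pad))

        target_wh = np.array(target_sz) * scale_x
        s_c = change(sz(box_wh[:, 2], box_wh[:, 3]) / sz(target_wh[0], target_wh[1]))
        r_c = change((target_wh[0] / target_wh[1]) / (box_wh[:, 2] / box_wh[:, 3]))
        return np.exp(-(r_c * s_c - 1) * penalty_k)

    box_wh = np.array([[150., 150., 40., 30.], [150., 150., 80., 20.]])
    print(size_penalty(box_wh, target_sz=(40., 30.), scale_x=1.0))  # [1.0, ~0.88]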
Example #13
    def feature(self, im: np.ndarray, target_pos, target_sz, avg_chans=None):
        """Extract feature

        Parameters
        ----------
        im : np.array
            initial frame
        target_pos : array-like
            target position (x, y)
        target_sz : array-like
            target size (w, h)
        avg_chans : array-like, optional
            channel mean values, (B, G, R), by default None

        Returns
        -------
        tuple
            (features, im_z_crop, avg_chans): template features, the cropped
            template patch, and the channel mean values used
        """
        if avg_chans is None:
            avg_chans = np.mean(im, axis=(0, 1))

        z_size = self._hyper_params['z_size']
        x_size = self._hyper_params['x_size']
        context_amount = self._hyper_params['context_amount']

        im_z_crop, _, _ = get_crop(
            im,
            target_pos,
            target_sz,
            z_size,
            avg_chans=avg_chans,
            context_amount=context_amount,
            func_get_subwindow=get_subwindow_tracking,
            params=self._hyper_params
        )
        im_x_crop, scale_x, label = get_crop(
            im,
            target_pos,
            target_sz,
            z_size,
            x_size=x_size,
            avg_chans=avg_chans,
            context_amount=context_amount,
            func_get_subwindow=get_subwindow_tracking,
            params=self._hyper_params
        )

        label, ctr_res_final, gt_boxes_res_final = label

        phase = self._hyper_params['phase_init']

        loop_num = 2
        im_z = imarray_to_tensor(im_z_crop).to(self.device)
        for i in range(loop_num):
            im_z.requires_grad = True
            score, ctr, reg = self._model.update(im_z, imarray_to_tensor(im_x_crop).to(self.device))
            if i == 0:
                feat_len = score.shape[1]
                label_raw = label.reshape(1, feat_len, 1)
                label = torch.from_numpy(label_raw).to(self.device)
                ctr_res_final = ctr_res_final.reshape(1, feat_len, 1)
                ctr_res_final = torch.from_numpy(ctr_res_final).to(self.device)
                gt_boxes_res_final = torch.from_numpy(gt_boxes_res_final).to(self.device)
                gt_boxes_res_final = gt_boxes_res_final.unsqueeze(0)
            loss1, _ = self.loss1(score, label)
            loss2, _ = self.loss2(ctr, ctr_res_final)
            loss3, _ = self.loss3(reg, gt_boxes_res_final, label)
            loss = loss1 + loss2 + loss3
            print(loss.item())
            self._model.zero_grad()

            # Calculate gradients of model in backward pass
            loss.backward()

            # Collect the gradient of the loss w.r.t. the template input
            data_grad = im_z.grad.data

            # Call FGSM Attack
            perturbed_data = fgsm_attack(im_z, 0.05, data_grad)
            im_z = perturbed_data.data

        # re-initialize: extract features from the perturbed template
        with torch.no_grad():
            features = self._model(im_z, phase=phase)

        return features, im_z_crop, avg_chans
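
fgsm_attack is referenced above but not defined in the example. The standard Fast Gradient Sign Method step it presumably performs looks like this (a minimal sketch under that assumption):

    import torch

    def fgsm_attack(image, epsilon, data_grad):
        # perturb the input along the sign of the loss gradient;
        # clamping to a valid pixel range is often added here, but
        # depends on how the input tensor is normalized
        return image + epsilon * data_grad.sign()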