Example #1
    def __call__(self, loc, score, anchor, img_size, scale=1.):
        if self.mode == "training":
            n_pre_nms = self.n_train_pre_nms
            n_post_nms = self.n_train_post_nms
        else:
            n_pre_nms = self.n_test_pre_nms
            n_post_nms = self.n_test_post_nms
        # Convert the RPN regression output into proposal boxes
        roi = loc2bbox(anchor, loc)

        # Clip the proposals with slicing so they stay inside the image
        roi[:, slice(0, 4, 2)] = np.clip(roi[:, slice(0, 4, 2)], 0,
                                         img_size[1])
        roi[:, slice(1, 4, 2)] = np.clip(roi[:, slice(1, 4, 2)], 0,
                                         img_size[0])

        # The width and height must each be at least min_size (16 by default)
        min_size = self.min_size * scale
        # Compute widths and heights
        ws = roi[:, 2] - roi[:, 0]
        hs = roi[:, 3] - roi[:, 1]
        # Discard proposals that are too small
        keep = np.where((hs >= min_size) & (ws >= min_size))[0]
        roi = roi[keep, :]
        score = score[keep]
        # Keep only the top-scoring proposals
        order = score.ravel().argsort()[::-1]
        if n_pre_nms > 0:
            order = order[:n_pre_nms]
        roi = roi[order, :]
        # NMS expects the boxes to be sorted by score; keep at most n_post_nms
        roi = nms(roi, self.nms_thresh)
        roi = torch.Tensor(roi)
        roi = roi[:n_post_nms]
        return roi
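Every snippet on this page leans on a loc2bbox helper that decodes the predicted offsets (dy, dx, dh, dw) back into corner-format boxes. Below is a minimal NumPy sketch of the standard Faster R-CNN decoding; it is an assumption about the helper, not the repository's exact code. The math treats both axes symmetrically, so it works for either (y1, x1, y2, x2) or (x1, y1, x2, y2) corner order as long as loc uses the matching layout.

import numpy as np

def loc2bbox(src_bbox, loc):
    # Decode offsets `loc` relative to `src_bbox`; both have shape (R, 4).
    if src_bbox.shape[0] == 0:
        return np.zeros((0, 4), dtype=loc.dtype)

    # Source box sizes and centers.
    src_h = src_bbox[:, 2] - src_bbox[:, 0]
    src_w = src_bbox[:, 3] - src_bbox[:, 1]
    src_ctr_y = src_bbox[:, 0] + 0.5 * src_h
    src_ctr_x = src_bbox[:, 1] + 0.5 * src_w

    dy, dx, dh, dw = loc[:, 0], loc[:, 1], loc[:, 2], loc[:, 3]

    # Shift the center and rescale the size.
    ctr_y = dy * src_h + src_ctr_y
    ctr_x = dx * src_w + src_ctr_x
    h = np.exp(dh) * src_h
    w = np.exp(dw) * src_w

    # Back to corner format.
    dst_bbox = np.zeros(loc.shape, dtype=loc.dtype)
    dst_bbox[:, 0] = ctr_y - 0.5 * h
    dst_bbox[:, 1] = ctr_x - 0.5 * w
    dst_bbox[:, 2] = ctr_y + 0.5 * h
    dst_bbox[:, 3] = ctr_x + 0.5 * w
    return dst_bbox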
Example #2
    def predict(self, imgs, scale=1, vis=False):
        if vis:
            self._set_threshold('vis')
        else:
            self._set_threshold('eval')

        bboxes = []
        labels = []
        scores = []
        self.eval()
        with torch.no_grad():
            for img in imgs:
                img_size = img.size()[1:]
                roi_loc, roi_score, rois, _ = self.forward(
                    img.unsqueeze(0), scale)
                mean, std = self.loc_normalize_mean, self.loc_normalize_std
                roi_loc = roi_loc * std + mean
                roi_bbox = loc2bbox(rois, roi_loc)
                roi_bbox[:, 0::2].clamp_(0, img_size[0])
                roi_bbox[:, 1::2].clamp_(0, img_size[1])
                score, label = F.softmax(roi_score, dim=1).max(1)

                # Drop low-confidence detections, then run NMS on the survivors
                keep_index = score > self.score_threshold
                bbox, score, label = roi_bbox[keep_index], score[
                    keep_index], label[keep_index]
                keep_index = nms(bbox, score, self.nms_threshold)
                # NMS indices refer to the already-filtered tensors, so index
                # bbox here, not roi_bbox
                bbox, score, label = bbox[keep_index], score[
                    keep_index], label[keep_index]
                bboxes.append(bbox)
                scores.append(score)
                labels.append(label)

        self.train()
        return bboxes, labels, scores
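The nms(bbox, score, threshold) call above matches the torchvision.ops.nms signature, which returns the indices of the kept boxes in descending score order. A small self-contained usage sketch with made-up boxes:

import torch
from torchvision.ops import nms

# Two heavily overlapping boxes and one isolated box, (x1, y1, x2, y2).
boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],
                      [50., 50., 60., 60.]])
scores = torch.tensor([0.9, 0.8, 0.7])

keep = nms(boxes, scores, iou_threshold=0.5)
print(keep)  # tensor([0, 2]): the lower-scored overlapping box is suppressed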
Example #3
    def predict(self, img, scale, flip=[False, False]):
        self.extractor.eval()
        self.rpn.eval()
        self.roi_head.eval()

        n = img.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = img.shape
        img_size = (H, W)

        # ------------------ Prediction -------------------#
        with torch.no_grad():
            scale = utils.totensor(scale)
            img = utils.totensor(img)
            features = self.extractor(img)
            rpn_loc, rpn_score, roi, _ = self.rpn(features,
                                                  img_size,
                                                  scale,
                                                  training=False)
            roi_cls_loc, roi_cls_score = self.roi_head(features, roi)

            n_roi = roi.shape[0]
            roi_cls_loc = roi_cls_loc.view(n_roi, self.n_class, 4)
            roi = utils.totensor(
                dataset_utils.bbox_inverse(roi, (H, W), flip, scale))
            # Note: loc denormalization (roi_cls_loc * std + mean) is left
            # disabled in this snippet, unlike Examples #2, #5 and #6.

            prob = F.softmax(roi_cls_score, dim=1)  # shape:(n_roi,21)
            label = torch.max(prob, dim=1)[1]  # shape:(n_roi,)
            index = utils.totensor(np.arange(0, n_roi)).long()
            roi_cls_loc = roi_cls_loc[index, label, :]
            label = utils.tonumpy(label).astype(int)

            cls_bbox = utils.loc2bbox(utils.tonumpy(roi),
                                      utils.tonumpy(roi_cls_loc))
            # clip bounding box
            cls_bbox[:, 0::2] = np.clip(cls_bbox[:, 0::2], 0, H)
            cls_bbox[:, 1::2] = np.clip(cls_bbox[:, 1::2], 0, W)

            # keep only foreground detections (label 0 is the background)
            foreground_mask = np.where(label != 0)[0]
            cls_bbox = cls_bbox[foreground_mask]
            label = label[foreground_mask]

        return cls_bbox, label
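Examples #2, #5 and #6 undo a per-coordinate normalization of the regression targets (loc * std + mean) before decoding; the snippet above leaves that step disabled. If the training targets were normalized, the inverse transform looks like the sketch below, using the common Faster R-CNN constants (assumed here, not taken from this repository):

import torch

# Common normalization constants for Faster R-CNN loc targets (assumed).
loc_normalize_mean = torch.tensor([0., 0., 0., 0.])
loc_normalize_std = torch.tensor([0.1, 0.1, 0.2, 0.2])

def denormalize_loc(roi_cls_loc, n_class):
    # roi_cls_loc: (n_roi, n_class * 4); undo the target normalization.
    std = loc_normalize_std.repeat(n_class)
    mean = loc_normalize_mean.repeat(n_class)
    return roi_cls_loc * std + mean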
Example #4
    def __call__(self, locs, scores, anchors, img_size, training):
        """
		Args:
			locs: (R, 4)
			scores: (R, 2)
			anchors: (R, 4)
			img_size: tuple (H, W).
		"""
        n_in, n_out = (self.n_in_train,
                       self.n_out_train) if training else (self.n_in_test,
                                                           self.n_out_test)

        rois = loc2bbox(anchors, locs)
        rois[:, ::2].clamp_(0, img_size[0])
        rois[:, 1::2].clamp_(0, img_size[1])

        min_size = self.min_size
        roi_h = rois[:, 2] - rois[:, 0]
        roi_w = rois[:, 3] - rois[:, 1]
        keep_index = torch.where((roi_h >= min_size) & (roi_w >= min_size))[0]
        rois = rois[keep_index]
        scores = scores[keep_index]

        keep_index = scores.argsort(descending=True)[:n_in]
        rois = rois[keep_index]
        scores = scores[keep_index]

        keep_index = nms(rois, scores, self.nms_threshold)
        rois = rois[keep_index]

        return rois[:n_out]
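Note the element-wise & in the size filter above: Python's `and` raises a RuntimeError on multi-element tensors, so the mask must be built with & (with each comparison parenthesized, since & binds tighter than >=). A tiny illustration:

import torch

rois = torch.tensor([[0., 0., 5., 40.],    # too short (h = 5)
                     [0., 0., 40., 5.],    # too narrow (w = 5)
                     [0., 0., 40., 40.]])  # kept
roi_h = rois[:, 2] - rois[:, 0]
roi_w = rois[:, 3] - rois[:, 1]
keep = torch.where((roi_h >= 16) & (roi_w >= 16))[0]
print(keep)  # tensor([2])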
Example #5
    def predict(self, img, scale):
        n = img.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = img.shape
        img_size = (H, W)

        # ------------------ Prediction -------------------#
        with torch.no_grad():
            scale = utils.totensor(scale)
            img = utils.totensor(img)
            features = self.extractor(img)
            rpn_loc, rpn_score, roi, _ = self.rpn(features, img_size, scale)
            roi_cls_loc, roi_cls_score = self.roi_head(features, roi)

            n_roi = roi.shape[0]
            roi_cls_score = roi_cls_score.data
            roi_cls_loc = roi_cls_loc.data.view(n_roi, self.n_class, 4)
            roi = utils.totensor(roi) / scale
            mean = utils.totensor(self.loc_normalize_mean)
            std = utils.totensor(self.loc_normalize_std)
            roi_cls_loc = (roi_cls_loc * std + mean)

            roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
            cls_bbox = utils.loc2bbox(
                utils.tonumpy(roi).reshape((-1, 4)),
                utils.tonumpy(roi_cls_loc).reshape((-1, 4)))
            cls_bbox = utils.totensor(cls_bbox)
            cls_bbox = cls_bbox.view(-1, self.n_class, 4)
            # clip bounding box
            cls_bbox[:, :, 0::2] = (cls_bbox[:, :, 0::2]).clamp(min=0, max=H)
            cls_bbox[:, :, 1::2] = (cls_bbox[:, :, 1::2]).clamp(min=0, max=W)

            prob = F.softmax(utils.totensor(roi_cls_score),
                             dim=1)  # shape:(n_roi,21)
            label = torch.max(prob, dim=1)[1].data  # shape:(n_roi,)
            # indices of foreground detections (label 0 is the background)
            mask_label = np.where(label.cpu().numpy() != 0)[0]
            # for each RoI, gather the 4 coordinates of its predicted class
            bbox = torch.gather(cls_bbox, 1,
                                label.view(-1, 1).unsqueeze(2).repeat(
                                    1, 1, 4)).squeeze(1)

            # delete background
            label = label.cpu().numpy()[mask_label]
            bbox = bbox.cpu().numpy()[mask_label]

        return bbox, label
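The torch.gather call above selects, for every RoI, the 4 box coordinates belonging to its argmax class. A small worked example with 2 RoIs and 3 classes (values made up):

import torch

cls_bbox = torch.arange(2 * 3 * 4, dtype=torch.float).view(2, 3, 4)
label = torch.tensor([2, 1])  # argmax class per RoI

index = label.view(-1, 1).unsqueeze(2).repeat(1, 1, 4)  # shape (2, 1, 4)
bbox = torch.gather(cls_bbox, 1, index).squeeze(1)      # shape (2, 4)
print(bbox)
# tensor([[ 8.,  9., 10., 11.],   <- RoI 0, class 2
#         [16., 17., 18., 19.]])  <- RoI 1, class 1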
Example #6
    def predict(self, img, scale=1., using_gpu=False):
        img_size = img.shape[2:]
        roi_locs, roi_scores, rois = self.predict_net(img, img_size, scale)
        if using_gpu:
            roi_locs, roi_scores = roi_locs.cpu(), roi_scores.cpu()

        n_class = 21
        score_thresh = 0.7
        nms_thresh = 0.3
        mean = torch.Tensor([0., 0., 0., 0.]).repeat(n_class).unsqueeze(0)
        std = torch.Tensor([0.1, 0.1, 0.2, 0.2]).repeat(n_class).unsqueeze(0)
        roi_locs = roi_locs * std + mean
        roi_locs = roi_locs.view(-1, n_class, 4)
        rois = torch.from_numpy(rois)
        rois = rois.view(-1, 1, 4).expand_as(roi_locs)

        roi_box = utils.loc2bbox(rois.numpy().reshape(-1, 4),
                                 roi_locs.detach().numpy().reshape(-1, 4))
        roi_box = torch.from_numpy(roi_box).view(-1, n_class * 4)
        roi_box[:, 0::2] = roi_box[:, 0::2].clamp(0, img_size[1])
        roi_box[:, 1::2] = roi_box[:, 1::2].clamp(0, img_size[0])

        prob = F.softmax(roi_scores, dim=1).detach().numpy()

        bbox = []
        label = []
        score = []
        for i in range(1, n_class):
            roi_box_i = roi_box.view(-1, n_class, 4)[:, i, :]
            roi_box_i = roi_box_i.detach().numpy()
            prob_i = prob[:, i]
            mask = prob_i > score_thresh
            roi_box_i = roi_box_i[mask]
            prob_i = prob_i[mask]

            # sort by confidence, then run greedy NMS on the sorted boxes;
            # prob_i must be reordered too so that keep indexes it correctly
            order = prob_i.argsort()[::-1]
            roi_box_i = roi_box_i[order]
            prob_i = prob_i[order]
            keep = utils.nms(roi_box_i, nms_thresh)

            bbox.append(roi_box_i[keep])
            label.append((i - 1) * np.ones((len(keep), )))
            score.append(prob_i[keep])
        bbox = np.concatenate(bbox, axis=0).astype(np.float32)
        label = np.concatenate(label, axis=0).astype(np.int32)
        score = np.concatenate(score, axis=0).astype(np.float32)

        # map boxes back to input-image coordinates (assuming a feature stride of 16)
        bbox = bbox * 16

        return bbox, label, score
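utils.nms here receives boxes that are already sorted by score and returns the indices of the survivors. Below is a minimal NumPy sketch of that variant (an assumed interface, not this repository's exact implementation); since IoU is symmetric in the two axes, the corner order does not matter:

import numpy as np

def nms(boxes, thresh):
    # Greedy NMS over score-sorted corner-format boxes (a1, b1, a2, b2).
    # Returns the indices (into `boxes`) of the kept boxes.
    a1, b1, a2, b2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    areas = (a2 - a1) * (b2 - b1)
    order = np.arange(len(boxes))  # already sorted by descending score
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # IoU of the current top box with all remaining boxes.
        aa1 = np.maximum(a1[i], a1[order[1:]])
        bb1 = np.maximum(b1[i], b1[order[1:]])
        aa2 = np.minimum(a2[i], a2[order[1:]])
        bb2 = np.minimum(b2[i], b2[order[1:]])
        inter = np.maximum(0.0, aa2 - aa1) * np.maximum(0.0, bb2 - bb1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][iou <= thresh]
    return np.array(keep)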
Example #7
    def __call__(self, loc, score, anchor, img_size, scale=1., training=True):
        """Propose RoIs (inputs should be ndarrays).

        Inputs :obj:`loc, score, anchor` refer to the same anchor when indexed
        by the same index.

        On notations, :math:`R` is the total number of anchors. This is equal
        to the product of the height and the width of an image and the number
        of anchor bases per pixel.

        The type of the output is the same as that of the inputs.

        Args:
            loc (array): Predicted offsets and scaling to anchors.
                Its shape is :math:`(R, 4)`.
            score (array): Predicted foreground probability for anchors.
                Its shape is :math:`(R,)`.
            anchor (array): Coordinates of anchors. Its shape is
                :math:`(R, 4)`.
            img_size (tuple of ints): A tuple :obj:`height, width`,
                which contains image size after scaling.
            scale (float): The scaling factor used to scale an image after
                reading it from a file.

        Returns:
            array:
            An array of coordinates of proposal boxes.
            Its shape is :math:`(S, 4)`. :math:`S` is less than
            :obj:`self.n_test_post_nms` in test time and less than
            :obj:`self.n_train_post_nms` in train time. :math:`S` depends on
            the size of the predicted bounding boxes and the number of
            bounding boxes discarded by NMS.

        """
        # NOTE: when test, remember faster_rcnn.eval() to set self.parent_model.training = False
        if training:
            n_post_nms = self.n_train_post_nms
        else:
            n_post_nms = self.n_test_post_nms

        # Convert anchors into proposals via bbox transformations:
        # apply the predicted offsets `loc` to the anchor boxes.
        # roi: [n_anchor, 4]
        roi = utils.loc2bbox(anchor, loc)

        # Clip the predicted boxes to the image boundary.
        roi[:, 0::2] = np.clip(roi[:, 0::2], 0, img_size[0])
        roi[:, 1::2] = np.clip(roi[:, 1::2], 0, img_size[1])

        # Remove predicted boxes with either height or width < min_size.
        min_size = self.min_size * scale
        hs = roi[:, 2] - roi[:, 0]
        ws = roi[:, 3] - roi[:, 1]
        keep = np.where((hs >= min_size) & (ws >= min_size))[0]
        roi = roi[keep, :]
        score = score[keep]

        # Non-maximum suppression: merge overlapping proposals and keep the
        # top n_post_nms as the final RoIs
        keep = utils.nms(np.ascontiguousarray(np.asarray(roi)),
                         scores=score,
                         threshold=self.nms_thresh)
        keep = keep[:n_post_nms]
        roi = roi[keep]
        return roi
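Putting the pieces together, the full proposal step the docstring describes can be smoke-tested with the loc2bbox and nms sketches from earlier on this page (made-up anchors and predictions in (y1, x1, y2, x2) order; this reuses those sketches rather than the repository's utils):

import numpy as np

rng = np.random.default_rng(0)
R, (H, W) = 100, (600, 800)

# Random but well-formed anchors.
y1 = rng.uniform(0, H - 128, R)
x1 = rng.uniform(0, W - 128, R)
anchor = np.stack([y1, x1,
                   y1 + rng.uniform(8, 128, R),
                   x1 + rng.uniform(8, 128, R)], axis=1).astype(np.float32)
loc = (rng.standard_normal((R, 4)) * 0.1).astype(np.float32)
score = rng.random(R).astype(np.float32)

roi = loc2bbox(anchor, loc)                  # decode offsets
roi[:, 0::2] = np.clip(roi[:, 0::2], 0, H)   # clip y to [0, H]
roi[:, 1::2] = np.clip(roi[:, 1::2], 0, W)   # clip x to [0, W]
hs = roi[:, 2] - roi[:, 0]
ws = roi[:, 3] - roi[:, 1]
keep = np.where((hs >= 16) & (ws >= 16))[0]  # drop tiny boxes
roi, score = roi[keep], score[keep]

order = score.argsort()[::-1]                # highest score first
roi = roi[order]
roi = roi[nms(roi, 0.7)][:50]                # NMS, then keep the top 50
print(roi.shape)                             # at most (50, 4)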