Example #1
    def forward_for_single_feature_map(self, anchors, objectness,
                                       box_regression):
        """
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        device = objectness.device
        N, A, H, W = objectness.shape

        # put in the same format as anchors
        objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
        objectness = objectness.sigmoid()
        # objectness: (B, H*W*A)

        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)
        # box_regression: (B, H*W*A, 4)
        num_anchors = A * H * W

        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        objectness, topk_idx = objectness.topk(pre_nms_top_n,
                                               dim=1,
                                               sorted=True)
        # keep the pre_nms_top_n highest-scoring anchors

        batch_idx = torch.arange(N, device=device)[:, None]
        box_regression = box_regression[batch_idx, topk_idx]
        # select the corresponding box_regression

        # anchors: a list of B BoxLists (one per image), each holding the
        # per-location anchors for this feature level
        image_shapes = [box.size for box in anchors]
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
        # concat_anchors: (B * H*W*len(aspect_ratios), 4)
        concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]
        # concat_anchors: (B, pre_nms_top_n, 4)

        proposals = self.box_coder.decode(box_regression.view(-1, 4),
                                          concat_anchors.view(-1, 4))
        # proposals: (B * pre_nms_top_n, 4)

        proposals = proposals.view(N, -1, 4)
        # proposals: (B, pre_nms_top_n, 4)

        result = []
        for proposal, score, im_shape in zip(proposals, objectness,
                                             image_shapes):
            boxlist = BoxList(proposal, im_shape, mode="xyxy")
            boxlist.add_field("objectness", score)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            result.append(boxlist)
        # result: list of B BoxLists, each carrying an "objectness" field
        return result
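Every example on this page leans on maskrcnn-benchmark's permute_and_flatten helper to line the head outputs up with the anchors. For orientation, the upstream utility is essentially the following (reproduced from memory, so treat it as a sketch and check it against the repo):

import torch

def permute_and_flatten(layer, N, A, C, H, W):
    # (N, A*C, H, W) -> (N, A, C, H, W) -> (N, H, W, A, C) -> (N, H*W*A, C)
    layer = layer.view(N, -1, C, H, W)
    layer = layer.permute(0, 3, 4, 1, 2)
    layer = layer.reshape(N, -1, C)
    return layer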
Example #2
    def _process_single_level(self, locations, box_cls, box_regression,
                              centerness, image_sizes):
        N, C, H, W = box_cls.shape

        # put in the same format as locations
        box_cls = box_cls.view(N, C, H, W).permute(0, 2, 3, 1)
        box_cls = box_cls.reshape(N, -1, C).sigmoid()
        box_regression = box_regression.view(N, 4, H, W).permute(0, 2, 3, 1)
        box_regression = box_regression.reshape(N, -1, 4)
        centerness = centerness.view(N, 1, H, W).permute(0, 2, 3, 1)
        centerness = centerness.reshape(N, -1).sigmoid()

        candidate_inds = box_cls > self.pre_nms_thresh
        pre_nms_top_n = candidate_inds.view(N, -1).sum(1)
        pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n)

        # multiply the classification scores with centerness scores
        box_cls = box_cls * centerness[:, :, None]

        results = []
        for i in range(N):
            per_box_cls = box_cls[i]
            per_candidate_inds = candidate_inds[i]
            per_box_cls = per_box_cls[per_candidate_inds]

            per_candidate_nonzeros = per_candidate_inds.nonzero()
            per_box_loc = per_candidate_nonzeros[:, 0]
            per_class = per_candidate_nonzeros[:, 1] + 1

            per_box_regression = box_regression[i]
            per_box_regression = per_box_regression[per_box_loc]
            per_locations = locations[per_box_loc]

            per_pre_nms_top_n = pre_nms_top_n[i]

            if per_candidate_inds.sum().item() > per_pre_nms_top_n.item():
                per_box_cls, top_k_indices = \
                    per_box_cls.topk(per_pre_nms_top_n, sorted=False)
                per_class = per_class[top_k_indices]
                per_box_regression = per_box_regression[top_k_indices]
                per_locations = per_locations[top_k_indices]

            detections = torch.stack([
                per_locations[:, 0] - per_box_regression[:, 0],
                per_locations[:, 1] - per_box_regression[:, 1],
                per_locations[:, 0] + per_box_regression[:, 2],
                per_locations[:, 1] + per_box_regression[:, 3],
            ],
                                     dim=1)

            h, w = image_sizes[i]
            boxlist = BoxList(detections, (int(w), int(h)), mode="xyxy")
            boxlist.add_field("labels", per_class)
            boxlist.add_field("scores", per_box_cls)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            results.append(boxlist)

        return results
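The torch.stack call above is the entire box decode for this anchor-free head: each location predicts its distances (l, t, r, b) to the four box sides. A minimal numeric sketch of that decoding:

import torch

locations = torch.tensor([[100.0, 60.0]])        # (x, y) of one location
ltrb = torch.tensor([[30.0, 20.0, 10.0, 40.0]])  # left, top, right, bottom distances
boxes = torch.stack([
    locations[:, 0] - ltrb[:, 0],  # x1 = x - left
    locations[:, 1] - ltrb[:, 1],  # y1 = y - top
    locations[:, 0] + ltrb[:, 2],  # x2 = x + right
    locations[:, 1] + ltrb[:, 3],  # y2 = y + bottom
], dim=1)
# boxes == tensor([[ 70.,  40., 110., 100.]])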
Example #3
    def forward_for_single_feature_map(self, anchors, objectness,
                                       box_regression, box_orien):
        """
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        device = objectness.device
        N, A, H, W = objectness.shape

        # put in the same format as anchors
        objectness = objectness.permute(0, 2, 3, 1).reshape(N, -1)
        objectness = objectness.sigmoid()
        box_orien = box_orien.view(N, -1, 2, H, W).permute(0, 3, 4, 1, 2)
        box_orien = box_orien.reshape(N, -1, 2)
        box_regression = box_regression.view(N, -1, 4, H,
                                             W).permute(0, 3, 4, 1, 2)
        box_regression = box_regression.reshape(N, -1, 4)

        num_anchors = A * H * W

        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        objectness, topk_idx = objectness.topk(pre_nms_top_n,
                                               dim=1,
                                               sorted=True)
        batch_idx = torch.arange(N, device=device)[:, None]
        box_regression = box_regression[batch_idx, topk_idx]
        box_orien = box_orien[batch_idx, topk_idx]

        image_shapes = [box.size for box in anchors]
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
        concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

        proposals = self.box_coder.decode(box_regression.view(-1, 4),
                                          concat_anchors.view(-1, 4))

        proposals = proposals.view(N, -1, 4)

        result = []
        for proposal, score, im_shape, orien in zip(proposals, objectness,
                                                    image_shapes, box_orien):
            boxlist = BoxList(proposal, im_shape, mode="xyxy")
            boxlist.add_field("objectness", score)
            boxlist.add_field("rotations", orien)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size, self.max_size)
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            result.append(boxlist)
        return result
Example #4
    def forward_for_single_feature_map(self, anchors, objectness,
                                       box_regression):
        """
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        device = objectness.device
        N, A, H, W = objectness.shape

        # put in the same format as anchors
        objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
        objectness = objectness.sigmoid()

        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

        num_anchors = A * H * W

        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        objectness, topk_idx = objectness.topk(pre_nms_top_n,
                                               dim=1,
                                               sorted=True)

        batch_idx = torch.arange(N, device=device)[:, None]
        box_regression = box_regression[batch_idx, topk_idx]

        image_shapes = [box.size for box in anchors]
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
        concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

        #decode2cxywh = self.nms_func.input_mode == 'cxywh'
        decode2cxywh = False
        if decode2cxywh:
            proposals = self.box_coder.decode2cxywh(box_regression.view(-1, 4),
                                                    concat_anchors.view(-1, 4))
            mode = 'cxywh'
        else:
            proposals = self.box_coder.decode(box_regression.view(-1, 4),
                                              concat_anchors.view(-1, 4))
            mode = 'xyxy'

        proposals = proposals.view(N, -1, 4)

        result = []
        for proposal, score, im_shape in zip(proposals, objectness,
                                             image_shapes):
            boxlist = BoxList(proposal, im_shape, mode=mode)
            boxlist.add_field("objectness", score)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            boxlist = self.nms_func(boxlist)
            boxlist = boxlist.convert('xyxy')
            result.append(boxlist)
        return result
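Example #4 optionally decodes straight into 'cxywh' and converts back with boxlist.convert('xyxy') after NMS. Assuming 'cxywh' means (centre-x, centre-y, width, height), the conversion amounts to the following sketch (not the repo's exact code):

import torch

def cxywh_to_xyxy(boxes: torch.Tensor) -> torch.Tensor:
    # (..., 4) boxes as (cx, cy, w, h) -> (x1, y1, x2, y2)
    cx, cy, w, h = boxes.unbind(dim=-1)
    return torch.stack([cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2], dim=-1)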
Example #5
    def forward_for_single_feature_map(self, anchors, objectness,
                                       box_regression):
        """
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        device = objectness.device
        # N = batch size, A = number of anchor aspect ratios, H/W = height/width of this level's feature map
        N, A, H, W = objectness.shape

        # put in the same format as anchors
        # Add a singleton score dimension to the objectness map (whether the anchor at each
        # location contains an object), then flatten A, H and W into a single per-image axis.
        objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
        objectness = objectness.sigmoid()

        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

        num_anchors = A * H * W

        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        # Top K: take the objectness scores of the pre_nms_top_n highest-scoring anchors,
        # together with their indices into the anchor list
        objectness, topk_idx = objectness.topk(pre_nms_top_n,
                                               dim=1,
                                               sorted=True)

        batch_idx = torch.arange(N, device=device)[:, None]
        box_regression = box_regression[batch_idx, topk_idx]

        image_shapes = [box.size for box in anchors]
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
        concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

        proposals = self.box_coder.decode(box_regression.view(-1, 4),
                                          concat_anchors.view(-1, 4))

        proposals = proposals.view(N, -1, 4)

        result = []
        for proposal, score, im_shape in zip(proposals, objectness,
                                             image_shapes):
            boxlist = BoxList(proposal, im_shape, mode="xyxy")
            boxlist.add_field("objectness", score)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            result.append(boxlist)
        return result
Example #6
    def forward_for_single_feature_map(self, anchors, objectness,
                                       box_regression):
        """
        Arguments:
            anchors: list[BoxList]; anchor tensor of shape (N, H*W*ratios, 4), where N is
                the batch size, H and W are the current feature level's size, and 4 is x1y1x2y2
            objectness: tensor of size N, A, H, W, reshaped to (N, H*W*ratios)
            box_regression: tensor of size N, A * 4, H, W, reshaped to (N, H*W*ratios, 4)
        Purpose: select the top pre_nms_top_n anchors by objectness score, then combine those
        anchors with box_regression (the learned dx dy dw dh mapping) to compute predicted
        boxes in xyxy form; finally apply NMS and related filters to obtain the output
        BoxLists (the objectness score is stored in extra_fields).
        """
        device = objectness.device
        N, A, H, W = objectness.shape

        # put in the same format as anchors
        objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
        objectness = objectness.sigmoid()  # normalize to 0-1; the top pre_nms_top_n (e.g. 2000) are taken below

        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

        num_anchors = A * H * W
        # number of anchors to select per image on this feature map
        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        objectness, topk_idx = objectness.topk(pre_nms_top_n,
                                               dim=1,
                                               sorted=True)

        batch_idx = torch.arange(N, device=device)[:, None]
        box_regression = box_regression[batch_idx, topk_idx]

        image_shapes = [box.size for box in anchors]
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
        concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]
        # compute the predicted proposals by applying box_regression (the learned dx dy dw dh mapping) to the anchors
        proposals = self.box_coder.decode(box_regression.view(-1, 4),
                                          concat_anchors.view(-1, 4))

        proposals = proposals.view(N, -1, 4)

        result = []
        for proposal, score, im_shape in zip(proposals, objectness,
                                             image_shapes):
            boxlist = BoxList(proposal, im_shape, mode="xyxy")
            boxlist.add_field("objectness", score)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(
                boxlist, self.min_size)  # ensure each proposal's w & h > min_size
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            result.append(boxlist)
        return result  # one BoxList per image (N elements)
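The batch_idx/topk_idx pattern that all of these RPN variants share is plain advanced indexing: a column of batch indices broadcasts against the (N, k) top-k indices to gather one set of rows per image. A standalone demo:

import torch

N, num_anchors, k = 2, 5, 3
scores = torch.rand(N, num_anchors)
boxes = torch.rand(N, num_anchors, 4)

topk_scores, topk_idx = scores.topk(k, dim=1)  # both (N, k)
batch_idx = torch.arange(N)[:, None]           # (N, 1), broadcasts to (N, k)
topk_boxes = boxes[batch_idx, topk_idx]        # (N, k, 4)
assert topk_boxes.shape == (N, k, 4)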
Example #7
    def forward_for_single_feature_map(self, anchors, objectness, box_regression):
        """
        Arguments:
            anchors: list[BoxList], (assume list number = batchSize N)
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        device = objectness.device
        N, A, H, W = objectness.shape

        # modify tensor shape [N, A*1, H, W] => [N, H*W*A, 1] => [N, H*W*A]
        objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
        objectness = objectness.sigmoid()

        # modify tensor shape [N, A*4, H, W] => [N, H*W*A, 4]
        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

        num_anchors = A * H * W
        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        objectness, topk_idx = objectness.topk(pre_nms_top_n, dim=1, sorted=True) # [N, top_k_elems(H*W*A)]

        batch_idx = torch.arange(N, device=device)[:, None]
        box_regression = box_regression[batch_idx, topk_idx] # [N, top_k_elems(H*W*A), 4]

        image_shapes = [box.size for box in anchors] # list(tuple)
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0) # N tensors of (A*H*W, 4) => (N*A*H*W, 4)
        concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

        # box deltas + anchor boxes => proposals (N*top_k_elems, 4)
        proposals = self.box_coder.decode(
            box_regression.view(-1, 4), concat_anchors.view(-1, 4)
        )

        proposals = proposals.view(N, -1, 4) # => (N, top_k_elems, 4)

        result = []
        # for each image in the batch (N); image_shapes => input image size
        for proposal, score, im_shape in zip(proposals, objectness, image_shapes):
            boxlist = BoxList(proposal, im_shape, mode="xyxy")
            boxlist.add_field("objectness", score)
            # clip proposals to image_shapes
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            result.append(boxlist)
        return result # N*BoxList
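box_coder.decode applies the classic Faster R-CNN deltas (dx, dy, dw, dh) to the anchor boxes. A simplified sketch of that transform (the real BoxCoder also handles per-class codes, configurable weights, and a one-pixel width convention):

import math
import torch

def decode(rel_codes, boxes, bbox_xform_clip=math.log(1000.0 / 16)):
    # rel_codes: (M, 4) deltas; boxes: (M, 4) xyxy anchors
    widths = boxes[:, 2] - boxes[:, 0]
    heights = boxes[:, 3] - boxes[:, 1]
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    dx, dy, dw, dh = rel_codes.unbind(dim=1)
    dw = dw.clamp(max=bbox_xform_clip)  # avoid exp overflow on large deltas
    dh = dh.clamp(max=bbox_xform_clip)

    pred_ctr_x = dx * widths + ctr_x
    pred_ctr_y = dy * heights + ctr_y
    pred_w = torch.exp(dw) * widths
    pred_h = torch.exp(dh) * heights

    return torch.stack([
        pred_ctr_x - 0.5 * pred_w, pred_ctr_y - 0.5 * pred_h,
        pred_ctr_x + 0.5 * pred_w, pred_ctr_y + 0.5 * pred_h,
    ], dim=1)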
Example #8
    def forward_for_single_feature_map(self, box_cls, box_regression,
                                       centerness, anchors):
        N, _, H, W = box_cls.shape
        A = box_regression.size(1) // 4
        C = box_cls.size(1) // A

        # put in the same format as anchors
        box_cls = permute_and_flatten(box_cls, N, A, C, H, W)
        box_cls = box_cls.sigmoid()

        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)
        box_regression = box_regression.reshape(N, -1, 4)

        candidate_inds = box_cls > self.pre_nms_thresh
        pre_nms_top_n = candidate_inds.view(N, -1).sum(1)
        pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n)

        centerness = permute_and_flatten(centerness, N, A, 1, H, W)
        centerness = centerness.reshape(N, -1).sigmoid()

        # multiply the classification scores with centerness scores
        box_cls = box_cls * centerness[:, :, None]

        results = []
        for per_box_cls, per_box_regression, per_pre_nms_top_n, per_candidate_inds, per_anchors \
                in zip(box_cls, box_regression, pre_nms_top_n, candidate_inds, anchors):

            per_box_cls = per_box_cls[per_candidate_inds]

            per_box_cls, top_k_indices = per_box_cls.topk(per_pre_nms_top_n,
                                                          sorted=False)

            per_candidate_nonzeros = per_candidate_inds.nonzero()[
                top_k_indices, :]

            per_box_loc = per_candidate_nonzeros[:, 0]
            per_class = per_candidate_nonzeros[:, 1] + 1

            detections = self.box_coder.decode(
                per_box_regression[per_box_loc, :].view(-1, 4),
                per_anchors.bbox[per_box_loc, :].view(-1, 4))

            boxlist = BoxList(detections, per_anchors.size, mode="xyxy")
            boxlist.add_field("labels", per_class)
            boxlist.add_field("scores", torch.sqrt(per_box_cls))
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            results.append(boxlist)

        return results
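Note the torch.sqrt on the stored scores: box_cls was already multiplied by the centerness, so the square root turns the product into a geometric mean of the classification and centerness scores, e.g.:

import torch

cls_score = torch.tensor(0.9)
centerness = torch.tensor(0.4)
fused = torch.sqrt(cls_score * centerness)  # geometric mean = 0.6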
Example #9
    def forward_for_single_feature_map(self, anchors, objectness,
                                       box_regression):

        device = objectness.device
        N, A, H, W = objectness.shape

        # put in the same format as anchors
        objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
        objectness = objectness.sigmoid()

        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

        num_anchors = A * H * W

        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        objectness, topk_idx = objectness.topk(pre_nms_top_n,
                                               dim=1,
                                               sorted=True)

        batch_idx = torch.arange(N, device=device)[:, None]
        box_regression = box_regression[batch_idx, topk_idx]

        image_shapes = [box.size for box in anchors]
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
        concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

        proposals = self.box_coder.decode(box_regression.view(-1, 4),
                                          concat_anchors.view(-1, 4))

        proposals = proposals.view(N, -1, 4)

        result = []
        for proposal, score, im_shape in zip(proposals, objectness,
                                             image_shapes):
            boxlist = BoxList(proposal, im_shape, mode="xyxy")
            boxlist.add_field("objectness", score)
            # amodal patch: clip boxes to the image only for standard
            # (non-amodal) inference
            if not self._amodal:
                boxlist = boxlist.clip_to_image(remove_empty=False)
                boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            result.append(boxlist)
        return result
Example #10
    def forward_for_single_feature_map(self, anchors, objectness,
                                       box_regression):
        """
        Arguments:
            anchors: list[BoxList], one BoxList per image, each holding (n, 4) boxes
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        device = objectness.device
        N, A, H, W = objectness.shape
        # put in the same format as anchors
        objectness = permute_and_flatten(objectness, N, A, 1, H,
                                         W).view(N, -1)  # (N, H*W*A)
        objectness = objectness.sigmoid()
        box_regression = permute_and_flatten(box_regression, N, A, 18, H,
                                             W)  # (N, H*W*A, 18)
        num_anchors = A * H * W  # e.g. 391040 or 97760

        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)  # e.g. 12000
        objectness, topk_idx = objectness.topk(pre_nms_top_n,
                                               dim=1,
                                               sorted=True)
        batch_idx = torch.arange(N, device=device)[:, None]
        box_regression = box_regression[batch_idx, topk_idx]
        image_shapes = [box.size for box in anchors]
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
        concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]
        proposals = self.box_coder.decode_iou(box_regression.view(-1, 18),
                                              concat_anchors.view(-1, 4))

        proposals = proposals.view(N, -1, 4)

        result = []
        for proposal, score, im_shape in zip(proposals, objectness,
                                             image_shapes):
            boxlist = BoxList(proposal, im_shape, mode="xyxy")
            boxlist.add_field("objectness", score)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            result.append(boxlist)
        return result
Example #11
    def forward_for_single_feature_map(self, anchors, objectness, box_regression):
        """
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        device = objectness.device
        N, A, H, W = objectness.shape

        # put in the same format as anchors
        objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
        objectness = objectness.sigmoid()

        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

        num_anchors = A * H * W

        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        objectness, topk_idx = objectness.topk(pre_nms_top_n, dim=1, sorted=True)

        batch_idx = torch.arange(N, device=device)[:, None]
        box_regression = box_regression[batch_idx, topk_idx]

        image_shapes = [box.size for box in anchors]
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
        concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

        proposals = self.box_coder.decode(
            box_regression.view(-1, 4), concat_anchors.view(-1, 4)
        )

        proposals = proposals.view(N, -1, 4)

        result = []
        for proposal, score, im_shape in zip(proposals, objectness, image_shapes):
            boxlist = BoxList(proposal, im_shape, mode="xyxy")
            boxlist.add_field("objectness", score)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size) # MAY CAUSE RuntimeError if training is unstable: copy_if failed to synchronize: device-side assert triggered
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            result.append(boxlist)
        return result
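On the warning above: the device-side assert usually points at NaN/inf box coordinates produced upstream, since remove_small_boxes itself is only a width/height filter. From memory, the upstream helper is roughly:

def remove_small_boxes(boxlist, min_size):
    # Keep only boxes whose width and height both reach min_size.
    xywh_boxes = boxlist.convert("xywh").bbox
    _, _, ws, hs = xywh_boxes.unbind(dim=1)
    keep = ((ws >= min_size) & (hs >= min_size)).nonzero().squeeze(1)
    return boxlist[keep]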
Example #12
    def forward_for_single_feature_map_without(self, anchors, box_cls,
                                               box_regression, pre_nms_thresh):
        """
        Arguments:
            anchors: list[BoxList]
            box_cls: tensor of size N, A * C, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        N, _, H, W = box_cls.shape
        A = int(box_regression.size(1) / 4)
        C = int(box_cls.size(1) / A)

        # put in the same format as anchors
        box_cls = box_cls.view(N, -1, C, H, W).permute(0, 3, 4, 1, 2)
        box_cls = box_cls.reshape(N, -1, C)
        box_cls = box_cls.sigmoid()

        box_regression = box_regression.view(N, -1, 4, H, W)
        box_regression = box_regression.permute(0, 3, 4, 1, 2)
        box_regression = box_regression.reshape(N, -1, 4)

        results = [[] for _ in range(N)]
        candidate_inds = box_cls > pre_nms_thresh

        for batch_idx, (per_box_cls, per_box_regression, per_candidate_inds,
                        per_anchors) in enumerate(
                            zip(box_cls, box_regression, candidate_inds,
                                anchors)):
            # select all candidates above the score threshold (no top-N selection in this variant)
            per_box_cls = per_box_cls[per_candidate_inds]
            per_candidate_nonzeros = per_candidate_inds.nonzero()
            per_box_loc = per_candidate_nonzeros[:, 0]
            per_class = per_candidate_nonzeros[:, 1]
            per_class += 1

            detections = self.box_coder.decode(
                per_box_regression[per_box_loc, :].view(-1, 4),
                per_anchors.bbox[per_box_loc, :].view(-1, 4))

            boxlist = BoxList(detections, per_anchors.size, mode="xyxy")
            boxlist.add_field("labels", per_class)
            boxlist.add_field("scores", per_box_cls)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            results[batch_idx] = boxlist

        return results
Example #13
    def forward_for_single_feature_map(self, anchors, objectness,
                                       box_regression):
        """
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 4, H, W
        """

        device = objectness.device
        N, A, H, W = objectness.shape

        objectness, topk_idx, box_regression = self.objectness_top_k(
            objectness, box_regression)

        batch_idx = torch.arange(N, device=device)[:, None]
        box_regression = box_regression[batch_idx, topk_idx]

        image_shapes = [box.size for box in anchors]
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
        concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]
        proposals = self.box_coder.decode(box_regression.view(-1, 4),
                                          concat_anchors.view(-1, 4))

        proposals = proposals.view(N, -1, 4)

        result = []
        for proposal, score, im_shape in zip(proposals, objectness,
                                             image_shapes):
            boxlist = BoxList(proposal, im_shape, mode="xyxy")
            boxlist.add_field("objectness", score)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            result.append(boxlist)
        return result
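Example #13 factors the flatten/sigmoid/top-k steps into self.objectness_top_k. A hypothetical reconstruction of that helper, pieced together from the inline versions in the other examples (the name and signature here are assumptions, not verified upstream code):

def objectness_top_k(self, objectness, box_regression):
    # Hypothetical helper: flatten the heads, apply sigmoid, and keep the
    # pre_nms_top_n highest-scoring anchors, mirroring the inline code above.
    N, A, H, W = objectness.shape
    objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
    objectness = objectness.sigmoid()
    box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)
    pre_nms_top_n = min(self.pre_nms_top_n, A * H * W)
    objectness, topk_idx = objectness.topk(pre_nms_top_n, dim=1, sorted=True)
    return objectness, topk_idx, box_regression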
Example #14
    def forward_for_single_feature_map(self, locations, box_cls,
                                       box_regression, centerness,
                                       image_sizes):
        """
        Arguments:
            anchors: list[BoxList]
            box_cls: tensor of size N, A * C, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        N, C, H, W = box_cls.shape

        # put in the same format as locations
        box_cls = box_cls.view(N, C, H, W).permute(0, 2, 3, 1)
        box_cls = box_cls.reshape(N, -1, C).sigmoid()
        box_regression = box_regression.view(N, 4, H, W).permute(0, 2, 3, 1)
        box_regression = box_regression.reshape(N, -1, 4)

        candidate_inds = box_cls > self.pre_nms_thresh
        pre_nms_top_n = candidate_inds.view(N, -1).sum(1)
        pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n)

        # multiply the classification scores with centerness scores
        if centerness is not None:
            centerness = centerness.view(N, 1, H, W).permute(0, 2, 3, 1)
            centerness = centerness.reshape(N, -1).sigmoid()
            box_cls = box_cls * centerness[:, :, None]

        if self.debug_vis_label:
            # box_prob_set.extend([box_cls, centerness, centerness[:,:,None]*box_prob_set[-1]])
            show_box_cls([box_cls, box_cls**2], N, H, W, C,
                         self.pre_nms_thresh)

        # K = 1
        # box_cls = box_cls.reshape(-1, C)
        # top, idim = torch.topk(box_cls, K, dim=-1)
        # box_cls[:] = 0
        # i0 = torch.zeros(idim.size()).long() + torch.arange(0, idim.size(0))[:, None]
        # box_cls[i0, idim] = top
        # box_cls = box_cls.reshape(N, -1, C)

        results = []
        for i in range(N):
            per_box_cls = box_cls[i]
            per_candidate_inds = candidate_inds[i]
            per_box_cls = per_box_cls[per_candidate_inds]

            per_candidate_nonzeros = per_candidate_inds.nonzero()
            per_box_loc = per_candidate_nonzeros[:, 0]
            per_class = per_candidate_nonzeros[:, 1] + 1

            per_box_regression = box_regression[i]
            per_box_regression = per_box_regression[per_box_loc]
            per_locations = locations[per_box_loc]

            per_pre_nms_top_n = pre_nms_top_n[i]

            if per_candidate_inds.sum().item() > per_pre_nms_top_n.item():
                per_box_cls, top_k_indices = \
                    per_box_cls.topk(per_pre_nms_top_n, sorted=False)
                per_class = per_class[top_k_indices]
                per_box_regression = per_box_regression[top_k_indices]
                per_locations = per_locations[top_k_indices]

            detections = torch.stack([
                per_locations[:, 0] - per_box_regression[:, 0],
                per_locations[:, 1] - per_box_regression[:, 1],
                per_locations[:, 0] + per_box_regression[:, 2],
                per_locations[:, 1] + per_box_regression[:, 3],
            ],
                                     dim=1)

            h, w = image_sizes[i]
            boxlist = BoxList(detections, (int(w), int(h)), mode="xyxy")
            boxlist.add_field("labels", per_class)
            boxlist.add_field("scores", per_box_cls)
            if self.debug_vis_label:
                boxlist.add_field("det_locations", per_locations)  # add by hui
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            results.append(boxlist)

        return results
Example #15
    def forward_for_single_feature_map(self, anchors, objectness,
                                       box_regression):
        """
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        device = objectness.device
        N, A, H, W = objectness.shape

        num_anchors = A * H * W

        # If inputs are on GPU, use a faster path
        use_fast_cuda_path = (objectness.is_cuda and box_regression.is_cuda)
        # Encompasses box decode, clip_to_image and remove_small_boxes calls
        if use_fast_cuda_path:
            objectness = objectness.reshape(N, -1)  # Now [N, AHW]
            objectness = objectness.sigmoid()

            pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
            objectness, topk_idx = objectness.topk(pre_nms_top_n,
                                                   dim=1,
                                                   sorted=True)

            # Get all image shapes, and cat them together
            image_shapes = [box.size for box in anchors]
            image_shapes_cat = torch.tensor([box.size for box in anchors],
                                            device=objectness.device).float()

            # Get a single tensor for all anchors
            concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)

            # Note: Take all anchors, we'll index accordingly inside the kernel
            # only take the anchors corresponding to the topk boxes
            concat_anchors = concat_anchors.reshape(N, -1,
                                                    4)  # [batch_idx, topk_idx]

            # Return pre-nms boxes, associated scores and keep flag
            # Encompasses:
            # 1. Box decode
            # 2. Box clipping
            # 3. Box filtering
            # At the end we need to keep only the proposals & scores flagged
            # Note: topk_idx, objectness are sorted => proposals, objectness, keep are also
            # sorted -- this is important later
            proposals, objectness, keep = C.GeneratePreNMSUprightBoxes(
                N, A, H, W,
                topk_idx,
                objectness.float(),       # kernel doesn't support fp16
                box_regression.float(),
                concat_anchors,
                image_shapes_cat,
                pre_nms_top_n,
                self.min_size,
                self.box_coder.bbox_xform_clip,
                True)

            # view as [N, pre_nms_top_n, 4]
            proposals = proposals.view(N, -1, 4)
            objectness = objectness.view(N, -1)
        else:
            # reverse the reshape from before ready for permutation
            objectness = objectness.reshape(N, A, H, W)
            objectness = objectness.permute(0, 2, 3, 1).reshape(N, -1)
            objectness = objectness.sigmoid()

            pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
            objectness, topk_idx = objectness.topk(pre_nms_top_n,
                                                   dim=1,
                                                   sorted=True)

            # put in the same format as anchors
            box_regression = box_regression.view(N, -1, 4, H,
                                                 W).permute(0, 3, 4, 1, 2)
            box_regression = box_regression.reshape(N, -1, 4)

            batch_idx = torch.arange(N, device=device)[:, None]
            box_regression = box_regression[batch_idx, topk_idx]

            image_shapes = [box.size for box in anchors]
            concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
            concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx,
                                                              topk_idx]

            proposals = self.box_coder.decode(box_regression.view(-1, 4),
                                              concat_anchors.view(-1, 4))

            proposals = proposals.view(N, -1, 4)

        # handle non-fast path without changing the loop
        if not use_fast_cuda_path:
            keep = [None for _ in range(N)]

        result = []
        for proposal, score, im_shape, k in zip(proposals, objectness,
                                                image_shapes, keep):
            if use_fast_cuda_path:
                # Note: Want k to be applied per-image instead of all-at-once in batched code earlier
                #       clip_to_image and remove_small_boxes already done in single kernel
                p = proposal.masked_select(k[:, None]).view(-1, 4)
                score = score.masked_select(k)
                boxlist = BoxList(p, im_shape, mode="xyxy")
            else:
                boxlist = BoxList(proposal, im_shape, mode="xyxy")
                boxlist = boxlist.clip_to_image(remove_empty=False)
                boxlist = remove_small_boxes(boxlist, self.min_size)
            boxlist.add_field("objectness", score)
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            result.append(boxlist)
        return result
Example #16
    def forward_for_single_feature_map(self, anchors, objectness, box_regression):
        """
        Arguments:
            anchors: list[BoxList], [image1-si-boxlist, image2-si-boxlist, ...]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 4, H, W

        Returns a list with len(result) == batch_size; each element is a BoxList.
        """
        device = objectness.device
        N, A, H, W = objectness.shape

        # objectness has shape [N, A, H, W]; we want to flatten each A*H*W feature map
        # into a vector. A direct reshape would unroll starting from the A dimension,
        # so permute first and then reshape: flatten each H*W map into a vector first,
        # then concatenate all of the maps.
        objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)

        # the RPN performs a class-agnostic binary classification (object/background)
        # [N, H*W*A]
        objectness = objectness.sigmoid()

        # [N, H*W*A, 4]
        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

        num_anchors = A * H * W

        # select the top k anchors by confidence, k = pre_nms_top_n
        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        objectness, topk_idx = objectness.topk(pre_nms_top_n, dim=1, sorted=True)

        # keep the same topk anchors in box_regression
        batch_idx = torch.arange(N, device=device)[:, None]
        box_regression = box_regression[batch_idx, topk_idx]

        image_shapes = [box.size for box in anchors]

        # boxList.bbox returns the underlying tensor; concatenate the anchors of all
        # images in the batch (boxList.bbox is a 2-D tensor, see anchor_generator.grid_anchors)
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
        # after the reshape: [N, H*W*A, 4]; then select the topk
        concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

        proposals = self.box_coder.decode(
            box_regression.view(-1, 4), concat_anchors.view(-1, 4)
        )

        proposals = proposals.view(N, -1, 4)

        result = []
        # process each image in the batch separately
        for proposal, score, im_shape in zip(proposals, objectness, image_shapes):
            boxlist = BoxList(proposal, im_shape, mode="xyxy")
            boxlist.add_field("objectness", score)

            # clip anchors that extend beyond the image boundary
            boxlist = boxlist.clip_to_image(remove_empty=False)
            # remove anchors whose width or height is below min_size
            boxlist = remove_small_boxes(boxlist, self.min_size)
            # nms
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            result.append(boxlist)
        return result
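A small demo of why the permute before reshape matters, as the comments above explain: flattening (A, H, W) directly walks anchors first, whereas permuting to (H, W, A) makes the flattened order match the anchor generator's per-location layout.

import torch

A, H, W = 2, 2, 2
t = torch.arange(A * H * W).view(1, A, H, W)
direct = t.reshape(1, -1)                            # anchor-major order
per_location = t.permute(0, 2, 3, 1).reshape(1, -1)  # location-major order
print(direct)        # tensor([[0, 1, 2, 3, 4, 5, 6, 7]])
print(per_location)  # tensor([[0, 4, 1, 5, 2, 6, 3, 7]])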
Example #17
    def forward_for_single_feature_map(self, anchors, objectness,
                                       box_regression):
        """
        apply the RPN result on anchors generate from single feature level
        from ont batch(has multiple images)
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 4, H, W
        """

        device = objectness.device
        N, A, H, W = objectness.shape

        # put in the same format as anchors
        objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
        objectness = objectness.sigmoid()

        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

        num_anchors = A * H * W

        # cap the number of proposals entering NMS
        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        # filter the proposal boxes by objectness score, keeping only the
        # high-objectness proposals for the following steps
        objectness, topk_idx = objectness.topk(pre_nms_top_n,
                                               dim=1,
                                               sorted=True)

        batch_idx = torch.arange(N, device=device)[:, None]
        # gather the regression outputs for the high-objectness boxes
        box_regression = box_regression[batch_idx, topk_idx]

        # reshape the anchors into a form that is easier to process
        image_shapes = [box.size for box in anchors]
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
        concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

        # apply the regression on the anchor boxes
        proposals = self.box_coder.decode(box_regression.view(-1, 4),
                                          concat_anchors.view(-1, 4))
        proposals = proposals.view(N, -1, 4)

        result = []
        # collect the processed anchor boxes into BoxList form
        # and apply NMS to generate the final proposals
        for proposal, score, im_shape in zip(proposals, objectness,
                                             image_shapes):
            boxlist = BoxList(proposal, im_shape, mode="xyxy")
            boxlist.add_field("objectness", score)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            result.append(boxlist)
        return result
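For completeness, boxlist_nms is roughly the following; this sketch substitutes torchvision.ops.nms for the repo's compiled _box_nms kernel:

import torchvision

def boxlist_nms(boxlist, nms_thresh, max_proposals=-1, score_field="scores"):
    if nms_thresh <= 0:
        return boxlist
    mode = boxlist.mode
    boxlist = boxlist.convert("xyxy")
    keep = torchvision.ops.nms(boxlist.bbox,
                               boxlist.get_field(score_field), nms_thresh)
    if max_proposals > 0:
        keep = keep[:max_proposals]  # nms returns indices sorted by score
    return boxlist[keep].convert(mode)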
Example #18
    def forward_for_single_feature_map(self, anchors, objectness,
                                       box_regression):
        """
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        device = objectness.device
        N, A, H, W = objectness.shape

        # put in the same format as anchors
        objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
        objectness = objectness.sigmoid()

        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

        num_anchors = A * H * W

        if self.onnx_export:
            from torch.onnx import operators
            num_anchors = operators.shape_as_tensor(objectness)[1].unsqueeze(0)

            pre_nms_top_n = torch.min(
                torch.cat((torch.tensor([self.pre_nms_top_n],
                                        dtype=torch.long), num_anchors), 0))
        else:
            pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        objectness, topk_idx = objectness.topk(pre_nms_top_n,
                                               dim=1,
                                               sorted=True)

        batch_idx = torch.arange(N, device=device)[:, None]
        if self.onnx_export:
            # NOTE: for now only batch == 1 is supported for ONNX export.
            assert topk_idx.size(0) == 1
            topk_idx = topk_idx.squeeze(0)
            box_regression = box_regression.index_select(1, topk_idx)
        else:
            box_regression = box_regression[batch_idx, topk_idx]

        image_shapes = [box.size for box in anchors]
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
        if self.onnx_export:
            concat_anchors = concat_anchors.reshape(N, -1, 4).index_select(
                1, topk_idx)
        else:
            concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx,
                                                              topk_idx]

        proposals = self.box_coder.decode(box_regression.view(-1, 4),
                                          concat_anchors.view(-1, 4))

        proposals = proposals.view(N, -1, 4)

        result = []
        for proposal, score, im_shape in zip(proposals, objectness,
                                             image_shapes):
            boxlist = BoxList(proposal, im_shape, mode="xyxy")
            boxlist.add_field("objectness", score)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size,
                                         self.onnx_export)
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            result.append(boxlist)
        return result
Example #19
    def forward_for_single_feature_map1(self, pre_anchors, box_cls, box_regression,
                                      pre_nms_thresh, stride):
        """
        retinanet-example
        Arguments:
            anchors: list[BoxList]
            box_cls: tensor of size N, A * C, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        anchors = torch.Tensor(self.cell_anchors[self.strides.index(stride)])
        top_n = self.pre_nms_top_n
        batch_size = box_cls.size()[0]
        device = box_cls.device
        box_cls = box_cls.sigmoid()

        out_scores = torch.zeros((batch_size, top_n), device=device)
        out_boxes = torch.zeros((batch_size, top_n, 4), device=device)
        out_classes = torch.zeros((batch_size, top_n), device=device).long()
        results = [[] for _ in range(batch_size)]
        if torch.cuda.is_available() and False:  # fast CUDA decode path intentionally disabled
            out_scores, out_boxes, out_classes =  _nv_decode(box_cls.float(), box_regression.float(),
                stride, pre_nms_thresh, top_n, anchors.view(-1).tolist())
            out_classes = out_classes.long()
            out_classes = out_classes + 1
        else:
            anchors = anchors.to(device).type(box_cls.type())
            num_anchors = anchors.size()[0] if anchors is not None else 1
            num_classes = box_cls.size()[1] // num_anchors
            height, width = box_cls.size()[-2:]

            # Per item in batch
            for batch in range(batch_size):
                cls_head = box_cls[batch, :, :, :].contiguous().view(-1)
                box_head = box_regression[batch, :, :, :].contiguous().view(-1, 4)

                # Keep scores over threshold
                keep = (cls_head >= pre_nms_thresh).nonzero().view(-1)
                if keep.nelement() == 0:
                    empty_boxlists = []
                    for a in pre_anchors:
                        empty_boxlist = BoxList(torch.Tensor(0, 4).to(device), a.size)
                        empty_boxlist.add_field(
                            "labels", torch.LongTensor([]).to(device))
                        empty_boxlist.add_field(
                            "scores", torch.Tensor([]).to(device))
                        empty_boxlists.append(empty_boxlist)
                    return empty_boxlists

                # Gather top elements
                scores = torch.index_select(cls_head, 0, keep)
                scores, indices = torch.topk(scores, min(top_n, keep.size()[0]), dim=0)
                indices = torch.index_select(keep, 0, indices).view(-1)
                classes = (indices // width // height) % num_classes
                classes = classes.long()
                classes = classes + 1

                # Infer kept bboxes (floor division keeps the results usable as indices)
                x = indices % width
                y = (indices // width) % height
                a = indices // num_classes // height // width
                box_head = box_head.view(num_anchors, 4, height, width)
                boxes = box_head[a, :, y, x]

                if anchors is not None:
                    grid = torch.stack([x, y, x, y], 1).type(box_cls.type()) * stride + anchors[a, :]
                    boxes = self.box_coder.decode(boxes, grid)

                out_scores[batch, :scores.size()[0]] = scores
                out_boxes[batch, :boxes.size()[0], :] = boxes
                out_classes[batch, :classes.size()[0]] = classes

        for batch in range(batch_size):
            boxlist = BoxList(out_boxes[batch], pre_anchors[batch].size, mode="xyxy")
            boxlist.add_field("labels", out_classes[batch])
            boxlist.add_field("scores", out_scores[batch])
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            results[batch] = boxlist

        return results
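A quick sanity check of the flat-index arithmetic above, assuming the classification head is laid out as (A*C, H, W) before the .view(-1); this also shows why floor division is needed:

import torch

A, C, H, W = 3, 80, 5, 7
flat = torch.arange(A * C * H * W)
x = flat % W
y = (flat // W) % H
c = (flat // (W * H)) % C
a = flat // (C * H * W)
# reconstruct each flat index from its components
assert torch.equal(((a * C + c) * H + y) * W + x, flat)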
Example #20
    def forward_for_single_feature_map(self, anchors, box_cls, box_regression, coeffs):
        """
        Arguments:
            anchors: list[BoxList] N, A * H * W
            box_cls: tensor of size N, A * C, H, W
            box_regression: tensor of size N, A * 4, H, W
            coeffs: tensor of size N, A * K, H, W
        """
        N, _, H, W = box_cls.shape
        A = box_regression.size(1) // 4
        C = box_cls.size(1) // A
        K = coeffs.size(1) // A

        # put in the same format as anchors (N, H*W*A, C)
        box_cls = permute_and_flatten(box_cls, N, A, C, H, W)
        box_cls = box_cls.sigmoid()
        # box regression is class-agnostic
        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)
        # Note: a further reshape to (N, -1, 4) would be redundant here, since
        # permute_and_flatten already returns that shape.
        # box_regression = box_regression.reshape(N, -1, 4)

        coeffs = permute_and_flatten(coeffs, N, A, K, H, W)

        candidate_inds = box_cls > self.pre_nms_thresh
        pre_nms_top_n = candidate_inds.view(N, -1).sum(1)
        pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n)

        results = []
        for per_box_cls, per_box_regression, per_coeffs, \
            per_pre_nms_top_n, per_candidate_inds, per_anchors in zip(
                box_cls, box_regression, coeffs, \
                    pre_nms_top_n, candidate_inds, anchors):

            if cfg.MODEL.YOLACT.USE_FAST_NMS:
                per_class = None
                detections = self.box_coder.decode(
                    per_box_regression,
                    per_anchors.bbox
                )
            else:
                # Sort and select TopN
                per_box_cls = per_box_cls[per_candidate_inds]

                per_box_cls, top_k_indices = \
                        per_box_cls.topk(per_pre_nms_top_n, sorted=False)

                per_candidate_nonzeros = \
                        per_candidate_inds.nonzero()[top_k_indices, :]

                per_box_loc = per_candidate_nonzeros[:, 0]
                per_class = per_candidate_nonzeros[:, 1]
                per_class += 1

                detections = self.box_coder.decode(
                    per_box_regression[per_box_loc, :].view(-1, 4),
                    per_anchors.bbox[per_box_loc, :].view(-1, 4)
                )

                per_coeffs = per_coeffs[per_box_loc, :].view(-1, K)

            image_size = per_anchors.size
            boxlist = BoxList(detections, image_size, mode="xyxy")
            if per_class is not None:
                boxlist.add_field("labels", per_class)
            boxlist.add_field("scores", per_box_cls)
            boxlist.add_field("coeffs", per_coeffs)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            results.append(boxlist)

        return results
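Example #20 branches on cfg.MODEL.YOLACT.USE_FAST_NMS. YOLACT's Fast NMS suppresses every box against all higher-scored boxes in one matrix operation instead of sequentially; a class-agnostic sketch (not the repo's exact implementation):

import torch
from torchvision.ops import box_iou

def fast_nms(boxes, scores, iou_threshold=0.5, top_k=200):
    # Sort by score and keep at most top_k boxes.
    scores, order = scores.sort(descending=True)
    order = order[:top_k]
    iou = box_iou(boxes[order], boxes[order])
    iou.triu_(diagonal=1)        # compare each box only to higher-scored ones
    iou_max, _ = iou.max(dim=0)  # max IoU with any higher-scored box
    # Unlike standard NMS, already-suppressed boxes can still suppress
    # others; that is Fast NMS's speed/accuracy trade-off.
    keep = iou_max <= iou_threshold
    return order[keep]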
Example #21
    def forward_for_single_feature_map(self, anchors, objectness,
                                       box_regression, i):
        """
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        device = objectness.device
        N, A, H, W = objectness.shape

        # put in the same format as anchors
        objectness = objectness.permute(0, 2, 3, 1).reshape(N, -1)
        objectness = objectness.sigmoid()
        box_regression = box_regression.view(N, -1, 4, H,
                                             W).permute(0, 3, 4, 1, 2)
        box_regression = box_regression.reshape(N, -1, 4)

        num_anchors = A * H * W

        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        objectness, topk_idx = objectness.topk(pre_nms_top_n,
                                               dim=1,
                                               sorted=True)

        batch_idx = torch.arange(N, device=device)[:, None]
        box_regression = box_regression[batch_idx, topk_idx]
        # batch_ = batch_idx.expand([N ,pre_nms_top_n])

        image_shapes = [box.size for box in anchors]
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
        concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

        proposals = self.box_coder.decode(box_regression.view(-1, 4),
                                          concat_anchors.view(-1, 4))

        proposals = proposals.view(N, -1, 4)
        result = []
        for j, (proposal, score, im_shape, topk_id) in enumerate(
                zip(proposals, objectness, image_shapes, topk_idx)):
            boxlist = BoxList(proposal, im_shape, mode="xyxy")
            boxlist.add_field("objectness", score)
            if self.is_teacher:
                # boxlist.add_field("bid", batch_[j])
                boxlist.add_field("box_reg", box_regression[j])
                boxlist.add_field("rpn_topk", topk_id)
                boxlist.add_field(
                    "rpn_ancher_level",
                    torch.tensor([i] * topk_id.shape[0], device=device))
            boxlist = boxlist.clip_to_image(remove_empty=False)

            boxlist = remove_small_boxes(boxlist, self.min_size)
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            result.append(boxlist)
        return result
Example #22
    def forward_for_single_feature_map(self, anchors, objectness,
                                       box_regression, cls):
        """
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        device = objectness.device
        N, A, H, W = objectness.shape

        ###
        # (optional) objectness heat-map visualization: resize
        # objectness[:, 0] to the input resolution, blend it over the input
        # image (e.g. with cv2.addWeighted), and show it with matplotlib
        ###

        N, AXC, H, W = cls.shape

        C = int(AXC / A)

        # put in the same format as anchors
        objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
        objectness = objectness.sigmoid()

        cls = permute_and_flatten(cls, N, A, C, H, W)

        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

        num_anchors = A * H * W

        if self.onnx_export:
            from torch.onnx import operators
            num_anchors = operators.shape_as_tensor(objectness)[1].unsqueeze(0)

            pre_nms_top_n = torch.min(
                torch.cat((torch.tensor([self.pre_nms_top_n],
                                        dtype=torch.long), num_anchors), 0))
        else:
            pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        objectness, topk_idx = objectness.topk(pre_nms_top_n,
                                               dim=1,
                                               sorted=True)

        batch_idx = torch.arange(N, device=device)[:, None]
        if self.onnx_export:
            # NOTE: for now only batch == 1 is supported for ONNX export.
            assert topk_idx.size(0) == 1
            topk_idx = topk_idx.squeeze(0)
            box_regression = box_regression.index_select(1, topk_idx)
            # gather cls here as well, so the argmax below sees only the kept
            # anchors, mirroring the eager branch
            cls = cls.index_select(1, topk_idx)
        else:
            box_regression = box_regression[batch_idx, topk_idx]
            cls = cls[batch_idx, topk_idx]

        image_shapes = [box.size for box in anchors]
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
        if self.onnx_export:
            concat_anchors = concat_anchors.reshape(N, -1, 4).index_select(
                1, topk_idx)
        else:
            concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx,
                                                              topk_idx]

        proposals = self.box_coder.decode(box_regression.view(-1, 4),
                                          concat_anchors.view(-1, 4))

        proposals = proposals.view(N, -1, 4)
        cls = torch.argmax(cls, -1) + 1  # class index -> label (0 is background)
        result = []
        for proposal, score, c, im_shape in zip(proposals, objectness, cls,
                                                image_shapes):
            boxlist = BoxList(proposal, im_shape, mode="xyxy")
            boxlist.add_field("scores", score)
            boxlist.add_field("labels", c)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size,
                                         self.onnx_export)
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="scores",
            )
            result.append(boxlist)
        return result
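The onnx_export branch above avoids Python's min() because tracing would freeze num_anchors into the exported graph as a constant. A standalone sketch of that pattern (the helper name is illustrative, not from the source):

import torch
from torch.onnx import operators

def onnx_safe_top_k_cap(scores, max_k):
    # scores: (1, num_anchors); returns a 0-dim LongTensor usable as k
    num_anchors = operators.shape_as_tensor(scores)[1].unsqueeze(0)
    return torch.min(torch.cat(
        (torch.tensor([max_k], dtype=torch.long), num_anchors), 0))

k = onnx_safe_top_k_cap(torch.rand(1, 100), 1000)  # -> tensor(100)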
Example no. 23
    def forward_for_single_feature_map(self, anchors, objectness,
                                       box_regression):
        """
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        device = objectness.device
        N, A, H, W = objectness.shape

        if cfg.ROTATE and "RETINANET" in cfg.MODEL.BACKBONE.CONV_BODY:
            # put in the same format as anchors
            objectness = permute_and_flatten(objectness, N, A, 1, H,
                                             W).view(N, -1)
            objectness = objectness.sigmoid()

            box_regression = permute_and_flatten(box_regression, N, A, 5, H, W)

            num_anchors = A * H * W

            pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
            objectness, topk_idx = objectness.topk(pre_nms_top_n,
                                                   dim=1,
                                                   sorted=True)

            batch_idx = torch.arange(N, device=device)[:, None]
            box_regression = box_regression[batch_idx, topk_idx]

            image_shapes = [box.size for box in anchors]
            concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
            concat_anchors = concat_anchors.reshape(N, -1, 5)[batch_idx,
                                                              topk_idx]

            proposals = self.box_coder.decode(box_regression.view(-1, 5),
                                              concat_anchors.view(-1, 4))

            proposals = proposals.view(N, -1, 5)

            result = []
            for proposal, score, im_shape in zip(proposals, objectness,
                                                 image_shapes):
                if cfg.MODEL.RETINANET_DCN_ON:
                    xt, yt, xc, yc, r = proposal.split(1, dim=-1)
                    h = torch.sqrt((xt - xc)**2 + (yt - yc)**2)
                    w = h * torch.exp(r)
                    cost = torch.abs(xt - xc) / h
                    sint = torch.abs(yt - yc) / h
                    bbox_x1 = xt - w * sint
                    bbox_y1 = yt - w * cost
                    bbox_x2 = xt * 2 - bbox_x1
                    bbox_y2 = yt * 2 - bbox_y1
                    # reflect corners 1 and 2 through the center (xc, yc)
                    # to obtain corners 3 and 4
                    bbox_x3 = xc * 2 - bbox_x1
                    bbox_y3 = yc * 2 - bbox_y1
                    bbox_x4 = xc * 2 - bbox_x2
                    bbox_y4 = yc * 2 - bbox_y2
                    proposal = torch.cat((bbox_x1, bbox_y1, bbox_x2, bbox_y2,
                                          bbox_x3, bbox_y3, bbox_x4, bbox_y4),
                                         dim=1)
                else:
                    proposal = trans.convert8(proposal)
                boxlist = BoxList(proposal, im_shape, mode="xy8")
                boxlist.add_field("objectness", score)
                boxlist = boxlist.clip_to_image(remove_empty=False)
                # boxlist = remove_small_boxes(boxlist, self.min_size)
                boxlist = boxlist_rnms(
                    boxlist,
                    self.nms_thresh,
                    max_proposals=self.post_nms_top_n,
                    score_field="objectness",
                )
                result.append(boxlist)
            return result

        else:
            # put in the same format as anchors
            objectness = permute_and_flatten(objectness, N, A, 1, H,
                                             W).view(N, -1)
            objectness = objectness.sigmoid()

            box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

            num_anchors = A * H * W

            pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
            objectness, topk_idx = objectness.topk(pre_nms_top_n,
                                                   dim=1,
                                                   sorted=True)

            batch_idx = torch.arange(N, device=device)[:, None]
            box_regression = box_regression[batch_idx, topk_idx]

            image_shapes = [box.size for box in anchors]
            concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
            concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx,
                                                              topk_idx]

            proposals = self.box_coder.decode(box_regression.view(-1, 4),
                                              concat_anchors.view(-1, 4))

            proposals = proposals.view(N, -1, 4)

            result = []
            for proposal, score, im_shape in zip(proposals, objectness,
                                                 image_shapes):
                if cfg.ROTATE:
                    xmin, ymin, xmax, ymax = proposal.split(1, dim=-1)
                    proposal4 = torch.cat((xmin, ymin, xmax, ymax), dim=1)
                    proposal5 = torch.cat(
                        ((xmin + xmax) / 2.,
                         (ymin + ymax) / 2., xmax - xmin + 1, ymax - ymin + 1,
                         torch.ones_like(xmin) * (-3.14 / 2)
                         # torch.zeros_like(xmin)
                         ),
                        dim=1)
                    proposal = torch.cat(
                        (xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax),
                        dim=1)
                    boxlist = BoxList(proposal, im_shape, mode="xy854")
                    boxlist.add_field("xyxy", proposal4)
                    boxlist.add_field("xywht", proposal5)
                    boxlist.add_field("objectness", score)
                    boxlist = boxlist.clip_to_image(remove_empty=True)
                    # boxlist = remove_small_boxes(boxlist, self.min_size)
                    boxlist = boxlist_rnms(
                        boxlist,
                        self.nms_thresh,
                        max_proposals=self.post_nms_top_n,
                        score_field="objectness",
                    )
                    result.append(boxlist)
                else:
                    boxlist = BoxList(proposal, im_shape, mode="xyxy")
                    boxlist.add_field("objectness", score)
                    boxlist = boxlist.clip_to_image(remove_empty=False)
                    boxlist = remove_small_boxes(boxlist, self.min_size)
                    boxlist = boxlist_nms(
                        boxlist,
                        self.nms_thresh,
                        max_proposals=self.post_nms_top_n,
                        score_field="objectness",
                    )
                    result.append(boxlist)
            return result
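For reference, a worked sketch of the conversions in the fallback branch above: an xyxy box becomes a (cx, cy, w, h, theta) tuple with theta pinned near -pi/2, plus an 8-value corner list ordered (x1,y1, x2,y1, x2,y2, x1,y2):

import torch

box = torch.tensor([[10., 20., 50., 80.]])  # xyxy
xmin, ymin, xmax, ymax = box.split(1, dim=-1)
xywht = torch.cat(((xmin + xmax) / 2., (ymin + ymax) / 2.,
                   xmax - xmin + 1, ymax - ymin + 1,
                   torch.ones_like(xmin) * (-3.14 / 2)), dim=1)
corners = torch.cat((xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax), dim=1)
# xywht   -> [[30., 50., 41., 61., -1.57]]
# corners -> [[10., 20., 50., 20., 50., 80., 10., 80.]]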
Example no. 24
def prepare_for_coco_detection_mstest(predictions, dataset):

    predictions_s = predictions[0]
    predictions_m = predictions[1]
    predictions_l = predictions[2]

    dataset_s = dataset[0]
    dataset_m = dataset[1]
    dataset_l = dataset[2]

    coco_results = []
    # one image at a time.
    for image_id, preds in enumerate(
            zip(predictions_s, predictions_m, predictions_l)):

        prediction_s = preds[0]
        prediction_m = preds[1]
        prediction_l = preds[2]

        original_id = dataset_l.id_to_img_map[image_id]

        if len(prediction_l) == 0:
            continue

        img_info = dataset_l.get_img_info(image_id)
        image_width = img_info["width"]
        image_height = img_info["height"]
        img_id_json = img_info['id']

        # rescale predicted boxes to the original image size.
        prediction_s = prediction_s.resize((image_width, image_height))
        prediction_m = prediction_m.resize((image_width, image_height))
        prediction_l = prediction_l.resize((image_width, image_height))

        # pull the single-scale results out of each BoxList.
        bbox_s = prediction_s.bbox
        score_s = prediction_s.get_field('scores').unsqueeze(1)
        label_s = prediction_s.get_field('labels').unsqueeze(1)

        bbox_m = prediction_m.bbox
        score_m = prediction_m.get_field('scores').unsqueeze(1)
        label_m = prediction_m.get_field('labels').unsqueeze(1)

        bbox_l = prediction_l.bbox
        score_l = prediction_l.get_field('scores').unsqueeze(1)
        label_l = prediction_l.get_field('labels').unsqueeze(1)

        # concatenate the single-scale results (small, medium, large) into one BoxList.
        min_size = 0
        w = prediction_l.size[0]
        h = prediction_l.size[1]

        detections = torch.cat((bbox_s, bbox_m, bbox_l), dim=0).cuda()
        per_class = torch.cat((label_s, label_m, label_l), dim=0).cuda()
        per_class = torch.squeeze(per_class, dim=1)
        per_box_cls = torch.cat((score_s, score_m, score_l), dim=0).cuda()
        per_box_cls = torch.squeeze(per_box_cls, dim=1)

        boxlist = BoxList(detections, (int(w), int(h)), mode="xyxy")
        boxlist.add_field("labels", per_class)
        boxlist.add_field("scores", per_box_cls)
        boxlist = boxlist.clip_to_image(remove_empty=False)
        boxlist = remove_small_boxes(boxlist, min_size)

        # apply NMS to the multi-scale results (small, medium, large).
        nms_method = cfg.TEST.MS_TEST_NMS
        nms_thresh = cfg.TEST.MS_TEST_NMS_THR

        num_classes = 81  # COCO: 80 categories + background
        scores = boxlist.get_field("scores")
        labels = boxlist.get_field("labels")
        boxes = boxlist.bbox
        result = []

        # multi-scale test + NMS
        for j in range(1, num_classes):
            inds = (labels == j).nonzero().view(-1)
            scores_j = scores[inds]
            boxes_j = boxes[inds, :].view(-1, 4)
            boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
            boxlist_for_class.add_field("scores", scores_j)

            if nms_method == "nms":
                boxlist_for_class = boxlist_nms(boxlist_for_class,
                                                nms_thresh,
                                                score_field="scores")
            elif nms_method == "soft_nms":
                boxlist_for_class = boxlist_soft_nms(boxlist_for_class,
                                                     nms_thresh,
                                                     score_field="scores")
            else:
                raise ValueError(
                    "unsupported NMS method: {}".format(nms_method))

            num_labels = len(boxlist_for_class)

            boxlist_for_class.add_field(
                "labels",
                torch.full((num_labels, ),
                           j,
                           dtype=torch.int64,
                           device=scores.device))

            result.append(boxlist_for_class)

        result = cat_boxlist(result)
        boxlist = result

        boxlist = boxlist.convert("xywh")
        boxes = boxlist.bbox.tolist()
        scores = boxlist.get_field("scores").tolist()
        labels = boxlist.get_field("labels").tolist()

        mapped_labels = [
            dataset_l.contiguous_category_id_to_json_id[int(i)] for i in labels
        ]

        coco_results.extend([{
            "image_id": original_id,
            "category_id": mapped_labels[k],
            "bbox": box,
            "score": scores[k],
        } for k, box in enumerate(boxes)])

    return coco_results
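The per-class loop above runs NMS once per category so that boxes of different classes never suppress each other. The same effect can be had with torchvision's batched NMS, which offsets boxes by label; a sketch with toy inputs, assuming torchvision is available (this is not what the source uses):

import torch
from torchvision.ops import batched_nms

boxes = torch.rand(12, 4).cumsum(-1)   # toy: monotonic coords give valid xyxy
scores = torch.rand(12)
labels = torch.randint(1, 81, (12,))
keep = batched_nms(boxes, scores, labels, iou_threshold=0.5)  # kept indices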
Example no. 25
    def forward_for_single_feature_map(
            self, locations, box_cls,
            box_regression, bezier_regression, centerness, image_sizes, offsets=None):
        """
        Arguments:
            locations: tensor of size H*W, 2
            box_cls: tensor of size N, C, H, W
            box_regression: tensor of size N, 4, H, W
            bezier_regression: tensor of size N, 16, H, W
            centerness: tensor of size N, 1, H, W
            image_sizes: list of (h, w) per image
        """
        N, C, H, W = box_cls.shape

        # put in the same format as locations
        box_cls = box_cls.view(N, C, H, W).permute(0, 2, 3, 1)
        box_cls = box_cls.reshape(N, -1, C).sigmoid()
        box_regression = box_regression.view(N, 4, H, W).permute(0, 2, 3, 1)
        box_regression = box_regression.reshape(N, -1, 4)
        bezier_regression = bezier_regression.view(N, 16, H, W).permute(0, 2, 3, 1)
        bezier_regression = bezier_regression.reshape(N, -1, 16)
        centerness = centerness.view(N, 1, H, W).permute(0, 2, 3, 1)
        centerness = centerness.reshape(N, -1).sigmoid()
        if offsets is not None:
            # `mask` (and `max_len` further down) are assumed to be defined in
            # the enclosing scope of the original source; they are not part of
            # this snippet
            offsets = torch.cat((offsets, mask), dim=1)
            offsets = offsets.permute(0, 2, 3, 1).reshape(N, H * W, -1)

        candidate_inds = box_cls > self.pre_nms_thresh
        pre_nms_top_n = candidate_inds.view(N, -1).sum(1)
        pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n)

        # multiply the classification scores with centerness scores
        box_cls = box_cls * centerness[:, :, None]

        results = []
        for i in range(N):
            per_box_cls = box_cls[i]
            per_candidate_inds = candidate_inds[i]
            per_box_cls = per_box_cls[per_candidate_inds]

            per_candidate_nonzeros = per_candidate_inds.nonzero()
            per_box_loc = per_candidate_nonzeros[:, 0]
            per_class = per_candidate_nonzeros[:, 1] + 1

            per_box_regression = box_regression[i]
            per_box_regression = per_box_regression[per_box_loc]
            per_bezier_regression = bezier_regression[i]
            per_bezier_regression = per_bezier_regression[per_box_loc]
            per_locations = locations[per_box_loc]

            per_pre_nms_top_n = pre_nms_top_n[i]

            if offsets is not None:
                per_offsets = offsets[i]
                per_offsets = per_offsets[per_box_loc]

            if per_candidate_inds.sum().item() > per_pre_nms_top_n.item():
                per_box_cls, top_k_indices = \
                    per_box_cls.topk(per_pre_nms_top_n, sorted=False)
                per_class = per_class[top_k_indices]
                per_box_regression = per_box_regression[top_k_indices]
                per_bezier_regression = per_bezier_regression[top_k_indices]
                per_locations = per_locations[top_k_indices]
                if offsets is not None:
                    per_offsets = per_offsets[top_k_indices]

            detections = torch.stack([
                per_locations[:, 0] - per_box_regression[:, 0],
                per_locations[:, 1] - per_box_regression[:, 1],
                per_locations[:, 0] + per_box_regression[:, 2],
                per_locations[:, 1] + per_box_regression[:, 3],
            ], dim=1)

            bezier_detections = per_locations[:, [1, 0]].unsqueeze(1) + per_bezier_regression.view(-1, 8, 2) 
            bezier_detections = bezier_detections.view(-1, 16)

            h, w = image_sizes[i]
            boxlist = BoxList(detections, (int(w), int(h)), mode="xyxy")
            boxlist.add_field("labels", per_class)
            boxlist.add_field("scores", per_box_cls)
            boxlist.add_field("beziers", bezier_detections)
            if offsets is not None:
                boxlist.add_field("offsets", per_offsets[:, :max_len * 2])
                boxlist.add_field("rec_masks", per_offsets[:, max_len * 2:].sigmoid())
                boxlist.add_field("locations", per_locations)

            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            results.append(boxlist)

        return results
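The "beziers" field decoded above carries 16 values per detection. Assuming the ABCNet convention of eight (x, y) control points forming two cubic Beziers (the top and bottom edges of a text region), a sketch of sampling points along one such curve:

import torch

def cubic_bezier(ctrl, t):
    # ctrl: (4, 2) control points; t: (T,) parameters in [0, 1]
    t = t[:, None]
    return ((1 - t) ** 3 * ctrl[0] + 3 * (1 - t) ** 2 * t * ctrl[1]
            + 3 * (1 - t) * t ** 2 * ctrl[2] + t ** 3 * ctrl[3])

beziers = torch.arange(16, dtype=torch.float32).view(8, 2)  # toy control points
t = torch.linspace(0, 1, 10)
top = cubic_bezier(beziers[:4], t)      # (10, 2) points on the top edge
bottom = cubic_bezier(beziers[4:], t)   # (10, 2) points on the bottom edge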
Example no. 26
    def forward_for_single_feature_map(self, anchors, objectness, box_regression):
        """
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            (N = batch size, A = number of aspect ratios, H/W = feature-map
            height/width at this level)

            box_regression: tensor of size N, A * 4, H, W
        """
        device = objectness.device
        N, A, H, W = objectness.shape

        # put in the same format as anchors
        # add a dimension holding, for each anchor at each feature-map
        # position, whether it contains an object, then flatten everything
        # except the batch dimension (height, width, anchors) into one
        objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
        # output: N images x H*W*A candidate boxes, one score per box
        objectness = objectness.sigmoid()

        # add a dimension holding the box deltas of each anchor at each
        # feature-map position
        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)
        # output: N images x H*W*A candidate boxes, four deltas per box

        num_anchors = A * H * W

        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)  # anchors kept per image on this feature map, as configured

        # take the objectness scores of the top pre_nms_top_n anchors and
        # their indices into the flattened anchor list
        objectness, topk_idx = objectness.topk(pre_nms_top_n, dim=1, sorted=True)

        # build a batch index over the images
        batch_idx = torch.arange(N, device=device)[:, None]

        # gather the box deltas of those top-scoring anchors
        box_regression = box_regression[batch_idx, topk_idx]

        # collect the per-image sizes
        image_shapes = [box.size for box in anchors]
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)

        # gather the top-scoring anchors themselves
        concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

        # decode proposal boxes from the anchor coordinates plus the
        # regression deltas
        proposals = self.box_coder.decode(
            box_regression.view(-1, 4), concat_anchors.view(-1, 4)  # anchors are in 'xyxy' format
        )  # applying the deltas to the xyxy anchors yields the new boxes

        proposals = proposals.view(N, -1, 4)

        result = []
        for proposal, score, im_shape in zip(proposals, objectness, image_shapes):
            # store the predicted boxes in a BoxList: one BoxList per image
            # per FPN level
            boxlist = BoxList(proposal, im_shape, mode="xyxy")
            boxlist.add_field("objectness", score)  # keep each box's objectness score
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            result.append(boxlist)
        return result  # the RPN emits proposals in 'xyxy' format
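The comments above describe box_coder.decode only at a high level. A simplified sketch of the standard R-CNN decoding it performs, assuming unit weights (the real BoxCoder also divides each delta by a configurable weight before use):

import math
import torch

def decode_sketch(rel_codes, boxes, bbox_xform_clip=math.log(1000. / 16)):
    # boxes: (K, 4) xyxy anchors; rel_codes: (K, 4) deltas (dx, dy, dw, dh)
    widths = boxes[:, 2] - boxes[:, 0] + 1
    heights = boxes[:, 3] - boxes[:, 1] + 1
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    dx, dy, dw, dh = rel_codes.unbind(dim=1)
    dw = dw.clamp(max=bbox_xform_clip)  # keep exp() from overflowing
    dh = dh.clamp(max=bbox_xform_clip)

    pred_ctr_x = dx * widths + ctr_x
    pred_ctr_y = dy * heights + ctr_y
    pred_w = torch.exp(dw) * widths
    pred_h = torch.exp(dh) * heights

    # back to xyxy
    return torch.stack((pred_ctr_x - 0.5 * pred_w,
                        pred_ctr_y - 0.5 * pred_h,
                        pred_ctr_x + 0.5 * pred_w - 1,
                        pred_ctr_y + 0.5 * pred_h - 1), dim=1)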
Example no. 27
    def forward_for_single_feature_map(self, anchors, box_cls, box_regression):
        """
        Arguments:
            anchors: list[BoxList]
            box_cls: tensor of size N, A * C, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        device = box_cls.device
        N, _, H, W = box_cls.shape
        A = box_regression.size(1) // 4
        C = box_cls.size(1) // A

        # put in the same format as anchors
        box_cls = permute_and_flatten(box_cls, N, A, C, H, W)
        box_cls = box_cls.sigmoid()

        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

        num_anchors = A * H * W

        if self.imbalanced_decider is None:
            candidate_inds = box_cls > self.pre_nms_thresh
        else:
            candidate_inds = self.imbalanced_decider(box_cls)

        pre_nms_top_n = candidate_inds.view(N, -1).sum(1)
        pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n)

        results = []
        for per_box_cls, per_box_regression, per_pre_nms_top_n, \
        per_candidate_inds, per_anchors in zip(
            box_cls,
            box_regression,
            pre_nms_top_n,
            candidate_inds,
            anchors):

            # Sort and select TopN
            # TODO: most of this could be hoisted out of the loop, but the
            # number of detections differs per image, so it has to stay
            # per-image for now.
            per_box_cls = per_box_cls[per_candidate_inds]

            per_box_cls, top_k_indices = \
                    per_box_cls.topk(per_pre_nms_top_n, sorted=False)

            per_candidate_nonzeros = \
                    per_candidate_inds.nonzero()[top_k_indices, :]

            per_box_loc = per_candidate_nonzeros[:, 0]
            per_class = per_candidate_nonzeros[:, 1]
            per_class += 1

            detections = self.box_coder.decode(
                per_box_regression[per_box_loc, :].view(-1, 4),
                per_anchors.bbox[per_box_loc, :].view(-1, 4))

            boxlist = BoxList(detections, per_anchors.size, mode="xyxy")
            boxlist.add_field("labels", per_class)
            boxlist.add_field("scores", per_box_cls)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            results.append(boxlist)

        return results
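self.imbalanced_decider is not defined in this snippet; all the code above needs is a callable that maps box_cls to a boolean candidate mask of the same shape. One plausible form, shown here purely as a hypothetical illustration, is a per-class threshold:

import torch

class PerClassThreshold:
    def __init__(self, thresholds):
        self.thresholds = thresholds            # (C,) one threshold per class

    def __call__(self, box_cls):
        # box_cls: (N, H*W*A, C); broadcast over the class dimension
        return box_cls > self.thresholds

decider = PerClassThreshold(torch.full((80,), 0.05))
candidate_inds = decider(torch.rand(2, 100, 80))  # (2, 100, 80) bool mask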
Example no. 28
    def forward_for_single_feature_map(self, locations, box_cls_set,
                                       box_regression, centerness, image_sizes,
                                       show_box_cls):
        """
        Arguments:
            locations: tensor of size H*W, 2
            box_cls_set: dict of classification maps, each of size N, C, H, W
            box_regression: tensor of size N, 4, H, W
            centerness: tensor of size N, 1, H, W (discarded in this variant)
            image_sizes: list of (h, w) per image
            show_box_cls: callable used to visualize the class maps
        """
        box_prob_set = []
        # fuse a subset of the classification heads (here only the third)
        for _box_cls in list(box_cls_set.values())[2:3]:
            N, C, H, W = _box_cls.shape
            _box_cls = _box_cls.view(N, C, H, W).permute(0, 2, 3, 1)
            box_prob_set.append(_box_cls.reshape(N, -1, C).sigmoid())
        # geometric mean of the selected heads' class probabilities
        box_cls = torch.exp(torch.log(torch.stack(box_prob_set)).mean(dim=0))
        # alternative (disabled): max-fusion across heads, rescaled to the
        # last head's peak score
        # max_score = box_prob_set[-1].max()
        # box_prob_set[:-1] = [box_prob / box_prob.max() * max_score for box_prob in box_prob_set[:-1]]
        # box_cls = torch.stack(box_prob_set).max(dim=0)[0]
        centerness = None  # this variant discards the centerness input

        # put in the same format as locations
        box_regression = box_regression.view(N, 4, H, W).permute(0, 2, 3, 1)
        box_regression = box_regression.reshape(N, -1, 4)
        if centerness is not None:
            centerness = centerness.view(N, 1, H, W).permute(0, 2, 3, 1)
            centerness = centerness.reshape(N, -1).sigmoid()

        if self.vis_labels:
            # box_prob_set.extend([box_cls, centerness, centerness[:,:,None]*box_prob_set[-1]])
            show_box_cls(box_prob_set, N, H, W, C, self.pre_nms_thresh)

        candidate_inds = box_cls > self.pre_nms_thresh
        pre_nms_top_n = candidate_inds.view(N, -1).sum(1)
        pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n)

        # multiply the classification scores with centerness scores
        if centerness is not None:
            box_cls = (box_cls * centerness[:, :, None])

        results = []
        for i in range(N):
            per_box_cls = box_cls[i]
            per_candidate_inds = candidate_inds[i]
            per_box_cls = per_box_cls[per_candidate_inds]

            per_candidate_nonzeros = per_candidate_inds.nonzero()
            per_box_loc = per_candidate_nonzeros[:, 0]
            per_class = per_candidate_nonzeros[:, 1] + 1

            per_box_regression = box_regression[i]
            per_box_regression = per_box_regression[per_box_loc]
            per_locations = locations[per_box_loc]

            per_pre_nms_top_n = pre_nms_top_n[i]

            if per_candidate_inds.sum().item() > per_pre_nms_top_n.item():
                per_box_cls, top_k_indices = \
                    per_box_cls.topk(per_pre_nms_top_n, sorted=False)
                per_class = per_class[top_k_indices]
                per_box_regression = per_box_regression[top_k_indices]
                per_locations = per_locations[top_k_indices]

            detections = torch.stack([
                per_locations[:, 0] - per_box_regression[:, 0],
                per_locations[:, 1] - per_box_regression[:, 1],
                per_locations[:, 0] + per_box_regression[:, 2],
                per_locations[:, 1] + per_box_regression[:, 3],
            ],
                                     dim=1)

            h, w = image_sizes[i]
            boxlist = BoxList(detections, (int(w), int(h)), mode="xyxy")
            boxlist.add_field("labels", per_class)
            boxlist.add_field("scores", per_box_cls)
            boxlist.add_field("det_locations", per_locations)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            results.append(boxlist)

        return results
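The score fusion at the top of this example, torch.exp(torch.log(...).mean(dim=0)), is a geometric mean of the selected heads' probabilities. A toy check:

import torch

probs = torch.stack([torch.tensor([0.9, 0.5]), torch.tensor([0.4, 0.5])])
geo = torch.exp(torch.log(probs).mean(dim=0))          # -> [0.6, 0.5]
assert torch.allclose(geo, probs.prod(dim=0) ** 0.5)   # sqrt of the product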
Example no. 29
    def forward_for_single_feature_map(self, anchors, box_cls, box_regression,
                                       pre_nms_thresh):
        """
        Arguments:
            anchors: list[BoxList]
            box_cls: tensor of size N, A * C, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        device = box_cls.device
        N, _, H, W = box_cls.shape
        A = box_regression.size(1) // 4
        C = box_cls.size(1) // A

        # put in the same format as anchors
        box_cls = box_cls.view(N, -1, C, H, W).permute(0, 3, 4, 1, 2)
        box_cls = box_cls.reshape(N, -1, C)
        box_cls = box_cls.sigmoid()

        box_regression = box_regression.view(N, -1, 4, H, W)
        box_regression = box_regression.permute(0, 3, 4, 1, 2)
        box_regression = box_regression.reshape(N, -1, 4)

        num_anchors = A * H * W

        results = [[] for _ in range(N)]
        candidate_inds = box_cls > pre_nms_thresh
        if candidate_inds.sum().item() == 0:
            empty_boxlists = []
            for a in anchors:
                empty_boxlist = BoxList(torch.Tensor(0, 4).to(device), a.size)
                empty_boxlist.add_field("labels",
                                        torch.LongTensor([]).to(device))
                empty_boxlist.add_field("scores", torch.Tensor([]).to(device))
                empty_boxlists.append(empty_boxlist)
            return empty_boxlists

        pre_nms_top_n = candidate_inds.view(N, -1).sum(1)
        pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n)

        for batch_idx, (per_box_cls, per_box_regression, per_pre_nms_top_n, \
        per_candidate_inds, per_anchors) in enumerate(zip(
            box_cls,
            box_regression,
            pre_nms_top_n,
            candidate_inds,
            anchors)):

            # Sort and select TopN
            per_box_cls = per_box_cls[per_candidate_inds]
            per_box_cls, top_k_indices = \
                    per_box_cls.topk(per_pre_nms_top_n, sorted=False)

            per_candidate_nonzeros = \
                    per_candidate_inds.nonzero()[top_k_indices, :]

            per_box_loc = per_candidate_nonzeros[:, 0]
            per_class = per_candidate_nonzeros[:, 1]
            per_class += 1

            detections = self.box_coder.decode(
                per_box_regression[per_box_loc, :].view(-1, 4),
                per_anchors.bbox[per_box_loc, :].view(-1, 4))

            boxlist = BoxList(detections, per_anchors.size, mode="xyxy")
            boxlist.add_field("labels", per_class)
            boxlist.add_field("scores", per_box_cls)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            results[batch_idx] = boxlist

        return results
Example no. 30
    def forward_for_single_feature_map(self, anchors, box_cls, box_regression):
        """
        Arguments:
            anchors: list[BoxList]
            box_cls: tensor of size N, A * C, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        device = box_cls.device
        N, _, H, W = box_cls.shape
        A = box_regression.size(1) // 4
        C = box_cls.size(1) // A

        # put in the same format as anchors (N, H*W*A, C)
        box_cls = permute_and_flatten(box_cls, N, A, C, H, W)
        box_cls = box_cls.sigmoid()
        # box regression is class-agnostic
        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)
        # this reshape is a no-op: permute_and_flatten already returns
        # (N, H*W*A, 4)
        box_regression = box_regression.reshape(N, -1, 4)

        num_anchors = A * H * W

        candidate_inds = box_cls > self.pre_nms_thresh

        pre_nms_top_n = candidate_inds.view(N, -1).sum(1)
        pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n)

        results = []
        for per_box_cls, per_box_regression, per_pre_nms_top_n, \
        per_candidate_inds, per_anchors in zip(
            box_cls,
            box_regression,
            pre_nms_top_n,
            candidate_inds,
            anchors):

            # Sort and select TopN
            # TODO: most of this could be hoisted out of the loop, but the
            # number of detections differs per image, so it has to stay
            # per-image for now.

            # After the following line, per_box_cls becomes a vector
            per_box_cls = per_box_cls[per_candidate_inds]

            per_box_cls, top_k_indices = \
                    per_box_cls.topk(per_pre_nms_top_n, sorted=False)

            # per_candidate_inds (H*W*A, C); per_candidate_nonzeros (top_k_out_of_H*W*A*C, 2)
            # Note: boxes regressed from the same anchor with different class
            # labels are treated as separate detections.
            per_candidate_nonzeros = \
                    per_candidate_inds.nonzero()[top_k_indices, :]

            # per_box_loc is the anchor index and per_class the class index:
            # the score of class per_class[i] at anchor per_box_loc[i] was
            # high enough to survive. Note: class_label = index_of_class + 1.
            per_box_loc = per_candidate_nonzeros[:, 0]
            per_class = per_candidate_nonzeros[:, 1]
            per_class += 1

            detections = self.box_coder.decode(
                per_box_regression[per_box_loc, :].view(-1, 4),
                per_anchors.bbox[per_box_loc, :].view(-1, 4))

            image_size = per_anchors.size
            boxlist = BoxList(detections, image_size, mode="xyxy")
            boxlist.add_field("labels", per_class)
            boxlist.add_field("scores", per_box_cls)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            results.append(boxlist)

        return results
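To make the nonzero() bookkeeping in the last few examples concrete: every True entry of the (H*W*A, C) candidate mask becomes one detection, so an anchor that clears the threshold for two classes yields two boxes, and labels are the class index plus one:

import torch

mask = torch.zeros(3, 4, dtype=torch.bool)  # 3 anchors, 4 classes
mask[1, 0] = mask[1, 2] = mask[2, 3] = True
nz = mask.nonzero()                          # (anchor_idx, class_idx) pairs
per_box_loc, per_class = nz[:, 0], nz[:, 1] + 1
# per_box_loc -> tensor([1, 1, 2]); per_class -> tensor([1, 3, 4])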