def filter_proposals(self, proposals, objectness, image_shapes, num_anchors_per_level):
    # type: (Tensor, Tensor, List[Tuple[int, int]], List[int]) -> Tuple[List[Tensor], List[Tensor]]
    """Filter RPN proposals for each image in the batch.

    Pipeline: clip boxes to the image, drop tiny boxes, run NMS
    independently per feature level, then keep the post_nms_top_n
    highest-scoring proposals.

    Args:
        proposals: predicted box coordinates, one row of boxes per image
        objectness: predicted objectness scores for every anchor
        image_shapes: size of each image in the batch
        num_anchors_per_level: number of anchors on each feature level

    Returns:
        Two per-image lists: kept boxes and their objectness scores.
    """
    batch_size = proposals.shape[0]
    device = proposals.device

    # Objectness is only used for ranking here — detach it from the graph.
    objectness = objectness.detach().reshape(batch_size, -1)

    # Tag every anchor with the index of the feature level it came from,
    # so NMS can later run independently per level.
    level_ids = [
        torch.full((count,), lvl_idx, dtype=torch.int64, device=device)
        for lvl_idx, count in enumerate(num_anchors_per_level)
    ]
    level_ids = torch.cat(level_ids, 0).reshape(1, -1).expand_as(objectness)

    # Select the pre_nms_top_n highest-scoring anchors per feature level.
    top_n_idx = self._get_top_n_idx(objectness, num_anchors_per_level)

    batch_idx = torch.arange(batch_size, device=device)[:, None]  # [batch_size, 1]

    # Gather scores / level tags / boxes of the selected anchors.
    objectness = objectness[batch_idx, top_n_idx]
    level_ids = level_ids[batch_idx, top_n_idx]
    proposals = proposals[batch_idx, top_n_idx]

    final_boxes = []
    final_scores = []
    # Process each image's predictions independently.
    for boxes, scores, lvl, img_shape in zip(proposals, objectness, level_ids, image_shapes):
        # Clamp out-of-bounds coordinates onto the image border.
        boxes = box_ops.clip_boxes_to_image(boxes, img_shape)
        # Keep only boxes whose width and height both exceed min_size.
        keep = box_ops.remove_small_boxes(boxes, self.min_size)
        boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]
        # NMS per feature level; returned indices are sorted by score.
        keep = box_ops.batched_nms(boxes, scores, lvl, self.nms_thresh)
        # Keep only the post_nms_top_n best-scoring proposals.
        keep = keep[: self.post_nms_top_n()]
        boxes, scores = boxes[keep], scores[keep]
        final_boxes.append(boxes)
        final_scores.append(scores)
    return final_boxes, final_scores
def postprocess_detections(self,
                           class_logits,    # type: Tensor
                           box_regression,  # type: Tensor
                           proposals,       # type: List[Tensor]
                           image_shapes     # type: List[Tuple[int, int]]
                           ):
    # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor]]
    """Post-process the box head's raw predictions.

    Steps:
      (1) decode regression parameters against the proposals into boxes
      (2) softmax the class logits into per-class probabilities
      (3) clip boxes to the image boundary
      (4) drop background predictions (class index 0)
      (5) drop low-scoring predictions (score <= self.score_thresh)
      (6) drop near-empty boxes
      (7) class-wise NMS (results come back sorted by score)
      (8) keep at most self.detection_per_img detections per image

    Args:
        class_logits: per-proposal class logits
        box_regression: per-proposal box regression parameters
        proposals: per-image proposals produced by the RPN
        image_shapes: per-image size before batching

    Returns:
        Per-image lists of boxes, scores and labels.
    """
    device = class_logits.device
    # Number of predicted classes (including background at index 0).
    num_classes = class_logits.shape[-1]

    # Proposal count per image, used to split the flat predictions back out.
    boxes_per_image = [boxes_in_image.shape[0] for boxes_in_image in proposals]
    # Decode regression deltas into absolute box coordinates.
    pred_boxes = self.box_coder.decode(box_regression, proposals)
    # Turn logits into probabilities.
    pred_scores = F.softmax(class_logits, -1)

    # Split boxes and scores per image.
    pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
    pred_scores_list = pred_scores.split(boxes_per_image, 0)

    all_boxes = []
    all_scores = []
    all_labels = []
    # Process each image's predictions independently.
    for boxes, scores, image_shape in zip(pred_boxes_list, pred_scores_list, image_shapes):
        # Clamp out-of-bounds coordinates onto the image border.
        boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

        # One label row per proposal: [0, 1, ..., num_classes - 1].
        labels = torch.arange(num_classes, device=device)
        labels = labels.view(1, -1).expand_as(scores)

        # Remove predictions with the background label (class index 0).
        boxes = boxes[:, 1:]
        scores = scores[:, 1:]
        labels = labels[:, 1:]

        # Flatten so every (proposal, class) pair is an independent detection.
        boxes = boxes.reshape(-1, 4)
        scores = scores.reshape(-1)
        labels = labels.reshape(-1)

        # Remove low-scoring boxes. torch.where(cond)[0] is equivalent to the
        # deprecated torch.nonzero(cond).squeeze(1) form but warning-free.
        inds = torch.where(scores > self.score_thresh)[0]
        boxes, scores, labels = boxes[inds], scores[inds], labels[inds]

        # Remove (near-)empty boxes.
        keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

        # Non-maximum suppression, independently done per class;
        # the returned indices are sorted by descending score.
        keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)

        # Keep only the detection_per_img top-scoring predictions.
        keep = keep[:self.detection_per_img]
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

        all_boxes.append(boxes)
        all_scores.append(scores)
        all_labels.append(labels)

    return all_boxes, all_scores, all_labels