def forward_for_single_feature_map(self, anchors, objectness, box_regression):
    """
    Build post-NMS proposals for one feature level from the RPN head outputs.

    Arguments:
        anchors: list[BoxList]
        objectness: tensor of size N, A, H, W
        box_regression: tensor of size N, A * 4, H, W

    Returns:
        list of BoxList (one per zipped image), each in "xyxy" mode and
        carrying an "objectness" field.
    """
    device = objectness.device
    N, A, H, W = objectness.shape

    # Put both head outputs in the same format as the anchors and turn the
    # logits into scores: (N, H*W*A).
    scores = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1).sigmoid()
    # Regression output, noted by the original author as (N, H*W*A, 4).
    deltas = permute_and_flatten(box_regression, N, A, 4, H, W)

    # Per image, keep only the best-scoring candidates before NMS.
    top_n = min(self.pre_nms_top_n, A * H * W)
    scores, top_idx = scores.topk(top_n, dim=1, sorted=True)

    # (N, 1) image indices broadcast against the (N, top_n) candidate indices.
    image_idx = torch.arange(N, device=device)[:, None]
    deltas = deltas[image_idx, top_idx]

    image_shapes = [per_image.size for per_image in anchors]
    stacked_anchors = torch.cat([per_image.bbox for per_image in anchors], dim=0)
    top_anchors = stacked_anchors.reshape(N, -1, 4)[image_idx, top_idx]

    decoded = self.box_coder.decode(deltas.view(-1, 4), top_anchors.view(-1, 4))
    decoded = decoded.view(N, -1, 4)

    result = []
    for boxes, box_scores, shape in zip(decoded, scores, image_shapes):
        candidates = BoxList(boxes, shape, mode="xyxy")
        candidates.add_field("objectness", box_scores)
        candidates = candidates.clip_to_image(remove_empty=False)
        candidates = remove_small_boxes(candidates, self.min_size)
        result.append(
            boxlist_nms(
                candidates,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
        )
    return result
def _process_single_level(self, locations, box_cls, box_regression, centerness, image_sizes):
    """
    Decode the dense predictions of one feature level into per-image BoxLists.

    Arguments:
        locations: indexable by flattened position; columns 0 and 1 serve as
            the x / y reference point for the four side offsets
        box_cls: tensor of size N, C, H, W (classification logits)
        box_regression: tensor viewable as N, 4, H, W
        centerness: tensor viewable as N, 1, H, W (logits)
        image_sizes: per-image (h, w) pairs

    Returns:
        list of N BoxList objects in "xyxy" mode with "labels" and "scores".
    """
    N, C, H, W = box_cls.shape

    # put in the same format as locations: channels last, positions flattened
    cls_scores = box_cls.view(N, C, H, W).permute(0, 2, 3, 1).reshape(N, -1, C).sigmoid()
    reg_offsets = box_regression.view(N, 4, H, W).permute(0, 2, 3, 1).reshape(N, -1, 4)
    ctr_scores = centerness.view(N, 1, H, W).permute(0, 2, 3, 1).reshape(N, -1).sigmoid()

    # Candidates are selected on the raw class scores, i.e. before the
    # centerness weighting below.
    candidate_mask = cls_scores > self.pre_nms_thresh
    keep_limits = candidate_mask.view(N, -1).sum(1).clamp(max=self.pre_nms_top_n)

    # multiply the classification scores with centerness scores
    cls_scores = cls_scores * ctr_scores[:, :, None]

    results = []
    for i in range(N):
        mask = candidate_mask[i]
        scores = cls_scores[i][mask]

        # Each row of `hits` is (flattened position, class index).
        hits = mask.nonzero()
        positions = hits[:, 0]
        labels = hits[:, 1] + 1

        offsets = reg_offsets[i][positions]
        points = locations[positions]

        limit = keep_limits[i]
        if mask.sum().item() > limit.item():
            scores, chosen = scores.topk(limit, sorted=False)
            labels = labels[chosen]
            offsets = offsets[chosen]
            points = points[chosen]

        # Offsets are distances from the reference point to the four sides.
        detections = torch.stack(
            [
                points[:, 0] - offsets[:, 0],
                points[:, 1] - offsets[:, 1],
                points[:, 0] + offsets[:, 2],
                points[:, 1] + offsets[:, 3],
            ],
            dim=1,
        )

        h, w = image_sizes[i]
        per_image = BoxList(detections, (int(w), int(h)), mode="xyxy")
        per_image.add_field("labels", labels)
        per_image.add_field("scores", scores)
        per_image = per_image.clip_to_image(remove_empty=False)
        results.append(remove_small_boxes(per_image, self.min_size))

    return results
def forward_for_single_feature_map(self, anchors, objectness, box_regression, box_orien):
    """
    Build post-NMS proposals, with a per-box orientation, for one feature level.

    Arguments:
        anchors: list[BoxList]
        objectness: tensor of size N, A, H, W
        box_regression: tensor of size N, A * 4, H, W
        box_orien: tensor viewable as N, -1, 2, H, W; the two values per
            anchor are stored on the output as the "rotations" field

    Returns:
        list of BoxList in "xyxy" mode with "objectness" and "rotations".
    """
    device = objectness.device
    N, A, H, W = objectness.shape

    # put in the same format as anchors: spatial dims first, anchors last
    scores = objectness.permute(0, 2, 3, 1).reshape(N, -1).sigmoid()
    orientations = box_orien.view(N, -1, 2, H, W).permute(0, 3, 4, 1, 2).reshape(N, -1, 2)
    deltas = box_regression.view(N, -1, 4, H, W).permute(0, 3, 4, 1, 2).reshape(N, -1, 4)

    top_n = min(self.pre_nms_top_n, A * H * W)
    scores, top_idx = scores.topk(top_n, dim=1, sorted=True)

    # Gather regression and orientation rows with the same top-k indices so
    # they stay aligned with the scores.
    image_idx = torch.arange(N, device=device)[:, None]
    deltas = deltas[image_idx, top_idx]
    orientations = orientations[image_idx, top_idx]

    image_shapes = [per_image.size for per_image in anchors]
    stacked_anchors = torch.cat([per_image.bbox for per_image in anchors], dim=0)
    top_anchors = stacked_anchors.reshape(N, -1, 4)[image_idx, top_idx]

    decoded = self.box_coder.decode(deltas.view(-1, 4), top_anchors.view(-1, 4))
    decoded = decoded.view(N, -1, 4)

    result = []
    for boxes, box_scores, shape, box_rot in zip(decoded, scores, image_shapes, orientations):
        candidates = BoxList(boxes, shape, mode="xyxy")
        candidates.add_field("objectness", box_scores)
        candidates.add_field("rotations", box_rot)
        candidates = candidates.clip_to_image(remove_empty=False)
        candidates = remove_small_boxes(candidates, self.min_size, self.max_size)
        candidates = boxlist_nms(
            candidates,
            self.nms_thresh,
            max_proposals=self.post_nms_top_n,
            score_field="objectness",
        )
        result.append(candidates)
    return result
def forward_for_single_feature_map(self, anchors, objectness, box_regression):
    """
    Build proposals for one feature level, filtered by the configurable
    ``self.nms_func`` and returned in "xyxy" mode.

    Arguments:
        anchors: list[BoxList]
        objectness: tensor of size N, A, H, W
        box_regression: tensor of size N, A * 4, H, W

    Returns:
        list of BoxList, each carrying an "objectness" field.
    """
    device = objectness.device
    N, A, H, W = objectness.shape

    # put in the same format as anchors
    scores = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1).sigmoid()
    deltas = permute_and_flatten(box_regression, N, A, 4, H, W)

    top_n = min(self.pre_nms_top_n, A * H * W)
    scores, top_idx = scores.topk(top_n, dim=1, sorted=True)

    image_idx = torch.arange(N, device=device)[:, None]
    deltas = deltas[image_idx, top_idx]

    image_shapes = [per_image.size for per_image in anchors]
    stacked_anchors = torch.cat([per_image.bbox for per_image in anchors], dim=0)
    top_anchors = stacked_anchors.reshape(N, -1, 4)[image_idx, top_idx]

    # The cxywh decoding path is hard-wired off. A commented-out alternative
    # in the original derived this flag from
    # self.nms_func.input_mode == 'cxywh'.
    decode2cxywh = False
    if decode2cxywh:
        decoder, mode = self.box_coder.decode2cxywh, 'cxywh'
    else:
        decoder, mode = self.box_coder.decode, 'xyxy'
    decoded = decoder(deltas.view(-1, 4), top_anchors.view(-1, 4)).view(N, -1, 4)

    result = []
    for boxes, box_scores, shape in zip(decoded, scores, image_shapes):
        candidates = BoxList(boxes, shape, mode=mode)
        candidates.add_field("objectness", box_scores)
        candidates = candidates.clip_to_image(remove_empty=False)
        candidates = remove_small_boxes(candidates, self.min_size)
        candidates = self.nms_func(candidates)
        # Callers always receive corner-format boxes.
        result.append(candidates.convert('xyxy'))
    return result
def forward_for_single_feature_map(self, anchors, objectness, box_regression):
    """
    Select, decode and NMS-filter the RPN candidates of one feature level.

    Arguments:
        anchors: list[BoxList]
        objectness: tensor of size N, A, H, W
        box_regression: tensor of size N, A * 4, H, W

    Returns:
        list of BoxList in "xyxy" mode, each with an "objectness" field.
    """
    device = objectness.device
    # N = images in the batch, A = anchors per location (the original note
    # calls this the number of ratios), H / W = height / width of this
    # level's feature map.
    N, A, H, W = objectness.shape

    # put in the same format as anchors, then flatten everything except the
    # batch dimension so each image has one score per anchor
    obj_scores = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
    obj_scores = obj_scores.sigmoid()
    reg_deltas = permute_and_flatten(box_regression, N, A, 4, H, W)

    anchors_per_image = A * H * W
    keep_n = min(self.pre_nms_top_n, anchors_per_image)

    # Top K: the keep_n best objectness scores per image together with the
    # index of the corresponding anchor in the flattened anchor list.
    obj_scores, best_idx = obj_scores.topk(keep_n, dim=1, sorted=True)

    rows = torch.arange(N, device=device)[:, None]
    reg_deltas = reg_deltas[rows, best_idx]

    image_shapes = [entry.size for entry in anchors]
    anchor_boxes = torch.cat([entry.bbox for entry in anchors], dim=0)
    anchor_boxes = anchor_boxes.reshape(N, -1, 4)[rows, best_idx]

    proposals = self.box_coder.decode(reg_deltas.view(-1, 4), anchor_boxes.view(-1, 4))
    proposals = proposals.view(N, -1, 4)

    def _finalize(boxes, box_scores, shape):
        # Clip, drop small boxes, then run NMS for a single image.
        per_image = BoxList(boxes, shape, mode="xyxy")
        per_image.add_field("objectness", box_scores)
        per_image = per_image.clip_to_image(remove_empty=False)
        per_image = remove_small_boxes(per_image, self.min_size)
        return boxlist_nms(
            per_image,
            self.nms_thresh,
            max_proposals=self.post_nms_top_n,
            score_field="objectness",
        )

    return [
        _finalize(boxes, box_scores, shape)
        for boxes, box_scores, shape in zip(proposals, obj_scores, image_shapes)
    ]
def forward_for_single_feature_map(self, anchors, objectness, box_regression):
    """
    Pick the pre_nms_top_n anchors with the highest objectness, decode them
    with the regression output (described by the original author as the
    learned dx, dy, dw, dh mapping) into xyxy boxes, then filter the result by
    clipping, a minimum-size check and NMS. The objectness score is attached
    to every returned BoxList as the "objectness" field.

    Arguments:
        anchors: list[BoxList]; per the original notes there are N entries
            (N = batch size), each holding H*W*ratios boxes in x1y1x2y2 form,
            where H and W are the size of the current feature level
        objectness: tensor of size N, A, H, W, processed into (N, H*W*A)
        box_regression: tensor of size N, A * 4, H, W, processed into
            (N, H*W*A, 4)

    Returns:
        list of BoxList, one per image of the batch.
    """
    device = objectness.device
    N, A, H, W = objectness.shape

    # put in the same format as anchors
    flat_scores = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
    flat_scores = flat_scores.sigmoid()  # squash into the 0-1 range before ranking
    flat_deltas = permute_and_flatten(box_regression, N, A, 4, H, W)

    # Number of anchors per image on this feature level.
    num_anchors = A * H * W
    limit = min(self.pre_nms_top_n, num_anchors)
    flat_scores, picked = flat_scores.topk(limit, dim=1, sorted=True)

    batch_rows = torch.arange(N, device=device)[:, None]
    flat_deltas = flat_deltas[batch_rows, picked]

    image_shapes = [item.size for item in anchors]
    picked_anchors = torch.cat([item.bbox for item in anchors], dim=0)
    picked_anchors = picked_anchors.reshape(N, -1, 4)[batch_rows, picked]

    # Candidate boxes: the selected anchors moved by the regression output.
    proposals = self.box_coder.decode(
        flat_deltas.view(-1, 4), picked_anchors.view(-1, 4)
    ).view(N, -1, 4)

    result = []
    for per_boxes, per_scores, shape in zip(proposals, flat_scores, image_shapes):
        boxlist = BoxList(per_boxes, shape, mode="xyxy")
        boxlist.add_field("objectness", per_scores)
        boxlist = boxlist.clip_to_image(remove_empty=False)
        # Drop proposals that are too small with respect to min_size.
        boxlist = remove_small_boxes(boxlist, self.min_size)
        boxlist = boxlist_nms(
            boxlist,
            self.nms_thresh,
            max_proposals=self.post_nms_top_n,
            score_field="objectness",
        )
        result.append(boxlist)
    return result
def forward_for_single_feature_map(self, anchors, objectness, box_regression):
    """
    Produce NMS-filtered proposals for a single feature level.

    Arguments:
        anchors: list[BoxList] (assumed to hold one entry per image, i.e.
            batch size N entries)
        objectness: tensor of size N, A, H, W
        box_regression: tensor of size N, A * 4, H, W

    Returns:
        list of BoxList (N entries), each with an "objectness" field.
    """
    device = objectness.device
    N, A, H, W = objectness.shape

    # [N, A*1, H, W] => [N, H*W*A, 1] => [N, H*W*A]
    obj = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1).sigmoid()
    # [N, A*4, H, W] => [N, H*W*A, 4]
    reg = permute_and_flatten(box_regression, N, A, 4, H, W)

    k = min(self.pre_nms_top_n, A * H * W)
    # Both outputs are [N, k]; scores come back sorted.
    obj, sel = obj.topk(k, dim=1, sorted=True)

    img = torch.arange(N, device=device)[:, None]
    reg = reg[img, sel]  # [N, k, 4]

    # list of per-image size tuples
    image_shapes = [b.size for b in anchors]
    # N tensors of shape (A*H*W, 4) => (N*A*H*W, 4) => top-k per image
    anc = torch.cat([b.bbox for b in anchors], dim=0)
    anc = anc.reshape(N, -1, 4)[img, sel]

    # box offsets + original anchor boxes => proposals, reshaped to [N, k, 4]
    proposals = self.box_coder.decode(reg.view(-1, 4), anc.view(-1, 4))
    proposals = proposals.view(N, -1, 4)

    result = []
    # One iteration per image of the batch; `shape` is that image's size.
    for boxes, scores, shape in zip(proposals, obj, image_shapes):
        per_image = BoxList(boxes, shape, mode="xyxy")
        per_image.add_field("objectness", scores)
        # clip proposals to the image
        per_image = per_image.clip_to_image(remove_empty=False)
        per_image = remove_small_boxes(per_image, self.min_size)
        per_image = boxlist_nms(
            per_image,
            self.nms_thresh,
            max_proposals=self.post_nms_top_n,
            score_field="objectness",
        )
        result.append(per_image)
    return result
def forward_for_single_feature_map(self, box_cls, box_regression, centerness, anchors):
    """
    Decode the class / box / centerness predictions of one feature level.

    Arguments:
        box_cls: tensor of size N, A * C, H, W (classification logits)
        box_regression: tensor of size N, A * 4, H, W
        centerness: logits, flattened through permute_and_flatten with one
            channel per anchor
        anchors: list[BoxList]

    Returns:
        list of BoxList in "xyxy" mode with "labels" and "scores"; the score
        is the square root of (class score * centerness score).
    """
    N, _, H, W = box_cls.shape
    A = box_regression.size(1) // 4
    C = box_cls.size(1) // A

    # put in the same format as anchors
    cls_scores = permute_and_flatten(box_cls, N, A, C, H, W).sigmoid()
    reg_deltas = permute_and_flatten(box_regression, N, A, 4, H, W).reshape(N, -1, 4)
    ctr_scores = permute_and_flatten(centerness, N, A, 1, H, W).reshape(N, -1).sigmoid()

    # Candidates are picked on the raw class scores, before the centerness
    # weighting is applied.
    candidate_mask = cls_scores > self.pre_nms_thresh
    keep_limits = candidate_mask.view(N, -1).sum(1).clamp(max=self.pre_nms_top_n)

    # multiply the classification scores with centerness scores
    cls_scores = cls_scores * ctr_scores[:, :, None]

    results = []
    per_image_inputs = zip(cls_scores, reg_deltas, keep_limits, candidate_mask, anchors)
    for scores, deltas, limit, mask, image_anchors in per_image_inputs:
        scores, chosen = scores[mask].topk(limit, sorted=False)

        # Rows are (anchor position, class index) of the chosen candidates.
        hits = mask.nonzero()[chosen, :]
        positions = hits[:, 0]
        labels = hits[:, 1] + 1

        detections = self.box_coder.decode(
            deltas[positions, :].view(-1, 4),
            image_anchors.bbox[positions, :].view(-1, 4),
        )

        per_image = BoxList(detections, image_anchors.size, mode="xyxy")
        per_image.add_field("labels", labels)
        per_image.add_field("scores", torch.sqrt(scores))
        per_image = per_image.clip_to_image(remove_empty=False)
        results.append(remove_small_boxes(per_image, self.min_size))

    return results
def forward_for_single_feature_map(self, anchors, objectness, box_regression):
    """
    Build post-NMS proposals for one feature level; clipping to the image is
    skipped when ``self._amodal`` is set.

    Arguments:
        anchors: list[BoxList]
        objectness: tensor unpacked as N, A, H, W
        box_regression: tensor flattened with 4 values per anchor

    Returns:
        list of BoxList in "xyxy" mode, each with an "objectness" field.
    """
    device = objectness.device
    N, A, H, W = objectness.shape

    # put in the same format as anchors
    scores = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1).sigmoid()
    deltas = permute_and_flatten(box_regression, N, A, 4, H, W)

    top_n = min(self.pre_nms_top_n, A * H * W)
    scores, top_idx = scores.topk(top_n, dim=1, sorted=True)

    image_idx = torch.arange(N, device=device)[:, None]
    deltas = deltas[image_idx, top_idx]

    image_shapes = [per_image.size for per_image in anchors]
    stacked_anchors = torch.cat([per_image.bbox for per_image in anchors], dim=0)
    top_anchors = stacked_anchors.reshape(N, -1, 4)[image_idx, top_idx]

    decoded = self.box_coder.decode(deltas.view(-1, 4), top_anchors.view(-1, 4))
    decoded = decoded.view(N, -1, 4)

    clip_boxes = not self._amodal  # default for non-amodal inference

    result = []
    for boxes, box_scores, shape in zip(decoded, scores, image_shapes):
        candidates = BoxList(boxes, shape, mode="xyxy")
        candidates.add_field("objectness", box_scores)

        # operator Patch
        # NOTE(review): only the clip is treated as conditional here; the
        # source's whitespace was collapsed, so confirm that the small-box
        # filter is meant to run in amodal mode as well.
        if clip_boxes:
            candidates = candidates.clip_to_image(remove_empty=False)
        candidates = remove_small_boxes(candidates, self.min_size)
        candidates = boxlist_nms(
            candidates,
            self.nms_thresh,
            max_proposals=self.post_nms_top_n,
            score_field="objectness",
        )
        result.append(candidates)
    return result
def forward_for_single_feature_map(self, anchors, objectness, box_regression):
    """
    Build post-NMS proposals for one feature level using the 18-value
    regression encoding understood by ``self.box_coder.decode_iou``.

    Arguments:
        anchors: list[BoxList]
        objectness: tensor of size N, A, H, W
        box_regression: regression output flattened with 18 values per
            anchor (presumably N, A * 18, H, W; confirm against the head)

    Returns:
        list of BoxList in "xyxy" mode, each with an "objectness" field.
    """
    device = objectness.device
    N, A, H, W = objectness.shape
    reg_width = 18  # values per anchor expected by decode_iou

    # put in the same format as anchors; scores end up as (N, H*W*A)
    scores = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1).sigmoid()
    deltas = permute_and_flatten(box_regression, N, A, reg_width, H, W)

    top_n = min(self.pre_nms_top_n, A * H * W)
    scores, top_idx = scores.topk(top_n, dim=1, sorted=True)

    image_idx = torch.arange(N, device=device)[:, None]
    deltas = deltas[image_idx, top_idx]

    image_shapes = [per_image.size for per_image in anchors]
    stacked_anchors = torch.cat([per_image.bbox for per_image in anchors], dim=0)
    top_anchors = stacked_anchors.reshape(N, -1, 4)[image_idx, top_idx]

    # The coder turns 18-wide regression rows plus 4-wide anchors into boxes
    # that are viewed as 4 values each.
    decoded = self.box_coder.decode_iou(
        deltas.view(-1, reg_width), top_anchors.view(-1, 4)
    )
    decoded = decoded.view(N, -1, 4)

    result = []
    for boxes, box_scores, shape in zip(decoded, scores, image_shapes):
        candidates = BoxList(boxes, shape, mode="xyxy")
        candidates.add_field("objectness", box_scores)
        candidates = candidates.clip_to_image(remove_empty=False)
        candidates = remove_small_boxes(candidates, self.min_size)
        candidates = boxlist_nms(
            candidates,
            self.nms_thresh,
            max_proposals=self.post_nms_top_n,
            score_field="objectness",
        )
        result.append(candidates)
    return result
def forward_for_single_feature_map(self, anchors, objectness, box_regression):
    """
    Rank, decode and NMS-filter the RPN candidates of one feature level.

    Arguments:
        anchors: list[BoxList]
        objectness: tensor of size N, A, H, W
        box_regression: tensor of size N, A * 4, H, W

    Returns:
        list of BoxList in "xyxy" mode, each with an "objectness" field.
    """
    device = objectness.device
    N, A, H, W = objectness.shape

    # put in the same format as anchors
    ranked_scores = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
    ranked_scores = ranked_scores.sigmoid()
    offsets = permute_and_flatten(box_regression, N, A, 4, H, W)

    budget = min(self.pre_nms_top_n, A * H * W)
    ranked_scores, winners = ranked_scores.topk(budget, dim=1, sorted=True)

    # One row index per image so the fancy indexing below gathers per image.
    per_image_row = torch.arange(N, device=device)[:, None]
    offsets = offsets[per_image_row, winners]

    image_shapes = [anchor_set.size for anchor_set in anchors]
    base_boxes = torch.cat([anchor_set.bbox for anchor_set in anchors], dim=0)
    base_boxes = base_boxes.reshape(N, -1, 4)[per_image_row, winners]

    proposals = self.box_coder.decode(offsets.view(-1, 4), base_boxes.view(-1, 4))
    proposals = proposals.view(N, -1, 4)

    result = []
    for boxes, scores, shape in zip(proposals, ranked_scores, image_shapes):
        per_image = BoxList(boxes, shape, mode="xyxy")
        per_image.add_field("objectness", scores)
        per_image = per_image.clip_to_image(remove_empty=False)
        # Original author's warning: this step MAY CAUSE "RuntimeError:
        # copy_if failed to synchronize: device-side assert triggered" if
        # training is unstable.
        per_image = remove_small_boxes(per_image, self.min_size)
        per_image = boxlist_nms(
            per_image,
            self.nms_thresh,
            max_proposals=self.post_nms_top_n,
            score_field="objectness",
        )
        result.append(per_image)
    return result
def forward_for_single_feature_map_without(self, anchors, box_cls, box_regression, pre_nms_thresh):
    """
    Decode every candidate above ``pre_nms_thresh`` on one feature level,
    without any top-k selection or NMS.

    Arguments:
        anchors: list[BoxList]
        box_cls: tensor of size N, A * C, H, W
        box_regression: tensor of size N, A * 4, H, W
        pre_nms_thresh: score threshold applied to the sigmoid class scores

    Returns:
        list with N slots. Each slot visited by the loop holds a BoxList in
        "xyxy" mode with "labels" and "scores"; a slot that is not visited
        (fewer anchors entries than N) stays an empty list.
    """
    N, _, H, W = box_cls.shape
    A = box_regression.size(1) // 4
    C = box_cls.size(1) // A

    # put in the same format as anchors
    cls_scores = box_cls.view(N, -1, C, H, W).permute(0, 3, 4, 1, 2)
    cls_scores = cls_scores.reshape(N, -1, C).sigmoid()
    reg_deltas = box_regression.view(N, -1, 4, H, W).permute(0, 3, 4, 1, 2)
    reg_deltas = reg_deltas.reshape(N, -1, 4)

    candidate_mask = cls_scores > pre_nms_thresh

    results = [[] for _ in range(N)]
    per_image_inputs = zip(cls_scores, reg_deltas, candidate_mask, anchors)
    for image_no, (scores, deltas, mask, image_anchors) in enumerate(per_image_inputs):
        scores = scores[mask]

        # Rows are (anchor position, class index) of every candidate.
        hits = mask.nonzero()
        positions = hits[:, 0]
        labels = hits[:, 1] + 1

        detections = self.box_coder.decode(
            deltas[positions, :].view(-1, 4),
            image_anchors.bbox[positions, :].view(-1, 4),
        )

        per_image = BoxList(detections, image_anchors.size, mode="xyxy")
        per_image.add_field("labels", labels)
        per_image.add_field("scores", scores)
        per_image = per_image.clip_to_image(remove_empty=False)
        results[image_no] = remove_small_boxes(per_image, self.min_size)

    return results
def forward_for_single_feature_map(self, anchors, objectness, box_regression):
    """
    Build post-NMS proposals for one feature level; score ranking is
    delegated to ``self.objectness_top_k``.

    Arguments:
        anchors: list[BoxList]
        objectness: tensor of size N, A, H, W
        box_regression: tensor of size N, A * 4, H, W

    Returns:
        list of BoxList in "xyxy" mode, each with an "objectness" field.
    """
    device = objectness.device
    # Only the batch size is needed here; unpacking still requires 4 dims.
    N, _, _, _ = objectness.shape

    # The helper hands back the kept scores, their indices and the
    # regression tensor those indices refer to.
    scores, top_idx, deltas = self.objectness_top_k(objectness, box_regression)

    image_idx = torch.arange(N, device=device)[:, None]
    deltas = deltas[image_idx, top_idx]

    image_shapes = [per_image.size for per_image in anchors]
    stacked_anchors = torch.cat([per_image.bbox for per_image in anchors], dim=0)
    top_anchors = stacked_anchors.reshape(N, -1, 4)[image_idx, top_idx]

    decoded = self.box_coder.decode(deltas.view(-1, 4), top_anchors.view(-1, 4))
    decoded = decoded.view(N, -1, 4)

    result = []
    for boxes, box_scores, shape in zip(decoded, scores, image_shapes):
        candidates = BoxList(boxes, shape, mode="xyxy")
        candidates.add_field("objectness", box_scores)
        candidates = candidates.clip_to_image(remove_empty=False)
        candidates = remove_small_boxes(candidates, self.min_size)
        result.append(
            boxlist_nms(
                candidates,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
        )
    return result
def forward_for_single_feature_map(self, locations, box_cls, box_regression, centerness, image_sizes):
    """
    Decode the dense predictions of one feature level into per-image BoxLists.

    Arguments:
        locations: indexable by flattened position; columns 0 and 1 serve as
            the x / y reference point for the four side offsets
        box_cls: tensor of size N, C, H, W (classification logits)
        box_regression: tensor viewable as N, 4, H, W
        centerness: tensor viewable as N, 1, H, W (logits), or None to skip
            the centerness weighting of the class scores
        image_sizes: per-image (h, w) pairs

    Returns:
        list of N BoxList objects in "xyxy" mode with "labels" and "scores"
        (plus "det_locations" when self.debug_vis_label is set).
    """
    N, C, H, W = box_cls.shape

    # put in the same format as locations
    box_cls = box_cls.view(N, C, H, W).permute(0, 2, 3, 1)
    box_cls = box_cls.reshape(N, -1, C).sigmoid()
    box_regression = box_regression.view(N, 4, H, W).permute(0, 2, 3, 1)
    box_regression = box_regression.reshape(N, -1, 4)

    # Candidates are selected on the raw class scores, i.e. before any
    # centerness weighting; the per-image count is capped at pre_nms_top_n.
    candidate_inds = box_cls > self.pre_nms_thresh
    pre_nms_top_n = candidate_inds.view(N, -1).sum(1)
    pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n)

    # multiply the classification scores with centerness scores
    if centerness is not None:
        centerness = centerness.view(N, 1, H, W).permute(0, 2, 3, 1)
        centerness = centerness.reshape(N, -1).sigmoid()
        box_cls = box_cls * centerness[:, :, None]

    # Debug visualisation hook.
    # NOTE(review): the nesting of this block was reconstructed from
    # whitespace-collapsed source; confirm it belongs outside the centerness
    # branch.
    if self.debug_vis_label:
        show_box_cls([box_cls, box_cls**2], N, H, W, C, self.pre_nms_thresh)

    # (A disabled experiment that kept only the top-K class score per
    # location used to live here as commented-out code.)

    results = []
    for i in range(N):
        per_box_cls = box_cls[i]
        per_candidate_inds = candidate_inds[i]
        per_box_cls = per_box_cls[per_candidate_inds]

        # Each row is (flattened position, class index) of one candidate.
        per_candidate_nonzeros = per_candidate_inds.nonzero()
        per_box_loc = per_candidate_nonzeros[:, 0]
        per_class = per_candidate_nonzeros[:, 1] + 1

        per_box_regression = box_regression[i]
        per_box_regression = per_box_regression[per_box_loc]
        per_locations = locations[per_box_loc]

        per_pre_nms_top_n = pre_nms_top_n[i]

        # Only trim when there are more candidates than the cap allows.
        if per_candidate_inds.sum().item() > per_pre_nms_top_n.item():
            per_box_cls, top_k_indices = \
                per_box_cls.topk(per_pre_nms_top_n, sorted=False)
            per_class = per_class[top_k_indices]
            per_box_regression = per_box_regression[top_k_indices]
            per_locations = per_locations[top_k_indices]

        # Offsets are distances from the reference point to the four sides.
        detections = torch.stack([
            per_locations[:, 0] - per_box_regression[:, 0],
            per_locations[:, 1] - per_box_regression[:, 1],
            per_locations[:, 0] + per_box_regression[:, 2],
            per_locations[:, 1] + per_box_regression[:, 3],
        ], dim=1)

        h, w = image_sizes[i]
        boxlist = BoxList(detections, (int(w), int(h)), mode="xyxy")
        boxlist.add_field("labels", per_class)
        boxlist.add_field("scores", per_box_cls)
        if self.debug_vis_label:
            boxlist.add_field("det_locations", per_locations)  # added by hui
        boxlist = boxlist.clip_to_image(remove_empty=False)
        boxlist = remove_small_boxes(boxlist, self.min_size)
        results.append(boxlist)

    return results
def forward_for_single_feature_map(self, anchors, objectness, box_regression):
    """
    Generate post-NMS RPN proposals for one feature level.

    Two paths produce the pre-NMS boxes:

    * CUDA inputs go through the fused ``C.GeneratePreNMSUprightBoxes``
      kernel which, according to the original notes, performs box decoding,
      clipping and small-box filtering and returns a per-box ``keep`` flag.
    * Everything else uses the plain PyTorch sequence of top-k selection,
      ``box_coder.decode``, ``clip_to_image`` and ``remove_small_boxes``.

    On the fallback path the objectness scores are attached to the BoxList
    before any filtering. Attaching them afterwards (as this function used
    to do) leaves the score field longer than the box tensor whenever
    ``remove_small_boxes`` drops a box, which breaks the NMS that follows.

    Arguments:
        anchors: list[BoxList]
        objectness: tensor of size N, A, H, W
        box_regression: tensor of size N, A * 4, H, W

    Returns:
        list of BoxList in "xyxy" mode, each with an "objectness" field.
    """
    device = objectness.device
    N, A, H, W = objectness.shape

    num_anchors = A * H * W
    pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
    image_shapes = [box.size for box in anchors]

    # If inputs are on GPU, use a faster path that encompasses the box
    # decode, clip_to_image and remove_small_boxes calls.
    use_fast_cuda_path = (objectness.is_cuda and box_regression.is_cuda)

    if use_fast_cuda_path:
        objectness = objectness.reshape(N, -1)  # Now [N, AHW]
        objectness = objectness.sigmoid()
        objectness, topk_idx = objectness.topk(pre_nms_top_n, dim=1, sorted=True)

        # All image shapes in one float tensor for the kernel.
        image_shapes_cat = torch.tensor(image_shapes, device=objectness.device).float()

        # A single tensor for all anchors. Note: every anchor is passed in;
        # the kernel indexes them with topk_idx itself.
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
        concat_anchors = concat_anchors.reshape(N, -1, 4)

        # Returns pre-NMS boxes, the associated scores and a keep flag.
        # Only the flagged proposals and scores are used further down.
        # Note: topk_idx and objectness are sorted, so proposals, objectness
        # and keep are sorted as well -- this is important later.
        proposals, objectness, keep = C.GeneratePreNMSUprightBoxes(
            N,
            A,
            H,
            W,
            topk_idx,
            objectness.float(),  # cast: the kernel doesn't support fp16
            box_regression.float(),
            concat_anchors,
            image_shapes_cat,
            pre_nms_top_n,
            self.min_size,
            self.box_coder.bbox_xform_clip,
            True)

        # view as [N, pre_nms_top_n, 4]
        proposals = proposals.view(N, -1, 4)
        objectness = objectness.view(N, -1)
    else:
        # put in the same format as anchors
        objectness = objectness.permute(0, 2, 3, 1).reshape(N, -1)
        objectness = objectness.sigmoid()
        objectness, topk_idx = objectness.topk(pre_nms_top_n, dim=1, sorted=True)

        box_regression = box_regression.view(N, -1, 4, H, W).permute(0, 3, 4, 1, 2)
        box_regression = box_regression.reshape(N, -1, 4)

        batch_idx = torch.arange(N, device=device)[:, None]
        box_regression = box_regression[batch_idx, topk_idx]

        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
        concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

        proposals = self.box_coder.decode(box_regression.view(-1, 4),
                                          concat_anchors.view(-1, 4))
        proposals = proposals.view(N, -1, 4)

        # No keep flags on this path; placeholders keep the loop uniform.
        keep = [None for _ in range(N)]

    result = []
    for proposal, score, im_shape, k in zip(proposals, objectness, image_shapes, keep):
        if use_fast_cuda_path:
            # Note: k is applied per image rather than all at once in the
            # batched code above. clip_to_image and remove_small_boxes were
            # already done inside the kernel.
            p = proposal.masked_select(k[:, None]).view(-1, 4)
            score = score.masked_select(k)
            boxlist = BoxList(p, im_shape, mode="xyxy")
            boxlist.add_field("objectness", score)
        else:
            boxlist = BoxList(proposal, im_shape, mode="xyxy")
            # Attach scores BEFORE filtering so boxes and scores are
            # filtered together and stay aligned.
            boxlist.add_field("objectness", score)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)

        boxlist = boxlist_nms(
            boxlist,
            self.nms_thresh,
            max_proposals=self.post_nms_top_n,
            score_field="objectness",
        )
        result.append(boxlist)
    return result
def forward_for_single_feature_map(self, anchors, objectness, box_regression):
    """
    Build post-NMS proposals for one feature level.

    Arguments:
        anchors: list[BoxList], one BoxList per image for this level
        objectness: tensor of size N, A, H, W
        box_regression: tensor of size N, A * 4, H, W

    Returns:
        a list with one BoxList per image of the batch.
    """
    device = objectness.device
    N, A, H, W = objectness.shape

    # objectness is [N, A, H, W] and every image's A*H*W map has to become
    # one vector. The helper permutes before flattening so the result follows
    # the anchor ordering instead of the raw memory order.
    # The RPN only solves a class-agnostic object / background problem, so a
    # sigmoid is enough. Result: [N, H*W*A].
    obj_prob = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1).sigmoid()

    # [N, H*W*A, 4]
    reg_out = permute_and_flatten(box_regression, N, A, 4, H, W)

    # Keep the k most confident anchors, k = pre_nms_top_n (capped by the
    # number of anchors available).
    k = min(self.pre_nms_top_n, A * H * W)
    obj_prob, kept = obj_prob.topk(k, dim=1, sorted=True)

    # Keep the regression rows of the same top-k anchors.
    batch_rows = torch.arange(N, device=device)[:, None]
    reg_out = reg_out[batch_rows, kept]

    image_shapes = [entry.size for entry in anchors]

    # BoxList.bbox is a 2-D tensor; concatenate the anchors of all images,
    # reshape to [N, H*W*A, 4] and select the top-k.
    anchor_boxes = torch.cat([entry.bbox for entry in anchors], dim=0)
    anchor_boxes = anchor_boxes.reshape(N, -1, 4)[batch_rows, kept]

    proposals = self.box_coder.decode(
        reg_out.view(-1, 4), anchor_boxes.view(-1, 4)
    ).view(N, -1, 4)

    def _post_process(boxes, scores, shape):
        # Handles a single image of the batch.
        per_image = BoxList(boxes, shape, mode="xyxy")
        per_image.add_field("objectness", scores)
        # Clip boxes that reach beyond the image border.
        per_image = per_image.clip_to_image(remove_empty=False)
        # Remove boxes that are too small with respect to min_size.
        per_image = remove_small_boxes(per_image, self.min_size)
        # nms
        return boxlist_nms(
            per_image,
            self.nms_thresh,
            max_proposals=self.post_nms_top_n,
            score_field="objectness",
        )

    return [
        _post_process(boxes, scores, shape)
        for boxes, scores, shape in zip(proposals, obj_prob, image_shapes)
    ]
def forward_for_single_feature_map(self, anchors, objectness, box_regression):
    """
    Apply the RPN outputs to the anchors generated for a single feature
    level of one batch (which may contain several images).

    Arguments:
        anchors: list[BoxList]
        objectness: tensor of size N, A, H, W
        box_regression: tensor of size N, A * 4, H, W

    Returns:
        list of BoxList in "xyxy" mode, each with an "objectness" field.
    """
    device = objectness.device
    N, A, H, W = objectness.shape

    # put in the same format as anchors
    confidences = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
    confidences = confidences.sigmoid()
    regressions = permute_and_flatten(box_regression, N, A, 4, H, W)

    # Reduce the number of candidate anchors before NMS: only the ones with
    # the highest objectness are kept for the following steps.
    num_kept = min(self.pre_nms_top_n, A * H * W)
    confidences, kept_idx = confidences.topk(num_kept, dim=1, sorted=True)

    # Take the regression results of the kept candidates.
    batch_rows = torch.arange(N, device=device)[:, None]
    regressions = regressions[batch_rows, kept_idx]

    # Bring the anchors into one tensor so they can be indexed the same way.
    image_shapes = [group.size for group in anchors]
    kept_anchors = torch.cat([group.bbox for group in anchors], dim=0)
    kept_anchors = kept_anchors.reshape(N, -1, 4)[batch_rows, kept_idx]

    # Apply the regression to the anchor boxes.
    proposals = self.box_coder.decode(regressions.view(-1, 4), kept_anchors.view(-1, 4))
    proposals = proposals.view(N, -1, 4)

    # Wrap the decoded boxes into BoxLists and run NMS to obtain the final
    # proposals.
    result = []
    for boxes, scores, shape in zip(proposals, confidences, image_shapes):
        per_image = BoxList(boxes, shape, mode="xyxy")
        per_image.add_field("objectness", scores)
        per_image = per_image.clip_to_image(remove_empty=False)
        per_image = remove_small_boxes(per_image, self.min_size)
        per_image = boxlist_nms(
            per_image,
            self.nms_thresh,
            max_proposals=self.post_nms_top_n,
            score_field="objectness",
        )
        result.append(per_image)
    return result
def forward_for_single_feature_map(self, anchors, objectness, box_regression):
    """
    Build the per-image proposals of one feature level, with a separate
    code path that is used while exporting to ONNX.

    Arguments:
        anchors: list[BoxList]
        objectness: tensor of size N, A, H, W
        box_regression: tensor of size N, A * 4, H, W

    Returns:
        list[BoxList]: one entry per image, in "xyxy" mode with an
        "objectness" field, after clipping, small-box removal and NMS.
    """
    device = objectness.device
    n_img, n_anchor, feat_h, feat_w = objectness.shape
    exporting = self.onnx_export

    # put in the same format as anchors
    scores = permute_and_flatten(objectness, n_img, n_anchor, 1, feat_h, feat_w)
    scores = scores.view(n_img, -1).sigmoid()
    deltas = permute_and_flatten(
        box_regression, n_img, n_anchor, 4, feat_h, feat_w
    )

    if exporting:
        from torch.onnx import operators

        # The anchor count is read from the tensor shape as a tensor
        # (presumably so it stays dynamic in the exported graph - confirm).
        anchor_count = operators.shape_as_tensor(scores)[1].unsqueeze(0)
        limit = torch.tensor([self.pre_nms_top_n], dtype=torch.long)
        keep_count = torch.min(torch.cat((limit, anchor_count), 0))
    else:
        keep_count = min(self.pre_nms_top_n, n_anchor * feat_h * feat_w)

    scores, top_idx = scores.topk(keep_count, dim=1, sorted=True)
    rows = torch.arange(n_img, device=device)[:, None]

    sizes = [per_image.size for per_image in anchors]
    anchor_boxes = torch.cat([per_image.bbox for per_image in anchors], dim=0)
    anchor_boxes = anchor_boxes.reshape(n_img, -1, 4)

    if exporting:
        # NOTE: for now only batch == 1 is supported for ONNX export.
        assert top_idx.size(0) == 1
        top_idx = top_idx.squeeze(0)
        deltas = deltas.index_select(1, top_idx)
        anchor_boxes = anchor_boxes.index_select(1, top_idx)
    else:
        deltas = deltas[rows, top_idx]
        anchor_boxes = anchor_boxes[rows, top_idx]

    decoded = self.box_coder.decode(
        deltas.view(-1, 4), anchor_boxes.view(-1, 4)
    )
    decoded = decoded.view(n_img, -1, 4)

    result = []
    for boxes, box_scores, size in zip(decoded, scores, sizes):
        candidate = BoxList(boxes, size, mode="xyxy")
        candidate.add_field("objectness", box_scores)
        candidate = candidate.clip_to_image(remove_empty=False)
        candidate = remove_small_boxes(candidate, self.min_size, exporting)
        result.append(
            boxlist_nms(
                candidate,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
        )
    return result
def forward_for_single_feature_map1(self, pre_anchors, box_cls, box_regression,
                                    pre_nms_thresh, stride):
    """
    Decode the detections of one feature level (retinanet-example style).

    Arguments:
        pre_anchors: list[BoxList]; only ``.size`` of each entry is read
        box_cls: tensor of size N, A * C, H, W
        box_regression: tensor of size N, A * 4, H, W
        pre_nms_thresh: scores below this value are discarded
        stride: stride of this level; looked up in ``self.strides`` to
            pick the matching entry of ``self.cell_anchors``

    Returns:
        list[BoxList], one per image. An image with at least one score
        >= ``pre_nms_thresh`` is built from ``self.pre_nms_top_n`` rows
        (rows beyond the kept detections stay zero-filled), then clipped
        and passed through ``remove_small_boxes``. An image with no such
        score yields an empty BoxList with empty "labels" and "scores".
    """
    # NOTE: the original `_nv_decode` CUDA fast path was hard-disabled
    # (`torch.cuda.is_available() and 0`) and is therefore omitted here.
    cell_anchors = torch.Tensor(self.cell_anchors[self.strides.index(stride)])
    top_n = self.pre_nms_top_n
    batch_size = box_cls.size()[0]
    device = box_cls.device
    box_cls = box_cls.sigmoid()

    out_scores = torch.zeros((batch_size, top_n), device=device)
    out_boxes = torch.zeros((batch_size, top_n, 4), device=device)
    out_classes = torch.zeros((batch_size, top_n), device=device).long()

    cell_anchors = cell_anchors.to(device).type(box_cls.type())
    num_anchors = cell_anchors.size()[0]
    num_classes = box_cls.size()[1] // num_anchors
    height, width = box_cls.size()[-2:]

    has_detections = [False] * batch_size
    for batch in range(batch_size):
        cls_head = box_cls[batch].contiguous().view(-1)

        # Keep scores over threshold
        keep = (cls_head >= pre_nms_thresh).nonzero().view(-1)
        if keep.nelement() == 0:
            # Only this image is empty; the rest of the batch is still
            # decoded instead of discarding every image's detections.
            continue
        has_detections[batch] = True

        # Gather top elements
        scores = torch.index_select(cls_head, 0, keep)
        scores, indices = torch.topk(
            scores, min(top_n, keep.size()[0]), dim=0)
        indices = torch.index_select(keep, 0, indices).view(-1)

        # `indices` address the flattened (A * C, H, W) score map, i.e.
        # index = ((a * C + c) * H + y) * W + x. Floor division keeps the
        # decoded components integral so they can be used as indices
        # (true division would turn them into floats).
        x = indices % width
        y = (indices // width) % height
        classes = (indices // width // height) % num_classes + 1
        a = indices // num_classes // height // width

        # Infer kept bboxes
        box_head = box_regression[batch].contiguous().view(
            num_anchors, 4, height, width)
        boxes = box_head[a, :, y, x]
        grid = (torch.stack([x, y, x, y], 1).type(box_cls.type()) * stride
                + cell_anchors[a, :])
        boxes = self.box_coder.decode(boxes, grid)

        out_scores[batch, :scores.size()[0]] = scores
        out_boxes[batch, :boxes.size()[0], :] = boxes
        out_classes[batch, :classes.size()[0]] = classes

    results = []
    for batch in range(batch_size):
        image_size = pre_anchors[batch].size
        if not has_detections[batch]:
            empty_boxlist = BoxList(torch.Tensor(0, 4).to(device), image_size)
            empty_boxlist.add_field(
                "labels", torch.LongTensor([]).to(device))
            empty_boxlist.add_field(
                "scores", torch.Tensor([]).to(device))
            results.append(empty_boxlist)
            continue

        boxlist = BoxList(out_boxes[batch], image_size, mode="xyxy")
        boxlist.add_field("labels", out_classes[batch])
        boxlist.add_field("scores", out_scores[batch])
        boxlist = boxlist.clip_to_image(remove_empty=False)
        boxlist = remove_small_boxes(boxlist, self.min_size)
        results.append(boxlist)
    return results
def forward_for_single_feature_map(self, anchors, box_cls, box_regression, coeffs):
    """
    Decode the detections of one feature level together with their
    coefficient vectors.

    Arguments:
        anchors: list[BoxList] N, A * H * W
        box_cls: tensor of size N, A * C, H, W
        box_regression: tensor of size N, A * 4, H, W
        coeffs: tensor of size N, A * K, H, W

    Returns:
        list[BoxList]: one entry per image with "scores" and "coeffs"
        fields; "labels" is only attached when
        cfg.MODEL.YOLACT.USE_FAST_NMS is off.
    """
    n_img, _, feat_h, feat_w = box_cls.shape
    n_anchor = box_regression.size(1) // 4
    n_cls = box_cls.size(1) // n_anchor
    n_coeff = coeffs.size(1) // n_anchor

    # put in the same format as anchors (N, H*W*A, C)
    cls_scores = permute_and_flatten(
        box_cls, n_img, n_anchor, n_cls, feat_h, feat_w
    ).sigmoid()
    # box regression is class-agnostic
    deltas = permute_and_flatten(
        box_regression, n_img, n_anchor, 4, feat_h, feat_w
    )
    all_coeffs = permute_and_flatten(
        coeffs, n_img, n_anchor, n_coeff, feat_h, feat_w
    )

    above_thresh = cls_scores > self.pre_nms_thresh
    keep_counts = above_thresh.view(n_img, -1).sum(1)
    keep_counts = keep_counts.clamp(max=self.pre_nms_top_n)

    results = []
    per_image = zip(
        cls_scores, deltas, all_coeffs, keep_counts, above_thresh, anchors
    )
    for img_scores, img_deltas, img_coeffs, img_keep, img_mask, img_anchors in per_image:
        labels = None
        if cfg.MODEL.YOLACT.USE_FAST_NMS:
            # Every anchor is decoded; scores keep their per-class layout.
            boxes = self.box_coder.decode(img_deltas, img_anchors.bbox)
        else:
            # Sort and select TopN among the (anchor, class) pairs that
            # passed the threshold.
            img_scores, picked = img_scores[img_mask].topk(
                img_keep, sorted=False
            )
            hits = img_mask.nonzero()[picked, :]
            anchor_idx = hits[:, 0]
            labels = hits[:, 1] + 1
            boxes = self.box_coder.decode(
                img_deltas[anchor_idx, :].view(-1, 4),
                img_anchors.bbox[anchor_idx, :].view(-1, 4),
            )
            img_coeffs = img_coeffs[anchor_idx, :].view(-1, n_coeff)

        detections = BoxList(boxes, img_anchors.size, mode="xyxy")
        if labels is not None:
            detections.add_field("labels", labels)
        detections.add_field("scores", img_scores)
        detections.add_field("coeffs", img_coeffs)
        detections = detections.clip_to_image(remove_empty=False)
        results.append(remove_small_boxes(detections, self.min_size))
    return results
def forward_for_single_feature_map(self, anchors, objectness, box_regression, i):
    """
    Turn the RPN outputs of one feature level into per-image proposals,
    optionally recording extra bookkeeping fields for a teacher model.

    Arguments:
        anchors: list[BoxList]
        objectness: tensor of size N, A, H, W
        box_regression: tensor of size N, A * 4, H, W
        i: value stored for every proposal in the "rpn_ancher_level"
            field when ``self.is_teacher`` is set (presumably the index
            of the feature level - confirm against the caller)

    Returns:
        list[BoxList]: one entry per image, after clipping, small-box
        removal and NMS on the "objectness" field.
    """
    device = objectness.device
    n_img, n_anchor, feat_h, feat_w = objectness.shape

    # put in the same format as anchors
    scores = objectness.permute(0, 2, 3, 1).reshape(n_img, -1).sigmoid()
    deltas = box_regression.view(n_img, -1, 4, feat_h, feat_w)
    deltas = deltas.permute(0, 3, 4, 1, 2).reshape(n_img, -1, 4)

    keep_count = min(self.pre_nms_top_n, n_anchor * feat_h * feat_w)
    scores, top_idx = scores.topk(keep_count, dim=1, sorted=True)

    rows = torch.arange(n_img, device=device)[:, None]
    deltas = deltas[rows, top_idx]

    sizes = [per_image.size for per_image in anchors]
    anchor_boxes = torch.cat([per_image.bbox for per_image in anchors], dim=0)
    anchor_boxes = anchor_boxes.reshape(n_img, -1, 4)[rows, top_idx]

    decoded = self.box_coder.decode(
        deltas.view(-1, 4), anchor_boxes.view(-1, 4)
    )
    decoded = decoded.view(n_img, -1, 4)

    result = []
    per_image = zip(decoded, scores, sizes, top_idx)
    for img_idx, (boxes, box_scores, size, picked) in enumerate(per_image):
        candidate = BoxList(boxes, size, mode="xyxy")
        candidate.add_field("objectness", box_scores)
        if self.is_teacher:
            # Keep the raw regression, the selected anchor indices and the
            # level tag alongside each proposal.
            candidate.add_field("box_reg", deltas[img_idx])
            candidate.add_field("rpn_topk", picked)
            candidate.add_field(
                "rpn_ancher_level",
                torch.tensor([i] * picked.shape[0], device=device))
        candidate = candidate.clip_to_image(remove_empty=False)
        candidate = remove_small_boxes(candidate, self.min_size)
        result.append(
            boxlist_nms(
                candidate,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
        )
    return result
def forward_for_single_feature_map(self, anchors, objectness, box_regression, cls):
    """
    Build labelled per-image detections of one feature level from an
    objectness map, a box regression map and per-anchor class logits.

    Arguments:
        anchors: list[BoxList]
        objectness: tensor of size N, A, H, W
        box_regression: tensor of size N, A * 4, H, W
        cls: tensor of size N, A * C, H, W

    Returns:
        list[BoxList]: one entry per image in "xyxy" mode. "scores" holds
        the sigmoid objectness and "labels" the arg-max class index + 1;
        boxes are clipped, filtered by size and reduced by NMS on
        "scores".
    """
    device = objectness.device
    N, A, H, W = objectness.shape
    N, AXC, H, W = cls.shape
    # Exact integer division: the channel count is A * C.
    C = AXC // A

    # put in the same format as anchors
    objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
    objectness = objectness.sigmoid()
    cls = permute_and_flatten(cls, N, A, C, H, W)
    box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

    num_anchors = A * H * W
    if self.onnx_export:
        from torch.onnx import operators
        num_anchors = operators.shape_as_tensor(objectness)[1].unsqueeze(0)
        pre_nms_top_n = torch.min(
            torch.cat((torch.tensor([self.pre_nms_top_n], dtype=torch.long),
                       num_anchors), 0))
    else:
        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
    objectness, topk_idx = objectness.topk(pre_nms_top_n, dim=1, sorted=True)

    batch_idx = torch.arange(N, device=device)[:, None]

    image_shapes = [box.size for box in anchors]
    concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
    concat_anchors = concat_anchors.reshape(N, -1, 4)

    # Regression, class logits and anchors must all be reduced with the
    # same top-k indices, otherwise labels no longer line up with the
    # proposals they are attached to below.
    if self.onnx_export:
        # NOTE: for now only batch == 1 is supported for ONNX export.
        assert topk_idx.size(0) == 1
        topk_idx = topk_idx.squeeze(0)
        box_regression = box_regression.index_select(1, topk_idx)
        cls = cls.index_select(1, topk_idx)
        concat_anchors = concat_anchors.index_select(1, topk_idx)
    else:
        box_regression = box_regression[batch_idx, topk_idx]
        cls = cls[batch_idx, topk_idx]
        concat_anchors = concat_anchors[batch_idx, topk_idx]

    proposals = self.box_coder.decode(box_regression.view(-1, 4),
                                      concat_anchors.view(-1, 4))
    proposals = proposals.view(N, -1, 4)

    # Class index with the highest logit, shifted by one to form the label.
    cls = torch.argmax(cls, -1) + 1

    result = []
    for proposal, score, c, im_shape in zip(proposals, objectness, cls,
                                            image_shapes):
        boxlist = BoxList(proposal, im_shape, mode="xyxy")
        boxlist.add_field("scores", score)
        boxlist.add_field("labels", c)
        boxlist = boxlist.clip_to_image(remove_empty=False)
        boxlist = remove_small_boxes(boxlist, self.min_size, self.onnx_export)
        boxlist = boxlist_nms(
            boxlist,
            self.nms_thresh,
            max_proposals=self.post_nms_top_n,
            score_field="scores",
        )
        result.append(boxlist)
    return result
def forward_for_single_feature_map(self, anchors, objectness, box_regression):
    """
    Turn the RPN outputs of one feature level into per-image proposals,
    with optional rotated-box handling driven by ``cfg``.

    Arguments:
        anchors: list[BoxList]
        objectness: tensor of size N, A, H, W
        box_regression: tensor of size N, A * 4, H, W
            (N, A * 5, H, W when cfg.ROTATE is set and
            cfg.MODEL.BACKBONE.CONV_BODY contains "RETINANET")

    Returns:
        list[BoxList]: one entry per image carrying an "objectness"
        field. Mode is "xy8" in the rotated RetinaNet path, "xy854" when
        only cfg.ROTATE is set (with extra "xyxy" and "xywht" fields) and
        "xyxy" otherwise.
    """
    device = objectness.device
    N, A, H, W = objectness.shape

    rotated_retina = (cfg.ROTATE
                      and "RETINANET" in cfg.MODEL.BACKBONE.CONV_BODY)
    # Number of regression values per anchor in the active path.
    box_dim = 5 if rotated_retina else 4

    # put in the same format as anchors
    objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
    objectness = objectness.sigmoid()
    box_regression = permute_and_flatten(box_regression, N, A, box_dim, H, W)

    num_anchors = A * H * W
    pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
    objectness, topk_idx = objectness.topk(pre_nms_top_n, dim=1, sorted=True)

    batch_idx = torch.arange(N, device=device)[:, None]
    box_regression = box_regression[batch_idx, topk_idx]

    image_shapes = [box.size for box in anchors]
    concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
    concat_anchors = concat_anchors.reshape(N, -1, box_dim)[batch_idx, topk_idx]

    # NOTE(review): the anchors are handed to decode as (-1, 4) in both
    # paths, even though the rotated path reshapes them with 5 columns
    # just above. Kept as in the original - confirm against box_coder.
    proposals = self.box_coder.decode(
        box_regression.view(-1, box_dim), concat_anchors.view(-1, 4)
    )
    proposals = proposals.view(N, -1, box_dim)

    result = []
    for proposal, score, im_shape in zip(proposals, objectness, image_shapes):
        if rotated_retina:
            if cfg.MODEL.RETINANET_DCN_ON:
                xt, yt, xc, yc, r = proposal.split(1, dim=-1)
                h = torch.sqrt((xt - xc) ** 2 + (yt - yc) ** 2)
                w = h * torch.exp(r)
                cost = torch.abs(xt - xc) / h
                sint = torch.abs(yt - yc) / h
                bbox_x1 = xt - w * sint
                bbox_y1 = yt - w * cost
                # Point 2 mirrors point 1 through (xt, yt); points 3 and 4
                # mirror points 1 and 2 through (xc, yc). The y coordinates
                # therefore reflect through yc, not xc.
                bbox_x2 = xt * 2 - bbox_x1
                bbox_y2 = yt * 2 - bbox_y1
                bbox_x3 = xc * 2 - bbox_x1
                bbox_y3 = yc * 2 - bbox_y1
                bbox_x4 = xc * 2 - bbox_x2
                bbox_y4 = yc * 2 - bbox_y2
                # One row of 8 coordinates per proposal, like the
                # axis-aligned 8-point layout built in the cfg.ROTATE path.
                proposal = torch.cat(
                    (bbox_x1, bbox_y1, bbox_x2, bbox_y2,
                     bbox_x3, bbox_y3, bbox_x4, bbox_y4), dim=1)
            else:
                proposal = trans.convert8(proposal)
            boxlist = BoxList(proposal, im_shape, mode="xy8")
            boxlist.add_field("objectness", score)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = boxlist_rnms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
        elif cfg.ROTATE:
            xmin, ymin, xmax, ymax = proposal.split(1, dim=-1)
            proposal4 = torch.cat((xmin, ymin, xmax, ymax), dim=1)
            proposal5 = torch.cat(
                ((xmin + xmax) / 2.,
                 (ymin + ymax) / 2.,
                 xmax - xmin + 1,
                 ymax - ymin + 1,
                 torch.ones_like(xmin) * (-3.14 / 2)),
                dim=1)
            # Four corners of the axis-aligned box, 8 values per row.
            proposal = torch.cat(
                (xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax), dim=1)
            boxlist = BoxList(proposal, im_shape, mode="xy854")
            boxlist.add_field("xyxy", proposal4)
            boxlist.add_field("xywht", proposal5)
            boxlist.add_field("objectness", score)
            boxlist = boxlist.clip_to_image(remove_empty=True)
            boxlist = boxlist_rnms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
        else:
            boxlist = BoxList(proposal, im_shape, mode="xyxy")
            boxlist.add_field("objectness", score)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
        result.append(boxlist)
    return result
def prepare_for_coco_detection_mstest(predictions, dataset):
    """
    Merge the detections of three test scales and convert them into COCO
    result dictionaries.

    Arguments:
        predictions: indexable with three entries, read as [0], [1], [2]
            (small, medium, large). Each entry is an iterable of
            per-image BoxList objects with "scores" and "labels" fields.
        dataset: indexable with three entries; only ``dataset[2]`` is used
            (``id_to_img_map``, ``get_img_info`` and
            ``contiguous_category_id_to_json_id``).

    Returns:
        list[dict]: one dict per kept detection with the keys
        "image_id", "category_id", "bbox" (xywh) and "score".

    Raises:
        ValueError: if cfg.TEST.MS_TEST_NMS is neither "nms" nor
            "soft_nms" (previously this only printed a message and the
            boxes were passed on without any suppression).
    """
    predictions_s = predictions[0]
    predictions_m = predictions[1]
    predictions_l = predictions[2]
    dataset_l = dataset[2]

    nms_method = cfg.TEST.MS_TEST_NMS
    nms_thresh = cfg.TEST.MS_TEST_NMS_THR
    if nms_method == "nms":
        apply_nms = boxlist_nms
    elif nms_method == "soft_nms":
        apply_nms = boxlist_soft_nms
    else:
        raise ValueError(
            "unsupported cfg.TEST.MS_TEST_NMS: {!r}".format(nms_method))

    # Labels 1 .. num_classes - 1 are suppressed one class at a time.
    num_classes = 81
    min_size = 0
    # The merged boxes are processed on the GPU when one is available.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    coco_results = []
    per_image = zip(predictions_s, predictions_m, predictions_l)
    for image_id, per_scale in enumerate(per_image):
        original_id = dataset_l.id_to_img_map[image_id]
        # Nothing was detected at any scale for this image.
        if all(len(prediction) == 0 for prediction in per_scale):
            continue

        img_info = dataset_l.get_img_info(image_id)
        image_size = (img_info["width"], img_info["height"])

        # rescale predict bbox to original images size.
        per_scale = [prediction.resize(image_size) for prediction in per_scale]

        # concat single-scale result and convert to type BoxList.
        detections = torch.cat(
            [prediction.bbox for prediction in per_scale], dim=0).to(device)
        per_class = torch.cat(
            [prediction.get_field("labels") for prediction in per_scale],
            dim=0).to(device)
        per_box_cls = torch.cat(
            [prediction.get_field("scores") for prediction in per_scale],
            dim=0).to(device)

        w, h = per_scale[2].size[0], per_scale[2].size[1]
        boxlist = BoxList(detections, (int(w), int(h)), mode="xyxy")
        boxlist.add_field("labels", per_class)
        boxlist.add_field("scores", per_box_cls)
        boxlist = boxlist.clip_to_image(remove_empty=False)
        boxlist = remove_small_boxes(boxlist, min_size)

        scores = boxlist.get_field("scores")
        labels = boxlist.get_field("labels")
        boxes = boxlist.bbox

        # multi-scale test + NMS, applied independently per class
        per_class_results = []
        for j in range(1, num_classes):
            inds = (labels == j).nonzero().view(-1)
            boxlist_for_class = BoxList(
                boxes[inds, :].view(-1, 4), boxlist.size, mode="xyxy")
            boxlist_for_class.add_field("scores", scores[inds])
            boxlist_for_class = apply_nms(
                boxlist_for_class, nms_thresh, score_field="scores")
            num_labels = len(boxlist_for_class)
            boxlist_for_class.add_field(
                "labels",
                torch.full((num_labels,), j, dtype=torch.int64,
                           device=scores.device))
            per_class_results.append(boxlist_for_class)

        merged = cat_boxlist(per_class_results).convert("xywh")
        merged_boxes = merged.bbox.tolist()
        merged_scores = merged.get_field("scores").tolist()
        merged_labels = merged.get_field("labels").tolist()
        mapped_labels = [
            dataset_l.contiguous_category_id_to_json_id[int(label)]
            for label in merged_labels
        ]

        coco_results.extend(
            {
                "image_id": original_id,
                "category_id": category_id,
                "bbox": box,
                "score": score,
            }
            for box, score, category_id in zip(
                merged_boxes, merged_scores, mapped_labels)
        )
    return coco_results
def forward_for_single_feature_map(
        self, locations, box_cls,
        box_regression, bezier_regression, centerness,
        image_sizes, offsets=None):
    """
    Decode boxes and bezier control points of one feature level.

    Arguments:
        locations: tensor indexed by flattened H * W position; columns 0
            and 1 are combined with the box regression as x and y
        box_cls: tensor of size N, C, H, W
        box_regression: tensor of size N, 4, H, W
        bezier_regression: tensor of size N, 16, H, W
        centerness: tensor of size N, 1, H, W
        image_sizes: per-image (h, w)
        offsets: optional tensor; when given, extra "offsets",
            "rec_masks" and "locations" fields are attached

    Returns:
        list[BoxList]: one entry per image with "labels", "scores" and
        "beziers" fields, clipped to the image and filtered by size.
    """
    n_img, n_cls, feat_h, feat_w = box_cls.shape

    def _channels_last(tensor, channels):
        # (N, channels, H, W) -> (N, H, W, channels)
        return tensor.view(n_img, channels, feat_h, feat_w).permute(0, 2, 3, 1)

    # put in the same format as locations
    cls_scores = _channels_last(box_cls, n_cls).reshape(n_img, -1, n_cls).sigmoid()
    box_deltas = _channels_last(box_regression, 4).reshape(n_img, -1, 4)
    curve_deltas = _channels_last(bezier_regression, 16).reshape(n_img, -1, 16)
    ctr_scores = _channels_last(centerness, 1).reshape(n_img, -1).sigmoid()

    has_offsets = offsets is not None
    if has_offsets:
        # NOTE(review): `mask` (here) and `max_len` (below) are not
        # defined inside this function; they have to be provided by the
        # enclosing module - confirm before using the offsets path.
        offsets = torch.cat((offsets, mask), dim=1)
        offsets = offsets.permute(0, 2, 3, 1).reshape(n_img, feat_h * feat_w, -1)

    # Thresholding happens on the raw class scores, before centerness.
    above_thresh = cls_scores > self.pre_nms_thresh
    keep_counts = above_thresh.view(n_img, -1).sum(1)
    keep_counts = keep_counts.clamp(max=self.pre_nms_top_n)

    # multiply the classification scores with centerness scores
    cls_scores = cls_scores * ctr_scores[:, :, None]

    results = []
    for img_idx in range(n_img):
        img_mask = above_thresh[img_idx]
        scores = cls_scores[img_idx][img_mask]
        hits = img_mask.nonzero()
        loc_idx = hits[:, 0]
        labels = hits[:, 1] + 1

        deltas = box_deltas[img_idx][loc_idx]
        curves = curve_deltas[img_idx][loc_idx]
        points = locations[loc_idx]
        limit = keep_counts[img_idx]
        if has_offsets:
            extra = offsets[img_idx][loc_idx]

        if img_mask.sum().item() > limit.item():
            scores, picked = scores.topk(limit, sorted=False)
            labels = labels[picked]
            deltas = deltas[picked]
            curves = curves[picked]
            points = points[picked]
            if has_offsets:
                extra = extra[picked]

        xs, ys = points[:, 0], points[:, 1]
        boxes = torch.stack([
            xs - deltas[:, 0],
            ys - deltas[:, 1],
            xs + deltas[:, 2],
            ys + deltas[:, 3],
        ], dim=1)

        curve_points = points[:, [1, 0]].unsqueeze(1) + curves.view(-1, 8, 2)
        curve_points = curve_points.view(-1, 16)

        img_h, img_w = image_sizes[img_idx]
        detections = BoxList(boxes, (int(img_w), int(img_h)), mode="xyxy")
        detections.add_field("labels", labels)
        detections.add_field("scores", scores)
        detections.add_field("beziers", curve_points)
        if has_offsets:
            detections.add_field("offsets", extra[:, :max_len * 2])
            detections.add_field("rec_masks", extra[:, max_len * 2:].sigmoid())
            detections.add_field("locations", points)
        detections = detections.clip_to_image(remove_empty=False)
        results.append(remove_small_boxes(detections, self.min_size))
    return results
def forward_for_single_feature_map(self, anchors, objectness, box_regression):
    """
    Produce the RPN proposals of one FPN level for every image of the batch.

    Arguments:
        anchors: list[BoxList]
        objectness: tensor of size N, A, H, W
            (N = number of images in the batch, A = number of ratios,
            H / W = height / width of this level's feature map)
        box_regression: tensor of size N, A * 4, H, W

    Returns:
        list[BoxList]: one entry per image in "xyxy" mode with an
        "objectness" field, after clipping, small-box removal and NMS.
    """
    n_img, n_anchor, feat_h, feat_w = objectness.shape
    # Column of image indices used to gather per-image top-k entries.
    rows = torch.arange(n_img, device=objectness.device)[:, None]

    # put in the same format as anchors:
    # N images, -1 candidate boxes, one score per box
    scores = permute_and_flatten(objectness, n_img, n_anchor, 1, feat_h, feat_w)
    scores = scores.view(n_img, -1).sigmoid()
    # N images, -1 candidate boxes, four regression values per box
    deltas = permute_and_flatten(
        box_regression, n_img, n_anchor, 4, feat_h, feat_w
    )

    # Number of anchors kept per image on this feature map.
    keep_count = min(self.pre_nms_top_n, n_anchor * feat_h * feat_w)
    # Scores of the best `keep_count` anchors and their indices in the
    # anchor list.
    scores, top_idx = scores.topk(keep_count, dim=1, sorted=True)

    # Regression values of those best-scoring anchors.
    deltas = deltas[rows, top_idx]

    # Image sizes, and the best-scoring anchors themselves.
    sizes = [per_image.size for per_image in anchors]
    anchor_boxes = torch.cat([per_image.bbox for per_image in anchors], dim=0)
    anchor_boxes = anchor_boxes.reshape(n_img, -1, 4)[rows, top_idx]

    # Combine anchor coordinates and regression values into proposal boxes.
    decoded = self.box_coder.decode(
        deltas.view(-1, 4), anchor_boxes.view(-1, 4)
    )
    decoded = decoded.view(n_img, -1, 4)

    def _finalize(boxes, box_scores, size):
        # One BoxList per image of this level, boxes scored by objectness.
        candidate = BoxList(boxes, size, mode="xyxy")
        candidate.add_field("objectness", box_scores)
        candidate = candidate.clip_to_image(remove_empty=False)
        candidate = remove_small_boxes(candidate, self.min_size)
        return boxlist_nms(
            candidate,
            self.nms_thresh,
            max_proposals=self.post_nms_top_n,
            score_field="objectness",
        )

    # The RPN output is in 'xyxy' format.
    return [
        _finalize(boxes, box_scores, size)
        for boxes, box_scores, size in zip(decoded, scores, sizes)
    ]
def forward_for_single_feature_map(self, anchors, box_cls, box_regression):
    """
    Decode the detections of one feature level, selecting candidates
    either by a fixed threshold or by ``self.imbalanced_decider``.

    Arguments:
        anchors: list[BoxList]
        box_cls: tensor of size N, A * C, H, W
        box_regression: tensor of size N, A * 4, H, W

    Returns:
        list[BoxList]: one entry per image with "labels" and "scores"
        fields, clipped to the image and filtered by size.
    """
    n_img, _, feat_h, feat_w = box_cls.shape
    n_anchor = box_regression.size(1) // 4
    n_cls = box_cls.size(1) // n_anchor

    # put in the same format as anchors
    cls_scores = permute_and_flatten(
        box_cls, n_img, n_anchor, n_cls, feat_h, feat_w
    ).sigmoid()
    deltas = permute_and_flatten(
        box_regression, n_img, n_anchor, 4, feat_h, feat_w
    )

    if self.imbalanced_decider is not None:
        selected = self.imbalanced_decider(cls_scores)
    else:
        selected = cls_scores > self.pre_nms_thresh

    keep_counts = selected.view(n_img, -1).sum(1)
    keep_counts = keep_counts.clamp(max=self.pre_nms_top_n)

    results = []
    per_image = zip(cls_scores, deltas, keep_counts, selected, anchors)
    for img_scores, img_deltas, img_keep, img_mask, img_anchors in per_image:
        # Sort and select TopN. This stays per image because the number
        # of candidates differs from image to image.
        img_scores, picked = img_scores[img_mask].topk(img_keep, sorted=False)

        hits = img_mask.nonzero()[picked, :]
        anchor_idx = hits[:, 0]
        labels = hits[:, 1] + 1

        boxes = self.box_coder.decode(
            img_deltas[anchor_idx, :].view(-1, 4),
            img_anchors.bbox[anchor_idx, :].view(-1, 4),
        )

        detections = BoxList(boxes, img_anchors.size, mode="xyxy")
        detections.add_field("labels", labels)
        detections.add_field("scores", img_scores)
        detections = detections.clip_to_image(remove_empty=False)
        results.append(remove_small_boxes(detections, self.min_size))
    return results
def forward_for_single_feature_map(self, locations, box_cls_set, box_regression,
                                   centerness, image_sizes, show_box_cls):
    """
    Decode the detections of one feature level from one selected
    classification output.

    Arguments:
        locations: tensor indexed by flattened H * W position; columns 0
            and 1 are combined with the box regression as x and y
        box_cls_set: mapping whose values are tensors of size N, C, H, W;
            only the third value (index 2 of ``values()``) is used
        box_regression: tensor of size N, 4, H, W
        centerness: accepted for interface compatibility but ignored, so
            scores are not re-weighted by centerness
        image_sizes: per-image (h, w)
        show_box_cls: callable invoked as
            ``show_box_cls(box_prob_set, N, H, W, C, pre_nms_thresh)``
            when ``self.vis_labels`` is truthy

    Returns:
        list[BoxList]: one entry per image with "labels", "scores" and
        "det_locations" fields, clipped to the image and filtered by size.

    Raises:
        IndexError: if ``box_cls_set`` holds fewer than three values.
    """
    # Pick the tensor directly from the list. Routing a list of tensors
    # through np.array is device-dependent and can hand back ndarrays,
    # on which the `.view(N, C, H, W)` below does not work.
    selected_cls = list(box_cls_set.values())[2]
    N, C, H, W = selected_cls.shape

    box_prob_set = [
        selected_cls.view(N, C, H, W).permute(0, 2, 3, 1)
        .reshape(N, -1, C).sigmoid()
    ]
    # Geometric mean over the selected probability maps (a single one here).
    box_cls = torch.exp(torch.log(torch.stack(box_prob_set)).mean(dim=0))

    # put in the same format as locations
    box_regression = box_regression.view(N, 4, H, W).permute(0, 2, 3, 1)
    box_regression = box_regression.reshape(N, -1, 4)

    if self.vis_labels:
        show_box_cls(box_prob_set, N, H, W, C, self.pre_nms_thresh)

    candidate_inds = box_cls > self.pre_nms_thresh
    pre_nms_top_n = candidate_inds.view(N, -1).sum(1)
    pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n)

    results = []
    for i in range(N):
        per_candidate_inds = candidate_inds[i]
        per_box_cls = box_cls[i][per_candidate_inds]

        per_candidate_nonzeros = per_candidate_inds.nonzero()
        per_box_loc = per_candidate_nonzeros[:, 0]
        per_class = per_candidate_nonzeros[:, 1] + 1

        per_box_regression = box_regression[i][per_box_loc]
        per_locations = locations[per_box_loc]

        per_pre_nms_top_n = pre_nms_top_n[i]

        # Only reduce to the top entries when the clamp actually cut
        # something off.
        if per_candidate_inds.sum().item() > per_pre_nms_top_n.item():
            per_box_cls, top_k_indices = \
                per_box_cls.topk(per_pre_nms_top_n, sorted=False)
            per_class = per_class[top_k_indices]
            per_box_regression = per_box_regression[top_k_indices]
            per_locations = per_locations[top_k_indices]

        detections = torch.stack([
            per_locations[:, 0] - per_box_regression[:, 0],
            per_locations[:, 1] - per_box_regression[:, 1],
            per_locations[:, 0] + per_box_regression[:, 2],
            per_locations[:, 1] + per_box_regression[:, 3],
        ], dim=1)

        h, w = image_sizes[i]
        boxlist = BoxList(detections, (int(w), int(h)), mode="xyxy")
        boxlist.add_field("labels", per_class)
        boxlist.add_field("scores", per_box_cls)
        boxlist.add_field("det_locations", per_locations)
        boxlist = boxlist.clip_to_image(remove_empty=False)
        boxlist = remove_small_boxes(boxlist, self.min_size)
        results.append(boxlist)
    return results
def forward_for_single_feature_map(self, anchors, box_cls, box_regression,
                                   pre_nms_thresh):
    """
    Decode the detections of one feature level with a caller-supplied
    score threshold.

    Arguments:
        anchors: list[BoxList]
        box_cls: tensor of size N, A * C, H, W
        box_regression: tensor of size N, A * 4, H, W
        pre_nms_thresh: class scores must exceed this value to be kept

    Returns:
        list[BoxList]: one entry per image with "labels" and "scores"
        fields. When no score in the whole batch exceeds the threshold,
        one empty BoxList per entry of ``anchors`` is returned instead.
    """
    device = box_cls.device
    n_img, _, feat_h, feat_w = box_cls.shape
    n_anchor = box_regression.size(1) // 4
    n_cls = box_cls.size(1) // n_anchor

    # put in the same format as anchors
    cls_scores = box_cls.view(n_img, -1, n_cls, feat_h, feat_w)
    cls_scores = cls_scores.permute(0, 3, 4, 1, 2).reshape(n_img, -1, n_cls)
    cls_scores = cls_scores.sigmoid()

    deltas = box_regression.view(n_img, -1, 4, feat_h, feat_w)
    deltas = deltas.permute(0, 3, 4, 1, 2).reshape(n_img, -1, 4)

    above_thresh = cls_scores > pre_nms_thresh
    if above_thresh.sum().item() == 0:
        # Nothing survived anywhere in the batch: hand back empty lists.
        empties = []
        for per_image in anchors:
            empty = BoxList(torch.Tensor(0, 4).to(device), per_image.size)
            empty.add_field("labels", torch.LongTensor([]).to(device))
            empty.add_field("scores", torch.Tensor([]).to(device))
            empties.append(empty)
        return empties

    keep_counts = above_thresh.view(n_img, -1).sum(1)
    keep_counts = keep_counts.clamp(max=self.pre_nms_top_n)

    results = [[] for _ in range(n_img)]
    per_image = zip(cls_scores, deltas, keep_counts, above_thresh, anchors)
    for img_idx, (img_scores, img_deltas, img_keep, img_mask, img_anchors) \
            in enumerate(per_image):
        # Sort and select TopN
        img_scores, picked = img_scores[img_mask].topk(img_keep, sorted=False)

        hits = img_mask.nonzero()[picked, :]
        anchor_idx = hits[:, 0]
        labels = hits[:, 1] + 1

        boxes = self.box_coder.decode(
            img_deltas[anchor_idx, :].view(-1, 4),
            img_anchors.bbox[anchor_idx, :].view(-1, 4),
        )

        detections = BoxList(boxes, img_anchors.size, mode="xyxy")
        detections.add_field("labels", labels)
        detections.add_field("scores", img_scores)
        detections = detections.clip_to_image(remove_empty=False)
        results[img_idx] = remove_small_boxes(detections, self.min_size)
    return results
def forward_for_single_feature_map(self, anchors, box_cls, box_regression):
    """
    Select and decode the candidate detections of a single feature map.

    Arguments:
        anchors: list[BoxList], iterated in lockstep with the batch
            dimension of box_cls / box_regression
        box_cls: tensor of size N, A * C, H, W
        box_regression: tensor of size N, A * 4, H, W

    Returns:
        list[BoxList]: one "xyxy" BoxList per image with the fields "labels"
        (class index + 1) and "scores". Each BoxList has been passed through
        ``clip_to_image(remove_empty=False)`` and
        ``remove_small_boxes(..., self.min_size)``.
    """
    N, _, H, W = box_cls.shape
    A = box_regression.size(1) // 4
    C = box_cls.size(1) // A

    # put in the same format as anchors (N, H*W*A, C)
    box_cls = permute_and_flatten(box_cls, N, A, C, H, W)
    box_cls = box_cls.sigmoid()

    # box regression is class-agnostic: 4 values per anchor
    box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)
    # NOTE(review): this reshape looks redundant if permute_and_flatten
    # already returns (N, H*W*A, 4) -- confirm before removing.
    box_regression = box_regression.reshape(N, -1, 4)

    # An (anchor, class) pair is a candidate when its score is strictly
    # greater than self.pre_nms_thresh.
    candidate_inds = box_cls > self.pre_nms_thresh

    # Per-image number of candidates, capped at self.pre_nms_top_n.
    # Converted to Python ints once so that topk receives an int `k`.
    pre_nms_top_n = candidate_inds.view(N, -1).sum(1)
    pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n).tolist()

    results = []
    # The selection is done per image because the number of surviving
    # candidates differs from image to image.
    for per_box_cls, per_box_regression, per_pre_nms_top_n, \
            per_candidate_inds, per_anchors in zip(
                box_cls, box_regression, pre_nms_top_n,
                candidate_inds, anchors):
        # Boolean indexing turns the (H*W*A, C) score map into a vector
        # holding only the candidate scores.
        per_box_cls = per_box_cls[per_candidate_inds]
        per_box_cls, top_k_indices = \
            per_box_cls.topk(per_pre_nms_top_n, sorted=False)

        # per_candidate_inds is (H*W*A, C); each row of nonzero() is an
        # (anchor index, class index) pair. Boxes regressed from the same
        # anchor with different class labels count as separate detections.
        per_candidate_nonzeros = \
            per_candidate_inds.nonzero()[top_k_indices, :]

        # per_box_loc[i] is the anchor index and per_class[i] the class
        # label (class index + 1) of the i-th surviving candidate.
        per_box_loc = per_candidate_nonzeros[:, 0]
        per_class = per_candidate_nonzeros[:, 1] + 1

        detections = self.box_coder.decode(
            per_box_regression[per_box_loc, :].view(-1, 4),
            per_anchors.bbox[per_box_loc, :].view(-1, 4))

        image_size = per_anchors.size
        boxlist = BoxList(detections, image_size, mode="xyxy")
        boxlist.add_field("labels", per_class)
        boxlist.add_field("scores", per_box_cls)
        boxlist = boxlist.clip_to_image(remove_empty=False)
        boxlist = remove_small_boxes(boxlist, self.min_size)
        results.append(boxlist)

    return results