def nms_boxes(self, obj_dists, rois, box_deltas, im_sizes):
    """
    Performs per-image NMS on the predicted boxes.

    :param obj_dists: [#rois, #classes] class logits, ex: [4000+, 151]
    :param rois: [#rois, 5], ex: [4000+, 5]; column 0 is the image index,
        columns 1:5 the prior box coordinates
    :param im_sizes: sizes of images, rows start with (h, w, ...), ex: [6, 3]
    :param box_deltas: [#rois, #classes, 4] per-class regression deltas
    :return: None if nothing survives filtering, else a 6-tuple of
        nms_inds [#nms] — indices of kept rois, ex: #nms=384
        nms_scores [#nms]
        nms_labels [#nms]
        nms_boxes_assign [#nms, 4] — box of the winning class per kept roi
        nms_boxes [#nms, #classes, 4] — classid=0 slot holds the box prior,
            not a regressed background box
        inds[nms_inds] [#nms] — image index of each kept roi
    """
    # Decode the network's deltas against the roi priors into (x1, y1, x2, y2).
    # box_deltas is (num_rois, num_classes, 4) but each roi has only one prior
    # box, so tile rois[:, 1:] across the class dim, decode flat
    # ([#rois * #classes, 4]), then reshape back to [#rois, #classes, 4].
    boxes = bbox_preds(
        rois[:, None, 1:].expand_as(box_deltas).contiguous().view(-1, 4),
        box_deltas.view(-1, 4)).view(*box_deltas.size())

    inds = rois[:, 0].long().contiguous()  # image index of every roi
    dets = []
    # Clip the boxes to their image's bounds and get the best N dets per image.
    for i, s, e in enumerate_by_image(inds.data):
        h, w = im_sizes[i, :2]
        # In-place clamp of x1/x2 to [0, w-1] and y1/y2 to [0, h-1];
        # done through .data so autograd does not track the clamping.
        boxes[s:e, :, 0].data.clamp_(min=0, max=w - 1)
        boxes[s:e, :, 1].data.clamp_(min=0, max=h - 1)
        boxes[s:e, :, 2].data.clamp_(min=0, max=w - 1)
        boxes[s:e, :, 3].data.clamp_(min=0, max=h - 1)
        d_filtered = filter_det(
            F.softmax(obj_dists[s:e], 1),
            boxes[s:e],
            start_ind=s,  # offsets per-image roi indices into the global range
            nms_filter_duplicates=self.nms_filter_duplicates,
            max_per_img=self.max_per_img,
            thresh=self.thresh,
        )
        if d_filtered is not None:
            dets.append(d_filtered)

    # dets is a list with one (inds, scores, labels) triple per image that
    # produced detections (ex: 6 images, each triple of length 64).
    if len(dets) == 0:
        print("nothing was detected", flush=True)
        return None
    nms_inds, nms_scores, nms_labels = [
        torch.cat(x, 0) for x in zip(*dets)
    ]  # each concatenated to [#nms], ex: [384]
    # Flatten the (roi, class) pair into a single index so we can gather the
    # winning class's box from the flattened [#rois * #classes, 4] view.
    twod_inds = nms_inds * boxes.size(1) + nms_labels.data
    nms_boxes_assign = boxes.view(-1, 4)[twod_inds]
    # nms_boxes: ex [384, 151, 4]; slot 0 along dim 1 is the roi prior box
    # ([384, 1, 4]), followed by the 150 regressed foreground boxes.
    nms_boxes = torch.cat(
        (rois[:, 1:][nms_inds][:, None], boxes[nms_inds][:, 1:]),
        1)
    return nms_inds, nms_scores, nms_labels, nms_boxes_assign, nms_boxes, inds[
        nms_inds]
def nms_boxes(self, obj_dists, rois, box_deltas, im_sizes):
    """
    Performs per-image NMS on the predicted boxes.

    :param obj_dists: [#rois, #classes] class logits
    :param rois: [#rois, 5]; column 0 is the image index, 1:5 the prior box
    :param box_deltas: [#rois, #classes, 4] per-class regression deltas
    :param im_sizes: per-image sizes; rows start with (h, w, ...)
    :return: None if nothing survives, else
        nms_inds [#nms]
        nms_scores [#nms]
        nms_labels [#nms]
        nms_boxes_assign [#nms, 4]
        nms_boxes [#nms, #classes, 4] (classid=0 slot is the box prior)
        image index per kept roi [#nms]
    """
    # Decode deltas against the tiled roi priors; box_deltas is
    # (num_rois, num_classes, 4) while each roi carries a single prior box.
    tiled_priors = rois[:, None, 1:].expand_as(box_deltas).contiguous()
    pred_boxes = bbox_preds(
        tiled_priors.view(-1, 4),
        box_deltas.view(-1, 4)).view(*box_deltas.size())

    img_inds = rois[:, 0].long().contiguous()

    # Clip each image's boxes to its bounds, then filter per image.
    per_image_dets = []
    for i, s, e in enumerate_by_image(img_inds.data):
        h, w = im_sizes[i, :2]
        # x coords clamp against width, y coords against height (in-place,
        # via .data so the clamp stays outside autograd).
        for coord, limit in enumerate((w, h, w, h)):
            pred_boxes[s:e, :, coord].data.clamp_(min=0, max=limit - 1)
        kept = filter_det(
            F.softmax(obj_dists[s:e], 1),
            pred_boxes[s:e],
            start_ind=s,
            nms_filter_duplicates=self.nms_filter_duplicates,
            max_per_img=self.max_per_img,
            thresh=self.thresh,
        )
        if kept is not None:
            per_image_dets.append(kept)

    if len(per_image_dets) == 0:
        print("nothing was detected", flush=True)
        return None

    # Unzip the per-image (inds, scores, labels) triples and concatenate.
    nms_inds, nms_scores, nms_labels = [
        torch.cat(parts, 0) for parts in zip(*per_image_dets)
    ]

    # Gather the winning class's box through a flattened (roi, class) index.
    flat_inds = nms_inds * pred_boxes.size(1) + nms_labels.data
    nms_boxes_assign = pred_boxes.view(-1, 4)[flat_inds]

    # Per-class boxes for the keepers: slot 0 is the roi prior, the rest are
    # the regressed foreground boxes.
    prior_slot = rois[:, 1:][nms_inds][:, None]
    fg_slots = pred_boxes[nms_inds][:, 1:]
    nms_boxes = torch.cat((prior_slot, fg_slots), 1)

    return (nms_inds, nms_scores, nms_labels, nms_boxes_assign, nms_boxes,
            img_inds[nms_inds])
def roi_proposals(self, fmap, im_sizes, nms_thresh=0.7, pre_nms_topn=12000,
                  post_nms_topn=2000):
    """
    :param fmap: [batch_size, IM_SIZE/16, IM_SIZE/16, A, 6]
    :param im_sizes: [batch_size, 3] numpy array of (h, w, scale)
    :return: ROIS: shape [a <= post_nms_topn, 5] array of ROIS.
    """
    # Objectness: softmax over the 2 class channels, keep the foreground
    # probability. Detach from autograd via .data.
    obj_logits = fmap[:, :, :, :, :2].contiguous()
    obj_probs = F.softmax(obj_logits, 4)[..., 1].data.contiguous()

    # Decode the 4 box channels against the anchors, one anchor grid per
    # batch element.
    delta_map = fmap[:, :, :, :, 2:].data.contiguous()
    tiled_anchors = torch.cat([self.anchors[None]] * fmap.size(0), 0)
    decoded = bbox_preds(tiled_anchors.view(-1, 4),
                         delta_map.view(-1, 4)).view(*delta_map.size())

    for i, (h, w, scale) in enumerate(im_sizes):
        # Suppress anchors whose grid cell lies outside this image (the
        # feature map covers the padded batch extent) ...
        h_end = int(h) // self.stride
        w_end = int(w) // self.stride
        if h_end < obj_probs.size(1):
            obj_probs[i, h_end:] = -0.01
        if w_end < obj_probs.size(2):
            obj_probs[i, :, w_end:] = -0.01
        # ... and clamp the remaining boxes into the image (x to width,
        # y to height), in place.
        for coord, limit in enumerate((w, h, w, h)):
            decoded[i, :, :, :, coord].clamp_(min=0, max=limit - 1)

    # Suppress degenerate boxes (width or height under 4 px).
    wh = center_size(decoded.view(-1, 4))
    too_small = (wh[:, 2] < 4) | (wh[:, 3] < 4)
    obj_probs.view(-1)[too_small] = -0.01

    per_image_count = np.prod(decoded.size()[1:-1])
    return filter_roi_proposals(
        decoded.view(-1, 4),
        obj_probs.view(-1),
        boxes_per_im=np.array([per_image_count] * fmap.size(0)),
        nms_thresh=nms_thresh,
        pre_nms_topn=pre_nms_topn,
        post_nms_topn=post_nms_topn)