def __call__(self, loc, score, anchor, img_size, scale=1.):
    """Decode anchors into proposals, filter them and run NMS.

    The train/test proposal budgets are chosen from
    ``self.parent_model.training``, so the parent model must be switched
    to eval mode (``.eval()``) before testing.

    Args:
        loc: Predicted offsets, decoded against ``anchor`` by ``loc2bbox``.
        score: Per-anchor scores, indexed in lockstep with ``loc`` and
            ``anchor``. Must be a NumPy array (it is passed to
            ``from_numpy``).
        anchor: Anchor boxes handed to ``loc2bbox``.
        img_size: Pair of limits; element 0 clips box columns 0 and 2,
            element 1 clips box columns 1 and 3 (presumably
            ``(height, width)`` -- confirm against the caller).
        scale: Multiplier applied to ``self.min_size`` before the
            minimum-size filter.

    Returns:
        NumPy array holding the rows of the decoded boxes that survive
        clipping, the minimum-size filter, the pre-NMS top-N cut, NMS and
        the post-NMS top-N cut.
    """
    if self.parent_model.training:
        n_pre_nms = self.n_train_pre_nms
        n_post_nms = self.n_train_post_nms
    else:
        n_pre_nms = self.n_test_pre_nms
        n_post_nms = self.n_test_post_nms

    # Convert anchors into proposals via the predicted bbox transformations.
    roi = loc2bbox(anchor, loc)

    # Clip predicted boxes to the image (in place on the decoded array).
    roi[:, 0:4:2] = np.clip(roi[:, 0:4:2], 0, img_size[0])
    roi[:, 1:4:2] = np.clip(roi[:, 1:4:2], 0, img_size[1])

    # Drop boxes whose height or width is below the scaled threshold.
    min_size = self.min_size * scale
    hs = roi[:, 2] - roi[:, 0]
    ws = roi[:, 3] - roi[:, 1]
    keep = np.where((hs >= min_size) & (ws >= min_size))[0]
    roi = roi[keep, :]
    score = score[keep]

    # Rank (proposal, score) pairs from highest to lowest score and keep
    # the top n_pre_nms (a non-positive budget disables the cut).
    order = score.ravel().argsort()[::-1]
    if n_pre_nms > 0:
        order = order[:n_pre_nms]
    roi = roi[order, :]
    score = score[order]

    # Run NMS on the GPU when one is present and on the CPU otherwise; the
    # former unconditional ``.cuda()`` crashed on CPU-only hosts.
    # NOTE(review): assumes ``nms`` accepts CPU tensors (torchvision's
    # ``ops.nms`` does) -- confirm for the implementation imported here.
    device = t.device('cuda' if t.cuda.is_available() else 'cpu')
    keep = nms(
        t.from_numpy(roi).to(device),
        t.from_numpy(score).to(device),
        self.nms_thresh)
    if n_post_nms > 0:
        keep = keep[:n_post_nms]
    roi = roi[keep.cpu().numpy()]
    return roi
def predict(self, x, size):
    """Detect objects in one preprocessed image batch.

    Switches the model to the ``'evaluate'`` preset, runs the network and
    turns the per-class regression output into clipped boxes that are then
    filtered by ``self._suppress``.

    Args:
        x: Network input; ``x.shape[3]`` is compared with ``size[1]`` to
            recover the resize factor.
        size: Pair of limits of the original image; element 0 clips the
            even box coordinates and element 1 the odd ones (presumably
            ``(height, width)`` -- confirm against the caller).

    Returns:
        Three single-element lists ``(bboxes, labels, scores)`` wrapping
        the values returned by ``self._suppress``.
    """
    self.use_preset('evaluate')

    scale = x.shape[3] / size[1]
    roi_cls_loc, roi_scores, rois, _ = self(x, scale=scale)
    roi_score = roi_scores.data
    roi_cls_loc = roi_cls_loc.data
    # Bring the RoIs back to the coordinate frame of the original image.
    roi = to_tensor(rois) / scale

    # The head emits 4 offsets per class, so the class count follows from
    # the width of the regression output instead of a hard-coded 21.
    n_class = roi_cls_loc.shape[-1] // 4
    # Build the de-normalisation constants on whatever device the head
    # output lives on rather than forcing CUDA.
    device = roi_cls_loc.device
    mean = torch.Tensor((0., 0., 0., 0.)).to(device).repeat(n_class)[None]
    std = torch.Tensor((0.1, 0.1, 0.2, 0.2)).to(device).repeat(n_class)[None]

    roi_cls_loc = roi_cls_loc * std + mean
    roi_cls_loc = roi_cls_loc.view(-1, n_class, 4)
    # Pair every RoI with each of its per-class offset vectors.
    roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
    cls_bbox = loc2bbox(
        to_numpy(roi).reshape((-1, 4)),
        to_numpy(roi_cls_loc).reshape((-1, 4)))
    cls_bbox = to_tensor(cls_bbox)
    cls_bbox = cls_bbox.view(-1, n_class * 4)

    # Clip the decoded boxes to the original image.
    cls_bbox[:, 0::2] = cls_bbox[:, 0::2].clamp(min=0, max=size[0])
    cls_bbox[:, 1::2] = cls_bbox[:, 1::2].clamp(min=0, max=size[1])

    raw_prob = to_numpy(F.softmax(to_tensor(roi_score), dim=1))
    raw_cls_bbox = to_numpy(cls_bbox)

    bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
    return [bbox], [label], [score]
def __call__(self, loc, score, anchor, img_size, scale=1.):
    """Propose RoIs from anchors; inputs are expected to be ndarrays.

    ``loc``, ``score`` and ``anchor`` describe the same anchor when
    indexed with the same index. :math:`R` denotes the total number of
    anchors, i.e. the image height times its width times the number of
    anchor bases per pixel. The output has the same type as the inputs.

    Remember to call ``.eval()`` on the parent model before testing: the
    proposal budgets are picked from ``self.parent_model.training``.

    Args:
        loc (array): Predicted offsets and scaling to anchors, of shape
            :math:`(R, 4)`.
        score (array): Predicted foreground probability for anchors, of
            shape :math:`(R,)`.
        anchor (array): Coordinates of anchors, of shape :math:`(R, 4)`.
        img_size (tuple of ints): ``(height, width)`` of the image after
            scaling.
        scale (float): The scaling factor that was applied to the image
            after reading it from a file.

    Returns:
        array: Coordinates of the proposal boxes, of shape
        :math:`(S, 4)`. :math:`S` is bounded by
        :obj:`self.n_test_post_nms` at test time and by
        :obj:`self.n_train_post_nms` at train time, and depends on the
        sizes of the predicted boxes and on how many boxes NMS discards.
    """
    in_training = self.parent_model.training
    pre_nms_limit = (
        self.n_train_pre_nms if in_training else self.n_test_pre_nms)
    post_nms_limit = (
        self.n_train_post_nms if in_training else self.n_test_post_nms)

    # Decode the anchors into candidate boxes.
    boxes = loc2bbox(anchor, loc)

    # Keep every coordinate inside the image (columns 0/2 against
    # img_size[0], columns 1/3 against img_size[1]).
    boxes[:, 0:4:2] = np.clip(boxes[:, 0:4:2], 0, img_size[0])
    boxes[:, 1:4:2] = np.clip(boxes[:, 1:4:2], 0, img_size[1])

    # Discard candidates that are too short or too narrow.
    size_floor = self.min_size * scale
    heights = boxes[:, 2] - boxes[:, 0]
    widths = boxes[:, 3] - boxes[:, 1]
    large_enough = np.where(
        (heights >= size_floor) & (widths >= size_floor))[0]
    boxes = boxes[large_enough, :]
    score = score[large_enough]

    # Best score first; optionally truncate to the pre-NMS budget
    # (e.g. 6000).
    ranking = score.ravel().argsort()[::-1]
    if pre_nms_limit > 0:
        ranking = ranking[:pre_nms_limit]
    boxes = boxes[ranking, :]

    # NMS (e.g. threshold 0.7) on the ranked boxes, then truncate to the
    # post-NMS budget (e.g. 300).
    # TODO: drop the round trip through the GPU array library.
    survivors = non_maximum_suppression(
        cp.ascontiguousarray(cp.asarray(boxes)),
        thresh=self.nms_thresh)
    if post_nms_limit > 0:
        survivors = survivors[:post_nms_limit]
    return boxes[survivors]
def forward(self, x, scale, gt_bboxes, gt_labels, original_size=None):
    """Run the detector: losses in training mode, detections otherwise.

    Args:
        x: Input image batch; ``x.shape[2:]`` is used as the image size.
        scale: Resize factor of the input; RoIs are divided by it at
            inference to return to the original image frame.
        gt_bboxes: Ground-truth boxes; only element 0 is used, and only
            in training mode.
        gt_labels: Ground-truth labels; only element 0 is used, and only
            in training mode.
        original_size: Pair of limits used to clip the predicted boxes at
            inference (element 0 for even coordinates, element 1 for odd
            ones). When omitted it is derived as ``img_size / scale``.

    Returns:
        In training mode, a ``LossTuple`` of the RPN localisation loss,
        RPN classification loss, RoI localisation loss, RoI
        classification loss and their sum. Otherwise the
        ``(bbox, label, score)`` triple produced by ``self._suppress``.
    """
    if self.training:
        img_size = tuple(x.shape[2:])

        # Feature extractor from the base network (e.g. VGG16, ResNet).
        feature = self._extract_features(x)

        # Region Proposal Network.
        rpn_result = self.rpn(
            feature, img_size, scale, gt_bboxes[0], gt_labels[0])
        (roi, gt_roi_loc, gt_roi_label,
         rpn_loc_loss, rpn_cls_loss) = rpn_result

        # Bbox regression & classification.
        roi_loc, roi_score = self._bbox_regression_and_classification(
            feature, roi)

        # Convert the targets once and reuse them for both the gather
        # below and the losses.
        gt_roi_loc = at.totensor(gt_roi_loc)
        gt_roi_label = at.totensor(gt_roi_label).long()

        # Keep, for every sample, only the offsets predicted for its
        # ground-truth class.
        n_sample = roi_loc.shape[0]
        roi_loc = roi_loc.view(n_sample, -1, 4)
        sample_index = t.arange(0, n_sample).long().to(roi_loc.device)
        roi_loc = roi_loc[sample_index, gt_roi_label]

        roi_loc_loss = bbox_regression_loss(
            roi_loc.contiguous(),
            gt_roi_loc,
            gt_roi_label.data,
            self.roi_sigma)
        roi_cls_loss = F.cross_entropy(
            roi_score, gt_roi_label.to(roi_score.device))

        # Stack the individual losses and append their total.
        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]
        return LossTuple(*losses)

    with t.no_grad():
        x = at.totensor(x).float()
        img_size = tuple(x.shape[2:])

        # ``original_size`` is optional in the signature but was
        # subscripted unconditionally, failing with a TypeError only after
        # the whole network had run. Fall back to the size implied by the
        # resize factor.
        if original_size is None:
            original_size = (img_size[0] / scale, img_size[1] / scale)

        # Feature extractor from the base network (e.g. VGG16, ResNet).
        feature = self._extract_features(x)

        # Region Proposal Network.
        roi = self.rpn(feature, img_size, scale, None, None)

        # Bbox regression & classification.
        roi_loc, roi_score = self._bbox_regression_and_classification(
            feature, roi)
        roi_loc = roi_loc.data
        roi_score = roi_score.data
        # Express the RoIs in the frame of the original image.
        roi = at.totensor(roi) / scale

        # De-normalise the predicted offsets; the constants are created on
        # the device of the head output instead of forcing CUDA.
        mean = t.tensor(self.loc_normalize_mean).to(roi_loc.device) \
            .repeat(self.n_class)[None]
        std = t.tensor(self.loc_normalize_std).to(roi_loc.device) \
            .repeat(self.n_class)[None]
        roi_loc = roi_loc * std + mean
        roi_loc = roi_loc.view(-1, self.n_class, 4)

        # Pair every RoI with each of its per-class offset vectors and
        # decode them into boxes.
        roi = roi.view(-1, 1, 4).expand_as(roi_loc)
        bbox = loc2bbox(
            at.tonumpy(roi).reshape(-1, 4),
            at.tonumpy(roi_loc).reshape(-1, 4))
        bbox = at.totensor(bbox)
        bbox = bbox.view(-1, self.n_class * 4)

        # Clip the boxes to the original image.
        bbox[:, 0::2] = bbox[:, 0::2].clamp(min=0, max=original_size[0])
        bbox[:, 1::2] = bbox[:, 1::2].clamp(min=0, max=original_size[1])

        prob = F.softmax(at.totensor(roi_score), dim=1)
        bbox, label, score = self._suppress(bbox, prob)
        return bbox, label, score