def compute_object_mislabeling_gradient(self, x, detections):
    """Return the gradient of the object-mislabeling loss w.r.t. the input.

    Args:
        x: Image batch as a numpy array. Only ``x[0]`` is used; it is
            multiplied by 255 and its axes are permuted with ``(2, 0, 1)``
            before ``preprocess`` (assumes HWC layout with values in
            [0, 1] -- TODO confirm against callers).
        detections: 2-D numpy array with one row per detection. Column 0 is
            read as the class label and the last four columns are read as
            the box coordinates.

    Returns:
        numpy.ndarray: Gradient of ``object_mislabeling_loss`` with respect
        to the preprocessed input tensor, with axes permuted by
        ``(0, 2, 3, 1)`` (channels moved to the last axis).
    """
    # Build the differentiable network input from the first image only.
    rescaled = x.copy() * 255
    channels_first = rescaled[0].transpose((2, 0, 1))
    x_tensor = t.from_numpy(preprocess(channels_first))[None].cuda().float()
    x_tensor.requires_grad = True

    # Swap adjacent pairs of the last four columns (presumably
    # (x1, y1, x2, y2) -> (y1, x1, y2, x2); verify against the detector's
    # output format) and add a leading batch axis.
    box_columns = [-3, -4, -1, -2]
    target_boxes = t.from_numpy(detections[np.newaxis, :, box_columns]).float()
    target_labels = t.from_numpy(detections[np.newaxis, :, 0]).int()
    unit_scale = at.scalar(np.asarray([1.]))

    losses = self.forward(x_tensor, target_boxes, target_labels, unit_scale)

    # Clear stale parameter gradients before back-propagating to the input.
    self.optimizer.zero_grad()
    self.faster_rcnn.zero_grad()
    losses.object_mislabeling_loss.backward()

    input_grad = x_tensor.grad.data.cpu().numpy()
    return input_grad.transpose((0, 2, 3, 1))
def compute_object_fabrication_gradient(self, x, detections=None):
    """Return the gradient of the object-fabrication loss w.r.t. the input.

    Args:
        x: Image batch as a numpy array. Only ``x[0]`` is used; it is
            multiplied by 255 and its axes are permuted with ``(2, 0, 1)``
            before ``preprocess`` (assumes HWC layout with values in
            [0, 1] -- TODO confirm against callers).
        detections: Unused. Accepted so the signature matches the other
            ``compute_object_*_gradient`` methods.

    Returns:
        numpy.ndarray: Gradient of ``object_fabrication_loss`` with respect
        to the preprocessed input tensor, with axes permuted by
        ``(0, 2, 3, 1)`` (channels moved to the last axis).
    """
    # Build the differentiable network input from the first image only.
    rescaled = x.copy() * 255
    channels_first = rescaled[0].transpose((2, 0, 1))
    x_tensor = t.from_numpy(preprocess(channels_first))[None].cuda().float()
    x_tensor.requires_grad = True

    # The forward pass still needs box/label arguments, so feed a single
    # all-zero placeholder box with label 0.
    placeholder_boxes = t.from_numpy(np.zeros((1, 1, 4))).float()
    placeholder_labels = t.from_numpy(np.zeros((1, 1))).int()
    unit_scale = at.scalar(np.asarray([1.]))

    losses = self.forward(
        x_tensor, placeholder_boxes, placeholder_labels, unit_scale)

    # Clear stale parameter gradients before back-propagating to the input.
    self.optimizer.zero_grad()
    self.faster_rcnn.zero_grad()
    losses.object_fabrication_loss.backward()

    input_grad = x_tensor.grad.data.cpu().numpy()
    return input_grad.transpose((0, 2, 3, 1))
def compute_object_untargeted_gradient(self, x, detections):
    """Return the gradient of the untargeted attack loss w.r.t. the input.

    When detections are available, ``object_untargeted_loss`` is
    back-propagated. When ``detections`` is ``None`` or empty, a single
    all-zero placeholder box is fed to the network and
    ``object_fabrication_loss`` is back-propagated instead.

    Args:
        x: Image batch as a numpy array. Only ``x[0]`` is used; it is
            multiplied by 255 and its axes are permuted with ``(2, 0, 1)``
            before ``preprocess`` (assumes HWC layout with values in
            [0, 1] -- TODO confirm against callers).
        detections: ``None``, or a 2-D numpy array with one row per
            detection. Column 0 is read as the class label and the last
            four columns are read as the box coordinates.

    Returns:
        numpy.ndarray: Gradient of the selected loss with respect to the
        preprocessed input tensor, with axes permuted by ``(0, 2, 3, 1)``
        (channels moved to the last axis).
    """
    # Evaluate the "do we have detections" condition once so that target
    # construction and loss selection can never disagree. The loss selection
    # previously called len(detections) unguarded and raised TypeError for
    # detections=None.
    has_detections = detections is not None and len(detections) > 0

    x_local = x.copy() * 255
    x_tensor = t.from_numpy(
        preprocess(x_local[0].transpose((2, 0, 1))))[None].cuda().float()
    x_tensor.requires_grad = True

    if has_detections:
        # Swap adjacent pairs of the last four columns (presumably
        # (x1, y1, x2, y2) -> (y1, x1, y2, x2); verify against the
        # detector's output format) and add a leading batch axis.
        _bboxes = t.from_numpy(
            detections[np.newaxis, :, [-3, -4, -1, -2]]).float()
        _labels = t.from_numpy(detections[np.newaxis, :, 0]).int()
    else:
        # Placeholder targets: one all-zero box with label 0.
        _bboxes = t.from_numpy(np.zeros((1, 1, 4))).float()
        _labels = t.from_numpy(np.zeros((1, 1))).int()
    _scale = at.scalar(np.asarray([1.]))

    losses = self.forward(x_tensor, _bboxes, _labels, _scale)

    # Clear stale parameter gradients before back-propagating to the input.
    self.optimizer.zero_grad()
    self.faster_rcnn.zero_grad()
    if has_detections:
        losses.object_untargeted_loss.backward()
    else:
        losses.object_fabrication_loss.backward()

    return x_tensor.grad.data.cpu().numpy().transpose((0, 2, 3, 1))
def predict(self, imgs, score_thresh, nms_thresh):
    """Detect objects from images.

    This method predicts objects for each image. The module is switched to
    evaluation mode for the duration of the call; afterwards the
    ``'evaluate'`` preset is re-applied and the module is put back into
    training mode, even if inference raises.

    Args:
        imgs (iterable of numpy.ndarray): Arrays holding images.
            All images are in CHW and RGB format
            and the range of their value is [0, 255].
        score_thresh: Stored on ``self.score_thresh`` before inference
            (presumably the confidence cut-off read by ``self._suppress``
            -- confirm there).
        nms_thresh: Stored on ``self.nms_thresh`` before inference
            (presumably the NMS overlap threshold read by
            ``self._suppress`` -- confirm there).

    Returns:
        tuple of lists: A tuple of four lists,
        ``(bboxes, labels, scores, logits)``, each with one entry per
        input image, as produced by ``self._suppress``.

        * **bboxes**: Float arrays of shape (R, 4), where R is the number
          of bounding boxes in an image. Each bounding box is organized
          by (y_min, x_min, y_max, x_max) in the second axis.
        * **labels**: Integer arrays of shape (R,). Each value indicates
          the class of the bounding box. Values are in range [0, L - 1],
          where L is the number of the foreground classes.
        * **scores**: Float arrays of shape (R,). Each value indicates
          how confident the prediction is.
        * **logits**: The pre-softmax class scores that ``self._suppress``
          returns alongside each kept box (exact shape is determined by
          ``self._suppress``).
    """
    self.eval()
    self.score_thresh = score_thresh
    self.nms_thresh = nms_thresh

    try:
        prepared_imgs = []
        sizes = []
        for img in imgs:
            # Remember the original spatial size so that predicted boxes
            # can be mapped back to input-image coordinates.
            size = img.shape[1:]
            img = preprocess(at.tonumpy(img))
            prepared_imgs.append(img)
            sizes.append(size)

        bboxes = []
        labels = []
        scores = []
        logits = []
        for img, size in zip(prepared_imgs, sizes):
            img = at.totensor(img[None]).float()
            scale = img.shape[3] / size[1]
            roi_cls_loc, roi_scores, rois, _ = self(img, scale=scale)
            # We are assuming that batch size is 1.
            roi_score = roi_scores.data
            roi_cls_loc = roi_cls_loc.data
            roi = at.totensor(rois) / scale

            # Convert predictions to bounding boxes in image coordinates.
            # Bounding boxes are scaled to the scale of the input images.
            mean = t.Tensor(self.loc_normalize_mean).cuda().repeat(
                self.n_class)[None]
            std = t.Tensor(self.loc_normalize_std).cuda().repeat(
                self.n_class)[None]

            roi_cls_loc = (roi_cls_loc * std + mean)
            roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
            roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
            cls_bbox = loc2bbox(
                at.tonumpy(roi).reshape((-1, 4)),
                at.tonumpy(roi_cls_loc).reshape((-1, 4)))
            cls_bbox = at.totensor(cls_bbox)
            cls_bbox = cls_bbox.view(-1, self.n_class * 4)
            # Clip bounding boxes to the original image: even columns are
            # bounded by size[0], odd columns by size[1].
            cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
            cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

            prob = at.tonumpy(F.softmax(at.totensor(roi_score), dim=1))
            logit_ = at.tonumpy(at.totensor(roi_score))

            raw_cls_bbox = at.tonumpy(cls_bbox)
            raw_prob = at.tonumpy(prob)
            raw_logit = at.tonumpy(logit_)

            bbox, label, score, logit = self._suppress(
                raw_cls_bbox, raw_prob, raw_logit)
            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)
            logits.append(logit)
    finally:
        # Always undo the temporary inference configuration so a failure
        # during prediction cannot leave the module in eval mode with the
        # caller-supplied thresholds still installed.
        self.use_preset('evaluate')
        self.train()

    return bboxes, labels, scores, logits