Пример #1
0
    def compute_object_mislabeling_gradient(self, x, detections):
        """Return the gradient of the object-mislabeling loss w.r.t. the input.

        Args:
            x: Input array. It is copied, multiplied by 255, and its first
                element is transposed with axes ``(2, 0, 1)`` before being
                handed to ``preprocess``.
                NOTE(review): presumably a (1, H, W, C) image batch with
                values in [0, 1] -- confirm against callers.
            detections: NumPy array whose column 0 is used as the labels and
                whose columns ``[-3, -4, -1, -2]`` are used as the boxes.

        Returns:
            numpy.ndarray: The gradient stored on the input tensor, moved to
            the CPU and transposed with axes ``(0, 2, 3, 1)``.
        """
        scaled = x.copy() * 255
        chw_image = scaled[0].transpose((2, 0, 1))

        # Leaf tensor on the GPU whose gradient is read back after backward().
        x_tensor = t.from_numpy(preprocess(chw_image))[None].cuda().float()
        x_tensor.requires_grad = True

        # Add a leading batch axis and reorder the box columns.
        box_array = detections[np.newaxis, :, [-3, -4, -1, -2]]
        label_array = detections[np.newaxis, :, 0]
        target_boxes = t.from_numpy(box_array).float()
        target_labels = t.from_numpy(label_array).int()
        unit_scale = at.scalar(np.asarray([1.]))

        losses = self.forward(x_tensor, target_boxes, target_labels, unit_scale)

        # Reset accumulated parameter gradients before back-propagating.
        self.optimizer.zero_grad()
        self.faster_rcnn.zero_grad()
        losses.object_mislabeling_loss.backward()

        input_grad = x_tensor.grad.data.cpu().numpy()
        return input_grad.transpose((0, 2, 3, 1))
Пример #2
0
    def compute_object_fabrication_gradient(self, x, detections=None):
        """Return the gradient of the object-fabrication loss w.r.t. the input.

        The forward pass is run against a single all-zero box with label 0.

        Args:
            x: Input array. It is copied, multiplied by 255, and its first
                element is transposed with axes ``(2, 0, 1)`` before being
                handed to ``preprocess``.
                NOTE(review): presumably a (1, H, W, C) image batch with
                values in [0, 1] -- confirm against callers.
            detections: Accepted but not used by this method.

        Returns:
            numpy.ndarray: The gradient stored on the input tensor, moved to
            the CPU and transposed with axes ``(0, 2, 3, 1)``.
        """
        scaled = x.copy() * 255
        chw_image = scaled[0].transpose((2, 0, 1))

        # Leaf tensor on the GPU whose gradient is read back after backward().
        x_tensor = t.from_numpy(preprocess(chw_image))[None].cuda().float()
        x_tensor.requires_grad = True

        # Placeholder targets: one zero box and one zero label.
        placeholder_boxes = t.from_numpy(np.zeros((1, 1, 4))).float()
        placeholder_labels = t.from_numpy(np.zeros((1, 1))).int()
        unit_scale = at.scalar(np.asarray([1.]))

        losses = self.forward(
            x_tensor, placeholder_boxes, placeholder_labels, unit_scale)

        # Reset accumulated parameter gradients before back-propagating.
        self.optimizer.zero_grad()
        self.faster_rcnn.zero_grad()
        losses.object_fabrication_loss.backward()

        input_grad = x_tensor.grad.data.cpu().numpy()
        return input_grad.transpose((0, 2, 3, 1))
Пример #3
0
    def compute_object_untargeted_gradient(self, x, detections):
        """Return the gradient of the untargeted loss w.r.t. the input.

        When ``detections`` is ``None`` or empty, the method falls back to a
        single all-zero box with label 0 and back-propagates the
        object-fabrication loss instead of the untargeted loss.

        Args:
            x: Input array. It is copied, multiplied by 255, and its first
                element is transposed with axes ``(2, 0, 1)`` before being
                handed to ``preprocess``.
                NOTE(review): presumably a (1, H, W, C) image batch with
                values in [0, 1] -- confirm against callers.
            detections: ``None``, or a NumPy array whose column 0 is used as
                the labels and whose columns ``[-3, -4, -1, -2]`` are used as
                the boxes.

        Returns:
            numpy.ndarray: The gradient stored on the input tensor, moved to
            the CPU and transposed with axes ``(0, 2, 3, 1)``.
        """
        x_local = x.copy() * 255

        x_tensor = t.from_numpy(preprocess(x_local[0].transpose(
            (2, 0, 1))))[None].cuda().float()
        x_tensor.requires_grad = True

        # Evaluate the condition once so that target construction and loss
        # selection always agree, and so that ``detections=None`` no longer
        # reaches ``len()`` (which raised TypeError after the forward pass).
        has_detections = detections is not None and len(detections) > 0

        if has_detections:
            _bboxes = t.from_numpy(detections[np.newaxis, :,
                                              [-3, -4, -1, -2]]).float()
            _labels = t.from_numpy(detections[np.newaxis, :, 0]).int()
        else:
            _bboxes = t.from_numpy(np.zeros((1, 1, 4))).float()
            _labels = t.from_numpy(np.zeros((1, 1))).int()
        _scale = at.scalar(np.asarray([1.]))

        losses = self.forward(x_tensor, _bboxes, _labels, _scale)

        # Reset accumulated parameter gradients before back-propagating.
        self.optimizer.zero_grad()
        self.faster_rcnn.zero_grad()
        if has_detections:
            losses.object_untargeted_loss.backward()
        else:
            losses.object_fabrication_loss.backward()
        return x_tensor.grad.data.cpu().numpy().transpose((0, 2, 3, 1))
Пример #4
0
    def predict(self, imgs, score_thresh, nms_thresh):
        """Detect objects in every image of ``imgs``.

        The model is switched to evaluation mode for the duration of the
        call. Afterwards -- including when an error is raised part-way
        through -- the ``'evaluate'`` preset is re-applied and the model is
        put back into training mode.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images.
                All images are in CHW and RGB format
                and the range of their value is :math:`[0, 255]`.
            score_thresh: Value stored in ``self.score_thresh`` before
                detection runs.
                NOTE(review): presumably read by ``self._suppress`` --
                confirm.
            nms_thresh: Value stored in ``self.nms_thresh`` before detection
                runs.
                NOTE(review): presumably read by ``self._suppress`` --
                confirm.

        Returns:
            tuple of lists:
            This method returns a tuple of four lists,
            :obj:`(bboxes, labels, scores, logits)`, each holding one entry
            per input image as produced by ``self._suppress``.

            * **bboxes**: A list of float arrays of shape :math:`(R, 4)`,
              where :math:`R` is the number of bounding boxes in a image.
              Each bounding box is organized by
              :math:`(y_{min}, x_{min}, y_{max}, x_{max})`
              in the second axis.
            * **labels**: A list of integer arrays of shape :math:`(R,)`.
              Each value indicates the class of the bounding box.
              Values are in range :math:`[0, L - 1]`, where :math:`L` is the
              number of the foreground classes.
            * **scores**: A list of float arrays of shape :math:`(R,)`.
              Each value indicates how confident the prediction is.
            * **logits**: A list with the values ``self._suppress`` derives
              from the raw (pre-softmax) RoI scores.

        """
        self.eval()
        self.score_thresh = score_thresh
        self.nms_thresh = nms_thresh

        bboxes = list()
        labels = list()
        scores = list()
        logits = list()
        try:
            # Remember each image's original spatial size before it is
            # preprocessed, so predictions can be mapped back to it.
            prepared_imgs = list()
            sizes = list()
            for img in imgs:
                sizes.append(img.shape[1:])
                prepared_imgs.append(preprocess(at.tonumpy(img)))

            # Location de-normalisation constants do not depend on the image,
            # so build them once instead of once per iteration.
            mean = t.Tensor(self.loc_normalize_mean).cuda().repeat(
                self.n_class)[None]
            std = t.Tensor(self.loc_normalize_std).cuda().repeat(
                self.n_class)[None]

            for img, size in zip(prepared_imgs, sizes):
                img = at.totensor(img[None]).float()
                # Ratio between the preprocessed width and the original width.
                scale = img.shape[3] / size[1]
                roi_cls_loc, roi_scores, rois, _ = self(img, scale=scale)
                # We are assuming that batch size is 1.
                roi_score = roi_scores.data
                roi_cls_loc = roi_cls_loc.data
                roi = at.totensor(rois) / scale

                # Convert predictions to bounding boxes in image coordinates.
                # Bounding boxes are scaled to the scale of the input images.
                roi_cls_loc = (roi_cls_loc * std + mean)
                roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
                roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
                cls_bbox = loc2bbox(
                    at.tonumpy(roi).reshape((-1, 4)),
                    at.tonumpy(roi_cls_loc).reshape((-1, 4)))
                cls_bbox = at.totensor(cls_bbox)
                cls_bbox = cls_bbox.view(-1, self.n_class * 4)
                # Clip bounding boxes to the original image: even columns are
                # limited by size[0], odd columns by size[1].
                cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(
                    min=0, max=size[0])
                cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(
                    min=0, max=size[1])

                prob = at.tonumpy(F.softmax(at.totensor(roi_score), dim=1))
                logit_ = at.tonumpy(at.totensor(roi_score))

                raw_cls_bbox = at.tonumpy(cls_bbox)
                raw_prob = at.tonumpy(prob)
                raw_logit = at.tonumpy(logit_)

                bbox, label, score, logit = self._suppress(
                    raw_cls_bbox, raw_prob, raw_logit)
                bboxes.append(bbox)
                labels.append(label)
                scores.append(score)
                logits.append(logit)
        finally:
            # Always undo the temporary threshold override and leave eval
            # mode, even if preprocessing or the forward pass failed.
            self.use_preset('evaluate')
            self.train()
        return bboxes, labels, scores, logits