def predict(self, imgs):
        """Run inference on each image and collect per-image detections.

        Every image is preprocessed with ``self.prepare`` and forwarded
        through the network one at a time (batch size 1) with
        ``train=False`` and back-propagation disabled.  The predicted
        localisation offsets are de-normalised, converted to boxes with
        ``loc2bbox``, clipped to the original image size and finally
        filtered by ``self._suppress`` together with the RoIs, class
        probabilities and sigmoid mask outputs.

        Args:
            imgs (iterable of numpy.ndarray): Input images.
                ``img.shape[1:]`` is used as the image size, so a
                channel-first layout is expected (presumably CHW).

        Returns:
            tuple of lists:
            ``(bboxes, out_rois, labels, scores, masks)``.  Each list has
            one entry per input image holding the corresponding value
            returned by ``self._suppress``.

        """
        prepared_imgs = []
        sizes = []
        for img in imgs:
            size = img.shape[1:]
            img = self.prepare(img.astype(np.float32))
            prepared_imgs.append(img)
            sizes.append(size)

        bboxes = []
        out_rois = []
        labels = []
        scores = []
        masks = []

        # The de-normalisation constants do not depend on the image, so
        # they are built once instead of once per iteration.
        mean = self.xp.tile(self.xp.asarray(self.loc_normalize_mean),
                            self.n_class)
        std = self.xp.tile(self.xp.asarray(self.loc_normalize_std),
                           self.n_class)

        for img, size in zip(prepared_imgs, sizes):
            with chainer.using_config('train', False), \
                    chainer.function.no_backprop_mode():
                img_var = chainer.Variable(self.xp.asarray(img[None]))
                # Ratio between the prepared width and the original width.
                scale = img_var.shape[3] / size[1]
                roi_cls_locs, roi_scores, rois, _, roi_masks = self.__call__(
                    img_var, scale=scale)

            # We are assuming that batch size is 1.
            roi_cls_loc = roi_cls_locs.data
            roi_score = roi_scores.data
            roi_mask = F.sigmoid(roi_masks).data
            # Bring the RoIs back to the scale of the original image.
            roi = rois / scale

            roi_cls_loc = (roi_cls_loc * std + mean).astype(np.float32)
            roi_cls_loc = roi_cls_loc.reshape((-1, self.n_class, 4))
            # Repeat every RoI once per class so it pairs with its offsets.
            roi = self.xp.broadcast_to(roi[:, None],
                                       roi_cls_loc.shape).reshape((-1, 4))
            cls_bbox = loc2bbox(roi, roi_cls_loc.reshape((-1, 4)))
            cls_bbox = cls_bbox.reshape((-1, self.n_class * 4))
            cls_roi = roi.reshape((-1, self.n_class * 4))

            # Clip boxes and RoIs to the original image: even columns are
            # bounded by size[0], odd columns by size[1].
            cls_bbox[:, 0::2] = self.xp.clip(cls_bbox[:, 0::2], 0, size[0])
            cls_bbox[:, 1::2] = self.xp.clip(cls_bbox[:, 1::2], 0, size[1])
            cls_roi[:, 0::2] = self.xp.clip(cls_roi[:, 0::2], 0, size[0])
            cls_roi[:, 1::2] = self.xp.clip(cls_roi[:, 1::2], 0, size[1])

            prob = F.softmax(roi_score).data

            raw_cls_bbox = cuda.to_cpu(cls_bbox)
            raw_cls_roi = cuda.to_cpu(cls_roi)
            raw_prob = cuda.to_cpu(prob)
            raw_mask = cuda.to_cpu(roi_mask)
            bbox, out_roi, label, score, mask = self._suppress(
                raw_cls_bbox, raw_cls_roi, raw_prob, raw_mask)
            bboxes.append(bbox)
            out_rois.append(out_roi)
            labels.append(label)
            scores.append(score)
            masks.append(mask)

        return bboxes, out_rois, labels, scores, masks
示例#2
0
    def _to_bbox_label_score(self, roi_cls_locs, roi_scores, rois, roi_indices, scale, size):  # NOQA
        """Convert the head outputs of one image into final detections.

        The localisation offsets are de-normalised, decoded against the
        RoIs with ``loc2bbox``, clipped to ``size`` and passed with the
        class probabilities to ``self._suppress``.  Boxes whose rounded
        area is zero are dropped and, when ``self._detections_per_im`` is
        positive, only that many highest-scoring detections are kept.

        Args:
            roi_cls_locs: Object exposing the predicted offsets as
                ``.data`` (presumably a ``chainer.Variable``).
            roi_scores: Object exposing the class scores as ``.data``.
            rois (array): RoIs in the scale of the network input.
            roi_indices (array): Unused; kept so the signature stays
                compatible with existing callers.
            scale (float): Factor the RoIs are divided by to return to the
                original image scale.
            size (tuple): Clip bounds; ``size[0]`` bounds the even box
                columns and ``size[1]`` the odd ones.

        Returns:
            tuple: ``(bbox, label, score)`` arrays after suppression and
            filtering.

        """
        # We are assuming that batch size is 1.
        roi_cls_loc = roi_cls_locs.data
        roi_score = roi_scores.data
        roi = rois / scale

        # Convert predictions to bounding boxes in image coordinates.
        # Bounding boxes are scaled to the scale of the input images.
        mean = self.xp.tile(self.xp.asarray(self.loc_normalize_mean),
                            self.n_class)
        std = self.xp.tile(self.xp.asarray(self.loc_normalize_std),
                           self.n_class)
        roi_cls_loc = (roi_cls_loc * std + mean).astype(np.float32)
        roi_cls_loc = roi_cls_loc.reshape((-1, self.n_class, 4))
        roi_cls = self.xp.broadcast_to(roi[:, None], roi_cls_loc.shape)
        cls_bbox = loc2bbox(roi_cls.reshape((-1, 4)),
                            roi_cls_loc.reshape((-1, 4)))
        cls_bbox = cls_bbox.reshape((-1, self.n_class * 4))
        # clip bounding box
        cls_bbox[:, 0::2] = self.xp.clip(cls_bbox[:, 0::2], 0, size[0])
        cls_bbox[:, 1::2] = self.xp.clip(cls_bbox[:, 1::2], 0, size[1])

        prob = F.softmax(roi_score).data

        raw_cls_bbox = cuda.to_cpu(cls_bbox)
        raw_prob = cuda.to_cpu(prob)

        if self.context:
            # Zero the probability of every foreground class that is not in
            # the context (column 0 is left untouched).
            n_fg_class = self.n_class - 1
            for fg_label in range(n_fg_class):
                if fg_label not in self.context:
                    raw_prob[:, fg_label + 1] = 0
            # Renormalise per RoI (row).  Summing over axis 0 would divide
            # each class column by its own total, which is 0 for the
            # columns zeroed above and therefore yields NaN.
            raw_prob = raw_prob / raw_prob.sum(axis=1, keepdims=True)
        bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)

        # Discard boxes that collapse to zero area once rounded to pixels.
        bbox_int = np.round(bbox).astype(np.int32)
        bbox_sizes = ((bbox_int[:, 2] - bbox_int[:, 0]) *
                      (bbox_int[:, 3] - bbox_int[:, 1]))
        keep = bbox_sizes > 0
        bbox = bbox[keep]
        label = label[keep]
        score = score[keep]

        if self._detections_per_im > 0:
            # Keep the top-k detections by score.  argsort is ascending, so
            # the last k entries are the best ones; sorting those indices
            # preserves the original ordering of the kept detections.
            top_k = int(self._detections_per_im)
            keep = np.sort(np.argsort(score)[-top_k:])
            bbox = bbox[keep]
            label = label[keep]
            score = score[keep]

        return bbox, label, score
示例#3
0
    def __call__(self, x, rois, roi_indices, img_size):
        """Forward the chain.

        We assume that there are :math:`N` batches.

        Args:
            x (~chainer.Variable): 4D image variable.
            rois (array): A bounding box array containing coordinates of
                proposal boxes.  This is a concatenation of bounding box
                arrays from multiple images in the batch.
                Its shape is :math:`(R', 4)`. Given :math:`R_i` proposed
                RoIs from the :math:`i` th image,
                :math:`R' = \\sum _{i=1} ^ N R_i`.
            roi_indices (array): An array containing indices of images to
                which bounding boxes correspond to. Its shape is :math:`(R',)`.
            img_size (tuple of int): A tuple containing image size.

        Returns:
            tuple: ``(roi_ag_seg_scores, roi_ag_locs, roi_cls_scores, rois,
            roi_indices)``.  When ``self.iter2`` is false these are the
            results of a single pooling pass together with the unchanged
            ``rois`` and ``roi_indices``.  Otherwise each element is the
            first-axis concatenation of the first pass and of a second
            pass run on RoIs refined by the first pass.

        """
        feature = F.relu(self.conv1(x))
        cls_seg_map = self.cls_seg(feature)
        ag_loc_map = self.ag_loc(feature)

        # First pass: PSROI pooling and regression on the given RoIs.
        seg_scores, ag_locs, cls_scores = self._pool(
            cls_seg_map, ag_loc_map, rois, roi_indices)
        if not self.iter2:
            return seg_scores, ag_locs, cls_scores, rois, roi_indices

        # Second pass: refine the RoIs with the first-pass regression and
        # pool again for a more precise prediction.
        ag_locs = ag_locs.array
        loc_mean = self.xp.array(self.loc_normalize_mean)
        loc_std = self.xp.array(self.loc_normalize_std)
        # Index 1 on the second axis is used for the refinement
        # (presumably the foreground entry -- TODO confirm).
        refined_locs = (ag_locs[:, 1, :] * loc_std + loc_mean).astype(
            np.float32)
        refined_rois = loc2bbox(rois, refined_locs)

        # Keep the refined RoIs inside the image.
        refined_rois[:, 0::2] = self.xp.clip(
            refined_rois[:, 0::2], 0, img_size[0])
        refined_rois[:, 1::2] = self.xp.clip(
            refined_rois[:, 1::2], 0, img_size[1])

        seg_scores2, ag_locs2, cls_scores2 = self._pool(
            cls_seg_map, ag_loc_map, refined_rois, roi_indices)

        # Stack the results of both passes along the first axis.
        merged_rois = self.xp.concatenate((rois, refined_rois))
        merged_indices = self.xp.concatenate((roi_indices, roi_indices))
        merged_seg_scores = F.concat((seg_scores, seg_scores2), axis=0)
        merged_locs = F.concat((ag_locs, ag_locs2), axis=0)
        merged_cls_scores = F.concat((cls_scores, cls_scores2), axis=0)
        return merged_seg_scores, merged_locs, merged_cls_scores, \
            merged_rois, merged_indices
示例#4
0
    def predict_each_box(self, imgs):
        """Predict boxes for each image via ``self._suppress_each_box``.

        Each image is preprocessed with ``self.prepare`` and forwarded on
        its own (batch size 1) in test mode without back-propagation.  The
        predicted offsets are de-normalised, decoded with ``loc2bbox``,
        clipped to the original image size and handed, together with the
        softmax probabilities, to ``self._suppress_each_box``.

        Args:
            imgs (iterable of numpy.ndarray): Input images;
                ``img.shape[1:]`` is taken as the image size.

        Returns:
            tuple of lists: ``(bboxes, labels, scores)`` with one entry
            per image, holding the three values returned by
            ``self._suppress_each_box``.

        """
        inputs = []
        for raw_img in imgs:
            orig_size = raw_img.shape[1:]
            net_img = self.prepare(raw_img.astype(np.float32))
            inputs.append((net_img, orig_size))

        bboxes, labels, scores = [], [], []
        for net_img, orig_size in inputs:
            with chainer.using_config('train', False), \
                    chainer.function.no_backprop_mode():
                batch = chainer.Variable(self.xp.asarray(net_img[None]))
                scale = batch.shape[3] / orig_size[1]
                loc_out, score_out, rois, _ = self.forward(
                    batch, scales=[scale])

            # Batch size is assumed to be 1 from here on.
            raw_locs = loc_out.array
            raw_scores = score_out.array
            scaled_rois = rois / scale

            # Undo the offset normalisation and decode the offsets into
            # boxes expressed in the scale of the original image.
            loc_mean = self.xp.tile(
                self.xp.asarray(self.loc_normalize_mean), self.n_class)
            loc_std = self.xp.tile(
                self.xp.asarray(self.loc_normalize_std), self.n_class)
            locs = (raw_locs * loc_std + loc_mean).astype(np.float32)
            locs = locs.reshape((-1, self.n_class, 4))
            tiled_rois = self.xp.broadcast_to(
                scaled_rois[:, None], locs.shape)
            cls_bbox = loc2bbox(
                tiled_rois.reshape((-1, 4)), locs.reshape((-1, 4)))
            cls_bbox = cls_bbox.reshape((-1, self.n_class * 4))

            # Clip the boxes to the original image.
            cls_bbox[:, 0::2] = self.xp.clip(
                cls_bbox[:, 0::2], 0, orig_size[0])
            cls_bbox[:, 1::2] = self.xp.clip(
                cls_bbox[:, 1::2], 0, orig_size[1])

            probs = chainer.functions.softmax(raw_scores).array

            bbox, label, score = self._suppress_each_box(
                cuda.to_cpu(cls_bbox), cuda.to_cpu(probs))
            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)

        return bboxes, labels, scores
示例#5
0
    def __call__(self,
                 x,
                 rois,
                 roi_indices,
                 img_size,
                 iter2,
                 gt_roi_labels=None):
        """Pool RoI features and optionally refine the RoIs a second time.

        Args:
            x: Feature map fed to ``self.psroi_conv1``.
            rois (array): RoI coordinates, one row of four values per RoI.
            roi_indices (array): Per-RoI indices; cast to ``float32`` and
                prepended to ``rois`` as the first column.
            img_size (tuple): Two values ``(H, W)`` used to clip the
                refined RoIs.
            iter2 (bool): Whether to run the second, refined pass.
            gt_roi_labels: Forwarded unchanged to ``self._pool``.

        Returns:
            tuple: ``(rois, roi_indices, roi_seg_scores, roi_cls_locs,
            roi_cls_scores)``.  With ``iter2`` true each element is the
            first-axis concatenation of the first and the second pass.
            ``roi_indices`` is returned as ``float32``.

        """
        roi_indices = roi_indices.astype(np.float32)
        first_pass_rois = self.xp.concatenate(
            (roi_indices[:, None], rois), axis=1)

        feature = F.relu(self.psroi_conv1(x))
        cls_seg_map = self.psroi_conv2(feature)
        loc_map = self.psroi_conv3(feature)

        # First pass: PSROI pooling and regression.
        roi_seg_scores, roi_cls_locs, roi_cls_scores = self._pool(
            first_pass_rois, cls_seg_map, loc_map,
            gt_roi_labels=gt_roi_labels)
        if not iter2:
            return (rois, roi_indices, roi_seg_scores, roi_cls_locs,
                    roi_cls_scores)

        # Second pass: derive refined RoIs from the first-pass regression.
        first_locs = roi_cls_locs.array
        # Index 1 on the second axis drives the refinement
        # (presumably the foreground entry -- TODO confirm).
        selected_locs = first_locs[:, 1, :]
        loc_mean = self.xp.array(self.loc_normalize_mean, np.float32)
        loc_std = self.xp.array(self.loc_normalize_std, np.float32)
        rois2 = loc2bbox(rois, selected_locs * loc_std + loc_mean)

        height, width = img_size
        rois2[:, 0::2] = self.xp.clip(rois2[:, 0::2], 0, height)
        rois2[:, 1::2] = self.xp.clip(rois2[:, 1::2], 0, width)

        second_pass_rois = self.xp.concatenate(
            (roi_indices[:, None], rois2), axis=1)
        seg_scores2, cls_locs2, cls_scores2 = self._pool(
            second_pass_rois, cls_seg_map, loc_map,
            gt_roi_labels=gt_roi_labels)

        # Stack the results of both passes along the first axis.
        all_rois = self.xp.concatenate((rois, rois2))
        all_indices = self.xp.concatenate((roi_indices, roi_indices))
        all_cls_scores = F.concat((roi_cls_scores, cls_scores2), axis=0)
        all_cls_locs = F.concat((first_locs, cls_locs2), axis=0)
        all_seg_scores = F.concat((roi_seg_scores, seg_scores2), axis=0)
        return (all_rois, all_indices, all_seg_scores, all_cls_locs,
                all_cls_scores)
示例#6
0
    def predict(self, imgs):
        """Detect objects from images.

        Each image is prepared with ``self.prepare`` and forwarded on its
        own (batch size 1) in test mode without back-propagation; the raw
        predictions are then decoded, clipped to the image and filtered by
        ``self._suppress``.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images.
                All images are in CHW and RGB format
                and the range of their value is :math:`[0, 255]`.

        Returns:
           tuple of lists:
           A tuple of three lists, :obj:`(bboxes, labels, scores)`, each
           with one entry per image.

           * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
               where :math:`R` is the number of bounding boxes in an \
               image. Each bounding box is organized by \
               :math:`(y_{min}, x_{min}, y_{max}, x_{max})` \
               in the second axis.
           * **labels** : A list of integer arrays of shape :math:`(R,)`. \
               Each value indicates the class of the bounding box. \
               Values are in range :math:`[0, L - 1]`, where :math:`L` is \
               the number of the foreground classes.
           * **scores** : A list of float arrays of shape :math:`(R,)`. \
               Each value indicates how confident the prediction is.

        """
        inputs = []
        for raw_img in imgs:
            orig_size = raw_img.shape[1:]
            net_img = self.prepare(raw_img.astype(np.float32))
            inputs.append((net_img, orig_size))

        bboxes, labels, scores = [], [], []
        for net_img, orig_size in inputs:
            with chainer.using_config('train', False), \
                    chainer.function.no_backprop_mode():
                batch = chainer.Variable(self.xp.asarray(net_img[None]))
                scale = batch.shape[3] / orig_size[1]
                loc_out, score_out, rois, _ = self.__call__(
                    batch, scale=scale)

            # Batch size is assumed to be 1 from here on.
            raw_locs = loc_out.array
            raw_scores = score_out.array
            scaled_rois = rois / scale

            # Undo the offset normalisation and decode the offsets into
            # boxes expressed in the scale of the original image.
            loc_mean = self.xp.tile(
                self.xp.asarray(self.loc_normalize_mean), self.n_class)
            loc_std = self.xp.tile(
                self.xp.asarray(self.loc_normalize_std), self.n_class)
            locs = (raw_locs * loc_std + loc_mean).astype(np.float32)
            locs = locs.reshape((-1, self.n_class, 4))
            tiled_rois = self.xp.broadcast_to(
                scaled_rois[:, None], locs.shape)
            cls_bbox = loc2bbox(
                tiled_rois.reshape((-1, 4)), locs.reshape((-1, 4)))
            cls_bbox = cls_bbox.reshape((-1, self.n_class * 4))

            # Clip the boxes to the original image (y bound, then x bound).
            cls_bbox[:, 0::2] = self.xp.clip(
                cls_bbox[:, 0::2], 0, orig_size[0])
            cls_bbox[:, 1::2] = self.xp.clip(
                cls_bbox[:, 1::2], 0, orig_size[1])

            probs = F.softmax(raw_scores).array

            bbox, label, score = self._suppress(
                cuda.to_cpu(cls_bbox), cuda.to_cpu(probs))
            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)

        return bboxes, labels, scores
示例#7
0
    def predict(self, imgs):
        """Detect objects from images.

        Images are processed one by one: each is prepared with
        ``self.prepare``, forwarded in test mode with back-propagation
        disabled, and its predictions are decoded into clipped boxes that
        ``self._suppress`` reduces to the final detections.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images.
                All images are in CHW and RGB format
                and the range of their value is :math:`[0, 255]`.

        Returns:
           tuple of lists:
           A tuple of three lists, :obj:`(bboxes, labels, scores)`, each
           with one entry per image.

           * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
               where :math:`R` is the number of bounding boxes in an \
               image. Each bounding box is organized by \
               :math:`(y_{min}, x_{min}, y_{max}, x_{max})` \
               in the second axis.
           * **labels** : A list of integer arrays of shape :math:`(R,)`. \
               Each value indicates the class of the bounding box. \
               Values are in range :math:`[0, L - 1]`, where :math:`L` is \
               the number of the foreground classes.
           * **scores** : A list of float arrays of shape :math:`(R,)`. \
               Each value indicates how confident the prediction is.

        """
        prepared = []
        for source in imgs:
            source_size = source.shape[1:]
            prepared.append(
                (self.prepare(source.astype(np.float32)), source_size))

        bboxes = []
        labels = []
        scores = []
        for data, source_size in prepared:
            with chainer.using_config('train', False), \
                    chainer.function.no_backprop_mode():
                img_var = chainer.Variable(self.xp.asarray(data[None]))
                scale = img_var.shape[3] / source_size[1]
                head_locs, head_scores, proposals, _ = self.__call__(
                    img_var, scale=scale)

            # Only a single image is in the batch.
            offsets = head_locs.array
            logits = head_scores.array
            proposals = proposals / scale

            # De-normalise the offsets, one (mean, std) group per class.
            offset_mean = self.xp.tile(
                self.xp.asarray(self.loc_normalize_mean), self.n_class)
            offset_std = self.xp.tile(
                self.xp.asarray(self.loc_normalize_std), self.n_class)
            offsets = (offsets * offset_std + offset_mean).astype(np.float32)
            offsets = offsets.reshape((-1, self.n_class, 4))

            # Pair every proposal with each of its per-class offsets and
            # decode them into boxes in original-image coordinates.
            per_class_rois = self.xp.broadcast_to(
                proposals[:, None], offsets.shape)
            cls_bbox = loc2bbox(
                per_class_rois.reshape((-1, 4)), offsets.reshape((-1, 4)))
            cls_bbox = cls_bbox.reshape((-1, self.n_class * 4))

            # Restrict the boxes to the image area.
            cls_bbox[:, 0::2] = self.xp.clip(
                cls_bbox[:, 0::2], 0, source_size[0])
            cls_bbox[:, 1::2] = self.xp.clip(
                cls_bbox[:, 1::2], 0, source_size[1])

            class_prob = F.softmax(logits).array

            host_bbox = cuda.to_cpu(cls_bbox)
            host_prob = cuda.to_cpu(class_prob)
            bbox, label, score = self._suppress(host_bbox, host_prob)

            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)

        return bboxes, labels, scores
示例#8
0
    def predict(self, imgs):
        """Detect objects in each image and predict a mask for every box.

        For each image the extractor, RPN and head (with
        ``pred_mask=False``) are run in test mode to obtain boxes, which
        are decoded, clipped and filtered by ``self._suppress``.  Boxes
        with zero rounded area are dropped and, if
        ``self._detections_per_im`` is positive, only that many
        highest-scoring detections are kept.  The surviving boxes are then
        fed back to the head as RoIs (``pred_bbox=False``,
        ``pred_mask=True``) and the sigmoid mask outputs are converted to
        image-sized masks by ``segm_results``.

        Args:
            imgs (iterable of numpy.ndarray): Input images;
                ``img.shape[1:]`` is taken as the image size.

        Returns:
            tuple of lists: ``(bboxes, masks, labels, scores)`` with one
            entry per image.  For an image without detections the mask
            entry is an empty boolean array of shape
            ``(0, size[0], size[1])``.

        """
        prepared_imgs = []
        sizes = []
        scales = []
        for img in imgs:
            size = img.shape[1:]
            img = self.prepare(img.astype(np.float32))
            prepared_imgs.append(img)
            # The original size must be remembered per image: it is needed
            # below for clipping and for pasting the masks.
            sizes.append(size)
            scales.append(img.shape[2] / size[1])

        bboxes = []
        masks = []
        labels = []
        scores = []

        # Loop-invariant de-normalisation constants.
        mean = self.xp.tile(self.xp.asarray(self.loc_normalize_mean),
                            self.n_class)
        std = self.xp.tile(self.xp.asarray(self.loc_normalize_std),
                           self.n_class)

        for img, size, scale in zip(prepared_imgs, sizes, scales):
            with chainer.using_config('train', False), \
                    chainer.function.no_backprop_mode():
                img_var = chainer.Variable(self.xp.asarray(img[None]))

                img_size = img_var.shape[2:]

                h = self.extractor(img_var)
                _, _, rois, roi_indices, _ = \
                    self.rpn(h, img_size, [scale])
                roi_cls_locs, roi_scores, _, = self.head(
                    h, rois, roi_indices, pred_mask=False)
            # We are assuming that batch size is 1.
            roi_cls_loc = roi_cls_locs.data
            roi_score = roi_scores.data
            roi = rois / scale

            # Convert predictions to bounding boxes in image coordinates.
            # Bounding boxes are scaled to the scale of the input images.
            roi_cls_loc = (roi_cls_loc * std + mean).astype(np.float32)
            roi_cls_loc = roi_cls_loc.reshape((-1, self.n_class, 4))
            roi_cls = self.xp.broadcast_to(roi[:, None], roi_cls_loc.shape)
            cls_bbox = loc2bbox(roi_cls.reshape((-1, 4)),
                                roi_cls_loc.reshape((-1, 4)))
            cls_bbox = cls_bbox.reshape((-1, self.n_class * 4))
            # clip bounding box
            cls_bbox[:, 0::2] = self.xp.clip(cls_bbox[:, 0::2], 0, size[0])
            cls_bbox[:, 1::2] = self.xp.clip(cls_bbox[:, 1::2], 0, size[1])

            prob = F.softmax(roi_score).data

            raw_cls_bbox = cuda.to_cpu(cls_bbox)
            raw_prob = cuda.to_cpu(prob)

            bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)

            # Discard boxes that collapse to zero area once rounded.
            bbox_int = np.round(bbox).astype(np.int32)
            bbox_sizes = ((bbox_int[:, 2] - bbox_int[:, 0]) *
                          (bbox_int[:, 3] - bbox_int[:, 1]))
            keep = bbox_sizes > 0
            bbox = bbox[keep]
            label = label[keep]
            score = score[keep]

            if self._detections_per_im > 0:
                # Keep the top-k detections by score.  argsort is
                # ascending, so the last k indices are the best ones;
                # sorting them preserves the original ordering.
                top_k = int(self._detections_per_im)
                keep = np.sort(np.argsort(score)[-top_k:])
                bbox = bbox[keep]
                label = label[keep]
                score = score[keep]

            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)

            if len(bbox) == 0:
                masks.append(np.zeros((0, size[0], size[1]), dtype=bool))
                continue

            # use predicted bbox as rois
            with chainer.using_config('train', False), \
                    chainer.function.no_backprop_mode():
                rois = self.xp.asarray(bbox) * scale
                roi_indices = self.xp.zeros(
                    (len(bbox),), dtype=np.int32)
                _, _, roi_masks = self.head(
                    x=h, rois=rois, roi_indices=roi_indices,
                    pred_bbox=False, pred_mask=True)
                roi_masks = F.sigmoid(roi_masks)
            roi_mask = cuda.to_cpu(roi_masks.data)

            mask = segm_results(
                bbox,
                label,
                roi_mask,
                size[0],
                size[1],
                mask_size=self.head.mask_size,
            )
            masks.append(mask)

        return bboxes, masks, labels, scores
示例#9
0
    def _to_bboxes(self, roi_cls_locs, roi_scores, rois, roi_indices, sizes,
                   scales):
        """Convert batched head outputs into per-image detections.

        The rows belonging to image ``index`` are selected with
        ``roi_indices == index``.  Their offsets are de-normalised,
        decoded with ``loc2bbox``, clipped to the image size and filtered
        by ``self._suppress``.  Boxes with zero rounded area are dropped
        and, if ``self._detections_per_im`` is positive, only that many
        highest-scoring detections are kept.

        Args:
            roi_cls_locs: Predicted offsets; a ``chainer.Variable`` is
                unwrapped to its array.
            roi_scores: Class scores, passed through ``F.softmax``.
            rois (array): RoIs of all images in network-input scale.
            roi_indices (array): Image index of each RoI.
            sizes (sequence): Per-image clip bounds ``(size[0], size[1])``.
            scales (sequence): Per-image factor the RoIs are divided by.

        Returns:
            tuple of lists: ``(bboxes, labels, scores)`` with one entry
            per element of ``sizes``.

        """
        if isinstance(roi_cls_locs, chainer.Variable):
            roi_cls_locs = roi_cls_locs.array
        probs = F.softmax(roi_scores).array
        del roi_scores

        # Loop-invariant de-normalisation constants.
        mean = self.xp.tile(self.xp.asarray(self.loc_normalize_mean),
                            self.n_class)
        std = self.xp.tile(self.xp.asarray(self.loc_normalize_std),
                           self.n_class)

        bboxes = []
        labels = []
        scores = []
        for index, size in enumerate(sizes):
            scale = scales[index]

            in_image = roi_indices == index
            roi_cls_loc = roi_cls_locs[in_image]
            prob = probs[in_image]
            roi = rois[in_image] / scale

            # Convert predictions to bounding boxes in image coordinates.
            # Bounding boxes are scaled to the scale of the input images.
            roi_cls_loc = (roi_cls_loc * std + mean).astype(np.float32)
            roi_cls_loc = roi_cls_loc.reshape((-1, self.n_class, 4))
            roi_cls = self.xp.broadcast_to(roi[:, None], roi_cls_loc.shape)
            cls_bbox = loc2bbox(roi_cls.reshape((-1, 4)),
                                roi_cls_loc.reshape((-1, 4)))
            cls_bbox = cls_bbox.reshape((-1, self.n_class * 4))
            # clip bounding box
            cls_bbox[:, 0::2] = self.xp.clip(cls_bbox[:, 0::2], 0, size[0])
            cls_bbox[:, 1::2] = self.xp.clip(cls_bbox[:, 1::2], 0, size[1])

            raw_cls_bbox = cuda.to_cpu(cls_bbox)
            raw_prob = cuda.to_cpu(prob)

            bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)

            # Discard boxes that collapse to zero area once rounded.
            bbox_int = np.round(bbox).astype(np.int32)
            bbox_sizes = ((bbox_int[:, 2] - bbox_int[:, 0]) *
                          (bbox_int[:, 3] - bbox_int[:, 1]))
            keep = bbox_sizes > 0
            bbox = bbox[keep]
            label = label[keep]
            score = score[keep]

            if self._detections_per_im > 0:
                # Keep the top-k detections by score.  argsort is
                # ascending, so the last k indices are the best ones;
                # sorting them preserves the original ordering.
                top_k = int(self._detections_per_im)
                keep = np.sort(np.argsort(score)[-top_k:])
                bbox = bbox[keep]
                label = label[keep]
                score = score[keep]

            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)
        return bboxes, labels, scores
示例#10
0
    def predict(self, imgs):
        """Detect objects and build per-detection masks for each image.

        Boxes are predicted first (decode, clip, ``self._suppress``); the
        mask head is then run on the surviving boxes and the result is
        post-processed according to ``self.preset``:

        * ``'evaluate'``: boxes are converted with ``bbox_yxyx2xywh`` and
          masks are RLE-encoded with ``pycocotools.mask.encode``.  An
          image without detections gets a single all-zero encoded mask.
        * ``'visualize'``: boxes are kept as returned by
          ``self._suppress`` and masks are the arrays produced by
          ``im_mask``.

        Any other preset appends nothing to ``bboxes`` (unchanged from the
        previous behaviour).

        Args:
            imgs (iterable of numpy.ndarray): Input images;
                ``img.shape[1:]`` is taken as the image size.

        Returns:
            tuple of lists: ``(bboxes, labels, scores, masks)`` with one
            entry per image.  Labels are mapped through
            ``self.class_ids``.

        """
        prepared_imgs = []
        sizes = []
        for img in imgs:
            size = img.shape[1:]
            img = self.prepare(img.astype(np.float32))
            prepared_imgs.append(img)
            sizes.append(size)

        bboxes = []
        labels = []
        scores = []
        masks = []
        for img, size in zip(prepared_imgs, sizes):
            with chainer.using_config('train', False), \
                    chainer.function.no_backprop_mode():
                img_var = chainer.Variable(self.xp.asarray(img[None]))
                scale = img_var.shape[3] / size[1]
                roi_cls_locs, roi_scores, rois, _, h = self.__call__(
                    img_var, scale=scale)
            # We are assuming that batch size is 1.
            roi_cls_loc = roi_cls_locs.data
            roi_score = roi_scores.data
            roi = rois / scale
            mean = self.xp.tile(self.xp.asarray(self.loc_normalize_mean),
                                self.n_class)
            std = self.xp.tile(self.xp.asarray(self.loc_normalize_std),
                               self.n_class)
            roi_cls_loc = (roi_cls_loc * std + mean).astype(np.float32)
            roi_cls_loc = roi_cls_loc.reshape((-1, self.n_class, 4))
            # Repeat every RoI once per class so it pairs with its offsets.
            roi = self.xp.broadcast_to(roi[:, None],
                                       roi_cls_loc.shape).reshape((-1, 4))
            cls_bbox = loc2bbox(roi, roi_cls_loc.reshape((-1, 4)))
            cls_bbox = cls_bbox.reshape((-1, self.n_class * 4))
            cls_roi = roi.reshape((-1, self.n_class * 4))
            # Clip boxes and RoIs to the original image.
            cls_bbox[:, 0::2] = self.xp.clip(cls_bbox[:, 0::2], 0, size[0])
            cls_bbox[:, 1::2] = self.xp.clip(cls_bbox[:, 1::2], 0, size[1])
            cls_roi[:, 0::2] = self.xp.clip(cls_roi[:, 0::2], 0, size[0])
            cls_roi[:, 1::2] = self.xp.clip(cls_roi[:, 1::2], 0, size[1])

            prob = F.softmax(roi_score).data
            raw_cls_bbox = cuda.to_cpu(cls_bbox)
            raw_cls_roi = cuda.to_cpu(cls_roi)
            raw_prob = cuda.to_cpu(prob)
            bbox, _, label, score = self._suppress(
                raw_cls_bbox, raw_cls_roi, raw_prob)

            mask = []
            if len(bbox) > 0:
                # Mask head, run on the detected boxes in network scale.
                roi_indices = self.xp.zeros((len(bbox), ), dtype=np.int32)
                with chainer.using_config('train', False), \
                        chainer.function.no_backprop_mode():
                    # self.xp.asarray places the boxes on whichever device
                    # the model lives on, so this also works on CPU.
                    hres5 = self.head.res5head(
                        h, self.xp.asarray(bbox * scale), roi_indices)
                    roi_masks = self.head.maskhead(hres5)
                roi_mask = F.sigmoid(roi_masks).data
                raw_mask = cuda.to_cpu(roi_mask)
                # postprocess
                if self.preset == 'evaluate':
                    bboxes.append(bbox_yxyx2xywh(bbox))
                    for m, b, l in zip(raw_mask, bbox, label):
                        # Channel l + 1 of the mask output is used for
                        # label l.
                        wm = im_mask(m[int(l + 1)], size, b)
                        # encode the mask
                        wm = pycocotools.mask.encode(np.asfortranarray(wm))
                        wm['counts'] = wm['counts'].decode('ascii')
                        mask.append(wm)
                elif self.preset == 'visualize':
                    bboxes.append(bbox)
                    for m, b, l in zip(raw_mask, bbox, label):
                        mask.append(im_mask(m[int(l + 1)], size, b))
            elif self.preset == 'evaluate':
                # No detections: emit one all-zero encoded mask.
                wm = np.zeros((size[0], size[1]), dtype=np.uint8)
                wm = pycocotools.mask.encode(np.asfortranarray(wm))
                wm['counts'] = wm['counts'].decode('ascii')
                mask.append(wm)
                bboxes.append(bbox_yxyx2xywh(bbox))
            elif self.preset == 'visualize':
                # No detections: still append the (empty) boxes so that
                # bboxes stays aligned with labels, scores and masks.
                bboxes.append(bbox)
            labels.append([self.class_ids[int(l)] for l in label.tolist()])
            scores.append(score)
            masks.append(mask)

        return bboxes, labels, scores, masks
示例#11
0
    def predict(self, imgs):
        prepared_imgs = []
        sizes = []
        for img in imgs:
            size = img.shape[1:]
            img = self.prepare(img.astype(np.float32))
            prepared_imgs.append(img)
            sizes.append(size)

        bboxes = []
        labels = []
        scores = []
        masks = []
        for img, size in zip(prepared_imgs, sizes):
            with chainer.using_config('train', False), \
                    chainer.function.no_backprop_mode():
                img_var = chainer.Variable(self.xp.asarray(img[None]))
                scale = img_var.shape[3] / size[1]
                roi_cls_locs, roi_scores, rois, roi_indices, levels = self.__call__(
                    img_var, scale=scale)
            # We are assuming that batch size is 1.
            roi = rois / scale
            roi_cls_loc = roi_cls_locs.data
            roi_score = roi_scores.data

            if roi_cls_loc.shape[1] == 4:
                roi_cls_loc = self.xp.tile(roi_cls_loc, self.n_class)

            # if loc prediction layer uses shared weight, expand (though, not optimized way)
            if roi_cls_loc.shape[1] == 4:
                roi_cls_loc = self.xp.tile(roi_cls_loc, self.n_class)

            # Convert predictions to bounding boxes in image coordinates.
            # Bounding boxes are scaled to the scale of the input images.
            mean = self.xp.tile(self.xp.asarray(self.loc_normalize_mean),
                                self.n_class)
            std = self.xp.tile(self.xp.asarray(self.loc_normalize_std),
                               self.n_class)
            roi_cls_loc = (roi_cls_loc * std + mean).astype(np.float32)
            roi_cls_loc = roi_cls_loc.reshape((-1, self.n_class, 4))
            roi = self.xp.broadcast_to(roi[:, None], roi_cls_loc.shape)
            cls_bbox = loc2bbox(roi.reshape((-1, 4)),
                                roi_cls_loc.reshape((-1, 4)))
            cls_bbox = cls_bbox.reshape((-1, self.n_class * 4))
            # clip bounding box
            cls_bbox[:, 0::2] = self.xp.clip(cls_bbox[:, 0::2], 0, size[0])
            cls_bbox[:, 1::2] = self.xp.clip(cls_bbox[:, 1::2], 0, size[1])

            prob = F.softmax(roi_score).data

            raw_cls_bbox = cuda.to_cpu(cls_bbox)
            raw_prob = cuda.to_cpu(prob)
            raw_roi = cuda.to_cpu(roi)
            raw_levels = cuda.to_cpu(levels)

            bbox, label, score, roi, levels = self._suppress(
                raw_cls_bbox, raw_prob, raw_roi, raw_levels)

            # predict only mask based on detected roi
            mask_per_image = []
            if len(label) > 0:
                with chainer.using_config('train', False), \
                        chainer.function.no_backprop_mode():
                    # because we are assuming batch size=1, all elements of roi_indices is zero.
                    roi_indices = self.xp.zeros(roi.shape[0], dtype=np.float32)
                    bbox_gpu = cuda.to_gpu(
                        bbox) if chainer.cuda.available else bbox
                    indices_and_rois = self.xp.concatenate(
                        (roi_indices[:, None], bbox_gpu * scale), axis=1)

                    mask = self.head.predict_mask(
                        levels, indices_and_rois,
                        self.extractor.spatial_scales)

                if self.predict_mask:
                    mask = F.sigmoid(mask).data
                    mask = mask[np.arange(mask.shape[0]), label]
                    # Resize each mask to the size of its bounding box.
                    for i, (b, m) in enumerate(zip(bbox, mask)):
                        w = b[3] - b[1]
                        h = b[2] - b[0]
                        m = cv2.resize(m, (w, h)) * 255
                        m = m.astype(np.uint8)
                        _, m = cv2.threshold(m, 127, 255, cv2.THRESH_BINARY)

                    mask_per_image.append(m)
                else:
                    mask = mask.reshape((mask.shape[0], 17, -1)).data
                    mask = cuda.to_cpu(mask)
                    mask_per_image.append(mask)

            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)
            masks.append(mask_per_image)

        return bboxes, labels, scores, masks
示例#12
0
    def __call__(self, loc, score, anchor, img_size, scale=1.):
        """Turn anchor-wise predictions into a reduced set of proposal boxes.

        The three inputs :obj:`loc`, :obj:`score` and :obj:`anchor` are
        aligned: the same index in each of them describes the same anchor.
        :math:`R` below denotes the total number of anchors.

        The pipeline is: decode offsets into boxes, clip them to the image,
        drop boxes that are too small, keep the best scoring ones, run
        non-maximum suppression and finally truncate the survivors.
        All intermediate work is done on CPU arrays; the result is moved
        back to the device when the inputs were device arrays.

        Args:
            loc (array): Predicted offsets and scaling to anchors.
                Its shape is :math:`(R, 4)`.
            score (array): Predicted foreground probability for anchors.
                Its shape is :math:`(R,)`.
            anchor (array): Coordinates of anchors. Its shape is
                :math:`(R, 4)`.
            img_size (tuple of ints): A tuple :obj:`height, width`,
                which contains image size after scaling.
            scale (float): The scaling factor used to scale an image after
                reading it from a file. It multiplies :obj:`self.min_size`
                to obtain the minimum accepted box side.

        Returns:
            array:
            Proposal box coordinates of shape :math:`(S, 4)`.
            When the relevant post-NMS limit is positive, :math:`S` is at
            most :obj:`self.n_test_post_nms` in test mode and at most
            :obj:`self.n_train_post_nms` in train mode; the exact value
            depends on how many boxes survive the size filter and NMS.

        """
        # The pre-/post-NMS budgets follow chainer's train/test mode.
        if chainer.config.train:
            pre_nms_limit, post_nms_limit = (
                self.n_train_pre_nms, self.n_train_post_nms)
        else:
            pre_nms_limit, post_nms_limit = (
                self.n_test_pre_nms, self.n_test_post_nms)

        # Remember where the inputs live before pulling them to the host.
        xp = cuda.get_array_module(loc)
        use_device = xp != np
        loc, score, anchor = (
            cuda.to_cpu(arr) for arr in (loc, score, anchor))

        # Decode anchors + offsets into candidate boxes.
        boxes = loc2bbox(anchor, loc)

        # Bound columns 0 and 2 by img_size[0], columns 1 and 3 by
        # img_size[1] so that no box leaves the image.
        boxes[:, 0:4:2] = np.clip(boxes[:, 0:4:2], 0, img_size[0])
        boxes[:, 1:4:2] = np.clip(boxes[:, 1:4:2], 0, img_size[1])

        # Discard boxes whose height or width falls below the threshold.
        size_floor = self.min_size * scale
        heights = boxes[:, 2] - boxes[:, 0]
        widths = boxes[:, 3] - boxes[:, 1]
        large_enough = np.where(
            (heights >= size_floor) & (widths >= size_floor))[0]
        boxes = boxes[large_enough, :]
        score = score[large_enough]

        # Rank by descending score and keep only the leading candidates
        # (a non-positive limit means "keep everything").
        ranking = score.ravel().argsort()[::-1]
        if pre_nms_limit > 0:
            ranking = ranking[:pre_nms_limit]
        boxes = boxes[ranking, :]
        score = score[ranking]

        # Non-maximum suppression, on the device unless CPU NMS is forced.
        if use_device and not self.force_cpu_nms:
            survivors = cuda.to_cpu(non_maximum_suppression(
                cuda.to_gpu(boxes), thresh=self.nms_thresh))
        else:
            survivors = non_maximum_suppression(
                boxes, thresh=self.nms_thresh)
        if post_nms_limit > 0:
            survivors = survivors[:post_nms_limit]
        boxes = boxes[survivors]

        # Hand the result back in the same array type as the inputs.
        return cuda.to_gpu(boxes) if use_device else boxes
    def __call__(self, x, scale=1.0, iter2=True):
        """Run the network forward and return per-RoI probabilities.

        The input goes through the stages :obj:`res1` to :obj:`res4`, the
        region proposal network, :obj:`res5` and the PSROI convolutions.
        RoIs are then pooled and classified. When :obj:`iter2` is true, the
        RoIs are refined with the predicted offsets and pooled a second
        time; the outputs of both passes are concatenated.

        Args:
            x: Input batch. Its shape from axis 2 onwards is used as the
                image size (presumably :math:`(B, C, H, W)`; confirm with
                callers).
            scale (float): Scaling factor forwarded to the region
                proposal network.
            iter2 (bool): Whether to run the second refinement pass.

        Returns:
            tuple:
            :obj:`(roi_indices, rois, roi_seg_probs, roi_cls_probs)`.
            The probabilities are raw arrays obtained from softmax
            outputs. With :obj:`iter2`, every element holds the first pass
            results followed by the second pass results.

        """
        img_size = x.shape[2:]

        # Feature extractor: stages 1 through 4.
        feat = x
        for stage in (self.res1, self.res2, self.res3, self.res4):
            feat = stage(feat)

        # Region proposals; only the RoIs and their indices are needed.
        _, _, rois, roi_indices, _ = self.rpn(feat, img_size, scale)
        roi_indices = roi_indices.astype(np.float32)
        indices_and_rois = self.xp.concatenate(
            (roi_indices[:, None], rois), axis=1)

        # ResNet101C5 with dilated convolution.
        feat = self.res5(feat)

        # Convolutions that feed the PSROI pooling.
        feat = F.relu(self.psroi_conv1(feat))
        seg_map = self.psroi_conv2(feat)
        loc_map = self.psroi_conv3(feat)

        # First pass: pooling, segmentation, regression, classification.
        seg_scores, roi_cls_locs, cls_scores = self._pool_and_predict(
            indices_and_rois, seg_map, loc_map)
        roi_cls_probs = F.softmax(cls_scores).array
        roi_seg_probs = F.softmax(seg_scores).array

        if not iter2:
            return roi_indices, rois, roi_seg_probs, roi_cls_probs

        # Second pass: refine the RoIs using the offsets stored at index 1
        # of the second axis, de-normalized with the configured mean/std.
        loc_std = None
        refined_locs = roi_cls_locs.array[:, 1, :]
        loc_mean = self.xp.array(self.loc_normalize_mean)
        loc_std = self.xp.array(self.loc_normalize_std)
        refined_locs = refined_locs * loc_std + loc_mean
        rois2 = loc2bbox(rois, refined_locs)

        # Keep the refined RoIs inside the image.
        height, width = img_size
        rois2[:, 0::2] = self.xp.clip(rois2[:, 0::2], 0, height)
        rois2[:, 1::2] = self.xp.clip(rois2[:, 1::2], 0, width)

        # Pool and predict again on the refined RoIs.
        indices_and_rois2 = self.xp.concatenate(
            (roi_indices[:, None], rois2), axis=1).astype(self.xp.float32)
        seg_scores2, _, cls_scores2 = self._pool_and_predict(
            indices_and_rois2, seg_map, loc_map)
        roi_cls_probs2 = F.softmax(cls_scores2).array
        roi_seg_probs2 = F.softmax(seg_scores2).array

        # Stack first-pass results followed by second-pass results.
        rois = self.xp.concatenate((rois, rois2))
        roi_indices = self.xp.concatenate((roi_indices, roi_indices))
        roi_cls_probs = self.xp.concatenate((roi_cls_probs, roi_cls_probs2))
        roi_seg_probs = self.xp.concatenate((roi_seg_probs, roi_seg_probs2))

        return roi_indices, rois, roi_seg_probs, roi_cls_probs
示例#14
0
    def predict(self, imgs):
        """Detect objects from images.

        This method predicts objects for each image. Images are processed
        one at a time (batch size 1): each image is preprocessed with
        :meth:`prepare`, passed through the network in test mode, and the
        class-wise boxes are decoded, clipped to the original image extent
        and filtered by :meth:`_suppress`.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images.
                All images are in CHW and RGB format
                and the range of their value is :math:`[0, 255]`.

        Returns:
           tuple of lists:
           This method returns a tuple of three lists,
           :obj:`(bboxes, labels, scores)`.

           * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
               where :math:`R` is the number of bounding boxes in a image. \
               Each bounding box is organized by \
               :obj:`(x_min, y_min, x_max, y_max)` \
               in the second axis.
           * **labels** : A list of integer arrays of shape :math:`(R,)`. \
               Each value indicates the class of the bounding box. \
               Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
               number of the foreground classes.
           * **scores** : A list of float arrays of shape :math:`(R,)`. \
               Each value indicates how confident the prediction is.

        """
        prepared_imgs = list()
        scales = list()
        for img in imgs:
            # Only the original width is needed to recover the resize ratio.
            _, _, W = img.shape
            img = self.prepare(img.astype(np.float32))
            scale = img.shape[2] / W
            prepared_imgs.append(img)
            scales.append(scale)

        bboxes = list()
        labels = list()
        scores = list()
        for img, scale in zip(prepared_imgs, scales):
            img_var = chainer.Variable(self.xp.asarray(img[None]),
                                       volatile=chainer.flag.ON)
            # Size of the *prepared* image; divided by scale below to get
            # back the extent of the original image.
            H, W = img_var.shape[2:]
            roi_cls_locs, roi_scores, rois, _ = self.__call__(img_var,
                                                              scale=scale,
                                                              test=True)
            # We are assuming that batch size is 1.
            roi_cls_loc = roi_cls_locs.data
            roi_score = roi_scores.data
            roi = rois / scale

            # Convert predictions to bounding boxes in image coordinates.
            # Bounding boxes are scaled to the scale of the input images.
            mean = self.xp.tile(self.xp.asarray(self.loc_normalize_mean),
                                self.n_class)
            std = self.xp.tile(self.xp.asarray(self.loc_normalize_std),
                               self.n_class)
            roi_cls_loc = (roi_cls_loc * std + mean).astype(np.float32)
            roi_cls_loc = roi_cls_loc.reshape(-1, self.n_class, 4)
            roi = self.xp.broadcast_to(roi[:, None], roi_cls_loc.shape)
            cls_bbox = loc2bbox(roi.reshape(-1, 4), roi_cls_loc.reshape(-1, 4))
            cls_bbox = cls_bbox.reshape(-1, self.n_class * 4)
            # Clip bounding boxes of EVERY class. Each row holds
            # n_class * 4 values, so even columns are x coordinates and odd
            # columns are y coordinates for all classes. Slicing with
            # slice(0, 4, 2) / slice(1, 4, 2) would only clip the first
            # class and leave the others outside the image.
            cls_bbox[:, 0::2] = self.xp.clip(
                cls_bbox[:, 0::2], 0, W / scale)
            cls_bbox[:, 1::2] = self.xp.clip(
                cls_bbox[:, 1::2], 0, H / scale)

            prob = F.softmax(roi_score).data

            # _suppress works on host arrays.
            raw_cls_bbox = cuda.to_cpu(cls_bbox)
            raw_prob = cuda.to_cpu(prob)

            bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)

        return bboxes, labels, scores