Exemplo n.º 1
0
    def __call__(self, bbox, anchor, img_size):
        """Compute regression targets and labels for every anchor.

        Only the anchors selected by :func:`_get_inside_index` are labelled
        and matched to a ground truth box. The results are then expanded
        back so that they line up with the full set of anchors. Outputs are
        moved to the GPU when :obj:`bbox` is not a NumPy array.

        Here are notations.

        * :math:`S` is the number of anchors.
        * :math:`R` is the number of bounding boxes.

        Args:
            bbox (array): Coordinates of bounding boxes. Its shape is
                :math:`(R, 4)`.
            anchor (array): Coordinates of anchors. Its shape is
                :math:`(S, 4)`.
            img_size (tuple of ints): A tuple :obj:`W, H`. It is unpacked
                as the image width followed by the image height.

        Returns:
            (array, array):

            * **loc**: Offsets and scales to match the anchors to
              the ground truth bounding boxes. Its shape is :math:`(S, 4)`.
              Rows of anchors that were not selected are filled with 0.
            * **label**: Labels of anchors with values
              :obj:`(1=positive, 0=negative, -1=ignore)`. Its shape
              is :math:`(S,)`. Anchors that were not selected get -1.

        """
        xp = cuda.get_array_module(bbox)
        bbox = cuda.to_cpu(bbox)
        anchor = cuda.to_cpu(anchor)

        width, height = img_size
        total = len(anchor)

        # Restrict the computation to the anchors picked by the helper.
        kept = _get_inside_index(anchor, width, height)
        kept_anchor = anchor[kept]
        gt_index, kept_label = self._create_label(kept, kept_anchor, bbox)

        # Regression targets towards the ground truth matched to each anchor.
        kept_loc = bbox2loc(kept_anchor, bbox[gt_index])

        # Scatter the results back to the original anchor positions.
        label = _unmap(kept_label, total, kept, fill=-1)
        loc = _unmap(kept_loc, total, kept, fill=0)

        if xp is np:
            return loc, label
        loc = chainer.cuda.to_gpu(loc)
        label = chainer.cuda.to_gpu(label)
        return loc, label
Exemplo n.º 2
0
    def __call__(self, bbox, anchor, img_size):
        """Compute regression targets and labels for every anchor.

        Only the anchors selected by :func:`_get_inside_index` are labelled
        and matched to a ground truth box. The results are then expanded
        back so that they line up with the full set of anchors. Outputs are
        moved to the GPU when :obj:`bbox` is not a NumPy array.

        Here are notations.

        * :math:`S` is the number of anchors.
        * :math:`R` is the number of bounding boxes.

        Args:
            bbox (array): Coordinates of bounding boxes. Its shape is
                :math:`(R, 4)`.
            anchor (array): Coordinates of anchors. Its shape is
                :math:`(S, 4)`.
            img_size (tuple of ints): A tuple :obj:`H, W`, which
                is a tuple of height and width of an image.

        Returns:
            (array, array):

            * **loc**: Offsets and scales to match the anchors to
              the ground truth bounding boxes. Its shape is :math:`(S, 4)`.
              Rows of anchors that were not selected are filled with 0.
            * **label**: Labels of anchors with values
              :obj:`(1=positive, 0=negative, -1=ignore)`. Its shape
              is :math:`(S,)`. Anchors that were not selected get -1.

        """
        xp = cuda.get_array_module(bbox)
        bbox = cuda.to_cpu(bbox)
        anchor = cuda.to_cpu(anchor)

        height, width = img_size
        total = len(anchor)

        # Restrict the computation to the anchors picked by the helper.
        kept = _get_inside_index(anchor, height, width)
        kept_anchor = anchor[kept]
        gt_index, kept_label = self._create_label(kept, kept_anchor, bbox)

        # Regression targets towards the ground truth matched to each anchor.
        kept_loc = bbox2loc(kept_anchor, bbox[gt_index])

        # Scatter the results back to the original anchor positions.
        label = _unmap(kept_label, total, kept, fill=-1)
        loc = _unmap(kept_loc, total, kept, fill=0)

        if xp is np:
            return loc, label
        loc = chainer.cuda.to_gpu(loc)
        label = chainer.cuda.to_gpu(label)
        return loc, label
    def __call__(
            self,
            roi,
            mask,
            label,
            bbox,
            loc_normalize_mean=(0., 0., 0., 0.),
            loc_normalize_std=(0.2, 0.2, 0.5, 0.5),
            mask_size=(21, 21),
    ):
        """Assigns ground truth to sampled proposals.

        This function samples up to :obj:`self.n_sample` RoIs from the
        combination of :obj:`roi` and :obj:`bbox` (every candidate is used
        when :obj:`self.n_sample` is :obj:`None`). The RoIs are assigned
        with the ground truth class labels, bounding box offsets and scales
        to match the ground truth bounding boxes, and ground truth masks
        cropped from :obj:`mask`. As many as
        :obj:`pos_ratio * n_sample` RoIs are sampled as foregrounds.

        Offsets and scales of bounding boxes are calculated using
        :func:`chainercv.links.model.faster_rcnn.bbox2loc`.
        Outputs are moved to the GPU when :obj:`roi` is not a NumPy array.

        Here are notations.

        * :math:`S` is the total number of sampled RoIs, which is at most
          the number of requested samples.
        * :math:`L` is number of object classes possibly including the
          background.
        * :math:`H` is the image height.
        * :math:`W` is the image width.
        * :math:`RH` is the mask height.
        * :math:`RW` is the mask width.

        Args:
            roi (array): Region of Interests (RoIs) from which we sample.
                Its shape is :math:`(R, 4)`
            mask (array): The coordinates of ground truth masks.
                Its shape is :math:`(R', H, W)`.
            label (array): Ground truth bounding box labels. Its shape
                is :math:`(R',)`. Its range is :math:`[0, L - 1]`, where
                :math:`L` is the number of foreground classes.
            bbox (array): The coordinates of ground truth bounding boxes.
                Its shape is :math:`(R', 4)`.
            loc_normalize_mean (tuple of four floats): Mean values to normalize
                coordinates of bounding boxes.
            loc_normalize_std (tuple of four floats): Standard deviation of
                the coordinates of bounding boxes.
            mask_size (tuple of int or int): Generated mask size, which is
                equal to :math:`(RH, RW)`. A single int is used for both
                dimensions.

        Returns:
            (array, array, array, array):

            * **sample_roi**: Regions of interests that are sampled.
              Its shape is :math:`(S, 4)`.
            * **gt_roi_mask**: Masks assigned to sampled RoIs. Its shape is
              :math:`(S, RH, RW)`. Rows of background RoIs are filled
              with -1.
            * **gt_roi_label**: Labels assigned to sampled RoIs. Its shape is
              :math:`(S,)`. Its range is :math:`[0, L]`. The label with
              value 0 is the background.
            * **gt_roi_loc**: Offsets and scales to match
              the sampled RoIs to the ground truth bounding boxes.
              Its shape is :math:`(S, 4)`.

        """

        xp = cuda.get_array_module(roi)
        roi = cuda.to_cpu(roi)
        mask = cuda.to_cpu(mask)
        label = cuda.to_cpu(label)
        bbox = cuda.to_cpu(bbox)

        if not isinstance(mask_size, tuple):
            mask_size = (mask_size, mask_size)

        # Ground truth boxes are added to the pool of candidate RoIs.
        roi = np.concatenate((roi, bbox), axis=0)

        if self.n_sample is None:
            n_sample = roi.shape[0]
        else:
            n_sample = self.n_sample

        pos_roi_per_image = np.round(n_sample * self.pos_ratio)
        iou = bbox_iou(roi, bbox)
        gt_assignment = iou.argmax(axis=1)
        max_iou = iou.max(axis=1)

        # Offset range of classes from [0, n_fg_class - 1] to [1, n_fg_class].
        # The label with value 0 is the background.
        gt_roi_label = label[gt_assignment] + 1

        # Select foreground RoIs as those with >= pos_iou_thresh IoU.
        pos_index = np.where(max_iou >= self.pos_iou_thresh)[0]
        pos_roi_per_this_image = int(min(pos_roi_per_image, pos_index.size))
        if pos_index.size > 0:
            pos_index = np.random.choice(pos_index,
                                         size=pos_roi_per_this_image,
                                         replace=False)

        # Select background RoIs as those within
        # [neg_iou_thresh_lo, neg_iou_thresh_hi).
        neg_index = np.where((max_iou < self.neg_iou_thresh_hi)
                             & (max_iou >= self.neg_iou_thresh_lo))[0]
        # Use the resolved ``n_sample`` so that ``self.n_sample = None``
        # works here as well.
        neg_roi_per_this_image = n_sample - pos_roi_per_this_image
        neg_roi_per_this_image = int(
            min(neg_roi_per_this_image, neg_index.size))
        if neg_index.size > 0:
            neg_index = np.random.choice(neg_index,
                                         size=neg_roi_per_this_image,
                                         replace=False)

        # The indices that we're selecting (both foreground and background).
        # Foreground indices come first, so row i of the sampled arrays
        # corresponds to pos_index[i] for i < len(pos_index).
        keep_index = np.append(pos_index, neg_index)
        gt_roi_label = gt_roi_label[keep_index]
        gt_roi_label[pos_roi_per_this_image:] = 0  # negative labels --> 0
        sample_roi = roi[keep_index]

        # locs
        # Compute offsets and scales to match sampled RoIs to the GTs.
        loc_normalize_mean = np.array(loc_normalize_mean, np.float32)
        loc_normalize_std = np.array(loc_normalize_std, np.float32)
        gt_roi_loc = bbox2loc(sample_roi, bbox[gt_assignment[keep_index]])
        gt_roi_loc = gt_roi_loc - loc_normalize_mean
        gt_roi_loc = gt_roi_loc / loc_normalize_std

        # masks
        # Background rows keep the initial value -1.
        gt_roi_mask = -1 * np.ones(
            (len(keep_index), mask_size[0], mask_size[1]), dtype=np.int32)

        for i, pos_ind in enumerate(pos_index):
            # ``np.int`` was removed from NumPy; the builtin ``int`` is the
            # documented replacement.
            bb = np.round(sample_roi[i]).astype(int)
            gt_msk = mask[gt_assignment[pos_ind]]
            # Crop the ground truth mask to the RoI, then rescale the crop
            # to the requested mask size and binarize it.
            gt_roi_msk = gt_msk[bb[0]:bb[2], bb[1]:bb[3]]
            gt_roi_msk = resize(
                gt_roi_msk.astype(np.float32)[None], mask_size)[0]
            gt_roi_msk = (gt_roi_msk >= self.binary_thresh).astype(np.int32)
            gt_roi_mask[i] = gt_roi_msk

        if xp != np:
            sample_roi = cuda.to_gpu(sample_roi)
            gt_roi_mask = cuda.to_gpu(gt_roi_mask)
            gt_roi_label = cuda.to_gpu(gt_roi_label)
            gt_roi_loc = cuda.to_gpu(gt_roi_loc)

        return sample_roi, gt_roi_mask, gt_roi_label, gt_roi_loc
Exemplo n.º 4
0
    def __call__(self,
                 roi,
                 bbox,
                 label,
                 mask,
                 loc_normalize_mean=(0., 0., 0., 0.),
                 loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
        """Assigns ground truth to sampled proposals.

        This function samples up to :obj:`self.n_sample` RoIs
        from the combination of :obj:`roi` and :obj:`bbox`.
        The RoIs are assigned with the ground truth class labels as well as
        bounding box offsets and scales to match the ground truth bounding
        boxes. As many as :obj:`pos_ratio * self.n_sample` RoIs are
        sampled as foregrounds. For every foreground RoI, the matched ground
        truth mask is cropped to the RoI and resized to
        :obj:`(2 * self.roi_size, 2 * self.roi_size)`.

        Offsets and scales of bounding boxes are calculated using
        :func:`chainercv.links.model.faster_rcnn.bbox2loc`.
        Outputs are moved to the GPU when :obj:`roi` is not a NumPy array.

        Here are notations.

        * :math:`S` is the total number of sampled RoIs, which is at most
          :obj:`self.n_sample`.
        * :math:`P` is the number of sampled foreground RoIs.
        * :math:`L` is number of object classes possibly including the
          background.

        Args:
            roi (array): Region of Interests (RoIs) from which we sample.
                Its shape is :math:`(R, 4)`
            bbox (array): The coordinates of ground truth bounding boxes.
                Its shape is :math:`(R', 4)`.
            label (array): Ground truth bounding box labels. Its shape
                is :math:`(R',)`. Its range is :math:`[0, L - 1]`, where
                :math:`L` is the number of foreground classes.
            mask (array): Ground truth masks, one per bounding box. Its
                shape is :math:`(R', H, W)`.
            loc_normalize_mean (tuple of four floats): Mean values to normalize
                coordinates of bounding boxes.
            loc_normalize_std (tuple of four floats): Standard deviation of
                the coordinates of bounding boxes.

        Returns:
            (array, array, array, array):

            * **sample_roi**: Regions of interests that are sampled.
              Its shape is :math:`(S, 4)`.
            * **gt_roi_loc**: Offsets and scales to match
              the sampled RoIs to the ground truth bounding boxes.
              Its shape is :math:`(S, 4)`.
            * **gt_roi_label**: Labels assigned to sampled RoIs. Its shape is
              :math:`(S,)`. Its range is :math:`[0, L]`. The label with
              value 0 is the background.
            * **gt_roi_mask**: Resized masks of the foreground RoIs only.
              Its shape is
              :math:`(P, 2 \\cdot roi\\_size, 2 \\cdot roi\\_size)` and its
              dtype is :obj:`numpy.int32`.

        """
        xp = cuda.get_array_module(roi)
        roi = cuda.to_cpu(roi)
        bbox = cuda.to_cpu(bbox)
        label = cuda.to_cpu(label)
        mask = cuda.to_cpu(mask)

        # Ground truth boxes are added to the pool of candidate RoIs.
        roi = np.concatenate((roi, bbox), axis=0)

        pos_roi_per_image = np.round(self.n_sample * self.pos_ratio)
        iou = bbox_iou(roi, bbox)
        gt_assignment = iou.argmax(axis=1)
        max_iou = iou.max(axis=1)

        # Offset range of classes from [0, n_fg_class - 1] to [1, n_fg_class].
        # The label with value 0 is the background.
        gt_roi_label = label[gt_assignment] + 1

        # Select foreground RoIs as those with >= pos_iou_thresh IoU.
        pos_index = np.where(max_iou >= self.pos_iou_thresh)[0]
        pos_roi_per_this_image = int(min(pos_roi_per_image, pos_index.size))
        if pos_index.size > 0:
            pos_index = np.random.choice(pos_index,
                                         size=pos_roi_per_this_image,
                                         replace=False)

        # Select background RoIs as those within
        # [neg_iou_thresh_lo, neg_iou_thresh_hi).
        neg_index = np.where((max_iou < self.neg_iou_thresh_hi)
                             & (max_iou >= self.neg_iou_thresh_lo))[0]
        neg_roi_per_this_image = self.n_sample - pos_roi_per_this_image
        neg_roi_per_this_image = int(
            min(neg_roi_per_this_image, neg_index.size))
        if neg_index.size > 0:
            neg_index = np.random.choice(neg_index,
                                         size=neg_roi_per_this_image,
                                         replace=False)

        # The indices that we're selecting (both positive and negative).
        # Positive indices come first, so row i of ``sample_roi`` belongs to
        # pos_index[i] for i < len(pos_index).
        keep_index = np.append(pos_index, neg_index)
        gt_roi_label = gt_roi_label[keep_index]
        gt_roi_label[pos_roi_per_this_image:] = 0  # negative labels --> 0
        sample_roi = roi[keep_index]  # sampled <- proposed

        # Compute offsets and scales to match sampled RoIs to the GTs.
        gt_roi_loc = bbox2loc(sample_roi, bbox[gt_assignment[keep_index]])
        gt_roi_loc = ((gt_roi_loc - np.array(loc_normalize_mean, np.float32)) /
                      np.array(loc_normalize_std, np.float32))

        # Prepare ground truth masks: crop the matched mask to the RoI
        # (clipped to the mask extent) and resize the crop.
        _, h, w = mask.shape
        out_size = self.roi_size * 2
        crops = []
        for i, idx in enumerate(gt_assignment[pos_index]):
            y_min = max(int(sample_roi[i, 0]), 0)
            y_max = min(int(sample_roi[i, 2]), h)
            x_min = max(int(sample_roi[i, 1]), 0)
            x_max = min(int(sample_roi[i, 3]), w)
            crops.append(
                cv2.resize(mask[idx, y_min:y_max, x_min:x_max],
                           (out_size, out_size)))

        if crops:
            gt_roi_mask = np.stack(crops).astype(np.int32)
        else:
            # ``np.stack`` rejects an empty list, so build the empty result
            # explicitly when no foreground RoI was sampled.
            gt_roi_mask = np.zeros((0, out_size, out_size), dtype=np.int32)

        if xp != np:
            sample_roi = cuda.to_gpu(sample_roi)
            gt_roi_loc = cuda.to_gpu(gt_roi_loc)
            gt_roi_label = cuda.to_gpu(gt_roi_label)
            gt_roi_mask = cuda.to_gpu(gt_roi_mask)
        return sample_roi, gt_roi_loc, gt_roi_label, gt_roi_mask
Exemplo n.º 5
0
    def __call__(self, rois, bboxes, whole_mask, labels):
        """Sample RoIs and build their loc, mask and label targets.

        The ground truth boxes are appended to :obj:`rois`, and each
        candidate is matched to the ground truth box with the highest IoU.
        Foreground and background candidates are then drawn at random
        according to the IoU thresholds stored on the instance. When
        :obj:`self.n_sample` is :obj:`None`, the number of candidates is
        used as the sample budget.

        All inputs are moved to the CPU for the computation and all outputs
        are moved to the GPU unconditionally.

        Args:
            rois (array): Candidate RoIs. Ground truth boxes are
                concatenated to it along the first axis.
            bboxes (array): Ground truth bounding boxes, a 2-D array.
            whole_mask (array): Ground truth masks, indexed by ground truth
                box along the first axis.
            labels (array): Labels of the ground truth boxes. They are
                used as they are, so 0 is assumed to be reserved for the
                background -- TODO confirm against the caller.

        Returns:
            (array, array, array, array):

            * **sample_rois**: The sampled RoIs, foregrounds first.
            * **gt_roi_locs**: Normalized offsets and scales computed by
              :func:`bbox2loc` towards the matched ground truth boxes.
            * **gt_roi_masks**: :obj:`int32` masks of shape
              :obj:`(S, self.mask_size, self.mask_size)`. Rows of
              background RoIs are filled with -1.
            * **gt_roi_labels**: Labels of the sampled RoIs. Background
              RoIs get 0.

        """
        rois = cuda.to_cpu(rois)
        bboxes = cuda.to_cpu(bboxes)
        whole_mask = cuda.to_cpu(whole_mask)
        labels = cuda.to_cpu(labels)

        # Unpacking requires ``bboxes`` to be two dimensional.
        n_bbox, _ = bboxes.shape

        rois = np.concatenate((rois, bboxes), axis=0)
        n_sample = rois.shape[0] if self.n_sample is None else self.n_sample

        fg_budget = np.round(n_sample * self.fg_ratio)
        iou = bbox_iou(rois, bboxes)
        gt_assignment = iou.argmax(axis=1)
        max_iou = iou.max(axis=1)

        # Foreground: candidates with IoU >= fg_iou_thresh.
        fg_indices = np.where(max_iou >= self.fg_iou_thresh)[0]
        n_fg = int(min(fg_budget, fg_indices.size))
        if fg_indices.size > 0:
            fg_indices = np.random.choice(
                fg_indices, size=n_fg, replace=False)

        # Background: candidates with IoU in
        # [bg_iou_thresh_lo, bg_iou_thresh_hi).
        bg_indices = np.where((max_iou < self.bg_iou_thresh_hi)
                              & (max_iou >= self.bg_iou_thresh_lo))[0]
        n_bg = int(min(n_sample - n_fg, bg_indices.size))
        if bg_indices.size > 0:
            bg_indices = np.random.choice(
                bg_indices, size=n_bg, replace=False)

        # Foregrounds first, then backgrounds.
        keep_indices = np.append(fg_indices, bg_indices)
        sample_rois = rois[keep_indices]

        # Normalized regression targets towards the matched ground truth.
        mean = np.array(self.loc_normalize_mean, np.float32)
        std = np.array(self.loc_normalize_std, np.float32)
        gt_roi_locs = bbox2loc(
            sample_rois, bboxes[gt_assignment[keep_indices]])
        gt_roi_locs = (gt_roi_locs - mean) / std

        # Mask targets; only the foreground rows are overwritten.
        gt_roi_masks = np.full(
            (len(keep_indices), self.mask_size, self.mask_size),
            -1, dtype=np.int32)
        for row, fg_index in enumerate(fg_indices):
            gt_index = gt_assignment[fg_index]
            roi = np.round(sample_rois[row]).astype(np.int32)
            gt_roi = np.round(bboxes[gt_index]).astype(np.int32)
            roi_mask = fcis.mask.intersect_bbox_mask(
                roi, gt_roi, whole_mask[gt_index], self.mask_size)
            roi_mask = cv2.resize(
                roi_mask.astype(np.float32),
                (self.mask_size, self.mask_size))
            gt_roi_masks[row, ...] = (
                roi_mask >= self.binary_thresh).astype(np.int32)

        # Label targets; everything after the foregrounds is background.
        gt_roi_labels = labels[gt_assignment[keep_indices]]
        gt_roi_labels[n_fg:] = 0

        sample_rois = cuda.to_gpu(sample_rois)
        gt_roi_locs = cuda.to_gpu(gt_roi_locs)
        gt_roi_masks = cuda.to_gpu(gt_roi_masks)
        gt_roi_labels = cuda.to_gpu(gt_roi_labels)

        return sample_rois, gt_roi_locs, gt_roi_masks, gt_roi_labels
Exemplo n.º 6
0
    def __call__(self,
                 roi,
                 bbox,
                 label,
                 mask,
                 loc_normalize_mean=(0., 0., 0., 0.),
                 loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
        """Assigns ground truth to sampled proposals.

        This function samples up to :obj:`self.n_sample` RoIs
        from the combination of :obj:`roi` and :obj:`bbox`.
        The RoIs are assigned with the ground truth class labels as well as
        bounding box offsets and scales to match the ground truth bounding
        boxes. As many as :obj:`pos_ratio * self.n_sample` RoIs are
        sampled as foregrounds. For every foreground RoI, the matched ground
        truth mask is cropped to the RoI and resized to
        :obj:`(self.mask_size, self.mask_size)`.

        Offsets and scales of bounding boxes are calculated using
        :func:`chainercv.links.model.faster_rcnn.bbox2loc`.
        Outputs are moved to the GPU when :obj:`roi` is not a NumPy array.

        Here are notations.

        * :math:`S` is the total number of sampled RoIs, which is at most
          :obj:`self.n_sample`.
        * :math:`L` is number of object classes possibly including the
          background.

        Args:
            roi (array): Region of Interests (RoIs) from which we sample.
                Its shape is :math:`(R, 4)`
            bbox (array): The coordinates of ground truth bounding boxes.
                Its shape is :math:`(R', 4)`.
            label (array): Ground truth bounding box labels. Its shape
                is :math:`(R',)`. Its range is :math:`[0, L - 1]`, where
                :math:`L` is the number of foreground classes.
            mask (array): Ground truth masks, indexed by ground truth box
                along the first axis. Each mask is treated as an image of
                non-negative integer labels.
            loc_normalize_mean (tuple of four floats): Mean values to normalize
                coordinates of bounding boxes.
            loc_normalize_std (tuple of four floats): Standard deviation of
                the coordinates of bounding boxes.

        Returns:
            (array, array, array, array):

            * **sample_roi**: Regions of interests that are sampled.
              Its shape is :math:`(S, 4)`.
            * **gt_roi_loc**: Offsets and scales to match
              the sampled RoIs to the ground truth bounding boxes.
              Its shape is :math:`(S, 4)`.
            * **gt_roi_label**: Labels assigned to sampled RoIs. Its shape is
              :math:`(S,)`. Its range is :math:`[0, L]`. The label with
              value 0 is the background.
            * **gt_roi_mask**: :obj:`int32` label masks of shape
              :obj:`(S, self.mask_size, self.mask_size)`. Rows of
              background RoIs are filled with -1.

        Raises:
            ValueError: If :obj:`bbox` is empty.

        """
        xp = cuda.get_array_module(roi)
        roi = cuda.to_cpu(roi)
        bbox = cuda.to_cpu(bbox)
        label = cuda.to_cpu(label)
        # The mask crops below are processed with NumPy and OpenCV, so the
        # masks must live on the CPU like the other inputs.
        mask = cuda.to_cpu(mask)

        n_bbox, _ = bbox.shape
        if n_bbox == 0:
            raise ValueError('Empty bbox is not supported.')

        # Ground truth boxes are added to the pool of candidate RoIs.
        roi = np.concatenate((roi, bbox), axis=0)

        pos_roi_per_image = np.round(self.n_sample * self.pos_ratio)
        iou = bbox_iou(roi, bbox)
        gt_assignment = iou.argmax(axis=1)
        max_iou = iou.max(axis=1)
        # Offset range of classes from [0, n_fg_class - 1] to [1, n_fg_class].
        # The label with value 0 is the background.
        gt_roi_label = label[gt_assignment] + 1

        # Select foreground RoIs as those with >= pos_iou_thresh IoU.
        pos_index = np.where(max_iou >= self.pos_iou_thresh)[0]
        pos_roi_per_this_image = int(min(pos_roi_per_image, pos_index.size))
        if pos_index.size > 0:
            pos_index = np.random.choice(pos_index,
                                         size=pos_roi_per_this_image,
                                         replace=False)

        # Select background RoIs as those within
        # [neg_iou_thresh_lo, neg_iou_thresh_hi).
        neg_index = np.where((max_iou < self.neg_iou_thresh_hi)
                             & (max_iou >= self.neg_iou_thresh_lo))[0]
        neg_roi_per_this_image = self.n_sample - pos_roi_per_this_image
        neg_roi_per_this_image = int(
            min(neg_roi_per_this_image, neg_index.size))
        if neg_index.size > 0:
            neg_index = np.random.choice(neg_index,
                                         size=neg_roi_per_this_image,
                                         replace=False)

        # The indices that we're selecting (both positive and negative).
        # Positive indices come first, so row i of ``sample_roi`` belongs to
        # pos_index[i] for i < len(pos_index).
        keep_index = np.append(pos_index, neg_index)
        gt_roi_label = gt_roi_label[keep_index]
        gt_roi_label[pos_roi_per_this_image:] = 0  # negative labels --> 0
        sample_roi = roi[keep_index]

        # Compute offsets and scales to match sampled RoIs to the GTs.
        gt_roi_loc = bbox2loc(sample_roi, bbox[gt_assignment[keep_index]])
        gt_roi_loc = ((gt_roi_loc - np.array(loc_normalize_mean, np.float32)) /
                      np.array(loc_normalize_std, np.float32))

        # Compute gt masks. Background rows keep the initial value -1.
        gt_roi_mask = -np.ones(
            (len(sample_roi), self.mask_size, self.mask_size), dtype=np.int32)
        for i, pos_ind in enumerate(pos_index):
            box = np.round(sample_roi[i]).astype(np.int32)
            gt_mask = mask[gt_assignment[pos_ind]]
            gt_roi_mask_i = gt_mask[box[0]:box[2], box[1]:box[3]]
            # Resize per-label score maps instead of the label image itself,
            # so that interpolation never invents intermediate label values.
            gt_roi_mask_i_score = (
                np.arange(gt_roi_mask_i.max() +
                          1) == gt_roi_mask_i[..., None]).astype(
                              np.float32)  # label -> onehot
            gt_roi_mask_i_score = cv2.resize(gt_roi_mask_i_score,
                                             (self.mask_size, self.mask_size))
            # The resized array may come back without a channel axis;
            # restore it so that argmax over axis 2 is always valid.
            if gt_roi_mask_i_score.ndim == 2:
                gt_roi_mask_i_score = gt_roi_mask_i_score.reshape(
                    gt_roi_mask_i_score.shape[:2] + (1, ))
            gt_roi_mask_i = np.argmax(gt_roi_mask_i_score, axis=2)
            gt_roi_mask[i] = gt_roi_mask_i.astype(np.int32)

        if xp != np:
            sample_roi = cuda.to_gpu(sample_roi)
            gt_roi_loc = cuda.to_gpu(gt_roi_loc)
            gt_roi_label = cuda.to_gpu(gt_roi_label)
            gt_roi_mask = cuda.to_gpu(gt_roi_mask)
        return sample_roi, gt_roi_loc, gt_roi_label, gt_roi_mask
Exemplo n.º 7
0
    def __call__(self, roi, bbox, label,
                 loc_normalize_mean=(0., 0., 0., 0.),
                 loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
        """Sample proposals and attach ground truth targets to them.

        The ground truth boxes are appended to :obj:`roi`, and each
        candidate is matched to the ground truth box with the highest IoU.
        Up to :obj:`pos_ratio * self.n_sample` candidates are drawn as
        foregrounds, and the remaining budget is filled with backgrounds.

        Offsets and scales of bounding boxes are calculated using
        :func:`chainercv.links.model.faster_rcnn.bbox2loc`.
        Outputs are moved to the GPU when :obj:`roi` is not a NumPy array.

        Here are notations.

        * :math:`S` is the total number of sampled RoIs, which is at most
          :obj:`self.n_sample`.
        * :math:`L` is number of object classes possibly including the
          background.

        Args:
            roi (array): Region of Interests (RoIs) from which we sample.
                Its shape is :math:`(R, 4)`
            bbox (array): The coordinates of ground truth bounding boxes.
                Its shape is :math:`(R', 4)`.
            label (array): Ground truth bounding box labels. Its shape
                is :math:`(R',)`. Its range is :math:`[0, L - 1]`, where
                :math:`L` is the number of foreground classes.
            loc_normalize_mean (tuple of four floats): Mean values to normalize
                coordinates of bounding boxes.
            loc_normalize_std (tuple of four floats): Standard deviation of
                the coordinates of bounding boxes.

        Returns:
            (array, array, array):

            * **sample_roi**: Regions of interests that are sampled.
              Its shape is :math:`(S, 4)`.
            * **gt_roi_loc**: Offsets and scales to match
              the sampled RoIs to the ground truth bounding boxes.
              Its shape is :math:`(S, 4)`.
            * **gt_roi_label**: Labels assigned to sampled RoIs. Its shape is
              :math:`(S,)`. Its range is :math:`[0, L]`. The label with
              value 0 is the background.

        """
        xp = cuda.get_array_module(roi)
        roi = cuda.to_cpu(roi)
        bbox = cuda.to_cpu(bbox)
        label = cuda.to_cpu(label)

        # Unpacking requires ``bbox`` to be two dimensional.
        n_bbox, _ = bbox.shape

        candidates = np.concatenate((roi, bbox), axis=0)

        pos_budget = np.round(self.n_sample * self.pos_ratio)
        iou = bbox_iou(candidates, bbox)
        gt_assignment = iou.argmax(axis=1)
        max_iou = iou.max(axis=1)
        # Shift the classes from [0, n_fg_class - 1] to [1, n_fg_class] so
        # that 0 can stand for the background.
        shifted_label = label[gt_assignment] + 1

        # Foreground: candidates with IoU >= pos_iou_thresh.
        pos_index = np.where(max_iou >= self.pos_iou_thresh)[0]
        n_pos = int(min(pos_budget, pos_index.size))
        if pos_index.size > 0:
            pos_index = np.random.choice(
                pos_index, size=n_pos, replace=False)

        # Background: candidates with IoU in
        # [neg_iou_thresh_lo, neg_iou_thresh_hi).
        neg_index = np.where((max_iou < self.neg_iou_thresh_hi) &
                             (max_iou >= self.neg_iou_thresh_lo))[0]
        n_neg = int(min(self.n_sample - n_pos, neg_index.size))
        if neg_index.size > 0:
            neg_index = np.random.choice(
                neg_index, size=n_neg, replace=False)

        # Positives first, then negatives.
        keep_index = np.append(pos_index, neg_index)
        gt_roi_label = shifted_label[keep_index]
        gt_roi_label[n_pos:] = 0  # everything after the positives
        sample_roi = candidates[keep_index]

        # Normalized regression targets towards the matched ground truth.
        gt_roi_loc = bbox2loc(sample_roi, bbox[gt_assignment[keep_index]])
        mean = np.array(loc_normalize_mean, np.float32)
        std = np.array(loc_normalize_std, np.float32)
        gt_roi_loc = (gt_roi_loc - mean) / std

        if xp is np:
            return sample_roi, gt_roi_loc, gt_roi_label
        sample_roi = cuda.to_gpu(sample_roi)
        gt_roi_loc = cuda.to_gpu(gt_roi_loc)
        gt_roi_label = cuda.to_gpu(gt_roi_label)
        return sample_roi, gt_roi_loc, gt_roi_label
    def __call__(self,
                 roi,
                 bbox,
                 label,
                 mask,
                 levels,
                 loc_normalize_mean=(0., 0., 0., 0.),
                 loc_normalize_std=(0.1, 0.1, 0.2, 0.2),
                 mask_size=14,
                 binary_mask=True):
        """Assigns ground truth to sampled proposals.

        The ground truth boxes are appended to :obj:`roi` (and their
        feature levels, computed by :func:`map_rois_to_fpn_levels`, to
        :obj:`levels`). Each candidate is matched to the ground truth box
        with the highest IoU, and up to :obj:`self.n_sample` candidates are
        sampled, of which up to :obj:`pos_ratio * self.n_sample` are
        foregrounds.

        Outputs are moved to the GPU when :obj:`roi` is not a NumPy array.

        Args:
            roi (array): Region of Interests (RoIs) from which we sample.
                Its shape is :math:`(R, 4)`.
            bbox (array): The coordinates of ground truth bounding boxes.
                Its shape is :math:`(R', 4)`.
            label (array): Ground truth bounding box labels. Its shape
                is :math:`(R',)`. The labels are shifted by one so that 0
                stands for the background.
            mask (array): Ground truth per bounding box. With
                :obj:`binary_mask=True` it holds masks of shape
                :math:`(R', H, W)`. Otherwise it holds keypoints of shape
                :math:`(R', K, 3)` whose rows are read as
                :obj:`(y, x, v)`, where :obj:`v == 2` marks a labeled and
                visible keypoint. This array is not modified.
            levels (array): One entry per RoI in :obj:`roi`; presumably the
                feature pyramid level of each RoI -- TODO confirm against
                the caller.
            loc_normalize_mean (tuple of four floats): Mean values to normalize
                coordinates of bounding boxes.
            loc_normalize_std (tuple of four floats): Standard deviation of
                the coordinates of bounding boxes.
            mask_size (int): Side length of the generated square masks.
            binary_mask (bool): If :obj:`True`, generate resized masks.
                If :obj:`False`, generate keypoint labels.

        Returns:
            (array, array, array, array, array):

            * **sample_roi**: Regions of interests that are sampled,
              foregrounds first.
            * **sample_levels**: Entries of :obj:`levels` for the sampled
              RoIs.
            * **gt_roi_loc**: Normalized offsets and scales to match the
              sampled RoIs to the ground truth bounding boxes.
            * **gt_roi_label**: Labels assigned to sampled RoIs. The label
              with value 0 is the background.
            * **gt_roi_mask**: Targets of the foreground RoIs only. With
              :obj:`binary_mask=True`, masks resized to
              :obj:`(mask_size, mask_size)`. Otherwise, one label per
              keypoint equal to :obj:`y * mask_size + x` in the mask grid,
              or -1 when the keypoint is to be ignored.

        """
        xp = cuda.get_array_module(roi)
        roi = cuda.to_cpu(roi)
        bbox = cuda.to_cpu(bbox)
        label = cuda.to_cpu(label)
        mask = cuda.to_cpu(mask)
        levels = cuda.to_cpu(levels)

        # Ground truth boxes are added to the pool of candidate RoIs.
        roi = np.concatenate((roi, bbox), axis=0)

        # assign feature levels of ground truth boxes
        bbox_levels = map_rois_to_fpn_levels(np, bbox)
        levels = np.concatenate([levels, bbox_levels])

        pos_roi_per_image = np.round(self.n_sample * self.pos_ratio)
        iou = bbox_iou(roi, bbox)
        gt_assignment = iou.argmax(axis=1)
        max_iou = iou.max(axis=1)
        # Offset range of classes from [0, n_fg_class - 1] to [1, n_fg_class].
        # The label with value 0 is the background.
        gt_roi_label = label[gt_assignment] + 1

        # Select foreground RoIs as those with >= pos_iou_thresh IoU.
        pos_index = np.where(max_iou >= self.pos_iou_thresh)[0]
        pos_roi_per_this_image = int(min(pos_roi_per_image, pos_index.size))
        if pos_index.size > 0:
            pos_index = np.random.choice(pos_index,
                                         size=pos_roi_per_this_image,
                                         replace=False)

        # Select background RoIs as those within
        # [neg_iou_thresh_lo, neg_iou_thresh_hi).
        neg_index = np.where((max_iou < self.neg_iou_thresh_hi)
                             & (max_iou >= self.neg_iou_thresh_lo))[0]
        neg_roi_per_this_image = self.n_sample - pos_roi_per_this_image
        neg_roi_per_this_image = int(
            min(neg_roi_per_this_image, neg_index.size))
        if neg_index.size > 0:
            neg_index = np.random.choice(neg_index,
                                         size=neg_roi_per_this_image,
                                         replace=False)

        # The indices that we're selecting (both positive and negative).
        # Positive indices come first, so row i of ``sample_roi`` belongs to
        # pos_index[i] for i < len(pos_index).
        keep_index = np.append(pos_index, neg_index)
        gt_roi_label = gt_roi_label[keep_index]
        gt_roi_label[pos_roi_per_this_image:] = 0  # negative labels --> 0
        sample_roi = roi[keep_index]
        sample_levels = levels[keep_index]

        # Compute offsets and scales to match sampled RoIs to the GTs.
        gt_roi_loc = bbox2loc(sample_roi, bbox[gt_assignment[keep_index]])
        gt_roi_loc = ((gt_roi_loc - np.array(loc_normalize_mean, np.float32)) /
                      np.array(loc_normalize_std, np.float32))

        # https://engineer.dena.jp/2017/12/chainercvmask-r-cnn.html
        gt_roi_mask = []
        if binary_mask:
            _, h, w = mask.shape
            for i, idx in enumerate(gt_assignment[pos_index]):
                # Crop the matched mask to the RoI, clipped to the mask
                # extent, then resize the crop.
                y_min = max(int(sample_roi[i, 0]), 0)
                y_max = min(int(sample_roi[i, 2]), h)
                x_min = max(int(sample_roi[i, 1]), 0)
                x_max = min(int(sample_roi[i, 3]), w)
                crop = mask[idx, y_min:y_max, x_min:x_max]
                gt_roi_mask.append(
                    cv2.resize(crop, (mask_size, mask_size)).astype(np.int32))
        else:
            # The leading dimensions are unpacked here, as in the original,
            # so that malformed input fails before any sampling result is
            # used.
            _, h, w = mask.shape
            for i, idx in enumerate(gt_assignment[pos_index]):
                # bbox's (y0, x0), (y1, x1)
                y0, x0, y1, x1 = list(map(int, sample_roi[i, :4]))
                # Work on a copy: ``mask[idx]`` is a view, and the same
                # ground truth may be matched by several RoIs, so an
                # in-place transform would corrupt the caller's keypoints
                # and be applied repeatedly.
                kp = mask[idx].copy()
                # convert keypoints coordinate (y, x) into mask coordinate
                # system [0, mask_size]x[0, mask_size]
                kp[:, :2] = (kp[:, :2] - [y0, x0]) / \
                    [max(y1 - y0, 1), max(x1 - x0, 1)] * mask_size
                # The position inside the mask_size x mask_size grid is
                # used as the class label (softmax cross entropy is applied
                # to it later). A label of -1 is ignored.
                keypoint_labels = np.zeros(kp.shape[0], dtype=np.int32)
                for j, r in enumerate(kp):
                    # NOTE(review): int() truncates towards zero, so a
                    # coordinate in (-1, 0) is treated as cell 0.
                    y, x, v = list(map(int, r))
                    if v == 2 and 0 <= y < mask_size and 0 <= x < mask_size:
                        keypoint_labels[j] = y * mask_size + x
                    else:
                        keypoint_labels[j] = -1

                gt_roi_mask.append(keypoint_labels)

        # Build the array on the CPU; it is transferred below if needed.
        gt_roi_mask = np.array(gt_roi_mask)

        if xp != np:
            sample_roi = cuda.to_gpu(sample_roi)
            gt_roi_loc = cuda.to_gpu(gt_roi_loc)
            gt_roi_label = cuda.to_gpu(gt_roi_label)
            gt_roi_mask = cuda.to_gpu(gt_roi_mask)
            sample_levels = cuda.to_gpu(sample_levels)
        return sample_roi, sample_levels, gt_roi_loc, gt_roi_label, gt_roi_mask