Exemplo n.º 1
0
    def _calc_ious(self, anchor, bbox, inside_index):
        """Match anchors and ground truth boxes through their IoU matrix.

        The matrix returned by ``bbox_iou(anchor, bbox)`` is indexed with
        anchors along the first axis and ground truth boxes along the second.

        Args:
            anchor (array): Anchor boxes, forwarded to ``bbox_iou``.
            bbox (array): Ground truth boxes, forwarded to ``bbox_iou``.
            inside_index (array): Only its length is used; it must equal the
                number of rows of the IoU matrix.

        Returns:
            tuple of three arrays:

            * index of the highest-IoU ground truth box for every anchor.
            * the IoU value belonging to that best ground truth box.
            * row indices of every anchor whose IoU equals the column-wise
              maximum of some ground truth box (ties are all kept).

        """
        overlaps = bbox_iou(anchor, bbox)

        # Row-wise maximum: the best ground truth box of each anchor.
        best_gt = overlaps.argmax(axis=1)
        anchor_rows = np.arange(len(inside_index))
        best_gt_iou = overlaps[anchor_rows, best_gt]

        # Column-wise maximum: the best IoU reached for each ground truth box.
        best_anchor = overlaps.argmax(axis=0)
        gt_cols = np.arange(overlaps.shape[1])
        peak_iou = overlaps[best_anchor, gt_cols]

        # argmax reports a single winner per column; comparing against the
        # peak value recovers every anchor that ties for the maximum.
        tied_anchor = np.where(overlaps == peak_iou)[0]

        return best_gt, best_gt_iou, tied_anchor
Exemplo n.º 2
0
    def _calc_ious(self, anchor, bbox, inside_index):
        """Compute anchor / ground truth assignments from pairwise IoUs.

        Args:
            anchor (array): Anchor boxes passed as the first argument of
                ``bbox_iou``.
            bbox (array): Ground truth boxes passed as the second argument of
                ``bbox_iou``.
            inside_index (array): Its length selects the rows of the IoU
                matrix that are read; the values themselves are not used.

        Returns:
            (array, array, array): For each anchor the column of its largest
            IoU, that largest IoU, and the rows of all anchors that attain
            the per-column maximum IoU.

        """
        iou_table = bbox_iou(anchor, bbox)
        n_gt = iou_table.shape[1]

        per_anchor_choice = iou_table.argmax(axis=1)
        per_anchor_iou = iou_table[
            np.arange(len(inside_index)), per_anchor_choice]

        # Highest IoU in each column, looked up through its argmax row.
        per_gt_iou = iou_table[iou_table.argmax(axis=0), np.arange(n_gt)]

        # Every row equal to a column maximum is reported, so ties survive.
        rows_at_column_max, _ = np.where(iou_table == per_gt_iou)

        return per_anchor_choice, per_anchor_iou, rows_at_column_max
Exemplo n.º 3
0
def mask_voting(
        rois, cls_probs, mask_probs,
        n_class, H, W,
        score_thresh=0.7,
        nms_thresh=0.3,
        mask_merge_thresh=0.5,
        binary_thresh=0.4):
    """Merge overlapping RoI masks of the same class into voted masks.

    For each of the ``n_class - 1`` non-background columns of ``cls_probs``
    the RoIs scoring at least 0.001 go through non maximum suppression
    (at most 100 survivors). Every survivor is then merged with all RoIs
    whose IoU with it exceeds ``mask_merge_thresh``, weighting each RoI by
    its normalized class probability. Finally only merged entries scoring
    above ``score_thresh`` are returned.

    Args:
        rois (array): RoI boxes, indexed along the first axis.
        cls_probs (array): Class probabilities; column 0 is skipped and
            column ``l + 1`` is used for output label ``l``.
        mask_probs (array): Per-RoI mask probabilities. The size of its last
            axis is used as the side length of the returned masks.
        n_class (int): Number of columns considered, background included.
        H (int): Forwarded to ``mask_aggregation``.
        W (int): Forwarded to ``mask_aggregation``.
        score_thresh (float): Minimum (exclusive) class score of an output.
        nms_thresh (float): Threshold given to non maximum suppression.
        mask_merge_thresh (float): Minimum (exclusive) IoU for merging.
        binary_thresh (float): Forwarded to ``mask_aggregation``.

    Returns:
        tuple of four arrays: labels, masks, bounding boxes and class
        probabilities of the voted masks, in this order.

    """
    mask_size = mask_probs.shape[-1]

    # Each accumulator starts with a typed empty array, so the result is
    # well formed even when no class contributes anything.
    label_chunks = [np.empty((0, ), dtype=np.int32)]
    mask_chunks = [np.empty((0, mask_size, mask_size), dtype=np.float32)]
    bbox_chunks = [np.empty((0, 4), dtype=np.float32)]
    prob_chunks = [np.empty((0, ), dtype=np.float32)]

    for fg_label in range(n_class - 1):
        # Column 0 of cls_probs is the background.
        column = fg_label + 1

        # non maximum suppression
        probs = cls_probs[:, column]
        candidate = probs >= 0.001
        boxes = rois[candidate]
        probs = probs[candidate]
        survivors = non_maximum_suppression(
            boxes, nms_thresh, probs, limit=100)
        boxes = boxes[survivors]
        probs = probs[survivors]

        n_box = len(boxes)
        voted_masks = np.zeros((n_box, mask_size, mask_size))
        voted_boxes = np.zeros((n_box, 4))

        for i in range(n_box):
            box = boxes[i]
            overlap = bbox_iou(rois, box[np.newaxis, :])
            members = np.where(overlap > mask_merge_thresh)[0]
            # Weights are the class probabilities normalized to sum to one.
            weights = cls_probs[members, column]
            weights = weights / weights.sum()
            merged_mask, voted_boxes[i] = mask_aggregation(
                rois[members], mask_probs[members], weights,
                H, W, binary_thresh)
            voted_masks[i] = cv2.resize(
                merged_mask.astype(np.float32), (mask_size, mask_size))

        confident = probs > score_thresh
        voted_boxes = voted_boxes[confident]
        prob_chunks.append(probs[confident])
        mask_chunks.append(voted_masks[confident])
        bbox_chunks.append(voted_boxes)
        label_chunks.append(np.repeat(fg_label, voted_boxes.shape[0]))

    v_labels = np.concatenate(label_chunks)
    v_masks = np.concatenate(mask_chunks)
    v_bboxes = np.concatenate(bbox_chunks)
    v_cls_probs = np.concatenate(prob_chunks)
    return v_labels, v_masks, v_bboxes, v_cls_probs
    def __call__(
            self,
            roi,
            mask,
            label,
            bbox,
            loc_normalize_mean=(0., 0., 0., 0.),
            loc_normalize_std=(0.2, 0.2, 0.5, 0.5),
            mask_size=(21, 21),
    ):
        """Assigns ground truth to sampled proposals.

        This function samples RoIs from the combination of :obj:`roi` and
        :obj:`bbox`. The sampled RoIs are assigned ground truth class labels,
        bounding box offsets and scales, and ground truth masks cropped to the
        RoI. At most :obj:`pos_ratio * n_sample` RoIs are sampled as
        foregrounds, where :obj:`n_sample` is :obj:`self.n_sample`, or the
        number of candidate RoIs when :obj:`self.n_sample` is :obj:`None`.

        Offsets and scales of bounding boxes are calculated using
        :obj:`bbox2loc`.
        Also, types of input arrays and output arrays are same.

        Here are notations.

        * :math:`S` is the total number of sampled RoIs. It is at most
          :obj:`n_sample` and can be smaller when there are not enough
          foreground or background candidates.
        * :math:`L` is the number of foreground classes.
        * :math:`H` is the image height.
        * :math:`W` is the image width.
        * :math:`RH` is the mask height.
        * :math:`RW` is the mask width.

        Args:
            roi (array): Region of Interests (RoIs) from which we sample.
                Its shape is :math:`(R, 4)`
            mask (array): Ground truth masks.
                Its shape is :math:`(R', H, W)`.
            label (array): Ground truth bounding box labels. Its shape
                is :math:`(R',)`. Its range is :math:`[0, L - 1]`.
            bbox (array): The coordinates of ground truth bounding boxes.
                Its shape is :math:`(R', 4)`.
            loc_normalize_mean (tuple of four floats): Mean values to normalize
                coordinates of bounding boxes.
            loc_normalize_std (tuple of four floats): Standard deviation of
                the coordinates of bounding boxes.
            mask_size (tuple of int or int): Generated mask size, which is
                equal to :math:`(RH, RW)`. A single int is used for both
                sides.

        Returns:
            (array, array, array, array):

            * **sample_roi**: Regions of interests that are sampled.
              Its shape is :math:`(S, 4)`.
            * **gt_roi_mask**: Masks assigned to sampled RoIs. Its shape is
              :math:`(S, RH, RW)`. Rows of background RoIs are filled
              with -1.
            * **gt_roi_label**: Labels assigned to sampled RoIs. Its shape is
              :math:`(S,)`. Its range is :math:`[0, L]`. The label with
              value 0 is the background.
            * **gt_roi_loc**: Offsets and scales to match
              the sampled RoIs to the ground truth bounding boxes.
              Its shape is :math:`(S, 4)`.

        """

        xp = cuda.get_array_module(roi)
        roi = cuda.to_cpu(roi)
        mask = cuda.to_cpu(mask)
        label = cuda.to_cpu(label)
        bbox = cuda.to_cpu(bbox)

        # Accept a scalar as well as any two-element sequence.
        if not isinstance(mask_size, (tuple, list)):
            mask_size = (mask_size, mask_size)

        # Ground truth boxes take part in the sampling as candidates too.
        roi = np.concatenate((roi, bbox), axis=0)

        if self.n_sample is None:
            n_sample = roi.shape[0]
        else:
            n_sample = self.n_sample

        pos_roi_per_image = np.round(n_sample * self.pos_ratio)
        iou = bbox_iou(roi, bbox)
        gt_assignment = iou.argmax(axis=1)
        max_iou = iou.max(axis=1)

        # Offset range of classes from [0, n_fg_class - 1] to [1, n_fg_class].
        # The label with value 0 is the background.
        gt_roi_label = label[gt_assignment] + 1

        # Select foreground RoIs as those with >= pos_iou_thresh IoU.
        pos_index = np.where(max_iou >= self.pos_iou_thresh)[0]
        pos_roi_per_this_image = int(min(pos_roi_per_image, pos_index.size))
        if pos_index.size > 0:
            pos_index = np.random.choice(pos_index,
                                         size=pos_roi_per_this_image,
                                         replace=False)

        # Select background RoIs as those within
        # [neg_iou_thresh_lo, neg_iou_thresh_hi).
        neg_index = np.where((max_iou < self.neg_iou_thresh_hi)
                             & (max_iou >= self.neg_iou_thresh_lo))[0]
        # Use the resolved local ``n_sample``: ``self.n_sample`` may be None.
        neg_roi_per_this_image = n_sample - pos_roi_per_this_image
        neg_roi_per_this_image = int(
            min(neg_roi_per_this_image, neg_index.size))
        if neg_index.size > 0:
            neg_index = np.random.choice(neg_index,
                                         size=neg_roi_per_this_image,
                                         replace=False)

        # The indices that we're selecting (both foreground and background).
        # Foreground entries come first, which the code below relies on.
        keep_index = np.append(pos_index, neg_index)
        gt_roi_label = gt_roi_label[keep_index]
        gt_roi_label[pos_roi_per_this_image:] = 0  # negative labels --> 0
        sample_roi = roi[keep_index]

        # locs
        # Compute offsets and scales to match sampled RoIs to the GTs.
        loc_normalize_mean = np.array(loc_normalize_mean, np.float32)
        loc_normalize_std = np.array(loc_normalize_std, np.float32)
        gt_roi_loc = bbox2loc(sample_roi, bbox[gt_assignment[keep_index]])
        gt_roi_loc = gt_roi_loc - loc_normalize_mean
        gt_roi_loc = gt_roi_loc / loc_normalize_std

        # masks
        # Background rows keep the initial value -1.
        gt_roi_mask = -1 * np.ones(
            (len(keep_index), mask_size[0], mask_size[1]), dtype=np.int32)

        for i, pos_ind in enumerate(pos_index):
            # ``np.int`` was removed from NumPy; the builtin ``int`` is the
            # type it used to alias.
            bb = np.round(sample_roi[i]).astype(int)
            gt_msk = mask[gt_assignment[pos_ind]]
            # NOTE(review): negative RoI coordinates would wrap around in
            # this slice -- presumably RoIs are clipped upstream; confirm.
            gt_roi_msk = gt_msk[bb[0]:bb[2], bb[1]:bb[3]]
            gt_roi_msk = resize(
                gt_roi_msk.astype(np.float32)[None], mask_size)[0]
            gt_roi_msk = (gt_roi_msk >= self.binary_thresh).astype(np.int32)
            gt_roi_mask[i] = gt_roi_msk

        if xp != np:
            sample_roi = cuda.to_gpu(sample_roi)
            gt_roi_mask = cuda.to_gpu(gt_roi_mask)
            gt_roi_label = cuda.to_gpu(gt_roi_label)
            gt_roi_loc = cuda.to_gpu(gt_roi_loc)

        return sample_roi, gt_roi_mask, gt_roi_label, gt_roi_loc
def mask_voting(roi_cmask_prob,
                bbox,
                roi_cls_prob,
                size,
                score_thresh,
                nms_thresh,
                mask_merge_thresh,
                binary_thresh,
                limit=100,
                bg_label=0):
    """Refine mask probabilities by merging multiple masks.

    First, this function discards invalid masks with non maximum suppression.
    Then, it merges masks with weights calculated from class probabilities of
    the RoIs whose IoU with the kept box is at least
    :obj:`mask_merge_thresh`.
    This function improves the mask qualities by merging overlapped masks
    predicted as the same object class.

    Here are notations used.

    * :math:`R` is the total number of RoIs.
    * :math:`L` is the number of classes excluding the background.
    * :math:`RH` is the height of pooled image.
    * :math:`RW` is the width of pooled image.

    Args:
        roi_cmask_prob (array): A mask probability array whose shape is
            :math:`(R, RH, RW)`.
        bbox (array): A bounding box array whose shape is
            :math:`(R, 4)`.
        roi_cls_prob (array): A class probability array whose shape is
            :math:`(R, L + 1)`.
        size (tuple of int): Original image size.
        score_thresh (float): A threshold value of the class score.
        nms_thresh (float): A threshold value of non maximum suppression.
        mask_merge_thresh (float): A threshold value of the bounding box iou
            for mask merging.
        binary_thresh (float): A threshold value of mask score
            for mask merging.
        limit (int): The maximum number of outputs. It is enforced by raising
            the score threshold to the :obj:`limit`-th highest score.
        bg_label (int): The id of the background label.

    Returns:
        array, array, array, array:

        * **v_cmask_prob**: Merged mask probability. Its shape is
          :math:`(N, RH, RW)`.
        * **v_bbox**: Bounding boxes for the merged masks. Its shape is
          :math:`(N, 4)`.
        * **v_label**: Class labels for the merged masks. Its shape is
          :math:`(N, )`. Labels index the foreground classes, i.e. the
          columns of :obj:`roi_cls_prob` with the background column removed.
        * **v_score**: Class probabilities for the merged masks. Its shape
          is :math:`(N, )`.

        All four arrays have length zero when nothing passes the thresholds.

    """

    roi_cmask_size = roi_cmask_prob.shape[1:]
    n_class = roi_cls_prob.shape[1]

    # Foreground columns in order. Their position in this list is the output
    # label, which equals ``label - 1`` for the default ``bg_label=0`` and
    # stays correct for any other background id.
    fg_labels = [label for label in range(n_class) if label != bg_label]

    # non maximum suppression, independently for every foreground class
    cls_score = []
    cls_bbox = []
    for label in fg_labels:
        score_l = roi_cls_prob[:, label]
        keep_indices = non_maximum_suppression(bbox, nms_thresh, score_l)
        cls_bbox.append(bbox[keep_indices])
        cls_score.append(score_l[keep_indices])

    # Raise the score threshold so that at most ``limit`` boxes survive.
    # Both guards avoid indexing/concatenating empty data.
    if cls_score:
        sorted_score = np.sort(np.concatenate(cls_score))[::-1]
        if len(sorted_score) > 0:
            n_keep = min(len(sorted_score), limit)
            score_thresh = max(sorted_score[n_keep - 1], score_thresh)

    v_cmask_prob = []
    v_bbox = []
    v_label = []
    v_cls_prob = []

    for fg_index, label in enumerate(fg_labels):
        bbox_l = cls_bbox[fg_index]
        score_l = cls_score[fg_index]
        keep_indices = np.where(score_l >= score_thresh)
        bbox_l = bbox_l[keep_indices]
        score_l = score_l[keep_indices]

        v_cmask_prob_l = []
        v_bbox_l = []
        v_score_l = []

        for i, bb in enumerate(bbox_l):
            # Merge every RoI that overlaps the kept box strongly enough.
            iou = bbox_iou(bbox, bb[np.newaxis, :])
            merge_indices = np.where(iou >= mask_merge_thresh)[0]
            cmask_weight = roi_cls_prob[merge_indices, label]
            cmask_weight = cmask_weight / cmask_weight.sum()
            cmask_prob_i = roi_cmask_prob[merge_indices]
            bbox_i = bbox[merge_indices]
            m_cmask, m_bbox = _mask_aggregation(bbox_i, cmask_prob_i,
                                                cmask_weight, size,
                                                binary_thresh)
            # Entries for which the aggregation returns None are dropped.
            if m_cmask is not None and m_bbox is not None:
                m_cmask = resize(m_cmask.astype(np.float32), roi_cmask_size)
                v_cmask_prob_l.append(m_cmask)
                v_bbox_l.append(m_bbox)
                v_score_l.append(score_l[i])

        if len(v_cmask_prob_l) > 0:
            v_cmask_prob_l = np.concatenate(v_cmask_prob_l)
            v_bbox_l = np.concatenate(v_bbox_l)
            v_score_l = np.array(v_score_l)

            v_label_l = np.repeat(fg_index, v_bbox_l.shape[0])
            v_label_l = v_label_l.astype(np.int32)
            v_cmask_prob.append(v_cmask_prob_l)
            v_bbox.append(v_bbox_l)
            v_label.append(v_label_l)
            v_cls_prob.append(v_score_l)

    if len(v_cmask_prob) > 0:
        v_cmask_prob = np.concatenate(v_cmask_prob)
        v_bbox = np.concatenate(v_bbox)
        v_label = np.concatenate(v_label)
        v_cls_prob = np.concatenate(v_cls_prob)
    else:
        v_cmask_prob = np.empty((0, roi_cmask_size[0], roi_cmask_size[1]))
        v_bbox = np.empty((0, 4))
        # Same integer dtype as the labels of the non-empty case.
        v_label = np.empty((0, ), dtype=np.int32)
        v_cls_prob = np.empty((0, ))
    return v_cmask_prob, v_bbox, v_label, v_cls_prob
Exemplo n.º 6
0
    def __call__(self,
                 roi,
                 bbox,
                 label,
                 mask,
                 loc_normalize_mean=(0., 0., 0., 0.),
                 loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
        """Assigns ground truth to sampled proposals.

        This function samples total of :obj:`self.n_sample` RoIs
        from the combination of :obj:`roi` and :obj:`bbox`.
        The RoIs are assigned with the ground truth class labels as well as
        bounding box offsets and scales to match the ground truth bounding
        boxes. As many as :obj:`pos_ratio * self.n_sample` RoIs are
        sampled as foregrounds. For every foreground RoI the assigned ground
        truth mask is cropped to the RoI and resized to a square whose side
        is :obj:`2 * self.roi_size`.

        Offsets and scales of bounding boxes are calculated using
        :obj:`bbox2loc`.
        Also, types of input arrays and output arrays are same.

        Here are notations.

        * :math:`S` is the total number of sampled RoIs, which is at most
          :obj:`self.n_sample`.
        * :math:`P` is the number of sampled foreground RoIs.
        * :math:`L` is the number of foreground classes.
        * :math:`M` is :obj:`2 * self.roi_size`.

        Args:
            roi (array): Region of Interests (RoIs) from which we sample.
                Its shape is :math:`(R, 4)`
            bbox (array): The coordinates of ground truth bounding boxes.
                Its shape is :math:`(R', 4)`.
            label (array): Ground truth bounding box labels. Its shape
                is :math:`(R',)`. Its range is :math:`[0, L - 1]`.
            mask (array): Ground truth masks, one per ground truth bounding
                box. Its shape is :math:`(R', H, W)`.
            loc_normalize_mean (tuple of four floats): Mean values to normalize
                coordinates of bounding boxes.
            loc_normalize_std (tuple of four floats): Standard deviation of
                the coordinates of bounding boxes.

        Returns:
            (array, array, array, array):

            * **sample_roi**: Regions of interests that are sampled.
              Its shape is :math:`(S, 4)`.
            * **gt_roi_loc**: Offsets and scales to match
              the sampled RoIs to the ground truth bounding boxes.
              Its shape is :math:`(S, 4)`.
            * **gt_roi_label**: Labels assigned to sampled RoIs. Its shape is
              :math:`(S,)`. Its range is :math:`[0, L]`. The label with
              value 0 is the background.
            * **gt_roi_mask**: Masks of the sampled foreground RoIs. Its
              shape is :math:`(P, M, M)`; it is empty when no foreground
              RoI is sampled.

        """
        xp = cuda.get_array_module(roi)
        roi = cuda.to_cpu(roi)
        bbox = cuda.to_cpu(bbox)
        label = cuda.to_cpu(label)
        mask = cuda.to_cpu(mask)

        # Ground truth boxes take part in the sampling as candidates too.
        roi = np.concatenate((roi, bbox), axis=0)

        pos_roi_per_image = np.round(self.n_sample * self.pos_ratio)
        iou = bbox_iou(roi, bbox)
        gt_assignment = iou.argmax(axis=1)
        max_iou = iou.max(axis=1)

        # Offset range of classes from [0, n_fg_class - 1] to [1, n_fg_class].
        # The label with value 0 is the background.
        gt_roi_label = label[gt_assignment] + 1

        # Select foreground RoIs as those with >= pos_iou_thresh IoU.
        pos_index = np.where(max_iou >= self.pos_iou_thresh)[0]
        pos_roi_per_this_image = int(min(pos_roi_per_image, pos_index.size))
        if pos_index.size > 0:
            pos_index = np.random.choice(pos_index,
                                         size=pos_roi_per_this_image,
                                         replace=False)

        # Select background RoIs as those within
        # [neg_iou_thresh_lo, neg_iou_thresh_hi).
        neg_index = np.where((max_iou < self.neg_iou_thresh_hi)
                             & (max_iou >= self.neg_iou_thresh_lo))[0]
        neg_roi_per_this_image = self.n_sample - pos_roi_per_this_image
        neg_roi_per_this_image = int(
            min(neg_roi_per_this_image, neg_index.size))
        if neg_index.size > 0:
            neg_index = np.random.choice(neg_index,
                                         size=neg_roi_per_this_image,
                                         replace=False)

        # The indices that we're selecting (both positive and negative).
        # Positive entries come first, which the mask loop below relies on.
        keep_index = np.append(pos_index, neg_index)
        gt_roi_label = gt_roi_label[keep_index]
        gt_roi_label[pos_roi_per_this_image:] = 0  # negative labels --> 0
        sample_roi = roi[keep_index]  # sampled <- proposed

        # Compute offsets and scales to match sampled RoIs to the GTs.
        gt_roi_loc = bbox2loc(sample_roi, bbox[gt_assignment[keep_index]])
        gt_roi_loc = ((gt_roi_loc - np.array(loc_normalize_mean, np.float32)) /
                      np.array(loc_normalize_std, np.float32))

        # Prepare groundtruth masks
        mask_side = self.roi_size * 2
        _, h, w = mask.shape
        gt_roi_mask = []
        for i, idx in enumerate(gt_assignment[pos_index]):
            # Crop the assigned mask to the RoI, clipped to the mask extent.
            y_min = max(int(sample_roi[i, 0]), 0)
            x_min = max(int(sample_roi[i, 1]), 0)
            y_max = min(int(sample_roi[i, 2]), h)
            x_max = min(int(sample_roi[i, 3]), w)
            cropped = mask[idx, y_min:y_max, x_min:x_max]
            gt_roi_mask.append(cv2.resize(cropped, (mask_side, mask_side)))

        # ``np.stack`` rejects an empty list, so the case without any
        # sampled foreground RoI gets an explicit empty array.
        if gt_roi_mask:
            gt_roi_mask = np.stack(gt_roi_mask).astype(np.int32)
        else:
            gt_roi_mask = np.empty((0, mask_side, mask_side), dtype=np.int32)

        if xp != np:
            sample_roi = cuda.to_gpu(sample_roi)
            gt_roi_loc = cuda.to_gpu(gt_roi_loc)
            gt_roi_label = cuda.to_gpu(gt_roi_label)
            gt_roi_mask = cuda.to_gpu(gt_roi_mask)
        return sample_roi, gt_roi_loc, gt_roi_label, gt_roi_mask
Exemplo n.º 7
0
def calc_detection_voc_prec_rec(pred_bboxes,
                                pred_labels,
                                pred_scores,
                                gt_bboxes,
                                gt_labels,
                                gt_difficults=None,
                                iou_thresh=0.5):
    """Calculate precision and recall based on evaluation code of PASCAL VOC.

    This function calculates precision and recall of
    predicted bounding boxes obtained from a dataset which has :math:`N`
    images.
    The code is based on the evaluation code used in PASCAL VOC Challenge.

    Args:
        pred_bboxes (iterable of numpy.ndarray): An iterable of :math:`N`
            sets of bounding boxes.
            Its index corresponds to an index for the base dataset.
            Each element of :obj:`pred_bboxes` is a set of coordinates
            of bounding boxes. This is an array whose shape is :math:`(R, 4)`,
            where :math:`R` corresponds
            to the number of bounding boxes, which may vary among images.
            The second axis corresponds to :obj:`y_min, x_min, y_max, x_max`
            of a bounding box.
        pred_labels (iterable of numpy.ndarray): An iterable of labels.
            Similar to :obj:`pred_bboxes`, its index corresponds to an
            index for the base dataset. Its length is :math:`N`.
        pred_scores (iterable of numpy.ndarray): An iterable of confidence
            scores for predicted bounding boxes. Similar to :obj:`pred_bboxes`,
            its index corresponds to an index for the base dataset.
            Its length is :math:`N`.
        gt_bboxes (iterable of numpy.ndarray): An iterable of ground truth
            bounding boxes
            whose length is :math:`N`. An element of :obj:`gt_bboxes` is a
            bounding box whose shape is :math:`(R, 4)`. Note that the number of
            bounding boxes in each image does not need to be same as the number
            of corresponding predicted boxes.
        gt_labels (iterable of numpy.ndarray): An iterable of ground truth
            labels which are organized similarly to :obj:`gt_bboxes`.
        gt_difficults (iterable of numpy.ndarray): An iterable of boolean
            arrays which is organized similarly to :obj:`gt_bboxes`.
            This tells whether the
            corresponding ground truth bounding box is difficult or not.
            By default, this is :obj:`None`. In that case, this function
            considers all bounding boxes to be not difficult.
        iou_thresh (float): A prediction is counted as a match if its
            Intersection over Union with a ground truth box is at least
            this value.

    Returns:
        tuple of two lists:
        This function returns two lists: :obj:`prec` and :obj:`rec`.
        Both lists are empty when no label appears in either the
        predictions or the ground truth.

        * :obj:`prec`: A list of arrays. :obj:`prec[l]` is precision
          for class :math:`l`. If class :math:`l` does not exist in
          either :obj:`pred_labels` or :obj:`gt_labels`, :obj:`prec[l]` is
          set to :obj:`None`.
        * :obj:`rec`: A list of arrays. :obj:`rec[l]` is recall
          for class :math:`l`. If class :math:`l` that is not marked as
          difficult does not exist in
          :obj:`gt_labels`, :obj:`rec[l]` is
          set to :obj:`None`.

    Raises:
        ValueError: If the input iterables do not have the same length.

    """

    pred_bboxes = iter(pred_bboxes)
    pred_labels = iter(pred_labels)
    pred_scores = iter(pred_scores)
    gt_bboxes = iter(gt_bboxes)
    gt_labels = iter(gt_labels)
    if gt_difficults is None:
        gt_difficults = itertools.repeat(None)
    else:
        gt_difficults = iter(gt_difficults)

    # Per class: number of non-difficult ground truth boxes, prediction
    # scores, and match flags (1: true positive, 0: false positive,
    # -1: matched a difficult box and therefore ignored).
    n_pos = defaultdict(int)
    score = defaultdict(list)
    match = defaultdict(list)

    for pred_bbox, pred_label, pred_score, gt_bbox, gt_label, gt_difficult in \
        six.moves.zip(
            pred_bboxes, pred_labels, pred_scores,
            gt_bboxes, gt_labels, gt_difficults):

        if gt_difficult is None:
            gt_difficult = np.zeros(gt_bbox.shape[0], dtype=bool)

        for cls_id in np.unique(
                np.concatenate((pred_label, gt_label)).astype(int)):
            pred_mask_l = pred_label == cls_id
            pred_bbox_l = pred_bbox[pred_mask_l]
            pred_score_l = pred_score[pred_mask_l]
            # sort by score
            order = pred_score_l.argsort()[::-1]
            pred_bbox_l = pred_bbox_l[order]
            pred_score_l = pred_score_l[order]

            gt_mask_l = gt_label == cls_id
            gt_bbox_l = gt_bbox[gt_mask_l]
            gt_difficult_l = gt_difficult[gt_mask_l]

            n_pos[cls_id] += np.logical_not(gt_difficult_l).sum()
            score[cls_id].extend(pred_score_l)

            if len(pred_bbox_l) == 0:
                continue
            if len(gt_bbox_l) == 0:
                # Nothing to match against: every prediction is a miss.
                match[cls_id].extend((0, ) * pred_bbox_l.shape[0])
                continue

            # VOC evaluation follows integer typed bounding boxes.
            pred_bbox_l = pred_bbox_l.copy()
            pred_bbox_l[:, 2:] += 1
            gt_bbox_l = gt_bbox_l.copy()
            gt_bbox_l[:, 2:] += 1

            iou = bbox_iou(pred_bbox_l, gt_bbox_l)
            gt_index = iou.argmax(axis=1)
            # set -1 if there is no matching ground truth
            gt_index[iou.max(axis=1) < iou_thresh] = -1
            del iou

            # Predictions are visited in descending score order, so the
            # first one to hit a ground truth box claims it; later hits on
            # the same box count as false positives.
            selec = np.zeros(gt_bbox_l.shape[0], dtype=bool)
            for gt_idx in gt_index:
                if gt_idx >= 0:
                    if gt_difficult_l[gt_idx]:
                        match[cls_id].append(-1)
                    else:
                        if not selec[gt_idx]:
                            match[cls_id].append(1)
                        else:
                            match[cls_id].append(0)
                    selec[gt_idx] = True
                else:
                    match[cls_id].append(0)

    # The zip above stops at the shortest input; any iterator that still
    # yields an element reveals a length mismatch.
    for iter_ in (pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels,
                  gt_difficults):
        if next(iter_, None) is not None:
            raise ValueError('Length of input iterables need to be same.')

    # No class was observed (no images, or no boxes in any image):
    # ``max`` of an empty sequence would raise, so report empty results.
    if not n_pos:
        return [], []

    n_fg_class = max(n_pos.keys()) + 1
    prec = [None] * n_fg_class
    rec = [None] * n_fg_class

    for cls_id in n_pos.keys():
        score_l = np.array(score[cls_id])
        match_l = np.array(match[cls_id], dtype=np.int8)

        order = score_l.argsort()[::-1]
        match_l = match_l[order]

        tp = np.cumsum(match_l == 1)
        fp = np.cumsum(match_l == 0)

        # If an element of fp + tp is 0,
        # the corresponding element of prec[cls_id] is nan.
        prec[cls_id] = tp / (fp + tp)
        # If n_pos[cls_id] is 0, rec[cls_id] is None.
        if n_pos[cls_id] > 0:
            rec[cls_id] = tp / n_pos[cls_id]

    return prec, rec
Exemplo n.º 8
0
    def __call__(self, rois, bboxes, whole_mask, labels):
        """Sample RoIs and build their box, mask and label targets.

        The ground truth boxes are appended to ``rois``; foreground and
        background candidates are then drawn at random according to their
        maximum IoU with the ground truth. Foreground RoIs come first in
        every returned array.

        Args:
            rois (array): Candidate RoIs.
            bboxes (array): Ground truth bounding boxes (two-dimensional).
            whole_mask (array): Ground truth masks, indexed like ``bboxes``.
            labels (array): Ground truth labels, indexed like ``bboxes``.
                The value 0 is used for the background.

        Returns:
            (array, array, array, array): Sampled RoIs, normalized location
            targets, mask targets (-1 for background RoIs) and labels
            (0 for background RoIs). All four are moved to the GPU.

        """
        rois = cuda.to_cpu(rois)
        bboxes = cuda.to_cpu(bboxes)
        whole_mask = cuda.to_cpu(whole_mask)
        labels = cuda.to_cpu(labels)

        n_bbox, _ = bboxes.shape

        # Ground truth boxes are candidates as well.
        rois = np.concatenate((rois, bboxes), axis=0)
        n_sample = rois.shape[0] if self.n_sample is None else self.n_sample

        fg_quota = np.round(n_sample * self.fg_ratio)
        iou = bbox_iou(rois, bboxes)
        gt_assignment = iou.argmax(axis=1)
        max_iou = iou.max(axis=1)

        # Foreground candidates: max IoU >= fg_iou_thresh.
        fg_indices = np.where(max_iou >= self.fg_iou_thresh)[0]
        n_fg = int(min(fg_quota, fg_indices.size))
        if fg_indices.size > 0:
            fg_indices = np.random.choice(
                fg_indices, size=n_fg, replace=False)

        # Background candidates: max IoU within
        # [bg_iou_thresh_lo, bg_iou_thresh_hi).
        below_hi = max_iou < self.bg_iou_thresh_hi
        at_least_lo = max_iou >= self.bg_iou_thresh_lo
        bg_indices = np.where(below_hi & at_least_lo)[0]
        n_bg = int(min(n_sample - n_fg, bg_indices.size))
        if bg_indices.size > 0:
            bg_indices = np.random.choice(
                bg_indices, size=n_bg, replace=False)

        # Foreground first, background second.
        keep_indices = np.append(fg_indices, bg_indices)
        sample_rois = rois[keep_indices]
        assigned_gt = gt_assignment[keep_indices]

        # Location targets, normalized by the configured mean and std.
        mean = np.array(self.loc_normalize_mean, np.float32)
        std = np.array(self.loc_normalize_std, np.float32)
        gt_roi_locs = (bbox2loc(sample_rois, bboxes[assigned_gt]) - mean) / std

        # Mask targets; rows of background RoIs keep the value -1.
        side = self.mask_size
        gt_roi_masks = -1 * np.ones(
            (len(keep_indices), side, side), dtype=np.int32)
        for row, gt_id in enumerate(gt_assignment[fg_indices]):
            roi_box = np.round(sample_rois[row]).astype(np.int32)
            gt_box = np.round(bboxes[gt_id]).astype(np.int32)
            patch = fcis.mask.intersect_bbox_mask(
                roi_box, gt_box, whole_mask[gt_id], self.mask_size)
            patch = cv2.resize(
                patch.astype(np.float32), (self.mask_size, self.mask_size))
            gt_roi_masks[row, ...] = (
                patch >= self.binary_thresh).astype(np.int32)

        # Label targets; everything after the foreground part is background.
        gt_roi_labels = labels[assigned_gt]
        gt_roi_labels[n_fg:] = 0

        sample_rois = cuda.to_gpu(sample_rois)
        gt_roi_locs = cuda.to_gpu(gt_roi_locs)
        gt_roi_masks = cuda.to_gpu(gt_roi_masks)
        gt_roi_labels = cuda.to_gpu(gt_roi_labels)

        return sample_rois, gt_roi_locs, gt_roi_masks, gt_roi_labels
Exemplo n.º 9
0
    def __call__(self,
                 roi,
                 bbox,
                 label,
                 mask,
                 loc_normalize_mean=(0., 0., 0., 0.),
                 loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
        """Assigns ground truth to sampled proposals.

        This function samples at most :obj:`self.n_sample` RoIs
        from the combination of :obj:`roi` and :obj:`bbox`.
        The RoIs are assigned with the ground truth class labels, bounding
        box offsets and scales to match the ground truth bounding boxes, and
        a mask target of size :obj:`self.mask_size`. At most
        :obj:`pos_ratio * self.n_sample` RoIs are sampled as foregrounds.

        Offsets and scales of bounding boxes are calculated using
        :func:`bbox2loc`.
        Also, types of input arrays and output arrays are same.

        Here are notations.

        * :math:`S` is the number of sampled RoIs, which is at most \
            :obj:`self.n_sample`.
        * :math:`L` is number of foreground classes.
        * :math:`M` is :obj:`self.mask_size`.

        Args:
            roi (array): Region of Interests (RoIs) from which we sample.
                Its shape is :math:`(R, 4)`
            bbox (array): The coordinates of ground truth bounding boxes.
                Its shape is :math:`(R', 4)`.
            label (array): Ground truth bounding box labels. Its shape
                is :math:`(R',)`. Its range is :math:`[0, L - 1]`.
            mask (array or sequence of arrays): Per ground truth box label
                maps, indexed by ground truth index. Each entry is cropped
                as a 2D array of non-negative integer labels (presumably
                image sized, i.e. :math:`(R', H, W)` overall).
            loc_normalize_mean (tuple of four floats): Mean values to normalize
                coordinates of bounding boxes.
            loc_normalize_std (tuple of four floats): Standard deviation of
                the coordinates of bounding boxes.

        Returns:
            (array, array, array, array):

            * **sample_roi**: Regions of interests that are sampled. \
                Its shape is :math:`(S, 4)`.
            * **gt_roi_loc**: Offsets and scales to match \
                the sampled RoIs to the ground truth bounding boxes. \
                Its shape is :math:`(S, 4)`.
            * **gt_roi_label**: Labels assigned to sampled RoIs. Its shape is \
                :math:`(S,)`. Its range is :math:`[0, L]`. The label with \
                value 0 is the background.
            * **gt_roi_mask**: Mask targets of shape :math:`(S, M, M)` and \
                dtype :obj:`numpy.int32`. Rows of background RoIs, and of \
                foreground RoIs whose crop is empty, are filled with -1.

        Raises:
            ValueError: If :obj:`bbox` contains no boxes.

        """
        xp = cuda.get_array_module(roi)
        roi = cuda.to_cpu(roi)
        bbox = cuda.to_cpu(bbox)
        label = cuda.to_cpu(label)

        n_bbox, _ = bbox.shape
        if n_bbox == 0:
            raise ValueError('Empty bbox is not supported.')

        roi = np.concatenate((roi, bbox), axis=0)

        pos_roi_per_image = np.round(self.n_sample * self.pos_ratio)
        iou = bbox_iou(roi, bbox)
        gt_assignment = iou.argmax(axis=1)
        max_iou = iou.max(axis=1)
        # Offset range of classes from [0, n_fg_class - 1] to [1, n_fg_class].
        # The label with value 0 is the background.
        gt_roi_label = label[gt_assignment] + 1

        # Select foreground RoIs as those with >= pos_iou_thresh IoU.
        pos_index = np.where(max_iou >= self.pos_iou_thresh)[0]
        pos_roi_per_this_image = int(min(pos_roi_per_image, pos_index.size))
        if pos_index.size > 0:
            pos_index = np.random.choice(pos_index,
                                         size=pos_roi_per_this_image,
                                         replace=False)

        # Select background RoIs as those within
        # [neg_iou_thresh_lo, neg_iou_thresh_hi).
        neg_index = np.where((max_iou < self.neg_iou_thresh_hi)
                             & (max_iou >= self.neg_iou_thresh_lo))[0]
        neg_roi_per_this_image = self.n_sample - pos_roi_per_this_image
        neg_roi_per_this_image = int(
            min(neg_roi_per_this_image, neg_index.size))
        if neg_index.size > 0:
            neg_index = np.random.choice(neg_index,
                                         size=neg_roi_per_this_image,
                                         replace=False)

        # The indices that we're selecting (both positive and negative).
        # Positives come first, so sample_roi[i] belongs to pos_index[i].
        keep_index = np.append(pos_index, neg_index)
        gt_roi_label = gt_roi_label[keep_index]
        gt_roi_label[pos_roi_per_this_image:] = 0  # negative labels --> 0
        sample_roi = roi[keep_index]

        # Compute offsets and scales to match sampled RoIs to the GTs.
        gt_roi_loc = bbox2loc(sample_roi, bbox[gt_assignment[keep_index]])
        gt_roi_loc = ((gt_roi_loc - np.array(loc_normalize_mean, np.float32)) /
                      np.array(loc_normalize_std, np.float32))

        # Compute gt masks. Every row starts as -1 and only the rows of
        # foreground RoIs are overwritten below.
        gt_roi_mask = -np.ones(
            (len(sample_roi), self.mask_size, self.mask_size), dtype=np.int32)
        for i, pos_ind in enumerate(pos_index):
            box = np.round(sample_roi[i]).astype(np.int32)
            # Clamp to 0: a negative bound would be interpreted by slicing
            # as an offset from the end and crop the wrong region. Bounds
            # past the mask extent are already truncated by slicing.
            y_min, x_min, y_max, x_max = (max(int(c), 0) for c in box[:4])
            # The mask may live on the GPU like the other inputs; fetch only
            # the entry that is needed.
            gt_mask = cuda.to_cpu(mask[gt_assignment[pos_ind]])
            crop = gt_mask[y_min:y_max, x_min:x_max]
            if crop.size == 0:
                # Degenerate RoI: nothing to resize, keep the row at -1.
                continue
            # label -> onehot, so that resizing interpolates per-label
            # scores instead of label ids.
            onehot = (np.arange(crop.max() + 1) ==
                      crop[..., None]).astype(np.float32)
            onehot = cv2.resize(onehot, (self.mask_size, self.mask_size))
            if onehot.ndim == 2:
                # cv2.resize drops the channel axis for a single channel.
                onehot = onehot.reshape(onehot.shape[:2] + (1, ))
            gt_roi_mask[i] = np.argmax(onehot, axis=2).astype(np.int32)

        if xp is not np:
            sample_roi = cuda.to_gpu(sample_roi)
            gt_roi_loc = cuda.to_gpu(gt_roi_loc)
            gt_roi_label = cuda.to_gpu(gt_roi_label)
            gt_roi_mask = cuda.to_gpu(gt_roi_mask)
        return sample_roi, gt_roi_loc, gt_roi_label, gt_roi_mask
Exemplo n.º 10
0
    def __call__(self, roi, bbox, label,
                 loc_normalize_mean=(0., 0., 0., 0.),
                 loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
        """Samples proposals and attaches ground truth targets to them.

        Candidate RoIs are the rows of :obj:`roi` followed by the rows of
        :obj:`bbox`. Each candidate is matched to the ground truth box with
        which it has the highest IoU. Candidates whose best IoU is at least
        :obj:`self.pos_iou_thresh` are foreground candidates, and those whose
        best IoU lies in
        :obj:`[self.neg_iou_thresh_lo, self.neg_iou_thresh_hi)` are
        background candidates. Up to
        :obj:`round(self.n_sample * self.pos_ratio)` foregrounds are drawn
        at random without replacement, and the remaining slots up to
        :obj:`self.n_sample` are filled with randomly drawn backgrounds.

        Localization targets are produced by :func:`bbox2loc` and then
        normalized with :obj:`loc_normalize_mean` and
        :obj:`loc_normalize_std`. The outputs live on the same device as
        :obj:`roi`.

        Args:
            roi (array): RoIs to sample from. Its shape is :math:`(R, 4)`.
            bbox (array): Ground truth bounding boxes. Its shape is
                :math:`(R', 4)`.
            label (array): Ground truth labels of shape :math:`(R',)` in
                the range :math:`[0, L - 1]`, where :math:`L` is the number
                of foreground classes.
            loc_normalize_mean (tuple of four floats): Mean subtracted from
                the localization targets.
            loc_normalize_std (tuple of four floats): Standard deviation the
                localization targets are divided by.

        Returns:
            (array, array, array):

            * **sample_roi**: The sampled RoIs, foregrounds first. \
                Its shape is :math:`(S, 4)`.
            * **gt_roi_loc**: Normalized offsets and scales from the \
                sampled RoIs to their matched ground truth boxes. \
                Its shape is :math:`(S, 4)`.
            * **gt_roi_label**: Labels of the sampled RoIs. Its shape is \
                :math:`(S,)` and its range is :math:`[0, L]`, where 0 is \
                the background.

        """
        xp = cuda.get_array_module(roi)
        roi = cuda.to_cpu(roi)
        bbox = cuda.to_cpu(bbox)
        label = cuda.to_cpu(label)

        # The values are not used; the unpacking only succeeds for a
        # two-dimensional bbox array.
        _n_gt, _n_coord = bbox.shape

        # Ground truth boxes take part in the sampling as candidates too.
        candidates = np.concatenate((roi, bbox), axis=0)

        fg_quota = np.round(self.n_sample * self.pos_ratio)
        overlaps = bbox_iou(candidates, bbox)
        matched_gt = overlaps.argmax(axis=1)
        best_overlap = overlaps.max(axis=1)
        # Shift labels by one so that 0 is free to denote the background.
        shifted_label = label[matched_gt] + 1

        # Foreground: best IoU reaches pos_iou_thresh.
        fg_index = np.flatnonzero(best_overlap >= self.pos_iou_thresh)
        n_fg = int(min(fg_quota, fg_index.size))
        if fg_index.size > 0:
            fg_index = np.random.choice(fg_index, size=n_fg, replace=False)

        # Background: best IoU within [neg_iou_thresh_lo, neg_iou_thresh_hi).
        below_hi = best_overlap < self.neg_iou_thresh_hi
        at_least_lo = best_overlap >= self.neg_iou_thresh_lo
        bg_index = np.flatnonzero(below_hi & at_least_lo)
        n_bg = int(min(self.n_sample - n_fg, bg_index.size))
        if bg_index.size > 0:
            bg_index = np.random.choice(bg_index, size=n_bg, replace=False)

        # Foreground picks first, background picks after them.
        picked = np.concatenate((fg_index, bg_index))
        sample_roi = candidates[picked]
        gt_roi_label = shifted_label[picked]
        gt_roi_label[n_fg:] = 0  # everything after the foregrounds

        # Regression targets towards the matched ground truth boxes.
        gt_roi_loc = bbox2loc(sample_roi, bbox[matched_gt[picked]])
        loc_mean = np.array(loc_normalize_mean, np.float32)
        loc_std = np.array(loc_normalize_std, np.float32)
        gt_roi_loc = (gt_roi_loc - loc_mean) / loc_std

        if xp == np:
            return sample_roi, gt_roi_loc, gt_roi_label
        return (cuda.to_gpu(sample_roi),
                cuda.to_gpu(gt_roi_loc),
                cuda.to_gpu(gt_roi_label))
    def __call__(self,
                 roi,
                 bbox,
                 label,
                 mask,
                 levels,
                 loc_normalize_mean=(0., 0., 0., 0.),
                 loc_normalize_std=(0.1, 0.1, 0.2, 0.2),
                 mask_size=14,
                 binary_mask=True):
        """Samples RoIs and assigns labels, offsets, levels and mask targets.

        At most :obj:`self.n_sample` RoIs are sampled from the combination
        of :obj:`roi` and :obj:`bbox`, with at most
        :obj:`round(self.n_sample * self.pos_ratio)` foregrounds. Foreground
        RoIs come first in every output.

        Args:
            roi (array): RoIs from which we sample. Its shape is
                :math:`(R, 4)`, in :math:`(y_0, x_0, y_1, x_1)` order.
            bbox (array): Ground truth bounding boxes of shape
                :math:`(R', 4)`.
            label (array): Ground truth labels of shape :math:`(R',)`.
            mask (array): When :obj:`binary_mask` is :obj:`True`, an array
                of shape :math:`(R', H, W)`. Otherwise keypoints of shape
                :math:`(R', K, 3)` whose last axis is :math:`(y, x, v)`;
                only keypoints with :math:`v = 2` produce a target.
                This array is not modified.
            levels (array): Feature level of each row of :obj:`roi`.
            loc_normalize_mean (tuple of four floats): Mean values to
                normalize the localization targets.
            loc_normalize_std (tuple of four floats): Standard deviation to
                normalize the localization targets.
            mask_size (int): Side length of the mask target grid.
            binary_mask (bool): :obj:`True` for mask targets, :obj:`False`
                for keypoint targets.

        Returns:
            (array, array, array, array, array):

            * **sample_roi**: Sampled RoIs of shape :math:`(S, 4)`.
            * **sample_levels**: Feature levels of the sampled RoIs.
            * **gt_roi_loc**: Normalized localization targets of shape \
                :math:`(S, 4)`.
            * **gt_roi_label**: Labels of shape :math:`(S,)`. 0 is the \
                background.
            * **gt_roi_mask**: Targets of the foreground RoIs only, of \
                dtype :obj:`numpy.int32`. Its shape is \
                :math:`(P, mask\\_size, mask\\_size)` for binary masks and \
                :math:`(P, K)` for keypoints, where each value is \
                :math:`y \\cdot mask\\_size + x` in the target grid or -1 \
                for a keypoint to be ignored.

        """
        xp = cuda.get_array_module(roi)
        roi = cuda.to_cpu(roi)
        bbox = cuda.to_cpu(bbox)
        label = cuda.to_cpu(label)
        mask = cuda.to_cpu(mask)
        levels = cuda.to_cpu(levels)

        roi = np.concatenate((roi, bbox), axis=0)

        # assign feature levels of ground truth boxes
        bbox_levels = map_rois_to_fpn_levels(np, bbox)
        levels = np.concatenate([levels, bbox_levels])

        pos_roi_per_image = np.round(self.n_sample * self.pos_ratio)
        iou = bbox_iou(roi, bbox)
        gt_assignment = iou.argmax(axis=1)
        max_iou = iou.max(axis=1)
        # Offset range of classes from [0, n_fg_class - 1] to [1, n_fg_class].
        # The label with value 0 is the background.
        gt_roi_label = label[gt_assignment] + 1

        # Select foreground RoIs as those with >= pos_iou_thresh IoU.
        pos_index = np.where(max_iou >= self.pos_iou_thresh)[0]
        pos_roi_per_this_image = int(min(pos_roi_per_image, pos_index.size))
        if pos_index.size > 0:
            pos_index = np.random.choice(pos_index,
                                         size=pos_roi_per_this_image,
                                         replace=False)

        # Select background RoIs as those within
        # [neg_iou_thresh_lo, neg_iou_thresh_hi).
        neg_index = np.where((max_iou < self.neg_iou_thresh_hi)
                             & (max_iou >= self.neg_iou_thresh_lo))[0]
        neg_roi_per_this_image = self.n_sample - pos_roi_per_this_image
        neg_roi_per_this_image = int(
            min(neg_roi_per_this_image, neg_index.size))
        if neg_index.size > 0:
            neg_index = np.random.choice(neg_index,
                                         size=neg_roi_per_this_image,
                                         replace=False)

        # The indices that we're selecting (both positive and negative).
        # Positives come first, so sample_roi[i] belongs to pos_index[i].
        keep_index = np.append(pos_index, neg_index)
        gt_roi_label = gt_roi_label[keep_index]
        gt_roi_label[pos_roi_per_this_image:] = 0  # negative labels --> 0
        sample_roi = roi[keep_index]
        sample_levels = levels[keep_index]

        # Compute offsets and scales to match sampled RoIs to the GTs.
        gt_roi_loc = bbox2loc(sample_roi, bbox[gt_assignment[keep_index]])
        gt_roi_loc = ((gt_roi_loc - np.array(loc_normalize_mean, np.float32)) /
                      np.array(loc_normalize_std, np.float32))

        # https://engineer.dena.jp/2017/12/chainercvmask-r-cnn.html
        targets = []
        if binary_mask:
            _, h, w = mask.shape
            for i, idx in enumerate(gt_assignment[pos_index]):
                # Clamp the crop to the mask extent; a negative bound would
                # otherwise be read as an offset from the end.
                y0 = max(int(sample_roi[i, 0]), 0)
                x0 = max(int(sample_roi[i, 1]), 0)
                y1 = min(max(int(sample_roi[i, 2]), 0), h)
                x1 = min(max(int(sample_roi[i, 3]), 0), w)
                # NOTE(review): an empty crop (RoI thinner than one pixel)
                # is still handed to cv2.resize as before -- confirm how
                # such RoIs should be treated.
                crop = mask[idx, y0:y1, x0:x1]
                targets.append(
                    cv2.resize(crop, (mask_size, mask_size)).astype(np.int32))
            empty_shape = (0, mask_size, mask_size)
        else:
            # remind: shape of keypoints is (N, 17, 3), N is number of bbox,
            # 17 is number of keypoints, 3 is (x, y, v)
            # v=0: unlabeled, v=1: labeled but invisible,
            # v=2: labeled and visible
            for i, idx in enumerate(gt_assignment[pos_index]):
                # bbox's (y0, x0), (y1, x1)
                y0, x0, y1, x1 = (int(v) for v in sample_roi[i, :4])
                # Work on a float copy. One ground truth can be matched by
                # several RoIs, so transforming `mask[idx]` in place would
                # corrupt the keypoints for the next RoI (and the caller).
                kp = np.array(mask[idx], dtype=np.float64)  # (K, 3)
                # convert keypoints coordinate (y, x) into the mask
                # coordinate system [0, mask_size] x [0, mask_size]
                extent = np.array(
                    [max(y1 - y0, 1), max(x1 - x0, 1)], dtype=np.float64)
                scaled = (kp[:, :2] - (y0, x0)) / extent * mask_size
                # floor (not truncation) so that points slightly above or
                # left of the RoI become negative and are ignored.
                cell = np.floor(scaled).astype(np.int64)
                visible = kp[:, 2].astype(np.int64) == 2
                inside = np.all((cell >= 0) & (cell < mask_size), axis=1)
                # The position within the mask_size x mask_size grid is
                # used as the class label (softmax cross entropy is applied
                # later); -1 is ignored.
                flat = cell[:, 0] * mask_size + cell[:, 1]
                targets.append(
                    np.where(visible & inside, flat, -1).astype(np.int32))
            empty_shape = (0, mask.shape[1])

        # Build on the host; the device transfer happens once below.
        if targets:
            gt_roi_mask = np.array(targets, dtype=np.int32)
        else:
            gt_roi_mask = np.empty(empty_shape, dtype=np.int32)

        if xp is not np:
            sample_roi = cuda.to_gpu(sample_roi)
            gt_roi_loc = cuda.to_gpu(gt_roi_loc)
            gt_roi_label = cuda.to_gpu(gt_roi_label)
            gt_roi_mask = cuda.to_gpu(gt_roi_mask)
            sample_levels = cuda.to_gpu(sample_levels)
        return sample_roi, sample_levels, gt_roi_loc, gt_roi_label, gt_roi_mask
Exemplo n.º 12
0
def calc_detection_inria_prec_rec(
        pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels,
        gt_difficults=None,
        iou_thresh=0.5):
    """Calculate per-class precision and recall of predicted bounding boxes.

    Predictions of each image are matched, class by class and in descending
    score order, to the ground truth box with the highest IoU. A prediction
    is a true positive if that IoU is at least :obj:`iou_thresh` and the
    ground truth box has not been matched before, it is ignored if the
    matched ground truth box is difficult, and it is a false positive
    otherwise.

    Intermediate values are reported through the :mod:`logging` module at
    debug level.

    Args:
        pred_bboxes (iterable of numpy.ndarray): Predicted boxes of each
            image, each of shape :math:`(R, 4)`.
        pred_labels (iterable of numpy.ndarray): Predicted labels.
        pred_scores (iterable of numpy.ndarray): Predicted scores.
        gt_bboxes (iterable of numpy.ndarray): Ground truth boxes of each
            image, each of shape :math:`(R', 4)`.
        gt_labels (iterable of numpy.ndarray): Ground truth labels.
        gt_difficults (iterable of numpy.ndarray): Boolean arrays telling
            whether each ground truth box is difficult. If :obj:`None`,
            no box is considered difficult.
        iou_thresh (float): Minimum IoU for a prediction to match a ground
            truth box.

    Returns:
        tuple of two lists:
        :obj:`prec` and :obj:`rec`, both indexed by class. :obj:`prec[l]`
        is :obj:`None` if class :math:`l` appears in neither the predictions
        nor the ground truth. :obj:`rec[l]` is :obj:`None` if class
        :math:`l` has no ground truth box that is not difficult. Both lists
        are empty if the inputs contain no image.

    Raises:
        ValueError: If the input iterables have different lengths.

    """
    import logging

    logger = logging.getLogger(__name__)

    iterators = [iter(pred_bboxes), iter(pred_labels), iter(pred_scores),
                 iter(gt_bboxes), iter(gt_labels)]
    # Only the iterables supplied by the caller take part in the length
    # check; the stand-in for a missing gt_difficults never ends.
    n_checked = len(iterators)
    if gt_difficults is None:
        iterators.append(itertools.repeat(None))
    else:
        iterators.append(iter(gt_difficults))
        n_checked += 1

    n_pos = defaultdict(int)
    score = defaultdict(list)
    match = defaultdict(list)

    # Advance all iterators in lockstep. Unlike zip, this notices a length
    # mismatch no matter which of the iterables is the longer one.
    end = object()
    while True:
        items = [next(it, end) for it in iterators]
        n_finished = sum(item is end for item in items[:n_checked])
        if n_finished == n_checked:
            break
        if n_finished > 0:
            raise ValueError('Length of input iterables need to be same.')
        (pred_bbox, pred_label, pred_score,
         gt_bbox, gt_label, gt_difficult) = items

        if gt_difficult is None:
            gt_difficult = np.zeros(gt_bbox.shape[0], dtype=bool)

        for lb in np.unique(
                np.concatenate((pred_label, gt_label)).astype(int)):
            pred_mask_l = pred_label == lb
            pred_bbox_l = pred_bbox[pred_mask_l]
            pred_score_l = pred_score[pred_mask_l]
            # sort by score
            order = pred_score_l.argsort()[::-1]
            pred_bbox_l = pred_bbox_l[order]
            pred_score_l = pred_score_l[order]

            gt_mask_l = gt_label == lb
            gt_bbox_l = gt_bbox[gt_mask_l]
            gt_difficult_l = gt_difficult[gt_mask_l]

            n_pos[lb] += np.logical_not(gt_difficult_l).sum()
            score[lb].extend(pred_score_l)

            if len(pred_bbox_l) == 0:
                continue
            if len(gt_bbox_l) == 0:
                # Nothing to match against: all are false positives.
                match[lb].extend((0,) * pred_bbox_l.shape[0])
                continue

            # VOC evaluation follows integer typed bounding boxes.
            pred_bbox_l = pred_bbox_l.copy()
            pred_bbox_l[:, 2:] += 1
            gt_bbox_l = gt_bbox_l.copy()
            gt_bbox_l[:, 2:] += 1

            iou = bbox_iou(pred_bbox_l, gt_bbox_l)
            gt_index = iou.argmax(axis=1)
            # set -1 if there is no matching ground truth
            gt_index[iou.max(axis=1) < iou_thresh] = -1
            del iou

            # match values: 1 = true positive, 0 = false positive,
            # -1 = ignored (matched a difficult ground truth box).
            selec = np.zeros(gt_bbox_l.shape[0], dtype=bool)
            for gt_idx in gt_index:
                if gt_idx >= 0:
                    if gt_difficult_l[gt_idx]:
                        match[lb].append(-1)
                    else:
                        if not selec[gt_idx]:
                            match[lb].append(1)
                        else:
                            match[lb].append(0)
                    selec[gt_idx] = True
                else:
                    match[lb].append(0)

    if not n_pos:
        # No image was given, hence there is no class to report.
        return [], []

    n_fg_class = max(n_pos.keys()) + 1
    prec = [None] * n_fg_class
    rec = [None] * n_fg_class

    logger.debug('n_pos keys: %s', list(n_pos))
    for lb in n_pos:
        score_l = np.array(score[lb])
        match_l = np.array(match[lb], dtype=np.int8)
        logger.debug('score_l (%d): %s', len(score_l), score_l)
        logger.debug('match_l (%d): %s', len(match_l), match_l)

        order = score_l.argsort()[::-1]
        match_l = match_l[order]

        tp = np.cumsum(match_l == 1)
        fp = np.cumsum(match_l == 0)
        logger.debug('tp (%d): %s', len(tp), tp)
        logger.debug('fp (%d): %s', len(fp), fp)
        logger.debug('n_pos[%s]: %s', lb, n_pos[lb])

        # If an element of fp + tp is 0,
        # the corresponding element of prec[lb] is nan.
        prec[lb] = tp / (fp + tp)
        # If n_pos[lb] is 0, rec[lb] is None.
        if n_pos[lb] > 0:
            rec[lb] = tp / n_pos[lb]

        # rec[lb] may be None here, so it must not be passed to len().
        logger.debug('rec[%s]: %s', lb, rec[lb])
    return prec, rec
Exemplo n.º 13
0
def mask_head_loss_pre(rois, roi_indices, gt_masks, gt_bboxes,
                       gt_head_labels, segm_size):
    """Loss function for Mask Head (pre).

    This function processes RoIs for :func:`mask_head_loss_post` by
    selecting RoIs for mask loss calculation and
    preparing ground truth network output.

    Args:
        rois (iterable of arrays): An iterable of arrays of
            shape :math:`(R_l, 4)`, where :math:`R_l` is the number
            of RoIs in the :math:`l`-th feature map.
        roi_indices (iterable of arrays): An iterable of arrays of
            shape :math:`(R_l,)`. Each value is the index of the image
            the RoI belongs to; it is used to index :obj:`gt_masks` and
            :obj:`gt_bboxes`.
        gt_masks (iterable of arrays): An iterable of arrays whose shape is
            :math:`(R_n, H, W)`, where :math:`R_n` is the number of
            ground truth objects.
        gt_bboxes (iterable of arrays): An iterable of arrays of ground
            truth bounding boxes, one array per image. Each sampled RoI is
            matched to the box of its image with the highest IoU.
        gt_head_labels (iterable of arrays): An iterable of arrays of
            shape :math:`(R_l,)`. This is a collection of ground-truth
            labels assigned to :obj:`rois` during bounding box localization
            stage. Only RoIs whose label is greater than 0 are kept.
        segm_size (int): Size of the ground truth network output.

    Returns:
        tuple of four lists:
        :obj:`mask_rois`, :obj:`mask_roi_indices`,
        :obj:`gt_segms`, and :obj:`gt_mask_labels`.

        * **rois**: A list of arrays of shape :math:`(R'_l, 4)`, \
            where :math:`R'_l` is the number of RoIs in the :math:`l`-th \
            feature map.
        * **roi_indices**: A list of arrays of shape :math:`(R'_l,)`.
        * **gt_segms**: A list of arrays of shape :math:`(R'_l, M, M)`. \
            :math:`M` is the argument :obj:`segm_size`.
        * **gt_mask_labels**: A list of arrays of shape :math:`(R'_l,)` \
            indicating the classes of ground truth.
    """

    xp = cuda.get_array_module(*rois)

    n_level = len(rois)

    # Remember which feature level every RoI came from, so that the
    # flattened arrays can be split per level again at the end.
    # hstack needs a real sequence: NumPy deprecates (and newer releases
    # reject) a bare generator here.
    roi_levels = xp.hstack(
        [xp.array((l,) * len(rois[l])) for l in range(n_level)]
    ).astype(np.int32)
    rois = xp.vstack(rois).astype(np.float32)
    roi_indices = xp.hstack(roi_indices).astype(np.int32)
    gt_head_labels = xp.hstack(gt_head_labels)

    # Only RoIs with a positive label contribute to the mask loss.
    fg_index = (gt_head_labels > 0).nonzero()[0]
    mask_roi_levels = roi_levels[fg_index]
    mask_rois = rois[fg_index]
    mask_roi_indices = roi_indices[fg_index]
    gt_mask_labels = gt_head_labels[fg_index]

    # Every row is written below, because each kept RoI belongs to exactly
    # one of the image indices that are iterated over.
    gt_segms = xp.empty(
        (len(mask_rois), segm_size, segm_size), dtype=np.float32)
    for i in np.unique(cuda.to_cpu(mask_roi_indices)):
        gt_mask = gt_masks[i]
        gt_bbox = gt_bboxes[i]

        index = (mask_roi_indices == i).nonzero()[0]
        mask_roi = mask_rois[index]
        # Match each RoI to the ground truth box with the highest IoU.
        iou = bbox_iou(mask_roi, gt_bbox)
        gt_index = iou.argmax(axis=1)
        gt_segms[index] = xp.array(
            mask_to_segm(gt_mask, mask_roi, segm_size, gt_index))

    # Split the results back into one array per feature level.
    flag_masks = [mask_roi_levels == l for l in range(n_level)]
    mask_rois = [mask_rois[m] for m in flag_masks]
    mask_roi_indices = [mask_roi_indices[m] for m in flag_masks]
    gt_segms = [gt_segms[m] for m in flag_masks]
    gt_mask_labels = [gt_mask_labels[m] for m in flag_masks]
    return mask_rois, mask_roi_indices, gt_segms, gt_mask_labels
Exemplo n.º 14
0
def mask_voting(
        rois, mask_probs, cls_probs,
        n_class, H, W,
        score_thresh=0.7,
        nms_thresh=0.3,
        mask_merge_thresh=0.5,
        binary_thresh=0.4, max_num=100):
    """Refine instance masks by merging the masks of overlapping RoIs.

    For every foreground class, RoIs are first reduced by non maximum
    suppression. Over all classes, only the candidates whose score reaches
    the :obj:`max_num`-th best score (and at least 0.001) are kept. For each
    kept box, the masks of all RoIs whose IoU with it is at least
    :obj:`mask_merge_thresh` are merged by :func:`mask_aggregation`,
    weighted by their normalized class probabilities. Finally, results
    whose score is not greater than :obj:`score_thresh` are dropped.

    Args:
        rois (array): RoIs of shape :math:`(R, 4)`.
        mask_probs (array): Mask probabilities of the RoIs. Its first axis
            has length :math:`R` and its last axis is the mask size.
        cls_probs (array): Class probabilities of shape
            :math:`(R, n\\_class)`. Column 0 is the background.
        n_class (int): Number of classes including the background.
        H (int): Passed to :func:`mask_aggregation` (presumably the image
            height).
        W (int): Passed to :func:`mask_aggregation` (presumably the image
            width).
        score_thresh (float): Results need a score above this value.
        nms_thresh (float): Threshold of non maximum suppression.
        mask_merge_thresh (float): Minimum IoU for a RoI to take part in
            the merge for a box.
        binary_thresh (float): Passed to :func:`mask_aggregation`.
        max_num (int): Limit of boxes kept per class by non maximum
            suppression, and rank of the score used as global threshold.

    Returns:
        (array, array, array, array):
        :obj:`v_bboxes`, :obj:`v_masks`, :obj:`v_labels` and
        :obj:`v_cls_probs`. All of them have length 0 when there is no
        candidate at all.

    """

    mask_size = mask_probs.shape[-1]
    v_labels = np.empty((0, ), dtype=np.int32)
    v_masks = np.empty((0, mask_size, mask_size), dtype=np.float32)
    v_bboxes = np.empty((0, 4), dtype=np.float32)
    v_cls_probs = np.empty((0, ), dtype=np.float32)

    # First pass: per-class non maximum suppression.
    # Label 0 is the background and is skipped.
    tmp_all_scores = np.empty((0, ), dtype=np.float32)
    tmp_cls_probs = []
    tmp_bbox = []
    for label in range(1, n_class):
        cls_prob_l = cls_probs[:, label]
        keep_indices = non_maximum_suppression(
            rois, nms_thresh, cls_prob_l, limit=max_num)
        bbox_l = rois[keep_indices]
        cls_prob_l = cls_prob_l[keep_indices]
        tmp_bbox.append(bbox_l)
        tmp_cls_probs.append(cls_prob_l)
        tmp_all_scores = np.concatenate((tmp_all_scores, cls_prob_l))

    if tmp_all_scores.size == 0:
        # No RoI or no foreground class: there is no score to derive the
        # threshold from, so the result is empty.
        return v_bboxes, v_masks, v_labels, v_cls_probs

    # Global threshold: the score of the max_num-th best candidate over
    # all classes, but never below 1e-3.
    sorted_all_scores = np.sort(tmp_all_scores)[::-1]
    keep_num = min(len(sorted_all_scores), max_num)
    thresh = max(sorted_all_scores[keep_num - 1], 1e-3)

    # Second pass: merge masks around every surviving box.
    for label in range(1, n_class):
        bbox_l = tmp_bbox[label - 1]
        cls_prob_l = tmp_cls_probs[label - 1]
        keep_indices = np.where(cls_prob_l >= thresh)
        bbox_l = bbox_l[keep_indices]
        cls_prob_l = cls_prob_l[keep_indices]

        v_mask_l = np.empty((0, mask_size, mask_size), dtype=np.float32)
        v_bbox_l = np.empty((0, 4), dtype=np.float32)
        v_cls_prob_l = np.empty((0, ), dtype=np.float32)

        for i, bbox in enumerate(bbox_l):
            # All RoIs (not only the NMS survivors) vote for this box.
            iou = bbox_iou(rois, bbox[np.newaxis, :])
            idx = np.where(iou >= mask_merge_thresh)[0]
            mask_weights = cls_probs[idx, label]
            mask_weights = mask_weights / mask_weights.sum()
            mask_prob_l = mask_probs[idx]
            rois_l = rois[idx]
            clipped_bbox, clipped_mask = mask_aggregation(
                rois_l, mask_prob_l, mask_weights, H, W, binary_thresh)
            if clipped_bbox is not None and clipped_mask is not None:
                clipped_mask = cv2.resize(
                    clipped_mask.astype(np.float32),
                    (mask_size, mask_size))
                v_mask_l = np.concatenate((v_mask_l, clipped_mask[None]))
                v_bbox_l = np.concatenate((v_bbox_l, clipped_bbox[None]))
                v_cls_prob_l = np.concatenate(
                    (v_cls_prob_l, cls_prob_l[i][None]))

        keep_indices = v_cls_prob_l > score_thresh
        v_mask_l = v_mask_l[keep_indices]
        v_bbox_l = v_bbox_l[keep_indices]
        v_cls_prob_l = v_cls_prob_l[keep_indices]

        v_label_l = np.repeat(label, v_bbox_l.shape[0])
        v_masks = np.concatenate((v_masks, v_mask_l))
        v_bboxes = np.concatenate((v_bboxes, v_bbox_l))
        v_labels = np.concatenate((v_labels, v_label_l))
        v_cls_probs = np.concatenate((v_cls_probs, v_cls_prob_l))
    return v_bboxes, v_masks, v_labels, v_cls_probs
Exemplo n.º 15
0
def calc_detection_voc_prec_rec(
        pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels,
        gt_difficults=None,
        iou_thresh=0.5):
    """Calculate precision and recall based on evaluation code of PASCAL VOC.

    This function calculates precision and recall of
    predicted bounding boxes obtained from a dataset which has :math:`N`
    images.
    The code is based on the evaluation code used in PASCAL VOC Challenge.

    Args:
        pred_bboxes (iterable of numpy.ndarray): An iterable of :math:`N`
            sets of bounding boxes.
            Its index corresponds to an index for the base dataset.
            Each element of :obj:`pred_bboxes` is a set of coordinates
            of bounding boxes. This is an array whose shape is :math:`(R, 4)`,
            where :math:`R` corresponds
            to the number of bounding boxes, which may vary among boxes.
            The second axis corresponds to
            :math:`y_{min}, x_{min}, y_{max}, x_{max}` of a bounding box.
        pred_labels (iterable of numpy.ndarray): An iterable of labels.
            Similar to :obj:`pred_bboxes`, its index corresponds to an
            index for the base dataset. Its length is :math:`N`.
        pred_scores (iterable of numpy.ndarray): An iterable of confidence
            scores for predicted bounding boxes. Similar to :obj:`pred_bboxes`,
            its index corresponds to an index for the base dataset.
            Its length is :math:`N`.
        gt_bboxes (iterable of numpy.ndarray): An iterable of ground truth
            bounding boxes
            whose length is :math:`N`. An element of :obj:`gt_bboxes` is a
            bounding box whose shape is :math:`(R, 4)`. Note that the number of
            bounding boxes in each image does not need to be same as the number
            of corresponding predicted boxes.
        gt_labels (iterable of numpy.ndarray): An iterable of ground truth
            labels which are organized similarly to :obj:`gt_bboxes`.
        gt_difficults (iterable of numpy.ndarray): An iterable of boolean
            arrays which is organized similarly to :obj:`gt_bboxes`.
            This tells whether the
            corresponding ground truth bounding box is difficult or not.
            By default, this is :obj:`None`. In that case, this function
            considers all bounding boxes to be not difficult.
        iou_thresh (float): A prediction can only be correct if its
            Intersection over Union with the ground truth is at least this
            value.

    Returns:
        tuple of two lists:
        This function returns two lists: :obj:`prec` and :obj:`rec`.

        * :obj:`prec`: A list of arrays. :obj:`prec[l]` is precision \
            for class :math:`l`. If class :math:`l` does not exist in \
            either :obj:`pred_labels` or :obj:`gt_labels`, :obj:`prec[l]` is \
            set to :obj:`None`.
        * :obj:`rec`: A list of arrays. :obj:`rec[l]` is recall \
            for class :math:`l`. If class :math:`l` that is not marked as \
            difficult does not exist in \
            :obj:`gt_labels`, :obj:`rec[l]` is \
            set to :obj:`None`.

        Both lists are empty if the inputs contain no image.

    Raises:
        ValueError: If the input iterables have different lengths.

    """

    iterators = [iter(pred_bboxes), iter(pred_labels), iter(pred_scores),
                 iter(gt_bboxes), iter(gt_labels)]
    # Only the iterables supplied by the caller take part in the length
    # check; the stand-in for a missing gt_difficults never ends.
    n_checked = len(iterators)
    if gt_difficults is None:
        iterators.append(itertools.repeat(None))
    else:
        iterators.append(iter(gt_difficults))
        n_checked += 1

    n_pos = defaultdict(int)
    score = defaultdict(list)
    match = defaultdict(list)

    # Advance all iterators in lockstep. Unlike zip, this notices a length
    # mismatch no matter which of the iterables is the longer one.
    end = object()
    while True:
        items = [next(it, end) for it in iterators]
        n_finished = sum(item is end for item in items[:n_checked])
        if n_finished == n_checked:
            break
        if n_finished > 0:
            raise ValueError('Length of input iterables need to be same.')
        (pred_bbox, pred_label, pred_score,
         gt_bbox, gt_label, gt_difficult) = items

        if gt_difficult is None:
            gt_difficult = np.zeros(gt_bbox.shape[0], dtype=bool)

        for lb in np.unique(
                np.concatenate((pred_label, gt_label)).astype(int)):
            pred_mask_l = pred_label == lb
            pred_bbox_l = pred_bbox[pred_mask_l]
            pred_score_l = pred_score[pred_mask_l]
            # sort by score
            order = pred_score_l.argsort()[::-1]
            pred_bbox_l = pred_bbox_l[order]
            pred_score_l = pred_score_l[order]

            gt_mask_l = gt_label == lb
            gt_bbox_l = gt_bbox[gt_mask_l]
            gt_difficult_l = gt_difficult[gt_mask_l]

            n_pos[lb] += np.logical_not(gt_difficult_l).sum()
            score[lb].extend(pred_score_l)

            if len(pred_bbox_l) == 0:
                continue
            if len(gt_bbox_l) == 0:
                # Nothing to match against: all are false positives.
                match[lb].extend((0,) * pred_bbox_l.shape[0])
                continue

            # VOC evaluation follows integer typed bounding boxes.
            pred_bbox_l = pred_bbox_l.copy()
            pred_bbox_l[:, 2:] += 1
            gt_bbox_l = gt_bbox_l.copy()
            gt_bbox_l[:, 2:] += 1

            iou = bbox_iou(pred_bbox_l, gt_bbox_l)
            gt_index = iou.argmax(axis=1)
            # set -1 if there is no matching ground truth
            gt_index[iou.max(axis=1) < iou_thresh] = -1
            del iou

            # match values: 1 = true positive, 0 = false positive,
            # -1 = ignored (matched a difficult ground truth box).
            selec = np.zeros(gt_bbox_l.shape[0], dtype=bool)
            for gt_idx in gt_index:
                if gt_idx >= 0:
                    if gt_difficult_l[gt_idx]:
                        match[lb].append(-1)
                    else:
                        if not selec[gt_idx]:
                            match[lb].append(1)
                        else:
                            match[lb].append(0)
                    selec[gt_idx] = True
                else:
                    match[lb].append(0)

    if not n_pos:
        # No image was given, hence there is no class to report.
        return [], []

    n_fg_class = max(n_pos.keys()) + 1
    prec = [None] * n_fg_class
    rec = [None] * n_fg_class

    for lb in n_pos:
        score_l = np.array(score[lb])
        match_l = np.array(match[lb], dtype=np.int8)

        order = score_l.argsort()[::-1]
        match_l = match_l[order]

        tp = np.cumsum(match_l == 1)
        fp = np.cumsum(match_l == 0)

        # If an element of fp + tp is 0,
        # the corresponding element of prec[lb] is nan.
        prec[lb] = tp / (fp + tp)
        # If n_pos[lb] is 0, rec[lb] is None.
        if n_pos[lb] > 0:
            rec[lb] = tp / n_pos[lb]

    return prec, rec