def test_non_maximum_suppression_consistency(self):
        """Check that host and GPU inputs produce the same selection."""
        iou_thresh = 0.5
        boxes = generate_random_bbox(6000, (600, 800), 32, 512)

        selected_on_cpu = non_maximum_suppression(boxes, iou_thresh)
        selected_on_gpu = non_maximum_suppression(
            cuda.to_gpu(boxes), iou_thresh)

        # Bring the GPU result back to the host before comparing.
        np.testing.assert_equal(
            selected_on_cpu, cuda.to_cpu(selected_on_gpu))
示例#2
0
    def test_non_maximum_suppression_consistency(self):
        """The NumPy and GPU code paths must agree on the kept indices."""
        nms_args = (0.5,)
        host_bbox = generate_random_bbox(6000, (600, 800), 32, 512)

        expected = non_maximum_suppression(host_bbox, *nms_args)
        actual = non_maximum_suppression(cuda.to_gpu(host_bbox), *nms_args)

        # ``actual`` was computed from a GPU array; compare on the host.
        np.testing.assert_equal(expected, cuda.to_cpu(actual))
 def check_non_maximum_suppression(self, bbox, threshold, expect):
     """Run NMS on ``bbox`` and compare the selection with ``expect``.

     Asserts that the result has the same array type as ``bbox``, that
     its dtype is int32, and that its values equal ``expect``.
     """
     result = non_maximum_suppression(bbox, threshold)

     self.assertIsInstance(result, type(bbox))
     self.assertEqual(result.dtype, np.int32)

     # Compare on the host so the check works for either array type.
     host_result = cuda.to_cpu(result)
     host_expect = cuda.to_cpu(expect)
     np.testing.assert_equal(host_result, host_expect)
示例#4
0
def _suppress(raw_bbox, raw_score, nms_thresh, score_thresh):
    """Apply score thresholding and NMS independently to each class.

    Column 0 of ``raw_score`` is skipped; column ``k + 1`` of both inputs
    is reported with label ``k``.

    Args:
        raw_bbox: Array indexed as ``raw_bbox[:, class]``.
        raw_score: Array indexed as ``raw_score[:, class]``.
        nms_thresh: Threshold forwarded to
            ``utils.non_maximum_suppression``.
        score_thresh: Candidates scoring below this value are dropped.

    Returns:
        tuple: ``(bbox, label, score)`` stacked over classes and cast to
        float32, int32 and float32 respectively.
    """
    xp = cuda.get_array_module(raw_bbox, raw_score)
    n_fg_class = raw_score.shape[1] - 1

    kept_bbox, kept_label, kept_score = [], [], []
    for fg in range(n_fg_class):
        column = fg + 1
        cand_bbox = raw_bbox[:, column]
        cand_score = raw_score[:, column]

        confident = cand_score >= score_thresh
        cand_bbox, cand_score = cand_bbox[confident], cand_score[confident]

        # NMS is called without scores here, so sort by descending score
        # first.
        by_score = argsort(-cand_score)
        cand_bbox, cand_score = cand_bbox[by_score], cand_score[by_score]

        selected = utils.non_maximum_suppression(cand_bbox, nms_thresh)
        cand_bbox, cand_score = cand_bbox[selected], cand_score[selected]

        kept_bbox.append(cand_bbox)
        kept_label.append(xp.array((fg, ) * len(cand_bbox)))
        kept_score.append(cand_score)

    return (
        xp.vstack(kept_bbox).astype(np.float32),
        xp.hstack(kept_label).astype(np.int32),
        xp.hstack(kept_score).astype(np.float32),
    )
示例#5
0
    def _suppress(self, raw_cls_bbox, raw_prob):
        """Threshold by score, then run NMS, one class at a time.

        Class index 0 is the background class and is skipped, so the
        emitted labels are shifted down by one and lie in
        ``[0, self.n_class - 2]``.

        Returns:
            tuple: ``(bbox, label, score)`` concatenated over classes and
            cast to float32, int32 and float32 respectively.
        """
        # Reshape once to (row, class, 4) instead of once per class.
        bbox_by_class = raw_cls_bbox.reshape((-1, self.n_class, 4))

        kept_bbox, kept_label, kept_score = [], [], []
        for cls_id in range(1, self.n_class):
            candidates = bbox_by_class[:, cls_id, :]
            probs = raw_prob[:, cls_id]

            # Drop low-confidence candidates before NMS.
            confident = probs > self.score_thresh
            candidates = candidates[confident]
            probs = probs[confident]

            selected = non_maximum_suppression(
                candidates, self.nms_thresh, probs)

            kept_bbox.append(candidates[selected])
            kept_label.append((cls_id - 1) * np.ones((len(selected),)))
            kept_score.append(probs[selected])

        return (
            np.concatenate(kept_bbox, axis=0).astype(np.float32),
            np.concatenate(kept_label, axis=0).astype(np.int32),
            np.concatenate(kept_score, axis=0).astype(np.float32),
        )
示例#6
0
文件: ssd.py 项目: bkartel1/chainercv
    def _suppress(self, raw_bbox, raw_score):
        """Select boxes per foreground class by score and optional NMS.

        Every class shares the same ``raw_bbox`` rows; foreground class
        ``k`` reads its scores from column ``k + 1`` of ``raw_score``.
        NMS is skipped when ``self.nms_thresh`` is ``None``.

        Returns:
            tuple: ``(bbox, label, score)`` stacked over classes and cast
            to float32, int32 and float32 respectively.
        """
        xp = self.xp

        kept_bbox, kept_label, kept_score = [], [], []
        for fg in range(self.n_fg_class):
            # Column 0 is not a foreground class, hence the offset.
            cand_score = raw_score[:, fg + 1]

            confident = cand_score >= self.score_thresh
            cand_bbox = raw_bbox[confident]
            cand_score = cand_score[confident]

            if self.nms_thresh is not None:
                selected = utils.non_maximum_suppression(
                    cand_bbox, self.nms_thresh, cand_score)
                cand_bbox, cand_score = (
                    cand_bbox[selected], cand_score[selected])

            kept_bbox.append(cand_bbox)
            kept_label.append(xp.array((fg,) * len(cand_bbox)))
            kept_score.append(cand_score)

        return (
            xp.vstack(kept_bbox).astype(np.float32),
            xp.hstack(kept_label).astype(np.int32),
            xp.hstack(kept_score).astype(np.float32),
        )
 def _suppress(self, raw_cls_bbox, raw_cls_roi, raw_prob, raw_mask):
     """Per-class score thresholding and NMS for boxes, RoIs and masks.

     Class index 0 is skipped; emitted labels are ``class_index - 1``.
     The same score filter and NMS selection are applied to the boxes,
     the RoIs, the probabilities and the masks of each class.

     Returns:
         tuple: ``(bbox, roi, label, score, mask)`` concatenated over
         classes, every element cast to float32.
     """
     per_class_shape = (-1, self.n_class, 4)
     bbox_by_class = raw_cls_bbox.reshape(per_class_shape)
     roi_by_class = raw_cls_roi.reshape(per_class_shape)

     kept_bbox, kept_roi, kept_label = [], [], []
     kept_score, kept_mask = [], []
     for cls_id in range(1, self.n_class):
         probs = raw_prob[:, cls_id]
         confident = probs > self.score_thresh

         boxes = bbox_by_class[:, cls_id, :][confident]
         rois = roi_by_class[:, cls_id, :][confident]
         probs = probs[confident]
         masks = raw_mask[:, cls_id][confident]

         selected = non_maximum_suppression(boxes, self.nms_thresh, probs)

         kept_bbox.append(boxes[selected])
         kept_roi.append(rois[selected])
         # Labels are in [0, self.n_class - 2].
         kept_label.append((cls_id - 1) * np.ones((len(selected), )))
         kept_score.append(probs[selected])
         kept_mask.append(masks[selected])

     # NOTE(review): labels are cast to float32 here, not an integer
     # dtype -- confirm that callers expect float labels.
     return (
         np.concatenate(kept_bbox, axis=0).astype(np.float32),
         np.concatenate(kept_roi, axis=0).astype(np.float32),
         np.concatenate(kept_label, axis=0).astype(np.float32),
         np.concatenate(kept_score, axis=0).astype(np.float32),
         np.concatenate(kept_mask, axis=0).astype(np.float32),
     )
示例#8
0
    def _suppress(self, raw_cls_bbox, raw_prob, raw_roi, raw_level):
        """Per-class score thresholding and NMS, also tracking RoI/level.

        Class index 0 is the background class and is skipped; emitted
        labels are ``class_index - 1``. When ``self.predict_mask`` is
        truthy the last class is skipped as well (see the note in the
        loop).

        Returns:
            tuple: ``(bbox, label, score, roi, level)`` concatenated over
            classes. ``bbox`` and ``score`` are float32, ``label`` and
            ``level`` are int32, ``roi`` keeps its dtype.
        """
        bbox_by_class = raw_cls_bbox.reshape((-1, self.n_class, 4))
        last_cls_id = self.n_class - 1

        kept_bbox, kept_label, kept_score = [], [], []
        kept_roi, kept_level = [], []
        # Skip cls_id = 0 because it is the background class.
        # -> masks are indexed from 0, so ``cls_id - 1`` is used.
        # -> Oops: in TrainChain the last class (ToothBlush) ends up out
        #    of range...
        for cls_id in range(1, self.n_class):
            if self.predict_mask and cls_id == last_cls_id:
                # Not essential at all, but the offset was wrong when
                # training the mask estimator, so the last class may hit
                # an index-out-of-bounds error? Needs verification.
                continue

            probs = raw_prob[:, cls_id]
            confident = probs > self.score_thresh
            boxes = bbox_by_class[:, cls_id, :][confident]
            probs = probs[confident]

            selected = non_maximum_suppression(boxes, self.nms_thresh, probs)

            kept_bbox.append(boxes[selected])
            # The labels are in [0, self.n_class - 2].
            kept_label.append((cls_id - 1) * np.ones((len(selected), )))
            kept_score.append(probs[selected])

            # Apply the same two-stage selection to the RoIs and levels.
            kept_roi.append(raw_roi[:, cls_id, :][confident][selected])
            kept_level.append(raw_level[confident][selected])

        return (
            np.concatenate(kept_bbox, axis=0).astype(np.float32),
            np.concatenate(kept_label, axis=0).astype(np.int32),
            np.concatenate(kept_score, axis=0).astype(np.float32),
            np.concatenate(kept_roi, axis=0),
            np.concatenate(kept_level, axis=0).astype(np.int32),
        )
    def check_non_maximum_suppression_options(
            self, bbox, threshold, score, limit):
        """Check the ``score``/``limit`` options against a manual reference.

        The reference result sorts the boxes by descending score by hand,
        runs NMS without options, truncates to ``limit`` and maps the
        indices back to the original ordering.
        """
        # Result under test: every option is handed to the function.
        actual = non_maximum_suppression(bbox, threshold, score, limit)
        self.assertIsInstance(actual, type(bbox))

        # Reference: reorder the inputs before calling the function, then
        # translate the selection back through the same permutation.
        descending = score.argsort()[::-1]
        expected = non_maximum_suppression(
            bbox[descending], threshold, score=None, limit=None)
        expected = descending[expected[:limit]]

        np.testing.assert_equal(
            cuda.to_cpu(actual), cuda.to_cpu(expected))
    def decode(self, mb_loc, mb_conf, nms_thresh, score_thresh):
        """Decode offsets and confidences into boxes, labels and scores.

        The default boxes are adjusted by ``mb_loc``, with their first two
        columns treated as a centre and the last two as a size, and then
        converted to (min, max) corners. Scores are a softmax over
        ``mb_conf``. NMS is run first within each class and then once
        more across all surviving boxes.

        Note:
            On the NumPy path ``mb_conf`` is clamped in place at 88.72 to
            avoid overflow in ``exp``.

        Returns:
            tuple: ``(bbox, label, score)`` cast to float32, int32 and
            float32 respectively.
        """
        xp = self.xp

        mb_bbox = self._default_bbox.copy()
        centre_shift = (
            mb_loc[:, :2] * self._variance[0] * self._default_bbox[:, 2:])
        mb_bbox[:, :2] += centre_shift
        mb_bbox[:, 2:] *= xp.exp(mb_loc[:, 2:] * self._variance[1])

        # (centre, size) -> (min corner, size) -> (min corner, max corner)
        mb_bbox[:, :2] -= mb_bbox[:, 2:] / 2
        mb_bbox[:, 2:] += mb_bbox[:, :2]

        if xp is np:
            mb_conf[mb_conf > 88.72] = 88.72  # avoid overflow
        mb_score = xp.exp(mb_conf)
        mb_score /= mb_score.sum(axis=1, keepdims=True)

        # intra-class non-maximum suppression
        per_class_bbox, per_class_label, per_class_score = [], [], []
        for fg in range(mb_conf.shape[1] - 1):
            cand_score = mb_score[:, fg + 1]

            confident = cand_score >= score_thresh
            cand_bbox = mb_bbox[confident]
            cand_score = cand_score[confident]

            selected = utils.non_maximum_suppression(
                cand_bbox, nms_thresh, cand_score)
            cand_bbox = cand_bbox[selected]
            cand_score = cand_score[selected]

            per_class_bbox.append(cand_bbox)
            per_class_label.append(xp.array((fg, ) * len(cand_bbox)))
            per_class_score.append(cand_score)

        # inter-class non-maximum suppression
        all_bbox = xp.vstack(per_class_bbox)
        all_label = xp.hstack(per_class_label)
        all_score = xp.hstack(per_class_score)
        survivors = utils.non_maximum_suppression(
            all_bbox, nms_thresh, all_score)

        return (
            all_bbox[survivors].astype(np.float32),
            all_label[survivors].astype(np.int32),
            all_score[survivors].astype(np.float32),
        )
示例#11
0
    def check_non_maximum_suppression_options(self, bbox, threshold, score,
                                              limit):
        """Verify that ``score`` and ``limit`` behave like manual handling.

        Passing ``score`` and ``limit`` must give the same indices as
        sorting the boxes by descending score up front, running plain NMS,
        keeping the first ``limit`` results and undoing the sort.
        """
        # Pass all options to the tested function.
        with_options = non_maximum_suppression(bbox, threshold, score, limit)
        self.assertIsInstance(with_options, type(bbox))

        # Reorder the inputs before passing them to the function, then
        # reorder the outputs according to the scores.
        by_score_desc = score.argsort()[::-1]
        sorted_bbox = bbox[by_score_desc]
        manual = non_maximum_suppression(
            sorted_bbox, threshold, score=None, limit=None)
        manual = manual[:limit]
        manual = by_score_desc[manual]

        np.testing.assert_equal(cuda.to_cpu(with_options),
                                cuda.to_cpu(manual))
示例#12
0
def get_humans_by_feature(model,
                          feature_map,
                          detection_thresh=0.15,
                          min_num_keypoints=-1):
    """Assemble per-person keypoint boxes from a network feature map.

    Root (instance) candidates are the positions where ``resp * conf`` of
    channel ``ROOT_NODE`` exceeds ``detection_thresh``; overlapping roots
    are removed with non-maximum suppression. From every surviving root
    the chains in ``DIRECTED_GRAPHS`` are followed: at each step the
    arg-max of the local edge map ``e`` gives the offset to the next
    keypoint. A chain stops as soon as the next position leaves the
    output grid or its ``resp * conf`` falls below ``detection_thresh``.

    Args:
        model: Object providing ``outsize``, ``restore_xy``,
            ``restore_size`` and ``local_grid_size``.
        feature_map: Sequence unpacked as ``(resp, conf, x, y, w, h, e)``.
        detection_thresh (float): Minimum ``resp * conf`` for a root or a
            keypoint to be accepted.
        min_num_keypoints (int): A person is kept only if it has at least
            this many keypoints in addition to the root.

    Returns:
        list of dict: One dict per person, mapping a keypoint index to its
        box taken from the ``(ymin, xmin, ymax, xmax)`` array.
    """
    resp, conf, x, y, w, h, e = feature_map
    delta = resp * conf
    outW, outH = model.outsize
    ROOT_NODE = 0  # instance

    rx, ry = model.restore_xy(x, y)
    rw, rh = model.restore_size(w, h)
    ymin, ymax = ry - rh / 2, ry + rh / 2
    xmin, xmax = rx - rw / 2, rx + rw / 2
    # Stack the corners, then move the coordinate axis to the end.
    bbox = np.array([ymin, xmin, ymax, xmax]).transpose(1, 2, 3, 0)

    root_score = delta[ROOT_NODE]
    candidate = np.where(root_score > detection_thresh)
    selected = non_maximum_suppression(
        bbox=bbox[ROOT_NODE][candidate],
        thresh=0.3,
        score=root_score[candidate])

    e = e.transpose(0, 3, 4, 1, 2)
    # Offsets in the local edge grid are relative to its centre.
    half_h = model.local_grid_size[1] // 2
    half_w = model.local_grid_size[0] // 2

    humans = []
    roots = zip(candidate[0][selected], candidate[1][selected])
    for root_h, root_w in roots:
        human = {ROOT_NODE: bbox[(ROOT_NODE, root_h, root_w)]}  # initial
        for eis, ts in DIRECTED_GRAPHS:
            # Every chain starts again from the root position.
            i_h, i_w = root_h, root_w
            for ei, t in zip(eis, ts):
                index = (ei, i_h, i_w)  # must be tuple
                u_ind = np.unravel_index(np.argmax(e[index]), e[index].shape)
                j_h = i_h + u_ind[0] - half_h
                j_w = i_w + u_ind[1] - half_w
                if j_h < 0 or j_w < 0 or j_h >= outH or j_w >= outW:
                    break
                if delta[t, j_h, j_w] < detection_thresh:
                    break
                human[t] = bbox[(t, j_h, j_w)]
                i_h, i_w = j_h, j_w
        if min_num_keypoints <= len(human) - 1:
            humans.append(human)

    logger.info('num humans = {}'.format(len(humans)))
    return humans
示例#13
0
def mask_voting(
        rois, cls_probs, mask_probs,
        n_class, H, W,
        score_thresh=0.7,
        nms_thresh=0.3,
        mask_merge_thresh=0.5,
        binary_thresh=0.4):
    """Merge overlapping RoI masks per class and keep confident results.

    For every class column ``l + 1`` of ``cls_probs``: RoIs scoring at
    least 0.001 go through NMS (at most 100 kept). For each kept box, all
    RoIs whose IoU with it exceeds ``mask_merge_thresh`` are passed to
    ``mask_aggregation`` with their normalised class probabilities as
    weights, and the returned mask is resized to the input mask size.
    Only results scoring above ``score_thresh`` are returned.

    Returns:
        tuple: ``(v_labels, v_masks, v_bboxes, v_cls_probs)`` accumulated
        over all classes.
    """
    mask_size = mask_probs.shape[-1]
    mask_hw = (mask_size, mask_size)

    v_labels = np.empty((0, ), dtype=np.int32)
    v_masks = np.empty((0, ) + mask_hw, dtype=np.float32)
    v_bboxes = np.empty((0, 4), dtype=np.float32)
    v_cls_probs = np.empty((0, ), dtype=np.float32)

    for l in range(n_class - 1):
        column = l + 1

        # non maximum suppression
        scores = cls_probs[:, column]
        candidate = scores >= 0.001
        boxes = rois[candidate]
        scores = scores[candidate]
        keep = non_maximum_suppression(
            boxes, nms_thresh, scores, limit=100)
        boxes = boxes[keep]
        scores = scores[keep]

        n_box = len(boxes)
        merged_masks = np.zeros((n_box, ) + mask_hw)
        merged_boxes = np.zeros((n_box, 4))

        for i, box in enumerate(boxes):
            iou = bbox_iou(rois, box[np.newaxis, :])
            voters = np.where(iou > mask_merge_thresh)[0]
            # Weight each voter by its normalised class probability.
            weights = cls_probs[voters, column]
            weights = weights / weights.sum()
            orig_mask, merged_boxes[i] = mask_aggregation(
                rois[voters], mask_probs[voters], weights,
                H, W, binary_thresh)
            merged_masks[i] = cv2.resize(
                orig_mask.astype(np.float32), mask_hw)

        confident = scores > score_thresh
        kept_boxes = merged_boxes[confident]
        kept_labels = np.repeat(l, kept_boxes.shape[0])

        v_cls_probs = np.concatenate((v_cls_probs, scores[confident]))
        v_masks = np.concatenate((v_masks, merged_masks[confident]))
        v_bboxes = np.concatenate((v_bboxes, kept_boxes))
        v_labels = np.concatenate((v_labels, kept_labels))

    return v_labels, v_masks, v_bboxes, v_cls_probs
示例#14
0
    def detect(
        self,
        image: Image.Image,
        nms_iou_threshold: float = 0.5
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Detect characters from the image.

        The image is resized so that its shorter side equals
        ``self.image_min_side`` and the other side is scaled in proportion
        and rounded to a multiple of 32. The network heatmap is converted
        to boxes, which are rescaled to the original image size and
        filtered with non-maximum suppression.

        Args:
            image: Input image.
            nms_iou_threshold: Threshold passed to
                ``non_maximum_suppression``.

        Returns:
            A ``(bboxes, scores)`` pair of host (NumPy) arrays holding the
            boxes kept by NMS and their scores.
        """
        img_w, img_h = image.size

        if img_w < img_h:
            w = self.image_min_side
            h = img_h * self.image_min_side / img_w
            h = 32 * int(round(h / 32))
        else:
            h = self.image_min_side
            w = img_w * self.image_min_side / img_h
            w = 32 * int(round(w / 32))

        image = image.resize((w, h), resample=Image.BILINEAR)

        # HWC -> CHW, then add a leading batch axis of size one.
        img = np.asarray(image, dtype=np.float32).transpose(2, 0, 1)
        imgs = img.reshape(1, *img.shape)

        if self.xp != np:
            imgs = cuda.to_gpu(imgs)

        imgs = (imgs - 127.5) / 128.0
        with chainer.using_config('train', False), chainer.no_backprop_mode():

            heatmap = self(imgs)
            heatmap = heatmap.array
            # NOTE(review): this slices the FIRST axis, while
            # ``heatmap.shape[2:4]`` below implies at least four axes --
            # confirm that the leading-axis slice is intended.
            heatmap[:-4] = _sigmoid(heatmap[:-4])

        bboxes, _, scores = heatmap_to_labeled_bboxes(heatmap,
                                                      self.score_threshold)
        # Only one image was fed, so take the first entry of each result.
        bboxes, scores = bboxes[0], scores[0]

        hm_h, hm_w = heatmap.shape[2:4]

        # Rescale from heatmap coordinates to original image coordinates:
        # even columns by the width ratio, odd columns by the height ratio.
        bboxes[:, 0::2] *= img_w / hm_w
        bboxes[:, 1::2] *= img_h / hm_h

        keep = non_maximum_suppression(bboxes, nms_iou_threshold, score=scores)
        bboxes = bboxes[keep]
        scores = scores[keep]

        if self.xp != np:
            bboxes = cuda.to_cpu(bboxes)
            scores = cuda.to_cpu(scores)

        return bboxes, scores
示例#15
0
    def _filter_overlapping_bboxs(self, mb_boxs, mb_confs):
        """Drop overlapping boxes per sample and pair labels with confs.

        For each ``(box, conf, label)`` triple taken from ``mb_boxs``,
        ``mb_confs`` and ``self.gt_mb_labels``, non-maximum suppression
        with ``self.nms_thresh`` selects the entries to keep.

        Returns:
            zip: Iterator over ``(label, conf)`` pairs for all kept
            entries, where the labels are host (NumPy) values.
        """
        confs = []
        labels = []
        for box, conf, label in zip(mb_boxs, mb_confs, self.gt_mb_labels):
            indices = utils.non_maximum_suppression(box, self.nms_thresh)

            confs.append(conf[indices])
            # Decide per array rather than via ``chainer.cuda.available``:
            # ``to_cpu`` copies GPU arrays to the host and returns NumPy
            # arrays unchanged, so CPU data on a CUDA-capable machine no
            # longer fails on a missing ``.get()``.
            labels.append(chainer.cuda.to_cpu(label[indices]))
        confs = F.concat(confs, axis=0)
        labels = np.concatenate(labels)
        return zip(labels, confs)
示例#16
0
    def filter_overlapping_bboxs(self, mb_boxs, mb_confs, gt_labels):
        """Drop overlapping boxes per sample and pair labels with confs.

        For each ``(box, conf, label)`` triple, non-maximum suppression
        with a fixed threshold of 0.5 selects the entries to keep.

        Returns:
            zip: Iterator over ``(label, conf)`` pairs for all kept
            entries.
        """
        confs = []
        labels = []
        for box, conf, label in zip(mb_boxs, mb_confs, gt_labels):
            # TODO: make the NMS threshold configurable instead of 0.5.
            indices = non_maximum_suppression(box, 0.5)

            confs.append(conf[indices])
            # Decide per array rather than via ``chainer.cuda.available``:
            # ``to_cpu`` copies GPU index arrays to the host and returns
            # NumPy arrays unchanged, so CPU data on a CUDA-capable
            # machine no longer fails on a missing ``.get()``.
            labels.append(label[chainer.cuda.to_cpu(indices)])
        confs = F.concat(confs, axis=0)
        labels = np.concatenate(labels)
        return zip(labels, confs)
示例#17
0
    def _decode(self, loc, obj, conf):
        """Decode raw outputs into boxes, labels, scores and layer ids.

        The default boxes are shifted by the logistic sigmoid of
        ``loc[:, :2]``, scaled by ``self._step``, resized by
        ``exp(loc[:, 2:])`` and converted to (min, max) corners. The
        score of a class is ``sigmoid(obj) * sigmoid(conf)``. Each
        foreground class is then thresholded and passed through NMS.

        ``layer_id`` is the position of each kept box's step value in the
        list of distinct steps, ordered by first occurrence.

        Returns:
            tuple: ``(bbox, label, score, layer_id)`` cast to float32,
            int32, float32 and int32 respectively.
        """
        xp = self.xp

        raw_bbox = self._default_bbox.copy()
        raw_bbox[:, :2] += 1 / (1 + xp.exp(-loc[:, :2]))
        raw_bbox[:, :2] *= self._step[:, None]
        raw_bbox[:, 2:] *= xp.exp(loc[:, 2:])
        # (centre, size) -> (min corner, max corner)
        raw_bbox[:, :2] -= raw_bbox[:, 2:] / 2
        raw_bbox[:, 2:] += raw_bbox[:, :2]

        objectness = 1 / (1 + xp.exp(-obj))
        class_conf = 1 / (1 + xp.exp(-conf))
        raw_score = objectness[:, None] * class_conf

        # Distinct step values, kept in order of first appearance.
        host_step = cuda.to_cpu(self._step)
        step_list = sorted(set(host_step), key=host_step.tolist().index)

        kept_bbox, kept_label, kept_score, kept_layer = [], [], [], []
        for fg in range(self.n_fg_class):
            cand_score = raw_score[:, fg]

            confident = cand_score >= self.score_thresh
            cand_bbox = raw_bbox[confident]
            cand_score = cand_score[confident]
            cand_step = self._step[confident]
            cand_layer = np.array(
                [step_list.index(step) for step in cand_step])

            selected = utils.non_maximum_suppression(
                cand_bbox, self.nms_thresh, cand_score)
            cand_bbox = cand_bbox[selected]
            cand_score = cand_score[selected]
            # ``cand_layer`` is a NumPy array, so index it on the host.
            cand_layer = cand_layer[cuda.to_cpu(selected)]

            kept_bbox.append(cand_bbox)
            kept_label.append(xp.array((fg,) * len(cand_bbox)))
            kept_score.append(cand_score)
            kept_layer.append(cand_layer)

        return (
            xp.vstack(kept_bbox).astype(np.float32),
            xp.hstack(kept_label).astype(np.int32),
            xp.hstack(kept_score).astype(np.float32),
            xp.hstack(kept_layer).astype(np.int32),
        )
示例#18
0
    def _suppress_each_box(self, raw_cls_bbox, raw_prob):
        """Keep only the best class of every box, then threshold and NMS.

        Each row is assigned its arg-max class. Rows whose best class is
        0, or whose best probability is not above ``self.score_thresh``,
        are discarded; the remainder goes through a single NMS pass.
        Emitted labels are ``best_class - 1``.

        Several intermediate values are also stored on ``self``
        (``raw_cls_bbox``, ``prob_l``, ``out``, ``best_class``, ``mask``,
        ``keep``, ``raw_prob``).

        Returns:
            tuple: ``(bbox, label, prob)`` cast to float32, int32 and
            float32 respectively.
        """
        xp = np  # model.xp

        best_class = raw_prob.argmax(axis=1)
        self.raw_cls_bbox = raw_cls_bbox

        best_class = best_class[:len(raw_cls_bbox)]
        bbox_by_class = raw_cls_bbox.reshape((-1, self.n_class, 4))

        # Pick, for every row, the box and probability of its best class.
        best_bbox = xp.array(
            [bbox_by_class[row, cls, :]
             for row, cls in enumerate(best_class)])
        best_prob = xp.array(
            [raw_prob[row, cls] for row, cls in enumerate(best_class)])
        self.prob_l = best_prob

        is_foreground = best_class > 0
        mask = np.logical_and(best_prob > self.score_thresh, is_foreground)
        best_bbox = best_bbox[mask]
        best_prob = best_prob[mask]

        keep = non_maximum_suppression(best_bbox, self.nms_thresh, best_prob)

        # Intermediate values kept on the instance.
        self.out = best_prob
        self.best_class = best_class
        self.mask = mask
        self.keep = keep
        self.raw_prob = raw_prob

        bbox = [best_bbox[keep]]
        # The labels are shifted so that class 1 becomes label 0.
        label = [best_class[mask][keep] - 1]
        prob = [best_prob[keep]]

        bbox = np.concatenate(bbox, axis=0).astype(np.float32)
        label = np.concatenate(label, axis=0).astype(np.int32)
        prob = np.concatenate(prob, axis=0).astype(np.float32)
        return bbox, label, prob
    def detect(self, image: Image.Image):
        """Combine the detections of all detectors by bounding-box voting.

        The predictions of every detector in ``self.detectors`` are
        pooled. NMS (threshold 0.3) on the pool yields base boxes. Each
        base box is replaced by the score-weighted mean of all pooled
        boxes whose IoU with it is at least 0.5, and its score by their
        plain mean. Base boxes matched by fewer than ``self.min_votes``
        pooled boxes are dropped.

        Returns:
            tuple: ``(refined_bboxes, refined_scores)``.
        """
        # Get all prediction results.
        predictions = [detector.detect(image) for detector in self.detectors]
        all_bboxes = np.concatenate([bboxes for bboxes, _ in predictions])
        all_scores = np.concatenate([scores for _, scores in predictions])

        # Apply NMS to obtain base bounding boxes for refinement.
        keep = non_maximum_suppression(
            all_bboxes, thresh=0.3, score=all_scores)
        base_bboxes = all_bboxes[keep]
        base_scores = all_scores[keep]

        # Row i marks the pooled boxes that vote for base box i.
        match_mat = calc_iou_mat(base_bboxes, all_bboxes) >= 0.5
        votes = np.sum(match_mat, axis=1)

        # Refine the boxes by bounding-box voting.
        refined_bboxes = np.empty_like(base_bboxes)
        refined_scores = np.empty_like(base_scores)
        for i, voters in enumerate(match_mat[:len(base_bboxes)]):
            voter_scores = all_scores[voters]
            voter_bboxes = all_bboxes[voters]

            weighted_sum = np.sum(voter_scores[:, None] * voter_bboxes,
                                  axis=0)
            refined_bboxes[i] = weighted_sum / voter_scores.sum()
            refined_scores[i] = np.average(voter_scores)

        enough_votes = votes >= self.min_votes
        return refined_bboxes[enough_votes], refined_scores[enough_votes]
示例#20
0
 def _suppress(self, raw_cls_bbox, raw_prob):
     """Filter each non-background class by score and then by NMS.

     Class index 0 is the background class and is skipped, so class
     ``c`` is reported with label ``c - 1``.

     Returns:
         tuple: ``(bbox, label, score)`` concatenated over classes and
         cast to float32, int32 and float32 respectively.
     """
     n_class = self.n_class
     bbox_by_class = raw_cls_bbox.reshape((-1, n_class, 4))

     results = {'bbox': [], 'label': [], 'score': []}
     # Skip cls_id = 0 because it is the background class.
     for cls_id in range(1, n_class):
         probs = raw_prob[:, cls_id]
         above = probs > self.score_thresh
         boxes = bbox_by_class[:, cls_id, :][above]
         probs = probs[above]

         keep = non_maximum_suppression(boxes, self.nms_thresh, probs)

         results['bbox'].append(boxes[keep])
         # The labels are in [0, self.n_class - 2].
         results['label'].append((cls_id - 1) * np.ones((len(keep),)))
         results['score'].append(probs[keep])

     bbox = np.concatenate(results['bbox'], axis=0).astype(np.float32)
     label = np.concatenate(results['label'], axis=0).astype(np.int32)
     score = np.concatenate(results['score'], axis=0).astype(np.float32)
     return bbox, label, score
示例#21
0
    def _decode(self, loc, obj, conf):
        """Decode raw outputs into boxes, labels and scores.

        The default boxes are shifted by the logistic sigmoid of
        ``loc[:, :2]``, resized by ``exp(loc[:, 2:])``, converted to
        (min, max) corners and scaled by
        ``self.insize / self.extractor.grid``. Class scores are
        ``sigmoid(obj)`` times a softmax over ``conf``. Each foreground
        class is then thresholded and passed through NMS.

        Returns:
            tuple: ``(bbox, label, score)`` cast to float32, int32 and
            float32 respectively.
        """
        xp = self.xp

        raw_bbox = self._default_bbox.copy()
        raw_bbox[:, :2] += 1 / (1 + xp.exp(-loc[:, :2]))
        raw_bbox[:, 2:] *= xp.exp(loc[:, 2:])
        # (centre, size) -> (min corner, max corner)
        raw_bbox[:, :2] -= raw_bbox[:, 2:] / 2
        raw_bbox[:, 2:] += raw_bbox[:, :2]
        raw_bbox *= self.insize / self.extractor.grid

        objectness = 1 / (1 + xp.exp(-obj))
        # Softmax over the class axis.
        class_prob = xp.exp(conf)
        class_prob /= class_prob.sum(axis=1, keepdims=True)
        raw_score = objectness[:, None] * class_prob

        kept_bbox, kept_label, kept_score = [], [], []
        for fg in range(self.n_fg_class):
            cand_score = raw_score[:, fg]

            confident = cand_score >= self.score_thresh
            cand_bbox = raw_bbox[confident]
            cand_score = cand_score[confident]

            selected = utils.non_maximum_suppression(
                cand_bbox, self.nms_thresh, cand_score)
            cand_bbox = cand_bbox[selected]
            cand_score = cand_score[selected]

            kept_bbox.append(cand_bbox)
            kept_label.append(xp.array((fg, ) * len(cand_bbox)))
            kept_score.append(cand_score)

        return (
            xp.vstack(kept_bbox).astype(np.float32),
            xp.hstack(kept_label).astype(np.int32),
            xp.hstack(kept_score).astype(np.float32),
        )
示例#22
0
    def _decode(self, loc, conf):
        """Decode raw outputs into host-side boxes, labels and scores.

        The default boxes are shifted by the logistic sigmoid of
        ``loc[:, :2]``, scaled by ``self._step``, resized by
        ``exp(loc[:, 2:])`` and converted to (min, max) corners. After a
        sigmoid on ``conf``, column 0 multiplies the remaining columns to
        give the per-class scores. Each foreground class is thresholded
        and passed through NMS, and the results are moved to the host.

        Returns:
            tuple: NumPy arrays ``(bbox, label, score)`` cast to float32,
            int32 and float32 respectively.
        """
        xp = self.xp

        raw_bbox = self._default_bbox.copy()
        raw_bbox[:, :2] += 1 / (1 + xp.exp(-loc[:, :2]))
        raw_bbox[:, :2] *= self._step[:, None]
        raw_bbox[:, 2:] *= xp.exp(loc[:, 2:])
        # (centre, size) -> (min corner, max corner)
        raw_bbox[:, :2] -= raw_bbox[:, 2:] / 2
        raw_bbox[:, 2:] += raw_bbox[:, :2]

        activated = 1 / (1 + xp.exp(-conf))
        raw_score = activated[:, 0, None] * activated[:, 1:]

        kept_bbox, kept_label, kept_score = [], [], []
        for fg in range(self.n_fg_class):
            cand_score = raw_score[:, fg]

            confident = cand_score >= self.score_thresh
            cand_bbox = raw_bbox[confident]
            cand_score = cand_score[confident]

            selected = utils.non_maximum_suppression(
                cand_bbox, self.nms_thresh, cand_score)
            cand_bbox = cand_bbox[selected]
            cand_score = cand_score[selected]

            # Collect the results on the host.
            kept_bbox.append(cuda.to_cpu(cand_bbox))
            kept_label.append(np.array((fg,) * len(cand_bbox)))
            kept_score.append(cuda.to_cpu(cand_score))

        return (
            np.vstack(kept_bbox).astype(np.float32),
            np.hstack(kept_label).astype(np.int32),
            np.hstack(kept_score).astype(np.float32),
        )
示例#23
0
    def decode(self, mb_loc, mb_conf, nms_thresh=0.45, score_thresh=0.6):
        """Decodes back to coordinates and classes of bounding boxes.

        This method decodes :obj:`mb_loc` and :obj:`mb_conf` returned
        by a SSD network back to :obj:`bbox`, :obj:`label` and :obj:`score`.

        Args:
            mb_loc (array): A float array whose shape is
                :math:`(K, 4)`, :math:`K` is the number of
                default bounding boxes.
            mb_conf (array): A float array whose shape is
                :math:`(K, n\\_fg\\_class + 1)`.
            nms_thresh (float): The threshold value
                for :func:`~chainercv.utils.non_maximum_suppression`.
                The default value is :obj:`0.45`.
                If :obj:`None`, non-maximum suppression is skipped.
            score_thresh (float): The threshold value for confidence score.
                If a bounding box whose confidence score is lower than
                this value, the bounding box will be suppressed.
                The default value is :obj:`0.6`.

        Returns:
            tuple of three arrays:
            This method returns a tuple of three arrays,
            :obj:`(bbox, label, score)`.

            * **bbox**: A float array of shape :math:`(R, 4)`, \
                where :math:`R` is the number of bounding boxes in an image. \
                Each bounding box is organized by \
                :math:`(y_{min}, x_{min}, y_{max}, x_{max})` \
                in the second axis.
            * **label** : An integer array of shape :math:`(R,)`. \
                Each value indicates the class of the bounding box.
            * **score** : A float array of shape :math:`(R,)`. \
                Each value indicates how confident the prediction is.

        """
        xp = self.xp

        # (center_y, center_x, height, width)
        mb_bbox = self._default_bbox.copy()
        mb_bbox[:, :2] += mb_loc[:, :2] * self._variance[0] \
            * self._default_bbox[:, 2:]
        mb_bbox[:, 2:] *= xp.exp(mb_loc[:, 2:] * self._variance[1])

        # (center_y, center_x, height, width) -> (y_min, x_min, height, width)
        mb_bbox[:, :2] -= mb_bbox[:, 2:] / 2
        # (y_min, x_min, height, width) -> (y_min, x_min, y_max, x_max)
        mb_bbox[:, 2:] += mb_bbox[:, :2]

        # Softmax. Subtracting the row-wise maximum leaves the result
        # mathematically unchanged but keeps exp() from overflowing to
        # inf (and the ratio from becoming NaN) for large logits.
        mb_score = xp.exp(mb_conf - mb_conf.max(axis=1, keepdims=True))
        mb_score /= mb_score.sum(axis=1, keepdims=True)

        bbox = []
        label = []
        score = []
        for l in range(mb_conf.shape[1] - 1):
            bbox_l = mb_bbox
            # the l-th class corresponds for the (l + 1)-th column.
            score_l = mb_score[:, l + 1]

            mask = score_l >= score_thresh
            bbox_l = bbox_l[mask]
            score_l = score_l[mask]

            if nms_thresh is not None:
                indices = utils.non_maximum_suppression(
                    bbox_l, nms_thresh, score_l)
                bbox_l = bbox_l[indices]
                score_l = score_l[indices]

            bbox.append(bbox_l)
            label.append(xp.array((l, ) * len(bbox_l)))
            score.append(score_l)

        bbox = xp.vstack(bbox).astype(np.float32)
        label = xp.hstack(label).astype(np.int32)
        score = xp.hstack(score).astype(np.float32)

        return bbox, label, score
示例#24
0
 def __call__(self, bbox, score):
     """Run ``non_maximum_suppression`` with ``self._thresh``.

     Returns whatever ``non_maximum_suppression`` returns for ``bbox``
     and ``score``.
     """
     return non_maximum_suppression(bbox, self._thresh, score)
 def check_non_maximum_suppression_zero_legnth_bbox(
         self, bbox, threshold):
     """Check the NMS result for ``bbox``: same array type, shape (0,)."""
     result = non_maximum_suppression(bbox, threshold)

     expected_type = type(bbox)
     expected_shape = (0,)
     self.assertIsInstance(result, expected_type)
     self.assertEqual(result.shape, expected_shape)
def mask_voting(roi_cmask_prob,
                bbox,
                roi_cls_prob,
                size,
                score_thresh,
                nms_thresh,
                mask_merge_thresh,
                binary_thresh,
                limit=100,
                bg_label=0):
    """Refine mask probabilities by merging multiple masks.

    First, this function discards invalid masks with non maximum suppression.
    Then, it merges masks with weights calculated from class probabilities
    and IoU.
    This function improves the mask qualities by merging overlapped masks
    predicted as the same object class.

    Here are notations used.

    * :math:`R` is the total number of RoIs produced across batches.
    * :math:`L` is the number of classes excluding the background.
    * :math:`RH` is the height of pooled image.
    * :math:`RW` is the width of pooled image.
    * :math:`N` is the number of merged masks that are returned.

    Args:
        roi_cmask_prob (array): A mask probability array whose shape is
            :math:`(R, RH, RW)`.
        bbox (array): A bounding box array whose shape is
            :math:`(R, 4)`.
        roi_cls_prob (array): A class probability array whose shape is
            :math:`(R, L + 1)`.
        size (tuple of int): Original image size.
        score_thresh (float): A threshold value of the class score.
        nms_thresh (float): A threshold value of non maximum suppression.
        mask_merge_thresh (float): A threshold value of the bounding box iou
            for mask merging.
        binary_thresh (float): A threshold value of mask score
            for mask merging.
        limit (int): The maximum number of outputs.
        bg_label (int): The id of the background label, i.e. the column of
            :obj:`roi_cls_prob` that is skipped.

    Returns:
        array, array, array, array:
        * **v_cmask_prob**: Merged mask probability. Its shapes is \
            :math:`(N, RH, RW)`.
        * **v_bbox**: Bounding boxes for the merged masks. Its shape is \
            :math:`(N, 4)`.
        * **v_label**: Class labels for the merged masks. Its shape is \
            :math:`(N, )` and its dtype is :obj:`numpy.int32`. A label is \
            the position of the class among the non-background classes, \
            which equals ``column - 1`` when :obj:`bg_label` is ``0``.
        * **v_score**: Class probabilities for the merged masks. Its shape \
            is :math:`(N, )`.

        All four arrays are empty (:math:`N = 0`) when there is no
        foreground class or no RoI survives.

    """

    roi_cmask_size = roi_cmask_prob.shape[1:]
    n_class = roi_cls_prob.shape[1]
    # Foreground columns in ascending order. The position of a column in this
    # list is the label reported in v_label.
    fg_labels = [label for label in range(n_class) if label != bg_label]

    v_cmask_prob = []
    v_bbox = []
    v_label = []
    v_cls_prob = []

    # Keyed by column index so that the lookup below stays correct for any
    # bg_label (a list indexed with ``label - 1`` is only valid for 0).
    cls_score = {}
    cls_bbox = {}

    for label in fg_labels:
        # non maximum suppression
        score_l = roi_cls_prob[:, label]
        keep_indices = non_maximum_suppression(bbox, nms_thresh, score_l)
        cls_bbox[label] = bbox[keep_indices]
        cls_score[label] = score_l[keep_indices]

    # Raise the score threshold so that at most ``limit`` candidates remain.
    # With no candidate at all there is nothing to rank, and both
    # np.concatenate([]) and indexing an empty array would raise.
    n_candidate = sum(len(cls_score[label]) for label in fg_labels)
    if n_candidate > 0:
        sorted_score = np.sort(
            np.concatenate([cls_score[label] for label in fg_labels]))[::-1]
        n_keep = min(len(sorted_score), limit)
        score_thresh = max(sorted_score[n_keep - 1], score_thresh)

    for fg_index, label in enumerate(fg_labels):
        score_l = cls_score[label]
        keep_indices = np.where(score_l >= score_thresh)
        bbox_l = cls_bbox[label][keep_indices]
        score_l = score_l[keep_indices]

        v_cmask_prob_l = []
        v_bbox_l = []
        v_score_l = []

        for i, bb in enumerate(bbox_l):
            # Every RoI (not only the NMS survivors) that overlaps enough
            # with this box votes, weighted by its probability for the class.
            iou = bbox_iou(bbox, bb[np.newaxis, :])
            keep_indices = np.where(iou >= mask_merge_thresh)[0]
            cmask_weight = roi_cls_prob[keep_indices, label]
            cmask_weight = cmask_weight / cmask_weight.sum()
            cmask_prob_i = roi_cmask_prob[keep_indices]
            bbox_i = bbox[keep_indices]
            m_cmask, m_bbox = _mask_aggregation(bbox_i, cmask_prob_i,
                                                cmask_weight, size,
                                                binary_thresh)
            if m_cmask is not None and m_bbox is not None:
                m_cmask = resize(m_cmask.astype(np.float32), roi_cmask_size)
                v_cmask_prob_l.append(m_cmask)
                v_bbox_l.append(m_bbox)
                v_score_l.append(score_l[i])

        if len(v_cmask_prob_l) > 0:
            v_cmask_prob_l = np.concatenate(v_cmask_prob_l)
            v_bbox_l = np.concatenate(v_bbox_l)
            v_score_l = np.array(v_score_l)

            v_label_l = np.repeat(fg_index, v_bbox_l.shape[0])
            v_label_l = v_label_l.astype(np.int32)
            v_cmask_prob.append(v_cmask_prob_l)
            v_bbox.append(v_bbox_l)
            v_label.append(v_label_l)
            v_cls_prob.append(v_score_l)

    if len(v_cmask_prob) > 0:
        v_cmask_prob = np.concatenate(v_cmask_prob)
        v_bbox = np.concatenate(v_bbox)
        v_label = np.concatenate(v_label)
        v_cls_prob = np.concatenate(v_cls_prob)
    else:
        v_cmask_prob = np.empty((0, roi_cmask_size[0], roi_cmask_size[1]))
        v_bbox = np.empty((0, 4))
        # Same dtype as the labels produced in the non-empty case.
        v_label = np.empty((0, ), dtype=np.int32)
        v_cls_prob = np.empty((0, ))
    return v_cmask_prob, v_bbox, v_label, v_cls_prob
示例#27
0
    def decode(self,
               arm_loc,
               arm_conf,
               odm_loc,
               odm_conf,
               nms_thresh=0.45,
               score_thresh=0.6):
        """Decode two-stage (ARM then ODM) predictions into detections.

        The default boxes are first shifted and scaled by :obj:`arm_loc` and
        the result is refined once more by :obj:`odm_loc`. Class scores are
        the softmax of :obj:`odm_conf`; rows whose objectness, computed from
        :obj:`arm_conf`, is at most ``0.01`` get all their scores set to zero.

        Args:
            arm_loc (array): Offsets applied to the default boxes. Indexed as
                ``[:, :2]`` and ``[:, 2:]``, so presumably of shape
                :math:`(K, 4)` (to be confirmed against the caller).
            arm_conf (array): Values from which the objectness mask over the
                rows of the score array is computed.
            odm_loc (array): Offsets applied to the already refined boxes.
            odm_conf (array): Class logits. Column ``0`` is skipped and
                column ``l + 1`` is used for label ``l``.
            nms_thresh (float): Threshold passed to
                :func:`utils.non_maximum_suppression`. If :obj:`None`,
                suppression is not performed.
            score_thresh (float): Only scores greater than or equal to this
                value are kept.

        Returns:
            tuple of three arrays:
            :obj:`bbox` (:obj:`numpy.float32`), :obj:`label`
            (:obj:`numpy.int32`) and :obj:`score` (:obj:`numpy.float32`),
            stacked class by class.

        """
        xp = self.xp
        anchors = self._default_bbox
        center_var = self._variance[0]
        size_var = self._variance[1]

        # boxes are held as (center_y, center_x, height, width) at first
        boxes = anchors.copy()

        # first stage: move the default boxes
        boxes[:, :2] += arm_loc[:, :2] * center_var * anchors[:, 2:]
        boxes[:, 2:] *= xp.exp(arm_loc[:, 2:] * size_var)

        # second stage: refine the boxes produced by the first stage
        boxes[:, :2] += odm_loc[:, :2] * center_var * boxes[:, 2:]
        boxes[:, 2:] *= xp.exp(odm_loc[:, 2:] * size_var)

        # centers become (y_min, x_min) ...
        boxes[:, :2] -= boxes[:, 2:] / 2
        # ... and sizes become (y_max, x_max)
        boxes[:, 2:] += boxes[:, :2]

        # softmax over the classes
        probs = xp.exp(odm_conf)
        probs /= probs.sum(axis=1, keepdims=True)

        objectness = xp.exp(arm_conf)
        objectness /= objectness + xp.exp(1 - arm_conf)

        # drop anchors that the first stage considers negative
        probs[objectness <= 0.01] = 0

        bbox_per_class = []
        label_per_class = []
        score_per_class = []
        n_fg_class = odm_conf.shape[1] - 1
        for l in range(n_fg_class):
            # label l lives in column l + 1 (column 0 is skipped)
            score_l = probs[:, l + 1]

            confident = score_l >= score_thresh
            bbox_l = boxes[confident]
            score_l = score_l[confident]

            if nms_thresh is not None:
                selected = utils.non_maximum_suppression(
                    bbox_l, nms_thresh, score_l, limit=400)
                bbox_l = bbox_l[selected]
                score_l = score_l[selected]

            bbox_per_class.append(bbox_l)
            label_per_class.append(xp.array((l, ) * len(bbox_l)))
            score_per_class.append(score_l)

        return (
            xp.vstack(bbox_per_class).astype(np.float32),
            xp.hstack(label_per_class).astype(np.int32),
            xp.hstack(score_per_class).astype(np.float32),
        )
示例#28
0
 def check_non_maximum_suppression_zero_legnth_bbox(
         self, bbox, threshold):
     """Assert that :func:`non_maximum_suppression` selects nothing.

     The returned selection has to be of the same type as :obj:`bbox` and
     its shape has to be ``(0,)``.

     """
     chosen = non_maximum_suppression(bbox, threshold)
     self.assertIsInstance(chosen, type(bbox))
     self.assertEqual(chosen.shape, (0, ))
示例#29
0
文件: rpn.py 项目: ml-lab/chainer-fpn
    def decode(self, locs, confs, anchors, in_shape):
        """Decode per-level predictions into RoIs for each image of a batch.

        For every image and every level, the anchors are moved by the
        predicted offsets, clipped to the input, ranked by confidence,
        stripped of boxes without positive height and width, and finally
        filtered by non maximum suppression. The survivors of all levels are
        merged and the most confident ones are kept.

        Args:
            locs (list): Per-level offset predictions. Each element exposes
                an :obj:`array` attribute that is indexed by the image index.
            confs (list): Per-level confidence predictions, accessed in the
                same way as :obj:`locs`.
            anchors (list of arrays): Per-level anchor boxes. They are copied
                before being modified.
            in_shape (tuple of ints): Shape of the network input.
                ``in_shape[0]`` is used as the batch size and
                ``in_shape[2:]`` as the upper bound when clipping.

        Returns:
            tuple of two arrays:
            :obj:`rois` (:obj:`numpy.float32`) and :obj:`roi_indices`
            (:obj:`numpy.int32`), where the latter holds, for each RoI, the
            index of the image it was decoded from.

        """
        xp = self.xp

        # the limits used around NMS depend on the train/test mode
        if chainer.config.train:
            limit_pre = self._train_nms_limit_pre
            limit_post = self._train_nms_limit_post
        else:
            limit_pre = self._test_nms_limit_pre
            limit_post = self._test_nms_limit_post

        rois = []
        roi_indices = []
        for i in range(in_shape[0]):
            level_rois = []
            level_confs = []
            for l in range(len(self._scales)):
                loc_l = locs[l].array[i]
                conf_l = confs[l].array[i]

                roi_l = anchors[l].copy()
                # corners -> (center_y, center_x, height, width)
                roi_l[:, 2:] -= roi_l[:, :2]
                roi_l[:, :2] += roi_l[:, 2:] / 2
                # apply the predicted offsets; the log-scale is capped
                roi_l[:, :2] += loc_l[:, :2] * roi_l[:, 2:]
                roi_l[:, 2:] *= xp.exp(xp.minimum(loc_l[:, 2:], exp_clip))
                # (center_y, center_x, height, width) -> corners
                roi_l[:, :2] -= roi_l[:, 2:] / 2
                roi_l[:, 2:] += roi_l[:, :2]
                # keep the corners inside the input
                roi_l[:, :2] = xp.maximum(roi_l[:, :2], 0)
                roi_l[:, 2:] = xp.minimum(
                    roi_l[:, 2:], xp.array(in_shape[2:]))

                # most confident candidates first, truncated before NMS
                ranking = _argsort(-conf_l)[:limit_pre]
                roi_l, conf_l = roi_l[ranking], conf_l[ranking]

                # discard boxes whose height or width is not positive
                non_empty = (roi_l[:, 2:] - roi_l[:, :2] > 0).all(axis=1)
                roi_l, conf_l = roi_l[non_empty], conf_l[non_empty]

                survivors = utils.non_maximum_suppression(
                    roi_l, self._nms_thresh, limit=limit_post)
                level_rois.append(roi_l[survivors])
                level_confs.append(conf_l[survivors])

            roi = xp.vstack(level_rois).astype(np.float32)
            conf = xp.hstack(level_confs).astype(np.float32)

            # keep the best RoIs over all the levels
            roi = roi[_argsort(-conf)[:limit_post]]

            rois.append(roi)
            roi_indices.append(xp.array((i, ) * len(roi)))

        return (
            xp.vstack(rois).astype(np.float32),
            xp.hstack(roi_indices).astype(np.int32),
        )
示例#30
0
 def check_non_maximum_suppression(self, bbox, threshold, expect):
     """Assert that suppressing :obj:`bbox` yields :obj:`expect`.

     The selection must be an instance of the type of :obj:`bbox`, have
     dtype :obj:`numpy.int32` and, once moved to the CPU, be equal to
     :obj:`expect`.

     """
     indices = non_maximum_suppression(bbox, threshold)
     self.assertIsInstance(indices, type(bbox))
     self.assertEqual(indices.dtype, np.int32)
     actual = cuda.to_cpu(indices)
     desired = cuda.to_cpu(expect)
     np.testing.assert_equal(actual, desired)
示例#31
0
def mask_voting(
        rois, mask_probs, cls_probs,
        n_class, H, W,
        score_thresh=0.7,
        nms_thresh=0.3,
        mask_merge_thresh=0.5,
        binary_thresh=0.4, max_num=100):
    """Merge overlapping mask predictions of the same class.

    For every foreground class, RoIs are first filtered with non maximum
    suppression. The surviving boxes are ranked by class probability over
    all the classes and roughly the best :obj:`max_num` are kept. For each
    kept box, the masks of all the RoIs whose IoU with it is at least
    :obj:`mask_merge_thresh` are aggregated by :func:`mask_aggregation`,
    weighted by their normalized class probabilities.

    Args:
        rois (array): Bounding boxes of the RoIs. Each row is compared with
            :func:`bbox_iou`, so presumably of shape :math:`(R, 4)`.
        mask_probs (array): Mask probabilities of the RoIs. Its last axis
            gives the side length of the returned (square) masks.
        cls_probs (array): Class probabilities of the RoIs. Column ``0`` is
            treated as the background.
        n_class (int): The number of classes including the background.
        H (int): Forwarded to :func:`mask_aggregation`; presumably the
            image height.
        W (int): Forwarded to :func:`mask_aggregation`; presumably the
            image width.
        score_thresh (float): Merged masks are returned only if their class
            probability is strictly greater than this value.
        nms_thresh (float): A threshold value of non maximum suppression.
        mask_merge_thresh (float): The minimum IoU for a RoI to take part in
            the merging.
        binary_thresh (float): Forwarded to :func:`mask_aggregation`.
        max_num (int): The limit passed to non maximum suppression and the
            rank used for the global probability cut-off.

    Returns:
        tuple of four arrays:
        :obj:`v_bboxes`, :obj:`v_masks`, :obj:`v_labels` and
        :obj:`v_cls_probs`. :obj:`v_labels` holds the column indices of
        :obj:`cls_probs` (so foreground labels start at ``1``) as
        :obj:`numpy.int32`. All the arrays are empty when there is no
        foreground class or no RoI survives.

    """
    mask_size = mask_probs.shape[-1]
    v_labels = np.empty((0, ), dtype=np.int32)
    v_masks = np.empty((0, mask_size, mask_size), dtype=np.float32)
    v_bboxes = np.empty((0, 4), dtype=np.float32)
    v_cls_probs = np.empty((0, ), dtype=np.float32)

    tmp_all_scores = np.empty((0, ), dtype=np.float32)
    tmp_cls_probs = []
    tmp_bbox = []
    # label 0 is the background, so foreground labels start at 1
    for label in range(1, n_class):
        # non maximum suppression
        cls_prob_l = cls_probs[:, label]
        keep_indices = non_maximum_suppression(
            rois, nms_thresh, cls_prob_l, limit=max_num)
        bbox_l = rois[keep_indices]
        cls_prob_l = cls_prob_l[keep_indices]
        tmp_bbox.append(bbox_l)
        tmp_cls_probs.append(cls_prob_l)
        tmp_all_scores = np.concatenate((tmp_all_scores, cls_prob_l))

    # Without any candidate there is no score to rank; indexing the empty
    # array below would raise IndexError, so return the empty result.
    if len(tmp_all_scores) == 0:
        return v_bboxes, v_masks, v_labels, v_cls_probs

    # probability of the max_num-th best candidate, but never below 1e-3
    sorted_all_scores = np.sort(tmp_all_scores)[::-1]
    keep_num = min(len(sorted_all_scores), max_num)
    thresh = max(sorted_all_scores[keep_num - 1], 1e-3)

    for label in range(1, n_class):
        bbox_l = tmp_bbox[label - 1]
        cls_prob_l = tmp_cls_probs[label - 1]
        keep_indices = np.where(cls_prob_l >= thresh)
        bbox_l = bbox_l[keep_indices]
        cls_prob_l = cls_prob_l[keep_indices]

        v_mask_l = np.empty((0, mask_size, mask_size), dtype=np.float32)
        v_bbox_l = np.empty((0, 4), dtype=np.float32)
        v_cls_prob_l = np.empty((0, ), dtype=np.float32)

        for i, bbox in enumerate(bbox_l):
            # every RoI overlapping enough with this box takes part in the
            # vote, weighted by its probability for the current class
            iou = bbox_iou(rois, bbox[np.newaxis, :])
            idx = np.where(iou >= mask_merge_thresh)[0]
            mask_weights = cls_probs[idx, label]
            mask_weights = mask_weights / mask_weights.sum()
            mask_prob_l = mask_probs[idx]
            rois_l = rois[idx]
            clipped_bbox, clipped_mask = mask_aggregation(
                rois_l, mask_prob_l, mask_weights, H, W, binary_thresh)
            if clipped_bbox is not None and clipped_mask is not None:
                clipped_mask = cv2.resize(
                    clipped_mask.astype(np.float32),
                    (mask_size, mask_size))
                v_mask_l = np.concatenate((v_mask_l, clipped_mask[None]))
                v_bbox_l = np.concatenate((v_bbox_l, clipped_bbox[None]))
                v_cls_prob_l = np.concatenate(
                    (v_cls_prob_l, cls_prob_l[i][None]))

        keep_indices = v_cls_prob_l > score_thresh
        v_mask_l = v_mask_l[keep_indices]
        v_bbox_l = v_bbox_l[keep_indices]
        v_cls_prob_l = v_cls_prob_l[keep_indices]

        # int32 explicitly: np.repeat would give the platform integer and
        # silently promote v_labels away from its declared dtype
        v_label_l = np.full(v_bbox_l.shape[0], label, dtype=np.int32)
        v_masks = np.concatenate((v_masks, v_mask_l))
        v_bboxes = np.concatenate((v_bboxes, v_bbox_l))
        v_labels = np.concatenate((v_labels, v_label_l))
        v_cls_probs = np.concatenate((v_cls_probs, v_cls_prob_l))
    return v_bboxes, v_masks, v_labels, v_cls_probs
示例#32
0
    def decode(self, locs, confs, anchors, in_shape):
        """Decodes back to coordinates of RoIs.

        This method converts :obj:`locs` and :obj:`confs` returned
        by a FPN network into :obj:`rois` and :obj:`roi_indices`.

        Args:
            locs (list of arrays): A list of arrays whose shape is
                :math:`(N, K_l, 4)`, where :math:`N` is the size of batch and
                :math:`K_l` is the number of the anchor boxes
                of the :math:`l`-th level.
            confs (list of arrays): A list of arrays whose shape is
                :math:`(N, K_l)`.
            anchors (list of arrays): Anchor boxes returned by :meth:`anchors`.
            in_shape (tuple of ints): The shape of the input array of
                the feature extractor.

        Returns:
            tuple of two arrays:
            :obj:`rois` and :obj:`roi_indices`.

            * **rois**: An array of shape :math:`(R, 4)`, \
                where :math:`R` is the total number of RoIs in the given batch.
            * **roi_indices** : An array of shape :math:`(R,)`.
        """
        xp = self.xp

        # candidate limits before/after NMS differ between train and test
        if chainer.config.train:
            pre_nms_limit = self._train_nms_limit_pre
            post_nms_limit = self._train_nms_limit_post
        else:
            pre_nms_limit = self._test_nms_limit_pre
            post_nms_limit = self._test_nms_limit_post

        rois = []
        roi_indices = []
        for i in range(in_shape[0]):
            rois_of_image = []
            confs_of_image = []
            for l in range(len(self._scales)):
                loc_l = locs[l].array[i]
                conf_l = confs[l].array[i]

                roi_l = anchors[l].copy()
                # (y_min, x_min, y_max, x_max) -> (y, x, height, width)
                roi_l[:, 2:] -= roi_l[:, :2]
                roi_l[:, :2] += roi_l[:, 2:] / 2
                # shift the centers and rescale the sizes
                roi_l[:, :2] += loc_l[:, :2] * roi_l[:, 2:]
                roi_l[:, 2:] *= xp.exp(xp.minimum(loc_l[:, 2:], exp_clip))
                # (y, x, height, width) -> (y_min, x_min, y_max, x_max)
                roi_l[:, :2] -= roi_l[:, 2:] / 2
                roi_l[:, 2:] += roi_l[:, :2]
                # restrict the boxes to the input area
                roi_l[:, :2] = xp.maximum(roi_l[:, :2], 0)
                roi_l[:, 2:] = xp.minimum(
                    roi_l[:, 2:], xp.array(in_shape[2:]))

                # sort by descending confidence and truncate
                best_first = argsort(-conf_l)[:pre_nms_limit]
                roi_l, conf_l = roi_l[best_first], conf_l[best_first]

                # remove degenerate boxes
                has_area = (roi_l[:, 2:] - roi_l[:, :2] > 0).all(axis=1)
                roi_l, conf_l = roi_l[has_area], conf_l[has_area]

                kept = utils.non_maximum_suppression(
                    roi_l, self._nms_thresh, limit=post_nms_limit)
                rois_of_image.append(roi_l[kept])
                confs_of_image.append(conf_l[kept])

            roi = xp.vstack(rois_of_image).astype(np.float32)
            conf = xp.hstack(confs_of_image).astype(np.float32)

            # final selection over the levels of this image
            roi = roi[argsort(-conf)[:post_nms_limit]]

            rois.append(roi)
            roi_indices.append(xp.array((i, ) * len(roi)))

        return (
            xp.vstack(rois).astype(np.float32),
            xp.hstack(roi_indices).astype(np.int32),
        )