示例#1
0
    def test_random_crop_with_bbox_constraints(self):
        """Check the crop produced by ``random_crop_with_bbox_constraints``.

        A random CHW image and ten random boxes are cropped with
        ``return_param=True``. When no constraint was chosen the output must
        equal the input. Otherwise the output must equal the slice described
        by ``param``, its area and aspect ratio must respect the requested
        scale / aspect-ratio limits, and the IoU between the crop window and
        every box must lie within the chosen ``(min_iou, max_iou)`` pair.
        """
        img = np.random.randint(0, 256, size=(3, 480, 640)).astype(np.float32)
        bbox = generate_random_bbox(10, img.shape[1:], 0.1, 0.9)

        out, param = random_crop_with_bbox_constraints(
            img, bbox,
            min_scale=0.3, max_scale=1,
            max_aspect_ratio=2,
            return_param=True)

        if param['constraint'] is None:
            np.testing.assert_equal(out, img)
        else:
            np.testing.assert_equal(
                out, img[:, param['y_slice'], param['x_slice']])

            # The crop height and width are integers, so each side may be up
            # to one pixel shorter than the real-valued size implied by the
            # scale. Add 1 to both sides before checking the lower bound so
            # the assertion does not fail on rounding alone.
            self.assertGreaterEqual(
                out.shape[0] * (out.shape[1] + 1) * (out.shape[2] + 1),
                img.size * 0.3 * 0.3)
            self.assertLessEqual(out.size, img.size * 1 * 1)

            # to ignore rounding error, add 1
            self.assertLessEqual(out.shape[1] / (out.shape[2] + 1),
                                 img.shape[1] / img.shape[2] * 2)
            self.assertLessEqual(out.shape[2] / (out.shape[1] + 1),
                                 img.shape[2] / img.shape[1] * 2)

            # Rebuild the crop window as (y_min, x_min, y_max, x_max).
            bb = np.array((param['y_slice'].start, param['x_slice'].start,
                           param['y_slice'].stop, param['x_slice'].stop))
            iou = bbox_iou(bb[np.newaxis], bbox)
            min_iou, max_iou = param['constraint']
            # A falsy bound (None or 0) means that side is unconstrained.
            if min_iou:
                self.assertGreaterEqual(iou.min(), min_iou)
            if max_iou:
                self.assertLessEqual(iou.max(), max_iou)
示例#2
0
    def check(self, bbox_a, bbox_b, expected):
        """Assert that ``bbox_iou(bbox_a, bbox_b)`` matches ``expected``.

        The result must be an instance of the type of ``expected``, and the
        two must compare equal after both are passed through ``cuda.to_cpu``.
        """
        actual = bbox_iou(bbox_a, bbox_b)
        self.assertIsInstance(actual, type(expected))

        actual_on_host = cuda.to_cpu(actual)
        expected_on_host = cuda.to_cpu(expected)
        np.testing.assert_equal(actual_on_host, expected_on_host)
    def __call__(self, *inputs):
        """Run the wrapped link on one batch and report detection metrics.

        Args:
            *inputs: Batch data. Only the first two items are used; they are
                unpacked as ``images`` and ``labels``.

        Nothing is returned. ``mean_iou``, ``map`` and ``ap/sheep`` are
        published through ``reporter.report``.
        """
        images, labels = inputs[:2]
        with cuda.Device(self.device):
            _, bboxes = self.link(images)

            # Metrics are computed on host copies of the data.
            bboxes = cuda.to_cpu(bboxes.data)
            labels = cuda.to_cpu(labels)

            xp = cuda.get_array_module(bboxes)

            bboxes = self.extract_corners(bboxes)
            bboxes = self.scale_bboxes(bboxes, Size._make(images.shape[-2:]))

            # bbox_iou returns a pairwise matrix; only the diagonal (the i-th
            # prediction against the i-th label) is kept. The builtin ``bool``
            # is used for the mask because the ``bool`` alias is missing from
            # the NumPy namespace in some releases (removed in 1.24).
            pairwise_iou = bbox_iou(bboxes.data.copy(), xp.squeeze(labels))
            ious = pairwise_iou[xp.eye(len(bboxes)).astype(bool)]
            mean_iou = ious.mean()

            reporter.report({'mean_iou': mean_iou})

            # One predicted box per sample, each with score 1 and label 0.
            pred_bboxes = [bbox.data[xp.newaxis, ...].astype(xp.int32) for bbox in F.separate(bboxes, axis=0)]
            pred_scores = xp.ones((len(bboxes), 1))
            pred_labels = xp.zeros_like(pred_scores)

            gt_bboxes = [bbox.data[...] for bbox in F.separate(labels, axis=0)]
            gt_labels = xp.zeros_like(pred_scores)

            result = chainercv.evaluations.eval_detection_voc(
                pred_bboxes,
                pred_labels,
                pred_scores,
                gt_bboxes,
                gt_labels
            )

            reporter.report({'map': result['map']})
            reporter.report({'ap/sheep': result['ap'][0]})
    def _asign_gt_to_anchor(self, anchors, locs, confs, gt_bboxes, gt_labels):
        """Match anchors to ground truth per sample and keep only fg/bg anchors.

        For every sample the IoU between each anchor and each ground-truth box
        is computed. Anchors whose best IoU exceeds ``self._fg_thresh`` are
        foreground; anchors whose best IoU is below ``self._bg_thresh`` are
        background; the rest are dropped. Foreground rows come first in every
        returned array, followed by background rows.

        Args:
            anchors, locs, confs: Per-sample anchor boxes, predicted locations
                and predicted confidences, iterated in lockstep.
            gt_bboxes, gt_labels: Per-sample ground-truth boxes and labels.

        Returns:
            tuple of five lists ``(anchors, locs, confs, gt_bboxes,
            gt_labels)`` with one entry per sample. Foreground labels are the
            matched ground-truth label plus one; background labels and boxes
            are zero.
        """
        _anchors, _locs, _confs = [], [], []
        _gt_labels, _gt_bboxes = [], []
        for anchor, loc, conf, gt_bbox, gt_label in zip(
                anchors, locs, confs, gt_bboxes, gt_labels):
            if gt_label.shape[0] > 0:
                iou = bbox_iou(anchor, gt_bbox)
                max_iou = self.xp.max(iou, axis=-1)
                max_iou_indices = self.xp.argmax(iou, axis=-1)
            else:  # guard no annotation
                max_iou = self.xp.zeros(conf.shape[0], self.xp.float32)
                # Placeholder indices: an initialised integer array, so it is
                # a valid index array rather than uninitialised float memory.
                max_iou_indices = self.xp.zeros(conf.shape[0], self.xp.int32)

            fg_mask = max_iou > self._fg_thresh
            bg_mask = max_iou < self._bg_thresh
            n_bg = self.xp.where(bg_mask)[0].shape[0]
            max_iou_indices_fg = max_iou_indices[fg_mask]

            # Shift labels by one so that 0 can denote background.
            _gt_label_fg = self.xp.array(
                [gt_label[i] + 1 for i in max_iou_indices_fg], self.xp.int32)

            _gt_bbox_fg = self.xp.array(
                [gt_bbox[i] for i in max_iou_indices_fg], self.xp.float32)
            if _gt_bbox_fg.shape[0] == 0:  # guard not fg anchor
                # An empty list would give shape (0,); vstack needs (0, 4).
                _gt_bbox_fg = self.xp.empty((0, 4), self.xp.float32)

            _anchors.append(F.vstack((anchor[fg_mask], anchor[bg_mask])))
            _locs.append(F.vstack((loc[fg_mask], loc[bg_mask])))
            _confs.append(F.vstack((conf[fg_mask], conf[bg_mask])))
            _gt_bboxes.append(
                self.xp.vstack((_gt_bbox_fg, self.xp.zeros((n_bg, 4)))))
            _gt_labels.append(
                self.xp.hstack((_gt_label_fg, self.xp.zeros(n_bg))))

        return _anchors, _locs, _confs, _gt_bboxes, _gt_labels
示例#5
0
def merge_entries(entry1, entry2, thresh):
    """Merge the boxes and scores of two entries, collapsing overlaps.

    The boxes of both entries are concatenated and sorted by descending
    score. A box whose largest IoU with any other box is at most ``thresh``
    is kept unchanged. Otherwise, if its most-overlapping partner comes later
    in the sorted order, the intersection of the two boxes (from
    ``get_bbox_intersection``) is kept with this box's score; if the partner
    comes earlier, the box is dropped.

    Args:
        entry1, entry2 (dict): Mappings with ``'bbox'`` and ``'score'`` arrays.
        thresh (float): IoU threshold above which two boxes are merged.

    Returns:
        tuple: ``(bbox, score)``. When there are no boxes at all the
        concatenated inputs are returned as-is; otherwise both are float32
        arrays and ``bbox`` has shape ``(-1, 4)``.
    """
    boxes_a, scores_a = entry1['bbox'], entry1['score']
    boxes_b, scores_b = entry2['bbox'], entry2['score']
    bbox = np.concatenate((boxes_a, boxes_b), axis=0)
    score = np.concatenate((scores_a, scores_b), axis=0)
    if len(score) == 0:
        return bbox, score

    # Highest score first.
    ranking = score.argsort()[::-1]
    bbox, score = bbox[ranking], score[ranking]

    iou = bbox_iou(bbox, bbox)
    iou *= 1 - np.eye(len(bbox))  # ignore IoU with itself

    merged_bbox, merged_score = [], []
    for i, box in enumerate(bbox):
        overlaps = iou[i]
        if overlaps.max() <= thresh:
            merged_bbox.append(box)
            merged_score.append(score[i])
            continue
        partner = overlaps.argmax()
        if partner > i:
            merged_bbox.append(get_bbox_intersection(box, bbox[partner]))
            merged_score.append(score[i])

    return (np.array(merged_bbox, dtype=np.float32).reshape(-1, 4),
            np.array(merged_score, dtype=np.float32))
示例#6
0
def crop_with_bbox_constraints(
        img, bbox, crop_width=None, crop_height=None, constraints=None,
        max_trial=10, return_param=False):
    """Crop a fixed-size window from an image under bbox IoU constraints.

    A centred window is always a candidate. For each constraint, up to
    ``max_trial`` random window positions are tried, and the first one whose
    IoU with the boxes satisfies ``min_iou < iou.min()`` and
    ``iou.max() <= max_iou`` becomes another candidate. One candidate is then
    picked uniformly at random.

    Args:
        img (~numpy.ndarray): Image in CHW format.
        bbox (~numpy.ndarray): Bounding boxes used for the constraints. When
            it is empty only the centred window is considered.
        crop_width (int or None): Width of the window. ``None`` means the
            full image width.
        crop_height (int or None): Height of the window. ``None`` means the
            full image height.
        constraints (iterable of tuples): ``(min_iou, max_iou)`` pairs. A
            ``None`` bound is replaced by 0 (min) or 1 (max). Defaults to
            ``((0.1, None), (0.3, None), (0.5, None), (0.7, None),
            (0.9, None), (None, 1))``.
        max_trial (int): Number of random positions tried per constraint.
        return_param (bool): If :obj:`True`, also return the chosen
            parameters.

    Returns:
        ~numpy.ndarray or (~numpy.ndarray, dict): The cropped image, and when
        ``return_param`` is true a dict with keys ``constraint``,
        ``y_slice`` and ``x_slice``.
    """
    if constraints is None:
        constraints = (
            (0.1, None),
            (0.3, None),
            (0.5, None),
            (0.7, None),
            (0.9, None),
            (None, 1),
        )

    _, H, W = img.shape

    # The signature defaults are None, which int() cannot convert; fall back
    # to the full image extent so the defaults are usable.
    crop_h = H if crop_height is None else int(crop_height)
    crop_w = W if crop_width is None else int(crop_width)

    diff_h = int((H - crop_h) / 2.)
    diff_w = int((W - crop_w) / 2.)

    # The centred crop is always available as a fallback candidate.
    params = [{
        'constraint': None, 'y_slice': slice(diff_h, diff_h + crop_h),
        'x_slice': slice(diff_w, diff_w + crop_w)}]

    if len(bbox) == 0:
        constraints = list()

    range_H = H - crop_h
    range_W = W - crop_w

    for min_iou, max_iou in constraints:
        if min_iou is None:
            min_iou = 0
        if max_iou is None:
            max_iou = 1

        for _ in six.moves.range(max_trial):
            # randrange(0) raises, so a zero range is pinned to offset 0.
            # NOTE(review): randrange(range_H) never yields range_H itself,
            # so the window never touches the bottom/right edge - confirm
            # whether that is intended.
            crop_t = 0 if range_H == 0 else random.randrange(range_H)
            crop_l = 0 if range_W == 0 else random.randrange(range_W)
            crop_bb = np.array((
                crop_t, crop_l, crop_t + crop_h, crop_l + crop_w))

            iou = utils.bbox_iou(bbox, crop_bb[np.newaxis])
            if min_iou < iou.min() and iou.max() <= max_iou:
                params.append({
                    'constraint': (min_iou, max_iou),
                    'y_slice': slice(crop_t, crop_t + crop_h),
                    'x_slice': slice(crop_l, crop_l + crop_w)})
                break

    param = random.choice(params)
    img = img[:, param['y_slice'], param['x_slice']]

    if return_param:
        return img, param
    else:
        return img
示例#7
0
    def calc_loss(self, image_size, predicted_grids, gt_bbox_points, objectness_scores):
        """Compute the box-regression loss and the objectness loss.

        Predicted boxes are derived from ``predicted_grids`` via
        ``self.get_corners``. For each ground-truth box, predictions with
        IoU >= 0.7 are positives; if there are none, the single prediction
        with the highest IoU is used instead. Predictions with
        0.3 < IoU < 0.7 are ignored by the objectness loss, and everything
        else is treated as negative.

        Args:
            image_size: Forwarded to ``self.get_corners``.
            predicted_grids: Network output handed to ``self.get_corners``;
                its ``dtype`` is used for the zero loss in the empty case.
            gt_bbox_points: Iterable of ground-truth boxes.
            objectness_scores: Per-prediction objectness scores passed to
                ``F.softmax_cross_entropy``.

        Returns:
            ``(F.mean(huber_loss), objectness_loss)`` in the normal case.
            NOTE(review): when ``gt_bbox_points`` yields no boxes a single
            zero ``Variable`` is returned instead of a tuple - confirm that
            callers handle both shapes.
        """
        predicted_bbox_points = self.get_corners(predicted_grids, image_size, scale_to_image_size=False)

        # 1. transform box coordinates to aabb coordinates for determination of iou
        # Entries 0, 3, 1 and 5 of the get_corners result are selected and
        # stacked along axis 1 - presumably these form the four
        # axis-aligned box coordinates expected by bbox_iou; TODO confirm.
        predicted_bbox_points = predicted_bbox_points[0], predicted_bbox_points[3], predicted_bbox_points[1], predicted_bbox_points[5]
        predicted_bbox_points = F.stack(predicted_bbox_points, axis=1)

        # 2. find best prediction area for each gt bbox
        gt_bboxes_to_use_for_loss = []
        positive_anchor_indices = self.xp.empty((0,), dtype=self.xp.int32)
        not_contributing_anchors = self.xp.empty((0,), dtype=self.xp.int32)
        for index, gt_bbox in enumerate(gt_bbox_points):
            # determine which bboxes are positive boxes as they have high iou with gt and also which bboxes are negative
            # this is also used to train objectness classification
            # The gt box is repeated once per prediction; only row 0 of the
            # resulting IoU matrix is read below.
            gt_bbox = self.xp.tile(gt_bbox[None, ...], (len(predicted_bbox_points), 1))

            ious = bbox_iou(gt_bbox, predicted_bbox_points.data)
            positive_boxes = self.xp.where((ious[0] >= 0.7))
            not_contributing_boxes = self.xp.where(self.xp.logical_and(0.3 < ious[0], ious[0] < 0.7))
            if len(positive_boxes[0]) == 0:
                # No prediction reaches 0.7: fall back to the best one so
                # every gt box contributes at least one positive.
                best_iou_index = ious[0, :].argmax()
                positive_anchor_indices = self.xp.concatenate((positive_anchor_indices, best_iou_index[None, ...]), axis=0)
                gt_bboxes_to_use_for_loss.append(gt_bbox[0])
            else:
                positive_anchor_indices = self.xp.concatenate((positive_anchor_indices, positive_boxes[0]), axis=0)
                # One copy of the gt box per positive prediction.
                gt_bboxes_to_use_for_loss.extend(gt_bbox[:len(positive_boxes[0])])
            not_contributing_anchors = self.xp.concatenate((not_contributing_anchors, not_contributing_boxes[0]), axis=0)

        if len(gt_bboxes_to_use_for_loss) == 0:
            return Variable(self.xp.array(0, dtype=predicted_grids.dtype))

        gt_bboxes_to_use_for_loss = F.stack(gt_bboxes_to_use_for_loss)

        # filter predicted bboxes and only keep bboxes from those regions that actually contain a bbox
        predicted_bbox_points = F.get_item(predicted_bbox_points, positive_anchor_indices)

        # 3. calculate the Huber loss (delta = 1) for bbox regression
        loss = F.huber_loss(
            predicted_bbox_points,
            gt_bboxes_to_use_for_loss,
            1
        )

        # 4. calculate objectness loss
        # Labels: 0 = negative, 1 = positive, -1 = ignored. Positives are
        # written last, so an index present in both sets ends up positive.
        objectness_labels = self.xp.zeros(len(objectness_scores), dtype=self.xp.int32)
        objectness_labels[not_contributing_anchors] = -1
        objectness_labels[positive_anchor_indices] = 1

        objectness_loss = F.softmax_cross_entropy(
            objectness_scores,
            objectness_labels,
            ignore_label=-1,
        )

        return F.mean(loss), objectness_loss
示例#8
0
 def rebase_sst(self, s_in, s_st, bboxes):
     """Reorder each stack of straddling masks by IoU with its target box.

     For every ``(inner mask, mask stack, box)`` triple, each mask in the
     stack is OR-ed with the inner mask, the bounding box of that union is
     taken with ``mask_to_bbox``, and the stack is reordered so that masks
     whose union box has the highest IoU with ``box`` come first.

     Returns:
         list: The reordered mask stacks, one per input triple.
     """
     reordered = []
     for inner_mask, stack, target_box in zip(s_in, s_st, bboxes):
         count, height, width = stack.shape
         merged = np.empty((count, height, width), dtype=np.float32)
         for row in range(count):
             merged[row] = np.bitwise_or(inner_mask, stack[row])
         merged_boxes = mask_to_bbox(merged)
         overlap = bbox_iou(merged_boxes, np.array([target_box]))
         # Descending IoU order.
         ranking = np.argsort(np.squeeze(overlap), axis=0)[::-1]
         reordered.append(stack[ranking])
     return reordered
示例#9
0
    def box_alignment(self, img, bboxes, masks, boxes):
        """Greedily grow each inner mask with straddling masks.

        The initial inner masks and straddling masks come from
        ``self.get_initial_sets``; the straddling masks are then reordered
        with ``self.rebase_sst``. For each box, straddling masks are OR-ed
        into the current mask one at a time, and growth stops as soon as the
        latest candidate union lowered the IoU between the mask's bounding
        box and ``bbox``.

        Returns:
            tuple: ``(final_boxes, final_masks, added_superpixel_masks)``.
            The first two are NumPy arrays and the third is a list of int32
            arrays. Three empty lists are returned when either initial set
            is empty.
        """
        s_in, s_st = self.get_initial_sets(img, bboxes, masks, boxes)

        if len(s_in) == 0 or len(s_st) == 0:
            return [], [], []

        s_st = self.rebase_sst(s_in, s_st, bboxes)
        final_boxes = []
        final_masks = []
        added_superpixel_masks = []
        for bbox, sin, sst in zip(bboxes, s_in, s_st):
            s = sin
            # A 0-dimensional entry is skipped - presumably a placeholder
            # for "no inner mask"; TODO confirm against get_initial_sets.
            if s.ndim == 0:
                continue
            assert len(sst) >= 1, "No straddling boxes are found"

            proc = 0
            new_superpixels = np.zeros_like(s)
            # ``s`` is the accepted mask, ``new_s`` the candidate that adds
            # the next straddling mask; iou_old / iou_new are their IoUs.
            new_s = np.bitwise_or(s, sst[0])
            iou_old = bbox_iou(mask_to_bbox(np.array([s])),
                               np.array([bbox]))[0][0]
            iou_new = bbox_iou(mask_to_bbox(np.array([new_s])),
                               np.array([bbox]))[0][0]
            for sk in sst[1:]:
                # Stop once the pending candidate is worse than the
                # accepted mask.
                if iou_old > iou_new:
                    break
                # Accept the pending candidate, then form the next one.
                iou_old = iou_new
                s = new_s
                new_s = np.bitwise_or(s, sk)
                iou_new = bbox_iou(mask_to_bbox(np.array([new_s])),
                                   np.array([bbox]))[0][0]
                proc += 1
                # NOTE(review): this records ``sk``, the candidate that was
                # just formed and may later be rejected, and sst[0] is never
                # recorded - confirm this is the intended bookkeeping.
                new_superpixels = np.bitwise_or(new_superpixels, sk)
            # The last candidate is not accepted here even if the loop ran
            # to completion; only ``s`` is kept.
            final_masks.append(s)
            final_boxes.append(mask_to_bbox(np.array([s]))[-1])
            added_superpixel_masks.append(new_superpixels.astype(np.int32))
            if self.verbosity:
                print('No. of superpixels added: {:2d}'.format(proc))
        final_masks, final_boxes = np.array(final_masks), np.array(final_boxes)
        return final_boxes, final_masks, added_superpixel_masks
    def encode(self, bbox, label, iou_thresh=0.5):
        """Encode ground-truth boxes and labels against the default boxes.

        Each default box in ``self._default_bbox`` (stored as centre and
        size) is assigned a ground-truth box or marked as background. First,
        the globally best (default box, ground truth) pairs are matched
        greedily so that each is used at most once. Then every still
        unmatched default box whose best IoU is at least ``iou_thresh`` is
        assigned to its best ground truth.

        Args:
            bbox: Ground-truth boxes in corner format.
            label: Ground-truth labels, indexed like ``bbox``.
            iou_thresh (float): IoU threshold for the second matching stage.

        Returns:
            tuple: ``(mb_loc, mb_label)`` as float32 offsets and int32
            labels. Background boxes get label 0 and other labels are shifted
            by one. Both are all zeros when ``bbox`` is empty.
        """
        xp = self.xp
        default_bbox = self._default_bbox

        if len(bbox) == 0:
            empty_loc = xp.zeros(default_bbox.shape, dtype=np.float32)
            empty_label = xp.zeros(default_bbox.shape[0], dtype=np.int32)
            return empty_loc, empty_label

        # Default boxes are (centre, size); bbox_iou is fed corner format.
        centre = default_bbox[:, :2]
        size = default_bbox[:, 2:]
        corners = xp.hstack((centre - size / 2, centre + size / 2))
        iou = utils.bbox_iou(corners, bbox)

        index = xp.empty(len(default_bbox), dtype=int)
        index[:] = -1  # background

        # Stage 1: repeatedly take the best remaining pair, then retire its
        # row and column so neither side is matched twice.
        remaining = iou.copy()
        while True:
            best = remaining.argmax()
            i, j = xp.unravel_index(best, remaining.shape)
            if remaining[i, j] < 1e-6:
                break
            index[i] = j
            remaining[i, :] = 0
            remaining[:, j] = 0

        # Stage 2: unmatched default boxes that overlap well enough.
        unmatched = index < 0
        good_overlap = iou.max(axis=1) >= iou_thresh
        mask = xp.logical_and(unmatched, good_overlap)
        index[mask] = iou[mask].argmax(axis=1)

        # Corner format -> (centre, size), in place on a copy.
        mb_bbox = bbox[index].copy()
        mb_bbox[:, 2:] -= mb_bbox[:, :2]
        mb_bbox[:, :2] += mb_bbox[:, 2:] / 2

        centre_var, size_var = self._variance[0], self._variance[1]
        mb_loc = xp.empty_like(mb_bbox)
        mb_loc[:, :2] = (mb_bbox[:, :2] - default_bbox[:, :2]) / \
            (centre_var * default_bbox[:, 2:])
        mb_loc[:, 2:] = xp.log(mb_bbox[:, 2:] / default_bbox[:, 2:]) / \
            size_var

        mb_label = label[index] + 1
        mb_label[index < 0] = 0

        return mb_loc.astype(np.float32), mb_label.astype(np.int32)
示例#11
0
    def get_loss(self, 
                 g_bboxes, g_labels,
                 p_bboxes, p_confs, p_objs
                 ):
        """ Generate loss
        """
        b_loss = 0
        c_loss = 0
        p_loss = 0
        for g_bbox, g_label, p_bbox, p_conf, p_obj in zip(
            g_bboxes, g_labels, p_bboxes, p_confs, p_objs
            ):
            IoU = bbox_iou(g_bbox, p_bbox)
            pick = self.xp.argmax(IoU, axis=-1)
            p_bbox = p_bbox[pick]
            p_conf = p_conf[pick]
            p_obj = p_obj[pick]

            b_loss += F.sum((p_bbox - g_bbox) ** 2)
            c_loss += F.sum((p_conf - ))
def get_naive_zoom(image, paste_x, paste_y, stamp):
    """Crop a randomly enlarged window around a pasted stamp.

    The window is the stamp size grown by a random factor in [0.3, 10.3),
    capped at the image size, and placed at a random offset between bounds
    computed from the stamp position and the image extent.

    Args:
        image: Object exposing ``width``, ``height`` and ``crop(box)``.
        paste_x, paste_y: Top-left corner where the stamp was pasted.
        stamp: Object exposing ``width`` and ``height``.

    Returns:
        tuple: ``(cropped_image, iou)`` where ``iou`` is the IoU between the
        crop window and the stamp box as computed by ``bbox_iou``.
    """
    zoom = random.random() * 10 + 0.3
    crop_w = min(stamp.width + zoom * stamp.width, image.width)
    crop_h = min(stamp.height + zoom * stamp.height, image.height)

    ratio_x = random.random()
    ratio_y = random.random()

    # Offset bounds per axis; the upper bound is lifted to the lower bound
    # whenever it would otherwise fall below it.
    low_x = max(paste_x + stamp.width - crop_w, 0)
    low_y = max(paste_y + stamp.height - crop_h, 0)
    high_x = max(min(paste_x, image.width - crop_w), low_x)
    high_y = max(min(paste_y, image.height - crop_h), low_y)

    left = int(low_x + ratio_x * (high_x - low_x))
    top = int(low_y + ratio_y * (high_y - low_y))

    crop_bbox = [left, top, left + crop_w, top + crop_h]
    paste_bbox = np.array(
        [paste_x, paste_y, paste_x + stamp.width, paste_y + stamp.height])
    stamp_with_background = image.crop(crop_bbox)

    overlap = bbox_iou(np.array(crop_bbox)[None, ...], paste_bbox[None, ...])
    return stamp_with_background, overlap[0, 0]
def get_iou_crop(image, paste_x, paste_y, stamp):
    """Crop a window whose IoU with the stamp box is close to a target.

    The target IoU is taken from the module-level ``iou_ranges`` (a
    percentage, capped at 1.0) at the next position of the module-level
    cursor ``iou_index``, which this function advances. Candidate windows
    come from ``iou_crop``; a candidate is accepted when its IoU lies in
    ``(target - 0.05, target]``. Up to 200 x 200 candidates are tried.

    Args:
        image: Object exposing ``width``, ``height`` and ``crop(box)``.
        paste_x, paste_y: Top-left corner where the stamp was pasted.
        stamp: Object exposing ``width`` and ``height``.

    Returns:
        tuple: ``(cropped_image, iou)`` for the accepted window.

    Raises:
        ValueError: If no candidate was accepted.
    """
    global iou_index
    iou_index = (iou_index + 1) % len(iou_ranges)
    desired_iou = min(iou_ranges[iou_index] / 100, 1.0)

    paste_bbox = np.array(
        [paste_x, paste_y, paste_x + stamp.width, paste_y + stamp.height])
    paste_size = paste_bbox[2:] - paste_bbox[:2]
    max_size_deviation = 1.0 - desired_iou

    found = False
    for _ in range(200):
        for _ in range(200):
            if desired_iou < 0.3:
                # Low targets use one fixed, much larger window.
                growth = (1 - desired_iou) * 10
                crop_width = int(min(
                    stamp.width + growth * stamp.width, image.width))
                crop_height = int(min(
                    stamp.height + growth * stamp.height, image.height))
            else:
                # Otherwise each side is drawn around the stamp size.
                spread_w = paste_size[0] * max_size_deviation
                spread_h = paste_size[1] * max_size_deviation
                crop_width = random.randint(
                    max(int(paste_size[0] - spread_w), 1),
                    int(paste_size[0] + spread_w))
                crop_height = random.randint(
                    max(int(paste_size[1] - spread_h), 1),
                    int(paste_size[1] + spread_h))

            crop_bbox = iou_crop(
                image, paste_bbox, crop_width, crop_height, desired_iou)

            ious = bbox_iou(crop_bbox[None, ...], paste_bbox[None, ...])[0]
            largest_iou = abs(np.max(ious))
            if desired_iou - 0.05 < largest_iou <= desired_iou:
                found = True
                break
        if found:
            break

    if not found:
        raise ValueError("No Good BBOX Found")
    return image.crop(crop_bbox), ious[0]
    def test_random_crop_with_bbox_constraints(self):
        """Check the crop produced by ``random_crop_with_bbox_constraints``.

        With no chosen constraint the output must equal the input.
        Otherwise the output must match the slice in the returned
        parameters, respect the scale and aspect-ratio limits (with one
        pixel of slack per side for rounding), and keep the IoU with every
        box inside the chosen ``(min_iou, max_iou)`` pair.
        """
        image = np.random.randint(
            0, 256, size=(3, 480, 640)).astype(np.float32)
        boxes = generate_random_bbox(10, image.shape[1:], 0.1, 0.9)

        cropped, info = random_crop_with_bbox_constraints(
            image, boxes, min_scale=0.3, max_scale=1,
            max_aspect_ratio=2, return_param=True)

        if info['constraint'] is None:
            np.testing.assert_equal(cropped, image)
            return

        rows, cols = info['y_slice'], info['x_slice']
        np.testing.assert_equal(cropped, image[:, rows, cols])

        channels, crop_h, crop_w = cropped.shape
        _, img_h, img_w = image.shape

        # to ignore rounding error, add 1
        self.assertGreaterEqual(
            channels * (crop_h + 1) * (crop_w + 1), image.size * 0.3 * 0.3)
        self.assertLessEqual(cropped.size, image.size * 1 * 1)
        self.assertLessEqual(crop_h / (crop_w + 1), img_h / img_w * 2)
        self.assertLessEqual(crop_w / (crop_h + 1), img_w / img_h * 2)

        window = np.array((rows.start, cols.start, rows.stop, cols.stop))
        iou = bbox_iou(window[np.newaxis], boxes)
        lower, upper = info['constraint']
        if lower:
            self.assertGreaterEqual(iou.min(), lower)
        if upper:
            self.assertLessEqual(iou.max(), upper)
示例#15
0
    def test_bbox_iou_invalid(self):
        """``bbox_iou`` must raise ``IndexError`` for this fixture's boxes."""
        first = np.array(self.bbox_a, dtype=np.float32)
        second = np.array(self.bbox_b, dtype=np.float32)

        self.assertRaises(IndexError, bbox_iou, first, second)
示例#16
0
    def check(self, bbox_a, bbox_b, expected):
        """Verify the type and values of ``bbox_iou(bbox_a, bbox_b)``.

        The result must be an instance of ``type(expected)`` and must equal
        ``expected`` once both are moved to the host with ``cuda.to_cpu``.
        """
        result = bbox_iou(bbox_a, bbox_b)
        self.assertIsInstance(result, type(expected))

        host_result = cuda.to_cpu(result)
        host_expected = cuda.to_cpu(expected)
        np.testing.assert_equal(host_result, host_expected)
示例#17
0
def head_loss_pre(rois, roi_indices, std, bboxes, labels):
    """Assign ground truth to RoIs and subsample them per image.

    RoIs from all levels are pooled. For each image every RoI is matched to
    the ground-truth box with the highest IoU, and its regression target is
    encoded relative to the RoI. RoIs whose best IoU is below 0.5 become
    background (label 0). At most ``int(512 * 0.25)`` foreground RoIs are
    kept per image, with background RoIs capped so the total stays at most
    512; the surplus is discarded. The survivors are split back per level.

    Args:
        rois (iterable of arrays): RoIs of each level, in corner format.
        roi_indices (iterable of arrays): Image index of every RoI, per
            level.
        std (sequence of two floats): ``std[0]`` scales the centre offsets
            and ``std[1]`` scales the log-size offsets.
        bboxes (list of arrays): Ground-truth boxes of each image.
        labels (list of arrays): Ground-truth labels of each image.

    Returns:
        tuple of four lists: ``rois``, ``roi_indices``, ``gt_locs`` and
        ``gt_labels``, each with one array per level.
    """
    thresh = 0.5
    batchsize_per_image = 512
    fg_ratio = 0.25

    xp = cuda.get_array_module(*rois)

    n_level = len(rois)
    # hstack needs a real sequence: passing a bare generator is deprecated
    # in NumPy and rejected by newer releases, so build a list.
    roi_levels = xp.hstack(
        [xp.array((level,) * len(rois[level])) for level in range(n_level)]
    ).astype(np.int32)
    rois = xp.vstack(rois).astype(np.float32)
    roi_indices = xp.hstack(roi_indices).astype(np.int32)

    rois_yx = (rois[:, 2:] + rois[:, :2]) / 2
    rois_hw = rois[:, 2:] - rois[:, :2]
    indices = np.unique(cuda.to_cpu(roi_indices))

    gt_locs = xp.empty_like(rois)
    gt_labels = xp.empty_like(roi_indices)
    for i in indices:
        mask = roi_indices == i

        if len(bboxes[i]) > 0:
            iou = utils.bbox_iou(rois[mask], bboxes[i])
            gt_index = iou.argmax(axis=1)

            gt_loc = bboxes[i][gt_index].copy()
        else:
            # No ground truth: the targets are uninitialised placeholders.
            # Every label becomes 0 below.
            gt_loc = xp.empty_like(rois[mask])
        # tlbr -> yxhw
        gt_loc[:, 2:] -= gt_loc[:, :2]
        gt_loc[:, :2] += gt_loc[:, 2:] / 2
        # offset
        gt_loc[:, :2] = (gt_loc[:, :2] - rois_yx[mask]) / \
            rois_hw[mask] / std[0]
        gt_loc[:, 2:] = xp.log(gt_loc[:, 2:] / rois_hw[mask]) / std[1]

        if len(bboxes[i]) > 0:
            # Shift labels by one so that 0 can denote background.
            gt_label = labels[i][gt_index] + 1
            gt_label[iou.max(axis=1) < thresh] = 0
        else:
            gt_label = xp.zeros(int(mask.sum()), dtype=np.int32)

        # Surplus samples are marked -1 and filtered out after the loop.
        fg_index = xp.where(gt_label > 0)[0]
        n_fg = int(batchsize_per_image * fg_ratio)
        if len(fg_index) > n_fg:
            gt_label[_choice(fg_index, size=len(fg_index) - n_fg)] = -1

        bg_index = xp.where(gt_label == 0)[0]
        n_bg = batchsize_per_image - int((gt_label > 0).sum())
        if len(bg_index) > n_bg:
            gt_label[_choice(bg_index, size=len(bg_index) - n_bg)] = -1

        gt_locs[mask] = gt_loc
        gt_labels[mask] = gt_label

    mask = gt_labels >= 0
    rois = rois[mask]
    roi_indices = roi_indices[mask]
    roi_levels = roi_levels[mask]
    gt_locs = gt_locs[mask]
    gt_labels = gt_labels[mask]

    # Split the pooled arrays back into one array per level.
    masks = [roi_levels == level for level in range(n_level)]
    rois = [rois[level_mask] for level_mask in masks]
    roi_indices = [roi_indices[level_mask] for level_mask in masks]
    gt_locs = [gt_locs[level_mask] for level_mask in masks]
    gt_labels = [gt_labels[level_mask] for level_mask in masks]

    return rois, roi_indices, gt_locs, gt_labels
示例#18
0
def random_crop_with_bbox_constraints(
        img, bbox, min_scale=0.3, max_scale=1,
        max_aspect_ratio=2, constraints=None,
        max_trial=50, return_param=False):
    r"""Crop an image randomly with bounding box constraints.

    This data augmentation is used in training of
    Single Shot Multibox Detector [#]_. More details can be found in
    data augmentation section of the original paper.

    .. [#] Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy,
       Scott Reed, Cheng-Yang Fu, Alexander C. Berg.
       SSD: Single Shot MultiBox Detector. ECCV 2016.

    Args:
        img (~numpy.ndarray): An image array to be cropped. This is in
            CHW format.
        bbox (~numpy.ndarray): Bounding boxes used for constraints.
            The shape is :math:`(R, 4)`.
            :math:`R` is the number of bounding boxes.
        min_scale (float): The minimum ratio between a cropped
            region and the original image. The default value is :obj:`0.3`.
        max_scale (float): The maximum ratio between a cropped
            region and the original image. The default value is :obj:`1`.
        max_aspect_ratio (float): The maximum aspect ratio of cropped region.
            The default value is :obj:`2`.
        constraints (iterable of tuples): An iterable of constraints.
            Each constraint should be :obj:`(min_iou, max_iou)` format.
            If you set :obj:`min_iou` or :obj:`max_iou` to :obj:`None`,
            it means not limited.
            If this argument is not specified, :obj:`((0.1, None), (0.3, None),
            (0.5, None), (0.7, None), (0.9, None), (None, 1))` will be used.
        max_trial (int): The maximum number of trials to be conducted
            for each constraint. If this function
            can not find any region that satisfies the constraint in
            :math:`max\_trial` trials, this function skips the constraint.
            The default value is :obj:`50`.
        return_param (bool): If :obj:`True`, this function returns
            information of intermediate values.

    Returns:
        ~numpy.ndarray or (~numpy.ndarray, dict):

        If :obj:`return_param = False`,
        returns an array :obj:`img` that is cropped from the input
        array.

        If :obj:`return_param = True`,
        returns a tuple whose elements are :obj:`img, param`.
        :obj:`param` is a dictionary of intermediate parameters whose
        contents are listed below with key, value-type and the description
        of the value.

        * **constraint** (*tuple*): The chosen constraint.
        * **y_slice** (*slice*): A slice in vertical direction used to crop \
            the input image.
        * **x_slice** (*slice*): A slice in horizontal direction used to crop \
            the input image.

    """

    if constraints is None:
        constraints = (
            (0.1, None),
            (0.3, None),
            (0.5, None),
            (0.7, None),
            (0.9, None),
            (None, 1),
        )

    _, H, W = img.shape
    # The uncropped image is always one of the candidates.
    params = [{
        'constraint': None, 'y_slice': slice(0, H), 'x_slice': slice(0, W)}]

    if len(bbox) == 0:
        constraints = []

    for min_iou, max_iou in constraints:
        if min_iou is None:
            min_iou = 0
        if max_iou is None:
            max_iou = 1

        for _ in six.moves.range(max_trial):
            scale = random.uniform(min_scale, max_scale)
            aspect_ratio = random.uniform(
                max(1 / max_aspect_ratio, scale * scale),
                min(max_aspect_ratio, 1 / (scale * scale)))
            crop_h = int(H * scale / np.sqrt(aspect_ratio))
            crop_w = int(W * scale * np.sqrt(aspect_ratio))

            # A crop side can equal the image side (e.g. scale == 1);
            # randrange(0) would raise ValueError, so the only valid
            # offset, 0, is used directly in that case.
            crop_t = 0 if crop_h == H else random.randrange(H - crop_h)
            crop_l = 0 if crop_w == W else random.randrange(W - crop_w)
            crop_bb = np.array((
                crop_t, crop_l, crop_t + crop_h, crop_l + crop_w))

            iou = utils.bbox_iou(bbox, crop_bb[np.newaxis])
            if min_iou <= iou.min() and iou.max() <= max_iou:
                params.append({
                    'constraint': (min_iou, max_iou),
                    'y_slice': slice(crop_t, crop_t + crop_h),
                    'x_slice': slice(crop_l, crop_l + crop_w)})
                break

    param = random.choice(params)
    img = img[:, param['y_slice'], param['x_slice']]

    if return_param:
        return img, param
    else:
        return img
示例#19
0
def random_crop_with_bbox_constraints(img,
                                      bbox,
                                      min_scale=0.3,
                                      max_scale=1,
                                      max_aspect_ratio=2,
                                      constraints=None,
                                      max_trial=50,
                                      return_param=False):
    """Crop an image randomly with bounding box constraints.

    The uncropped image is always a candidate. For each constraint, up to
    ``max_trial`` random windows are drawn, and the first one whose IoU with
    the boxes satisfies ``min_iou < iou.min()`` and ``iou.max() <= max_iou``
    becomes another candidate. One candidate is then picked uniformly at
    random.

    Args:
        img (~numpy.ndarray): Image in CHW format.
        bbox (~numpy.ndarray): Bounding boxes used for the constraints. When
            it is empty the image is returned uncropped.
        min_scale (float): Lower bound of the sampled crop scale.
        max_scale (float): Upper bound of the sampled crop scale.
        max_aspect_ratio (float): Maximum aspect ratio of the crop.
        constraints (iterable of tuples): ``(min_iou, max_iou)`` pairs. A
            ``None`` bound is replaced by 0 (min) or 1 (max). Defaults to
            ``((0.1, None), (0.3, None), (0.5, None), (0.7, None),
            (0.9, None), (None, 1))``.
        max_trial (int): Number of random windows tried per constraint.
        return_param (bool): If :obj:`True`, also return the chosen
            parameters.

    Returns:
        ~numpy.ndarray or (~numpy.ndarray, dict): The cropped image, and when
        ``return_param`` is true a dict with keys ``constraint``,
        ``y_slice`` and ``x_slice``.
    """
    if constraints is None:
        constraints = (
            (0.1, None),
            (0.3, None),
            (0.5, None),
            (0.7, None),
            (0.9, None),
            (None, 1),
        )

    _, H, W = img.shape
    params = [{
        'constraint': None,
        'y_slice': slice(0, H),
        'x_slice': slice(0, W)
    }]

    if len(bbox) == 0:
        constraints = list()

    for min_iou, max_iou in constraints:
        if min_iou is None:
            min_iou = 0
        if max_iou is None:
            max_iou = 1

        for _ in six.moves.range(max_trial):
            if min_iou == 0 and max_iou == 1:
                # Translated from the original comment: ignore IoU and pick
                # a value such that the whole bounding box is always
                # included. In practice the scale is drawn from
                # [0.9, max_scale] for the unconstrained case.
                scale = random.uniform(0.9, max_scale)
            else:
                scale = random.uniform(min_scale, max_scale)

            aspect_ratio = random.uniform(
                max(1 / max_aspect_ratio, scale * scale),
                min(max_aspect_ratio, 1 / (scale * scale)))
            crop_h = int(H * scale / np.sqrt(aspect_ratio))
            crop_w = int(W * scale * np.sqrt(aspect_ratio))

            # A crop side can equal the image side (e.g. scale == 1);
            # randrange(0) would raise ValueError, so the only valid
            # offset, 0, is used directly in that case.
            crop_t = 0 if crop_h == H else random.randrange(H - crop_h)
            crop_l = 0 if crop_w == W else random.randrange(W - crop_w)
            crop_bb = np.array(
                (crop_t, crop_l, crop_t + crop_h, crop_l + crop_w))

            iou = utils.bbox_iou(bbox, crop_bb[np.newaxis])
            # The lower bound is strict here.
            if min_iou < iou.min() and iou.max() <= max_iou:
                params.append({
                    'constraint': (min_iou, max_iou),
                    'y_slice': slice(crop_t, crop_t + crop_h),
                    'x_slice': slice(crop_l, crop_l + crop_w)
                })
                break

    param = random.choice(params)
    img = img[:, param['y_slice'], param['x_slice']]

    if return_param:
        return img, param
    else:
        return img
示例#20
0
def rpn_loss(locs, confs, anchors, sizes, bboxes):
    """Loss function for RPN.

    For every image, each anchor is labelled foreground (``1``),
    background (``0``) or ignored (``-1``) from its IoU with the ground
    truth boxes. A localization loss is accumulated over the foreground
    anchors and a confidence loss over all non-ignored anchors; both are
    divided by the number of non-ignored anchors of the image and then
    averaged over the images.

    Args:
        locs (iterable of arrays): An iterable of arrays whose shape is
            :math:`(N, K_l, 4)`, where :math:`K_l` is the number of
            the anchor boxes of the :math:`l`-th level.
        confs (iterable of arrays): An iterable of arrays whose shape is
            :math:`(N, K_l)`.
        anchors (list of arrays): A list of arrays returned by
            :meth:`anchors`.
        sizes (list of tuples of two ints): A list of
            :math:`(H_n, W_n)`, where :math:`H_n` and :math:`W_n`
            are height and width of the :math:`n`-th image.
        bboxes (list of arrays): A list of arrays whose shape is
            :math:`(R_n, 4)`, where :math:`R_n` is the number of
            ground truth bounding boxes.

    Returns:
        tuple of two variables:
        :obj:`loc_loss` and :obj:`conf_loss`.
    """
    # IoU at or above which an in-image anchor becomes foreground.
    fg_thresh = 0.7
    # IoU below which an in-image anchor is a background candidate.
    bg_thresh = 0.3
    # Target number of labelled anchors per image; used to derive n_bg.
    batchsize_per_image = 256
    # Fraction of batchsize_per_image that caps the foreground count.
    fg_ratio = 0.25

    # Join the per-level predictions into one variable each.
    locs = F.concat(locs)
    confs = F.concat(confs)

    xp = cuda.get_array_module(locs.array, confs.array)

    # Stack the per-level anchors and precompute their centers and sizes
    # (columns 0-1 are treated as the top-left corner, 2-3 as bottom-right).
    anchors = xp.vstack(anchors)
    anchors_yx = (anchors[:, 2:] + anchors[:, :2]) / 2
    anchors_hw = anchors[:, 2:] - anchors[:, :2]

    loc_loss = 0
    conf_loss = 0
    for i in range(len(sizes)):
        if len(bboxes[i]) > 0:
            iou = utils.bbox_iou(anchors, bboxes[i])

            # Regression target of each anchor: the ground truth box with
            # the highest IoU, encoded relative to the anchor.
            gt_loc = bboxes[i][iou.argmax(axis=1)].copy()
            # tlbr -> yxhw
            gt_loc[:, 2:] -= gt_loc[:, :2]
            gt_loc[:, :2] += gt_loc[:, 2:] / 2
            # offset
            gt_loc[:, :2] = (gt_loc[:, :2] - anchors_yx) / anchors_hw
            gt_loc[:, 2:] = xp.log(gt_loc[:, 2:] / anchors_hw)
        else:
            # Uninitialized placeholder: without ground truth no anchor is
            # labelled 1 below, so no row of it is selected for the loss.
            gt_loc = xp.empty_like(anchors)

        # Start with every anchor ignored (-1).
        gt_label = xp.empty(len(anchors), dtype=np.int32)
        gt_label[:] = -1

        # True for anchors whose top-left is >= 0 and whose bottom-right is
        # strictly below the image size (H_n, W_n) on both axes.
        mask = xp.logical_and(anchors[:, :2] >= 0,
                              anchors[:, 2:] < xp.array(sizes[i])).all(axis=1)

        if len(bboxes[i]) > 0:
            # Foreground rule 1: among in-image anchors, those reaching the
            # maximum IoU for at least one ground truth box.
            gt_label[xp.where(mask)[0][(iou[mask] == iou[mask].max(
                axis=0)).any(axis=1)]] = 1
            # Foreground rule 2: in-image anchors whose best IoU reaches
            # fg_thresh.
            gt_label[xp.logical_and(mask, iou.max(axis=1) >= fg_thresh)] = 1

        # Cap the number of foreground anchors at n_fg by resetting the
        # surplus to ignored.
        # NOTE(review): assumes `choice` returns that many distinct entries
        # of fg_index -- confirm against its definition.
        fg_index = xp.where(gt_label == 1)[0]
        n_fg = int(batchsize_per_image * fg_ratio)
        if len(fg_index) > n_fg:
            gt_label[choice(fg_index, size=len(fg_index) - n_fg)] = -1

        # Background candidates: in-image anchors with best IoU below
        # bg_thresh, or every in-image anchor when there is no ground truth.
        if len(bboxes[i]) > 0:
            bg_index = xp.where(
                xp.logical_and(mask,
                               iou.max(axis=1) < bg_thresh))[0]
        else:
            bg_index = xp.where(mask)[0]
        n_bg = batchsize_per_image - int((gt_label == 1).sum())
        # randint draws positions independently, so the same candidate may be
        # picked more than once and fewer than n_bg anchors may end up 0.
        # NOTE(review): when len(bg_index) <= n_bg nothing is labelled
        # background at all -- confirm this is intended.
        if len(bg_index) > n_bg:
            gt_label[bg_index[xp.random.randint(len(bg_index), size=n_bg)]] = 0

        # Number of non-ignored anchors; used to normalize both losses.
        # NOTE(review): this is zero if no anchor was labelled, which would
        # make the divisions below ill-defined -- confirm callers avoid it.
        n_sample = (gt_label >= 0).sum()
        # Localization loss over foreground anchors only.
        # NOTE(review): the meaning of the third `smooth_l1` argument (1 / 9)
        # is not visible here -- check the helper.
        loc_loss += F.sum(
            smooth_l1(locs[i][gt_label == 1], gt_loc[gt_label == 1],
                      1 / 9)) / n_sample
        # Confidence loss over all non-ignored anchors.
        conf_loss += F.sum(F.sigmoid_cross_entropy(
            confs[i][gt_label >= 0], gt_label[gt_label >= 0], reduce='no')) \
            / n_sample

    # Average over the images.
    loc_loss /= len(sizes)
    conf_loss /= len(sizes)

    return loc_loss, conf_loss
示例#21
0
    def encode(self, bbox, label, iou_thresh=0.5):
        """Encode ground truth boxes and labels against the default boxes.

        Converts :obj:`bbox` and :obj:`label` into :obj:`mb_loc` and
        :obj:`mb_label`, the targets used to compute multibox loss.

        Matching is done in two passes. First, the pair with the highest
        remaining IoU is assigned repeatedly, removing its default box and
        its ground truth box from further consideration, until no remaining
        IoU exceeds :obj:`1e-6`. Second, every default box that is still
        unassigned and whose best IoU is at least :obj:`iou_thresh` is
        assigned to its best ground truth box.

        Args:
            bbox (array): A float array of shape :math:`(R, 4)`,
                where :math:`R` is the number of bounding boxes in an image.
                Each bounding box is organized by
                :math:`(y_{min}, x_{min}, y_{max}, x_{max})`
                in the second axis.
            label (array): An integer array of shape :math:`(R,)`.
                Each value indicates the class of the bounding box.
            iou_thresh (float): The threshold value to determine whether
                a default bounding box is assigned to a ground truth
                or not. The default value is :obj:`0.5`.

        Returns:
            tuple of two arrays:
            A tuple :obj:`(mb_loc, mb_label)`.

            * **mb_loc**: A float32 array of shape :math:`(K, 4)`,
              where :math:`K` is the number of default bounding boxes.
            * **mb_label**: An int32 array of shape :math:`(K,)`.
              :obj:`0` means background.

        """
        xp = self.xp
        default_bbox = self._default_bbox

        # Without ground truth every default box is background.
        if len(bbox) == 0:
            no_loc = xp.zeros(default_bbox.shape, dtype=np.float32)
            no_label = xp.zeros(default_bbox.shape[0], dtype=np.int32)
            return no_loc, no_label

        # Default boxes are handled as (center_y, center_x, height, width);
        # expand them to corner form for the IoU computation.
        default_yx = default_bbox[:, :2]
        default_hw = default_bbox[:, 2:]
        corners = xp.hstack(
            (default_yx - default_hw / 2, default_yx + default_hw / 2))
        iou = utils.bbox_iou(corners, bbox)

        # Ground truth index per default box; -1 stands for background.
        assigned = xp.full(len(default_bbox), -1, dtype=int)

        # Pass 1: greedily take the best remaining pair and retire both its
        # default box (row) and its ground truth box (column).
        remaining = iou.copy()
        while True:
            row, col = _unravel_index(remaining.argmax(), remaining.shape)
            if remaining[row, col] <= 1e-6:
                break
            assigned[row] = col
            remaining[row, :] = 0
            remaining[:, col] = 0

        # Pass 2: unassigned default boxes that overlap well enough go to
        # their best ground truth box.
        best_iou = iou.max(axis=1)
        late = xp.logical_and(assigned < 0, best_iou >= iou_thresh)
        assigned[late] = iou[late].argmax(axis=1)

        matched = bbox[assigned].copy()
        # (y_min, x_min, y_max, x_max) -> (y_min, x_min, height, width)
        matched[:, 2:] -= matched[:, :2]
        # (y_min, x_min, height, width) -> (center_y, center_x, height, width)
        matched[:, :2] += matched[:, 2:] / 2

        # Offsets relative to the default boxes, scaled by the variances.
        mb_loc = xp.empty_like(matched)
        center_scale = self._variance[0] * default_hw
        mb_loc[:, :2] = (matched[:, :2] - default_yx) / center_scale
        mb_loc[:, 2:] = \
            xp.log(matched[:, 2:] / default_hw) / self._variance[1]

        # [0, n_fg_class - 1] -> [1, n_fg_class], with 0 kept for background
        mb_label = label[assigned] + 1
        mb_label[assigned < 0] = 0

        return mb_loc.astype(np.float32), mb_label.astype(np.int32)
示例#22
0
    def test_bbox_iou_invalid(self):
        """Check that ``bbox_iou`` raises ``IndexError`` for the fixture boxes."""
        # Build both float32 inputs before entering the assertion context so
        # that only the bbox_iou call itself is expected to raise.
        boxes = [
            np.array(raw, dtype=np.float32)
            for raw in (self.bbox_a, self.bbox_b)
        ]

        with self.assertRaises(IndexError):
            bbox_iou(boxes[0], boxes[1])
示例#23
0
def random_crop_with_bbox_constraints(img,
                                      bbox,
                                      min_scale=0.3,
                                      max_scale=1,
                                      max_aspect_ratio=2,
                                      constraints=None,
                                      max_trial=50,
                                      return_param=False):
    """Crop an image randomly with bounding box constraints.

    This data augmentation is used in training of
    Single Shot Multibox Detector [#]_. More details can be found in
    data augmentation section of the original paper.

    For each constraint, up to :obj:`max_trial` random regions are drawn
    and the first one whose IoU with every bounding box lies within the
    constraint is kept as a candidate. The returned crop is picked
    uniformly from these candidates plus the uncropped image.

    .. [#] Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy,
       Scott Reed, Cheng-Yang Fu, Alexander C. Berg.
       SSD: Single Shot MultiBox Detector. ECCV 2016.

    Args:
        img (~numpy.ndarray): An image array to be cropped. This is in
            CHW format.
        bbox (~numpy.ndarray): Bounding boxes used for constraints.
            The shape is :math:`(R, 4)`.
            :math:`R` is the number of bounding boxes. If it is empty,
            no constraint is tried and the image is returned uncropped.
        min_scale (float): The minimum ratio between a cropped
            region and the original image. The default value is :obj:`0.3`.
        max_scale (float): The maximum ratio between a cropped
            region and the original image. The default value is :obj:`1`.
        max_aspect_ratio (float): The maximum aspect ratio of cropped region.
            The default value is :obj:`2`.
        constraints (iterable of tuples): An iterable of constraints.
            Each constraint should be :obj:`(min_iou, max_iou)` format.
            If you set :obj:`min_iou` or :obj:`max_iou` to :obj:`None`,
            it means not limited.
            If this argument is not specified, :obj:`((0.1, None), (0.3, None),
            (0.5, None), (0.7, None), (0.9, None), (None, 1))` will be used.
        max_trial (int): The maximum number of trials to be conducted
            for each constraint. If this function
            can not find any region that satisfies the constraint in
            :obj:`max_trial` trials, this function skips the constraint.
            The default value is :obj:`50`.
        return_param (bool): If :obj:`True`, this function returns
            information of intermediate values.

    Returns:
        ~numpy.ndarray or (~numpy.ndarray, dict):

        If :obj:`return_param = False`,
        returns an array :obj:`img` that is cropped from the input
        array.

        If :obj:`return_param = True`,
        returns a tuple whose elements are :obj:`img, param`.
        :obj:`param` is a dictionary of intermediate parameters whose
        contents are listed below with key, value-type and the description
        of the value.

        * **constraint** (*tuple*): The chosen constraint, with
          :obj:`None` bounds replaced by :obj:`0` and :obj:`1`. It is
          :obj:`None` when the uncropped image was chosen.
        * **y_slice** (*slice*): A slice in vertical direction used to crop
          the input image.
        * **x_slice** (*slice*): A slice in horizontal direction used to crop
          the input image.

    """

    if constraints is None:
        constraints = (
            (0.1, None),
            (0.3, None),
            (0.5, None),
            (0.7, None),
            (0.9, None),
            (None, 1),
        )

    _, H, W = img.shape
    # The uncropped image is always one of the candidates.
    params = [{
        'constraint': None,
        'y_slice': slice(0, H),
        'x_slice': slice(0, W)
    }]

    # Constraints are meaningless without bounding boxes.
    if len(bbox) == 0:
        constraints = []

    for min_iou, max_iou in constraints:
        if min_iou is None:
            min_iou = 0
        if max_iou is None:
            max_iou = 1

        for _ in six.moves.range(max_trial):
            scale = random.uniform(min_scale, max_scale)
            # Bounding the aspect ratio by scale ** 2 and its inverse keeps
            # both crop sides within the image.
            aspect_ratio = random.uniform(
                max(1 / max_aspect_ratio, scale * scale),
                min(max_aspect_ratio, 1 / (scale * scale)))
            crop_h = int(H * scale / np.sqrt(aspect_ratio))
            crop_w = int(W * scale * np.sqrt(aspect_ratio))

            # Valid offsets run from 0 to (size - crop) inclusive. The "+ 1"
            # lets the crop touch the bottom/right edge and avoids
            # randrange(0), which raises ValueError when the crop spans the
            # whole dimension.
            crop_t = random.randrange(H - crop_h + 1)
            crop_l = random.randrange(W - crop_w + 1)
            crop_bb = np.array(
                (crop_t, crop_l, crop_t + crop_h, crop_l + crop_w))

            iou = utils.bbox_iou(bbox, crop_bb[np.newaxis])
            if min_iou <= iou.min() and iou.max() <= max_iou:
                params.append({
                    'constraint': (min_iou, max_iou),
                    'y_slice': slice(crop_t, crop_t + crop_h),
                    'x_slice': slice(crop_l, crop_l + crop_w)
                })
                # One candidate per constraint is enough.
                break

    param = random.choice(params)
    img = img[:, param['y_slice'], param['x_slice']]

    if return_param:
        return img, param
    else:
        return img
示例#24
0
文件: rpn.py 项目: ml-lab/chainer-fpn
def rpn_loss(locs, confs, anchors, sizes, bboxes):
    """Compute the localization and confidence losses of an RPN.

    For every image, each anchor is labelled foreground (``1``),
    background (``0``) or ignored (``-1``) from its IoU with the ground
    truth boxes. A localization loss is accumulated over the foreground
    anchors and a confidence loss over all non-ignored anchors; both are
    divided by the number of non-ignored anchors of the image and then
    averaged over the images.

    Args:
        locs: Iterable of per-level localization predictions; joined
            with ``F.concat`` and indexed per image.
        confs: Iterable of per-level confidence predictions; joined
            with ``F.concat`` and indexed per image.
        anchors: List of per-level anchor arrays; stacked with
            ``vstack`` into one array with four columns.
        sizes: One entry per image, compared against the bottom-right
            anchor coordinates to find anchors inside the image.
        bboxes: One array of ground truth boxes per image; may be empty.

    Returns:
        tuple: ``(loc_loss, conf_loss)``.
    """
    # IoU at or above which an in-image anchor becomes foreground.
    fg_thresh = 0.7
    # IoU below which an in-image anchor is a background candidate.
    bg_thresh = 0.3
    # Target number of labelled anchors per image; used to derive n_bg.
    batchsize_per_image = 256
    # Fraction of batchsize_per_image that caps the foreground count.
    fg_ratio = 0.25

    # Join the per-level predictions into one variable each.
    locs = F.concat(locs)
    confs = F.concat(confs)

    xp = cuda.get_array_module(locs.array, confs.array)

    # Stack the per-level anchors and precompute their centers and sizes
    # (columns 0-1 are treated as the top-left corner, 2-3 as bottom-right).
    anchors = xp.vstack(anchors)
    anchors_yx = (anchors[:, 2:] + anchors[:, :2]) / 2
    anchors_hw = anchors[:, 2:] - anchors[:, :2]

    loc_loss = 0
    conf_loss = 0
    for i in range(len(sizes)):
        if len(bboxes[i]) > 0:
            iou = utils.bbox_iou(anchors, bboxes[i])

            # Regression target of each anchor: the ground truth box with
            # the highest IoU, encoded relative to the anchor.
            gt_loc = bboxes[i][iou.argmax(axis=1)].copy()
            # tlbr -> yxhw
            gt_loc[:, 2:] -= gt_loc[:, :2]
            gt_loc[:, :2] += gt_loc[:, 2:] / 2
            # offset
            gt_loc[:, :2] = (gt_loc[:, :2] - anchors_yx) / anchors_hw
            gt_loc[:, 2:] = xp.log(gt_loc[:, 2:] / anchors_hw)
        else:
            # Uninitialized placeholder: without ground truth no anchor is
            # labelled 1 below, so no row of it is selected for the loss.
            gt_loc = xp.empty_like(anchors)

        # Start with every anchor ignored (-1).
        gt_label = xp.empty(len(anchors), dtype=np.int32)
        gt_label[:] = -1

        # True for anchors whose top-left is >= 0 and whose bottom-right is
        # strictly below sizes[i] on both axes.
        mask = xp.logical_and(anchors[:, :2] >= 0,
                              anchors[:, 2:] < xp.array(sizes[i])).all(axis=1)

        if len(bboxes[i]) > 0:
            # Foreground rule 1: among in-image anchors, those reaching the
            # maximum IoU for at least one ground truth box.
            gt_label[xp.where(mask)[0][(iou[mask] == iou[mask].max(
                axis=0)).any(axis=1)]] = 1
            # Foreground rule 2: in-image anchors whose best IoU reaches
            # fg_thresh.
            gt_label[xp.logical_and(mask, iou.max(axis=1) >= fg_thresh)] = 1

        # Cap the number of foreground anchors at n_fg by resetting the
        # surplus to ignored.
        # NOTE(review): assumes `_choice` returns that many distinct entries
        # of fg_index -- confirm against its definition.
        fg_index = xp.where(gt_label == 1)[0]
        n_fg = int(batchsize_per_image * fg_ratio)
        if len(fg_index) > n_fg:
            gt_label[_choice(fg_index, size=len(fg_index) - n_fg)] = -1

        # Background candidates: in-image anchors with best IoU below
        # bg_thresh, or every in-image anchor when there is no ground truth.
        if len(bboxes[i]) > 0:
            bg_index = xp.where(
                xp.logical_and(mask,
                               iou.max(axis=1) < bg_thresh))[0]
        else:
            bg_index = xp.where(mask)[0]
        n_bg = batchsize_per_image - int((gt_label == 1).sum())
        # randint draws positions independently, so the same candidate may be
        # picked more than once and fewer than n_bg anchors may end up 0.
        # NOTE(review): when len(bg_index) <= n_bg nothing is labelled
        # background at all -- confirm this is intended.
        if len(bg_index) > n_bg:
            gt_label[bg_index[xp.random.randint(len(bg_index), size=n_bg)]] = 0

        # Number of non-ignored anchors; used to normalize both losses.
        # NOTE(review): this is zero if no anchor was labelled, which would
        # make the divisions below ill-defined -- confirm callers avoid it.
        n_sample = (gt_label >= 0).sum()
        # Localization loss over foreground anchors only.
        # NOTE(review): the meaning of the third `smooth_l1` argument (1 / 9)
        # is not visible here -- check the helper.
        loc_loss += F.sum(
            smooth_l1(locs[i][gt_label == 1], gt_loc[gt_label == 1],
                      1 / 9)) / n_sample
        # Confidence loss over all non-ignored anchors.
        conf_loss += F.sum(F.sigmoid_cross_entropy(
            confs[i][gt_label >= 0], gt_label[gt_label >= 0], reduce='no')) \
            / n_sample

    # Average over the images.
    loc_loss /= len(sizes)
    conf_loss /= len(sizes)

    return loc_loss, conf_loss
示例#25
0
def bbox_head_loss_pre(rois, roi_indices, std, bboxes, labels):
    """Loss function for Head (pre).

    This function processes RoIs for :func:`bbox_head_loss_post`.

    The RoIs of all levels are flattened, and for every image each RoI
    gets the ground truth box with the highest IoU as its regression
    target and that box's label plus one as its class (``0`` when the
    IoU is below ``0.5`` or the image has no ground truth). Foreground
    and background RoIs are then subsampled per image, dropped RoIs are
    removed, and the survivors are split back per level.

    Args:
        rois (iterable of arrays): An iterable of arrays of
            shape :math:`(R_l, 4)`, where :math:`R_l` is the number
            of RoIs in the :math:`l`-th feature map.
        roi_indices (iterable of arrays): An iterable of arrays of
            shape :math:`(R_l,)`.
        std (tuple of floats): Two coefficients used for encoding
            bounding boxes.
        bboxes (list of arrays): A list of arrays whose shape is
            :math:`(R_n, 4)`, where :math:`R_n` is the number of
            ground truth bounding boxes.
        labels (list of arrays): A list of arrays whose shape is
            :math:`(R_n,)`.

    Returns:
        tuple of four lists:
        :obj:`rois`, :obj:`roi_indices`, :obj:`gt_locs`, and :obj:`gt_labels`.

        * **rois**: A list of arrays of shape :math:`(R'_l, 4)`,
          where :math:`R'_l` is the number of RoIs in the :math:`l`-th
          feature map.
        * **roi_indices**: A list of arrays of shape :math:`(R'_l,)`.
        * **gt_locs**: A list of arrays of shape :math:`(R'_l, 4)`
          indicating the bounding boxes of ground truth.
        * **gt_labels**: A list of arrays of shape :math:`(R'_l,)`
          indicating the classes of ground truth.
    """

    # IoU below which a RoI is treated as background.
    thresh = 0.5
    # Target number of RoIs kept per image and the foreground share of it.
    batchsize_per_image = 512
    fg_ratio = 0.25

    xp = cuda.get_array_module(*rois)

    n_level = len(rois)
    # Remember which level every RoI came from so the flattened arrays can
    # be split back per level at the end. A list is passed because the
    # stacking functions expect a sequence of arrays, not a bare generator.
    roi_levels = xp.hstack(
        [xp.array((level, ) * len(rois[level]))
         for level in range(n_level)]).astype(np.int32)
    rois = xp.vstack(rois).astype(np.float32)
    roi_indices = xp.hstack(roi_indices).astype(np.int32)

    rois_yx = (rois[:, 2:] + rois[:, :2]) / 2
    rois_hw = rois[:, 2:] - rois[:, :2]
    indices = np.unique(cuda.to_cpu(roi_indices))

    gt_locs = xp.empty_like(rois)
    gt_labels = xp.empty_like(roi_indices)
    for i in indices:
        # RoIs that belong to the i-th image.
        mask = roi_indices == i

        if len(bboxes[i]) > 0:
            iou = utils.bbox_iou(rois[mask], bboxes[i])
            gt_index = iou.argmax(axis=1)

            gt_loc = bboxes[i][gt_index].copy()
        else:
            # Uninitialized placeholder: every RoI of this image is labelled
            # 0 below, so the values written from it are not foreground
            # targets.
            gt_loc = xp.empty_like(rois[mask])
        # tlbr -> yxhw
        gt_loc[:, 2:] -= gt_loc[:, :2]
        gt_loc[:, :2] += gt_loc[:, 2:] / 2
        # offset
        gt_loc[:, :2] = (gt_loc[:, :2] - rois_yx[mask]) / \
            rois_hw[mask] / std[0]
        gt_loc[:, 2:] = xp.log(gt_loc[:, 2:] / rois_hw[mask]) / std[1]

        if len(bboxes[i]) > 0:
            # Shift labels by one so that 0 can stand for background.
            gt_label = labels[i][gt_index] + 1
            gt_label[iou.max(axis=1) < thresh] = 0
        else:
            gt_label = xp.zeros(int(mask.sum()), dtype=np.int32)

        # Mark surplus foreground RoIs as dropped (-1).
        fg_index = xp.where(gt_label > 0)[0]
        n_fg = int(batchsize_per_image * fg_ratio)
        if len(fg_index) > n_fg:
            gt_label[choice(fg_index, size=len(fg_index) - n_fg)] = -1

        # Mark surplus background RoIs as dropped (-1).
        bg_index = xp.where(gt_label == 0)[0]
        n_bg = batchsize_per_image - int((gt_label > 0).sum())
        if len(bg_index) > n_bg:
            gt_label[choice(bg_index, size=len(bg_index) - n_bg)] = -1

        gt_locs[mask] = gt_loc
        gt_labels[mask] = gt_label

    # Remove the dropped RoIs from every flattened array.
    mask = gt_labels >= 0
    rois = rois[mask]
    roi_indices = roi_indices[mask]
    roi_levels = roi_levels[mask]
    gt_locs = gt_locs[mask]
    gt_labels = gt_labels[mask]

    # Split the survivors back into one array per level.
    masks = [roi_levels == level for level in range(n_level)]
    rois = [rois[m] for m in masks]
    roi_indices = [roi_indices[m] for m in masks]
    gt_locs = [gt_locs[m] for m in masks]
    gt_labels = [gt_labels[m] for m in masks]

    return rois, roi_indices, gt_locs, gt_labels
示例#26
0
def iou_linear_assignment(bbox_a, bbox_b):
    """Match two sets of boxes by running linear assignment on their IoU.

    The IoU matrix of :obj:`bbox_a` and :obj:`bbox_b` is negated before
    being handed to ``linear_assignment``.

    Returns:
        tuple: The first and the second column of the assignment result.
    """
    cost = -bbox_iou(bbox_a, bbox_b)
    pairs = linear_assignment(cost)
    first, second = pairs[:, 0], pairs[:, 1]
    return first, second