def __call__(self, x, im_info):
        """Run the region proposal network and the detection head on ``x``.

        The trunk output feeds the two RPN heads, the proposal layer turns
        their outputs into RoIs, and the RoIs are pooled from
        ``self.trunk.relu5_3_out`` before classification and box regression.
        The results are also stored on ``self.scores`` and
        ``self.pred_boxes``.

        Args:
            x: Input variable. Only ``x.data.shape[0]`` is read here; the
                rest is handled by ``self.trunk``.
            im_info: Indexable image metadata. ``im_info[0][:2]`` is handed
                to ``clip_boxes`` and ``im_info[0][2]`` divides the RoI
                coordinates (presumably height, width and image scale --
                confirm against the caller).

        Returns:
            ``None`` when ``self.train`` is truthy (no loss is computed in
            this method), otherwise ``(self.scores, self.pred_boxes)``.
        """
        features = self.trunk(x)
        batch_size = x.data.shape[0]

        # RPN heads; the score shape is recorded before it gets flattened.
        cls_logits = self.rpn_cls_score(features)
        n_channels, feat_h, feat_w = cls_logits.data.shape[1:]
        rpn_bbox_pred = self.rpn_bbox_pred(features)

        # Softmax over a 2-way axis, then back to the original layout.
        cls_prob = F.softmax(F.reshape(cls_logits, (batch_size, 2, -1)))
        cls_prob = F.reshape(
            cls_prob, (batch_size, n_channels, feat_h, feat_w))

        # RoI proposal
        raw_rois = self.proposal_layer(
            cls_prob, rpn_bbox_pred, im_info, self.train)
        # Column 0 of the proposal output is skipped; columns 1..4 are
        # divided by im_info[0][2].
        boxes = raw_rois[:, 1:5] / im_info[0][2]
        rois = chainer.Variable(raw_rois, volatile=not self.train)

        # RCNN head
        pooled = F.roi_pooling_2d(
            self.trunk.relu5_3_out, rois, 7, 7, 0.0625)
        hidden = F.relu(self.fc6(pooled))
        hidden = F.relu(self.fc7(hidden))
        self.scores = F.softmax(self.cls_score(hidden))

        # Box regression works on raw arrays, outside the graph.
        deltas = self.bbox_pred(hidden).data
        refined = bbox_transform_inv(boxes, deltas)
        self.pred_boxes = clip_boxes(refined, im_info[0][:2])

        if not self.train:
            return self.scores, self.pred_boxes
        # Training losses are not implemented in this method.
        return None
Exemplo n.º 2
0
    def __call__(self, x, im_info):
        """Run the region proposal network and the detection head on ``x``.

        Same pipeline on CPU and GPU: trunk -> RPN heads -> proposal layer
        -> RoI pooling over ``self.trunk.feature`` -> fc layers -> class
        scores and refined boxes. When ``self.gpu >= 0`` the RoIs and
        ``im_info`` are moved to that device first. The results are also
        stored on ``self.scores`` and ``self.pred_boxes``.

        Args:
            x: Input variable. Only ``x.data.shape[0]`` is read here; the
                rest is handled by ``self.trunk``.
            im_info: Indexable image metadata. ``im_info[0][:2]`` is handed
                to ``clip_boxes`` and ``im_info[0][2]`` divides the RoI
                coordinates (presumably height, width and image scale --
                confirm against the caller).

        Returns:
            ``None`` when ``self.train`` is truthy (no loss is computed in
            this method), otherwise ``(self.scores, self.pred_boxes)``.
        """
        features = self.trunk(x)
        batch_size = x.data.shape[0]

        # RPN heads; the score shape is recorded before it gets flattened.
        cls_logits = self.rpn_cls_score(features)
        n_channels, feat_h, feat_w = cls_logits.data.shape[1:]
        rpn_bbox_pred = self.rpn_bbox_pred(features)

        # Softmax over a 2-way axis, then back to the original layout.
        cls_prob = F.softmax(F.reshape(cls_logits, (batch_size, 2, -1)))
        cls_prob = F.reshape(
            cls_prob, (batch_size, n_channels, feat_h, feat_w))

        # RoI proposal
        rois = self.proposal_layer(
            cls_prob, rpn_bbox_pred, im_info, self.train)

        device_id = self.gpu
        if device_id >= 0:
            # Move both operands to the device and divide there.
            rois = to_gpu(rois, device=device_id)
            im_info = to_gpu(im_info, device=device_id)
            with chainer.cuda.Device(device_id):
                boxes = rois[:, 1:5] / im_info[0][2]
        else:
            boxes = rois[:, 1:5] / im_info[0][2]
        rois = chainer.Variable(rois, volatile=not self.train)

        # RCNN head
        pooled = roi_pooling_2d(self.trunk.feature, rois, 7, 7, 0.0625)
        hidden = F.relu(self.fc6(pooled))
        hidden = F.relu(self.fc7(hidden))
        self.scores = F.softmax(self.cls_score(hidden))

        # Box regression works on raw arrays, outside the graph.
        deltas = self.bbox_pred(hidden).data
        refined = bbox_transform_inv(boxes, deltas, self.gpu)
        self.pred_boxes = clip_boxes(refined, im_info[0][:2], self.gpu)

        if not self.train:
            return self.scores, self.pred_boxes
        # Training losses are not implemented in this method.
        return None
Exemplo n.º 3
0
    def __call__(self, rpn_cls_prob, rpn_bbox_pred, im_info, train):
        """Turn RPN outputs into a float32 array of RoIs.

        Pipeline:
          1. build the shifted anchors for every feature-map cell and apply
             the predicted deltas to them,
          2. clip the resulting boxes to the image,
          3. drop boxes rejected by ``_filter_boxes``,
          4. rank the rest by score (highest first),
          5. keep at most ``pre_nms_topN`` of them,
          6. run NMS,
          7. keep at most ``post_nms_topN`` survivors,
          8. prepend a batch-index column of zeros.

        Args:
            rpn_cls_prob: Object whose ``data`` holds the class
                probabilities; channels from ``self._num_anchors`` onwards
                are used as the foreground scores.
            rpn_bbox_pred: Object whose ``data`` holds the box deltas.
            im_info: 2-D array; row 0 supplies the clip bounds (``[:2]``)
                and the factor applied to ``self.RPN_MIN_SIZE`` (``[2]``).
            train: Selects the ``TRAIN_*`` (truthy) or ``TEST_*`` (falsy)
                top-N limits.

        Returns:
            float32 array whose first column is all zeros (only a single
            input image is supported) followed by the proposal columns.
        """
        if train:
            pre_nms_topN = self.TRAIN_RPN_PRE_NMS_TOP_N
            post_nms_topN = self.TRAIN_RPN_POST_NMS_TOP_N
        else:
            pre_nms_topN = self.TEST_RPN_PRE_NMS_TOP_N
            post_nms_topN = self.TEST_RPN_POST_NMS_TOP_N
        nms_thresh = self.RPN_NMS_THRESH
        min_size = self.RPN_MIN_SIZE
        num_anchors = self._num_anchors

        # The first num_anchors channels are background probabilities; the
        # remaining ones are the foreground probabilities we want.
        fg_scores = to_cpu(rpn_cls_prob.data[:, num_anchors:, :, :])
        deltas = to_cpu(rpn_bbox_pred.data)
        image_meta = im_info[0, :]

        # 1. Shifted anchors: one (x, y, x, y) offset per feature-map cell.
        feat_h, feat_w = fg_scores.shape[-2:]
        stride = self._feat_stride
        grid_x, grid_y = np.meshgrid(np.arange(0, feat_w) * stride,
                                     np.arange(0, feat_h) * stride)
        flat_x, flat_y = grid_x.ravel(), grid_y.ravel()
        shifts = np.vstack((flat_x, flat_y, flat_x, flat_y)).T

        # Broadcast (1, A, 4) anchors against (K, 1, 4) shifts to get
        # (K, A, 4), then flatten to (K * A, 4).
        num_cells = shifts.shape[0]
        anchors = self._anchors.reshape((1, num_anchors, 4)) \
            + shifts.reshape((num_cells, 1, 4))
        anchors = anchors.reshape((num_cells * num_anchors, 4))

        # Move the channel axis last so rows come out ordered (h, w, a),
        # matching the anchor enumeration above.
        deltas = deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
        fg_scores = fg_scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        proposals = bbox_transform_inv(anchors, deltas, -1)

        # 2. Clip predicted boxes to the image.
        proposals = clip_boxes(proposals, image_meta[:2])

        # 3. Size filter; min_size is rescaled by image_meta[2].
        big_enough = _filter_boxes(proposals, min_size * image_meta[2])
        proposals = proposals[big_enough, :]
        fg_scores = fg_scores[big_enough]

        # 4./5. Rank by descending score and truncate before NMS.
        ranking = fg_scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            ranking = ranking[:pre_nms_topN]
        proposals = proposals[ranking, :]
        fg_scores = fg_scores[ranking]

        # 6./7. NMS, then truncate again.
        survivors = nms(np.hstack((proposals, fg_scores)), nms_thresh)
        if post_nms_topN > 0:
            survivors = survivors[:post_nms_topN]
        proposals = proposals[survivors, :]
        # Scores are kept in step with the proposals although only the RoIs
        # are returned.
        fg_scores = fg_scores[survivors]

        # 8. Output RoIs; every batch index is 0 (single-image RPN).
        batch_column = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        return np.asarray(
            np.hstack((batch_column, proposals)), dtype=np.float32)
Exemplo n.º 4
0
    def __call__(self, rpn_cls_prob, rpn_bbox_pred, im_info, train):
        """Generate RoIs from the RPN class probabilities and box deltas.

        Anchors are enumerated for every feature-map cell, shifted by the
        predicted deltas, clipped to the image, size-filtered, ranked by
        foreground score, truncated, passed through NMS and truncated
        again.

        Args:
            rpn_cls_prob: Object whose ``data`` holds the class
                probabilities; channels from ``self._num_anchors`` onwards
                are used as the foreground scores.
            rpn_bbox_pred: Object whose ``data`` holds the box deltas.
            im_info: 2-D array; row 0 supplies the clip bounds (``[:2]``)
                and the factor applied to ``self.RPN_MIN_SIZE`` (``[2]``).
            train: When truthy the limits come from
                ``self.RPN_PRE_NMS_TOP_N`` / ``self.RPN_POST_NMS_TOP_N``;
                otherwise the fixed values 6000 / 300 are used.

        Returns:
            float32 array whose first column is all zeros (only a single
            input image is supported) followed by the proposal columns.
        """
        if train:
            limit_before_nms = self.RPN_PRE_NMS_TOP_N
            limit_after_nms = self.RPN_POST_NMS_TOP_N
        else:
            limit_before_nms = 6000
            limit_after_nms = 300
        overlap_thresh = self.RPN_NMS_THRESH
        smallest_side = self.RPN_MIN_SIZE
        n_anchor = self._num_anchors

        # Channels [0, n_anchor) are background probabilities; the rest are
        # the foreground probabilities used as scores.
        objectness = to_cpu(rpn_cls_prob.data[:, n_anchor:, :, :])
        box_deltas = to_cpu(rpn_bbox_pred.data)
        info_row = im_info[0, :]

        # 1. Enumerate per-cell offsets in input-image coordinates.
        rows, cols = objectness.shape[-2:]
        xs = np.arange(0, cols) * self._feat_stride
        ys = np.arange(0, rows) * self._feat_stride
        mesh_x, mesh_y = np.meshgrid(xs, ys)
        offsets = np.vstack((mesh_x.ravel(), mesh_y.ravel(),
                             mesh_x.ravel(), mesh_y.ravel())).transpose()

        # (1, A, 4) base anchors + (K, 1, 4) offsets -> (K, A, 4), which is
        # then flattened to (K * A, 4).
        n_cell = offsets.shape[0]
        base_anchors = self._anchors.reshape((1, n_anchor, 4))
        anchors = base_anchors + offsets[:, np.newaxis, :]
        anchors = anchors.reshape((n_cell * n_anchor, 4))

        # Put channels last so that rows are ordered (h, w, a), the same
        # order in which the anchors were enumerated.
        box_deltas = box_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
        objectness = objectness.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Apply the deltas to the anchors.
        candidates = bbox_transform_inv(anchors, box_deltas, -1)

        # 2. Clip to the image bounds.
        candidates = clip_boxes(candidates, info_row[:2])

        # 3. Size filter; the threshold is rescaled by info_row[2].
        valid = _filter_boxes(candidates, smallest_side * info_row[2])
        candidates = candidates[valid, :]
        objectness = objectness[valid]

        # 4./5. Highest score first, truncated to the pre-NMS limit.
        by_score = objectness.ravel().argsort()[::-1]
        if limit_before_nms > 0:
            by_score = by_score[:limit_before_nms]
        candidates = candidates[by_score, :]
        objectness = objectness[by_score]

        # 6./7. NMS, truncated to the post-NMS limit.
        kept = nms(np.hstack((candidates, objectness)), overlap_thresh)
        if limit_after_nms > 0:
            kept = kept[:limit_after_nms]
        candidates = candidates[kept, :]
        # Scores are kept in step with the boxes although only the RoIs are
        # returned.
        objectness = objectness[kept]

        # 8. Prefix each RoI with batch index 0 (single-image RPN).
        zeros = np.zeros((candidates.shape[0], 1), dtype=np.float32)
        rois = np.hstack((zeros, candidates))
        return np.asarray(rois, dtype=np.float32)
Exemplo n.º 5
0
    def __call__(self, x, im_info, gt_boxes=None):
        """Forward pass of the detector for training or inference.

        In training mode (``self.train`` truthy) the RPN also returns its
        two losses, the proposal-target layer samples RoIs and regression
        targets from ``gt_boxes``, and the four losses are reported and
        returned. In inference mode the RoIs are classified and refined
        into clipped boxes.

        Args:
            x: Input passed to ``self.trunk``.
            im_info: Image metadata. In training mode its ``.data``
                attribute is read (so a Variable-like object is expected);
                in inference mode it is used as an array directly.
                ``im_info[0][2]`` divides the RoI coordinates and
                ``im_info[0][:2]`` is handed to ``clip_boxes``.
            gt_boxes: Ground-truth boxes; its ``.data`` is read in training
                mode, so it must not be ``None`` there.

        Returns:
            Training: ``(rpn_cls_loss, rpn_loss_bbox, loss_bbox, loss_cls)``.
            Inference: ``(cls_prob, pred_boxes)``.
        """
        h = self.trunk(x)

        # Only touch ``chainer.cuda.cupy`` when CUDA is available; on a
        # CPU-only install the attribute lookup itself can fail.
        cuda_ok = chainer.cuda.available
        if cuda_ok and isinstance(im_info, chainer.cuda.cupy.ndarray):
            im_info = chainer.cuda.cupy.asnumpy(im_info)

        if self.train:
            im_info = im_info.data
            gt_boxes = gt_boxes.data
            # The RPN and proposal-target layer are fed host arrays.
            if cuda_ok and isinstance(gt_boxes, chainer.cuda.cupy.ndarray):
                im_info = chainer.cuda.cupy.asnumpy(im_info)
                gt_boxes = chainer.cuda.cupy.asnumpy(gt_boxes)
            rpn_cls_loss, rpn_loss_bbox, rois = self.RPN(
                h, im_info, self.gpu, gt_boxes)
            rois, labels, bbox_targets, bbox_inside_weights, \
                bbox_outside_weights = self.proposal_target_layer(
                    rois, gt_boxes)
        else:
            rois = self.RPN(h, im_info, self.gpu, gt_boxes)

        # Convert rois
        if self.gpu >= 0:
            rois = to_gpu(rois, device=self.gpu)
            im_info = to_gpu(im_info, device=self.gpu)
            with chainer.cuda.Device(self.gpu):
                boxes = rois[:, 1:5] / im_info[0][2]
        else:
            boxes = rois[:, 1:5] / im_info[0][2]

        # RCNN
        pool5 = roi_pooling_2d(
            self.trunk.feature, rois, 7, 7, self.spatial_scale)
        fc6 = F.dropout(F.relu(self.fc6(pool5)), train=self.train)
        fc7 = F.dropout(F.relu(self.fc7(fc6)), train=self.train)

        # Per class probability
        cls_score = self.cls_score(fc7)
        cls_prob = F.softmax(cls_score)

        # BBox predictions
        bbox_pred = self.bbox_pred(fc7)
        box_deltas = bbox_pred.data

        if self.train:
            if self.gpu >= 0:
                # Move the sampled targets next to the network outputs.
                labels, bbox_targets, bbox_inside_weights, \
                    bbox_outside_weights = [
                        to_gpu(arr, device=self.gpu)
                        for arr in (labels, bbox_targets,
                                    bbox_inside_weights,
                                    bbox_outside_weights)]
            loss_cls = F.softmax_cross_entropy(cls_score, labels)
            bbox_targets = Variable(bbox_targets, volatile='off')
            loss_bbox = smooth_l1_loss(
                bbox_pred, bbox_targets, bbox_inside_weights,
                bbox_outside_weights, self.sigma)

            reporter.report({'rpn_loss_cls': rpn_cls_loss,
                             'rpn_loss_bbox': rpn_loss_bbox,
                             'loss_bbox': loss_bbox,
                             'loss_cls': loss_cls}, self)

            return rpn_cls_loss, rpn_loss_bbox, loss_bbox, loss_cls
        else:
            pred_boxes = bbox_transform_inv(boxes, box_deltas, self.gpu)
            pred_boxes = clip_boxes(pred_boxes, im_info[0][:2], self.gpu)

            return cls_prob, pred_boxes
Exemplo n.º 6
0
    def __call__(self, x, im_info, gt_boxes=None):
        """Forward pass of the detector for training or inference.

        In training mode (``self.train`` truthy) the RPN also returns its
        two losses, the proposal-target layer samples RoIs and regression
        targets from ``gt_boxes``, and the four losses are reported and
        returned. In inference mode the RoIs are classified and refined
        into clipped boxes.

        Args:
            x: Input passed to ``self.trunk``.
            im_info: Image metadata. In training mode its ``.data``
                attribute is read (so a Variable-like object is expected);
                in inference mode it is used as an array directly.
                ``im_info[0][2]`` divides the RoI coordinates and
                ``im_info[0][:2]`` is handed to ``clip_boxes``.
            gt_boxes: Ground-truth boxes; its ``.data`` is read in training
                mode, so it must not be ``None`` there.

        Returns:
            Training: ``(rpn_cls_loss, rpn_loss_bbox, loss_bbox, loss_cls)``.
            Inference: ``(cls_prob, pred_boxes)``.
        """
        h = self.trunk(x)

        # Only touch ``chainer.cuda.cupy`` when CUDA is available; the same
        # guard is applied to every cupy type check in this method.
        cuda_ok = chainer.cuda.available
        if cuda_ok and isinstance(im_info, chainer.cuda.cupy.ndarray):
            im_info = chainer.cuda.cupy.asnumpy(im_info)

        if self.train:
            im_info = im_info.data
            gt_boxes = gt_boxes.data
            # The RPN and proposal-target layer are fed host arrays.
            if cuda_ok and isinstance(gt_boxes, chainer.cuda.cupy.ndarray):
                im_info = chainer.cuda.cupy.asnumpy(im_info)
                gt_boxes = chainer.cuda.cupy.asnumpy(gt_boxes)
            rpn_cls_loss, rpn_loss_bbox, rois = self.RPN(
                h, im_info, self.gpu, gt_boxes)
            rois, labels, bbox_targets, bbox_inside_weights, \
                bbox_outside_weights = self.proposal_target_layer(
                    rois, gt_boxes)
        else:
            rois = self.RPN(h, im_info, self.gpu, gt_boxes)

        # Convert rois
        if self.gpu >= 0:
            rois = to_gpu(rois, device=self.gpu)
            im_info = to_gpu(im_info, device=self.gpu)
            with chainer.cuda.Device(self.gpu):
                boxes = rois[:, 1:5] / im_info[0][2]
        else:
            boxes = rois[:, 1:5] / im_info[0][2]

        # RCNN
        pool5 = roi_pooling_2d(self.trunk.feature, rois, 7, 7,
                               self.spatial_scale)
        fc6 = F.dropout(F.relu(self.fc6(pool5)), train=self.train)
        fc7 = F.dropout(F.relu(self.fc7(fc6)), train=self.train)

        # Per class probability
        cls_score = self.cls_score(fc7)
        cls_prob = F.softmax(cls_score)

        # BBox predictions
        bbox_pred = self.bbox_pred(fc7)
        box_deltas = bbox_pred.data

        if self.train:
            if self.gpu >= 0:
                # Move the sampled targets next to the network outputs.
                labels, bbox_targets, bbox_inside_weights, \
                    bbox_outside_weights = [
                        to_gpu(arr, device=self.gpu)
                        for arr in (labels, bbox_targets,
                                    bbox_inside_weights,
                                    bbox_outside_weights)]
            loss_cls = F.softmax_cross_entropy(cls_score, labels)
            bbox_targets = Variable(bbox_targets, volatile='auto')
            loss_bbox = smooth_l1_loss(bbox_pred, bbox_targets,
                                       bbox_inside_weights,
                                       bbox_outside_weights, self.sigma)

            reporter.report(
                {
                    'rpn_loss_cls': rpn_cls_loss,
                    'rpn_loss_bbox': rpn_loss_bbox,
                    'loss_bbox': loss_bbox,
                    'loss_cls': loss_cls
                }, self)

            return rpn_cls_loss, rpn_loss_bbox, loss_bbox, loss_cls
        else:
            pred_boxes = bbox_transform_inv(boxes, box_deltas, self.gpu)
            pred_boxes = clip_boxes(pred_boxes, im_info[0][:2], self.gpu)

            return cls_prob, pred_boxes