Example #1
def demo_test(net, im, pyramid):
    """Detect object classes in an image using pre-computed object proposals."""

    # Detect all object classes and regress object bounds
    probs, boxes = detect_list(net, im, pyramid=pyramid)

    # Visualize detections for each class
    CONF_THRESH = 0.1
    NMS_THRESH = 0.3
    # for cls_ind, cls in enumerate(CLASSES[1:]):
    #     cls_ind += 1 # because we skipped background
    #     if cls_name == cls:
    #         cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
    #         cls_scores = scores[:, cls_ind]
    #         dets = np.hstack((cls_boxes,
    #                           cls_scores[:, np.newaxis])).astype(np.float32)
    #         keep = nms(dets, NMS_THRESH)
    #         dets = dets[keep, :]
    #         # vis_detections(im, cls, dets, thresh=CONF_THRESH)
    #         inds = np.where(dets[:, -1] >= CONF_THRESH)[0]
    #         dets = dets[inds]

    inds = np.where(probs[:, 0] > CONF_THRESH)[0]
    probs = probs[inds, 0]
    boxes = boxes[inds, :]
    dets = np.hstack((boxes, probs[:, np.newaxis])).astype(np.float32,
                                                           copy=False)
    keep = nms(dets, NMS_THRESH)
    dets = dets[keep, :]
    return dets
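
Every example on this page assumes an nms(dets, thresh) helper in scope, typically the compiled Cython/CUDA routine from py-faster-rcnn or one of its forks. For reference, here is a minimal pure-NumPy sketch of the same greedy algorithm (an illustration, not the original helper), operating on the [x1, y1, x2, y2, score] rows that Example #1 builds with np.hstack:

import numpy as np

def nms_sketch(dets, thresh):
    """Greedy non-maximum suppression over [x1, y1, x2, y2, score] rows."""
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)   # inclusive-pixel convention
    order = scores.argsort()[::-1]          # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # Intersection of the top box with every remaining box
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # Drop every box that overlaps the kept box above the threshold
        order = order[1:][iou <= thresh]
    return keep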
Example #2
    def forward(self, img_path, i):
        im = cv2.imread(img_path)
        input_size = 500
        imageBuffer = np.zeros([input_size, input_size, 3])

        crop_y1 = random.randint(0, max(0, im.shape[0] - input_size))
        crop_x1 = random.randint(0, max(0, im.shape[1] - input_size))
        crop_y2 = min(im.shape[0] - 1, crop_y1 + input_size - 1)
        crop_x2 = min(im.shape[1] - 1, crop_x1 + input_size - 1)

        crop_h = crop_y2 - crop_y1 + 1
        crop_w = crop_x2 - crop_x1 + 1

        paste_y1 = random.randint(0, input_size - crop_h)
        paste_x1 = random.randint(0, input_size - crop_w)
        paste_y2 = paste_y1 + crop_h - 1
        paste_x2 = paste_x1 + crop_w - 1

        imageBuffer[paste_y1:paste_y2 + 1,
                    paste_x1:paste_x2 + 1, :] = im[crop_y1:crop_y2 + 1,
                                                   crop_x1:crop_x2 + 1, :]

        cv2.imwrite('input.jpg', imageBuffer)

        blob = imageBuffer[:, :, ::-1].transpose(2, 0, 1)
        blob = mx.nd.array(blob[np.newaxis, :, :, :])
        blob.copyto(self.exec_.arg_dict['data'])

        self.exec_.forward(is_train=False)

        outputs = [output.asnumpy() for output in self.exec_._get_outputs()]
        cls_map = outputs[0]
        reg_map = outputs[1]
        bbox_deltas = reg_map.transpose((0, 2, 3, 1)).reshape((-1, 4))
        scores = cls_map[0, 1:2, :, :].reshape(
            (1, 25, 63, 63))  # (1,1,1575,63)
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        proposals = bbox_transform_inv(self.anchors, bbox_deltas)
        #proposals = self.anchors
        #draw_boxes(imageBuffer, proposals[:100], 'res1')
        order = scores.ravel().argsort()[::-1]
        order = order[:6000]
        scores = scores[order]
        proposals = proposals[order, :]
        keep = nms(np.hstack((proposals, scores)), 0.05)

        keep = keep[:300]
        proposals = proposals[keep, :]
        scores = scores[keep]

        keep = np.where(scores > 0.4)[0]
        proposals = proposals[keep, :]
        scores = scores[keep]

        draw_boxes(imageBuffer, proposals, 'res_{}'.format(i))
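
bbox_transform_inv, used here and again in the proposal layers below, decodes predicted (dx, dy, dw, dh) deltas back into corner boxes. A minimal NumPy sketch following the usual py-faster-rcnn conventions (center/size parametrization, exp on the size deltas); the real routine also handles 4*K delta columns per box:

import numpy as np

def bbox_transform_inv_sketch(boxes, deltas):
    """boxes: (N, 4) [x1, y1, x2, y2]; deltas: (N, 4) [dx, dy, dw, dh]."""
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    # Shift the center by a fraction of the box size, scale the size by exp()
    pred_ctr_x = deltas[:, 0] * widths + ctr_x
    pred_ctr_y = deltas[:, 1] * heights + ctr_y
    pred_w = np.exp(deltas[:, 2]) * widths
    pred_h = np.exp(deltas[:, 3]) * heights

    pred = np.zeros_like(deltas)
    pred[:, 0] = pred_ctr_x - 0.5 * pred_w  # x1
    pred[:, 1] = pred_ctr_y - 0.5 * pred_h  # y1
    pred[:, 2] = pred_ctr_x + 0.5 * pred_w  # x2
    pred[:, 3] = pred_ctr_y + 0.5 * pred_h  # y2
    return pred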
Example #3
    def nms(self, nms_threshold):
        # Non-max suppression
        for key_record in range(len(self.content)):
            if self.content[key_record]['rois'].size > 0:
                keep = nms(torch.cat(
                    (torch.from_numpy(self.content[key_record]['rois']).float(),
                     torch.from_numpy(self.content[key_record]['scores']).unsqueeze(1).float()),
                    1), nms_threshold)
                ind = keep.numpy()
                self.content[key_record]['scores'] = self.content[key_record]['scores'][ind]
                self.content[key_record]['rois'] = self.content[key_record]['rois'][ind]
                self.content[key_record]['class_ids'] = self.content[key_record]['class_ids'][ind]
Example #4
def nms_cuda(boxes_np, nms_thresh=0.7, xyxy=True):
    if xyxy:
        x1, y1, x2, y2, scores = np.split(boxes_np, 5, axis=1)
        boxes_np = np.hstack([y1, x1, y2, x2, scores])
    boxes_pth = torch.from_numpy(boxes_np).float().cuda()
    pick = nms(boxes_pth, nms_thresh)
    pick = pick.cpu().data.numpy()
    if len(pick.shape) == 2:
        pick = pick.squeeze()
    return pick
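
Note the column swap above: with xyxy=True the rows are reordered to [y1, x1, y2, x2, score] before the call, so this particular nms kernel evidently expects y-first boxes (the Mask R-CNN ports further down use the same convention). A hypothetical call site, assuming the function above and a CUDA device:

import numpy as np

# (N, 5) rows of [x1, y1, x2, y2, score]
dets = np.array([[10, 10, 50, 50, 0.9],
                 [12, 12, 52, 52, 0.8],
                 [100, 100, 150, 150, 0.7]], dtype=np.float32)
keep = nms_cuda(dets, nms_thresh=0.5)  # expected: boxes 0 and 2 survive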
Example #5
def temporal_nms(bboxes, thresh, score_ind=3):
    """
    One-dimensional non-maximal suppression
    :param bboxes: [[st, ed, cls, score], ...]
    :param thresh:
    :return:
    """
    if not nms:
        return temporal_nms_fallback(bboxes, thresh, score_ind=score_ind)
    else:
        keep = nms(np.array([[x[0], x[1], x[3]] for x in bboxes]), thresh, device_id=0)
        return [bboxes[i] for i in keep]
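
temporal_nms_fallback is not shown on this page; the idea is the same greedy suppression in one dimension, with overlap measured on [st, ed] intervals. A minimal sketch of such a fallback (an assumption about the missing helper, not the original code):

import numpy as np

def temporal_nms_fallback(bboxes, thresh, score_ind=3):
    """Greedy 1-D NMS over [st, ed, cls, score] records."""
    segs = np.asarray(bboxes, dtype=np.float64)
    st, ed, scores = segs[:, 0], segs[:, 1], segs[:, score_ind]
    lengths = ed - st  # assumes positive-length segments
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        inter = np.maximum(0.0, np.minimum(ed[i], ed[order[1:]]) -
                           np.maximum(st[i], st[order[1:]]))
        iou = inter / (lengths[i] + lengths[order[1:]] - inter)
        order = order[1:][iou <= thresh]
    return [bboxes[i] for i in keep]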
Example #6
def detect_im(net, im, thresh=0.05):
    im_scale = _compute_scaling_factor(im.shape, cfg.TEST.SCALES[0],
                                       cfg.TEST.MAX_SIZE)
    im_blob = _get_image_blob(im, [im_scale])
    probs, boxes = forward_net(net, im_blob[0], im_scale, False)
    boxes = boxes[:, 0:4]

    inds = np.where(probs[:, 0] > thresh)[0]
    probs = probs[inds, 0]
    boxes = boxes[inds, :]
    dets = np.hstack((boxes, probs[:, np.newaxis])) \
            .astype(np.float32, copy=False)
    keep = nms(dets, cfg.TEST.NMS_THRESH)
    cls_dets = dets[keep, :]
    return cls_dets
Example #7
    def _nms_boxes(self, boxes, scores):
        """ Perform non-maximum supression of similar boxes/detections.
        Args:
            boxes: Rois for this image. Array (num_rois, num_classes * 4).
            scores: Class probabilities for each roi.
                Array (num_rois, num_classes).
        Returns:
            A list of NMSed class detections for this image.
        """
        all_boxes = [[] for _ in range(self.num_classes)]
        # skip j = 0, because it's the background class
        for class_id in range(1, self.num_classes):
            # Whether to use only the top class for each box or
            # all classes over a certain threshold.
            if self.top_class_only:
                detection_criterion = (np.argmax(scores, axis=1) == class_id)
            else:
                detection_criterion = (scores[:, class_id] >
                                       self.class_detection_thresh)
            class_detected_indexes = np.where(detection_criterion)[0]

            cls_scores = scores[class_detected_indexes, class_id]
            class_box_start = class_id * 4
            class_box_end = class_box_start + 4
            cls_boxes = boxes[class_detected_indexes,
                              class_box_start:class_box_end]

            cls_dets = np.hstack(
                (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32,
                                                               copy=False)

            if len(cls_dets) > 1:
                keep = nms(cls_dets, self.nms_thresh, force_cpu=True)
                cls_dets = cls_dets[keep, :]
            all_boxes[class_id] = cls_dets
        return all_boxes
Example #8
    def forward(self, bottom, top):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        assert bottom[0].data.shape[0] == 1, \
            'Only single item batches are supported'
        if self.phase == 0:
            cfg_key = 'TRAIN'
        elif self.phase == 1:
            cfg_key = 'TEST'
        else:
            cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'

        if cfg_key == 'TRAIN':
            nms_thresh = cfg[cfg_key].NMS_THRESH
            post_nms_topN = cfg[cfg_key].ANCHOR_N_POST_NMS
            pre_nms_topN = cfg[cfg_key].ANCHOR_N_PRE_NMS

        if cfg_key == 'TEST':
            pre_nms_topN = cfg[cfg_key].N_DETS_PER_MODULE

        min_size = cfg[cfg_key].ANCHOR_MIN_SIZE

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = bottom[0].data[:, self._num_anchors:, :, :]
        bbox_deltas = bottom[1].data
        im_info = bottom[2].data[0, :]

        if DEBUG:
            print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
            print('scale: {}'.format(im_info[2]))

        # 1. Generate proposals from bbox deltas and shifted anchors
        height, width = scores.shape[-2:]

        if DEBUG:
            print('score map size: {}'.format(scores.shape))

        # Enumerate all shifts
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + \
                  shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN

        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (if in training mode)
        # 7. take after_nms_topN
        # 8. return the top proposals (-> RoIs top)
        if self.phase == 0:
            # DO NMS ONLY IN TRAINING TIME
            # DURING TEST WE HAVE NMS OUTSIDE OF THIS FUNCTION 
            keep = nms(np.hstack((proposals, scores)), nms_thresh)
            if post_nms_topN > 0:
                keep = keep[:post_nms_topN]
            proposals = proposals[keep, :]
            scores = scores[keep]


        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        if proposals.shape[0] == 0:
            blob = np.array([[0, 0, 0, 16, 16]], dtype=np.float32)
        else:
            batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
            blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

        top[0].reshape(*(blob.shape))
        top[0].data[...] = blob

        # [Optional] output scores blob
        if len(top) > 1:
            top[1].reshape(*(scores.shape))
            top[1].data[...] = scores
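
The anchor enumeration in step 1 relies on NumPy broadcasting: (1, A, 4) base anchors plus (K, 1, 4) shifts produce all K*A shifted anchors in a single add. A tiny standalone demo of that broadcast (toy sizes and anchors, not the layer's real configuration):

import numpy as np

feat_stride, height, width = 16, 2, 3          # toy feature map: K = 6 cells
base_anchors = np.array([[-8, -8, 8, 8],       # A = 2 toy anchors, [x1, y1, x2, y2]
                         [-16, -16, 16, 16]])
shift_x, shift_y = np.meshgrid(np.arange(width) * feat_stride,
                               np.arange(height) * feat_stride)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()  # (K, 4)
A, K = base_anchors.shape[0], shifts.shape[0]
anchors = (base_anchors.reshape((1, A, 4)) +
           shifts.reshape((1, K, 4)).transpose((1, 0, 2)))  # (K, A, 4)
anchors = anchors.reshape((K * A, 4))
print(anchors.shape)  # (12, 4), i.e. K * A boxes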
Example #9
def refine_detections(rois, probs, deltas, window, config):
    """Refine classified proposals and filter overlaps and return final
    detections.

    Inputs:
        rois: [N, (y1, x1, y2, x2)] in normalized coordinates
        probs: [N, num_classes]. Class probabilities.
        deltas: [N, num_classes, (dy, dx, log(dh), log(dw))]. Class-specific
                bounding box deltas.
        window: (y1, x1, y2, x2) in image coordinates. The part of the image
            that contains the image excluding the padding.

    Returns detections shaped: [N, (y1, x1, y2, x2, class_id, score)]
    """

    # Class IDs per ROI
    _, class_ids = torch.max(probs, dim=1)

    # Class probability of the top class of each ROI
    # Class-specific bounding box deltas
    idx = torch.arange(class_ids.size()[0]).long()
    if config.GPU_COUNT:
        idx = idx.cuda()
    class_scores = probs[idx, class_ids.data]
    deltas_specific = deltas[idx, class_ids.data]

    # Apply bounding box deltas
    # Shape: [boxes, (y1, x1, y2, x2)] in normalized coordinates
    std_dev = Variable(torch.from_numpy(
        np.reshape(config.RPN_BBOX_STD_DEV, [1, 4])).float(),
                       requires_grad=False)
    if config.GPU_COUNT:
        std_dev = std_dev.cuda()
    refined_rois = proposal.apply_box_deltas(rois, deltas_specific * std_dev)

    # Convert coordinates to image domain
    height, width = config.IMAGE_SHAPE[:2]
    scale = Variable(torch.from_numpy(np.array([height, width, height,
                                                width])).float(),
                     requires_grad=False)
    if config.GPU_COUNT:
        scale = scale.cuda()
    refined_rois *= scale

    # Clip boxes to image window
    refined_rois = clip_to_window(window, refined_rois)

    # Round and cast to int since we're dealing with pixels now
    refined_rois = torch.round(refined_rois)

    # TODO: Filter out boxes with zero area

    # Filter out background boxes
    keep_bool = class_ids > 0

    # Filter out low confidence boxes
    if config.DETECTION_MIN_CONFIDENCE:
        keep_bool = keep_bool & (class_scores >=
                                 config.DETECTION_MIN_CONFIDENCE)
    keep = torch.nonzero(keep_bool)[:, 0]

    # Apply per-class NMS
    pre_nms_class_ids = class_ids[keep.data]
    pre_nms_scores = class_scores[keep.data]
    pre_nms_rois = refined_rois[keep.data]

    for i, class_id in enumerate(util_pytorch.unique1d(pre_nms_class_ids)):
        # Pick detections of this class
        ixs = torch.nonzero(pre_nms_class_ids == class_id)[:, 0]

        # Sort
        ix_rois = pre_nms_rois[ixs.data]
        ix_scores = pre_nms_scores[ixs]
        ix_scores, order = ix_scores.sort(descending=True)
        ix_rois = ix_rois[order.data, :]

        class_keep = nms(
            torch.cat((ix_rois, ix_scores.unsqueeze(1)), dim=1).data,
            config.DETECTION_NMS_THRESHOLD)

        # Map indices
        class_keep = keep[ixs[order[class_keep].data].data]

        if i == 0:
            nms_keep = class_keep
        else:
            nms_keep = util_pytorch.unique1d(torch.cat((nms_keep, class_keep)))
    keep = util_pytorch.intersect1d(keep, nms_keep)

    # Keep top detections
    roi_count = config.DETECTION_MAX_INSTANCES
    top_ids = class_scores[keep.data].sort(descending=True)[1][:roi_count]
    keep = keep[top_ids.data]

    # Arrange output as [N, (y1, x1, y2, x2, class_id, score)]
    # Coordinates are in image domain.
    result = torch.cat(
        (refined_rois[keep.data], class_ids[keep.data].unsqueeze(1).float(),
         class_scores[keep.data].unsqueeze(1)),
        dim=1)

    return result
Example #10
def proposal_layer(inputs,
                   proposal_count,
                   nms_threshold,
                   anchors,
                   config=None):
    """Receives anchor scores and selects a subset to pass as proposals
    to the second stage. Filtering is done based on anchor scores and
    non-max suppression to remove overlaps. It also applies bounding
    box refinement deltas to anchors.

    Inputs:
        rpn_probs: [batch, anchors, (bg prob, fg prob)]
        rpn_bbox: [batch, anchors, (dy, dx, log(dh), log(dw))]

    Returns:
        Proposals in normalized coordinates [batch, rois, (y1, x1, y2, x2)]
    """

    # Currently only supports batchsize 1
    inputs[0] = inputs[0].squeeze(0)
    inputs[1] = inputs[1].squeeze(0)

    # Box Scores. Use the foreground class confidence. [Batch, num_rois, 1]
    scores = inputs[0][:, 1]

    # Box deltas [batch, num_rois, 4]
    deltas = inputs[1]
    std_dev = Variable(torch.from_numpy(
        np.reshape(config.RPN_BBOX_STD_DEV, [1, 4])).float(),
                       requires_grad=False)
    if config.GPU_COUNT:
        std_dev = std_dev.cuda()
    deltas = deltas * std_dev

    # Improve performance by trimming to top anchors by score
    # and doing the rest on the smaller subset.
    pre_nms_limit = min(6000, anchors.size()[0])
    scores, order = scores.sort(descending=True)
    order = order[:pre_nms_limit]
    scores = scores[:pre_nms_limit]
    deltas = deltas[order.data, :]  # TODO: Support batch size > 1 ff.
    anchors = anchors[order.data, :]

    # Apply deltas to anchors to get refined anchors.
    # [batch, N, (y1, x1, y2, x2)]
    boxes = apply_box_deltas(anchors, deltas)

    # Clip to image boundaries. [batch, N, (y1, x1, y2, x2)]
    height, width = config.IMAGE_SHAPE[:2]
    window = np.array([0, 0, height, width]).astype(np.float32)
    boxes = clip_boxes(boxes, window)

    # Filter out small boxes
    # According to Xinlei Chen's paper, this reduces detection accuracy
    # for small objects, so we're skipping it.

    # Non-max suppression
    keep = nms(torch.cat((boxes, scores.unsqueeze(1)), 1).data, nms_threshold)
    keep = keep[:proposal_count]
    boxes = boxes[keep, :]

    # Normalize dimensions to range of 0 to 1.
    norm = Variable(torch.from_numpy(np.array([height, width, height,
                                               width])).float(),
                    requires_grad=False)
    if config.GPU_COUNT:
        norm = norm.cuda()
    normalized_boxes = boxes / norm

    # Add back batch dimension
    normalized_boxes = normalized_boxes.unsqueeze(0)

    return normalized_boxes
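
apply_box_deltas is the y-first torch analogue of bbox_transform_inv: the deltas are (dy, dx, log(dh), log(dw)) applied to (y1, x1, y2, x2) boxes. A minimal sketch in that convention (an illustration of the expected behavior, not necessarily the project's exact code):

import torch

def apply_box_deltas_sketch(boxes, deltas):
    """boxes: (N, 4) [y1, x1, y2, x2]; deltas: (N, 4) [dy, dx, log(dh), log(dw)]."""
    height = boxes[:, 2] - boxes[:, 0]
    width = boxes[:, 3] - boxes[:, 1]
    center_y = boxes[:, 0] + 0.5 * height
    center_x = boxes[:, 1] + 0.5 * width
    # Shift the center, rescale the size
    center_y = center_y + deltas[:, 0] * height
    center_x = center_x + deltas[:, 1] * width
    height = height * torch.exp(deltas[:, 2])
    width = width * torch.exp(deltas[:, 3])
    y1 = center_y - 0.5 * height
    x1 = center_x - 0.5 * width
    return torch.stack([y1, x1, y1 + height, x1 + width], dim=1)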
Example #11
    def forward(self, bottom, top):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        assert bottom[0].data.shape[0] == 1, \
            'Only single item batches are supported'
        if self.phase == 0:
            cfg_key = 'TRAIN'
        elif self.phase == 1:
            cfg_key = 'TEST'
        else:
            cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'

        if cfg_key == 'TRAIN':
            nms_thresh = cfg[cfg_key].NMS_THRESH
            post_nms_topN = cfg[cfg_key].ANCHOR_N_POST_NMS
            pre_nms_topN = cfg[cfg_key].ANCHOR_N_PRE_NMS

        if cfg_key == 'TEST':
            pre_nms_topN = cfg[cfg_key].N_DETS_PER_MODULE
            score_thresh = cfg[cfg_key].SCORE_THRESH

        min_size = cfg[cfg_key].ANCHOR_MIN_SIZE

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = bottom[-3].data  # For multi-class
        bbox_deltas = bottom[-2].data
        im_info = bottom[-1].data[0, :]

        # 1. Generate proposals from bbox deltas and shifted anchors
        height, width = scores.shape[-2:]

        # Enumerate all shifts
        shift_x = np.arange(0, width) * self._feat_stride[0]
        shift_y = np.arange(0, height) * self._feat_stride[0]
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        num_classes = scores.shape[1] // (A * self._num_feats)
        anchors = self._anchors.reshape((1, A, 4)) + \
            shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))
        self.anchors = anchors

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((0, 2, 3, 1)).reshape(
            (-1, num_classes, A * self._num_feats)).transpose(
                (0, 2, 1)).reshape((-1, num_classes))

        # Convert anchors into proposals via bbox transformations
        new_anchors = np.concatenate([anchors[:, np.newaxis, :]] *
                                     self._num_feats,
                                     axis=1).reshape((-1, 4))
        proposals = bbox_transform_inv(new_anchors, bbox_deltas)
        for i in range(self._num_refine):
            # Do this because combining bbox_transform_inv and _compute_targets
            # would otherwise inflate the 3rd and 4th coordinate entries.
            # We do not do this at the last regression, just to follow the original code.
            proposals[:, 2:4] -= 1
            refine_delta = bottom[i].data
            refine_delta = refine_delta.transpose((0, 2, 3, 1)).reshape(
                (-1, 4))
            proposals = bbox_transform_inv(proposals, refine_delta)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        if self._subsampled:
            anchor_map = np.zeros((height, width, A))
            for i in xrange(A):
                stride = (self._feat_stride[i // len(self._shifts) ** 2] //
                          self._feat_stride[0])
                anchor_map[::stride, ::stride, i] = 1
            anchor_map = anchor_map.reshape((K * A))
            subsampled_inds = np.where(anchor_map)[0]
            proposals = proposals[subsampled_inds, :]
            scores = scores[subsampled_inds, :]

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep, :]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN
        max_score = np.max(scores[:, 1:], axis=1).ravel()
        order = max_score.argsort()[::-1]
        try:
            thresh_idx = np.where(max_score[order] >= score_thresh)[0].max()
        except ValueError:
            thresh_idx = 0  # nothing above score_thresh; keep just the highest-scoring one
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        order = order[:thresh_idx + 1]
        proposals = proposals[order, :]
        scores = scores[order, :]

        # 6. apply nms (if in training mode)
        # 7. take after_nms_topN
        # 8. return the top proposals (-> RoIs top)
        if self.phase == 0:
            # DO NMS ONLY IN TRAINING TIME
            # DURING TEST WE HAVE NMS OUTSIDE OF THIS FUNCTION
            keep = nms(np.hstack((proposals, scores)), nms_thresh)
            if post_nms_topN > 0:
                keep = keep[:post_nms_topN]
            proposals = proposals[keep, :]
            scores = scores[keep]

        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        if proposals.shape[0] == 0:
            blob = np.array([[0, 0, 0, 16, 16]], dtype=np.float32)
        else:
            batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
            blob = np.hstack(
                (batch_inds, proposals.astype(np.float32, copy=False)))

        top[0].reshape(*(blob.shape))
        top[0].data[...] = blob

        # [Optional] output scores blob
        if len(top) > 1:
            top[1].reshape(*(scores.shape))
            top[1].data[...] = scores
Example #12
    def forward(self, bottom, top):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted transform deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)
        assert bottom[0].data.shape[0] == 1, \
            'Only single item batches are supported'

        cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'
        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = cfg[cfg_key].RPN_MIN_SIZE

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = bottom[0].data[:, self._num_anchors:, :, :]
        bbox_deltas = bottom[1].data
        im_info = bottom[2].data[0, :]

        # 1. Generate proposals from transform deltas and shifted anchors
        height, width = scores.shape[-2:]
        self._height = height
        self._width = width
        # Enumerate all shifts
        shift_x = np.arange(0, self._width) * self._feat_stride
        shift_y = np.arange(0, self._height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + \
                  shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))
        _, keep = clip_boxes(anchors, im_info[:2])
        self._anchor_index_before_clip = keep

        # Transpose and reshape the predicted transforms to get them
        # into the same order as the anchors:
        #
        # transform deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via the predicted transforms
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals, keep = clip_boxes(proposals, im_info[:2])
        # Record the corresponding index before and after clipping.
        # This step doesn't need unmap;
        # we need it to decide whether to do back propagation
        self._proposal_index_before_clip = keep

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = filter_small_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]
        self._ind_after_filter = keep

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]

        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]
        self._ind_after_sort = order
        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep = nms(np.hstack((proposals, scores)), nms_thresh)

        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]

        scores = scores[keep]
        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        proposals = np.hstack(
            (batch_inds, proposals.astype(np.float32, copy=False)))
        self._proposal_index = keep

        blobs = {'rois': proposals}

        if str(self.phase) == 'TRAIN':
            if cfg.TRAIN.MIX_INDEX:
                all_rois_index = self._ind_after_filter[self._ind_after_sort[
                    self._proposal_index]].reshape(1, len(keep))
                blobs['proposal_index'] = all_rois_index

        # Copy data to forward to top layer
        for blob_name, blob in blobs.iteritems():
            top[self._top_name_map[blob_name]].reshape(*blob.shape)
            top[self._top_name_map[blob_name]].data[...] = blob.astype(
                np.float32, copy=False)
Example #13
    def forward(self, input):

        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs
        scores = input[0][:, self._num_anchors:, :, :]
        bbox_deltas = input[1]
        im_info = input[2]
        cfg_key = input[3]

        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = cfg[cfg_key].RPN_MIN_SIZE

        batch_size = bbox_deltas.size(0)

        feat_height, feat_width = scores.size(2), scores.size(3)
        shift_x = np.arange(0, feat_width) * self._feat_stride
        shift_y = np.arange(0, feat_height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = torch.from_numpy(
            np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                       shift_y.ravel())).transpose())
        shifts = shifts.contiguous().type_as(scores).float()

        A = self._num_anchors
        K = shifts.size(0)

        self._anchors = self._anchors.type_as(scores)
        # anchors = self._anchors.view(1, A, 4) + shifts.view(1, K, 4).permute(1, 0, 2).contiguous()
        anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
        anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:

        bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
        bbox_deltas = bbox_deltas.view(batch_size, -1, 4)

        # Same story for the scores:
        scores = scores.permute(0, 2, 3, 1).contiguous()
        scores = scores.view(batch_size, -1)

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info, batch_size)
        # proposals = clip_boxes_batch(proposals, im_info, batch_size)

        # assign the score to 0 if it's not kept.
        # keep = self._filter_boxes(proposals, min_size * im_info[:, 2])

        # trim keep index to make it equal over batch
        # keep_idx = torch.cat(tuple(keep_idx), 0)

        # scores_keep = scores.view(-1)[keep_idx].view(batch_size, trim_size)
        # proposals_keep = proposals.view(-1, 4)[keep_idx, :].contiguous().view(batch_size, trim_size, 4)

        # _, order = torch.sort(scores_keep, 1, True)

        scores_keep = scores
        proposals_keep = proposals
        _, order = torch.sort(scores_keep, 1, True)

        output = scores.new(batch_size, post_nms_topN, 5).zero_()
        for i in range(batch_size):
            # # 3. remove predicted boxes with either height or width < threshold
            # # (NOTE: convert min_size to input image scale stored in im_info[2])
            proposals_single = proposals_keep[i]
            scores_single = scores_keep[i]

            # # 4. sort all (proposal, score) pairs by score from highest to lowest
            # # 5. take top pre_nms_topN (e.g. 6000)
            order_single = order[i]

            if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
                order_single = order_single[:pre_nms_topN]

            proposals_single = proposals_single[order_single, :]
            scores_single = scores_single[order_single].view(-1, 1)

            # 6. apply nms (e.g. threshold = 0.7)
            # 7. take after_nms_topN (e.g. 300)
            # 8. return the top proposals (-> RoIs top)

            keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1),
                             nms_thresh,
                             force_cpu=not cfg.USE_GPU_NMS)
            keep_idx_i = keep_idx_i.long().view(-1)

            if post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            scores_single = scores_single[keep_idx_i, :]

            # padding 0 at the end.
            num_proposal = proposals_single.size(0)
            output[i, :, 0] = i
            output[i, :num_proposal, 1:] = proposals_single

        return output
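
The nms(dets, thresh, force_cpu=...) signature used here comes from the compiled extension in the faster-rcnn.pytorch lineage. In current PyTorch the equivalent is torchvision.ops.nms, which takes boxes and scores separately and returns the kept indices sorted by score. One caveat: the old kernel computed areas with the inclusive +1 convention, so results can differ slightly at box borders. A rough drop-in sketch (nms_compat is a hypothetical helper name):

import torch
from torchvision.ops import nms as tv_nms

def nms_compat(dets_with_scores, iou_thresh):
    """Approximate the old nms(torch.cat((boxes, scores), 1), thresh) call."""
    boxes = dets_with_scores[:, :4]    # [x1, y1, x2, y2]
    scores = dets_with_scores[:, 4]
    return tv_nms(boxes, scores, iou_thresh)  # LongTensor of kept indices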
Example #14
    def forward(self, input):
        # input[0]: (batch_size, channels, H, W) = (1, 24, 19, 20)
        # input[1]: (batch_size, channels, H, W) = (1, 12*4, 19, 20)
        # input[2]: (batch_size, H, W) = (1, 240, 320)
        # input[3]: "TEST" or "TRAIN"

        all_anchors = self.all_anchors.cuda()

        # class score (binary) for each feature map pixel
        scores = input[0][:, self._num_anchors_type:, :, :]
        # bbox for each feature map pixel, size (batch_size, 48, 19, 20)
        bbox_deltas = input[1]
        # image shape; for jhmdb it is [[240, 320]]  TODO1: change this to [240, 320]
        im_info = input[2]
        cfg_key = input[3]  # TRAIN or TEST
        im_info = np.array(im_info)

        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N    # train: 12000, test: 6000
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N  # train: 2000,  test: 300
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH         # train: 0.7,   test: 0.7
        min_size = cfg[cfg_key].RPN_MIN_SIZE             # train: 8,     test: 16

        batch_size = bbox_deltas.size(0)  # mostly 1

        # the anchors come from the dataset, so we can use them directly;
        # reshape to (batch_size, 3600, 4)  TODO: this is different from origin
        all_anchors = all_anchors.contiguous()
        all_anchors = all_anchors.view(1, self.all_num_anchors,
                                       4).expand(batch_size,
                                                 self.all_num_anchors, 4)

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:  change to (batch_size, 19, 20, 48)
        bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
        bbox_deltas = bbox_deltas.view(batch_size, -1, 4)

        # Same story for the scores:
        # batch_size, 19, 25, 12
        scores = scores.permute(0, 2, 3, 1).contiguous()
        '''
        x = torch.randn(5, 4)
        print(x.stride(), x.is_contiguous())
        print(x.t().stride(), x.t().is_contiguous())
        x.view(4, 5) # ok
        x.t().view(4, 5) # fails
        '''
        scores = scores.view(batch_size, -1)

        # Convert anchors into proposals via bbox transformations:
        # slide the anchors over each pixel of the 19x20 feature map to get boxes.
        # anchors: 630 x 4, i.e. 630 anchors with 4 coordinates each
        # bbox_deltas: (batch_size, 19, 20, 48), i.e. 4 coordinates * 12 anchors
        # all_anchors.shape = 1x3600x4, bbox_deltas.shape = 1x3600x4
        proposals = bbox_transform_inv2(all_anchors, bbox_deltas, batch_size)

        # 2. clip predicted boxes to the image, i.e. remove boxes outside the
        # image boundary. TODO: arguably redundant here, since the input anchors
        # are already fixed to the image size.
        # proposals.shape = [1, 3600, 4], im_info = [[240, 320]]
        proposals = clip_boxes(proposals, im_info, batch_size)

        scores_keep = scores  # (batch_size, 12, 19, 25)
        proposals_keep = proposals

        # sort the CNN output scores of the 12 anchors at each location
        _, order = torch.sort(scores_keep, 1, True)

        output = scores.new(batch_size, post_nms_topN, 5).zero_()
        for i in range(batch_size):
            # # 3. remove predicted boxes with either height or width < threshold
            # # (NOTE: convert min_size to input image scale stored in im_info[2])
            # all the anchors for each feature map pixel in one batch item,
            # size (12, 19, 25)
            proposals_single = proposals_keep[i]
            # binary class score for each feature map pixel, size (12, 19, 25)
            scores_single = scores_keep[i]

            # # 4. sort all (proposal, score) pairs by score from highest to lowest
            # # 5. take top pre_nms_topN (e.g. 6000)
            order_single = order[i]

            if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
                order_single = order_single[:pre_nms_topN]

            proposals_single = proposals_single[order_single, :]
            scores_single = scores_single[order_single].view(-1, 1)

            # 6. apply nms (e.g. threshold = 0.7)
            # 7. take after_nms_topN (e.g. 300)
            # 8. return the top proposals (-> RoIs top)

            keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1),
                             nms_thresh,
                             force_cpu=not cfg.USE_GPU_NMS)
            keep_idx_i = keep_idx_i.long().view(-1)

            if post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            scores_single = scores_single[keep_idx_i, :]

            # padding 0 at the end.
            num_proposal = proposals_single.size(0)
            output[i, :, 0] = i
            output[i, :num_proposal, 1:] = proposals_single

        return output
Example #15
def cpu_mask_voting(masks, boxes, scores, num_classes, max_per_image, im_width, im_height):
    """
    Wrapper function for mask voting, note we already know the class of boxes and masks
    Args:
        masks: ~ n x mask_sz x mask_sz
        boxes: ~ n x 4
        scores: ~ n x 1
        max_per_image: default would be 100
        im_width: width of image
        im_height: height of image
    """
    # apply NMS and sort to keep the top-scoring detections
    scores = scores[:, 1:]
    num_detect = boxes.shape[0]
    res_mask = [[] for _ in xrange(num_detect)]
    for i in xrange(num_detect):
        box = np.round(boxes[i]).astype(int)
        mask = cv2.resize(masks[i, 0].astype(np.float32), (box[2] - box[0] + 1, box[3] - box[1] + 1))
        res_mask[i] = mask
    # Intermediate results
    sup_boxes = []
    sup_masks = []
    sup_scores = []
    tobesort_scores = []

    for i in xrange(num_classes - 1):
        dets = np.hstack((boxes.astype(np.float32), scores[:, i:i+1]))
        inds = nms(dets, cfg.TEST.MASK_MERGE_NMS_THRESH)
        ind_boxes = boxes[inds]
        ind_masks = masks[inds]
        ind_scores = scores[inds, i]
        order = ind_scores.ravel().argsort()[::-1]
        num_keep = min(len(order), max_per_image)
        order = order[0:num_keep]
        sup_boxes.append(ind_boxes[order])
        sup_masks.append(ind_masks[order])
        sup_scores.append(ind_scores[order])
        tobesort_scores.extend(ind_scores[order])

    sorted_scores = np.sort(tobesort_scores)[::-1]
    num_keep = min(len(sorted_scores), max_per_image)
    thresh = sorted_scores[num_keep-1]
    result_box = []
    result_mask = []
    for c in xrange(num_classes - 1):
        cls_box = sup_boxes[c]
        cls_score = sup_scores[c]
        keep = np.where(cls_score >= thresh)[0]
        new_sup_boxes = cls_box[keep]
        num_sup_box = len(new_sup_boxes)
        masks_ar = np.zeros((num_sup_box, 1, cfg.MASK_SIZE, cfg.MASK_SIZE))
        boxes_ar = np.zeros((num_sup_box, 4))
        for i in xrange(num_sup_box):
            # Get weights according to their segmentation scores
            cur_ov = bbox_overlaps(boxes.astype(np.float), new_sup_boxes[i, np.newaxis].astype(np.float))
            cur_inds = np.where(cur_ov >= cfg.TEST.MASK_MERGE_IOU_THRESH)[0]
            cur_weights = scores[cur_inds, c]
            cur_weights = cur_weights / sum(cur_weights)
            # Re-format mask when passing it to mask_aggregation
            pass_mask = [res_mask[j] for j in list(cur_inds)]
            # do mask aggregation
            tmp_mask, boxes_ar[i] = mask_aggregation(boxes[cur_inds], pass_mask, cur_weights, im_width, im_height)
            tmp_mask = cv2.resize(tmp_mask.astype(np.float32), (cfg.MASK_SIZE, cfg.MASK_SIZE))
            masks_ar[i, 0] = tmp_mask
        # make new array such that scores is the last dimension of boxes
        boxes_scored_ar = np.hstack((boxes_ar, cls_score[keep, np.newaxis]))
        result_box.append(boxes_scored_ar)
        result_mask.append(masks_ar)
    return result_box, result_mask
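
mask_aggregation (from the MNC codebase) is not shown here. Roughly, it pastes each candidate mask into a full-image canvas weighted by its normalized score, sums the canvases, binarizes the result, and returns the merged mask together with the bounding box of its support. A simplified sketch under those assumptions (the binarization threshold is a guess at cfg.BINARIZE_THRESH):

import numpy as np

def mask_aggregation_sketch(boxes, masks, weights, im_width, im_height,
                            binarize=0.4):
    """Weighted average of per-box masks in full-image coordinates."""
    canvas = np.zeros((im_height, im_width), dtype=np.float32)
    for box, mask, w in zip(np.round(boxes).astype(int), masks, weights):
        x1, y1, x2, y2 = box[:4]
        # each mask is already resized to its box extent, as in res_mask above
        canvas[y1:y2 + 1, x1:x2 + 1] += w * mask
    ys, xs = np.where(canvas >= binarize)
    if ys.size == 0:
        return canvas, np.zeros(4)
    new_box = np.array([xs.min(), ys.min(), xs.max(), ys.max()], dtype=np.float64)
    merged = (canvas[ys.min():ys.max() + 1, xs.min():xs.max() + 1] >= binarize)
    return merged.astype(np.float32), new_box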
Example #16
            im2show = np.copy(im)
        for j in xrange(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_dets, cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                if vis:
                    im2show = vis_detections(im2show, imdb.classes[j],
                                             cls_dets.cpu().numpy(), 0.3)
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
Example #17
def gpu_mask_voting(masks, boxes, scores, num_classes, max_per_image, im_width, im_height, cfg):
    """
    A wrapper function, note we already know the class of boxes and masks
    Args:
        masks: ~ 300 x 21 x 21
        boxes: ~ 300 x 4
        scores: ~ 300 x 1
        max_per_image: default would be 100
        im_width:
        im_height:
    """
    # Intermediate results
    sup_boxes = []
    sup_scores = []
    tobesort_scores = []
    for i in xrange(num_classes):
        if i == 0:
            sup_boxes.append([])
            sup_scores.append([])
            continue
        dets = np.hstack((boxes.astype(np.float32), scores[:, i:i+1].astype(np.float32)))
        #thresh = (cfg.TEST_DEFAULT_MASK_MERGE_IOU_THRESH).astype(np.float32)
        #print ('dets.shape: {}'.format(dets.shape))
        #print ('dets.dtype: {}'.format(dets.dtype))
        inds = nms(dets, cfg.TEST_DEFAULT_MASK_MERGE_IOU_THRESH, cfg)
        ind_boxes = boxes[inds]
        ind_scores = scores[inds, i]
        num_keep = min(len(ind_scores), max_per_image)
        sup_boxes.append(ind_boxes[0:num_keep, :])
        sup_scores.append(ind_scores[0:num_keep])
        tobesort_scores.extend(ind_scores[0:num_keep])

    sorted_scores = np.sort(tobesort_scores)[::-1]
    num_keep = min(len(sorted_scores), max_per_image)
    thresh = sorted_scores[num_keep-1]
    # inds array to record which mask should be aggregated together
    candidate_inds = []
    # weight for each element in the candidate inds
    candidate_weights = []
    # start position for candidate array
    candidate_start = []
    candidate_scores = []
    class_bar = []
    for c in xrange(num_classes):
        if c == 0:
            continue
        cls_box = sup_boxes[c]
        cls_score = sup_scores[c]
        keep = np.where(cls_score >= thresh)[0]
        new_sup_boxes = cls_box[keep]
        num_sup_box = len(new_sup_boxes)
        
        for i in xrange(num_sup_box):
            cur_ov = bbox_overlaps(boxes.astype(np.float), new_sup_boxes[i, np.newaxis].astype(np.float))
            cur_inds = np.where(cur_ov >= cfg.TEST_DEFAULT_MASK_MERGE_IOU_THRESH)[0]
            candidate_inds.extend(cur_inds)
            cur_weights = scores[cur_inds, c]
            cur_weights = cur_weights / sum(cur_weights)
            candidate_weights.extend(cur_weights)
            candidate_start.append(len(candidate_inds))
        candidate_scores.extend(cls_score[keep])
        class_bar.append(len(candidate_scores))
        
    candidate_inds = np.array(candidate_inds, dtype=np.int32)
    candidate_weights = np.array(candidate_weights, dtype=np.float32)
    candidate_start = np.array(candidate_start, dtype=np.int32)
    candidate_scores = np.array(candidate_scores, dtype=np.float32)
    
    #print ('boxes.shape: {}'.format(boxes.shape))
    #print ('masks.shape: {}'.format(masks.shape))
    masks = np.reshape(masks, (masks.shape[0], 1, masks.shape[1], masks.shape[2]))  # rfm add
    result_mask, result_box = mv(boxes.astype(np.float32), masks.astype(np.float32), 
                                 candidate_inds, candidate_start, 
                                 candidate_weights, im_height, im_width)
    #print ('result_mask.shape: {}'.format(result_mask.shape))
    #print ('result_box.shape: {}'.format(result_box.shape))
    result_box = np.hstack((result_box, candidate_scores[:, np.newaxis]))
    list_result_box = []
    list_result_mask = []
    # separate result mask into different classes
    for i in xrange(num_classes - 1):
        cls_start = class_bar[i - 1] if i > 0 else 0
        cls_end = class_bar[i]
        list_result_box.append(result_box[cls_start:cls_end, :])
        list_result_mask.append(result_mask[cls_start:cls_end, :, :, :])

    return list_result_mask, list_result_box
Example #18
            im2show = np.copy(im)
        for j in xrange(1, len(pascal_classes)):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_dets, args.TEST_NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                if vis:
                    im2show = vis_detections(im2show, pascal_classes[j], cls_dets.cpu().numpy(), 0.5)

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        if webcam_num == -1:
            sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
                             .format(num_images + 1, len(imglist), detect_time, nms_time))
            sys.stdout.flush()

        if vis and webcam_num == -1:
            # cv2.imshow('test', im2show)
            # cv2.waitKey(0)
Example #19
def refine_detections(rois, probs, deltas, window, config):
    """Refine classified proposals and filter overlaps and return final
    detections.

    Inputs:
        rois: [N, (y1, x1, y2, x2)] in normalized coordinates
        probs: [N, num_classes]. Class probabilities.
        deltas: [N, num_classes, (dy, dx, log(dh), log(dw))]. Class-specific
                bounding box deltas.
        window: (y1, x1, y2, x2) in image coordinates. The part of the image
            that contains the image excluding the padding.

    Returns detections shaped: [N, (y1, x1, y2, x2, class_id, score)]
    """

    # Class IDs per ROI
    _, class_ids = torch.max(probs, dim=1)

    # Class probability of the top class of each ROI
    # Class-specific bounding box deltas
    idx = torch.arange(class_ids.size()[0]).long()
    if config.GPU_COUNT:
        idx = idx.cuda()
    class_scores = probs[idx, class_ids.data]
    deltas_specific = deltas[idx, class_ids.data]

    refined_rois = coordinate_convert(rois, deltas_specific, config,
                                      config.GPU_COUNT)

    # Clip boxes to image window
    refined_rois = clip_to_window(window, refined_rois)

    # Round and cast to int since we're dealing with pixels now
    refined_rois = torch.round(refined_rois)

    # TODO: Filter out boxes with zero area

    # Filter out background boxes

    keep_bool = class_ids > 0
    if config.USE_NMS:
        # Filter out low confidence boxes
        if config.DETECTION_MIN_CONFIDENCE:
            keep_bool = keep_bool & (class_scores >=
                                     config.DETECTION_MIN_CONFIDENCE)

        if max(keep_bool) == 0:
            return [], []
        keep = torch.nonzero(keep_bool)[:, 0]

        # Apply per-class NMS
        pre_nms_class_ids = class_ids[keep.data]
        pre_nms_scores = class_scores[keep.data]
        pre_nms_rois = refined_rois[keep.data]

        for i, class_id in enumerate(unique1d(pre_nms_class_ids)):
            # Pick detections of this class
            ixs = torch.nonzero(pre_nms_class_ids == class_id)[:, 0]

            # Sort
            ix_rois = pre_nms_rois[ixs.data]
            ix_scores = pre_nms_scores[ixs]
            ix_scores, order = ix_scores.sort(descending=True)
            ix_rois = ix_rois[order.data, :]

            class_keep = nms(
                torch.cat((ix_rois, ix_scores.unsqueeze(1)), dim=1).data,
                config.DETECTION_NMS_THRESHOLD)

            # Map indices
            class_keep = keep[ixs[order[class_keep].data].data]

            if i == 0:
                nms_keep = class_keep
            else:
                nms_keep = unique1d(torch.cat((nms_keep, class_keep)))
        keep = intersect1d(keep, nms_keep)
    else:

        keep = torch.nonzero(keep_bool).view((-1))

        if len(keep) > 100:
            ix_scores, order = class_scores[keep.data].sort(descending=True)
            keep = keep[order[:100]]

        # else:
        #     ix_scores, order = class_scores[~keep_bool].sort(descending=False)
        #     keep2 = torch.nonzero(~keep_bool).view((-1))[order[:(1000-len(keep))]]
        #     keep = torch.cat((keep,keep2),0)

    ix_scores, order = class_scores[keep.data].sort(descending=True)
    keep = keep[order]

    # Keep top detections
    roi_count = config.DETECTION_MAX_INSTANCES

    if len(keep.data) > 0:
        top_ids = class_scores[keep.data].sort(descending=True)[1][:]
        keep = keep[top_ids.data]

        # Arrange output as [N, (y1, x1, y2, x2, class_id, score)]
        # Coordinates are in image domain.
        result = torch.cat((refined_rois[keep.data],
                            class_ids[keep.data].unsqueeze(1).float(),
                            class_scores[keep.data].unsqueeze(1)),
                           dim=1)
    else:
        return [], []

    return result, keep
Example #20
    anchors = anchors[order.data, :]

    boxes = apply_box_deltas(anchors, deltas)

    # Clip to image boundaries. [y1, x1, y2, x2]
    height, width = config.IMAGE_SHAPE[:2]  # native image size [1024, 1024, 3]; to be changed
    window = np.array([0, 0, height, width]).astype(np.float32)
    boxes = clip_boxes(boxes, window)
    # clip bounding boxes that stick out of the image

    # Filter out small boxes
    # According to Xinlei Chen's paper, this reduces detection accuracy
    # for small objects, so we're skipping it.

    # Non-max suppression:
    # boxes whose overlap exceeds nms_threshold are removed
    keep = nms(torch.cat((boxes, scores.unsqueeze(1)), 1).data, nms_threshold)
    # keep holds the indices of the surviving boxes
    keep = keep[:proposal_count]
    boxes = boxes[keep, :]

    # Normalize dimensions to range of 0 to 1
    # (convert to values relative to the image size)
    norm = Variable(torch.from_numpy(
        np.array([height, width, height, width])).float(),
        requires_grad=False)
    if config.GPU_COUNT:
        norm = norm.cuda()
    normalized_boxes = boxes / norm

    # add the batch dimension back
    normalized_boxes = normalized_boxes.unsqueeze(0)
Example #21
def cpu_mask_voting(masks, boxes, scores, num_classes, max_per_image, im_width, im_height, cfg):
    """
    Wrapper function for mask voting, note we already know the class of boxes and masks
    Args:
        masks: ~ n x mask_sz x mask_sz
        boxes: ~ n x 4
        scores: ~ n x 1
        max_per_image: default would be 100
        im_width: width of image
        im_height: height of image
    """
    # apply NMS and sort to keep the top-scoring detections
    scores = scores[:, 1:]  # remove background scores
    num_detect = boxes.shape[0]
    res_mask = [[] for _ in xrange(num_detect)]
    for i in xrange(num_detect):
        box = np.round(boxes[i]).astype(int)
        mask = cv2.resize(masks[i].astype(np.float32), (box[2]-box[0]+1, box[3]-box[1]+1))
        # unpool the pooled mask
        #mask = unpool_mask(masks[i], (box[3]-box[1]+1, box[2]-box[0]+1))
        res_mask[i] = mask
    # Intermediate results
    sup_boxes = []
    sup_masks = []
    sup_scores = []
    tobesort_scores = []

    for i in xrange(num_classes - 1):
        dets = np.hstack((boxes.astype(np.float32), scores[:, i:i+1]))
        inds = nms(dets, cfg.TEST_DEFAULT_MASK_MERGE_NMS_THRESH, cfg)
        ind_boxes = boxes[inds]
        ind_masks = masks[inds]
        ind_scores = scores[inds, i]
        order = ind_scores.ravel().argsort()[::-1]
        num_keep = min(len(order), max_per_image)
        order = order[0:num_keep]
        sup_boxes.append(ind_boxes[order])
        sup_masks.append(ind_masks[order])
        sup_scores.append(ind_scores[order])
        tobesort_scores.extend(ind_scores[order])

    sorted_scores = np.sort(tobesort_scores)[::-1]
    num_keep = min(len(sorted_scores), max_per_image)
    thresh = sorted_scores[num_keep-1]
    result_box = []
    result_mask = []
    for c in xrange(num_classes - 1):
        cls_box = sup_boxes[c]
        cls_score = sup_scores[c]
        keep = np.where(cls_score >= thresh)[0]
        new_sup_boxes = cls_box[keep]
        num_sup_box = len(new_sup_boxes)
        #masks_ar = np.zeros((num_sup_box, 1, cfg.MAIN_DEFAULT_MASK_SIZE, cfg.MAIN_DEFAULT_MASK_SIZE))
        masks_ar = np.zeros((num_sup_box, cfg.MAIN_DEFAULT_MASK_SIZE, cfg.MAIN_DEFAULT_MASK_SIZE))
        boxes_ar = np.zeros((num_sup_box, 4))
        for i in xrange(num_sup_box):
            # Get weights according to their segmentation scores
            cur_ov = bbox_overlaps(boxes.astype(np.float), new_sup_boxes[i, np.newaxis].astype(np.float))
            cur_inds = np.where(cur_ov >= cfg.TEST_DEFAULT_MASK_MERGE_IOU_THRESH)[0]
            cur_weights = scores[cur_inds, c]
            cur_weights = cur_weights / sum(cur_weights)
            # Re-format mask when passing it to mask_aggregation
            pass_mask = [res_mask[j] for j in list(cur_inds)]
            # do mask aggregation
            tmp_mask, boxes_ar[i] = mask_aggregation(boxes[cur_inds], pass_mask, cur_weights, im_width, im_height, cfg)
            tmp_mask = cv2.resize(tmp_mask.astype(np.float32), (cfg.MAIN_DEFAULT_MASK_SIZE, cfg.MAIN_DEFAULT_MASK_SIZE))
            # pool mask to get a fixed size
            #tmp_mask = pool_mask(tmp_mask, boxes_ar[i], (cfg.MAIN_DEFAULT_MASK_SIZE, cfg.MAIN_DEFAULT_MASK_SIZE))
            masks_ar[i] = tmp_mask
        # make new array such that scores is the last dimension of boxes
        boxes_scored_ar = np.hstack((boxes_ar, cls_score[keep, np.newaxis]))
        result_box.append(boxes_scored_ar)
        result_mask.append(masks_ar)
        
    return result_mask, result_box 
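mask_aggregation is defined elsewhere in the repo; the sketch below is only our assumption of what such a routine does (weighted-average the candidate masks in full-image space, binarize, and return the tight box of the result), with hypothetical names throughout:

import numpy as np

def mask_aggregation_sketch(boxes, masks, weights, im_width, im_height, thresh=0.5):
    # Hypothetical stand-in for the external mask_aggregation:
    # boxes: (K, 4) [x1, y1, x2, y2], assumed already clipped to the image,
    # masks: list of K float masks, each resized to its box,
    # weights: (K,) normalized segmentation-score weights.
    canvas = np.zeros((im_height, im_width), dtype=np.float32)
    for box, mask, w in zip(boxes, masks, weights):
        x1, y1, x2, y2 = np.round(box).astype(int)
        canvas[y1:y2 + 1, x1:x2 + 1] += w * mask
    ys, xs = np.where(canvas >= thresh)
    if len(ys) == 0:
        return canvas, np.zeros(4, dtype=np.float32)
    x1, y1, x2, y2 = xs.min(), ys.min(), xs.max(), ys.max()
    new_box = np.array([x1, y1, x2, y2], dtype=np.float32)
    # return the binarized mask cropped to its tight bounding box
    return (canvas[y1:y2 + 1, x1:x2 + 1] >= thresh).astype(np.float32), new_box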
Example #23
0
def gpu_mask_voting(masks, boxes, scores, num_classes, max_per_image, im_width, im_height):
    """
    A wrapper function; note we already know the class of each box and mask
    Args:
        masks: ~ 300 x 21 x 21
        boxes: ~ 300 x 4
        scores: ~ 300 x num_classes
        max_per_image: default would be 100
        im_width: width of image
        im_height: height of image
    """
    # Intermediate results
    sup_boxes = []
    sup_scores = []
    tobesort_scores = []
    for i in xrange(num_classes):
        if i == 0:
            sup_boxes.append([])
            sup_scores.append([])
            continue
        dets = np.hstack((boxes.astype(np.float32), scores[:, i:i+1]))
        inds = nms(dets, cfg.TEST.MASK_MERGE_NMS_THRESH)
        ind_boxes = boxes[inds]
        ind_scores = scores[inds, i]
        num_keep = min(len(ind_scores), max_per_image)
        sup_boxes.append(ind_boxes[0:num_keep, :])
        sup_scores.append(ind_scores[0:num_keep])
        tobesort_scores.extend(ind_scores[0:num_keep])

    sorted_scores = np.sort(tobesort_scores)[::-1]
    num_keep = min(len(sorted_scores), max_per_image)
    thresh = sorted_scores[num_keep-1]
    # inds array to record which mask should be aggregated together
    candidate_inds = []
    # weight for each element in the candidate inds
    candidate_weights = []
    # start position for candidate array
    candidate_start = []
    candidate_scores = []
    class_bar = []
    for c in xrange(num_classes):
        if c == 0:
            continue
        cls_box = sup_boxes[c]
        cls_score = sup_scores[c]
        keep = np.where(cls_score >= thresh)[0]
        new_sup_boxes = cls_box[keep]
        num_sup_box = len(new_sup_boxes)
        for i in xrange(num_sup_box):
            cur_ov = bbox_overlaps(boxes.astype(np.float), new_sup_boxes[i, np.newaxis].astype(np.float))
            cur_inds = np.where(cur_ov >= cfg.TEST.MASK_MERGE_IOU_THRESH)[0]
            candidate_inds.extend(cur_inds)
            cur_weights = scores[cur_inds, c]
            cur_weights = cur_weights / sum(cur_weights)
            candidate_weights.extend(cur_weights)
            candidate_start.append(len(candidate_inds))
        candidate_scores.extend(cls_score[keep])
        class_bar.append(len(candidate_scores))
    candidate_inds = np.array(candidate_inds, dtype=np.int32)
    candidate_weights = np.array(candidate_weights, dtype=np.float32)
    candidate_start = np.array(candidate_start, dtype=np.int32)
    candidate_scores = np.array(candidate_scores, dtype=np.float32)
    result_mask, result_box = mv(boxes.astype(np.float32), masks, candidate_inds, candidate_start, candidate_weights, im_height, im_width)
    result_box = np.hstack((result_box, candidate_scores[:, np.newaxis]))
    list_result_box = []
    list_result_mask = []
    # separate result mask into different classes
    for i in xrange(num_classes - 1):
        cls_start = class_bar[i - 1] if i > 0 else 0
        cls_end = class_bar[i]
        list_result_box.append(result_box[cls_start:cls_end, :])
        list_result_mask.append(result_mask[cls_start:cls_end, :, :, :])

    return list_result_mask, list_result_box
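The thresh = sorted_scores[num_keep-1] step in both voting functions turns the per-image detection cap into a single global score cutoff: pool every class's surviving scores, take the max_per_image-th highest as the threshold, and let each class keep only scores at or above it. A small worked example with made-up scores:

import numpy as np

# hypothetical pooled scores from all classes
tobesort_scores = [0.91, 0.40, 0.85, 0.77, 0.30, 0.66]
max_per_image = 4
sorted_scores = np.sort(tobesort_scores)[::-1]  # [0.91, 0.85, 0.77, 0.66, 0.40, 0.30]
thresh = sorted_scores[min(len(sorted_scores), max_per_image) - 1]  # 0.66
# each class keeps scores >= 0.66, so at most 4 detections survive in total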
Example #24
0
def detect(net, im_path, thresh=0.05, visualize=False, timers=None, pyramid=False, visualization_folder=None):
    """
    Main module to detect faces
    :param net: The trained network
    :param im_path: The path to the image
    :param thresh: Detections with a score less than thresh are ignored
    :param visualize: Whether to visualize the detections
    :param timers: Timers for measuring detection time (if None, new timers are created)
    :param pyramid: Whether to use an image pyramid during inference
    :param visualization_folder: If set, the visualizations are saved in this folder (when visualize=True)
    :return: cls_dets (bounding boxes concatenated with scores) and the timers
    """
    if not timers:
        timers = {'detect': Timer(),
                  'misc': Timer()}

    im = cv2.imread(im_path)
    imfname = os.path.basename(im_path)
    sys.stdout.flush()
    timers['detect'].tic()

    if not pyramid:
        im_scale = _compute_scaling_factor(im.shape,cfg.TEST.SCALES[0],cfg.TEST.MAX_SIZE)
        im_blob = _get_image_blob(im,[im_scale])
        probs, boxes = forward_net(net,im_blob[0],im_scale,False)
        boxes = boxes[:, 0:4]
    else:
        all_probs = []
        all_boxes = []
        # Compute the scaling coefficients for the pyramid
        base_scale = _compute_scaling_factor(im.shape,cfg.TEST.PYRAMID_BASE_SIZE[0],cfg.TEST.PYRAMID_BASE_SIZE[1])
        pyramid_scales = [float(scale)/cfg.TEST.PYRAMID_BASE_SIZE[0]*base_scale
                          for scale in cfg.TEST.SCALES]

        im_blobs = _get_image_blob(im,pyramid_scales)

        for i in range(len(pyramid_scales)):
            probs,boxes = forward_net(net,im_blobs[i],pyramid_scales[i],True)
            for j in xrange(len(probs)):
                # Do not apply M3 to the largest scale
                if i<len(pyramid_scales)-1 or j<len(probs)-1:
                    all_boxes.append(boxes[j][:,0:4])
                    all_probs.append(probs[j].copy())

        probs = np.concatenate(all_probs)
        boxes = np.concatenate(all_boxes)

    timers['detect'].toc()
    timers['misc'].tic()

    inds = np.where(probs[:, 0] > thresh)[0]
    probs = probs[inds, 0]
    boxes = boxes[inds, :]
    dets = np.hstack((boxes, probs[:, np.newaxis])) \
            .astype(np.float32, copy=False)
    keep = nms(dets, cfg.TEST.NMS_THRESH)
    cls_dets = dets[keep, :]
    if visualize:
        plt_name = os.path.splitext(imfname)[0] + '_detections_{}'.format(net.name)
        visusalize_detections(im, cls_dets, plt_name=plt_name, visualization_folder=visualization_folder)
    timers['misc'].toc()
    return cls_dets,timers
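_compute_scaling_factor is not shown in this collection. The conventional Fast R-CNN-style rule, which we assume here, scales the image's shorter side to target_size while capping the longer side at max_size; a sketch:

import numpy as np

def compute_scaling_factor_sketch(im_shape, target_size, max_size):
    # Scale the shorter side to target_size, but never let the longer side
    # exceed max_size (a common convention; the repo's own
    # _compute_scaling_factor may differ in details).
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    scale = float(target_size) / im_size_min
    if np.round(scale * im_size_max) > max_size:
        scale = float(max_size) / im_size_max
    return scale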
Example #25
0
    def forward(self, bottom, top):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted transform deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)
        assert bottom[0].data.shape[0] == 1, 'Only single item batches are supported'

        cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'
        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = cfg[cfg_key].RPN_MIN_SIZE

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = bottom[0].data[:, self._num_anchors:, :, :]
        bbox_deltas = bottom[1].data
        im_info = bottom[2].data[0, :]

        # 1. Generate proposals from transform deltas and shifted anchors
        height, width = scores.shape[-2:]
        self._height = height
        self._width = width
        # Enumerate all shifts
        shift_x = np.arange(0, self._width) * self._feat_stride
        shift_y = np.arange(0, self._height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + \
                  shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))
        _, keep = clip_boxes(anchors, im_info[:2])
        self._anchor_index_before_clip = keep

        # Transpose and reshape the predicted transformations to get them
        # into the same order as the anchors:
        #
        # transform deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via the predicted transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals, keep = clip_boxes(proposals, im_info[:2])
        # Record the corresponding index before and after clipping
        # This step doesn't need unmap
        # We need it to decide whether to do back-propagation
        self._proposal_index_before_clip = keep

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = filter_small_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]
        self._ind_after_filter = keep

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]

        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]
        self._ind_after_sort = order
        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep = nms(np.hstack((proposals, scores)), nms_thresh)

        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]

        scores = scores[keep]
        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        proposals = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
        self._proposal_index = keep

        blobs = {
            'rois': proposals
        }

        if str(self.phase) == 'TRAIN':
            if cfg.TRAIN.MIX_INDEX:
                all_rois_index = self._ind_after_filter[self._ind_after_sort[self._proposal_index]].reshape(1, len(keep))
                blobs['proposal_index'] = all_rois_index

        # Copy data to forward to top layer
        for blob_name, blob in blobs.iteritems():
            top[self._top_name_map[blob_name]].reshape(*blob.shape)
            top[self._top_name_map[blob_name]].data[...] = blob.astype(np.float32, copy=False)
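Step 1 above relies on bbox_transform_inv to turn anchors plus predicted deltas into proposals. The standard R-CNN decoding, which this layer very likely uses: (dx, dy) shift the anchor center proportionally to its width/height, and (dw, dh) scale the size exponentially. A minimal NumPy sketch:

import numpy as np

def bbox_transform_inv_sketch(boxes, deltas):
    # boxes: (N, 4) anchors [x1, y1, x2, y2]; deltas: (N, 4) [dx, dy, dw, dh]
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights
    dx, dy, dw, dh = deltas.T
    # shift the center proportionally to anchor size, scale w/h exponentially
    pred_ctr_x = dx * widths + ctr_x
    pred_ctr_y = dy * heights + ctr_y
    pred_w = np.exp(dw) * widths
    pred_h = np.exp(dh) * heights
    pred = np.zeros_like(deltas)
    pred[:, 0] = pred_ctr_x - 0.5 * pred_w
    pred[:, 1] = pred_ctr_y - 0.5 * pred_h
    pred[:, 2] = pred_ctr_x + 0.5 * pred_w
    pred[:, 3] = pred_ctr_y + 0.5 * pred_h
    return pred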
Example #26
0
def detect(net,
           im_path,
           thresh=0.05,
           visualize=False,
           timers=None,
           pyramid=False,
           dect_visualization_folder=None):
    """
    Main module to detect faces
    :param net: The trained network
    :param im_path: The path to the image
    :param thresh: Detections with a score less than thresh are ignored
    :param visualize: Whether to visualize the detections
    :param timers: Timers for measuring detection time (if None, new timers are created)
    :param pyramid: Whether to use an image pyramid during inference
    :param dect_visualization_folder: If set, the visualizations are saved in this folder (when visualize=True)
    :return: cls_dets (bounding boxes concatenated with scores) and the timers
    """
    if not timers:
        timers = {'detect': Timer(), 'misc': Timer()}

    im = cv2.imread(im_path)
    im_class__file = im_path.split('/')[-2]
    imfname = os.path.basename(im_path)
    sys.stdout.flush()
    timers['detect'].tic()

    if not pyramid:
        im_scale = _compute_scaling_factor(im.shape, cfg.TEST.SCALES[0],
                                           cfg.TEST.MAX_SIZE)
        im_blob = _get_image_blob(im, [im_scale])
        probs, boxes = forward_net(net, im_blob[0], im_scale, False)
        boxes = boxes[:, 0:4]
    else:
        all_probs = []
        all_boxes = []
        # Compute the scaling coefficients for the pyramid
        base_scale = _compute_scaling_factor(im.shape,
                                             cfg.TEST.PYRAMID_BASE_SIZE[0],
                                             cfg.TEST.PYRAMID_BASE_SIZE[1])
        pyramid_scales = [
            float(scale) / cfg.TEST.PYRAMID_BASE_SIZE[0] * base_scale
            for scale in cfg.TEST.SCALES
        ]

        im_blobs = _get_image_blob(im, pyramid_scales)

        for i in range(len(pyramid_scales)):
            probs, boxes = forward_net(net, im_blobs[i], pyramid_scales[i],
                                       True)
            for j in xrange(len(probs)):
                # Do not apply M3 to the largest scale
                if i < len(pyramid_scales) - 1 or j < len(probs) - 1:
                    all_boxes.append(boxes[j][:, 0:4])
                    all_probs.append(probs[j].copy())

        probs = np.concatenate(all_probs)
        boxes = np.concatenate(all_boxes)

    timers['detect'].toc()
    timers['misc'].tic()

    inds = np.where(probs[:, 0] > thresh)[0]
    probs = probs[inds, 0]
    boxes = boxes[inds, :]
    dets = np.hstack((boxes, probs[:, np.newaxis])) \
            .astype(np.float32, copy=False)
    keep = nms(dets, cfg.TEST.NMS_THRESH)
    cls_dets = dets[keep, :]
    if visualize:
        plt_name = os.path.splitext(imfname)[0] + '_detections_{}'.format(
            net.name)
        dect_visualization_folder = os.path.join(dect_visualization_folder,
                                                 im_class__file)
        visusalize_detections(im,
                              cls_dets,
                              plt_name=plt_name,
                              visualization_folder=dect_visualization_folder)
    timers['misc'].toc()
    return cls_dets, timers
    def forward(self, bottom, top):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        assert bottom[0].data.shape[0] == 1, \
            'Only single item batches are supported'
        if self.phase==0:
            cfg_key = 'TRAIN'
        elif self.phase==1:
            cfg_key = 'TEST'
        else:
            cfg_key = str(self.phase) # either 'TRAIN' or 'TEST'

        if cfg_key == 'TRAIN':
            nms_thresh = cfg[cfg_key].NMS_THRESH
            post_nms_topN = cfg[cfg_key].ANCHOR_N_POST_NMS
            pre_nms_topN = cfg[cfg_key].ANCHOR_N_PRE_NMS

        if cfg_key == 'TEST':
            pre_nms_topN =  cfg[cfg_key].N_DETS_PER_MODULE

        min_size = cfg[cfg_key].ANCHOR_MIN_SIZE

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = bottom[0].data[:, self._num_anchors:, :, :]
        bbox_deltas = bottom[1].data
        im_info = bottom[2].data[0, :]

        if DEBUG:
            print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
            print 'scale: {}'.format(im_info[2])

        # 1. Generate proposals from bbox deltas and shifted anchors
        height, width = scores.shape[-2:]

        if DEBUG:
            print 'score map size: {}'.format(scores.shape)

        # Enumerate all shifts
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + \
                  shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN

        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (if in training mode)
        # 7. take after_nms_topN
        # 8. return the top proposals (-> RoIs top)
        if self.phase == 0:
            # Do NMS only at training time;
            # during testing, NMS happens outside of this function
            keep = nms(np.hstack((proposals, scores)), nms_thresh)
            if post_nms_topN > 0:
                keep = keep[:post_nms_topN]
            proposals = proposals[keep, :]
            scores = scores[keep]


        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        if proposals.shape[0] == 0:
            blob = np.array([[0,0,0,16,16]],dtype=np.float32)
        else:
            batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
            blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

        top[0].reshape(*(blob.shape))
        top[0].data[...] = blob

        # [Optional] output scores blob
        if len(top) > 1:
            top[1].reshape(*(scores.shape))
            top[1].data[...] = scores
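All of these examples delegate to an external nms(dets, thresh) that takes (N, 5) rows of [x1, y1, x2, y2, score] and returns the indices of the boxes to keep. For reference, a minimal pure-NumPy greedy version in the spirit of the classic py_cpu_nms (the compiled CPU/GPU versions used above behave the same but run much faster):

import numpy as np

def py_nms_sketch(dets, thresh):
    # dets: (N, 5) [x1, y1, x2, y2, score]; returns indices of kept boxes
    x1, y1, x2, y2, scores = dets.T
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the top box with every remaining box
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # drop every box overlapping the kept one by more than thresh
        order = order[np.where(iou <= thresh)[0] + 1]
    return keep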