Example No. 1
    def call(self, input):
        class_probs = input[0][:, :, 1]  # foreground class probabilities

        bbox_offset = input[1]
        bbox_offset = bbox_offset * np.reshape(self.config.BBOX_STD_DEV,
                                               [1, 1, 4])

        anchors = self.anchors

        pre_nms_limit = min(self.config.PRE_NMS_LIMIT, self.anchors.shape[0])
        ids = tf.nn.top_k(
            class_probs, pre_nms_limit, sorted=True,
            name="top_anchors").indices  #find k largest probabilities

        # slice per image in the batch (IMAGES_PER_GPU images per process)
        class_probs = utils.batch_slice([class_probs, ids],
                                        lambda x, y: tf.gather(x, y),
                                        self.config.IMAGES_PER_GPU)
        bbox_offset = utils.batch_slice([bbox_offset, ids],
                                        lambda x, y: tf.gather(x, y),
                                        self.config.IMAGES_PER_GPU)
        anchors = utils.batch_slice(ids,
                                    lambda x: tf.gather(anchors, x),
                                    self.config.IMAGES_PER_GPU,
                                    names=["pre_nms_anchors"])

        # apply the predicted offsets to the anchors to get refined boxes closer to the foreground objects
        bboxes = utils.batch_slice([anchors, bbox_offset],
                                   lambda x, y: utils.apply_bbox_offset(x, y),
                                   self.config.IMAGES_PER_GPU,
                                   names=["refined_anchors"])

        # clip boxes to the image window (pixel coordinates; normalized below)
        h, w = self.config.IMAGE_SHAPE[:2]
        window = np.array([0, 0, h, w], dtype=np.float32)
        bboxes = utils.batch_slice(bboxes,
                                   lambda x: utils.clip_boxes(x, window),
                                   self.config.IMAGES_PER_GPU,
                                   names=["refined_anchors_clipped"])

        # generate proposals by NMS

        normalized_bboxes = bboxes / np.array([[h, w, h, w]])

        def nms(normalized_bboxes, scores):
            ids = tf.image.non_max_suppression(normalized_bboxes,
                                               scores,
                                               self.num_proposal,
                                               self.nms_threshold,
                                               name="rpn_non_max_suppression")
            proposals = tf.gather(normalized_bboxes, ids)
            padding = tf.maximum(self.num_proposal - tf.shape(proposals)[0], 0)
            proposals = tf.pad(proposals, [(0, padding), (0, 0)])
            return proposals

        proposals = utils.batch_slice([normalized_bboxes, class_probs], nms,
                                      self.config.IMAGES_PER_GPU)

        return proposals
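utils.apply_bbox_offset is not shown in this example. A minimal sketch of the standard Mask R-CNN-style delta application it presumably performs, assuming boxes in (y1, x1, y2, x2) form and deltas in (dy, dx, log(dh), log(dw)) form:

import tensorflow as tf

def apply_bbox_offset(boxes, deltas):
    # boxes: [N, (y1, x1, y2, x2)]; deltas: [N, (dy, dx, log(dh), log(dw))]
    height = boxes[:, 2] - boxes[:, 0]
    width = boxes[:, 3] - boxes[:, 1]
    center_y = boxes[:, 0] + 0.5 * height
    center_x = boxes[:, 1] + 0.5 * width
    # Shift the center and rescale width/height
    center_y += deltas[:, 0] * height
    center_x += deltas[:, 1] * width
    height *= tf.exp(deltas[:, 2])
    width *= tf.exp(deltas[:, 3])
    # Convert back to corner coordinates
    y1 = center_y - 0.5 * height
    x1 = center_x - 0.5 * width
    y2 = y1 + height
    x2 = x1 + width
    return tf.stack([y1, x1, y2, x2], axis=1)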
Example No. 2
    def bbox_reg(self, boxes, box_deltas, im):
        if CUDA_AVAILABLE:
            boxes = boxes.data[:, 1:].cpu().numpy()
            box_deltas = box_deltas.data.cpu().numpy()
        else:
            boxes = boxes.data[:, 1:].numpy()
            box_deltas = box_deltas.data.numpy()
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.size()[-2:])
        return _tovar(pred_boxes)
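Several of these examples call bbox_transform_inv. A minimal NumPy sketch of the classic py-faster-rcnn version, for single-image (x1, y1, x2, y2) boxes (Example No. 6 below uses a batched PyTorch variant; the real helper may add dtype and empty-input handling):

import numpy as np

def bbox_transform_inv(boxes, deltas):
    # boxes: [N, (x1, y1, x2, y2)] anchors; deltas: [N, (dx, dy, dw, dh)]
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    dx, dy, dw, dh = deltas[:, 0], deltas[:, 1], deltas[:, 2], deltas[:, 3]

    # Centers move proportionally to the anchor size; sizes scale exponentially
    pred_ctr_x = dx * widths + ctr_x
    pred_ctr_y = dy * heights + ctr_y
    pred_w = np.exp(dw) * widths
    pred_h = np.exp(dh) * heights

    pred_boxes = np.zeros(deltas.shape, dtype=np.float64)
    pred_boxes[:, 0] = pred_ctr_x - 0.5 * pred_w  # x1
    pred_boxes[:, 1] = pred_ctr_y - 0.5 * pred_h  # y1
    pred_boxes[:, 2] = pred_ctr_x + 0.5 * pred_w  # x2
    pred_boxes[:, 3] = pred_ctr_y + 0.5 * pred_h  # y2
    return pred_boxes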
Example No. 3
    def get_roi_boxes(self, anchors, rpn_map, rpn_bbox_deltas, im):
        # TODO fix this!!!
        im_info = (100, 100, 1)

        if CUDA_AVAILABLE:
            bbox_deltas = rpn_bbox_deltas.data.cpu().numpy()
        else:
            bbox_deltas = rpn_bbox_deltas.data.numpy()

        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        #scores = bottom[0].data[:, self._num_anchors:, :, :]
        if CUDA_AVAILABLE:
            scores = rpn_map.data[:, self._num_anchors:, :, :].cpu().numpy()
        else:
            scores = rpn_map.data[:, self._num_anchors:, :, :].numpy()

        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im.size()[-2:])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = filter_boxes(proposals, self.min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if self.pre_nms_topN > 0:
            order = order[:self.pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep = nms(np.hstack((proposals, scores)), self.nms_thresh)
        if self.post_nms_topN > 0:
            keep = keep[:self.post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]

        return proposals, scores
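filter_boxes (step 3 above) is not shown either; the usual implementation returns the indices of boxes whose sides are both at least min_size. A sketch under that assumption:

import numpy as np

def filter_boxes(boxes, min_size):
    # Keep boxes whose width and height are both >= min_size
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    keep = np.where((ws >= min_size) & (hs >= min_size))[0]
    return keep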
Example No. 4
def generate_proposals(data):
    # Extract feature map
    feature_map = CNN_model_cut.predict(
        data.reshape(-1, data.shape[0], data.shape[1], data.shape[2]))
    padded_fcmap = np.pad(feature_map, ((0, 0), (1, 1), (1, 1), (0, 0)),
                          mode='constant')

    # Extract RPN results
    RPN_results = RPN_model.predict(padded_fcmap)
    anchor_probs = RPN_results[0].reshape((-1, 1))
    anchor_targets = RPN_results[1].reshape((-1, 4))

    # Original anchors
    feature_size = feature_map.shape[1]
    number_feature_points = feature_size * feature_size
    feature_stride = int(image_size / feature_size)
    base_anchors = generate_anchors(feature_stride,
                                    feature_stride,
                                    ratios=ANCHOR_RATIOS,
                                    scales=ANCHOR_SCALES)
    shift = np.arange(0, feature_size) * feature_stride
    shift_x, shift_y = np.meshgrid(shift, shift)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()
    original_anchors = (base_anchors.reshape(
        (1, anchor_number, 4)) + shifts.reshape(
            (1, number_feature_points, 4)).transpose((1, 0, 2)))
    original_anchors = original_anchors.reshape((-1, 4))

    # Proposals by the RPN
    proposals = bbox_transform_inv(original_anchors, anchor_targets)
    proposals = clip_boxes(proposals,
                           (data.shape[0], data.shape[1]))  # clip to image.
    high_to_low_scores = anchor_probs.ravel().argsort()[::-1]  # indices by descending score
    high_to_low_scores = high_to_low_scores[0:N]  # keep the top N
    proposals = proposals[high_to_low_scores, :]
    anchor_probs = anchor_probs[high_to_low_scores]

    del original_anchors
    del RPN_results
    del feature_map
    del padded_fcmap

    return proposals, anchor_probs
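The meshgrid/vstack pattern above tiles the base anchors across every feature-map cell. A tiny standalone demonstration with toy numbers (a 2x2 feature map and stride 16):

import numpy as np

feature_size, feature_stride = 2, 16
shift = np.arange(0, feature_size) * feature_stride  # [0, 16]
shift_x, shift_y = np.meshgrid(shift, shift)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()
print(shifts)
# [[ 0  0  0  0]
#  [16  0 16  0]
#  [ 0 16  0 16]
#  [16 16 16 16]]
# Adding (1, A, 4) base anchors to these (P, 1, 4) shifts and reshaping to
# (P*A, 4) places every base anchor at every tile origin.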
Example No. 5
    def bbox_reg(self, boxes, box_deltas, im):
        boxes = boxes.data[:, 1:].numpy()
        box_deltas = box_deltas.data.numpy()
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.size()[-2:])
        return to_var(pred_boxes)
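clip_boxes here receives im.size()[-2:], i.e. the (height, width) of the image tensor. A minimal NumPy sketch matching the classic py-faster-rcnn implementation (assumed; the helper itself is not shown):

import numpy as np

def clip_boxes(boxes, im_shape):
    # Clip (x1, y1, x2, y2) boxes to an image of shape (height, width)
    boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)  # x1
    boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)  # y1
    boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)  # x2
    boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)  # y2
    return boxes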
Example No. 6
    def forward(self, input):
        """
        Parameters
        ----------
        input - list contains:
            cls_prob_alls: (BS, H, W, Ax2) outputs of RPN (here - Feature Pyramid Network),
                           prob of bg or fg;
            bbox_pred_alls: (BS, H, W, Ax4), regression boxes output of RPN;
            im_info: a list of [image_height, image_width, scale_ratios];
            rpn_shapes: width and height of feature map;
        ----------
        Returns
        ----------
        rpn_rois : (1 x H x W x A, 5) e.g. [0, x1, y1, x2, y2]

        Algorithm:

        for each (H, W) location i
            generate A anchor boxes centered on cell i
            apply predicted bbox deltas at cell i to each of the A anchors
        clip predicted boxes to image
        remove predicted boxes with either height or width < threshold
        sort all (proposal, score) pairs by score from highest to lowest
        take top pre_nms_topN proposals before NMS
        apply NMS with threshold 0.7 to remaining proposals
        take after_nms_topN proposals after NMS
        return the top proposals (-> RoIs top, scores top)
        """
        scores = input[0][:, :, 1]  # batch_size x num_rois, foreground probs
        bbox_deltas = input[1]      # batch_size x num_rois x 4
        im_info = input[2]
        feat_shapes = input[3]      # rpn_shapes (see docstring)

        batch_size = bbox_deltas.size(0)

        anchors = torch.from_numpy(generate_anchors_all_pyramids(
            self.fpn_scales, self.anchor_ratios,
            feat_shapes, self.feat_strides, self.fpn_anchor_stride)).type_as(scores)
        num_anchors = anchors.size(0)

        anchors = anchors.view(1, num_anchors, 4).expand(batch_size, num_anchors, 4)

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info, batch_size)
        # keep_idx = self._filter_boxes(proposals, min_size).squeeze().long().nonzero().squeeze()

        scores_keep = scores
        proposals_keep = proposals

        _, order = torch.sort(scores_keep, 1, True)

        output = scores.new(batch_size, self.post_nms_topN, 5).zero_()

        for i in range(batch_size):
            # 3. remove predicted boxes with either height or width < threshold
            # (NOTE: convert min_size to input image scale stored in im_info[2])
            proposals_single = proposals_keep[i]
            scores_single = scores_keep[i]

            # 4. sort all (proposal, score) pairs by score from highest to lowest
            # 5. take top pre_nms_topN (e.g. 6000)
            order_single = order[i]

            if self.pre_nms_topN > 0 and self.pre_nms_topN < scores_keep.numel():
                order_single = order_single[:self.pre_nms_topN]

            proposals_single = proposals_single[order_single, :]
            scores_single = scores_single[order_single].view(-1, 1)

            # 6. apply nms (e.g. threshold = 0.7)
            # 7. take after_nms_topN (e.g. 300)
            # 8. return the top proposals (-> RoIs top)
            keep_idx_i = nms(proposals_single, scores_single, self.rpn_nms_thresh)
            keep_idx_i = keep_idx_i.long().view(-1)

            if self.post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:self.post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            scores_single = scores_single[keep_idx_i, :]

            # pad with zeros at the end
            num_proposal = proposals_single.size(0)
            output[i, :, 0] = i
            output[i, :num_proposal, 1:] = proposals_single

        return output

    def backward(self, top, propagate_down, bottom):
        """This layer does not propagate gradients."""
        pass

    def reshape(self, bottom, top):
        """Reshaping happens during the call to forward."""
        pass

    def _filter_boxes(self, boxes, min_size):
        """Remove all boxes with any side smaller than min_size."""
        ws = boxes[:, :, 2] - boxes[:, :, 0] + 1
        hs = boxes[:, :, 3] - boxes[:, :, 1] + 1
        keep = ((ws >= min_size) & (hs >= min_size))
        return keep
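The nms helper used in forward() is not shown. Its (boxes, scores, iou_threshold) call matches torchvision.ops.nms, except that torchvision expects 1-D scores while the code above passes an (N, 1) column, so a thin wrapper is likely. A standalone sanity check of the torchvision call:

import torch
from torchvision.ops import nms

boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],
                      [50., 50., 60., 60.]])
scores = torch.tensor([0.9, 0.8, 0.7])
keep = nms(boxes, scores, iou_threshold=0.7)
print(keep)  # tensor([0, 1, 2]); the first two boxes overlap at IoU ~0.68 < 0.7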
Example No. 7
def produce_batch(feature_map, gt_boxes, h_w=None, category=None):
    height = np.shape(feature_map)[1]
    width = np.shape(feature_map)[2]
    num_feature_map = width * height

    w_stride = h_w[1] / width
    h_stride = h_w[0] / height
    #base anchors are 9 anchors wrt a tile (0,0,w_stride-1,h_stride-1)
    base_anchors = generate_anchors(w_stride, h_stride)
    shift_x = np.arange(0, width) * w_stride
    shift_y = np.arange(0, height) * h_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()

    all_anchors = (base_anchors.reshape((1, anchors_num, 4)) + shifts.reshape(
        (1, num_feature_map, 4)).transpose((1, 0, 2)))
    total_anchors = num_feature_map * anchors_num
    all_anchors = all_anchors.reshape((total_anchors, 4))
    # run the trained RPN to predict scores and deltas
    res = rpn_model.query_cnn(feature_map)
    scores = res[0]
    scores = scores.reshape(-1, 1)
    deltas = res[1]
    deltas = np.reshape(deltas, (-1, 4))
    # convert the dx/dy deltas into absolute xy coordinates, and clip anchors to the image
    proposals = bbox_transform_inv(all_anchors, deltas)
    proposals = clip_boxes(proposals, (h_w[0], h_w[1]))
    # remove small boxes
    keep = filter_boxes(proposals,
                        small_box_threshold)  # here threshold is 40 pixel
    proposals = proposals[keep, :]
    scores = scores[keep]

    # sort scores and keep only the top 6000.
    pre_nms_topN = 6000
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    # apply NMS to the top 6000, then keep the top 300
    post_nms_topN = 300
    keep = py_cpu_nms(np.hstack((proposals, scores)), 0.7)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    # append the ground-truth boxes to the proposals
    proposals = np.vstack((proposals, gt_boxes))
    # calculate overlaps of proposal and gt_boxes
    overlaps = bbox_overlaps(proposals, gt_boxes)
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    # labels = gt_labels[gt_assignment] #?

    # sub sample
    fg_inds = np.where(max_overlaps >= FG_THRESH)[0]
    fg_rois_per_this_image = min(int(BATCH * FG_FRAC), fg_inds.size)
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds,
                             size=fg_rois_per_this_image,
                             replace=False)
    bg_inds = np.where((max_overlaps < BG_THRESH_HI)
                       & (max_overlaps >= BG_THRESH_LO))[0]
    bg_rois_per_this_image = BATCH - fg_rois_per_this_image
    bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds,
                             size=bg_rois_per_this_image,
                             replace=False)
    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    # labels = labels[keep_inds]
    rois = proposals[keep_inds]
    gt_rois = gt_boxes[gt_assignment[keep_inds]]
    targets = bbox_transform(rois, gt_rois)  # regression targets for the sampled RoIs
    rois_num = targets.shape[0]
    batch_box = np.zeros((rois_num, 200, 4))
    for i in range(rois_num):
        batch_box[i, category] = targets[i]
    batch_box = np.reshape(batch_box, (rois_num, -1))
    # get gt category
    batch_categories = np.zeros((rois_num, 200, 1))
    for i in range(rois_num):
        batch_categories[i, category] = 1
    batch_categories = np.reshape(batch_categories, (rois_num, -1))
    return rois, batch_box, batch_categories
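bbox_overlaps computes the (N, K) IoU matrix between proposals and gt_boxes. A plain-NumPy sketch equivalent to the usual compiled helper (the real one is Cython for speed):

import numpy as np

def bbox_overlaps(boxes, query_boxes):
    # boxes: (N, 4), query_boxes: (K, 4), both (x1, y1, x2, y2)
    N, K = boxes.shape[0], query_boxes.shape[0]
    overlaps = np.zeros((N, K), dtype=np.float64)
    for k in range(K):
        q_area = ((query_boxes[k, 2] - query_boxes[k, 0] + 1) *
                  (query_boxes[k, 3] - query_boxes[k, 1] + 1))
        for n in range(N):
            iw = (min(boxes[n, 2], query_boxes[k, 2]) -
                  max(boxes[n, 0], query_boxes[k, 0]) + 1)
            if iw <= 0:
                continue
            ih = (min(boxes[n, 3], query_boxes[k, 3]) -
                  max(boxes[n, 1], query_boxes[k, 1]) + 1)
            if ih <= 0:
                continue
            box_area = ((boxes[n, 2] - boxes[n, 0] + 1) *
                        (boxes[n, 3] - boxes[n, 1] + 1))
            overlaps[n, k] = iw * ih / (box_area + q_area - iw * ih)
    return overlaps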
Example No. 8
    def get_text_lines_oriented(self, text_proposals, scores, im_size):
        """
        text_proposals:boxes
        
        """
        # tp=text proposal
        tp_groups = self.group_text_proposals(text_proposals, scores,
                                              im_size)  #首先还是建图,获取到文本行由哪几个小框构成

        text_lines = np.zeros((len(tp_groups), 8), np.float32)

        for index, tp_indices in enumerate(tp_groups):
            text_line_boxes = text_proposals[list(tp_indices)]  # all small boxes of this text line
            X = (text_line_boxes[:, 0] +
                 text_line_boxes[:, 2]) / 2  # center x, y of each small box
            Y = (text_line_boxes[:, 1] + text_line_boxes[:, 3]) / 2

            z1 = np.polyfit(X, Y, 1)  # least-squares line fitted through the box centers

            x0 = np.min(text_line_boxes[:, 0])  # smallest x coordinate of the text line
            x1 = np.max(text_line_boxes[:, 2])  # largest x coordinate of the text line

            offset = (text_line_boxes[0, 2] -
                      text_line_boxes[0, 0]) * 0.5  # half the width of a small box

            # fit a line through the top-left corners of all the small boxes,
            # then evaluate y at the leftmost and rightmost x of the text line
            lt_y, rt_y = self.fit_y(text_line_boxes[:, 0], text_line_boxes[:, 1],
                                    x0 + offset, x1 - offset)
            # fit a line through the bottom-left corners and evaluate the same
            lb_y, rb_y = self.fit_y(text_line_boxes[:, 0], text_line_boxes[:, 3],
                                    x0 + offset, x1 - offset)

            score = scores[list(tp_indices)].sum() / float(
                len(tp_indices))  # mean score of all small boxes, used as the text line score

            text_lines[index, 0] = x0
            text_lines[index, 1] = min(lt_y, rt_y)  # smaller y of the text line's top edge
            text_lines[index, 2] = x1
            text_lines[index, 3] = max(lb_y, rb_y)  # larger y of the text line's bottom edge
            text_lines[index, 4] = score  # text line score
            text_lines[index, 5] = z1[0]  # slope k and intercept b of the center-line fit
            text_lines[index, 6] = z1[1]
            height = np.mean(
                (text_line_boxes[:, 3] - text_line_boxes[:, 1]))  # mean height of the small boxes
            text_lines[index, 7] = height + 2.5

        text_recs = np.zeros((len(text_lines), 9), np.float32)
        index = 0
        for line in text_lines:
            b1 = line[6] - line[7] / 2  # from the height and center line, intercepts of the top and bottom lines
            b2 = line[6] + line[7] / 2
            x1 = line[0]
            y1 = line[5] * line[0] + b1  # top left
            x2 = line[2]
            y2 = line[5] * line[2] + b1  # top right
            x3 = line[0]
            y3 = line[5] * line[0] + b2  # bottom left
            x4 = line[2]
            y4 = line[5] * line[2] + b2  # bottom right
            disX = x2 - x1
            disY = y2 - y1
            width = np.sqrt(disX * disX + disY * disY)  # text line width

            fTmp0 = y3 - y1  # text line height
            fTmp1 = fTmp0 * disY / width
            x = np.fabs(fTmp1 * disX / width)  # compensation
            y = np.fabs(fTmp1 * disY / width)
            if line[5] < 0:
                x1 -= x
                y1 += y
                x4 += x
                y4 -= y
            else:
                x2 += x
                y2 += y
                x3 -= x
                y3 -= y
            text_recs[index, 0] = x1
            text_recs[index, 1] = y1
            text_recs[index, 2] = x2
            text_recs[index, 3] = y2
            text_recs[index, 4] = x3
            text_recs[index, 5] = y3
            text_recs[index, 6] = x4
            text_recs[index, 7] = y4
            text_recs[index, 8] = line[4]
            index = index + 1

        text_recs = clip_boxes(text_recs, im_size)

        return text_recs
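self.fit_y is not shown; in the CTPN reference implementation it fits a least-squares line through the given points and evaluates it at the two requested x positions, with a guard for the degenerate case where all x values coincide. A sketch on that assumption (np as imported in the class module):

    def fit_y(self, X, Y, x1, x2):
        # All points share one x: the fit is vertical, return a constant y
        if np.sum(X == X[0]) == np.size(X):
            return Y[0], Y[0]
        p = np.poly1d(np.polyfit(X, Y, 1))  # least-squares line through (X, Y)
        return p(x1), p(x2)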
Example No. 9
def refine_detections(rois, probs, deltas, window, config):
    # Class IDs per ROI
    class_ids = tf.argmax(probs, axis=1, output_type=tf.int32)
    # Class probability of the top class of each ROI
    indices = tf.stack([tf.range(tf.shape(probs)[0]), class_ids], axis=1)
    class_scores = tf.gather_nd(probs, indices)
    # Class-specific bounding box deltas
    deltas_specific = tf.gather_nd(deltas, indices)
    # Apply bounding box deltas
    refined_rois = utils.apply_bbox_offset(
        rois, deltas_specific * config.BBOX_STD_DEV)
    # Convert coordinates to image domain
    # TODO: better to keep them normalized until later
    height, width = config.IMAGE_SHAPE[:2]
    refined_rois *= tf.constant([height, width, height, width],
                                dtype=tf.float32)
    # Clip boxes to image window
    refined_rois = utils.clip_boxes(refined_rois, window)
    # Round and cast to int since we're dealing with pixels now
    refined_rois = tf.cast(tf.math.rint(refined_rois), tf.int32)

    # TODO: Filter out boxes with zero area

    # Filter out background boxes
    keep = tf.where(class_ids > 0)[:, 0]
    # Filter out low confidence boxes
    if config.DETECTION_MIN_CONFIDENCE:
        conf_keep = tf.where(
            class_scores >= config.DETECTION_MIN_CONFIDENCE)[:, 0]
        #keep = tf.sets.set_intersection(tf.expand_dims(keep, 0),
        #                                tf.expand_dims(conf_keep, 0))
        keep = tf.sets.intersection(tf.expand_dims(keep, 0),
                                    tf.expand_dims(conf_keep, 0))
        #keep = tf.sparse_tensor_to_dense(keep)[0]
        keep = tf.sparse.to_dense(keep)[0]

    # Apply per-class NMS
    # 1. Prepare variables
    pre_nms_class_ids = tf.gather(class_ids, keep)
    pre_nms_scores = tf.gather(class_scores, keep)
    pre_nms_rois = tf.gather(refined_rois, keep)
    unique_pre_nms_class_ids = tf.unique(pre_nms_class_ids)[0]

    def nms_keep_map(class_id):
        """Apply Non-Maximum Suppression on ROIs of the given class."""
        # Indices of ROIs of the given class
        ixs = tf.where(tf.equal(pre_nms_class_ids, class_id))[:, 0]
        # Apply NMS
        class_keep = tf.image.non_max_suppression(
            #tf.to_float(tf.gather(pre_nms_rois, ixs)),
            tf.cast(tf.gather(pre_nms_rois, ixs), tf.float32),
            tf.gather(pre_nms_scores, ixs),
            max_output_size=config.DETECTION_MAX_INSTANCES,
            iou_threshold=config.DETECTION_NMS_THRESHOLD)
        # Map indices
        class_keep = tf.gather(keep, tf.gather(ixs, class_keep))
        # Pad with -1 so returned tensors have the same shape
        gap = config.DETECTION_MAX_INSTANCES - tf.shape(class_keep)[0]
        class_keep = tf.pad(class_keep, [(0, gap)],
                            mode='CONSTANT',
                            constant_values=-1)
        # Set shape so map_fn() can infer result shape
        class_keep.set_shape([config.DETECTION_MAX_INSTANCES])
        return class_keep

    # 2. Map over class IDs
    nms_keep = tf.map_fn(nms_keep_map,
                         unique_pre_nms_class_ids,
                         dtype=tf.int64)
    # 3. Merge results into one list, and remove -1 padding
    nms_keep = tf.reshape(nms_keep, [-1])
    nms_keep = tf.gather(nms_keep, tf.where(nms_keep > -1)[:, 0])
    # 4. Compute intersection between keep and nms_keep
    #keep = tf.sets.set_intersection(tf.expand_dims(keep, 0),
    #                                tf.expand_dims(nms_keep, 0))
    #keep = tf.sparse_tensor_to_dense(keep)[0]
    keep = tf.sets.intersection(tf.expand_dims(keep, 0),
                                tf.expand_dims(nms_keep, 0))
    keep = tf.sparse.to_dense(keep)[0]
    # Keep top detections
    roi_count = config.DETECTION_MAX_INSTANCES
    class_scores_keep = tf.gather(class_scores, keep)
    num_keep = tf.minimum(tf.shape(class_scores_keep)[0], roi_count)
    top_ids = tf.nn.top_k(class_scores_keep, k=num_keep, sorted=True)[1]
    keep = tf.gather(keep, top_ids)

    # Arrange output as [N, (y1, x1, y2, x2, class_id, score)]
    # Coordinates are in image domain.
    detections = tf.concat(
        [
            #tf.to_float(tf.gather(refined_rois, keep)),
            tf.cast(tf.gather(refined_rois, keep), tf.float32),
            #tf.to_float(tf.gather(class_ids, keep))[..., tf.newaxis],
            tf.cast(tf.gather(class_ids, keep), tf.float32)[..., tf.newaxis],
            tf.gather(class_scores, keep)[..., tf.newaxis]
        ],
        axis=1)

    # Pad with zeros if detections < DETECTION_MAX_INSTANCES
    gap = config.DETECTION_MAX_INSTANCES - tf.shape(detections)[0]
    detections = tf.pad(detections, [(0, gap), (0, 0)], "CONSTANT")
    return detections
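The tf.sets.intersection / tf.sparse.to_dense pair (the TF2 names for the commented-out TF1 calls) is used twice above to intersect index sets. A small standalone demonstration:

import tensorflow as tf

keep = tf.constant([0, 2, 3, 5], dtype=tf.int64)
nms_keep = tf.constant([2, 5, 7], dtype=tf.int64)
inter = tf.sets.intersection(tf.expand_dims(keep, 0),
                             tf.expand_dims(nms_keep, 0))
print(tf.sparse.to_dense(inter)[0].numpy())  # [2 5]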
Example No. 10
def produce_batch(filepath, gt_boxes, h_w, category):
    img = load_img(filepath)
    img_width = np.shape(img)[1] * scale[1]
    img_height = np.shape(img)[0] * scale[0]
    img = img.resize((int(img_width), int(img_height)))
    #feed image to pretrained model and get feature map
    img = img_to_array(img)
    img = np.expand_dims(img, axis=0)
    feature_map = pretrained_model.predict(img)
    height = np.shape(feature_map)[1]
    width = np.shape(feature_map)[2]
    num_feature_map = width * height
    #calculate output w, h stride
    w_stride = h_w[1] / width
    h_stride = h_w[0] / height
    #generate base anchors according output stride.
    #base anchors are 9 anchors wrt a tile (0,0,w_stride-1,h_stride-1)
    base_anchors = generate_anchors(w_stride, h_stride)
    #slice tiles according to image size and stride.
    #each 1x1x1532 feature-map cell maps to one tile.
    shift_x = np.arange(0, width) * w_stride
    shift_y = np.arange(0, height) * h_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()
    #apply base anchors to all tiles, to have a num_feature_map*9 anchors.
    all_anchors = (base_anchors.reshape((1, 9, 4)) + shifts.reshape(
        (1, num_feature_map, 4)).transpose((1, 0, 2)))
    total_anchors = num_feature_map * 9
    all_anchors = all_anchors.reshape((total_anchors, 4))
    # feed feature map to pretrained RPN model, get proposal labels and bboxes.
    res = rpn_model.predict(feature_map)
    scores = res[0]
    scores = scores.reshape(-1, 1)
    deltas = res[1]
    deltas = np.reshape(deltas, (-1, 4))
    # transform deltas into absolute box coordinates (x1, y1, x2, y2)
    proposals = bbox_transform_inv(all_anchors, deltas)
    proposals = clip_boxes(proposals, (h_w[0], h_w[1]))
    # remove small boxes, here threshold is 40 pixel
    keep = filter_boxes(proposals, 40)
    proposals = proposals[keep, :]
    scores = scores[keep]

    # sort scores and keep only the top 6000.
    pre_nms_topN = 6000
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    # apply NMS to the top 6000, then keep the top 300
    post_nms_topN = 300
    keep = py_cpu_nms(np.hstack((proposals, scores)), 0.7)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    # add gt_boxes to proposals.
    proposals = np.vstack((proposals, gt_boxes))
    # calculate overlaps of proposal and gt_boxes
    overlaps = bbox_overlaps(proposals, gt_boxes)
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    # labels = gt_labels[gt_assignment] #?

    # sub sample
    fg_inds = np.where(max_overlaps >= FG_THRESH)[0]
    fg_rois_per_this_image = min(int(BATCH * FG_FRAC), fg_inds.size)
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds,
                             size=fg_rois_per_this_image,
                             replace=False)
    bg_inds = np.where((max_overlaps < BG_THRESH_HI)
                       & (max_overlaps >= BG_THRESH_LO))[0]
    bg_rois_per_this_image = BATCH - fg_rois_per_this_image
    bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds,
                             size=bg_rois_per_this_image,
                             replace=False)
    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    # labels = labels[keep_inds]
    rois = proposals[keep_inds]
    gt_rois = gt_boxes[gt_assignment[keep_inds]]
    targets = bbox_transform(rois, gt_rois)  # regression targets for the sampled RoIs
    rois_num = targets.shape[0]
    batch_box = np.zeros((rois_num, 200, 4))
    for i in range(rois_num):
        batch_box[i, category] = targets[i]
    batch_box = np.reshape(batch_box, (rois_num, -1))
    # get gt category
    batch_categories = np.zeros((rois_num, 200, 1))
    for i in range(rois_num):
        batch_categories[i, category] = 1
    batch_categories = np.reshape(batch_categories, (rois_num, -1))
    return rois, batch_box, batch_categories
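py_cpu_nms in Examples No. 7 and No. 10 takes an (N, 5) array of (x1, y1, x2, y2, score) rows plus an IoU threshold and returns the kept indices. A sketch of the canonical pure-NumPy greedy NMS it presumably matches:

import numpy as np

def py_cpu_nms(dets, thresh):
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)  # the best remaining box always survives
        # intersection of box i with every other remaining box
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        # drop boxes that overlap box i by more than the threshold
        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]
    return keep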