Exemplo n.º 1
0
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image,
                 num_classes, fast_iou_positive_threshld):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Args:
        all_rois: candidate RoIs, shape [-1, 4].
        gt_boxes: ground-truth boxes, shape [-1, 5], that is
            [x1, y1, x2, y2, label].
        fg_rois_per_image: upper bound on the number of foreground RoIs.
        rois_per_image: upper bound on the total number of sampled RoIs.
        num_classes: forwarded to ``_get_bbox_regression_labels``.
        fast_iou_positive_threshld: a RoI whose best overlap with any
            ground-truth box is >= this value is foreground.

    Returns:
        Tuple ``(labels, rois, bbox_targets)`` for the sampled RoIs.
        Foreground RoIs come first; the labels of all RoIs after them are
        set to 0.
    """
    # overlaps: (rois x gt_boxes)
    # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois, dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :-1], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, -1]

    # Foreground RoIs: best overlap >= the positive threshold.
    fg_inds = np.where(max_overlaps >= fast_iou_positive_threshld)[0]

    # Background RoIs: best overlap in [negative threshold, positive threshold).
    bg_inds = np.where((max_overlaps < fast_iou_positive_threshld) & (
        max_overlaps >= cfgs.FAST_RCNN_IOU_NEGATIVE_THRESHOLD))[0]

    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs.
    fg_rois_per_this_image = min(fg_rois_per_image, fg_inds.size)

    # Sample foreground regions without replacement.
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds,
                             size=int(fg_rois_per_this_image),
                             replace=False)

    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired).
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)

    # Sample background regions without replacement.
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds,
                             size=int(bg_rois_per_this_image),
                             replace=False)

    # The indices that we're selecting (fg first, then bg).
    keep_inds = np.append(fg_inds, bg_inds)

    # Fancy indexing copies, so the clamp below does not touch gt_boxes.
    labels = labels[keep_inds]

    # Clamp labels for the background RoIs to 0.
    labels[int(fg_rois_per_this_image):] = 0
    rois = all_rois[keep_inds]

    bbox_target_data = _compute_targets(
        rois, gt_boxes[gt_assignment[keep_inds], :-1], labels)
    bbox_targets = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, bbox_targets
def iou_rotate(boxes1, boxes2):
    """Return the overlap matrix between converted ``boxes1`` and ``boxes2``.

    ``boxes1`` is passed through ``forward_convert`` and then
    ``get_horizen_minAreaRectangle`` before being handed to
    ``bbox_overlaps``; ``boxes2`` is used as given.

    NOTE(review): presumably ``boxes1`` holds rotated boxes and ``boxes2``
    already holds horizontal boxes -- confirm against callers. Despite the
    name, no rotated overlap is computed here; only the ``bbox_overlaps``
    result is returned.
    """
    boxes1_convert = forward_convert(boxes1, False)
    boxes1_h = get_horizen_minAreaRectangle(boxes1_convert)

    # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
    iou_h = bbox_overlaps(np.ascontiguousarray(boxes1_h, dtype=np.float64),
                          np.ascontiguousarray(boxes2, dtype=np.float64))

    return iou_h
Exemplo n.º 3
0
def anchor_target_layer(gt_boxes_h, gt_boxes_r, anchors, gpu_id=0):
    """Assign class labels, states and regression targets to every anchor.

    Args:
        gt_boxes_h: ground-truth boxes used for the overlap computation when
            ``cfgs.METHOD == 'H'``.
        gt_boxes_r: ground-truth boxes whose last column is a 1-based class
            label; all columns but the last are used for the overlap
            computation when ``cfgs.METHOD != 'H'``.
        anchors: anchor boxes, one row per anchor.
        gpu_id: forwarded to ``rbbx_overlaps``.

    Returns:
        Tuple of float32 arrays ``(labels, target_delta, anchor_states,
        target_boxes)``. ``labels`` has shape (num_anchors, cfgs.CLASS_NUM)
        with a 1 in the class column of each positive anchor.
        ``anchor_states`` is 1 for positive, -1 for ignored and 0 for the
        remaining anchors.
    """
    anchor_states = np.zeros((anchors.shape[0],))
    labels = np.zeros((anchors.shape[0], cfgs.CLASS_NUM))
    if gt_boxes_r.shape[0]:
        # overlaps: [N anchors, M gt boxes]
        if cfgs.METHOD == 'H':
            # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
            overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float64),
                                     np.ascontiguousarray(gt_boxes_h, dtype=np.float64))
        else:
            overlaps = rbbx_overlaps(np.ascontiguousarray(anchors, dtype=np.float32),
                                     np.ascontiguousarray(gt_boxes_r[:, :-1], dtype=np.float32), gpu_id)

        argmax_overlaps_inds = np.argmax(overlaps, axis=1)
        max_overlaps = overlaps[np.arange(overlaps.shape[0]), argmax_overlaps_inds]

        # Best-matching ground-truth box for every anchor.
        target_boxes = gt_boxes_r[argmax_overlaps_inds]

        if cfgs.USE_ANGLE_COND:
            if cfgs.METHOD == 'R':
                # Additionally require the anchor's last column and the
                # target's second-to-last column to differ by less than 15.
                delta_theta = np.abs(target_boxes[:, -2] - anchors[:, -1])
                theta_indices = delta_theta < 15
                positive_indices = (max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD) & theta_indices
            else:
                positive_indices = max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD

            ignore_indices = (max_overlaps > cfgs.IOU_NEGATIVE_THRESHOLD) & (max_overlaps < cfgs.IOU_POSITIVE_THRESHOLD)

        else:
            positive_indices = max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD
            ignore_indices = (max_overlaps > cfgs.IOU_NEGATIVE_THRESHOLD) & ~positive_indices

        anchor_states[ignore_indices] = -1
        anchor_states[positive_indices] = 1

        # One-hot class label for positives (labels in gt are 1-based).
        labels[positive_indices, target_boxes[positive_indices, -1].astype(int) - 1] = 1
    else:
        # no annotations? then everything is background
        target_boxes = np.zeros((anchors.shape[0], gt_boxes_r.shape[1]))

    if cfgs.METHOD == 'H':
        # Rewrite the [x1, y1, x2, y2] anchors as [x_c, y_c, w, h, theta]
        # with theta fixed at -90.
        # NOTE(review): h comes from the x extent and w from the y extent --
        # presumably intentional for the theta = -90 convention; confirm.
        x_c = (anchors[:, 2] + anchors[:, 0]) / 2
        y_c = (anchors[:, 3] + anchors[:, 1]) / 2
        h = anchors[:, 2] - anchors[:, 0] + 1
        w = anchors[:, 3] - anchors[:, 1] + 1
        theta = -90 * np.ones_like(x_c)
        anchors = np.vstack([x_c, y_c, w, h, theta]).transpose()
    target_delta = bbox_transform.rbbox_transform(ex_rois=anchors, gt_rois=target_boxes)

    return np.array(labels, np.float32), np.array(target_delta, np.float32), \
           np.array(anchor_states, np.float32), np.array(target_boxes, np.float32)
Exemplo n.º 4
0
def wending(new_center, old_cetner, k):
    """Tell whether the cluster centres still moved in the last update.

    Args:
        new_center: the current ``k`` centre boxes.
        old_cetner: the previous ``k`` centre boxes.
        k: number of centres to compare.

    Returns:
        False when the summed ``1 - overlap`` between each new centre and its
        previous version is at most 1e-6, True otherwise.
    """
    # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
    overlaps = bbox_overlaps(np.ascontiguousarray(new_center, dtype=np.float64),
                             np.ascontiguousarray(old_cetner, dtype=np.float64))

    # Only the diagonal matters: centre i against its own previous position.
    dis = [1 - overlaps[i, i] for i in range(k)]

    # Written as "not (<=)" so a NaN distance still reports True, exactly
    # as the original if/else did.
    return not (sum(dis) <= 0.000001)
Exemplo n.º 5
0
def anchor_target_layer(gt_boxes, anchors):
    """Assign class labels, states and regression targets to every anchor.

    :param gt_boxes: np.array of shape (M, 5) for (x1, y1, x2, y2, label);
        labels are 1-based.
    :param anchors: np.array of shape (N, 4) for (x1, y1, x2, y2).
    :return: tuple of float32 arrays ``(labels, target_delta, anchor_states)``.
        ``labels`` has shape (N, cfgs.CLASS_NUM) with a 1 in the class column
        of each positive anchor; ``anchor_states`` is 1 for positive, -1 for
        ignored and 0 for the remaining anchors.
    """

    anchor_states = np.zeros((anchors.shape[0], ))
    labels = np.zeros((anchors.shape[0], cfgs.CLASS_NUM))
    if gt_boxes.shape[0]:
        # overlaps: [N, M]
        # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
        # NOTE(review): gt_boxes is passed including its label column --
        # presumably bbox_overlaps only reads the first four columns; confirm.
        overlaps = bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float64),
            np.ascontiguousarray(gt_boxes, dtype=np.float64))

        argmax_overlaps_inds = np.argmax(overlaps, axis=1)
        max_overlaps = overlaps[np.arange(overlaps.shape[0]),
                                argmax_overlaps_inds]

        positive_indices = max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD
        ignore_indices = (max_overlaps >
                          cfgs.IOU_NEGATIVE_THRESHOLD) & ~positive_indices
        anchor_states[ignore_indices] = -1
        anchor_states[positive_indices] = 1

        # Best-matching ground-truth box for every anchor.
        target_boxes = gt_boxes[argmax_overlaps_inds]

        # One-hot class label for positives (labels in gt are 1-based).
        labels[positive_indices,
               target_boxes[positive_indices, 4].astype(int) - 1] = 1
    else:
        # no annotations? then everything is background
        target_boxes = np.zeros((anchors.shape[0], gt_boxes.shape[1]))

    target_delta = bbox_transform.bbox_transform(ex_rois=anchors,
                                                 gt_rois=target_boxes)

    return np.array(labels,
                    np.float32), np.array(target_delta, np.float32), np.array(
                        anchor_states, np.float32)
Exemplo n.º 6
0
def cluster(boxes, k, max_iter=1000000):
    """Group ``boxes`` into ``k`` clusters, using box overlap as similarity.

    Starting from ``k`` randomly chosen boxes, each box is assigned to the
    centre it overlaps most, and every centre is replaced by the mean of its
    members. This repeats until ``wending`` reports that the centres stopped
    moving, or ``max_iter`` updates have been done.

    Args:
        boxes: np.array of boxes, one per row (boolean-mask indexing is used).
        k: number of clusters; must not exceed ``len(boxes)``.
        max_iter: upper bound on the number of update rounds.

    Returns:
        List of ``k`` centre boxes.
    """
    center_id = np.random.choice(np.arange(len(boxes)), k, replace=False)
    new_center_boxes = [boxes[idx] for idx in center_id]
    old_center_boxes = [np.zeros_like(box) for box in new_center_boxes]

    # The round counter is kept separate from the per-cluster loop index;
    # sharing one name reset the counter every round, so the iteration cap
    # could never trigger.
    iteration = 0
    while iteration < max_iter and wending(new_center_boxes,
                                           old_center_boxes, k):
        # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
        overlaps = bbox_overlaps(
            np.ascontiguousarray(boxes, dtype=np.float64),
            np.ascontiguousarray(new_center_boxes, dtype=np.float64))
        argmax_id = np.argmax(overlaps, axis=1)
        for c in range(k):
            old_center_boxes[c] = new_center_boxes[c]
            members = boxes[argmax_id == c]
            # An empty cluster keeps its previous centre; the mean of zero
            # rows would be NaN and poison every later overlap.
            if len(members):
                new_center_boxes[c] = np.mean(members, axis=0)
        iteration += 1

    return new_center_boxes
def anchor_target_layer(gt_boxes,
                        img_shape,
                        all_anchors,
                        is_restrict_bg=False):
    """Same as the anchor target layer in original Fast/er RCNN.

    Args:
        gt_boxes: ground-truth boxes; the last column (class label) is
            dropped before use.
        img_shape: indexable whose items 1 and 2 are the image height and
            width.
        all_anchors: anchors as rows of [x1, y1, x2, y2].
        is_restrict_bg: when True, the background quota becomes
            ``max(num_bg, num_fg * 1.5)``.
            NOTE(review): ``max`` can only raise the quota -- confirm this
            is the intended meaning of "restrict".

    Returns:
        ``(rpn_labels, rpn_bbox_targets)``: labels of shape (-1, 1) with
        1 = positive, 0 = negative, -1 = don't care, and regression targets
        of shape (-1, 4), both mapped back onto the full anchor set.
    """

    total_anchors = all_anchors.shape[0]
    img_h, img_w = img_shape[1], img_shape[2]
    gt_boxes = gt_boxes[:, :-1]  # remove class label

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border)
        & (all_anchors[:, 1] >= -_allowed_border)
        & (all_anchors[:, 2] < img_w + _allowed_border) &  # width
        (all_anchors[:, 3] < img_h + _allowed_border)  # height
    )[0]

    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    # Start with every label at -1; there is one label per anchor that lies
    # inside the image.
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float64),
                             np.ascontiguousarray(gt_boxes, dtype=np.float64))

    # For every in-image anchor (row): index of the gt box it overlaps most.
    argmax_overlaps = overlaps.argmax(axis=1)

    # ... and the value of that best overlap.
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]

    # For every gt box (column): index of the anchor that overlaps it most.
    gt_argmax_overlaps = overlaps.argmax(axis=0)

    # ... and the value of that best overlap.
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # Row indices of every anchor that ties for the best overlap of some gt.
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    # Anchors whose best overlap is below the negative threshold are
    # background (assigned first, so positives may overwrite them).
    if not cfgs.TRAIN_RPN_CLOOBER_POSITIVES:
        labels[max_overlaps < cfgs.RPN_IOU_NEGATIVE_THRESHOLD] = 0

    # For each gt box, the anchor(s) with the highest overlap are foreground.
    labels[gt_argmax_overlaps] = 1
    # Anchors whose best overlap reaches the positive threshold are
    # foreground as well.
    labels[max_overlaps >= cfgs.RPN_IOU_POSITIVE_THRESHOLD] = 1

    # Assigned last, so negatives overwrite positives.
    if cfgs.TRAIN_RPN_CLOOBER_POSITIVES:
        labels[max_overlaps < cfgs.RPN_IOU_NEGATIVE_THRESHOLD] = 0

    # Preset number of foreground samples; if there are more positives,
    # randomly switch the surplus to -1 so they are not used.
    num_fg = int(cfgs.RPN_MINIBATCH_SIZE * cfgs.RPN_POSITIVE_RATE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1

    # Background quota: whatever remains of the minibatch.
    num_bg = cfgs.RPN_MINIBATCH_SIZE - np.sum(labels == 1)
    if is_restrict_bg:
        # num_fg * 1.5 is a float; npr.choice rejects a float ``size``.
        num_bg = int(max(num_bg, num_fg * 1.5))
    bg_inds = np.where(labels == 0)[0]
    # If there are more backgrounds than the quota, randomly switch the
    # surplus to -1; otherwise keep every background label at 0.
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=int(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1

    # Targets are computed from every in-image anchor and the gt box it
    # overlaps most.
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # Everything so far only covers anchors inside the image, but the output
    # must cover all anchors, so map the results back to the original set.
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)

    rpn_labels = labels.reshape((-1, 1))

    bbox_targets = bbox_targets.reshape((-1, 4))
    rpn_bbox_targets = bbox_targets

    return rpn_labels, rpn_bbox_targets
Exemplo n.º 8
0
def anchor_target_layer(gt_boxes, img_shape, all_anchors, is_restrict=False):
    """
    Build RPN anchor targets, the same as in Fast/er RCNN.

    :param gt_boxes: ground-truth boxes; the last column (class label) is
        dropped before use.
    :param img_shape: indexable whose items 1 and 2 are the image height and
        width.
    :param all_anchors: anchors as rows of [x1, y1, x2, y2].
    :param is_restrict: when True, the background quota becomes
        ``max(num_bg, num_fg * 1.5)``.
        NOTE(review): ``max`` can only raise the quota -- confirm intent.
    :return: ``(rpn_labels, rpn_bbox_targets)``; labels of shape (-1, 1)
        with 1 = positive, 0 = negative, -1 = don't care, and regression
        targets of shape (-1, 4), both covering the full anchor set.
    """
    anchors_num = all_anchors.shape[0]
    img_height, img_width = img_shape[1], img_shape[2]
    gt_boxes = gt_boxes[:, :-1]  # remove class label

    # how far an anchor may stick out over the image edge
    allow_border = 0

    # only keep anchors inside the image
    indices_inside = np.where(
        (all_anchors[:, 0] >= -allow_border) &  # left_up_x
        (all_anchors[:, 1] >= -allow_border) &  # left_up_y
        (all_anchors[:, 2] < img_width + allow_border) &  # right_down_x
        (all_anchors[:, 3] < img_height + allow_border)  # right_down_y
    )[0]

    anchors = all_anchors[indices_inside, :]

    # label: 1 -> positive, 0 -> negative, -1 -> dont care
    labels = np.empty((len(indices_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float64),
                             np.ascontiguousarray(gt_boxes, dtype=np.float64))

    # best gt box per anchor, and best anchor(s) per gt box (ties included)
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(indices_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    # negatives first, so positives may overwrite them
    if not cfgs.TRAIN_RPN_CLOOBER_POSITIVES:
        labels[max_overlaps < cfgs.RPN_IOU_NEGATIVE_THRESHOLD] = 0

    labels[gt_argmax_overlaps] = 1
    labels[max_overlaps >= cfgs.RPN_IOU_POSITIVE_THRESHOLD] = 1

    # negatives last, so they overwrite positives
    if cfgs.TRAIN_RPN_CLOOBER_POSITIVES:
        labels[max_overlaps < cfgs.RPN_IOU_NEGATIVE_THRESHOLD] = 0

    # foreground quota of the RPN minibatch; surplus positives become -1
    num_fg = int(cfgs.RPN_MINIBATCH_SIZE * cfgs.RPN_POSITIVE_RATE)
    fg_indices = np.where(labels == 1)[0]
    if len(fg_indices) > num_fg:
        disable_indices = np.random.choice(fg_indices,
                                           size=(len(fg_indices) - num_fg),
                                           replace=False)
        labels[disable_indices] = -1

    # background quota of the RPN minibatch; surplus negatives become -1
    num_bg = cfgs.RPN_MINIBATCH_SIZE - np.sum(labels == 1)
    if is_restrict:
        # num_fg * 1.5 is a float; np.random.choice rejects a float ``size``.
        num_bg = int(max(num_bg, num_fg * 1.5))

    bg_indices = np.where(labels == 0)[0]
    if len(bg_indices) > num_bg:
        disable_indices = np.random.choice(bg_indices,
                                           size=int(len(bg_indices) - num_bg),
                                           replace=False)
        labels[disable_indices] = -1

    bbox_targets = compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # map up to original set of anchors
    labels = unmap_anchor(labels, anchors_num, indices_inside, fill=-1)
    bbox_targets = unmap_anchor(bbox_targets,
                                anchors_num,
                                indices_inside,
                                fill=0)

    rpn_labels = labels.reshape((-1, 1))

    bbox_targets = bbox_targets.reshape((-1, 4))
    rpn_bbox_targets = bbox_targets

    return rpn_labels, rpn_bbox_targets
def anchor_target_layer(gt_boxes,
                        img_shape,
                        all_anchors,
                        is_restrict_bg=False):
    """Same as the anchor target layer in original Fast/er RCNN.

    Args:
        gt_boxes: ground-truth boxes; the last column (class label) is
            dropped before use.
        img_shape: indexable whose items 1 and 2 are the image height and
            width.
        all_anchors: anchors as rows of [x1, y1, x2, y2].
        is_restrict_bg: when True, the background quota becomes
            ``max(num_bg, num_fg * 1.5)``.
            NOTE(review): ``max`` can only raise the quota -- confirm intent.

    Returns:
        ``(rpn_labels, rpn_bbox_targets)``: labels of shape (-1, 1) with
        1 = positive, 0 = negative, -1 = don't care, and regression targets
        of shape (-1, 4), both covering the full anchor set.
    """

    total_anchors = all_anchors.shape[0]
    img_h, img_w = img_shape[1], img_shape[2]
    gt_boxes = gt_boxes[:, :-1]  # remove class label

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border)
        & (all_anchors[:, 1] >= -_allowed_border)
        & (all_anchors[:, 2] < img_w + _allowed_border) &  # width
        (all_anchors[:, 3] < img_h + _allowed_border)  # height
    )[0]

    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float64),
                             np.ascontiguousarray(gt_boxes, dtype=np.float64))

    # best gt box per anchor, and best anchor(s) per gt box (ties included)
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    # negatives first, so positives may overwrite them
    if not cfgs.TRAIN_RPN_CLOOBER_POSITIVES:
        labels[max_overlaps < cfgs.RPN_IOU_NEGATIVE_THRESHOLD] = 0

    labels[gt_argmax_overlaps] = 1
    labels[max_overlaps >= cfgs.RPN_IOU_POSITIVE_THRESHOLD] = 1

    # negatives last, so they overwrite positives
    if cfgs.TRAIN_RPN_CLOOBER_POSITIVES:
        labels[max_overlaps < cfgs.RPN_IOU_NEGATIVE_THRESHOLD] = 0

    # foreground quota; surplus positives are switched to -1
    num_fg = int(cfgs.RPN_MINIBATCH_SIZE * cfgs.RPN_POSITIVE_RATE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1

    # background quota; surplus negatives are switched to -1
    num_bg = cfgs.RPN_MINIBATCH_SIZE - np.sum(labels == 1)
    if is_restrict_bg:
        # num_fg * 1.5 is a float; npr.choice rejects a float ``size``.
        num_bg = int(max(num_bg, num_fg * 1.5))
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=int(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1

    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)

    rpn_labels = labels.reshape((-1, 1))

    bbox_targets = bbox_targets.reshape((-1, 4))
    rpn_bbox_targets = bbox_targets

    return rpn_labels, rpn_bbox_targets
Exemplo n.º 10
0
def anchor_target_layer(gt_boxes_h_batch,
                        gt_boxes_r_batch,
                        gt_encode_label_batch,
                        anchor_batch,
                        gpu_id=0):
    """Assign anchor targets for each of the ``cfgs.BATCH_SIZE`` images.

    Args:
        gt_boxes_h_batch: per-image ground-truth boxes used for overlaps when
            ``cfgs.METHOD == 'H'``; indexed as ``[i, :, :]``.
        gt_boxes_r_batch: per-image ground-truth boxes whose last column is a
            1-based class label; indexed as ``[i, :, :]``.
        gt_encode_label_batch: per-image encoded labels, one row per
            ground-truth box; indexed as ``[i, :, :]``.
        anchor_batch: per-image anchors.
        gpu_id: forwarded to ``rbbx_overlaps``.

    Returns:
        Tuple of float32 arrays stacked over the batch:
        ``(labels, target_delta, anchor_states, target_boxes,
        target_encode_label)``. The last column of ``target_delta`` is
        dropped. ``anchor_states`` is 1 for positive, -1 for ignored and 0
        for the remaining anchors.
    """

    all_labels, all_target_delta, all_anchor_states, all_target_boxes, all_target_encode_label = [], [], [], [], []
    for i in range(cfgs.BATCH_SIZE):
        anchors = np.array(anchor_batch[i], np.float32)
        gt_boxes_h = gt_boxes_h_batch[i, :, :]
        gt_boxes_r = gt_boxes_r_batch[i, :, :]
        gt_encode_label = gt_encode_label_batch[i, :, :]
        anchor_states = np.zeros((anchors.shape[0], ))
        labels = np.zeros((anchors.shape[0], cfgs.CLASS_NUM))
        if gt_boxes_r.shape[0]:
            # overlaps: [N anchors, M gt boxes]
            if cfgs.METHOD == 'H':
                # np.float was removed in NumPy 1.24; np.float64 is the
                # same dtype.
                overlaps = bbox_overlaps(
                    np.ascontiguousarray(anchors, dtype=np.float64),
                    np.ascontiguousarray(gt_boxes_h, dtype=np.float64))
            else:
                overlaps = rbbx_overlaps(
                    np.ascontiguousarray(anchors, dtype=np.float32),
                    np.ascontiguousarray(gt_boxes_r[:, :-1], dtype=np.float32),
                    gpu_id)

            argmax_overlaps_inds = np.argmax(overlaps, axis=1)
            max_overlaps = overlaps[np.arange(overlaps.shape[0]),
                                    argmax_overlaps_inds]

            # Best-matching ground-truth box (and its encoded label) for
            # every anchor.
            target_boxes = gt_boxes_r[argmax_overlaps_inds]
            target_encode_label = gt_encode_label[argmax_overlaps_inds]

            positive_indices = max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD
            ignore_indices = (max_overlaps >
                              cfgs.IOU_NEGATIVE_THRESHOLD) & ~positive_indices

            anchor_states[ignore_indices] = -1
            anchor_states[positive_indices] = 1

            # One-hot class label for positives (labels in gt are 1-based).
            labels[positive_indices,
                   target_boxes[positive_indices, -1].astype(int) - 1] = 1
        else:
            # no annotations? then everything is background
            target_boxes = np.zeros((anchors.shape[0], gt_boxes_r.shape[1]))
            target_encode_label = np.zeros(
                (anchors.shape[0], gt_encode_label.shape[1]))

        if cfgs.METHOD == 'H':
            # Rewrite the [x1, y1, x2, y2] anchors as
            # [x_c, y_c, w, h, theta] with theta fixed at -90.
            # NOTE(review): h comes from the x extent and w from the y
            # extent -- presumably intentional for theta = -90; confirm.
            x_c = (anchors[:, 2] + anchors[:, 0]) / 2
            y_c = (anchors[:, 3] + anchors[:, 1]) / 2
            h = anchors[:, 2] - anchors[:, 0] + 1
            w = anchors[:, 3] - anchors[:, 1] + 1
            theta = -90 * np.ones_like(x_c)
            anchors = np.vstack([x_c, y_c, w, h, theta]).transpose()

        if cfgs.ANGLE_RANGE == 180:
            anchors = coordinate_present_convert(anchors, mode=-1)
            target_boxes = coordinate_present_convert(target_boxes, mode=-1)
        target_delta = bbox_transform.rbbox_transform(ex_rois=anchors,
                                                      gt_rois=target_boxes)

        all_labels.append(labels)
        all_target_delta.append(target_delta)
        all_anchor_states.append(anchor_states)
        all_target_boxes.append(target_boxes)
        all_target_encode_label.append(target_encode_label)

    return np.array(all_labels, np.float32), np.array(all_target_delta, np.float32)[:, :, :-1], \
           np.array(all_anchor_states, np.float32), np.array(all_target_boxes, np.float32), \
           np.array(all_target_encode_label, np.float32)
Exemplo n.º 11
0
    def evaluate_recall(self,
                        candidate_boxes=None,
                        thresholds=None,
                        area='all',
                        limit=None):
        """Evaluate detection proposal recall metrics.

        Args:
            candidate_boxes: optional per-image sequence of proposal arrays.
                When None, the roidb boxes with ``gt_classes == 0`` are used
                as proposals.
            thresholds: IoU thresholds to evaluate; defaults to
                0.5, 0.55, ..., 0.95.
            area: name of the ground-truth area range to evaluate ('all',
                'small', 'medium', 'large', '96-128', '128-256', '256-512',
                '512-inf').
            limit: if given, only the first ``limit`` proposals of each image
                are used.

        Returns:
            results: dictionary of results with keys
                'ar': average recall
                'recalls': vector recalls at each IoU overlap threshold
                'thresholds': vector of IoU overlap thresholds
                'gt_overlaps': vector of all ground-truth overlaps

        Raises:
            AssertionError: if ``area`` is not a known range name.
        """
        # Record max overlap value for each gt box
        # Return vector of overlap values
        areas = {
            'all': 0,
            'small': 1,
            'medium': 2,
            'large': 3,
            '96-128': 4,
            '128-256': 5,
            '256-512': 6,
            '512-inf': 7
        }
        area_ranges = [
            [0**2, 1e5**2],  # all
            [0**2, 32**2],  # small
            [32**2, 96**2],  # medium
            [96**2, 1e5**2],  # large
            [96**2, 128**2],  # 96-128
            [128**2, 256**2],  # 128-256
            [256**2, 512**2],  # 256-512
            [512**2, 1e5**2],  # 512-inf
        ]
        # dict.has_key() no longer exists in Python 3; ``in`` works everywhere.
        assert area in areas, 'unknown area range: {}'.format(area)
        area_range = area_ranges[areas[area]]
        gt_overlaps = np.zeros(0)
        num_pos = 0
        # range instead of xrange: xrange is Python 2 only.
        for i in range(self.num_images):
            # Checking for max_overlaps == 1 avoids including crowd annotations
            # (...pretty hacking :/)
            max_gt_overlaps = self.roidb[i]['gt_overlaps'].toarray().max(
                axis=1)
            gt_inds = np.where((self.roidb[i]['gt_classes'] > 0)
                               & (max_gt_overlaps == 1))[0]
            gt_boxes = self.roidb[i]['boxes'][gt_inds, :]
            gt_areas = self.roidb[i]['seg_areas'][gt_inds]
            valid_gt_inds = np.where((gt_areas >= area_range[0])
                                     & (gt_areas <= area_range[1]))[0]
            gt_boxes = gt_boxes[valid_gt_inds, :]
            num_pos += len(valid_gt_inds)

            if candidate_boxes is None:
                # If candidate_boxes is not supplied, the default is to use the
                # non-ground-truth boxes from this roidb
                non_gt_inds = np.where(self.roidb[i]['gt_classes'] == 0)[0]
                boxes = self.roidb[i]['boxes'][non_gt_inds, :]
            else:
                boxes = candidate_boxes[i]
            if boxes.shape[0] == 0:
                continue
            if limit is not None and boxes.shape[0] > limit:
                boxes = boxes[:limit, :]

            # np.float was removed in NumPy 1.24; np.float64 is the same
            # dtype.
            overlaps = bbox_overlaps(boxes.astype(np.float64),
                                     gt_boxes.astype(np.float64))

            # Greedy one-to-one matching: each round picks the best covered
            # remaining gt box and retires it together with its proposal.
            _gt_overlaps = np.zeros((gt_boxes.shape[0]))
            for j in range(gt_boxes.shape[0]):
                # find which proposal box maximally covers each gt box
                argmax_overlaps = overlaps.argmax(axis=0)
                # and get the iou amount of coverage for each gt box
                max_overlaps = overlaps.max(axis=0)
                # find which gt box is 'best' covered (i.e. 'best' = most iou)
                gt_ind = max_overlaps.argmax()
                gt_ovr = max_overlaps.max()
                assert (gt_ovr >= 0)
                # find the proposal box that covers the best covered gt box
                box_ind = argmax_overlaps[gt_ind]
                # record the iou coverage of this gt box
                _gt_overlaps[j] = overlaps[box_ind, gt_ind]
                assert (_gt_overlaps[j] == gt_ovr)
                # mark the proposal box and the gt box as used
                overlaps[box_ind, :] = -1
                overlaps[:, gt_ind] = -1
            # append recorded iou coverage level
            gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

        gt_overlaps = np.sort(gt_overlaps)
        if thresholds is None:
            step = 0.05
            thresholds = np.arange(0.5, 0.95 + 1e-5, step)
        recalls = np.zeros_like(thresholds)
        # compute recall for each iou threshold
        for i, t in enumerate(thresholds):
            recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
        ar = recalls.mean()
        return {
            'ar': ar,
            'recalls': recalls,
            'thresholds': thresholds,
            'gt_overlaps': gt_overlaps
        }
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image,
                 num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Args:
        all_rois: candidate RoIs, shape [-1, 4].
        gt_boxes: ground-truth boxes, shape [-1, 5], that is
            [x1, y1, x2, y2, label].
        fg_rois_per_image: upper bound on the number of foreground RoIs.
        rois_per_image: upper bound on the total number of sampled RoIs.
        num_classes: forwarded to ``_get_bbox_regression_labels``.

    Returns:
        Tuple ``(labels, rois, bbox_targets)`` for the sampled RoIs.
        Foreground RoIs come first; the labels of all RoIs after them are
        set to 0.
    """
    # overlaps: (rois x gt_boxes)
    # Overlap of every RoI with every ground-truth box.
    # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois, dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :-1], dtype=np.float64))

    # For each RoI: index of the gt box it overlaps most, and that overlap.
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    # Class label of the assigned gt box.
    labels = gt_boxes[gt_assignment, -1]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(
        max_overlaps >= cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD)[0]

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where(
        (max_overlaps < cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD)
        & (max_overlaps >= cfgs.FAST_RCNN_IOU_NEGATIVE_THRESHOLD))[0]

    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs: take the smaller of the two counts.
    fg_rois_per_this_image = min(fg_rois_per_image, fg_inds.size)

    # Sample foreground regions without replacement
    if fg_inds.size > 0:  # there is at least one foreground RoI
        fg_inds = npr.choice(fg_inds,
                             size=int(fg_rois_per_this_image),
                             replace=False)
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds,
                             size=int(bg_rois_per_this_image),
                             replace=False)

    # The indices that we're selecting (fg first, then bg). Both sets were
    # chosen by threshold; note that bg also has a lower overlap bound.
    keep_inds = np.append(fg_inds, bg_inds)

    # Fancy indexing copies, so the clamp below does not touch gt_boxes.
    labels = labels[keep_inds]

    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_this_image):] = 0
    rois = all_rois[keep_inds]

    # Targets are computed from the kept RoIs, their assigned gt boxes and
    # their labels.
    bbox_target_data = _compute_targets(
        rois, gt_boxes[gt_assignment[keep_inds], :-1], labels)
    # NOTE(review): per the original author's note, num_classes comes from
    # the tf-record and includes background, i.e. one more than the number
    # of real classes -- confirm against the caller.
    bbox_targets = _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, bbox_targets
Exemplo n.º 13
0
def sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image,
                num_classes):
    """
    Generate a random sample of RoIs comprising foreground and background examples.

    :param all_rois: rois shape is [-1, 4]
    :param gt_boxes: gt_boxes shape is [-1, 5]. that is [x1, y1, x2, y2, label]
    :param fg_rois_per_image: upper bound on the number of foreground RoIs
    :param rois_per_image: upper bound on the total number of sampled RoIs
    :param num_classes: object_classes + 1(background)
    :return: tuple ``(labels, rois, bbox_targets)`` for the sampled RoIs;
        foreground RoIs come first and the labels of all RoIs after them
        are set to 0
    """
    # overlaps: (rois x gt_boxes)
    # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois, dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :-1], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, -1]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_indices = np.where(
        max_overlaps >= cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD)[0]

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_indices = np.where(
        (max_overlaps < cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD)
        & (max_overlaps >= cfgs.FAST_RCNN_IOU_NEGATIVE_THRESHOLD))[0]

    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = min(fg_rois_per_image, fg_indices.size)

    # Sample foreground regions without replacement
    if fg_indices.size > 0:
        fg_indices = np.random.choice(fg_indices,
                                      size=int(fg_rois_per_this_image),
                                      replace=False)
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = min(bg_rois_per_this_image, bg_indices.size)
    # Sample background regions without replacement
    if bg_indices.size > 0:
        bg_indices = np.random.choice(bg_indices,
                                      size=int(bg_rois_per_this_image),
                                      replace=False)

    # The indices that we're selecting (fg first, then bg)
    keep_inds = np.append(fg_indices, bg_indices)

    # Fancy indexing copies, so the clamp below does not touch gt_boxes.
    labels = labels[keep_inds]

    # Clamp labels for the background RoIs to 0; foreground RoIs keep the
    # label of their assigned gt box.
    labels[int(fg_rois_per_this_image):] = 0
    rois = all_rois[keep_inds]

    bbox_target_data = compute_targets(
        ex_rois=rois,
        gt_rois=gt_boxes[gt_assignment[keep_inds], :-1],  # bbox
        labels=labels)  # labels
    bbox_targets = get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, bbox_targets