def im_detect(sess, net, im):
    # Only a single image scale (600) is applied here
    blobs, im_scales = _get_blobs(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"

    im_blob = blobs['data']
    # im_info holds the blob height, width, and the image scale factor
    # (1.0 if the image was not resized)
    blobs['im_info'] = np.array(
        [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], dtype=np.float32)

    _, scores, bbox_pred, rois = net.test_image(sess, blobs['data'],
                                                blobs['im_info'])

    boxes = rois[:, 1:5] / im_scales[0]
    # print(scores.shape, bbox_pred.shape, rois.shape, boxes.shape)
    scores = np.reshape(scores, [scores.shape[0], -1])
    bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1])
    # tf.app.flags.DEFINE_boolean('test_bbox_reg', True, "Test using bounding-box regressors")
    if cfg.FLAGS.test_bbox_reg:
        # Apply bounding-box regression deltas
        # box_deltas are the predicted coordinate offsets
        box_deltas = bbox_pred
        # pred_boxes are the refined bbox coordinates; for every predicted class there is one regressed bbox
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = _clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    return scores, pred_boxes
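
For reference, the bbox_transform_inv helper used above applies the predicted (dx, dy, dw, dh) deltas to the boxes. Below is a minimal NumPy sketch of the standard Faster R-CNN transform; the helper imported by these examples may differ in small details such as the +1.0 width/height convention.

import numpy as np

def bbox_transform_inv_sketch(boxes, deltas):
    # boxes: (N, 4) rows of (x1, y1, x2, y2); deltas: (N, 4*K), one (dx, dy, dw, dh) group per class.
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    dx, dy = deltas[:, 0::4], deltas[:, 1::4]
    dw, dh = deltas[:, 2::4], deltas[:, 3::4]

    # Shift the centre and rescale the size of every box.
    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
    pred_w = np.exp(dw) * widths[:, np.newaxis]
    pred_h = np.exp(dh) * heights[:, np.newaxis]

    pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w  # x1
    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h  # y1
    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w  # x2
    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h  # y2
    return pred_boxes
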
Example 2
def demo(sess, net, image_name, memory_storex, memory_storey,
         kitti_memory_0323, AN, sess2):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    im = cv2.imread(image_name)
    im = cv2.resize(im, (1242, 375))
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, bbox_pred, _, rois, fc = im_detect(sess, net, im, memory_storex,
                                               memory_storey)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, bbox_pred.shape[0]))
    # Visualize detections for each class
    CONF_THRESH = 0.1
    NMS_THRESH = 0.1
    im_shape = im.shape[:2]
    box_deltas = bbox_pred
    pred_boxes = bbox_transform_inv(rois, box_deltas)
    boxes = clip_boxes(pred_boxes, im_shape)

    # show.vis_detections(image_name, scores, boxes, dis_pre, fc, NMS_THRESH, CONF_THRESH)
    show.vis_detections(image_name, scores, boxes, fc, kitti_memory_0323, AN,
                        sess2, NMS_THRESH, CONF_THRESH)
Example 3
def im_detect(sess, net, im):
    # Scale the image to the test size
    blobs, im_scales = _get_blobs(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"

    # Pack the inputs the network expects
    im_blob = blobs['data']
    blobs['im_info'] = np.array(
        [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], dtype=np.float32)
    # Run the test pass; with NMS-based proposal generation there are roughly 300 rois
    _, scores, bbox_pred, rois = net.test_image(sess, blobs['data'],
                                                blobs['im_info'])
    # Rescale the rois back to the original image size; rois is 300 x 5 and its first column is all zeros (the batch index)
    boxes = rois[:, 1:5] / im_scales[0]
    # scores: 300 x 21
    scores = np.reshape(scores, [scores.shape[0], -1])
    # bbox_pred: 300 x 84
    bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1])

    if cfg.FLAGS.test_bbox_reg:  # True
        # Refine the boxes on the original image with the predicted offsets, then clip anything outside the boundary
        box_deltas = bbox_pred
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = _clip_boxes(pred_boxes, im.shape)
    else:
        # Without regression, simply repeat the boxes once per class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    return scores, pred_boxes
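
The _get_blobs helper is not shown on this page; in the reference Faster R-CNN code it mean-subtracts the image and rescales it so the shorter side matches a target size (the 600 mentioned in the comments above). A hedged sketch of that preprocessing, with assumed constants standing in for the real config values:

import numpy as np
import cv2

# Assumed values; the real ones come from the detector's config.
PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]])
TARGET_SIZE, MAX_SIZE = 600, 1000

def get_blobs_sketch(im):
    im_orig = im.astype(np.float32, copy=True) - PIXEL_MEANS
    im_size_min = np.min(im_orig.shape[0:2])
    im_size_max = np.max(im_orig.shape[0:2])
    im_scale = float(TARGET_SIZE) / float(im_size_min)
    # Cap the longer side so elongated images do not become huge.
    if np.round(im_scale * im_size_max) > MAX_SIZE:
        im_scale = float(MAX_SIZE) / float(im_size_max)
    im_resized = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                            interpolation=cv2.INTER_LINEAR)
    blobs = {'data': im_resized[np.newaxis, :, :, :]}  # (1, H, W, 3)
    return blobs, [im_scale]
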
Example 4
def im_detect(sess, net, im):
    # Prepare the image blob for detection
    blobs, im_scales = _get_blobs(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"

    im_blob = blobs['data']
    # im_info holds the blob height, width, and the image scale factor
    # (1.0 if the image was not resized)
    blobs['im_info'] = np.array(
        [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], dtype=np.float32)

    _, scores, bbox_pred, rois = net.test_image(sess, blobs['data'],
                                                blobs['im_info'])

    print('scores:', scores)
    print('bbox_pred:', bbox_pred)
    print('rois:', rois)
    boxes = rois[:, 1:5] / im_scales[0]
    print(scores.shape, bbox_pred.shape, rois.shape, boxes.shape)
    scores = np.reshape(scores, [scores.shape[0], -1])
    bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1])
    if cfg.FLAGS.test_bbox_reg:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = _clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    return scores, pred_boxes
Example 5
def im_detect(sess, net, im):
    blobs, im_scales = _get_blobs(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"

    im_blob = blobs['data']
    # im_info holds the blob height, width, and the image scale factor
    # (1.0 if the image was not resized)
    blobs['im_info'] = np.array(
        [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], dtype=np.float32)
    # scores, predicted bbox offsets, and regions of interest
    _, scores, bbox_pred, rois = net.test_image(sess, blobs['data'],
                                                blobs['im_info'])

    boxes = rois[:, 1:5] / im_scales[0]
    # print(scores.shape, bbox_pred.shape, rois.shape, boxes.shape)
    scores = np.reshape(scores, [scores.shape[0], -1])
    bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1])
    # tf.app.flags.DEFINE_boolean('test_bbox_reg', True, "Test using bounding-box regressors")
    if cfg.FLAGS.test_bbox_reg:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred
        # Compute the predicted coordinates after applying the offsets
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        # Clip the predicted boxes to the image extent
        pred_boxes = _clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    return scores, pred_boxes
Example 6
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride,
                   anchors, num_anchors):
    """A simplified version compared to fast/er RCNN
       For details please see the technical report
    """
    if type(cfg_key) == bytes:
        cfg_key = cfg_key.decode('utf-8')

    if cfg_key == "TRAIN":
        pre_nms_topN = cfg.FLAGS.rpn_train_pre_nms_top_n
        post_nms_topN = cfg.FLAGS.rpn_train_post_nms_top_n
        nms_thresh = cfg.FLAGS.rpn_train_nms_thresh
    else:
        pre_nms_topN = cfg.FLAGS.rpn_test_pre_nms_top_n
        post_nms_topN = cfg.FLAGS.rpn_test_post_nms_top_n
        nms_thresh = cfg.FLAGS.rpn_test_nms_thresh
    # Values read from the config file:
    # post_nms_topN (number of proposals kept after NMS)
    # nms_thresh (NMS threshold)

    # Learned parameters: rpn_bbox_pred
    # The raw anchor proposals are transformed by the learned deltas towards the ground truth, and anything outside the image is clipped

    im_info = im_info[0]
    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    # bbox_transform_inv:
    # shift and scale each anchor by the learned offsets (the rpn_bbox_pred values) to obtain the final predicted box;
    # i.e. the raw proposal A is mapped, via the learned rpn_bbox_pred parameters, to a prediction G' close to the ground truth G
    proposals = clip_boxes(proposals, im_info[:2])
    # clip_boxes:
    # trim off anything that falls outside the original image

    # Pick the top region proposals
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Pick the top region proposals after NMS
    # Run NMS and keep the final proposals
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    return blob, scores
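
clip_boxes (and _clip_boxes in the im_detect examples) simply truncates predicted coordinates to the image extent. A minimal NumPy sketch, assuming boxes are stored as repeating (x1, y1, x2, y2) groups and im_shape starts with (height, width):

import numpy as np

def clip_boxes_sketch(boxes, im_shape):
    # x1 and x2 are limited to [0, width - 1], y1 and y2 to [0, height - 1].
    boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
    boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
    boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
    boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
    return boxes
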
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride,
                   anchors, num_anchors):
    """A simplified version compared to fast/er RCNN
       For details please see the technical report
    """
    if type(cfg_key) == bytes:
        cfg_key = cfg_key.decode('utf-8')

    if cfg_key == "TRAIN":
        # Before non-maximum suppression, keep 12000 bboxes
        pre_nms_topN = cfg.FLAGS.rpn_train_pre_nms_top_n
        # After non-maximum suppression, keep 2000 bboxes
        post_nms_topN = cfg.FLAGS.rpn_train_post_nms_top_n
        # NMS threshold 0.7: bboxes whose IoU with a higher-scoring bbox exceeds 0.7 are discarded
        nms_thresh = cfg.FLAGS.rpn_train_nms_thresh
    else:
        # Before non-maximum suppression, keep 6000 bboxes
        pre_nms_topN = cfg.FLAGS.rpn_test_pre_nms_top_n
        # After non-maximum suppression, keep 300 bboxes
        post_nms_topN = cfg.FLAGS.rpn_test_post_nms_top_n
        # NMS threshold 0.7
        nms_thresh = cfg.FLAGS.rpn_test_nms_thresh

    im_info = im_info[0]
    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals
    order = scores.ravel().argsort()[::-1]
    # By classification score, keep the top pre_nms_topN bboxes (12000 train / 6000 test)
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # After NMS keep post_nms_topN bboxes (2000 train / 300 test)
    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    # Return the bboxes and their classification scores
    return blob, scores
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride,
                   anchors, num_anchors):
    """A simplified version compared to fast/er RCNN
       For details please see the technical report
    """
    if type(cfg_key) == bytes:
        cfg_key = cfg_key.decode('utf-8')

    # At train time, take the top 12000 anchors by score, run NMS, then keep the top 2000
    # At test time the numbers are 6000 and 300
    if cfg_key == "TRAIN":
        pre_nms_topN = cfg.FLAGS.rpn_train_pre_nms_top_n
        post_nms_topN = cfg.FLAGS.rpn_train_post_nms_top_n
        nms_thresh = cfg.FLAGS.rpn_train_nms_thresh
    else:
        pre_nms_topN = cfg.FLAGS.rpn_test_pre_nms_top_n
        post_nms_topN = cfg.FLAGS.rpn_test_post_nms_top_n
        nms_thresh = cfg.FLAGS.rpn_test_nms_thresh

    im_info = im_info[0]
    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape(
        (-1, 4))  # one row per anchor; every 9 rows belong to the 9 anchors of one location
    scores = scores.reshape((-1, 1))  # one row per anchor; every 9 rows belong to the 9 anchors of one location

    # First refinement of the anchor positions using the RPN regression output (rpn_bbox_pred)
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    # Clip boxes to image boundaries.
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals
    # argsort() sorts the array elements in ascending order and returns their indices; [::-1] reverses them
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    # The returned blob has one extra (batch-index) column prepended
    return blob, scores
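
The nms call above takes (N, 5) rows of [x1, y1, x2, y2, score] and returns the indices to keep, ordered by score. A pure-NumPy sketch of the usual greedy algorithm (the examples on this page normally call a compiled CPU/GPU version instead):

import numpy as np

def nms_sketch(dets, thresh):
    x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # Intersection of the current highest-scoring box with every remaining box.
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        # Drop boxes whose IoU with the kept box exceeds the threshold.
        order = order[np.where(ovr <= thresh)[0] + 1]
    return keep
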
Example 9
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride,
                   anchors, num_anchors):
    """A simplified version compared to fast/er RCNN
       For details please see the technical report
    """
    if type(cfg_key) == bytes:
        cfg_key = cfg_key.decode('utf-8')

    if cfg_key == "TRAIN":
        pre_nms_topN = cfg.FLAGS.rpn_train_pre_nms_top_n  # 'rpn_train_pre_nms_top_n', 12000
        post_nms_topN = cfg.FLAGS.rpn_train_post_nms_top_n  # 'rpn_train_post_nms_top_n', 2000
        nms_thresh = cfg.FLAGS.rpn_train_nms_thresh  # 'rpn_train_nms_thresh', 0.7
    else:
        pre_nms_topN = cfg.FLAGS.rpn_test_pre_nms_top_n
        post_nms_topN = cfg.FLAGS.rpn_test_post_nms_top_n
        nms_thresh = cfg.FLAGS.rpn_test_nms_thresh

    im_info = im_info[0]
    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :,
                          num_anchors:]  # [1, H, W, 2*num_anchors]: of the 18 (bg, fg) channels only the 9 fg channels are taken
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))  # from (1, height, width, A * 4)
    scores = scores.reshape((-1, 1))
    # Compute the predicted coordinates after applying the offsets
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    # 2. Clip the predicted boxes to the image extent
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals
    # ravel() flattens the array; argsort() sorts ascending and returns the indices; [::-1] reverses to descending order
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
        # Keep the pre_nms_topN entries with the highest scores
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    return blob, scores
Example 10
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride,
                   anchors, anchors_dis, num_anchors):
    """A simplified version compared to fast/er RCNN
       For details please see the technical report
    """
    if type(cfg_key) == bytes:
        cfg_key = cfg_key.decode('utf-8')

    if cfg_key == "TRAIN":
        pre_nms_topN = cfg.FLAGS.rpn_train_pre_nms_top_n
        post_nms_topN = cfg.FLAGS.rpn_train_post_nms_top_n
        nms_thresh = cfg.FLAGS.rpn_train_nms_thresh
    else:
        pre_nms_topN = cfg.FLAGS.rpn_test_pre_nms_top_n
        post_nms_topN = cfg.FLAGS.rpn_test_post_nms_top_n
        nms_thresh = cfg.FLAGS.rpn_test_nms_thresh

    im_info = im_info[0]
    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))
    # print("anchors_dis", anchors_dis.shape)  # anc (16848, 4) (16848, 4)
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    proposals_dis = anchors_dis[order, :]
    scores = scores[order]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    proposals_dis = proposals_dis[keep, :]
    scores = scores[keep]

    # Only support single image as input  proposals.shape[0]:2000  blob.shape:[2000, 8]
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False),
                      proposals_dis.astype(np.float32, copy=False)))
    return blob, scores
Example 11
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    if type(cfg_key) == bytes:
        cfg_key = cfg_key.decode('utf-8')

    if cfg_key == "TRAIN":
        pre_nms_topN = cfg.FLAGS.rpn_train_pre_nms_top_n #12000
        post_nms_topN = cfg.FLAGS.rpn_train_post_nms_top_n #2000
        nms_thresh = cfg.FLAGS.rpn_train_nms_thresh #0.7
    else:
        pre_nms_topN = cfg.FLAGS.rpn_test_pre_nms_top_n #6000
        post_nms_topN = cfg.FLAGS.rpn_test_post_nms_top_n #300
        nms_thresh = cfg.FLAGS.rpn_test_nms_thresh #0.7
    # im_info has shape (1, 3), so take the first row
    im_info = im_info[0]
    # 1 * H * W * 9: keep the other dimensions and take the last 9 of the 18 channels (the foreground scores)
    scores = rpn_cls_prob[:, :, :, num_anchors:]  # 9
    # 9WH x 4 offsets
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    # 9WH scores
    scores = scores.reshape((-1, 1))
    # Adjust the anchors by the predicted offsets to obtain proposals
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    # Clip proposals that extend past the boundary back into the image
    proposals = clip_boxes(proposals, im_info[:2])  # im_info[:2] is (height, width)

    # Sort the scores in descending order; order holds the corresponding indices
    order = scores.ravel().argsort()[::-1]
    # Using those indices, keep the top 12000 proposals and their scores
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximum suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)  # np.hstack((proposals, scores)) gives rows of [x1, y1, x2, y2, score]

    # keep is already sorted by score, so just take the first 2000
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Prepend a column to the proposals; the first column is all 0.0 (the batch index)
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    return blob, scores
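
Every proposal_layer variant above assumes the anchors argument is the full grid: the A base anchors replicated at each feature-map cell and shifted by the feature stride. A hedged NumPy sketch of how that grid is typically built (the base anchors themselves come from a generate_anchors-style helper with the detector's scales and ratios):

import numpy as np

def shifted_anchors_sketch(base_anchors, height, width, feat_stride):
    # base_anchors: (A, 4) anchors defined around the origin; height/width: feature-map size.
    shift_x = np.arange(0, width) * feat_stride
    shift_y = np.arange(0, height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    A = base_anchors.shape[0]
    K = shifts.shape[0]
    # Broadcast: one copy of the A base anchors at each of the K feature-map cells.
    anchors = (base_anchors.reshape((1, A, 4)) +
               shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    return anchors.reshape((K * A, 4)).astype(np.float32)  # (K*A, 4), rows ordered by (h, w, a)
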
def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, anchors, num_anchors):
    """A layer that just selects the top region proposals
       without using non-maximal suppression,
       For details please see the technical report
    """
    # 300 "Only useful when TEST.MODE is 'top', specifies the number of top proposals to select"
    rpn_top_n = cfg.FLAGS.rpn_top_n
    im_info = im_info[0]

    scores = rpn_cls_prob[:, :, :, num_anchors:]

    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))

    length = scores.shape[0]
    if length < rpn_top_n:
        # Random selection, maybe unnecessary and loses good proposals
        # But such case rarely happens
        top_inds = npr.choice(length, size=rpn_top_n, replace=True)
    else:
        top_inds = scores.argsort(0)[::-1]   # slicing syntax is [start : end : step]; [::-1] reverses the order
        top_inds = top_inds[:rpn_top_n]
        top_inds = top_inds.reshape(rpn_top_n, )

    # Do the selection here
    anchors = anchors[top_inds, :]
    rpn_bbox_pred = rpn_bbox_pred[top_inds, :]
    scores = scores[top_inds]

    # Convert anchors into proposals via bbox transformations
    # First refinement of the anchor positions using the RPN regression output (rpn_bbox_pred)
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)

    # Clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, scores
Example 13
def freeze_graph_test(sess, blobs):
    '''
    :param sess: session whose graph was restored from the frozen pb file
    :param blobs: dict holding the image blob ('data') and 'im_info'
    :return: scores and predicted boxes
    '''
    # Define the input tensor names, matching the network's input tensors
    # ("Placeholder:0" takes the image blob, "Placeholder_1:0" takes im_info)
    # Define the output tensor names
    input_image_tensor = sess.graph.get_tensor_by_name("Placeholder:0")
    tensor_info = sess.graph.get_tensor_by_name("Placeholder_1:0")

    biasadd = sess.graph.get_tensor_by_name("vgg_16/cls_score/BiasAdd:0")
    score = sess.graph.get_tensor_by_name("vgg_16/cls_prob:0")
    bbox = sess.graph.get_tensor_by_name("add:0")
    rois = sess.graph.get_tensor_by_name("vgg_16/rois/PyFunc:0")

    # input_image_tensor = tf.placeholder(tf.float32, shape=[1, None, None, 3])
    # tensor_info = tf.placeholder(tf.float32, shape=[3, ])
    # print("输入tensor")
    # print(input_image_tensor)
    # print(tensor_info)
    feed_dict = {
        input_image_tensor: blobs['data'],
        tensor_info: blobs['im_info']
    }
    # The model prediction returns four values:
    # scores, bbox_pred, rois: detection-box scores, regression deltas, and the detection boxes
    _, scores, bbox_pred, rois = sess.run([biasadd, score, bbox, rois],
                                          feed_dict=feed_dict)
    print("=======blobs==========\n", blobs)
    im_scales = blobs['im_info'][2]
    # Map the rois back to the original image scale
    boxes = rois[:, 1:5] / im_scales
    scores = np.reshape(scores, [scores.shape[0], -1])
    bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1])
    # Apply bounding-box regression deltas
    box_deltas = bbox_pred
    pred_boxes = bbox_transform_inv(boxes, box_deltas)
    pred_boxes = _clip_boxes(pred_boxes, (255, 255, 0))

    return scores, pred_boxes
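
freeze_graph_test expects a session whose graph was already restored from a frozen .pb file. A minimal TensorFlow 1.x loading sketch, assuming the tensor names used above exist in that graph:

import tensorflow as tf

def load_frozen_graph_sketch(pb_path):
    # Read the serialized GraphDef and import it into a fresh graph.
    with tf.gfile.GFile(pb_path, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    graph = tf.Graph()
    with graph.as_default():
        tf.import_graph_def(graph_def, name='')
    return tf.Session(graph=graph)

# Hypothetical usage:
# sess = load_frozen_graph_sketch('path/to/frozen_model.pb')
# scores, pred_boxes = freeze_graph_test(sess, blobs)
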
Example 14
def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride,
                       anchors, num_anchors):
    # Keep only 300 rois
    rpn_top_n = cfg.FLAGS.rpn_top_n  # 300
    # im_info has shape (1, 3), so take the first row
    im_info = im_info[0]
    # 1 * H * W * 9: keep the other dimensions and take the last 9 of the 18 channels (the foreground scores)
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    # 9WH x 4 offsets
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    # 9WH scores
    scores = scores.reshape((-1, 1))

    length = scores.shape[0]  # 9HW
    if length < rpn_top_n:  # fewer than 300 anchors in total
        # Randomly sample (with replacement) until we have 300
        top_inds = npr.choice(length, size=rpn_top_n, replace=True)
    else:
        # Take the 300 highest-scoring entries
        top_inds = scores.argsort(0)[::-1]
        top_inds = top_inds[:rpn_top_n]
        top_inds = top_inds.reshape(rpn_top_n, )

    # Gather the top 300 anchors, bbox predictions, and scores
    anchors = anchors[top_inds, :]
    rpn_bbox_pred = rpn_bbox_pred[top_inds, :]
    scores = scores[top_inds]

    # Refine the anchors with the predicted bbox offsets
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    # Clip anything outside the image
    proposals = clip_boxes(proposals, im_info[:2])

    # Prepend a column to the proposals; the first column is all 0.0 (the batch index)
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    return blob, scores
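
The blob returned above, one row of [batch_index, x1, y1, x2, y2] per proposal, is what the downstream RoI pooling consumes. In TensorFlow implementations this is often done with tf.image.crop_and_resize on coordinates normalized to the input image; a hedged sketch of such a crop-and-pool step (function name and pooling size are assumptions, not the exact code used here):

import tensorflow as tf

def crop_pool_layer_sketch(feature_map, rois, im_height, im_width, pool_size=7):
    # rois: (N, 5) rows of [batch_index, x1, y1, x2, y2] in input-image pixels.
    batch_ids = tf.cast(rois[:, 0], tf.int32)
    x1 = rois[:, 1] / tf.cast(im_width - 1, tf.float32)
    y1 = rois[:, 2] / tf.cast(im_height - 1, tf.float32)
    x2 = rois[:, 3] / tf.cast(im_width - 1, tf.float32)
    y2 = rois[:, 4] / tf.cast(im_height - 1, tf.float32)
    # crop_and_resize expects [y1, x1, y2, x2] in [0, 1]; the feature map covers the same extent as the image.
    boxes = tf.stack([y1, x1, y2, x2], axis=1)
    crops = tf.image.crop_and_resize(feature_map, boxes, batch_ids,
                                     [pool_size * 2, pool_size * 2])
    return tf.nn.max_pool(crops, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
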
Example 15
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride,
                   anchors, num_anchors):
    if type(cfg_key) == bytes:
        cfg_key = cfg_key.decode('utf-8')
    if cfg_key == "TRAIN":
        pre_nms_topN = cfg.FLAGS.rpn_train_pre_nms_top_n
        post_nms_topN = cfg.FLAGS.rpn_train_post_nms_top_n
        nms_thresh = cfg.FLAGS.rpn_train_nms_thresh
    else:
        pre_nms_topN = cfg.FLAGS.rpn_test_pre_nms_top_n
        post_nms_topN = cfg.FLAGS.rpn_test_post_nms_top_n
        nms_thresh = cfg.FLAGS.rpn_test_nms_thresh
    im_info = im_info[0]
    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])
    # Pick the top region proposals
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    # Only support single image as input
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, scores
Example 16
    def test(net, data_loader, data_logger):
        #####################################
        # Preparation
        #####################################
        os.makedirs(cfg.TEST_SAVE_DIR, exist_ok=True)
        mAP_CLASSIFICATION = Evaluate_metric(cfg.NUM_CLASSES, ignore_class=[0], overlap_threshold=cfg.MAP_THRESH)
        mAP_MASK = Evaluate_metric(cfg.NUM_CLASSES, ignore_class=[0], overlap_threshold=cfg.MAP_THRESH)

        ####################################
        # Accumulate data
        ####################################
        pred_all = {}
        gt_all = {}

        timer = Timer()
        timer.tic()
        print('starting test on whole scan....')
        for iter, blobs in enumerate(tqdm(data_loader)):

            try:
                gt_box = blobs['gt_box'][0].numpy()[:, 0:6]
                gt_class = blobs['gt_box'][0][:, 6].numpy()
            except:
                continue

            # color proj
            killing_inds = None
            if cfg.USE_IMAGES:
                grid_shape = blobs['data'].shape[-3:]
                projection_helper = ProjectionHelper(cfg.INTRINSIC, cfg.PROJ_DEPTH_MIN, cfg.PROJ_DEPTH_MAX, cfg.DEPTH_SHAPE, grid_shape, cfg.VOXEL_SIZE)
                if grid_shape[0]*grid_shape[1]*grid_shape[2] > cfg.MAX_VOLUME or blobs['nearest_images']['depths'][0].shape[0] > cfg.MAX_IMAGE:
                    proj_mapping = [projection_helper.compute_projection(d, c, t) for d, c, t in zip(blobs['nearest_images']['depths'][0], blobs['nearest_images']['poses'][0], blobs['nearest_images']['world2grid'][0])]
                else:
                    proj_mapping = [projection_helper.compute_projection(d.cuda(), c.cuda(), t.cuda()) for d, c, t in zip(blobs['nearest_images']['depths'][0], blobs['nearest_images']['poses'][0], blobs['nearest_images']['world2grid'][0])]
                    
                killing_inds = []
                real_proj_mapping = []
                if None in proj_mapping: #invalid sample
                    for killing_ind, killing_item in enumerate(proj_mapping):
                        if killing_item == None:
                            killing_inds.append(killing_ind)
                        else:
                            real_proj_mapping.append(killing_item)
                    print('{}: (invalid sample: no valid projection)'.format(blobs['id']))
                else:
                    real_proj_mapping = proj_mapping
                blobs['proj_ind_3d'] = []
                blobs['proj_ind_2d'] = []
                proj_mapping0, proj_mapping1 = zip(*real_proj_mapping)
                blobs['proj_ind_3d'].append(torch.stack(proj_mapping0))
                blobs['proj_ind_2d'].append(torch.stack(proj_mapping1))

            net.forward(blobs, 'TEST', killing_inds)

            # test with detection pipeline
            pred_class = net._predictions['cls_pred'].data.cpu().numpy()
            rois = net._predictions['rois'][0].cpu()
            box_reg_pre = net._predictions["bbox_pred"].data.cpu().numpy()
            box_reg = np.zeros((box_reg_pre.shape[0], 6))
            pred_conf_pre = net._predictions['cls_prob'].data.cpu().numpy()
            pred_conf = np.zeros((pred_conf_pre.shape[0]))

            for pred_ind in range(pred_class.shape[0]):
                box_reg[pred_ind, :] = box_reg_pre[pred_ind, pred_class[pred_ind]*6:(pred_class[pred_ind]+1)*6]
                pred_conf[pred_ind] = pred_conf_pre[pred_ind, pred_class[pred_ind]]

            pred_box = bbox_transform_inv(rois, torch.from_numpy(box_reg).float())
            pred_box = clip_boxes(pred_box, net._scene_info[:3]).numpy()

            os.makedirs('{}/{}'.format(cfg.TEST_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), exist_ok=True)
            np.save('{}/{}/pred_class'.format(cfg.TEST_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), pred_class)
            np.save('{}/{}/pred_conf'.format(cfg.TEST_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), pred_conf)
            np.save('{}/{}/pred_box'.format(cfg.TEST_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), pred_box)
            np.save('{}/{}/scene'.format(cfg.TEST_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), np.where(blobs['data'][0,0].numpy() <= 1, 1, 0))
            np.save('{}/{}/gt_class'.format(cfg.TEST_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), gt_class)
            np.save('{}/{}/gt_box'.format(cfg.TEST_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), gt_box)

            # pickup
            sort_index = []
            for conf_index in range(pred_conf.shape[0]):
                if pred_conf[conf_index] > cfg.CLASS_THRESH:
                    sort_index.append(True)
                else:
                    sort_index.append(False)

            # eliminate bad box
            for idx, box in enumerate(pred_box):
                if round(box[0]) >= round(box[3]) or round(box[1]) >= round(box[4]) or round(box[2]) >= round(box[5]):
                    sort_index[idx] = False

            mAP_CLASSIFICATION.evaluate(
                    pred_box[sort_index],
                    pred_class[sort_index],
                    pred_conf[sort_index],
                    gt_box,
                    gt_class)

            if cfg.USE_MASK:
                gt_mask = blobs['gt_mask'][0]
                # pickup
                sort_index = []
                for conf_index in range(pred_conf.shape[0]):
                    if pred_conf[conf_index] > cfg.CLASS_THRESH:
                        sort_index.append(True)
                    else:
                        sort_index.append(False)

                # eliminate bad box
                for idx, box in enumerate(pred_box):
                    if round(box[0]) >= round(box[3]) or round(box[1]) >= round(box[4]) or round(box[2]) >= round(box[5]):
                        sort_index[idx] = False

                # test with mask pipeline
                net.mask_backbone.eval()
                net.mask_backbone.cuda()
                mask_pred_batch = []
                for net_i in range(1):
                    mask_pred = []
                    for pred_box_ind, pred_box_item in enumerate(pred_box):
                        if sort_index[pred_box_ind]:
                            mask_pred.append(net.mask_backbone(Variable(blobs['data'].cuda())[net_i:net_i+1, :, 
                                                                            int(round(pred_box_item[0])):int(round(pred_box_item[3])),
                                                                            int(round(pred_box_item[1])):int(round(pred_box_item[4])), 
                                                                            int(round(pred_box_item[2])):int(round(pred_box_item[5]))
                                                                            ], [] if cfg.USE_IMAGES else None))

                    mask_pred_batch.append(mask_pred)
                net._predictions['mask_pred'] = mask_pred_batch

                # save test result
                pred_mask = []
                mask_ind = 0
                for ind, cls in enumerate(pred_class):
                    if sort_index[ind]:
                        mask = net._predictions['mask_pred'][0][mask_ind][0][cls].data.cpu().numpy()
                        mask = np.where(mask >=cfg.MASK_THRESH, 1, 0).astype(np.float32)
                        pred_mask.append(mask)
                        mask_ind += 1

                pickle.dump(pred_mask, open('{}/{}/pred_mask'.format(cfg.TEST_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), 'wb'))
                pickle.dump(sort_index, open('{}/{}/pred_mask_index'.format(cfg.TEST_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), 'wb'))
                pickle.dump(gt_mask, open('{}/{}/gt_mask'.format(cfg.TEST_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), 'wb'))

                mAP_MASK.evaluate_mask(
                        pred_box[sort_index],
                        pred_class[sort_index],
                        pred_conf[sort_index],
                        pred_mask,
                        gt_box,
                        gt_class, 
                        gt_mask, 
                        net._scene_info)

        timer.toc()
        print('It took {:.3f}s for test on whole scenes'.format(timer.total_time()))

        ###################################
        # Summary
        ###################################
        if cfg.USE_CLASS:
            mAP_CLASSIFICATION.finalize()
            print('mAP of CLASSIFICATION: {}'.format(mAP_CLASSIFICATION.mAP()))
            for class_ind in range(cfg.NUM_CLASSES):
                if class_ind not in mAP_CLASSIFICATION.ignore_class:
                    print('class {}: {}'.format(class_ind, mAP_CLASSIFICATION.AP(class_ind)))

        if cfg.USE_MASK:
            mAP_MASK.finalize()
            print('mAP of mask: {}'.format(mAP_MASK.mAP()))
            for class_ind in range(cfg.NUM_CLASSES):
                if class_ind not in mAP_MASK.ignore_class:
                    print('class {}: {}'.format(class_ind, mAP_MASK.AP(class_ind)))
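
The Evaluate_metric objects above match detections to ground truth with an overlap threshold (cfg.MAP_THRESH) on axis-aligned 3D boxes. A minimal sketch of the 3D IoU such a matcher typically computes, assuming boxes are stored as (x1, y1, z1, x2, y2, z2) as in gt_box/pred_box above:

import numpy as np

def iou_3d_sketch(box_a, box_b):
    # Overlap extent along each axis, clamped at zero when the boxes do not intersect.
    inter_dims = np.minimum(box_a[3:6], box_b[3:6]) - np.maximum(box_a[0:3], box_b[0:3])
    inter = np.prod(np.maximum(inter_dims, 0.0))
    vol_a = np.prod(box_a[3:6] - box_a[0:3])
    vol_b = np.prod(box_b[3:6] - box_b[0:3])
    union = vol_a + vol_b - inter
    return inter / union if union > 0 else 0.0
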
Example 17
    def forward(self, input):

        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)


        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs
        scores = input[0][:, self._num_anchors:, :, :]

        bbox_deltas = input[1]
        im_info = input[2]
        cfg_key = input[3]

        # pre_nms_topN  = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        # post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        # nms_thresh    = cfg[cfg_key].RPN_NMS_THRESH
        # min_size      = cfg[cfg_key].RPN_MIN_SIZE
        if cfg_key == 'TRAIN':
            pre_nms_topN  = 12000
            post_nms_topN = 2000
            nms_thresh    = 0.7
            min_size      = 8
        else:
            pre_nms_topN  = 6000
            post_nms_topN = 300
            nms_thresh    = 0.7
            min_size      = 16


        batch_size = bbox_deltas.size(0)

        feat_height, feat_width = scores.size(2), scores.size(3)
        shift_x = np.arange(0, feat_width) * self._feat_stride
        shift_y = np.arange(0, feat_height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = torch.from_numpy(np.vstack((shift_x.ravel(), shift_y.ravel(),
                                  shift_x.ravel(), shift_y.ravel())).transpose())
        shifts = shifts.contiguous().type_as(scores).float()

        A = self._num_anchors
        K = shifts.size(0)

        self._anchors = self._anchors.type_as(scores)

        anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
        anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:

        bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
        bbox_deltas = bbox_deltas.view(batch_size, -1, 4)

        # Same story for the scores:
        scores = scores.permute(0, 2, 3, 1).contiguous()
        scores = scores.view(batch_size, -1)

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, torch.Tensor(im_info.tolist() * batch_size).cuda(), batch_size)

        # assign the score to 0 if it's non keep.
        # keep = self._filter_boxes(proposals, min_size * im_info[:, 2])

        # trim keep index to make it equal over batch
        # keep_idx = torch.cat(tuple(keep_idx), 0)

        # scores_keep = scores.view(-1)[keep_idx].view(batch_size, trim_size)
        # proposals_keep = proposals.view(-1, 4)[keep_idx, :].contiguous().view(batch_size, trim_size, 4)
        
        # _, order = torch.sort(scores_keep, 1, True)
        
        scores_keep = scores
        proposals_keep = proposals
        _, order = torch.sort(scores_keep, 1, True)

        output = scores.new(batch_size, post_nms_topN, 5).zero_()
        for i in range(batch_size):
            # # 3. remove predicted boxes with either height or width < threshold
            # # (NOTE: convert min_size to input image scale stored in im_info[2])
            proposals_single = proposals_keep[i]
            scores_single = scores_keep[i]

            # # 4. sort all (proposal, score) pairs by score from highest to lowest
            # # 5. take top pre_nms_topN (e.g. 6000)
            order_single = order[i]

            if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
                order_single = order_single[:pre_nms_topN]

            proposals_single = proposals_single[order_single, :]
            scores_single = scores_single[order_single].view(-1,1)

            # 6. apply nms (e.g. threshold = 0.7)
            # 7. take after_nms_topN (e.g. 300)
            # 8. return the top proposals (-> RoIs top)

            keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1), nms_thresh, force_cpu=not cfg.USE_GPU_NMS)
            # keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1), nms_thresh, force_cpu=True)
            keep_idx_i = keep_idx_i.long().view(-1)

            if post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            scores_single = scores_single[keep_idx_i, :]

            # padding 0 at the end.
            num_proposal = proposals_single.size(0)
            output[i,:,0] = i
            output[i,:num_proposal,1:] = proposals_single

        return output
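
The commented-out _filter_boxes step above corresponds to step 3 of the algorithm sketch at the top of forward() (drop proposals whose height or width is below min_size). A minimal PyTorch sketch of that filter, assuming boxes of shape (batch, N, 4) in (x1, y1, x2, y2) order and a scalar min_size:

import torch

def filter_boxes_sketch(boxes, min_size):
    # Returns a (batch, N) boolean mask of proposals that are at least min_size wide and tall.
    ws = boxes[:, :, 2] - boxes[:, :, 0] + 1
    hs = boxes[:, :, 3] - boxes[:, :, 1] + 1
    return (ws >= min_size) & (hs >= min_size)
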
Example 18
    def forward(self, blobs, mode='TRAIN', killing_inds=None):
        self._scene_info = blobs['data'].shape[2:]
        self._id = blobs['id'][0]
        self.cuda() 
        self.batch_size = blobs['data'].shape[0]

        if mode == 'TRAIN':
            self.train()
            if cfg.USE_IMAGES and not cfg.USE_IMAGES_GT:
                # eval of enet
                self.image_enet_fixed.eval()
                self.image_enet_trainable.eval()
            self._mode = 'TRAIN'
            self._scene = Variable(blobs['data'].cuda())
            self._gt_bbox = blobs['gt_box']
            self._gt_mask = blobs['gt_mask'] if cfg.USE_MASK else None


            if cfg.USE_IMAGES:
                grid_shape = blobs['data'].shape[-3:]
                self._imageft = []
                for i in range(self.batch_size):
                    num_images = blobs['nearest_images']['images'][i].shape[0]
                    if cfg.USE_IMAGES_GT:
                        imageft = Variable(blobs['nearest_images']['images'][i].cuda())
                        #imageft = imageft.expand(imageft.shape[0], 128, imageft.shape[2], imageft.shape[3]).contiguous()

                    else:
                        imageft = self.image_enet_fixed(Variable(blobs['nearest_images']['images'][i].cuda()))
                        imageft = self.image_enet_trainable(imageft)

                    proj3d = Variable(blobs['proj_ind_3d'][i].cuda())
                    proj2d = Variable(blobs['proj_ind_2d'][i].cuda())

                    # project 2d to 3d
                    imageft = [Projection.apply(ft, ind3d, ind2d, grid_shape) for ft, ind3d, ind2d in zip(imageft, proj3d, proj2d)]
                    imageft = torch.stack(imageft, dim=4)
                    # reshape to max pool over features
                    sz = imageft.shape
                    imageft = imageft.view(sz[0], -1, num_images)
                    imageft = torch.nn.MaxPool1d(kernel_size=num_images)(imageft)
                    imageft = imageft.view(sz[0], sz[1], sz[2], sz[3], 1)
                    self._imageft.append(imageft.permute(4, 0, 3, 2, 1))
                self._imageft = torch.cat(self._imageft, 0)


            #--------------------------
            # visualization snippets
            #-------------------------
            #import ipdb
            #ipdb.set_trace()
            #data = np.where(self._scene[0,0].data.cpu().numpy() <=1.0, 1, 0)
            #data = self._imageft[0]
            #write_mask(data, 'data.ply')
            #data = blobs['gt_box'][0].numpy()
            #write_bbox(data, 'bbox.ply')

            if cfg.USE_BACKBONE:
                net_conv_level1, net_conv_level2, net_conv_level3 = self._backbone()

            if cfg.USE_RPN:
                # build the anchors for the scene
                if cfg.FIRST_TIME_ANCHORS:
                    cfg.FIRST_TIME_ANCHORS = False
                    # build the anchors for the scene
                    if cfg.NUM_ANCHORS_LEVEL1 != 0:
                        size_level1 = [net_conv_level1.size(2), net_conv_level1.size(3), net_conv_level1.size(4)]
                    if cfg.NUM_ANCHORS_LEVEL2 != 0:
                        size_level2 = [net_conv_level2.size(2), net_conv_level2.size(3), net_conv_level2.size(4)]
                    if cfg.NUM_ANCHORS_LEVEL3 != 0:
                        size_level3 = [net_conv_level3.size(2), net_conv_level3.size(3), net_conv_level3.size(4)]

                    self._anchor_component(size_level1 if cfg.NUM_ANCHORS_LEVEL1 !=0 else [],
                                           size_level2 if cfg.NUM_ANCHORS_LEVEL2 !=0 else [],
                                           size_level3 if cfg.NUM_ANCHORS_LEVEL3 !=0 else [])

                self._region_proposal(net_conv_level1, net_conv_level2, net_conv_level3)
            else:
                # only predictions['rois']/['roi_scores']/['mask_pred'] batch is a list, since not even number/dim in each sample
                self._predictions['rois'] = [self._gt_bbox[i][:,:6].cuda() for i in range(self.batch_size)]
                self._predictions['roi_scores'] = [torch.ones(self._gt_bbox[i].size(0), 1).cuda() for i in range(self.batch_size)]

            if cfg.USE_CLASS:
                self._proposal_target_layer(self._predictions['rois'], self._predictions['roi_scores'], self._predictions['level_inds'])
                pool5 = self._roi_pool_layer(net_conv_level1, net_conv_level2, net_conv_level3, 
                                             self._proposal_targets['rois'], self._proposal_targets['levelInds'],
                                             self._feat_stride, cfg.CLASS_POOLING_SIZE)
                fc7 = self._classifier(pool5)
                self._region_classification(fc7)
            else:
                self._predictions["cls_pred"] = Variable(self._gt_bbox[0][:,6].long())
                self._predictions["cls_prob"] = Variable(torch.zeros((self._predictions['cls_pred'].shape[0], cfg.NUM_CLASSES)))
                self._predictions["bbox_pred"] = Variable(torch.zeros((self._predictions['cls_pred'].shape[0], cfg.NUM_CLASSES*6)))
                for ind_sample in range(self._predictions['cls_pred'].shape[0]):
                    self._predictions['cls_prob'][ind_sample, self._predictions['cls_pred'].data[ind_sample]] = 1.0

            if cfg.USE_MASK:
                self._mask_target_layer(self._predictions['rois'])
                mask_pred_batch = []
                for i in range(self.batch_size):
                    mask_pred = []
                    for roi in self._mask_targets['rois'][i]:
                        mask_pred.append(self.mask_backbone(self._scene[i:i+1, :, 
                                                                        int(round(roi[0].item())):int(round(roi[3].item())),
                                                                        int(round(roi[1].item())):int(round(roi[4].item())), 
                                                                        int(round(roi[2].item())):int(round(roi[5].item()))
                                                                        ], self._imageft[i:i+1, :, 
                                                                                         int(round(roi[0].item())):int(round(roi[3].item())),
                                                                                         int(round(roi[1].item())):int(round(roi[4].item())), 
                                                                                         int(round(roi[2].item())):int(round(roi[5].item()))] if cfg.USE_IMAGES else None))

                    mask_pred_batch.append(mask_pred)
                self._predictions['mask_pred'] = mask_pred_batch
            self._add_losses()

        elif mode == 'TEST':
            with torch.no_grad():
                self.eval()
                self._mode = 'TEST'
                self._scene = blobs['data'].cuda()
                self._gt_bbox = blobs['gt_box']
                self._gt_mask = blobs['gt_mask'] if cfg.USE_MASK else None
                if cfg.USE_IMAGES:
                    grid_shape = blobs['data'].shape[-3:]
                    self._imageft = []
                    for i in range(self.batch_size):
                        num_images = blobs['nearest_images']['images'][i].shape[0]
                        if cfg.USE_IMAGES_GT:
                            with torch.no_grad():
                                imageft = Variable(blobs['nearest_images']['images'][i].cuda())
                        else:
                            with torch.no_grad():
                                imageft = self.image_enet_fixed(Variable(blobs['nearest_images']['images'][i].cuda()))
                            imageft = self.image_enet_trainable(imageft)

                        proj3d = Variable(blobs['proj_ind_3d'][i])
                        proj2d = Variable(blobs['proj_ind_2d'][i])

                        if blobs['data'].shape[2]*blobs['data'].shape[3]*blobs['data'].shape[4] > cfg.MAX_VOLUME or len(proj3d) > cfg.MAX_IMAGE:
                            print('on cpu')
                            imageft = imageft.cpu()
                            proj3d = proj3d.cpu()
                            proj2d = proj2d.cpu()

                        # project 2d to 3d
                        counter = 0
                        init = True

                        for ft, ind3d, ind2d in zip(imageft, proj3d, proj2d):
                            counter += 1
                            if counter-1 in killing_inds:
                                continue
                            imageft_temp = Projection.apply(ft, ind3d, ind2d, grid_shape)[:, :,:, :].contiguous()
                            sz = imageft_temp.shape
                            if init:
                                imageft = imageft_temp.view(sz[0], sz[1], sz[2], sz[3])
                                init = False
                                continue

                            imageft = torch.stack([imageft, imageft_temp], dim=4)
                            # reshape to max pool over features
                            imageft = imageft.view(sz[0], -1, 2)
                            imageft = torch.nn.MaxPool1d(kernel_size=2)(imageft)
                            imageft = imageft.view(sz[0], sz[1], sz[2], sz[3])

                        imageft = imageft.view(sz[0], sz[1], sz[2], sz[3], self.batch_size)
                        self._imageft = imageft.permute(4, 0, 3, 2, 1)
                        self._imageft = self._imageft.cuda()
                        del proj3d
                        del proj2d
                        torch.cuda.empty_cache()

                if cfg.USE_BACKBONE:
                    net_conv_level1, net_conv_level2, net_conv_level3 = self._backbone()

                if cfg.USE_RPN:
                    # build the anchors for the scene
                    if cfg.NUM_ANCHORS_LEVEL1 != 0:
                        size_level1 = [net_conv_level1.size(2), net_conv_level1.size(3), net_conv_level1.size(4)]
                    if cfg.NUM_ANCHORS_LEVEL2 != 0:
                        size_level2 = [net_conv_level2.size(2), net_conv_level2.size(3), net_conv_level2.size(4)]
                    if cfg.NUM_ANCHORS_LEVEL3 != 0:
                        size_level3 = [net_conv_level3.size(2), net_conv_level3.size(3), net_conv_level3.size(4)]

                    self._anchor_component(size_level1 if cfg.NUM_ANCHORS_LEVEL1 !=0 else [],
                                           size_level2 if cfg.NUM_ANCHORS_LEVEL2 !=0 else [],
                                           size_level3 if cfg.NUM_ANCHORS_LEVEL3 !=0 else [])

                    self._region_proposal(net_conv_level1, net_conv_level2, net_conv_level3)

                else:
                    # only predictions['rois']/['roi_scores'] batch is a list, since not even number in each sample
                    self._predictions['rois'] = [self._gt_bbox[i][:,:6].cuda() for i in range(self.batch_size)]
                    self._predictions['roi_scores'] = [torch.ones(self._gt_bbox[i].size(0), 1).cuda() for i in range(self.batch_size)]

                # especially for validation, since we don't want to resample in val for mAP

                if cfg.USE_CLASS:
                    pool5 = self._roi_pool_layer(net_conv_level1, net_conv_level2, net_conv_level3, 
                                                 Variable(torch.cat(self._predictions['rois'], 0)), 
                                                 Variable(torch.cat(self._predictions['level_inds'], 0)),
                                                 self._feat_stride, cfg.CLASS_POOLING_SIZE)
                    fc7 = self._classifier(pool5)
                    self._region_classification(fc7)
                else:
                    self._predictions["cls_pred"] = Variable(self._gt_bbox[0][:,6].long())
                    self._predictions["cls_prob"] = Variable(torch.zeros((self._predictions['cls_pred'].shape[0], cfg.NUM_CLASSES)))
                    self._predictions["bbox_pred"] = Variable(torch.zeros((self._predictions['cls_pred'].shape[0], cfg.NUM_CLASSES*6)))
                    for ind_sample in range(self._predictions['cls_pred'].shape[0]):
                        self._predictions['cls_prob'][ind_sample, self._predictions['cls_pred'].data[ind_sample]] = 1.0

                if cfg.USE_MASK:
                    mask_pred_batch = []
                    rois = self._predictions['rois'][0].cpu()
                    box_reg_pre = self._predictions["bbox_pred"].data.cpu().numpy()
                    box_reg = np.zeros((box_reg_pre.shape[0], 6))
                    pred_class = self._predictions['cls_pred'].data.cpu().numpy()
                    pred_conf = np.zeros((pred_class.shape[0]))
                    for pred_ind in range(pred_class.shape[0]):
                        box_reg[pred_ind, :] = box_reg_pre[pred_ind, pred_class[pred_ind]*6:(pred_class[pred_ind]+1)*6]
                        pred_conf[pred_ind] = self._predictions['cls_prob'].data.cpu().numpy()[pred_ind, pred_class.data[pred_ind]]
                    pred_box = bbox_transform_inv(rois, torch.from_numpy(box_reg).float())
                    pred_box = clip_boxes(pred_box, self._scene_info[:3]).numpy()

                    sort_index = pred_conf > cfg.CLASS_THRESH

                    # eliminate bad box
                    for idx, box in enumerate(pred_box):
                        if round(box[0]) >= round(box[3]) or round(box[1]) >= round(box[4]) or round(box[2]) >= round(box[5]):
                            sort_index[idx] = False
                    
                    for i in range(self.batch_size):
                        mask_pred = []
                        for ind, roi in enumerate(pred_box):
                            if sort_index[ind]:
                                mask_pred.append(self.mask_backbone(self._scene[i:i+1, :, 
                                                                                int(round(roi[0])):int(round(roi[3])),
                                                                                int(round(roi[1])):int(round(roi[4])), 
                                                                                int(round(roi[2])):int(round(roi[5]))
                                                                                ], self._imageft[i:i+1, :, 
                                                                                                 int(round(roi[0])):int(round(roi[3])),
                                                                                                 int(round(roi[1])):int(round(roi[4])), 
                                                                                                 int(round(roi[2])):int(round(roi[5]))] if cfg.USE_IMAGES else None))

                        mask_pred_batch.append(mask_pred)
                    self._predictions['mask_pred'] = mask_pred_batch
Example 19
    def forward(self, input):

        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs

        scores = input[0][:, self._num_anchors:, :, :]
        bbox_frame = input[1]
        im_info = input[2]
        cfg_key = input[3]
        time_dim = input[4]

        batch_size = bbox_frame.size(0)

        pre_nms_topN = conf[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = conf[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = conf[cfg_key].RPN_NMS_THRESH
        min_size = conf[cfg_key].RPN_MIN_SIZE

        ##################
        # Create anchors #
        ##################

        feat_height, feat_width = scores.size(2), scores.size(
            3)  # (batch_size, 512/256, 7, 7)
        shift_x = np.arange(0, feat_width) * self._feat_stride
        shift_y = np.arange(0, feat_height) * self._feat_stride
        shift_z = np.arange(0, 1)
        shift_x, shift_y, shift_z = np.meshgrid(shift_x, shift_y, shift_z)
        shifts = torch.from_numpy(
            np.vstack((shift_x.ravel(), shift_y.ravel(), shift_z.ravel(),
                       shift_x.ravel(), shift_y.ravel(),
                       shift_z.ravel())).transpose())
        shifts = shifts.contiguous().type_as(scores).float()

        A = self._num_anchors
        K = shifts.size(0)

        self._anchors = self._anchors.type_as(scores)

        anchors = self._anchors.view(1, A, 6) + shifts.view(K, 1, 6)
        anchors = anchors.view(1, K * A, 6)
        anchors = anchors.expand(batch_size, K * A, 6)

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        bbox_frame = bbox_frame.permute(0, 2, 3, 1).contiguous()
        bbox_frame = bbox_frame.view(batch_size, -1, 4)

        # Same story for the scores:
        scores = scores.permute(0, 2, 3, 1).contiguous()
        scores = scores.view(batch_size, -1)
        """
        we have 16 frames, and 28224 3d anchors for each 16 frames
        """
        # Convert anchors into proposals via bbox transformations
        # proposals = bbox_frames_transform_inv(anchors, bbox_deltas, batch_size)
        anchors_xy = anchors[:, :, [0, 1, 3, 4]]
        proposals_xy = bbox_transform_inv(
            anchors_xy, bbox_frame,
            batch_size)  # proposals have 441 * time_dim shape

        ## if any dimension exceeds the dims of the original image, clamp_ them
        proposals_xy = clip_boxes(proposals_xy, im_info, batch_size)
        proposals = torch.cat(
            (proposals_xy[:, :, [0, 1]], anchors[:, :, 2].unsqueeze(2),
             proposals_xy[:, :, [2, 3]], anchors[:, :, 5].unsqueeze(2)),
            dim=2)

        scores_keep = scores
        proposals_keep = proposals

        _, order = torch.sort(scores, 1, True)

        output = scores.new(batch_size, post_nms_topN, 8).zero_()
        # print('output.shape :',output.shape)
        for i in range(batch_size):
            # # 3. remove predicted boxes with either height or width < threshold
            # # (NOTE: convert min_size to input image scale stored in im_info[2])
            proposals_single = proposals_keep[i]
            scores_single = scores_keep[i]
            # print('scores_single.shape :',scores_single.shape)
            # # 4. sort all (proposal, score) pairs by score from highest to lowest
            # # 5. take top pre_nms_topN (e.g. 6000)
            order_single = order[i]

            proposals_single = proposals_single[order_single, :]
            scores_single = scores_single[order_single].view(-1, 1)

            proposals_single = proposals_single[:post_nms_topN, :]
            scores_single = scores_single[:post_nms_topN]
            # print('scores_single.shape :',scores_single.shape)
            # padding 0 at the end.
            num_proposal = proposals_single.size(0)
            output[i, :num_proposal, 0] = i
            output[i, :num_proposal, 1:7] = proposals_single
            output[i, :num_proposal, 7] = scores_single.squeeze()

        return output
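
The forward pass above relies on batched bbox_transform_inv and clip_boxes helpers to turn the (x, y) part of each anchor plus its predicted deltas into clipped proposals. A minimal PyTorch sketch of what such batched helpers typically look like is given below; the exact signatures and the (dx, dy, dw, dh) parameterization are assumptions for illustration, not taken from this repository.

import torch

def bbox_transform_inv(boxes, deltas, batch_size):
    # illustrative batched decoder: boxes (B, N, 4) as (x1, y1, x2, y2),
    # deltas (B, N, 4) as (dx, dy, dw, dh) -> decoded boxes (B, N, 4)
    widths = boxes[:, :, 2] - boxes[:, :, 0] + 1.0
    heights = boxes[:, :, 3] - boxes[:, :, 1] + 1.0
    ctr_x = boxes[:, :, 0] + 0.5 * widths
    ctr_y = boxes[:, :, 1] + 0.5 * heights

    pred_ctr_x = deltas[:, :, 0] * widths + ctr_x
    pred_ctr_y = deltas[:, :, 1] * heights + ctr_y
    pred_w = torch.exp(deltas[:, :, 2]) * widths
    pred_h = torch.exp(deltas[:, :, 3]) * heights

    pred_boxes = torch.zeros_like(deltas)
    pred_boxes[:, :, 0] = pred_ctr_x - 0.5 * pred_w  # x1
    pred_boxes[:, :, 1] = pred_ctr_y - 0.5 * pred_h  # y1
    pred_boxes[:, :, 2] = pred_ctr_x + 0.5 * pred_w  # x2
    pred_boxes[:, :, 3] = pred_ctr_y + 0.5 * pred_h  # y2
    return pred_boxes

def clip_boxes(boxes, im_info, batch_size):
    # clamp every box of every batch element into its image; im_info rows are (height, width, ...)
    for i in range(batch_size):
        boxes[i, :, 0].clamp_(0, float(im_info[i, 1]) - 1)  # x1 < width
        boxes[i, :, 1].clamp_(0, float(im_info[i, 0]) - 1)  # y1 < height
        boxes[i, :, 2].clamp_(0, float(im_info[i, 1]) - 1)  # x2 < width
        boxes[i, :, 3].clamp_(0, float(im_info[i, 0]) - 1)  # y2 < height
    return boxes
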
Exemplo n.º 20
0
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride,
                   anchors, num_anchors):
    '''
    Return final proposal boxes

    Parameters
    ----------
    rpn_cls_prob: ndarray
        score of each anchor for each class
    rpn_bbox_pred: ndarray
        raw output of the RPN network
    im_info: ndarray
        shape=[batch_size, 3]
    cfg_key: string
        Train or Test;
    _feat_stride: list
        [16, ]
    anchors: ndarray
        generate_anchors_pre(height, width, feat_stride, anchor_scales=(8, 16, 32), anchor_ratios=(0.5, 1, 2))
    num_anchors: int32
        num_anchors = 3 x 3

    Returns
    -------
    blob: ndarray
        boxes to keep, rows of [0, x1, y1, x2, y2]
    scores: list
        scores of the boxes to keep
    '''
    """A simplified version compared to fast/er RCNN
       For details please see the technical report
    """
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')

    # training phase
    if cfg_key == "TRAIN":
        # Number of top scoring boxes to keep before applying NMS to RPN proposals
        # upper bound on the number of boxes kept before NMS
        pre_nms_topN = cfg.FLAGS.rpn_train_pre_nms_top_n  # 12000
        post_nms_topN = cfg.FLAGS.rpn_train_post_nms_top_n  # 2000
        nms_thresh = cfg.FLAGS.rpn_train_nms_thresh  # 0.7
    # testing phase
    else:
        pre_nms_topN = cfg.FLAGS.rpn_test_pre_nms_top_n  # 6000
        post_nms_topN = cfg.FLAGS.rpn_test_post_nms_top_n  # 300
        nms_thresh = cfg.FLAGS.rpn_test_nms_thresh  # 0.7

    im_info = im_info[0]
    # Get the scores and bounding boxes
    # extract classification probabilities and bounding-box predictions
    # foreground scores of the RPN boxes (the first 9 channels are background probabilities, the last 9 are foreground probabilities)
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    scores = scores.reshape((-1, 1))

    # apply the regression to get the corner coordinates of the boxes, then clip them to the image
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals
    # take the indices and scores of the top-N boxes
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    # proposals holds the top-N box coordinates (after regression and clipping); scores holds the top-N scores
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximum suppression
    # run NMS and record the indices of the boxes to keep
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Pick the top region proposals after NMS
    # if keep is too long, retain only the top-N entries and update proposals to the coordinates of the kept boxes
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input
    # prepend a column of zeros to proposals, reserved for the image index within the batch
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    return blob, scores
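
The nms call above consumes an (N, 5) array whose rows are [x1, y1, x2, y2, score] and returns the indices of the boxes to keep. The repositories these examples come from usually ship a Cython or GPU kernel for this step; the pure-NumPy sketch below only illustrates the same greedy IoU suppression and is not the project's actual implementation.

import numpy as np

def nms(dets, thresh):
    # dets: (N, 5) rows of (x1, y1, x2, y2, score); returns indices of kept boxes
    x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # descending by score
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the current top box with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        # drop boxes whose IoU with the kept box exceeds the threshold
        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]
    return keep
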
Exemplo n.º 21
0
def proposal_layer(rpn_cls_prob_level1, rpn_bbox_pred_level1,
                   all_anchors_level1, rpn_cls_prob_level2,
                   rpn_bbox_pred_level2, all_anchors_level2,
                   rpn_cls_prob_level3, rpn_bbox_pred_level3,
                   all_anchors_level3, scene_info, cfg_key,
                   anchors_filter_level1, anchors_filter_level2,
                   anchors_filter_level3):
    """

    :param rpn_cls_prob <Tensor>: (1, 2, H, W, L, num_anchors)
    :param rpn_bbox_pred <Tensor>: (1, H, W, L, num_anchorsx6), coord. of boxes
    :param scene_info: [64, 32, 64] height, width, length
    :param cfg_key: "TRAIN" or "TEST"
    :param anchors: (NUM_ANCHORSxWxHxL, 6)
    :return: rois in feature map
    """

    # Number of top scoring boxes to keep before applying NMS to RPN proposals
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    # Number of top scoring boxes to keep after applying NMS to RPN proposals
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    # NMS threshold used on RPN proposals
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # only keep anchors inside the image
    _allowed_border = cfg.ALLOW_BORDER
    if cfg.NUM_ANCHORS_LEVEL1 != 0:
        inds_inside_level1 = np.where(
            (all_anchors_level1[:, 0] >= -_allowed_border)
            & (all_anchors_level1[:, 1] >= -_allowed_border)
            & (all_anchors_level1[:, 2] >= -_allowed_border)
            & (all_anchors_level1[:, 3] < scene_info[0] + _allowed_border)
            &  #width
            (all_anchors_level1[:, 4] < scene_info[1] + _allowed_border)
            &  #height
            (all_anchors_level1[:,
                                5] < scene_info[2] + _allowed_border)  #length
        )[0]

        if anchors_filter_level1 is not None:
            if len(anchors_filter_level1) == 0:
                anchors_filter_level1 = [0]
            inds_inside_level1 = inds_inside_level1[anchors_filter_level1]

        anchors_level1 = all_anchors_level1[inds_inside_level1, :]

    if cfg.NUM_ANCHORS_LEVEL2 != 0:
        inds_inside_level2 = np.where(
            (all_anchors_level2[:, 0] >= -_allowed_border)
            & (all_anchors_level2[:, 1] >= -_allowed_border)
            & (all_anchors_level2[:, 2] >= -_allowed_border)
            & (all_anchors_level2[:, 3] < scene_info[0] + _allowed_border)
            &  #width
            (all_anchors_level2[:, 4] < scene_info[1] + _allowed_border)
            &  #height
            (all_anchors_level2[:,
                                5] < scene_info[2] + _allowed_border)  #length
        )[0]

        if anchors_filter_level2 is not None:
            if len(anchors_filter_level2) == 0:
                anchors_filter_level2 = [0]
            inds_inside_level2 = inds_inside_level2[anchors_filter_level2]

        anchors_level2 = all_anchors_level2[inds_inside_level2, :]

    if cfg.NUM_ANCHORS_LEVEL3 != 0:
        inds_inside_level3 = np.where(
            (all_anchors_level3[:, 0] >= -_allowed_border)
            & (all_anchors_level3[:, 1] >= -_allowed_border)
            & (all_anchors_level3[:, 2] >= -_allowed_border)
            & (all_anchors_level3[:, 3] < scene_info[0] + _allowed_border)
            &  #width
            (all_anchors_level3[:, 4] < scene_info[1] + _allowed_border)
            &  #height
            (all_anchors_level3[:,
                                5] < scene_info[2] + _allowed_border)  #length
        )[0]

        if anchors_filter_level3 is not None:
            if len(anchors_filter_level3) == 0:
                anchors_filter_level3 = [0]
            inds_inside_level3 = inds_inside_level3[anchors_filter_level3]

        anchors_level3 = all_anchors_level3[inds_inside_level3, :]

    # Get the scores and the bounding boxes
    proposals_batch = []
    scores_batch = []
    levelInds_batch = []
    for i in range(cfg.BATCH_SIZE):
        if cfg.NUM_ANCHORS_LEVEL1 != 0:
            #-------------------------
            # level 1
            #-------------------------
            # (wxhxlxnum_anchors, 6)
            rpn_bbox_pred_reshape_level1 = rpn_bbox_pred_level1[i].view(
                -1, 6)[inds_inside_level1, :]
            # (wxhxlxnum_anchors)
            scores_level1 = rpn_cls_prob_level1[i, 1, :, :, :, :].view(
                -1, 1)[inds_inside_level1, :]

            # anchors is in the scene coord
            # return the proposals on scene coord.
            proposals_level1 = bbox_transform_inv(
                anchors_level1, rpn_bbox_pred_reshape_level1)
            proposals_level1 = clip_boxes(proposals_level1, scene_info[:3])

        if cfg.NUM_ANCHORS_LEVEL2 != 0:
            #-------------------------
            # level 2
            #-------------------------
            # (wxhxlxnum_anchors, 6)
            rpn_bbox_pred_reshape_level2 = rpn_bbox_pred_level2[i].view(
                -1, 6)[inds_inside_level2, :]
            # (wxhxlxnum_anchors)
            scores_level2 = rpn_cls_prob_level2[i, 1, :, :, :, :].view(
                -1, 1)[inds_inside_level2, :]

            # anchors is in the scene coord
            # return the proposals on scene coord.
            proposals_level2 = bbox_transform_inv(
                anchors_level2, rpn_bbox_pred_reshape_level2)
            proposals_level2 = clip_boxes(proposals_level2, scene_info[:3])
            #TODO: eliminate bad box

        if cfg.NUM_ANCHORS_LEVEL3 != 0:
            #-------------------------
            # level 3
            #-------------------------
            # (wxhxlxnum_anchors, 6)
            rpn_bbox_pred_reshape_level3 = rpn_bbox_pred_level3[i].view(
                -1, 6)[inds_inside_level3, :]
            # (wxhxlxnum_anchors)
            scores_level3 = rpn_cls_prob_level3[i, 1, :, :, :, :].view(
                -1, 1)[inds_inside_level3, :]

            # anchors is in the scene coord
            # return the proposals on scene coord.
            proposals_level3 = bbox_transform_inv(
                anchors_level3, rpn_bbox_pred_reshape_level3)
            proposals_level3 = clip_boxes(proposals_level3, scene_info[:3])
            #TODO: eliminate bad box

        #------------------------
        # combine
        #------------------------
        proposals_combined_list = []
        scores_combined_list = []
        levelInds_combined_list = []
        if cfg.NUM_ANCHORS_LEVEL1 != 0:
            proposals_combined_list.append(proposals_level1)
            scores_combined_list.append(scores_level1)
            levelInds_combined_list.append(torch.ones_like(scores_level1))

        if cfg.NUM_ANCHORS_LEVEL2 != 0:
            proposals_combined_list.append(proposals_level2)
            scores_combined_list.append(scores_level2)
            levelInds_combined_list.append(torch.ones_like(scores_level2) * 2)

        if cfg.NUM_ANCHORS_LEVEL3 != 0:
            proposals_combined_list.append(proposals_level3)
            scores_combined_list.append(scores_level3)
            levelInds_combined_list.append(torch.ones_like(scores_level3) * 3)

        proposals = torch.cat(proposals_combined_list, 0)
        scores = torch.cat(scores_combined_list, 0)[:, 0]
        levelInds = torch.cat(levelInds_combined_list, 0)[:, 0]

        #proposals = proposals_level2
        #scores = scores_level2[:,0]

        #box= np.stack([np.concatenate([proposals[5222].cpu().numpy(), np.ones(1)], 0),
        #               np.concatenate([proposals[5228].cpu().numpy(), np.ones(1)], 0),
        #               np.concatenate([proposals[5229].cpu().numpy(), np.ones(1)], 0),
        #               np.concatenate([proposals[5319].cpu().numpy(), np.ones(1)], 0),
        #               np.concatenate([proposals[5356].cpu().numpy(), np.ones(1)], 0),
        #               np.concatenate([proposals[5393].cpu().numpy(), np.ones(1)], 0)], 0)
        #visualize('./vis', 'pos_proposal', data=None, bbox=box)

        #box= np.stack([np.concatenate([proposals[5222].cpu().numpy(), np.ones(1)], 0),
        #               np.concatenate([proposals[5223].cpu().numpy(), np.ones(1)], 0),
        #               np.concatenate([proposals[5224].cpu().numpy(), np.ones(1)], 0),
        #               np.concatenate([proposals[5225].cpu().numpy(), np.ones(1)], 0),
        #               np.concatenate([proposals[5226].cpu().numpy(), np.ones(1)], 0),
        #               np.concatenate([proposals[5227].cpu().numpy(), np.ones(1)], 0)], 0)
        #visualize('./vis', 'neg_proposal', data=None, bbox=box)

        # pick up the top region proposals
        scores, order = scores.sort(descending=True)
        #ipdb.set_trace()
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
            scores = scores[:pre_nms_topN].view(-1, 1)
        proposals = proposals[order, :]
        levelInds = levelInds[order]

        # Non-maximum suppression
        keep = nms(proposals, nms_thresh)

        # pick up the top region proposals after NMS
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep, ]
        levelInds = levelInds[keep, ]

        # support more than 1 scene
        proposals_batch.append(proposals)
        scores_batch.append(scores)
        levelInds_batch.append(levelInds)

    return proposals_batch, scores_batch, levelInds_batch
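
In the volumetric case, bbox_transform_inv and clip_boxes operate on 6-coordinate boxes (x1, y1, z1, x2, y2, z2) in scene coordinates. The sketch below is a hedged PyTorch illustration of a 3D decoder under the usual center/size delta parameterization; the delta ordering (dx, dy, dz, dw, dh, dl) and the exact clipping convention are assumptions, not taken from this code.

import torch

def bbox_transform_inv(boxes, deltas):
    # illustrative 3D decoder: boxes (N, 6) as (x1, y1, z1, x2, y2, z2),
    # deltas (N, 6) assumed to be (dx, dy, dz, dw, dh, dl)
    widths = boxes[:, 3] - boxes[:, 0] + 1.0
    heights = boxes[:, 4] - boxes[:, 1] + 1.0
    lengths = boxes[:, 5] - boxes[:, 2] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights
    ctr_z = boxes[:, 2] + 0.5 * lengths

    pred_ctr_x = deltas[:, 0] * widths + ctr_x
    pred_ctr_y = deltas[:, 1] * heights + ctr_y
    pred_ctr_z = deltas[:, 2] * lengths + ctr_z
    pred_w = torch.exp(deltas[:, 3]) * widths
    pred_h = torch.exp(deltas[:, 4]) * heights
    pred_l = torch.exp(deltas[:, 5]) * lengths

    pred = torch.zeros_like(boxes)
    pred[:, 0] = pred_ctr_x - 0.5 * pred_w
    pred[:, 1] = pred_ctr_y - 0.5 * pred_h
    pred[:, 2] = pred_ctr_z - 0.5 * pred_l
    pred[:, 3] = pred_ctr_x + 0.5 * pred_w
    pred[:, 4] = pred_ctr_y + 0.5 * pred_h
    pred[:, 5] = pred_ctr_z + 0.5 * pred_l
    return pred

def clip_boxes(boxes, scene_shape):
    # clamp (x1, y1, z1, x2, y2, z2) into a scene of size scene_shape = (W, H, L)
    for d in range(3):
        boxes[:, d] = boxes[:, d].clamp(0, scene_shape[d] - 1)
        boxes[:, d + 3] = boxes[:, d + 3].clamp(0, scene_shape[d] - 1)
    return boxes
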
Exemplo n.º 22
0
def proposal_layer(rpn_cls_prob_level1, rpn_bbox_pred_level1, all_anchors_level1,  
                   rpn_cls_prob_level2, rpn_bbox_pred_level2, all_anchors_level2,
                   rpn_cls_prob_level3, rpn_bbox_pred_level3, all_anchors_level3,
                   scene_info, cfg_key,
                   anchors_filter_level1, anchors_filter_level2, anchors_filter_level3):
    """

    :param rpn_cls_prob <Tensor>: (1, 2, H, W, L, num_anchors)
    :param rpn_bbox_pred <Tensor>: (1, H, W, L, num_anchorsx6), coord. of boxes
    :param scene_info: [64, 32, 64] height, width, length
    :param cfg_key: "TRAIN" or "TEST"
    :param anchors: (NUM_ANCHORSxWxHxL, 6)
    :return: rois in feature map
    """

    # Number of top scoring boxes to keep before applying NMS to RPN proposals
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    # Number of top scoring boxes to keep after applying NMS to RPN proposals
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    # NMS threshold used on RPN proposals
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # only keep anchors inside the image
    _allowed_border = cfg.ALLOW_BORDER
    if cfg.NUM_ANCHORS_LEVEL1 != 0:
        inds_inside_level1 = np.where(
            (all_anchors_level1[:, 0] >= -_allowed_border) &
            (all_anchors_level1[:, 1] >= -_allowed_border) &
            (all_anchors_level1[:, 2] >= -_allowed_border) &
            (all_anchors_level1[:, 3] < scene_info[0] + _allowed_border) &  #width
            (all_anchors_level1[:, 4] < scene_info[1] + _allowed_border) &  #height
            (all_anchors_level1[:, 5] < scene_info[2] + _allowed_border)   #length
        )[0]

        if anchors_filter_level1 is not None:
            if len(anchors_filter_level1) == 0:
                anchors_filter_level1 = [0]
            inds_inside_level1 = inds_inside_level1[anchors_filter_level1]

        anchors_level1 = all_anchors_level1[inds_inside_level1, :]

    if cfg.NUM_ANCHORS_LEVEL2 != 0:
        inds_inside_level2 = np.where(
            (all_anchors_level2[:, 0] >= -_allowed_border) &
            (all_anchors_level2[:, 1] >= -_allowed_border) &
            (all_anchors_level2[:, 2] >= -_allowed_border) &
            (all_anchors_level2[:, 3] < scene_info[0] + _allowed_border) &  #width
            (all_anchors_level2[:, 4] < scene_info[1] + _allowed_border) &  #height
            (all_anchors_level2[:, 5] < scene_info[2] + _allowed_border)   #length
        )[0]

        if anchors_filter_level2 is not None:
            if len(anchors_filter_level2) == 0:
                anchors_filter_level2 = [0]
            inds_inside_level2 = inds_inside_level2[anchors_filter_level2]

        anchors_level2 = all_anchors_level2[inds_inside_level2, :]

    if cfg.NUM_ANCHORS_LEVEL3 != 0:
        inds_inside_level3 = np.where(
            (all_anchors_level3[:, 0] >= -_allowed_border) &
            (all_anchors_level3[:, 1] >= -_allowed_border) &
            (all_anchors_level3[:, 2] >= -_allowed_border) &
            (all_anchors_level3[:, 3] < scene_info[0] + _allowed_border) &  #width
            (all_anchors_level3[:, 4] < scene_info[1] + _allowed_border) &  #height
            (all_anchors_level3[:, 5] < scene_info[2] + _allowed_border)   #length
        )[0]

        if anchors_filter_level3 is not None:
            if len(anchors_filter_level3) == 0:
                anchors_filter_level3 = [0]
            inds_inside_level3 = inds_inside_level3[anchors_filter_level3]

        anchors_level3 = all_anchors_level3[inds_inside_level3, :]

    # Get the scores and the bounding boxes
    proposals_batch = []
    scores_batch = []
    levelInds_batch = []
    for i in range(cfg.BATCH_SIZE):
        if cfg.NUM_ANCHORS_LEVEL1 != 0:
            #-------------------------
            # level 1
            #-------------------------
            # (wxhxlxnum_anchors, 6)
            rpn_bbox_pred_reshape_level1 = rpn_bbox_pred_level1[i].view(-1, 6)[inds_inside_level1, :]
             # (wxhxlxnum_anchors)
            scores_level1 = rpn_cls_prob_level1[i, 1, :, :, :, :].view(-1, 1)[inds_inside_level1, :]

            # anchors is in the scene coord
            # return the proposals on scene coord.
            proposals_level1 = bbox_transform_inv(anchors_level1, rpn_bbox_pred_reshape_level1)
            proposals_level1 = clip_boxes(proposals_level1, scene_info[:3])

        if cfg.NUM_ANCHORS_LEVEL2 != 0:
            #-------------------------
            # level 2
            #-------------------------
            # (wxhxlxnum_anchors, 6)
            rpn_bbox_pred_reshape_level2 = rpn_bbox_pred_level2[i].view(-1, 6)[inds_inside_level2, :]
             # (wxhxlxnum_anchors)
            scores_level2 = rpn_cls_prob_level2[i, 1, :, :, :, :].view(-1, 1)[inds_inside_level2, :]

            # anchors is in the scene coord
            # return the proposals on scene coord.
            proposals_level2 = bbox_transform_inv(anchors_level2, rpn_bbox_pred_reshape_level2)
            proposals_level2 = clip_boxes(proposals_level2, scene_info[:3])
            #TODO: eliminate bad box

        if cfg.NUM_ANCHORS_LEVEL3 != 0:
            #-------------------------
            # level 3
            #-------------------------
            # (wxhxlxnum_anchors, 6)
            rpn_bbox_pred_reshape_level3 = rpn_bbox_pred_level3[i].view(-1, 6)[inds_inside_level3, :]
             # (wxhxlxnum_anchors)
            scores_level3 = rpn_cls_prob_level3[i, 1, :, :, :, :].view(-1, 1)[inds_inside_level3, :]

            # anchors is in the scene coord
            # return the proposals on scene coord.
            proposals_level3 = bbox_transform_inv(anchors_level3, rpn_bbox_pred_reshape_level3)
            proposals_level3 = clip_boxes(proposals_level3, scene_info[:3])
            #TODO: eliminate bad box

        #------------------------
        # combine
        #------------------------
        proposals_combined_list = []
        scores_combined_list = []
        levelInds_combined_list = []
        if cfg.NUM_ANCHORS_LEVEL1 != 0:
            proposals_combined_list.append(proposals_level1)
            scores_combined_list.append(scores_level1)
            levelInds_combined_list.append(torch.ones_like(scores_level1))

        if cfg.NUM_ANCHORS_LEVEL2 !=0:
            proposals_combined_list.append(proposals_level2)
            scores_combined_list.append(scores_level2)
            levelInds_combined_list.append(torch.ones_like(scores_level2)*2)

        if cfg.NUM_ANCHORS_LEVEL3 !=0:
            proposals_combined_list.append(proposals_level3)
            scores_combined_list.append(scores_level3)
            levelInds_combined_list.append(torch.ones_like(scores_level3)*3)

        proposals = torch.cat(proposals_combined_list, 0)
        scores = torch.cat(scores_combined_list, 0)[:,0]
        levelInds = torch.cat(levelInds_combined_list, 0)[:,0]

        #proposals = proposals_level2
        #scores = scores_level2[:,0]


        #box= np.stack([np.concatenate([proposals[5222].cpu().numpy(), np.ones(1)], 0),
        #               np.concatenate([proposals[5228].cpu().numpy(), np.ones(1)], 0),
        #               np.concatenate([proposals[5229].cpu().numpy(), np.ones(1)], 0),
        #               np.concatenate([proposals[5319].cpu().numpy(), np.ones(1)], 0),
        #               np.concatenate([proposals[5356].cpu().numpy(), np.ones(1)], 0),
        #               np.concatenate([proposals[5393].cpu().numpy(), np.ones(1)], 0)], 0)
        #visualize('./vis', 'pos_proposal', data=None, bbox=box)

        #box= np.stack([np.concatenate([proposals[5222].cpu().numpy(), np.ones(1)], 0),
        #               np.concatenate([proposals[5223].cpu().numpy(), np.ones(1)], 0),
        #               np.concatenate([proposals[5224].cpu().numpy(), np.ones(1)], 0),
        #               np.concatenate([proposals[5225].cpu().numpy(), np.ones(1)], 0),
        #               np.concatenate([proposals[5226].cpu().numpy(), np.ones(1)], 0),
        #               np.concatenate([proposals[5227].cpu().numpy(), np.ones(1)], 0)], 0)
        #visualize('./vis', 'neg_proposal', data=None, bbox=box)

        # pick up the top region proposals
        scores, order = scores.sort(descending=True)
        #ipdb.set_trace()
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
            scores = scores[:pre_nms_topN].view(-1, 1)
        proposals = proposals[order, :]
        levelInds = levelInds[order]

        # Non-maximum suppression
        keep = nms(proposals, nms_thresh)

        # pick up the top region proposals after NMS
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep,]
        levelInds = levelInds[keep,]

        # support more than 1 scene
        proposals_batch.append(proposals)
        scores_batch.append(scores)
        levelInds_batch.append(levelInds)

    return proposals_batch, scores_batch, levelInds_batch
Exemplo n.º 23
0
    def validation(self, index, mode):
        #####################################
        # Preparation
        #####################################
        #-------------------------------
        # metric
        #-------------------------------
        mAP_RPN = Evaluate_metric(1, overlap_threshold=cfg.MAP_THRESH)
        mAP_CLASSIFICATION = Evaluate_metric(cfg.NUM_CLASSES, ignore_class=[0], overlap_threshold=cfg.MAP_THRESH)
        mAP_MASK = Evaluate_metric(cfg.NUM_CLASSES, ignore_class=[0], overlap_threshold=cfg.MAP_THRESH)
        if mode == 'val':
            data_loader = self.dataloader_val
            data_logger = self.logger_val
        elif mode == 'trainval':
            data_loader = self.dataloader_trainval
            data_logger = self.logger_trainval

        ####################################
        # Accumulate data
        ####################################
        timer = Timer()
        timer.tic()
        print('starting validation....')
        for iter, blobs in enumerate(tqdm(data_loader)):
            # if no box: skip
            if len(blobs['gt_box']) == 0:
                continue

            if cfg.USE_IMAGES:
                grid_shape = blobs['data'].shape[-3:]
                projection_helper = ProjectionHelper(cfg.INTRINSIC, cfg.PROJ_DEPTH_MIN, cfg.PROJ_DEPTH_MAX, cfg.DEPTH_SHAPE, grid_shape, cfg.VOXEL_SIZE)
                proj_mapping = [projection_helper.compute_projection(d.cuda(), c.cuda(), t.cuda()) for d, c, t in zip(blobs['nearest_images']['depths'][0], blobs['nearest_images']['poses'][0], blobs['nearest_images']['world2grid'][0])]

                if None in proj_mapping: #invalid sample
                    continue
                
                blobs['proj_ind_3d'] = []
                blobs['proj_ind_2d'] = []
                proj_mapping0, proj_mapping1 = zip(*proj_mapping)
                blobs['proj_ind_3d'].append(torch.stack(proj_mapping0))
                blobs['proj_ind_2d'].append(torch.stack(proj_mapping1))

            self.net.forward(blobs, 'TEST', [])
            #--------------------------------------
            # RPN: loss, metric 
            #--------------------------------------
            if cfg.USE_RPN:
                # (n, 6)
                gt_box = blobs['gt_box'][0].numpy()[:, 0:6]
                gt_box_label = np.zeros(gt_box.shape[0])

                try:
                    pred_box_num = (self.net._predictions['roi_scores'][0][:, 0] > cfg.ROI_THRESH).nonzero().size(0)
                    pred_box = self.net._predictions['rois'][0].cpu().numpy()[:pred_box_num]
                    pred_box_label = np.zeros(pred_box_num) 
                    pred_box_score = self.net._predictions['roi_scores'][0].cpu().numpy()[:pred_box_num, 0]
                except:
                    pred_box = self.net._predictions['rois'][0].cpu().numpy()[:1]
                    pred_box_label = np.zeros(1)
                    pred_box_score = self.net._predictions['roi_scores'][0].cpu().numpy()[:1, 0]

                #evaluation metric 
                mAP_RPN.evaluate(pred_box,
                                 pred_box_label,
                                 pred_box_score,
                                 gt_box,
                                 gt_box_label)

            #--------------------------------------
            # Classification: loss, metric 
            #--------------------------------------
            if cfg.USE_CLASS:
                # groundtruth
                gt_box = blobs['gt_box'][0].numpy()[:, 0:6]
                gt_class = blobs['gt_box'][0][:, 6].numpy()

                # predictions
                pred_class = self.net._predictions['cls_pred'].data.cpu().numpy()

                # only predictions['rois'] is list and is Tensor / others are no list and Variable
                rois = self.net._predictions['rois'][0].cpu()
                box_reg_pre = self.net._predictions["bbox_pred"].data.cpu().numpy()
                box_reg = np.zeros((box_reg_pre.shape[0], 6))
                pred_conf_pre = self.net._predictions['cls_prob'].data.cpu().numpy()
                pred_conf = np.zeros((pred_conf_pre.shape[0]))


                for pred_ind in range(pred_class.shape[0]):
                    box_reg[pred_ind, :] = box_reg_pre[pred_ind, pred_class[pred_ind]*6:(pred_class[pred_ind]+1)*6]
                    pred_conf[pred_ind] = pred_conf_pre[pred_ind, pred_class[pred_ind]]

                pred_box = bbox_transform_inv(rois, torch.from_numpy(box_reg).float())
                pred_box = clip_boxes(pred_box, self.net._scene_info[:3]).numpy()

                # pickup
                sort_index = []
                for conf_index in range(pred_conf.shape[0]):
                    if pred_conf[conf_index] > cfg.CLASS_THRESH:
                        sort_index.append(True)
                    else:
                        sort_index.append(False)

                # eliminate bad box
                for idx, box in enumerate(pred_box):
                    if round(box[0]) >= round(box[3]) or round(box[1]) >= round(box[4]) or round(box[2]) >= round(box[5]):
                        sort_index[idx] = False

                if len(pred_box[sort_index]) == 0:
                    print('no pred box')

                if iter < cfg.VAL_NUM:
                    os.makedirs('{}/{}'.format(cfg.VAL_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), exist_ok=True)
                    np.save('{}/{}/pred_class'.format(cfg.VAL_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), pred_class)
                    np.save('{}/{}/pred_conf'.format(cfg.VAL_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), pred_conf)
                    np.save('{}/{}/pred_box'.format(cfg.VAL_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), pred_box)
                    np.save('{}/{}/scene'.format(cfg.VAL_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), np.where(blobs['data'][0,0].numpy() <= 1, 1, 0))
                    np.save('{}/{}/gt_class'.format(cfg.VAL_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), gt_class)
                    np.save('{}/{}/gt_box'.format(cfg.VAL_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), gt_box)

                mAP_CLASSIFICATION.evaluate(
                        pred_box[sort_index],
                        pred_class[sort_index],
                        pred_conf[sort_index],
                        gt_box,
                        gt_class)

            #--------------------------------------
            # MASK: loss, metric 
            #--------------------------------------
            if cfg.USE_MASK:
                # gt data
                gt_box = blobs['gt_box'][0].numpy()[:, 0:6]
                gt_class = blobs['gt_box'][0][:, 6].numpy()
                gt_mask = blobs['gt_mask'][0]

                pred_class = self.net._predictions['cls_pred'].data.cpu().numpy()
                pred_conf = np.zeros((pred_class.shape[0]))
                for pred_ind in range(pred_class.shape[0]):
                    pred_conf[pred_ind] = self.net._predictions['cls_prob'].data.cpu().numpy()[pred_ind, pred_class.data[pred_ind]]

                # pickup
                sort_index = pred_conf > cfg.CLASS_THRESH

                # eliminate bad box
                for idx, box in enumerate(pred_box):
                    if round(box[0]) >= round(box[3]) or round(box[1]) >= round(box[4]) or round(box[2]) >= round(box[5]):
                        sort_index[idx] = False

                pred_mask = []
                mask_ind = 0
                for ind, cls in enumerate(pred_class):
                    if sort_index[ind]:
                        mask = self.net._predictions['mask_pred'][0][mask_ind][0][cls].data.cpu().numpy()
                        mask = np.where(mask >=cfg.MASK_THRESH, 1, 0).astype(np.float32)
                        pred_mask.append(mask)
                        mask_ind += 1

                if iter < cfg.VAL_NUM: 
                    pickle.dump(pred_mask, open('{}/{}/pred_mask'.format(cfg.VAL_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), 'wb'))
                    pickle.dump(sort_index, open('{}/{}/pred_mask_index'.format(cfg.VAL_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), 'wb'))
                    pickle.dump(gt_mask, open('{}/{}/gt_mask'.format(cfg.VAL_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), 'wb'))

                mAP_MASK.evaluate_mask(
                        pred_box[sort_index],
                        pred_class[sort_index],
                        pred_conf[sort_index],
                        pred_mask,
                        gt_box,
                        gt_class, 
                        gt_mask, 
                        self.net._scene_info)

            self.net.delete_intermediate_states()
        timer.toc()
        print('It took {:.3f}s for Validation on chunks'.format(timer.total_time()))

        ###################################
        # Summary
        ###################################
        if cfg.USE_RPN:
            mAP_RPN.finalize()
            print('AP of RPN: {}'.format(mAP_RPN.mAP()))
            data_logger.scalar_summary('AP_ROI', mAP_RPN.mAP(), index)

        if cfg.USE_CLASS:
            mAP_CLASSIFICATION.finalize()
            print('mAP of CLASSIFICATION: {}'.format(mAP_CLASSIFICATION.mAP()))
            for class_ind in range(cfg.NUM_CLASSES):
                if class_ind not in mAP_CLASSIFICATION.ignore_class:
                    print('class {}: {}'.format(class_ind, mAP_CLASSIFICATION.AP(class_ind)))
            data_logger.scalar_summary('mAP_CLASSIFICATION', mAP_CLASSIFICATION.mAP(), index)

        if cfg.USE_MASK:
            mAP_MASK.finalize()
            print('mAP of mask: {}'.format(mAP_MASK.mAP()))
            for class_ind in range(cfg.NUM_CLASSES):
                if class_ind not in mAP_MASK.ignore_class:
                    print('class {}: {}'.format(class_ind, mAP_MASK.AP(class_ind)))
            data_logger.scalar_summary('mAP_MASK', mAP_MASK.mAP(), index)
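
The classification branch above selects, for every ROI, the 6 regression deltas and the confidence of its predicted class from the flattened (num_rois, 6 * num_classes) and (num_rois, num_classes) network outputs by looping over ROIs. The snippet below is a hypothetical, vectorized NumPy equivalent of that selection, shown only to make the indexing explicit; the function name and arguments are illustrative, not part of this codebase.

import numpy as np

def select_per_class(box_reg_pre, pred_conf_pre, pred_class):
    # box_reg_pre: (num_rois, 6 * num_classes), pred_conf_pre: (num_rois, num_classes),
    # pred_class: (num_rois,) integer class predictions
    num_rois = pred_class.shape[0]
    rows = np.arange(num_rois)
    # reshape deltas to (num_rois, num_classes, 6) and pick the row of the predicted class
    box_reg = box_reg_pre.reshape(num_rois, -1, 6)[rows, pred_class]
    pred_conf = pred_conf_pre[rows, pred_class]
    return box_reg, pred_conf
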
    #                            + torch.FloatTensor(bbox_normalize_means).to(device)

    # box_deltas = box_deltas.view(1,-1,4)
    # pred_boxes = bbox_transform_inv_3d(tubes, box_deltas, 1)
    # pred_boxes = clip_boxes_3d(pred_boxes, im_info.data, 1)
    # pred_boxes = pred_boxes.view(1,rois.size(1),1,6)

    box_deltas_s = bbox_pred_s.view(-1, 4) * torch.FloatTensor(bbox_normalize_stds_s).to(device) \
                               + torch.FloatTensor(bbox_normalize_means_s).to(device)
    print('box_deltas_s.shape :', box_deltas_s.shape)
    box_deltas_s = box_deltas_s.view(16,10,4)

    print(im_info.data)
    im_info_s = torch.Tensor([[112,112]] * 16).to(device)
    print('im_info_s :', im_info_s)
    pred_boxes_s = bbox_transform_inv(rois, bbox_pred_s, 16)
    pred_boxes_s = clip_boxes(pred_boxes_s,im_info_s , 16)
    pred_boxes_s = pred_boxes_s.view(16,rois.size(1),1,4)

    print('pred_boxes_s.shape :', pred_boxes_s.shape)
    print('pred_boxes_s :', pred_boxes_s)
    # print('bbox_pred.shape :',pred_boxes.shape)
    
    # print(scores)
    # pred_boxes = pred_boxes.data
    # print(pred_boxes_s)
    colors = [ (255,0,0), (0,255,0), (0,0,255)]
    clips2 = clips2.squeeze().permute(1,2,3,0)

    print('rois.shape  :',rois.shape )
    for i in range(16): # frame