def demo(sess, net, image_name, memory_storex, memory_storey, kitti_memory_0323, AN, sess2):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image
    im = cv2.imread(image_name)
    im = cv2.resize(im, (1242, 375))

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, bbox_pred, _, rois, fc = im_detect(sess, net, im, memory_storex, memory_storey)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, bbox_pred.shape[0]))

    # Visualize detections for each class
    CONF_THRESH = 0.1
    NMS_THRESH = 0.1
    im_shape = im.shape[:2]
    box_deltas = bbox_pred
    pred_boxes = bbox_transform_inv(rois, box_deltas)
    boxes = clip_boxes(pred_boxes, im_shape)
    # show.vis_detections(image_name, scores, boxes, dis_pre, fc, NMS_THRESH, CONF_THRESH)
    show.vis_detections(image_name, scores, boxes, fc, kitti_memory_0323, AN, sess2,
                        NMS_THRESH, CONF_THRESH)
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """A simplified version compared to fast/er RCNN
       For details please see the technical report
    """
    if type(cfg_key) == bytes:
        cfg_key = cfg_key.decode('utf-8')
    if cfg_key == "TRAIN":
        pre_nms_topN = cfg.FLAGS.rpn_train_pre_nms_top_n
        post_nms_topN = cfg.FLAGS.rpn_train_post_nms_top_n
        nms_thresh = cfg.FLAGS.rpn_train_nms_thresh
    else:
        pre_nms_topN = cfg.FLAGS.rpn_test_pre_nms_top_n
        post_nms_topN = cfg.FLAGS.rpn_test_post_nms_top_n
        nms_thresh = cfg.FLAGS.rpn_test_nms_thresh
    # Read from the config file:
    #   post_nms_topN (number of proposals kept after NMS)
    #   nms_thresh (NMS threshold)
    # Learned parameters: rpn_bbox_pred.
    # The proposals given by the raw anchors are transformed by these learned
    # parameters into boxes close to the ground truth, and the parts that fall
    # outside the image are clipped off.

    im_info = im_info[0]
    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    # bbox_transform_inv:
    # Shift and scale each anchor box by the offsets computed earlier (the
    # offsets are exactly the rpn_bbox_pred being learned) to obtain the final
    # predicted box. That is, the raw proposal A, via the parameters learned in
    # rpn_bbox_pred, yields a predicted box G' close to the ground truth G.
    proposals = clip_boxes(proposals, im_info[:2])
    # clip_boxes:
    # Clip off the parts that extend beyond the original image boundary.

    # Pick the top region proposals
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Pick the top region proposals after NMS
    # Run NMS to obtain the final proposals
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, scores
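# ---------------------------------------------------------------------------
# The comments above lean on two helpers that are not defined in this file.
# Below is a minimal NumPy sketch of both, following the standard
# py-faster-rcnn definitions; the repo's actual versions may differ in
# details (dtype handling, the +1 box-width convention, batching).
# ---------------------------------------------------------------------------
import numpy as np

def bbox_transform_inv(boxes, deltas):
    # Shift and scale each anchor by its deltas (dx, dy, dw, dh) to get the
    # predicted box G' described in the comments above.
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights
    dx, dy, dw, dh = deltas[:, 0], deltas[:, 1], deltas[:, 2], deltas[:, 3]
    pred_ctr_x = dx * widths + ctr_x
    pred_ctr_y = dy * heights + ctr_y
    pred_w = np.exp(dw) * widths
    pred_h = np.exp(dh) * heights
    pred_boxes = np.zeros_like(deltas)
    pred_boxes[:, 0] = pred_ctr_x - 0.5 * pred_w
    pred_boxes[:, 1] = pred_ctr_y - 0.5 * pred_h
    pred_boxes[:, 2] = pred_ctr_x + 0.5 * pred_w
    pred_boxes[:, 3] = pred_ctr_y + 0.5 * pred_h
    return pred_boxes

def clip_boxes(boxes, im_shape):
    # Clamp x1, x2 into [0, W-1] and y1, y2 into [0, H-1]; im_shape is (H, W).
    boxes[:, 0::4] = np.clip(boxes[:, 0::4], 0, im_shape[1] - 1)
    boxes[:, 1::4] = np.clip(boxes[:, 1::4], 0, im_shape[0] - 1)
    boxes[:, 2::4] = np.clip(boxes[:, 2::4], 0, im_shape[1] - 1)
    boxes[:, 3::4] = np.clip(boxes[:, 3::4], 0, im_shape[0] - 1)
    return boxes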
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """A simplified version compared to fast/er RCNN
       For details please see the technical report
    """
    if type(cfg_key) == bytes:
        cfg_key = cfg_key.decode('utf-8')
    if cfg_key == "TRAIN":
        # Before NMS, keep 12000 boxes
        pre_nms_topN = cfg.FLAGS.rpn_train_pre_nms_top_n
        # After NMS, keep 2000 boxes
        post_nms_topN = cfg.FLAGS.rpn_train_post_nms_top_n
        # NMS threshold 0.7: boxes whose IoU with the highest-scoring box exceeds 0.7 are discarded
        nms_thresh = cfg.FLAGS.rpn_train_nms_thresh
    else:
        # Before NMS, keep 6000 boxes
        pre_nms_topN = cfg.FLAGS.rpn_test_pre_nms_top_n
        # After NMS, keep 300 boxes
        post_nms_topN = cfg.FLAGS.rpn_test_post_nms_top_n
        # NMS threshold 0.7
        nms_thresh = cfg.FLAGS.rpn_test_nms_thresh

    im_info = im_info[0]
    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals
    order = scores.ravel().argsort()[::-1]
    # By classification score, take the top pre_nms_topN boxes (12000 train / 6000 test)
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Pick the top region proposals after NMS
    # Keep post_nms_topN boxes after NMS (2000 train / 300 test)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    # Return the boxes and their classification scores
    return blob, scores
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """A simplified version compared to fast/er RCNN
       For details please see the technical report
    """
    if type(cfg_key) == bytes:
        cfg_key = cfg_key.decode('utf-8')
    # At train time: order the anchors by score, take the top 12000, run NMS,
    # then keep the top 2000. At test time the numbers become 6000 and 300.
    if cfg_key == "TRAIN":
        pre_nms_topN = cfg.FLAGS.rpn_train_pre_nms_top_n
        post_nms_topN = cfg.FLAGS.rpn_train_post_nms_top_n
        nms_thresh = cfg.FLAGS.rpn_train_nms_thresh
    else:
        pre_nms_topN = cfg.FLAGS.rpn_test_pre_nms_top_n
        post_nms_topN = cfg.FLAGS.rpn_test_post_nms_top_n
        nms_thresh = cfg.FLAGS.rpn_test_nms_thresh

    im_info = im_info[0]
    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    # Each row corresponds to one anchor; every 9 rows correspond to the
    # 9 anchors generated at one location
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))  # same layout: one anchor per row, 9 rows per location

    # First refinement of the anchor positions, using the deltas produced by the RPN layers
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    # Clip boxes to image boundaries.
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals
    # argsort() sorts the array elements in ascending order and returns the corresponding indices
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    # The returned blob has one extra column prepended
    return blob, scores
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """A simplified version compared to fast/er RCNN
       For details please see the technical report
    """
    if type(cfg_key) == bytes:
        cfg_key = cfg_key.decode('utf-8')
    if cfg_key == "TRAIN":
        pre_nms_topN = cfg.FLAGS.rpn_train_pre_nms_top_n    # 'rpn_train_pre_nms_top_n', 12000
        post_nms_topN = cfg.FLAGS.rpn_train_post_nms_top_n  # 'rpn_train_post_nms_top_n', 2000
        nms_thresh = cfg.FLAGS.rpn_train_nms_thresh         # 'rpn_train_nms_thresh', 0.7
    else:
        pre_nms_topN = cfg.FLAGS.rpn_test_pre_nms_top_n
        post_nms_topN = cfg.FLAGS.rpn_test_post_nms_top_n
        nms_thresh = cfg.FLAGS.rpn_test_nms_thresh

    im_info = im_info[0]
    # Get the scores and bounding boxes
    # [1, H, W, 2*num_anchors]: 18 channels ordered (bg, fg); only the 9 fg channels are taken here
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))  # (, height, width, A * 4)
    scores = scores.reshape((-1, 1))

    # Compute the predicted coordinates after applying the offsets
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    # 2. clip predicted boxes to image: trim the predicted boxes to the image extent
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals
    # ravel() flattens; argsort() sorts the elements in ascending order and
    # returns their indices; [::-1] reverses them, giving descending order
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]  # take the pre_nms_topN entries with the largest scores
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, scores
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, anchors_dis, num_anchors):
    """A simplified version compared to fast/er RCNN
       For details please see the technical report
    """
    if type(cfg_key) == bytes:
        cfg_key = cfg_key.decode('utf-8')
    if cfg_key == "TRAIN":
        pre_nms_topN = cfg.FLAGS.rpn_train_pre_nms_top_n
        post_nms_topN = cfg.FLAGS.rpn_train_post_nms_top_n
        nms_thresh = cfg.FLAGS.rpn_train_nms_thresh
    else:
        pre_nms_topN = cfg.FLAGS.rpn_test_pre_nms_top_n
        post_nms_topN = cfg.FLAGS.rpn_test_post_nms_top_n
        nms_thresh = cfg.FLAGS.rpn_test_nms_thresh

    im_info = im_info[0]
    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))
    # print("anchors_dis", anchors_dis.shape)  # anc (16848, 4) (16848, 4)
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    proposals_dis = anchors_dis[order, :]
    scores = scores[order]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    proposals_dis = proposals_dis[keep, :]
    scores = scores[keep]

    # Only support single image as input
    # proposals.shape[0]: 2000, blob.shape: [2000, 8]
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False),
                      proposals_dis.astype(np.float32, copy=False)))
    return blob, scores
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    if type(cfg_key) == bytes:
        cfg_key = cfg_key.decode('utf-8')
    if cfg_key == "TRAIN":
        pre_nms_topN = cfg.FLAGS.rpn_train_pre_nms_top_n    # 12000
        post_nms_topN = cfg.FLAGS.rpn_train_post_nms_top_n  # 2000
        nms_thresh = cfg.FLAGS.rpn_train_nms_thresh         # 0.7
    else:
        pre_nms_topN = cfg.FLAGS.rpn_test_pre_nms_top_n     # 6000
        post_nms_topN = cfg.FLAGS.rpn_test_post_nms_top_n   # 300
        nms_thresh = cfg.FLAGS.rpn_test_nms_thresh          # 0.7

    # im_info has shape (1, 3), so take the first row
    im_info = im_info[0]
    # 1 * H * W * 9: other dims unchanged; the last 9 of the 18 channels are the foreground scores
    scores = rpn_cls_prob[:, :, :, num_anchors:]  # 9
    # 9WH * 4 offsets
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    # 9WH scores
    scores = scores.reshape((-1, 1))
    # Adjust the anchors by the offsets to obtain the proposals
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    # Trim the proposals: clip anything outside the boundary back into the image
    proposals = clip_boxes(proposals, im_info[:2])  # im_info[:2] is the image height and width

    # Sort the scores in descending order; `order` stores the indices
    order = scores.ravel().argsort()[::-1]
    # Using those indices, keep the top 12000 proposals and scores
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximum suppression
    # np.hstack((proposals, scores)) gives rows [x1, y1, x2, y2, score]
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # keep is already sorted by score, so just take the first 2000
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Prepend one column to the proposals; the first column is all 0.0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, scores
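# A toy sketch of the greedy nms() these layers call, assuming the classic
# Fast R-CNN CPU version: `dets` rows are [x1, y1, x2, y2, score], i.e. the
# np.hstack((proposals, scores)) layout built above, and the return value is
# the kept row indices in descending-score order. The repo most likely uses
# a compiled (Cython/GPU) variant with the same interface.
import numpy as np

def nms(dets, thresh):
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # indices, highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # IoU of the current top box with every remaining box
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # drop boxes overlapping the kept box by more than thresh
        order = order[np.where(iou <= thresh)[0] + 1]
    return keep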
def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, anchors, num_anchors):
    """A layer that just selects the top region proposals
       without using non-maximal suppression,
       For details please see the technical report
    """
    # 300: "Only useful when TEST.MODE is 'top', specifies the number of top proposals to select"
    rpn_top_n = cfg.FLAGS.rpn_top_n

    im_info = im_info[0]
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))

    length = scores.shape[0]
    if length < rpn_top_n:
        # Random selection, maybe unnecessary and loses good proposals
        # But such case rarely happens
        top_inds = npr.choice(length, size=rpn_top_n, replace=True)
    else:
        top_inds = scores.argsort(0)[::-1]  # slice syntax: [start : end : step]
        top_inds = top_inds[:rpn_top_n]
        top_inds = top_inds.reshape(rpn_top_n, )

    # Do the selection here
    anchors = anchors[top_inds, :]
    rpn_bbox_pred = rpn_bbox_pred[top_inds, :]
    scores = scores[top_inds]

    # Convert anchors into proposals via bbox transformations
    # First refinement of the anchor positions, using the RPN regression output
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)

    # Clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, scores
def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, anchors, num_anchors):
    # Keep only 300 RoIs
    rpn_top_n = cfg.FLAGS.rpn_top_n  # 300

    # im_info has shape (1, 3), so take the first row
    im_info = im_info[0]
    # 1 * H * W * 9: other dims unchanged; the last 9 of the 18 channels are the foreground scores
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    # 9WH * 4 offsets
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    # 9WH scores
    scores = scores.reshape((-1, 1))

    length = scores.shape[0]  # 9HW
    if length < rpn_top_n:
        # Fewer than 300 anchors (9HW < 300):
        # sample randomly, with replacement, until we have 300
        top_inds = npr.choice(length, size=rpn_top_n, replace=True)
    else:
        # Take the 300 highest scores
        top_inds = scores.argsort(0)[::-1]
        top_inds = top_inds[:rpn_top_n]
        top_inds = top_inds.reshape(rpn_top_n, )

    # Pull out the top-300 anchors, bbox deltas, and scores
    anchors = anchors[top_inds, :]
    rpn_bbox_pred = rpn_bbox_pred[top_inds, :]
    scores = scores[top_inds]

    # Refine the anchors with the bbox offsets
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    # Clip off anything outside the image
    proposals = clip_boxes(proposals, im_info[:2])

    # Prepend one column to the proposals; the first column is all 0.0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, scores
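# A tiny worked example of the descending-sort idiom used in the layers
# above: ravel() flattens, argsort() returns ascending indices, [::-1]
# reverses them. The values here are made up.
import numpy as np

scores = np.array([[0.2], [0.9], [0.5]])
order = scores.ravel().argsort()[::-1]   # -> array([1, 2, 0])
top2 = scores[order[:2]]                 # the two highest scores: 0.9, 0.5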
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    if type(cfg_key) == bytes:
        cfg_key = cfg_key.decode('utf-8')
    if cfg_key == "TRAIN":
        pre_nms_topN = cfg.FLAGS.rpn_train_pre_nms_top_n
        post_nms_topN = cfg.FLAGS.rpn_train_post_nms_top_n
        nms_thresh = cfg.FLAGS.rpn_train_nms_thresh
    else:
        pre_nms_topN = cfg.FLAGS.rpn_test_pre_nms_top_n
        post_nms_topN = cfg.FLAGS.rpn_test_post_nms_top_n
        nms_thresh = cfg.FLAGS.rpn_test_nms_thresh

    im_info = im_info[0]
    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, scores
def proposal_layer(rpn_cls_prob_level1, rpn_bbox_pred_level1, all_anchors_level1,
                   rpn_cls_prob_level2, rpn_bbox_pred_level2, all_anchors_level2,
                   rpn_cls_prob_level3, rpn_bbox_pred_level3, all_anchors_level3,
                   scene_info, cfg_key, anchors_filter_level1, anchors_filter_level2, anchors_filter_level3):
    """
    :param rpn_cls_prob <Tensor>: (1, 2, H, W, L, num_anchors)
    :param rpn_bbox_pred <Tensor>: (1, H, W, L, num_anchorsx6), coord. of boxes
    :param scene_info: [64, 32, 64] height, width, length
    :param cfg_key: "TRAIN" or "TEST"
    :param anchors: (NUM_ANCHORSxWxHxL, 6)
    :return: rois in feature map
    """
    # Number of top scoring boxes to keep before applying NMS to RPN proposals
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    # Number of top scoring boxes to keep after applying NMS to RPN proposals
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    # NMS threshold used on RPN proposals
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # only keep anchors inside the image
    _allowed_border = cfg.ALLOW_BORDER

    if cfg.NUM_ANCHORS_LEVEL1 != 0:
        inds_inside_level1 = np.where(
            (all_anchors_level1[:, 0] >= -_allowed_border) &
            (all_anchors_level1[:, 1] >= -_allowed_border) &
            (all_anchors_level1[:, 2] >= -_allowed_border) &
            (all_anchors_level1[:, 3] < scene_info[0] + _allowed_border) &  # width
            (all_anchors_level1[:, 4] < scene_info[1] + _allowed_border) &  # height
            (all_anchors_level1[:, 5] < scene_info[2] + _allowed_border)    # length
        )[0]
        if anchors_filter_level1 is not None:
            if len(anchors_filter_level1) == 0:
                anchors_filter_level1 = [0]
            inds_inside_level1 = inds_inside_level1[anchors_filter_level1]
        anchors_level1 = all_anchors_level1[inds_inside_level1, :]

    if cfg.NUM_ANCHORS_LEVEL2 != 0:
        inds_inside_level2 = np.where(
            (all_anchors_level2[:, 0] >= -_allowed_border) &
            (all_anchors_level2[:, 1] >= -_allowed_border) &
            (all_anchors_level2[:, 2] >= -_allowed_border) &
            (all_anchors_level2[:, 3] < scene_info[0] + _allowed_border) &  # width
            (all_anchors_level2[:, 4] < scene_info[1] + _allowed_border) &  # height
            (all_anchors_level2[:, 5] < scene_info[2] + _allowed_border)    # length
        )[0]
        if anchors_filter_level2 is not None:
            if len(anchors_filter_level2) == 0:
                anchors_filter_level2 = [0]
            inds_inside_level2 = inds_inside_level2[anchors_filter_level2]
        anchors_level2 = all_anchors_level2[inds_inside_level2, :]

    if cfg.NUM_ANCHORS_LEVEL3 != 0:
        inds_inside_level3 = np.where(
            (all_anchors_level3[:, 0] >= -_allowed_border) &
            (all_anchors_level3[:, 1] >= -_allowed_border) &
            (all_anchors_level3[:, 2] >= -_allowed_border) &
            (all_anchors_level3[:, 3] < scene_info[0] + _allowed_border) &  # width
            (all_anchors_level3[:, 4] < scene_info[1] + _allowed_border) &  # height
            (all_anchors_level3[:, 5] < scene_info[2] + _allowed_border)    # length
        )[0]
        if anchors_filter_level3 is not None:
            if len(anchors_filter_level3) == 0:
                anchors_filter_level3 = [0]
            inds_inside_level3 = inds_inside_level3[anchors_filter_level3]
        anchors_level3 = all_anchors_level3[inds_inside_level3, :]

    # Get the scores and the bounding boxes
    proposals_batch = []
    scores_batch = []
    levelInds_batch = []
    for i in range(cfg.BATCH_SIZE):
        if cfg.NUM_ANCHORS_LEVEL1 != 0:
            #-------------------------
            # level 1
            #-------------------------
            # (wxhxlxnum_anchors, 6)
            rpn_bbox_pred_reshape_level1 = rpn_bbox_pred_level1[i].view(-1, 6)[inds_inside_level1, :]
            # (wxhxlxnum_anchors)
            scores_level1 = rpn_cls_prob_level1[i, 1, :, :, :, :].view(-1, 1)[inds_inside_level1, :]
            # anchors is in the scene coord
            # return the proposals on scene coord.
            proposals_level1 = bbox_transform_inv(anchors_level1, rpn_bbox_pred_reshape_level1)
            proposals_level1 = clip_boxes(proposals_level1, scene_info[:3])

        if cfg.NUM_ANCHORS_LEVEL2 != 0:
            #-------------------------
            # level 2
            #-------------------------
            # (wxhxlxnum_anchors, 6)
            rpn_bbox_pred_reshape_level2 = rpn_bbox_pred_level2[i].view(-1, 6)[inds_inside_level2, :]
            # (wxhxlxnum_anchors)
            scores_level2 = rpn_cls_prob_level2[i, 1, :, :, :, :].view(-1, 1)[inds_inside_level2, :]
            # anchors is in the scene coord
            # return the proposals on scene coord.
            proposals_level2 = bbox_transform_inv(anchors_level2, rpn_bbox_pred_reshape_level2)
            proposals_level2 = clip_boxes(proposals_level2, scene_info[:3])
            #TODO: eliminate bad box

        if cfg.NUM_ANCHORS_LEVEL3 != 0:
            #-------------------------
            # level 3
            #-------------------------
            # (wxhxlxnum_anchors, 6)
            rpn_bbox_pred_reshape_level3 = rpn_bbox_pred_level3[i].view(-1, 6)[inds_inside_level3, :]
            # (wxhxlxnum_anchors)
            scores_level3 = rpn_cls_prob_level3[i, 1, :, :, :, :].view(-1, 1)[inds_inside_level3, :]
            # anchors is in the scene coord
            # return the proposals on scene coord.
            proposals_level3 = bbox_transform_inv(anchors_level3, rpn_bbox_pred_reshape_level3)
            proposals_level3 = clip_boxes(proposals_level3, scene_info[:3])
            #TODO: eliminate bad box

        #------------------------
        # combine
        #------------------------
        proposals_combined_list = []
        scores_combined_list = []
        levelInds_combined_list = []
        if cfg.NUM_ANCHORS_LEVEL1 != 0:
            proposals_combined_list.append(proposals_level1)
            scores_combined_list.append(scores_level1)
            levelInds_combined_list.append(torch.ones_like(scores_level1))
        if cfg.NUM_ANCHORS_LEVEL2 != 0:
            proposals_combined_list.append(proposals_level2)
            scores_combined_list.append(scores_level2)
            levelInds_combined_list.append(torch.ones_like(scores_level2) * 2)
        if cfg.NUM_ANCHORS_LEVEL3 != 0:
            proposals_combined_list.append(proposals_level3)
            scores_combined_list.append(scores_level3)
            levelInds_combined_list.append(torch.ones_like(scores_level3) * 3)
        proposals = torch.cat(proposals_combined_list, 0)
        scores = torch.cat(scores_combined_list, 0)[:, 0]
        levelInds = torch.cat(levelInds_combined_list, 0)[:, 0]

        #proposals = proposals_level2
        #scores = scores_level2[:,0]
        #box = np.stack([np.concatenate([proposals[5222].cpu().numpy(), np.ones(1)], 0),
        #                np.concatenate([proposals[5228].cpu().numpy(), np.ones(1)], 0),
        #                np.concatenate([proposals[5229].cpu().numpy(), np.ones(1)], 0),
        #                np.concatenate([proposals[5319].cpu().numpy(), np.ones(1)], 0),
        #                np.concatenate([proposals[5356].cpu().numpy(), np.ones(1)], 0),
        #                np.concatenate([proposals[5393].cpu().numpy(), np.ones(1)], 0)], 0)
        #visualize('./vis', 'pos_proposal', data=None, bbox=box)
        #box = np.stack([np.concatenate([proposals[5222].cpu().numpy(), np.ones(1)], 0),
        #                np.concatenate([proposals[5223].cpu().numpy(), np.ones(1)], 0),
        #                np.concatenate([proposals[5224].cpu().numpy(), np.ones(1)], 0),
        #                np.concatenate([proposals[5225].cpu().numpy(), np.ones(1)], 0),
        #                np.concatenate([proposals[5226].cpu().numpy(), np.ones(1)], 0),
        #                np.concatenate([proposals[5227].cpu().numpy(), np.ones(1)], 0)], 0)
        #visualize('./vis', 'neg_proposal', data=None, bbox=box)

        # pick up the top region proposals
        scores, order = scores.sort(descending=True)
        #ipdb.set_trace()
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
            scores = scores[:pre_nms_topN].view(-1, 1)
        proposals = proposals[order, :]
        levelInds = levelInds[order]

        # Non-maximal suppression
        keep = nms(proposals, nms_thresh)

        # pick up the top region proposals after NMS
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep, ]
        levelInds = levelInds[keep, ]

        # support more than 1 scene
        proposals_batch.append(proposals)
        scores_batch.append(scores)
        levelInds_batch.append(levelInds)

    return proposals_batch, scores_batch, levelInds_batch
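# For the 3D variant above, clip_boxes works on 6-coordinate corner boxes.
# A minimal torch sketch under that assumption: boxes as
# (x1, y1, z1, x2, y2, z2) rows and scene_dims the three grid extents implied
# by the inds_inside checks. The repo's actual implementation may differ.
import torch

def clip_boxes(boxes, scene_dims):
    # Clamp both corners of each axis into [0, dim - 1].
    for axis in range(3):
        boxes[:, axis] = boxes[:, axis].clamp(0, scene_dims[axis] - 1)          # min corner
        boxes[:, axis + 3] = boxes[:, axis + 3].clamp(0, scene_dims[axis] - 1)  # max corner
    return boxes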
def forward(self, input):
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs
    scores = input[0][:, self._num_anchors:, :, :]
    bbox_frame = input[1]
    im_info = input[2]
    cfg_key = input[3]
    time_dim = input[4]

    batch_size = bbox_frame.size(0)

    pre_nms_topN = conf[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = conf[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = conf[cfg_key].RPN_NMS_THRESH
    min_size = conf[cfg_key].RPN_MIN_SIZE

    ##################
    # Create anchors #
    ##################
    feat_height, feat_width = scores.size(2), scores.size(3)  # (batch_size, 512/256, 7, 7)
    shift_x = np.arange(0, feat_width) * self._feat_stride
    shift_y = np.arange(0, feat_height) * self._feat_stride
    shift_z = np.arange(0, 1)
    shift_x, shift_y, shift_z = np.meshgrid(shift_x, shift_y, shift_z)
    shifts = torch.from_numpy(
        np.vstack((shift_x.ravel(), shift_y.ravel(), shift_z.ravel(),
                   shift_x.ravel(), shift_y.ravel(), shift_z.ravel())).transpose())
    shifts = shifts.contiguous().type_as(scores).float()

    A = self._num_anchors
    K = shifts.size(0)

    self._anchors = self._anchors.type_as(scores)
    anchors = self._anchors.view(1, A, 6) + shifts.view(K, 1, 6)
    anchors = anchors.view(1, K * A, 6)
    anchors = anchors.expand(batch_size, K * A, 6)

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    bbox_frame = bbox_frame.permute(0, 2, 3, 1).contiguous()
    bbox_frame = bbox_frame.view(batch_size, -1, 4)

    # Same story for the scores:
    scores = scores.permute(0, 2, 3, 1).contiguous()
    scores = scores.view(batch_size, -1)

    """ we have 16 frames, and 28224 3d anchors for each 16 frames """
    # Convert anchors into proposals via bbox transformations
    # proposals = bbox_frames_transform_inv(anchors, bbox_deltas, batch_size)
    anchors_xy = anchors[:, :, [0, 1, 3, 4]]
    proposals_xy = bbox_transform_inv(anchors_xy, bbox_frame, batch_size)  # proposals have 441 * time_dim shape
    # if any dimension exceeds the dims of the original image, clamp_ them
    proposals_xy = clip_boxes(proposals_xy, im_info, batch_size)

    proposals = torch.cat((proposals_xy[:, :, [0, 1]], anchors[:, :, 2].unsqueeze(2),
                           proposals_xy[:, :, [2, 3]], anchors[:, :, 5].unsqueeze(2)), dim=2)

    scores_keep = scores
    proposals_keep = proposals

    _, order = torch.sort(scores, 1, True)

    output = scores.new(batch_size, post_nms_topN, 8).zero_()
    # print('output.shape :', output.shape)
    for i in range(batch_size):
        # # 3. remove predicted boxes with either height or width < threshold
        # # (NOTE: convert min_size to input image scale stored in im_info[2])
        proposals_single = proposals_keep[i]
        scores_single = scores_keep[i]
        # print('scores_single.shape :', scores_single.shape)

        # # 4. sort all (proposal, score) pairs by score from highest to lowest
        # # 5. take top pre_nms_topN (e.g. 6000)
        order_single = order[i]

        proposals_single = proposals_single[order_single, :]
        scores_single = scores_single[order_single].view(-1, 1)
        proposals_single = proposals_single[:post_nms_topN, :]
        scores_single = scores_single[:post_nms_topN]
        # print('scores_single.shape :', scores_single.shape)

        # padding 0 at the end.
        num_proposal = proposals_single.size(0)
        output[i, :num_proposal, 0] = i
        output[i, :num_proposal, 1:7] = proposals_single
        output[i, :num_proposal, 7] = scores_single.squeeze()

    return output
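# A small numeric illustration of the shift/broadcast trick used in
# forward() above: K per-cell offsets broadcast against A base anchors give
# all K*A anchors in a single add. The base anchors and grid size here are
# made up for the demo.
import numpy as np
import torch

feat_stride = 16
base_anchors = torch.tensor([[-8., -8., 8., 8.],       # A = 2 toy anchors
                             [-16., -16., 16., 16.]])
shift_x, shift_y = np.meshgrid(np.arange(0, 2) * feat_stride,
                               np.arange(0, 2) * feat_stride)
shifts = torch.from_numpy(np.vstack((shift_x.ravel(), shift_y.ravel(),
                                     shift_x.ravel(), shift_y.ravel())).transpose()).float()
A, K = base_anchors.size(0), shifts.size(0)             # A = 2 anchors, K = 4 cells
anchors = base_anchors.view(1, A, 4) + shifts.view(K, 1, 4)
print(anchors.view(K * A, 4).shape)                     # torch.Size([8, 4])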
def test(net, data_loader, data_logger):
    #####################################
    # Preparation
    #####################################
    os.makedirs(cfg.TEST_SAVE_DIR, exist_ok=True)
    mAP_CLASSIFICATION = Evaluate_metric(cfg.NUM_CLASSES, ignore_class=[0], overlap_threshold=cfg.MAP_THRESH)
    mAP_MASK = Evaluate_metric(cfg.NUM_CLASSES, ignore_class=[0], overlap_threshold=cfg.MAP_THRESH)

    ####################################
    # Accumulate data
    ####################################
    pred_all = {}
    gt_all = {}

    timer = Timer()
    timer.tic()
    print('starting test on whole scan....')
    for iter, blobs in enumerate(tqdm(data_loader)):
        try:
            gt_box = blobs['gt_box'][0].numpy()[:, 0:6]
            gt_class = blobs['gt_box'][0][:, 6].numpy()
        except:
            continue

        # color proj
        killing_inds = None
        if cfg.USE_IMAGES:
            grid_shape = blobs['data'].shape[-3:]
            projection_helper = ProjectionHelper(cfg.INTRINSIC, cfg.PROJ_DEPTH_MIN, cfg.PROJ_DEPTH_MAX,
                                                 cfg.DEPTH_SHAPE, grid_shape, cfg.VOXEL_SIZE)
            if grid_shape[0] * grid_shape[1] * grid_shape[2] > cfg.MAX_VOLUME or blobs['nearest_images']['depths'][0].shape[0] > cfg.MAX_IMAGE:
                proj_mapping = [projection_helper.compute_projection(d, c, t)
                                for d, c, t in zip(blobs['nearest_images']['depths'][0],
                                                   blobs['nearest_images']['poses'][0],
                                                   blobs['nearest_images']['world2grid'][0])]
            else:
                proj_mapping = [projection_helper.compute_projection(d.cuda(), c.cuda(), t.cuda())
                                for d, c, t in zip(blobs['nearest_images']['depths'][0],
                                                   blobs['nearest_images']['poses'][0],
                                                   blobs['nearest_images']['world2grid'][0])]

            killing_inds = []
            real_proj_mapping = []
            if None in proj_mapping:  # invalid sample
                for killing_ind, killing_item in enumerate(proj_mapping):
                    if killing_item == None:
                        killing_inds.append(killing_ind)
                    else:
                        real_proj_mapping.append(killing_item)
                print('{}: (invalid sample: no valid projection)'.format(blobs['id']))
            else:
                real_proj_mapping = proj_mapping
            blobs['proj_ind_3d'] = []
            blobs['proj_ind_2d'] = []
            proj_mapping0, proj_mapping1 = zip(*real_proj_mapping)
            blobs['proj_ind_3d'].append(torch.stack(proj_mapping0))
            blobs['proj_ind_2d'].append(torch.stack(proj_mapping1))

        net.forward(blobs, 'TEST', killing_inds)

        # test with detection pipeline
        pred_class = net._predictions['cls_pred'].data.cpu().numpy()
        rois = net._predictions['rois'][0].cpu()
        box_reg_pre = net._predictions["bbox_pred"].data.cpu().numpy()
        box_reg = np.zeros((box_reg_pre.shape[0], 6))
        pred_conf_pre = net._predictions['cls_prob'].data.cpu().numpy()
        pred_conf = np.zeros((pred_conf_pre.shape[0]))

        for pred_ind in range(pred_class.shape[0]):
            box_reg[pred_ind, :] = box_reg_pre[pred_ind, pred_class[pred_ind]*6:(pred_class[pred_ind]+1)*6]
            pred_conf[pred_ind] = pred_conf_pre[pred_ind, pred_class[pred_ind]]

        pred_box = bbox_transform_inv(rois, torch.from_numpy(box_reg).float())
        pred_box = clip_boxes(pred_box, net._scene_info[:3]).numpy()

        os.makedirs('{}/{}'.format(cfg.TEST_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), exist_ok=True)
        np.save('{}/{}/pred_class'.format(cfg.TEST_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), pred_class)
        np.save('{}/{}/pred_conf'.format(cfg.TEST_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), pred_conf)
        np.save('{}/{}/pred_box'.format(cfg.TEST_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), pred_box)
        np.save('{}/{}/scene'.format(cfg.TEST_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]),
                np.where(blobs['data'][0, 0].numpy() <= 1, 1, 0))
        np.save('{}/{}/gt_class'.format(cfg.TEST_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), gt_class)
        np.save('{}/{}/gt_box'.format(cfg.TEST_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), gt_box)

        # pickup
        sort_index = []
        for conf_index in range(pred_conf.shape[0]):
            if pred_conf[conf_index] > cfg.CLASS_THRESH:
                sort_index.append(True)
            else:
                sort_index.append(False)

        # eliminate bad box
        for idx, box in enumerate(pred_box):
            if round(box[0]) >= round(box[3]) or round(box[1]) >= round(box[4]) or round(box[2]) >= round(box[5]):
                sort_index[idx] = False

        mAP_CLASSIFICATION.evaluate(
            pred_box[sort_index],
            pred_class[sort_index],
            pred_conf[sort_index],
            gt_box,
            gt_class)

        if cfg.USE_MASK:
            gt_mask = blobs['gt_mask'][0]
            # pickup
            sort_index = []
            for conf_index in range(pred_conf.shape[0]):
                if pred_conf[conf_index] > cfg.CLASS_THRESH:
                    sort_index.append(True)
                else:
                    sort_index.append(False)

            # eliminate bad box
            for idx, box in enumerate(pred_box):
                if round(box[0]) >= round(box[3]) or round(box[1]) >= round(box[4]) or round(box[2]) >= round(box[5]):
                    sort_index[idx] = False

            # test with mask pipeline
            net.mask_backbone.eval()
            net.mask_backbone.cuda()
            mask_pred_batch = []
            for net_i in range(1):
                mask_pred = []
                for pred_box_ind, pred_box_item in enumerate(pred_box):
                    if sort_index[pred_box_ind]:
                        mask_pred.append(net.mask_backbone(
                            Variable(blobs['data'].cuda())[net_i:net_i+1, :,
                                int(round(pred_box_item[0])):int(round(pred_box_item[3])),
                                int(round(pred_box_item[1])):int(round(pred_box_item[4])),
                                int(round(pred_box_item[2])):int(round(pred_box_item[5]))],
                            [] if cfg.USE_IMAGES else None))
                mask_pred_batch.append(mask_pred)
            net._predictions['mask_pred'] = mask_pred_batch

            # save test result
            pred_mask = []
            mask_ind = 0
            for ind, cls in enumerate(pred_class):
                if sort_index[ind]:
                    mask = net._predictions['mask_pred'][0][mask_ind][0][cls].data.cpu().numpy()
                    mask = np.where(mask >= cfg.MASK_THRESH, 1, 0).astype(np.float32)
                    pred_mask.append(mask)
                    mask_ind += 1

            pickle.dump(pred_mask, open('{}/{}/pred_mask'.format(cfg.TEST_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), 'wb'))
            pickle.dump(sort_index, open('{}/{}/pred_mask_index'.format(cfg.TEST_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), 'wb'))
            pickle.dump(gt_mask, open('{}/{}/gt_mask'.format(cfg.TEST_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), 'wb'))

            mAP_MASK.evaluate_mask(
                pred_box[sort_index],
                pred_class[sort_index],
                pred_conf[sort_index],
                pred_mask,
                gt_box,
                gt_class,
                gt_mask,
                net._scene_info)

    timer.toc()
    print('It took {:.3f}s for test on whole scenes'.format(timer.total_time()))

    ###################################
    # Summary
    ###################################
    if cfg.USE_CLASS:
        mAP_CLASSIFICATION.finalize()
        print('mAP of CLASSIFICATION: {}'.format(mAP_CLASSIFICATION.mAP()))
        for class_ind in range(cfg.NUM_CLASSES):
            if class_ind not in mAP_CLASSIFICATION.ignore_class:
                print('class {}: {}'.format(class_ind, mAP_CLASSIFICATION.AP(class_ind)))
    if cfg.USE_MASK:
        mAP_MASK.finalize()
        print('mAP of mask: {}'.format(mAP_MASK.mAP()))
        for class_ind in range(cfg.NUM_CLASSES):
            if class_ind not in mAP_MASK.ignore_class:
                print('class {}: {}'.format(class_ind, mAP_MASK.AP(class_ind)))
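# The per-RoI loop in test() above slices out, for each RoI, the 6 regression
# values and the confidence of its predicted class. A vectorized equivalent
# using fancy indexing, shown here on toy data (a sketch, not from the repo):
import numpy as np

n, num_classes = 4, 5
box_reg_pre = np.random.randn(n, num_classes * 6)   # per-class box deltas, flattened
pred_conf_pre = np.random.rand(n, num_classes)      # per-class confidences
pred_class = np.array([0, 2, 4, 1])                 # predicted class per RoI

rows = np.arange(n)
box_reg = box_reg_pre.reshape(n, num_classes, 6)[rows, pred_class]  # (n, 6)
pred_conf = pred_conf_pre[rows, pred_class]                         # (n,)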
def validation(self, index, mode):
    #####################################
    # Preparation
    #####################################
    #-------------------------------
    # metric
    #-------------------------------
    mAP_RPN = Evaluate_metric(1, overlap_threshold=cfg.MAP_THRESH)
    mAP_CLASSIFICATION = Evaluate_metric(cfg.NUM_CLASSES, ignore_class=[0], overlap_threshold=cfg.MAP_THRESH)
    mAP_MASK = Evaluate_metric(cfg.NUM_CLASSES, ignore_class=[0], overlap_threshold=cfg.MAP_THRESH)

    if mode == 'val':
        data_loader = self.dataloader_val
        data_logger = self.logger_val
    elif mode == 'trainval':
        data_loader = self.dataloader_trainval
        data_logger = self.logger_trainval

    ####################################
    # Accumulate data
    ####################################
    timer = Timer()
    timer.tic()
    print('starting validation....')
    for iter, blobs in enumerate(tqdm(data_loader)):
        # if no box: skip
        if len(blobs['gt_box']) == 0:
            continue

        if cfg.USE_IMAGES:
            grid_shape = blobs['data'].shape[-3:]
            projection_helper = ProjectionHelper(cfg.INTRINSIC, cfg.PROJ_DEPTH_MIN, cfg.PROJ_DEPTH_MAX,
                                                 cfg.DEPTH_SHAPE, grid_shape, cfg.VOXEL_SIZE)
            proj_mapping = [projection_helper.compute_projection(d.cuda(), c.cuda(), t.cuda())
                            for d, c, t in zip(blobs['nearest_images']['depths'][0],
                                               blobs['nearest_images']['poses'][0],
                                               blobs['nearest_images']['world2grid'][0])]
            if None in proj_mapping:  # invalid sample
                continue
            blobs['proj_ind_3d'] = []
            blobs['proj_ind_2d'] = []
            proj_mapping0, proj_mapping1 = zip(*proj_mapping)
            blobs['proj_ind_3d'].append(torch.stack(proj_mapping0))
            blobs['proj_ind_2d'].append(torch.stack(proj_mapping1))

        self.net.forward(blobs, 'TEST', [])

        #--------------------------------------
        # RPN: loss, metric
        #--------------------------------------
        if cfg.USE_RPN:
            # (n, 6)
            gt_box = blobs['gt_box'][0].numpy()[:, 0:6]
            gt_box_label = np.zeros(gt_box.shape[0])
            try:
                pred_box_num = (self.net._predictions['roi_scores'][0][:, 0] > cfg.ROI_THRESH).nonzero().size(0)
                pred_box = self.net._predictions['rois'][0].cpu().numpy()[:pred_box_num]
                pred_box_label = np.zeros(pred_box_num)
                pred_box_score = self.net._predictions['roi_scores'][0].cpu().numpy()[:pred_box_num, 0]
            except:
                pred_box = self.net._predictions['rois'][0].cpu().numpy()[:1]
                pred_box_label = np.zeros(1)
                pred_box_score = self.net._predictions['roi_scores'][0].cpu().numpy()[:1, 0]

            # evaluation metric
            mAP_RPN.evaluate(pred_box, pred_box_label, pred_box_score, gt_box, gt_box_label)

        #--------------------------------------
        # Classification: loss, metric
        #--------------------------------------
        if cfg.USE_CLASS:
            # groundtruth
            gt_box = blobs['gt_box'][0].numpy()[:, 0:6]
            gt_class = blobs['gt_box'][0][:, 6].numpy()

            # predictions
            pred_class = self.net._predictions['cls_pred'].data.cpu().numpy()
            # only predictions['rois'] is list and is Tensor / others are no list and Variable
            rois = self.net._predictions['rois'][0].cpu()
            box_reg_pre = self.net._predictions["bbox_pred"].data.cpu().numpy()
            box_reg = np.zeros((box_reg_pre.shape[0], 6))
            pred_conf_pre = self.net._predictions['cls_prob'].data.cpu().numpy()
            pred_conf = np.zeros((pred_conf_pre.shape[0]))

            for pred_ind in range(pred_class.shape[0]):
                box_reg[pred_ind, :] = box_reg_pre[pred_ind, pred_class[pred_ind]*6:(pred_class[pred_ind]+1)*6]
                pred_conf[pred_ind] = pred_conf_pre[pred_ind, pred_class[pred_ind]]

            pred_box = bbox_transform_inv(rois, torch.from_numpy(box_reg).float())
            pred_box = clip_boxes(pred_box, self.net._scene_info[:3]).numpy()

            # pickup
            sort_index = []
            for conf_index in range(pred_conf.shape[0]):
                if pred_conf[conf_index] > cfg.CLASS_THRESH:
                    sort_index.append(True)
                else:
                    sort_index.append(False)

            # eliminate bad box
            for idx, box in enumerate(pred_box):
                if round(box[0]) >= round(box[3]) or round(box[1]) >= round(box[4]) or round(box[2]) >= round(box[5]):
                    sort_index[idx] = False

            if len(pred_box[sort_index]) == 0:
                print('no pred box')

            if iter < cfg.VAL_NUM:
                os.makedirs('{}/{}'.format(cfg.VAL_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), exist_ok=True)
                np.save('{}/{}/pred_class'.format(cfg.VAL_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), pred_class)
                np.save('{}/{}/pred_conf'.format(cfg.VAL_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), pred_conf)
                np.save('{}/{}/pred_box'.format(cfg.VAL_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), pred_box)
                np.save('{}/{}/scene'.format(cfg.VAL_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]),
                        np.where(blobs['data'][0, 0].numpy() <= 1, 1, 0))
                np.save('{}/{}/gt_class'.format(cfg.VAL_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), gt_class)
                np.save('{}/{}/gt_box'.format(cfg.VAL_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), gt_box)

            mAP_CLASSIFICATION.evaluate(
                pred_box[sort_index],
                pred_class[sort_index],
                pred_conf[sort_index],
                gt_box,
                gt_class)

        #--------------------------------------
        # MASK: loss, metric
        #--------------------------------------
        if cfg.USE_MASK:
            # gt data
            gt_box = blobs['gt_box'][0].numpy()[:, 0:6]
            gt_class = blobs['gt_box'][0][:, 6].numpy()
            gt_mask = blobs['gt_mask'][0]

            pred_class = self.net._predictions['cls_pred'].data.cpu().numpy()
            pred_conf = np.zeros((pred_class.shape[0]))
            for pred_ind in range(pred_class.shape[0]):
                pred_conf[pred_ind] = self.net._predictions['cls_prob'].data.cpu().numpy()[pred_ind, pred_class.data[pred_ind]]

            # pickup
            sort_index = pred_conf > cfg.CLASS_THRESH

            # eliminate bad box
            for idx, box in enumerate(pred_box):
                if round(box[0]) >= round(box[3]) or round(box[1]) >= round(box[4]) or round(box[2]) >= round(box[5]):
                    sort_index[idx] = False

            pred_mask = []
            mask_ind = 0
            for ind, cls in enumerate(pred_class):
                if sort_index[ind]:
                    mask = self.net._predictions['mask_pred'][0][mask_ind][0][cls].data.cpu().numpy()
                    mask = np.where(mask >= cfg.MASK_THRESH, 1, 0).astype(np.float32)
                    pred_mask.append(mask)
                    mask_ind += 1

            if iter < cfg.VAL_NUM:
                pickle.dump(pred_mask, open('{}/{}/pred_mask'.format(cfg.VAL_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), 'wb'))
                pickle.dump(sort_index, open('{}/{}/pred_mask_index'.format(cfg.VAL_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), 'wb'))
                pickle.dump(gt_mask, open('{}/{}/gt_mask'.format(cfg.VAL_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), 'wb'))

            mAP_MASK.evaluate_mask(
                pred_box[sort_index],
                pred_class[sort_index],
                pred_conf[sort_index],
                pred_mask,
                gt_box,
                gt_class,
                gt_mask,
                self.net._scene_info)

        self.net.delete_intermediate_states()

    timer.toc()
    print('It took {:.3f}s for Validation on chunks'.format(timer.total_time()))

    ###################################
    # Summary
    ###################################
    if cfg.USE_RPN:
        mAP_RPN.finalize()
        print('AP of RPN: {}'.format(mAP_RPN.mAP()))
        data_logger.scalar_summary('AP_ROI', mAP_RPN.mAP(), index)
    if cfg.USE_CLASS:
        mAP_CLASSIFICATION.finalize()
        print('mAP of CLASSIFICATION: {}'.format(mAP_CLASSIFICATION.mAP()))
        for class_ind in range(cfg.NUM_CLASSES):
            if class_ind not in mAP_CLASSIFICATION.ignore_class:
                print('class {}: {}'.format(class_ind, mAP_CLASSIFICATION.AP(class_ind)))
        data_logger.scalar_summary('mAP_CLASSIFICATION', mAP_CLASSIFICATION.mAP(), index)
    if cfg.USE_MASK:
        mAP_MASK.finalize()
        print('mAP of mask: {}'.format(mAP_MASK.mAP()))
        for class_ind in range(cfg.NUM_CLASSES):
            if class_ind not in mAP_MASK.ignore_class:
                print('class {}: {}'.format(class_ind, mAP_MASK.AP(class_ind)))
        data_logger.scalar_summary('mAP_MASK', mAP_MASK.mAP(), index)
def forward(self, input):
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs
    scores = input[0][:, self._num_anchors:, :, :]
    bbox_deltas = input[1]
    im_info = input[2]
    cfg_key = input[3]

    # pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    # post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    # nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    # min_size = cfg[cfg_key].RPN_MIN_SIZE
    if cfg_key == 'TRAIN':
        pre_nms_topN = 12000
        post_nms_topN = 2000
        nms_thresh = 0.7
        min_size = 8
    else:
        pre_nms_topN = 6000
        post_nms_topN = 300
        nms_thresh = 0.7
        min_size = 16

    batch_size = bbox_deltas.size(0)

    feat_height, feat_width = scores.size(2), scores.size(3)
    shift_x = np.arange(0, feat_width) * self._feat_stride
    shift_y = np.arange(0, feat_height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = torch.from_numpy(np.vstack((shift_x.ravel(), shift_y.ravel(),
                                         shift_x.ravel(), shift_y.ravel())).transpose())
    shifts = shifts.contiguous().type_as(scores).float()

    A = self._num_anchors
    K = shifts.size(0)

    self._anchors = self._anchors.type_as(scores)
    anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
    anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
    bbox_deltas = bbox_deltas.view(batch_size, -1, 4)

    # Same story for the scores:
    scores = scores.permute(0, 2, 3, 1).contiguous()
    scores = scores.view(batch_size, -1)

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, torch.Tensor(im_info.tolist() * batch_size).cuda(), batch_size)

    # assign the score to 0 if it's non keep.
    # keep = self._filter_boxes(proposals, min_size * im_info[:, 2])
    # trim keep index to make it equal over batch
    # keep_idx = torch.cat(tuple(keep_idx), 0)
    # scores_keep = scores.view(-1)[keep_idx].view(batch_size, trim_size)
    # proposals_keep = proposals.view(-1, 4)[keep_idx, :].contiguous().view(batch_size, trim_size, 4)
    # _, order = torch.sort(scores_keep, 1, True)

    scores_keep = scores
    proposals_keep = proposals
    _, order = torch.sort(scores_keep, 1, True)

    output = scores.new(batch_size, post_nms_topN, 5).zero_()
    for i in range(batch_size):
        # # 3. remove predicted boxes with either height or width < threshold
        # # (NOTE: convert min_size to input image scale stored in im_info[2])
        proposals_single = proposals_keep[i]
        scores_single = scores_keep[i]

        # # 4. sort all (proposal, score) pairs by score from highest to lowest
        # # 5. take top pre_nms_topN (e.g. 6000)
        order_single = order[i]

        if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
            order_single = order_single[:pre_nms_topN]

        proposals_single = proposals_single[order_single, :]
        scores_single = scores_single[order_single].view(-1, 1)

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1), nms_thresh,
                         force_cpu=not cfg.USE_GPU_NMS)
        # keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1), nms_thresh, force_cpu=True)
        keep_idx_i = keep_idx_i.long().view(-1)

        if post_nms_topN > 0:
            keep_idx_i = keep_idx_i[:post_nms_topN]
        proposals_single = proposals_single[keep_idx_i, :]
        scores_single = scores_single[keep_idx_i, :]

        # padding 0 at the end.
        num_proposal = proposals_single.size(0)
        output[i, :, 0] = i
        output[i, :num_proposal, 1:] = proposals_single

    return output
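# A toy illustration of the zero-padded output tensor built in forward()
# above: one (post_nms_topN, 5) slab per image, column 0 holding the image
# index and rows beyond num_proposal left as zeros. Values are made up.
import torch

post_nms_topN = 4
proposals_single = torch.tensor([[10., 20., 110., 220.],
                                 [30., 40., 130., 240.]])
output = proposals_single.new(1, post_nms_topN, 5).zero_()
num_proposal = proposals_single.size(0)
output[0, :, 0] = 0                                # image index i
output[0, :num_proposal, 1:] = proposals_single    # rows 2 and 3 stay all-zero padding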
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    '''
    Return final proposal boxes

    Parameters
    ----------
    rpn_cls_prob: ndarray
        per-class scores
    rpn_bbox_pred: ndarray
        raw regression output of the RPN
    im_info: ndarray
        shape = [batch_size, 3]
    cfg_key: string
        Train or Test
    _feat_stride: list
        [16, ]
    anchors: ndarray
        generate_anchors_pre(height, width, feat_stride, anchor_scales=(8, 16, 32), anchor_ratios=(0.5, 1, 2))
    num_anchors: int32
        num_anchors = 3 x 3

    Return
    ------
    blob: ndarray
        the boxes to keep, rows [0, x1, y1, x2, y2]
    scores: list
        scores of the kept boxes
    '''
    """A simplified version compared to fast/er RCNN
       For details please see the technical report
    """
    if type(cfg_key) == bytes:
        cfg_key = cfg_key.decode('utf-8')

    # Training phase
    if cfg_key == "TRAIN":
        # Number of top scoring boxes to keep before applying NMS to RPN proposals
        # (cap on the number of boxes going into NMS)
        pre_nms_topN = cfg.FLAGS.rpn_train_pre_nms_top_n    # 12000
        post_nms_topN = cfg.FLAGS.rpn_train_post_nms_top_n  # 2000
        nms_thresh = cfg.FLAGS.rpn_train_nms_thresh         # 0.7
    # Test phase
    else:
        pre_nms_topN = cfg.FLAGS.rpn_test_pre_nms_top_n     # 6000
        post_nms_topN = cfg.FLAGS.rpn_test_post_nms_top_n   # 300
        nms_thresh = cfg.FLAGS.rpn_test_nms_thresh          # 0.7

    im_info = im_info[0]
    # Get the scores and bounding boxes
    # Extract the classification probabilities and bounding-box positions.
    # Foreground scores of the RPN boxes (the first 9 channels are background
    # probabilities, the last 9 are foreground probabilities)
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    scores = scores.reshape((-1, 1))
    # Compute the regressed box corner coordinates, then clip the regressed boxes to the image
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals
    # Take the top-N indices and scores
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    # proposals holds the top-N corner coordinates, regressed and clipped;
    # scores holds the top-N scores
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression
    # Run NMS; keep records the indices of the boxes retained after NMS
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Pick the top region proposals after NMS
    # If keep is too long, retain only the first post_nms_topN entries and
    # update proposals to the coordinates of the retained boxes
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input
    # Prepend a column to proposals, reserved for the image index within the batch
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, scores
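# A two-proposal illustration of the blob layout this layer returns: a
# batch-index column of zeros prepended to [x1, y1, x2, y2] (single-image
# input, so every index is 0). Coordinates are made up.
import numpy as np

proposals = np.array([[10., 20., 110., 220.],
                      [30., 40., 130., 240.]])
batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
# blob -> [[0., 10., 20., 110., 220.],
#          [0., 30., 40., 130., 240.]]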
def forward(self, blobs, mode='TRAIN', killing_inds=None):
    self._scene_info = blobs['data'].shape[2:]
    self._id = blobs['id'][0]
    self.cuda()
    self.batch_size = blobs['data'].shape[0]

    if mode == 'TRAIN':
        self.train()
        if cfg.USE_IMAGES and not cfg.USE_IMAGES_GT:
            # keep the ENet image encoders in eval mode
            self.image_enet_fixed.eval()
            self.image_enet_trainable.eval()
        self._mode = 'TRAIN'
        self._scene = Variable(blobs['data'].cuda())
        self._gt_bbox = blobs['gt_box']
        self._gt_mask = blobs['gt_mask'] if cfg.USE_MASK else None

        if cfg.USE_IMAGES:
            grid_shape = blobs['data'].shape[-3:]
            self._imageft = []
            for i in range(self.batch_size):
                num_images = blobs['nearest_images']['images'][i].shape[0]
                if cfg.USE_IMAGES_GT:
                    imageft = Variable(blobs['nearest_images']['images'][i].cuda())
                    #imageft = imageft.expand(imageft.shape[0], 128, imageft.shape[2], imageft.shape[3]).contiguous()
                else:
                    imageft = self.image_enet_fixed(Variable(blobs['nearest_images']['images'][i].cuda()))
                    imageft = self.image_enet_trainable(imageft)
                proj3d = Variable(blobs['proj_ind_3d'][i].cuda())
                proj2d = Variable(blobs['proj_ind_2d'][i].cuda())
                # project 2d to 3d
                imageft = [Projection.apply(ft, ind3d, ind2d, grid_shape)
                           for ft, ind3d, ind2d in zip(imageft, proj3d, proj2d)]
                imageft = torch.stack(imageft, dim=4)
                # reshape to max pool over features
                sz = imageft.shape
                imageft = imageft.view(sz[0], -1, num_images)
                imageft = torch.nn.MaxPool1d(kernel_size=num_images)(imageft)
                imageft = imageft.view(sz[0], sz[1], sz[2], sz[3], 1)
                self._imageft.append(imageft.permute(4, 0, 3, 2, 1))
            self._imageft = torch.cat(self._imageft, 0)

        #--------------------------
        # visualization snippets
        #--------------------------
        #import ipdb
        #ipdb.set_trace()
        #data = np.where(self._scene[0,0].data.cpu().numpy() <= 1.0, 1, 0)
        #data = self._imageft[0]
        #write_mask(data, 'data.ply')
        #data = blobs['gt_box'][0].numpy()
        #write_bbox(data, 'bbox.ply')

        if cfg.USE_BACKBONE:
            net_conv_level1, net_conv_level2, net_conv_level3 = self._backbone()

        if cfg.USE_RPN:
            # build the anchors for the scene (only on the first pass)
            if cfg.FIRST_TIME_ANCHORS:
                cfg.FIRST_TIME_ANCHORS = False
                if cfg.NUM_ANCHORS_LEVEL1 != 0:
                    size_level1 = [net_conv_level1.size(2), net_conv_level1.size(3), net_conv_level1.size(4)]
                if cfg.NUM_ANCHORS_LEVEL2 != 0:
                    size_level2 = [net_conv_level2.size(2), net_conv_level2.size(3), net_conv_level2.size(4)]
                if cfg.NUM_ANCHORS_LEVEL3 != 0:
                    size_level3 = [net_conv_level3.size(2), net_conv_level3.size(3), net_conv_level3.size(4)]
                self._anchor_component(size_level1 if cfg.NUM_ANCHORS_LEVEL1 != 0 else [],
                                       size_level2 if cfg.NUM_ANCHORS_LEVEL2 != 0 else [],
                                       size_level3 if cfg.NUM_ANCHORS_LEVEL3 != 0 else [])
            self._region_proposal(net_conv_level1, net_conv_level2, net_conv_level3)
        else:
            # predictions['rois']/['roi_scores']/['mask_pred'] are lists over the batch,
            # since samples do not share the same number/dim of boxes
            self._predictions['rois'] = [self._gt_bbox[i][:, :6].cuda() for i in range(self.batch_size)]
            self._predictions['roi_scores'] = [torch.ones(self._gt_bbox[i].size(0), 1).cuda()
                                               for i in range(self.batch_size)]

        if cfg.USE_CLASS:
            self._proposal_target_layer(self._predictions['rois'],
                                        self._predictions['roi_scores'],
                                        self._predictions['level_inds'])
            pool5 = self._roi_pool_layer(net_conv_level1, net_conv_level2, net_conv_level3,
                                         self._proposal_targets['rois'],
                                         self._proposal_targets['levelInds'],
                                         self._feat_stride, cfg.CLASS_POOLING_SIZE)
            fc7 = self._classifier(pool5)
            self._region_classification(fc7)
        else:
            self._predictions["cls_pred"] = Variable(self._gt_bbox[0][:, 6].long())
            self._predictions["cls_prob"] = Variable(torch.zeros((self._predictions['cls_pred'].shape[0], cfg.NUM_CLASSES)))
            self._predictions["bbox_pred"] = Variable(torch.zeros((self._predictions['cls_pred'].shape[0], cfg.NUM_CLASSES * 6)))
            for ind_sample in range(self._predictions['cls_pred'].shape[0]):
                self._predictions['cls_prob'][ind_sample, self._predictions['cls_pred'].data[ind_sample]] = 1.0

        if cfg.USE_MASK:
            self._mask_target_layer(self._predictions['rois'])
            mask_pred_batch = []
            for i in range(self.batch_size):
                mask_pred = []
                for roi in self._mask_targets['rois'][i]:
                    mask_pred.append(self.mask_backbone(
                        self._scene[i:i+1, :,
                                    int(round(roi[0].item())):int(round(roi[3].item())),
                                    int(round(roi[1].item())):int(round(roi[4].item())),
                                    int(round(roi[2].item())):int(round(roi[5].item()))],
                        self._imageft[i:i+1, :,
                                      int(round(roi[0].item())):int(round(roi[3].item())),
                                      int(round(roi[1].item())):int(round(roi[4].item())),
                                      int(round(roi[2].item())):int(round(roi[5].item()))] if cfg.USE_IMAGES else None))
                mask_pred_batch.append(mask_pred)
            self._predictions['mask_pred'] = mask_pred_batch

        self._add_losses()

    elif mode == 'TEST':
        with torch.no_grad():
            self.eval()
            self._mode = 'TEST'
            self._scene = blobs['data'].cuda()
            self._gt_bbox = blobs['gt_box']
            self._gt_mask = blobs['gt_mask'] if cfg.USE_MASK else None

            if cfg.USE_IMAGES:
                grid_shape = blobs['data'].shape[-3:]
                self._imageft = []
                for i in range(self.batch_size):
                    num_images = blobs['nearest_images']['images'][i].shape[0]
                    if cfg.USE_IMAGES_GT:
                        with torch.no_grad():
                            imageft = Variable(blobs['nearest_images']['images'][i].cuda())
                    else:
                        with torch.no_grad():
                            imageft = self.image_enet_fixed(Variable(blobs['nearest_images']['images'][i].cuda()))
                            imageft = self.image_enet_trainable(imageft)
                    proj3d = Variable(blobs['proj_ind_3d'][i])
                    proj2d = Variable(blobs['proj_ind_2d'][i])
                    # fall back to the CPU for very large scenes or too many views
                    if blobs['data'].shape[2] * blobs['data'].shape[3] * blobs['data'].shape[4] > cfg.MAX_VOLUME \
                            or len(proj3d) > cfg.MAX_IMAGE:
                        print('on cpu')
                        imageft = imageft.cpu()
                        proj3d = proj3d.cpu()
                        proj2d = proj2d.cpu()
                    # project 2d to 3d, fusing views incrementally with a running pairwise max
                    counter = 0
                    init = True
                    for ft, ind3d, ind2d in zip(imageft, proj3d, proj2d):
                        counter += 1
                        if counter - 1 in killing_inds:
                            continue
                        imageft_temp = Projection.apply(ft, ind3d, ind2d, grid_shape)[:, :, :, :].contiguous()
                        sz = imageft_temp.shape
                        if init:
                            imageft = imageft_temp.view(sz[0], sz[1], sz[2], sz[3])
                            init = False
                            continue
                        imageft = torch.stack([imageft, imageft_temp], dim=4)
                        # reshape to max pool over features
                        imageft = imageft.view(sz[0], -1, 2)
                        imageft = torch.nn.MaxPool1d(kernel_size=2)(imageft)
                        imageft = imageft.view(sz[0], sz[1], sz[2], sz[3])
                    imageft = imageft.view(sz[0], sz[1], sz[2], sz[3], self.batch_size)
                    self._imageft = imageft.permute(4, 0, 3, 2, 1)
                    self._imageft = self._imageft.cuda()
                    del proj3d
                    del proj2d
                    torch.cuda.empty_cache()

            if cfg.USE_BACKBONE:
                net_conv_level1, net_conv_level2, net_conv_level3 = self._backbone()

            if cfg.USE_RPN:
                # build the anchors for the scene
                if cfg.NUM_ANCHORS_LEVEL1 != 0:
                    size_level1 = [net_conv_level1.size(2), net_conv_level1.size(3), net_conv_level1.size(4)]
                if cfg.NUM_ANCHORS_LEVEL2 != 0:
                    size_level2 = [net_conv_level2.size(2), net_conv_level2.size(3), net_conv_level2.size(4)]
                if cfg.NUM_ANCHORS_LEVEL3 != 0:
                    size_level3 = [net_conv_level3.size(2), net_conv_level3.size(3), net_conv_level3.size(4)]
                self._anchor_component(size_level1 if cfg.NUM_ANCHORS_LEVEL1 != 0 else [],
                                       size_level2 if cfg.NUM_ANCHORS_LEVEL2 != 0 else [],
                                       size_level3 if cfg.NUM_ANCHORS_LEVEL3 != 0 else [])
                self._region_proposal(net_conv_level1, net_conv_level2, net_conv_level3)
            else:
                # predictions['rois']/['roi_scores'] are lists over the batch,
                # since samples do not share the same number of boxes
                self._predictions['rois'] = [self._gt_bbox[i][:, :6].cuda() for i in range(self.batch_size)]
                self._predictions['roi_scores'] = [torch.ones(self._gt_bbox[i].size(0), 1).cuda()
                                                   for i in range(self.batch_size)]

            # especially for validation: we don't want to resample proposals in val, for mAP
            if cfg.USE_CLASS:
                pool5 = self._roi_pool_layer(net_conv_level1, net_conv_level2, net_conv_level3,
                                             Variable(torch.cat(self._predictions['rois'], 0)),
                                             Variable(torch.cat(self._predictions['level_inds'], 0)),
                                             self._feat_stride, cfg.CLASS_POOLING_SIZE)
                fc7 = self._classifier(pool5)
                self._region_classification(fc7)
            else:
                self._predictions["cls_pred"] = Variable(self._gt_bbox[0][:, 6].long())
                self._predictions["cls_prob"] = Variable(torch.zeros((self._predictions['cls_pred'].shape[0], cfg.NUM_CLASSES)))
                self._predictions["bbox_pred"] = Variable(torch.zeros((self._predictions['cls_pred'].shape[0], cfg.NUM_CLASSES * 6)))
                for ind_sample in range(self._predictions['cls_pred'].shape[0]):
                    self._predictions['cls_prob'][ind_sample, self._predictions['cls_pred'].data[ind_sample]] = 1.0

            if cfg.USE_MASK:
                mask_pred_batch = []
                rois = self._predictions['rois'][0].cpu()
                box_reg_pre = self._predictions["bbox_pred"].data.cpu().numpy()
                box_reg = np.zeros((box_reg_pre.shape[0], 6))
                pred_class = self._predictions['cls_pred'].data.cpu().numpy()
                pred_conf = np.zeros((pred_class.shape[0]))
                # pick the 6 regression values belonging to each ROI's predicted class
                for pred_ind in range(pred_class.shape[0]):
                    box_reg[pred_ind, :] = box_reg_pre[pred_ind, pred_class[pred_ind]*6:(pred_class[pred_ind]+1)*6]
                    pred_conf[pred_ind] = self._predictions['cls_prob'].data.cpu().numpy()[pred_ind, pred_class.data[pred_ind]]
                pred_box = bbox_transform_inv(rois, torch.from_numpy(box_reg).float())
                pred_box = clip_boxes(pred_box, self._scene_info[:3]).numpy()
                sort_index = pred_conf > cfg.CLASS_THRESH
                # eliminate degenerate boxes
                for idx, box in enumerate(pred_box):
                    if round(box[0]) >= round(box[3]) or round(box[1]) >= round(box[4]) or round(box[2]) >= round(box[5]):
                        sort_index[idx] = False
                for i in range(self.batch_size):
                    mask_pred = []
                    for ind, roi in enumerate(pred_box):
                        if sort_index[ind]:
                            mask_pred.append(self.mask_backbone(
                                self._scene[i:i+1, :,
                                            int(round(roi[0])):int(round(roi[3])),
                                            int(round(roi[1])):int(round(roi[4])),
                                            int(round(roi[2])):int(round(roi[5]))],
                                self._imageft[i:i+1, :,
                                              int(round(roi[0])):int(round(roi[3])),
                                              int(round(roi[1])):int(round(roi[4])),
                                              int(round(roi[2])):int(round(roi[5]))] if cfg.USE_IMAGES else None))
                    mask_pred_batch.append(mask_pred)
                self._predictions['mask_pred'] = mask_pred_batch
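The 2D-to-3D feature fusion in forward hinges on one reshape trick: stack the per-view projected volumes along a new last dimension, flatten the voxel grid, and run MaxPool1d across the view dimension, which amounts to an element-wise max over views. Here is a minimal sketch of just that trick on dummy tensors; the shapes (C, D1, D2, D3, num_views) are made up for illustration.

import torch

# illustrative sizes: C feature channels on a (D1, D2, D3) voxel grid, num_views views to fuse
C, D1, D2, D3, num_views = 8, 16, 8, 16, 5
views = [torch.randn(C, D1, D2, D3) for _ in range(num_views)]

fused = torch.stack(views, dim=4)                         # (C, D1, D2, D3, num_views)
sz = fused.shape
fused = fused.view(sz[0], -1, num_views)                  # flatten the grid: (C, D1*D2*D3, num_views)
fused = torch.nn.MaxPool1d(kernel_size=num_views)(fused)  # max over the view dimension
fused = fused.view(sz[0], sz[1], sz[2], sz[3])            # back to (C, D1, D2, D3)

# each voxel now holds the maximum of its feature across all views,
# i.e. the same result as an explicit element-wise max
assert torch.allclose(fused, torch.stack(views).max(dim=0).values)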
# box_deltas = box_deltas.view(1, -1, 4)
# pred_boxes = bbox_transform_inv_3d(tubes, box_deltas, 1)
# pred_boxes = clip_boxes_3d(pred_boxes, im_info.data, 1)
# pred_boxes = pred_boxes.view(1, rois.size(1), 1, 6)

# un-normalize the regression deltas with the dataset's bbox target means/stds
box_deltas_s = bbox_pred_s.view(-1, 4) * torch.FloatTensor(bbox_normalize_stds_s).to(device) \
               + torch.FloatTensor(bbox_normalize_means_s).to(device)
print('box_deltas_s :', box_deltas_s.shape)
box_deltas_s = box_deltas_s.view(16, 10, 4)
print(im_info.data)
# all 16 frames share the same 112x112 spatial size
im_info_s = torch.Tensor([[112, 112]] * 16).to(device)
print('im_info_s :', im_info_s)
pred_boxes_s = bbox_transform_inv(rois, bbox_pred_s, 16)
pred_boxes_s = clip_boxes(pred_boxes_s, im_info_s, 16)
pred_boxes_s = pred_boxes_s.view(16, rois.size(1), 1, 4)
print('pred_boxes_s.shape :', pred_boxes_s.shape)
print('pred_boxes_s :', pred_boxes_s)
# print('bbox_pred.shape :', pred_boxes.shape)
# print(scores)
# pred_boxes = pred_boxes.data
# print(pred_boxes_s)

colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255)]
clips2 = clips2.squeeze().permute(1, 2, 3, 0)
print('rois.shape :', rois.shape)
for i in range(16):  # frame
    # img = cv2.imread(os.path.join(path, 'image_{:0>5}.jpg'.format(frame_indices[i])))
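The first live lines of the snippet above undo the target normalization applied during training: the raw bbox_pred output is multiplied by the normalization stds and shifted by the means before bbox_transform_inv decodes it into box coordinates. Below is a minimal, self-contained sketch of that step; the (0.1, 0.1, 0.2, 0.2) stds and zero means are the common Faster R-CNN defaults and stand in for whatever bbox_normalize_stds_s / bbox_normalize_means_s actually hold in this code's config.

import torch

# common Faster R-CNN defaults; the real values come from the config
bbox_normalize_means = (0.0, 0.0, 0.0, 0.0)
bbox_normalize_stds = (0.1, 0.1, 0.2, 0.2)

# e.g. 16 frames x 10 ROIs, 4 normalized deltas each (placeholder values)
bbox_pred = torch.randn(16 * 10, 4)

# scale back to the (dx, dy, dw, dh) units that bbox_transform_inv expects
deltas = bbox_pred * torch.tensor(bbox_normalize_stds) \
         + torch.tensor(bbox_normalize_means)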