def forward(self, rpn_scores, rpn_reg, xyz):
        """
        Note all the inputs are in batch form
        :param rpn_scores: (B, N): This is the regression(raw) output of the classification (no sigmoid applied) B:number of batches, N: number of points
        :param rpn_reg: (B, N, 9) we have 9 box features. (I don't think in the original code the third dimension is 8 it should be much more with all the classification)
        :param xyz: (B, N, 3)
        :return bbox3d: (B, M, 9)
        """
        batch_size = xyz.shape[0]
        proposals = decode_bbox_target(xyz.view(-1, 3),
                                       rpn_reg.view(-1, rpn_reg.shape[-1]),
                                       anchor_size=self.MEAN_SIZE,
                                       loc_scope=cfg.RPN.LOC_SCOPE,
                                       loc_bin_size=cfg.RPN.LOC_BIN_SIZE,
                                       num_head_bin=cfg.RPN.NUM_HEAD_BIN,
                                       get_xz_fine=cfg.RPN.LOC_XZ_FINE,
                                       get_y_by_bin=False,
                                       get_ry_fine=False)  # (N, 9)

        proposals = proposals.view(batch_size, -1, rpn_reg.shape[-1])

        # For the raw RPN classification output, the more positive a point's
        # score, the more likely the point lies inside a box, so sorting by this
        # value ranks the points most likely to be in a box first.
        scores = rpn_scores
        _, sorted_idxs = torch.sort(scores, dim=1, descending=True)

        batch_size = scores.size(0)
        # Tensor.new(sizes) (new_empty in newer PyTorch versions) returns a
        # tensor of the given size filled with uninitialized data; by default it
        # has the same torch.dtype and torch.device as the source tensor.
        ret_bbox3d = scores.new(
            batch_size, cfg[self.mode].RPN_POST_NMS_TOP_N, rpn_reg.shape[-1]
        ).zero_()  # self.mode == 'TEST', RPN_POST_NMS_TOP_N == 100
        ret_scores = scores.new(batch_size,
                                cfg[self.mode].RPN_POST_NMS_TOP_N).zero_()
        for k in range(batch_size):
            # score of all the points in one scene
            scores_single = scores[k]
            # proposed bboxes of 1 scene (each point proposes a bbox)
            proposals_single = proposals[k]
            # indices of points in this scene sorted by score (highest first)
            order_single = sorted_idxs[k]
            # if cfg.TEST.RPN_DISTANCE_BASED_PROPOSE:  # True in the default config
            #     scores_single, proposals_single = self.distance_based_proposal(
            #         scores_single, proposals_single, order_single)
            # else:
            scores_single, proposals_single = self.score_based_proposal(
                scores_single, proposals_single, order_single)

            proposals_tot = proposals_single.size(0)
            ret_bbox3d[k, :proposals_tot] = proposals_single
            ret_scores[k, :proposals_tot] = scores_single

        return ret_bbox3d, ret_scores
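
A note on the helper above: score_based_proposal is not shown in this snippet. A minimal sketch of the idea (a hypothetical stand-in, not the repo's implementation, which likely also applies BEV NMS before keeping RPN_POST_NMS_TOP_N boxes): take the top-scoring proposals using the presorted index order.

import torch

def score_based_proposal_sketch(scores, proposals, order, top_n=100):
    # scores: (N,), proposals: (N, C), order: (N,) indices sorted by descending score
    keep = order[:top_n]  # keep the proposals with the highest raw scores
    return scores[keep], proposals[keep]
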
Example #2
    def forward(self, rpn_scores, rpn_reg, xyz):
        """
        :param rpn_scores: (B, N)
        :param rpn_reg: (B, N, 8)
        :param xyz: (B, N, 3)
        :return bbox3d: (B, M, 7)
        """
        # pdb.set_trace()
        batch_size = xyz.shape[0]
        proposals = decode_bbox_target(xyz.view(-1, 3),
                                       rpn_reg.view(-1, rpn_reg.shape[-1]),
                                       anchor_size=self.MEAN_SIZE,
                                       loc_scope=cfg.RPN.LOC_SCOPE,
                                       loc_bin_size=cfg.RPN.LOC_BIN_SIZE,
                                       num_head_bin=cfg.RPN.NUM_HEAD_BIN,
                                       get_xz_fine=cfg.RPN.LOC_XZ_FINE,
                                       get_y_by_bin=False,
                                       get_ry_fine=False)  # (N, 7)
        proposals[:, 1] = proposals[:, 1] + proposals[:, 3] / 2  # set y to the bottom-face center
        proposals = proposals.view(batch_size, -1, 7)

        scores = rpn_scores
        _, sorted_idxs = torch.sort(scores, dim=1, descending=True)

        batch_size = scores.size(0)
        ret_bbox3d = scores.new(batch_size, cfg[self.mode].RPN_POST_NMS_TOP_N,
                                7).zero_()
        ret_scores = scores.new(batch_size,
                                cfg[self.mode].RPN_POST_NMS_TOP_N).zero_()
        for k in range(batch_size):
            scores_single = scores[k]
            proposals_single = proposals[k]
            order_single = sorted_idxs[k]

            if cfg.TEST.RPN_DISTANCE_BASED_PROPOSE:
                scores_single, proposals_single = self.distance_based_proposal(
                    scores_single, proposals_single, order_single)
            else:
                scores_single, proposals_single = self.score_based_proposal(
                    scores_single, proposals_single, order_single)

            proposals_tot = proposals_single.size(0)
            ret_bbox3d[k, :proposals_tot] = proposals_single
            ret_scores[k, :proposals_tot] = scores_single

        return ret_bbox3d, ret_scores
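
The in-place y adjustment above assumes boxes are (x, y, z, h, w, l, ry) in camera coordinates (y pointing down), so adding h / 2 moves y from the box center to the bottom-face center. A quick check of that convention:

import torch

box = torch.tensor([[1.0, 0.0, 10.0, 1.5, 1.6, 3.9, 0.0]])  # y at box center, h = 1.5
box[:, 1] += box[:, 3] / 2
print(box[0, 1].item())  # 0.75 -> bottom-face center (camera y points down)
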
Example #3
    def forward(self, rpn_reg, xyz, gt_center=None):
        """
        :param rpn_scores: (B, N)
        :param rpn_reg: (B, N, 8)
        :param xyz: (B, N, 3)
        :return bbox3d: (B, M, 7)
        """

        batch_size = xyz.shape[0]
        proposals = decode_bbox_target(xyz.view(-1, 3),
                                       rpn_reg.view(-1, rpn_reg.shape[-1]),
                                       anchor_size=self.MEAN_SIZE,
                                       loc_scope=cfg.RPN.LOC_SCOPE,
                                       loc_bin_size=cfg.RPN.LOC_BIN_SIZE,
                                       num_head_bin=cfg.RPN.NUM_HEAD_BIN,
                                       get_xz_fine=cfg.RPN.LOC_XZ_FINE,
                                       get_y_by_bin=False,
                                       get_ry_fine=False)  # (N, 7)
        # proposals[:, 1] += proposals[:, 3] / 2  # set y as the center of bottom
        proposals = proposals.view(batch_size, -1, 7)

        return proposals[..., :3]  # (B, M, 3) proposal centers only
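
decode_bbox_target appears in every example on this page. A rough, hypothetical sketch of the bin-plus-residual localization it is built on (one axis only; the real function also handles y, size, and heading): the network classifies an offset into bins over [-loc_scope, loc_scope] and regresses a residual inside the chosen bin.

import torch

def decode_one_axis_sketch(bin_logits, residuals, loc_scope=3.0, loc_bin_size=0.5):
    # bin_logits: (N, num_bins), residuals: (N, num_bins) normalized per-bin residuals
    bin_idx = torch.argmax(bin_logits, dim=1)  # (N,) chosen bin per sample
    res = residuals.gather(1, bin_idx.unsqueeze(1)).squeeze(1)
    bin_center = bin_idx.float() * loc_bin_size - loc_scope + loc_bin_size / 2
    return bin_center + res * loc_bin_size  # continuous offset along the axis
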
Example #4
def eval_one_epoch_joint(model, dataloader, epoch_id, result_dir, logger):
    np.random.seed(666)
    MEAN_SIZE = torch.from_numpy(cfg.CLS_MEAN_SIZE[0]).cuda()
    mode = 'TEST' if args.test else 'EVAL'

    final_output_dir = os.path.join(result_dir, 'final_result', 'data')
    os.makedirs(final_output_dir, exist_ok=True)

    if args.save_result:
        roi_output_dir = os.path.join(result_dir, 'roi_result', 'data')
        refine_output_dir = os.path.join(result_dir, 'refine_result', 'data')
        rpn_output_dir = os.path.join(result_dir, 'rpn_result', 'data')
        os.makedirs(rpn_output_dir, exist_ok=True)
        os.makedirs(roi_output_dir, exist_ok=True)
        os.makedirs(refine_output_dir, exist_ok=True)

    logger.info('---- EPOCH %s JOINT EVALUATION ----' % epoch_id)
    logger.info('==> Output file: %s' % result_dir)
    model.eval()

    thresh_list = [0.1, 0.3, 0.5, 0.7, 0.9]
    total_recalled_bbox_list, total_gt_bbox = [0] * 5, 0
    total_roi_recalled_bbox_list = [0] * 5
    dataset = dataloader.dataset
    cnt = final_total = total_cls_acc = total_cls_acc_refined = total_rpn_iou = 0

    progress_bar = tqdm.tqdm(total=len(dataloader), leave=True, desc='eval')
    for data in dataloader:
        cnt += 1
        sample_id, pts_rect, pts_features, pts_input = \
            data['sample_id'], data['pts_rect'], data['pts_features'], data['pts_input']
        batch_size = len(sample_id)
        inputs = torch.from_numpy(pts_input).cuda(non_blocking=True).float()
        input_data = {'pts_input': inputs}

        # model inference
        ret_dict = model(input_data)

        roi_scores_raw = ret_dict['roi_scores_raw']  # (B, M)
        roi_boxes3d = ret_dict['rois']  # (B, M, 7)
        seg_result = ret_dict['seg_result'].long()  # (B, N)

        rcnn_cls = ret_dict['rcnn_cls'].view(batch_size, -1,
                                             ret_dict['rcnn_cls'].shape[1])
        rcnn_reg = ret_dict['rcnn_reg'].view(
            batch_size, -1, ret_dict['rcnn_reg'].shape[1])  # (B, M, C)

        # bounding box regression
        anchor_size = MEAN_SIZE
        if cfg.RCNN.SIZE_RES_ON_ROI:
            assert False

        pred_boxes3d = decode_bbox_target(
            roi_boxes3d.view(-1, 7),
            rcnn_reg.view(-1, rcnn_reg.shape[-1]),
            anchor_size=anchor_size,
            loc_scope=cfg.RCNN.LOC_SCOPE,
            loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
            num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
            get_xz_fine=True,
            get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
            loc_y_scope=cfg.RCNN.LOC_Y_SCOPE,
            loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
            get_ry_fine=True).view(batch_size, -1, 7)

        # scoring
        if rcnn_cls.shape[2] == 1:
            raw_scores = rcnn_cls  # (B, M, 1)

            norm_scores = torch.sigmoid(raw_scores)
            pred_classes = (norm_scores > cfg.RCNN.SCORE_THRESH).long()
        else:
            pred_classes = torch.argmax(rcnn_cls, dim=1).view(-1)
            cls_norm_scores = F.softmax(rcnn_cls, dim=1)
            raw_scores = rcnn_cls[:, pred_classes]
            norm_scores = cls_norm_scores[:, pred_classes]

        # evaluation
        recalled_num = gt_num = rpn_iou = 0
        if not args.test:
            if not cfg.RPN.FIXED:
                rpn_cls_label, rpn_reg_label = data['rpn_cls_label'], data[
                    'rpn_reg_label']
                rpn_cls_label = torch.from_numpy(rpn_cls_label).cuda(
                    non_blocking=True).long()

            gt_boxes3d = data['gt_boxes3d']

            for k in range(batch_size):
                # calculate recall
                cur_gt_boxes3d = gt_boxes3d[k]
                tmp_idx = len(cur_gt_boxes3d) - 1

                while tmp_idx >= 0 and cur_gt_boxes3d[tmp_idx].sum() == 0:
                    tmp_idx -= 1

                if tmp_idx >= 0:
                    cur_gt_boxes3d = cur_gt_boxes3d[:tmp_idx + 1]

                    cur_gt_boxes3d = torch.from_numpy(cur_gt_boxes3d).cuda(
                        non_blocking=True).float()
                    iou3d = iou3d_utils.boxes_iou3d_gpu(
                        pred_boxes3d[k], cur_gt_boxes3d)
                    gt_max_iou, _ = iou3d.max(dim=0)
                    refined_iou, _ = iou3d.max(dim=1)

                    for idx, thresh in enumerate(thresh_list):
                        total_recalled_bbox_list[idx] += (gt_max_iou >
                                                          thresh).sum().item()
                    recalled_num += (gt_max_iou > 0.7).sum().item()
                    gt_num += cur_gt_boxes3d.shape[0]
                    total_gt_bbox += cur_gt_boxes3d.shape[0]

                    # original recall
                    iou3d_in = iou3d_utils.boxes_iou3d_gpu(
                        roi_boxes3d[k], cur_gt_boxes3d)
                    gt_max_iou_in, _ = iou3d_in.max(dim=0)

                    for idx, thresh in enumerate(thresh_list):
                        total_roi_recalled_bbox_list[idx] += (
                            gt_max_iou_in > thresh).sum().item()

                if not cfg.RPN.FIXED:
                    fg_mask = rpn_cls_label > 0
                    correct = ((seg_result == rpn_cls_label)
                               & fg_mask).sum().float()
                    union = fg_mask.sum().float() + (seg_result >
                                                     0).sum().float() - correct
                    rpn_iou = correct / torch.clamp(union, min=1.0)
                    total_rpn_iou += rpn_iou.item()

        disp_dict = {
            'mode': mode,
            'recall': '%d/%d' % (total_recalled_bbox_list[3], total_gt_bbox)
        }
        progress_bar.set_postfix(disp_dict)
        progress_bar.update()

        if args.save_result:
            # save roi and refine results
            roi_boxes3d_np = roi_boxes3d.cpu().numpy()
            pred_boxes3d_np = pred_boxes3d.cpu().numpy()
            roi_scores_raw_np = roi_scores_raw.cpu().numpy()
            raw_scores_np = raw_scores.cpu().numpy()

            rpn_cls_np = ret_dict['rpn_cls'].cpu().numpy()
            rpn_xyz_np = ret_dict['backbone_xyz'].cpu().numpy()
            seg_result_np = seg_result.cpu().numpy()
            output_data = np.concatenate(
                (rpn_xyz_np, rpn_cls_np.reshape(batch_size, -1, 1),
                 seg_result_np.reshape(batch_size, -1, 1)),
                axis=2)

            for k in range(batch_size):
                cur_sample_id = sample_id[k]
                calib = dataset.get_calib(cur_sample_id)
                image_shape = dataset.get_image_shape(cur_sample_id)
                save_kitti_format(cur_sample_id, calib, roi_boxes3d_np[k],
                                  roi_output_dir, roi_scores_raw_np[k],
                                  image_shape)
                save_kitti_format(cur_sample_id, calib, pred_boxes3d_np[k],
                                  refine_output_dir, raw_scores_np[k],
                                  image_shape)

                output_file = os.path.join(rpn_output_dir,
                                           '%06d.npy' % cur_sample_id)
                np.save(output_file, output_data.astype(np.float32))

        # scores thresh
        inds = norm_scores > cfg.RCNN.SCORE_THRESH

        for k in range(batch_size):
            cur_inds = inds[k].view(-1)
            if cur_inds.sum() == 0:
                continue

            pred_boxes3d_selected = pred_boxes3d[k, cur_inds]
            raw_scores_selected = raw_scores[k, cur_inds]
            norm_scores_selected = norm_scores[k, cur_inds]

            # NMS thresh
            # rotated nms
            boxes_bev_selected = kitti_utils.boxes3d_to_bev_torch(
                pred_boxes3d_selected)
            keep_idx = iou3d_utils.nms_gpu(boxes_bev_selected,
                                           raw_scores_selected,
                                           cfg.RCNN.NMS_THRESH).view(-1)
            pred_boxes3d_selected = pred_boxes3d_selected[keep_idx]
            scores_selected = raw_scores_selected[keep_idx]
            pred_boxes3d_selected, scores_selected = pred_boxes3d_selected.cpu(
            ).numpy(), scores_selected.cpu().numpy()

            cur_sample_id = sample_id[k]
            calib = dataset.get_calib(cur_sample_id)
            final_total += pred_boxes3d_selected.shape[0]
            image_shape = dataset.get_image_shape(cur_sample_id)
            save_kitti_format(cur_sample_id, calib, pred_boxes3d_selected,
                              final_output_dir, scores_selected, image_shape)

    progress_bar.close()
    # dump empty files
    split_file = os.path.join(dataset.imageset_dir, '..', '..', 'ImageSets',
                              dataset.split + '.txt')
    split_file = os.path.abspath(split_file)
    image_idx_list = [x.strip() for x in open(split_file).readlines()]
    empty_cnt = 0
    for k in range(len(image_idx_list)):
        cur_file = os.path.join(final_output_dir, '%s.txt' % image_idx_list[k])
        if not os.path.exists(cur_file):
            with open(cur_file, 'w') as temp_f:
                pass
            empty_cnt += 1
            logger.info('empty_cnt=%d: dump empty file %s' %
                        (empty_cnt, cur_file))

    ret_dict = {'empty_cnt': empty_cnt}

    logger.info(
        '-------------------performance of epoch %s---------------------' %
        epoch_id)
    logger.info(str(datetime.now()))

    avg_rpn_iou = (total_rpn_iou / max(cnt, 1.0))
    avg_cls_acc = (total_cls_acc / max(cnt, 1.0))
    avg_cls_acc_refined = (total_cls_acc_refined / max(cnt, 1.0))
    avg_det_num = (final_total / max(len(dataset), 1.0))
    logger.info('final average detections: %.3f' % avg_det_num)
    logger.info('final average rpn_iou refined: %.3f' % avg_rpn_iou)
    logger.info('final average cls acc: %.3f' % avg_cls_acc)
    logger.info('final average cls acc refined: %.3f' % avg_cls_acc_refined)
    ret_dict['rpn_iou'] = avg_rpn_iou
    ret_dict['rcnn_cls_acc'] = avg_cls_acc
    ret_dict['rcnn_cls_acc_refined'] = avg_cls_acc_refined
    ret_dict['rcnn_avg_num'] = avg_det_num

    for idx, thresh in enumerate(thresh_list):
        cur_roi_recall = total_roi_recalled_bbox_list[idx] / max(
            total_gt_bbox, 1.0)
        logger.info('total roi bbox recall(thresh=%.3f): %d / %d = %f' %
                    (thresh, total_roi_recalled_bbox_list[idx], total_gt_bbox,
                     cur_roi_recall))
        ret_dict['rpn_recall(thresh=%.2f)' % thresh] = cur_roi_recall

    for idx, thresh in enumerate(thresh_list):
        cur_recall = total_recalled_bbox_list[idx] / max(total_gt_bbox, 1.0)
        logger.info(
            'total bbox recall(thresh=%.3f): %d / %d = %f' %
            (thresh, total_recalled_bbox_list[idx], total_gt_bbox, cur_recall))
        ret_dict['rcnn_recall(thresh=%.2f)' % thresh] = cur_recall

    if cfg.TEST.SPLIT != 'test':
        logger.info('Average Precision:')
        name_to_class = {'Car': 0, 'Pedestrian': 1, 'Cyclist': 2}
        ap_result_str, ap_dict = kitti_evaluate(
            dataset.label_dir,
            final_output_dir,
            label_split_file=split_file,
            current_class=name_to_class[cfg.CLASSES])
        logger.info(ap_result_str)
        ret_dict.update(ap_dict)

    logger.info('result is saved to: %s' % result_dir)
    return ret_dict
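
The recall bookkeeping in the loop above reduces the IoU matrix along both axes; a tiny self-contained illustration of that step:

import torch

iou3d = torch.tensor([[0.8, 0.1],
                      [0.2, 0.6],
                      [0.0, 0.3]])  # (num_pred, num_gt)
gt_max_iou, _ = iou3d.max(dim=0)  # best prediction for each GT box
recalled = (gt_max_iou > 0.7).sum().item()  # GTs covered at IoU 0.7 -> 1 of 2
print('%d / %d' % (recalled, iou3d.shape[1]))
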
Example #5
def eval_one_epoch_rcnn(model, dataloader, epoch_id, result_dir, logger):
    np.random.seed(1024)
    MEAN_SIZE = torch.from_numpy(cfg.CLS_MEAN_SIZE[0]).cuda()
    mode = 'TEST' if args.test else 'EVAL'

    final_output_dir = os.path.join(result_dir, 'final_result', 'data')
    os.makedirs(final_output_dir, exist_ok=True)

    if args.save_result:
        roi_output_dir = os.path.join(result_dir, 'roi_result', 'data')
        refine_output_dir = os.path.join(result_dir, 'refine_result', 'data')
        os.makedirs(roi_output_dir, exist_ok=True)
        os.makedirs(refine_output_dir, exist_ok=True)

    logger.info('---- EPOCH %s RCNN EVALUATION ----' % epoch_id)
    model.eval()

    thresh_list = [0.1, 0.3, 0.5, 0.7, 0.9]
    total_recalled_bbox_list, total_gt_bbox = [0] * 5, 0
    total_roi_recalled_bbox_list = [0] * 5
    dataset = dataloader.dataset
    cnt = final_total = total_cls_acc = total_cls_acc_refined = 0

    progress_bar = tqdm.tqdm(total=len(dataloader), leave=True, desc='eval')
    for data in dataloader:
        sample_id = data['sample_id']
        cnt += 1
        assert args.batch_size == 1, 'Only support bs=1 here'
        input_data = {}
        for key, val in data.items():
            if key != 'sample_id':
                input_data[key] = torch.from_numpy(val).contiguous().cuda(
                    non_blocking=True).float()

        roi_boxes3d = input_data['roi_boxes3d']
        roi_scores = input_data['roi_scores']
        if cfg.RCNN.ROI_SAMPLE_JIT:
            for key, val in input_data.items():
                if key in ['gt_iou', 'gt_boxes3d']:
                    continue
                input_data[key] = input_data[key].unsqueeze(dim=0)
        else:
            pts_input = torch.cat(
                (input_data['pts_input'], input_data['pts_features']), dim=-1)
            input_data['pts_input'] = pts_input

        ret_dict = model(input_data)
        rcnn_cls = ret_dict['rcnn_cls']
        rcnn_reg = ret_dict['rcnn_reg']

        # bounding box regression
        anchor_size = MEAN_SIZE
        if cfg.RCNN.SIZE_RES_ON_ROI:
            roi_size = input_data['roi_size']
            anchor_size = roi_size

        pred_boxes3d = decode_bbox_target(
            roi_boxes3d,
            rcnn_reg,
            anchor_size=anchor_size,
            loc_scope=cfg.RCNN.LOC_SCOPE,
            loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
            num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
            get_xz_fine=True,
            get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
            loc_y_scope=cfg.RCNN.LOC_Y_SCOPE,
            loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
            get_ry_fine=True)

        # scoring
        if rcnn_cls.shape[1] == 1:
            raw_scores = rcnn_cls.view(-1)
            norm_scores = torch.sigmoid(raw_scores)
            pred_classes = (norm_scores > cfg.RCNN.SCORE_THRESH).long()
        else:
            pred_classes = torch.argmax(rcnn_cls, dim=1).view(-1)
            cls_norm_scores = F.softmax(rcnn_cls, dim=1)
            raw_scores = rcnn_cls[:, pred_classes]
            norm_scores = cls_norm_scores[:, pred_classes]

        # evaluation
        disp_dict = {'mode': mode}
        if not args.test:
            gt_boxes3d = input_data['gt_boxes3d']
            gt_iou = input_data['gt_iou']

            # calculate recall
            gt_num = gt_boxes3d.shape[0]
            if gt_num > 0:
                iou3d = iou3d_utils.boxes_iou3d_gpu(pred_boxes3d, gt_boxes3d)
                gt_max_iou, _ = iou3d.max(dim=0)
                refined_iou, _ = iou3d.max(dim=1)

                for idx, thresh in enumerate(thresh_list):
                    total_recalled_bbox_list[idx] += (gt_max_iou >
                                                      thresh).sum().item()
                recalled_num = (gt_max_iou > 0.7).sum().item()
                total_gt_bbox += gt_num

                iou3d_in = iou3d_utils.boxes_iou3d_gpu(roi_boxes3d, gt_boxes3d)
                gt_max_iou_in, _ = iou3d_in.max(dim=0)

                for idx, thresh in enumerate(thresh_list):
                    total_roi_recalled_bbox_list[idx] += (gt_max_iou_in >
                                                          thresh).sum().item()

            # classification accuracy
            cls_label = (gt_iou > cfg.RCNN.CLS_FG_THRESH).float()
            cls_valid_mask = ((gt_iou >= cfg.RCNN.CLS_FG_THRESH) |
                              (gt_iou <= cfg.RCNN.CLS_BG_THRESH)).float()
            cls_acc = ((pred_classes == cls_label.long()).float() *
                       cls_valid_mask).sum() / max(cls_valid_mask.sum(), 1.0)

            iou_thresh = 0.7 if cfg.CLASSES == 'Car' else 0.5
            cls_label_refined = (gt_iou >= iou_thresh).float()
            cls_acc_refined = (
                pred_classes == cls_label_refined.long()).float().sum() / max(
                    cls_label_refined.shape[0], 1.0)

            total_cls_acc += cls_acc.item()
            total_cls_acc_refined += cls_acc_refined.item()

            disp_dict['recall'] = '%d/%d' % (total_recalled_bbox_list[3],
                                             total_gt_bbox)
            disp_dict['cls_acc_refined'] = '%.2f' % cls_acc_refined.item()

        progress_bar.set_postfix(disp_dict)
        progress_bar.update()

        image_shape = dataset.get_image_shape(sample_id)
        if args.save_result:
            # save roi and refine results
            roi_boxes3d_np = roi_boxes3d.cpu().numpy()
            pred_boxes3d_np = pred_boxes3d.cpu().numpy()
            calib = dataset.get_calib(sample_id)

            save_kitti_format(sample_id, calib, roi_boxes3d_np, roi_output_dir,
                              roi_scores, image_shape)
            save_kitti_format(sample_id, calib, pred_boxes3d_np,
                              refine_output_dir,
                              raw_scores.cpu().numpy(), image_shape)

        # NMS and scoring
        # scores thresh
        inds = norm_scores > cfg.RCNN.SCORE_THRESH
        if inds.sum() == 0:
            continue

        pred_boxes3d_selected = pred_boxes3d[inds]
        raw_scores_selected = raw_scores[inds]

        # NMS thresh
        boxes_bev_selected = kitti_utils.boxes3d_to_bev_torch(
            pred_boxes3d_selected)
        keep_idx = iou3d_utils.nms_gpu(boxes_bev_selected, raw_scores_selected,
                                       cfg.RCNN.NMS_THRESH)
        pred_boxes3d_selected = pred_boxes3d_selected[keep_idx]

        scores_selected = raw_scores_selected[keep_idx]
        pred_boxes3d_selected, scores_selected = pred_boxes3d_selected.cpu(
        ).numpy(), scores_selected.cpu().numpy()

        calib = dataset.get_calib(sample_id)
        final_total += pred_boxes3d_selected.shape[0]
        save_kitti_format(sample_id, calib, pred_boxes3d_selected,
                          final_output_dir, scores_selected, image_shape)

    progress_bar.close()

    # dump empty files
    split_file = os.path.join(dataset.imageset_dir, '..', '..', 'ImageSets',
                              dataset.split + '.txt')
    split_file = os.path.abspath(split_file)
    image_idx_list = [x.strip() for x in open(split_file).readlines()]
    empty_cnt = 0
    for k in range(len(image_idx_list)):
        cur_file = os.path.join(final_output_dir, '%s.txt' % image_idx_list[k])
        if not os.path.exists(cur_file):
            with open(cur_file, 'w') as temp_f:
                pass
            empty_cnt += 1
            logger.info('empty_cnt=%d: dump empty file %s' %
                        (empty_cnt, cur_file))

    ret_dict = {'empty_cnt': empty_cnt}

    logger.info(
        '-------------------performance of epoch %s---------------------' %
        epoch_id)
    logger.info(str(datetime.now()))

    avg_cls_acc = (total_cls_acc / max(cnt, 1.0))
    avg_cls_acc_refined = (total_cls_acc_refined / max(cnt, 1.0))
    avg_det_num = (final_total / max(cnt, 1.0))
    logger.info('final average detections: %.3f' % avg_det_num)
    logger.info('final average cls acc: %.3f' % avg_cls_acc)
    logger.info('final average cls acc refined: %.3f' % avg_cls_acc_refined)
    ret_dict['rcnn_cls_acc'] = avg_cls_acc
    ret_dict['rcnn_cls_acc_refined'] = avg_cls_acc_refined
    ret_dict['rcnn_avg_num'] = avg_det_num

    for idx, thresh in enumerate(thresh_list):
        cur_roi_recall = total_roi_recalled_bbox_list[idx] / max(
            total_gt_bbox, 1.0)
        logger.info('total roi bbox recall(thresh=%.3f): %d / %d = %f' %
                    (thresh, total_roi_recalled_bbox_list[idx], total_gt_bbox,
                     cur_roi_recall))
        ret_dict['rpn_recall(thresh=%.2f)' % thresh] = cur_roi_recall

    for idx, thresh in enumerate(thresh_list):
        cur_recall = total_recalled_bbox_list[idx] / max(total_gt_bbox, 1.0)
        logger.info(
            'total bbox recall(thresh=%.3f): %d / %d = %f' %
            (thresh, total_recalled_bbox_list[idx], total_gt_bbox, cur_recall))
        ret_dict['rcnn_recall(thresh=%.2f)' % thresh] = cur_recall

    if cfg.TEST.SPLIT != 'test':
        logger.info('Average Precision:')
        name_to_class = {'Car': 0, 'Pedestrian': 1, 'Cyclist': 2}
        ap_result_str, ap_dict = kitti_evaluate(
            dataset.label_dir,
            final_output_dir,
            label_split_file=split_file,
            current_class=name_to_class[cfg.CLASSES])
        logger.info(ap_result_str)
        ret_dict.update(ap_dict)

    logger.info('result is saved to: %s' % result_dir)

    return ret_dict
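
The classification accuracy above only counts unambiguous ROIs: those whose gt_iou is clearly foreground or clearly background. A minimal sketch of that masking with placeholder thresholds:

import torch

gt_iou = torch.tensor([0.9, 0.65, 0.2, 0.05])
pred = torch.tensor([1, 1, 0, 1])
FG, BG = 0.7, 0.45  # placeholders for cfg.RCNN.CLS_FG_THRESH / CLS_BG_THRESH
label = (gt_iou > FG).float()
valid = ((gt_iou >= FG) | (gt_iou <= BG)).float()  # the 0.65 ROI is ignored
acc = ((pred == label.long()).float() * valid).sum() / valid.sum().clamp(min=1.0)
print(acc.item())  # 2 correct out of 3 valid ROIs
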
Example #6
    def pc_cb(self, data):
        pts_input = self.extract_networks_input_from_pc2rosmsg(data)
        if self.pc_pub is not None:
            self.pc_pub.publish(numpy2pc2(pts_input, data.header.frame_id))

        np.random.seed(666)
        with torch.no_grad():
            # prepare the input data
            MEAN_SIZE = torch.from_numpy(cfg.CLS_MEAN_SIZE[0]).cuda()
            inputs = torch.from_numpy(pts_input).cuda(non_blocking=True).float()
            inputs = torch.unsqueeze(inputs, 0)

            # model inference
            input_data = {'pts_input': inputs}
            ret_dict = self.model(input_data)

            # parse the results
            batch_size = 1
            roi_scores_raw = ret_dict['roi_scores_raw']  # (B, M) proposal confidence predictions
            roi_boxes3d = ret_dict['rois']  # (B, M, 7) proposal boxes
            seg_result = ret_dict['seg_result'].long()  # (B, N) foreground point segmentation

            rcnn_cls = ret_dict['rcnn_cls'].view(batch_size, -1, ret_dict['rcnn_cls'].shape[1])  # (B, M, n) bin classification results
            rcnn_reg = ret_dict['rcnn_reg'].view(batch_size, -1, ret_dict['rcnn_reg'].shape[1])  # (B, M, C) residual regression results

            # decode the 3D bounding boxes
            anchor_size = MEAN_SIZE
            pred_boxes3d = decode_bbox_target(roi_boxes3d.view(-1, 7), rcnn_reg.view(-1, rcnn_reg.shape[-1]),
                                              anchor_size=anchor_size,
                                              loc_scope=cfg.RCNN.LOC_SCOPE,
                                              loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
                                              num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
                                              get_xz_fine=True, get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
                                              loc_y_scope=cfg.RCNN.LOC_Y_SCOPE, loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
                                              get_ry_fine=True).view(batch_size, -1, 7)

            # cfg.RCNN.SCORE_THRESH is the confidence threshold
            if rcnn_cls.shape[2] == 1:
                batch_raw_scores = rcnn_cls  # (B, M, 1)
                batch_norm_scores = torch.sigmoid(batch_raw_scores)  # (B,M,1)
                batch_pred_classes = (batch_norm_scores > cfg.RCNN.SCORE_THRESH).long()  # (B,M,1)
            else:
                batch_pred_classes = torch.argmax(rcnn_cls, dim=1).view(-1)
                batch_raw_scores = rcnn_cls[:, batch_pred_classes]
                batch_norm_scores = F.softmax(rcnn_cls, dim=1)[:, batch_pred_classes]

            # scores threshold
            inds = batch_norm_scores > cfg.RCNN.SCORE_THRESH
            for batch in range(batch_size):
                inds_in_each_batch = inds[batch].view(-1)
                if inds_in_each_batch.sum() == 0:  # no 3D bbox in this sample exceeds the score threshold
                    continue

                pred_boxes3d_in_each_batch = pred_boxes3d[batch, inds_in_each_batch]
                raw_scores_in_each_batch = batch_raw_scores[batch, inds_in_each_batch]
                norm_scores_in_each_batch = batch_norm_scores[batch, inds_in_each_batch]

                # non-maximum suppression
                boxes_bev_in_each_batch = kitti_utils.boxes3d_to_bev_torch(pred_boxes3d_in_each_batch)
                keep_idx = iou3d_utils.nms_gpu(boxes_bev_in_each_batch,
                                               raw_scores_in_each_batch,
                                               cfg.RCNN.NMS_THRESH).view(-1)
                pred_boxes3d_in_each_batch = pred_boxes3d_in_each_batch[keep_idx]
                raw_scores_in_each_batch = raw_scores_in_each_batch[keep_idx]

                output = {'boxes3d': pred_boxes3d_in_each_batch.cpu().numpy(),
                          'scores': raw_scores_in_each_batch.cpu().numpy()}
                self.visualize(output, data.header.frame_id)
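
How this callback might be wired into a node (the topic name and node class are assumptions; rospy and sensor_msgs are the standard ROS Python packages):

import rospy
from sensor_msgs.msg import PointCloud2

# node = DetectorNode()  # hypothetical class that defines pc_cb above
# rospy.init_node('point_rcnn_detector')
# rospy.Subscriber('/velodyne_points', PointCloud2, node.pc_cb, queue_size=1)
# rospy.spin()
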
Example #7
def eval_one_epoch_joint_single_file(model, file_path, result_dir, logger):
    np.random.seed(666)

    input_list = get_lidar(file_path)

    # Load the class mean size from the cfg YAML file
    MEAN_SIZE = torch.from_numpy(cfg.CLS_MEAN_SIZE[0]).cuda()

    # Mode is fixed to TEST for this single-file inference path
    mode = 'TEST'

    filename = file_path.split('/')[-1].split('.')[0]

    # Make output directory result_dir/final_result/data
    final_output_dir = os.path.join(result_dir, 'final_result', 'data')
    os.makedirs(final_output_dir, exist_ok=True)

    # Save intermediate results if args.save_result is True (defaults to True)
    if args.save_result:
        roi_output_dir = os.path.join(result_dir, 'roi_result', 'data')
        refine_output_dir = os.path.join(result_dir, 'refine_result', 'data')
        rpn_output_dir = os.path.join(result_dir, 'rpn_result', 'data')
        os.makedirs(rpn_output_dir, exist_ok=True)
        os.makedirs(roi_output_dir, exist_ok=True)
        os.makedirs(refine_output_dir, exist_ok=True)

    #logger.info('---- EPOCH %s JOINT EVALUATION ----' % epoch_id)
    logger.info('==> Output file: %s' % result_dir)
    model.eval()

    cnt = final_total = total_cls_acc = total_cls_acc_refined = total_rpn_iou = 0

    pts_lidar = input_list[0]
    rem_pts = input_list[1]
    input_data = {
        'pts_input': torch.from_numpy(pts_lidar).view(1, -1, 3).float().cuda()
    }

    # model inference
    ret_dict = model(input_data)

    roi_scores_raw = ret_dict['roi_scores_raw']  # (B, M)
    roi_boxes3d = ret_dict['rois']  # (B, M, 7)
    seg_result = ret_dict['seg_result'].long()  # (B, N)

    rcnn_cls = ret_dict['rcnn_cls'].view(1, -1, ret_dict['rcnn_cls'].shape[1])
    rcnn_reg = ret_dict['rcnn_reg'].view(
        1, -1, ret_dict['rcnn_reg'].shape[1])  # (B, M, C)

    # bounding box regression
    anchor_size = MEAN_SIZE
    if cfg.RCNN.SIZE_RES_ON_ROI:
        assert False

    pred_boxes3d = decode_bbox_target(roi_boxes3d.view(-1, 7),
                                      rcnn_reg.view(-1, rcnn_reg.shape[-1]),
                                      anchor_size=anchor_size,
                                      loc_scope=cfg.RCNN.LOC_SCOPE,
                                      loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
                                      num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
                                      get_xz_fine=True,
                                      get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
                                      loc_y_scope=cfg.RCNN.LOC_Y_SCOPE,
                                      loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
                                      get_ry_fine=True).view(1, -1, 7)

    # scoring
    if rcnn_cls.shape[2] == 1:
        raw_scores = rcnn_cls  # (B, M, 1)
        norm_scores = torch.sigmoid(raw_scores)
        pred_classes = (norm_scores > cfg.RCNN.SCORE_THRESH).long()
    else:
        pred_classes = torch.argmax(rcnn_cls, dim=1).view(-1)
        cls_norm_scores = F.softmax(rcnn_cls, dim=1)
        raw_scores = rcnn_cls[:, pred_classes]
        norm_scores = cls_norm_scores[:, pred_classes]

    if args.save_result:
        # save roi and refine results
        roi_boxes3d_np = roi_boxes3d.cpu().numpy()
        pred_boxes3d_np = pred_boxes3d.cpu().numpy()
        roi_scores_raw_np = roi_scores_raw.cpu().numpy()
        raw_scores_np = raw_scores.cpu().numpy()

        rpn_cls_np = ret_dict['rpn_cls'].cpu().numpy()
        rpn_xyz_np = ret_dict['backbone_xyz'].cpu().numpy()
        rpn_xyz_np = np.concatenate([
            rpn_xyz_np[0][:, 2].reshape(
                -1, 1), -rpn_xyz_np[0][:, 0].reshape(-1, 1),
            -rpn_xyz_np[0][:, 1].reshape(-1, 1)
        ],
                                    axis=1).reshape(1, -1, 3)
        seg_result_np = seg_result.cpu().numpy()

        rem_pts = np.concatenate([
            rem_pts[:, 2].reshape(-1, 1), -rem_pts[:, 0].reshape(-1, 1),
            -rem_pts[:, 1].reshape(-1, 1)
        ],
                                 axis=1)

        rest_lidar_pts = np.hstack(
            (rem_pts, np.zeros(rem_pts.shape[0]).reshape(-1, 1),
             np.zeros(rem_pts.shape[0]).reshape(-1, 1))).reshape(1, -1, 5)

        output_data = np.concatenate((rpn_xyz_np, rpn_cls_np.reshape(
            1, -1, 1), seg_result_np.reshape(1, -1, 1)),
                                     axis=2)

        output_data = np.hstack((rest_lidar_pts, output_data))

        cur_sample_id = 0
        output_file = os.path.join(rpn_output_dir, filename + '.npy')
        np.save(output_file, output_data.astype(np.float32))

    # scores thresh
    inds = norm_scores > cfg.RCNN.SCORE_THRESH

    cur_inds = inds[0].view(-1)
    pred_boxes3d_selected = pred_boxes3d[0, cur_inds]
    raw_scores_selected = raw_scores[0, cur_inds]
    norm_scores_selected = norm_scores[0, cur_inds]

    # NMS thresh
    # rotated nms
    boxes_bev_selected = kitti_utils.boxes3d_to_bev_torch(
        pred_boxes3d_selected)
    keep_idx = iou3d_utils.nms_gpu(boxes_bev_selected, raw_scores_selected,
                                   cfg.RCNN.NMS_THRESH).view(-1)
    pred_boxes3d_selected = pred_boxes3d_selected[keep_idx]
    scores_selected = raw_scores_selected[keep_idx]
    pred_boxes3d_selected, scores_selected = pred_boxes3d_selected.cpu().numpy(
    ), scores_selected.cpu().numpy()

    cur_sample_id = 0
    final_total += pred_boxes3d_selected.shape[0]
    save_kitti_format(cur_sample_id, pred_boxes3d_selected, final_output_dir,
                      scores_selected, filename)

    ret_dict = {}
    logger.info('final detections: %d' % final_total)

    return ret_dict
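
The (x, y, z) -> (z, -x, -y) shuffles in the save path above look like a remap between the camera frame and a LiDAR-style frame; a small check that the remap is invertible (the frame interpretation is an assumption, not taken from the repo):

import numpy as np

pts_cam = np.array([[1.0, 2.0, 3.0]])  # e.g. camera frame: x right, y down, z forward
pts_out = np.stack([pts_cam[:, 2], -pts_cam[:, 0], -pts_cam[:, 1]], axis=1)
back = np.stack([-pts_out[:, 1], -pts_out[:, 2], pts_out[:, 0]], axis=1)
assert np.allclose(back, pts_cam)  # applying the inverse recovers the input
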
Example #8
    def forward(self, input_data):
        """
        :param input_data: input dict
        :return:
        """
        input_data2 = input_data.copy()
        pred_boxes3d_1st = input_data2['pred_boxes3d_1st']
        ret_dict = {}
        batch_size = input_data['roi_boxes3d'].size(0)
        if self.training:

            input_data2['roi_boxes3d'] = pred_boxes3d_1st
            with torch.no_grad():
                target_dict_2nd = self.proposal_target_layer(input_data2,
                                                             stage=2)
            pts_input_2 = torch.cat((target_dict_2nd['sampled_pts'],
                                     target_dict_2nd['pts_feature']),
                                    dim=2)
            target_dict_2nd['pts_input'] = pts_input_2
            roi = target_dict_2nd['roi_boxes3d']
            #roi = pred_boxes3d_1st

        else:
            input_data2['roi_boxes3d'] = pred_boxes3d_1st
            #input_data2['roi_boxes3d']=torch.cat((pred_boxes3d_1st, input_data['roi_boxes3d']), 1)
            roi = pred_boxes3d_1st
            #roi=torch.cat((pred_boxes3d_1st, input_data['roi_boxes3d']), 1)
            pts_input_2 = self.roipooling(input_data2)

        xyz_2, features_2 = self._break_up_pc(pts_input_2)
        #print(xyz_2.size(),xyz.size(),features_2.size(),features.size())
        if cfg.RCNN.USE_RPN_FEATURES:
            xyz_input_2 = pts_input_2[...,
                                      0:self.rcnn_input_channel].transpose(
                                          1, 2).unsqueeze(dim=3)
            xyz_feature_2 = self.xyz_up_layer(xyz_input_2)

            rpn_feature_2 = pts_input_2[...,
                                        self.rcnn_input_channel:].transpose(
                                            1, 2).unsqueeze(dim=3)

            merged_feature_2 = torch.cat((xyz_feature_2, rpn_feature_2), dim=1)
            merged_feature_2 = self.merge_down_layer(merged_feature_2)
            l_xyz_2, l_features_2 = [xyz_2], [merged_feature_2.squeeze(dim=3)]
        else:
            l_xyz_2, l_features_2 = [xyz_2], [features_2]
        #print(l_xyz_2[0].size(), l_xyz[0].size(), l_features_2[0].size(), l_features[0].size())
        for i in range(len(self.SA_modules)):
            li_xyz_2, li_features_2 = self.SA_modules[i](l_xyz_2[i],
                                                         l_features_2[i])
            l_xyz_2.append(li_xyz_2)
            l_features_2.append(li_features_2)

        batch_size_2 = pts_input_2.shape[0]
        anchor_size = torch.from_numpy(cfg.CLS_MEAN_SIZE[0]).cuda()
        rcnn_cls_2nd = self.cls_layer_2nd(l_features_2[-1]).transpose(
            1, 2).contiguous().squeeze(dim=1)  # (B*64, 1 or 2)
        rcnn_reg_2nd = self.reg_layer_2nd(l_features_2[-1]).transpose(
            1, 2).contiguous().squeeze(dim=1)  # (B*64, C)
        pre_iou2 = self.iou_layer(l_features_2[-1]).transpose(
            1, 2).contiguous().squeeze(dim=1)
        #loss

        if self.training:
            cls_label = target_dict_2nd['cls_label'].float()
            rcnn_cls_flat = rcnn_cls_2nd.view(-1)
            batch_loss_cls = F.binary_cross_entropy(
                torch.sigmoid(rcnn_cls_flat),
                cls_label.view(-1),
                reduction='none')
            cls_label_flat = cls_label.view(-1)
            cls_valid_mask = (cls_label_flat >= 0).float()
            rcnn_loss_cls = (batch_loss_cls *
                             cls_valid_mask).sum() / torch.clamp(
                                 cls_valid_mask.sum(), min=1.0)
            gt_boxes3d_ct = target_dict_2nd['gt_of_rois']
            reg_valid_mask = target_dict_2nd['reg_valid_mask']
            fg_mask = (reg_valid_mask > 0)
            #print(rcnn_reg_2nd.view(batch_size_2, -1)[fg_mask].size(0))
            # If no ROI is marked foreground, fall back to the complementary mask
            # so the regression loss still receives a non-empty batch.
            if rcnn_reg_2nd.view(batch_size_2, -1)[fg_mask].size(0) == 0:
                fg_mask = (reg_valid_mask <= 0)
            loss_loc, loss_angle, loss_size, reg_loss_dict = \
                loss_utils.get_reg_loss(rcnn_reg_2nd.view(batch_size_2, -1)[fg_mask],
                                        gt_boxes3d_ct.view(batch_size_2, 7)[fg_mask],
                                        loc_scope=cfg.RCNN.LOC_SCOPE,
                                        loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
                                        num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
                                        anchor_size=anchor_size,
                                        get_xz_fine=True, get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
                                        loc_y_scope=cfg.RCNN.LOC_Y_SCOPE, loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
                                        get_ry_fine=True)
            rcnn_loss_reg = loss_loc + loss_angle + 3 * loss_size

            two = {
                'rcnn_loss_cls_2nd': rcnn_loss_cls,
                'rcnn_loss_reg_2nd': rcnn_loss_reg
            }

        else:
            two = {}

        sec = {'rcnn_cls_2nd': rcnn_cls_2nd, 'rcnn_reg_2nd': rcnn_reg_2nd}
        #print(input_data['roi_boxes3d'].shape,input_data2['roi_boxes3d'].shape)

        pred_boxes3d_2nd = decode_bbox_target(
            roi.view(-1, 7),
            rcnn_reg_2nd.view(-1, rcnn_reg_2nd.shape[-1]),
            anchor_size=anchor_size,
            loc_scope=cfg.RCNN.LOC_SCOPE,
            loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
            num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
            get_xz_fine=True,
            get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
            loc_y_scope=cfg.RCNN.LOC_Y_SCOPE,
            loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
            get_ry_fine=True).view(batch_size, -1, 7)
        input_data3 = input_data.copy()
        if self.training:

            input_data3['roi_boxes3d'] = pred_boxes3d_2nd
            # print(input_data3['roi_boxes3d'].shape)
            with torch.no_grad():
                target_dict_3rd = self.proposal_target_layer(input_data3,
                                                             stage=3)

            pts_input_3 = torch.cat((target_dict_3rd['sampled_pts'],
                                     target_dict_3rd['pts_feature']),
                                    dim=2)
            target_dict_3rd['pts_input'] = pts_input_3
            roi = target_dict_3rd['roi_boxes3d']
            #roi = pred_boxes3d_2nd
        else:
            input_data3['roi_boxes3d'] = pred_boxes3d_2nd
            # input_data3['roi_boxes3d']=torch.cat((pred_boxes3d_2nd, input_data2['roi_boxes3d']), 1)
            roi = pred_boxes3d_2nd
            # roi=torch.cat((pred_boxes3d_2nd, input_data2['roi_boxes3d']), 1)
            pts_input_3 = self.roipooling(input_data3)
        xyz_3, features_3 = self._break_up_pc(pts_input_3)

        if cfg.RCNN.USE_RPN_FEATURES:
            xyz_input_3 = pts_input_3[...,
                                      0:self.rcnn_input_channel].transpose(
                                          1, 2).unsqueeze(dim=3)
            xyz_feature_3 = self.xyz_up_layer_3(xyz_input_3)

            rpn_feature_3 = pts_input_3[...,
                                        self.rcnn_input_channel:].transpose(
                                            1, 2).unsqueeze(dim=3)

            merged_feature_3 = torch.cat((xyz_feature_3, rpn_feature_3), dim=1)
            merged_feature_3 = self.merge_down_layer_3(merged_feature_3)
            l_xyz_3, l_features_3 = [xyz_3], [merged_feature_3.squeeze(dim=3)]
        else:
            l_xyz_3, l_features_3 = [xyz_3], [features_3]

        for i in range(len(self.SA_modules_3)):
            li_xyz_3, li_features_3 = self.SA_modules_3[i](l_xyz_3[i],
                                                           l_features_3[i])
            l_xyz_3.append(li_xyz_3)
            l_features_3.append(li_features_3)
        del xyz_2, features_2, l_features_2
        rcnn_cls_3rd = self.cls_layer_3rd(l_features_3[-1]).transpose(
            1, 2).contiguous().squeeze(dim=1)  # (B*64, 1 or 2)
        rcnn_reg_3rd = self.reg_layer_3rd(l_features_3[-1]).transpose(
            1, 2).contiguous().squeeze(dim=1)  # (B*64, C)
        pre_iou3 = self.iou_layer(l_features_3[-1]).transpose(
            1, 2).contiguous().squeeze(dim=1)
        # loss
        if self.training:
            cls_label = target_dict_3rd['cls_label'].float()
            rcnn_cls_flat = rcnn_cls_3rd.view(-1)
            batch_loss_cls = F.binary_cross_entropy(
                torch.sigmoid(rcnn_cls_flat), cls_label, reduction='none')
            cls_label_flat = cls_label.view(-1)
            cls_valid_mask = (cls_label_flat >= 0).float()
            rcnn_loss_cls = (batch_loss_cls *
                             cls_valid_mask).sum() / torch.clamp(
                                 cls_valid_mask.sum(), min=1.0)
            gt_boxes3d_ct = target_dict_3rd['gt_of_rois']
            reg_valid_mask = target_dict_3rd['reg_valid_mask']
            fg_mask = (reg_valid_mask > 0)

            # Same fallback as in the 2nd stage: if no ROI is foreground, use the
            # complementary mask so the regression loss gets a non-empty batch.
            if rcnn_reg_3rd.view(batch_size_2, -1)[fg_mask].size(0) == 0:
                fg_mask = (reg_valid_mask <= 0)
            loss_loc, loss_angle, loss_size, reg_loss_dict = \
                loss_utils.get_reg_loss(rcnn_reg_3rd.view(batch_size_2, -1)[fg_mask],
                                        gt_boxes3d_ct.view(batch_size_2, 7)[fg_mask],
                                        loc_scope=cfg.RCNN.LOC_SCOPE,
                                        loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
                                        num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
                                        anchor_size=anchor_size,
                                        get_xz_fine=True, get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
                                        loc_y_scope=cfg.RCNN.LOC_Y_SCOPE, loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
                                        get_ry_fine=True)
            rcnn_loss_reg = loss_loc + loss_angle + 3 * loss_size

            # three = {'rcnn_loss_cls_3rd': rcnn_loss_cls, 'rcnn_loss_reg_3rd': rcnn_loss_reg}

        else:
            three = {}
        pred_boxes3d_3rd = decode_bbox_target(
            roi.view(-1, 7),
            rcnn_reg_3rd.view(-1, rcnn_reg_3rd.shape[-1]),
            anchor_size=anchor_size,
            loc_scope=cfg.RCNN.LOC_SCOPE,
            loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
            num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
            get_xz_fine=True,
            get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
            loc_y_scope=cfg.RCNN.LOC_Y_SCOPE,
            loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
            get_ry_fine=True).view(batch_size, -1, 7)
        if self.training:
            gt = target_dict_3rd['real_gt']
            iou_label = []
            for i in range(batch_size_2):
                iou_label.append(
                    iou3d_utils.boxes_iou3d_gpu(
                        pred_boxes3d_3rd.view(-1, 7)[i].view(1, 7),
                        gt[i].view(1, 7)))
            iou_label = torch.cat(iou_label)
            iou_label = (iou_label - 0.5) * 2
            iou_loss = F.mse_loss((pre_iou3[fg_mask]), iou_label[fg_mask])
            #print(iou_loss.item())
            three = {
                'rcnn_loss_cls_3rd': rcnn_loss_cls,
                'rcnn_loss_reg_3rd': rcnn_loss_reg,
                'rcnn_iou_loss': iou_loss
            }
            del cls_label, rcnn_cls_flat, batch_loss_cls, cls_label_flat, cls_valid_mask, rcnn_loss_cls, gt_boxes3d_ct, reg_valid_mask, fg_mask
        pre_iou3 = pre_iou3 / 2 + 0.5
        pre_iou2 = pre_iou2 / 2 + 0.5
        ret_dict = {
            'rcnn_cls_3rd': rcnn_cls_3rd,
            'rcnn_reg_3rd': rcnn_reg_3rd,
            'pred_boxes3d_1st': pred_boxes3d_1st,
            'pred_boxes3d_2nd': pred_boxes3d_2nd,
            'pred_boxes3d_3rd': pred_boxes3d_3rd,
            'pre_iou3': pre_iou3,
            'pre_iou2': pre_iou2
        }
        ret_dict.update(sec)
        ret_dict.update(two)
        ret_dict.update(three)

        return ret_dict
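
The IoU head in this example trains on targets rescaled from [0, 1] to [-1, 1] (iou_label = (iou_label - 0.5) * 2) and maps predictions back with pre_iou / 2 + 0.5; a two-line check of that round trip:

import torch

iou = torch.tensor([0.0, 0.5, 0.9])
assert torch.allclose(((iou - 0.5) * 2) / 2 + 0.5, iou)  # rescale and invert
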
Example #9
    def forward(self, input_data):
        """
        :param input_data: input dict
        :return:
        """
        if cfg.RCNN.ROI_SAMPLE_JIT:
            if self.training:
                with torch.no_grad():
                    target_dict = self.proposal_target_layer(input_data,
                                                             stage=1)

                pts_input = torch.cat(
                    (target_dict['sampled_pts'], target_dict['pts_feature']),
                    dim=2)
                target_dict['pts_input'] = pts_input
            else:
                rpn_xyz, rpn_features = input_data['rpn_xyz'], input_data[
                    'rpn_features']
                batch_rois = input_data['roi_boxes3d']
                if cfg.RCNN.USE_INTENSITY:
                    pts_extra_input_list = [
                        input_data['rpn_intensity'].unsqueeze(dim=2),
                        input_data['seg_mask'].unsqueeze(dim=2)
                    ]
                else:
                    pts_extra_input_list = [
                        input_data['seg_mask'].unsqueeze(dim=2)
                    ]

                if cfg.RCNN.USE_DEPTH:
                    pts_depth = input_data['pts_depth'] / 70.0 - 0.5
                    pts_extra_input_list.append(pts_depth.unsqueeze(dim=2))
                pts_extra_input = torch.cat(pts_extra_input_list, dim=2)

                pts_feature = torch.cat((pts_extra_input, rpn_features), dim=2)
                pooled_features, pooled_empty_flag = \
                        roipool3d_utils.roipool3d_gpu(rpn_xyz, pts_feature, batch_rois, cfg.RCNN.POOL_EXTRA_WIDTH,
                                                      sampled_pt_num=cfg.RCNN.NUM_POINTS)

                # canonical transformation
                batch_size = batch_rois.shape[0]
                roi_center = batch_rois[:, :, 0:3]
                pooled_features[:, :, :, 0:3] -= roi_center.unsqueeze(dim=2)
                for k in range(batch_size):
                    pooled_features[k, :, :,
                                    0:3] = kitti_utils.rotate_pc_along_y_torch(
                                        pooled_features[k, :, :, 0:3],
                                        batch_rois[k, :, 6])

                pts_input = pooled_features.view(-1, pooled_features.shape[2],
                                                 pooled_features.shape[3])
        else:
            pts_input = input_data['pts_input']
            target_dict = {}
            target_dict['pts_input'] = input_data['pts_input']
            target_dict['roi_boxes3d'] = input_data['roi_boxes3d']
            if self.training:
                target_dict['cls_label'] = input_data['cls_label']
                target_dict['reg_valid_mask'] = input_data['reg_valid_mask']
                target_dict['gt_of_rois'] = input_data['gt_boxes3d_ct']

        xyz, features = self._break_up_pc(pts_input)

        if cfg.RCNN.USE_RPN_FEATURES:
            xyz_input = pts_input[..., 0:self.rcnn_input_channel].transpose(
                1, 2).unsqueeze(dim=3)
            xyz_feature = self.xyz_up_layer(xyz_input)

            rpn_feature = pts_input[..., self.rcnn_input_channel:].transpose(
                1, 2).unsqueeze(dim=3)

            merged_feature = torch.cat((xyz_feature, rpn_feature), dim=1)
            merged_feature = self.merge_down_layer(merged_feature)
            l_xyz, l_features = [xyz], [merged_feature.squeeze(dim=3)]
        else:
            l_xyz, l_features = [xyz], [features]

        for i in range(len(self.SA_modules)):
            li_xyz, li_features = self.SA_modules[i](l_xyz[i], l_features[i])
            l_xyz.append(li_xyz)
            l_features.append(li_features)

        batch_size = input_data['roi_boxes3d'].size(0)
        rcnn_cls = self.cls_layer(l_features[-1]).transpose(
            1, 2).contiguous().squeeze(dim=1)  # (B*64, 1 or 2)
        rcnn_reg = self.reg_layer(l_features[-1]).transpose(
            1, 2).contiguous().squeeze(dim=1)  # (B*64, C)
        #tt=torch.rand(rcnn_reg.shape[0],rcnn_reg.shape[1]).cuda()
        #tt=self.test_layer(pts_input.permute((0,2,1)))
        #tt=tt[:,:,0]
        #rcnn_reg=tt
        #rcnn_cls=tt[:,0]
        #print(tt.size(),rcnn_cls.size())
        roi_boxes3d = target_dict['roi_boxes3d'].view(-1, 7)
        anchor_size = torch.from_numpy(cfg.CLS_MEAN_SIZE[0]).cuda()
        #print(rcnn_reg.size(),roi_boxes3d.size())
        pred_boxes3d_1st = decode_bbox_target(
            roi_boxes3d.view(-1, 7),
            rcnn_reg.view(-1, rcnn_reg.shape[-1]),
            anchor_size=anchor_size,
            loc_scope=cfg.RCNN.LOC_SCOPE,
            loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
            num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
            get_xz_fine=True,
            get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
            loc_y_scope=cfg.RCNN.LOC_Y_SCOPE,
            loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
            get_ry_fine=True).view(batch_size, -1, 7)
        #print(pred_boxes3d.size()) (B,64,7)

        ## 2nd stage
        #print(input_data['roi_boxes3d'].size())
        input_data2 = input_data
        input_data2['roi_boxes3d'] = pred_boxes3d_1st
        #print(input_data['roi_boxes3d'].size())
        with torch.no_grad():
            target_dict_2nd = self.proposal_target_layer(input_data2, stage=2)

        pts_input_2 = torch.cat(
            (target_dict_2nd['sampled_pts'], target_dict_2nd['pts_feature']),
            dim=2)
        target_dict_2nd['pts_input'] = pts_input_2

        xyz_2, features_2 = self._break_up_pc(pts_input_2)
        #print(xyz_2.size(),xyz.size(),features_2.size(),features.size())
        if cfg.RCNN.USE_RPN_FEATURES:
            xyz_input_2 = pts_input_2[...,
                                      0:self.rcnn_input_channel].transpose(
                                          1, 2).unsqueeze(dim=3)
            xyz_feature_2 = self.xyz_up_layer(xyz_input_2)

            rpn_feature_2 = pts_input_2[...,
                                        self.rcnn_input_channel:].transpose(
                                            1, 2).unsqueeze(dim=3)

            merged_feature_2 = torch.cat((xyz_feature_2, rpn_feature_2), dim=1)
            merged_feature_2 = self.merge_down_layer(merged_feature_2)
            l_xyz_2, l_features_2 = [xyz_2], [merged_feature_2.squeeze(dim=3)]
        else:
            l_xyz_2, l_features_2 = [xyz_2], [features_2]
        #print(l_xyz_2[0].size(), l_xyz[0].size(), l_features_2[0].size(), l_features[0].size())
        for i in range(len(self.SA_modules)):
            li_xyz_2, li_features_2 = self.SA_modules[i](l_xyz_2[i],
                                                         l_features_2[i])
            l_xyz_2.append(li_xyz_2)
            l_features_2.append(li_features_2)
        del xyz, features, l_features

        rcnn_cls_2nd = self.cls_layer_2nd(l_features_2[-1]).transpose(
            1, 2).contiguous().squeeze(dim=1)  # (B*64, 1 or 2)
        rcnn_reg_2nd = self.reg_layer_2nd(l_features_2[-1]).transpose(
            1, 2).contiguous().squeeze(dim=1)  # (B*64, C)
        #loss
        '''
        cls_label = target_dict_2nd['cls_label'].float()
        cls_label_flat = cls_label.view(-1)
        rcnn_cls_flat_2nd = rcnn_cls_2nd.view(-1)
        cls_valid_mask = (cls_label_flat >= 0).float()
        batch_loss_cls_2nd = F.binary_cross_entropy(torch.sigmoid(rcnn_cls_flat_2nd), cls_label, reduction='none')
        rcnn_loss_cls_2nd = (batch_loss_cls_2nd * cls_valid_mask).sum() / torch.clamp(cls_valid_mask.sum(), min=1.0)
        #rcnn_loss_cls_2nd.backward()
        '''
        sec = {'rcnn_cls_2nd': rcnn_cls_2nd, 'rcnn_reg_2nd': rcnn_reg_2nd}
        pred_boxes3d_2nd = decode_bbox_target(
            pred_boxes3d_1st.view(-1, 7),
            rcnn_reg_2nd.view(-1, rcnn_reg_2nd.shape[-1]),
            anchor_size=anchor_size,
            loc_scope=cfg.RCNN.LOC_SCOPE,
            loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
            num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
            get_xz_fine=True,
            get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
            loc_y_scope=cfg.RCNN.LOC_Y_SCOPE,
            loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
            get_ry_fine=True).view(batch_size, -1, 7)

        ## 3rd
        #print(pred_boxes3d_2nd.size())
        input_data['roi_boxes3d'] = pred_boxes3d_2nd
        with torch.no_grad():
            target_dict_3rd = self.proposal_target_layer(input_data, stage=3)
        pts_input_3 = torch.cat(
            (target_dict_3rd['sampled_pts'], target_dict_3rd['pts_feature']),
            dim=2)
        target_dict_3rd['pts_input'] = pts_input_3
        xyz_3, features_3 = self._break_up_pc(pts_input_3)

        if cfg.RCNN.USE_RPN_FEATURES:
            xyz_input_3 = pts_input_3[...,
                                      0:self.rcnn_input_channel].transpose(
                                          1, 2).unsqueeze(dim=3)
            xyz_feature_3 = self.xyz_up_layer(xyz_input_3)

            rpn_feature_3 = pts_input_3[...,
                                        self.rcnn_input_channel:].transpose(
                                            1, 2).unsqueeze(dim=3)

            merged_feature_3 = torch.cat((xyz_feature_3, rpn_feature_3), dim=1)
            merged_feature_3 = self.merge_down_layer(merged_feature_3)
            l_xyz_3, l_features_3 = [xyz_3], [merged_feature_3.squeeze(dim=3)]
        else:
            l_xyz_3, l_features_3 = [xyz_3], [features_3]

        for i in range(len(self.SA_modules)):
            li_xyz_3, li_features_3 = self.SA_modules[i](l_xyz_3[i],
                                                         l_features_3[i])
            l_xyz_3.append(li_xyz_3)
            l_features_3.append(li_features_3)
        del xyz_2, features_2, l_features_2
        rcnn_cls_3rd = self.cls_layer_3rd(l_features_3[-1]).transpose(
            1, 2).contiguous().squeeze(dim=1)  # (B*64, 1 or 2)
        rcnn_reg_3rd = self.reg_layer_3rd(l_features_3[-1]).transpose(
            1, 2).contiguous().squeeze(dim=1)  # (B*64, C)
        pred_boxes3d_3rd = decode_bbox_target(
            pred_boxes3d_2nd.view(-1, 7),
            rcnn_reg_3rd.view(-1, rcnn_reg_3rd.shape[-1]),
            anchor_size=anchor_size,
            loc_scope=cfg.RCNN.LOC_SCOPE,
            loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
            num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
            get_xz_fine=True,
            get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
            loc_y_scope=cfg.RCNN.LOC_Y_SCOPE,
            loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
            get_ry_fine=True).view(batch_size, -1, 7)
        ret_dict = {
            'rcnn_cls': rcnn_cls,
            'rcnn_reg': rcnn_reg,
            'rcnn_cls_3rd': rcnn_cls_3rd,
            'rcnn_reg_3rd': rcnn_reg_3rd,
            'pred_boxes3d_1st': pred_boxes3d_1st,
            'pred_boxes3d_2nd': pred_boxes3d_2nd,
            'pred_boxes3d_3rd': pred_boxes3d_3rd
        }
        ret_dict.update(sec)
        if self.training:
            ret_dict.update(target_dict)
        return ret_dict
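The three blocks above are the same refine-and-decode step applied in sequence: each stage regresses residuals for its input ROIs, decodes them into boxes, and feeds those boxes to the next stage as its ROIs. A minimal sketch of that chaining, reusing decode_bbox_target and cfg from this snippet; stage_heads (the per-stage regression layers) and pool_and_encode (proposal sampling plus the SA backbone) are hypothetical stand-ins:

def cascade_refine(rois, batch_size, stage_heads, pool_and_encode, anchor_size):
    """Chain refinement stages: stage k's decoded boxes become stage k+1's ROIs."""
    preds = []
    for head_reg in stage_heads:
        feats = pool_and_encode(rois)          # (B*M, C) pooled ROI features
        reg = head_reg(feats)                  # (B*M, reg_channels) residuals
        boxes = decode_bbox_target(
            rois.view(-1, 7), reg.view(-1, reg.shape[-1]),
            anchor_size=anchor_size,
            loc_scope=cfg.RCNN.LOC_SCOPE,
            loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
            num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
            get_xz_fine=True,
            get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
            loc_y_scope=cfg.RCNN.LOC_Y_SCOPE,
            loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
            get_ry_fine=True).view(batch_size, -1, 7)
        preds.append(boxes)
        rois = boxes                           # refined boxes feed the next stage
    return preds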
Example #10
    def forward(self, input_data):
        """
        :param input_data: input dict
        :return:
        """

        if cfg.RCNN.ROI_SAMPLE_JIT:
            if self.training:
                with torch.no_grad():
                    target_dict = self.proposal_target_layer(input_data,
                                                             stage=1)

                pts_input = torch.cat(
                    (target_dict['sampled_pts'], target_dict['pts_feature']),
                    dim=2)
                target_dict['pts_input'] = pts_input
            else:
                rpn_xyz, rpn_features = input_data['rpn_xyz'], input_data[
                    'rpn_features']
                batch_rois = input_data['roi_boxes3d']
                if cfg.RCNN.USE_INTENSITY:
                    pts_extra_input_list = [
                        input_data['rpn_intensity'].unsqueeze(dim=2),
                        input_data['seg_mask'].unsqueeze(dim=2)
                    ]
                else:
                    pts_extra_input_list = [
                        input_data['seg_mask'].unsqueeze(dim=2)
                    ]

                if cfg.RCNN.USE_DEPTH:
                    pts_depth = input_data['pts_depth'] / 70.0 - 0.5
                    pts_extra_input_list.append(pts_depth.unsqueeze(dim=2))
                pts_extra_input = torch.cat(pts_extra_input_list, dim=2)

                pts_feature = torch.cat((pts_extra_input, rpn_features), dim=2)
                pooled_features, pooled_empty_flag = \
                        roipool3d_utils.roipool3d_gpu(rpn_xyz, pts_feature, batch_rois, cfg.RCNN.POOL_EXTRA_WIDTH,
                                                      sampled_pt_num=cfg.RCNN.NUM_POINTS)

                # canonical transformation
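                # shift each pooled point by its ROI center and rotate by the
                # ROI heading, so points are expressed in each box's local frame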
                batch_size = batch_rois.shape[0]
                roi_center = batch_rois[:, :, 0:3]
                pooled_features[:, :, :, 0:3] -= roi_center.unsqueeze(dim=2)
                for k in range(batch_size):
                    pooled_features[k, :, :,
                                    0:3] = kitti_utils.rotate_pc_along_y_torch(
                                        pooled_features[k, :, :, 0:3],
                                        batch_rois[k, :, 6])

                pts_input = pooled_features.view(-1, pooled_features.shape[2],
                                                 pooled_features.shape[3])
        else:
            pts_input = input_data['pts_input']
            target_dict = {}
            target_dict['pts_input'] = input_data['pts_input']
            target_dict['roi_boxes3d'] = input_data['roi_boxes3d']
            if self.training:
                #input_data['ori_roi'] = torch.cat((input_data['ori_roi'], input_data['roi_boxes3d']), 1)
                target_dict['cls_label'] = input_data['cls_label']
                target_dict['reg_valid_mask'] = input_data[
                    'reg_valid_mask'].view(-1)
                target_dict['gt_of_rois'] = input_data['gt_boxes3d_ct']
        #print(pts_input.shape)
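        # reshape to (num_rois, points_per_roi, channels); the constants are
        # presumably cfg.RCNN.NUM_POINTS == 512 sampled points and 128 RPN
        # feature channels on top of the xyz/extra input channels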
        pts_input = pts_input.view(-1, 512, 128 + self.rcnn_input_channel)
        xyz, features = self._break_up_pc(pts_input)
        anchor_size = torch.from_numpy(cfg.CLS_MEAN_SIZE[0]).cuda()
        if cfg.RCNN.USE_RPN_FEATURES:
            xyz_input = pts_input[..., 0:self.rcnn_input_channel].transpose(
                1, 2).unsqueeze(dim=3)
            #xyz_input = pts_input[..., 0:self.rcnn_input_channel].transpose(1, 2)

            xyz_feature = self.xyz_up_layer(xyz_input)

            rpn_feature = pts_input[..., self.rcnn_input_channel:].transpose(
                1, 2).unsqueeze(dim=3)

            merged_feature = torch.cat((xyz_feature, rpn_feature), dim=1)
            merged_feature = self.merge_down_layer(merged_feature)
            l_xyz, l_features = [xyz], [merged_feature.squeeze(dim=3)]
        else:
            l_xyz, l_features = [xyz], [features]

        for i in range(len(self.SA_modules)):

            li_xyz, li_features = self.SA_modules[i](l_xyz[i], l_features[i])
            l_xyz.append(li_xyz)
            l_features.append(li_features)

        batch_size = input_data['roi_boxes3d'].size(0)
        batch_size_2 = pts_input.shape[0]  # total number of ROIs, used by the loss functions
        #print(input_data['roi_boxes3d'].shape,pts_input.shape)
        rcnn_cls = self.cls_layer(l_features[-1]).transpose(
            1, 2).contiguous().squeeze(dim=1)  # (B*64, 1 or 2)
        rcnn_reg = self.reg_layer(l_features[-1]).transpose(
            1, 2).contiguous().squeeze(dim=1)  # (B*64, C)
        if self.training:
            roi_boxes3d = target_dict['roi_boxes3d'].view(-1, 7)
            cls_label = target_dict['cls_label'].float()
            rcnn_cls_flat = rcnn_cls.view(-1)
            batch_loss_cls = F.binary_cross_entropy(
                torch.sigmoid(rcnn_cls_flat),
                cls_label.view(-1),
                reduction='none')
            cls_label_flat = cls_label.view(-1)
            cls_valid_mask = (cls_label_flat >= 0).float()
            rcnn_loss_cls = (batch_loss_cls *
                             cls_valid_mask).sum() / torch.clamp(
                                 cls_valid_mask.sum(), min=1.0)
            gt_boxes3d_ct = target_dict['gt_of_rois']
            reg_valid_mask = target_dict['reg_valid_mask']
            fg_mask = (reg_valid_mask > 0)
            #print(rcnn_reg.view(batch_size_2, -1)[fg_mask].shape)
            loss_loc, loss_angle, loss_size, reg_loss_dict = \
                loss_utils.get_reg_loss(rcnn_reg.view(batch_size_2, -1)[fg_mask],
                                        gt_boxes3d_ct.view(batch_size_2, 7)[fg_mask],
                                        loc_scope=cfg.RCNN.LOC_SCOPE,
                                        loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
                                        num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
                                        anchor_size=anchor_size,
                                        get_xz_fine=True, get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
                                        loc_y_scope=cfg.RCNN.LOC_Y_SCOPE, loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
                                        get_ry_fine=True)
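            # size errors are weighted 3x relative to location and heading
            # (a design choice in this snippet)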
            rcnn_loss_reg = loss_loc + loss_angle + 3 * loss_size

            one = {
                'rcnn_loss_cls': rcnn_loss_cls,
                'rcnn_loss_reg': rcnn_loss_reg
            }
            del cls_label, rcnn_cls_flat, batch_loss_cls, cls_label_flat, cls_valid_mask, rcnn_loss_cls, gt_boxes3d_ct, reg_valid_mask, fg_mask
        else:
            roi_boxes3d = input_data['roi_boxes3d'].view(-1, 7)
            one = {}
        #print(rcnn_reg.size(),roi_boxes3d.size())
        #print(roi_boxes3d.shape, rcnn_reg.shape)
        pred_boxes3d_1st = decode_bbox_target(
            roi_boxes3d.view(-1, 7),
            rcnn_reg.view(-1, rcnn_reg.shape[-1]),
            anchor_size=anchor_size,
            loc_scope=cfg.RCNN.LOC_SCOPE,
            loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
            num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
            get_xz_fine=True,
            get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
            loc_y_scope=cfg.RCNN.LOC_Y_SCOPE,
            loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
            get_ry_fine=True).view(batch_size, -1, 7)
        if not self.training and cfg.RCNN.ENABLED and not cfg.RPN.ENABLED:
            pred_boxes3d_1st = pred_boxes3d_1st.view(-1, 7)

        input_data2 = input_data.copy()

        #print(input_data['roi_boxes3d'].size())
        if self.training:
            #input_data2['roi_boxes3d'] = torch.cat((pred_boxes3d_1st, input_data['ori_roi']), 1)
            input_data2['roi_boxes3d'] = torch.cat(
                (pred_boxes3d_1st, input_data['roi_boxes3d']), 1)
            #input_data2['roi_boxes3d'] = input_data['gt_boxes3d']
            #input_data2['roi_boxes3d'] = pred_boxes3d_1st
            #print(input_data2['roi_boxes3d'].shape)
            with torch.no_grad():
                target_dict_2nd = self.proposal_target_layer(input_data2,
                                                             stage=2)
            '''
            reg_valid_mask = target_dict_2nd['reg_valid_mask']
            fg_mask_num2 = (reg_valid_mask > 0).sum()
            if fg_mask_num2< 10*batch_size:
                input_data2['roi_boxes3d'] = torch.cat((pred_boxes3d_1st, input_data['roi_boxes3d']), 1)
                with torch.no_grad():
                    target_dict_2nd = self.proposal_target_layer(input_data2, stage=2)
            '''
            pts_input_2 = torch.cat((target_dict_2nd['sampled_pts'],
                                     target_dict_2nd['pts_feature']),
                                    dim=2)
            target_dict_2nd['pts_input'] = pts_input_2
            roi = target_dict_2nd['roi_boxes3d']

        else:
            input_data2['roi_boxes3d'] = pred_boxes3d_1st
            #input_data2['roi_boxes3d']=torch.cat((pred_boxes3d_1st, input_data['roi_boxes3d']), 1)
            roi = pred_boxes3d_1st
            #roi=torch.cat((pred_boxes3d_1st, input_data['roi_boxes3d']), 1)
            pts_input_2 = self.roipooling(input_data2)
        #print(pts_input_2.shape)
        xyz_2, features_2 = self._break_up_pc(pts_input_2)
        #print(xyz_2.size(),xyz.size(),features_2.size(),features.size())
        if cfg.RCNN.USE_RPN_FEATURES:
            xyz_input_2 = pts_input_2[...,
                                      0:self.rcnn_input_channel].transpose(
                                          1, 2).unsqueeze(dim=3)
            xyz_feature_2 = self.xyz_up_layer(xyz_input_2)

            rpn_feature_2 = pts_input_2[...,
                                        self.rcnn_input_channel:].transpose(
                                            1, 2).unsqueeze(dim=3)

            merged_feature_2 = torch.cat((xyz_feature_2, rpn_feature_2), dim=1)
            merged_feature_2 = self.merge_down_layer(merged_feature_2)
            l_xyz_2, l_features_2 = [xyz_2], [merged_feature_2.squeeze(dim=3)]
        else:
            l_xyz_2, l_features_2 = [xyz_2], [features_2]
        #print(l_xyz_2[0].size(), l_xyz[0].size(), l_features_2[0].size(), l_features[0].size())
        for i in range(len(self.SA_modules)):
            li_xyz_2, li_features_2 = self.SA_modules[i](l_xyz_2[i],
                                                         l_features_2[i])
            l_xyz_2.append(li_xyz_2)
            l_features_2.append(li_features_2)
        del xyz, features, l_features

        rcnn_cls_2nd = self.cls_layer_2nd(l_features_2[-1]).transpose(
            1, 2).contiguous().squeeze(dim=1)  # (B*64, 1 or 2)
        rcnn_reg_2nd = self.reg_layer_2nd(l_features_2[-1]).transpose(
            1, 2).contiguous().squeeze(dim=1)  # (B*64, C)
        #loss
        if self.training:
            cls_label = target_dict_2nd['cls_label'].float()
            rcnn_cls_flat = rcnn_cls_2nd.view(-1)
            batch_loss_cls = F.binary_cross_entropy(
                torch.sigmoid(rcnn_cls_flat),
                cls_label.view(-1),
                reduction='none')
            cls_label_flat = cls_label.view(-1)
            cls_valid_mask = (cls_label_flat >= 0).float()
            rcnn_loss_cls = (batch_loss_cls *
                             cls_valid_mask).sum() / torch.clamp(
                                 cls_valid_mask.sum(), min=1.0)
            gt_boxes3d_ct = target_dict_2nd['gt_of_rois']
            reg_valid_mask = target_dict_2nd['reg_valid_mask']
            fg_mask = (reg_valid_mask > 0)
            #print(rcnn_reg_2nd.view(batch_size_2, -1)[fg_mask].size(0))
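            # guard: if no ROI is foreground, select the complement instead so
            # get_reg_loss below still receives a non-empty set of boxes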
            if rcnn_reg_2nd.view(batch_size_2, -1)[fg_mask].size(0) == 0:
                fg_mask = (reg_valid_mask <= 0)
            loss_loc, loss_angle, loss_size, reg_loss_dict = \
                loss_utils.get_reg_loss(rcnn_reg_2nd.view(batch_size_2, -1)[fg_mask],
                                        gt_boxes3d_ct.view(batch_size_2, 7)[fg_mask],
                                        loc_scope=cfg.RCNN.LOC_SCOPE,
                                        loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
                                        num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
                                        anchor_size=anchor_size,
                                        get_xz_fine=True, get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
                                        loc_y_scope=cfg.RCNN.LOC_Y_SCOPE, loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
                                        get_ry_fine=True)
            rcnn_loss_reg = loss_loc + loss_angle + 3 * loss_size

            two = {
                'rcnn_loss_cls_2nd': rcnn_loss_cls,
                'rcnn_loss_reg_2nd': rcnn_loss_reg
            }
            del cls_label, rcnn_cls_flat, batch_loss_cls, cls_label_flat, cls_valid_mask, rcnn_loss_cls, gt_boxes3d_ct, reg_valid_mask, fg_mask

        else:
            two = {}

        sec = {'rcnn_cls_2nd': rcnn_cls_2nd, 'rcnn_reg_2nd': rcnn_reg_2nd}
        #print(input_data['roi_boxes3d'].shape,input_data2['roi_boxes3d'].shape)

        pred_boxes3d_2nd = decode_bbox_target(
            roi.view(-1, 7),
            rcnn_reg_2nd.view(-1, rcnn_reg_2nd.shape[-1]),
            anchor_size=anchor_size,
            loc_scope=cfg.RCNN.LOC_SCOPE,
            loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
            num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
            get_xz_fine=True,
            get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
            loc_y_scope=cfg.RCNN.LOC_Y_SCOPE,
            loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
            get_ry_fine=True).view(batch_size, -1, 7)

        ## 3rd
        #print(target_dict['roi_boxes3d'].shape,target_dict_2nd['roi_boxes3d'].shape)
        #print(pred_boxes3d_1st.shape,input_data['roi_boxes3d'].shape)
        #print(target_dict['gt_of_rois']+target_dict['roi_boxes3d'],target_dict_2nd['gt_of_rois']+target_dict_2nd['roi_boxes3d'])
        input_data3 = input_data2.copy()
        #del input_data2

        if self.training:
            input_data3['roi_boxes3d'] = torch.cat(
                (pred_boxes3d_2nd, input_data2['roi_boxes3d']), 1)
            #input_data3['roi_boxes3d'] = input_data2['gt_boxes3d']
            #input_data3['roi_boxes3d'] = pred_boxes3d_2nd
            #print(input_data3['roi_boxes3d'].shape)
            with torch.no_grad():
                target_dict_3rd = self.proposal_target_layer(input_data3,
                                                             stage=3)
            '''
            reg_valid_mask = target_dict_3rd['reg_valid_mask']
            fg_mask_num3 = (reg_valid_mask > 0).sum()
            
            if fg_mask_num3.item() < 10 * batch_size:
                input_data3['roi_boxes3d'] = torch.cat((pred_boxes3d_2nd, input_data2['roi_boxes3d']), 1)
                with torch.no_grad():
                    target_dict_3rd = self.proposal_target_layer(input_data2, stage=3)
            '''
            #print(fg_mask_num2.item(),fg_mask_num3.item())
            pts_input_3 = torch.cat((target_dict_3rd['sampled_pts'],
                                     target_dict_3rd['pts_feature']),
                                    dim=2)
            target_dict_3rd['pts_input'] = pts_input_3
            roi = target_dict_3rd['roi_boxes3d']
        else:
            input_data3['roi_boxes3d'] = pred_boxes3d_2nd
            #input_data3['roi_boxes3d']=torch.cat((pred_boxes3d_2nd, input_data2['roi_boxes3d']), 1)
            roi = pred_boxes3d_2nd
            #roi=torch.cat((pred_boxes3d_2nd, input_data2['roi_boxes3d']), 1)
            pts_input_3 = self.roipooling(input_data3)
        xyz_3, features_3 = self._break_up_pc(pts_input_3)

        if cfg.RCNN.USE_RPN_FEATURES:
            xyz_input_3 = pts_input_3[...,
                                      0:self.rcnn_input_channel].transpose(
                                          1, 2).unsqueeze(dim=3)
            xyz_feature_3 = self.xyz_up_layer(xyz_input_3)

            rpn_feature_3 = pts_input_3[...,
                                        self.rcnn_input_channel:].transpose(
                                            1, 2).unsqueeze(dim=3)

            merged_feature_3 = torch.cat((xyz_feature_3, rpn_feature_3), dim=1)
            merged_feature_3 = self.merge_down_layer(merged_feature_3)
            l_xyz_3, l_features_3 = [xyz_3], [merged_feature_3.squeeze(dim=3)]
        else:
            l_xyz_3, l_features_3 = [xyz_3], [features_3]

        for i in range(len(self.SA_modules)):
            li_xyz_3, li_features_3 = self.SA_modules[i](l_xyz_3[i],
                                                         l_features_3[i])
            l_xyz_3.append(li_xyz_3)
            l_features_3.append(li_features_3)
        del xyz_2, features_2, l_features_2
        rcnn_cls_3rd = self.cls_layer_3rd(l_features_3[-1]).transpose(
            1, 2).contiguous().squeeze(dim=1)  # (B*64, 1 or 2)
        rcnn_reg_3rd = self.reg_layer_3rd(l_features_3[-1]).transpose(
            1, 2).contiguous().squeeze(dim=1)  # (B*64, C)

        #loss
        if self.training:
            cls_label = target_dict_3rd['cls_label'].float()
            rcnn_cls_flat = rcnn_cls_3rd.view(-1)
            batch_loss_cls = F.binary_cross_entropy(
                torch.sigmoid(rcnn_cls_flat),
                cls_label.view(-1),
                reduction='none')
            cls_label_flat = cls_label.view(-1)
            cls_valid_mask = (cls_label_flat >= 0).float()
            rcnn_loss_cls = (batch_loss_cls *
                             cls_valid_mask).sum() / torch.clamp(
                                 cls_valid_mask.sum(), min=1.0)
            gt_boxes3d_ct = target_dict_3rd['gt_of_rois']
            reg_valid_mask = target_dict_3rd['reg_valid_mask']
            fg_mask = (reg_valid_mask > 0)
            #cls_mask=(target_dict_3rd['cls_label']>0)
            #print(rcnn_reg_3rd.view(batch_size_2, -1)[cls_mask].size(0))
            #print(rcnn_reg_3rd.view(batch_size_2, -1)[fg_mask].size(0))
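            # same guard as in the 2nd stage: avoid an empty foreground selection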
            if rcnn_reg_3rd.view(batch_size_2, -1)[fg_mask].size(0) == 0:
                fg_mask = (reg_valid_mask <= 0)
            loss_loc, loss_angle, loss_size, reg_loss_dict = \
                loss_utils.get_reg_loss(rcnn_reg_3rd.view(batch_size_2, -1)[fg_mask],
                                        gt_boxes3d_ct.view(batch_size_2, 7)[fg_mask],
                                        loc_scope=cfg.RCNN.LOC_SCOPE,
                                        loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
                                        num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
                                        anchor_size=anchor_size,
                                        get_xz_fine=True, get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
                                        loc_y_scope=cfg.RCNN.LOC_Y_SCOPE, loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
                                        get_ry_fine=True)
            rcnn_loss_reg = loss_loc + loss_angle + 3 * loss_size

            three = {
                'rcnn_loss_cls_3rd': rcnn_loss_cls,
                'rcnn_loss_reg_3rd': rcnn_loss_reg
            }
            del cls_label, rcnn_cls_flat, batch_loss_cls, cls_label_flat, cls_valid_mask, rcnn_loss_cls, gt_boxes3d_ct, reg_valid_mask, fg_mask

        else:
            three = {}
        pred_boxes3d_3rd = decode_bbox_target(
            roi.view(-1, 7),
            rcnn_reg_3rd.view(-1, rcnn_reg_3rd.shape[-1]),
            anchor_size=anchor_size,
            loc_scope=cfg.RCNN.LOC_SCOPE,
            loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
            num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
            get_xz_fine=True,
            get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
            loc_y_scope=cfg.RCNN.LOC_Y_SCOPE,
            loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
            get_ry_fine=True).view(batch_size, -1, 7)
        ret_dict = {
            'rcnn_cls': rcnn_cls,
            'rcnn_reg': rcnn_reg,
            'rcnn_cls_3rd': rcnn_cls_3rd,
            'rcnn_reg_3rd': rcnn_reg_3rd,
            'pred_boxes3d_1st': pred_boxes3d_1st,
            'pred_boxes3d_2nd': pred_boxes3d_2nd,
            'pred_boxes3d_3rd': pred_boxes3d_3rd
        }
        ret_dict.update(sec)
        ret_dict.update(one)
        ret_dict.update(two)
        ret_dict.update(three)
        if self.training:
            ret_dict.update(target_dict)
        return ret_dict
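Each stage's classification loss above follows the same masked binary cross-entropy pattern: labels below zero mark ignored ROIs, which are excluded from the average. A self-contained sketch of that pattern (the clamp supplies a dummy target for ignored entries, which the validity mask then zeroes out):

import torch
import torch.nn.functional as F

def masked_binary_cls_loss(logits, labels):
    """BCE over sigmoid logits, averaging only over entries labeled >= 0."""
    logits_flat = logits.view(-1)
    labels_flat = labels.view(-1).float()
    loss = F.binary_cross_entropy(torch.sigmoid(logits_flat),
                                  labels_flat.clamp(min=0.0),
                                  reduction='none')
    valid = (labels_flat >= 0).float()
    return (loss * valid).sum() / torch.clamp(valid.sum(), min=1.0)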
Example #11
    def _eval_data(self, masked_pts=None):
        """eval data with sampled pts
        """
        with torch.no_grad():
            MEAN_SIZE = torch.from_numpy(cfg.CLS_MEAN_SIZE[0]).cuda()
            batch_size = self.config['batch_size']

            # get valid points (projected points should fall inside the image)
            sample_id, pts_rect, pts_intensity, gt_boxes3d, npoints, labels = \
                self.data['sample_id'], self.data['pts_rect'], \
                self.data['pts_intensity'], self.data['gt_boxes3d'], \
                self.data['npoints'], self.data['label']

            cls_types = [[
                labels[k][i].cls_type for i in range(len(labels[k]))
            ] for k in range(batch_size)]

            calib = [
                self.test_loader.dataset.get_calib(idx) for idx in sample_id
            ]
            if self.use_masked:
                # use masked/sampled pts if True
                pts_rect = np.array([
                    c.lidar_to_rect(masked_pts[k][:, 0:3])
                    for k, c in enumerate(calib)
                ])
                pts_intensity = [
                    masked_pts[k][:, 3] for k in range(batch_size)
                ]
                npoints = masked_pts.shape[0]

            inputs = torch.from_numpy(pts_rect).cuda(
                non_blocking=True).float().view(self.config['batch_size'], -1,
                                                3)
            gt_boxes3d = torch.from_numpy(gt_boxes3d).cuda(non_blocking=True)
            input_data = {'pts_input': inputs}

            # model inference
            ret_dict = self.model(input_data)

            roi_scores_raw = ret_dict['roi_scores_raw']  # (B, M)
            roi_boxes3d = ret_dict['rois']  # (B, M, 7)
            # seg_result = ret_dict['seg_result'].long()  # (B, N)

            rcnn_cls = ret_dict['rcnn_cls'].view(batch_size, -1,
                                                 ret_dict['rcnn_cls'].shape[1])
            rcnn_reg = ret_dict['rcnn_reg'].view(
                batch_size, -1, ret_dict['rcnn_reg'].shape[1])  # (B, M, C)

            norm_scores = torch.sigmoid(rcnn_cls)

            # remove low confidence scores
            thresh_mask = norm_scores > cfg.RCNN.SCORE_THRESH

            # bounding box regression
            anchor_size = MEAN_SIZE

            pred_boxes3d = decode_bbox_target(
                roi_boxes3d.view(-1, 7),
                rcnn_reg.view(-1, rcnn_reg.shape[-1]),
                anchor_size=anchor_size,
                loc_scope=cfg.RCNN.LOC_SCOPE,
                loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
                num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
                get_xz_fine=True,
                get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
                loc_y_scope=cfg.RCNN.LOC_Y_SCOPE,
                loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
                get_ry_fine=True).view(batch_size, -1, 7)

            # select boxes (list of tensors)
            pred_boxes3d_selected = [
                pred_boxes3d[k][thresh_mask[k].view(-1)]
                for k in range(batch_size)
            ]
            raw_scores_selected = [
                roi_scores_raw[k][thresh_mask[k].view(-1)]
                for k in range(batch_size)
            ]
            norm_scores_selected = [
                norm_scores[k][thresh_mask[k].view(-1)]
                for k in range(batch_size)
            ]

            # rotated NMS
            boxes_bev_selected = [
                kitti_utils.boxes3d_to_bev_torch(bboxes)
                for bboxes in pred_boxes3d_selected
            ]
            keep_idx = [
                iou3d_utils.nms_gpu(boxes_bev_selected[k],
                                    raw_scores_selected[k],
                                    cfg.RCNN.NMS_THRESH).view(-1)
                for k in range(batch_size)
            ]
            pred_boxes3d_selected = [
                pred_boxes3d_selected[k][keep_idx[k]]
                for k in range(batch_size)
            ]
            scores_selected = [
                raw_scores_selected[k][keep_idx[k]] for k in range(batch_size)
            ]
            norm_scores_selected = [
                norm_scores_selected[k][keep_idx[k]] for k in range(batch_size)
            ]

            # want car gt_boxes
            keep_idx = [[
                i for i in range(len(cls_types[k])) if cls_types[k][i] == 'Car'
            ] for k in range(batch_size)]
            gt_boxes3d_selected = [
                gt_boxes3d[k][keep_idx[k]] for k in range(batch_size)
            ]

            # drop batch entries that have no car ground-truth boxes
            has_info = [k for k in range(batch_size) if len(keep_idx[k]) > 0]
            gt_boxes3d_selected = [gt_boxes3d_selected[x] for x in has_info]
            pred_boxes3d_selected = [
                pred_boxes3d_selected[x] for x in has_info
            ]
            batch_size = len(has_info)
            if batch_size == 0:
                return None

            # Intersect over union
            iou3d = [
                iou3d_utils.boxes_iou3d_gpu(gt_boxes3d_selected[k],
                                            pred_boxes3d_selected[k])
                for k in range(batch_size)
            ]

            # get the max iou for each ground truth bounding box
            gt_max_iou = [
                torch.max(iou3d[k], dim=0)[0] for k in range(batch_size)
            ]

            # precision at each rank (used to compute AP below)
            precision_vals = []
            for k in range(batch_size):
                batch_iou = gt_max_iou[k]
                batch_precision = []
                num_correct = 0
                for i in range(len(batch_iou)):
                    if batch_iou[i] > 0.7:
                        num_correct += 1
                    batch_precision.append(num_correct / (i + 1))

                precision_vals.append(batch_precision)

            aps = []
            for k in range(batch_size):
                batch_prec = precision_vals[k]
                ap = 0
                for i in range(len(batch_prec)):
                    ap += max(batch_prec[i:])

                aps.append(ap)

            num_gt_boxes = sum([len(gt_max_iou[k]) for k in range(batch_size)])

            return sum(aps) / num_gt_boxes
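The two loops above implement interpolated average precision: precision is recorded at every rank, and the precision at rank i is replaced by the best precision at any rank >= i before summing. A standalone sketch of the same computation (the per-detection IoUs are assumed to already be in descending score order, as in the code above):

def interpolated_ap(ious, iou_thresh=0.7):
    """Sum of interpolated precisions for one batch entry."""
    precisions, num_correct = [], 0
    for i, iou in enumerate(ious):
        if iou > iou_thresh:
            num_correct += 1
        precisions.append(num_correct / (i + 1))
    # interpolation: precision at rank i -> max precision at any rank >= i
    return sum(max(precisions[i:]) for i in range(len(precisions)))

# As above, the per-batch sums are then divided by the total number of
# ground-truth boxes to obtain the final score.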
Example #12
    def forward(self, input_data):
        """
        :param input_data: input dict
        :return:
        """
        if cfg.RCNN.ROI_SAMPLE_JIT:
            if self.training:
                with torch.no_grad():
                    target_dict = self.proposal_target_layer(input_data)

                pts_input = torch.cat((target_dict['sampled_pts'], target_dict['pts_feature']), dim=2)
                target_dict['pts_input'] = pts_input
            else:
                rpn_xyz, rpn_features = input_data['rpn_xyz'], input_data['rpn_features']
                batch_rois = input_data['roi_boxes3d']
                if cfg.RCNN.USE_INTENSITY:
                    pts_extra_input_list = [input_data['rpn_intensity'].unsqueeze(dim=2),
                                            input_data['seg_mask'].unsqueeze(dim=2)]
                else:
                    pts_extra_input_list = [input_data['seg_mask'].unsqueeze(dim=2)]

                if cfg.RCNN.USE_DEPTH:
                    pts_depth = input_data['pts_depth'] / 70.0 - 0.5
                    pts_extra_input_list.append(pts_depth.unsqueeze(dim=2))
                pts_extra_input = torch.cat(pts_extra_input_list, dim=2)

                pts_feature = torch.cat((pts_extra_input, rpn_features), dim=2)
                pooled_features, pooled_empty_flag = \
                        roipool3d_utils.roipool3d_gpu(rpn_xyz, pts_feature, batch_rois, cfg.RCNN.POOL_EXTRA_WIDTH,
                                                      sampled_pt_num=cfg.RCNN.NUM_POINTS)

                # canonical transformation
                batch_size = batch_rois.shape[0]
                roi_center = batch_rois[:, :, 0:3]
                pooled_features[:, :, :, 0:3] -= roi_center.unsqueeze(dim=2)
                for k in range(batch_size):
                    pooled_features[k, :, :, 0:3] = kitti_utils.rotate_pc_along_y_torch(pooled_features[k, :, :, 0:3],
                                                                                        batch_rois[k, :, 6])

                pts_input = pooled_features.view(-1, pooled_features.shape[2], pooled_features.shape[3])
        else:
            pts_input = input_data['pts_input']
            target_dict = {}
            target_dict['pts_input'] = input_data['pts_input']
            target_dict['roi_boxes3d'] = input_data['roi_boxes3d']
            if self.training:
                target_dict['cls_label'] = input_data['cls_label']
                target_dict['reg_valid_mask'] = input_data['reg_valid_mask']
                target_dict['gt_of_rois'] = input_data['gt_boxes3d_ct']

        xyz, features = self._break_up_pc(pts_input)
        batch_size = input_data['roi_boxes3d'].size(0)
        if cfg.RCNN.USE_RPN_FEATURES:
            xyz_input = pts_input[..., 0:self.rcnn_input_channel].transpose(1, 2).unsqueeze(dim=3)
            xyz_feature = self.xyz_up_layer(xyz_input)

            rpn_feature = pts_input[..., self.rcnn_input_channel:].transpose(1, 2).unsqueeze(dim=3)

            merged_feature = torch.cat((xyz_feature, rpn_feature), dim=1)
            merged_feature = self.merge_down_layer(merged_feature)
            l_xyz, l_features = [xyz], [merged_feature.squeeze(dim=3)]
        else:
            l_xyz, l_features = [xyz], [features]

        for i in range(len(self.SA_modules)):
            li_xyz, li_features = self.SA_modules[i](l_xyz[i], l_features[i])
            l_xyz.append(li_xyz)
            l_features.append(li_features)

        rcnn_cls = self.cls_layer(l_features[-1]).transpose(1, 2).contiguous().squeeze(dim=1)  # (B, 1 or 2)
        rcnn_reg = self.reg_layer(l_features[-1]).transpose(1, 2).contiguous().squeeze(dim=1)  # (B, C)
        anchor_size = torch.from_numpy(cfg.CLS_MEAN_SIZE[0]).cuda()
        if self.training:
            roi_boxes3d = target_dict['roi_boxes3d'].view(-1, 7)
            #roi_boxes3d = input_data['roi_boxes3d']
        else:
            roi_boxes3d = input_data['roi_boxes3d']
        pred_boxes3d_1st = decode_bbox_target(roi_boxes3d.view(-1, 7), rcnn_reg.view(-1, rcnn_reg.shape[-1]),
                                              anchor_size=anchor_size,
                                              loc_scope=cfg.RCNN.LOC_SCOPE,
                                              loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
                                              num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
                                              get_xz_fine=True, get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
                                              loc_y_scope=cfg.RCNN.LOC_Y_SCOPE, loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
                                              get_ry_fine=True).view(batch_size, -1, 7)
        ret_dict = {'rcnn_cls': rcnn_cls, 'rcnn_reg': rcnn_reg, 'pred_boxes3d_1st': pred_boxes3d_1st}
        ret_dict['pooled_feature'] = l_features[-1]
        if cfg.TRAIN.IOU_LAYER == 'split' and self.training:

            gt = target_dict['real_gt']
            iou_label = []
            batch_size_2 = pts_input.shape[0]
            for i in range(batch_size_2):
                iou_label.append(
                    iou3d_utils.boxes_iou3d_gpu(pred_boxes3d_1st.view(-1, 7)[i].view(1, 7), gt[i].view(1, 7)))
            iou_label = torch.cat(iou_label)
            iou_label = (iou_label - 0.5) * 2
            ret_dict['iou_label'] = iou_label
        if self.training:
            ret_dict.update(target_dict)
        return ret_dict
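The IoU target above is rescaled from [0, 1] to [-1, 1]; this would suit, for example, a tanh-activated IoU head (an assumption here, since the head itself is not part of this snippet). A quick check of the mapping:

import torch

iou = torch.tensor([0.25, 0.50, 0.75])
print((iou - 0.5) * 2)   # tensor([-0.5000, 0.0000, 0.5000])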
Example #13
def eval_one_epoch_joint(model, dataloader, epoch_id, result_dir, logger):
    np.random.seed(666)

    # Load the class mean size from the config YAML file
    MEAN_SIZE = torch.from_numpy(cfg.CLS_MEAN_SIZE[0]).cuda()

    # Use TEST mode when args.test is set, otherwise EVAL
    mode = 'TEST' if args.test else 'EVAL'

    # Make output directory result_dir/final_result/data
    final_output_dir = os.path.join(result_dir, 'final_result', 'data')
    os.makedirs(final_output_dir, exist_ok=True)

    # Save intermediate results if args.save_result is True (currently the default)
    if args.save_result:
        roi_output_dir = os.path.join(result_dir, 'roi_result', 'data')
        refine_output_dir = os.path.join(result_dir, 'refine_result', 'data')
        rpn_output_dir = os.path.join(result_dir, 'rpn_result', 'data')
        os.makedirs(rpn_output_dir, exist_ok=True)
        os.makedirs(roi_output_dir, exist_ok=True)
        os.makedirs(refine_output_dir, exist_ok=True)

    logger.info('---- EPOCH %s JOINT EVALUATION ----' % epoch_id)
    logger.info('==> Output file: %s' % result_dir)
    model.eval()

    # IoU thresholds for the recall statistics
    thresh_list = [0.1, 0.3, 0.5, 0.7, 0.9]
    total_recalled_bbox_list, total_gt_bbox = [0] * 5, 0
    total_roi_recalled_bbox_list = [0] * 5
    dataset = dataloader.dataset
    lidar_idx_table = dataset.lidar_idx_table
    cnt = final_total = total_cls_acc = total_cls_acc_refined = total_rpn_iou = 0

    progress_bar = tqdm.tqdm(total=len(dataloader), leave=True, desc='eval')

    # Iterate through data in dataloader
    for data in dataloader:
        cnt += 1
        sample_id, pts_rect, pts_features, pts_input = data['sample_id'], data[
            'pts_rect'], data['pts_features'], data['pts_input']
        batch_size = len(sample_id)
        inputs = torch.from_numpy(pts_input).cuda(non_blocking=True).float()
        input_data = {'pts_input': inputs}

        # model inference
        ret_dict = model(input_data)

        roi_scores_raw = ret_dict['roi_scores_raw']  # (B, M)
        roi_boxes3d = ret_dict['rois']  # (B, M, 7)
        seg_result = ret_dict['seg_result'].long()  # (B, N)

        rcnn_cls = ret_dict['rcnn_cls'].view(batch_size, -1,
                                             ret_dict['rcnn_cls'].shape[1])
        rcnn_reg = ret_dict['rcnn_reg'].view(
            batch_size, -1, ret_dict['rcnn_reg'].shape[1])  # (B, M, C)

        # bounding box regression
        anchor_size = MEAN_SIZE
        if cfg.RCNN.SIZE_RES_ON_ROI:
            assert False

        pred_boxes3d = decode_bbox_target(
            roi_boxes3d.view(-1, 7),
            rcnn_reg.view(-1, rcnn_reg.shape[-1]),
            anchor_size=anchor_size,
            loc_scope=cfg.RCNN.LOC_SCOPE,
            loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
            num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
            get_xz_fine=True,
            get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
            loc_y_scope=cfg.RCNN.LOC_Y_SCOPE,
            loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
            get_ry_fine=True).view(batch_size, -1, 7)

        # scoring
        if rcnn_cls.shape[2] == 1:
            raw_scores = rcnn_cls  # (B, M, 1)
            norm_scores = torch.sigmoid(raw_scores)
            pred_classes = (norm_scores > cfg.RCNN.SCORE_THRESH).long()
        else:
            pred_classes = torch.argmax(rcnn_cls, dim=1).view(-1)
            cls_norm_scores = F.softmax(rcnn_cls, dim=1)
            raw_scores = rcnn_cls[:, pred_classes]
            norm_scores = cls_norm_scores[:, pred_classes]

        # evaluation
        recalled_num = gt_num = rpn_iou = 0
        if not args.test:
            if not cfg.RPN.FIXED:
                rpn_cls_label, rpn_reg_label = data['rpn_cls_label'], data[
                    'rpn_reg_label']
                rpn_cls_label = torch.from_numpy(rpn_cls_label).cuda(
                    non_blocking=True).long()

            gt_boxes3d = data['gt_boxes3d']
            gt_boxes3d = filtrate_gtboxes(gt_boxes3d)

            for k in range(batch_size):
                # calculate recall
                cur_gt_boxes3d = gt_boxes3d[k]
                tmp_idx = len(cur_gt_boxes3d) - 1

                while tmp_idx >= 0 and cur_gt_boxes3d[tmp_idx].sum() == 0:
                    tmp_idx -= 1

                if tmp_idx >= 0:
                    cur_gt_boxes3d = cur_gt_boxes3d[:tmp_idx + 1]

                    cur_gt_boxes3d = torch.from_numpy(cur_gt_boxes3d).cuda(
                        non_blocking=True).float()
                    iou3d = iou3d_utils.boxes_iou3d_gpu(
                        pred_boxes3d[k], cur_gt_boxes3d)
                    gt_max_iou, _ = iou3d.max(dim=0)
                    refined_iou, _ = iou3d.max(dim=1)

                    for idx, thresh in enumerate(thresh_list):
                        total_recalled_bbox_list[idx] += (gt_max_iou >
                                                          thresh).sum().item()
                    recalled_num += (gt_max_iou > 0.7).sum().item()
                    gt_num += cur_gt_boxes3d.shape[0]
                    total_gt_bbox += cur_gt_boxes3d.shape[0]

                    # original recall
                    iou3d_in = iou3d_utils.boxes_iou3d_gpu(
                        roi_boxes3d[k], cur_gt_boxes3d)
                    gt_max_iou_in, _ = iou3d_in.max(dim=0)

                    for idx, thresh in enumerate(thresh_list):
                        total_roi_recalled_bbox_list[idx] += (
                            gt_max_iou_in > thresh).sum().item()

                if not cfg.RPN.FIXED:
                    fg_mask = rpn_cls_label > 0
                    correct = ((seg_result == rpn_cls_label)
                               & fg_mask).sum().float()
                    union = fg_mask.sum().float() + (seg_result >
                                                     0).sum().float() - correct
                    rpn_iou = correct / torch.clamp(union, min=1.0)
                    total_rpn_iou += rpn_iou.item()

        disp_dict = {
            'mode': mode,
            'recall': '%d/%d' % (total_recalled_bbox_list[3], total_gt_bbox)
        }
        progress_bar.set_postfix(disp_dict)
        progress_bar.update()

        if args.save_result:
            # save roi and refine results
            roi_boxes3d_np = roi_boxes3d.cpu().numpy()
            pred_boxes3d_np = pred_boxes3d.cpu().numpy()
            roi_scores_raw_np = roi_scores_raw.cpu().numpy()
            raw_scores_np = raw_scores.cpu().numpy()

            rpn_cls_np = ret_dict['rpn_cls'].cpu().numpy()
            # map backbone points back to the Argoverse frame (inverse of argo_to_kitti)
            rpn_xyz_np = np.dot(
                np.linalg.inv(argo_to_kitti),
                ret_dict['backbone_xyz'].cpu().numpy()[0].T).T.reshape(
                    1, -1, 3)
            seg_result_np = seg_result.cpu().numpy()

            output_data = np.concatenate(
                (rpn_xyz_np, rpn_cls_np.reshape(batch_size, -1, 1),
                 seg_result_np.reshape(batch_size, -1, 1)),
                axis=2)

            for k in range(batch_size):
                cur_sample_id = sample_id[k]
                #calib = dataset.get_calib(cur_sample_id)
                #image_shape = dataset.get_image_shape(cur_sample_id)
                save_argo_format(cur_sample_id, roi_boxes3d_np[k],
                                 roi_output_dir, roi_scores_raw_np[k],
                                 lidar_idx_table)
                save_argo_format(cur_sample_id, pred_boxes3d_np[k],
                                 refine_output_dir, raw_scores_np[k],
                                 lidar_idx_table)
                output_file = os.path.join(
                    rpn_output_dir,
                    lidar_idx_table['%06d' % cur_sample_id] + '.npy')
                np.save(output_file, output_data.astype(np.float32))

        # scores thresh
        inds = norm_scores > cfg.RCNN.SCORE_THRESH

        for k in range(batch_size):
            cur_inds = inds[k].view(-1)
            if cur_inds.sum() == 0:
                continue

            pred_boxes3d_selected = pred_boxes3d[k, cur_inds]
            raw_scores_selected = raw_scores[k, cur_inds]
            norm_scores_selected = norm_scores[k, cur_inds]

            # NMS thresh
            # rotated nms
            boxes_bev_selected = kitti_utils.boxes3d_to_bev_torch(
                pred_boxes3d_selected)
            keep_idx = iou3d_utils.nms_gpu(boxes_bev_selected,
                                           raw_scores_selected,
                                           cfg.RCNN.NMS_THRESH).view(-1)
            pred_boxes3d_selected = pred_boxes3d_selected[keep_idx]
            scores_selected = raw_scores_selected[keep_idx]
            pred_boxes3d_selected, scores_selected = pred_boxes3d_selected.cpu(
            ).numpy(), scores_selected.cpu().numpy()

            cur_sample_id = sample_id[k]
            #calib = dataset.get_calib(cur_sample_id)
            final_total += pred_boxes3d_selected.shape[0]
            #image_shape = dataset.get_image_shape(cur_sample_id)
            save_argo_format(cur_sample_id, pred_boxes3d_selected,
                             final_output_dir, scores_selected,
                             lidar_idx_table)

    progress_bar.close()

    ret_dict = {}
    logger.info(
        '-------------------performance of epoch %s---------------------' %
        epoch_id)
    logger.info(str(datetime.now()))

    avg_rpn_iou = (total_rpn_iou / max(cnt, 1.0))
    avg_cls_acc = (total_cls_acc / max(cnt, 1.0))
    avg_cls_acc_refined = (total_cls_acc_refined / max(cnt, 1.0))
    avg_det_num = (final_total / max(len(dataset), 1.0))
    logger.info('final average detections: %.3f' % avg_det_num)
    logger.info('final average rpn_iou refined: %.3f' % avg_rpn_iou)
    logger.info('final average cls acc: %.3f' % avg_cls_acc)
    logger.info('final average cls acc refined: %.3f' % avg_cls_acc_refined)
    ret_dict['rpn_iou'] = avg_rpn_iou
    ret_dict['rcnn_cls_acc'] = avg_cls_acc
    ret_dict['rcnn_cls_acc_refined'] = avg_cls_acc_refined
    ret_dict['rcnn_avg_num'] = avg_det_num

    for idx, thresh in enumerate(thresh_list):
        cur_roi_recall = total_roi_recalled_bbox_list[idx] / max(
            total_gt_bbox, 1.0)
        logger.info('total roi bbox recall(thresh=%.3f): %d / %d = %f' %
                    (thresh, total_roi_recalled_bbox_list[idx], total_gt_bbox,
                     cur_roi_recall))
        ret_dict['rpn_recall(thresh=%.2f)' % thresh] = cur_roi_recall

    for idx, thresh in enumerate(thresh_list):
        cur_recall = total_recalled_bbox_list[idx] / max(total_gt_bbox, 1.0)
        logger.info(
            'total bbox recall(thresh=%.3f): %d / %d = %f' %
            (thresh, total_recalled_bbox_list[idx], total_gt_bbox, cur_recall))
        ret_dict['rcnn_recall(thresh=%.2f)' % thresh] = cur_recall

    logger.info('result is saved to: %s' % result_dir)
    return ret_dict