def predict(self, example, output, pc_range):
    """Turn decoded heat-map detections into per-sample prediction dicts.

    Args:
        example: batch dict; the keys ``"ground"``, ``"rect"``, ``"Trv2c"``,
            ``"P2"`` and ``"image_idx"`` are read here.
        output: network output dict; the keys ``'hm'``, ``'rot'``, ``'dim'``
            and ``'reg'`` are forwarded to ``ddd_decode``.
        pc_range: forwarded unchanged to ``ddd_decode``.

    Returns:
        A list with one dict per sample holding ``"bbox"``,
        ``"box3d_camera"``, ``"box3d_lidar"``, ``"scores"``,
        ``"label_preds"`` and ``"image_idx"``.

    Side effects:
        Increments ``self._total_inference_count`` by the batch size and
        adds the elapsed wall time to ``self._total_postprocess_time``.
    """
    started_at = time.time()
    dets = ddd_decode(
        output['hm'], output['rot'], output['dim'], pc_range,
        example["ground"], reg=output['reg'], K=self.K)
    self._total_inference_count += example['rect'].shape[0]

    per_sample = zip(dets, example["rect"], example["Trv2c"], example["P2"],
                     example['image_idx'])
    results = []
    for det, rect, Trv2c, P2, img_idx in per_sample:
        # Columns 6:14 are handed to get_alpha; the yaw is that angle plus
        # atan2(-y, x) of the box centre (columns 1 and 0).
        rot_y = get_alpha(det[:, 6:14]) + torch.atan2(-det[:, 1], det[:, 0])
        boxes_lidar = torch.cat([det[:, :6], rot_y.unsqueeze(1)], dim=-1)
        boxes_camera = box_torch_ops.box_lidar_to_camera(
            boxes_lidar, rect, Trv2c)

        corners = box_torch_ops.center_to_corner_box3d(
            boxes_camera[:, :3],   # locations
            boxes_camera[:, 3:6],  # dimensions
            boxes_camera[:, 6],    # angles
            [0.5, 1.0, 0.5],       # camera box origin
            axis=1)
        # corners_in_image: [N, 8, 2]
        corners_in_image = box_torch_ops.project_to_image(corners, P2)
        # The 2D box is the extent of the projected corners.
        lower = torch.min(corners_in_image, dim=1)[0]
        upper = torch.max(corners_in_image, dim=1)[0]

        results.append({
            "bbox": torch.cat([lower, upper], dim=1),
            "box3d_camera": boxes_camera,
            "box3d_lidar": boxes_lidar,
            "scores": det[:, -2],
            "label_preds": det[:, -1],
            "image_idx": img_idx,
        })

    self._total_postprocess_time += time.time() - started_at
    return results
def predict(self, example, preds_dict):
    """Decode anchor-based network outputs into per-sample detections.

    Box regressions are decoded against the anchors, class scores are
    computed, NMS is applied on the bird's-eye-view footprint and the
    surviving boxes are converted to the camera frame and projected to
    2D image boxes.

    Args:
        example: batch dict; reads ``'anchors'``, ``"rect"``, ``"Trv2c"``,
            ``"P2"``, ``'image_idx'`` and, if present, ``"anchors_mask"``.
        preds_dict: network outputs; reads ``"box_preds"``, ``"cls_preds"``
            and, when the direction classifier is enabled,
            ``"dir_cls_preds"``.

    Returns:
        A list with one dict per sample holding ``"bbox"``,
        ``"box3d_camera"``, ``"box3d_lidar"``, ``"scores"``,
        ``"label_preds"`` and ``"image_idx"``. Every value except
        ``"image_idx"`` is ``None`` when no box survives.

    Side effects:
        Updates ``self._total_inference_count``,
        ``self._total_forward_time`` and ``self._total_postprocess_time``.
    """
    t = time.time()
    batch_size = example['anchors'].shape[0]
    batch_anchors = example["anchors"].view(batch_size, -1, 7)
    self._total_inference_count += batch_size
    batch_rect = example["rect"]
    batch_Trv2c = example["Trv2c"]
    batch_P2 = example["P2"]
    if "anchors_mask" not in example:
        batch_anchors_mask = [None] * batch_size
    else:
        batch_anchors_mask = example["anchors_mask"].view(batch_size, -1)
    batch_imgidx = example['image_idx']
    self._total_forward_time += time.time() - t

    t = time.time()
    batch_box_preds = preds_dict["box_preds"].view(
        batch_size, -1, self._box_coder.code_size)
    num_class_with_bg = self._num_class
    if not self._encode_background_as_zeros:
        num_class_with_bg = self._num_class + 1
    batch_cls_preds = preds_dict["cls_preds"].view(
        batch_size, -1, num_class_with_bg)
    batch_box_preds = self._box_coder.decode_torch(batch_box_preds,
                                                   batch_anchors)
    if self._use_direction_classifier:
        batch_dir_preds = preds_dict["dir_cls_preds"].view(batch_size, -1, 2)
    else:
        batch_dir_preds = [None] * batch_size

    # NMS flavour is a per-model setting, so pick it once.
    if self._use_rotate_nms:
        nms_func = box_torch_ops.rotate_nms
    else:
        nms_func = box_torch_ops.nms

    predictions_dicts = []
    for box_preds, cls_preds, dir_preds, rect, Trv2c, P2, img_idx, a_mask in zip(
            batch_box_preds, batch_cls_preds, batch_dir_preds, batch_rect,
            batch_Trv2c, batch_P2, batch_imgidx, batch_anchors_mask):
        if a_mask is not None:
            box_preds = box_preds[a_mask]
            cls_preds = cls_preds[a_mask]
        if self._use_direction_classifier:
            if a_mask is not None:
                dir_preds = dir_preds[a_mask]
            dir_labels = torch.max(dir_preds, dim=-1)[1]

        if self._encode_background_as_zeros:
            # This encoding does not support softmax.
            assert self._use_sigmoid_score is True
            total_scores = torch.sigmoid(cls_preds)
        else:
            # Background is the first element of the one-hot vector.
            if self._use_sigmoid_score:
                total_scores = torch.sigmoid(cls_preds)[..., 1:]
            else:
                total_scores = F.softmax(cls_preds, dim=-1)[..., 1:]

        # Apply NMS in bird's-eye view.
        selected_boxes = None
        selected_labels = None
        selected_scores = None
        selected_dir_labels = None

        if self._multiclass_nms:
            # Currently only class-agnostic boxes are supported.
            boxes_for_nms = box_preds[:, [0, 1, 3, 4, 6]]
            if not self._use_rotate_nms:
                box_preds_corners = box_torch_ops.center_to_corner_box2d(
                    boxes_for_nms[:, :2], boxes_for_nms[:, 2:4],
                    boxes_for_nms[:, 4])
                boxes_for_nms = box_torch_ops.corner_to_standup_nd(
                    box_preds_corners)
            boxes_for_mcnms = boxes_for_nms.unsqueeze(1)
            selected_per_class = box_torch_ops.multiclass_nms(
                nms_func=nms_func,
                boxes=boxes_for_mcnms,
                scores=total_scores,
                num_class=self._num_class,
                pre_max_size=self._nms_pre_max_size,
                post_max_size=self._nms_post_max_size,
                iou_threshold=self._nms_iou_threshold,
                score_thresh=self._nms_score_threshold,
            )
            selected_boxes, selected_labels, selected_scores = [], [], []
            selected_dir_labels = []
            for i, selected in enumerate(selected_per_class):
                if selected is None:
                    continue
                num_dets = selected.shape[0]
                selected_boxes.append(box_preds[selected])
                # Create the labels on the same device as the boxes;
                # torch.full would otherwise allocate on the default device.
                selected_labels.append(
                    torch.full([num_dets], i, dtype=torch.int64,
                               device=box_preds.device))
                if self._use_direction_classifier:
                    selected_dir_labels.append(dir_labels[selected])
                selected_scores.append(total_scores[selected, i])
            if selected_boxes:
                selected_boxes = torch.cat(selected_boxes, dim=0)
                selected_labels = torch.cat(selected_labels, dim=0)
                selected_scores = torch.cat(selected_scores, dim=0)
                if self._use_direction_classifier:
                    selected_dir_labels = torch.cat(selected_dir_labels,
                                                    dim=0)
            else:
                selected_boxes = None
                selected_labels = None
                selected_scores = None
                selected_dir_labels = None
        else:
            # Take the highest score per prediction, then apply NMS to
            # remove overlapping boxes.
            if num_class_with_bg == 1:
                top_scores = total_scores.squeeze(-1)
                top_labels = torch.zeros(
                    total_scores.shape[0],
                    device=total_scores.device,
                    dtype=torch.long)
            else:
                top_scores, top_labels = torch.max(total_scores, dim=-1)

            use_score_thresh = self._nms_score_threshold > 0.0
            if use_score_thresh:
                thresh = torch.tensor(
                    [self._nms_score_threshold],
                    device=total_scores.device).type_as(total_scores)
                top_scores_keep = (top_scores >= thresh)
                top_scores = top_scores.masked_select(top_scores_keep)

            if top_scores.shape[0] != 0:
                if use_score_thresh:
                    box_preds = box_preds[top_scores_keep]
                    if self._use_direction_classifier:
                        dir_labels = dir_labels[top_scores_keep]
                    top_labels = top_labels[top_scores_keep]
                boxes_for_nms = box_preds[:, [0, 1, 3, 4, 6]]
                if not self._use_rotate_nms:
                    box_preds_corners = box_torch_ops.center_to_corner_box2d(
                        boxes_for_nms[:, :2], boxes_for_nms[:, 2:4],
                        boxes_for_nms[:, 4])
                    boxes_for_nms = box_torch_ops.corner_to_standup_nd(
                        box_preds_corners)
                # NMS in 3D detection only removes overlapping boxes.
                selected = nms_func(
                    boxes_for_nms,
                    top_scores,
                    pre_max_size=self._nms_pre_max_size,
                    post_max_size=self._nms_post_max_size,
                    iou_threshold=self._nms_iou_threshold,
                )
            else:
                selected = None

            if selected is not None:
                selected_boxes = box_preds[selected]
                if self._use_direction_classifier:
                    selected_dir_labels = dir_labels[selected]
                selected_labels = top_labels[selected]
                selected_scores = top_scores[selected]

        # Finally generate predictions.
        if selected_boxes is not None:
            box_preds = selected_boxes
            scores = selected_scores
            label_preds = selected_labels
            if self._use_direction_classifier:
                dir_labels = selected_dir_labels
                # Add pi wherever the sign of the decoded yaw and the
                # direction class disagree. Built from comparisons only so
                # the mask has the comparison dtype that torch.where
                # expects (the former `^ dir_labels.byte()` promoted it to
                # uint8 on versions with bool masks).
                opp_labels = (box_preds[..., -1] > 0) != (dir_labels > 0)
                box_preds[..., -1] += torch.where(
                    opp_labels,
                    torch.tensor(np.pi).type_as(box_preds),
                    torch.tensor(0.0).type_as(box_preds))
            final_box_preds = box_preds
            final_scores = scores
            final_box_preds_camera = box_torch_ops.box_lidar_to_camera(
                final_box_preds, rect, Trv2c)
            locs = final_box_preds_camera[:, :3]
            dims = final_box_preds_camera[:, 3:6]
            angles = final_box_preds_camera[:, 6]
            camera_box_origin = [0.5, 1.0, 0.5]
            box_corners = box_torch_ops.center_to_corner_box3d(
                locs, dims, angles, camera_box_origin, axis=1)
            # box_corners_in_image: [N, 8, 2]
            box_corners_in_image = box_torch_ops.project_to_image(
                box_corners, P2)
            minxy = torch.min(box_corners_in_image, dim=1)[0]
            maxxy = torch.max(box_corners_in_image, dim=1)[0]
            box_2d_preds = torch.cat([minxy, maxxy], dim=1)
            predictions_dict = {
                "bbox": box_2d_preds,
                "box3d_camera": final_box_preds_camera,
                "box3d_lidar": final_box_preds,
                "scores": final_scores,
                "label_preds": label_preds,
                "image_idx": img_idx,
            }
        else:
            predictions_dict = {
                "bbox": None,
                "box3d_camera": None,
                "box3d_lidar": None,
                "scores": None,
                "label_preds": None,
                "image_idx": img_idx,
            }
        predictions_dicts.append(predictions_dict)

    self._total_postprocess_time += time.time() - t
    return predictions_dicts
def get_projected_idx(input_size, calib, img_shape, z_sel, rot_noise,
                      scal_noise, grid_size=4., right=False):
    '''Project the centres of a regular grid of cells into the image.

    A grid of ``int(input_size[0] / grid_size)`` by
    ``int(input_size[1] / grid_size)`` cell centres is built at height
    ``z_sel``, scaled to metric coordinates, un-augmented (inverse rotation
    by ``rot_noise`` and inverse scaling by ``scal_noise``), moved to the
    camera frame and projected with the selected projection matrix.

    Args:
        input_size: indexable with at least two entries (grid extent before
            division by ``grid_size``).
        calib: dict providing ``'rect'``, ``'Trv2c'`` and ``'P2'`` (or
            ``'P3'`` when ``right`` is true).
        img_shape: not used by this function.
            NOTE(review): the normalisation uses a fixed 1248 x 384 size
            instead - confirm this is intended.
        z_sel: height value assigned to every grid point.
        rot_noise: rotation that is undone (applied negated, axis 2).
        scal_noise: scale factor that is undone (points are divided by it).
        grid_size: edge length of one grid cell.
        right: use ``calib['P3']`` instead of ``calib['P2']``.

    Returns:
        ``(idxs, idxs_norm)``: projected coordinates and the same values
        divided by ``[1248, 384]``.
    '''
    cols = int(input_size[0] / grid_size)
    rows = int(input_size[1] / grid_size)

    # Cell centres: integer grid + 0.5, scaled by the cell size.
    cell_centres = (meshgrid(cols, rows).to(torch.float64) + 0.5) * grid_size
    plane_xy = cell_centres.view(cols, rows, 1, 2).to(torch.float32)
    plane_z = torch.Tensor([z_sel]).view(1, 1, 1, 1).expand(cols, rows, 1, 1)
    plane_z = plane_z.to(torch.float32)
    grid_points = torch.cat([plane_xy, plane_z], 3).view(-1, 3)

    # Grid units -> centre coordinates: divide by 10, shift y by -40.
    centres = torch.zeros(grid_points.shape[0], 3, dtype=torch.float64)
    centres[:, 0] = grid_points[:, 0] / 10
    centres[:, 1] = (grid_points[:, 1] / 10) - 40.
    centres[:, 2] = grid_points[:, 2] / 10
    # Move z from the bottom to the centre, using a fixed height of 1.52.
    centres[:, 2] += (1.52) / 2

    # Undo the augmentation: inverse rotation, then inverse scaling.
    centres_np = box_np_ops.rotation_points_single_angle(
        centres.numpy(), -rot_noise, axis=2)
    centres_np *= 1. / scal_noise
    centres = torch.tensor(centres_np, dtype=torch.float64)

    def _calib_matrix(key):
        # Calibration entries are read as float32 and then widened.
        matrix = torch.tensor(calib[key], dtype=torch.float32,
                              device=torch.device("cpu"))
        return matrix.to(torch.float64)

    r_rect = _calib_matrix('rect')
    projection = _calib_matrix('P3' if right else 'P2')
    velo2cam = _calib_matrix('Trv2c')

    # Project to image space.
    centres_camera = box_torch_ops.lidar_to_camera(centres, r_rect, velo2cam)
    idxs = box_torch_ops.project_to_image(centres_camera, projection)

    image_extent = torch.tensor([1248, 384]).to(torch.float64).view(1, 2)
    idxs_norm = idxs / image_extent
    return idxs, idxs_norm
def train_stage_2(self, example, preds_dict, top_predictions_left,
                  top_predictions_right):
    """Build the stage-2 fusion inputs from 3D predictions and 2D detections.

    The 3D box predictions are decoded (no NMS), projected into the left
    (``P2``) and right (``P3``) images, clamped to the image bounds, and
    paired with the 2D detector outputs through
    ``se.build_stage2_training``.

    Args:
        example: batch dict; reads ``'anchors'``, ``"rect"``, ``"Trv2c"``,
            ``"P2"``, ``"P3"``, ``"image_shape"``, ``'image_idx'`` and, if
            present, ``"anchors_mask"``.
        preds_dict: network outputs; reads ``"box_preds"``, ``"cls_preds"``
            and, with the direction classifier enabled, ``"dir_cls_preds"``.
        top_predictions_left: 2D detections for the left image; columns 0:4
            are used as boxes and column 4 as score.
        top_predictions_right: same layout, right image.

    Returns:
        ``(predictions_dicts, non_empty_iou_test_tensor,
        non_empty_tensor_index_tensor)``. The two tensors hold the first
        ``max_num`` entries reported by ``se.build_stage2_training``, or
        ``-1``-filled placeholders of shape ``(1, 6, 1, 2)`` and ``(2, 2)``
        when ``max_num`` is 0.

    NOTE(review): the fusion tensors are rebuilt for every sample and only
    those of the last sample are returned, so this is effectively written
    for a batch size of 1 - confirm with the caller.
    """
    # Row capacity of the buffers handed to se.build_stage2_training
    # (9 x 1e5 possible combinations).
    max_pairs = 900000
    # At most this many 2D detector boxes per image are used.
    max_2d_boxes = 20

    def _clamped_image_box(corners_in_image, img_width, img_height):
        # 2D extent of the projected corners ([N, 8, 2]), clamped to the image.
        minxy = torch.min(corners_in_image, dim=1)[0]
        maxxy = torch.max(corners_in_image, dim=1)[0]
        minxy[:, 0] = torch.clamp(minxy[:, 0], min=0, max=img_width)
        minxy[:, 1] = torch.clamp(minxy[:, 1], min=0, max=img_height)
        maxxy[:, 0] = torch.clamp(maxxy[:, 0], min=0, max=img_width)
        maxxy[:, 1] = torch.clamp(maxxy[:, 1], min=0, max=img_height)
        return torch.cat([minxy, maxxy], dim=1)

    batch_size = example['anchors'].shape[0]
    batch_anchors = example["anchors"].view(batch_size, -1, 7)
    batch_rect = example["rect"]
    batch_Trv2c = example["Trv2c"]
    batch_P2 = example["P2"]
    batch_P3 = example["P3"]
    batch_image_shape = example["image_shape"]
    if "anchors_mask" not in example:
        batch_anchors_mask = [None] * batch_size
    else:
        batch_anchors_mask = example["anchors_mask"].view(batch_size, -1)
    batch_imgidx = example['image_idx']

    # Predicted 3D boxes and class logits.
    batch_box_preds = preds_dict["box_preds"].view(
        batch_size, -1, self._box_coder.code_size)
    num_class_with_bg = self._num_class
    if not self._encode_background_as_zeros:
        num_class_with_bg = self._num_class + 1
    batch_cls_preds = preds_dict["cls_preds"].view(
        batch_size, -1, num_class_with_bg)
    batch_box_preds = self._box_coder.decode_torch(batch_box_preds,
                                                   batch_anchors)
    if self._use_direction_classifier:
        batch_dir_preds = preds_dict["dir_cls_preds"].view(batch_size, -1, 2)
    else:
        batch_dir_preds = [None] * batch_size

    predictions_dicts = []
    for box_preds, cls_preds, dir_preds, rect, Trv2c, P2, P3, img_idx, a_mask in zip(
            batch_box_preds, batch_cls_preds, batch_dir_preds, batch_rect,
            batch_Trv2c, batch_P2, batch_P3, batch_imgidx,
            batch_anchors_mask):
        if a_mask is not None:
            box_preds = box_preds[a_mask]
            cls_preds = cls_preds[a_mask]
        box_preds = box_preds.float()
        cls_preds = cls_preds.float()
        rect = rect.float()
        Trv2c = Trv2c.float()
        P2 = P2.float()
        P3 = P3.float()

        if self._encode_background_as_zeros:
            # This encoding does not support softmax.
            assert self._use_sigmoid_score is True
            total_scores = torch.sigmoid(cls_preds)
        else:
            # Background is the first element of the one-hot vector.
            if self._use_sigmoid_score:
                total_scores = torch.sigmoid(cls_preds)[..., 1:]
            else:
                total_scores = F.softmax(cls_preds, dim=-1)[..., 1:]

        # Finally generate predictions.
        final_box_preds = box_preds
        final_scores = total_scores
        # Convert the 3D boxes to the camera coordinate frame.
        final_box_preds_camera = box_torch_ops.box_lidar_to_camera(
            final_box_preds, rect, Trv2c)
        locs = final_box_preds_camera[:, :3]
        dims = final_box_preds_camera[:, 3:6]
        angles = final_box_preds_camera[:, 6]
        camera_box_origin = [0.5, 1.0, 0.5]
        box_corners = box_torch_ops.center_to_corner_box3d(
            locs, dims, angles, camera_box_origin, axis=1)
        # Project the 8 corners into both images: [N, 8, 2] each.
        box_corners_in_image_left = box_torch_ops.project_to_image(
            box_corners, P2)
        box_corners_in_image_right = box_torch_ops.project_to_image(
            box_corners, P3)

        # The image size of the first sample is used for every sample.
        img_height = batch_image_shape[0, 0]
        img_width = batch_image_shape[0, 1]
        box_2d_preds_left = _clamped_image_box(
            box_corners_in_image_left, img_width, img_height)
        box_2d_preds_right = _clamped_image_box(
            box_corners_in_image_right, img_width, img_height)

        predictions_dicts.append({
            "bbox": box_2d_preds_left,
            "box3d_camera": final_box_preds_camera,
            "box3d_lidar": final_box_preds,
            "scores": final_scores,
            "image_idx": img_idx,
        })

        # Planar distance to the lidar origin, divided by 82.0.
        dis_to_lidar = torch.norm(
            box_preds[:, :2], p=2, dim=1, keepdim=True) / 82.0

        # Slicing past the end is safe, so no length check is needed.
        box_2d_detector_left = top_predictions_left[:max_2d_boxes, :4]
        box_2d_detector_right = top_predictions_right[:max_2d_boxes, :4]
        box_2d_scores_left = top_predictions_left[:, 4].reshape(-1, 1)
        box_2d_scores_right = top_predictions_right[:, 4].reshape(-1, 1)

        box_2d_preds_left_np = box_2d_preds_left.detach().cpu().numpy()
        # Output buffers for se.build_stage2_training, pre-filled with -1.
        overlaps = np.full((max_pairs, 6), -1,
                           dtype=box_2d_preds_left_np.dtype)
        tensor_index1 = np.full((max_pairs, 2), -1,
                                dtype=box_2d_preds_left_np.dtype)
        iou_test, tensor_index, max_num = se.build_stage2_training(
            box_2d_preds_left_np,
            box_2d_preds_right.detach().cpu().numpy(),
            box_2d_detector_left,
            box_2d_detector_right,
            -1,
            final_scores.detach().cpu().numpy(),
            box_2d_scores_left,
            box_2d_scores_right,
            dis_to_lidar.detach().cpu().numpy(),
            overlaps,
            tensor_index1)

        iou_test_tensor = torch.FloatTensor(iou_test)
        tensor_index_tensor = torch.LongTensor(tensor_index)
        iou_test_tensor = iou_test_tensor.permute(1, 0)
        iou_test_tensor = iou_test_tensor.reshape(1, 6, 1, max_pairs)
        tensor_index_tensor = tensor_index_tensor.reshape(-1, 2)

        if max_num == 0:
            # Nothing reported: hand back -1-filled placeholders.
            non_empty_iou_test_tensor = torch.full((1, 6, 1, 2), -1.0)
            non_empty_tensor_index_tensor = torch.full((2, 2), -1.0)
        else:
            non_empty_iou_test_tensor = iou_test_tensor[:, :, :, :max_num]
            non_empty_tensor_index_tensor = tensor_index_tensor[:max_num, :]

    return predictions_dicts, non_empty_iou_test_tensor, non_empty_tensor_index_tensor
def _rpn_nms_subset(boxes, scores, pre_max_size, post_max_size,
                    iou_threshold):
    """Run ``box_torch_ops.nms`` on the standup BEV boxes; None if no boxes."""
    if boxes.shape[0] == 0:
        return None
    boxes_bev = boxes[:, [0, 1, 3, 4, 6]]
    corners = box_torch_ops.center_to_corner_box2d(
        boxes_bev[:, :2], boxes_bev[:, 2:4], boxes_bev[:, 4])
    standup_boxes = box_torch_ops.corner_to_standup_nd(corners)
    return box_torch_ops.nms(
        standup_boxes,
        scores,
        pre_max_size=pre_max_size,
        post_max_size=post_max_size,
        iou_threshold=iou_threshold)


def _rpn_pad_proposals(selected, boxes, anchors, targets, capacity, rect,
                       Trv2c, P2, device):
    """Zero-pad the selected proposals (and targets) to ``capacity`` rows.

    Returns ``(bev, img, anchors, labels, reg_targets, dir_targets)``, each
    with a leading batch dimension of 1. The three target entries are
    ``None`` when ``targets`` is ``None`` and something was selected.
    """
    def _zeros(*shape):
        return torch.zeros(shape, device=device)

    if selected is None:
        return (_zeros(capacity, 5).unsqueeze(0),
                _zeros(capacity, 4).unsqueeze(0),
                _zeros(capacity, 7).unsqueeze(0),
                _zeros(capacity).unsqueeze(0),
                _zeros(capacity, 7).unsqueeze(0),
                _zeros(capacity, 2).unsqueeze(0))

    proposals_3d = boxes[selected]
    num_selected = proposals_3d.shape[0]
    proposals_3d_fix = _zeros(capacity, 7)
    proposals_3d_fix[:num_selected, :] = proposals_3d
    anchors_fix = _zeros(capacity, 7)
    anchors_fix[:num_selected, :] = anchors[selected]

    labels_fix = reg_target_fix = dir_target_fix = None
    if targets is not None:
        labels, reg_target, dir_target = targets
        labels_fix = _zeros(capacity)
        reg_target_fix = _zeros(capacity, 7)
        dir_target_fix = _zeros(capacity, 2)
        labels_fix[:num_selected] = labels[selected]
        reg_target_fix[:num_selected, :] = reg_target[selected]
        dir_target_fix[:num_selected, :] = dir_target[selected]
        labels_fix = labels_fix.unsqueeze(0)
        reg_target_fix = reg_target_fix.unsqueeze(0)
        dir_target_fix = dir_target_fix.unsqueeze(0)

    proposals_bev_fix = proposals_3d_fix[:, [0, 1, 3, 4, 6]].unsqueeze(0)

    # The padded rows are projected as well, as before.
    proposals_cam_fix = box_torch_ops.box_lidar_to_camera(
        proposals_3d_fix, rect, Trv2c)
    camera_box_origin = [0.5, 1.0, 0.5]
    cam_corners = box_torch_ops.center_to_corner_box3d(
        proposals_cam_fix[:, :3], proposals_cam_fix[:, 3:6],
        proposals_cam_fix[:, 6], camera_box_origin, axis=1)
    img_corners = box_torch_ops.project_to_image(cam_corners, P2)
    minxy = torch.min(img_corners, dim=1)[0]
    maxxy = torch.max(img_corners, dim=1)[0]
    proposals_img_fix = torch.cat([minxy, maxxy], dim=1).unsqueeze(0)

    return (proposals_bev_fix, proposals_img_fix, anchors_fix.unsqueeze(0),
            labels_fix, reg_target_fix, dir_target_fix)


def rpn_nms(box_preds, cls_preds, example, box_coder, nms_score_threshold,
            nms_pre_max_size, nms_post_max_size, nms_iou_threshold, training,
            range_thresh=0):
    """Turn RPN outputs into fixed-size far/near proposal sets.

    Per sample, the decoded boxes are filtered by ``nms_score_threshold``,
    split by comparing box column 0 against ``range_thresh`` ("far" is
    ``>=``, "near" is ``<`` and is only built when ``range_thresh > 0``),
    passed through NMS and zero-padded. Far proposals get
    ``nms_post_max_size // 2`` rows (and half the pre-NMS budget), near
    proposals ``nms_post_max_size`` rows.

    NOTE(review): the score threshold is compared against the raw
    ``cls_preds`` values, not against their sigmoid - confirm this is
    intended.

    Args:
        box_preds: box regression output, viewed as
            ``(batch, -1, box_coder.code_size)``.
        cls_preds: classification output, viewed as ``(batch, -1, 1)``.
        example: batch dict; reads ``"anchors"``, ``"calib"`` (``"rect"``,
            ``"Trv2c"``, ``"P2"``), optionally ``"anchors_mask"`` and, when
            ``training``, ``"labels"`` and ``"reg_targets"``.
        box_coder: provides ``code_size`` and ``decode_torch``.
        nms_score_threshold: score filter, applied only when ``> 0``.
        nms_pre_max_size: pre-NMS box budget.
        nms_post_max_size: post-NMS box budget and padding size.
        nms_iou_threshold: IoU threshold handed to NMS.
        training: also gather labels, regression and direction targets.
        range_thresh: split value for far/near.

    Returns:
        Dict with ``"far_props_bev"``, ``"far_props_img"``,
        ``"near_props_bev"``, ``"near_props_img"``, ``"rcnn_labels"``,
        ``"rcnn_reg_targets"``, ``"rcnn_dir_targets"`` and
        ``"rcnn_anchors"``. Without ``training`` the three target entries
        are lists of ``None``.
    """
    anchors = example["anchors"]
    batch_size = anchors.shape[0]
    batch_anchors = anchors.view(batch_size, -1, 7)
    batch_rect = example["calib"]["rect"]
    batch_Trv2c = example["calib"]["Trv2c"]
    batch_P2 = example["calib"]["P2"]
    if training:
        batch_labels = example["labels"]
        batch_reg_targets = example["reg_targets"]
        batch_dir_targets = get_direction_target(
            batch_anchors, batch_reg_targets, dir_offset=0.0, num_bins=2)
    else:
        batch_labels = [None] * batch_size
        batch_reg_targets = [None] * batch_size
        batch_dir_targets = [None] * batch_size
    if "anchors_mask" not in example:
        batch_anchors_mask = [None] * batch_size
    else:
        batch_anchors_mask = example["anchors_mask"].view(batch_size, -1)

    batch_box_props = box_preds.view(batch_size, -1, box_coder.code_size)
    batch_box_props = box_coder.decode_torch(batch_box_props, batch_anchors)
    batch_cls_props = cls_preds.view(batch_size, -1, 1)

    far_capacity = nms_post_max_size // 2
    near_capacity = nms_post_max_size
    use_score_thresh = nms_score_threshold > 0.0

    batch_far_proposals_bev = []
    batch_far_proposals_img = []
    batch_near_proposals_bev = []
    batch_near_proposals_img = []
    batch_rcnn_labels = []
    batch_rcnn_reg_target = []
    batch_rcnn_dir_target = []
    batch_rcnn_anchors = []

    for box_props, cls_props, labels, reg_target, dir_targets, rect, Trv2c, P2, a_mask, anchors in zip(
            batch_box_props, batch_cls_props, batch_labels,
            batch_reg_targets, batch_dir_targets, batch_rect, batch_Trv2c,
            batch_P2, batch_anchors_mask, batch_anchors):
        # Padding buffers follow the proposals' device instead of assuming
        # the current CUDA device.
        device = box_props.device
        targets = (labels, reg_target, dir_targets) if training else None

        if a_mask is not None:
            box_props = box_props[a_mask]
            cls_props = cls_props[a_mask]
            anchors = anchors[a_mask]
            if training:
                targets = tuple(item[a_mask] for item in targets)

        top_scores = cls_props.squeeze(-1)
        if use_score_thresh:
            thresh = cls_props.new_tensor([nms_score_threshold])
            top_scores_keep = (top_scores >= thresh)
            top_scores = top_scores.masked_select(top_scores_keep)

        far_selected = near_selected = None
        far_boxes = far_anchors = far_targets = None
        near_boxes = near_anchors = near_targets = None

        if top_scores.shape[0] != 0:
            if use_score_thresh:
                box_props = box_props[top_scores_keep]
                anchors = anchors[top_scores_keep]
                if training:
                    targets = tuple(item[top_scores_keep] for item in targets)

            # Kept in its own local so the `range_thresh` argument is not
            # replaced by a tensor for the following samples.
            range_limit = box_props.new_tensor([range_thresh])

            far_mask = (box_props[:, 0] >= range_limit)
            far_boxes = box_props[far_mask]
            far_anchors = anchors[far_mask]
            if training:
                far_targets = tuple(item[far_mask] for item in targets)
            far_selected = _rpn_nms_subset(
                far_boxes, top_scores[far_mask],
                nms_pre_max_size // 2, nms_post_max_size // 2,
                nms_iou_threshold)

            if range_thresh > 0:
                near_mask = (box_props[:, 0] < range_limit)
                near_boxes = box_props[near_mask]
                near_anchors = anchors[near_mask]
                if training:
                    near_targets = tuple(item[near_mask] for item in targets)
                near_selected = _rpn_nms_subset(
                    near_boxes, top_scores[near_mask],
                    nms_pre_max_size, nms_post_max_size, nms_iou_threshold)

        (far_proposals_bev_fix, far_proposals_img_fix, far_anchors_fix,
         far_labels_fix, far_reg_target_fix,
         far_dir_target_fix) = _rpn_pad_proposals(
             far_selected, far_boxes, far_anchors, far_targets, far_capacity,
             rect, Trv2c, P2, device)
        (near_proposals_bev_fix, near_proposals_img_fix, near_anchors_fix,
         near_labels_fix, near_reg_target_fix,
         near_dir_target_fix) = _rpn_pad_proposals(
             near_selected, near_boxes, near_anchors, near_targets,
             near_capacity, rect, Trv2c, P2, device)

        if training:
            # Near entries come first, far entries second.
            rcnn_labels_fix = torch.cat(
                [near_labels_fix, far_labels_fix], dim=1)
            rcnn_reg_target_fix = torch.cat(
                [near_reg_target_fix, far_reg_target_fix], dim=1)
            rcnn_dir_target_fix = torch.cat(
                [near_dir_target_fix, far_dir_target_fix], dim=1)
        else:
            rcnn_labels_fix = None
            rcnn_reg_target_fix = None
            rcnn_dir_target_fix = None
        rcnn_anchors_fix = torch.cat([near_anchors_fix, far_anchors_fix],
                                     dim=1)

        batch_far_proposals_bev.append(far_proposals_bev_fix)
        batch_far_proposals_img.append(far_proposals_img_fix)
        batch_near_proposals_bev.append(near_proposals_bev_fix)
        batch_near_proposals_img.append(near_proposals_img_fix)
        batch_rcnn_labels.append(rcnn_labels_fix)
        batch_rcnn_reg_target.append(rcnn_reg_target_fix)
        batch_rcnn_dir_target.append(rcnn_dir_target_fix)
        batch_rcnn_anchors.append(rcnn_anchors_fix)

    batch_far_proposals_bev = torch.cat(batch_far_proposals_bev, dim=0)
    batch_far_proposals_img = torch.cat(batch_far_proposals_img, dim=0)
    batch_near_proposals_bev = torch.cat(batch_near_proposals_bev, dim=0)
    batch_near_proposals_img = torch.cat(batch_near_proposals_img, dim=0)
    if training:
        batch_rcnn_labels = torch.cat(batch_rcnn_labels, dim=0)
        batch_rcnn_reg_target = torch.cat(batch_rcnn_reg_target, dim=0)
        batch_rcnn_dir_target = torch.cat(batch_rcnn_dir_target, dim=0)
    batch_rcnn_anchors = torch.cat(batch_rcnn_anchors, dim=0)

    rcnn_examples = {
        "far_props_bev": batch_far_proposals_bev,
        "far_props_img": batch_far_proposals_img,
        "near_props_bev": batch_near_proposals_bev,
        "near_props_img": batch_near_proposals_img,
        "rcnn_labels": batch_rcnn_labels,
        "rcnn_reg_targets": batch_rcnn_reg_target,
        "rcnn_dir_targets": batch_rcnn_dir_target,
        "rcnn_anchors": batch_rcnn_anchors,
    }
    return rcnn_examples