def create_sliced_voxel_grid_2d(self, sample_name, source, image_shape=None):
    """Generates a filtered 2D voxel grid from point cloud data

    Args:
        sample_name: name of the sample to generate the point cloud from
        source: point cloud source - 'stereo', 'lidar', or 'depth'
        image_shape: image dimensions [h, w], only required when source
            is 'lidar' or 'depth'

    Returns:
        voxel_grid_2d: 2D voxel grid generated from the given sample
    """
    img_idx = int(sample_name)
    ground_plane = obj_utils.get_road_plane(img_idx, self.dataset.planes_dir)

    point_cloud = self.get_point_cloud(source, img_idx,
                                       image_shape=image_shape)
    filtered_points = self._apply_slice_filter(point_cloud, ground_plane)

    # Create Voxel Grid
    voxel_grid_2d = VoxelGrid2D()
    voxel_grid_2d.voxelize_2d(filtered_points, self.voxel_size,
                              extents=self.area_extents,
                              ground_plane=ground_plane,
                              create_leaf_layout=True)

    return voxel_grid_2d
def get_ground_plane(self, sample_name):
    """Reads the ground plane for the sample

    Args:
        sample_name: name of the sample, e.g. '000123'

    Returns:
        ground_plane: ground plane coefficients
    """
    ground_plane = obj_utils.get_road_plane(int(sample_name),
                                            self.dataset.planes_dir)
    return ground_plane
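# A hedged usage sketch (not part of the original source): assuming a dataset
# built by DatasetBuilder whose kitti_utils exposes get_ground_plane, the
# returned coefficients describe the plane a*x + b*y + c*z + d = 0 in the
# camera frame (see test_get_road_plane further below for the format):
#
#     ground_plane = dataset.kitti_utils.get_ground_plane('000123')
#     a, b, c, d = ground_plane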
def create_sliced_voxel_grid_2d(self, sample_name, source, image_shape=None):
    """Generates a filtered 2D voxel grid from point cloud data

    Args:
        sample_name: name of the sample to generate the point cloud from
        source: point cloud source, e.g. 'lidar'
        image_shape: image dimensions [h, w], only required when source
            is 'lidar' or 'depth'

    Returns:
        voxel_grid_2d: 2D voxel grid generated from the given sample
    """
    img_idx = int(sample_name)
    ground_plane = obj_utils.get_road_plane(img_idx, self.dataset.planes_dir)

    # Point cloud -> camera -> pixel coordinates -> image (keep only the
    # points whose projections land inside the image). Returns the 3D
    # coordinates of those points, expressed in the camera frame.
    point_cloud = self.get_point_cloud(source, img_idx,
                                       image_shape=image_shape)

    # Keep points inside the x, y, z extents whose height above the ground
    # plane lies in [height_lo, height_hi]
    filtered_points = self._apply_slice_filter(point_cloud, ground_plane)

    # Project the point clouds onto the image and display them
    if img_idx == 23:
        self._project_and_show(sample_name, point_cloud,
                               "red", "point_cloud")
        self._project_and_show(sample_name, filtered_points.T,
                               "red", "filtered_points")

    # Create Voxel Grid
    # Discretize the points into voxels: divide space into a grid of
    # voxel_size cells, count the points per cell, and keep a single
    # coordinate (voxel_coords) per occupied cell. In the leaf layout,
    # cells containing points are marked 0 and empty cells are marked -1.
    voxel_grid_2d = VoxelGrid2D()
    voxel_grid_2d.voxelize_2d(filtered_points, self.voxel_size,
                              extents=self.area_extents,
                              ground_plane=ground_plane,
                              create_leaf_layout=True)

    return voxel_grid_2d
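# The comments above describe the leaf layout that voxelize_2d produces. A
# minimal NumPy sketch of the same idea follows (illustrative only; the real
# VoxelGrid2D implementation may differ in details such as axis order):
import numpy as np

def voxelize_2d_sketch(points, voxel_size, extents):
    """Discretize (x, z) into a 2D grid; 0 marks occupied cells, -1 empty ones.

    points:  (N, 3) array of camera-frame points
    extents: [[x_min, x_max], [z_min, z_max]]
    """
    extents = np.asarray(extents, dtype=np.float32)
    grid_shape = np.ceil((extents[:, 1] - extents[:, 0]) / voxel_size).astype(int)
    leaf_layout = -np.ones(grid_shape, dtype=np.int32)

    # Map each point to its (x, z) cell index and clip to the grid
    coords = ((points[:, [0, 2]] - extents[:, 0]) / voxel_size).astype(int)
    coords = np.clip(coords, 0, grid_shape - 1)
    leaf_layout[coords[:, 0], coords[:, 1]] = 0
    return leaf_layout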
def load_pred_sample(self, rgb_image, point_cloud, frame_calib):
    """ This method is used for on-line prediction in the Avod model.
    It tries to mimic the effects of load_samples, but instead of loading
    from an index, it takes an image and a lidar point cloud as arguments.

    Returns one sample_dict
    """
    obj_labels = None
    anchors_info = []

    label_anchors = np.zeros((1, 6))
    label_boxes_3d = np.zeros((1, 7))
    label_classes = np.zeros(1)

    image_shape = rgb_image.shape[0:2]
    image_input = rgb_image

    # Get ground plane
    # TODO: This should be calculated when we have IMU input from the
    # vehicle. For now, just use kitti road plane index 0.
    ground_plane = obj_utils.get_road_plane(0, self.planes_dir)

    # Create BEV maps
    bev_images = self.kitti_utils.create_bev_maps(point_cloud, ground_plane)

    slice_maps = bev_images.get('slice_maps')
    cloud_maps = bev_images.get('cloud_maps')
    bev_maps = slice_maps + cloud_maps
    bev_input = np.dstack(bev_maps)

    sample_dict = {
        constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
        constants.KEY_LABEL_ANCHORS: label_anchors,
        constants.KEY_LABEL_CLASSES: label_classes,

        constants.KEY_IMAGE_INPUT: image_input,
        constants.KEY_BEV_INPUT: bev_input,

        constants.KEY_ANCHORS_INFO: anchors_info,

        constants.KEY_POINT_CLOUD: point_cloud,
        constants.KEY_GROUND_PLANE: ground_plane,
        constants.KEY_STEREO_CALIB_P2: frame_calib.p2,

        constants.KEY_SAMPLE_NAME: 0,  # TODO: Find out how this is used later
        constants.KEY_SAMPLE_AUGS: []  # We don't need any augs for prediction
    }

    return sample_dict
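# Hedged usage sketch (not from the original source): rgb_image, point_cloud,
# and frame_calib are assumed to be prepared by the caller exactly as the
# docstring above describes.
#
#     sample_dict = dataset.load_pred_sample(rgb_image, point_cloud, frame_calib)
#     bev_input = sample_dict[constants.KEY_BEV_INPUT]
#     image_input = sample_dict[constants.KEY_IMAGE_INPUT]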
def main():
    """
    Visualization of the mini batch anchors for RpnModel training.

    Keys:
        F1: Toggle mini batch anchors
        F2: Toggle positive/negative proposal anchors
        F3: Toggle easy ground truth objects (Green)
        F4: Toggle medium ground truth objects (Orange)
        F5: Toggle hard ground truth objects (Red)
        F6: Toggle all ground truth objects (default off)
        F7: Toggle ground-plane
    """
    anchor_colour_scheme = {
        "Car": (255, 0, 0),            # Red
        "Pedestrian": (255, 150, 50),  # Orange
        "Cyclist": (150, 50, 100),     # Purple
        "DontCare": (255, 255, 255),   # White

        "Anchor": (150, 150, 150),     # Gray
        "Positive": (0, 255, 255),     # Teal
        "Negative": (255, 0, 255)      # Bright Purple
    }

    ##############################
    # Options
    ##############################
    show_orientations = True

    # Classes name
    config_name = 'car'
    # config_name = 'ped'
    # config_name = 'cyc'
    # config_name = 'ppl'

    # # # Random sample # # #
    # sample_name = None

    # Small cars
    # sample_name = '000008'
    # sample_name = '000639'

    # # # Cars # # #
    # sample_name = "000001"
    # sample_name = "000050"
    # sample_name = "000112"
    # sample_name = "000169"
    # sample_name = "000191"

    # # # People # # #
    # val_half
    # sample_name = '000000'
    # sample_name = '000001'  # Hard, 1 far cyc
    # sample_name = '000005'  # Easy, 1 ped
    # sample_name = '000122'  # Easy, 1 cyc
    # sample_name = '000134'  # Hard, lots of people
    # sample_name = '000167'  # Medium, 1 ped, 2 cycs
    # sample_name = '000187'  # Medium, 1 ped on left
    # sample_name = '000381'  # Easy, 1 ped
    # sample_name = '000398'  # Easy, 1 ped
    # sample_name = '000401'  # Hard, obscured peds
    # sample_name = '000407'  # Easy, 1 ped
    sample_name = '000448'  # Hard, several far people
    # sample_name = '000486'  # Hard 2 obscured peds
    # sample_name = '000509'  # Easy, 1 ped
    # sample_name = '000718'  # Hard, lots of people
    # sample_name = '002216'  # Easy, 1 cyc

    # sample_name = "000000"
    # sample_name = "000011"
    # sample_name = "000015"
    # sample_name = "000028"
    # sample_name = "000035"
    # sample_name = "000134"
    # sample_name = "000167"
    # sample_name = '000379'
    # sample_name = '000381'
    # sample_name = '000397'
    # sample_name = '000398'
    # sample_name = '000401'
    # sample_name = '000407'
    # sample_name = '000486'
    # sample_name = '000509'

    # # Cyclists # # #
    # sample_name = '000122'
    # sample_name = '000448'

    # # # Multiple classes # # #
    # sample_name = "000764"
    ##############################
    # End of Options
    ##############################

    # Dataset config
    dataset_config_path = mlod.top_dir() + \
        '/demos/configs/mb_rpn_{}.config'.format(config_name)

    # Create Dataset
    dataset = DatasetBuilder.load_dataset_from_config(dataset_config_path)

    # Random sample
    if sample_name is None:
        sample_idx = np.random.randint(0, dataset.num_samples)
        sample_name = dataset.sample_list[sample_idx].name

    anchor_strides = dataset.kitti_utils.anchor_strides

    img_idx = int(sample_name)

    print("Showing mini batch for sample {}".format(sample_name))

    image = cv2.imread(dataset.get_rgb_image_path(sample_name))
    image_shape = [image.shape[1], image.shape[0]]

    # KittiUtils class
    dataset_utils = dataset.kitti_utils

    ground_plane = obj_utils.get_road_plane(img_idx, dataset.planes_dir)

    point_cloud = obj_utils.get_depth_map_point_cloud(img_idx,
                                                      dataset.calib_dir,
                                                      dataset.depth_dir,
                                                      image_shape)
    points = point_cloud.T
    point_colours = vis_utils.project_img_to_point_cloud(points,
                                                         image,
                                                         dataset.calib_dir,
                                                         img_idx)

    clusters, _ = dataset.get_cluster_info()
    anchor_generator = grid_anchor_3d_generator.GridAnchor3dGenerator()

    # Read mini batch info
    anchors_info = dataset_utils.get_anchors_info(dataset.classes_name,
                                                  anchor_strides,
                                                  sample_name)

    if not anchors_info:
        # Exit early if anchors_info is empty
        print("Anchors info is empty, please try a different sample")
        return

    # Generate anchors for all classes
    all_anchor_boxes_3d = []
    for class_idx in range(len(dataset.classes)):
        anchor_boxes_3d = anchor_generator.generate(
            area_3d=dataset.kitti_utils.area_extents,
            anchor_3d_sizes=clusters[class_idx],
            anchor_stride=anchor_strides[class_idx],
            ground_plane=ground_plane)
        all_anchor_boxes_3d.extend(anchor_boxes_3d)
    all_anchor_boxes_3d = np.asarray(all_anchor_boxes_3d)

    # Use anchors info
    indices, ious, offsets, classes = anchors_info

    # Get non empty anchors from the indices
    anchor_boxes_3d = all_anchor_boxes_3d[indices]

    # Sample an RPN mini batch from the non empty anchors
    mini_batch_utils = dataset.kitti_utils.mini_batch_utils
    mb_mask_tf, _ = mini_batch_utils.sample_rpn_mini_batch(ious)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    mb_mask = sess.run(mb_mask_tf)

    mb_anchor_boxes_3d = anchor_boxes_3d[mb_mask]
    mb_anchor_ious = ious[mb_mask]

    # ObjectLabel list that holds all boxes to visualize
    obj_list = []

    num_positives = 0
    # Convert the mini_batch anchors to an object list
    mini_batch_size = mini_batch_utils.rpn_mini_batch_size
    for i in range(mini_batch_size):
        if mb_anchor_ious[i] > mini_batch_utils.rpn_pos_iou_range[0]:
            obj_type = "Positive"
            num_positives += 1
        else:
            obj_type = "Negative"

        obj = box_3d_encoder.box_3d_to_object_label(mb_anchor_boxes_3d[i],
                                                    obj_type)
        obj_list.append(obj)

    print('Num positives', num_positives)

    # Convert all non-empty anchors to an object list
    non_empty_anchor_objs = \
        [box_3d_encoder.box_3d_to_object_label(anchor_box_3d,
                                               obj_type='Anchor')
         for anchor_box_3d in anchor_boxes_3d]

    ##############################
    # Ground Truth
    ##############################
    if dataset.has_labels:
        easy_gt_objs, medium_gt_objs, \
            hard_gt_objs, all_gt_objs = demo_utils.get_gts_based_on_difficulty(
                dataset, img_idx)
    else:
        easy_gt_objs = medium_gt_objs = hard_gt_objs = all_gt_objs = []

    # Visualize 2D image
    vis_utils.visualization(dataset.rgb_image_dir, img_idx)
    plt.show(block=False)

    # Create VtkAxes
    axes = vtk.vtkAxesActor()
    axes.SetTotalLength(5, 5, 5)

    # Create VtkBoxes for mini batch anchors
    vtk_pos_anchor_boxes = VtkBoxes()
    vtk_pos_anchor_boxes.set_objects(obj_list, anchor_colour_scheme)

    # VtkBoxes for non empty anchors
    vtk_non_empty_anchors = VtkBoxes()
    vtk_non_empty_anchors.set_objects(non_empty_anchor_objs,
                                      anchor_colour_scheme)
    vtk_non_empty_anchors.set_line_width(0.1)

    # Create VtkBoxes for ground truth
    vtk_easy_gt_boxes, vtk_medium_gt_boxes, \
        vtk_hard_gt_boxes, vtk_all_gt_boxes = \
        demo_utils.create_gt_vtk_boxes(easy_gt_objs,
                                       medium_gt_objs,
                                       hard_gt_objs,
                                       all_gt_objs,
                                       show_orientations)

    vtk_point_cloud = VtkPointCloud()
    vtk_point_cloud.set_points(points, point_colours)
    vtk_point_cloud.vtk_actor.GetProperty().SetPointSize(2)

    vtk_ground_plane = VtkGroundPlane()
    vtk_ground_plane.set_plane(ground_plane, dataset.kitti_utils.bev_extents)

    # vtk_voxel_grid = VtkVoxelGrid()
    # vtk_voxel_grid.set_voxels(vx_grid)

    # Create Voxel Grid Renderer in bottom half
    vtk_renderer = vtk.vtkRenderer()
    vtk_renderer.AddActor(vtk_point_cloud.vtk_actor)
    vtk_renderer.AddActor(vtk_ground_plane.vtk_actor)

    vtk_renderer.AddActor(vtk_hard_gt_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_medium_gt_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_easy_gt_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_all_gt_boxes.vtk_actor)

    # vtk_renderer.AddActor(vtk_voxel_grid.vtk_actor)
    vtk_renderer.AddActor(vtk_non_empty_anchors.vtk_actor)
    vtk_renderer.AddActor(vtk_pos_anchor_boxes.vtk_actor)
    vtk_renderer.AddActor(axes)
    vtk_renderer.SetBackground(0.2, 0.3, 0.4)

    # Setup Camera
    current_cam = vtk_renderer.GetActiveCamera()
    current_cam.Pitch(160.0)
    current_cam.Roll(180.0)

    # Zooms out to fit all points on screen
    vtk_renderer.ResetCamera()

    # Zoom in slightly
    current_cam.Zoom(2.5)

    # Reset the clipping range to show all points
    vtk_renderer.ResetCameraClippingRange()

    # Setup Render Window
    vtk_render_window = vtk.vtkRenderWindow()
    mb_iou_thresholds = np.round(
        [mini_batch_utils.rpn_neg_iou_range[1],
         mini_batch_utils.rpn_pos_iou_range[0]], 3)
    vtk_render_window.SetWindowName(
        'Sample {} RPN Mini Batch {}/{}, '
        'Num Positives {}'.format(sample_name,
                                  mb_iou_thresholds[0],
                                  mb_iou_thresholds[1],
                                  num_positives))
    vtk_render_window.SetSize(900, 500)
    vtk_render_window.AddRenderer(vtk_renderer)

    # Setup custom interactor style, which handles mouse and key events
    vtk_render_window_interactor = vtk.vtkRenderWindowInteractor()
    vtk_render_window_interactor.SetRenderWindow(vtk_render_window)
    vtk_render_window_interactor.SetInteractorStyle(
        vis_utils.ToggleActorsInteractorStyle([
            vtk_non_empty_anchors.vtk_actor,
            vtk_pos_anchor_boxes.vtk_actor,

            vtk_easy_gt_boxes.vtk_actor,
            vtk_medium_gt_boxes.vtk_actor,
            vtk_hard_gt_boxes.vtk_actor,
            vtk_all_gt_boxes.vtk_actor,

            vtk_ground_plane.vtk_actor
        ]))

    # Render in VTK
    vtk_render_window.Render()
    vtk_render_window_interactor.Start()
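# The demo labels a mini-batch anchor positive when its best ground-truth IoU
# exceeds the lower bound of the positive range. The same rule in isolation
# (a sketch; the actual thresholds come from the mini_batch_utils config):
import numpy as np

def split_mini_batch_sketch(ious, pos_iou_lo):
    # Positive wherever IoU exceeds the positive range's lower bound
    ious = np.asarray(ious)
    is_positive = ious > pos_iou_lo
    return is_positive, int(np.count_nonzero(is_positive))

# e.g. mask, num_positives = split_mini_batch_sketch(mb_anchor_ious, 0.5)
# (0.5 is a placeholder; the real value is mini_batch_utils.rpn_pos_iou_range[0])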
def preprocess(self, indices):
    """Preprocesses anchor info and saves info to files

    Args:
        indices (int array): sample indices to process.
            If None, processes all samples
    """
    # Get anchor stride for class
    anchor_strides = self._anchor_strides

    dataset = self._dataset
    dataset_utils = self._dataset.kitti_utils
    classes_name = dataset.classes_name

    # Make folder if it doesn't exist yet
    output_dir = self.mini_batch_utils.get_file_path(classes_name,
                                                     anchor_strides,
                                                     sample_name=None)
    os.makedirs(output_dir, exist_ok=True)

    # Get clusters for class
    all_clusters_sizes, _ = dataset.get_cluster_info()

    anchor_generator = grid_anchor_3d_generator.GridAnchor3dGenerator()

    # Load indices of data_split
    all_samples = dataset.sample_list

    if indices is None:
        indices = np.arange(len(all_samples))
    num_samples = len(indices)

    # For each image in the dataset, save info on the anchors
    for sample_idx in indices:
        # Get image name for given cluster
        sample_name = all_samples[sample_idx].name
        img_idx = int(sample_name)

        # Check for existing files and skip to the next
        if self._check_for_existing(classes_name, anchor_strides,
                                    sample_name):
            print("{} / {}: Sample {} already preprocessed".format(
                sample_idx + 1, num_samples, sample_name))
            continue

        # Get ground truth and filter based on difficulty
        ground_truth_list = obj_utils.read_labels(dataset.label_dir,
                                                  img_idx)

        # If no valid ground truth, skip this image
        if not ground_truth_list:
            print("{} / {} No {}s for sample {} "
                  "(Ground Truth Filter)".format(
                      sample_idx + 1, num_samples,
                      classes_name, sample_name))

            # Output an empty file and move on to the next image.
            self._save_to_file(classes_name, anchor_strides, sample_name)
            continue

        # Filter objects to dataset classes
        filtered_gt_list = dataset_utils.filter_labels(ground_truth_list)
        filtered_gt_list = np.asarray(filtered_gt_list)

        # If filtering by class leaves no valid ground truth, skip this image
        if len(filtered_gt_list) == 0:
            print("{} / {} No {}s for sample {} "
                  "(Ground Truth Filter)".format(
                      sample_idx + 1, num_samples,
                      classes_name, sample_name))

            # Output an empty file and move on to the next image.
            self._save_to_file(classes_name, anchor_strides, sample_name)
            continue

        # Get ground plane
        ground_plane = obj_utils.get_road_plane(img_idx,
                                                dataset.planes_dir)

        image = Image.open(dataset.get_rgb_image_path(sample_name))
        image_shape = [image.size[1], image.size[0]]

        # Generate sliced 2D voxel grid for filtering
        vx_grid_2d = dataset_utils.create_sliced_voxel_grid_2d(
            sample_name,
            source=dataset.bev_source,
            image_shape=image_shape)

        # List for merging all anchors
        all_anchor_boxes_3d = []

        # Create anchors for each class
        for class_idx in range(len(dataset.classes)):
            # Generate anchors for all classes
            grid_anchor_boxes_3d = anchor_generator.generate(
                area_3d=self._area_extents,
                anchor_3d_sizes=all_clusters_sizes[class_idx],
                anchor_stride=self._anchor_strides[class_idx],
                ground_plane=ground_plane)
            all_anchor_boxes_3d.extend(grid_anchor_boxes_3d)

        # Filter empty anchors
        all_anchor_boxes_3d = np.asarray(all_anchor_boxes_3d)
        anchors = box_3d_encoder.box_3d_to_anchor(all_anchor_boxes_3d)
        empty_anchor_filter = anchor_filter.get_empty_anchor_filter_2d(
            anchors, vx_grid_2d, self._density_threshold)

        # Calculate anchor info
        anchors_info = self._calculate_anchors_info(
            all_anchor_boxes_3d, empty_anchor_filter, filtered_gt_list)

        anchor_ious = anchors_info[:, self.mini_batch_utils.col_ious]

        valid_iou_indices = np.where(anchor_ious > 0.0)[0]

        print("{} / {}:"
              "{:>6} anchors, "
              "{:>6} iou > 0.0, "
              "for {:>3} {}(s) for sample {}".format(
                  sample_idx + 1, num_samples,
                  len(anchors_info),
                  len(valid_iou_indices),
                  len(filtered_gt_list), classes_name, sample_name))

        # Save anchors info
        self._save_to_file(classes_name, anchor_strides,
                           sample_name, anchors_info)
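# get_empty_anchor_filter_2d keeps anchors whose BEV footprint contains at
# least density_threshold occupied voxels. Conceptually (a simplified dense
# sketch, not the integral-image implementation the library uses):
import numpy as np

def dense_anchor_density_filter(anchor_cells, occupancy, density_threshold):
    # anchor_cells: iterable of (x0, x1, z0, z1) voxel-index ranges per anchor
    # occupancy: 2D array, 1 where a voxel contains points, 0 otherwise
    keep = np.zeros(len(anchor_cells), dtype=bool)
    for i, (x0, x1, z0, z1) in enumerate(anchor_cells):
        keep[i] = occupancy[x0:x1, z0:z1].sum() >= density_threshold
    return keep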
def load_samples(self, indices):
    """ Loads input-output data for a set of samples. Should only be
        called when a particular sample dict is required. Otherwise,
        samples should be provided by the next_batch function

    Args:
        indices: A list of sample indices from the dataset.sample_list
            to be loaded

    Return:
        samples: a list of data sample dicts
    """
    sample_dicts = []
    for sample_idx in indices:
        sample = self.sample_list[sample_idx]
        sample_name = sample.name

        # Only read labels if they exist
        if self.has_labels:
            # Read mini batch first to see if it is empty
            anchors_info = self.get_anchors_info(sample_name)

            if (not anchors_info) and self.train_val_test == 'train' \
                    and (not self.train_on_all_samples):
                empty_sample_dict = {
                    constants.KEY_SAMPLE_NAME: sample_name,
                    constants.KEY_ANCHORS_INFO: anchors_info
                }
                return [empty_sample_dict]

            obj_labels = obj_utils.read_labels(self.label_dir,
                                               int(sample_name))

            # Only use objects that match dataset classes
            obj_labels = self.kitti_utils.filter_labels(obj_labels)

        else:
            obj_labels = None
            anchors_info = []

        label_anchors = np.zeros((1, 6))
        label_boxes_3d = np.zeros((1, 7))
        label_classes = np.zeros(1)

        img_idx = int(sample_name)

        # Load image (BGR -> RGB)
        cv_bgr_image = cv2.imread(self.get_rgb_image_path(sample_name))
        rgb_image = cv_bgr_image[..., ::-1]
        image_shape = rgb_image.shape[0:2]
        image_input = rgb_image

        # Load MRCNN mask and features
        # print('Load MRCNN mask and features')
        mrcnn_result = self.kitti_utils.get_mrcnn_result(img_idx)
        # If no pedestrian can be seen in the images, return early
        if not mrcnn_result:
            print('+++++++++++++ No mrcnn_result. load_samples, '
                  'early end ++++++++++++++++')
            return []
        image_mrcnn_feature_input = mrcnn_result.item().get('features')
        image_mrcnn_bbox_input = mrcnn_result.item().get('rois')
        # rois: [batch, N, (y1, x1, y2, x2)] detection bounding boxes
        image_mask_input = mrcnn_result.item().get('masks')

        # Get ground plane
        ground_plane = obj_utils.get_road_plane(int(sample_name),
                                                self.planes_dir)

        # Get calibration
        stereo_calib_p2 = calib_utils.read_calibration(self.calib_dir,
                                                       int(sample_name)).p2

        point_cloud = self.kitti_utils.get_point_cloud(self.bev_source,
                                                       img_idx,
                                                       image_shape)

        # Augmentation (Flipping)
        if kitti_aug.AUG_FLIPPING in sample.augs:
            image_input = kitti_aug.flip_image(image_input)
            point_cloud = kitti_aug.flip_point_cloud(point_cloud)
            obj_labels = [kitti_aug.flip_label_in_3d_only(obj)
                          for obj in obj_labels]
            ground_plane = kitti_aug.flip_ground_plane(ground_plane)
            stereo_calib_p2 = kitti_aug.flip_stereo_calib_p2(
                stereo_calib_p2, image_shape)

        # Augmentation (Image Jitter)
        if kitti_aug.AUG_PCA_JITTER in sample.augs:
            image_input[:, :, 0:3] = kitti_aug.apply_pca_jitter(
                image_input[:, :, 0:3])

        if obj_labels is not None:
            label_boxes_3d = np.asarray(
                [box_3d_encoder.object_label_to_box_3d(obj_label)
                 for obj_label in obj_labels])

            label_classes = [
                self.kitti_utils.class_str_to_index(obj_label.type)
                for obj_label in obj_labels]
            label_classes = np.asarray(label_classes, dtype=np.int32)

            # Return empty anchors_info if no ground truth after filtering
            if len(label_boxes_3d) == 0:
                anchors_info = []
                if self.train_on_all_samples:
                    # If training without any positive labels, we cannot
                    # set these to zeros, because later on the offset calc
                    # uses log on these anchors. So setting any arbitrary
                    # number here that does not break the offset calculation
                    # should work, since the negative samples won't be
                    # regressed in any case.
                    dummy_anchors = [[-1000, -1000, -1000, 1, 1, 1]]
                    label_anchors = np.asarray(dummy_anchors)
                    dummy_boxes = [[-1000, -1000, -1000, 1, 1, 1, 0]]
                    label_boxes_3d = np.asarray(dummy_boxes)
                else:
                    label_anchors = np.zeros((1, 6))
                    label_boxes_3d = np.zeros((1, 7))
                label_classes = np.zeros(1)
            else:
                label_anchors = box_3d_encoder.box_3d_to_anchor(
                    label_boxes_3d, ortho_rotate=True)

        # Create BEV maps
        bev_images = self.kitti_utils.create_bev_maps(
            point_cloud, ground_plane)

        height_maps = bev_images.get('height_maps')
        density_map = bev_images.get('density_map')
        bev_input = np.dstack((*height_maps, density_map))
        # print('bev_input.shape = ', bev_input.shape)

        sample_dict = {
            constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
            constants.KEY_LABEL_ANCHORS: label_anchors,
            constants.KEY_LABEL_CLASSES: label_classes,

            constants.KEY_IMAGE_INPUT: image_input,
            constants.KEY_BEV_INPUT: bev_input,

            constants.KEY_IMAGE_MASK_INPUT: image_mask_input,
            constants.KEY_IMAGE_MRCNN_FEATURE_INPUT:
                image_mrcnn_feature_input,
            constants.KEY_IMAGE_MRCNN_BBOX_INPUT: image_mrcnn_bbox_input,

            constants.KEY_ANCHORS_INFO: anchors_info,

            constants.KEY_POINT_CLOUD: point_cloud,
            constants.KEY_GROUND_PLANE: ground_plane,
            constants.KEY_STEREO_CALIB_P2: stereo_calib_p2,

            constants.KEY_SAMPLE_NAME: sample_name,
            constants.KEY_SAMPLE_AUGS: sample.augs
        }
        sample_dicts.append(sample_dict)

    return sample_dicts
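# Hedged usage sketch: indices refer to dataset.sample_list, and the keys come
# from the constants module used above.
#
#     samples = dataset.load_samples([0])
#     if samples:
#         bev_input = samples[0][constants.KEY_BEV_INPUT]      # H x W x (heights + density)
#         image_input = samples[0][constants.KEY_IMAGE_INPUT]  # RGB image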
def main():
    # Create Dataset
    dataset_config_path = mlod.root_dir() + \
        '/configs/mb_preprocessing/rpn_cars.config'
    dataset = DatasetBuilder.load_dataset_from_config(dataset_config_path)

    # Random sample
    sample_name = '000169'

    anchor_strides = dataset.kitti_utils.anchor_strides

    img_idx = int(sample_name)

    print("Showing mini batch for sample {}".format(sample_name))

    image = cv2.imread(dataset.get_rgb_image_path(sample_name))
    image_shape = [image.shape[1], image.shape[0]]

    # KittiUtils class
    dataset_utils = dataset.kitti_utils

    ground_plane = obj_utils.get_road_plane(img_idx, dataset.planes_dir)
    point_cloud = obj_utils.get_depth_map_point_cloud(img_idx,
                                                      dataset.calib_dir,
                                                      dataset.depth_dir,
                                                      image_shape)

    # Grab ground truth
    ground_truth_list = obj_utils.read_labels(dataset.label_dir, img_idx)
    ground_truth_list = dataset_utils.filter_labels(ground_truth_list)

    stereo_calib_p2 = calib_utils.read_calibration(dataset.calib_dir,
                                                   img_idx).p2

    ##############################
    # Flip sample info
    ##############################
    start_time = time.time()

    flipped_image = kitti_aug.flip_image(image)
    flipped_point_cloud = kitti_aug.flip_point_cloud(point_cloud)
    flipped_gt_list = [kitti_aug.flip_label_in_3d_only(obj)
                       for obj in ground_truth_list]
    flipped_ground_plane = kitti_aug.flip_ground_plane(ground_plane)
    flipped_calib_p2 = kitti_aug.flip_stereo_calib_p2(
        stereo_calib_p2, image_shape)

    flipped_points = flipped_point_cloud.T

    print('flip sample', time.time() - start_time)

    ##############################
    # Generate anchors
    ##############################
    clusters, _ = dataset.get_cluster_info()
    anchor_generator = grid_anchor_3d_generator.GridAnchor3dGenerator()

    # Read mini batch info
    anchors_info = dataset_utils.get_anchors_info(dataset.classes_name,
                                                  anchor_strides,
                                                  sample_name)

    all_anchor_boxes_3d = []
    all_ious = []
    for class_idx in range(len(dataset.classes)):
        anchor_boxes_3d = anchor_generator.generate(
            area_3d=dataset.kitti_utils.area_extents,
            anchor_3d_sizes=clusters[class_idx],
            anchor_stride=anchor_strides[class_idx],
            ground_plane=ground_plane)

        if anchors_info:
            indices, ious, offsets, classes = anchors_info

            # Get non empty anchors from the indices
            non_empty_anchor_boxes_3d = anchor_boxes_3d[indices]

            all_anchor_boxes_3d.extend(non_empty_anchor_boxes_3d)
            all_ious.extend(ious)

    if not len(all_anchor_boxes_3d) > 0:
        # Exit early if anchors_info is empty
        print("No anchors, please try a different sample")
        return

    # Convert to ndarrays
    all_anchor_boxes_3d = np.asarray(all_anchor_boxes_3d)
    all_ious = np.asarray(all_ious)

    ##############################
    # Flip anchors
    ##############################
    start_time = time.time()

    flipped_anchor_boxes_3d = kitti_aug.flip_boxes_3d(all_anchor_boxes_3d,
                                                      flip_ry=False)

    print('flip anchors', time.time() - start_time)

    # Overwrite with flipped things
    all_anchor_boxes_3d = flipped_anchor_boxes_3d
    points = flipped_points
    ground_truth_list = flipped_gt_list
    ground_plane = flipped_ground_plane
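# The flip augmentation mirrors image, point cloud, labels, ground plane, and
# calibration about the camera's x axis so they stay consistent. A hedged
# sketch of the underlying geometry (the real kitti_aug helpers may handle
# extra details, such as the principal point in flip_stereo_calib_p2):
import numpy as np

def flip_point_cloud_sketch(point_cloud):
    # point_cloud: 3 x N camera-frame points; mirror about the x = 0 plane
    flipped = np.copy(point_cloud)
    flipped[0] = -flipped[0]
    return flipped

def flip_ground_plane_sketch(ground_plane):
    # Negate the x coefficient so a*(-x) + b*y + c*z + d still evaluates to zero
    flipped = np.copy(ground_plane)
    flipped[0] = -flipped[0]
    return flipped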
def load_samples(self, indices):
    """ Loads input-output data for a set of samples. Should only be
        called when a particular sample dict is required. Otherwise,
        samples should be provided by the next_batch function

    Args:
        indices: A list of sample indices from the dataset.sample_list
            to be loaded

    Return:
        samples: a list of data sample dicts
    """
    sample_dicts = []
    for sample_idx in indices:
        sample = self.sample_list[sample_idx]
        sample_name = sample.name

        # Only read labels if they exist
        if self.has_labels:
            # Read mini batch first to see if it is empty
            anchors_info = self.get_anchors_info(sample_name)

            if (not anchors_info) and self.train_val_test == 'train' \
                    and (not self.train_on_all_samples):
                empty_sample_dict = {
                    constants.KEY_SAMPLE_NAME: sample_name,
                    constants.KEY_ANCHORS_INFO: anchors_info
                }
                return [empty_sample_dict]

            obj_labels = obj_utils.read_labels(self.label_dir,
                                               int(sample_name))

            # Only use objects that match dataset classes
            obj_labels = self.kitti_utils.filter_labels(obj_labels)

        else:
            obj_labels = None
            anchors_info = []

        label_anchors = np.zeros((1, 6))
        label_boxes_3d = np.zeros((1, 7))
        label_boxes_2d = np.zeros((1, 4))
        label_classes = np.zeros(1)

        img_idx = int(sample_name)

        lidar_only = False
        num_views = 1

        if not lidar_only:
            # Load image (BGR -> RGB)
            cv_bgr_image = cv2.imread(self.get_rgb_image_path(sample_name))
            rgb_image = cv_bgr_image[..., ::-1]
            image_shape = rgb_image.shape[0:2]

            # Append the depth channel
            if self.add_depth:
                depth_map = obj_utils.get_depth_map(img_idx, self.depth_dir)

                # Set invalid pixels to max depth
                depth_map[np.asarray(depth_map == 0.0)] = \
                    self.kitti_utils.bev_extents[1, 1]

                # Add channel dimension to make stacking easier
                depth_map = np.expand_dims(depth_map, 2)
                image_input = np.concatenate([rgb_image, depth_map], axis=2)
            else:
                image_input = rgb_image
        else:
            image_shape = (370, 1224)

        # Get ground plane
        ground_plane = obj_utils.get_road_plane(int(sample_name),
                                                self.planes_dir)
        # ground_plane = np.array([0, -1, 0, 1.68])

        if lidar_only:
            p_matrix = np.zeros((num_views, 3, 4), dtype=float)
            # NOTE: the hard-coded assignments below index p_matrix[1],
            # which assumes num_views >= 2
            if num_views > 0:
                p_matrix[0] = np.array(
                    [[8.39713500e+02, 3.58853400e+01, 4.48566750e+02,
                      2.31460650e+03],
                     [1.02835238e-13, 8.54979440e+02, 1.57320433e+02,
                      2.49655872e+03],
                     [0.00000000e+00, 7.97452000e-02, 9.96815000e-01,
                      5.14357000e+00]])
                p_matrix[1] = np.array(
                    [[1.20171708e+03, 9.73326000e+01, 3.99933320e+02,
                      1.04945816e+04],
                     [1.41054657e+01, 8.65088160e+02, 8.46334690e+01,
                      5.24229862e+03],
                     [1.62221000e-01, 1.62221000e-01, 9.73329000e-01,
                      1.13555000e+01]])
        else:
            # Get calibration
            stereo_calib_p2 = calib_utils.read_calibration(
                self.calib_dir, int(sample_name)).p2

        point_cloud = self.kitti_utils.get_point_cloud(
            self.bev_source, img_idx, image_shape)

        # Augmentation (Flipping)
        if kitti_aug.AUG_FLIPPING in sample.augs:
            if not lidar_only:
                image_input = kitti_aug.flip_image(image_input)
            point_cloud = kitti_aug.flip_point_cloud(point_cloud)
            obj_labels = [kitti_aug.flip_label(obj, image_shape)
                          for obj in obj_labels]
            ground_plane = kitti_aug.flip_ground_plane(ground_plane)
            if lidar_only:
                for i in range(num_views):
                    p_matrix[i] = kitti_aug.flip_stereo_calib_p2(
                        p_matrix[i], image_shape)
            else:
                stereo_calib_p2 = kitti_aug.flip_stereo_calib_p2(
                    stereo_calib_p2, image_shape)

        # Augmentation (Image Jitter)
        if (kitti_aug.AUG_PCA_JITTER in sample.augs) and not lidar_only:
            image_input[:, :, 0:3] = kitti_aug.apply_pca_jitter(
                image_input[:, :, 0:3], aug_img_noise=self.aug_img_noise)

        # Augmentation (Random Occlusion)
        if kitti_aug.AUG_RANDOM_OCC in sample.augs:
            point_cloud = kitti_aug.occ_aug(point_cloud, stereo_calib_p2,
                                            obj_labels)

        if obj_labels is not None:
            label_boxes_3d = np.asarray([
                box_3d_encoder.object_label_to_box_3d(obj_label)
                for obj_label in obj_labels])
            label_boxes_2d = np.asarray([
                box_3d_encoder.object_label_to_box_2d(obj_label)
                for obj_label in obj_labels])

            label_classes = [
                self.kitti_utils.class_str_to_index(obj_label.type)
                for obj_label in obj_labels]
            label_classes = np.asarray(label_classes, dtype=np.int32)

            # Return empty anchors_info if no ground truth after filtering
            if len(label_boxes_3d) == 0:
                anchors_info = []
                if self.train_on_all_samples:
                    # If training without any positive labels, we cannot
                    # set these to zeros, because later on the offset calc
                    # uses log on these anchors. So setting any arbitrary
                    # number here that does not break the offset calculation
                    # should work, since the negative samples won't be
                    # regressed in any case.
                    dummy_anchors = [[-1000, -1000, -1000, 1, 1, 1]]
                    label_anchors = np.asarray(dummy_anchors)
                    dummy_boxes = [[-1000, -1000, -1000, 1, 1, 1, 0]]
                    label_boxes_3d = np.asarray(dummy_boxes)
                    label_boxes_2d = np.asarray([[-1.0, -1.0, -1.0, -1.0]])
                else:
                    label_anchors = np.zeros((1, 6))
                    label_boxes_3d = np.zeros((1, 7))
                    label_boxes_2d = np.zeros((1, 4))
                label_classes = np.zeros(1)
            else:
                label_anchors = box_3d_encoder.box_3d_to_anchor(
                    label_boxes_3d, ortho_rotate=True)

        # Create BEV maps
        bev_images = self.kitti_utils.create_bev_maps(
            point_cloud, ground_plane)

        height_maps = bev_images.get('height_maps')

        # bev random masking
        """
        bev_drop_p = 0.5
        rand_01 = random.random()
        mask_bev_layer = np.zeros(height_maps[0].shape, dtype=np.float32)
        if rand_01 > bev_drop_p:
            mask_idx = random.randint(0, 4)
            height_maps[mask_idx] = mask_bev_layer
        """
        # print(height_maps[0].shape)

        density_map = bev_images.get('density_map')
        bev_input = np.dstack((*height_maps, density_map))
        # bev_input = np.transpose(np.array(height_maps), (1, 2, 0))

        point_cloud = self.kitti_utils._apply_slice_filter(
            point_cloud, ground_plane).T

        if lidar_only:
            depth_map = np.zeros(
                (num_views, image_shape[0], image_shape[1]), dtype=float)
            for i in range(num_views):
                depth_map[i, :, :] = project_depths(
                    point_cloud, p_matrix[i], image_shape[0:2])
            depth_map_expand_dims = np.expand_dims(depth_map, axis=-1)

            sample_dict = {
                constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
                constants.KEY_LABEL_ANCHORS: label_anchors,
                constants.KEY_LABEL_CLASSES: label_classes,

                constants.KEY_IMAGE_INPUT: depth_map_expand_dims,
                constants.KEY_BEV_INPUT: bev_input,

                constants.KEY_ANCHORS_INFO: anchors_info,

                constants.KEY_POINT_CLOUD: point_cloud,
                constants.KEY_GROUND_PLANE: ground_plane,
                constants.KEY_STEREO_CALIB_P2: p_matrix[0:num_views],

                constants.KEY_SAMPLE_NAME: sample_name,
                constants.KEY_SAMPLE_AUGS: sample.augs,
                constants.KEY_DPT_INPUT: depth_map
            }
        else:
            depth_map = project_depths(point_cloud, stereo_calib_p2,
                                       image_shape[0:2])
            depth_map = np.expand_dims(depth_map, axis=0)

            sample_dict = {
                constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
                constants.KEY_LABEL_BOXES_2D: label_boxes_2d,
                constants.KEY_LABEL_ANCHORS: label_anchors,
                constants.KEY_LABEL_CLASSES: label_classes,

                constants.KEY_IMAGE_INPUT: image_input,
                constants.KEY_BEV_INPUT: bev_input,

                constants.KEY_ANCHORS_INFO: anchors_info,

                constants.KEY_POINT_CLOUD: point_cloud,
                constants.KEY_GROUND_PLANE: ground_plane,
                constants.KEY_STEREO_CALIB_P2: stereo_calib_p2,

                constants.KEY_SAMPLE_NAME: sample_name,
                constants.KEY_SAMPLE_AUGS: sample.augs,
                constants.KEY_DPT_INPUT: depth_map
            }

        sample_dicts.append(sample_dict)

    return sample_dicts
def test_get_road_plane(self):
    plane = obj_utils.get_road_plane(0, self.test_data_planes_dir)
    np.testing.assert_allclose(plane,
                               [-7.051729e-03, -9.997791e-01,
                                -1.980151e-02, 1.680367e+00])
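# The test pins down the plane format: coefficients (a, b, c, d) of
# a*x + b*y + c*z + d = 0, with the normal pointing along -y (up in the
# camera frame) and d close to the camera's height above the road. A derived
# sanity check (hedged; not part of the original test suite):
import numpy as np

plane = np.array([-7.051729e-03, -9.997791e-01, -1.980151e-02, 1.680367e+00])
# Signed distance from the camera centre (the origin) to the plane
camera_height = plane[3] / np.linalg.norm(plane[:3])
print(round(float(camera_height), 3))  # ~1.68 m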
def main():
    """This demo shows RPN proposals and MLOD predictions in the
    3D point cloud.

    Keys:
        F1: Toggle proposals
        F2: Toggle predictions
        F3: Toggle 3D voxel grid
        F4: Toggle point cloud

        F5: Toggle easy ground truth objects (Green)
        F6: Toggle medium ground truth objects (Orange)
        F7: Toggle hard ground truth objects (Red)
        F8: Toggle all ground truth objects (default off)

        F9: Toggle ground slice filter (default off)
        F10: Toggle offset slice filter (default off)
    """
    ##############################
    # Options
    ##############################
    rpn_score_threshold = 0.1
    mlod_score_threshold = 0.1

    proposals_line_width = 1.0
    predictions_line_width = 3.0
    show_orientations = True

    point_cloud_source = 'lidar'

    # Config file folder, default (<mlod_root>/data/outputs/<checkpoint_name>)
    config_dir = None

    checkpoint_name = 'mlod_fpn_people_n_m'
    global_step = 135000  # Latest checkpoint

    # data_split = 'val_half'
    # data_split = 'val'
    data_split = 'test'

    # Show 3D iou text
    draw_ious_3d = False

    sample_name = '000031'

    # # # Cars # # #
    # sample_name = '000050'
    # sample_name = '000104'
    # sample_name = '000169'
    # sample_name = '000191'
    # sample_name = '000360'
    # sample_name = '001783'
    # sample_name = '001820'

    # val split
    # sample_name = '000181'
    # sample_name = '000751'
    # sample_name = '000843'
    # sample_name = '000944'
    # sample_name = '006338'

    # # # People # # #
    # val_half split
    # sample_name = '000001'  # Hard, 1 far cyc
    # sample_name = '000005'  # Easy, 1 ped
    # sample_name = '000122'  # Easy, 1 cyc
    # sample_name = '000134'  # Hard, lots of people
    # sample_name = '000167'  # Medium, 1 ped, 2 cycs
    # sample_name = '000187'  # Medium, 1 ped on left
    # sample_name = '000381'  # Easy, 1 ped
    # sample_name = '000398'  # Easy, 1 ped
    # sample_name = '000401'  # Hard, obscured peds
    # sample_name = '000407'  # Easy, 1 ped
    # sample_name = '000448'  # Hard, several far people
    # sample_name = '000486'  # Hard 2 obscured peds
    # sample_name = '000509'  # Easy, 1 ped
    # sample_name = '000718'  # Hard, lots of people
    # sample_name = '002216'  # Easy, 1 cyc

    # val split
    # sample_name = '000015'
    # sample_name = '000048'
    # sample_name = '000058'
    # sample_name = '000076'  # Medium, few ped, 1 cyc
    # sample_name = '000108'
    # sample_name = '000118'
    # sample_name = '000145'
    # sample_name = '000153'
    # sample_name = '000186'
    # sample_name = '000195'
    # sample_name = '000199'
    # sample_name = '000397'
    # sample_name = '004425'
    # sample_name = '004474'  # Hard, many ped, 1 cyc
    # sample_name = '004657'  # Hard, Few cycl, few ped
    # sample_name = '006071'
    # sample_name = '006828'  # Hard, Few cycl, few ped
    # sample_name = '006908'  # Hard, Few cycl, few ped
    # sample_name = '007412'
    # sample_name = '007318'  # Hard, Few cycl, few ped

    ##############################
    # End of Options
    ##############################
    if data_split == 'test':
        draw_ious_3d = False

    if config_dir is None:
        config_dir = mlod.root_dir() + '/data/outputs/' + checkpoint_name

    # Parse experiment config
    pipeline_config_file = \
        config_dir + '/' + checkpoint_name + '.config'
    _, _, _, dataset_config = \
        config_builder_util.get_configs_from_pipeline_file(
            pipeline_config_file, is_training=False)

    dataset_config.data_split = data_split

    if data_split == 'test':
        dataset_config.data_split_dir = 'testing'
        dataset_config.has_labels = False

    dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                 use_defaults=False)

    # Random sample
    if sample_name is None:
        sample_idx = np.random.randint(0, dataset.num_samples)
        sample_name = dataset.sample_names[sample_idx]

    ##############################
    # Setup Paths
    ##############################
    img_idx = int(sample_name)

    # Text files directory
    proposals_and_scores_dir = mlod.root_dir() + \
        '/data/outputs/' + checkpoint_name + '/predictions' + \
        '/proposals_and_scores/' + dataset.data_split

    predictions_and_scores_dir = mlod.root_dir() + \
        '/data/outputs/' + checkpoint_name + '/predictions' + \
        '/final_predictions_and_scores/' + dataset.data_split

    # Get checkpoint step
    steps = os.listdir(proposals_and_scores_dir)
    steps.sort(key=int)
    print('Available steps: {}'.format(steps))

    # Use latest checkpoint if no index provided
    if global_step is None:
        global_step = steps[-1]

    # Output images directory
    img_out_dir = mlod.root_dir() + '/data/outputs/' + checkpoint_name + \
        '/predictions/images_3d/{}/{}/{}'.format(dataset.data_split,
                                                 global_step,
                                                 rpn_score_threshold)

    if not os.path.exists(img_out_dir):
        os.makedirs(img_out_dir)

    ##############################
    # Proposals
    ##############################
    # Load proposals from files
    proposals_and_scores = np.loadtxt(
        proposals_and_scores_dir +
        "/{}/{}.txt".format(global_step, sample_name))

    proposals = proposals_and_scores[:, 0:7]
    proposal_scores = proposals_and_scores[:, 7]

    rpn_score_mask = proposal_scores > rpn_score_threshold

    proposals = proposals[rpn_score_mask]
    proposal_scores = proposal_scores[rpn_score_mask]
    print('Proposals:', len(proposal_scores), proposal_scores)

    proposal_objs = \
        [box_3d_encoder.box_3d_to_object_label(proposal,
                                               obj_type='Proposal')
         for proposal in proposals]

    ##############################
    # Predictions
    ##############################
    # Load predictions from files
    predictions_and_scores = np.loadtxt(
        predictions_and_scores_dir +
        "/{}/{}.txt".format(global_step, sample_name)).reshape(-1, 9)

    prediction_boxes_3d = predictions_and_scores[:, 0:7]
    prediction_scores = predictions_and_scores[:, 7]
    prediction_types = np.asarray(predictions_and_scores[:, 8],
                                  dtype=np.int32)

    mlod_score_mask = prediction_scores >= mlod_score_threshold
    prediction_boxes_3d = prediction_boxes_3d[mlod_score_mask]
    prediction_scores = prediction_scores[mlod_score_mask]
    print('Predictions: ', len(prediction_scores), prediction_scores)

    final_predictions = np.copy(prediction_boxes_3d)

    # # Swap l, w for predictions where w > l
    # swapped_indices = predictions[:, 4] > predictions[:, 3]
    # final_predictions[swapped_indices, 3] = predictions[swapped_indices, 4]
    # final_predictions[swapped_indices, 4] = predictions[swapped_indices, 3]

    prediction_objs = []
    dataset.classes = ['Pedestrian', 'Cyclist', 'Car']
    for pred_idx in range(len(final_predictions)):
        prediction_box_3d = final_predictions[pred_idx]
        prediction_type = dataset.classes[prediction_types[pred_idx]]
        prediction_obj = box_3d_encoder.box_3d_to_object_label(
            prediction_box_3d, obj_type=prediction_type)
        prediction_objs.append(prediction_obj)

    ##############################
    # Ground Truth
    ##############################
    # Debug override: force-disables the ground truth branch below
    dataset.has_labels = False
    if dataset.has_labels:
        # Get ground truth labels
        easy_gt_objs, medium_gt_objs, \
            hard_gt_objs, all_gt_objs = \
            demo_utils.get_gts_based_on_difficulty(dataset, img_idx)
    else:
        easy_gt_objs = medium_gt_objs = hard_gt_objs = all_gt_objs = []

    ##############################
    # 3D IoU
    ##############################
    if draw_ious_3d:
        # Convert to box_3d
        all_gt_boxes_3d = [
            box_3d_encoder.object_label_to_box_3d(gt_obj)
            for gt_obj in all_gt_objs]
        pred_boxes_3d = [
            box_3d_encoder.object_label_to_box_3d(pred_obj)
            for pred_obj in prediction_objs]
        max_ious_3d = demo_utils.get_max_ious_3d(all_gt_boxes_3d,
                                                 pred_boxes_3d)

    ##############################
    # Point Cloud
    ##############################
    image_path = dataset.get_rgb_image_path(sample_name)
    image = cv2.imread(image_path)

    point_cloud = dataset.kitti_utils.get_point_cloud(point_cloud_source,
                                                      img_idx,
                                                      image_shape=image.shape)
    point_cloud = np.asarray(point_cloud)

    # Filter point cloud to extents
    area_extents = np.asarray([[-40, 40], [-5, 3], [0, 70]])
    bev_extents = area_extents[[0, 2]]

    points = point_cloud.T
    point_filter = obj_utils.get_point_filter(point_cloud, area_extents)
    points = points[point_filter]

    point_colours = vis_utils.project_img_to_point_cloud(points, image,
                                                         dataset.calib_dir,
                                                         img_idx)

    # Voxelize the point cloud for visualization
    voxel_grid = VoxelGrid()
    voxel_grid.voxelize(points, voxel_size=0.1, create_leaf_layout=False)

    # Ground plane
    ground_plane = obj_utils.get_road_plane(img_idx, dataset.planes_dir)

    ##############################
    # Visualization
    ##############################
    # Create VtkVoxelGrid
    vtk_voxel_grid = VtkVoxelGrid()
    vtk_voxel_grid.set_voxels(voxel_grid)

    vtk_point_cloud = VtkPointCloud()
    vtk_point_cloud.set_points(points, point_colours)

    # Create VtkAxes
    vtk_axes = vtk.vtkAxesActor()
    vtk_axes.SetTotalLength(5, 5, 5)

    # Create VtkBoxes for proposal boxes
    vtk_proposal_boxes = VtkBoxes()
    vtk_proposal_boxes.set_line_width(proposals_line_width)
    vtk_proposal_boxes.set_objects(proposal_objs,
                                   COLOUR_SCHEME_PREDICTIONS)

    # Create VtkBoxes for prediction boxes
    vtk_prediction_boxes = VtkPyramidBoxes()
    vtk_prediction_boxes.set_line_width(predictions_line_width)
    vtk_prediction_boxes.set_objects(prediction_objs,
                                     COLOUR_SCHEME_PREDICTIONS,
                                     show_orientations)

    # Create VtkBoxes for ground truth
    vtk_hard_gt_boxes = VtkBoxes()
    vtk_medium_gt_boxes = VtkBoxes()
    vtk_easy_gt_boxes = VtkBoxes()
    vtk_all_gt_boxes = VtkBoxes()

    vtk_hard_gt_boxes.set_objects(hard_gt_objs,
                                  COLOUR_SCHEME_PREDICTIONS,
                                  show_orientations)
    vtk_medium_gt_boxes.set_objects(medium_gt_objs,
                                    COLOUR_SCHEME_PREDICTIONS,
                                    show_orientations)
    vtk_easy_gt_boxes.set_objects(easy_gt_objs,
                                  COLOUR_SCHEME_PREDICTIONS,
                                  show_orientations)
    vtk_all_gt_boxes.set_objects(all_gt_objs,
                                 VtkBoxes.COLOUR_SCHEME_KITTI,
                                 show_orientations)

    # Create VtkTextLabels for 3D ious
    vtk_text_labels = VtkTextLabels()

    if draw_ious_3d and len(all_gt_boxes_3d) > 0:
        gt_positions_3d = np.asarray(all_gt_boxes_3d)[:, 0:3]
        vtk_text_labels.set_text_labels(
            gt_positions_3d,
            ['{:0.3f}'.format(iou_3d) for iou_3d in max_ious_3d])

    # Create VtkGroundPlane
    vtk_ground_plane = VtkGroundPlane()
    vtk_slice_bot_plane = VtkGroundPlane()
    vtk_slice_top_plane = VtkGroundPlane()

    vtk_ground_plane.set_plane(ground_plane, bev_extents)
    vtk_slice_bot_plane.set_plane(ground_plane + [0, 0, 0, -0.2],
                                  bev_extents)
    vtk_slice_top_plane.set_plane(ground_plane + [0, 0, 0, -2.0],
                                  bev_extents)

    # Create Voxel Grid Renderer in bottom half
    vtk_renderer = vtk.vtkRenderer()
    vtk_renderer.AddActor(vtk_voxel_grid.vtk_actor)
    vtk_renderer.AddActor(vtk_point_cloud.vtk_actor)
    vtk_renderer.AddActor(vtk_proposal_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_prediction_boxes.vtk_actor)

    vtk_renderer.AddActor(vtk_hard_gt_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_medium_gt_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_easy_gt_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_all_gt_boxes.vtk_actor)

    vtk_renderer.AddActor(vtk_text_labels.vtk_actor)

    # Add ground plane and slice planes
    vtk_renderer.AddActor(vtk_ground_plane.vtk_actor)
    vtk_renderer.AddActor(vtk_slice_bot_plane.vtk_actor)
    vtk_renderer.AddActor(vtk_slice_top_plane.vtk_actor)

    # vtk_renderer.AddActor(vtk_axes)
    vtk_renderer.SetBackground(0.2, 0.3, 0.4)

    # Set initial properties for some actors
    vtk_point_cloud.vtk_actor.GetProperty().SetPointSize(3)
    vtk_proposal_boxes.vtk_actor.SetVisibility(0)
    vtk_voxel_grid.vtk_actor.SetVisibility(0)
    vtk_all_gt_boxes.vtk_actor.SetVisibility(0)

    vtk_ground_plane.vtk_actor.SetVisibility(0)
    vtk_slice_bot_plane.vtk_actor.SetVisibility(0)
    vtk_slice_top_plane.vtk_actor.SetVisibility(0)
    vtk_ground_plane.vtk_actor.GetProperty().SetOpacity(0.9)
    vtk_slice_bot_plane.vtk_actor.GetProperty().SetOpacity(0.9)
    vtk_slice_top_plane.vtk_actor.GetProperty().SetOpacity(0.9)

    # Setup Camera
    current_cam = vtk_renderer.GetActiveCamera()
    current_cam.Pitch(160.0)
    current_cam.Roll(180.0)

    # Zooms out to fit all points on screen
    vtk_renderer.ResetCamera()
    # Zoom in slightly
    current_cam.Zoom(3.5)

    # Reset the clipping range to show all points
    vtk_renderer.ResetCameraClippingRange()

    # Setup Render Window
    vtk_render_window = vtk.vtkRenderWindow()
    vtk_render_window.SetWindowName(
        "Predictions: Step {}, Sample {}, Min Score {}".format(
            global_step,
            sample_name,
            mlod_score_threshold,
        ))
    vtk_render_window.SetSize(900, 600)
    vtk_render_window.AddRenderer(vtk_renderer)

    # Setup custom interactor style, which handles mouse and key events
    vtk_render_window_interactor = vtk.vtkRenderWindowInteractor()
    vtk_render_window_interactor.SetRenderWindow(vtk_render_window)

    # Add custom interactor to toggle actor visibilities
    custom_interactor = vis_utils.CameraInfoInteractorStyle([
        vtk_proposal_boxes.vtk_actor,
        vtk_prediction_boxes.vtk_actor,
        vtk_voxel_grid.vtk_actor,
        vtk_point_cloud.vtk_actor,

        vtk_easy_gt_boxes.vtk_actor,
        vtk_medium_gt_boxes.vtk_actor,
        vtk_hard_gt_boxes.vtk_actor,
        vtk_all_gt_boxes.vtk_actor,

        vtk_ground_plane.vtk_actor,
        vtk_slice_bot_plane.vtk_actor,
        vtk_slice_top_plane.vtk_actor,
        vtk_text_labels.vtk_actor,
    ])
    vtk_render_window_interactor.SetInteractorStyle(custom_interactor)

    # Render in VTK
    vtk_render_window.Render()

    # Take a screenshot
    window_to_image_filter = vtk.vtkWindowToImageFilter()
    window_to_image_filter.SetInput(vtk_render_window)
    window_to_image_filter.Update()

    png_writer = vtk.vtkPNGWriter()
    file_name = img_out_dir + "/{}.png".format(sample_name)
    png_writer.SetFileName(file_name)
    png_writer.SetInputData(window_to_image_filter.GetOutput())
    png_writer.Write()

    print('Screenshot saved to ', file_name)

    vtk_render_window_interactor.Start()  # Blocking
def preprocess(self, indices):
    """Preprocesses anchor info and saves info to files

    Args:
        indices (int array): sample indices to process.
            If None, processes all samples
    """
    # Get anchor stride for class
    anchor_params = self._anchor_params

    dataset = self._dataset
    dataset_utils = self._dataset.kitti_utils
    classes_name = dataset.classes_name

    anchor_strides = anchor_params['anchor_strides']

    # Make folder if it doesn't exist yet
    output_dir = self.mini_batch_utils.get_file_path(classes_name,
                                                     anchor_strides,
                                                     sample_name=None)
    os.makedirs(output_dir, exist_ok=True)

    # Get clusters for class
    # all_clusters_sizes, _ = dataset.get_cluster_info()

    anchor_generator = grid_anchor_bev_generator.GridAnchorBevGenerator()
    # anchor_type = self._dataset.kitti_utils.anchor_type

    # Load indices of data_split
    all_samples = dataset.sample_list

    if indices is None:
        indices = np.arange(len(all_samples))
        # indices = indices[:10]
    num_samples = len(indices)

    # For each image in the dataset, save info on the anchors
    for sample_idx in indices:
        # Get image name for given cluster
        sample_name = all_samples[sample_idx].name
        img_idx = int(sample_name)

        # Check for existing files and skip to the next
        if self._check_for_existing(classes_name, anchor_strides,
                                    sample_name):
            print("{} / {}: Sample {} already preprocessed".format(
                sample_idx + 1, num_samples, sample_name))
            # continue

        # Get ground truth and filter based on difficulty
        ground_truth_list = obj_utils.read_labels(dataset.label_dir,
                                                  img_idx)

        # Filter objects to dataset classes
        filtered_gt_list = dataset_utils.filter_labels(ground_truth_list)
        filtered_gt_list = np.asarray(filtered_gt_list)

        # If filtering by class leaves no valid ground truth, skip this image
        if len(filtered_gt_list) == 0:
            print("{} / {} No {}s for sample {} "
                  "(Ground Truth Filter)".format(
                      sample_idx + 1, num_samples,
                      classes_name, sample_name))

            # Output an empty file and move on to the next image.
            # comment out for DEBUG
            self._save_to_file(classes_name, anchor_strides, sample_name)
            continue

        # Get ground plane
        ground_plane = obj_utils.get_road_plane(img_idx,
                                                dataset.planes_dir)

        image = Image.open(dataset.get_rgb_image_path(sample_name))
        image_shape = [image.size[1], image.size[0]]

        # List for merging all anchors
        all_level_anchor_boxes_bev = anchor_generator.generate(
            image_shapes=anchor_params['image_shapes'],
            anchor_base_sizes=anchor_params['anchor_base_sizes'],
            anchor_strides=anchor_params['anchor_strides'],
            anchor_ratios=anchor_params['anchor_ratios'],
            anchor_scales=anchor_params['anchor_scales'],
            anchor_init_ry_type=anchor_params['anchor_init_ry_type'])

        # Concatenate anchors from all levels
        # comment out for DEBUG
        all_anchor_boxes_bev = np.concatenate(all_level_anchor_boxes_bev)
        # all_anchor_boxes_bev = all_level_anchor_boxes_bev[-1]

        # Filter empty anchors (whose point count < density_threshold).
        # Prepare anchors_3d, which do not need ry.
        anchors_bev = all_anchor_boxes_bev.copy()
        if anchor_params['anchor_init_ry_type'] == -90:
            anchors_bev[:, [2, 3]] = anchors_bev[:, [3, 2]]
        anchors_3d = box_bev_encoder.box_bev_to_anchor_3d(
            anchors_bev,
            bev_shape=self._bev_shape,
            bev_extents=self._dataset.kitti_utils.area_extents[[0, 2]])
        # print(anchors_3d)

        image = Image.open(dataset.get_rgb_image_path(sample_name))
        image_shape = [image.size[1], image.size[0]]

        # Generate sliced 2D voxel grid for filtering
        vx_grid_2d = dataset_utils.create_sliced_voxel_grid_2d(
            sample_name,
            source=dataset.bev_source,
            image_shape=image_shape)

        empty_anchor_filter = anchor_filter.get_empty_anchor_filter_2d(
            anchors_3d, vx_grid_2d, self._density_threshold)
        print(f'Non empty anchor: {np.sum(empty_anchor_filter)} / '
              f'{len(all_anchor_boxes_bev)}, sample_name: {sample_name}')
        # empty_anchor_filter = np.ones(all_anchor_boxes_bev.shape[0],
        #                               dtype=bool)

        # Calculate anchor info
        anchors_info = self._calculate_anchors_info(
            all_anchor_boxes_bev, empty_anchor_filter, filtered_gt_list)

        n_invalid = np.sum(np.isnan(anchors_info))
        if n_invalid > 0:
            raise ValueError(
                'Invalid value (nan) in anchors_info, sample: ' +
                sample_name)

        anchor_ious = anchors_info[:, self.mini_batch_utils.col_ious]

        valid_iou_indices = np.where(anchor_ious > 0.0)[0]

        print("{} / {}:"
              "{:>6} anchors, "
              "{:>6} iou > 0.0, "
              "for {:>3} {}(s) for sample {}".format(
                  sample_idx + 1, num_samples,
                  len(anchors_info),
                  len(valid_iou_indices),
                  len(filtered_gt_list), classes_name, sample_name))

        # Save anchors info
        # comment out for DEBUG
        self._save_to_file(classes_name, anchor_strides,
                           sample_name, anchors_info)
def load_samples(self, indices):
    """ Loads input-output data for a set of samples. Should only be
        called when a particular sample dict is required. Otherwise,
        samples should be provided by the next_batch function

    Args:
        indices: A list of sample indices from the dataset.sample_list
            to be loaded

    Return:
        samples: a list of data sample dicts
    """
    sample_dicts = []
    for sample_idx in indices:
        sample = self.sample_list[sample_idx]
        sample_name = sample.name

        # Only read labels if they exist
        if self.has_labels:
            # Read mini batch first to see if it is empty
            anchors_info = self.get_anchors_info(sample_name)

            if (not anchors_info) and self.train_val_test == 'train' \
                    and (not self.train_on_all_samples):
                empty_sample_dict = {
                    constants.KEY_SAMPLE_NAME: sample_name,
                    constants.KEY_ANCHORS_INFO: anchors_info
                }
                return [empty_sample_dict]

            obj_labels = obj_utils.read_labels(self.label_dir,
                                               int(sample_name))

            # Only use objects that match dataset classes
            obj_labels = self.kitti_utils.filter_labels(obj_labels)

        else:
            obj_labels = None
            anchors_info = []

        label_anchors = np.zeros((1, 6))
        label_boxes_3d = np.zeros((1, 7))
        label_classes = np.zeros(1)

        img_idx = int(sample_name)

        # Load image (BGR -> RGB)
        cv_bgr_image = cv2.imread(self.get_rgb_image_path(sample_name))
        rgb_image = cv_bgr_image[..., ::-1]
        image_shape = rgb_image.shape[0:2]
        image_input = rgb_image

        # Get ground plane
        ground_plane = obj_utils.get_road_plane(int(sample_name),
                                                self.planes_dir)

        # Get calibration
        stereo_calib = calib_utils.read_calibration(self.calib_dir,
                                                    int(sample_name))
        stereo_calib_p2 = stereo_calib.p2

        point_cloud = self.kitti_utils.get_point_cloud(
            self.bev_source, img_idx, image_shape)

        # Augmentation (Flipping)
        # WZN: the flipping augmentation flips the image (in the camera
        # frame), the point cloud (in the lidar frame), and the calibration
        # matrix (between camera and lidar), so the correspondence between
        # them still holds.
        if kitti_aug.AUG_FLIPPING in sample.augs:
            image_input = kitti_aug.flip_image(image_input)
            point_cloud = kitti_aug.flip_point_cloud(point_cloud)
            obj_labels = [kitti_aug.flip_label_in_3d_only(obj)
                          for obj in obj_labels]
            ground_plane = kitti_aug.flip_ground_plane(ground_plane)
            stereo_calib_p2 = kitti_aug.flip_stereo_calib_p2(
                stereo_calib_p2, image_shape)

        # Augmentation (Image Jitter)
        if kitti_aug.AUG_PCA_JITTER in sample.augs:
            image_input[:, :, 0:3] = kitti_aug.apply_pca_jitter(
                image_input[:, :, 0:3])

        if obj_labels is not None:
            label_boxes_3d = np.asarray([
                box_3d_encoder.object_label_to_box_3d(obj_label)
                for obj_label in obj_labels])

            label_classes = [
                self.kitti_utils.class_str_to_index(obj_label.type)
                for obj_label in obj_labels]
            label_classes = np.asarray(label_classes, dtype=np.int32)

            # Return empty anchors_info if no ground truth after filtering
            if len(label_boxes_3d) == 0:
                anchors_info = []
                if self.train_on_all_samples:
                    # If training without any positive labels, we cannot
                    # set these to zeros, because later on the offset calc
                    # uses log on these anchors. So setting any arbitrary
                    # number here that does not break the offset calculation
                    # should work, since the negative samples won't be
                    # regressed in any case.
                    dummy_anchors = [[-1000, -1000, -1000, 1, 1, 1]]
                    label_anchors = np.asarray(dummy_anchors)
                    dummy_boxes = [[-1000, -1000, -1000, 1, 1, 1, 0]]
                    label_boxes_3d = np.asarray(dummy_boxes)
                else:
                    label_anchors = np.zeros((1, 6))
                    label_boxes_3d = np.zeros((1, 7))
                label_classes = np.zeros(1)
            else:
                label_anchors = box_3d_encoder.box_3d_to_anchor(
                    label_boxes_3d, ortho_rotate=True)

        # Create BEV maps
        bev_images = self.kitti_utils.create_bev_maps(
            point_cloud, ground_plane,
            output_indices=self.output_indices)

        # WZN: produce input for sparse pooling
        if self.output_indices:
            voxel_indices = bev_images[1]
            pts_in_voxel = bev_images[2]
            bev_images = bev_images[0]

        height_maps = bev_images.get('height_maps')
        density_map = bev_images.get('density_map')
        bev_input = np.dstack((*height_maps, density_map))
        # import pdb
        # pdb.set_trace()

        # WZN: produce input for sparse pooling
        if self.output_indices:
            sparse_pooling_input1 = produce_sparse_pooling_input(
                gen_sparse_pooling_input_avod(
                    pts_in_voxel, voxel_indices, stereo_calib,
                    [image_shape[1], image_shape[0]],
                    bev_input.shape[0:2]),
                stride=[1, 1])
            # WZN: Note here avod padded the vgg input by 4, so add it
            bev_input_padded = np.copy(bev_input.shape[0:2])
            bev_input_padded[0] = bev_input_padded[0] + 4
            sparse_pooling_input2 = produce_sparse_pooling_input(
                gen_sparse_pooling_input_avod(
                    pts_in_voxel, voxel_indices, stereo_calib,
                    [image_shape[1], image_shape[0]],
                    bev_input_padded),
                stride=[8, 8])
            sparse_pooling_input = [sparse_pooling_input1,
                                    sparse_pooling_input2]
        else:
            sparse_pooling_input = None

        sample_dict = {
            constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
            constants.KEY_LABEL_ANCHORS: label_anchors,
            constants.KEY_LABEL_CLASSES: label_classes,

            constants.KEY_IMAGE_INPUT: image_input,
            constants.KEY_BEV_INPUT: bev_input,
            # WZN: for sparse pooling
            constants.KEY_SPARSE_POOLING_INPUT: sparse_pooling_input,

            constants.KEY_ANCHORS_INFO: anchors_info,

            constants.KEY_POINT_CLOUD: point_cloud,
            constants.KEY_GROUND_PLANE: ground_plane,
            constants.KEY_STEREO_CALIB_P2: stereo_calib_p2,

            constants.KEY_SAMPLE_NAME: sample_name,
            constants.KEY_SAMPLE_AUGS: sample.augs
        }
        sample_dicts.append(sample_dict)

    return sample_dicts
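# np.dstack((*height_maps, density_map)) stacks the BEV slices channel-wise.
# A minimal shape check (hedged example; the map dimensions are made up here):
import numpy as np

height_maps = [np.zeros((700, 800), dtype=np.float32) for _ in range(5)]
density_map = np.zeros((700, 800), dtype=np.float32)
bev_input = np.dstack((*height_maps, density_map))
print(bev_input.shape)  # (700, 800, 6): five height slices + one density channel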
def main():
    """Visualization of anchor filtering using 3D integral images"""

    anchor_colour_scheme = {
        "Car": (0, 255, 0),            # Green
        "Pedestrian": (255, 150, 50),  # Orange
        "Cyclist": (150, 50, 100),     # Purple
        "DontCare": (255, 0, 0),       # Red
        "Anchor": (0, 0, 255),         # Blue
    }

    # Create Dataset
    dataset = DatasetBuilder.build_kitti_dataset(DatasetBuilder.KITTI_TRAINVAL)

    # Options
    clusters, _ = dataset.get_cluster_info()
    sample_name = "000000"
    img_idx = int(sample_name)
    anchor_stride = [0.5, 0.5]
    ground_plane = obj_utils.get_road_plane(img_idx, dataset.planes_dir)

    anchor_3d_generator = grid_anchor_3d_generator.GridAnchor3dGenerator(
        anchor_3d_sizes=clusters,
        anchor_stride=anchor_stride)

    area_extents = np.array([[-40, 40], [-5, 3], [0, 70]])

    # Generate anchors in box_3d format
    start_time = time.time()
    anchor_boxes_3d = anchor_3d_generator.generate(area_3d=area_extents,
                                                   ground_plane=ground_plane)
    end_time = time.time()
    print("Anchors generated in {} s".format(end_time - start_time))

    point_cloud = obj_utils.get_lidar_point_cloud(img_idx, dataset.calib_dir,
                                                  dataset.velo_dir)

    offset_dist = 2.0

    # Filter points within a certain xyz range and offset from ground plane
    offset_filter = obj_utils.get_point_filter(point_cloud, area_extents,
                                               ground_plane, offset_dist)

    # Filter points within 0.1 m of the road plane
    road_filter = obj_utils.get_point_filter(point_cloud, area_extents,
                                             ground_plane, 0.1)

    # XOR keeps only the slab between 0.1 m and 2.0 m above the ground plane
    slice_filter = np.logical_xor(offset_filter, road_filter)
    point_cloud = point_cloud.T[slice_filter]

    # Generate Voxel Grid
    vx_grid_3d = voxel_grid.VoxelGrid()
    vx_grid_3d.voxelize(point_cloud, 0.1, area_extents)

    # Anchors in anchor format
    all_anchors = box_3d_encoder.box_3d_to_anchor(anchor_boxes_3d)

    # Filter the boxes here!
    start_time = time.time()
    empty_filter = \
        anchor_filter.get_empty_anchor_filter(anchors=all_anchors,
                                              voxel_grid_3d=vx_grid_3d,
                                              density_threshold=1)
    anchor_boxes_3d = anchor_boxes_3d[empty_filter]
    end_time = time.time()
    print("Anchors filtered in {} s".format(end_time - start_time))

    # Visualize GT boxes
    # Grab ground truth
    ground_truth_list = obj_utils.read_labels(dataset.label_dir, img_idx)

    # ----------
    # Test Sample extraction

    # Visualize from here
    vis_utils.visualization(dataset.rgb_image_dir, img_idx)
    plt.show(block=False)

    image_path = dataset.get_rgb_image_path(sample_name)
    image_shape = np.array(Image.open(image_path)).shape

    rgb_boxes, rgb_normalized_boxes = \
        anchor_projector.project_to_image_space(all_anchors, dataset,
                                                image_shape, img_idx)

    # Overlay boxes on images
    anchor_objects = []
    for anchor_idx in range(len(anchor_boxes_3d)):
        anchor_box_3d = anchor_boxes_3d[anchor_idx]
        obj_label = box_3d_encoder.box_3d_to_object_label(anchor_box_3d,
                                                          'Anchor')
        # Append to a list for visualization in VTK later
        anchor_objects.append(obj_label)

    for idx in range(len(ground_truth_list)):
        ground_truth_obj = ground_truth_list[idx]
        # Append to a list for visualization in VTK later
        anchor_objects.append(ground_truth_obj)

    # Create VtkAxes
    axes = vtk.vtkAxesActor()
    axes.SetTotalLength(5, 5, 5)

    # Create VtkBoxes for boxes
    vtk_boxes = VtkBoxes()
    vtk_boxes.set_objects(anchor_objects, anchor_colour_scheme)

    vtk_point_cloud = VtkPointCloud()
    vtk_point_cloud.set_points(point_cloud)

    vtk_voxel_grid = VtkVoxelGrid()
    vtk_voxel_grid.set_voxels(vx_grid_3d)

    # Create Voxel Grid Renderer in bottom half
    vtk_renderer = vtk.vtkRenderer()
    vtk_renderer.AddActor(vtk_boxes.vtk_actor)
    # vtk_renderer.AddActor(vtk_point_cloud.vtk_actor)
    vtk_renderer.AddActor(vtk_voxel_grid.vtk_actor)
    vtk_renderer.AddActor(axes)
    vtk_renderer.SetBackground(0.2, 0.3, 0.4)

    # Setup Camera
    current_cam = vtk_renderer.GetActiveCamera()
    current_cam.Pitch(170.0)
    current_cam.Roll(180.0)

    # Zooms out to fit all points on screen
    vtk_renderer.ResetCamera()

    # Zoom in slightly
    current_cam.Zoom(2.5)

    # Reset the clipping range to show all points
    vtk_renderer.ResetCameraClippingRange()

    # Setup Render Window
    vtk_render_window = vtk.vtkRenderWindow()
    vtk_render_window.SetWindowName("Anchors")
    vtk_render_window.SetSize(900, 500)
    vtk_render_window.AddRenderer(vtk_renderer)

    # Setup custom interactor style, which handles mouse and key events
    vtk_render_window_interactor = vtk.vtkRenderWindowInteractor()
    vtk_render_window_interactor.SetRenderWindow(vtk_render_window)
    vtk_render_window_interactor.SetInteractorStyle(
        vtk.vtkInteractorStyleTrackballCamera())

    # Render in VTK
    vtk_render_window.Render()
    vtk_render_window_interactor.Start()  # Blocking
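# The slice filter above is just the XOR of two ground-plane offset filters.
# A minimal standalone sketch of the idea (plain numpy; the helper below is
# a hypothetical stand-in for obj_utils.get_point_filter, and the plane/sign
# conventions are assumed, not taken from the repo):
import numpy as np

def points_within_offset(points, plane, max_offset):
    # Keep points whose height above the plane ax + by + cz + d = 0
    # lies in [0, max_offset]. points: (N, 3), plane: [a, b, c, d].
    normal = np.array(plane[:3])
    heights = (points @ normal + plane[3]) / np.linalg.norm(normal)
    return (heights >= 0) & (heights <= max_offset)

# Camera frame with y pointing down: [0, -1, 0, 1.65] puts the road
# 1.65 m below the camera, and heights measure metres above the road
plane = [0.0, -1.0, 0.0, 1.65]
points = np.random.uniform([-40, -0.5, 0], [40, 1.65, 70], (1000, 3))

offset_filter = points_within_offset(points, plane, 2.0)  # 0 to 2.0 m
road_filter = points_within_offset(points, plane, 0.1)    # 0 to 0.1 m

# XOR keeps points in exactly one mask: the slab from 0.1 m to 2.0 m
slice_filter = np.logical_xor(offset_filter, road_filter)
print(points[slice_filter].shape)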
def load_samples(self, indices, sin_type=None, sin_level=None,
                 sin_input_name=None, gen_all_sin_inputs=False,
                 list_mask_2d=None):
    """Loads input-output data for a set of samples. Should only be
    called when a particular sample dict is required. Otherwise, samples
    should be provided by the next_batch function

    Args:
        indices: A list of sample indices from the dataset.sample_list
            to be loaded

    Returns:
        samples: a list of data sample dicts
    """
    sample_dicts = []
    for idx, sample_idx in enumerate(indices):
        sample = self.sample_list[sample_idx]
        sample_name = sample.name

        if list_mask_2d:
            mask_2d = list_mask_2d[idx]
        else:
            mask_2d = None

        # Only read labels if they exist
        if self.has_labels:
            # Read mini batch first to see if it is empty
            anchors_info = self.get_anchors_info(sample_name)

            if (not anchors_info) and self.train_val_test == 'train' \
                    and (not self.train_on_all_samples):
                empty_sample_dict = {
                    constants.KEY_SAMPLE_NAME: sample_name,
                    constants.KEY_ANCHORS_INFO: anchors_info
                }
                return [empty_sample_dict]

            obj_labels = obj_utils.read_labels(self.label_dir,
                                               int(sample_name))
            # Only use objects that match dataset classes
            obj_labels = self.kitti_utils.filter_labels(obj_labels)

        else:
            obj_labels = None
            anchors_info = []
            label_anchors = np.zeros((1, 6))
            label_boxes_3d = np.zeros((1, 7))
            label_classes = np.zeros(1)

        img_idx = int(sample_name)

        # Load image (BGR -> RGB)
        cv_bgr_image = cv2.imread(self.get_rgb_image_path(sample_name))
        rgb_image = cv_bgr_image[..., ::-1]
        image_shape = rgb_image.shape[0:2]
        image_input = rgb_image

        # Get ground plane
        ground_plane = obj_utils.get_road_plane(int(sample_name),
                                                self.planes_dir)

        # Get calibration
        stereo_calib_p2 = calib_utils.read_calibration(
            self.calib_dir, int(sample_name)).p2

        # Read lidar with subsampling (handled before other preprocessing).
        # Both SIN cases read the strided point cloud in the same way.
        if sin_type == 'lowres' and \
                (sin_input_name == 'lidar' or gen_all_sin_inputs):
            stride_sub = get_stride_sub(sin_level)
            point_cloud = get_point_cloud_sub(img_idx, self.calib_dir,
                                              self.velo_dir, image_shape,
                                              stride_sub)
        else:
            point_cloud = self.kitti_utils.get_point_cloud(
                self.bev_source, img_idx, image_shape)

        # Augmentation (Flipping)
        if kitti_aug.AUG_FLIPPING in sample.augs:
            image_input = kitti_aug.flip_image(image_input)
            point_cloud = kitti_aug.flip_point_cloud(point_cloud)
            obj_labels = [kitti_aug.flip_label_in_3d_only(obj)
                          for obj in obj_labels]
            ground_plane = kitti_aug.flip_ground_plane(ground_plane)
            stereo_calib_p2 = kitti_aug.flip_stereo_calib_p2(
                stereo_calib_p2, image_shape)

        # Augmentation (Image Jitter)
        if kitti_aug.AUG_PCA_JITTER in sample.augs:
            image_input[:, :, 0:3] = kitti_aug.apply_pca_jitter(
                image_input[:, :, 0:3])

        # Add Single Input Noise
        if (sin_input_name in SINFields.SIN_INPUT_NAMES) and \
                (sin_type in SINFields.VALID_SIN_TYPES):
            image_input, point_cloud = genSINtoInputs(
                image_input, point_cloud,
                sin_type=sin_type,
                sin_level=sin_level,
                sin_input_name=sin_input_name,
                mask_2d=mask_2d,
                frame_calib_p2=stereo_calib_p2)

        # Add Input Noise to all
        if gen_all_sin_inputs:
            image_input, point_cloud = genSINtoAllInputs(
                image_input, point_cloud,
                sin_type=sin_type,
                sin_level=sin_level,
                mask_2d=mask_2d,
                frame_calib_p2=stereo_calib_p2)

        if obj_labels is not None:
            label_boxes_3d = np.asarray(
                [box_3d_encoder.object_label_to_box_3d(obj_label)
                 for obj_label in obj_labels])

            label_classes = [
                self.kitti_utils.class_str_to_index(obj_label.type)
                for obj_label in obj_labels]
            label_classes = np.asarray(label_classes, dtype=np.int32)

            # Return empty anchors_info if no ground truth after filtering
            if len(label_boxes_3d) == 0:
                anchors_info = []
                if self.train_on_all_samples:
                    # If training without any positive labels, we cannot
                    # set these to zeros, because later on the offset calc
                    # uses log on these anchors. So setting any arbitrary
                    # number here that does not break the offset calculation
                    # should work, since the negative samples won't be
                    # regressed in any case.
                    dummy_anchors = [[-1000, -1000, -1000, 1, 1, 1]]
                    label_anchors = np.asarray(dummy_anchors)
                    dummy_boxes = [[-1000, -1000, -1000, 1, 1, 1, 0]]
                    label_boxes_3d = np.asarray(dummy_boxes)
                else:
                    label_anchors = np.zeros((1, 6))
                    label_boxes_3d = np.zeros((1, 7))
                label_classes = np.zeros(1)
            else:
                label_anchors = box_3d_encoder.box_3d_to_anchor(
                    label_boxes_3d, ortho_rotate=True)

        # Create BEV maps
        bev_images = self.kitti_utils.create_bev_maps(
            point_cloud, ground_plane)

        height_maps = bev_images.get('height_maps')
        density_map = bev_images.get('density_map')
        bev_input = np.dstack((*height_maps, density_map))

        sample_dict = {
            constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
            constants.KEY_LABEL_ANCHORS: label_anchors,
            constants.KEY_LABEL_CLASSES: label_classes,
            constants.KEY_IMAGE_INPUT: image_input,
            constants.KEY_BEV_INPUT: bev_input,
            constants.KEY_ANCHORS_INFO: anchors_info,
            constants.KEY_POINT_CLOUD: point_cloud,
            constants.KEY_GROUND_PLANE: ground_plane,
            constants.KEY_STEREO_CALIB_P2: stereo_calib_p2,
            constants.KEY_SAMPLE_NAME: sample_name,
            constants.KEY_SAMPLE_AUGS: sample.augs
        }
        sample_dicts.append(sample_dict)

    return sample_dicts
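# A minimal usage sketch for the loader above (assumed setup; the dataset
# object and constants follow the snippets in this file, and the sin_level
# value is illustrative, not taken from the source):
dataset = DatasetBuilder.build_kitti_dataset(DatasetBuilder.KITTI_TRAINVAL)

# Load one clean sample, then the same sample with low-resolution lidar
clean = dataset.load_samples([0])[0]
noisy = dataset.load_samples([0], sin_type='lowres', sin_level=1,
                             sin_input_name='lidar')[0]

print(clean[constants.KEY_SAMPLE_NAME],
      clean[constants.KEY_BEV_INPUT].shape)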
def main():
    """Flip RPN Mini Batch

    Visualization of the mini batch anchors for RpnModel training.

    Keys:
        F1: Toggle mini batch anchors
        F2: Flipped
    """

    anchor_colour_scheme = {
        "Car": (255, 0, 0),                 # Red
        "Pedestrian": (255, 150, 50),       # Orange
        "Cyclist": (150, 50, 100),          # Purple
        "DontCare": (255, 255, 255),        # White
        "Anchor": (150, 150, 150),          # Gray
        "Regressed Anchor": (255, 255, 0),  # Yellow
        "Positive": (0, 255, 255),          # Cyan
        "Negative": (255, 0, 255)           # Magenta
    }

    dataset_config_path = mlod.root_dir() + \
        '/configs/mb_rpn_demo_cars.config'

    # dataset_config_path = mlod.root_dir() + \
    #     '/configs/mb_rpn_demo_people.config'

    ##############################
    # Options
    ##############################

    # # # Random sample # # #
    # sample_name = None

    # # # Cars # # #
    # sample_name = "000001"
    # sample_name = "000050"
    # sample_name = "000104"
    # sample_name = "000112"
    # sample_name = "000169"
    # sample_name = "000191"
    sample_name = "003801"

    # # # Pedestrians # # #
    # sample_name = "000000"
    # sample_name = "000011"
    # sample_name = "000015"
    # sample_name = "000028"
    # sample_name = "000035"
    # sample_name = "000134"
    # sample_name = "000167"
    # sample_name = '000379'
    # sample_name = '000381'
    # sample_name = '000397'
    # sample_name = '000398'
    # sample_name = '000401'
    # sample_name = '000407'
    # sample_name = '000486'
    # sample_name = '000509'

    # # # Cyclists # # #
    # sample_name = '000122'
    # sample_name = '000448'

    # # # Multiple classes # # #
    # sample_name = "000764"

    ##############################
    # End of Options
    ##############################

    # Create Dataset
    dataset = DatasetBuilder.load_dataset_from_config(dataset_config_path)

    # Random sample
    if sample_name is None:
        sample_idx = np.random.randint(0, dataset.num_samples)
        sample_name = dataset.sample_list[sample_idx]

    anchor_strides = dataset.kitti_utils.anchor_strides

    img_idx = int(sample_name)

    print("Showing mini batch for sample {}".format(sample_name))

    image = cv2.imread(dataset.get_rgb_image_path(sample_name))
    image_shape = [image.shape[1], image.shape[0]]

    # KittiUtils class
    dataset_utils = dataset.kitti_utils

    ground_plane = obj_utils.get_road_plane(img_idx, dataset.planes_dir)

    point_cloud = obj_utils.get_depth_map_point_cloud(img_idx,
                                                      dataset.calib_dir,
                                                      dataset.depth_dir,
                                                      image_shape)
    points = point_cloud.T

    # Grab ground truth
    ground_truth_list = obj_utils.read_labels(dataset.label_dir, img_idx)
    ground_truth_list = dataset_utils.filter_labels(ground_truth_list)

    stereo_calib_p2 = calib_utils.read_calibration(dataset.calib_dir,
                                                   img_idx).p2

    ##############################
    # Flip sample info
    ##############################
    start_time = time.time()

    flipped_image = kitti_aug.flip_image(image)
    flipped_point_cloud = kitti_aug.flip_point_cloud(point_cloud)
    flipped_gt_list = [kitti_aug.flip_label_in_3d_only(obj)
                       for obj in ground_truth_list]
    flipped_ground_plane = kitti_aug.flip_ground_plane(ground_plane)
    flipped_calib_p2 = kitti_aug.flip_stereo_calib_p2(stereo_calib_p2,
                                                      image_shape)

    print('flip sample', time.time() - start_time)

    flipped_points = flipped_point_cloud.T

    point_colours = vis_utils.project_img_to_point_cloud(points, image,
                                                         dataset.calib_dir,
                                                         img_idx)

    ##############################
    # Generate anchors
    ##############################
    clusters, _ = dataset.get_cluster_info()
    anchor_generator = grid_anchor_3d_generator.GridAnchor3dGenerator()

    # Read mini batch info
    anchors_info = dataset_utils.get_anchors_info(sample_name)

    all_anchor_boxes_3d = []
    all_ious = []
    all_offsets = []
    for class_idx in range(len(dataset.classes)):

        anchor_boxes_3d = anchor_generator.generate(
            area_3d=dataset.kitti_utils.area_extents,
            anchor_3d_sizes=clusters[class_idx],
            anchor_stride=anchor_strides[class_idx],
            ground_plane=ground_plane)

        if len(anchors_info[class_idx]) > 0:
            indices, ious, offsets, classes = anchors_info[class_idx]

            # Get non empty anchors from the indices
            non_empty_anchor_boxes_3d = anchor_boxes_3d[indices]

            all_anchor_boxes_3d.extend(non_empty_anchor_boxes_3d)
            all_ious.extend(ious)
            all_offsets.extend(offsets)

    if len(all_anchor_boxes_3d) == 0:
        # Exit early if anchors_info is empty
        print("No anchors, please try a different sample")
        return

    # Convert to ndarrays
    all_anchor_boxes_3d = np.asarray(all_anchor_boxes_3d)
    all_ious = np.asarray(all_ious)
    all_offsets = np.asarray(all_offsets)

    ##############################
    # Flip anchors
    ##############################
    start_time = time.time()

    # Flip anchors and offsets
    flipped_anchor_boxes_3d = kitti_aug.flip_boxes_3d(all_anchor_boxes_3d,
                                                      flip_ry=False)
    all_offsets[:, 0] = -all_offsets[:, 0]

    print('flip anchors and offsets', time.time() - start_time)

    # Overwrite with flipped things
    all_anchor_boxes_3d = flipped_anchor_boxes_3d
    points = flipped_points
    ground_truth_list = flipped_gt_list
    ground_plane = flipped_ground_plane

    ##############################
    # Mini batch sampling
    ##############################
    # Sample an RPN mini batch from the non empty anchors
    mini_batch_utils = dataset.kitti_utils.mini_batch_utils
    mb_mask_tf, _ = mini_batch_utils.sample_rpn_mini_batch(all_ious)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    mb_mask = sess.run(mb_mask_tf)

    mb_anchor_boxes_3d = all_anchor_boxes_3d[mb_mask]
    mb_anchor_ious = all_ious[mb_mask]
    mb_anchor_offsets = all_offsets[mb_mask]

    # ObjectLabel list that holds all boxes to visualize
    obj_list = []

    # Convert the mini batch anchors to an object list
    for i in range(len(mb_anchor_boxes_3d)):
        if mb_anchor_ious[i] > mini_batch_utils.rpn_pos_iou_range[0]:
            obj_type = "Positive"
        else:
            obj_type = "Negative"

        obj = box_3d_encoder.box_3d_to_object_label(mb_anchor_boxes_3d[i],
                                                    obj_type)
        obj_list.append(obj)

    # Convert all non-empty anchors to an object list
    non_empty_anchor_objs = \
        [box_3d_encoder.box_3d_to_object_label(anchor_box_3d,
                                               obj_type='Anchor')
         for anchor_box_3d in all_anchor_boxes_3d]

    ##############################
    # Regress Positive Anchors
    ##############################
    # Convert anchor_boxes_3d to anchors and apply offsets
    mb_pos_mask = mb_anchor_ious > mini_batch_utils.rpn_pos_iou_range[0]
    mb_pos_anchor_boxes_3d = mb_anchor_boxes_3d[mb_pos_mask]
    mb_pos_anchor_offsets = mb_anchor_offsets[mb_pos_mask]

    mb_pos_anchors = box_3d_encoder.box_3d_to_anchor(mb_pos_anchor_boxes_3d)
    regressed_pos_anchors = anchor_encoder.offset_to_anchor(
        mb_pos_anchors, mb_pos_anchor_offsets)

    # Convert regressed anchors to ObjectLabels for visualization
    regressed_anchor_boxes_3d = box_3d_encoder.anchors_to_box_3d(
        regressed_pos_anchors, fix_lw=True)
    regressed_anchor_objs = \
        [box_3d_encoder.box_3d_to_object_label(box_3d,
                                               obj_type='Regressed Anchor')
         for box_3d in regressed_anchor_boxes_3d]

    ##############################
    # Visualization
    ##############################
    cv2.imshow('{} flipped'.format(sample_name), flipped_image)
    cv2.waitKey()

    # Create VtkAxes
    axes = vtk.vtkAxesActor()
    axes.SetTotalLength(5, 5, 5)

    # Create VtkBoxes for mini batch anchors
    vtk_pos_anchor_boxes = VtkBoxes()
    vtk_pos_anchor_boxes.set_objects(obj_list, anchor_colour_scheme)

    # VtkBoxes for non empty anchors
    vtk_non_empty_anchors = VtkBoxes()
    vtk_non_empty_anchors.set_objects(non_empty_anchor_objs,
                                      anchor_colour_scheme)
    vtk_non_empty_anchors.set_line_width(0.1)

    # VtkBoxes for regressed anchors
    vtk_regressed_anchors = VtkBoxes()
    vtk_regressed_anchors.set_objects(regressed_anchor_objs,
                                      anchor_colour_scheme)
    vtk_regressed_anchors.set_line_width(5.0)

    # Create VtkBoxes for ground truth
    vtk_gt_boxes = VtkBoxes()
    vtk_gt_boxes.set_objects(ground_truth_list, anchor_colour_scheme,
                             show_orientations=True)

    vtk_point_cloud = VtkPointCloud()
    vtk_point_cloud.set_points(points, point_colours)

    vtk_ground_plane = VtkGroundPlane()
    vtk_ground_plane.set_plane(ground_plane, dataset.kitti_utils.bev_extents)

    # Create Voxel Grid Renderer in bottom half
    vtk_renderer = vtk.vtkRenderer()
    vtk_renderer.AddActor(vtk_point_cloud.vtk_actor)
    vtk_renderer.AddActor(vtk_non_empty_anchors.vtk_actor)
    vtk_renderer.AddActor(vtk_pos_anchor_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_regressed_anchors.vtk_actor)
    vtk_renderer.AddActor(vtk_gt_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_ground_plane.vtk_actor)
    vtk_renderer.AddActor(axes)
    vtk_renderer.SetBackground(0.2, 0.3, 0.4)

    # Setup Camera
    current_cam = vtk_renderer.GetActiveCamera()
    current_cam.Pitch(160.0)
    current_cam.Roll(180.0)

    # Zooms out to fit all points on screen
    vtk_renderer.ResetCamera()

    # Zoom in slightly
    current_cam.Zoom(2.5)

    # Reset the clipping range to show all points
    vtk_renderer.ResetCameraClippingRange()

    # Setup Render Window
    vtk_render_window = vtk.vtkRenderWindow()
    vtk_render_window.SetWindowName("RPN Mini Batch")
    vtk_render_window.SetSize(900, 500)
    vtk_render_window.AddRenderer(vtk_renderer)

    # Setup custom interactor style, which handles mouse and key events
    vtk_render_window_interactor = vtk.vtkRenderWindowInteractor()
    vtk_render_window_interactor.SetRenderWindow(vtk_render_window)
    vtk_render_window_interactor.SetInteractorStyle(
        vis_utils.ToggleActorsInteractorStyle([
            vtk_non_empty_anchors.vtk_actor,
            vtk_pos_anchor_boxes.vtk_actor,
            vtk_regressed_anchors.vtk_actor,
            vtk_ground_plane.vtk_actor,
        ]))

    # Render in VTK
    vtk_render_window.Render()
    vtk_render_window_interactor.Start()
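# Why does flipping negate only the first offset column above? With
# flip_ry=False the anchors stay axis-aligned, and under the usual
# normalized-offset encoding (an assumption here, not quoted from the
# repo) t_x = (x_gt - x_anchor) / d_anchor. Mirroring the scene about
# the x = 0 plane negates both centres, so only t_x changes sign while
# the y, z, and size offsets are unaffected:
import numpy as np

x_anchor, x_gt, d_anchor = 5.0, 5.8, 4.0
t_x = (x_gt - x_anchor) / d_anchor              # 0.2
t_x_flipped = (-x_gt - (-x_anchor)) / d_anchor  # -0.2
assert np.isclose(t_x_flipped, -t_x)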
def main():
    """Shows a flipped sample in 3D"""

    # Create Dataset
    dataset = DatasetBuilder.build_kitti_dataset(DatasetBuilder.KITTI_TRAINVAL)

    ##############################
    # Options
    ##############################
    # sample_name = "000191"
    sample_name = "000104"

    img_idx = int(sample_name)
    print("Showing anchors for sample {}".format(sample_name))

    ##############################
    # Load Sample Data
    ##############################
    ground_plane = obj_utils.get_road_plane(img_idx, dataset.planes_dir)

    image = cv2.imread(dataset.get_rgb_image_path(sample_name))
    image_shape = [image.shape[1], image.shape[0]]

    # Get point cloud
    point_cloud = obj_utils.get_depth_map_point_cloud(img_idx,
                                                      dataset.calib_dir,
                                                      dataset.depth_dir,
                                                      image_shape)
    points = np.array(point_cloud).T

    # Ground truth
    gt_labels = obj_utils.read_labels(dataset.label_dir, img_idx)

    # Filter ground truth
    gt_labels = dataset.kitti_utils.filter_labels(gt_labels)

    ##############################
    # Flip stuff
    ##############################
    image_flipped = np.fliplr(image)

    # Flip ground plane coeff (x)
    ground_plane_flipped = np.copy(ground_plane)
    ground_plane_flipped[0] = -ground_plane_flipped[0]

    # Flip 3D points
    points_flipped = kitti_aug.flip_points(points)

    # Get point cloud colours
    point_colours_flipped = project_flipped_img_to_point_cloud(
        points_flipped, image_flipped, dataset.calib_dir, img_idx)

    # Flip ground truth boxes
    gt_labels_flipped = [kitti_aug.flip_label_in_3d_only(obj)
                         for obj in gt_labels]

    ##############################
    # VTK Visualization
    ##############################
    # Axes
    axes = vtk.vtkAxesActor()
    axes.SetTotalLength(5, 5, 5)

    # Point cloud
    vtk_point_cloud = VtkPointCloud()
    vtk_point_cloud.set_points(points_flipped,
                               point_colours=point_colours_flipped)

    # Ground Truth Boxes
    vtk_boxes = VtkBoxes()
    vtk_boxes.set_objects(gt_labels_flipped,
                          VtkBoxes.COLOUR_SCHEME_KITTI,
                          show_orientations=True)

    # Renderer
    vtk_renderer = vtk.vtkRenderer()
    vtk_renderer.SetBackground(0.2, 0.3, 0.4)

    # Add Actors to Renderer
    vtk_renderer.AddActor(axes)
    vtk_renderer.AddActor(vtk_point_cloud.vtk_actor)
    vtk_renderer.AddActor(vtk_boxes.vtk_actor)

    # Setup Camera
    current_cam = vtk_renderer.GetActiveCamera()
    current_cam.Pitch(170.0)
    current_cam.Roll(180.0)

    # Zooms out to fit all points on screen
    vtk_renderer.ResetCamera()

    # Zoom in slightly
    current_cam.Zoom(2.5)

    # Reset the clipping range to show all points
    vtk_renderer.ResetCameraClippingRange()

    # Setup Render Window
    vtk_render_window = vtk.vtkRenderWindow()
    vtk_render_window.SetWindowName("Flipped Sample")
    vtk_render_window.SetSize(900, 500)
    vtk_render_window.AddRenderer(vtk_renderer)

    # Setup custom interactor style, which handles mouse and key events
    vtk_render_window_interactor = vtk.vtkRenderWindowInteractor()
    vtk_render_window_interactor.SetRenderWindow(vtk_render_window)
    vtk_render_window_interactor.SetInteractorStyle(
        vis_utils.ToggleActorsInteractorStyle([
            vtk_point_cloud.vtk_actor,
        ]))

    # Render in VTK
    vtk_render_window.Render()
    vtk_render_window_interactor.Start()
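# A minimal sketch of what flipping a label "in 3D only" does to a
# box_3d = [x, y, z, l, w, h, ry]: the centre mirrors about x = 0 and
# the yaw reflects. The ry wrapping below follows the common KITTI
# convention of keeping ry in [-pi, pi]; kitti_aug's exact branch may
# differ slightly, so treat this as an illustration:
import numpy as np

def flip_box_3d_about_x0(box_3d):
    x, y, z, l, w, h, ry = box_3d
    flipped_ry = np.pi - ry if ry >= 0 else -np.pi - ry
    return [-x, y, z, l, w, h, flipped_ry]

print(flip_box_3d_about_x0([3.0, 1.6, 20.0, 4.0, 1.6, 1.5, 0.3]))
# [-3.0, 1.6, 20.0, 4.0, 1.6, 1.5, 2.8415926...]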
def main():
    """Visualization for comparison of anchor filtering with 2D vs 3D
    integral images

    Keys:
        F1: Toggle 3D integral image filtered anchors
        F2: Toggle 2D integral image filtered anchors
        F3: Toggle 2D integral image empty anchors
    """

    anchor_2d_colour_scheme = {"Anchor": (0, 0, 255)}  # Blue
    anchor_3d_colour_scheme = {"Anchor": (0, 255, 0)}  # Green
    anchor_unfiltered_colour_scheme = {"Anchor": (255, 0, 255)}  # Magenta

    # Create Dataset
    dataset = DatasetBuilder.build_kitti_dataset(DatasetBuilder.KITTI_TRAINVAL)
    sample_name = "000001"
    img_idx = int(sample_name)
    print("Showing anchors for sample {}".format(sample_name))

    # Options
    # These clusters are from the trainval set and give more 2D anchors
    # than 3D
    clusters = np.array([[3.55, 1.835, 1.525], [4.173, 1.69, 1.49]])
    anchor_stride = [3.0, 3.0]
    ground_plane = obj_utils.get_road_plane(img_idx, dataset.planes_dir)

    area_extents = np.array([[-40, 40], [-5, 3], [0, 70]])

    anchor_3d_generator = grid_anchor_3d_generator.GridAnchor3dGenerator()

    # Generate anchors
    start_time = time.time()
    anchor_boxes_3d = anchor_3d_generator.generate(
        area_3d=area_extents,
        anchor_3d_sizes=clusters,
        anchor_stride=anchor_stride,
        ground_plane=ground_plane)
    end_time = time.time()
    print("Anchors generated in {} s".format(end_time - start_time))

    # Get point cloud
    point_cloud = obj_utils.get_stereo_point_cloud(img_idx,
                                                   dataset.calib_dir,
                                                   dataset.disp_dir)

    ground_offset_dist = 0.2
    offset_dist = 2.0

    # Keep points within a certain xyz range that are between
    # ground_offset_dist and offset_dist above the ground plane
    slice_filter = dataset.kitti_utils.create_slice_filter(
        point_cloud, area_extents, ground_plane,
        ground_offset_dist, offset_dist)
    points = np.array(point_cloud).T
    points = points[slice_filter]

    anchors = box_3d_encoder.box_3d_to_anchor(anchor_boxes_3d)

    # Create 2D voxel grid
    vx_grid_2d = voxel_grid_2d.VoxelGrid2D()
    vx_grid_2d.voxelize_2d(points, 0.1, area_extents)

    # Create 3D voxel grid
    vx_grid_3d = voxel_grid.VoxelGrid()
    vx_grid_3d.voxelize(points, 0.1, area_extents)

    # Filter the boxes here!
    start_time = time.time()
    empty_filter_2d = anchor_filter.get_empty_anchor_filter_2d(
        anchors=anchors,
        voxel_grid_2d=vx_grid_2d,
        density_threshold=1)
    anchors_2d = anchor_boxes_3d[empty_filter_2d]
    end_time = time.time()
    print("2D anchors filtered in {} s".format(end_time - start_time))
    print("Number of 2D anchors remaining: %d" % anchors_2d.shape[0])

    unfiltered_anchors_2d = anchor_boxes_3d[np.logical_not(empty_filter_2d)]

    # 3D filtering
    start_time = time.time()
    empty_filter_3d = anchor_filter.get_empty_anchor_filter(
        anchors=anchors,
        voxel_grid_3d=vx_grid_3d,
        density_threshold=1)
    anchor_boxes_3d = anchor_boxes_3d[empty_filter_3d]
    end_time = time.time()
    print("3D anchors filtered in {} s".format(end_time - start_time))
    print("Number of 3D anchors remaining: %d" % anchor_boxes_3d.shape[0])

    anchor_2d_objects = []
    for anchor_idx in range(len(anchors_2d)):
        anchor = anchors_2d[anchor_idx]
        obj_label = box_3d_encoder.box_3d_to_object_label(anchor, 'Anchor')

        # Append to a list for visualization in VTK later
        anchor_2d_objects.append(obj_label)

    anchor_3d_objects = []
    for anchor_idx in range(len(anchor_boxes_3d)):
        anchor = anchor_boxes_3d[anchor_idx]
        obj_label = box_3d_encoder.box_3d_to_object_label(anchor, 'Anchor')

        # Append to a list for visualization in VTK later
        anchor_3d_objects.append(obj_label)

    unfiltered_anchor_objects = []
    for anchor_idx in range(len(unfiltered_anchors_2d)):
        anchor = unfiltered_anchors_2d[anchor_idx]
        obj_label = box_3d_encoder.box_3d_to_object_label(anchor, 'Anchor')

        # Append to a list for visualization in VTK later
        unfiltered_anchor_objects.append(obj_label)

    # Create VtkAxes
    axes = vtk.vtkAxesActor()
    axes.SetTotalLength(5, 5, 5)

    # Create VtkBoxes for boxes
    vtk_2d_boxes = VtkBoxes()
    vtk_2d_boxes.set_objects(anchor_2d_objects, anchor_2d_colour_scheme)

    vtk_3d_boxes = VtkBoxes()
    vtk_3d_boxes.set_objects(anchor_3d_objects, anchor_3d_colour_scheme)

    vtk_unfiltered_boxes = VtkBoxes()
    vtk_unfiltered_boxes.set_objects(unfiltered_anchor_objects,
                                     anchor_unfiltered_colour_scheme)

    vtk_voxel_grid = VtkVoxelGrid()
    vtk_voxel_grid.set_voxels(vx_grid_3d)

    vtk_voxel_grid_2d = VtkVoxelGrid()
    vtk_voxel_grid_2d.set_voxels(vx_grid_2d)

    # Create Voxel Grid Renderer in bottom half
    vtk_renderer = vtk.vtkRenderer()
    vtk_renderer.AddActor(vtk_2d_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_3d_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_unfiltered_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_voxel_grid.vtk_actor)
    vtk_renderer.AddActor(vtk_voxel_grid_2d.vtk_actor)
    vtk_renderer.AddActor(axes)
    vtk_renderer.SetBackground(0.2, 0.3, 0.4)

    # Setup Camera
    current_cam = vtk_renderer.GetActiveCamera()
    current_cam.Pitch(170.0)
    current_cam.Roll(180.0)

    # Zooms out to fit all points on screen
    vtk_renderer.ResetCamera()

    # Zoom in slightly
    current_cam.Zoom(2.5)

    # Reset the clipping range to show all points
    vtk_renderer.ResetCameraClippingRange()

    # Setup Render Window
    vtk_render_window = vtk.vtkRenderWindow()
    vtk_render_window.SetWindowName("Anchors")
    vtk_render_window.SetSize(900, 500)
    vtk_render_window.AddRenderer(vtk_renderer)

    # Setup custom interactor style, which handles mouse and key events
    vtk_render_window_interactor = vtk.vtkRenderWindowInteractor()
    vtk_render_window_interactor.SetRenderWindow(vtk_render_window)
    vtk_render_window_interactor.SetInteractorStyle(
        vis_utils.ToggleActorsInteractorStyle([
            vtk_2d_boxes.vtk_actor,
            vtk_3d_boxes.vtk_actor,
            vtk_unfiltered_boxes.vtk_actor,
        ]))

    # Render in VTK
    vtk_render_window.Render()
    vtk_render_window_interactor.Start()
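# Both emptiness filters rest on integral images: precompute cumulative
# sums of voxel occupancy so the point count inside any axis-aligned
# anchor extent costs only a handful of lookups. A 2D toy version of the
# test (names are mine, not the repo's anchor_filter API):
import numpy as np

def occupancy_integral_image(occupancy):
    # A leading row/column of zeros simplifies the corner lookups
    return np.pad(occupancy, ((1, 0), (1, 0))).cumsum(0).cumsum(1)

def box_sum(integral, x0, x1, z0, z1):
    # Occupied cell count in the half-open box [x0, x1) x [z0, z1)
    return (integral[x1, z1] - integral[x0, z1]
            - integral[x1, z0] + integral[x0, z0])

occ = np.zeros((8, 8), dtype=np.int32)  # Toy BEV occupancy grid
occ[2:4, 5:7] = 1                       # One small cluster of points

ii = occupancy_integral_image(occ)
print(box_sum(ii, 2, 4, 5, 7))  # 4 -> kept (>= density_threshold of 1)
print(box_sum(ii, 0, 2, 0, 2))  # 0 -> filtered out as empty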
def load_samples(self, indices):
    """Loads input-output data for a set of samples. Should only be
    called when a particular sample dict is required. Otherwise, samples
    should be provided by the next_batch function

    Args:
        indices: A list of sample indices from the dataset.sample_list
            to be loaded

    Returns:
        samples: a list of data sample dicts
    """
    sample_dicts = []
    for sample_idx in indices:
        sample = self.sample_list[sample_idx]
        sample_name = sample.name

        # Only read labels if they exist
        if self.has_labels:
            # Read mini batch first to see if it is empty
            anchors_info = self.get_anchors_info(sample_name)
            img_roi_all = self.get_img_roi_data(sample_name)
            # img_roi = img_roi_all[0]
            # img_roi_norm = img_roi_all[1]

            if (not anchors_info) and self.train_val_test == 'train' \
                    and (not self.train_on_all_samples):
                empty_sample_dict = {
                    constants.KEY_SAMPLE_NAME: sample_name,
                    constants.KEY_ANCHORS_INFO: anchors_info,
                    # constants.KEY_IMG_ROI: img_roi,
                    constants.KEY_IMG_ROI_ALL: img_roi_all
                }
                return [empty_sample_dict]

            obj_labels = obj_utils.read_labels(self.label_dir,
                                               int(sample_name))
            # Only use objects that match dataset classes
            obj_labels = self.kitti_utils.filter_labels(obj_labels)

        else:
            obj_labels = None
            anchors_info = []
            label_anchors = np.zeros((1, 6))
            label_boxes_3d = np.zeros((1, 7))
            # Initialized here so the sample_dict below stays valid
            # when no labels exist (added fix)
            label_boxes_2d = np.zeros((1, 4))
            label_classes = np.zeros(1)

        img_idx = int(sample_name)

        # Load image (BGR -> RGB); cv2 decodes images with the channels
        # stored in B G R order
        cv_bgr_image = cv2.imread(self.get_rgb_image_path(sample_name))
        rgb_image = cv_bgr_image[..., ::-1]
        image_shape = rgb_image.shape[0:2]
        image_input = rgb_image

        # Get ground plane
        ground_plane = obj_utils.get_road_plane(int(sample_name),
                                                self.planes_dir)

        # Get calibration
        stereo_calib_p2 = calib_utils.read_calibration(
            self.calib_dir, int(sample_name)).p2

        point_cloud = self.kitti_utils.get_point_cloud(
            self.bev_source, img_idx, image_shape)

        # Augmentation (Flipping)
        if kitti_aug.AUG_FLIPPING in sample.augs:
            image_input = kitti_aug.flip_image(image_input)
            point_cloud = kitti_aug.flip_point_cloud(point_cloud)
            obj_labels = [kitti_aug.flip_label_in_3d_only(obj)
                          for obj in obj_labels]
            ground_plane = kitti_aug.flip_ground_plane(ground_plane)
            stereo_calib_p2 = kitti_aug.flip_stereo_calib_p2(
                stereo_calib_p2, image_shape)

            # Flip the image rois
            img_roi_all = (kitti_aug.flip_roi(img_roi_all[0], image_shape),
                           kitti_aug.flip_roi_norm(img_roi_all[1]))

            if anchors_info:
                anchor_indices, anchors_ious, anchor_offsets, \
                    anchor_classes = anchors_info

                # Flip the x offsets and repack the mini batch info
                # (originally reassigned to anchor_offsets, which was
                # never read again)
                anchor_offsets[:, 0] = -anchor_offsets[:, 0]
                anchors_info = (anchor_indices, anchors_ious,
                                anchor_offsets, anchor_classes)

        # Augmentation (Image Jitter)
        if kitti_aug.AUG_PCA_JITTER in sample.augs:
            image_input[:, :, 0:3] = kitti_aug.apply_pca_jitter(
                image_input[:, :, 0:3])

        if obj_labels is not None:
            label_boxes_3d = np.asarray(
                [box_3d_encoder.object_label_to_box_3d(obj_label)
                 for obj_label in obj_labels])

            label_boxes_2d = np.asarray(
                [box_2d_encoder.object_label_to_box_2d(obj_label)
                 for obj_label in obj_labels])

            # Flip the 2D boxes to match the augmented image
            if kitti_aug.AUG_FLIPPING in sample.augs:
                label_boxes_2d = kitti_aug.flip_roi(label_boxes_2d,
                                                    image_shape)

            label_classes = [
                self.kitti_utils.class_str_to_index(obj_label.type)
                for obj_label in obj_labels]
            label_classes = np.asarray(label_classes, dtype=np.int32)

            # Return empty anchors_info if no ground truth after filtering
            if len(label_boxes_3d) == 0:
                anchors_info = []
                img_roi_all = []
                # img_roi = []
                # img_roi_norm = []
                if self.train_on_all_samples:
                    # If training without any positive labels, we cannot
                    # set these to zeros, because later on the offset calc
                    # uses log on these anchors. So setting any arbitrary
                    # number here that does not break the offset calculation
                    # should work, since the negative samples won't be
                    # regressed in any case.
                    dummy_anchors = [[-1000, -1000, -1000, 1, 1, 1]]
                    label_anchors = np.asarray(dummy_anchors)
                    dummy_boxes = [[-1000, -1000, -1000, 1, 1, 1, 0]]
                    label_boxes_3d = np.asarray(dummy_boxes)
                else:
                    label_anchors = np.zeros((1, 6))
                    label_boxes_3d = np.zeros((1, 7))
                label_classes = np.zeros(1)
            else:
                label_anchors = box_3d_encoder.box_3d_to_anchor(
                    label_boxes_3d, ortho_rotate=True)

        # Create BEV maps
        bev_images = self.kitti_utils.create_bev_maps(
            point_cloud, ground_plane)

        height_maps = bev_images.get('height_maps')
        density_map = bev_images.get('density_map')
        bev_input = np.dstack((*height_maps, density_map))

        sample_dict = {
            # constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
            constants.KEY_LABEL_BOXES_2D: label_boxes_2d,
            constants.KEY_LABEL_ANCHORS: label_anchors,
            constants.KEY_LABEL_CLASSES: label_classes,
            constants.KEY_IMAGE_INPUT: image_input,
            constants.KEY_BEV_INPUT: bev_input,
            constants.KEY_ANCHORS_INFO: anchors_info,
            constants.KEY_IMG_ROI_ALL: img_roi_all,
            # constants.KEY_IMG_ROI: img_roi,
            # constants.KEY_IMG_ROI_NORM: img_roi_norm,
            constants.KEY_POINT_CLOUD: point_cloud,
            constants.KEY_GROUND_PLANE: ground_plane,
            constants.KEY_STEREO_CALIB_P2: stereo_calib_p2,
            constants.KEY_SAMPLE_NAME: sample_name,
            constants.KEY_SAMPLE_AUGS: sample.augs
        }
        sample_dicts.append(sample_dict)

    return sample_dicts
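# The dummy-anchor comment above hinges on the size terms of the offset
# encoding being logarithmic. Assuming the usual form (not quoted from
# the repo) t_dim = log(gt_dim / anchor_dim), a zero anchor dimension
# would produce -inf, which is why the dummies use sizes of 1 instead
# of 0:
import numpy as np

anchor_dims = np.array([1.0, 1.0, 1.0])  # dummy anchor l, w, h
gt_dims = np.array([3.9, 1.6, 1.5])

t_dims = np.log(gt_dims / anchor_dims)   # finite because anchor_dims > 0
print(t_dims)                            # approx [1.361, 0.470, 0.405]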
def main():
    """This demo visualizes the box_4c format predicted by MLOD, before
    it gets converted to box_3d.

    Keys:
        F1: Toggle predictions
        F2: Toggle easy ground truth objects (Green)
        F3: Toggle medium ground truth objects (Orange)
        F4: Toggle hard ground truth objects (Red)
        F5: Toggle all ground truth objects (default off)
        F6: Toggle 3D voxel grid
        F7: Toggle point cloud
    """
    ##############################
    # Options
    ##############################
    mlod_score_threshold = 0.1

    show_orientations = True

    checkpoint_name = 'mlod_exp_example'
    # global_step = None  # Use the latest checkpoint
    global_step = 100000

    sample_name = None

    # # # Cars # # #
    sample_name = '000050'
    # sample_name = '000104'
    # sample_name = '000169'
    # sample_name = '000175'
    # sample_name = '000191'
    # sample_name = '000335'
    # sample_name = '000360'
    # sample_name = '001783'
    # sample_name = '001820'
    # sample_name = '006338'

    # # # People # # #
    # val_half split
    # sample_name = '000001'  # Hard, 1 far cyc
    # sample_name = '000005'  # Easy, 1 ped
    # sample_name = '000122'  # Easy, 1 cyc
    # sample_name = '000134'  # Hard, lots of people
    # sample_name = '000167'  # Medium, 1 ped, 2 cycs
    # sample_name = '000187'  # Medium, 1 ped on left
    # sample_name = '000381'  # Easy, 1 ped
    # sample_name = '000398'  # Easy, 1 ped
    # sample_name = '000401'  # Hard, obscured peds
    # sample_name = '000407'  # Easy, 1 ped
    # sample_name = '000448'  # Hard, several far people
    # sample_name = '000486'  # Hard, 2 obscured peds
    # sample_name = '000509'  # Easy, 1 ped
    # sample_name = '000718'  # Hard, lots of people
    # sample_name = '002216'  # Easy, 1 cyc

    dataset_config = DatasetBuilder.copy_config(DatasetBuilder.KITTI_VAL_HALF)
    dataset = DatasetBuilder.build_kitti_dataset(dataset_config)

    ##############################
    # Setup Paths
    ##############################
    # Random sample
    if sample_name is None:
        sample_idx = np.random.randint(0, dataset.num_samples)
        sample_name = dataset.sample_list[sample_idx]

    img_idx = int(sample_name)

    # Text files directory
    prediction_boxes_3d_dir = mlod.root_dir() + \
        '/data/outputs/' + checkpoint_name + '/predictions' + \
        '/final_predictions_and_scores/' + dataset.data_split

    prediction_boxes_4c_dir = mlod.root_dir() + \
        '/data/outputs/' + checkpoint_name + '/predictions' + \
        '/final_boxes_4c_and_scores/' + dataset.data_split

    # Get checkpoint step
    steps = os.listdir(prediction_boxes_3d_dir)
    steps.sort(key=int)
    print('Available steps: {}'.format(steps))

    # Use latest checkpoint if no index provided
    if global_step is None:
        global_step = steps[-1]

    ##############################
    # Load Predictions
    ##############################
    # Load prediction boxes_3d from files
    prediction_boxes_3d_and_scores = np.loadtxt(
        prediction_boxes_3d_dir +
        "/{}/{}.txt".format(global_step, sample_name))
    pred_boxes_3d = prediction_boxes_3d_and_scores[:, 0:7]
    # pred_boxes_3d_scores = prediction_boxes_3d_and_scores[:, 8]

    # Load prediction boxes_4c
    prediction_boxes_4c_and_scores = np.loadtxt(
        prediction_boxes_4c_dir +
        "/{}/{}.txt".format(global_step, sample_name))
    pred_boxes_4c = prediction_boxes_4c_and_scores[:, 0:10]
    pred_boxes_4c_scores = prediction_boxes_4c_and_scores[:, 10]

    # Filter by score
    score_mask = pred_boxes_4c_scores >= mlod_score_threshold
    pred_boxes_3d = pred_boxes_3d[score_mask]
    pred_boxes_4c = pred_boxes_4c[score_mask]

    ground_plane = obj_utils.get_road_plane(img_idx, dataset.planes_dir)

    # Visualization
    all_vtk_boxes_4c = []
    for box_4c in pred_boxes_4c:
        vtk_box_4c = VtkBox4c()
        vtk_box_4c.set_box_4c(box_4c, ground_plane)
        all_vtk_boxes_4c.append(vtk_box_4c)

    # Convert boxes_3d to ObjectLabels
    pred_objs = np.asarray(
        [box_3d_encoder.box_3d_to_object_label(box_3d, obj_type='Car')
         for box_3d in pred_boxes_3d])

    vtk_boxes_3d = VtkBoxes()
    vtk_boxes_3d.set_objects(pred_objs,
                             VtkBoxes.COLOUR_SCHEME_KITTI,
                             show_orientations=True)

    ##############################
    # Ground Truth
    ##############################
    if dataset.has_labels:
        easy_gt_objs, medium_gt_objs, hard_gt_objs, all_gt_objs = \
            demo_utils.get_gts_based_on_difficulty(dataset, img_idx)
    else:
        easy_gt_objs = medium_gt_objs = hard_gt_objs = all_gt_objs = []

    ##############################
    # Point Cloud
    ##############################
    image_path = dataset.get_rgb_image_path(sample_name)
    image = cv2.imread(image_path)

    points, point_colours = demo_utils.get_filtered_pc_and_colours(
        dataset, image, img_idx)

    # # Voxelize the point cloud for visualization
    # voxel_grid = VoxelGrid()
    # voxel_grid.voxelize(points, voxel_size=0.1,
    #                     create_leaf_layout=False)

    ##############################
    # Visualization
    ##############################
    # # Create VtkVoxelGrid
    # vtk_voxel_grid = VtkVoxelGrid()
    # vtk_voxel_grid.set_voxels(voxel_grid)

    vtk_point_cloud = VtkPointCloud()
    vtk_point_cloud.set_points(points, point_colours)

    # Create VtkAxes
    axes = vtk.vtkAxesActor()
    axes.SetTotalLength(5, 5, 5)

    # Create VtkBoxes for ground truth
    vtk_easy_gt_boxes, vtk_medium_gt_boxes, \
        vtk_hard_gt_boxes, vtk_all_gt_boxes = \
        demo_utils.create_gt_vtk_boxes(easy_gt_objs,
                                       medium_gt_objs,
                                       hard_gt_objs,
                                       all_gt_objs,
                                       show_orientations)

    # Create Voxel Grid Renderer in bottom half
    vtk_renderer = vtk.vtkRenderer()
    vtk_renderer.SetBackground(0.2, 0.3, 0.4)

    vtk_renderer.AddActor(vtk_point_cloud.vtk_actor)

    vtk_box_actors = vtk.vtkAssembly()

    # Prediction boxes
    for i in range(len(all_vtk_boxes_4c)):
        # Adding labels slows down rendering
        vtk_renderer.AddActor(all_vtk_boxes_4c[i].vtk_text_labels.vtk_actor)
        vtk_box_actors.AddPart(all_vtk_boxes_4c[i].vtk_actor)

    vtk_renderer.AddActor(vtk_boxes_3d.vtk_actor)
    vtk_renderer.AddActor(vtk_hard_gt_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_medium_gt_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_easy_gt_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_all_gt_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_box_actors)
    vtk_renderer.AddActor(axes)

    # Set initial properties for some actors
    vtk_point_cloud.vtk_actor.GetProperty().SetPointSize(2)
    vtk_all_gt_boxes.vtk_actor.SetVisibility(0)
    vtk_boxes_3d.vtk_actor.SetVisibility(0)

    # Setup Camera
    current_cam = vtk_renderer.GetActiveCamera()
    current_cam.Pitch(160.0)
    current_cam.Roll(180.0)

    # Zooms out to fit all points on screen
    vtk_renderer.ResetCamera()

    # Zoom in slightly
    current_cam.Zoom(2.5)

    # Reset the clipping range to show all points
    vtk_renderer.ResetCameraClippingRange()

    # Setup Render Window
    vtk_render_window = vtk.vtkRenderWindow()
    vtk_render_window.SetWindowName(
        "Predictions: Step {}, Sample {}, Min Score {}".format(
            global_step, sample_name, mlod_score_threshold))
    vtk_render_window.SetSize(900, 600)
    vtk_render_window.AddRenderer(vtk_renderer)

    # Setup custom interactor style, which handles mouse and key events
    vtk_render_window_interactor = vtk.vtkRenderWindowInteractor()
    vtk_render_window_interactor.SetRenderWindow(vtk_render_window)
    vtk_render_window_interactor.SetInteractorStyle(
        vis_utils.ToggleActorsInteractorStyle([
            vtk_box_actors,
            vtk_boxes_3d.vtk_actor,
            vtk_easy_gt_boxes.vtk_actor,
            vtk_medium_gt_boxes.vtk_actor,
            vtk_hard_gt_boxes.vtk_actor,
            vtk_all_gt_boxes.vtk_actor,
            vtk_point_cloud.vtk_actor,
        ]))

    vtk_render_window_interactor.Start()
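# For reference, the box_4c files read above store 10 values plus a score
# per box. In the AVOD codebase that MLOD builds on, the layout is (to the
# best of my knowledge; treat this ordering as an assumption):
#   box_4c = [x1, x2, x3, x4, z1, z2, z3, z4, h1, h2]
# i.e. four BEV corners plus bottom/top heights relative to the ground
# plane, which is why set_box_4c above also needs the plane. A small
# decoding sketch under that assumption:
import numpy as np

def box_4c_bev_corners(box_4c):
    # Four (x, z) BEV corners as a (4, 2) array
    return np.stack([box_4c[0:4], box_4c[4:8]], axis=1)

box_4c = np.array([1.0, 1.0, -1.0, -1.0,    # x corners
                   10.0, 12.0, 12.0, 10.0,  # z corners
                   0.0, 1.5])               # bottom/top heights
print(box_4c_bev_corners(box_4c))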