def setUpClass(cls):
    """Create the fixtures shared by the tests of this class.

    Stores on ``cls``: the fake KITTI object directory path, a fake
    dataset, a ground plane, a small set of anchor cluster sizes and a
    3D grid anchor generator.
    """
    test_root = tests.test_path()
    cls.fake_kitti_dir = test_root + "/datasets/Kitti/object"
    cls.dataset = generate_fake_dataset()

    # Generic ground plane whose normal vector points straight up
    cls.ground_plane = np.array([0.0, -1.0, 0.0, 0.0])

    # Two anchor sizes, one per row
    cls.clusters = np.array([
        [1.0, 1.0, 1.0],
        [2.0, 1.0, 1.0],
    ])

    cls.anchor_generator = grid_anchor_3d_generator.GridAnchor3dGenerator()
def __init__(self, model_config, train_val_test, dataset):
    """Set up the RPN model from its configuration and dataset.

    Args:
        model_config: configuration for the model
        train_val_test: "train", "val", or "test"
        dataset: the dataset that will provide samples and ground truth

    Raises:
        ValueError: if train_val_test is not one of
            "train", "val" or "test".
    """
    # Sets model configs (_config)
    super(RpnModel, self).__init__(model_config)

    if train_val_test not in ["train", "val", "test"]:
        # Adjacent literals are concatenated verbatim, so the separating
        # space has to be part of one of them.
        raise ValueError('Invalid train_val_test value, '
                         'should be one of ["train", "val", "test"]')

    self._train_val_test = train_val_test
    self._is_training = (self._train_val_test == 'train')

    # Input config
    input_config = self._config.input_config
    self._bev_pixel_size = np.asarray(
        [input_config.bev_dims_h, input_config.bev_dims_w])
    self._bev_depth = input_config.bev_depth

    self._img_pixel_size = np.asarray(
        [input_config.img_dims_h, input_config.img_dims_w])
    self._img_depth = input_config.img_depth

    # Rpn config
    rpn_config = self._config.rpn_config
    # Same crop size is used for both entries of the crop-size pair
    self._proposal_roi_crop_size = \
        [rpn_config.rpn_proposal_roi_crop_size] * 2
    self._fusion_method = rpn_config.rpn_fusion_method

    # "val" shares the training NMS size; only "test" uses the test size
    if self._train_val_test in ["train", "val"]:
        self._nms_size = rpn_config.rpn_train_nms_size
    else:
        self._nms_size = rpn_config.rpn_test_nms_size

    self._nms_iou_thresh = rpn_config.rpn_nms_iou_thresh

    # Feature Extractor Nets
    layers_config = self._config.layers_config
    self._bev_feature_extractor = \
        feature_extractor_builder.get_extractor(
            layers_config.bev_feature_extractor)
    self._img_feature_extractor = \
        feature_extractor_builder.get_extractor(
            layers_config.img_feature_extractor)

    # Network input placeholders
    self.placeholders = dict()

    # Inputs to network placeholders
    self._placeholder_inputs = dict()

    # Information about the current sample
    self.sample_info = dict()

    # Dataset
    self.dataset = dataset
    # The dataset object is updated to the same mode as the model
    self.dataset.train_val_test = self._train_val_test
    self._area_extents = self.dataset.kitti_utils.area_extents
    self._bev_extents = self.dataset.kitti_utils.bev_extents
    self._cluster_sizes, _ = self.dataset.get_cluster_info()
    self._anchor_strides = self.dataset.kitti_utils.anchor_strides
    self._anchor_generator = \
        grid_anchor_3d_generator.GridAnchor3dGenerator()

    self._path_drop_probabilities = self._config.path_drop_probabilities
    self._train_on_all_samples = self._config.train_on_all_samples
    self._eval_all_samples = self._config.eval_all_samples

    # Overwrite the dataset's variable with the config
    self.dataset.train_on_all_samples = self._train_on_all_samples

    if self._train_val_test in ["val", "test"]:
        # Disable path-drop, this should already be disabled inside the
        # evaluator, but just in case.
        # NOTE: _path_drop_probabilities is the config's own object (no
        # copy was taken above), so these writes also modify the config.
        self._path_drop_probabilities[0] = 1.0
        self._path_drop_probabilities[1] = 1.0
def preprocess(self, indices):
    """Preprocesses anchor info and saves info to files

    For every selected sample, grid anchors are generated for each
    dataset class, anchor info is calculated against the class-filtered
    ground truth labels, and the result is written with
    ``self._save_to_file``. Samples for which ``_check_for_existing``
    reports existing output are skipped.

    Args:
        indices (int array): sample indices to process.
            If None, processes all samples
    """
    # Get anchor stride for class
    anchor_strides = self._anchor_strides

    dataset = self._dataset
    dataset_utils = self._dataset.kitti_utils
    classes_name = dataset.classes_name

    # Make folder if it doesn't exist yet
    output_dir = self.mini_batch_utils.get_file_path(classes_name,
                                                     anchor_strides,
                                                     sample_name=None)
    os.makedirs(output_dir, exist_ok=True)

    # Get clusters for class
    all_clusters_sizes, _ = dataset.get_cluster_info()

    anchor_generator = grid_anchor_3d_generator.GridAnchor3dGenerator()

    # Load indices of data_split
    all_samples = dataset.sample_list

    if indices is None:
        indices = np.arange(len(all_samples))
    num_samples = len(indices)

    # For each image in the dataset, save info on the anchors.
    # NOTE: progress messages print `sample_idx + 1`, i.e. the position in
    # the dataset's sample list, not a running count over `indices`.
    for sample_idx in indices:
        # Get image name for given cluster
        sample_name = all_samples[sample_idx].name
        img_idx = int(sample_name)

        # Check for existing files and skip to the next
        if self._check_for_existing(classes_name, anchor_strides,
                                    sample_name):
            print("{} / {}: Sample already preprocessed".format(
                sample_idx + 1, num_samples))
            continue

        # Get ground truth and filter based on difficulty
        ground_truth_list = obj_utils.read_labels(dataset.label_dir,
                                                  img_idx)

        # Filter objects to dataset classes
        filtered_gt_list = dataset_utils.filter_labels(ground_truth_list)
        filtered_gt_list = np.asarray(filtered_gt_list)

        # Filtering by class has no valid ground truth, skip this image
        if len(filtered_gt_list) == 0:
            print("{} / {} No {}s for sample {} "
                  "(Ground Truth Filter)".format(
                      sample_idx + 1, num_samples,
                      classes_name, sample_name))

            # Output an empty file and move on to the next image.
            self._save_to_file(classes_name, anchor_strides, sample_name)
            continue

        # Get ground plane
        ground_plane = obj_utils.get_road_plane(img_idx,
                                                dataset.planes_dir)

        # Only the image dimensions are needed; the context manager
        # closes the file right away instead of leaving one handle open
        # per processed sample.
        with Image.open(dataset.get_rgb_image_path(sample_name)) as image:
            image_width, image_height = image.size
        image_shape = [image_height, image_width]

        # Generate sliced 2D voxel grid for filtering
        vx_grid_2d = dataset_utils.create_sliced_voxel_grid_2d(
            sample_name,
            source=dataset.bev_source,
            image_shape=image_shape)

        # List for merging all anchors
        all_anchor_boxes_3d = []

        # Create anchors for each class
        for class_idx in range(len(dataset.classes)):
            # Generate anchors for all classes
            grid_anchor_boxes_3d = anchor_generator.generate(
                area_3d=self._area_extents,
                anchor_3d_sizes=all_clusters_sizes[class_idx],
                anchor_stride=self._anchor_strides[class_idx],
                ground_plane=ground_plane)

            all_anchor_boxes_3d.extend(grid_anchor_boxes_3d)

        # Filter empty anchors
        all_anchor_boxes_3d = np.asarray(all_anchor_boxes_3d)
        anchors = box_3d_encoder.box_3d_to_anchor(all_anchor_boxes_3d)
        empty_anchor_filter = anchor_filter.get_empty_anchor_filter_2d(
            anchors, vx_grid_2d, self._density_threshold)

        # Calculate anchor info
        anchors_info = self._calculate_anchors_info(
            all_anchor_boxes_3d, empty_anchor_filter, filtered_gt_list)

        # Count the anchors with non-zero IoU, for the progress message
        anchor_ious = anchors_info[:, self.mini_batch_utils.col_ious]
        valid_iou_indices = np.where(anchor_ious > 0.0)[0]

        print("{} / {}:"
              "{:>6} anchors, "
              "{:>6} iou > 0.0, "
              "for {:>3} {}(s) for sample {}".format(
                  sample_idx + 1, num_samples,
                  len(anchors_info),
                  len(valid_iou_indices),
                  len(filtered_gt_list), classes_name, sample_name
              ))

        # Save anchors info
        self._save_to_file(classes_name, anchor_strides,
                           sample_name, anchors_info)
def _add_box_outline(axes, box):
    """Add a blue, unfilled rectangle for a [x1, y1, x2, y2] box to axes."""
    x1, y1 = box[0], box[1]
    rect = patches.Rectangle((x1, y1),
                             box[2] - x1,
                             box[3] - y1,
                             linewidth=2,
                             edgecolor='b',
                             facecolor='none')
    axes.add_patch(rect)


def main():
    """
    Visualization of 3D grid anchor generation, showing 2D projections
    in BEV and image space, and a 3D display of the anchors
    """
    dataset_config = DatasetBuilder.copy_config(DatasetBuilder.KITTI_TRAIN)
    dataset_config.num_clusters[0] = 1
    dataset = DatasetBuilder.build_kitti_dataset(dataset_config)

    label_cluster_utils = LabelClusterUtils(dataset)
    # The returned clusters are not used below (fake clusters are used
    # instead); the call is kept in case it has side effects - TODO confirm.
    label_cluster_utils.get_clusters()

    # Options
    img_idx = 1
    # fake_clusters = np.array([[5, 4, 3], [6, 5, 4]])
    # fake_clusters = np.array([[3, 3, 3], [4, 4, 4]])

    fake_clusters = np.array([[4, 2, 3]])
    fake_anchor_stride = [5.0, 5.0]
    ground_plane = [0, -1, 0, 1.72]

    anchor_3d_generator = grid_anchor_3d_generator.GridAnchor3dGenerator()

    # NOTE(review): anchors are generated over
    # dataset.kitti_utils.area_extents but projected to BEV with these
    # local extents (which also match the hard-coded axis limits below);
    # confirm that the two agree.
    area_extents = np.array([[-40, 40], [-5, 5], [0, 70]])

    # Generate anchors for cars only
    start_time = time.time()
    anchor_boxes_3d = anchor_3d_generator.generate(
        area_3d=dataset.kitti_utils.area_extents,
        anchor_3d_sizes=fake_clusters,
        anchor_stride=fake_anchor_stride,
        ground_plane=ground_plane)
    all_anchors = box_3d_encoder.box_3d_to_anchor(anchor_boxes_3d)
    end_time = time.time()
    print("Anchors generated in {} s".format(end_time - start_time))

    # Project into bev
    bev_boxes, bev_normalized_boxes = \
        anchor_projector.project_to_bev(all_anchors, area_extents[[0, 2]])

    _, (bev_axes, bev_normalized_axes) = \
        plt.subplots(1, 2, figsize=(16, 7))
    bev_axes.set_xlim(0, 80)
    bev_axes.set_ylim(70, 0)
    bev_normalized_axes.set_xlim(0, 1.0)
    bev_normalized_axes.set_ylim(1, 0.0)

    plt.show(block=False)

    for box in bev_boxes:
        _add_box_outline(bev_axes, box)

    for normalized_box in bev_normalized_boxes:
        _add_box_outline(bev_normalized_axes, normalized_box)

    rgb_fig, rgb_2d_axes, rgb_3d_axes = \
        vis_utils.visualization(dataset.rgb_image_dir, img_idx)
    plt.show(block=False)

    image_path = dataset.get_rgb_image_path(dataset.sample_names[img_idx])
    # Close the image file as soon as its shape has been read
    with Image.open(image_path) as image:
        image_shape = np.array(image).shape

    # Calibration of the frame being displayed. The same P2 matrix is used
    # for the 2D projection and for drawing the 3D boxes, so both overlays
    # match the image shown (the 3D boxes previously used frame 0's
    # calibration).
    stereo_calib_p2 = calib_utils.read_calibration(dataset.calib_dir,
                                                   img_idx).p2

    start_time = time.time()
    rgb_boxes, _ = \
        anchor_projector.project_to_image_space(all_anchors,
                                                stereo_calib_p2,
                                                image_shape)
    end_time = time.time()
    print("Anchors projected in {} s".format(end_time - start_time))

    # Overlay boxes on images
    for anchor_idx, (anchor_box_3d, rgb_box_2d) in enumerate(
            zip(anchor_boxes_3d, rgb_boxes)):
        obj_label = box_3d_encoder.box_3d_to_object_label(anchor_box_3d)

        # Draw 3D boxes
        vis_utils.draw_box_3d(rgb_3d_axes, obj_label, stereo_calib_p2)

        # Draw 2D boxes
        _add_box_outline(rgb_2d_axes, rgb_box_2d)

        # Redraw only every 32nd anchor rather than after every patch
        if anchor_idx % 32 == 0:
            rgb_fig.canvas.draw()

    plt.show(block=True)