def get_stereo_point_cloud(sample_name, calib_dir, disp_dir):
    """
    Gets the point cloud for an image calculated from the disparity map

    :param sample_name: sample name
    :param calib_dir: directory with calibration files
    :param disp_dir: directory with disparity images,
        read from '<disp_dir>/<sample_name>.png'

    :return: (3, N) point_cloud in the form [[x,...][y,...][z,...]]
    :raises FileNotFoundError: if the disparity image cannot be read
    """
    # Read calibration info
    frame_calib = calib_utils.get_frame_calib(calib_dir, sample_name)
    stereo_calibration_info = calib_utils.get_stereo_calibration(
        frame_calib.p2, frame_calib.p3)

    # Read disparity
    disp_path = disp_dir + '/{}.png'.format(sample_name)
    disp = cv2.imread(disp_path, cv2.IMREAD_ANYDEPTH)
    if disp is None:
        # cv2.imread reports a missing/unreadable file by returning None
        # instead of raising, which would otherwise only fail later with an
        # unrelated looking error
        raise FileNotFoundError(
            'Could not read disparity image: {}'.format(disp_path))

    # Stored values are divided by 256 to get the disparity
    # (presumably the PNG holds disparity * 256 -- confirm against the writer)
    disp = np.float32(disp)
    disp = np.divide(disp, 256)

    # Replace zero disparities with a small value
    # (presumably to avoid dividing by zero in depth_from_disparity -- confirm)
    disp[disp == 0] = 0.1

    # Calculate the point cloud
    point_cloud = calib_utils.depth_from_disparity(disp, stereo_calibration_info)

    return point_cloud
def main():
    """Plots the labelled objects of KITTI sample '000050'.

    Every object label is drawn as a 2D box on the first axis and as a
    3D box (using the p2 camera matrix) on the second axis, then the
    figure is shown in a blocking window.
    """
    # Dataset locations
    split_dir = os.path.join(os.path.expanduser('~/Kitti/object/'), 'training')
    image_dir = split_dir + '/image_2'
    label_dir = split_dir + '/label_2'
    calib_dir = split_dir + '/calib'

    sample_name = '000050'

    # Camera matrix used when drawing the 3D boxes
    cam_p = calib_utils.get_frame_calib(calib_dir, sample_name).p2

    _, axes = vis_utils.plots_from_sample_name(image_dir, sample_name, 2, 1)
    box_2d_axis, box_3d_axis = axes[0], axes[1]

    # Draw each labelled object on both axes
    for obj in obj_utils.read_labels(label_dir, sample_name):
        vis_utils.draw_obj_as_box_2d(box_2d_axis, obj)
        vis_utils.draw_obj_as_box_3d(box_3d_axis, obj, cam_p)

    plt.show(block=True)
def test_tf_project_pc_to_image(self):
    """Check that tf_project_pc_to_image matches numpy version"""
    batch_size = 32

    dataset = DatasetBuilder.build_kitti_dataset(
        DatasetBuilder.KITTI_TRAINVAL)

    # Fixed seed so the random point clouds are reproducible
    np.random.seed(12345)
    point_cloud_batch = np.random.rand(batch_size, 3, 2304)

    cam_p = calib_utils.get_frame_calib(dataset.calib_dir, '000050').p2

    # Reference result: project each point cloud separately with numpy
    exp_proj_uv = []
    for point_cloud in point_cloud_batch:
        exp_proj_uv.append(
            calib_utils.project_pc_to_image(point_cloud, cam_p))

    # Result under test: project the whole batch with the tf version
    tf_proj_uv = calib_utils.tf_project_pc_to_image(
        point_cloud_batch, cam_p, batch_size)

    with self.test_session() as sess:
        proj_uv_out = sess.run(tf_proj_uv)

    np.testing.assert_allclose(exp_proj_uv, proj_uv_out)
def main():
    """Generates instance mask images from the 3D box labels.

    For every sample, the points that lie inside each (modified) ground truth
    3D box and also project inside the matching 2D box are written into a
    uint8 image using the index of the object as pixel value. Pixels that
    belong to no object keep the value 255. One PNG per sample is saved to
    'outputs/instance_2_<point_cloud_source>/<sample_name>.png'.

    Raises:
        ValueError: if point_cloud_source is neither 'velo' nor starts
            with 'depth'
    """
    ##############################
    # Options
    ##############################
    point_cloud_source = 'depth_2_multiscale'

    samples_to_use = None  # all samples

    dataset = DatasetBuilder.build_kitti_dataset(DatasetBuilder.KITTI_TRAINVAL)

    out_instance_dir = 'outputs/instance_2_{}'.format(point_cloud_source)

    required_classes = [
        'Car',
        'Pedestrian',
        'Cyclist',
        'Van',
        'Truck',
        'Person_sitting',
        'Tram',
        'Misc',
    ]

    ##############################
    # End of Options
    ##############################

    # Reject an unknown source before any work is done
    use_depth_map = point_cloud_source.startswith('depth')
    if not use_depth_map and point_cloud_source != 'velo':
        raise ValueError('Invalid point cloud source', point_cloud_source)

    # Create instance folder
    os.makedirs(out_instance_dir, exist_ok=True)

    # Get frame ids to process
    if samples_to_use is None:
        samples_to_use = dataset.get_sample_names()
    samples_to_use = list(samples_to_use)
    last_sample_idx = len(samples_to_use) - 1

    # Begin instance mask generation
    for sample_idx, sample_name in enumerate(samples_to_use):
        # Progress is relative to the samples actually processed
        sys.stdout.write(
            '\r{} / {} Generating {} instances for sample {}'.format(
                sample_idx, last_sample_idx,
                point_cloud_source, sample_name))
        sys.stdout.flush()

        # Get image
        image = obj_utils.get_image(sample_name, dataset.image_2_dir)
        image_shape = image.shape[0:2]

        # Get calibration
        frame_calib = calib_utils.get_frame_calib(dataset.calib_dir, sample_name)

        # Get point cloud
        if use_depth_map:
            point_cloud = obj_utils.get_depth_map_point_cloud(
                sample_name, frame_calib, dataset.depth_dir)
        else:
            point_cloud = obj_utils.get_lidar_point_cloud_for_cam(
                sample_name, frame_calib, dataset.velo_dir, image_shape)

        # Filter according to classes
        obj_labels = obj_utils.read_labels(dataset.kitti_label_dir, sample_name)
        obj_labels, _ = obj_utils.filter_labels_by_class(
            obj_labels, required_classes)

        # Get 2D and 3D bounding boxes from labels
        gt_boxes_2d = [
            box_3d_encoder.object_label_to_box_2d(obj_label)
            for obj_label in obj_labels
        ]
        gt_boxes_3d = [
            box_3d_encoder.object_label_to_box_3d(obj_label)
            for obj_label in obj_labels
        ]

        # 255 marks pixels that belong to no instance
        instance_image = np.full(image_shape, 255, dtype=np.uint8)

        # Instance indices start at 0 and follow the label order
        for inst_idx, (obj_label, box_2d, box_3d) in enumerate(
                zip(obj_labels, gt_boxes_2d, gt_boxes_3d)):

            # Apply inflation and offset to box_3d
            modified_box_3d = modify_box_3d(box_3d, obj_label)

            # Get points in 3D box
            box_points, mask = obj_utils.points_in_box_3d(
                modified_box_3d, point_cloud.T)

            # Get points in 2D box
            # (box_2d is compared as [y1, x1, y2, x2] against (x, y) pixels)
            points_in_im = calib_utils.project_pc_to_image(
                box_points.T, cam_p=frame_calib.p2)
            mask_2d = \
                (points_in_im[0] >= box_2d[1]) & \
                (points_in_im[0] <= box_2d[3]) & \
                (points_in_im[1] >= box_2d[0]) & \
                (points_in_im[1] <= box_2d[2])

            if use_depth_map:
                # The mask is reshaped to the image, so every point maps to
                # exactly one pixel and its (row, col) is taken from the mask
                rows, cols = np.where(mask.reshape(image_shape))
                instance_pixels = np.asarray([cols[mask_2d], rows[mask_2d]])
            else:
                # Lidar points have no per pixel layout, so the projected
                # (x, y) image coordinates are used as pixel indices.
                # NOTE(review): rebuilt from the previously commented out
                # projection code; the branch used to leave instance_pixels
                # undefined -- confirm truncating to int32 is the wanted
                # rounding.
                instance_pixels = np.asarray(
                    [points_in_im[0][mask_2d],
                     points_in_im[1][mask_2d]]).astype(np.int32)

            # Guarantees that indices don't exceed image dimensions
            instance_pixels[0, :] = np.clip(instance_pixels[0, :],
                                            0, image_shape[1] - 1)
            instance_pixels[1, :] = np.clip(instance_pixels[1, :],
                                            0, image_shape[0] - 1)

            instance_image[instance_pixels[1, :],
                           instance_pixels[0, :]] = np.uint8(inst_idx)

        # Write image to directory
        cv2.imwrite(out_instance_dir + '/{}.png'.format(sample_name),
                    instance_image,
                    [cv2.IMWRITE_PNG_COMPRESSION, 1])
def save_predictions_box_3d_in_kitti_format(score_threshold,
                                            dataset,
                                            predictions_base_dir,
                                            predictions_box_3d_dir,
                                            predictions_box_2d_dir,
                                            global_step,
                                            project_3d_box=False):
    """Converts and saves predictions (box_3d) into text files required for
    KITTI evaluation

    One output file is written per sample into
    '<predictions_base_dir>/kitti_predictions_3d/<data_split>/
    <score_threshold>/<global_step>/data'. The file is empty when the sample
    has no prediction file, no predictions, or none that pass the filters.

    Each row of a box_3d prediction file holds 9 values: the box_3d in
    [0:7], the score in [7] and the class index in [8]. Each row of a box_2d
    prediction file holds 7 values, of which [0:4] (box) and [4] (alpha)
    are used.

    Args:
        score_threshold: score threshold to filter predictions
            (rounded to 3 decimal places)
        dataset: Dataset object
        predictions_base_dir: predictions base folder
        predictions_box_3d_dir: predictions (box_3d) folder
        predictions_box_2d_dir: predictions (box_2d) folder
        global_step: global step
        project_3d_box: Bool whether to project 3D box to image space to get
            2D box. The image and calibration of a sample are only read in
            this case.
    """

    score_threshold = round(score_threshold, 3)
    data_split = dataset.data_split

    # Output folder
    kitti_predictions_3d_dir = predictions_base_dir + \
        '/kitti_predictions_3d/{}/{}/{}/data'.format(
            data_split, score_threshold, global_step)
    os.makedirs(kitti_predictions_3d_dir, exist_ok=True)

    # Do conversion
    num_samples = dataset.num_samples
    num_valid_samples = 0

    print('\nGlobal step:', global_step)
    print('Converting detections from:', predictions_box_3d_dir)
    print('3D Detections being saved to:', kitti_predictions_3d_dir)

    for sample_idx in range(num_samples):

        # Print progress
        sys.stdout.write('\rConverting {} / {}'.format(
            sample_idx + 1, num_samples))
        sys.stdout.flush()

        sample_name = dataset.sample_list[sample_idx].name
        prediction_file = sample_name + '.txt'

        kitti_predictions_3d_file_path = \
            kitti_predictions_3d_dir + '/' + prediction_file
        predictions_3d_file_path = \
            predictions_box_3d_dir + '/' + prediction_file
        predictions_2d_file_path = \
            predictions_box_2d_dir + '/' + prediction_file

        # If no predictions, skip to next file
        if not os.path.exists(predictions_3d_file_path):
            np.savetxt(kitti_predictions_3d_file_path, [])
            continue

        all_predictions_3d = np.loadtxt(predictions_3d_file_path)
        if all_predictions_3d.size == 0:
            np.savetxt(kitti_predictions_3d_file_path, [])
            continue

        # loadtxt returns a 1D array for a single row, so force (N, C)
        all_predictions_3d = all_predictions_3d.reshape(-1, 9)
        all_predictions_2d = np.loadtxt(predictions_2d_file_path).reshape(
            -1, 7)

        # Keep predictions that reach the score threshold; the same filter is
        # applied to both arrays so their rows stay aligned
        score_filter = all_predictions_3d[:, 7] >= score_threshold
        all_predictions_3d = all_predictions_3d[score_filter]
        all_predictions_2d = all_predictions_2d[score_filter]

        # If no predictions, skip to next file
        if len(all_predictions_3d) == 0:
            np.savetxt(kitti_predictions_3d_file_path, [])
            continue

        if project_3d_box:
            # Only the image size is needed (for truncation), so the file is
            # closed again straight away instead of leaking one handle per
            # sample
            with Image.open(dataset.get_rgb_image_path(sample_name)) as image:
                image_size = image.size

            # TODO: Check which camera
            cam_p = calib_utils.get_frame_calib(dataset.calib_dir,
                                                sample_name).p2

            # Project to image space
            boxes = []
            image_filter = []
            for box_3d in all_predictions_3d[:, 0:7]:
                img_box = box_3d_projector.project_to_image_space(
                    box_3d, cam_p,
                    truncate=True, image_size=image_size)

                # Skip invalid boxes (outside image space)
                is_valid = img_box is not None
                image_filter.append(is_valid)
                if is_valid:
                    boxes.append(img_box)

            boxes_2d = np.asarray(boxes)
            image_filter = np.asarray(image_filter, dtype=bool)
            all_predictions_3d = all_predictions_3d[image_filter]
            all_predictions_2d = all_predictions_2d[image_filter]

        else:
            # Get 2D boxes from 2D predictions, swapping each pair of
            # coordinates
            boxes_2d = all_predictions_2d[:, [1, 0, 3, 2]]

        # If no predictions, skip to next file
        if len(all_predictions_3d) == 0:
            np.savetxt(kitti_predictions_3d_file_path, [])
            continue

        num_valid_samples += 1

        # To keep each value in its appropriate position, an array of zeros
        # (N, 16) is allocated but only values [3:16] are used
        kitti_predictions = np.zeros([len(all_predictions_3d), 16])

        # Get object types
        all_pred_classes = all_predictions_3d[:, 8].astype(np.int32)
        obj_types = [dataset.classes[class_idx]
                     for class_idx in all_pred_classes]

        # Truncation and Occlusion are always empty (see below)

        # Alpha
        kitti_predictions[:, 3] = all_predictions_2d[:, 4]

        # 2D predictions
        kitti_predictions[:, 4:8] = boxes_2d

        # 3D predictions
        # Dimensions: columns 3, 4, 5 of the prediction are written in
        # reverse order
        kitti_predictions[:, 8] = all_predictions_3d[:, 5]
        kitti_predictions[:, 9] = all_predictions_3d[:, 4]
        kitti_predictions[:, 10] = all_predictions_3d[:, 3]
        # (x, y, z)
        kitti_predictions[:, 11:14] = all_predictions_3d[:, 0:3]
        # (ry, score)
        kitti_predictions[:, 14:16] = all_predictions_3d[:, 6:8]

        # Round detections to 3 decimal places
        kitti_predictions = np.round(kitti_predictions, 3)

        # Empty Truncation, Occlusion
        kitti_empty_1 = -1 * np.ones((len(kitti_predictions), 2),
                                     dtype=np.int32)

        # Stack 3D predictions text
        kitti_text_3d = np.column_stack([obj_types,
                                         kitti_empty_1,
                                         kitti_predictions[:, 3:16]])

        # Save to text files
        np.savetxt(kitti_predictions_3d_file_path, kitti_text_3d,
                   newline='\r\n', fmt='%s')

    print('\nNum valid:', num_valid_samples)
    print('Num samples:', num_samples)
def main():
    """Builds dense depth maps from the lidar point clouds of the dataset.

    For each sample the lidar point cloud is projected to a depth map, filled
    in with the multiscale algorithm and saved to
    'outputs/<data_split>/depth_2_<fill_type>/<sample_name>.png'. A progress
    line with rolling averages over the last 5 timings is printed per sample.
    """
    ##############################
    # Options
    ##############################
    dataset = DatasetBuilder.build_kitti_dataset(DatasetBuilder.KITTI_TRAINVAL)
    data_split = dataset.data_split

    # Fill algorithm
    fill_type = 'multiscale'

    save_depth_maps = True
    out_depth_map_dir = 'outputs/{}/depth_2_{}'.format(data_split, fill_type)

    samples_to_use = None
    ##############################
    # End of Options
    ##############################
    os.makedirs(out_depth_map_dir, exist_ok=True)

    # Windows holding the most recent timings, seeded with 1.0 second each
    avg_time_arr_length = 5
    recent_fill_times = np.ones(avg_time_arr_length)
    recent_total_times = np.ones(avg_time_arr_length)

    if samples_to_use is None:
        samples_to_use = [sample.name for sample in dataset.sample_list]

    progress_fmt = ('\rProcessing {} / {}, Idx {}, Avg Fill Time: {:.5f}s, '
                    'Avg Time: {:.5f}s, Est Time: {:.3f}s')

    for sample_idx, sample_name in enumerate(samples_to_use):

        # Averages over the timing windows
        avg_fill_time = recent_fill_times.mean()
        avg_total_time = recent_total_times.mean()
        samples_left = dataset.num_samples - sample_idx

        # Report progress and the estimated remaining time
        sys.stdout.write(progress_fmt.format(
            sample_idx, dataset.num_samples - 1, sample_name,
            avg_fill_time, avg_total_time,
            avg_total_time * samples_left))
        sys.stdout.flush()

        total_start = time.time()

        # Image shape, camera matrix and lidar points of the sample
        image = obj_utils.get_image(sample_name, dataset.image_2_dir)
        image_shape = image.shape[0:2]
        frame_calib = calib_utils.get_frame_calib(dataset.calib_dir, sample_name)
        cam_p = frame_calib.p2

        point_cloud = obj_utils.get_lidar_point_cloud(
            sample_name, frame_calib, dataset.velo_dir)

        # Only the multiscale fill is supported
        if fill_type != 'multiscale':
            raise ValueError('Invalid fill algorithm')

        # Project the points to a depth map; only the fill itself is timed
        # as fill time
        projected_depths = depth_map_utils.project_depths(
            point_cloud, cam_p, image_shape)

        fill_start = time.time()
        final_depth_map, _ = ip_basic.fill_in_multiscale(projected_depths)
        fill_end = time.time()

        if save_depth_maps:
            out_depth_map_path = \
                out_depth_map_dir + '/{}.png'.format(sample_name)
            depth_map_utils.save_depth_map(out_depth_map_path, final_depth_map)

        total_end = time.time()

        # Drop the oldest timing and append the newest one
        recent_fill_times = np.append(
            recent_fill_times[1:], fill_end - fill_start)
        recent_total_times = np.append(
            recent_total_times[1:], total_end - total_start)
def score_boxes(dataset, sample_name, img_shape, boxes_2d, boxes_3d,
                valid_scores, max_depth=45.0):
    """Score 3D boxes based on 2D classification, depth,
    and fit between projected 3D box and the 2D detection

    The new score of a box is 0.95 * its original score plus 0.05 * the mean
    of a depth score and a box fit score. The depth score is
    1 - depth / max_depth clipped to [0.1, 1.0]. The box fit score is
    1 - the summed corner errors (normalized by the 2D box width and height),
    or 0.1 if the 3D box could not be projected.

    Args:
        dataset: Dataset object
        sample_name: Sample name, e.g. '000050'
        img_shape: Image shape [h, w]
        boxes_2d: List of 2D boxes
        boxes_3d: List of 3D boxes
        valid_scores: List of box scores
        max_depth: Maximum depth, default 45m (95% of KITTI objects)

    Returns:
        all_new_scores: array shaped like valid_scores with the new scores
    """
    all_new_scores = np.zeros_like(valid_scores)

    # Nothing to score, so the calibration does not need to be read
    if len(boxes_2d) == 0 or len(boxes_3d) == 0:
        return all_new_scores

    # The calibration and image size are the same for every box of the
    # sample, so they are read once instead of once per box
    cam_p = calib_utils.get_frame_calib(dataset.calib_dir, sample_name).p2
    image_size = (img_shape[1], img_shape[0])

    for pred_idx, (box_2d, box_3d) in enumerate(zip(boxes_2d, boxes_3d)):

        # Project 3D box to 2D [x1, y1, x2, y2]
        projected_box_3d = box_3d_projector.project_to_image_space(
            box_3d, cam_p, truncate=True, image_size=image_size)

        # Change box_2d to iou format
        box_2d_iou_fmt = np.squeeze(
            box_3d_encoder.boxes_2d_to_iou_fmt([box_2d]))

        if projected_box_3d is None:
            # Truncated box
            new_score_box_fit = 0.1
        else:
            # Calculate corner error relative to the 2D box size
            height = box_2d_iou_fmt[3] - box_2d_iou_fmt[1]
            width = box_2d_iou_fmt[2] - box_2d_iou_fmt[0]

            x1_err = np.abs((box_2d_iou_fmt[0] - projected_box_3d[0]) / width)
            x2_err = np.abs((box_2d_iou_fmt[2] - projected_box_3d[2]) / width)
            y1_err = np.abs((box_2d_iou_fmt[1] - projected_box_3d[1]) / height)
            y2_err = np.abs((box_2d_iou_fmt[3] - projected_box_3d[3]) / height)

            corner_err = x1_err + x2_err + y1_err + y2_err
            new_score_box_fit = 1.0 - corner_err

        # Index 2 of the box_3d is used as its depth
        depth = box_3d[2]
        new_score_depth = np.clip(1.0 - (depth / max_depth), 0.1, 1.0)

        new_score_depth_box_fit = (new_score_depth + new_score_box_fit) / 2.0

        # The original score dominates; depth and fit only adjust it
        mscnn_score = valid_scores[pred_idx]
        new_score = 0.95 * mscnn_score + 0.05 * new_score_depth_box_fit

        all_new_scores[pred_idx] = new_score

    return all_new_scores
def get_sample_dict(self, indices):
    """ Loads input-output data for a set of samples. Should only be
        called when a particular sample dict is required. Otherwise,
        samples should be provided by the next_batch function

    For each index the image, the p2 camera matrix and the object labels of
    the sample are loaded. In 'train' and 'val' mode the KITTI labels are
    read (merged with the mscnn detections in 'val' mode when
    use_mscnn_detections is set), together with instance masks and the depth
    map. In 'test' mode only the mscnn labels are read. Samples that have no
    objects left after filtering contribute None instead of a dict.

    Args:
        indices: A list of sample indices from the dataset.sample_list
            to be loaded

    Return:
        samples: a list of data sample dicts, one entry per index
            (None for samples without objects)

    Raises:
        ValueError: for an invalid run mode or box_jitter_type, or when a
            jitter type that needs oversampling is used without it
    """
    sample_dicts = []
    for sample_idx in indices:

        sample = self.sample_list[sample_idx]
        sample_name = sample.name

        # Load image (BGR -> RGB)
        # NOTE(review): cv2.imread returns None for an unreadable file, in
        # which case the slicing below fails -- confirm images always exist
        bgr_image = cv2.imread(self.get_rgb_image_path(sample_name))
        rgb_image = bgr_image[..., ::-1]
        image_shape = rgb_image.shape[0:2]
        image_input = rgb_image

        # Get calibration
        frame_calib = calib_utils.get_frame_calib(self.calib_dir, sample_name)
        cam_p = frame_calib.p2

        # Only read labels if they exist
        if self.train_val_test in ['train', 'val']:

            # Read KITTI object labels
            kitti_obj_labels = obj_utils.read_labels(
                self.kitti_label_dir, sample_name)

            if self.use_mscnn_detections and self.train_val_test == 'val':
                # Read mscnn obj labels and replace the KITTI obj label box coords and scores
                mscnn_obj_labels = obj_utils.read_labels(
                    self.mscnn_label_dir, sample_name)

                obj_labels = obj_utils.merge_kitti_and_mscnn_obj_labels(
                    kitti_obj_labels, mscnn_obj_labels,
                    min_iou=self.mscnn_merge_min_iou,
                    default_score_type='distance')
            else:
                obj_labels = kitti_obj_labels

            # Number of labels before filtering, used to build the
            # instance mask list below
            num_all_objs = len(obj_labels)

            # Filter labels
            obj_labels, obj_mask = obj_utils.apply_obj_filter(
                obj_labels, self.obj_filter)
            num_objs = len(obj_labels)
            if num_objs < 1:
                # Keep the output aligned with indices
                sample_dicts.append(None)
                continue

            if self.use_mscnn_detections:
                # Get filtered original kitti_obj_labels
                kitti_obj_labels, kitti_obj_mask = obj_utils.apply_obj_filter(
                    kitti_obj_labels, self.obj_filter)
                num_kitti_objs = len(kitti_obj_labels)
                if num_kitti_objs < 1:
                    sample_dicts.append(None)
                    continue

            # Load instance masks and keep those of the filtered labels
            instance_image = instance_utils.get_instance_image(
                sample_name, self.instance_dir)
            instance_masks = instance_utils.get_instance_mask_list(
                instance_image, num_all_objs)
            instance_masks = instance_masks[obj_mask]

            if self.oversample:
                # Oversample to required number of boxes
                # NOTE(review): negative if num_objs > self.num_boxes, which
                # np.random.choice would reject -- confirm num_boxes is an
                # upper bound on the number of objects
                num_to_oversample = self.num_boxes - num_objs

                # The original labels come first, followed by randomly
                # repeated ones
                oversample_indices = np.random.choice(
                    num_objs, num_to_oversample, replace=True)
                oversample_indices = np.hstack(
                    [np.arange(0, num_objs), oversample_indices])
                # assumes obj_labels supports index arrays (e.g. a numpy
                # array) -- TODO confirm
                obj_labels = obj_labels[oversample_indices]
                instance_masks = instance_masks[oversample_indices]

            # Augmentation if in train mode
            if self.train_val_test == 'train':

                # Image augmentation
                use_image_aug = self.aug_config.use_image_aug
                if use_image_aug:
                    image_input = kitti_aug.apply_image_noise(rgb_image)

                # Box jittering
                box_jitter_type = self.aug_config.box_jitter_type
                if box_jitter_type is None:
                    pass

                elif box_jitter_type == 'oversample':
                    # Replace oversampled boxes with jittered boxes
                    if not self.oversample:
                        raise ValueError(
                            'Must oversample object labels to use {} '
                            'box jitter type'.format(box_jitter_type))
                    # Only the labels after the first num_objs (the
                    # oversampled ones) are jittered
                    aug_labels = kitti_aug.jitter_obj_boxes_2d(
                        obj_labels[num_objs:], 0.7, image_shape)
                    obj_labels[num_objs:] = aug_labels

                elif box_jitter_type == 'oversample_gt':
                    # Replace oversampled boxes with jittered gt boxes
                    if not self.oversample:
                        raise ValueError(
                            'Must oversample object labels to use {} '
                            'box jitter type'.format(box_jitter_type))

                    # Get enough gt boxes to jitter
                    # NOTE(review): num_kitti_objs is only assigned above
                    # when self.use_mscnn_detections is set -- confirm this
                    # jitter type is never used without it
                    gt_num_to_oversample = self.num_boxes - num_objs
                    gt_oversample_indices = np.random.choice(
                        num_kitti_objs, gt_num_to_oversample, replace=True)
                    kitti_obj_labels = kitti_obj_labels[
                        gt_oversample_indices]

                    aug_labels = kitti_aug.jitter_obj_boxes_2d(
                        kitti_obj_labels, 0.7, image_shape)
                    obj_labels[num_objs:] = aug_labels

                elif box_jitter_type == 'all':
                    # Apply data augmentation on all labels
                    obj_labels = kitti_aug.jitter_obj_boxes_2d(
                        obj_labels, 0.7, image_shape)
                else:
                    raise ValueError('Invalid box_jitter_type',
                                     box_jitter_type)

            # TODO: Do this some other way
            # Get 2D and 3D boxes
            label_boxes_2d = obj_utils.boxes_2d_from_obj_labels(obj_labels)
            label_boxes_3d = obj_utils.boxes_3d_from_obj_labels(obj_labels)

            label_alphas = np.asarray(
                [obj_label.alpha for obj_label in obj_labels],
                dtype=np.float32)

            # Encode each alpha; the encoder returns three values per label,
            # which are regrouped into three sequences here
            label_alpha_bins, label_alpha_regs, label_valid_alpha_bins = \
                zip(*[orientation_encoder.np_orientation_to_angle_bin(
                    obj_label.alpha, self.num_alpha_bins,
                    self.alpha_bin_overlap)
                    for obj_label in obj_labels])

            # Get viewing angles
            label_viewing_angles_2d = np.asarray([
                obj_utils.get_viewing_angle_box_2d(box_2d, cam_p)
                for box_2d in label_boxes_2d
            ], dtype=np.float32)
            label_viewing_angles_3d = np.asarray([
                obj_utils.get_viewing_angle_box_3d(box_3d, cam_p)
                for box_3d in label_boxes_3d
            ], dtype=np.float32)

            # Parse class indices, stored as an (N, 1) int32 array
            label_class_indices = [
                obj_utils.class_str_to_index(obj_label.type, self.classes)
                for obj_label in obj_labels
            ]
            label_class_indices = np.expand_dims(np.asarray(
                label_class_indices, dtype=np.int32), axis=1)
            label_class_strs = [obj_label.type for obj_label in obj_labels]

            # Get proposal z centroid offset
            prop_cen_z_offset_list = np.asarray([
                instance_utils.get_prop_cen_z_offset(class_str)
                for class_str in label_class_strs
            ])

            # Get xyz map in cam_N frame
            depth_map = obj_utils.get_depth_map(sample_name, self.depth_dir)

            # Get scores
            label_scores = np.asarray(
                [obj_label.score for obj_label in obj_labels], np.float32)

            # Get lwh average
            lwh_means = np.asarray([
                obj_utils.get_mean_lwh_and_std_dev(class_str)[0]
                for class_str in label_class_strs
            ])

        elif self.train_val_test == 'test':
            # Read object test labels
            obj_labels = obj_utils.read_labels(self.mscnn_label_dir,
                                               sample_name)
            num_objs = len(obj_labels)
            if num_objs < 1:
                sample_dicts.append(None)
                continue

            # Just filter classes
            obj_labels, obj_mask = obj_utils.apply_obj_filter(
                obj_labels, self.obj_filter)
            num_objs = len(obj_labels)
            if num_objs < 1:
                sample_dicts.append(None)
                continue

            # Oversample to required number of boxes
            # (done regardless of self.oversample in test mode)
            # NOTE(review): negative if num_objs > self.num_boxes -- confirm
            # num_boxes is an upper bound on the number of objects
            num_to_oversample = self.num_boxes - num_objs
            oversample_indices = np.random.choice(
                num_objs, num_to_oversample, replace=True)
            oversample_indices = np.hstack(
                [np.arange(0, num_objs), oversample_indices])
            obj_labels = obj_labels[oversample_indices]

            # Get 2D boxes
            label_boxes_2d = obj_utils.boxes_2d_from_obj_labels(obj_labels)

            # Get score
            label_scores = np.asarray(
                [obj_label.score for obj_label in obj_labels], np.float32)

            # Calculate viewing angles
            label_viewing_angles_2d = np.asarray([
                obj_utils.get_viewing_angle_box_2d(box_2d, cam_p)
                for box_2d in label_boxes_2d
            ], dtype=np.float32)

            # Parse class indices, stored as an (N, 1) int32 array
            label_class_indices = [
                obj_utils.class_str_to_index(obj_label.type, self.classes)
                for obj_label in obj_labels
            ]
            label_class_indices = np.expand_dims(np.asarray(
                label_class_indices, dtype=np.int32), axis=1)
            label_class_strs = [obj_label.type for obj_label in obj_labels]

            # Get lwh average
            lwh_means = np.asarray([
                obj_utils.get_mean_lwh_and_std_dev(class_str)[0]
                for class_str in label_class_strs
            ])

            # Get proposal z centroid offset
            prop_cen_z_offset_list = np.asarray([
                instance_utils.get_prop_cen_z_offset(class_str)
                for class_str in label_class_strs
            ])

        else:
            raise ValueError('Invalid run mode', self.train_val_test)

        # Common inputs for all train_val_test modes
        # Normalize 2D boxes by dividing by [h, w, h, w]
        label_boxes_2d_norm = label_boxes_2d / np.tile(image_shape, 2)

        # num_objs is the number of filtered labels before any oversampling
        sample_dict = {
            constants.SAMPLE_NUM_OBJS: num_objs,

            constants.SAMPLE_IMAGE_INPUT: image_input,
            constants.SAMPLE_CAM_P: cam_p,
            constants.SAMPLE_NAME: sample_name,

            constants.SAMPLE_LABEL_BOXES_2D_NORM: label_boxes_2d_norm,
            constants.SAMPLE_LABEL_BOXES_2D: label_boxes_2d,
            constants.SAMPLE_LABEL_SCORES: label_scores,
            constants.SAMPLE_LABEL_CLASS_STRS:
                np.expand_dims(label_class_strs, 1),
            constants.SAMPLE_LABEL_CLASS_INDICES: label_class_indices,

            constants.SAMPLE_MEAN_LWH: lwh_means,
            constants.SAMPLE_PROP_CEN_Z_OFFSET: prop_cen_z_offset_list,

            constants.SAMPLE_VIEWING_ANGLES_2D: label_viewing_angles_2d,
        }

        # Entries that are only available when labels were read
        if self.train_val_test in ['train', 'val']:
            sample_dict.update({
                constants.SAMPLE_LABEL_BOXES_3D: label_boxes_3d,

                constants.SAMPLE_ALPHAS: label_alphas,
                constants.SAMPLE_ALPHA_BINS: np.asarray(label_alpha_bins),
                constants.SAMPLE_ALPHA_REGS: np.asarray(label_alpha_regs),
                constants.SAMPLE_ALPHA_VALID_BINS:
                    np.asarray(label_valid_alpha_bins),

                constants.SAMPLE_VIEWING_ANGLES_3D: label_viewing_angles_3d,

                constants.SAMPLE_INSTANCE_MASKS: instance_masks,
                constants.SAMPLE_DEPTH_MAP: depth_map,
            })

        elif self.train_val_test == 'test':
            # No additional labels for test mode
            pass

        sample_dicts.append(sample_dict)

    return sample_dicts