def load_img_with_targets(self, index):
    """Load the BEV feature map and YOLO targets for one train/val sample."""
    sample_id = int(self.sample_id_list[index])
    lidar_points = self.get_lidar(sample_id)
    objects = self.get_label(sample_id)
    calib = self.get_calib(sample_id)

    labels, no_object_labels = kitti_bev_utils.read_labels_for_bevbox(objects)
    if not no_object_labels:
        # convert rect cam to velo cord
        labels[:, 1:] = transformation.camera_to_lidar_box(
            labels[:, 1:], calib.V2C, calib.R0, calib.P)

    if self.aug_transforms is not None:
        lidar_points, labels[:, 1:] = self.aug_transforms(lidar_points, labels[:, 1:])

    cropped = kitti_bev_utils.removePoints(lidar_points, cnf.boundary)
    rgb_map = kitti_bev_utils.makeBVFeature(cropped, cnf.DISCRETIZATION, cnf.boundary)
    target = kitti_bev_utils.build_yolo_target(labels)
    img_file = os.path.join(self.image_dir, '{:06d}.png'.format(sample_id))

    # on image space: targets are formatted as (box_idx, class, x, y, w, l, sin(yaw), cos(yaw))
    n_target = len(target)
    targets = torch.zeros((n_target, 8))
    if n_target > 0:
        targets[:, 1:] = torch.from_numpy(target)

    rgb_map = torch.from_numpy(rgb_map).float()
    if self.hflip_transform is not None:
        rgb_map, targets = self.hflip_transform(rgb_map, targets)

    return img_file, rgb_map, targets
def remove_invalid_idx(self, image_idx_list):
    """Discard samples which don't have current training class objects, which will not be used for training."""
    kept_sample_ids = []
    for raw_id in image_idx_list:
        sample_id = int(raw_id)
        objects = self.get_label(sample_id)
        calib = self.get_calib(sample_id)
        labels, no_object_labels = kitti_bev_utils.read_labels_for_bevbox(objects)
        if not no_object_labels:
            # convert rect cam to velo cord
            labels[:, 1:] = transformation.camera_to_lidar_box(
                labels[:, 1:], calib.V2C, calib.R0, calib.P)

        # Keep the sample if at least one label is a training class inside
        # the configured point-cloud range.
        valid_list = [
            labels[row, 0]
            for row in range(labels.shape[0])
            if int(labels[row, 0]) in cnf.CLASS_NAME_TO_ID.values()
            and self.check_point_cloud_range(labels[row, 1:4])
        ]
        if valid_list:
            kept_sample_ids.append(sample_id)

    return kept_sample_ids
def load_targets(self, sample_id):
    """Load images and targets for the training and validation phase"""
    objects = self.get_label(sample_id)
    labels, _ = kitti_bev_utils.read_labels_for_bevbox(objects)
    # on image space: targets are formatted as (class, x, y, w, l, sin(yaw), cos(yaw))
    return kitti_bev_utils.build_yolo_target(labels)
def __getitem__(self, index):
    """Return (img_file, bev_map, targets) for train/val, or (img_file, bev_map) otherwise.

    In train/val mode the targets tensor is (n_boxes, 8) formatted as
    (box_idx, class, y, x, w, l, sin(yaw), cos(yaw)); box_idx is left 0 here.
    """
    sample_id = int(self.sample_id_list[index])
    if self.mode in ['train', 'val']:
        lidarData = self.get_lidar(sample_id)
        objects = self.get_label(sample_id)
        calib = self.get_calib(sample_id)
        labels, noObjectLabels = kitti_bev_utils.read_labels_for_bevbox(objects)
        if not noObjectLabels:
            labels[:, 1:] = transformation.camera_to_lidar_box(
                labels[:, 1:], calib.V2C, calib.R0, calib.P)  # convert rect cam to velo cord

        if self.data_aug and self.mode == 'train':
            lidarData, labels[:, 1:] = transformation.complex_yolo_pc_augmentation(
                lidarData, labels[:, 1:], True)

        b = kitti_bev_utils.removePoints(lidarData, cnf.boundary)
        rgb_map = kitti_bev_utils.makeBVFeature(b, cnf.DISCRETIZATION, cnf.boundary)
        target = kitti_bev_utils.build_yolo_target(labels)
        img_file = os.path.join(self.image_dir, '{:06d}.png'.format(sample_id))

        # Count real boxes; all-zero rows in `target` are padding.
        ntargets = sum(1 for t in target if t.sum(0))
        # (box_idx, class, y, x, w, l, sin(yaw), cos(yaw))
        targets = torch.zeros((ntargets, 8))
        # BUG FIX: the original wrote rows at the enumerate index of `target`,
        # which overruns `targets` (or leaves zero rows in it) whenever an
        # all-zero padding row precedes a valid one. Use a separate write
        # cursor so the `ntargets` kept rows are packed contiguously.
        row = 0
        for t in target:
            if t.sum(0):
                targets[row, 1:] = torch.from_numpy(t)
                row += 1

        img = torch.from_numpy(rgb_map).float()
        if self.data_aug:
            # NOTE(review): this gate checks data_aug only, so random flips can
            # also fire in 'val' mode — preserved as-is; confirm intent.
            if np.random.random() < 0.5:
                img, targets = self.horizontal_flip(img, targets)

        return img_file, img, targets
    else:
        lidarData = self.get_lidar(sample_id)
        b = kitti_bev_utils.removePoints(lidarData, cnf.boundary)
        rgb_map = kitti_bev_utils.makeBVFeature(b, cnf.DISCRETIZATION, cnf.boundary)
        img_file = os.path.join(self.image_dir, '{:06d}.png'.format(sample_id))
        return img_file, rgb_map
def load_targets(self, sample_id):
    """Load images and targets for the training and validation phase"""
    objects = self.get_label(sample_id)
    labels, no_object_labels = kitti_bev_utils.read_labels_for_bevbox(objects)
    calib = self.get_calib(sample_id)
    if not no_object_labels:
        # rect-camera frame -> velodyne frame
        labels[:, 1:] = transformation.camera_to_lidar_box(
            labels[:, 1:], calib.V2C, calib.R0, calib.P)
    # on image space: targets are formatted as (class, x, y, z, h, w, l, yaw)
    return kitti_bev_utils.build_yolo_target(labels)
def predictions_to_kitti_format(img_detections, calib, img_shape_2d, img_size, RGB_Map=None):
    """Convert BEV-space YOLO detections to KITTI-format Object3d instances.

    Parameters:
        img_detections: iterable of per-image detections; each entry is None or
            rows of (x, y, z, h, w, l, im, re, ..., cls_pred) in BEV pixels.
        calib: calibration object providing V2C, R0, P and
            corners3d_to_img_boxes().
        img_shape_2d: (height, width) of the camera image, used to clip the
            projected 2D boxes.
        img_size: BEV image side length used to normalize box coordinates.
        RGB_Map: optional BEV map; when given, predicted boxes are drawn on it.

    Returns:
        list of kitti_data_utils.Object3d with t, h/w/l, ry, alpha, box2d set.
    """
    predictions = []
    for detections in img_detections:
        if detections is None:
            continue
        # Rescale boxes from BEV pixels to normalized coordinates.
        for x, y, z, h, w, l, im, re, *_, cls_pred in detections:
            predictions.append([cls_pred, x / img_size, y / img_size, z,
                                h / img_size, w / img_size, l / img_size, im, re])

    predictions = kitti_bev_utils.inverse_yolo_target(np.array(predictions), cnf.boundary)
    if predictions.shape[0]:
        predictions[:, 1:] = transformation.lidar_to_camera_box(
            predictions[:, 1:], calib.V2C, calib.R0, calib.P)

    # Class id -> KITTI type name (replaces an if/elif chain that shadowed
    # the builtin `str`).
    class_names = {0: "Car", 1: "Pedestrian", 2: "Cyclist"}

    objects_new = []
    corners3d = []
    for pred in predictions:
        cls_name = class_names.get(int(pred[0]), "Ignore")
        line = '%s -1 -1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0' % cls_name
        obj = kitti_data_utils.Object3d(line)
        obj.t = pred[1:4]
        obj.h, obj.w, obj.l = pred[4:7]
        # Wrap yaw into (-pi, pi].
        obj.ry = np.arctan2(math.sin(pred[7]), math.cos(pred[7]))
        _, corners_3d = kitti_data_utils.compute_box_3d(obj, calib.P)
        corners3d.append(corners_3d)
        objects_new.append(obj)

    if len(corners3d) > 0:
        corners3d = np.array(corners3d)
        img_boxes, _ = calib.corners3d_to_img_boxes(corners3d)
        # Clip projected boxes to the camera image bounds.
        img_boxes[:, 0] = np.clip(img_boxes[:, 0], 0, img_shape_2d[1] - 1)
        img_boxes[:, 1] = np.clip(img_boxes[:, 1], 0, img_shape_2d[0] - 1)
        img_boxes[:, 2] = np.clip(img_boxes[:, 2], 0, img_shape_2d[1] - 1)
        img_boxes[:, 3] = np.clip(img_boxes[:, 3], 0, img_shape_2d[0] - 1)
        # (An 80%-of-image box validity mask was computed here but never
        # used anywhere; dead code removed.)
        for i, obj in enumerate(objects_new):
            x, z, ry = obj.t[0], obj.t[2], obj.ry
            beta = np.arctan2(z, x)
            alpha = -np.sign(beta) * np.pi / 2 + beta + ry
            obj.alpha = alpha
            obj.box2d = img_boxes[i, :]

    if RGB_Map is not None:
        labels, noObjectLabels = kitti_bev_utils.read_labels_for_bevbox(objects_new)
        if not noObjectLabels:
            labels[:, 1:] = transformation.camera_to_lidar_box(
                labels[:, 1:], calib.V2C, calib.R0, calib.P)  # convert rect cam to velo cord
        target = kitti_bev_utils.build_yolo_target(labels)
        kitti_bev_utils.draw_box_in_bev(RGB_Map, target)

    return objects_new