def get_3d_info(anno, cam_calib, cam_pose):
    """Collect 3D information of an annotation in camera and world frames."""
    h, w, l = anno['dimension']
    depth = anno['location'][2]
    alpha = anno['alpha']
    xc, yc = anno['box_center']
    obj_class = anno['obj_type']

    # Back-project the 2D box center with its depth into camera coordinates
    points_cam = tu.imagetocamera(
        np.array([[xc, yc]]), np.array([depth]), cam_calib)

    # Bird's-eye-view center (x, z) and yaw around the camera Y axis
    bev_center = points_cam[0, [0, 2]]
    yaw = tu.alpha2rot_y(alpha, bev_center[0], bev_center[1])  # rad
    quat_yaw = Quaternion(axis=[0, 1, 0], radians=yaw)
    quat_cam_rot = Quaternion(matrix=cam_pose.rotation)
    quat_yaw_world = quat_cam_rot * quat_yaw

    box3d = tu.computeboxes([yaw], (h, w, l), points_cam)
    points_world = tu.cameratoworld(points_cam, cam_pose)

    output = {
        'center': bev_center,
        'loc_cam': points_cam,
        'loc_world': points_world,
        'yaw': yaw,
        'yaw_world_quat': quat_yaw_world,
        'box3d': box3d,
        'class': obj_class
    }

    return output
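
# --- Illustrative sketches (not used by the pipeline) -----------------------
# Minimal, self-contained versions of the two geometry helpers get_3d_info
# relies on. They assume `cam_calib` is a 3x4 projection matrix with zero
# skew; the actual `tu.imagetocamera` / `tu.alpha2rot_y` in the repo may
# differ in detail.


def imagetocamera_sketch(points, depths, projection):
    """Back-project (N, 2) pixel coords with (N,) depths to (N, 3) points
    in camera coordinates using the top-left 3x3 (intrinsic) block."""
    corners = np.hstack([points, np.ones((points.shape[0], 1))])
    corners_cam = corners @ np.linalg.inv(projection[:, :3]).T
    return corners_cam * depths.reshape(-1, 1)


def alpha2rot_y_sketch(alpha, x, z):
    """Observation angle alpha -> yaw around the camera Y axis:
    rot_y = alpha + arctan2(x, z), wrapped to [-pi, pi]."""
    rot_y = alpha + np.arctan2(x, z)
    return (rot_y + np.pi) % (2 * np.pi) - np.pi
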
def get_box_obj(depth, alpha, dim, cen, cam_calib, pose):
    """Lift per-object 2D centers into the world frame.

    Returns a tuple of arrays: (N, 7) objects as world (x, y, z), world yaw
    and dimensions, and (N, 2) world-frame (roll, pitch) angles.
    """
    objs_list = []
    roll_pitch_list = []
    for i in range(len(depth)):
        # Back-project the 2D center with its depth into camera coordinates
        loc_cam = tu.imagetocamera(cen[i:i + 1], depth[i:i + 1], cam_calib[i])
        yaw = tu.alpha2rot_y(alpha[i:i + 1], loc_cam[:, 0:1], loc_cam[:, 2:3])
        quat_yaw = Quaternion(axis=[0, 1, 0], radians=yaw)
        quat_cam_rot = Quaternion(matrix=pose[i].rotation)
        quat_yaw_world = quat_cam_rot * quat_yaw
        # Keep quaternions in a consistent hemisphere
        if quat_yaw_world.z < 0:
            quat_yaw_world *= -1
        roll_world, pitch_world, yaw_world = tu.quaternion_to_euler(
            quat_yaw_world.w, quat_yaw_world.x, quat_yaw_world.y,
            quat_yaw_world.z)
        loc_glb = tu.cameratoworld(loc_cam, pose[i])
        roll_pitch_list.append([roll_world, pitch_world])
        objs_list.append(
            np.hstack([loc_glb, np.array([[yaw_world]]),
                       dim[i:i + 1]]).flatten())

    return np.array(objs_list), np.array(roll_pitch_list)
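
# Illustrative sketch (not used by the pipeline): what `tu.cameratoworld` is
# assumed to do, with the pose's rotation / translation passed explicitly.
# The repo's pose object (accessed above as pose[i].rotation) may expose a
# different interface.


def cameratoworld_sketch(points_cam, rotation, position):
    """Map (N, 3) camera-frame points to the world frame given a 3x3
    camera-to-world rotation and a 3-vector camera position."""
    return points_cam @ rotation.T + position.reshape(1, 3)

# Example (values are made up): a camera yawed 90 degrees about the world Y
# axis, positioned at (10, 0, 5), observing a point 20 m ahead of it:
#   rotation = Quaternion(axis=[0, 1, 0], degrees=90).rotation_matrix
#   position = np.array([10.0, 0.0, 5.0])
#   cameratoworld_sketch(np.array([[0.0, 1.5, 20.0]]), rotation, position)
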
def save_trk_txt(self,
                 outputs,
                 cfg,
                 img_meta,
                 use_3d_box_center=False,
                 adjust_center=False):
    """Write tracking results of one frame in KITTI tracking format.

    #Values    Name         Description
    ----------------------------------------------------------------------
       1       frame        Frame within the sequence where the object
                            appears
       1       track id     Unique tracking id of this object within this
                            sequence
       1       type         Describes the type of object: 'Car', 'Van',
                            'Truck', 'Pedestrian', 'Person_sitting',
                            'Cyclist', 'Tram', 'Misc' or 'DontCare'
       1       truncated    Float from 0 (non-truncated) to 1 (truncated),
                            where truncated refers to the object leaving
                            image boundaries. Truncation 2 indicates an
                            ignored object (in particular in the beginning
                            or end of a track) introduced by manual
                            labeling.
       1       occluded     Integer (0,1,2,3) indicating occlusion state:
                            0 = fully visible, 1 = partly occluded,
                            2 = largely occluded, 3 = unknown
       1       alpha        Observation angle of object, ranging [-pi..pi]
       4       bbox         2D bounding box of object in the image
                            (0-based index): contains left, top, right,
                            bottom pixel coordinates
       3       dimensions   3D object dimensions: height, width, length
                            (in meters)
       3       location     3D object location x, y, z in camera
                            coordinates (in meters)
       1       rotation_y   Rotation ry around Y-axis in camera
                            coordinates [-pi..pi]
       1       score        Only for results: Float, indicating confidence
                            in detection, needed for p/r curves, higher is
                            better.

    Args:
        outputs (dict): prediction results.
        cfg (dict): a dict that maps class names to label indices.
        img_meta (dict): image meta information.
    """
    out_folder = os.path.join(self.out, 'txts')
    os.makedirs(out_folder, exist_ok=True)
    img_info = img_meta[0]['img_info']
    vid_name = os.path.dirname(img_info['file_name']).split('/')[-1]
    txt_file = os.path.join(out_folder, '{}.txt'.format(vid_name))

    # Expand dimension of results
    n_obj_detect = len(outputs['track_results'])
    if outputs.get('depth_results', None) is not None:
        depths = outputs['depth_results'].cpu().numpy().reshape(-1, 1)
    else:
        depths = np.full((n_obj_detect, 1), -1000)
    if outputs.get('dim_results', None) is not None:
        dims = outputs['dim_results'].cpu().numpy().reshape(-1, 3)
    else:
        dims = np.full((n_obj_detect, 3), -1000)
    if outputs.get('alpha_results', None) is not None:
        alphas = outputs['alpha_results'].cpu().numpy().reshape(-1, 1)
    else:
        alphas = np.full((n_obj_detect, 1), -10)
    if outputs.get('cen_2ds_results', None) is not None:
        centers = outputs['cen_2ds_results'].cpu().numpy().reshape(-1, 2)
    else:
        centers = [None] * n_obj_detect

    lines = []
    for (trackId, bbox), depth, dim, alpha, cen in zip(
            outputs['track_results'].items(), depths, dims, alphas, centers):
        loc, label = bbox['bbox'], bbox['label']
        if use_3d_box_center and cen is not None:
            box_cen = cen
        else:
            box_cen = np.array([loc[0] + loc[2], loc[1] + loc[3]]) / 2
        if alpha == -10:
            roty = np.full((1, ), -10)
        else:
            roty = tu.alpha2rot_y(alpha,
                                  box_cen[0] - img_info['width'] / 2,
                                  img_info['cali'][0][0])
        if np.all(depths == -1000):
            trans = np.full((3, ), -1000)
        else:
            trans = tu.imagetocamera(box_cen[None], depth,
                                     np.array(img_info['cali'])).flatten()

        if adjust_center:
            # KITTI GT uses the bottom of the car as center (x, 0, z).
            # Prediction uses center of the bbox as center (x, y, z).
            # So we align them to the bottom center as GT does.
            trans[1] += dim[0] / 2.0

        cat = ''
        for key in cfg:
            if bbox['label'] == cfg[key]:
                cat = key.lower()
                break
        if cat == '':
            continue

        # Create lines of results
        line = f"{img_info['index']} {trackId} {cat} {-1} {-1} " \
               f"{alpha.item():.6f} " \
               f"{loc[0]:.6f} {loc[1]:.6f} {loc[2]:.6f} {loc[3]:.6f} " \
               f"{dim[0]:.6f} {dim[1]:.6f} {dim[2]:.6f} " \
               f"{trans[0]:.6f} {trans[1]:.6f} {trans[2]:.6f} " \
               f"{roty.item():.6f} {loc[4]:.6f}\n"
        lines.append(line)

    if txt_file in self.writed:
        mode = 'a'
    else:
        mode = 'w'
        self.writed.append(txt_file)
    if len(lines) > 0:
        with open(txt_file, mode) as f:
            f.writelines(lines)
    else:
        # Still create/truncate the file so every video gets a txt
        with open(txt_file, mode):
            pass
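
# Illustrative sketch (not used by the pipeline): the KITTI tracking line
# format emitted by save_trk_txt, with made-up values. Truncated and
# occluded are written as -1 by this writer.


def example_kitti_trk_line():
    frame_idx, track_id, cat, score = 12, 3, 'car', 0.91
    alpha, bbox = -1.42, (612.3, 178.5, 701.8, 241.9)  # left, top, right, bottom
    dim, loc, roty = (1.52, 1.63, 3.88), (4.21, 1.65, 23.7), -1.25  # h w l / x y z
    return (f"{frame_idx} {track_id} {cat} {-1} {-1} "
            f"{alpha:.6f} "
            f"{bbox[0]:.6f} {bbox[1]:.6f} {bbox[2]:.6f} {bbox[3]:.6f} "
            f"{dim[0]:.6f} {dim[1]:.6f} {dim[2]:.6f} "
            f"{loc[0]:.6f} {loc[1]:.6f} {loc[2]:.6f} "
            f"{roty:.6f} {score:.6f}\n")
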
def general_output(coco_json, outputs, img_info, use_3d_box_center, pred_id,
                   modelcats, out_path):
    if 'Nusc' in out_path:
        cats_mapping = nusc_mapping
    elif 'Waymo' in out_path:
        cats_mapping = waymo_mapping
    else:
        cats_mapping = kitti_gta_mapping

    if 'categories' not in coco_json:
        for k, v in cats_mapping.items():
            coco_json['categories'].append(dict(id=v, name=k))

    if img_info.get('is_key_frame') is not None and img_info['is_key_frame']:
        img_info['index'] = img_info['key_frame_index']

    img_info['id'] = len(coco_json['images'])
    vid_name = osp.dirname(img_info['file_name']).split('/')[-1]
    if img_info['first_frame']:
        coco_json['videos'].append(
            dict(id=img_info['video_id'], name=vid_name))

    # pruning img_info
    img_info.pop('filename')
    img_info.pop('type')
    coco_json['images'].append(img_info)

    # Expand dimension of results
    n_obj_detect = len(outputs['track_results'])
    if outputs.get('depth_results', None) is not None:
        depths = outputs['depth_results'].cpu().numpy().reshape(-1, 1)
    else:
        depths = np.ones([n_obj_detect, 1]) * -1000
    if outputs.get('dim_results', None) is not None:
        dims = outputs['dim_results'].cpu().numpy().reshape(-1, 3)
    else:
        dims = np.ones([n_obj_detect, 3]) * -1000
    if outputs.get('alpha_results', None) is not None:
        alphas = outputs['alpha_results'].cpu().numpy().reshape(-1, 1)
    else:
        alphas = np.ones([n_obj_detect, 1]) * -10
    if outputs.get('cen_2ds_results', None) is not None:
        centers = outputs['cen_2ds_results'].cpu().numpy().reshape(-1, 2)
    else:
        centers = [None] * n_obj_detect
    if outputs.get('depth_uncertainty_results', None) is not None:
        depths_uncertainty = outputs['depth_uncertainty_results'].cpu(
        ).numpy().reshape(-1, 1)
    else:
        depths_uncertainty = [None] * n_obj_detect

    for (trackId, bbox), depth, dim, alpha, cen, depth_uncertainty in zip(
            outputs['track_results'].items(), depths, dims, alphas, centers,
            depths_uncertainty):
        box = bbox['bbox'].astype(float).tolist()
        cat = ''
        for key in modelcats:
            if bbox['label'] == modelcats[key]:
                cat = key.lower()
                break
        if cat == '':
            continue

        x1 = box[0]
        y1 = box[1]
        x2 = box[2]
        y2 = box[3]
        score = box[4]

        if use_3d_box_center and cen is not None:
            box_cen = cen
        else:
            box_cen = np.array([x1 + x2, y1 + y2]) / 2
        if alpha == -10:
            rot_y = -10
        else:
            rot_y = tu.alpha2rot_y(alpha,
                                   box_cen[0] - img_info['width'] / 2,
                                   img_info['cali'][0][0])
        if np.all(depths == -1000):
            trans = np.ones([1, 3]) * -1000
        else:
            trans = tu.imagetocamera(box_cen[np.newaxis], depth,
                                     np.array(img_info['cali'])).flatten()

        ann = dict(
            id=pred_id,
            image_id=img_info['id'],
            category_id=cats_mapping[cat],
            instance_id=trackId.tolist(),
            alpha=float(alpha),
            roty=float(rot_y),
            dimension=dim.astype(float).tolist(),
            translation=trans.astype(float).tolist(),
            is_occluded=False,
            is_truncated=False,
            bbox=[x1, y1, x2 - x1, y2 - y1],
            area=(x2 - x1) * (y2 - y1),
            center_2d=box_cen.astype(float).tolist(),
            uncertainty=float(depth_uncertainty),
            depth=depth.tolist(),
            iscrowd=False,
            ignore=False,
            segmentation=[[x1, y1, x1, y2, x2, y2, x2, y1]],
            score=score)
        coco_json['annotations'].append(ann)
        pred_id += 1

    return coco_json, pred_id
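
# Illustrative sketch (not used by the pipeline): how general_output is
# assumed to be driven over a sequence. `frames` here is a hypothetical
# iterable of (outputs, img_info) pairs; the real driver lives elsewhere
# in the repo.


def example_driver_sketch(frames, modelcats, out_path):
    from collections import defaultdict

    # defaultdict(list) provides the 'categories' / 'videos' / 'images' /
    # 'annotations' lists that general_output appends into on first access.
    coco_json = defaultdict(list)
    pred_id = 0
    for outputs, img_info in frames:
        coco_json, pred_id = general_output(coco_json, outputs, img_info,
                                            True, pred_id, modelcats,
                                            out_path)
    return coco_json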