Example #1
    def get_3d_info(anno, cam_calib, cam_pose):
        h, w, l = anno['dimension']
        depth = anno['location'][2]
        alpha = anno['alpha']
        xc, yc = anno['box_center']
        obj_class = anno['obj_type']

        # Back-project the 2D box center to camera coordinates at the annotated depth
        points_cam = tu.imagetocamera(np.array([[xc, yc]]), np.array([depth]),
                                      cam_calib)

        # Bird's-eye-view (x, z) center; observation angle -> yaw in the camera
        # frame, then rotate the yaw quaternion into the world frame
        bev_center = points_cam[0, [0, 2]]
        yaw = tu.alpha2rot_y(alpha, bev_center[0], bev_center[1])  # rad
        quat_yaw = Quaternion(axis=[0, 1, 0], radians=yaw)
        quat_cam_rot = Quaternion(matrix=cam_pose.rotation)
        quat_yaw_world = quat_cam_rot * quat_yaw

        # 3D box corners in the camera frame and the object center in the world frame
        box3d = tu.computeboxes([yaw], (h, w, l), points_cam)
        points_world = tu.cameratoworld(points_cam, cam_pose)

        output = {
            'center': bev_center,
            'loc_cam': points_cam,
            'loc_world': points_world,
            'yaw': yaw,
            'yaw_world_quat': quat_yaw_world,
            'box3d': box3d,
            'class': obj_class
        }
        return output
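
The two geometric helpers used above are not shown in this snippet. As a point of reference, here is a minimal sketch of what they are assumed to compute: standard pinhole back-projection and the usual KITTI alpha-to-yaw relation rot_y = alpha + arctan2(x, z). The `_sketch` functions below are illustrative assumptions, not the actual `tu` implementations, and they ignore the translation column of the 3x4 calibration matrix.

    import numpy as np

    def image_to_camera_sketch(points_2d, depths, calib):
        # Pinhole back-projection: x = (u - cx) * z / fx, y = (v - cy) * z / fy
        # (translation column of the 3x4 matrix ignored in this simplification)
        fx, fy = calib[0, 0], calib[1, 1]
        cx, cy = calib[0, 2], calib[1, 2]
        z = depths.reshape(-1, 1)
        x = (points_2d[:, 0:1] - cx) * z / fx
        y = (points_2d[:, 1:2] - cy) * z / fy
        return np.hstack([x, y, z])

    def alpha_to_rot_y_sketch(alpha, x, z):
        # Observation angle plus ray direction gives global yaw, wrapped to [-pi, pi]
        rot_y = alpha + np.arctan2(x, z)
        return (rot_y + np.pi) % (2 * np.pi) - np.pi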
Example #2
        def get_box_obj(depth, alpha, dim, cen, cam_calib,
                        pose) -> Tuple[np.ndarray, np.ndarray]:
            objs_list = []
            roll_pitch_list = []
            for i in range(len(depth)):
                loc_cam = tu.imagetocamera(cen[i:i + 1], depth[i:i + 1],
                                           cam_calib[i])
                yaw = tu.alpha2rot_y(alpha[i:i + 1], loc_cam[:, 0:1],
                                     loc_cam[:, 2:3])
                quat_yaw = Quaternion(axis=[0, 1, 0], radians=yaw)
                quat_cam_rot = Quaternion(matrix=pose[i].rotation)
                quat_yaw_world = quat_cam_rot * quat_yaw
                # q and -q encode the same rotation; flip the sign so all yaw
                # quaternions land in a consistent hemisphere
                if quat_yaw_world.z < 0:
                    quat_yaw_world *= -1
                roll_world, pitch_world, yaw_world = tu.quaternion_to_euler(
                    quat_yaw_world.w, quat_yaw_world.x, quat_yaw_world.y,
                    quat_yaw_world.z)
                loc_glb = tu.cameratoworld(loc_cam, pose[i])
                roll_pitch_list.append([roll_world, pitch_world])

                objs_list.append(
                    np.hstack([loc_glb,
                               np.array([[yaw_world]]),
                               dim[i:i + 1]]).flatten())
            return np.array(objs_list), np.array(roll_pitch_list)
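
Each row of the returned object array stacks the world location, the world yaw, and the three box dimensions (7 values), with roll/pitch collected separately. A minimal unpacking sketch, assuming the caller's input arrays and the same h, w, l ordering used in the other snippets:

    objs, roll_pitch = get_box_obj(depth, alpha, dim, cen, cam_calib, pose)
    x, y, z, yaw_world, h, w, l = objs[0]      # 3 + 1 + 3 = 7 values per object
    roll_world, pitch_world = roll_pitch[0]    # object roll/pitch in the world frame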
Example #3
    def save_trk_txt(self,
                     outputs,
                     cfg,
                     img_meta,
                     use_3d_box_center=False,
                     adjust_center=False):
        """
        #Values    Name      Description
        ----------------------------------------------------------------------
        1   frame       Frame within the sequence where the object appears
        1   track id    Unique tracking id of this object within this sequence
        1   type        Describes the type of object: 'Car', 'Van', 'Truck',
                        'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram',
                        'Misc' or 'DontCare'
        1   truncated   Float from 0 (non-truncated) to 1 (truncated), where
                        truncated refers to the object leaving image boundaries.
                        Truncation 2 indicates an ignored object (in particular
                        in the beginning or end of a track) introduced by manual
                        labeling.
        1   occluded    Integer (0,1,2,3) indicating occlusion state:
                        0 = fully visible, 1 = partly occluded
                        2 = largely occluded, 3 = unknown
        1   alpha       Observation angle of object, ranging [-pi..pi]
        4   bbox        2D bounding box of object in the image (0-based index):
                        contains left, top, right, bottom pixel coordinates
        3   dimensions  3D object dimensions: height, width, length (in meters)
        3   location    3D object location x,y,z in camera coordinates (in meters)
        1   rotation_y  Rotation ry around Y-axis in camera coordinates [-pi..pi]
        1   score       Only for results: Float, indicating confidence in
                        detection, needed for p/r curves, higher is better.

        Args:
            outputs (dict): prediction results.
            cfg (dict): a dict mapping class names to label ids.
            img_meta (dict): image meta information.
            use_3d_box_center (bool): use the predicted 2D projection of the
                3D box center instead of the 2D box center.
            adjust_center (bool): shift the predicted center to the bottom of
                the box, following the KITTI ground-truth convention.
        """
        out_folder = os.path.join(self.out, 'txts')
        os.makedirs(out_folder, exist_ok=True)
        img_info = img_meta[0]['img_info']
        vid_name = os.path.dirname(img_info['file_name']).split('/')[-1]
        txt_file = os.path.join(out_folder, '{}.txt'.format(vid_name))

        # Expand dimension of results
        n_obj_detect = len(outputs['track_results'])
        if outputs.get('depth_results', None) is not None:
            depths = outputs['depth_results'].cpu().numpy().reshape(-1, 1)
        else:
            depths = np.full((n_obj_detect, 1), -1000)
        if outputs.get('dim_results', None) is not None:
            dims = outputs['dim_results'].cpu().numpy().reshape(-1, 3)
        else:
            dims = np.full((n_obj_detect, 3), -1000)
        if outputs.get('alpha_results', None) is not None:
            alphas = outputs['alpha_results'].cpu().numpy().reshape(-1, 1)
        else:
            alphas = np.full((n_obj_detect, 1), -10)

        if outputs.get('cen_2ds_results', None) is not None:
            centers = outputs['cen_2ds_results'].cpu().numpy().reshape(-1, 2)
        else:
            centers = [None] * n_obj_detect

        lines = []
        for (trackId, bbox), depth, dim, alpha, cen in zip(
                outputs['track_results'].items(), depths, dims, alphas,
                centers):
            loc, label = bbox['bbox'], bbox['label']
            if use_3d_box_center and cen is not None:
                box_cen = cen
            else:
                box_cen = np.array([loc[0] + loc[2], loc[1] + loc[3]]) / 2
            if alpha == -10:
                roty = np.full((1, ), -10)
            else:
                roty = tu.alpha2rot_y(alpha,
                                      box_cen[0] - img_info['width'] / 2,
                                      img_info['cali'][0][0])
            # depths is uniformly -1000 only when no depth predictions exist
            if np.all(depths == -1000):
                trans = np.full((3, ), -1000)
            else:
                trans = tu.imagetocamera(box_cen[None], depth,
                                         np.array(img_info['cali'])).flatten()

            if adjust_center:
                # KITTI GT uses the bottom of the car as center (x, 0, z).
                # Prediction uses center of the bbox as center (x, y, z).
                # So we align them to the bottom center as GT does
                trans[1] += dim[0] / 2.0

            cat = ''
            for key in cfg:
                if bbox['label'] == cfg[key]:
                    cat = key.lower()
                    break

            if cat == '':
                continue

            # Create lines of results
            line = f"{img_info['index']} {trackId} {cat} {-1} {-1} " \
                   f"{alpha.item():.6f} " \
                   f"{loc[0]:.6f} {loc[1]:.6f} {loc[2]:.6f} {loc[3]:.6f} " \
                   f"{dim[0]:.6f} {dim[1]:.6f} {dim[2]:.6f} " \
                   f"{trans[0]:.6f} {trans[1]:.6f} {trans[2]:.6f} " \
                   f"{roty.item():.6f} {loc[4]:.6f}\n"
            lines.append(line)

        if txt_file in self.writed:
            mode = 'a'
        else:
            mode = 'w'
            self.writed.append(txt_file)
        if len(lines) > 0:
            with open(txt_file, mode) as f:
                f.writelines(lines)
        else:
            # Still create/keep the file so every sequence has a result txt
            with open(txt_file, mode):
                pass
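
For reference, a small sketch that parses one of the written lines back into the fields described in the docstring; the column order is taken from the f-string above (this method writes truncated and occluded as -1):

    def parse_trk_line(line: str) -> dict:
        # frame, track_id, type, truncated, occluded, alpha,
        # bbox(4), dimensions(3), location(3), rotation_y, score
        vals = line.split()
        return {
            'frame': int(vals[0]),
            'track_id': int(vals[1]),
            'type': vals[2],
            'truncated': float(vals[3]),
            'occluded': float(vals[4]),
            'alpha': float(vals[5]),
            'bbox': [float(v) for v in vals[6:10]],
            'dimensions': [float(v) for v in vals[10:13]],  # h, w, l
            'location': [float(v) for v in vals[13:16]],    # x, y, z in camera frame
            'rotation_y': float(vals[16]),
            'score': float(vals[17]),
        }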
Example #4
def general_output(coco_json, outputs, img_info, use_3d_box_center, pred_id,
                   modelcats, out_path):
    if 'Nusc' in out_path:
        cats_mapping = nusc_mapping
    elif 'Waymo' in out_path:
        cats_mapping = waymo_mapping
    else:
        cats_mapping = kitti_gta_mapping

    if 'categories' not in coco_json:
        for k, v in cats_mapping.items():
            coco_json['categories'].append(dict(id=v, name=k))

    if img_info.get('is_key_frame') is not None and img_info['is_key_frame']:
        img_info['index'] = img_info['key_frame_index']

    img_info['id'] = len(coco_json['images'])
    vid_name = osp.dirname(img_info['file_name']).split('/')[-1]
    if img_info['first_frame']:
        coco_json['videos'].append(dict(id=img_info['video_id'],
                                        name=vid_name))

    # pruning img_info
    img_info.pop('filename')
    img_info.pop('type')
    coco_json['images'].append(img_info)

    # Expand dimension of results
    n_obj_detect = len(outputs['track_results'])
    if outputs.get('depth_results', None) is not None:
        depths = outputs['depth_results'].cpu().numpy().reshape(-1, 1)
    else:
        depths = np.ones([n_obj_detect, 1]) * -1000
    if outputs.get('dim_results', None) is not None:
        dims = outputs['dim_results'].cpu().numpy().reshape(-1, 3)
    else:
        dims = np.ones([n_obj_detect, 3]) * -1000
    if outputs.get('alpha_results', None) is not None:
        alphas = outputs['alpha_results'].cpu().numpy().reshape(-1, 1)
    else:
        alphas = np.ones([n_obj_detect, 1]) * -10
    if outputs.get('cen_2ds_results', None) is not None:
        centers = outputs['cen_2ds_results'].cpu().numpy().reshape(-1, 2)
    else:
        centers = [None] * n_obj_detect
    if outputs.get('depth_uncertainty_results', None) is not None:
        depths_uncertainty = outputs['depth_uncertainty_results'].cpu().numpy(
        ).reshape(-1, 1)
    else:
        depths_uncertainty = [None] * n_obj_detect

    for (trackId, bbox), depth, dim, alpha, cen, depth_uncertainty in zip(
            outputs['track_results'].items(), depths, dims, alphas, centers,
            depths_uncertainty):
        box = bbox['bbox'].astype(float).tolist()
        cat = ''

        for key in modelcats:
            if bbox['label'] == modelcats[key]:
                cat = key.lower()
                break

        if cat == '':
            continue

        x1 = box[0]
        y1 = box[1]
        x2 = box[2]
        y2 = box[3]
        score = box[4]
        if use_3d_box_center and cen is not None:
            box_cen = cen
        else:
            box_cen = np.array([x1 + x2, y1 + y2]) / 2
        if alpha == -10:
            rot_y = -10
        else:
            rot_y = tu.alpha2rot_y(alpha, box_cen[0] - img_info['width'] / 2,
                                   img_info['cali'][0][0])
        if np.all(depths == -1000):
            trans = np.ones([1, 3]) * -1000
        else:
            trans = tu.imagetocamera(box_cen[np.newaxis], depth,
                                     np.array(img_info['cali'])).flatten()
        ann = dict(id=pred_id,
                   image_id=img_info['id'],
                   category_id=cats_mapping[cat],
                   instance_id=trackId.tolist(),
                   alpha=float(alpha),
                   roty=float(rot_y),
                   dimension=dim.astype(float).tolist(),
                   translation=trans.astype(float).tolist(),
                   is_occluded=False,
                   is_truncated=False,
                   bbox=[x1, y1, x2 - x1, y2 - y1],
                   area=(x2 - x1) * (y2 - y1),
                   center_2d=box_cen.astype(float).tolist(),
                   uncertainty=float(depth_uncertainty),
                   depth=depth.tolist(),
                   iscrowd=False,
                   ignore=False,
                   segmentation=[[x1, y1, x1, y2, x2, y2, x2, y1]],
                   score=score)
        coco_json['annotations'].append(ann)
        pred_id += 1

    return coco_json, pred_id
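
general_output mutates and returns the running coco_json, so the caller is expected to provide a dict whose list fields can be appended to (a defaultdict(list) satisfies the 'categories' check above) and to thread pred_id across frames. A minimal driving-loop sketch; frame_results, modelcats, and out_path are placeholders for the caller's own data, not names from the source:

    from collections import defaultdict

    coco_json = defaultdict(list)  # 'categories', 'videos', 'images', 'annotations' created on demand
    pred_id = 0
    for outputs, img_info in frame_results:  # hypothetical per-frame prediction/meta pairs
        coco_json, pred_id = general_output(coco_json, outputs, img_info,
                                            use_3d_box_center=True,
                                            pred_id=pred_id,
                                            modelcats=modelcats,
                                            out_path=out_path)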