def simple_setup_4_cams_pinhole():
    vcd = core.VCD()
    vcd.add_coordinate_system("vehicle-iso8855",
                              cs_type=types.CoordinateSystemType.local_cs)

    # Let's build the cameras
    img_width_px = 960
    img_height_px = 604
    ################################
    # CAM_FRONT
    ################################
    # Intrinsics
    # ------------------------------
    # This matrix converts from scs to ics
    K_3x4 = np.array([[6.038e+002, 0.0, 4.67e+002, 0.0],
                      [0.0, 6.038e+002, 2.94e+002, 0.0], [0.0, 0.0, 1.0, 0.0]])
    d_1x5 = np.array([[
        -4.0569640920796241e-001, 1.9116055332155032e-001, 0., 0.,
        -4.7033609773998064e-002
    ]])

    # Extrinsics
    # ------------------------------
    # Extrinsics (1/2): Rotation using yaw(Z), pitch(Y), roll(X). This is rotation of SCS wrt LCS
    pitch_rad = (10.0 * np.pi) / 180.0
    yaw_rad = (0.0 * np.pi) / 180.0
    roll_rad = (0.0 * np.pi) / 180.0
    R_scs_wrt_lcs = utils.euler2R([yaw_rad, pitch_rad,
                                   roll_rad])  # default is ZYX
    C_lcs = np.array([
        [2.3],  # frontal part of the car
        [0.0],  # centered in the symmetry axis of the car
        [1.3]
    ])  # at some height over the ground

    P_scs_wrt_lcs = utils.create_pose(R_scs_wrt_lcs, C_lcs)

    # As explained in core.py, a pose matrix of scs wrt lcs can be also seen as the inverse transformation from lcs to scs
    T_lcs_to_scs = utils.inv(P_scs_wrt_lcs)

    # Extrinsics (2/2): Additional rotation, because usually cameras scs are (x-to-right, y-down, z-to-front)
    # to match better with ics (x-to-right, y-bottom); while the lcs is (x-to-front, y-to-left, z-up)
    T_scs_to_scsics = np.array([[0.0, -1.0, 0.0, 0.0], [0.0, 0.0, -1.0, 0.0],
                                [1.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 1.0]])

    # Now we can compose the final transform
    T_lcs_to_scs = T_scs_to_scsics.dot(T_lcs_to_scs)
    P_scs_wrt_lcs = utils.inv(T_lcs_to_scs)

    vcd.add_stream(stream_name="CAM_FRONT",
                   uri="",
                   description="Virtual camera",
                   stream_type=core.StreamType.camera)
    vcd.add_stream_properties(stream_name="CAM_FRONT",
                              intrinsics=types.IntrinsicsPinhole(
                                  width_px=img_width_px,
                                  height_px=img_height_px,
                                  camera_matrix_3x4=list(K_3x4.flatten()),
                                  distortion_coeffs_1xN=list(d_1x5.flatten())))
    vcd.add_coordinate_system("CAM_FRONT",
                              cs_type=types.CoordinateSystemType.sensor_cs,
                              parent_name="vehicle-iso8855",
                              pose_wrt_parent=list(P_scs_wrt_lcs.flatten()))

    ################################
    # CAM_REAR
    ################################
    K_3x4 = np.array([[6.038e+002, 0.0, 4.67e+002, 0.0],
                      [0.0, 6.038e+002, 2.94e+002, 0.0], [0.0, 0.0, 1.0, 0.0]])
    d_1x5 = np.array([[
        -4.0569640920796241e-001, 1.9116055332155032e-001, 0., 0.,
        -4.7033609773998064e-002
    ]])
    pitch_rad = (2.0 * np.pi) / 180.0
    yaw_rad = (180.0 * np.pi) / 180.0
    roll_rad = (0.0 * np.pi) / 180.0
    R_scs_wrt_lcs = utils.euler2R([yaw_rad, pitch_rad,
                                   roll_rad])  # default is ZYX
    C_lcs = np.array([
        [-0.725],  # rear part of the car
        [0.0],  # centered in the symmetry axis of the car
        [0.4]
    ])  # at some height over the ground
    P_scs_wrt_lcs = utils.create_pose(R_scs_wrt_lcs, C_lcs)
    T_lcs_to_scs = utils.inv(P_scs_wrt_lcs)
    T_scs_to_scsics = np.array([[0.0, -1.0, 0.0, 0.0], [0.0, 0.0, -1.0, 0.0],
                                [1.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 1.0]])
    T_lcs_to_scs = T_scs_to_scsics.dot(T_lcs_to_scs)
    P_scs_wrt_lcs = utils.inv(T_lcs_to_scs)

    vcd.add_stream(stream_name="CAM_REAR",
                   uri="",
                   description="Virtual camera",
                   stream_type=core.StreamType.camera)
    vcd.add_stream_properties(stream_name="CAM_REAR",
                              intrinsics=types.IntrinsicsPinhole(
                                  width_px=img_width_px,
                                  height_px=img_height_px,
                                  camera_matrix_3x4=list(K_3x4.flatten()),
                                  distortion_coeffs_1xN=list(d_1x5.flatten())))
    vcd.add_coordinate_system("CAM_REAR",
                              cs_type=types.CoordinateSystemType.sensor_cs,
                              parent_name="vehicle-iso8855",
                              pose_wrt_parent=list(P_scs_wrt_lcs.flatten()))

    ################################
    # CAM_LEFT
    ################################
    K_3x4 = np.array([[6.038e+002, 0.0, 4.67e+002, 0.0],
                      [0.0, 6.038e+002, 2.94e+002, 0.0], [0.0, 0.0, 1.0, 0.0]])
    d_1x5 = np.array([[
        -4.0569640920796241e-001, 1.9116055332155032e-001, 0., 0.,
        -4.7033609773998064e-002
    ]])
    pitch_rad = (2.0 * np.pi) / 180.0
    yaw_rad = (90.0 * np.pi) / 180.0
    roll_rad = (0.0 * np.pi) / 180.0
    R_scs_wrt_lcs = utils.euler2R([yaw_rad, pitch_rad,
                                   roll_rad])  # default is ZYX
    C_lcs = np.array([
        [1.2],
        [0.6],  # at the left
        [1.45]
    ])  # at the roof
    P_scs_wrt_lcs = utils.create_pose(R_scs_wrt_lcs, C_lcs)
    T_lcs_to_scs = utils.inv(P_scs_wrt_lcs)
    T_scs_to_scsics = np.array([[0.0, -1.0, 0.0, 0.0], [0.0, 0.0, -1.0, 0.0],
                                [1.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 1.0]])
    T_lcs_to_scs = T_scs_to_scsics.dot(T_lcs_to_scs)
    P_scs_wrt_lcs = utils.inv(T_lcs_to_scs)

    vcd.add_stream(stream_name="CAM_LEFT",
                   uri="",
                   description="Virtual camera",
                   stream_type=core.StreamType.camera)
    vcd.add_stream_properties(stream_name="CAM_LEFT",
                              intrinsics=types.IntrinsicsPinhole(
                                  width_px=img_width_px,
                                  height_px=img_height_px,
                                  camera_matrix_3x4=list(K_3x4.flatten()),
                                  distortion_coeffs_1xN=list(d_1x5.flatten())))
    vcd.add_coordinate_system("CAM_LEFT",
                              cs_type=types.CoordinateSystemType.sensor_cs,
                              parent_name="vehicle-iso8855",
                              pose_wrt_parent=list(P_scs_wrt_lcs.flatten()))

    ################################
    # CAM_RIGHT
    ################################
    K_3x4 = np.array([[6.038e+002, 0.0, 4.67e+002, 0.0],
                      [0.0, 6.038e+002, 2.94e+002, 0.0], [0.0, 0.0, 1.0, 0.0]])
    d_1x5 = np.array([[
        -4.0569640920796241e-001, 1.9116055332155032e-001, 0., 0.,
        -4.7033609773998064e-002
    ]])
    pitch_rad = (2.0 * np.pi) / 180.0
    yaw_rad = (-90.0 * np.pi) / 180.0
    roll_rad = (0.0 * np.pi) / 180.0
    R_scs_wrt_lcs = utils.euler2R([yaw_rad, pitch_rad,
                                   roll_rad])  # default is ZYX
    C_lcs = np.array([
        [1.2],
        [-0.6],  # at the left
        [1.45]
    ])  # at the roof
    P_scs_wrt_lcs = utils.create_pose(R_scs_wrt_lcs, C_lcs)
    T_lcs_to_scs = utils.inv(P_scs_wrt_lcs)
    T_scs_to_scsics = np.array([[0.0, -1.0, 0.0, 0.0], [0.0, 0.0, -1.0, 0.0],
                                [1.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 1.0]])
    T_lcs_to_scs = T_scs_to_scsics.dot(T_lcs_to_scs)
    P_scs_wrt_lcs = utils.inv(T_lcs_to_scs)

    vcd.add_stream(stream_name="CAM_RIGHT",
                   uri="",
                   description="Virtual camera",
                   stream_type=core.StreamType.camera)
    vcd.add_stream_properties(stream_name="CAM_RIGHT",
                              intrinsics=types.IntrinsicsPinhole(
                                  width_px=img_width_px,
                                  height_px=img_height_px,
                                  camera_matrix_3x4=list(K_3x4.flatten()),
                                  distortion_coeffs_1xN=list(d_1x5.flatten())))
    vcd.add_coordinate_system("CAM_RIGHT",
                              cs_type=types.CoordinateSystemType.sensor_cs,
                              parent_name="vehicle-iso8855",
                              pose_wrt_parent=list(P_scs_wrt_lcs.flatten()))

    return vcd
    def parse_sequence(self, seq_number):
        vcd = core.VCD()

        #########################################
        # OPEN files
        #########################################
        calib_file_name = os.path.join(self.kitti_tracking_calib_path,
                                       str(seq_number).zfill(4) + ".txt")
        oxts_file_name = os.path.join(self.kitti_tracking_oxts_path,
                                      str(seq_number).zfill(4) + ".txt")
        object_file_name = os.path.join(self.kitti_tracking_objects_path,
                                        str(seq_number).zfill(4) + '.txt')

        calib_file = open(calib_file_name, newline='')
        oxts_file = open(oxts_file_name, newline='')
        object_file = open(object_file_name, newline='')
        calib_reader = csv.reader(calib_file, delimiter=' ')
        oxts_reader = csv.reader(oxts_file, delimiter=' ')
        object_reader = csv.reader(object_file, delimiter=' ')

        #########################################
        # READ calibration matrices
        #########################################
        img_width_px = 1236
        img_height_px = 366  # these are rectified dimensions
        calib_matrices = {}
        for row in calib_reader:
            calib_matrices[row[0]] = [
                float(x) for x in row[1:] if len(x) > 0
            ]  # filter out some spaces at the end of the row

        left_camera_K3x4 = np.reshape(calib_matrices["P2:"], (3, 4))
        right_camera_K3x4 = np.reshape(calib_matrices["P3:"], (3, 4))
        camera_rectification_3x3 = np.reshape(calib_matrices["R_rect"], (3, 3))
        transform_velo_to_camleft_3x4 = np.reshape(
            calib_matrices["Tr_velo_cam"], (3, 4))  # WRT to LEFT CAMERA ONLY

        #########################################
        # LIDAR info
        #########################################
        # http://www.cvlibs.net/datasets/kitti/setup.php
        location_velo_wrt_lcs_3x1 = np.array(
            [[0.76], [0.0], [1.73]])  # according to the documentation
        # Create pose (p=[[R|C],[0001]])
        pose_velo_wrt_lcs_4x4 = utils.create_pose(
            np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]),
            location_velo_wrt_lcs_3x1)
        transform_lcs_to_velo_4x4 = utils.inv(pose_velo_wrt_lcs_4x4)

        vcd.add_stream(stream_name="VELO_TOP",
                       uri="",
                       description="Velodyne roof",
                       stream_type=core.StreamType.lidar)
        vcd.add_stream_properties(
            stream_name="VELO_TOP",
            extrinsics=types.Extrinsics(
                pose_scs_wrt_lcs_4x4=list(pose_velo_wrt_lcs_4x4.flatten())))

        #########################################
        # GPS/IMU info
        #########################################
        # Let's build also the pose of the imu
        location_imu_wrt_lcs_4x4 = np.array([[-0.05], [0.32], [0.93]
                                             ])  # according to documentation
        pose_imu_wrt_lcs_4x4 = utils.create_pose(
            np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]),
            location_imu_wrt_lcs_4x4)

        vcd.add_stream(stream_name="IMU",
                       uri="",
                       description="GPS/IMU",
                       stream_type=core.StreamType.other)
        vcd.add_stream_properties(
            stream_name="IMU",
            extrinsics=types.Extrinsics(
                pose_scs_wrt_lcs_4x4=list(pose_imu_wrt_lcs_4x4.flatten())))

        #########################################
        # CAMERAS
        #########################################
        # From KITTI readme.txt:
        # To project a point from Velodyne coordinates into the left color image,
        # you can use this formula: x = P2 * R0_rect * Tr_velo_to_cam * y
        # For the right color image: x = P3 * R0_rect * Tr_velo_to_cam * y
        # Note: All matrices are stored row-major, i.e., the first values correspond
        # to the first row. R0_rect contains a 3x3 matrix which you need to extend to
        # a 4x4 matrix by adding a 1 as the bottom-right element and 0's elsewhere.
        # Tr_xxx is a 3x4 matrix (R|t), which you need to extend to a 4x4 matrix
        # in the same way!

        # Virtually, cam_left and cam_right are defined as the same coordinate systems, so their scs are the same
        # But their projection matrices (3x4) include a right-most non-zero column which shifts 3d points when projected
        # into the images, that is why projecting from velodyne to left and right use the same "extrinsics", and just differ
        # in the usage of the "intrinsic" matrices P2 and P3
        # P2 and P3 might be decomposed so P2 = K2*T2 and P3=K3*T3, so T2 and T3 could host extrinsic information
        # while K2 and K3 could host the intrinsic information. This way, the pose of cam_left would be T2*R_rect*Tr_velo
        # However, such decomposition seems to be non-trivial.
        # x = P2 * R0_rect * Tr_velo_to_cam * y
        # x = P3 * R0_rect * Tr_velo_to_cam * y
        camera_rectification_4x4 = np.vstack((np.hstack(
            (camera_rectification_3x3, [[0], [0], [0]])), [0, 0, 0, 1]))
        transform_velo_to_camleft_4x4 = np.vstack(
            (transform_velo_to_camleft_3x4, [0, 0, 0, 1]))
        transform_velo_to_camleft_4x4 = np.dot(
            camera_rectification_4x4, transform_velo_to_camleft_4x4
        )  # such that X_cam = transform_velo_to_cam_4x4 * X_velo

        # The pose of cameras can't be read from documentation, as these are virtual cameras created via a rectification
        # process, therefore, we need to build them using the velo_to_cam calibration
        # Pose_camLeft_wrt_ccs = RT_camLeft_to_ccs
        transform_lcs_to_camleft_4x4 = np.dot(transform_velo_to_camleft_4x4,
                                              transform_lcs_to_velo_4x4)
        pose_camleft_wrt_lcs_4x4 = utils.inv(transform_lcs_to_camleft_4x4)
        pose_camright_wrt_lcs_4x4 = pose_camleft_wrt_lcs_4x4

        # Create cams and fill scene
        vcd.add_stream(stream_name="CAM_LEFT",
                       uri="",
                       description="Virtual Left color camera",
                       stream_type=core.StreamType.camera)
        vcd.add_stream_properties(
            stream_name="CAM_LEFT",
            intrinsics=types.IntrinsicsPinhole(width_px=img_width_px,
                                               height_px=img_height_px,
                                               camera_matrix_3x4=list(
                                                   left_camera_K3x4.flatten()),
                                               distortion_coeffs_1xN=None),
            extrinsics=types.Extrinsics(
                pose_scs_wrt_lcs_4x4=list(pose_camleft_wrt_lcs_4x4.flatten())))

        vcd.add_stream(stream_name="CAM_RIGHT",
                       uri="",
                       description="Virtual Right color camera",
                       stream_type=core.StreamType.camera)
        vcd.add_stream_properties(
            stream_name="CAM_RIGHT",
            intrinsics=types.IntrinsicsPinhole(
                width_px=img_width_px,
                height_px=img_height_px,
                camera_matrix_3x4=list(right_camera_K3x4.flatten()),
                distortion_coeffs_1xN=None),
            extrinsics=types.Extrinsics(pose_scs_wrt_lcs_4x4=list(
                pose_camright_wrt_lcs_4x4.flatten())))

        #########################################
        # ODOMETRY
        #########################################
        oxts = []
        for row in oxts_reader:
            row = row[0:len(row) - 1]
            floats = [float(i) for i in row]
            oxts.append(floats)
            '''lat_deg = row[0]  # deg
            lon_deg = row[1]
            alt_deg = row[2]
            roll_rad = row[3]  # 0 = level, positive = left side up (-pi..pi)
            pitch_rad = row[4]  # 0 = level, positive = front down (-pi/2..pi/2)
            yaw_rad = row[5]  # 0 = east,  positive = counter clockwise (-pi..pi)
            vn = row[6] # velocity towards north(m / s)
            ve = row[7] # velocity towards east(m / s)
            vf = row[8]  # forward velocity, i.e.parallel to earth - surface(m / s)
            vl = row[9] # leftward velocity, i.e.parallel to earth - surface(m / s)
            vu = row[10] # upward velocity, i.e.perpendicular to earth - surface(m / s)
            ax = row[11] # acceleration in x, i.e. in direction of vehicle front(m / s ^ 2)
            ay = row[12] # acceleration in y, i.e. in direction of vehicle left(m / s ^ 2)
            az = row[13] # acceleration in z, i.e. in direction of vehicle top(m / s ^ 2)
            af = row[14] # forward acceleration(m / s ^ 2)
            al = row[15] # leftward acceleration(m / s ^ 2)
            au = row[16] # upward acceleration(m / s ^ 2)
            wx = row[17] # angular rate around x(rad / s)
            wy = row[18] # angular rate around y(rad / s)
            wz = row[19] # angular rate around z(rad / s)
            wf = row[20] # angular rate around forward axis(rad / s)
            wl = row[21] # angular rate around leftward axis(rad / s)
            wu = row[22] # angular rate around upward axis(rad / s)
            posacc = row[23] # velocity accuracy(north / east in m)
            velacc = row[24] # velocity accuracy(north / east in m / s)
            navstat = row[25] # navigation status
            numsats = row[26] # number of satellites tracked by primary GPS receiver
            posmode = row[27] # position mode of primary GPS receiver
            velmode = row[28] # velocity mode of primary GPS receiver
            orimode = row[29] # orientation mode of primary GPS receiver
            '''

        # Convert odometry (GPS) to poses
        odometry_4x4xN = utils.convert_oxts_to_pose(oxts)
        # An odometry entry is a 4x4 pose matrix of the lcs wrt wcs
        # poses_4x4xN_lcs_wrt_wcs = odometry_4x4xN
        frames_1xN = np.arange(0, odometry_4x4xN.shape[2], 1).reshape(
            (1, odometry_4x4xN.shape[2]))
        r, c = frames_1xN.shape
        for i in range(0, c):
            vcd.add_odometry(
                int(frames_1xN[0, i]),
                types.Odometry(
                    pose_lcs_wrt_wcs_4x4=list(odometry_4x4xN[:, :,
                                                             i].flatten())))

        #########################################
        # LABELS
        #########################################
        for row in object_reader:
            frameNum = int(row[0])
            trackID = int(row[1]) + 1  # VCD can't handle negative ids

            semantic_class = row[2]
            truncated = utils.float_2dec(float(row[3]))
            occluded = int(row[4])
            alpha = utils.float_2dec(float(row[5]))

            left = utils.float_2dec(float(row[6]))
            top = utils.float_2dec(float(row[7]))
            width = utils.float_2dec(float(row[8]) - left)
            height = utils.float_2dec(float(row[9]) - top)

            bounding_box = types.bbox(name="",
                                      val=(left, top, width, height),
                                      stream='CAM_LEFT')

            dimHeight = utils.float_2dec(float(row[10]))
            dimWidth = utils.float_2dec(float(row[11]))
            dimLength = utils.float_2dec(float(row[12]))

            locX = utils.float_2dec(float(row[13]))
            locY = utils.float_2dec(float(row[14]))
            locZ = utils.float_2dec(float(row[15]))

            rotY = utils.float_2dec(float(row[16]))

            # Note KITTI uses (h, w, l, x, y, z, ry) for cuboids, in camera coordinates (X-to-right, Y-to-bottom, Z-to-front)
            # while in VCD (x,y,z, rx, ry, rz, sx, sy, sz) is defined as a dextrogire system
            # To express the cuboid in LCS (Local-Coordinate-System), we can add the pose of the camera
            # Cameras are 1.65 m height wrt ground
            # Cameras are 1.03 meters wrt to rear axle
            cam_wrt_rear_axle_z = 1.03
            cam_height = 1.65
            cuboid = types.cuboid(
                name="",
                val=(utils.float_2dec(locZ + cam_wrt_rear_axle_z),
                     utils.float_2dec(-locX),
                     utils.float_2dec(-locY + cam_height), 0, 0,
                     utils.float_2dec(rotY), utils.float_2dec(dimWidth),
                     utils.float_2dec(dimLength), utils.float_2dec(dimHeight)))
            # Note that if no "stream" parameter is given to this cuboid, LCS is assumed

            if not vcd.has(core.ElementType.object, trackID):
                vcd.add_object(name="",
                               semantic_type=semantic_class,
                               uid=trackID)

            vcd.add_object_data(trackID, bounding_box, frameNum)
            if semantic_class != "DontCare":
                vcd.add_object_data(trackID, cuboid, frameNum)
            vcd.add_object_data(trackID,
                                types.num(name="truncated", val=truncated),
                                frameNum)
            vcd.add_object_data(trackID,
                                types.num(name="occluded", val=occluded),
                                frameNum)
            vcd.add_object_data(trackID, types.num(name="alpha", val=alpha),
                                frameNum)

        # Return
        return vcd
예제 #3
0
def simple_setup_4_cams_fisheye():
    vcd = core.VCD()
    vcd.add_coordinate_system("vehicle-iso8855", cs_type=types.CoordinateSystemType.local_cs)

    # Let's build the cameras
    img_width_px = 1280
    img_height_px = 966
    cX = -0.302159995
    cY = -3.44617009
    ################################
    # CAM_FRONT
    ################################
    # Intrinsics
    # ------------------------------
    # This matrix converts from scs to ics
    d_1x4 = np.array([333.437012, 0.307729989, 2.4235599, 11.0495005])

    # Extrinsics
    # ------------------------------
    # Extrinsics (1/2): Rotation using yaw(Z), pitch(Y), roll(X). This is rotation of SCS wrt LCS
    pitch_rad = (10.0 * np.pi) / 180.0
    yaw_rad = (0.0 * np.pi) / 180.0
    roll_rad = (0.0 * np.pi) / 180.0
    R_scs_wrt_lcs = utils.euler2R([yaw_rad, pitch_rad, roll_rad])  # default is ZYX
    C_lcs = np.array([[2.3],  # frontal part of the car
                      [0.0],  # centered in the symmetry axis of the car
                      [1.3]])  # at some height over the ground

    P_scs_wrt_lcs = utils.create_pose(R_scs_wrt_lcs, C_lcs)

    # As explained in core.py, a pose matrix of scs wrt lcs can be also seen as the inverse transformation from lcs to scs
    T_lcs_to_scs = utils.inv(P_scs_wrt_lcs)

    # Extrinsics (2/2): Additional rotation, because usually cameras scs are (x-to-right, y-down, z-to-front)
    # to match better with ics (x-to-right, y-bottom); while the lcs is (x-to-front, y-to-left, z-up)
    T_scs_to_scsics = np.array([[0.0, -1.0, 0.0, 0.0],
                                [0.0, 0.0, -1.0, 0.0],
                                [1.0, 0.0, 0.0, 0.0],
                                [0.0, 0.0, 0.0, 1.0]])

    # Now we can compose the final transform
    T_lcs_to_scs = T_scs_to_scsics.dot(T_lcs_to_scs)
    P_scs_wrt_lcs = utils.inv(T_lcs_to_scs)

    vcd.add_stream(stream_name="CAM_FRONT",
                   uri="",
                   description="Virtual camera",
                   stream_type=core.StreamType.camera)
    vcd.add_stream_properties(stream_name="CAM_FRONT",
                              intrinsics=types.IntrinsicsFisheye(
                                  width_px=img_width_px,
                                  height_px=img_height_px,
                                  lens_coeffs_1x4=list(d_1x4.flatten()),
                                  center_x=cX,
                                  center_y=cY,
                                  fov_deg=0.0,
                                  radius_x=0.0,
                                  radius_y=0.0
                              )
                              )
    vcd.add_coordinate_system("CAM_FRONT", cs_type=types.CoordinateSystemType.sensor_cs,
                              parent_name="vehicle-iso8855",
                              pose_wrt_parent=list(P_scs_wrt_lcs.flatten()))

    ################################
    # CAM_REAR
    ################################
    # This matrix converts from scs to ics
    d_1x4 = np.array([333.437012, 0.307729989, 2.4235599, 11.0495005])
    pitch_rad = (2.0 * np.pi) / 180.0
    yaw_rad = (180.0 * np.pi) / 180.0
    roll_rad = (0.0 * np.pi) / 180.0
    R_scs_wrt_lcs = utils.euler2R([yaw_rad, pitch_rad, roll_rad])  # default is ZYX
    C_lcs = np.array([[-0.725],  # rear part of the car
                      [0.0],  # centered in the symmetry axis of the car
                      [0.4]])  # at some height over the ground
    P_scs_wrt_lcs = utils.create_pose(R_scs_wrt_lcs, C_lcs)
    T_lcs_to_scs = utils.inv(P_scs_wrt_lcs)
    T_scs_to_scsics = np.array([[0.0, -1.0, 0.0, 0.0],
                                [0.0, 0.0, -1.0, 0.0],
                                [1.0, 0.0, 0.0, 0.0],
                                [0.0, 0.0, 0.0, 1.0]])
    T_lcs_to_scs = T_scs_to_scsics.dot(T_lcs_to_scs)
    P_scs_wrt_lcs = utils.inv(T_lcs_to_scs)

    vcd.add_stream(stream_name="CAM_REAR",
                   uri="",
                   description="Virtual camera",
                   stream_type=core.StreamType.camera)
    vcd.add_stream_properties(stream_name="CAM_REAR",
                              intrinsics=types.IntrinsicsFisheye(
                                  width_px=img_width_px,
                                  height_px=img_height_px,
                                  lens_coeffs_1x4=list(d_1x4.flatten()),
                                  center_x=cX,
                                  center_y=cY,
                                  fov_deg=0.0,
                                  radius_x=0.0,
                                  radius_y=0.0
                              )
                              )
    vcd.add_coordinate_system("CAM_REAR", cs_type=types.CoordinateSystemType.sensor_cs,
                              parent_name="vehicle-iso8855",
                              pose_wrt_parent=list(P_scs_wrt_lcs.flatten()))

    ################################
    # CAM_LEFT
    ################################
    # This matrix converts from scs to ics
    d_1x4 = np.array([333.437012, 0.307729989, 2.4235599, 11.0495005])
    pitch_rad = (2.0 * np.pi) / 180.0
    yaw_rad = (90.0 * np.pi) / 180.0
    roll_rad = (0.0 * np.pi) / 180.0
    R_scs_wrt_lcs = utils.euler2R([yaw_rad, pitch_rad, roll_rad])  # default is ZYX
    C_lcs = np.array([[1.2],
                      [0.6],  # at the left
                      [1.45]])  # at the roof
    P_scs_wrt_lcs = utils.create_pose(R_scs_wrt_lcs, C_lcs)
    T_lcs_to_scs = utils.inv(P_scs_wrt_lcs)
    T_scs_to_scsics = np.array([[0.0, -1.0, 0.0, 0.0],
                                [0.0, 0.0, -1.0, 0.0],
                                [1.0, 0.0, 0.0, 0.0],
                                [0.0, 0.0, 0.0, 1.0]])
    T_lcs_to_scs = T_scs_to_scsics.dot(T_lcs_to_scs)
    P_scs_wrt_lcs = utils.inv(T_lcs_to_scs)

    vcd.add_stream(stream_name="CAM_LEFT",
                   uri="",
                   description="Virtual camera",
                   stream_type=core.StreamType.camera)
    vcd.add_stream_properties(stream_name="CAM_LEFT",
                              intrinsics=types.IntrinsicsFisheye(
                                  width_px=img_width_px,
                                  height_px=img_height_px,
                                  lens_coeffs_1x4=list(d_1x4.flatten()),
                                  center_x=cX,
                                  center_y=cY,
                                  fov_deg=0.0,
                                  radius_x=0.0,
                                  radius_y=0.0
                              )
                              )
    vcd.add_coordinate_system("CAM_LEFT", cs_type=types.CoordinateSystemType.sensor_cs,
                              parent_name="vehicle-iso8855",
                              pose_wrt_parent=list(P_scs_wrt_lcs.flatten()))

    ################################
    # CAM_RIGHT
    ################################
    # This matrix converts from scs to ics
    d_1x4 = np.array([333.437012, 0.307729989, 2.4235599, 11.0495005])
    pitch_rad = (2.0 * np.pi) / 180.0
    yaw_rad = (-90.0 * np.pi) / 180.0
    roll_rad = (0.0 * np.pi) / 180.0
    R_scs_wrt_lcs = utils.euler2R([yaw_rad, pitch_rad, roll_rad])  # default is ZYX
    C_lcs = np.array([[1.2],
                      [-0.6],  # at the left
                      [1.45]])  # at the roof
    P_scs_wrt_lcs = utils.create_pose(R_scs_wrt_lcs, C_lcs)
    T_lcs_to_scs = utils.inv(P_scs_wrt_lcs)
    T_scs_to_scsics = np.array([[0.0, -1.0, 0.0, 0.0],
                                [0.0, 0.0, -1.0, 0.0],
                                [1.0, 0.0, 0.0, 0.0],
                                [0.0, 0.0, 0.0, 1.0]])
    T_lcs_to_scs = T_scs_to_scsics.dot(T_lcs_to_scs)
    P_scs_wrt_lcs = utils.inv(T_lcs_to_scs)

    vcd.add_stream(stream_name="CAM_RIGHT",
                   uri="",
                   description="Virtual camera",
                   stream_type=core.StreamType.camera)
    vcd.add_stream_properties(stream_name="CAM_RIGHT",
                              intrinsics=types.IntrinsicsFisheye(
                                  width_px=img_width_px,
                                  height_px=img_height_px,
                                  lens_coeffs_1x4=list(d_1x4.flatten()),
                                  center_x=cX,
                                  center_y=cY,
                                  fov_deg=0.0,
                                  radius_x=0.0,
                                  radius_y=0.0
                              )
                              )
    vcd.add_coordinate_system("CAM_RIGHT", cs_type=types.CoordinateSystemType.sensor_cs,
                              parent_name="vehicle-iso8855",
                              pose_wrt_parent=list(P_scs_wrt_lcs.flatten()))

    return vcd
예제 #4
0
    def parse_sequence_direct(self, seq_number):
        # This is a variant approach for creating a VCD 4.3.0 file reading the KITTI calibration files,
        # trying to avoid additional computation at this level, and exploiting the ability of VCD 4.3.0 to
        # express arbitrary transforms across coordinate systems

        vcd = core.VCD()

        #########################################
        # OPEN files
        #########################################
        calib_file_name = os.path.join(self.kitti_tracking_calib_path,
                                       str(seq_number).zfill(4) + ".txt")
        oxts_file_name = os.path.join(self.kitti_tracking_oxts_path,
                                      str(seq_number).zfill(4) + ".txt")
        object_file_name = os.path.join(self.kitti_tracking_objects_path,
                                        str(seq_number).zfill(4) + '.txt')

        calib_file = open(calib_file_name, newline='')
        oxts_file = open(oxts_file_name, newline='')
        object_file = open(object_file_name, newline='')
        calib_reader = csv.reader(calib_file, delimiter=' ')
        oxts_reader = csv.reader(oxts_file, delimiter=' ')
        object_reader = csv.reader(object_file, delimiter=' ')

        #########################################
        # CREATE base coordinate system
        #########################################
        # The main coordinate system for the scene "odom" represents a static cs (which coincides with first local cs).
        vcd.add_coordinate_system("odom",
                                  cs_type=types.CoordinateSystemType.scene_cs)

        #########################################
        # CREATE vehicle coordinate system
        #########################################
        # Local coordinate system, moving with the vehicle. Following iso8855 (x-front, y-left, z-up)
        vcd.add_coordinate_system("vehicle-iso8855",
                                  cs_type=types.CoordinateSystemType.local_cs,
                                  parent_name="odom")
        # Sensor coordinate systems are added

        # Add transforms for each time instant
        odometry_4x4xN = self.read_odometry_from_oxts(oxts_reader)

        # An odometry entry is a 4x4 pose matrix of the lcs wrt wcs (ergo a transform lcs_to_wcs)
        # poses_4x4xN_lcs_wrt_wcs = odometry_4x4xN
        frames_1xN = np.arange(0, odometry_4x4xN.shape[2], 1).reshape(
            (1, odometry_4x4xN.shape[2]))
        r, c = frames_1xN.shape
        for i in range(0, c):
            vcd.add_transform(int(frames_1xN[0, i]),
                              transform=types.Transform(
                                  src_name="vehicle-iso8855",
                                  dst_name="odom",
                                  transform_src_to_dst_4x4=list(
                                      odometry_4x4xN[:, :, i].flatten())))

        #########################################
        # CREATE SENSORS coordinate system: LASER
        #########################################
        # http://www.cvlibs.net/datasets/kitti/setup.php
        location_velo_wrt_vehicle_3x1 = np.array(
            [[0.76], [0.0], [1.73]])  # according to the documentation
        pose_velo_wrt_vehicle_4x4 = utils.create_pose(
            utils.identity(3), location_velo_wrt_vehicle_3x1)
        vcd.add_stream(stream_name="VELO_TOP",
                       uri="",
                       description="Velodyne roof",
                       stream_type=core.StreamType.lidar)
        vcd.add_coordinate_system("VELO_TOP",
                                  cs_type=types.CoordinateSystemType.sensor_cs,
                                  parent_name="vehicle-iso8855",
                                  pose_wrt_parent=list(
                                      pose_velo_wrt_vehicle_4x4.flatten()))
        #########################################
        # CREATE SENSORS coordinate system: GPS/IMU
        #########################################
        # Let's build also the pose of the imu
        location_imu_wrt_vehicle_4x4 = np.array(
            [[-0.05], [0.32], [0.93]])  # according to documentation
        pose_imu_wrt_vehicle_4x4 = utils.create_pose(
            utils.identity(3), location_imu_wrt_vehicle_4x4)
        vcd.add_stream(stream_name="IMU",
                       uri="",
                       description="GPS/IMU",
                       stream_type=core.StreamType.other)
        vcd.add_coordinate_system("IMU",
                                  cs_type=types.CoordinateSystemType.sensor_cs,
                                  parent_name="vehicle-iso8855",
                                  pose_wrt_parent=list(
                                      pose_imu_wrt_vehicle_4x4.flatten()))

        #########################################
        # CREATE SENSORS coordinate system: CAM
        #########################################
        img_width_px = 1242
        img_height_px = 375  # these are rectified dimensions
        calib_matrices = {}
        for row in calib_reader:
            calib_matrices[row[0]] = [
                float(x) for x in row[1:] if len(x) > 0
            ]  # filter out some spaces at the end of the row
        # From KITTI readme.txt:
        # To project a point from Velodyne coordinates into the left color image,
        # you can use this formula: x = P2 * R0_rect * Tr_velo_to_cam * y
        # For the right color image: x = P3 * R0_rect * Tr_velo_to_cam * y
        left_camera_K3x4 = np.reshape(calib_matrices["P2:"], (3, 4))
        right_camera_K3x4 = np.reshape(calib_matrices["P3:"], (3, 4))
        camera_rectification_3x3 = np.reshape(calib_matrices["R_rect"], (3, 3))

        transform_velo_to_camleft_3x4 = np.reshape(
            calib_matrices["Tr_velo_cam"], (3, 4))  # WRT to LEFT CAMERA ONLY
        camera_rectification_4x4 = np.vstack((np.hstack(
            (camera_rectification_3x3, [[0], [0], [0]])), [0, 0, 0, 1]))
        transform_velo_to_camleft_4x4 = np.vstack(
            (transform_velo_to_camleft_3x4, [0, 0, 0, 1]))
        transform_velo_to_camleft_4x4 = np.dot(
            camera_rectification_4x4, transform_velo_to_camleft_4x4
        )  # such that X_cam = transform_velo_to_cam_4x4 * X_velo
        pose_camleft_wrt_velo_4x4 = utils.inv(transform_velo_to_camleft_4x4)

        vcd.add_stream(stream_name="CAM_LEFT",
                       uri="",
                       description="Virtual Left color camera",
                       stream_type=core.StreamType.camera)

        vcd.add_stream_properties(stream_name="CAM_LEFT",
                                  intrinsics=types.IntrinsicsPinhole(
                                      width_px=img_width_px,
                                      height_px=img_height_px,
                                      camera_matrix_3x4=list(
                                          left_camera_K3x4.flatten()),
                                      distortion_coeffs_1xN=None))
        vcd.add_coordinate_system("CAM_LEFT",
                                  cs_type=types.CoordinateSystemType.sensor_cs,
                                  parent_name="VELO_TOP",
                                  pose_wrt_parent=list(
                                      pose_camleft_wrt_velo_4x4.flatten()))

        # Virtually, cam_left and cam_right are defined as the same coordinate systems, so their scs are the same
        # But their projection matrices (3x4) include a right-most non-zero column which shifts 3d points when projected
        # into the images, that is why projecting from velodyne to left and right use the same "extrinsics", and just differ
        # in the usage of the "intrinsic" matrices P2 and P3
        # P2 and P3 might be decomposed so P2 = K2*T2 and P3=K3*T3, so T2 and T3 could host extrinsic information
        # while K2 and K3 could host the intrinsic information. This way, the pose of cam_left would be T2*R_rect*Tr_velo
        # However, such decomposition seems to be non-trivial.
        # x = P2 * R0_rect * Tr_velo_to_cam * y
        # x = P3 * R0_rect * Tr_velo_to_cam * y
        vcd.add_stream(stream_name="CAM_RIGHT",
                       uri="",
                       description="Virtual Right color camera",
                       stream_type=core.StreamType.camera)

        vcd.add_stream_properties(stream_name="CAM_RIGHT",
                                  intrinsics=types.IntrinsicsPinhole(
                                      width_px=img_width_px,
                                      height_px=img_height_px,
                                      camera_matrix_3x4=list(
                                          right_camera_K3x4.flatten()),
                                      distortion_coeffs_1xN=None))
        vcd.add_coordinate_system("CAM_RIGHT",
                                  cs_type=types.CoordinateSystemType.sensor_cs,
                                  parent_name="VELO_TOP",
                                  pose_wrt_parent=list(
                                      pose_camleft_wrt_velo_4x4.flatten()))

        #########################################
        # LABELS
        #########################################
        for row in object_reader:
            frameNum = int(row[0])
            #trackID = int(row[1]) + 1  # VCD can't handle negative ids
            trackID = int(row[1])
            #if trackID == 0:
            #    continue  # Let's ignore DontCare labels

            semantic_class = row[2]
            truncated = utils.float_2dec(float(row[3]))
            occluded = int(row[4])

            alpha = utils.float_2dec(float(
                row[5]))  # this is the observation angle (see cs_overview.pdf)

            left = utils.float_2dec(float(row[6]))
            top = utils.float_2dec(float(row[7]))
            width = utils.float_2dec(float(row[8]) - left)
            height = utils.float_2dec(float(row[9]) - top)

            if trackID == -1:  # This is DontCare, there are multiple boxes
                count = vcd.get_element_data_count_per_type(
                    core.ElementType.object, trackID,
                    types.ObjectDataType.bbox, frameNum)
                name_box = "box2D" + str(count)
            else:
                name_box = "box2D"
            bounding_box = types.bbox(name=name_box,
                                      val=(left + width / 2, top + height / 2,
                                           width, height),
                                      coordinate_system='CAM_LEFT')
            # see cs_overview.pdf
            dimH = utils.float_2dec(float(row[10]))
            dimW = utils.float_2dec(float(row[11]))
            dimL = utils.float_2dec(float(row[12]))

            locX = utils.float_2dec(float(row[13]))
            locY = utils.float_2dec(float(row[14]))
            locZ = utils.float_2dec(float(row[15]))

            rotY = utils.float_2dec(float(row[16]))

            # Note KITTI uses (h, w, l, x,  y,  z,  ry) for cuboids, in camera coordinates (X-to-right, Y-to-bottom, Z-to-front)
            # while in VCD    (x, y, z, rx, ry, rz, sx, sy, sz) is defined as a dextrogire system, centroid-based
            # NOTE: changing locY by locY + dimH/2 as VCD uses centroid and KITTI uses bottom face
            # NOTE: All in Camera coordinate system
            # NOTE: x = length, y = height, z = width because of convention in readme.txt
            # The reference point for the 3D bounding box for each object is centered on the
            # bottom face of the box. The corners of bounding box are computed as follows with
            # respect to the reference point and in the object coordinate system:
            # x_corners = [l/2, l/2, -l/2, -l/2,  l/2,  l/2, -l/2, -l/2]^T
            # y_corners = [0,   0,    0,    0,   -h,   -h,   -h,   -h  ]^T
            # z_corners = [w/2, -w/2, -w/2, w/2, w/2, -w/2, -w/2, w/2  ]^T
            # with l=length, h=height, and w=width.
            cuboid = types.cuboid(name="box3D",
                                  val=(utils.float_2dec(locX),
                                       utils.float_2dec(locY - dimH / 2),
                                       utils.float_2dec(locZ), 0,
                                       utils.float_2dec(rotY), 0,
                                       utils.float_2dec(dimL),
                                       utils.float_2dec(dimH),
                                       utils.float_2dec(dimW)),
                                  coordinate_system="CAM_LEFT")

            if not vcd.has(core.ElementType.object, str(trackID)):
                # First time
                if trackID >= 0:
                    vcd.add_object(name=semantic_class + str(trackID),
                                   semantic_type=semantic_class,
                                   uid=str(trackID),
                                   frame_value=frameNum)
                else:  # so this is for DontCare object
                    vcd.add_object(name=semantic_class,
                                   semantic_type=semantic_class,
                                   uid=str(trackID),
                                   frame_value=frameNum)

            vcd.add_object_data(str(trackID), bounding_box, frameNum)
            vcd.add_object_data(str(trackID), cuboid, frameNum)
            vcd.add_object_data(trackID,
                                types.num(name="truncated", val=truncated),
                                frameNum)
            vcd.add_object_data(trackID,
                                types.num(name="occluded", val=occluded),
                                frameNum)
            vcd.add_object_data(trackID, types.num(name="alpha", val=alpha),
                                frameNum)

        #########################################
        # Ego-vehicle
        #########################################
        vcd.add_object(name="Egocar", semantic_type="Egocar", uid=str(-2))

        cuboid_ego = types.cuboid(name="box3D",
                                  val=(1.35, 0.0, 0.736, 0.0, 0.0, 0.0, 4.765,
                                       1.82, 1.47),
                                  coordinate_system="vehicle-iso8855")
        vcd.add_object_data(str(-2), cuboid_ego)

        # Return
        return vcd