Example #1
    def test_3d_to_2d_point_projection(self):

        anchor_corners = np.asarray(
            [[
                1., 1., -1., -1., 1., 1., -1., -1., 4., 4., 2., 2., 4., 4., 2.,
                2.
            ],
             [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
             [6., 0., 0., 6., 6., 0., 0., 6., 4., 2., 2., 4., 4., 2., 2., 4.]])

        stereo_calib_p2 = \
            np.asarray([[7.21537700e+02, 0.0, 6.09559300e+02, 4.48572800e+01],
                        [0.0, 7.21537700e+02, 1.72854000e+02, 2.16379100e-01],
                        [0.0, 0.0, 1.0, 2.74588400e-03]])

        # Do projection in numpy space
        points_2d = calib_utils.project_to_image(anchor_corners,
                                                 stereo_calib_p2)

        # Do projection in tensor space
        tf_anchor_corners = tf.convert_to_tensor(anchor_corners,
                                                 dtype=tf.float32)
        tf_stereo_calib_p2 = tf.convert_to_tensor(stereo_calib_p2,
                                                  dtype=tf.float32)
        tf_points_2d = anchor_projector.project_to_image_tensor(
            tf_anchor_corners, tf_stereo_calib_p2)

        sess = tf.Session()
        with sess.as_default():
            points_2d_out = tf_points_2d.eval()
            np.testing.assert_allclose(
                points_2d,
                points_2d_out,
                err_msg='Incorrect tensor 3D->2D projection')
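Every example on this page funnels through calib_utils.project_to_image. For orientation, here is a minimal NumPy sketch of such a projection, assuming the standard homogeneous pinhole model; the function name and internals are illustrative, not the library's actual code:

import numpy as np

def project_to_image_sketch(points_3d, p):
    # points_3d: (3, N) camera-frame points; p: (3, 4) projection matrix
    n = points_3d.shape[1]
    points_hom = np.vstack([points_3d, np.ones((1, n))])  # (4, N) homogeneous
    pts = p @ points_hom                                  # (3, N)
    return pts[0:2] / pts[2]                              # divide by depth -> (2, N)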
Example #2
def project_img_to_point_cloud(points, image, calib_dir, img_idx):
    """ Projects image colours to point cloud points

    Arguments:
        points (N by [x,y,z]): list of points where N is
            the number of points
        image (Y by X by [r,g,b]): colour values in image space
        calib_dir (str): calibration directory
        img_idx (int): index of the requested image

    Returns:
        [N by [r,g,b]]: Matrix of colour codes. Indices of colours correspond
            to the indices of the points in the 'points' argument

    """
    # Save the pixel colour corresponding to each point
    frame_calib = calib.read_calibration(calib_dir, img_idx)
    point_in_im = calib.project_to_image(points.T, p=frame_calib.p2).T
    point_in_im_rounded = np.floor(point_in_im)
    point_in_im_rounded = point_in_im_rounded.astype(np.int32)

    point_colours = []
    for point in point_in_im_rounded:
        point_colours.append(image[point[1], point[0], :])

    point_colours = np.asanyarray(point_colours)

    return point_colours
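Note that project_img_to_point_cloud indexes the image directly with the rounded pixel coordinates, so it assumes every point already projects inside the image bounds. A small pre-filtering helper one could add (the name filter_in_bounds is hypothetical):

import numpy as np

def filter_in_bounds(pixels, image_shape):
    # pixels: (N, 2) integer [x, y] coordinates; image_shape: (h, w)
    keep = ((pixels[:, 0] >= 0) & (pixels[:, 0] < image_shape[1]) &
            (pixels[:, 1] >= 0) & (pixels[:, 1] < image_shape[0]))
    return pixels[keep], keep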
Example #3
def project_box3d_to_image(corners_3d, p):
    """Computes the 3D bounding box projected onto
    image space.

    Keyword arguments:
    corners_3d -- (3, 8) array of 3D bounding box corners
    p -- camera projection matrix

    Returns:
        corners: numpy array of corner points projected
        onto image space.
        face_idx: (4, 4) numpy array of corner indices,
        one row per bounding box face
    """
    # Corner indices for each 3D bounding box face,
    # arranged as a 4x4 matrix (one row per face)
    face_idx = np.array([0, 1, 5, 4,   # front face
                         1, 2, 6, 5,   # left face
                         2, 3, 7, 6,   # back face
                         3, 0, 4, 7]   # right face
                        ).reshape((4, 4))
    return calib.project_to_image(corners_3d, p), face_idx
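A typical way to consume the returned values is to draw each face as a closed polygon. A sketch, assuming corners is the (2, 8) array returned by the projection and ax is a matplotlib axes (the helper name is illustrative):

import numpy as np

def draw_box_faces(ax, corners_2d, face_idx):
    # corners_2d: (2, 8) projected corners; face_idx: (4, 4) corner indices per face
    for face in face_idx:
        loop = np.append(face, face[0])  # repeat the first corner to close the polygon
        ax.plot(corners_2d[0, loop], corners_2d[1, loop], 'g-')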
Example #4
def compute_orientation_3d(obj, p):
    """Computes the orientation given object and camera matrix

    Keyword arguments:
    obj -- object label carrying rotation ry, length l and translation t
    p -- camera projection matrix
    """

    # compute rotational matrix
    rot = np.array([[+np.cos(obj.ry), 0, +np.sin(obj.ry)], [0, 1, 0],
                    [-np.sin(obj.ry), 0, +np.cos(obj.ry)]])

    orientation3d = np.array([0.0, obj.l, 0.0, 0.0, 0.0, 0.0]).reshape(3, 2)
    orientation3d = np.dot(rot, orientation3d)

    orientation3d[0, :] = orientation3d[0, :] + obj.t[0]
    orientation3d[1, :] = orientation3d[1, :] + obj.t[1]
    orientation3d[2, :] = orientation3d[2, :] + obj.t[2]

    # only draw for boxes that are in front of the camera
    for idx in np.arange(orientation3d.shape[1]):
        if orientation3d[2, idx] < 0.1:
            return None

    return calib.project_to_image(orientation3d, p)
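The reshape(3, 2) turns [0, l, 0, 0, 0, 0] into two column endpoints, the origin and (l, 0, 0), so after rotation and translation the segment runs from the object centre t along its heading. A quick numeric check of that reading (all values illustrative):

import numpy as np

ry, l, t = np.pi / 2, 4.0, np.array([10.0, 1.5, 20.0])
rot = np.array([[np.cos(ry), 0, np.sin(ry)],
                [0, 1, 0],
                [-np.sin(ry), 0, np.cos(ry)]])
ends = rot @ np.array([0.0, l, 0.0, 0.0, 0.0, 0.0]).reshape(3, 2) + t.reshape(3, 1)
print(ends[:, 0])  # [10.  1.5 20. ]: the object centre t
print(ends[:, 1])  # approx [10.  1.5 16. ]: t shifted l metres along the heading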
Example #5
def project_depths_xy(point_cloud, camera_p, image_shape, max_depth=100.0):
    """Projects a point cloud into image space and saves depths per pixel.

    :param point_cloud: point cloud (N, 3)
    :param camera_p: stereo calibration p matrix
    :param image_shape: image shape [h, w]
    :param max_depth: maximum depth value used to invert the depths

    :return all_depths: projected depth map
    """

    # Transpose to (N, 3); the front-of-camera filter below is left disabled
    point_cloud = point_cloud.T
    # point_cloud = point_cloud[point_cloud[:, 0] > 0]

    # Save the depth corresponding to each point
    point_in_im = calib_utils.project_to_image(point_cloud.T, p=camera_p).T
    point_in_im_rounded = np.array(np.int32(np.floor(point_in_im)))

    # Filter out points that fall outside the image bounds
    image_filter = (point_in_im_rounded[:, 0] > 0) & \
                       (point_in_im_rounded[:, 0] < image_shape[1]) & \
                       (point_in_im_rounded[:, 1] > 0) & \
                       (point_in_im_rounded[:, 1] < image_shape[0])
    point_in_im_rounded = point_in_im_rounded[image_filter]

    # Keep depths aligned with the filtered pixel coordinates
    all_points = np.array(point_cloud)[image_filter]

    # Invert depths
    all_points[:, 2] = max_depth - all_points[:, 2]

    # Only save valid pixels, keep closer points when overlapping
    projected_depths = np.zeros(image_shape)
    x_proj = np.zeros(image_shape)
    y_proj = np.zeros(image_shape)
    valid_indices = tuple(
        [point_in_im_rounded[:, 1], point_in_im_rounded[:, 0]])

    projected_depths[valid_indices] = [
        max(
            projected_depths[point_in_im_rounded[idx, 1],
                             point_in_im_rounded[idx, 0]], all_points[idx, 2])
        for idx in range(len(point_in_im_rounded))
    ]

    projected_depths[valid_indices] = \
        max_depth - projected_depths[valid_indices]

    x_proj[valid_indices] = [
        all_points[idx, 0] for idx in range(len(point_in_im_rounded))
    ]

    y_proj[valid_indices] = [
        all_points[idx, 1] for idx in range(len(point_in_im_rounded))
    ]

    return projected_depths, x_proj, y_proj
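Depths are subtracted from max_depth before being written into the map so that "keep the closest point per pixel" becomes a plain max, which vectorizes cleanly; subtracting again at the end restores true depths. A self-contained illustration of the trick:

import numpy as np

max_depth = 100.0
depths = np.array([30.0, 10.0, 55.0])  # three points landing on the same pixel
inverted = max_depth - depths          # larger value means closer point
pixel_value = inverted.max()           # 90.0: the 10 m point wins
print(max_depth - pixel_value)         # 10.0: the closest depth survives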
Example #6
def get_lidar_point_cloud_jhuang(img_idx,
                                 frame_calib,
                                 velo_dir,
                                 im_size=None,
                                 min_intensity=None,
                                 prefix=''):
    """ Calculates the lidar point cloud, and optionally returns only the
    points that are projected to the image.

    :param img_idx: image index
    :param frame_calib: frame calibration object
    :param velo_dir: directory with velodyne files
    :param im_size: (optional) 2 x 1 list containing the size of the image
                      to filter the point cloud [w, h]
    :param min_intensity: (optional) minimum intensity required to keep a point
    :param prefix: (optional) prefix passed through to the lidar reader

    :return: (3, N) point_cloud in the form [[x,...][y,...][z,...]]
    """

    # Read lidar data
    x, y, z, i = calib_utils.read_lidar_jhuang(velo_dir=velo_dir,
                                               img_idx=img_idx,
                                               prefix=prefix)

    # Calculate the point cloud
    pts = np.vstack((x, y, z)).T
    pts = calib_utils.lidar_to_cam_frame(pts, frame_calib)

    # If no image size is given, return the full point cloud
    if not im_size:
        point_cloud = pts.T
        return point_cloud

    else:
        # Only keep points in front of camera (positive z)
        pts = pts[pts[:, 2] > 0]
        point_cloud = pts.T

        # Project to image frame
        point_in_im = calib_utils.project_to_image(point_cloud,
                                                   p=frame_calib.p2).T

        # Filter based on the given image size
        image_filter = (point_in_im[:, 0] > 0) & \
                       (point_in_im[:, 0] < im_size[0]) & \
                       (point_in_im[:, 1] > 0) & \
                       (point_in_im[:, 1] < im_size[1])

    if not min_intensity:
        return pts[image_filter].T

    else:
        intensity_filter = i > min_intensity
        point_filter = np.logical_and(image_filter, intensity_filter)
        return pts[point_filter].T
Example #7
def get_lidar_point_cloud_with_color(img_idx,
                                     img_dir,
                                     calib_dir,
                                     velo_dir,
                                     im_size=None):
    """ Calculates the lidar point cloud, and optionally returns only the
    points that are projected to the image.

    :param img_idx: image index
    :param img_dir: directory with image files
    :param calib_dir: directory with calibration files
    :param velo_dir: directory with velodyne files
    :param im_size: (optional) 2 x 1 list containing the size of the image
                      to filter the point cloud [w, h]

    :return: (3, N) point_cloud in the form [[x,...][y,...][z,...]] and the
        corresponding (3, N) array of RGB point colours
    """

    # Read calibration info
    frame_calib = calib_utils.read_calibration(calib_dir, img_idx)
    x, y, z, i = calib_utils.read_lidar(velo_dir=velo_dir, img_idx=img_idx)

    # Calculate the point cloud
    pts = np.vstack((x, y, z)).T
    pts = calib_utils.lidar_to_cam_frame(pts, frame_calib)

    # If no image size is given, return the full point cloud
    if not im_size:
        point_cloud = pts.T
        return point_cloud

    else:
        # Only keep points in front of camera (positive z)
        pts = pts[pts[:, 2] > 0]
        point_cloud = pts.T

        # Project to image frame
        point_in_im = calib_utils.project_to_image(point_cloud,
                                                   p=frame_calib.p2).T

        # Filter based on the given image size
        image_filter = (point_in_im[:, 0] > 0) & \
                       (point_in_im[:, 0] < im_size[0]) & \
                       (point_in_im[:, 1] > 0) & \
                       (point_in_im[:, 1] < im_size[1])

        img_dir = img_dir + "/%06d.png" % img_idx
        img = Image.open(img_dir)
        img = np.array(img)
        point_colors = img[point_in_im[image_filter, 1].astype(np.int),
                           point_in_im[image_filter, 0].astype(np.int)]

    # return np.vstack((pts[image_filter].T, point_colors[image_filter].T))
    return pts[image_filter].T, point_colors.T
Example #8
def get_lidar_point_cloud(img_idx, calib_dir, velo_dir,
                          im_size=None, min_intensity=None):
    """ Calculates the lidar point cloud, and optionally returns only the
    points that are projected to the image.

    :param img_idx: image index
    :param calib_dir: directory with calibration files
    :param velo_dir: directory with velodyne files
    :param im_size: (optional) 2 x 1 list containing the size of the image
                      to filter the point cloud [w, h]
    :param min_intensity: (optional) minimum intensity required to keep a point

    :return: (3, N) point_cloud in the form [[x,...][y,...][z,...]]
    """

    # Read calibration info
    frame_calib = calib_utils.read_calibration(calib_dir, img_idx)  # read the calib file into a calibration object
    x, y, z, i = calib_utils.read_lidar(velo_dir=velo_dir, img_idx=img_idx)  # read point x, y, z and intensity from file

    # Calculate the point cloud
    pts = np.vstack((x, y, z)).T  # point positions
    pts = calib_utils.lidar_to_cam_frame(pts, frame_calib)  # transform points into the camera frame

    # If no image size is given, return the full point cloud
    if not im_size:
        point_cloud = pts.T
        return point_cloud

    else:
        # Only keep points in front of camera (positive z); the points are
        # already in the camera frame, where z points forward
        pts = pts[pts[:, 2] > 0]
        point_cloud = pts.T

        # Project to image frame (pixel coordinates)
        point_in_im = calib_utils.project_to_image(point_cloud, p=frame_calib.p2).T

        # Filter based on the given image size; keep only the points that
        # project inside the image
        image_filter = (point_in_im[:, 0] > 0) & \
                       (point_in_im[:, 0] < im_size[0]) & \
                       (point_in_im[:, 1] > 0) & \
                       (point_in_im[:, 1] < im_size[1])  # boolean index

    if not min_intensity:
        return pts[image_filter].T

    else:
        intensity_filter = i > min_intensity
        point_filter = np.logical_and(image_filter, intensity_filter)
        return pts[point_filter].T
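A hedged usage sketch for get_lidar_point_cloud, assuming a KITTI-style directory layout (the paths, index and image size below are illustrative):

point_cloud = get_lidar_point_cloud(img_idx=0,
                                    calib_dir='training/calib',
                                    velo_dir='training/velodyne',
                                    im_size=[1242, 375])  # [w, h]
print(point_cloud.shape)  # (3, N): only points that project inside the image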
Example #9
def genRectOcc(image_input,
               point_cloud,
               sin_input_name,
               mask_2d=None,
               frame_calib_p2=None):
    """
        mask_2d: 2x2 array. 
                 mask_2d[0] -> r_len, r_corner (0~1 scale) y-axis (height)
                 mask_2d[1] -> r_len, r_corner (0~1 scale) x-axis (width)

    """
    image_shape = np.shape(image_input)

    if mask_2d is None:
        mask_2d = genMask2D()

    # Generate rectangular occlusion in 2D image plane
    im_y_start = np.round(mask_2d[0, 1] * image_shape[0]).astype(np.int)
    im_y_end = min(
        image_shape[0],
        im_y_start + np.round(mask_2d[0, 0] * image_shape[0]).astype(np.int))
    im_x_start = np.round(mask_2d[1, 1] * image_shape[1]).astype(np.int)
    im_x_end = min(
        image_shape[1],
        im_x_start + np.round(mask_2d[1, 0] * image_shape[1]).astype(np.int))

    if sin_input_name == 'image':
        if len(image_shape) == 2:
            image_input[im_y_start:im_y_end, im_x_start:im_x_end] = 0
        elif len(image_shape) == 3:
            image_input[im_y_start:im_y_end, im_x_start:im_x_end, :] = 0
        else:
            raise ValueError(
                'Image shape is wrong. Must be either 2d (b/w) or 3d.')
        return image_input
    elif sin_input_name == 'lidar':
        # Occlude lidar point clouds to match the 2D mask in the image plane
        point_in_im = calib_utils.project_to_image(point_cloud,
                                                   p=frame_calib_p2).T

        # Select points that project inside the occluded rectangle
        im_occ_filter = (point_in_im[:, 1] >= im_y_start) & \
                        (point_in_im[:, 1] < im_y_end) & \
                        (point_in_im[:, 0] > im_x_start) & \
                        (point_in_im[:, 0] < im_x_end)
        return point_cloud[:, np.invert(im_occ_filter)]
    else:
        raise ValueError('Currently supports {}'.format(
            ','.join(SIN_INPUT_NAMES)))
Example #10
def genVertOcc(image_input,
               point_cloud,
               sin_input_name,
               frame_calib_p2=None,
               sin_level=1):
    """
        mask_2d: 2x2 array. 
                 mask_2d[0] -> r_len, r_corner (0~1 scale) y-axis (height)
                 mask_2d[1] -> r_len, r_corner (0~1 scale) x-axis (width)

    """
    image_shape = np.shape(image_input)
    im_width = image_shape[1]
    width_vert = int(im_width * minmax_scale(
        sin_level, SINFields.SIN_LEVEL_MIN, SINFields.SIN_LEVEL_MAX,
        SINFields.VERT_W_MIN, SINFields.VERT_W_MAX))

    idx_x_starts = np.arange(0, im_width, 2 * width_vert)
    idx_x_ends = np.arange(width_vert, im_width, 2 * width_vert)
    m_verts = len(idx_x_ends)
    if len(idx_x_starts) > len(idx_x_ends):
        idx_x_ends = np.append(idx_x_ends, im_width)
        m_verts += 1

    idx_zeros = np.zeros(im_width, dtype=np.bool)
    for j in range(m_verts):
        idx_zeros[idx_x_starts[j]:idx_x_ends[j]] = True

    if sin_input_name == 'image':
        if len(image_shape) == 2:
            image_input[:, idx_zeros] = 0
        elif len(image_shape) == 3:
            image_input[:, idx_zeros, :] = 0
        else:
            raise ValueError(
                'Image shape is wrong. Must be either 2d (b/w) or 3d.')
        return image_input
    elif sin_input_name == 'lidar':
        # Occlude lidar point clouds to match the 2D mask in the image plane
        point_in_im = calib_utils.project_to_image(point_cloud,
                                                   p=frame_calib_p2).T

        # Select points that project inside the occluded stripes
        im_occ_filter = idx_zeros[point_in_im[:, 0].astype(int)]
        return point_cloud[:, np.invert(im_occ_filter)]
    else:
        raise ValueError('Currently supports {}'.format(
            ','.join(SIN_INPUT_NAMES)))
Example #11
    def _project_and_show(self, sample_name, point_cloud, color, title):
        "将点云投影到像素坐标,并在对应的图像中显示"
        img_idx = int(sample_name)
        img = Image.open(self.dataset.get_rgb_image_path(sample_name))
        # np.array copies its input by default; np.asarray only copies
        # when necessary
        img_array = np.array(img)

        frame_calib = calib_utils.read_calibration(
            self.dataset.calib_dir, img_idx)  # read the calib file into an object
        point_in_im = calib_utils.project_to_image(point_cloud,
                                                   p=frame_calib.p2).T
        point_in_im = point_in_im[:, [1, 0]]
        point_in_im = point_in_im.astype(int)
        img_array[point_in_im[:, 0],
                  point_in_im[:, 1], :] = ImageColor.getrgb(color)  # fancy indexing pairs rows with columns, like zip

        img = Image.fromarray(img_array)
        img.show()
Example #12
def get_depth_map_point_cloud(img_idx, calib_dir, depth_dir, im_size):
    """ Calculates the point cloud from a depth map

    :param img_idx: image index
    :param calib_dir: directory with calibration files
    :param depth_dir: directory with depth maps
    :param im_size: size of the image [w, h]

    :return: (3, N) point_cloud in the form [[x,...][y,...][z,...]]
    """
    depth_map = depth_map_utils.get_depth_map(img_idx, depth_dir)

    # Calculate point cloud from depth map
    frame_calib = calib.read_calibration(calib_dir, img_idx)
    stereo_calibration_info = calib.get_stereo_calibration(
        frame_calib.p2, frame_calib.p3)

    # Calculate points from depth map
    depth_map_flattened = depth_map.flatten()
    xx, yy = np.meshgrid(np.arange(1, im_size[0] + 1, 1),
                         np.arange(1, im_size[1] + 1, 1))
    xx = xx.flatten() - stereo_calibration_info.center_u
    yy = yy.flatten() - stereo_calibration_info.center_v

    temp = np.divide(depth_map_flattened, stereo_calibration_info.f)
    x = np.multiply(xx, temp)
    y = np.multiply(yy, temp)
    z = depth_map_flattened

    # Get x offset (b_cam) from calibration: cam_mat[0, 3] = (-f_x * b_cam)
    x_offset = -stereo_calibration_info.p[0, 3] / stereo_calibration_info.f

    point_cloud = np.asarray([x + x_offset, y, z])
    points = point_cloud.T

    # Filter points to image frame
    point_in_im = calib.project_to_image(points.T, p=frame_calib.p2).T
    image_filter = \
        (point_in_im[:, 0] > 0) & (point_in_im[:, 0] < im_size[0]) & \
        (point_in_im[:, 1] > 0) & (point_in_im[:, 1] < im_size[1])
    filtered_point_cloud = points[image_filter].T

    return filtered_point_cloud
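The depth-to-3D step above is the usual pinhole back-projection, x = (u - center_u) * z / f and y = (v - center_v) * z / f, with a stereo baseline offset recovered from cam_mat[0, 3] = -f_x * b_cam. A compact restatement of just that step (the function name is illustrative; the symbols follow the code above):

import numpy as np

def backproject_pixel(u, v, z, f, center_u, center_v, p03=0.0):
    # Pinhole back-projection of pixel (u, v) at depth z;
    # p03 is the P[0, 3] entry, so -p03 / f is the x offset b_cam
    x = (u - center_u) * z / f - p03 / f
    y = (v - center_v) * z / f
    return np.array([x, y, z])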
Example #13
def occ_aug(point_cloud, calib, labels):
    # point_cloud: (3, N) point cloud
    # calib: calibration matrix
    # labels: list of mask labels
    # returns: augmented point cloud with points inside the mask boxes removed

    point_in_im = calib_utils.project_to_image(point_cloud, p=calib).T
    point_cloud = point_cloud.T
    mask_labels = occ_aug_mask(labels)

    occ_filter = False

    for obj in mask_labels:
        occ_filter = occ_filter | (point_in_im[:, 0] > obj[0]) & \
                     (point_in_im[:, 0] < obj[2]) & \
                     (point_in_im[:, 1] > obj[1]) & \
                     (point_in_im[:, 1] < obj[3])

    return point_cloud[np.logical_not(occ_filter)].T
Example #14
    def _get_point_cloud(self,
                         image_shape,
                         pointcloud,
                         frame_calib,
                         min_intensity=None):
        im_size = [image_shape[1], image_shape[0]]

        x, y, z, i = pointcloud
        print("Shape of x, y, z, i: ", x.shape, y.shape, z.shape, i.shape)
        # Calculate the point cloud
        pts = np.vstack((x, y, z)).T
        pts = calib_utils.lidar_to_cam_frame(pts, frame_calib)

        # If no image size is given, return the full point cloud
        if not im_size:
            point_cloud = pts.T
            return point_cloud

        else:
            # Only keep points in front of camera (positive z)
            pts = pts[pts[:, 2] > 0]
            point_cloud = pts.T

            # Project to image frame
            point_in_im = calib_utils.project_to_image(point_cloud,
                                                       p=frame_calib.p2).T

            # Filter based on the given image size
            image_filter = (point_in_im[:, 0] > 0) & \
                (point_in_im[:, 0] < im_size[0]) & \
                (point_in_im[:, 1] > 0) & \
                (point_in_im[:, 1] < im_size[1])

        if not min_intensity:
            point_cloud = pts[image_filter].T

        else:
            intensity_filter = i > min_intensity
            point_filter = np.logical_and(image_filter, intensity_filter)
            point_cloud = pts[point_filter].T
        return point_cloud
Example #15
def pc_to_xyz(point_cloud, calib_p, image_shape):
    """Projects Lidar Point cloud to XYZ image encoding.

    :param point_cloud: input point cloud (3,N)
    :param calib_p: stereo calibration p2 matrix
    :param image_shape: image shape [h, w]
    :return xyz_enc: point cloud encoded as an (h x w x 3) image with x, y, z channels
    """

    num_of_points = len(point_cloud.T)

    point_in_im = calib_utils.project_to_image(point_cloud, p=calib_p).T

    point_in_im_rounded = np.int32(np.floor(point_in_im))

    all_x, all_y, all_z = fill_xyz(point_cloud, image_shape, num_of_points,
                                   point_in_im_rounded)

    xyz_enc = np.dstack((all_x, all_y, all_z))

    return xyz_enc
Example #16
def project_flipped_img_to_point_cloud(points, image_flipped, calib_dir,
                                       img_idx):
    """ Projects image colours to point cloud points

    Arguments:
        points (N by [x,y,z]): list of points where N is
            the number of points
        image_flipped (Y by X by [r,g,b]): colour values of the flipped image
        calib_dir (str): calibration directory
        img_idx (int): index of the requested image

    Returns:
        [N by [r,g,b]]: Matrix of colour codes. Indices of colours correspond
            to the indices of the points in the 'points' argument

    """
    # Save the pixel colour corresponding to each point
    frame_calib = calib_utils.read_calibration(calib_dir, img_idx)

    # Fix flipped p2 matrix
    flipped_p2 = np.copy(frame_calib.p2)
    flipped_p2[0, 2] = image_flipped.shape[1] - flipped_p2[0, 2]
    flipped_p2[0, 3] = -flipped_p2[0, 3]

    # Use fixed matrix
    point_in_im = calib_utils.project_to_image(points.T, p=flipped_p2).T

    point_in_im_rounded = np.floor(point_in_im)
    point_in_im_rounded = point_in_im_rounded.astype(np.int32)

    # image_shape = image_flipped.shape
    point_colours = []
    for point in point_in_im_rounded:
        point_colours.append(image_flipped[point[1], point[0], :])

    point_colours = np.asanyarray(point_colours)

    return point_colours
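Horizontally flipping an image maps u to W - u, so the principal point becomes W - c_u and the baseline term flips sign; that is exactly the two-entry fix applied to p2 above. A numeric check of the relation (KITTI-style matrix, but with P[2, 3] zeroed so the identity is exact; the mirrored point negates x):

import numpy as np

p2 = np.array([[721.5377, 0., 609.5593, 44.85728],
               [0., 721.5377, 172.854, 0.],
               [0., 0., 1., 0.]])
W = 1242
flipped_p2 = np.copy(p2)
flipped_p2[0, 2] = W - flipped_p2[0, 2]
flipped_p2[0, 3] = -flipped_p2[0, 3]

pt = np.array([2.0, 1.0, 10.0, 1.0])            # homogeneous camera-frame point
u = (p2 @ pt)[0] / (p2 @ pt)[2]
pt_mirrored = np.array([-2.0, 1.0, 10.0, 1.0])  # same point with x mirrored
uf = (flipped_p2 @ pt_mirrored)[0] / (flipped_p2 @ pt_mirrored)[2]
print(np.isclose(u + uf, W))                    # True: uf == W - u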
Example #17
def draw_boxes(prediction, sample, plot_axes):
    all_corners = []
    for pred in prediction:
        box = np.array(pred[0:7])
        cls_idx = int(pred[8])
        obj = box_3d_encoder.box_3d_to_object_label(box, obj_type=type_whitelist[cls_idx])
        obj.score = pred[7]

        vis_utils.draw_box_3d(plot_axes, obj,
                              sample[constants.KEY_STEREO_CALIB_P2],
                              show_orientation=False,
                              color_table=['r', 'y', 'r', 'w'],
                              line_width=2,
                              double_line=False)
        corners = compute_box_3d(obj)
        all_corners.append(corners)

        # draw text info
        projected = calib_utils.project_to_image(corners.T, sample[constants.KEY_STEREO_CALIB_P2])
        x1 = np.amin(projected[0])
        y1 = np.amin(projected[1])
        x2 = np.amax(projected[0])
        y2 = np.amax(projected[1])
        text_x = (x1 + x2) / 2
        text_y = y1
        text = "{}\n{:.2f}".format(obj.type, obj.score)
        plot_axes.text(text_x, text_y - 4,
            text,
            verticalalignment='bottom',
            horizontalalignment='center',
            color=BOX_COLOUR_SCHEME[obj.type],
            fontsize=10,
            fontweight='bold',
            path_effects=[
                patheffects.withStroke(linewidth=2,
                                       foreground='black')])
    return all_corners
Example #18
def plotSINImage(data_dir,
                 out_dir,
                 img_idx,
                 sin_type,
                 sin_level,
                 on_img,
                 sin_input_name,
                 mask_2d=None):
    fname = '{:06d}'.format(img_idx)
    path_img = os.path.join(data_dir, 'image_2')
    path_velo = os.path.join(data_dir, 'velodyne')
    path_calib = os.path.join(data_dir, 'calib')

    # Load image
    cv_bgr_image = cv2.imread(os.path.join(path_img, fname + '.png'))
    rgb_image = cv_bgr_image[..., ::-1]
    image_shape = rgb_image.shape[0:2]

    # Load point cloud
    frame_calib = calib_utils.read_calibration(path_calib, img_idx)
    x, y, z, _ = calib_utils.read_lidar(velo_dir=path_velo, img_idx=img_idx)
    if sin_type == 'lowres':
        starts = np.where(np.diff(np.sign(y)) > 0)[0] + 1
        true_starts = np.append(np.diff(starts) > 2, [True])
        starts = starts[true_starts]
        n_lasers = starts.shape[0] + 1
        starts = [0] + starts.tolist() + [len(x)]

        include = np.zeros(len(x), dtype=bool)
        stride_sub = get_stride_sub(sin_level)
        lasers_num = range(0, SINFields.LASERS_NUM, stride_sub)
        for laser in lasers_num:
            if laser < n_lasers:
                include[starts[laser]:starts[laser + 1]] = True

        pts_lowres = np.vstack((x[include], y[include], z[include])).T  # N x 3
        pts_lowres = calib_utils.lidar_to_cam_frame(pts_lowres, frame_calib)
        # Only keep points in front of camera (positive z)
        pts_lowres = pts_lowres[pts_lowres[:, 2] > 0]
        # Project to image frame
        point_in_im_lowres = calib_utils.project_to_image(
            pts_lowres.T, p=frame_calib.p2).T  # N x 2
        im_size = [image_shape[1], image_shape[0]]
        # Filter based on the given image size
        image_filter_lowres = (point_in_im_lowres[:, 0] > 0) & \
                              (point_in_im_lowres[:, 0] < im_size[0]) & \
                              (point_in_im_lowres[:, 1] > 0) & \
                              (point_in_im_lowres[:, 1] < im_size[1])
        point_cloud_lowres = pts_lowres[image_filter_lowres, :].T
    else:
        include = np.ones(len(x), dtype=bool)

    pts = np.vstack((x, y, z)).T  # N x 3
    pts = calib_utils.lidar_to_cam_frame(pts, frame_calib)
    # Only keep points in front of camera (positive z)
    pts = pts[pts[:, 2] > 0]
    # Project to image frame
    point_in_im = calib_utils.project_to_image(pts.T,
                                               p=frame_calib.p2).T  # N x 2
    im_size = [image_shape[1], image_shape[0]]
    # Filter based on the given image size
    image_filter = (point_in_im[:, 0] > 0) & \
                   (point_in_im[:, 0] < im_size[0]) & \
                   (point_in_im[:, 1] > 0) & \
                   (point_in_im[:, 1] < im_size[1])
    point_cloud = pts[image_filter, :].T
    point_in_im = point_in_im[image_filter, :]

    image_input_sin, point_cloud_sin = genSINtoInputs(
        rgb_image,
        point_cloud,
        sin_type=sin_type,
        sin_level=sin_level,
        sin_input_name=sin_input_name,
        mask_2d=mask_2d,
        frame_calib_p2=frame_calib.p2)
    if sin_type == 'lowres':
        point_cloud_sin = point_cloud_lowres

    fname_out = fname + '_{}_sin_{}_{}'.format(sin_input_name, sin_type,
                                               sin_level)
    if sin_input_name == 'image':
        cv2.imwrite(os.path.join(out_dir, fname + '_image_org.png'),
                    cv_bgr_image)
        cv2.imwrite(os.path.join(out_dir, fname_out + '.png'),
                    image_input_sin[..., ::-1])
    elif sin_input_name == 'lidar':
        # Distances could be clipped to [D_MIN, D_MAX] for visualization
        # (clipping is currently disabled)
        pointsDist = point_cloud[2, :]
        # for i_pt,pdist in enumerate(pointsDist):
        #     pointsDist[i_pt] = D_MAX if pdist>D_MAX \
        #                        else pdist if pdist>D_MIN \
        #                        else D_MIN
        image_w_pts = points_on_img(point_in_im,
                                    pointsDist,
                                    rgb_image,
                                    on_img=on_img)

        point_in_im2 = calib_utils.project_to_image(point_cloud_sin,
                                                    p=frame_calib.p2).T
        # Same optional clipping for the perturbed cloud (disabled)
        pointsDist2 = point_cloud_sin[2, :]
        # for i_pt,pdist in enumerate(pointsDist2):
        #     pointsDist2[i_pt] =  D_MAX if pdist>D_MAX \
        #                          else pdist if pdist>D_MIN \
        #                          else D_MIN

        image_filter2 = (point_in_im2[:, 0] > 0) & \
                        (point_in_im2[:, 0] < im_size[0]) & \
                        (point_in_im2[:, 1] > 0) & \
                        (point_in_im2[:, 1] < im_size[1])
        point_in_im2 = point_in_im2[image_filter2, :]
        pointsDist2 = pointsDist2[image_filter2]
        image_w_pts2 = points_on_img(point_in_im2,
                                     pointsDist2,
                                     rgb_image,
                                     on_img=on_img)

        cv2.imwrite(os.path.join(out_dir, fname + '_lidar_org.png'),
                    image_w_pts[..., ::-1])
        if on_img:
            cv2.imwrite(os.path.join(out_dir, fname_out + '.png'),
                        image_w_pts2[..., ::-1])
        else:
            cv2.imwrite(os.path.join(out_dir, fname_out + '_black.png'),
                        image_w_pts2[..., ::-1])
    else:
        raise ValueError("Invalid sin_input_name {}".format(sin_input_name))
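The np.diff(np.sign(y)) > 0 trick in the lowres branch finds indices where the lidar y coordinate crosses from negative to positive, which approximates the start of each laser ring in KITTI's ring-major point ordering. A toy illustration:

import numpy as np

y = np.array([0.5, 0.2, -0.1, -0.4, 0.3, 0.1, -0.2, 0.4])
starts = np.where(np.diff(np.sign(y)) > 0)[0] + 1
print(starts)  # [4 7]: positions where y goes from negative to positive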
Example #19
def inference(rpn_model_path, detect_model_path, avod_config_path):
    model_config, _, eval_config, dataset_config = \
        config_builder.get_configs_from_pipeline_file(
            avod_config_path, is_training=False)

    # Setup the model
    model_name = model_config.model_name
    # Overwrite repeated field
    model_config = config_builder.proto_to_obj(model_config)
    # Switch path drop off during evaluation
    model_config.path_drop_probabilities = [1.0, 1.0]

    dataset = get_dataset(dataset_config, 'val')

    # run avod proposal network
    rpn_endpoints, sess1, rpn_model = get_proposal_network(model_config, dataset, rpn_model_path)
    end_points, sess2 = get_detection_network(detect_model_path)

    all_prediction = []
    all_id_list = None
    all_2d_boxes = []
    for idx in range(3769):
        feed_dict1 = rpn_model.create_feed_dict()
        kitti_samples = dataset.load_samples([idx])
        sample = kitti_samples[0]
        '''
        if sample[constants.KEY_SAMPLE_NAME] < '001100':
            continue
        if sample[constants.KEY_SAMPLE_NAME] > '001200':
            break
        '''
        start_time = time.time()
        rpn_predictions = sess1.run(rpn_endpoints, feed_dict=feed_dict1)
        top_anchors = rpn_predictions[RpnModel.PRED_TOP_ANCHORS]
        top_proposals = box_3d_encoder.anchors_to_box_3d(top_anchors)
        softmax_scores = rpn_predictions[RpnModel.PRED_TOP_OBJECTNESS_SOFTMAX]

        proposals_and_scores = np.column_stack((top_proposals,
                                                softmax_scores))
        top_img_roi = rpn_predictions[RpnModel.PRED_TOP_IMG_ROI]
        top_bev_roi = rpn_predictions[RpnModel.PRED_TOP_BEV_ROI]
        roi_num = len(top_img_roi)
        top_img_roi = np.reshape(top_img_roi, (roi_num, -1))
        top_bev_roi = np.reshape(top_bev_roi, (roi_num, -1))
        roi_features = np.column_stack((top_img_roi, top_bev_roi))
        '''
        # save proposal
        if os.path.exists(os.path.join('/data/ssd/public/jlliu/Kitti/object/training/proposal', '%s.txt'%(sample[constants.KEY_SAMPLE_NAME]))):
            continue
        np.savetxt(os.path.join('./proposals_and_scores/', '%s.txt'%sample[constants.KEY_SAMPLE_NAME]), proposals_and_scores, fmt='%.3f')
        np.savetxt(os.path.join('./roi_features/', '%s_roi.txt'%sample[constants.KEY_SAMPLE_NAME]), roi_features, fmt='%.5f')
        print('save ' + sample[constants.KEY_SAMPLE_NAME])
        '''
        # run frustum_pointnets_v2
        point_clouds, feature_vec, rot_angle_list, prop_cls_labels = get_pointnet_input(sample, proposals_and_scores, roi_features)
        try:
            prediction = detect_batch(sess2, end_points, point_clouds, feature_vec, rot_angle_list, prop_cls_labels)
        except Exception:
            traceback.print_exc()
            continue

        elapsed_time = time.time() - start_time
        print(sample[constants.KEY_SAMPLE_NAME], elapsed_time)
        # concat all predictions for kitti eval
        id_list = np.ones((len(prediction),)) * int(sample[constants.KEY_SAMPLE_NAME])
        if all_id_list is None:
            all_id_list = id_list
        else:
            all_id_list = np.concatenate((all_id_list, id_list), axis=0)
        for pred in prediction:
            obj = box_3d_encoder.box_3d_to_object_label(np.array(pred[0:7]), obj_type=type_whitelist[int(pred[8])])
            corners = compute_box_3d(obj)
            projected = calib_utils.project_to_image(corners.T, sample[constants.KEY_STEREO_CALIB_P2])
            x1 = np.amin(projected[0])
            y1 = np.amin(projected[1])
            x2 = np.amax(projected[0])
            y2 = np.amax(projected[1])
            all_2d_boxes.append([x1, y1, x2, y2])
        all_prediction += prediction
        # save result
        pickle.dump({'proposals_and_scores': proposals_and_scores, 'roi_features': roi_features}, open("rpn_out/%s"%sample[constants.KEY_SAMPLE_NAME], "wb"))
        pickle.dump(prediction, open('final_out/%s' % sample[constants.KEY_SAMPLE_NAME], 'wb'))
        visualize(dataset, sample, prediction)
    # for kitti eval
    write_detection_results('./detection_results', all_prediction, all_id_list, all_2d_boxes)
Example #20
def project_3d_box_to_image_space(box_3d, stereo_calib_p2, image_shape):
    """
    Projects 3D anchors into image space

    Args:
        3d_boxes: list of anchors in anchor format N x [x, y, z,
            l, w, h, ry]
        stereo_calib_p2: stereo camera calibration p2 matrix
        image_shape: dimensions of the image [h, w]

    Returns:
        box_corners: corners in image space - N x [x1, y1, x2, y2]
        box_corners_norm: corners as a percentage of the image size -
            N x [x1, y1, x2, y2]
    """
    if box_3d.shape[1] != 7:
        raise ValueError("Invalid shape for box_3d {}, should be "
                         "(N, 7)".format(box_3d.shape[1]))

    # Figure out box mins and maxes
    x = (box_3d[:, 0])
    y = (box_3d[:, 1])
    z = (box_3d[:, 2])
    l = (box_3d[:, 3])
    w = (box_3d[:, 4])
    h = (box_3d[:, 5])
    ry = (box_3d[:, 6])

    #dim_x = (anchors[:, 3])
    #dim_y = (anchors[:, 4])
    #dim_z = (anchors[:, 5])

    #dim_x_half = dim_x / 2.
    #dim_z_half = dim_z / 2.

    # Calculate 3D BB corners
    x_corners = np.array([
        x + l / 2.0 * np.cos(ry) + w / 2.0 * np.sin(ry),  # dim_x_half,
        x - l / 2.0 * np.cos(ry) + w / 2.0 * np.sin(ry),  #dim_x_half,
        x - l / 2.0 * np.cos(ry) - w / 2.0 * np.sin(ry),  # dim_x_half,
        x + l / 2.0 * np.cos(ry) - w / 2.0 * np.sin(ry),  # dim_x_half,
        x + l / 2.0 * np.cos(ry) + w / 2.0 * np.sin(ry),  # + dim_x_half,
        x - l / 2.0 * np.cos(ry) + w / 2.0 * np.sin(ry),  #+ dim_x_half,
        x - l / 2.0 * np.cos(ry) - w / 2.0 * np.sin(ry),  #- dim_x_half,
        x + l / 2.0 * np.cos(ry) - w / 2.0 * np.sin(ry)
    ]).T.reshape(1, -1)

    y_corners = np.array([y, y, y, y, y - h, y - h, y - h,
                          y - h]).T.reshape(1, -1)

    z_corners = np.array([
        z - l / 2.0 * np.sin(ry) + w / 2.0 * np.cos(ry),
        z + l / 2.0 * np.sin(ry) + w / 2.0 * np.cos(ry),  # - dim_z_half,
        z + l / 2.0 * np.sin(ry) - w / 2.0 * np.cos(ry),  # - dim_z_half,
        z - l / 2.0 * np.sin(ry) - w / 2.0 * np.cos(ry),  # + dim_z_half,
        z - l / 2.0 * np.sin(ry) + w / 2.0 * np.cos(ry),  #+ dim_z_half,
        z + l / 2.0 * np.sin(ry) + w / 2.0 * np.cos(ry),  #- dim_z_half,
        z + l / 2.0 * np.sin(ry) - w / 2.0 * np.cos(ry),  #- dim_z_half,
        z - l / 2.0 * np.sin(ry) - w / 2.0 * np.cos(ry)
    ]).T.reshape(1, -1)

    anchor_corners = np.vstack([x_corners, y_corners, z_corners])

    # Apply the 2D image plane transformation
    pts_2d = calib_utils.project_to_image(anchor_corners, stereo_calib_p2)

    image_shape_h = image_shape[0]
    image_shape_w = image_shape[1]

    # Get the min and maxes of image coordinates
    # TODO, remove out-of-boundary points
    i_axis_min_points = np.amin(pts_2d[0, :].reshape(-1, 8), axis=1)
    mask_i_min = i_axis_min_points < 0  ## remove negative
    i_axis_min_points[mask_i_min] = 0
    mask_i_min = i_axis_min_points > image_shape_w - 2  ## remove too large ones
    i_axis_min_points[mask_i_min] = image_shape_w - 3

    j_axis_min_points = np.amin(pts_2d[1, :].reshape(-1, 8), axis=1)
    mask_j_min = j_axis_min_points < 0
    j_axis_min_points[mask_j_min] = 0
    mask_j_min = j_axis_min_points > image_shape_h - 2
    j_axis_min_points[mask_j_min] = image_shape_h - 3

    i_axis_max_points = np.amax(pts_2d[0, :].reshape(-1, 8), axis=1)
    mask_i_max = i_axis_max_points > image_shape_w - 1
    i_axis_max_points[mask_i_max] = image_shape_w - 1

    mask_i_max = i_axis_max_points <= 0
    i_axis_max_points[mask_i_max] = 2

    j_axis_max_points = np.amax(pts_2d[1, :].reshape(-1, 8), axis=1)
    mask_j_max = j_axis_max_points > image_shape_h - 1
    j_axis_max_points[mask_j_max] = image_shape_h - 1

    mask_j_max = j_axis_max_points <= 0
    j_axis_max_points[mask_j_max] = 2

    box_corners = np.vstack([
        i_axis_min_points, j_axis_min_points, i_axis_max_points,
        j_axis_max_points
    ]).T

    # Normalize

    image_shape_tiled = [
        image_shape_w, image_shape_h, image_shape_w, image_shape_h
    ]

    box_corners_norm = box_corners / image_shape_tiled

    return np.array(box_corners, dtype=np.float32), \
        np.array(box_corners_norm, dtype=np.float32)
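The long corner expressions above expand centre + R_y * (±l/2, 0, ±w/2), evaluated for the bottom face (at y) and the top face (at y - h). A compact single-box equivalent as a sanity-check sketch (the function name is illustrative):

import numpy as np

def box_corners_3d(x, y, z, l, w, h, ry):
    # Footprint offsets along length (a) and width (b), repeated for
    # the bottom face (at y) and the top face (at y - h)
    a = np.array([1, -1, -1, 1] * 2) * l / 2.0
    b = np.array([1, 1, -1, -1] * 2) * w / 2.0
    xs = x + a * np.cos(ry) + b * np.sin(ry)
    zs = z - a * np.sin(ry) + b * np.cos(ry)
    ys = y - np.array([0., 0., 0., 0., h, h, h, h])
    return np.vstack([xs, ys, zs])  # (3, 8), matching the layout above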
Example #21
def project_to_image_space(anchors, stereo_calib_p2, image_shape):
    """
    Projects 3D anchors into image space

    Args:
        anchors: list of anchors in anchor format N x [x, y, z,
            dim_x, dim_y, dim_z]
        stereo_calib_p2: stereo camera calibration p2 matrix
        image_shape: dimensions of the image [h, w]

    Returns:
        box_corners: corners in image space - N x [x1, y1, x2, y2]
        box_corners_norm: corners as a percentage of the image size -
            N x [x1, y1, x2, y2]
    """
    if anchors.shape[1] != 6:
        raise ValueError("Invalid shape for anchors {}, should be "
                         "(N, 6)".format(anchors.shape[1]))

    # Figure out box mins and maxes
    x = (anchors[:, 0])
    y = (anchors[:, 1])
    z = (anchors[:, 2])

    dim_x = (anchors[:, 3])
    dim_y = (anchors[:, 4])
    dim_z = (anchors[:, 5])

    dim_x_half = dim_x / 2.
    dim_z_half = dim_z / 2.

    # Calculate 3D BB corners
    x_corners = np.array([
        x + dim_x_half, x + dim_x_half, x - dim_x_half, x - dim_x_half,
        x + dim_x_half, x + dim_x_half, x - dim_x_half, x - dim_x_half
    ]).T.reshape(1, -1)

    y_corners = np.array(
        [y, y, y, y, y - dim_y, y - dim_y, y - dim_y,
         y - dim_y]).T.reshape(1, -1)

    z_corners = np.array([
        z + dim_z_half, z - dim_z_half, z - dim_z_half, z + dim_z_half,
        z + dim_z_half, z - dim_z_half, z - dim_z_half, z + dim_z_half
    ]).T.reshape(1, -1)

    anchor_corners = np.vstack([x_corners, y_corners, z_corners])

    # Apply the 2D image plane transformation
    pts_2d = calib_utils.project_to_image(anchor_corners, stereo_calib_p2)

    # Get the min and maxes of image coordinates
    i_axis_min_points = np.amin(pts_2d[0, :].reshape(-1, 8), axis=1)
    j_axis_min_points = np.amin(pts_2d[1, :].reshape(-1, 8), axis=1)

    i_axis_max_points = np.amax(pts_2d[0, :].reshape(-1, 8), axis=1)
    j_axis_max_points = np.amax(pts_2d[1, :].reshape(-1, 8), axis=1)

    box_corners = np.vstack([
        i_axis_min_points, j_axis_min_points, i_axis_max_points,
        j_axis_max_points
    ]).T

    # Normalize
    image_shape_h = image_shape[0]
    image_shape_w = image_shape[1]

    image_shape_tiled = [
        image_shape_w, image_shape_h, image_shape_w, image_shape_h
    ]

    box_corners_norm = box_corners / image_shape_tiled

    return np.array(box_corners, dtype=np.float32), \
        np.array(box_corners_norm, dtype=np.float32)
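A hedged usage sketch for the anchor version (the p2 matrix is KITTI-style and the anchor values illustrative; calib_utils must be importable for the internal projection call to work):

import numpy as np

anchors = np.array([[0., 1.8, 20., 3.9, 1.6, 1.6]])  # [x, y, z, dim_x, dim_y, dim_z]
p2 = np.array([[721.5377, 0., 609.5593, 44.85728],
               [0., 721.5377, 172.854, 0.2163791],
               [0., 0., 1., 0.00274588]])
box_corners, box_corners_norm = project_to_image_space(
    anchors, p2, image_shape=[375, 1242])
print(box_corners.shape)  # (1, 4): [x1, y1, x2, y2] in pixels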
Example #22
def project_to_image_space(box_3d,
                           calib_p2,
                           truncate=False,
                           image_size=None,
                           discard_before_truncation=True):
    """ Projects a box_3d into image space

    Args:
        box_3d: single box_3d to project
        calib_p2: stereo calibration p2 matrix
        truncate: if True, 2D projections are truncated to be inside the image
        image_size: [w, h] must be provided if truncate is True,
            used for truncation
        discard_before_truncation: If True, discard boxes that are larger than
            80% of the image in width OR height BEFORE truncation. If False,
            discard boxes that are larger than 80% of the width AND
            height AFTER truncation.

    Returns:
        Projected box in image space [x1, y1, x2, y2]
            Returns None if box is not inside the image
    """

    format_checker.check_box_3d_format(box_3d)

    obj_label = box_3d_encoder.box_3d_to_object_label(box_3d)
    corners_3d = obj_utils.compute_box_corners_3d(obj_label)

    projected = calib_utils.project_to_image(corners_3d, calib_p2)

    x1 = np.amin(projected[0])
    y1 = np.amin(projected[1])
    x2 = np.amax(projected[0])
    y2 = np.amax(projected[1])

    img_box = np.array([x1, y1, x2, y2])

    if truncate:
        if not image_size:
            raise ValueError('Image size must be provided')

        image_w = image_size[0]
        image_h = image_size[1]

        # Discard invalid boxes (outside image space)
        if img_box[0] > image_w or \
                img_box[1] > image_h or \
                img_box[2] < 0 or \
                img_box[3] < 0:
            return None

        # Discard boxes that are larger than 80% of the image width OR height
        if discard_before_truncation:
            img_box_w = img_box[2] - img_box[0]
            img_box_h = img_box[3] - img_box[1]
            if img_box_w > (image_w * 0.8) or img_box_h > (image_h * 0.8):
                return None

        # Truncate remaining boxes into image space
        if img_box[0] < 0:
            img_box[0] = 0
        if img_box[1] < 0:
            img_box[1] = 0
        if img_box[2] > image_w:
            img_box[2] = image_w
        if img_box[3] > image_h:
            img_box[3] = image_h

        # Discard boxes that are covering the whole image after truncation
        if not discard_before_truncation:
            img_box_w = img_box[2] - img_box[0]
            img_box_h = img_box[3] - img_box[1]
            if img_box_w > (image_w * 0.8) and img_box_h > (image_h * 0.8):
                return None

    return img_box
Example #23
def interpolate(point_cloud,
                calib_p2,
                image_shape,
                custom_kernel,
                max_depth,
                show_process=False):
    """Interpolates the lidar point cloud by projecting the points into image
    space, and then applying classical image processing steps. The projected
    image is dilated with a custom kernel, closed, and a bilateral blur is
    applied to smooth the final output depth map.

    :param point_cloud: input point cloud (N, 3)
    :param calib_p2: stereo calibration p2 matrix
    :param image_shape: image shape [h, w]
    :param custom_kernel: custom kernel for initial dilation
    :param max_depth: maximum output depth
    :param show_process: (optional) flag to return image processing steps

    :return final_depths: interpolated lidar depth map
    :return process_dict: if show_process is True, this is an OrderedDict
        with entries showing the image processing steps, None otherwise
    """

    all_points = point_cloud.T

    # Save the depth corresponding to each point
    point_in_im = calib_utils.project_to_image(all_points.T, p=calib_p2).T
    point_in_im_rounded = np.int32(np.floor(point_in_im))

    # Invert depths
    all_points[:, 2] = max_depth - all_points[:, 2]

    # Vectorized version
    all_depths = np.zeros(image_shape)
    valid_indices = (point_in_im_rounded[:, 1], point_in_im_rounded[:, 0])
    all_depths[valid_indices] = [
        max(
            all_depths[point_in_im_rounded[idx, 1],
                       point_in_im_rounded[idx, 0]], all_points[idx, 2])
        for idx in range(len(all_points))
    ]

    # Loop version (obsolete, keeping to show logic and as a backup)
    # all_depths = np.zeros((image_shape[1], image_shape[0]))
    # for point_idx in range(len(point_in_im_rounded)):
    #     map_x = point_in_im_rounded[point_idx, 1]
    #     map_y = point_in_im_rounded[point_idx, 0]
    #
    #     point_depth = all_points[point_idx, 2]
    #
    #     # Keep the farther distance for overlapping points
    #     if all_depths[map_x, map_y] > 0.0:
    #         all_depths[map_x, map_y] = \
    #             max(all_depths[map_x, map_y], point_depth)
    #     else:
    #         all_depths[map_x, map_y] = point_depth
    #
    #     # Clip to specified maximum depth
    #     all_depths[map_x, map_y] = \
    #         np.minimum(all_depths[map_x, map_y], max_depth)

    # Fill in the depth map
    lidar_depths = np.float32(all_depths)

    # Operations
    depths_in = lidar_depths
    dilated_depths = cv2.dilate(depths_in, custom_kernel)
    closed_depths = cv2.morphologyEx(dilated_depths, cv2.MORPH_CLOSE,
                                     FULL_KERNEL_5)
    blurred_depths = cv2.bilateralFilter(closed_depths, 3, 1, 2)
    depths_out = blurred_depths

    # Save final version to final_depths variable to be used later
    final_depths = depths_out.copy()

    # Invert
    valid_pixels = np.where(final_depths > 0.5)
    valid_pixels = np.asarray(valid_pixels).T
    final_depths[valid_pixels[:, 0], valid_pixels[:, 1]] = \
        max_depth - final_depths[valid_pixels[:, 0], valid_pixels[:, 1]]

    process_dict = None
    if show_process:
        process_dict = collections.OrderedDict()
        process_dict['lidar_depths'] = lidar_depths

        process_dict['dilated_depths'] = dilated_depths
        process_dict['closed_depths'] = closed_depths
        process_dict['blurred_depths'] = blurred_depths

        process_dict['final_depths'] = final_depths

    return final_depths, process_dict
Example #24
def get_lidar_point_cloud_sub(img_idx,
                              calib_dir,
                              velo_dir,
                              im_size=None,
                              min_intensity=None,
                              stride_sub=1):
    """ Calculates the lidar point cloud, and optionally returns only the
    points that are projected to the image.

    :param img_idx: image index
    :param calib_dir: directory with calibration files
    :param velo_dir: directory with velodyne files
    :param im_size: (optional) 2 x 1 list containing the size of the image
                      to filter the point cloud [w, h]
    :param min_intensity: (optional) minimum intensity required to keep a point

    :return: (3, N) point_cloud in the form [[x,...][y,...][z,...]]
    """

    # Read calibration info
    frame_calib = calib_utils.read_calibration(calib_dir, img_idx)
    x, y, z, i = calib_utils.read_lidar(velo_dir=velo_dir, img_idx=img_idx)

    starts = np.where(np.diff(np.sign(y)) > 0)[0] + 1
    true_starts = np.append(np.diff(starts) > 2, [True])
    starts = starts[true_starts]
    n_lasers = starts.shape[0] + 1
    starts = [0] + starts.tolist() + [len(x)]

    include = np.zeros(len(x), dtype=bool)
    lasers_num = range(0, SINFields.LASERS_NUM, stride_sub)
    for laser in lasers_num:
        if laser < n_lasers:
            include[starts[laser]:starts[laser + 1]] = True

    i = i[include]

    # Calculate the point cloud
    pts = np.vstack((x[include], y[include], z[include])).T
    pts = calib_utils.lidar_to_cam_frame(pts, frame_calib)

    # If no image size is given, return the full point cloud
    if not im_size:
        point_cloud = pts.T
        return point_cloud

    else:
        # Only keep points in front of camera (positive z)
        pts = pts[pts[:, 2] > 0]
        point_cloud = pts.T

        # Project to image frame
        point_in_im = calib_utils.project_to_image(point_cloud,
                                                   p=frame_calib.p2).T

        # Filter based on the given image size
        image_filter = (point_in_im[:, 0] > 0) & \
                       (point_in_im[:, 0] < im_size[0]) & \
                       (point_in_im[:, 1] > 0) & \
                       (point_in_im[:, 1] < im_size[1])

    if not min_intensity:
        return pts[image_filter].T

    else:
        intensity_filter = i > min_intensity
        point_filter = np.logical_and(image_filter, intensity_filter)
        return pts[point_filter].T