Example #1
def project_img_to_point_cloud(points, image, calib_dir, img_idx):
    """ Projects image colours to point cloud points

    Arguments:
        points (N by [x,y,z]): list of points where N is
            the number of points
        image (Y by X by [r,g,b]): colour values in image space
        calib_dir (str): calibration directory
        img_idx (int): index of the requested image

    Returns:
        [N by [r,g,b]]: Matrix of colour codes. Indices of colours correspond
            to the indices of the points in the 'points' argument

    """
    # Save the pixel colour corresponding to each point
    frame_calib = calib.read_calibration(calib_dir, img_idx)
    point_in_im = calib.project_to_image(points.T, p=frame_calib.p2).T
    point_in_im_rounded = np.floor(point_in_im)
    point_in_im_rounded = point_in_im_rounded.astype(np.int32)

    point_colours = []
    for point in point_in_im_rounded:
        point_colours.append(image[point[1], point[0], :])

    point_colours = np.asanyarray(point_colours)

    return point_colours
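
A minimal usage sketch for the function above. The dataset path and sample index are hypothetical, and the calib module is assumed to be the project's calibration utilities already imported by this file:

import cv2
import numpy as np

# Hypothetical camera-frame points (N, 3) and KITTI-style paths
# (assumes all points project inside the image bounds)
pts_cam = np.array([[1.0, 1.5, 8.0],
                    [-2.0, 0.5, 12.0]])
image = cv2.imread('/path/to/Kitti/object/training/image_2/000000.png')[..., ::-1]

# One RGB triple per input point, in the same order as pts_cam
colours = project_img_to_point_cloud(
    pts_cam, image, '/path/to/Kitti/object/training/calib', 0)
assert colours.shape == (2, 3)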
Example #2
def main():

    test_pipeline_config_path = mlod.root_dir() + \
        '/data/configs/official/cars/cars_000_vanilla.config'
    model_config, train_config, _, dataset_config = \
        config_builder_util.get_configs_from_pipeline_file(
            test_pipeline_config_path, is_training=True)

    # train_val_test = 'val'
    # dataset_config.data_split = 'val'

    train_val_test = 'test'
    dataset_config.data_split = 'trainval'
    dataset_config.data_split_dir = 'training'
    dataset_config.has_labels = False

    # dataset_config.cache_config.cache_images = True
    # dataset_config.cache_config.cache_depth_maps = True

    dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                 use_defaults=False)
    kitti_utils = dataset.kitti_utils

    bev_source = 'lidar'
    # sample_name = '000000'
    # img_idx = np.random.randint(0, 1000)
    # sample_name = '{:06d}'.format(img_idx)

    num_samples = 200

    all_load_times = []
    all_bev_times = []
    for sample_idx in range(num_samples):
        sys.stdout.write('\rSample {} / {}'.format(sample_idx,
                                                   num_samples - 1))

        img_idx = sample_idx
        sample_name = '{:06d}'.format(sample_idx)

        loading_start_time = time.time()
        # Load image
        image = cv2.imread(dataset.get_rgb_image_path(sample_name))
        image_shape = image.shape[0:2]
        calib_p2 = calib_utils.read_calibration(dataset.calib_dir, img_idx).p2

        point_cloud = kitti_utils.get_point_cloud(bev_source, int(sample_name),
                                                  image_shape)
        ground_plane = kitti_utils.get_ground_plane(sample_name)
        all_load_times.append(time.time() - loading_start_time)

        bev_start_time = time.time()
        bev_maps = kitti_utils.create_bev_maps(point_cloud, ground_plane)
        bev_end_time = time.time()
        all_bev_times.append(bev_end_time - bev_start_time)

    print('')
    print('Load mean:', np.mean(all_load_times))
    print('Load median:', np.median(all_load_times))
    print('BEV mean:', np.mean(all_bev_times))
    print('BEV median:', np.median(all_bev_times))
Example #3
    def load_samples(self, indices):
        sample_dicts = []
        for sample_idx in indices:
            sample = self.sample_list[sample_idx]
            sample_name = sample.name

            if self.has_labels:
                obj_labels = obj_utils.read_labels(self.label_dir,
                                                   int(sample_name))

                label_classes, label_boxes_3d, label_boxes_2d = self.parse_obj_labels(
                    obj_labels, self.label_map)
            else:
                obj_labels = None
                label_classes = np.zeros(1)
                label_boxes_2d = np.zeros((1, 4))
                label_boxes_3d = np.zeros((1, 7))

            # image
            cv_bgr_image = cv2.imread(self.get_rgb_image_path(
                int(sample_name)))
            rgb_image = cv_bgr_image[..., ::-1]
            im_shape = rgb_image.shape[0:2]
            image_input = rgb_image

            # calibration
            stereo_calib_p2 = calib_utils.read_calibration(
                self.calib_dir, int(sample_name)).p2

            # point cloud
            # project points to the camera frame and keep only those within the image
            point_cloud = obj_utils.get_lidar_point_cloud(int(sample_name),
                                                          self.calib_dir,
                                                          self.velo_dir,
                                                          im_size=im_shape)

            #################################
            # Data Augmentation
            #################################
            if kitti_aug.AUG_FLIPPING in sample.augs:
                pass

            if kitti_aug.AUG_PCA_JITTER in sample.augs:
                pass

            sample_dict = {
                constants.KEY_IMAGE_INPUT: image_input,
                constants.KEY_POINT_CLOUD: point_cloud,
                constants.KEY_LABEL_CLASSES: label_classes,
                constants.KEY_LABEL_BOXES_2D: label_boxes_2d,
                constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
                constants.KEY_STEREO_CALIB_P2: stereo_calib_p2
            }

            sample_dicts.append(sample_dict)
        return sample_dicts
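
A hedged consumption sketch for the dicts returned above, assuming a dataset instance built elsewhere in the project and its constants module:

# Hypothetical usage of load_samples
samples = dataset.load_samples([0, 1])
for s in samples:
    rgb = s[constants.KEY_IMAGE_INPUT]          # (H, W, 3) RGB image
    cloud = s[constants.KEY_POINT_CLOUD]        # (3, N) camera-frame points
    boxes_3d = s[constants.KEY_LABEL_BOXES_3D]  # (M, 7) 3D box labels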
Example #4
def get_lidar_point_cloud_with_color(img_idx,
                                     img_dir,
                                     calib_dir,
                                     velo_dir,
                                     im_size=None):
    """ Calculates the lidar point cloud, and optionally returns only the
    points that are projected to the image.

    :param img_idx: image index
    :param calib_dir: directory with calibration files
    :param velo_dir: directory with velodyne files
    :param im_size: (optional) 2 x 1 list containing the size of the image
                      to filter the point cloud [w, h]
    :param min_intensity: (optional) minimum intensity required to keep a point

    :return: (3, N) point_cloud in the form [[x,...][y,...][z,...]]
    """

    # Read calibration info
    frame_calib = calib_utils.read_calibration(calib_dir, img_idx)
    x, y, z, i = calib_utils.read_lidar(velo_dir=velo_dir, img_idx=img_idx)

    # Calculate the point cloud
    pts = np.vstack((x, y, z)).T
    pts = calib_utils.lidar_to_cam_frame(pts, frame_calib)

    # If no image size is given, return the full point cloud
    if not im_size:
        point_cloud = pts.T
        return point_cloud

    else:
        # Only keep points in front of camera (positive z)
        pts = pts[pts[:, 2] > 0]
        point_cloud = pts.T

        # Project to image frame
        point_in_im = calib_utils.project_to_image(point_cloud,
                                                   p=frame_calib.p2).T

        # Filter based on the given image size
        image_filter = (point_in_im[:, 0] > 0) & \
                       (point_in_im[:, 0] < im_size[0]) & \
                       (point_in_im[:, 1] > 0) & \
                       (point_in_im[:, 1] < im_size[1])

        img_dir = img_dir + "/%06d.png" % img_idx
        img = Image.open(img_dir)
        img = np.array(img)
        point_colors = img[point_in_im[image_filter, 1].astype(int),
                           point_in_im[image_filter, 0].astype(int)]

    # return np.vstack((pts[image_filter].T, point_colors[image_filter].T))
    return pts[image_filter].T, point_colors.T
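
If a single array is more convenient downstream, the two return values can be stacked into a coloured point cloud, mirroring the commented-out return above (paths are hypothetical):

import numpy as np

pts_in_img, point_colors = get_lidar_point_cloud_with_color(
    0, '/path/to/image_2', '/path/to/calib', '/path/to/velodyne',
    im_size=[1242, 375])  # [w, h]
coloured_cloud = np.vstack((pts_in_img, point_colors))  # rows: x, y, z, r, g, b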
Example #5
    def test_depth_from_disparity(self):
        # Just to check if method works without errors.
        calib_dir = ROOTDIR + '/tests/test_data/calib'
        disp_dir = ROOTDIR + '/tests/test_data'
        img_idx = 1
        disp = calib.read_disparity(disp_dir, img_idx)
        frame_calib = calib.read_calibration(calib_dir, img_idx)
        stereo_calibration_info = calib.get_stereo_calibration(frame_calib.p2,
                                                               frame_calib.p3)

        x, y, z = calib.depth_from_disparity(disp, stereo_calibration_info)
Example #6
def get_lidar_point_cloud(img_idx, calib_dir, velo_dir,
                          im_size=None, min_intensity=None):
    """ Calculates the lidar point cloud, and optionally returns only the
    points that are projected to the image.

    :param img_idx: image index
    :param calib_dir: directory with calibration files
    :param velo_dir: directory with velodyne files
    :param im_size: (optional) 2 x 1 list containing the size of the image
                      to filter the point cloud [w, h]
    :param min_intensity: (optional) minimum intensity required to keep a point

    :return: (3, N) point_cloud in the form [[x,...][y,...][z,...]]
    """

    # Read calibration info
    frame_calib = calib_utils.read_calibration(calib_dir, img_idx)  # read the calibration file into an object
    x, y, z, i = calib_utils.read_lidar(velo_dir=velo_dir, img_idx=img_idx)  # read x, y, z and intensity from the velodyne file

    # Calculate the point cloud
    pts = np.vstack((x, y, z)).T  # point cloud positions
    pts = calib_utils.lidar_to_cam_frame(pts, frame_calib)  # transform points to the camera frame

    # If no image size is given, return the full point cloud
    if not im_size:
        point_cloud = pts.T
        return point_cloud

    else:
        # Only keep points in front of the camera (positive z); points are already in the camera frame
        pts = pts[pts[:, 2] > 0]
        point_cloud = pts.T

        # Project to image frame (pixel coordinates)
        point_in_im = calib_utils.project_to_image(point_cloud, p=frame_calib.p2).T

        # Filter based on the given image size: keep only points inside the image
        image_filter = (point_in_im[:, 0] > 0) & \
                       (point_in_im[:, 0] < im_size[0]) & \
                       (point_in_im[:, 1] > 0) & \
                       (point_in_im[:, 1] < im_size[1])  # boolean mask

    if not min_intensity:
        return pts[image_filter].T

    else:
        intensity_filter = i > min_intensity
        point_filter = np.logical_and(image_filter, intensity_filter)
        return pts[point_filter].T
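
The image filter composes with plain NumPy boolean logic; a self-contained sketch of the same in-image test on synthetic data:

import numpy as np

# Synthetic projected pixel coordinates (N, 2) and an 80 x 60 image [w, h]
point_in_im = np.array([[10.0, 20.0], [-5.0, 30.0], [79.0, 59.0], [85.0, 10.0]])
im_size = [80, 60]
image_filter = (point_in_im[:, 0] > 0) & (point_in_im[:, 0] < im_size[0]) & \
               (point_in_im[:, 1] > 0) & (point_in_im[:, 1] < im_size[1])
print(image_filter)  # [ True False  True False]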
Example #7
    def test_read_calibration(self):
        # The test file used for calibration is 724513.txt
        test_data_dir = ROOTDIR + "/tests/test_data/calib/"
        calib_out = calib.read_calibration(test_data_dir, 724513)
        test_data_dir = ROOTDIR + "/tests/test_data/"
        calib_true = scipy.io.loadmat(test_data_dir+'readcalib.mat')

        np.testing.assert_almost_equal(calib_out.p0, calib_true['p0'])
        np.testing.assert_almost_equal(calib_out.p1, calib_true['p1'])
        np.testing.assert_almost_equal(calib_out.p2, calib_true['p2'])
        np.testing.assert_almost_equal(calib_out.p3, calib_true['p3'])
        np.testing.assert_almost_equal(calib_out.r0_rect, calib_true['r0_rect'])

        np.testing.assert_almost_equal(calib_out.tr_velodyne_to_cam,
                                       calib_true['tr_velo_to_cam'])
Example #8
    def test_project_to_image_space_tensors(self):

        anchors = np.asarray([[0, 0, 3, 2, 0, 6], [3, 0, 3, 2, 0, 2]],
                             dtype=np.float64)
        img_idx = int('000217')
        img_shape = [375, 1242]

        dataset_config = DatasetBuilder.copy_config(
            DatasetBuilder.KITTI_UNITTEST)

        dataset_config.data_split = 'train'
        dataset_config.dataset_dir = tests.test_path() + \
            "/datasets/Kitti/object"

        dataset = DatasetBuilder().build_kitti_dataset(dataset_config)

        stereo_calib_p2 = calib_utils.read_calibration(dataset.calib_dir,
                                                       img_idx).p2

        # Project the 3D points in numpy space
        img_corners, img_corners_norm = anchor_projector.project_to_image_space(
            anchors, stereo_calib_p2, img_shape)

        # convert the required params to tensors
        tf_stereo_calib_p2 = tf.convert_to_tensor(stereo_calib_p2,
                                                  dtype=tf.float32)
        tf_anchors = tf.convert_to_tensor(anchors, dtype=tf.float32)
        tf_img_shape = tf.convert_to_tensor(img_shape, dtype=tf.float32)

        # Project the 3D points in tensor space
        img_corners_tensor, img_corners_norm_tensor = \
            anchor_projector.tf_project_to_image_space(tf_anchors,
                                                       tf_stereo_calib_p2,
                                                       tf_img_shape)

        sess = tf.Session()
        with sess.as_default():
            img_corners_out = img_corners_tensor.eval()
            img_corners_norm_out = img_corners_norm_tensor.eval()
            np.testing.assert_allclose(img_corners,
                                       img_corners_out,
                                       atol=1e-04,
                                       err_msg='Incorrect corner projection')
            np.testing.assert_allclose(
                img_corners_norm,
                img_corners_norm_out,
                atol=1e-04,
                err_msg='Incorrect normalized corner projection')
Example #9
    def _project_and_show(self, sample_name, point_cloud, color, title):
        "将点云投影到像素坐标,并在对应的图像中显示"
        img_idx = int(sample_name)
        img = Image.open(self.dataset.get_rgb_image_path(sample_name))
        img_array = np.array(img)  # np.array copies by default; np.asarray copies only when necessary

        frame_calib = calib_utils.read_calibration(
            self.dataset.calib_dir, img_idx)  # read the calibration file into an object
        point_in_im = calib_utils.project_to_image(point_cloud,
                                                   p=frame_calib.p2).T
        point_in_im = point_in_im[:, [1, 0]]
        point_in_im = point_in_im.astype(int)
        img_array[point_in_im[:, 0],
                  point_in_im[:, 1], :] = ImageColor.getrgb(color)  # fancy indexing pairs rows with columns, like zip

        img = Image.fromarray(img_array)
        img.show()
Example #10
    def test_read_lidar(self):
        test_data_dir = ROOTDIR + "/tests/test_data/calib"
        velo_mat = scipy.io.loadmat(test_data_dir + '/test_velo.mat')
        velo_true = velo_mat['current_frame']['xyz_velodyne'][0][0][:,0:3]

        x, y, z, i = calib.read_lidar(velo_dir=test_data_dir,
                                      img_idx=0)

        velo_test = np.vstack((x, y, z)).T
        np.testing.assert_almost_equal(velo_true, velo_test, decimal=5, verbose=True)

        velo_mat = scipy.io.loadmat(test_data_dir + '/test_velo_tf.mat')
        velo_true_tf = velo_mat['velo_cam_frame']

        calib_out = calib.read_calibration(test_data_dir, 0)
        xyz_cam = calib.lidar_to_cam_frame(velo_test, calib_out)

        np.testing.assert_almost_equal(velo_true_tf, xyz_cam, decimal=5, verbose=True)
Example #11
def get_depth_map_point_cloud(img_idx, calib_dir, depth_dir, im_size):
    """ Calculates the point cloud from a depth map

    :param img_idx: image index
    :param calib_dir: directory with calibration files
    :param depth_dir: directory with depth maps
    :param im_size: size of the image [w, h]

    :return: (3, N) point_cloud in the form [[x,...][y,...][z,...]]
    """
    depth_map = depth_map_utils.get_depth_map(img_idx, depth_dir)

    # Calculate point cloud from depth map
    frame_calib = calib.read_calibration(calib_dir, img_idx)
    stereo_calibration_info = calib.get_stereo_calibration(
        frame_calib.p2, frame_calib.p3)

    # Calculate points from depth map
    depth_map_flattened = depth_map.flatten()
    xx, yy = np.meshgrid(np.arange(1, im_size[0] + 1, 1),
                         np.arange(1, im_size[1] + 1, 1))
    xx = xx.flatten() - stereo_calibration_info.center_u
    yy = yy.flatten() - stereo_calibration_info.center_v

    temp = np.divide(depth_map_flattened, stereo_calibration_info.f)
    x = np.multiply(xx, temp)
    y = np.multiply(yy, temp)
    z = depth_map_flattened

    # Get x offset (b_cam) from calibration: cam_mat[0, 3] = (-f_x * b_cam)
    x_offset = -stereo_calibration_info.p[0, 3] / stereo_calibration_info.f

    point_cloud = np.asarray([x + x_offset, y, z])
    points = point_cloud.T

    # Filter points to image frame
    point_in_im = calib.project_to_image(points.T, p=frame_calib.p2).T
    image_filter = \
        (point_in_im[:, 0] > 0) & (point_in_im[:, 0] < im_size[0]) & \
        (point_in_im[:, 1] > 0) & (point_in_im[:, 1] < im_size[1])
    filtered_point_cloud = points[image_filter].T

    return filtered_point_cloud
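
The back-projection above inverts the pinhole model u = f * x / z + c_u, v = f * y / z + c_v; a standalone round-trip check with made-up intrinsics:

import numpy as np

f, cu, cv = 700.0, 600.0, 180.0   # made-up intrinsics
x, y, z = 1.5, -0.2, 10.0         # a camera-frame point

u = f * x / z + cu                # forward pinhole projection
v = f * y / z + cv

x_back = (u - cu) * z / f         # the inversion used in the function above
y_back = (v - cv) * z / f
assert np.allclose([x_back, y_back], [x, y])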
Example #12
def create_framecalib(from_pandora=True):
    # These values are collected from the ROS calibration matrix
    if from_pandora:
        frame_calib = FrameCalibrationData()
        p2 = [
            1275.28898946, 0.0, 622.0, 0.0, 0.0, 725.783914414, 185.0, 0.0,
            0.0, 0.0, 1.0, 0.0
        ]
        p2 = np.reshape(p2, (3, 4))
        tr_velodyne_to_cam = [0, -1, 0, 0, 0, 0, -1, 0, 1, 0, 0, 0]
        tr_velodyne_to_cam = np.reshape(tr_velodyne_to_cam, (3, 4))
        r0 = [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0]

        frame_calib.p2 = p2
        frame_calib.tr_velodyne_to_cam = tr_velodyne_to_cam
        frame_calib.r0_rect = np.reshape(r0, (3, 3))
        return frame_calib
    else:
        # This is the correct form if testing with kitti data
        return calib_utils.read_calibration(
            "/notebooks/DATA/Kitti/object/testing/calib", 1)
Example #13
def main():
    # Start of the Kitti demo code
    print('=== Python Kitti Wrapper Demo ===')

    # Setting Paths
    data_set = 'training'
    cam = 2

    root_dir = '/notebooks/DATA/Kitti/object/'  # or os.path.expanduser('~') + '/Kitti/object/'

    image_dir = os.path.join(root_dir, data_set) + '/image_' + str(cam)
    label_dir = os.path.join(root_dir, data_set) + '/label_' + str(cam)
    calib_dir = os.path.join(root_dir, data_set) + '/calib'

    img_idx = int(random.random() * 100)
    print('img_idx', img_idx)

    # Run Visualization Function
    f, ax1, ax2 = vis_utils.visualization(image_dir, img_idx)

    # Read the frame calibration info
    frame_calibration_info = calib_utils.read_calibration(calib_dir, img_idx)

    p = frame_calibration_info.p2

    # Load labels
    objects = obj_utils.read_labels(label_dir, img_idx)

    # For all annotated objects
    for obj in objects:

        # Draw 2D and 3D boxes
        vis_utils.draw_box_2d(ax1, obj)
        vis_utils.draw_box_3d(ax2, obj, p)

    # Render results
    plt.draw()
    plt.show()
Example #14
def project_flipped_img_to_point_cloud(points, image_flipped, calib_dir,
                                       img_idx):
    """ Projects image colours to point cloud points

    Arguments:
        points (N by [x,y,z]): list of points where N is
            the number of points
        image_flipped (Y by X by [r,g,b]): colour values in image space
        calib_dir (str): calibration directory
        img_idx (int): index of the requested image

    Returns:
        [N by [r,g,b]]: Matrix of colour codes. Indices of colours correspond
            to the indices of the points in the 'points' argument

    """
    # Save the pixel colour corresponding to each point
    frame_calib = calib_utils.read_calibration(calib_dir, img_idx)

    # Fix flipped p2 matrix
    flipped_p2 = np.copy(frame_calib.p2)
    flipped_p2[0, 2] = image_flipped.shape[1] - flipped_p2[0, 2]
    flipped_p2[0, 3] = -flipped_p2[0, 3]

    # Use fixed matrix
    point_in_im = calib_utils.project_to_image(points.T, p=flipped_p2).T

    point_in_im_rounded = np.floor(point_in_im)
    point_in_im_rounded = point_in_im_rounded.astype(np.int32)

    # image_shape = image_flipped.shape
    point_colours = []
    for point in point_in_im_rounded:
        point_colours.append(image_flipped[point[1], point[0], :])

    point_colours = np.asanyarray(point_colours)

    return point_colours
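
Why the p2 fix works: horizontally flipping an image of width W maps pixel u to W - u (ignoring the off-by-one of integer indexing), so the principal point c_u becomes W - c_u and the baseline term changes sign. A quick check with made-up numbers:

import numpy as np

W, f, cu = 1242.0, 700.0, 600.0        # made-up width and intrinsics
x, z = 1.5, 10.0                       # camera-frame point (y is unaffected)

u = f * x / z + cu                     # projection in the original image
u_flipped_expected = W - u             # where that pixel lands after the flip

cu_flipped = W - cu                    # the flipped_p2[0, 2] fix above
u_flipped = f * (-x) / z + cu_flipped  # flipped scene: x -> -x
assert np.isclose(u_flipped, u_flipped_expected)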
Example #15
    def test_compute_box_3d(self):
        # read in calib file and label file and mat file
        calib_frame = calib.read_calibration(self.test_data_calib_dir, 724513)
        objects = obj_utils.read_labels(self.test_data_label_dir, 5258)
        label_true = scipy.io.loadmat(self.test_data_dir + '/compute3d.mat')

        # compute
        corners_3d = obj_utils.compute_box_corners_3d(objects[0])
        corners, face_idx = obj_utils.project_box3d_to_image(
            corners_3d, calib_frame.p2)
        # compare data
        np.testing.assert_almost_equal(corners, label_true['corners'])

        orientation = obj_utils.compute_orientation_3d(objects[0], calib_frame.p2)

        # -1 for index in python vs matlab
        self.assertTrue((face_idx == label_true['face_idx']-1).all())

        # Test orientation
        self.assertAlmostEqual(orientation.all(),
                               label_true['orientation'].all())

        return
Example #16
def get_stereo_point_cloud(img_idx, calib_dir, disp_dir):
    """
    Gets the point cloud for an image calculated from the disparity map

    :param img_idx: image index
    :param calib_dir: directory with calibration files
    :param disp_dir: directory with disparity images

    :return: (3, N) point_cloud in the form [[x,...][y,...][z,...]]
    """

    disp = cv2.imread(disp_dir + "/%06d_left_disparity.png" % img_idx,
                      cv2.IMREAD_ANYDEPTH)

    # Read calibration info
    frame_calib = calib.read_calibration(calib_dir, img_idx)
    stereo_calibration_info = calib.get_stereo_calibration(
        frame_calib.p2, frame_calib.p3)

    # Calculate the point cloud
    point_cloud = calib.depth_from_disparity(disp, stereo_calibration_info)

    return point_cloud
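
The depth recovery inside calib.depth_from_disparity follows the standard stereo relation z = f * b / d; a standalone sketch with made-up calibration values:

import numpy as np

f = 721.5                      # focal length in pixels (made-up)
baseline = 0.54                # stereo baseline in metres (KITTI-like scale)
disparity = np.array([90.0, 45.0, 30.0])

depth = f * baseline / disparity
print(depth)                   # approx [4.33, 8.66, 12.99] metres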
Example #17
def main():
    """This demo shows RPN proposals and AVOD predictions in 3D
    and 2D in image space. Given certain thresholds for proposals
    and predictions, it selects and draws the bounding boxes on
    the image sample. It goes through the entire proposal and
    prediction samples for the given dataset split.

    The proposals, overlaid, and prediction images can be toggled on or off
    separately in the options section.
    The prediction score and IoU with ground truth can be toggled on or off
    as well, shown as (score, IoU) above the detection.
    """
    dataset_config = DatasetBuilder.copy_config(DatasetBuilder.KITTI_VAL)

    ##############################
    # Options
    ##############################
    dataset_config.data_split = 'val'

    fig_size = (10, 6.1)

    rpn_score_threshold = 0.1
    avod_score_threshold = 0.1

    # gt_classes = ['Car']
    gt_classes = ['Pedestrian', 'Cyclist']
    # gt_classes = ['Car', 'Pedestrian', 'Cyclist']

    # Overwrite this to select a specific checkpoint
    global_step = None
    checkpoint_name = sys.argv[1]  #'pyramid_cars_with_aug_example'

    # Drawing Toggles
    draw_proposals_separate = False
    draw_overlaid = False
    draw_predictions_separate = True

    # Show orientation for both GT and proposals/predictions
    draw_orientations_on_prop = False
    draw_orientations_on_pred = False

    # Draw 2D bounding boxes
    draw_projected_2d_boxes = True

    # Save images for samples with no detections
    save_empty_images = True

    draw_score = True
    draw_iou = True
    ##############################
    # End of Options
    ##############################

    # Get the dataset
    dataset = DatasetBuilder.build_kitti_dataset(dataset_config)

    # Setup Paths
    predictions_dir = avod.root_dir() + \
        '/data/outputs/' + checkpoint_name + '/predictions'

    proposals_and_scores_dir = predictions_dir + \
        '/proposals_and_scores/' + dataset.data_split

    predictions_and_scores_dir = predictions_dir + \
        '/final_predictions_and_scores/' + dataset.data_split

    # Output images directories
    output_dir_base = predictions_dir + '/images_2d'

    # Get checkpoint step
    steps = os.listdir(proposals_and_scores_dir)
    steps.sort(key=int)
    print('Available steps: {}'.format(steps))

    # Use latest checkpoint if no index provided
    if global_step is None:
        global_step = steps[-1]

    if draw_proposals_separate:
        prop_out_dir = output_dir_base + '/proposals/{}/{}/{}'.format(
            dataset.data_split, global_step, rpn_score_threshold)

        if not os.path.exists(prop_out_dir):
            os.makedirs(prop_out_dir)

        print('Proposal images saved to:', prop_out_dir)

    if draw_overlaid:
        overlaid_out_dir = output_dir_base + '/overlaid/{}/{}/{}'.format(
            dataset.data_split, global_step, avod_score_threshold)

        if not os.path.exists(overlaid_out_dir):
            os.makedirs(overlaid_out_dir)

        print('Overlaid images saved to:', overlaid_out_dir)

    if draw_predictions_separate:
        pred_out_dir = output_dir_base + '/predictions/{}/{}/{}'.format(
            dataset.data_split, global_step, avod_score_threshold)

        if not os.path.exists(pred_out_dir):
            os.makedirs(pred_out_dir)

        print('Prediction images saved to:', pred_out_dir)

    # Rolling average array of times for time estimation
    avg_time_arr_length = 10
    last_times = np.repeat(time.time(), avg_time_arr_length) + \
        np.arange(avg_time_arr_length)

    for sample_idx in range(dataset.num_samples):
        # Estimate time remaining with 5 slowest times
        start_time = time.time()
        last_times = np.roll(last_times, -1)
        last_times[-1] = start_time
        avg_time = np.mean(np.sort(np.diff(last_times))[-5:])
        samples_remaining = dataset.num_samples - sample_idx
        est_time_left = avg_time * samples_remaining

        # Print progress and time remaining estimate
        sys.stdout.write('\rSaving {} / {}, Avg Time: {:.3f}s, '
                         'Time Remaining: {:.2f}s'.format(
                             sample_idx + 1, dataset.num_samples, avg_time,
                             est_time_left))
        sys.stdout.flush()

        sample_name = dataset.sample_names[sample_idx]
        img_idx = int(sample_name)

        ##############################
        # Proposals
        ##############################
        if draw_proposals_separate or draw_overlaid:
            # Load proposals from files
            proposals_file_path = proposals_and_scores_dir + \
                "/{}/{}.txt".format(global_step, sample_name)
            if not os.path.exists(proposals_file_path):
                print('Sample {}: No proposals, skipping'.format(sample_name))
                continue
            print('Sample {}: Drawing proposals'.format(sample_name))

            proposals_and_scores = np.loadtxt(proposals_file_path)

            proposal_boxes_3d = proposals_and_scores[:, 0:7]
            proposal_scores = proposals_and_scores[:, 7]

            # Apply score mask to proposals
            score_mask = proposal_scores > rpn_score_threshold
            proposal_boxes_3d = proposal_boxes_3d[score_mask]
            proposal_scores = proposal_scores[score_mask]

            proposal_objs = \
                [box_3d_encoder.box_3d_to_object_label(proposal,
                                                       obj_type='Proposal')
                 for proposal in proposal_boxes_3d]

        ##############################
        # Predictions
        ##############################
        if draw_predictions_separate or draw_overlaid:
            predictions_file_path = predictions_and_scores_dir + \
                "/{}/{}.txt".format(global_step,
                                    sample_name)
            if not os.path.exists(predictions_file_path):
                continue

            # Load predictions from files
            predictions_and_scores = np.loadtxt(
                predictions_and_scores_dir +
                "/{}/{}.txt".format(global_step, sample_name))

            prediction_boxes_3d = predictions_and_scores[:, 0:7]
            prediction_scores = predictions_and_scores[:, 7]
            prediction_class_indices = predictions_and_scores[:, 8]

            # process predictions only if we have any predictions left after
            # masking
            if len(prediction_boxes_3d) > 0:

                # Apply score mask
                avod_score_mask = prediction_scores >= avod_score_threshold
                prediction_boxes_3d = prediction_boxes_3d[avod_score_mask]
                prediction_scores = prediction_scores[avod_score_mask]
                prediction_class_indices = \
                    prediction_class_indices[avod_score_mask]

                # # Swap l, w for predictions where w > l
                # swapped_indices = \
                #     prediction_boxes_3d[:, 4] > prediction_boxes_3d[:, 3]
                # prediction_boxes_3d = np.copy(prediction_boxes_3d)
                # prediction_boxes_3d[swapped_indices, 3] = \
                #     prediction_boxes_3d[swapped_indices, 4]
                # prediction_boxes_3d[swapped_indices, 4] = \
                #     prediction_boxes_3d[swapped_indices, 3]

        ##############################
        # Ground Truth
        ##############################

        # Get ground truth labels
        if dataset.has_labels:
            gt_objects = obj_utils.read_labels(dataset.label_dir, img_idx)
        else:
            gt_objects = []

        # Filter objects to desired difficulty
        filtered_gt_objs = dataset.kitti_utils.filter_labels(
            gt_objects, classes=gt_classes)

        boxes2d, _, _ = obj_utils.build_bbs_from_objects(
            filtered_gt_objs, class_needed=gt_classes)

        image_path = dataset.get_rgb_image_path(sample_name)
        image = Image.open(image_path)
        image_size = image.size

        # Read the stereo calibration matrix for visualization
        stereo_calib = calib_utils.read_calibration(dataset.calib_dir, img_idx)
        calib_p2 = stereo_calib.p2

        ##############################
        # Reformat and prepare to draw
        ##############################
        if draw_proposals_separate or draw_overlaid:
            proposals_as_anchors = box_3d_encoder.box_3d_to_anchor(
                proposal_boxes_3d)

            proposal_boxes, _ = anchor_projector.project_to_image_space(
                proposals_as_anchors, calib_p2, image_size)

            num_of_proposals = proposal_boxes_3d.shape[0]

            prop_fig, prop_2d_axes, prop_3d_axes = \
                vis_utils.visualization(dataset.rgb_image_dir,
                                        img_idx,
                                        display=False)

            draw_proposals(filtered_gt_objs, calib_p2, num_of_proposals,
                           proposal_objs, proposal_boxes, prop_2d_axes,
                           prop_3d_axes, draw_orientations_on_prop)

            if draw_proposals_separate:
                # Save just the proposals
                filename = prop_out_dir + '/' + sample_name + '.png'
                plt.savefig(filename)

                if not draw_overlaid:
                    plt.close(prop_fig)

        if draw_overlaid or draw_predictions_separate:
            if len(prediction_boxes_3d) > 0:
                # Project the 3D box predictions to image space
                image_filter = []
                final_boxes_2d = []
                for i in range(len(prediction_boxes_3d)):
                    box_3d = prediction_boxes_3d[i, 0:7]
                    img_box = box_3d_projector.project_to_image_space(
                        box_3d,
                        calib_p2,
                        truncate=True,
                        image_size=image_size,
                        discard_before_truncation=False)
                    if img_box is not None:
                        image_filter.append(True)
                        final_boxes_2d.append(img_box)
                    else:
                        image_filter.append(False)
                final_boxes_2d = np.asarray(final_boxes_2d)
                final_prediction_boxes_3d = prediction_boxes_3d[image_filter]
                final_scores = prediction_scores[image_filter]
                final_class_indices = prediction_class_indices[image_filter]

                num_of_predictions = final_boxes_2d.shape[0]

                # Convert to objs
                final_prediction_objs = \
                    [box_3d_encoder.box_3d_to_object_label(
                        prediction, obj_type='Prediction')
                        for prediction in final_prediction_boxes_3d]
                for (obj, score) in zip(final_prediction_objs, final_scores):
                    obj.score = score
            else:
                if save_empty_images:
                    pred_fig, pred_2d_axes, pred_3d_axes = \
                        vis_utils.visualization(dataset.rgb_image_dir,
                                                img_idx,
                                                display=False,
                                                fig_size=fig_size)
                    filename = pred_out_dir + '/' + sample_name + '.png'
                    plt.savefig(filename)
                    plt.close(pred_fig)
                continue

            if draw_overlaid:
                # Overlay prediction boxes on image
                draw_predictions(filtered_gt_objs, calib_p2,
                                 num_of_predictions, final_prediction_objs,
                                 final_class_indices, final_boxes_2d,
                                 prop_2d_axes, prop_3d_axes, draw_score,
                                 draw_iou, gt_classes,
                                 draw_orientations_on_pred)
                filename = overlaid_out_dir + '/' + sample_name + '.png'
                plt.savefig(filename)

                plt.close(prop_fig)

            if draw_predictions_separate:
                # Now only draw prediction boxes on images
                # on a new figure handler
                if draw_projected_2d_boxes:
                    pred_fig, pred_2d_axes, pred_3d_axes = \
                        vis_utils.visualization(dataset.rgb_image_dir,
                                                img_idx,
                                                display=False,
                                                fig_size=fig_size)

                    draw_predictions(filtered_gt_objs, calib_p2,
                                     num_of_predictions, final_prediction_objs,
                                     final_class_indices, final_boxes_2d,
                                     pred_2d_axes, pred_3d_axes, draw_score,
                                     draw_iou, gt_classes,
                                     draw_orientations_on_pred)
                else:
                    pred_fig, pred_3d_axes = \
                        vis_utils.visualize_single_plot(
                            dataset.rgb_image_dir, img_idx, display=False)

                    draw_3d_predictions(filtered_gt_objs, calib_p2,
                                        num_of_predictions,
                                        final_prediction_objs,
                                        final_class_indices, final_boxes_2d,
                                        pred_3d_axes, draw_score, draw_iou,
                                        gt_classes, draw_orientations_on_pred)
                filename = pred_out_dir + '/' + sample_name + '.png'
                plt.savefig(filename)
                plt.close(pred_fig)

    print('\nDone')
Example #18
def main():
    """ Converts a set of network predictions into text files required for
    KITTI evaluation.
    """

    ##############################
    # Options
    ##############################
    checkpoint_name = 'mlod_exp_example'

    # data_split = 'val'
    data_split = 'val_half'

    global_steps = None
    # global_steps = [28000, 19000, 33000, 34000]

    score_threshold = 0.1

    save_2d = False  # Save 2D predictions
    save_3d = True  # Save 2D and 3D predictions together

    # Checkpoints below this are skipped
    min_step = 20000
    # Object Type
    obj_type = 'obj'
    ##############################
    # End of Options
    ##############################

    # Parse experiment config
    pipeline_config_file = \
        mlod.root_dir() + '/data/outputs/' + checkpoint_name + \
        '/' + checkpoint_name + '.config'
    _, _, _, dataset_config = \
        config_builder_util.get_configs_from_pipeline_file(
            pipeline_config_file, is_training=False)

    # Overwrite defaults
    dataset_config = config_builder_util.proto_to_obj(dataset_config)
    dataset_config.data_split = data_split
    dataset_config.aug_list = []

    if data_split == 'test':
        dataset_config.data_split_dir = 'testing'

    dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                 use_defaults=False)

    # Get available prediction folders
    predictions_root_dir = mlod.root_dir() + '/data/outputs/' + \
        checkpoint_name + '/predictions'

    proposals_root_dir = predictions_root_dir + \
        '/proposals_and_scores/' + dataset.data_split

    print('Converting proposals from', proposals_root_dir)

    if not global_steps:
        global_steps = os.listdir(proposals_root_dir)
        global_steps.sort(key=int)
        print('Checkpoints found ', global_steps)

    for step_idx in range(len(global_steps)):

        global_step = global_steps[step_idx]

        # Skip checkpoints below min_step
        if int(global_step) < min_step:
            continue

        final_predictions_dir = proposals_root_dir + \
            '/' + str(global_step)

        # 2D and 3D prediction directories
        kitti_predictions_2d_dir = predictions_root_dir + \
            '/kitti_proposals_2d/' + \
            dataset.data_split + '/' + \
            str(score_threshold) + '/' + \
            str(global_step) + '/data'
        kitti_proposals_3d_dir = predictions_root_dir + \
            '/kitti_proposals_3d/' + \
            dataset.data_split + '/' + \
            str(score_threshold) + '/' + \
            str(global_step) + '/data'

        if save_2d and not os.path.exists(kitti_predictions_2d_dir):
            os.makedirs(kitti_predictions_2d_dir)
        if save_3d and not os.path.exists(kitti_proposals_3d_dir):
            os.makedirs(kitti_proposals_3d_dir)

        # Do conversion
        num_samples = dataset.num_samples
        num_valid_samples = 0

        print('\nGlobal step:', global_step)
        print('Converting proposals from:', final_predictions_dir)

        if save_2d:
            print('2D Detections saved to:', kitti_predictions_2d_dir)
        if save_3d:
            print('Proposals saved to:', kitti_proposals_3d_dir)

        for sample_idx in range(num_samples):

            # Print progress
            sys.stdout.write('\rConverting {} / {}'.format(
                sample_idx + 1, num_samples))
            sys.stdout.flush()

            sample_name = dataset.sample_names[sample_idx]

            prediction_file = sample_name + '.txt'

            kitti_predictions_2d_file_path = kitti_predictions_2d_dir + \
                '/' + prediction_file
            kitti_predictions_3d_file_path = kitti_proposals_3d_dir + \
                '/' + prediction_file

            predictions_file_path = final_predictions_dir + \
                '/' + prediction_file

            # If no predictions, skip to next file
            if not os.path.exists(predictions_file_path):
                if save_2d:
                    np.savetxt(kitti_predictions_2d_file_path, [])
                if save_3d:
                    np.savetxt(kitti_predictions_3d_file_path, [])
                continue

            all_predictions = np.loadtxt(predictions_file_path)

            # Swap l, w for predictions where w > l
            swapped_indices = all_predictions[:, 4] > all_predictions[:, 3]
            fixed_predictions = np.copy(all_predictions)
            fixed_predictions[swapped_indices,
                              3] = all_predictions[swapped_indices, 4]
            fixed_predictions[swapped_indices,
                              4] = all_predictions[swapped_indices, 3]
            fixed_predictions[swapped_indices, 6] = np.pi / 2

            score_filter = all_predictions[:, 7] >= score_threshold
            all_predictions = fixed_predictions[score_filter]

            # If no predictions, skip to next file
            if len(all_predictions) == 0:
                if save_2d:
                    np.savetxt(kitti_predictions_2d_file_path, [])
                if save_3d:
                    np.savetxt(kitti_predictions_3d_file_path, [])
                continue

            # Project to image space
            sample_name = prediction_file.split('.')[0]
            img_idx = int(sample_name)

            # Load image for truncation
            image = Image.open(dataset.get_rgb_image_path(sample_name))
            stereo_calib_p2 = calib_utils.read_calibration(
                dataset.calib_dir, img_idx).p2

            boxes = []
            image_filter = []
            for i in range(len(all_predictions)):
                box_3d = all_predictions[i, 0:7]
                img_box = box_3d_projector.project_to_image_space(
                    box_3d,
                    stereo_calib_p2,
                    truncate=True,
                    image_size=image.size,
                    discard_before_truncation=False)

                # Skip invalid boxes (outside image space)
                if img_box is None:
                    image_filter.append(False)
                else:
                    image_filter.append(True)
                    boxes.append(img_box)

            boxes = np.asarray(boxes)
            all_predictions = all_predictions[image_filter]

            # If no predictions, skip to next file
            if len(boxes) == 0:
                if save_2d:
                    np.savetxt(kitti_predictions_2d_file_path, [])
                if save_3d:
                    np.savetxt(kitti_predictions_3d_file_path, [])
                continue

            num_valid_samples += 1

            # To keep each value in its appropriate position, an array of zeros
            # (N, 16) is allocated but only values [4:16] are used
            kitti_predictions = np.zeros([len(boxes), 16])

            # Truncation and Occlusion are always empty (see below)

            # Alpha (Not computed)
            kitti_predictions[:, 3] = -10 * np.ones(
                (len(kitti_predictions)), dtype=np.int32)

            # 2D predictions
            kitti_predictions[:, 4:8] = boxes[:, 0:4]

            # 3D predictions
            # dimensions: box_3d stores (l, w, h); KITTI expects (h, w, l)
            kitti_predictions[:, 8] = all_predictions[:, 5]
            kitti_predictions[:, 9] = all_predictions[:, 4]
            kitti_predictions[:, 10] = all_predictions[:, 3]
            # (x, y, z)
            kitti_predictions[:, 11:14] = all_predictions[:, 0:3]
            # (ry, score)
            kitti_predictions[:, 14:16] = all_predictions[:, 6:8]

            # Round detections to 3 decimal places
            kitti_predictions = np.round(kitti_predictions, 3)

            # Empty Truncation, Occlusion
            kitti_empty_1 = -1 * np.ones(
                (len(kitti_predictions), 2), dtype=np.int32)
            # Empty 3D (x, y, z)
            kitti_empty_2 = -1 * np.ones(
                (len(kitti_predictions), 3), dtype=np.int32)
            # Empty 3D (h, w, l)
            kitti_empty_3 = -1000 * np.ones(
                (len(kitti_predictions), 3), dtype=np.int32)
            # Empty 3D (ry)
            kitti_empty_4 = -10 * np.ones(
                (len(kitti_predictions), 1), dtype=np.int32)

            # Create Type Array
            obj_types = [obj_type for i in range(len(kitti_predictions))]

            # Stack 2D predictions text
            kitti_text_2d = np.column_stack([
                obj_types, kitti_empty_1, kitti_predictions[:, 3:8],
                kitti_empty_2, kitti_empty_3, kitti_empty_4,
                kitti_predictions[:, 15]
            ])

            # Stack 3D predictions text
            kitti_text_3d = np.column_stack(
                [obj_types, kitti_empty_1, kitti_predictions[:, 3:16]])

            # Save to text files
            if save_2d:
                np.savetxt(kitti_predictions_2d_file_path,
                           kitti_text_2d,
                           newline='\r\n',
                           fmt='%s')
            if save_3d:
                np.savetxt(kitti_predictions_3d_file_path,
                           kitti_text_3d,
                           newline='\r\n',
                           fmt='%s')

        print('\nNum valid:', num_valid_samples)
        print('Num samples:', num_samples)
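
For reference, each row written above follows the KITTI detection format: type, truncation, occlusion, alpha, 2D box (x1, y1, x2, y2), 3D dimensions (h, w, l), 3D location (x, y, z), rotation_y, and score. A hypothetical proposal row as assembled by the column_stack calls:

obj -1 -1 -10.0 100.0 150.0 300.0 250.0 1.5 1.6 3.9 1.8 1.5 8.4 0.01 0.9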
Example #19
    def load_samples(self, indices):
        """ Loads input-output data for a set of samples. Should only be
            called when a particular sample dict is required. Otherwise,
            samples should be provided by the next_batch function

        Args:
            indices: A list of sample indices from the dataset.sample_list
                to be loaded

        Return:
            samples: a list of data sample dicts
        """
        sample_dicts = []
        for sample_idx in indices:
            sample = self.sample_list[sample_idx]
            sample_name = sample.name

            # Only read labels if they exist
            if self.has_labels:
                # Read mini batch first to see if it is empty
                anchors_info = self.get_anchors_info(sample_name)

                if (not anchors_info) and self.train_val_test == 'train' \
                        and (not self.train_on_all_samples):
                    empty_sample_dict = {
                        constants.KEY_SAMPLE_NAME: sample_name,
                        constants.KEY_ANCHORS_INFO: anchors_info
                    }
                    return [empty_sample_dict]

                obj_labels = obj_utils.read_labels(self.label_dir,
                                                   int(sample_name))

                # Only use objects that match dataset classes
                obj_labels = self.kitti_utils.filter_labels(obj_labels)

            else:
                obj_labels = None

                anchors_info = []

                label_anchors = np.zeros((1, 6))
                label_boxes_3d = np.zeros((1, 7))
                label_classes = np.zeros(1)

            img_idx = int(sample_name)

            # Load image (BGR -> RGB)
            cv_bgr_image = cv2.imread(self.get_rgb_image_path(
                sample_name))
            rgb_image = cv_bgr_image[..., ::-1]
            image_shape = rgb_image.shape[0:2]
            image_input = rgb_image

            # Load MRCNN mask and features
            # print('Load MRCNN mask and features')
            mrcnn_result = self.kitti_utils.get_mrcnn_result(img_idx)
            # If nothing was detected in this image, return early
            if not mrcnn_result:
                print('No mrcnn_result in load_samples, ending early')
                return []

            image_mrcnn_feature_input = mrcnn_result.item().get('features')
            image_mrcnn_bbox_input = mrcnn_result.item().get('rois')
            # rois: [batch, N, (y1, x1, y2, x2)] detection bounding boxes
            image_mask_input = mrcnn_result.item().get('masks')

            # Get ground plane
            ground_plane = obj_utils.get_road_plane(int(sample_name),
                                                    self.planes_dir)

            # Get calibration
            stereo_calib_p2 = calib_utils.read_calibration(self.calib_dir,
                                                           int(sample_name)).p2

            point_cloud = self.kitti_utils.get_point_cloud(self.bev_source,
                                                           img_idx,
                                                           image_shape)

            # Augmentation (Flipping)
            if kitti_aug.AUG_FLIPPING in sample.augs:
                image_input = kitti_aug.flip_image(image_input)
                point_cloud = kitti_aug.flip_point_cloud(point_cloud)
                obj_labels = [kitti_aug.flip_label_in_3d_only(obj)
                              for obj in obj_labels]
                ground_plane = kitti_aug.flip_ground_plane(ground_plane)
                stereo_calib_p2 = kitti_aug.flip_stereo_calib_p2(
                    stereo_calib_p2, image_shape)

            # Augmentation (Image Jitter)
            if kitti_aug.AUG_PCA_JITTER in sample.augs:
                image_input[:, :, 0:3] = kitti_aug.apply_pca_jitter(
                    image_input[:, :, 0:3])

            if obj_labels is not None:
                label_boxes_3d = np.asarray(
                    [box_3d_encoder.object_label_to_box_3d(obj_label)
                     for obj_label in obj_labels])

                label_classes = [
                    self.kitti_utils.class_str_to_index(obj_label.type)
                    for obj_label in obj_labels]
                label_classes = np.asarray(label_classes, dtype=np.int32)

                # Return empty anchors_info if no ground truth after filtering
                if len(label_boxes_3d) == 0:
                    anchors_info = []
                    if self.train_on_all_samples:
                        # If training without any positive labels, we cannot
                        # set these to zeros, because later on the offset calc
                        # uses log on these anchors. So setting any arbitrary
                        # number here that does not break the offset calculation
                        # should work, since the negative samples won't be
                        # regressed in any case.
                        dummy_anchors = [[-1000, -1000, -1000, 1, 1, 1]]
                        label_anchors = np.asarray(dummy_anchors)
                        dummy_boxes = [[-1000, -1000, -1000, 1, 1, 1, 0]]
                        label_boxes_3d = np.asarray(dummy_boxes)
                    else:
                        label_anchors = np.zeros((1, 6))
                        label_boxes_3d = np.zeros((1, 7))
                    label_classes = np.zeros(1)
                else:
                    label_anchors = box_3d_encoder.box_3d_to_anchor(
                        label_boxes_3d, ortho_rotate=True)

            # Create BEV maps
            bev_images = self.kitti_utils.create_bev_maps(
                point_cloud, ground_plane)

            height_maps = bev_images.get('height_maps')
            density_map = bev_images.get('density_map')
            bev_input = np.dstack((*height_maps, density_map))
            # print('bev_input.shape = ', bev_input.shape)

            sample_dict = {
                constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
                constants.KEY_LABEL_ANCHORS: label_anchors,
                constants.KEY_LABEL_CLASSES: label_classes,

                constants.KEY_IMAGE_INPUT: image_input,
                constants.KEY_BEV_INPUT: bev_input,
                constants.KEY_IMAGE_MASK_INPUT: image_mask_input,
                constants.KEY_IMAGE_MRCNN_FEATURE_INPUT: image_mrcnn_feature_input,
                constants.KEY_IMAGE_MRCNN_BBOX_INPUT: image_mrcnn_bbox_input,

                constants.KEY_ANCHORS_INFO: anchors_info,

                constants.KEY_POINT_CLOUD: point_cloud,
                constants.KEY_GROUND_PLANE: ground_plane,
                constants.KEY_STEREO_CALIB_P2: stereo_calib_p2,

                constants.KEY_SAMPLE_NAME: sample_name,
                constants.KEY_SAMPLE_AUGS: sample.augs
            }
            sample_dicts.append(sample_dict)

        return sample_dicts
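
To make the dummy-anchor comment concrete: anchor size offsets are typically encoded as log ratios, so any positive placeholder size stays finite while zeros would not (a standalone sketch, not the project's encoder):

import numpy as np

anchor_dims = np.array([1.0, 1.0, 1.0])   # the dummy [1, 1, 1] dims used above
gt_dims = np.array([3.9, 1.6, 1.5])       # some ground-truth dimensions

print(np.log(gt_dims / anchor_dims))      # finite offsets
# With zero dims the ratio divides by zero and the log blows up to inf,
# which is why the placeholder uses ones instead of zeros.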
Example #20
def save_predictions_in_kitti_format(model,
                                     checkpoint_name,
                                     data_split,
                                     score_threshold,
                                     global_step):
    """ Converts a set of network predictions into text files required for
    KITTI evaluation.
    """

    dataset = model.dataset
    # Round this because protobuf encodes default values as full decimal
    score_threshold = round(score_threshold, 3)

    # Get available prediction folders
    predictions_root_dir = avod.root_dir() + '/data/outputs/' + \
        checkpoint_name + '/predictions'

    final_predictions_root_dir = predictions_root_dir + \
        '/final_predictions_and_scores/' + dataset.data_split

    final_predictions_dir = final_predictions_root_dir + \
        '/' + str(global_step)

    # 3D prediction directories
    kitti_predictions_3d_dir = predictions_root_dir + \
        '/kitti_predictions_3d/' + \
        dataset.data_split + '/' + \
        str(score_threshold) + '/' + \
        str(global_step) + '/data'
        #'/kitti_native_eval/' + \
        #str(score_threshold) + '/' + \
        #str(global_step) + '/data'

    if not os.path.exists(kitti_predictions_3d_dir):
        os.makedirs(kitti_predictions_3d_dir)

    # Do conversion
    num_samples = dataset.num_samples
    num_valid_samples = 0

    print('\nGlobal step:', global_step)
    print('Converting detections from:', final_predictions_dir)

    print('3D Detections being saved to:', kitti_predictions_3d_dir)

    for sample_idx in range(num_samples):

        # Print progress
        sys.stdout.write('\rConverting {} / {}'.format(
            sample_idx + 1, num_samples))
        sys.stdout.flush()

        sample_name = dataset.sample_names[sample_idx]

        prediction_file = sample_name + '.txt'

        kitti_predictions_3d_file_path = kitti_predictions_3d_dir + \
            '/' + prediction_file

        predictions_file_path = final_predictions_dir + \
            '/' + prediction_file

        # If no predictions, skip to next file
        if not os.path.exists(predictions_file_path):
            np.savetxt(kitti_predictions_3d_file_path, [])
            continue

        all_predictions = np.loadtxt(predictions_file_path, ndmin=2)

        # # Swap l, w for predictions where w > l
        # swapped_indices = all_predictions[:, 4] > all_predictions[:, 3]
        # fixed_predictions = np.copy(all_predictions)
        # fixed_predictions[swapped_indices, 3] = all_predictions[
        #     swapped_indices, 4]
        # fixed_predictions[swapped_indices, 4] = all_predictions[
        #     swapped_indices, 3]

        score_filter = all_predictions[:, 7] >= score_threshold
        all_predictions = all_predictions[score_filter]

        # If no predictions, skip to next file
        if len(all_predictions) == 0:
            np.savetxt(kitti_predictions_3d_file_path, [])
            continue

        # Project to image space
        sample_name = prediction_file.split('.')[0]
        img_idx = int(sample_name)

        # Load image for truncation
        image = Image.open(dataset.get_rgb_image_path(sample_name))

        stereo_calib_p2 = calib_utils.read_calibration(dataset.calib_dir,
                                                       img_idx).p2

        boxes = []
        image_filter = []
        for i in range(len(all_predictions)):
            box_3d = all_predictions[i, 0:7]
            img_box = box_3d_projector.project_to_image_space(
                box_3d, stereo_calib_p2,
                truncate=True, image_size=image.size)

            # Skip invalid boxes (outside image space)
            if img_box is None:
                image_filter.append(False)
                continue

            image_filter.append(True)
            boxes.append(img_box)

        boxes = np.asarray(boxes)
        all_predictions = all_predictions[image_filter]

        # If no predictions, skip to next file
        if len(boxes) == 0:
            np.savetxt(kitti_predictions_3d_file_path, [])
            continue

        num_valid_samples += 1

        # To keep each value in its appropriate position, an array of zeros
        # (N, 16) is allocated but only values [4:16] are used
        kitti_predictions = np.zeros([len(boxes), 16])

        # Get object types
        all_pred_classes = all_predictions[:, 8].astype(np.int32)
        obj_types = [dataset.classes[class_idx]
                     for class_idx in all_pred_classes]

        # Truncation and Occlusion are always empty (see below)

        # Alpha (Not computed)
        kitti_predictions[:, 3] = -10 * np.ones((len(kitti_predictions)),
                                                dtype=np.int32)

        # 2D predictions
        kitti_predictions[:, 4:8] = boxes[:, 0:4]

        # 3D predictions
        # dimensions: box_3d stores (l, w, h); KITTI expects (h, w, l)
        kitti_predictions[:, 8] = all_predictions[:, 5]
        kitti_predictions[:, 9] = all_predictions[:, 4]
        kitti_predictions[:, 10] = all_predictions[:, 3]
        # (x, y, z)
        kitti_predictions[:, 11:14] = all_predictions[:, 0:3]
        # (ry, score)
        kitti_predictions[:, 14:16] = all_predictions[:, 6:8]

        # Round detections to 3 decimal places
        kitti_predictions = np.round(kitti_predictions, 3)

        # Empty Truncation, Occlusion
        kitti_empty_1 = -1 * np.ones((len(kitti_predictions), 2),
                                     dtype=np.int32)

        # Stack 3D predictions text
        kitti_text_3d = np.column_stack([obj_types,
                                         kitti_empty_1,
                                         kitti_predictions[:, 3:16]])

        # Save to text files
        np.savetxt(kitti_predictions_3d_file_path, kitti_text_3d,
                   newline='\r\n', fmt='%s')

    print('\nNum valid:', num_valid_samples)
    print('Num samples:', num_samples)
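The block above fills the 16-column KITTI detection format: type, truncation, occlusion, alpha, 2D bbox (x1, y1, x2, y2), dimensions (h, w, l), location (x, y, z), rotation_y, and score. A minimal sketch of writing a single row the same way, with made-up values for illustration only:

import numpy as np

# Hypothetical values for one detection (illustrative only)
obj_type = 'Car'
trunc_occ = [-1, -1]                         # truncation, occlusion left empty
alpha = -10                                  # not computed in this script
bbox_2d = [710.44, 144.00, 820.29, 307.49]   # x1, y1, x2, y2 (pixels)
dims_hwl = [1.47, 1.60, 3.90]                # h, w, l (metres)
loc_xyz = [1.84, 1.47, 8.41]                 # x, y, z (camera frame, metres)
ry_score = [0.01, 0.93]                      # rotation_y, confidence score

row = np.column_stack([[obj_type],
                       np.array([trunc_occ]),
                       np.array([[alpha] + bbox_2d + dims_hwl +
                                 loc_xyz + ry_score])])
np.savetxt('kitti_row_demo.txt', row, newline='\r\n', fmt='%s')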
Example #21
    def load_samples(self, indices):
        """ Loads input-output data for a set of samples. Should only be
            called when a particular sample dict is required. Otherwise,
            samples should be provided by the next_batch function

        Args:
            indices: A list of sample indices from the dataset.sample_list
                to be loaded

        Returns:
            samples: a list of data sample dicts
        """
        sample_dicts = []
        for sample_idx in indices:
            sample = self.sample_list[sample_idx]
            sample_name = sample.name

            # Only read labels if they exist
            if self.has_labels:
                # Read mini batch first to see if it is empty
                anchors_info = self.get_anchors_info(sample_name)

                if (not anchors_info) and self.train_val_test == 'train' \
                        and (not self.train_on_all_samples):
                    empty_sample_dict = {
                        constants.KEY_SAMPLE_NAME: sample_name,
                        constants.KEY_ANCHORS_INFO: anchors_info
                    }
                    return [empty_sample_dict]

                obj_labels = obj_utils.read_labels(self.label_dir,
                                                   int(sample_name))

                # Only use objects that match dataset classes
                obj_labels = self.kitti_utils.filter_labels(obj_labels)

            else:
                obj_labels = None

                anchors_info = []

                label_anchors = np.zeros((1, 6))
                label_boxes_3d = np.zeros((1, 7))
                label_classes = np.zeros(1)

            img_idx = int(sample_name)

            # Load image (BGR -> RGB)
            cv_bgr_image = cv2.imread(self.get_rgb_image_path(sample_name))
            rgb_image = cv_bgr_image[..., ::-1]
            image_shape = rgb_image.shape[0:2]
            image_input = rgb_image

            # Get ground plane
            ground_plane = obj_utils.get_road_plane(int(sample_name),
                                                    self.planes_dir)

            # Get calibration
            stereo_calib = calib_utils.read_calibration(
                self.calib_dir, int(sample_name))
            stereo_calib_p2 = stereo_calib.p2

            point_cloud = self.kitti_utils.get_point_cloud(
                self.bev_source, img_idx, image_shape)

            # Augmentation (Flipping)
            # WZN: the flipping augmentation flips the image (camera frame),
            # the point cloud (lidar frame), and the calibration matrix
            # (between camera and lidar), so the correspondence still holds.
            if kitti_aug.AUG_FLIPPING in sample.augs:
                image_input = kitti_aug.flip_image(image_input)
                point_cloud = kitti_aug.flip_point_cloud(point_cloud)
                obj_labels = [
                    kitti_aug.flip_label_in_3d_only(obj) for obj in obj_labels
                ]
                ground_plane = kitti_aug.flip_ground_plane(ground_plane)
                stereo_calib_p2 = kitti_aug.flip_stereo_calib_p2(
                    stereo_calib_p2, image_shape)

            # Augmentation (Image Jitter)
            if kitti_aug.AUG_PCA_JITTER in sample.augs:
                image_input[:, :,
                            0:3] = kitti_aug.apply_pca_jitter(image_input[:, :,
                                                                          0:3])

            if obj_labels is not None:
                label_boxes_3d = np.asarray([
                    box_3d_encoder.object_label_to_box_3d(obj_label)
                    for obj_label in obj_labels
                ])

                label_classes = [
                    self.kitti_utils.class_str_to_index(obj_label.type)
                    for obj_label in obj_labels
                ]
                label_classes = np.asarray(label_classes, dtype=np.int32)

                # Return empty anchors_info if no ground truth after filtering
                if len(label_boxes_3d) == 0:
                    anchors_info = []
                    if self.train_on_all_samples:
                        # If training without any positive labels, we cannot
                        # set these to zeros, because later on the offset calc
                        # uses log on these anchors. So setting any arbitrary
                        # number here that does not break the offset calculation
                        # should work, since the negative samples won't be
                        # regressed in any case.
                        dummy_anchors = [[-1000, -1000, -1000, 1, 1, 1]]
                        label_anchors = np.asarray(dummy_anchors)
                        dummy_boxes = [[-1000, -1000, -1000, 1, 1, 1, 0]]
                        label_boxes_3d = np.asarray(dummy_boxes)
                    else:
                        label_anchors = np.zeros((1, 6))
                        label_boxes_3d = np.zeros((1, 7))
                    label_classes = np.zeros(1)
                else:
                    label_anchors = box_3d_encoder.box_3d_to_anchor(
                        label_boxes_3d, ortho_rotate=True)

            # Create BEV maps

            bev_images = self.kitti_utils.create_bev_maps(
                point_cloud, ground_plane, output_indices=self.output_indices)
            # WZN: produce input for sparse pooling
            if self.output_indices:
                voxel_indices = bev_images[1]
                pts_in_voxel = bev_images[2]
                bev_images = bev_images[0]

            height_maps = bev_images.get('height_maps')
            density_map = bev_images.get('density_map')
            bev_input = np.dstack((*height_maps, density_map))

            # WZN: produce sparse pooling inputs at strides 1 and 8
            if self.output_indices:
                sparse_pooling_input1 = produce_sparse_pooling_input(
                    gen_sparse_pooling_input_avod(
                        pts_in_voxel, voxel_indices, stereo_calib,
                        [image_shape[1], image_shape[0]],
                        bev_input.shape[0:2]),
                    stride=[1, 1])
                # WZN: AVOD pads the VGG input by 4 pixels, so add the
                # padding here as well
                bev_input_padded = np.copy(bev_input.shape[0:2])
                bev_input_padded[0] = bev_input_padded[0] + 4
                sparse_pooling_input2 = produce_sparse_pooling_input(
                    gen_sparse_pooling_input_avod(
                        pts_in_voxel, voxel_indices, stereo_calib,
                        [image_shape[1], image_shape[0]], bev_input_padded),
                    stride=[8, 8])
                sparse_pooling_input = [
                    sparse_pooling_input1, sparse_pooling_input2
                ]
            else:
                sparse_pooling_input = None

            sample_dict = {
                constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
                constants.KEY_LABEL_ANCHORS: label_anchors,
                constants.KEY_LABEL_CLASSES: label_classes,
                constants.KEY_IMAGE_INPUT: image_input,
                constants.KEY_BEV_INPUT: bev_input,
                #WZN: for sparse pooling
                constants.KEY_SPARSE_POOLING_INPUT: sparse_pooling_input,
                constants.KEY_ANCHORS_INFO: anchors_info,
                constants.KEY_POINT_CLOUD: point_cloud,
                constants.KEY_GROUND_PLANE: ground_plane,
                constants.KEY_STEREO_CALIB_P2: stereo_calib_p2,
                constants.KEY_SAMPLE_NAME: sample_name,
                constants.KEY_SAMPLE_AUGS: sample.augs
            }
            sample_dicts.append(sample_dict)

        return sample_dicts
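The dummy-anchor comment above refers to the log-based offset encoding: the exact formula in this codebase may differ, but anchor-based detectors typically regress log(gt_dim / anchor_dim), which is why the dummy anchors use dimensions of 1 rather than 0. A small sketch of why zeros would break:

import numpy as np

anchor_dims = np.array([1.0, 1.0, 1.0])   # dummy anchor dims (safe)
gt_dims = np.array([3.9, 1.6, 1.5])       # typical car dimensions

print(np.log(gt_dims / anchor_dims))      # finite, well-defined offsets

zero_dims = np.zeros(3)                   # zero dims break the encoding
with np.errstate(divide='ignore'):
    print(np.log(gt_dims / zero_dims))    # -> [inf inf inf]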
    def preprocess(self, indices):
        """Preprocesses anchor info and saves info to files

        Args:
            indices (int array): sample indices to process.
                If None, processes all samples
        """
        # Get anchor stride for class
        anchor_strides = self._anchor_strides

        dataset = self._dataset
        dataset_utils = self._dataset.kitti_utils
        classes_name = dataset.classes_name

        # Make folder if it doesn't exist yet
        output_dir, img_roi_dir, img_roi_norm_dir = self.mini_batch_utils.get_file_path(
            classes_name, anchor_strides, sample_name=None)

        os.makedirs(output_dir, exist_ok=True)
        os.makedirs(img_roi_dir, exist_ok=True)
        os.makedirs(img_roi_norm_dir, exist_ok=True)
        # Get clusters for class
        all_clusters_sizes, _ = dataset.get_cluster_info()

        anchor_generator = grid_anchor_3d_generator.GridAnchor3dGenerator()

        # Load indices of data_split
        all_samples = dataset.sample_list

        if indices is None:
            indices = np.arange(len(all_samples))
        num_samples = len(indices)

        # For each image in the dataset, save info on the anchors
        for sample_idx in indices:
            # Get image name for given cluster
            sample_name = all_samples[sample_idx].name
            img_idx = int(sample_name)

            # Check for existing files and skip to the next sample
            # (check disabled with `False and` by benz while debugging)
            if False and self._check_for_existing(classes_name, anchor_strides,
                                                  sample_name):
                print("{} / {}: Sample {} already preprocessed".format(
                    sample_idx + 1, num_samples, sample_name))
                continue

            # Get ground truth and filter based on difficulty
            ground_truth_list = obj_utils.read_labels(dataset.label_dir,
                                                      img_idx)

            # Filter objects to dataset classes
            filtered_gt_list = dataset_utils.filter_labels(ground_truth_list)
            filtered_gt_list = np.asarray(filtered_gt_list)

            # If filtering by class leaves no valid ground truth,
            # skip this image
            if len(filtered_gt_list) == 0:
                print("{} / {} No {}s for sample {} "
                      "(Ground Truth Filter)".format(sample_idx + 1,
                                                     num_samples, classes_name,
                                                     sample_name))

                # Output an empty file and move on to the next image.
                self._save_to_file(classes_name, anchor_strides, sample_name)
                continue

            # Get ground plane
            ground_plane = obj_utils.get_road_plane(img_idx,
                                                    dataset.planes_dir)

            image = Image.open(dataset.get_rgb_image_path(sample_name))
            image_shape = [image.size[1], image.size[0]]

            ## benz: obtain the calibration info
            stereo_calib_p2 = calib_utils.read_calibration(
                dataset.calib_dir, img_idx).p2

            # Generate sliced 2D voxel grid for filtering
            vx_grid_2d = dataset_utils.create_sliced_voxel_grid_2d(
                sample_name,
                source=dataset.bev_source,
                image_shape=image_shape)

            # List for merging all anchors
            all_anchor_boxes_3d = []

            # Create anchors for each class
            for class_idx in range(len(dataset.classes)):
                # Generate anchors for all classes
                grid_anchor_boxes_3d = anchor_generator.generate(
                    area_3d=self._area_extents,
                    anchor_3d_sizes=all_clusters_sizes[class_idx],
                    anchor_stride=self._anchor_strides[class_idx],
                    ground_plane=ground_plane)

                all_anchor_boxes_3d.extend(grid_anchor_boxes_3d)

            # Filter empty anchors
            all_anchor_boxes_3d = np.asarray(all_anchor_boxes_3d)
            ## anchors are (N, 6): [x, y, z, dx, dy, dz]
            anchors = box_3d_encoder.box_3d_to_anchor(all_anchor_boxes_3d)
            empty_anchor_filter = anchor_filter.get_empty_anchor_filter_2d(
                anchors, vx_grid_2d, self._density_threshold)

            # Calculate anchor info
            ## benz: calculate the image IoU info
            anchors_info, img_boxes_anchors, img_boxes_anchors_norm = self._calculate_img_anchors_info(
                all_anchor_boxes_3d, empty_anchor_filter, filtered_gt_list,
                stereo_calib_p2, image_shape)

            anchor_ious = anchors_info[:, self.mini_batch_utils.col_ious]

            valid_iou_indices = np.where(anchor_ious > 0.0)[0]

            print("{} / {}:"
                  "{:>6} anchors, "
                  "{:>6} iou > 0.0, "
                  "for {:>3} {}(s) for sample {}".format(
                      sample_idx + 1, num_samples, len(anchors_info),
                      len(valid_iou_indices), len(filtered_gt_list),
                      classes_name, sample_name))

            # Save anchors info
            self._save_to_file(classes_name, anchor_strides, sample_name,
                               anchors_info, img_boxes_anchors,
                               img_boxes_anchors_norm)
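The grid anchors above sit on the ground plane [a, b, c, d], which satisfies a*x + b*y + c*z + d = 0, so an anchor centre at (x, z) gets y = -(a*x + c*z + d) / b. A quick standalone check using the plane from the demo below:

import numpy as np

a, b, c, d = [0, -1, 0, 1.72]            # ground plane from the demo below
xs, zs = np.meshgrid(np.arange(-40, 40, 5.0), np.arange(0, 70, 5.0))
ys = -(a * xs + c * zs + d) / b          # every y comes out as 1.72 here
print(np.unique(ys))                     # [1.72]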
Example #23
def main():
    """
    Visualization of 3D grid anchor generation, showing 2D projections
        in BEV and image space, and a 3D display of the anchors
    """
    dataset_config = DatasetBuilder.copy_config(DatasetBuilder.KITTI_TRAIN)
    dataset_config.num_clusters[0] = 1
    dataset = DatasetBuilder.build_kitti_dataset(dataset_config)

    label_cluster_utils = LabelClusterUtils(dataset)
    clusters, _ = label_cluster_utils.get_clusters()

    # Options
    img_idx = 1
    # fake_clusters = np.array([[5, 4, 3], [6, 5, 4]])
    # fake_clusters = np.array([[3, 3, 3], [4, 4, 4]])

    fake_clusters = np.array([[4, 2, 3]])
    fake_anchor_stride = [5.0, 5.0]
    ground_plane = [0, -1, 0, 1.72]

    anchor_3d_generator = grid_anchor_3d_generator.GridAnchor3dGenerator()

    area_extents = np.array([[-40, 40], [-5, 5], [0, 70]])

    # Generate anchors for cars only
    start_time = time.time()
    anchor_boxes_3d = anchor_3d_generator.generate(
        area_3d=dataset.kitti_utils.area_extents,
        anchor_3d_sizes=fake_clusters,
        anchor_stride=fake_anchor_stride,
        ground_plane=ground_plane)
    all_anchors = box_3d_encoder.box_3d_to_anchor(anchor_boxes_3d)
    end_time = time.time()
    print("Anchors generated in {} s".format(end_time - start_time))

    # Project into bev
    bev_boxes, bev_normalized_boxes = \
        anchor_projector.project_to_bev(all_anchors, area_extents[[0, 2]])

    bev_fig, (bev_axes, bev_normalized_axes) = \
        plt.subplots(1, 2, figsize=(16, 7))
    bev_axes.set_xlim(0, 80)
    bev_axes.set_ylim(70, 0)
    bev_normalized_axes.set_xlim(0, 1.0)
    bev_normalized_axes.set_ylim(1, 0.0)

    plt.show(block=False)

    for box in bev_boxes:
        box_w = box[2] - box[0]
        box_h = box[3] - box[1]

        rect = patches.Rectangle((box[0], box[1]),
                                 box_w,
                                 box_h,
                                 linewidth=2,
                                 edgecolor='b',
                                 facecolor='none')

        bev_axes.add_patch(rect)

    for normalized_box in bev_normalized_boxes:
        box_w = normalized_box[2] - normalized_box[0]
        box_h = normalized_box[3] - normalized_box[1]

        rect = patches.Rectangle((normalized_box[0], normalized_box[1]),
                                 box_w,
                                 box_h,
                                 linewidth=2,
                                 edgecolor='b',
                                 facecolor='none')

        bev_normalized_axes.add_patch(rect)

    rgb_fig, rgb_2d_axes, rgb_3d_axes = \
        vis_utils.visualization(dataset.rgb_image_dir, img_idx)
    plt.show(block=False)

    image_path = dataset.get_rgb_image_path(dataset.sample_names[img_idx])
    image_shape = np.array(Image.open(image_path)).shape

    stereo_calib_p2 = calib_utils.read_calibration(dataset.calib_dir,
                                                   img_idx).p2

    start_time = time.time()
    rgb_boxes, rgb_normalized_boxes = \
        anchor_projector.project_to_image_space(all_anchors,
                                                stereo_calib_p2,
                                                image_shape)
    end_time = time.time()
    print("Anchors projected in {} s".format(end_time - start_time))

    # Read the stereo calibration matrix for visualization
    stereo_calib = calib_utils.read_calibration(dataset.calib_dir, 0)
    p = stereo_calib.p2

    # Overlay boxes on images

    for anchor_idx in range(len(anchor_boxes_3d)):
        anchor_box_3d = anchor_boxes_3d[anchor_idx]

        obj_label = box_3d_encoder.box_3d_to_object_label(anchor_box_3d)

        # Draw 3D boxes
        vis_utils.draw_box_3d(rgb_3d_axes, obj_label, p)

        # Draw 2D boxes
        rgb_box_2d = rgb_boxes[anchor_idx]

        box_x1 = rgb_box_2d[0]
        box_y1 = rgb_box_2d[1]
        box_w = rgb_box_2d[2] - box_x1
        box_h = rgb_box_2d[3] - box_y1

        rect = patches.Rectangle((box_x1, box_y1),
                                 box_w,
                                 box_h,
                                 linewidth=2,
                                 edgecolor='b',
                                 facecolor='none')

        rgb_2d_axes.add_patch(rect)

        if anchor_idx % 32 == 0:
            rgb_fig.canvas.draw()

    plt.show(block=True)
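project_to_image_space above relies on the standard pinhole projection with the 3x4 matrix P2: a homogeneous camera-frame point [x, y, z, 1] maps to a pixel (u, v) after dividing by the projective depth. A minimal sketch with a hypothetical, KITTI-like P2:

import numpy as np

p2 = np.array([[721.54, 0.0, 609.56, 44.86],
               [0.0, 721.54, 172.85, 0.22],
               [0.0, 0.0, 1.0, 0.0027]])       # hypothetical KITTI-like P2

point_cam = np.array([1.84, 1.47, 8.41, 1.0])  # [x, y, z, 1] in camera frame
uvw = p2 @ point_cam
u, v = uvw[0] / uvw[2], uvw[1] / uvw[2]
print(u, v)  # pixel coordinates; boxes outside the image get filtered out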
def convertPredictionsToKitti(dataset, predictions_root_dir, additional_cls):
    """ Converts a set of network predictions into text files required for
    KITTI evaluation.
    """
    open_mode = 'w+'
    if additional_cls:
        open_mode = 'a+'

    ##############################
    # Options
    ##############################
    global_steps = None
    save_to_base = True
    # global_steps = [28000, 19000, 33000, 34000]

    score_threshold = 0.01

    save_2d = False  # Save 2D predictions
    save_3d = True  # Save 2D and 3D predictions together
    save_alphas = True  # Save alphas (observation angles)

    # Checkpoints below this are skipped
    min_step = 20000

    ##############################
    # End of Options
    ##############################

    final_predictions_root_dir = predictions_root_dir + \
        '/final_predictions_and_scores/' + dataset.data_split

    logging.info('Converting detections from %s', final_predictions_root_dir)

    if not global_steps:
        global_steps = os.listdir(final_predictions_root_dir)
        global_steps.sort(key=int)
        logging.debug('Checkpoints found: %s', global_steps)

    for step_idx in range(len(global_steps)):

        global_step = global_steps[step_idx]

        # Skip checkpoints below min_step
        if int(global_step) < min_step:
            continue

        final_predictions_dir = final_predictions_root_dir + \
            '/' + str(global_step)

        if save_to_base:
            kitti_predictions_2d_dir = predictions_root_dir
            kitti_predictions_3d_dir = predictions_root_dir
        else:
            # 2D and 3D prediction directories
            kitti_predictions_2d_dir = predictions_root_dir + \
                '/kitti_predictions_2d/' + \
                dataset.data_split + '/' + \
                str(score_threshold) + '/' + \
                str(global_step) + '/data'
            kitti_predictions_3d_dir = predictions_root_dir + \
                '/kitti_predictions_3d/' + \
                dataset.data_split + '/' + \
                str(score_threshold) + '/' + \
                str(global_step) + '/data'

            if save_2d and not os.path.exists(kitti_predictions_2d_dir):
                os.makedirs(kitti_predictions_2d_dir)
            if save_3d and not os.path.exists(kitti_predictions_3d_dir):
                os.makedirs(kitti_predictions_3d_dir)

        # Do conversion
        num_samples = dataset.num_samples
        num_valid_samples = 0

        logging.info('\nGlobal step: %d', int(global_step))
        logging.info('Converting detections from: %s', final_predictions_dir)

        if save_2d:
            logging.info('2D Detections saved to: %s',
                         kitti_predictions_2d_dir)
        if save_3d:
            logging.info('3D Detections saved to: %s',
                         kitti_predictions_3d_dir)

        for sample_idx in range(num_samples):

            # Print progress
            sys.stdout.write('\rConverting {} / {}'.format(
                sample_idx + 1, num_samples))
            sys.stdout.flush()

            sample_name = dataset.sample_names[sample_idx]

            prediction_file = sample_name + '.txt'

            kitti_predictions_2d_file_path = kitti_predictions_2d_dir + \
                '/' + prediction_file
            kitti_predictions_3d_file_path = kitti_predictions_3d_dir + \
                '/' + prediction_file

            predictions_file_path = final_predictions_dir + \
                '/' + prediction_file

            # If no predictions, skip to next file
            if not os.path.exists(predictions_file_path):
                if save_2d:
                    np.savetxt(kitti_predictions_2d_file_path, [])
                if save_3d:
                    np.savetxt(kitti_predictions_3d_file_path, [])
                continue

            all_predictions = np.loadtxt(predictions_file_path, ndmin=2)

            # # Swap l, w for predictions where w > l
            # swapped_indices = all_predictions[:, 4] > all_predictions[:, 3]
            # fixed_predictions = np.copy(all_predictions)
            # fixed_predictions[swapped_indices, 3] = all_predictions[
            #     swapped_indices, 4]
            # fixed_predictions[swapped_indices, 4] = all_predictions[
            #     swapped_indices, 3]

            score_filter = all_predictions[:, 7] >= score_threshold
            all_predictions = all_predictions[score_filter]

            # If no predictions, skip to next file
            if len(all_predictions) == 0:
                if save_2d:
                    np.savetxt(kitti_predictions_2d_file_path, [])
                if save_3d:
                    np.savetxt(kitti_predictions_3d_file_path, [])
                continue

            # Project to image space
            sample_name = prediction_file.split('.')[0]
            img_idx = int(sample_name)

            # Load image for truncation
            image = Image.open(dataset.get_rgb_image_path(sample_name))

            stereo_calib_p2 = calib_utils.read_calibration(
                dataset.calib_dir, img_idx).p2

            boxes = []
            image_filter = []
            for i in range(len(all_predictions)):
                box_3d = all_predictions[i, 0:7]
                img_box = box_3d_projector.project_to_image_space(
                    box_3d,
                    stereo_calib_p2,
                    truncate=True,
                    image_size=image.size)

                # Skip invalid boxes (outside image space)
                if img_box is None:
                    image_filter.append(False)
                else:
                    image_filter.append(True)
                    boxes.append(img_box)

            boxes = np.asarray(boxes)
            all_predictions = all_predictions[image_filter]

            # If no predictions, skip to next file
            if len(boxes) == 0:
                if save_2d:
                    np.savetxt(kitti_predictions_2d_file_path, [])
                if save_3d:
                    np.savetxt(kitti_predictions_3d_file_path, [])
                continue

            num_valid_samples += 1

            # To keep each value in its appropriate position, an array of
            # zeros (N, 16) is allocated, but only columns [3:16] are used
            kitti_predictions = np.zeros([len(boxes), 16])

            # Get object types
            all_pred_classes = all_predictions[:, 8].astype(np.int32)
            obj_types = [
                dataset.classes[class_idx] for class_idx in all_pred_classes
            ]

            # Truncation and Occlusion are always empty (see below)

            # Alpha
            if not save_alphas:
                kitti_predictions[:, 3] = -10 * \
                    np.ones((len(kitti_predictions)), dtype=np.int32)
            else:
                alphas = all_predictions[:, 6] - \
                    np.arctan2(all_predictions[:, 0], all_predictions[:, 2])
                kitti_predictions[:, 3] = alphas

            # 2D predictions
            kitti_predictions[:, 4:8] = boxes[:, 0:4]

            # 3D predictions
            # (l, w, h)
            kitti_predictions[:, 8] = all_predictions[:, 5]
            kitti_predictions[:, 9] = all_predictions[:, 4]
            kitti_predictions[:, 10] = all_predictions[:, 3]
            # (x, y, z)
            kitti_predictions[:, 11:14] = all_predictions[:, 0:3]
            # (ry, score)
            kitti_predictions[:, 14:16] = all_predictions[:, 6:8]

            # Round detections to 3 decimal places
            kitti_predictions = np.round(kitti_predictions, 3)

            # Empty Truncation, Occlusion
            kitti_empty_1 = -1 * np.ones(
                (len(kitti_predictions), 2), dtype=np.int32)
            # Empty 3D (x, y, z)
            kitti_empty_2 = -1 * np.ones(
                (len(kitti_predictions), 3), dtype=np.int32)
            # Empty 3D (h, w, l)
            kitti_empty_3 = -1000 * np.ones(
                (len(kitti_predictions), 3), dtype=np.int32)
            # Empty 3D (ry)
            kitti_empty_4 = -10 * np.ones(
                (len(kitti_predictions), 1), dtype=np.int32)

            # Stack 2D predictions text
            kitti_text_2d = np.column_stack([
                obj_types, kitti_empty_1, kitti_predictions[:, 3:8],
                kitti_empty_2, kitti_empty_3, kitti_empty_4,
                kitti_predictions[:, 15]
            ])

            # Stack 3D predictions text
            kitti_text_3d = np.column_stack(
                [obj_types, kitti_empty_1, kitti_predictions[:, 3:16]])

            # Save to text files
            if save_2d:
                np.savetxt(kitti_predictions_2d_file_path,
                           kitti_text_2d,
                           newline='\r\n',
                           fmt='%s')
            if save_3d:
                with open(kitti_predictions_3d_file_path, open_mode) as f:
                    np.savetxt(f, kitti_text_3d, newline='\r\n', fmt='%s')

        logging.debug('\nNum valid: %d', num_valid_samples)
        logging.debug('Num samples: %d', num_samples)

    # Clean up: remove the per-step prediction folders, keeping only the
    # converted text files in the base directory
    for the_file in os.listdir(predictions_root_dir):
        file_path = os.path.join(predictions_root_dir, the_file)
        try:
            if os.path.isdir(file_path):
                logging.debug("Removing folder: %s", file_path)
                shutil.rmtree(file_path)
        except Exception as e:
            logging.exception(e)
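The alpha computed in the conversion above is KITTI's observation angle: the global rotation ry minus the viewing angle of the object centre, alpha = ry - arctan2(x, z). A standalone check with dummy values:

import numpy as np

x, z = 1.84, 8.41        # object centre in the camera frame
ry = 0.01                # global yaw around the camera y-axis

alpha = ry - np.arctan2(x, z)
print(alpha)             # observation angle written to column 3 (alpha)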
def load_calib(calib_dir, idx, fname=None):
    # Get calibration
    stereo_calib = calib_utils.read_calibration(calib_dir, idx, fname)
    return stereo_calib
Example #26
    def load_samples(self, indices):
        """ Loads input-output data for a set of samples. Should only be
            called when a particular sample dict is required. Otherwise,
            samples should be provided by the next_batch function

        Args:
            indices: A list of sample indices from the dataset.sample_list
                to be loaded

        Returns:
            samples: a list of data sample dicts
        """
        sample_dicts = []
        for sample_idx in indices:
            sample = self.sample_list[sample_idx]
            sample_name = sample.name

            # Only read labels if they exist
            if self.has_labels:
                # Read mini batch first to see if it is empty
                anchors_info = self.get_anchors_info(sample_name)

                if (not anchors_info) and self.train_val_test == 'train' \
                        and (not self.train_on_all_samples):
                    empty_sample_dict = {
                        constants.KEY_SAMPLE_NAME: sample_name,
                        constants.KEY_ANCHORS_INFO: anchors_info
                    }
                    return [empty_sample_dict]

                obj_labels = obj_utils.read_labels(self.label_dir,
                                                   int(sample_name))

                # Only use objects that match dataset classes
                obj_labels = self.kitti_utils.filter_labels(obj_labels)

            else:
                obj_labels = None

                anchors_info = []

                label_anchors = np.zeros((1, 6))
                label_boxes_3d = np.zeros((1, 7))
                label_boxes_2d = np.zeros((1, 4))
                label_classes = np.zeros(1)

            img_idx = int(sample_name)

            lidar_only = False
            num_views = 1

            if not lidar_only:
                # Load image (BGR -> RGB)
                cv_bgr_image = cv2.imread(self.get_rgb_image_path(sample_name))
                rgb_image = cv_bgr_image[..., ::-1]
                image_shape = rgb_image.shape[0:2]

                # Append the depth channel
                if self.add_depth:
                    depth_map = obj_utils.get_depth_map(
                        img_idx, self.depth_dir)

                    # Set invalid (zero-depth) pixels to the max BEV depth
                    depth_map[depth_map == 0.0] = \
                        self.kitti_utils.bev_extents[1, 1]

                    # Add channel dimension to make stacking easier
                    depth_map = np.expand_dims(depth_map, 2)
                    image_input = np.concatenate([rgb_image, depth_map],
                                                 axis=2)
                else:
                    image_input = rgb_image
            else:
                image_shape = (370, 1224)

            # Get ground plane
            ground_plane = obj_utils.get_road_plane(int(sample_name),
                                                    self.planes_dir)
            #ground_plane = np.array([0,-1,0,1.68])

            if lidar_only:
                p_matrix = np.zeros((num_views, 3, 4), dtype=float)
                # Hardcoded projection matrices for up to two views
                if num_views > 0:
                    p_matrix[0] = np.array(
                        [[8.39713500e+02, 3.58853400e+01, 4.48566750e+02,
                          2.31460650e+03],
                         [1.02835238e-13, 8.54979440e+02, 1.57320433e+02,
                          2.49655872e+03],
                         [0.00000000e+00, 7.97452000e-02, 9.96815000e-01,
                          5.14357000e+00]])
                if num_views > 1:
                    p_matrix[1] = np.array(
                        [[1.20171708e+03, 9.73326000e+01, 3.99933320e+02,
                          1.04945816e+04],
                         [1.41054657e+01, 8.65088160e+02, 8.46334690e+01,
                          5.24229862e+03],
                         [1.62221000e-01, 1.62221000e-01, 9.73329000e-01,
                          1.13555000e+01]])
            else:
                # Get calibration
                stereo_calib_p2 = calib_utils.read_calibration(
                    self.calib_dir, int(sample_name)).p2

            point_cloud = self.kitti_utils.get_point_cloud(
                self.bev_source, img_idx, image_shape)
            # Augmentation (Flipping)
            if kitti_aug.AUG_FLIPPING in sample.augs:
                if not lidar_only:
                    image_input = kitti_aug.flip_image(image_input)
                point_cloud = kitti_aug.flip_point_cloud(point_cloud)
                obj_labels = [
                    kitti_aug.flip_label(obj, image_shape)
                    for obj in obj_labels
                ]

                ground_plane = kitti_aug.flip_ground_plane(ground_plane)
                if lidar_only:
                    for i in range(num_views):
                        p_matrix[i] = kitti_aug.flip_stereo_calib_p2(
                            p_matrix[i], image_shape)
                else:
                    stereo_calib_p2 = kitti_aug.flip_stereo_calib_p2(
                        stereo_calib_p2, image_shape)

            # Augmentation (Image Jitter)
            if (kitti_aug.AUG_PCA_JITTER in sample.augs) and not lidar_only:
                image_input[:, :, 0:3] = kitti_aug.apply_pca_jitter(
                    image_input[:, :, 0:3], aug_img_noise=self.aug_img_noise)

            # Augmentation (Random Occlusion)
            if kitti_aug.AUG_RANDOM_OCC in sample.augs:
                point_cloud = kitti_aug.occ_aug(point_cloud, stereo_calib_p2,
                                                obj_labels)

            if obj_labels is not None:
                label_boxes_3d = np.asarray([
                    box_3d_encoder.object_label_to_box_3d(obj_label)
                    for obj_label in obj_labels
                ])

                label_boxes_2d = np.asarray([
                    box_3d_encoder.object_label_to_box_2d(obj_label)
                    for obj_label in obj_labels
                ])

                label_classes = [
                    self.kitti_utils.class_str_to_index(obj_label.type)
                    for obj_label in obj_labels
                ]
                label_classes = np.asarray(label_classes, dtype=np.int32)

                # Return empty anchors_info if no ground truth after filtering
                if len(label_boxes_3d) == 0:
                    anchors_info = []
                    if self.train_on_all_samples:
                        # If training without any positive labels, we cannot
                        # set these to zeros, because later on the offset calc
                        # uses log on these anchors. So setting any arbitrary
                        # number here that does not break the offset calculation
                        # should work, since the negative samples won't be
                        # regressed in any case.
                        dummy_anchors = [[-1000, -1000, -1000, 1, 1, 1]]
                        label_anchors = np.asarray(dummy_anchors)
                        dummy_boxes = [[-1000, -1000, -1000, 1, 1, 1, 0]]
                        label_boxes_3d = np.asarray(dummy_boxes)
                        label_boxes_2d = np.asarray([[-1.0, -1.0, -1.0, -1.0]])
                    else:
                        label_anchors = np.zeros((1, 6))
                        label_boxes_3d = np.zeros((1, 7))
                        label_boxes_2d = np.zeros((1, 4))
                    label_classes = np.zeros(1)
                else:
                    label_anchors = box_3d_encoder.box_3d_to_anchor(
                        label_boxes_3d, ortho_rotate=True)

            # Create BEV maps
            bev_images = self.kitti_utils.create_bev_maps(
                point_cloud, ground_plane)
            height_maps = bev_images.get('height_maps')

            # BEV random masking (disabled experiment): randomly zero out
            # one of the five height map layers with probability 0.5
            """
            bev_drop_p = 0.5
            rand_01 = random.random()
            mask_bev_layer = np.zeros(height_maps[0].shape, dtype=np.float32)
            if rand_01 > bev_drop_p:
                mask_idx = random.randint(0, 4)
                height_maps[mask_idx] = mask_bev_layer
            """

            density_map = bev_images.get('density_map')
            bev_input = np.dstack((*height_maps, density_map))
            #bev_input = np.transpose(np.array(height_maps),(1,2,0))

            point_cloud = self.kitti_utils._apply_slice_filter(
                point_cloud, ground_plane).T

            if lidar_only:
                depth_map = np.zeros(
                    (num_views, image_shape[0], image_shape[1]), dtype=float)
                for i in range(num_views):
                    depth_map[i, :, :] = project_depths(
                        point_cloud, p_matrix[i], image_shape[0:2])
                depth_map_expand_dims = np.expand_dims(depth_map, axis=-1)
                sample_dict = {
                    constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
                    constants.KEY_LABEL_ANCHORS: label_anchors,
                    constants.KEY_LABEL_CLASSES: label_classes,
                    constants.KEY_IMAGE_INPUT: depth_map_expand_dims,
                    constants.KEY_BEV_INPUT: bev_input,
                    constants.KEY_ANCHORS_INFO: anchors_info,
                    constants.KEY_POINT_CLOUD: point_cloud,
                    constants.KEY_GROUND_PLANE: ground_plane,
                    constants.KEY_STEREO_CALIB_P2: p_matrix[0:num_views],
                    constants.KEY_SAMPLE_NAME: sample_name,
                    constants.KEY_SAMPLE_AUGS: sample.augs,
                    constants.KEY_DPT_INPUT: depth_map
                }
            else:
                depth_map = project_depths(point_cloud, stereo_calib_p2,
                                           image_shape[0:2])
                depth_map = np.expand_dims(depth_map, axis=0)
                sample_dict = {
                    constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
                    constants.KEY_LABEL_BOXES_2D: label_boxes_2d,
                    constants.KEY_LABEL_ANCHORS: label_anchors,
                    constants.KEY_LABEL_CLASSES: label_classes,
                    constants.KEY_IMAGE_INPUT: image_input,
                    constants.KEY_BEV_INPUT: bev_input,
                    constants.KEY_ANCHORS_INFO: anchors_info,
                    constants.KEY_POINT_CLOUD: point_cloud,
                    constants.KEY_GROUND_PLANE: ground_plane,
                    constants.KEY_STEREO_CALIB_P2: stereo_calib_p2,
                    constants.KEY_SAMPLE_NAME: sample_name,
                    constants.KEY_SAMPLE_AUGS: sample.augs,
                    constants.KEY_DPT_INPUT: depth_map
                }

            sample_dicts.append(sample_dict)

        return sample_dicts
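project_depths (used above) is this codebase's helper, so the following is only a rough sketch of the idea under the usual assumptions: project each point with P2, scatter its depth into an image-sized map, and fill empty pixels with a max depth the way the loader fills zeros from the BEV extents. The function name and signature here are illustrative, not the repo's API:

import numpy as np

def sketch_project_depths(points, p2, image_shape, max_depth=70.0):
    """Rough sketch: scatter per-point depth into an (H, W) map.

    points is assumed (3, N) in the camera frame; p2 is a 3x4 matrix.
    """
    h, w = image_shape
    depth_map = np.zeros((h, w), dtype=np.float32)
    pts_h = np.vstack([points, np.ones((1, points.shape[1]))])  # (4, N)
    uvw = p2 @ pts_h
    u = (uvw[0] / uvw[2]).astype(np.int32)
    v = (uvw[1] / uvw[2]).astype(np.int32)
    valid = (u >= 0) & (u < w) & (v >= 0) & (v < h) & (uvw[2] > 0)
    depth_map[v[valid], u[valid]] = uvw[2][valid]
    depth_map[depth_map == 0.0] = max_depth     # fill empty pixels
    return depth_map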
Example #27
    def load_samples(self, indices):
        """ Loads input-output data for a set of samples. Should only be
            called when a particular sample dict is required. Otherwise,
            samples should be provided by the next_batch function

        Args:
            indices: A list of sample indices from the dataset.sample_list
                to be loaded

        Returns:
            samples: a list of data sample dicts
        """
        sample_dicts = []
        for sample_idx in indices:
            sample = self.sample_list[sample_idx]
            sample_name = sample.name

            # Only read labels if they exist
            if self.has_labels:
                # Read mini batch first to see if it is empty
                anchors_info = self.get_anchors_info(sample_name)
                img_roi_all = self.get_img_roi_data(sample_name)
                #img_roi      = all_img_rois[0]
                #img_roi_norm = all_img_rois[1]

                if (not anchors_info) and self.train_val_test == 'train' \
                        and (not self.train_on_all_samples):
                    empty_sample_dict = {
                        constants.KEY_SAMPLE_NAME: sample_name,
                        constants.KEY_ANCHORS_INFO: anchors_info,
                        # constants.KEY_IMG_ROI: img_roi,
                        constants.KEY_IMG_ROI_ALL: img_roi_all
                    }
                    return [empty_sample_dict]

                obj_labels = obj_utils.read_labels(self.label_dir,
                                                   int(sample_name))

                # Only use objects that match dataset classes
                obj_labels = self.kitti_utils.filter_labels(obj_labels)

            else:
                obj_labels = None

                anchors_info = []

                label_anchors = np.zeros((1, 6))
                label_boxes_3d = np.zeros((1, 7))
                label_classes = np.zeros(1)

            img_idx = int(sample_name)

            # Load image (BGR -> RGB): cv2.imread decodes channels in
            # B, G, R order, so reverse the last axis
            cv_bgr_image = cv2.imread(self.get_rgb_image_path(sample_name))
            rgb_image = cv_bgr_image[..., ::-1]
            image_shape = rgb_image.shape[0:2]
            image_input = rgb_image

            # Get ground plane
            ground_plane = obj_utils.get_road_plane(int(sample_name),
                                                    self.planes_dir)

            # Get calibration
            stereo_calib_p2 = calib_utils.read_calibration(
                self.calib_dir, int(sample_name)).p2

            point_cloud = self.kitti_utils.get_point_cloud(
                self.bev_source, img_idx, image_shape)

            # Augmentation (Flipping)
            if kitti_aug.AUG_FLIPPING in sample.augs:
                image_input = kitti_aug.flip_image(image_input)
                point_cloud = kitti_aug.flip_point_cloud(point_cloud)
                obj_labels = [
                    kitti_aug.flip_label_in_3d_only(obj) for obj in obj_labels
                ]
                ground_plane = kitti_aug.flip_ground_plane(ground_plane)
                stereo_calib_p2 = kitti_aug.flip_stereo_calib_p2(
                    stereo_calib_p2, image_shape)

                # Flip the image ROIs
                img_roi_all = (kitti_aug.flip_roi(img_roi_all[0], image_shape),
                               kitti_aug.flip_roi_norm(img_roi_all[1]))
                if anchors_info:
                    anchor_indices, anchors_ious, anchor_offsets, \
                        anchor_classes = anchors_info
                    # Mirror the x offsets for the flipped sample
                    anchor_offsets[:, 0] = -anchor_offsets[:, 0]
                    anchors_info = (anchor_indices, anchors_ious,
                                    anchor_offsets, anchor_classes)

            # Augmentation (Image Jitter)
            if kitti_aug.AUG_PCA_JITTER in sample.augs:
                image_input[:, :,
                            0:3] = kitti_aug.apply_pca_jitter(image_input[:, :,
                                                                          0:3])

            if obj_labels is not None:
                label_boxes_3d = np.asarray([
                    box_3d_encoder.object_label_to_box_3d(obj_label)
                    for obj_label in obj_labels
                ])

                label_boxes_2d = np.asarray([
                    box_2d_encoder.object_label_to_box_2d(obj_label)
                    for obj_label in obj_labels
                ])

                ## augmentation of dataset
                if kitti_aug.AUG_FLIPPING in sample.augs:
                    label_boxes_2d = kitti_aug.flip_roi(
                        label_boxes_2d, image_shape)

                label_classes = [
                    self.kitti_utils.class_str_to_index(obj_label.type)
                    for obj_label in obj_labels
                ]
                label_classes = np.asarray(label_classes, dtype=np.int32)

                # Return empty anchors_info if no ground truth after filtering
                if len(label_boxes_3d) == 0:
                    anchors_info = []
                    img_roi_all = []
                    #img_roi = []
                    #img_roi_norm= []
                    if self.train_on_all_samples:
                        # If training without any positive labels, we cannot
                        # set these to zeros, because later on the offset calc
                        # uses log on these anchors. So setting any arbitrary
                        # number here that does not break the offset calculation
                        # should work, since the negative samples won't be
                        # regressed in any case.
                        dummy_anchors = [[-1000, -1000, -1000, 1, 1, 1]]
                        label_anchors = np.asarray(dummy_anchors)
                        dummy_boxes = [[-1000, -1000, -1000, 1, 1, 1, 0]]
                        label_boxes_3d = np.asarray(dummy_boxes)
                    else:
                        label_anchors = np.zeros((1, 6))
                        label_boxes_3d = np.zeros((1, 7))
                    label_classes = np.zeros(1)
                else:
                    label_anchors = box_3d_encoder.box_3d_to_anchor(
                        label_boxes_3d, ortho_rotate=True)

            # Create BEV maps
            bev_images = self.kitti_utils.create_bev_maps(
                point_cloud, ground_plane)

            height_maps = bev_images.get('height_maps')
            density_map = bev_images.get('density_map')
            bev_input = np.dstack((*height_maps, density_map))

            sample_dict = {
                # constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
                constants.KEY_LABEL_BOXES_2D: label_boxes_2d,
                constants.KEY_LABEL_ANCHORS: label_anchors,
                constants.KEY_LABEL_CLASSES: label_classes,
                constants.KEY_IMAGE_INPUT: image_input,
                constants.KEY_BEV_INPUT: bev_input,
                constants.KEY_ANCHORS_INFO: anchors_info,
                constants.KEY_IMG_ROI_ALL: img_roi_all,
                # constants.KEY_IMG_ROI: img_roi,
                # constants.KEY_IMG_ROI_NORM: img_roi_norm,
                constants.KEY_POINT_CLOUD: point_cloud,
                constants.KEY_GROUND_PLANE: ground_plane,
                constants.KEY_STEREO_CALIB_P2: stereo_calib_p2,
                constants.KEY_SAMPLE_NAME: sample_name,
                constants.KEY_SAMPLE_AUGS: sample.augs
            }
            sample_dicts.append(sample_dict)

        return sample_dicts
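The BEV input above is stacked with np.dstack from several (H, W) height maps plus a density map, giving an (H, W, C) tensor. A shape-only sketch, assuming five height slices (as the disabled masking experiment above implies) and an arbitrary example resolution:

import numpy as np

h, w = 700, 800                                   # example BEV resolution
height_maps = [np.zeros((h, w), dtype=np.float32) for _ in range(5)]
density_map = np.zeros((h, w), dtype=np.float32)

bev_input = np.dstack((*height_maps, density_map))
print(bev_input.shape)                            # (700, 800, 6)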
Example #28
def main():

    # Create Dataset
    dataset_config_path = mlod.root_dir() + \
        '/configs/mb_preprocessing/rpn_cars.config'
    dataset = DatasetBuilder.load_dataset_from_config(dataset_config_path)

    # Random sample
    sample_name = '000169'

    anchor_strides = dataset.kitti_utils.anchor_strides

    img_idx = int(sample_name)

    print("Showing mini batch for sample {}".format(sample_name))

    image = cv2.imread(dataset.get_rgb_image_path(sample_name))
    image_shape = [image.shape[1], image.shape[0]]

    # KittiUtils class
    dataset_utils = dataset.kitti_utils

    ground_plane = obj_utils.get_road_plane(img_idx, dataset.planes_dir)

    point_cloud = obj_utils.get_depth_map_point_cloud(img_idx,
                                                      dataset.calib_dir,
                                                      dataset.depth_dir,
                                                      image_shape)

    # Grab ground truth
    ground_truth_list = obj_utils.read_labels(dataset.label_dir, img_idx)
    ground_truth_list = dataset_utils.filter_labels(ground_truth_list)

    stereo_calib_p2 = calib_utils.read_calibration(dataset.calib_dir,
                                                   img_idx).p2

    ##############################
    # Flip sample info
    ##############################
    start_time = time.time()

    flipped_image = kitti_aug.flip_image(image)
    flipped_point_cloud = kitti_aug.flip_point_cloud(point_cloud)
    flipped_gt_list = [kitti_aug.flip_label_in_3d_only(obj)
                       for obj in ground_truth_list]
    flipped_ground_plane = kitti_aug.flip_ground_plane(ground_plane)
    flipped_calib_p2 = kitti_aug.flip_stereo_calib_p2(
        stereo_calib_p2, image_shape)

    flipped_points = flipped_point_cloud.T

    print('flip sample', time.time() - start_time)

    ##############################
    # Generate anchors
    ##############################
    clusters, _ = dataset.get_cluster_info()
    anchor_generator = grid_anchor_3d_generator.GridAnchor3dGenerator()

    # Read mini batch info
    anchors_info = dataset_utils.get_anchors_info(
        dataset.classes_name,
        anchor_strides,
        sample_name)

    all_anchor_boxes_3d = []
    all_ious = []
    for class_idx in range(len(dataset.classes)):

        anchor_boxes_3d = anchor_generator.generate(
            area_3d=dataset.kitti_utils.area_extents,
            anchor_3d_sizes=clusters[class_idx],
            anchor_stride=anchor_strides[class_idx],
            ground_plane=ground_plane)

        if anchors_info:
            indices, ious, offsets, classes = anchors_info

            # Get non empty anchors from the indices
            non_empty_anchor_boxes_3d = anchor_boxes_3d[indices]

            all_anchor_boxes_3d.extend(non_empty_anchor_boxes_3d)
            all_ious.extend(ious)

    if len(all_anchor_boxes_3d) == 0:
        # Exit early if anchors_info is empty
        print("No anchors, please try a different sample")
        return

    # Convert to ndarrays
    all_anchor_boxes_3d = np.asarray(all_anchor_boxes_3d)
    all_ious = np.asarray(all_ious)

    ##############################
    # Flip anchors
    ##############################
    start_time = time.time()

    flipped_anchor_boxes_3d = kitti_aug.flip_boxes_3d(all_anchor_boxes_3d,
                                                      flip_ry=False)

    print('flip anchors', time.time() - start_time)

    # Overwrite with flipped things
    all_anchor_boxes_3d = flipped_anchor_boxes_3d
    points = flipped_points
    ground_truth_list = flipped_gt_list
    ground_plane = flipped_ground_plane
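anchors_info above stores the indices of the non-empty anchors, so the full anchor grid is reduced with NumPy fancy indexing (anchor_boxes_3d[indices]). A tiny illustration with fake data:

import numpy as np

# Four fake anchors [x, y, z, l, w, h, ry]; indices mark the non-empty ones
anchor_boxes_3d = np.arange(28, dtype=np.float32).reshape(4, 7)
indices = np.array([0, 2])                       # as read from anchors_info
non_empty_anchor_boxes_3d = anchor_boxes_3d[indices]
print(non_empty_anchor_boxes_3d.shape)           # (2, 7)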
    def run_checkpoint_once(self, checkpoint_to_restore):
        """Evaluates network metrics once over all the validation samples.

        Args:
            checkpoint_to_restore: The directory of the checkpoint to restore.
        """

        self._saver.restore(self._sess, checkpoint_to_restore)

        data_split = self.dataset_config.data_split
        predictions_base_dir = self.paths_config.pred_dir

        num_samples = self.model.dataset.num_samples
        train_val_test = self.model._train_val_test
        print('model: train_val_test: ', train_val_test)

        validation = train_val_test == 'val'

        global_step = trainer_utils.get_global_step(self._sess,
                                                    self.global_step_tensor)

        # Rpn average losses dictionary
        if validation:
            sum_losses = self._create_losses_dict()

        # Build the output directory for predictions
        predictions_dir = predictions_base_dir + \
            "/final_predictions_and_scores/{}/{}".format(data_split,
                                                         global_step)
        trainer_utils.create_dir(predictions_dir)

        num_valid_samples = 0

        # Keep track of feed_dict and inference time
        total_feed_dict_time = []
        total_inference_time = []

        # Run through a single epoch
        current_epoch = self.model.dataset.epochs_completed

        #run_metadata = tf.RunMetadata()
        #run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        while current_epoch == self.model.dataset.epochs_completed:
            # Keep track of feed_dict speed
            start_time = time.time()
            #feed_dict = self.model.create_feed_dict(sample_index=sample_index)
            feed_dict = self.model.create_feed_dict()
            feed_dict_time = time.time() - start_time

            # Get sample name from model
            sample_name = self.model.sample_info['sample_name']
            stereo_calib = calib_utils.read_calibration(
                self.model.dataset.calib_dir, int(sample_name))
            stereo_calib_p2 = stereo_calib.p2

            output_file_path = predictions_dir + \
                "/{}.txt".format(sample_name)

            num_valid_samples += 1
            #if num_valid_samples > 1:
            #    break
            print("Step {}: {} / {}, Inference on sample {}".format(
                global_step, num_valid_samples, num_samples, sample_name))

            # Do predictions, loss calculations, and summaries

            if validation:
                if self.summary_merged is not None:
                    predictions, eval_losses, eval_total_loss, summary_out = \
                        self._sess.run([self._prediction_dict,
                                        self._loss_dict,
                                        self._total_loss,
                                        self.summary_merged],
                                       feed_dict=feed_dict)

                    if num_valid_samples == 2 and num_samples == 2:
                        self.summary_writer2.add_summary(
                            summary_out, global_step)
                    else:
                        self.summary_writer.add_summary(
                            summary_out, global_step)

                else:
                    print('Starting inference without summaries')
                    predictions, eval_losses, eval_total_loss = \
                        self._sess.run([self._prediction_dict,
                                        self._loss_dict,
                                        self._total_loss],
                                       feed_dict=feed_dict)
                    #options=run_options,
                    #run_metadata=run_metadata)
                    #self.summary_writer.add_run_metadata(run_metadata, \
                    #        'step {} sp:{}'.format(global_step/1000, int(sample_name)))

                self._update_losses(eval_losses, eval_total_loss, sum_losses,
                                    global_step)
                # Save predictions

                print('save predictions')
                predictions_and_scores = \
                    self.get_predicted_boxes_3d_and_scores(predictions,
                                                            stereo_calib_p2)
                np.savetxt(output_file_path,
                           predictions_and_scores,
                           fmt='%.5f')

                # Calculate accuracies (arguably unnecessary: there is only
                # one object class, with no background class)
                self.get_cls_accuracy(predictions, sum_losses, global_step)
                print("Step {}: Total time {} s".format(
                    global_step,
                    time.time() - start_time))

            else:
                # Test mode --> train_val_test == 'test'
                inference_start_time = time.time()
                # Don't calculate loss or run summaries for test
                predictions = self._sess.run(self._prediction_dict,
                                             feed_dict=feed_dict)
                inference_time = time.time() - inference_start_time

                # Add times to list
                total_feed_dict_time.append(feed_dict_time)
                total_inference_time.append(inference_time)

                predictions_and_scores = \
                    self.get_predicted_boxes_3d_and_scores(predictions,
                                                           stereo_calib_p2)
                np.savetxt(output_file_path, predictions_and_scores,
                           fmt='%.5f')

        # end while current_epoch == model.dataset.epochs_completed:

        if validation:
            # KITTI native evaluation: run during validation with the Avod
            # model; predictions are stored in KITTI format
            self.save_prediction_losses_results(sum_losses,
                                                num_valid_samples,
                                                global_step,
                                                predictions_base_dir)
            if self.do_kitti_native_eval:
                # Native evaluation is currently disabled
                pass
                # self.run_kitti_native_eval(global_step)

        else:
            # Test mode --> train_val_test == 'test'
            evaluator_utils.print_inference_time_statistics(
                total_feed_dict_time, total_inference_time)

        print("Step {}: Finished evaluation, results saved to {}".format(
            global_step, predictions_dir))
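The evaluator writes one text file per sample with np.savetxt(fmt='%.5f'), and the conversion script earlier reads those files back with np.loadtxt(ndmin=2), treating each row as [x, y, z, l, w, h, ry, score, class_idx] (the column layout is inferred from that script). A round-trip sketch with dummy values:

import numpy as np

# One dummy prediction row: [x, y, z, l, w, h, ry, score, class_idx]
predictions_and_scores = np.array(
    [[1.84, 1.47, 8.41, 3.9, 1.6, 1.5, 0.01, 0.93, 0.0]])
np.savetxt('pred_demo.txt', predictions_and_scores, fmt='%.5f')

# Reading back: ndmin=2 keeps a single detection as a 2-D array
all_predictions = np.loadtxt('pred_demo.txt', ndmin=2)
print(all_predictions.shape)   # (1, 9)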
Example #30
def main():
    """This demo shows RPN proposals and AVOD predictions in 3D
    and 2D in image space. Given certain thresholds for proposals
    and predictions, it selects and draws the bounding boxes on
    the image sample. It goes through the entire proposal and
    prediction samples for the given dataset split.

    The proposals, overlaid, and prediction images can be toggled on or off
    separately in the options section.
    The prediction score and IoU with ground truth can be toggled on or off
    as well, shown as (score, IoU) above the detection.
    """
    
    fig_size = (10, 6.1)
    gt_classes = ['Car', 'Pedestrian', 'Cyclist']

    # Output images directories
    output_dir_base = 'images_2d'
    data_dir = '../../DATA/Kitti/object/'
    label_dir = data_dir + 'training/label_2'
    image_dir = data_dir + 'training/image_2'
    filepath = data_dir + 'val.txt'
    calib_dir = data_dir + 'training/calib'

    with open(filepath, 'r') as f:
        filenames = [int(filename) for filename in f.readlines()]

    i = 0
    i_max = len(filenames)

    for filename in filenames:
        ##############################
        # Ground Truth
        ##############################

        # Get ground truth labels
        gt_objects = obj_utils.read_labels(label_dir, filename)

        boxes2d, _, _ = obj_utils.build_bbs_from_objects(
            gt_objects, class_needed=gt_classes)

        image_path = image_dir + "/%06d.png" % filename
        image = Image.open(image_path)
        image_size = image.size

        prop_fig, prop_2d_axes, prop_3d_axes = \
                vis_utils.visualization(image_dir,
                                        filename,
                                        display=False)

        # Read the stereo calibration matrix for visualization
        stereo_calib = calib_utils.read_calibration(calib_dir, filename)
        calib_p2 = stereo_calib.p2

        draw_gt(gt_objects, prop_2d_axes, prop_3d_axes, calib_p2)

        out_name = output_dir_base + "/%06d.png" % filename
        plt.savefig(out_name)
        plt.close(prop_fig)

        i += 1
        print(str(i) + '/' + str(i_max))

    print('\nDone')