Example #1
    def get_lidar(self, raw_date, raw_sequence_id, raw_frame_id):
        # KITTI raw frame indices are zero-padded to 10 digits, e.g. '0000000005'
        raw_frame_id_filled = str(raw_frame_id).zfill(10)
        lidar_filename = os.path.join(self.root_dir, raw_date, raw_sequence_id,
                                      'velodyne_points', 'data',
                                      raw_frame_id_filled + '.bin')

        return utils.load_velo_scan(lidar_filename)
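For context, a minimal sketch of what a load_velo_scan helper typically does for KITTI velodyne files; the actual utils.load_velo_scan may differ in signature and defaults:

import numpy as np

def load_velo_scan(filename, dtype=np.float32, n_vec=4):
    # KITTI .bin files store one (x, y, z, reflectance) float32 tuple per point
    scan = np.fromfile(filename, dtype=dtype)
    return scan.reshape((-1, n_vec))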
Example #2
import cv2
import numpy as np

# Object3d, Calibration, getstringfromarray, load_velo_scan,
# show_lidar_with_boxes, compute_box_3d and draw_projected_box3d are
# assumed to come from the accompanying kitti_util helpers.

if __name__ == "__main__":
    print("Starting")
    image_path = '/home/eshan/Pictures/000009.jpeg'
    img = cv2.imread(image_path)
    file_path = '/home/eshan/Pictures/0001.txt'
    filecontent = np.array([line.split() for line in open(file_path, 'r')])
    alllabels = np.unique(filecontent[:, 2])
    labels = ['Car', 'Pedestrian', 'Cyclist']
    # drop every class that is not in the whitelist
    finalset = [x for x in alllabels if x not in labels]
    for val in finalset:
        filecontent = filecontent[filecontent[:, 2] != val, :]
    # build a list (not a generator): the objects are consumed twice below
    data = [Object3d(getstringfromarray(line[2:]))
            for line in filecontent[filecontent[:, 0] == '0', :]]
    calib = Calibration('/home/eshan/Pictures/calib.txt')
    velo = load_velo_scan('/home/eshan/Pictures/000009.bin',
                          np.float32,
                          n_vec=4)[:, 0:4]
    img2 = np.copy(img)
    show_lidar_with_boxes(velo, data, calib)
    for obj in data:
        print("here")
        box3d_pts_2d, _ = compute_box_3d(obj, calib.P)
        text = 'gt ID: %d, Type: %s' % (0, obj.type)
        # compute_box_3d returns None for the 2D corners when the box is
        # behind the camera, so draw only when a projection exists
        if box3d_pts_2d is not None:
            print("also")
            img2 = draw_projected_box3d(img2, box3d_pts_2d, color=(0, 0, 0))
            img2 = cv2.putText(
                img2,
                text, (int(box3d_pts_2d[4, 0]), int(box3d_pts_2d[4, 1]) - 8),
                cv2.FONT_HERSHEY_TRIPLEX,
                0.5,
                color=(0, 0, 0))
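compute_box_3d returns None for the 2D corners when the box lies behind the camera, which is why the None checks above matter. A condensed sketch of the usual logic, assuming the common kitti_util object fields (ry, l, w, h, t); the project's actual helper may differ:

import numpy as np

def compute_box_3d_sketch(obj, P):
    # rotation around the camera y-axis by the heading angle ry
    c, s = np.cos(obj.ry), np.sin(obj.ry)
    R = np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]])
    l, w, h = obj.l, obj.w, obj.h
    # 8 box corners in object coordinates, origin at the bottom center
    x = [l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2]
    y = [0, 0, 0, 0, -h, -h, -h, -h]
    z = [w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2]
    corners_3d = R @ np.vstack([x, y, z]) + np.array(obj.t).reshape(3, 1)
    if np.any(corners_3d[2, :] < 0.1):  # box behind the camera
        return None, corners_3d.T
    # project onto the image plane with the 3x4 projection matrix P
    pts = P @ np.vstack([corners_3d, np.ones((1, 8))])
    return (pts[:2] / pts[2]).T, corners_3d.T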
Example #3
    def get_lidar(self, idx):
        assert idx < self.num_samples
        lidar_filename = os.path.join(self.lidar_dir, '%06d.bin' % idx)
        return utils.load_velo_scan(lidar_filename)
Example #4
def f(img_n):
    idx = img_n[-10:-4]  # frame id as a string, e.g. '000123'
    vel_n = origin_data_dir + '/velodyne/' + idx + '.bin'
    cal_n = origin_data_dir + '/calib/' + idx + '.txt'
    lab_n = origin_data_dir + '/label_2/' + idx + '.txt'
    calib_data = kitti_utils.Calibration(cal_n)
    
    vel = kitti_utils.load_velo_scan(vel_n)
    vel = calib_data.project_velo_to_rect(vel)
    # crop to the region of interest in rectified camera coordinates
    # 0 < z < 70
    vel = vel[np.where(vel[:, 2] > 0)]
    vel = vel[np.where(vel[:, 2] < 70)]
    # -3 < y < 1
    vel = vel[np.where(vel[:, 1] < 1)]
    vel = vel[np.where(vel[:, 1] > -3)]
    # -40 < x < 40
    vel = vel[np.where(vel[:, 0] < 40)]
    vel = vel[np.where(vel[:, 0] > -40)]
    # NOTE: the coordinates are NOT centered here!
    # coords = np.ascontiguousarray(vel[:, :3] - vel[:, :3].mean(0))
    coords = np.ascontiguousarray(vel)

    

    # Get per-point colors by projecting the points into the image
    vel_2d = calib_data.project_rect_to_image(vel)
    colors = kitti_utils.img_xy2rgb(img_n, vel_2d)
    
    # read the label file and drop 'DontCare' entries (no 3D annotation)
    with open(lab_n) as f:
        str_label = [x for x in f.readlines() if x[0:8] != 'DontCare']
    label_3d_boxes = []
    # label.append(KITTI_LABEL_NAMES.index(name))

    for label in str_label:
        obj = Object3d(label)
        box3d = compute_box_3d(obj, calib_data.P)
        label_3d_boxes.append(box3d[1])

    label_3d_boxes = np.asarray(label_3d_boxes)
    # np.float is deprecated; use an explicit dtype
    label_3d_boxes = label_3d_boxes.astype(np.float64)
    str_label = [x.split(' ') for x in str_label]
    nplabel = np.asarray(str_label)
    for i in range(len(nplabel)):
        # map the class name (column 1) to its integer index
        nplabel[i][0] = KITTI_LABEL_NAMES.index(nplabel[i][0])
    nplabel = nplabel.astype(np.float64)
    # Column 1, e.g. 'Truck': the object class (one of 'Car', 'Van', 'Truck',
    #   'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram', 'Misc' or 'DontCare';
    #   'DontCare' objects have no 3D annotation because the lidar cannot reach
    #   that far, even when the object is visually detectable).
    # Column 2, e.g. 0.0: truncation level in [0, 1] (an object at the image
    #   border may be partially truncated).
    # Column 3, e.g. 0: occlusion state (0 = fully visible, 1 = partly occluded,
    #   2 = largely occluded, 3 = unknown).
    # Column 4, e.g. -1.57: the observation angle alpha, the bird's-eye-view
    #   angle of the vector from the camera center to the object center; -1.57
    #   is -90 degrees, i.e. directly ahead. It encodes the object's direction
    #   relative to the camera in bird's-eye view.
    # Columns 5-8, e.g. 599.41 156.40 629.75 189.25: the 2D bounding box in
    #   pixels, xyxy format (top-left x, y, then bottom-right x, y).
    # Columns 9-11, e.g. 2.85 2.63 12.34: the object height, width and length
    #   in meters.
    # Columns 12-14, e.g. 0.47 1.49 69.44: the xyz of the 3D object center in
    #   camera coordinates.
    # Column 15, e.g. -1.56: the object heading, as a rotation around the
    #   camera y-axis in radians. Note the difference from column 4: column 4
    #   ignores the body orientation and measures the angle between the
    #   camera-to-object vector and the camera z-axis. (Actually I have a doubt
    #   here: given only the object center the heading is unknown, yet alpha in
    #   column 4 can be computed from it, so isn't column 4 redundant?)



    torch.save((coords, colors, nplabel, label_3d_boxes),
               proc_data_dir + idx + '.pth')
    print(idx)
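The column layout documented in the comments above maps directly onto a parsed label line. A minimal parsing sketch for reference; the function and field names are illustrative, not taken from kitti_utils:

def parse_kitti_label_line(line):
    # illustrative helper, not part of kitti_utils
    v = line.split()
    return {
        'type': v[0],                               # column 1: object class
        'truncated': float(v[1]),                   # column 2: truncation in [0, 1]
        'occluded': int(v[2]),                      # column 3: 0..3 occlusion state
        'alpha': float(v[3]),                       # column 4: observation angle (rad)
        'bbox': [float(x) for x in v[4:8]],         # columns 5-8: 2D box, xyxy
        'dimensions': [float(x) for x in v[8:11]],  # columns 9-11: h, w, l (m)
        'location': [float(x) for x in v[11:14]],   # columns 12-14: xyz (camera frame)
        'rotation_y': float(v[14]),                 # column 15: heading (rad)
    }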
Example #5
def vis(result_sha, data_root, result_root):
    def show_image_with_boxes(img,
                              velo,
                              objects_res,
                              objects_res_det,
                              objects_res_raw,
                              labeldata,
                              object_gt,
                              calib,
                              save_path,
                              height_threshold=0,
                              show_lidar=True,
                              save_image=False):
        img2 = np.copy(img)

        for obj in objects_res:
            box3d_pts_2d, _ = compute_box_3d(obj, calib.P)
            # per-ID color, currently overridden by the fixed color below
            color_tmp = tuple(
                [int(tmp * 255) for tmp in colors[obj.id % max_color]])
            text = 'Tracked ID: %d, Type: %s' % (obj.id, obj.type)
            # draw only when the box projects into the image
            if box3d_pts_2d is not None:
                img2 = draw_projected_box3d(img2, box3d_pts_2d,
                                            color=(0, 0, 255))
                img2 = cv2.putText(
                    img2,
                    text,
                    (int(box3d_pts_2d[4, 0]), int(box3d_pts_2d[4, 1]) - 8),
                    cv2.FONT_HERSHEY_TRIPLEX,
                    0.5,
                    color=(0, 0, 255))
        for obj in objects_res_det:
            box3d_pts_2d, _ = compute_box_3d(obj, calib.P)
            color_tmp = tuple(
                [int(tmp * 255) for tmp in colors[obj.id % max_color]])
            text = 'Detection ID: %d, Type: %s' % (obj.id, obj.type)
            if box3d_pts_2d is not None:
                img2 = draw_projected_box3d(img2, box3d_pts_2d,
                                            color=(0, 255, 0))
                img2 = cv2.putText(
                    img2,
                    text,
                    (int(box3d_pts_2d[3, 0]), int(box3d_pts_2d[3, 1]) - 8),
                    cv2.FONT_HERSHEY_TRIPLEX,
                    0.5,
                    color=(0, 255, 0))
        import itertools

        # tee the generator so it can be consumed both here and by
        # show_lidar_with_boxes below
        labeldata, labeldata2 = itertools.tee(labeldata)
        for obj in labeldata:
            box3d_pts_2d, _ = compute_box_3d(obj, calib.P)
            text = 'GT, Type: %s' % (obj.type)
            if box3d_pts_2d is not None:
                print(text)
                img2 = draw_projected_box3d(img2, box3d_pts_2d,
                                            color=(255, 0, 0))
                img2 = cv2.putText(
                    img2,
                    text,
                    (int(box3d_pts_2d[4, 0]), int(box3d_pts_2d[4, 1]) - 8),
                    cv2.FONT_HERSHEY_TRIPLEX,
                    0.5,
                    color=(255, 0, 0))
        # for obj in objects_res_raw:
        #     box3d_pts_2d, _ = compute_box_3d(obj, calib.P)
        #     color_tmp = tuple([int(tmp * 255)
        #                        for tmp in colors[obj.id % max_color]])
        #     img2 = draw_projected_box3d(img2, box3d_pts_2d, color=(255,0,0))
        #     text = 'Estimate ID: %d' % obj.id
        #     if box3d_pts_2d is not None:
        #         img2 = cv2.putText(img2, text, (int(box3d_pts_2d[2, 0]), int(
        #             box3d_pts_2d[2, 1]) - 8), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color=(255,0,0))
        if show_lidar:
            show_lidar_with_boxes(velo,
                                  objects=labeldata2,
                                  calib=calib,
                                  objects_pred=objects_res)
        img = Image.fromarray(img2)
        img = img.resize((width, height))
        cv2.imshow("Image", img2)
        cv2.waitKey()

        if save_image:
            print("Saving Image at", save_path)
            img.save(save_path)

        return img2

    for seq in seq_list:
        image_dir = os.path.join(data_root, 'image_02/%s' % seq)
        calib_file = os.path.join(data_root, 'calib/%s.txt' % seq)
        label_file = os.path.join(data_root, 'label_02/%s.txt' % seq)
        velo_dir = os.path.join(data_root, 'velodyne/%s' % seq)
        result_dir = [
            os.path.join(result_root,
                         '%s/trk_withid/%s' % (result_sha[0], seq)),
            os.path.join(result_root,
                         '%s/trk_withid/%s' % (result_sha[1], seq)),
            os.path.join(result_root,
                         '%s/trk_withid/%s' % (result_sha[2], seq))
        ]
        save_3d_bbox_dir = os.path.join(
            result_root,
            '%s/trk_image_vis/%s' % ("Combined_Final_WithLabel", seq))
        mkdir_if_missing(save_3d_bbox_dir)

        # load the list
        images_list, num_images = load_list_from_folder(image_dir)
        velo_list, num_velo = load_list_from_folder(velo_dir)
        print('number of images to visualize is %d' % num_images)
        start_count = 0
        filecontent = np.array([line.split() for line in open(label_file, 'r')])
        # alllabels = np.unique(filecontent[:,2])
        # labels = ['Car', 'Pedestrian', 'Cyclist']
        # finallabelset = [x for x in alllabels if x not in labels]
        # print(alllabels)
        # print(finallabelset)
        # for val in finallabelset:
        #     filecontent = filecontent[filecontent[:,2]!=val,:]
        # print(np.unique(filecontent[:,2]))
        size = (width, height)
        out = cv2.VideoWriter(f'{result_root}/{seq}.avi',
                              cv2.VideoWriter_fourcc(*'DIVX'), 15, size)
        num_images = 1  # NOTE: debug override; only the first frame is visualized
        for count in range(start_count, num_images):
            image_tmp = images_list[count]
            velo_tmp = velo_list[count]
            if not is_path_exists(image_tmp):
                continue
            image_index = int(fileparts(image_tmp)[1])
            image_tmp = np.array(Image.open(image_tmp))
            img_height, img_width, img_channel = image_tmp.shape
            filecontentframe = filecontent[filecontent[:, 0] ==
                                           str(image_index), :]
            print(len(filecontentframe))
            print(f"Labels for frame {image_index}",
                  np.unique(filecontentframe[:, 2]))
            labeldata = (Object3d(getstringfromarray(line[2:]))
                         for line in filecontentframe)
            object_res = []
            object_res_det = []
            object_res_raw = []
            for dirt in result_dir:
                result_tmp = os.path.join(dirt, '%06d.txt' %
                                          image_index)  # load the result
                if is_path_exists(result_tmp):
                    object_res = object_res + read_label(result_tmp)
                result_tmp_det = os.path.join(dirt, 'det%06d.txt' %
                                              image_index)  # load the result
                if is_path_exists(result_tmp_det):
                    object_res_det = object_res_det + \
                        read_label(result_tmp_det)
                result_tmp_raw = os.path.join(dirt, 'raw%06d.txt' %
                                              image_index)  # load the result
                if is_path_exists(result_tmp_raw):
                    object_res_raw = object_res_raw + \
                        read_label(result_tmp_raw)
            print('processing index: %d, %d/%d, results from %s' %
                  (image_index, count + 1, num_images, result_tmp))
            calib_tmp = Calibration(calib_file)  # load the calibration

            object_res_filtered = []
            for object_tmp in object_res:
                if object_tmp.type not in type_whitelist:
                    continue
                if hasattr(object_tmp, 'score') and \
                        object_tmp.score < score_threshold:
                    continue
                object_res_filtered.append(object_tmp)
            object_res_filtered_det = []
            for object_tmp in object_res_det:
                if object_tmp.type not in type_whitelist:
                    continue
                if hasattr(object_tmp, 'score') and \
                        object_tmp.score < score_threshold:
                    continue
                object_res_filtered_det.append(object_tmp)
            object_res_filtered_raw = []
            for object_tmp in object_res_raw:
                if object_tmp.type not in type_whitelist:
                    continue
                # raw results are kept regardless of score
                object_res_filtered_raw.append(object_tmp)
            num_instances = len(object_res_filtered)
            save_image_with_3dbbox_gt_path = os.path.join(
                save_3d_bbox_dir, '%06d.jpg' % (image_index))
            velodyne_scan = load_velo_scan(velo_tmp, np.float32, n_vec=4)[:, 0:4]
            img = show_image_with_boxes(
                image_tmp,
                velodyne_scan,
                object_res_filtered,
                object_res_filtered_det,
                object_res_filtered_raw,
                labeldata, [],
                calib_tmp,
                save_path=save_image_with_3dbbox_gt_path)
            print('number of objects to plot is %d, %d, %d' %
                  (num_instances, len(object_res_filtered_det),
                   len(object_res_filtered_raw)))
            out.write(img)
        out.release()
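A hedged usage sketch: vis() indexes result_sha[0..2], so it expects three result-folder names under result_root; the folder names and paths below are illustrative, not taken from the source:

vis(['run_car', 'run_pedestrian', 'run_cyclist'],  # hypothetical result folders
    data_root='KITTI/tracking/training',           # hypothetical data root
    result_root='results')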
Example #6
    def __getitem__(self, index):

        # start time
        get_item_start_time = time.time()

        # get point cloud
        lidar_filename = os.path.join(self.lidar_dir, '%06d.bin' % index)
        lidar_data = kitti_utils.load_velo_scan(lidar_filename)

        # time for loading point cloud
        read_point_cloud_end_time = time.time()
        read_point_cloud_time = read_point_cloud_end_time - get_item_start_time

        # voxelize point cloud
        voxel_point_cloud = torch.tensor(
            kitti_utils.voxelize(point_cloud=lidar_data),
            requires_grad=True,
            device=self.device).float()

        # time for voxelization
        voxelization_end_time = time.time()
        voxelization_time = voxelization_end_time - read_point_cloud_end_time

        # channels along first dimensions according to PyTorch convention
        voxel_point_cloud = voxel_point_cloud.permute([2, 0, 1])

        # get image
        if self.get_image:
            image_filename = os.path.join(self.image_dir, '%06d.png' % index)
            image = kitti_utils.get_image(image_filename)

        # get current time
        read_labels_start_time = time.time()

        # get calibration
        calib_filename = os.path.join(self.calib_dir, '%06d.txt' % index)
        calib = kitti_utils.Calibration(calib_filename)

        # get labels
        label_filename = os.path.join(self.label_dir, '%06d.txt' % index)
        labels = kitti_utils.read_label(label_filename)

        read_labels_end_time = time.time()
        read_labels_time = read_labels_end_time - read_labels_start_time

        # compute network label
        if self.split == 'training':
            # get current time
            compute_label_start_time = time.time()

            # create empty pixel labels
            regression_label = np.zeros(
                (OUTPUT_DIM_0, OUTPUT_DIM_1, OUTPUT_DIM_REG))
            classification_label = np.zeros(
                (OUTPUT_DIM_0, OUTPUT_DIM_1, OUTPUT_DIM_CLA))

            # iterate over all 3D label objects in list
            for label in labels:
                if label.type == 'Car':
                    # compute corners of 3D bounding box in camera coordinates
                    _, bbox_corners_camera_coord = kitti_utils.compute_box_3d(
                        label, calib.P, scale=1.0)
                    # get pixel label for classification and BEV bounding box
                    regression_label, classification_label = compute_pixel_labels(
                        regression_label, classification_label, label,
                        bbox_corners_camera_coord)

            # concatenate regression and classification labels along the
            # channel dimension
            regression_label = torch.tensor(regression_label,
                                            device=self.device).float()
            classification_label = torch.tensor(classification_label,
                                                device=self.device).float()
            training_label = torch.cat(
                (regression_label, classification_label), dim=2)

            # get time for computing pixel label
            compute_label_end_time = time.time()
            compute_label_time = compute_label_end_time - compute_label_start_time

            # total time for data loading
            get_item_end_time = time.time()
            get_item_time = get_item_end_time - get_item_start_time

            if self.show_times:
                print('---------------------------')
                print('Get Item Time: {:.4f} s'.format(get_item_time))
                print('---------------------------')
                print('Read Point Cloud Time: {:.4f} s'.format(
                    read_point_cloud_time))
                print('Voxelization Time: {:.4f} s'.format(voxelization_time))
                print('Read Labels Time: {:.4f} s'.format(read_labels_time))
                print(
                    'Compute Labels Time: {:.4f} s'.format(compute_label_time))

            return voxel_point_cloud, training_label

        else:
            if self.get_image:
                return image, voxel_point_cloud, labels, calib
            else:
                return voxel_point_cloud, labels, calib
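A minimal usage sketch for the __getitem__ above, assuming the surrounding class is a torch Dataset named KittiBEVDataset (the actual class name and constructor are not shown in this excerpt):

from torch.utils.data import DataLoader

dataset = KittiBEVDataset(split='training')  # hypothetical constructor
loader = DataLoader(dataset, batch_size=4, shuffle=True)
for voxel_point_cloud, training_label in loader:
    # one batch of voxelized point clouds and their pixel-wise training labels
    break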
Example #7
        x_out = torch.cat((x_reg, x_class), dim=1)

        return x_out


########
# Main #
########

if __name__ == '__main__':

    # exemplary input point cloud
    base_dir = 'Data/training/velodyne'
    index = 1
    lidar_filename = os.path.join(base_dir, '%06d.bin' % index)
    lidar_data = kitti_utils.load_velo_scan(lidar_filename)
    # create torch tensor from numpy array
    voxel_point_cloud = torch.tensor(kitti_utils.voxelize(lidar_data),
                                     requires_grad=True,
                                     device='cpu').float()
    # channels along first dimensions according to PyTorch convention
    voxel_point_cloud = voxel_point_cloud.permute([2, 0, 1])
    voxel_point_cloud = torch.unsqueeze(
        voxel_point_cloud, 0)  # add dimension 0 to tensor for batch

    # forward pass through network
    pixor = PIXOR()
    prediction = pixor(voxel_point_cloud)
    # torch.cat above joins along dim 1, so slice the channel axis:
    # last channel = classification map, remaining channels = regression targets
    classification_prediction = prediction[:, -1]
    regression_prediction = prediction[:, :-1]
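A hedged follow-up: if the classification channel holds raw scores (the PIXOR head may already apply a sigmoid, in which case skip it), an objectness mask can be derived like this; the threshold is illustrative:

    objectness = torch.sigmoid(classification_prediction)  # assumes raw logits
    mask = objectness > 0.5                                # illustrative threshold
    print('cells above threshold:', mask.sum().item())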