Exemplo n.º 1
0
def preprocess_point_cloud(point_cloud):
    ''' Turn a numpy point cloud into a one-item float32 batch for the network.

    Only the first three columns (XYZ) of the input are used. A fourth column
    holding each point's z minus the estimated floor height is appended, the
    cloud is subsampled with random_sampling to FLAGS.num_point points, and a
    leading batch axis is added.
    '''
    xyz = point_cloud[:, 0:3]  # any extra columns (e.g. color) are ignored
    z = xyz[:, 2]
    # np.percentile takes q in [0, 100]; 0.99 therefore picks a value very
    # close to the lowest z, which is used as the floor estimate.
    floor_z = np.percentile(z, 0.99)
    xyz_height = np.concatenate([xyz, np.expand_dims(z - floor_z, 1)], 1)  # (N,4)
    sampled = random_sampling(xyz_height, FLAGS.num_point)
    # Cast first, then prepend the batch axis: (1, n_sampled, 4)
    return np.expand_dims(sampled.astype(np.float32), 0)
Exemplo n.º 2
0
 def depth_to_pc(self, dep):
     """Convert a depth map into the batched point array fed to the network.

     Each of the three channels of the module-level XYZ grid is multiplied by
     the depth map to obtain a 480x640 map of 3D points. The map is flattened,
     a height-above-floor column is appended, the result is subsampled to
     num_point points and returned as float32 with a leading batch axis.
     """
     rows, cols = 480, 640
     point_map = np.zeros((rows, cols, 3))
     for axis in range(3):
         point_map[:, :, axis] = dep * XYZ[:, :, axis]
     points = point_map.reshape(rows * cols, 3)

     z = points[:, 2]
     # q=0.99 is on np.percentile's 0-100 scale, i.e. close to the lowest z.
     floor_z = np.percentile(z, 0.99)
     points = np.concatenate([points, np.expand_dims(z - floor_z, 1)],
                             1)  # (N,4)

     # The sampled indices are requested but not needed here.
     sampled, _ = random_sampling(points, num_point, return_choices=True)
     return np.expand_dims(sampled.astype(np.float32), 0)  # (1,n_sampled,4)
Exemplo n.º 3
0
def get_roi_ptcloud(inputs,
                    batch_pred_boxes_params,
                    enlarge_ratio=1.2,
                    num_point_roi=512,
                    min_num_point=100):
    """ Crop and resample the scene points that fall inside each predicted box.

    :param inputs: dict {'point_clouds'}
                   tensor with the point clouds of the whole scene (B, N, C)
           batch_pred_boxes_params: (B, num_proposals, 7), numpy array
                   predicted boxes: entries 0:3 center, 3:6 size, 6 heading
           enlarge_ratio: scalar
                   factor applied to the box size before cropping
           num_point_roi: scalar
                   number of points sampled from each enlarged box
           min_num_point: scalar
                   boxes holding fewer points than this are marked empty and
                   their output rows are left as zeros

    :return:
        batch_pc_roi: (B, num_proposals, num_point_roi, C) float32 numpy array
        nonempty_roi_mask: (B, num_proposals) numpy array, 1 where the box was
                   filled and 0 where it was skipped
    """
    scene_points = inputs['point_clouds'].detach().cpu().numpy()[:, :, :]  # B,N,C
    num_scenes, num_boxes = batch_pred_boxes_params.shape[:2]
    num_features = scene_points.shape[2]

    batch_pc_roi = np.zeros((num_scenes, num_boxes, num_point_roi, num_features),
                            dtype=np.float32)
    nonempty_roi_mask = np.ones((num_scenes, num_boxes))

    for scene in range(num_scenes):
        pc = scene_points[scene, :, :]  # (N,C)
        for box in range(num_boxes):
            params = batch_pred_boxes_params[scene, box, :]  # (7)
            center_upright_camera = flip_axis_to_camera(params[0:3])
            enlarged_size = params[3:6] * enlarge_ratio
            # The box is built in the camera frame, then flipped to the depth frame.
            box3d = flip_axis_to_depth(
                get_3d_box(enlarged_size, params[6], center_upright_camera))
            pc_in_box, _ = extract_pc_in_box3d(pc, box3d)

            if len(pc_in_box) < min_num_point:
                nonempty_roi_mask[scene, box] = 0
                continue
            batch_pc_roi[scene, box, :, :] = random_sampling(
                pc_in_box, num_point_roi)
    return batch_pc_roi, nonempty_roi_mask
Exemplo n.º 4
0
def data_viz(data_dir, dump_dir=os.path.join(BASE_DIR, 'data_viz_dump')):
    ''' Examine and visualize ycbgrasp dataset.

    Every tenth sample of the dataset is loaded, reduced to its first three
    columns, subsampled to args.num_point points and written to
    "<dump_dir>/<index>_pc.ply".

    Args:
        data_dir: dataset location handed to ycb_object.
        dump_dir: output directory; created (including missing parents)
            when it does not exist yet.
    '''
    ycb = ycb_object(data_dir)

    # makedirs(exist_ok=True) creates missing parent directories and does not
    # fail if the directory appears between a check and the creation.
    os.makedirs(dump_dir, exist_ok=True)

    # Only every tenth sample is dumped.
    for idx in range(0, len(ycb), 10):
        print('data index: ', idx)
        pc = ycb.get_pointcloud(idx)
        pc = pc[:, 0:3]
        pc = pc_util.random_sampling(pc, args.num_point)
        pc_util.write_ply(pc, os.path.join(dump_dir, str(idx) + '_pc.ply'))

    print('Complete!')
Exemplo n.º 5
0
    def __getitem__(self, idx):
        """Return the idx-th crop, centered and with its second and third axes swapped.

        Returns a dict with following keys:
            point_clouds: float32 array of points sampled from the crop
                (XYZ, plus color when self.use_color, plus height when
                self.use_height)
            mid: midpoint of the crop's XYZ bounding box that was subtracted
                from the points, with entries 1 and 2 swapped like the points
        """
        # Work on a private copy. The centering, axis swap and color shift
        # below are all in-place; applied to the stored crop they would
        # accumulate, so a second access to the same index would swap the
        # axes back, report a near-zero `mid` and shift the colors again.
        crop_point_cloud = np.array(self.crops[idx])

        # center data
        minbound = np.min(crop_point_cloud[:, :3], axis=0)
        maxbound = np.max(crop_point_cloud[:, :3], axis=0)
        mid = (minbound + maxbound) / 2.0
        crop_point_cloud[:, :3] -= mid

        # convert PC to z is up: swap axes 1 and 2 of both the offset and
        # the points (fancy indexing on the right-hand side yields a copy,
        # so the swap does not read partially overwritten data).
        mid[[0, 1, 2]] = mid[[0, 2, 1]]
        crop_point_cloud[:, [0, 1, 2]] = crop_point_cloud[:, [0, 2, 1]]

        if not self.use_color:
            point_cloud = crop_point_cloud[:, 0:3]  # do not use color for now
        else:
            point_cloud = crop_point_cloud[:, 0:6]
            # NOTE(review): only the mean is divided by 256, not the stored
            # colors - presumably the crops already hold colors in 0..1;
            # confirm against the code that builds self.crops.
            point_cloud[:, 3:] = point_cloud[:, 3:] - (self.MEAN_COLOR_RGB) / 256.0

        if self.use_height:
            # q=0.99 is on np.percentile's 0-100 scale, i.e. close to the
            # lowest coordinate along the up axis.
            floor_height = np.percentile(point_cloud[:, 2], 0.99)
            height = point_cloud[:, 2] - floor_height
            point_cloud = np.concatenate(
                [point_cloud, np.expand_dims(height, 1)], 1)

        point_cloud, _ = pc_util.random_sampling(point_cloud,
                                                 self.num_points,
                                                 return_choices=True)

        ret_dict = {}
        ret_dict['point_clouds'] = point_cloud.astype(np.float32)
        ret_dict['mid'] = mid

        return ret_dict
Exemplo n.º 6
0
def extract_blender_data_inner(data_idx, dataset, data_dir, split, num_point,
                               type_whitelist, save_votes):
    """Dump the subsampled point cloud, boxes and optional votes of one scene.

    Files are written below "<data_dir>/<split>/XX/" using the two-part
    index stored in dataset.index[data_idx]:
        XXXX_pc.npz    key 'pc': the point cloud subsampled to num_point
        XXXX_bbox.npy  (K,10) array, one row per whitelisted object:
                       centroid (0:3), l/w/h (3:6), heading_angle (6:9),
                       class id (9)
        XXXX_votes.npz key 'point_votes', (N,10): column 0 is the in-box
                       mask, columns 1:10 hold up to three votes
                       (only written when save_votes is true)

    Args:
        data_idx: position of the scene in dataset.index.
        dataset: object providing index, get_label_objects and get_depth.
        data_dir: root output directory.
        split: sub-directory of data_dir to write into.
        num_point: number of points to keep when subsampling.
        type_whitelist: class names of the objects to keep.
        save_votes: whether to compute and save the per-point votes.

    Returns:
        0 if the votes file already existed or the scene was dumped,
        1 if the scene has no object from type_whitelist.
    """
    idx = dataset.index[data_idx]

    def scene_path(suffix):
        # Full output path for this scene, e.g. "<data_dir>/<split>/03/0012" + suffix.
        return os.path.join(data_dir, split,
                            '{:02d}/{:04d}'.format(idx[0], idx[1]) + suffix)

    # Skip if XX/XXXX_votes.npz already exists
    if os.path.exists(scene_path('_votes.npz')):
        return 0

    objects = dataset.get_label_objects(data_idx)
    kept_objects = [obj for obj in objects if obj.classname in type_whitelist]

    # Skip scenes with 0 object or 0 objects out of type_whitelist
    if not kept_objects:
        print(data_idx)
        return 1

    object_list = []
    for obj in kept_objects:
        obb = np.zeros(10)
        obb[0:3] = obj.centroid
        # Note that compared with that in data_viz, we do not time 2 to l,w.h
        # neither do we flip the heading angle
        obb[3:6] = np.array([obj.l, obj.w, obj.h])
        obb[6:9] = obj.heading_angle
        obb[9] = blender_utils.type2class[obj.classname]
        object_list.append(obb)
    # kept_objects is non-empty here, so there is always something to stack.
    obbs = np.vstack(object_list)  # (K,10) K objects with 10 data entries

    pc_upright_depth = dataset.get_depth(data_idx)
    # NOTE(review): this checks shape[1]; whether that axis counts points or
    # per-point features depends on dataset.get_depth - confirm.
    assert pc_upright_depth.shape[
        1] > 0, "Es gibt keine Datenpunkte in der Pointcloud"
    pc_upright_depth_subsampled = pc_util.random_sampling(
        pc_upright_depth, num_point)

    np.savez_compressed(scene_path('_pc.npz'), pc=pc_upright_depth_subsampled)
    np.save(scene_path('_bbox.npy'), obbs)

    if save_votes:
        N = pc_upright_depth_subsampled.shape[0]
        point_votes = np.zeros((N, 10))  # 3 votes and 1 vote mask
        point_vote_idx = np.zeros(
            (N)).astype(np.int32)  # in the range of [0,2]
        indices = np.arange(N)
        for obj in kept_objects:
            try:
                # Find all points in this object's OBB
                box3d_pts_3d = blender_utils.my_compute_box_3d(
                    obj.centroid, np.array([obj.l, obj.w, obj.h]),
                    obj.heading_angle)
                pc_in_box3d, inds = blender_utils.extract_pc_in_box3d(
                    pc_upright_depth_subsampled, box3d_pts_3d)
                # Assign first dimension to indicate it is in an object box
                point_votes[inds, 0] = 1
                # Add the votes (all 0 if the point is not in any object's OBB)
                votes = np.expand_dims(obj.centroid, 0) - pc_in_box3d[:, 0:3]
                # turn dense True,False inds to sparse number-wise inds
                sparse_inds = indices[inds]
                for i, j in enumerate(sparse_inds):
                    # Write into the next free vote slot of point j.
                    start = int(point_vote_idx[j] * 3 + 1)
                    stop = int((point_vote_idx[j] + 1) * 3 + 1)
                    point_votes[j, start:stop] = votes[i, :]
                    # Populate the remaining slots with the first vote
                    if point_vote_idx[j] == 0:
                        point_votes[j, 4:7] = votes[i, :]
                        point_votes[j, 7:10] = votes[i, :]
                point_vote_idx[inds] = np.minimum(2, point_vote_idx[inds] + 1)
            except Exception as err:
                # Best effort: one bad object must not abort the scene, but
                # KeyboardInterrupt/SystemExit still propagate and the cause
                # is reported instead of being hidden.
                print('ERROR ----', data_idx, obj.classname, repr(err))
        np.savez_compressed(scene_path('_votes.npz'), point_votes=point_votes)
    return 0
Exemplo n.º 7
0
    def __getitem__(self, idx):
        """
        Load one scan with its pose labels and per-point votes.

        Returns a dict with following keys:
            point_clouds: float32 sampled points (XYZ, plus color when
                self.use_color, plus height when self.use_height)
            vote_label: float32 object votes of the sampled points
            vote_part_label: float32 part votes of the sampled points
            vote_label_mask: int64 object-vote mask of the sampled points
            scan_idx: int scan index in scan_names list
            center_label: (MAX_NUM_POSE,3) first three pose entries
            rot_label: (MAX_NUM_POSE,3) pose entries 3:6
            sem_cls_label: (MAX_NUM_POSE,) last entry of each pose
            object_label_mask: (MAX_NUM_POSE,) 1 for rows holding a pose
        """
        scan_prefix = os.path.join(self.data_path, self.scan_names[idx])
        point_cloud = np.load(scan_prefix + '_pc.npz')['pc']
        poses = np.load(scan_prefix + '_pose.npy')
        point_votes = np.load(
            scan_prefix + '_object_votes.npz')['point_object_votes']
        point_part_votes = np.load(
            scan_prefix + '_part_votes.npz')['point_part_votes']

        if self.use_color:
            point_cloud = point_cloud[:, 0:6]
            point_cloud[:, 3:] = (point_cloud[:, 3:] - MEAN_COLOR_RGB)
        else:
            point_cloud = point_cloud[:, 0:3]

        if self.use_height:
            z = point_cloud[:, 2]
            # q=0.99 is on np.percentile's 0-100 scale, i.e. close to min z.
            floor_z = np.percentile(z, 0.99)
            point_cloud = np.concatenate(
                [point_cloud, np.expand_dims(z - floor_z, 1)], 1)

        # ------------------------------- LABELS ------------------------------
        num_poses = poses.shape[0]
        target_poses_mask = np.zeros((MAX_NUM_POSE))
        target_poses_mask[0:num_poses] = 1

        target_poses = np.zeros((MAX_NUM_POSE, 6))
        for row, pose in enumerate(poses):
            target_poses[row, :] = pose[0:6]

        # The sampled indices are reused so the votes stay aligned with the points.
        point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                       self.num_points,
                                                       return_choices=True)
        # Column 0 of each vote array is its mask, the rest are the votes.
        point_votes_mask = point_votes[choices, 0]
        point_votes = point_votes[choices, 1:]

        point_part_votes_mask = point_part_votes[choices, 0]  # not returned
        point_part_votes = point_part_votes[choices, 1:]

        target_poses_semcls = np.zeros((MAX_NUM_POSE))
        target_poses_semcls[0:num_poses] = poses[:, -1]

        target_poses_f32 = target_poses.astype(np.float32)
        return {
            'point_clouds': point_cloud.astype(np.float32),
            'vote_label': point_votes.astype(np.float32),
            'vote_part_label': point_part_votes.astype(np.float32),
            'vote_label_mask': point_votes_mask.astype(np.int64),
            'scan_idx': np.array(idx).astype(np.int64),
            'center_label': target_poses_f32[:, 0:3],
            'rot_label': target_poses_f32[:, 3:6],
            'sem_cls_label': target_poses_semcls.astype(np.int64),
            'object_label_mask': target_poses_mask.astype(np.float32),
        }
Exemplo n.º 8
0
    def __getitem__(self, idx):
        """
        Build one sample holding two independent subsamples of the same scan:
        a plain one and one that is augmented when self.augment is set.

        Returns a dict with following keys:
            ema_point_clouds: float32 subsample without augmentation
            point_clouds: float32 subsample, augmented when self.augment
            center_label, heading_class_label, heading_residual_label,
            size_class_label, size_residual_label, sem_cls_label,
            box_label_mask: box labels padded to MAX_NUM_OBJ rows; only
                present when self.load_labels is set
            flip_x_axis, flip_y_axis, rot_mat, rot_angle, scale: the
                augmentation applied to point_clouds (identity values when
                none was applied; flip_y_axis is always 0)
            scan_idx: int scan index in scan_names list
            supervised_mask: always 0
        """
        scan_prefix = os.path.join(self.data_path, self.scan_names[idx])
        mesh_vertices = np.load(scan_prefix + '_pc.npz')['pc']  # Nx6

        if self.use_color:
            raw_point_cloud = mesh_vertices[:, 0:6]
            raw_point_cloud[:, 3:] = (raw_point_cloud[:, 3:] -
                                      MEAN_COLOR_RGB) / 256.0
        else:
            raw_point_cloud = mesh_vertices[:, 0:3]

        if self.use_height:
            z = raw_point_cloud[:, 2]
            # q=0.99 is on np.percentile's 0-100 scale, i.e. close to min z.
            floor_z = np.percentile(z, 0.99)
            raw_point_cloud = np.concatenate(
                [raw_point_cloud, np.expand_dims(z - floor_z, 1)], 1)

        # First random draw: the copy that is never augmented.
        ema_point_cloud = pc_util.random_sampling(raw_point_cloud,
                                                  self.num_points,
                                                  return_choices=False)
        ret_dict = {'ema_point_clouds': ema_point_cloud.astype(np.float32)}

        bboxes = np.load(scan_prefix + '_bbox.npy')  # K,8
        num_boxes = bboxes.shape[0]
        target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
        target_bboxes_mask = np.zeros((MAX_NUM_OBJ))
        angle_classes = np.zeros((MAX_NUM_OBJ, ))
        angle_residuals = np.zeros((MAX_NUM_OBJ, ))
        size_classes = np.zeros((MAX_NUM_OBJ, ))
        size_residuals = np.zeros((MAX_NUM_OBJ, 3))
        target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))

        target_bboxes_mask[0:num_boxes] = 1
        target_bboxes[0:num_boxes, :] = bboxes[:, 0:6]

        for row, bbox in enumerate(bboxes):
            semantic_class = bbox[7]
            heading_cls, heading_res = DC.angle2class(bbox[6])
            # NOTE: The mean size stored in size2class is of full length of box edges,
            # while the dumped l,w,h are *half* lengths, hence the factor 2.
            size_cls, size_res = DC.size2class(bbox[3:6] * 2,
                                               DC.class2type[semantic_class])
            angle_classes[row] = heading_cls
            angle_residuals[row] = heading_res
            size_classes[row] = size_cls
            size_residuals[row] = size_res
            target_bboxes_semcls[row] = semantic_class

        if self.load_labels:
            ret_dict.update({
                'center_label': target_bboxes.astype(np.float32)[:, 0:3],
                'heading_class_label': angle_classes.astype(np.int64),
                'heading_residual_label': angle_residuals.astype(np.float32),
                'size_class_label': size_classes.astype(np.int64),
                'size_residual_label': size_residuals.astype(np.float32),
                'sem_cls_label': target_bboxes_semcls.astype(np.int64),
                'box_label_mask': target_bboxes_mask.astype(np.float32),
            })

        # Second random draw: the copy that may be augmented below.
        point_cloud, choices = pc_util.random_sampling(raw_point_cloud,
                                                       self.num_points,
                                                       return_choices=True)

        # Identity augmentation, reported as-is when self.augment is off.
        flip_x_axis, flip_y_axis = 0, 0
        rot_angle = 0
        rot_mat = np.identity(3)
        scale_ratio = np.ones((1, 3))

        if self.augment:
            # Flipping along the YZ plane with probability 0.5
            if np.random.random() > 0.5:
                flip_x_axis = 1
                point_cloud[:, 0] = -1 * point_cloud[:, 0]

            # Rotation along up-axis/Z-axis, -30 ~ +30 degree
            rot_angle = (np.random.random() * np.pi / 3) - np.pi / 6
            rot_mat = pc_util.rotz(rot_angle)
            point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3],
                                         np.transpose(rot_mat))

            # Augment point cloud scale: 0.85x-1.15x, same factor on all axes
            scale_ratio = np.random.random() * 0.3 + 0.85
            scale_ratio = np.expand_dims(np.tile(scale_ratio, 3), 0)
            point_cloud[:, 0:3] *= scale_ratio
            if self.use_height:
                # The height column is the last one and scales with the cloud.
                point_cloud[:, -1] *= scale_ratio[0, 0]

        ret_dict['point_clouds'] = point_cloud.astype(np.float32)
        ret_dict['flip_x_axis'] = np.array(flip_x_axis).astype(np.int64)
        ret_dict['flip_y_axis'] = np.array(flip_y_axis).astype(np.int64)
        ret_dict['rot_mat'] = rot_mat.astype(np.float32)
        ret_dict['rot_angle'] = np.array(rot_angle).astype(np.float32)
        ret_dict['scale'] = np.array(scale_ratio).astype(np.float32)
        ret_dict['scan_idx'] = np.array(idx).astype(np.int64)
        ret_dict['supervised_mask'] = np.array(0).astype(np.int64)
        return ret_dict
Exemplo n.º 9
0
def extract_sunrgbd_data(idx_filename, split, output_folder, num_point=20000,
                         type_whitelist=DEFAULT_TYPE_WHITELIST,
                         save_votes=False, use_v1=False, skip_empty_scene=True):
    """ Extract scene point clouds, bounding boxes (centroids, box sizes,
    heading angles, semantic classes) and per-point votes.
    Dumped point clouds and boxes are in upright depth coord.

    Args:
        idx_filename: a TXT file where each line is an int number (index);
            blank lines are ignored
        split: training or testing
        output_folder: directory to write into; created if missing
        num_point: number of points kept per scene when subsampling
        type_whitelist: class names of the objects to keep
        save_votes: currently unused - votes are always computed and saved
        use_v1: use the SUN RGB-D V1 data
        skip_empty_scene: if True, skip scenes that contain no object
            (no object in whitelist)

    Dumps (each per-scene array is also pickled as <id>_*.pkl):
        <id>_pc.npz holding the subsampled points under key 'pc'
        <id>_bbox.npy of (K,8) where K is the number of objects, 8 is for
            centroids (cx,cy,cz), dimension (l,w,h), heading_angle and
            semantic_class
        <id>_votes.npz of (N,13): column 0 is 0/1 indicating whether the
            point belongs to an object, columns 1:10 are three sets of GT
            votes for up to three objects and columns 10:13 the matching
            object counters (-1 for points outside every box). If the point
            is only in one object's OBB, the three votes are the same. The
            vote with the smallest norm is moved into the first slot.
        all_obbs_modified_nearest_has_empty.pkl,
        all_pc_modified_nearest_has_empty.pkl,
        all_point_votes_nearest_has_empty.pkl,
        all_point_labels_nearest_has_empty.pkl: lists over all processed
            scenes; the labels are columns [0, 10] of the votes.
    """
    dataset = sunrgbd_object('./sunrgbd_trainval', split, use_v1=use_v1)
    # Read the index list with a context manager so the handle is closed,
    # and ignore blank lines instead of failing on int('').
    with open(idx_filename) as idx_file:
        data_idx_list = [int(line.strip()) for line in idx_file if line.strip()]

    # Creates missing parents too and tolerates an existing directory.
    os.makedirs(output_folder, exist_ok=True)

    def dump_pickle(path, payload):
        # Pickle `payload` to `path` and report it on stdout.
        with open(path, 'wb') as f:
            pickle.dump(payload, f)
            print(f"{path} saved successfully !!")

    all_obbs = []
    all_pc_upright_depth_subsampled = []
    all_point_votes = []
    for data_idx in data_idx_list:
        print('------------- ', data_idx)
        objects = dataset.get_label_objects(data_idx)
        kept_objects = [obj for obj in objects if obj.classname in type_whitelist]

        # Skip scenes with 0 object
        if skip_empty_scene and not kept_objects:
            continue

        object_list = []
        for obj in kept_objects:
            obb = np.zeros((8))
            obb[0:3] = obj.centroid
            # Note that compared with that in data_viz, we do not time 2 to l,w.h
            # neither do we flip the heading angle
            obb[3:6] = np.array([obj.l, obj.w, obj.h])
            obb[6] = obj.heading_angle
            obb[7] = sunrgbd_utils.type2class[obj.classname]
            object_list.append(obb)
        if len(object_list) == 0:
            obbs = np.zeros((0, 8))
        else:
            obbs = np.vstack(object_list)  # (K,8)
        print(f"{data_idx} has {obbs.shape[0]} gt bboxes")

        pc_upright_depth = dataset.get_depth(data_idx)
        pc_upright_depth_subsampled = pc_util.random_sampling(pc_upright_depth, num_point)

        np.savez_compressed(os.path.join(output_folder, '%06d_pc.npz' % (data_idx)),
                            pc=pc_upright_depth_subsampled)
        np.save(os.path.join(output_folder, '%06d_bbox.npy' % (data_idx)), obbs)
        # pickle save
        dump_pickle(os.path.join(output_folder, '%06d_pc.pkl' % (data_idx)),
                    pc_upright_depth_subsampled)
        dump_pickle(os.path.join(output_folder, '%06d_bbox.pkl' % (data_idx)), obbs)
        # add to collection
        all_pc_upright_depth_subsampled.append(pc_upright_depth_subsampled)
        all_obbs.append(obbs)

        N = pc_upright_depth_subsampled.shape[0]
        point_votes = np.zeros((N, 13))  # 1 vote mask + 3 votes and + 3 votes gt ind
        point_votes[:, 10:13] = -1
        point_vote_idx = np.zeros((N)).astype(np.int32)  # in the range of [0,2]
        indices = np.arange(N)
        # NOTE(review): i_obj only advances for objects processed without
        # error, so after a failure it no longer matches the row index of
        # the object in `obbs` - confirm which numbering consumers expect.
        i_obj = 0
        for obj in kept_objects:
            try:
                # Find all points in this object's OBB
                box3d_pts_3d = sunrgbd_utils.my_compute_box_3d(obj.centroid,
                                                               np.array([obj.l, obj.w, obj.h]), obj.heading_angle)
                pc_in_box3d, inds = sunrgbd_utils.extract_pc_in_box3d(
                    pc_upright_depth_subsampled, box3d_pts_3d)
                # Assign first dimension to indicate it is in an object box
                point_votes[inds, 0] = 1
                # Add the votes (all 0 if the point is not in any object's OBB)
                votes = np.expand_dims(obj.centroid, 0) - pc_in_box3d[:, 0:3]
                sparse_inds = indices[inds]  # turn dense True,False inds to sparse number-wise inds
                for i, j in enumerate(sparse_inds):
                    # Write vote and object counter into the next free slot of point j.
                    start = int(point_vote_idx[j] * 3 + 1)
                    stop = int((point_vote_idx[j] + 1) * 3 + 1)
                    point_votes[j, start:stop] = votes[i, :]
                    point_votes[j, point_vote_idx[j] + 10] = i_obj
                    # Populate the remaining slots with the first vote
                    if point_vote_idx[j] == 0:
                        point_votes[j, 4:7] = votes[i, :]
                        point_votes[j, 7:10] = votes[i, :]
                        point_votes[j, 10] = i_obj
                        point_votes[j, 11] = i_obj
                        point_votes[j, 12] = i_obj
                point_vote_idx[inds] = np.minimum(2, point_vote_idx[inds] + 1)
                i_obj += 1
            except Exception as err:
                # Best effort: one bad object must not abort the dump, but
                # KeyboardInterrupt/SystemExit still propagate and the cause
                # is reported instead of being hidden.
                print('ERROR ----', data_idx, obj.classname, repr(err))

        # choose the nearest as the first gt for each point
        for ip in range(N):
            is_pos = (point_votes[ip, 0] > 0)
            if is_pos:
                vote_delta1 = point_votes[ip, 1:4].copy()
                vote_delta2 = point_votes[ip, 4:7].copy()
                vote_delta3 = point_votes[ip, 7:10].copy()
                dist1 = np.sum(vote_delta1 ** 2)
                dist2 = np.sum(vote_delta2 ** 2)
                dist3 = np.sum(vote_delta3 ** 2)

                gt_ind1 = int(point_votes[ip, 10].copy())

                # Plain squared vote length decides which vote is nearest.
                near_ind = np.argmin([dist1, dist2, dist3])

                # Swap slot 0 with the nearest slot, for both the object
                # counter and the vote itself.
                near_start = int(near_ind * 3 + 1)
                near_stop = int((near_ind + 1) * 3 + 1)
                point_votes[ip, 10] = point_votes[ip, 10 + near_ind].copy()
                point_votes[ip, 10 + near_ind] = gt_ind1
                point_votes[ip, 1:4] = point_votes[ip, near_start:near_stop].copy()
                point_votes[ip, near_start:near_stop] = vote_delta1
            else:
                assert point_votes[ip, 10] == -1, "error"
                assert point_votes[ip, 11] == -1, "error"
                assert point_votes[ip, 12] == -1, "error"

        print(f"{data_idx}_votes.npz has {i_obj} gt bboxes")
        np.savez_compressed(os.path.join(output_folder, '%06d_votes.npz' % (data_idx)),
                            point_votes=point_votes)
        dump_pickle(os.path.join(output_folder, '%06d_votes.pkl' % (data_idx)), point_votes)
        all_point_votes.append(point_votes)

    dump_pickle(os.path.join(output_folder, 'all_obbs_modified_nearest_has_empty.pkl'),
                all_obbs)
    dump_pickle(os.path.join(output_folder, 'all_pc_modified_nearest_has_empty.pkl'),
                all_pc_upright_depth_subsampled)
    dump_pickle(os.path.join(output_folder, 'all_point_votes_nearest_has_empty.pkl'),
                all_point_votes)

    # Per-point labels: in-object mask and the counter of the nearest object.
    all_point_labels = [point_votes[:, [0, 10]] for point_votes in all_point_votes]
    dump_pickle(os.path.join(output_folder, 'all_point_labels_nearest_has_empty.pkl'),
                all_point_labels)
Exemplo n.º 10
0
    def __getitem__(self, idx):
        """
        Returns a dict with following keys:
            point_clouds: (N,3+C)
            center_label: (MAX_NUM_OBJ,3) for GT box center XYZ
            sem_cls_label: (MAX_NUM_OBJ,) semantic class index
            angle_class_label: (MAX_NUM_OBJ,) with int values in 0,...,NUM_HEADING_BIN-1
            angle_residual_label: (MAX_NUM_OBJ,)
            size_classe_label: (MAX_NUM_OBJ,) with int values in 0,...,NUM_SIZE_CLUSTER
            size_residual_label: (MAX_NUM_OBJ,3)
            box_label_mask: (MAX_NUM_OBJ) as 0/1 with 1 indicating a unique box
            point_votes: (N,3) with votes XYZ
            point_votes_mask: (N,) with 0/1 with 1 indicating the point is in one of the object's OBB.
            scan_idx: int scan index in scan_names list
            pcl_color: unused
        """
        scan_name = self.scan_names[idx]
        mesh_vertices = np.load(os.path.join(self.data_path, scan_name)+'_vert.npy')
        meta_vertices = np.load(os.path.join(self.data_path, scan_name)+'_all_noangle_40cls.npy') ### Need to change the name here
        
        instance_labels = meta_vertices[:,-2]
        semantic_labels = meta_vertices[:,-1]
        
        if not self.use_color:
            point_cloud = mesh_vertices[:,0:3] # do not use color for now
            pcl_color = mesh_vertices[:,3:6]
        else:
            point_cloud = mesh_vertices[:,0:6] 
            point_cloud[:,3:] = (point_cloud[:,3:]-MEAN_COLOR_RGB)/256.0
            pcl_color = (point_cloud[:,3:]-MEAN_COLOR_RGB)/256.0
        
        if self.use_height:
            floor_height = np.percentile(point_cloud[:,2],0.99)
            height = point_cloud[:,2] - floor_height
            point_cloud = np.concatenate([point_cloud, np.expand_dims(height, 1)],1) 
        # ------------------------------- LABELS ------------------------------        
        target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
        target_bboxes_mask = np.zeros((MAX_NUM_OBJ))    
        angle_classes = np.zeros((MAX_NUM_OBJ,))
        angle_label = np.zeros((MAX_NUM_OBJ,))
        angle_residuals = np.zeros((MAX_NUM_OBJ,))
        size_classes = np.zeros((MAX_NUM_OBJ,))
        size_residuals = np.zeros((MAX_NUM_OBJ, 3))

        ### For statistics
        surface_cue = np.zeros((MAX_NUM_OBJ))
        line_cue = np.zeros((MAX_NUM_OBJ,))
        
        before_sample = np.unique(instance_labels)
        while True:
            orig_point_cloud = np.copy(point_cloud)
            temp_point_cloud, choices = pc_util.random_sampling(orig_point_cloud,
                                                           self.num_points, return_choices=True)
            after_sample = np.unique(instance_labels[choices])
            if np.array_equal(before_sample, after_sample):
                point_cloud = temp_point_cloud
                break
        instance_labels = instance_labels[choices]
        semantic_labels = semantic_labels[choices]
        meta_vertices = meta_vertices[choices]
        
        pcl_color = pcl_color[choices]
        
        # ------------------------------- DATA AUGMENTATION ------------------------------        
        if self.augment:
            if np.random.random() > 0.5:
                # Flipping along the YZ plane
                point_cloud[:,0] = -1 * point_cloud[:,0]
                # target_bboxes[:,0] = -1 * target_bboxes[:,0]                
                meta_vertices[:, 0] = -1 * meta_vertices[:, 0]                
                meta_vertices[:, 6] = -1 * meta_vertices[:, 6]
                
            if np.random.random() > 0.5:
                # Flipping along the XZ plane
                point_cloud[:,1] = -1 * point_cloud[:,1]
                # target_bboxes[:,1] = -1 * target_bboxes[:,1]
                meta_vertices[:, 1] = -1 * meta_vertices[:, 1]
                meta_vertices[:, 6] = -1 * meta_vertices[:, 6]
            
            # Rotation along up-axis/Z-axis
            rot_angle = (np.random.random()*np.pi/18) - np.pi/36 # -5 ~ +5 degree
            rot_mat = pc_util.rotz(rot_angle).astype(np.float32)
            point_cloud[:,0:3] = np.dot(point_cloud[:,0:3], np.transpose(rot_mat))
            meta_vertices[:, :6] = rotate_aligned_boxes(meta_vertices[:, :6], rot_mat)
            meta_vertices[:, 6] += rot_angle
        
        # ------------------------------- Plane and point ------------------------------
        # compute votes *AFTER* augmentation
        # generate votes
        # Note: since there's no map between bbox instance labels and
        # pc instance_labels (it had been filtered 
        # in the data preparation step) we'll compute the instance bbox
        # from the points sharing the same instance label. 
        point_votes = np.zeros([self.num_points, 3])
        point_votes_mask = np.zeros(self.num_points)

        point_boundary_mask_z = np.zeros(self.num_points)
        point_boundary_mask_xy = np.zeros(self.num_points)
        point_boundary_offset_z = np.zeros([self.num_points, 3])
        point_boundary_offset_xy = np.zeros([self.num_points, 3])
        point_boundary_sem_z = np.zeros([self.num_points, 3+2+1])
        point_boundary_sem_xy = np.zeros([self.num_points, 3+1+1])

        point_line_mask = np.zeros(self.num_points)
        point_line_offset = np.zeros([self.num_points, 3])
        point_line_sem = np.zeros([self.num_points, 3+1])

        point_sem_label = np.zeros(self.num_points)
        
        selected_instances = []
        selected_centers = []
        selected_centers_support = []
        selected_centers_bsupport = []
        obj_meta = []

        counter = -1
        for i_instance in np.unique(instance_labels):            
            # find all points belong to that instance
            ind = np.where(instance_labels == i_instance)[0]

            if semantic_labels[ind[0]] in DC.nyu40ids:
                counter += 1
                idx_instance = counter
                x = point_cloud[ind,:3]
                ### Meta information here
                meta = meta_vertices[ind[0]]
                obj_meta.append(meta)
                
                ### Get the centroid here
                center = meta[:3]

                point_votes[ind, :] = center - x
                point_votes_mask[ind] = 1.0
                point_sem_label[ind] = DC.nyu40id2class_sem[meta[-1]]
                                
                ### Corners
                corners, xmin, ymin, zmin, xmax, ymax, zmax = params2bbox(center, meta[3], meta[4], meta[5], meta[6])
                
                ## Get lower four lines
                plane_lower_temp = np.array([0,0,1,-corners[6,-1]])
                para_points = np.array([corners[1], corners[3], corners[5], corners[7]])
                newd = np.sum(para_points * plane_lower_temp[:3], 1)
                if check_upright(para_points) and plane_lower_temp[0]+plane_lower_temp[1] < LOWER_THRESH:
                    plane_lower = np.array([0,0,1,plane_lower_temp[-1]]) 
                    plane_upper = np.array([0,0,1,-np.mean(newd)])
                else:
                    import pdb;pdb.set_trace()
                    print ("error with upright")
                if check_z(plane_upper, para_points) == False:
                    import pdb;pdb.set_trace()
                ### Get the boundary points here
                alldist = np.abs(np.sum(x*plane_lower[:3], 1) + plane_lower[-1])
                mind = np.min(alldist)
                sel = np.abs(alldist - mind) < DIST_THRESH
                
                ## Get lower four lines
                line_sel1, line_sel2, line_sel3, line_sel4 = get_linesel(x[sel], xmin, xmax, ymin, ymax)
                if np.sum(line_sel1) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel1]] = 1.0
                    linecenter = np.mean(x[sel][line_sel1], axis=0)
                    linecenter[1] = (ymin+ymax)/2.0
                    point_line_offset[ind[sel][line_sel1]] = linecenter - x[sel][line_sel1]
                    point_line_sem[ind[sel][line_sel1]] = np.array([linecenter[0], linecenter[1], linecenter[2], np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]])
                if np.sum(line_sel2) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel2]] = 1.0
                    linecenter = np.mean(x[sel][line_sel2], axis=0)
                    linecenter[1] = (ymin+ymax)/2.0
                    point_line_offset[ind[sel][line_sel2]] = linecenter - x[sel][line_sel2]
                    point_line_sem[ind[sel][line_sel2]] = np.array([linecenter[0], linecenter[1], linecenter[2], np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]])
                if np.sum(line_sel3) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel3]] = 1.0
                    linecenter = np.mean(x[sel][line_sel3], axis=0)
                    linecenter[0] = (xmin+xmax)/2.0
                    point_line_offset[ind[sel][line_sel3]] = linecenter - x[sel][line_sel3]
                    point_line_sem[ind[sel][line_sel3]] = np.array([linecenter[0], linecenter[1], linecenter[2], np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]])
                if np.sum(line_sel4) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel4]] = 1.0
                    linecenter = np.mean(x[sel][line_sel4], axis=0)
                    linecenter[0] = (xmin+xmax)/2.0
                    point_line_offset[ind[sel][line_sel4]] = linecenter - x[sel][line_sel4]
                    point_line_sem[ind[sel][line_sel4]] = np.array([linecenter[0], linecenter[1], linecenter[2], np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]])
                ### Set the surface labels here
                if np.sum(sel) > NUM_POINT and np.var(alldist[sel]) < VAR_THRESH:
                    center = np.array([(xmin+xmax)/2.0, (ymin+ymax)/2.0, np.mean(x[sel][:,2])])
                    sel_global = ind[sel]
                    point_boundary_mask_z[sel_global] = 1.0
                    point_boundary_sem_z[sel_global] = np.array([center[0], center[1], center[2], xmax - xmin, ymax - ymin, np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]])
                    point_boundary_offset_z[sel_global] = center - x[sel]
                                    
                ### Get the boundary points here
                alldist = np.abs(np.sum(x*plane_upper[:3], 1) + plane_upper[-1])
                mind = np.min(alldist)
                sel = np.abs(alldist - mind) < DIST_THRESH
                ## Get upper four lines
                line_sel1, line_sel2, line_sel3, line_sel4 = get_linesel(x[sel], xmin, xmax, ymin, ymax)
                if np.sum(line_sel1) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel1]] = 1.0
                    linecenter = np.mean(x[sel][line_sel1], axis=0)
                    linecenter[1] = (ymin+ymax)/2.0
                    point_line_offset[ind[sel][line_sel1]] = linecenter - x[sel][line_sel1]
                    point_line_sem[ind[sel][line_sel1]] = np.array([linecenter[0], linecenter[1], linecenter[2], np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]])
                if np.sum(line_sel2) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel2]] = 1.0
                    linecenter = np.mean(x[sel][line_sel2], axis=0)
                    linecenter[1] = (ymin+ymax)/2.0
                    point_line_offset[ind[sel][line_sel2]] = linecenter - x[sel][line_sel2]
                    point_line_sem[ind[sel][line_sel2]] = np.array([linecenter[0], linecenter[1], linecenter[2], np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]])
                if np.sum(line_sel3) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel3]] = 1.0
                    linecenter = np.mean(x[sel][line_sel3], axis=0)
                    linecenter[0] = (xmin+xmax)/2.0
                    point_line_offset[ind[sel][line_sel3]] = linecenter - x[sel][line_sel3]
                    point_line_sem[ind[sel][line_sel3]] = np.array([linecenter[0], linecenter[1], linecenter[2], np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]])
                if np.sum(line_sel4) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel4]] = 1.0
                    linecenter = np.mean(x[sel][line_sel4], axis=0)
                    linecenter[0] = (xmin+xmax)/2.0
                    point_line_offset[ind[sel][line_sel4]] = linecenter - x[sel][line_sel4]
                    point_line_sem[ind[sel][line_sel4]] = np.array([linecenter[0], linecenter[1], linecenter[2], np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]])
                
                if np.sum(sel) > NUM_POINT and np.var(alldist[sel]) < VAR_THRESH:
                    center = np.array([(xmin+xmax)/2.0, (ymin+ymax)/2.0, np.mean(x[sel][:,2])])
                    sel_global = ind[sel]
                    point_boundary_mask_z[sel_global] = 1.0
                    point_boundary_sem_z[sel_global] = np.array([center[0], center[1], center[2], xmax - xmin, ymax - ymin, np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]])
                    point_boundary_offset_z[sel_global] = center - x[sel]
                                    
                ## Get left two lines
                v1 = corners[3] - corners[2]
                v2 = corners[2] - corners[0]
                cp = np.cross(v1, v2)
                d = -np.dot(cp,corners[0])
                a,b,c = cp
                plane_left_temp = np.array([a, b, c, d])
                para_points = np.array([corners[4], corners[5], corners[6], corners[7]])
                ### Normalize xy here
                plane_left_temp /= np.linalg.norm(plane_left_temp[:3])
                newd = np.sum(para_points * plane_left_temp[:3], 1)
                if plane_left_temp[2] < LOWER_THRESH:
                    plane_left = plane_left_temp#np.array([cls,res,tempsign,plane_left_temp[-1]]) 
                    plane_right = np.array([plane_left_temp[0], plane_left_temp[1], plane_left_temp[2], -np.mean(newd)])
                else:
                    import pdb;pdb.set_trace()
                    print ("error with upright")
                ### Get the boundary points here
                alldist = np.abs(np.sum(x*plane_left[:3], 1) + plane_left[-1])
                mind = np.min(alldist)
                sel = np.abs(alldist - mind) < DIST_THRESH
                ## Get upper four lines
                line_sel1, line_sel2 = get_linesel2(x[sel], ymin, ymax, zmin, zmax, axis=1)
                if np.sum(line_sel1) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel1]] = 1.0
                    linecenter = np.mean(x[sel][line_sel1], axis=0)
                    linecenter[2] = (zmin+zmax)/2.0
                    point_line_offset[ind[sel][line_sel1]] = linecenter - x[sel][line_sel1]
                    point_line_sem[ind[sel][line_sel1]] = np.array([linecenter[0], linecenter[1], linecenter[2], np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]])
                if np.sum(line_sel2) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel2]] = 1.0
                    linecenter = np.mean(x[sel][line_sel2], axis=0)
                    linecenter[2] = (zmin+zmax)/2.0
                    point_line_offset[ind[sel][line_sel2]] = linecenter - x[sel][line_sel2]
                    point_line_sem[ind[sel][line_sel2]] = np.array([linecenter[0], linecenter[1], linecenter[2], np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]])
                if np.sum(sel) > NUM_POINT and np.var(alldist[sel]) < VAR_THRESH:
                    center = np.array([np.mean(x[sel][:,0]), np.mean(x[sel][:,1]), (zmin+zmax)/2.0])
                    sel_global = ind[sel]
                    point_boundary_mask_xy[sel_global] = 1.0
                    point_boundary_sem_xy[sel_global] = np.array([center[0], center[1], center[2], zmax - zmin, np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]])
                    point_boundary_offset_xy[sel_global] = center - x[sel]
                    
                ### Get the boundary points here
                alldist = np.abs(np.sum(x*plane_right[:3], 1) + plane_right[-1])
                mind = np.min(alldist)
                sel = np.abs(alldist - mind) < DIST_THRESH
                line_sel1, line_sel2 = get_linesel2(x[sel], ymin, ymax,  zmin, zmax, axis=1)
                if np.sum(line_sel1) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel1]] = 1.0
                    linecenter = np.mean(x[sel][line_sel1], axis=0)
                    linecenter[2] = (zmin+zmax)/2.0
                    point_line_offset[ind[sel][line_sel1]] = linecenter - x[sel][line_sel1]
                    point_line_sem[ind[sel][line_sel1]] = np.array([linecenter[0], linecenter[1], linecenter[2], np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]])
                if np.sum(line_sel2) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel2]] = 1.0
                    linecenter = np.mean(x[sel][line_sel2], axis=0)
                    linecenter[2] = (zmin+zmax)/2.0
                    point_line_offset[ind[sel][line_sel2]] = linecenter - x[sel][line_sel2]
                    point_line_sem[ind[sel][line_sel2]] = np.array([linecenter[0], linecenter[1], linecenter[2], np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]])
                if np.sum(sel) > NUM_POINT and np.var(alldist[sel]) < VAR_THRESH:
                    center = np.array([np.mean(x[sel][:,0]), np.mean(x[sel][:,1]), (zmin+zmax)/2.0])
                    sel_global = ind[sel]
                    point_boundary_mask_xy[sel_global] = 1.0
                    point_boundary_sem_xy[sel_global] = np.array([center[0], center[1], center[2], zmax - zmin, np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]])
                    point_boundary_offset_xy[sel_global] = center - x[sel]
                                        
                ### Get the boundary points here
                v1 = corners[0] - corners[4]
                v2 = corners[4] - corners[5]
                cp = np.cross(v1, v2)
                d = -np.dot(cp,corners[5])
                a,b,c = cp
                plane_front_temp = np.array([a, b, c, d])
                para_points = np.array([corners[2], corners[3], corners[6], corners[7]])
                plane_front_temp /= np.linalg.norm(plane_front_temp[:3])
                newd = np.sum(para_points * plane_front_temp[:3], 1)
                if plane_front_temp[2] < LOWER_THRESH:
                    plane_front = plane_front_temp
                    plane_back = np.array([plane_front_temp[0], plane_front_temp[1], plane_front_temp[2], -np.mean(newd)])
                else:
                    import pdb;pdb.set_trace()
                    print ("error with upright")
                ### Get the boundary points here
                alldist = np.abs(np.sum(x*plane_front[:3], 1) + plane_front[-1])
                mind = np.min(alldist)
                sel = np.abs(alldist - mind) < DIST_THRESH
                if np.sum(sel) > NUM_POINT and np.var(alldist[sel]) < VAR_THRESH:
                    center = np.array([np.mean(x[sel][:,0]), np.mean(x[sel][:,1]), (zmin+zmax)/2.0])
                    sel_global = ind[sel]
                    point_boundary_mask_xy[sel_global] = 1.0
                    point_boundary_sem_xy[sel_global] = np.array([center[0], center[1], center[2], zmax - zmin, np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]])
                    point_boundary_offset_xy[sel_global] = center - x[sel]
                                    
                ### Get the boundary points here
                alldist = np.abs(np.sum(x*plane_back[:3], 1) + plane_back[-1])
                mind = np.min(alldist)
                sel = np.abs(alldist - mind) < DIST_THRESH
                if np.sum(sel) > NUM_POINT and np.var(alldist[sel]) < VAR_THRESH:
                    center = np.array([np.mean(x[sel][:,0]), np.mean(x[sel][:,1]), (zmin+zmax)/2.0])
                    sel_global = ind[sel]
                    point_boundary_mask_xy[sel_global] = 1.0
                    point_boundary_sem_xy[sel_global] = np.array([center[0], center[1], center[2], zmax - zmin, np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]])
                    point_boundary_offset_xy[sel_global] = center - x[sel]
                    
        num_instance = len(obj_meta)
        obj_meta = np.array(obj_meta)
        obj_meta = obj_meta.reshape(-1, 9)

        target_bboxes_mask[0:num_instance] = 1
        target_bboxes[0:num_instance,:6] = obj_meta[:,0:6]
        
        class_ind = [np.where(DC.nyu40ids == x)[0][0] for x in obj_meta[:,-1]]   
        # NOTE: set size class as semantic class. Consider use size2class.
        size_classes[0:num_instance] = class_ind
        size_residuals[0:num_instance, :] = \
                                            target_bboxes[0:num_instance, 3:6] - DC.mean_size_arr[class_ind,:]
        
        point_votes = np.tile(point_votes, (1, 3)) # make 3 votes identical
        point_sem_label = np.tile(np.expand_dims(point_sem_label, -1), (1, 3)) # make 3 votes identical

        ret_dict = {}
                
        ret_dict['point_clouds'] = point_cloud.astype(np.float32)
        ret_dict['center_label'] = target_bboxes.astype(np.float32)[:,0:3]
        ret_dict['size_label'] = target_bboxes.astype(np.float32)[:,3:6]
        ret_dict['heading_label'] = angle_label.astype(np.float32)
        ret_dict['heading_class_label'] = angle_classes.astype(np.int64)
        ret_dict['heading_residual_label'] = angle_residuals.astype(np.float32)
        ret_dict['size_class_label'] = size_classes.astype(np.int64)
        ret_dict['size_residual_label'] = size_residuals.astype(np.float32)

        if self.use_height:
            ret_dict['floor_height'] = floor_height
        
        target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))                                
        target_bboxes_semcls[0:num_instance] = \
            [DC.nyu40id2class[x] for x in obj_meta[:,-1][0:obj_meta.shape[0]]]                
        ret_dict['sem_cls_label'] = target_bboxes_semcls.astype(np.int64)

        ret_dict['point_sem_cls_label'] = point_sem_label.astype(np.int64)
        ret_dict['box_label_mask'] = target_bboxes_mask.astype(np.float32)

        ret_dict['point_boundary_mask_z'] = point_boundary_mask_z.astype(np.float32)
        ret_dict['point_boundary_mask_xy'] = point_boundary_mask_xy.astype(np.float32)
        ret_dict['point_boundary_offset_z'] = point_boundary_offset_z.astype(np.float32)
        ret_dict['point_boundary_offset_xy'] = point_boundary_offset_xy.astype(np.float32)
        ret_dict['point_boundary_sem_z'] = point_boundary_sem_z.astype(np.float32)
        ret_dict['point_boundary_sem_xy'] = point_boundary_sem_xy.astype(np.float32)

        ret_dict['point_line_mask'] = point_line_mask.astype(np.float32)
        ret_dict['point_line_offset'] = point_line_offset.astype(np.float32)
        ret_dict['point_line_sem'] = point_line_sem.astype(np.float32)
        
        ret_dict['vote_label'] = point_votes.astype(np.float32)
        ret_dict['vote_label_mask'] = point_votes_mask.astype(np.int64)
        
        ret_dict['scan_idx'] = np.array(idx).astype(np.int64)
        ret_dict['pcl_color'] = pcl_color
        ret_dict['num_instance'] = num_instance
        
        return ret_dict
Exemplo n.º 11
0
    def __getitem__(self, idx):
        """
        Load one unlabeled scan and build two views of the same sampled points.

        Returns a dict with following keys:
            point_clouds: (N,3+C) sampled points; when self.augment is set
                they are randomly flipped, rotated about Z and scaled
            ema_point_clouds: (N,3+C) copy of the same sampled points that
                only receives its own, independently drawn, flips
            scan_idx: int scan index in scan_names list
            supervised_mask: constant 0
            flip_x_axis, flip_y_axis: 0/1, flips applied to point_clouds
            flip_x_axis_ema, flip_y_axis_ema: 0/1, flips applied to
                ema_point_clouds
            rot_mat: (3,3) rotation applied to point_clouds
                (identity without augmentation)
            scale: (1,3) scale applied to point_clouds
                (ones without augmentation)
        """
        scan_name = self.scan_names[idx]
        npz_path = os.path.join(self.data_path, scan_name) + '_pc.npz'
        vertices = np.load(npz_path)['pc']  # Nx6

        if self.use_color:
            cloud = vertices[:, 0:6]
            cloud[:, 3:] = (cloud[:, 3:] - MEAN_COLOR_RGB) / 256.0
        else:
            cloud = vertices[:, 0:3]  # geometry only

        if self.use_height:
            # Height is measured from the 0.99th percentile of Z.
            z_values = cloud[:, 2]
            floor_z = np.percentile(z_values, 0.99)
            height_column = np.expand_dims(z_values - floor_z, 1)
            cloud = np.concatenate([cloud, height_column], 1)

        point_cloud, _ = pc_util.random_sampling(cloud,
                                                 self.num_points,
                                                 return_choices=True)
        # The second view starts from exactly the same sampled points.
        ema_point_cloud = point_cloud.copy()

        # ------------------------------- DATA AUGMENTATION ------------------------------
        flip_x_axis = flip_y_axis = 0
        flip_x_axis_ema = flip_y_axis_ema = 0
        rot_mat = np.identity(3)
        scale_ratio = np.ones((1, 3))

        if self.augment:
            # Random draws happen in a fixed order:
            # x flip, x flip (ema), y flip, y flip (ema), rotation, scale.
            flip_x_axis = int(np.random.random() > 0.5)
            if flip_x_axis:
                # Mirror across the YZ plane
                point_cloud[:, 0] = -point_cloud[:, 0]

            flip_x_axis_ema = int(np.random.random() > 0.5)
            if flip_x_axis_ema:
                ema_point_cloud[:, 0] = -ema_point_cloud[:, 0]

            flip_y_axis = int(np.random.random() > 0.5)
            if flip_y_axis:
                # Mirror across the XZ plane
                point_cloud[:, 1] = -point_cloud[:, 1]

            flip_y_axis_ema = int(np.random.random() > 0.5)
            if flip_y_axis_ema:
                ema_point_cloud[:, 1] = -ema_point_cloud[:, 1]

            # Rotate about the up (Z) axis by an angle in -30 ~ +30 degree
            rot_angle = np.random.random() * np.pi / 3 - np.pi / 6
            rot_mat = pc_util.rotz(rot_angle)
            point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3], rot_mat.T)

            # Uniform scale in 0.85x-1.15x, stored as a (1,3) row
            scale_factor = np.random.random() * 0.3 + 0.85
            scale_ratio = np.tile(scale_factor, 3)[np.newaxis, :]
            point_cloud[:, 0:3] *= scale_ratio
            if self.use_height:
                # The trailing height channel scales with the geometry
                point_cloud[:, -1] *= scale_ratio[0, 0]

        return {
            'point_clouds': point_cloud.astype(np.float32),
            'scan_idx': np.array(idx).astype(np.int64),
            'supervised_mask': np.array(0).astype(np.int64),
            'ema_point_clouds': ema_point_cloud.astype(np.float32),
            'flip_x_axis': np.array(flip_x_axis).astype(np.int64),
            'flip_y_axis': np.array(flip_y_axis).astype(np.int64),
            'rot_mat': rot_mat.astype(np.float32),
            'scale': np.array(scale_ratio).astype(np.float32),
            'flip_x_axis_ema': np.array(flip_x_axis_ema).astype(np.int64),
            'flip_y_axis_ema': np.array(flip_y_axis_ema).astype(np.int64),
        }
Exemplo n.º 12
0
def preprocess_point_cloud(point_cloud):
    """Turn a numpy point cloud into a single-item XYZ batch for the forward pass.

    Only the first three columns are kept (color is not used for now), the
    points are sub-sampled to FLAGS.num_point with random_sampling, and the
    result is reshaped to (1, -1, 3).
    """
    xyz = point_cloud[:, 0:3]
    sampled = random_sampling(xyz, FLAGS.num_point)
    return sampled.reshape(1, -1, 3)
Exemplo n.º 13
0
def data_viz(data_dir, dump_dir=os.path.join(BASE_DIR, 'data_viz_dump')):
    """Examine and visualize SUN RGB-D data.

    Walks the dataset in a fixed shuffled order (NumPy seed 0). For every
    sample it prints diagnostics and writes the following files into
    ``dump_dir`` (each sample overwrites the files of the previous one):

    * ``img_depth.jpg``      - image with the projected points colored by depth
    * ``img_box2d.jpg``      - image with the 2D boxes and class names
    * ``pcrgb_{10,20,40,80}k.obj`` - randomly sub-sampled colored point clouds
    * ``obbs.ply``           - oriented 3D boxes
    * ``box3d_corners.ply``, ``box3d_ori.ply`` - box corners / orientation points

    Samples without labeled objects stop after the point-cloud dump. For all
    other samples the function blocks on ``input()`` before moving on.

    Args:
        data_dir: dataset location, handed to ``sunrgbd_object``.
        dump_dir: output directory; created (with parents) when missing.
    """
    # Loop-invariant setup: the import, the color table and the output
    # directory do not depend on the sample being processed.
    import matplotlib.pyplot as plt

    # pyplot.get_cmap is used instead of plt.cm.get_cmap, which newer
    # matplotlib releases no longer provide.
    hsv = plt.get_cmap('hsv', 256)
    cmap = np.array([hsv(i) for i in range(256)])[:, :3] * 255

    os.makedirs(dump_dir, exist_ok=True)

    sunrgbd = sunrgbd_object(data_dir)
    idxs = np.arange(1, len(sunrgbd) + 1)  # sample ids start at 1
    np.random.seed(0)
    np.random.shuffle(idxs)

    for data_idx in idxs:
        print('-' * 10, 'data index: ', data_idx)
        pc = sunrgbd.get_depth(data_idx)
        print('Point cloud shape:', pc.shape)

        # Project points to image
        calib = sunrgbd.get_calibration(data_idx)
        uv, d = calib.project_upright_depth_to_image(pc[:, 0:3])
        print('Point UV:', uv)
        print('Point depth:', d)

        img = sunrgbd.get_image(data_idx)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        for i in range(uv.shape[0]):
            depth = d[i]
            if not depth > 0:
                # Zero, negative or NaN depth has no usable color index
                # (120 / depth would divide by zero or go negative).
                continue
            # Clamp to the last table entry: depths below ~0.47 would
            # otherwise index past the 256-entry color table.
            color = cmap[int(min(120.0 / depth, 255)), :]
            cv2.circle(img, (int(np.round(uv[i, 0])), int(np.round(uv[i, 1]))),
                       2,
                       color=tuple(color),
                       thickness=-1)
        Image.fromarray(img).save(os.path.join(dump_dir, 'img_depth.jpg'))

        # Load box labels
        objects = sunrgbd.get_label_objects(data_idx)
        print('Objects:', objects)

        # Draw 2D boxes on image
        img = sunrgbd.get_image(data_idx)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        for i, obj in enumerate(objects):
            cv2.rectangle(img, (int(obj.xmin), int(obj.ymin)),
                          (int(obj.xmax), int(obj.ymax)), (0, 255, 0), 2)
            cv2.putText(img, '%d %s' % (i, obj.classname),
                        (max(int(obj.xmin), 15), max(int(obj.ymin), 15)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)
        Image.fromarray(img).save(os.path.join(dump_dir, 'img_box2d.jpg'))

        # Dump OBJ files for the colored point cloud
        for num_point in [10000, 20000, 40000, 80000]:
            sampled_pcrgb = pc_util.random_sampling(pc, num_point)
            # Colors are scaled by 256 (presumably from [0, 1] - confirm
            # against get_depth). They must be clipped and stored as uint8:
            # a signed int8 cast wraps every channel value >= 128 negative.
            rgb = np.clip(sampled_pcrgb[:, 3:] * 256, 0, 255).astype(np.uint8)
            pc_util.write_ply_rgb(
                sampled_pcrgb[:, 0:3],
                rgb,
                os.path.join(dump_dir, 'pcrgb_%dk.obj' % (num_point // 1000)))

        # Dump OBJ files for 3D bounding boxes
        # l,w,h correspond to dx,dy,dz
        # heading angle is from +X rotating towards -Y
        # (+X is degree, -Y is 90 degrees)
        oriented_boxes = []
        for obj in objects:
            obb = np.zeros((7))
            obb[0:3] = obj.centroid
            # Some conversion to map with default setting of w,l,h
            # and angle in box dumping
            obb[3:6] = np.array([obj.l, obj.w, obj.h]) * 2
            obb[6] = -1 * obj.heading_angle
            print('Object cls, heading, l, w, h:',\
                 obj.classname, obj.heading_angle, obj.l, obj.w, obj.h)
            oriented_boxes.append(obb)
        if len(oriented_boxes) > 0:
            oriented_boxes = np.vstack(tuple(oriented_boxes))
            pc_util.write_oriented_bbox(oriented_boxes,
                                        os.path.join(dump_dir, 'obbs.ply'))
        else:
            # Nothing labeled in this sample: skip the 3D box dump (the
            # concatenations below would fail on empty lists).
            print('-' * 30)
            continue

        # Draw 3D boxes on depth points
        box3d = []
        ori3d = []
        for obj in objects:
            corners_3d_image, corners_3d = sunrgbd_utils.compute_box_3d(
                obj, calib)
            ori_3d_image, ori_3d = sunrgbd_utils.compute_orientation_3d(
                obj, calib)
            print('Corners 3D: ', corners_3d)
            box3d.append(corners_3d)
            ori3d.append(ori_3d)
        pc_box3d = np.concatenate(box3d, 0)
        pc_ori3d = np.concatenate(ori3d, 0)
        print(pc_box3d.shape)
        print(pc_ori3d.shape)
        pc_util.write_ply(pc_box3d, os.path.join(dump_dir,
                                                 'box3d_corners.ply'))
        pc_util.write_ply(pc_ori3d, os.path.join(dump_dir, 'box3d_ori.ply'))
        print('-' * 30)
        print('Point clouds and bounding boxes saved to PLY files under %s' %
              (dump_dir))
        print('Type anything to continue to the next sample...')
        input()
Exemplo n.º 14
0
    def __getitem__(self, idx):
        """
        Load one labeled scan and assemble its detection training targets.

        Returns a dict with following keys:
            mesh_vertices: raw scan vertices re-sampled with
                pc_util.random_sampling(..., 50000)
            point_clouds: (N,3+C)
            center_label: (MAX_NUM_OBJ,3) for GT box center XYZ
            heading_class_label: (MAX_NUM_OBJ,) always zero in this loader
            heading_residual_label: (MAX_NUM_OBJ,) always zero in this loader
            size_class_label: (MAX_NUM_OBJ,) index of the box's semantic id
                within DC.nyu40ids
            size_residual_label: (MAX_NUM_OBJ,3) box size minus
                DC.mean_size_arr of its class
            sem_cls_label: (MAX_NUM_OBJ,) semantic class index
            box_label_mask: (MAX_NUM_OBJ) as 0/1 with 1 indicating a unique box
            vote_label: (N,9) vote XYZ repeated three times
            vote_label_mask: (N,) with 0/1 with 1 indicating the point belongs
                to an instance whose semantic label is in DC.nyu40ids
            scan_idx: int scan index in scan_names list
            pcl_color: raw color columns (3:6) of the sampled points, unused
        """

        scan_name = self.scan_names[idx]
        scan_prefix = os.path.join(self.data_path, scan_name)
        mesh_vertices = np.load(scan_prefix + '_vert.npy')
        instance_labels = np.load(scan_prefix + '_ins_label.npy')
        semantic_labels = np.load(scan_prefix + '_sem_label.npy')
        instance_bboxes = np.load(scan_prefix + '_bbox.npy')

        if not self.use_color:
            point_cloud = mesh_vertices[:, 0:3]  # do not use color for now
            pcl_color = mesh_vertices[:, 3:6]
        else:
            # pcl_color used to be assigned only in the branch above, so
            # enabling color crashed with UnboundLocalError further down.
            # Copy first: point_cloud is a view on mesh_vertices and the
            # normalization below writes through it.
            pcl_color = mesh_vertices[:, 3:6].copy()
            point_cloud = mesh_vertices[:, 0:6]
            point_cloud[:, 3:] = (point_cloud[:, 3:] - MEAN_COLOR_RGB) / 256.0

        if self.use_height:
            # Height is measured from the 0.99th percentile of Z.
            floor_height = np.percentile(point_cloud[:, 2], 0.99)
            height = point_cloud[:, 2] - floor_height
            point_cloud = np.concatenate(
                [point_cloud, np.expand_dims(height, 1)], 1)

        # ------------------------------- LABELS ------------------------------
        num_boxes = instance_bboxes.shape[0]
        target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
        target_bboxes_mask = np.zeros((MAX_NUM_OBJ))
        # Heading targets are never filled in below; they stay all-zero.
        angle_classes = np.zeros((MAX_NUM_OBJ, ))
        angle_residuals = np.zeros((MAX_NUM_OBJ, ))
        size_classes = np.zeros((MAX_NUM_OBJ, ))
        size_residuals = np.zeros((MAX_NUM_OBJ, 3))

        point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                       self.num_points,
                                                       return_choices=True)
        # Keep the per-point annotations aligned with the sampled points.
        instance_labels = instance_labels[choices]
        semantic_labels = semantic_labels[choices]
        pcl_color = pcl_color[choices]

        target_bboxes_mask[0:num_boxes] = 1
        target_bboxes[0:num_boxes, :] = instance_bboxes[:, 0:6]

        # ------------------------------- DATA AUGMENTATION ------------------------------
        if self.augment:
            if np.random.random() > 0.5:
                # Flipping along the YZ plane
                point_cloud[:, 0] = -1 * point_cloud[:, 0]
                target_bboxes[:, 0] = -1 * target_bboxes[:, 0]

            if np.random.random() > 0.5:
                # Flipping along the XZ plane
                point_cloud[:, 1] = -1 * point_cloud[:, 1]
                target_bboxes[:, 1] = -1 * target_bboxes[:, 1]

            # Rotation along up-axis/Z-axis
            rot_angle = (np.random.random() * np.pi /
                         18) - np.pi / 36  # -5 ~ +5 degree
            rot_mat = pc_util.rotz(rot_angle)
            point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3],
                                         np.transpose(rot_mat))
            target_bboxes = rotate_aligned_boxes(target_bboxes, rot_mat)

        # compute votes *AFTER* augmentation
        # generate votes
        # Note: since there's no map between bbox instance labels and
        # pc instance_labels (it had been filtered
        # in the data preparation step) we'll compute the instance bbox
        # from the points sharing the same instance label.
        point_votes = np.zeros([self.num_points, 3])
        point_votes_mask = np.zeros(self.num_points)
        for i_instance in np.unique(instance_labels):
            # find all points belong to that instance
            ind = np.where(instance_labels == i_instance)[0]
            # the first point's semantic label decides whether the
            # instance is a detection target
            if semantic_labels[ind[0]] in DC.nyu40ids:
                x = point_cloud[ind, :3]
                # vote target: center of the points' axis-aligned extent
                center = 0.5 * (x.min(0) + x.max(0))
                point_votes[ind, :] = center - x
                point_votes_mask[ind] = 1.0
        point_votes = np.tile(point_votes, (1, 3))  # make 3 votes identical

        class_ind = [
            np.where(DC.nyu40ids == x)[0][0] for x in instance_bboxes[:, -1]
        ]
        # NOTE: set size class as semantic class. Consider use size2class.
        size_classes[0:num_boxes] = class_ind
        size_residuals[0:num_boxes, :] = \
            target_bboxes[0:num_boxes, 3:6] - DC.mean_size_arr[class_ind, :]

        # keep the same nums of points for each cloud
        mesh_vertices, _ = pc_util.random_sampling(mesh_vertices,
                                                   50000,
                                                   return_choices=True)

        target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
        target_bboxes_semcls[0:num_boxes] = \
            [DC.nyu40id2class[x] for x in instance_bboxes[:, -1]]

        ret_dict = {}
        ret_dict['mesh_vertices'] = mesh_vertices.astype(np.float32)
        ret_dict['point_clouds'] = point_cloud.astype(np.float32)
        ret_dict['center_label'] = target_bboxes.astype(np.float32)[:, 0:3]
        ret_dict['heading_class_label'] = angle_classes.astype(np.int64)
        ret_dict['heading_residual_label'] = angle_residuals.astype(np.float32)
        ret_dict['size_class_label'] = size_classes.astype(np.int64)
        ret_dict['size_residual_label'] = size_residuals.astype(np.float32)
        ret_dict['sem_cls_label'] = target_bboxes_semcls.astype(np.int64)
        ret_dict['box_label_mask'] = target_bboxes_mask.astype(np.float32)
        ret_dict['vote_label'] = point_votes.astype(np.float32)
        ret_dict['vote_label_mask'] = point_votes_mask.astype(np.int64)
        ret_dict['scan_idx'] = np.array(idx).astype(np.int64)
        ret_dict['pcl_color'] = pcl_color
        return ret_dict
Exemplo n.º 15
0
    def __getitem__(self, idx):
        """Load scan ``idx`` from disk and build its detection targets.

        Returns a dict with following keys:
            point_clouds: (N,3+C)
            center_label: (MAX_NUM_OBJ,3) for GT box center XYZ
            heading_class_label: (MAX_NUM_OBJ,) with int values in 0,...,NUM_HEADING_BIN-1
            heading_residual_label: (MAX_NUM_OBJ,)
            size_class_label: (MAX_NUM_OBJ,) with int values in 0,...,NUM_SIZE_CLUSTER
            size_residual_label: (MAX_NUM_OBJ,3)
            sem_cls_label: (MAX_NUM_OBJ,) semantic class index
            box_label_mask: (MAX_NUM_OBJ) as 0/1 with 1 indicating a unique box
            vote_label: (N,9) with votes XYZ (3 votes: X1Y1Z1, X2Y2Z2, X3Y3Z3)
                if there is only one vote then X1==X2==X3 etc.
            vote_label_mask: (N,) with 0/1 with 1 indicating the point
                is in one of the object's OBB.
            scan_idx: int scan index in scan_names list
            max_gt_bboxes: (MAX_NUM_OBJ,8) zero-padded copy of the
                (augmented) boxes
        When ``self.use_imvote`` is set the dict additionally holds
        ``scale``, ``calib_Rtilt``, ``calib_K``, ``full_img_width``,
        ``cls_score_feats``, ``full_img_votes_1d`` and ``full_img_1d``.
        """
        scan_name = self.scan_names[idx]
        scan_prefix = os.path.join(self.data_path, scan_name)
        # Context managers close the .npz archives as soon as the arrays are read
        with np.load(scan_prefix+'_pc.npz') as pc_archive:
            point_cloud = pc_archive['pc'] # Nx6
        bboxes = np.load(scan_prefix+'_bbox.npy') # K,8
        with np.load(scan_prefix+'_votes.npz') as votes_archive:
            point_votes = votes_archive['point_votes'] # Nx10
        if self.use_imvote:
            # Read camera parameters (file handle is closed deterministically)
            calib_path = os.path.join(self.raw_data_path, 'calib', scan_name+'.txt')
            with open(calib_path) as calib_file:
                calib_lines = calib_file.readlines()
            # Both matrices are stored column-major ('F') as 9 space-separated floats
            calib_Rtilt = np.reshape(np.array([float(x) for x in calib_lines[0].rstrip().split(' ')]), (3,3), 'F')
            calib_K = np.reshape(np.array([float(x) for x in calib_lines[1].rstrip().split(' ')]), (3,3), 'F')
            # Read image
            full_img = sunrgbd_utils.load_image(os.path.join(self.raw_data_path, 'image', scan_name+'.jpg'))
            full_img_height = full_img.shape[0]
            full_img_width = full_img.shape[1]

            # ------------------------------- 2D IMAGE VOTES ------------------------------
            cls_id_list = self.cls_id_map[scan_name]
            cls_score_list = self.cls_score_map[scan_name]
            bbox_2d_list = self.bbox_2d_map[scan_name]
            obj_img_list = []
            for i2d, (cls2d, box2d) in enumerate(zip(cls_id_list, bbox_2d_list)):
                xmin, ymin, xmax, ymax = box2d
                # During training we randomly drop 2D boxes to reduce over-fitting
                if self.train and np.random.random()>0.5:
                    continue

                obj_img = full_img[ymin:ymax, xmin:xmax, :]
                obj_h = obj_img.shape[0]
                obj_w = obj_img.shape[1]
                # Box origin (xmin, ymin), crop height/width, class id, index to the semantic cues
                meta_data = (xmin, ymin, obj_h, obj_w, cls2d, i2d)
                if obj_h == 0 or obj_w == 0:
                    continue

                # Use 2D box center as approximation
                uv_centroid = np.array([int(obj_w/2), int(obj_h/2)])
                uv_centroid = np.expand_dims(uv_centroid, 0)

                # Per-pixel (u, v) offset from each pixel of the crop to the crop center
                v_coords, u_coords = np.meshgrid(range(obj_h), range(obj_w), indexing='ij')
                img_vote = np.transpose(np.array([u_coords, v_coords]), (1,2,0))
                img_vote = np.expand_dims(uv_centroid, 0) - img_vote

                obj_img_list.append((meta_data, img_vote))

            # Channel 0 counts the boxes covering a pixel; the remaining channels are
            # groups of 4 values per vote: (du, dv, class id, box index + 1).
            full_img_votes = np.zeros((full_img_height,full_img_width,self.vote_dims), dtype=np.float32)
            # Empty votes: channels 3, 7, ... (the class-id slot of every vote) are set to -1
            full_img_votes[:,:,3::4] = -1.

            for obj_img_data in obj_img_list:
                meta_data, img_vote = obj_img_data
                u0, v0, h, w, cls2d, i2d = meta_data
                for u in range(u0, u0+w):
                    for v in range(v0, v0+h):
                        iidx = int(full_img_votes[v,u,0])
                        # Pixels that already hold the maximum number of votes are skipped
                        if iidx >= self.max_imvote_per_pixel:
                            continue
                        full_img_votes[v,u,(1+iidx*4):(1+iidx*4+2)] = img_vote[v-v0,u-u0,:]
                        full_img_votes[v,u,(1+iidx*4+2)] = cls2d
                        full_img_votes[v,u,(1+iidx*4+3)] = i2d + 1 # add +1 here as we need a dummy feature for pixels outside all boxes
                full_img_votes[v0:(v0+h), u0:(u0+w), 0] += 1

            # Zero-padded flat buffer so every sample has the same length
            full_img_votes_1d = np.zeros((MAX_NUM_PIXEL*self.vote_dims), dtype=np.float32)
            full_img_votes_1d[0:full_img_height*full_img_width*self.vote_dims] = full_img_votes.flatten()

            # Semantic cues: one-hot vector for class scores
            cls_score_feats = np.zeros((1+MAX_NUM_2D_DET,NUM_CLS), dtype=np.float32)
            # First row is a dummy feature
            len_obj = len(cls_id_list)
            if len_obj:
                ind_obj = np.arange(1,len_obj+1)
                ind_cls = np.array(cls_id_list)
                cls_score_feats[ind_obj, ind_cls] = np.array(cls_score_list)

            # Texture cues: normalized RGB values
            full_img = (full_img - 128.) / 255.
            # Serialize data to 1D and save image size so that we can recover the original location in the image
            full_img_1d = np.zeros((MAX_NUM_PIXEL*3), dtype=np.float32)
            full_img_1d[:full_img_height*full_img_width*3] = full_img.flatten()

        if not self.use_color:
            point_cloud = point_cloud[:,0:3]
        else:
            point_cloud = point_cloud[:,0:6]
            point_cloud[:,3:] = (point_cloud[:,3:]-MEAN_COLOR_RGB)

        if self.use_height:
            # 0.99th percentile of z is used as the floor height estimate
            floor_height = np.percentile(point_cloud[:,2],0.99)
            height = point_cloud[:,2] - floor_height
            point_cloud = np.concatenate([point_cloud, np.expand_dims(height, 1)],1) # (N,4) or (N,7)

        # ------------------------------- DATA AUGMENTATION ------------------------------
        scale_ratio = 1.
        if self.augment:
            flip_flag = (np.random.random()>0.5)
            if flip_flag:
                # Flipping along the YZ plane
                point_cloud[:,0] = -1 * point_cloud[:,0]
                bboxes[:,0] = -1 * bboxes[:,0]
                bboxes[:,6] = np.pi - bboxes[:,6]
                # Columns 1, 4, 7 are the x components of the three votes
                point_votes[:,[1,4,7]] = -1 * point_votes[:,[1,4,7]]

            # Rotation along up-axis/Z-axis
            rot_angle = (np.random.random()*np.pi/3) - np.pi/6 # -30 ~ +30 degree
            rot_mat = sunrgbd_utils.rotz(rot_angle)

            # Rotate the vote end points (point + vote) first; the votes are
            # recovered below as end point minus rotated point.
            point_votes_end = np.zeros_like(point_votes)
            point_votes_end[:,1:4] = np.dot(point_cloud[:,0:3] + point_votes[:,1:4], np.transpose(rot_mat))
            point_votes_end[:,4:7] = np.dot(point_cloud[:,0:3] + point_votes[:,4:7], np.transpose(rot_mat))
            point_votes_end[:,7:10] = np.dot(point_cloud[:,0:3] + point_votes[:,7:10], np.transpose(rot_mat))

            point_cloud[:,0:3] = np.dot(point_cloud[:,0:3], np.transpose(rot_mat))
            bboxes[:,0:3] = np.dot(bboxes[:,0:3], np.transpose(rot_mat))
            bboxes[:,6] -= rot_angle
            point_votes[:,1:4] = point_votes_end[:,1:4] - point_cloud[:,0:3]
            point_votes[:,4:7] = point_votes_end[:,4:7] - point_cloud[:,0:3]
            point_votes[:,7:10] = point_votes_end[:,7:10] - point_cloud[:,0:3]

            if self.use_imvote:
                R_inverse = np.copy(np.transpose(rot_mat))
                if flip_flag:
                    R_inverse[0,:] *= -1
                # Update Rtilt according to the augmentation
                # R_inverse (3x3) * point (3x1) transforms an augmented depth point
                # to original point in upright_depth coordinates
                calib_Rtilt = np.dot(np.transpose(R_inverse), calib_Rtilt)

            # Augment RGB color
            if self.use_color:
                rgb_color = point_cloud[:,3:6] + MEAN_COLOR_RGB
                rgb_color *= (1+0.4*np.random.random(3)-0.2) # brightness change for each channel
                rgb_color += (0.1*np.random.random(3)-0.05) # color shift for each channel
                rgb_color += np.expand_dims((0.05*np.random.random(point_cloud.shape[0])-0.025), -1) # jittering on each pixel
                rgb_color = np.clip(rgb_color, 0, 1)
                # randomly drop out 30% of the points' colors
                rgb_color *= np.expand_dims(np.random.random(point_cloud.shape[0])>0.3,-1)
                point_cloud[:,3:6] = rgb_color - MEAN_COLOR_RGB

            # Augment point cloud scale: 0.85x-1.15x
            scale_ratio = np.random.random()*0.3+0.85
            if self.use_imvote:
                calib_Rtilt = np.dot(np.array([[scale_ratio,0,0],[0,scale_ratio,0],[0,0,scale_ratio]]), calib_Rtilt)
            scale_ratio_expand = np.expand_dims(np.tile(scale_ratio,3),0)
            point_cloud[:,0:3] *= scale_ratio_expand
            bboxes[:,0:3] *= scale_ratio_expand
            bboxes[:,3:6] *= scale_ratio_expand
            point_votes[:,1:4] *= scale_ratio_expand
            point_votes[:,4:7] *= scale_ratio_expand
            point_votes[:,7:10] *= scale_ratio_expand
            if self.use_height:
                # The height feature is the last column and must scale with xyz
                point_cloud[:,-1] *= scale_ratio

        # ------------------------------- LABELS ------------------------------
        angle_classes = np.zeros((MAX_NUM_OBJ,))
        angle_residuals = np.zeros((MAX_NUM_OBJ,))
        size_classes = np.zeros((MAX_NUM_OBJ,))
        size_residuals = np.zeros((MAX_NUM_OBJ, 3))
        label_mask = np.zeros((MAX_NUM_OBJ))
        label_mask[0:bboxes.shape[0]] = 1
        max_bboxes = np.zeros((MAX_NUM_OBJ, 8))
        max_bboxes[0:bboxes.shape[0],:] = bboxes

        for i in range(bboxes.shape[0]):
            bbox = bboxes[i]
            semantic_class = bbox[7]
            angle_class, angle_residual = DC.angle2class(bbox[6])
            # NOTE: The mean size stored in size2class is of full length of box edges,
            # while in sunrgbd_data.py data dumping we dumped *half* length l,w,h.. so have to time it by 2 here
            box3d_size = bbox[3:6]*2
            size_class, size_residual = DC.size2class(box3d_size, DC.class2type[semantic_class])
            angle_classes[i] = angle_class
            angle_residuals[i] = angle_residual
            size_classes[i] = size_class
            size_residuals[i] = size_residual

        target_bboxes_mask = label_mask
        target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
        for i in range(bboxes.shape[0]):
            bbox = bboxes[i]
            corners_3d = sunrgbd_utils.my_compute_box_3d(bbox[0:3], bbox[3:6], bbox[6])
            # compute axis aligned box
            xmin = np.min(corners_3d[:,0])
            ymin = np.min(corners_3d[:,1])
            zmin = np.min(corners_3d[:,2])
            xmax = np.max(corners_3d[:,0])
            ymax = np.max(corners_3d[:,1])
            zmax = np.max(corners_3d[:,2])
            target_bbox = np.array([(xmin+xmax)/2, (ymin+ymax)/2, (zmin+zmax)/2, xmax-xmin, ymax-ymin, zmax-zmin])
            target_bboxes[i,:] = target_bbox

        # Sub-sample the cloud and keep the vote rows of the chosen points:
        # column 0 of point_votes is the mask, columns 1..9 are the votes.
        point_cloud, choices = pc_util.random_sampling(point_cloud, self.num_points, return_choices=True)
        point_votes_mask = point_votes[choices,0]
        point_votes = point_votes[choices,1:]

        ret_dict = {}
        ret_dict['point_clouds'] = point_cloud.astype(np.float32)
        ret_dict['center_label'] = target_bboxes.astype(np.float32)[:,0:3]
        ret_dict['heading_class_label'] = angle_classes.astype(np.int64)
        ret_dict['heading_residual_label'] = angle_residuals.astype(np.float32)
        ret_dict['size_class_label'] = size_classes.astype(np.int64)
        ret_dict['size_residual_label'] = size_residuals.astype(np.float32)
        target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
        target_bboxes_semcls[0:bboxes.shape[0]] = bboxes[:,-1] # from 0 to 9
        ret_dict['sem_cls_label'] = target_bboxes_semcls.astype(np.int64)
        ret_dict['box_label_mask'] = target_bboxes_mask.astype(np.float32)
        ret_dict['vote_label'] = point_votes.astype(np.float32)
        ret_dict['vote_label_mask'] = point_votes_mask.astype(np.int64)
        ret_dict['scan_idx'] = np.array(idx).astype(np.int64)
        ret_dict['max_gt_bboxes'] = max_bboxes
        if self.use_imvote:
            ret_dict['scale'] = np.array(scale_ratio).astype(np.float32)
            ret_dict['calib_Rtilt'] = calib_Rtilt.astype(np.float32)
            ret_dict['calib_K'] = calib_K.astype(np.float32)
            ret_dict['full_img_width'] = np.array(full_img_width).astype(np.int64)
            ret_dict['cls_score_feats'] = cls_score_feats.astype(np.float32)
            ret_dict['full_img_votes_1d'] = full_img_votes_1d.astype(np.float32)
            ret_dict['full_img_1d'] = full_img_1d.astype(np.float32)

        return ret_dict
Exemplo n.º 16
0
    def load_crop(self, filename, use_color, use_height, num_points,
                  max_num_obj, DC):
        """Load one HDF5 crop and turn it into network inputs and labels.

        The file is expected to provide the datasets ``point_cloud``,
        ``instance``, ``semantic`` and ``bboxes``. The crop is centred on the
        middle of its bounding box and axes 1 and 2 are swapped (y-up to z-up).

        Args:
            filename: path of the HDF5 crop file.
            use_color: keep columns 3:6 (shifted by the mean colour / 256)
                when true, otherwise keep xyz only.
            use_height: append the height above the estimated floor as an
                extra column.
            num_points: number of points to sample from the crop.
            max_num_obj: number of rows in the zero-padded label arrays.
            DC: dataset config; only ``DC.mean_size_arr`` is read here.

        Returns:
            dict with the same label keys as the dataset ``__getitem__``
            (``point_clouds``, ``center_label``, ``heading_*``, ``size_*``,
            ``sem_cls_label``, ``box_label_mask``, ``vote_label``,
            ``vote_label_mask``) plus ``scan_name`` and ``mid`` (the removed
            centre offset, already expressed in the z-up axis order).
        """
        MEAN_COLOR_RGB = np.array([109.8, 97.2, 83.8])

        # NOTE: this keeps everything before the FIRST '.', so a path with a
        # dot in a directory name (e.g. './x.h5') is truncated there.
        scan_name = filename.split('.')[0]

        # The context manager closes the file even if a dataset is missing
        with h5py.File(filename, 'r') as h5file:
            mesh_vertices = np.array(h5file['point_cloud'], dtype=np.float32)
            instance_labels = np.array(h5file['instance'], dtype=np.int32)
            semantic_labels = np.array(h5file['semantic'], dtype=np.int32)
            instance_bboxes = np.array(h5file['bboxes'], dtype=np.float32)
        instance_bboxes = instance_bboxes[:, :8]
        num_boxes = instance_bboxes.shape[0]

        # center data
        minbound = np.min(mesh_vertices[:, :3], axis=0)
        maxbound = np.max(mesh_vertices[:, :3], axis=0)
        mid = (minbound + maxbound) / 2.0
        mesh_vertices[:, :3] -= mid
        instance_bboxes[:, :3] -= mid

        # convert PC to z is up.
        mid[[0, 1, 2]] = mid[[0, 2, 1]]

        mesh_vertices[:, [0, 1, 2]] = mesh_vertices[:, [0, 2, 1]]

        # convert annotations to z is up (centres and sizes).
        instance_bboxes[:, [0, 1, 2]] = instance_bboxes[:, [0, 2, 1]]
        instance_bboxes[:, [3, 4, 5]] = instance_bboxes[:, [3, 5, 4]]

        if not use_color:
            point_cloud = mesh_vertices[:, 0:3]  # do not use color for now
        else:
            point_cloud = mesh_vertices[:, 0:6]
            point_cloud[:, 3:] = point_cloud[:, 3:] - (MEAN_COLOR_RGB) / 256.0

        if use_height:
            # 0.99th percentile of z is used as the floor height estimate
            floor_height = np.percentile(point_cloud[:, 2], 0.99)
            height = point_cloud[:, 2] - floor_height
            point_cloud = np.concatenate(
                [point_cloud, np.expand_dims(height, 1)], 1)

        # ------------------------------- LABELS ------------------------------
        target_bboxes = np.zeros((max_num_obj, 6))
        target_bboxes_mask = np.zeros((max_num_obj))
        angle_classes = np.zeros((max_num_obj, ))
        angle_residuals = np.zeros((max_num_obj, ))
        size_classes = np.zeros((max_num_obj, ))
        size_residuals = np.zeros((max_num_obj, 3))

        point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                       num_points,
                                                       return_choices=True)
        instance_labels = instance_labels[choices]
        semantic_labels = semantic_labels[choices]

        target_bboxes_mask[0:num_boxes] = 1
        target_bboxes[0:num_boxes, :] = instance_bboxes[:, 0:6]

        # generate votes
        # Note: since there's no map between bbox instance labels and
        # pc instance_labels (it had been filtered
        # in the data preparation step) we'll compute the instance bbox
        # from the points sharing the same instance label.
        point_votes = np.zeros([num_points, 3])
        point_votes_mask = np.zeros(num_points)

        for i_instance in np.unique(instance_labels):
            # find all points belong to that instance
            ind = np.where(instance_labels == i_instance)[0]
            # Instances whose first point has semantic label -1 get no vote
            #TODO: change classe labels
            if semantic_labels[ind[0]] != -1:
                x = point_cloud[ind, :3]
                center = 0.5 * (x.min(0) + x.max(0))
                point_votes[ind, :] = center - x
                point_votes_mask[ind] = 1.0

        point_votes = np.tile(point_votes, (1, 3))  # make 3 votes identical

        # NOTE: set size class as semantic class. Consider use size2class.
        size_classes[0:num_boxes] = instance_bboxes[:, -1]
        # `np.int` was removed in NumPy 1.24; the builtin `int` is the
        # drop-in replacement for the old alias.
        instance_bboxes_sids = instance_bboxes[:, -1].astype(int)
        size_residuals[0:num_boxes, :] = \
            target_bboxes[0:num_boxes, 3:6] - DC.mean_size_arr[instance_bboxes_sids,:]

        #TODO: update angle_classes + residuals
        # For now the raw angle column is stored as the residual and every
        # heading class stays 0.
        angle_residuals[0:num_boxes] = instance_bboxes[:, 6]

        ret_dict = {}
        ret_dict['point_clouds'] = point_cloud.astype(np.float32)
        ret_dict['center_label'] = target_bboxes.astype(np.float32)[:, 0:3]
        ret_dict['heading_class_label'] = angle_classes.astype(np.int64)
        ret_dict['heading_residual_label'] = angle_residuals.astype(np.float32)
        ret_dict['size_class_label'] = size_classes.astype(np.int64)
        ret_dict['size_residual_label'] = size_residuals.astype(np.float32)
        target_bboxes_semcls = np.zeros((max_num_obj))
        target_bboxes_semcls[0:num_boxes] = instance_bboxes[:, -1]
        ret_dict['sem_cls_label'] = target_bboxes_semcls.astype(np.int64)
        ret_dict['box_label_mask'] = target_bboxes_mask.astype(np.float32)
        ret_dict['vote_label'] = point_votes.astype(np.float32)
        ret_dict['vote_label_mask'] = point_votes_mask.astype(np.int64)
        ret_dict['scan_name'] = scan_name
        ret_dict['mid'] = mid

        return ret_dict
Exemplo n.º 17
0
    def forward_full_tour(self,
                          batch_data,
                          DC,
                          device,
                          files_crops=None,
                          **kwargs):
        """Run the network crop by crop and concatenate the predictions.

        Two modes are supported:

        * ``files_crops`` given: every file is loaded with ``self.load_crop``
          and forwarded on its own.
        * ``files_crops`` is ``None``: ``batch_data['point_cloud']`` (one
          numpy cloud, (N,6) x,y,z,R,G,B, y is up) is tiled with 4.0 x 3.0 x
          4.0 windows on a 2.0 / 1.5 / 2.0 stride; windows holding fewer than
          5000 points are skipped.

        Args:
            batch_data: dict holding ``'point_cloud'``; only read when
                ``files_crops`` is ``None``.
            DC: dataset config, forwarded to ``self.load_crop``.
            device: torch device the inputs are moved to.
            files_crops: optional list of crop file names.
            **kwargs: optional ``use_color`` (default False), ``use_height``
                (False), ``num_point`` (40000) and ``max_num_obj`` (64).

        Returns:
            dict mapping each output key to the per-crop CPU tensors
            concatenated along dim 1, with ``center`` shifted back by the
            crop offset. The dict is empty when no crop was processed
            (including an empty ``files_crops`` list).
        """
        use_color = kwargs.get('use_color', False)
        use_height = kwargs.get('use_height', False)
        num_point = kwargs.get('num_point', 40000)
        max_num_obj = kwargs.get('max_num_obj', 64)

        output_keys = [
            'center',
            'heading_scores',
            'heading_residuals',
            'heading_residuals_normalized',
            'size_scores',
            'size_residuals',
            'size_residuals_normalized',
            'sem_cls_scores',
            'objectness_scores',
            'seed_xyz',
            'vote_xyz',
            'seed_inds',
            'aggregated_vote_xyz',
            'aggregated_vote_inds',
            'proposal_lastlayer_features',
        ]

        # Per-key list of per-crop outputs; concatenated once at the end
        # instead of growing a tensor with torch.cat after every crop.
        collected = {k: [] for k in output_keys}

        def _run_crop(crop_points, crop_mid):
            # Forward one crop and stash its detached CPU outputs.
            inputs = {
                'point_clouds':
                torch.FloatTensor(crop_points).unsqueeze(0).to(device)
            }
            tmp_end_points = self.forward(inputs)
            # Undo the centring applied to the crop before the forward pass
            tmp_end_points['center'] += torch.FloatTensor(crop_mid).to(device)
            tmp_end_points['proposal_lastlayer_features'] = tmp_end_points[
                'proposal_lastlayer_features'].permute(0, 2, 1)
            for k in output_keys:
                collected[k].append(tmp_end_points[k].detach().cpu())

        if files_crops is not None:

            print(len(files_crops))

            for crop_file in files_crops:
                file_data = self.load_crop(
                    crop_file,
                    use_color,
                    use_height,
                    num_point,
                    max_num_obj,
                    DC,
                )
                _run_crop(file_data['point_clouds'], file_data['mid'])
        else:
            # Assumes one Point-Cloud (BS>2 not implemented) in numpy format.
            # (N,6) x,y,z,R,G,B.
            # Assumes y is up.

            NUM_POINTS_THRESHOLD = 5000
            MEAN_COLOR_RGB = np.array([109.8, 97.2, 83.8])

            point_cloud_points = batch_data['point_cloud']

            min_bound = np.min(point_cloud_points[:, :3], axis=0)
            max_bound = np.max(point_cloud_points[:, :3], axis=0)

            for x in np.arange(min_bound[0], max_bound[0], 2.0):
                for y in np.arange(min_bound[1], max_bound[1], 1.5):
                    for z in np.arange(min_bound[2], max_bound[2], 2.0):

                        crop_min_bound = np.array([x, y, z])
                        crop_max_bound = np.array([x + 4.0, y + 3.0, z + 4.0])

                        # Points strictly inside the window on all three axes
                        vertices_mask = (point_cloud_points[:,:3] > crop_min_bound).all(axis=1) &\
                                        (point_cloud_points[:,:3] < crop_max_bound).all(axis=1)

                        crop_point_cloud = point_cloud_points[
                            vertices_mask, :].copy()

                        if crop_point_cloud.shape[0] < NUM_POINTS_THRESHOLD:
                            continue

                        # center data
                        minbound = np.min(crop_point_cloud[:, :3], axis=0)
                        maxbound = np.max(crop_point_cloud[:, :3], axis=0)
                        mid = (minbound + maxbound) / 2.0
                        crop_point_cloud[:, :3] -= mid

                        # convert PC to z is up.
                        mid[[0, 1, 2]] = mid[[0, 2, 1]]

                        crop_point_cloud[:, [0, 1, 2]] = \
                            crop_point_cloud[:, [0, 2, 1]]

                        if not use_color:
                            # do not use color for now
                            point_cloud = crop_point_cloud[:, 0:3]
                        else:
                            point_cloud = crop_point_cloud[:, 0:6]
                            point_cloud[:, 3:] = point_cloud[:, 3:] - (
                                MEAN_COLOR_RGB) / 256.0

                        if use_height:
                            # 0.99th percentile of z is the floor estimate
                            floor_height = np.percentile(
                                point_cloud[:, 2], 0.99)
                            height = point_cloud[:, 2] - floor_height
                            point_cloud = np.concatenate(
                                [point_cloud,
                                 np.expand_dims(height, 1)], 1)

                        point_cloud, _ = pc_util.random_sampling(
                            point_cloud, num_point, return_choices=True)

                        _run_crop(point_cloud, mid)

        end_points = {}
        for k, tensors in collected.items():
            if not tensors:
                # Nothing was processed: keep the result empty
                continue
            # A single crop is returned as is, exactly like the first
            # iteration of the incremental version.
            end_points[k] = (tensors[0] if len(tensors) == 1 else torch.cat(
                tensors, dim=1))

        return end_points
Exemplo n.º 18
0
    def __getitem__(self, idx):
        """Build the training sample for the pre-loaded scan ``idx``.

        Returns a dict with following keys:
            point_clouds: (N,3+C)
            center_label: (MAX_NUM_OBJ,3) for GT box center XYZ
            sem_cls_label: (MAX_NUM_OBJ,) semantic class index
            heading_class_label: (MAX_NUM_OBJ,) with int values in 0,...,NUM_HEADING_BIN-1
            heading_residual_label: (MAX_NUM_OBJ,)
            size_class_label: (MAX_NUM_OBJ,) with int values in 0,...,NUM_SIZE_CLUSTER
            size_residual_label: (MAX_NUM_OBJ,3)
            size_gts: (MAX_NUM_OBJ,3) extents of the axis-aligned GT boxes
            box_label_mask: (MAX_NUM_OBJ) as 0/1 with 1 indicating a unique box
            point_obj_mask: (N,) with 0/1 with 1 indicating the point is in one of the object's OBB.
            point_instance_label: (N,) with int values in -1,...,num_box, indicating which object the point belongs to, -1 means a background point.
            scan_idx: int scan index in scan_names list
            max_gt_bboxes: (MAX_NUM_OBJ,8) zero-padded copy of the
                (augmented) boxes
        """
        # Work on private copies: the colour shift and every augmentation
        # below write in place, and writing through views of the cached
        # arrays would make the changes pile up each time the same index is
        # requested. (Assumes the cached entries are numpy arrays.)
        point_cloud = np.array(self.point_cloud_list[idx], copy=True)  # Nx6
        bboxes = np.array(self.bboxes_list[idx], copy=True)  # K,8
        # These two are only read (fancy-indexed below), so no copy is needed
        point_obj_mask = self.point_labels_list[idx][:, 0]
        point_instance_label = self.point_labels_list[idx][:, -1]

        if not self.use_color:
            point_cloud = point_cloud[:, 0:3]
        else:
            point_cloud = point_cloud[:, 0:6]
            point_cloud[:, 3:] = (point_cloud[:, 3:] - MEAN_COLOR_RGB)

        if self.use_height:
            # 0.99th percentile of z is used as the floor height estimate
            floor_height = np.percentile(point_cloud[:, 2], 0.99)
            height = point_cloud[:, 2] - floor_height
            point_cloud = np.concatenate(
                [point_cloud, np.expand_dims(height, 1)], 1)  # (N,4) or (N,7)

        # ------------------------------- DATA AUGMENTATION ------------------------------
        if self.augment:
            if np.random.random() > 0.5:
                # Flipping along the YZ plane
                point_cloud[:, 0] = -1 * point_cloud[:, 0]
                bboxes[:, 0] = -1 * bboxes[:, 0]
                bboxes[:, 6] = np.pi - bboxes[:, 6]

            # Rotation along up-axis/Z-axis
            rot_angle = (np.random.random() * np.pi /
                         3) - np.pi / 6  # -30 ~ +30 degree
            rot_mat = sunrgbd_utils.rotz(rot_angle)

            point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3],
                                         np.transpose(rot_mat))
            bboxes[:, 0:3] = np.dot(bboxes[:, 0:3], np.transpose(rot_mat))
            bboxes[:, 6] -= rot_angle

            # Augment RGB color
            if self.use_color:
                rgb_color = point_cloud[:, 3:6] + MEAN_COLOR_RGB
                rgb_color *= (1 + 0.4 * np.random.random(3) - 0.2
                              )  # brightness change for each channel
                rgb_color += (0.1 * np.random.random(3) - 0.05
                              )  # color shift for each channel
                rgb_color += np.expand_dims(
                    (0.05 * np.random.random(point_cloud.shape[0]) - 0.025),
                    -1)  # jittering on each pixel
                rgb_color = np.clip(rgb_color, 0, 1)
                # randomly drop out 30% of the points' colors
                rgb_color *= np.expand_dims(
                    np.random.random(point_cloud.shape[0]) > 0.3, -1)
                point_cloud[:, 3:6] = rgb_color - MEAN_COLOR_RGB

            # Augment point cloud scale: 0.85x-1.15x
            scale_ratio = np.random.random() * 0.3 + 0.85
            scale_ratio = np.expand_dims(np.tile(scale_ratio, 3), 0)
            point_cloud[:, 0:3] *= scale_ratio
            bboxes[:, 0:3] *= scale_ratio
            bboxes[:, 3:6] *= scale_ratio
            if self.use_height:
                # The height feature is the last column and must scale with xyz
                point_cloud[:, -1] *= scale_ratio[0, 0]

        # ------------------------------- LABELS ------------------------------
        angle_classes = np.zeros((MAX_NUM_OBJ, ))
        angle_residuals = np.zeros((MAX_NUM_OBJ, ))
        size_classes = np.zeros((MAX_NUM_OBJ, ))
        size_residuals = np.zeros((MAX_NUM_OBJ, 3))
        label_mask = np.zeros((MAX_NUM_OBJ))
        label_mask[0:bboxes.shape[0]] = 1
        max_bboxes = np.zeros((MAX_NUM_OBJ, 8))
        max_bboxes[0:bboxes.shape[0], :] = bboxes

        for i in range(bboxes.shape[0]):
            bbox = bboxes[i]
            semantic_class = bbox[7]
            angle_class, angle_residual = DC.angle2class(bbox[6])
            # NOTE: The mean size stored in size2class is of full length of box edges,
            # while in sunrgbd_data.py data dumping we dumped *half* length l,w,h.. so have to time it by 2 here
            box3d_size = bbox[3:6] * 2
            size_class, size_residual = DC.size2class(
                box3d_size, DC.class2type[semantic_class])
            angle_classes[i] = angle_class
            angle_residuals[i] = angle_residual
            size_classes[i] = size_class
            size_residuals[i] = size_residual

        target_bboxes_mask = label_mask
        target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
        # Padding rows get a centre of 1000 on every axis (presumably so
        # they lie far away from any real point -- confirm with the consumer)
        target_bboxes[:, 0:3] += 1000.0
        size_gts = np.zeros((MAX_NUM_OBJ, 3))
        for i in range(bboxes.shape[0]):
            bbox = bboxes[i]
            corners_3d = sunrgbd_utils.my_compute_box_3d(
                bbox[0:3], bbox[3:6], bbox[6])
            # compute axis aligned box
            xmin = np.min(corners_3d[:, 0])
            ymin = np.min(corners_3d[:, 1])
            zmin = np.min(corners_3d[:, 2])
            xmax = np.max(corners_3d[:, 0])
            ymax = np.max(corners_3d[:, 1])
            zmax = np.max(corners_3d[:, 2])
            target_bbox = np.array([(xmin + xmax) / 2, (ymin + ymax) / 2,
                                    (zmin + zmax) / 2, xmax - xmin,
                                    ymax - ymin, zmax - zmin])
            target_bboxes[i, :] = target_bbox
            size_gts[i, :] = target_bbox[3:6]

        # Sub-sample the cloud and keep the per-point labels aligned with it
        point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                       self.num_points,
                                                       return_choices=True)
        point_obj_mask = point_obj_mask[choices]
        point_instance_label = point_instance_label[choices]

        ret_dict = {}
        ret_dict['point_clouds'] = point_cloud.astype(np.float32)
        ret_dict['center_label'] = target_bboxes.astype(np.float32)[:, 0:3]
        ret_dict['heading_class_label'] = angle_classes.astype(np.int64)
        ret_dict['heading_residual_label'] = angle_residuals.astype(np.float32)
        ret_dict['size_class_label'] = size_classes.astype(np.int64)
        ret_dict['size_residual_label'] = size_residuals.astype(np.float32)
        ret_dict['size_gts'] = size_gts.astype(np.float32)
        target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
        target_bboxes_semcls[0:bboxes.shape[0]] = bboxes[:, -1]  # from 0 to 9
        ret_dict['sem_cls_label'] = target_bboxes_semcls.astype(np.int64)
        ret_dict['box_label_mask'] = target_bboxes_mask.astype(np.float32)
        ret_dict['point_obj_mask'] = point_obj_mask.astype(np.int64)
        ret_dict['point_instance_label'] = point_instance_label.astype(
            np.int64)
        ret_dict['scan_idx'] = np.array(idx).astype(np.int64)
        ret_dict['max_gt_bboxes'] = max_bboxes
        return ret_dict
    def __getitem__(self, idx):
        """Load one scan and build box, vote and geometric-primitive labels.

        The scan's point cloud, boxes and votes are read from ``self.data_path``,
        optionally augmented (flip, rotation about Z, colour jitter, scaling),
        sub-sampled with ``pc_util.random_sampling`` and then labelled.  Besides
        the box/vote targets, every sampled point may be tagged as lying on a
        box face (``point_boundary_*``) or on a box edge (``point_line_*``).

        Args:
            idx: integer index into ``self.scan_names``.

        Returns:
            A dict with the following keys (N is the number of sampled points;
            presumably ``self.num_points`` -- depends on ``random_sampling``):
                point_clouds: (N,3+C) float32
                center_label: (MAX_NUM_OBJ,3) centre of the axis-aligned box
                    enclosing each GT box
                heading_class_label: (MAX_NUM_OBJ,) int64, from DC.angle2class
                heading_residual_label: (MAX_NUM_OBJ,) float32
                size_class_label: (MAX_NUM_OBJ,) int64, from DC.size2class
                size_residual_label: (MAX_NUM_OBJ,3) float32
                sem_cls_label: (MAX_NUM_OBJ,) semantic class index
                box_label_mask: (MAX_NUM_OBJ,) 0/1, 1 for rows holding a GT box
                vote_label: (N,9) with votes XYZ (3 votes: X1Y1Z1, X2Y2Z2,
                    X3Y3Z3); if there is only one vote then X1==X2==X3 etc.
                vote_label_mask: (N,) 0/1, 1 if the point is in an object's OBB
                scan_idx: int64 scan index in the scan_names list
                max_gt_bboxes: (MAX_NUM_OBJ,8) zero-padded copy of the boxes
                size_label: (MAX_NUM_OBJ,3) full box edge lengths
                heading_label: (MAX_NUM_OBJ,) raw heading angle of each box
                floor_height: scalar, only present when ``self.use_height``
                point_boundary_mask_z / point_boundary_mask_xy: (num_points,)
                    0/1 masks of points assigned to a horizontal / vertical face
                point_boundary_offset_z / point_boundary_offset_xy:
                    (num_points,3) face centre minus point
                point_boundary_sem_z: (num_points,6) face centre XYZ, two face
                    edge lengths, semantic class
                point_boundary_sem_xy: (num_points,5) face centre XYZ, face
                    height, semantic class
                point_line_mask: (num_points,) 0/1 mask of points on a box edge
                point_line_offset: (num_points,3) edge centre minus point
                point_line_sem: (num_points,4) edge centre XYZ, semantic class

        Raises:
            ValueError: if a box violates the upright-box assumptions checked
                while building the face planes.  The original code dropped into
                ``pdb`` at these points, which cannot work in a non-interactive
                data-loading process.
        """
        scan_name = self.scan_names[idx]
        scan_prefix = os.path.join(self.data_path, scan_name)
        # Column 6 is read below as the semantic label, so 'pc' carries at
        # least 7 columns (XYZ, RGB, semantic id).
        point_color_sem = np.load(scan_prefix + '_pc.npz')['pc']
        bboxes = np.load(scan_prefix + '_bbox.npy')  # K,8
        point_votes = np.load(scan_prefix + '_votes.npz')['point_votes']  # Nx10

        semantics37 = point_color_sem[:, 6]
        # Per-point collection of 10-class ids (presumably a point can map to
        # several of them -- see DC.class37_2_class10_multi).
        semantics10_multi = [
            DC.class37_2_class10_multi[k] for k in semantics37
        ]
        if not self.use_color:
            point_cloud = point_color_sem[:, 0:3]
        else:
            point_cloud = point_color_sem[:, 0:6]
            point_cloud[:, 3:6] = (point_color_sem[:, 3:6] - MEAN_COLOR_RGB)

        if self.use_height:
            # np.percentile takes q in percent, so this is the 0.99th
            # percentile of z, i.e. a near-minimum used as the floor estimate.
            floor_height = np.percentile(point_cloud[:, 2], 0.99)
            height = point_cloud[:, 2] - floor_height
            point_cloud = np.concatenate(
                [point_cloud, np.expand_dims(height, 1)], 1)  # (N,4) or (N,7)

        # Column ranges of the three XYZ votes inside point_votes
        # (column 0 is the vote mask).
        vote_slices = (slice(1, 4), slice(4, 7), slice(7, 10))

        # ------------------------------- DATA AUGMENTATION ------------------------------
        if self.augment:
            if np.random.random() > 0.5:
                # Flipping along the YZ plane
                point_cloud[:, 0] = -1 * point_cloud[:, 0]
                bboxes[:, 0] = -1 * bboxes[:, 0]
                bboxes[:, 6] = np.pi - bboxes[:, 6]
                point_votes[:, [1, 4, 7]] = -1 * point_votes[:, [1, 4, 7]]

            # Rotation along up-axis/Z-axis
            rot_angle = (np.random.random() * np.pi /
                         3) - np.pi / 6  # -30 ~ +30 degree
            rot_mat = sunrgbd_utils.rotz(rot_angle)

            # Votes are offsets: rotate the vote end points, then re-express
            # them relative to the rotated points.
            point_votes_end = np.zeros_like(point_votes)
            for vs in vote_slices:
                point_votes_end[:, vs] = np.dot(
                    point_cloud[:, 0:3] + point_votes[:, vs],
                    np.transpose(rot_mat))

            point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3],
                                         np.transpose(rot_mat))
            bboxes[:, 0:3] = np.dot(bboxes[:, 0:3], np.transpose(rot_mat))
            bboxes[:, 6] -= rot_angle
            for vs in vote_slices:
                point_votes[:, vs] = (point_votes_end[:, vs] -
                                      point_cloud[:, 0:3])

            # Augment RGB color
            if self.use_color:
                rgb_color = point_cloud[:, 3:6] + MEAN_COLOR_RGB
                rgb_color *= (1 + 0.4 * np.random.random(3) - 0.2
                              )  # brightness change for each channel
                rgb_color += (0.1 * np.random.random(3) - 0.05
                              )  # color shift for each channel
                rgb_color += np.expand_dims(
                    (0.05 * np.random.random(point_cloud.shape[0]) - 0.025),
                    -1)  # jittering on each pixel
                rgb_color = np.clip(rgb_color, 0, 1)
                # randomly drop out 30% of the points' colors
                rgb_color *= np.expand_dims(
                    np.random.random(point_cloud.shape[0]) > 0.3, -1)
                point_cloud[:, 3:6] = rgb_color - MEAN_COLOR_RGB

            # Augment point cloud scale: 0.85x-1.15x
            scale_ratio = np.random.random() * 0.3 + 0.85
            scale_ratio = np.expand_dims(np.tile(scale_ratio, 3), 0)
            point_cloud[:, 0:3] *= scale_ratio
            bboxes[:, 0:3] *= scale_ratio
            bboxes[:, 3:6] *= scale_ratio
            for vs in vote_slices:
                point_votes[:, vs] *= scale_ratio
            if self.use_height:
                point_cloud[:, -1] *= scale_ratio[0, 0]

        # ------------------------------- LABELS ------------------------------
        num_boxes = bboxes.shape[0]
        box3d_sizes = np.zeros((MAX_NUM_OBJ, 3))
        box3d_angles = np.zeros((MAX_NUM_OBJ, ))
        angle_classes = np.zeros((MAX_NUM_OBJ, ))
        angle_residuals = np.zeros((MAX_NUM_OBJ, ))
        size_classes = np.zeros((MAX_NUM_OBJ, ))
        size_residuals = np.zeros((MAX_NUM_OBJ, 3))
        label_mask = np.zeros((MAX_NUM_OBJ))
        label_mask[0:num_boxes] = 1
        max_bboxes = np.zeros((MAX_NUM_OBJ, 8))
        max_bboxes[0:num_boxes, :] = bboxes
        target_bboxes_mask = label_mask
        target_bboxes = np.zeros((MAX_NUM_OBJ, 6))

        # Per-point primitive labels, indexed by position in the sampled cloud.
        point_boundary_mask_z = np.zeros(self.num_points)
        point_boundary_mask_xy = np.zeros(self.num_points)
        point_boundary_offset_z = np.zeros([self.num_points, 3])
        point_boundary_offset_xy = np.zeros([self.num_points, 3])
        point_boundary_sem_z = np.zeros([self.num_points, 3 + 2 + 1])
        point_boundary_sem_xy = np.zeros([self.num_points, 3 + 1 + 1])
        point_line_mask = np.zeros(self.num_points)
        point_line_offset = np.zeros([self.num_points, 3])
        point_line_sem = np.zeros([self.num_points, 3 + 1])

        for i in range(num_boxes):
            bbox = bboxes[i]
            semantic_class = bbox[7]
            angle_class, angle_residual = DC.angle2class(bbox[6])
            # NOTE: The mean size stored in size2class is of full length of box edges,
            # while in sunrgbd_data.py data dumping we dumped *half* length l,w,h.. so have to time it by 2 here
            box3d_size = bbox[3:6] * 2
            size_class, size_residual = DC.size2class(
                box3d_size, DC.class2type[semantic_class])
            angle_classes[i] = angle_class
            angle_residuals[i] = angle_residual
            size_classes[i] = size_class
            size_residuals[i] = size_residual
            box3d_sizes[i, :] = box3d_size
            box3d_angles[i] = bbox[6]

            # Axis-aligned box enclosing the oriented box: (centre, extent).
            corners_3d = sunrgbd_utils.my_compute_box_3d(
                bbox[0:3], bbox[3:6], bbox[6])
            lo = np.min(corners_3d[:, 0:3], axis=0)
            hi = np.max(corners_3d[:, 0:3], axis=0)
            target_bboxes[i, 0:3] = (lo + hi) / 2
            target_bboxes[i, 3:6] = hi - lo

        point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                       self.num_points,
                                                       return_choices=True)
        semantics10_multi = [semantics10_multi[i] for i in choices]
        point_votes_mask = point_votes[choices, 0]
        point_votes = point_votes[choices, 1:]

        # ----------------------- local helpers for primitive labels -----------------------
        def _select_face(pts, plane):
            """Return (distances to plane, mask of points within DIST_THRESH of the closest one)."""
            alldist = np.abs(np.sum(pts * plane[:3], 1) + plane[-1])
            return alldist, np.abs(alldist - np.min(alldist)) < DIST_THRESH

        def _face_is_supported(alldist, sel):
            """True if enough selected points lie tightly enough on the face."""
            return np.sum(sel) > NUM_POINT and np.var(alldist[sel]) < VAR_THRESH

        def _label_lines(ind, pts, sel, line_sels, corner_pairs, corners,
                         sem_cls):
            """Write edge labels for each edge mask holding more than NUM_POINT_LINE points."""
            for line_sel, (a, b) in zip(line_sels, corner_pairs):
                if np.sum(line_sel) > NUM_POINT_LINE:
                    rows = ind[sel][line_sel]
                    linecenter = (corners[a] + corners[b]) / 2.0
                    point_line_mask[rows] = 1.0
                    point_line_offset[rows] = linecenter - pts[sel][line_sel]
                    point_line_sem[rows] = np.array([
                        linecenter[0], linecenter[1], linecenter[2], sem_cls
                    ])

        def _label_z_face(ind, pts, sel, corners, base, sem_cls):
            """Write horizontal-face labels; base is 0 (lower) or 1 (upper corner set)."""
            face_pts = pts[sel]
            center = (corners[base] + corners[base + 6]) / 2.0
            center[2] = np.mean(face_pts[:, 2])
            rows = ind[sel]
            point_boundary_mask_z[rows] = 1.0
            point_boundary_sem_z[rows] = np.array([
                center[0], center[1], center[2],
                np.linalg.norm(corners[base + 4] - corners[base]),
                np.linalg.norm(corners[base + 2] - corners[base]), sem_cls
            ])
            point_boundary_offset_z[rows] = center - face_pts

        def _label_xy_face(ind, pts, sel, corners, lo_c, hi_c, sem_cls):
            """Write vertical-face labels; lo_c/hi_c are the corners giving the face's z range."""
            face_pts = pts[sel]
            center = np.array([
                np.mean(face_pts[:, 0]),
                np.mean(face_pts[:, 1]),
                (corners[lo_c, 2] + corners[hi_c, 2]) / 2.0
            ])
            rows = ind[sel]
            point_boundary_mask_xy[rows] = 1.0
            point_boundary_sem_xy[rows] = np.array([
                center[0], center[1], center[2],
                corners[hi_c, 2] - corners[lo_c, 2], sem_cls
            ])
            point_boundary_offset_xy[rows] = center - face_pts

        def _vertical_plane_pair(v1, v2, anchor, far_corners, what, box_idx):
            """Return the unit-normal plane through anchor and the parallel plane through far_corners."""
            normal = np.cross(v1, v2)
            near = np.array(
                [normal[0], normal[1], normal[2], -np.dot(normal, anchor)])
            near /= np.linalg.norm(near[:3])
            if not (near[2] < LOWER_THRESH):
                raise ValueError(
                    'scan %s, box %d: %s face is not vertical '
                    '(normal z-component %r)' %
                    (scan_name, box_idx, what, near[2]))
            far = np.array([
                near[0], near[1], near[2],
                -np.mean(np.sum(far_corners * near[:3], 1))
            ])
            return near, far

        # ----------------------- geometric primitive labels -----------------------
        # box angle is -pi to pi
        for i in range(num_boxes):
            bbox = bboxes[i]
            sem_cls = bbox[7]
            corners = params2bbox(bbox[:3], 2 * bbox[3:6],
                                  clockwise2counter(bbox[6]))

            try:
                _, in_box = extract_pc_in_box3d(point_cloud, corners)
            except Exception:
                # Best effort, as before: a box whose point extraction fails
                # (presumably a degenerate box) simply gets no primitive
                # labels.  Narrowed from a bare except so KeyboardInterrupt /
                # SystemExit are no longer swallowed.
                continue
            ind_all_cls = np.where(in_box)[0]  # T/F to index

            # Keep only in-box points whose semantic ids include the box class.
            ind = np.array(
                [j for j in ind_all_cls if sem_cls in semantics10_multi[j]],
                dtype=np.intp)
            if ind.shape[0] < NUM_POINT_SEM_THRESHOLD:
                continue
            x = point_cloud[ind, :3]

            # --- horizontal faces: z = const planes through the box bottom/top
            upper_corners = np.array(
                [corners[1], corners[3], corners[5], corners[7]])
            plane_lower = np.array([0, 0, 1, -corners[6, -1]])
            if not (check_upright(upper_corners)
                    and plane_lower[0] + plane_lower[1] < LOWER_THRESH):
                raise ValueError('scan %s, box %d: box is not upright' %
                                 (scan_name, i))
            plane_upper = np.array([
                0, 0, 1, -np.mean(np.sum(upper_corners * plane_lower[:3], 1))
            ])
            # Kept as an equality test so only an explicit False result from
            # check_z triggers the error, exactly like the original condition.
            if check_z(plane_upper, upper_corners) == False:  # noqa: E712
                raise ValueError(
                    'scan %s, box %d: upper plane fails check_z' %
                    (scan_name, i))

            for plane, side, base, corner_pairs in (
                (plane_lower, 'lower', 0, ((0, 2), (4, 6), (0, 4), (2, 6))),
                (plane_upper, 'upper', 1, ((1, 3), (5, 7), (1, 5), (3, 7))),
            ):
                alldist, sel = _select_face(x, plane)
                line_sel1, line_sel2, line_sel3, line_sel4 = get_linesel(
                    x[sel], corners, side)
                _label_lines(ind, x, sel,
                             (line_sel1, line_sel2, line_sel3, line_sel4),
                             corner_pairs, corners, sem_cls)
                if _face_is_supported(alldist, sel):
                    _label_z_face(ind, x, sel, corners, base, sem_cls)

            # --- left / right faces (these also contribute the vertical edges)
            plane_left, plane_right = _vertical_plane_pair(
                corners[3] - corners[2], corners[2] - corners[0], corners[0],
                np.array([corners[4], corners[5], corners[6], corners[7]]),
                'left', i)
            for plane, side, lo_c, hi_c, corner_pairs in (
                (plane_left, 'left', 0, 1, ((0, 1), (2, 3))),
                (plane_right, 'right', 4, 5, ((4, 5), (6, 7))),
            ):
                alldist, sel = _select_face(x, plane)
                line_sel1, line_sel2 = get_linesel(x[sel], corners, side)
                _label_lines(ind, x, sel, (line_sel1, line_sel2),
                             corner_pairs, corners, sem_cls)
                if _face_is_supported(alldist, sel):
                    _label_xy_face(ind, x, sel, corners, lo_c, hi_c, sem_cls)

            # --- front / back faces (no edge labels: all 12 edges are covered above)
            plane_front, plane_back = _vertical_plane_pair(
                corners[0] - corners[4], corners[4] - corners[5], corners[5],
                np.array([corners[2], corners[3], corners[6], corners[7]]),
                'front', i)
            for plane, lo_c, hi_c in ((plane_front, 0, 1),
                                      (plane_back, 2, 3)):
                alldist, sel = _select_face(x, plane)
                if _face_is_supported(alldist, sel):
                    _label_xy_face(ind, x, sel, corners, lo_c, hi_c, sem_cls)

        target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
        target_bboxes_semcls[0:num_boxes] = bboxes[:, -1]  # from 0 to 9

        ret_dict = {
            'point_clouds': point_cloud.astype(np.float32),
            'center_label': target_bboxes.astype(np.float32)[:, 0:3],
            'heading_class_label': angle_classes.astype(np.int64),
            'heading_residual_label': angle_residuals.astype(np.float32),
            'size_class_label': size_classes.astype(np.int64),
            'size_residual_label': size_residuals.astype(np.float32),
            'sem_cls_label': target_bboxes_semcls.astype(np.int64),
            'box_label_mask': target_bboxes_mask.astype(np.float32),
            'vote_label': point_votes.astype(np.float32),
            'vote_label_mask': point_votes_mask.astype(np.int64),
            'scan_idx': np.array(idx).astype(np.int64),
            'max_gt_bboxes': max_bboxes,
            # new items
            'size_label': box3d_sizes.astype(np.float32),
            'heading_label': box3d_angles.astype(np.float32),
        }
        if self.use_height:
            ret_dict['floor_height'] = floor_height

        ret_dict.update({
            'point_boundary_mask_z': point_boundary_mask_z.astype(np.float32),
            'point_boundary_mask_xy':
            point_boundary_mask_xy.astype(np.float32),
            'point_boundary_offset_z':
            point_boundary_offset_z.astype(np.float32),
            'point_boundary_offset_xy':
            point_boundary_offset_xy.astype(np.float32),
            'point_boundary_sem_z': point_boundary_sem_z.astype(np.float32),
            'point_boundary_sem_xy': point_boundary_sem_xy.astype(np.float32),
            'point_line_mask': point_line_mask.astype(np.float32),
            'point_line_offset': point_line_offset.astype(np.float32),
            'point_line_sem': point_line_sem.astype(np.float32),
        })

        return ret_dict
Exemplo n.º 20
0
def extract_sunrgbd_data(idx_filename,
                         split,
                         output_folder,
                         num_point=20000,
                         type_whitelist=DEFAULT_TYPE_WHITELIST,
                         save_votes=False,
                         use_v1=False,
                         skip_empty_scene=True):
    """ Extract scene point clouds and bounding boxes (centroids, box sizes,
    heading angles, semantic classes) and dump them into `output_folder`.
    Dumped point clouds and boxes are in upright depth coord.

    Args:
        idx_filename: a TXT file where each line is an int number (index)
        split: training or testing
        output_folder: directory receiving the dumps (created when missing)
        num_point: sample count passed to pc_util.random_sampling
        type_whitelist: class names to keep; all other objects are ignored
        save_votes: whether to compute and save ground truth votes.
        use_v1: use the SUN RGB-D V1 data
        skip_empty_scene: if True, skip scenes that contain no object
            (no object in whitelist)

    Dumps:
        <id>_pc.npz of (N,6) where N is for number of subsampled points and 6 is
            for XYZ and RGB (in 0~1) in upright depth coord
        <id>_bbox.npy of (K,8) where K is the number of objects, 8 is for
            centroids (cx,cy,cz), dimension (l,w,h), heading_angle and semantic_class
        <id>_votes.npz of (N,10): column 0 is a 0/1 flag indicating whether the
            point belongs to an object, columns 1:10 hold three GT votes for up
            to three objects. If the point is only in one object's OBB, then the
            three GT votes are the same. For a point inside more than three
            boxes, each additional box overwrites the third vote.
    """
    dataset = sunrgbd_object('./sunrgbd_trainval', split, use_v1=use_v1)
    # Read the index list inside a context manager so the handle is closed.
    with open(idx_filename) as idx_file:
        data_idx_list = [int(line.rstrip()) for line in idx_file]

    # exist_ok avoids the check-then-create race and also creates parents.
    os.makedirs(output_folder, exist_ok=True)

    for data_idx in data_idx_list:
        print('------------- ', data_idx)
        # Index 479 is always skipped (the reason is not visible in this file).
        if data_idx == 479:
            continue
        objects = dataset.get_label_objects(data_idx)
        kept_objects = [
            obj for obj in objects if obj.classname in type_whitelist
        ]

        # Skip scenes with 0 whitelisted object
        if skip_empty_scene and not kept_objects:
            continue

        object_list = []
        for obj in kept_objects:
            obb = np.zeros((8))
            obb[0:3] = obj.centroid
            # Note that compared with data_viz, l, w, h are NOT multiplied by 2
            # here, and the heading angle is not flipped either.
            obb[3:6] = np.array([obj.l, obj.w, obj.h])
            obb[6] = obj.heading_angle
            obb[7] = sunrgbd_utils.type2class[obj.classname]
            object_list.append(obb)
        if object_list:
            obbs = np.vstack(object_list)  # (K,8)
        else:
            obbs = np.zeros((0, 8))

        pc_upright_depth = dataset.get_depth(data_idx)
        pc_upright_depth_subsampled = pc_util.random_sampling(
            pc_upright_depth, num_point)

        np.savez_compressed(os.path.join(output_folder,
                                         '%06d_pc.npz' % (data_idx)),
                            pc=pc_upright_depth_subsampled)
        np.save(os.path.join(output_folder, '%06d_bbox.npy' % (data_idx)),
                obbs)

        if not save_votes:
            continue

        N = pc_upright_depth_subsampled.shape[0]
        point_votes = np.zeros((N, 10))  # 3 votes and 1 vote mask
        # Next free vote slot per point, in the range of [0,2]
        point_vote_idx = np.zeros((N)).astype(np.int32)
        indices = np.arange(N)
        for obj in kept_objects:
            try:
                # Find all points in this object's OBB
                box3d_pts_3d = sunrgbd_utils.my_compute_box_3d(
                    obj.centroid, np.array([obj.l, obj.w, obj.h]),
                    obj.heading_angle)
                pc_in_box3d, inds = sunrgbd_utils.extract_pc_in_box3d(
                    pc_upright_depth_subsampled, box3d_pts_3d)
                # Assign first dimension to indicate it is in an object box
                point_votes[inds, 0] = 1
                # Add the votes (all 0 if the point is not in any object's OBB)
                votes = np.expand_dims(obj.centroid, 0) - pc_in_box3d[:, 0:3]
                # turn dense True,False inds to sparse number-wise inds
                sparse_inds = indices[inds]
                for i, j in enumerate(sparse_inds):
                    start = int(point_vote_idx[j]) * 3 + 1
                    point_votes[j, start:start + 3] = votes[i, :]
                    # Populate the other two slots with the first vote
                    if point_vote_idx[j] == 0:
                        point_votes[j, 4:7] = votes[i, :]
                        point_votes[j, 7:10] = votes[i, :]
                point_vote_idx[inds] = np.minimum(2, point_vote_idx[inds] + 1)
            except Exception as err:
                # Best effort: report the failing object and move on, but let
                # KeyboardInterrupt / SystemExit propagate.
                print('ERROR ----', data_idx, obj.classname, repr(err))
        np.savez_compressed(os.path.join(output_folder,
                                         '%06d_votes.npz' % (data_idx)),
                            point_votes=point_votes)
Exemplo n.º 21
0
    def __getitem__(self, index: int):
        """Load one scan from disk and assemble its label dictionary.

        Reads `point_cloud.npy`, `ins_vert.npy` and `bbox.npy` from the scan
        folder, optionally subsamples and augments the points, and derives
        per-point votes from the per-point instance ids.

        Returns a dict with the keys: point_clouds, center_label,
        heading_class_label and heading_residual_label (both all zeros),
        size_class_label, size_residual_label, sem_cls_label, box_label_mask,
        vote_label (the single vote repeated three times), vote_label_mask
        and scan_idx.
        """
        scan_id = self.scene_list[index]
        assert (scan_id not in self.error_scan)
        scan_dir = os.path.join(self.data_path, scan_id)

        def _load(stem):
            # Every per-scan array is stored as "<stem>.npy" in the scan folder.
            return np.load(os.path.join(scan_dir, '{}.npy'.format(stem)))

        points = _load('point_cloud')
        ins_vert = _load('ins_vert').squeeze(1)
        ins_bbox = _load('bbox')

        # Column layout of bbox.npy as consumed below:
        # 0:3 center, 3:6 box lengths, 6 semantic class, 7 symmetry.
        center = ins_bbox[:, 0:3]
        bbox_length = ins_bbox[:, 3:6]
        sem_cls = ins_bbox[:, 6:7]
        symmetry = ins_bbox[:, 7:8]
        K = center.shape[0]

        # ------------------------------- SAMPLING ------------------------------
        num_loaded = points.shape[0]
        if num_loaded > self.num_points:
            points, choices = pc_util.random_sampling(points,
                                                      self.num_points,
                                                      return_choices=True)
            ins_vert = ins_vert[choices]
        elif num_loaded < self.num_points:
            # Scans with too few points are only reported, not padded.
            print('false data')

        # ------------------------------- LABELS ------------------------------
        target_bboxes = np.zeros((MAX_NUM_OBJ, 6), dtype=np.float32)
        target_bboxes[:K, 0:3] = center
        target_bboxes[:K, 3:6] = bbox_length

        target_bboxes_mask = np.zeros((MAX_NUM_OBJ), dtype=np.int64)
        target_bboxes_mask[:K] = 1

        target_sem_cls = np.zeros((MAX_NUM_OBJ, ), dtype=np.int64)
        target_sem_cls[:K] = sem_cls.squeeze(1)

        # The symmetry target is filled in but is not part of the returned dict.
        target_sym = np.zeros((MAX_NUM_OBJ, ), dtype=np.int64)
        target_sym[:K] = symmetry.squeeze(1)

        # ------------------------------- DATA AUGMENTATION ------------------------------
        if self.augment:
            # Mirror across the YZ plane
            if np.random.random() > 0.5:
                points[:, 0] = -points[:, 0]
                target_bboxes[:, 0] = -target_bboxes[:, 0]

            # Mirror across the XZ plane
            if np.random.random() > 0.5:
                points[:, 1] = -points[:, 1]
                target_bboxes[:, 1] = -target_bboxes[:, 1]

            # Rotation around the up-axis/Z-axis, -5 ~ +5 degree
            rot_angle = np.random.random() * np.pi / 18 - np.pi / 36
            rot_mat = pc_util.rotz(rot_angle)
            points[:, 0:3] = np.dot(points[:, 0:3], np.transpose(rot_mat))
            target_bboxes = rotate_aligned_boxes(target_bboxes, rot_mat)

        target_center = target_bboxes[:, 0:3]

        # ====== GENERATE VOTES ======
        # Votes are computed *AFTER* augmentation.
        # Instance ids in ins_vert are 1-based, so id k maps to box slot k-1;
        # ids that map to a negative slot are skipped.
        point_votes = np.zeros([self.num_points, 3])
        point_votes_mask = np.zeros(self.num_points)
        for ins_id in np.unique(ins_vert):
            slot = ins_id - 1
            if slot < 0 or target_sem_cls[slot] in NOT_CARED_IDS:
                continue
            members = np.where(ins_vert == ins_id)[0]
            # NOTE(review): the vote is stored as point minus center; confirm
            # this sign convention against the loss that consumes it.
            point_votes[members, :] = points[members, :3] - target_center[slot]
            point_votes_mask[members] = 1.0
        point_votes = np.tile(point_votes, (1, 3))

        # The size class is simply the semantic class; the residual is measured
        # against that class's mean size.
        target_size_classes = np.zeros((MAX_NUM_OBJ, ))
        target_size_classes[:K] = target_sem_cls[:K]
        target_size_residuals = np.zeros((MAX_NUM_OBJ, 3))
        target_size_residuals[:K, :3] = (
            target_bboxes[:K, 3:6] - DC.mean_size_arr[target_sem_cls[:K], :])

        # ====== LABELS ======
        return {
            'point_clouds': points.astype(np.float32),
            'center_label': target_center.astype(np.float32),
            'heading_class_label': np.zeros((MAX_NUM_OBJ, )).astype(np.int64),
            'heading_residual_label':
            np.zeros((MAX_NUM_OBJ, )).astype(np.float32),
            'size_class_label': target_size_classes.astype(np.int64),
            'size_residual_label': target_size_residuals.astype(np.float32),
            'sem_cls_label': target_sem_cls.astype(np.int64),
            'box_label_mask': target_bboxes_mask.astype(np.float32),
            'vote_label': point_votes.astype(np.float32),
            'vote_label_mask': point_votes_mask.astype(np.int64),
            'scan_idx': np.array(index).astype(np.int64),
        }
Exemplo n.º 22
0
    def __getitem__(self, idx):
        """
        Load one scan and build its grasp-label dictionary.

        Returns a dict with following keys:
            point_clouds: (num_points,3+C) sampled points
            vote_label: per-point votes, i.e. columns 1: of the stored
                point_votes (the file comment says Nx10, which gives 9 columns:
                X1Y1Z1, X2Y2Z2, X3Y3Z3)
            vote_label_mask: (num_points,) column 0 of the stored point_votes
            scan_idx: int scan index in scan_names list
            width_label: (MAX_NUM_GRASP,) grasp width (column 6 of the grasps)
            quality_label: (MAX_NUM_GRASP,) grasp quality (column 5)
            center_label: (MAX_NUM_GRASP,3) for GT grasp point XYZ
            angle_class_label: (MAX_NUM_GRASP,) class from DC.angle2class
            angle_residual_label: (MAX_NUM_GRASP,) residual from DC.angle2class
            viewpoint_class_label: (MAX_NUM_GRASP,) viewpoint class (column 3)
            sem_cls_label: (MAX_NUM_GRASP,) semantic class index (last column)
            grasp_label_mask: (MAX_NUM_GRASP) as 0/1 with 1 marking a real grasp
        """
        scan_prefix = os.path.join(self.data_path, self.scan_names[idx])
        point_cloud = np.load(scan_prefix + '_pc.npz')['pc']  # Nx6
        grasps = np.load(scan_prefix + '_grasp.npy')  # K,8
        point_votes = np.load(scan_prefix + '_votes.npz')['point_votes']  # Nx10

        if not self.use_color:
            point_cloud = point_cloud[:, 0:3]
        else:
            point_cloud = point_cloud[:, 0:6]
            point_cloud[:, 3:] = (point_cloud[:, 3:] - MEAN_COLOR_RGB)

        if self.use_height:
            # The 0.99th percentile of z (just above the minimum) is the floor.
            floor_height = np.percentile(point_cloud[:, 2], 0.99)
            height = point_cloud[:, 2] - floor_height
            point_cloud = np.concatenate(
                [point_cloud, np.expand_dims(height, 1)], 1)  # (N,4) or (N,7)

        # ------------------------------- LABELS ------------------------------
        # Every label array is zero-padded up to MAX_NUM_GRASP rows.
        num_grasps = grasps.shape[0]
        grasp_centers = np.zeros((MAX_NUM_GRASP, 3))
        angle_classes = np.zeros((MAX_NUM_GRASP, ))
        angle_residuals = np.zeros((MAX_NUM_GRASP, ))
        viewpoint_classes = np.zeros((MAX_NUM_GRASP, ))
        widths = np.zeros((MAX_NUM_GRASP, ))
        qualities = np.zeros((MAX_NUM_GRASP, ))
        target_grasps_semcls = np.zeros((MAX_NUM_GRASP))
        target_grasps_mask = np.zeros((MAX_NUM_GRASP))

        # Plain column copies need no per-row loop.
        grasp_centers[:num_grasps] = grasps[:, 0:3]
        viewpoint_classes[:num_grasps] = grasps[:, 3]
        qualities[:num_grasps] = grasps[:, 5]
        widths[:num_grasps] = grasps[:, 6]
        target_grasps_semcls[:num_grasps] = grasps[:, -1]
        target_grasps_mask[:num_grasps] = 1

        # angle2class is called one angle at a time, as in the original code.
        for i in range(num_grasps):
            angle_classes[i], angle_residuals[i] = DC.angle2class(grasps[i, 4])

        # Subsample the points and keep the votes aligned with the selection.
        point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                       self.num_points,
                                                       return_choices=True)
        point_votes_mask = point_votes[choices, 0]
        point_votes = point_votes[choices, 1:]

        ret_dict = {}
        ret_dict['point_clouds'] = point_cloud.astype(np.float32)
        ret_dict['vote_label'] = point_votes.astype(np.float32)
        ret_dict['vote_label_mask'] = point_votes_mask.astype(np.int64)
        ret_dict['scan_idx'] = np.array(idx).astype(np.int64)

        ret_dict['width_label'] = widths.astype(np.float32)
        ret_dict['quality_label'] = qualities.astype(np.float32)

        ret_dict['center_label'] = grasp_centers.astype(np.float32)
        ret_dict['angle_class_label'] = angle_classes.astype(np.int64)
        ret_dict['angle_residual_label'] = angle_residuals.astype(np.float32)
        ret_dict['viewpoint_class_label'] = viewpoint_classes.astype(np.int64)
        ret_dict['sem_cls_label'] = target_grasps_semcls.astype(np.int64)
        ret_dict['grasp_label_mask'] = target_grasps_mask.astype(np.float32)

        return ret_dict
Exemplo n.º 23
0
    def __getitem__(self, idx):
        """
        Load one scan and build the labels plus a second ("ema") view of the
        same sampled points that receives its own independent flips.

        Returns a dict with following keys:
            point_clouds: (N,3+C)
            center_label: (MAX_NUM_OBJ,3) for GT box center XYZ
            heading_class_label: (MAX_NUM_OBJ,) all zeros here
            heading_residual_label: (MAX_NUM_OBJ,) all zeros here
            size_class_label: (MAX_NUM_OBJ,) set to the semantic class index
            size_residual_label: (MAX_NUM_OBJ,3)
            sem_cls_label: (MAX_NUM_OBJ,) semantic class index
            box_label_mask: (MAX_NUM_OBJ) as 0/1 with 1 indicating a real box
            vote_label: (N,9), the same XYZ vote repeated three times
            vote_label_mask: (N,) 0/1, 1 when the point's instance has a
                semantic label listed in DC.nyu40ids
            scan_idx: int scan index in scan_names list
            supervised_mask: constant 1
            scene_label: (DC.num_class,) 0/1 flag per class present in the scan
            ema_point_clouds: copy of the sampled points, flipped independently
            flip_x_axis, flip_y_axis, rot_mat, scale: augmentation applied to
                point_clouds
            flip_x_axis_ema, flip_y_axis_ema: flips applied to ema_point_clouds
        """
        scan_prefix = os.path.join(self.data_path, self.scan_names[idx])
        mesh_vertices = np.load(scan_prefix + '_vert.npy')
        instance_labels = np.load(scan_prefix + '_ins_label.npy')
        semantic_labels = np.load(scan_prefix + '_sem_label.npy')
        instance_bboxes = np.load(scan_prefix + '_bbox.npy')
        num_boxes = instance_bboxes.shape[0]

        if self.use_color:
            raw_point_cloud = mesh_vertices[:, 0:6]
            raw_point_cloud[:, 3:] = (raw_point_cloud[:, 3:] -
                                      MEAN_COLOR_RGB) / 256.0
        else:
            raw_point_cloud = mesh_vertices[:, 0:3]

        if self.use_height:
            # Height above the 0.99th percentile of z is appended as a column.
            floor_height = np.percentile(raw_point_cloud[:, 2], 0.99)
            height_column = (raw_point_cloud[:, 2] - floor_height)[:, None]
            raw_point_cloud = np.concatenate(
                [raw_point_cloud, height_column], 1)

        # ------------------------------- LABELS ------------------------------
        target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
        target_bboxes_mask = np.zeros((MAX_NUM_OBJ))
        angle_classes = np.zeros((MAX_NUM_OBJ, ))
        angle_residuals = np.zeros((MAX_NUM_OBJ, ))
        size_classes = np.zeros((MAX_NUM_OBJ, ))
        size_residuals = np.zeros((MAX_NUM_OBJ, 3))
        target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))

        point_cloud, choices = pc_util.random_sampling(raw_point_cloud,
                                                       self.num_points,
                                                       return_choices=True)
        # The ema view starts from the very same sampled points.
        ema_point_cloud = point_cloud.copy()
        instance_labels = instance_labels[choices]
        semantic_labels = semantic_labels[choices]

        target_bboxes_mask[0:num_boxes] = 1
        target_bboxes[0:num_boxes, :] = instance_bboxes[:, 0:6]

        # ------------------------------- DATA AUGMENTATION ------------------------------
        flip_x_axis = flip_y_axis = 0
        flip_x_axis_ema = flip_y_axis_ema = 0
        rot_mat = np.identity(3)
        scale_ratio = np.ones((1, 3))
        if self.augment:
            # Draw order: x flip, x flip (ema), y flip, y flip (ema).
            flip_x_axis = int(np.random.random() > 0.5)
            flip_x_axis_ema = int(np.random.random() > 0.5)
            flip_y_axis = int(np.random.random() > 0.5)
            flip_y_axis_ema = int(np.random.random() > 0.5)

            if flip_x_axis:
                # Flipping along the YZ plane
                point_cloud[:, 0] = -point_cloud[:, 0]
                target_bboxes[:, 0] = -target_bboxes[:, 0]
            if flip_x_axis_ema:
                # Flipping along the YZ plane for ema
                ema_point_cloud[:, 0] = -ema_point_cloud[:, 0]
            if flip_y_axis:
                # Flipping along the XZ plane
                point_cloud[:, 1] = -point_cloud[:, 1]
                target_bboxes[:, 1] = -target_bboxes[:, 1]
            if flip_y_axis_ema:
                # Flipping along the XZ plane for ema
                ema_point_cloud[:, 1] = -ema_point_cloud[:, 1]

            # Rotation along up-axis/Z-axis, -5 ~ +5 degree
            rot_angle = np.random.random() * np.pi / 18 - np.pi / 36
            rot_mat = pc_util.rotz(rot_angle)
            point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3],
                                         np.transpose(rot_mat))
            target_bboxes = rotate_aligned_boxes(target_bboxes, rot_mat)

            # Augment point cloud scale: 0.85x-1.15x
            scale_ratio = np.full((1, 3), np.random.random() * 0.3 + 0.85)
            point_cloud[:, 0:3] *= scale_ratio
            target_bboxes[:, 0:3] *= scale_ratio
            target_bboxes[:, 3:6] *= scale_ratio
            if self.use_height:
                point_cloud[:, -1] *= scale_ratio[0, 0]

        # Votes are computed *AFTER* augmentation.
        # There is no map between bbox instance labels and the per-point
        # instance_labels (it was filtered out in the data preparation step),
        # so each instance's box center is recomputed from the points that
        # share its label.
        point_votes = np.zeros([self.num_points, 3])
        point_votes_mask = np.zeros(self.num_points)
        for i_instance in np.unique(instance_labels):
            members = np.nonzero(instance_labels == i_instance)[0]
            # The first member's semantic label decides whether to keep it.
            if semantic_labels[members[0]] not in DC.nyu40ids:
                continue
            xyz = point_cloud[members, :3]
            center = 0.5 * (xyz.min(0) + xyz.max(0))
            point_votes[members, :] = center - xyz
            point_votes_mask[members] = 1.0
        point_votes = np.tile(point_votes, (1, 3))  # make 3 votes identical

        # Position of each box's semantic id (last column) within DC.nyu40ids.
        class_ind = [
            np.nonzero(DC.nyu40ids == sem_id)[0][0]
            for sem_id in instance_bboxes[:, -1]
        ]
        # NOTE: set size class as semantic class. Consider use size2class.
        size_classes[0:num_boxes] = class_ind
        size_residuals[0:num_boxes, :] = (target_bboxes[0:num_boxes, 3:6] -
                                          DC.mean_size_arr[class_ind, :])
        target_bboxes_semcls[0:num_boxes] = class_ind

        # Flag every class that occurs at least once in this scan.
        scene_label = np.zeros(DC.num_class)
        for present_class in set(class_ind):
            scene_label[int(present_class)] = 1

        return {
            'point_clouds': point_cloud.astype(np.float32),
            'center_label': target_bboxes.astype(np.float32)[:, 0:3],
            'heading_class_label': angle_classes.astype(np.int64),
            'heading_residual_label': angle_residuals.astype(np.float32),
            'size_class_label': size_classes.astype(np.int64),
            'size_residual_label': size_residuals.astype(np.float32),
            'sem_cls_label': target_bboxes_semcls.astype(np.int64),
            'box_label_mask': target_bboxes_mask.astype(np.float32),
            'vote_label': point_votes.astype(np.float32),
            'vote_label_mask': point_votes_mask.astype(np.int64),
            'scan_idx': np.array(idx).astype(np.int64),
            'supervised_mask': np.array(1).astype(np.int64),
            'scene_label': scene_label.astype(np.float32),
            'ema_point_clouds': ema_point_cloud.astype(np.float32),
            'flip_x_axis': np.array(flip_x_axis).astype(np.int64),
            'flip_y_axis': np.array(flip_y_axis).astype(np.int64),
            'rot_mat': rot_mat.astype(np.float32),
            'scale': np.array(scale_ratio).astype(np.float32),
            'flip_x_axis_ema': np.array(flip_x_axis_ema).astype(np.int64),
            'flip_y_axis_ema': np.array(flip_y_axis_ema).astype(np.int64),
        }
    def __getitem__(self, idx):
        """
        Load one scan, augment it, and build the detection labels.

        Returns a dict with following keys:
            point_clouds: (N,3+C)
            center_label: (MAX_NUM_OBJ,3) for GT box center XYZ
            heading_class_label: (MAX_NUM_OBJ,) class from DC.angle2class
            heading_residual_label: (MAX_NUM_OBJ,)
            size_class_label: (MAX_NUM_OBJ,) class from DC.size2class
            size_residual_label: (MAX_NUM_OBJ,3)
            sem_cls_label: (MAX_NUM_OBJ,) semantic class index
            box_label_mask: (MAX_NUM_OBJ) as 0/1 with 1 indicating a real box
            vote_label: (N,9) with votes XYZ (3 votes: X1Y1Z1, X2Y2Z2, X3Y3Z3)
                if there is only one vote then X1==X2==X3 etc.
            vote_label_mask: (N,) with 0/1 with 1 indicating the point
                is in one of the object's OBB.
            scan_idx: int scan index in scan_names list
            max_gt_bboxes: (MAX_NUM_OBJ,8) zero-padded copy of the (augmented)
                boxes

        Raises:
            NotImplementedError: if both use_color and use_box2d are set.
        """
        # Reject the unsupported configuration before doing any file I/O.
        # (The exception type is NotImplementedError; the NotImplemented
        # constant is not callable and would raise a TypeError instead.)
        if self.use_color and self.use_box2d:
            raise NotImplementedError(
                'color and 2d bounding box at the same time is not implemented'
            )

        scan_prefix = os.path.join(self.data_path, self.scan_names[idx])
        point_cloud = np.load(scan_prefix + '_pc.npz')['pc']  # Nx6
        bboxes = np.load(scan_prefix + '_bbox.npy')  # K,8
        point_votes = np.load(scan_prefix +
                              '_votes.npz')['point_votes']  # Nx10
        bbox2ds = np.load(scan_prefix + '_bbox2d.npy')
        bbox2d_probs = np.load(scan_prefix + '_bbox2d_prob.npy')
        calib_Rtilt = np.load(scan_prefix + '_calib_Rtilt.npy')
        calib_K = np.load(scan_prefix + '_calib_K.npy')

        if not self.use_color:
            # Without color, the points are passed through get_box2d_feature
            # together with the 2D boxes and the calibration.
            point_cloud = get_box2d_feature(point_cloud, bbox2ds, bbox2d_probs,
                                            calib_Rtilt, calib_K)
        else:
            point_cloud = point_cloud[:, 0:6]
            point_cloud[:, 3:] = (point_cloud[:, 3:] - MEAN_COLOR_RGB)

        if self.use_height:
            # The 0.99th percentile of z (just above the minimum) is the floor.
            floor_height = np.percentile(point_cloud[:, 2], 0.99)
            height = point_cloud[:, 2] - floor_height
            point_cloud = np.concatenate(
                [point_cloud, np.expand_dims(height, 1)], 1)  # (N,4) or (N,7)

        # Column 0 of point_votes is the mask; the three votes follow.
        vote_columns = (slice(1, 4), slice(4, 7), slice(7, 10))

        # ------------------------------- DATA AUGMENTATION ------------------------------
        if self.augment:
            if np.random.random() > 0.5:
                # Flipping along the YZ plane
                point_cloud[:, 0] = -1 * point_cloud[:, 0]
                bboxes[:, 0] = -1 * bboxes[:, 0]
                bboxes[:, 6] = np.pi - bboxes[:, 6]
                point_votes[:, [1, 4, 7]] = -1 * point_votes[:, [1, 4, 7]]

            # Rotation along up-axis/Z-axis
            rot_angle = (np.random.random() * np.pi /
                         3) - np.pi / 6  # -30 ~ +30 degree
            rot_mat = sunrgbd_utils.rotz(rot_angle)
            rot_mat_t = np.transpose(rot_mat)

            # Rotate the vote end points (point + vote) first, then the points
            # themselves, and recover each vote as the difference.
            point_votes_end = np.zeros_like(point_votes)
            for cols in vote_columns:
                point_votes_end[:, cols] = np.dot(
                    point_cloud[:, 0:3] + point_votes[:, cols], rot_mat_t)

            point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3], rot_mat_t)
            bboxes[:, 0:3] = np.dot(bboxes[:, 0:3], rot_mat_t)
            bboxes[:, 6] -= rot_angle
            for cols in vote_columns:
                point_votes[:, cols] = (point_votes_end[:, cols] -
                                        point_cloud[:, 0:3])

            # Augment RGB color
            if self.use_color:
                rgb_color = point_cloud[:, 3:6] + MEAN_COLOR_RGB
                rgb_color *= (1 + 0.4 * np.random.random(3) - 0.2
                              )  # brightness change for each channel
                rgb_color += (0.1 * np.random.random(3) - 0.05
                              )  # color shift for each channel
                rgb_color += np.expand_dims(
                    (0.05 * np.random.random(point_cloud.shape[0]) - 0.025),
                    -1)  # jittering on each pixel
                rgb_color = np.clip(rgb_color, 0, 1)
                # randomly drop out 30% of the points' colors
                rgb_color *= np.expand_dims(
                    np.random.random(point_cloud.shape[0]) > 0.3, -1)
                point_cloud[:, 3:6] = rgb_color - MEAN_COLOR_RGB

            # Augment point cloud scale: 0.85x-1.15x
            scale_ratio = np.random.random() * 0.3 + 0.85
            scale_ratio = np.expand_dims(np.tile(scale_ratio, 3), 0)
            point_cloud[:, 0:3] *= scale_ratio
            bboxes[:, 0:3] *= scale_ratio
            bboxes[:, 3:6] *= scale_ratio
            for cols in vote_columns:
                point_votes[:, cols] *= scale_ratio
            if self.use_height:
                point_cloud[:, -1] *= scale_ratio[0, 0]

        # ------------------------------- LABELS ------------------------------
        # Every label array is zero-padded up to MAX_NUM_OBJ rows.
        num_boxes = bboxes.shape[0]
        angle_classes = np.zeros((MAX_NUM_OBJ, ))
        angle_residuals = np.zeros((MAX_NUM_OBJ, ))
        size_classes = np.zeros((MAX_NUM_OBJ, ))
        size_residuals = np.zeros((MAX_NUM_OBJ, 3))
        target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
        target_bboxes_mask = np.zeros((MAX_NUM_OBJ))
        target_bboxes_mask[0:num_boxes] = 1
        max_bboxes = np.zeros((MAX_NUM_OBJ, 8))
        max_bboxes[0:num_boxes, :] = bboxes
        target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
        target_bboxes_semcls[0:num_boxes] = bboxes[:, -1]  # from 0 to 9

        for i in range(num_boxes):
            bbox = bboxes[i]
            semantic_class = bbox[7]
            angle_classes[i], angle_residuals[i] = DC.angle2class(bbox[6])
            # NOTE: The mean size stored in size2class is of full length of box edges,
            # while sunrgbd_data.py dumped *half* length l,w,h, so multiply by 2 here.
            size_classes[i], size_residuals[i] = DC.size2class(
                bbox[3:6] * 2, DC.class2type[semantic_class])

            # Axis-aligned box enclosing the oriented box's corners:
            # center in columns 0:3, extent in columns 3:6.
            corners_3d = sunrgbd_utils.my_compute_box_3d(
                bbox[0:3], bbox[3:6], bbox[6])
            corner_min = np.min(corners_3d[:, 0:3], axis=0)
            corner_max = np.max(corners_3d[:, 0:3], axis=0)
            target_bboxes[i, 0:3] = (corner_min + corner_max) / 2
            target_bboxes[i, 3:6] = corner_max - corner_min

        # Subsample the points and keep the votes aligned with the selection.
        point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                       self.num_points,
                                                       return_choices=True)
        point_votes_mask = point_votes[choices, 0]
        point_votes = point_votes[choices, 1:]

        ret_dict = {}
        ret_dict['point_clouds'] = point_cloud.astype(np.float32)
        ret_dict['center_label'] = target_bboxes.astype(np.float32)[:, 0:3]
        ret_dict['heading_class_label'] = angle_classes.astype(np.int64)
        ret_dict['heading_residual_label'] = angle_residuals.astype(np.float32)
        ret_dict['size_class_label'] = size_classes.astype(np.int64)
        ret_dict['size_residual_label'] = size_residuals.astype(np.float32)
        ret_dict['sem_cls_label'] = target_bboxes_semcls.astype(np.int64)
        ret_dict['box_label_mask'] = target_bboxes_mask.astype(np.float32)
        ret_dict['vote_label'] = point_votes.astype(np.float32)
        ret_dict['vote_label_mask'] = point_votes_mask.astype(np.int64)
        ret_dict['scan_idx'] = np.array(idx).astype(np.int64)
        ret_dict['max_gt_bboxes'] = max_bboxes
        return ret_dict
Exemplo n.º 25
0
def run_votenet_full_tour(point_cloud):
    """Run the detector on overlapping crops of a scene and merge the outputs.

    The first element of ``point_cloud`` is moved to the CPU and converted to
    a NumPy array (assumes a batched torch tensor -- TODO confirm with caller).
    A 4.0 x 4.0 x 3.0 window is slid over the bounding box of its first three
    columns with strides 2.0 / 2.0 / 1.5 along x / y / z. Windows containing
    fewer than 30000 points are skipped. Every kept window is

    * written to ``debug_dump_pc/debug_pc_<n>.ply`` for inspection,
    * centred on the midpoint of its own bounding box,
    * randomly sampled to ``NUM_POINT`` points and passed through ``net``.

    The predicted ``center`` is shifted back by the window midpoint, and the
    selected output tensors of all windows are concatenated along dim 1.

    :param point_cloud: indexable container whose first element is a tensor of
        scene points (columns 0..2 are used as XYZ).
    :return: dict mapping each selected output key to the merged tensor; the
        dict is empty when no window holds enough points.
    """
    scene = point_cloud[0].cpu().clone().numpy()

    scene_min = np.min(scene[:, :3], axis=0)
    scene_max = np.max(scene[:, :3], axis=0)

    # Network outputs that are accumulated across crops.
    merged_keys = [
        'center',
        'heading_scores',
        'heading_residuals',
        'heading_residuals_normalized',
        'size_scores',
        'size_residuals',
        'size_residuals_normalized',
        'sem_cls_scores',
        'objectness_scores',

        'seed_xyz',
        'vote_xyz',
        'seed_inds',
        'aggregated_vote_xyz',
        'aggregated_vote_inds',
    ]

    end_points = {}
    cpt = 0
    for x in np.arange(scene_min[0], scene_max[0], 2.0):
        for y in np.arange(scene_min[1], scene_max[1], 2.0):
            for z in np.arange(scene_min[2], scene_max[2], 1.5):

                # Half-open window (lo, lo + extent] on every axis.
                inside = ((scene[:, 0] > x) & (scene[:, 0] <= x + 4.0) &
                          (scene[:, 1] > y) & (scene[:, 1] <= y + 4.0) &
                          (scene[:, 2] > z) & (scene[:, 2] <= z + 3.0))

                crop = scene[inside, :].copy()

                # Sparse windows are not evaluated.
                if crop.shape[0] < 30000:
                    continue

                cpt += 1

                # Dump the raw (un-centred) crop for debugging.
                debug_pcd = o3d.geometry.PointCloud()
                debug_pcd.points = o3d.utility.Vector3dVector(
                    crop[:, :3].copy())
                o3d.io.write_point_cloud(f'debug_dump_pc/debug_pc_{cpt}.ply',
                                         debug_pcd)

                # Centre the crop on the midpoint of its bounding box.
                crop_lo = np.min(crop[:, :3], axis=0)
                crop_hi = np.max(crop[:, :3], axis=0)
                crop_mid = (crop_lo + crop_hi) / 2.0
                crop[:, :3] -= crop_mid

                # The sampled indices are not needed afterwards.
                sampled, _ = pc_util.random_sampling(crop.copy(),
                                                     NUM_POINT,
                                                     return_choices=True)

                inputs = {
                    'point_clouds':
                    torch.FloatTensor(sampled).to(device).unsqueeze(0)
                }
                with torch.no_grad():
                    crop_out = net(inputs)

                    # Move predicted centres back into scene coordinates.
                    offset = torch.FloatTensor(crop_mid).unsqueeze(0).unsqueeze(0)
                    offset = offset.repeat(1, crop_out['center'].shape[1], 1)
                    offset = offset.to(device)
                    crop_out['center'] += offset

                    for k in merged_keys:
                        piece = crop_out[k].detach().clone()
                        if k in end_points:
                            end_points[k] = torch.cat((end_points[k], piece),
                                                      dim=1)
                        else:
                            # First kept crop: start the accumulator.
                            end_points[k] = piece
    print('numer of crops PC : ', cpt)
    return end_points
Exemplo n.º 26
0
    def __getitem__(self, idx):
        """
        Load scan ``idx``, optionally augment it, and build the training sample.

        Returns a dict with the following keys:
            point_clouds: (N,3+C) augmented and sub-sampled points
            center_label: (MAX_NUM_OBJ,3) for GT box center XYZ
            heading_class_label: (MAX_NUM_OBJ,) heading bin index
            heading_residual_label: (MAX_NUM_OBJ,)
            size_class_label: (MAX_NUM_OBJ,) size cluster index
            size_residual_label: (MAX_NUM_OBJ,3)
            sem_cls_label: (MAX_NUM_OBJ,) semantic class index
            box_label_mask: (MAX_NUM_OBJ) as 0/1 with 1 marking a real box
            vote_label: (N,9) with votes XYZ (3 votes: X1Y1Z1, X2Y2Z2, X3Y3Z3)
            vote_label_mask: (N,) taken from column 0 of the stored votes
            scan_idx: int scan index in scan_names list
            supervised_mask: scalar, always 1
            ema_point_clouds: the same sampled points taken from the cloud as
                it was *before* augmentation, with its own random x-flip
            flip_x_axis / flip_y_axis: 0/1 flags for ``point_clouds``
                (flip_y_axis is never set by this method)
            rot_mat: (3,3) rotation applied to ``point_clouds``
            scale: (1,3) scale applied to ``point_clouds``
            flip_x_axis_ema / flip_y_axis_ema: 0/1 flags for
                ``ema_point_clouds`` (flip_y_axis_ema is never set here)
        """
        scan_prefix = os.path.join(self.data_path, self.scan_names[idx])
        point_cloud = np.load(scan_prefix + '_pc.npz')['pc']  # Nx6
        bboxes = np.load(scan_prefix + '_bbox.npy')  # K,8
        point_votes = np.load(scan_prefix + '_votes.npz')['point_votes']  # Nx10

        if self.use_color:
            point_cloud = point_cloud[:, 0:6]
            point_cloud[:, 3:] = (point_cloud[:, 3:] - MEAN_COLOR_RGB)
        else:
            point_cloud = point_cloud[:, 0:3]

        if self.use_height:
            # Height is measured relative to a low percentile of z.
            floor_z = np.percentile(point_cloud[:, 2], 0.99)
            rel_height = point_cloud[:, 2] - floor_z
            point_cloud = np.concatenate(
                [point_cloud, np.expand_dims(rel_height, 1)],
                1)  # (N,4) or (N,7)

        # Snapshot before augmentation; used for the EMA input below.
        raw_points = point_cloud.copy()

        # ------------------------------- DATA AUGMENTATION ------------------------------
        flip_x_axis, flip_y_axis = 0, 0
        flip_x_axis_ema, flip_y_axis_ema = 0, 0
        rot_mat = np.identity(3)
        scale_ratio = np.ones((1, 3))
        # Column ranges of the three votes stored in point_votes.
        vote_cols = (slice(1, 4), slice(4, 7), slice(7, 10))
        if self.augment:
            if np.random.random() > 0.5:
                # Mirror across the YZ plane.
                flip_x_axis = 1
                point_cloud[:, 0] = -1 * point_cloud[:, 0]
                bboxes[:, 0] = -1 * bboxes[:, 0]
                bboxes[:, 6] = np.pi - bboxes[:, 6]
                point_votes[:, [1, 4, 7]] = -1 * point_votes[:, [1, 4, 7]]

            # Rotate about the up (Z) axis by an angle in [-30, +30) degrees.
            rot_angle = (np.random.random() * np.pi / 3) - np.pi / 6
            rot_mat = sunrgbd_utils.rotz(rot_angle)
            rot_mat_t = np.transpose(rot_mat)

            # Rotate the vote end points first, while the points are still
            # in their un-rotated frame.
            vote_ends = np.zeros_like(point_votes)
            for cols in vote_cols:
                vote_ends[:, cols] = np.dot(
                    point_cloud[:, 0:3] + point_votes[:, cols], rot_mat_t)

            point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3], rot_mat_t)
            bboxes[:, 0:3] = np.dot(bboxes[:, 0:3], rot_mat_t)
            bboxes[:, 6] -= rot_angle
            # Votes are offsets: rotated end point minus rotated start point.
            for cols in vote_cols:
                point_votes[:, cols] = vote_ends[:, cols] - point_cloud[:, 0:3]

            # Uniform scale in [0.85, 1.15), replicated for x, y and z.
            scale_ratio = np.expand_dims(
                np.tile(np.random.random() * 0.3 + 0.85, 3), 0)
            point_cloud[:, 0:3] *= scale_ratio
            for cols in (slice(0, 3), slice(3, 6)):
                bboxes[:, cols] *= scale_ratio
            for cols in vote_cols:
                point_votes[:, cols] *= scale_ratio
            if self.use_height:
                point_cloud[:, -1] *= scale_ratio[0, 0]

        # ------------------------------- LABELS ------------------------------
        num_boxes = bboxes.shape[0]
        target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
        target_bboxes_mask = np.zeros((MAX_NUM_OBJ))
        angle_classes = np.zeros((MAX_NUM_OBJ, ))
        angle_residuals = np.zeros((MAX_NUM_OBJ, ))
        size_classes = np.zeros((MAX_NUM_OBJ, ))
        size_residuals = np.zeros((MAX_NUM_OBJ, 3))
        target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))

        target_bboxes_mask[0:num_boxes] = 1
        target_bboxes[0:num_boxes, :] = bboxes[:, 0:6]

        for i, bbox in enumerate(bboxes):
            semantic_class = bbox[7]
            angle_classes[i], angle_residuals[i] = DC.angle2class(bbox[6])
            # NOTE: size2class works with full edge lengths while the dumped
            # boxes store *half* lengths l,w,h, hence the factor of two.
            size_classes[i], size_residuals[i] = DC.size2class(
                bbox[3:6] * 2, DC.class2type[semantic_class])
            target_bboxes_semcls[i] = semantic_class

        point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                       self.num_points,
                                                       return_choices=True)
        point_votes_mask = point_votes[choices, 0]
        point_votes = point_votes[choices, 1:]

        # EMA input: same points, un-augmented, with an independent x-flip.
        ema_point_cloud = raw_points[choices]
        if self.augment and np.random.random() > 0.5:
            flip_x_axis_ema = 1
            ema_point_cloud[:, 0] = -1 * ema_point_cloud[:, 0]

        ret_dict = {
            'point_clouds': point_cloud.astype(np.float32),
            'center_label': target_bboxes.astype(np.float32)[:, 0:3],
            'heading_class_label': angle_classes.astype(np.int64),
            'heading_residual_label': angle_residuals.astype(np.float32),
            'size_class_label': size_classes.astype(np.int64),
            'size_residual_label': size_residuals.astype(np.float32),
            'sem_cls_label': target_bboxes_semcls.astype(np.int64),
            'box_label_mask': target_bboxes_mask.astype(np.float32),
            'vote_label': point_votes.astype(np.float32),
            'vote_label_mask': point_votes_mask.astype(np.int64),
            'scan_idx': np.array(idx).astype(np.int64),
            'supervised_mask': np.array(1).astype(np.int64),
            'ema_point_clouds': ema_point_cloud.astype(np.float32),
            'flip_x_axis': np.array(flip_x_axis).astype(np.int64),
            'flip_y_axis': np.array(flip_y_axis).astype(np.int64),
            'rot_mat': rot_mat.astype(np.float32),
            'scale': np.array(scale_ratio).astype(np.float32),
            'flip_x_axis_ema': np.array(flip_x_axis_ema).astype(np.int64),
            'flip_y_axis_ema': np.array(flip_y_axis_ema).astype(np.int64),
        }
        return ret_dict
    def __getitem__(self, idx):
        """
        Load scan ``idx`` and build box and vote targets from instance labels.

        The scan is sub-sampled to ``self.num_points`` points once, and the
        per-point instance/semantic labels are sub-sampled with the same
        indices so that votes line up with the returned points.
        No augmentation is applied; ``self.augment`` is not consulted.

        Returns a dict with the following keys:
            point_clouds: (num_points,3+C) sampled points
            center_label: (MAX_NUM_OBJ,3) center of the axis-aligned box
                enclosing each GT box
            heading_class_label: (MAX_NUM_OBJ,) heading bin index
            heading_residual_label: (MAX_NUM_OBJ,)
            size_class_label: (MAX_NUM_OBJ,) size cluster index
            size_residual_label: (MAX_NUM_OBJ,3)
            sem_cls_label: (MAX_NUM_OBJ,) last column of the stored boxes
            box_label_mask: (MAX_NUM_OBJ) as 0/1 with 1 marking a real box
            vote_label: (num_points,9), the same XYZ vote repeated 3 times
            vote_label_mask: (num_points,) 1 for points of an instance whose
                semantic class is in DC.type2class
            scan_idx: int scan index in scan_names list
        """
        scan_prefix = os.path.join(self.data_path, self.scan_names[idx])
        mesh_vertices = np.load(scan_prefix + '_vert.npy')
        instance_labels = np.load(scan_prefix + '_ins_label.npy')
        semantic_labels = np.load(scan_prefix + '_sem_label.npy').astype(
            np.int32) - 1
        bboxes = np.load(scan_prefix + '_bbox.npy')

        if not self.use_color:
            point_cloud = mesh_vertices[:, 0:3]  # do not use color for now
        else:
            point_cloud = mesh_vertices[:, 0:6]
            point_cloud[:, 3:] = (point_cloud[:, 3:] - MEAN_COLOR_RGB) / 256.0

        if self.use_height:
            # Height is measured relative to a low percentile of z.
            floor_height = np.percentile(point_cloud[:, 2], 0.99)
            height = point_cloud[:, 2] - floor_height
            point_cloud = np.concatenate(
                [point_cloud, np.expand_dims(height, 1)], 1)

        # Sample once and keep the per-point labels aligned with the sampled
        # points. Without this, indices derived from instance_labels refer to
        # the full scan and either mis-assign votes or run out of bounds.
        point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                       self.num_points,
                                                       return_choices=True)
        instance_labels = instance_labels[choices]
        semantic_labels = semantic_labels[choices]

        # ------------------------------- VOTES ------------------------------
        # There is no map between bbox instance labels and point instance
        # labels (it was filtered out during data preparation), so each
        # instance center is taken from the extent of the points that share
        # the same instance label.
        point_votes = np.zeros([self.num_points, 3])
        point_votes_mask = np.zeros(self.num_points)
        valid_classes = set(DC.type2class.values())
        for i_instance in np.unique(instance_labels):
            # all sampled points belonging to this instance
            ind = np.where(instance_labels == i_instance)[0]
            # the instance's semantic label is read from its first point
            if semantic_labels[ind[0]] in valid_classes:
                x = point_cloud[ind, :3]
                center = 0.5 * (x.min(0) + x.max(0))
                point_votes[ind, :] = center - x
                point_votes_mask[ind] = 1.0
        point_votes = np.tile(point_votes, (1, 3))  # make 3 votes identical

        # ------------------------------- LABELS ------------------------------
        num_boxes = bboxes.shape[0]
        target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
        target_bboxes_mask = np.zeros((MAX_NUM_OBJ))
        target_bboxes_mask[0:num_boxes] = 1
        angle_classes = np.zeros((MAX_NUM_OBJ, ))
        angle_residuals = np.zeros((MAX_NUM_OBJ, ))
        size_classes = np.zeros((MAX_NUM_OBJ, ))
        size_residuals = np.zeros((MAX_NUM_OBJ, 3))

        for i, bbox in enumerate(bboxes):
            semantic_class = bbox[7]
            angle_classes[i], angle_residuals[i] = DC.angle2class(bbox[6])
            size_classes[i], size_residuals[i] = DC.size2class(
                bbox[3:6], DC.class2type[semantic_class])

            # Axis-aligned box enclosing the oriented box corners:
            # (cx, cy, cz, dx, dy, dz).
            corners_3d = sunrgbd_utils.my_compute_box_3d(
                bbox[0:3], bbox[3:6], bbox[6])
            corner_min = np.min(corners_3d[:, 0:3], axis=0)
            corner_max = np.max(corners_3d[:, 0:3], axis=0)
            target_bboxes[i, 0:3] = (corner_min + corner_max) / 2
            target_bboxes[i, 3:6] = corner_max - corner_min

        target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
        target_bboxes_semcls[0:num_boxes] = bboxes[:, -1]  # from 0 to 9

        ret_dict = {}
        ret_dict['point_clouds'] = point_cloud.astype(np.float32)
        ret_dict['center_label'] = target_bboxes.astype(np.float32)[:, 0:3]
        ret_dict['heading_class_label'] = angle_classes.astype(np.int64)
        ret_dict['heading_residual_label'] = angle_residuals.astype(np.float32)
        ret_dict['size_class_label'] = size_classes.astype(np.int64)
        ret_dict['size_residual_label'] = size_residuals.astype(np.float32)
        ret_dict['sem_cls_label'] = target_bboxes_semcls.astype(np.int64)
        ret_dict['box_label_mask'] = target_bboxes_mask.astype(np.float32)
        ret_dict['vote_label'] = point_votes.astype(np.float32)
        ret_dict['vote_label_mask'] = point_votes_mask.astype(np.int64)
        ret_dict['scan_idx'] = np.array(idx).astype(np.int64)
        return ret_dict