Example #1
def get_roi_ptcloud(inputs,
                    batch_pred_boxes_params,
                    enlarge_ratio=1.2,
                    num_point_roi=512,
                    min_num_point=100):
    """ Generate ROI point cloud w.r.t predicted box

    :param inputs: dict {'point_clouds'}
                   input point clouds of the whole scene
           batch_pred_boxes_params: (B, num_proposals, 7), numpy array
                   predicted bounding box from detector
           enlarge_ratio: scalar
                   the value to enlarge the predicted box size
           num_point_roi: scalar
                   the number of points to be sampled in each enlarged box

    :return:
        batch_pc_roi: (B, num_proposals, num_sampled_points, input_pc_features) numpy array
        nonempty_roi_mask: (B, num_proposals) numpy array
    """
    batch_pc = inputs['point_clouds'].detach().cpu().numpy()  # (B, N, C)
    bsize = batch_pred_boxes_params.shape[0]
    K = batch_pred_boxes_params.shape[1]
    batch_pc_roi = np.zeros((bsize, K, num_point_roi, batch_pc.shape[2]),
                            dtype=np.float32)
    nonempty_roi_mask = np.ones((bsize, K))

    for i in range(bsize):
        pc = batch_pc[i, :, :]  # (N,C)
        for j in range(K):
            box_params = batch_pred_boxes_params[i, j, :]  # (7)
            center = box_params[0:3]
            center_upright_camera = flip_axis_to_camera(center)
            box_size = box_params[3:6] * enlarge_ratio  # enlarge the box size
            heading_angle = box_params[6]
            box3d = get_3d_box(box_size, heading_angle, center_upright_camera)
            box3d = flip_axis_to_depth(box3d)
            pc_in_box, inds = extract_pc_in_box3d(pc, box3d)
            # print('The number of points in roi box is ', pc_in_box.shape[0])
            if len(pc_in_box) >= min_num_point:
                batch_pc_roi[i, j, :, :] = random_sampling(
                    pc_in_box, num_point_roi)
            else:
                nonempty_roi_mask[i, j] = 0
    return batch_pc_roi, nonempty_roi_mask
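
A minimal call sketch (not part of the source): dummy tensors stand in for a real scene and real detections, just to show the expected shapes. It assumes numpy, torch, and the helper functions used above are importable; with random boxes, many ROIs may come back empty.

import numpy as np
import torch

inputs = {'point_clouds': torch.rand(2, 20000, 3)}  # (B, N, C) scene points
# (B, num_proposals, 7): cx, cy, cz, l, w, h, heading_angle
batch_pred_boxes = np.random.rand(2, 64, 7).astype(np.float32)

pc_roi, nonempty = get_roi_ptcloud(inputs, batch_pred_boxes,
                                   enlarge_ratio=1.2, num_point_roi=512)
print(pc_roi.shape)    # (2, 64, 512, 3)
print(nonempty.shape)  # (2, 64)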
Example #2
def extract_sunrgbd_data(idx_filename,
                         split,
                         output_folder,
                         num_point=20000,
                         type_whitelist=DEFAULT_TYPE_WHITELIST,
                         save_votes=False,
                         use_v1=False,
                         skip_empty_scene=True):
    """ Extract scene point clouds and 
    bounding boxes (centroids, box sizes, heading angles, semantic classes).
    Dumped point clouds and boxes are in upright depth coord.

    Args:
        idx_filename: a TXT file where each line is an int number (index)
        split: training or testing
        save_votes: whether to compute and save ground truth votes.
        use_v1: use the SUN RGB-D V1 data
        skip_empty_scene: if True, skip scenes that contain no object in the whitelist

    Dumps:
        <id>_pc.npz of (N,6) where N is for number of subsampled points and 6 is
            for XYZ and RGB (in 0~1) in upright depth coord
        <id>_bbox.npy of (K,8) where K is the number of objects, 8 is for
            centroids (cx,cy,cz), dimensions (l,w,h), heading_angle and semantic_class
        <id>_votes.npz of (N,10) with 0/1 indicating whether the point belongs to an object,
            then three sets of GT votes for up to three objects. If the point is only in one
            object's OBB, then the three GT votes are the same.
    """
    dataset = sunrgbd_object('./sunrgbd_trainval', split, use_v1=use_v1)
    data_idx_list = [int(line.rstrip()) for line in open(idx_filename)]

    if not os.path.exists(output_folder):
        os.mkdir(output_folder)

    for data_idx in data_idx_list:
        print('------------- ', data_idx)
        if data_idx == 479: continue  # hard-coded skip of one specific scan
        objects = dataset.get_label_objects(data_idx)

        # Skip scenes with 0 object
        if skip_empty_scene and (len(objects)==0 or \
            len([obj for obj in objects if obj.classname in type_whitelist])==0):
            continue

        object_list = []
        for obj in objects:
            if obj.classname not in type_whitelist: continue
            obb = np.zeros((8))
            obb[0:3] = obj.centroid
            # Note that compared with data_viz, we do not multiply l,w,h by 2,
            # nor do we flip the heading angle
            obb[3:6] = np.array([obj.l, obj.w, obj.h])
            obb[6] = obj.heading_angle
            obb[7] = sunrgbd_utils.type2class[obj.classname]
            object_list.append(obb)
        if len(object_list) == 0:
            obbs = np.zeros((0, 8))
        else:
            obbs = np.vstack(object_list)  # (K,8)

        pc_upright_depth = dataset.get_depth(data_idx)
        pc_upright_depth_subsampled = pc_util.random_sampling(
            pc_upright_depth, num_point)

        np.savez_compressed(os.path.join(output_folder,
                                         '%06d_pc.npz' % (data_idx)),
                            pc=pc_upright_depth_subsampled)
        np.save(os.path.join(output_folder, '%06d_bbox.npy' % (data_idx)),
                obbs)

        if save_votes:
            N = pc_upright_depth_subsampled.shape[0]
            point_votes = np.zeros((N, 10))  # 3 votes and 1 vote mask
            point_vote_idx = np.zeros(
                (N)).astype(np.int32)  # in the range of [0,2]
            indices = np.arange(N)
            for obj in objects:
                if obj.classname not in type_whitelist: continue
                try:
                    # Find all points in this object's OBB
                    box3d_pts_3d = sunrgbd_utils.my_compute_box_3d(
                        obj.centroid, np.array([obj.l, obj.w, obj.h]),
                        obj.heading_angle)
                    pc_in_box3d,inds = sunrgbd_utils.extract_pc_in_box3d(\
                        pc_upright_depth_subsampled, box3d_pts_3d)
                    # Assign first dimension to indicate it is in an object box
                    point_votes[inds, 0] = 1
                    # Add the votes (all 0 if the point is not in any object's OBB)
                    votes = np.expand_dims(obj.centroid, 0) - pc_in_box3d[:,
                                                                          0:3]
                    sparse_inds = indices[
                        inds]  # turn dense True,False inds to sparse number-wise inds
                    for i in range(len(sparse_inds)):
                        j = sparse_inds[i]
                        point_votes[j,
                                    int(point_vote_idx[j] * 3 +
                                        1):int((point_vote_idx[j] + 1) * 3 +
                                               1)] = votes[i, :]
                        # Populate votes with the first vote
                        if point_vote_idx[j] == 0:
                            point_votes[j, 4:7] = votes[i, :]
                            point_votes[j, 7:10] = votes[i, :]
                    point_vote_idx[inds] = np.minimum(2,
                                                      point_vote_idx[inds] + 1)
                except Exception:
                    print('ERROR ----', data_idx, obj.classname)
            np.savez_compressed(os.path.join(output_folder,
                                             '%06d_votes.npz' % (data_idx)),
                                point_votes=point_votes)
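
For reference, a short decode sketch of the dumped (N,10) vote array, grounded in the loop above; the scan id in the filename is hypothetical.

import numpy as np

votes = np.load('000001_votes.npz')['point_votes']  # hypothetical scan id
vote_mask = votes[:, 0]         # 1 if the point lies inside some object's OBB
first_vote = votes[:, 1:4]      # XYZ offset from the point to a gt centroid
second_vote = votes[:, 4:7]     # duplicates of the first vote when the point
third_vote = votes[:, 7:10]     # falls inside fewer than three boxes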
Example #3
def parse_predictions(end_points, config_dict):
    """ Parse predictions to OBB parameters and suppress overlapping boxes
    
    Args:
        end_points: dict
            {point_clouds, center, heading_scores, heading_residuals,
            size_scores, size_residuals, sem_cls_scores}
        config_dict: dict
            {dataset_config, remove_empty_box, use_3d_nms, cls_nms, nms_iou,
            use_old_type_nms, conf_thresh, per_class_proposal}

    Returns:
        batch_pred_map_cls: a list of len == batch size (BS)
            [pred_list_i], i = 0, 1, ..., BS-1
            where pred_list_i = [(pred_sem_cls, box_params, box_score)_j]
            where j = 0, ..., num of valid detections - 1 from sample input i
    """
    pred_center = end_points['center']  # B,num_proposal,3
    pred_heading_class = torch.argmax(end_points['heading_scores'],
                                      2)  # B,num_proposal
    pred_heading_residual = torch.gather(
        end_points['heading_residuals'], 2,
        pred_heading_class.unsqueeze(2))  # B,num_proposal,1
    pred_heading_residual.squeeze_(2)
    pred_size_class = torch.argmax(end_points['size_scores'],
                                   -1)  # B,num_proposal
    pred_size_residual = torch.gather(
        end_points['size_residuals'], 2,
        pred_size_class.unsqueeze(-1).unsqueeze(-1).repeat(
            1, 1, 1, 3))  # B,num_proposal,1,3
    pred_size_residual.squeeze_(2)
    pred_sem_cls = torch.argmax(end_points['sem_cls_scores'],
                                -1)  # B,num_proposal
    sem_cls_probs = softmax(end_points['sem_cls_scores'].detach().cpu().numpy(
    ))  # B,num_proposal,10
    pred_sem_cls_prob = np.max(sem_cls_probs, -1)  # B,num_proposal

    num_proposal = pred_center.shape[1]
    # Points are in upright_depth coord, while the box util functions assume
    # upright_camera coord, so convert the centers before building corners.
    bsize = pred_center.shape[0]
    pred_corners_3d_upright_camera = np.zeros((bsize, num_proposal, 8, 3))
    pred_center_upright_camera = flip_axis_to_camera(
        pred_center.detach().cpu().numpy())
    for i in range(bsize):
        for j in range(num_proposal):
            heading_angle = config_dict['dataset_config'].class2angle(\
                pred_heading_class[i,j].detach().cpu().numpy(), pred_heading_residual[i,j].detach().cpu().numpy())
            box_size = config_dict['dataset_config'].class2size(\
                int(pred_size_class[i,j].detach().cpu().numpy()), pred_size_residual[i,j].detach().cpu().numpy())
            corners_3d_upright_camera = new_get_3d_box(
                pred_center_upright_camera[i, j, :], box_size, heading_angle)
            pred_corners_3d_upright_camera[i, j] = corners_3d_upright_camera

    K = pred_center.shape[1]  # K==num_proposal
    nonempty_box_mask = np.ones((bsize, K))

    if config_dict['remove_empty_box']:
        # -------------------------------------
        # Remove predicted boxes without any point within them.
        batch_pc = end_points['point_clouds'].cpu().numpy()[:, :, 0:3]  # B,N,3
        for i in range(bsize):
            pc = batch_pc[i, :, :]  # (N,3)
            for j in range(K):
                box3d = pred_corners_3d_upright_camera[i, j, :, :]  # (8,3)
                box3d = flip_axis_to_depth(box3d)
                pc_in_box, inds = extract_pc_in_box3d(pc, box3d)
                if len(pc_in_box) < 5:
                    nonempty_box_mask[i, j] = 0
        # -------------------------------------

    obj_logits = end_points['objectness_scores'].detach().cpu().numpy()
    obj_prob = softmax(obj_logits)[:, :, 1]  # (B,K)
    if not config_dict['use_3d_nms']:
        # ---------- NMS input: pred_with_prob in (B,K,7) -----------
        pred_mask = np.zeros((bsize, K))
        for i in range(bsize):
            boxes_2d_with_prob = np.zeros((K, 5))
            for j in range(K):
                boxes_2d_with_prob[j, 0] = np.min(
                    pred_corners_3d_upright_camera[i, j, :, 0])
                boxes_2d_with_prob[j, 2] = np.max(
                    pred_corners_3d_upright_camera[i, j, :, 0])
                boxes_2d_with_prob[j, 1] = np.min(
                    pred_corners_3d_upright_camera[i, j, :, 2])
                boxes_2d_with_prob[j, 3] = np.max(
                    pred_corners_3d_upright_camera[i, j, :, 2])
                boxes_2d_with_prob[j, 4] = obj_prob[i, j]
            nonempty_box_inds = np.where(nonempty_box_mask[i, :] == 1)[0]
            pick = nms_2d_faster(
                boxes_2d_with_prob[nonempty_box_mask[i, :] == 1, :],
                config_dict['nms_iou'], config_dict['use_old_type_nms'])
            assert (len(pick) > 0)
            pred_mask[i, nonempty_box_inds[pick]] = 1
        end_points['pred_mask'] = pred_mask
        # ---------- NMS output: pred_mask in (B,K) -----------
    elif config_dict['use_3d_nms'] and (not config_dict['cls_nms']):
        # ---------- NMS input: pred_with_prob in (B,K,7) -----------
        pred_mask = np.zeros((bsize, K))
        for i in range(bsize):
            boxes_3d_with_prob = np.zeros((K, 7))
            for j in range(K):
                boxes_3d_with_prob[j, 0] = np.min(
                    pred_corners_3d_upright_camera[i, j, :, 0])
                boxes_3d_with_prob[j, 1] = np.min(
                    pred_corners_3d_upright_camera[i, j, :, 1])
                boxes_3d_with_prob[j, 2] = np.min(
                    pred_corners_3d_upright_camera[i, j, :, 2])
                boxes_3d_with_prob[j, 3] = np.max(
                    pred_corners_3d_upright_camera[i, j, :, 0])
                boxes_3d_with_prob[j, 4] = np.max(
                    pred_corners_3d_upright_camera[i, j, :, 1])
                boxes_3d_with_prob[j, 5] = np.max(
                    pred_corners_3d_upright_camera[i, j, :, 2])
                boxes_3d_with_prob[j, 6] = obj_prob[i, j]
            nonempty_box_inds = np.where(nonempty_box_mask[i, :] == 1)[0]
            pick = nms_3d_faster(
                boxes_3d_with_prob[nonempty_box_mask[i, :] == 1, :],
                config_dict['nms_iou'], config_dict['use_old_type_nms'])
            assert (len(pick) > 0)
            pred_mask[i, nonempty_box_inds[pick]] = 1
        end_points['pred_mask'] = pred_mask
        # ---------- NMS output: pred_mask in (B,K) -----------
    elif config_dict['use_3d_nms'] and config_dict['cls_nms']:
        # ---------- NMS input: pred_with_prob in (B,K,8) -----------
        pred_mask = np.zeros((bsize, K))
        for i in range(bsize):
            boxes_3d_with_prob = np.zeros((K, 8))
            for j in range(K):
                boxes_3d_with_prob[j, 0] = np.min(
                    pred_corners_3d_upright_camera[i, j, :, 0])
                boxes_3d_with_prob[j, 1] = np.min(
                    pred_corners_3d_upright_camera[i, j, :, 1])
                boxes_3d_with_prob[j, 2] = np.min(
                    pred_corners_3d_upright_camera[i, j, :, 2])
                boxes_3d_with_prob[j, 3] = np.max(
                    pred_corners_3d_upright_camera[i, j, :, 0])
                boxes_3d_with_prob[j, 4] = np.max(
                    pred_corners_3d_upright_camera[i, j, :, 1])
                boxes_3d_with_prob[j, 5] = np.max(
                    pred_corners_3d_upright_camera[i, j, :, 2])
                boxes_3d_with_prob[j, 6] = obj_prob[i, j]
                boxes_3d_with_prob[j, 7] = pred_sem_cls[
                    i,
                    j]  # only suppress if the two boxes are of the same class!!
            nonempty_box_inds = np.where(nonempty_box_mask[i, :] == 1)[0]
            pick = nms_3d_faster_samecls(
                boxes_3d_with_prob[nonempty_box_mask[i, :] == 1, :],
                config_dict['nms_iou'], config_dict['use_old_type_nms'])
            assert (len(pick) > 0)
            pred_mask[i, nonempty_box_inds[pick]] = 1
        end_points['pred_mask'] = pred_mask
        # ---------- NMS output: pred_mask in (B,K) -----------

    batch_pred_map_cls = [
    ]  # a list (len: batch_size) of list (len: num of predictions per sample) of tuples of pred_cls, pred_box and conf (0-1)
    for i in range(bsize):
        if config_dict['per_class_proposal']:
            cur_list = []
            for ii in range(config_dict['dataset_config'].num_class):
                cur_list += [(ii, pred_corners_3d_upright_camera[i,j], sem_cls_probs[i,j,ii]*obj_prob[i,j]) \
                    for j in range(pred_center.shape[1]) if pred_mask[i,j]==1 and obj_prob[i,j]>config_dict['conf_thresh']]
            batch_pred_map_cls.append(cur_list)
        else:
            batch_pred_map_cls.append([(pred_sem_cls[i,j].item(), pred_corners_3d_upright_camera[i,j], obj_prob[i,j]) \
                for j in range(pred_center.shape[1]) if pred_mask[i,j]==1 and obj_prob[i,j]>config_dict['conf_thresh']])
    end_points['batch_pred_map_cls'] = batch_pred_map_cls

    return batch_pred_map_cls
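
For orientation, a hedged sketch of the config_dict this parser expects: the key names come from the code above (note that cls_nms is read by the NMS branches even though the original docstring omitted it), while the numeric thresholds are illustrative only.

config_dict = {
    'dataset_config': DC,      # assumed dataset config with class2angle / class2size
    'remove_empty_box': True,
    'use_3d_nms': True,
    'cls_nms': True,           # selects the per-class 3D NMS branch
    'nms_iou': 0.25,           # illustrative value
    'use_old_type_nms': False,
    'conf_thresh': 0.05,       # illustrative value
    'per_class_proposal': True,
}
batch_pred_map_cls = parse_predictions(end_points, config_dict)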
Example #4
    def __getitem__(self, idx):
        """
        Returns a dict with following keys:
            point_clouds: (N,3+C)
            center_label: (MAX_NUM_OBJ,3) for GT box center XYZ
            heading_class_label: (MAX_NUM_OBJ,) with int values in 0,...,NUM_HEADING_BIN-1
            heading_residual_label: (MAX_NUM_OBJ,)
            size_class_label: (MAX_NUM_OBJ,) with int values in 0,...,NUM_SIZE_CLUSTER
            size_residual_label: (MAX_NUM_OBJ,3)
            sem_cls_label: (MAX_NUM_OBJ,) semantic class index
            box_label_mask: (MAX_NUM_OBJ) as 0/1 with 1 indicating a unique box
            vote_label: (N,9) with votes XYZ (3 votes: X1Y1Z1, X2Y2Z2, X3Y3Z3)
                if there is only one vote then X1==X2==X3 etc.
            vote_label_mask: (N,) with 0/1 with 1 indicating the point
                is in one of the object's OBB.
            scan_idx: int scan index in scan_names list
            max_gt_bboxes: unused
        """
        scan_name = self.scan_names[idx]
        point_color_sem = np.load(
            os.path.join(self.data_path, scan_name) +
            '_pc.npz')['pc']  # Nx7: XYZ, RGB, semantic label
        bboxes = np.load(
            os.path.join(self.data_path, scan_name) + '_bbox.npy')  # K,8
        point_votes = np.load(
            os.path.join(self.data_path, scan_name) +
            '_votes.npz')['point_votes']  # Nx10

        semantics37 = point_color_sem[:, 6]
        semantics10 = np.array([DC.class37_2_class10[k] for k in semantics37])
        semantics10_multi = [
            DC.class37_2_class10_multi[k] for k in semantics37
        ]
        if not self.use_color:
            point_cloud = point_color_sem[:, 0:3]
        else:
            point_cloud = point_color_sem[:, 0:6]
            point_cloud[:, 3:6] = (point_color_sem[:, 3:6] - MEAN_COLOR_RGB)

        if self.use_height:
            floor_height = np.percentile(point_cloud[:, 2], 0.99)
            height = point_cloud[:, 2] - floor_height
            point_cloud = np.concatenate(
                [point_cloud, np.expand_dims(height, 1)], 1)  # (N,4) or (N,7)

        # ------------------------------- DATA AUGMENTATION ------------------------------
        if self.augment:
            if np.random.random() > 0.5:
                # Flipping along the YZ plane
                point_cloud[:, 0] = -1 * point_cloud[:, 0]
                bboxes[:, 0] = -1 * bboxes[:, 0]
                bboxes[:, 6] = np.pi - bboxes[:, 6]
                point_votes[:, [1, 4, 7]] = -1 * point_votes[:, [1, 4, 7]]

            # Rotation along up-axis/Z-axis
            rot_angle = (np.random.random() * np.pi /
                         3) - np.pi / 6  # -30 ~ +30 degree
            rot_mat = sunrgbd_utils.rotz(rot_angle)

            point_votes_end = np.zeros_like(point_votes)
            point_votes_end[:, 1:4] = np.dot(
                point_cloud[:, 0:3] + point_votes[:, 1:4],
                np.transpose(rot_mat))
            point_votes_end[:, 4:7] = np.dot(
                point_cloud[:, 0:3] + point_votes[:, 4:7],
                np.transpose(rot_mat))
            point_votes_end[:, 7:10] = np.dot(
                point_cloud[:, 0:3] + point_votes[:, 7:10],
                np.transpose(rot_mat))

            point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3],
                                         np.transpose(rot_mat))
            bboxes[:, 0:3] = np.dot(bboxes[:, 0:3], np.transpose(rot_mat))
            bboxes[:, 6] -= rot_angle
            point_votes[:, 1:4] = point_votes_end[:, 1:4] - point_cloud[:, 0:3]
            point_votes[:, 4:7] = point_votes_end[:, 4:7] - point_cloud[:, 0:3]
            point_votes[:,
                        7:10] = point_votes_end[:, 7:10] - point_cloud[:, 0:3]

            # Augment RGB color
            if self.use_color:
                rgb_color = point_cloud[:, 3:6] + MEAN_COLOR_RGB
                rgb_color *= (1 + 0.4 * np.random.random(3) - 0.2
                              )  # brightness change for each channel
                rgb_color += (0.1 * np.random.random(3) - 0.05
                              )  # color shift for each channel
                rgb_color += np.expand_dims(
                    (0.05 * np.random.random(point_cloud.shape[0]) - 0.025),
                    -1)  # jittering on each pixel
                rgb_color = np.clip(rgb_color, 0, 1)
                # randomly drop out 30% of the points' colors
                rgb_color *= np.expand_dims(
                    np.random.random(point_cloud.shape[0]) > 0.3, -1)
                point_cloud[:, 3:6] = rgb_color - MEAN_COLOR_RGB

            # Augment point cloud scale: 0.85x-1.15x
            scale_ratio = np.random.random() * 0.3 + 0.85
            scale_ratio = np.expand_dims(np.tile(scale_ratio, 3), 0)
            point_cloud[:, 0:3] *= scale_ratio
            bboxes[:, 0:3] *= scale_ratio
            bboxes[:, 3:6] *= scale_ratio
            point_votes[:, 1:4] *= scale_ratio
            point_votes[:, 4:7] *= scale_ratio
            point_votes[:, 7:10] *= scale_ratio
            if self.use_height:
                point_cloud[:, -1] *= scale_ratio[0, 0]

        # ------------------------------- LABELS ------------------------------
        box3d_centers = np.zeros((MAX_NUM_OBJ, 3))
        box3d_sizes = np.zeros((MAX_NUM_OBJ, 3))
        angle_classes = np.zeros((MAX_NUM_OBJ, ))
        angle_residuals = np.zeros((MAX_NUM_OBJ, ))
        size_classes = np.zeros((MAX_NUM_OBJ, ))
        size_residuals = np.zeros((MAX_NUM_OBJ, 3))
        label_mask = np.zeros((MAX_NUM_OBJ))
        label_mask[0:bboxes.shape[0]] = 1
        max_bboxes = np.zeros((MAX_NUM_OBJ, 8))
        max_bboxes[0:bboxes.shape[0], :] = bboxes

        # new items
        box3d_angles = np.zeros((MAX_NUM_OBJ, ))

        point_boundary_mask_z = np.zeros(self.num_points)
        point_boundary_mask_xy = np.zeros(self.num_points)
        point_boundary_offset_z = np.zeros([self.num_points, 3])
        point_boundary_offset_xy = np.zeros([self.num_points, 3])
        point_boundary_sem_z = np.zeros([self.num_points, 3 + 2 + 1])
        point_boundary_sem_xy = np.zeros([self.num_points, 3 + 1 + 1])
        point_line_mask = np.zeros(self.num_points)
        point_line_offset = np.zeros([self.num_points, 3])
        point_line_sem = np.zeros([self.num_points, 3 + 1])

        for i in range(bboxes.shape[0]):
            bbox = bboxes[i]
            semantic_class = bbox[7]
            box3d_center = bbox[0:3]
            angle_class, angle_residual = DC.angle2class(bbox[6])
            # NOTE: The mean size stored in size2class is the full length of the box edges,
            # while sunrgbd_data.py dumped *half* lengths l,w,h, so multiply by 2 here
            box3d_size = bbox[3:6] * 2
            size_class, size_residual = DC.size2class(
                box3d_size, DC.class2type[semantic_class])
            box3d_centers[i, :] = box3d_center
            angle_classes[i] = angle_class
            angle_residuals[i] = angle_residual
            size_classes[i] = size_class
            size_residuals[i] = size_residual
            box3d_sizes[i, :] = box3d_size
            box3d_angles[i] = bbox[6]

        target_bboxes_mask = label_mask
        target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
        for i in range(bboxes.shape[0]):
            bbox = bboxes[i]
            corners_3d = sunrgbd_utils.my_compute_box_3d(
                bbox[0:3], bbox[3:6], bbox[6])
            # compute axis aligned box
            xmin = np.min(corners_3d[:, 0])
            ymin = np.min(corners_3d[:, 1])
            zmin = np.min(corners_3d[:, 2])
            xmax = np.max(corners_3d[:, 0])
            ymax = np.max(corners_3d[:, 1])
            zmax = np.max(corners_3d[:, 2])
            target_bbox = np.array([(xmin + xmax) / 2, (ymin + ymax) / 2,
                                    (zmin + zmax) / 2, xmax - xmin,
                                    ymax - ymin, zmax - zmin])
            target_bboxes[i, :] = target_bbox

        point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                       self.num_points,
                                                       return_choices=True)
        semantics37 = semantics37[choices]
        semantics10 = semantics10[choices]
        semantics10_multi = [semantics10_multi[i] for i in choices]
        point_votes_mask = point_votes[choices, 0]
        point_votes = point_votes[choices, 1:]

        # box angle is -pi to pi
        for i in range(bboxes.shape[0]):
            bbox = bboxes[i]
            corners = params2bbox(bbox[:3], 2 * bbox[3:6],
                                  clockwise2counter(bbox[6]))
            # corners_votenet = sunrgbd_utils.my_compute_box_3d(bbox[:3], bbox[3:6], bbox[6])

            try:
                x_all_cls, ind_all_cls = extract_pc_in_box3d(
                    point_cloud, corners)
            except Exception:
                continue
            ind_all_cls = np.where(ind_all_cls)[0]  # T/F to index
            # find points with the same semantics as the bbox; note the raw
            # semantics use 37 classes in SUN RGB-D

            # ind = ind_all_cls[np.where(semantics10[ind_all_cls] == bbox[7])[0]]
            ind = []
            for j in ind_all_cls:
                if bbox[7] in semantics10_multi[j]:
                    ind.append(j)
            ind = np.array(ind)

            if ind.shape[0] < NUM_POINT_SEM_THRESHOLD:
                pass
            else:
                x = point_cloud[ind, :3]

                ### Get bounding-box planes and boundary points
                plane_lower_temp = np.array([0, 0, 1, -corners[6, -1]])
                para_points = np.array(
                    [corners[1], corners[3], corners[5], corners[7]])
                newd = np.sum(para_points * plane_lower_temp[:3], 1)
                if check_upright(
                        para_points
                ) and plane_lower_temp[0] + plane_lower_temp[1] < LOWER_THRESH:
                    plane_lower = np.array([0, 0, 1, plane_lower_temp[-1]])
                    plane_upper = np.array([0, 0, 1, -np.mean(newd)])
                else:
                    raise ValueError('error with upright')
                if not check_z(plane_upper, para_points):
                    raise ValueError('check_z failed for plane_upper')
                ### Get the boundary points here
                #alldist = np.abs(np.sum(point_cloud[:,:3]*plane_lower[:3], 1) + plane_lower[-1])
                alldist = np.abs(
                    np.sum(x * plane_lower[:3], 1) + plane_lower[-1])
                mind = np.min(alldist)
                #[count, val] = np.histogram(alldist, bins=20)
                #mind = val[np.argmax(count)]
                sel = np.abs(alldist - mind) < DIST_THRESH
                #sel = (np.abs(alldist - mind) < DIST_THRESH) & (point_cloud[:,0] >= xmin) & (point_cloud[:,0] <= xmax) & (point_cloud[:,1] >= ymin) & (point_cloud[:,1] <= ymax)

                ## Get lower four lines
                line_sel1, line_sel2, line_sel3, line_sel4 = get_linesel(
                    x[sel], corners, 'lower')
                if np.sum(line_sel1) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel1]] = 1.0
                    linecenter = (corners[0] + corners[2]) / 2.0
                    point_line_offset[
                        ind[sel][line_sel1]] = linecenter - x[sel][line_sel1]
                    point_line_sem[ind[sel][line_sel1]] = np.array(
                        [linecenter[0], linecenter[1], linecenter[2], bbox[7]])
                if np.sum(line_sel2) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel2]] = 1.0
                    linecenter = (corners[4] + corners[6]) / 2.0
                    point_line_offset[
                        ind[sel][line_sel2]] = linecenter - x[sel][line_sel2]
                    point_line_sem[ind[sel][line_sel2]] = np.array(
                        [linecenter[0], linecenter[1], linecenter[2], bbox[7]])
                if np.sum(line_sel3) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel3]] = 1.0
                    linecenter = (corners[0] + corners[4]) / 2.0
                    point_line_offset[
                        ind[sel][line_sel3]] = linecenter - x[sel][line_sel3]
                    point_line_sem[ind[sel][line_sel3]] = np.array(
                        [linecenter[0], linecenter[1], linecenter[2], bbox[7]])
                if np.sum(line_sel4) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel4]] = 1.0
                    linecenter = (corners[2] + corners[6]) / 2.0
                    point_line_offset[
                        ind[sel][line_sel4]] = linecenter - x[sel][line_sel4]
                    point_line_sem[ind[sel][line_sel4]] = np.array(
                        [linecenter[0], linecenter[1], linecenter[2], bbox[7]])

                if np.sum(sel) > NUM_POINT and np.var(
                        alldist[sel]) < VAR_THRESH:
                    # center = np.array([(xmin+xmax)/2.0, (ymin+ymax)/2.0, np.mean(x[sel][:,2])])
                    center = (corners[0] + corners[6]) / 2.0
                    center[2] = np.mean(x[sel][:, 2])
                    sel_global = ind[sel]
                    point_boundary_mask_z[sel_global] = 1.0
                    point_boundary_sem_z[sel_global] = np.array([
                        center[0], center[1], center[2],
                        np.linalg.norm(corners[4] - corners[0]),
                        np.linalg.norm(corners[2] - corners[0]), bbox[7]
                    ])
                    point_boundary_offset_z[sel_global] = center - x[sel]
                '''
                ### Check for middle z surfaces
                [count, val] = np.histogram(alldist, bins=20)
                mind_middle = val[np.argmax(count)]
                sel_pre = np.copy(sel)
                sel = np.abs(alldist - mind_middle) < DIST_THRESH
                if np.abs(np.mean(x[sel_pre][:,2]) - np.mean(x[sel][:,2])) > MIND_THRESH:
                    ### Do not use line for middle surfaces
                    if np.sum(sel) > NUM_POINT and np.var(alldist[sel]) < VAR_THRESH:
                        center = (corners[0] + corners[6]) / 2.0
                        center[2] = np.mean(x[sel][:,2])
                        # center = np.array([(xmin+xmax)/2.0, (ymin+ymax)/2.0, np.mean(x[sel][:,2])])
                        sel_global = ind[sel]
                        point_boundary_mask_z[sel_global] = 1.0
                        point_boundary_sem_z[sel_global] = np.array([center[0], center[1], center[2], np.linalg.norm(corners[4] - corners[0]), np.linalg.norm(corners[2] - corners[0]), bbox[7]])
                        point_boundary_offset_z[sel_global] = center - x[sel]
                '''

                ### Get the boundary points here
                alldist = np.abs(
                    np.sum(x * plane_upper[:3], 1) + plane_upper[-1])
                mind = np.min(alldist)
                #[count, val] = np.histogram(alldist, bins=20)
                #mind = val[np.argmax(count)]
                sel = np.abs(alldist - mind) < DIST_THRESH
                #sel = (np.abs(alldist - mind) < DIST_THRESH) & (point_cloud[:,0] >= xmin) & (point_cloud[:,0] <= xmax) & (point_cloud[:,1] >= ymin) & (point_cloud[:,1] <= ymax)

                ## Get upper four lines
                line_sel1, line_sel2, line_sel3, line_sel4 = get_linesel(
                    x[sel], corners, 'upper')
                if np.sum(line_sel1) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel1]] = 1.0
                    linecenter = (corners[1] + corners[3]) / 2.0
                    point_line_offset[
                        ind[sel][line_sel1]] = linecenter - x[sel][line_sel1]
                    point_line_sem[ind[sel][line_sel1]] = np.array(
                        [linecenter[0], linecenter[1], linecenter[2], bbox[7]])
                if np.sum(line_sel2) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel2]] = 1.0
                    linecenter = (corners[5] + corners[7]) / 2.0
                    point_line_offset[
                        ind[sel][line_sel2]] = linecenter - x[sel][line_sel2]
                    point_line_sem[ind[sel][line_sel2]] = np.array(
                        [linecenter[0], linecenter[1], linecenter[2], bbox[7]])
                if np.sum(line_sel3) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel3]] = 1.0
                    linecenter = (corners[1] + corners[5]) / 2.0
                    point_line_offset[
                        ind[sel][line_sel3]] = linecenter - x[sel][line_sel3]
                    point_line_sem[ind[sel][line_sel3]] = np.array(
                        [linecenter[0], linecenter[1], linecenter[2], bbox[7]])
                if np.sum(line_sel4) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel4]] = 1.0
                    linecenter = (corners[3] + corners[7]) / 2.0
                    point_line_offset[
                        ind[sel][line_sel4]] = linecenter - x[sel][line_sel4]
                    point_line_sem[ind[sel][line_sel4]] = np.array(
                        [linecenter[0], linecenter[1], linecenter[2], bbox[7]])

                if np.sum(sel) > NUM_POINT and np.var(
                        alldist[sel]) < VAR_THRESH:
                    # center = np.array([(xmin+xmax)/2.0, (ymin+ymax)/2.0, np.mean(x[sel][:,2])])
                    center = (corners[1] + corners[7]) / 2.0
                    center[2] = np.mean(x[sel][:, 2])
                    sel_global = ind[sel]
                    point_boundary_mask_z[sel_global] = 1.0
                    point_boundary_sem_z[sel_global] = np.array([
                        center[0], center[1], center[2],
                        np.linalg.norm(corners[5] - corners[1]),
                        np.linalg.norm(corners[3] - corners[1]), bbox[7]
                    ])
                    point_boundary_offset_z[sel_global] = center - x[sel]

                v1 = corners[3] - corners[2]
                v2 = corners[2] - corners[0]
                cp = np.cross(v1, v2)
                d = -np.dot(cp, corners[0])
                a, b, c = cp
                plane_left_temp = np.array([a, b, c, d])
                para_points = np.array(
                    [corners[4], corners[5], corners[6], corners[7]])
                ### Normalize xy here
                plane_left_temp /= np.linalg.norm(plane_left_temp[:3])
                newd = np.sum(para_points * plane_left_temp[:3], 1)
                if plane_left_temp[2] < LOWER_THRESH:
                    plane_left = plane_left_temp  #np.array([cls,res,tempsign,plane_left_temp[-1]])
                    plane_right = np.array([
                        plane_left_temp[0], plane_left_temp[1],
                        plane_left_temp[2], -np.mean(newd)
                    ])
                else:
                    raise ValueError('error with upright')
                ### Get the boundary points here
                alldist = np.abs(
                    np.sum(x * plane_left[:3], 1) + plane_left[-1])
                mind = np.min(alldist)
                #[count, val] = np.histogram(alldist, bins=20)
                #mind = val[np.argmax(count)]
                sel = np.abs(alldist - mind) < DIST_THRESH
                #sel = (np.abs(alldist - mind) < DIST_THRESH) & (point_cloud[:,2] >= zmin) & (point_cloud[:,2] <= zmax) & (point_cloud[:,1] >= ymin) & (point_cloud[:,1] <= ymax)
                ## Get upper four lines
                line_sel1, line_sel2 = get_linesel(x[sel], corners, 'left')
                if np.sum(line_sel1) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel1]] = 1.0
                    linecenter = (corners[0] + corners[1]) / 2.0
                    point_line_offset[
                        ind[sel][line_sel1]] = linecenter - x[sel][line_sel1]
                    point_line_sem[ind[sel][line_sel1]] = np.array(
                        [linecenter[0], linecenter[1], linecenter[2], bbox[7]])
                if np.sum(line_sel2) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel2]] = 1.0
                    linecenter = (corners[2] + corners[3]) / 2.0
                    point_line_offset[
                        ind[sel][line_sel2]] = linecenter - x[sel][line_sel2]
                    point_line_sem[ind[sel][line_sel2]] = np.array(
                        [linecenter[0], linecenter[1], linecenter[2], bbox[7]])
                if np.sum(sel) > NUM_POINT and np.var(
                        alldist[sel]) < VAR_THRESH:
                    # center = np.array([np.mean(x[sel][:,0]), np.mean(x[sel][:,1]), (zmin+zmax)/2.0])
                    center = np.array([
                        np.mean(x[sel][:, 0]),
                        np.mean(x[sel][:, 1]),
                        (corners[0, 2] + corners[1, 2]) / 2.0
                    ])
                    sel_global = ind[sel]
                    point_boundary_mask_xy[sel_global] = 1.0
                    # point_boundary_sem_xy[sel_global] = np.array([center[0], center[1], center[2], zmax - zmin, np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]])
                    point_boundary_sem_xy[sel_global] = np.array([
                        center[0], center[1], center[2],
                        corners[1, 2] - corners[0, 2], bbox[7]
                    ])
                    point_boundary_offset_xy[sel_global] = center - x[sel]
                '''
                [count, val] = np.histogram(alldist, bins=20)
                mind_middle = val[np.argmax(count)]
                #sel = (np.abs(alldist - mind) < DIST_THRESH) & (point_cloud[:,2] >= zmin) & (point_cloud[:,2] <= zmax) & (point_cloud[:,1] >= ymin) & (point_cloud[:,1] <= ymax)
                ## Get upper four lines
                sel_pre = np.copy(sel)
                sel = np.abs(alldist - mind_middle) < DIST_THRESH
                if np.abs(np.mean(x[sel_pre][:,0]) - np.mean(x[sel][:,0])) > MIND_THRESH:
                    ### Do not use line for middle surfaces
                    if np.sum(sel) > NUM_POINT and np.var(alldist[sel]) < VAR_THRESH:
                        # center = np.array([np.mean(x[sel][:,0]), np.mean(x[sel][:,1]), (zmin+zmax)/2.0])
                        center = np.array([np.mean(x[sel][:,0]), np.mean(x[sel][:,1]), (corners[0, 2] + corners[1, 2])/2.0])
                        sel_global = ind[sel]
                        point_boundary_mask_xy[sel_global] = 1.0
                        point_boundary_sem_xy[sel_global] = np.array([center[0], center[1], center[2], corners[1, 2] - corners[0, 2], bbox[7]])
                        point_boundary_offset_xy[sel_global] = center - x[sel]
                '''

                ### Get the boundary points here
                alldist = np.abs(
                    np.sum(x * plane_right[:3], 1) + plane_right[-1])
                mind = np.min(alldist)
                #[count, val] = np.histogram(alldist, bins=20)
                #mind = val[np.argmax(count)]
                sel = np.abs(alldist - mind) < DIST_THRESH
                #sel = (np.abs(alldist - mind) < DIST_THRESH) & (point_cloud[:,2] >= zmin) & (point_cloud[:,2] <= zmax) & (point_cloud[:,1] >= ymin) & (point_cloud[:,1] <= ymax)
                line_sel1, line_sel2 = get_linesel(x[sel], corners, 'right')
                if np.sum(line_sel1) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel1]] = 1.0
                    linecenter = (corners[4] + corners[5]) / 2.0
                    point_line_offset[
                        ind[sel][line_sel1]] = linecenter - x[sel][line_sel1]
                    point_line_sem[ind[sel][line_sel1]] = np.array(
                        [linecenter[0], linecenter[1], linecenter[2], bbox[7]])
                if np.sum(line_sel2) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel2]] = 1.0
                    linecenter = (corners[6] + corners[7]) / 2.0
                    point_line_offset[
                        ind[sel][line_sel2]] = linecenter - x[sel][line_sel2]
                    point_line_sem[ind[sel][line_sel2]] = np.array(
                        [linecenter[0], linecenter[1], linecenter[2], bbox[7]])
                if np.sum(sel) > NUM_POINT and np.var(
                        alldist[sel]) < VAR_THRESH:
                    # center = np.array([np.mean(x[sel][:,0]), np.mean(x[sel][:,1]), (zmin+zmax)/2.0])
                    center = np.array([
                        np.mean(x[sel][:, 0]),
                        np.mean(x[sel][:, 1]),
                        (corners[4, 2] + corners[5, 2]) / 2.0
                    ])
                    sel_global = ind[sel]
                    point_boundary_mask_xy[sel_global] = 1.0
                    point_boundary_sem_xy[sel_global] = np.array([
                        center[0], center[1], center[2],
                        corners[5, 2] - corners[4, 2], bbox[7]
                    ])
                    point_boundary_offset_xy[sel_global] = center - x[sel]

                #plane_front_temp = leastsq(residuals, [0,1,0,0], args=(None, np.array([corners[0], corners[1], corners[4], corners[5]]).T))[0]
                v1 = corners[0] - corners[4]
                v2 = corners[4] - corners[5]
                cp = np.cross(v1, v2)
                d = -np.dot(cp, corners[5])
                a, b, c = cp
                plane_front_temp = np.array([a, b, c, d])
                para_points = np.array(
                    [corners[2], corners[3], corners[6], corners[7]])
                plane_front_temp /= np.linalg.norm(plane_front_temp[:3])
                newd = np.sum(para_points * plane_front_temp[:3], 1)
                if plane_front_temp[2] < LOWER_THRESH:
                    plane_front = plane_front_temp  #np.array([cls,res,tempsign,plane_front_temp[-1]])
                    plane_back = np.array([
                        plane_front_temp[0], plane_front_temp[1],
                        plane_front_temp[2], -np.mean(newd)
                    ])
                else:
                    raise ValueError('error with upright')
                ### Get the boundary points here
                alldist = np.abs(
                    np.sum(x * plane_front[:3], 1) + plane_front[-1])
                mind = np.min(alldist)
                #[count, val] = np.histogram(alldist, bins=20)
                #mind = val[np.argmax(count)]
                sel = np.abs(alldist - mind) < DIST_THRESH
                #sel = (np.abs(alldist - mind) < DIST_THRESH) & (point_cloud[:,0] >= xmin) & (point_cloud[:,0] <= xmax) & (point_cloud[:,2] >= zmin) & (point_cloud[:,2] <= zmax)
                if np.sum(sel) > NUM_POINT and np.var(
                        alldist[sel]) < VAR_THRESH:
                    # center = np.array([np.mean(x[sel][:,0]), np.mean(x[sel][:,1]), (zmin+zmax)/2.0])
                    center = np.array([
                        np.mean(x[sel][:, 0]),
                        np.mean(x[sel][:, 1]),
                        (corners[0, 2] + corners[1, 2]) / 2.0
                    ])
                    sel_global = ind[sel]
                    point_boundary_mask_xy[sel_global] = 1.0
                    point_boundary_sem_xy[sel_global] = np.array([
                        center[0], center[1], center[2],
                        corners[1, 2] - corners[0, 2], bbox[7]
                    ])
                    point_boundary_offset_xy[sel_global] = center - x[sel]
                '''
                [count, val] = np.histogram(alldist, bins=20)
                mind_middle = val[np.argmax(count)]
                sel_pre = np.copy(sel)
                sel = np.abs(alldist - mind_middle) < DIST_THRESH
                if np.abs(np.mean(x[sel_pre][:,1]) - np.mean(x[sel][:,1])) > MIND_THRESH:
                    ### Do not use line for middle surfaces
                    if np.sum(sel) > NUM_POINT and np.var(alldist[sel]) < VAR_THRESH:
                        # center = np.array([np.mean(x[sel][:,0]), np.mean(x[sel][:,1]), (zmin+zmax)/2.0])
                        center = np.array([np.mean(x[sel][:,0]), np.mean(x[sel][:,1]), (corners[0, 2] + corners[1, 2])/2.0])
                        sel_global = ind[sel]
                        point_boundary_mask_xy[sel_global] = 1.0
                        point_boundary_sem_xy[sel_global] = np.array([center[0], center[1], center[2], corners[1, 2] - corners[0, 2], bbox[7]])
                        point_boundary_offset_xy[sel_global] = center - x[sel]
                '''

                ### Get the boundary points here
                alldist = np.abs(
                    np.sum(x * plane_back[:3], 1) + plane_back[-1])
                mind = np.min(alldist)
                #[count, val] = np.histogram(alldist, bins=20)
                #mind = val[np.argmax(count)]
                sel = np.abs(alldist - mind) < DIST_THRESH
                if np.sum(sel) > NUM_POINT and np.var(
                        alldist[sel]) < VAR_THRESH:
                    #sel = (np.abs(alldist - mind) < DIST_THRESH) & (point_cloud[:,0] >= xmin) & (point_cloud[:,0] <= xmax) & (point_cloud[:,2] >= zmin) & (point_cloud[:,2] <= zmax)
                    # center = np.array([np.mean(x[sel][:,0]), np.mean(x[sel][:,1]), (zmin+zmax)/2.0])
                    center = np.array([
                        np.mean(x[sel][:, 0]),
                        np.mean(x[sel][:, 1]),
                        (corners[2, 2] + corners[3, 2]) / 2.0
                    ])
                    #point_boundary_offset_xy[sel] = center - x[sel]
                    sel_global = ind[sel]
                    point_boundary_mask_xy[sel_global] = 1.0
                    point_boundary_sem_xy[sel_global] = np.array([
                        center[0], center[1], center[2],
                        corners[3, 2] - corners[2, 2], bbox[7]
                    ])
                    point_boundary_offset_xy[sel_global] = center - x[sel]

        ret_dict = {}
        ret_dict['point_clouds'] = point_cloud.astype(np.float32)
        ret_dict['center_label'] = target_bboxes.astype(np.float32)[:, 0:3]
        ret_dict['heading_class_label'] = angle_classes.astype(np.int64)
        ret_dict['heading_residual_label'] = angle_residuals.astype(np.float32)
        ret_dict['size_class_label'] = size_classes.astype(np.int64)
        ret_dict['size_residual_label'] = size_residuals.astype(np.float32)
        target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
        target_bboxes_semcls[0:bboxes.shape[0]] = bboxes[:, -1]  # from 0 to 9
        ret_dict['sem_cls_label'] = target_bboxes_semcls.astype(np.int64)
        ret_dict['box_label_mask'] = target_bboxes_mask.astype(np.float32)
        ret_dict['vote_label'] = point_votes.astype(np.float32)
        ret_dict['vote_label_mask'] = point_votes_mask.astype(np.int64)
        ret_dict['scan_idx'] = np.array(idx).astype(np.int64)
        ret_dict['max_gt_bboxes'] = max_bboxes

        # new items
        ret_dict['size_label'] = box3d_sizes.astype(np.float32)
        ret_dict['heading_label'] = box3d_angles.astype(np.float32)
        if self.use_height:
            ret_dict['floor_height'] = floor_height

        ret_dict['point_boundary_mask_z'] = point_boundary_mask_z.astype(
            np.float32)
        ret_dict['point_boundary_mask_xy'] = point_boundary_mask_xy.astype(
            np.float32)
        ret_dict['point_boundary_offset_z'] = point_boundary_offset_z.astype(
            np.float32)
        ret_dict['point_boundary_offset_xy'] = point_boundary_offset_xy.astype(
            np.float32)
        ret_dict['point_boundary_sem_z'] = point_boundary_sem_z.astype(
            np.float32)
        ret_dict['point_boundary_sem_xy'] = point_boundary_sem_xy.astype(
            np.float32)

        ret_dict['point_line_mask'] = point_line_mask.astype(np.float32)
        ret_dict['point_line_offset'] = point_line_offset.astype(np.float32)
        ret_dict['point_line_sem'] = point_line_sem.astype(np.float32)

        return ret_dict
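
Since __getitem__ above is a Dataset method, it plugs into a standard PyTorch DataLoader. A wiring sketch follows; the class name and constructor arguments are assumptions for illustration only.

from torch.utils.data import DataLoader

# Class name and arguments below are hypothetical placeholders.
dataset = SunrgbdDetectionVotesDataset(split_set='train', num_points=20000,
                                       use_color=False, use_height=True,
                                       augment=True)
loader = DataLoader(dataset, batch_size=8, shuffle=True, num_workers=4)
batch = next(iter(loader))
print(batch['point_clouds'].shape)  # e.g. (8, 20000, 4) with the height feature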
Example #5
def extract_sunrgbd_data(idx_filename, split, output_folder, num_point=20000,
                         type_whitelist=DEFAULT_TYPE_WHITELIST,
                         save_votes=False, use_v1=False, skip_empty_scene=True):
    """ Extract scene point clouds and 
    bounding boxes (centroids, box sizes, heading angles, semantic classes).
    Dumped point clouds and boxes are in upright depth coord.

    Args:
        idx_filename: a TXT file where each line is an int number (index)
        split: training or testing
        save_votes: whether to compute and save ground truth votes.
        use_v1: use the SUN RGB-D V1 data
        skip_empty_scene: if True, skip scenes that contain no object in the whitelist

    Dumps:
        <id>_pc.npz of (N,6) where N is for number of subsampled points and 6 is
            for XYZ and RGB (in 0~1) in upright depth coord
        <id>_bbox.npy of (K,8) where K is the number of objects, 8 is for
            centroids (cx,cy,cz), dimensions (l,w,h), heading_angle and semantic_class
        <id>_votes.npz of (N,10) with 0/1 indicating whether the point belongs to an object,
            then three sets of GT votes for up to three objects. If the point is only in one
            object's OBB, then the three GT votes are the same.
    """
    dataset = sunrgbd_object('./sunrgbd_trainval', split, use_v1=use_v1)
    data_idx_list = [int(line.rstrip()) for line in open(idx_filename)]

    if not os.path.exists(output_folder):
        os.mkdir(output_folder)

    all_obbs = []
    all_pc_upright_depth_subsampled = []
    all_point_votes = []
    for data_idx in data_idx_list:
        print('------------- ', data_idx)
        objects = dataset.get_label_objects(data_idx)

        # Skip scenes with 0 object
        if skip_empty_scene and (len(objects) == 0 or
                                 len([obj for obj in objects if obj.classname in type_whitelist]) == 0):
            continue

        object_list = []
        for obj in objects:
            if obj.classname not in type_whitelist: continue
            obb = np.zeros((8))
            obb[0:3] = obj.centroid
            # Note that compared with data_viz, we do not multiply l,w,h by 2,
            # nor do we flip the heading angle
            obb[3:6] = np.array([obj.l, obj.w, obj.h])
            obb[6] = obj.heading_angle
            obb[7] = sunrgbd_utils.type2class[obj.classname]
            object_list.append(obb)
        if len(object_list) == 0:
            obbs = np.zeros((0, 8))
        else:
            obbs = np.vstack(object_list)  # (K,8)
        print(f"{data_idx} has {obbs.shape[0]} gt bboxes")

        pc_upright_depth = dataset.get_depth(data_idx)
        pc_upright_depth_subsampled = pc_util.random_sampling(pc_upright_depth, num_point)

        np.savez_compressed(os.path.join(output_folder, '%06d_pc.npz' % (data_idx)),
                            pc=pc_upright_depth_subsampled)
        np.save(os.path.join(output_folder, '%06d_bbox.npy' % (data_idx)), obbs)
        # pickle save
        with open(os.path.join(output_folder, '%06d_pc.pkl' % (data_idx)), 'wb') as f:
            pickle.dump(pc_upright_depth_subsampled, f)
            print(f"{os.path.join(output_folder, '%06d_pc.pkl' % (data_idx))} saved successfully !!")
        with open(os.path.join(output_folder, '%06d_bbox.pkl' % (data_idx)), 'wb') as f:
            pickle.dump(obbs, f)
            print(f"{os.path.join(output_folder, '%06d_bbox.pkl' % (data_idx))} saved successfully !!")
        # add to collection
        all_pc_upright_depth_subsampled.append(pc_upright_depth_subsampled)
        all_obbs.append(obbs)

        N = pc_upright_depth_subsampled.shape[0]
        point_votes = np.zeros((N, 13))  # 1 vote mask + 3 XYZ votes (9 values) + 3 gt box indices
        point_votes[:, 10:13] = -1
        point_vote_idx = np.zeros((N)).astype(np.int32)  # in the range of [0,2]
        indices = np.arange(N)
        i_obj = 0
        for obj in objects:
            if obj.classname not in type_whitelist: continue
            try:
                # Find all points in this object's OBB
                box3d_pts_3d = sunrgbd_utils.my_compute_box_3d(obj.centroid,
                                                               np.array([obj.l, obj.w, obj.h]), obj.heading_angle)
                pc_in_box3d, inds = sunrgbd_utils.extract_pc_in_box3d( \
                    pc_upright_depth_subsampled, box3d_pts_3d)
                # Assign first dimension to indicate it is in an object box
                point_votes[inds, 0] = 1
                # Add the votes (all 0 if the point is not in any object's OBB)
                votes = np.expand_dims(obj.centroid, 0) - pc_in_box3d[:, 0:3]
                sparse_inds = indices[inds]  # turn dense True,False inds to sparse number-wise inds
                for i in range(len(sparse_inds)):
                    j = sparse_inds[i]
                    point_votes[j, int(point_vote_idx[j] * 3 + 1):int((point_vote_idx[j] + 1) * 3 + 1)] = votes[i, :]
                    point_votes[j, point_vote_idx[j] + 10] = i_obj
                    # Populate votes with the first vote
                    if point_vote_idx[j] == 0:
                        point_votes[j, 4:7] = votes[i, :]
                        point_votes[j, 7:10] = votes[i, :]
                        point_votes[j, 10] = i_obj
                        point_votes[j, 11] = i_obj
                        point_votes[j, 12] = i_obj
                point_vote_idx[inds] = np.minimum(2, point_vote_idx[inds] + 1)
                i_obj += 1
            except Exception:
                print('ERROR ----', data_idx, obj.classname)

        # choose the nearest as the first gt for each point
        for ip in range(N):
            is_pos = (point_votes[ip, 0] > 0)
            if is_pos:
                vote_delta1 = point_votes[ip, 1:4].copy()
                vote_delta2 = point_votes[ip, 4:7].copy()
                vote_delta3 = point_votes[ip, 7:10].copy()
                dist1 = np.sum(vote_delta1 ** 2)
                dist2 = np.sum(vote_delta2 ** 2)
                dist3 = np.sum(vote_delta3 ** 2)

                gt_ind1 = int(point_votes[ip, 10].copy())
                # gt_ind2 = int(point_votes[ip, 11].copy())
                # gt_ind3 = int(point_votes[ip, 12].copy())
                # gt1 = obbs[gt_ind1]
                # gt2 = obbs[gt_ind2]
                # gt3 = obbs[gt_ind3]
                # size_norm_vote_delta1 = vote_delta1 / gt1[3:6]
                # size_norm_vote_delta2 = vote_delta2 / gt2[3:6]
                # size_norm_vote_delta3 = vote_delta3 / gt3[3:6]
                # size_norm_dist1 = np.sum(size_norm_vote_delta1 ** 2)
                # size_norm_dist2 = np.sum(size_norm_vote_delta2 ** 2)
                # size_norm_dist3 = np.sum(size_norm_vote_delta3 ** 2)

                near_ind = np.argmin([dist1, dist2, dist3])
                # near_ind = np.argmin([size_norm_dist1, size_norm_dist2, size_norm_dist3])

                point_votes[ip, 10] = point_votes[ip, 10 + near_ind].copy()
                point_votes[ip, 10 + near_ind] = gt_ind1
                point_votes[ip, 1:4] = point_votes[ip, int(near_ind * 3 + 1):int((near_ind + 1) * 3 + 1)].copy()
                point_votes[ip, int(near_ind * 3 + 1):int((near_ind + 1) * 3 + 1)] = vote_delta1
            else:
                assert point_votes[ip, 10] == -1, "error"
                assert point_votes[ip, 11] == -1, "error"
                assert point_votes[ip, 12] == -1, "error"

        print(f"{data_idx}_votes.npz has {i_obj} gt bboxes")
        np.savez_compressed(os.path.join(output_folder, '%06d_votes.npz' % (data_idx)),
                            point_votes=point_votes)
        with open(os.path.join(output_folder, '%06d_votes.pkl' % (data_idx)), 'wb') as f:
            pickle.dump(point_votes, f)
            print(f"{os.path.join(output_folder, '%06d_votes.pkl' % (data_idx))} saved successfully !!")
        all_point_votes.append(point_votes)

    pickle_filename = os.path.join(output_folder, 'all_obbs_modified_nearest_has_empty.pkl')
    with open(pickle_filename, 'wb') as f:
        pickle.dump(all_obbs, f)
        print(f"{pickle_filename} saved successfully !!")

    pickle_filename = os.path.join(output_folder, 'all_pc_modified_nearest_has_empty.pkl')
    with open(pickle_filename, 'wb') as f:
        pickle.dump(all_pc_upright_depth_subsampled, f)
        print(f"{pickle_filename} saved successfully !!")

    pickle_filename = os.path.join(output_folder, 'all_point_votes_nearest_has_empty.pkl')
    with open(pickle_filename, 'wb') as f:
        pickle.dump(all_point_votes, f)
        print(f"{pickle_filename} saved successfully !!")

    all_point_labels = []
    for point_votes in all_point_votes:
        point_labels = point_votes[:, [0, 10]]  # (objectness mask, nearest GT box index)
        all_point_labels.append(point_labels)
    pickle_filename = os.path.join(output_folder, 'all_point_labels_nearest_has_empty.pkl')
    with open(pickle_filename, 'wb') as f:
        pickle.dump(all_point_labels, f)
        print(f"{pickle_filename} saved successfully !!")
Example #6
def parse_crop_predictions(end_points, point_cloud, DC):
    pred_center = end_points['center'].clone()  # B,num_proposal,3
    pred_heading_class = torch.argmax(end_points['heading_scores'].clone(),
                                      -1)  # B,num_proposal
    pred_heading_residual = torch.gather(
        end_points['heading_residuals'].clone(), 2,
        pred_heading_class.unsqueeze(-1))  # B,num_proposal,1
    pred_heading_residual.squeeze_(2)
    pred_size_class = torch.argmax(end_points['size_scores'].clone(),
                                   -1)  # B,num_proposal
    pred_size_residual = torch.gather(
        end_points['size_residuals'].clone(), 2,
        pred_size_class.unsqueeze(-1).unsqueeze(-1).repeat(
            1, 1, 1, 3))  # B,num_proposal,1,3
    pred_size_residual.squeeze_(2)
    pred_sem_cls = torch.argmax(end_points['sem_cls_scores'].clone(),
                                -1)  # B,num_proposal
    sem_cls_probs = softmax(
        end_points['sem_cls_scores'].detach().cpu().clone().numpy())  # B,num_proposal,10
    pred_sem_cls_prob = np.max(sem_cls_probs, -1)  # B,num_proposal

    bsize = pred_center.shape[0]
    assert bsize == 1
    num_proposal = pred_center.shape[1]

    # Points are in upright_depth coord while the box util functions assume
    # upright_camera coord, so convert the predicted centers first.
    pred_corners_3d_upright_camera = np.zeros((bsize, num_proposal, 8, 3))
    pred_center_upright_camera = flip_axis_to_camera(
        pred_center.detach().cpu().numpy())
    for i in range(bsize):
        for j in range(num_proposal):
            heading_angle = DC.class2angle(
                pred_heading_class[i, j].detach().cpu().numpy(),
                pred_heading_residual[i, j].detach().cpu().numpy())
            box_size = DC.class2size(
                int(pred_size_class[i, j].detach().cpu().numpy()),
                pred_size_residual[i, j].detach().cpu().numpy())
            corners_3d_upright_camera = get_3d_box(
                box_size, heading_angle, pred_center_upright_camera[i, j, :])
            pred_corners_3d_upright_camera[i, j] = corners_3d_upright_camera

    K = pred_center.shape[1]  # K==num_proposal
    nonempty_box_mask = np.ones((bsize, K), dtype=bool)  # bool so it can index the tensors below

    # -------------------------------------
    # Remove predicted boxes containing fewer than 5 points.
    batch_pc = point_cloud.copy()[:, 0:3]  # (N,3): a single scene since bsize == 1
    i = 0
    pc = batch_pc[:, :]  # (N,3)
    for j in range(K):
        box3d = pred_corners_3d_upright_camera[i, j, :, :]  # (8,3)
        # -- original version of the in-box point test
        box3d = flip_axis_to_depth(box3d)
        pc_in_box, inds = extract_pc_in_box3d(pc, box3d)
        if len(pc_in_box) < 5:
            nonempty_box_mask[i, j] = 0

        # -- new version
        #min_bound_box = np.min(box3d, axis=0)
        #max_bound_box = np.max(box3d, axis=0)
        #in_bound_pc = np.all(pc > min_bound_box, axis=1) *\
        #              np.all(pc < max_bound_box, axis=1)
        #if np.sum(in_bound_pc) < 5:
        #    nonempty_box_mask[i,j] = 0
    end_points['center'] = end_points['center'][i][nonempty_box_mask[i, :], :]
    end_points['heading_scores'] = end_points['heading_scores'][i][
        nonempty_box_mask[i, :], :]
    end_points['heading_residuals'] = end_points['heading_residuals'][i][
        nonempty_box_mask[i, :], :]
    end_points['heading_residuals_normalized'] = end_points[
        'heading_residuals_normalized'][i][nonempty_box_mask[i, :], :]
    end_points['size_scores'] = end_points['size_scores'][i][nonempty_box_mask[
        i, :], :]
    end_points['size_residuals'] = end_points['size_residuals'][i][
        nonempty_box_mask[i, :], :]
    end_points['size_residuals_normalized'] = end_points[
        'size_residuals_normalized'][i][nonempty_box_mask[i, :], :]
    end_points['sem_cls_scores'] = end_points['sem_cls_scores'][i][
        nonempty_box_mask[i, :], :]
    end_points['objectness_scores'] = end_points['objectness_scores'][i][
        nonempty_box_mask[i, :], :]

    end_points['center'] = end_points['center'].unsqueeze(0)
    end_points['heading_scores'] = end_points['heading_scores'].unsqueeze(0)
    end_points['heading_residuals'] = end_points[
        'heading_residuals'].unsqueeze(0)
    end_points['heading_residuals_normalized'] = end_points[
        'heading_residuals_normalized'].unsqueeze(0)
    end_points['size_scores'] = end_points['size_scores'].unsqueeze(0)
    end_points['size_residuals'] = end_points['size_residuals'].unsqueeze(0)
    end_points['size_residuals_normalized'] = end_points[
        'size_residuals_normalized'].unsqueeze(0)
    end_points['sem_cls_scores'] = end_points['sem_cls_scores'].unsqueeze(0)
    end_points['objectness_scores'] = end_points[
        'objectness_scores'].unsqueeze(0)

    return end_points
Example #7
def extract_frustum_data(sunrgbd_dir,
                         idx_filename,
                         split,
                         output_filename,
                         type_whitelist,
                         perturb_box2d=False,
                         augmentX=1,
                         with_down_sample=False):
    dataset = sunrgbd_object(sunrgbd_dir, split)
    with open(idx_filename) as f:
        data_idx_list = [int(line.rstrip()) for line in f]

    id_list = []  # int number
    box2d_list = []  # [xmin,ymin,xmax,ymax]
    box3d_list = []  # (8,3) array in upright depth coord
    input_list = []  # channel number = 6, xyz,rgb in upright depth coord
    label_list = []  # 1 for roi object, 0 for clutter
    type_list = []  # string e.g. bed
    heading_list = []  # object heading angle, in radians, clockwise from the positive x-axis in upright camera coord
    box3d_size_list = []  # array of l,w,h
    frustum_angle_list = []  # angle of the 2D box center from the positive x-axis (clockwise)

    img_coord_list = []
    calib_K_list = []
    calib_R_list = []

    pos_cnt = 0
    all_cnt = 0
    for data_idx in data_idx_list:
        print('------------- ', data_idx)
        calib = dataset.get_calibration(data_idx)
        objects = dataset.get_label_objects(data_idx)

        pc_upright_depth = dataset.get_pointcloud(data_idx)
        pc_upright_camera = np.zeros_like(pc_upright_depth)
        pc_upright_camera[:, 0:3] = calib.project_upright_depth_to_upright_camera(
            pc_upright_depth[:, 0:3])
        pc_upright_camera[:, 3:] = pc_upright_depth[:, 3:]

        if with_down_sample:
            idx = down_sample(pc_upright_camera[:, :3], 0.01)
            # print(len(idx), len(pc_upright_camera))
            pc_upright_camera = pc_upright_camera[idx]
            pc_upright_depth = pc_upright_depth[idx]

        # img = dataset.get_image(data_idx)
        # img_height, img_width, img_channel = img.shape
        pc_image_coord, _ = calib.project_upright_depth_to_image(
            pc_upright_depth)

        for obj_idx in range(len(objects)):
            obj = objects[obj_idx]
            if obj.classname not in type_whitelist:
                continue
            # 2D BOX: Get pts rect backprojected
            box2d = obj.box2d
            for _ in range(augmentX):
                if perturb_box2d:
                    xmin, ymin, xmax, ymax = random_shift_box2d(box2d)
                    # print(xmin,ymin,xmax,ymax)
                else:
                    xmin, ymin, xmax, ymax = box2d
                box_fov_inds = (pc_image_coord[:, 0] < xmax) & \
                               (pc_image_coord[:, 0] >= xmin) & \
                               (pc_image_coord[:, 1] < ymax) & \
                               (pc_image_coord[:, 1] >= ymin)
                coord_in_box_fov = pc_image_coord[box_fov_inds, :]
                pc_in_box_fov = pc_upright_camera[box_fov_inds, :]
                # Get frustum angle (according to center pixel in 2D BOX)
                box2d_center = np.array([(xmin + xmax) / 2.0,
                                         (ymin + ymax) / 2.0])
                uvdepth = np.zeros((1, 3))
                uvdepth[0, 0:2] = box2d_center
                uvdepth[0, 2] = 20  # arbitrary depth: only the ray direction matters for the angle
                box2d_center_upright_camera = calib.project_image_to_upright_camera(
                    uvdepth)
                # print('UVdepth, center in upright camera: ', uvdepth, box2d_center_upright_camera)
                frustum_angle = -1 * np.arctan2(
                    box2d_center_upright_camera[0, 2],
                    box2d_center_upright_camera[0, 0])  # angle w.r.t. the positive x-axis, as in the Zoox paper
                # print('Frustum angle: ', frustum_angle)
                # 3D BOX: Get pts velo in 3d box
                box3d_pts_2d, box3d_pts_3d = utils.compute_box_3d(obj, calib)
                box3d_pts_3d = calib.project_upright_depth_to_upright_camera(
                    box3d_pts_3d)
                try:
                    _, inds = extract_pc_in_box3d(pc_in_box_fov, box3d_pts_3d)
                except Exception as e:
                    print(e)
                    continue

                label = np.zeros((pc_in_box_fov.shape[0]))
                label[inds] = 1
                box3d_size = np.array([2 * obj.l, 2 * obj.w, 2 * obj.h])  # obj stores half-dimensions
                # Subsample to at most 2048 points
                num_point = pc_in_box_fov.shape[0]
                if num_point > 2048:
                    choice = np.random.choice(pc_in_box_fov.shape[0],
                                              2048,
                                              replace=False)
                    coord_in_box_fov = coord_in_box_fov[choice, :]
                    pc_in_box_fov = pc_in_box_fov[choice, :]
                    label = label[choice]
                # Reject objects with fewer than 5 points inside the 3D box
                if np.sum(label) < 5:
                    continue

                id_list.append(data_idx)
                box2d_list.append(
                    np.array([xmin, ymin, xmax, ymax], dtype=np.float32))
                box3d_list.append(box3d_pts_3d)
                input_list.append(pc_in_box_fov.astype(np.float32))
                label_list.append(label.astype(bool))  # np.bool was removed in NumPy 1.24
                type_list.append(obj.classname)
                heading_list.append(obj.heading_angle)
                box3d_size_list.append(box3d_size)
                frustum_angle_list.append(frustum_angle)
                img_coord_list.append(coord_in_box_fov.astype(np.float32))
                calib_K_list.append(calib.K)
                calib_R_list.append(calib.Rtilt)

                # collect statistics
                pos_cnt += np.sum(label)
                all_cnt += pc_in_box_fov.shape[0]

    print('Average pos ratio: ', pos_cnt / float(all_cnt))
    print('Average npoints: ', float(all_cnt) / len(id_list))

    data_dict = {
        'id': id_list,
        'box2d': box2d_list,
        'box3d': box3d_list,
        'box3d_size': box3d_size_list,
        'box3d_heading': heading_list,
        'type': type_list,
        'input': input_list,
        'frustum_angle': frustum_angle_list,
        'label': label_list,
        'calib_K': calib_K_list,
        'calib_R': calib_R_list,
        # 'image_coord': img_coord_list,
    }

    with open(output_filename, 'wb') as f:
        pickle.dump(data_dict, f, -1)

    print("saved to {}".format(output_filename))