Example #1
    def forward(self, inputs):
        # Normalize the feature matrix (N x C) of the sparse tensor;
        # `bn_foward` is the helper defined on the surrounding class.
        output_features = self.bn_foward(inputs.F, self.bn, inputs.F.shape[-1])
        # Rebuild the output with the same coordinates and stride, carrying
        # over the cached coordinate and kernel maps.
        output_tensor = SparseTensor(output_features, inputs.C, inputs.s)
        output_tensor.coord_maps = inputs.coord_maps
        output_tensor.kernel_maps = inputs.kernel_maps

        return output_tensor
Example #2
def generate_random_point_cloud(size=100000, voxel_size=0.2):
    # random points with 4 channels (x, y, z, intensity); xyz spread over ~±30 m
    pc = np.random.randn(size, 4)
    pc[:, :3] = pc[:, :3] * 10
    # integer voxel coordinates for quantization
    rounded_pc = np.round(pc[:, :3] / voxel_size).astype(np.int32)
    labels = np.random.choice(10, size)
    inds, _, inverse_map = sparse_quantize(
        rounded_pc,
        pc,
        labels,
        return_index=True,
        return_invs=True
    )
    
    voxel_pc = rounded_pc[inds]
    voxel_feat = pc[inds]
    voxel_labels = labels[inds]
    
    sparse_tensor = SparseTensor(voxel_feat, voxel_pc)
    label_tensor = SparseTensor(voxel_labels, voxel_pc)
    
    feed_dict = {
        'lidar': sparse_tensor,
        'targets': label_tensor
    }
    
    return feed_dict
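The dictionary returned above is a single sample; batching is normally done with torchsparse's collate helper. A minimal usage sketch, assuming the old-style sparse_collate_fn that later examples import (its import path varies across torchsparse releases):

from torch.utils.data import DataLoader

samples = [generate_random_point_cloud() for _ in range(4)]
# sparse_collate_fn merges a list of sample dicts, appending a batch index
# to each SparseTensor's coordinates
loader = DataLoader(samples, batch_size=2, collate_fn=sparse_collate_fn)
batch = next(iter(loader))
lidar, targets = batch['lidar'], batch['targets']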
Example #3
def process_point_cloud(input_point_cloud, voxel_size=0.05):
    pc_ = np.round(input_point_cloud[:, :3] / voxel_size)
    pc_ -= pc_.min(0, keepdims=1)

    labels_ = np.zeros(pc_.shape[0], dtype=np.int64)
    feat_ = input_point_cloud
    out_pc = input_point_cloud

    inds, labels, inverse_map = sparse_quantize(pc_,
                                                feat_,
                                                labels_,
                                                return_index=True,
                                                return_invs=True)
    pc = np.zeros((inds.shape[0], 4))
    pc[:, :3] = pc_[inds]

    feat = feat_[inds]
    labels = labels_[inds]
    lidar = SparseTensor(
        torch.from_numpy(feat).float(),
        torch.from_numpy(pc).int())
    return {
        'pc': out_pc,
        'lidar': lidar,
        'targets': labels,
        'targets_mapped': labels_,
        'inverse_map': inverse_map
    }
Example #4
def pcd_uv_to_sparsetensor(pcd,
                           u_u0,
                           v_v0,
                           mask_valid,
                           f=500.0,
                           voxel_size=0.01,
                           mask_side=None,
                           num_points=100000):
    if mask_side is not None:
        mask_valid = mask_valid & mask_side
    pcd_valid = pcd[mask_valid]
    u_u0_valid = u_u0[mask_valid][:, np.newaxis] / f
    v_v0_valid = v_v0[mask_valid][:, np.newaxis] / f

    block_ = np.concatenate([pcd_valid, u_u0_valid, v_v0_valid], axis=1)
    block = block_.copy()

    pc_ = np.round(block_[:, :3] / voxel_size)
    pc_ -= pc_.min(0, keepdims=1)
    feat_ = block

    # transfer point cloud to voxels
    inds = sparse_quantize(pc_, feat_, return_index=True, return_invs=False)
    if len(inds) > num_points:
        inds = np.random.choice(inds, num_points, replace=False)

    pc = pc_[inds]
    feat = feat_[inds]
    lidar = SparseTensor(feat, pc)
    feed_dict = [{'lidar': lidar}]
    inputs = sparse_collate_fn(feed_dict)
    return inputs
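A quick smoke test on synthetic inputs (a sketch; the shapes follow the function's own indexing, and the focal length and validity mask are made up for illustration):

rng = np.random.default_rng(0)
pcd = rng.standard_normal((5000, 3)).astype(np.float32)   # x, y, z
u_u0 = rng.uniform(-320, 320, 5000).astype(np.float32)    # pixel offsets from principal point
v_v0 = rng.uniform(-240, 240, 5000).astype(np.float32)
mask_valid = pcd[:, 2] > -1.0

inputs = pcd_uv_to_sparsetensor(pcd, u_u0, v_v0, mask_valid, f=500.0)
lidar = inputs['lidar']   # collated SparseTensor with 5 feature channels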
Example #5
    def set_input(self, data, device):
        if data.batch.dim() == 1:
            data.batch = data.batch.unsqueeze(-1)
        coords = torch.cat([data.pos, data.batch], -1)
        self.batch_idx = data.batch.squeeze()
        self.input = SparseTensor(data.x, coords).to(self.device)
        self.labels = data.y.to(self.device)
Example #6
def spcrop(inputs, loc_min, loc_max):
    features = inputs.F
    coords = inputs.C
    cur_stride = inputs.s

    # keep voxels whose spatial coordinates fall inside [loc_min, loc_max)
    valid_flag = ((coords[:, :3] >= loc_min) &
                  (coords[:, :3] < loc_max)).all(-1)
    output_coords = coords[valid_flag]
    output_features = features[valid_flag]
    return SparseTensor(output_features, output_coords, cur_stride)
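For example, keeping only the voxels inside a 64^3 box could look like this (a sketch, assuming inputs.C stores integer voxel coordinates with the batch index in the last column):

crop = spcrop(inputs,
              loc_min=torch.tensor([0, 0, 0]),
              loc_max=torch.tensor([64, 64, 64]))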
Example #7
def point_to_voxel(x, z):
    if z.additional_features is None or z.additional_features.get('idx_query') is None\
       or z.additional_features['idx_query'].get(x.s) is None:
        pc_hash = F.sphash(
            torch.cat([
                torch.floor(z.C[:, :3] / x.s[0]).int() * x.s[0],
                z.C[:, -1].int().view(-1, 1)
            ], 1))
        sparse_hash = F.sphash(x.C)
        idx_query = F.sphashquery(pc_hash, sparse_hash)
        counts = F.spcount(idx_query.int(), x.C.shape[0])
        z.additional_features['idx_query'][x.s] = idx_query
        z.additional_features['counts'][x.s] = counts
    else:
        idx_query = z.additional_features['idx_query'][x.s]
        counts = z.additional_features['counts'][x.s]

    inserted_feat = F.spvoxelize(z.F, idx_query, counts)
    new_tensor = SparseTensor(inserted_feat, x.C, x.s)
    new_tensor.cmaps = x.cmaps
    new_tensor.kmaps = x.kmaps

    return new_tensor
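In the SPVCNN pipeline this is typically called with a SparseTensor x at some stride and a point tensor z carrying per-point features; a sketch of the call site (names are illustrative):

# z: point tensor with float coordinates z.C and per-point features z.F
# x0: SparseTensor from an earlier voxelization step, at stride x0.s
x1 = point_to_voxel(x0, z)   # averages point features into x0's voxel grid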
Example #8
    def _get_sparse_tensors(self, index: int) -> Dict[str, SparseTensor]:
        dense_dict = self._get_numpy(index)
        coordinates_ = dense_dict['coordinates']
        coordinates_ = np.round(coordinates_ / self.voxel_size)
        coordinates_ -= coordinates_.min(0, keepdims=1)

        _, inds, inverse_map = sparse_quantize(coordinates_,
                                               return_index=True,
                                               return_inverse=True)

        coordinates = coordinates_[inds]
        sparse_dict = {
            k: SparseTensor(v[inds], coordinates)
            for k, v in dense_dict.items() if k != 'coordinates'
        }
        inverse_map = SparseTensor(inverse_map, coordinates_)
        sparse_dict['inverse_map'] = inverse_map
        coordinates = SparseTensor(dense_dict['coordinates'][inds],
                                   coordinates_)
        sparse_dict['coordinates'] = coordinates
        for k, v in dense_dict.items():
            if k != 'coordinates':
                sparse_dict[f'{k}_mapped'] = SparseTensor(v, coordinates_)
        return sparse_dict
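The stored inverse_map lets per-voxel network outputs be scattered back to the original points; a minimal sketch, where voxel_logits is a hypothetical [num_voxels, num_classes] output for this sample:

sparse_dict = dataset._get_sparse_tensors(0)   # `dataset` is illustrative
voxel_pred = voxel_logits.argmax(1)                    # one prediction per voxel
point_pred = voxel_pred[sparse_dict['inverse_map'].F]  # one prediction per point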
Example #9
def process_point_cloud(input_point_cloud, input_labels=None, voxel_size=0.05):
    pc_ = np.round(input_point_cloud[:, :3] / voxel_size)
    pc_ -= pc_.min(0, keepdims=1)

    label_map = create_label_map()
    if input_labels is not None:
        labels_ = label_map[input_labels & 0xFFFF].astype(
            np.int64)  # semantic labels
    else:
        labels_ = np.zeros(pc_.shape[0], dtype=np.int64)

    feat_ = input_point_cloud

    if input_labels is not None:
        # drop points whose label maps to the largest (ignore) id
        keep = labels_ != labels_.max()
        out_pc = input_point_cloud[keep, :3]
        pc_ = pc_[keep]
        feat_ = feat_[keep]
        labels_ = labels_[keep]
    else:
        out_pc = input_point_cloud

    inds, labels, inverse_map = sparse_quantize(pc_,
                                                feat_,
                                                labels_,
                                                return_index=True,
                                                return_invs=True)
    pc = np.zeros((inds.shape[0], 4))
    pc[:, :3] = pc_[inds]

    feat = feat_[inds]
    labels = labels_[inds]
    lidar = SparseTensor(
        torch.from_numpy(feat).float(),
        torch.from_numpy(pc).int())
    return {
        'pc': out_pc,
        'lidar': lidar,
        'targets': labels,
        'targets_mapped': labels_,
        'inverse_map': inverse_map
    }
Example #10
def pcd_to_sparsetensor(pcd, mask_valid, voxel_size=0.01, num_points=100000):
    pcd_valid = pcd[mask_valid]
    block_ = pcd_valid
    block = np.zeros_like(block_)
    block[:, :3] = block_[:, :3]

    pc_ = np.round(block_[:, :3] / voxel_size)
    pc_ -= pc_.min(0, keepdims=1)
    feat_ = block

    # transfer point cloud to voxels
    inds = sparse_quantize(pc_, feat_, return_index=True, return_invs=False)
    if len(inds) > num_points:
        inds = np.random.choice(inds, num_points, replace=False)

    pc = pc_[inds]
    feat = feat_[inds]
    lidar = SparseTensor(feat, pc)
    feed_dict = [{'lidar': lidar}]
    inputs = sparse_collate_fn(feed_dict)
    return inputs
Example #11
def initial_voxelize(z, init_res, after_res):
    new_float_coord = torch.cat(
        [(z.C[:, :3] * init_res) / after_res, z.C[:, -1].view(-1, 1)], 1)

    pc_hash = F.sphash(torch.floor(new_float_coord).int())
    sparse_hash = torch.unique(pc_hash)
    idx_query = F.sphashquery(pc_hash, sparse_hash)
    counts = F.spcount(idx_query.int(), len(sparse_hash))

    inserted_coords = F.spvoxelize(torch.floor(new_float_coord), idx_query,
                                   counts)
    inserted_coords = torch.round(inserted_coords).int()
    inserted_feat = F.spvoxelize(z.F, idx_query, counts)

    new_tensor = SparseTensor(inserted_feat, inserted_coords, 1)
    new_tensor.cmaps.setdefault(new_tensor.stride, new_tensor.coords)
    z.additional_features['idx_query'][1] = idx_query
    z.additional_features['counts'][1] = counts
    z.C = new_float_coord

    return new_tensor
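A sketch of how this entry point is typically used (assuming z is a point tensor with float (x, y, z, batch) coordinates in z.C and features in z.F; pres and vres are the point and voxel resolutions):

x0 = initial_voxelize(z, init_res=pres, after_res=vres)  # stride-1 SparseTensor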
Example #12
    def filter_candidates(self, data_dict, lang_cls_pred):
        pred_obb_batch = []
        pts_batch = []
        obj_points_batch = []
        num_filtered_objs = []
        batch_size = len(data_dict['instance_points'])

        for i in range(batch_size):
            instance_point = data_dict['instance_points'][i]
            instance_obb = data_dict['instance_obbs'][i]
            instance_class = data_dict['instance_class'][i]
            num_obj = len(instance_point)

            pts = []
            pred_obbs = []

            # filter by class
            for j in range(num_obj):
                if instance_class[j] == lang_cls_pred[i]:
                    pred_obbs.append(instance_obb[j])
                    point_cloud = instance_point[j]
                    pc = point_cloud[:, :3]

                    coords, feats = sparse_quantize(
                        pc,
                        point_cloud,
                        quantization_size=self.voxel_size
                    )
                    pt = SparseTensor(feats, coords)
                    pts.append(pt)
                    obj_points_batch.append(point_cloud)

            num_filtered_objs.append(len(pts))
            if len(pts) < 2:
                pts = []
            pts_batch += pts
            pred_obbs = np.asarray(pred_obbs)
            pred_obb_batch.append(pred_obbs)

        return pts_batch, pred_obb_batch, num_filtered_objs
Example #13
def sparse_collate_tensors(sparse_tensors):
    coords, feats = sparse_collate([x.C for x in sparse_tensors],
                                   [x.F for x in sparse_tensors])
    return SparseTensor(feats, coords, sparse_tensors[0].s)
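Batching two single-sample tensors then looks like this (a sketch; feats_a, coords_a and friends are placeholders):

a = SparseTensor(feats_a, coords_a)   # sample 0
b = SparseTensor(feats_b, coords_b)   # sample 1
batch = sparse_collate_tensors([a, b])
# batch.C has shape [Na + Nb, 4]; the appended last column is the batch index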
Example #14
def process_point_cloud(msg, input_labels=None, voxel_size=0.05):
    t_t = time.time()
    msg_cloud = ros_numpy.point_cloud2.pointcloud2_to_array(msg)
    point_cloud = get_xyz_points(msg_cloud, True)

    input_point_cloud = point_cloud[:, :4]
    pc_ = np.round(input_point_cloud[:, :3] / voxel_size)
    pc_ -= pc_.min(0, keepdims=1)

    label_map = create_label_map()
    if input_labels is not None:
        labels_ = label_map[input_labels & 0xFFFF].astype(
            np.int64)  # semantic labels
    else:
        labels_ = np.zeros(pc_.shape[0], dtype=np.int64)

    feat_ = input_point_cloud

    if input_labels is not None:
        # drop points whose label maps to the largest (ignore) id
        keep = labels_ != labels_.max()
        out_pc = input_point_cloud[keep, :3]
        pc_ = pc_[keep]
        feat_ = feat_[keep]
        labels_ = labels_[keep]
    else:
        out_pc = input_point_cloud

    inds, labels, inverse_map = sparse_quantize(pc_,
                                                feat_,
                                                labels_,
                                                return_index=True,
                                                return_invs=True)
    pc = np.zeros((inds.shape[0], 4))
    pc[:, :3] = pc_[inds]

    feat = feat_[inds]
    labels = labels_[inds]
    lidar = SparseTensor(
        torch.from_numpy(feat).float(),
        torch.from_numpy(pc).int())
    feed_dict = {
        'pc': out_pc,
        'lidar': lidar,
        'targets': labels,
        'targets_mapped': labels_,
        'inverse_map': inverse_map
    }

    inputs = feed_dict['lidar'].to(device)
    t = time.time()
    outputs = model(inputs)
    rospy.logdebug("Network predict time cost: %f", time.time() - t)
    predictions = outputs.argmax(1).cpu().numpy()
    predictions = predictions[feed_dict[
        'inverse_map']]  # segmentation result for each original point
    input_point_cloud = input_point_cloud.astype(np.float32)
    # publish the segmented point cloud
    pc_id = predictions.reshape(-1)
    labels = pc_id.astype(np.uint32)
    msg = to_msg(input_point_cloud[:, :3], labels, msg.header)
    pub_.publish(msg)

    rospy.logdebug(f"Total time cost: {time.time() - t_t}")
    return
Example #15
    def __getitem__(self, index):
        with open(self.files[index], 'rb') as b:
            block_ = np.fromfile(b, dtype=np.float32).reshape(-1, 4)
        block = np.zeros_like(block_)

        if 'train' in self.split:
            theta = np.random.uniform(0, 2 * np.pi)
            scale_factor = np.random.uniform(0.95, 1.05)
            rot_mat = np.array([[np.cos(theta),
                                 np.sin(theta), 0],
                                [-np.sin(theta),
                                 np.cos(theta), 0], [0, 0, 1]])

            block[:, :3] = np.dot(block_[:, :3], rot_mat) * scale_factor
        else:
            theta = self.angle
            transform_mat = np.array([[np.cos(theta),
                                       np.sin(theta), 0],
                                      [-np.sin(theta),
                                       np.cos(theta), 0], [0, 0, 1]])
            block[...] = block_[...]
            block[:, :3] = np.dot(block[:, :3], transform_mat)

        block[:, 3] = block_[:, 3]
        pc_ = np.round(block[:, :3] / self.voxel_size)
        pc_ -= pc_.min(0, keepdims=1)

        label_file = self.files[index].replace('velodyne', 'labels').replace(
            '.bin', '.label')
        if os.path.exists(label_file):
            with open(label_file, 'rb') as a:
                all_labels = np.fromfile(a, dtype=np.int32).reshape(-1)
        else:
            all_labels = np.zeros((pc_.shape[0])).astype(np.int32)

        labels_ = self.label_map[all_labels & 0xFFFF].astype(
            np.int64)  # semantic labels
        inst_labels_ = (all_labels >> 16).astype(np.int64)  # instance labels

        feat_ = block

        inds, labels, inverse_map = sparse_quantize(pc_,
                                                    feat_,
                                                    labels_,
                                                    return_index=True,
                                                    return_invs=True)

        if 'train' in self.split:
            if len(inds) > self.num_points:
                inds = np.random.choice(inds, self.num_points, replace=False)

        pc = pc_[inds]
        feat = feat_[inds]
        labels = labels_[inds]
        lidar = SparseTensor(feat, pc)
        labels = SparseTensor(labels, pc)
        labels_ = SparseTensor(labels_, pc_)
        inverse_map = SparseTensor(inverse_map, pc_)
        
        return {
            'lidar': lidar,
            'targets': labels,
            'targets_mapped': labels_,
            'inverse_map': inverse_map,
            'file_name': self.files[index]
        }
Example #16
    def __getitem__(self, idx):
        scene_id = self.scanrefer[idx]["scene_id"]
        object_id = int(self.scanrefer[idx]["object_id"])
        object_name = " ".join(self.scanrefer[idx]["object_name"].split("_"))
        ann_id = int(self.scanrefer[idx]["ann_id"])
        object_cat = self.raw2label[
            object_name] if object_name in self.raw2label else 17

        # tokenize the description
        tokens = self.scanrefer[idx]["token"]
        embeddings = np.zeros((CONF.TRAIN.MAX_DES_LEN, 300))

        for token_id in range(CONF.TRAIN.MAX_DES_LEN):
            if token_id < len(tokens):
                token = tokens[token_id]
                if token.isspace():
                    continue
                if token in self.glove:
                    embeddings[token_id] = self.glove[token]
                else:
                    embeddings[token_id] = self.glove["unk"]

            else:
                break

        # get language features
        lang_feat = embeddings
        lang_token = tokens
        lang_len = len([token for token in lang_token if not token.isspace()])
        lang_len = min(lang_len, CONF.TRAIN.MAX_DES_LEN)

        # get pc
        mesh_vertices = np.load(
            os.path.join(CONF.PATH.SCANNET_DATA, scene_id) +
            "_aligned_vert.npy")  # axis-aligned
        instance_labels = np.load(
            os.path.join(CONF.PATH.SCANNET_DATA, scene_id) +
            "_ins_label_pg.npy")
        semantic_labels = np.load(
            os.path.join(CONF.PATH.SCANNET_DATA, scene_id) +
            "_sem_label_pg.npy")
        instance_bboxes = np.load(
            os.path.join(CONF.PATH.SCANNET_DATA, scene_id) +
            "_aligned_bbox.npy")

        if not self.use_color:
            point_cloud = mesh_vertices[:, 0:3]  # do not use color for now
            pcl_color = mesh_vertices[:, 3:6]
        else:
            point_cloud = mesh_vertices[:, 0:6]
            point_cloud[:,
                        3:6] = (point_cloud[:, 3:6] - MEAN_COLOR_RGB) / 256.0
            pcl_color = point_cloud[:, 3:6]

        if self.use_normal:
            normals = mesh_vertices[:, 6:9]
            point_cloud = np.concatenate([point_cloud, normals], 1)

        if self.use_multiview:
            # load multiview database
            if not hasattr(self, 'multiview_data'):
                self.multiview_data = h5py.File(MULTIVIEW_DATA,
                                                "r",
                                                libver="latest",
                                                swmr=True)

            multiview = np.array(self.multiview_data[scene_id])
            point_cloud = np.concatenate([point_cloud, multiview], 1)

        if self.use_height:
            floor_height = np.percentile(point_cloud[:, 2], 0.99)
            height = point_cloud[:, 2] - floor_height
            point_cloud = np.concatenate(
                [point_cloud, np.expand_dims(height, 1)], 1)

        point_cloud, choices = random_sampling(point_cloud,
                                               self.num_points,
                                               return_choices=True)
        instance_labels = instance_labels[choices]
        semantic_labels = semantic_labels[choices]
        pcl_color = pcl_color[choices]

        # ------------------------------- LABELS ------------------------------
        target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
        target_bboxes_mask = np.zeros((MAX_NUM_OBJ))
        angle_classes = np.zeros((MAX_NUM_OBJ, ))
        angle_residuals = np.zeros((MAX_NUM_OBJ, ))
        size_classes = np.zeros((MAX_NUM_OBJ, ))
        size_residuals = np.zeros((MAX_NUM_OBJ, 3))
        ref_box_label = np.zeros(
            MAX_NUM_OBJ)  # bbox label for reference target
        ref_center_label = np.zeros(3)  # bbox center for reference target
        ref_heading_class_label = 0
        ref_heading_residual_label = 0
        ref_size_class_label = 0
        ref_size_residual_label = np.zeros(
            3)  # bbox size residual for reference target
        scene_points = np.zeros((1, 10))

        if self.split != "test":
            num_bbox = min(instance_bboxes.shape[0], MAX_NUM_OBJ)
            target_bboxes_mask[0:num_bbox] = 1
            target_bboxes[0:num_bbox, :] = instance_bboxes[:MAX_NUM_OBJ, 0:6]

            # ------------------------------- DATA AUGMENTATION ------------------------------
            if self.augment:
                if torch.rand(1).item() > 0.5:
                    # Flipping along the YZ plane
                    point_cloud[:, 0] = -1 * point_cloud[:, 0]
                    target_bboxes[:, 0] = -1 * target_bboxes[:, 0]

                if torch.rand(1).item() > 0.5:
                    # Flipping along the XZ plane
                    point_cloud[:, 1] = -1 * point_cloud[:, 1]
                    target_bboxes[:, 1] = -1 * target_bboxes[:, 1]

                # Rotation along X-axis
                rot_angle = (torch.rand(1).item() * np.pi /
                             18) - np.pi / 36  # -5 ~ +5 degree
                rot_mat = rotx(rot_angle)
                point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3],
                                             np.transpose(rot_mat))
                target_bboxes = rotate_aligned_boxes_along_axis(
                    target_bboxes, rot_mat, "x")

                # Rotation along Y-axis
                rot_angle = (torch.rand(1).item() * np.pi /
                             18) - np.pi / 36  # -5 ~ +5 degree
                rot_mat = roty(rot_angle)
                point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3],
                                             np.transpose(rot_mat))
                target_bboxes = rotate_aligned_boxes_along_axis(
                    target_bboxes, rot_mat, "y")

                # Rotation along up-axis/Z-axis
                rot_angle = (torch.rand(1).item() * np.pi /
                             18) - np.pi / 36  # -5 ~ +5 degree
                rot_mat = rotz(rot_angle)
                point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3],
                                             np.transpose(rot_mat))
                target_bboxes = rotate_aligned_boxes_along_axis(
                    target_bboxes, rot_mat, "z")

                # Translation
                point_cloud, target_bboxes = self._translate(
                    point_cloud, target_bboxes)

            # NOTE: set size class as semantic class. Consider using size2class.
            class_ind = [
                DC.nyu40id2class[int(x)]
                for x in instance_bboxes[:num_bbox, -2]
            ]
            size_classes[0:num_bbox] = class_ind
            size_residuals[0:num_bbox, :] = target_bboxes[
                0:num_bbox, 3:6] - DC.mean_size_arr[class_ind, :]

            # construct the reference target label for each bbox
            ref_box_label = np.zeros(MAX_NUM_OBJ)
            for i, gt_id in enumerate(instance_bboxes[:num_bbox, -1]):
                if gt_id == object_id:
                    ref_box_label[i] = 1
                    ref_center_label = target_bboxes[i, 0:3]
                    ref_heading_class_label = angle_classes[i]
                    ref_heading_residual_label = angle_residuals[i]
                    ref_size_class_label = size_classes[i]
                    ref_size_residual_label = size_residuals[i]
        else:
            num_bbox = 1

        instance_points = []
        instance_class = []
        ref_target = []
        ins_obbs = []
        pts_batch = []
        pred_obbs = []
        for i_instance in np.unique(instance_labels):

            # find all points belonging to this instance
            ind = np.nonzero(instance_labels == i_instance)[0]

            # find the semantic label
            ins_class = semantic_labels[ind[0]]
            if ins_class in DC.nyu40ids:
                x = point_cloud[ind]
                ins_class = DC.nyu40id2class[int(ins_class)]
                instance_class.append(ins_class)

                pc = x[:, :3]
                center = 0.5 * (pc.min(0) + pc.max(0))
                size = pc.max(0) - pc.min(0)
                ins_obb = np.concatenate((center, size, np.array([0])))
                ins_obbs.append(ins_obb)
                x = random_sampling(x, 1024)
                instance_points.append(x)

                if ins_class == object_cat:
                    pc = x[:, :3]
                    coords, feats = sparse_quantize(
                        pc, x, quantization_size=self.voxel_size_ap)
                    pt_inst = SparseTensor(feats, coords)

                    if len(ins_obb) < 2:
                        continue

                    pred_obbs.append(ins_obb)
                    pts_batch.append(pt_inst)

                if i_instance == (object_id + 1):
                    ref_target.append(1)
                else:
                    ref_target.append(0)
            else:
                scene_points = point_cloud[ind]

        target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
        try:
            target_bboxes_semcls[0:num_bbox] = [
                DC.nyu40id2class[int(x)]
                for x in instance_bboxes[:, -2][0:num_bbox]
            ]
        except KeyError:
            pass

        pc = point_cloud[:, :3]
        coords, feats = sparse_quantize(pc,
                                        point_cloud,
                                        quantization_size=self.voxel_size_glp)
        pt = SparseTensor(feats, coords)

        data_dict = {}
        data_dict['lidar'] = pt
        data_dict['pts_batch'] = pts_batch
        data_dict['pred_obb_batch'] = pred_obbs
        data_dict['scene_points'] = [scene_points]
        data_dict['point_min'] = point_cloud.min(0)[:3]
        data_dict['point_max'] = point_cloud.max(0)[:3]
        data_dict['instance_labels'] = instance_labels.astype(np.int64)
        data_dict['instance_points'] = instance_points
        data_dict['instance_class'] = instance_class
        data_dict['instance_obbs'] = ins_obbs
        data_dict["point_clouds"] = point_cloud.astype(
            np.float32)  # point cloud data including features
        data_dict["lang_feat"] = lang_feat.astype(
            np.float32)  # language feature vectors
        data_dict["lang_token"] = lang_token
        data_dict["lang_len"] = np.array(lang_len).astype(
            np.int64)  # length of each description
        data_dict["center_label"] = target_bboxes.astype(
            np.float32)[:, 0:3]  # (MAX_NUM_OBJ, 3) for GT box center XYZ
        data_dict["heading_class_label"] = angle_classes.astype(
            np.int64
        )  # (MAX_NUM_OBJ,) with int values in 0,...,NUM_HEADING_BIN-1
        data_dict["heading_residual_label"] = angle_residuals.astype(
            np.float32)  # (MAX_NUM_OBJ,)
        data_dict["size_class_label"] = size_classes.astype(
            np.int64
        )  # (MAX_NUM_OBJ,) with int values in 0,...,NUM_SIZE_CLUSTER
        data_dict["size_residual_label"] = size_residuals.astype(
            np.float32)  # (MAX_NUM_OBJ, 3)
        data_dict["num_bbox"] = np.array(num_bbox).astype(np.int64)
        data_dict["scan_idx"] = np.array(idx).astype(np.int64)
        data_dict["pcl_color"] = pcl_color
        data_dict["ref_box_label"] = ref_box_label.astype(
            np.int64)  # 0/1 reference labels for each object bbox
        data_dict["ref_center_label"] = ref_center_label.astype(np.float32)
        data_dict["ref_heading_class_label"] = np.array(
            int(ref_heading_class_label)).astype(np.int64)
        data_dict["ref_heading_residual_label"] = np.array(
            int(ref_heading_residual_label)).astype(np.int64)
        data_dict["ref_size_class_label"] = np.array(
            int(ref_size_class_label)).astype(np.int64)
        data_dict["ref_size_residual_label"] = ref_size_residual_label.astype(
            np.float32)
        data_dict["object_id"] = np.array(int(object_id)).astype(np.int64)
        data_dict["ann_id"] = np.array(ann_id).astype(np.int64)
        data_dict["object_cat"] = np.array(object_cat).astype(np.int64)
        data_dict["unique_multiple"] = np.array(
            self.unique_multiple_lookup[scene_id][str(object_id)][str(
                ann_id)]).astype(np.int64)

        return data_dict