def __getitem__(self, idx):
    """
    Load scan `idx` from disk and assemble one training sample.

    Returns a dict with following keys:
        point_clouds: (N,3+C) sampled (and, if enabled, augmented) points
        center_label: (MAX_NUM_OBJ,3) for GT box center XYZ
        heading_class_label: (MAX_NUM_OBJ,) heading bin index from DC.angle2class
        heading_residual_label: (MAX_NUM_OBJ,)
        size_class_label: (MAX_NUM_OBJ,) size cluster index from DC.size2class
        size_residual_label: (MAX_NUM_OBJ,3)
        sem_cls_label: (MAX_NUM_OBJ,) semantic class index
        box_label_mask: (MAX_NUM_OBJ) as 0/1 with 1 marking a real (non-padding) box
        vote_label: (N,9) with votes XYZ (3 votes: X1Y1Z1, X2Y2Z2, X3Y3Z3)
        vote_label_mask: (N,) first column of the stored votes array
        scan_idx: int scan index in scan_names list
        supervised_mask: scalar, always 1
        ema_point_clouds: the same sampled point indices taken from the
            copy made before augmentation, optionally flipped along X
        flip_x_axis, flip_y_axis: 0/1 flags for the main point cloud
            (flip_y_axis is never set by this method)
        rot_mat: (3,3) rotation applied to the main point cloud
        scale: (1,3) scale applied to the main point cloud
        flip_x_axis_ema, flip_y_axis_ema: 0/1 flags for ema_point_clouds
            (flip_y_axis_ema is never set by this method)
    """
    scan_prefix = os.path.join(self.data_path, self.scan_names[idx])
    point_cloud = np.load(scan_prefix + '_pc.npz')['pc']  # Nx6
    bboxes = np.load(scan_prefix + '_bbox.npy')  # K,8
    point_votes = np.load(scan_prefix + '_votes.npz')['point_votes']  # Nx10

    if self.use_color:
        point_cloud = point_cloud[:, 0:6]
        point_cloud[:, 3:] = point_cloud[:, 3:] - MEAN_COLOR_RGB
    else:
        point_cloud = point_cloud[:, 0:3]

    if self.use_height:
        # Height above an estimated floor (a very low percentile of Z).
        z_values = point_cloud[:, 2]
        height = z_values - np.percentile(z_values, 0.99)
        point_cloud = np.concatenate(
            [point_cloud, height[:, np.newaxis]], 1)  # (N,4) or (N,7)

    # Snapshot taken before any augmentation; source of ema_point_clouds.
    raw_points = point_cloud.copy()

    # ------------------------------- DATA AUGMENTATION ------------------------------
    flip_x_axis = 0
    flip_y_axis = 0
    flip_x_axis_ema = 0
    flip_y_axis_ema = 0
    rot_mat = np.identity(3)
    scale_ratio = np.ones((1, 3))
    # Column ranges of the three XYZ votes inside point_votes.
    vote_columns = (slice(1, 4), slice(4, 7), slice(7, 10))

    if self.augment:
        if np.random.random() > 0.5:
            # Flipping along the YZ plane
            flip_x_axis = 1
            point_cloud[:, 0] = -1 * point_cloud[:, 0]
            bboxes[:, 0] = -1 * bboxes[:, 0]
            bboxes[:, 6] = np.pi - bboxes[:, 6]
            point_votes[:, [1, 4, 7]] = -1 * point_votes[:, [1, 4, 7]]

        # Rotation along up-axis/Z-axis
        # TODO: set different degree range (keep consistent with scannet?)
        rot_angle = (np.random.random() * np.pi / 3) - np.pi / 6  # -30 ~ +30 degree
        rot_mat = sunrgbd_utils.rotz(rot_angle)
        rot_mat_t = np.transpose(rot_mat)

        # Rotate the absolute vote targets first, then re-express them
        # relative to the rotated points.
        point_votes_end = np.zeros_like(point_votes)
        for cols in vote_columns:
            point_votes_end[:, cols] = np.dot(
                point_cloud[:, 0:3] + point_votes[:, cols], rot_mat_t)

        point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3], rot_mat_t)
        bboxes[:, 0:3] = np.dot(bboxes[:, 0:3], rot_mat_t)
        bboxes[:, 6] -= rot_angle
        for cols in vote_columns:
            point_votes[:, cols] = point_votes_end[:, cols] - point_cloud[:, 0:3]

        # TODO: turn on scale augmentation (keep consistent in scannet?)
        # Augment point cloud scale: 0.85x-1.15x
        scale_value = np.random.random() * 0.3 + 0.85
        scale_ratio = np.full((1, 3), scale_value)
        point_cloud[:, 0:3] *= scale_ratio
        bboxes[:, 0:3] *= scale_ratio
        bboxes[:, 3:6] *= scale_ratio
        for cols in vote_columns:
            point_votes[:, cols] *= scale_ratio
        if self.use_height:
            point_cloud[:, -1] *= scale_ratio[0, 0]

    # ------------------------------- LABELS ------------------------------
    num_boxes = bboxes.shape[0]
    target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
    target_bboxes_mask = np.zeros((MAX_NUM_OBJ))
    angle_classes = np.zeros((MAX_NUM_OBJ, ))
    angle_residuals = np.zeros((MAX_NUM_OBJ, ))
    size_classes = np.zeros((MAX_NUM_OBJ, ))
    size_residuals = np.zeros((MAX_NUM_OBJ, 3))
    target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))

    target_bboxes_mask[0:num_boxes] = 1
    target_bboxes[0:num_boxes, :] = bboxes[:, 0:6]

    for i, bbox in enumerate(bboxes):
        semantic_class = bbox[7]
        angle_classes[i], angle_residuals[i] = DC.angle2class(bbox[6])
        # NOTE: The mean size stored in size2class is of full length of box edges,
        # while in sunrgbd_data.py data dumping we dumped *half* length l,w,h..
        # so have to time it by 2 here
        size_classes[i], size_residuals[i] = DC.size2class(
            bbox[3:6] * 2, DC.class2type[semantic_class])
        target_bboxes_semcls[i] = semantic_class

    point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                   self.num_points,
                                                   return_choices=True)
    point_votes_mask = point_votes[choices, 0]
    point_votes = point_votes[choices, 1:]

    # The EMA branch sees the same points, drawn from the un-augmented copy,
    # with its own independent flip decision.
    ema_point_cloud = raw_points[choices]
    if self.augment and np.random.random() > 0.5:
        # Flipping along the YZ plane
        flip_x_axis_ema = 1
        ema_point_cloud[:, 0] = -1 * ema_point_cloud[:, 0]

    return {
        'point_clouds': point_cloud.astype(np.float32),
        'center_label': target_bboxes.astype(np.float32)[:, 0:3],
        'heading_class_label': angle_classes.astype(np.int64),
        'heading_residual_label': angle_residuals.astype(np.float32),
        'size_class_label': size_classes.astype(np.int64),
        'size_residual_label': size_residuals.astype(np.float32),
        'sem_cls_label': target_bboxes_semcls.astype(np.int64),
        'box_label_mask': target_bboxes_mask.astype(np.float32),
        'vote_label': point_votes.astype(np.float32),
        'vote_label_mask': point_votes_mask.astype(np.int64),
        'scan_idx': np.array(idx).astype(np.int64),
        'supervised_mask': np.array(1).astype(np.int64),
        'ema_point_clouds': ema_point_cloud.astype(np.float32),
        'flip_x_axis': np.array(flip_x_axis).astype(np.int64),
        'flip_y_axis': np.array(flip_y_axis).astype(np.int64),
        'rot_mat': rot_mat.astype(np.float32),
        'scale': np.array(scale_ratio).astype(np.float32),
        'flip_x_axis_ema': np.array(flip_x_axis_ema).astype(np.int64),
        'flip_y_axis_ema': np.array(flip_y_axis_ema).astype(np.int64),
    }
def __getitem__(self, idx):
    """
    Load scan `idx` from disk and assemble one training sample.

    When `use_color` is false the point cloud is replaced by the output of
    `get_box2d_feature` (built from the 2D boxes, their probabilities and
    the camera calibration) instead of plain XYZ.

    Returns a dict with following keys:
        point_clouds: (N,3+C)
        center_label: (MAX_NUM_OBJ,3) center XYZ of the axis-aligned box
            enclosing each GT box
        heading_class_label: (MAX_NUM_OBJ,) heading bin index from DC.angle2class
        heading_residual_label: (MAX_NUM_OBJ,)
        size_class_label: (MAX_NUM_OBJ,) size cluster index from DC.size2class
        size_residual_label: (MAX_NUM_OBJ,3)
        sem_cls_label: (MAX_NUM_OBJ,) semantic class index
        box_label_mask: (MAX_NUM_OBJ) as 0/1 with 1 marking a real (non-padding) box
        vote_label: (N,9) with votes XYZ (3 votes: X1Y1Z1, X2Y2Z2, X3Y3Z3)
        vote_label_mask: (N,) first column of the stored votes array
        scan_idx: int scan index in scan_names list
        max_gt_bboxes: (MAX_NUM_OBJ,8) zero-padded copy of the (augmented) boxes

    Raises:
        NotImplementedError: if both `use_color` and `use_box2d` are set.
    """
    scan_name = self.scan_names[idx]
    scan_prefix = os.path.join(self.data_path, scan_name)
    point_cloud = np.load(scan_prefix + '_pc.npz')['pc']  # Nx6
    bboxes = np.load(scan_prefix + '_bbox.npy')  # K,8
    point_votes = np.load(scan_prefix + '_votes.npz')['point_votes']  # Nx10
    bbox2ds = np.load(scan_prefix + '_bbox2d.npy')
    bbox2d_probs = np.load(scan_prefix + '_bbox2d_prob.npy')
    calib_Rtilt = np.load(scan_prefix + '_calib_Rtilt.npy')
    calib_K = np.load(scan_prefix + '_calib_K.npy')

    if self.use_color and self.use_box2d:
        # `NotImplemented` is a comparison sentinel, not an exception class;
        # calling it raised a confusing TypeError instead of this message.
        raise NotImplementedError(
            'color and 2d bounding box at the same time is not implemented'
        )

    if not self.use_color:
        point_cloud = get_box2d_feature(point_cloud, bbox2ds, bbox2d_probs,
                                        calib_Rtilt, calib_K)
    else:
        point_cloud = point_cloud[:, 0:6]
        point_cloud[:, 3:] = (point_cloud[:, 3:] - MEAN_COLOR_RGB)

    if self.use_height:
        # Height above an estimated floor (a very low percentile of Z).
        floor_height = np.percentile(point_cloud[:, 2], 0.99)
        height = point_cloud[:, 2] - floor_height
        point_cloud = np.concatenate(
            [point_cloud, np.expand_dims(height, 1)], 1)  # (N,4) or (N,7)

    # ------------------------------- DATA AUGMENTATION ------------------------------
    if self.augment:
        if np.random.random() > 0.5:
            # Flipping along the YZ plane
            point_cloud[:, 0] = -1 * point_cloud[:, 0]
            bboxes[:, 0] = -1 * bboxes[:, 0]
            bboxes[:, 6] = np.pi - bboxes[:, 6]
            point_votes[:, [1, 4, 7]] = -1 * point_votes[:, [1, 4, 7]]

        # Rotation along up-axis/Z-axis
        rot_angle = (np.random.random() * np.pi / 3) - np.pi / 6  # -30 ~ +30 degree
        rot_mat = sunrgbd_utils.rotz(rot_angle)

        # Rotate the absolute vote targets, then re-express them relative
        # to the rotated points.
        point_votes_end = np.zeros_like(point_votes)
        point_votes_end[:, 1:4] = np.dot(
            point_cloud[:, 0:3] + point_votes[:, 1:4], np.transpose(rot_mat))
        point_votes_end[:, 4:7] = np.dot(
            point_cloud[:, 0:3] + point_votes[:, 4:7], np.transpose(rot_mat))
        point_votes_end[:, 7:10] = np.dot(
            point_cloud[:, 0:3] + point_votes[:, 7:10], np.transpose(rot_mat))

        point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3],
                                     np.transpose(rot_mat))
        bboxes[:, 0:3] = np.dot(bboxes[:, 0:3], np.transpose(rot_mat))
        bboxes[:, 6] -= rot_angle
        point_votes[:, 1:4] = point_votes_end[:, 1:4] - point_cloud[:, 0:3]
        point_votes[:, 4:7] = point_votes_end[:, 4:7] - point_cloud[:, 0:3]
        point_votes[:, 7:10] = point_votes_end[:, 7:10] - point_cloud[:, 0:3]

        # Augment RGB color
        if self.use_color:
            rgb_color = point_cloud[:, 3:6] + MEAN_COLOR_RGB
            rgb_color *= (1 + 0.4 * np.random.random(3) - 0.2)  # brightness change for each channel
            rgb_color += (0.1 * np.random.random(3) - 0.05)  # color shift for each channel
            rgb_color += np.expand_dims(
                (0.05 * np.random.random(point_cloud.shape[0]) - 0.025),
                -1)  # jittering on each pixel
            rgb_color = np.clip(rgb_color, 0, 1)
            # randomly drop out 30% of the points' colors
            rgb_color *= np.expand_dims(
                np.random.random(point_cloud.shape[0]) > 0.3, -1)
            point_cloud[:, 3:6] = rgb_color - MEAN_COLOR_RGB

        # Augment point cloud scale: 0.85x-1.15x
        scale_ratio = np.random.random() * 0.3 + 0.85
        scale_ratio = np.expand_dims(np.tile(scale_ratio, 3), 0)
        point_cloud[:, 0:3] *= scale_ratio
        bboxes[:, 0:3] *= scale_ratio
        bboxes[:, 3:6] *= scale_ratio
        point_votes[:, 1:4] *= scale_ratio
        point_votes[:, 4:7] *= scale_ratio
        point_votes[:, 7:10] *= scale_ratio
        if self.use_height:
            point_cloud[:, -1] *= scale_ratio[0, 0]

    # ------------------------------- LABELS ------------------------------
    num_boxes = bboxes.shape[0]
    angle_classes = np.zeros((MAX_NUM_OBJ, ))
    angle_residuals = np.zeros((MAX_NUM_OBJ, ))
    size_classes = np.zeros((MAX_NUM_OBJ, ))
    size_residuals = np.zeros((MAX_NUM_OBJ, 3))
    target_bboxes_mask = np.zeros((MAX_NUM_OBJ))
    target_bboxes_mask[0:num_boxes] = 1
    max_bboxes = np.zeros((MAX_NUM_OBJ, 8))
    max_bboxes[0:num_boxes, :] = bboxes
    target_bboxes = np.zeros((MAX_NUM_OBJ, 6))

    for i in range(num_boxes):
        bbox = bboxes[i]
        semantic_class = bbox[7]
        angle_class, angle_residual = DC.angle2class(bbox[6])
        # NOTE: The mean size stored in size2class is of full length of box edges,
        # while in sunrgbd_data.py data dumping we dumped *half* length l,w,h..
        # so have to time it by 2 here
        box3d_size = bbox[3:6] * 2
        size_class, size_residual = DC.size2class(
            box3d_size, DC.class2type[semantic_class])
        angle_classes[i] = angle_class
        angle_residuals[i] = angle_residual
        size_classes[i] = size_class
        size_residuals[i] = size_residual

        # Axis-aligned box enclosing the oriented box corners.
        corners_3d = sunrgbd_utils.my_compute_box_3d(
            bbox[0:3], bbox[3:6], bbox[6])
        corner_min = np.min(corners_3d[:, 0:3], axis=0)
        corner_max = np.max(corners_3d[:, 0:3], axis=0)
        target_bboxes[i, 0:3] = (corner_min + corner_max) / 2
        target_bboxes[i, 3:6] = corner_max - corner_min

    point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                   self.num_points,
                                                   return_choices=True)
    point_votes_mask = point_votes[choices, 0]
    point_votes = point_votes[choices, 1:]

    target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
    target_bboxes_semcls[0:num_boxes] = bboxes[:, -1]  # from 0 to 9

    # The 2D boxes and calibration are only consumed by get_box2d_feature
    # above; they are not part of the returned sample.
    ret_dict = {}
    ret_dict['point_clouds'] = point_cloud.astype(np.float32)
    ret_dict['center_label'] = target_bboxes.astype(np.float32)[:, 0:3]
    ret_dict['heading_class_label'] = angle_classes.astype(np.int64)
    ret_dict['heading_residual_label'] = angle_residuals.astype(np.float32)
    ret_dict['size_class_label'] = size_classes.astype(np.int64)
    ret_dict['size_residual_label'] = size_residuals.astype(np.float32)
    ret_dict['sem_cls_label'] = target_bboxes_semcls.astype(np.int64)
    ret_dict['box_label_mask'] = target_bboxes_mask.astype(np.float32)
    ret_dict['vote_label'] = point_votes.astype(np.float32)
    ret_dict['vote_label_mask'] = point_votes_mask.astype(np.int64)
    ret_dict['scan_idx'] = np.array(idx).astype(np.int64)
    ret_dict['max_gt_bboxes'] = max_bboxes
    return ret_dict
def __getitem__(self, idx):
    """
    Load scan `idx` from disk and assemble one training sample, optionally
    with per-pixel 2D image votes and image cues (`use_imvote`).

    Returns a dict with following keys:
        point_clouds: (N,3+C)
        center_label: (MAX_NUM_OBJ,3) center XYZ of the axis-aligned box
            enclosing each GT box
        heading_class_label: (MAX_NUM_OBJ,) heading bin index from DC.angle2class
        heading_residual_label: (MAX_NUM_OBJ,)
        size_class_label: (MAX_NUM_OBJ,) size cluster index from DC.size2class
        size_residual_label: (MAX_NUM_OBJ,3)
        sem_cls_label: (MAX_NUM_OBJ,) semantic class index
        box_label_mask: (MAX_NUM_OBJ) as 0/1 with 1 marking a real (non-padding) box
        vote_label: (N,9) with votes XYZ (3 votes: X1Y1Z1, X2Y2Z2, X3Y3Z3)
        vote_label_mask: (N,) first column of the stored votes array
        scan_idx: int scan index in scan_names list
        max_gt_bboxes: (MAX_NUM_OBJ,8) zero-padded copy of the (augmented) boxes

    Only when `use_imvote` is set, additionally:
        scale: scalar scale ratio applied by augmentation (1.0 if none)
        calib_Rtilt: (3,3) tilt rotation, updated to match the augmentation
        calib_K: (3,3) camera intrinsics
        full_img_width: image width in pixels
        cls_score_feats: (1+MAX_NUM_2D_DET, NUM_CLS) class scores, row 0 is a dummy
        full_img_votes_1d: flattened per-pixel votes, zero-padded to
            MAX_NUM_PIXEL*vote_dims
        full_img_1d: flattened normalized RGB, zero-padded to MAX_NUM_PIXEL*3
    """
    scan_name = self.scan_names[idx]
    scan_prefix = os.path.join(self.data_path, scan_name)
    point_cloud = np.load(scan_prefix + '_pc.npz')['pc']  # Nx6
    bboxes = np.load(scan_prefix + '_bbox.npy')  # K,8
    point_votes = np.load(scan_prefix + '_votes.npz')['point_votes']  # Nx10

    if self.use_imvote:
        # Read camera parameters. The context manager closes the file
        # handle, which the previous bare open(...).readlines() leaked.
        calib_path = os.path.join(self.raw_data_path, 'calib', scan_name + '.txt')
        with open(calib_path) as calib_file:
            calib_lines = calib_file.readlines()
        calib_Rtilt = np.reshape(
            np.array([float(x) for x in calib_lines[0].rstrip().split(' ')]),
            (3, 3), 'F')
        calib_K = np.reshape(
            np.array([float(x) for x in calib_lines[1].rstrip().split(' ')]),
            (3, 3), 'F')

        # Read image
        full_img = sunrgbd_utils.load_image(
            os.path.join(self.raw_data_path, 'image', scan_name + '.jpg'))
        full_img_height = full_img.shape[0]
        full_img_width = full_img.shape[1]

        # ------------------------------- 2D IMAGE VOTES ------------------------------
        cls_id_list = self.cls_id_map[scan_name]
        cls_score_list = self.cls_score_map[scan_name]
        bbox_2d_list = self.bbox_2d_map[scan_name]
        obj_img_list = []
        for i2d, (cls2d, box2d) in enumerate(zip(cls_id_list, bbox_2d_list)):
            xmin, ymin, xmax, ymax = box2d
            # During training we randomly drop 2D boxes to reduce over-fitting
            if self.train and np.random.random() > 0.5:
                continue

            obj_img = full_img[ymin:ymax, xmin:xmax, :]
            obj_h = obj_img.shape[0]
            obj_w = obj_img.shape[1]
            # Bounding box coordinates (4 values), class id, index to the semantic cues
            meta_data = (xmin, ymin, obj_h, obj_w, cls2d, i2d)
            if obj_h == 0 or obj_w == 0:
                continue

            # Use 2D box center as approximation
            uv_centroid = np.array([int(obj_w / 2), int(obj_h / 2)])
            uv_centroid = np.expand_dims(uv_centroid, 0)

            v_coords, u_coords = np.meshgrid(range(obj_h), range(obj_w), indexing='ij')
            img_vote = np.transpose(np.array([u_coords, v_coords]), (1, 2, 0))
            # Per-pixel offset from the pixel to the box center.
            img_vote = np.expand_dims(uv_centroid, 0) - img_vote

            obj_img_list.append((meta_data, img_vote))

        full_img_votes = np.zeros(
            (full_img_height, full_img_width, self.vote_dims), dtype=np.float32)
        # Empty votes: 2d box index is set to -1
        # NOTE(review): channels 3, 7, ... coincide with the class-id slot
        # written below (1+iidx*4+2), not the box-index slot (1+iidx*4+3);
        # kept exactly as before - confirm intent.
        full_img_votes[:, :, 3::4] = -1.

        for obj_img_data in obj_img_list:
            meta_data, img_vote = obj_img_data
            u0, v0, h, w, cls2d, i2d = meta_data
            for u in range(u0, u0 + w):
                for v in range(v0, v0 + h):
                    # Channel 0 counts the votes already stored at this pixel.
                    iidx = int(full_img_votes[v, u, 0])
                    if iidx >= self.max_imvote_per_pixel:
                        continue
                    full_img_votes[v, u, (1 + iidx * 4):(1 + iidx * 4 + 2)] = img_vote[v - v0, u - u0, :]
                    full_img_votes[v, u, (1 + iidx * 4 + 2)] = cls2d
                    full_img_votes[v, u, (1 + iidx * 4 + 3)] = i2d + 1  # add +1 here as we need a dummy feature for pixels outside all boxes
            full_img_votes[v0:(v0 + h), u0:(u0 + w), 0] += 1

        full_img_votes_1d = np.zeros((MAX_NUM_PIXEL * self.vote_dims), dtype=np.float32)
        full_img_votes_1d[0:full_img_height * full_img_width * self.vote_dims] = full_img_votes.flatten()

        # Semantic cues: one-hot vector for class scores
        cls_score_feats = np.zeros((1 + MAX_NUM_2D_DET, NUM_CLS), dtype=np.float32)
        # First row is dummy feature
        len_obj = len(cls_id_list)
        if len_obj:
            ind_obj = np.arange(1, len_obj + 1)
            ind_cls = np.array(cls_id_list)
            cls_score_feats[ind_obj, ind_cls] = np.array(cls_score_list)

        # Texture cues: normalized RGB values
        full_img = (full_img - 128.) / 255.
        # Serialize data to 1D and save image size so that we can recover the original location in the image
        full_img_1d = np.zeros((MAX_NUM_PIXEL * 3), dtype=np.float32)
        full_img_1d[:full_img_height * full_img_width * 3] = full_img.flatten()

    if not self.use_color:
        point_cloud = point_cloud[:, 0:3]
    else:
        point_cloud = point_cloud[:, 0:6]
        point_cloud[:, 3:] = (point_cloud[:, 3:] - MEAN_COLOR_RGB)

    if self.use_height:
        # Height above an estimated floor (a very low percentile of Z).
        floor_height = np.percentile(point_cloud[:, 2], 0.99)
        height = point_cloud[:, 2] - floor_height
        point_cloud = np.concatenate([point_cloud, np.expand_dims(height, 1)], 1)  # (N,4) or (N,7)

    # ------------------------------- DATA AUGMENTATION ------------------------------
    scale_ratio = 1.
    if self.augment:
        flip_flag = (np.random.random() > 0.5)
        if flip_flag:
            # Flipping along the YZ plane
            point_cloud[:, 0] = -1 * point_cloud[:, 0]
            bboxes[:, 0] = -1 * bboxes[:, 0]
            bboxes[:, 6] = np.pi - bboxes[:, 6]
            point_votes[:, [1, 4, 7]] = -1 * point_votes[:, [1, 4, 7]]

        # Rotation along up-axis/Z-axis
        rot_angle = (np.random.random() * np.pi / 3) - np.pi / 6  # -30 ~ +30 degree
        rot_mat = sunrgbd_utils.rotz(rot_angle)

        # Rotate the absolute vote targets, then re-express them relative
        # to the rotated points.
        point_votes_end = np.zeros_like(point_votes)
        point_votes_end[:, 1:4] = np.dot(point_cloud[:, 0:3] + point_votes[:, 1:4], np.transpose(rot_mat))
        point_votes_end[:, 4:7] = np.dot(point_cloud[:, 0:3] + point_votes[:, 4:7], np.transpose(rot_mat))
        point_votes_end[:, 7:10] = np.dot(point_cloud[:, 0:3] + point_votes[:, 7:10], np.transpose(rot_mat))

        point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3], np.transpose(rot_mat))
        bboxes[:, 0:3] = np.dot(bboxes[:, 0:3], np.transpose(rot_mat))
        bboxes[:, 6] -= rot_angle
        point_votes[:, 1:4] = point_votes_end[:, 1:4] - point_cloud[:, 0:3]
        point_votes[:, 4:7] = point_votes_end[:, 4:7] - point_cloud[:, 0:3]
        point_votes[:, 7:10] = point_votes_end[:, 7:10] - point_cloud[:, 0:3]

        if self.use_imvote:
            R_inverse = np.copy(np.transpose(rot_mat))
            if flip_flag:
                R_inverse[0, :] *= -1
            # Update Rtilt according to the augmentation
            # R_inverse (3x3) * point (3x1) transforms an augmented depth point
            # to original point in upright_depth coordinates
            calib_Rtilt = np.dot(np.transpose(R_inverse), calib_Rtilt)

        # Augment RGB color
        if self.use_color:
            rgb_color = point_cloud[:, 3:6] + MEAN_COLOR_RGB
            rgb_color *= (1 + 0.4 * np.random.random(3) - 0.2)  # brightness change for each channel
            rgb_color += (0.1 * np.random.random(3) - 0.05)  # color shift for each channel
            rgb_color += np.expand_dims((0.05 * np.random.random(point_cloud.shape[0]) - 0.025), -1)  # jittering on each pixel
            rgb_color = np.clip(rgb_color, 0, 1)
            # randomly drop out 30% of the points' colors
            rgb_color *= np.expand_dims(np.random.random(point_cloud.shape[0]) > 0.3, -1)
            point_cloud[:, 3:6] = rgb_color - MEAN_COLOR_RGB

        # Augment point cloud scale: 0.85x-1.15x
        scale_ratio = np.random.random() * 0.3 + 0.85
        if self.use_imvote:
            calib_Rtilt = np.dot(
                np.array([[scale_ratio, 0, 0], [0, scale_ratio, 0], [0, 0, scale_ratio]]),
                calib_Rtilt)
        scale_ratio_expand = np.expand_dims(np.tile(scale_ratio, 3), 0)
        point_cloud[:, 0:3] *= scale_ratio_expand
        bboxes[:, 0:3] *= scale_ratio_expand
        bboxes[:, 3:6] *= scale_ratio_expand
        point_votes[:, 1:4] *= scale_ratio_expand
        point_votes[:, 4:7] *= scale_ratio_expand
        point_votes[:, 7:10] *= scale_ratio_expand
        if self.use_height:
            point_cloud[:, -1] *= scale_ratio

    # ------------------------------- LABELS ------------------------------
    num_boxes = bboxes.shape[0]
    angle_classes = np.zeros((MAX_NUM_OBJ,))
    angle_residuals = np.zeros((MAX_NUM_OBJ,))
    size_classes = np.zeros((MAX_NUM_OBJ,))
    size_residuals = np.zeros((MAX_NUM_OBJ, 3))
    target_bboxes_mask = np.zeros((MAX_NUM_OBJ))
    target_bboxes_mask[0:num_boxes] = 1
    max_bboxes = np.zeros((MAX_NUM_OBJ, 8))
    max_bboxes[0:num_boxes, :] = bboxes
    target_bboxes = np.zeros((MAX_NUM_OBJ, 6))

    for i in range(num_boxes):
        bbox = bboxes[i]
        semantic_class = bbox[7]
        angle_class, angle_residual = DC.angle2class(bbox[6])
        # NOTE: The mean size stored in size2class is of full length of box edges,
        # while in sunrgbd_data.py data dumping we dumped *half* length l,w,h..
        # so have to time it by 2 here
        box3d_size = bbox[3:6] * 2
        size_class, size_residual = DC.size2class(box3d_size, DC.class2type[semantic_class])
        angle_classes[i] = angle_class
        angle_residuals[i] = angle_residual
        size_classes[i] = size_class
        size_residuals[i] = size_residual

        # Axis-aligned box enclosing the oriented box corners.
        corners_3d = sunrgbd_utils.my_compute_box_3d(bbox[0:3], bbox[3:6], bbox[6])
        corner_min = np.min(corners_3d[:, 0:3], axis=0)
        corner_max = np.max(corners_3d[:, 0:3], axis=0)
        target_bboxes[i, 0:3] = (corner_min + corner_max) / 2
        target_bboxes[i, 3:6] = corner_max - corner_min

    point_cloud, choices = pc_util.random_sampling(point_cloud, self.num_points, return_choices=True)
    point_votes_mask = point_votes[choices, 0]
    point_votes = point_votes[choices, 1:]

    target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
    target_bboxes_semcls[0:num_boxes] = bboxes[:, -1]  # from 0 to 9

    ret_dict = {}
    ret_dict['point_clouds'] = point_cloud.astype(np.float32)
    ret_dict['center_label'] = target_bboxes.astype(np.float32)[:, 0:3]
    ret_dict['heading_class_label'] = angle_classes.astype(np.int64)
    ret_dict['heading_residual_label'] = angle_residuals.astype(np.float32)
    ret_dict['size_class_label'] = size_classes.astype(np.int64)
    ret_dict['size_residual_label'] = size_residuals.astype(np.float32)
    ret_dict['sem_cls_label'] = target_bboxes_semcls.astype(np.int64)
    ret_dict['box_label_mask'] = target_bboxes_mask.astype(np.float32)
    ret_dict['vote_label'] = point_votes.astype(np.float32)
    ret_dict['vote_label_mask'] = point_votes_mask.astype(np.int64)
    ret_dict['scan_idx'] = np.array(idx).astype(np.int64)
    ret_dict['max_gt_bboxes'] = max_bboxes
    if self.use_imvote:
        ret_dict['scale'] = np.array(scale_ratio).astype(np.float32)
        ret_dict['calib_Rtilt'] = calib_Rtilt.astype(np.float32)
        ret_dict['calib_K'] = calib_K.astype(np.float32)
        ret_dict['full_img_width'] = np.array(full_img_width).astype(np.int64)
        ret_dict['cls_score_feats'] = cls_score_feats.astype(np.float32)
        ret_dict['full_img_votes_1d'] = full_img_votes_1d.astype(np.float32)
        ret_dict['full_img_1d'] = full_img_1d.astype(np.float32)

    return ret_dict
def __getitem__(self, idx):
    """
    Assemble one training sample from the in-memory cached scan `idx`.

    The cached arrays in `self.point_cloud_list` / `self.bboxes_list` are
    copied before any in-place edit, so the colour centring and the
    augmentation never modify the cache.

    Returns a dict with following keys:
        point_clouds: (N,3+C)
        center_label: (MAX_NUM_OBJ,3) center XYZ of the axis-aligned box
            enclosing each GT box; padding rows are set to 1000.0
        sem_cls_label: (MAX_NUM_OBJ,) semantic class index
        heading_class_label: (MAX_NUM_OBJ,) heading bin index from DC.angle2class
        heading_residual_label: (MAX_NUM_OBJ,)
        size_class_label: (MAX_NUM_OBJ,) size cluster index from DC.size2class
        size_residual_label: (MAX_NUM_OBJ,3)
        size_gts: (MAX_NUM_OBJ,3) extents of the axis-aligned enclosing box
        box_label_mask: (MAX_NUM_OBJ) as 0/1 with 1 marking a real (non-padding) box
        point_obj_mask: (N,) first column of the cached point labels
        point_instance_label: (N,) last column of the cached point labels
        scan_idx: int scan index
        max_gt_bboxes: (MAX_NUM_OBJ,8) zero-padded copy of the (augmented) boxes
    """
    cached_points = self.point_cloud_list[idx]  # Nx6
    # Copy: the augmentation below flips/rotates/scales the boxes in place,
    # which would otherwise accumulate in the cache across visits.
    bboxes = np.copy(self.bboxes_list[idx])  # K,8
    point_obj_mask = self.point_labels_list[idx][:, 0]
    point_instance_label = self.point_labels_list[idx][:, -1]

    # Copy the selected columns: a plain slice is a view into the cache, so
    # the colour centring and augmentation would write through to it.
    if not self.use_color:
        point_cloud = np.copy(cached_points[:, 0:3])
    else:
        point_cloud = np.copy(cached_points[:, 0:6])
        point_cloud[:, 3:] = (point_cloud[:, 3:] - MEAN_COLOR_RGB)

    if self.use_height:
        # Height above an estimated floor (a very low percentile of Z).
        floor_height = np.percentile(point_cloud[:, 2], 0.99)
        height = point_cloud[:, 2] - floor_height
        point_cloud = np.concatenate(
            [point_cloud, np.expand_dims(height, 1)], 1)  # (N,4) or (N,7)

    # ------------------------------- DATA AUGMENTATION ------------------------------
    if self.augment:
        if np.random.random() > 0.5:
            # Flipping along the YZ plane
            point_cloud[:, 0] = -1 * point_cloud[:, 0]
            bboxes[:, 0] = -1 * bboxes[:, 0]
            bboxes[:, 6] = np.pi - bboxes[:, 6]

        # Rotation along up-axis/Z-axis
        rot_angle = (np.random.random() * np.pi / 3) - np.pi / 6  # -30 ~ +30 degree
        rot_mat = sunrgbd_utils.rotz(rot_angle)

        point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3],
                                     np.transpose(rot_mat))
        bboxes[:, 0:3] = np.dot(bboxes[:, 0:3], np.transpose(rot_mat))
        bboxes[:, 6] -= rot_angle

        # Augment RGB color
        if self.use_color:
            rgb_color = point_cloud[:, 3:6] + MEAN_COLOR_RGB
            rgb_color *= (1 + 0.4 * np.random.random(3) - 0.2)  # brightness change for each channel
            rgb_color += (0.1 * np.random.random(3) - 0.05)  # color shift for each channel
            rgb_color += np.expand_dims(
                (0.05 * np.random.random(point_cloud.shape[0]) - 0.025),
                -1)  # jittering on each pixel
            rgb_color = np.clip(rgb_color, 0, 1)
            # randomly drop out 30% of the points' colors
            rgb_color *= np.expand_dims(
                np.random.random(point_cloud.shape[0]) > 0.3, -1)
            point_cloud[:, 3:6] = rgb_color - MEAN_COLOR_RGB

        # Augment point cloud scale: 0.85x-1.15x
        scale_ratio = np.random.random() * 0.3 + 0.85
        scale_ratio = np.expand_dims(np.tile(scale_ratio, 3), 0)
        point_cloud[:, 0:3] *= scale_ratio
        bboxes[:, 0:3] *= scale_ratio
        bboxes[:, 3:6] *= scale_ratio
        if self.use_height:
            point_cloud[:, -1] *= scale_ratio[0, 0]

    # ------------------------------- LABELS ------------------------------
    num_boxes = bboxes.shape[0]
    angle_classes = np.zeros((MAX_NUM_OBJ, ))
    angle_residuals = np.zeros((MAX_NUM_OBJ, ))
    size_classes = np.zeros((MAX_NUM_OBJ, ))
    size_residuals = np.zeros((MAX_NUM_OBJ, 3))
    target_bboxes_mask = np.zeros((MAX_NUM_OBJ))
    target_bboxes_mask[0:num_boxes] = 1
    max_bboxes = np.zeros((MAX_NUM_OBJ, 8))
    max_bboxes[0:num_boxes, :] = bboxes

    target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
    # Padding rows get a far-away center (1000.0) instead of the origin.
    target_bboxes[:, 0:3] += 1000.0
    size_gts = np.zeros((MAX_NUM_OBJ, 3))

    for i in range(num_boxes):
        bbox = bboxes[i]
        semantic_class = bbox[7]
        angle_class, angle_residual = DC.angle2class(bbox[6])
        # NOTE: The mean size stored in size2class is of full length of box edges,
        # while in sunrgbd_data.py data dumping we dumped *half* length l,w,h..
        # so have to time it by 2 here
        box3d_size = bbox[3:6] * 2
        size_class, size_residual = DC.size2class(
            box3d_size, DC.class2type[semantic_class])
        angle_classes[i] = angle_class
        angle_residuals[i] = angle_residual
        size_classes[i] = size_class
        size_residuals[i] = size_residual

        # Axis-aligned box enclosing the oriented box corners.
        corners_3d = sunrgbd_utils.my_compute_box_3d(
            bbox[0:3], bbox[3:6], bbox[6])
        corner_min = np.min(corners_3d[:, 0:3], axis=0)
        corner_max = np.max(corners_3d[:, 0:3], axis=0)
        target_bboxes[i, 0:3] = (corner_min + corner_max) / 2
        target_bboxes[i, 3:6] = corner_max - corner_min
        size_gts[i, :] = target_bboxes[i, 3:6]

    point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                   self.num_points,
                                                   return_choices=True)
    point_obj_mask = point_obj_mask[choices]
    point_instance_label = point_instance_label[choices]

    target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
    target_bboxes_semcls[0:num_boxes] = bboxes[:, -1]  # from 0 to 9

    ret_dict = {}
    ret_dict['point_clouds'] = point_cloud.astype(np.float32)
    ret_dict['center_label'] = target_bboxes.astype(np.float32)[:, 0:3]
    ret_dict['heading_class_label'] = angle_classes.astype(np.int64)
    ret_dict['heading_residual_label'] = angle_residuals.astype(np.float32)
    ret_dict['size_class_label'] = size_classes.astype(np.int64)
    ret_dict['size_residual_label'] = size_residuals.astype(np.float32)
    ret_dict['size_gts'] = size_gts.astype(np.float32)
    ret_dict['sem_cls_label'] = target_bboxes_semcls.astype(np.int64)
    ret_dict['box_label_mask'] = target_bboxes_mask.astype(np.float32)
    ret_dict['point_obj_mask'] = point_obj_mask.astype(np.int64)
    ret_dict['point_instance_label'] = point_instance_label.astype(np.int64)
    ret_dict['scan_idx'] = np.array(idx).astype(np.int64)
    ret_dict['max_gt_bboxes'] = max_bboxes
    return ret_dict
def __getitem__(self, idx):
    """Load one scan and build box, vote, surface and line training labels.

    The scan is optionally augmented (flip, rotation, colour jitter, scale),
    randomly sub-sampled with ``self.num_points``, and every ground-truth box
    then marks the sampled points that lie on its faces and edges.

    Args:
        idx: index into ``self.scan_names``.

    Returns a dict with following keys:
        point_clouds: (N,3+C)
        center_label: (MAX_NUM_OBJ,3) for GT box center XYZ
        heading_class_label: (MAX_NUM_OBJ,) with int values in 0,...,NUM_HEADING_BIN-1
        heading_residual_label: (MAX_NUM_OBJ,)
        size_class_label: (MAX_NUM_OBJ,) with int values in 0,...,NUM_SIZE_CLUSTER
        size_residual_label: (MAX_NUM_OBJ,3)
        sem_cls_label: (MAX_NUM_OBJ,) semantic class index
        box_label_mask: (MAX_NUM_OBJ) as 0/1 with 1 indicating a unique box
        vote_label: (N,9) with votes XYZ (3 votes: X1Y1Z1, X2Y2Z2, X3Y3Z3)
            if there is only one vote then X1==X2==X3 etc.
        vote_label_mask: (N,) with 0/1 with 1 indicating the point is in one of the object's OBB.
        scan_idx: int scan index in scan_names list
        max_gt_bboxes: (MAX_NUM_OBJ,8) zero-padded copy of the (augmented) boxes
        size_label: (MAX_NUM_OBJ,3) full box edge lengths (2 x stored half sizes)
        heading_label: (MAX_NUM_OBJ,) heading angle of each box
        floor_height: floor estimate, only present when ``self.use_height``
        point_boundary_mask_z / point_boundary_mask_xy: (N,) 1 for points
            labelled as lying on a horizontal / vertical box face
        point_boundary_offset_z / point_boundary_offset_xy: (N,3) offset from
            the point to the centre assigned to that face
        point_boundary_sem_z: (N,6) face centre XYZ, two edge lengths, class
        point_boundary_sem_xy: (N,5) face centre XYZ, vertical extent, class
        point_line_mask: (N,) 1 for points labelled as lying on a box edge
        point_line_offset: (N,3) offset from the point to the edge midpoint
        point_line_sem: (N,4) edge midpoint XYZ, class

    Raises:
        ValueError: if a box fails the upright / plane sanity checks.
    """
    scan_name = self.scan_names[idx]
    scan_prefix = os.path.join(self.data_path, scan_name)
    # Column 6 is read below as a per-point semantic id, so the array holds
    # more than the xyz + rgb columns.
    point_color_sem = np.load(scan_prefix + '_pc.npz')['pc']
    bboxes = np.load(scan_prefix + '_bbox.npy')  # K,8
    point_votes = np.load(scan_prefix + '_votes.npz')['point_votes']  # Nx10

    semantics37 = point_color_sem[:, 6]
    semantics10 = np.array([DC.class37_2_class10[k] for k in semantics37])
    semantics10_multi = [
        DC.class37_2_class10_multi[k] for k in semantics37
    ]
    if not self.use_color:
        point_cloud = point_color_sem[:, 0:3]
    else:
        point_cloud = point_color_sem[:, 0:6]
        point_cloud[:, 3:6] = (point_color_sem[:, 3:6] - MEAN_COLOR_RGB)

    if self.use_height:
        # np.percentile takes q in [0, 100]: this is the 0.99th percentile of z.
        floor_height = np.percentile(point_cloud[:, 2], 0.99)
        height = point_cloud[:, 2] - floor_height
        point_cloud = np.concatenate(
            [point_cloud, np.expand_dims(height, 1)], 1)  # (N,4) or (N,7)

    # ------------------------------- DATA AUGMENTATION ------------------------------
    if self.augment:
        if np.random.random() > 0.5:
            # Flipping along the YZ plane
            point_cloud[:, 0] = -1 * point_cloud[:, 0]
            bboxes[:, 0] = -1 * bboxes[:, 0]
            bboxes[:, 6] = np.pi - bboxes[:, 6]
            point_votes[:, [1, 4, 7]] = -1 * point_votes[:, [1, 4, 7]]

        # Rotation along up-axis/Z-axis
        rot_angle = (np.random.random() * np.pi /
                     3) - np.pi / 6  # -30 ~ +30 degree
        rot_mat = sunrgbd_utils.rotz(rot_angle)

        # Rotate the vote end points together with the cloud, then turn them
        # back into offsets relative to the rotated points.
        point_votes_end = np.zeros_like(point_votes)
        point_votes_end[:, 1:4] = np.dot(
            point_cloud[:, 0:3] + point_votes[:, 1:4],
            np.transpose(rot_mat))
        point_votes_end[:, 4:7] = np.dot(
            point_cloud[:, 0:3] + point_votes[:, 4:7],
            np.transpose(rot_mat))
        point_votes_end[:, 7:10] = np.dot(
            point_cloud[:, 0:3] + point_votes[:, 7:10],
            np.transpose(rot_mat))

        point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3],
                                     np.transpose(rot_mat))
        bboxes[:, 0:3] = np.dot(bboxes[:, 0:3], np.transpose(rot_mat))
        bboxes[:, 6] -= rot_angle
        point_votes[:, 1:4] = point_votes_end[:, 1:4] - point_cloud[:, 0:3]
        point_votes[:, 4:7] = point_votes_end[:, 4:7] - point_cloud[:, 0:3]
        point_votes[:, 7:10] = point_votes_end[:, 7:10] - point_cloud[:, 0:3]

        # Augment RGB color
        if self.use_color:
            rgb_color = point_cloud[:, 3:6] + MEAN_COLOR_RGB
            rgb_color *= (1 + 0.4 * np.random.random(3) - 0.2
                          )  # brightness change for each channel
            rgb_color += (0.1 * np.random.random(3) - 0.05
                          )  # color shift for each channel
            rgb_color += np.expand_dims(
                (0.05 * np.random.random(point_cloud.shape[0]) - 0.025),
                -1)  # jittering on each pixel
            rgb_color = np.clip(rgb_color, 0, 1)
            # randomly drop out 30% of the points' colors
            rgb_color *= np.expand_dims(
                np.random.random(point_cloud.shape[0]) > 0.3, -1)
            point_cloud[:, 3:6] = rgb_color - MEAN_COLOR_RGB

        # Augment point cloud scale: 0.85x-1.15x
        scale_ratio = np.random.random() * 0.3 + 0.85
        scale_ratio = np.expand_dims(np.tile(scale_ratio, 3), 0)
        point_cloud[:, 0:3] *= scale_ratio
        bboxes[:, 0:3] *= scale_ratio
        bboxes[:, 3:6] *= scale_ratio
        point_votes[:, 1:4] *= scale_ratio
        point_votes[:, 4:7] *= scale_ratio
        point_votes[:, 7:10] *= scale_ratio
        if self.use_height:
            point_cloud[:, -1] *= scale_ratio[0, 0]

    # ------------------------------- LABELS ------------------------------
    box3d_sizes = np.zeros((MAX_NUM_OBJ, 3))
    angle_classes = np.zeros((MAX_NUM_OBJ, ))
    angle_residuals = np.zeros((MAX_NUM_OBJ, ))
    size_classes = np.zeros((MAX_NUM_OBJ, ))
    size_residuals = np.zeros((MAX_NUM_OBJ, 3))
    label_mask = np.zeros((MAX_NUM_OBJ))
    label_mask[0:bboxes.shape[0]] = 1
    max_bboxes = np.zeros((MAX_NUM_OBJ, 8))
    max_bboxes[0:bboxes.shape[0], :] = bboxes

    # new items
    box3d_angles = np.zeros((MAX_NUM_OBJ, ))

    point_boundary_mask_z = np.zeros(self.num_points)
    point_boundary_mask_xy = np.zeros(self.num_points)
    point_boundary_offset_z = np.zeros([self.num_points, 3])
    point_boundary_offset_xy = np.zeros([self.num_points, 3])
    point_boundary_sem_z = np.zeros([self.num_points, 3 + 2 + 1])
    point_boundary_sem_xy = np.zeros([self.num_points, 3 + 1 + 1])

    point_line_mask = np.zeros(self.num_points)
    point_line_offset = np.zeros([self.num_points, 3])
    point_line_sem = np.zeros([self.num_points, 3 + 1])

    for i in range(bboxes.shape[0]):
        bbox = bboxes[i]
        semantic_class = bbox[7]
        angle_class, angle_residual = DC.angle2class(bbox[6])
        # NOTE: The mean size stored in size2class is of full length of box edges,
        # while in sunrgbd_data.py data dumping we dumped *half* length l,w,h.. so have to time it by 2 here
        box3d_size = bbox[3:6] * 2
        size_class, size_residual = DC.size2class(
            box3d_size, DC.class2type[semantic_class])
        angle_classes[i] = angle_class
        angle_residuals[i] = angle_residual
        size_classes[i] = size_class
        size_residuals[i] = size_residual
        box3d_sizes[i, :] = box3d_size
        box3d_angles[i] = bbox[6]

    target_bboxes_mask = label_mask
    target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
    for i in range(bboxes.shape[0]):
        bbox = bboxes[i]
        corners_3d = sunrgbd_utils.my_compute_box_3d(
            bbox[0:3], bbox[3:6], bbox[6])
        # compute axis aligned box
        xmin = np.min(corners_3d[:, 0])
        ymin = np.min(corners_3d[:, 1])
        zmin = np.min(corners_3d[:, 2])
        xmax = np.max(corners_3d[:, 0])
        ymax = np.max(corners_3d[:, 1])
        zmax = np.max(corners_3d[:, 2])
        target_bbox = np.array([(xmin + xmax) / 2, (ymin + ymax) / 2,
                                (zmin + zmax) / 2, xmax - xmin,
                                ymax - ymin, zmax - zmin])
        target_bboxes[i, :] = target_bbox

    point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                   self.num_points,
                                                   return_choices=True)
    # semantics37 / semantics10 are re-indexed but not read again below;
    # only the multi-label list drives the surface and line labelling.
    semantics37 = semantics37[choices]
    semantics10 = semantics10[choices]
    semantics10_multi = [semantics10_multi[i] for i in choices]
    point_votes_mask = point_votes[choices, 0]
    point_votes = point_votes[choices, 1:]

    # ----------------------- SURFACE / LINE LABEL HELPERS -----------------------
    def _near_plane(points, plane):
        """Return point-plane distances and the mask of points within DIST_THRESH of the closest one."""
        dist = np.abs(np.sum(points * plane[:3], 1) + plane[-1])
        return dist, np.abs(dist - np.min(dist)) < DIST_THRESH

    def _mark_lines(rows, pts, line_sels, corner_pairs, corners, sem_cls):
        """Label the points of every sufficiently populated box edge with that edge's midpoint."""
        for line_sel, (a, b) in zip(line_sels, corner_pairs):
            if np.sum(line_sel) > NUM_POINT_LINE:
                line_rows = rows[line_sel]
                linecenter = (corners[a] + corners[b]) / 2.0
                point_line_mask[line_rows] = 1.0
                point_line_offset[line_rows] = linecenter - pts[line_sel]
                point_line_sem[line_rows] = np.array(
                    [linecenter[0], linecenter[1], linecenter[2], sem_cls])

    def _mark_z_surface(rows, pts, dists, corners, corner_ids, sem_cls):
        """Label points on a horizontal face if there are enough of them and they are flat enough."""
        origin, opposite, end_u, end_v = corner_ids
        if rows.shape[0] > NUM_POINT and np.var(dists) < VAR_THRESH:
            center = (corners[origin] + corners[opposite]) / 2.0
            center[2] = np.mean(pts[:, 2])
            point_boundary_mask_z[rows] = 1.0
            point_boundary_sem_z[rows] = np.array([
                center[0], center[1], center[2],
                np.linalg.norm(corners[end_u] - corners[origin]),
                np.linalg.norm(corners[end_v] - corners[origin]), sem_cls
            ])
            point_boundary_offset_z[rows] = center - pts

    def _mark_xy_surface(rows, pts, dists, corners, low, high, sem_cls):
        """Label points on a vertical face if there are enough of them and they are flat enough."""
        if rows.shape[0] > NUM_POINT and np.var(dists) < VAR_THRESH:
            center = np.array([
                np.mean(pts[:, 0]),
                np.mean(pts[:, 1]),
                (corners[low, 2] + corners[high, 2]) / 2.0
            ])
            point_boundary_mask_xy[rows] = 1.0
            point_boundary_sem_xy[rows] = np.array([
                center[0], center[1], center[2],
                corners[high, 2] - corners[low, 2], sem_cls
            ])
            point_boundary_offset_xy[rows] = center - pts

    def _vertical_planes(v1, v2, anchor, far_corners, faces, box_idx):
        """Return the unit-normal plane through *anchor* and the parallel plane through *far_corners*."""
        normal = np.cross(v1, v2)
        near = np.array(
            [normal[0], normal[1], normal[2], -np.dot(normal, anchor)])
        ### Normalize xy here
        near /= np.linalg.norm(near[:3])
        newd = np.sum(far_corners * near[:3], 1)
        if near[2] < LOWER_THRESH:
            far = np.array([near[0], near[1], near[2], -np.mean(newd)])
        else:
            # Previously a pdb breakpoint; fail loudly instead of going on
            # with undefined or stale planes.
            raise ValueError(
                'error with upright: %s planes of box %d in scan %s' %
                (faces, box_idx, scan_name))
        return near, far

    # box angle is -pi to pi
    for i in range(bboxes.shape[0]):
        bbox = bboxes[i]
        sem_cls = bbox[7]
        corners = params2bbox(bbox[:3], 2 * bbox[3:6],
                              clockwise2counter(bbox[6]))

        try:
            _, in_box = extract_pc_in_box3d(point_cloud, corners)
        except Exception:
            # Best effort: a box whose points cannot be extracted simply
            # contributes no surface / line labels.
            continue
        ind_all_cls = np.where(in_box)[0]  # T/F to index

        # keep the in-box points whose (multi-label) semantics include the box class
        ind = np.array(
            [j for j in ind_all_cls if sem_cls in semantics10_multi[j]])
        if ind.shape[0] < NUM_POINT_SEM_THRESHOLD:
            continue
        x = point_cloud[ind, :3]

        ### Get bb planes and boundary points
        para_points = np.array(
            [corners[1], corners[3], corners[5], corners[7]])
        plane_lower = np.array([0, 0, 1, -corners[6, -1]])
        newd = np.sum(para_points * plane_lower[:3], 1)
        if check_upright(
                para_points
        ) and plane_lower[0] + plane_lower[1] < LOWER_THRESH:
            plane_upper = np.array([0, 0, 1, -np.mean(newd)])
        else:
            raise ValueError('error with upright: box %d in scan %s' %
                             (i, scan_name))
        if not check_z(plane_upper, para_points):
            raise ValueError(
                'upper plane failed the z check: box %d in scan %s' %
                (i, scan_name))

        # Lower face: four edges, then the surface.
        alldist, sel = _near_plane(x, plane_lower)
        rows, pts = ind[sel], x[sel]
        line_sel1, line_sel2, line_sel3, line_sel4 = get_linesel(
            pts, corners, 'lower')
        _mark_lines(rows, pts,
                    (line_sel1, line_sel2, line_sel3, line_sel4),
                    ((0, 2), (4, 6), (0, 4), (2, 6)), corners, sem_cls)
        _mark_z_surface(rows, pts, alldist[sel], corners, (0, 6, 4, 2),
                        sem_cls)

        # (Labelling of "middle" surfaces was disabled in the original code.)

        # Upper face: four edges, then the surface.
        alldist, sel = _near_plane(x, plane_upper)
        rows, pts = ind[sel], x[sel]
        line_sel1, line_sel2, line_sel3, line_sel4 = get_linesel(
            pts, corners, 'upper')
        _mark_lines(rows, pts,
                    (line_sel1, line_sel2, line_sel3, line_sel4),
                    ((1, 3), (5, 7), (1, 5), (3, 7)), corners, sem_cls)
        _mark_z_surface(rows, pts, alldist[sel], corners, (1, 7, 5, 3),
                        sem_cls)

        # Left / right faces: two vertical edges each, then the surface.
        plane_left, plane_right = _vertical_planes(
            corners[3] - corners[2], corners[2] - corners[0], corners[0],
            np.array([corners[4], corners[5], corners[6], corners[7]]),
            'left/right', i)

        alldist, sel = _near_plane(x, plane_left)
        rows, pts = ind[sel], x[sel]
        line_sel1, line_sel2 = get_linesel(pts, corners, 'left')
        _mark_lines(rows, pts, (line_sel1, line_sel2), ((0, 1), (2, 3)),
                    corners, sem_cls)
        _mark_xy_surface(rows, pts, alldist[sel], corners, 0, 1, sem_cls)

        alldist, sel = _near_plane(x, plane_right)
        rows, pts = ind[sel], x[sel]
        line_sel1, line_sel2 = get_linesel(pts, corners, 'right')
        _mark_lines(rows, pts, (line_sel1, line_sel2), ((4, 5), (6, 7)),
                    corners, sem_cls)
        _mark_xy_surface(rows, pts, alldist[sel], corners, 4, 5, sem_cls)

        # Front / back faces: surfaces only (their edges were labelled above).
        plane_front, plane_back = _vertical_planes(
            corners[0] - corners[4], corners[4] - corners[5], corners[5],
            np.array([corners[2], corners[3], corners[6], corners[7]]),
            'front/back', i)

        alldist, sel = _near_plane(x, plane_front)
        _mark_xy_surface(ind[sel], x[sel], alldist[sel], corners, 0, 1,
                         sem_cls)

        alldist, sel = _near_plane(x, plane_back)
        _mark_xy_surface(ind[sel], x[sel], alldist[sel], corners, 2, 3,
                         sem_cls)

    ret_dict = {}
    ret_dict['point_clouds'] = point_cloud.astype(np.float32)
    ret_dict['center_label'] = target_bboxes.astype(np.float32)[:, 0:3]
    ret_dict['heading_class_label'] = angle_classes.astype(np.int64)
    ret_dict['heading_residual_label'] = angle_residuals.astype(np.float32)
    ret_dict['size_class_label'] = size_classes.astype(np.int64)
    ret_dict['size_residual_label'] = size_residuals.astype(np.float32)
    target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
    target_bboxes_semcls[0:bboxes.shape[0]] = bboxes[:, -1]  # from 0 to 9
    ret_dict['sem_cls_label'] = target_bboxes_semcls.astype(np.int64)
    ret_dict['box_label_mask'] = target_bboxes_mask.astype(np.float32)
    ret_dict['vote_label'] = point_votes.astype(np.float32)
    ret_dict['vote_label_mask'] = point_votes_mask.astype(np.int64)
    ret_dict['scan_idx'] = np.array(idx).astype(np.int64)
    ret_dict['max_gt_bboxes'] = max_bboxes

    # new items
    ret_dict['size_label'] = box3d_sizes.astype(np.float32)
    ret_dict['heading_label'] = box3d_angles.astype(np.float32)
    if self.use_height:
        ret_dict['floor_height'] = floor_height

    ret_dict['point_boundary_mask_z'] = point_boundary_mask_z.astype(
        np.float32)
    ret_dict['point_boundary_mask_xy'] = point_boundary_mask_xy.astype(
        np.float32)
    ret_dict['point_boundary_offset_z'] = point_boundary_offset_z.astype(
        np.float32)
    ret_dict['point_boundary_offset_xy'] = point_boundary_offset_xy.astype(
        np.float32)
    ret_dict['point_boundary_sem_z'] = point_boundary_sem_z.astype(
        np.float32)
    ret_dict['point_boundary_sem_xy'] = point_boundary_sem_xy.astype(
        np.float32)

    ret_dict['point_line_mask'] = point_line_mask.astype(np.float32)
    ret_dict['point_line_offset'] = point_line_offset.astype(np.float32)
    ret_dict['point_line_sem'] = point_line_sem.astype(np.float32)

    return ret_dict
def __data_generation_(self, idx):
    """Load one scan, optionally augment it, and return its labels as a list.

    Args:
        idx: index into ``self.scan_names``.

    Returns:
        A list, in this order:
        point_cloud: N,3+C
        center_label: MAX_NUM_OBJ, 3
        heading_class_label: MAX_NUM_OBJ,
        heading_residual_label: MAX_NUM_OBJ,
        size_class_label: MAX_NUM_OBJ,
        size_residual_label: MAX_NUM_OBJ, 3
        sem_cls_label: MAX_NUM_OBJ,
        box_label_mask: MAX_NUM_OBJ,
        vote_label: N, 9
        vote_label_mask: N,
    """
    scan_name = self.scan_names[idx]
    scan_prefix = os.path.join(self.data_path, scan_name)
    point_cloud = np.load(scan_prefix + '_pc.npz')['pc']  # N,6
    # Bounding boxes (K,8)
    # [0:3]: centroid coordinate. x,y,z
    # [3:6]: size. *half* lengths l,w,h (doubled below before size2class)
    # [6]: heading angle
    # [7]: semantic class index (used as a key into DC.class2type)
    bboxes = np.load(scan_prefix + '_bbox.npy')  # K,8
    # Votes (N, 10) --3 votes and 1 vote mask
    # [0]: this point is in a bounding box or not (0/1)
    # [1:4],[4:7],[7:10]: if point is not in any bounding box, all zeros;
    #                     else the offset to bounding box center
    #                     one point can be assigned to at most 3 bounding boxes
    point_votes = np.load(scan_prefix + '_votes.npz')['point_votes']  # Nx10

    if not self.use_color:
        point_cloud = point_cloud[:, 0:3]  # x,y,z
    else:
        point_cloud = point_cloud[:, 0:6]  # x,y,z,r,g,b
        # Centre all three colour channels. The original assigned the (N,3)
        # result into the single column [:, 3], which raises a ValueError.
        point_cloud[:, 3:6] = point_cloud[:, 3:6] - MEAN_COLOR_RGB

    if self.use_height:
        # np.percentile takes q in [0, 100]: this is the 0.99th percentile of z.
        floor_height = np.percentile(point_cloud[:, 2], 0.99)
        height = point_cloud[:, 2] - floor_height
        point_cloud = np.concatenate(
            [point_cloud, np.expand_dims(height, 1)], 1)  # N,4 or N,7

    #-------------data augmentation-------------
    if self.augment:
        if np.random.rand() > 0.5:
            # flipping along YZ plane
            point_cloud[:, 0] = -1 * point_cloud[:, 0]
            bboxes[:, 0] = bboxes[:, 0] * -1
            bboxes[:, 6] = np.pi - bboxes[:, 6]
            point_votes[:, [1, 4, 7]] = -1 * point_votes[:, [1, 4, 7]]

        # Rotation along up-axis/Z-axis
        rot_angle = (np.random.rand() * np.pi /
                     3) - np.pi / 6  # -30~30 degree
        rot_mat = sunrgbd_utils.rotz(rot_angle)

        point_votes_end = np.zeros_like(point_votes)
        # first, rotate votes "with" the point_cloud
        point_votes_end[:, 1:4] = np.dot(
            point_cloud[:, 0:3] + point_votes[:, 1:4],
            np.transpose(rot_mat))
        point_votes_end[:, 4:7] = np.dot(
            point_cloud[:, 0:3] + point_votes[:, 4:7],
            np.transpose(rot_mat))
        point_votes_end[:, 7:10] = np.dot(
            point_cloud[:, 0:3] + point_votes[:, 7:10],
            np.transpose(rot_mat))
        # then, rotate the point cloud alone
        point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3],
                                     np.transpose(rot_mat))
        bboxes[:, 0:3] = np.dot(bboxes[:, 0:3], np.transpose(rot_mat))
        bboxes[:, 6] -= rot_angle  # the original angle is NOT flipped
        # finally, restore the point_votes by recalculating the offset
        point_votes[:, 1:4] = point_votes_end[:, 1:4] - point_cloud[:, 0:3]
        point_votes[:, 4:7] = point_votes_end[:, 4:7] - point_cloud[:, 0:3]
        point_votes[:, 7:10] = point_votes_end[:, 7:10] - point_cloud[:, 0:3]

        # augment the color
        if self.use_color:
            rgb_color = point_cloud[:, 3:6] + MEAN_COLOR_RGB  # undo the centring
            rgb_color *= (1 + 0.4 * np.random.rand(3) - 0.2
                          )  # random scale brightness 80% ~ 120%
            rgb_color += (0.1 * np.random.rand(3) - 0.05)  # random shift
            rgb_color += np.expand_dims(
                np.random.rand(point_cloud.shape[0]) * 0.05 - 0.025,
                -1)  # random jitter
            # NOTE(review): values are not clipped before re-centring, and
            # dropped colours become 0 in centred space - confirm intended.
            rgb_color = rgb_color - MEAN_COLOR_RGB
            rgb_color *= np.expand_dims(
                np.random.rand(point_cloud.shape[0]) > 0.3,
                -1)  # drop 30% colors
            # Write the result back; without this the whole colour
            # augmentation above was computed and then discarded.
            point_cloud[:, 3:6] = rgb_color

        # scale the size
        scale_ratio = np.random.rand() * 0.3 + 0.85  # 0.85 ~ 1.15
        point_cloud[:, 0:3] *= scale_ratio
        bboxes[:, 0:6] *= scale_ratio
        # Columns 1..9 hold the three vote offsets; the original slice 1:-1
        # left the last column (z of the third vote) unscaled.
        point_votes[:, 1:10] *= scale_ratio
        if self.use_height:
            point_cloud[:, -1] *= scale_ratio

        # shift the point cloud -0.5~0.5
        offset = np.random.rand(3) - 0.5
        offset = np.expand_dims(offset, 0)
        point_cloud[:, 0:3] += offset
        bboxes[:, 0:3] += offset
        # shifting doesn't change: size, votes, height

    # ------------labels------------
    angle_classes = np.zeros((MAX_NUM_OBJ, ))
    angle_residuals = np.zeros((MAX_NUM_OBJ, ))
    size_classes = np.zeros((MAX_NUM_OBJ, ))
    size_residuals = np.zeros((MAX_NUM_OBJ, 3))
    label_mask = np.zeros((MAX_NUM_OBJ))
    label_mask[0:bboxes.shape[0]] = 1

    for i in range(bboxes.shape[0]):
        bbox = bboxes[i]
        semantic_class = bbox[7]
        angle_class, angle_residual = DC.angle2class(bbox[6])
        # NOTE: The mean size stored in size2class is of full length of box edges,
        # while in sunrgbd_data.py data dumping we dumped *half* length l,w,h.. so have to time it by 2 here
        box3d_size = bbox[3:6] * 2
        size_class, size_residual = DC.size2class(
            box3d_size, DC.class2type[semantic_class])
        angle_classes[i] = angle_class
        angle_residuals[i] = angle_residual
        size_classes[i] = size_class
        size_residuals[i] = size_residual

    target_bboxes_mask = label_mask
    target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
    for i in range(bboxes.shape[0]):
        bbox = bboxes[i]
        corners_3d = sunrgbd_utils.my_compute_box_3d(
            bbox[0:3], bbox[3:6], bbox[6])
        # compute axis aligned box
        xmin = np.min(corners_3d[:, 0])
        ymin = np.min(corners_3d[:, 1])
        zmin = np.min(corners_3d[:, 2])
        xmax = np.max(corners_3d[:, 0])
        ymax = np.max(corners_3d[:, 1])
        zmax = np.max(corners_3d[:, 2])
        # 0:3 - centers
        # 3:6 - size
        target_bbox = np.array([(xmin + xmax) / 2, (ymin + ymax) / 2,
                                (zmin + zmax) / 2, xmax - xmin,
                                ymax - ymin, zmax - zmin])
        target_bboxes[i, :] = target_bbox

    # NOTE(review): this uses `pc_utils`; confirm that is the name the module
    # actually imports for the point-cloud utilities.
    point_cloud, choice = pc_utils.random_sampling(point_cloud,
                                                   self.num_points,
                                                   return_choices=True)

    point_votes_mask = point_votes[choice, 0]
    point_votes = point_votes[choice, 1:]

    center_label = target_bboxes.astype(np.float32)[:, :3]
    heading_class_label = angle_classes.astype(np.int64)
    heading_residual_label = angle_residuals.astype(np.float32)
    size_class_label = size_classes.astype(np.int64)
    size_residual_label = size_residuals.astype(np.float32)
    target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
    target_bboxes_semcls[0:bboxes.shape[0]] = bboxes[:, -1]  # from 0 to 9
    sem_cls_label = target_bboxes_semcls.astype(np.int64)
    box_label_mask = target_bboxes_mask.astype(np.float32)
    vote_label = point_votes.astype(np.float32)
    vote_label_mask = point_votes_mask.astype(np.int64)

    return [
        point_cloud.astype(np.float32),
        center_label,
        heading_class_label,
        heading_residual_label,
        size_class_label,
        size_residual_label,
        sem_cls_label,
        box_label_mask,
        vote_label,
        vote_label_mask,
    ]