def preprocess_point_cloud(point_cloud):
    """Turn a raw numpy point cloud into a single-sample network input.

    Only the first three columns (XYZ) are kept; colour is ignored for now.
    A fourth "height" column is appended, measured relative to a floor
    estimate, and the cloud is subsampled with ``random_sampling`` to
    ``FLAGS.num_point`` points.

    :param point_cloud: numpy array with at least three columns (x, y, z).
    :return: float32 array with a leading batch axis of size 1; the rows are
        whatever ``random_sampling`` returns, each with 4 columns
        (x, y, z, height).
    """
    xyz = point_cloud[:, 0:3]
    z = xyz[:, 2]
    # np.percentile takes q in [0, 100], so this is the 0.99th percentile
    # (close to the minimum z), used as the floor estimate.
    floor_height = np.percentile(z, 0.99)
    height_column = (z - floor_height)[:, np.newaxis]
    with_height = np.concatenate([xyz, height_column], axis=1)  # (N,4)
    sampled = random_sampling(with_height, FLAGS.num_point)
    return sampled.astype(np.float32)[np.newaxis, ...]
def depth_to_pc(self, dep):
    """Convert a depth map into a sampled point-cloud batch for the network.

    Each of the first three channels of the module-level ``XYZ`` array is
    multiplied by ``dep`` to form a 480x640x3 point map, which is flattened
    to one point per pixel. A height column (z minus a floor estimate) is
    appended and the cloud is subsampled to ``num_point`` points.

    :param dep: depth map broadcastable against a 480x640 image.
        NOTE(review): presumably ``XYZ`` holds per-pixel back-projection
        factors -- confirm against where ``XYZ`` is defined.
    :return: float32 array with a leading batch axis of size 1 and 4
        columns per point (x, y, z, height).
    """
    point_map = np.zeros((480, 640, 3))
    point_map[...] = np.stack(
        [dep * XYZ[:, :, channel] for channel in range(3)], axis=-1)
    xyz = point_map.reshape(480 * 640, 3)

    z = xyz[:, 2]
    # np.percentile takes q in [0, 100]: 0.99 is the 0.99th percentile.
    floor_height = np.percentile(z, 0.99)
    with_height = np.concatenate(
        [xyz, np.expand_dims(z - floor_height, 1)], 1)  # (N,4)

    # The sampled indices are not needed here.
    sampled, _ = random_sampling(with_height, num_point,
                                 return_choices=True)
    return np.expand_dims(sampled.astype(np.float32), 0)
def get_roi_ptcloud(inputs, batch_pred_boxes_params, enlarge_ratio=1.2,
                    num_point_roi=512, min_num_point=100):
    """
    Crop a fixed-size point cloud out of the scene for every predicted box.

    :param
        inputs: dict {'point_clouds'}
            tensor with the point clouds of the whole scene, (B, N, C)
        batch_pred_boxes_params: (B, num_proposals, 7), numpy array
            predicted boxes: center (3), size (3), heading angle (1)
        enlarge_ratio: scalar
            factor applied to the predicted box size before cropping
        num_point_roi: scalar
            number of points sampled from each enlarged box
        min_num_point: scalar
            boxes containing fewer points than this are left as all-zero
            ROIs and flagged 0 in the mask
    :return:
        batch_pc_roi: (B, num_proposals, num_point_roi, C) float32 numpy array
        nonempty_roi_mask: (B, num_proposals) numpy array of 0/1
    """
    scene_points = inputs['point_clouds'].detach().cpu().numpy()[:, :, :]
    num_scenes = batch_pred_boxes_params.shape[0]
    num_proposals = batch_pred_boxes_params.shape[1]
    num_features = scene_points.shape[2]

    batch_pc_roi = np.zeros(
        (num_scenes, num_proposals, num_point_roi, num_features),
        dtype=np.float32)
    nonempty_roi_mask = np.ones((num_scenes, num_proposals))

    for scene_id in range(num_scenes):
        pc = scene_points[scene_id, :, :]  # (N,C)
        for prop_id in range(num_proposals):
            params = batch_pred_boxes_params[scene_id, prop_id, :]  # (7)
            center_upright_camera = flip_axis_to_camera(params[0:3])
            enlarged_size = params[3:6] * enlarge_ratio
            box3d = flip_axis_to_depth(
                get_3d_box(enlarged_size, params[6], center_upright_camera))
            pc_in_box, _ = extract_pc_in_box3d(pc, box3d)

            if len(pc_in_box) < min_num_point:
                # Too few points: keep the zero ROI and mark it as empty.
                nonempty_roi_mask[scene_id, prop_id] = 0
                continue
            batch_pc_roi[scene_id, prop_id, :, :] = random_sampling(
                pc_in_box, num_point_roi)
    return batch_pc_roi, nonempty_roi_mask
def data_viz(data_dir, dump_dir=os.path.join(BASE_DIR, 'data_viz_dump')):
    """Dump every tenth ycbgrasp point cloud as a PLY file for inspection.

    :param data_dir: dataset root handed to ``ycb_object``.
    :param dump_dir: output directory; created if it does not exist.
    """
    ycb = ycb_object(data_dir)
    num_samples = len(ycb)
    idxs = np.arange(num_samples)
    if not os.path.exists(dump_dir):
        os.mkdir(dump_dir)

    # Only indices 0, 10, 20, ... are visualised.
    for idx in range(0, num_samples, 10):
        data_idx = idxs[idx]
        print('data index: ', data_idx)
        xyz = ycb.get_pointcloud(data_idx)[:, 0:3]
        sampled = pc_util.random_sampling(xyz, args.num_point)
        ply_path = os.path.join(dump_dir, str(idx) + '_pc.ply')
        pc_util.write_ply(sampled, ply_path)
    print('Complete!')
def __getitem__(self, idx):
    """Return crop ``idx`` centered, converted to z-up and subsampled.

    The stored crop is copied before processing. Centering, the axis swap
    and the colour normalisation are all in-place operations; applied
    directly to ``self.crops[idx]`` they would accumulate across repeated
    accesses to the same index (the y/z swap would toggle back and forth
    and ``mid`` would come out as ~0 from the second access on).

    Returns a dict with:
        point_clouds: float32 array of sampled points; columns are XYZ,
            plus colour when ``self.use_color``, plus height when
            ``self.use_height``.
        mid: midpoint of the crop's XYZ bounding box that was subtracted,
            expressed in the z-up axis order.
    """
    # Private copy so the cached crop is never modified.
    crop_point_cloud = np.array(self.crops[idx], copy=True)

    # Center the crop on the midpoint of its axis-aligned bounding box.
    minbound = np.min(crop_point_cloud[:, :3], axis=0)
    maxbound = np.max(crop_point_cloud[:, :3], axis=0)
    mid = (minbound + maxbound) / 2.0
    crop_point_cloud[:, :3] -= mid

    # Convert to z-up by swapping the second and third axes (points and
    # midpoint alike).
    mid[[0, 1, 2]] = mid[[0, 2, 1]]
    crop_point_cloud[:, [0, 1, 2]] = crop_point_cloud[:, [0, 2, 1]]

    if not self.use_color:
        point_cloud = crop_point_cloud[:, 0:3]  # do not use color for now
    else:
        point_cloud = crop_point_cloud[:, 0:6]
        point_cloud[:, 3:] = (point_cloud[:, 3:] -
                              (self.MEAN_COLOR_RGB) / 256.0)

    if self.use_height:
        # np.percentile takes q in [0, 100]: 0.99 is the 0.99th percentile,
        # i.e. close to the lowest z value.
        floor_height = np.percentile(point_cloud[:, 2], 0.99)
        height = point_cloud[:, 2] - floor_height
        point_cloud = np.concatenate(
            [point_cloud, np.expand_dims(height, 1)], 1)

    point_cloud, _ = pc_util.random_sampling(point_cloud,
                                             self.num_points,
                                             return_choices=True)

    ret_dict = {}
    ret_dict['point_clouds'] = point_cloud.astype(np.float32)
    ret_dict['mid'] = mid
    return ret_dict
def extract_blender_data_inner(data_idx, dataset, data_dir, split, num_point,
                               type_whitelist, save_votes):
    """Dump the point cloud, boxes and (optionally) votes of one scene.

    Files are written below ``data_dir/split`` as ``XX/XXXX_pc.npz``,
    ``XX/XXXX_bbox.npy`` and, when ``save_votes`` is true,
    ``XX/XXXX_votes.npz``, where ``XX`` / ``XXXX`` are the two components of
    ``dataset.index[data_idx]``.

    :param data_idx: position of the scene in ``dataset``.
    :param dataset: object providing ``index``, ``get_label_objects`` and
        ``get_depth``.
    :param data_dir: output root directory.
    :param split: sub-directory of ``data_dir`` to write into.
    :param num_point: number of points to subsample from the scene.
    :param type_whitelist: class names to keep; other objects are ignored.
    :param save_votes: whether to compute and save per-point votes.
    :return: 0 if the scene already had a votes file or was dumped now,
        1 if it was skipped because it has no whitelisted object.
    """
    idx = dataset.index[data_idx]
    scene_prefix = os.path.join(data_dir, split,
                                '{:02d}/{:04d}'.format(idx[0], idx[1]))

    # Skip if XX/XXXX_votes.npz already exists
    if os.path.exists(scene_prefix + '_votes.npz'):
        return 0

    objects = dataset.get_label_objects(data_idx)
    whitelisted = [obj for obj in objects if obj.classname in type_whitelist]

    # Skip scenes with 0 object or 0 objects out of type_whitelist
    if not whitelisted:
        print(data_idx)
        return 1

    object_list = []
    for obj in whitelisted:
        obb = np.zeros(10)
        obb[0:3] = obj.centroid
        # Note that compared with that in data_viz, we do not multiply
        # l, w, h by 2, neither do we flip the heading angle.
        obb[3:6] = np.array([obj.l, obj.w, obj.h])
        obb[6:9] = obj.heading_angle
        obb[9] = blender_utils.type2class[obj.classname]
        object_list.append(obb)
    # whitelisted is non-empty here, so there is always something to stack.
    obbs = np.vstack(object_list)  # (K,10) K objects with 10 data entries

    pc_upright_depth = dataset.get_depth(data_idx)
    # The message is German for "There are no data points in the point
    # cloud". NOTE(review): the check looks at axis 1 (columns), not at the
    # number of points -- confirm this is intended.
    assert pc_upright_depth.shape[
        1] > 0, "Es gibt keine Datenpunkte in der Pointcloud"
    pc_upright_depth_subsampled = pc_util.random_sampling(
        pc_upright_depth, num_point)

    np.savez_compressed(scene_prefix + '_pc.npz',
                        pc=pc_upright_depth_subsampled)
    np.save(scene_prefix + '_bbox.npy', obbs)

    if save_votes:
        N = pc_upright_depth_subsampled.shape[0]
        point_votes = np.zeros((N, 10))  # 3 votes and 1 vote mask
        point_vote_idx = np.zeros(
            (N)).astype(np.int32)  # in the range of [0,2]
        indices = np.arange(N)
        for obj in whitelisted:
            # Best effort per object: a failing object is reported and
            # skipped. Only Exception is caught so that Ctrl-C /
            # SystemExit still stop the run.
            try:
                # Find all points in this object's OBB
                box3d_pts_3d = blender_utils.my_compute_box_3d(
                    obj.centroid, np.array([obj.l, obj.w, obj.h]),
                    obj.heading_angle)
                pc_in_box3d, inds = blender_utils.extract_pc_in_box3d(
                    pc_upright_depth_subsampled, box3d_pts_3d)
                # Assign first dimension to indicate it is in an object box
                point_votes[inds, 0] = 1
                # Add the votes (all 0 if the point is not in any object's
                # OBB)
                votes = np.expand_dims(obj.centroid, 0) - pc_in_box3d[:, 0:3]
                # turn dense True/False inds into sparse integer inds
                sparse_inds = indices[inds]
                for i, j in enumerate(sparse_inds):
                    slot = int(point_vote_idx[j])
                    point_votes[j, slot * 3 + 1:(slot + 1) * 3 +
                                1] = votes[i, :]
                    # Populate the other two votes with the first vote
                    if slot == 0:
                        point_votes[j, 4:7] = votes[i, :]
                        point_votes[j, 7:10] = votes[i, :]
                point_vote_idx[inds] = np.minimum(2, point_vote_idx[inds] + 1)
            except Exception:
                print('ERROR ----', data_idx, obj.classname)
        np.savez_compressed(scene_prefix + '_votes.npz',
                            point_votes=point_votes)
    return 0
def __getitem__(self, idx):
    """Load one scan with its pose labels and per-point vote labels.

    Reads ``<scan>_pc.npz``, ``<scan>_pose.npy``, ``<scan>_object_votes.npz``
    and ``<scan>_part_votes.npz`` from ``self.data_path``, subsamples the
    cloud to ``self.num_points`` points and selects the matching vote rows.

    Returns a dict with:
        point_clouds: float32 sampled points (XYZ, optional colour and height)
        vote_label / vote_part_label: float32 vote columns of sampled points
        vote_label_mask: int64, first column of the object votes
        scan_idx: int64 scalar array holding ``idx``
        center_label: (MAX_NUM_POSE, 3) float32, pose columns 0:3
        rot_label: (MAX_NUM_POSE, 3) float32, pose columns 3:6
        sem_cls_label: (MAX_NUM_POSE,) int64, last pose column
        object_label_mask: (MAX_NUM_POSE,) float32, 1 for valid poses
    """
    scan_prefix = os.path.join(self.data_path, self.scan_names[idx])
    point_cloud = np.load(scan_prefix + '_pc.npz')['pc']
    poses = np.load(scan_prefix + '_pose.npy')
    point_votes = np.load(
        scan_prefix + '_object_votes.npz')['point_object_votes']
    point_part_votes = np.load(
        scan_prefix + '_part_votes.npz')['point_part_votes']

    if self.use_color:
        point_cloud = point_cloud[:, 0:6]
        point_cloud[:, 3:] = (point_cloud[:, 3:] - MEAN_COLOR_RGB)
    else:
        point_cloud = point_cloud[:, 0:3]

    if self.use_height:
        z = point_cloud[:, 2]
        # np.percentile takes q in [0, 100]: this is the 0.99th percentile.
        floor_height = np.percentile(z, 0.99)
        point_cloud = np.concatenate(
            [point_cloud, np.expand_dims(z - floor_height, 1)], 1)

    # ------------------------------- LABELS ------------------------------
    num_poses = poses.shape[0]
    target_poses_mask = np.zeros((MAX_NUM_POSE))
    target_poses_mask[0:num_poses] = 1
    target_poses = np.zeros((MAX_NUM_POSE, 6))
    for row in range(num_poses):
        target_poses[row, :] = poses[row][0:6]
    target_poses_semcls = np.zeros((MAX_NUM_POSE))
    target_poses_semcls[0:num_poses] = poses[:, -1]

    # Subsample the cloud and keep the vote rows of the chosen points.
    point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                   self.num_points,
                                                   return_choices=True)
    sampled_votes = point_votes[choices]
    sampled_part_votes = point_part_votes[choices]

    return {
        'point_clouds': point_cloud.astype(np.float32),
        'vote_label': sampled_votes[:, 1:].astype(np.float32),
        'vote_part_label': sampled_part_votes[:, 1:].astype(np.float32),
        'vote_label_mask': sampled_votes[:, 0].astype(np.int64),
        'scan_idx': np.array(idx).astype(np.int64),
        'center_label': target_poses.astype(np.float32)[:, 0:3],
        'rot_label': target_poses.astype(np.float32)[:, 3:6],
        'sem_cls_label': target_poses_semcls.astype(np.int64),
        'object_label_mask': target_poses_mask.astype(np.float32),
    }
def __getitem__(self, idx):
    """
    Load one scan and return two independently sampled views of it.

    Returns a dict with following keys:
        ema_point_clouds: (N,3+C) first random sample, never augmented
        point_clouds: (N,3+C) second random sample, augmented if enabled
        flip_x_axis, flip_y_axis: int64 flags (flip_y_axis is always 0 here)
        rot_mat: (3,3) float32 rotation applied to point_clouds
        rot_angle: float32 rotation angle around the z axis
        scale: (1,3) float32 scale factors applied to point_clouds
        scan_idx: int scan index in scan_names list
        supervised_mask: int64, always 0
    and, only when self.load_labels is set:
        center_label, heading_class_label, heading_residual_label,
        size_class_label, size_residual_label, sem_cls_label, box_label_mask
        built from the un-augmented boxes in <scan>_bbox.npy
    """
    scan_prefix = os.path.join(self.data_path, self.scan_names[idx])
    mesh_vertices = np.load(scan_prefix + '_pc.npz')['pc']  # Nx6

    if self.use_color:
        raw_point_cloud = mesh_vertices[:, 0:6]
        raw_point_cloud[:, 3:] = (raw_point_cloud[:, 3:] -
                                  MEAN_COLOR_RGB) / 256.0
    else:
        raw_point_cloud = mesh_vertices[:, 0:3]

    if self.use_height:
        z = raw_point_cloud[:, 2]
        # np.percentile takes q in [0, 100]: this is the 0.99th percentile.
        floor_height = np.percentile(z, 0.99)
        raw_point_cloud = np.concatenate(
            [raw_point_cloud, np.expand_dims(z - floor_height, 1)], 1)

    ret_dict = {}
    # First sample: returned as-is under 'ema_point_clouds'.
    ema_point_cloud = pc_util.random_sampling(raw_point_cloud,
                                              self.num_points,
                                              return_choices=False)
    ret_dict['ema_point_clouds'] = ema_point_cloud.astype(np.float32)

    # ------------------------------- LABELS ------------------------------
    bboxes = np.load(scan_prefix + '_bbox.npy')  # K,8
    num_boxes = bboxes.shape[0]
    target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
    target_bboxes_mask = np.zeros((MAX_NUM_OBJ))
    target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
    angle_classes = np.zeros((MAX_NUM_OBJ, ))
    angle_residuals = np.zeros((MAX_NUM_OBJ, ))
    size_classes = np.zeros((MAX_NUM_OBJ, ))
    size_residuals = np.zeros((MAX_NUM_OBJ, 3))

    target_bboxes_mask[0:num_boxes] = 1
    target_bboxes[0:num_boxes, :] = bboxes[:, 0:6]
    for i, bbox in enumerate(bboxes):
        semantic_class = bbox[7]
        angle_class, angle_residual = DC.angle2class(bbox[6])
        # NOTE: the mean sizes used by size2class are full box edge lengths,
        # while the dumped boxes store *half* lengths l, w, h, so the size
        # is doubled here.
        size_class, size_residual = DC.size2class(
            bbox[3:6] * 2, DC.class2type[semantic_class])
        angle_classes[i] = angle_class
        angle_residuals[i] = angle_residual
        size_classes[i] = size_class
        size_residuals[i] = size_residual
        target_bboxes_semcls[i] = semantic_class

    if self.load_labels:
        ret_dict.update({
            'center_label': target_bboxes.astype(np.float32)[:, 0:3],
            'heading_class_label': angle_classes.astype(np.int64),
            'heading_residual_label': angle_residuals.astype(np.float32),
            'size_class_label': size_classes.astype(np.int64),
            'size_residual_label': size_residuals.astype(np.float32),
            'sem_cls_label': target_bboxes_semcls.astype(np.int64),
            'box_label_mask': target_bboxes_mask.astype(np.float32),
        })

    # Second sample: the one that gets augmented.
    point_cloud, _ = pc_util.random_sampling(raw_point_cloud,
                                             self.num_points,
                                             return_choices=True)

    # ------------------------- DATA AUGMENTATION -------------------------
    flip_x_axis = 0
    flip_y_axis = 0
    rot_angle = 0
    rot_mat = np.identity(3)
    scale_ratio = np.ones((1, 3))
    if self.augment:
        if np.random.random() > 0.5:
            # Flipping along the YZ plane
            flip_x_axis = 1
            point_cloud[:, 0] = -1 * point_cloud[:, 0]

        # Rotation along up-axis/Z-axis, -30 ~ +30 degree
        rot_angle = (np.random.random() * np.pi / 3) - np.pi / 6
        rot_mat = pc_util.rotz(rot_angle)
        point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3],
                                     np.transpose(rot_mat))

        # Augment point cloud scale: 0.85x-1.15x, same factor on all axes
        scale = np.random.random() * 0.3 + 0.85
        scale_ratio = np.expand_dims(np.tile(scale, 3), 0)
        point_cloud[:, 0:3] *= scale_ratio
        if self.use_height:
            # The height column scales together with z.
            point_cloud[:, -1] *= scale_ratio[0, 0]

    ret_dict.update({
        'point_clouds': point_cloud.astype(np.float32),
        'flip_x_axis': np.array(flip_x_axis).astype(np.int64),
        'flip_y_axis': np.array(flip_y_axis).astype(np.int64),
        'rot_mat': rot_mat.astype(np.float32),
        'rot_angle': np.array(rot_angle).astype(np.float32),
        'scale': np.array(scale_ratio).astype(np.float32),
        'scan_idx': np.array(idx).astype(np.int64),
        'supervised_mask': np.array(0).astype(np.int64),
    })
    return ret_dict
def _dump_pickle_verbose(obj, path):
    """Pickle ``obj`` to ``path`` and report the written file on stdout."""
    with open(path, 'wb') as f:
        pickle.dump(obj, f)
    print(f"{path} saved successfully !!")


def _move_nearest_vote_first(point_votes):
    """Swap, in place, each positive point's nearest vote into slot 0.

    ``point_votes`` is (N,13): column 0 is the in-box mask, columns 1:10
    hold three 3-D votes and columns 10:13 the GT box index of each vote.
    For every point with a positive mask, the vote with the smallest squared
    length (first one on ties) is exchanged with the first vote, together
    with the corresponding GT index.
    """
    positive = point_votes[:, 0] > 0
    # Points outside every box must still carry the -1 placeholders.
    assert np.all(point_votes[~positive, 10:13] == -1), "error"
    for ip in np.flatnonzero(positive):
        first_vote = point_votes[ip, 1:4].copy()
        first_gt_ind = int(point_votes[ip, 10])
        dists = [np.sum(point_votes[ip, 3 * s + 1:3 * s + 4] ** 2)
                 for s in range(3)]
        near_ind = int(np.argmin(dists))
        lo, hi = near_ind * 3 + 1, (near_ind + 1) * 3 + 1
        point_votes[ip, 10] = point_votes[ip, 10 + near_ind]
        point_votes[ip, 10 + near_ind] = first_gt_ind
        point_votes[ip, 1:4] = point_votes[ip, lo:hi].copy()
        point_votes[ip, lo:hi] = first_vote


def extract_sunrgbd_data(idx_filename, split, output_folder, num_point=20000,
                         type_whitelist=DEFAULT_TYPE_WHITELIST,
                         save_votes=False, use_v1=False,
                         skip_empty_scene=True):
    """ Extract scene point clouds and bounding boxes (centroids, box sizes,
    heading angles, semantic classes) plus per-point votes.
    Dumped point clouds and boxes are in upright depth coord.

    Args:
        idx_filename: a TXT file where each line is an int number (index)
        split: training or testing
        output_folder: directory to write into (created if missing)
        num_point: number of points to subsample per scene
        type_whitelist: class names to keep
        save_votes: NOTE(review): not read by this function -- votes are
            always computed and saved; confirm before relying on it.
        use_v1: use the SUN RGB-D V1 data
        skip_empty_scene: if True, skip scenes that contain no object
            (no object in whitelist)

    Dumps (each array is also written as a .pkl next to the .npz/.npy):
        <id>_pc.npz of the subsampled points in upright depth coord
        <id>_bbox.npy of (K,8) where K is the number of objects, 8 is for
            centroids (cx,cy,cz), dimension (l,w,h), heading_angle and
            semantic_class
        <id>_votes.npz of (N,13): a 0/1 flag indicating whether the point
            belongs to an object, three GT votes for up to three objects,
            and the index (row in <id>_bbox.npy, -1 if none) of the object
            behind each vote. If the point is only in one object's OBB, the
            three GT votes are the same. The vote nearest to the point is
            stored first.
        all_*.pkl: lists over all processed scenes of the boxes, points,
            votes and (flag, first GT index) point labels.
    """
    dataset = sunrgbd_object('./sunrgbd_trainval', split, use_v1=use_v1)
    # Context manager so the index file handle is closed deterministically.
    with open(idx_filename) as idx_file:
        data_idx_list = [int(line.rstrip()) for line in idx_file]

    if not os.path.exists(output_folder):
        os.mkdir(output_folder)

    all_obbs = []
    all_pc_upright_depth_subsampled = []
    all_point_votes = []

    for data_idx in data_idx_list:
        print('------------- ', data_idx)
        objects = dataset.get_label_objects(data_idx)
        whitelisted = [obj for obj in objects
                       if obj.classname in type_whitelist]

        # Skip scenes with 0 object
        if skip_empty_scene and not whitelisted:
            continue

        object_list = []
        for obj in whitelisted:
            obb = np.zeros((8))
            obb[0:3] = obj.centroid
            # Note that compared with that in data_viz, we do not multiply
            # l, w, h by 2, neither do we flip the heading angle.
            obb[3:6] = np.array([obj.l, obj.w, obj.h])
            obb[6] = obj.heading_angle
            obb[7] = sunrgbd_utils.type2class[obj.classname]
            object_list.append(obb)
        if len(object_list) == 0:
            obbs = np.zeros((0, 8))
        else:
            obbs = np.vstack(object_list)  # (K,8)
        print(f"{data_idx} has {obbs.shape[0]} gt bboxes")

        pc_upright_depth = dataset.get_depth(data_idx)
        pc_upright_depth_subsampled = pc_util.random_sampling(
            pc_upright_depth, num_point)

        scene_prefix = os.path.join(output_folder, '%06d' % (data_idx))
        np.savez_compressed(scene_prefix + '_pc.npz',
                            pc=pc_upright_depth_subsampled)
        np.save(scene_prefix + '_bbox.npy', obbs)
        _dump_pickle_verbose(pc_upright_depth_subsampled,
                             scene_prefix + '_pc.pkl')
        _dump_pickle_verbose(obbs, scene_prefix + '_bbox.pkl')

        # add to collection
        all_pc_upright_depth_subsampled.append(pc_upright_depth_subsampled)
        all_obbs.append(obbs)

        N = pc_upright_depth_subsampled.shape[0]
        # 1 vote mask + 3 votes + 3 votes gt ind
        point_votes = np.zeros((N, 13))
        point_votes[:, 10:13] = -1
        point_vote_idx = np.zeros((N)).astype(np.int32)  # in the range [0,2]
        indices = np.arange(N)
        # i_obj is the row of this object in obbs. It comes from enumerate
        # so that a failing object cannot shift the GT indices of the
        # objects processed after it.
        for i_obj, obj in enumerate(whitelisted):
            # Best effort per object; only Exception is caught so that
            # Ctrl-C / SystemExit still stop the run.
            try:
                # Find all points in this object's OBB
                box3d_pts_3d = sunrgbd_utils.my_compute_box_3d(
                    obj.centroid, np.array([obj.l, obj.w, obj.h]),
                    obj.heading_angle)
                pc_in_box3d, inds = sunrgbd_utils.extract_pc_in_box3d(
                    pc_upright_depth_subsampled, box3d_pts_3d)
                # Assign first dimension to indicate it is in an object box
                point_votes[inds, 0] = 1
                # Add the votes (all 0 if the point is not in any object's
                # OBB)
                votes = np.expand_dims(obj.centroid, 0) - pc_in_box3d[:, 0:3]
                # turn dense True/False inds into sparse integer inds
                sparse_inds = indices[inds]
                for i, j in enumerate(sparse_inds):
                    slot = int(point_vote_idx[j])
                    point_votes[j, slot * 3 + 1:(slot + 1) * 3 +
                                1] = votes[i, :]
                    point_votes[j, slot + 10] = i_obj
                    # Populate the other two votes with the first vote
                    if slot == 0:
                        point_votes[j, 4:7] = votes[i, :]
                        point_votes[j, 7:10] = votes[i, :]
                        point_votes[j, 10:13] = i_obj
                point_vote_idx[inds] = np.minimum(2, point_vote_idx[inds] + 1)
            except Exception:
                print('ERROR ----', data_idx, obj.classname)

        # choose the nearest as the first gt for each point
        _move_nearest_vote_first(point_votes)

        print(f"{data_idx}_votes.npz has {len(whitelisted)} gt bboxes")
        np.savez_compressed(scene_prefix + '_votes.npz',
                            point_votes=point_votes)
        _dump_pickle_verbose(point_votes, scene_prefix + '_votes.pkl')
        all_point_votes.append(point_votes)

    _dump_pickle_verbose(
        all_obbs,
        os.path.join(output_folder,
                     'all_obbs_modified_nearest_has_empty.pkl'))
    _dump_pickle_verbose(
        all_pc_upright_depth_subsampled,
        os.path.join(output_folder, 'all_pc_modified_nearest_has_empty.pkl'))
    _dump_pickle_verbose(
        all_point_votes,
        os.path.join(output_folder,
                     'all_point_votes_nearest_has_empty.pkl'))

    # Per-point labels: in-box flag and index of the nearest GT box.
    all_point_labels = [point_votes[:, [0, 10]]
                        for point_votes in all_point_votes]
    _dump_pickle_verbose(
        all_point_labels,
        os.path.join(output_folder,
                     'all_point_labels_nearest_has_empty.pkl'))
def __getitem__(self, idx): """ Returns a dict with following keys: point_clouds: (N,3+C) center_label: (MAX_NUM_OBJ,3) for GT box center XYZ sem_cls_label: (MAX_NUM_OBJ,) semantic class index angle_class_label: (MAX_NUM_OBJ,) with int values in 0,...,NUM_HEADING_BIN-1 angle_residual_label: (MAX_NUM_OBJ,) size_classe_label: (MAX_NUM_OBJ,) with int values in 0,...,NUM_SIZE_CLUSTER size_residual_label: (MAX_NUM_OBJ,3) box_label_mask: (MAX_NUM_OBJ) as 0/1 with 1 indicating a unique box point_votes: (N,3) with votes XYZ point_votes_mask: (N,) with 0/1 with 1 indicating the point is in one of the object's OBB. scan_idx: int scan index in scan_names list pcl_color: unused """ scan_name = self.scan_names[idx] mesh_vertices = np.load(os.path.join(self.data_path, scan_name)+'_vert.npy') meta_vertices = np.load(os.path.join(self.data_path, scan_name)+'_all_noangle_40cls.npy') ### Need to change the name here instance_labels = meta_vertices[:,-2] semantic_labels = meta_vertices[:,-1] if not self.use_color: point_cloud = mesh_vertices[:,0:3] # do not use color for now pcl_color = mesh_vertices[:,3:6] else: point_cloud = mesh_vertices[:,0:6] point_cloud[:,3:] = (point_cloud[:,3:]-MEAN_COLOR_RGB)/256.0 pcl_color = (point_cloud[:,3:]-MEAN_COLOR_RGB)/256.0 if self.use_height: floor_height = np.percentile(point_cloud[:,2],0.99) height = point_cloud[:,2] - floor_height point_cloud = np.concatenate([point_cloud, np.expand_dims(height, 1)],1) # ------------------------------- LABELS ------------------------------ target_bboxes = np.zeros((MAX_NUM_OBJ, 6)) target_bboxes_mask = np.zeros((MAX_NUM_OBJ)) angle_classes = np.zeros((MAX_NUM_OBJ,)) angle_label = np.zeros((MAX_NUM_OBJ,)) angle_residuals = np.zeros((MAX_NUM_OBJ,)) size_classes = np.zeros((MAX_NUM_OBJ,)) size_residuals = np.zeros((MAX_NUM_OBJ, 3)) ### For statistics surface_cue = np.zeros((MAX_NUM_OBJ)) line_cue = np.zeros((MAX_NUM_OBJ,)) before_sample = np.unique(instance_labels) while True: orig_point_cloud = 
np.copy(point_cloud) temp_point_cloud, choices = pc_util.random_sampling(orig_point_cloud, self.num_points, return_choices=True) after_sample = np.unique(instance_labels[choices]) if np.array_equal(before_sample, after_sample): point_cloud = temp_point_cloud break instance_labels = instance_labels[choices] semantic_labels = semantic_labels[choices] meta_vertices = meta_vertices[choices] pcl_color = pcl_color[choices] # ------------------------------- DATA AUGMENTATION ------------------------------ if self.augment: if np.random.random() > 0.5: # Flipping along the YZ plane point_cloud[:,0] = -1 * point_cloud[:,0] # target_bboxes[:,0] = -1 * target_bboxes[:,0] meta_vertices[:, 0] = -1 * meta_vertices[:, 0] meta_vertices[:, 6] = -1 * meta_vertices[:, 6] if np.random.random() > 0.5: # Flipping along the XZ plane point_cloud[:,1] = -1 * point_cloud[:,1] # target_bboxes[:,1] = -1 * target_bboxes[:,1] meta_vertices[:, 1] = -1 * meta_vertices[:, 1] meta_vertices[:, 6] = -1 * meta_vertices[:, 6] # Rotation along up-axis/Z-axis rot_angle = (np.random.random()*np.pi/18) - np.pi/36 # -5 ~ +5 degree rot_mat = pc_util.rotz(rot_angle).astype(np.float32) point_cloud[:,0:3] = np.dot(point_cloud[:,0:3], np.transpose(rot_mat)) meta_vertices[:, :6] = rotate_aligned_boxes(meta_vertices[:, :6], rot_mat) meta_vertices[:, 6] += rot_angle # ------------------------------- Plane and point ------------------------------ # compute votes *AFTER* augmentation # generate votes # Note: since there's no map between bbox instance labels and # pc instance_labels (it had been filtered # in the data preparation step) we'll compute the instance bbox # from the points sharing the same instance label. 
point_votes = np.zeros([self.num_points, 3]) point_votes_mask = np.zeros(self.num_points) point_boundary_mask_z = np.zeros(self.num_points) point_boundary_mask_xy = np.zeros(self.num_points) point_boundary_offset_z = np.zeros([self.num_points, 3]) point_boundary_offset_xy = np.zeros([self.num_points, 3]) point_boundary_sem_z = np.zeros([self.num_points, 3+2+1]) point_boundary_sem_xy = np.zeros([self.num_points, 3+1+1]) point_line_mask = np.zeros(self.num_points) point_line_offset = np.zeros([self.num_points, 3]) point_line_sem = np.zeros([self.num_points, 3+1]) point_sem_label = np.zeros(self.num_points) selected_instances = [] selected_centers = [] selected_centers_support = [] selected_centers_bsupport = [] obj_meta = [] counter = -1 for i_instance in np.unique(instance_labels): # find all points belong to that instance ind = np.where(instance_labels == i_instance)[0] if semantic_labels[ind[0]] in DC.nyu40ids: counter += 1 idx_instance = counter x = point_cloud[ind,:3] ### Meta information here meta = meta_vertices[ind[0]] obj_meta.append(meta) ### Get the centroid here center = meta[:3] point_votes[ind, :] = center - x point_votes_mask[ind] = 1.0 point_sem_label[ind] = DC.nyu40id2class_sem[meta[-1]] ### Corners corners, xmin, ymin, zmin, xmax, ymax, zmax = params2bbox(center, meta[3], meta[4], meta[5], meta[6]) ## Get lower four lines plane_lower_temp = np.array([0,0,1,-corners[6,-1]]) para_points = np.array([corners[1], corners[3], corners[5], corners[7]]) newd = np.sum(para_points * plane_lower_temp[:3], 1) if check_upright(para_points) and plane_lower_temp[0]+plane_lower_temp[1] < LOWER_THRESH: plane_lower = np.array([0,0,1,plane_lower_temp[-1]]) plane_upper = np.array([0,0,1,-np.mean(newd)]) else: import pdb;pdb.set_trace() print ("error with upright") if check_z(plane_upper, para_points) == False: import pdb;pdb.set_trace() ### Get the boundary points here alldist = np.abs(np.sum(x*plane_lower[:3], 1) + plane_lower[-1]) mind = np.min(alldist) sel = 
np.abs(alldist - mind) < DIST_THRESH ## Get lower four lines line_sel1, line_sel2, line_sel3, line_sel4 = get_linesel(x[sel], xmin, xmax, ymin, ymax) if np.sum(line_sel1) > NUM_POINT_LINE: point_line_mask[ind[sel][line_sel1]] = 1.0 linecenter = np.mean(x[sel][line_sel1], axis=0) linecenter[1] = (ymin+ymax)/2.0 point_line_offset[ind[sel][line_sel1]] = linecenter - x[sel][line_sel1] point_line_sem[ind[sel][line_sel1]] = np.array([linecenter[0], linecenter[1], linecenter[2], np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]]) if np.sum(line_sel2) > NUM_POINT_LINE: point_line_mask[ind[sel][line_sel2]] = 1.0 linecenter = np.mean(x[sel][line_sel2], axis=0) linecenter[1] = (ymin+ymax)/2.0 point_line_offset[ind[sel][line_sel2]] = linecenter - x[sel][line_sel2] point_line_sem[ind[sel][line_sel2]] = np.array([linecenter[0], linecenter[1], linecenter[2], np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]]) if np.sum(line_sel3) > NUM_POINT_LINE: point_line_mask[ind[sel][line_sel3]] = 1.0 linecenter = np.mean(x[sel][line_sel3], axis=0) linecenter[0] = (xmin+xmax)/2.0 point_line_offset[ind[sel][line_sel3]] = linecenter - x[sel][line_sel3] point_line_sem[ind[sel][line_sel3]] = np.array([linecenter[0], linecenter[1], linecenter[2], np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]]) if np.sum(line_sel4) > NUM_POINT_LINE: point_line_mask[ind[sel][line_sel4]] = 1.0 linecenter = np.mean(x[sel][line_sel4], axis=0) linecenter[0] = (xmin+xmax)/2.0 point_line_offset[ind[sel][line_sel4]] = linecenter - x[sel][line_sel4] point_line_sem[ind[sel][line_sel4]] = np.array([linecenter[0], linecenter[1], linecenter[2], np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]]) ### Set the surface labels here if np.sum(sel) > NUM_POINT and np.var(alldist[sel]) < VAR_THRESH: center = np.array([(xmin+xmax)/2.0, (ymin+ymax)/2.0, np.mean(x[sel][:,2])]) sel_global = ind[sel] point_boundary_mask_z[sel_global] = 1.0 point_boundary_sem_z[sel_global] = np.array([center[0], center[1], 
center[2], xmax - xmin, ymax - ymin, np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]]) point_boundary_offset_z[sel_global] = center - x[sel] ### Get the boundary points here alldist = np.abs(np.sum(x*plane_upper[:3], 1) + plane_upper[-1]) mind = np.min(alldist) sel = np.abs(alldist - mind) < DIST_THRESH ## Get upper four lines line_sel1, line_sel2, line_sel3, line_sel4 = get_linesel(x[sel], xmin, xmax, ymin, ymax) if np.sum(line_sel1) > NUM_POINT_LINE: point_line_mask[ind[sel][line_sel1]] = 1.0 linecenter = np.mean(x[sel][line_sel1], axis=0) linecenter[1] = (ymin+ymax)/2.0 point_line_offset[ind[sel][line_sel1]] = linecenter - x[sel][line_sel1] point_line_sem[ind[sel][line_sel1]] = np.array([linecenter[0], linecenter[1], linecenter[2], np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]]) if np.sum(line_sel2) > NUM_POINT_LINE: point_line_mask[ind[sel][line_sel2]] = 1.0 linecenter = np.mean(x[sel][line_sel2], axis=0) linecenter[1] = (ymin+ymax)/2.0 point_line_offset[ind[sel][line_sel2]] = linecenter - x[sel][line_sel2] point_line_sem[ind[sel][line_sel2]] = np.array([linecenter[0], linecenter[1], linecenter[2], np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]]) if np.sum(line_sel3) > NUM_POINT_LINE: point_line_mask[ind[sel][line_sel3]] = 1.0 linecenter = np.mean(x[sel][line_sel3], axis=0) linecenter[0] = (xmin+xmax)/2.0 point_line_offset[ind[sel][line_sel3]] = linecenter - x[sel][line_sel3] point_line_sem[ind[sel][line_sel3]] = np.array([linecenter[0], linecenter[1], linecenter[2], np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]]) if np.sum(line_sel4) > NUM_POINT_LINE: point_line_mask[ind[sel][line_sel4]] = 1.0 linecenter = np.mean(x[sel][line_sel4], axis=0) linecenter[0] = (xmin+xmax)/2.0 point_line_offset[ind[sel][line_sel4]] = linecenter - x[sel][line_sel4] point_line_sem[ind[sel][line_sel4]] = np.array([linecenter[0], linecenter[1], linecenter[2], np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]]) if np.sum(sel) > NUM_POINT and 
np.var(alldist[sel]) < VAR_THRESH: center = np.array([(xmin+xmax)/2.0, (ymin+ymax)/2.0, np.mean(x[sel][:,2])]) sel_global = ind[sel] point_boundary_mask_z[sel_global] = 1.0 point_boundary_sem_z[sel_global] = np.array([center[0], center[1], center[2], xmax - xmin, ymax - ymin, np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]]) point_boundary_offset_z[sel_global] = center - x[sel] ## Get left two lines v1 = corners[3] - corners[2] v2 = corners[2] - corners[0] cp = np.cross(v1, v2) d = -np.dot(cp,corners[0]) a,b,c = cp plane_left_temp = np.array([a, b, c, d]) para_points = np.array([corners[4], corners[5], corners[6], corners[7]]) ### Normalize xy here plane_left_temp /= np.linalg.norm(plane_left_temp[:3]) newd = np.sum(para_points * plane_left_temp[:3], 1) if plane_left_temp[2] < LOWER_THRESH: plane_left = plane_left_temp#np.array([cls,res,tempsign,plane_left_temp[-1]]) plane_right = np.array([plane_left_temp[0], plane_left_temp[1], plane_left_temp[2], -np.mean(newd)]) else: import pdb;pdb.set_trace() print ("error with upright") ### Get the boundary points here alldist = np.abs(np.sum(x*plane_left[:3], 1) + plane_left[-1]) mind = np.min(alldist) sel = np.abs(alldist - mind) < DIST_THRESH ## Get upper four lines line_sel1, line_sel2 = get_linesel2(x[sel], ymin, ymax, zmin, zmax, axis=1) if np.sum(line_sel1) > NUM_POINT_LINE: point_line_mask[ind[sel][line_sel1]] = 1.0 linecenter = np.mean(x[sel][line_sel1], axis=0) linecenter[2] = (zmin+zmax)/2.0 point_line_offset[ind[sel][line_sel1]] = linecenter - x[sel][line_sel1] point_line_sem[ind[sel][line_sel1]] = np.array([linecenter[0], linecenter[1], linecenter[2], np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]]) if np.sum(line_sel2) > NUM_POINT_LINE: point_line_mask[ind[sel][line_sel2]] = 1.0 linecenter = np.mean(x[sel][line_sel2], axis=0) linecenter[2] = (zmin+zmax)/2.0 point_line_offset[ind[sel][line_sel2]] = linecenter - x[sel][line_sel2] point_line_sem[ind[sel][line_sel2]] = np.array([linecenter[0], 
linecenter[1], linecenter[2], np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]]) if np.sum(sel) > NUM_POINT and np.var(alldist[sel]) < VAR_THRESH: center = np.array([np.mean(x[sel][:,0]), np.mean(x[sel][:,1]), (zmin+zmax)/2.0]) sel_global = ind[sel] point_boundary_mask_xy[sel_global] = 1.0 point_boundary_sem_xy[sel_global] = np.array([center[0], center[1], center[2], zmax - zmin, np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]]) point_boundary_offset_xy[sel_global] = center - x[sel] ### Get the boundary points here alldist = np.abs(np.sum(x*plane_right[:3], 1) + plane_right[-1]) mind = np.min(alldist) sel = np.abs(alldist - mind) < DIST_THRESH line_sel1, line_sel2 = get_linesel2(x[sel], ymin, ymax, zmin, zmax, axis=1) if np.sum(line_sel1) > NUM_POINT_LINE: point_line_mask[ind[sel][line_sel1]] = 1.0 linecenter = np.mean(x[sel][line_sel1], axis=0) linecenter[2] = (zmin+zmax)/2.0 point_line_offset[ind[sel][line_sel1]] = linecenter - x[sel][line_sel1] point_line_sem[ind[sel][line_sel1]] = np.array([linecenter[0], linecenter[1], linecenter[2], np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]]) if np.sum(line_sel2) > NUM_POINT_LINE: point_line_mask[ind[sel][line_sel2]] = 1.0 linecenter = np.mean(x[sel][line_sel2], axis=0) linecenter[2] = (zmin+zmax)/2.0 point_line_offset[ind[sel][line_sel2]] = linecenter - x[sel][line_sel2] point_line_sem[ind[sel][line_sel2]] = np.array([linecenter[0], linecenter[1], linecenter[2], np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]]) if np.sum(sel) > NUM_POINT and np.var(alldist[sel]) < VAR_THRESH: center = np.array([np.mean(x[sel][:,0]), np.mean(x[sel][:,1]), (zmin+zmax)/2.0]) sel_global = ind[sel] point_boundary_mask_xy[sel_global] = 1.0 point_boundary_sem_xy[sel_global] = np.array([center[0], center[1], center[2], zmax - zmin, np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]]) point_boundary_offset_xy[sel_global] = center - x[sel] ### Get the boundary points here v1 = corners[0] - corners[4] v2 = 
corners[4] - corners[5] cp = np.cross(v1, v2) d = -np.dot(cp,corners[5]) a,b,c = cp plane_front_temp = np.array([a, b, c, d]) para_points = np.array([corners[2], corners[3], corners[6], corners[7]]) plane_front_temp /= np.linalg.norm(plane_front_temp[:3]) newd = np.sum(para_points * plane_front_temp[:3], 1) if plane_front_temp[2] < LOWER_THRESH: plane_front = plane_front_temp plane_back = np.array([plane_front_temp[0], plane_front_temp[1], plane_front_temp[2], -np.mean(newd)]) else: import pdb;pdb.set_trace() print ("error with upright") ### Get the boundary points here alldist = np.abs(np.sum(x*plane_front[:3], 1) + plane_front[-1]) mind = np.min(alldist) sel = np.abs(alldist - mind) < DIST_THRESH if np.sum(sel) > NUM_POINT and np.var(alldist[sel]) < VAR_THRESH: center = np.array([np.mean(x[sel][:,0]), np.mean(x[sel][:,1]), (zmin+zmax)/2.0]) sel_global = ind[sel] point_boundary_mask_xy[sel_global] = 1.0 point_boundary_sem_xy[sel_global] = np.array([center[0], center[1], center[2], zmax - zmin, np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]]) point_boundary_offset_xy[sel_global] = center - x[sel] ### Get the boundary points here alldist = np.abs(np.sum(x*plane_back[:3], 1) + plane_back[-1]) mind = np.min(alldist) sel = np.abs(alldist - mind) < DIST_THRESH if np.sum(sel) > NUM_POINT and np.var(alldist[sel]) < VAR_THRESH: center = np.array([np.mean(x[sel][:,0]), np.mean(x[sel][:,1]), (zmin+zmax)/2.0]) sel_global = ind[sel] point_boundary_mask_xy[sel_global] = 1.0 point_boundary_sem_xy[sel_global] = np.array([center[0], center[1], center[2], zmax - zmin, np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]]) point_boundary_offset_xy[sel_global] = center - x[sel] num_instance = len(obj_meta) obj_meta = np.array(obj_meta) obj_meta = obj_meta.reshape(-1, 9) target_bboxes_mask[0:num_instance] = 1 target_bboxes[0:num_instance,:6] = obj_meta[:,0:6] class_ind = [np.where(DC.nyu40ids == x)[0][0] for x in obj_meta[:,-1]] # NOTE: set size class as semantic class. 
Consider use size2class. size_classes[0:num_instance] = class_ind size_residuals[0:num_instance, :] = \ target_bboxes[0:num_instance, 3:6] - DC.mean_size_arr[class_ind,:] point_votes = np.tile(point_votes, (1, 3)) # make 3 votes identical point_sem_label = np.tile(np.expand_dims(point_sem_label, -1), (1, 3)) # make 3 votes identical ret_dict = {} ret_dict['point_clouds'] = point_cloud.astype(np.float32) ret_dict['center_label'] = target_bboxes.astype(np.float32)[:,0:3] ret_dict['size_label'] = target_bboxes.astype(np.float32)[:,3:6] ret_dict['heading_label'] = angle_label.astype(np.float32) ret_dict['heading_class_label'] = angle_classes.astype(np.int64) ret_dict['heading_residual_label'] = angle_residuals.astype(np.float32) ret_dict['size_class_label'] = size_classes.astype(np.int64) ret_dict['size_residual_label'] = size_residuals.astype(np.float32) if self.use_height: ret_dict['floor_height'] = floor_height target_bboxes_semcls = np.zeros((MAX_NUM_OBJ)) target_bboxes_semcls[0:num_instance] = \ [DC.nyu40id2class[x] for x in obj_meta[:,-1][0:obj_meta.shape[0]]] ret_dict['sem_cls_label'] = target_bboxes_semcls.astype(np.int64) ret_dict['point_sem_cls_label'] = point_sem_label.astype(np.int64) ret_dict['box_label_mask'] = target_bboxes_mask.astype(np.float32) ret_dict['point_boundary_mask_z'] = point_boundary_mask_z.astype(np.float32) ret_dict['point_boundary_mask_xy'] = point_boundary_mask_xy.astype(np.float32) ret_dict['point_boundary_offset_z'] = point_boundary_offset_z.astype(np.float32) ret_dict['point_boundary_offset_xy'] = point_boundary_offset_xy.astype(np.float32) ret_dict['point_boundary_sem_z'] = point_boundary_sem_z.astype(np.float32) ret_dict['point_boundary_sem_xy'] = point_boundary_sem_xy.astype(np.float32) ret_dict['point_line_mask'] = point_line_mask.astype(np.float32) ret_dict['point_line_offset'] = point_line_offset.astype(np.float32) ret_dict['point_line_sem'] = point_line_sem.astype(np.float32) ret_dict['vote_label'] = 
point_votes.astype(np.float32) ret_dict['vote_label_mask'] = point_votes_mask.astype(np.int64) ret_dict['scan_idx'] = np.array(idx).astype(np.int64) ret_dict['pcl_color'] = pcl_color ret_dict['num_instance'] = num_instance return ret_dict
def __getitem__(self, idx):
    """
    Load one unlabeled scan and return a sampled cloud together with a
    second copy of the same sample for the EMA branch.

    Returns a dict with following keys:
        point_clouds: (N,3+C) sampled points after flip / rotation / scaling
        ema_point_clouds: (N,3+C) copy of the same sampled points; only
            flipped (its flips are drawn independently), never rotated
            or scaled
        scan_idx: int scan index in scan_names list
        supervised_mask: constant 0
        flip_x_axis, flip_y_axis: 0/1, flips applied to point_clouds
        flip_x_axis_ema, flip_y_axis_ema: 0/1, flips applied to
            ema_point_clouds
        rot_mat: (3,3) rotation applied to point_clouds (identity when
            augmentation is off)
        scale: (1,3) scale applied to point_clouds (ones when augmentation
            is off)
    """
    scan_name = self.scan_names[idx]
    pc_file = os.path.join(self.data_path, scan_name) + '_pc.npz'
    mesh_vertices = np.load(pc_file)['pc']  # Nx6

    if self.use_color:
        raw_point_cloud = mesh_vertices[:, 0:6]
        raw_point_cloud[:, 3:] = (raw_point_cloud[:, 3:] -
                                  MEAN_COLOR_RGB) / 256.0
    else:
        # geometry only
        raw_point_cloud = mesh_vertices[:, 0:3]

    if self.use_height:
        # Height is measured from the 0.99th percentile of z.
        floor_height = np.percentile(raw_point_cloud[:, 2], 0.99)
        height = raw_point_cloud[:, 2] - floor_height
        raw_point_cloud = np.concatenate(
            [raw_point_cloud, np.expand_dims(height, 1)], 1)

    point_cloud, _ = pc_util.random_sampling(raw_point_cloud,
                                             self.num_points,
                                             return_choices=True)
    # The EMA branch starts from exactly the same sampled points.
    ema_point_cloud = point_cloud.copy()

    # ------------------------------- DATA AUGMENTATION ------------------------------
    # Defaults describe "no augmentation applied".
    flip_x_axis = flip_y_axis = 0
    flip_x_axis_ema = flip_y_axis_ema = 0
    rot_mat = np.identity(3)
    scale_ratio = np.ones((1, 3))

    if self.augment:
        # The random draws below are consumed in a fixed order:
        # x-flip, x-flip (EMA), y-flip, y-flip (EMA), rotation, scale.
        if np.random.random() > 0.5:
            # Mirror across the YZ plane
            flip_x_axis = 1
            point_cloud[:, 0] = -point_cloud[:, 0]
        if np.random.random() > 0.5:
            # Mirror the EMA copy across the YZ plane
            flip_x_axis_ema = 1
            ema_point_cloud[:, 0] = -ema_point_cloud[:, 0]
        if np.random.random() > 0.5:
            # Mirror across the XZ plane
            flip_y_axis = 1
            point_cloud[:, 1] = -point_cloud[:, 1]
        if np.random.random() > 0.5:
            # Mirror the EMA copy across the XZ plane
            flip_y_axis_ema = 1
            ema_point_cloud[:, 1] = -ema_point_cloud[:, 1]

        # Rotation about the up (Z) axis, angle in [-30, +30) degrees
        rot_angle = (np.random.random() * np.pi / 3) - np.pi / 6
        rot_mat = pc_util.rotz(rot_angle)
        rotated_xyz = np.dot(point_cloud[:, 0:3], np.transpose(rot_mat))
        point_cloud[:, 0:3] = rotated_xyz

        # Uniform scale in [0.85, 1.15), stored as a (1,3) row
        scale_ratio = np.random.random() * 0.3 + 0.85
        scale_ratio = np.expand_dims(np.tile(scale_ratio, 3), 0)
        point_cloud[:, 0:3] *= scale_ratio
        if self.use_height:
            # The height channel is the last column and scales with the cloud.
            point_cloud[:, -1] *= scale_ratio[0, 0]

    ret_dict = {
        'point_clouds': point_cloud.astype(np.float32),
        'scan_idx': np.array(idx).astype(np.int64),
        'supervised_mask': np.array(0).astype(np.int64),
        'ema_point_clouds': ema_point_cloud.astype(np.float32),
        'flip_x_axis': np.array(flip_x_axis).astype(np.int64),
        'flip_y_axis': np.array(flip_y_axis).astype(np.int64),
        'rot_mat': rot_mat.astype(np.float32),
        'scale': np.array(scale_ratio).astype(np.float32),
        'flip_x_axis_ema': np.array(flip_x_axis_ema).astype(np.int64),
        'flip_y_axis_ema': np.array(flip_y_axis_ema).astype(np.int64),
    }
    return ret_dict
def preprocess_point_cloud(point_cloud):
    ''' Turn a numpy point cloud into a single-sample batch for the forward pass.

    Only the first three columns (XYZ) are kept; colour is not used for now.
    The points are sampled with random_sampling(..., FLAGS.num_point) and
    the result is reshaped to (1, -1, 3).
    '''
    xyz = point_cloud[:, 0:3]
    sampled_xyz = random_sampling(xyz, FLAGS.num_point)
    batched_xyz = sampled_xyz.reshape(1, -1, 3)
    return batched_xyz
def data_viz(data_dir, dump_dir=os.path.join(BASE_DIR, 'data_viz_dump')):
    ''' Examine and visualize SUN RGB-D data.

    Samples are visited in a fixed shuffled order (the global NumPy seed is
    set to 0). For each sample the function writes into dump_dir:
        img_depth.jpg      image with projected points coloured by inverse depth
        img_box2d.jpg      image with the labelled 2D boxes drawn on it
        pcrgb_<k>k.obj     coloured point cloud sampled at 10k/20k/40k/80k points
        obbs.ply           oriented 3D boxes (only when the sample has objects)
        box3d_corners.ply, box3d_ori.ply
                           box corners / orientation points
    and then waits for a line on stdin before moving on. Samples without
    objects skip the 3D box output and the prompt.

    Args:
        data_dir: passed to sunrgbd_object to open the dataset.
        dump_dir: output directory; created (including parents) if missing.
    '''
    import matplotlib.pyplot as plt

    sunrgbd = sunrgbd_object(data_dir)
    idxs = np.array(range(1, len(sunrgbd) + 1))
    np.random.seed(0)
    np.random.shuffle(idxs)

    # The depth colour table does not depend on the sample: build it once.
    cmap = plt.cm.get_cmap('hsv', 256)
    cmap = np.array([cmap(i) for i in range(256)])[:, :3] * 255
    max_color_idx = cmap.shape[0] - 1

    # exist_ok avoids the check-then-create race and also creates parents.
    os.makedirs(dump_dir, exist_ok=True)

    for data_idx in idxs:
        print('-' * 10, 'data index: ', data_idx)
        pc = sunrgbd.get_depth(data_idx)
        print('Point cloud shape:', pc.shape)

        # Project points to image
        calib = sunrgbd.get_calibration(data_idx)
        uv, d = calib.project_upright_depth_to_image(pc[:, 0:3])
        print('Point UV:', uv)
        print('Point depth:', d)

        img = sunrgbd.get_image(data_idx)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        for i in range(uv.shape[0]):
            depth = d[i]
            # Inverse-depth colouring is undefined for zero, negative or
            # NaN depth; such points are not drawn.
            if not depth > 0:
                continue
            # Points closer than 120/256 would index past the table, so
            # the index is clamped to the last entry.
            color_idx = int(min(120.0 / depth, max_color_idx))
            color = cmap[color_idx, :]
            cv2.circle(img,
                       (int(np.round(uv[i, 0])), int(np.round(uv[i, 1]))),
                       2,
                       color=tuple(color),
                       thickness=-1)
        Image.fromarray(img).save(os.path.join(dump_dir, 'img_depth.jpg'))

        # Load box labels
        objects = sunrgbd.get_label_objects(data_idx)
        print('Objects:', objects)

        # Draw 2D boxes on image
        img = sunrgbd.get_image(data_idx)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        for i, obj in enumerate(objects):
            cv2.rectangle(img, (int(obj.xmin), int(obj.ymin)),
                          (int(obj.xmax), int(obj.ymax)), (0, 255, 0), 2)
            cv2.putText(img, '%d %s' % (i, obj.classname),
                        (max(int(obj.xmin), 15), max(int(obj.ymin), 15)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)
        Image.fromarray(img).save(os.path.join(dump_dir, 'img_box2d.jpg'))

        # Dump OBJ files for the colored point cloud
        for num_point in [10000, 20000, 40000, 80000]:
            sampled_pcrgb = pc_util.random_sampling(pc, num_point)
            # Colours are scaled to 8-bit. They must be unsigned and clipped:
            # a signed 8-bit cast wraps every value >= 128 to a negative
            # number, and 1.0 * 256 does not fit in 8 bits either.
            rgb = np.clip(sampled_pcrgb[:, 3:] * 256, 0, 255).astype(np.uint8)
            pc_util.write_ply_rgb(
                sampled_pcrgb[:, 0:3], rgb,
                os.path.join(dump_dir, 'pcrgb_%dk.obj' % (num_point // 1000)))

        # Dump OBJ files for 3D bounding boxes
        # l,w,h correspond to dx,dy,dz
        # heading angle is from +X rotating towards -Y
        # (+X is degree, -Y is 90 degrees)
        oriented_boxes = []
        for obj in objects:
            obb = np.zeros((7))
            obb[0:3] = obj.centroid
            # Some conversion to map with default setting of w,l,h
            # and angle in box dumping
            obb[3:6] = np.array([obj.l, obj.w, obj.h]) * 2
            obb[6] = -1 * obj.heading_angle
            print('Object cls, heading, l, w, h:',
                  obj.classname, obj.heading_angle, obj.l, obj.w, obj.h)
            oriented_boxes.append(obb)
        if len(oriented_boxes) > 0:
            oriented_boxes = np.vstack(tuple(oriented_boxes))
            pc_util.write_oriented_bbox(oriented_boxes,
                                        os.path.join(dump_dir, 'obbs.ply'))
        else:
            # Nothing to draw in 3D for this sample; go straight to the next.
            print('-' * 30)
            continue

        # Draw 3D boxes on depth points
        box3d = []
        ori3d = []
        for obj in objects:
            corners_3d_image, corners_3d = sunrgbd_utils.compute_box_3d(
                obj, calib)
            ori_3d_image, ori_3d = sunrgbd_utils.compute_orientation_3d(
                obj, calib)
            print('Corners 3D: ', corners_3d)
            box3d.append(corners_3d)
            ori3d.append(ori_3d)
        pc_box3d = np.concatenate(box3d, 0)
        pc_ori3d = np.concatenate(ori3d, 0)
        print(pc_box3d.shape)
        print(pc_ori3d.shape)
        pc_util.write_ply(pc_box3d,
                          os.path.join(dump_dir, 'box3d_corners.ply'))
        pc_util.write_ply(pc_ori3d, os.path.join(dump_dir, 'box3d_ori.ply'))
        print('-' * 30)
        print('Point clouds and bounding boxes saved to PLY files under %s' %
              (dump_dir))
        print('Type anything to continue to the next sample...')
        input()
def __getitem__(self, idx):
    """
    Load one scan with its box labels and per-point votes.

    Returns a dict with following keys:
        mesh_vertices: vertices as loaded from disk, re-sampled with
            pc_util.random_sampling(..., 50000) so every sample has the
            same number of rows
        point_clouds: (N,3+C)
        center_label: (MAX_NUM_OBJ,3) for GT box center XYZ
        sem_cls_label: (MAX_NUM_OBJ,) semantic class index
        heading_class_label: (MAX_NUM_OBJ,) left at zero by this loader
        heading_residual_label: (MAX_NUM_OBJ,) left at zero by this loader
        size_class_label: (MAX_NUM_OBJ,) index of the box's nyu40 id in
            DC.nyu40ids
        size_residual_label: (MAX_NUM_OBJ,3)
        box_label_mask: (MAX_NUM_OBJ) as 0/1 with 1 indicating a unique box
        vote_label: (N,9) vote XYZ repeated three times
        vote_label_mask: (N,) with 0/1 with 1 indicating the point belongs
            to an instance whose semantic label is in DC.nyu40ids
        scan_idx: int scan index in scan_names list
        pcl_color: columns 3:6 of the loaded vertices for the sampled points
    """
    scan_name = self.scan_names[idx]
    scan_prefix = os.path.join(self.data_path, scan_name)
    mesh_vertices = np.load(scan_prefix + '_vert.npy')
    instance_labels = np.load(scan_prefix + '_ins_label.npy')
    semantic_labels = np.load(scan_prefix + '_sem_label.npy')
    instance_bboxes = np.load(scan_prefix + '_bbox.npy')
    num_boxes = instance_bboxes.shape[0]

    # 'pcl_color' is returned in both colour modes, so it has to be defined
    # before branching (it used to exist only when use_color was False,
    # which made use_color=True fail further down).
    pcl_color = mesh_vertices[:, 3:6]
    if not self.use_color:
        point_cloud = mesh_vertices[:, 0:3]  # do not use color for now
    else:
        # Work on a copy: normalising in place would write through the view
        # into mesh_vertices and pcl_color.
        point_cloud = mesh_vertices[:, 0:6].copy()
        point_cloud[:, 3:] = (point_cloud[:, 3:] - MEAN_COLOR_RGB) / 256.0

    if self.use_height:
        # Height is measured from the 0.99th percentile of z.
        floor_height = np.percentile(point_cloud[:, 2], 0.99)
        height = point_cloud[:, 2] - floor_height
        point_cloud = np.concatenate(
            [point_cloud, np.expand_dims(height, 1)], 1)

    # ------------------------------- LABELS ------------------------------
    target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
    target_bboxes_mask = np.zeros((MAX_NUM_OBJ))
    angle_classes = np.zeros((MAX_NUM_OBJ, ))
    angle_residuals = np.zeros((MAX_NUM_OBJ, ))
    size_classes = np.zeros((MAX_NUM_OBJ, ))
    size_residuals = np.zeros((MAX_NUM_OBJ, 3))

    point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                   self.num_points,
                                                   return_choices=True)
    # Keep the per-point arrays aligned with the sampled points.
    instance_labels = instance_labels[choices]
    semantic_labels = semantic_labels[choices]
    pcl_color = pcl_color[choices]

    target_bboxes_mask[0:num_boxes] = 1
    target_bboxes[0:num_boxes, :] = instance_bboxes[:, 0:6]

    # ------------------------------- DATA AUGMENTATION ------------------------------
    if self.augment:
        if np.random.random() > 0.5:
            # Flipping along the YZ plane
            point_cloud[:, 0] = -1 * point_cloud[:, 0]
            target_bboxes[:, 0] = -1 * target_bboxes[:, 0]

        if np.random.random() > 0.5:
            # Flipping along the XZ plane
            point_cloud[:, 1] = -1 * point_cloud[:, 1]
            target_bboxes[:, 1] = -1 * target_bboxes[:, 1]

        # Rotation along up-axis/Z-axis
        rot_angle = (np.random.random() * np.pi / 18) - np.pi / 36  # -5 ~ +5 degree
        rot_mat = pc_util.rotz(rot_angle)
        point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3],
                                     np.transpose(rot_mat))
        target_bboxes = rotate_aligned_boxes(target_bboxes, rot_mat)

    # compute votes *AFTER* augmentation
    # generate votes
    # Note: since there's no map between bbox instance labels and
    # pc instance_labels (it had been filtered
    # in the data preparation step) we'll compute the instance bbox
    # from the points sharing the same instance label.
    point_votes = np.zeros([self.num_points, 3])
    point_votes_mask = np.zeros(self.num_points)
    for i_instance in np.unique(instance_labels):
        # find all points belong to that instance
        ind = np.where(instance_labels == i_instance)[0]
        # find the semantic label
        if semantic_labels[ind[0]] in DC.nyu40ids:
            x = point_cloud[ind, :3]
            # Vote target is the centre of the instance's axis-aligned extent.
            center = 0.5 * (x.min(0) + x.max(0))
            point_votes[ind, :] = center - x
            point_votes_mask[ind] = 1.0
    point_votes = np.tile(point_votes, (1, 3))  # make 3 votes identical

    class_ind = [
        np.where(DC.nyu40ids == x)[0][0] for x in instance_bboxes[:, -1]
    ]
    # NOTE: set size class as semantic class. Consider use size2class.
    size_classes[0:num_boxes] = class_ind
    size_residuals[0:num_boxes, :] = \
        target_bboxes[0:num_boxes, 3:6] - DC.mean_size_arr[class_ind, :]

    # keep the same nums of points for each cloud
    mesh_vertices, _ = pc_util.random_sampling(mesh_vertices,
                                               50000,
                                               return_choices=True)

    ret_dict = {}
    ret_dict['mesh_vertices'] = mesh_vertices.astype(np.float32)
    ret_dict['point_clouds'] = point_cloud.astype(np.float32)
    ret_dict['center_label'] = target_bboxes.astype(np.float32)[:, 0:3]
    ret_dict['heading_class_label'] = angle_classes.astype(np.int64)
    ret_dict['heading_residual_label'] = angle_residuals.astype(np.float32)
    ret_dict['size_class_label'] = size_classes.astype(np.int64)
    ret_dict['size_residual_label'] = size_residuals.astype(np.float32)
    target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
    target_bboxes_semcls[0:num_boxes] = \
        [DC.nyu40id2class[x] for x in instance_bboxes[:, -1]]
    ret_dict['sem_cls_label'] = target_bboxes_semcls.astype(np.int64)
    ret_dict['box_label_mask'] = target_bboxes_mask.astype(np.float32)
    ret_dict['vote_label'] = point_votes.astype(np.float32)
    ret_dict['vote_label_mask'] = point_votes_mask.astype(np.int64)
    ret_dict['scan_idx'] = np.array(idx).astype(np.int64)
    ret_dict['pcl_color'] = pcl_color
    return ret_dict
def __getitem__(self, idx):
    """
    Load one SUN RGB-D style scan with box labels, point votes and,
    when self.use_imvote is set, the 2D image-vote cues.

    Returns a dict with following keys:
        point_clouds: (N,3+C)
        center_label: (MAX_NUM_OBJ,3) for GT box center XYZ
        heading_class_label: (MAX_NUM_OBJ,) with int values in 0,...,NUM_HEADING_BIN-1
        heading_residual_label: (MAX_NUM_OBJ,)
        size_class_label: (MAX_NUM_OBJ,) with int values in 0,...,NUM_SIZE_CLUSTER
        size_residual_label: (MAX_NUM_OBJ,3)
        sem_cls_label: (MAX_NUM_OBJ,) semantic class index
        box_label_mask: (MAX_NUM_OBJ) as 0/1 with 1 indicating a unique box
        vote_label: (N,9) with votes XYZ (3 votes: X1Y1Z1, X2Y2Z2, X3Y3Z3)
            if there is only one vote than X1==X2==X3 etc.
        vote_label_mask: (N,) with 0/1 with 1 indicating the point
            is in one of the object's OBB.
        scan_idx: int scan index in scan_names list
        max_gt_bboxes: (MAX_NUM_OBJ,8) the (augmented) boxes, zero padded
    Only when self.use_imvote is set:
        scale: scalar scale applied by the augmentation (1 if none)
        calib_Rtilt: (3,3) tilt rotation, updated to account for the
            augmentation
        calib_K: (3,3) camera intrinsics
        full_img_width: int image width
        cls_score_feats: (1+MAX_NUM_2D_DET,NUM_CLS) class scores of the 2D
            detections; row 0 is a dummy row
        full_img_votes_1d: (MAX_NUM_PIXEL*vote_dims,) flattened pixel votes
        full_img_1d: (MAX_NUM_PIXEL*3,) flattened, normalised RGB image
    """
    scan_name = self.scan_names[idx]
    point_cloud = np.load(os.path.join(self.data_path, scan_name)+'_pc.npz')['pc'] # Nx6
    bboxes = np.load(os.path.join(self.data_path, scan_name)+'_bbox.npy') # K,8
    point_votes = np.load(os.path.join(self.data_path, scan_name)+'_votes.npz')['point_votes'] # Nx10

    if self.use_imvote:
        # Read camera parameters. The file is opened in a context manager so
        # the handle is released right away instead of leaking per sample.
        calib_path = os.path.join(self.raw_data_path, 'calib', scan_name+'.txt')
        with open(calib_path) as calib_file:
            calib_lines = calib_file.readlines()
        # Both matrices are stored column-major ('F' order) on one line each.
        calib_Rtilt = np.reshape(np.array([float(x) for x in calib_lines[0].rstrip().split(' ')]), (3,3), 'F')
        calib_K = np.reshape(np.array([float(x) for x in calib_lines[1].rstrip().split(' ')]), (3,3), 'F')
        # Read image
        full_img = sunrgbd_utils.load_image(os.path.join(self.raw_data_path, 'image', scan_name+'.jpg'))
        full_img_height = full_img.shape[0]
        full_img_width = full_img.shape[1]

        # ------------------------------- 2D IMAGE VOTES ------------------------------
        cls_id_list = self.cls_id_map[scan_name]
        cls_score_list = self.cls_score_map[scan_name]
        bbox_2d_list = self.bbox_2d_map[scan_name]
        obj_img_list = []
        for i2d, (cls2d, box2d) in enumerate(zip(cls_id_list, bbox_2d_list)):
            xmin, ymin, xmax, ymax = box2d
            # During training we randomly drop 2D boxes to reduce over-fitting
            if self.train and np.random.random()>0.5:
                continue

            obj_img = full_img[ymin:ymax, xmin:xmax, :]
            obj_h = obj_img.shape[0]
            obj_w = obj_img.shape[1]
            # Bounding box coordinates (4 values), class id, index to the semantic cues
            meta_data = (xmin, ymin, obj_h, obj_w, cls2d, i2d)
            if obj_h == 0 or obj_w == 0:
                continue

            # Use 2D box center as approximation
            uv_centroid = np.array([int(obj_w/2), int(obj_h/2)])
            uv_centroid = np.expand_dims(uv_centroid, 0)

            # Per-pixel offset from each pixel of the crop to the crop centre.
            v_coords, u_coords = np.meshgrid(range(obj_h), range(obj_w), indexing='ij')
            img_vote = np.transpose(np.array([u_coords, v_coords]), (1,2,0))
            img_vote = np.expand_dims(uv_centroid, 0) - img_vote

            obj_img_list.append((meta_data, img_vote))

        # Per-pixel layout: channel 0 counts the boxes covering the pixel,
        # followed by 4 values per vote slot (du, dv, class id, box index + 1).
        full_img_votes = np.zeros((full_img_height,full_img_width,self.vote_dims), dtype=np.float32)
        # Empty votes: 2d box index is set to -1
        # NOTE(review): 3::4 addresses the slots written with cls2d below
        # (1+iidx*4+2); the box-index slots are 4, 8, ... - confirm which
        # one was meant before changing anything.
        full_img_votes[:,:,3::4] = -1.

        for obj_img_data in obj_img_list:
            meta_data, img_vote = obj_img_data
            u0, v0, h, w, cls2d, i2d = meta_data
            for u in range(u0, u0+w):
                for v in range(v0, v0+h):
                    # Next free vote slot of this pixel.
                    iidx = int(full_img_votes[v,u,0])
                    if iidx >= self.max_imvote_per_pixel:
                        continue
                    full_img_votes[v,u,(1+iidx*4):(1+iidx*4+2)] = img_vote[v-v0,u-u0,:]
                    full_img_votes[v,u,(1+iidx*4+2)] = cls2d
                    full_img_votes[v,u,(1+iidx*4+3)] = i2d + 1 # add +1 here as we need a dummy feature for pixels outside all boxes
            full_img_votes[v0:(v0+h), u0:(u0+w), 0] += 1

        # Fixed-size 1D buffers so samples with different image sizes batch.
        full_img_votes_1d = np.zeros((MAX_NUM_PIXEL*self.vote_dims), dtype=np.float32)
        full_img_votes_1d[0:full_img_height*full_img_width*self.vote_dims] = full_img_votes.flatten()

        # Semantic cues: one-hot vector for class scores
        cls_score_feats = np.zeros((1+MAX_NUM_2D_DET,NUM_CLS), dtype=np.float32)
        # First row is dummy feature
        len_obj = len(cls_id_list)
        if len_obj:
            ind_obj = np.arange(1,len_obj+1)
            ind_cls = np.array(cls_id_list)
            cls_score_feats[ind_obj, ind_cls] = np.array(cls_score_list)

        # Texture cues: normalized RGB values
        full_img = (full_img - 128.) / 255.
        # Serialize data to 1D and save image size so that we can recover the original location in the image
        full_img_1d = np.zeros((MAX_NUM_PIXEL*3), dtype=np.float32)
        full_img_1d[:full_img_height*full_img_width*3] = full_img.flatten()

    if not self.use_color:
        point_cloud = point_cloud[:,0:3]
    else:
        point_cloud = point_cloud[:,0:6]
        point_cloud[:,3:] = (point_cloud[:,3:]-MEAN_COLOR_RGB)

    if self.use_height:
        # Height is measured from the 0.99th percentile of z.
        floor_height = np.percentile(point_cloud[:,2],0.99)
        height = point_cloud[:,2] - floor_height
        point_cloud = np.concatenate([point_cloud, np.expand_dims(height, 1)],1) # (N,4) or (N,7)

    # ------------------------------- DATA AUGMENTATION ------------------------------
    scale_ratio = 1.
    if self.augment:
        flip_flag = (np.random.random()>0.5)
        if flip_flag:
            # Flipping along the YZ plane
            point_cloud[:,0] = -1 * point_cloud[:,0]
            bboxes[:,0] = -1 * bboxes[:,0]
            bboxes[:,6] = np.pi - bboxes[:,6]
            # Columns 1, 4, 7 are the x components of the three votes.
            point_votes[:,[1,4,7]] = -1 * point_votes[:,[1,4,7]]

        # Rotation along up-axis/Z-axis
        rot_angle = (np.random.random()*np.pi/3) - np.pi/6 # -30 ~ +30 degree
        rot_mat = sunrgbd_utils.rotz(rot_angle)

        # Votes are offsets: rotate their end points, then subtract the
        # rotated start points again.
        point_votes_end = np.zeros_like(point_votes)
        point_votes_end[:,1:4] = np.dot(point_cloud[:,0:3] + point_votes[:,1:4], np.transpose(rot_mat))
        point_votes_end[:,4:7] = np.dot(point_cloud[:,0:3] + point_votes[:,4:7], np.transpose(rot_mat))
        point_votes_end[:,7:10] = np.dot(point_cloud[:,0:3] + point_votes[:,7:10], np.transpose(rot_mat))

        point_cloud[:,0:3] = np.dot(point_cloud[:,0:3], np.transpose(rot_mat))
        bboxes[:,0:3] = np.dot(bboxes[:,0:3], np.transpose(rot_mat))
        bboxes[:,6] -= rot_angle
        point_votes[:,1:4] = point_votes_end[:,1:4] - point_cloud[:,0:3]
        point_votes[:,4:7] = point_votes_end[:,4:7] - point_cloud[:,0:3]
        point_votes[:,7:10] = point_votes_end[:,7:10] - point_cloud[:,0:3]

        if self.use_imvote:
            R_inverse = np.copy(np.transpose(rot_mat))
            if flip_flag:
                R_inverse[0,:] *= -1
            # Update Rtilt according to the augmentation
            # R_inverse (3x3) * point (3x1) transforms an augmented depth point
            # to original point in upright_depth coordinates
            calib_Rtilt = np.dot(np.transpose(R_inverse), calib_Rtilt)

        # Augment RGB color
        if self.use_color:
            rgb_color = point_cloud[:,3:6] + MEAN_COLOR_RGB
            rgb_color *= (1+0.4*np.random.random(3)-0.2) # brightness change for each channel
            rgb_color += (0.1*np.random.random(3)-0.05) # color shift for each channel
            rgb_color += np.expand_dims((0.05*np.random.random(point_cloud.shape[0])-0.025), -1) # jittering on each pixel
            rgb_color = np.clip(rgb_color, 0, 1)
            # randomly drop out 30% of the points' colors
            rgb_color *= np.expand_dims(np.random.random(point_cloud.shape[0])>0.3,-1)
            point_cloud[:,3:6] = rgb_color - MEAN_COLOR_RGB

        # Augment point cloud scale: 0.85x-1.15x
        scale_ratio = np.random.random()*0.3+0.85
        if self.use_imvote:
            calib_Rtilt = np.dot(np.array([[scale_ratio,0,0],[0,scale_ratio,0],[0,0,scale_ratio]]), calib_Rtilt)
        scale_ratio_expand = np.expand_dims(np.tile(scale_ratio,3),0)
        point_cloud[:,0:3] *= scale_ratio_expand
        bboxes[:,0:3] *= scale_ratio_expand
        bboxes[:,3:6] *= scale_ratio_expand
        point_votes[:,1:4] *= scale_ratio_expand
        point_votes[:,4:7] *= scale_ratio_expand
        point_votes[:,7:10] *= scale_ratio_expand
        if self.use_height:
            # The height channel is the last column and scales with the cloud.
            point_cloud[:,-1] *= scale_ratio

    # ------------------------------- LABELS ------------------------------
    angle_classes = np.zeros((MAX_NUM_OBJ,))
    angle_residuals = np.zeros((MAX_NUM_OBJ,))
    size_classes = np.zeros((MAX_NUM_OBJ,))
    size_residuals = np.zeros((MAX_NUM_OBJ, 3))
    label_mask = np.zeros((MAX_NUM_OBJ))
    label_mask[0:bboxes.shape[0]] = 1
    max_bboxes = np.zeros((MAX_NUM_OBJ, 8))
    max_bboxes[0:bboxes.shape[0],:] = bboxes

    for i in range(bboxes.shape[0]):
        bbox = bboxes[i]
        semantic_class = bbox[7]
        angle_class, angle_residual = DC.angle2class(bbox[6])
        # NOTE: The mean size stored in size2class is of full length of box edges,
        # while in sunrgbd_data.py data dumping we dumped *half* length l,w,h.. so have to time it by 2 here
        box3d_size = bbox[3:6]*2
        size_class, size_residual = DC.size2class(box3d_size, DC.class2type[semantic_class])
        angle_classes[i] = angle_class
        angle_residuals[i] = angle_residual
        size_classes[i] = size_class
        size_residuals[i] = size_residual

    target_bboxes_mask = label_mask
    target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
    for i in range(bboxes.shape[0]):
        bbox = bboxes[i]
        corners_3d = sunrgbd_utils.my_compute_box_3d(bbox[0:3], bbox[3:6], bbox[6])
        # compute axis aligned box
        xmin = np.min(corners_3d[:,0])
        ymin = np.min(corners_3d[:,1])
        zmin = np.min(corners_3d[:,2])
        xmax = np.max(corners_3d[:,0])
        ymax = np.max(corners_3d[:,1])
        zmax = np.max(corners_3d[:,2])
        target_bbox = np.array([(xmin+xmax)/2, (ymin+ymax)/2, (zmin+zmax)/2, xmax-xmin, ymax-ymin, zmax-zmin])
        target_bboxes[i,:] = target_bbox

    point_cloud, choices = pc_util.random_sampling(point_cloud, self.num_points, return_choices=True)
    # Column 0 of the vote array is the mask, columns 1:10 are the 3 votes.
    point_votes_mask = point_votes[choices,0]
    point_votes = point_votes[choices,1:]

    ret_dict = {}
    ret_dict['point_clouds'] = point_cloud.astype(np.float32)
    ret_dict['center_label'] = target_bboxes.astype(np.float32)[:,0:3]
    ret_dict['heading_class_label'] = angle_classes.astype(np.int64)
    ret_dict['heading_residual_label'] = angle_residuals.astype(np.float32)
    ret_dict['size_class_label'] = size_classes.astype(np.int64)
    ret_dict['size_residual_label'] = size_residuals.astype(np.float32)
    target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
    target_bboxes_semcls[0:bboxes.shape[0]] = bboxes[:,-1] # from 0 to 9
    ret_dict['sem_cls_label'] = target_bboxes_semcls.astype(np.int64)
    ret_dict['box_label_mask'] = target_bboxes_mask.astype(np.float32)
    ret_dict['vote_label'] = point_votes.astype(np.float32)
    ret_dict['vote_label_mask'] = point_votes_mask.astype(np.int64)
    ret_dict['scan_idx'] = np.array(idx).astype(np.int64)
    ret_dict['max_gt_bboxes'] = max_bboxes
    if self.use_imvote:
        ret_dict['scale'] = np.array(scale_ratio).astype(np.float32)
        ret_dict['calib_Rtilt'] = calib_Rtilt.astype(np.float32)
        ret_dict['calib_K'] = calib_K.astype(np.float32)
        ret_dict['full_img_width'] = np.array(full_img_width).astype(np.int64)
        ret_dict['cls_score_feats'] = cls_score_feats.astype(np.float32)
        ret_dict['full_img_votes_1d'] = full_img_votes_1d.astype(np.float32)
        ret_dict['full_img_1d'] = full_img_1d.astype(np.float32)

    return ret_dict
def load_crop(self, filename, use_color, use_height, num_points,
              max_num_obj, DC):
    """
    Load one HDF5 crop and turn it into network inputs and labels.

    The crop is centred on the midpoint of its XYZ bounds and its axes are
    swapped from y-up to z-up before sampling.

    Args:
        filename: path of the HDF5 file; datasets 'point_cloud',
            'instance', 'semantic' and 'bboxes' are read from it.
        use_color: keep columns 3:6 of the point cloud as colour.
        use_height: append a height-above-floor column.
        num_points: number of points requested from
            pc_util.random_sampling.
        max_num_obj: number of rows in the padded label arrays.
        DC: dataset config; only DC.mean_size_arr is read here.

    Returns a dict with following keys:
        point_clouds, center_label, heading_class_label,
        heading_residual_label, size_class_label, size_residual_label,
        sem_cls_label, box_label_mask, vote_label, vote_label_mask,
        scan_name: filename up to its first '.'
        mid: the offset subtracted from the crop, in z-up axis order
    """
    MEAN_COLOR_RGB = np.array([109.8, 97.2, 83.8])
    # NOTE(review): this cuts at the FIRST '.', so a path such as
    # './crop.h5' yields an empty name - confirm callers pass plain names.
    scan_name = filename.split('.')[0]

    h5file = h5py.File(filename, 'r')
    try:
        mesh_vertices = np.array(h5file['point_cloud'], dtype=np.float32)
        instance_labels = np.array(h5file['instance'], dtype=np.int32)
        semantic_labels = np.array(h5file['semantic'], dtype=np.int32)
        instance_bboxes = np.array(h5file['bboxes'], dtype=np.float32)
    finally:
        # Release the handle even when one of the reads fails.
        h5file.close()
    instance_bboxes = instance_bboxes[:, :8]
    num_boxes = instance_bboxes.shape[0]

    # center data
    minbound = np.min(mesh_vertices[:, :3], axis=0)
    maxbound = np.max(mesh_vertices[:, :3], axis=0)
    mid = (minbound + maxbound) / 2.0
    mesh_vertices[:, :3] -= mid
    instance_bboxes[:, :3] -= mid

    # convert PC to z is up.
    mid[[0, 1, 2]] = mid[[0, 2, 1]]
    mesh_vertices[:, [0, 1, 2]] = mesh_vertices[:, [0, 2, 1]]

    # convert annotations to z is up.
    instance_bboxes[:, [0, 1, 2]] = instance_bboxes[:, [0, 2, 1]]
    instance_bboxes[:, [3, 4, 5]] = instance_bboxes[:, [3, 5, 4]]

    if not use_color:
        point_cloud = mesh_vertices[:, 0:3]  # do not use color for now
    else:
        point_cloud = mesh_vertices[:, 0:6]
        # NOTE(review): only the mean is divided by 256 here
        # (colour - mean / 256), unlike (colour - mean) / 256 - confirm
        # that the stored colours are already in [0, 1].
        point_cloud[:, 3:] = point_cloud[:, 3:] - (MEAN_COLOR_RGB) / 256.0

    if use_height:
        # Height is measured from the 0.99th percentile of z.
        floor_height = np.percentile(point_cloud[:, 2], 0.99)
        height = point_cloud[:, 2] - floor_height
        point_cloud = np.concatenate(
            [point_cloud, np.expand_dims(height, 1)], 1)

    # ------------------------------- LABELS ------------------------------
    target_bboxes = np.zeros((max_num_obj, 6))
    target_bboxes_mask = np.zeros((max_num_obj))
    angle_classes = np.zeros((max_num_obj, ))
    angle_residuals = np.zeros((max_num_obj, ))
    size_classes = np.zeros((max_num_obj, ))
    size_residuals = np.zeros((max_num_obj, 3))

    point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                   num_points,
                                                   return_choices=True)
    # Keep the per-point labels aligned with the sampled points.
    instance_labels = instance_labels[choices]
    semantic_labels = semantic_labels[choices]

    target_bboxes_mask[0:num_boxes] = 1
    target_bboxes[0:num_boxes, :] = instance_bboxes[:, 0:6]

    # generate votes (no augmentation is applied in this loader)
    # Note: since there's no map between bbox instance labels and
    # pc instance_labels (it had been filtered
    # in the data preparation step) we'll compute the instance bbox
    # from the points sharing the same instance label.
    point_votes = np.zeros([num_points, 3])
    point_votes_mask = np.zeros(num_points)
    for i_instance in np.unique(instance_labels):
        # find all points belong to that instance
        ind = np.where(instance_labels == i_instance)[0]
        # find the semantic label
        # TODO: change class labels
        if not (semantic_labels[ind[0]] == -1):
            x = point_cloud[ind, :3]
            # Vote target is the centre of the instance's axis-aligned extent.
            center = 0.5 * (x.min(0) + x.max(0))
            point_votes[ind, :] = center - x
            point_votes_mask[ind] = 1.0
    point_votes = np.tile(point_votes, (1, 3))  # make 3 votes identical

    # NOTE: set size class as semantic class. Consider use size2class.
    size_classes[0:num_boxes] = instance_bboxes[:, -1]
    # The np.int alias no longer exists in current NumPy; use an explicit
    # integer dtype for the row index into DC.mean_size_arr.
    instance_bboxes_sids = instance_bboxes[:, -1].astype(np.int64)
    size_residuals[0:num_boxes, :] = \
        target_bboxes[0:num_boxes, 3:6] - DC.mean_size_arr[instance_bboxes_sids, :]

    # TODO: update angle_classes + residuals
    angle_residuals[0:num_boxes] = instance_bboxes[:, 6]

    ret_dict = {}
    ret_dict['point_clouds'] = point_cloud.astype(np.float32)
    ret_dict['center_label'] = target_bboxes.astype(np.float32)[:, 0:3]
    ret_dict['heading_class_label'] = angle_classes.astype(np.int64)
    ret_dict['heading_residual_label'] = angle_residuals.astype(np.float32)
    ret_dict['size_class_label'] = size_classes.astype(np.int64)
    ret_dict['size_residual_label'] = size_residuals.astype(np.float32)
    target_bboxes_semcls = np.zeros((max_num_obj))
    target_bboxes_semcls[0:num_boxes] = instance_bboxes[:, -1]
    ret_dict['sem_cls_label'] = target_bboxes_semcls.astype(np.int64)
    ret_dict['box_label_mask'] = target_bboxes_mask.astype(np.float32)
    ret_dict['vote_label'] = point_votes.astype(np.float32)
    ret_dict['vote_label_mask'] = point_votes_mask.astype(np.int64)
    ret_dict['scan_name'] = scan_name
    ret_dict['mid'] = mid
    return ret_dict
def forward_full_tour(self, batch_data, DC, device, files_crops=None, **kwargs):
    """Run the network crop by crop and concatenate the per-crop outputs.

    Two input modes are supported:

    * ``files_crops`` is given: every entry is handed to ``self.load_crop``,
      which must return a dict with ``'point_clouds'`` (network input for
      one crop) and ``'mid'`` (offset added back to the predicted centers).
    * ``files_crops`` is ``None``: ``batch_data['point_cloud']`` is a single
      numpy scene, (N,6) x,y,z,R,G,B with y up (batch size > 1 is not
      implemented).  The scene is tiled with 4.0 x 3.0 x 4.0 boxes on a
      2.0 / 1.5 / 2.0 stride; crops with fewer than 5000 points are skipped,
      the rest are centered, converted to z-up and subsampled.

    Args:
        batch_data: dict holding ``'point_cloud'``; only read when
            ``files_crops`` is ``None``.
        DC: dataset config object, forwarded untouched to ``self.load_crop``.
        device: torch device the network inputs are moved to.
        files_crops: optional sequence of crop identifiers for
            ``self.load_crop``.
        **kwargs: optional ``use_color`` (default False), ``use_height``
            (default False), ``num_point`` (default 40000) and
            ``max_num_obj`` (default 64).

    Returns:
        dict mapping every key of ``output_keys`` to a CPU tensor holding
        the per-crop results concatenated along dim 1.  An empty dict is
        returned when no crop was processed (empty ``files_crops`` or no
        tile with enough points).
    """
    use_color = kwargs.get('use_color', False)
    use_height = kwargs.get('use_height', False)
    num_point = kwargs.get('num_point', 40000)
    max_num_obj = kwargs.get('max_num_obj', 64)

    output_keys = [
        'center',
        'heading_scores',
        'heading_residuals',
        'heading_residuals_normalized',
        'size_scores',
        'size_residuals',
        'size_residuals_normalized',
        'sem_cls_scores',
        'objectness_scores',
        'seed_xyz',
        'vote_xyz',
        'seed_inds',
        'aggregated_vote_xyz',
        'aggregated_vote_inds',
        'proposal_lastlayer_features',
    ]

    # One dict of detached CPU tensors per processed crop, in visiting order.
    per_crop_outputs = []

    def run_crop(points, mid):
        # Forward a single crop and stash the outputs we keep.
        inputs = {
            'point_clouds':
            torch.FloatTensor(points).unsqueeze(0).to(device)
        }
        crop_end_points = self.forward(inputs)
        # Predictions are relative to the centered crop; shift them back.
        crop_end_points['center'] += torch.FloatTensor(mid).to(device)
        # Swap the last two axes so that, like every other output, the
        # features can be concatenated along dim 1.
        crop_end_points['proposal_lastlayer_features'] = crop_end_points[
            'proposal_lastlayer_features'].permute(0, 2, 1)
        per_crop_outputs.append(
            {k: crop_end_points[k].detach().cpu() for k in output_keys})

    if files_crops is not None:
        print(len(files_crops))
        for crop_file in files_crops:
            file_data = self.load_crop(
                crop_file,
                use_color,
                use_height,
                num_point,
                max_num_obj,
                DC,
            )
            run_crop(file_data['point_clouds'], file_data['mid'])
    else:
        # Assumes one point cloud (BS>2 not implemented) in numpy format,
        # (N,6) x,y,z,R,G,B, with y up.
        num_points_threshold = 5000
        mean_color_rgb = np.array([109.8, 97.2, 83.8])

        point_cloud_points = batch_data['point_cloud']
        scene_xyz = point_cloud_points[:, :3]
        min_bound = np.min(scene_xyz, axis=0)
        max_bound = np.max(scene_xyz, axis=0)

        for x in np.arange(min_bound[0], max_bound[0], 2.0):
            for y in np.arange(min_bound[1], max_bound[1], 1.5):
                for z in np.arange(min_bound[2], max_bound[2], 2.0):
                    crop_min_bound = np.array([x, y, z])
                    crop_max_bound = np.array([x + 4.0, y + 3.0, z + 4.0])
                    vertices_mask = (
                        (scene_xyz > crop_min_bound).all(axis=1)
                        & (scene_xyz < crop_max_bound).all(axis=1))
                    crop_point_cloud = point_cloud_points[
                        vertices_mask, :].copy()
                    if crop_point_cloud.shape[0] < num_points_threshold:
                        continue

                    # Center the crop on the middle of its bounding box.
                    minbound = np.min(crop_point_cloud[:, :3], axis=0)
                    maxbound = np.max(crop_point_cloud[:, :3], axis=0)
                    mid = (minbound + maxbound) / 2.0
                    crop_point_cloud[:, :3] -= mid

                    # Convert from y-up to z-up by swapping the y/z axes
                    # (fancy indexing on the right-hand side copies first).
                    mid[[0, 1, 2]] = mid[[0, 2, 1]]
                    crop_point_cloud[:, [0, 1, 2]] = \
                        crop_point_cloud[:, [0, 2, 1]]

                    if not use_color:
                        point_cloud = crop_point_cloud[:, 0:3]
                    else:
                        point_cloud = crop_point_cloud[:, 0:6]
                        # NOTE(review): only the mean is divided by 256, so
                        # the input colors are presumably already in 0~1 -
                        # confirm against the caller.
                        point_cloud[:, 3:] = point_cloud[:, 3:] - (
                            mean_color_rgb) / 256.0

                    if use_height:
                        # q=0.99 is the 0.99th percentile, i.e. close to the
                        # lowest points along the up axis.
                        floor_height = np.percentile(point_cloud[:, 2], 0.99)
                        height = point_cloud[:, 2] - floor_height
                        point_cloud = np.concatenate(
                            [point_cloud,
                             np.expand_dims(height, 1)], 1)

                    point_cloud, _ = pc_util.random_sampling(
                        point_cloud, num_point, return_choices=True)
                    run_crop(point_cloud, mid)

    if not per_crop_outputs:
        return {}
    # Concatenate once at the end instead of re-copying the running result
    # after every crop.
    return {
        k: torch.cat([crop[k] for crop in per_crop_outputs], dim=1)
        for k in output_keys
    }
def __getitem__(self, idx):
    """Build the training sample for scan ``idx`` from the in-memory lists.

    The cached arrays in ``self.point_cloud_list`` / ``self.bboxes_list``
    are never modified: color centering and augmentation run on private
    copies, so repeated accesses of the same index start from the same data.

    Returns a dict with following keys:
        point_clouds: (N,3+C)
        center_label: (MAX_NUM_OBJ,3) center XYZ of the axis-aligned box
            around each GT box; unused rows are filled with 1000.0
        heading_class_label: (MAX_NUM_OBJ,) with int values in
            0,...,NUM_HEADING_BIN-1
        heading_residual_label: (MAX_NUM_OBJ,)
        size_class_label: (MAX_NUM_OBJ,) with int values in
            0,...,NUM_SIZE_CLUSTER
        size_residual_label: (MAX_NUM_OBJ,3)
        size_gts: (MAX_NUM_OBJ,3) extents of the axis-aligned box around
            each GT box
        sem_cls_label: (MAX_NUM_OBJ,) semantic class index
        box_label_mask: (MAX_NUM_OBJ) as 0/1 with 1 indicating a unique box
        point_obj_mask: (N,) with 0/1 with 1 indicating the point is in one
            of the object's OBB.
        point_instance_label: (N,) with int values in -1,...,num_box,
            indicating which object the point belongs to, -1 means a
            background point.
        scan_idx: int scan index in scan_names list
        max_gt_bboxes: (MAX_NUM_OBJ,8) zero-padded copy of the (augmented)
            boxes; unused
    """
    cached_points = self.point_cloud_list[idx]  # Nx6
    # Copy: the augmentation below edits the boxes in place.
    bboxes = self.bboxes_list[idx].copy()  # K,8
    point_obj_mask = self.point_labels_list[idx][:, 0]
    point_instance_label = self.point_labels_list[idx][:, -1]

    # Column slices are views into the cache, so take a copy before the
    # in-place color centering / augmentation touches them.
    if not self.use_color:
        point_cloud = cached_points[:, 0:3].copy()
    else:
        point_cloud = cached_points[:, 0:6].copy()
        point_cloud[:, 3:] = (point_cloud[:, 3:] - MEAN_COLOR_RGB)

    if self.use_height:
        # q=0.99 is the 0.99th percentile, i.e. close to the lowest points.
        floor_height = np.percentile(point_cloud[:, 2], 0.99)
        height = point_cloud[:, 2] - floor_height
        point_cloud = np.concatenate(
            [point_cloud, np.expand_dims(height, 1)], 1)  # (N,4) or (N,7)

    # ------------------------------- DATA AUGMENTATION ------------------------------
    if self.augment:
        if np.random.random() > 0.5:
            # Flipping along the YZ plane
            point_cloud[:, 0] = -1 * point_cloud[:, 0]
            bboxes[:, 0] = -1 * bboxes[:, 0]
            bboxes[:, 6] = np.pi - bboxes[:, 6]

        # Rotation along up-axis/Z-axis
        rot_angle = (np.random.random() * np.pi / 3) - np.pi / 6  # -30 ~ +30 degree
        rot_mat = sunrgbd_utils.rotz(rot_angle)
        point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3],
                                     np.transpose(rot_mat))
        bboxes[:, 0:3] = np.dot(bboxes[:, 0:3], np.transpose(rot_mat))
        bboxes[:, 6] -= rot_angle

        # Augment RGB color
        if self.use_color:
            rgb_color = point_cloud[:, 3:6] + MEAN_COLOR_RGB
            # brightness change for each channel
            rgb_color *= (1 + 0.4 * np.random.random(3) - 0.2)
            # color shift for each channel
            rgb_color += (0.1 * np.random.random(3) - 0.05)
            # jittering on each pixel
            rgb_color += np.expand_dims(
                (0.05 * np.random.random(point_cloud.shape[0]) - 0.025), -1)
            rgb_color = np.clip(rgb_color, 0, 1)
            # randomly drop out 30% of the points' colors
            rgb_color *= np.expand_dims(
                np.random.random(point_cloud.shape[0]) > 0.3, -1)
            point_cloud[:, 3:6] = rgb_color - MEAN_COLOR_RGB

        # Augment point cloud scale: 0.85x-1.15x
        scale_ratio = np.random.random() * 0.3 + 0.85
        scale_ratio = np.expand_dims(np.tile(scale_ratio, 3), 0)
        point_cloud[:, 0:3] *= scale_ratio
        bboxes[:, 0:3] *= scale_ratio
        bboxes[:, 3:6] *= scale_ratio
        if self.use_height:
            point_cloud[:, -1] *= scale_ratio[0, 0]

    # ------------------------------- LABELS ------------------------------
    num_boxes = bboxes.shape[0]
    angle_classes = np.zeros((MAX_NUM_OBJ, ))
    angle_residuals = np.zeros((MAX_NUM_OBJ, ))
    size_classes = np.zeros((MAX_NUM_OBJ, ))
    size_residuals = np.zeros((MAX_NUM_OBJ, 3))
    label_mask = np.zeros((MAX_NUM_OBJ))
    label_mask[0:num_boxes] = 1
    max_bboxes = np.zeros((MAX_NUM_OBJ, 8))
    max_bboxes[0:num_boxes, :] = bboxes

    target_bboxes_mask = label_mask
    target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
    # Padded rows get a far-away center (1000.0).
    # NOTE(review): presumably so they never match a real prediction in the
    # loss - confirm against the consumer of 'center_label'.
    target_bboxes[:, 0:3] += 1000.0
    size_gts = np.zeros((MAX_NUM_OBJ, 3))

    for i in range(num_boxes):
        bbox = bboxes[i]
        semantic_class = bbox[7]

        # Heading / size bins and residuals.
        angle_class, angle_residual = DC.angle2class(bbox[6])
        # NOTE: The mean size stored in size2class is of full length of box
        # edges, while in sunrgbd_data.py data dumping we dumped *half*
        # length l,w,h.. so have to time it by 2 here
        box3d_size = bbox[3:6] * 2
        size_class, size_residual = DC.size2class(
            box3d_size, DC.class2type[semantic_class])
        angle_classes[i] = angle_class
        angle_residuals[i] = angle_residual
        size_classes[i] = size_class
        size_residuals[i] = size_residual

        # Axis-aligned box around the oriented box corners.
        corners_3d = sunrgbd_utils.my_compute_box_3d(bbox[0:3], bbox[3:6],
                                                     bbox[6])
        corner_min = np.min(corners_3d[:, 0:3], axis=0)
        corner_max = np.max(corners_3d[:, 0:3], axis=0)
        target_bboxes[i, 0:3] = (corner_min + corner_max) / 2
        target_bboxes[i, 3:6] = corner_max - corner_min
        size_gts[i, :] = target_bboxes[i, 3:6]

    point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                   self.num_points,
                                                   return_choices=True)
    point_obj_mask = point_obj_mask[choices]
    point_instance_label = point_instance_label[choices]

    ret_dict = {}
    ret_dict['point_clouds'] = point_cloud.astype(np.float32)
    ret_dict['center_label'] = target_bboxes.astype(np.float32)[:, 0:3]
    ret_dict['heading_class_label'] = angle_classes.astype(np.int64)
    ret_dict['heading_residual_label'] = angle_residuals.astype(np.float32)
    ret_dict['size_class_label'] = size_classes.astype(np.int64)
    ret_dict['size_residual_label'] = size_residuals.astype(np.float32)
    ret_dict['size_gts'] = size_gts.astype(np.float32)
    target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
    target_bboxes_semcls[0:num_boxes] = bboxes[:, -1]  # from 0 to 9
    ret_dict['sem_cls_label'] = target_bboxes_semcls.astype(np.int64)
    ret_dict['box_label_mask'] = target_bboxes_mask.astype(np.float32)
    ret_dict['point_obj_mask'] = point_obj_mask.astype(np.int64)
    ret_dict['point_instance_label'] = point_instance_label.astype(np.int64)
    ret_dict['scan_idx'] = np.array(idx).astype(np.int64)
    ret_dict['max_gt_bboxes'] = max_bboxes
    return ret_dict
def __getitem__(self, idx):
    """Load scan ``idx`` from disk and build detection + boundary labels.

    Returns a dict with following keys:
        point_clouds: (N,3+C)
        center_label: (MAX_NUM_OBJ,3) center XYZ of the axis-aligned box
            around each GT box
        heading_class_label: (MAX_NUM_OBJ,) with int values in
            0,...,NUM_HEADING_BIN-1
        heading_residual_label: (MAX_NUM_OBJ,)
        size_class_label: (MAX_NUM_OBJ,) with int values in
            0,...,NUM_SIZE_CLUSTER
        size_residual_label: (MAX_NUM_OBJ,3)
        sem_cls_label: (MAX_NUM_OBJ,) semantic class index
        box_label_mask: (MAX_NUM_OBJ) as 0/1 with 1 indicating a unique box
        vote_label: (N,9) with votes XYZ (3 votes: X1Y1Z1, X2Y2Z2, X3Y3Z3)
            if there is only one vote than X1==X2==X3 etc.
        vote_label_mask: (N,) with 0/1 with 1 indicating the point is in one
            of the object's OBB.
        scan_idx: int scan index in scan_names list
        max_gt_bboxes: (MAX_NUM_OBJ,8) zero-padded copy of the boxes; unused
        size_label: (MAX_NUM_OBJ,3) full box edge lengths
        heading_label: (MAX_NUM_OBJ,) raw heading angle of each box
        floor_height: scalar, only present when ``self.use_height`` is set
        point_boundary_mask_z / point_boundary_mask_xy: (N,) 1.0 for points
            assigned to a horizontal / vertical box face
        point_boundary_offset_z / point_boundary_offset_xy: (N,3) offset
            from the point to the center of its face
        point_boundary_sem_z: (N,6) face center XYZ, two edge lengths,
            semantic class
        point_boundary_sem_xy: (N,5) face center XYZ, face height,
            semantic class
        point_line_mask: (N,) 1.0 for points assigned to a box edge
        point_line_offset: (N,3) offset from the point to the edge center
        point_line_sem: (N,4) edge center XYZ, semantic class

    Raises:
        ValueError: if a box fails the upright / plane sanity checks.
    """
    scan_name = self.scan_names[idx]
    scan_prefix = os.path.join(self.data_path, scan_name)
    # .npz archives stay open until closed; read what we need and close.
    with np.load(scan_prefix + '_pc.npz') as pc_archive:
        # XYZ, RGB and (column 6) a 37-class semantic label per point.
        point_color_sem = pc_archive['pc']
    bboxes = np.load(scan_prefix + '_bbox.npy')  # K,8
    with np.load(scan_prefix + '_votes.npz') as votes_archive:
        point_votes = votes_archive['point_votes']  # Nx10

    semantics37 = point_color_sem[:, 6]
    # For every point, the collection of 10-class labels it may count as.
    semantics10_multi = [
        DC.class37_2_class10_multi[k] for k in semantics37
    ]

    if not self.use_color:
        point_cloud = point_color_sem[:, 0:3]
    else:
        point_cloud = point_color_sem[:, 0:6]
        point_cloud[:, 3:6] = (point_color_sem[:, 3:6] - MEAN_COLOR_RGB)

    if self.use_height:
        # q=0.99 is the 0.99th percentile, i.e. close to the lowest points.
        floor_height = np.percentile(point_cloud[:, 2], 0.99)
        height = point_cloud[:, 2] - floor_height
        point_cloud = np.concatenate(
            [point_cloud, np.expand_dims(height, 1)], 1)  # (N,4) or (N,7)

    # Column 0 of point_votes is the mask; the three votes follow.
    vote_slices = (slice(1, 4), slice(4, 7), slice(7, 10))

    # ------------------------------- DATA AUGMENTATION ------------------------------
    if self.augment:
        if np.random.random() > 0.5:
            # Flipping along the YZ plane
            point_cloud[:, 0] = -1 * point_cloud[:, 0]
            bboxes[:, 0] = -1 * bboxes[:, 0]
            bboxes[:, 6] = np.pi - bboxes[:, 6]
            point_votes[:, [1, 4, 7]] = -1 * point_votes[:, [1, 4, 7]]

        # Rotation along up-axis/Z-axis
        rot_angle = (np.random.random() * np.pi / 3) - np.pi / 6  # -30 ~ +30 degree
        rot_mat_t = np.transpose(sunrgbd_utils.rotz(rot_angle))

        # Rotate the vote end points, then re-express them relative to the
        # rotated points.
        point_votes_end = np.zeros_like(point_votes)
        for cols in vote_slices:
            point_votes_end[:, cols] = np.dot(
                point_cloud[:, 0:3] + point_votes[:, cols], rot_mat_t)

        point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3], rot_mat_t)
        bboxes[:, 0:3] = np.dot(bboxes[:, 0:3], rot_mat_t)
        bboxes[:, 6] -= rot_angle
        for cols in vote_slices:
            point_votes[:, cols] = \
                point_votes_end[:, cols] - point_cloud[:, 0:3]

        # Augment RGB color
        if self.use_color:
            rgb_color = point_cloud[:, 3:6] + MEAN_COLOR_RGB
            # brightness change for each channel
            rgb_color *= (1 + 0.4 * np.random.random(3) - 0.2)
            # color shift for each channel
            rgb_color += (0.1 * np.random.random(3) - 0.05)
            # jittering on each pixel
            rgb_color += np.expand_dims(
                (0.05 * np.random.random(point_cloud.shape[0]) - 0.025), -1)
            rgb_color = np.clip(rgb_color, 0, 1)
            # randomly drop out 30% of the points' colors
            rgb_color *= np.expand_dims(
                np.random.random(point_cloud.shape[0]) > 0.3, -1)
            point_cloud[:, 3:6] = rgb_color - MEAN_COLOR_RGB

        # Augment point cloud scale: 0.85x-1.15x
        scale_ratio = np.random.random() * 0.3 + 0.85
        scale_ratio = np.expand_dims(np.tile(scale_ratio, 3), 0)
        point_cloud[:, 0:3] *= scale_ratio
        bboxes[:, 0:3] *= scale_ratio
        bboxes[:, 3:6] *= scale_ratio
        for cols in vote_slices:
            point_votes[:, cols] *= scale_ratio
        if self.use_height:
            point_cloud[:, -1] *= scale_ratio[0, 0]

    # ------------------------------- LABELS ------------------------------
    num_boxes = bboxes.shape[0]
    box3d_sizes = np.zeros((MAX_NUM_OBJ, 3))
    angle_classes = np.zeros((MAX_NUM_OBJ, ))
    angle_residuals = np.zeros((MAX_NUM_OBJ, ))
    size_classes = np.zeros((MAX_NUM_OBJ, ))
    size_residuals = np.zeros((MAX_NUM_OBJ, 3))
    label_mask = np.zeros((MAX_NUM_OBJ))
    label_mask[0:num_boxes] = 1
    max_bboxes = np.zeros((MAX_NUM_OBJ, 8))
    max_bboxes[0:num_boxes, :] = bboxes

    # new items
    box3d_angles = np.zeros((MAX_NUM_OBJ, ))
    point_boundary_mask_z = np.zeros(self.num_points)
    point_boundary_mask_xy = np.zeros(self.num_points)
    point_boundary_offset_z = np.zeros([self.num_points, 3])
    point_boundary_offset_xy = np.zeros([self.num_points, 3])
    point_boundary_sem_z = np.zeros([self.num_points, 3 + 2 + 1])
    point_boundary_sem_xy = np.zeros([self.num_points, 3 + 1 + 1])
    point_line_mask = np.zeros(self.num_points)
    point_line_offset = np.zeros([self.num_points, 3])
    point_line_sem = np.zeros([self.num_points, 3 + 1])

    target_bboxes_mask = label_mask
    target_bboxes = np.zeros((MAX_NUM_OBJ, 6))

    for i in range(num_boxes):
        bbox = bboxes[i]
        semantic_class = bbox[7]

        angle_class, angle_residual = DC.angle2class(bbox[6])
        # NOTE: The mean size stored in size2class is of full length of box
        # edges, while in sunrgbd_data.py data dumping we dumped *half*
        # length l,w,h.. so have to time it by 2 here
        box3d_size = bbox[3:6] * 2
        size_class, size_residual = DC.size2class(
            box3d_size, DC.class2type[semantic_class])
        angle_classes[i] = angle_class
        angle_residuals[i] = angle_residual
        size_classes[i] = size_class
        size_residuals[i] = size_residual
        box3d_sizes[i, :] = box3d_size
        box3d_angles[i] = bbox[6]

        # Axis-aligned box around the oriented box corners.
        corners_3d = sunrgbd_utils.my_compute_box_3d(bbox[0:3], bbox[3:6],
                                                     bbox[6])
        corner_min = np.min(corners_3d[:, 0:3], axis=0)
        corner_max = np.max(corners_3d[:, 0:3], axis=0)
        target_bboxes[i, 0:3] = (corner_min + corner_max) / 2
        target_bboxes[i, 3:6] = corner_max - corner_min

    point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                   self.num_points,
                                                   return_choices=True)
    semantics10_multi = [semantics10_multi[i] for i in choices]
    point_votes_mask = point_votes[choices, 0]
    point_votes = point_votes[choices, 1:]

    # ---------------------- boundary / line label helpers ----------------------
    def select_near_plane(points, plane):
        # Distance of every point to `plane` (a, b, c, d) and the mask of
        # points within DIST_THRESH of the closest one.
        dist = np.abs(np.sum(points * plane[:3], 1) + plane[-1])
        near = np.abs(dist - np.min(dist)) < DIST_THRESH
        return dist, near

    def is_planar_face(dist, near):
        # Enough points on the face, and they are tightly spread around it.
        return np.sum(near) > NUM_POINT and np.var(dist[near]) < VAR_THRESH

    def mark_lines(side, corner_pairs, pts_near, ind_near, corners, sem_cls):
        # Label the points get_linesel assigns to each edge of face `side`;
        # corner_pairs[k] are the two corners bounding the k-th edge.
        line_sels = get_linesel(pts_near, corners, side)
        for line_sel, (c_a, c_b) in zip(line_sels, corner_pairs):
            if np.sum(line_sel) > NUM_POINT_LINE:
                linecenter = (corners[c_a] + corners[c_b]) / 2.0
                targets = ind_near[line_sel]
                point_line_mask[targets] = 1.0
                point_line_offset[targets] = linecenter - pts_near[line_sel]
                point_line_sem[targets] = np.array(
                    [linecenter[0], linecenter[1], linecenter[2], sem_cls])

    def mark_face_z(pts_near, ind_near, corners, sem_cls, diagonal, edges):
        # Label a horizontal face: center from the two `diagonal` corners
        # (z from the selected points), edge lengths from diagonal[0] to
        # each corner in `edges`.
        origin, opposite = diagonal
        center = (corners[origin] + corners[opposite]) / 2.0
        center[2] = np.mean(pts_near[:, 2])
        point_boundary_mask_z[ind_near] = 1.0
        point_boundary_sem_z[ind_near] = np.array([
            center[0], center[1], center[2],
            np.linalg.norm(corners[edges[0]] - corners[origin]),
            np.linalg.norm(corners[edges[1]] - corners[origin]), sem_cls
        ])
        point_boundary_offset_z[ind_near] = center - pts_near

    def mark_face_xy(pts_near, ind_near, corners, sem_cls, low, high):
        # Label a vertical face: center x/y from the selected points, z and
        # height from the `low` / `high` corners.
        center = np.array([
            np.mean(pts_near[:, 0]),
            np.mean(pts_near[:, 1]),
            (corners[low, 2] + corners[high, 2]) / 2.0
        ])
        point_boundary_mask_xy[ind_near] = 1.0
        point_boundary_sem_xy[ind_near] = np.array([
            center[0], center[1], center[2],
            corners[high, 2] - corners[low, 2], sem_cls
        ])
        point_boundary_offset_xy[ind_near] = center - pts_near

    def vertical_plane_pair(v1, v2, anchor, opposite_corners, label):
        # Unit-normal plane through `anchor` spanned by v1/v2, plus the
        # parallel plane through the mean of `opposite_corners`.
        normal = np.cross(v1, v2)
        plane = np.array(
            [normal[0], normal[1], normal[2], -np.dot(normal, anchor)])
        plane /= np.linalg.norm(plane[:3])
        newd = np.sum(opposite_corners * plane[:3], 1)
        if not plane[2] < LOWER_THRESH:
            raise ValueError(
                'error with upright: %s plane of a box in scan %s is not '
                'vertical' % (label, scan_name))
        opposite = np.array([plane[0], plane[1], plane[2], -np.mean(newd)])
        return plane, opposite

    # box angle is -pi to pi
    for i in range(num_boxes):
        bbox = bboxes[i]
        sem_cls = bbox[7]
        corners = params2bbox(bbox[:3], 2 * bbox[3:6],
                              clockwise2counter(bbox[6]))
        try:
            x_all_cls, ind_all_cls = extract_pc_in_box3d(
                point_cloud, corners)
        except Exception:
            # Best effort: boxes whose points cannot be extracted get no
            # boundary labels.
            continue
        ind_all_cls = np.where(ind_all_cls)[0]  # T/F to index

        # Keep the in-box points whose semantic labels include the box class
        # (note semantics is 37 cls in sunrgbd).
        ind = np.array(
            [j for j in ind_all_cls if sem_cls in semantics10_multi[j]])
        if ind.shape[0] < NUM_POINT_SEM_THRESHOLD:
            continue
        x = point_cloud[ind, :3]

        ### Get bb planes and boundary points: lower / upper faces
        plane_lower_temp = np.array([0, 0, 1, -corners[6, -1]])
        para_points = np.array(
            [corners[1], corners[3], corners[5], corners[7]])
        newd = np.sum(para_points * plane_lower_temp[:3], 1)
        if not (check_upright(para_points) and
                plane_lower_temp[0] + plane_lower_temp[1] < LOWER_THRESH):
            raise ValueError(
                'error with upright: box %d of scan %s is not upright' %
                (i, scan_name))
        plane_lower = np.array([0, 0, 1, plane_lower_temp[-1]])
        plane_upper = np.array([0, 0, 1, -np.mean(newd)])
        # Explicit comparison kept: check_z's return type is not visible here.
        if check_z(plane_upper, para_points) == False:  # noqa: E712
            raise ValueError(
                'upper plane check failed for box %d of scan %s' %
                (i, scan_name))

        dist, near = select_near_plane(x, plane_lower)
        pts_near, ind_near = x[near], ind[near]
        mark_lines('lower', ((0, 2), (4, 6), (0, 4), (2, 6)), pts_near,
                   ind_near, corners, sem_cls)
        if is_planar_face(dist, near):
            mark_face_z(pts_near, ind_near, corners, sem_cls, (0, 6), (4, 2))

        dist, near = select_near_plane(x, plane_upper)
        pts_near, ind_near = x[near], ind[near]
        mark_lines('upper', ((1, 3), (5, 7), (1, 5), (3, 7)), pts_near,
                   ind_near, corners, sem_cls)
        if is_planar_face(dist, near):
            mark_face_z(pts_near, ind_near, corners, sem_cls, (1, 7), (5, 3))

        ### left / right faces
        plane_left, plane_right = vertical_plane_pair(
            corners[3] - corners[2], corners[2] - corners[0], corners[0],
            np.array([corners[4], corners[5], corners[6], corners[7]]),
            'left')

        dist, near = select_near_plane(x, plane_left)
        pts_near, ind_near = x[near], ind[near]
        mark_lines('left', ((0, 1), (2, 3)), pts_near, ind_near, corners,
                   sem_cls)
        if is_planar_face(dist, near):
            mark_face_xy(pts_near, ind_near, corners, sem_cls, 0, 1)

        dist, near = select_near_plane(x, plane_right)
        pts_near, ind_near = x[near], ind[near]
        mark_lines('right', ((4, 5), (6, 7)), pts_near, ind_near, corners,
                   sem_cls)
        if is_planar_face(dist, near):
            mark_face_xy(pts_near, ind_near, corners, sem_cls, 4, 5)

        ### front / back faces (no line labels for these two)
        plane_front, plane_back = vertical_plane_pair(
            corners[0] - corners[4], corners[4] - corners[5], corners[5],
            np.array([corners[2], corners[3], corners[6], corners[7]]),
            'front')

        dist, near = select_near_plane(x, plane_front)
        if is_planar_face(dist, near):
            mark_face_xy(x[near], ind[near], corners, sem_cls, 0, 1)

        dist, near = select_near_plane(x, plane_back)
        if is_planar_face(dist, near):
            mark_face_xy(x[near], ind[near], corners, sem_cls, 2, 3)

    ret_dict = {}
    ret_dict['point_clouds'] = point_cloud.astype(np.float32)
    ret_dict['center_label'] = target_bboxes.astype(np.float32)[:, 0:3]
    ret_dict['heading_class_label'] = angle_classes.astype(np.int64)
    ret_dict['heading_residual_label'] = angle_residuals.astype(np.float32)
    ret_dict['size_class_label'] = size_classes.astype(np.int64)
    ret_dict['size_residual_label'] = size_residuals.astype(np.float32)
    target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
    target_bboxes_semcls[0:num_boxes] = bboxes[:, -1]  # from 0 to 9
    ret_dict['sem_cls_label'] = target_bboxes_semcls.astype(np.int64)
    ret_dict['box_label_mask'] = target_bboxes_mask.astype(np.float32)
    ret_dict['vote_label'] = point_votes.astype(np.float32)
    ret_dict['vote_label_mask'] = point_votes_mask.astype(np.int64)
    ret_dict['scan_idx'] = np.array(idx).astype(np.int64)
    ret_dict['max_gt_bboxes'] = max_bboxes

    # new items
    ret_dict['size_label'] = box3d_sizes.astype(np.float32)
    ret_dict['heading_label'] = box3d_angles.astype(np.float32)
    if self.use_height:
        ret_dict['floor_height'] = floor_height

    ret_dict['point_boundary_mask_z'] = point_boundary_mask_z.astype(
        np.float32)
    ret_dict['point_boundary_mask_xy'] = point_boundary_mask_xy.astype(
        np.float32)
    ret_dict['point_boundary_offset_z'] = point_boundary_offset_z.astype(
        np.float32)
    ret_dict['point_boundary_offset_xy'] = point_boundary_offset_xy.astype(
        np.float32)
    ret_dict['point_boundary_sem_z'] = point_boundary_sem_z.astype(
        np.float32)
    ret_dict['point_boundary_sem_xy'] = point_boundary_sem_xy.astype(
        np.float32)

    ret_dict['point_line_mask'] = point_line_mask.astype(np.float32)
    ret_dict['point_line_offset'] = point_line_offset.astype(np.float32)
    ret_dict['point_line_sem'] = point_line_sem.astype(np.float32)
    return ret_dict
def extract_sunrgbd_data(idx_filename,
                         split,
                         output_folder,
                         num_point=20000,
                         type_whitelist=DEFAULT_TYPE_WHITELIST,
                         save_votes=False,
                         use_v1=False,
                         skip_empty_scene=True):
    """Extract scene point clouds and bounding boxes and dump them to disk.

    Bounding boxes are stored as centroid, box size, heading angle and
    semantic class. Dumped point clouds and boxes are in upright depth coord.

    Args:
        idx_filename: a TXT file where each non-blank line is an int (index)
        split: training or testing
        output_folder: directory that receives the dumps (created if missing)
        num_point: number of points kept per scene by random sampling
        type_whitelist: class names to keep; other objects are ignored
        save_votes: whether to compute and save ground-truth votes
        use_v1: use the SUN RGB-D V1 data
        skip_empty_scene: if True, skip scenes that contain no object
            (no object in whitelist)

    Dumps:
        <id>_pc.npz of (N,6) where N is the number of subsampled points and
            6 is for XYZ and RGB (in 0~1) in upright depth coord
        <id>_bbox.npy of (K,8) where K is the number of objects, 8 is for
            centroid (cx,cy,cz), dimension (l,w,h), heading_angle and
            semantic_class
        <id>_votes.npz of (N,10) with 0/1 indicating whether the point
            belongs to an object, then three sets of GT votes for up to
            three objects. If the point is only in one object's OBB, the
            three GT votes are the same.
    """
    dataset = sunrgbd_object('./sunrgbd_trainval', split, use_v1=use_v1)

    # Close the index file deterministically; tolerate blank lines.
    with open(idx_filename) as idx_file:
        data_idx_list = [int(line.strip()) for line in idx_file
                         if line.strip()]

    # exist_ok avoids the check-then-create race and allows nested paths.
    os.makedirs(output_folder, exist_ok=True)

    for data_idx in data_idx_list:
        print('------------- ', data_idx)
        # NOTE(review): scan 479 is skipped unconditionally; the reason is
        # not visible in this function -- confirm it is still required.
        if data_idx == 479:
            continue
        objects = dataset.get_label_objects(data_idx)
        kept_objects = [obj for obj in objects
                        if obj.classname in type_whitelist]

        # Skip scenes with 0 (whitelisted) object
        if skip_empty_scene and not kept_objects:
            continue

        object_list = []
        for obj in kept_objects:
            obb = np.zeros((8))
            obb[0:3] = obj.centroid
            # Note that compared with that in data_viz, we do not multiply
            # l,w,h by 2, neither do we flip the heading angle
            obb[3:6] = np.array([obj.l, obj.w, obj.h])
            obb[6] = obj.heading_angle
            obb[7] = sunrgbd_utils.type2class[obj.classname]
            object_list.append(obb)
        if object_list:
            obbs = np.vstack(object_list)  # (K,8)
        else:
            obbs = np.zeros((0, 8))

        pc_upright_depth = dataset.get_depth(data_idx)
        pc_upright_depth_subsampled = pc_util.random_sampling(
            pc_upright_depth, num_point)

        np.savez_compressed(os.path.join(output_folder,
                                         '%06d_pc.npz' % (data_idx)),
                            pc=pc_upright_depth_subsampled)
        np.save(os.path.join(output_folder, '%06d_bbox.npy' % (data_idx)),
                obbs)

        if not save_votes:
            continue

        N = pc_upright_depth_subsampled.shape[0]
        point_votes = np.zeros((N, 10))  # 3 votes and 1 vote mask
        point_vote_idx = np.zeros((N)).astype(np.int32)  # in range [0,2]
        indices = np.arange(N)
        for obj in kept_objects:
            try:
                # Find all points in this object's OBB
                box3d_pts_3d = sunrgbd_utils.my_compute_box_3d(
                    obj.centroid, np.array([obj.l, obj.w, obj.h]),
                    obj.heading_angle)
                pc_in_box3d, inds = sunrgbd_utils.extract_pc_in_box3d(
                    pc_upright_depth_subsampled, box3d_pts_3d)
                # Assign first dimension to indicate it is in an object box
                point_votes[inds, 0] = 1
                # Add the votes (all 0 if the point is not in any OBB)
                votes = np.expand_dims(obj.centroid, 0) - pc_in_box3d[:, 0:3]
                # turn dense True/False inds into sparse number-wise inds
                sparse_inds = indices[inds]
                for vote, j in zip(votes, sparse_inds):
                    slot = int(point_vote_idx[j])
                    point_votes[j, slot * 3 + 1:(slot + 1) * 3 + 1] = vote
                    # Populate the other two votes with the first vote
                    if slot == 0:
                        point_votes[j, 4:7] = vote
                        point_votes[j, 7:10] = vote
                point_vote_idx[inds] = np.minimum(2, point_vote_idx[inds] + 1)
            except Exception as err:
                # Best effort: one bad object must not abort the whole dump,
                # but Ctrl-C / SystemExit are no longer swallowed.
                print('ERROR ----', data_idx, obj.classname, repr(err))
        np.savez_compressed(os.path.join(output_folder,
                                         '%06d_votes.npz' % (data_idx)),
                            point_votes=point_votes)
def __getitem__(self, index: int):
    """Load one scan and assemble its detection / voting labels.

    Reads ``point_cloud.npy``, ``ins_vert.npy`` and ``bbox.npy`` from the
    scan directory, optionally applies random flips and a small rotation
    about Z, and returns a dict with the keys ``point_clouds``,
    ``center_label``, ``heading_class_label``, ``heading_residual_label``
    (both heading entries are all zeros), ``size_class_label``,
    ``size_residual_label``, ``sem_cls_label``, ``box_label_mask``,
    ``vote_label``, ``vote_label_mask`` and ``scan_idx``.
    """
    scan_id = self.scene_list[index]
    assert (scan_id not in self.error_scan)
    scan_dir = os.path.join(self.data_path, scan_id)

    def load_array(name):
        return np.load(os.path.join(scan_dir, '{}.npy'.format(name)))

    points = load_array('point_cloud')
    ins_vert = load_array('ins_vert').squeeze(1)
    ins_bbox = load_array('bbox')

    # Column layout of the bbox file as used below:
    # 0:3 center, 3:6 box lengths, 6 semantic class, 7 symmetry.
    box_centers = ins_bbox[:, 0:3]
    box_lengths = ins_bbox[:, 3:6]
    box_sem_cls = ins_bbox[:, 6:7]
    box_symmetry = ins_bbox[:, 7:8]
    num_boxes = box_centers.shape[0]

    # Sub-sample to a fixed size; scans that are too small are only reported.
    num_loaded = points.shape[0]
    if num_loaded > self.num_points:
        points, choices = pc_util.random_sampling(points,
                                                  self.num_points,
                                                  return_choices=True)
        ins_vert = ins_vert[choices]
    elif num_loaded < self.num_points:
        print('false data')

    # Fixed-size (MAX_NUM_OBJ) label buffers, zero padded past num_boxes.
    target_bboxes = np.zeros((MAX_NUM_OBJ, 6), dtype=np.float32)
    target_bboxes_mask = np.zeros((MAX_NUM_OBJ), dtype=np.int64)
    target_sem_cls = np.zeros((MAX_NUM_OBJ, ), dtype=np.int64)
    target_sym = np.zeros((MAX_NUM_OBJ, ), dtype=np.int64)
    target_size_classes = np.zeros((MAX_NUM_OBJ, ))
    target_size_residuals = np.zeros((MAX_NUM_OBJ, 3))

    target_sem_cls[:num_boxes] = box_sem_cls.squeeze(1)
    target_sym[:num_boxes] = box_symmetry.squeeze(1)
    target_bboxes[:num_boxes, 0:3] = box_centers
    target_bboxes[:num_boxes, 3:6] = box_lengths
    target_bboxes_mask[:num_boxes] = 1

    # ---------------------------- DATA AUGMENTATION ----------------------------
    if self.augment:
        # One coin toss per horizontal axis: X (YZ-plane flip) first,
        # then Y (XZ-plane flip).
        for axis in (0, 1):
            if np.random.random() > 0.5:
                points[:, axis] = -points[:, axis]
                target_bboxes[:, axis] = -target_bboxes[:, axis]

        # Rotation about the up/Z axis, -5 ~ +5 degree
        rot_angle = (np.random.random() * np.pi / 18) - np.pi / 36
        rot_mat = pc_util.rotz(rot_angle)
        points[:, 0:3] = np.dot(points[:, 0:3], np.transpose(rot_mat))
        target_bboxes = rotate_aligned_boxes(target_bboxes, rot_mat)

    target_center = target_bboxes[:, 0:3]

    # ====== GENERATE VOTES (after augmentation) ======
    # Instance labels in ins_vert are 1-based; label L refers to box L-1.
    point_votes = np.zeros([self.num_points, 3])
    point_votes_mask = np.zeros(self.num_points)
    for ins_label in np.unique(ins_vert):
        box_idx = ins_label - 1
        if target_sem_cls[box_idx] in NOT_CARED_IDS or box_idx < 0:
            continue
        member_idx = np.where(ins_vert == ins_label)[0]
        # Votes are stored as (point - box center).
        point_votes[member_idx, :] = \
            points[member_idx, :3] - target_center[box_idx]
        point_votes_mask[member_idx] = 1.0
    point_votes = np.tile(point_votes, (1, 3))

    # The size class is simply the semantic class.
    target_size_classes[:num_boxes] = target_sem_cls[:num_boxes]
    target_size_residuals[:num_boxes, :3] = (
        target_bboxes[:num_boxes, 3:6] -
        DC.mean_size_arr[target_sem_cls[:num_boxes], :])

    # ====== LABELS ======
    label = {
        'point_clouds': points.astype(np.float32),
        'center_label': target_center.astype(np.float32),
        'heading_class_label': np.zeros((MAX_NUM_OBJ, )).astype(np.int64),
        'heading_residual_label':
        np.zeros((MAX_NUM_OBJ, )).astype(np.float32),
        'size_class_label': target_size_classes.astype(np.int64),
        'size_residual_label': target_size_residuals.astype(np.float32),
        'sem_cls_label': target_sem_cls.astype(np.int64),
        'box_label_mask': target_bboxes_mask.astype(np.float32),
        'vote_label': point_votes.astype(np.float32),
        'vote_label_mask': point_votes_mask.astype(np.int64),
        'scan_idx': np.array(index).astype(np.int64),
    }
    return label
def __getitem__(self, idx):
    """Load one scan and build its grasp-detection labels.

    The grasp file is read as a (K, 8) array whose columns are used as:
    0:3 grasp center XYZ, 3 viewpoint class, 4 angle, 5 quality, 6 width,
    last column semantic class.

    Returns a dict with following keys:
        point_clouds: (num_points, 3+C) sampled points
        vote_label: (num_points, 9) votes XYZ (3 votes: X1Y1Z1, X2Y2Z2,
            X3Y3Z3)
        vote_label_mask: (num_points,) 0/1 vote mask
        scan_idx: int scan index in scan_names list
        width_label: (MAX_NUM_GRASP,)
        quality_label: (MAX_NUM_GRASP,)
        center_label: (MAX_NUM_GRASP, 3) grasp center XYZ
        angle_class_label: (MAX_NUM_GRASP,) from DC.angle2class
        angle_residual_label: (MAX_NUM_GRASP,) from DC.angle2class
        viewpoint_class_label: (MAX_NUM_GRASP,)
        sem_cls_label: (MAX_NUM_GRASP,) semantic class index
        grasp_label_mask: (MAX_NUM_GRASP,) 0/1, 1 for a real grasp
    """
    scan_prefix = os.path.join(self.data_path, self.scan_names[idx])
    point_cloud = np.load(scan_prefix + '_pc.npz')['pc']  # Nx6
    grasps = np.load(scan_prefix + '_grasp.npy')  # K,8
    point_votes = np.load(scan_prefix + '_votes.npz')['point_votes']  # Nx10

    if not self.use_color:
        point_cloud = point_cloud[:, 0:3]
    else:
        point_cloud = point_cloud[:, 0:6]
        point_cloud[:, 3:] = (point_cloud[:, 3:] - MEAN_COLOR_RGB)

    if self.use_height:
        # np.percentile takes q in percent, so 0.99 is a near-minimum z.
        floor_height = np.percentile(point_cloud[:, 2], 0.99)
        height = point_cloud[:, 2] - floor_height
        point_cloud = np.concatenate(
            [point_cloud, np.expand_dims(height, 1)], 1)  # (N,4) or (N,7)

    # ------------------------------- LABELS ------------------------------
    num_grasps = grasps.shape[0]
    label_mask = np.zeros((MAX_NUM_GRASP))
    label_mask[0:num_grasps] = 1

    # Plain column copies are done with slices instead of per-row loops.
    target_grasps = np.zeros((MAX_NUM_GRASP, 6))
    target_grasps[:num_grasps] = grasps[:, 0:6]
    viewpoint_classes = np.zeros((MAX_NUM_GRASP,))
    viewpoint_classes[:num_grasps] = grasps[:, 3]
    qualities = np.zeros((MAX_NUM_GRASP,))
    qualities[:num_grasps] = grasps[:, 5]
    widths = np.zeros((MAX_NUM_GRASP,))
    widths[:num_grasps] = grasps[:, 6]
    target_grasps_semcls = np.zeros((MAX_NUM_GRASP))
    target_grasps_semcls[0:num_grasps] = grasps[:, -1]

    # Angle binning goes through DC.angle2class one grasp at a time.
    angle_classes = np.zeros((MAX_NUM_GRASP,))
    angle_residuals = np.zeros((MAX_NUM_GRASP,))
    for i in range(num_grasps):
        angle_classes[i], angle_residuals[i] = DC.angle2class(grasps[i, 4])

    point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                   self.num_points,
                                                   return_choices=True)
    # Column 0 of the votes file is the mask, columns 1: are the votes.
    point_votes_mask = point_votes[choices, 0]
    point_votes = point_votes[choices, 1:]

    ret_dict = {}
    ret_dict['point_clouds'] = point_cloud.astype(np.float32)
    ret_dict['vote_label'] = point_votes.astype(np.float32)
    ret_dict['vote_label_mask'] = point_votes_mask.astype(np.int64)
    ret_dict['scan_idx'] = np.array(idx).astype(np.int64)
    ret_dict['width_label'] = widths.astype(np.float32)
    ret_dict['quality_label'] = qualities.astype(np.float32)
    ret_dict['center_label'] = target_grasps.astype(np.float32)[:, 0:3]
    ret_dict['angle_class_label'] = angle_classes.astype(np.int64)
    ret_dict['angle_residual_label'] = angle_residuals.astype(np.float32)
    ret_dict['viewpoint_class_label'] = viewpoint_classes.astype(np.int64)
    ret_dict['sem_cls_label'] = target_grasps_semcls.astype(np.int64)
    ret_dict['grasp_label_mask'] = label_mask.astype(np.float32)
    return ret_dict
def __getitem__(self, idx):
    """Build one training sample plus a second ("ema") view of the same points.

    The main view gets random flips, a -5 ~ +5 degree rotation about Z and a
    0.85x-1.15x scaling; the ema view is a copy of the same sampled points
    that only receives its own, independently drawn flips. The applied
    transforms are returned so that the two views can be related.

    Returns a dict with following keys:
        point_clouds: (N,3+C)
        center_label: (MAX_NUM_OBJ,3) for GT box center XYZ
        heading_class_label, heading_residual_label: (MAX_NUM_OBJ,) zeros
        size_class_label: (MAX_NUM_OBJ,) index into DC.nyu40ids
        size_residual_label: (MAX_NUM_OBJ,3)
        sem_cls_label: (MAX_NUM_OBJ,) semantic class index
        box_label_mask: (MAX_NUM_OBJ) as 0/1 with 1 indicating a real box
        vote_label: (N,9), three identical copies of the XYZ vote
        vote_label_mask: (N,) 0/1, 1 if the point carries a vote
        scan_idx: int scan index in scan_names list
        supervised_mask: constant 1
        scene_label: (DC.num_class,) multi-hot of classes present
        ema_point_clouds: (N,3+C) second view
        flip_x_axis, flip_y_axis, rot_mat, scale: main-view transforms
        flip_x_axis_ema, flip_y_axis_ema: ema-view flips
    """
    scan_prefix = os.path.join(self.data_path, self.scan_names[idx])
    mesh_vertices = np.load(scan_prefix + '_vert.npy')
    instance_labels = np.load(scan_prefix + '_ins_label.npy')
    semantic_labels = np.load(scan_prefix + '_sem_label.npy')
    instance_bboxes = np.load(scan_prefix + '_bbox.npy')
    num_boxes = instance_bboxes.shape[0]

    if self.use_color:
        raw_point_cloud = mesh_vertices[:, 0:6]
        raw_point_cloud[:, 3:] = (raw_point_cloud[:, 3:] -
                                  MEAN_COLOR_RGB) / 256.0
    else:
        raw_point_cloud = mesh_vertices[:, 0:3]  # geometry only

    if self.use_height:
        # q is given in percent: this is the 0.99th percentile of z.
        floor_height = np.percentile(raw_point_cloud[:, 2], 0.99)
        height_above_floor = raw_point_cloud[:, 2] - floor_height
        raw_point_cloud = np.concatenate(
            [raw_point_cloud,
             np.expand_dims(height_above_floor, 1)], 1)

    # ------------------------------- LABELS ------------------------------
    target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
    target_bboxes_mask = np.zeros((MAX_NUM_OBJ))
    angle_classes = np.zeros((MAX_NUM_OBJ, ))
    angle_residuals = np.zeros((MAX_NUM_OBJ, ))
    size_classes = np.zeros((MAX_NUM_OBJ, ))
    size_residuals = np.zeros((MAX_NUM_OBJ, 3))
    target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))

    point_cloud, choices = pc_util.random_sampling(raw_point_cloud,
                                                   self.num_points,
                                                   return_choices=True)
    # The ema view starts from exactly the same sampled points.
    ema_point_cloud = point_cloud.copy()
    instance_labels = instance_labels[choices]
    semantic_labels = semantic_labels[choices]

    target_bboxes_mask[0:num_boxes] = 1
    target_bboxes[0:num_boxes, :] = instance_bboxes[:, 0:6]

    # ---------------------------- DATA AUGMENTATION ----------------------------
    flip_x_axis = flip_y_axis = 0
    flip_x_axis_ema = flip_y_axis_ema = 0
    rot_mat = np.identity(3)
    scale_ratio = np.ones((1, 3))
    if self.augment:
        # Flip along the YZ plane: main view, then ema view.
        if np.random.random() > 0.5:
            flip_x_axis = 1
            point_cloud[:, 0] = -point_cloud[:, 0]
            target_bboxes[:, 0] = -target_bboxes[:, 0]
        if np.random.random() > 0.5:
            flip_x_axis_ema = 1
            ema_point_cloud[:, 0] = -ema_point_cloud[:, 0]

        # Flip along the XZ plane: main view, then ema view.
        if np.random.random() > 0.5:
            flip_y_axis = 1
            point_cloud[:, 1] = -point_cloud[:, 1]
            target_bboxes[:, 1] = -target_bboxes[:, 1]
        if np.random.random() > 0.5:
            flip_y_axis_ema = 1
            ema_point_cloud[:, 1] = -ema_point_cloud[:, 1]

        # Rotation along up-axis/Z-axis, -5 ~ +5 degree
        rot_angle = (np.random.random() * np.pi / 18) - np.pi / 36
        rot_mat = pc_util.rotz(rot_angle)
        point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3],
                                     np.transpose(rot_mat))
        target_bboxes = rotate_aligned_boxes(target_bboxes, rot_mat)

        # Isotropic scale: 0.85x-1.15x, stored as a (1,3) row
        scale_ratio = np.random.random() * 0.3 + 0.85
        scale_ratio = np.expand_dims(np.tile(scale_ratio, 3), 0)
        point_cloud[:, 0:3] *= scale_ratio
        target_bboxes[:, 0:3] *= scale_ratio
        target_bboxes[:, 3:6] *= scale_ratio
        if self.use_height:
            point_cloud[:, -1] *= scale_ratio[0, 0]

    # Votes are computed *after* augmentation. There is no map between bbox
    # instance labels and point instance labels, so each instance's center
    # is taken from the extent of the points sharing that label.
    point_votes = np.zeros([self.num_points, 3])
    point_votes_mask = np.zeros(self.num_points)
    for instance_id in np.unique(instance_labels):
        member_idx = np.where(instance_labels == instance_id)[0]
        # The first member's semantic label decides for the whole instance.
        if semantic_labels[member_idx[0]] not in DC.nyu40ids:
            continue
        member_xyz = point_cloud[member_idx, :3]
        instance_center = 0.5 * (member_xyz.min(0) + member_xyz.max(0))
        point_votes[member_idx, :] = instance_center - member_xyz
        point_votes_mask[member_idx] = 1.0
    point_votes = np.tile(point_votes, (1, 3))  # make 3 votes identical

    class_ind = [
        np.where(DC.nyu40ids == sem_id)[0][0]
        for sem_id in instance_bboxes[:, -1]
    ]
    # NOTE: the size class is set to the semantic class.
    size_classes[0:num_boxes] = class_ind
    size_residuals[0:num_boxes, :] = (target_bboxes[0:num_boxes, 3:6] -
                                      DC.mean_size_arr[class_ind, :])
    target_bboxes_semcls[0:num_boxes] = class_ind

    # Multi-hot vector of the classes present in this scan.
    scene_label = np.zeros(DC.num_class)
    for present_cls in set(class_ind):
        scene_label[int(present_cls)] = 1

    ret_dict = {
        'point_clouds': point_cloud.astype(np.float32),
        'center_label': target_bboxes.astype(np.float32)[:, 0:3],
        'heading_class_label': angle_classes.astype(np.int64),
        'heading_residual_label': angle_residuals.astype(np.float32),
        'size_class_label': size_classes.astype(np.int64),
        'size_residual_label': size_residuals.astype(np.float32),
        'sem_cls_label': target_bboxes_semcls.astype(np.int64),
        'box_label_mask': target_bboxes_mask.astype(np.float32),
        'vote_label': point_votes.astype(np.float32),
        'vote_label_mask': point_votes_mask.astype(np.int64),
        'scan_idx': np.array(idx).astype(np.int64),
        'supervised_mask': np.array(1).astype(np.int64),
        'scene_label': scene_label.astype(np.float32),
        'ema_point_clouds': ema_point_cloud.astype(np.float32),
        'flip_x_axis': np.array(flip_x_axis).astype(np.int64),
        'flip_y_axis': np.array(flip_y_axis).astype(np.int64),
        'rot_mat': rot_mat.astype(np.float32),
        'scale': np.array(scale_ratio).astype(np.float32),
        'flip_x_axis_ema': np.array(flip_x_axis_ema).astype(np.int64),
        'flip_y_axis_ema': np.array(flip_y_axis_ema).astype(np.int64),
    }
    return ret_dict
def __getitem__(self, idx):
    """Load one scan (with 2D-box features) and build its detection labels.

    Returns a dict with following keys:
        point_clouds: (N,3+C)
        center_label: (MAX_NUM_OBJ,3) center XYZ of the axis-aligned box
            enclosing each oriented GT box
        heading_class_label: (MAX_NUM_OBJ,) from DC.angle2class
        heading_residual_label: (MAX_NUM_OBJ,) from DC.angle2class
        size_class_label: (MAX_NUM_OBJ,) from DC.size2class
        size_residual_label: (MAX_NUM_OBJ,3) from DC.size2class
        sem_cls_label: (MAX_NUM_OBJ,) semantic class index
        box_label_mask: (MAX_NUM_OBJ) as 0/1 with 1 indicating a real box
        vote_label: (N,9) with votes XYZ (3 votes: X1Y1Z1, X2Y2Z2, X3Y3Z3)
            if there is only one vote then X1==X2==X3 etc.
        vote_label_mask: (N,) with 0/1 with 1 indicating the point is in
            one of the object's OBB.
        scan_idx: int scan index in scan_names list
        max_gt_bboxes: (MAX_NUM_OBJ,8) zero-padded copy of the augmented
            boxes

    Raises:
        NotImplementedError: if both use_color and use_box2d are set.
    """
    # Fail fast, before touching the disk. The original raised the
    # non-callable constant `NotImplemented`, which produced a TypeError.
    if self.use_color and self.use_box2d:
        raise NotImplementedError(
            'color and 2d bounding box at the same time is not implemented')

    scan_prefix = os.path.join(self.data_path, self.scan_names[idx])
    point_cloud = np.load(scan_prefix + '_pc.npz')['pc']  # Nx6
    bboxes = np.load(scan_prefix + '_bbox.npy')  # K,8
    point_votes = np.load(scan_prefix + '_votes.npz')['point_votes']  # Nx10
    bbox2ds = np.load(scan_prefix + '_bbox2d.npy')
    bbox2d_probs = np.load(scan_prefix + '_bbox2d_prob.npy')
    calib_Rtilt = np.load(scan_prefix + '_calib_Rtilt.npy')
    calib_K = np.load(scan_prefix + '_calib_K.npy')

    if not self.use_color:
        # NOTE(review): the 2D-box features are attached whenever color is
        # off, regardless of self.use_box2d -- confirm this is intended.
        point_cloud = get_box2d_feature(point_cloud, bbox2ds, bbox2d_probs,
                                        calib_Rtilt, calib_K)
    else:
        point_cloud = point_cloud[:, 0:6]
        point_cloud[:, 3:] = (point_cloud[:, 3:] - MEAN_COLOR_RGB)

    if self.use_height:
        # np.percentile takes q in percent, so 0.99 is a near-minimum z.
        floor_height = np.percentile(point_cloud[:, 2], 0.99)
        height = point_cloud[:, 2] - floor_height
        point_cloud = np.concatenate(
            [point_cloud, np.expand_dims(height, 1)], 1)  # (N,4) or (N,7)

    # ---------------------------- DATA AUGMENTATION ----------------------------
    if self.augment:
        if np.random.random() > 0.5:
            # Flipping along the YZ plane
            point_cloud[:, 0] = -1 * point_cloud[:, 0]
            bboxes[:, 0] = -1 * bboxes[:, 0]
            bboxes[:, 6] = np.pi - bboxes[:, 6]
            point_votes[:, [1, 4, 7]] = -1 * point_votes[:, [1, 4, 7]]

        # Rotation along up-axis/Z-axis, -30 ~ +30 degree
        rot_angle = (np.random.random() * np.pi / 3) - np.pi / 6
        rot_mat = sunrgbd_utils.rotz(rot_angle)
        rot_mat_t = np.transpose(rot_mat)
        vote_columns = (slice(1, 4), slice(4, 7), slice(7, 10))

        # Rotate the vote end points with the *un-rotated* cloud, then
        # re-express them relative to the rotated cloud.
        point_votes_end = np.zeros_like(point_votes)
        for cols in vote_columns:
            point_votes_end[:, cols] = np.dot(
                point_cloud[:, 0:3] + point_votes[:, cols], rot_mat_t)

        point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3], rot_mat_t)
        bboxes[:, 0:3] = np.dot(bboxes[:, 0:3], rot_mat_t)
        bboxes[:, 6] -= rot_angle
        for cols in vote_columns:
            point_votes[:, cols] = \
                point_votes_end[:, cols] - point_cloud[:, 0:3]

        # Augment RGB color
        if self.use_color:
            num_pts = point_cloud.shape[0]
            rgb_color = point_cloud[:, 3:6] + MEAN_COLOR_RGB
            # brightness change for each channel
            rgb_color *= (1 + 0.4 * np.random.random(3) - 0.2)
            # color shift for each channel
            rgb_color += (0.1 * np.random.random(3) - 0.05)
            # jittering on each pixel
            rgb_color += np.expand_dims(
                (0.05 * np.random.random(num_pts) - 0.025), -1)
            rgb_color = np.clip(rgb_color, 0, 1)
            # randomly drop out 30% of the points' colors
            rgb_color *= np.expand_dims(
                np.random.random(num_pts) > 0.3, -1)
            point_cloud[:, 3:6] = rgb_color - MEAN_COLOR_RGB

        # Augment point cloud scale: 0.85x-1.15x
        scale_ratio = np.random.random() * 0.3 + 0.85
        scale_ratio = np.expand_dims(np.tile(scale_ratio, 3), 0)
        point_cloud[:, 0:3] *= scale_ratio
        bboxes[:, 0:3] *= scale_ratio
        bboxes[:, 3:6] *= scale_ratio
        for cols in vote_columns:
            point_votes[:, cols] *= scale_ratio
        if self.use_height:
            point_cloud[:, -1] *= scale_ratio[0, 0]

    # ------------------------------- LABELS ------------------------------
    num_boxes = bboxes.shape[0]
    angle_classes = np.zeros((MAX_NUM_OBJ, ))
    angle_residuals = np.zeros((MAX_NUM_OBJ, ))
    size_classes = np.zeros((MAX_NUM_OBJ, ))
    size_residuals = np.zeros((MAX_NUM_OBJ, 3))
    target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
    target_bboxes_mask = np.zeros((MAX_NUM_OBJ))
    target_bboxes_mask[0:num_boxes] = 1
    max_bboxes = np.zeros((MAX_NUM_OBJ, 8))
    max_bboxes[0:num_boxes, :] = bboxes

    for i in range(num_boxes):
        bbox = bboxes[i]
        semantic_class = bbox[7]
        angle_classes[i], angle_residuals[i] = DC.angle2class(bbox[6])
        # NOTE: The mean size stored in size2class is of full length of box
        # edges, while the dumped boxes hold *half* lengths l,w,h, so
        # multiply by 2 here.
        box3d_size = bbox[3:6] * 2
        size_classes[i], size_residuals[i] = DC.size2class(
            box3d_size, DC.class2type[semantic_class])

        # Axis-aligned box enclosing the oriented box corners.
        corners_3d = sunrgbd_utils.my_compute_box_3d(bbox[0:3], bbox[3:6],
                                                     bbox[6])
        lo = np.min(corners_3d[:, 0:3], axis=0)
        hi = np.max(corners_3d[:, 0:3], axis=0)
        target_bboxes[i, 0:3] = (lo + hi) / 2
        target_bboxes[i, 3:6] = hi - lo

    point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                   self.num_points,
                                                   return_choices=True)
    # Column 0 of the votes file is the mask, columns 1: are the votes.
    point_votes_mask = point_votes[choices, 0]
    point_votes = point_votes[choices, 1:]

    target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
    target_bboxes_semcls[0:num_boxes] = bboxes[:, -1]  # from 0 to 9

    ret_dict = {}
    ret_dict['point_clouds'] = point_cloud.astype(np.float32)
    ret_dict['center_label'] = target_bboxes.astype(np.float32)[:, 0:3]
    ret_dict['heading_class_label'] = angle_classes.astype(np.int64)
    ret_dict['heading_residual_label'] = angle_residuals.astype(np.float32)
    ret_dict['size_class_label'] = size_classes.astype(np.int64)
    ret_dict['size_residual_label'] = size_residuals.astype(np.float32)
    ret_dict['sem_cls_label'] = target_bboxes_semcls.astype(np.int64)
    ret_dict['box_label_mask'] = target_bboxes_mask.astype(np.float32)
    ret_dict['vote_label'] = point_votes.astype(np.float32)
    ret_dict['vote_label_mask'] = point_votes_mask.astype(np.int64)
    ret_dict['scan_idx'] = np.array(idx).astype(np.int64)
    ret_dict['max_gt_bboxes'] = max_bboxes
    return ret_dict
def run_votenet_full_tour(point_cloud):
    """Run the network over overlapping crops of a scene and merge the outputs.

    A 4.0 x 4.0 x 3.0 window is slid over the bounding box of the cloud
    with strides 2.0 / 2.0 / 1.5. Crops holding fewer than 30000 points are
    skipped. Each kept crop is dumped to ``debug_dump_pc/`` as a PLY file,
    centered on the middle of its bounding box, sub-sampled to NUM_POINT
    points and passed through the module-level ``net``. Predicted centers
    are shifted back by the crop offset.

    Args:
        point_cloud: batched tensor; only element 0 is used. Columns 0:3
            are read as XYZ.

    Returns:
        dict mapping each key in ``keys`` to the per-crop outputs
        concatenated along dim 1, or an empty dict if no crop qualified.
    """
    point_cloud = point_cloud[0].cpu().clone().numpy()

    min_bound = np.min(point_cloud[:, :3], axis=0)
    max_bound = np.max(point_cloud[:, :3], axis=0)

    keys = [
        'center',
        'heading_scores',
        'heading_residuals',
        'heading_residuals_normalized',
        'size_scores',
        'size_residuals',
        'size_residuals_normalized',
        'sem_cls_scores',
        'objectness_scores',
        'seed_xyz',
        'vote_xyz',
        'seed_inds',
        'aggregated_vote_xyz',
        'aggregated_vote_inds',
    ]

    # Collect per-crop outputs and concatenate once at the end instead of
    # re-concatenating the growing result after every crop.
    collected = {k: [] for k in keys}
    cpt = 0
    for x in np.arange(min_bound[0], max_bound[0], 2.0):
        for y in np.arange(min_bound[1], max_bound[1], 2.0):
            for z in np.arange(min_bound[2], max_bound[2], 1.5):
                crop_mask = ((point_cloud[:, 0] > x) &
                             (point_cloud[:, 0] <= x + 4.0) &
                             (point_cloud[:, 1] > y) &
                             (point_cloud[:, 1] <= y + 4.0) &
                             (point_cloud[:, 2] > z) &
                             (point_cloud[:, 2] <= z + 3.0))
                crop_point_cloud = point_cloud[crop_mask, :].copy()
                if crop_point_cloud.shape[0] < 30000:
                    continue
                cpt += 1

                # Debug dump of the raw (un-centered) crop.
                debug_pcd = o3d.geometry.PointCloud()
                debug_pcd.points = o3d.utility.Vector3dVector(
                    crop_point_cloud[:, :3].copy())
                o3d.io.write_point_cloud(
                    f'debug_dump_pc/debug_pc_{cpt}.ply', debug_pcd)

                # center data
                minbound = np.min(crop_point_cloud[:, :3], axis=0)
                maxbound = np.max(crop_point_cloud[:, :3], axis=0)
                mid = (minbound + maxbound) / 2.0
                crop_point_cloud[:, :3] -= mid

                crop_point_cloud_sampled, _ = pc_util.random_sampling(
                    crop_point_cloud.copy(), NUM_POINT, return_choices=True)
                inputs = {
                    'point_clouds':
                    torch.FloatTensor(crop_point_cloud_sampled).to(
                        device).unsqueeze(0)
                }
                with torch.no_grad():
                    tmp_end_points = net(inputs)

                # Undo the centering on the predicted centers.
                mid = torch.FloatTensor(mid).unsqueeze(0).unsqueeze(0)
                mid = mid.repeat(1, tmp_end_points['center'].shape[1], 1)
                mid = mid.to(device)
                tmp_end_points['center'] += mid

                for k in keys:
                    collected[k].append(tmp_end_points[k].detach().clone())

    print('numer of crops PC : ', cpt)
    if cpt == 0:
        return {}
    return {k: torch.cat(collected[k], dim=1) for k in keys}
def __getitem__(self, idx):
    """Build one training sample plus a second ("ema") view of the same points.

    The main view gets a random X flip, a -30 ~ +30 degree rotation about Z
    and a 0.85x-1.15x scaling; the ema view is taken from the un-augmented
    cloud at the same sampled indices and only receives its own,
    independently drawn X flip.

    Returns a dict with following keys:
        point_clouds: (N,3+C)
        center_label: (MAX_NUM_OBJ,3) for GT box center XYZ
        heading_class_label: (MAX_NUM_OBJ,) from DC.angle2class
        heading_residual_label: (MAX_NUM_OBJ,) from DC.angle2class
        size_class_label: (MAX_NUM_OBJ,) from DC.size2class
        size_residual_label: (MAX_NUM_OBJ,3) from DC.size2class
        sem_cls_label: (MAX_NUM_OBJ,) semantic class index
        box_label_mask: (MAX_NUM_OBJ) as 0/1 with 1 indicating a real box
        vote_label: (N,9) with votes XYZ (3 votes: X1Y1Z1, X2Y2Z2, X3Y3Z3)
            if there is only one vote then X1==X2==X3 etc.
        vote_label_mask: (N,) with 0/1 with 1 indicating the point is in
            one of the object's OBB.
        scan_idx: int scan index in scan_names list
        supervised_mask: constant 1
        ema_point_clouds: (N,3+C) second view
        flip_x_axis, flip_y_axis, rot_mat, scale: main-view transforms
            (flip_y_axis is always 0 here)
        flip_x_axis_ema, flip_y_axis_ema: ema-view flips
            (flip_y_axis_ema is always 0 here)
    """
    scan_prefix = os.path.join(self.data_path, self.scan_names[idx])
    point_cloud = np.load(scan_prefix + '_pc.npz')['pc']  # Nx6
    bboxes = np.load(scan_prefix + '_bbox.npy')  # K,8
    point_votes = np.load(scan_prefix + '_votes.npz')['point_votes']  # Nx10

    if self.use_color:
        point_cloud = point_cloud[:, 0:6]
        point_cloud[:, 3:] = (point_cloud[:, 3:] - MEAN_COLOR_RGB)
    else:
        point_cloud = point_cloud[:, 0:3]

    if self.use_height:
        # q is given in percent: this is the 0.99th percentile of z.
        floor_height = np.percentile(point_cloud[:, 2], 0.99)
        height_above_floor = point_cloud[:, 2] - floor_height
        point_cloud = np.concatenate(
            [point_cloud,
             np.expand_dims(height_above_floor, 1)], 1)  # (N,4) or (N,7)

    # Snapshot taken before augmentation; the ema view is drawn from it.
    raw_points = point_cloud.copy()

    # ---------------------------- DATA AUGMENTATION ----------------------------
    flip_x_axis = flip_y_axis = 0
    flip_x_axis_ema = flip_y_axis_ema = 0
    rot_mat = np.identity(3)
    scale_ratio = np.ones((1, 3))
    vote_columns = (slice(1, 4), slice(4, 7), slice(7, 10))
    if self.augment:
        if np.random.random() > 0.5:
            # Flipping along the YZ plane
            flip_x_axis = 1
            point_cloud[:, 0] = -point_cloud[:, 0]
            bboxes[:, 0] = -bboxes[:, 0]
            bboxes[:, 6] = np.pi - bboxes[:, 6]
            point_votes[:, [1, 4, 7]] = -point_votes[:, [1, 4, 7]]

        # Rotation along up-axis/Z-axis, -30 ~ +30 degree
        # TODO: set different degree range (keep consistent with scannet?)
        rot_angle = (np.random.random() * np.pi / 3) - np.pi / 6
        rot_mat = sunrgbd_utils.rotz(rot_angle)
        rot_mat_t = np.transpose(rot_mat)

        # Rotate the vote end points using the still un-rotated cloud ...
        point_votes_end = np.zeros_like(point_votes)
        for cols in vote_columns:
            point_votes_end[:, cols] = np.dot(
                point_cloud[:, 0:3] + point_votes[:, cols], rot_mat_t)

        point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3], rot_mat_t)
        bboxes[:, 0:3] = np.dot(bboxes[:, 0:3], rot_mat_t)
        bboxes[:, 6] -= rot_angle

        # ... then express them relative to the rotated cloud again.
        for cols in vote_columns:
            point_votes[:, cols] = \
                point_votes_end[:, cols] - point_cloud[:, 0:3]

        # Augment point cloud scale: 0.85x-1.15x
        # TODO: turn on scale augmentation (keep consistent in scannet?)
        scale_ratio = np.random.random() * 0.3 + 0.85
        scale_ratio = np.expand_dims(np.tile(scale_ratio, 3), 0)
        point_cloud[:, 0:3] *= scale_ratio
        bboxes[:, 0:3] *= scale_ratio
        bboxes[:, 3:6] *= scale_ratio
        for cols in vote_columns:
            point_votes[:, cols] *= scale_ratio
        if self.use_height:
            point_cloud[:, -1] *= scale_ratio[0, 0]

    # ------------------------------- LABELS ------------------------------
    num_boxes = bboxes.shape[0]
    target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
    target_bboxes_mask = np.zeros((MAX_NUM_OBJ))
    angle_classes = np.zeros((MAX_NUM_OBJ, ))
    angle_residuals = np.zeros((MAX_NUM_OBJ, ))
    size_classes = np.zeros((MAX_NUM_OBJ, ))
    size_residuals = np.zeros((MAX_NUM_OBJ, 3))
    target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))

    target_bboxes_mask[0:num_boxes] = 1
    target_bboxes[0:num_boxes, :] = bboxes[:, 0:6]

    for slot, bbox in enumerate(bboxes):
        semantic_class = bbox[7]
        angle_class, angle_residual = DC.angle2class(bbox[6])
        # NOTE: The mean size stored in size2class is of full length of box
        # edges, while the dumped boxes hold *half* lengths l,w,h, so
        # multiply by 2 here.
        size_class, size_residual = DC.size2class(
            bbox[3:6] * 2, DC.class2type[semantic_class])
        angle_classes[slot] = angle_class
        angle_residuals[slot] = angle_residual
        size_classes[slot] = size_class
        size_residuals[slot] = size_residual
        target_bboxes_semcls[slot] = semantic_class

    point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                   self.num_points,
                                                   return_choices=True)
    # Column 0 of the votes file is the mask, columns 1: are the votes.
    point_votes_mask = point_votes[choices, 0]
    point_votes = point_votes[choices, 1:]

    # Same sampled indices, but taken from the un-augmented cloud.
    ema_point_cloud = raw_points[choices]
    if self.augment and np.random.random() > 0.5:
        # Flipping along the YZ plane (ema view only)
        flip_x_axis_ema = 1
        ema_point_cloud[:, 0] = -ema_point_cloud[:, 0]

    ret_dict = {
        'point_clouds': point_cloud.astype(np.float32),
        'center_label': target_bboxes.astype(np.float32)[:, 0:3],
        'heading_class_label': angle_classes.astype(np.int64),
        'heading_residual_label': angle_residuals.astype(np.float32),
        'size_class_label': size_classes.astype(np.int64),
        'size_residual_label': size_residuals.astype(np.float32),
        'sem_cls_label': target_bboxes_semcls.astype(np.int64),
        'box_label_mask': target_bboxes_mask.astype(np.float32),
        'vote_label': point_votes.astype(np.float32),
        'vote_label_mask': point_votes_mask.astype(np.int64),
        'scan_idx': np.array(idx).astype(np.int64),
        'supervised_mask': np.array(1).astype(np.int64),
        'ema_point_clouds': ema_point_cloud.astype(np.float32),
        'flip_x_axis': np.array(flip_x_axis).astype(np.int64),
        'flip_y_axis': np.array(flip_y_axis).astype(np.int64),
        'rot_mat': rot_mat.astype(np.float32),
        'scale': np.array(scale_ratio).astype(np.float32),
        'flip_x_axis_ema': np.array(flip_x_axis_ema).astype(np.int64),
        'flip_y_axis_ema': np.array(flip_y_axis_ema).astype(np.int64),
    }
    return ret_dict
def __getitem__(self, idx):
    """Load one scan and build its detection / voting targets.

    Reads ``<scan_name>_vert.npy``, ``_ins_label.npy``, ``_sem_label.npy``
    and ``_bbox.npy`` from ``self.data_path``, samples the scene to
    ``self.num_points`` points, and derives per-point votes plus per-box
    heading / size / class labels.

    Args:
        idx: Index into ``self.scan_names``.

    Returns:
        dict of numpy arrays:
            point_clouds: (num_points, C) float32, where C is 3 (xyz) or
                6 (xyz + normalised colour), plus 1 if ``self.use_height``.
            center_label: (MAX_NUM_OBJ, 3) float32, centers of the
                axis-aligned boxes enclosing each ground-truth box.
            heading_class_label: (MAX_NUM_OBJ,) int64.
            heading_residual_label: (MAX_NUM_OBJ,) float32.
            size_class_label: (MAX_NUM_OBJ,) int64.
            size_residual_label: (MAX_NUM_OBJ, 3) float32.
            sem_cls_label: (MAX_NUM_OBJ,) int64, last column of the boxes.
            box_label_mask: (MAX_NUM_OBJ,) float32, 1 for real boxes.
            vote_label: (num_points, 9) float32, the same 3-vector
                repeated three times.
            vote_label_mask: (num_points,) int64, 1 where a vote exists.
            scan_idx: scalar int64, equal to ``idx``.
    """
    scan_prefix = os.path.join(self.data_path, self.scan_names[idx])
    mesh_vertices = np.load(scan_prefix + '_vert.npy')
    instance_labels = np.load(scan_prefix + '_ins_label.npy')
    # Semantic labels on disk are shifted down by one before use.
    semantic_labels = np.load(scan_prefix + '_sem_label.npy').astype(
        np.int32) - 1
    bboxes = np.load(scan_prefix + '_bbox.npy')
    num_boxes = bboxes.shape[0]

    if not self.use_color:
        point_cloud = mesh_vertices[:, 0:3]  # xyz only
    else:
        # NOTE(review): assumes columns 3:6 hold RGB -- confirm against
        # the data preparation script.
        point_cloud = mesh_vertices[:, 0:6]
        point_cloud[:, 3:] = (point_cloud[:, 3:] - MEAN_COLOR_RGB) / 256.0

    if self.use_height:
        # np.percentile takes q in [0, 100]: this is the 0.99th
        # percentile of z, i.e. a robust estimate of the floor.
        floor_height = np.percentile(point_cloud[:, 2], 0.99)
        height = point_cloud[:, 2] - floor_height
        point_cloud = np.concatenate(
            [point_cloud, np.expand_dims(height, 1)], 1)

    point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                   self.num_points,
                                                   return_choices=True)
    # BUG FIX: the per-point labels must follow the same re-indexing as
    # the points. Previously they were left un-sampled, so the indices
    # computed from them below addressed the wrong rows of `point_cloud`
    # and could run past the (num_points, 3) vote buffer.
    instance_labels = instance_labels[choices]
    semantic_labels = semantic_labels[choices]

    # NOTE: no data augmentation is applied in this loader; the original
    # `if self.augment:` branch was a no-op (flip / rotation disabled).

    # ------------------------------- VOTES -------------------------------
    # There is no mapping between bbox rows and per-point instance ids
    # (filtered in data preparation), so each instance's center is taken
    # from the extent of the points that share its instance label.
    valid_classes = set(DC.type2class.values())  # hoisted out of the loop
    point_votes = np.zeros([self.num_points, 3])
    point_votes_mask = np.zeros(self.num_points)
    for i_instance in np.unique(instance_labels):
        ind = np.where(instance_labels == i_instance)[0]
        # All points of an instance are taken to share one semantic
        # label, so the first point decides whether the instance votes.
        if semantic_labels[ind[0]] in valid_classes:
            xyz = point_cloud[ind, :3]
            center = 0.5 * (xyz.min(0) + xyz.max(0))
            point_votes[ind, :] = center - xyz
            point_votes_mask[ind] = 1.0
    point_votes = np.tile(point_votes, (1, 3))  # make 3 votes identical

    # ------------------------------- LABELS ------------------------------
    target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
    target_bboxes_mask = np.zeros((MAX_NUM_OBJ))
    target_bboxes_mask[0:num_boxes] = 1
    angle_classes = np.zeros((MAX_NUM_OBJ, ))
    angle_residuals = np.zeros((MAX_NUM_OBJ, ))
    size_classes = np.zeros((MAX_NUM_OBJ, ))
    size_residuals = np.zeros((MAX_NUM_OBJ, 3))
    for i in range(num_boxes):
        bbox = bboxes[i]
        semantic_class = bbox[7]
        angle_classes[i], angle_residuals[i] = DC.angle2class(bbox[6])
        size_classes[i], size_residuals[i] = DC.size2class(
            bbox[3:6], DC.class2type[semantic_class])

        # Axis-aligned box enclosing the oriented box corners, stored as
        # (cx, cy, cz, dx, dy, dz).
        corners_3d = sunrgbd_utils.my_compute_box_3d(
            bbox[0:3], bbox[3:6], bbox[6])
        lower = corners_3d[:, 0:3].min(0)
        upper = corners_3d[:, 0:3].max(0)
        target_bboxes[i, :] = np.concatenate(
            [(lower + upper) / 2, upper - lower])

    # Second draw kept from the original. The cloud already has
    # `num_points` rows; votes and mask are re-indexed with the same
    # choices so they stay aligned with the points.
    point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                   self.num_points,
                                                   return_choices=True)
    point_votes = point_votes[choices]
    point_votes_mask = point_votes_mask[choices]

    target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
    target_bboxes_semcls[0:num_boxes] = bboxes[:, -1]  # from 0 to 9

    ret_dict = {}
    ret_dict['point_clouds'] = point_cloud.astype(np.float32)
    ret_dict['center_label'] = target_bboxes.astype(np.float32)[:, 0:3]
    ret_dict['heading_class_label'] = angle_classes.astype(np.int64)
    ret_dict['heading_residual_label'] = angle_residuals.astype(np.float32)
    ret_dict['size_class_label'] = size_classes.astype(np.int64)
    ret_dict['size_residual_label'] = size_residuals.astype(np.float32)
    ret_dict['sem_cls_label'] = target_bboxes_semcls.astype(np.int64)
    ret_dict['box_label_mask'] = target_bboxes_mask.astype(np.float32)
    ret_dict['vote_label'] = point_votes.astype(np.float32)
    ret_dict['vote_label_mask'] = point_votes_mask.astype(np.int64)
    ret_dict['scan_idx'] = np.array(idx).astype(np.int64)
    return ret_dict