def analyze_image_depths(path, bbox, out_name):
    """path should lead to a .npy file"""
    # NOTE: out_path is assumed to be defined at module scope
    img = np.load(path)
    img = np.reshape(img, img.shape[:2])
    img_slice = img[bbox[0]:bbox[2], bbox[1]:bbox[3]]
    vec = np.ndarray.flatten(img_slice)
    # vec = reject_outliers(vec)
    var = np.var(vec)
    mean = np.mean(vec)
    print("Stats for {}: Mean: {}, Standard Deviation: {}\n".format(
        out_name, mean, np.sqrt(var)))

    n, bins, patches = plt.hist(vec, vec.size // 10, facecolor="blue")
    plt.xlabel("depth value")
    plt.ylabel("count")
    plt.title("depth within region")
    plt.grid(True)
    # save before show(): on some backends the current figure is blank
    # after the interactive window is closed
    plt.savefig(os.path.join(out_path, "graph_" + out_name),
                bbox_inches="tight")
    plt.show()
    plt.close()

    depth_img = DepthImage(img_slice)
    depth_img.save(os.path.join(out_path, out_name))
def test_depth_init(self):
    # valid data
    random_valid_data = np.random.rand(IM_HEIGHT,
                                       IM_WIDTH).astype(np.float32)
    im = DepthImage(random_valid_data)
    self.assertEqual(im.height, IM_HEIGHT)
    self.assertEqual(im.width, IM_WIDTH)
    self.assertEqual(im.channels, 1)
    self.assertEqual(im.type, np.float32)
    self.assertTrue(np.allclose(im.data, random_valid_data))

    # invalid channels
    random_data = np.random.rand(IM_HEIGHT, IM_WIDTH, 3).astype(np.float32)
    caught_bad_channels = False
    try:
        im = DepthImage(random_data)
    except ValueError:
        caught_bad_channels = True
    self.assertTrue(caught_bad_channels)

    # invalid dtype
    random_data = np.random.rand(IM_HEIGHT, IM_WIDTH).astype(np.uint8)
    caught_bad_dtype = False
    try:
        im = DepthImage(random_data)
    except ValueError:
        caught_bad_dtype = True
    self.assertTrue(caught_bad_dtype)
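# The manual try/except flag pattern above can also be expressed with
# unittest's assertRaises context manager. A minimal sketch, assuming the
# same IM_HEIGHT/IM_WIDTH constants and DepthImage validation behavior as
# the test above (the test name is hypothetical):
def test_depth_init_raises(self):
    # 3-channel data should be rejected as invalid channels
    bad_channels = np.random.rand(IM_HEIGHT, IM_WIDTH, 3).astype(np.float32)
    with self.assertRaises(ValueError):
        DepthImage(bad_channels)

    # uint8 data should be rejected as an invalid dtype
    bad_dtype = np.random.rand(IM_HEIGHT, IM_WIDTH).astype(np.uint8)
    with self.assertRaises(ValueError):
        DepthImage(bad_dtype)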
def inpaint(img):
    """Inpaint the image."""
    # create DepthImage from gray version of img
    gray_img = skimage.color.rgb2gray(img)
    depth_img = DepthImage(gray_img)

    # zero out high-gradient areas and inpaint
    thresh_img = depth_img.threshold_gradients_pctile(0.95)
    inpaint_img = thresh_img.inpaint()
    return inpaint_img.data
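# Example use of inpaint() above; a minimal sketch. The input path is
# hypothetical, and skimage.io is assumed to be importable alongside the
# skimage.color module already used by inpaint().
import skimage.io

rgb = skimage.io.imread("scene.png")  # hypothetical RGB input image
filled_depth = inpaint(rgb)           # returns a numpy array of depth data
print(filled_depth.shape, filled_depth.dtype)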
def _depth_im_from_pointcloud(self, msg):
    """Convert a PointCloud2 message to a depth image."""
    # set format
    if self._format is None:
        self._set_format(msg)

    # rescale camera intr in case binning is turned on
    if msg.height != self._camera_intr.height:
        rescale_factor = float(msg.height) / self._camera_intr.height
        self._camera_intr = self._camera_intr.resize(rescale_factor)

    # read num points
    num_points = msg.height * msg.width

    # read buffer
    raw_tup = struct.Struct(self._format).unpack_from(msg.data, 0)
    raw_arr = np.array(raw_tup)

    # subsample depth values and reshape
    depth_ind = 2 + 4 * np.arange(num_points)
    depth_buf = raw_arr[depth_ind]
    depth_arr = depth_buf.reshape(msg.height, msg.width)
    depth_im = DepthImage(depth_arr, frame=self._frame)
    return depth_im
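# The indexing in _depth_im_from_pointcloud assumes each unpacked point
# occupies four fields with depth (z) at offset 2, so depth values land at
# flat indices 2, 6, 10, ... A standalone numpy sketch of that layout
# assumption:
import numpy as np

points = np.array([[0.1, 0.2, 0.9, 0.0],    # x, y, z, padding/rgb
                   [0.3, 0.4, 1.1, 0.0]]).ravel()
depth = points[2 + 4 * np.arange(2)]
print(depth)  # -> [0.9 1.1]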
def _read_color_and_depth_image(self):
    """Read a color and depth image from the device."""
    frames = self._pipe.wait_for_frames()
    if self._depth_align:
        frames = self._align.process(frames)

    depth_frame = frames.get_depth_frame()
    color_frame = frames.get_color_frame()

    if not depth_frame or not color_frame:
        logging.warning("Could not retrieve frames.")
        return None, None

    if self._filter_depth:
        depth_frame = self._filter_depth_frame(depth_frame)

    # convert to numpy arrays
    depth_image = self._to_numpy(depth_frame, np.float32)
    color_image = self._to_numpy(color_frame, np.uint8)

    # convert depth to meters
    depth_image *= self._depth_scale

    # bgr to rgb
    color_image = color_image[..., ::-1]

    depth = DepthImage(depth_image, frame=self._frame)
    color = ColorImage(color_image, frame=self._frame)
    return color, depth
def rendered_images(data, render_mode=None):
    if render_mode is None:
        render_mode = RenderMode.SEGMASK
    rendered_images = []
    num_images = data.attrs[NUM_IMAGES_KEY]

    for i in range(num_images):
        # get the image data
        image_key = IMAGE_KEY + '_' + str(i)
        image_data = data[image_key]
        image_arr = np.array(image_data[IMAGE_DATA_KEY])
        frame = image_data.attrs[IMAGE_FRAME_KEY]
        if render_mode == RenderMode.SEGMASK:
            image = BinaryImage(image_arr, frame)
        elif render_mode == RenderMode.DEPTH:
            image = DepthImage(image_arr, frame)
        elif render_mode == RenderMode.SCALED_DEPTH:
            image = ColorImage(image_arr, frame)
        else:
            # guard against an unbound `image` below
            raise ValueError('Render mode %s not supported' % render_mode)

        R_camera_table = image_data.attrs[CAM_ROT_KEY]
        t_camera_table = image_data.attrs[CAM_POS_KEY]
        frame = image_data.attrs[CAM_FRAME_KEY]
        T_camera_world = RigidTransform(R_camera_table,
                                        t_camera_table,
                                        from_frame=frame,
                                        to_frame='world')

        rendered_images.append(ObjectRender(image, T_camera_world))
    return rendered_images
def visualize_predictions(
    run_dir,
    test_config,
    pred_mask_dir,
    pred_info_dir,
    show_bbox=True,
    show_class=True,
):
    """Visualizes predictions."""
    # Create subdirectory for prediction visualizations
    vis_dir = os.path.join(run_dir, "vis")
    depth_dir = os.path.join(test_config["path"], test_config["images"])
    if not os.path.exists(vis_dir):
        os.makedirs(vis_dir)

    indices_arr = np.load(
        os.path.join(test_config["path"], test_config["indices"]))
    image_ids = np.arange(indices_arr.size)

    ##################################################################
    # Process each image
    ##################################################################
    print("VISUALIZING PREDICTIONS")
    for image_id in tqdm(image_ids):
        depth_image_fn = "image_{:06d}.npy".format(indices_arr[image_id])
        # depth_image_fn = 'image_{:06d}.png'.format(indices_arr[image_id])

        # Load image and ground truth data and resize for net
        depth_data = np.load(os.path.join(depth_dir, depth_image_fn))
        # image = ColorImage.open(os.path.join(depth_dir, '..',
        #     'depth_ims', depth_image_fn)).data
        image = DepthImage(depth_data).to_color().data

        # load mask and info (allow_pickle is required to unpickle the
        # saved info dict on numpy >= 1.16.3)
        r = np.load(os.path.join(pred_info_dir,
                                 "image_{:06}.npy".format(image_id)),
                    allow_pickle=True).item()
        r_masks = np.load(
            os.path.join(pred_mask_dir, "image_{:06}.npy".format(image_id)))

        # Must transpose from (n, h, w) to (h, w, n)
        if r_masks.any():
            r["masks"] = np.transpose(r_masks, (1, 2, 0))
        else:
            r["masks"] = r_masks

        # Visualize
        fig = plt.figure(figsize=(1.7067, 1.7067), dpi=300, frameon=False)
        ax = plt.Axes(fig, [0.0, 0.0, 1.0, 1.0])
        fig.add_axes(ax)
        visualize.display_instances(
            image,
            r["rois"],
            r["masks"],
            r["class_ids"],
            ["bg", "obj"],
            ax=ax,
            show_bbox=show_bbox,
            show_class=show_class,
        )
        file_name = os.path.join(vis_dir, "vis_{:06d}".format(image_id))
        fig.savefig(file_name, transparent=True, dpi=300)
        plt.close()
def _depth_im_callback(self, msg):
    """Callback for handling depth images."""
    try:
        # convert from mm to meters while wrapping
        self._cur_depth_im = DepthImage(
            self._bridge.imgmsg_to_cv2(msg) / 1000.0, frame=self._frame)
    except (CvBridgeError, ValueError):
        self._cur_depth_im = None
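# A callback like _depth_im_callback is typically registered with a rospy
# subscriber when the sensor starts. A minimal sketch, assuming it runs
# inside the same class; the topic name is hypothetical:
import rospy
from sensor_msgs.msg import Image

def start(self):
    """Register the depth-image subscriber (sketch)."""
    self._depth_sub = rospy.Subscriber(
        "/camera/depth/image_raw",  # hypothetical topic name
        Image,
        self._depth_im_callback,
        queue_size=1)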
def frames(self):
    """Retrieve the next frame from the tensor dataset and convert it to
    a ColorImage and a DepthImage.

    Returns
    -------
    :obj:`tuple` of :obj:`ColorImage`, :obj:`DepthImage`
        The ColorImage and DepthImage of the current frame.

    Raises
    ------
    RuntimeError
        If the stream is not running or if all images in the directory
        have been used.
    """
    if not self._running:
        raise RuntimeError(
            "Device pointing to %s not running. Cannot read frames" %
            (self._path_to_images))

    if self._im_index >= self._num_images:
        raise RuntimeError("Device is out of images")

    # read images
    datapoint = self._dataset.datapoint(
        self._im_index, TensorDatasetVirtualSensor.IMAGE_FIELDS)
    color_im = ColorImage(
        datapoint[TensorDatasetVirtualSensor.COLOR_IM_FIELD],
        frame=self._frame,
    )
    depth_im = DepthImage(
        datapoint[TensorDatasetVirtualSensor.DEPTH_IM_FIELD],
        frame=self._frame,
    )
    if self._image_rescale_factor != 1.0:
        color_im = color_im.resize(self._image_rescale_factor)
        depth_im = depth_im.resize(self._image_rescale_factor,
                                   interp="nearest")
    self._im_index = (self._im_index + 1) % self._num_images
    return color_im, depth_im
def _depth_image_callback(self, image_msg):
    """Subscribe to the depth image topic and keep it up to date."""
    encoding = image_msg.encoding
    try:
        depth_arr = self._bridge.imgmsg_to_cv2(image_msg, encoding)
    except CvBridgeError as e:
        rospy.logerr(e)
        return  # depth_arr is undefined if conversion failed
    depth = np.array(depth_arr * MM_TO_METERS, np.float32)
    self._cur_depth_im = DepthImage(depth, self._frame)
def _read_depth_images(self, num_images):
    """Reads depth images from the device."""
    depth_images = self._ros_read_images(self._depth_image_buffer,
                                         num_images, self.staleness_limit)
    for i in range(0, num_images):
        # convert from mm to meters
        depth_images[i] = depth_images[i] * MM_TO_METERS
        if self._flip_images:
            depth_images[i] = np.flipud(depth_images[i])
            depth_images[i] = np.fliplr(depth_images[i])
        depth_images[i] = DepthImage(depth_images[i], frame=self._frame)
    return depth_images
def frames(self):
    """Retrieve the next frame from the image directory and convert it to
    a ColorImage and a DepthImage.

    Returns
    -------
    :obj:`tuple` of :obj:`ColorImage`, :obj:`DepthImage`
        The ColorImage and DepthImage of the current frame.

    Raises
    ------
    RuntimeError
        If the stream is not running or if all images in the directory
        have been used.
    """
    if not self._running:
        raise RuntimeError(
            "Device pointing to %s not running. Cannot read frames" %
            (self._path_to_images))

    if (self._im_index >= self._num_images) and not self._loop:
        raise RuntimeError("Device is out of images")
    elif (self._im_index >= self._num_images) and self._loop:
        self._im_index = 0

    # read images
    color_filename = os.path.join(
        self._path_to_images,
        "color_%d%s" % (self._im_index, self._color_ext),
    )
    color_im = ColorImage.open(color_filename, frame=self._frame)
    depth_filename = os.path.join(self._path_to_images,
                                  "depth_%d.npy" % (self._im_index))
    depth_im = DepthImage.open(depth_filename, frame=self._frame)
    self._im_index += 1
    return color_im, depth_im
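# frames() above expects the image directory to hold paired, zero-indexed
# files ("color_%d<ext>" and "depth_%d.npy"). A minimal sketch generating
# such a directory with a hypothetical path, mirroring the save calls used
# in the virtual-camera test further below:
import os
import numpy as np

path_to_images = "virtual_images"  # hypothetical directory
if not os.path.exists(path_to_images):
    os.makedirs(path_to_images)
for i in range(3):
    ColorImage((255 * np.random.rand(64, 64, 3)).astype(np.uint8)).save(
        os.path.join(path_to_images, "color_%d.png" % i))
    DepthImage(np.random.rand(64, 64).astype(np.float32)).save(
        os.path.join(path_to_images, "depth_%d.npy" % i))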
def _frames_and_index_map(self, skip_registration=False):
    """Retrieve a new frame from the Kinect and return a ColorImage,
    DepthImage, IrImage, and a map from depth pixels to color pixel
    indices.

    Parameters
    ----------
    skip_registration : bool
        If True, the registration step is skipped.

    Returns
    -------
    :obj:`tuple` of :obj:`ColorImage`, :obj:`DepthImage`, :obj:`IrImage`,
    :obj:`numpy.ndarray`
        The ColorImage, DepthImage, and IrImage of the current frame, and
        an ndarray that maps pixels of the depth image to the index of the
        corresponding pixel in the color image.

    Raises
    ------
    RuntimeError
        If the Kinect stream is not running.
    """
    if not self._running:
        raise RuntimeError(
            "Kinect2 device %s not running. Cannot read frames" %
            (self._device_num))

    # read frames
    frames = self._listener.waitForNewFrame()
    unregistered_color = frames["color"]
    distorted_depth = frames["depth"]
    ir = frames["ir"]

    # apply color to depth registration
    color_frame = self._color_frame
    color = unregistered_color
    depth = distorted_depth
    color_depth_map = np.zeros([depth.height,
                                depth.width]).astype(np.int32).ravel()
    if (not skip_registration and self._registration_mode ==
            Kinect2RegistrationMode.COLOR_TO_DEPTH):
        color_frame = self._ir_frame
        depth = lf2.Frame(depth.width, depth.height, 4,
                          lf2.FrameType.Depth)
        color = lf2.Frame(depth.width, depth.height, 4,
                          lf2.FrameType.Color)
        self._registration.apply(
            unregistered_color,
            distorted_depth,
            depth,
            color,
            color_depth_map=color_depth_map,
        )

    # convert to array (copy needed to prevent reference of deleted data)
    color_arr = np.copy(color.asarray())
    color_arr[:, :, [0, 2]] = color_arr[:, :, [2, 0]]  # convert BGR to RGB
    color_arr[:, :, 0] = np.fliplr(color_arr[:, :, 0])
    color_arr[:, :, 1] = np.fliplr(color_arr[:, :, 1])
    color_arr[:, :, 2] = np.fliplr(color_arr[:, :, 2])
    color_arr[:, :, 3] = np.fliplr(color_arr[:, :, 3])
    depth_arr = np.fliplr(np.copy(depth.asarray()))
    ir_arr = np.fliplr(np.copy(ir.asarray()))

    # convert from mm to meters
    if self._depth_mode == Kinect2DepthMode.METERS:
        depth_arr = depth_arr * MM_TO_METERS

    # release and return
    self._listener.release(frames)
    return (
        ColorImage(color_arr[:, :, :3], color_frame),
        DepthImage(depth_arr, self._ir_frame),
        IrImage(ir_arr.astype(np.uint16), self._ir_frame),
        color_depth_map,
    )
def detect(detector_type, config, run_dir, test_config):
    """Run PCL-based detection on a depth-image-based dataset.

    Parameters
    ----------
    detector_type : str
        type of PCL detector, either 'euclidean' or 'region_growing'
    config : dict
        config for a PCL detector
    run_dir : str
        Directory to save outputs in. Output will be saved in pred_masks,
        pred_info, and modal_segmasks_processed subdirectories.
    test_config : dict
        config containing dataset information
    """

    ##################################################################
    # Set up output directories
    ##################################################################

    # Create subdirectory for prediction masks
    pred_dir = os.path.join(run_dir, 'pred_masks')
    mkdir_if_missing(pred_dir)

    # Create subdirectory for prediction scores & bboxes
    pred_info_dir = os.path.join(run_dir, 'pred_info')
    mkdir_if_missing(pred_info_dir)

    # Create subdirectory for transformed GT segmasks
    resized_segmask_dir = os.path.join(run_dir, 'modal_segmasks_processed')
    mkdir_if_missing(resized_segmask_dir)

    ##################################################################
    # Set up input directories
    ##################################################################

    dataset_dir = test_config['path']
    indices_arr = np.load(os.path.join(dataset_dir, test_config['indices']))

    # Input depth image data (numpy files, not .pngs)
    depth_dir = os.path.join(dataset_dir, test_config['images'])

    # Input GT binary masks dir
    gt_mask_dir = os.path.join(dataset_dir, test_config['masks'])

    # Input binary mask data (initialize to None so the check below
    # does not raise a NameError when no bin masks are configured)
    bin_mask_dir = None
    if 'bin_masks' in test_config.keys():
        bin_mask_dir = os.path.join(dataset_dir, test_config['bin_masks'])

    # Input camera intrinsics
    camera_intrinsics_fn = os.path.join(dataset_dir,
                                        'camera_intrinsics.intr')
    camera_intrs = CameraIntrinsics.load(camera_intrinsics_fn)

    image_ids = np.arange(indices_arr.size)

    ##################################################################
    # Process each image
    ##################################################################
    for image_id in tqdm(image_ids):
        base_name = 'image_{:06d}'.format(indices_arr[image_id])
        output_name = 'image_{:06d}'.format(image_id)
        depth_image_fn = base_name + '.npy'

        # Extract depth image
        depth_data = np.load(os.path.join(depth_dir, depth_image_fn))
        depth_im = DepthImage(depth_data, camera_intrs.frame)
        depth_im = depth_im.inpaint(0.25)

        # Mask out bin pixels if appropriate/necessary
        if bin_mask_dir:
            mask_im = BinaryImage.open(
                os.path.join(bin_mask_dir, base_name + '.png'),
                camera_intrs.frame)
            mask_im = mask_im.resize(depth_im.shape[:2])
            depth_im = depth_im.mask_binary(mask_im)

        point_cloud = camera_intrs.deproject(depth_im)
        point_cloud.remove_zero_points()
        pcl_cloud = pcl.PointCloud(point_cloud.data.T.astype(np.float32))
        tree = pcl_cloud.make_kdtree()

        if detector_type == 'euclidean':
            segmentor = pcl_cloud.make_EuclideanClusterExtraction()
            segmentor.set_ClusterTolerance(config['tolerance'])
        elif detector_type == 'region_growing':
            segmentor = pcl_cloud.make_RegionGrowing(ksearch=50)
            segmentor.set_NumberOfNeighbours(config['n_neighbors'])
            segmentor.set_CurvatureThreshold(config['curvature'])
            segmentor.set_SmoothnessThreshold(config['smoothness'])
        else:
            print('PCL detector type not supported')
            exit()

        segmentor.set_MinClusterSize(config['min_cluster_size'])
        segmentor.set_MaxClusterSize(config['max_cluster_size'])
        segmentor.set_SearchMethod(tree)
        cluster_indices = segmentor.Extract()

        # Set up predicted masks and metadata
        indiv_pred_masks = []
        r_info = {
            'rois': [],
            'scores': [],
            'class_ids': [],
        }

        for i, cluster in enumerate(cluster_indices):
            points = pcl_cloud.to_array()[cluster]
            indiv_pred_mask = camera_intrs.project_to_image(
                PointCloud(points.T, frame=camera_intrs.frame)).to_binary()
            indiv_pred_mask.data[indiv_pred_mask.data > 0] = 1
            indiv_pred_masks.append(indiv_pred_mask.data)

            # Compute bounding box, score, class_id
            nonzero_pix = np.nonzero(indiv_pred_mask.data)
            min_x, max_x = np.min(nonzero_pix[1]), np.max(nonzero_pix[1])
            min_y, max_y = np.min(nonzero_pix[0]), np.max(nonzero_pix[0])
            r_info['rois'].append([min_y, min_x, max_y, max_x])
            r_info['scores'].append(1.0)
            r_info['class_ids'].append(1)

        r_info['rois'] = np.array(r_info['rois'])
        r_info['scores'] = np.array(r_info['scores'])
        r_info['class_ids'] = np.array(r_info['class_ids'])

        # Write the predicted masks and metadata
        if indiv_pred_masks:
            pred_mask_output = np.stack(indiv_pred_masks).astype(np.uint8)
        else:
            pred_mask_output = np.array(indiv_pred_masks).astype(np.uint8)

        # Save out ground-truth mask as array of shape (n, h, w)
        indiv_gt_masks = []
        gt_mask = cv2.imread(os.path.join(gt_mask_dir, base_name + '.png'))
        gt_mask = cv2.resize(gt_mask,
                             (depth_im.shape[1],
                              depth_im.shape[0])).astype(np.uint8)[:, :, 0]
        num_gt_masks = np.max(gt_mask)
        for i in range(1, num_gt_masks + 1):
            indiv_gt_mask = (gt_mask == i)
            if np.any(indiv_gt_mask):
                indiv_gt_masks.append(gt_mask == i)
        gt_mask_output = np.stack(indiv_gt_masks)

        np.save(os.path.join(resized_segmask_dir, output_name + '.npy'),
                gt_mask_output)
        np.save(os.path.join(pred_dir, output_name + '.npy'),
                pred_mask_output)
        np.save(os.path.join(pred_info_dir, output_name + '.npy'), r_info)

    print('Saved prediction masks to:\t {}'.format(pred_dir))
    print('Saved prediction info (bboxes, scores, classes) to:\t {}'.format(
        pred_info_dir))
    print('Saved transformed GT segmasks to:\t {}'.format(
        resized_segmask_dir))

    return pred_dir, pred_info_dir, resized_segmask_dir
def create_camera_data(self,
                       rgb: np.ndarray,
                       depth: np.ndarray,
                       cam_intr_frame: str,
                       fx: float,
                       fy: float,
                       cx: float,
                       cy: float,
                       skew: float,
                       w: int,
                       h: int,
                       seg_mask: np.ndarray = np.empty(shape=(0, )),
                       bbox: tuple = ()):
    """Create the CameraData object in the format expected by gqcnn.

    Parameters
    ----------
    rgb : np.ndarray
        RGB image
    depth : np.ndarray
        depth image
    cam_intr_frame : str
        the reference frame of the images; grasp poses are computed wrt
        this frame
    fx : float
        focal length (x)
    fy : float
        focal length (y)
    cx : float
        principal point (x)
    cy : float
        principal point (y)
    skew : float
        skew coefficient
    w : int
        width
    h : int
        height
    seg_mask : np.ndarray, optional
        segmentation mask
    bbox : tuple, optional
        a tuple of 4 values that define the mask bounding box
        (x_min, y_min, x_max, y_max)

    Returns
    -------
    CameraData
        object that stores the input data required by plan_grasp()
    """
    camera_data = CameraData()

    # Create images
    camera_data.rgb_img = ColorImage(rgb, frame=cam_intr_frame)
    camera_data.depth_img = DepthImage(depth, frame=cam_intr_frame)
    if seg_mask.size > 0:
        camera_data.seg_img = BinaryImage(seg_mask, cam_intr_frame)

    # Check image sizes
    if camera_data.rgb_img.height != camera_data.depth_img.height or \
            camera_data.rgb_img.width != camera_data.depth_img.width:
        msg = ("Color image and depth image must be the same shape! Color"
               " is %d x %d but depth is %d x %d") % (
                   camera_data.rgb_img.height, camera_data.rgb_img.width,
                   camera_data.depth_img.height,
                   camera_data.depth_img.width)
        raise AssertionError(msg)

    if (camera_data.rgb_img.height < self.min_height
            or camera_data.rgb_img.width < self.min_width):
        msg = ("Color image is too small! Must be at least %d x %d"
               " resolution but the requested image is only %d x %d") % (
                   self.min_height, self.min_width,
                   camera_data.rgb_img.height, camera_data.rgb_img.width)
        raise AssertionError(msg)

    # Only check the segmask shape when a segmask was actually provided
    if seg_mask.size > 0 and \
            (camera_data.rgb_img.height != camera_data.seg_img.height or
             camera_data.rgb_img.width != camera_data.seg_img.width):
        msg = ("Images and segmask must be the same shape! Color image is"
               " %d x %d but segmask is %d x %d") % (
                   camera_data.rgb_img.height, camera_data.rgb_img.width,
                   camera_data.seg_img.height, camera_data.seg_img.width)
        raise AssertionError(msg)

    # set intrinsic params
    camera_data.intrinsic_params = CameraIntrinsics(
        cam_intr_frame, fx, fy, cx, cy, skew, h, w)

    # set mask bounding box
    if len(bbox) == 4:
        camera_data.bounding_box = {
            'min_x': bbox[0],
            'min_y': bbox[1],
            'max_x': bbox[2],
            'max_y': bbox[3]
        }

    return camera_data
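# Example call to create_camera_data(); a minimal sketch with dummy arrays.
# The `planner` object and the intrinsic values are assumptions for
# illustration only:
import numpy as np

h, w = 480, 640
rgb = np.zeros((h, w, 3), dtype=np.uint8)
depth = np.ones((h, w), dtype=np.float32)
camera_data = planner.create_camera_data(  # hypothetical planner instance
    rgb, depth,
    cam_intr_frame="camera",
    fx=525.0, fy=525.0, cx=w / 2.0, cy=h / 2.0,
    skew=0.0, w=w, h=h)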
def read_images(self, req):
    """Reads images from a ROS service request.

    Parameters
    ----------
    req : :obj:`ROS ServiceRequest`
        ROS ServiceRequest for grasp planner service.
    """
    # Get the raw depth and color images as ROS `Image` objects.
    raw_color = req.color_image
    raw_depth = req.depth_image

    # Get the raw camera info as ROS `CameraInfo`.
    raw_camera_info = req.camera_info

    # Wrap the camera info in a BerkeleyAutomation/perception
    # `CameraIntrinsics` object.
    camera_intr = CameraIntrinsics(
        frame=raw_camera_info.header.frame_id,
        fx=raw_camera_info.K[0],
        fy=raw_camera_info.K[4],
        cx=raw_camera_info.K[2],
        cy=raw_camera_info.K[5],
        width=raw_camera_info.width,
        height=raw_camera_info.height,
    )

    # Create wrapped BerkeleyAutomation/perception RGB and depth images by
    # unpacking the ROS images using ROS `CvBridge`
    try:
        color_im = ColorImage(self.cv_bridge.imgmsg_to_cv2(
            raw_color, "rgb8"),
                              frame=camera_intr.frame)

        cv2_depth = self.cv_bridge.imgmsg_to_cv2(
            raw_depth, desired_encoding="passthrough")
        cv2_depth = np.array(cv2_depth, dtype=np.float32)

        # convert from mm to meters
        cv2_depth *= 0.001

        # replace NaN readings with a large sentinel depth
        nan_idxs = np.isnan(cv2_depth)
        cv2_depth[nan_idxs] = 1000.0

        depth_im = DepthImage(cv2_depth, frame=camera_intr.frame)
    except CvBridgeError as cv_bridge_exception:
        rospy.logerr(cv_bridge_exception)
        raise  # color_im/depth_im are undefined on conversion failure

    # Check image sizes.
    if color_im.height != depth_im.height or \
            color_im.width != depth_im.width:
        msg = ("Color image and depth image must be the same shape! Color"
               " is %d x %d but depth is %d x %d") % (
                   color_im.height, color_im.width, depth_im.height,
                   depth_im.width)
        rospy.logerr(msg)
        raise rospy.ServiceException(msg)

    if (color_im.height < self.min_height
            or color_im.width < self.min_width):
        msg = ("Color image is too small! Must be at least %d x %d"
               " resolution but the requested image is only %d x %d") % (
                   self.min_height, self.min_width, color_im.height,
                   color_im.width)
        rospy.logerr(msg)
        raise rospy.ServiceException(msg)

    # --- create CameraData struct --- #
    camera_data = CameraData()
    camera_data.rgb_img = color_im
    camera_data.depth_img = depth_im
    camera_data.intrinsic_params = camera_intr

    return camera_data
def generate_segmask_dataset(output_dataset_path,
                             config,
                             save_tensors=True,
                             warm_start=False):
    """Generate a segmentation training dataset.

    Parameters
    ----------
    output_dataset_path : str
        path to store the dataset
    config : dict
        dictionary-like object containing parameters of the simulator and
        visualization
    save_tensors : bool
        save tensor datasets (for recreating state)
    warm_start : bool
        restart dataset generation from a previous state
    """
    # read subconfigs
    dataset_config = config["dataset"]
    image_config = config["images"]
    vis_config = config["vis"]

    # debugging
    debug = config["debug"]
    if debug:
        np.random.seed(SEED)

    # read general parameters
    num_states = config["num_states"]
    num_images_per_state = config["num_images_per_state"]
    states_per_flush = config["states_per_flush"]
    states_per_garbage_collect = config["states_per_garbage_collect"]

    # set max obj per state
    max_objs_per_state = config["state_space"]["heap"]["max_objs"]

    # read image parameters
    im_height = config["state_space"]["camera"]["im_height"]
    im_width = config["state_space"]["camera"]["im_width"]
    segmask_channels = max_objs_per_state + 1

    # create the dataset path and all subfolders if they don't exist
    if not os.path.exists(output_dataset_path):
        os.mkdir(output_dataset_path)

    image_dir = os.path.join(output_dataset_path, "images")
    if not os.path.exists(image_dir):
        os.mkdir(image_dir)
    color_dir = os.path.join(image_dir, "color_ims")
    if image_config["color"] and not os.path.exists(color_dir):
        os.mkdir(color_dir)
    depth_dir = os.path.join(image_dir, "depth_ims")
    if image_config["depth"] and not os.path.exists(depth_dir):
        os.mkdir(depth_dir)
    amodal_dir = os.path.join(image_dir, "amodal_masks")
    if image_config["amodal"] and not os.path.exists(amodal_dir):
        os.mkdir(amodal_dir)
    modal_dir = os.path.join(image_dir, "modal_masks")
    if image_config["modal"] and not os.path.exists(modal_dir):
        os.mkdir(modal_dir)
    semantic_dir = os.path.join(image_dir, "semantic_masks")
    if image_config["semantic"] and not os.path.exists(semantic_dir):
        os.mkdir(semantic_dir)

    # setup logging
    experiment_log_filename = os.path.join(output_dataset_path,
                                           "dataset_generation.log")
    if os.path.exists(experiment_log_filename) and not warm_start:
        os.remove(experiment_log_filename)
    Logger.add_log_file(logger,
                        experiment_log_filename,
                        global_log_file=True)
    config.save(
        os.path.join(output_dataset_path, "dataset_generation_params.yaml"))
    metadata = {}
    num_prev_states = 0

    # set dataset params
    if save_tensors:

        # read dataset subconfigs
        state_dataset_config = dataset_config["states"]
        image_dataset_config = dataset_config["images"]
        state_tensor_config = state_dataset_config["tensors"]
        image_tensor_config = image_dataset_config["tensors"]

        obj_pose_dim = POSE_DIM * max_objs_per_state
        obj_com_dim = POINT_DIM * max_objs_per_state
        state_tensor_config["fields"]["obj_poses"]["height"] = obj_pose_dim
        state_tensor_config["fields"]["obj_coms"]["height"] = obj_com_dim
        state_tensor_config["fields"]["obj_ids"]["height"] = \
            max_objs_per_state

        image_tensor_config["fields"]["camera_pose"]["height"] = POSE_DIM

        if image_config["color"]:
            image_tensor_config["fields"]["color_im"] = {
                "dtype": "uint8",
                "channels": 3,
                "height": im_height,
                "width": im_width,
            }
        if image_config["depth"]:
            image_tensor_config["fields"]["depth_im"] = {
                "dtype": "float32",
                "channels": 1,
                "height": im_height,
                "width": im_width,
            }
        if image_config["modal"]:
            image_tensor_config["fields"]["modal_segmasks"] = {
                "dtype": "uint8",
                "channels": segmask_channels,
                "height": im_height,
                "width": im_width,
            }
        if image_config["amodal"]:
            image_tensor_config["fields"]["amodal_segmasks"] = {
                "dtype": "uint8",
                "channels": segmask_channels,
                "height": im_height,
                "width": im_width,
            }
        if image_config["semantic"]:
            image_tensor_config["fields"]["semantic_segmasks"] = {
                "dtype": "uint8",
                "channels": 1,
                "height": im_height,
                "width": im_width,
            }

        # create dataset filenames
        state_dataset_path = os.path.join(output_dataset_path,
                                          "state_tensors")
        image_dataset_path = os.path.join(output_dataset_path,
                                          "image_tensors")

        if warm_start:

            if not os.path.exists(state_dataset_path) or \
                    not os.path.exists(image_dataset_path):
                logger.error(
                    "Attempting to warm start without saved tensor dataset")
                exit(1)

            # open datasets
            logger.info("Opening state dataset")
            state_dataset = TensorDataset.open(state_dataset_path,
                                               access_mode="READ_WRITE")
            logger.info("Opening image dataset")
            image_dataset = TensorDataset.open(image_dataset_path,
                                               access_mode="READ_WRITE")

            # read configs
            state_tensor_config = state_dataset.config
            image_tensor_config = image_dataset.config

            # clean up datasets (there may be datapoints with indices
            # corresponding to non-existent data)
            num_state_datapoints = state_dataset.num_datapoints
            num_image_datapoints = image_dataset.num_datapoints
            num_prev_states = num_state_datapoints

            # clean up images
            image_ind = num_image_datapoints - 1
            image_datapoint = image_dataset[image_ind]
            while (image_ind > 0 and
                   image_datapoint["state_ind"] >= num_state_datapoints):
                image_ind -= 1
                image_datapoint = image_dataset[image_ind]

            images_to_remove = num_image_datapoints - 1 - image_ind
            logger.info("Deleting last %d image tensors" %
                        (images_to_remove))
            if images_to_remove > 0:
                image_dataset.delete_last(images_to_remove)
                num_image_datapoints = image_dataset.num_datapoints
        else:
            # create datasets from scratch
            logger.info("Creating datasets")

            state_dataset = TensorDataset(state_dataset_path,
                                          state_tensor_config)
            image_dataset = TensorDataset(image_dataset_path,
                                          image_tensor_config)

        # read templates
        state_datapoint = state_dataset.datapoint_template
        image_datapoint = image_dataset.datapoint_template

    if warm_start:

        if not os.path.exists(
                os.path.join(output_dataset_path, "metadata.json")):
            logger.error(
                "Attempting to warm start without previously created"
                " dataset")
            exit(1)

        # Read metadata and indices
        metadata = json.load(
            open(os.path.join(output_dataset_path, "metadata.json"), "r"))
        test_inds = np.load(os.path.join(image_dir,
                                         "test_indices.npy")).tolist()
        train_inds = np.load(os.path.join(image_dir,
                                          "train_indices.npy")).tolist()

        # set obj ids and splits
        reverse_obj_ids = metadata["obj_ids"]
        obj_id_map = utils.reverse_dictionary(reverse_obj_ids)
        obj_splits = metadata["obj_splits"]
        obj_keys = obj_splits.keys()
        mesh_filenames = metadata["meshes"]

        # Get list of images generated so far
        generated_images = (sorted(os.listdir(color_dir))
                            if image_config["color"] else
                            sorted(os.listdir(depth_dir)))
        num_total_images = len(generated_images)

        # Do our own calculation if no saved tensors
        if num_prev_states == 0:
            num_prev_states = num_total_images // num_images_per_state

        # Find images to remove and remove them from all relevant places
        # if they exist
        num_images_to_remove = num_total_images - (num_prev_states *
                                                   num_images_per_state)
        logger.info(
            "Deleting last {} invalid images".format(num_images_to_remove))
        for k in range(num_images_to_remove):
            im_name = generated_images[-(k + 1)]
            im_basename = os.path.splitext(im_name)[0]
            im_ind = int(im_basename.split("_")[1])
            if os.path.exists(os.path.join(depth_dir, im_name)):
                os.remove(os.path.join(depth_dir, im_name))
            if os.path.exists(os.path.join(color_dir, im_name)):
                os.remove(os.path.join(color_dir, im_name))
            if os.path.exists(os.path.join(semantic_dir, im_name)):
                os.remove(os.path.join(semantic_dir, im_name))
            if os.path.exists(os.path.join(modal_dir, im_basename)):
                shutil.rmtree(os.path.join(modal_dir, im_basename))
            if os.path.exists(os.path.join(amodal_dir, im_basename)):
                shutil.rmtree(os.path.join(amodal_dir, im_basename))
            if im_ind in train_inds:
                train_inds.remove(im_ind)
            elif im_ind in test_inds:
                test_inds.remove(im_ind)
    else:
        # Create initial env to generate metadata
        env = BinHeapEnv(config)
        obj_id_map = env.state_space.obj_id_map
        obj_keys = env.state_space.obj_keys
        obj_splits = env.state_space.obj_splits
        mesh_filenames = env.state_space.mesh_filenames
        save_obj_id_map = obj_id_map.copy()
        save_obj_id_map[ENVIRONMENT_KEY] = np.iinfo(np.uint32).max
        reverse_obj_ids = utils.reverse_dictionary(save_obj_id_map)
        metadata["obj_ids"] = reverse_obj_ids
        metadata["obj_splits"] = obj_splits
        metadata["meshes"] = mesh_filenames
        json.dump(
            metadata,
            open(os.path.join(output_dataset_path, "metadata.json"), "w"),
            indent=JSON_INDENT,
            sort_keys=True,
        )
        train_inds = []
        test_inds = []

    # generate states and images
    state_id = num_prev_states
    while state_id < num_states:

        # create env and set objects
        create_start = time.time()
        env = BinHeapEnv(config)
        env.state_space.obj_id_map = obj_id_map
        env.state_space.obj_keys = obj_keys
        env.state_space.set_splits(obj_splits)
        env.state_space.mesh_filenames = mesh_filenames
        create_stop = time.time()
        logger.info("Creating env took %.3f sec" %
                    (create_stop - create_start))

        # sample states
        states_remaining = num_states - state_id
        for i in range(min(states_per_garbage_collect, states_remaining)):

            # log current rollout
            if state_id % config["log_rate"] == 0:
                logger.info("State: %06d" % (state_id))

            try:
                # reset env
                env.reset()
                state = env.state
                split = state.metadata["split"]

                # render state
                if vis_config["state"]:
                    env.view_3d_scene()

                # Save state if desired
                if save_tensors:
                    # set obj state variables
                    obj_pose_vec = np.zeros(obj_pose_dim)
                    obj_com_vec = np.zeros(obj_com_dim)
                    obj_id_vec = np.iinfo(
                        np.uint32).max * np.ones(max_objs_per_state)
                    j = 0
                    for obj_state in state.obj_states:
                        obj_pose_vec[j * POSE_DIM:(j + 1) *
                                     POSE_DIM] = obj_state.pose.vec
                        obj_com_vec[j * POINT_DIM:(j + 1) *
                                    POINT_DIM] = obj_state.center_of_mass
                        obj_id_vec[j] = int(obj_id_map[obj_state.key])
                        j += 1

                    # store datapoint env params
                    state_datapoint["state_id"] = state_id
                    state_datapoint["obj_poses"] = obj_pose_vec
                    state_datapoint["obj_coms"] = obj_com_vec
                    state_datapoint["obj_ids"] = obj_id_vec
                    state_datapoint["split"] = split

                    # store state datapoint
                    image_start_ind = image_dataset.num_datapoints
                    image_end_ind = image_start_ind + num_images_per_state
                    state_datapoint["image_start_ind"] = image_start_ind
                    state_datapoint["image_end_ind"] = image_end_ind

                    # clean up
                    del obj_pose_vec
                    del obj_com_vec
                    del obj_id_vec

                    # add state
                    state_dataset.add(state_datapoint)

                # render images
                for k in range(num_images_per_state):

                    # reset the camera
                    if num_images_per_state > 1:
                        env.reset_camera()

                    obs = env.render_camera_image(
                        color=image_config["color"])
                    if image_config["color"]:
                        color_obs, depth_obs = obs
                    else:
                        depth_obs = obs

                    # vis obs
                    if vis_config["obs"]:
                        if image_config["depth"]:
                            plt.figure()
                            plt.imshow(depth_obs)
                            plt.title("Depth Observation")
                        if image_config["color"]:
                            plt.figure()
                            plt.imshow(color_obs)
                            plt.title("Color Observation")
                        plt.show()

                    if (image_config["modal"] or image_config["amodal"]
                            or image_config["semantic"]):

                        # render segmasks
                        (
                            amodal_segmasks,
                            modal_segmasks,
                        ) = env.render_segmentation_images()

                        # retrieve segmask data
                        modal_segmask_arr = \
                            np.iinfo(np.uint8).max * np.ones(
                                [im_height, im_width, segmask_channels],
                                dtype=np.uint8,
                            )
                        amodal_segmask_arr = \
                            np.iinfo(np.uint8).max * np.ones(
                                [im_height, im_width, segmask_channels],
                                dtype=np.uint8,
                            )
                        stacked_segmask_arr = np.zeros(
                            [im_height, im_width, 1], dtype=np.uint8)

                        modal_segmask_arr[:, :, :env.
                                          num_objects] = modal_segmasks
                        amodal_segmask_arr[:, :, :env.
                                           num_objects] = amodal_segmasks

                        if image_config["semantic"]:
                            for j in range(env.num_objects):
                                this_obj_px = np.where(
                                    modal_segmasks[:, :, j] > 0)
                                stacked_segmask_arr[this_obj_px[0],
                                                    this_obj_px[1],
                                                    0] = (j + 1)

                    # visualize
                    if vis_config["semantic"]:
                        plt.figure()
                        plt.imshow(stacked_segmask_arr.squeeze())
                        plt.show()

                    if save_tensors:
                        # save image data as tensors
                        if image_config["color"]:
                            image_datapoint["color_im"] = color_obs
                        if image_config["depth"]:
                            image_datapoint["depth_im"] = \
                                depth_obs[:, :, None]
                        if image_config["modal"]:
                            image_datapoint[
                                "modal_segmasks"] = modal_segmask_arr
                        if image_config["amodal"]:
                            image_datapoint[
                                "amodal_segmasks"] = amodal_segmask_arr
                        if image_config["semantic"]:
                            image_datapoint[
                                "semantic_segmasks"] = stacked_segmask_arr

                        image_datapoint["camera_pose"] = \
                            env.camera.pose.vec
                        image_datapoint[
                            "camera_intrs"] = env.camera.intrinsics.vec
                        image_datapoint["state_ind"] = state_id
                        image_datapoint["split"] = split

                        # add image
                        image_dataset.add(image_datapoint)

                    # Save depth image and semantic masks
                    if image_config["color"]:
                        ColorImage(color_obs).save(
                            os.path.join(
                                color_dir,
                                "image_{:06d}.png".format(
                                    num_images_per_state * state_id + k),
                            ))
                    if image_config["depth"]:
                        DepthImage(depth_obs).save(
                            os.path.join(
                                depth_dir,
                                "image_{:06d}.png".format(
                                    num_images_per_state * state_id + k),
                            ))
                    if image_config["modal"]:
                        modal_id_dir = os.path.join(
                            modal_dir,
                            "image_{:06d}".format(num_images_per_state *
                                                  state_id + k),
                        )
                        if not os.path.exists(modal_id_dir):
                            os.mkdir(modal_id_dir)
                        for i in range(env.num_objects):
                            BinaryImage(modal_segmask_arr[:, :, i]).save(
                                os.path.join(
                                    modal_id_dir,
                                    "channel_{:03d}.png".format(i),
                                ))
                    if image_config["amodal"]:
                        amodal_id_dir = os.path.join(
                            amodal_dir,
                            "image_{:06d}".format(num_images_per_state *
                                                  state_id + k),
                        )
                        if not os.path.exists(amodal_id_dir):
                            os.mkdir(amodal_id_dir)
                        for i in range(env.num_objects):
                            BinaryImage(amodal_segmask_arr[:, :, i]).save(
                                os.path.join(
                                    amodal_id_dir,
                                    "channel_{:03d}.png".format(i),
                                ))
                    if image_config["semantic"]:
                        GrayscaleImage(stacked_segmask_arr.squeeze()).save(
                            os.path.join(
                                semantic_dir,
                                "image_{:06d}.png".format(
                                    num_images_per_state * state_id + k),
                            ))

                    # Save split
                    if split == TRAIN_ID:
                        train_inds.append(num_images_per_state * state_id +
                                          k)
                    else:
                        test_inds.append(num_images_per_state * state_id +
                                         k)

                # auto-flush after every so many timesteps
                if state_id % states_per_flush == 0:
                    np.save(
                        os.path.join(image_dir, "train_indices.npy"),
                        train_inds,
                    )
                    np.save(os.path.join(image_dir, "test_indices.npy"),
                            test_inds)
                    if save_tensors:
                        state_dataset.flush()
                        image_dataset.flush()

                # delete action objects
                for obj_state in state.obj_states:
                    del obj_state
                del state
                gc.collect()

                # update state id
                state_id += 1

            except Exception as e:
                # log an error (format_exc returns the traceback string;
                # print_exc would log None)
                logger.warning("Heap failed!")
                logger.warning("%s" % (str(e)))
                logger.warning(traceback.format_exc())
                if debug:
                    raise

                del env
                gc.collect()
                env = BinHeapEnv(config)
                env.state_space.obj_id_map = obj_id_map
                env.state_space.obj_keys = obj_keys
                env.state_space.set_splits(obj_splits)
                env.state_space.mesh_filenames = mesh_filenames

        # garbage collect
        del env
        gc.collect()

    # write all datasets to file, save indices
    np.save(os.path.join(image_dir, "train_indices.npy"), train_inds)
    np.save(os.path.join(image_dir, "test_indices.npy"), test_inds)
    if save_tensors:
        state_dataset.flush()
        image_dataset.flush()

    logger.info("Generated %d image datapoints" %
                (state_id * num_images_per_state))
if __name__ == "__main__":
    # base_path, images, and boxes_list are assumed to be defined at
    # module scope
    for img_name in images:
        img_path = os.path.join(base_path, img_name)
        img = np.load(img_path)
        img = np.reshape(img, img.shape[:2])
        DepthImage(img).save(os.path.join(base_path, "depth_0.png"))

    for img_name, boxes in zip(images, boxes_list):
        img_path = os.path.join(base_path, img_name)
        for box in boxes:
            out_name = "img_{}_box_{}_{}_{}_{}.png".format(
                img_name, box[0], box[1], box[2], box[3])
            analyze_image_depths(img_path, box, out_name)
def test_virtual(self, height=100, width=100):
    # Generate folder of color and depth images
    if not os.path.exists(IM_FILEROOT):
        os.makedirs(IM_FILEROOT)
    cam_intr = CameraIntrinsics(
        "a",
        fx=0.0,
        fy=0.0,
        cx=0.0,
        cy=0.0,
        skew=0.0,
        height=height,
        width=width,
    )
    cam_intr.save(os.path.join(IM_FILEROOT, "a.intr"))
    color_data = (255 * np.random.rand(10, height, width, 3)).astype(
        np.uint8)
    depth_data = np.random.rand(10, height, width).astype(np.float32)
    for i in range(10):
        im = ColorImage(color_data[i], frame="a")
        im.save(os.path.join(IM_FILEROOT, "color_{:d}.png".format(i)))

        im = DepthImage(depth_data[i], frame="a")
        im.save(os.path.join(IM_FILEROOT, "depth_{:d}.npy".format(i)))

    # Create virtual camera
    virtual_cam = RgbdSensorFactory.sensor("virtual",
                                           cfg={
                                               "image_dir": IM_FILEROOT,
                                               "frame": "a"
                                           })
    self.assertTrue(
        virtual_cam.path_to_images == IM_FILEROOT,
        msg="img path changed after init",
    )

    # Start virtual camera and read frames
    virtual_cam.start()
    self.assertTrue(virtual_cam.is_running,
                    msg="camera not running after start")
    for i in range(10):
        color, depth = virtual_cam.frames()
        self.assertTrue(
            np.all(color.data == color_data[i]),
            msg="color data for img {:d} changed".format(i),
        )
        self.assertTrue(
            color.frame == virtual_cam.frame,
            msg="frame mismatch between color im and camera",
        )
        self.assertTrue(
            np.all(depth.data == depth_data[i]),
            msg="depth data for img {:d} changed".format(i),
        )
        self.assertTrue(
            depth.frame == virtual_cam.frame,
            msg="frame mismatch between depth im and camera",
        )

    # Make sure camera is stopped
    virtual_cam.stop()
    self.assertFalse(virtual_cam.is_running,
                     msg="camera running after stop")

    # Cleanup images
    for i in range(10):
        os.remove(os.path.join(IM_FILEROOT, "color_{:d}.png".format(i)))
        os.remove(os.path.join(IM_FILEROOT, "depth_{:d}.npy".format(i)))
    os.remove(os.path.join(IM_FILEROOT, "a.intr"))
    os.rmdir(IM_FILEROOT)
def wrapped_images(self,
                   mesh,
                   object_to_camera_poses,
                   render_mode,
                   stable_pose=None,
                   mat_props=None,
                   light_props=None,
                   debug=False):
    """Create ObjectRender objects of the given mesh at the list of object
    to camera poses.

    Parameters
    ----------
    mesh : :obj:`Mesh3D`
        The mesh to be rendered.
    object_to_camera_poses : :obj:`list` of :obj:`RigidTransform`
        A list of object to camera transforms to render from.
    render_mode : int
        One of the RenderMode constants handled below, e.g.
        RenderMode.SEGMASK, RenderMode.COLOR, RenderMode.DEPTH,
        RenderMode.RGBD, their *_SCENE variants, or
        RenderMode.SCALED_DEPTH.
    stable_pose : :obj:`StablePose`
        A stable pose to render the object in.
    mat_props : :obj:`MaterialProperties`
        Material properties for the mesh.
    light_props : :obj:`MaterialProperties`
        Lighting properties for the scene.
    debug : bool
        Whether or not to debug the C++ meshrendering code.

    Returns
    -------
    :obj:`list` of :obj:`ObjectRender`
        A list of ObjectRender objects generated from the given
        parameters.
    """
    # pre-multiply the stable pose
    world_to_camera_poses = [
        T_obj_camera.as_frames('obj', 'camera')
        for T_obj_camera in object_to_camera_poses
    ]
    if stable_pose is not None:
        t_obj_stp = np.array([0, 0, -stable_pose.r.dot(stable_pose.x0)[2]])
        T_obj_stp = RigidTransform(rotation=stable_pose.r,
                                   translation=t_obj_stp,
                                   from_frame='obj',
                                   to_frame='stp')
        stp_to_camera_poses = copy.copy(object_to_camera_poses)
        object_to_camera_poses = []
        for T_stp_camera in stp_to_camera_poses:
            T_stp_camera.from_frame = 'stp'
            object_to_camera_poses.append(T_stp_camera.dot(T_obj_stp))

    # set lighting mode
    enable_lighting = True
    if render_mode == RenderMode.SEGMASK or \
            render_mode == RenderMode.DEPTH or \
            render_mode == RenderMode.DEPTH_SCENE:
        enable_lighting = False

    # render both image types (doesn't really cost any time)
    color_ims, depth_ims = self.images(mesh,
                                       object_to_camera_poses,
                                       mat_props=mat_props,
                                       light_props=light_props,
                                       enable_lighting=enable_lighting,
                                       debug=debug)

    # convert to image wrapper classes
    images = []
    if render_mode == RenderMode.SEGMASK:
        # wrap binary images
        for binary_im in color_ims:
            images.append(
                BinaryImage(binary_im[:, :, 0],
                            frame=self._camera_intr.frame,
                            threshold=0))

    elif render_mode == RenderMode.COLOR:
        # wrap color images
        for color_im in color_ims:
            images.append(ColorImage(color_im,
                                     frame=self._camera_intr.frame))

    elif render_mode == RenderMode.COLOR_SCENE:
        # wrap color and depth images
        for color_im in color_ims:
            images.append(ColorImage(color_im,
                                     frame=self._camera_intr.frame))

        # render images of scene objects
        color_scene_ims = {}
        for name, scene_obj in self._scene.items():
            scene_object_to_camera_poses = []
            for world_to_camera_pose in world_to_camera_poses:
                scene_object_to_camera_poses.append(
                    world_to_camera_pose * scene_obj.T_mesh_world)

            color_scene_ims[name] = self.wrapped_images(
                scene_obj.mesh,
                scene_object_to_camera_poses,
                RenderMode.COLOR,
                mat_props=scene_obj.mat_props,
                light_props=light_props,
                debug=debug)

        # combine with scene images
        # TODO: re-add farther
        for i in range(len(images)):
            for j, name in enumerate(color_scene_ims.keys()):
                zero_px = images[i].zero_pixels()
                images[i].data[zero_px[:, 0], zero_px[:, 1], :] = \
                    color_scene_ims[name][i].image.data[zero_px[:, 0],
                                                        zero_px[:, 1], :]

    elif render_mode == RenderMode.DEPTH:
        # wrap depth images
        for depth_im in depth_ims:
            images.append(DepthImage(depth_im,
                                     frame=self._camera_intr.frame))

    elif render_mode == RenderMode.DEPTH_SCENE:
        # create empty depth images
        for depth_im in depth_ims:
            images.append(DepthImage(depth_im,
                                     frame=self._camera_intr.frame))

        # render images of scene objects
        depth_scene_ims = {}
        for name, scene_obj in self._scene.items():
            scene_object_to_camera_poses = []
            for world_to_camera_pose in world_to_camera_poses:
                scene_object_to_camera_poses.append(
                    world_to_camera_pose * scene_obj.T_mesh_world)
            depth_scene_ims[name] = self.wrapped_images(
                scene_obj.mesh,
                scene_object_to_camera_poses,
                RenderMode.DEPTH,
                mat_props=scene_obj.mat_props,
                light_props=light_props)

        # combine with scene images
        for i in range(len(images)):
            for j, name in enumerate(depth_scene_ims.keys()):
                images[i] = images[i].combine_with(
                    depth_scene_ims[name][i].image)

    elif render_mode == RenderMode.RGBD:
        # create RGB-D images
        for color_im, depth_im in zip(color_ims, depth_ims):
            c = ColorImage(color_im, frame=self._camera_intr.frame)
            d = DepthImage(depth_im, frame=self._camera_intr.frame)
            images.append(RgbdImage.from_color_and_depth(c, d))

    elif render_mode == RenderMode.RGBD_SCENE:
        # create RGB-D images
        for color_im, depth_im in zip(color_ims, depth_ims):
            c = ColorImage(color_im, frame=self._camera_intr.frame)
            d = DepthImage(depth_im, frame=self._camera_intr.frame)
            images.append(RgbdImage.from_color_and_depth(c, d))

        # render images of scene objects
        rgbd_scene_ims = {}
        for name, scene_obj in self._scene.items():
            scene_object_to_camera_poses = []
            for world_to_camera_pose in world_to_camera_poses:
                scene_object_to_camera_poses.append(
                    world_to_camera_pose * scene_obj.T_mesh_world)

            rgbd_scene_ims[name] = self.wrapped_images(
                scene_obj.mesh,
                scene_object_to_camera_poses,
                RenderMode.RGBD,
                mat_props=scene_obj.mat_props,
                light_props=light_props,
                debug=debug)

        # combine with scene images
        for i in range(len(images)):
            for j, name in enumerate(rgbd_scene_ims.keys()):
                images[i] = images[i].combine_with(
                    rgbd_scene_ims[name][i].image)

    elif render_mode == RenderMode.SCALED_DEPTH:
        # convert to color image
        for depth_im in depth_ims:
            d = DepthImage(depth_im, frame=self._camera_intr.frame)
            images.append(d.to_color())
    else:
        raise ValueError('Render mode %s not supported' % (render_mode))

    # create object renders
    if stable_pose is not None:
        object_to_camera_poses = copy.copy(stp_to_camera_poses)
    rendered_images = []
    for image, T_obj_camera in zip(images, object_to_camera_poses):
        T_camera_obj = T_obj_camera.inverse()
        rendered_images.append(ObjectRender(image, T_camera_obj))

    return rendered_images