def __init__(self, dataset_path, frame=None, loop=True):
    self._dataset_path = dataset_path
    self._frame = frame
    self._color_frame = frame
    self._ir_frame = frame
    self._im_index = 0
    self._running = False

    from autolab_core import TensorDataset

    self._dataset = TensorDataset.open(self._dataset_path)
    self._num_images = self._dataset.num_datapoints
    self._image_rescale_factor = 1.0
    if "image_rescale_factor" in self._dataset.metadata.keys():
        self._image_rescale_factor = (
            1.0 / self._dataset.metadata["image_rescale_factor"])

    datapoint = self._dataset.datapoint(
        0, [TensorDatasetVirtualSensor.CAMERA_INTR_FIELD])
    camera_intr_vec = datapoint[
        TensorDatasetVirtualSensor.CAMERA_INTR_FIELD]
    self._color_intr = CameraIntrinsics.from_vec(
        camera_intr_vec,
        frame=self._color_frame).resize(self._image_rescale_factor)
    self._ir_intr = CameraIntrinsics.from_vec(
        camera_intr_vec,
        frame=self._ir_frame).resize(self._image_rescale_factor)
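# Hypothetical usage sketch for the tensor-dataset-backed virtual sensor
# above. The dataset path is a placeholder, and the start()/frames()/stop()
# lifecycle is assumed to match the other sensors in this module.
sensor = TensorDatasetVirtualSensor("/path/to/tensor_dataset", frame="camera")
sensor.start()
frames = sensor.frames()  # assumed to yield at least (color_im, depth_im)
sensor.stop()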
def __init__(self, frame="webcam", device_id=0): """Initialize a Logitech webcam sensor. Parameters ---------- frame : str A name for the frame in which RGB images are returned. device_id : int The device ID for the webcam (by default, zero). """ self._frame = frame self._camera_intr = None self._device_id = device_id self._cap = None self._running = False self._adjust_exposure = True # Set up camera intrinsics for the sensor width, height = 1280, 960 focal_x, focal_y = 1430.0, 1420.0 center_x, center_y = 1280 / 2, 960 / 2 self._camera_intr = CameraIntrinsics( self._frame, focal_x, focal_y, center_x, center_y, height=height, width=width, )
def _set_camera_properties(self, msg):
    """Set the camera intrinsics from an info msg."""
    focal_x = msg.K[0]
    focal_y = msg.K[4]
    center_x = msg.K[2]
    center_y = msg.K[5]
    im_height = msg.height
    im_width = msg.width
    self._camera_intr = CameraIntrinsics(
        self._frame,
        focal_x,
        focal_y,
        center_x,
        center_y,
        height=im_height,
        width=im_width,
    )
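# Why K[0], K[4], K[2], K[5]: the ROS CameraInfo K field is a row-major
# 3x3 matrix [fx 0 cx; 0 fy cy; 0 0 1] flattened into 9 floats, so in the
# flat array fx = K[0], cx = K[2], fy = K[4], cy = K[5]. A quick sketch
# with a fake message (the Msg namedtuple is illustrative, not a real
# ROS type):
from collections import namedtuple

Msg = namedtuple("Msg", ["K", "height", "width"])
msg = Msg(K=[600.0, 0.0, 320.0,
             0.0, 600.0, 240.0,
             0.0, 0.0, 1.0],
          height=480, width=640)
assert (msg.K[0], msg.K[4], msg.K[2], msg.K[5]) == (600.0, 600.0, 320.0, 240.0)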
def __init__(self, frame="phoxi", device_name="2018-02-020-LC3", size="small"): """Initialize a PhoXi Sensor. Parameters ---------- frame : str A name for the frame in which depth images, normal maps, and RGB images are returned. device_name : str The string name of the PhoXi device (SN listed on sticker on back sensor). Old PhoXi: 1703005 New PhoXi: 2018-02-020-LC3 size : str An indicator for which size of image is desired. Either 'large' (2064x1544) or 'small' (1032x772). """ self._frame = frame self._device_name = str(device_name) self._camera_intr = None self._running = False self._bridge = CvBridge() self._cur_color_im = None self._cur_depth_im = None self._cur_normal_map = None # Set up camera intrinsics for the sensor width, height = 2064, 1544 focal_x, focal_y = 2244.0, 2244.0 center_x, center_y = 1023.0, 768.0 if size == "small": width = 1032 height = 772 focal_x = focal_x / 2 focal_y = focal_y / 2 center_x = center_x / 2 center_y = center_y / 2 if str(device_name) == "1703005": focal_x = focal_y = 1105.0 self._camera_intr = CameraIntrinsics( self._frame, focal_x, focal_y, center_x, center_y, height=height, width=width, )
def color_intrinsics(self):
    """:obj:`CameraIntrinsics` : RealSense color camera intrinsics."""
    return CameraIntrinsics(
        self._frame,
        self._intrinsics[0, 0],
        self._intrinsics[1, 1],
        self._intrinsics[0, 2],
        self._intrinsics[1, 2],
        height=RealSenseSensor.COLOR_IM_HEIGHT,
        width=RealSenseSensor.COLOR_IM_WIDTH,
    )
def color_intrinsics(self):
    """:obj:`CameraIntrinsics` : Color camera intrinsics of Kinect."""
    if self._device is None:
        raise RuntimeError(
            "Kinect2 device not running. Cannot return color intrinsics")
    camera_params = self._device.getColorCameraParams()
    return CameraIntrinsics(
        self._color_frame,
        camera_params.fx,
        camera_params.fy,
        camera_params.cx,
        camera_params.cy,
    )
def ir_intrinsics(self):
    """:obj:`CameraIntrinsics` : IR camera intrinsics for the Kinect."""
    if self._device is None:
        raise RuntimeError(
            "Kinect2 device not running. Cannot return IR intrinsics")
    camera_params = self._device.getIrCameraParams()
    return CameraIntrinsics(
        self._ir_frame,
        camera_params.fx,
        camera_params.fy,
        camera_params.cx,
        camera_params.cy,
        height=Kinect2Sensor.DEPTH_IM_HEIGHT,
        width=Kinect2Sensor.DEPTH_IM_WIDTH,
    )
def sample(self, size=1):
    """Sample random variables from the model.

    Parameters
    ----------
    size : int
        number of samples to take

    Returns
    -------
    :obj:`list` of :obj:`CameraSample`
        sampled camera intrinsics and poses
    """
    samples = []
    for i in range(size):
        # sample camera params
        focal = self.focal_rv.rvs(size=1)[0]
        cx = self.cx_rv.rvs(size=1)[0]
        cy = self.cy_rv.rvs(size=1)[0]

        # sample viewsphere params
        radius = self.rad_rv.rvs(size=1)[0]
        elev = self.elev_rv.rvs(size=1)[0]
        az = self.az_rv.rvs(size=1)[0]
        roll = self.roll_rv.rvs(size=1)[0]

        # sample plane translation
        tx = self.tx_rv.rvs(size=1)[0]
        ty = self.ty_rv.rvs(size=1)[0]

        # convert to pose and intrinsics
        pose = self.camera_to_world_pose(radius, elev, az, roll, tx, ty)
        intrinsics = CameraIntrinsics(
            self.frame,
            fx=focal,
            fy=focal,
            cx=cx,
            cy=cy,
            skew=0.0,
            height=self.im_height,
            width=self.im_width,
        )

        # store the sampled camera pose and intrinsics
        samples.append((pose, intrinsics))

    # not a list if only 1 sample
    if size == 1:
        return samples[0]
    return samples
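# Hypothetical usage of sample() above: draw a few camera poses and
# intrinsics and inspect the focal lengths. `sampler` stands in for an
# already-constructed instance of the class defining sample(); its
# configuration is not shown in this snippet.
samples = sampler.sample(size=3)  # list of (pose, intrinsics) tuples
for pose, intr in samples:
    print(intr.fx, intr.fy, pose)
single_pose, single_intr = sampler.sample(size=1)  # note: not a list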
def read_images(self, req):
    """Read images from a ROS service request.

    Parameters
    ----------
    req : :obj:`ROS ServiceRequest`
        ROS ServiceRequest for grasp planner service.
    """
    # Get the raw depth and color images as ROS `Image` objects.
    raw_color = req.color_image
    raw_depth = req.depth_image

    # Get the raw camera info as ROS `CameraInfo`.
    raw_camera_info = req.camera_info

    # Wrap the camera info in a BerkeleyAutomation/perception
    # `CameraIntrinsics` object.
    camera_intr = CameraIntrinsics(
        frame=raw_camera_info.header.frame_id,
        fx=raw_camera_info.K[0],
        fy=raw_camera_info.K[4],
        cx=raw_camera_info.K[2],
        cy=raw_camera_info.K[5],
        width=raw_camera_info.width,
        height=raw_camera_info.height,
    )

    # Create wrapped BerkeleyAutomation/perception RGB and depth images by
    # unpacking the ROS images using ROS `CvBridge`.
    try:
        color_im = ColorImage(self.cv_bridge.imgmsg_to_cv2(
            raw_color, "rgb8"),
                              frame=camera_intr.frame)

        cv2_depth = self.cv_bridge.imgmsg_to_cv2(
            raw_depth, desired_encoding="passthrough")
        cv2_depth = np.array(cv2_depth, dtype=np.float32)

        # Convert depth from millimeters to meters.
        cv2_depth *= 0.001

        # Replace NaNs with a far depth value so they are ignored
        # downstream.
        nan_idxs = np.isnan(cv2_depth)
        cv2_depth[nan_idxs] = 1000.0

        depth_im = DepthImage(cv2_depth, frame=camera_intr.frame)
    except CvBridgeError as cv_bridge_exception:
        rospy.logerr(cv_bridge_exception)
        raise

    # Check image sizes.
    if color_im.height != depth_im.height or \
            color_im.width != depth_im.width:
        msg = ("Color image and depth image must be the same shape! Color"
               " is %d x %d but depth is %d x %d") % (
                   color_im.height, color_im.width,
                   depth_im.height, depth_im.width)
        rospy.logerr(msg)
        raise rospy.ServiceException(msg)

    if (color_im.height < self.min_height
            or color_im.width < self.min_width):
        msg = ("Color image is too small! Must be at least %d x %d"
               " resolution but the requested image is only %d x %d") % (
                   self.min_height, self.min_width,
                   color_im.height, color_im.width)
        rospy.logerr(msg)
        raise rospy.ServiceException(msg)

    # --- create CameraData struct --- #
    camera_data = CameraData()
    camera_data.rgb_img = color_im
    camera_data.depth_img = depth_im
    camera_data.intrinsic_params = camera_intr

    return camera_data
def test_virtual(self, height=100, width=100):
    # Generate folder of color and depth images
    if not os.path.exists(IM_FILEROOT):
        os.makedirs(IM_FILEROOT)
    cam_intr = CameraIntrinsics(
        "a",
        fx=0.0,
        fy=0.0,
        cx=0.0,
        cy=0.0,
        skew=0.0,
        height=height,
        width=width,
    )
    cam_intr.save(os.path.join(IM_FILEROOT, "a.intr"))
    color_data = (255 * np.random.rand(10, height, width, 3)).astype(
        np.uint8)
    depth_data = np.random.rand(10, height, width).astype(np.float32)
    for i in range(10):
        im = ColorImage(color_data[i], frame="a")
        im.save(os.path.join(IM_FILEROOT, "color_{:d}.png".format(i)))

        im = DepthImage(depth_data[i], frame="a")
        im.save(os.path.join(IM_FILEROOT, "depth_{:d}.npy".format(i)))

    # Create virtual camera
    virtual_cam = RgbdSensorFactory.sensor("virtual",
                                           cfg={
                                               "image_dir": IM_FILEROOT,
                                               "frame": "a"
                                           })
    self.assertTrue(
        virtual_cam.path_to_images == IM_FILEROOT,
        msg="img path changed after init",
    )

    # Start virtual camera and read frames
    virtual_cam.start()
    self.assertTrue(virtual_cam.is_running,
                    msg="camera not running after start")
    for i in range(10):
        color, depth = virtual_cam.frames()
        self.assertTrue(
            np.all(color.data == color_data[i]),
            msg="color data for img {:d} changed".format(i),
        )
        self.assertTrue(
            color.frame == virtual_cam.frame,
            msg="frame mismatch between color im and camera",
        )
        self.assertTrue(
            np.all(depth.data == depth_data[i]),
            msg="depth data for img {:d} changed".format(i),
        )
        self.assertTrue(
            depth.frame == virtual_cam.frame,
            msg="frame mismatch between depth im and camera",
        )

    # Make sure camera is stopped
    virtual_cam.stop()
    self.assertFalse(virtual_cam.is_running,
                     msg="camera running after stop")

    # Cleanup images
    for i in range(10):
        os.remove(os.path.join(IM_FILEROOT, "color_{:d}.png".format(i)))
        os.remove(os.path.join(IM_FILEROOT, "depth_{:d}.npy".format(i)))
    os.remove(os.path.join(IM_FILEROOT, "a.intr"))
    os.rmdir(IM_FILEROOT)
def __init__(self, path_to_images, frame=None, loop=True):
    """Create a new virtualized Primesense sensor.

    This requires a directory containing a specific set of files.

    First, the directory must contain a set of images, where each image
    has three files:
    - color_{#}.png
    - depth_{#}.npy
    - ir_{#}.npy
    In these, the {#} is replaced with the integer index for the image.
    These indices should start at zero and increase consecutively.

    Second, the directory must contain CameraIntrinsics files for the
    color and ir cameras:
    - {frame}_color.intr
    - {frame}_ir.intr
    In these, the {frame} is replaced with the reference frame name that
    is passed as a parameter to this function.

    Parameters
    ----------
    path_to_images : :obj:`str`
        The path to a directory containing images that the virtualized
        sensor will return.
    frame : :obj:`str`
        The name of the frame of reference in which the sensor resides.
        If None, this will be discovered from the files in the directory.
    loop : bool
        Whether or not to loop back to the first image after running out
        of images.
    """
    self._running = False
    self._path_to_images = path_to_images
    self._im_index = 0
    self._num_images = 0
    self._frame = frame
    self._loop = loop
    filenames = os.listdir(self._path_to_images)

    # get number of images
    for filename in filenames:
        if filename.find("depth") != -1 and filename.endswith(".npy"):
            self._num_images += 1

    # set the frame dynamically
    self._color_ext = ".png"
    for filename in filenames:
        file_root, file_ext = os.path.splitext(filename)
        color_ind = file_root.rfind("color")

        if (file_ext in VirtualSensor.SUPPORTED_FILE_EXTS
                and color_ind != -1):
            self._color_ext = file_ext

        if (self._frame is None and file_ext == INTR_EXTENSION
                and color_ind != -1):
            self._frame = file_root[:color_ind - 1]
            self._color_frame = file_root
            self._ir_frame = file_root

    # load color and ir intrinsics
    color_intr_filename = os.path.join(self._path_to_images,
                                       "%s_color.intr" % (self._frame))
    ir_intr_filename = os.path.join(self._path_to_images,
                                    "%s_ir.intr" % (self._frame))
    generic_intr_filename = os.path.join(self._path_to_images,
                                         "%s.intr" % (self._frame))
    if os.path.exists(color_intr_filename):
        self._color_intr = CameraIntrinsics.load(color_intr_filename)
    else:
        self._color_intr = CameraIntrinsics.load(generic_intr_filename)
    if os.path.exists(ir_intr_filename):
        self._ir_intr = CameraIntrinsics.load(ir_intr_filename)
    else:
        self._ir_intr = CameraIntrinsics.load(generic_intr_filename)
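# Expected on-disk layout for the virtual sensor above, following its
# docstring (paths and frame name are illustrative):
#
#   /data/virtual_cam/
#       color_0.png   depth_0.npy   ir_0.npy
#       color_1.png   depth_1.npy   ir_1.npy
#       camera_color.intr
#       camera_ir.intr
#
# A minimal instantiation, assuming that directory exists:
sensor = VirtualSensor("/data/virtual_cam", frame="camera")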
class EnsensoSensor(CameraSensor):
    """Class for interfacing with an Ensenso N* sensor."""

    def __init__(self, frame="ensenso"):
        # set member vars
        self._frame = frame

        self._initialized = False
        self._format = None
        self._camera_intr = None
        self._cur_depth_im = None
        self._running = False

    def __del__(self):
        """Automatically stop the sensor for safety."""
        if self.is_running:
            self.stop()

    def _set_format(self, msg):
        """Set the buffer formatting."""
        num_points = msg.height * msg.width
        self._format = "<" + num_points * "ffff"

    def _set_camera_properties(self, msg):
        """Set the camera intrinsics from an info msg."""
        focal_x = msg.K[0]
        focal_y = msg.K[4]
        center_x = msg.K[2]
        center_y = msg.K[5]
        im_height = msg.height
        im_width = msg.width
        self._camera_intr = CameraIntrinsics(
            self._frame,
            focal_x,
            focal_y,
            center_x,
            center_y,
            height=im_height,
            width=im_width,
        )

    def _depth_im_from_pointcloud(self, msg):
        """Convert a pointcloud2 message to a depth image."""
        # set format
        if self._format is None:
            self._set_format(msg)

        # rescale camera intr in case binning is turned on
        if msg.height != self._camera_intr.height:
            rescale_factor = float(msg.height) / self._camera_intr.height
            self._camera_intr = self._camera_intr.resize(rescale_factor)

        # read num points
        num_points = msg.height * msg.width

        # read buffer
        raw_tup = struct.Struct(self._format).unpack_from(msg.data, 0)
        raw_arr = np.array(raw_tup)

        # subsample depth values and reshape
        depth_ind = 2 + 4 * np.arange(num_points)
        depth_buf = raw_arr[depth_ind]
        depth_arr = depth_buf.reshape(msg.height, msg.width)
        depth_im = DepthImage(depth_arr, frame=self._frame)
        return depth_im

    def _pointcloud_callback(self, msg):
        """Callback for handling point clouds."""
        self._cur_depth_im = self._depth_im_from_pointcloud(msg)

    def _camera_info_callback(self, msg):
        """Callback for reading camera info."""
        self._camera_info_sub.unregister()
        self._set_camera_properties(msg)

    @property
    def ir_intrinsics(self):
        """:obj:`CameraIntrinsics` : IR camera intrinsics for the Ensenso."""
        return self._camera_intr

    @property
    def is_running(self):
        """bool : True if the stream is running, or False otherwise."""
        return self._running

    @property
    def frame(self):
        """:obj:`str` : The reference frame of the sensor."""
        return self._frame

    def start(self):
        """Start the sensor."""
        # initialize subscribers
        self._pointcloud_sub = rospy.Subscriber(
            "/%s/depth/points" % (self.frame),
            PointCloud2,
            self._pointcloud_callback,
        )
        self._camera_info_sub = rospy.Subscriber(
            "/%s/left/camera_info" % (self.frame),
            CameraInfo,
            self._camera_info_callback,
        )

        # wait until the camera intrinsics have been received
        while self._camera_intr is None:
            time.sleep(0.1)

        self._running = True

    def stop(self):
        """Stop the sensor."""
        # check that everything is running
        if not self._running:
            logging.warning("Ensenso not running. Aborting stop")
            return False

        # stop subs
        self._pointcloud_sub.unregister()
        self._camera_info_sub.unregister()
        self._running = False
        return True

    def frames(self):
        """Retrieve a new frame from the Ensenso and convert it to a
        ColorImage and a DepthImage.

        Returns
        -------
        :obj:`tuple` of :obj:`ColorImage`, :obj:`DepthImage`, None
            The ColorImage and DepthImage of the current frame, plus None
            in place of an IR image, which this sensor does not provide.

        Raises
        ------
        RuntimeError
            If the Ensenso stream is not running.
        """
        # check that the stream is running, as promised by the docstring
        if not self._running:
            raise RuntimeError(
                "Ensenso stream is not running. Cannot read frames.")

        # wait for a new image
        while self._cur_depth_im is None:
            time.sleep(0.01)

        # read next image
        depth_im = self._cur_depth_im
        color_im = ColorImage(
            np.zeros([depth_im.height, depth_im.width,
                      3]).astype(np.uint8),
            frame=self._frame,
        )
        self._cur_depth_im = None
        return color_im, depth_im, None

    def median_depth_img(self, num_img=1, fill_depth=0.0):
        """Collect a series of depth images and return the median of the
        set.

        Parameters
        ----------
        num_img : int
            The number of consecutive frames to process.
        fill_depth : float
            The value used to fill pixels whose median depth is zero.

        Returns
        -------
        :obj:`DepthImage`
            The median DepthImage collected from the frames.
        """
        depths = []

        for _ in range(num_img):
            _, depth, _ = self.frames()
            depths.append(depth)

        median_depth = Image.median_images(depths)
        median_depth.data[median_depth.data == 0.0] = fill_depth
        return median_depth
def create_camera_data(self,
                       rgb: np.ndarray,
                       depth: np.ndarray,
                       cam_intr_frame: str,
                       fx: float,
                       fy: float,
                       cx: float,
                       cy: float,
                       skew: float,
                       w: int,
                       h: int,
                       seg_mask: np.ndarray = np.empty(shape=(0, )),
                       bbox: tuple = ()):
    """Create the CameraData object in the correct format expected by
    gqcnn.

    Parameters
    ----------
    rgb : np.ndarray
        RGB image
    depth : np.ndarray
        depth image
    cam_intr_frame : str
        the reference frame of the images; grasp poses are computed
        wrt this frame
    fx : float
        focal length (x)
    fy : float
        focal length (y)
    cx : float
        principal point (x)
    cy : float
        principal point (y)
    skew : float
        skew coefficient
    w : int
        width
    h : int
        height
    seg_mask : np.ndarray, optional
        segmentation mask
    bbox : tuple, optional
        a tuple of 4 values that define the mask bounding box
        = (x_min, y_min, x_max, y_max)

    Returns
    -------
    CameraData
        Object that stores the input data required by plan_grasp().
    """
    camera_data = CameraData()

    # Create images
    camera_data.rgb_img = ColorImage(rgb, frame=cam_intr_frame)
    camera_data.depth_img = DepthImage(depth, frame=cam_intr_frame)
    if seg_mask.size > 0:
        camera_data.seg_img = BinaryImage(seg_mask, cam_intr_frame)

    # Check image sizes
    if camera_data.rgb_img.height != camera_data.depth_img.height or \
            camera_data.rgb_img.width != camera_data.depth_img.width:
        msg = ("Color image and depth image must be the same shape! Color"
               " is %d x %d but depth is %d x %d") % (
                   camera_data.rgb_img.height, camera_data.rgb_img.width,
                   camera_data.depth_img.height,
                   camera_data.depth_img.width)
        raise AssertionError(msg)

    if (camera_data.rgb_img.height < self.min_height
            or camera_data.rgb_img.width < self.min_width):
        msg = ("Color image is too small! Must be at least %d x %d"
               " resolution but the requested image is only %d x %d") % (
                   self.min_height, self.min_width,
                   camera_data.rgb_img.height, camera_data.rgb_img.width)
        raise AssertionError(msg)

    # Only validate the segmask shape when a mask was actually provided.
    if seg_mask.size > 0 and \
            (camera_data.rgb_img.height != camera_data.seg_img.height or
             camera_data.rgb_img.width != camera_data.seg_img.width):
        msg = ("Images and segmask must be the same shape! Color image is"
               " %d x %d but segmask is %d x %d") % (
                   camera_data.rgb_img.height, camera_data.rgb_img.width,
                   camera_data.seg_img.height, camera_data.seg_img.width)
        raise AssertionError(msg)

    # set intrinsic params
    camera_data.intrinsic_params = CameraIntrinsics(
        cam_intr_frame, fx, fy, cx, cy, skew, h, w)

    # set mask bounding box
    if len(bbox) == 4:
        camera_data.bounding_box = {
            'min_x': bbox[0],
            'min_y': bbox[1],
            'max_x': bbox[2],
            'max_y': bbox[3]
        }

    return camera_data
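# Hypothetical call to create_camera_data() above with synthetic arrays.
# The intrinsics values are placeholders, and `planner` stands in for
# whatever object defines this method.
import numpy as np

rgb = np.zeros((480, 640, 3), dtype=np.uint8)
depth = np.ones((480, 640), dtype=np.float32)
camera_data = planner.create_camera_data(
    rgb, depth, cam_intr_frame="camera",
    fx=600.0, fy=600.0, cx=320.0, cy=240.0, skew=0.0,
    w=640, h=480)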
def detect(detector_type, config, run_dir, test_config):
    """Run PCL-based detection on a depth-image-based dataset.

    Parameters
    ----------
    detector_type : str
        Type of PCL detector, either 'euclidean' or 'region_growing'.
    config : dict
        config for a PCL detector
    run_dir : str
        Directory to save outputs in. Output will be saved in pred_masks,
        pred_info, and modal_segmasks_processed subdirectories.
    test_config : dict
        config containing dataset information
    """

    ##################################################################
    # Set up output directories
    ##################################################################

    # Create subdirectory for prediction masks
    pred_dir = os.path.join(run_dir, 'pred_masks')
    mkdir_if_missing(pred_dir)

    # Create subdirectory for prediction scores & bboxes
    pred_info_dir = os.path.join(run_dir, 'pred_info')
    mkdir_if_missing(pred_info_dir)

    # Create subdirectory for transformed GT segmasks
    resized_segmask_dir = os.path.join(run_dir,
                                       'modal_segmasks_processed')
    mkdir_if_missing(resized_segmask_dir)

    ##################################################################
    # Set up input directories
    ##################################################################

    dataset_dir = test_config['path']
    indices_arr = np.load(os.path.join(dataset_dir,
                                       test_config['indices']))

    # Input depth image data (numpy files, not .pngs)
    depth_dir = os.path.join(dataset_dir, test_config['images'])

    # Input GT binary masks dir
    gt_mask_dir = os.path.join(dataset_dir, test_config['masks'])

    # Input binary mask data; default to None so the later check does not
    # raise a NameError when no bin masks are configured.
    bin_mask_dir = None
    if 'bin_masks' in test_config.keys():
        bin_mask_dir = os.path.join(dataset_dir, test_config['bin_masks'])

    # Input camera intrinsics
    camera_intrinsics_fn = os.path.join(dataset_dir,
                                        'camera_intrinsics.intr')
    camera_intrs = CameraIntrinsics.load(camera_intrinsics_fn)

    image_ids = np.arange(indices_arr.size)

    ##################################################################
    # Process each image
    ##################################################################
    for image_id in tqdm(image_ids):
        base_name = 'image_{:06d}'.format(indices_arr[image_id])
        output_name = 'image_{:06d}'.format(image_id)
        depth_image_fn = base_name + '.npy'

        # Extract depth image
        depth_data = np.load(os.path.join(depth_dir, depth_image_fn))
        depth_im = DepthImage(depth_data, camera_intrs.frame)
        depth_im = depth_im.inpaint(0.25)

        # Mask out bin pixels if appropriate/necessary
        if bin_mask_dir:
            mask_im = BinaryImage.open(
                os.path.join(bin_mask_dir, base_name + '.png'),
                camera_intrs.frame)
            mask_im = mask_im.resize(depth_im.shape[:2])
            depth_im = depth_im.mask_binary(mask_im)

        point_cloud = camera_intrs.deproject(depth_im)
        point_cloud.remove_zero_points()
        pcl_cloud = pcl.PointCloud(point_cloud.data.T.astype(np.float32))
        tree = pcl_cloud.make_kdtree()

        if detector_type == 'euclidean':
            segmentor = pcl_cloud.make_EuclideanClusterExtraction()
            segmentor.set_ClusterTolerance(config['tolerance'])
        elif detector_type == 'region_growing':
            segmentor = pcl_cloud.make_RegionGrowing(ksearch=50)
            segmentor.set_NumberOfNeighbours(config['n_neighbors'])
            segmentor.set_CurvatureThreshold(config['curvature'])
            segmentor.set_SmoothnessThreshold(config['smoothness'])
        else:
            print('PCL detector type not supported')
            exit()

        segmentor.set_MinClusterSize(config['min_cluster_size'])
        segmentor.set_MaxClusterSize(config['max_cluster_size'])
        segmentor.set_SearchMethod(tree)
        cluster_indices = segmentor.Extract()

        # Set up predicted masks and metadata
        indiv_pred_masks = []
        r_info = {
            'rois': [],
            'scores': [],
            'class_ids': [],
        }

        for i, cluster in enumerate(cluster_indices):
            points = pcl_cloud.to_array()[cluster]
            indiv_pred_mask = camera_intrs.project_to_image(
                PointCloud(points.T,
                           frame=camera_intrs.frame)).to_binary()
            indiv_pred_mask.data[indiv_pred_mask.data > 0] = 1
            indiv_pred_masks.append(indiv_pred_mask.data)

            # Compute bounding box, score, class_id
            nonzero_pix = np.nonzero(indiv_pred_mask.data)
            min_x, max_x = np.min(nonzero_pix[1]), np.max(nonzero_pix[1])
            min_y, max_y = np.min(nonzero_pix[0]), np.max(nonzero_pix[0])
            r_info['rois'].append([min_y, min_x, max_y, max_x])
            r_info['scores'].append(1.0)
            r_info['class_ids'].append(1)

        r_info['rois'] = np.array(r_info['rois'])
        r_info['scores'] = np.array(r_info['scores'])
        r_info['class_ids'] = np.array(r_info['class_ids'])

        # Write the predicted masks and metadata
        if indiv_pred_masks:
            pred_mask_output = np.stack(indiv_pred_masks).astype(np.uint8)
        else:
            pred_mask_output = np.array(indiv_pred_masks).astype(np.uint8)

        # Save out ground-truth mask as array of shape (n, h, w)
        indiv_gt_masks = []
        gt_mask = cv2.imread(os.path.join(gt_mask_dir, base_name + '.png'))
        gt_mask = cv2.resize(
            gt_mask,
            (depth_im.shape[1], depth_im.shape[0])).astype(np.uint8)[:, :, 0]
        num_gt_masks = np.max(gt_mask)
        for i in range(1, num_gt_masks + 1):
            indiv_gt_mask = (gt_mask == i)
            if np.any(indiv_gt_mask):
                indiv_gt_masks.append(gt_mask == i)
        gt_mask_output = np.stack(indiv_gt_masks)

        np.save(os.path.join(resized_segmask_dir, output_name + '.npy'),
                gt_mask_output)
        np.save(os.path.join(pred_dir, output_name + '.npy'),
                pred_mask_output)
        np.save(os.path.join(pred_info_dir, output_name + '.npy'), r_info)

    print('Saved prediction masks to:\t {}'.format(pred_dir))
    print('Saved prediction info (bboxes, scores, classes) to:\t {}'.format(
        pred_info_dir))
    print('Saved transformed GT segmasks to:\t {}'.format(
        resized_segmask_dir))

    return pred_dir, pred_info_dir, resized_segmask_dir
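# Hypothetical invocation of detect() above for the Euclidean-clustering
# branch. The config keys mirror the ones the function reads; paths and
# numeric values are placeholders.
config = {
    'tolerance': 0.002,
    'min_cluster_size': 100,
    'max_cluster_size': 1000000,
}
test_config = {
    'path': '/data/dataset',
    'indices': 'test_indices.npy',
    'images': 'depth_ims',
    'masks': 'modal_segmasks',
}
detect('euclidean', config, '/tmp/run', test_config)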