def inpaint_depth_image(d_img, ix=0, iy=0, x_end=685, y_end=1130):
    """Inpaint depth image on raw depth values.

    Only import code here to avoid making them required if we're not
    inpainting. Also, inpainting is slow, so crop some irrelevant values.
    But BE CAREFUL! Make sure any cropping here will lead to logical
    consistency with the processing in `camera.process_img_for_net` later.
    For now we crop the 'later part' of each dimension, which still leads
    to > 2x speed-up. The window size is 3 which I think means we can get
    away with a pixel difference of 3 when cropping but to be safe let's
    add a bit more, 50 pix to each side.

    For `ix` and `iy` see `camera.process_img_for_net`, makes inpainting
    faster.

    Parameters
    ----------
    d_img : numpy array
        Raw depth image to inpaint.
    ix, iy : int
        Crop start indices along each dimension (see
        `camera.process_img_for_net`).
    x_end, y_end : int
        Crop end indices along each dimension; defaults (685, 1130)
        preserve the original hard-coded crop.

    Returns
    -------
    numpy array
        The inpainted (and cropped) raw depth data.
    """
    d_img = d_img[ix:x_end, iy:y_end]
    # Lazy import so `perception` is only required when actually inpainting.
    # (Only DepthImage is needed here.)
    from perception import DepthImage
    print('now in-painting the depth image (shape {}), ix, iy = {}, {}...'.
          format(d_img.shape, ix, iy))
    start_t = time.time()
    d_img = DepthImage(d_img)
    d_img = d_img.inpaint()         # inpaint, then get d_img right away
    d_img = d_img.data              # get raw data back from the class
    cum_t = time.time() - start_t
    print('finished in-painting in {:.2f} seconds'.format(cum_t))
    return d_img
def plan_grasp(self, depth, rgb, resetting=False, camera_intr=None, segmask=None):
    """Compute possible grasps for a depth/rgb image pair.

    Parameters
    ----------
    depth: type `numpy`
        depth image
    rgb: type `numpy`
        rgb image
    camera_intr: type `perception.CameraIntrinsics`
        Intrinsic camera object.
    segmask: type `perception.BinaryImage`
        Binary segmask of detected object

    Returns
    -------
    type `GQCNNGrasp`
        Computed optimal grasp.
    """
    is_fc_policy = "FC" in self.model
    if is_fc_policy:
        assert segmask is not None, "Fully-Convolutional policy expects a segmask."

    # Fall back to the default Primesense intrinsics shipped with gqcnn.
    if camera_intr is None:
        intr_path = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            "gqcnn/data/calib/primesense/primesense.intr")
        camera_intr = CameraIntrinsics.load(intr_path)

    depth_im = DepthImage(depth, frame=camera_intr.frame)
    color_im = ColorImage(rgb, frame=camera_intr.frame)

    # Restrict the segmask to pixels that carry valid depth readings.
    valid_px_mask = depth_im.invalid_pixel_mask().inverse()
    segmask = valid_px_mask if segmask is None else segmask.mask_binary(valid_px_mask)

    # Inpaint missing depth values.
    depth_im = depth_im.inpaint(
        rescale_factor=self.config["inpaint_rescale_factor"])

    # Aggregate color and depth images into a single
    # BerkeleyAutomation/perception `RgbdImage`, then build the policy state.
    self.rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)
    state = RgbdImageState(self.rgbd_im, camera_intr, segmask=segmask)

    # Fully-convolutional policies need the input image size set up front.
    if is_fc_policy:
        fc_cfg = self.policy_config["metric"]["fully_conv_gqcnn_config"]
        fc_cfg["im_height"] = depth_im.shape[0]
        fc_cfg["im_width"] = depth_im.shape[1]

    return self.execute_policy(state, resetting)
def depth_to_bin(img_file):
    """Load a depth .npy file and threshold it into a binary image.

    Uses the module-level camera intrinsics `ci` for the image frame and
    inpaints missing depth values before thresholding at 0.85.
    """
    # Load the raw depth array from disk.
    raw_depth = np.load(img_file)
    # Wrap, inpaint, and binarize in one pipeline.
    depth_im = DepthImage(raw_depth, frame=ci.frame).inpaint()
    return depth_im.to_binary(threshold=0.85)
def get_state(self, depth, segmask):
    """Build an `RgbdImageState` from raw depth and segmask arrays.

    The color channel is synthesized as an all-zero image since only
    depth is available here.
    """
    frame = self.camera_intr.frame

    # Wrap the raw arrays in perception image types.
    depth_im = DepthImage(depth, frame=frame)
    blank_rgb = np.zeros([depth_im.height, depth_im.width, 3]).astype(np.uint8)
    color_im = ColorImage(blank_rgb, frame=frame)
    segmask = BinaryImage(segmask.astype(np.uint8) * 255, frame=frame)

    # Fill in invalid depth pixels.
    depth_im = depth_im.inpaint(rescale_factor=self.inpaint_rescale_factor)

    # Assemble the policy state.
    rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)
    state = RgbdImageState(rgbd_im, self.camera_intr, segmask=segmask)
    return state, rgbd_im
def largest_planar_surface(filename):
    """Segment the dominant planar surface from a depth .npy file.

    Deprojects the depth image with the module-level intrinsics `ci`,
    then runs PCL RANSAC plane segmentation on the resulting point cloud.

    Returns
    -------
    tuple
        (inlier indices, plane model coefficients, raw depth array,
        deprojected point cloud)
    """
    # Load the image as a numpy array and deproject it using the
    # camera intrinsics.
    image = np.load(filename)
    di = DepthImage(image, frame=ci.frame).inpaint()
    pc = ci.deproject(di)

    # Convert to a PCL point cloud (PCL wants float32, points as rows).
    cloud = pcl.PointCloud(pc.data.T.astype(np.float32))

    # Configure a RANSAC segmenter for a parallel-plane model.
    seg = cloud.make_segmenter()
    seg.set_model_type(pcl.SACMODEL_PARALLEL_PLANE)
    seg.set_method_type(pcl.SAC_RANSAC)
    seg.set_distance_threshold(0.005)
    indices, model = seg.segment()

    return indices, model, image, pc
color_im = ColorImage(np.zeros([depth_im.height, depth_im.width, 3]).astype(np.uint8), frame=camera_intr.frame) # Optionally read a segmask. segmask = None if segmask_filename is not None: segmask = BinaryImage.open(segmask_filename) valid_px_mask = depth_im.invalid_pixel_mask().inverse() if segmask is None: segmask = valid_px_mask else: segmask = segmask.mask_binary(valid_px_mask) # Inpaint. depth_im = depth_im.inpaint(rescale_factor=inpaint_rescale_factor) if "input_images" in policy_config["vis"] and policy_config["vis"][ "input_images"]: vis.figure(size=(10, 10)) num_plot = 1 if segmask is not None: num_plot = 2 vis.subplot(1, num_plot, 1) vis.imshow(depth_im) if segmask is not None: vis.subplot(1, num_plot, 2) vis.imshow(segmask) vis.show() # Create state.
grasp_policy = CrossEntropyRobustGraspingPolicy(cfg['policy']) # grasp_policy = RobustGraspingPolicy(cfg['policy']) #%% img = rosco.rgb d = rosco.depth #%% plt.imshow(img) #%% plt.imshow(d) #print(img) #print(d) #%% color_im = ColorImage(img.astype(np.uint8), encoding="bgr8", frame='pcl') depth_im = DepthImage(d.astype(np.float32), frame='pcl') color_im = color_im.inpaint(rescale_factor=cfg['inpaint_rescale_factor']) depth_im = depth_im.inpaint(rescale_factor=cfg['inpaint_rescale_factor']) rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im) rgbd_state = RgbdImageState(rgbd_im, camera_int) #%% grasp = grasp_policy(rgbd_state) #%% img2 = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) img2 = cv2.circle(img2,(int(grasp.grasp.center.x),int(grasp.grasp.center.y)),2,(255,0,0),3) plt.imshow(img2) #%% img3 = cv2.cvtColor(d, cv2.COLOR_BGR2RGB) img3 = cv2.circle(img3,(int(grasp.grasp.center.x),int(grasp.grasp.center.y)),2,(255,0,0),3) plt.imshow(img3) #%% vis.figure(size=(16,16))
def detect(detector_type, config, run_dir, test_config):
    """Run PCL-based detection on a depth-image-based dataset.

    Parameters
    ----------
    detector_type : str
        Which PCL segmenter to use: 'euclidean' or 'region_growing'.
    config : dict
        config for a PCL detector
    run_dir : str
        Directory to save outputs in. Output will be saved in pred_masks,
        pred_info, and modal_segmasks_processed subdirectories.
    test_config : dict
        config containing dataset information

    Returns
    -------
    tuple of str
        (pred_dir, pred_info_dir, resized_segmask_dir) output paths.
    """
    ##################################################################
    # Set up output directories
    ##################################################################

    # Create subdirectory for prediction masks
    pred_dir = os.path.join(run_dir, 'pred_masks')
    mkdir_if_missing(pred_dir)

    # Create subdirectory for prediction scores & bboxes
    pred_info_dir = os.path.join(run_dir, 'pred_info')
    mkdir_if_missing(pred_info_dir)

    # Create subdirectory for transformed GT segmasks
    resized_segmask_dir = os.path.join(run_dir, 'modal_segmasks_processed')
    mkdir_if_missing(resized_segmask_dir)

    ##################################################################
    # Set up input directories
    ##################################################################

    dataset_dir = test_config['path']
    indices_arr = np.load(os.path.join(dataset_dir, test_config['indices']))

    # Input depth image data (numpy files, not .pngs)
    depth_dir = os.path.join(dataset_dir, test_config['images'])

    # Input GT binary masks dir
    gt_mask_dir = os.path.join(dataset_dir, test_config['masks'])

    # Input binary mask data.
    # BUGFIX: initialize to None so the `if bin_mask_dir:` check below
    # cannot raise NameError when 'bin_masks' is absent from the config.
    bin_mask_dir = None
    if 'bin_masks' in test_config.keys():
        bin_mask_dir = os.path.join(dataset_dir, test_config['bin_masks'])

    # Input camera intrinsics
    camera_intrinsics_fn = os.path.join(dataset_dir, 'camera_intrinsics.intr')
    camera_intrs = CameraIntrinsics.load(camera_intrinsics_fn)

    image_ids = np.arange(indices_arr.size)

    ##################################################################
    # Process each image
    ##################################################################
    for image_id in tqdm(image_ids):
        base_name = 'image_{:06d}'.format(indices_arr[image_id])
        output_name = 'image_{:06d}'.format(image_id)
        depth_image_fn = base_name + '.npy'

        # Extract and inpaint the depth image.
        depth_data = np.load(os.path.join(depth_dir, depth_image_fn))
        depth_im = DepthImage(depth_data, camera_intrs.frame)
        depth_im = depth_im.inpaint(0.25)

        # Mask out bin pixels if appropriate/necessary.
        if bin_mask_dir:
            mask_im = BinaryImage.open(
                os.path.join(bin_mask_dir, base_name + '.png'),
                camera_intrs.frame)
            mask_im = mask_im.resize(depth_im.shape[:2])
            depth_im = depth_im.mask_binary(mask_im)

        # Deproject to a point cloud for PCL clustering.
        point_cloud = camera_intrs.deproject(depth_im)
        point_cloud.remove_zero_points()
        pcl_cloud = pcl.PointCloud(point_cloud.data.T.astype(np.float32))
        tree = pcl_cloud.make_kdtree()

        if detector_type == 'euclidean':
            segmentor = pcl_cloud.make_EuclideanClusterExtraction()
            segmentor.set_ClusterTolerance(config['tolerance'])
        elif detector_type == 'region_growing':
            segmentor = pcl_cloud.make_RegionGrowing(ksearch=50)
            segmentor.set_NumberOfNeighbours(config['n_neighbors'])
            segmentor.set_CurvatureThreshold(config['curvature'])
            segmentor.set_SmoothnessThreshold(config['smoothness'])
        else:
            # NOTE: hard-exits the process on an unknown detector type.
            print('PCL detector type not supported')
            exit()

        segmentor.set_MinClusterSize(config['min_cluster_size'])
        segmentor.set_MaxClusterSize(config['max_cluster_size'])
        segmentor.set_SearchMethod(tree)
        cluster_indices = segmentor.Extract()

        # Set up predicted masks and metadata
        indiv_pred_masks = []
        r_info = {
            'rois': [],
            'scores': [],
            'class_ids': [],
        }

        for i, cluster in enumerate(cluster_indices):
            points = pcl_cloud.to_array()[cluster]
            # Project the cluster back into the image plane as a binary mask.
            indiv_pred_mask = camera_intrs.project_to_image(
                PointCloud(points.T, frame=camera_intrs.frame)).to_binary()
            indiv_pred_mask.data[indiv_pred_mask.data > 0] = 1
            indiv_pred_masks.append(indiv_pred_mask.data)

            # Compute bounding box, score, class_id
            nonzero_pix = np.nonzero(indiv_pred_mask.data)
            min_x, max_x = np.min(nonzero_pix[1]), np.max(nonzero_pix[1])
            min_y, max_y = np.min(nonzero_pix[0]), np.max(nonzero_pix[0])
            r_info['rois'].append([min_y, min_x, max_y, max_x])
            r_info['scores'].append(1.0)
            r_info['class_ids'].append(1)

        r_info['rois'] = np.array(r_info['rois'])
        r_info['scores'] = np.array(r_info['scores'])
        r_info['class_ids'] = np.array(r_info['class_ids'])

        # Write the predicted masks and metadata; np.stack raises on an
        # empty list, so fall back to an empty array when nothing was found.
        if indiv_pred_masks:
            pred_mask_output = np.stack(indiv_pred_masks).astype(np.uint8)
        else:
            pred_mask_output = np.array(indiv_pred_masks).astype(np.uint8)

        # Save out ground-truth mask as array of shape (n, h, w)
        indiv_gt_masks = []
        gt_mask = cv2.imread(os.path.join(gt_mask_dir, base_name + '.png'))
        gt_mask = cv2.resize(gt_mask,
                             (depth_im.shape[1], depth_im.shape[0])).astype(
                                 np.uint8)[:, :, 0]
        # GT instances are labeled 1..num_gt_masks in the mask image.
        num_gt_masks = np.max(gt_mask)
        for i in range(1, num_gt_masks + 1):
            indiv_gt_mask = (gt_mask == i)
            if np.any(indiv_gt_mask):
                indiv_gt_masks.append(gt_mask == i)
        # BUGFIX: guard against an image with no GT instances, where
        # np.stack([]) would raise ValueError.
        if indiv_gt_masks:
            gt_mask_output = np.stack(indiv_gt_masks)
        else:
            gt_mask_output = np.zeros(
                (0, gt_mask.shape[0], gt_mask.shape[1]), dtype=bool)
        np.save(os.path.join(resized_segmask_dir, output_name + '.npy'),
                gt_mask_output)

        np.save(os.path.join(pred_dir, output_name + '.npy'),
                pred_mask_output)
        np.save(os.path.join(pred_info_dir, output_name + '.npy'), r_info)

    print('Saved prediction masks to:\t {}'.format(pred_dir))
    print('Saved prediction info (bboxes, scores, classes) to:\t {}'.format(
        pred_info_dir))
    print('Saved transformed GT segmasks to:\t {}'.format(resized_segmask_dir))

    return pred_dir, pred_info_dir, resized_segmask_dir
def _obs_callback(self, obs):
    """Handle an incoming depth observation: inpaint it, convert it to a
    color image, store it, and signal listeners that a new image exists."""
    raw_depth = np.array(obs.image_data).reshape((obs.height, obs.width))
    inpainted = DepthImage(raw_depth).inpaint(rescale_factor=0.25)
    self._current_image = inpainted.to_color()
    self.image_signal.emit()
def plan_grasp(self, req):
    """Grasp planner request handler.

    Parameters
    ---------
    req: :obj:`ROS ServiceRequest`
        ROS ServiceRequest for grasp planner service

    Raises
    ------
    rospy.ServiceException
        If the color/depth image shapes disagree or the image is smaller
        than the policy's minimum crop size.
    """
    rospy.loginfo('Planning Grasp')

    # Set min dimensions: the policy rotates a crop window around each
    # candidate, so pad by sqrt(2)/2 of the crop size on each side.
    pad = max(
        math.ceil(
            np.sqrt(2) *
            (float(self.cfg['policy']['metric']['crop_width']) / 2)),
        math.ceil(
            np.sqrt(2) *
            (float(self.cfg['policy']['metric']['crop_height']) / 2)))
    min_width = 2 * pad + self.cfg['policy']['metric']['crop_width']
    min_height = 2 * pad + self.cfg['policy']['metric']['crop_height']

    # get the raw depth and color images as ROS Image objects
    raw_color = req.color_image
    raw_depth = req.depth_image
    segmask = None
    raw_segmask = req.segmask

    # get the raw camera info as ROS CameraInfo object
    raw_camera_info = req.camera_info

    # get the bounding box as a custom ROS BoundingBox msg
    bounding_box = req.bounding_box

    # Wrap the camera info in a perception CameraIntrinsics object.
    # K is the row-major 3x3 intrinsics: fx=K[0], fy=K[4], cx=K[2],
    # cy=K[5], skew=K[1].
    camera_intrinsics = CameraIntrinsics(
        raw_camera_info.header.frame_id, raw_camera_info.K[0],
        raw_camera_info.K[4], raw_camera_info.K[2], raw_camera_info.K[5],
        raw_camera_info.K[1], raw_camera_info.height, raw_camera_info.width)

    ### create wrapped Perception RGB and Depth Images by unpacking the ROS Images using CVBridge ###
    try:
        color_image = ColorImage(self.cv_bridge.imgmsg_to_cv2(
            raw_color, "rgb8"),
                                 frame=camera_intrinsics.frame)
        depth_image = DepthImage(self.cv_bridge.imgmsg_to_cv2(
            raw_depth, desired_encoding="passthrough"),
                                 frame=camera_intrinsics.frame)
        segmask = BinaryImage(self.cv_bridge.imgmsg_to_cv2(
            raw_segmask, desired_encoding="passthrough"),
                              frame=camera_intrinsics.frame)
    except CvBridgeError as cv_bridge_exception:
        # NOTE(review): the conversion error is only logged; execution
        # continues and may hit NameError on the unconverted images below.
        rospy.logerr(cv_bridge_exception)

    # check image sizes
    if color_image.height != depth_image.height or \
            color_image.width != depth_image.width:
        rospy.logerr(
            'Color image and depth image must be the same shape! Color is %d x %d but depth is %d x %d'
            % (color_image.height, color_image.width, depth_image.height,
               depth_image.width))
        raise rospy.ServiceException(
            'Color image and depth image must be the same shape! Color is %d x %d but depth is %d x %d'
            % (color_image.height, color_image.width, depth_image.height,
               depth_image.width))

    if color_image.height < min_height or color_image.width < min_width:
        rospy.logerr(
            'Color image is too small! Must be at least %d x %d resolution but the requested image is only %d x %d'
            % (min_height, min_width, color_image.height, color_image.width))
        raise rospy.ServiceException(
            'Color image is too small! Must be at least %d x %d resolution but the requested image is only %d x %d'
            % (min_height, min_width, color_image.height, color_image.width))

    # inpaint images
    color_image = color_image.inpaint(
        rescale_factor=self.cfg['inpaint_rescale_factor'])
    depth_image = depth_image.inpaint(
        rescale_factor=self.cfg['inpaint_rescale_factor'])

    # visualize
    if self.cfg['vis']['color_image']:
        vis.imshow(color_image)
        vis.show()
    if self.cfg['vis']['depth_image']:
        vis.imshow(depth_image)
        vis.show()
    if self.cfg['vis']['segmask'] and segmask is not None:
        vis.imshow(segmask)
        vis.show()

    # aggregate color and depth images into a single perception rgbdimage
    rgbd_image = RgbdImage.from_color_and_depth(color_image, depth_image)

    # calc crop parameters: expand the requested bounding box by `pad`
    # on every side.
    minX = bounding_box.minX - pad
    minY = bounding_box.minY - pad
    maxX = bounding_box.maxX + pad
    maxY = bounding_box.maxY + pad

    # contain box to image->don't let it exceed image height/width bounds
    if minX < 0:
        minX = 0
    if minY < 0:
        minY = 0
    if maxX > rgbd_image.width:
        maxX = rgbd_image.width
    if maxY > rgbd_image.height:
        maxY = rgbd_image.height

    # Crop center in image coordinates.
    centroidX = (maxX + minX) / 2
    centroidY = (maxY + minY) / 2

    # compute width and height
    width = maxX - minX
    height = maxY - minY

    # crop camera intrinsics and rgbd image
    cropped_camera_intrinsics = camera_intrinsics.crop(
        height, width, centroidY, centroidX)
    cropped_rgbd_image = rgbd_image.crop(height, width, centroidY, centroidX)
    cropped_segmask = None
    if segmask is not None:
        cropped_segmask = segmask.crop(height, width, centroidY, centroidX)

    # visualize
    if self.cfg['vis']['cropped_rgbd_image']:
        vis.imshow(cropped_rgbd_image)
        vis.show()

    # create an RGBDImageState with the cropped RGBDImage and CameraIntrinsics
    image_state = RgbdImageState(cropped_rgbd_image,
                                 cropped_camera_intrinsics,
                                 segmask=cropped_segmask)

    # execute policy
    try:
        return self.execute_policy(image_state, self.grasping_policy,
                                   self.grasp_pose_publisher,
                                   cropped_camera_intrinsics.frame)
    except NoValidGraspsException:
        rospy.logerr(
            'While executing policy found no valid grasps from sampled antipodal point pairs. Aborting Policy!'
        )
        raise rospy.ServiceException(
            'While executing policy found no valid grasps from sampled antipodal point pairs. Aborting Policy!'
        )
def sample_grasp(self, env, mesh_obj, vis=True):
    """Render a depth image of `mesh_obj` from an overhead camera, run the
    grasp policy on it, and return the grasp point and gripper orientation.

    Returns
    -------
    tuple
        (XYZ grasp origin as a length-3 array, gripper quaternion).
    """
    # Setup sensor.
    #camera_intr = CameraIntrinsics.load(camera_intr_filename)

    # Read images: place the camera above the object's bounding-box
    # center and render RGB + depth from it.
    inpaint_rescale_factor = 1.0
    segmask_filename = None

    _, center, extents, obj_xquat, bbox_xpos = self.get_bbox_properties(
        env, mesh_obj)
    camera_pos = copy.deepcopy(center)
    camera_pos[2] += 0.5 * extents[2] + 0.2

    env.set_camera(camera_pos,
                   np.array([0.7071068, 0, 0, -0.7071068]),
                   camera_name=f"ext_camera_0")
    rgb, depth = env.render_from_camera(int(self.config.camera_img_height),
                                        int(self.config.camera_img_width),
                                        camera_name=f"ext_camera_0")

    # Enforce zoom out: resample the images about the center with a
    # 1/rescale_factor magnification so the scene appears farther away.
    from scipy.interpolate import interp2d

    center_x = self.config.camera_img_height / 2 + 1
    center_y = self.config.camera_img_width / 2 + 1
    img_height = self.config.camera_img_height
    img_width = self.config.camera_img_width
    xdense = np.linspace(0, img_height - 1, img_height)
    ydense = np.linspace(0, img_width - 1, img_width)
    xintr = (xdense - center_x) * (1.0 / self.rescale_factor) + center_x
    yintr = (ydense - center_y) * (1.0 / self.rescale_factor) + center_y
    # Clamp sample coordinates to the image bounds.
    xintr[xintr < 0] = 0
    xintr[xintr > (img_height - 1)] = img_height - 1
    yintr[yintr < 0] = 0
    yintr[yintr > (img_width - 1)] = img_width - 1

    # Interpolate each RGB channel and the depth channel separately.
    fr = interp2d(xdense, ydense, rgb[:, :, 0], kind="linear")
    rgb_r_new = fr(xintr, yintr)
    fg = interp2d(xdense, ydense, rgb[:, :, 1], kind="linear")
    rgb_g_new = fg(xintr, yintr)
    fb = interp2d(xdense, ydense, rgb[:, :, 2], kind="linear")
    rgb_b_new = fb(xintr, yintr)
    rgb_new = np.stack([rgb_r_new, rgb_g_new, rgb_b_new], axis=2)
    fd = interp2d(xdense, ydense, depth, kind="linear")
    depth_new = fd(xintr, yintr)

    # (debug) visualize the interpolation:
    #import imageio
    #imageio.imwrite(f"tmp/rgb_{self.iter_id}.png", rgb)
    #imageio.imwrite(f"tmp/rgb2_{self.iter_id}.png", rgb_new)
    #imageio.imwrite(f"tmp/depth_{self.iter_id}.png", depth)
    #imageio.imwrite(f"tmp/depth2_{self.iter_id}.png", depth_new)

    rgb = rgb_new
    depth = depth_new
    # Depth values scale with the zoom-out magnification.
    depth = depth * self.rescale_factor

    # rgb: 128 x 128 x 1
    # depth: 128 x 128 x 1

    # Build camera intrinsics consistent with the zoomed-out rendering:
    # derive fov-x from fov-y (square aspect), then focal lengths.
    scaled_camera_fov_y = self.config.camera_fov_y
    aspect = 1
    scaled_fovx = 2 * np.arctan(
        np.tan(np.deg2rad(scaled_camera_fov_y) * 0.5) * aspect)
    scaled_fovx = np.rad2deg(scaled_fovx)
    scaled_fovy = scaled_camera_fov_y

    cx = self.config.camera_img_width * 0.5
    cy = self.config.camera_img_height * 0.5
    scaled_fx = cx / np.tan(np.deg2rad(
        scaled_fovx / 2.)) * (self.rescale_factor)
    scaled_fy = cy / np.tan(np.deg2rad(
        scaled_fovy / 2.)) * (self.rescale_factor)

    camera_intr = CameraIntrinsics(frame='phoxi',
                                   fx=scaled_fx,
                                   fy=scaled_fy,
                                   cx=self.config.camera_img_width * 0.5,
                                   cy=self.config.camera_img_height * 0.5,
                                   height=self.config.camera_img_height,
                                   width=self.config.camera_img_width)
    depth_im = DepthImage(depth, frame=camera_intr.frame)
    # Synthesize an all-black color image (only depth is rendered).
    color_im = ColorImage(np.zeros([depth_im.height, depth_im.width,
                                    3]).astype(np.uint8),
                          frame=camera_intr.frame)

    # Optionally read a segmask: here every valid-depth pixel is used.
    valid_px_mask = depth_im.invalid_pixel_mask().inverse()
    segmask = valid_px_mask

    # Inpaint.
    depth_im = depth_im.inpaint(rescale_factor=inpaint_rescale_factor)

    # Create state.
    rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)
    state = RgbdImageState(rgbd_im, camera_intr, segmask=segmask)

    # Query policy.
    policy_start = time.time()
    action = self.policy(state)
    print("Planning took %.3f sec" % (time.time() - policy_start))

    # numpy array with 2 values
    grasp_center = action.grasp.center._data[:, 0]  #(width, depth)
    # Undo the zoom-out scaling on the grasp depth.
    grasp_depth = action.grasp.depth * (1 / self.rescale_factor)
    grasp_angle = action.grasp.angle  #np.pi*0.3

    if self.config.data_collection_mode:
        self.current_grasp = action.grasp

        depth_im = state.rgbd_im.depth
        scale = 1.0
        depth_im_scaled = depth_im.resize(scale)
        # Translate so the grasp center lands in the middle of the image,
        # then rotate by the grasp angle and crop to the GQ-CNN input size.
        translation = scale * np.array([
            depth_im.center[0] - grasp_center[1],
            depth_im.center[1] - grasp_center[0]
        ])
        im_tf = depth_im_scaled
        im_tf = depth_im_scaled.transform(translation, grasp_angle)
        im_tf = im_tf.crop(self.gqcnn_image_size, self.gqcnn_image_size)

        # get the patch
        self.current_patch = im_tf.raw_data

    XYZ_origin, gripper_quat = self.compute_grasp_pts_from_grasp_sample(
        grasp_center, grasp_depth, grasp_angle, env)

    return XYZ_origin[:, 0], gripper_quat

    # NOTE(review): everything below is unreachable (it follows the
    # return above) and appears to be leftover debug/visualization code
    # — confirm before relying on it. `im_tf` is also only defined in
    # data-collection mode.
    # Vis final grasp.
    if vis:
        from visualization import Visualizer2D as vis
        vis.figure(size=(10, 10))
        vis.imshow(rgbd_im.depth,
                   vmin=self.policy_config["vis"]["vmin"],
                   vmax=self.policy_config["vis"]["vmax"])
        vis.grasp(action.grasp, scale=2.5, show_center=False, show_axis=True)
        vis.title("Planned grasp at depth {0:.3f}m with Q={1:.3f}".format(
            action.grasp.depth, action.q_value))
        vis.show(f"tmp/grasp2_{mesh_obj.name}_{self.iter_id}.png")

        vis.figure(size=(10, 10))
        vis.imshow(im_tf,
                   vmin=self.policy_config["vis"]["vmin"],
                   vmax=self.policy_config["vis"]["vmax"])
        vis.show(f"tmp/cropd_{mesh_obj.name}_{self.iter_id}.png")

        import ipdb
        ipdb.set_trace()

    return XYZ_origin[:, 0], gripper_quat
    import ipdb
    ipdb.set_trace()