def test_binary_init(self):
    # valid data
    random_valid_data = (255.0 *
                         np.random.rand(IM_HEIGHT, IM_WIDTH)).astype(np.uint8)
    binary_data = 255 * (random_valid_data > 128)
    im = BinaryImage(random_valid_data)
    self.assertEqual(im.height, IM_HEIGHT)
    self.assertEqual(im.width, IM_WIDTH)
    self.assertEqual(im.channels, 1)
    self.assertTrue(np.allclose(im.data, binary_data))

    # invalid channels
    random_data = np.random.rand(IM_HEIGHT, IM_WIDTH, 3).astype(np.uint8)
    caught_bad_channels = False
    try:
        im = BinaryImage(random_data)
    except Exception:
        caught_bad_channels = True
    self.assertTrue(caught_bad_channels)

    # invalid type
    random_data = np.random.rand(IM_HEIGHT, IM_WIDTH).astype(np.float32)
    caught_bad_dtype = False
    try:
        im = BinaryImage(random_data)
    except Exception:
        caught_bad_dtype = True
    self.assertTrue(caught_bad_dtype)
def register_example(reg_example, aligner, model):
    ses = reg_example.salient_edge_set

    # Pre-process mesh
    mesh = ses.mesh
    edge_mask = ses.edge_mask
    edge_pc_obj = PointCloud(generate_canonical_pc(mesh, edge_mask).T,
                             frame='obj')

    # Process depth image
    ci = reg_example.camera_intrs
    depth_im = reg_example.depth_im
    point_cloud_im = ci.deproject_to_image(depth_im)
    normal_cloud_im = point_cloud_im.normal_cloud_im()

    joined = np.dstack((depth_im.data, normal_cloud_im.data))
    mask = model.predict(joined[np.newaxis, :, :, :])[0]
    mask *= 255.0
    mask = BinaryImage(mask.astype(np.uint8))

    depth_im_edges = depth_im.mask_binary(mask)
    edge_pc_cam = ci.deproject(depth_im_edges)
    edge_pc_cam.remove_zero_points()

    # Align the two point sets with Super4PCS
    T_obj_camera_est = aligner.align(edge_pc_cam, edge_pc_obj)

    return T_obj_camera_est
def _action_callback(self, action):
    # Update image with binary mask
    binary_data = np.frombuffer(action.mask_data,
                                dtype=np.uint8).reshape(action.height,
                                                        action.width)
    binary_image = BinaryImage(binary_data)
    mask = binary_image.nonzero_pixels()
    self._current_image.data[mask[:, 0], mask[:, 1], :] = (
        0.3 * self._current_image.data[mask[:, 0], mask[:, 1], :] +
        0.7 * np.array([200, 30, 30], dtype=np.uint8))

    # Paint the appropriate action type over the current image
    action_type = action.action_type
    action_data = action.action_data
    if action_type == 'parallel_jaw':
        location = np.array([action_data[0], action_data[1]], dtype=np.uint32)
        axis = np.array([action_data[2], action_data[3]])
        if axis[0] != 0:
            angle = np.rad2deg(np.arctan(axis[1] / axis[0]))
        else:
            angle = 90
        jaw_image = ColorImage(imutils.rotate_bound(self._jaw_image.data,
                                                    angle))
        self._current_image = self._overlay_image(self._current_image,
                                                  jaw_image, location)
    elif action_type == 'suction':
        location = np.array([action_data[0], action_data[1]], dtype=np.uint32)
        self._current_image = self._overlay_image(self._current_image,
                                                  self._suction_image,
                                                  location)
    elif action_type == 'push':
        start = np.array([action_data[0], action_data[1]], dtype=np.int32)
        end = np.array([action_data[2], action_data[3]], dtype=np.int32)
        axis = (end - start).astype(np.float32)
        axis_len = np.linalg.norm(axis)
        if axis_len == 0:
            axis = np.array([0.0, 1.0])
            axis_len = 1.0
        axis = axis / axis_len
        if axis[0] != 0:
            angle = np.rad2deg(np.arctan2(axis[1], axis[0]))
        else:
            angle = (90.0 if axis[1] > 0 else -90.0)
        start_image = ColorImage(imutils.rotate_bound(self._push_image.data,
                                                      angle))
        end_image = ColorImage(imutils.rotate_bound(self._push_end_image.data,
                                                    angle))
        start = np.array(start - axis * 0.5 * self._push_image.width,
                         dtype=np.int32)
        end = np.array(end + axis * 0.5 * self._push_end_image.width,
                       dtype=np.int32)
        self._current_image = self._overlay_image(self._current_image,
                                                  start_image, start)
        self._current_image = self._overlay_image(self._current_image,
                                                  end_image, end)

    # Update display
    self.image_signal.emit()
    self.conf_signal.emit(action.confidence)
def rendered_images(data, render_mode=RenderMode.SEGMASK):
    rendered_images = []
    num_images = data.attrs[NUM_IMAGES_KEY]

    for i in range(num_images):
        # get the image data
        image_key = IMAGE_KEY + '_' + str(i)
        image_data = data[image_key]
        image_arr = np.array(image_data[IMAGE_DATA_KEY])
        frame = image_data.attrs[IMAGE_FRAME_KEY]
        if render_mode == RenderMode.SEGMASK:
            image = BinaryImage(image_arr, frame)
        elif render_mode == RenderMode.DEPTH:
            image = DepthImage(image_arr, frame)
        elif render_mode == RenderMode.SCALED_DEPTH:
            image = ColorImage(image_arr, frame)

        R_camera_table = image_data.attrs[CAM_ROT_KEY]
        t_camera_table = image_data.attrs[CAM_POS_KEY]
        frame = image_data.attrs[CAM_FRAME_KEY]
        T_camera_world = RigidTransform(R_camera_table,
                                        t_camera_table,
                                        from_frame=frame,
                                        to_frame='world')

        rendered_images.append(ObjectRender(image, T_camera_world))
    return rendered_images
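# A minimal usage sketch of rendered_images(), assuming an HDF5 file that
# follows the NUM_IMAGES_KEY / IMAGE_KEY layout used above; the filename
# 'renders.hdf5' is hypothetical, not part of the original snippet.
import h5py

with h5py.File('renders.hdf5', 'r') as f:
    depth_renders = rendered_images(f, render_mode=RenderMode.DEPTH)
print('Loaded %d rendered depth images' % len(depth_renders))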
def plan_grasp_segmask(self, req):
    """Grasp planner request handler.

    Parameters
    ----------
    req: :obj:`ROS ServiceRequest`
        ROS `ServiceRequest` for grasp planner service.
    """
    color_im, depth_im, camera_intr = self.read_images(req)
    raw_segmask = req.segmask
    try:
        segmask = BinaryImage(self.cv_bridge.imgmsg_to_cv2(
            raw_segmask, desired_encoding="passthrough"),
                              frame=camera_intr.frame)
    except CvBridgeError as cv_bridge_exception:
        rospy.logerr(cv_bridge_exception)
    if color_im.height != segmask.height or \
            color_im.width != segmask.width:
        msg = ("Images and segmask must be the same shape! Color image is"
               " %d x %d but segmask is %d x %d") % (
                   color_im.height, color_im.width, segmask.height,
                   segmask.width)
        rospy.logerr(msg)
        raise rospy.ServiceException(msg)

    return self._plan_grasp(color_im,
                            depth_im,
                            camera_intr,
                            segmask=segmask)
def load(save_dir):
    if not os.path.exists(save_dir):
        raise ValueError('Directory %s does not exist!' % (save_dir))

    color_image_filename = os.path.join(save_dir, 'color.png')
    depth_image_filename = os.path.join(save_dir, 'depth.npy')
    camera_intr_filename = os.path.join(save_dir, 'camera.intr')
    segmask_filename = os.path.join(save_dir, 'segmask.npy')
    obj_segmask_filename = os.path.join(save_dir, 'obj_segmask.npy')
    state_filename = os.path.join(save_dir, 'state.pkl')

    camera_intr = CameraIntrinsics.load(camera_intr_filename)
    color = ColorImage.open(color_image_filename, frame=camera_intr.frame)
    depth = DepthImage.open(depth_image_filename, frame=camera_intr.frame)

    segmask = None
    if os.path.exists(segmask_filename):
        segmask = BinaryImage.open(segmask_filename,
                                   frame=camera_intr.frame)

    obj_segmask = None
    if os.path.exists(obj_segmask_filename):
        obj_segmask = SegmentationImage.open(obj_segmask_filename,
                                             frame=camera_intr.frame)

    fully_observed = None
    if os.path.exists(state_filename):
        fully_observed = pkl.load(open(state_filename, 'rb'))

    return RgbdImageState(RgbdImage.from_color_and_depth(color, depth),
                          camera_intr,
                          segmask=segmask,
                          obj_segmask=obj_segmask,
                          fully_observed=fully_observed)
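# A short usage sketch of load(); the directory name is hypothetical and is
# assumed to contain the color.png / depth.npy / camera.intr files listed above.
state = load('data/example_state')
print(state.rgbd_im.depth.frame)
if state.segmask is not None:
    print('Segmask covers %d pixels' % state.segmask.nonzero_pixels().shape[0])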
def get_state(self, depth, segmask):
    # Read images.
    depth_im = DepthImage(depth, frame=self.camera_intr.frame)
    color_im = ColorImage(np.zeros([depth_im.height, depth_im.width,
                                    3]).astype(np.uint8),
                          frame=self.camera_intr.frame)
    segmask = BinaryImage(segmask.astype(np.uint8) * 255,
                          frame=self.camera_intr.frame)

    # Inpaint.
    depth_im = depth_im.inpaint(rescale_factor=self.inpaint_rescale_factor)

    # Create state.
    rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)
    state = RgbdImageState(rgbd_im, self.camera_intr, segmask=segmask)

    return state, rgbd_im
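# A hedged sketch of how the returned state might feed a grasping policy.
# `planner`, `policy`, `depth_arr`, and `mask_arr` are assumed names and are
# not part of the original snippet.
state, rgbd_im = planner.get_state(depth_arr, mask_arr)
action = policy(state)  # e.g. a gqcnn grasping policy applied to the state
print(action.grasp.center, action.grasp.depth, action.q_value)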
def bbox_to_mask(bbox, c_img):
    loX, loY, hiX, hiY = bbox
    bin_img = np.zeros(c_img.shape[0:2])
    # don't include workspace points
    for x in range(loX, hiX):
        for y in range(loY, hiY):
            r, g, b = c_img[y][x]
            if r < 240 or g < 240 or b < 240:
                bin_img[y][x] = 255
    return BinaryImage(bin_img.astype(np.uint8))
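# The pixel-by-pixel loop above can be slow for large crops. A vectorized
# NumPy sketch of the same 240-threshold heuristic; the function name is
# hypothetical and mirrors the snippet rather than an existing API.
def bbox_to_mask_vectorized(bbox, c_img):
    """Vectorized variant of bbox_to_mask (same workspace threshold)."""
    loX, loY, hiX, hiY = bbox
    bin_img = np.zeros(c_img.shape[:2], dtype=np.uint8)
    crop = c_img[loY:hiY, loX:hiX]
    # a pixel is foreground if any channel is darker than the white workspace
    foreground = np.any(crop < 240, axis=2)
    bin_img[loY:hiY, loX:hiX][foreground] = 255
    return BinaryImage(bin_img)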
def plan_grasp_segmask_handler(self, req):
    """Grasp planner request handler.

    Parameters
    ----------
    req: :obj:`ROS ServiceRequest`
        ROS `ServiceRequest` for grasp planner service.
    """
    camera_data = self.read_images(req)
    raw_segmask = req.segmask

    # create segmentation mask
    try:
        camera_data.seg_img = BinaryImage(
            self.cv_bridge.imgmsg_to_cv2(raw_segmask,
                                         desired_encoding="passthrough"),
            frame=camera_data.intrinsic_params.frame)
    except CvBridgeError as cv_bridge_exception:
        rospy.logerr(cv_bridge_exception)

    if camera_data.rgb_img.height != camera_data.seg_img.height or \
            camera_data.rgb_img.width != camera_data.seg_img.width:
        msg = ("Images and segmask must be the same shape! Color image is"
               " %d x %d but segmask is %d x %d") % (
                   camera_data.rgb_img.height, camera_data.rgb_img.width,
                   camera_data.seg_img.height, camera_data.seg_img.width)
        rospy.logerr(msg)
        raise rospy.ServiceException(msg)

    self.grasp_poses = []
    ok = self.plan_grasp(camera_data, n_candidates=1)
    if ok:
        return self._create_grasp_planner_srv_msg()
    else:
        return None
def run_gqcnn(depth, seg_mask):
    best_angle = 0
    best_point = [0, 0]
    best_dist = 0

    depth_im = DepthImage(depth.astype("float32"), frame=camera_intr.frame)
    color_im = ColorImage(np.zeros([imageWidth, imageHeight,
                                    3]).astype(np.uint8),
                          frame=camera_intr.frame)
    print(seg_mask)
    segmask = BinaryImage(seg_mask)
    print(segmask)

    rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)
    state_gqcnn = RgbdImageState(rgbd_im, camera_intr, segmask=segmask)

    policy_start = time.time()
    try:
        action = policy(state_gqcnn)
        logger.info("Planning took %.3f sec" % (time.time() - policy_start))
        best_point = [action.grasp.center[0], action.grasp.center[1]]
        best_angle = np.rad2deg(float(action.grasp.angle))
    except Exception as inst:
        print(inst)

    return best_angle, best_point, best_dist
def run_gqcnn(depth, seg_mask):
    best_angle = 0
    best_point = [0, 0]
    best_dist = 0

    depth_im = DepthImage(depth.astype("float32"), frame=camera_intr.frame)
    color_im = ColorImage(np.zeros([imageWidth, imageHeight,
                                    3]).astype(np.uint8),
                          frame=camera_intr.frame)
    segmask = BinaryImage(seg_mask)

    rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)
    state_gqcnn = RgbdImageState(rgbd_im, camera_intr, segmask=segmask)

    policy_start = time.time()
    q_value = -1
    try:
        action = policy(state_gqcnn)
        best_point = [action.grasp.center[0], action.grasp.center[1]]
        best_angle = np.rad2deg(float(action.grasp.angle))
        q_value = action.q_value
        print("inner : ", action.q_value)
    except Exception as inst:
        print(inst)

    return q_value
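# Since this variant returns only the grasp quality, a hedged sketch of how it
# could rank several candidate segmasks; `depth` and `candidate_masks` are
# assumed inputs, not part of the original code.
best_q, best_mask = -1.0, None
for m in candidate_masks:
    q = run_gqcnn(depth, m)
    if q > best_q:
        best_q, best_mask = q, m
print('Best candidate quality: %.3f' % best_q)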
                          'gqcnn', config['metric'])

for ind in range(num_image_tensors):
    print("File: " + str(ind))
    image_arr = np.load(image_tensors[ind])['arr_0']
    binary_arr = np.load(binary_image_tensors[ind])['arr_0']
    start_ind_arr = np.load(start_ind_tensors[ind])['arr_0']
    end_ind_arr = np.load(end_ind_tensors[ind])['arr_0']
    datapoints_in_file = min(len(start_ind_arr), DATAPOINTS_PER_FILE)

    for i in range(datapoints_in_file):
        start_ind = start_ind_arr[i]
        end_ind = end_ind_arr[i]

        # depth_im = DepthImage(image_arr[i,...])
        binary_im = BinaryImage(binary_arr[i, ...])
        grasps, metrics = load_grasps_for_image(start_ind, end_ind,
                                                grasp_tensors,
                                                metric_tensors)

        # # read pixel offsets
        # cx = depth_im.center[1]
        # cy = depth_im.center[0]

        # for g, grasp in enumerate(grasps):
        #     current_index = start_ind + g
        #     grasp_x = grasp[1]
        #     grasp_y = grasp[0]
        #     grasp_angle = grasp[3]

        #     # center images on the grasp, rotate to image x axis
# Read images.
depth_data = np.load(depth_im_filename)
depth_data = depth_data.astype(np.float32) / 1000.0
depth_im = DepthImage(depth_data, frame=camera_intr.frame)
color_im = ColorImage(np.zeros([depth_im.height, depth_im.width,
                                3]).astype(np.uint8),
                      frame=camera_intr.frame)

# Optionally read a segmask.
mask = np.zeros((camera_intr.height, camera_intr.width, 1), dtype=np.uint8)
c = np.array([165, 460, 500, 135])
r = np.array([165, 165, 370, 370])
rr, cc = skimage.draw.polygon(r, c, shape=mask.shape)
mask[rr, cc, 0] = 255
segmask = BinaryImage(mask)
if segmask_filename is not None:
    segmask = BinaryImage.open(segmask_filename)
valid_px_mask = depth_im.invalid_pixel_mask().inverse()
if segmask is None:
    segmask = valid_px_mask
else:
    segmask = segmask.mask_binary(valid_px_mask)

# Create new cloud.
point_cloud = camera_intr.deproject(depth_im)
point_cloud.remove_zero_points()
pcl_cloud = pcl.PointCloud(point_cloud.data.T.astype(np.float32))
tree = pcl_cloud.make_kdtree()

# Find large clusters (likely to be real objects instead of noise).
def run_experiment(): """ Run the experiment """ # get the images from the sensor previous_grasp = None while True: rospy.loginfo("Waiting for images") start_time = time.time() raw_color = rospy.wait_for_message("/camera/rgb/image_color", Image) raw_depth = rospy.wait_for_message("/camera/depth_registered/image", Image) image_load_time = time.time() - start_time rospy.loginfo('Images loaded in: ' + str(image_load_time) + ' secs.') ### Create wrapped Perception RGB and Depth Images by unpacking the ROS Images using CVBridge ### try: color_image = perception.ColorImage( cv_bridge.imgmsg_to_cv2(raw_color, "rgb8"), frame=T_camera_world.from_frame) depth_image = perception.DepthImage( cv_bridge.imgmsg_to_cv2(raw_depth, desired_encoding="passthrough"), frame=T_camera_world.from_frame) except CvBridgeError as cv_bridge_exception: rospy.logerr(cv_bridge_exception) # inpaint to remove holes inpainted_color_image = color_image.inpaint( rescale_factor=INPAINT_RESCALE_FACTOR) inpainted_depth_image = depth_image.inpaint( rescale_factor=INPAINT_RESCALE_FACTOR) if DETECT_OBJECTS: detector = RgbdDetectorFactory.detector('point_cloud_box') detections = detector.detect(inpainted_color_image, inpainted_depth_image, detector_cfg, camera_intrinsics, T_camera_world, vis_foreground=False, vis_segmentation=False) detected_obj = None if previous_grasp is not None: position = previous_grasp.pose.position position = np.array([position.x, position.y, position.z]) center_point = Point(position, camera_intrinsics.frame) center_pixel = camera_intrinsics.project(center_point, camera_intrinsics.frame) i, j = center_pixel.y, center_pixel.x if DETECT_OBJECTS: for detection in detections: binaryIm = detection.binary_im if binaryIm[i, j]: segmask = binaryIm detected_obj = detection break else: # Generate an ellipse inverse segmask centered on previous grasp y, x = np.ogrid[-i:IMAGE_HEIGHT - i, -j:IMAGE_WIDTH - j] circlemask = x * x + y * y <= CIRCLE_RAD * CIRCLE_RAD segmask_data = np.ones( (IMAGE_HEIGHT, IMAGE_WIDTH), dtype=np.uint8) * 255 segmask_data[circlemask] = 0 segmask = BinaryImage(segmask_data, camera_intrinsics.frame) else: segmask = BinaryImage( np.ones((IMAGE_HEIGHT, IMAGE_WIDTH), dtype=np.uint8) * 255, camera_intrinsics.frame) segmask._encoding = 'mono8' if VISUALIZE_DETECTOR_OUTPUT: vis.figure() vis.subplot(1, 2, 1) vis.imshow(detected_obj.color_thumbnail) vis.subplot(1, 2, 2) vis.imshow(detected_obj.depth_thumbnail) vis.show() try: rospy.loginfo('Planning Grasp') start_time = time.time() planned_grasp_data = plan_grasp(inpainted_color_image.rosmsg, inpainted_depth_image.rosmsg, segmask.rosmsg, raw_camera_info, boundingBox) grasp_plan_time = time.time() - start_time rospy.loginfo('Total grasp planning time: ' + str(grasp_plan_time) + ' secs.') rospy.loginfo('Queueing Grasp') previous_grasp = planned_grasp_data.grasp execute_grasp(previous_grasp) # raw_input("Press ENTER to resume") except rospy.ServiceException as e: rospy.logerr(e) previous_grasp = None raw_input("Press ENTER to resume")
def visualize(self): """ Visualize predictions """ logging.info('Visualizing ' + self.datapoint_type) # iterate through shuffled file indices for i in self.indices: im_filename = self.im_filenames[i] pose_filename = self.pose_filenames[i] label_filename = self.label_filenames[i] logging.info('Loading Image File: ' + im_filename + ' Pose File: ' + pose_filename + ' Label File: ' + label_filename) # load tensors from files metric_tensor = np.load(os.path.join(self.data_dir, label_filename))['arr_0'] label_tensor = 1 * (metric_tensor > self.metric_thresh) image_tensor = np.load(os.path.join(self.data_dir, im_filename))['arr_0'] hand_poses_tensor = np.load( os.path.join(self.data_dir, pose_filename))['arr_0'] pose_tensor = self._read_pose_data(hand_poses_tensor, self.input_data_mode) # score with neural network pred_p_success_tensor = self._gqcnn.predict( image_tensor, pose_tensor) # compute results classification_result = ClassificationResult( [pred_p_success_tensor], [label_tensor]) logging.info('Error rate on files: %.3f' % (classification_result.error_rate)) logging.info('Precision on files: %.3f' % (classification_result.precision)) logging.info('Recall on files: %.3f' % (classification_result.recall)) mispred_ind = classification_result.mispredicted_indices() correct_ind = classification_result.correct_indices() # IPython.embed() if self.datapoint_type == 'true_positive' or self.datapoint_type == 'true_negative': vis_ind = correct_ind else: vis_ind = mispred_ind num_visualized = 0 # visualize for ind in vis_ind: # limit the number of sampled datapoints displayed per object if num_visualized >= self.samples_per_object: break num_visualized += 1 # don't visualize the datapoints that we don't want if self.datapoint_type == 'true_positive': if classification_result.labels[ind] == 0: continue elif self.datapoint_type == 'true_negative': if classification_result.labels[ind] == 1: continue elif self.datapoint_type == 'false_positive': if classification_result.labels[ind] == 0: continue elif self.datapoint_type == 'false_negative': if classification_result.labels[ind] == 1: continue logging.info('Datapoint %d of files for %s' % (ind, im_filename)) logging.info('Depth: %.3f' % (hand_poses_tensor[ind, 2])) data = image_tensor[ind, ...] 
if self.display_image_type == RenderMode.SEGMASK: image = BinaryImage(data) elif self.display_image_type == RenderMode.GRAYSCALE: image = GrayscaleImage(data) elif self.display_image_type == RenderMode.COLOR: image = ColorImage(data) elif self.display_image_type == RenderMode.DEPTH: image = DepthImage(data) elif self.display_image_type == RenderMode.RGBD: image = RgbdImage(data) elif self.display_image_type == RenderMode.GD: image = GdImage(data) vis2d.figure() if self.display_image_type == RenderMode.RGBD: vis2d.subplot(1, 2, 1) vis2d.imshow(image.color) grasp = Grasp2D(Point(image.center, 'img'), 0, hand_poses_tensor[ind, 2], self.gripper_width_m) grasp.camera_intr = grasp.camera_intr.resize(1.0 / 3.0) vis2d.grasp(grasp) vis2d.subplot(1, 2, 2) vis2d.imshow(image.depth) vis2d.grasp(grasp) elif self.display_image_type == RenderMode.GD: vis2d.subplot(1, 2, 1) vis2d.imshow(image.gray) grasp = Grasp2D(Point(image.center, 'img'), 0, hand_poses_tensor[ind, 2], self.gripper_width_m) grasp.camera_intr = grasp.camera_intr.resize(1.0 / 3.0) vis2d.grasp(grasp) vis2d.subplot(1, 2, 2) vis2d.imshow(image.depth) vis2d.grasp(grasp) else: vis2d.imshow(image) grasp = Grasp2D(Point(image.center, 'img'), 0, hand_poses_tensor[ind, 2], self.gripper_width_m) grasp.camera_intr = grasp.camera_intr.resize(1.0 / 3.0) vis2d.grasp(grasp) vis2d.title('Datapoint %d: Pred: %.3f Label: %.3f' % (ind, classification_result.pred_probs[ind, 1], classification_result.labels[ind])) vis2d.show() # cleanup self._cleanup()
def _plan_grasp(self,
                color_im,
                depth_im,
                camera_intr,
                bounding_box=None,
                segmask=None):
    """Grasp planner request handler.

    Parameters
    ----------
    req: :obj:`ROS ServiceRequest`
        ROS ServiceRequest for grasp planner service
    """
    rospy.loginfo('Planning Grasp')

    # inpaint images
    color_im = color_im.inpaint(
        rescale_factor=self.cfg['inpaint_rescale_factor'])
    depth_im = depth_im.inpaint(
        rescale_factor=self.cfg['inpaint_rescale_factor'])

    # init segmask
    if segmask is None:
        segmask = BinaryImage(255 * np.ones(depth_im.shape).astype(np.uint8),
                              frame=color_im.frame)

    # visualize
    if self.cfg['vis']['color_image']:
        vis.imshow(color_im)
        vis.show()
    if self.cfg['vis']['depth_image']:
        vis.imshow(depth_im)
        vis.show()
    if self.cfg['vis']['segmask'] and segmask is not None:
        vis.imshow(segmask)
        vis.show()

    # aggregate color and depth images into a single perception RgbdImage
    rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)

    # mask bounding box
    if bounding_box is not None:
        # calc bb parameters
        min_x = bounding_box.minX
        min_y = bounding_box.minY
        max_x = bounding_box.maxX
        max_y = bounding_box.maxY

        # contain box to image->don't let it exceed image height/width bounds
        if min_x < 0:
            min_x = 0
        if min_y < 0:
            min_y = 0
        if max_x > rgbd_im.width:
            max_x = rgbd_im.width
        if max_y > rgbd_im.height:
            max_y = rgbd_im.height

        # mask
        bb_segmask_arr = np.zeros([rgbd_im.height, rgbd_im.width])
        bb_segmask_arr[min_y:max_y, min_x:max_x] = 255
        bb_segmask = BinaryImage(bb_segmask_arr.astype(np.uint8),
                                 segmask.frame)
        segmask = segmask.mask_binary(bb_segmask)

    # visualize
    if self.cfg['vis']['rgbd_state']:
        masked_rgbd_im = rgbd_im.mask_binary(segmask)
        vis.figure()
        vis.subplot(1, 2, 1)
        vis.imshow(masked_rgbd_im.color)
        vis.subplot(1, 2, 2)
        vis.imshow(masked_rgbd_im.depth)
        vis.show()

    # create an RgbdImageState with the cropped RgbdImage and CameraIntrinsics
    rgbd_state = RgbdImageState(rgbd_im, camera_intr, segmask=segmask)

    # execute policy
    try:
        return self.execute_policy(rgbd_state, self.grasping_policy,
                                   self.grasp_pose_publisher,
                                   camera_intr.frame)
    except NoValidGraspsException:
        rospy.logerr(
            'While executing policy found no valid grasps from sampled'
            ' antipodal point pairs. Aborting Policy!')
        raise rospy.ServiceException(
            'While executing policy found no valid grasps from sampled'
            ' antipodal point pairs. Aborting Policy!')
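# The bounding-box handling above reduces to a small, reusable step. A sketch
# under the assumption that `segmask` is a perception BinaryImage and
# `rgbd_im` an RgbdImage as in the snippet; the helper name is hypothetical.
def crop_segmask_to_box(segmask, rgbd_im, min_x, min_y, max_x, max_y):
    """Intersect a segmask with an axis-aligned box clipped to the image."""
    min_x, min_y = max(min_x, 0), max(min_y, 0)
    max_x, max_y = min(max_x, rgbd_im.width), min(max_y, rgbd_im.height)
    bb_arr = np.zeros([rgbd_im.height, rgbd_im.width], dtype=np.uint8)
    bb_arr[min_y:max_y, min_x:max_x] = 255
    return segmask.mask_binary(BinaryImage(bb_arr, segmask.frame))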
def plan_grasp(self, camera_data, n_candidates=1): """Grasp planner. Parameters --------- req: :obj:`ROS ServiceRequest` ROS `ServiceRequest` for grasp planner service. """ self._camera_data = camera_data # --- Inpaint images --- # color_im = camera_data.rgb_img.inpaint( rescale_factor=self.cfg["inpaint_rescale_factor"]) depth_im = camera_data.depth_img.inpaint( rescale_factor=self.cfg["inpaint_rescale_factor"]) # --- Init segmask --- # if camera_data.seg_img is None: segmask = BinaryImage(255 * np.ones(depth_im.shape).astype(np.uint8), frame=color_im.frame) else: segmask = camera_data.seg_mask # --- Aggregate color and depth images into a single # BerkeleyAutomation/perception `RgbdImage` --- # rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im) # --- Mask bounding box --- # if camera_data.bounding_box is not None: # Calc bb parameters. min_x = camera_data.bounding_box['min_x'] min_y = camera_data.bounding_box['min_y'] max_x = camera_data.bounding_box['max_x'] max_y = camera_data.bounding_box['max_y'] # Contain box to image->don't let it exceed image height/width # bounds. if min_x < 0: min_x = 0 if min_y < 0: min_y = 0 if max_x > rgbd_im.width: max_x = rgbd_im.width if max_y > rgbd_im.height: max_y = rgbd_im.height # Mask. bb_segmask_arr = np.zeros([rgbd_im.height, rgbd_im.width]) bb_segmask_arr[min_y:max_y, min_x:max_x] = 255 bb_segmask = BinaryImage(bb_segmask_arr.astype(np.uint8), segmask.frame) segmask = segmask.mask_binary(bb_segmask) # --- Create an `RgbdImageState` with the cropped `RgbdImage` and `CameraIntrinsics` --- # rgbd_state = RgbdImageState(rgbd_im, camera_data.intrinsic_params, segmask=segmask) # --- Execute policy --- # try: grasps_and_predictions = self.execute_policy(rgbd_state, self.grasping_policy, camera_data.intrinsic_params.frame, n_candidates) self._dexnet_gp = grasps_and_predictions # --- project planar grasps to 3D space --- # l = [] for gp in grasps_and_predictions: # my method pose_6d = self.transform_grasp_to_6D(gp[0], camera_data.intrinsic_params) pos = pose_6d[:3,3] rot = pose_6d[:3, :3] grasp_6D = Grasp6D(position=pos, rotation=rot, width=gp[0].width, score= gp[1], ref_frame=camera_data.intrinsic_params.frame) l.append(grasp_6D) # dexnet method --> needs autolab_core installed as catkin package # 6D_gp = gp[0].pose() self.grasp_poses = l self.best_grasp = l[0] self.visualize() return True except NoValidGraspsException: warnings.warn(("While executing policy found no valid grasps from sampled antipodal point pairs!")) return False
def create_camera_data(self, rgb: np.ndarray, depth: np.ndarray, cam_intr_frame: str, fx: float, fy: float, cx: float, cy: float, skew: float, w: int, h: int, seg_mask: np.ndarray = np.empty(shape=(0,)), bbox: tuple = ()): """Create the CameraData object in the correct format expected by gqcnn Parameters --------- req: rgb: np.ndarray: rgb image depth: np.ndarray: depth image cam_intr_frame: str: the reference frame of the images. Grasp poses are computed wrt this frame fx: float: focal length (x) fy: float: focal length (y) cx: float: principal point (x) cy: float: principal point (y) skew: float: skew coefficient w: int: width h: int: height opt: seg_mask: np.ndarray: segmentation mask bbox: tuple: a tuple of 4 values that define the mask bounding box = (x_min, y_min, x_max, y_max) Returns: CameraData: object that stores the input data required by plan_grasp() """ camera_data = CameraData() # Create images camera_data.rgb_img = ColorImage(rgb, frame=cam_intr_frame) camera_data.depth_img = DepthImage(depth, frame=cam_intr_frame) if seg_mask.size > 0: camera_data.seg_img = BinaryImage(seg_mask, cam_intr_frame) # Check image sizes if camera_data.rgb_img.height != camera_data.depth_img.height or \ camera_data.rgb_img.width != camera_data.depth_img.width: msg = ("Color image and depth image must be the same shape! Color" " is %d x %d but depth is %d x %d") % ( camera_data.rgb_img.height, camera_data.rgb_img.width, camera_data.depth_img.height, camera_data.depth_img.width) raise AssertionError(msg) if (camera_data.rgb_img.height < self.min_height or camera_data.rgb_img.width < self.min_width): msg = ("Color image is too small! Must be at least %d x %d" " resolution but the requested image is only %d x %d") % ( self.min_height, self.min_width, camera_data.rgb_img.height, camera_data.rgb_img.width) raise AssertionError(msg) if camera_data.rgb_img.height != camera_data.seg_img.height or \ camera_data.rgb_img.width != camera_data.seg_img.width: msg = ("Images and segmask must be the same shape! Color image is" " %d x %d but segmask is %d x %d") % ( camera_data.rgb_img.height, camera_data.rgb_img.width, camera_data.seg_img.height, camera_data.seg_img.width) raise AssertionError(msg) # set intrinsic params camera_data.intrinsic_params = CameraIntrinsics(cam_intr_frame, fx, fy, cx, cy, skew, h, w) # set mask bounding box if len(bbox) == 4: camera_data.bounding_box = {'min_x': bbox[0], 'min_y': bbox[1], 'max_x': bbox[2], 'max_y': bbox[3]} return camera_data
def detect_img(): global color_img global mask global depth_im global color_im global segmask global center global angle global best_angle global prev_q_value global best_center global depth_frame global frames def nothing(x): pass mask = np.ones((640, 480), dtype=np.uint8) * 255 cv2.namedWindow("rgb", cv2.WINDOW_NORMAL) cv2.resizeWindow("rgb", 1280, 720) cv2.createTrackbar('W', "rgb", 0, 100, nothing) cv2.namedWindow("mask", cv2.WINDOW_NORMAL) cv2.resizeWindow("mask", 640, 480) cv2.namedWindow("depth", cv2.WINDOW_NORMAL) cv2.resizeWindow("depth", 1000, 1000) line_arr1 = [0, 1, 3, 2, 0, 4, 6, 2] line_arr2 = [5, 4, 6, 7, 5, 1, 3, 7] divide_value = 65535 pipeline.start(config) frames = pipeline.wait_for_frames() while 1: try: frames = pipeline.wait_for_frames() w = cv2.getTrackbarPos('W', "rgb") depth_frame = frames.get_depth_frame() color_frame = frames.get_color_frame() if not depth_frame or not color_frame: continue depth_to_color_img = np.asanyarray( depth_frame.get_data()) / divide_value color_img = np.asanyarray(color_frame.get_data()) #print(depth_to_color_img) depth_im = DepthImage(depth_to_color_img.astype("float32"), frame=camera_intr.frame) color_im = ColorImage(np.zeros([480, 640, 3]).astype(np.uint8), frame=camera_intr.frame) seg_mask = cv2.resize(mask.copy(), (640, 480), interpolation=cv2.INTER_CUBIC) segmask = BinaryImage(seg_mask) show_img = cv2.resize(color_img.copy(), (640, 480), interpolation=cv2.INTER_CUBIC) try: ret, img_binary = cv2.threshold(mask, 127, 255, 0) contours, color_hierachy = cv2.findContours( img_binary.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) c0 = np.reshape(contours[0], (-1, 2)) show_img = cv2.drawContours(np.uint8(show_img), contours, -1, (0, 255, 0), 1) except: pass show_img_resize = cv2.resize(show_img.copy(), (640, 480), interpolation=cv2.INTER_CUBIC) try: r = 50 show_img_resize = cv2.line( show_img_resize, (int(best_center[0] - r * math.cos(best_angle / 180 * math.pi)), int(best_center[1] - r * math.sin(best_angle / 180 * math.pi))), (int(best_center[0] + r * math.cos(best_angle / 180 * math.pi)), int(best_center[1] + r * math.sin(best_angle / 180 * math.pi))), (0, 0, 255), 5) show_img_resize = cv2.line( show_img_resize, (int(best_center[0]), int(best_center[1])), (int(best_center[0] + 1), int(best_center[1])), (0, 136, 255), 7) show_img_resize = cv2.line( show_img_resize, (int(center[0] - r * math.cos(angle / 180 * math.pi)), int(center[1] - r * math.sin(angle / 180 * math.pi))), (int(center[0] + r * math.cos(angle / 180 * math.pi)), int(center[1] + r * math.sin(angle / 180 * math.pi))), (255, 100, 255), 5) show_img_resize = cv2.line( show_img_resize, (int(center[0]), int(center[1])), (int(center[0] + 1), int(center[1])), (100, 136, 255), 7) except: pass depth_colormap = cv2.applyColorMap( cv2.convertScaleAbs(depth_to_color_img * 1000, alpha=2), 1) cv2.imshow("rgb", show_img_resize) cv2.imshow("depth", depth_colormap) cv2.imshow("mask", seg_mask) k = cv2.waitKey(5) & 0xFF if k == ord('s'): cv2.destroyWindow("rgb") except Exception as inst: print(inst) pass end()
def detect(detector_type, config, run_dir, test_config): """Run PCL-based detection on a depth-image-based dataset. Parameters ---------- config : dict config for a PCL detector run_dir : str Directory to save outputs in. Output will be saved in pred_masks, pred_info, and modal_segmasks_processed subdirectories. test_config : dict config containing dataset information """ ################################################################## # Set up output directories ################################################################## # Create subdirectory for prediction masks pred_dir = os.path.join(run_dir, 'pred_masks') mkdir_if_missing(pred_dir) # Create subdirectory for prediction scores & bboxes pred_info_dir = os.path.join(run_dir, 'pred_info') mkdir_if_missing(pred_info_dir) # Create subdirectory for transformed GT segmasks resized_segmask_dir = os.path.join(run_dir, 'modal_segmasks_processed') mkdir_if_missing(resized_segmask_dir) ################################################################## # Set up input directories ################################################################## dataset_dir = test_config['path'] indices_arr = np.load(os.path.join(dataset_dir, test_config['indices'])) # Input depth image data (numpy files, not .pngs) depth_dir = os.path.join(dataset_dir, test_config['images']) # Input GT binary masks dir gt_mask_dir = os.path.join(dataset_dir, test_config['masks']) # Input binary mask data if 'bin_masks' in test_config.keys(): bin_mask_dir = os.path.join(dataset_dir, test_config['bin_masks']) # Input camera intrinsics camera_intrinsics_fn = os.path.join(dataset_dir, 'camera_intrinsics.intr') camera_intrs = CameraIntrinsics.load(camera_intrinsics_fn) image_ids = np.arange(indices_arr.size) ################################################################## # Process each image ################################################################## for image_id in tqdm(image_ids): base_name = 'image_{:06d}'.format(indices_arr[image_id]) output_name = 'image_{:06d}'.format(image_id) depth_image_fn = base_name + '.npy' # Extract depth image depth_data = np.load(os.path.join(depth_dir, depth_image_fn)) depth_im = DepthImage(depth_data, camera_intrs.frame) depth_im = depth_im.inpaint(0.25) # Mask out bin pixels if appropriate/necessary if bin_mask_dir: mask_im = BinaryImage.open( os.path.join(bin_mask_dir, base_name + '.png'), camera_intrs.frame) mask_im = mask_im.resize(depth_im.shape[:2]) depth_im = depth_im.mask_binary(mask_im) point_cloud = camera_intrs.deproject(depth_im) point_cloud.remove_zero_points() pcl_cloud = pcl.PointCloud(point_cloud.data.T.astype(np.float32)) tree = pcl_cloud.make_kdtree() if detector_type == 'euclidean': segmentor = pcl_cloud.make_EuclideanClusterExtraction() segmentor.set_ClusterTolerance(config['tolerance']) elif detector_type == 'region_growing': segmentor = pcl_cloud.make_RegionGrowing(ksearch=50) segmentor.set_NumberOfNeighbours(config['n_neighbors']) segmentor.set_CurvatureThreshold(config['curvature']) segmentor.set_SmoothnessThreshold(config['smoothness']) else: print('PCL detector type not supported') exit() segmentor.set_MinClusterSize(config['min_cluster_size']) segmentor.set_MaxClusterSize(config['max_cluster_size']) segmentor.set_SearchMethod(tree) cluster_indices = segmentor.Extract() # Set up predicted masks and metadata indiv_pred_masks = [] r_info = { 'rois': [], 'scores': [], 'class_ids': [], } for i, cluster in enumerate(cluster_indices): points = pcl_cloud.to_array()[cluster] indiv_pred_mask = 
camera_intrs.project_to_image( PointCloud(points.T, frame=camera_intrs.frame)).to_binary() indiv_pred_mask.data[indiv_pred_mask.data > 0] = 1 indiv_pred_masks.append(indiv_pred_mask.data) # Compute bounding box, score, class_id nonzero_pix = np.nonzero(indiv_pred_mask.data) min_x, max_x = np.min(nonzero_pix[1]), np.max(nonzero_pix[1]) min_y, max_y = np.min(nonzero_pix[0]), np.max(nonzero_pix[0]) r_info['rois'].append([min_y, min_x, max_y, max_x]) r_info['scores'].append(1.0) r_info['class_ids'].append(1) r_info['rois'] = np.array(r_info['rois']) r_info['scores'] = np.array(r_info['scores']) r_info['class_ids'] = np.array(r_info['class_ids']) # Write the predicted masks and metadata if indiv_pred_masks: pred_mask_output = np.stack(indiv_pred_masks).astype(np.uint8) else: pred_mask_output = np.array(indiv_pred_masks).astype(np.uint8) # Save out ground-truth mask as array of shape (n, h, w) indiv_gt_masks = [] gt_mask = cv2.imread(os.path.join(gt_mask_dir, base_name + '.png')) gt_mask = cv2.resize(gt_mask, (depth_im.shape[1], depth_im.shape[0])).astype( np.uint8)[:, :, 0] num_gt_masks = np.max(gt_mask) for i in range(1, num_gt_masks + 1): indiv_gt_mask = (gt_mask == i) if np.any(indiv_gt_mask): indiv_gt_masks.append(gt_mask == i) gt_mask_output = np.stack(indiv_gt_masks) np.save(os.path.join(resized_segmask_dir, output_name + '.npy'), gt_mask_output) np.save(os.path.join(pred_dir, output_name + '.npy'), pred_mask_output) np.save(os.path.join(pred_info_dir, output_name + '.npy'), r_info) print('Saved prediction masks to:\t {}'.format(pred_dir)) print('Saved prediction info (bboxes, scores, classes) to:\t {}'.format( pred_info_dir)) print('Saved transformed GT segmasks to:\t {}'.format(resized_segmask_dir)) return pred_dir, pred_info_dir, resized_segmask_dir
def generate_segmask_dataset(output_dataset_path, config, save_tensors=True, warm_start=False): """ Generate a segmentation training dataset Parameters ---------- dataset_path : str path to store the dataset config : dict dictionary-like objects containing parameters of the simulator and visualization save_tensors : bool save tensor datasets (for recreating state) warm_start : bool restart dataset generation from a previous state """ # read subconfigs dataset_config = config['dataset'] image_config = config['images'] vis_config = config['vis'] # debugging debug = config['debug'] if debug: np.random.seed(SEED) # read general parameters num_states = config['num_states'] num_images_per_state = config['num_images_per_state'] states_per_flush = config['states_per_flush'] states_per_garbage_collect = config['states_per_garbage_collect'] # set max obj per state max_objs_per_state = config['state_space']['heap']['max_objs'] # read image parameters im_height = config['state_space']['camera']['im_height'] im_width = config['state_space']['camera']['im_width'] segmask_channels = max_objs_per_state + 1 # create the dataset path and all subfolders if they don't exist if not os.path.exists(output_dataset_path): os.mkdir(output_dataset_path) image_dir = os.path.join(output_dataset_path, 'images') if not os.path.exists(image_dir): os.mkdir(image_dir) color_dir = os.path.join(image_dir, 'color_ims') if image_config['color'] and not os.path.exists(color_dir): os.mkdir(color_dir) depth_dir = os.path.join(image_dir, 'depth_ims') if image_config['depth'] and not os.path.exists(depth_dir): os.mkdir(depth_dir) amodal_dir = os.path.join(image_dir, 'amodal_masks') if image_config['amodal'] and not os.path.exists(amodal_dir): os.mkdir(amodal_dir) modal_dir = os.path.join(image_dir, 'modal_masks') if image_config['modal'] and not os.path.exists(modal_dir): os.mkdir(modal_dir) semantic_dir = os.path.join(image_dir, 'semantic_masks') if image_config['semantic'] and not os.path.exists(semantic_dir): os.mkdir(semantic_dir) # setup logging experiment_log_filename = os.path.join(output_dataset_path, 'dataset_generation.log') if os.path.exists(experiment_log_filename) and not warm_start: os.remove(experiment_log_filename) Logger.add_log_file(logger, experiment_log_filename, global_log_file=True) config.save( os.path.join(output_dataset_path, 'dataset_generation_params.yaml')) metadata = {} num_prev_states = 0 # set dataset params if save_tensors: # read dataset subconfigs state_dataset_config = dataset_config['states'] image_dataset_config = dataset_config['images'] state_tensor_config = state_dataset_config['tensors'] image_tensor_config = image_dataset_config['tensors'] obj_pose_dim = POSE_DIM * max_objs_per_state obj_com_dim = POINT_DIM * max_objs_per_state state_tensor_config['fields']['obj_poses']['height'] = obj_pose_dim state_tensor_config['fields']['obj_coms']['height'] = obj_com_dim state_tensor_config['fields']['obj_ids']['height'] = max_objs_per_state image_tensor_config['fields']['camera_pose']['height'] = POSE_DIM if image_config['color']: image_tensor_config['fields']['color_im'] = { 'dtype': 'uint8', 'channels': 3, 'height': im_height, 'width': im_width } if image_config['depth']: image_tensor_config['fields']['depth_im'] = { 'dtype': 'float32', 'channels': 1, 'height': im_height, 'width': im_width } if image_config['modal']: image_tensor_config['fields']['modal_segmasks'] = { 'dtype': 'uint8', 'channels': segmask_channels, 'height': im_height, 'width': im_width } if image_config['amodal']: 
image_tensor_config['fields']['amodal_segmasks'] = { 'dtype': 'uint8', 'channels': segmask_channels, 'height': im_height, 'width': im_width } if image_config['semantic']: image_tensor_config['fields']['semantic_segmasks'] = { 'dtype': 'uint8', 'channels': 1, 'height': im_height, 'width': im_width } # create dataset filenames state_dataset_path = os.path.join(output_dataset_path, 'state_tensors') image_dataset_path = os.path.join(output_dataset_path, 'image_tensors') if warm_start: if not os.path.exists(state_dataset_path) or not os.path.exists( image_dataset_path): logger.error( 'Attempting to warm start without saved tensor dataset') exit(1) # open datasets logger.info('Opening state dataset') state_dataset = TensorDataset.open(state_dataset_path, access_mode='READ_WRITE') logger.info('Opening image dataset') image_dataset = TensorDataset.open(image_dataset_path, access_mode='READ_WRITE') # read configs state_tensor_config = state_dataset.config image_tensor_config = image_dataset.config # clean up datasets (there may be datapoints with indices corresponding to non-existent data) num_state_datapoints = state_dataset.num_datapoints num_image_datapoints = image_dataset.num_datapoints num_prev_states = num_state_datapoints # clean up images image_ind = num_image_datapoints - 1 image_datapoint = image_dataset[image_ind] while image_ind > 0 and image_datapoint[ 'state_ind'] >= num_state_datapoints: image_ind -= 1 image_datapoint = image_dataset[image_ind] images_to_remove = num_image_datapoints - 1 - image_ind logger.info('Deleting last %d image tensors' % (images_to_remove)) if images_to_remove > 0: image_dataset.delete_last(images_to_remove) num_image_datapoints = image_dataset.num_datapoints else: # create datasets from scratch logger.info('Creating datasets') state_dataset = TensorDataset(state_dataset_path, state_tensor_config) image_dataset = TensorDataset(image_dataset_path, image_tensor_config) # read templates state_datapoint = state_dataset.datapoint_template image_datapoint = image_dataset.datapoint_template if warm_start: if not os.path.exists( os.path.join(output_dataset_path, 'metadata.json')): logger.error( 'Attempting to warm start without previously created dataset') exit(1) # Read metadata and indices metadata = json.load( open(os.path.join(output_dataset_path, 'metadata.json'), 'r')) test_inds = np.load(os.path.join(image_dir, 'test_indices.npy')).tolist() train_inds = np.load(os.path.join(image_dir, 'train_indices.npy')).tolist() # set obj ids and splits reverse_obj_ids = metadata['obj_ids'] obj_id_map = utils.reverse_dictionary(reverse_obj_ids) obj_splits = metadata['obj_splits'] obj_keys = obj_splits.keys() mesh_filenames = metadata['meshes'] # Get list of images generated so far generated_images = sorted( os.listdir(color_dir)) if image_config['color'] else sorted( os.listdir(depth_dir)) num_total_images = len(generated_images) # Do our own calculation if no saved tensors if num_prev_states == 0: num_prev_states = num_total_images // num_images_per_state # Find images to remove and remove them from all relevant places if they exist num_images_to_remove = num_total_images - (num_prev_states * num_images_per_state) logger.info( 'Deleting last {} invalid images'.format(num_images_to_remove)) for k in range(num_images_to_remove): im_name = generated_images[-(k + 1)] im_basename = os.path.splitext(im_name)[0] im_ind = int(im_basename.split('_')[1]) if os.path.exists(os.path.join(depth_dir, im_name)): os.remove(os.path.join(depth_dir, im_name)) if 
os.path.exists(os.path.join(color_dir, im_name)): os.remove(os.path.join(color_dir, im_name)) if os.path.exists(os.path.join(semantic_dir, im_name)): os.remove(os.path.join(semantic_dir, im_name)) if os.path.exists(os.path.join(modal_dir, im_basename)): shutil.rmtree(os.path.join(modal_dir, im_basename)) if os.path.exists(os.path.join(amodal_dir, im_basename)): shutil.rmtree(os.path.join(amodal_dir, im_basename)) if im_ind in train_inds: train_inds.remove(im_ind) elif im_ind in test_inds: test_inds.remove(im_ind) else: # Create initial env to generate metadata env = BinHeapEnv(config) obj_id_map = env.state_space.obj_id_map obj_keys = env.state_space.obj_keys obj_splits = env.state_space.obj_splits mesh_filenames = env.state_space.mesh_filenames save_obj_id_map = obj_id_map.copy() save_obj_id_map[ENVIRONMENT_KEY] = np.iinfo(np.uint32).max reverse_obj_ids = utils.reverse_dictionary(save_obj_id_map) metadata['obj_ids'] = reverse_obj_ids metadata['obj_splits'] = obj_splits metadata['meshes'] = mesh_filenames json.dump(metadata, open(os.path.join(output_dataset_path, 'metadata.json'), 'w'), indent=JSON_INDENT, sort_keys=True) train_inds = [] test_inds = [] # generate states and images state_id = num_prev_states while state_id < num_states: # create env and set objects create_start = time.time() env = BinHeapEnv(config) env.state_space.obj_id_map = obj_id_map env.state_space.obj_keys = obj_keys env.state_space.set_splits(obj_splits) env.state_space.mesh_filenames = mesh_filenames create_stop = time.time() logger.info('Creating env took %.3f sec' % (create_stop - create_start)) # sample states states_remaining = num_states - state_id for i in range(min(states_per_garbage_collect, states_remaining)): # log current rollout if state_id % config['log_rate'] == 0: logger.info('State: %06d' % (state_id)) try: # reset env env.reset() state = env.state split = state.metadata['split'] # render state if vis_config['state']: env.view_3d_scene() # Save state if desired if save_tensors: # set obj state variables obj_pose_vec = np.zeros(obj_pose_dim) obj_com_vec = np.zeros(obj_com_dim) obj_id_vec = np.iinfo( np.uint32).max * np.ones(max_objs_per_state) j = 0 for obj_state in state.obj_states: obj_pose_vec[j * POSE_DIM:(j + 1) * POSE_DIM] = obj_state.pose.vec obj_com_vec[j * POINT_DIM:(j + 1) * POINT_DIM] = obj_state.center_of_mass obj_id_vec[j] = int(obj_id_map[obj_state.key]) j += 1 # store datapoint env params state_datapoint['state_id'] = state_id state_datapoint['obj_poses'] = obj_pose_vec state_datapoint['obj_coms'] = obj_com_vec state_datapoint['obj_ids'] = obj_id_vec state_datapoint['split'] = split # store state datapoint image_start_ind = image_dataset.num_datapoints image_end_ind = image_start_ind + num_images_per_state state_datapoint['image_start_ind'] = image_start_ind state_datapoint['image_end_ind'] = image_end_ind # clean up del obj_pose_vec del obj_com_vec del obj_id_vec # add state state_dataset.add(state_datapoint) # render images for k in range(num_images_per_state): # reset the camera if num_images_per_state > 1: env.reset_camera() obs = env.render_camera_image(color=image_config['color']) if image_config['color']: color_obs, depth_obs = obs else: depth_obs = obs # vis obs if vis_config['obs']: if image_config['depth']: plt.figure() plt.imshow(depth_obs) plt.title('Depth Observation') if image_config['color']: plt.figure() plt.imshow(color_obs) plt.title('Color Observation') plt.show() if image_config['modal'] or image_config[ 'amodal'] or image_config['semantic']: # render segmasks 
amodal_segmasks, modal_segmasks = env.render_segmentation_images( ) # retrieve segmask data modal_segmask_arr = np.iinfo(np.uint8).max * np.ones( [im_height, im_width, segmask_channels], dtype=np.uint8) amodal_segmask_arr = np.iinfo(np.uint8).max * np.ones( [im_height, im_width, segmask_channels], dtype=np.uint8) stacked_segmask_arr = np.zeros( [im_height, im_width, 1], dtype=np.uint8) modal_segmask_arr[:, :, :env. num_objects] = modal_segmasks amodal_segmask_arr[:, :, :env. num_objects] = amodal_segmasks if image_config['semantic']: for j in range(env.num_objects): this_obj_px = np.where( modal_segmasks[:, :, j] > 0) stacked_segmask_arr[this_obj_px[0], this_obj_px[1], 0] = j + 1 # visualize if vis_config['semantic']: plt.figure() plt.imshow(stacked_segmask_arr.squeeze()) plt.show() if save_tensors: # save image data as tensors if image_config['color']: image_datapoint['color_im'] = color_obs if image_config['depth']: image_datapoint['depth_im'] = depth_obs[:, :, None] if image_config['modal']: image_datapoint[ 'modal_segmasks'] = modal_segmask_arr if image_config['amodal']: image_datapoint[ 'amodal_segmasks'] = amodal_segmask_arr if image_config['semantic']: image_datapoint[ 'semantic_segmasks'] = stacked_segmask_arr image_datapoint['camera_pose'] = env.camera.pose.vec image_datapoint[ 'camera_intrs'] = env.camera.intrinsics.vec image_datapoint['state_ind'] = state_id image_datapoint['split'] = split # add image image_dataset.add(image_datapoint) # Save depth image and semantic masks if image_config['color']: ColorImage(color_obs).save( os.path.join( color_dir, 'image_{:06d}.png'.format( num_images_per_state * state_id + k))) if image_config['depth']: DepthImage(depth_obs).save( os.path.join( depth_dir, 'image_{:06d}.png'.format( num_images_per_state * state_id + k))) if image_config['modal']: modal_id_dir = os.path.join( modal_dir, 'image_{:06d}'.format(num_images_per_state * state_id + k)) if not os.path.exists(modal_id_dir): os.mkdir(modal_id_dir) for i in range(env.num_objects): BinaryImage(modal_segmask_arr[:, :, i]).save( os.path.join(modal_id_dir, 'channel_{:03d}.png'.format(i))) if image_config['amodal']: amodal_id_dir = os.path.join( amodal_dir, 'image_{:06d}'.format(num_images_per_state * state_id + k)) if not os.path.exists(amodal_id_dir): os.mkdir(amodal_id_dir) for i in range(env.num_objects): BinaryImage(amodal_segmask_arr[:, :, i]).save( os.path.join(amodal_id_dir, 'channel_{:03d}.png'.format(i))) if image_config['semantic']: GrayscaleImage(stacked_segmask_arr.squeeze()).save( os.path.join( semantic_dir, 'image_{:06d}.png'.format( num_images_per_state * state_id + k))) # Save split if split == TRAIN_ID: train_inds.append(num_images_per_state * state_id + k) else: test_inds.append(num_images_per_state * state_id + k) # auto-flush after every so many timesteps if state_id % states_per_flush == 0: np.save(os.path.join(image_dir, 'train_indices.npy'), train_inds) np.save(os.path.join(image_dir, 'test_indices.npy'), test_inds) if save_tensors: state_dataset.flush() image_dataset.flush() # delete action objects for obj_state in state.obj_states: del obj_state del state gc.collect() # update state id state_id += 1 except Exception as e: # log an error logger.warning('Heap failed!') logger.warning('%s' % (str(e))) logger.warning(traceback.print_exc()) if debug: raise del env gc.collect() env = BinHeapEnv(config) env.state_space.obj_id_map = obj_id_map env.state_space.obj_keys = obj_keys env.state_space.set_splits(obj_splits) env.state_space.mesh_filenames = mesh_filenames # garbage 
collect del env gc.collect() # write all datasets to file, save indices np.save(os.path.join(image_dir, 'train_indices.npy'), train_inds) np.save(os.path.join(image_dir, 'test_indices.npy'), test_inds) if save_tensors: state_dataset.flush() image_dataset.flush() logger.info('Generated %d image datapoints' % (state_id * num_images_per_state))
def wrapped_images(self, mesh, object_to_camera_poses, render_mode, stable_pose=None, mat_props=None, light_props=None,debug=False): """Create ObjectRender objects of the given mesh at the list of object to camera poses. Parameters ---------- mesh : :obj:`Mesh3D` The mesh to be rendered. object_to_camera_poses : :obj:`list` of :obj:`RigidTransform` A list of object to camera transforms to render from. render_mode : int One of RenderMode.COLOR, RenderMode.DEPTH, or RenderMode.SCALED_DEPTH. stable_pose : :obj:`StablePose` A stable pose to render the object in. mat_props : :obj:`MaterialProperties` Material properties for the mesh light_props : :obj:`MaterialProperties` Lighting properties for the scene debug : bool Whether or not to debug the C++ meshrendering code. Returns ------- :obj:`list` of :obj:`ObjectRender` A list of ObjectRender objects generated from the given parameters. """ # pre-multiply the stable pose world_to_camera_poses = [T_obj_camera.as_frames('obj', 'camera') for T_obj_camera in object_to_camera_poses] if stable_pose is not None: t_obj_stp = np.array([0,0,-stable_pose.r.dot(stable_pose.x0)[2]]) T_obj_stp = RigidTransform(rotation=stable_pose.r, translation=t_obj_stp, from_frame='obj', to_frame='stp') stp_to_camera_poses = copy.copy(object_to_camera_poses) object_to_camera_poses = [] for T_stp_camera in stp_to_camera_poses: T_stp_camera.from_frame = 'stp' object_to_camera_poses.append(T_stp_camera.dot(T_obj_stp)) # set lighting mode enable_lighting = True if render_mode == RenderMode.SEGMASK or render_mode == RenderMode.DEPTH or \ render_mode == RenderMode.DEPTH_SCENE: enable_lighting = False # render both image types (doesn't really cost any time) color_ims, depth_ims = self.images(mesh, object_to_camera_poses, mat_props=mat_props, light_props=light_props, enable_lighting=enable_lighting, debug=debug) # convert to image wrapper classes images = [] if render_mode == RenderMode.SEGMASK: # wrap binary images for binary_im in color_ims: images.append(BinaryImage(binary_im[:,:,0], frame=self._camera_intr.frame, threshold=0)) elif render_mode == RenderMode.COLOR: # wrap color images for color_im in color_ims: images.append(ColorImage(color_im, frame=self._camera_intr.frame)) elif render_mode == RenderMode.COLOR_SCENE: # wrap color and depth images for color_im in color_ims: images.append(ColorImage(color_im, frame=self._camera_intr.frame)) # render images of scene objects color_scene_ims = {} for name, scene_obj in self._scene.iteritems(): scene_object_to_camera_poses = [] for world_to_camera_pose in world_to_camera_poses: scene_object_to_camera_poses.append(world_to_camera_pose * scene_obj.T_mesh_world) color_scene_ims[name] = self.wrapped_images(scene_obj.mesh, scene_object_to_camera_poses, RenderMode.COLOR, mat_props=scene_obj.mat_props, light_props=light_props, debug=debug) # combine with scene images # TODO: re-add farther for i in range(len(images)): for j, name in enumerate(color_scene_ims.keys()): zero_px = images[i].zero_pixels() images[i].data[zero_px[:,0], zero_px[:,1], :] = color_scene_ims[name][i].image.data[zero_px[:,0], zero_px[:,1], :] elif render_mode == RenderMode.DEPTH: # render depth image for depth_im in depth_ims: images.append(DepthImage(depth_im, frame=self._camera_intr.frame)) elif render_mode == RenderMode.DEPTH_SCENE: # create empty depth images for depth_im in depth_ims: images.append(DepthImage(depth_im, frame=self._camera_intr.frame)) # render images of scene objects depth_scene_ims = {} for name, scene_obj in self._scene.iteritems(): 
scene_object_to_camera_poses = [] for world_to_camera_pose in world_to_camera_poses: scene_object_to_camera_poses.append(world_to_camera_pose * scene_obj.T_mesh_world) depth_scene_ims[name] = self.wrapped_images(scene_obj.mesh, scene_object_to_camera_poses, RenderMode.DEPTH, mat_props=scene_obj.mat_props, light_props=light_props) # combine with scene images for i in range(len(images)): for j, name in enumerate(depth_scene_ims.keys()): images[i] = images[i].combine_with(depth_scene_ims[name][i].image) elif render_mode == RenderMode.RGBD: # create RGB-D images for color_im, depth_im in zip(color_ims, depth_ims): c = ColorImage(color_im, frame=self._camera_intr.frame) d = DepthImage(depth_im, frame=self._camera_intr.frame) images.append(RgbdImage.from_color_and_depth(c, d)) elif render_mode == RenderMode.RGBD_SCENE: # create RGB-D images for color_im, depth_im in zip(color_ims, depth_ims): c = ColorImage(color_im, frame=self._camera_intr.frame) d = DepthImage(depth_im, frame=self._camera_intr.frame) images.append(RgbdImage.from_color_and_depth(c, d)) # render images of scene objects rgbd_scene_ims = {} for name, scene_obj in self._scene.iteritems(): scene_object_to_camera_poses = [] for world_to_camera_pose in world_to_camera_poses: scene_object_to_camera_poses.append(world_to_camera_pose * scene_obj.T_mesh_world) rgbd_scene_ims[name] = self.wrapped_images(scene_obj.mesh, scene_object_to_camera_poses, RenderMode.RGBD, mat_props=scene_obj.mat_props, light_props=light_props, debug=debug) # combine with scene images for i in range(len(images)): for j, name in enumerate(rgbd_scene_ims.keys()): images[i] = images[i].combine_with(rgbd_scene_ims[name][i].image) elif render_mode == RenderMode.SCALED_DEPTH: # convert to color image for depth_im in depth_ims: d = DepthImage(depth_im, frame=self._camera_intr.frame) images.append(d.to_color()) else: raise ValueError('Render mode %s not supported' %(render_mode)) # create object renders if stable_pose is not None: object_to_camera_poses = copy.copy(stp_to_camera_poses) rendered_images = [] for image, T_obj_camera in zip(images, object_to_camera_poses): T_camera_obj = T_obj_camera.inverse() rendered_images.append(ObjectRender(image, T_camera_obj)) return rendered_images
def visualize_tensor_dataset(dataset, config): """ Visualizes a Tensor dataset. Parameters ---------- dataset : :obj:`TensorDataset` dataset to visualize config : :obj:`autolab_core.YamlConfig` parameters for visualization Notes ----- Required parameters of config are specified in Other Parameters Other Parameters ---------------- field_name : str name of the field in the TensorDataset to visualize (defaults to depth_ims_tf_table, which is a single view point cloud of the object on a table) field_type : str type of image that the field name correspondes to (defaults to depth, can also be `segmask` if using the field `object_masks`) print_fields : :obj:`list` of str names of additiona fields to print to the command line filter : :obj:`dict` mapping str to :obj:`dict` contraints that all displayed datapoints must satisfy (supports any univariate field name as a key and numeric thresholds) gripper_width_px : float width of the gripper to plot in pixels font_size : int size of font on the rendered images """ # shuffle the tensor indices indices = dataset.datapoint_indices np.random.shuffle(indices) # read config field_name = config['field_name'] field_type = config['field_type'] font_size = config['font_size'] print_fields = config['print_fields'] gripper_width_px = config['gripper_width_px'] num = 0 for i, ind in enumerate(indices): datapoint = dataset[ind] data = datapoint[field_name] if field_type == RenderMode.SEGMASK: image = BinaryImage(data) elif field_type == RenderMode.DEPTH: image = DepthImage(data) else: raise ValueError('Field type %s not supported!' % (field_type)) skip_datapoint = False for f, filter_cfg in config['filter'].iteritems(): data = datapoint[f] if 'greater_than' in filter_cfg.keys( ) and data < filter_cfg['greater_than']: skip_datapoint = True break elif 'less_than' in filter_cfg.keys( ) and data > filter_cfg['less_than']: skip_datapoint = True break if skip_datapoint: continue logging.info('DATAPOINT %d' % (num)) for f in print_fields: data = datapoint[f] logging.info('Field %s:' % (f)) print data grasp_2d = Grasp2D(Point(image.center), 0, datapoint['hand_poses'][2]) vis2d.figure() if field_type == RenderMode.RGBD: vis2d.subplot(1, 2, 1) vis2d.imshow(image.color) vis2d.grasp(grasp_2d, width=gripper_width_px) vis2d.subplot(1, 2, 2) vis2d.imshow(image.depth) vis2d.grasp(grasp_2d, width=gripper_width_px) elif field_type == RenderMode.GD: vis2d.subplot(1, 2, 1) vis2d.imshow(image.gray) vis2d.grasp(grasp_2d, width=gripper_width_px) vis2d.subplot(1, 2, 2) vis2d.imshow(image.depth) vis2d.grasp(grasp_2d, width=gripper_width_px) else: vis2d.imshow(image) vis2d.grasp(grasp_2d, width=gripper_width_px) vis2d.title('Datapoint %d: %s' % (ind, field_type)) vis2d.show() num += 1
    depth_im_filename = args.depth_im_filename
    camera_intr_filename = args.camera_intr_filename
    config_filename = args.config_filename

    # read config
    config = YamlConfig(config_filename)
    policy_config = config['policy']

    # read image (pickled depth arrays are binary, so open in 'rb' mode)
    depth_im_arr = pkl.load(open(depth_im_filename, 'rb'))
    depth_im = DepthImage(depth_im_arr)
    color_im = ColorImage(
        np.zeros([depth_im.height, depth_im.width, 3]).astype(np.uint8))
    rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)
    segmask_arr = 255 * (depth_im_arr < 1.0)
    segmask = BinaryImage(segmask_arr.astype(np.uint8))
    camera_intr = CameraIntrinsics.load(camera_intr_filename)
    state = RgbdImageState(rgbd_im, camera_intr, segmask=segmask)

    # init policy
    policy = UniformRandomGraspingPolicy(policy_config)
    policy_start = time.time()
    action = policy(state)
    logging.info('Planning took %.3f sec' % (time.time() - policy_start))

    # vis final grasp
    if policy_config['vis']['final_grasp']:
        vis.figure(size=(10, 10))
        vis.imshow(rgbd_im.depth,
                   vmin=policy_config['vis']['vmin'],
                   vmax=policy_config['vis']['vmax'])
def wrapped_render(self, render_modes, front_and_back=False):
    """Render images of the scene and wrap them with Image wrapper classes
    from the Berkeley AUTOLab's perception module.

    Parameters
    ----------
    render_modes : list of perception.RenderMode
        A list of the desired image types to return, from the perception
        module's RenderMode enum.
    front_and_back : bool
        If True, all surface normals are treated as if they're facing the
        camera.

    Returns
    -------
    list of perception.Image
        A list containing a corresponding Image sub-class for each type
        listed in render_modes.
    """
    # Render raw images
    render_color = False
    for mode in render_modes:
        if mode != RenderMode.DEPTH and mode != RenderMode.SCALED_DEPTH:
            render_color = True
            break

    color_im, depth_im = None, None
    if render_color:
        color_im, depth_im = self.render(render_color,
                                         front_and_back=front_and_back)
    else:
        depth_im = self.render(render_color)

    # For each specified render mode, add an Image object of the appropriate type
    images = []
    for render_mode in render_modes:
        # Then, convert them to an image wrapper class
        if render_mode == RenderMode.SEGMASK:
            images.append(
                BinaryImage((depth_im > 0.0).astype(np.uint8),
                            frame=self.camera.intrinsics.frame,
                            threshold=0))
        elif render_mode == RenderMode.COLOR:
            images.append(
                ColorImage(color_im, frame=self.camera.intrinsics.frame))
        elif render_mode == RenderMode.GRAYSCALE:
            images.append(
                ColorImage(
                    color_im,
                    frame=self.camera.intrinsics.frame).to_grayscale())
        elif render_mode == RenderMode.DEPTH:
            images.append(
                DepthImage(depth_im, frame=self.camera.intrinsics.frame))
        elif render_mode == RenderMode.SCALED_DEPTH:
            images.append(
                DepthImage(depth_im,
                           frame=self.camera.intrinsics.frame).to_color())
        elif render_mode == RenderMode.RGBD:
            c = ColorImage(color_im, frame=self.camera.intrinsics.frame)
            d = DepthImage(depth_im, frame=self.camera.intrinsics.frame)
            images.append(RgbdImage.from_color_and_depth(c, d))
        elif render_mode == RenderMode.GD:
            g = ColorImage(
                color_im,
                frame=self.camera.intrinsics.frame).to_grayscale()
            d = DepthImage(depth_im, frame=self.camera.intrinsics.frame)
            images.append(GdImage.from_grayscale_and_depth(g, d))
        else:
            raise ValueError(
                'Render mode {} not supported'.format(render_mode))

    return images
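# Hedged usage sketch (assumption, not from the original source): calling
# wrapped_render on an already-constructed scene/renderer object with a camera
# attached. The variable `scene` and the import path for RenderMode are
# assumptions made for illustration.
from perception import RenderMode

color_im, depth_im, segmask = scene.wrapped_render(
    [RenderMode.COLOR, RenderMode.DEPTH, RenderMode.SEGMASK])

# Each return value is a perception Image subclass, so the usual accessors apply.
print(color_im.shape, depth_im.frame, segmask.nonzero_pixels().shape)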
    plan_grasp_segmask = rospy.ServiceProxy(
        '%s/grasp_planner_segmask' % (namespace), GQCNNGraspPlannerSegmask)
    cv_bridge = CvBridge()

    # setup sensor
    camera_intr = CameraIntrinsics.load(camera_intr_filename)

    # read images
    depth_im = DepthImage.open(depth_im_filename, frame=camera_intr.frame)
    color_im = ColorImage(np.zeros([depth_im.height, depth_im.width,
                                    3]).astype(np.uint8),
                          frame=camera_intr.frame)

    # read segmask
    if segmask_filename is not None:
        segmask = BinaryImage.open(segmask_filename, frame=camera_intr.frame)
        grasp_resp = plan_grasp_segmask(color_im.rosmsg, depth_im.rosmsg,
                                        camera_intr.rosmsg, segmask.rosmsg)
    else:
        grasp_resp = plan_grasp(color_im.rosmsg, depth_im.rosmsg,
                                camera_intr.rosmsg)
    grasp = grasp_resp.grasp

    # convert to a grasp action
    grasp_type = grasp.grasp_type
    if grasp_type == GQCNNGrasp.PARALLEL_JAW:
        center = Point(np.array([grasp.center_px[0], grasp.center_px[1]]),
                       frame=camera_intr.frame)
        grasp_2d = Grasp2D(center, grasp.angle, grasp.depth,
def _plan_grasp(self,
                color_im,
                depth_im,
                camera_intr,
                bounding_box=None,
                segmask=None):
    """Grasp planner request handler.

    Parameters
    ----------
    color_im : :obj:`perception.ColorImage`
        Color image of the scene.
    depth_im : :obj:`perception.DepthImage`
        Depth image of the scene.
    camera_intr : :obj:`perception.CameraIntrinsics`
        Intrinsics of the camera that captured the images.
    bounding_box : :obj:`BoundingBox`, optional
        Bounding box restricting the region in which grasps are planned.
    segmask : :obj:`perception.BinaryImage`, optional
        Segmentation mask of the object of interest.
    """
    rospy.loginfo("Planning Grasp")

    # Inpaint images.
    color_im = color_im.inpaint(
        rescale_factor=self.cfg["inpaint_rescale_factor"])
    depth_im = depth_im.inpaint(
        rescale_factor=self.cfg["inpaint_rescale_factor"])

    # Init segmask.
    if segmask is None:
        segmask = BinaryImage(255 *
                              np.ones(depth_im.shape).astype(np.uint8),
                              frame=color_im.frame)

    # Visualize.
    if self.cfg["vis"]["color_image"]:
        vis.imshow(color_im)
        vis.show()
    if self.cfg["vis"]["depth_image"]:
        vis.imshow(depth_im)
        vis.show()
    if self.cfg["vis"]["segmask"] and segmask is not None:
        vis.imshow(segmask)
        vis.show()

    # Aggregate color and depth images into a single
    # BerkeleyAutomation/perception `RgbdImage`.
    rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)

    # Mask bounding box.
    if bounding_box is not None:
        # Calc bb parameters.
        min_x = bounding_box.minX
        min_y = bounding_box.minY
        max_x = bounding_box.maxX
        max_y = bounding_box.maxY

        # Contain box to image->don't let it exceed image height/width
        # bounds, and cast to int so the values can be used as array indices.
        min_x = int(max(min_x, 0))
        min_y = int(max(min_y, 0))
        max_x = int(min(max_x, rgbd_im.width))
        max_y = int(min(max_y, rgbd_im.height))

        # Mask.
        bb_segmask_arr = np.zeros([rgbd_im.height, rgbd_im.width])
        bb_segmask_arr[min_y:max_y, min_x:max_x] = 255
        bb_segmask = BinaryImage(bb_segmask_arr.astype(np.uint8),
                                 segmask.frame)
        segmask = segmask.mask_binary(bb_segmask)

    # Visualize.
    if self.cfg["vis"]["rgbd_state"]:
        masked_rgbd_im = rgbd_im.mask_binary(segmask)
        vis.figure()
        vis.subplot(1, 2, 1)
        vis.imshow(masked_rgbd_im.color)
        vis.subplot(1, 2, 2)
        vis.imshow(masked_rgbd_im.depth)
        vis.show()

    # Create an `RgbdImageState` with the cropped `RgbdImage` and
    # `CameraIntrinsics`.
    rgbd_state = RgbdImageState(rgbd_im, camera_intr, segmask=segmask)

    # Execute policy.
    try:
        return self.execute_policy(rgbd_state, self.grasping_policy,
                                   self.grasp_pose_publisher,
                                   camera_intr.frame)
    except NoValidGraspsException:
        rospy.logerr(
            ("While executing policy found no valid grasps from sampled"
             " antipodal point pairs. Aborting Policy!"))
        raise rospy.ServiceException(
            ("While executing policy found no valid grasps from sampled"
             " antipodal point pairs. Aborting Policy!"))
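# Hedged sketch (not part of the planner source): the bounding-box masking step
# above shown in isolation. A pixel-space box is rasterized into a BinaryImage
# and intersected with an existing segmask via mask_binary. Image shape, frame
# name, and box coordinates are made-up values; the BerkeleyAutomation
# perception package layout is assumed for the import.
import numpy as np
from perception import BinaryImage

height, width = 480, 640
segmask = BinaryImage(255 * np.ones((height, width), dtype=np.uint8),
                      frame='camera')

min_x, min_y, max_x, max_y = 100, 80, 400, 360  # hypothetical box, in pixels
bb_arr = np.zeros((height, width), dtype=np.uint8)
bb_arr[min_y:max_y, min_x:max_x] = 255
bb_segmask = BinaryImage(bb_arr, frame=segmask.frame)

# keep only pixels inside both the original segmask and the box
segmask = segmask.mask_binary(bb_segmask)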
                          frame=camera_intr.frame,
                          encoding="bgr8")
    depth_im = DepthImage(depth_cvmat, frame=camera_intr.frame)

    # check image sizes.
    if (color_im.height != depth_im.height
            or color_im.width != depth_im.width):
        msg = ("Color image and depth image must be the same shape! Color"
               " is %d x %d but depth is %d x %d") % (
                   color_im.height, color_im.width, depth_im.height,
                   depth_im.width)
        print(msg)

    # assume no mask is provided
    # segmask = depth_im.invalid_pixel_mask().inverse()
    segmask = BinaryImage(255 * np.ones(depth_im.shape).astype(np.uint8),
                          frame=color_im.frame)

    # inpaint images.
    # color_im = color_im.inpaint(
    #     rescale_factor=config["inpaint_rescale_factor"])
    # depth_im = depth_im.inpaint(
    #     rescale_factor=config["inpaint_rescale_factor"])

    # Aggregate color and depth images into a single
    # BerkeleyAutomation/perception `RgbdImage`.
    rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)
    state = RgbdImageState(rgbd_im, camera_intr, segmask=segmask)

    # vis.imshow(segmask)
    # vis.imshow(rgbd_im)
policy_config["metric"]["gqcnn_model"]) # Setup sensor. camera_intr = CameraIntrinsics.load(camera_intr_filename) # Read images. depth_data = np.load(depth_im_filename) depth_im = DepthImage(depth_data, frame=camera_intr.frame) color_im = ColorImage(np.zeros([depth_im.height, depth_im.width, 3]).astype(np.uint8), frame=camera_intr.frame) # Optionally read a segmask. segmask = None if segmask_filename is not None: segmask = BinaryImage.open(segmask_filename) valid_px_mask = depth_im.invalid_pixel_mask().inverse() if segmask is None: segmask = valid_px_mask else: segmask = segmask.mask_binary(valid_px_mask) # Inpaint. depth_im = depth_im.inpaint(rescale_factor=inpaint_rescale_factor) if "input_images" in policy_config["vis"] and policy_config["vis"][ "input_images"]: vis.figure(size=(10, 10)) num_plot = 1 if segmask is not None: num_plot = 2
def _compute_edge_mask(self, salient_edge_set, depth_im, ci, T_obj_camera):
    """Compute the edge mask for a given salient edge set, depth image,
    camera intrinsics, and object-to-camera transform.
    """
    mesh = salient_edge_set.mesh
    edge_mask = salient_edge_set.edge_mask

    # Allocate array for storing mask
    di_mask = np.zeros(depth_im.data.shape, dtype=np.uint8)

    # ---------------
    # Compute edge endpoint coordinates in camera frame and up/down biases for each
    # ---------------
    m = mesh.copy().apply_transform(T_obj_camera.matrix)
    vertex_inds = m.face_adjacency_edges[edge_mask]
    endpoints_3d = m.vertices[vertex_inds]

    # For each edge, compute the face that we should be sampling most closely against
    vecs = (endpoints_3d[:, 0] + endpoints_3d[:, 1]) / 2.0
    face_inds = m.face_adjacency[edge_mask]
    normals = m.face_normals[face_inds]
    dots = np.einsum('ijk,ik->ij', normals, vecs)
    valid = np.where(np.logical_not((dots > 0).all(1)))[0]
    vertex_inds = vertex_inds[valid]

    # Rasterize each remaining (camera-facing) edge into the mask
    for i in range(len(vertex_inds)):
        # Extract the indices of the edge's two endpoint vertices
        endpoint_inds = vertex_inds[i]

        # Project vertices down to 2D
        endpoints = m.vertices[endpoint_inds]
        points = endpoints.T
        points_proj = ci._K.dot(points)
        point_depths = np.tile(points_proj[2, :], [3, 1])
        points_proj = np.divide(points_proj, point_depths).T[:, :2]
        point_depths = point_depths[0][:2]
        endpoints = points_proj[:2]

        delta = endpoints[1] - endpoints[0]
        dy = np.abs(delta[1])
        dx = np.abs(delta[0])

        depth_offset = 5e-3

        # Step along whichever image axis the edge spans more pixels in
        if dy > dx:
            order = np.argsort(endpoints[:, 1])
            endpoints = endpoints[order]
            depths = point_depths[order]

            # x = my + b
            delta = endpoints[1] - endpoints[0]
            slope = delta[0] / delta[1]
            intercept = endpoints[0][0] - slope * endpoints[0][1]

            # Move along y axis
            y = int(endpoints[0][1]) + 1
            y_end = int(endpoints[1][1])
            while y <= y_end:
                if y < 0 or y >= di_mask.shape[0]:
                    y += 1
                    continue
                exp_x = slope * y + intercept
                x = int(exp_x)
                xs_to_attempt = [x, x + 1, x - 1, x + 2, x - 2]
                exp_depth = ((y - endpoints[0][1]) / delta[1]) * (depths[1] - depths[0]) + depths[0]
                for x in xs_to_attempt:
                    if x < 0 or x >= di_mask.shape[1]:
                        continue
                    depth = depth_im.data[y, x]
                    if np.abs(depth - exp_depth) < depth_offset:
                        di_mask[y][x] = 0xFF
                        break
                y += 1
        else:
            order = np.argsort(endpoints[:, 0])
            endpoints = endpoints[order]
            depths = point_depths[order]

            # y = mx + b
            delta = endpoints[1] - endpoints[0]
            slope = delta[1] / delta[0]
            intercept = endpoints[0][1] - slope * endpoints[0][0]

            # Move along x axis
            x = int(endpoints[0][0]) + 1
            x_end = int(endpoints[1][0])
            while x <= x_end:
                if x < 0 or x >= di_mask.shape[1]:
                    x += 1
                    continue
                exp_y = slope * x + intercept
                y = int(exp_y)
                ys_to_attempt = [y, y + 1, y - 1, y + 2, y - 2]
                exp_depth = ((x - endpoints[0][0]) / delta[0]) * (depths[1] - depths[0]) + depths[0]
                for y in ys_to_attempt:
                    if y < 0 or y >= di_mask.shape[0]:
                        continue
                    depth = depth_im.data[y, x]
                    if np.abs(depth - exp_depth) < depth_offset:
                        di_mask[y][x] = 0xFF
                        break
                x += 1

    return BinaryImage(di_mask)
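# Hedged illustration (not part of the original module): the per-edge stepping
# logic above, reduced to a standalone helper. Given two projected endpoints
# (in x, y pixel coordinates) and their depths, it walks the dominant image
# axis one pixel at a time and linearly interpolates the expected depth, which
# is what the mask builder compares against the rendered depth image. The
# function name and example values are illustrative only.
import numpy as np

def rasterize_edge(p0, p1, d0, d1):
    """Yield (row, col, expected_depth) samples along the 2D segment p0->p1."""
    p0, p1 = np.asarray(p0, dtype=float), np.asarray(p1, dtype=float)
    delta = p1 - p0
    if np.abs(delta[1]) > np.abs(delta[0]):
        # steep edge: step in y, interpolate x and depth
        if delta[1] < 0:
            p0, p1, d0, d1 = p1, p0, d1, d0
            delta = -delta
        for y in range(int(p0[1]) + 1, int(p1[1]) + 1):
            t = (y - p0[1]) / delta[1]
            x = int(p0[0] + t * delta[0])
            yield y, x, d0 + t * (d1 - d0)
    else:
        # shallow edge: step in x, interpolate y and depth
        if delta[0] < 0:
            p0, p1, d0, d1 = p1, p0, d1, d0
            delta = -delta
        for x in range(int(p0[0]) + 1, int(p1[0]) + 1):
            t = (x - p0[0]) / delta[0]
            y = int(p0[1] + t * delta[1])
            yield y, x, d0 + t * (d1 - d0)

# Example: a nearly horizontal edge from pixel (10, 5) to (30, 12),
# with endpoint depths of 0.50 m and 0.55 m.
for row, col, exp_depth in rasterize_edge((10, 5), (30, 12), 0.50, 0.55):
    pass  # compare exp_depth against depth_im.data[row, col] within a tolerance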