示例#1
0
def inpaint_depth_image(d_img, ix=0, iy=0):
    """Inpaint depth image on raw depth values.

    Only import code here to avoid making them required if we're not inpainting.

    Also, inpainting is slow, so crop some irrelevant values. But BE CAREFUL!
    Make sure any cropping here will lead to logical consistency with the
    processing in `camera.process_img_for_net` later. For now we crop the 'later
    part' of each dimension, which still leads to > 2x speed-up. The
    window size is 3 which I think means we can get away with a pixel difference
    of 3 when cropping but to be safe let's add a bit more, 50 pix to each side.

    For `ix` and `iy` see `camera.process_img_for_net`, makes inpainting faster.
    """
    d_img = d_img[ix:685, iy:1130]
    from perception import (ColorImage, DepthImage)
    print('now in-painting the depth image (shape {}), ix, iy = {}, {}...'.
          format(d_img.shape, ix, iy))
    start_t = time.time()
    d_img = DepthImage(d_img)
    d_img = d_img.inpaint()  # inpaint, then get d_img right away
    d_img = d_img.data  # get raw data back from the class
    cum_t = time.time() - start_t
    print('finished in-painting in {:.2f} seconds'.format(cum_t))
    return d_img
示例#2
0
    def plan_grasp(self,
                   depth,
                   rgb,
                   resetting=False,
                   camera_intr=None,
                   segmask=None):
        """
        Computes possible grasps.
        Parameters
        ----------
        depth: type `numpy`
            depth image
        rgb: type `numpy`
            rgb image
        camera_intr: type `perception.CameraIntrinsics`
            Intrinsic camera object.
        segmask: type `perception.BinaryImage`
            Binary segmask of detected object
        Returns
        -------
        type `GQCNNGrasp`
            Computed optimal grasp.
        """
        if "FC" in self.model:
            assert not (segmask is
                        None), "Fully-Convolutional policy expects a segmask."
        if camera_intr is None:
            camera_intr_filename = os.path.join(
                os.path.dirname(os.path.realpath(__file__)),
                "gqcnn/data/calib/primesense/primesense.intr")
            camera_intr = CameraIntrinsics.load(camera_intr_filename)

        depth_im = DepthImage(depth, frame=camera_intr.frame)
        color_im = ColorImage(rgb, frame=camera_intr.frame)

        valid_px_mask = depth_im.invalid_pixel_mask().inverse()
        if segmask is None:
            segmask = valid_px_mask
        else:
            segmask = segmask.mask_binary(valid_px_mask)

        # Inpaint.
        depth_im = depth_im.inpaint(
            rescale_factor=self.config["inpaint_rescale_factor"])
        # Aggregate color and depth images into a single
        # BerkeleyAutomation/perception `RgbdImage`.
        self.rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)
        # Create an `RgbdImageState` with the `RgbdImage` and `CameraIntrinsics`.
        state = RgbdImageState(self.rgbd_im, camera_intr, segmask=segmask)

        # Set input sizes for fully-convolutional policy.
        if "FC" in self.model:
            self.policy_config["metric"]["fully_conv_gqcnn_config"][
                "im_height"] = depth_im.shape[0]
            self.policy_config["metric"]["fully_conv_gqcnn_config"][
                "im_width"] = depth_im.shape[1]

        return self.execute_policy(state, resetting)
def depth_to_bin(img_file):
    # Load the image as a numpy array and the camera intrinsics
    image = np.load(img_file)
    # Create and deproject a depth image of the data using the camera intrinsics
    di = DepthImage(image, frame=ci.frame)
    di = di.inpaint()
    bi = di.to_binary(threshold=0.85)
    #vis2d.figure()
    #vis2d.imshow(di)
    #vis2d.imshow(bi)
    #vis2d.show()
    return bi
    def get_state(self, depth, segmask):
        # Read images.
        depth_im = DepthImage(depth, frame=self.camera_intr.frame)
        color_im = ColorImage(np.zeros([depth_im.height, depth_im.width,
                                        3]).astype(np.uint8),
                              frame=self.camera_intr.frame)
        segmask = BinaryImage(segmask.astype(np.uint8) * 255,
                              frame=self.camera_intr.frame)

        # Inpaint.
        depth_im = depth_im.inpaint(rescale_factor=self.inpaint_rescale_factor)

        # Create state.
        rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)
        state = RgbdImageState(rgbd_im, self.camera_intr, segmask=segmask)
        return state, rgbd_im
def largest_planar_surface(filename):
    # Load the image as a numpy array and the camera intrinsics
    image = np.load(filename)
    # Create and deproject a depth image of the data using the camera intrinsics
    di = DepthImage(image, frame=ci.frame)
    di = di.inpaint()
    pc = ci.deproject(di)
    # Make a PCL type point cloud from the image
    p = pcl.PointCloud(pc.data.T.astype(np.float32))
    # Make a segmenter and segment the point cloud.
    seg = p.make_segmenter()
    seg.set_model_type(pcl.SACMODEL_PARALLEL_PLANE)
    seg.set_method_type(pcl.SAC_RANSAC)
    seg.set_distance_threshold(0.005)
    indices, model = seg.segment()
    return indices, model, image, pc
示例#6
0
    color_im = ColorImage(np.zeros([depth_im.height, depth_im.width,
                                    3]).astype(np.uint8),
                          frame=camera_intr.frame)

    # Optionally read a segmask.
    segmask = None
    if segmask_filename is not None:
        segmask = BinaryImage.open(segmask_filename)
    valid_px_mask = depth_im.invalid_pixel_mask().inverse()
    if segmask is None:
        segmask = valid_px_mask
    else:
        segmask = segmask.mask_binary(valid_px_mask)

    # Inpaint.
    depth_im = depth_im.inpaint(rescale_factor=inpaint_rescale_factor)

    if "input_images" in policy_config["vis"] and policy_config["vis"][
            "input_images"]:
        vis.figure(size=(10, 10))
        num_plot = 1
        if segmask is not None:
            num_plot = 2
        vis.subplot(1, num_plot, 1)
        vis.imshow(depth_im)
        if segmask is not None:
            vis.subplot(1, num_plot, 2)
            vis.imshow(segmask)
        vis.show()

    # Create state.
示例#7
0
grasp_policy = CrossEntropyRobustGraspingPolicy(cfg['policy'])
# grasp_policy = RobustGraspingPolicy(cfg['policy'])
#%%
img = rosco.rgb
d = rosco.depth
#%%
plt.imshow(img)
#%%
plt.imshow(d)
#print(img)
#print(d)
#%%
color_im = ColorImage(img.astype(np.uint8), encoding="bgr8", frame='pcl')
depth_im = DepthImage(d.astype(np.float32), frame='pcl')
color_im = color_im.inpaint(rescale_factor=cfg['inpaint_rescale_factor'])
depth_im = depth_im.inpaint(rescale_factor=cfg['inpaint_rescale_factor'])
rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)
rgbd_state = RgbdImageState(rgbd_im, camera_int)
#%%
grasp = grasp_policy(rgbd_state)
#%%
img2 = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img2 = cv2.circle(img2,(int(grasp.grasp.center.x),int(grasp.grasp.center.y)),2,(255,0,0),3)
plt.imshow(img2)

#%%
img3 = cv2.cvtColor(d, cv2.COLOR_BGR2RGB)
img3 = cv2.circle(img3,(int(grasp.grasp.center.x),int(grasp.grasp.center.y)),2,(255,0,0),3)
plt.imshow(img3)
#%%
vis.figure(size=(16,16))
示例#8
0
def detect(detector_type, config, run_dir, test_config):
    """Run PCL-based detection on a depth-image-based dataset.

    Parameters
    ----------
    config : dict
        config for a PCL detector
    run_dir : str
        Directory to save outputs in. Output will be saved in pred_masks, pred_info,
        and modal_segmasks_processed subdirectories.
    test_config : dict
        config containing dataset information
    """

    ##################################################################
    # Set up output directories
    ##################################################################

    # Create subdirectory for prediction masks
    pred_dir = os.path.join(run_dir, 'pred_masks')
    mkdir_if_missing(pred_dir)

    # Create subdirectory for prediction scores & bboxes
    pred_info_dir = os.path.join(run_dir, 'pred_info')
    mkdir_if_missing(pred_info_dir)

    # Create subdirectory for transformed GT segmasks
    resized_segmask_dir = os.path.join(run_dir, 'modal_segmasks_processed')
    mkdir_if_missing(resized_segmask_dir)

    ##################################################################
    # Set up input directories
    ##################################################################

    dataset_dir = test_config['path']
    indices_arr = np.load(os.path.join(dataset_dir, test_config['indices']))

    # Input depth image data (numpy files, not .pngs)
    depth_dir = os.path.join(dataset_dir, test_config['images'])

    # Input GT binary masks dir
    gt_mask_dir = os.path.join(dataset_dir, test_config['masks'])

    # Input binary mask data
    if 'bin_masks' in test_config.keys():
        bin_mask_dir = os.path.join(dataset_dir, test_config['bin_masks'])

    # Input camera intrinsics
    camera_intrinsics_fn = os.path.join(dataset_dir, 'camera_intrinsics.intr')
    camera_intrs = CameraIntrinsics.load(camera_intrinsics_fn)

    image_ids = np.arange(indices_arr.size)

    ##################################################################
    # Process each image
    ##################################################################
    for image_id in tqdm(image_ids):
        base_name = 'image_{:06d}'.format(indices_arr[image_id])
        output_name = 'image_{:06d}'.format(image_id)
        depth_image_fn = base_name + '.npy'

        # Extract depth image
        depth_data = np.load(os.path.join(depth_dir, depth_image_fn))
        depth_im = DepthImage(depth_data, camera_intrs.frame)
        depth_im = depth_im.inpaint(0.25)

        # Mask out bin pixels if appropriate/necessary
        if bin_mask_dir:
            mask_im = BinaryImage.open(
                os.path.join(bin_mask_dir, base_name + '.png'),
                camera_intrs.frame)
            mask_im = mask_im.resize(depth_im.shape[:2])
            depth_im = depth_im.mask_binary(mask_im)

        point_cloud = camera_intrs.deproject(depth_im)
        point_cloud.remove_zero_points()
        pcl_cloud = pcl.PointCloud(point_cloud.data.T.astype(np.float32))
        tree = pcl_cloud.make_kdtree()
        if detector_type == 'euclidean':
            segmentor = pcl_cloud.make_EuclideanClusterExtraction()
            segmentor.set_ClusterTolerance(config['tolerance'])
        elif detector_type == 'region_growing':
            segmentor = pcl_cloud.make_RegionGrowing(ksearch=50)
            segmentor.set_NumberOfNeighbours(config['n_neighbors'])
            segmentor.set_CurvatureThreshold(config['curvature'])
            segmentor.set_SmoothnessThreshold(config['smoothness'])
        else:
            print('PCL detector type not supported')
            exit()

        segmentor.set_MinClusterSize(config['min_cluster_size'])
        segmentor.set_MaxClusterSize(config['max_cluster_size'])
        segmentor.set_SearchMethod(tree)
        cluster_indices = segmentor.Extract()

        # Set up predicted masks and metadata
        indiv_pred_masks = []
        r_info = {
            'rois': [],
            'scores': [],
            'class_ids': [],
        }

        for i, cluster in enumerate(cluster_indices):
            points = pcl_cloud.to_array()[cluster]
            indiv_pred_mask = camera_intrs.project_to_image(
                PointCloud(points.T, frame=camera_intrs.frame)).to_binary()
            indiv_pred_mask.data[indiv_pred_mask.data > 0] = 1
            indiv_pred_masks.append(indiv_pred_mask.data)

            # Compute bounding box, score, class_id
            nonzero_pix = np.nonzero(indiv_pred_mask.data)
            min_x, max_x = np.min(nonzero_pix[1]), np.max(nonzero_pix[1])
            min_y, max_y = np.min(nonzero_pix[0]), np.max(nonzero_pix[0])
            r_info['rois'].append([min_y, min_x, max_y, max_x])
            r_info['scores'].append(1.0)
            r_info['class_ids'].append(1)

        r_info['rois'] = np.array(r_info['rois'])
        r_info['scores'] = np.array(r_info['scores'])
        r_info['class_ids'] = np.array(r_info['class_ids'])

        # Write the predicted masks and metadata
        if indiv_pred_masks:
            pred_mask_output = np.stack(indiv_pred_masks).astype(np.uint8)
        else:
            pred_mask_output = np.array(indiv_pred_masks).astype(np.uint8)

        # Save out ground-truth mask as array of shape (n, h, w)
        indiv_gt_masks = []
        gt_mask = cv2.imread(os.path.join(gt_mask_dir, base_name + '.png'))
        gt_mask = cv2.resize(gt_mask,
                             (depth_im.shape[1], depth_im.shape[0])).astype(
                                 np.uint8)[:, :, 0]
        num_gt_masks = np.max(gt_mask)
        for i in range(1, num_gt_masks + 1):
            indiv_gt_mask = (gt_mask == i)
            if np.any(indiv_gt_mask):
                indiv_gt_masks.append(gt_mask == i)
        gt_mask_output = np.stack(indiv_gt_masks)

        np.save(os.path.join(resized_segmask_dir, output_name + '.npy'),
                gt_mask_output)
        np.save(os.path.join(pred_dir, output_name + '.npy'), pred_mask_output)
        np.save(os.path.join(pred_info_dir, output_name + '.npy'), r_info)

    print('Saved prediction masks to:\t {}'.format(pred_dir))
    print('Saved prediction info (bboxes, scores, classes) to:\t {}'.format(
        pred_info_dir))
    print('Saved transformed GT segmasks to:\t {}'.format(resized_segmask_dir))

    return pred_dir, pred_info_dir, resized_segmask_dir
示例#9
0
 def _obs_callback(self, obs):
     depth_image = DepthImage(np.array(obs.image_data).reshape((obs.height, obs.width)))
     color_image = depth_image.inpaint(rescale_factor=0.25).to_color()
     self._current_image = color_image
     self.image_signal.emit()
示例#10
0
    def plan_grasp(self, req):
        """ Grasp planner request handler .
        
        Parameters
        ---------
        req: :obj:`ROS ServiceRequest`
            ROS ServiceRequest for grasp planner service
        """
        rospy.loginfo('Planning Grasp')

        # set min dimensions
        pad = max(
            math.ceil(
                np.sqrt(2) *
                (float(self.cfg['policy']['metric']['crop_width']) / 2)),
            math.ceil(
                np.sqrt(2) *
                (float(self.cfg['policy']['metric']['crop_height']) / 2)))
        min_width = 2 * pad + self.cfg['policy']['metric']['crop_width']
        min_height = 2 * pad + self.cfg['policy']['metric']['crop_height']

        # get the raw depth and color images as ROS Image objects
        raw_color = req.color_image
        raw_depth = req.depth_image
        segmask = None
        raw_segmask = req.segmask

        # get the raw camera info as ROS CameraInfo object
        raw_camera_info = req.camera_info

        # get the bounding box as a custom ROS BoundingBox msg
        bounding_box = req.bounding_box

        # wrap the camera info in a perception CameraIntrinsics object
        camera_intrinsics = CameraIntrinsics(
            raw_camera_info.header.frame_id, raw_camera_info.K[0],
            raw_camera_info.K[4], raw_camera_info.K[2], raw_camera_info.K[5],
            raw_camera_info.K[1], raw_camera_info.height,
            raw_camera_info.width)

        ### create wrapped Perception RGB and Depth Images by unpacking the ROS Images using CVBridge ###
        try:
            color_image = ColorImage(self.cv_bridge.imgmsg_to_cv2(
                raw_color, "rgb8"),
                                     frame=camera_intrinsics.frame)
            depth_image = DepthImage(self.cv_bridge.imgmsg_to_cv2(
                raw_depth, desired_encoding="passthrough"),
                                     frame=camera_intrinsics.frame)
            segmask = BinaryImage(self.cv_bridge.imgmsg_to_cv2(
                raw_segmask, desired_encoding="passthrough"),
                                  frame=camera_intrinsics.frame)
        except CvBridgeError as cv_bridge_exception:
            rospy.logerr(cv_bridge_exception)

        # check image sizes
        if color_image.height != depth_image.height or \
           color_image.width != depth_image.width:
            rospy.logerr(
                'Color image and depth image must be the same shape! Color is %d x %d but depth is %d x %d'
                % (color_image.height, color_image.width, depth_image.height,
                   depth_image.width))
            raise rospy.ServiceException(
                'Color image and depth image must be the same shape! Color is %d x %d but depth is %d x %d'
                % (color_image.height, color_image.width, depth_image.height,
                   depth_image.width))

        if color_image.height < min_height or color_image.width < min_width:
            rospy.logerr(
                'Color image is too small! Must be at least %d x %d resolution but the requested image is only %d x %d'
                %
                (min_height, min_width, color_image.height, color_image.width))
            raise rospy.ServiceException(
                'Color image is too small! Must be at least %d x %d resolution but the requested image is only %d x %d'
                %
                (min_height, min_width, color_image.height, color_image.width))

        # inpaint images
        color_image = color_image.inpaint(
            rescale_factor=self.cfg['inpaint_rescale_factor'])
        depth_image = depth_image.inpaint(
            rescale_factor=self.cfg['inpaint_rescale_factor'])

        # visualize
        if self.cfg['vis']['color_image']:
            vis.imshow(color_image)
            vis.show()
        if self.cfg['vis']['depth_image']:
            vis.imshow(depth_image)
            vis.show()
        if self.cfg['vis']['segmask'] and segmask is not None:
            vis.imshow(segmask)
            vis.show()

        # aggregate color and depth images into a single perception rgbdimage
        rgbd_image = RgbdImage.from_color_and_depth(color_image, depth_image)

        # calc crop parameters
        minX = bounding_box.minX - pad
        minY = bounding_box.minY - pad
        maxX = bounding_box.maxX + pad
        maxY = bounding_box.maxY + pad

        # contain box to image->don't let it exceed image height/width bounds
        if minX < 0:
            minX = 0
        if minY < 0:
            minY = 0
        if maxX > rgbd_image.width:
            maxX = rgbd_image.width
        if maxY > rgbd_image.height:
            maxY = rgbd_image.height

        centroidX = (maxX + minX) / 2
        centroidY = (maxY + minY) / 2

        # compute width and height
        width = maxX - minX
        height = maxY - minY

        # crop camera intrinsics and rgbd image
        cropped_camera_intrinsics = camera_intrinsics.crop(
            height, width, centroidY, centroidX)
        cropped_rgbd_image = rgbd_image.crop(height, width, centroidY,
                                             centroidX)
        cropped_segmask = None
        if segmask is not None:
            cropped_segmask = segmask.crop(height, width, centroidY, centroidX)

        # visualize
        if self.cfg['vis']['cropped_rgbd_image']:
            vis.imshow(cropped_rgbd_image)
            vis.show()

        # create an RGBDImageState with the cropped RGBDImage and CameraIntrinsics
        image_state = RgbdImageState(cropped_rgbd_image,
                                     cropped_camera_intrinsics,
                                     segmask=cropped_segmask)

        # execute policy
        try:
            return self.execute_policy(image_state, self.grasping_policy,
                                       self.grasp_pose_publisher,
                                       cropped_camera_intrinsics.frame)
        except NoValidGraspsException:
            rospy.logerr(
                'While executing policy found no valid grasps from sampled antipodal point pairs. Aborting Policy!'
            )
            raise rospy.ServiceException(
                'While executing policy found no valid grasps from sampled antipodal point pairs. Aborting Policy!'
            )
示例#11
0
    def sample_grasp(self, env, mesh_obj, vis=True):
        # Setup sensor.
        #camera_intr = CameraIntrinsics.load(camera_intr_filename)

        # Read images.
        # get depth image from camera

        inpaint_rescale_factor = 1.0
        segmask_filename = None

        _, center, extents, obj_xquat, bbox_xpos = self.get_bbox_properties(
            env, mesh_obj)
        camera_pos = copy.deepcopy(center)
        camera_pos[2] += 0.5 * extents[2] + 0.2

        env.set_camera(camera_pos,
                       np.array([0.7071068, 0, 0, -0.7071068]),
                       camera_name=f"ext_camera_0")
        rgb, depth = env.render_from_camera(int(self.config.camera_img_height),
                                            int(self.config.camera_img_width),
                                            camera_name=f"ext_camera_0")
        # enforce zoom out

        from scipy.interpolate import interp2d

        center_x = self.config.camera_img_height / 2 + 1
        center_y = self.config.camera_img_width / 2 + 1
        img_height = self.config.camera_img_height
        img_width = self.config.camera_img_width
        xdense = np.linspace(0, img_height - 1, img_height)
        ydense = np.linspace(0, img_width - 1, img_width)
        xintr = (xdense - center_x) * (1.0 / self.rescale_factor) + center_x
        yintr = (ydense - center_y) * (1.0 / self.rescale_factor) + center_y
        xintr[xintr < 0] = 0
        xintr[xintr > (img_height - 1)] = img_height - 1
        yintr[yintr < 0] = 0
        yintr[yintr > (img_width - 1)] = img_width - 1

        fr = interp2d(xdense, ydense, rgb[:, :, 0], kind="linear")
        rgb_r_new = fr(xintr, yintr)
        fg = interp2d(xdense, ydense, rgb[:, :, 1], kind="linear")
        rgb_g_new = fg(xintr, yintr)
        fb = interp2d(xdense, ydense, rgb[:, :, 2], kind="linear")
        rgb_b_new = fb(xintr, yintr)
        rgb_new = np.stack([rgb_r_new, rgb_g_new, rgb_b_new], axis=2)

        fd = interp2d(xdense, ydense, depth, kind="linear")
        depth_new = fd(xintr, yintr)

        #from skimage.transform import resize
        #rgb22, depth2 = env.render_from_camera(int(self.config.camera_img_height) , int(self.config.camera_img_width), camera_name=f"ext_camera_0")

        #import ipdb; ipdb.set_trace()

        # visualize the interpolation
        #import imageio
        #imageio.imwrite(f"tmp/rgb_{self.iter_id}.png", rgb)
        #imageio.imwrite(f"tmp/rgb2_{self.iter_id}.png", rgb_new)
        #imageio.imwrite(f"tmp/depth_{self.iter_id}.png", depth)
        #imageio.imwrite(f"tmp/depth2_{self.iter_id}.png", depth_new)
        #import ipdb; ipdb.set_trace()

        rgb = rgb_new
        depth = depth_new

        depth = depth * self.rescale_factor

        # rgb: 128 x 128 x 1
        # depth: 128 x 128 x 1
        scaled_camera_fov_y = self.config.camera_fov_y
        aspect = 1
        scaled_fovx = 2 * np.arctan(
            np.tan(np.deg2rad(scaled_camera_fov_y) * 0.5) * aspect)
        scaled_fovx = np.rad2deg(scaled_fovx)
        scaled_fovy = scaled_camera_fov_y

        cx = self.config.camera_img_width * 0.5
        cy = self.config.camera_img_height * 0.5
        scaled_fx = cx / np.tan(np.deg2rad(
            scaled_fovx / 2.)) * (self.rescale_factor)
        scaled_fy = cy / np.tan(np.deg2rad(
            scaled_fovy / 2.)) * (self.rescale_factor)

        camera_intr = CameraIntrinsics(frame='phoxi',
                                       fx=scaled_fx,
                                       fy=scaled_fy,
                                       cx=self.config.camera_img_width * 0.5,
                                       cy=self.config.camera_img_height * 0.5,
                                       height=self.config.camera_img_height,
                                       width=self.config.camera_img_width)

        depth_im = DepthImage(depth, frame=camera_intr.frame)
        color_im = ColorImage(np.zeros([depth_im.height, depth_im.width,
                                        3]).astype(np.uint8),
                              frame=camera_intr.frame)

        # Optionally read a segmask.

        valid_px_mask = depth_im.invalid_pixel_mask().inverse()
        segmask = valid_px_mask

        # Inpaint.
        depth_im = depth_im.inpaint(rescale_factor=inpaint_rescale_factor)

        # Create state.
        rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)
        state = RgbdImageState(rgbd_im, camera_intr, segmask=segmask)

        # Query policy.
        policy_start = time.time()
        action = self.policy(state)
        print("Planning took %.3f sec" % (time.time() - policy_start))
        # numpy array with 2 values
        grasp_center = action.grasp.center._data[:, 0]  #(width, depth)
        grasp_depth = action.grasp.depth * (1 / self.rescale_factor)
        grasp_angle = action.grasp.angle  #np.pi*0.3

        if self.config.data_collection_mode:

            self.current_grasp = action.grasp

            depth_im = state.rgbd_im.depth
            scale = 1.0
            depth_im_scaled = depth_im.resize(scale)
            translation = scale * np.array([
                depth_im.center[0] - grasp_center[1],
                depth_im.center[1] - grasp_center[0]
            ])
            im_tf = depth_im_scaled
            im_tf = depth_im_scaled.transform(translation, grasp_angle)
            im_tf = im_tf.crop(self.gqcnn_image_size, self.gqcnn_image_size)

            # get the patch
            self.current_patch = im_tf.raw_data

        XYZ_origin, gripper_quat = self.compute_grasp_pts_from_grasp_sample(
            grasp_center, grasp_depth, grasp_angle, env)

        return XYZ_origin[:, 0], gripper_quat

        # Vis final grasp.
        if vis:
            from visualization import Visualizer2D as vis
            vis.figure(size=(10, 10))
            vis.imshow(rgbd_im.depth,
                       vmin=self.policy_config["vis"]["vmin"],
                       vmax=self.policy_config["vis"]["vmax"])
            vis.grasp(action.grasp,
                      scale=2.5,
                      show_center=False,
                      show_axis=True)
            vis.title("Planned grasp at depth {0:.3f}m with Q={1:.3f}".format(
                action.grasp.depth, action.q_value))
            vis.show(f"tmp/grasp2_{mesh_obj.name}_{self.iter_id}.png")

            vis.figure(size=(10, 10))

            vis.imshow(im_tf,
                       vmin=self.policy_config["vis"]["vmin"],
                       vmax=self.policy_config["vis"]["vmax"])
            vis.show(f"tmp/cropd_{mesh_obj.name}_{self.iter_id}.png")

        import ipdb
        ipdb.set_trace()

        return XYZ_origin[:, 0], gripper_quat

        import ipdb
        ipdb.set_trace()