示例#1
0
    def _process_request(self, cv_color, cv_depth, bbox):
        """
        Run keypoint inference on the region of an RGBD frame given by ``bbox``.

        :param cv_color: color image, forwarded to the image preprocessing
        :param cv_depth: depth image, forwarded to the image preprocessing
        :param bbox: region of interest exposing x_offset, y_offset, width
                     and height
        :return: the pair (keypoint_xy_depth_img, camera_keypoint) obtained
                 from inference.get_3d_prediction_K
        """
        # Corners of the region: the offset, and the offset plus the extent
        roi_min, roi_max = PixelCoord(), PixelCoord()
        roi_min.x, roi_min.y = bbox.x_offset, bbox.y_offset
        roi_max.x = bbox.x_offset + bbox.width
        roi_max.y = bbox.y_offset + bbox.height

        # Preprocess the images, then query the network on the result
        patch = inference.proc_input_img_raw(cv_color, cv_depth,
                                             roi_min, roi_max)
        scaled_prediction = self._query_network(patch)

        # Convert the scaled network output to real units
        real_unit_prediction = inference.get_keypoint_xy_depth_real_unit(
            scaled_prediction)

        # Use the bbox-to-patch transform and self.K_inv for the 3D prediction
        image_keypoint, keypoint_in_camera = inference.get_3d_prediction_K(
            real_unit_prediction, patch.bbox2patch, self.K_inv)
        return image_keypoint, keypoint_in_camera
示例#2
0
    def test_in_image(self):
        from mankey.utils.imgproc import PixelCoord, rectify_bbox_in_image

        # Input box: 20 pixels along x, 40 pixels along y
        box_min = PixelCoord()
        box_max = PixelCoord()
        box_min.x, box_min.y = 0, 10
        box_max.x, box_max.y = 20, 50

        # Rectify with the image size arguments 640 and 480
        square_min, square_max = rectify_bbox_in_image(
            box_min, box_max, 640, 480)

        # The result must be square, with a side of 40
        extent_x = square_max.x - square_min.x
        extent_y = square_max.y - square_min.y
        self.assertEqual(extent_x, extent_y)
        self.assertEqual(extent_x, 40)
示例#3
0
class SupervisedKeypointDBEntry:
    """
    Record holding the annotation of one image in a supervised keypoint
    database: image paths, bounding box, keypoints and pose information.

    Every field has a class-level default, so attribute lookup on the class
    (or on an instance created without running ``__init__``) still works.
    The mutable fields are additionally re-created per instance in
    ``__init__``; otherwise all entries would share one PixelCoord / array
    object and an in-place write on one entry would leak into the others.
    """

    # The path to the rgb image, which is mandatory
    rgb_image_path = ''

    # The path to the depth image; length zero indicates no depth
    depth_image_path = ''

    # The path to the mask image; length zero indicates no mask
    binary_mask_path = ''

    # The bounding box is tight
    bbox_top_left = PixelCoord()
    bbox_bottom_right = PixelCoord()

    # The information related to keypoint
    # All of these elements should be in size of (3, n_keypoint)
    # The first index iterates over x, y, or z, the second over keypoints
    # The position of keypoint expressed in camera frame using meter as unit
    keypoint_camera = None

    # (pixel_x, pixel_y, mm_depth) for each keypoint
    # Note that the pixel might be outside the image space
    keypoint_pixelxy_depth = None

    # Each element indicates the validity of the corresponding keypoint
    # coordinate: 1 means valid, 0 means not valid
    keypoint_validity_weight = None
    on_boundary = False

    # The pose of the camera as a homogeneous transformation matrix.
    # Zero-filled rather than np.ndarray(shape=...), which would expose
    # uninitialized memory as the default value.
    camera_in_world = np.zeros((4, 4))

    # xyzrot
    delta_rotation_matrix = np.zeros((3, 3))
    delta_translation = np.zeros((3,))
    gripper_pose = np.zeros((4, 4))
    step_size = np.zeros((1,))

    def __init__(self):
        # Give every entry its own mutable objects instead of the shared
        # class-level defaults declared above.
        self.bbox_top_left = PixelCoord()
        self.bbox_bottom_right = PixelCoord()
        self.camera_in_world = np.zeros((4, 4))
        self.delta_rotation_matrix = np.zeros((3, 3))
        self.delta_translation = np.zeros((3,))
        self.gripper_pose = np.zeros((4, 4))
        self.step_size = np.zeros((1,))

    @property
    def has_depth(self):
        """True when depth_image_path has non-zero length."""
        return len(self.depth_image_path) > 0

    @property
    def has_mask(self):
        """True when binary_mask_path has non-zero length."""
        return len(self.binary_mask_path) > 0
示例#4
0
    def test_center_aligned(self):
        from mankey.utils.imgproc import PixelCoord, rectify_bbox_center_align

        # Input box: 20 pixels along x, 40 pixels along y
        box_min = PixelCoord()
        box_max = PixelCoord()
        box_min.x, box_min.y = 0, 10
        box_max.x, box_max.y = 20, 50

        square_min, square_max = rectify_bbox_center_align(box_min, box_max)

        # The rectified box must have equal extent along x and y
        self.assertEqual(square_max.x - square_min.x,
                         square_max.y - square_min.y)

        # The corner sums (twice the center) must be unchanged on each axis
        self.assertEqual(square_max.x + square_min.x, box_min.x + box_max.x)
        self.assertEqual(square_max.y + square_min.y, box_min.y + box_max.y)
    def process_raw(
        self,
        cv_rgb,  # type: np.ndarray
        cv_depth,  # type: np.ndarray
        bbox,  # type: np.ndarray
    ):  # type: (...) -> np.ndarray
        """
        Run keypoint inference on an RGBD frame restricted to ``bbox``.

        :param cv_rgb: color image, forwarded to the image preprocessing
        :param cv_depth: depth image, forwarded to the image preprocessing
        :param bbox: indexable with four entries. Entries 0 and 1 are used as
                     the top-left x and y; entries 2 and 3 are used directly
                     as the bottom-right x and y (no width/height is added)
        :return: the second element returned by inference.get_3d_prediction
        """
        # Corners of the region of interest, taken verbatim from bbox
        roi_min, roi_max = PixelCoord(), PixelCoord()
        roi_min.x, roi_min.y = bbox[0], bbox[1]
        roi_max.x, roi_max.y = bbox[2], bbox[3]

        # Preprocess, run the network, then convert to real units
        patch = inference.proc_input_img_raw(cv_rgb, cv_depth, roi_min,
                                             roi_max)
        scaled_prediction = inference.inference_resnet_nostage(
            self._network, patch)
        real_unit_prediction = inference.get_keypoint_xy_depth_real_unit(
            scaled_prediction)

        # Only the second element of the 3D prediction is returned
        _image_keypoint, keypoint_in_camera = inference.get_3d_prediction(
            real_unit_prediction, patch.bbox2patch)
        return keypoint_in_camera
示例#6
0
    def process_request_raw(
            self,
            cv_color,  # type: np.ndarray
            cv_depth,  # type: np.ndarray
            bbox,  # type: RegionOfInterest
    ):  # type: (np.ndarray, np.ndarray, RegionOfInterest) -> np.ndarray
        """
        Run keypoint inference on an RGBD frame restricted to ``bbox``.

        :param cv_color: color image, forwarded to the image preprocessing
        :param cv_depth: depth image, forwarded to the image preprocessing
        :param bbox: region of interest exposing x_offset, y_offset, width
                     and height
        :return: the second element returned by inference.get_3d_prediction
        """
        # Corners of the region: the offset, and the offset plus the extent
        roi_min, roi_max = PixelCoord(), PixelCoord()
        roi_min.x, roi_min.y = bbox.x_offset, bbox.y_offset
        roi_max.x = bbox.x_offset + bbox.width
        roi_max.y = bbox.y_offset + bbox.height

        # Preprocess, run the network, then convert to real units
        patch = inference.proc_input_img_raw(
            cv_color, cv_depth, roi_min, roi_max)
        scaled_prediction = inference.inference_resnet_nostage(
            self._network, patch)
        real_unit_prediction = inference.get_keypoint_xy_depth_real_unit(
            scaled_prediction)

        # Only the second element of the 3D prediction is returned
        _image_keypoint, keypoint_in_camera = inference.get_3d_prediction(
            real_unit_prediction, patch.bbox2patch)
        return keypoint_in_camera
示例#7
0
    def _get_transformed_keypoint(
            transform: np.ndarray, entry: SupervisedKeypointDBEntry,
            patch_width: int, patch_height: int) -> (np.ndarray, np.ndarray):
        """
        Apply the bounding-box-to-patch transform to the keypoints of an entry
        and flag the ones that land outside the patch. The transformed pixel
        coordinates are stored as floats and might not be integral.
        :param transform: 3x3 homogeneous transform matrix
        :param entry: the entry whose keypoint_pixelxy_depth is transformed
        :param patch_width: x coordinate of the patch bottom-right corner
        :param patch_height: y coordinate of the patch bottom-right corner
        :return: A tuple (pixelxy_depth, validity), both of shape
                 (3, n_keypoint)
        """
        from mankey.utils.imgproc import transform_2d, PixelCoord, pixel_in_bbox

        source = entry.keypoint_pixelxy_depth
        keypoint_count = source.shape[1]

        # Outputs: everything starts out as valid
        warped = np.zeros((3, keypoint_count))
        weight = np.ones((3, keypoint_count))

        # The patch spans from (0, 0) to (patch_width, patch_height)
        patch_min, patch_max = PixelCoord(), PixelCoord()
        patch_min.x, patch_min.y = 0, 0
        patch_max.x, patch_max.y = patch_width, patch_height

        # A single probe pixel is reused for every keypoint
        probe = PixelCoord()
        for idx in range(keypoint_count):
            # Rows 0-1 are transformed, the depth row is copied unchanged
            warped[0:2, idx] = transform_2d(source[0:2, idx], transform)
            warped[2, idx] = source[2, idx]

            # The membership test runs on the int-truncated coordinates
            probe.x = int(warped[0, idx])
            probe.y = int(warped[1, idx])
            if pixel_in_bbox(probe, patch_min, patch_max):
                continue

            # Outside the patch: invalidate all three dimensions
            weight[:, idx] = 0

        return warped, weight
示例#8
0
    def _get_image_entry(self, image_map, scene_root: str) -> SupervisedKeypointDBEntry:
        """
        Build a database entry from the annotation map of one multi-view sample.

        :param image_map: mapping with the keys 'rgb_image_filename',
            'depth_image_filename', 'pcd', 'pcd_centroid', 'pcd_mean',
            'delta_rotation_matrix', 'delta_translation', 'gripper_pose',
            'camera_to_world', 'bbox_top_left_xy', 'bbox_bottom_right_xy',
            '3d_keypoint_camera_frame' and 'keypoint_pixel_xy_depth'
        :param scene_root: directory that contains the 'processed' folder
        :return: the populated entry. Its on_boundary flag is set when at
            least one keypoint is marked invalid
        :raises AssertionError: if a depth image or the pcd file is missing,
            or if the keypoint count is inconsistent
        """
        entry = SupervisedKeypointDBEntry()

        # The paths for the rgb images, one per view (existence is not checked)
        entry.rgb_image_path = [
            os.path.join(scene_root, 'processed/images/' + rgb_name)
            for rgb_name in image_map['rgb_image_filename']
        ]

        # The paths for the depth images.
        # NOTE(review): the original code here was an unfinished copy of the
        # rgb loop (a `for` without body followed by an undefined
        # `depth_name`), which did not parse. It is completed by mirroring the
        # rgb handling; this assumes 'depth_image_filename' is a list of file
        # names like 'rgb_image_filename' -- confirm against the dataset.
        depth_path = [
            os.path.join(scene_root, 'processed/images/' + depth_name)
            for depth_name in image_map['depth_image_filename']
        ]
        for single_depth_path in depth_path:
            assert os.path.exists(single_depth_path)  # Spartan must have depth image
        entry.depth_image_path = depth_path

        # The path for pcd
        pcd_name = image_map['pcd']
        pcd_path = os.path.join(scene_root, 'processed/pcd_seg_heatmap_3kpt/' + pcd_name)
        assert os.path.exists(pcd_path)
        entry.pcd_path = pcd_path

        # pcd centroid & pcd mean
        entry.pcd_centroid = np.array(image_map['pcd_centroid'])
        entry.pcd_mean = np.array(image_map['pcd_mean'])

        # The mask image path (binary_mask_path) is not populated here

        # xyzrot
        entry.delta_rotation_matrix = np.array(image_map['delta_rotation_matrix']).reshape((3, 3))
        entry.delta_translation = np.array(image_map['delta_translation']).reshape((3,))
        entry.gripper_pose = np.array(image_map['gripper_pose']).reshape((4, 4))

        # Direction of the translation; a zero translation is kept as is to
        # avoid dividing by zero
        translation_norm = np.linalg.norm(entry.delta_translation)
        if translation_norm == 0:
            entry.unit_delta_translation = entry.delta_translation
        else:
            entry.unit_delta_translation = entry.delta_translation / translation_norm

        # The step size is the translation norm scaled by 100, capped at 1.0
        entry.step_size = np.array([min(translation_norm * 100, 1.0)]).reshape((1,))

        # The camera pose in world
        entry.camera_in_world = camera2world_from_map(image_map['camera_to_world'])

        # The bounding box
        top_left_xy = image_map['bbox_top_left_xy']
        bottom_right_xy = image_map['bbox_bottom_right_xy']
        top_left = PixelCoord()
        bottom_right = PixelCoord()
        top_left.x, top_left.y = top_left_xy[0], top_left_xy[1]
        bottom_right.x, bottom_right.y = bottom_right_xy[0], bottom_right_xy[1]
        entry.bbox_top_left = top_left
        entry.bbox_bottom_right = bottom_right

        # The number of keypoints: the first entry fixes it, later entries
        # must agree with it
        keypoint_camera_frame_list = image_map['3d_keypoint_camera_frame']
        n_keypoint = len(keypoint_camera_frame_list)
        if self._num_keypoint < 0:
            self._num_keypoint = n_keypoint
        else:
            assert self._num_keypoint == n_keypoint

        # The keypoint in camera frame, stored as (3, n_keypoint)
        entry.keypoint_camera = np.zeros((3, n_keypoint))
        for i, keypoint in enumerate(keypoint_camera_frame_list):
            for j in range(3):
                entry.keypoint_camera[j, i] = keypoint[j]

        # The pixel coordinate and depth of keypoint, stored as integers.
        # The builtin int replaces np.int, an alias removed in NumPy 1.24.
        keypoint_pixelxy_depth_list = image_map['keypoint_pixel_xy_depth']
        assert n_keypoint == len(keypoint_pixelxy_depth_list)
        entry.keypoint_pixelxy_depth = np.zeros((3, n_keypoint), dtype=int)
        for i, pixelxy_depth in enumerate(keypoint_pixelxy_depth_list):
            for j in range(3):
                entry.keypoint_pixelxy_depth[j, i] = pixelxy_depth[j]

        # Check the validity
        entry.keypoint_validity_weight = np.ones((3, n_keypoint))
        for i in range(n_keypoint):
            pixel = PixelCoord()
            pixel.x = entry.keypoint_pixelxy_depth[0, i]
            pixel.y = entry.keypoint_pixelxy_depth[1, i]
            depth_mm = entry.keypoint_pixelxy_depth[2, i]

            # The depth cannot be negative and the pixel must be in bounding box
            valid = depth_mm >= 0 and pixel_in_bbox(
                pixel, entry.bbox_top_left, entry.bbox_bottom_right)

            # Invalidate all the dimensions
            if not valid:
                entry.keypoint_validity_weight[:, i] = 0
                entry.on_boundary = True

        # OK
        return entry

    def _check_image_entry(self, entry: SupervisedKeypointDBEntry) -> bool:
        """
        Tell whether both bounding box corners of ``entry`` have coordinates.

        :param entry: the entry to inspect
        :return: False as soon as x or y of bbox_top_left or of
                 bbox_bottom_right is None, True otherwise
        """
        # The top-left corner is inspected first; the bottom-right corner is
        # only read when the top-left one is complete
        first_corner = entry.bbox_top_left
        if not (first_corner.x is not None and first_corner.y is not None):
            return False

        second_corner = entry.bbox_bottom_right
        return second_corner.x is not None and second_corner.y is not None
示例#9
0
    def _get_image_entry(self, image_map,
                         scene_root: str) -> SupervisedKeypointDBEntry:
        """
        Build a database entry from the annotation map of one image.

        :param image_map: mapping with the keys 'rgb_image_filename',
            'depth_image_filename', 'delta_rotation_matrix',
            'delta_translation', 'gripper_pose', 'step_size',
            'camera_to_world', 'bbox_top_left_xy', 'bbox_bottom_right_xy',
            '3d_keypoint_camera_frame' and 'keypoint_pixel_xy_depth'
        :param scene_root: directory that contains the 'processed' folder
        :return: the populated entry. Its on_boundary flag is set when at
            least one keypoint is marked invalid
        :raises AssertionError: if the rgb or depth image is missing, or if
            the keypoint count is inconsistent
        """
        entry = SupervisedKeypointDBEntry()

        # The path for rgb image
        rgb_name = image_map['rgb_image_filename']
        rgb_path = os.path.join(scene_root, 'processed/images/' + rgb_name)
        assert os.path.exists(rgb_path)
        entry.rgb_image_path = rgb_path

        # The path for depth image
        depth_name = image_map['depth_image_filename']
        depth_path = os.path.join(scene_root, 'processed/images/' + depth_name)
        assert os.path.exists(depth_path)  # Spartan must have depth image
        entry.depth_image_path = depth_path

        # The mask image path (binary_mask_path) is not populated here

        # xyzrot
        entry.delta_rotation_matrix = np.array(
            image_map['delta_rotation_matrix']).reshape((3, 3))
        entry.delta_translation = np.array(
            image_map['delta_translation']).reshape((3, ))
        entry.gripper_pose = np.array(image_map['gripper_pose']).reshape(
            (4, 4))

        # The step size is clamped into [0.0, 1.0]
        step_size_value = max(min(image_map['step_size'], 1.0), 0.0)
        entry.step_size = np.array([step_size_value]).reshape((1, ))

        # The camera pose in world
        entry.camera_in_world = camera2world_from_map(
            image_map['camera_to_world'])

        # The bounding box
        top_left_xy = image_map['bbox_top_left_xy']
        bottom_right_xy = image_map['bbox_bottom_right_xy']
        top_left = PixelCoord()
        bottom_right = PixelCoord()
        top_left.x, top_left.y = top_left_xy[0], top_left_xy[1]
        bottom_right.x, bottom_right.y = bottom_right_xy[0], bottom_right_xy[1]
        entry.bbox_top_left = top_left
        entry.bbox_bottom_right = bottom_right

        # The number of keypoints: the first entry fixes it, later entries
        # must agree with it
        keypoint_camera_frame_list = image_map['3d_keypoint_camera_frame']
        n_keypoint = len(keypoint_camera_frame_list)
        if self._num_keypoint < 0:
            self._num_keypoint = n_keypoint
        else:
            assert self._num_keypoint == n_keypoint

        # The keypoint in camera frame, stored as (3, n_keypoint)
        entry.keypoint_camera = np.zeros((3, n_keypoint))
        for i, keypoint in enumerate(keypoint_camera_frame_list):
            for j in range(3):
                entry.keypoint_camera[j, i] = keypoint[j]

        # The pixel coordinate and depth of keypoint, stored as integers.
        # The builtin int replaces np.int, an alias removed in NumPy 1.24.
        keypoint_pixelxy_depth_list = image_map['keypoint_pixel_xy_depth']
        assert n_keypoint == len(keypoint_pixelxy_depth_list)
        entry.keypoint_pixelxy_depth = np.zeros((3, n_keypoint), dtype=int)
        for i, pixelxy_depth in enumerate(keypoint_pixelxy_depth_list):
            for j in range(3):
                entry.keypoint_pixelxy_depth[j, i] = pixelxy_depth[j]

        # Check the validity
        entry.keypoint_validity_weight = np.ones((3, n_keypoint))
        for i in range(n_keypoint):
            pixel = PixelCoord()
            pixel.x = entry.keypoint_pixelxy_depth[0, i]
            pixel.y = entry.keypoint_pixelxy_depth[1, i]
            depth_mm = entry.keypoint_pixelxy_depth[2, i]

            # The depth cannot be negative and the pixel must be in bounding box
            valid = depth_mm >= 0 and pixel_in_bbox(
                pixel, entry.bbox_top_left, entry.bbox_bottom_right)

            # Invalidate all the dimensions
            if not valid:
                entry.keypoint_validity_weight[:, i] = 0
                entry.on_boundary = True

        # OK
        return entry