Exemplo n.º 1
0
    def _load_and_postprocess(self, file_path, key, version="1.0.0"):
        """
        Loads an image and post process it.

        :param file_path: Image path. Type: string.
        :param key: The image's key with regards to the hdf5 file. Type: string.
        :param version: The version number original data. Type: String. Default: 1.0.0.
        :return: The post-processed image that was loaded using the file path.
        """
        data = WriterUtility.load_output_file(resolve_path(file_path),
                                              self.write_alpha_channel,
                                              remove=False)
        data, new_key, new_version = self._apply_postprocessing(
            key, data, version)
        if isinstance(data, np.ndarray):
            print("Key: " + key + " - shape: " + str(data.shape) +
                  " - dtype: " + str(data.dtype) + " - path: " + file_path)
        else:
            print("Key: " + key + " - path: " + file_path)
        return data, new_key, new_version
    def _write_frames(chunks_dir: str,
                      dataset_objects: list,
                      depths: List[np.ndarray] = [],
                      colors: List[np.ndarray] = [],
                      color_file_format: str = "PNG",
                      depth_scale: float = 1.0,
                      frames_per_chunk: int = 1000,
                      m2mm: bool = True,
                      ignore_dist_thres: float = 100.,
                      save_world2cam: bool = True,
                      jpg_quality: int = 95):
        """Write each frame's ground truth into chunk directory in BOP format

        :param chunks_dir: Path to the output directory of the current chunk.
        :param dataset_objects: Save annotations for these objects.
        :param depths: List of depth images in m to save
        :param colors: List of color images to save
        :param color_file_format: File type to save color images. Available: "PNG", "JPEG"
        :param jpg_quality: If color_file_format is "JPEG", save with the given quality.
        :param depth_scale: Multiply the uint16 output depth image with this factor to get depth in mm. Used to trade-off between depth accuracy 
            and maximum depth value. Default corresponds to 65.54m maximum depth and 1mm accuracy.
        :param ignore_dist_thres: Distance between camera and object after which object is ignored. Mostly due to failed physics.
        :param m2mm: Original bop annotations and models are in mm. If true, we convert the gt annotations to mm here. This
            is needed if BopLoader option mm2m is used.
        :param frames_per_chunk: Number of frames saved in each chunk (called scene in BOP) 
        """

        # Format of the depth images.
        depth_ext = '.png'

        rgb_tpath = os.path.join(chunks_dir, '{chunk_id:06d}', 'rgb',
                                 '{im_id:06d}' + '{im_type}')
        depth_tpath = os.path.join(chunks_dir, '{chunk_id:06d}', 'depth',
                                   '{im_id:06d}' + depth_ext)
        chunk_camera_tpath = os.path.join(chunks_dir, '{chunk_id:06d}',
                                          'scene_camera.json')
        chunk_gt_tpath = os.path.join(chunks_dir, '{chunk_id:06d}',
                                      'scene_gt.json')

        # Paths to the already existing chunk folders (such folders may exist
        # when appending to an existing dataset).
        chunk_dirs = sorted(glob.glob(os.path.join(chunks_dir, '*')))
        chunk_dirs = [d for d in chunk_dirs if os.path.isdir(d)]

        # Get ID's of the last already existing chunk and frame.
        curr_chunk_id = 0
        curr_frame_id = 0
        if len(chunk_dirs):
            last_chunk_dir = sorted(chunk_dirs)[-1]
            last_chunk_gt_fpath = os.path.join(last_chunk_dir, 'scene_gt.json')
            chunk_gt = BopWriterUtility._load_json(last_chunk_gt_fpath,
                                                   keys_to_int=True)

            # Last chunk and frame ID's.
            last_chunk_id = int(os.path.basename(last_chunk_dir))
            last_frame_id = int(sorted(chunk_gt.keys())[-1])

            # Current chunk and frame ID's.
            curr_chunk_id = last_chunk_id
            curr_frame_id = last_frame_id + 1
            if curr_frame_id % frames_per_chunk == 0:
                curr_chunk_id += 1
                curr_frame_id = 0

        # Initialize structures for the GT annotations and camera info.
        chunk_gt = {}
        chunk_camera = {}
        if curr_frame_id != 0:
            # Load GT and camera info of the chunk we are appending to.
            chunk_gt = BopWriterUtility._load_json(
                chunk_gt_tpath.format(chunk_id=curr_chunk_id),
                keys_to_int=True)
            chunk_camera = BopWriterUtility._load_json(
                chunk_camera_tpath.format(chunk_id=curr_chunk_id),
                keys_to_int=True)

        # Go through all frames.
        num_new_frames = bpy.context.scene.frame_end - bpy.context.scene.frame_start

        if len(depths) != len(colors) != num_new_frames:
            raise Exception(
                "The amount of images stored in the depths/colors does not correspond to the amount"
                "of images specified by frame_start to frame_end.")

        for frame_id in range(bpy.context.scene.frame_start,
                              bpy.context.scene.frame_end):
            # Activate frame.
            bpy.context.scene.frame_set(frame_id)

            # Reset data structures and prepare folders for a new chunk.
            if curr_frame_id == 0:
                chunk_gt = {}
                chunk_camera = {}
                os.makedirs(
                    os.path.dirname(
                        rgb_tpath.format(chunk_id=curr_chunk_id,
                                         im_id=0,
                                         im_type='PNG')))
                os.makedirs(
                    os.path.dirname(
                        depth_tpath.format(chunk_id=curr_chunk_id, im_id=0)))

            # Get GT annotations and camera info for the current frame.

            # Output translation gt in m or mm
            unit_scaling = 1000. if m2mm else 1.

            chunk_gt[curr_frame_id] = BopWriterUtility._get_frame_gt(
                dataset_objects, unit_scaling, ignore_dist_thres)
            chunk_camera[curr_frame_id] = BopWriterUtility._get_frame_camera(
                save_world2cam, depth_scale, unit_scaling)

            if colors:
                color_rgb = colors[frame_id]
                color_bgr = color_rgb[..., ::-1].copy()
                if color_file_format == 'PNG':
                    rgb_fpath = rgb_tpath.format(chunk_id=curr_chunk_id,
                                                 im_id=curr_frame_id,
                                                 im_type='.png')
                    cv2.imwrite(rgb_fpath, color_bgr)
                elif color_file_format == 'JPEG':
                    rgb_fpath = rgb_tpath.format(chunk_id=curr_chunk_id,
                                                 im_id=curr_frame_id,
                                                 im_type='.jpg')
                    cv2.imwrite(rgb_fpath, color_bgr,
                                [int(cv2.IMWRITE_JPEG_QUALITY), jpg_quality])
            else:
                rgb_output = Utility.find_registered_output_by_key("colors")
                if rgb_output is None:
                    raise Exception("RGB image has not been rendered.")
                color_ext = '.png' if rgb_output['path'].endswith(
                    'png') else '.jpg'
                # Copy the resulting RGB image.
                rgb_fpath = rgb_tpath.format(chunk_id=curr_chunk_id,
                                             im_id=curr_frame_id,
                                             im_type=color_ext)
                shutil.copyfile(rgb_output['path'] % frame_id, rgb_fpath)

            if depths:
                depth = depths[frame_id]
            else:
                # Load the resulting dist image.
                dist_output = Utility.find_registered_output_by_key("distance")
                if dist_output is None:
                    raise Exception("Distance image has not been rendered.")
                distance = WriterUtility.load_output_file(resolve_path(
                    dist_output['path'] % frame_id),
                                                          remove=False)
                depth = dist2depth(distance)

            # Scale the depth to retain a higher precision (the depth is saved
            # as a 16-bit PNG image with range 0-65535).
            depth_mm = 1000.0 * depth  # [m] -> [mm]
            depth_mm_scaled = depth_mm / float(depth_scale)

            # Save the scaled depth image.
            depth_fpath = depth_tpath.format(chunk_id=curr_chunk_id,
                                             im_id=curr_frame_id)
            BopWriterUtility._save_depth(depth_fpath, depth_mm_scaled)

            # Save the chunk info if we are at the end of a chunk or at the last new frame.
            if ((curr_frame_id + 1) % frames_per_chunk == 0) or \
                    (frame_id == num_new_frames - 1):

                # Save GT annotations.
                BopWriterUtility._save_json(
                    chunk_gt_tpath.format(chunk_id=curr_chunk_id), chunk_gt)

                # Save camera info.
                BopWriterUtility._save_json(
                    chunk_camera_tpath.format(chunk_id=curr_chunk_id),
                    chunk_camera)

                # Update ID's.
                curr_chunk_id += 1
                curr_frame_id = 0
            else:
                curr_frame_id += 1