import os
from typing import Dict, List, Optional, Tuple, Union

import cv2
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from torch import Tensor
from torch.utils.data import Dataset

# Project-local helpers. The exact module paths below are assumptions; adjust
# them to match this repository's layout.
from src.data_loader.joints import Joints
from src.data_loader.utils import convert_2_5D_to_3D, read_json
from src.constants import BOUND_BOX_SCALE


class MPII_DB(Dataset):
    def __init__(
        self,
        root_dir: str,
        split: str = "train",
        seed: int = 5,
        train_ratio: float = 0.9,
    ):
        """Initializes the MPII dataset class, relevant paths and the Joints
        initializes the class for remapping of MPII formatted joints to that of AIT.
        joints mapping at
        https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/doc/output.md#hand-output-format

        Args:
            root_dir (str): Path to the directory with image samples.
            split (str): To select train or test split.
        """
        self.root_dir = root_dir
        self.split = split
        self.seed = seed
        self.train_ratio = train_ratio
        split_set = "train" if self.split in ["train", "val"] else split
        self.image_dir_path = os.path.join(self.root_dir,
                                           f"manual_{split_set}")
        self.label_dir_path = os.path.join(self.root_dir,
                                           f"manual_{split_set}")
        self.img_names = self.get_image_names()
        self.labels = self.get_labels()
        self.indices = self.create_train_val_split()
        # To convert from MPII to AIT format.
        self.joints = Joints()

    def get_image_names(self) -> List[str]:
        """Gets the name of all the files in root_dir.
        Make sure there are only image in that directory as it reads all the file names.

        Returns:
            List[str]: List of image names.
        """

        img_names = [
            file_name for file_name in next(os.walk(self.image_dir_path))[2]
            if ".jpg" in file_name
        ]
        img_names.sort()
        # Remove images whose annotations fall outside the image bounds.
        try:
            img_names.remove("Ricki_unit_8.flv_000003_l.jpg")
            img_names.remove("Ricki_unit_8.flv_000002_l.jpg")
        except ValueError as e:
            print(f"Out-of-frame images not found: {e}")
        return img_names

    def get_labels(self) -> Dict[str, dict]:
        """Reads the per-image annotation JSON files from the label directory.

        Returns:
            Dict[str, dict]: Mapping from image name (without extension) to its
                annotation dict.
        """
        label_file_names = [
            file_name for file_name in next(os.walk(self.label_dir_path))[2]
            if ".json" in file_name
        ]
        labels = {
            file_name.replace(".json", ""):
            read_json(os.path.join(self.label_dir_path, file_name))
            for file_name in label_file_names
        }
        return labels

    def create_train_val_split(self) -> np.ndarray:
        """Creates the train/val split of the MPII data.

        Raises:
            NotImplementedError: If the split is not one of "train", "val" or "test".

        Returns:
            np.ndarray: Array of sample indices for the selected split.
        """
        num_unique_images = len(self.img_names)
        train_indices, val_indices = train_test_split(
            np.arange(num_unique_images),
            train_size=self.train_ratio,
            random_state=self.seed,
        )
        if self.split == "train":
            return np.sort(train_indices)
        elif self.split == "val":
            return np.sort(val_indices)
        elif self.split == "test":
            return np.arange(len(self.img_names))
        else:
            raise NotImplementedError

    def __len__(self):
        return len(self.indices)

    def __getitem__(self,
                    idx: int) -> Dict[str, Union[np.ndarray, torch.Tensor]]:
        """Returns a sample corresponding to the index.

        Args:
            idx (int): index

        Returns:
            dict: item with the following elements.
                "image": image in RGB format (converted from OpenCV's BGR).
                "K": camera parameters (identity matrix in this case).
                "joints3D": 3D coordinates of joints in AIT format
                    (z coordinate is 1.0).
        """

        if torch.is_tensor(idx):
            idx = idx.tolist()
        idx_ = self.indices[idx]
        img_name = os.path.join(self.image_dir_path, self.img_names[idx_])
        img = cv2.cvtColor(cv2.imread(img_name), cv2.COLOR_BGR2RGB)
        # MPII follows the same joint-naming convention as FreiHAND.
        label = self.labels[self.img_names[idx_].replace(".jpg", "")]
        joints3D = self.joints.freihand_to_ait(
            torch.tensor(label["hand_pts"]).float())
        if label["is_left"] == 1:
            # flipping horizontally to make it right hand
            img = cv2.flip(img, 1)
            # width - x coord
            joints3D[:, 0] = img.shape[1] - joints3D[:, 0]
        camera_param = torch.eye(3).float()
        joints_valid = torch.ones_like(joints3D[..., -1:])
        sample = {
            "image": img,
            "K": camera_param,
            "joints3D": joints3D,
            "joints_valid": joints_valid,
        }

        return sample
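

# A minimal usage sketch for MPII_DB; the root path below is an assumption and
# should point at a directory containing the manual_train/manual_test folders:
#
#   mpii = MPII_DB(root_dir="data/mpii", split="train")
#   sample = mpii[0]
#   print(sample["image"].shape, sample["joints3D"].shape)
#   # expected: an (H, W, 3) image array and a (21, 3) joint tensor
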
class F_DB(Dataset):
    """Class to load samples from the Freihand dataset.
    Inherits from the Dataset class in  torch.utils.data.
    Note: The keypoints are mapped to format used at AIT.
    Refer to joint_mapping.json in src/data_loader/utils.
    """

    def __init__(
        self, root_dir: str, split: str, seed: int = 5, train_ratio: float = 0.9
    ):
        """Initializes the freihand dataset class, relevant paths and the Joints
        class for remapping of freihand formatted joints to that of AIT.

        Args:
            root_dir (str): Path to the directory with image samples.
        """
        self.root_dir = root_dir
        self.split = split
        self.seed = seed
        self.train_ratio = train_ratio
        self.labels = self.get_labels()
        self.scale = self.get_scale()
        self.camera_param = self.get_camera_param()
        self.img_names, self.img_path = self.get_image_names()
        self.indices = self.create_train_val_split()
        # To convert from freihand to AIT format.
        self.joints = Joints()

    def create_train_val_split(self) -> np.ndarray:
        """Creates the train/val split of the FreiHAND data.

        Raises:
            NotImplementedError: If the split is not one of "train", "val" or "test".

        Returns:
            np.ndarray: Array of sample indices for the selected split.
        """
        num_unique_images = len(self.camera_param)
        train_indices, val_indices = train_test_split(
            np.arange(num_unique_images),
            train_size=self.train_ratio,
            random_state=self.seed,
        )
        if self.split == "train":
            train_indices = np.sort(train_indices)
            train_indices = np.concatenate(
                (
                    train_indices,
                    train_indices + num_unique_images,
                    train_indices + num_unique_images * 2,
                    train_indices + num_unique_images * 3,
                ),
                axis=0,
            )
            return train_indices
        elif self.split == "val":
            val_indices = np.sort(val_indices)
            val_indices = np.concatenate(
                (
                    val_indices,
                    val_indices + num_unique_images,
                    val_indices + num_unique_images * 2,
                    val_indices + num_unique_images * 3,
                ),
                axis=0,
            )
            return val_indices
        elif self.split == "test":
            return np.arange(len(self.camera_param))
        else:
            raise NotImplementedError

    def get_image_names(self) -> Tuple[List[str], str]:
        """Gets the name of all the files in root_dir.
        Make sure there are only image in that directory as it reads all the file names.

        Returns:
            List[str]: List of image names.
            str: base path for image directory
        """
        if self.split in ["train", "val"]:
            img_path = os.path.join(self.root_dir, "training", "rgb")
        else:
            img_path = os.path.join(self.root_dir, "evaluation", "rgb")
        img_names = next(os.walk(img_path))[2]
        img_names.sort()
        return img_names, img_path

    def get_labels(self) -> Optional[list]:
        """Extracts the labels (joint coordinates) from the JSON at labels_path.

        Returns:
            Optional[list]: List of joint coordinates for all 32560 unique
                training images, or None for the test split.
        """
        if self.split in ["train", "val"]:
            labels_path = os.path.join(self.root_dir, "training_xyz.json")
            return read_json(labels_path)
        else:
            return None

    def get_scale(self) -> list:
        """Extracts the scale from the FreiHAND data."""
        if self.split in ["train", "val"]:
            labels_path = os.path.join(self.root_dir, "training_scale.json")
        else:
            labels_path = os.path.join(self.root_dir, "evaluation_scale.json")
        return read_json(labels_path)

    def get_camera_param(self) -> list:
        """Extracts the camera parameters from the JSON at camera_param_path.

        Returns:
            list: List of camera parameters, one per unique image in the split.
        """
        if self.split in ["train", "val"]:
            camera_param_path = os.path.join(self.root_dir, "training_K.json")
        else:
            camera_param_path = os.path.join(self.root_dir, "evaluation_K.json")
        return read_json(camera_param_path)

    def __len__(self):
        return len(self.indices)

    def create_sudo_bound_box(self, scale) -> Tensor:
        """Creates a pseudo ("sudo") bounding box as 21 2.5D keypoints: the
        image center plus the four corners of a scaled 224x224 box, each corner
        repeated five times, mimicking the layout of 21 hand joints."""
        max_bound = torch.tensor([224.0, 224.0])
        min_bound = torch.tensor([0.0, 0.0])
        c = (max_bound + min_bound) / 2.0
        s = ((max_bound - min_bound) / 2.0) * scale
        bound_box = torch.tensor(
            [[0, 0, 0]]
            + [[s[0], s[1], 1]] * 5
            + [[-s[0], s[1], 1]] * 5
            + [[s[0], -s[1], 1]] * 5
            + [[-s[0], -s[1], 1]] * 5
        ) + torch.tensor([c[0], c[1], 0])
        return bound_box.float()
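    # Example: with scale=1.0, c = (112, 112) and s = (112, 112), so the box
    # above is the image center (112, 112, 0) plus the corners (224, 224),
    # (0, 224), (224, 0) and (0, 0) (each repeated five times with z = 1),
    # a 21-point stand-in for real joint annotations on the test split.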

    def __getitem__(self, idx: int) -> dict:
        """Returns a sample corresponding to the index.

        Args:
            idx (int): index

        Returns:
            dict: item with following elements.
                "image" in opencv bgr format.
                "K": camera params
                "joints3D": 3D coordinates of joints in AIT format.
        """

        if torch.is_tensor(idx):
            idx = idx.tolist()
        idx_ = self.indices[idx]
        img_name = os.path.join(self.img_path, self.img_names[idx_])
        img = cv2.cvtColor(cv2.imread(img_name), cv2.COLOR_BGR2RGB)
        if self.labels is not None:
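            # Annotations exist only for the 32560 unique training samples;
            # the four image versions share one label, hence the modulo.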
            camera_param = torch.tensor(self.camera_param[idx_ % 32560]).float()
            joints3D = self.joints.freihand_to_ait(
                torch.tensor(self.labels[idx_ % 32560]).float()
            )
        else:
            camera_param = torch.tensor(self.camera_param[idx_]).float()
            joints2d_orthogonal = self.create_sudo_bound_box(scale=BOUND_BOX_SCALE)
            joints3D = convert_2_5D_to_3D(
                joints2d_orthogonal, scale=1.0, K=camera_param.clone()
            )
        joints_valid = torch.ones_like(joints3D[..., -1:])
        sample = {
            "image": img,
            "K": camera_param,
            "joints3D": joints3D,
            "joints_valid": joints_valid,
        }
        return sample
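

# A minimal sanity check for F_DB, guarded so it only runs when this module is
# executed directly. The root path is an assumption; it should contain the
# FreiHAND "training"/"evaluation" folders and the *_K.json/*_xyz.json files.
if __name__ == "__main__":
    f_db = F_DB(root_dir="data/freihand", split="train")
    print(f"train samples: {len(f_db)}")
    first = f_db[0]
    print(first["image"].shape, first["K"].shape, first["joints3D"].shape)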