Пример #1
0
def convert_mask(path: str, mask_path: str) -> None:
    """Convert the mat format labels of the PASCALContext dataset to masks.

    The file structure of the input path should be like::

            <path>
                <trainval>
                    <image_name>.mat
                    ...

    Arguments:
        path: The root directory of the dataset.
        mask_path: The root directory where to save the masks.

    Raises:
        ModuleImportError: When the module "scipy" or "Pillow" can not be found.

    """
    try:
        from PIL import Image  # pylint: disable=import-outside-toplevel
        from scipy.io import loadmat  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        missing = error.name
        # The import name "PIL" belongs to the "Pillow" distribution.
        raise ModuleImportError(
            module_name=missing,
            package_name="Pillow" if missing == "PIL" else None,
        ) from error

    src_root = os.path.abspath(os.path.expanduser(path))
    dst_root = os.path.abspath(os.path.expanduser(mask_path))

    for mat_file in glob(os.path.join(src_root, "trainval", "*.mat")):
        name, _ = os.path.splitext(os.path.basename(mat_file))
        label_map = loadmat(mat_file)["LabelMap"]
        Image.fromarray(label_map).save(os.path.join(dst_root, f"{name}.png"))
Пример #2
0
def _get_data(stem: str, image_path: str, annotation_path: str) -> Data:
    """Build a Data instance with box2d labels parsed from a VOC xml file.

    Arguments:
        stem: The file name of the sample without its extension.
        image_path: The directory containing the ".jpg" images.
        annotation_path: The directory containing the ".xml" annotations.

    Raises:
        ModuleImportError: When the module "xmltodict" can not be found.

    Returns:
        The loaded :class:`Data` instance with its 2D box labels.

    """
    try:
        import xmltodict  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name) from error

    data = Data(os.path.join(image_path, f"{stem}.jpg"))

    with open(os.path.join(annotation_path, f"{stem}.xml"),
              encoding="utf-8") as fp:
        labels: Any = xmltodict.parse(fp.read())

    objects = labels["annotation"]["object"]
    # xmltodict returns a single dict (not a list) when there is one object.
    if not isinstance(objects, list):
        objects = [objects]

    boxes = []
    for obj in objects:
        bndbox = obj["bndbox"]
        boxes.append(
            LabeledBox2D(
                float(bndbox["xmin"]),
                float(bndbox["ymin"]),
                float(bndbox["xmax"]),
                float(bndbox["ymax"]),
                category=obj["name"],
                attributes={
                    key: bool(int(value))
                    for key, value in obj["actions"].items()
                },
            ))
    data.label.box2d = boxes
    return data
Пример #3
0
def _get_labels_map(path: str) -> Dict[str, Tuple[str, Dict[str, Any]]]:
    """Get celebrity_image_data from .mat file.

    Arguments:
        path: The root directory of the dataset.

    Raises:
        ModuleImportError: When the module "h5py" can not be found.

    Returns:
        A dict mapping the image file name to a tuple of the zero-padded
        identity string and the attribute dict for that image.

    """
    try:
        from h5py import File  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name) from error

    mat_file = File(path, "r")
    celebrity_image_data = mat_file["celebrityImageData"]
    celebrity_data = mat_file["celebrityData"]

    # Name is a h5r object which can be searched in .mat file.
    # Build identity -> celebrity-name lookup by dereferencing each h5r
    # reference through the open mat file.
    id2name_map = {
        identity: _hdf5_to_str(mat_file[name])
        for identity, name in zip(celebrity_data["identity"][0], celebrity_data["name"][0])
    }
    labels_map = {}
    # The "name" is not the name of the celebrity but the name of the image file.
    # NOTE(review): assumes _MAT_KEYS starts with ("name", "identity") so that
    # the star-unpacking below lines up with _MAT_KEYS[2:] — confirm.
    for name, identity, *values in zip(*(celebrity_image_data[key][0] for key in _MAT_KEYS)):
        attribute = {"name": id2name_map[identity]}
        attribute.update(zip(_MAT_KEYS[2:], values))
        labels_map[_hdf5_to_str(mat_file[name])] = (str(int(identity)).zfill(4), attribute)
    return labels_map
Пример #4
0
def LeedsSportsPose(path: str) -> Dataset:
    """`Leeds Sports Pose <http://sam.johnson.io/research/lsp.html>`_ dataset.

    The folder structure should be like::

        <path>
            joints.mat
            images/
                im0001.jpg
                im0002.jpg
                ...

    Arguments:
        path: The root directory of the dataset.

    Raises:
        ModuleImportError: When the module "scipy" can not be found.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    try:
        from scipy.io import loadmat  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name) from error

    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()

    joints = loadmat(os.path.join(root_path, "joints.mat"))["joints"].T

    for image_path in glob(os.path.join(root_path, "images", "*.jpg")):
        # File names look like "im0001.jpg"; the digits give the 1-based index.
        index = int(os.path.basename(image_path)[2:6]) - 1

        keypoints = LabeledKeypoints2D()
        for x, y, obscured in joints[index]:
            # The mat file stores an "obscured" flag; invert it for visibility.
            keypoints.append(Keypoint2D(x, y, int(not obscured)))

        data = Data(image_path)
        data.label.keypoints2d = [keypoints]
        segment.append(data)
    return dataset
Пример #5
0
def FLIC(path: str) -> Dataset:
    """`FLIC <https://bensapp.github.io/flic-dataset.html>`_ dataset.

    The folder structure should be like::

        <path>
            examples.mat
            images/
                2-fast-2-furious-00003571.jpg
                ...

    Arguments:
        path: The root directory of the dataset.

    Raises:
        ModuleImportError: When the module "scipy" can not be found.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    try:
        from scipy.io import loadmat  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name) from error

    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME)

    annotations = loadmat(os.path.join(root_path,
                                       "examples.mat"))["examples"][0]
    dataset.create_segment("train")
    dataset.create_segment("test")
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))

    # Only some releases of the dataset mark bad samples: probe the "isbad"
    # field to decide whether the "bad" segment and its attribute are needed.
    try:
        _ = annotations["isbad"]
        has_bad_segment = True
        dataset.create_segment("bad")
        dataset.catalog.classification.add_attribute(name="isunchecked",
                                                     type_="boolean")
    except ValueError:
        # Releases without the "isbad" field raise ValueError on field access.
        has_bad_segment = False

    for data, segment_name in _get_data(root_path, annotations,
                                        has_bad_segment):
        dataset[segment_name].append(data)

    return dataset
Пример #6
0
def _get_data_part1(root_path: str, aniamls: Iterable[str]) -> Iterator[Data]:
    """Yield Data of part1 images with box2d and keypoints2d labels.

    Arguments:
        root_path: The root directory of the dataset.
        aniamls: The animal category names to load.

    Raises:
        ModuleImportError: When the module "xmltodict" can not be found.

    Yields:
        Data instances, one per image, labeled from the matching xml files.

    """
    try:
        import xmltodict  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name) from error

    for animal in aniamls:
        for image_path in glob(
                os.path.join(root_path, "keypoint_image_part1", animal,
                             "*.jpg")):
            data = Data(
                image_path,
                target_remote_path=f"{animal}/{os.path.basename(image_path)}")

            # Annotation files are named "<image_stem>_*.xml"; an image may
            # match several of them.
            for annotation_path in glob(
                    os.path.join(
                        root_path,
                        "PASCAL2011_animal_annotation",
                        animal,
                        f"{os.path.splitext(os.path.basename(image_path))[0]}_*.xml",
                    )):

                with open(annotation_path, encoding="utf-8") as fp:
                    labels: Any = xmltodict.parse(fp.read())

                # NOTE(review): each annotation file REPLACES data.label.box2d
                # and data.label.keypoints2d, so only the last matching xml
                # survives — confirm whether labels should accumulate instead.
                box2d = labels["annotation"]["visible_bounds"]
                data.label.box2d = [
                    LabeledBox2D.from_xywh(
                        x=float(box2d["@xmin"]),
                        y=float(box2d["@ymin"]),
                        width=float(box2d["@width"]),
                        height=float(box2d["@height"]),
                        category=animal,
                    )
                ]

                # Pre-fill 20 slots so keypoints land at their canonical index
                # from _KEYPOINT_TO_INDEX; unvisited slots stay empty tuples.
                keypoints2d: List[Tuple[float, float, int]] = [
                    ()
                ] * 20  # type: ignore[list-item]
                for keypoint in labels["annotation"]["keypoints"]["keypoint"]:
                    keypoints2d[_KEYPOINT_TO_INDEX[keypoint["@name"]]] = (
                        float(keypoint["@x"]),
                        float(keypoint["@y"]),
                        int(keypoint["@visible"]),
                    )
                data.label.keypoints2d = [
                    LabeledKeypoints2D(keypoints2d, category=animal)
                ]

            yield data
Пример #7
0
def SVHN(path: str) -> Dataset:
    """`SVHN <http://ufldl.stanford.edu/housenumbers>`_ dataset.

    The file structure should be like::

        <path>
            Cropped/
                extra_32x32.mat
                test_32x32.mat
                train_32x32.mat
            FullNumbers/
                extra/
                    116507.png
                    116508.png
                    ...
                    digitStruct.mat
                    see_bboxes.m
                test/
                train/

    Arguments:
        path: The root directory of the dataset.

    Raises:
        ModuleImportError: When the module "h5py" can not be found.

    Returns:
        Loaded :class: `~tensorbay.dataset.dataset.Dataset` instance.

    """
    try:
        from h5py import File  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name) from error

    root_path = os.path.join(os.path.abspath(os.path.expanduser(path)),
                             "FullNumbers")
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))

    for segment_name in _SEGMENTS:
        segment = dataset.create_segment(segment_name)
        file_path = os.path.join(root_path, segment_name)
        # Open read-only explicitly: older h5py versions default to "a"
        # (read-write), which fails on read-only media and risks mutating
        # the dataset file.
        mat = File(os.path.join(file_path, "digitStruct.mat"), "r")
        names = mat["digitStruct"]["name"]
        bboxes = mat["digitStruct"]["bbox"]
        for name, bbox in zip(names, bboxes):
            segment.append(_get_data(mat, name, bbox, file_path))
    return dataset
Пример #8
0
def Flower17(path: str) -> Dataset:
    """`17 Category Flower <http://www.robots.ox.ac.uk/~vgg/data/flowers/17/index.html>`_ dataset.

    The dataset are 3 separate splits.
    The results in the paper are averaged over the 3 splits.
    We just use (trn1, val1, tst1) to split it.

    The file structure should be like::

                <path>
                    jpg/
                        image_0001.jpg
                        ...
                    datasplits.mat

    Arguments:
        path: The root directory of the dataset.

    Raises:
        ModuleImportError: When the module "scipy" can not be found.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    try:
        from scipy.io import loadmat  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name) from error

    root_path = os.path.abspath(os.path.expanduser(path))
    splits = loadmat(os.path.join(root_path, "datasplits.mat"))

    dataset = Dataset(DATASET_NAME_17)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog_17.json"))
    index_to_category = dataset.catalog.classification.get_index_to_category()

    for segment_name, split_key in _SEGMENT_NAMES_17.items():
        segment = dataset.create_segment(segment_name)
        image_indices = splits[split_key][0]
        image_indices.sort()
        for image_index in image_indices:
            data = Data(
                os.path.join(root_path, "jpg", f"image_{image_index:04d}.jpg"))

            # There are 80 images for each category
            category = index_to_category[(image_index - 1) // 80]
            data.label.classification = Classification(category=category)
            segment.append(data)

    return dataset
Пример #9
0
def Flower102(path: str) -> Dataset:
    """`102 Category Flower <http://www.robots.ox.ac.uk/~vgg/data/flowers/102/index.html>`_ dataset.

    The file structure should be like::

        <path>
            jpg/
                image_00001.jpg
                ...
            imagelabels.mat
            setid.mat

    Arguments:
        path: The root directory of the dataset.

    Raises:
        ModuleImportError: When the module "scipy" can not be found.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    try:
        from scipy.io import loadmat  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name) from error

    root_path = os.path.abspath(os.path.expanduser(path))
    labels = loadmat(os.path.join(root_path, "imagelabels.mat"))["labels"][0]
    splits = loadmat(os.path.join(root_path, "setid.mat"))

    dataset = Dataset(DATASET_NAME_102)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog_102.json"))
    index_to_category = dataset.catalog.classification.get_index_to_category()

    for segment_name, split_key in _SEGMENT_NAMES_102.items():
        segment = dataset.create_segment(segment_name)
        image_ids = splits[split_key][0]
        image_ids.sort()
        for image_id in image_ids:
            data = Data(
                os.path.join(root_path, "jpg", f"image_{image_id:05d}.jpg"))
            # Both the image ids and the stored category labels are 1-based.
            data.label.classification = Classification(
                index_to_category[int(labels[image_id - 1]) - 1])
            segment.append(data)
    return dataset
Пример #10
0
def _get_data_part2(root_path: str, aniamls: Iterable[str]) -> Iterator[Data]:
    """Yield Data of part2 images with box2d and keypoints2d labels.

    Arguments:
        root_path: The root directory of the dataset.
        aniamls: The animal category names to load.

    Raises:
        ModuleImportError: When the module "xmltodict" can not be found.

    Yields:
        Data instances, one per image, labeled from the matching xml file.

    """
    try:
        import xmltodict  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name) from error

    for animal in aniamls:
        image_dir = os.path.join(root_path, "animalpose_image_part2", animal)
        for image_path in glob(os.path.join(image_dir, "*.jpeg")):
            file_name = os.path.basename(image_path)
            data = Data(image_path,
                        target_remote_path=f"{animal}/{file_name}")

            stem = os.path.splitext(file_name)[0]
            annotation_path = os.path.join(root_path, "animalpose_anno2",
                                           animal, f"{stem}.xml")

            with open(annotation_path, encoding="utf-8") as fp:
                annotation: Any = xmltodict.parse(fp.read())["annotation"]

            bounds = annotation["visible_bounds"]
            data.label.box2d = [
                LabeledBox2D.from_xywh(
                    x=float(bounds["@xmin"]),
                    # In this annotation format "@xmax" actually stores ymin.
                    y=float(bounds["@xmax"]),
                    width=float(bounds["@width"]),
                    height=float(bounds["@height"]),
                    category=animal,
                )
            ]

            keypoints2d = LabeledKeypoints2D(category=animal)
            for point in annotation["keypoints"]["keypoint"]:
                keypoints2d.append(
                    Keypoint2D(float(point["@x"]), float(point["@y"]),
                               int(point["@visible"])))
            data.label.keypoints2d = [keypoints2d]
            yield data
Пример #11
0
def _load_sensors(calib_path: str) -> Sensors:
    """Load the LIDAR and all calibrated cameras from the calib directory.

    Arguments:
        calib_path: The directory containing "extrinsics.yaml" and the
            per-camera "<NN>.yaml" calibration files.

    Raises:
        ModuleImportError: When the module "yaml" can not be found.

    Returns:
        The loaded :class:`Sensors` instance.

    """
    try:
        import yaml  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name,
                                package_name="pyyaml") from error

    sensors = Sensors()

    lidar = Lidar("LIDAR")
    lidar.set_extrinsics()
    sensors.add(lidar)

    extrinsics_file = os.path.join(calib_path, "extrinsics.yaml")
    with open(extrinsics_file, "r", encoding="utf-8") as fp:
        extrinsics = yaml.load(fp, Loader=yaml.FullLoader)

    for calibration_file in glob(os.path.join(calib_path, "[0-9]*.yaml")):
        with open(calibration_file, "r", encoding="utf-8") as fp:
            calibration = yaml.load(fp, Loader=yaml.FullLoader)

        # calibration_file looks like /path-to-CADC/2018_03_06/calib/00.yaml,
        # so its stem ("00") names the camera.
        stem = os.path.splitext(os.path.basename(calibration_file))[0]
        camera_name = f"CAM{stem}"
        camera = Camera(camera_name)
        camera.description = calibration["camera_name"]

        camera.set_extrinsics(matrix=extrinsics[f"T_LIDAR_{camera_name}"])

        intrinsics = calibration["camera_matrix"]["data"]
        camera.set_camera_matrix(
            matrix=[intrinsics[0:3], intrinsics[3:6], intrinsics[6:9]])

        distortion = calibration["distortion_coefficients"]["data"]
        camera.set_distortion_coefficients(**{
            name: value
            for name, value in zip(("k1", "k2", "p1", "p2", "k3"), distortion)
        })

        sensors.add(camera)
    return sensors
Пример #12
0
def BSTLD(path: str) -> Dataset:
    """`BSTLD <https://hci.iwr.uni-heidelberg.de/content\
    /bosch-small-traffic-lights-dataset>`_ dataset.

    The file structure should be like::

        <path>
            rgb/
                additional/
                    2015-10-05-10-52-01_bag/
                        <image_name>.jpg
                        ...
                    ...
                test/
                    <image_name>.jpg
                    ...
                train/
                    2015-05-29-15-29-39_arastradero_traffic_light_loop_bag/
                        <image_name>.jpg
                        ...
                    ...
            test.yaml
            train.yaml
            additional_train.yaml

    Arguments:
        path: The root directory of the dataset.

    Raises:
        ModuleImportError: When the module "yaml" can not be found.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    try:
        import yaml  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name,
                                package_name="pyyaml") from error

    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))

    for mode, label_file_name in _LABEL_FILENAME_DICT.items():
        segment = dataset.create_segment(mode)

        with open(os.path.join(root_path, label_file_name),
                  encoding="utf-8") as fp:
            labels = yaml.load(fp, yaml.FullLoader)

        is_test = mode == "test"
        for label in labels:
            raw_path = label["path"]
            if is_test:
                # test labels store /absolute/path/to/<image_name>.png;
                # keep only the file name and anchor it under rgb/test.
                file_path = os.path.join(root_path, "rgb", "test",
                                         raw_path.rsplit("/", 1)[-1])
            else:
                # other labels store a relative path like
                # ./rgb/additional/2015-10-05-10-52-01_bag/<image_name>.png;
                # strip the leading "./" and re-root it.
                file_path = os.path.join(root_path, *raw_path[2:].split("/"))

            data = Data(file_path)
            data.label.box2d = [
                LabeledBox2D(
                    box["x_min"],
                    box["y_min"],
                    box["x_max"],
                    box["y_max"],
                    category=box["label"],
                    attributes={"occluded": box["occluded"]},
                ) for box in label["boxes"]
            ]
            segment.append(data)

    return dataset
Пример #13
0
 def __getattribute__(self, name: str) -> None:
     """Raise ModuleImportError on any attribute access.

     Presumably this class is a stand-in for the missing "xmltodict"
     module, so that touching it surfaces the import failure lazily.

     Raises:
         ModuleImportError: Always, naming the "xmltodict" module.

     """
     raise ModuleImportError(module_name="xmltodict")
Пример #14
0
 def __getattribute__(self, name: str) -> None:
     """Raise ModuleImportError on any attribute access.

     Presumably this class is a stand-in for the missing "pillow"
     package, so that touching it surfaces the import failure lazily.

     Raises:
         ModuleImportError: Always, naming the "pillow" package.

     """
     raise ModuleImportError(module_name="pillow")