Example #1
def _get_data_part1(root_path: str, animals: Iterable[str]) -> Iterator[Data]:
    try:
        import xmltodict  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name) from error

    for animal in animals:
        for image_path in glob(
                os.path.join(root_path, "keypoint_image_part1", animal,
                             "*.jpg")):
            data = Data(
                image_path,
                target_remote_path=f"{animal}/{os.path.basename(image_path)}")

            for annotation_path in glob(
                    os.path.join(
                        root_path,
                        "PASCAL2011_animal_annotation",
                        animal,
                        f"{os.path.splitext(os.path.basename(image_path))[0]}_*.xml",
                    )):

                with open(annotation_path, encoding="utf-8") as fp:
                    labels: Any = xmltodict.parse(fp.read())

                box2d = labels["annotation"]["visible_bounds"]
                data.label.box2d = [
                    LabeledBox2D.from_xywh(
                        x=float(box2d["@xmin"]),
                        y=float(box2d["@ymin"]),
                        width=float(box2d["@width"]),
                        height=float(box2d["@height"]),
                        category=animal,
                    )
                ]

                # Pre-fill 20 placeholder slots; each slot is overwritten by
                # its named keypoint below.
                keypoints2d: List[Tuple[float, float, int]] = [
                    ()
                ] * 20  # type: ignore[list-item]
                for keypoint in labels["annotation"]["keypoints"]["keypoint"]:
                    keypoints2d[_KEYPOINT_TO_INDEX[keypoint["@name"]]] = (
                        float(keypoint["@x"]),
                        float(keypoint["@y"]),
                        int(keypoint["@visible"]),
                    )
                data.label.keypoints2d = [
                    LabeledKeypoints2D(keypoints2d, category=animal)
                ]

            yield data
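
A minimal consumption sketch for the helper above; the root path and the animal list are assumptions, and Segment comes from the same package as in the other examples:

segment = Segment("part1")
for data in _get_data_part1("/path/to/animal-pose", ("cat", "dog")):
    segment.append(data)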
Example #2
def CarConnection(path: str) -> Dataset:
    """`Car Connection Picture <https://github.com/nicolas-gervais\
    /predicting-car-price-from-scraped-data/tree/master/picture-scraper>`_ dataset.

    The file structure should be like::

        <path>
            <imagename>.jpg
            ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()

    image_paths = glob(os.path.join(root_path, "*.jpg"))
    keys = dataset.catalog.classification.attributes.keys()

    for image_path in image_paths:
        data = Data(image_path)
        basename = os.path.basename(image_path)
        label = _extract_label_from_basename(keys, basename)
        data.label.classification = label
        segment.append(data)

    return dataset
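
A hypothetical usage sketch (the local path is made up; Dataset and Segment are assumed to behave as sequences, as their use elsewhere in these examples suggests):

dataset = CarConnection("~/data/car-connection")
print(dataset[0][0].label.classification)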
Example #3
def _load_negative_segment(root_path: str) -> Segment:
    segment = Segment("negative")
    for negative_image_path in glob(os.path.join(root_path, "negatives", "negativePics", "*.png")):
        data = Data(negative_image_path)
        data.label.box2d = []
        segment.append(data)
    return segment
Example #4
def _load_segment_10k(dataset: Dataset, root_path: str, labels_dir: str) -> None:
    for segment_name in _SEGMENT_NAMES:
        segment = dataset.create_segment(segment_name)
        image_paths = glob(os.path.join(root_path, "images", "10k", segment_name, "*.jpg"))

        print(f"Reading data to segment '{segment_name}'...")
        if segment_name == "test":
            for image_path in image_paths:
                segment.append(Data(image_path))
        else:
            single_channel_mask_dirs: Dict[str, str] = {}
            original_mask_dirs: Dict[str, str] = {}
            for seg_type, dir_names in _SEGMENTATIONS_INFO.items():
                original_mask_dirs[seg_type] = os.path.join(labels_dir, *dir_names, segment_name)
                if seg_type != "sem":
                    single_channel_mask_dir = os.path.join(
                        labels_dir,
                        "single_channel_mask",
                        segment_name,
                        dir_names[0],
                    )
                    single_channel_mask_dirs[seg_type] = single_channel_mask_dir
                    os.makedirs(single_channel_mask_dir, exist_ok=True)

            label_contents = _read_label_file_10k(labels_dir, segment_name)
            for image_path in image_paths:
                segment.append(
                    _get_data_10k(
                        image_path,
                        original_mask_dirs,
                        label_contents[os.path.basename(image_path)],
                        single_channel_mask_dirs,
                    )
                )
            print(f"Finished reading data to segment '{segment_name}'")
Example #5
def CACD(path: str) -> Dataset:
    """`Cross-Age Celebrity Dataset (CACD) <https://bcsiriuschen.github.io/CARC/>`_ dataset.

    The file structure should be like::

        <path>
            CACD2000/
                14_Aaron_Johnson_0001.jpg
                ...
            celebrity2000.mat

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.catalog.classification = _get_subcatalog()
    segment = dataset.create_segment()
    image_files = glob(os.path.join(root_path, "CACD2000", "*.jpg"))
    labels_map = _get_labels_map(os.path.join(root_path, "celebrity2000.mat"))
    for image in image_files:
        category, attribute = labels_map[os.path.basename(image)]
        image_data = Data(image)
        image_data.label.classification = Classification(category, attribute)
        segment.append(image_data)
    return dataset
Example #6
def _load_segment_100k(dataset: Dataset, root_path: str, labels_dir: str) -> None:
    for segment_name in _SEGMENT_NAMES:
        segment = dataset.create_segment(segment_name)
        image_paths = glob(os.path.join(root_path, "images", "100k", segment_name, "*.jpg"))

        print(f"Reading data to segment '{segment_name}'...")
        if segment_name == "test":
            for image_path in image_paths:
                segment.append(Data(image_path))
        else:
            label_contents = _read_label_file_100k(labels_dir, segment_name)
            for image_path in image_paths:
                data = Data(image_path)
                box2d: List[LabeledBox2D] = []
                polygon: List[LabeledPolygon] = []
                polyline2d: List[LabeledPolyline2D] = []
                label = data.label
                label_content = label_contents[os.path.basename(image_path)]
                label.classification = Classification(attributes=label_content["attributes"])
                for label_info in label_content["labels"]:
                    if "box2d" in label_info:
                        _add_box2d_label(label_info, box2d)
                    if "poly2d" in label_info:
                        _add_poly2d_label_100k(label_info, polygon, polyline2d)
                label.box2d = box2d
                label.polygon = polygon
                label.polyline2d = polyline2d
                segment.append(data)
        print(f"Finished reading data to segment '{segment_name}'")
Example #7
def _load_tracking_segment(
    dataset: Dataset,
    images_dir: str,
    labels_dir: str,
    tracking_type: str,
) -> None:
    for segment_prefix in _SEGMENT_NAMES:
        image_subdirs = glob(os.path.join(images_dir, segment_prefix, "*"))
        segment_labels_dir = os.path.join(labels_dir, "polygons",
                                          segment_prefix)
        original_mask_dir = os.path.join(labels_dir, "bitmasks",
                                         segment_prefix)
        mask_dir = os.path.join(labels_dir, "single_channel_masks",
                                segment_prefix)
        os.makedirs(mask_dir, exist_ok=True)

        if segment_prefix == "test":
            generate_data: _DATA_GENERATOR = _generate_test_data
        else:
            generate_data = _generate_data
        for image_subdir in image_subdirs:
            segment = dataset.create_segment(
                f"{segment_prefix}_{os.path.basename(image_subdir)}")
            segment.extend(
                generate_data(
                    image_subdir,
                    segment_labels_dir,
                    original_mask_dir,
                    mask_dir,
                    tracking_type,
                ))
Example #8
def _get_segment(path: str, segment_name: str) -> Segment:
    segment = Segment(segment_name)
    image_paths = glob(os.path.join(path, segment_name, "*.png"))

    for image_path in image_paths:
        segment.append(Data(image_path))
    return segment
Example #9
def convert_mask(path: str, mask_path: str) -> None:
    """Convert the mat format labels of the PASCALContext dataset to masks.

    The file structure of the input path should be like::

            <path>
                <trainval>
                    <image_name>.mat
                    ...

    Arguments:
        path: The root directory of the dataset.
        mask_path: The root directory where to save the masks.

    Raises:
        ModuleImportError: When the module "scipy" or "Pillow" can not be found.

    """
    try:
        from PIL import Image  # pylint: disable=import-outside-toplevel
        from scipy.io import loadmat  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        module_name = error.name
        package_name = "Pillow" if module_name == "PIL" else None
        raise ModuleImportError(module_name=module_name,
                                package_name=package_name) from error

    root_path = os.path.abspath(os.path.expanduser(path))
    root_mask_path = os.path.abspath(os.path.expanduser(mask_path))

    for mat_path in glob(os.path.join(root_path, "trainval", "*.mat")):
        stem = os.path.splitext(os.path.basename(mat_path))[0]
        mat = loadmat(mat_path)
        image = Image.fromarray(mat["LabelMap"])
        image.save(os.path.join(root_mask_path, f"{stem}.png"))
Example #10
def PASCALContext(mask_path: str, image_path: str) -> Dataset:
    """`PASCALContext <https://cs.stanford.edu/~roozbeh/pascal-context/>`_ dataset.

    The file structure should be like::

        <mask_path>
            <image_name>.png
            ...

        <image_path>
            <image_name>.jpg
            ...

    Arguments:
        mask_path: The root directory of the dataset mask.
        image_path: The root directory of the dataset image.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_mask_path = os.path.abspath(os.path.expanduser(mask_path))
    root_image_path = os.path.abspath(os.path.expanduser(image_path))

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog.json"))

    segment = dataset.create_segment("trainval")
    for mask_filename in glob(os.path.join(root_mask_path, "*.png")):
        stem = os.path.splitext(os.path.basename(mask_filename))[0]
        data = Data(os.path.join(root_image_path, f"{stem}.jpg"))
        data.label.semantic_mask = SemanticMask(mask_filename)
        segment.append(data)

    return dataset
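
A sketch of the two-step pipeline with made-up local paths: convert the .mat label maps into .png masks first (the target directory must already exist, hence the makedirs), then point the loader at the generated masks and the original images:

mask_root = "/data/pascal_context/masks"  # hypothetical paths throughout
os.makedirs(mask_root, exist_ok=True)
convert_mask("/data/pascal_context", mask_root)
dataset = PASCALContext(mask_root, "/data/pascal_context/images")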
Example #11
def FSDD(path: str) -> Dataset:
    """`Free Spoken Digit <https://github.com/Jakobovski/free-spoken-digit-dataset>`_ dataset.

    The file structure should be like::

        <path>
            recordings/
                0_george_0.wav
                0_george_1.wav
                ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    label_map = {}
    for key, value in _METADATA.items():
        attributes = {"name": key}
        attributes.update(value)
        label_map[key] = attributes

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()
    audio_paths = glob(os.path.join(path, "recordings", "*.wav"))
    for audio_path in audio_paths:
        category, name = os.path.basename(audio_path).split("_")[:2]
        data = Data(audio_path)
        data.label.classification = Classification(category, label_map[name])
        segment.append(data)
    return dataset
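
The classification above relies entirely on the recording naming scheme; a stdlib-only sketch of the split:

# "0_george_0.wav" encodes the spoken digit and the speaker;
# the trailing index distinguishes repeated takes.
category, name = "0_george_0.wav".split("_")[:2]
assert (category, name) == ("0", "george")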
Example #12
def ImageEmotionArtphoto(path: str) -> Dataset:
    """`Image Emotion-art Photo <https://www.imageemotion.org/>`_ dataset.

    The file structure should be like::

        <path>
            <filename>.jpg
            ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME_ARTPHOTO)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog_artphoto.json"))
    segment = dataset.create_segment()

    image_paths = glob(os.path.join(root_path, "*.jpg"))

    for image_path in image_paths:
        image_category = os.path.basename(image_path).split("_", 1)[0]

        data = Data(image_path)
        data.label.classification = Classification(category=image_category)
        segment.append(data)

    return dataset
Example #13
def _read_label_file_100k(label_dir: str, segment_name: str) -> Dict[str, Any]:
    source_label_contents = []
    label_filenames = glob(os.path.join(label_dir, "**", f"*_{segment_name}.json"), recursive=True)

    label_prefixes = set(_LABEL_TYPE_INFO_100K)
    for label_filename in label_filenames:
        label_file_basename = os.path.basename(label_filename)
        label_prefix = label_file_basename.replace(f"_{segment_name}.json", "")
        try:
            label_prefixes.remove(label_prefix)
        except KeyError:
            warn_message = f"Invalid label file name '{label_file_basename}'! Ignoring.."
            warn(warn_message)
            continue

        label_description = _LABEL_TYPE_INFO_100K[label_prefix][0]
        print(f"Reading '{label_description}' labels to segment '{segment_name}'...")
        with open(label_filename, "r", encoding="utf-8") as fp:
            source_label_contents.append(json.load(fp))
        print(f"Finished reading '{label_description}' labels to segment '{segment_name}'...")

    for missing_label_prefix in label_prefixes:
        warn_message = (
            f"Missing label file '{missing_label_prefix}_{segment_name}.json'! "
            f"The correspondent '{_LABEL_TYPE_INFO_100K[missing_label_prefix][1]}' "
            f"label will be set to empty!"
        )
        warn(warn_message)

    print(f"Merging '{segment_name}' labels...")
    label_contents = _merge_label(source_label_contents)
    print(f"Finished merging '{segment_name}' labels")
    return label_contents
Example #14
def AnimalsWithAttributes2(path: str) -> Dataset:
    """`Animals with attributes 2 <https://cvml.ist.ac.at/AwA2/>`_ dataset.

    The file structure should be like::

        <path>
            classes.txt
            predicates.txt
            predicate-matrix-binary.txt
            JPEGImages/
                <classname>/
                    <imagename>.jpg
                ...
            ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()

    with open(os.path.join(root_path, "classes.txt"), encoding="utf-8") as fp:
        class_names = [line[:-1].split("\t", 1)[-1] for line in fp]

    with open(os.path.join(root_path, "predicates.txt"),
              encoding="utf-8") as fp:
        attribute_keys = [line[:-1].split("\t", 1)[-1] for line in fp]

    with open(os.path.join(root_path, "predicate-matrix-binary.txt"),
              encoding="utf-8") as fp:
        attribute_values = [line[:-1].split(" ") for line in fp]

    attribute_mapping = {}
    for class_name, values in zip(class_names, attribute_values):
        attribute_mapping[class_name] = Classification(
            category=class_name,
            attributes=dict(
                zip(attribute_keys, (bool(int(value)) for value in values))),
        )

    for class_name in sorted(os.listdir(os.path.join(root_path,
                                                     "JPEGImages"))):
        image_paths = glob(
            os.path.join(root_path, "JPEGImages", class_name, "*.jpg"))
        label = attribute_mapping[class_name]
        for image_path in image_paths:
            data = Data(image_path)
            data.label.classification = label
            segment.append(data)

    return dataset
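
A stdlib-only sketch of the attribute mapping built above, with made-up class and predicate names: row i of the binary predicate matrix supplies the attribute values for class i:

attribute_keys = ["black", "white", "stripes"]  # made-up predicates
class_names = ["zebra"]  # made-up class
attribute_values = [["1", "1", "0"]]
mapping = {
    name: dict(zip(attribute_keys, (bool(int(v)) for v in values)))
    for name, values in zip(class_names, attribute_values)
}
assert mapping["zebra"] == {"black": True, "white": True, "stripes": False}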
Example #15
def DeepRoute(path: str) -> Dataset:
    """`DeepRoute <https://gas.graviti.cn/dataset/graviti-open-dataset\
    /DeepRoute>`_ dataset.

    The file structure should be like::

        <path>
            pointcloud/
                00001.bin
                00002.bin
                ...
                10000.bin
            groundtruth/
                00001.txt
                00002.txt
                ...
                10000.txt

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()

    point_cloud_paths = glob(os.path.join(root_path, "pointcloud", "*.bin"))

    for point_cloud_path in point_cloud_paths:
        point_cloud_id = os.path.splitext(os.path.basename(point_cloud_path))[0]
        label_path = os.path.join(root_path, "groundtruth", f"{point_cloud_id}.txt")

        data = Data(point_cloud_path)
        data.label.box3d = []

        with open(label_path, encoding="utf-8") as fp:
            annotations = json.load(fp)["objects"]

        for annotation in annotations:
            bounding_box = annotation["bounding_box"]
            position = annotation["position"]

            label = LabeledBox3D(
                size=(bounding_box["length"], bounding_box["width"], bounding_box["height"]),
                translation=(position["x"], position["y"], position["z"]),
                rotation=from_rotation_vector((0, 0, annotation["heading"])),
                category=annotation["type"],
            )
            data.label.box3d.append(label)

        segment.append(data)

    return dataset
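
The loader above expects each groundtruth .txt file to contain a JSON document with an "objects" array; a sketch of one record, with field names taken from the parsing code and all values invented:

sample_annotation = {
    "type": "CAR",
    "heading": 0.5,  # consumed as a rotation about the z axis
    "position": {"x": 1.0, "y": 2.0, "z": 0.0},
    "bounding_box": {"length": 4.5, "width": 1.8, "height": 1.5},
}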
Example #16
def KenyanFoodType(path: str) -> Dataset:
    """`Kenyan Food Type <https://github.com/monajalal/Kenyan-Food>`_ dataset.

    The file structure should be like::

        <path>
            test.csv
            test/
                bhaji/
                    1611654056376059197.jpg
                    ...
                chapati/
                    1451497832469337023.jpg
                    ...
                ...
            train/
                bhaji/
                    190393222473009410.jpg
                    ...
                chapati/
                    1310641031297661755.jpg
                    ...
            val/
                bhaji/
                    1615408264598518873.jpg
                    ...
                chapati/
                    1553618479852020228.jpg
                    ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME_FOOD_TYPE)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog_food_type.json"))

    for segment_name in SEGMENTS_FOOD_TYPE:
        segment = dataset.create_segment(segment_name)
        segment_path = os.path.join(root_path, segment_name)
        for category in sorted(os.listdir(segment_path)):
            image_paths = glob(os.path.join(segment_path, category, "*.jpg"))
            label = Classification(category)
            for image_path in image_paths:
                data = Data(image_path)
                data.label.classification = label
                segment.append(data)
    return dataset
Example #17
def RarePlanesReal(path: str) -> Dataset:
    """`RarePlanesReal <https://www.cosmiqworks.org/RarePlanes/>`_ dataset.

    The folder structure should be like::

        <path>
            metadata_annotations/
                RarePlanes_Public_Metadata.csv
                RarePlanes_Test_Coco_Annotations_tiled.json
                RarePlanes_Train_Coco_Annotations_tiled.json
            test/
                PS-RGB_tiled/
                    105_104001003108D900_tile_47.png
                    ...
            train/
                PS-RGB_tiled/
                    100_1040010029990A00_tile_319.png
                    ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    catalog = dataset.catalog

    annotations_dir = os.path.join(root_path, "metadata_annotations")
    classification_attributes = _get_classification_attributes(
        os.path.join(annotations_dir, "RarePlanes_Public_Metadata.csv"),
        catalog.classification.attributes.keys(),
    )
    for segment_name in _SEGMENT_NAMES:
        segment = dataset.create_segment(segment_name)
        image_name_to_polygons = _get_polygon_labels(
            annotations_dir, segment_name, catalog.polygon.attributes.keys())
        for image_path in glob(
                os.path.join(root_path, segment_name, "PS-RGB_tiled",
                             "*.png")):
            data = Data(image_path)
            label = data.label
            filename = os.path.basename(image_path)
            image_id = filename.rsplit("_", 2)[0]
            label.polygon = image_name_to_polygons[filename]
            label.classification = Classification(
                attributes=classification_attributes[image_id])
            segment.append(data)
    return dataset
Example #18
def BioIDFace(path: str) -> Dataset:
    """`BioID Face <https://www.bioid.com/facedb/>`_ dataset.

    The folder structure should be like::

        <path>
            BioID-FaceDatabase-V1.2/
                BioID_0000.eye
                BioID_0000.pgm
                ...
            points_20/
                bioid_0000.pts
                ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()

    # Sort both listings so that images and keypoint files pair up by index;
    # glob() returns paths in arbitrary order.
    image_paths = sorted(
        glob(os.path.join(root_path, "BioID-FaceDatabase-V1.2", "*.pgm")))
    face_keypoints_paths = sorted(
        glob(os.path.join(root_path, "points_20", "*.pts")))

    for image_path, face_keypoints_path in zip(image_paths,
                                               face_keypoints_paths):
        data = Data(image_path)
        data.label.keypoints2d = _get_label(
            f"{os.path.splitext(image_path)[0]}.eye", face_keypoints_path)

        segment.append(data)

    return dataset
Example #19
def LeedsSportsPose(path: str) -> Dataset:
    """`Leeds Sports Pose <http://sam.johnson.io/research/lsp.html>`_ dataset.

    The folder structure should be like::

        <path>
            joints.mat
            images/
                im0001.jpg
                im0002.jpg
                ...

    Arguments:
        path: The root directory of the dataset.

    Raises:
        ModuleImportError: When the module "scipy" can not be found.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    try:
        from scipy.io import loadmat  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name) from error

    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()

    mat = loadmat(os.path.join(root_path, "joints.mat"))

    joints = mat["joints"].T
    image_paths = glob(os.path.join(root_path, "images", "*.jpg"))
    for image_path in image_paths:
        data = Data(image_path)
        data.label.keypoints2d = []
        # Get the image index from a filename like "im0001.jpg".
        index = int(os.path.basename(image_path)[2:6]) - 1

        keypoints = LabeledKeypoints2D()
        for keypoint in joints[index]:
            keypoints.append(
                Keypoint2D(keypoint[0], keypoint[1], int(not keypoint[2])))

        data.label.keypoints2d.append(keypoints)
        segment.append(data)
    return dataset
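
A shape sketch for the joints handling above, assuming the LSP release stores mat["joints"] as a 3x14x2000 array so that the transpose yields one row of 14 (x, y, flag) triplets per image:

import numpy as np  # loadmat returns numpy arrays

joints = np.zeros((3, 14, 2000)).T
assert joints.shape == (2000, 14, 3)  # image index, joint index, (x, y, flag)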
Example #20
def NightOwls(path: str) -> Dataset:
    """`NightOwls <http://www.nightowls-dataset.org/>`_ dataset.

    The file structure should be like::

        <path>
            nightowls_test/
                <image_name>.png
                ...
            nightowls_training/
                <image_name>.png
                ...
            nightowls_validation/
                <image_name>.png
                ...
            nightowls_training.json
            nightowls_validation.json

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME)
    dataset.notes.is_continuous = True
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))

    for mode, (labels_filename,
               labels_handler) in _LABELS_HANDEL_METHODS.items():
        segment = dataset.create_segment(mode)

        image_paths = glob(
            os.path.join(root_path, f"nightowls_{mode}", "*.png"))

        labels = _load_labels(root_path, labels_filename)

        for image_path in image_paths:
            data = labels_handler(image_path, labels)  # pylint: disable=not-callable
            segment.append(data)

    return dataset
Example #21
def JHU_CROWD(path: str) -> Dataset:
    """`JHU-CROWD++ <http://www.crowd-counting.com/>`_ dataset.

    The file structure should be like::

        <path>
            train/
                images/
                    0000.jpg
                    ...
                gt/
                    0000.txt
                    ...
                image_labels.txt
            test/
            val/

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    for segment_name in SEGMENT_LIST:
        segment = dataset.create_segment(segment_name)
        segment_path = os.path.join(path, segment_name)
        image_root_path = os.path.join(segment_path, "images")
        image_paths = glob(os.path.join(image_root_path, "*.jpg"))

        image_labels = _load_image_labels(
            os.path.join(segment_path, "image_labels.txt"))
        for image_path in image_paths:
            data = Data(image_path)
            image_file = os.path.basename(image_path)
            label_file = image_file.replace("jpg", "txt")
            data.label.box2d = _load_box_labels(
                os.path.join(segment_path, "gt", label_file))
            data.label.classification = image_labels[os.path.splitext(
                image_file)[0]]
            segment.append(data)
    return dataset
Example #22
def _load_positive_segment(segment_name: str, segment_path: str) -> Segment:
    if segment_name.startswith("vid"):
        # Pad zero for segment name to change "vid0" to "vid00"
        segment_name = f"{segment_name[:3]}{int(segment_name[3:]):02}"
    segment = Segment(segment_name)
    annotation_file = glob(
        os.path.join(segment_path, "frameAnnotations-*",
                     "frameAnnotations.csv"))[0]
    image_folder = os.path.dirname(annotation_file)
    pre_filename = ""
    with open(annotation_file, "r", encoding="utf-8") as fp:
        for annotation in csv.DictReader(fp, delimiter=";"):
            filename = annotation["Filename"]

            if filename != pre_filename:
                data = Data(os.path.join(image_folder, filename))
                data.label.box2d = []
                segment.append(data)
                pre_filename = filename

            occluded, on_another_road = annotation[
                "Occluded,On another road"].split(",", 1)
            data.label.box2d.append(
                LabeledBox2D(
                    int(annotation["Upper left corner X"]),
                    int(annotation["Upper left corner Y"]),
                    int(annotation["Lower right corner X"]),
                    int(annotation["Lower right corner Y"]),
                    category=annotation["Annotation tag"],
                    attributes={
                        "Occluded": bool(int(occluded)),
                        "On another road": bool(int(on_another_road)),
                        "Origin file": annotation["Origin file"],
                        "Origin frame number": int(annotation["Origin frame number"]),
                        "Origin track": annotation["Origin track"],
                        "Origin track frame number": int(
                            annotation["Origin track frame number"]),
                    },
                ))
    return segment
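
One subtlety worth spelling out: the annotation file is semicolon-delimited, so the comma-joined header "Occluded,On another road" survives as a single DictReader key whose value is itself comma-separated. A stdlib sketch of the manual split above (the value is made up):

occluded, on_another_road = "1,0".split(",", 1)
assert (bool(int(occluded)), bool(int(on_another_road))) == (True, False)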
Example #23
def HeadPoseImage(path: str) -> Dataset:
    """`Head Pose Image <http://crowley-coutaz.fr\
    /Head%20Pose%20Image%20Database.html>`_ dataset.

    The file structure should be like::

        <path>
            Person01/
                person01100-90+0.jpg
                person01100-90+0.txt
                person01101-60-90.jpg
                person01101-60-90.txt
                ...
            Person02/
            Person03/
            ...
            Person15/

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()
    image_paths = glob(os.path.join(path, "Person*", "*.jpg"))

    for image_path in image_paths:
        image_name = os.path.basename(image_path)
        data = Data(image_path)
        data.label.box2d = [
            LabeledBox2D(
                *_load_label_box(image_path.replace("jpg", "txt")),
                category=image_name[6:8],
                attributes=_load_attributes(image_name),
            )
        ]
        segment.append(data)
    return dataset
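
Both the category and the attributes above come straight from the file name; a sketch of the slicing (reading the remaining characters as series, image number, tilt and pan is an assumption based on the dataset's naming scheme):

image_name = "person01100-90+0.jpg"
assert image_name[6:8] == "01"  # person id, used as the category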
Example #24
def _get_data_part2(root_path: str, animals: Iterable[str]) -> Iterator[Data]:
    try:
        import xmltodict  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name) from error

    for animal in animals:
        for image_path in glob(
                os.path.join(root_path, "animalpose_image_part2", animal,
                             "*.jpeg")):
            data = Data(
                image_path,
                target_remote_path=f"{animal}/{os.path.basename(image_path)}")

            annotation_path = os.path.join(
                root_path,
                "animalpose_anno2",
                animal,
                f"{os.path.splitext(os.path.basename(image_path))[0]}.xml",
            )

            with open(annotation_path, encoding="utf-8") as fp:
                labels: Any = xmltodict.parse(fp.read())

            box2d = labels["annotation"]["visible_bounds"]
            data.label.box2d = [
                LabeledBox2D.from_xywh(
                    x=float(box2d["@xmin"]),
                    y=float(box2d["@xmax"]),  # the "@xmax" field stores ymin in these annotations
                    width=float(box2d["@width"]),
                    height=float(box2d["@height"]),
                    category=animal,
                )
            ]

            keypoints2d = LabeledKeypoints2D(category=animal)
            for keypoint in labels["annotation"]["keypoints"]["keypoint"]:
                keypoints2d.append(
                    Keypoint2D(float(keypoint["@x"]), float(keypoint["@y"]),
                               int(keypoint["@visible"])))
            data.label.keypoints2d = [keypoints2d]
            yield data
Example #25
def KylbergTexture(path: str) -> Dataset:
    """`Kylberg Texture <http://www.cb.uu.se/~gustaf/texture/>`_ dataset.

    The file structure should be like::

        <path>
            originalPNG/
                <imagename>.png
                ...
            withoutRotateAll/
                <imagename>.png
                ...
            RotateAll/
                <imagename>.png
                ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))

    for segment_name, label_getter in _LABEL_GETTERS.items():
        image_paths = glob(os.path.join(root_path, segment_name, "*.png"))

        segment = dataset.create_segment(segment_name)

        for image_path in image_paths:
            data = Data(image_path)
            stem = os.path.splitext(os.path.basename(image_path))[0]
            data.label.classification = label_getter(stem)
            segment.append(data)

    return dataset
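
A hypothetical label getter in the spirit of _LABEL_GETTERS, assuming Kylberg stems such as "blanket1-a-p001" begin with the texture class name (the stem format and helper name are assumptions):

def _get_label_from_stem(stem: str) -> Classification:
    # Split the texture class name off the front of the stem.
    return Classification(category=stem.split("-", 1)[0])

assert _get_label_from_stem("blanket1-a-p001").category == "blanket1"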
Example #26
def _load_sensors(calib_path: str) -> Sensors:
    try:
        import yaml  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name,
                                package_name="pyyaml") from error

    sensors = Sensors()

    lidar = Lidar("LIDAR")
    lidar.set_extrinsics()
    sensors.add(lidar)

    with open(os.path.join(calib_path, "extrinsics.yaml"),
              "r",
              encoding="utf-8") as fp:
        extrinsics = yaml.load(fp, Loader=yaml.FullLoader)

    for camera_calibration_file in glob(os.path.join(calib_path,
                                                     "[0-9]*.yaml")):
        with open(camera_calibration_file, "r", encoding="utf-8") as fp:
            camera_calibration = yaml.load(fp, Loader=yaml.FullLoader)

        # camera_calibration_file looks like:
        # /path-to-CADC/2018_03_06/calib/00.yaml
        camera_name = f"CAM{os.path.splitext(os.path.basename(camera_calibration_file))[0]}"
        camera = Camera(camera_name)
        camera.description = camera_calibration["camera_name"]

        camera.set_extrinsics(matrix=extrinsics[f"T_LIDAR_{camera_name}"])

        camera_matrix = camera_calibration["camera_matrix"]["data"]
        camera.set_camera_matrix(
            matrix=[camera_matrix[:3], camera_matrix[3:6], camera_matrix[6:9]])

        distortion = camera_calibration["distortion_coefficients"]["data"]
        camera.set_distortion_coefficients(
            **dict(zip(("k1", "k2", "p1", "p2", "k3"), distortion)))

        sensors.add(camera)
    return sensors
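
A sketch of the extrinsics.yaml content the lookup above implies (the identity transform is made up): one 4x4 transform per camera, keyed by T_LIDAR_<camera name>:

extrinsics = {
    "T_LIDAR_CAM00": [
        [1, 0, 0, 0],
        [0, 1, 0, 0],
        [0, 0, 1, 0],
        [0, 0, 0, 1],
    ],
}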
Example #27
def CoinImage(path: str) -> Dataset:
    """`Coin Image <https://cvl.tuwien.ac.at/research/cvl-databases/coin-image-dataset/>`_ dataset.

    The file structure should be like::

        <path>
            classes.csv
            <imagename>.png
            ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()

    csv_path = os.path.join(root_path, "classes.csv")
    with open(csv_path, "r", encoding="utf-8") as fp:
        reader = csv.reader(fp, delimiter=";")
        mapping: Dict[str, str] = dict(
            row for row in reader)  # type: ignore[arg-type, misc]

    image_paths = glob(os.path.join(root_path, "*.png"))

    for image_path in image_paths:
        data = Data(image_path)
        filename = os.path.basename(image_path)
        class_id = filename[5:].split("_", 1)[0]
        data.label.classification = Classification(category=mapping[class_id])
        segment.append(data)

    return dataset
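
A sketch of the class-id extraction above; the file name is hypothetical, but the mechanics are fixed: drop a five-character prefix, then keep everything before the first underscore:

filename = "coin_318_1.png"  # hypothetical name
class_id = filename[5:].split("_", 1)[0]
assert class_id == "318"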
Example #28
def TLR(path: str) -> Dataset:
    """`TLR <http://www.lara.prd.fr/benchmarks/trafficlightsrecognition>`_ dataset.

    The file structure should be like::

        <path>
            Lara3D_UrbanSeq1_JPG/
                frame_011149.jpg
                frame_011150.jpg
                frame_<frame_index>.jpg
                ...
            Lara_UrbanSeq1_GroundTruth_cvml.xml

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()

    file_paths = glob(os.path.join(root_path, "Lara3D_UrbanSeq1_JPG", "*.jpg"))
    labels = _parse_xml(
        os.path.join(root_path, "Lara_UrbanSeq1_GroundTruth_cvml.xml"))
    for file_path in file_paths:
        # the image file name looks like:
        # frame_000001.jpg
        frame_index = int(os.path.basename(file_path)[6:-4])
        data = Data(file_path)
        data.label.box2d = labels[frame_index]
        segment.append(data)
    return dataset
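
A one-line sketch of the frame-index parsing above: slicing [6:-4] strips the "frame_" prefix and the ".jpg" suffix:

assert int("frame_011149.jpg"[6:-4]) == 11149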
Example #29
def DogsVsCats(path: str) -> Dataset:
    """`Dogs vs Cats <https://www.kaggle.com/c/dogs-vs-cats>`_ dataset.

    The file structure should be like::

        <path>
            train/
                cat.0.jpg
                ...
                dog.0.jpg
                ...
            test/
                1000.jpg
                1001.jpg
                ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))

    for segment_name, is_labeled in _SEGMENTS.items():
        segment = dataset.create_segment(segment_name)
        image_paths = glob(os.path.join(root_path, segment_name, "*.jpg"))
        for image_path in image_paths:
            data = Data(image_path)
            if is_labeled:
                data.label.classification = Classification(
                    os.path.basename(image_path)[:3])
            segment.append(data)

    return dataset
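
The training labels come straight from the file names; a stdlib sketch of the slice above (the path is made up):

import os

assert os.path.basename("/data/train/cat.0.jpg")[:3] == "cat"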
Example #30
def RarePlanesSynthetic(path: str) -> Dataset:
    """`RarePlanesSynthetic <https://www.cosmiqworks.org/RarePlanes/>`_ dataset.

    The file structure of RarePlanesSynthetic looks like::

        <path>
            images/
                Atlanta_Airport_0_0_101_1837.png
                ...
            masks/
                Atlanta_Airport_0_0_101_1837_mask.png
                ...
            xmls/
                Atlanta_Airport_0_0_101_1837.xml
                ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    category_ids = dataset.catalog.panoptic_mask.get_category_to_index()
    segment = dataset.create_segment()
    original_mask_dir = os.path.join(root_path, "masks")
    new_mask_dir = os.path.join(root_path, "new_masks")
    os.makedirs(new_mask_dir, exist_ok=True)
    annotation_dir = os.path.join(root_path, "xmls")
    for image_path in glob(os.path.join(root_path, "images", "*.png")):
        segment.append(
            _get_data(image_path, original_mask_dir, annotation_dir,
                      new_mask_dir, category_ids))
    return dataset