    def test_eq(self):
        classification1 = Classification("cat", {"gender": "male"})
        classification2 = Classification("cat", {"gender": "male"})
        classification3 = Classification("dog", {"gender": "male"})

        assert classification1 == classification2
        assert classification1 != classification3

    def test_dumps(self):
        classification = Classification(category="cat",
                                        attributes={"gender": "male"})

        assert classification.dumps() == {
            "category": "cat",
            "attributes": {
                "gender": "male"
            }
        }
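
The inverse direction also appears later in this listing as `Classification.loads`; a minimal round-trip sketch, assuming `Classification` is importable from `tensorbay.label`:

from tensorbay.label import Classification

contents = {"category": "cat", "attributes": {"gender": "male"}}

# loads() rebuilds the label from the dict that dumps() produced.
classification = Classification.loads(contents)
assert classification.category == "cat"
assert classification.attributes == {"gender": "male"}
assert classification.dumps() == contents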

    def test_eq(self):
        label1 = Label()
        label1.classification = Classification("cat", {"color": "white"})

        label2 = Label()
        label2.classification = Classification("cat", {"color": "white"})

        label3 = Label()
        label3.classification = Classification("cat", {"color": "black"})

        assert label1 == label2
        assert label1 != label3
Example #4
def _load_segment_100k(dataset: Dataset, root_path: str, labels_dir: str) -> None:
    for segment_name in _SEGMENT_NAMES:
        segment = dataset.create_segment(segment_name)
        image_paths = glob(os.path.join(root_path, "images", "100k", segment_name, "*.jpg"))

        print(f"Reading data to segment '{segment_name}'...")
        if segment_name == "test":
            for image_path in image_paths:
                segment.append(Data(image_path))
        else:
            label_contents = _read_label_file_100k(labels_dir, segment_name)
            for image_path in image_paths:
                data = Data(image_path)
                box2d: List[LabeledBox2D] = []
                polygon: List[LabeledPolygon] = []
                polyline2d: List[LabeledPolyline2D] = []
                label = data.label
                label_content = label_contents[os.path.basename(image_path)]
                label.classification = Classification(attributes=label_content["attributes"])
                for label_info in label_content["labels"]:
                    if "box2d" in label_info:
                        _add_box2d_label(label_info, box2d)
                    if "poly2d" in label_info:
                        _add_poly2d_label_100k(label_info, polygon, polyline2d)
                label.box2d = box2d
                label.polygon = polygon
                label.polyline2d = polyline2d
                segment.append(data)
        print(f"Finished reading data to segment '{segment_name}'")
Example #5
    def test_import_cloud_files_to_fusiondataset(self, accesskey, url, config_name):
        gas_client = GAS(access_key=accesskey, url=url)
        try:
            cloud_client = gas_client.get_cloud_client(config_name)
        except ResourceNotExistError:
            pytest.skip(f"skip this case because there's no {config_name} config")

        auth_data = cloud_client.list_auth_data("tests")[:5]
        dataset_name = get_dataset_name()
        dataset_client = gas_client.create_dataset(dataset_name, True, config_name=config_name)

        dataset = FusionDataset(name=dataset_name)
        segment = dataset.create_segment("Segment1")
        lidar = Lidar("LIDAR")
        segment.sensors.add(lidar)

        for data in auth_data:
            data.label.classification = Classification("cat", attributes={"color": "red"})
            frame = Frame()
            frame["LIDAR"] = data
            segment.append(frame)

        dataset_client = gas_client.upload_dataset(dataset, jobs=5)
        dataset_client.commit("import data")

        segment1 = FusionSegment("Segment1", client=dataset_client)
        assert len(segment1) == len(segment)
        assert segment1[0]["LIDAR"].path == segment[0]["LIDAR"].path.split("/")[-1]
        assert segment1[0]["LIDAR"].label.classification.category == "cat"
        assert segment1[0]["LIDAR"].label.classification.attributes["color"] == "red"
        assert len(auth_data) == len(segment)

        gas_client.delete_dataset(dataset_name)
Example #6
def CACD(path: str) -> Dataset:
    """`Cross-Age Celebrity Dataset (CACD) <https://bcsiriuschen.github.io/CARC/>`_ dataset.

    The file structure should be like::

        <path>
            CACD2000/
                14_Aaron_Johnson_0001.jpg
                ...
            celebrity2000.mat

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.catalog.classification = _get_subcatalog()
    segment = dataset.create_segment()
    image_files = glob(os.path.join(root_path, "CACD2000", "*.jpg"))
    labels_map = _get_labels_map(os.path.join(root_path, "celebrity2000.mat"))
    for image in image_files:
        category, attribute = labels_map[os.path.basename(image)]
        image_data = Data(image)
        image_data.label.classification = Classification(category, attribute)
        segment.append(image_data)
    return dataset
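
A minimal usage sketch, assuming a `Dataset` can be indexed to reach its segments (the local path is hypothetical):

dataset = CACD("~/data/CACD")  # hypothetical path holding CACD2000/ and celebrity2000.mat
segment = dataset[0]           # the single default segment created above
print(segment[0].label.classification.category)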
Example #7
def ImageEmotionArtphoto(path: str) -> Dataset:
    """`Image Emotion-art Photo <https://www.imageemotion.org/>`_ dataset.

    The file structure should be like::

        <path>
            <filename>.jpg
            ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME_ARTPHOTO)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog_artphoto.json"))
    segment = dataset.create_segment()

    image_paths = glob(os.path.join(root_path, "*.jpg"))

    for image_path in image_paths:
        image_category = os.path.basename(image_path).split("_", 1)[0]

        data = Data(image_path)
        data.label.classification = Classification(category=image_category)
        segment.append(data)

    return dataset
Example #8
def FSDD(path: str) -> Dataset:
    """`Free Spoken Digit <https://github.com/Jakobovski/free-spoken-digit-dataset>`_ dataset.

    The file structure should be like::

        <path>
            recordings/
                0_george_0.wav
                0_george_1.wav
                ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    label_map = {}
    for key, value in _METADATA.items():
        attributes = {"name": key}
        attributes.update(value)
        label_map[key] = attributes

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()
    audio_paths = glob(os.path.join(path, "recordings", "*.wav"))
    for audio_path in audio_paths:
        category, name = os.path.basename(audio_path).split("_")[:2]
        data = Data(audio_path)
        data.label.classification = Classification(category, label_map[name])
        segment.append(data)
    return dataset
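
`_METADATA` is defined elsewhere in the module; a hypothetical excerpt keyed by speaker name, which the loop above merges into per-speaker attribute dicts (the fields shown are illustrative, not the real metadata):

_METADATA = {
    "george": {"gender": "male", "accent": "US/English"},
    "jackson": {"gender": "male", "accent": "US/English"},
}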
Example #9
def COVID_CT(path: str) -> Dataset:
    """`COVID-CT <https://github.com/UCSD-AI4H/COVID-CT>`_ dataset.

    The file structure should be like::

        <path>
            Data-split/
                COVID/
                    testCT_COVID.txt
                    trainCT_COVID.txt
                    valCT_COVID.txt
                NonCOVID/
                    testCT_NonCOVID.txt
                    trainCT_NonCOVID.txt
                    valCT_NonCOVID.txt
            Images-processed/
                CT_COVID/
                    ...
                    2020.01.24.919183-p27-132.png
                    2020.01.24.919183-p27-133.png
                    ...
                    PIIS0140673620303603%8.png
                    ...
                CT_NonCOVID/
                    0.jpg
                    1%0.jpg
                    ...
                    91%1.jpg
                    102.png
                    ...
                    2341.png

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.
    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    data_split_path = os.path.join(root_path, "Data-split")
    images_processed_path = os.path.join(root_path, "Images-processed")

    for segment_name, (split_filename, image_dir,
                       category) in _SEGMENT_TO_PATH.items():
        segment = dataset.create_segment(segment_name)
        image_dir = os.path.join(images_processed_path, image_dir)
        with open(os.path.join(data_split_path, category, split_filename),
                  "r",
                  encoding="utf-8") as fp:
            for line in fp:
                image_path = os.path.join(image_dir, line.strip("\n"))
                data = Data(image_path)
                data.label.classification = Classification(category)
                segment.append(data)

    return dataset
Example #10
def AnimalsWithAttributes2(path: str) -> Dataset:
    """`Animals with attributes 2 <https://cvml.ist.ac.at/AwA2/>`_ dataset.

    The file structure should be like::

        <path>
            classes.txt
            predicates.txt
            predicate-matrix-binary.txt
            JPEGImages/
                <classname>/
                    <imagename>.jpg
                ...
            ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()

    with open(os.path.join(root_path, "classes.txt"), encoding="utf-8") as fp:
        class_names = [line[:-1].split("\t", 1)[-1] for line in fp]

    with open(os.path.join(root_path, "predicates.txt"),
              encoding="utf-8") as fp:
        attribute_keys = [line[:-1].split("\t", 1)[-1] for line in fp]

    with open(os.path.join(root_path, "predicate-matrix-binary.txt"),
              encoding="utf-8") as fp:
        attribute_values = [line[:-1].split(" ") for line in fp]

    attribute_mapping = {}
    for class_name, values in zip(class_names, attribute_values):
        attribute_mapping[class_name] = Classification(
            category=class_name,
            attributes=dict(
                zip(attribute_keys, (bool(int(value)) for value in values))),
        )

    for class_name in sorted(os.listdir(os.path.join(root_path,
                                                     "JPEGImages"))):
        image_paths = glob(
            os.path.join(root_path, "JPEGImages", class_name, "*.jpg"))
        label = attribute_mapping[class_name]
        for image_path in image_paths:
            data = Data(image_path)
            data.label.classification = label
            segment.append(data)

    return dataset
Example #11
def _extract_label_from_basename(keys: Tuple[str, ...],
                                 filename: str) -> Classification:
    make, model, *spec_values = filename.split("_")[:-1]

    attributes = dict(zip(keys, map(_transfer_attribute_type, spec_values)))

    category = ".".join((make, model))

    return Classification(attributes=attributes, category=category)
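
A worked example with a hypothetical key tuple and filename, assuming `_transfer_attribute_type` converts numeric strings to numbers and leaves other values as strings:

# "BMW_M3_2012_coupe_001.jpg" -> make "BMW", model "M3",
# spec values ("2012", "coupe"); the trailing index is dropped by [:-1].
label = _extract_label_from_basename(("year", "type"), "BMW_M3_2012_coupe_001.jpg")
assert label.category == "BMW.M3"
assert label.attributes == {"year": 2012, "type": "coupe"}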
Example #12
def _get_classification(
        classification_labels: List[Dict[str, str]]) -> Classification:
    attributes: Dict[str, Union[int, float, str]] = {}
    for indices, attribute_getter in _ATTRIBUTES_GETTER.items():
        for index in indices:
            classification_label = classification_labels[index]
            attributes[classification_label["@name"]] = attribute_getter(
                classification_label["@value"])
    return Classification(attributes=attributes)
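
`_ATTRIBUTES_GETTER` maps tuples of indices into `classification_labels` to a converter applied to each label's `"@value"`; a hypothetical instance showing the expected shape (indices and converters are illustrative):

_ATTRIBUTES_GETTER = {
    (0, 1): int,    # labels 0 and 1 carry integer values
    (2,): float,    # label 2 carries a float value
}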
Example #13
def OxfordIIITPet(path: str) -> Dataset:
    """`OxfordIIITPet <https://www.robots.ox.ac.uk/~vgg/data/pets/>`_ dataset.

    The file structure should be like::

        <path>
            annotations/
                trimaps/
                    Bombay_113.png
                    Bombay_114.png
                    ...
                xmls/
                    Birman_174.xml
                    Birman_175.xml
                    ...
                list.txt
                test.txt
                trainval.txt
                README
            images/
                Bombay_117.jpg
                Bombay_118.jpg
                ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    trainval_segment = dataset.create_segment("trainval")
    test_segment = dataset.create_segment("test")
    annotation_path = os.path.join(root_path, "annotations")
    for image_path in glob(os.path.join(root_path, "images", "*.jpg")):
        image_name = os.path.splitext(os.path.basename(image_path))[0]
        name = "Cat" if image_name.istitle() else "Dog"
        category, num = image_name.rsplit("_", 1)

        data = Data(image_path,
                    target_remote_path=f"{category}_{num.zfill(3)}.jpg")
        label = data.label
        label.classification = Classification(category=f"{name}.{category}")
        label.semantic_mask = SemanticMask(
            os.path.join(annotation_path, "trimaps", f"{image_name}.png"))
        xml_path = os.path.join(annotation_path, "xmls", f"{image_name}.xml")
        if os.path.exists(xml_path):
            label.box2d = _get_box_label(xml_path)
            trainval_segment.append(data)
        else:
            test_segment.append(data)
    return dataset
Example #14
def RarePlanesReal(path: str) -> Dataset:
    """`RarePlanesReal <https://www.cosmiqworks.org/RarePlanes/>`_ dataset.

    The folder structure should be like::

        <path>
            metadata_annotations/
                RarePlanes_Public_Metadata.csv
                RarePlanes_Test_Coco_Annotations_tiled.json
                RarePlanes_Train_Coco_Annotations_tiled.json
            test/
                PS-RGB_tiled/
                    105_104001003108D900_tile_47.png
                    ...
            train/
                PS-RGB_tiled/
                    100_1040010029990A00_tile_319.png
                    ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    catalog = dataset.catalog

    annotations_dir = os.path.join(root_path, "metadata_annotations")
    classification_attributes = _get_classification_attributes(
        os.path.join(annotations_dir, "RarePlanes_Public_Metadata.csv"),
        catalog.classification.attributes.keys(),
    )
    for segment_name in _SEGMENT_NAMES:
        segment = dataset.create_segment(segment_name)
        image_name_to_polygons = _get_polygon_labels(
            annotations_dir, segment_name, catalog.polygon.attributes.keys())
        for image_path in glob(
                os.path.join(root_path, segment_name, "PS-RGB_tiled",
                             "*.png")):
            data = Data(image_path)
            label = data.label
            filename = os.path.basename(image_path)
            image_id = filename.rsplit("_", 2)[0]
            label.polygon = image_name_to_polygons[filename]
            label.classification = Classification(
                attributes=classification_attributes[image_id])
            segment.append(data)
    return dataset
Example #15
def KenyanFoodType(path: str) -> Dataset:
    """`Kenyan Food Type <https://github.com/monajalal/Kenyan-Food>`_ dataset.

    The file structure should be like::

        <path>
            test.csv
            test/
                bhaji/
                    1611654056376059197.jpg
                    ...
                chapati/
                    1451497832469337023.jpg
                    ...
                ...
            train/
                bhaji/
                    190393222473009410.jpg
                    ...
                chapati/
                    1310641031297661755.jpg
                    ...
            val/
                bhaji/
                    1615408264598518873.jpg
                    ...
                chapati/
                    1553618479852020228.jpg
                    ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME_FOOD_TYPE)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog_food_type.json"))

    for segment_name in SEGMENTS_FOOD_TYPE:
        segment = dataset.create_segment(segment_name)
        segment_path = os.path.join(root_path, segment_name)
        for category in sorted(os.listdir(segment_path)):
            image_paths = glob(os.path.join(segment_path, category, "*.jpg"))
            label = Classification(category)
            for image_path in image_paths:
                data = Data(image_path)
                data.label.classification = label
                segment.append(data)
    return dataset
Example #16
def _extract_classification(
        path: str,
        classification_catalog: ClassificationSubcatalog) -> Classification:
    with open(path, encoding="utf-8") as fp:
        attribute_names = classification_catalog.attributes.keys()
        csv_reader = csv.reader(fp)
        elements = next(csv_reader)
        attributes = {
            attribute_name: bool(int(value))
            for attribute_name, value in zip(attribute_names, elements)
        }

    return Classification(attributes=attributes)
Example #17
def _load_image_labels(file_path: str) -> Dict[str, Classification]:
    with open(file_path, encoding="utf-8") as fp:
        image_labels = {}
        for line in fp:
            img_index, count, scene, weather, distractor = line.strip().split(",")
            attributes = {
                "total-count": int(count),
                "scene-type": scene,
                "weather-condition": _WEATHER_CONDITION_MAP[int(weather)],
                "distractor": bool(int(distractor)),
            }
            image_labels[img_index] = Classification(attributes=attributes)
    return image_labels
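
Each line of the label file is assumed to hold five comma-separated fields; a self-contained sketch of the parsing with an illustrative line and weather map:

_WEATHER_CONDITION_MAP = {0: "sunny", 1: "cloudy"}  # illustrative values only

line = "000001,23,urban,1,0"
img_index, count, scene, weather, distractor = line.strip().split(",")
assert int(count) == 23
assert _WEATHER_CONDITION_MAP[int(weather)] == "cloudy"
assert bool(int(distractor)) is False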
Example #18
def COVIDChestXRay(path: str) -> Dataset:
    """`COVID-chestxray <https://github.com/ieee8023/covid-chestxray-dataset>`_ dataset.

    The file structure should be like::

        <path>
            images/
                0a7faa2a.jpg
                000001-2.png
                000001-3.jpg
                1B734A89-A1BF-49A8-A1D3-66FAFA4FAC5D.jpeg
                ...
            volumes/
                coronacases_org_001.nii.gz
                ....
            metadata.csv
            ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))

    segment = dataset.create_segment()

    csv_path = os.path.join(root_path, "metadata.csv")

    with open(csv_path, encoding="utf-8") as fp:
        csv_reader = csv.DictReader(fp)
        for attributes in csv_reader:
            folder = attributes.pop("folder")
            # The 20 images in the "volumes" folder currently cannot be downloaded.
            if folder == "volumes":
                continue
            image_path = os.path.join(root_path, folder,
                                      attributes.pop("filename"))
            category = attributes.pop("finding").strip()
            data = Data(image_path)
            data.label.classification = Classification(
                category=category, attributes=_convert_type(attributes))
            segment.append(data)
    return dataset
Example #19
def Flower17(path: str) -> Dataset:
    """`17 Category Flower <http://www.robots.ox.ac.uk/~vgg/data/flowers/17/index.html>`_ dataset.

    The dataset has 3 separate splits.
    The results in the paper are averaged over the 3 splits.
    Here only the first split (trn1, val1, tst1) is used.

    The file structure should be like::

        <path>
            jpg/
                image_0001.jpg
                ...
            datasplits.mat

    Arguments:
        path: The root directory of the dataset.

    Raises:
        ModuleImportError: When the module "scipy" can not be found.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    try:
        from scipy.io import loadmat  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name) from error

    root_path = os.path.abspath(os.path.expanduser(path))
    segment_info = loadmat(os.path.join(root_path, "datasplits.mat"))

    dataset = Dataset(DATASET_NAME_17)
    dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog_17.json"))
    index_to_category = dataset.catalog.classification.get_index_to_category()
    for key, value in _SEGMENT_NAMES_17.items():
        segment = dataset.create_segment(key)
        segment_info[value][0].sort()
        for index in segment_info[value][0]:
            data = Data(os.path.join(root_path, "jpg", f"image_{index:04d}.jpg"))

            # There are 80 images for each category
            data.label.classification = Classification(
                category=index_to_category[(index - 1) // 80]
            )
            segment.append(data)

    return dataset
Example #20
def RP2K(path: str) -> Dataset:
    """`RP2K <https://www.pinlandata.com/rp2k_dataset>`_ dataset.

    The file structure of RP2K looks like::

        <path>
            all/
                test/
                    <category>/
                        <image_name>.jpg
                        ...
                    ...
                train/
                    <category>/
                        <image_name>.jpg
                        ...
                    ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.join(os.path.abspath(os.path.expanduser(path)), "all")
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))

    for segment_name in ("train", "test"):
        segment = dataset.create_segment(segment_name)
        segment_path = os.path.join(root_path, segment_name)
        categories = os.listdir(segment_path)
        categories.sort()
        for category in categories:
            category_dir = os.path.join(segment_path, category)
            if not os.path.isdir(category_dir):
                continue
            image_paths = _glob(category_dir, ("*.jpg", "*.png"))
            for image_path in image_paths:
                remote_path = os.path.basename(image_path).replace(" ", "_")
                data = Data(local_path=image_path,
                            target_remote_path=remote_path)
                data.label.classification = Classification(category)
                segment.append(data)

    return dataset
Example #21
    def test_dumps(self):
        contents = {
            "CLASSIFICATION": {"category": "cat", "attributes": {"gender": "male"}},
            "BOX2D": [
                {
                    "box2d": {"xmin": 1, "ymin": 1, "xmax": 2, "ymax": 2},
                    "category": "dog",
                    "attributes": {"gender": "female"},
                }
            ],
        }

        label = Label()
        label.classification = Classification.loads(contents["CLASSIFICATION"])
        label.box2d = [LabeledBox2D.loads(contents["BOX2D"][0])]
        assert label.dumps() == contents
Example #22
def _get_original_png_label(stem: str) -> Classification:
    """Get label from stem of originalPng image name.

    Arguments:
        stem: Stem of originalPng image name like "blanket1-a".

    Returns:
        Label of originalPng image.

    """
    class_name, original_image_number = stem.split("-", 1)
    attributes = {
        "original image sample number": original_image_number,
        "patch number": None,
        "rotated degrees": 0,
    }
    return Classification(category=class_name, attributes=attributes)
Example #23
def _get_without_rotate_all_label(stem: str) -> Classification:
    """Get label from stem of withoutRotateAll image name.

    Arguments:
        stem: Stem of withoutRotateAll image name like "blanket1-a-p001".

    Returns:
        Label of withoutRotateAll image.

    """
    class_name, original_image_number, patch_number = stem.split("-", 2)
    attributes = {
        "original image sample number": original_image_number,
        "patch number": int(patch_number[1:]),
        "rotated degrees": 0,
    }
    return Classification(category=class_name, attributes=attributes)
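
For the stem `"blanket1-a-p001"` from the docstring, the split yields `("blanket1", "a", "p001")`:

label = _get_without_rotate_all_label("blanket1-a-p001")
assert label.category == "blanket1"
assert label.attributes == {
    "original image sample number": "a",
    "patch number": 1,
    "rotated degrees": 0,
}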
Example #24
def AADB(path: str) -> Dataset:
    """`AADB <https://www.ics.uci.edu/~skong2/aesthetics.html>`_ dataset.

    The file structure looks like::

        <path>
            AADB_newtest/
                0.500_farm1_487_20167490236_ae920475e2_b.jpg
                ...
            datasetImages_warp256/
                farm1_441_19470426814_baae1eb396_b.jpg
                ...
            imgListFiles_label/
                imgList<segment_name>Regression_<attribute_name>.txt
                ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    attribute_names = dataset.catalog.classification.attributes.keys()

    for mode, image_dir, label_file_prefix in _SEGMENTS_INFO:
        image_name_handler: Callable[[str, Dict[str, float]], str] = (
            (lambda image_name, attributes:
             f"{attributes['score']:.3f}_{image_name}")
            if mode == "new_test" else (lambda image_name, _: image_name))

        segment = dataset.create_segment(mode)
        attributes_map = _extract_attributes_map(root_path, label_file_prefix,
                                                 attribute_names)
        for image_name, attributes in attributes_map.items():
            real_image_name = image_name_handler(image_name, attributes)
            image_path = os.path.join(root_path, image_dir, real_image_name)
            data = Data(image_path)
            data.label.classification = Classification(attributes=attributes)
            segment.append(data)

    return dataset
Example #25
def Flower102(path: str) -> Dataset:
    """`102 Category Flower <http://www.robots.ox.ac.uk/~vgg/data/flowers/102/index.html>`_ dataset.

    The file structure should be like::

        <path>
            jpg/
                image_00001.jpg
                ...
            imagelabels.mat
            setid.mat

    Arguments:
        path: The root directory of the dataset.

    Raises:
        ModuleImportError: When the module "scipy" can not be found.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    try:
        from scipy.io import loadmat  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name) from error

    root_path = os.path.abspath(os.path.expanduser(path))
    labels = loadmat(os.path.join(root_path, "imagelabels.mat"))["labels"][0]
    segment_info = loadmat(os.path.join(root_path, "setid.mat"))

    dataset = Dataset(DATASET_NAME_102)
    dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog_102.json"))
    index_to_category = dataset.catalog.classification.get_index_to_category()
    for key, value in _SEGMENT_NAMES_102.items():
        segment = dataset.create_segment(key)
        segment_info[value][0].sort()
        for index in segment_info[value][0]:
            data = Data(os.path.join(root_path, "jpg", f"image_{index:05d}.jpg"))
            data.label.classification = Classification(
                index_to_category[int(labels[index - 1]) - 1]
            )
            segment.append(data)
    return dataset
Example #26
def ImageEmotionAbstract(path: str) -> Dataset:
    """`Image Emotion-abstract <https://www.imageemotion.org/>`_ dataset.

    The file structure should be like::

        <path>
            ABSTRACT_groundTruth.csv
            abstract_xxxx.jpg
            ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME_ABSTRACT)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog_abstract.json"))
    segment = dataset.create_segment()

    csv_path = os.path.join(root_path, "ABSTRACT_groundTruth.csv")
    with open(csv_path, "r", encoding="utf-8") as fp:
        reader = csv.DictReader(fp)
        reader.fieldnames = [
            field.strip("'")
            for field in reader.fieldnames  # type:ignore[union-attr]
        ]

        for row in reader:
            image_path = os.path.join(root_path, row.pop("").strip("'"))

            data = Data(image_path)
            values = {key: int(value) for key, value in row.items()}

            data.label.classification = Classification(attributes=values)
            segment.append(data)

    return dataset
Example #27
def KenyanFoodOrNonfood(path: str) -> Dataset:
    """`Kenyan Food or Nonfood <https://github.com/monajalal/Kenyan-Food>`_ dataset.

    The file structure should be like::

        <path>
            images/
                food/
                    236171947206673742.jpg
                    ...
                nonfood/
                    168223407.jpg
                    ...
            data.csv
            split.py
            test.txt
            train.txt

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME_FOOD_OR_NONFOOD)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__),
                     "catalog_food_or_nonfood.json"))

    for segment_name, filename in SEGMENTS_FOOD_OR_NONFOOD.items():
        segment = dataset.create_segment(segment_name)
        with open(os.path.join(root_path, filename), encoding="utf-8") as fp:
            for image_path in fp:
                # Each line is a relative path like "images/food/<name>.jpg";
                # take the category before joining with the root path.
                category = image_path.split("/")[1]
                data = Data(os.path.join(root_path, image_path.strip()))
                data.label.classification = Classification(category)
                segment.append(data)
    return dataset
Example #28
def _get_classifications(
    label_path: str,
    attribute_names: Tuple[str, ...],
) -> Dict[str, Classification]:
    all_classifications = {}
    with open(os.path.join(label_path, "identity_meta.csv"),
              encoding="utf-8") as fp:
        # The normal format of each line of the file is
        # Class_ID, Name, Sample_Num, Flag, Gender,
        # n000001, "14th_Dalai_Lama",424,0, m,
        # n000002, "A_Fine_Frenzy",315,1, f,
        # n000003, "A._A._Gill",205,1, m,
        # ...
        for line in islice(csv.reader(fp), 1, None):
            # The normal format of each line is
            # '<class_id>, "<class_name>",<sample_num>,<flag>,<gender>,\n'
            # but some lines are malformed:
            # '<class_id>, "<class_,name>",<sample_num>,<flag>,<gender>\n'
            if line[-1] != "":
                # Join the two pieces the split name was broken into.
                line[1] = "".join(islice(line, 1, 3))
                # Shift the remaining fields forward one position.
                line.pop(2)
                line[4] = line[4].rstrip("\n")
            category_id = line[0]
            category_name = line[1].strip(' "')
            attributes = dict(
                zip(
                    attribute_names,
                    (
                        int(line[2]),
                        bool(int(line[3])),
                        line[4],
                    ),
                ))
            all_classifications[category_id] = Classification(
                category=category_name,
                attributes=attributes,
            )
    return all_classifications
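
The repair branch is needed because each field carries a leading space before its opening quote, so `csv.reader` treats the quotes literally and a comma inside a name splits it into two fields; a small demonstration (the second row is hypothetical):

import csv

rows = [
    'n000001, "14th_Dalai_Lama",424,0, m,',  # normal: trailing comma -> empty last field
    'n000009, "Foo,_Bar",205,1, m',          # malformed: comma inside the name
]
for line in csv.reader(rows):
    if line[-1] != "":               # malformed line: rejoin the split name pieces
        line[1] = "".join(line[1:3])
        line.pop(2)
    print(line[0], line[1].strip(' "'))
# n000001 14th_Dalai_Lama
# n000009 Foo_Bar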
Example #29
def CoinImage(path: str) -> Dataset:
    """`Coin Image <https://cvl.tuwien.ac.at/research/cvl-databases/coin-image-dataset/>`_ dataset.

    The file structure should be like::

        <path>
            classes.csv
            <imagename>.png
            ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()

    csv_path = os.path.join(root_path, "classes.csv")
    with open(csv_path, "r", encoding="utf-8") as fp:
        reader = csv.reader(fp, delimiter=";")
        mapping: Dict[str, str] = dict(
            row for row in reader)  # type: ignore[arg-type, misc]

    image_paths = glob(os.path.join(root_path, "*.png"))

    for image_path in image_paths:
        data = Data(image_path)
        filename = os.path.basename(image_path)
        class_id = filename[5:].split("_", 1)[0]
        data.label.classification = Classification(category=mapping[class_id])
        segment.append(data)

    return dataset
Example #30
def _get_data(path: str, annotations: Any,
              flag: bool) -> Iterator[Tuple[Data, str]]:
    filepath_to_data: Dict[str, Data] = {}

    for annotation in annotations:
        filepath = annotation["filepath"][0]

        keypoints = LabeledKeypoints2D(
            annotation["coords"].T[_VALID_KEYPOINT_INDICES],
            attributes={
                "poselet_hit_idx": annotation["poselet_hit_idx"].T.tolist()
            },
        )
        box2d = LabeledBox2D(*annotation["torsobox"][0].tolist())

        if filepath not in filepath_to_data:
            data = Data(os.path.join(path, "images", filepath))
            data.label.keypoints2d = [keypoints]
            data.label.box2d = [box2d]
            attribute = {"currframe": int(annotation["currframe"][0][0])}

            if flag:
                attribute["isunchecked"] = bool(annotation["isunchecked"])
            data.label.classification = Classification(
                category=annotation["moviename"][0], attributes=attribute)
            filepath_to_data[filepath] = data

            if annotation["istrain"]:
                segment_name = "train"
            elif annotation["istest"]:
                segment_name = "test"
            else:
                segment_name = "bad"
            yield data, segment_name

        else:
            image_data = filepath_to_data[filepath]
            image_data.label.keypoints2d.append(keypoints)
            image_data.label.box2d.append(box2d)
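
A sketch of how this generator might be driven, routing each yielded `Data` into a per-split segment; `root_path` and `annotations` are assumptions, loaded elsewhere (for example from a .mat annotation file, as in the other loaders above):

dataset = Dataset("HypotheticalDataset")
segments = {name: dataset.create_segment(name) for name in ("train", "test", "bad")}
for data, segment_name in _get_data(root_path, annotations, flag=True):
    segments[segment_name].append(data)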