def test_eq(self):
    """Classifications are equal iff both category and attributes match."""
    same_a = Classification("cat", {"gender": "male"})
    same_b = Classification("cat", {"gender": "male"})
    different_category = Classification("dog", {"gender": "male"})

    assert same_a == same_b
    assert same_a != different_category
def test_dumps(self):
    """dumps() serializes the classification into a plain dict."""
    subject = Classification(category="cat", attributes={"gender": "male"})
    expected = {
        "category": "cat",
        "attributes": {"gender": "male"},
    }
    assert subject.dumps() == expected
def test_eq(self):
    """Labels compare equal iff their classifications compare equal."""
    white_a = Label()
    white_a.classification = Classification("cat", {"color": "white"})

    white_b = Label()
    white_b.classification = Classification("cat", {"color": "white"})

    black = Label()
    black.classification = Classification("cat", {"color": "black"})

    assert white_a == white_b
    assert white_a != black
def _load_segment_100k(dataset: Dataset, root_path: str, labels_dir: str) -> None:
    """Fill ``dataset`` with one segment per name in ``_SEGMENT_NAMES``.

    Arguments:
        dataset: The dataset to append segments to.
        root_path: Root directory containing "images/100k/<segment>/".
        labels_dir: Directory holding the per-segment label files.

    """
    for name in _SEGMENT_NAMES:
        segment = dataset.create_segment(name)
        jpg_paths = glob(os.path.join(root_path, "images", "100k", name, "*.jpg"))
        print(f"Reading data to segment '{name}'...")
        if name == "test":
            # The test split has no label file; attach the images only.
            for jpg_path in jpg_paths:
                segment.append(Data(jpg_path))
        else:
            contents = _read_label_file_100k(labels_dir, name)
            for jpg_path in jpg_paths:
                data = Data(jpg_path)
                boxes: List[LabeledBox2D] = []
                polygons: List[LabeledPolygon] = []
                polylines: List[LabeledPolyline2D] = []
                label = data.label
                content = contents[os.path.basename(jpg_path)]
                label.classification = Classification(attributes=content["attributes"])
                for info in content["labels"]:
                    # A single record may carry both a box and a poly2d entry.
                    if "box2d" in info:
                        _add_box2d_label(info, boxes)
                    if "poly2d" in info:
                        _add_poly2d_label_100k(info, polygons, polylines)
                label.box2d = boxes
                label.polygon = polygons
                label.polyline2d = polylines
                segment.append(data)
        print(f"Finished reading data to segment '{name}'")
def test_import_cloud_files_to_fusiondataset(self, accesskey, url, config_name):
    """End-to-end check: cloud-storage files round-trip through a fusion dataset.

    Uploads cloud auth data (with a classification label) as frames of a
    fusion dataset, then reads the segment back and verifies path and label.
    NOTE(review): this test talks to a live service; it is skipped when the
    given cloud config does not exist.
    """
    gas_client = GAS(access_key=accesskey, url=url)
    try:
        cloud_client = gas_client.get_cloud_client(config_name)
    except ResourceNotExistError:
        pytest.skip(f"skip this case because there's no {config_name} config")
    # Limit to 5 files to keep the round trip small.
    auth_data = cloud_client.list_auth_data("tests")[:5]
    dataset_name = get_dataset_name()
    dataset_client = gas_client.create_dataset(dataset_name, True, config_name=config_name)

    dataset = FusionDataset(name=dataset_name)
    segment = dataset.create_segment("Segment1")
    lidar = Lidar("LIDAR")
    segment.sensors.add(lidar)
    # Wrap every cloud file in a single-sensor frame with a fixed label.
    for data in auth_data:
        data.label.classification = Classification("cat", attributes={"color": "red"})
        frame = Frame()
        frame["LIDAR"] = data
        segment.append(frame)

    dataset_client = gas_client.upload_dataset(dataset, jobs=5)
    dataset_client.commit("import data")
    segment1 = FusionSegment("Segment1", client=dataset_client)
    assert len(segment1) == len(segment)
    # Remote path keeps only the basename of the original cloud path.
    assert segment1[0]["LIDAR"].path == segment[0]["LIDAR"].path.split("/")[-1]
    assert segment1[0]["LIDAR"].label.classification.category == "cat"
    assert segment1[0]["LIDAR"].label.classification.attributes["color"] == "red"
    assert len(auth_data) == len(segment)

    gas_client.delete_dataset(dataset_name)
def CACD(path: str) -> Dataset:
    """`Cross-Age Celebrity Dataset (CACD) <https://bcsiriuschen.github.io/CARC/>`_ dataset.

    The file structure should be like::

        <path>
            CACD2000/
                14_Aaron_Johnson_0001.jpg
                ...
            celebrity2000.mat

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.catalog.classification = _get_subcatalog()
    segment = dataset.create_segment()
    labels_map = _get_labels_map(os.path.join(root_path, "celebrity2000.mat"))
    for image_path in glob(os.path.join(root_path, "CACD2000", "*.jpg")):
        # The .mat labels are keyed by the image's basename.
        category, attribute = labels_map[os.path.basename(image_path)]
        data = Data(image_path)
        data.label.classification = Classification(category, attribute)
        segment.append(data)
    return dataset
def ImageEmotionArtphoto(path: str) -> Dataset:
    """`Image Emotion-art Photo <https://www.imageemotion.org/>`_ dataset.

    The file structure should be like::

        <path>
            <filename>.jpg
            ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME_ARTPHOTO)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog_artphoto.json"))
    segment = dataset.create_segment()
    for image_path in glob(os.path.join(root_path, "*.jpg")):
        # The emotion category is the part of the file name before the first "_".
        emotion = os.path.basename(image_path).split("_", 1)[0]
        data = Data(image_path)
        data.label.classification = Classification(category=emotion)
        segment.append(data)
    return dataset
def FSDD(path: str) -> Dataset:
    """`Free Spoken Digit <https://github.com/Jakobovski/free-spoken-digit-dataset>`_ dataset.

    The file structure should be like::

        <path>
            recordings/
                0_george_0.wav
                0_george_1.wav
                ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    # Per-speaker attributes: the speaker name plus the metadata entries.
    label_map = {
        speaker: {"name": speaker, **metadata}
        for speaker, metadata in _METADATA.items()
    }
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()
    for audio_path in glob(os.path.join(path, "recordings", "*.wav")):
        # File names look like "<digit>_<speaker>_<take>.wav".
        digit, speaker = os.path.basename(audio_path).split("_")[:2]
        data = Data(audio_path)
        data.label.classification = Classification(digit, label_map[speaker])
        segment.append(data)
    return dataset
def COVID_CT(path: str) -> Dataset:
    """`COVID-CT <https://github.com/UCSD-AI4H/COVID-CT>`_ dataset.

    The file structure should be like::

        <path>
            Data-split/
                COVID/
                    testCT_COVID.txt
                    trainCT_COVID.txt
                    valCT_COVID.txt
                NonCOVID/
                    testCT_NonCOVID.txt
                    trainCT_NonCOVID.txt
                    valCT_NonCOVID.txt
            Images-processed/
                CT_COVID/
                    ...
                CT_NonCOVID/
                    ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    split_root = os.path.join(root_path, "Data-split")
    image_root = os.path.join(root_path, "Images-processed")
    for segment_name, (split_filename, image_dir, category) in _SEGMENT_TO_PATH.items():
        segment = dataset.create_segment(segment_name)
        segment_image_dir = os.path.join(image_root, image_dir)
        split_file = os.path.join(split_root, category, split_filename)
        with open(split_file, "r", encoding="utf-8") as fp:
            # Each line of the split file is one image file name.
            for line in fp:
                data = Data(os.path.join(segment_image_dir, line.strip("\n")))
                data.label.classification = Classification(category)
                segment.append(data)
    return dataset
def AnimalsWithAttributes2(path: str) -> Dataset:
    """`Animals with attributes 2 <https://cvml.ist.ac.at/AwA2/>`_ dataset.

    The file structure should be like::

        <path>
            classes.txt
            predicates.txt
            predicate-matrix-binary.txt
            JPEGImages/
                <classname>/
                    <imagename>.jpg
                    ...
                ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()

    def read_names(filename: str) -> List[str]:
        # Each line is "<index>\t<name>"; keep only the name part.
        with open(os.path.join(root_path, filename), encoding="utf-8") as fp:
            return [line[:-1].split("\t", 1)[-1] for line in fp]

    class_names = read_names("classes.txt")
    attribute_keys = read_names("predicates.txt")
    with open(os.path.join(root_path, "predicate-matrix-binary.txt"),
              encoding="utf-8") as fp:
        attribute_rows = [line[:-1].split(" ") for line in fp]

    # One shared Classification per class; rows align with class_names order.
    attribute_mapping = {
        class_name: Classification(
            category=class_name,
            attributes=dict(
                zip(attribute_keys, (bool(int(value)) for value in row))),
        )
        for class_name, row in zip(class_names, attribute_rows)
    }

    for class_name in sorted(os.listdir(os.path.join(root_path, "JPEGImages"))):
        shared_label = attribute_mapping[class_name]
        for image_path in glob(
                os.path.join(root_path, "JPEGImages", class_name, "*.jpg")):
            data = Data(image_path)
            data.label.classification = shared_label
            segment.append(data)
    return dataset
def _extract_label_from_basename(keys: Tuple[str, ...], filename: str) -> Classification:
    """Build a Classification from an underscore-delimited file basename.

    Arguments:
        keys: Attribute keys, zipped in order with the spec fields of the name.
        filename: Basename like "<make>_<model>_<spec>..._<suffix>"; the last
            underscore-separated field is discarded.

    Returns:
        A Classification with category "<make>.<model>" and the spec attributes.

    """
    fields = filename.split("_")[:-1]
    category = f"{fields[0]}.{fields[1]}"
    attributes = {
        key: _transfer_attribute_type(value)
        for key, value in zip(keys, fields[2:])
    }
    return Classification(attributes=attributes, category=category)
def _get_classification(
        classification_labels: List[Dict[str, str]]) -> Classification:
    """Convert raw "@name"/"@value" label entries into a Classification.

    Arguments:
        classification_labels: Raw label dicts, indexed by the positions
            listed in ``_ATTRIBUTES_GETTER``.

    Returns:
        A Classification whose attributes are the converted values.

    """
    attributes: Dict[str, Union[int, float, str]] = {
        classification_labels[index]["@name"]:
            attribute_getter(classification_labels[index]["@value"])
        for indices, attribute_getter in _ATTRIBUTES_GETTER.items()
        for index in indices
    }
    return Classification(attributes=attributes)
def OxfordIIITPet(path: str) -> Dataset:
    """`OxfordIIITPet <https://www.robots.ox.ac.uk/~vgg/data/pets/>`_ dataset.

    The file structure should be like::

        <path>
            annotations/
                trimaps/
                    Bombay_113.png
                    ...
                xmls/
                    Birman_174.xml
                    ...
                list.txt
                test.txt
                trainval.txt
                README
            images/
                Bombay_117.jpg
                ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    segments = {
        "trainval": dataset.create_segment("trainval"),
        "test": dataset.create_segment("test"),
    }
    annotation_path = os.path.join(root_path, "annotations")
    for image_path in glob(os.path.join(root_path, "images", "*.jpg")):
        stem = os.path.splitext(os.path.basename(image_path))[0]
        # Cat breed names are title-cased ("Bombay_113"); dog breeds are lower-case.
        species = "Cat" if stem.istitle() else "Dog"
        breed, number = stem.rsplit("_", 1)
        data = Data(image_path,
                    target_remote_path=f"{breed}_{number.zfill(3)}.jpg")
        label = data.label
        label.classification = Classification(category=f"{species}.{breed}")
        label.semantic_mask = SemanticMask(
            os.path.join(annotation_path, "trimaps", f"{stem}.png"))
        xml_path = os.path.join(annotation_path, "xmls", f"{stem}.xml")
        # Only trainval images ship an xml box annotation.
        if os.path.exists(xml_path):
            label.box2d = _get_box_label(xml_path)
            segments["trainval"].append(data)
        else:
            segments["test"].append(data)
    return dataset
def RarePlanesReal(path: str) -> Dataset:
    """`RarePlanesReal <https://www.cosmiqworks.org/RarePlanes/>`_ dataset.

    The folder structure should be like::

        <path>
            metadata_annotations/
                RarePlanes_Public_Metadata.csv
                RarePlanes_Test_Coco_Annotations_tiled.json
                RarePlanes_Train_Coco_Annotations_tiled.json
            test/
                PS-RGB_tiled/
                    105_104001003108D900_tile_47.png
                    ...
            train/
                PS-RGB_tiled/
                    100_1040010029990A00_tile_319.png
                    ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    catalog = dataset.catalog
    annotations_dir = os.path.join(root_path, "metadata_annotations")
    # Classification attributes are shared across segments, keyed by image id.
    classification_attributes = _get_classification_attributes(
        os.path.join(annotations_dir, "RarePlanes_Public_Metadata.csv"),
        catalog.classification.attributes.keys(),
    )
    for segment_name in _SEGMENT_NAMES:
        segment = dataset.create_segment(segment_name)
        polygons_by_filename = _get_polygon_labels(
            annotations_dir, segment_name, catalog.polygon.attributes.keys())
        pattern = os.path.join(root_path, segment_name, "PS-RGB_tiled", "*.png")
        for image_path in glob(pattern):
            filename = os.path.basename(image_path)
            # "<image_id>_tile_<n>.png" -> "<image_id>"
            image_id = filename.rsplit("_", 2)[0]
            data = Data(image_path)
            data.label.polygon = polygons_by_filename[filename]
            data.label.classification = Classification(
                attributes=classification_attributes[image_id])
            segment.append(data)
    return dataset
def KenyanFoodType(path: str) -> Dataset:
    """`Kenyan Food Type <https://github.com/monajalal/Kenyan-Food>`_ dataset.

    The file structure should be like::

        <path>
            test.csv
            test/
                <category>/
                    <imagename>.jpg
                    ...
            train/
                <category>/
                    ...
            val/
                <category>/
                    ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME_FOOD_TYPE)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog_food_type.json"))
    for segment_name in SEGMENTS_FOOD_TYPE:
        segment = dataset.create_segment(segment_name)
        segment_path = os.path.join(root_path, segment_name)
        for category in sorted(os.listdir(segment_path)):
            # One Classification instance is shared by all images of a category.
            shared_label = Classification(category)
            for image_path in glob(os.path.join(segment_path, category, "*.jpg")):
                data = Data(image_path)
                data.label.classification = shared_label
                segment.append(data)
    return dataset
def _extract_classification(
        path: str,
        classification_catalog: ClassificationSubcatalog) -> Classification:
    """Read the first csv row of ``path`` into a boolean-attribute Classification.

    Arguments:
        path: Path of the csv file; only its first row is consumed.
        classification_catalog: Subcatalog whose attribute keys are zipped,
            in order, with the row's "0"/"1" values.

    Returns:
        A Classification with one boolean attribute per catalog key.

    """
    with open(path, encoding="utf-8") as fp:
        first_row = next(csv.reader(fp))
    attribute_names = classification_catalog.attributes.keys()
    attributes = dict(
        zip(attribute_names, (bool(int(value)) for value in first_row)))
    return Classification(attributes=attributes)
def _load_image_labels(file_path: str) -> Dict[str, Classification]:
    """Parse a comma-separated label file into per-image classifications.

    Arguments:
        file_path: Path of the label file; each line is
            "<index>,<count>,<scene>,<weather>,<distractor>".

    Returns:
        A dict mapping image index to its Classification.

    """
    labels: Dict[str, Classification] = {}
    with open(file_path, encoding="utf-8") as fp:
        for line in fp:
            index, count, scene, weather, distractor = line.strip().split(",")
            labels[index] = Classification(
                attributes={
                    "total-count": int(count),
                    "scene-type": scene,
                    # The weather column is a numeric code; map it to its name.
                    "weather-condition": _WEATHER_CONDITION_MAP[int(weather)],
                    "distractor": bool(int(distractor)),
                })
    return labels
def COVIDChestXRay(path: str) -> Dataset:
    """`COVID-chestxray <https://github.com/ieee8023/covid-chestxray-dataset>`_ dataset.

    The file structure should be like::

        <path>
            images/
                0a7faa2a.jpg
                ...
            volumes/
                coronacases_org_001.nii.gz
                ...
            metadata.csv
            ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()
    with open(os.path.join(root_path, "metadata.csv"), encoding="utf-8") as fp:
        for row in csv.DictReader(fp):
            folder = row.pop("folder")
            # The 20 images invovled in "volumes" folder currently are invalid to download.
            if folder == "volumes":
                continue
            data = Data(os.path.join(root_path, folder, row.pop("filename")))
            # Remaining columns (after popping the path/category fields)
            # become the classification attributes.
            data.label.classification = Classification(
                category=row.pop("finding").strip(),
                attributes=_convert_type(row))
            segment.append(data)
    return dataset
def Flower17(path: str) -> Dataset:
    """`17 Category Flower <http://www.robots.ox.ac.uk/~vgg/data/flowers/17/index.html>`_ dataset.

    The dataset ships 3 separate splits; the paper averages over all of them,
    but only (trn1, val1, tst1) is used here.

    The file structure should be like::

        <path>
            jpg/
                image_0001.jpg
                ...
            datasplits.mat

    Arguments:
        path: The root directory of the dataset.

    Raises:
        ModuleImportError: When the module "scipy" can not be found.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    try:
        from scipy.io import loadmat  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name) from error

    root_path = os.path.abspath(os.path.expanduser(path))
    splits = loadmat(os.path.join(root_path, "datasplits.mat"))
    dataset = Dataset(DATASET_NAME_17)
    dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog_17.json"))
    index_to_category = dataset.catalog.classification.get_index_to_category()
    for segment_name, split_key in _SEGMENT_NAMES_17.items():
        segment = dataset.create_segment(segment_name)
        image_indices = splits[split_key][0]
        image_indices.sort()
        for image_index in image_indices:
            data = Data(os.path.join(root_path, "jpg", f"image_{image_index:04d}.jpg"))
            # There are 80 images for each category
            data.label.classification = Classification(
                category=index_to_category[(image_index - 1) // 80])
            segment.append(data)
    return dataset
def RP2K(path: str) -> Dataset:
    """`RP2K <https://www.pinlandata.com/rp2k_dataset>`_ dataset.

    The file structure of RP2K looks like::

        <path>
            all/
                test/
                    <catagory>/
                        <image_name>.jpg
                        ...
                    ...
                train/
                    <catagory>/
                        <image_name>.jpg
                        ...
                    ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.join(os.path.abspath(os.path.expanduser(path)), "all")
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    for segment_name in ("train", "test"):
        segment = dataset.create_segment(segment_name)
        segment_path = os.path.join(root_path, segment_name)
        for category in sorted(os.listdir(segment_path)):
            category_dir = os.path.join(segment_path, category)
            # Only category directories contribute images.
            if not os.path.isdir(category_dir):
                continue
            for image_path in _glob(category_dir, ("*.jpg", "*.png")):
                # Spaces are not kept in the remote file name.
                remote_name = os.path.basename(image_path).replace(" ", "_")
                data = Data(local_path=image_path, target_remote_path=remote_name)
                data.label.classification = Classification(category)
                segment.append(data)
    return dataset
def test_dumps(self):
    """Serializing a label reproduces the contents it was loaded from."""
    contents = {
        "CLASSIFICATION": {"category": "cat", "attributes": {"gender": "male"}},
        "BOX2D": [
            {
                "box2d": {"xmin": 1, "ymin": 1, "xmax": 2, "ymax": 2},
                "category": "dog",
                "attributes": {"gender": "female"},
            }
        ],
    }

    label = Label()
    label.classification = Classification.loads(contents["CLASSIFICATION"])
    label.box2d = [LabeledBox2D.loads(box) for box in contents["BOX2D"]]

    assert label.dumps() == contents
def _get_original_png_label(stem: str) -> Classification:
    """Get label from stem of originalPng image name.

    Arguments:
        stem: Stem of originalPng image name like "blanket1-a".

    Returns:
        Label of originalPng image.

    """
    category, sample_number = stem.split("-", 1)
    return Classification(
        category=category,
        attributes={
            "original image sample number": sample_number,
            # Original (unpatched, unrotated) images have no patch number.
            "patch number": None,
            "rotated degrees": 0,
        },
    )
def _get_without_rotate_all_label(stem: str) -> Classification:
    """Get label from stem of withoutRotateAll image name.

    Arguments:
        stem: Stem of withoutRotateAll image name like "blanket1-a-p001".

    Returns:
        Label of withoutRotateAll image.

    """
    category, sample_number, patch = stem.split("-", 2)
    return Classification(
        category=category,
        attributes={
            "original image sample number": sample_number,
            # Drop the leading "p" of e.g. "p001" before parsing the number.
            "patch number": int(patch[1:]),
            "rotated degrees": 0,
        },
    )
def AADB(path: str) -> Dataset:
    """`AADB <https://www.ics.uci.edu/~skong2/aesthetics.html>`_ dataset.

    The file structure looks like::

        <path>
            AADB_newtest/
                0.500_farm1_487_20167490236_ae920475e2_b.jpg
                ...
            datasetImages_warp256/
                farm1_441_19470426814_baae1eb396_b.jpg
                ...
            imgListFiles_label/
                imgList<segment_name>Regression_<attribute_name>.txt
                ...

    Arguments:
        path: the root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    attribute_names = dataset.catalog.classification.attributes.keys()
    for mode, image_dir, label_file_prefix in _SEGMENTS_INFO:
        segment = dataset.create_segment(mode)
        attributes_map = _extract_attributes_map(root_path, label_file_prefix,
                                                 attribute_names)
        for image_name, attributes in attributes_map.items():
            # Files of the "new_test" split are stored with the aesthetic
            # score as a filename prefix, e.g. "0.500_<image_name>".
            if mode == "new_test":
                stored_name = f"{attributes['score']:.3f}_{image_name}"
            else:
                stored_name = image_name
            data = Data(os.path.join(root_path, image_dir, stored_name))
            data.label.classification = Classification(attributes=attributes)
            segment.append(data)
    return dataset
def Flower102(path: str) -> Dataset:
    """`102 Category Flower <http://www.robots.ox.ac.uk/~vgg/data/flowers/102/index.html>`_ dataset.

    The file structure should be like::

        <path>
            jpg/
                image_00001.jpg
                ...
            imagelabels.mat
            setid.mat

    Arguments:
        path: The root directory of the dataset.

    Raises:
        ModuleImportError: When the module "scipy" can not be found.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    try:
        from scipy.io import loadmat  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name) from error

    root_path = os.path.abspath(os.path.expanduser(path))
    category_indices = loadmat(os.path.join(root_path, "imagelabels.mat"))["labels"][0]
    splits = loadmat(os.path.join(root_path, "setid.mat"))
    dataset = Dataset(DATASET_NAME_102)
    dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog_102.json"))
    index_to_category = dataset.catalog.classification.get_index_to_category()
    for segment_name, split_key in _SEGMENT_NAMES_102.items():
        segment = dataset.create_segment(segment_name)
        image_indices = splits[split_key][0]
        image_indices.sort()
        for image_index in image_indices:
            data = Data(os.path.join(root_path, "jpg", f"image_{image_index:05d}.jpg"))
            # Both the image indices and the .mat category labels are 1-based.
            data.label.classification = Classification(
                index_to_category[int(category_indices[image_index - 1]) - 1])
            segment.append(data)
    return dataset
def ImageEmotionAbstract(path: str) -> Dataset:
    """`Image Emotion-abstract <https://www.imageemotion.org/>`_ dataset.

    The file structure should be like::

        <path>
            ABSTRACT_groundTruth.csv
            abstract_xxxx.jpg
            ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME_ABSTRACT)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog_abstract.json"))
    segment = dataset.create_segment()
    csv_path = os.path.join(root_path, "ABSTRACT_groundTruth.csv")
    with open(csv_path, "r", encoding="utf-8") as fp:
        reader = csv.DictReader(fp)
        # Header fields are wrapped in single quotes in the csv file.
        reader.fieldnames = [
            field.strip("'")
            for field in reader.fieldnames  # type:ignore[union-attr]
        ]
        for row in reader:
            # The unnamed first column holds the (quoted) image file name.
            data = Data(os.path.join(root_path, row.pop("").strip("'")))
            data.label.classification = Classification(
                attributes={key: int(value) for key, value in row.items()})
            segment.append(data)
    return dataset
def KenyanFoodOrNonfood(path: str) -> Dataset:
    """`Kenyan Food or Nonfood <https://github.com/monajalal/Kenyan-Food>`_ dataset.

    The file structure should be like::

        <path>
            images/
                food/
                    236171947206673742.jpg
                    ...
                nonfood/
                    168223407.jpg
                    ...
            data.csv
            split.py
            test.txt
            train.txt

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME_FOOD_OR_NONFOOD)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog_food_or_nonfood.json"))
    for segment_name, filename in SEGMENTS_FOOD_OR_NONFOOD.items():
        segment = dataset.create_segment(segment_name)
        with open(os.path.join(root_path, filename), encoding="utf-8") as fp:
            # Each line of the split file is a path relative to the dataset
            # root, e.g. "images/food/236171947206673742.jpg".
            for line in fp:
                relative_path = line.strip()
                data = Data(os.path.join(root_path, relative_path))
                # BUG FIX: the category ("food"/"nonfood") is the second
                # component of the RELATIVE path.  The previous code split the
                # absolute joined path, which yielded a component of root_path
                # instead of the category directory.
                data.label.classification = Classification(
                    relative_path.split("/")[1])
                segment.append(data)
    return dataset
def _get_classifications(
    label_path: str,
    attribute_names: Tuple[str, ...],
) -> Dict[str, Classification]:
    """Map each class id in "identity_meta.csv" to its Classification.

    Arguments:
        label_path: Directory containing "identity_meta.csv".
        attribute_names: Attribute keys, zipped in order with the
            (sample_num, flag, gender) columns of each row.

    Returns:
        A dict mapping class id (e.g. "n000001") to a Classification whose
        category is the class name.

    """
    all_classifications = {}
    with open(os.path.join(label_path, "identity_meta.csv"), encoding="utf-8") as fp:
        # The normal format of each line of the file is
        # Class_ID, Name, Sample_Num, Flag, Gender,
        # n000001, "14th_Dalai_Lama",424,0, m,
        # n000002, "A_Fine_Frenzy",315,1, f,
        # n000003, "A._A._Gill",205,1, m,
        # ...
        # islice(..., 1, None) skips the header row.
        for line in islice(csv.reader(fp), 1, None):
            # The normal format of each line of the file is
            # '<class_id>, "<class_name>",<sample_num>,<flag>,<gender>,\n'
            # but now there is an error type
            # '<class_id>, "<class_,name>",<sample_num>,<flag>,<gender>\n'
            # A well-formed line ends with a trailing comma, so csv.reader
            # yields an empty final field; a non-empty final field marks the
            # malformed variant whose class name contains a comma.
            if line[-1] != "":
                # join the splitted "class_ and name"
                line[1] = "".join(islice(line, 1, 3))
                # Starting from the 4th element, each element moves forward one step
                line.pop(2)
                line[4] = line[4].rstrip("\n")
            category_id = line[0]
            # Strip the surrounding quotes (and padding) from the class name.
            category_name = line[1].strip(' "')
            attributes = dict(
                zip(
                    attribute_names,
                    (
                        int(line[2]),
                        bool(int(line[3])),
                        line[4],
                    ),
                ))
            all_classifications[category_id] = Classification(
                category=category_name,
                attributes=attributes,
            )
    return all_classifications
def CoinImage(path: str) -> Dataset:
    """`Coin Image <https://cvl.tuwien.ac.at/research/cvl-databases/coin-image-dataset/>`_ dataset.

    The file structure should be like::

        <path>
            classes.csv
            <imagename>.png
            ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()
    with open(os.path.join(root_path, "classes.csv"), "r", encoding="utf-8") as fp:
        # classes.csv maps "<class_id>;<class_name>" per row.
        class_id_to_name: Dict[str, str] = dict(
            csv.reader(fp, delimiter=";"))  # type: ignore[arg-type, misc]
    for image_path in glob(os.path.join(root_path, "*.png")):
        # Skip the fixed 5-character filename prefix; the class id runs up
        # to the first "_".
        class_id = os.path.basename(image_path)[5:].split("_", 1)[0]
        data = Data(image_path)
        data.label.classification = Classification(
            category=class_id_to_name[class_id])
        segment.append(data)
    return dataset
def _get_data(path: str, annotations: Any, flag: bool) -> Iterator[Tuple[Data, str]]:
    """Yield (data, segment_name) pairs built from the .mat annotations.

    Several annotations may reference the same image file.  Only the first
    occurrence of a filepath yields a new Data (with its classification and
    segment); later occurrences append extra keypoints/boxes to the Data that
    was already yielded, mutating it in place.

    Arguments:
        path: Root directory of the dataset (images live under "<path>/images").
        annotations: Iterable of annotation records loaded from the .mat file.
        flag: Whether the records carry an "isunchecked" field to record.

    Yields:
        A tuple of a newly created Data and its segment name
        ("train", "test" or "bad").

    """
    filepath_to_data: Dict[str, Data] = {}
    for annotation in annotations:
        filepath = annotation["filepath"][0]
        keypoints = LabeledKeypoints2D(
            # Keep only the keypoint rows selected by _VALID_KEYPOINT_INDICES.
            annotation["coords"].T[_VALID_KEYPOINT_INDICES],
            attributes={
                "poselet_hit_idx": annotation["poselet_hit_idx"].T.tolist()
            },
        )
        box2d = LabeledBox2D(*annotation["torsobox"][0].tolist())
        if filepath not in filepath_to_data:
            # First annotation for this image: create the Data and its labels.
            data = Data(os.path.join(path, "images", filepath))
            data.label.keypoints2d = [keypoints]
            data.label.box2d = [box2d]
            attribute = {"currframe": int(annotation["currframe"][0][0])}
            if flag:
                attribute["isunchecked"] = bool(annotation["isunchecked"])
            # The movie name serves as the classification category.
            data.label.classification = Classification(
                category=annotation["moviename"][0], attributes=attribute)
            filepath_to_data[filepath] = data
            # Route the image to its split; records in neither split go to "bad".
            if annotation["istrain"]:
                segment_name = "train"
            elif annotation["istest"]:
                segment_name = "test"
            else:
                segment_name = "bad"
            yield data, segment_name
        else:
            # Repeated filepath: extend the labels of the already-yielded Data.
            image_data = filepath_to_data[filepath]
            image_data.label.keypoints2d.append(keypoints)
            image_data.label.box2d.append(box2d)