def mask_dataset(imgs_per_set=10, sub_datasets=None) -> List[TaggedData]:
    """Build mock tagged data with mask and image files for each sub-dataset.

    For every sub-dataset name, ``imgs_per_set`` entries named
    ``<sub_dataset>/mask_<i>.jpg`` are created first, followed by
    ``imgs_per_set`` entries named ``<sub_dataset>/image_<i>.jpg``. Each entry
    carries a fresh ``get_img()`` result as its data.

    Args:
        imgs_per_set: Number of mask entries (and of image entries) created
            per sub-dataset.
        sub_datasets: Sub-dataset names to generate; ``None`` selects
            ``["train", "val", "test"]``.

    Returns:
        The flat list of generated ``MockTaggedData`` objects.
    """
    subset_names = ["train", "val", "test"] if sub_datasets is None else sub_datasets
    collected: List[TaggedData] = []
    for subset_name in subset_names:
        # Masks come before images within each sub-dataset.
        for prefix in ("mask", "image"):
            for index in range(imgs_per_set):
                collected.append(
                    MockTaggedData(f"{subset_name}/{prefix}_{index}.jpg", get_img())
                )
    return collected
def classification_dataset(imgs_per_set=10, amount_categories=5) -> List[TaggedData]:
    """Build mock tagged data laid out as ``<subset>/<category>/<file>.jpg``.

    The subsets are ``train``, ``val`` and ``test``; the categories are
    ``cat0`` .. ``cat<amount_categories - 1>``. For every subset/category pair,
    ``imgs_per_set`` files named ``rgb_<i>.jpg`` are created first, followed by
    ``imgs_per_set`` files named ``bw_<i>.jpg``. Each entry carries a fresh
    ``get_img()`` result as its data.

    Args:
        imgs_per_set: Number of ``rgb_`` entries (and of ``bw_`` entries) per
            subset/category pair.
        amount_categories: Number of category folders to generate.

    Returns:
        The flat list of generated ``MockTaggedData`` objects.
    """
    collected: List[TaggedData] = []
    category_names = [f"cat{index}" for index in range(amount_categories)]
    for subset_name in ("train", "val", "test"):
        for category in category_names:
            # "rgb" entries precede "bw" entries for each category.
            for prefix in ("rgb", "bw"):
                for index in range(imgs_per_set):
                    collected.append(
                        MockTaggedData(
                            f"{subset_name}/{category}/{prefix}_{index}.jpg", get_img()
                        )
                    )
    return collected
def test_selective_operation():
    """Check that SelectiveSubsetOperation applies a different limit per subset.

    The input holds 2 subsets x 10 categories x 100 samples. After splitting
    and labelling by template, ``train`` is limited to 50 samples per label
    bin and ``val`` to 10, so the expected sizes are 500 and 100.
    """
    tagged_data = [
        MockTaggedData(
            relative_path=f"{subset}/cat{category}/{sample}.png", data=category
        )
        for subset in ("train", "val")
        for category in range(10)
        for sample in range(100)
    ]

    path_template = "{{subset}}/{{label}}/*.png"
    labeller = LabelByTemplate(template=path_template)
    splitter = SplitByTemplate(template=path_template)
    train_limiter = LimitSamplesByBin(sample_limit=50, bin_creator=lambda x: x.label)
    val_limiter = LimitSamplesByBin(sample_limit=10, bin_creator=lambda x: x.label)
    per_subset_limiter = SelectiveSubsetOperation(
        {"train": train_limiter, "val": val_limiter}
    )

    dataset = Dataset(
        operations=[splitter, labeller, per_subset_limiter],
        output_function=classification_output,
    )
    dataset.form(tagged_data)

    assert len(dataset.train) == 50 * 10
    assert len(dataset.val) == 10 * 10
def test_meta_provider():
    """Check that ``ds.meta.index_to_label`` maps label indices back to names.

    The input holds 2 subsets x 10 categories x 100 samples, where each
    sample's data is its category index and its folder is ``cat<index>``.
    After splitting, labelling and applying ``LabelToIndex``, every ``train``
    output must carry a label index that ``index_to_label`` resolves to the
    ``cat<index>`` folder the sample came from.
    """
    ds_tagged_data = []
    for subset in ["train", "val"]:
        for category_index in range(10):
            for img_index in range(100):
                new_object = MockTaggedData(
                    relative_path=f"{subset}/cat{category_index}/{img_index}.png",
                    data=category_index,
                )
                ds_tagged_data.append(new_object)

    template = "{{subset}}/{{label}}/*.png"
    add_label = LabelByTemplate(template=template)
    split_into_subset = SplitByTemplate(template=template)
    label_to_index = LabelToIndex()
    operations = [split_into_subset, add_label, label_to_index]

    ds = Dataset(operations=operations, output_function=classification_output)
    ds.form(ds_tagged_data)

    ds_mapping = ds.meta.index_to_label
    # classification_output yields (data, label), the same order unpacked in
    # test_classification_output. Here data is the category index the sample
    # was created with, and the label is presumably the index assigned by
    # LabelToIndex (TODO confirm). The label index is what must be looked up
    # in the mapping; unpacking the pair the other way round only passes when
    # the label-to-index assignment happens to be the identity.
    for category_index, label_index in ds.train:
        assert ds_mapping[label_index] == f"cat{category_index}"
def test_image():
    """Check that DataSetObject exposes width and height of its image data.

    Both a default image and one requested with ``bw=True`` are created at
    300x200 and must report those dimensions.
    """
    expected_width, expected_height = 300, 200

    colour_img = get_img(width=300, height=200)
    colour_object = DataSetObject(MockTaggedData("", colour_img))
    assert colour_object.width == expected_width
    assert colour_object.height == expected_height

    grey_img = get_img(width=300, height=200, bw=True)
    grey_object = DataSetObject(MockTaggedData("", grey_img))
    assert grey_object.width == expected_width
    assert grey_object.height == expected_height
def detection_collection(
    image_count=15,
    max_annotations_per_image=5,
    img_width=1280,
    img_height=720,
    amount_of_classes=3,
):
    """Create mock detection images with random bounding-box annotations.

    Args:
        image_count: Number of ``DataSetObject`` images to create; they are
            named ``0.jpg`` .. ``<image_count - 1>.jpg``.
        max_annotations_per_image: Largest number of annotations a single
            image may receive. Every image gets at least one.
        img_width: Width passed to ``get_random_bounding_box``.
        img_height: Height passed to ``get_random_bounding_box``.
        amount_of_classes: Number of classes passed to
            ``get_random_bounding_box`` and used to build the label mapping.

    Returns:
        A tuple ``(images, label_mapping)`` where ``label_mapping`` maps each
        class index to ``"label_<index>"``.

    Raises:
        ValueError: If ``max_annotations_per_image`` is less than 1 (raised by
            ``random.randint`` for an empty range).
    """
    images = [
        DataSetObject(tagged_data=MockTaggedData(f"{i}.jpg", get_img()))
        for i in range(image_count)
    ]
    for image in images:
        # random.randint includes BOTH endpoints, so the upper bound is the
        # maximum itself. Subtracting one made the maximum unreachable and
        # raised ValueError for max_annotations_per_image == 1.
        annotation_count = random.randint(1, max_annotations_per_image)
        for _ in range(annotation_count):
            image.add_annotation(
                get_random_bounding_box(
                    img_width=img_width,
                    img_height=img_height,
                    amount_of_classes=amount_of_classes,
                )
            )
    label_mapping = {
        label_index: f"label_{label_index}"
        for label_index in range(amount_of_classes)
    }
    return images, label_mapping
def test_mask():
    """Check that single_mask_output returns the image together with its mask.

    A DataSetObject is given one ``Mask`` annotation; indexing the dataset
    formed from it must give back the original image and mask data.
    """
    image_pixels = get_img()
    mask_pixels = get_img()
    image_entry = MockTaggedData("", image_pixels)
    mask_entry = MockTaggedData("", mask_pixels)

    ds_object = DataSetObject(image_entry, output_function=single_mask_output)
    ds_object.annotations.append(Mask(mask_entry))

    dataset = Dataset()
    dataset.form_from_ds_objects([ds_object])

    returned_image, returned_mask = dataset[0]
    assert np.all(returned_image == image_pixels)
    assert np.all(returned_mask == mask_pixels)
def test_different_input():
    """Check that a dataset with non-image data yields each expected output once.

    The data values are the integers 0..9 and the dataset uses
    ``triple_output``; every yielded item must be one of the not-yet-seen
    multiples of three in 0..27.
    """
    tagged_data = [MockTaggedData(f"{index}.png", index) for index in range(10)]
    dataset = Dataset(output_function=triple_output)
    dataset.form(tagged_data)

    # 0, 3, ..., 27: the values i * 3 for i in 0..9.
    remaining = set(range(0, 30, 3))
    for output in dataset:
        assert output in remaining
        # Removing the value means a repeated output fails the membership check.
        remaining.remove(output)
def mask_objects() -> List[DataSetObject]:
    """Create mock DataSetObjects for mask and image files in three subsets.

    For each subset in ``train``, ``test``, ``val`` and each type in ``mask``,
    ``image``, ten objects are created with relative path
    ``<subset>/<type>/<index>.jpg`` and data ``<subset><index>``.

    Returns:
        The list of 60 ``DataSetObject`` instances, in generation order.
    """
    inputs: List[DataSetObject] = []
    for subset_name in ["train", "test", "val"]:
        for img_type in ["mask", "image"]:
            for img_index in range(10):
                tagged_data = MockTaggedData(
                    relative_path=f"{subset_name}/{img_type}/{img_index}.jpg",
                    # Needs the f prefix: without it every object carried the
                    # same literal placeholder text instead of its own
                    # subset/index value.
                    data=f"{subset_name}{img_index}",
                )
                inputs.append(DataSetObject(tagged_data))
    return inputs
def test_classification_output():
    """Check that classification_output returns the data and the label.

    A DataSetObject with label 5 must output its original image data together
    with that label.
    """
    source_img = get_img()
    ds_object = DataSetObject(
        MockTaggedData("", source_img), output_function=classification_output
    )
    ds_object.label = 5

    returned_img, returned_label = ds_object.output()

    assert np.all(returned_img == source_img)
    assert returned_label == 5
def test_coco(detection_collection):
    """Check that SingleCoco produces one dataset entry per annotated image.

    The detection collection is converted to a COCO dict and supplied as
    ``train.json`` next to the images themselves; the formed dataset must be
    as long as the image list.

    Args:
        detection_collection: An ``(images, label_mapping)`` pair.
    """
    images, label_mapping = detection_collection
    coco_dict = detection_collection_to_coco_dict(images, label_mapping)
    annotation_entry = MockTaggedData(relative_path="train.json", data=coco_dict)

    # The annotation file goes first, followed by every image.
    all_inputs = [annotation_entry, *images]

    coco_operation = SingleCoco(annotation_file="train.json", data_folder="")
    dataset = Dataset(operations=[coco_operation])
    dataset.form(all_inputs)

    assert len(dataset) == len(images)
def test_adding():
    """Check that adding the train and val subsets reproduces the full dataset.

    The input holds 100 samples in each of ``train`` and ``val``. After
    splitting by template, ``ds.train + ds.val`` must have the same length and
    the same set of objects as the dataset itself.
    """
    tagged_data = [
        MockTaggedData(
            relative_path=f"{subset}/{sample}.png", data=f"{subset}/{sample}.png"
        )
        for subset in ("train", "val")
        for sample in range(100)
    ]

    splitter = SplitByTemplate(template="{{subset}}/*.png")
    dataset = Dataset(operations=[splitter], output_function=classification_output)
    dataset.form(tagged_data)

    assert len(dataset) == len(dataset.train + dataset.val)
    assert set(dataset.objects) == set((dataset.train + dataset.val).objects)
def __init__(self, rel_path: RelFilePath, data: Any):
    """Initialise the parent class with a ``MockTaggedData`` built from the arguments.

    Args:
        rel_path: Relative path handed to ``MockTaggedData``.
        data: Payload handed to ``MockTaggedData``.
    """
    mock_entry = MockTaggedData(rel_path, data)
    super().__init__(mock_entry)