def _import(src_file, instance_data, load_data_callback=None):
    """Extract a YOLO archive and load its annotations into instance_data.

    load_data_callback, when given, is invoked with (dataset, instance_data)
    before the annotations are imported.
    """
    with TemporaryDirectory() as extracted_dir:
        Archive(src_file.name).extractall(extracted_dir)

        # Frame names are derived from every label (*.txt) file in the archive.
        label_files = glob(osp.join(extracted_dir, '**', '*.txt'), recursive=True)
        frames = [YoloExtractor.name_from_path(osp.relpath(path, extracted_dir))
            for path in label_files]
        root_hint = find_dataset_root(
            [DatasetItem(id=name) for name in frames], instance_data)

        # Collect known (height, width) pairs so the YOLO reader does not
        # need to open the images themselves to learn their sizes.
        image_info = {}
        for name in frames:
            try:
                frame_id = match_dm_item(DatasetItem(id=name), instance_data,
                    root_hint=root_hint)
                info = instance_data.frame_info[frame_id]
            except Exception: # nosec - best-effort match, unknown frames are skipped
                continue
            image_info[name] = (info['height'], info['width'])

        dataset = Dataset.import_from(extracted_dir, 'yolo',
            env=dm_env, image_info=image_info)
        if load_data_callback is not None:
            load_data_callback(dataset, instance_data)
        import_dm_annotations(dataset, instance_data)
def test_can_import(self):
    """A single zip archive of images is importable via the 'image_zip' format."""
    expected = Dataset.from_iterable([
        DatasetItem(id='1', image=np.ones((10, 10, 3))),
    ])

    archive = osp.join(DUMMY_DATASET_DIR, '1.zip')
    actual = Dataset.import_from(archive, format='image_zip')

    compare_datasets(self, expected, actual)
def test_can_import_masks(self):
    """ICDAR text segmentation data is parsed into grouped Mask annotations."""
    # (group, index, color, text, center, bitmap rows)
    mask_specs = [
        (0, 0, '108 225 132', 'F', '0 1',
            [[0, 1, 1, 0, 0], [0, 0, 0, 0, 0]]),
        (1, 1, '82 174 214', 'T', '1 3',
            [[0, 0, 0, 1, 0], [0, 0, 0, 1, 0]]),
        (1, 2, '241 73 144', 'h', '1 4',
            [[0, 0, 0, 0, 0], [0, 0, 0, 0, 1]]),
    ]
    expected_dataset = Dataset.from_iterable([
        DatasetItem(id='1', subset='train', image=np.ones((2, 5, 3)),
            annotations=[
                Mask(group=group, image=np.array(bitmap), attributes={
                    'index': index, 'color': color,
                    'text': text, 'center': center,
                })
                for group, index, color, text, center, bitmap in mask_specs
            ]),
    ])

    dataset = Dataset.import_from(
        osp.join(DUMMY_DATASET_DIR, 'text_segmentation'), 'icdar')

    compare_datasets(self, expected_dataset, dataset)
def _test_can_save_and_load(self, source_dataset, test_dir, **kwargs):
    """Round-trip source_dataset through the image_zip converter and compare.

    kwargs are forwarded to the converter; the optional 'name' key also
    selects which archive file to read back.
    """
    archive_name = kwargs.get('name', ImageZipPath.DEFAULT_ARCHIVE_NAME)
    archive_path = osp.join(test_dir, archive_name)

    ImageZipConverter.convert(source_dataset, test_dir, **kwargs)
    parsed_dataset = Dataset.import_from(archive_path, 'image_zip')

    compare_datasets(self, source_dataset, parsed_dataset)
def _import(src_file, instance_data, load_data_callback=None):
    """Unpack a TF Detection API archive and import its annotations.

    load_data_callback, when given, is invoked with (dataset, instance_data)
    before the annotations are imported.
    """
    with TemporaryDirectory() as extracted_dir:
        Archive(src_file.name).extractall(extracted_dir)

        dataset = Dataset.import_from(extracted_dir, 'tf_detection_api',
            env=dm_env)
        if load_data_callback is not None:
            load_data_callback(dataset, instance_data)
        import_dm_annotations(dataset, instance_data)
def test_can_import_from_directory(self):
    """A directory containing image archives is importable as 'image_zip'."""
    expected = Dataset.from_iterable([
        DatasetItem(id='1', image=np.ones((10, 10, 3))),
        DatasetItem(id='2', image=np.ones((5, 10, 3))),
    ])

    actual = Dataset.import_from(DUMMY_DATASET_DIR, format='image_zip')

    compare_datasets(self, expected, actual)
def test_can_save_in_another_format(self):
    """'merge' of a COCO and a VOC dataset can be written out as YOLO."""
    coco_source = Dataset.from_iterable([
        DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
            annotations=[
                Bbox(1, 2, 3, 3, label=0),
            ]),
    ], categories=['a', 'b'])

    voc_source = Dataset.from_iterable([
        DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
            annotations=[
                Bbox(1, 2, 3, 4, label=1),
                Bbox(5, 6, 2, 3, label=2),
            ]),
    ], categories=['a', 'b', 'c'])

    # Labels are remapped onto the merged category list, which gains a
    # leading 'background' entry from the VOC side.
    expected = Dataset.from_iterable([
        DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
            annotations=[
                Bbox(1, 2, 3, 4, label=2),
                Bbox(5, 6, 2, 3, label=3),
                Bbox(1, 2, 3, 3, label=1),
            ]),
    ], categories=['background', 'a', 'b', 'c'])

    with TestDir() as test_dir:
        coco_url = osp.join(test_dir, 'dataset1')
        voc_url = osp.join(test_dir, 'dataset2')
        coco_source.export(coco_url, 'coco', save_images=True)
        voc_source.export(voc_url, 'voc', save_images=True)

        result_dir = osp.join(test_dir, 'result')
        run(self, 'merge', '-o', result_dir, '-f', 'yolo',
            voc_url + ':voc', coco_url + ':coco',
            '--', '--save-images')

        compare_datasets(self, expected,
            Dataset.import_from(result_dir, 'yolo'), require_images=True)
def test_can_save_and_load_image_with_custom_extension(self):
    """'image_dir' honors the exts option when discovering image files."""
    expected = Dataset.from_iterable([
        DatasetItem(id='a/3',
            image=Image(path='a/3.qq', data=np.zeros((3, 4, 3)))),
    ])

    with TestDir() as test_dir:
        jpg_path = osp.join(test_dir, 'a', '3.jpg')
        save_image(jpg_path, expected.get('a/3').image.data, create_dir=True)
        # Give the saved image a non-standard extension to exercise exts=.
        os.rename(jpg_path, osp.join(test_dir, 'a', '3.qq'))

        actual = Dataset.import_from(test_dir, 'image_dir', exts='qq')

        compare_datasets(self, expected, actual, require_images=True)
def test_can_load_image(self):
    """CVAT image-mode XML is parsed into items with shapes and attributes."""
    expected = Dataset.from_iterable([
        DatasetItem(id='img0', subset='train',
            image=np.ones((8, 8, 3)),
            annotations=[
                Bbox(0, 2, 4, 2, label=0, z_order=1,
                    attributes={
                        'occluded': True,
                        'a1': True, 'a2': 'v3',
                    }),
                PolyLine([1, 2, 3, 4, 5, 6, 7, 8],
                    attributes={'occluded': False}),
            ],
            attributes={'frame': 0}),
        DatasetItem(id='img1', subset='train',
            image=np.ones((10, 10, 3)),
            annotations=[
                Polygon([1, 2, 3, 4, 6, 5], z_order=1,
                    attributes={'occluded': False}),
                Points([1, 2, 3, 4, 5, 6], label=1, z_order=2,
                    attributes={'occluded': False}),
            ],
            attributes={'frame': 1}),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable([
            ['label1', '', {'a1', 'a2'}],
            ['label2'],
        ]),
    })

    actual = Dataset.import_from(DUMMY_IMAGE_DATASET_DIR, 'cvat')

    compare_datasets(self, expected, actual)
def test_can_import_captions(self):
    """ICDAR word recognition data yields one Caption annotation per item."""
    expected = Dataset.from_iterable([
        DatasetItem(id='word_1', subset='train',
            image=np.ones((10, 15, 3)),
            annotations=[Caption('PROPER')]),
        DatasetItem(id='word_2', subset='train',
            image=np.ones((10, 15, 3)),
            annotations=[Caption('Canon')]),
    ])

    actual = Dataset.import_from(
        osp.join(DUMMY_DATASET_DIR, 'word_recognition'), 'icdar')

    compare_datasets(self, expected, actual)
def test_can_import_bboxes(self):
    """ICDAR text localization data yields polygons and boxes with 'text'."""
    expected = Dataset.from_iterable([
        DatasetItem(id='img_1', subset='train',
            image=np.ones((10, 15, 3)),
            annotations=[
                Polygon([0, 0, 3, 1, 4, 6, 1, 7],
                    attributes={'text': 'FOOD'}),
            ]),
        DatasetItem(id='img_2', subset='train',
            image=np.ones((10, 15, 3)),
            annotations=[
                Bbox(0, 0, 2, 3, attributes={'text': 'RED'}),
                Bbox(3, 3, 2, 3, attributes={'text': 'LION'}),
            ]),
    ])

    actual = Dataset.import_from(
        osp.join(DUMMY_DATASET_DIR, 'text_localization'), 'icdar')

    compare_datasets(self, expected, actual)
def test_can_load_video(self):
    """CVAT video (track/interpolation) XML is parsed into per-frame items.

    Checks three frames of the same tracks: keyframes with shapes, an
    'outside' frame where tracks end, and a frame whose image exists only
    as metadata (path + size, no pixel data).
    """
    expected_dataset = Dataset.from_iterable(
        [
            DatasetItem(id='frame_000010', subset='annotations',
                image=255 * np.ones((20, 25, 3)),
                annotations=[
                    Bbox(3, 4, 7, 1, label=2, id=0,
                        attributes={
                            'occluded': True, 'outside': False,
                            'keyframe': True, 'track_id': 0
                        }),
                    Points(
                        [21.95, 8.00, 2.55, 15.09, 2.23, 3.16],
                        label=0, id=1,
                        attributes={
                            'occluded': False, 'outside': False,
                            'keyframe': True, 'track_id': 1,
                            'hgl': 'hgkf',
                        }),
                ], attributes={'frame': 10}),

            DatasetItem(id='frame_000013', subset='annotations',
                image=255 * np.ones((20, 25, 3)),
                annotations=[
                    # Tracks 0 and 1 leave the frame here ('outside': True).
                    Bbox(7, 6, 7, 2, label=2, id=0,
                        attributes={
                            'occluded': False, 'outside': True,
                            'keyframe': True, 'track_id': 0
                        }),
                    Points(
                        [21.95, 8.00, 9.55, 15.09, 5.23, 1.16],
                        label=0, id=1,
                        attributes={
                            'occluded': False, 'outside': True,
                            'keyframe': True, 'track_id': 1,
                            'hgl': 'jk',
                        }),
                    PolyLine(
                        [
                            7.85, 13.88, 3.50, 6.67,
                            15.90, 2.00, 13.31, 7.21
                        ],
                        label=2, id=2,
                        attributes={
                            'occluded': False, 'outside': False,
                            'keyframe': True, 'track_id': 2,
                        }),
                ], attributes={'frame': 13}),

            DatasetItem(id='frame_000016', subset='annotations',
                # Only image metadata is available for this frame.
                image=Image(path='frame_0000016.png', size=(20, 25)),
                annotations=[
                    Bbox(8, 7, 6, 10, label=2, id=0,
                        attributes={
                            'occluded': False, 'outside': True,
                            'keyframe': True, 'track_id': 0
                        }),
                    PolyLine(
                        [
                            7.85, 13.88, 3.50, 6.67,
                            15.90, 2.00, 13.31, 7.21
                        ],
                        label=2, id=2,
                        attributes={
                            'occluded': False, 'outside': True,
                            'keyframe': True, 'track_id': 2,
                        }),
                ], attributes={'frame': 16}),
        ],
        categories={
            AnnotationType.label: LabelCategories.from_iterable(
                [['klhg', '', {'hgl'}], ['z U k'], ['II']]),
        })

    parsed_dataset = Dataset.import_from(DUMMY_VIDEO_DATASET_DIR, 'cvat')

    compare_datasets(self, expected_dataset, parsed_dataset)
def test_can_run_patch(self):
    """'patch --overwrite' replaces, keeps and adds items, remapping labels."""
    target = Dataset.from_iterable([
        # Replaced by the patch.
        DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
            annotations=[Bbox(1, 2, 3, 3, label=0)]),
        # Absent from the patch, so it survives unchanged.
        DatasetItem(id=1, image=np.ones((5, 4, 3)),
            annotations=[Bbox(1, 2, 3, 4, label=1)]),
    ], categories=['a', 'b'])

    patch = Dataset.from_iterable([
        # Overrides item 100 in the target.
        DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
            annotations=[
                Bbox(1, 2, 3, 4, label=0),  # remapped to the target's labels
                Bbox(5, 6, 2, 3, label=1),  # remapped to the target's labels
                Bbox(2, 2, 2, 3, label=2),  # 'c' is not in the target - dropped
            ]),
        # New item, gets added.
        DatasetItem(id=2, image=np.ones((5, 4, 3)),
            annotations=[
                Bbox(1, 2, 3, 2, label=1),  # remapped to the target's labels
            ]),
    ], categories=['b', 'a', 'c'])

    expected = Dataset.from_iterable([
        DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
            annotations=[
                Bbox(1, 2, 3, 4, label=1, id=1, group=1),
                Bbox(5, 6, 2, 3, label=0, id=2, group=2),
            ]),
        DatasetItem(id=1, image=np.ones((5, 4, 3)),
            annotations=[Bbox(1, 2, 3, 4, label=1, id=1, group=1)]),
        DatasetItem(id=2, image=np.ones((5, 4, 3)),
            annotations=[Bbox(1, 2, 3, 2, label=0, id=2, group=2)]),
    ], categories=['a', 'b'])

    with TestDir() as test_dir:
        target_url = osp.join(test_dir, 'dataset1')
        patch_url = osp.join(test_dir, 'dataset2')
        target.export(target_url, 'coco', save_images=True)
        patch.export(patch_url, 'voc', save_images=True)

        run(self, 'patch', '--overwrite',
            target_url + ':coco', patch_url + ':voc',
            '--', '--reindex=1', '--save-images')

        compare_datasets(self, expected,
            Dataset.import_from(target_url, format='coco'),
            require_images=True, ignored_attrs='*')
def test_can_load(self):
    """KITTI Raw data is parsed into items with Cuboid3d annotations.

    Each item carries its point cloud, the related camera image, and a
    'frame' attribute; cuboids keep their track_id and 'occluded' flag.
    """
    # Fixture paths: three consecutive frames of point clouds and images.
    pcd1 = osp.join(DUMMY_DATASET_DIR, 'velodyne_points', 'data',
        '0000000000.pcd')
    pcd2 = osp.join(DUMMY_DATASET_DIR, 'velodyne_points', 'data',
        '0000000001.pcd')
    pcd3 = osp.join(DUMMY_DATASET_DIR, 'velodyne_points', 'data',
        '0000000002.pcd')
    image1 = osp.join(DUMMY_DATASET_DIR, 'IMAGE_00', 'data',
        '0000000000.png')
    image2 = osp.join(DUMMY_DATASET_DIR, 'IMAGE_00', 'data',
        '0000000001.png')
    image3 = osp.join(DUMMY_DATASET_DIR, 'IMAGE_00', 'data',
        '0000000002.png')

    expected_label_cat = LabelCategories(attributes={'occluded'})
    expected_label_cat.add('bus')
    expected_label_cat.add('car')

    expected_dataset = Dataset.from_iterable(
        [
            DatasetItem(id='0000000000',
                annotations=[
                    Cuboid3d(position=[1, 2, 3], scale=[7.95, -3.62, -1.03],
                        label=1, attributes={
                            'occluded': False, 'track_id': 1
                        }),
                    Cuboid3d(position=[1, 1, 0], scale=[8.34, 23.01, -0.76],
                        label=0, attributes={
                            'occluded': False, 'track_id': 2
                        })
                ],
                point_cloud=pcd1, related_images=[image1],
                attributes={'frame': 0}),

            DatasetItem(id='0000000001',
                annotations=[
                    # Same track (track_id 2) as in frame 0, now occluded
                    # and rotated.
                    Cuboid3d(position=[0, 1, 0], scale=[8.34, 23.01, -0.76],
                        rotation=[1, 1, 3],
                        label=0, attributes={
                            'occluded': True, 'track_id': 2
                        })
                ],
                point_cloud=pcd2, related_images=[image2],
                attributes={'frame': 1}),

            DatasetItem(id='0000000002',
                annotations=[
                    Cuboid3d(position=[1, 2, 3], scale=[-9.41, 13.54, 0.24],
                        label=1, attributes={
                            'occluded': False, 'track_id': 3
                        })
                ],
                point_cloud=pcd3, related_images=[image3],
                attributes={'frame': 2})
        ],
        categories={AnnotationType.label: expected_label_cat})

    parsed_dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'kitti_raw')

    compare_datasets_3d(self, expected_dataset, parsed_dataset,
        require_point_cloud=True)
def _import(src_file, task_data, load_data_callback=None):
    """Unpack a TF Detection API archive and import annotations into task_data.

    load_data_callback, when given, is invoked with (dataset, task_data)
    before the annotations are imported — added (with a None default, so
    existing callers are unaffected) for consistency with the sibling
    importers that already accept it.
    """
    with TemporaryDirectory() as tmp_dir:
        Archive(src_file.name).extractall(tmp_dir)

        dataset = Dataset.import_from(tmp_dir, 'tf_detection_api', env=dm_env)
        if load_data_callback is not None:
            load_data_callback(dataset, task_data)
        import_dm_annotations(dataset, task_data)
def test_can_load(self):
    """Supervisely Point Cloud data is parsed into items with Cuboid3d shapes.

    Checks labels, per-object tags (attributes), per-frame tags, and the
    point cloud / related image paths of both frames.
    """
    # Fixture paths for the two frames and their related camera images.
    pcd1 = osp.join(DUMMY_DATASET_DIR, 'ds0', 'pointcloud', 'frame1.pcd')
    pcd2 = osp.join(DUMMY_DATASET_DIR, 'ds0', 'pointcloud', 'frame2.pcd')

    image1 = osp.join(DUMMY_DATASET_DIR, 'ds0', 'related_images',
        'frame1_pcd', 'img2.png')
    image2 = osp.join(DUMMY_DATASET_DIR, 'ds0', 'related_images',
        'frame2_pcd', 'img1.png')

    label_cat = LabelCategories(attributes={'tag1', 'tag3'})
    label_cat.add('car')
    label_cat.add('bus')

    expected_dataset = Dataset.from_iterable(
        [
            DatasetItem(id='frame1',
                annotations=[
                    Cuboid3d(id=755220128, label=0,
                        position=[0.47, 0.23, 0.79],
                        scale=[0.01, 0.01, 0.01],
                        attributes={
                            'track_id': 231825,
                            'tag1': 'fd', 'tag3': '4s'
                        }),
                    Cuboid3d(id=755337225, label=0,
                        position=[0.36, 0.64, 0.93],
                        scale=[0.01, 0.01, 0.01],
                        attributes={
                            'track_id': 231831,
                            'tag1': 'v12', 'tag3': ''
                        }),
                ],
                point_cloud=pcd1, related_images=[image1],
                attributes={
                    'frame': 0, 'description': '',
                    'tag1': '25dsd', 'tag2': 65
                }),

            DatasetItem(id='frame2',
                annotations=[
                    Cuboid3d(id=216, label=1,
                        position=[0.59, 14.41, -0.61],
                        attributes={
                            'track_id': 36,
                            'tag1': '', 'tag3': ''
                        })
                ],
                point_cloud=pcd2, related_images=[image2],
                attributes={
                    'frame': 1, 'description': ''
                }),
        ],
        categories={AnnotationType.label: label_cat})

    parsed_dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'sly_pointcloud')

    compare_datasets_3d(self, expected_dataset, parsed_dataset,
        require_point_cloud=True)