def test_patch_fails_on_inplace_update_without_overwrite(self):
    dataset = Dataset.from_iterable([
        DatasetItem(id=1, image=np.zeros((3, 5, 3)), annotations=[
            Bbox(1, 2, 3, 4, label=1),
        ]),
    ], categories=['a', 'b'])

    patch = Dataset.from_iterable([
        DatasetItem(id=2, image=np.zeros((3, 4, 3)), annotations=[
            Bbox(1, 2, 3, 2, label=1),
        ]),
    ], categories=['b', 'a', 'c'])

    with TestDir() as test_dir:
        dataset_url = osp.join(test_dir, 'dataset1')
        patch_url = osp.join(test_dir, 'dataset2')

        dataset.export(dataset_url, 'coco', save_images=True)
        patch.export(patch_url, 'coco', save_images=True)

        run(self, 'patch', dataset_url + ':coco', patch_url + ':coco',
            expected_code=1)
def test_json_report(self):
    with suppress_output(), TestDir() as test_dir:
        report_path = osp.join(test_dir, 'report.json')

        run(self, 'detect-format', '--show-rejections',
            '--json-report', report_path, ADE20K2017_DIR)

        with open(report_path, 'rb') as report_file:
            report = json.load(report_file)

        self.assertIsInstance(report, dict)

        self.assertIn('detected_formats', report)
        self.assertEqual(['ade20k2017'], report['detected_formats'])

        self.assertIn('rejected_formats', report)

        self.assertIn('ade20k2020', report['rejected_formats'])
        ade20k2020_rejection = report['rejected_formats']['ade20k2020']
        self.assertIn('reason', ade20k2020_rejection)
        self.assertEqual(ade20k2020_rejection['reason'], 'unmet_requirements')
        self.assertIn('message', ade20k2020_rejection)
        self.assertIsInstance(ade20k2020_rejection['message'], str)
        self.assertIn('*/**/*.json', ade20k2020_rejection['message'])

        self.assertIn('image_dir', report['rejected_formats'])
        image_dir_rejection = report['rejected_formats']['image_dir']
        self.assertIn('reason', image_dir_rejection)
        self.assertEqual(image_dir_rejection['reason'],
            'insufficient_confidence')
        self.assertIn('message', image_dir_rejection)
        self.assertIsInstance(image_dir_rejection['message'], str)
def test_can_run_equality_diff(self):
    dataset1 = Dataset.from_iterable([
        DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
            annotations=[
                Bbox(1, 2, 3, 4, label=0),
            ]),
    ], categories=['a', 'b'])

    dataset2 = Dataset.from_iterable([
        DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
            annotations=[
                Bbox(1, 2, 3, 4, label=1),
                Bbox(5, 6, 7, 8, label=2),
            ]),
    ], categories=['a', 'b', 'c'])

    with TestDir() as test_dir:
        dataset1_url = osp.join(test_dir, 'dataset1')
        dataset2_url = osp.join(test_dir, 'dataset2')

        dataset1.export(dataset1_url, 'coco', save_images=True)
        dataset2.export(dataset2_url, 'voc', save_images=True)

        result_dir = osp.join(test_dir, 'cmp_result')
        run(self, 'diff', dataset1_url + ':coco', dataset2_url + ':voc',
            '-m', 'equality', '-o', result_dir)

        self.assertEqual({'diff.json'}, set(os.listdir(result_dir)))
def test_convert_from_voc_format(self):
    """
    <b>Description:</b>
    Ensure that the dataset can be converted from VOC format
    with the `datum convert` command.

    <b>Expected results:</b>
    An ImageNet dataset that matches the expected dataset.

    <b>Steps:</b>
    1. Get path to the source dataset from assets.
    2. Convert the source dataset to ImageNet format,
       using the `convert` command.
    3. Verify that the resulting dataset is equal to the expected dataset.
    """
    labels = sorted([l.name for l in VOC.VocLabel if l.value % 2 == 1])

    expected_dataset = Dataset.from_iterable([
        DatasetItem(id='/'.join([label, '2007_000001']),
            subset='default', annotations=[Label(i)])
        for i, label in enumerate(labels)
    ] + [
        DatasetItem(id='no_label/2007_000002', subset='default',
            image=np.ones((10, 20, 3)))
    ], categories=labels)

    voc_dir = osp.join(DUMMY_DATASETS_DIR, 'voc_dataset1')
    with TestDir() as test_dir:
        imagenet_dir = osp.join(test_dir, 'imagenet')

        run(self, 'convert', '-if', 'voc', '-i', voc_dir,
            '-f', 'imagenet', '-o', imagenet_dir, '--', '--save-images')

        target_dataset = Dataset.import_from(imagenet_dir,
            format='imagenet')
        compare_datasets(self, expected_dataset, target_dataset,
            require_images=True)
def test_transform_fails_on_inplace_update_of_stage(self):
    with TestDir() as test_dir:
        dataset_url = osp.join(test_dir, 'dataset')
        dataset = Dataset.from_iterable([
            DatasetItem(id=1, annotations=[Bbox(1, 2, 3, 4, label=1)]),
        ], categories=['a', 'b'])
        dataset.export(dataset_url, 'coco', save_images=True)

        project_dir = osp.join(test_dir, 'proj')
        with Project.init(project_dir) as project:
            project.import_source('source-1', dataset_url, 'coco',
                no_cache=True)
            project.commit('first commit')

        with self.subTest('without overwrite'):
            run(self, 'transform', '-p', project_dir,
                '-t', 'random_split', 'HEAD:source-1',
                expected_code=1)

        with self.subTest('with overwrite'):
            with self.assertRaises(ReadonlyDatasetError):
                run(self, 'transform', '-p', project_dir, '--overwrite',
                    '-t', 'random_split', 'HEAD:source-1')
def test_can_delete_labels_from_yolo_dataset(self):
    target_dataset = Dataset.from_iterable([
        DatasetItem(id='1', subset='train', image=np.ones((10, 15, 3)),
            annotations=[Bbox(0.0, 2.0, 4.0, 2.0, label=0)])
    ], categories=['label_2'])

    with TestDir() as test_dir:
        # Resolve the repository root from this file's path to find
        # the test assets
        yolo_dir = osp.join(
            __file__[:__file__.rfind(osp.join('tests', ''))],
            'tests', 'assets', 'yolo_dataset')

        run(self, 'create', '-o', test_dir)
        run(self, 'import', '-p', test_dir, '-f', 'yolo', yolo_dir)

        run(self, 'filter', '-p', test_dir, '-m', 'i+a',
            '-e', "/item/annotation[label='label_2']")

        run(self, 'transform', '-p', test_dir, '-t', 'remap_labels',
            '--', '-l', 'label_2:label_2', '--default', 'delete')

        export_dir = osp.join(test_dir, 'export')
        run(self, 'export', '-p', test_dir, '-o', export_dir,
            '-f', 'yolo', '--', '--save-images')

        parsed_dataset = Dataset.import_from(export_dir, format='yolo')
        compare_datasets(self, target_dataset, parsed_dataset)
def test_can_convert_voc_to_yolo(self):
    target_dataset = Dataset.from_iterable([
        DatasetItem(id='2007_000001', subset='train',
            image=np.ones((10, 20, 3)),
            annotations=[
                Bbox(1.0, 2.0, 2.0, 2.0, label=8),
                Bbox(4.0, 5.0, 2.0, 2.0, label=15),
                Bbox(5.5, 6, 2, 2, label=22),
            ])
    ], categories=[label.name for label in
        VOC.make_voc_categories()[AnnotationType.label]])

    with TestDir() as test_dir:
        voc_dir = osp.join(
            __file__[:__file__.rfind(osp.join('tests', ''))],
            'tests', 'assets', 'voc_dataset', 'voc_dataset1')
        yolo_dir = osp.join(test_dir, 'yolo_dir')

        run(self, 'convert', '-if', 'voc', '-i', voc_dir,
            '-f', 'yolo', '-o', yolo_dir, '--', '--save-images')

        parsed_dataset = Dataset.import_from(yolo_dir, format='yolo')
        compare_datasets(self, target_dataset, parsed_dataset,
            require_images=True)
def test_unambiguous(self):
    output_file = io.StringIO()

    with contextlib.redirect_stdout(output_file):
        run(self, 'detect-format', ADE20K2017_DIR)

    output = output_file.getvalue()

    self.assertIn(Ade20k2017Importer.NAME, output)
    self.assertNotIn(Ade20k2020Importer.NAME, output)
def test_can_convert_voc_as_coco(self):
    voc_dir = osp.join(
        __file__[:__file__.rfind(osp.join('tests', ''))],
        'tests', 'assets', 'voc_dataset', 'voc_dataset1')

    with TestDir() as test_dir:
        result_dir = osp.join(test_dir, 'coco_export')

        run(self, 'convert', '-if', 'voc', '-i', voc_dir,
            '-f', 'coco', '-o', result_dir,
            '--', '--save-images', '--reindex', '1')

        self.assertTrue(osp.isdir(result_dir))
def test_can_save_in_another_format(self):
    dataset1 = Dataset.from_iterable([
        DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
            annotations=[
                Bbox(1, 2, 3, 3, label=0),
            ]),
    ], categories=['a', 'b'])

    dataset2 = Dataset.from_iterable([
        DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
            annotations=[
                Bbox(1, 2, 3, 4, label=1),
                Bbox(5, 6, 2, 3, label=2),
            ]),
    ], categories=['a', 'b', 'c'])

    expected = Dataset.from_iterable([
        DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
            annotations=[
                Bbox(1, 2, 3, 4, label=2),
                Bbox(5, 6, 2, 3, label=3),
                Bbox(1, 2, 3, 3, label=1),
            ]),
    ], categories=['background', 'a', 'b', 'c'])

    with TestDir() as test_dir:
        dataset1_url = osp.join(test_dir, 'dataset1')
        dataset2_url = osp.join(test_dir, 'dataset2')

        dataset1.export(dataset1_url, 'coco', save_images=True)
        dataset2.export(dataset2_url, 'voc', save_images=True)

        result_dir = osp.join(test_dir, 'result')
        run(self, 'merge', '-o', result_dir, '-f', 'yolo',
            dataset2_url + ':voc', dataset1_url + ':coco',
            '--', '--save-images')

        compare_datasets(self, expected,
            Dataset.import_from(result_dir, 'yolo'),
            require_images=True)
def test_rejections(self):
    output_file = io.StringIO()

    with contextlib.redirect_stdout(output_file):
        run(self, 'detect-format', '--show-rejections', ADE20K2017_DIR)

    output = output_file.getvalue()

    self.assertIn(Ade20k2017Importer.NAME, output)

    self.assertIn(Ade20k2020Importer.NAME, output)
    self.assertIn('*/**/*.json', output)

    self.assertIn(ImageDirImporter.NAME, output)
def test_transform_fails_on_inplace_update_without_overwrite(self):
    with TestDir() as test_dir:
        Dataset.from_iterable([
            DatasetItem(id=1, annotations=[Bbox(1, 2, 3, 4, label=1)]),
        ], categories=['a', 'b']).export(test_dir, 'coco')

        run(self, 'transform', '-t', 'random_split', test_dir + ':coco',
            expected_code=1)
def test_can_display_video_import_warning_in_add(self):
    with TestDir() as test_dir:
        proj_dir = osp.join(test_dir, 'proj')
        run(self, 'create', '-o', proj_dir)

        video_dir = osp.join(proj_dir, 'src')
        os.makedirs(video_dir)
        make_sample_video(osp.join(video_dir, 'video.avi'), frames=4)

        with self.assertLogs() as capture:
            run(self, 'add', '-f', 'video_frames', '-p', proj_dir,
                '-r', 'video.avi', video_dir)

        self.assertTrue('results across multiple runs' in
            '\n'.join(capture.output))
def test_can_split_video(self):
    on_exit_do(MediaManager.get_instance().clear)

    test_dir = scope_add(TestDir())
    video_path = osp.join(test_dir, 'video.avi')
    make_sample_video(video_path, frames=10)
    output_dir = osp.join(test_dir, 'result')

    # Pass self as the test case, consistent with the other tests here
    run(self, 'util', 'split_video', '-i', video_path, '-o', output_dir,
        '--image-ext', '.jpg', '--start-frame', '2',
        '--end-frame', '8', '--step', '2')

    assert set(os.listdir(output_dir)) == \
        {'%06d.jpg' % n for n in range(2, 8, 2)}
def test_convert_to_voc_format(self):
    """
    <b>Description:</b>
    Ensure that the dataset can be converted to VOC format
    with the `datum convert` command.

    <b>Expected results:</b>
    A VOC dataset that matches the expected dataset.

    <b>Steps:</b>
    1. Get path to the source dataset from assets.
    2. Convert the source dataset to VOC format,
       using the `convert` command.
    3. Verify that the resulting dataset is equal to the expected dataset.
    """
    label_map = OrderedDict(('label_' + str(i), [None, [], []])
        for i in range(10))
    label_map['background'] = [None, [], []]
    label_map.move_to_end('background', last=False)

    expected_dataset = Dataset.from_iterable([
        DatasetItem(id='1', subset='default', image=np.ones((16, 16, 3)),
            annotations=[
                Bbox(0.0, 4.0, 4.0, 8.0,
                    attributes={
                        'difficult': False,
                        'truncated': False,
                        'occluded': False,
                        'visibility': '1.0',
                        'ignored': 'False',
                    },
                    id=1, label=3, group=1
                )
            ]
        )
    ], categories=VOC.make_voc_categories(label_map))

    mot_dir = osp.join(
        __file__[:__file__.rfind(osp.join('tests', ''))],
        'tests', 'assets', 'mot_dataset')
    with TestDir() as test_dir:
        voc_dir = osp.join(test_dir, 'voc')

        run(self, 'convert', '-if', 'mot_seq', '-i', mot_dir,
            '-f', 'voc', '-o', voc_dir, '--', '--save-images')

        target_dataset = Dataset.import_from(voc_dir, format='voc')
        compare_datasets(self, expected_dataset, target_dataset,
            require_images=True)
def test_ambiguous(self):
    with TestDir() as test_dir:
        annotation_dir = osp.join(test_dir, 'training/street')
        os.makedirs(annotation_dir)

        for asset in [
            osp.join(ADE20K2017_DIR, 'training/street/1_atr.txt'),
            osp.join(ADE20K2020_DIR, 'training/street/1.json'),
        ]:
            shutil.copy(asset, annotation_dir)

        output_file = io.StringIO()

        with contextlib.redirect_stdout(output_file):
            run(self, 'detect-format', test_dir)

        output = output_file.getvalue()

        self.assertIn(Ade20k2017Importer.NAME, output)
        self.assertIn(Ade20k2020Importer.NAME, output)
def test_can_transform_dataset_inplace(self):
    test_dir = scope_add(TestDir())
    Dataset.from_iterable([
        DatasetItem(1, annotations=[Label(0)]),
        DatasetItem(2, annotations=[Label(1)]),
    ], categories=['a', 'b']).export(test_dir, 'coco')

    run(self, 'transform', '-t', 'remap_labels', '--overwrite',
        test_dir + ':coco', '--', '-l', 'a:cat', '-l', 'b:dog')

    expected_dataset = Dataset.from_iterable([
        DatasetItem(1, annotations=[Label(0, id=1, group=1)]),
        DatasetItem(2, annotations=[Label(1, id=2, group=2)]),
    ], categories=['cat', 'dog'])
    compare_datasets(self, expected_dataset,
        Dataset.import_from(test_dir, 'coco'), ignored_attrs='*')
def test_can_save_and_load_yolo_dataset(self):
    target_dataset = Dataset.from_iterable([
        DatasetItem(id='1', subset='train', image=np.ones((10, 15, 3)),
            annotations=[
                Bbox(3.0, 3.0, 2.0, 3.0, label=4),
                Bbox(0.0, 2.0, 4.0, 2.0, label=2),
            ])
    ], categories=['label_' + str(i) for i in range(10)])

    with TestDir() as test_dir:
        yolo_dir = osp.join(
            __file__[:__file__.rfind(osp.join('tests', ''))],
            'tests', 'assets', 'yolo_dataset')

        run(self, 'create', '-o', test_dir)
        run(self, 'import', '-p', test_dir, '-f', 'yolo', yolo_dir)

        export_dir = osp.join(test_dir, 'export_dir')
        run(self, 'export', '-p', test_dir, '-o', export_dir,
            '-f', 'yolo', '--', '--save-images')

        parsed_dataset = Dataset.import_from(export_dir, format='yolo')
        compare_datasets(self, target_dataset, parsed_dataset)
def test_can_list_project_info(self):
    coco_dir = osp.join(
        __file__[:__file__.rfind(osp.join('tests', ''))],
        'tests', 'assets', 'coco_dataset', 'coco_instances')

    with TestDir() as test_dir:
        run(self, 'create', '-o', test_dir)
        run(self, 'import', '-f', 'coco', '-p', test_dir, coco_dir)

        with self.subTest("on project"):
            run(self, 'project', 'info', '-p', test_dir)

        with self.subTest("on project revision"):
            run(self, 'project', 'info', '-p', test_dir, 'HEAD')
def test_can_change_extension_for_images_in_zip(self):
    source_dataset = Dataset.from_iterable([
        DatasetItem(id='1', image=np.ones((5, 5, 3))),
        DatasetItem(id='2', image=np.ones((2, 8, 3))),
    ])

    with TestDir() as test_dir:
        source_dataset.export(test_dir, format='image_dir',
            image_ext='.jpg')
        zip_path = osp.join(test_dir, 'images.zip')
        make_zip_archive(test_dir, zip_path)

        proj_dir = osp.join(test_dir, 'proj')
        run(self, 'create', '-o', proj_dir)
        run(self, 'import', '-p', proj_dir, '-f', 'image_zip', zip_path)

        export_path = osp.join(test_dir, 'export.zip')
        run(self, 'export', '-p', proj_dir, '-f', 'image_zip',
            '-o', test_dir, '--overwrite', '--',
            '--name', osp.basename(export_path), '--image-ext', '.png')

        self.assertTrue(osp.isfile(export_path))
        with ZipFile(export_path, 'r') as zf:
            images = {f.filename for f in zf.filelist}
            self.assertEqual({'1.png', '2.png'}, images)
def test_can_chain_transforms_in_working_tree_without_hashing(self):
    test_dir = scope_add(TestDir())
    source_url = osp.join(test_dir, 'test_repo')
    dataset = Dataset.from_iterable([
        DatasetItem(1, annotations=[Label(0)]),
        DatasetItem(2, annotations=[Label(1)]),
    ], categories=['a', 'b'])
    dataset.save(source_url)

    project_dir = osp.join(test_dir, 'proj')
    run(self, 'create', '-o', project_dir)
    run(self, 'import', '-p', project_dir, '-n', 'source1',
        '--format', DEFAULT_FORMAT, source_url)
    run(self, 'filter', '-p', project_dir,
        '-e', '/item/annotation[label="b"]')
    run(self, 'transform', '-p', project_dir,
        '-t', 'rename', '--', '-e', '|2|qq|')
    run(self, 'transform', '-p', project_dir,
        '-t', 'remap_labels', '--', '-l', 'a:cat', '-l', 'b:dog')

    project = scope_add(Project(project_dir))
    built_dataset = project.working_tree.make_dataset()

    expected_dataset = Dataset.from_iterable([
        DatasetItem('qq', annotations=[Label(1)]),
    ], categories=['cat', 'dog'])
    compare_datasets(self, expected_dataset, built_dataset)

    with self.assertRaises(Exception):
        compare_dirs(self, source_url, project.source_data_dir('source1'))

    source1_target = project.working_tree.build_targets['source1']
    self.assertEqual(4, len(source1_target.stages))
    self.assertEqual('', source1_target.stages[0].hash)
    self.assertEqual('', source1_target.stages[1].hash)
    self.assertEqual('', source1_target.stages[2].hash)
def test_can_export_coco_as_voc(self):
    # TODO: use subformats once importers are removed
    coco_dir = osp.join(
        __file__[:__file__.rfind(osp.join('tests', ''))],
        'tests', 'assets', 'coco_dataset', 'coco_instances')

    with TestDir() as test_dir:
        run(self, 'create', '-o', test_dir)
        run(self, 'import', '-f', 'coco', '-p', test_dir, coco_dir)

        result_dir = osp.join(test_dir, 'voc_export')
        run(self, 'export', '-f', 'voc', '-p', test_dir,
            '-o', result_dir, '--', '--save-images')

        self.assertTrue(osp.isdir(result_dir))
def test_can_export_zip_images_from_coco_dataset(self):
    with TestDir() as test_dir:
        coco_dir = osp.join(
            __file__[:__file__.rfind(osp.join('tests', ''))],
            'tests', 'assets', 'coco_dataset', 'coco')

        run(self, 'create', '-o', test_dir)
        run(self, 'import', '-p', test_dir, '-f', 'coco', coco_dir)

        export_path = osp.join(test_dir, 'export.zip')
        run(self, 'export', '-p', test_dir, '-f', 'image_zip',
            '-o', test_dir, '--overwrite', '--',
            '--name', osp.basename(export_path))

        self.assertTrue(osp.isfile(export_path))
        with ZipFile(export_path, 'r') as zf:
            images = {f.filename for f in zf.filelist}
            self.assertEqual({'a.jpg', 'b.jpg'}, images)
def test_export_to_voc_format(self):
    label_map = OrderedDict(('label_%s' % i, [None, [], []])
        for i in range(10))
    label_map['background'] = [None, [], []]
    label_map.move_to_end('background', last=False)

    expected_dataset = Dataset.from_iterable([
        DatasetItem(id='1', subset='train', image=np.ones((10, 15, 3)),
            annotations=[
                Bbox(0.0, 2.0, 4.0, 2.0,
                    attributes={
                        'difficult': False,
                        'truncated': False,
                        'occluded': False,
                    },
                    id=1, label=3, group=1
                ),
                Bbox(3.0, 3.0, 2.0, 3.0,
                    attributes={
                        'difficult': False,
                        'truncated': False,
                        'occluded': False,
                    },
                    id=2, label=5, group=2
                ),
            ]
        )
    ], categories=VOC.make_voc_categories(label_map))

    with TestDir() as test_dir:
        yolo_dir = osp.join(
            __file__[:__file__.rfind(osp.join('tests', ''))],
            'tests', 'assets', 'yolo_dataset')

        run(self, 'create', '-o', test_dir)
        run(self, 'import', '-p', test_dir, '-f', 'yolo', yolo_dir)

        voc_export = osp.join(test_dir, 'voc_export')
        run(self, 'export', '-p', test_dir, '-f', 'voc',
            '-o', voc_export, '--', '--save-images')

        parsed_dataset = Dataset.import_from(voc_export, format='voc')
        compare_datasets(self, expected_dataset, parsed_dataset,
            require_images=True)
def _test_can_save_and_load(self, project_path, source_path,
        expected_dataset, dataset_format, result_path='', label_map=None):
    run(self, 'create', '-o', project_path)

    extra_args = []
    if result_path:
        extra_args += ['-r', result_path]
    run(self, 'import', '-p', project_path, '-f', dataset_format,
        *extra_args, source_path)

    result_dir = osp.join(project_path, 'result')
    extra_args = ['--', '--save-images']
    if label_map:
        extra_args += ['--label-map', label_map]
    run(self, 'export', '-f', dataset_format, '-p', project_path,
        '-o', result_dir, *extra_args)

    result_path = osp.join(result_dir, result_path)
    parsed_dataset = Dataset.import_from(result_path, dataset_format)
    compare_datasets(self, expected_dataset, parsed_dataset,
        require_images=True)
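# A minimal usage sketch for the helper above: a hypothetical test that
# round-trips an asset directory through a format. The names `camvid_dir`
# and the expected item are illustrative assumptions, not assets that
# necessarily exist in this repository:
#
#   def test_can_save_and_load_camvid(self):
#       expected = Dataset.from_iterable([
#           DatasetItem(id='1', image=np.ones((10, 15, 3))),
#       ])
#       with TestDir() as test_dir:
#           self._test_can_save_and_load(
#               osp.join(test_dir, 'proj'), camvid_dir, expected,
#               'camvid', label_map='camvid')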
def test_label_projection_with_masks(self):
    expected_dataset = Dataset.from_iterable([
        DatasetItem(id='2007_000001', subset='train',
            image=np.ones((10, 20, 3)),
            annotations=[
                Bbox(1, 2, 2, 2, label=3,
                    attributes={
                        'pose': VOC.VocPose(1).name,
                        'truncated': True,
                        'difficult': False,
                        'occluded': False,
                    },
                    id=1, group=1,
                ),
            ]
        ),
        DatasetItem(id='2007_000002', subset='test',
            image=np.ones((10, 20, 3))),
    ], categories=VOC.make_voc_categories({
        'background': [(0, 0, 0), [], []], # Added on export
        'a': [(128, 0, 0), [], []], # Generated by the transform
        'b': [(0, 128, 0), [], []], # Generated by the transform
        'cat': [(64, 0, 0), [], []], # Original
    }))

    dataset_path = osp.join(DUMMY_DATASETS_DIR, 'voc_dataset1')

    with TestDir() as test_dir:
        run(self, 'create', '-o', test_dir)
        run(self, 'import', '-p', test_dir, '-f', 'voc', dataset_path)

        run(self, 'transform', '-p', test_dir, '-t', 'project_labels',
            '--', '-l', 'a', '-l', 'b', '-l', 'cat')

        parsed_dataset = Dataset.import_from(
            osp.join(test_dir, 'source-1'), 'voc')
        compare_datasets(self, expected_dataset, parsed_dataset)
def test_can_export_mot_as_yolo(self):
    target_dataset = Dataset.from_iterable([
        DatasetItem(id='1', subset='train',
            annotations=[Bbox(0.0, 4.0, 4.0, 8.0, label=2)])
    ], categories=['label_' + str(i) for i in range(10)])

    with TestDir() as test_dir:
        mot_dir = osp.join(
            __file__[:__file__.rfind(osp.join('tests', ''))],
            'tests', 'assets', 'mot_dataset')

        run(self, 'create', '-o', test_dir)
        run(self, 'import', '-p', test_dir, '-f', 'mot_seq', mot_dir)

        yolo_dir = osp.join(test_dir, 'yolo_dir')
        run(self, 'export', '-p', test_dir, '-o', yolo_dir,
            '-f', 'yolo', '--', '--save-images')

        parsed_dataset = Dataset.import_from(yolo_dir, format='yolo')
        compare_datasets(self, target_dataset, parsed_dataset)
def test_can_save_and_load(self):
    source_dataset = Dataset.from_iterable([
        DatasetItem(id='1', image=np.ones((5, 5, 3))),
        DatasetItem(id='2', image=np.ones((2, 8, 3))),
    ])

    with TestDir() as test_dir:
        source_dataset.export(test_dir, format='image_dir')
        zip_path = osp.join(test_dir, 'images.zip')
        make_zip_archive(test_dir, zip_path)

        proj_dir = osp.join(test_dir, 'proj')
        run(self, 'create', '-o', proj_dir)
        run(self, 'import', '-p', proj_dir, '-f', 'image_zip', zip_path)

        result_dir = osp.join(test_dir, 'result')
        export_path = osp.join(result_dir, 'export.zip')
        run(self, 'export', '-p', proj_dir, '-f', 'image_zip',
            '-o', result_dir, '--', '--name', osp.basename(export_path))

        parsed_dataset = Dataset.import_from(export_path,
            format='image_zip')
        compare_datasets(self, source_dataset, parsed_dataset)
def test_can_ignore_non_supported_subsets(self):
    source_dataset = Dataset.from_iterable([
        DatasetItem(id='img1', subset='test',
            image=np.ones((10, 20, 3)),
            annotations=[Bbox(1.0, 2.0, 1.0, 1.0, label=0)]),
        DatasetItem(id='img2', subset='train',
            image=np.ones((10, 5, 3)),
            annotations=[Bbox(3.0, 1.0, 2.0, 1.0, label=1)]),
    ], categories=[str(i) for i in range(4)])

    # The 'test' subset is not supported by the YOLO format,
    # so only the 'train' item should survive the export
    target_dataset = Dataset.from_iterable([
        DatasetItem(id='img2', subset='train',
            image=np.ones((10, 5, 3)),
            annotations=[Bbox(3.0, 1.0, 2.0, 1.0, label=1)]),
    ], categories=[str(i) for i in range(4)])

    with TestDir() as test_dir:
        dataset_dir = osp.join(test_dir, 'dataset_dir')
        source_dataset.save(dataset_dir, save_images=True)

        proj_dir = osp.join(test_dir, 'proj')
        run(self, 'create', '-o', proj_dir)
        run(self, 'import', '-p', proj_dir, '-f', 'datumaro', dataset_dir)

        yolo_dir = osp.join(test_dir, 'yolo_dir')
        run(self, 'export', '-p', proj_dir, '-o', yolo_dir,
            '-f', 'yolo', '--', '--save-images')

        parsed_dataset = Dataset.import_from(yolo_dir, format='yolo')
        compare_datasets(self, target_dataset, parsed_dataset)
def test_preparing_dataset_for_train_model(self):
    """
    <b>Description:</b>
    Testing a particular example of working with a VOC dataset.

    <b>Expected results:</b>
    A VOC dataset that matches the expected result.

    <b>Steps:</b>
    1. Get path to the source dataset from assets.
    2. Create a datumaro project and add the source dataset to it.
    3. Leave only non-occluded annotations with the `filter` command.
    4. Split the dataset into subsets with the `transform` command.
    5. Export the project to a VOC dataset with the `export` command.
    6. Verify that the resulting dataset is equal to the expected result.
    """
    expected_dataset = Dataset.from_iterable([
        DatasetItem(id='c', subset='train',
            annotations=[
                Bbox(3.0, 1.0, 8.0, 5.0,
                    attributes={
                        'truncated': False,
                        'occluded': False,
                        'difficult': False,
                    },
                    id=1, label=2, group=1
                )
            ]
        ),
        DatasetItem(id='d', subset='test',
            annotations=[
                Bbox(4.0, 4.0, 4.0, 4.0,
                    attributes={
                        'truncated': False,
                        'occluded': False,
                        'difficult': False,
                    },
                    id=1, label=3, group=1
                )
            ]
        ),
    ], categories=VOC.make_voc_categories())

    dataset_path = osp.join(DUMMY_DATASETS_DIR, 'voc_dataset2')

    with TestDir() as test_dir:
        run(self, 'create', '-o', test_dir)
        run(self, 'import', '-p', test_dir, '-f', 'voc', dataset_path)

        run(self, 'filter', '-p', test_dir, '-m', 'i+a',
            '-e', "/item/annotation[occluded='False']")

        run(self, 'transform', '-p', test_dir,
            '-t', 'random_split', '--', '-s', 'test:.5',
            '-s', 'train:.5', '--seed', '1')

        export_path = osp.join(test_dir, 'dataset')
        run(self, 'export', '-p', test_dir, '-f', 'voc',
            '-o', export_path, '--', '--label-map', 'voc')

        parsed_dataset = Dataset.import_from(export_path, format='voc')
        compare_datasets(self, expected_dataset, parsed_dataset)