def test_can_import_points(self):
    expected_dataset = Dataset.from_iterable([
        DatasetItem(id=1, subset='train',
            image=Image(path='1.jpg', size=(5, 5)),
            annotations=[
                Points([0, 0, 0, 2, 4, 1], [0, 1, 2],
                    label=1, group=1, id=1,
                    attributes={'is_crowd': False}),
                Polygon([0, 0, 4, 0, 4, 4],
                    label=1, group=1, id=1,
                    attributes={'is_crowd': False}),

                Points([1, 2, 3, 4, 2, 3],
                    group=2, id=2,
                    attributes={'is_crowd': False}),
                Bbox(1, 2, 2, 2,
                    group=2, id=2,
                    attributes={'is_crowd': False}),

                Points([1, 2, 0, 2, 4, 1],
                    label=0, group=3, id=3,
                    attributes={'is_crowd': False}),
                Bbox(0, 1, 4, 1,
                    label=0, group=3, id=3,
                    attributes={'is_crowd': False}),

                Points([0, 0, 1, 2, 3, 4], [0, 1, 2],
                    group=5, id=5,
                    attributes={'is_crowd': False}),
                Bbox(1, 2, 2, 2,
                    group=5, id=5,
                    attributes={'is_crowd': False}),
            ], attributes={'id': 1}),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(['a', 'b']),
        AnnotationType.points: PointsCategories.from_iterable(
            (i, None, [[0, 1], [1, 2]]) for i in range(2)),
    })

    dataset = Dataset.import_from(
        osp.join(DUMMY_DATASET_DIR, 'coco_person_keypoints'), 'coco')

    compare_datasets(self, expected_dataset, dataset)

def test_can_import(self):
    expected_dataset = Dataset.from_iterable([
        DatasetItem(id='1', image=np.ones((8, 8, 3)),
            annotations=[Label(0), Label(1)]),
        DatasetItem(id='2', image=np.ones((10, 10, 3)),
            annotations=[Label(0)]),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(
            'label_' + str(label) for label in range(2)),
    })

    dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'imagenet')

    compare_datasets(self, expected_dataset, dataset, require_images=True)

@classmethod
def from_iterable(cls, iterable: Iterable[DatasetItem],
        categories: Union[Dict, List[str]] = None):
    if isinstance(categories, list):
        categories = { AnnotationType.label:
            LabelCategories.from_iterable(categories) }

    if not categories:
        categories = {}

    class _extractor(Extractor):
        def __iter__(self):
            return iter(iterable)

        def categories(self):
            return categories

    return cls.from_extractors(_extractor())

def test_can_save_and_load(self):
    source_dataset = Dataset.from_iterable([
        DatasetItem(id='1', subset='train', image=np.ones((8, 8, 3)),
            annotations=[
                Bbox(0, 2, 4, 2, label=0, group=1),
                Points([3.2, 3.12, 4.11, 3.2, 2.11, 2.5,
                    3.5, 2.11, 3.8, 2.13], label=0, group=1),
            ]
        ),
        DatasetItem(id='2', subset='train', image=np.ones((10, 10, 3)),
            annotations=[
                Points([4.23, 4.32, 5.34, 4.45, 3.54,
                    3.56, 4.52, 3.51, 4.78, 3.34], label=1, group=1),
            ]
        ),
        DatasetItem(id='3', subset='train', image=np.ones((8, 8, 3)),
            annotations=[Label(2, group=1)]
        ),
        DatasetItem(id='4', subset='train', image=np.ones((10, 10, 3)),
            annotations=[
                Bbox(0, 2, 4, 2, label=3, group=1),
                Points([3.2, 3.12, 4.11, 3.2, 2.11, 2.5,
                    3.5, 2.11, 3.8, 2.13], label=3, group=1),
            ]
        ),
        DatasetItem(id='a/5', subset='train', image=np.ones((8, 8, 3)),
            annotations=[
                Bbox(2, 2, 2, 2, group=1),
            ]
        ),
        DatasetItem(id='label_0', subset='train',
            image=np.ones((8, 8, 3)),
        ),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(
            [('label_%s' % i, 'class_%s' % i) for i in range(5)]),
    })

    with TestDir() as test_dir:
        VggFace2Converter.convert(source_dataset, test_dir, save_images=True)
        parsed_dataset = Dataset.import_from(test_dir, 'vgg_face2')

        compare_datasets(self, source_dataset, parsed_dataset)

def _make_label_id_map(self):
    source_labels = {
        id: label.name for id, label in
        enumerate(self._extractor.categories().get(
            AnnotationType.label, LabelCategories()).items)
    }
    target_labels = {
        label.name: id for id, label in
        enumerate(self._categories[AnnotationType.label].items)
    }
    id_mapping = {
        src_id: target_labels.get(src_label, 0)
        for src_id, src_label in source_labels.items()
    }

    def map_id(src_id):
        return id_mapping.get(src_id, 0)
    return map_id

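# A standalone sketch of the remapping idea above (hypothetical label
# names, not part of the converter): source label ids are translated to
# target ids by matching names, and anything unmatched falls back to
# id 0, i.e. the background class.
def demo_label_id_map():
    source = ['cat', 'dog', 'bird']        # index = source label id
    target = ['background', 'dog', 'cat']  # index = target label id
    target_ids = {name: i for i, name in enumerate(target)}
    id_mapping = {src_id: target_ids.get(name, 0)
        for src_id, name in enumerate(source)}
    assert id_mapping == {0: 2, 1: 1, 2: 0}  # 'bird' -> background
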
def test_can_import(self):
    expected_dataset = Dataset.from_iterable([
        DatasetItem(id=1, subset='train',
            image=np.ones((10, 15, 3)),
            annotations=[
                Bbox(0, 2, 4, 2, label=2),
                Bbox(3, 3, 2, 3, label=4),
            ]),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(
            'label_' + str(i) for i in range(10)),
    })

    dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'yolo')

    compare_datasets(self, expected_dataset, dataset)

def test_can_import(self):
    expected_dataset = Dataset.from_iterable([
        DatasetItem(id=1, image=np.ones((16, 16, 3)),
            annotations=[
                Bbox(0, 4, 4, 8, label=2, attributes={
                    'occluded': False,
                    'visibility': 1.0,
                    'ignored': False,
                }),
            ]
        ),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(
            'label_' + str(label) for label in range(10)),
    })

    dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'mot_seq')

    compare_datasets(self, expected_dataset, dataset)

def test_can_save_bboxes(self):
    test_dataset = Dataset.from_iterable([
        DatasetItem(id=1, subset='train', image=np.ones((16, 16, 3)),
            annotations=[
                Bbox(0, 4, 4, 8, label=2),
                Bbox(0, 4, 4, 4, label=3),
                Bbox(2, 4, 4, 4),
            ], attributes={'source_id': ''}
        ),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(
            'label_' + str(label) for label in range(10)),
    })

    with TestDir() as test_dir:
        self._test_save_and_load(
            test_dataset,
            partial(TfDetectionApiConverter.convert, save_images=True),
            test_dir)

def test_relative_paths(self):
    source_dataset = Dataset.from_iterable([
        DatasetItem(id='1', subset='train',
            image=np.ones((4, 2, 3))),
        DatasetItem(id='subdir1/1', subset='train',
            image=np.ones((2, 6, 3))),
        DatasetItem(id='subdir2/1', subset='train',
            image=np.ones((5, 4, 3))),
    ], categories={
        AnnotationType.label: LabelCategories(),
    })

    for save_images in {True, False}:
        with self.subTest(save_images=save_images):
            with TestDir() as test_dir:
                YoloConverter.convert(source_dataset, test_dir,
                    save_images=save_images)
                parsed_dataset = YoloImporter()(test_dir).make_dataset()

                compare_datasets(self, source_dataset, parsed_dataset)

def test_can_load_dataset_with_exact_image_info(self):
    source_dataset = Dataset.from_iterable([
        DatasetItem(id=1, subset='train',
            image=Image(path='1.jpg', size=(10, 15)),
            annotations=[
                Bbox(0, 2, 4, 2, label=2),
                Bbox(3, 3, 2, 3, label=4),
            ]),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(
            'label_' + str(i) for i in range(10)),
    })

    with TestDir() as test_dir:
        YoloConverter.convert(source_dataset, test_dir)

        parsed_dataset = YoloImporter()(test_dir,
            image_info={'1': (10, 15)}).make_dataset()

        compare_datasets(self, source_dataset, parsed_dataset)

def categories(self):
    label_cat = LabelCategories()
    label_cat.add('label0')
    label_cat.add('label9')
    label_cat.add('label4')

    mask_cat = MaskCategories(colormap={
        k: v for k, v in mask_tools.generate_colormap(5).items()
        if k in {0, 1, 3, 4}
    })

    return {
        AnnotationType.label: label_cat,
        AnnotationType.mask: mask_cat,
    }

def test_can_save_and_load_with_multiple_labels(self):
    source_dataset = Dataset.from_iterable([
        DatasetItem(id='1', image=np.ones((8, 8, 3)),
            annotations=[Label(0), Label(1)]),
        DatasetItem(id='2', image=np.ones((10, 10, 3)),
            annotations=[Label(0), Label(1)]),
        DatasetItem(id='3', image=np.ones((10, 10, 3)),
            annotations=[Label(0), Label(2)]),
        DatasetItem(id='4', image=np.ones((8, 8, 3)),
            annotations=[Label(2), Label(4)]),
        DatasetItem(id='5', image=np.ones((10, 10, 3)),
            annotations=[Label(3), Label(4)]),
        DatasetItem(id='6', image=np.ones((10, 10, 3))),
        DatasetItem(id='7', image=np.ones((8, 8, 3))),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(
            'label_' + str(label) for label in range(5)),
    })

    with TestDir() as test_dir:
        ImagenetConverter.convert(source_dataset, test_dir, save_images=True)

        parsed_dataset = Dataset.import_from(test_dir, 'imagenet')

        compare_datasets(self, source_dataset, parsed_dataset,
            require_images=True)

def test_can_save_dataset_with_image_info(self):
    source_dataset = Dataset.from_iterable([
        DatasetItem(id=1, subset='train',
            image=Image(path='1.jpg', size=(10, 15)),
            annotations=[
                Bbox(0, 2, 4, 2, label=2),
                Bbox(3, 3, 2, 3, label=4),
            ]),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(
            'label_' + str(i) for i in range(10)),
    })

    with TestDir() as test_dir:
        YoloConverter.convert(source_dataset, test_dir)

        save_image(osp.join(test_dir, 'obj_train_data', '1.jpg'),
            np.ones((10, 15, 3)))  # put the image for the dataset

        parsed_dataset = YoloImporter()(test_dir).make_dataset()

        compare_datasets(self, source_dataset, parsed_dataset)

def __call__(self, extractor, save_dir):
    os.makedirs(save_dir, exist_ok=True)

    label_categories = extractor.categories().get(
        AnnotationType.label, LabelCategories())
    get_label = lambda label_id: label_categories.items[label_id].name \
        if label_id is not None else ''
    label_ids = OrderedDict((label.name, 1 + idx)
        for idx, label in enumerate(label_categories.items))
    map_label_id = lambda label_id: label_ids.get(get_label(label_id), 0)
    self._get_label = get_label
    self._get_label_id = map_label_id

    subsets = extractor.subsets()
    if len(subsets) == 0:
        subsets = [ None ]

    for subset_name in subsets:
        if subset_name:
            subset = extractor.get_subset(subset_name)
        else:
            subset_name = DEFAULT_SUBSET_NAME
            subset = extractor

        labelmap_path = osp.join(save_dir, DetectionApiPath.LABELMAP_FILE)
        with codecs.open(labelmap_path, 'w', encoding='utf8') as f:
            for label, idx in label_ids.items():
                f.write(
                    'item {\n' +
                    ('\tid: %s\n' % (idx)) +
                    ("\tname: '%s'\n" % (label)) +
                    '}\n\n'
                )

        anno_path = osp.join(save_dir, '%s.tfrecord' % (subset_name))
        with tf.io.TFRecordWriter(anno_path) as writer:
            for item in subset:
                tf_example = self._make_tf_example(item)
                writer.write(tf_example.SerializeToString())

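# For reference, with two labels 'cat' and 'dog' (hypothetical names) the
# labelmap loop above writes entries like the following. Ids start at 1
# because map_label_id reserves 0 for unmatched/background labels:
#
#   item {
#       id: 1
#       name: 'cat'
#   }
#
#   item {
#       id: 2
#       name: 'dog'
#   }
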
def test_can_save_dataset_with_cyrillic_and_spaces_in_filename(self):
    dataset = Dataset.from_iterable([
        DatasetItem(id="кириллица с пробелом",
            image=np.ones((8, 8, 3)),
            annotations=[Label(0), Label(1)]),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(
            'label_' + str(label) for label in range(2)),
    })

    with TestDir() as test_dir:
        ImagenetTxtConverter.convert(dataset, test_dir, save_images=True)

        parsed_dataset = Dataset.import_from(test_dir, 'imagenet_txt')

        compare_datasets(self, dataset, parsed_dataset, require_images=True)

def _load_categories(self, label_map_source):
    if label_map_source == LabelmapType.camvid.name:
        # use the default Camvid colormap
        label_map = CamvidLabelMap

    elif label_map_source == LabelmapType.source.name and \
            AnnotationType.mask not in self._extractor.categories():
        # generate colormap for input labels
        labels = self._extractor.categories() \
            .get(AnnotationType.label, LabelCategories())
        label_map = OrderedDict((item.name, None)
            for item in labels.items)

    elif label_map_source == LabelmapType.source.name and \
            AnnotationType.mask in self._extractor.categories():
        # use source colormap
        labels = self._extractor.categories()[AnnotationType.label]
        colors = self._extractor.categories()[AnnotationType.mask]
        label_map = OrderedDict()
        for idx, item in enumerate(labels.items):
            color = colors.colormap.get(idx)
            if color is not None:
                label_map[item.name] = color

    elif isinstance(label_map_source, dict):
        label_map = OrderedDict(
            sorted(label_map_source.items(), key=lambda e: e[0]))

    elif isinstance(label_map_source, str) and osp.isfile(label_map_source):
        label_map = parse_label_map(label_map_source)

    else:
        raise Exception("Wrong labelmap specified, "
            "expected one of %s or a file path" % \
            ', '.join(t.name for t in LabelmapType))

    self._categories = make_camvid_categories(label_map)
    self._label_map = label_map
    self._label_id_mapping = self._make_label_id_map()

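# The branches above accept several labelmap sources; a quick sketch of the
# accepted forms (the dict value and file path are illustrative):
#
#   label_map_source='camvid'                  -> built-in CamvidLabelMap
#   label_map_source='source'                  -> derived from the extractor
#   label_map_source={'road': (128, 64, 128)}  -> explicit name -> color map
#   label_map_source='/path/to/label_map'      -> parsed via parse_label_map()
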
def test_can_import(self):
    expected_dataset = Dataset.from_iterable([
        DatasetItem(id='1', subset='train',
            image=np.zeros((8, 6, 3)),
            annotations=[Label(0)]
        ),
        DatasetItem(id='2', subset='train',
            image=np.zeros((2, 8, 3)),
            annotations=[Label(5)]
        ),
        DatasetItem(id='3', subset='train',
            annotations=[Label(3)]
        ),
        DatasetItem(id='4', subset='train',
            annotations=[Label(5)]
        ),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(
            'label_%s' % label for label in range(10)),
    })

    dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'imagenet_txt')

    compare_datasets(self, expected_dataset, dataset, require_images=True)

def test_can_save_and_load(self):
    source_dataset = Dataset.from_iterable([
        DatasetItem(id=1, subset='train', image=np.ones((8, 8, 3)),
            annotations=[
                Bbox(0, 2, 4, 2, label=2),
                Bbox(0, 1, 2, 3, label=4),
            ]),
        DatasetItem(id=2, subset='train', image=np.ones((10, 10, 3)),
            annotations=[
                Bbox(0, 2, 4, 2, label=2),
                Bbox(3, 3, 2, 3, label=4),
                Bbox(2, 1, 2, 3, label=4),
            ]),
        DatasetItem(id=3, subset='valid', image=np.ones((8, 8, 3)),
            annotations=[
                Bbox(0, 1, 5, 2, label=2),
                Bbox(0, 2, 3, 2, label=5),
                Bbox(0, 2, 4, 2, label=6),
                Bbox(0, 7, 3, 2, label=7),
            ]),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(
            'label_' + str(i) for i in range(10)),
    })

    with TestDir() as test_dir:
        YoloConverter.convert(source_dataset, test_dir, save_images=True)
        parsed_dataset = Dataset.import_from(test_dir, 'yolo')

        compare_datasets(self, source_dataset, parsed_dataset)

def _make_label_id_map(self):
    source_labels = {
        id: label.name for id, label in
        enumerate(self._extractor.categories().get(
            AnnotationType.label, LabelCategories()).items)
    }
    target_labels = {
        label.name: id for id, label in
        enumerate(self._categories[AnnotationType.label].items)
    }
    id_mapping = {
        src_id: target_labels.get(src_label, 0)
        for src_id, src_label in source_labels.items()
    }

    void_labels = [src_label
        for src_id, src_label in source_labels.items()
        if src_label not in target_labels]
    if void_labels:
        log.warning("The following labels are remapped to background: %s" %
            ', '.join(void_labels))
    log.debug("Saving segmentations with the following label mapping: \n%s" %
        '\n'.join(["#%s '%s' -> #%s '%s'" %
            (
                src_id, src_label, id_mapping[src_id],
                self._categories[AnnotationType.label] \
                    .items[id_mapping[src_id]].name
            )
            for src_id, src_label in source_labels.items()
        ])
    )

    def map_id(src_id):
        return id_mapping.get(src_id, 0)
    return map_id

def _load_categories(self):
    label_cat = LabelCategories()
    path = osp.join(self._dataset_dir, WiderFacePath.LABELS_FILE)
    if osp.isfile(path):
        with open(path, encoding='utf-8') as labels_file:
            labels = [s.strip() for s in labels_file]
        for label in labels:
            label_cat.add(label)
    else:
        subset_path = osp.join(self._dataset_dir,
            WiderFacePath.SUBSET_DIR + self._subset,
            WiderFacePath.IMAGES_DIR)
        if osp.isdir(subset_path):
            for images_dir in sorted(os.listdir(subset_path)):
                if osp.isdir(osp.join(subset_path, images_dir)) and \
                        images_dir != WiderFacePath.IMAGES_DIR_NO_LABEL:
                    if '--' in images_dir:
                        images_dir = images_dir.split('--')[1]
                    label_cat.add(images_dir)
    return { AnnotationType.label: label_cat }

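# Note on the loader above: WIDER Face class directories are conventionally
# named like '0--Parade', so split('--')[1] keeps only the readable class
# name ('Parade'). The example directory name is illustrative of that
# naming convention, not taken from this code.
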
def test_can_save_masks(self):
    test_dataset = Dataset.from_iterable([
        DatasetItem(id=1, subset='train', image=np.ones((4, 5, 3)),
            annotations=[
                Mask(image=np.array([
                    [1, 0, 0, 1],
                    [0, 1, 1, 0],
                    [0, 1, 1, 0],
                    [1, 0, 0, 1],
                ]), label=1),
            ], attributes={'source_id': ''}
        ),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(
            'label_' + str(label) for label in range(10)),
    })

    with TestDir() as test_dir:
        self._test_save_and_load(
            test_dataset,
            partial(TfDetectionApiConverter.convert, save_masks=True),
            test_dir)

def test_can_save_dataset_with_unknown_image_formats(self):
    test_dataset = Dataset.from_iterable([
        DatasetItem(id=1,
            image=ByteImage(data=encode_image(np.ones((5, 4, 3)), 'png'),
                path='1/q.e'),
            attributes={'source_id': ''}),
        DatasetItem(id=2,
            image=ByteImage(data=encode_image(np.ones((6, 4, 3)), 'png'),
                ext='qwe'),
            attributes={'source_id': ''}),
    ], categories={
        AnnotationType.label: LabelCategories(),
    })

    with TestDir() as test_dir:
        self._test_save_and_load(
            test_dataset,
            partial(TfDetectionApiConverter.convert, save_images=True),
            test_dir)

def test_can_save_and_load_with_multiple_labels(self):
    source_dataset = Dataset.from_iterable([
        DatasetItem(id='1', subset='train',
            annotations=[Label(1), Label(3)]
        ),
        DatasetItem(id='2', subset='train',
            image=np.zeros((8, 6, 3)),
            annotations=[Label(0)]
        ),
        DatasetItem(id='3', subset='train',
            image=np.zeros((2, 8, 3)),
        ),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(
            'label_' + str(label) for label in range(10)),
    })

    with TestDir() as test_dir:
        ImagenetTxtConverter.convert(source_dataset, test_dir,
            save_images=True)

        parsed_dataset = Dataset.import_from(test_dir, 'imagenet_txt')

        compare_datasets(self, source_dataset, parsed_dataset,
            require_images=True)

@classmethod
def from_iterable(cls, iterable: Iterable[DatasetItem],
        categories: Union[CategoriesInfo, List[str]] = None,
        env: Environment = None):
    if isinstance(categories, list):
        categories = { AnnotationType.label:
            LabelCategories.from_iterable(categories) }

    if not categories:
        categories = {}

    class _extractor(Extractor):
        def __init__(self):
            super().__init__(length=len(iterable) \
                if hasattr(iterable, '__len__') else None)

        def __iter__(self):
            return iter(iterable)

        def categories(self):
            return categories

    return cls.from_extractors(_extractor(), env=env)

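# A minimal usage sketch for from_iterable (the label names and item
# contents are made up for illustration). Passing a plain list of label
# names is shorthand for a full {AnnotationType.label: LabelCategories}
# mapping, as the isinstance(categories, list) branch above shows.
dataset = Dataset.from_iterable([
    DatasetItem(id=0, annotations=[Label(0)]),
    DatasetItem(id=1, annotations=[Label(1)]),
], categories=['cat', 'dog'])  # expanded to LabelCategories internally
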
def test_can_save_dataset_with_no_subsets(self):
    source_dataset = Dataset.from_iterable([
        DatasetItem(id='a/b/c', image=np.zeros((8, 4, 3)),
            annotations=[Label(1)]),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(
            'label_' + str(label) for label in range(10)),
    })

    with TestDir() as test_dir:
        ImagenetTxtConverter.convert(source_dataset, test_dir,
            save_images=True)

        parsed_dataset = Dataset.import_from(test_dir, 'imagenet_txt')

        compare_datasets(self, source_dataset, parsed_dataset,
            require_images=True)

def _load_categories(self):
    label_cat = LabelCategories()
    path = osp.join(self._dataset_dir, VggFace2Path.LABELS_FILE)
    if osp.isfile(path):
        with open(path, encoding='utf-8') as labels_file:
            lines = [s.strip() for s in labels_file]
        for line in lines:
            objects = line.split()
            label = objects[0]
            class_name = None
            if 1 < len(objects):
                class_name = objects[1]
            label_cat.add(label, parent=class_name)
    else:
        subset_path = osp.join(self._dataset_dir, self._subset)
        if osp.isdir(subset_path):
            for images_dir in sorted(os.listdir(subset_path)):
                if osp.isdir(osp.join(subset_path, images_dir)) and \
                        images_dir != VggFace2Path.IMAGES_DIR_NO_LABEL:
                    label_cat.add(images_dir)
    return { AnnotationType.label: label_cat }

def test_can_save_and_load_keypoints(self):
    source_dataset = Dataset.from_iterable([
        DatasetItem(id=1, subset='train', image=np.zeros((5, 5, 3)),
            annotations=[
                # Full instance annotations: polygon + keypoints
                Points([0, 0, 0, 2, 4, 1], [0, 1, 2],
                    label=3, group=1, id=1),
                Polygon([0, 0, 4, 0, 4, 4],
                    label=3, group=1, id=1),

                # Full instance annotations: bbox + keypoints
                Points([1, 2, 3, 4, 2, 3], group=2, id=2),
                Bbox(1, 2, 2, 2, group=2, id=2),

                # Solitary keypoints
                Points([1, 2, 0, 2, 4, 1], label=5, id=3),

                # Some other solitary annotations (bug #1387)
                Polygon([0, 0, 4, 0, 4, 4], label=3, id=4),

                # Solitary keypoints with no label
                Points([0, 0, 1, 2, 3, 4], [0, 1, 2], id=5),
            ]),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(
            str(i) for i in range(10)),
        AnnotationType.points: PointsCategories.from_iterable(
            (i, None, [[0, 1], [1, 2]]) for i in range(10)),
    })

    target_dataset = Dataset.from_iterable([
        DatasetItem(id=1, subset='train', image=np.zeros((5, 5, 3)),
            annotations=[
                Points([0, 0, 0, 2, 4, 1], [0, 1, 2],
                    label=3, group=1, id=1,
                    attributes={'is_crowd': False}),
                Polygon([0, 0, 4, 0, 4, 4],
                    label=3, group=1, id=1,
                    attributes={'is_crowd': False}),

                Points([1, 2, 3, 4, 2, 3],
                    group=2, id=2,
                    attributes={'is_crowd': False}),
                Bbox(1, 2, 2, 2,
                    group=2, id=2,
                    attributes={'is_crowd': False}),

                Points([1, 2, 0, 2, 4, 1],
                    label=5, group=3, id=3,
                    attributes={'is_crowd': False}),
                Bbox(0, 1, 4, 1,
                    label=5, group=3, id=3,
                    attributes={'is_crowd': False}),

                Points([0, 0, 1, 2, 3, 4], [0, 1, 2],
                    group=5, id=5,
                    attributes={'is_crowd': False}),
                Bbox(1, 2, 2, 2,
                    group=5, id=5,
                    attributes={'is_crowd': False}),
            ], attributes={'id': 1}),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(
            str(i) for i in range(10)),
        AnnotationType.points: PointsCategories.from_iterable(
            (i, None, [[0, 1], [1, 2]]) for i in range(10)),
    })

    with TestDir() as test_dir:
        self._test_save_and_load(source_dataset,
            CocoPersonKeypointsConverter.convert, test_dir,
            target_dataset=target_dataset)

def test_can_save_bboxes(self):
    source_dataset = Dataset.from_iterable([
        DatasetItem(id=1, subset='train', image=np.ones((16, 16, 3)),
            annotations=[
                Bbox(0, 4, 4, 8, label=2, attributes={
                    'occluded': True,
                }),
                Bbox(0, 4, 4, 4, label=3, attributes={
                    'visibility': 0.4,
                }),
                Bbox(2, 4, 4, 4, attributes={'ignored': True}),
            ]),
        DatasetItem(id=2, subset='val', image=np.ones((8, 8, 3)),
            annotations=[
                Bbox(1, 2, 4, 2, label=3),
            ]),
        DatasetItem(id=3, subset='test', image=np.ones((5, 4, 3)) * 3),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(
            'label_' + str(label) for label in range(10)),
    })

    target_dataset = Dataset.from_iterable([
        DatasetItem(id=1, image=np.ones((16, 16, 3)),
            annotations=[
                Bbox(0, 4, 4, 8, label=2, attributes={
                    'occluded': True,
                    'visibility': 0.0,
                    'ignored': False,
                }),
                Bbox(0, 4, 4, 4, label=3, attributes={
                    'occluded': False,
                    'visibility': 0.4,
                    'ignored': False,
                }),
                Bbox(2, 4, 4, 4, attributes={
                    'occluded': False,
                    'visibility': 1.0,
                    'ignored': True,
                }),
            ]),
        DatasetItem(id=2, image=np.ones((8, 8, 3)),
            annotations=[
                Bbox(1, 2, 4, 2, label=3, attributes={
                    'occluded': False,
                    'visibility': 1.0,
                    'ignored': False,
                }),
            ]),
        DatasetItem(id=3, image=np.ones((5, 4, 3)) * 3),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(
            'label_' + str(label) for label in range(10)),
    })

    with TestDir() as test_dir:
        self._test_save_and_load(source_dataset,
            partial(MotSeqGtConverter.convert, save_images=True),
            test_dir, target_dataset=target_dataset,
            require_images=True)

def _load_categories(self, labels):
    # from_iterable is a classmethod; call it on the class instead of
    # instantiating a throwaway LabelCategories() first
    return { AnnotationType.label: LabelCategories.from_iterable(labels) }

def _parse_meta(context):
    ev, el = next(context)
    if not (ev == 'start' and el.tag == 'annotations'):
        raise Exception("Unexpected token: expected an 'annotations' "
            "root element, got '%s'" % el.tag)

    categories = {}

    frame_size = None
    has_z_order = False
    mode = 'annotation'
    labels = OrderedDict()
    label = None

    # Recursive descent parser
    el = None
    states = ['annotations']
    def accepted(expected_state, tag, next_state=None):
        state = states[-1]
        if state == expected_state and el is not None and el.tag == tag:
            if not next_state:
                next_state = tag
            states.append(next_state)
            return True
        return False
    def consumed(expected_state, tag):
        state = states[-1]
        if state == expected_state and el is not None and el.tag == tag:
            states.pop()
            return True
        return False

    for ev, el in context:
        if ev == 'start':
            if accepted('annotations', 'meta'): pass
            elif accepted('meta', 'task'): pass
            elif accepted('task', 'mode'): pass
            elif accepted('task', 'z_order'): pass
            elif accepted('task', 'original_size'):
                frame_size = [None, None]
            elif accepted('original_size', 'height',
                    next_state='frame_height'): pass
            elif accepted('original_size', 'width',
                    next_state='frame_width'): pass
            elif accepted('task', 'labels'): pass
            elif accepted('labels', 'label'):
                label = { 'name': None, 'attributes': set() }
            elif accepted('label', 'name', next_state='label_name'): pass
            elif accepted('label', 'attributes'): pass
            elif accepted('attributes', 'attribute'): pass
            elif accepted('attribute', 'name', next_state='attr_name'): pass
            elif accepted('annotations', 'image') or \
                 accepted('annotations', 'track') or \
                 accepted('annotations', 'tag'):
                break
            else:
                pass
        elif ev == 'end':
            if consumed('meta', 'meta'):
                break
            elif consumed('task', 'task'): pass
            elif consumed('mode', 'mode'):
                # without this, mode stays at its 'annotation' default and
                # the 'interpolation' attributes below are never enabled
                mode = el.text
            elif consumed('z_order', 'z_order'):
                has_z_order = (el.text == 'True')
            elif consumed('original_size', 'original_size'): pass
            elif consumed('frame_height', 'height'):
                frame_size[0] = int(el.text)
            elif consumed('frame_width', 'width'):
                frame_size[1] = int(el.text)
            elif consumed('label_name', 'name'):
                label['name'] = el.text
            elif consumed('attr_name', 'name'):
                label['attributes'].add(el.text)
            elif consumed('attribute', 'attribute'): pass
            elif consumed('attributes', 'attributes'): pass
            elif consumed('label', 'label'):
                labels[label['name']] = label['attributes']
                label = None
            elif consumed('labels', 'labels'): pass
            else:
                pass

    assert len(states) == 1 and states[0] == 'annotations', \
        "Expected 'meta' section in the annotation file, path: %s" % states

    common_attrs = ['occluded']
    if has_z_order:
        common_attrs.append('z_order')
    if mode == 'interpolation':
        common_attrs.append('keyframe')
        common_attrs.append('outside')

    label_cat = LabelCategories(attributes=common_attrs)
    for label, attrs in labels.items():
        label_cat.add(label, attributes=attrs)
    categories[AnnotationType.label] = label_cat

    return categories, frame_size

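# A minimal driving sketch for _parse_meta (the file name and calling
# convention are illustrative, not the extractor's actual entry point).
# The parser expects an iterparse event stream positioned at the start
# of the <annotations> root element.
from xml.etree import ElementTree as ET

context = iter(ET.iterparse('annotations.xml', events=('start', 'end')))
categories, frame_size = _parse_meta(context)
label_names = [label.name
    for label in categories[AnnotationType.label].items]
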