示例#1
0
    def convert(self, devkit_dir, has_background=True):
        """
        Convert the VOC annotations listed in ImageSets/Main/test.txt to detection annotations.

        Args:
            devkit_dir: path to VOC2007 devkit dir (e.g. .../VOCdevkit/VOC2007)
            has_background: allows to add background label to label map;
                a string value is converted with string_to_bool first
        Returns:
            (annotations, meta): a list with one DetectionAnnotation per listed image and a dict
            holding 'label_map' plus 'background_label' (0) when has_background is truthy
        Raises:
            FileNotFoundError: when the image named in an annotation file is not in JPEGImages
        """
        if isinstance(has_background, str):
            has_background = string_to_bool(has_background)

        class_to_ind = prepare_detection_labels(has_background)
        devkit_dir = get_path(devkit_dir, is_directory=True)
        annotation_directory = get_path(devkit_dir / 'Annotations', is_directory=True)
        images_directory = get_path(devkit_dir / 'JPEGImages', is_directory=True)
        image_set_file = devkit_dir / 'ImageSets' / 'Main' / 'test.txt'

        coordinate_tags = ('xmin', 'ymin', 'xmax', 'ymax')
        detections = []
        for image in tqdm(read_txt(image_set_file, sep=None)):
            tree = ET.parse(str(annotation_directory / '{}.xml'.format(image)))

            # The identifier is the file name stored inside the XML, not the image set entry.
            identifier = tree.find('.//filename').text
            image_path = images_directory / identifier
            if not image_path.is_file():
                raise FileNotFoundError("{}: {}".format(os.strerror(errno.ENOENT), image_path))

            labels, difficult_indices = [], []
            corners = {tag: [] for tag in coordinate_tags}
            object_nodes = (node for node in tree.getroot() if node.tag.startswith('object'))
            for position, node in enumerate(object_nodes):
                bbox = node.find('bndbox')
                # `position` equals the number of labels collected so far,
                # i.e. the index this box is about to get.
                if int(node.find('difficult').text) == 1:
                    difficult_indices.append(position)

                labels.append(class_to_ind[node.find('name').text])
                for tag in coordinate_tags:
                    # Every coordinate is stored shifted down by one.
                    corners[tag].append(float(bbox.find(tag).text) - 1)

            image_annotation = DetectionAnnotation(
                identifier, labels, corners['xmin'], corners['ymin'], corners['xmax'], corners['ymax']
            )
            image_annotation.metadata['difficult_boxes'] = difficult_indices
            detections.append(image_annotation)

        meta = {'label_map': reverse_label_map(class_to_ind)}
        if has_background:
            meta['background_label'] = 0

        return detections, meta
示例#2
0
    def test_filter_by_visibility_does_nothing_with_default_visibility_level_and_partially_occluded(self):
        """
        Filter an annotation that carries no visibility metadata with min_visibility
        'partially occluded'; the boxes are expected unchanged and the metadata is
        expected to hold an empty 'difficult_boxes' list.
        """
        filter_step = {'type': 'filter', 'apply_to': 'annotation', 'min_visibility': 'partially occluded'}
        annotation = DetectionAnnotation(y_mins=[5.0, 10.0], y_maxs=[15.0, 40.0])
        expected = DetectionAnnotation(y_mins=[5.0, 10.0], y_maxs=[15.0, 40.0], metadata={'difficult_boxes': []})
        executor = PostprocessingExecutor([filter_step])

        postprocess_data(executor, [annotation], [None])

        assert annotation == expected
示例#3
0
    def test_filter_annotations_by_labels_with_remove_on_container_using_apply_to(self):
        """
        Label filter with remove_filtered=True, selected through apply_to, on a container
        annotation; the 'to_be_filtered' label is expected to be dropped from the wrapped
        detection annotation.
        """
        filter_step = {'type': 'filter', 'apply_to': 'annotation', 'labels': ['to_be_filtered'], 'remove_filtered': True}
        wrapped = DetectionAnnotation(labels=['some_label', 'to_be_filtered'])
        annotation = ContainerAnnotation({'annotation': wrapped})
        expected = ContainerAnnotation({'annotation': DetectionAnnotation(labels=['some_label'])})
        executor = PostprocessingExecutor([filter_step])

        postprocess_data(executor, [annotation], [None])

        assert annotation == expected
示例#4
0
    def test_filter_annotations_by_min_confidence_do_nothing(self):
        """
        A min_confidence filter applied to annotations is expected to leave both
        annotations exactly as they were.
        """
        filter_step = {'type': 'filter', 'apply_to': 'annotation', 'min_confidence': 0.5, 'remove_filtered': True}
        annotations = [
            DetectionAnnotation(labels=['a', 'b']),
            DetectionAnnotation(labels=['c', 'd'])
        ]
        expected_annotations = [
            DetectionAnnotation(labels=['a', 'b']),
            DetectionAnnotation(labels=['c', 'd'])
        ]
        executor = PostprocessingExecutor([filter_step])

        postprocess_data(executor, annotations, [None, None])

        assert np.array_equal(annotations, expected_annotations)
示例#5
0
    def test_filter_regular_annotations_by_labels_with_ignore(self):
        """
        Label filter with remove_filtered=False on a plain annotation; the labels are
        expected to stay and index 1 is expected in the 'difficult_boxes' metadata.
        """
        filter_step = {'type': 'filter', 'apply_to': 'annotation', 'labels': ['to_be_filtered'], 'remove_filtered': False}
        annotation = DetectionAnnotation(labels=['some_label', 'to_be_filtered'])
        expected = DetectionAnnotation(labels=['some_label', 'to_be_filtered'], metadata={'difficult_boxes': [1]})
        executor = PostprocessingExecutor([filter_step])

        postprocess_data(executor, [annotation], [None])

        assert annotation == expected
示例#6
0
    def test_filter_container_annotations_by_labels_with_ignore_using_source(self):
        """
        Label filter with remove_filtered=False, selected through annotation_source, on a
        container annotation; the wrapped annotation is expected to keep its labels and
        to list index 1 in the 'difficult_boxes' metadata.
        """
        filter_step = {
            'type': 'filter', 'annotation_source': 'annotation',
            'labels': ['to_be_filtered'], 'remove_filtered': False
        }
        annotation = ContainerAnnotation({'annotation': DetectionAnnotation(labels=['some_label', 'to_be_filtered'])})
        expected_inner = DetectionAnnotation(labels=['some_label', 'to_be_filtered'], metadata={'difficult_boxes': [1]})
        expected = ContainerAnnotation({'annotation': expected_inner})
        executor = PostprocessingExecutor([filter_step])

        postprocess_data(executor, [annotation], [None])

        assert annotation == expected
示例#7
0
    def test_filter_container_annotations_and_regular_predictions_by_labels_with_remove_using_apply_to(self):
        """
        Label filter with remove_filtered=True and apply_to 'all'; the 'to_be_filtered'
        label is expected to be dropped from both the plain prediction and the
        annotation wrapped in a container.
        """
        filter_step = {'type': 'filter', 'apply_to': 'all', 'labels': ['to_be_filtered'], 'remove_filtered': True}
        prediction = DetectionPrediction(labels=['some_label', 'to_be_filtered'])
        expected_prediction = DetectionPrediction(labels=['some_label'])
        annotation = ContainerAnnotation({'annotation': DetectionAnnotation(labels=['some_label', 'to_be_filtered'])})
        expected_annotation = ContainerAnnotation({'annotation': DetectionAnnotation(labels=['some_label'])})
        executor = PostprocessingExecutor([filter_step])

        postprocess_data(executor, [annotation], [prediction])

        assert prediction == expected_prediction
        assert annotation == expected_annotation
示例#8
0
    def test_filter_regular_annotations_by_labels_with_remove_using_annotation_source_warm_user_warning(self):
        """
        Using annotation_source with a plain (non-container) annotation is expected to
        emit a UserWarning while still removing the 'to_be_filtered' label.
        """
        filter_step = {
            'type': 'filter', 'annotation_source': 'annotation',
            'labels': ['to_be_filtered'], 'remove_filtered': True
        }
        annotation = DetectionAnnotation(labels=['some_label', 'to_be_filtered'])
        expected = DetectionAnnotation(labels=['some_label'])

        # Both the executor construction and the run stay inside the warning check.
        with pytest.warns(UserWarning):
            postprocess_data(PostprocessingExecutor([filter_step]), [annotation], [None])

        assert annotation == expected
示例#9
0
    def test_filter_multi_source_annotations_by_labels_with_remove(self):
        """
        Label filter with remove_filtered=True and two annotation sources; the
        'to_be_filtered' label is expected to be dropped from both wrapped annotations.
        """
        filter_step = {
            'type': 'filter', 'annotation_source': ['annotation1', 'annotation2'],
            'labels': ['to_be_filtered'], 'remove_filtered': True
        }
        annotation = ContainerAnnotation({
            'annotation1': DetectionAnnotation(labels=['some_label', 'to_be_filtered']),
            'annotation2': DetectionAnnotation(labels=['some_label', 'to_be_filtered'])
        })
        expected = ContainerAnnotation({
            'annotation1': DetectionAnnotation(labels=['some_label']),
            'annotation2': DetectionAnnotation(labels=['some_label'])
        })
        executor = PostprocessingExecutor([filter_step])

        postprocess_data(executor, [annotation], [None])

        assert annotation == expected
示例#10
0
    def test_filter_by_visibility_filters_partially_occluded(self):
        """
        Visibility filter with min_visibility 'partially occluded' and remove_filtered=True;
        the 'heavy occluded' box is expected to be removed while the 'visibilities'
        metadata is expected to stay as it was.
        """
        filter_step = {
            'type': 'filter', 'apply_to': 'annotation', 'min_visibility': 'partially occluded',
            'remove_filtered': True
        }
        annotation = DetectionAnnotation(
            y_mins=[5.0, 10.0],
            y_maxs=[15.0, 40.0],
            metadata={'visibilities': ['heavy occluded', 'partially occluded']}
        )
        expected = DetectionAnnotation(
            y_mins=[10.0],
            y_maxs=[40.0],
            metadata={'visibilities': ['heavy occluded', 'partially occluded']}
        )
        executor = PostprocessingExecutor([filter_step])

        postprocess_data(executor, [annotation], [None])

        assert annotation == expected
示例#11
0
    def test_filter_by_visibility_does_nothing_with_annotations_without_visibility(self):
        """
        Visibility filter ('heavy occluded') on annotations that carry no visibility
        metadata; the boxes are expected unchanged, with an empty 'difficult_boxes'
        list in each annotation's metadata.
        """
        filter_step = {'type': 'filter', 'apply_to': 'annotation', 'min_visibility': 'heavy occluded'}
        first = DetectionAnnotation(y_mins=[5.0, 10.0], y_maxs=[15.0, 40.0])
        second = DetectionAnnotation(y_mins=[5.0, 10.0], y_maxs=[35.0, 50.0])
        annotations = [first, second]
        expected = [
            DetectionAnnotation(y_mins=[5.0, 10.0], y_maxs=[15.0, 40.0], metadata={'difficult_boxes': []}),
            DetectionAnnotation(y_mins=[5.0, 10.0], y_maxs=[35.0, 50.0], metadata={'difficult_boxes': []})
        ]
        executor = PostprocessingExecutor([filter_step])

        postprocess_data(executor, annotations, [None, None])

        assert np.array_equal(annotations, expected)
示例#12
0
    def test_filter_annotations_by_height_range_with_remove(self):
        """
        Height-range filter '(10.0, 20.0)' with remove_filtered=True; only the first box
        of the first annotation is expected to remain, and the second annotation is
        expected to end up empty.
        """
        filter_step = {'type': 'filter', 'apply_to': 'annotation', 'height_range': '(10.0, 20.0)', 'remove_filtered': True}
        first = DetectionAnnotation(y_mins=[5.0, 10.0], y_maxs=[15.0, 10.0])
        second = DetectionAnnotation(y_mins=[5.0, 10.0], y_maxs=[35.0, 40.0])
        annotations = [first, second]
        expected = [
            DetectionAnnotation(y_mins=[5.0], y_maxs=[15.0]),
            DetectionAnnotation(y_mins=[], y_maxs=[])
        ]
        executor = PostprocessingExecutor([filter_step])

        postprocess_data(executor, annotations, [None, None])

        assert np.array_equal(annotations, expected)
示例#13
0
    def test_filter_regular_annotations_and_container_predictions_by_labels_with_ignore_using_apply_to(self):
        """
        Label filter with remove_filtered=False and apply_to 'all'; both the plain
        annotation and the prediction wrapped in a container are expected to keep their
        labels and to list index 1 in the 'difficult_boxes' metadata.
        """
        filter_step = {'type': 'filter', 'apply_to': 'all', 'labels': ['to_be_filtered'], 'remove_filtered': False}
        prediction = ContainerPrediction({
            'detection_out': DetectionPrediction(labels=['some_label', 'to_be_filtered'])
        })
        expected_prediction = ContainerPrediction({
            'detection_out': DetectionPrediction(
                labels=['some_label', 'to_be_filtered'], metadata={'difficult_boxes': [1]}
            )
        })
        annotation = DetectionAnnotation(labels=['some_label', 'to_be_filtered'])
        expected_annotation = DetectionAnnotation(
            labels=['some_label', 'to_be_filtered'], metadata={'difficult_boxes': [1]}
        )
        executor = PostprocessingExecutor([filter_step])

        postprocess_data(executor, [annotation], [prediction])

        assert prediction == expected_prediction
        assert annotation == expected_annotation
示例#14
0
    def test_filter_annotations_by_height_range_with_ignored(self):
        """
        Height-range filter '(10.0, 20.0)' with remove_filtered=False; the boxes are
        expected to stay, with the out-of-range ones listed in 'difficult_boxes'
        ([1] for the first annotation, [0, 1] for the second).
        """
        filter_step = {
            'type': 'filter', 'apply_to': 'annotation', 'height_range': '(10.0, 20.0)',
            'remove_filtered': False
        }
        first = DetectionAnnotation(y_mins=[5.0, 10.0], y_maxs=[15.0, 10.0])
        second = DetectionAnnotation(y_mins=[5.0, 10.0], y_maxs=[35.0, 40.0])
        annotations = [first, second]
        expected = [
            DetectionAnnotation(y_mins=[5.0, 10.0], y_maxs=[15.0, 10.0], metadata={'difficult_boxes': [1]}),
            DetectionAnnotation(y_mins=[5.0, 10.0], y_maxs=[35.0, 40.0], metadata={'difficult_boxes': [0, 1]})
        ]
        executor = PostprocessingExecutor([filter_step])

        postprocess_data(executor, annotations, [None, None])

        assert np.array_equal(annotations, expected)
示例#15
0
    def test_cast_to_int_to_greater(self):
        """
        cast_to_int with round_policy 'greater'; the fractional prediction coordinates
        are expected to be rounded up, and the integer annotation coordinates are
        expected to stay the same.
        """
        cast_step = {'type': 'cast_to_int', 'round_policy': 'greater'}
        annotation = DetectionAnnotation(x_mins=[-1, 9], y_mins=[0, 11], x_maxs=[5, 10], y_maxs=[5, 10])
        expected_annotation = DetectionAnnotation(x_mins=[-1, 9], y_mins=[0, 11], x_maxs=[5, 10], y_maxs=[5, 10])
        prediction = DetectionPrediction(
            x_mins=[-1.1, -9.9], y_mins=[0.5, 11.5], x_maxs=[5.9, 10.9], y_maxs=[5.1, 10.1]
        )
        expected_prediction = DetectionPrediction(
            x_mins=[-1, -9], y_mins=[1, 12], x_maxs=[6, 11], y_maxs=[6, 11]
        )
        executor = PostprocessingExecutor([cast_step])

        postprocess_data(executor, [annotation], [prediction])

        assert prediction == expected_prediction
        assert annotation == expected_annotation
示例#16
0
    def test_clip_annotation_normalized_boxes_with_size_as_normalized(self):
        """
        clip_boxes on annotations with boxes_normalized=True and an explicit size of 10;
        every coordinate is expected to be clipped to the [0, 1] range.
        """
        clip_step = {'type': 'clip_boxes', 'apply_to': 'annotation', 'boxes_normalized': True, 'size': 10}
        # The same metadata dict is handed to both the input and the expected annotation.
        meta = {'image_size': (10, 10, 3)}
        annotation = DetectionAnnotation(x_mins=[-1, 9], y_mins=[0, 11], x_maxs=[5, 10], y_maxs=[5, 10], metadata=meta)
        zero, one = pytest.approx(0), pytest.approx(1)
        expected = DetectionAnnotation(
            x_mins=[zero, one], y_mins=[zero, one], x_maxs=[one, one], y_maxs=[one, one], metadata=meta
        )
        executor = PostprocessingExecutor([clip_step])

        postprocess_data(executor, [annotation], [None])

        assert annotation == expected
示例#17
0
def make_representation(bounding_boxes,
                        is_ground_truth=False,
                        score=None,
                        meta=None):
    """
    Build detection annotations or predictions from textual box descriptions.

    Args:
        bounding_boxes: string or list of strings, one string per image. Rows are separated by `;`
            and values by spaces. Rows hold 5 values (`label x0 y0 x1 y1`) for annotations and for
            predictions used together with `score`; otherwise prediction rows hold 6 values
            (`score label x0 y0 x1 y1`). An empty string describes an image without boxes.
        is_ground_truth: True if bbs are annotation boxes.
        score: value in [0, 1], if not None, all prediction boxes are considered with the given score.
            A sequence is also accepted; one with a single element is broadcast to every box.
        meta: metadata for representation, indexed by the position of the box string.

    Returns:
        list with one DetectionAnnotation (ground truth) or DetectionPrediction per box string;
        identifiers are the string positions '0', '1', ...
    """

    if not isinstance(bounding_boxes, list):
        bounding_boxes = [bounding_boxes]

    # Compare against None rather than relying on truthiness: a score of 0 is a valid value
    # and an array-like score has no unambiguous truth value.
    has_score = score is not None
    # The score column is absent from the text for annotations and for externally scored predictions.
    expected_columns = 5 if is_ground_truth or has_score else 6

    result = []
    for idx, box in enumerate(bounding_boxes):
        if box == "":
            # Shape the empty array so that it passes the same column check as real rows.
            arr = np.empty((0, expected_columns))
        else:
            arr = np.array(
                [np.fromstring(row, sep=' ') for row in box.split(';')])

        assert arr.shape[1] == expected_columns

        if not is_ground_truth and has_score:
            score_ = score
            if np.isscalar(score_) or len(score_) == 1:
                score_ = np.full(arr.shape[0], score_)
            # Prepend the score column so predictions always end up with 6 columns.
            arr = np.c_[score_, arr]

        if is_ground_truth:
            detection = DetectionAnnotation(str(idx), arr[:, 0], arr[:, 1],
                                            arr[:, 2], arr[:, 3], arr[:, 4])
        else:
            # Columns 0 and 1 are swapped on purpose when passed to DetectionPrediction.
            detection = DetectionPrediction(str(idx), arr[:, 1], arr[:, 0],
                                            arr[:, 2], arr[:, 3], arr[:, 4],
                                            arr[:, 5])

        if meta:
            detection.metadata = meta[idx]

        result.append(detection)

    return result
示例#18
0
    def test_filter_annotations_source_not_found_raise_config_error_exception(self):
        """
        Running a filter whose annotation_source ('ann') is not a key of the container
        annotation is expected to raise ConfigError.
        """
        filter_step = {'type': 'filter', 'annotation_source': 'ann', 'labels': ['to_be_filtered']}
        wrapped = DetectionAnnotation(labels=['some_label', 'to_be_filtered'])
        annotation = ContainerAnnotation({'annotation': wrapped})
        # The executor is built outside the raises-block: only the run itself is checked.
        executor = PostprocessingExecutor([filter_step])

        with pytest.raises(ConfigError):
            postprocess_data(executor, [annotation], [None])
示例#19
0
    def convert(self, annotation_file: str, label_start=1):
        """
        Convert a WIDER-style text annotation file to detection annotations.

        Every line containing '.jpg' starts a record: the line itself is the image
        identifier, the next line is the number of boxes, and that many following lines
        hold `x_min y_min width height ...` separated by single spaces.

        Args:
            annotation_file: path to the annotation text file
            label_start: label index assigned to every box (converted with int)

        Returns:
            (annotations, meta): list of DetectionAnnotation and a dict with 'label_map'
            and 'background_label' (0)
        """
        annotation_file = pathlib.Path(annotation_file).absolute()
        check_exists(annotation_file.as_posix())

        with open(annotation_file.as_posix(), 'r') as wider_annotation:
            lines = wider_annotation.read().split('\n')

        record_starts = [position for position, text in enumerate(lines) if '.jpg' in text]

        annotations = []
        for start in record_starts:
            identifier = lines[start]
            box_count = int(lines[start + 1])
            first_box = start + 2

            x_mins, y_mins, x_maxs, y_maxs = [], [], [], []
            for row in lines[first_box:first_box + box_count]:
                left, top, width, height = map(int, row.split(' ')[0:4])
                x_mins.append(left)
                y_mins.append(top)
                # The file stores width/height; the annotation wants the opposite corner.
                x_maxs.append(left + width)
                y_maxs.append(top + height)

            labels = [int(label_start)] * len(x_mins)
            annotations.append(DetectionAnnotation(identifier, labels, x_mins, y_mins, x_maxs, y_maxs))

        meta = {'label_map': {0: '__background__', int(label_start): 'face'}, 'background_label': 0}
        return annotations, meta
    def test_accuracy_with_unsupported_annotation_type_as_annotation_source_for_container_raises_config_error(self):
        """
        An accuracy metric whose annotation_source points at a detection annotation
        inside a container is expected to raise ConfigError on update.
        """
        metric = {'type': 'accuracy', 'top_k': 1, 'annotation_source': 'annotation'}
        wrapped = DetectionAnnotation('identifier', 3)
        annotations = [ContainerAnnotation({'annotation': wrapped})]
        predictions = [ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])]
        dispatcher = MetricsExecutor([metric], None)

        with pytest.raises(ConfigError):
            dispatcher.update_metrics_on_batch(annotations, predictions)
示例#21
0
def make_representation(bounding_boxes, is_ground_truth=False, score=None):
    """
    Build detection annotations or predictions from textual box descriptions.

    Args:
        bounding_boxes: string or list of strings, one string per image, in matrix syntax:
            rows separated by `;`, values by spaces. Rows hold 5 values (`label x0 y0 x1 y1`) for
            annotations and for predictions used together with `score`; otherwise prediction rows
            hold 6 values (`score label x0 y0 x1 y1`). An empty string describes an image without boxes
        is_ground_truth: True if bbs are annotation boxes
        score: value in [0, 1], if not None, all prediction boxes are considered with the given score

    Returns:
        list with one DetectionAnnotation (ground truth) or DetectionPrediction per box string;
        identifiers are the string positions '0', '1', ...
    """
    if not isinstance(bounding_boxes, list):
        bounding_boxes = [bounding_boxes]

    # The score column is absent from the text for annotations and for externally scored predictions.
    expected_columns = 5 if is_ground_truth or score is not None else 6

    res = []
    for i, bb in enumerate(bounding_boxes):
        if bb == "":
            # Shape the empty array so that it passes the same column check as real rows.
            arr = np.empty((0, expected_columns))
        else:
            # np.mat was removed in NumPy 2.0; np.asmatrix parses the same string syntax
            # and is available in both older and newer NumPy releases.
            arr = np.array(np.asmatrix(bb))

        assert arr.shape[1] == expected_columns

        if not is_ground_truth and score is not None:
            # Prepend the score column so predictions always end up with 6 columns.
            arr = np.c_[np.full(arr.shape[0], score), arr]

        if is_ground_truth:
            r = DetectionAnnotation(str(i), arr[:, 0], arr[:, 1], arr[:, 2],
                                    arr[:, 3], arr[:, 4])
        else:
            # Columns 0 and 1 are swapped on purpose when passed to DetectionPrediction.
            r = DetectionPrediction(str(i), arr[:, 1], arr[:, 0], arr[:, 2],
                                    arr[:, 3], arr[:, 4], arr[:, 5])
        res.append(r)
    return res
    def test_accuracy_with_wrong_annotation_type_raise_config_error_exception(self):
        """
        Updating an accuracy metric with a detection annotation and a classification
        prediction is expected to raise ConfigError.
        """
        metric = {'type': 'accuracy', 'top_k': 1}
        annotations = [DetectionAnnotation('identifier', 3)]
        predictions = [ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])]
        dispatcher = MetricsExecutor([metric], None)

        with pytest.raises(ConfigError):
            dispatcher.update_metrics_on_batch(annotations, predictions)
    def test_accuracy_with_unsupported_annotations_in_container_raise_config_error_exception(self):
        """
        Updating an accuracy metric with a container that wraps a detection annotation
        is expected to raise ConfigError.
        """
        wrapped = DetectionAnnotation('identifier', 3)
        annotations = [ContainerAnnotation({'annotation': wrapped})]
        predictions = [ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])]
        # Here the executor receives a dict with a 'metrics' list rather than a bare list.
        config = {
            'annotation': 'mocked',
            'metrics': [{'type': 'accuracy', 'top_k': 1}]
        }
        dispatcher = MetricsExecutor(config, None)

        with pytest.raises(ConfigError):
            dispatcher.update_metrics_on_batch(annotations, predictions)
示例#24
0
    def test_clip_predictions_denormalized_boxes_with_size(self):
        """
        clip_boxes on predictions with boxes_normalized=False and an explicit size of 10;
        the prediction coordinates are expected to be clipped to the [0, 10] range.
        """
        clip_step = {'type': 'clip_boxes', 'apply_to': 'prediction', 'boxes_normalized': False, 'size': 10}
        annotation = DetectionAnnotation(metadata={'image_size': (10, 10, 3)})
        prediction = DetectionPrediction(x_mins=[-1, 9], y_mins=[0, 11], x_maxs=[5, 10], y_maxs=[5, 10])
        approx = pytest.approx
        expected_prediction = DetectionPrediction(
            x_mins=[approx(0), approx(9)],
            y_mins=[approx(0), approx(10)],
            x_maxs=[approx(5), approx(10)],
            y_maxs=[approx(5), approx(10)]
        )
        executor = PostprocessingExecutor([clip_step])

        postprocess_data(executor, [annotation], [prediction])

        assert prediction == expected_prediction
示例#25
0
    def test_resize_prediction_boxes(self):
        """
        resize_prediction_boxes with an annotation image size of (100, 100, 3); every
        prediction coordinate is expected to come out multiplied by 100.
        """
        resize_step = {'type': 'resize_prediction_boxes'}
        annotation = DetectionAnnotation(metadata={'image_size': (100, 100, 3)})
        prediction = DetectionPrediction(x_mins=[0, 7], y_mins=[0, 7], x_maxs=[5, 8], y_maxs=[5, 8])
        approx = pytest.approx
        expected_prediction = DetectionPrediction(
            x_mins=[approx(0), approx(700)],
            y_mins=[approx(0), approx(700)],
            x_maxs=[approx(500), approx(800)],
            y_maxs=[approx(500), approx(800)]
        )
        executor = PostprocessingExecutor([resize_step])

        postprocess_data(executor, [annotation], [prediction])

        assert prediction == expected_prediction
示例#26
0
    def convert(self, wider_annotation: str, label_start=1):
        """
        Convert a WIDER-style text annotation file to detection annotations.

        Every line containing '.jpg' starts a record: the line itself is the image
        identifier, the next line is the number of boxes, and that many following lines
        hold `x y width height ...` separated by single spaces.

        Args:
            wider_annotation: path to wider validation file
            label_start: start index for labels
        Returns:
            (annotations, meta): list of DetectionAnnotation and a dict with 'label_map'
            and 'background_label' (0)
        """
        wider_annotation = get_path(wider_annotation)
        lines = read_txt(wider_annotation)

        record_starts = [position for position, text in enumerate(lines) if '.jpg' in text]

        annotations = []
        for start in record_starts:
            identifier = lines[start]
            box_count = lines[start + 1]
            first_box = start + 2
            rows = lines[first_box:first_box + int(box_count)]

            x_mins, y_mins, x_maxs, y_maxs = [], [], [], []
            for row in rows:
                # Only the first four fields describe the box; the helper turns
                # x/y/width/height into corner coordinates.
                fields = map(float, row.split(' ')[0:4])
                x_min, y_min, x_max, y_max = convert_bboxes_xywh_to_x1y1x2y2(*fields)
                x_mins.append(x_min)
                y_mins.append(y_min)
                x_maxs.append(x_max)
                y_maxs.append(y_max)

            labels = [int(label_start)] * len(x_mins)
            annotations.append(DetectionAnnotation(identifier, labels, x_mins, y_mins, x_maxs, y_maxs))

        meta = {
            'label_map': {0: '__background__', int(label_start): 'face'},
            'background_label': 0
        }
        return annotations, meta
示例#27
0
    def convert(self, devkit_dir):
        """
        Convert the VOC annotations listed in ImageSets/Main/test.txt to detection annotations.

        Also stores the JPEGImages directory (as a posix string) in self.image_root.

        Args:
            devkit_dir: path to VOC2007 devkit dir (e.g. .../VOCdevkit/VOC2007)
        Returns:
            (annotations, meta): a list with one DetectionAnnotation per listed image and a dict
            with 'label_map' (enumerated _VOC_CLASSES) and 'background_label' (0)
        Raises:
            FileNotFoundError: when the image named in an annotation file is not in JPEGImages
        """
        devkit_dir = Path(devkit_dir)
        check_exists(devkit_dir.as_posix())

        annotation_directory = devkit_dir / 'Annotations'
        images_directory = devkit_dir / 'JPEGImages'
        self.image_root = images_directory.as_posix()

        for required_directory in (annotation_directory, images_directory):
            check_exists(required_directory.as_posix())

        image_set_file = devkit_dir / 'ImageSets' / 'Main' / 'test.txt'
        with image_set_file.open() as image_set:
            image_list = image_set.read().strip().split()

        coordinate_tags = ('xmin', 'ymin', 'xmax', 'ymax')
        detections = []
        for image in tqdm(image_list):
            xml_path = annotation_directory / '{}.xml'.format(image)
            tree = ET.parse(xml_path.as_posix())

            # The identifier is the file name stored inside the XML, not the image set entry.
            identifier = tree.find('.//filename').text
            image_path = images_directory / identifier
            if not image_path.is_file():
                raise FileNotFoundError("{}: {}".format(os.strerror(errno.ENOENT), image_path))

            labels, difficult_indices = [], []
            corners = {tag: [] for tag in coordinate_tags}
            object_nodes = (node for node in tree.getroot() if node.tag.startswith('object'))
            for position, node in enumerate(object_nodes):
                bbox = node.find('bndbox')
                # `position` equals the number of labels collected so far,
                # i.e. the index this box is about to get.
                if int(node.find('difficult').text) == 1:
                    difficult_indices.append(position)

                labels.append(_CLASS_TO_IND[node.find('name').text])
                for tag in coordinate_tags:
                    # Every coordinate is stored shifted down by one.
                    corners[tag].append(float(bbox.find(tag).text) - 1)

            image_annotation = DetectionAnnotation(
                identifier, labels, corners['xmin'], corners['ymin'], corners['xmax'], corners['ymax']
            )
            image_annotation.metadata['difficult_boxes'] = difficult_indices
            detections.append(image_annotation)

        meta = {'label_map': dict(enumerate(_VOC_CLASSES)), 'background_label': 0}

        return detections, meta
示例#28
0
    def convert(self,
                file_path,
                image_names=None,
                label_start=1,
                background_label=None):
        """
        Convert an XML file with per-frame detections to detection annotations.

        The document is walked as root -> frame groups -> frames -> detections. Each
        detection must provide a 'type' element (class name) and a 'roi' element with
        whitespace-separated box values that are passed to convert_bboxes_xywh_to_x1y1x2y2;
        an optional 'is_ignored' element equal to 1 marks the box as difficult.

        Args:
            file_path: path to file with data
            image_names: optional value forwarded to self.rename_identifiers to rename the
                annotation identifiers; skipped when empty or None
            label_start: index given to the first class name in sorted order (converted with int)
            background_label: optional background label index forwarded to self.add_background;
                None means that no background label is added
        Returns:
            (annotations, meta): list of DetectionAnnotation (identifier is the frame tag plus
            '.png') and a dict with 'label_map' and whatever add_background stores in it
        Raises:
            ValueError: when a detection has no 'type' or no 'roi' element
        """
        root = read_xml(file_path)

        label_start = int(label_start)
        labels_set = sorted(self.get_label_set(root))
        # Class names receive consecutive indices in sorted order, starting at label_start.
        class_to_ind = {
            class_label: ind
            for ind, class_label in enumerate(labels_set, start=label_start)
        }
        label_map = {ind: class_label for class_label, ind in class_to_ind.items()}

        annotations = []
        for frames in root:
            for frame in frames:
                identifier = frame.tag + '.png'
                labels, x_mins, y_mins, x_maxs, y_maxs = [], [], [], [], []
                difficult_indices = []
                for annotation in frame:
                    label = annotation.find('type')
                    if label is None:
                        raise ValueError(
                            '"{}" contains detection without "{}"'.format(
                                file_path, 'type'))

                    box = annotation.find('roi')
                    if box is None:
                        raise ValueError(
                            '"{}" contains detection without "{}"'.format(
                                file_path, 'roi'))
                    box = list(map(float, box.text.split()))

                    is_ignored = annotation.find('is_ignored')
                    if is_ignored is not None and int(is_ignored.text) == 1:
                        # len(labels) is the index this box is about to get.
                        difficult_indices.append(len(labels))

                    labels.append(class_to_ind[label.text])
                    x_min, y_min, x_max, y_max = convert_bboxes_xywh_to_x1y1x2y2(
                        *box)
                    x_mins.append(x_min)
                    y_mins.append(y_min)
                    x_maxs.append(x_max)
                    y_maxs.append(y_max)

                detection_annotation = DetectionAnnotation(
                    identifier, labels, x_mins, y_mins, x_maxs, y_maxs)
                detection_annotation.metadata[
                    'difficult_boxes'] = difficult_indices
                annotations.append(detection_annotation)

        if image_names:
            self.rename_identifiers(annotations, image_names)

        meta = {}
        # Compare against None instead of truthiness: 0 is a legitimate background label
        # index and must not be dropped silently.
        if background_label is not None:
            self.add_background(label_map, meta, background_label)
        meta['label_map'] = label_map

        return annotations, meta
    def convert(self, file_path, image_names=None, label_start=1, background_label=None):
        """
        Convert an XML file with per-frame detections to detection annotations.

        The document is walked as root -> frame groups -> frames -> detections. Each
        detection must provide a 'type' element (class name) and a 'roi' element whose
        whitespace-separated values are read as x, y, width, height; an optional
        'is_ignored' element equal to 1 marks the box as difficult.

        Args:
            file_path: path to file with data
            image_names: optional value forwarded to self.rename_identifiers; skipped when None
            label_start: index given to the first class name in sorted order (converted with int)
            background_label: value forwarded to self.add_background
        Returns:
            (annotations, meta): list of DetectionAnnotation (identifier is the frame tag plus
            '.png') and a dict with 'label_map' and whatever add_background stores in it
        Raises:
            ValueError: when a detection has no 'type' or no 'roi' element
        """
        check_exists(file_path)
        label_start = int(label_start)

        root = ET.parse(file_path).getroot()

        # First pass: collect every class name so that indices can be assigned in sorted order.
        labels_set = set()
        for annotation in (item for frames in root for frame in frames for item in frame):
            label = annotation.find('type')
            if label is None:
                raise ValueError('"{}" contains detection without label'.format(file_path))
            labels_set.add(label.text)

        class_to_ind = {
            class_label: label_start + offset
            for offset, class_label in enumerate(sorted(labels_set))
        }
        label_map = {ind: class_label for class_label, ind in class_to_ind.items()}

        # Second pass: build one annotation per frame.
        annotations = []
        for frames in root:
            for frame in frames:
                labels, difficult_indices = [], []
                x_mins, y_mins, x_maxs, y_maxs = [], [], [], []
                for annotation in frame:
                    label = annotation.find('type')
                    if label is None:
                        raise ValueError('"{}" contains detection without "{}"'.format(file_path, 'type'))

                    roi = annotation.find('roi')
                    if roi is None:
                        raise ValueError('"{}" contains detection without "{}"'.format(file_path, 'roi'))
                    box = [float(value) for value in roi.text.split()]

                    is_ignored = annotation.find('is_ignored')
                    if is_ignored is not None and int(is_ignored.text) == 1:
                        # len(labels) is the index this box is about to get.
                        difficult_indices.append(len(labels))

                    labels.append(class_to_ind[label.text])
                    # The roi stores width/height; the annotation wants the opposite corner.
                    x_mins.append(box[0])
                    y_mins.append(box[1])
                    x_maxs.append(box[0] + box[2])
                    y_maxs.append(box[1] + box[3])

                detection_annotation = DetectionAnnotation(
                    frame.tag + '.png', labels, x_mins, y_mins, x_maxs, y_maxs
                )
                detection_annotation.metadata['difficult_boxes'] = difficult_indices
                annotations.append(detection_annotation)

        if image_names is not None:
            self.rename_identifiers(annotations, image_names)

        meta = {}
        self.add_background(label_map, meta, background_label)
        meta['label_map'] = label_map

        return annotations, meta