示例#1
0
    def test_project_can_merge_item_annotations(self):
        """Items with equal (id, subset) from different sources merge
        into one item holding the union of their annotations."""
        class SourceA(Extractor):
            def __iter__(self):
                annotations = [
                    Label(2, id=3),
                    Label(3, attributes={'x': 1}),
                ]
                yield DatasetItem(id=1, subset='train',
                    annotations=annotations)

        class SourceB(Extractor):
            def __iter__(self):
                annotations = [
                    Label(3, attributes={'x': 1}),
                    Label(4, id=4),
                ]
                yield DatasetItem(id=1, subset='train',
                    annotations=annotations)

        project = Project()
        project.env.extractors.register('t1', SourceA)
        project.env.extractors.register('t2', SourceB)
        project.add_source('source1', {'format': 't1'})
        project.add_source('source2', {'format': 't2'})

        merged = project.make_dataset()

        self.assertEqual(1, len(merged))

        # The Label(3, ...) annotation appears in both sources and is
        # deduplicated: 2 + 2 - 1 = 3.
        item = next(iter(merged))
        self.assertEqual(3, len(item.annotations))
示例#2
0
文件: __init__.py 项目: zz202/cvat
def create_command(args):
    """Create a new project at args.dst_dir.

    Refuses to clobber a non-empty existing project env or dataset dir
    unless args.overwrite is set, in which case stale data is removed.

    Returns:
        0 on success.

    Raises:
        CliException: a target directory is non-empty and --overwrite
            was not passed.
    """
    project_dir = osp.abspath(args.dst_dir)

    def ensure_empty_dir(dir_path, ignore_errors=False):
        # Shared overwrite check for both project directories: refuse,
        # or wipe when --overwrite was given.
        if osp.isdir(dir_path) and os.listdir(dir_path):
            if not args.overwrite:
                raise CliException("Directory '%s' already exists "
                    "(pass --overwrite to overwrite)" % dir_path)
            # NOTE: remove the dir to avoid using data from a previous
            # project
            shutil.rmtree(dir_path, ignore_errors=ignore_errors)

    ensure_empty_dir(osp.join(project_dir, DEFAULT_CONFIG.env_dir),
        ignore_errors=True)
    ensure_empty_dir(osp.join(project_dir, DEFAULT_CONFIG.dataset_dir))

    # Default the project name to the target directory's basename.
    project_name = args.name
    if project_name is None:
        project_name = osp.basename(project_dir)

    log.info("Creating project at '%s'" % project_dir)

    Project.generate(project_dir, {
        'project_name': project_name,
    })

    log.info("Project has been created at '%s'" % project_dir)

    return 0
示例#3
0
    def test_custom_extractor_can_be_created(self):
        """A registered custom extractor is used to load a source and
        reproduces its items exactly."""
        class CustomExtractor(Extractor):
            def __iter__(self):
                train = [DatasetItem(id=i, subset='train')
                    for i in range(3)]
                test = [DatasetItem(id=i, subset='test') for i in (3, 4)]
                default = [DatasetItem(id=i) for i in (1, 2, 3)]
                return iter(train + test + default)

        extractor_name = 'ext1'
        project = Project()
        project.env.extractors.register(extractor_name, CustomExtractor)
        project.add_source('src1', {
            'url': 'path',
            'format': extractor_name,
        })

        dataset = project.make_dataset()

        compare_datasets(self, CustomExtractor(), dataset)
示例#4
0
    def __call__(self, path, **extra_params):
        """Build a project with one 'yolo' source per found .data file.

        `path` may be a single .data file or a directory containing them.
        """
        from datumaro.components.project import Project  # cyclic import
        project = Project()

        if path.endswith('.data') and osp.isfile(path):
            config_paths = [path]
        else:
            config_paths = glob(osp.join(path, '*.data'))

        if not osp.exists(path) or not config_paths:
            raise Exception("Failed to find 'yolo' dataset at '%s'" % path)

        for config_path in config_paths:
            log.info("Found a dataset at '%s'" % config_path)

            # Source name combines the parent dir and the file stem to
            # stay unique across directories.
            parent = osp.basename(osp.dirname(config_path))
            stem = osp.splitext(osp.basename(config_path))[0]
            project.add_source('%s_%s' % (parent, stem), {
                'url': config_path,
                'format': 'yolo',
                'options': dict(extra_params),
            })

        return project
示例#5
0
    def test_can_batch_launch_custom_model(self):
        """InferenceWrapper feeds items to the model in fixed-size
        batches; 'idx' is the in-batch position of each item."""
        class TestExtractor(Extractor):
            def __iter__(self):
                return iter(DatasetItem(id=i, subset='train',
                    image=np.array([i])) for i in range(5))

        class TestLauncher(Launcher):
            def launch(self, inputs):
                # Echo the in-batch index and input value so the test
                # can check how inputs were batched.
                for idx, inp in enumerate(inputs):
                    yield [Label(attributes={
                        'idx': idx, 'data': inp.item()})]

        model_name = 'model'
        launcher_name = 'custom_launcher'

        project = Project()
        project.env.launchers.register(launcher_name, TestLauncher)
        project.add_model(model_name, {'launcher': launcher_name})
        model = project.make_executable_model(model_name)

        batch_size = 3
        executor = InferenceWrapper(TestExtractor(), model,
            batch_size=batch_size)

        for item in executor:
            self.assertEqual(1, len(item.annotations))
            annotation = item.annotations[0]
            self.assertEqual(int(item.id) % batch_size,
                annotation.attributes['idx'])
            self.assertEqual(int(item.id), annotation.attributes['data'])
示例#6
0
    def __call__(self, path, **extra_params):
        """Build a project with one source per discovered CVAT subset
        file under `path`."""
        from datumaro.components.project import Project  # cyclic import
        project = Project()

        subset_paths = self.find_subsets(path)
        if len(subset_paths) == 0:
            raise Exception("Failed to find 'cvat' dataset at '%s'" % path)

        for subset_path in subset_paths:
            # Skip anything that is not a plain file.
            if not osp.isfile(subset_path):
                continue

            log.info("Found a dataset at '%s'" % subset_path)

            subset_name = osp.splitext(osp.basename(subset_path))[0]
            project.add_source(subset_name, {
                'url': subset_path,
                'format': self.EXTRACTOR_NAME,
                'options': dict(extra_params),
            })

        return project
示例#7
0
    def test_can_batch_launch_custom_model(self):
        """ModelTransform feeds items to the model in fixed-size
        batches; 'idx' is the in-batch position of each item."""
        dataset = Dataset.from_iterable(
            [DatasetItem(id=i, subset='train', image=np.array([i]))
                for i in range(5)],
            categories=['label'])

        class TestLauncher(Launcher):
            def launch(self, inputs):
                # Report the in-batch index and the raw input value.
                for idx, inp in enumerate(inputs):
                    yield [Label(0, attributes={
                        'idx': idx, 'data': inp.item()})]

        model_name = 'model'
        launcher_name = 'custom_launcher'

        project = Project()
        project.env.launchers.register(launcher_name, TestLauncher)
        project.add_model(model_name, {'launcher': launcher_name})
        model = project.make_executable_model(model_name)

        batch_size = 3
        executor = ModelTransform(dataset, model, batch_size=batch_size)

        for item in executor:
            self.assertEqual(1, len(item.annotations))
            annotation = item.annotations[0]
            self.assertEqual(int(item.id) % batch_size,
                annotation.attributes['idx'])
            self.assertEqual(int(item.id), annotation.attributes['data'])
示例#8
0
    def __call__(self, path, **extra_params):
        """Build a project with one source per found annotation file.

        `path` may be a single .json file, a dataset root, or the
        annotations directory itself.
        """
        from datumaro.components.project import Project  # cyclic import
        project = Project()

        if path.endswith('.json') and osp.isfile(path):
            subset_paths = [path]
        else:
            subset_paths = glob(osp.join(path, '*.json'))

            # Also search the annotations subdirectory, unless the given
            # path already points at it.
            if osp.basename(
                    osp.normpath(path)) != DatumaroPath.ANNOTATIONS_DIR:
                path = osp.join(path, DatumaroPath.ANNOTATIONS_DIR)
            subset_paths += glob(osp.join(path, '*.json'))

        if not subset_paths:
            raise Exception("Failed to find 'datumaro' dataset at '%s'" % path)

        for subset_path in subset_paths:
            if not osp.isfile(subset_path):
                continue

            log.info("Found a dataset at '%s'" % subset_path)

            subset_name = osp.splitext(osp.basename(subset_path))[0]
            project.add_source(subset_name, {
                'url': subset_path,
                'format': self.EXTRACTOR_NAME,
                'options': dict(extra_params),
            })

        return project
示例#9
0
    def test_source_datasets_can_be_merged(self):
        """Merging two disjoint sources yields the union of their items."""
        class TestExtractor(Extractor):
            def __init__(self, url, n=0, s=0):
                super().__init__(length=n)
                self.n = n
                self.s = s

            def __iter__(self):
                return (DatasetItem(id=self.s + i, subset='train')
                    for i in range(self.n))

        n1, n2 = 2, 4

        project = Project()
        # The second source starts numbering where the first stops, so
        # the item id sets are disjoint and nothing is deduplicated.
        project.env.extractors.register('e1',
            lambda p: TestExtractor(p, n=n1))
        project.env.extractors.register('e2',
            lambda p: TestExtractor(p, n=n2, s=n1))
        project.add_source('source1', {'format': 'e1'})
        project.add_source('source2', {'format': 'e2'})

        dataset = project.make_dataset()

        self.assertEqual(n1 + n2, len(dataset))
示例#10
0
def is_project_path(value):
    """Return True if `value` points at a loadable project, else False.

    Any failure to load (missing dir, bad config, ...) just means the
    path is not a project; no exception escapes.
    """
    if not value:
        return False
    try:
        Project.load(value)
    except Exception:
        return False
    return True
示例#11
0
    def test_project_own_dataset_can_be_modified(self):
        """An item put into the project's own dataset is retained."""
        project = Project()
        dataset = project.make_dataset()

        new_item = DatasetItem(id=1)
        dataset.put(new_item)

        self.assertEqual(new_item, next(iter(dataset)))
示例#12
0
    def test_source_false_when_source_doesnt_exist(self):
        """Testing an unknown source name reports failure."""
        known_name = 'qwerty'
        project = Project()
        project.add_source(known_name)
        target = SourceTarget(project=project)

        # Query a name that was never registered.
        self.assertFalse(target.test(known_name + '123'))
示例#13
0
    def test_source_true_when_source_exists(self):
        """Testing a registered source name reports success."""
        known_name = 'qwerty'
        project = Project()
        project.add_source(known_name)
        target = SourceTarget(project=project)

        self.assertTrue(target.test(known_name))
示例#14
0
    def test_add_source(self):
        """An added source is retrievable and equals the original."""
        name = 'source'
        origin = Source({'url': 'path', 'format': 'ext'})
        project = Project()

        project.add_source(name, origin)

        added = project.get_source(name)
        self.assertIsNotNone(added)
        self.assertEqual(added, origin)
示例#15
0
    def test_added_source_can_be_saved(self):
        """An added source appears in the serialized project config."""
        name = 'source'
        origin = Source({
            'url': 'path',
        })
        project = Project()
        project.add_source(name, origin)

        config = project.config

        self.assertEqual(origin, config.sources[name])
示例#16
0
    def test_can_save_and_load_own_dataset(self):
        """A saved project-own dataset loads back with the same items."""
        with TestDir() as test_dir:
            source_project = Project()
            source_dataset = source_project.make_dataset()
            source_dataset.put(DatasetItem(id=1))
            source_dataset.save(test_dir)

            loaded_dataset = Project.load(test_dir).make_dataset()

            self.assertEqual(list(source_dataset), list(loaded_dataset))
示例#17
0
    def __call__(self, path, **extra_params):
        """Build a project with one source per discovered subset."""
        from datumaro.components.project import Project  # cyclic import
        project = Project()

        subsets = self.find_subsets(path)
        if len(subsets) == 0:
            raise Exception("Failed to find dataset at '%s'" % path)

        for desc in subsets:
            # Source name is the subset file's stem.
            name = osp.splitext(osp.basename(desc['url']))[0]
            project.add_source(name, desc)

        return project
示例#18
0
    def test_can_do_transform_with_custom_model(self):
        """End-to-end pipeline: run inference with a custom model, save
        the predictions with a custom converter, and read them back with
        a custom extractor."""
        class TestExtractorSrc(Extractor):
            # Source: two items whose pixel values equal their label index.
            def __iter__(self):
                for i in range(2):
                    yield DatasetItem(id=i,
                                      image=np.ones([2, 2, 3]) * i,
                                      annotations=[Label(i)])

        class TestLauncher(Launcher):
            # "Model": predicts the label from the top-left pixel value.
            def launch(self, inputs):
                for inp in inputs:
                    yield [Label(inp[0, 0, 0])]

        class TestConverter(Converter):
            # Writes each item's first label to '<id>.txt' in save_dir.
            def __call__(self, extractor, save_dir):
                for item in extractor:
                    with open(osp.join(save_dir, '%s.txt' % item.id),
                              'w') as f:
                        f.write(str(item.annotations[0].label) + '\n')

        class TestExtractorDst(Extractor):
            # Reads the converter's .txt output back into dataset items.
            def __init__(self, url):
                super().__init__()
                # Sorted for a deterministic item order.
                self.items = [
                    osp.join(url, p) for p in sorted(os.listdir(url))
                ]

            def __iter__(self):
                for path in self.items:
                    with open(path, 'r') as f:
                        index = osp.splitext(osp.basename(path))[0]
                        label = int(f.readline().strip())
                        yield DatasetItem(id=index, annotations=[Label(label)])

        model_name = 'model'
        launcher_name = 'custom_launcher'
        extractor_name = 'custom_extractor'

        project = Project()
        project.env.launchers.register(launcher_name, TestLauncher)
        project.env.extractors.register(extractor_name, TestExtractorSrc)
        # The converter is registered under the same name as the extractor.
        project.env.converters.register(extractor_name, TestConverter)
        project.add_model(model_name, {'launcher': launcher_name})
        project.add_source('source', {'format': extractor_name})

        with TestDir() as test_dir:
            project.make_dataset().apply_model(model=model_name,
                                               save_dir=test_dir)

            # Load the saved results with the destination extractor and
            # check that the model's predictions round-tripped.
            result = Project.load(test_dir)
            result.env.extractors.register(extractor_name, TestExtractorDst)
            it = iter(result.make_dataset())
            item1 = next(it)
            item2 = next(it)
            self.assertEqual(0, item1.annotations[0].label)
            self.assertEqual(1, item2.annotations[0].label)
示例#19
0
    def test_project_filter_can_be_applied(self):
        """An XPath filter restricts which items the dataset keeps."""
        class TestExtractor(Extractor):
            def __iter__(self):
                return (DatasetItem(id=i, subset='train')
                    for i in range(10))

        project = Project()
        project.env.extractors.register('type', TestExtractor)
        project.add_source('source', {'format': 'type'})

        filtered = project.make_dataset().extract('/item[id < 5]')

        # ids 0..4 satisfy the filter
        self.assertEqual(5, len(filtered))
示例#20
0
    def __call__(self, path):
        """Build a project with a single source pointing at `path`."""
        from datumaro.components.project import Project  # cyclic import
        project = Project()

        if not osp.exists(path):
            raise Exception("Failed to find 'datumaro' dataset at '%s'" % path)

        # Source name is the path's stem.
        name = osp.splitext(osp.basename(path))[0]
        project.add_source(name, {
            'url': path,
            'format': self.EXTRACTOR_NAME,
        })

        return project
示例#21
0
文件: image_dir.py 项目: zz202/cvat
    def __call__(self, path, **extra_params):
        """Build a project with a single image-directory source."""
        from datumaro.components.project import Project # cyclic import
        project = Project()

        if not osp.isdir(path):
            raise Exception("Can't find a directory at '%s'" % path)

        name = osp.basename(osp.normpath(path))
        # NOTE(review): 'url' is the directory's basename, not the full
        # path — presumably resolved relative to the project; confirm.
        project.add_source(name, {
            'url': name,
            'format': self.EXTRACTOR_NAME,
            'options': dict(extra_params),
        })

        return project
示例#22
0
    def test_transform_fails_on_inplace_update_of_stage(self):
        """Transforming a committed stage in place is rejected, both
        without and with --overwrite."""
        with TestDir() as test_dir:
            dataset_url = osp.join(test_dir, 'dataset')
            Dataset.from_iterable([
                DatasetItem(id=1, annotations=[Bbox(1, 2, 3, 4, label=1)]),
            ], categories=['a', 'b']).export(dataset_url, 'coco',
                save_images=True)

            project_dir = osp.join(test_dir, 'proj')
            with Project.init(project_dir) as project:
                project.import_source('source-1', dataset_url, 'coco',
                    no_cache=True)
                project.commit('first commit')

            with self.subTest('without overwrite'):
                # The command itself must fail.
                run(self, 'transform', '-p', project_dir,
                    '-t', 'random_split', 'HEAD:source-1',
                    expected_code=1)

            with self.subTest('with overwrite'):
                # Even forced, a committed stage stays read-only.
                with self.assertRaises(ReadonlyDatasetError):
                    run(self, 'transform', '-p', project_dir, '--overwrite',
                        '-t', 'random_split', 'HEAD:source-1')
示例#23
0
    def test_can_release_resources_on_checkout(self, fxt_sample_video):
        """Checking out a past revision removes data added by later
        commits.

        fxt_sample_video: pytest fixture providing a sample video path.
        """
        test_dir = scope_add(TestDir())

        project = scope_add(Project.init(test_dir))

        # commit 1: a plain one-item 'datumaro' source saved under 'src'
        src_url = osp.join(test_dir, 'src')
        src = Dataset.from_iterable([
            DatasetItem(1),
        ], categories=['a'])
        src.save(src_url)
        project.add_source(src_url, 'datumaro')
        project.commit('commit 1')

        # commit 2: replace it with a 'video_frames' source of the same
        # name, loaded from the fixture's video file
        project.remove_source('src', keep_data=False)

        project.import_source('src',
                              osp.dirname(fxt_sample_video),
                              'video_frames',
                              rpath=osp.basename(fxt_sample_video))
        project.commit('commit 2')

        # After commit 2 the video source is in effect (4 frames).
        assert len(project.working_tree.make_dataset()) == 4
        assert osp.isdir(osp.join(test_dir, 'src'))

        project.checkout('HEAD~1')

        # Back at commit 1 only the original single-item dataset remains.
        assert len(project.working_tree.make_dataset()) == 1
示例#24
0
def main(args=None):
    """CLI entry point: resolve the requested target, then dispatch.

    Returns the command's exit code, or 1 when target parsing fails.
    """
    parser = build_parser()
    args = parser.parse_args(args)

    project_path = args.project_dir
    project = Project.load(project_path) \
        if is_project_path(project_path) else None

    try:
        args.target = target_selector(
            ProjectTarget(is_default=True, project=project),
            SourceTarget(project=project),
            ExternalDatasetTarget(),
            ImageTarget()
        )(args.target)
        # When the target itself is a project, run relative to it.
        if args.target[0] == TargetKinds.project:
            if is_project_path(args.target[1]):
                args.project_dir = osp.dirname(osp.abspath(args.target[1]))
    except argparse.ArgumentTypeError as e:
        print(e)
        parser.print_help()
        return 1

    return process_command(args.target, args.params, args)
    def test_can_import(self):
        """Importing the dummy TF Detection API dataset produces the
        expected items, boxes, and label categories."""
        # Reference: exactly what the dummy dataset should parse into.
        target_dataset = Dataset.from_iterable(
            [
                DatasetItem(id=1,
                            subset='train',
                            image=np.ones((16, 16, 3)),
                            annotations=[
                                Bbox(0, 4, 4, 8, label=2),
                                Bbox(0, 4, 4, 4, label=3),
                                Bbox(2, 4, 4, 4),
                            ],
                            attributes={'source_id': '1'}),
                DatasetItem(id=2,
                            subset='val',
                            image=np.ones((8, 8, 3)),
                            annotations=[
                                Bbox(1, 2, 4, 2, label=3),
                            ],
                            attributes={'source_id': '2'}),
                DatasetItem(id=3,
                            subset='test',
                            image=np.ones((5, 4, 3)) * 3,
                            attributes={'source_id': '3'}),
            ],
            categories={
                # Labels are named 'label_0'..'label_9'.
                AnnotationType.label:
                LabelCategories.from_iterable('label_' + str(label)
                                              for label in range(10)),
            })

        dataset = Project.import_from(DUMMY_DATASET_DIR, 'tf_detection_api') \
            .make_dataset()

        compare_datasets(self, target_dataset, dataset)
示例#26
0
    def test_can_import(self):
        """Importing the dummy YOLO dataset produces the expected item
        and label categories."""
        class DstExtractor(Extractor):
            # Reference: the single expected item with its two boxes.
            def __iter__(self):
                return iter([
                    DatasetItem(id=1,
                                subset='train',
                                image=np.ones((10, 15, 3)),
                                annotations=[
                                    Bbox(0, 2, 4, 2, label=2),
                                    Bbox(3, 3, 2, 3, label=4),
                                ]),
                ])

            def categories(self):
                # Labels are named 'label_0'..'label_9'.
                label_categories = LabelCategories()
                for i in range(10):
                    label_categories.add('label_' + str(i))
                return {
                    AnnotationType.label: label_categories,
                }

        dataset = Project.import_from(DUMMY_DATASET_DIR, 'yolo') \
            .make_dataset()

        compare_datasets(self, DstExtractor(), dataset)
    def test_can_import(self):
        """Importing the dummy MOT sequence dataset produces the
        expected item, box attributes, and label categories."""
        # Reference: one item with a single box carrying MOT-specific
        # attributes (occluded / visibility / ignored).
        expected_dataset = Dataset.from_iterable(
            [
                DatasetItem(id=1,
                            image=np.ones((16, 16, 3)),
                            annotations=[
                                Bbox(0,
                                     4,
                                     4,
                                     8,
                                     label=2,
                                     attributes={
                                         'occluded': False,
                                         'visibility': 1.0,
                                         'ignored': False,
                                     }),
                            ]),
            ],
            categories={
                # Labels are named 'label_0'..'label_9'.
                AnnotationType.label:
                LabelCategories.from_iterable('label_' + str(label)
                                              for label in range(10)),
            })

        dataset = Project.import_from(DUMMY_DATASET_DIR, 'mot_seq') \
            .make_dataset()

        compare_datasets(self, expected_dataset, dataset)
示例#28
0
    def test_can_import(self):
        """Importing a generated COCO dataset produces the expected
        polygon and RLE-mask annotations."""
        class DstExtractor(Extractor):
            # Reference: one 'val' item with a polygon (non-crowd) and a
            # mask (crowd) annotation.
            def __iter__(self):
                return iter([
                    DatasetItem(id=1,
                                image=np.ones((10, 5, 3)),
                                subset='val',
                                annotations=[
                                    Polygon([0, 0, 1, 0, 1, 2, 0, 2],
                                            label=0,
                                            id=1,
                                            group=1,
                                            attributes={'is_crowd': False}),
                                    Mask(np.array([[1, 0, 0, 1, 0]] * 5 +
                                                  [[1, 1, 1, 1, 0]] * 5),
                                         label=0,
                                         id=2,
                                         group=2,
                                         attributes={'is_crowd': True}),
                                ]),
                ])

            def categories(self):
                label_cat = LabelCategories()
                label_cat.add('TEST')
                return {AnnotationType.label: label_cat}

        with TestDir() as test_dir:
            # Generate the COCO fixture on disk, then import it.
            self.COCO_dataset_generate(test_dir)

            dataset = Project.import_from(test_dir, 'coco').make_dataset()

            compare_datasets(self, DstExtractor(), dataset)
示例#29
0
    def test_can_save_and_load(self):
        """A dataset round-trips through the Datumaro format: subsets,
        items, and categories all survive save + load."""
        with TestDir() as test_dir:
            source_dataset = self.TestExtractor()

            converter = DatumaroConverter(save_images=True,
                                          apply_colormap=True)
            converter(source_dataset, test_dir.path)

            parsed_dataset = Project.import_from(test_dir.path,
                'datumaro').make_dataset()

            self.assertListEqual(
                sorted(source_dataset.subsets()),
                sorted(parsed_dataset.subsets()),
            )
            self.assertEqual(len(source_dataset), len(parsed_dataset))

            # Compare items pairwise per subset; zip_longest surfaces
            # any length mismatch as a None comparison.
            for subset_name in source_dataset.subsets():
                source_subset = source_dataset.get_subset(subset_name)
                parsed_subset = parsed_dataset.get_subset(subset_name)
                pairs = zip_longest(source_subset, parsed_subset)
                for idx, (item_a, item_b) in enumerate(pairs):
                    self.assertEqual(item_a, item_b, str(idx))

            self.assertEqual(source_dataset.categories(),
                             parsed_dataset.categories())
示例#30
0
    def test_ambiguous_format(self):
        """A path matching multiple formats cannot be resolved, with or
        without a project context."""
        test_dir = scope_add(TestDir())

        dataset_url = osp.join(test_dir, 'source')

        # Make the dataset ambiguous by putting annotation files from
        # two different formats into one directory.
        annotation_dir = osp.join(dataset_url, 'training/street')
        assets_dir = osp.join(osp.dirname(__file__), '../assets')
        os.makedirs(annotation_dir)
        assets = [
            'ade20k2017_dataset/dataset/training/street/1_atr.txt',
            'ade20k2020_dataset/dataset/training/street/1.json',
        ]
        for asset in assets:
            shutil.copy(osp.join(assets_dir, asset), annotation_dir)

        with self.subTest("no context"):
            with self.assertRaises(WrongRevpathError) as cm:
                parse_full_revpath(dataset_url)
            problem_types = set(type(e) for e in cm.exception.problems)
            self.assertEqual(
                {ProjectNotFoundError, MultipleFormatsMatchError},
                problem_types)

        proj_dir = osp.join(test_dir, 'proj')
        proj = scope_add(Project.init(proj_dir))

        with self.subTest("in context"):
            with self.assertRaises(WrongRevpathError) as cm:
                parse_full_revpath(dataset_url, proj)
            problem_types = set(type(e) for e in cm.exception.problems)
            self.assertEqual(
                {UnknownTargetError, MultipleFormatsMatchError},
                problem_types)