Exemplo n.º 1
0
    def test_can_release_resources_on_checkout(self, fxt_sample_video):
        """Checking out an earlier revision must rebuild the working tree:
        the video-frames replacement of 'src' is dropped and the original
        single-item dataset becomes available again.
        """
        test_dir = scope_add(TestDir())

        project = scope_add(Project.init(test_dir))

        # Commit 1: 'src' is a one-item 'datumaro'-format source
        src_url = osp.join(test_dir, 'src')
        src = Dataset.from_iterable([
            DatasetItem(1),
        ], categories=['a'])
        src.save(src_url)
        project.add_source(src_url, 'datumaro')
        project.commit('commit 1')

        project.remove_source('src', keep_data=False)

        # Commit 2: 'src' is re-created from a sample video split into frames
        project.import_source('src',
                              osp.dirname(fxt_sample_video),
                              'video_frames',
                              rpath=osp.basename(fxt_sample_video))
        project.commit('commit 2')

        # assumes the fixture video yields 4 frames — TODO confirm fixture
        assert len(project.working_tree.make_dataset()) == 4
        assert osp.isdir(osp.join(test_dir, 'src'))

        project.checkout('HEAD~1')

        # Back on commit 1: the single-item dataset is restored
        assert len(project.working_tree.make_dataset()) == 1
Exemplo n.º 2
0
    def test_ambiguous_format(self):
        """parse_full_revpath() must report all matching problems when a
        dataset directory matches more than one format, both with and
        without a project context.
        """
        test_dir = scope_add(TestDir())

        dataset_url = osp.join(test_dir, 'source')

        # create an ambiguous dataset by merging annotations from
        # datasets in different formats
        annotation_dir = osp.join(dataset_url, 'training/street')
        assets_dir = osp.join(osp.dirname(__file__), '../assets')
        os.makedirs(annotation_dir)
        for asset in [
                'ade20k2017_dataset/dataset/training/street/1_atr.txt',
                'ade20k2020_dataset/dataset/training/street/1.json',
        ]:
            shutil.copy(osp.join(assets_dir, asset), annotation_dir)

        # Without a project, the revpath can also not resolve to a project
        # target, so ProjectNotFoundError is among the reported problems
        with self.subTest("no context"):
            with self.assertRaises(WrongRevpathError) as cm:
                parse_full_revpath(dataset_url)
            self.assertEqual({ProjectNotFoundError, MultipleFormatsMatchError},
                             set(type(e) for e in cm.exception.problems))

        proj_dir = osp.join(test_dir, 'proj')
        proj = scope_add(Project.init(proj_dir))

        # With a project, the unresolved part is an unknown target instead
        with self.subTest("in context"):
            with self.assertRaises(WrongRevpathError) as cm:
                parse_full_revpath(dataset_url, proj)
            self.assertEqual({UnknownTargetError, MultipleFormatsMatchError},
                             set(type(e) for e in cm.exception.problems))
Exemplo n.º 3
0
def validate_command(args):
    """Validate a dataset pointed to by a revpath and write a JSON report.

    Positionals have the form ``[target] [-- <validator args>]``: at most
    one target before the optional '--' separator; everything after the
    separator is forwarded to the validator plugin.

    Raises CliException if the requested validator type is unknown.
    """
    has_sep = '--' in args._positionals
    if has_sep:
        pos = args._positionals.index('--')
        if 1 < pos:
            raise argparse.ArgumentError(
                None, message="Expected no more than 1 target argument")
    else:
        pos = 1
    args.target = (args._positionals[:pos] or ['project'])[0]
    args.extra_args = args._positionals[pos + has_sep:]

    show_plugin_help = '-h' in args.extra_args or '--help' in args.extra_args

    project = None
    try:
        project = scope_add(load_project(args.project_dir))
    except ProjectNotFoundError:
        # A missing project is only fatal when an explicit project dir was
        # given and we are not just printing a plugin's help
        if not show_plugin_help and args.project_dir:
            raise

    if project is not None:
        env = project.env
    else:
        env = Environment()

    try:
        validator_type = env.validators[args.task]
    except KeyError as e:
        # Chain the cause so the original lookup failure is preserved
        raise CliException("Validator type '%s' is not found" %
                           args.task) from e

    extra_args = validator_type.parse_cmdline(args.extra_args)

    dataset, target_project = parse_full_revpath(args.target, project)
    if target_project:
        scope_add(target_project)

    # Fixed: was a pointless f-string with no placeholders
    dst_file_name = 'validation-report'
    if args.subset_name is not None:
        dataset = dataset.get_subset(args.subset_name)
        dst_file_name += f'-{args.subset_name}'

    validator = validator_type(**extra_args)
    report = validator.validate(dataset)

    def _make_serializable(d):
        # JSON object keys must be strings: stringify tuple keys in place,
        # recursing into nested dicts
        for key, val in list(d.items()):
            # tuple key to str
            if isinstance(key, tuple):
                d[str(key)] = val
                d.pop(key)
            if isinstance(val, dict):
                _make_serializable(val)

    _make_serializable(report)

    dst_file = generate_next_file_name(dst_file_name, ext='.json')
    log.info("Writing project validation results to '%s'" % dst_file)
    dump_json_file(dst_file, report, indent=True, allow_numpy=True)
Exemplo n.º 4
0
def info_command(args):
    """Print summary info (length, categories, labels, subsets) for a
    dataset revpath, or explain why the dataset could not be built.

    Returns 0 on success.
    """
    project = None
    try:
        project = scope_add(load_project(args.project_dir))
    except ProjectNotFoundError:
        # Only fatal when an explicit project dir was requested
        if args.project_dir:
            raise

    try:
        # TODO: avoid computing working tree hashes
        dataset, target_project = parse_full_revpath(args.target, project)
        if target_project:
            scope_add(target_project)
    except DatasetMergeError as e:
        dataset = None
        dataset_problem = "Can't merge project sources automatically: %s " \
            "Conflicting sources are: %s" % (e, ', '.join(e.sources))
    except MissingObjectError as e:
        dataset = None
        dataset_problem = str(e)

    def print_dataset_info(dataset, indent=''):
        # Prints length and per-type category details, prefixing every
        # line with the given indent
        print("%slength:" % indent, len(dataset))

        # categories is a mapping keyed by annotation type
        categories = dataset.categories()
        print("%scategories:" % indent, ', '.join(c.name for c in categories))

        for cat_type, cat in categories.items():
            print("%s  %s:" % (indent, cat_type.name))
            if cat_type == AnnotationType.label:
                print("%s    count:" % indent, len(cat.items))

                # Show at most 10 labels unless --all was requested
                count_threshold = 10
                if args.all:
                    count_threshold = len(cat.items)
                labels = ', '.join(c.name for c in cat.items[:count_threshold])
                if count_threshold < len(cat.items):
                    labels += " (and %s more)" % (len(cat.items) -
                                                  count_threshold)
                print("%s    labels:" % indent, labels)

    if dataset is not None:
        print_dataset_info(dataset)

        # Repeat the summary for each subset, indented one level
        subsets = dataset.subsets()
        print("subsets:", ', '.join(subsets))
        for subset_name in subsets:
            subset = dataset.get_subset(subset_name)
            print("  '%s':" % subset_name)
            print_dataset_info(subset, indent="    ")
    else:
        print("Dataset info is not available: ", dataset_problem)

    return 0
Exemplo n.º 5
0
def add_command(args):
    """Register an ML model in the project under a (possibly generated)
    name, optionally copying its data and checking it can be loaded.

    Returns 0 on success.
    """
    show_plugin_help = '-h' in args.extra_args or '--help' in args.extra_args

    project = None
    try:
        project = scope_add(load_project(args.project_dir))
    except ProjectNotFoundError:
        # Tolerate a missing project only when the user just wants the
        # plugin's help and did not name a project dir explicitly
        if not show_plugin_help and args.project_dir:
            raise

    if project is not None:
        env = project.env
    else:
        env = Environment()

    # NOTE(review): if project is None here (help-only path), the accesses
    # to project.models below raise AttributeError before parse_cmdline()
    # gets a chance to print the plugin help — verify intended ordering
    name = args.name
    if name:
        if name in project.models:
            raise CliException("Model '%s' already exists" % name)
    else:
        # Auto-generate 'model-0', 'model-1', ... when no name was given
        name = generate_next_name(list(project.models),
                                  'model',
                                  sep='-',
                                  default=0)

    try:
        launcher = env.launchers[args.launcher]
    except KeyError:
        raise CliException("Launcher '%s' is not found" % args.launcher)

    # Plugins may expose a dedicated CLI adapter; fall back to the launcher
    cli_plugin = getattr(launcher, 'cli_plugin', launcher)
    model_args = cli_plugin.parse_cmdline(args.extra_args)

    if args.copy:
        log.info("Copying model data")

        model_dir = project.model_data_dir(name)
        os.makedirs(model_dir, exist_ok=False)
        # Roll back the created directory if anything below fails
        on_error_do(rmtree, model_dir, ignore_errors=True)

        try:
            cli_plugin.copy_model(model_dir, model_args)
        except (AttributeError, NotImplementedError):
            raise NotImplementedError(
                "Can't copy: copying is not available for '%s' models. " %
                args.launcher)

    project.add_model(name, launcher=args.launcher, options=model_args)
    # Undo the registration if the optional check below fails
    on_error_do(project.remove_model, name, ignore_errors=True)

    if not args.no_check:
        log.info("Checking the model...")
        project.make_model(name)

    project.save()

    log.info("Model '%s' with launcher '%s' has been added to project", name,
             args.launcher)

    return 0
Exemplo n.º 6
0
def remove_command(args):
    """Remove the model named by ``args.name`` from the project and
    persist the updated project configuration. Returns 0.
    """
    proj = scope_add(load_project(args.project_dir))
    proj.remove_model(args.name)
    proj.save()
    return 0
Exemplo n.º 7
0
def detect_format_command(args):
    """Detect the dataset format at ``args.url``, print the outcome and
    optionally the rejection reasons, and dump a JSON report if requested.
    """
    project = None
    try:
        project = scope_add(load_project(args.project_dir))
    except ProjectNotFoundError:
        if args.project_dir:
            raise

    env = project.env if project is not None else Environment()

    report = {'rejected_formats': {}}

    def on_rejection(
        format_name: str,
        reason: RejectionReason,
        human_message: str,
    ):
        # Record why each candidate format was ruled out
        report['rejected_formats'][format_name] = {
            'reason': reason.name,
            'message': human_message,
        }

    detected_formats = detect_dataset_format(
        ((name, importer.detect)
         for name, importer in env.importers.items.items()),
        args.url,
        rejection_callback=on_rejection,
    )
    report['detected_formats'] = detected_formats

    if not detected_formats:
        print("Unable to detect the format")
    elif len(detected_formats) == 1:
        print(f"Detected format: {detected_formats[0]}")
    else:
        print("Ambiguous dataset; detected the following formats:")
        print()
        for format_name in sorted(detected_formats):
            print(f"- {format_name}")

    if args.show_rejections:
        print()
        rejections = report['rejected_formats']
        if not rejections:
            print("No formats were rejected.")
        else:
            print("The following formats were rejected:")
            print()
            for format_name, rejection in sorted(rejections.items()):
                print(f"{format_name}:")
                for line in rejection['message'].split('\n'):
                    print(f"  {line}")

    if args.json_report:
        dump_json_file(args.json_report, report, indent=True)
Exemplo n.º 8
0
    def test_can_release_resources_on_remove(self, fxt_sample_video):
        """Removing a video-frames source with keep_data=False must delete
        its data directory from the working tree.
        """
        test_dir = scope_add(TestDir())

        project = scope_add(Project.init(test_dir))

        # Import the sample video as a 'video_frames' source named 'src'
        project.import_source('src',
                              osp.dirname(fxt_sample_video),
                              'video_frames',
                              rpath=osp.basename(fxt_sample_video))
        project.commit('commit 1')

        # assumes the fixture video yields 4 frames — TODO confirm fixture
        assert len(project.working_tree.make_dataset()) == 4
        assert osp.isdir(osp.join(test_dir, 'src'))

        project.remove_source('src', keep_data=False)

        # The source's on-disk data must be gone as well
        assert not osp.exists(osp.join(test_dir, 'src'))
Exemplo n.º 9
0
def info_command(args):
    """Print a single model's config, or list all model names (with their
    configs when --verbose is set).
    """
    project = scope_add(load_project(args.project_dir))

    name = args.name
    if name:
        print(project.models[name])
        return

    for model_name, model_conf in project.models.items():
        print(model_name)
        if args.verbose:
            print(dict(model_conf))
Exemplo n.º 10
0
def patch_command(args):
    """Update a target dataset with items from a patch dataset and save
    the result in the target's own format.

    Returns 0 on success. Raises CliException when no converter exists for
    the target's format or the destination directory is non-empty and
    --overwrite was not given.
    """
    project = None
    try:
        project = scope_add(load_project(args.project_dir))
    except ProjectNotFoundError:
        # Only fatal when an explicit project dir was requested
        if args.project_dir:
            raise

    if project is not None:
        env = project.env
    else:
        env = Environment()

    target_dataset, _project = parse_full_revpath(args.target, project)
    if _project is not None:
        scope_add(_project)

    try:
        converter = env.converters[target_dataset.format]
    except KeyError as e:
        # Fixed: the message reported args.format, but the lookup that
        # failed used the target dataset's format
        raise CliException("Converter for format '%s' is not found" %
                           target_dataset.format) from e

    extra_args = converter.parse_cmdline(args.extra_args)

    # Default to patching the dataset in place
    dst_dir = args.dst_dir or target_dataset.data_path
    if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
        raise CliException("Directory '%s' already exists "
                           "(pass --overwrite to overwrite)" % dst_dir)
    dst_dir = osp.abspath(dst_dir)

    patch_dataset, _project = parse_full_revpath(args.patch, project)
    if _project is not None:
        scope_add(_project)

    target_dataset.update(patch_dataset)

    target_dataset.save(save_dir=dst_dir, **extra_args)

    log.info("Patched dataset has been saved to '%s'" % dst_dir)

    return 0
Exemplo n.º 11
0
def log_command(args):
    """Print up to args.max_count entries of the project's commit history,
    one '<rev> <message>' pair per line. Returns 0.
    """
    project = scope_add(load_project(args.project_dir))

    history = project.history(args.max_count)
    if not history:
        print("(Project history is empty)")
    else:
        for revision, message in history:
            print('%s %s' % (revision, message))

    return 0
Exemplo n.º 12
0
def info_command(args):
    """Print one working-tree source's config, or list all source names
    (with configs when --verbose is set).
    """
    project = scope_add(load_project(args.project_dir))

    sources = project.working_tree.sources
    name = args.name
    if name:
        print(sources[name])
        return

    for source_name, source_conf in sources.items():
        print(source_name)
        if args.verbose:
            print(source_conf)
Exemplo n.º 13
0
    def test_can_chain_transforms_in_working_tree_without_hashing(self):
        """Chained filter/transform stages applied via the CLI must build
        the expected dataset in the working tree while leaving the
        intermediate stage hashes empty (no hashing before commit).
        """
        test_dir = scope_add(TestDir())
        source_url = osp.join(test_dir, 'test_repo')
        dataset = Dataset.from_iterable([
            DatasetItem(1, annotations=[Label(0)]),
            DatasetItem(2, annotations=[Label(1)]),
        ],
                                        categories=['a', 'b'])
        dataset.save(source_url)

        # Build: import, keep only 'b'-labeled items, rename item 2 -> 'qq',
        # then remap labels a->cat, b->dog
        project_dir = osp.join(test_dir, 'proj')
        run(self, 'create', '-o', project_dir)
        run(self, 'import', '-p', project_dir, '-n', 'source1', '--format',
            DEFAULT_FORMAT, source_url)
        run(self, 'filter', '-p', project_dir, '-e',
            '/item/annotation[label="b"]')
        run(self, 'transform', '-p', project_dir, '-t', 'rename', '--', '-e',
            '|2|qq|')
        run(self, 'transform', '-p', project_dir, '-t', 'remap_labels', '--',
            '-l', 'a:cat', '-l', 'b:dog')

        project = scope_add(Project(project_dir))
        built_dataset = project.working_tree.make_dataset()

        expected_dataset = Dataset.from_iterable([
            DatasetItem('qq', annotations=[Label(1)]),
        ],
                                                 categories=['cat', 'dog'])
        compare_datasets(self, expected_dataset, built_dataset)

        # The source data dir must no longer match the original source
        with self.assertRaises(Exception):
            compare_dirs(self, source_url, project.source_data_dir('source1'))

        # Root + 3 applied stages; the first three stages stay unhashed
        source1_target = project.working_tree.build_targets['source1']
        self.assertEqual(4, len(source1_target.stages))
        self.assertEqual('', source1_target.stages[0].hash)
        self.assertEqual('', source1_target.stages[1].hash)
        self.assertEqual('', source1_target.stages[2].hash)
Exemplo n.º 14
0
def remove_command(args):
    """Remove one or more named sources from the project's working tree
    and save it. Returns 0; raises CliException when no names were given.
    """
    project = scope_add(load_project(args.project_dir))

    names = args.names
    if not names:
        raise CliException("Expected source name")

    for source_name in names:
        project.remove_source(source_name, force=args.force,
                              keep_data=args.keep_data)
    project.working_tree.save()

    log.info("Sources '%s' have been removed from the project"
             % ', '.join(names))

    return 0
Exemplo n.º 15
0
def run_command(args):
    """Run a project model on a target dataset and save the inference
    results.

    The output directory defaults to '<model name>-inference'; an existing
    non-empty directory is rejected unless --overwrite is given.
    Returns 0 on success.
    """
    dst_dir = args.dst_dir
    if dst_dir:
        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
            # Fixed message: was "(pass --overwrite overwrite)"; now matches
            # the wording used by the sibling commands
            raise CliException("Directory '%s' already exists "
                               "(pass --overwrite to overwrite)" % dst_dir)
    else:
        dst_dir = generate_next_file_name('%s-inference' % args.model_name)
    dst_dir = osp.abspath(dst_dir)

    project = scope_add(load_project(args.project_dir))

    dataset, target_project = parse_full_revpath(args.target, project)
    if target_project:
        scope_add(target_project)

    model = project.make_model(args.model_name)
    inference = dataset.run_model(model)
    inference.save(dst_dir)

    log.info("Inference results have been saved to '%s'" % dst_dir)

    return 0
Exemplo n.º 16
0
    def test_can_split_video(self):
        """The 'util split_video' CLI must extract exactly the requested
        frame range with the requested step as numbered images.
        """
        # Release any cached media handles when the test finishes
        on_exit_do(MediaManager.get_instance().clear)

        test_dir = scope_add(TestDir())
        video_path = osp.join(test_dir, 'video.avi')
        make_sample_video(video_path, frames=10)

        output_dir = osp.join(test_dir, 'result')

        run(TestCase(), 'util', 'split_video',
            '-i', video_path, '-o', output_dir, '--image-ext', '.jpg',
            '--start-frame', '2', '--end-frame', '8', '--step', '2')

        # Frames 2, 4, 6 (end frame is exclusive) named '%06d.jpg'
        assert set(os.listdir(output_dir)) == {'%06d.jpg' % n
            for n in range(2, 8, 2)}
Exemplo n.º 17
0
def stats_command(args):
    """Compute image and/or annotation statistics for a dataset revpath
    and dump them to a fresh 'statistics*.json' file.
    """
    project = None
    try:
        project = scope_add(load_project(args.project_dir))
    except ProjectNotFoundError:
        if args.project_dir:
            raise

    dataset, target_project = parse_full_revpath(args.target, project)
    if target_project:
        scope_add(target_project)

    if args.subset:
        dataset = dataset.get_subset(args.subset)

    # Collect only the statistic groups that were requested
    collectors = []
    if args.image_stats:
        collectors.append(compute_image_statistics)
    if args.ann_stats:
        collectors.append(compute_ann_statistics)

    stats = {}
    for collect in collectors:
        stats.update(collect(dataset))

    dst_file = generate_next_file_name('statistics', ext='.json')
    log.info("Writing project statistics to '%s'" % dst_file)
    dump_json_file(dst_file, stats, indent=True)
Exemplo n.º 18
0
def status_command(args):
    """Print the current branch/commit and the per-target status of the
    working directory. Returns 0.
    """
    project = scope_add(load_project(args.project_dir))

    statuses = project.status()

    branch = project.branch
    if branch:
        print("On branch '%s', commit %s" % (branch, project.head_rev))
    else:
        print("HEAD is detached at commit %s" % project.head_rev)

    if not statuses:
        print("Working directory clean")
    else:
        for target, status in statuses.items():
            print('%s\t%s' % (status.name, target))

    return 0
Exemplo n.º 19
0
    def test_can_split_and_load(self, fxt_sample_video):
        """A video imported as 'video_frames' and exported as 'image_dir'
        must round-trip into the expected per-frame images.
        """
        test_dir = scope_add(TestDir())
        # Release any cached media handles when the test finishes
        on_exit_do(MediaManager.get_instance().clear)

        # assumes each sample-video frame i is a constant image of value i
        # — TODO confirm fixture
        expected = Dataset.from_iterable([
            DatasetItem('frame_%06d' % i, image=np.ones((4, 6, 3)) * i)
            for i in range(4)
        ])

        dataset = Dataset.import_from(fxt_sample_video,
                                      'video_frames',
                                      start_frame=0,
                                      end_frame=4,
                                      name_pattern='frame_%06d')
        dataset.export(format='image_dir', save_dir=test_dir, image_ext='.jpg')

        actual = Dataset.import_from(test_dir, 'image_dir')
        compare_datasets(TestCase(), expected, actual)
Exemplo n.º 20
0
def commit_command(args):
    """Create a commit from the working tree and print a summary of the
    targets that changed relative to the previous HEAD. Returns 0.
    """
    project = scope_add(load_project(args.project_dir))

    head_tree = project.head

    commit_hash = project.commit(args.message,
                                 allow_empty=args.allow_empty,
                                 allow_foreign=args.allow_foreign,
                                 no_cache=args.no_cache)

    changes = project.diff(head_tree, project.working_tree)

    print("Moved to commit '%s' %s" % (commit_hash, args.message))
    print(" %s targets changed" % len(changes))
    for target, status in changes.items():
        print(" %s %s" % (status.name, target))

    return 0
Exemplo n.º 21
0
def info_command(args):
    """Print a full description of the project at a given revision:
    location, plugins, models, and each source with its build stages.

    Returns 0.
    """
    project = scope_add(load_project(args.project_dir))
    rev = project.get_rev(args.revision)
    env = rev.env

    print("Project:")
    # NOTE(review): reads the private _root_dir attribute — verify a public
    # accessor is not available
    print("  location:", project._root_dir)
    print("Plugins:")
    # Extractors and importers are merged into one de-duplicated list
    print("  extractors:",
          ', '.join(sorted(set(env.extractors) | set(env.importers))))
    print("  converters:", ', '.join(env.converters))
    print("  launchers:", ', '.join(env.launchers))

    print("Models:")
    for model_name, model in project.models.items():
        print("  model '%s':" % model_name)
        print("    type:", model.launcher)

    print("Sources:")
    for source_name, source in rev.sources.items():
        print("  '%s':" % source_name)
        print("    format:", source.format)
        print("    url:", osp.abspath(source.url) if source.url else '')
        print(
            "    location:",
            osp.abspath(
                osp.join(project.source_data_dir(source_name), source.path)))
        print("    options:", source.options)

        # Each source is a pipeline of build stages; show cache state per
        # stage (stages without a hash cannot be cached)
        print("    stages:")
        for stage in rev.build_targets[source_name].stages:
            print("      '%s':" % stage.name)
            print("        type:", stage.type)
            print("        hash:", stage.hash)
            print("        cached:",
                  project.is_obj_cached(stage.hash) if stage.hash else 'n/a')
            if stage.kind:
                print("        kind:", stage.kind)
            if stage.params:
                print("        parameters:", stage.params)

    return 0
Exemplo n.º 22
0
    def test_can_transform_dataset_inplace(self):
        """'transform --overwrite' on a '<dir>:<format>' revpath must
        rewrite the dataset in place in its original format.
        """
        test_dir = scope_add(TestDir())
        Dataset.from_iterable([
            DatasetItem(1, annotations=[Label(0)]),
            DatasetItem(2, annotations=[Label(1)]),
        ],
                              categories=['a', 'b']).export(test_dir, 'coco')

        run(self, 'transform', '-t', 'remap_labels', '--overwrite',
            test_dir + ':coco', '--', '-l', 'a:cat', '-l', 'b:dog')

        # Label indices keep their values; only category names change.
        # id/group come from the COCO round-trip.
        expected_dataset = Dataset.from_iterable([
            DatasetItem(1, annotations=[Label(0, id=1, group=1)]),
            DatasetItem(2, annotations=[Label(1, id=2, group=2)]),
        ],
                                                 categories=['cat', 'dog'])
        compare_datasets(self,
                         expected_dataset,
                         Dataset.import_from(test_dir, 'coco'),
                         ignored_attrs='*')
Exemplo n.º 23
0
def checkout_command(args):
    """Check out a revision and/or a set of sources in the project.

    Positionals have the form ``[rev] [-- <source names>]``: at most one
    revision before the optional '--' separator; when the separator is
    present, at least one source name must follow it. Returns 0.
    """
    has_sep = '--' in args._positionals
    if has_sep:
        pos = args._positionals.index('--')
        if 1 < pos:
            raise argparse.ArgumentError(
                None, message="Expected no more than 1 revision argument")
    else:
        pos = 1
    args.rev = (args._positionals[:pos] or [None])[0]
    args.sources = args._positionals[pos + has_sep:]
    if has_sep and not args.sources:
        # Fixed: ArgumentError requires an argparse action (or None) as its
        # first argument; passing the string 'sources' made the constructor
        # itself fail with AttributeError instead of reporting this error
        raise argparse.ArgumentError(
            None,
            message="When '--' is used, "
            "at least 1 source name must be specified")

    project = scope_add(load_project(args.project_dir))

    project.checkout(rev=args.rev, sources=args.sources, force=args.force)

    return 0
Exemplo n.º 24
0
def explain_command(args):
    """Run RISE inference explanation for a model, on either a single
    image file or a dataset revpath, displaying and/or saving heatmaps.

    Returns 0. Raises NotImplementedError for algorithms other than RISE.
    """
    from matplotlib import cm
    import cv2

    project = scope_add(load_project(args.project_dir))

    model = project.working_tree.models.make_executable_model(args.model)

    # Only the RISE saliency algorithm is supported
    if str(args.algorithm).lower() != 'rise':
        raise NotImplementedError()

    from datumaro.components.algorithms.rise import RISE
    rise = RISE(model,
                max_samples=args.max_samples,
                mask_width=args.mask_width,
                mask_height=args.mask_height,
                prob=args.prob,
                iou_thresh=args.iou_thresh,
                nms_thresh=args.nms_iou_thresh,
                det_conf_thresh=args.det_conf_thresh,
                batch_size=args.batch_size)

    # Single-image mode: the target is a path to an image file
    if args.target and is_image(args.target):
        image_path = args.target
        image = load_image(image_path)

        log.info("Running inference explanation for '%s'" % image_path)
        heatmap_iter = rise.apply(image, progressive=args.display)

        # Normalize to [0, 1] for blending with the colorized heatmap
        image = image / 255.0
        file_name = osp.splitext(osp.basename(image_path))[0]
        if args.display:
            # Progressive mode: show intermediate heatmaps as they refine
            for i, heatmaps in enumerate(heatmap_iter):
                for j, heatmap in enumerate(heatmaps):
                    # cm.jet returns RGBA; [:, :, 2::-1] converts to BGR
                    hm_painted = cm.jet(heatmap)[:, :, 2::-1]
                    disp = (image + hm_painted) / 2
                    cv2.imshow('heatmap-%s' % j, hm_painted)
                    cv2.imshow(file_name + '-heatmap-%s' % j, disp)
                cv2.waitKey(10)
                print("Iter", i, "of", args.max_samples, end='\r')
        else:
            heatmaps = next(heatmap_iter)

        # NOTE(review): with --display, 'heatmaps' holds the last value of
        # the loop above — verify the iterator always yields at least once
        if args.save_dir is not None:
            log.info("Saving inference heatmaps at '%s'" % args.save_dir)
            os.makedirs(args.save_dir, exist_ok=True)

            for j, heatmap in enumerate(heatmaps):
                save_path = osp.join(args.save_dir,
                                     file_name + '-heatmap-%s.png' % j)
                save_image(save_path, heatmap * 255.0)
        else:
            for j, heatmap in enumerate(heatmaps):
                disp = (image + cm.jet(heatmap)[:, :, 2::-1]) / 2
                cv2.imshow(file_name + '-heatmap-%s' % j, disp)
            cv2.waitKey(0)

    # Dataset mode: explain every item of the resolved dataset
    else:
        dataset, target_project = \
            parse_full_revpath(args.target or 'project', project)
        if target_project:
            scope_add(target_project)

        log.info("Running inference explanation for '%s'" % args.target)

        for item in dataset:
            image = item.image.data
            if image is None:
                log.warning("Item %s does not have image data. Skipping.",
                            item.id)
                continue

            heatmap_iter = rise.apply(image)

            image = image / 255.0
            heatmaps = next(heatmap_iter)

            if args.save_dir is not None:
                log.info("Saving inference heatmaps to '%s'" % args.save_dir)
                os.makedirs(args.save_dir, exist_ok=True)

                for j, heatmap in enumerate(heatmaps):
                    save_image(osp.join(args.save_dir,
                                        item.id + '-heatmap-%s.png' % j),
                               heatmap * 255.0,
                               create_dir=True)

            if not args.save_dir or args.display:
                for j, heatmap in enumerate(heatmaps):
                    disp = (image + cm.jet(heatmap)[:, :, 2::-1]) / 2
                    cv2.imshow(item.id + '-heatmap-%s' % j, disp)
                cv2.waitKey(0)

    return 0
Exemplo n.º 25
0
def add_command(args):
    """Add a dataset source to the project's working tree, optionally
    checking that it can be built. Returns 0.

    Positionals have the form ``[path] [-- <format args>]``.
    """
    # Workaround. Required positionals consume positionals from the end
    args._positionals += join_cli_args(args, 'path', 'extra_args')

    # Split positionals into the source path and format plugin args
    has_sep = '--' in args._positionals
    if has_sep:
        pos = args._positionals.index('--')
    else:
        pos = 1
    args.path = (args._positionals[:pos] or [''])[0]
    args.extra_args = args._positionals[pos + has_sep:]

    show_plugin_help = '-h' in args.extra_args or '--help' in args.extra_args

    project = None
    try:
        project = scope_add(load_project(args.project_dir))
    except ProjectNotFoundError:
        # Tolerate a missing project only for the help-only invocation;
        # parse_cmdline() below handles -h/--help before project use
        if not show_plugin_help:
            raise

    if project is not None:
        env = project.env
    else:
        env = Environment()

    # Importers take precedence over extractors for CLI arg parsing
    fmt = args.format
    if fmt in env.importers:
        arg_parser = env.importers[fmt]
    elif fmt in env.extractors:
        arg_parser = env.extractors[fmt]
    else:
        raise CliException("Unknown format '%s'. A format can be added"
                           " by providing an Extractor and Importer plugins" %
                           fmt)

    extra_args = arg_parser.parse_cmdline(args.extra_args)

    if fmt == 'video_frames':
        show_video_import_warning()

    name, _ = project.add_source(args.path,
                                 format=args.format,
                                 options=extra_args,
                                 rpath=args.rpath)
    # Undo the registration (but keep the data) if the check below fails
    on_error_do(project.remove_source,
                name,
                ignore_errors=True,
                kwargs={
                    'force': True,
                    'keep_data': True
                })

    if not args.no_check:
        log.info("Checking the source...")
        project.working_tree.make_dataset(name)

    project.working_tree.save()

    log.info("Source '%s' with format '%s' has been added to the project",
             name, args.format)

    return 0
Exemplo n.º 26
0
    def test_can_parse(self):
        """parse_full_revpath() must resolve every supported revpath form
        to an IDataset; a project is returned only when the revpath itself
        created one (i.e. project-path forms, not in-context refs or plain
        dataset paths).
        """
        test_dir = scope_add(TestDir())

        dataset_url = osp.join(test_dir, 'source')
        Dataset.from_iterable([DatasetItem(1)]).save(dataset_url)

        proj_dir = osp.join(test_dir, 'proj')
        proj = scope_add(Project.init(proj_dir))
        proj.import_source('source-1', dataset_url, format=DEFAULT_FORMAT)
        ref = proj.commit("second commit", allow_empty=True)

        # Project-path forms: a new Project instance is returned and must
        # be registered in the scope for cleanup
        with self.subTest("project"):
            dataset, project = parse_full_revpath(proj_dir)
            if project:
                scope_add(project)
            self.assertTrue(isinstance(dataset, IDataset))
            self.assertTrue(isinstance(project, Project))

        with self.subTest("project ref"):
            dataset, project = parse_full_revpath(f"{proj_dir}@{ref}")
            if project:
                scope_add(project)
            self.assertTrue(isinstance(dataset, IDataset))
            self.assertTrue(isinstance(project, Project))

        with self.subTest("project ref source"):
            dataset, project = parse_full_revpath(f"{proj_dir}@{ref}:source-1")
            if project:
                scope_add(project)
            self.assertTrue(isinstance(dataset, IDataset))
            self.assertTrue(isinstance(project, Project))

        with self.subTest("project ref source stage"):
            dataset, project = parse_full_revpath(
                f"{proj_dir}@{ref}:source-1.root")
            if project:
                scope_add(project)
            self.assertTrue(isinstance(dataset, IDataset))
            self.assertTrue(isinstance(project, Project))

        # In-context forms: the supplied project is used, so no new
        # project is returned
        with self.subTest("ref"):
            dataset, project = parse_full_revpath(ref, proj)
            if project:
                scope_add(project)
            self.assertTrue(isinstance(dataset, IDataset))
            self.assertEqual(None, project)

        with self.subTest("ref source"):
            dataset, project = parse_full_revpath(f"{ref}:source-1", proj)
            if project:
                scope_add(project)
            self.assertTrue(isinstance(dataset, IDataset))
            self.assertEqual(None, project)

        with self.subTest("ref source stage"):
            dataset, project = parse_full_revpath(f"{ref}:source-1.root", proj)
            if project:
                scope_add(project)
            self.assertTrue(isinstance(dataset, IDataset))
            self.assertEqual(None, project)

        with self.subTest("source"):
            dataset, project = parse_full_revpath("source-1", proj)
            if project:
                scope_add(project)
            self.assertTrue(isinstance(dataset, IDataset))
            self.assertEqual(None, project)

        with self.subTest("source stage"):
            dataset, project = parse_full_revpath("source-1.root", proj)
            if project:
                scope_add(project)
            self.assertTrue(isinstance(dataset, IDataset))
            self.assertEqual(None, project)

        # Plain dataset paths never yield a project, with or without a
        # context project and with or without an explicit format
        with self.subTest("dataset (in context)"):
            dataset, project = parse_full_revpath(dataset_url, proj)
            if project:
                scope_add(project)
            self.assertTrue(isinstance(dataset, IDataset))
            self.assertEqual(None, project)

        with self.subTest("dataset format (in context)"):
            dataset, project = parse_full_revpath(f"{dataset_url}:datumaro",
                                                  proj)
            if project:
                scope_add(project)
            self.assertTrue(isinstance(dataset, IDataset))
            self.assertEqual(None, project)

        with self.subTest("dataset (no context)"):
            dataset, project = parse_full_revpath(dataset_url)
            if project:
                scope_add(project)
            self.assertTrue(isinstance(dataset, IDataset))
            self.assertEqual(None, project)

        with self.subTest("dataset format (no context)"):
            dataset, project = parse_full_revpath(f"{dataset_url}:datumaro")
            if project:
                scope_add(project)
            self.assertTrue(isinstance(dataset, IDataset))
            self.assertEqual(None, project)
Exemplo n.º 27
0
def diff_command(args):
    """Compare two datasets and save a comparison report.

    Resolves one or two dataset revpaths — when only one target is given,
    the first side defaults to the project working tree — then runs either
    an equality or a distance comparison and writes the results into the
    output directory.

    Returns 0 on success; raises CliException on user errors.
    """
    dst_dir = args.dst_dir
    if dst_dir:
        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
            raise CliException("Directory '%s' already exists "
                               "(pass --overwrite to overwrite)" % dst_dir)
    else:
        dst_dir = generate_next_file_name('diff')
    dst_dir = osp.abspath(dst_dir)

    if not osp.exists(dst_dir):
        # Remove the directory we created if the command fails later on.
        on_error_do(rmtree, dst_dir, ignore_errors=True)
        os.makedirs(dst_dir)

    project = None
    try:
        project = scope_add(load_project(args.project_dir))
    except ProjectNotFoundError:
        # A project is optional unless the user explicitly pointed at one.
        if args.project_dir:
            raise

    try:
        if not args.second_target:
            # Single target: compare the working tree against it.
            first_dataset = project.working_tree.make_dataset()
            second_dataset, target_project = \
                parse_full_revpath(args.first_target, project)
            if target_project:
                scope_add(target_project)
        else:
            first_dataset, target_project = \
                parse_full_revpath(args.first_target, project)
            if target_project:
                scope_add(target_project)

            second_dataset, target_project = \
                parse_full_revpath(args.second_target, project)
            if target_project:
                scope_add(target_project)
    except Exception as e:
        raise CliException(str(e))

    if args.method is ComparisonMethod.equality:
        # Bug fix: fall back to the default ignored fields only when the
        # user did not provide any. The previous condition was inverted
        # and replaced user-supplied fields with the defaults (and left
        # the defaults unused when no fields were given).
        if not args.ignore_field:
            args.ignore_field = eq_default_if
        comparator = ExactComparator(match_images=args.match_images,
                                     ignored_fields=args.ignore_field,
                                     ignored_attrs=args.ignore_attr,
                                     ignored_item_attrs=args.ignore_item_attr)
        matches, mismatches, a_extra, b_extra, errors = \
            comparator.compare_datasets(first_dataset, second_dataset)

        output = {
            "mismatches": mismatches,
            "a_extra_items": sorted(a_extra),
            "b_extra_items": sorted(b_extra),
            "errors": errors,
        }
        if args.all:
            output["matches"] = matches

        output_file = osp.join(
            dst_dir,
            generate_next_file_name('diff', ext='.json', basedir=dst_dir))
        log.info("Saving diff to '%s'" % output_file)
        dump_json_file(output_file, output, indent=True)

        print("Found:")
        print("The first project has %s unmatched items" % len(a_extra))
        print("The second project has %s unmatched items" % len(b_extra))
        print("%s item conflicts" % len(errors))
        print("%s matching annotations" % len(matches))
        print("%s mismatching annotations" % len(mismatches))
    elif args.method is ComparisonMethod.distance:
        comparator = DistanceComparator(iou_threshold=args.iou_thresh)

        with DiffVisualizer(save_dir=dst_dir,
                            comparator=comparator,
                            output_format=args.format) as visualizer:
            log.info("Saving diff to '%s'" % dst_dir)
            visualizer.save(first_dataset, second_dataset)

    return 0
Exemplo n.º 28
0
def export_command(args):
    """Export a project build target into another dataset format.

    Splits the raw positionals into a single optional build target and
    converter arguments, resolves the converter from the environment,
    optionally filters the dataset, and exports it to the destination
    directory.

    Returns 0 on success; raises CliException on user errors.
    """
    # Positionals are split on the '--' separator: at most one build
    # target before it, converter arguments after it.
    separator_present = '--' in args._positionals
    if separator_present:
        sep_pos = args._positionals.index('--')
        if sep_pos > 1:
            raise argparse.ArgumentError(
                None, message="Expected no more than 1 target argument")
    else:
        sep_pos = 1
    target_part = args._positionals[:sep_pos]
    args.target = target_part[0] if target_part \
        else ProjectBuildTargets.MAIN_TARGET
    # separator_present (bool) counts as 1 to skip the '--' token itself.
    args.extra_args = args._positionals[sep_pos + separator_present:]

    show_plugin_help = any(flag in args.extra_args
                           for flag in ('-h', '--help'))

    project = None
    try:
        project = scope_add(load_project(args.project_dir))
    except ProjectNotFoundError:
        # Plugin help can be shown without a project; otherwise re-raise.
        if not show_plugin_help:
            raise

    env = project.env if project is not None else Environment()

    try:
        converter = env.converters[args.format]
    except KeyError:
        raise CliException(
            "Converter for format '%s' is not found" % args.format)

    extra_args = converter.parse_cmdline(args.extra_args)

    dst_dir = args.dst_dir
    if not dst_dir:
        dst_dir = generate_next_file_name(
            'export-%s' % make_file_name(args.format))
    elif not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
        raise CliException("Directory '%s' already exists "
                           "(pass --overwrite to overwrite)" % dst_dir)
    dst_dir = osp.abspath(dst_dir)

    if args.filter:
        filter_args = FilterModes.make_filter_args(args.filter_mode)
        filter_expr = args.filter

    log.info("Loading the project...")

    dataset = project.working_tree.make_dataset(args.target)
    if args.filter:
        dataset.filter(filter_expr, **filter_args)

    log.info("Exporting...")
    dataset.export(save_dir=dst_dir, format=converter, **extra_args)

    log.info("Results have been saved to '%s'" % dst_dir)

    return 0
Exemplo n.º 29
0
def filter_command(args):
    """Filter items and annotations in a project target or a dataset.

    In dry-run mode the filtered items are printed as XML and nothing is
    saved. Otherwise, for project build targets the filter is recorded as
    a build stage (and optionally applied and saved back to the source
    data directories); for plain dataset revpaths the filtered result is
    written to the destination directory.

    Returns 0 on success; raises CliException on user errors.
    """
    project = None
    try:
        project = scope_add(load_project(args.project_dir))
    except ProjectNotFoundError:
        # A project is optional unless the user explicitly pointed at one.
        if args.project_dir:
            raise

    filter_args = FilterModes.make_filter_args(args.mode)
    filter_expr = args.filter

    if args.dry_run:
        dataset, _project = parse_full_revpath(args.target, project)
        if _project:
            scope_add(_project)

        dataset = dataset.filter(expr=filter_expr, **filter_args)

        for item in dataset:
            encoded_item = DatasetItemEncoder.encode(item,
                                                     dataset.categories())
            xml_item = DatasetItemEncoder.to_string(encoded_item)
            print(xml_item)
        return 0

    if not args.filter:
        raise CliException("Expected a filter expression ('-e' argument)")

    is_target = project is not None and \
        args.target in project.working_tree.build_targets
    if is_target:
        if not args.dst_dir and args.stage and (args.target != \
                ProjectBuildTargets.strip_target_name(args.target)):
            raise CliException("Adding a stage is only allowed for "
                               "project targets, not their stages.")

        if args.target == ProjectBuildTargets.MAIN_TARGET:
            # The 'project' target expands to all sources.
            targets = list(project.working_tree.sources)
        else:
            targets = [args.target]

        # Record the filter stage on a clone so the working tree is only
        # modified when --stage is requested (see the end of the function).
        build_tree = project.working_tree.clone()
        for target in targets:
            build_tree.build_targets.add_filter_stage(target,
                                                      expr=filter_expr,
                                                      params=filter_args)

    if args.apply:
        log.info("Filtering...")

        if is_target and not args.dst_dir:
            for target in targets:
                dataset = project.working_tree.make_dataset(
                    build_tree.make_pipeline(target))

                # Source might be missing in the working dir, so we specify
                # the output directory.
                # We specify save_images here as a heuristic. It can probably
                # be improved by checking if there are images in the dataset
                # directory.
                dataset.save(project.source_data_dir(target), save_images=True)

            log.info("Finished")
        else:
            dataset, _project = parse_full_revpath(args.target, project)
            if _project:
                scope_add(_project)

            dst_dir = args.dst_dir or dataset.data_path
            if not args.overwrite and osp.isdir(dst_dir) and os.listdir(
                    dst_dir):
                raise CliException("Directory '%s' already exists "
                                   "(pass --overwrite to overwrite)" % dst_dir)
            dst_dir = osp.abspath(dst_dir)

            # Bug fix: filter_args is a keyword dict, so it must be unpacked
            # with '**' (as in the dry-run path above); '*' passed the dict
            # keys as positional arguments.
            dataset.filter(filter_expr, **filter_args)
            dataset.save(dst_dir, save_images=True)

            log.info("Results have been saved to '%s'" % dst_dir)

    if is_target and args.stage:
        project.working_tree.config.update(build_tree.config)
        project.working_tree.save()

    return 0
Exemplo n.º 30
0
def transform_command(args):
    """Apply a transform plugin to a project target or to a dataset.

    For project build targets the transform is recorded as a build stage
    in a cloned build tree and, with --apply, executed and saved back to
    the source data directories. For plain dataset revpaths the transform
    is applied and saved to --dst-dir (or the dataset's own directory).

    Returns 0 on success; raises CliException on user errors.
    """
    # Positionals are split on the '--' separator: at most one build
    # target before it, transform plugin arguments after it.
    has_sep = '--' in args._positionals
    if has_sep:
        pos = args._positionals.index('--')
        if 1 < pos:
            raise argparse.ArgumentError(
                None, message="Expected no more than 1 target argument")
    else:
        pos = 1
    args.target = (args._positionals[:pos] or \
        [ProjectBuildTargets.MAIN_TARGET])[0]
    # has_sep (bool) counts as 1 to skip the '--' token itself.
    args.extra_args = args._positionals[pos + has_sep:]

    show_plugin_help = '-h' in args.extra_args or '--help' in args.extra_args

    project = None
    try:
        project = scope_add(load_project(args.project_dir))
    except ProjectNotFoundError:
        # Missing project is tolerated when only plugin help was requested
        # or when no explicit project dir was given.
        if not show_plugin_help and args.project_dir:
            raise

    if project is not None:
        env = project.env
    else:
        env = Environment()

    try:
        transform = env.transforms[args.transform]
    except KeyError:
        raise CliException("Transform '%s' is not found" % args.transform)

    extra_args = transform.parse_cmdline(args.extra_args)

    is_target = project is not None and \
        args.target in project.working_tree.build_targets
    if is_target:
        if not args.dst_dir and args.stage and (args.target != \
                ProjectBuildTargets.strip_target_name(args.target)):
            raise CliException("Adding a stage is only allowed for "
                               "project targets, not their stages.")

        if args.target == ProjectBuildTargets.MAIN_TARGET:
            # The 'project' target expands to all sources.
            targets = list(project.working_tree.sources)
        else:
            targets = [args.target]

        # Record the transform stage on a clone so the working tree is only
        # modified when --stage is requested (see the end of the function).
        build_tree = project.working_tree.clone()
        for target in targets:
            build_tree.build_targets.add_transform_stage(target,
                                                         args.transform,
                                                         params=extra_args)

    if args.apply:
        log.info("Transforming...")

        if is_target and not args.dst_dir:
            for target in targets:
                dataset = project.working_tree.make_dataset(
                    build_tree.make_pipeline(target))

                # Source might be missing in the working dir, so we specify
                # the output directory
                # We specify save_images here as a heuristic. It can probably
                # be improved by checking if there are images in the dataset
                # directory.
                dataset.save(project.source_data_dir(target), save_images=True)

            log.info("Finished")
        else:
            dataset, _project = parse_full_revpath(args.target, project)
            if _project:
                scope_add(_project)

            dst_dir = args.dst_dir or dataset.data_path
            if not args.overwrite and osp.isdir(dst_dir) and os.listdir(
                    dst_dir):
                raise CliException("Directory '%s' already exists "
                                   "(pass --overwrite to overwrite)" % dst_dir)
            dst_dir = osp.abspath(dst_dir)

            dataset.transform(args.transform, **extra_args)
            dataset.save(dst_dir, save_images=True)

            log.info("Results have been saved to '%s'" % dst_dir)

    if is_target and args.stage:
        project.working_tree.config.update(build_tree.config)
        project.working_tree.save()

    return 0