Exemplo n.º 1
0
def detect_format(dataset_dir, args):
    """Try to figure out what type of dataset we may have.

    Args:
        dataset_dir: path object for the dataset root. It must provide
            ``joinpath()`` and be accepted by ``os.fspath()``.
        args: object exposing a ``verbose`` attribute; when truthy, the
            raw list of matching formats is printed.

    Returns:
        The name of the single matching format.

    Raises:
        AssertionError: if no format, or more than one format, matches
            once ``image_dir`` has been discarded.
    """

    # datumaro gets confused with 2 other formats, but the others
    # don't have the `.datumaro/config.yaml` file.
    if dataset_dir.joinpath(".datumaro", "config.yaml").exists():
        return "datumaro"

    env = Environment()
    # The path does not change between importers, so convert it once.
    dataset_path = os.fspath(dataset_dir)
    matches = []
    for format_name in env.importers.items:
        importer = env.make_importer(format_name)
        try:
            if importer.detect(dataset_path):
                matches.append(format_name)
        except NotImplementedError:
            # This importer offers no detection; it simply cannot match.
            pass

    if args.verbose:
        print(f"Found {matches} for {dataset_dir}")

    # everything seems to match 'image_dir'....
    matches = [m for m in matches if m != "image_dir"]

    if len(matches) == 1:
        return matches[0]

    # Raised explicitly rather than via `assert False`: an assert statement
    # is removed under `python -O`, which would turn this failure into a
    # silent `return None`. The exception type is kept for existing callers.
    raise AssertionError(f"Unrecognized dataset format, {matches}")
Exemplo n.º 2
0
def import_command(args):
    """Create a project in ``args.dst_dir`` from the dataset at ``args.source``.

    The importer is looked up by ``args.format``. Non-empty
    ``DEFAULT_CONFIG.env_dir`` / ``DEFAULT_CONFIG.dataset_dir`` directories
    under the destination are removed when ``args.overwrite`` is set and
    rejected otherwise. The project name is ``args.name``, falling back to
    the base name of the destination directory when it is ``None``.

    Returns:
        0 once the project has been saved.

    Raises:
        CliException: if one of the directories above already has content
            and ``args.overwrite`` is not set, or if no importer is
            registered for ``args.format``.
    """

    def _reset_dir(path, ignore_errors):
        # Only a directory that exists and has entries needs attention.
        if not (osp.isdir(path) and os.listdir(path)):
            return
        if args.overwrite:
            shutil.rmtree(path, ignore_errors=ignore_errors)
            return
        raise CliException("Directory '%s' already exists "
                           "(pass --overwrite to force creation)" %
                           path)

    project_dir = osp.abspath(args.dst_dir)

    _reset_dir(osp.join(project_dir, DEFAULT_CONFIG.env_dir), True)
    # NOTE: remove the dir to avoid using data from previous project
    _reset_dir(osp.join(project_dir, DEFAULT_CONFIG.dataset_dir), False)

    requested_name = args.name
    project_name = (osp.basename(project_dir) if requested_name is None
                    else requested_name)

    try:
        importer = Environment().make_importer(args.format)
    except KeyError:
        raise CliException("Importer for format '%s' is not found" % \
            args.format)

    # Importers without `from_cmdline` are called with no extra keywords.
    if hasattr(importer, 'from_cmdline'):
        extra_args = importer.from_cmdline(args.extra_args)
    else:
        extra_args = {}

    log.info("Importing project from '%s' as '%s'" % \
        (args.source, args.format))

    project = importer(osp.abspath(args.source), **extra_args)
    config = project.config
    config.project_name = project_name
    config.project_dir = project_dir

    # The dataset is built when checking is not skipped, and always when
    # copying, because the copy is made by saving the dataset itself.
    if not args.skip_check or args.copy:
        log.info("Checking the dataset...")
        dataset = project.make_dataset()

    if not args.copy:
        project.save()
    else:
        log.info("Cloning data...")
        dataset.save(merge=True, save_images=True)

    log.info("Project has been created at '%s'" % project_dir)

    return 0
Exemplo n.º 3
0
def import_command(args):
    """Create a project in ``args.dst_dir`` from the dataset at ``args.source``.

    When ``args.format`` is empty, every importer registered in the
    environment is asked to ``detect`` the source, and the import proceeds
    only if exactly one of them matches. Otherwise the named importer is
    used, with ``args.extra_args`` parsed by its ``from_cmdline`` when it
    has one.

    Returns:
        0 on success, 1 when no format could be detected, 2 when several
        formats match the source.

    Raises:
        CliException: if a target directory already has content and
            ``args.overwrite`` is not set, if extra args are given without
            a format, or if no importer is registered for ``args.format``.
    """

    def _reset_dir(path, ignore_errors):
        # Only a directory that exists and has entries needs attention.
        if not (osp.isdir(path) and os.listdir(path)):
            return
        if args.overwrite:
            shutil.rmtree(path, ignore_errors=ignore_errors)
            return
        raise CliException("Directory '%s' already exists "
            "(pass --overwrite to overwrite)" % path)

    project_dir = osp.abspath(args.dst_dir)

    _reset_dir(osp.join(project_dir, DEFAULT_CONFIG.env_dir), True)
    # NOTE: remove the dir to avoid using data from previous project
    _reset_dir(osp.join(project_dir, DEFAULT_CONFIG.dataset_dir), False)

    requested_name = args.name
    project_name = (osp.basename(project_dir) if requested_name is None
        else requested_name)

    env = Environment()
    log.info("Importing project from '%s'" % args.source)

    extra_args = {}
    if args.format:
        # Explicit format: look the importer up by name.
        try:
            importer = env.make_importer(args.format)
            if hasattr(importer, 'from_cmdline'):
                extra_args = importer.from_cmdline(args.extra_args)
        except KeyError:
            raise CliException("Importer for format '%s' is not found" % \
                args.format)
    else:
        if args.extra_args:
            raise CliException("Extra args can not be used without format")

        log.info("Trying to detect dataset format...")

        candidates = []
        for candidate_name in env.importers.items:
            log.debug("Checking '%s' format...", candidate_name)
            candidate = env.make_importer(candidate_name)
            try:
                if candidate.detect(args.source):
                    log.debug("format matched")
                    candidates.append((candidate_name, candidate))
            except NotImplementedError:
                log.debug("Format '%s' does not support auto detection.",
                    candidate_name)

        if not candidates:
            log.error("Failed to detect dataset format automatically. "
                "Try to specify format with '-f/--format' parameter.")
            return 1
        if len(candidates) != 1:
            log.error("Multiple formats match the dataset: %s. "
                "Try to specify format with '-f/--format' parameter.",
                ', '.join(name for name, _ in candidates))
            return 2

        # Exactly one match: record it on `args` for the log line below.
        args.format, importer = candidates[0]

    log.info("Importing project as '%s'" % args.format)

    project = importer(osp.abspath(args.source), **extra_args)
    config = project.config
    config.project_name = project_name
    config.project_dir = project_dir

    # The dataset is built when checking is not skipped, and always when
    # copying, because the copy is made by saving the dataset itself.
    if not args.skip_check or args.copy:
        log.info("Checking the dataset...")
        dataset = project.make_dataset()

    if not args.copy:
        project.save()
    else:
        log.info("Cloning data...")
        dataset.save(merge=True, save_images=True)

    log.info("Project has been created at '%s'" % project_dir)

    return 0
Exemplo n.º 4
0
def convert_command(args):
    """Convert the dataset at ``args.source`` into ``args.output_format``.

    The converter is looked up by ``args.output_format`` and configured
    from ``args.extra_args``. The source format is ``args.input_format``
    when given; otherwise every registered importer is asked to ``detect``
    the source and exactly one must match. The result is written to
    ``args.dst_dir``, or to a generated directory name when that is empty.

    Returns:
        0 on success, 1 when no input format could be detected, 2 when
        several input formats match the source.

    Raises:
        CliException: if no converter is registered for
            ``args.output_format``, no importer is registered for
            ``args.input_format``, or ``args.dst_dir`` already has content
            and ``args.overwrite`` is not set.
    """
    env = Environment()

    try:
        converter = env.converters.get(args.output_format)
    except KeyError:
        raise CliException("Converter for format '%s' is not found" % \
            args.output_format)
    # Kept under a dedicated name that is never rebound: `converter_proxy`
    # reads it only when it is eventually called, so any later reassignment
    # would silently change the options handed to the converter.
    converter_args = converter.from_cmdline(args.extra_args)

    def converter_proxy(extractor, save_dir):
        return converter.convert(extractor, save_dir, **converter_args)

    filter_args = FilterModes.make_filter_args(args.filter_mode)

    if not args.input_format:
        matches = []
        for format_name in env.importers.items:
            log.debug("Checking '%s' format...", format_name)
            importer = env.make_importer(format_name)
            try:
                match = importer.detect(args.source)
                if match:
                    log.debug("format matched")
                    matches.append((format_name, importer))
            except NotImplementedError:
                log.debug("Format '%s' does not support auto detection.",
                          format_name)

        if len(matches) == 0:
            log.error(
                "Failed to detect dataset format. "
                "Try to specify format with '-if/--input-format' parameter.")
            return 1
        elif len(matches) != 1:
            log.error(
                "Multiple formats match the dataset: %s. "
                "Try to specify format with '-if/--input-format' parameter.",
                ', '.join(m[0] for m in matches))
            return 2

        format_name, importer = matches[0]
        args.input_format = format_name
        log.info("Source dataset format detected as '%s'", args.input_format)
    else:
        try:
            importer = env.make_importer(args.input_format)
            if hasattr(importer, 'from_cmdline'):
                # Still called as before, but the result is no longer
                # stored over the converter's options. The importer is
                # invoked below without extra keywords, so the value was
                # never forwarded to it in the first place.
                importer.from_cmdline()
        except KeyError:
            raise CliException("Importer for format '%s' is not found" % \
                args.input_format)

    source = osp.abspath(args.source)

    dst_dir = args.dst_dir
    if dst_dir:
        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
            raise CliException("Directory '%s' already exists "
                               "(pass --overwrite to overwrite)" % dst_dir)
    else:
        # No destination given: derive one from the source base name and
        # the output format.
        dst_dir = generate_next_file_name('%s-%s' % \
            (osp.basename(source), make_file_name(args.output_format)))
    dst_dir = osp.abspath(dst_dir)

    project = importer(source)
    dataset = project.make_dataset()

    log.info("Exporting the dataset")
    dataset.export_project(save_dir=dst_dir,
                           converter=converter_proxy,
                           filter_expr=args.filter,
                           **filter_args)

    log.info("Dataset exported to '%s' as '%s'" % \
        (dst_dir, args.output_format))

    return 0