Пример #1
    def import_from(cls,
                    path: str,
                    format: str = None,
                    env: Environment = None,
                    **kwargs) -> 'Dataset':
        """
        Creates a dataset instance from a dataset on the disk.

        Args:
            path - The input file or directory path
            format - Name of the dataset format plugin. If not set,
                it is determined with `cls.detect(path, env)`.
            env - The plugin collection used to look up importers and
                extractors. If not set, a new `Environment()` is created.
            **kwargs - Parameters for the format. They are passed to the
                importer, or stored as the source options when the format
                is handled directly by an extractor.

        Returns:
            The result of `cls.from_extractors()`, with `_source_path`
            and `_format` set to `path` and the resolved `format`.

        Raises:
            DatumaroError - if `format` is neither a registered importer
                nor a registered extractor in `env`
        """
        from datumaro.components.config_model import Source

        if env is None:
            env = Environment()

        if not format:
            format = cls.detect(path, env)

        # TODO: remove importers, put this logic into extractors
        if format in env.importers:
            importer = env.make_importer(format)
            with logging_disabled(log.INFO):
                project = importer(path, **kwargs)
            detected_sources = list(project.config.sources.values())
        elif format in env.extractors:
            # No importer for this format: describe the path itself
            # as a single source
            detected_sources = [{
                'url': path,
                'format': format,
                'options': kwargs
            }]
        else:
            raise DatumaroError(
                "Unknown source format '%s'. To make it "
                "available, add the corresponding Extractor implementation "
                "to the environment" % format)

        extractors = []
        for src_conf in detected_sources:
            # Plain dicts are wrapped, so that every source is accessed
            # uniformly through attributes
            if not isinstance(src_conf, Source):
                src_conf = Source(src_conf)
            # NOTE(review): forwarding the source options as keyword
            # arguments is assumed here - confirm against upstream
            extractors.append(
                env.make_extractor(src_conf.format, src_conf.url,
                                   **src_conf.options))

        dataset = cls.from_extractors(*extractors, env=env)
        dataset._source_path = path
        dataset._format = format
        return dataset
Пример #2
 def detect(cls, path):
     """
     Tells whether `cls.find_subsets(path)` finds anything for `path`.

     The lookup runs inside `logging_disabled(log.WARN)`.

     Returns:
         True if the result of `find_subsets` is non-empty, False otherwise.
     """
     with logging_disabled(log.WARN):
         subset_count = len(cls.find_subsets(path))
     return subset_count > 0
Пример #3
    def import_from(cls,
                    path: str,
                    format: Optional[str] = None,
                    env: Optional[Environment] = None,
                    progress_reporter: Optional[ProgressReporter] = None,
                    error_policy: Optional[ImportErrorPolicy] = None,
                    **kwargs) -> Dataset:
        """
        Creates a `Dataset` instance from a dataset on the disk.

        Args:
            path - The input file or directory path
            format - Dataset format.
                If a string is passed, it is treated as a plugin name,
                which is searched for in the `env` plugin context.
                If not set, will try to detect automatically,
                using the `env` plugin context.
            env - A plugin collection. If not set, the built-in plugins are used
            progress_reporter - An object to report progress.
                Implies eager loading.
            error_policy - An object to report format-related errors.
                Implies eager loading.
            **kwargs - Parameters for the format

        Returns:
            The result of `cls.from_extractors()`, with `_source_path`
            and `_format` set to `path` and the resolved `format`.

        Raises:
            UnknownFormatError - if `format` is neither a registered
                importer nor a registered extractor in `env`
        """
        import warnings

        if env is None:
            env = Environment()

        if not format:
            format = cls.detect(path, env=env)

        # TODO: remove importers, put this logic into extractors
        if format in env.importers:
            importer = env.make_importer(format)
            with logging_disabled(log.INFO):
                detected_sources = importer(path, **kwargs)
        elif format in env.extractors:
            # No importer for this format: describe the path itself
            # as a single source
            detected_sources = [{
                'url': path,
                'format': format,
                'options': kwargs
            }]
        else:
            raise UnknownFormatError(format)

        # TODO: probably, should not be available in lazy mode, because it
        # becomes unreliable and error-prone. For progress reporting it
        # makes little sense, because loading stage is spread over other
        # operations. Error reporting is going to be unreliable.
        has_ctx_args = progress_reporter is not None or error_policy is not None
        eager = has_ctx_args

        if not progress_reporter:
            progress_reporter = NullProgressReporter()
        # One progress reporter per detected source
        pbars = progress_reporter.split(len(detected_sources))

        try:
            extractors = []
            for src_conf, pbar in zip(detected_sources, pbars):
                if not isinstance(src_conf, Source):
                    src_conf = Source(src_conf)

                # Copy the options, so that adding 'ctx' does not modify
                # the source configuration
                extractor_kwargs = dict(src_conf.options)

                assert 'ctx' not in extractor_kwargs
                extractor_kwargs['ctx'] = ImportContext(
                    progress_reporter=pbar, error_policy=error_policy)

                try:
                    extractors.append(
                        env.make_extractor(src_conf.format, src_conf.url,
                                           **extractor_kwargs))
                except TypeError as e:
                    # TODO: for backward compatibility. To be removed after 0.3
                    if "unexpected keyword argument 'ctx'" not in str(e):
                        raise

                    if has_ctx_args:
                        # NOTE(review): the warning call and its category
                        # are reconstructed - confirm against upstream
                        warnings.warn(
                            "It seems that '%s' extractor "
                            "does not support progress and error reporting, "
                            "it will be disabled" % src_conf.format,
                            DeprecationWarning)

                    # NOTE(review): retrying without 'ctx' is reconstructed
                    # - confirm against upstream
                    extractor_kwargs.pop('ctx')
                    extractors.append(
                        env.make_extractor(src_conf.format, src_conf.url,
                                           **extractor_kwargs))

            dataset = cls.from_extractors(*extractors, env=env)
            if eager:
                # NOTE(review): presumably forces loading right away,
                # so that progress and errors are reported inside this
                # call - confirm that `init_cache()` is the intended call
                dataset.init_cache()
        except _ImportFail as e:
            # Re-raise the error attached as the cause instead of the wrapper
            raise e.__cause__

        dataset._source_path = path
        dataset._format = format

        return dataset