Example #1
def guess_dataset_ios(path: str) -> List[Tuple[DatasetIO, float]]:
    """
    Guess suitable DatasetIO objects for a file system path or URL given by *path*.
    Returns a list of (DatasetIO, fitness) tuples, sorted by descending fitness values.
    Fitness values are in the interval (0, 1].
    The first entry is the most appropriate DatasetIO object.
    :param path: A file system path or URL.
    :return: A list of (DatasetIO, fitness) tuples.
    """
    if os.path.isfile(path):
        input_type = "file"
    elif os.path.isdir(path):
        input_type = "dir"
    elif path.find("://") > 0:
        input_type = "url"
    else:
        input_type = None

    dataset_ios = get_extension_registry().find_components(
        EXTENSION_POINT_DATASET_IOS)

    dataset_io_fitness_list = []
    for dataset_io in dataset_ios:
        fitness = dataset_io.fitness(path, path_type=input_type)
        if fitness > 0.0:
            dataset_io_fitness_list.append((dataset_io, fitness))

    dataset_io_fitness_list.sort(key=lambda item: -item[1])
    return dataset_io_fitness_list
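
A short usage sketch (the path is hypothetical; assumes xcube plugins have populated the dataset I/O registry):

candidates = guess_dataset_ios('demo.zarr')
if candidates:
    best_io, fitness = candidates[0]  # most appropriate DatasetIO comes first
    print(f'Using {best_io.name} (fitness={fitness:.2f})')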
Example #2
def query_dataset_io(
        filter_fn: Callable[[DatasetIO], bool] = None) -> List[DatasetIO]:
    dataset_ios = get_extension_registry().find_components(
        EXTENSION_POINT_DATASET_IOS)
    if filter_fn is None:
        return dataset_ios
    return list(filter(filter_fn, dataset_ios))
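
For example, a filter function can select only writable DatasetIO objects (a sketch, relying on the modes attribute also used in Example #10):

writable_ios = query_dataset_io(lambda ds_io: 'w' in ds_io.modes)
for ds_io in writable_ios:
    print(ds_io.name)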
Example #3
def _dump_extensions(point: str) -> int:
    count = 0
    for extension in get_extension_registry().find_extensions(point):
        print(
            f'  {extension.name:>24s}  {extension.metadata.get("description", "<no description>")}'
        )
        count += 1
    return count
Example #4
File: io.py Project: micder/xcube
def store_info(store_id: str, store_params: List[str], show_params: bool,
               show_openers: bool, show_writers: bool, show_data_ids: bool,
               use_json_format: bool):
    """
    Show data store information.

    Dumps detailed data store information in human-readable form,
    or as JSON when using the --json option.

    You can obtain valid STORE names using the command "xcube store list".

    Note that some stores require the provision of parameters PARAMS
    when using one of the options --openers/-O, --writers/-W, or --data/-D.
    To find out which parameters are available, use the command with just
    the --params/-P option first.
    """
    extension = get_extension_registry().get_extension(
        EXTENSION_POINT_DATA_STORES, store_id)
    from xcube.core.store import get_data_store_params_schema
    from xcube.core.store import MutableDataStore
    params_schema = get_data_store_params_schema(store_id)
    description = extension.metadata.get('description')
    requires_store_instance = any((show_openers, show_writers, show_data_ids))
    data_store = _new_data_store(
        store_id, store_params) if requires_store_instance else None
    if use_json_format:
        d = dict()
        d['store_id'] = store_id
        if description:
            d['description'] = description
        if show_params:
            d['params_schema'] = params_schema.to_dict()
        if show_openers:
            d['opener_ids'] = data_store.get_data_opener_ids()
        if show_writers and isinstance(data_store, MutableDataStore):
            d['writer_ids'] = data_store.get_data_writer_ids()
        if show_data_ids:
            d['data_ids'] = list(data_store.get_data_ids())
        print(json.dumps(d, indent=2))
    else:
        print('\nData store description:')
        print(f'  {description or _NO_DESCRIPTION}')
        if show_params:
            print(_format_params_schema(params_schema))
        if show_openers:
            print('\nData openers:')
            _dump_store_openers(data_store)
        if show_writers:
            if isinstance(data_store, MutableDataStore):
                print('\nData writers:')
                _dump_store_writers(data_store)
            else:
                print(
                    f'No writers available, because data store "{store_id}" is not mutable.'
                )
        if show_data_ids:
            print('\nData resources:')
            count = _dump_store_data_ids(data_store)
            print(f'{count} data resource{"s" if count != 1 else ""} found.')
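
A hedged example of invoking this command function directly from Python; "directory" is one of the store identifiers mentioned in Example #15:

store_info('directory', store_params=[], show_params=True,
           show_openers=False, show_writers=False, show_data_ids=False,
           use_json_format=True)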
Example #5
def _format_info():
    from xcube.util.plugin import get_extension_registry

    iproc_extensions = get_extension_registry().find_extensions(
        EXTENSION_POINT_INPUT_PROCESSORS)
    dsio_extensions = get_extension_registry().find_extensions(
        EXTENSION_POINT_DATASET_IOS,
        lambda e: 'w' in e.metadata.get('modes', set()))

    help_text = '\nInput processors to be used with option --proc:\n'
    help_text += _format_input_processors(iproc_extensions)
    help_text += '\nFor more input processors, use existing "xcube-gen-..." plugins ' \
                 "from xcube's GitHub organisation or write your own plugin.\n"
    help_text += '\n'
    help_text += '\nOutput formats to be used with option --format:\n'
    help_text += _format_dataset_ios(dsio_extensions)
    help_text += '\n'

    return help_text
Example #6
def _dump_named_extensions(point: str, names: Sequence[str]) -> int:
    count = 0
    for name in names:
        extension = get_extension_registry().get_extension(point, name)
        if extension:
            print(
                f'  {name:>24s}  {extension.metadata.get("description", _NO_DESCRIPTION)}'
            )
        else:
            print(f'  {name:>24s}  {_UNKNOWN_EXTENSION}')
        count += 1
    return count
Example #7
def find_data_writer_extensions(
        predicate: ExtensionPredicate = None,
        extension_registry: Optional[ExtensionRegistry] = None
) -> List[Extension]:
    """
    Get registered data writer extensions using the optional filter function *predicate*.

    :param predicate: An optional filter function.
    :param extension_registry: Optional extension registry. If not given, the global extension registry will be used.
    :return: List of matching extensions.
    """
    extension_registry = extension_registry or get_extension_registry()
    return extension_registry.find_extensions(EXTENSION_POINT_DATA_WRITERS,
                                              predicate=predicate)
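
For instance, a predicate can match on the extension name (a sketch; Extension objects expose name and metadata as in Examples #3 and #6):

zarr_writers = find_data_writer_extensions(
    predicate=lambda ext: 'zarr' in ext.name)
for ext in zarr_writers:
    print(ext.name, ext.metadata.get('description', '<no description>'))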
Example #8
def find_data_store_extensions(
        predicate: ExtensionPredicate = None,
        extension_registry: Optional[ExtensionRegistry] = None
) -> List[Extension]:
    """
    Find data store extensions using the optional filter function *predicate*.

    :param predicate: An optional filter function.
    :param extension_registry: Optional extension registry. If not given, the global extension registry will be used.
    :return: List of data store extensions.
    """
    extension_registry = extension_registry or get_extension_registry()
    return extension_registry.find_extensions(EXTENSION_POINT_DATA_STORES,
                                              predicate=predicate)
Example #9
def writer_info(writer_id: str):
    """
    Show data writer information.
    You can obtain valid WRITER names using the command "xcube io writer list".
    """
    extension = get_extension_registry().get_extension(
        EXTENSION_POINT_DATA_WRITERS, writer_id)
    description = extension.metadata.get('description')
    if description:
        print(description)
    from xcube.core.store import new_data_writer
    writer_ = new_data_writer(writer_id)
    params_schema = writer_.get_write_data_params_schema()
    print(_format_params_schema(params_schema))
Example #10
def find_dataset_io(format_name: str, modes: Iterable[str] = None, default: DatasetIO = None) -> Optional[DatasetIO]:
    modes = set(modes) if modes else None
    format_name = format_name.lower()
    dataset_ios = get_extension_registry().find_components(EXTENSION_POINT_DATASET_IOS)
    for dataset_io in dataset_ios:
        # noinspection PyUnresolvedReferences
        if format_name == dataset_io.name.lower():
            # noinspection PyTypeChecker
            if not modes or modes.issubset(dataset_io.modes):
                return dataset_io
    for dataset_io in dataset_ios:
        # noinspection PyUnresolvedReferences
        if format_name == dataset_io.ext.lower():
            # noinspection PyTypeChecker
            if not modes or modes.issubset(dataset_io.modes):
                return dataset_io
    return default
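
Usage sketch (assumes a DatasetIO named "zarr" is registered, which this snippet alone does not guarantee):

zarr_io = find_dataset_io('zarr', modes=['w'])
if zarr_io is not None:
    print(zarr_io.name, zarr_io.modes)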
Example #11
def new_data_writer(writer_id: str,
                    extension_registry: Optional[ExtensionRegistry] = None,
                    **writer_params) -> 'DataWriter':
    """
    Get an instance of the data writer identified by *writer_id*.

    The optional, extra writer parameters *writer_params* may be used by data store
    (``xcube.core.store.DataStore``) implementations so they can share their internal state with the writer.

    :param writer_id: The data writer identifier.
    :param extension_registry: Optional extension registry. If not given, the global extension registry will be used.
    :param writer_params: Extra writer parameters.
    :return: A data writer instance.
    """
    assert_given(writer_id, 'writer_id')
    extension_registry = extension_registry or get_extension_registry()
    return extension_registry.get_component(EXTENSION_POINT_DATA_WRITERS,
                                            writer_id)(**writer_params)
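
A hypothetical call; valid writer identifiers can be listed with the command "xcube io writer list" mentioned in Example #9:

writer = new_data_writer('dataset:zarr:posix')  # hypothetical writer id
params_schema = writer.get_write_data_params_schema()  # as in Example #9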
Example #12
def get_data_store_class(
    data_store_id: str,
    extension_registry: Optional[ExtensionRegistry] = None
) -> Union[Type['DataStore'], Type['MutableDataStore']]:
    """
    Get the class for the data store identified by *data_store_id*.

    :param data_store_id: A data store identifier.
    :param extension_registry: Optional extension registry.
        If not given, the global extension registry will be used.
    :return: The class for the data store.
    """
    extension_registry = extension_registry or get_extension_registry()
    if not extension_registry.has_extension(EXTENSION_POINT_DATA_STORES,
                                            data_store_id):
        raise DataStoreError(f'Unknown data store "{data_store_id}"'
                             f' (may be due to missing xcube plugin)')
    return extension_registry.get_component(EXTENSION_POINT_DATA_STORES,
                                            data_store_id)
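
A sketch of resolving and instantiating a store class (assumes the "directory" store accepts the base_dir parameter shown in the YAML example of Example #15):

store_cls = get_data_store_class('directory')
store = store_cls(base_dir='.')  # hypothetical constructor parameters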
Example #13
def new_data_opener(opener_id: str,
                    extension_registry: Optional[ExtensionRegistry] = None,
                    **opener_params) -> 'DataOpener':
    """
    Get an instance of the data opener identified by *opener_id*.

    The optional, extra opener parameters *opener_params* may
    be used by data store (``xcube.core.store.DataStore``)
    implementations so they can share their internal state with the opener.

    :param opener_id: The data opener identifier.
    :param extension_registry: Optional extension registry.
        If not given, the global extension registry will be used.
    :param opener_params: Extra opener parameters.
    :return: A data opener instance.
    """
    assert_given(opener_id, 'opener_id')
    extension_registry = extension_registry or get_extension_registry()
    if not extension_registry.has_extension(EXTENSION_POINT_DATA_OPENERS,
                                            opener_id):
        raise DataStoreError(f'A data opener named'
                             f' {opener_id!r} is not registered')
    return extension_registry.get_component(EXTENSION_POINT_DATA_OPENERS,
                                            opener_id)(**opener_params)
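
Analogous to new_data_writer; the opener identifier below is purely illustrative:

try:
    opener = new_data_opener('dataset:zarr:posix')  # hypothetical opener id
except DataStoreError as error:
    print(error)  # raised when no such opener is registered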
Example #14
def get_extension(name: str):
    return get_extension_registry().get_extension(EXTENSION_POINT_DATASET_IOS,
                                                  name)
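
For example (assumes a dataset I/O extension named "zarr" is registered; get_extension is checked for truthiness here, as in Example #6):

extension = get_extension('zarr')
if extension:
    print(extension.metadata.get('description', '<no description>'))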
Example #15
File: io.py Project: micder/xcube
def dump(output_file_path: str, config_file_path: Optional[str],
         type_specifier: Optional[str]):
    """
    Dump metadata of given data stores.

    Dumps data store metadata, and the metadata of each store's data
    resources, into a JSON file.
    Data stores may be selected and configured by a configuration file CONFIG,
    which may have JSON or YAML format.
    For example, this YAML configuration configures a single directory data store:

    \b
    this_dir:
        title: Current Dir
        description: A store that represents my current directory
        store_id: "directory"
        store_params:
            base_dir: "."

    """
    from xcube.core.store import DataStoreConfig
    from xcube.core.store import DataStorePool
    import time

    if config_file_path:
        store_pool = DataStorePool.from_file(config_file_path)
    else:
        extensions = get_extension_registry().find_extensions(
            EXTENSION_POINT_DATA_STORES)
        store_configs = {
            extension.name:
            DataStoreConfig(extension.name,
                            title=extension.metadata.get('title'),
                            description=extension.metadata.get('description'))
            for extension in extensions
            if extension.name not in ('memory', 'directory', 's3')
        }
        store_pool = DataStorePool(store_configs)

    stores = []
    for store_instance_id in store_pool.store_instance_ids:
        t0 = time.perf_counter()
        print(f'Generating entries for store "{store_instance_id}"...')
        try:
            store_instance = store_pool.get_store(store_instance_id)
        except BaseException as error:
            print(f'error: cannot open store "{store_instance_id}": {error}',
                  file=sys.stderr)
            continue

        try:
            search_result = [
                dsd.to_dict() for dsd in store_instance.search_data(
                    type_specifier=type_specifier)
            ]
        except BaseException as error:
            print(f'error: cannot search store "{store_instance_id}": {error}',
                  file=sys.stderr)
            continue

        store_config = store_pool.get_store_config(store_instance_id)
        stores.append(
            dict(store_instance_id=store_instance_id,
                 store_id=store_instance_id,
                 title=store_config.title,
                 description=store_config.description,
                 type_specifier=type_specifier,
                 datasets=search_result))
        print('Done after {:.2f} seconds'.format(time.perf_counter() - t0))

    with open(output_file_path, 'w') as fp:
        json.dump(dict(stores=stores), fp, indent=2)

    print(f'Dumped {len(stores)} store(s) to {output_file_path}.')
Example #16
from xcube.version import version


# noinspection PyShadowingBuiltins,PyUnusedLocal
@click.group(name='xcube')
@click.version_option(version)
@cli_option_traceback
@cli_option_scheduler
def cli(traceback=False, scheduler=None):
    """
    xcube Toolkit
    """


# Add registered CLI commands
for command in get_extension_registry().find_components(
        EXTENSION_POINT_CLI_COMMANDS):
    cli.add_command(command)


def main(args=None):
    # noinspection PyBroadException
    ctx_obj = new_cli_ctx_obj()
    try:
        exit_code = cli.main(args=args, obj=ctx_obj, standalone_mode=False)
    except Exception as e:
        exit_code = handle_cli_exception(e,
                                         traceback_mode=ctx_obj.get(
                                             "traceback", False))
    sys.exit(exit_code)
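
A conventional module entry point would follow (standard Python idiom; not shown in the original snippet):

if __name__ == '__main__':
    main()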

Example #17
def find_input_processor_class(name: str):
    extension = get_extension_registry().get_extension(
        EXTENSION_POINT_INPUT_PROCESSORS, name)
    if not extension:
        return None
    return extension.component
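
Usage sketch (the processor name "default" is hypothetical; available names appear in the help text built in Example #5):

iproc_cls = find_input_processor_class('default')  # hypothetical name
if iproc_cls is not None:
    processor = iproc_cls()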
Example #18
def dump(output_file_path: Optional[str], config_file_path: Optional[str],
         data_type: Optional[str], short_form: bool, include_props: str,
         exclude_props: str, csv_format: bool, yaml_format: bool,
         json_format: bool):
    """
    Dump metadata of given data stores.

    Dumps data store metadata, and the metadata of each store's data
    resources, into a JSON file.
    Data stores may be selected and configured by a configuration file CONFIG,
    which may have JSON or YAML format.
    For example, this YAML configuration configures a single directory data store:

    \b
    this_dir:
        title: Current Dir
        description: A store that represents my current directory
        store_id: "directory"
        store_params:
            base_dir: "."

    """
    from xcube.core.store import DataStoreConfig
    from xcube.core.store import DataStorePool
    import yaml
    import json
    import os.path

    if csv_format:
        output_format = 'csv'
        ext = '.csv'
    elif yaml_format:
        output_format = 'yaml'
        ext = '.yml'
    elif json_format:
        output_format = 'json'
        ext = '.json'
    elif output_file_path is not None:
        path_no_ext, ext = os.path.splitext(output_file_path)
        if ext in ('.csv', '.txt'):
            output_format = 'csv'
        elif ext in ('.yaml', '.yml'):
            output_format = 'yaml'
        else:
            output_format = 'json'
    else:
        output_format = 'json'
        ext = '.json'

    if output_file_path is None:
        path_no_ext, _ = os.path.splitext(_DEFAULT_DUMP_OUTPUT)
        output_file_path = path_no_ext + ext

    include_props = _parse_props(include_props) if include_props else None
    exclude_props = _parse_props(exclude_props) if exclude_props else None

    if short_form:
        short_include_props = _parse_props(_SHORT_INCLUDE)
        include_props = include_props or {}
        for data_key in ('store', 'data', 'var'):
            include_props[data_key] = include_props.get(data_key, set()).union(
                short_include_props[data_key])

    if config_file_path:
        store_pool = DataStorePool.from_file(config_file_path)
    else:
        extensions = get_extension_registry().find_extensions(
            EXTENSION_POINT_DATA_STORES)
        store_configs = {
            extension.name:
            DataStoreConfig(extension.name,
                            title=extension.metadata.get('title'),
                            description=extension.metadata.get('description'))
            for extension in extensions
            if extension.name not in ('memory', 'directory', 's3')
        }
        store_pool = DataStorePool(store_configs)

    dump_data = _get_store_data_var_tuples(store_pool, data_type,
                                           include_props, exclude_props)

    if output_format == 'csv':
        column_names = None
        column_names_set = None
        rows = []
        for store_dict, data_dict, var_dict in dump_data:
            if store_dict is None:
                break
            row = {}
            row.update({'store.' + k: v for k, v in store_dict.items()})
            row.update({'data.' + k: v for k, v in data_dict.items()})
            row.update({'var.' + k: v for k, v in var_dict.items()})
            rows.append(row)
            if column_names_set is None:
                column_names = list(row.keys())
                column_names_set = set(column_names)
            else:
                for k in row.keys():
                    if k not in column_names_set:
                        column_names.append(k)
                        column_names_set.add(k)

        def format_cell_value(value: Any) -> str:
            return str(value) if value is not None else ''

        sep = '\t'
        with open(output_file_path, 'w') as fp:
            if column_names:
                fp.write(sep.join(column_names) + '\n')
                for row in rows:
                    fp.write(
                        sep.join(
                            map(format_cell_value,
                                tuple(row.get(k)
                                      for k in column_names))) + '\n')

        print(f'Dumped {len(rows)} store '
              f'entr{"ies" if len(rows) != 1 else "y"} to {output_file_path}.')

    else:
        last_store_dict = None
        last_data_dict = None
        vars_list = []
        data_list = []
        store_list = []
        for store_dict, data_dict, var_dict in dump_data:
            if data_dict is not last_data_dict or data_dict is None:
                if last_data_dict is not None:
                    last_data_dict['data_vars'] = vars_list
                    vars_list = []
                    data_list.append(last_data_dict)
                last_data_dict = data_dict
            if store_dict is not last_store_dict or store_dict is None:
                if last_store_dict is not None:
                    last_store_dict['data'] = data_list
                    data_list = []
                    store_list.append(last_store_dict)
                last_store_dict = store_dict
            if var_dict:
                vars_list.append(var_dict)

        with open(output_file_path, 'w') as fp:
            if output_format == 'json':
                json.dump(dict(stores=store_list), fp, indent=2)
            else:
                yaml.dump(dict(stores=store_list), fp, indent=2)

        print(
            f'Dumped entries of {len(store_list)} store(s) to {output_file_path}.'
        )
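
The helper _parse_props is not shown in this listing. Judging from how its result is combined above (a mapping from the keys 'store', 'data', and 'var' to sets of property names), a plausible implementation might look like this sketch:

from typing import Dict, Set

def _parse_props(props: str) -> Dict[str, Set[str]]:
    # Assumed input format: "store.title,data.id,var.name" - each entry is
    # "<section>.<property>" with section being store, data, or var.
    parsed = {'store': set(), 'data': set(), 'var': set()}
    for prop in props.split(','):
        section, _, name = prop.strip().partition('.')
        if section in parsed and name:
            parsed[section].add(name)
    return parsed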