def load_and_check_config(config_path: Optional[str]) -> Dict[str, Any]:
    """Load the configuration file and verify it is usable to run the api.

    Args:
        config_path: Location of the configuration file to read

    Raises:
        EnvironmentError: when no configuration path is provided
        FileNotFoundError: when the provided path does not exist
        KeyError: when the loaded configuration has no 'storage' entry

    Returns:
        The loaded configuration, as a dict

    """
    if not config_path:
        raise EnvironmentError("Configuration file must be defined")

    path_is_missing = not os.path.exists(config_path)
    if path_is_missing:
        raise FileNotFoundError(f"Configuration file {config_path} does not exist")

    loaded = config.read(config_path)
    if "storage" in loaded:
        return loaded

    raise KeyError("Missing 'storage' configuration")
def test_load_from_envvar_no_default_config(swh_config, monkeypatch):
    """Without defaults, load_from_envvar must return the same thing as a
    plain read of the file named by SWH_CONFIG_FILENAME.

    """
    filename = str(swh_config)
    monkeypatch.setenv("SWH_CONFIG_FILENAME", filename)

    loaded = config.load_from_envvar()

    assert loaded == config.read(filename)
def test_read_no_default_conf(swh_config):
    """If no default config is provided to read, the result should be exactly
    the yaml content of the config file.

    """
    filename = str(swh_config)
    parsed = config.read(filename)

    with open(filename) as stream:
        raw_yaml = yaml.safe_load(stream)

    assert parsed == raw_yaml
def cli(ctx, config_file):
    """Software Heritage graph tools."""
    ctx.ensure_object(dict)

    # Read the file on top of DEFAULT_CONFIG and refuse to go further when
    # the result carries no "graph" entry.
    settings = config.read(config_file, DEFAULT_CONFIG)
    has_graph_stanza = "graph" in settings
    if not has_graph_stanza:
        message = 'no "graph" stanza found in configuration file %s' % config_file
        raise ValueError(message)

    # Make the configuration available through the context object.
    ctx.obj["config"] = settings
def lister(ctx, config_file):
    """Software Heritage Lister tools."""
    from swh.core import config

    ctx.ensure_object(dict)

    # No explicit file given: use the one named by SWH_CONFIG_FILENAME
    # (which may itself be unset, in which case None is passed to read).
    path = config_file or os.environ.get("SWH_CONFIG_FILENAME")

    ctx.obj["config"] = config.read(path)
def indexer_cli_group(ctx, config_file):
    """Software Heritage Indexer tools.

    The Indexer is used to mine the content of the archive and extract derived
    information from archive source code artifacts.

    """
    from swh.core import config

    ctx.ensure_object(dict)

    # Store the configuration read from config_file on the context object.
    ctx.obj["config"] = config.read(config_file)
def loader(ctx, config_file):
    """Loader cli tools"""
    from os import environ

    from swh.core.config import read

    ctx.ensure_object(dict)
    logger.debug("ctx: %s", ctx)

    # No explicit file given: fall back to the one named by
    # SWH_CONFIG_FILENAME (None when the variable is unset).
    if not config_file:
        config_file = environ.get("SWH_CONFIG_FILENAME")

    ctx.obj["config"] = read(config_file)
    logger.debug("config_file: %s", config_file)
    # The message needs a %s placeholder for its argument; without it the
    # logging module reports a formatting error instead of the configuration.
    logger.debug("config: %s", ctx.obj["config"])
def objstorage_cli_group(ctx, config_file):
    """Software Heritage Objstorage tools."""
    from swh.core import config

    # An explicit file wins; otherwise use SWH_CONFIG_FILENAME when it is set.
    path = config_file or os.environ.get("SWH_CONFIG_FILENAME")

    if not path:
        # No configuration file at all: start from an empty configuration.
        conf = {}
    elif os.path.exists(path):
        conf = config.read(path)
    else:
        raise ValueError("%s does not exist" % path)

    ctx.ensure_object(dict)
    ctx.obj["config"] = conf
def test_load_from_envvar_with_default_config(swh_config, monkeypatch):
    """load_from_envvar called with defaults must equal the content of the
    file named by SWH_CONFIG_FILENAME, updated with the default entries.

    """
    defaults = {
        "number": 666,
        "something-cool": ["something", "cool"],
    }
    filename = str(swh_config)
    monkeypatch.setenv("SWH_CONFIG_FILENAME", filename)

    loaded = config.load_from_envvar(defaults)

    expected = {
        **config.read(filename),
        "number": 666,
        "something-cool": ["something", "cool"],
    }
    assert loaded == expected
def setUp(self):
    """Prepare a CLI runner and a temporary configuration file."""
    self.runner = CliRunner()

    # bare bone configuration, to allow testing the compression pipeline
    # with minimum RAM requirements on trivial graphs
    with NamedTemporaryFile(
        mode="w", delete=False, prefix="swh-graph-test", suffix=".yml"
    ) as tmpconf:
        tmpconf.write(
            """
graph:
    compress:
        batch_size: 1000
"""
        )

    # The file was created with delete=False, so it is still on disk after
    # being closed and can be read back through its path.
    self.conffile = Path(tmpconf.name)
    self.config = config.read(self.conffile, cli.DEFAULT_CONFIG)
def storage(ctx, config_file, check_config):
    """Software Heritage Storage tools."""
    from swh.core import config

    # An explicit file wins; otherwise use SWH_CONFIG_FILENAME when it is set.
    path = config_file or os.environ.get("SWH_CONFIG_FILENAME")

    if not path:
        conf = {}
    elif os.path.exists(path):
        conf = config.read(path)
    else:
        raise ValueError("%s does not exist" % path)

    # Nothing can be done without a storage entry (this is always the case
    # when no configuration file was found).
    storage_is_configured = "storage" in conf
    if not storage_is_configured:
        ctx.fail("You must have a storage configured in your config file.")

    ctx.ensure_object(dict)
    ctx.obj["config"] = conf
    ctx.obj["check_config"] = check_config
def load_and_check_config(config_path: str, type: str = "postgresql") -> Dict:
    """Check the minimal configuration is set to run the api or raise an
       error explanation.

    Args:
        config_path: Configuration file path to load
        type: Configuration type, for 'postgresql' type (the default), more
              checks are done.

    Raises:
        EnvironmentError: if no configuration path is given
        FileNotFoundError: if the configuration file does not exist
        KeyError: if the 'scheduler' entry is missing or empty, or (for the
            'postgresql' type) if it has no 'db' entry
        ValueError: for the 'postgresql' type, if the scheduler 'cls' is
            neither 'local' nor 'postgresql'

    Returns:
        configuration as a dict

    """
    if not config_path:
        raise EnvironmentError("Configuration file must be defined")

    if not os.path.exists(config_path):
        raise FileNotFoundError(f"Configuration file {config_path} does not exist")

    cfg = config.read(config_path)

    vcfg = cfg.get("scheduler")
    if not vcfg:
        # The message used to contain a stray '%' ("'%scheduler'"), a
        # leftover from a half-removed %-format; it now names the key as is.
        raise KeyError("Missing 'scheduler' configuration")

    if type == "postgresql":
        cls = vcfg.get("cls")
        if cls not in ("local", "postgresql"):
            raise ValueError(
                "The scheduler backend can only be started with a 'postgresql' "
                "configuration"
            )

        db = vcfg.get("db")
        if not db:
            raise KeyError("Invalid configuration; missing 'db' config entry")

    return cfg
def cli(ctx, config_file, database, url, no_stdout):
    """Software Heritage Scheduler tools.

    Use a local scheduler instance by default (plugged to the main scheduler
    db).
    """
    # psycopg2 may not be installed; define a stand-in exception class so the
    # except clause below stays valid either way.
    try:
        from psycopg2 import OperationalError
    except ImportError:

        class OperationalError(Exception):
            pass

    from swh.core import config
    from swh.scheduler import DEFAULT_CONFIG, get_scheduler

    ctx.ensure_object(dict)
    log = logging.getLogger(__name__)

    conf = config.read(config_file, DEFAULT_CONFIG)
    if "scheduler" not in conf:
        raise ValueError("missing 'scheduler' configuration")

    # Command-line overrides: a database takes precedence over a url.
    sched_conf = conf["scheduler"]
    if database:
        sched_conf["cls"] = "postgresql"
        sched_conf["db"] = database
    elif url:
        sched_conf["cls"] = "remote"
        sched_conf["url"] = url

    try:
        log.debug("Instantiating scheduler with %s", sched_conf)
        instance = get_scheduler(**sched_conf)
    except (ValueError, OperationalError):
        # it's the subcommand to decide whether not having a proper
        # scheduler instance is a problem.
        instance = None

    ctx.obj["scheduler"] = instance
    ctx.obj["config"] = conf
def load_and_check_config(
    config_path: Optional[str], type: str = "local"
) -> Dict[str, Any]:
    """Check the minimal configuration is set to run the api or raise an
       error explanation.

    Args:
        config_path: Path to the configuration file to load
        type: configuration type. For 'local' type, more
              checks are done.

    Raises:
        EnvironmentError: if no configuration path is given
        FileNotFoundError: if the configuration file does not exist
        KeyError: if the configuration has no 'indexer_storage' entry
        ValueError: for the 'local' type, if the indexer storage 'cls' is not
            'local' or if it has no 'db' entry

    Returns:
        configuration as a dict

    """
    if not config_path:
        raise EnvironmentError("Configuration file must be defined")

    if not os.path.exists(config_path):
        raise FileNotFoundError(f"Configuration file {config_path} does not exist")

    cfg = config.read(config_path)
    if "indexer_storage" not in cfg:
        # The message used to contain a stray '%' ("'%indexer_storage'"), a
        # leftover from a half-removed %-format; it now names the key as is.
        raise KeyError("Missing 'indexer_storage' configuration")

    if type == "local":
        vcfg = cfg["indexer_storage"]
        cls = vcfg.get("cls")
        if cls != "local":
            raise ValueError(
                "The indexer_storage backend can only be started with a "
                "'local' configuration"
            )

        if not vcfg.get("db"):
            raise ValueError("Invalid configuration; missing 'db' config entry")

    return cfg
def deposit_autoconfig(deposit_config_path):
    """Enforce config for deposit classes inherited from APIConfig."""
    cfg = read(deposit_config_path)

    if "scheduler" not in cfg:
        return

    # scheduler setup: require the check-deposit and load-deposit tasks
    scheduler = get_scheduler(**cfg["scheduler"])
    required_tasks = (
        (
            "check-deposit",
            "swh.deposit.loader.tasks.ChecksDepositTsk",
            "Check deposit metadata/archive before loading",
        ),
        (
            "load-deposit",
            "swh.loader.package.deposit.tasks.LoadDeposit",
            "Loading deposit archive into swh archive",
        ),
    )
    for name, backend_name, description in required_tasks:
        scheduler.create_task_type(
            {
                "type": name,
                "backend_name": backend_name,
                "description": description,
                "num_retries": 3,
            }
        )
def test_read(swh_config):
    """Reading the config file with defaults must give parsed_conffile."""
    loaded = config.read(str(swh_config), default_conf)

    assert loaded == parsed_conffile
def test_read_empty_file():
    """Reading with no path (None) must give the parsed default config."""
    loaded = config.read(None, default_conf)

    assert loaded == parsed_default_conf
def cook(
    ctx,
    config_file: str,
    swhid: CoreSWHID,
    outfile: io.RawIOBase,
    bundle_type: Optional[str],
):
    """
    Runs a vault cooker for a single object (identified by a SWHID),
    and outputs it to the given file.
    """
    from swh.core import config
    from swh.model.swhids import ObjectType
    from swh.objstorage.exc import ObjNotFoundError
    from swh.objstorage.factory import get_objstorage
    from swh.storage import get_storage

    from .cookers import get_cooker_cls
    from .in_memory_backend import InMemoryVaultBackend

    conf = config.read(config_file)

    # The graph client is optional: it is only instantiated when a "graph"
    # entry is configured, and its absence is only an error in that case.
    try:
        from swh.graph.client import RemoteGraphClient  # optional dependency

        graph = RemoteGraphClient(**conf["graph"]) if conf.get("graph") else None
    except ModuleNotFoundError:
        if conf.get("graph"):
            raise EnvironmentError(
                "Graph configuration required but module is not installed."
            )
        else:
            graph = None

    backend = InMemoryVaultBackend()

    # Pick a default bundle type from the kind of object when none was given.
    if bundle_type is None:
        if swhid.object_type in (
            ObjectType.RELEASE,
            ObjectType.SNAPSHOT,
        ):
            bundle_type = "git_bare"
        elif swhid.object_type in (ObjectType.DIRECTORY,):
            bundle_type = "flat"
        else:
            raise click.ClickException(
                "No default bundle type for this kind of object, "
                "use --bundle-type to choose one"
            )

    try:
        cooker_cls = get_cooker_cls(bundle_type, swhid.object_type)
    except ValueError as e:
        # Report the problem as a CLI error, keeping the original exception
        # as the explicit cause.
        raise click.ClickException(*e.args) from e

    storage = get_storage(**conf["storage"])
    objstorage = get_objstorage(**conf["objstorage"]) if "objstorage" in conf else None
    cooker = cooker_cls(
        swhid=swhid,
        backend=backend,
        storage=storage,
        graph=graph,
        objstorage=objstorage,
        max_bundle_size=None,  # No need for a size limit, we are running locally
    )
    cooker.cook()

    try:
        bundle = backend.fetch(cooker_cls.BUNDLE_TYPE, swhid)
    except ObjNotFoundError:
        bundle = None

    if bundle is None:
        # A leftover pdb.set_trace() used to sit here; it made the command
        # stop in an interactive debugger instead of reporting the failure.
        raise click.ClickException("Cooker did not write a bundle to the backend.")
    outfile.write(bundle)
def test_support_non_existing_conffile(tmp_path):
    """Reading a path that does not exist must give the parsed default config."""
    missing_file = tmp_path / "void.yml"

    loaded = config.read(str(missing_file), default_conf)

    assert loaded == parsed_default_conf
def test_raise_on_broken_file_perms(swh_config_unreadable):
    """Reading the unreadable config fixture must raise PermissionError."""
    unreadable_path = str(swh_config_unreadable)

    with pytest.raises(PermissionError):
        config.read(unreadable_path, default_conf)
def test_support_empty_conffile(swh_config_empty):
    """Reading the empty config fixture must give the parsed default config."""
    loaded = config.read(str(swh_config_empty), default_conf)

    assert loaded == parsed_default_conf