def _initialize_tenant_parser(tenant_sources_repo, tenant_sources_file, connections):
    """Create a TenantParser from a repo or a local file and parse it.

    Args:
        tenant_sources_repo: Optional string in the format
            ``<connection_name>:<repo>`` pointing at a repo that holds the
            tenant sources.
        tenant_sources_file: Path to a local tenant sources file; used only
            when ``tenant_sources_repo`` is falsy.
        connections: Mapping of connection name -> initialized connection.

    Returns:
        A TenantParser whose sources have already been parsed.

    Raises:
        ScraperConfigurationError: if the referenced connection does not
            exist or its provider has no matching repo class in REPOS.
    """
    if tenant_sources_repo:
        # Config entry must be in format <connection_name>:<repo>
        con_name, repo_name = tenant_sources_repo.split(":", 1)
        con = connections.get(con_name)
        if not con:
            raise ScraperConfigurationError(
                f"Cannot load tenant sources from repo '{repo_name}'. "
                # Fix: corrected typo "avilable" -> "available".
                f"Specified connection '{con_name}' is not available."
            )
        provider = con.provider
        repo_class = REPOS.get(provider)
        if not repo_class:
            raise ScraperConfigurationError(
                f"Cannot load tenant sources from repo '{repo_name}'. "
                f"Unknown connection provider '{provider}'."
            )
        repo = repo_class(repo_name, con)
        tenant_parser = TenantParser(sources_repo=repo)
    else:
        tenant_parser = TenantParser(sources_file=tenant_sources_file)
    tenant_parser.parse()
    return tenant_parser
def main(ctx, verbosity):
    """CLI entry point: configure logging, load and validate the config,
    store it on the click context, and fall through to ``scrape`` when no
    subcommand was given."""
    configure_logger(verbosity)

    # Load the configurations from file
    config = Config(root_path=".")
    config.from_object(default_settings)
    config.from_envvar(ZUBBI_SETTINGS_ENV)

    # Validate the configuration
    tenant_sources_repo = config.get("TENANT_SOURCES_REPO")
    tenant_sources_file = config.get("TENANT_SOURCES_FILE")
    # Exactly one of the two settings must be provided — fail when both
    # are set and when neither is set.
    if bool(tenant_sources_repo) == bool(tenant_sources_file):
        raise ScraperConfigurationError(
            "Either one of 'TENANT_SOURCES_REPO' "
            "and 'TENANT_SOURCES_FILE' must be set, "
            "but not both."
        )

    # Store the config in click's context object to be available for subcommands
    ctx.obj = {"config": config}

    if ctx.invoked_subcommand is None:
        ctx.invoke(scrape)
def get_web_url_builder(self, web_type, web_url, url):
    """Return a web-URL builder instance for the given ``web_type``.

    Falls back to ``url`` when no explicit ``web_url`` is configured.

    Raises:
        ScraperConfigurationError: if ``web_type`` has no registered
            builder class in ``WEB_URL_BUILDERS``.
    """
    effective_url = web_url if web_url else url
    builder_cls = self.WEB_URL_BUILDERS.get(web_type)
    if builder_cls is None:
        raise ScraperConfigurationError(
            "Could not initialize Gerrit connection due to an unsupported "
            "web_type '{}'".format(web_type)
        )
    return builder_cls(effective_url)
def init_connections(config):
    """Initialize Elasticsearch and every connection listed in the config.

    Returns:
        Dict mapping connection name -> initialized connection object.

    Raises:
        ScraperConfigurationError: if a connection entry names a provider
            that is not registered in CONNECTIONS.
    """
    # Elasticsearch is always required and is configured separately.
    es_config = get_elasticsearch_parameters_from_config(config)
    init_elasticsearch_con(**es_config)

    connections = {}
    for name, data in config["CONNECTIONS"].items():
        # The 'provider' key selects the connection class; every remaining
        # key is forwarded as a constructor argument. Abstraction for e.g.:
        #   gh_con = GitHubConnection(**data)
        #   connections['github'] = gh_con
        provider = data.pop("provider")
        connection_cls = CONNECTIONS.get(provider)
        if connection_cls is None:
            raise ScraperConfigurationError(
                f"Could not init connection '{name}'. Specified provider "
                f"'{provider}' is not available."
            )
        connection = connection_cls(**data)
        connection.init()
        connections[name] = connection
    return connections
def _load_tenant_sources_from_repo(self, sources_repo):
    """Collect tenant source definitions from a checked-out repository.

    For every tenant directory below TENANTS_DIRECTORY, check out its
    'settings.yaml' and 'sources.yaml' files and combine them into the same
    data structure Zuul builds for its main.yaml file.

    Args:
        sources_repo: Repo wrapper providing ``list_directory()``,
            ``check_out_file()`` and a ``repo_name`` attribute.

    Returns:
        List of per-tenant dicts of the form
        ``{"tenant": {<settings...>, "source": <sources...>}}``.

    Raises:
        ScraperConfigurationError: if the repo contains no tenants folder.
    """
    LOGGER.info("Collecting tenant sources from repo '%s'", sources_repo)
    sources = []
    try:
        tenants = sources_repo.list_directory(TENANTS_DIRECTORY)
    except CheckoutError as e:
        # Chain the original checkout failure for easier debugging.
        raise ScraperConfigurationError(
            "Cannot load tenant sources. Repo '{}' does not contain a "
            "'tenants' folder".format(sources_repo.repo_name)
        ) from e
    for tenant in tenants:
        try:
            # Fix: use TENANTS_DIRECTORY (was the hard-coded "tenants"
            # string) so the checkout paths stay consistent with the
            # directory listing above.
            sources_yaml = sources_repo.check_out_file(
                os.path.join(TENANTS_DIRECTORY, tenant, "sources.yaml")
            )
            settings_yaml = sources_repo.check_out_file(
                os.path.join(TENANTS_DIRECTORY, tenant, "settings.yaml")
            )
            # NOTE (fschmidt): We parse both files and create the same data
            # structure like zuul does for the main.yaml file.
            tenant_sources = {
                # Load the settings first, as they contain different keys
                "tenant": yaml.safe_load(settings_yaml)
            }
            # Update the tenant_sources with the sources file and wrap them
            # in a 'source' key
            tenant_sources["tenant"]["source"] = yaml.safe_load(sources_yaml)
            sources.append(tenant_sources)
        except CheckoutError as e:
            # If a single tenant is missing the required file, we just skip
            # it (deliberate best-effort) and log a warning instead of
            # aborting the whole scrape.
            LOGGER.warning(
                "Either 'settings.yaml' or 'sources.yaml' are "
                "missing or empty in repo '%s': %s",
                sources_repo.repo_name,
                e,
            )
    return sources