def parse(self, load_config_file_path):
    """Parse an external load config file into ``self.config``.

    The file is loaded with ``load_module`` and every name reported by
    ``dir()`` on the resulting object is inspected:

    * ``parser_factory`` is always copied to ``self.config``.
    * Other names are skipped when they start with ``__``, when their value
      is callable, or when their value is falsy.
    * ``navigable_objects``, ``plain_text_obj``, ``store_words_and_ids`` and
      ``pos_tagger`` build up ``self.config["load_filters"]``.
    * ``words_to_index`` is treated as a path to a file with one word per
      line and is stored as a set of stripped lines.
    * Anything else is copied to ``self.config`` under its own name.

    Args:
        load_config_file_path: Path handed to ``load_module``.
    """
    load_config_file = load_module("external_load_config", load_config_file_path)

    # Options that each contribute one extra filter to config["load_filters"].
    extra_filter_options = ("plain_text_obj", "store_words_and_ids", "pos_tagger")

    # dir() returns names sorted alphabetically, so "navigable_objects" is seen
    # before the options in extra_filter_options and its filter list is the one
    # they append to.
    for a in dir(load_config_file):
        if a == "parser_factory":
            self.config["parser_factory"] = getattr(load_config_file, a)
            continue
        if a.startswith("__"):
            continue
        value = getattr(load_config_file, a)
        # callable() replaces isinstance(value, collections.Callable): that
        # alias no longer exists in Python 3.10+ and raised AttributeError.
        if callable(value) or not value:
            continue

        if a == "navigable_objects":
            self.config["load_filters"] = LoadFilters.set_load_filters(navigable_objects=value)
        # Deliberately a separate ``if``: "navigable_objects" also falls through
        # to the final ``else`` below and is stored under its own key.
        if a == "words_to_index":
            with open(value) as fh:
                self.config["words_to_index"] = {line.strip() for line in fh}
        elif a in extra_filter_options:
            if "load_filters" not in self.config:
                # NOTE(review): this stores the LoadFilters.DefaultLoadFilters
                # object itself, so the append below modifies that shared
                # object -- confirm this is intended.
                self.config["load_filters"] = LoadFilters.DefaultLoadFilters
            if a == "plain_text_obj":
                new_filter = LoadFilters.store_in_plain_text(*value)
            elif a == "store_words_and_ids":
                new_filter = LoadFilters.store_words_and_philo_ids
            else:
                new_filter = LoadFilters.pos_tagger(value)
            self.config["load_filters"].append(new_filter)
        else:
            self.config[a] = value
def __init__(self, **loader_options):
    """Initialise loader state and write the ``load_config.py`` record.

    Args:
        **loader_options: Load settings. ``default_object_level``,
            ``token_regex``, ``data_destination``, ``load_config`` and
            ``theme`` are read with plain indexing, so a missing one raises
            ``KeyError``. ``web_config`` is optional.

    Side effects:
        * Creates ``self.destination`` (with parents), ``self.workdir`` and
          ``self.textdir``.
        * Writes ``load_config.py`` in ``data_destination``: either a copy of
          the supplied load config with the remaining options appended, or a
          fresh dump of all options.
        * Writes ``web_config.cfg`` in ``data_destination`` when a
          ``web_config`` option is supplied.
    """
    self.parse_pool = None
    self.default_object_level = loader_options["default_object_level"]
    self.token_regex = loader_options["token_regex"]

    # NOTE(review): destination/workdir/textdir are read here but not assigned
    # in this method -- presumably set at class level or elsewhere; confirm.
    # os.makedirs replaces a shell "mkdir -p" so paths containing spaces or
    # shell metacharacters are handled, and failures raise instead of being
    # silently ignored.
    os.makedirs(self.destination, exist_ok=True)
    os.mkdir(self.workdir)
    os.mkdir(self.textdir)

    load_config_path = os.path.join(loader_options["data_destination"], "load_config.py")
    # Loading these from a load_config would crash the parser for a number of reasons...
    values_to_ignore = [
        "load_filters",
        "post_filters",
        "parser_factory",
        "data_destination",
        "db_destination",
        "dbname",
    ]

    if loader_options["load_config"]:
        shutil.copy(loader_options["load_config"], load_config_path)
        config_obj = load_module("external_load_config", loader_options["load_config"])
        # Collect the non-callable, non-dunder values the external config
        # already defines so they are not written a second time below.
        already_configured_values = {}
        for attribute in dir(config_obj):
            if attribute.startswith("__"):
                continue
            value = getattr(config_obj, attribute)
            # callable() replaces isinstance(value, collections.Callable),
            # which raises AttributeError on Python 3.10+.
            if not callable(value):
                already_configured_values[attribute] = value
        with open(load_config_path, "a") as load_config_copy:
            print("\n\n## The values below were also used for loading ##", file=load_config_copy)
            for option in loader_options:
                if option in already_configured_values:
                    continue
                if option in values_to_ignore or option == "web_config":
                    continue
                print(f"{option} = {loader_options[option]!r}\n", file=load_config_copy)
    else:
        with open(load_config_path, "w") as load_config_copy:
            print("#!/usr/bin/env python3", file=load_config_copy)
            print(
                '"""This is a dump of the default configuration used to load this database,',
                file=load_config_copy,
            )
            print(
                "including non-configurable options. You can use this file to reload",
                file=load_config_copy,
            )
            print(
                'the current database using the -l flag. See load documentation for more details"""\n\n',
                file=load_config_copy,
            )
            for option in loader_options:
                if option in values_to_ignore or option == "web_config":
                    continue
                print(f"{option} = {loader_options[option]!r}\n", file=load_config_copy)

    if "web_config" in loader_options:
        web_config_path = os.path.join(loader_options["data_destination"], "web_config.cfg")
        print("\nSaving predefined web_config.cfg file to %s..." % web_config_path)
        with open(web_config_path, "w") as w:
            w.write(loader_options["web_config"])
        self.predefined_web_config = True
    else:
        self.predefined_web_config = False

    self.theme = loader_options["theme"]
    self.filenames = []
    self.raw_files = []
    self.deleted_files = []
    self.metadata_fields = []
    self.metadata_hierarchy = []
    self.metadata_types = {}
    self.normalized_fields = []
    self.metadata_fields_not_found = []
    self.sort_order = ""
#!/usr/bin/env python3 """Bootstrap Web app""" import os.path from philologic.runtime import WebConfig from philologic.runtime import WSGIHandler from philologic.runtime import access_control from philologic.utils import load_module config = WebConfig(os.path.abspath(os.path.dirname(__file__))) global_config = load_module("philologic4", config.global_config_location) path = os.path.abspath(os.path.dirname(__file__)) dbname = path.strip().split("/")[-1] config = WebConfig(os.path.abspath(os.path.dirname(__file__))) config_location = os.path.join("app/assets/css/split/", os.path.basename(config.theme)) if os.path.realpath(os.path.abspath(config.theme)) == os.path.realpath(os.path.abspath(config_location)): theme = config_location elif os.path.exists(config_location) and config.production: theme = config_location else: os.system("cp %s %s" % (config.theme, config_location)) theme = config_location css_files = [ "app/assets/css/bootstrap.min.css", "app/assets/css/split/style.css", "app/assets/css/split/searchForm.css",
#!/usr/bin env python3 """CLI parser for philoload4 command""" import collections import os import sys from glob import glob from argparse import ArgumentParser from philologic.loadtime import Loader, LoadFilters, Parser, PlainTextParser, PostFilters from philologic.utils import pretty_print, load_module # Load global config CONFIG_PATH = os.getenv("PHILOLOGIC_CONFIG", "/etc/philologic/philologic4.cfg") CONFIG_FILE = load_module("philologic4", CONFIG_PATH) if CONFIG_FILE.url_root is None: print("url_root variable is not set in /etc/philologic/philologic4.cfg", file=sys.stderr) print( "See https://github.com/ARTFL-Project/PhiloLogic4/blob/master/docs/installation.md.", file=sys.stderr) exit() elif CONFIG_FILE.web_app_dir is None: print("web_app_dir variable is not set in /etc/philologic/philologic4.cfg", file=sys.stderr) print( "See https://github.com/ARTFL-Project/PhiloLogic4/blob/master/docs/installation.md.", file=sys.stderr) exit() elif CONFIG_FILE.database_root is None:
def __init__(self, **loader_options):
    """Initialise loader state and write the ``load_config.py`` record.

    Args:
        **loader_options: Load settings. ``debug``, ``default_object_level``,
            ``post_filters``, ``words_to_index``, ``token_regex``,
            ``data_destination``, ``load_config`` and ``theme`` are read with
            plain indexing, so a missing one raises ``KeyError``. Any name
            listed in ``PARSER_OPTIONS`` is copied to ``self.parser_config``
            when present. ``web_config`` is optional.

    Side effects:
        * Calls ``self.setup_dir`` with ``data_destination``.
        * Writes ``load_config.py`` in ``data_destination``: either a copy of
          the supplied load config with the remaining options appended, or a
          fresh dump of all options.
        * Writes ``web_config.cfg`` in ``data_destination`` when a
          ``web_config`` option is supplied.
    """
    self.omax = [1, 1, 1, 1, 1, 1, 1, 1, 1]
    self.parse_pool = None
    self.types = OBJECT_TYPES
    self.tables = DEFAULT_TABLES
    self.sort_by_word = SORT_BY_WORD
    self.sort_by_id = SORT_BY_ID
    self.debug = loader_options["debug"]
    self.default_object_level = loader_options["default_object_level"]
    self.post_filters = loader_options["post_filters"]
    self.words_to_index = loader_options["words_to_index"]
    self.token_regex = loader_options["token_regex"]
    # Only parser options that were actually supplied are forwarded.
    self.parser_config = {
        option: loader_options[option] for option in PARSER_OPTIONS if option in loader_options
    }

    self.setup_dir(loader_options["data_destination"])
    load_config_path = os.path.join(loader_options["data_destination"], "load_config.py")
    # Loading these from a load_config would crash the parser for a number of reasons...
    values_to_ignore = [
        "load_filters",
        "post_filters",
        "parser_factory",
        "data_destination",
        "db_destination",
        "dbname",
    ]

    if loader_options["load_config"]:
        shutil.copy(loader_options["load_config"], load_config_path)
        config_obj = load_module("external_load_config", loader_options["load_config"])
        # Collect the non-callable, non-dunder values the external config
        # already defines so they are not written a second time below.
        already_configured_values = {}
        for attribute in dir(config_obj):
            if attribute.startswith("__"):
                continue
            value = getattr(config_obj, attribute)
            # callable() replaces isinstance(value, collections.Callable),
            # which raises AttributeError on Python 3.10+.
            if not callable(value):
                already_configured_values[attribute] = value
        with open(load_config_path, "a") as load_config_copy:
            print("\n\n## The values below were also used for loading ##", file=load_config_copy)
            for option in loader_options:
                if option in already_configured_values:
                    continue
                if option in values_to_ignore or option == "web_config":
                    continue
                print("%s = %s\n" % (option, repr(loader_options[option])), file=load_config_copy)
    else:
        with open(load_config_path, "w") as load_config_copy:
            print("#!/usr/bin/env python3", file=load_config_copy)
            print(
                '"""This is a dump of the default configuration used to load this database,',
                file=load_config_copy,
            )
            print(
                "including non-configurable options. You can use this file to reload",
                file=load_config_copy,
            )
            print(
                'the current database using the -l flag. See load documentation for more details"""\n\n',
                file=load_config_copy,
            )
            for option in loader_options:
                if option in values_to_ignore or option == "web_config":
                    continue
                print("%s = %s\n" % (option, repr(loader_options[option])), file=load_config_copy)

    if "web_config" in loader_options:
        web_config_path = os.path.join(loader_options["data_destination"], "web_config.cfg")
        print("\nSaving predefined web_config.cfg file to %s..." % web_config_path)
        with open(web_config_path, "w") as w:
            w.write(loader_options["web_config"])
        self.predefined_web_config = True
    else:
        self.predefined_web_config = False

    self.theme = loader_options["theme"]
    self.metadata_fields = []
    self.metadata_hierarchy = []
    self.metadata_types = {}
    self.normalized_fields = []
    self.metadata_fields_not_found = []
def check_access(environ, config):
    """Decide whether the requesting client may access the database.

    Access is denied (an empty tuple is returned) when no access file is
    configured, when the file does not exist, or when loading it fails.
    Otherwise a token from ``make_token`` is returned if any of these holds:

    * the client address matches one of the module-level ``ip_ranges``
      patterns (checked before the block list);
    * the address is not in ``blocked_ips`` and the client domain equals, or
      contains as a substring, an entry of ``domain_list``;
    * the address is not in ``blocked_ips`` and starts with an entry of
      ``allowed_ips``.

    ``allowed_ips`` entries with four dot-separated parts may end in a range:
    ``"a.b.c.10-20"`` expands to .10 through .20 inclusive, and the open-ended
    ``"a.b.c.10-"`` expands to .10 through .254.

    Args:
        environ: Request environment, passed to ``get_client_info``.
        config: Object providing ``db_path`` and ``access_file``.

    Returns:
        The result of ``make_token(incoming_address, db)`` when access is
        granted, otherwise ``()``.
    """
    db = DB(config.db_path + "/data/")
    incoming_address, match_domain = get_client_info(environ)
    denied_message = "UNAUTHORIZED ACCESS TO: %s from domain %s" % (incoming_address, match_domain)

    if not config.access_file:
        print(denied_message, file=sys.stderr)
        return ()
    # A relative access file name is resolved inside the database data directory.
    if os.path.isabs(config.access_file):
        access_file = config.access_file
    else:
        access_file = os.path.join(config.db_path, "data", config.access_file)
    if not os.path.isfile(access_file):
        print(
            f"ACCESS FILE DOES NOT EXIST. UNAUTHORIZED ACCESS TO: {incoming_address} from domain {match_domain}",
            file=sys.stderr,
        )
        return ()

    # Load access config file. If loading fails, don't grant access.
    try:
        access_config = load_module("access_config", access_file)
    except Exception as e:
        print("ACCESS ERROR", repr(e), file=sys.stderr)
        print(denied_message, file=sys.stderr)
        return ()

    # Let's first check if the IP is local and grant access if it is.
    for ip_range in ip_ranges:
        if ip_range.search(incoming_address):
            return make_token(incoming_address, db)

    # A missing or malformed list is treated as empty rather than as an error.
    try:
        domain_list = set(access_config.domain_list)
    except Exception:
        domain_list = []

    try:
        allowed_ips = set()
        for ip in access_config.allowed_ips:
            octets = ip.split(".")
            if len(octets) != 4:
                allowed_ips.add(ip)
                continue
            prefix = ".".join(octets[:3]) + "."
            bounds = octets[3].split("-")
            if re.search(r"\d+-\d+", octets[3]):
                # Closed range "lo-hi": both ends included.
                allowed_ips.update(
                    prefix + str(last_num) for last_num in range(int(bounds[0]), int(bounds[1]) + 1)
                )
            elif re.search(r"\d+-\Z", octets[3]):
                # Open-ended range "lo-". The pattern previously ended in \A,
                # which only matches at the start of the string, so this
                # branch could never be taken.
                allowed_ips.update(prefix + str(last_num) for last_num in range(int(bounds[0]), 255))
            else:
                allowed_ips.add(ip)
    except Exception as e:
        # Any malformed entry discards the whole allow list.
        print(repr(e), file=sys.stderr)
        allowed_ips = []

    try:
        blocked_ips = set(access_config.blocked_ips)
    except Exception:
        blocked_ips = []

    if incoming_address not in blocked_ips:
        if match_domain in domain_list:
            return make_token(incoming_address, db)
        # NOTE(review): substring match -- an entry "example.org" also accepts
        # "example.org.attacker.net"; confirm this is acceptable.
        for domain in domain_list:
            if domain in match_domain:
                return make_token(incoming_address, db)
        for ip_range in allowed_ips:
            # Literal prefix comparison. The entry used to be interpolated
            # into a regex unescaped, so each "." matched any character and
            # "1.2.3.4" also admitted addresses such as "1.213.4.5".
            if incoming_address.startswith(ip_range):
                print("PASS", file=sys.stderr)
                return make_token(incoming_address, db)

    # If no token returned, we block access.
    print(denied_message, file=sys.stderr)
    return ()
#!/usr/bin env python3 """CLI parser for philoload4 command""" import collections import os import sys from glob import glob from argparse import ArgumentParser from philologic.loadtime import Loader, LoadFilters, Parser, PlainTextParser, PostFilters from philologic.utils import pretty_print, load_module # Load global config CONFIG_PATH = os.getenv("PHILOLOGIC_CONFIG", "/etc/philologic/philologic4.cfg") CONFIG_FILE = load_module("philologic4", CONFIG_PATH) if CONFIG_FILE.url_root is None: print("url_root variable is not set in /etc/philologic/philologic4.cfg", file=sys.stderr) print("See https://github.com/ARTFL-Project/PhiloLogic4/blob/master/docs/installation.md.", file=sys.stderr) exit() elif CONFIG_FILE.web_app_dir is None: print("web_app_dir variable is not set in /etc/philologic/philologic4.cfg", file=sys.stderr) print("See https://github.com/ARTFL-Project/PhiloLogic4/blob/master/docs/installation.md.", file=sys.stderr) exit() elif CONFIG_FILE.database_root is None: print("database_root variable is not set in /etc/philologic/philologic4.cfg", file=sys.stderr) print("See https://github.com/ARTFL-Project/PhiloLogic4/blob/master/docs/installation.md.", file=sys.stderr) exit() class LoadOptions:
#!/usr/bin/env python3 """Bootstrap Web app""" import os.path from philologic.runtime import WebConfig from philologic.runtime import WSGIHandler from philologic.runtime import access_control from philologic.utils import load_module config = WebConfig(os.path.abspath(os.path.dirname(__file__))) global_config = load_module("philologic4", config.global_config_location) path = os.path.abspath(os.path.dirname(__file__)) dbname = path.strip().split("/")[-1] config = WebConfig(os.path.abspath(os.path.dirname(__file__))) config_location = os.path.join("app/assets/css/split/", os.path.basename(config.theme)) if os.path.realpath(os.path.abspath(config.theme)) == os.path.realpath( os.path.abspath(config_location)): theme = config_location elif os.path.exists(config_location) and config.production: theme = config_location else: os.system("cp %s %s" % (config.theme, config_location)) theme = config_location css_files = [ "app/assets/css/bootstrap.min.css", "app/assets/css/split/style.css", "app/assets/css/split/searchForm.css",
def __init__(self, **loader_options):
    """Initialise loader state and write the ``load_config.py`` record.

    Args:
        **loader_options: Load settings. ``debug``, ``default_object_level``,
            ``post_filters``, ``words_to_index``, ``token_regex``,
            ``data_destination``, ``load_config`` and ``theme`` are read with
            plain indexing, so a missing one raises ``KeyError``. Any name
            listed in ``PARSER_OPTIONS`` is copied to ``self.parser_config``
            when present. ``web_config`` is optional.

    Side effects:
        * Calls ``self.setup_dir`` with ``data_destination``.
        * Writes ``load_config.py`` in ``data_destination``: either a copy of
          the supplied load config with the remaining options appended, or a
          fresh dump of all options.
        * Writes ``web_config.cfg`` in ``data_destination`` when a
          ``web_config`` option is supplied.
    """
    self.omax = [1, 1, 1, 1, 1, 1, 1, 1, 1]
    self.parse_pool = None
    self.types = OBJECT_TYPES
    self.tables = DEFAULT_TABLES
    self.sort_by_word = SORT_BY_WORD
    self.sort_by_id = SORT_BY_ID
    self.debug = loader_options["debug"]
    self.default_object_level = loader_options["default_object_level"]
    self.post_filters = loader_options["post_filters"]
    self.words_to_index = loader_options["words_to_index"]
    self.token_regex = loader_options["token_regex"]
    # Only parser options that were actually supplied are forwarded.
    self.parser_config = {
        option: loader_options[option] for option in PARSER_OPTIONS if option in loader_options
    }

    self.setup_dir(loader_options["data_destination"])
    load_config_path = os.path.join(loader_options["data_destination"], "load_config.py")
    # Loading these from a load_config would crash the parser for a number of reasons...
    values_to_ignore = [
        "load_filters",
        "post_filters",
        "parser_factory",
        "data_destination",
        "db_destination",
        "dbname",
    ]

    if loader_options["load_config"]:
        shutil.copy(loader_options["load_config"], load_config_path)
        config_obj = load_module("external_load_config", loader_options["load_config"])
        # Collect the non-callable, non-dunder values the external config
        # already defines so they are not written a second time below.
        already_configured_values = {}
        for attribute in dir(config_obj):
            if attribute.startswith("__"):
                continue
            value = getattr(config_obj, attribute)
            # callable() replaces isinstance(value, collections.Callable),
            # which raises AttributeError on Python 3.10+.
            if not callable(value):
                already_configured_values[attribute] = value
        with open(load_config_path, "a") as load_config_copy:
            print("\n\n## The values below were also used for loading ##", file=load_config_copy)
            for option in loader_options:
                if option in already_configured_values:
                    continue
                if option in values_to_ignore or option == "web_config":
                    continue
                print("%s = %s\n" % (option, repr(loader_options[option])), file=load_config_copy)
    else:
        with open(load_config_path, "w") as load_config_copy:
            print("#!/usr/bin/env python3", file=load_config_copy)
            print(
                '"""This is a dump of the default configuration used to load this database,',
                file=load_config_copy,
            )
            print(
                "including non-configurable options. You can use this file to reload",
                file=load_config_copy,
            )
            print(
                'the current database using the -l flag. See load documentation for more details"""\n\n',
                file=load_config_copy,
            )
            for option in loader_options:
                if option in values_to_ignore or option == "web_config":
                    continue
                print("%s = %s\n" % (option, repr(loader_options[option])), file=load_config_copy)

    if "web_config" in loader_options:
        web_config_path = os.path.join(loader_options["data_destination"], "web_config.cfg")
        print("\nSaving predefined web_config.cfg file to %s..." % web_config_path)
        with open(web_config_path, "w") as w:
            w.write(loader_options["web_config"])
        self.predefined_web_config = True
    else:
        self.predefined_web_config = False

    self.theme = loader_options["theme"]
    self.filenames = []
    self.raw_files = []
    self.deleted_files = []
    self.metadata_fields = []
    self.metadata_hierarchy = []
    self.metadata_types = {}
    self.normalized_fields = []
    self.metadata_fields_not_found = []
    self.sort_order = ""