"""Command-line entry point that builds a PhiloLogic database.

When run as a script, this parses the load options from ``sys.argv``,
prepares the destination directory, and then drives a ``Loader`` through
its load steps in order.
"""

import imp
import os
import sys

from philologic.LoadOptions import LoadOptions
from philologic.Loader import Loader, setup_db_dir

# Load global config.
# ``imp`` must be imported explicitly: it was previously used here without an
# import, which raised NameError as soon as this module was loaded.
# NOTE(review): ``imp`` is deprecated and no longer ships with Python 3.12+;
# switch to ``importlib`` if this script has to run on newer interpreters.
config_file = imp.load_source("philologic4", "/etc/philologic/philologic4.cfg")

os.environ["LC_ALL"] = "C"  # Exceedingly important to get uniform sort order.
os.environ["PYTHONIOENCODING"] = "utf-8"

if __name__ == '__main__':
    # Collect the load settings from the command line.
    load_options = LoadOptions()
    load_options.parse(sys.argv)

    # Prepare the destination directory before anything is written to it.
    setup_db_dir(
        load_options["db_destination"],
        load_options["web_app_dir"],
        force_delete=load_options.force_delete,
    )

    # Database load
    l = Loader(**load_options.values)
    l.add_files(load_options.files)

    # Metadata comes from a bibliography file when one was supplied;
    # otherwise the loader's own metadata parsing is used.
    if load_options.bibliography:
        load_metadata = l.parse_bibliography_file(
            load_options.bibliography, load_options.sort_order
        )
    else:
        load_metadata = l.parse_metadata(
            load_options.sort_order, header=load_options.header
        )

    # Remaining load steps, run in this fixed order.
    l.parse_files(load_options.cores, load_metadata)
    l.merge_objects()
    l.analyze()
    l.setup_sql_load()
    l.post_processing()
    l.finish()
extra_locals["word_regex"] = word_regex extra_locals["punct_regex"] = punct_regex ################################ ## Don't edit unless you know ## ## what you're doing ## ################################ os.environ["LC_ALL"] = "C" # Exceedingly important to get uniform sort order. os.environ["PYTHONIOENCODING"] = "utf-8" db_destination = database_root + dbname data_destination = db_destination + "/data" db_url = url_root + dbname setup_db_dir(db_destination, template_dir) #################### ## Load the files ## #################### l = Loader(data_destination, load_filters=filters, post_filters=post_filters, tables=tables, xpaths=xpaths, metadata_xpaths=metadata_xpaths, pseudo_empty_tags=pseudo_empty_tags, suppress_tags=suppress_tags, token_regex=token_regex, default_object_level=default_object_level,
from __future__ import print_function

import imp
import os
import sys

from philologic.LoadOptions import LoadOptions, config_file
from philologic.Loader import Loader, setup_db_dir

# Script that loads a PhiloLogic database: parse the command-line options,
# prepare the destination directory, then run the Loader steps in sequence.

os.environ["LC_ALL"] = "C"  # Exceedingly important to get uniform sort order.
os.environ["PYTHONIOENCODING"] = "utf-8"

if __name__ == "__main__":
    # Command-line settings for this load.
    options = LoadOptions()
    options.parse(sys.argv)

    # Resolve both directories first, then set up the database directory.
    db_destination = options["db_destination"]
    web_app_dir = options["web_app_dir"]
    setup_db_dir(db_destination, web_app_dir, force_delete=options.force_delete)

    # Database load
    loader = Loader(**options.values)
    loader.add_files(options.files)

    # Without a bibliography file, fall back to the loader's metadata parsing.
    if not options.bibliography:
        load_metadata = loader.parse_metadata(
            options.sort_order, header=options.header
        )
    else:
        load_metadata = loader.parse_bibliography_file(
            options.bibliography, options.sort_order
        )

    loader.parse_files(options.cores, load_metadata)
    loader.merge_objects()
    loader.analyze()
    loader.setup_sql_load()
## ["date", "author", "title"] sort_order = ["date", "author", "title", "filename"] ################################ ## Don't edit unless you know ## ## what you're doing ## ################################ os.environ["LC_ALL"] = "C" # Exceedingly important to get uniform sort order. os.environ["PYTHONIOENCODING"] = "utf-8" db_destination = database_root + dbname data_destination = db_destination + "/data" db_url = url_root + dbname setup_db_dir(db_destination, template_dir) #################### ## Load the files ## #################### l = Loader(data_destination, load_filters=filters, post_filters=post_filters, tables=tables, xpaths=xpaths, metadata_xpaths=metadata_xpaths, pseudo_empty_tags=pseudo_empty_tags, suppress_tags=suppress_tags, token_regex=token_regex,
################################ ## Don't edit unless you know ## ## what you're doing ## ################################ os.environ["LC_ALL"] = "C" # Exceedingly important to get uniform sort order. os.environ["PYTHONIOENCODING"] = "utf-8" db_destination = database_root + dbname data_destination = db_destination + "/data" db_url = url_root + dbname if __name__ == "__main__": setup_db_dir(db_destination, template_dir, force_delete=True) #################### ## Load the files ## #################### l = Loader( data_destination, load_filters=filters, post_filters=post_filters, tables=tables, xpaths=xpaths, metadata_xpaths=metadata_xpaths, pseudo_empty_tags=pseudo_empty_tags, suppress_tags=suppress_tags, token_regex=token_regex,