# Configuration defaults.
#
# You should never edit this file directly for deployment or in the developer
# setup. Wherever possible use environment variables to override the
# defaults.
import os
from servicelayer import env
from servicelayer import settings as sls
from flask_babel import lazy_gettext

# Show error messages to the user.
DEBUG = env.to_bool('ALEPH_DEBUG', False)

# Propose HTTP caching to the user agents.
# Defaults to the inverse of DEBUG unless ALEPH_CACHE is set.
CACHE = env.to_bool('ALEPH_CACHE', not DEBUG)

# Puts the system into read-only mode and displays a warning.
MAINTENANCE = env.to_bool('ALEPH_MAINTENANCE', False)

# Unit test context.
TESTING = False


###############################################################################
# General instance information

APP_TITLE = env.get('ALEPH_APP_TITLE', lazy_gettext('Aleph'))
APP_DESCRIPTION = env.get('ALEPH_APP_DESCRIPTION', '')
APP_NAME = env.get('ALEPH_APP_NAME', 'aleph')
APP_UI_URL = env.get('ALEPH_UI_URL', 'http://localhost:8080/')
APP_LOGO = env.get('ALEPH_LOGO', '/static/logo.png')
# The favicon falls back to the same image path as the logo default.
APP_FAVICON = env.get('ALEPH_FAVICON', '/static/logo.png')
# Configuration defaults.
#
# You should never edit this file directly for deployment or in the developer
# setup. Wherever possible use environment variables to override the
# defaults.
import os
import uuid
from servicelayer import env
from flask_babel import lazy_gettext

# Show error messages to the user.
DEBUG = env.to_bool('ALEPH_DEBUG', False)

# Propose HTTP caching to the user agents.
# Defaults to the inverse of DEBUG unless ALEPH_CACHE is set.
CACHE = env.to_bool('ALEPH_CACHE', not DEBUG)

# Puts the system into read-only mode and displays a warning.
MAINTENANCE = env.to_bool('ALEPH_MAINTENANCE', False)

# Unit test context.
TESTING = False


###############################################################################
# General instance information

APP_TITLE = env.get('ALEPH_APP_TITLE', lazy_gettext('Aleph'))
APP_DESCRIPTION = env.get('ALEPH_APP_DESCRIPTION', '')
APP_NAME = env.get('ALEPH_APP_NAME', 'aleph')
APP_UI_URL = env.get('ALEPH_UI_URL', 'http://localhost:8080/')
APP_LOGO = env.get('ALEPH_LOGO', '/static/logo.png')
APP_FAVICON = env.get('ALEPH_FAVICON', '/static/favicon.png')

# Show a system-wide banner in the user interface.
# No default is passed to env.get for this setting.
APP_BANNER = env.get('ALEPH_APP_BANNER')
# Configuration defaults.
#
# You should never edit this file directly for deployment or in the developer
# setup. Wherever possible use environment variables to override the
# defaults.
import os
from servicelayer import env
from servicelayer import settings as sls
from flask_babel import lazy_gettext

# Show error messages to the user.
DEBUG = env.to_bool('ALEPH_DEBUG', False)

# Propose HTTP caching to the user agents.
# Defaults to the inverse of DEBUG unless ALEPH_CACHE is set.
CACHE = env.to_bool('ALEPH_CACHE', not DEBUG)

# Puts the system into read-only mode and displays a warning.
MAINTENANCE = env.to_bool('ALEPH_MAINTENANCE', False)

# Unit test context.
TESTING = False


###############################################################################
# General instance information

APP_TITLE = env.get('ALEPH_APP_TITLE', lazy_gettext('Aleph'))
APP_DESCRIPTION = env.get('ALEPH_APP_DESCRIPTION', '')
APP_NAME = env.get('ALEPH_APP_NAME', 'aleph')
APP_UI_URL = env.get('ALEPH_UI_URL', 'http://localhost:8080/')
APP_LOGO = env.get('ALEPH_LOGO', '/static/logo.png')
# The favicon falls back to the same image path as the logo default.
APP_FAVICON = env.get('ALEPH_FAVICON', '/static/logo.png')

# Show a system-wide banner in the user interface.
# No default is passed to env.get for this setting.
APP_BANNER = env.get('ALEPH_APP_BANNER')
from servicelayer import env
from servicelayer import settings as sls
from ftmstore import settings as sts

# Settings below are read from environment variables through the
# servicelayer ``env`` helpers; the second argument is the default.

TESTING = False

# Document conversion service
# UNOSERVICE_URL is read first; INGESTORS_CONVERT_DOCUMENT_URL, when set,
# takes precedence over it.
CONVERT_URL = env.get("UNOSERVICE_URL", "http://convert-document:3000/convert")
CONVERT_URL = env.get("INGESTORS_CONVERT_DOCUMENT_URL", CONVERT_URL)
CONVERT_TIMEOUT = env.to_int("INGESTORS_CONVERT_TIMEOUT", 7200)  # 2 hrs
CONVERT_RETRIES = env.to_int("INGESTORS_CONVERT_RETRIES", 256)

# Enable (expensive!) Google Cloud API
OCR_VISION_API = env.to_bool("INGESTORS_OCR_VISION_API", False)

# Geonames data file
GEONAMES_PATH = env.get("INGESTORS_GEONAMES_PATH", "/ingestors/data/geonames.txt")

# FastText lid model file
LID_MODEL_PATH = env.get("INGESTORS_LID_MODEL_PATH", "/ingestors/data/lid.176.ftz")

# Disable entity extraction
ANALYZE_ENTITIES = env.to_bool("INGESTORS_ANALYZE_ENTITIES", True)

# List available NER models
NER_MODELS = set(env.to_list("INGESTORS_NER_MODELS", ["eng"]))
# Default disabled set is {"ara"}; INGESTORS_NER_DISABLE replaces it.
NER_DISABLE = ["ara"]
NER_DISABLE = set(env.to_list("INGESTORS_NER_DISABLE", NER_DISABLE))
NER_DEFAULT_MODEL = "xx"
def is_available(cls):
    """Report whether the Google Cloud Vision OCR backend may be used.

    Returns ``False`` when the ``google.cloud.vision`` client library cannot
    be imported. Otherwise returns the boolean value of the
    ``OCR_VISION_API`` environment variable (default ``False``), as parsed
    by ``env.to_bool``.
    """
    try:
        # Imported only to probe that the optional dependency is installed.
        from google.cloud.vision import ImageAnnotatorClient  # noqa
    except ImportError:
        return False
    return env.to_bool('OCR_VISION_API', False)
import os
import pkg_resources
import multiprocessing
from servicelayer import env
from servicelayer import settings as sls

###############################################################################
# Core configuration

# Version of the installed 'memorious' distribution.
VERSION = pkg_resources.get_distribution('memorious').version
APP_NAME = env.get('MEMORIOUS_APP_NAME', 'memorious')

# Enable debug logging etc.
DEBUG = env.to_bool('MEMORIOUS_DEBUG', default=False)
TESTING = False

# Base operating path
# Defaults to ./data under the current working directory.
BASE_PATH = os.path.join(os.getcwd(), 'data')
BASE_PATH = env.get('MEMORIOUS_BASE_PATH', BASE_PATH)

# Override servicelayer archive if undefined
sls.ARCHIVE_PATH = sls.ARCHIVE_PATH or os.path.join(BASE_PATH, 'archive')

# Directory which contains crawler pipeline YAML specs
CONFIG_PATH = env.get('MEMORIOUS_CONFIG_PATH')

# Try and run scrapers in a way that only acquires new data
INCREMENTAL = env.to_bool('MEMORIOUS_INCREMENTAL', default=True)

# How many days until an incremental crawl expires
EXPIRE = env.to_int('MEMORIOUS_EXPIRE', 1)
from servicelayer import env
from servicelayer import settings as sls
from ftmstore import settings as sts

# Settings below are read from environment variables through the
# servicelayer ``env`` helpers; the second argument is the default.

TESTING = False

# Document conversion service
# UNOSERVICE_URL is read first; INGESTORS_CONVERT_DOCUMENT_URL, when set,
# takes precedence over it.
CONVERT_URL = env.get('UNOSERVICE_URL', 'http://convert-document:3000/convert')
CONVERT_URL = env.get('INGESTORS_CONVERT_DOCUMENT_URL', CONVERT_URL)
CONVERT_TIMEOUT = env.to_int('INGESTORS_CONVERT_TIMEOUT', 7200)  # 2 hrs

# Enable (expensive!) Google Cloud API
OCR_VISION_API = env.to_bool('INGESTORS_OCR_VISION_API', False)

# Geonames data file
GEONAMES_PATH = env.get('INGESTORS_GEONAMES_PATH', '/ingestors/data/geonames.txt')

# FastText lid model file
LID_MODEL_PATH = env.get('INGESTORS_LID_MODEL_PATH', '/ingestors/data/lid.176.ftz')

# Disable entity extraction
ANALYZE_ENTITIES = env.to_bool('INGESTORS_ANALYZE_ENTITIES', True)

# List available NER models
NER_MODELS = set(env.to_list('INGESTORS_NER_MODELS', ['eng']))
NER_DEFAULT_MODEL = 'xx'

# Use the environment variable set in aleph.env
# Falls back to the value ftmstore's settings already hold.
sts.DATABASE_URI = env.get('ALEPH_DATABASE_URI', sts.DATABASE_URI)
# Configuration defaults.
#
# You should never edit this file directly for deployment or in the developer
# setup. Wherever possible use environment variables to override the
# defaults.
import os
from servicelayer import env
from urllib.parse import urlparse
from flask_babel import lazy_gettext
from datetime import timedelta

# The aleph module directory
# Absolute path of the directory that contains this settings file.
APP_DIR = os.path.abspath(os.path.dirname(__file__))

# Show error messages to the user.
DEBUG = env.to_bool("ALEPH_DEBUG", False)

# Profile requests
PROFILE = env.to_bool("ALEPH_PROFILE", False)

# Propose HTTP caching to the user agents.
# Defaults to the inverse of DEBUG unless ALEPH_CACHE is set.
CACHE = env.to_bool("ALEPH_CACHE", not DEBUG)

# Puts the system into read-only mode and displays a warning.
MAINTENANCE = env.to_bool("ALEPH_MAINTENANCE", False)

# Unit test context.
TESTING = False


###############################################################################
# General instance information

APP_TITLE = env.get("ALEPH_APP_TITLE", lazy_gettext("Aleph"))
APP_NAME = env.get("ALEPH_APP_NAME", "aleph")
APP_UI_URL = env.get("ALEPH_UI_URL", "http://localhost:8080/")
import os
import pkg_resources
from servicelayer import env
from servicelayer import settings as sls

###############################################################################
# Core configuration

# Version of the installed "memorious" distribution.
VERSION = pkg_resources.get_distribution("memorious").version
APP_NAME = env.get("MEMORIOUS_APP_NAME", "memorious")

# Enable debug logging etc.
DEBUG = env.to_bool("MEMORIOUS_DEBUG", default=False)
TESTING = False

# Base operating path
# Defaults to ./data under the current working directory.
BASE_PATH = os.path.join(os.getcwd(), "data")
BASE_PATH = env.get("MEMORIOUS_BASE_PATH", BASE_PATH)

# Override servicelayer archive if undefined
sls.ARCHIVE_PATH = sls.ARCHIVE_PATH or os.path.join(BASE_PATH, "archive")

# Directory which contains crawler pipeline YAML specs
CONFIG_PATH = env.get("MEMORIOUS_CONFIG_PATH")

# Try and run scrapers in a way that only acquires new data
INCREMENTAL = env.to_bool("MEMORIOUS_INCREMENTAL", default=True)

# Continue running the crawler even when we encounter an error
CONTINUE_ON_ERROR = env.to_bool("MEMORIOUS_CONTINUE_ON_ERROR", default=False)

# How many days until an incremental crawl expires
# Redis cache
# URL format: redis://localhost:6379/0
REDIS_URL = env.get("REDIS_URL")
# NOTE(review): 84700 is close to, but not equal to, 86400 (seconds in a
# day) -- confirm whether this value is intentional before changing it.
REDIS_SHORT = 84700
REDIS_LONG = REDIS_SHORT * 200
REDIS_EXPIRE = env.to_int("REDIS_EXPIRE", REDIS_SHORT * 7)
REDIS_PREFIX = "sla"

# Persistent database tags
TAGS_DATABASE_URI = env.get("TAGS_DATABASE_URI", "sqlite://")

# Worker
WORKER_RETRY = env.to_int("WORKER_RETRY", 3)
# Thread count defaults to the number of CPUs reported by multiprocessing.
WORKER_THREADS = env.to_int("WORKER_THREADS", multiprocessing.cpu_count())
WORKER_REPORTING = env.to_bool("WORKER_REPORTING", True)

# Amazon client credentials
AWS_KEY_ID = env.get("AWS_ACCESS_KEY_ID")
AWS_SECRET_KEY = env.get("AWS_SECRET_ACCESS_KEY")
AWS_REGION = env.get("AWS_REGION", "eu-west-1")

# S3 compatible Minio host if using Minio for storage
ARCHIVE_ENDPOINT_URL = env.get("ARCHIVE_ENDPOINT_URL")

# Storage type (either 's3', 'gs', or 'file', i.e. local file system):
ARCHIVE_TYPE = env.get("ARCHIVE_TYPE", "file")
ARCHIVE_BUCKET = env.get("ARCHIVE_BUCKET")
ARCHIVE_PATH = env.get("ARCHIVE_PATH")
# The publication bucket falls back to the archive bucket when unset.
PUBLICATION_BUCKET = env.get("PUBLICATION_BUCKET", ARCHIVE_BUCKET)

# Logging
import os
import pkg_resources
import multiprocessing
from servicelayer import env
from servicelayer import settings as sls

###############################################################################
# Core configuration

# Version of the installed "memorious" distribution.
VERSION = pkg_resources.get_distribution("memorious").version
APP_NAME = env.get("MEMORIOUS_APP_NAME", "memorious")

# Enable debug logging etc.
DEBUG = env.to_bool("MEMORIOUS_DEBUG", default=False)
TESTING = False

# Base operating path
# Defaults to ./data under the current working directory.
BASE_PATH = os.path.join(os.getcwd(), "data")
BASE_PATH = env.get("MEMORIOUS_BASE_PATH", BASE_PATH)

# Override servicelayer archive if undefined
sls.ARCHIVE_PATH = sls.ARCHIVE_PATH or os.path.join(BASE_PATH, "archive")

# Directory which contains crawler pipeline YAML specs
CONFIG_PATH = env.get("MEMORIOUS_CONFIG_PATH")

# Try and run scrapers in a way that only acquires new data
INCREMENTAL = env.to_bool("MEMORIOUS_INCREMENTAL", default=True)

# How many days until an incremental crawl expires
EXPIRE = env.to_int("MEMORIOUS_EXPIRE", 1)
from servicelayer import env
from servicelayer import settings as sls
from ftmstore import settings as sts

# Settings below are read from environment variables through the
# servicelayer ``env`` helpers; the second argument is the default.

TESTING = False

# When set to True, a debugpy server will be enabled in cli.py process()
DEBUGPY_PROCESS = env.to_bool("INGESTORS_DEBUGPY_PROCESS", False)

# The address that the debugpy server should bind to
# NOTE(review): the default "0.0.0.0" is the all-interfaces address; confirm
# the debug port is only reachable from trusted hosts when enabled.
DEBUGPY_ADDRESS = env.get("INGESTORS_DEBUGPY_ADDRESS", "0.0.0.0")

# The port that the debugpy server should listen for a connection on
DEBUGPY_PORT = env.to_int("INGESTORS_DEBUGPY_PORT", 5678)

# When set to True, after setting up the debug server the application will block
# and wait for a client connection before continuing with processing
DEBUGPY_WAIT_FOR_CLIENT = env.to_bool("INGESTORS_DEBUGPY_WAIT_FOR_CLIENT", False)

# Document conversion service
# UNOSERVICE_URL is read first; INGESTORS_CONVERT_DOCUMENT_URL, when set,
# takes precedence over it.
CONVERT_URL = env.get("UNOSERVICE_URL", "http://convert-document:3000/convert")
CONVERT_URL = env.get("INGESTORS_CONVERT_DOCUMENT_URL", CONVERT_URL)
CONVERT_TIMEOUT = env.to_int("INGESTORS_CONVERT_TIMEOUT", 7200)  # 2 hrs

# Enable (expensive!) Google Cloud API
OCR_VISION_API = env.to_bool("INGESTORS_OCR_VISION_API", False)

# Enable Google Cloud Translation API
TRANSLATION_API = env.to_bool("INGESTORS_TRANSLATION_API", False)

# White list of language IDs for languages that should be translated
# An empty white list is considered a wildcard, allowing all languages to be translated
TRANSLATION_LANGUAGE_WHITE_LIST = env.to_list(
    "INGESTORS_TRANSLATION_LANGUAGE_WHITE_LIST", None
)
from servicelayer import env

# URL of the document conversion service; no default is supplied here.
UNOSERVICE_URL = env.get('UNOSERVICE_URL')

# Toggle for the Google Cloud Vision OCR backend (off unless the
# OCR_VISION_API environment variable enables it).
OCR_VISION_API = env.to_bool('OCR_VISION_API', False)