class Config:
    """Configuration for service."""

    # SOURCES_TOPIC = ENVIRONMENT.get_value("SOURCES_KAFKA_TOPIC", default="platform.sources.event-stream")
    SOURCES_TOPIC = CONFIGURATOR.get_kafka_topic("platform.sources.event-stream")

    # Kafka broker connection settings
    SOURCES_KAFKA_HOST = CONFIGURATOR.get_kafka_broker_host()
    SOURCES_KAFKA_PORT = CONFIGURATOR.get_kafka_broker_port()
    SOURCES_KAFKA_ADDRESS = f"{SOURCES_KAFKA_HOST}:{SOURCES_KAFKA_PORT}"

    # Sources API endpoint settings
    SOURCES_API_HOST = CONFIGURATOR.get_endpoint_host("sources-api", "svc", "localhost")
    SOURCES_API_PORT = CONFIGURATOR.get_endpoint_port("sources-api", "svc", "3000")
    SOURCES_API_URL = f"http://{SOURCES_API_HOST}:{SOURCES_API_PORT}"
    SOURCES_API_PREFIX = ENVIRONMENT.get_value("SOURCES_API_PREFIX", default="/api/v1.0")
    SOURCES_INTERNAL_API_PREFIX = ENVIRONMENT.get_value("SOURCES_INTERNAL_API_PREFIX", default="/internal/v1.0")

    # base64-encoded identity headers
    SOURCES_PROBE_HEADER = ENVIRONMENT.get_value(
        "SOURCES_PROBE_HEADER",
        default="eyJpZGVudGl0eSI6IHsiYWNjb3VudF9udW1iZXIiOiAiMTIzNDUiLCAib3JnX2lkIjogIjEyMzQ1In19Cg==",
    )
    SOURCES_FAKE_HEADER = ENVIRONMENT.get_value(
        "SOURCES_FAKE_HEADER",
        default=(
            "eyJpZGVudGl0eSI6IHsiYWNjb3VudF9udW1iZXIiOiAiMTIzNDUiLCAidXNlciI6IHsiaXNfb3J"
            "nX2FkbWluIjogImZhbHNlIiwgInVzZXJuYW1lIjogInNvdXJjZXMiLCAiZW1haWwiOiAic291cm"
            "Nlc0Bzb3VyY2VzLmlvIn0sICJpbnRlcm5hbCI6IHsib3JnX2lkIjogIjU0MzIxIn19fQ=="
        ),
    )
    SOURCES_FAKE_CLUSTER_HEADER = ENVIRONMENT.get_value(
        "SOURCES_FAKE_CLUSTER_HEADER",
        default=(
            "eyJpZGVudGl0eSI6IHsiYWNjb3VudF9udW1iZXIiOiAiMTIzNDUiLCAiYXV0aF90eXBlIjogInVoYy1"
            "hdXRoIiwgInR5cGUiOiAiU3lzdGVtIiwgInN5c3RlbSI6IHsiY2x1c3Rlcl9pZCI6ICIwYmIyOTEzNS1k"
            "NmQxLTQ3OGItYjViNi02YmQxMjljYjZkNWQifSwgImludGVybmFsIjogeyJvcmdfaWQiOiAiNTQzMjEifX19"
        ),
    )
    SOURCES_PSK = ENVIRONMENT.get_value("SOURCES_PSK", default="sources-psk")

    RETRY_SECONDS = ENVIRONMENT.int("RETRY_SECONDS", default=10)
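# A minimal sketch of how one of the base64-encoded identity headers above could be
# decoded for inspection. Only the standard library is assumed; the helper name
# `decode_identity_header` is illustrative and not part of this module.
import base64
import json


def decode_identity_header(header: str) -> dict:
    """Decode a base64-encoded identity header into a dict."""
    return json.loads(base64.b64decode(header))


# Example: decode_identity_header(Config.SOURCES_PROBE_HEADER)
# returns {"identity": {"account_number": "12345", "org_id": "12345"}}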
LOG.info("Starting celery.")

# Setup the database for use in Celery
# django.setup()
# LOG.info("Database configured.")

# 'app' is the recommended convention from the celery docs;
# following it for ease of comparison to the reference implementation.
app = LoggingCelery(
    "koku", log="koku.log:TaskRootLogging", backend=settings.CELERY_RESULTS_URL, broker=settings.CELERY_BROKER_URL
)
app.config_from_object("django.conf:settings", namespace="CELERY")

LOG.info("Celery autodiscover tasks.")

# Specify the number of celery tasks to run before recycling the celery worker.
MAX_CELERY_TASKS_PER_WORKER = ENVIRONMENT.int("MAX_CELERY_TASKS_PER_WORKER", default=10)
app.conf.worker_max_tasks_per_child = MAX_CELERY_TASKS_PER_WORKER

# Timeout threshold (in seconds) for a worker process to start up.
WORKER_PROC_ALIVE_TIMEOUT = ENVIRONMENT.int("WORKER_PROC_ALIVE_TIMEOUT", default=4)
app.conf.worker_proc_alive_timeout = WORKER_PROC_ALIVE_TIMEOUT
LOG.info(f"Celery worker alive timeout = {app.conf.worker_proc_alive_timeout}")

# Toggle to enable/disable scheduled checks for new reports.
if ENVIRONMENT.bool("SCHEDULE_REPORT_CHECKS", default=False):
    # The interval to scan for new reports.
    REPORT_CHECK_INTERVAL = timedelta(minutes=ENVIRONMENT.int("SCHEDULE_CHECK_INTERVAL", default=60))
    CHECK_REPORT_UPDATES_DEF = {
        "task": "masu.celery.tasks.check_report_updates",
        "schedule": REPORT_CHECK_INTERVAL.seconds,
        "args": [],
    }
    app.conf.beat_schedule["check-report-updates"] = CHECK_REPORT_UPDATES_DEF
class Config:
    """Configuration for app."""

    DEBUG = ENVIRONMENT.bool("DEVELOPMENT", default=False)

    # Set the method for retrieving CUR accounts: 'db' or 'network'.
    ACCOUNT_ACCESS_TYPE = ENVIRONMENT.get_value("ACCOUNT_ACCESS_TYPE", default=DEFAULT_ACCOUNT_ACCCESS_TYPE)

    # Data directory for processing incoming data. This is the OCP PVC mount point.
    PVC_DIR = ENVIRONMENT.get_value("PVC_DIR", default=DEFAULT_PVC_DIR)

    # File retention time for cleaning out the volume (in seconds); defaults to 1 day.
    VOLUME_FILE_RETENTION = ENVIRONMENT.int("VOLUME_FILE_RETENTION", default=DEFAULT_VOLUME_FILE_RETENTION)

    # OCP intermediate report storage
    INSIGHTS_LOCAL_REPORT_DIR = f"{PVC_DIR}/insights_local"

    # Processing intermediate report storage
    TMP_DIR = f"{PVC_DIR}/processing"

    # S3 path root for warehoused data
    WAREHOUSE_PATH = "data"
    CSV_DATA_TYPE = "csv"
    PARQUET_DATA_TYPE = "parquet"

    REPORT_PROCESSING_BATCH_SIZE = ENVIRONMENT.int(
        "REPORT_PROCESSING_BATCH_SIZE", default=DEFAULT_REPORT_PROCESSING_BATCH_SIZE
    )

    AWS_DATETIME_STR_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
    OCP_DATETIME_STR_FORMAT = "%Y-%m-%d %H:%M:%S +0000 UTC"
    AZURE_DATETIME_STR_FORMAT = "%Y-%m-%d"

    # Override the service's current date time. Format: "%Y-%m-%d %H:%M:%S"
    MASU_DATE_OVERRIDE = ENVIRONMENT.get_value("DATE_OVERRIDE", default=DEFAULT_MASU_DATE_OVERRIDE)

    # Retention policy for the number of months of report data to keep.
    MASU_RETAIN_NUM_MONTHS = settings.RETAIN_NUM_MONTHS
    MASU_RETAIN_NUM_MONTHS_LINE_ITEM_ONLY = ENVIRONMENT.int(
        "RETAIN_NUM_MONTHS", default=DEFAULT_MASU_RETAIN_NUM_MONTHS_LINE_ITEM_ONLY
    )

    # TODO: Remove this if/when reporting model files are owned by masu.
    # The decimal precision of our database Numeric columns.
    REPORTING_DECIMAL_PRECISION = 9

    # Specify the number of months (bills) to ingest.
    INITIAL_INGEST_NUM_MONTHS = ENVIRONMENT.int("INITIAL_INGEST_NUM_MONTHS", default=DEFAULT_INITIAL_INGEST_NUM_MONTHS)

    # Override the initial ingest requirement to allow INITIAL_INGEST_NUM_MONTHS.
    INGEST_OVERRIDE = ENVIRONMENT.bool("INITIAL_INGEST_OVERRIDE", default=DEFAULT_INGEST_OVERRIDE)

    # Trino enablement
    TRINO_ENABLED = ENVIRONMENT.bool("ENABLE_PARQUET_PROCESSING", default=DEFAULT_ENABLE_PARQUET_PROCESSING)

    # Insights Kafka
    INSIGHTS_KAFKA_HOST = CONFIGURATOR.get_kafka_broker_host()
    INSIGHTS_KAFKA_PORT = CONFIGURATOR.get_kafka_broker_port()
    INSIGHTS_KAFKA_ADDRESS = f"{INSIGHTS_KAFKA_HOST}:{INSIGHTS_KAFKA_PORT}"
    HCCM_TOPIC = CONFIGURATOR.get_kafka_topic("platform.upload.hccm")
    VALIDATION_TOPIC = CONFIGURATOR.get_kafka_topic("platform.upload.validation")

    # Flag to signal whether or not to connect to the upload service.
    KAFKA_CONNECT = ENVIRONMENT.bool("KAFKA_CONNECT", default=DEFAULT_KAFKA_CONNECT)

    RETRY_SECONDS = ENVIRONMENT.int("RETRY_SECONDS", default=DEFAULT_RETRY_SECONDS)

    DEL_RECORD_LIMIT = ENVIRONMENT.int("DELETE_CYCLE_RECORD_LIMIT", default=DEFAULT_DEL_RECORD_LIMIT)
    MAX_ITERATIONS = ENVIRONMENT.int("DELETE_CYCLE_MAX_RETRY", default=DEFAULT_MAX_ITERATIONS)
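# A minimal sketch of how the provider datetime format strings above could be applied
# with the standard library. The sample timestamp strings are illustrative only;
# `Config` refers to the class defined above.
from datetime import datetime

aws_ts = datetime.strptime("2021-06-01T00:00:00Z", Config.AWS_DATETIME_STR_FORMAT)
ocp_ts = datetime.strptime("2021-06-01 00:00:00 +0000 UTC", Config.OCP_DATETIME_STR_FORMAT)
azure_ts = datetime.strptime("2021-06-01", Config.AZURE_DATETIME_STR_FORMAT)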