def handle(self, addrport=f"0.0.0.0:{CLOWDER_PORT}", *args, **options):
    """Sources command customization point."""
    timeout = 5
    # The Koku API server is responsible for running all database migrations. The sources client
    # server and Kafka listener thread should only be started once migration execution is
    # complete.
    # This is a special case because check_migrations() returns three values;
    # True means migrations are up to date.
    while check_migrations() != True:  # noqa
        LOG.warning(f"Migrations not done. Sleeping {timeout} seconds.")
        time.sleep(timeout)
    LOG.info("Starting Sources Kafka Handler")
    initialize_sources_integration()
    LOG.info("Starting Sources Client Server")
    if ENVIRONMENT.bool("RUN_GUNICORN", default=True):
        options = get_config_from_module_name("gunicorn_conf")
        options["bind"] = addrport
        SourcesApplication(application, options).run()
    else:
        from django.core.management import call_command

        options["use_reloader"] = False
        options.pop("skip_checks", None)
        call_command("runserver", addrport, *args, **options)
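# A minimal sketch of what a custom gunicorn application such as SourcesApplication
# typically looks like, following gunicorn's documented BaseApplication pattern.
# This class and its name are illustrative assumptions; the project's actual
# SourcesApplication may differ in detail.
from gunicorn.app.base import BaseApplication


class SourcesApplicationSketch(BaseApplication):
    """Embed gunicorn so a WSGI app and an options dict can be passed in directly."""

    def __init__(self, application, options=None):
        self.options = options or {}
        self.application = application
        super().__init__()

    def load_config(self):
        # Apply only keys gunicorn actually recognizes (e.g. "bind", "workers").
        for key, value in self.options.items():
            if key in self.cfg.settings and value is not None:
                self.cfg.set(key.lower(), value)

    def load(self):
        return self.application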
# following this for ease of comparison to reference implementation
app = LoggingCelery(
    "koku",
    log="koku.log:TaskRootLogging",
    backend=settings.CELERY_RESULTS_URL,
    broker=settings.CELERY_BROKER_URL,
)
app.config_from_object("django.conf:settings", namespace="CELERY")

LOG.info("Celery autodiscover tasks.")

# Specify the number of celery tasks to run before recycling the celery worker.
MAX_CELERY_TASKS_PER_WORKER = ENVIRONMENT.int("MAX_CELERY_TASKS_PER_WORKER", default=10)
app.conf.worker_max_tasks_per_child = MAX_CELERY_TASKS_PER_WORKER

# Toggle to enable/disable scheduled checks for new reports.
if ENVIRONMENT.bool("SCHEDULE_REPORT_CHECKS", default=False):
    # The interval to scan for new reports.
    REPORT_CHECK_INTERVAL = datetime.timedelta(minutes=ENVIRONMENT.int("SCHEDULE_CHECK_INTERVAL", default=60))

    CHECK_REPORT_UPDATES_DEF = {
        "task": "masu.celery.tasks.check_report_updates",
        "schedule": REPORT_CHECK_INTERVAL.seconds,
        "args": [],
    }
    app.conf.beat_schedule["check-report-updates"] = CHECK_REPORT_UPDATES_DEF

# Specify the day of the month for removal of expired report data.
REMOVE_EXPIRED_REPORT_DATA_ON_DAY = ENVIRONMENT.int("REMOVE_EXPIRED_REPORT_DATA_ON_DAY", default=1)
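# Note on the beat schedule above: timedelta.seconds only holds the seconds
# component within a single day, so an interval of 24 hours or more would wrap
# to 0. A small standard-library illustration (not koku code):
import datetime

assert datetime.timedelta(minutes=60).seconds == 3600
assert datetime.timedelta(days=1).seconds == 0
assert datetime.timedelta(days=1).total_seconds() == 86400
# With the default SCHEDULE_CHECK_INTERVAL of 60 minutes the two are equivalent,
# but total_seconds() would be the safer choice for intervals of a day or longer.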
import logging
import os
import sys

from django.conf import settings
from django.db import connections
from django.test.runner import DiscoverRunner
from django.test.utils import get_unique_databases_and_mirrors
from scripts.insert_org_tree import UploadAwsTree
from tenant_schemas.utils import tenant_context

from api.models import Customer
from api.models import Tenant
from api.report.test.utils import NiseDataLoader
from koku.env import ENVIRONMENT
from reporting.models import OCPEnabledTagKeys

GITHUB_ACTIONS = ENVIRONMENT.bool("GITHUB_ACTIONS", default=False)
LOG = logging.getLogger(__name__)
OCP_ENABLED_TAGS = ["app", "storageclass", "environment", "version"]

if GITHUB_ACTIONS:
    sys.stdout = open(os.devnull, "w")


class KokuTestRunner(DiscoverRunner):
    """Koku Test Runner for Unit Tests."""

    account = "10001"
    schema = f"acct{account}"
    settings.HOSTNAME = "koku-worker-10-abcdef"

    def setup_databases(self, **kwargs):
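# Usage note: a custom runner like this is selected through Django's TEST_RUNNER
# setting. The dotted path below is an assumption for illustration; the actual
# module path in the repository may differ.
#
#     # settings.py
#     TEST_RUNNER = "koku.koku_test_runner.KokuTestRunner"
#
# Django then instantiates the class and calls setup_databases() before the
# test suite runs.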
class Config:
    """Configuration for app."""

    DEBUG = ENVIRONMENT.bool("DEVELOPMENT", default=False)

    # Set method for retrieving CUR accounts: 'db' or 'network'
    ACCOUNT_ACCESS_TYPE = ENVIRONMENT.get_value("ACCOUNT_ACCESS_TYPE", default=DEFAULT_ACCOUNT_ACCCESS_TYPE)

    # Data directory for processing incoming data. This is the OCP PVC mount point.
    PVC_DIR = ENVIRONMENT.get_value("PVC_DIR", default=DEFAULT_PVC_DIR)

    # File retention time for cleaning out the volume (in seconds); defaults to 1 day
    VOLUME_FILE_RETENTION = ENVIRONMENT.int("VOLUME_FILE_RETENTION", default=DEFAULT_VOLUME_FILE_RETENTION)

    # OCP intermediate report storage
    INSIGHTS_LOCAL_REPORT_DIR = f"{PVC_DIR}/insights_local"

    # Processing intermediate report storage
    TMP_DIR = f"{PVC_DIR}/processing"

    # S3 path root for warehoused data
    WAREHOUSE_PATH = "data"
    CSV_DATA_TYPE = "csv"
    PARQUET_DATA_TYPE = "parquet"

    REPORT_PROCESSING_BATCH_SIZE = ENVIRONMENT.int(
        "REPORT_PROCESSING_BATCH_SIZE", default=DEFAULT_REPORT_PROCESSING_BATCH_SIZE
    )

    AWS_DATETIME_STR_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
    OCP_DATETIME_STR_FORMAT = "%Y-%m-%d %H:%M:%S +0000 UTC"
    AZURE_DATETIME_STR_FORMAT = "%Y-%m-%d"

    # Override the service's current date time. Format: "%Y-%m-%d %H:%M:%S"
    MASU_DATE_OVERRIDE = ENVIRONMENT.get_value("DATE_OVERRIDE", default=DEFAULT_MASU_DATE_OVERRIDE)

    # Retention policy for the number of months of report data to keep.
    MASU_RETAIN_NUM_MONTHS = settings.RETAIN_NUM_MONTHS
    MASU_RETAIN_NUM_MONTHS_LINE_ITEM_ONLY = ENVIRONMENT.int(
        "RETAIN_NUM_MONTHS", default=DEFAULT_MASU_RETAIN_NUM_MONTHS_LINE_ITEM_ONLY
    )

    # TODO: Remove this if/when reporting model files are owned by masu
    # The decimal precision of our database Numeric columns
    REPORTING_DECIMAL_PRECISION = 9

    # Specify the number of months (bills) to ingest
    INITIAL_INGEST_NUM_MONTHS = ENVIRONMENT.int("INITIAL_INGEST_NUM_MONTHS", default=DEFAULT_INITIAL_INGEST_NUM_MONTHS)

    # Override the initial ingest requirement to allow INITIAL_INGEST_NUM_MONTHS
    INGEST_OVERRIDE = ENVIRONMENT.bool("INITIAL_INGEST_OVERRIDE", default=DEFAULT_INGEST_OVERRIDE)

    # Trino enablement
    TRINO_ENABLED = ENVIRONMENT.bool("ENABLE_PARQUET_PROCESSING", default=DEFAULT_ENABLE_PARQUET_PROCESSING)

    # Insights Kafka
    INSIGHTS_KAFKA_HOST = CONFIGURATOR.get_kafka_broker_host()
    INSIGHTS_KAFKA_PORT = CONFIGURATOR.get_kafka_broker_port()
    INSIGHTS_KAFKA_ADDRESS = f"{INSIGHTS_KAFKA_HOST}:{INSIGHTS_KAFKA_PORT}"
    HCCM_TOPIC = CONFIGURATOR.get_kafka_topic("platform.upload.hccm")
    VALIDATION_TOPIC = CONFIGURATOR.get_kafka_topic("platform.upload.validation")

    # Flag to signal whether or not to connect to the upload service
    KAFKA_CONNECT = ENVIRONMENT.bool("KAFKA_CONNECT", default=DEFAULT_KAFKA_CONNECT)

    RETRY_SECONDS = ENVIRONMENT.int("RETRY_SECONDS", default=DEFAULT_RETRY_SECONDS)

    DEL_RECORD_LIMIT = ENVIRONMENT.int("DELETE_CYCLE_RECORD_LIMIT", default=DEFAULT_DEL_RECORD_LIMIT)
    MAX_ITERATIONS = ENVIRONMENT.int("DELETE_CYCLE_MAX_RETRY", default=DEFAULT_MAX_ITERATIONS)
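# A small parsing illustration for the datetime format strings above. The formats
# are standard strftime patterns; the sample timestamps are made-up values.
from datetime import datetime

aws_ts = datetime.strptime("2021-06-01T13:45:00Z", "%Y-%m-%dT%H:%M:%SZ")
ocp_ts = datetime.strptime("2021-06-01 13:45:00 +0000 UTC", "%Y-%m-%d %H:%M:%S +0000 UTC")
azure_day = datetime.strptime("2021-06-01", "%Y-%m-%d")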
"""Listener entry point.""" import logging import time from django.core.management.base import BaseCommand from prometheus_client import start_http_server from koku.database import check_migrations from koku.env import ENVIRONMENT from masu.external.kafka_msg_handler import initialize_kafka_handler from masu.prometheus_stats import WORKER_REGISTRY LOG = logging.getLogger(__name__) CLOWDER_METRICS_PORT = 9999 if ENVIRONMENT.bool("CLOWDER_ENABLED", default=False): from app_common_python import LoadedConfig CLOWDER_METRICS_PORT = LoadedConfig.metricsPort class Command(BaseCommand): """Django command to launch listener.""" def handle(self, *args, **kwargs): """Initialize the prometheus exporter and koku-listener.""" while not check_migrations(): LOG.warning("Migrations not done. Sleeping") time.sleep(5) LOG.info("Initializing the prometheus exporter") start_http_server(CLOWDER_METRICS_PORT, registry=WORKER_REGISTRY)
import json
import logging
import threading
from abc import ABC
from abc import abstractmethod
from http.server import HTTPServer

from prometheus_client.exposition import MetricsHandler

from koku.env import ENVIRONMENT
from masu.prometheus_stats import WORKER_REGISTRY

LOG = logging.getLogger(__name__)

CLOWDER_METRICS_PORT = 9000
if ENVIRONMENT.bool("CLOWDER_ENABLED", default=False):
    from app_common_python import LoadedConfig

    CLOWDER_METRICS_PORT = LoadedConfig.metricsPort

SERVER_TYPE = "liveness/readiness/metrics"
if ENVIRONMENT.bool("MASU", default=False) or ENVIRONMENT.bool("SOURCES", default=False):
    SERVER_TYPE = "metrics"


def start_probe_server(server_cls, logger=LOG):
    """Start the probe server."""
    httpd = HTTPServer(("0.0.0.0", CLOWDER_METRICS_PORT), server_cls)
    httpd.RequestHandlerClass.logger = logger

    def start_server():
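# A minimal sketch of the kind of handler class that could be passed to
# start_probe_server() as server_cls. The probe paths and readiness logic are
# assumptions for illustration; the project's real handler may differ.
class ProbeServerSketch(MetricsHandler):
    """Answer liveness/readiness probes; serve Prometheus metrics otherwise."""

    registry = WORKER_REGISTRY  # MetricsHandler reads metrics from this class attribute
    logger = LOG  # start_probe_server() replaces this on the handler class

    def do_GET(self):
        if self.path in ("/livez", "/readyz"):
            self._respond(200, "ok")
        else:
            super().do_GET()  # default MetricsHandler behavior: expose metrics

    def _respond(self, status, message):
        self.send_response(status)
        self.send_header("Content-Type", "text/plain")
        self.end_headers()
        self.wfile.write(message.encode("utf-8"))

    def log_message(self, format, *args):
        # Route access logs through the injected logger instead of stderr.
        self.logger.info("%s - %s", self.address_string(), format % args)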
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
"""Sources URL Configuration.

The `urlpatterns` list routes URLs to views. For more information please see:
    https://docs.djangoproject.com/en/2.0/topics/http/urls/
"""
from django.conf import settings
from django.conf.urls import include
from django.conf.urls import url
from django.urls import path

from koku.env import ENVIRONMENT
from sources.kafka_listener import initialize_sources_integration

IS_SOURCES = ENVIRONMENT.bool("SOURCES", default=False)
if IS_SOURCES:
    initialize_sources_integration()

API_PATH_PREFIX = settings.API_PATH_PREFIX
if API_PATH_PREFIX != "":
    if API_PATH_PREFIX.startswith("/"):
        API_PATH_PREFIX = API_PATH_PREFIX[1:]
    if not API_PATH_PREFIX.endswith("/"):
        API_PATH_PREFIX = API_PATH_PREFIX + "/"

# pylint: disable=invalid-name
urlpatterns = [
    url(fr"^{API_PATH_PREFIX}v1/", include("sources.api.urls")),
    path("", include("django_prometheus.urls")),
]
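# A small illustration of the prefix normalization above; the sample values are
# made up. A leading slash is stripped and a trailing slash appended so the
# prefix drops cleanly into the url() regex.
def _normalize_prefix_sketch(prefix):
    """Mirror the normalization performed on API_PATH_PREFIX above."""
    if prefix != "":
        if prefix.startswith("/"):
            prefix = prefix[1:]
        if not prefix.endswith("/"):
            prefix = prefix + "/"
    return prefix


assert _normalize_prefix_sketch("/api/cost-management") == "api/cost-management/"
assert _normalize_prefix_sketch("api") == "api/"
assert _normalize_prefix_sketch("") == ""  # empty prefix mounts routes at /v1/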