Example #1
def get_headers():
    """Retrieves headers (e.g., user agent string) from environment
    variables

    Retrieves user agent string information to use in requests to
    third-party services.

    Args:
        N/A

    Returns:
        Headers dict for the requests library, in the form:
            {'User-Agent': '<user agent string>'}

    Raises:
        Exception: General exception, since scraper cannot
        proceed without this

    """
    in_prod = environment.in_gcp()
    if not in_prod:
        user_agent_string = (
            "For any issues, concerns, or rate constraints, "
            "e-mail [email protected]"
        )
    else:
        user_agent_string = secrets.get_secret("user_agent")

    if not user_agent_string:
        raise Exception("No user agent string")

    headers = {"User-Agent": user_agent_string}
    return headers
Example #2
    def __init__(self) -> None:
        prefix = "" if not in_gcp() else f"{project_id()}-"
        self.allowlist_path = GcsfsFilePath.from_absolute_path(
            f"{prefix}case-triage-data/allowlist_v2.json"
        )
        self.allowed_users: List[str] = []
        self.admin_users: List[str] = []
Example #3
    def __init__(self, write_key: str):
        is_local = not in_gcp()

        # When `send` is set to False, we do not send any logs to Segment.
        # We also set `debug` to True locally for more logging during development.
        self.client = Client(
            write_key,
            send=(not is_local),
            debug=is_local,
        )
Example #4
    def direct_ingest_storage_directory(self) -> GcsfsDirectoryPath:
        if in_gcp():
            return gcsfs_direct_ingest_storage_directory_path_for_region(
                region_code=self.region_code,
                system_level=SystemLevel.STATE,
                ingest_instance=DirectIngestInstance.PRIMARY,
            )

        # Local override
        return GcsfsDirectoryPath.from_absolute_path(
            f"recidiviz-staging-direct-ingest-state-storage/{self.region_code.lower()}"
        )
Example #5
    def init_engines_for_server_postgres_instances(cls) -> None:
        if not environment.in_gcp():
            logging.info(
                "Environment is not GCP, not connecting to postgres instances."
            )
            return

        cls.init_engine(SchemaType.JAILS)
        cls.init_engine(SchemaType.STATE)
        cls.init_engine(SchemaType.OPERATIONS)
        cls.init_engine(SchemaType.JUSTICE_COUNTS)
        cls.init_engine(SchemaType.CASE_TRIAGE)
Example #6
def _regions_matching_environment(region_codes: Set[str]) -> Set[str]:
    """Filter to regions with the matching environment.

    If we are running locally, include all supported regions.
    """
    if not environment.in_gcp():
        return region_codes
    gcp_env = environment.get_gcp_environment()
    return {
        region_code
        for region_code in region_codes
        if regions.get_region(region_code).environment == gcp_env
    }
Example #7
    def _file_pointer_for_path(self, path: GcsfsFilePath,
                               encoding: str) -> TextIO:
        """Returns a file pointer for the given path."""

        # From the GCSFileSystem docs (https://gcsfs.readthedocs.io/en/latest/api.html#gcsfs.core.GCSFileSystem),
        # 'google_default' means we should look for local credentials set up via `gcloud login`. The project this is
        # reading from may have to match the project default you have set locally (check via `gcloud info` and set via
        # `gcloud config set project [PROJECT_ID]`). If we are running in the GCP environment, we should be able to query
        # the internal metadata for credentials.
        token = "google_default" if not environment.in_gcp() else "cloud"
        return self.gcs_file_system.open(path.uri(),
                                         encoding=encoding,
                                         token=token)
Example #8
    def get_engine_for_database(
            cls, database_key: SQLAlchemyDatabaseKey) -> Optional[Engine]:
        """Retrieve the engine for a given database.

        Will attempt to create the engine if it does not already exist."""
        if database_key not in cls._engine_for_database:
            if not environment.in_gcp():
                logging.info(
                    "Environment is not GCP, not connecting to postgres instance for [%s].",
                    database_key,
                )
                return None
            cls.init_engine(database_key)
        return cls._engine_for_database.get(database_key, None)
Example #9
    def start_timers(self) -> None:
        """Starts store refresh timers for all stores that are a subclass of the AdminPanelStore class."""
        if in_gcp() or in_development():
            stores_with_timers = [
                self.ingest_metadata_store,
                self.validation_metadata_store,
                self.ingest_data_freshness_store,
                self.validation_status_store,
            ]

            for store in stores_with_timers:
                RepeatedTimer(15 * 60,
                              store.recalculate_store,
                              run_immediately=True).start()
Example #10
def store_validation_results(
    validation_results: List[ValidationResultForStorage],
) -> None:
    if not environment.in_gcp():
        logging.info(
            "Skipping storing [%d] validation results in BigQuery.",
            len(validation_results),
        )
        return

    bq_client = BigQueryClientImpl()
    bq_client.insert_into_table(
        bq_client.dataset_ref_for_id(
            VALIDATION_RESULTS_BIGQUERY_ADDRESS.dataset_id),
        VALIDATION_RESULTS_BIGQUERY_ADDRESS.table_id,
        [result.to_serializable() for result in validation_results],
    )
Example #11
def create_cache_ingest_file_as_parquet_task(
    gcs_file: GcsfsFilePath,
    separator: str,
    encoding: str,
    quoting: int,
    custom_line_terminator: Optional[str],
) -> None:
    if in_gcp():
        task_manager = (
            AdminPanelDataDiscoveryCloudTaskManager()
        )  # type: AbstractAdminPanelDataDiscoveryCloudTaskManager
    else:
        task_manager = DevelopmentAdminPanelDataDiscoveryCloudTaskManager()

    task_manager.create_cache_ingest_file_as_parquet_task(
        gcs_file, separator, encoding, quoting, custom_line_terminator)
Example #12
def get_proxies(use_test=False):
    """Retrieves proxy username/pass from environment variables

    Retrieves proxy information to use in requests to third-party
    services. If not in the production (GCP) environment, or if use_test
    is set, returns None so that requests go out without the main proxy
    (so problems during test runs don't risk our main proxy IP's
    reputation).

    Args:
        use_test: (bool) Use test proxy credentials, not prod

    Returns:
        Proxies dict for the requests library, in the form:
            {'<protocol>': 'http://<proxy creds>@<proxy url>'},
        or None when not running in production.

    Raises:
        Exception: General exception, since scraper cannot
        proceed without this

    """
    if not environment.in_gcp() or use_test:
        return None

    user_var = "proxy_user"
    pass_var = "proxy_password"

    proxy_url = secrets.get_secret("proxy_url")

    if proxy_url is None:
        raise Exception("No proxy url")

    # On the proxy side, a random IP is chosen for any session it has not seen
    # before. Collisions can still happen, so we append a random number to the
    # proxy user to reduce the odds.
    base_proxy_user = secrets.get_secret(user_var)
    proxy_user = PROXY_USER_TEMPLATE.format(base_proxy_user, random.random())
    proxy_password = secrets.get_secret(pass_var)

    if (base_proxy_user is None) or (proxy_password is None):
        raise Exception("No proxy user/pass")

    proxy_credentials = proxy_user + ":" + proxy_password
    proxy_request_url = "http://" + proxy_credentials + "@" + proxy_url

    proxies = {"http": proxy_request_url, "https": proxy_request_url}

    return proxies
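
Both `get_headers()` (Example #1) and `get_proxies()` above are documented as producing keyword arguments for the `requests` library. A minimal, hypothetical usage sketch follows (the URL is a placeholder, not from the source):

import requests

# Hypothetical call site: outside production, get_proxies() returns None and
# requests connects directly; in GCP it routes through the configured proxy.
response = requests.get(
    "https://example.com/roster",
    headers=get_headers(),
    proxies=get_proxies(),
    timeout=30,
)
response.raise_for_status()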
Example #13
def setup() -> None:
    """Setup logging"""
    # Set the region on log records.
    logging.setLogRecordFactory(ContextualLogRecord)
    logger = logging.getLogger()

    # Send logs directly via the logging client if possible. This ensures trace
    # ids are propagated and allows us to send structured messages.
    if environment.in_gcp():
        client = Client()
        structured_handler = StructuredAppEngineHandler(client)
        handlers.setup_logging(structured_handler, log_level=logging.INFO)

        before_request_handler = StructuredAppEngineHandler(
            client, name=BEFORE_REQUEST_LOG)
        logging.getLogger(BEFORE_REQUEST_LOG).addHandler(
            before_request_handler)

        # Streams unstructured logs to stdout - these logs will still show up
        # under the appengine.googleapis.com/stdout Stackdriver logs bucket,
        # even if other logs are stalled on the global interpreter lock or some
        # other issue.
        stdout_handler = logging.StreamHandler(sys.stdout)
        handlers.setup_logging(stdout_handler, log_level=logging.INFO)
        for handler in logger.handlers:
            if not isinstance(
                    handler,
                (StructuredAppEngineHandler, logging.StreamHandler)):
                logger.removeHandler(handler)
    else:
        logging.basicConfig()

    for handler in logger.handlers:
        # If we aren't writing directly to Stackdriver, prefix the log with important
        # context that would be in the labels.
        if not isinstance(handler, StructuredAppEngineHandler):
            handler.setFormatter(
                logging.Formatter(
                    "[pid: %(process)d] (%(region)s) %(module)s/%(funcName)s : %(message)s"
                ))

    # Export gunicorn errors using the same handlers as other logs, so that they
    # go to Stackdriver in production.
    gunicorn_logger = logging.getLogger("gunicorn.error")
    gunicorn_logger.handlers = logger.handlers
Example #14
    def __init__(self) -> None:
        self.database_key = SQLAlchemyDatabaseKey.for_schema(SchemaType.CASE_TRIAGE)

        prefix = "" if not in_gcp() else f"{project_id()}-"
        self.allowlist_path = GcsfsFilePath.from_absolute_path(
            f"{prefix}case-triage-data/allowlist_v2.json"
        )
        self.feature_gate_path = GcsfsFilePath.from_absolute_path(
            f"{prefix}case-triage-data/feature_variants.json"
        )

        self.case_triage_allowed_users: List[str] = []
        self.case_triage_admin_users: List[str] = []
        self.case_triage_demo_users: List[str] = []

        # Map from feature name to a map of email addresses to variants
        # of the feature that they are in.
        self.feature_variants: Dict[str, Dict[str, FeatureGateInfo]] = {}
Example #15
def _get_metadata(url: str) -> Optional[str]:
    if url in _metadata_cache:
        return _metadata_cache[url]

    if not allow_local_metadata_call:
        if environment.in_test() or not environment.in_gcp():
            raise RuntimeError(
                "May not be called from test, should this have a local override?"
            )

    try:
        r = requests.get(BASE_METADATA_URL + url, headers=HEADERS, timeout=TIMEOUT)
        r.raise_for_status()
        _metadata_cache[url] = r.text
        return r.text
    except Exception as e:
        logging.error("Failed to fetch metadata [%s]: [%s]", url, e)
        return None
Example #16
def setup():
    """Setup logging"""
    # Set the region on log records.
    default_factory = logging.getLogRecordFactory()
    logging.setLogRecordFactory(partial(region_record_factory,
                                        default_factory))

    logger = logging.getLogger()

    # Send logs directly via the logging client if possible. This ensures trace
    # ids are propagated and allows us to send structured messages.
    if environment.in_gcp():
        client = Client()
        handler = StructuredAppEngineHandler(client)
        handlers.setup_logging(handler, log_level=logging.INFO)

        # Streams unstructured logs to stdout - these logs will still show up
        # under the appengine.googleapis.com/stdout Stackdriver logs bucket,
        # even if other logs are stalled on the global interpreter lock or some
        # other issue.
        stdout_handler = logging.StreamHandler(sys.stdout)
        handlers.setup_logging(stdout_handler, log_level=logging.INFO)
        for handler in logger.handlers:
            if not isinstance(
                    handler,
                (StructuredAppEngineHandler, logging.StreamHandler)):
                logger.removeHandler(handler)
    else:
        logging.basicConfig()

    for handler in logger.handlers:
        # If writing directly to Stackdriver, send a structured message.
        if isinstance(handler, StructuredAppEngineHandler):
            handler.setFormatter(StructuredLogFormatter())
        # Otherwise, the default stream handler requires a string.
        else:
            handler.setFormatter(
                logging.Formatter(
                    "(%(region)s) %(module)s/%(funcName)s : %(message)s"))

    # Export gunicorn errors using the same handlers as other logs, so that they
    # go to Stackdriver in production.
    gunicorn_logger = logging.getLogger("gunicorn.error")
    gunicorn_logger.handlers = logger.handlers
Example #17
    def _discovery_task() -> Tuple[str, int]:
        """Cloud task responsible for orchestrating ingest data parquet-ification tasks,
            loading parqueted files, and applying the DataDiscoveryArgs filters against the data

        Example:
            POST /admin/data_discovery/discovery_task
        Request Body:
            discovery_id: (string) The ID of this discovery task, as returned by /create_discovery
        Returns:
            N/A
        """
        body = get_cloud_task_json_body()
        if in_gcp():
            discover_data(body["discovery_id"])
        else:
            # Run discovery in a thread locally
            threading.Thread(target=discover_data, args=[body["discovery_id"]]).start()

        return "", HTTPStatus.OK
Example #18
def retry_grpc(num_retries: int, fn: Callable[..., ReturnType], *args: Any,
               **kwargs: Any) -> ReturnType:
    """Retries a function call some number of times"""
    time_to_sleep = random.uniform(5, RETRY_SLEEP)
    for i in range(num_retries + 1):
        try:
            return fn(*args, **kwargs)
        except exceptions.InternalServerError as e:
            if i == num_retries:
                raise
            if "GOAWAY" in str(e) or "Deadline Exceeded" in str(e):
                logging.exception("Received exception: ")
                if environment.in_gcp():
                    logging.warning("Sleeping %.2f seconds and retrying",
                                    time_to_sleep)
                    time.sleep(time_to_sleep)
                    continue
            else:
                raise
    raise exceptions.ServiceUnavailable(
        f"Function unsuccessful {num_retries + 1} times")
Example #19
def get_data_discovery_cache() -> redis.Redis:
    """
    Returns a client for the data discovery Redis instance.
    Redis commands can be issued directly to this client and all connection handling is done under inside `redis.Redis`.
    Idle connections will be closed by `redis.Redis` automatically.
    To get query cached data discovery information from the cache, you may want to provide this `Redis` instance
    to a `RedisCommunicator`, `DataDiscoveryArgsFactory`, or `SingleIngestFileParquetCache` class.
    """
    if not in_gcp():
        return redis.Redis()

    redis_host = get_secret("data_discovery_redis_host")
    redis_port = get_secret("data_discovery_redis_port")

    if redis_host and redis_port:
        return redis.Redis(
            host=redis_host,
            port=int(redis_port),
        )

    raise ValueError("Cannot find data discovery redis secrets")
Example #20
    def _create_discovery() -> flask.Response:
        """Endpoint responsible for creating and enqueueing a new discovery task

        Example:
            POST /admin/data_discovery/create_discovery
        Request Body:
            JSON representation of the `DataDiscoveryArgs` data class
        Returns:
            JSON representation of the hydrated `DataDiscoveryArgs` data class
        """
        data_discovery_args = DataDiscoveryArgsFactory.create(**request.get_json())

        if in_gcp():
            task_manager: AbstractAdminPanelDataDiscoveryCloudTaskManager = (
                AdminPanelDataDiscoveryCloudTaskManager()
            )
        else:
            task_manager = DevelopmentAdminPanelDataDiscoveryCloudTaskManager()

        task_manager.create_discovery_task(data_discovery_args)

        return jsonify(attr.asdict(data_discovery_args))
Example #21
def add_ingest_ops_routes(bp: Blueprint, admin_stores: AdminStores) -> None:
    """Adds routes for ingest operations."""

    project_id = GCP_PROJECT_STAGING if not in_gcp() else metadata.project_id()
    STATE_INGEST_EXPORT_URI = f"gs://{project_id}-cloud-sql-exports"

    @bp.route("/api/ingest_operations/fetch_ingest_state_codes",
              methods=["POST"])
    @requires_gae_auth
    def _fetch_ingest_state_codes() -> Tuple[str, HTTPStatus]:
        all_state_codes = (
            admin_stores.ingest_operations_store.state_codes_launched_in_env)
        state_code_info = fetch_state_codes(all_state_codes)
        return jsonify(state_code_info), HTTPStatus.OK

    # Start an ingest run for a specific instance
    @bp.route("/api/ingest_operations/<state_code_str>/start_ingest_run",
              methods=["POST"])
    @requires_gae_auth
    def _start_ingest_run(state_code_str: str) -> Tuple[str, HTTPStatus]:
        state_code = _get_state_code_from_str(state_code_str)
        instance = request.json["instance"]
        admin_stores.ingest_operations_store.start_ingest_run(
            state_code, instance)
        return "", HTTPStatus.OK

    # Update ingest queues
    @bp.route(
        "/api/ingest_operations/<state_code_str>/update_ingest_queues_state",
        methods=["POST"],
    )
    @requires_gae_auth
    def _update_ingest_queues_state(
            state_code_str: str) -> Tuple[str, HTTPStatus]:
        state_code = _get_state_code_from_str(state_code_str)
        new_queue_state = request.json["new_queue_state"]
        admin_stores.ingest_operations_store.update_ingest_queues_state(
            state_code, new_queue_state)
        return "", HTTPStatus.OK

    # Get all ingest queues and their state for given state code
    @bp.route("/api/ingest_operations/<state_code_str>/get_ingest_queue_states"
              )
    @requires_gae_auth
    def _get_ingest_queue_states(
            state_code_str: str) -> Tuple[str, HTTPStatus]:
        state_code = _get_state_code_from_str(state_code_str)
        ingest_queue_states = (admin_stores.ingest_operations_store.
                               get_ingest_queue_states(state_code))
        return jsonify(ingest_queue_states), HTTPStatus.OK

    # Get summaries of all ingest instances for state
    @bp.route(
        "/api/ingest_operations/<state_code_str>/get_ingest_instance_summaries"
    )
    @requires_gae_auth
    def _get_ingest_instance_summaries(
            state_code_str: str) -> Tuple[str, HTTPStatus]:
        state_code = _get_state_code_from_str(state_code_str)
        ingest_instance_summaries = (admin_stores.ingest_operations_store.
                                     get_ingest_instance_summaries(state_code))
        return jsonify(ingest_instance_summaries), HTTPStatus.OK

    @bp.route("/api/ingest_operations/export_database_to_gcs",
              methods=["POST"])
    @requires_gae_auth
    def _export_database_to_gcs() -> Tuple[str, HTTPStatus]:
        try:
            state_code = StateCode(request.json["stateCode"])
            ingest_instance = DirectIngestInstance(
                request.json["ingestInstance"].upper())
            db_version = ingest_instance.database_version(
                system_level=SystemLevel.STATE, state_code=state_code)
        except ValueError:
            return "invalid parameters provided", HTTPStatus.BAD_REQUEST

        lock_manager = DirectIngestRegionLockManager.for_state_ingest(
            state_code, ingest_instance)
        if not lock_manager.can_proceed():
            return (
                "other locks blocking ingest have been acquired; aborting operation",
                HTTPStatus.CONFLICT,
            )

        db_key = SQLAlchemyDatabaseKey.for_state_code(state_code, db_version)
        cloud_sql_client = CloudSQLClientImpl(project_id=project_id)

        operation_id = cloud_sql_client.export_to_gcs_sql(
            db_key,
            GcsfsFilePath.from_absolute_path(
                f"{STATE_INGEST_EXPORT_URI}/{db_version.value}/{state_code.value}"
            ),
        )
        if operation_id is None:
            return (
                "Cloud SQL export operation was not started successfully.",
                HTTPStatus.INTERNAL_SERVER_ERROR,
            )

        operation_succeeded = cloud_sql_client.wait_until_operation_completed(
            operation_id, seconds_to_wait=GCS_IMPORT_EXPORT_TIMEOUT_SEC)
        if not operation_succeeded:
            return (
                "Cloud SQL import did not complete within 60 seconds",
                HTTPStatus.INTERNAL_SERVER_ERROR,
            )

        return operation_id, HTTPStatus.OK

    @bp.route("/api/ingest_operations/import_database_from_gcs",
              methods=["POST"])
    @requires_gae_auth
    def _import_database_from_gcs() -> Tuple[str, HTTPStatus]:
        try:
            state_code = StateCode(request.json["stateCode"])
            db_version = SQLAlchemyStateDatabaseVersion(
                request.json["importToDatabaseVersion"].lower())
            ingest_instance = DirectIngestInstance.for_state_database_version(
                database_version=db_version, state_code=state_code)
            exported_db_version = SQLAlchemyStateDatabaseVersion(
                request.json["exportedDatabaseVersion"].lower())
        except ValueError:
            return "invalid parameters provided", HTTPStatus.BAD_REQUEST

        if db_version == SQLAlchemyStateDatabaseVersion.LEGACY:
            return "ingestInstance cannot be LEGACY", HTTPStatus.BAD_REQUEST

        lock_manager = DirectIngestRegionLockManager.for_state_ingest(
            state_code, ingest_instance=ingest_instance)
        if not lock_manager.can_proceed():
            return (
                "other locks blocking ingest have been acquired; aborting operation",
                HTTPStatus.CONFLICT,
            )

        db_key = SQLAlchemyDatabaseKey.for_state_code(state_code, db_version)
        cloud_sql_client = CloudSQLClientImpl(project_id=project_id)

        operation_id = cloud_sql_client.import_gcs_sql(
            db_key,
            GcsfsFilePath.from_absolute_path(
                f"{STATE_INGEST_EXPORT_URI}/{exported_db_version.value}/{state_code.value}"
            ),
        )
        if operation_id is None:
            return (
                "Cloud SQL import operation was not started successfully.",
                HTTPStatus.INTERNAL_SERVER_ERROR,
            )

        operation_succeeded = cloud_sql_client.wait_until_operation_completed(
            operation_id, seconds_to_wait=GCS_IMPORT_EXPORT_TIMEOUT_SEC)
        if not operation_succeeded:
            return (
                "Cloud SQL import did not complete within 60 seconds",
                HTTPStatus.INTERNAL_SERVER_ERROR,
            )

        return operation_id, HTTPStatus.OK

    @bp.route("/api/ingest_operations/acquire_ingest_lock", methods=["POST"])
    @requires_gae_auth
    def _acquire_ingest_lock() -> Tuple[str, HTTPStatus]:
        try:
            state_code = StateCode(request.json["stateCode"])
            ingest_instance = DirectIngestInstance(
                request.json["ingestInstance"])
        except ValueError:
            return "invalid parameters provided", HTTPStatus.BAD_REQUEST

        lock_manager = DirectIngestRegionLockManager.for_state_ingest(
            state_code, ingest_instance=ingest_instance)
        try:
            lock_manager.acquire_lock()
        except GCSPseudoLockAlreadyExists:
            return "lock already exists", HTTPStatus.CONFLICT

        if not lock_manager.can_proceed():
            try:
                lock_manager.release_lock()
            except Exception as e:
                logging.exception(e)
            return (
                "other locks blocking ingest have been acquired; releasing lock",
                HTTPStatus.CONFLICT,
            )

        return "", HTTPStatus.OK

    @bp.route("/api/ingest_operations/release_ingest_lock", methods=["POST"])
    @requires_gae_auth
    def _release_ingest_lock() -> Tuple[str, HTTPStatus]:
        try:
            state_code = StateCode(request.json["stateCode"])
            ingest_instance = DirectIngestInstance(
                request.json["ingestInstance"])
        except ValueError:
            return "invalid parameters provided", HTTPStatus.BAD_REQUEST

        lock_manager = DirectIngestRegionLockManager.for_state_ingest(
            state_code, ingest_instance=ingest_instance)
        try:
            lock_manager.release_lock()
        except GCSPseudoLockDoesNotExist:
            return "lock does not exist", HTTPStatus.NOT_FOUND

        return "", HTTPStatus.OK

    @bp.route("/api/ingest_operations/pause_direct_ingest_instance",
              methods=["POST"])
    @requires_gae_auth
    def _pause_direct_ingest_instance() -> Tuple[str, HTTPStatus]:
        try:
            state_code = StateCode(request.json["stateCode"])
            ingest_instance = DirectIngestInstance(
                request.json["ingestInstance"])
        except ValueError:
            return "invalid parameters provided", HTTPStatus.BAD_REQUEST

        ingest_status_manager = DirectIngestInstanceStatusManager(
            region_code=state_code.value, ingest_instance=ingest_instance)
        try:
            ingest_status_manager.pause_instance()
        except Exception:
            return (
                "something went wrong pausing the intance",
                HTTPStatus.INTERNAL_SERVER_ERROR,
            )

        return "", HTTPStatus.OK
Example #22
def test_in_prod_true(mock_os):
    mock_os.return_value = "production"
    assert environment.in_gcp()
Example #23
def test_in_prod_false(mock_os):
    mock_os.return_value = "not production"
    assert not environment.in_gcp()
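
The two tests above suggest that `environment.in_gcp()` keys off an environment value that reads "production" when running on Google Cloud. A minimal sketch under that assumption (not the project's actual implementation):

import os

def in_gcp() -> bool:
    # Assumption: some environment variable (e.g. GAE_ENV) is set to
    # "production" on Google Cloud and to something else locally or in tests.
    return os.getenv("GAE_ENV", "") == "production"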
Example #24
def should_persist() -> bool:
    """
    Determines whether objects should be written to the database in this context.
    """
    return environment.in_gcp() or strtobool(
        (os.environ.get("PERSIST_LOCALLY", "false")))
Example #25
from recidiviz.case_triage.util import get_local_secret, get_rate_limit_storage_uri
from recidiviz.persistence.database.schema_utils import SchemaType
from recidiviz.persistence.database.sqlalchemy_database_key import SQLAlchemyDatabaseKey
from recidiviz.persistence.database.sqlalchemy_engine_manager import (
    SQLAlchemyEngineManager,
)
from recidiviz.tools.postgres import local_postgres_helpers
from recidiviz.utils.auth.auth0 import (
    Auth0Config,
    build_auth0_authorization_decorator,
    get_userinfo,
)
from recidiviz.utils.environment import in_development, in_gcp, in_test
from recidiviz.utils.timer import RepeatedTimer

# Sentry setup
if in_gcp():
    sentry_sdk.init(
        # not a secret!
        dsn=
        "https://[email protected]/5623757",
        integrations=[FlaskIntegration()],
        # This value may need to be adjusted over time as usage increases.
        traces_sample_rate=1.0,
    )

# Flask setup
static_folder = os.path.abspath(
    os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "../../frontends/case-triage/build/",
    ))
Example #26
    scrape_aggregate_reports_blueprint, url_prefix="/scrape_aggregate_reports"
)
app.register_blueprint(store_single_count_blueprint, url_prefix="/single_count")
app.register_blueprint(cloud_sql_to_bq_blueprint, url_prefix="/cloud_sql_to_bq")
app.register_blueprint(backup_manager_blueprint, url_prefix="/backup_manager")
app.register_blueprint(dataflow_monitor_blueprint, url_prefix="/dataflow_monitor")
app.register_blueprint(validation_manager_blueprint, url_prefix="/validation_manager")
app.register_blueprint(
    calculation_data_storage_manager_blueprint,
    url_prefix="/calculation_data_storage_manager",
)
app.register_blueprint(reporting_endpoint_blueprint, url_prefix="/reporting")
app.register_blueprint(export_blueprint, url_prefix="/export")
app.register_blueprint(justice_counts_control, url_prefix="/justice_counts")

if environment.in_gcp():
    SQLAlchemyEngineManager.init_engines_for_server_postgres_instances()

# Export traces and metrics to stackdriver if running in GCP
if environment.in_gcp():
    monitoring.register_stackdriver_exporter()
    trace_exporter = stackdriver_trace.StackdriverExporter(
        project_id=metadata.project_id(), transport=AsyncTransport
    )
    trace_sampler = trace.CompositeSampler(
        {
            "/direct/process_job": samplers.AlwaysOnSampler(),
            # There are a lot of scraper requests, so they can use the default rate of 1 in 10k.
            "/scraper/": samplers.ProbabilitySampler(),
            "/scrape_aggregate_reports/": samplers.ProbabilitySampler(),
        },
Example #27
    def __init__(self) -> None:
        if in_development():
            with local_project_id_override(GCP_PROJECT_STAGING):
                self._initialize_stores()
        elif in_gcp():
            self._initialize_stores()