def setUp(self) -> None:
    self.test_app = Flask(__name__)
    self.helpers = CaseTriageTestHelpers.from_test(self, self.test_app)
    self.database_key = SQLAlchemyDatabaseKey.for_schema(SchemaType.CASE_TRIAGE)
    self.overridden_env_vars = (
        local_postgres_helpers.update_local_sqlalchemy_postgres_env_vars()
    )
    db_url = local_postgres_helpers.postgres_db_url_from_env_vars()
    engine = setup_scoped_sessions(self.test_app, db_url)
    # Auto-generate all tables that exist in our schema in this database
    self.database_key.declarative_meta.metadata.create_all(engine)

    self.demo_clients = get_fixture_clients()
    self.demo_opportunities = get_fixture_opportunities()

    self.client_1 = self.demo_clients[0]
    with self.helpers.using_demo_user():
        self.helpers.create_case_update(
            self.client_1.person_external_id,
            CaseUpdateActionType.COMPLETED_ASSESSMENT.value,
        )
def setUp(self) -> None:
    self.database_key = SQLAlchemyDatabaseKey.canonical_for_schema(SchemaType.STATE)
    local_postgres_helpers.use_on_disk_postgresql_database(self.database_key)
    self.state_code = "US_XX"
@classmethod
def _main_database_key(cls) -> "SQLAlchemyDatabaseKey":
    if cls.schema_type() == SchemaType.STATE:
        state_code = StateCode(cls.region_code().upper())
        return SQLAlchemyDatabaseKey.for_state_code(
            state_code,
            cls._main_ingest_instance().database_version(
                SystemLevel.STATE, state_code=state_code
            ),
        )
    return SQLAlchemyDatabaseKey.for_schema(cls.schema_type())
def ingest_database_key(self) -> SQLAlchemyDatabaseKey:
    schema_type = self.system_level.schema_type()
    if schema_type == SchemaType.STATE:
        state_code = StateCode(self.region_code().upper())
        return SQLAlchemyDatabaseKey.for_state_code(
            state_code,
            self.ingest_instance.database_version(
                self.system_level, state_code=state_code
            ),
        )
    return SQLAlchemyDatabaseKey.for_schema(schema_type)
def test_state_legacy_db(self) -> None:
    db_key_1 = SQLAlchemyDatabaseKey(schema_type=SchemaType.STATE)
    db_key_1_dup = SQLAlchemyDatabaseKey.canonical_for_schema(
        schema_type=SchemaType.STATE
    )
    self.assertEqual(db_key_1, db_key_1_dup)

    # TODO(#7984): Once we have cut over all traffic to non-legacy state DBs and
    #  removed the LEGACY database version, remove this part of the test.
    db_key_legacy = SQLAlchemyDatabaseKey.for_state_code(
        StateCode.US_AK, SQLAlchemyStateDatabaseVersion.LEGACY
    )
    self.assertEqual(db_key_1, db_key_legacy)
def test_canonical_for_schema_local_only(self) -> None:
    # Succeeds when running locally (no GCP environment set).
    _ = SQLAlchemyDatabaseKey.canonical_for_schema(schema_type=SchemaType.STATE)

    with patch(
        "recidiviz.utils.environment.get_gcp_environment",
        Mock(return_value="production"),
    ):
        with self.assertRaises(RuntimeError):
            _ = SQLAlchemyDatabaseKey.canonical_for_schema(
                schema_type=SchemaType.STATE
            )

    # Succeeds again once the environment patch is lifted.
    _ = SQLAlchemyDatabaseKey.canonical_for_schema(schema_type=SchemaType.STATE)
def setUp(self) -> None:
    self.metadata_patcher = mock.patch("recidiviz.utils.metadata.project_id")
    self.mock_project_id_fn = self.metadata_patcher.start()
    self.mock_project_id_fn.return_value = "recidiviz-456"

    self.database_key = SQLAlchemyDatabaseKey.for_schema(SchemaType.JAILS)
    fakes.use_in_memory_sqlite_database(self.database_key)
def setUp(self) -> None:
    self.user_1_email = "*****@*****.**"
    self.mock_instance_id = "mock_instance_id"
    self.cloud_sql_client_patcher = patch(
        "recidiviz.cloud_sql.gcs_import_to_cloud_sql.CloudSQLClientImpl"
    )
    self.mock_cloud_sql_client = MagicMock()
    self.cloud_sql_client_patcher.start().return_value = self.mock_cloud_sql_client

    self.mock_sqlalchemy_engine_manager = SQLAlchemyEngineManager
    setattr(
        self.mock_sqlalchemy_engine_manager,
        "get_stripped_cloudsql_instance_id",
        Mock(return_value=self.mock_instance_id),
    )
    self.database_key = SQLAlchemyDatabaseKey.for_schema(SchemaType.CASE_TRIAGE)
    local_postgres_helpers.use_on_disk_postgresql_database(self.database_key)

    self.table_name = DashboardUserRestrictions.__tablename__
    self.columns = [col.name for col in DashboardUserRestrictions.__table__.columns]
    self.gcs_uri = GcsfsFilePath.from_absolute_path(
        "US_MO/dashboard_user_restrictions.csv"
    )
def _retrieve_data_for_top_opportunities(state_code: StateCode) -> List[Recipient]:
    """Fetches list of recipients from the Case Triage backend where we store
    information about which opportunities are active via the OpportunityPresenter."""
    recipients = []
    for officer_email in _top_opps_email_recipient_addresses():
        mismatches = _get_mismatch_data_for_officer(officer_email)
        if mismatches is not None:
            with SessionFactory.using_database(
                SQLAlchemyDatabaseKey.for_schema(SchemaType.CASE_TRIAGE),
                autocommit=False,
            ) as session:
                officer = CaseTriageQuerier.officer_for_email(session, officer_email)
                recipients.append(
                    Recipient.from_report_json(
                        {
                            utils.KEY_EMAIL_ADDRESS: officer_email,
                            utils.KEY_STATE_CODE: state_code.value,
                            utils.KEY_DISTRICT: None,
                            OFFICER_GIVEN_NAME: officer.given_names,
                            "mismatches": mismatches,
                        }
                    )
                )
    return recipients
def setUp(self) -> None:
    super().setUp()
    self.database_key = SQLAlchemyDatabaseKey.for_schema(SchemaType.CASE_TRIAGE)
    local_postgres_helpers.use_on_disk_postgresql_database(self.database_key)
def _fetch_po_user_feedback() -> Tuple[str, HTTPStatus]:
    with SessionFactory.using_database(
        SQLAlchemyDatabaseKey.for_schema(SchemaType.CASE_TRIAGE), autocommit=False
    ) as session:
        results = (
            session.query(CaseUpdate)
            .filter(
                CaseUpdate.comment.isnot(None),
                CaseUpdate.officer_external_id.notlike("demo::%"),
            )
            .all()
        )
        return (
            jsonify(
                [
                    {
                        "personExternalId": res.person_external_id,
                        "officerExternalId": res.officer_external_id,
                        "actionType": res.action_type,
                        "comment": res.comment,
                        "timestamp": str(res.action_ts),
                    }
                    for res in results
                ]
            ),
            HTTPStatus.OK,
        )
def setup_scoped_sessions(app: Flask, db_url: URL) -> Engine:
    engine = SQLAlchemyEngineManager.init_engine_for_postgres_instance(
        database_key=SQLAlchemyDatabaseKey.for_schema(SchemaType.CASE_TRIAGE),
        db_url=db_url,
    )
    session_factory = sessionmaker(bind=engine)
    app.scoped_session = flask_scoped_session(session_factory, app)
    return engine
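A minimal wiring sketch for this helper, using only names that appear elsewhere in this section and assuming the local-Postgres environment variables have already been set up as in the first setUp above:

# Sketch: bind a scoped session to a fresh Flask app, then create the
# CASE_TRIAGE tables on the returned engine (mirrors the first setUp above).
app = Flask(__name__)
db_url = local_postgres_helpers.postgres_db_url_from_env_vars()
engine = setup_scoped_sessions(app, db_url)
database_key = SQLAlchemyDatabaseKey.for_schema(SchemaType.CASE_TRIAGE)
database_key.declarative_meta.metadata.create_all(engine)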
def unsegmented_database_key(self) -> SQLAlchemyDatabaseKey:
    """Returns a key for the database associated with a particular unsegmented
    schema. Throws for state-segmented schemas.
    """
    if self.is_state_segmented_refresh_schema():
        raise ValueError(f"Unexpected schema type [{self.schema_type}]")

    return SQLAlchemyDatabaseKey.for_schema(self.schema_type)
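A hedged usage sketch; `config` is a hypothetical instance of the refresh-config class this method is defined on (the class name is not shown in this section):

# For an unsegmented schema (e.g. JAILS), this returns the single database key.
key = config.unsegmented_database_key()
# For a state-segmented schema this raises ValueError; use the
# database_key_for_segment method shown later in this section instead.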
def test_for_schema_throws_state(self) -> None:
    with self.assertRaises(ValueError) as e:
        _ = SQLAlchemyDatabaseKey.for_schema(SchemaType.STATE)
    self.assertEqual(
        "Must provide db name information to create a STATE database key.",
        str(e.exception),
    )
def _get_database_name_for_state(
    state_code: StateCode, instance: DirectIngestInstance
) -> str:
    """Returns the database name for the given state and instance."""
    return SQLAlchemyDatabaseKey.for_state_code(
        state_code,
        instance.database_version(SystemLevel.STATE, state_code=state_code),
    ).db_name
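For illustration, a hypothetical call for US_XX on the PRIMARY ingest instance; given the non-legacy naming shown in test_for_state_code below, this would resolve to a name like "us_xx_primary":

db_name = _get_database_name_for_state(StateCode.US_XX, DirectIngestInstance.PRIMARY)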
def infer_release_on_open_bookings(
    region_code: str,
    last_ingest_time: datetime.datetime,
    custody_status: CustodyStatus = CustodyStatus.INFERRED_RELEASE,
) -> None:
    """
    Looks up all open bookings in the provided region whose last_seen_time is
    earlier than the provided last_ingest_time, and updates those bookings to
    have an inferred release date equal to the provided last_ingest_time.

    Args:
        region_code: the region_code
        last_ingest_time: The last time complete data was ingested for this
            region. In the normal ingest pipeline, this is the last start time
            of a background scrape for the region.
        custody_status: The custody status to be marked on the found open
            bookings. Defaults to INFERRED_RELEASE.
    """
    with SessionFactory.using_database(
        SQLAlchemyDatabaseKey.for_schema(SchemaType.JAILS)
    ) as session:
        logging.info(
            "Reading all bookings that happened before [%s]", last_ingest_time
        )
        people = county_dao.read_people_with_open_bookings_scraped_before_time(
            session, region_code, last_ingest_time
        )
        logging.info(
            "Found [%s] people with bookings that will be inferred released",
            len(people),
        )

        for person in people:
            persistence_utils.remove_pii_for_person(person)
            _infer_release_date_for_bookings(
                person.bookings, last_ingest_time, custody_status
            )
        db_people = converter.convert_entity_people_to_schema_people(people)
        database.write_people(
            session,
            db_people,
            IngestMetadata(
                region=region_code,
                jurisdiction_id="",
                ingest_time=last_ingest_time,
                system_level=SystemLevel.COUNTY,
                database_key=SQLAlchemyDatabaseKey.for_schema(SchemaType.JAILS),
            ),
        )
def _get_all_schema_objects_in_db(
    self,
    schema_person_type: SchemaPersonType,
    schema: ModuleType,
    schema_object_type_names_to_ignore: List[str],
) -> List[DatabaseEntity]:
    """Generates a list of all schema objects stored in the database that
    can be reached from an object with the provided type.

    Args:
        schema_person_type: Class type of the root of the schema object
            graph (e.g. StatePerson).
        schema: The schema module that root_object_type is defined in.
        schema_object_type_names_to_ignore: type names for objects defined
            in the schema that we shouldn't assert are included in the
            object graph.

    Returns:
        A list of all schema objects that can be reached from the object
        graph rooted at the singular object of type |schema_person_type|.

    Throws:
        If more than one object of type |schema_person_type| exists in the DB.
    """
    with SessionFactory.using_database(
        SQLAlchemyDatabaseKey.canonical_for_schema(
            schema_type_for_schema_module(schema)
        ),
        autocommit=False,
    ) as session:
        person = one(session.query(schema_person_type).all())
        schema_objects: Set[DatabaseEntity] = {person}
        # Breadth-first traversal of the object graph rooted at |person|.
        unprocessed = list([person])
        while unprocessed:
            schema_object = unprocessed.pop()

            related_entities = []
            for relationship_name in schema_object.get_relationship_property_names():
                related = getattr(schema_object, relationship_name)

                # Relationship can return either a list or a single item
                if isinstance(related, DatabaseEntity):
                    related_entities.append(related)
                if isinstance(related, list):
                    related_entities.extend(related)

            for obj in related_entities:
                if obj not in schema_objects:
                    schema_objects.add(obj)
                    unprocessed.append(obj)

        self._check_all_non_history_schema_object_types_in_list(
            list(schema_objects), schema, schema_object_type_names_to_ignore
        )

        return list(schema_objects)
def test_for_state_code(self) -> None:
    primary = SQLAlchemyDatabaseKey.for_state_code(
        StateCode.US_MN, db_version=SQLAlchemyStateDatabaseVersion.PRIMARY
    )
    secondary = SQLAlchemyDatabaseKey.for_state_code(
        StateCode.US_MN, db_version=SQLAlchemyStateDatabaseVersion.SECONDARY
    )
    self.assertEqual(
        SQLAlchemyDatabaseKey(schema_type=SchemaType.STATE, db_name="us_mn_primary"),
        primary,
    )
    self.assertEqual(
        SQLAlchemyDatabaseKey(
            schema_type=SchemaType.STATE, db_name="us_mn_secondary"
        ),
        secondary,
    )
def setUp(self) -> None:
    self.db_dir = local_postgres_helpers.start_on_disk_postgresql_database()
    self.database_key = SQLAlchemyDatabaseKey.canonical_for_schema(self.schema_type)
    self.overridden_env_vars = (
        local_postgres_helpers.update_local_sqlalchemy_postgres_env_vars()
    )
    self.engine = create_engine(
        local_postgres_helpers.postgres_db_url_from_env_vars()
    )
def setUp(self) -> None:
    self.operations_key = SQLAlchemyDatabaseKey.for_schema(SchemaType.OPERATIONS)
    local_postgres_helpers.use_on_disk_postgresql_database(self.operations_key)
    self.us_xx_manager = DirectIngestInstanceStatusManager.add_instance(
        "US_XX", DirectIngestInstance.PRIMARY, is_paused=True
    )
    self.us_ww_manager = DirectIngestInstanceStatusManager.add_instance(
        "US_WW", DirectIngestInstance.PRIMARY, is_paused=False
    )
def test_get_all(self, state_codes_fn) -> None:
    all_keys = SQLAlchemyDatabaseKey.all()

    expected_all_keys = [
        SQLAlchemyDatabaseKey(SchemaType.JAILS, db_name="postgres"),
        SQLAlchemyDatabaseKey(SchemaType.STATE, db_name="postgres"),
        SQLAlchemyDatabaseKey(SchemaType.OPERATIONS, db_name="postgres"),
        SQLAlchemyDatabaseKey(SchemaType.JUSTICE_COUNTS, db_name="postgres"),
        SQLAlchemyDatabaseKey(SchemaType.CASE_TRIAGE, db_name="postgres"),
        SQLAlchemyDatabaseKey(SchemaType.STATE, db_name="us_xx_primary"),
        SQLAlchemyDatabaseKey(SchemaType.STATE, db_name="us_ww_primary"),
        SQLAlchemyDatabaseKey(SchemaType.STATE, db_name="us_xx_secondary"),
        SQLAlchemyDatabaseKey(SchemaType.STATE, db_name="us_ww_secondary"),
    ]

    self.assertCountEqual(expected_all_keys, all_keys)
    state_codes_fn.assert_called()
def run_justice_counts_ingest_locally(manifest_file: str, clean_up_db: bool) -> None:
    tmp_db_dir = local_postgres_helpers.start_on_disk_postgresql_database()
    local_postgres_helpers.use_on_disk_postgresql_database(
        SQLAlchemyDatabaseKey.for_schema(SchemaType.JUSTICE_COUNTS)
    )
    fs = FakeGCSFileSystem()
    try:
        manual_upload.ingest(fs, test_utils.prepare_files(fs, manifest_file))
    finally:
        cleanup_run(tmp_db_dir, clean_up_db)
def setUp(self) -> None:
    self.project_id = "recidiviz-456"
    self.region = "us_xx"
    self.project_id_patcher = patch("recidiviz.utils.metadata.project_id")
    self.project_id_patcher.start().return_value = self.project_id
    self.operations_database_key = SQLAlchemyDatabaseKey.for_schema(
        SchemaType.OPERATIONS
    )
    fakes.use_in_memory_sqlite_database(self.operations_database_key)
    self.us_xx_manager = DirectIngestInstanceStatusManager.add_instance(
        self.region, DirectIngestInstance.PRIMARY, is_paused=False
    )
@classmethod
def attempt_init_engines_for_server(cls, schema_types: Set[SchemaType]) -> None:
    """Attempts to initialize engines for the server for the given schema types.

    Ignores any connections that fail, so that a single down database does not
    cause our server to crash."""
    for database_key in SQLAlchemyDatabaseKey.all():
        if database_key.schema_type in schema_types:
            try:
                cls.init_engine(database_key)
            except BaseException:
                # Deliberately swallow all failures so that one unreachable
                # database cannot take down the whole server at startup.
                pass
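A hedged startup sketch, assuming this classmethod is defined on SQLAlchemyEngineManager (the engine-manager class referenced elsewhere in this section):

# Warm engines for the schemas this server talks to; unreachable databases
# are skipped rather than crashing the process.
SQLAlchemyEngineManager.attempt_init_engines_for_server(
    {SchemaType.CASE_TRIAGE, SchemaType.OPERATIONS}
)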
@staticmethod
def add_instance(
    region_code: str, ingest_instance: DirectIngestInstance, is_paused: bool
) -> "DirectIngestInstanceStatusManager":
    with SessionFactory.using_database(
        SQLAlchemyDatabaseKey.for_schema(SchemaType.OPERATIONS)
    ) as session:
        session.add(
            DirectIngestInstanceStatus(
                region_code=region_code.upper(),
                instance=ingest_instance.value,
                is_paused=is_paused,
            )
        )
    return DirectIngestInstanceStatusManager(region_code, ingest_instance)
def setUp(self) -> None:
    self.database_key = SQLAlchemyDatabaseKey.canonical_for_schema(SchemaType.STATE)
    local_postgres_helpers.use_on_disk_postgresql_database(self.database_key)

    # State persistence ends up having to instantiate the us_nd_controller to
    # get enum overrides, and the controller goes on to create bigquery,
    # storage, and tasks clients.
    self.bq_client_patcher = patch("google.cloud.bigquery.Client")
    self.storage_client_patcher = patch("google.cloud.storage.Client")
    self.task_client_patcher = patch("google.cloud.tasks_v2.CloudTasksClient")
    self.bq_client_patcher.start()
    self.storage_client_patcher.start()
    self.task_client_patcher.start()
@classmethod
def for_state(
    cls,
    region: str,
    enum_overrides: Optional[EnumOverrides] = None,
) -> IngestMetadata:
    return IngestMetadata(
        region=region,
        jurisdiction_id="",
        ingest_time=datetime.datetime(2020, 4, 14, 12, 31, 00),
        enum_overrides=enum_overrides or EnumOverrides.empty(),
        system_level=SystemLevel.STATE,
        database_key=SQLAlchemyDatabaseKey.canonical_for_schema(SchemaType.STATE),
    )
def database_key_for_segment(self, state_code: StateCode) -> SQLAlchemyDatabaseKey:
    """Returns a key for the database associated with a particular state segment.
    Throws for unsegmented schemas.
    """
    if not self.is_state_segmented_refresh_schema():
        raise ValueError(
            f"Only expect state-segmented schemas, found [{self.schema_type}]"
        )

    if self.schema_type == SchemaType.STATE:
        if not self.direct_ingest_instance:
            raise ValueError(
                "Expected DirectIngestInstance to be non-None for STATE schema."
            )
        return SQLAlchemyDatabaseKey.for_state_code(
            state_code=state_code,
            db_version=self.direct_ingest_instance.database_version(
                SystemLevel.STATE, state_code=state_code
            ),
        )

    return SQLAlchemyDatabaseKey.for_schema(self.schema_type)
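A hedged sketch contrasting this method with unsegmented_database_key above; `state_config` and `jails_config` are hypothetical instances of the refresh-config class these methods are defined on:

# STATE is state-segmented: each state segment resolves to its own key.
state_key = state_config.database_key_for_segment(StateCode.US_XX)
# JAILS is unsegmented: segment lookups raise ValueError, so the single
# database key is fetched instead.
jails_key = jails_config.unsegmented_database_key()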
def persist_to_database(
    region_code: str, session_start_time: datetime.datetime
) -> bool:
    """Reads all of the ingest infos from Datastore for a region and persists
    them to the database.
    """
    region = regions.get_region(region_code)
    overrides = region.get_scraper_enum_overrides()

    ingest_info_data_list = _get_batch_ingest_info_list(
        region_code, session_start_time
    )

    logging.info("Received %s total ingest infos", len(ingest_info_data_list))
    if ingest_info_data_list:
        proto, failed_tasks = _get_proto_from_batch_ingest_info_data_list(
            ingest_info_data_list
        )

        if not proto.people:
            logging.error("Scrape session returned 0 people.")
            return False

        for batch_ingest_info_datum in failed_tasks.values():
            logging.error(
                "Task with trace_id %s failed with error %s",
                batch_ingest_info_datum.trace_id,
                batch_ingest_info_datum.error,
            )
        if _should_abort(len(failed_tasks), len(proto.people)):
            logging.error(
                "Too many scraper tasks failed (%s), aborting write",
                len(failed_tasks),
            )
            return False

        metadata = IngestMetadata(
            region=region_code,
            jurisdiction_id=region.jurisdiction_id,
            ingest_time=session_start_time,
            facility_id=region.facility_id,
            enum_overrides=overrides,
            system_level=SystemLevel.COUNTY,
            database_key=SQLAlchemyDatabaseKey.for_schema(SchemaType.JAILS),
        )

        did_write = persistence.write(proto, metadata)
        if did_write:
            datastore_ingest_info.batch_delete_ingest_infos_for_region(region_code)
        return did_write

    logging.error("No ingest infos received from Datastore")
    return False
def test_key_attributes_case_triage(self) -> None:
    key = SQLAlchemyDatabaseKey.for_schema(SchemaType.CASE_TRIAGE)

    self.assertEqual(key.declarative_meta, CaseTriageBase)

    self.assertTrue(os.path.exists(key.alembic_file))
    self.assertTrue(key.alembic_file.endswith("migrations/case_triage_alembic.ini"))

    self.assertTrue(os.path.exists(key.migrations_location))
    self.assertTrue(key.migrations_location.endswith("/migrations/case_triage"))

    self.assertEqual(key.isolation_level, None)