def match_and_write_people(session: Session) -> bool:
    """Run entity matching and write the matched people with `session`.

    NOTE(review): `metadata`, `people`, `total_people`, `conversion_result`
    and `data_validation_errors` are read here but are not parameters of
    this function; presumably they are captured from an enclosing scope --
    confirm against the surrounding code.

    Args:
        session: The database session passed to entity matching and to
            `database.write_people`. This function neither commits nor
            closes it.

    Returns:
        False if `_should_abort` is true after entity matching (nothing is
        written in that case), True once `database.write_people` returns.
    """
    logging.info("Starting entity matching")

    match_result = entity_matching.match(session, metadata.region, people)
    matched_people = match_result.people

    # County-level ingest keeps `total_people` as the root entity count;
    # every other system level takes the count from entity matching.
    if metadata.system_level == SystemLevel.COUNTY:
        total_root_entities = total_people
    else:
        total_root_entities = match_result.total_root_entities

    logging.info("Completed entity matching with [%s] errors",
                 match_result.error_count)
    logging.info(
        "Completed entity matching and have [%s] total people "
        "to commit to DB", len(matched_people))

    abort = _should_abort(
        total_root_entities=total_root_entities,
        conversion_result=conversion_result,
        entity_matching_errors=match_result.error_count,
        data_validation_errors=data_validation_errors)
    if abort:
        # TODO(#1665): remove once dangling PERSIST session
        # investigation is complete.
        logging.info("_should_abort_ was true after entity matching")
        return False

    database.write_people(
        session,
        matched_people,
        metadata,
        orphaned_entities=match_result.orphaned_entities)
    logging.info("Successfully wrote to the database")
    return True
def infer_release_on_open_bookings(
        region_code: str,
        last_ingest_time: datetime.datetime,
        custody_status: CustodyStatus = CustodyStatus.INFERRED_RELEASE,
) -> None:
    """
    Look up all open bookings whose last_seen_time is earlier than the
    provided last_ingest_time in the provided region, update those bookings
    to have an inferred release date equal to the provided last_ingest_time.

    PII is removed from each affected person (via
    `persistence_utils.remove_pii_for_person`) before the updated people are
    written back. All work happens in a single session that is committed on
    success, rolled back on any exception, and always closed.

    Args:
        region_code: the region_code
        last_ingest_time: The last time complete data was ingested for this
            region. In the normal ingest pipeline, this is the last start
            time of a background scrape for the region.
        custody_status: The custody status to be marked on the found open
            bookings. Defaults to INFERRED_RELEASE

    Raises:
        Exception: anything raised while reading, updating or writing is
            re-raised unchanged after the session has been rolled back.
    """
    session = SessionFactory.for_schema_base(JailsBase)
    try:
        logging.info("Reading all bookings that happened before [%s]",
                     last_ingest_time)
        people = county_dao.read_people_with_open_bookings_scraped_before_time(
            session, region_code, last_ingest_time)

        logging.info(
            "Found [%s] people with bookings that will be inferred released",
            len(people),
        )
        for person in people:
            persistence_utils.remove_pii_for_person(person)
            _infer_release_date_for_bookings(person.bookings,
                                             last_ingest_time,
                                             custody_status)

        db_people = converter.convert_entity_people_to_schema_people(people)
        database.write_people(
            session,
            db_people,
            IngestMetadata(region=region_code,
                           jurisdiction_id="",
                           ingest_time=last_ingest_time),
        )
        session.commit()
    except Exception:
        # Undo any partial write before propagating the original error.
        session.rollback()
        raise
    finally:
        session.close()
def write(ingest_info, metadata):
    """
    Validate and convert the given ingest_info and, when persistence is
    enabled, entity-match the resulting people and write them to the
    database.

    Persistence is enabled when `_should_persist()` is true (per the
    original documentation of this function: in prod, or when
    'PERSIST_LOCALLY' is set to true -- confirm against `_should_persist`).
    Counts and errors are recorded through `monitoring.measurements`.

    Args:
        ingest_info: The ingest info to validate, convert and persist.
        metadata: Ingest metadata; `metadata.system_level` selects the
            schema base for the session and `metadata.region` is passed to
            entity matching.

    Returns:
        False if `_should_abort` is true after conversion or after entity
        matching. True if persistence is disabled (nothing is written).
        Otherwise True once the write has been committed.

    Raises:
        Exception: anything raised during entity matching, writing or
            committing is logged, counted, and re-raised after the session
            is rolled back.
    """
    ingest_info_validator.validate(ingest_info)

    # This dict is handed to monitoring.measurements and then mutated in
    # place below, so the same object must be kept throughout.
    tags = {
        monitoring.TagKey.SHOULD_PERSIST: _should_persist(),
        monitoring.TagKey.PERSISTED: False,
    }
    total_people = _get_total_people(ingest_info, metadata)

    with monitoring.measurements(tags) as measurements:
        # Convert the people one at a time and count the errors as they
        # happen.
        conversion_result: IngestInfoConversionResult = (
            ingest_info_converter.convert_to_persistence_entities(
                ingest_info, metadata))

        people, data_validation_errors = entity_validator.validate(
            conversion_result.people)

        logging.info(
            "Converted [%s] people with [%s] enum_parsing_errors, [%s]"
            " general_parsing_errors, [%s] protected_class_errors and "
            "[%s] data_validation_errors",
            len(people),
            conversion_result.enum_parsing_errors,
            conversion_result.general_parsing_errors,
            conversion_result.protected_class_errors,
            data_validation_errors)
        measurements.measure_int_put(m_people, len(people))

        abort_after_conversion = _should_abort(
            total_root_entities=total_people,
            conversion_result=conversion_result,
            data_validation_errors=data_validation_errors)
        if abort_after_conversion:
            # TODO(#1665): remove once dangling PERSIST session
            # investigation is complete.
            logging.info("_should_abort_ was true after converting people")
            return False

        if not _should_persist():
            return True

        did_persist = False
        schema_base = schema_base_for_system_level(metadata.system_level)
        session = SessionFactory.for_schema_base(schema_base)

        try:
            logging.info("Starting entity matching")
            match_result = entity_matching.match(
                session, metadata.region, people)
            people = match_result.people

            # County-level ingest keeps the pre-matching people count;
            # other system levels take the count from entity matching.
            if metadata.system_level == SystemLevel.COUNTY:
                total_root_entities = total_people
            else:
                total_root_entities = match_result.total_root_entities

            logging.info("Completed entity matching with [%s] errors",
                         match_result.error_count)
            logging.info(
                "Completed entity matching and have [%s] total people "
                "to commit to DB", len(people))

            abort_after_matching = _should_abort(
                total_root_entities=total_root_entities,
                conversion_result=conversion_result,
                entity_matching_errors=match_result.error_count,
                data_validation_errors=data_validation_errors)
            if abort_after_matching:
                # TODO(#1665): remove once dangling PERSIST session
                # investigation is complete.
                logging.info("_should_abort_ was true after entity matching")
                return False

            database.write_people(
                session,
                people,
                metadata,
                orphaned_entities=match_result.orphaned_entities)
            logging.info("Successfully wrote to the database")
            session.commit()

            did_persist = True
            tags[monitoring.TagKey.PERSISTED] = True
        except Exception as err:
            error_name = type(err).__name__
            logging.exception("An exception was raised in write(): [%s]",
                              error_name)
            # Record the error type that happened and increment the counter
            tags[monitoring.TagKey.ERROR] = error_name
            measurements.measure_int_put(m_errors, 1)
            session.rollback()
            raise
        finally:
            session.close()

        return did_persist
def test_inferReleaseDateOnOpenBookings(self):
    """Open bookings last seen before the scrape start get an inferred
    release; resolved bookings and bookings seen in the most recent scrape
    are left untouched.
    """
    # Arrange
    hold = county_entities.Hold.new_with_defaults(
        hold_id=ID,
        status=HoldStatus.ACTIVE,
        status_raw_text='ACTIVE')
    sentence = county_entities.Sentence.new_with_defaults(
        sentence_id=ID,
        status=SentenceStatus.SERVING,
        status_raw_text='SERVING',
        booking_id=ID)
    bond = county_entities.Bond.new_with_defaults(
        bond_id=ID,
        status=BondStatus.SET,
        status_raw_text='NOT_REQUIRED',
        booking_id=ID)
    charge = county_entities.Charge.new_with_defaults(
        charge_id=ID,
        status=ChargeStatus.PENDING,
        status_raw_text='PENDING',
        sentence=sentence,
        bond=bond)

    # Open booking last seen one day before the scrape start: this is the
    # booking expected to be marked as an inferred release.
    booking_open = county_entities.Booking.new_with_defaults(
        booking_id=ID,
        custody_status=CustodyStatus.IN_CUSTODY,
        custody_status_raw_text='IN CUSTODY',
        admission_date=DATE,
        last_seen_time=SCRAPER_START_DATETIME - timedelta(days=1),
        first_seen_time=SCRAPER_START_DATETIME - timedelta(days=1),
        charges=[charge],
        holds=[hold])
    # Already released booking: expected to be unchanged.
    booking_resolved = attr.evolve(
        booking_open,
        booking_id=ID_2,
        custody_status=CustodyStatus.RELEASED,
        custody_status_raw_text='RELEASED',
        release_date=DATE_2,
        charges=[],
        holds=[])
    # Open booking seen at the scrape start: expected to be unchanged.
    booking_open_most_recent_scrape = attr.evolve(
        booking_open,
        booking_id=ID_3,
        last_seen_time=SCRAPER_START_DATETIME,
        charges=[],
        holds=[])

    person = county_entities.Person.new_with_defaults(
        person_id=ID,
        region=REGION_1,
        jurisdiction_id=JURISDICTION_ID,
        bookings=[booking_open, booking_resolved])
    person_unmatched = county_entities.Person.new_with_defaults(
        person_id=ID_2,
        region=REGION_1,
        jurisdiction_id=JURISDICTION_ID,
        bookings=[booking_open_most_recent_scrape])

    session = SessionFactory.for_schema_base(JailsBase)
    try:
        database.write_people(
            session,
            converter.convert_entity_people_to_schema_people(
                [person, person_unmatched]),
            DEFAULT_METADATA)
        session.commit()
    finally:
        # Close even if the arrange step fails so the session cannot
        # dangle into other tests.
        session.close()

    expected_hold = attr.evolve(
        hold,
        status=HoldStatus.REMOVED_WITHOUT_INFO,
        status_raw_text=None)
    expected_sentence = attr.evolve(
        sentence,
        status=SentenceStatus.REMOVED_WITHOUT_INFO,
        status_raw_text=None)
    expected_bond = attr.evolve(
        bond,
        status=BondStatus.REMOVED_WITHOUT_INFO,
        status_raw_text=None)
    expected_charge = attr.evolve(
        charge,
        status=ChargeStatus.REMOVED_WITHOUT_INFO,
        status_raw_text=None,
        bond=expected_bond,
        sentence=expected_sentence)
    expected_resolved_booking = attr.evolve(
        booking_open,
        custody_status=CustodyStatus.INFERRED_RELEASE,
        custody_status_raw_text=None,
        release_date=SCRAPER_START_DATETIME.date(),
        release_date_inferred=True,
        charges=[expected_charge],
        holds=[expected_hold])
    expected_person = attr.evolve(
        person,
        bookings=[expected_resolved_booking, booking_resolved])

    # Act
    persistence.infer_release_on_open_bookings(
        REGION_1, SCRAPER_START_DATETIME, CustodyStatus.INFERRED_RELEASE)

    # Assert
    read_session = SessionFactory.for_schema_base(JailsBase)
    try:
        people = county_dao.read_people(read_session)
        self.assertCountEqual(people, [expected_person, person_unmatched])
    finally:
        # The original never closed this session; release it once the
        # comparison is done.
        read_session.close()