示例#1
0
        def match_and_write_people(session: Session) -> bool:
            """Entity-match the converted people and write them using |session|.

            Reads `metadata`, `people`, `total_people`, `conversion_result` and
            `data_validation_errors` from the enclosing scope.

            Args:
                session: Session handed to both entity matching and
                    database.write_people. No commit is issued here.

            Returns:
                False when _should_abort() is true after entity matching (in
                which case nothing is written), True once
                database.write_people has returned.
            """
            logging.info("Starting entity matching")

            matching_result = entity_matching.match(
                session, metadata.region, people)
            matched_people = matching_result.people

            # County ingests are measured against the pre-computed people
            # count; every other system level uses the root entity count
            # reported by entity matching.
            if metadata.system_level == SystemLevel.COUNTY:
                root_entity_count = total_people
            else:
                root_entity_count = matching_result.total_root_entities

            logging.info("Completed entity matching with [%s] errors",
                         matching_result.error_count)
            logging.info(
                "Completed entity matching and have [%s] total people "
                "to commit to DB", len(matched_people))

            must_abort = _should_abort(
                total_root_entities=root_entity_count,
                conversion_result=conversion_result,
                entity_matching_errors=matching_result.error_count,
                data_validation_errors=data_validation_errors)
            if must_abort:
                #  TODO(#1665): remove once dangling PERSIST session
                #   investigation is complete.
                logging.info("_should_abort_ was true after entity matching")
                return False

            database.write_people(
                session,
                matched_people,
                metadata,
                orphaned_entities=matching_result.orphaned_entities)
            logging.info("Successfully wrote to the database")
            return True
示例#2
0
def infer_release_on_open_bookings(
        region_code: str,
        last_ingest_time: datetime.datetime,
        custody_status: CustodyStatus = CustodyStatus.INFERRED_RELEASE
) -> None:
    """
    Look up all open bookings whose last_seen_time is earlier than the
    provided last_ingest_time in the provided region, update those
    bookings to have an inferred release date equal to the provided
    last_ingest_time.

    PII is removed from every person found before the updated people are
    written back. The whole update runs in a single session: it is committed
    on success, rolled back if anything raises, and always closed.

    Args:
        region_code: the region_code
        last_ingest_time: The last time complete data was ingested for this
            region. In the normal ingest pipeline, this is the last start time
            of a background scrape for the region.
        custody_status: The custody status to be marked on the found open
            bookings. Defaults to INFERRED_RELEASE

    Raises:
        Exception: Anything raised while reading, updating or writing is
            re-raised unchanged after the session has been rolled back.
    """

    session = SessionFactory.for_schema_base(JailsBase)
    try:
        logging.info("Reading all bookings that happened before [%s]",
                     last_ingest_time)
        people = county_dao.read_people_with_open_bookings_scraped_before_time(
            session, region_code, last_ingest_time)

        logging.info(
            "Found [%s] people with bookings that will be inferred released",
            len(people),
        )
        for person in people:
            persistence_utils.remove_pii_for_person(person)
            _infer_release_date_for_bookings(person.bookings, last_ingest_time,
                                             custody_status)
        db_people = converter.convert_entity_people_to_schema_people(people)
        # NOTE(review): jurisdiction_id is passed as an empty string here;
        # presumably write_people does not need it for this update - confirm.
        database.write_people(
            session,
            db_people,
            IngestMetadata(region=region_code,
                           jurisdiction_id="",
                           ingest_time=last_ingest_time),
        )
        session.commit()
    except Exception:
        # Undo any partial work before propagating the error to the caller.
        session.rollback()
        raise
    finally:
        session.close()
示例#3
0
def write(ingest_info, metadata):
    """
    Validate and convert the people in |ingest_info| and, when persistence is
    enabled, entity-match them and commit them to the database.

    Persistence is gated on _should_persist() (per the original notes: in
    prod, or when 'PERSIST_LOCALLY' is set to true); a person that already
    exists (same surname/birthday) is updated rather than duplicated. When
    persistence is disabled the conversion results are only logged.

    Returns:
        False if _should_abort() is true after conversion or after entity
        matching. True if persistence is disabled, or once the people have
        been committed.

    Raises:
        Exception: Anything raised during entity matching, writing or commit
            is logged, recorded on the monitoring tags, and re-raised after
            the session has been rolled back.
    """
    ingest_info_validator.validate(ingest_info)

    mtags = {
        monitoring.TagKey.SHOULD_PERSIST: _should_persist(),
        monitoring.TagKey.PERSISTED: False
    }
    total_people = _get_total_people(ingest_info, metadata)
    with monitoring.measurements(mtags) as measurements:

        # Convert the people one at a time and count the errors as they happen.
        conversion_result: IngestInfoConversionResult = \
            ingest_info_converter.convert_to_persistence_entities(
                ingest_info, metadata)

        validated_people, data_validation_errors = entity_validator.validate(
            conversion_result.people)
        logging.info(
            "Converted [%s] people with [%s] enum_parsing_errors, [%s]"
            " general_parsing_errors, [%s] protected_class_errors and "
            "[%s] data_validation_errors",
            len(validated_people),
            conversion_result.enum_parsing_errors,
            conversion_result.general_parsing_errors,
            conversion_result.protected_class_errors,
            data_validation_errors)
        measurements.measure_int_put(m_people, len(validated_people))

        abort_after_conversion = _should_abort(
            total_root_entities=total_people,
            conversion_result=conversion_result,
            data_validation_errors=data_validation_errors)
        if abort_after_conversion:
            #  TODO(#1665): remove once dangling PERSIST session investigation
            #   is complete.
            logging.info("_should_abort_ was true after converting people")
            return False

        if not _should_persist():
            return True

        schema_base = schema_base_for_system_level(metadata.system_level)
        session = SessionFactory.for_schema_base(schema_base)

        try:
            logging.info("Starting entity matching")

            matching_result = entity_matching.match(
                session, metadata.region, validated_people)
            matched_people = matching_result.people

            # County ingests are measured against the pre-computed people
            # count; other system levels use the count from entity matching.
            if metadata.system_level == SystemLevel.COUNTY:
                root_entity_count = total_people
            else:
                root_entity_count = matching_result.total_root_entities

            logging.info("Completed entity matching with [%s] errors",
                         matching_result.error_count)
            logging.info(
                "Completed entity matching and have [%s] total people "
                "to commit to DB", len(matched_people))

            abort_after_matching = _should_abort(
                total_root_entities=root_entity_count,
                conversion_result=conversion_result,
                entity_matching_errors=matching_result.error_count,
                data_validation_errors=data_validation_errors)
            if abort_after_matching:
                #  TODO(#1665): remove once dangling PERSIST session
                #   investigation is complete.
                logging.info("_should_abort_ was true after entity matching")
                return False

            database.write_people(
                session,
                matched_people,
                metadata,
                orphaned_entities=matching_result.orphaned_entities)
            logging.info("Successfully wrote to the database")
            session.commit()

            mtags[monitoring.TagKey.PERSISTED] = True
        except Exception as e:
            logging.exception("An exception was raised in write(): [%s]",
                              type(e).__name__)
            # Record the error type that happened and increment the counter
            mtags[monitoring.TagKey.ERROR] = type(e).__name__
            measurements.measure_int_put(m_errors, 1)
            session.rollback()
            raise
        finally:
            session.close()

        # Only reachable when the try block ran to completion, i.e. after a
        # successful commit.
        return True
示例#4
0
    def test_inferReleaseDateOnOpenBookings(self):
        """Open bookings last seen before the ingest time get inferred release.

        Writes two people for the same region: one holding an open booking
        last seen a day before SCRAPER_START_DATETIME (plus an already
        released booking), and one whose open booking was last seen at
        SCRAPER_START_DATETIME. After infer_release_on_open_bookings runs, only
        the stale open booking and its children are expected to change.
        """
        # Arrange
        hold = county_entities.Hold.new_with_defaults(hold_id=ID,
                                                      status=HoldStatus.ACTIVE,
                                                      status_raw_text='ACTIVE')
        sentence = county_entities.Sentence.new_with_defaults(
            sentence_id=ID,
            status=SentenceStatus.SERVING,
            status_raw_text='SERVING',
            booking_id=ID)
        bond = county_entities.Bond.new_with_defaults(
            bond_id=ID,
            status=BondStatus.SET,
            status_raw_text='NOT_REQUIRED',
            booking_id=ID)
        charge = county_entities.Charge.new_with_defaults(
            charge_id=ID,
            status=ChargeStatus.PENDING,
            status_raw_text='PENDING',
            sentence=sentence,
            bond=bond)
        # Open booking last seen one day before the scrape start: this is the
        # booking expected to be marked as an inferred release.
        booking_open = county_entities.Booking.new_with_defaults(
            booking_id=ID,
            custody_status=CustodyStatus.IN_CUSTODY,
            custody_status_raw_text='IN CUSTODY',
            admission_date=DATE,
            last_seen_time=SCRAPER_START_DATETIME - timedelta(days=1),
            first_seen_time=SCRAPER_START_DATETIME - timedelta(days=1),
            charges=[charge],
            holds=[hold])
        # Already released booking: expected to come back unchanged.
        booking_resolved = attr.evolve(booking_open,
                                       booking_id=ID_2,
                                       custody_status=CustodyStatus.RELEASED,
                                       custody_status_raw_text='RELEASED',
                                       release_date=DATE_2,
                                       charges=[],
                                       holds=[])
        # Open booking last seen at the scrape start itself: expected to come
        # back unchanged.
        booking_open_most_recent_scrape = attr.evolve(
            booking_open,
            booking_id=ID_3,
            last_seen_time=SCRAPER_START_DATETIME,
            charges=[],
            holds=[])

        person = county_entities.Person.new_with_defaults(
            person_id=ID,
            region=REGION_1,
            jurisdiction_id=JURISDICTION_ID,
            bookings=[booking_open, booking_resolved])
        person_unmatched = county_entities.Person.new_with_defaults(
            person_id=ID_2,
            region=REGION_1,
            jurisdiction_id=JURISDICTION_ID,
            bookings=[booking_open_most_recent_scrape])

        session = SessionFactory.for_schema_base(JailsBase)
        database.write_people(
            session,
            converter.convert_entity_people_to_schema_people(
                [person, person_unmatched]), DEFAULT_METADATA)
        session.commit()
        session.close()

        # Children of the stale open booking are expected to move to
        # REMOVED_WITHOUT_INFO with their raw status text cleared.
        expected_hold = attr.evolve(hold,
                                    status=HoldStatus.REMOVED_WITHOUT_INFO,
                                    status_raw_text=None)
        expected_sentence = attr.evolve(
            sentence,
            status=SentenceStatus.REMOVED_WITHOUT_INFO,
            status_raw_text=None)
        expected_bond = attr.evolve(bond,
                                    status=BondStatus.REMOVED_WITHOUT_INFO,
                                    status_raw_text=None)
        expected_charge = attr.evolve(charge,
                                      status=ChargeStatus.REMOVED_WITHOUT_INFO,
                                      status_raw_text=None,
                                      bond=expected_bond,
                                      sentence=expected_sentence)
        expected_resolved_booking = attr.evolve(
            booking_open,
            custody_status=CustodyStatus.INFERRED_RELEASE,
            custody_status_raw_text=None,
            release_date=SCRAPER_START_DATETIME.date(),
            release_date_inferred=True,
            charges=[expected_charge],
            holds=[expected_hold])
        expected_person = attr.evolve(
            person, bookings=[expected_resolved_booking, booking_resolved])

        # Act
        persistence.infer_release_on_open_bookings(
            REGION_1, SCRAPER_START_DATETIME, CustodyStatus.INFERRED_RELEASE)

        # Assert
        # Close the read session as well, matching the explicit close of the
        # Arrange session above, so no open session outlives the test.
        read_session = SessionFactory.for_schema_base(JailsBase)
        try:
            people = county_dao.read_people(read_session)
            self.assertCountEqual(people, [expected_person, person_unmatched])
        finally:
            read_session.close()