示例#1
0
    def test_readPeople(self):
        """Checks that dao.read_people with no name or birthdate filter
        returns every stored person.
        """
        # Arrange
        named_and_dated = schema.StatePerson(
            person_id=8, full_name=_FULL_NAME, birthdate=_BIRTHDATE)
        other_name = schema.StatePerson(person_id=9, full_name='diff_name')
        other_birthdate = schema.StatePerson(
            person_id=10, birthdate=datetime.date(year=2002, month=1, day=2))
        stored_people = [named_and_dated, other_name, other_birthdate]

        session = SessionFactory.for_schema_base(StateBase)
        for stored in stored_people:
            session.add(stored)
        session.commit()

        # Act
        people = dao.read_people(session, full_name=None, birthdate=None)

        # Assert: all three rows come back, in any order.
        expected_people = [
            converter.convert_schema_object_to_entity(stored)
            for stored in stored_people
        ]
        self.assertCountEqual(people, expected_people)
示例#2
0
    def test_matchPerson_updateStatusOnOrphanedEntities(self):
        """Checks that a bond missing from the ingested charge comes back as
        an orphaned entity whose status is REMOVED_WITHOUT_INFO.

        The stored person has one booking with one charge that carries a
        bond; the ingested copy of that charge has no bond.
        """
        # Arrange
        db_bond = schema.Bond(
            bond_id=_BOND_ID,
            status=BondStatus.PENDING.value,
            booking_id=_BOOKING_ID)
        db_charge = schema.Charge(
            charge_id=_CHARGE_ID,
            status=ChargeStatus.PENDING.value,
            bond=db_bond)
        db_booking = schema.Booking(
            admission_date=_DATE_2,
            booking_id=_BOOKING_ID,
            custody_status=CustodyStatus.IN_CUSTODY.value,
            last_seen_time=_DATE,
            first_seen_time=_DATE,
            charges=[db_charge])
        db_person = schema.Person(
            person_id=_PERSON_ID,
            full_name=_FULL_NAME,
            birthdate=_DATE,
            jurisdiction_id=_JURISDICTION_ID,
            region=_REGION,
            bookings=[db_booking])

        session = SessionFactory.for_schema_base(JailsBase)
        session.add(db_person)
        session.commit()

        # The ingested tree mirrors the stored one, minus primary keys and
        # minus the bond on the charge.
        charge_entity = converter.convert_schema_object_to_entity(db_charge)
        ingested_charge_no_bond = attr.evolve(
            charge_entity, charge_id=None, bond=None)

        booking_entity = converter.convert_schema_object_to_entity(db_booking)
        ingested_booking = attr.evolve(
            booking_entity,
            booking_id=None,
            custody_status=CustodyStatus.RELEASED,
            charges=[ingested_charge_no_bond])

        person_entity = converter.convert_schema_object_to_entity(db_person)
        ingested_person = attr.evolve(
            person_entity, person_id=None, bookings=[ingested_booking])

        # Act
        out = entity_matching.match(session, _REGION, [ingested_person])

        # Assert
        bond_entity = converter.convert_schema_object_to_entity(db_bond)
        expected_orphaned_bond = attr.evolve(
            bond_entity, status=BondStatus.REMOVED_WITHOUT_INFO)
        expected_charge = attr.evolve(
            ingested_charge_no_bond, charge_id=db_charge.charge_id)
        expected_booking = attr.evolve(
            ingested_booking,
            booking_id=db_booking.booking_id,
            charges=[expected_charge])
        expected_person = attr.evolve(
            ingested_person,
            person_id=db_person.person_id,
            bookings=[expected_booking])

        matched_people = converter.convert_schema_objects_to_entity(
            out.people)
        self.assertCountEqual(matched_people, [expected_person])

        orphans = converter.convert_schema_objects_to_entity(
            out.orphaned_entities)
        self.assertCountEqual(orphans, [expected_orphaned_bond])

        self.assertEqual(out.error_count, 0)
示例#3
0
    def test_matchPeople_errorCount(self):
        """Checks that matching reports a single error and still returns the
        other ingested person.

        The first stored person has two bookings that share one external ID
        and the ingested person carries one booking with that ID; the test
        expects error_count == 1 and only the second person in the output.
        """
        # Arrange
        db_booking = schema.Booking(
            external_id=_EXTERNAL_ID,
            admission_date=_DATE_2,
            booking_id=_BOOKING_ID,
            custody_status=CustodyStatus.IN_CUSTODY.value,
            last_seen_time=_DATE,
            first_seen_time=_DATE)
        # Second stored booking: a copy that differs only in primary key.
        db_booking_twin = copy.deepcopy(db_booking)
        db_booking_twin.booking_id = _BOOKING_ID_ANOTHER

        db_person = schema.Person(
            person_id=_PERSON_ID,
            external_id=_EXTERNAL_ID,
            jurisdiction_id=_JURISDICTION_ID,
            full_name=_FULL_NAME,
            birthdate=_DATE,
            region=_REGION,
            bookings=[db_booking, db_booking_twin])
        db_person_another = schema.Person(
            person_id=_PERSON_ID_ANOTHER,
            jurisdiction_id=_JURISDICTION_ID,
            region=_REGION,
            full_name=_NAME_2,
            external_id=_EXTERNAL_ID_ANOTHER)

        session = SessionFactory.for_schema_base(JailsBase)
        for db_row in (db_person, db_person_another):
            session.add(db_row)
        session.commit()

        booking_entity = converter.convert_schema_object_to_entity(db_booking)
        ingested_booking = attr.evolve(
            booking_entity,
            booking_id=None,
            custody_status=CustodyStatus.RELEASED)

        person_entity = converter.convert_schema_object_to_entity(db_person)
        ingested_person = attr.evolve(
            person_entity, person_id=None, bookings=[ingested_booking])

        another_entity = converter.convert_schema_object_to_entity(
            db_person_another)
        ingested_person_another = attr.evolve(another_entity, person_id=None)

        # Act
        out = entity_matching.match(
            session, _REGION, [ingested_person, ingested_person_another])

        # Assert
        expected_person = attr.evolve(
            ingested_person_another, person_id=db_person_another.person_id)

        matched_people = converter.convert_schema_objects_to_entity(
            out.people)
        self.assertCountEqual(matched_people, [expected_person])

        orphans = converter.convert_schema_objects_to_entity(
            out.orphaned_entities)
        self.assertCountEqual(orphans, [])

        self.assertEqual(out.error_count, 1)
示例#4
0
    def test_readPersonIdsMatchMultiplePeople(self):
        """Checks that reading by external IDs returns both stored people
        when they share the external ID carried by the ingested person.
        """
        # Arrange: two stored people, each with the same external ID value.
        first_person = schema.StatePerson(person_id=1)
        first_external_id = schema.StatePersonExternalId(
            person_external_id_id=1,
            external_id=_EXTERNAL_ID,
            id_type=external_id_types.US_ND_SID,
            state_code=_STATE_CODE,
            person=first_person,
        )
        first_person.external_ids = [first_external_id]

        second_person = schema.StatePerson(person_id=2)
        second_external_id = schema.StatePersonExternalId(
            person_external_id_id=2,
            external_id=_EXTERNAL_ID,
            id_type=external_id_types.US_ND_SID,
            state_code=_STATE_CODE,
            person=second_person,
        )
        second_person.external_ids = [second_external_id]

        session = SessionFactory.for_schema_base(StateBase)
        for stored in (first_person, second_person):
            session.add(stored)
        session.commit()

        # The ingested person carries the shared ID plus a second ID that is
        # not attached to either stored person above.
        ingested_person = entities.StatePerson.new_with_defaults()
        shared_id = entities.StatePersonExternalId.new_with_defaults(
            external_id=_EXTERNAL_ID,
            id_type=external_id_types.US_ND_SID,
            state_code=_STATE_CODE)
        extra_id = entities.StatePersonExternalId.new_with_defaults(
            external_id=_EXTERNAL_ID2,
            id_type=external_id_types.US_ND_SID,
            state_code=_STATE_CODE)
        ingested_person.external_ids = [shared_id, extra_id]

        # Act
        people = dao.read_people_by_external_ids(
            session, _REGION, [ingested_person])

        # Assert
        expected_people = [
            converter.convert_schema_object_to_entity(stored)
            for stored in (first_person, second_person)
        ]
        self.assertCountEqual(people, expected_people)
示例#5
0
def _convert_and_normalize_record_trees(
        people: List[schema.StatePerson],
        populate_back_edges: bool = True) -> List[entities.StatePerson]:
    """Converts schema people to entities, keeping one entity per person ID.

    Only the first record seen for each person_id is converted; later
    records with the same ID are counted and reported in a single error log.
    A timestamped info log is written before returning.

    Args:
        people: Schema person records, possibly containing repeated IDs.
        populate_back_edges: Forwarded to the converter as its second
            positional argument.

    Returns:
        The converted entities, in order of first appearance.

    Raises:
        ValueError: If the converter returns something other than an
            entities.StatePerson.
    """
    occurrences: Dict[int, int] = defaultdict(int)
    unique_people: List[entities.StatePerson] = []

    for schema_person in people:
        person_id = schema_person.person_id
        occurrences[person_id] += 1
        if occurrences[person_id] > 1:
            # Already converted the first time this ID was seen.
            continue

        entity = converter.convert_schema_object_to_entity(
            schema_person, populate_back_edges)
        if isinstance(entity, entities.StatePerson):
            unique_people.append(entity)
        else:
            raise ValueError(
                f"Unexpected return type [{entity.__class__}]")

    repeated_lines = [
        'ID {} with count {}'.format(person_id, seen)
        for person_id, seen in occurrences.items()
        if seen > 1
    ]
    if repeated_lines:
        logging.error("Duplicate records returned for person IDs:\n%s",
                      '\n'.join(repeated_lines))

    logging.info("Finished _convert_and_normalize_record_trees at time [%s]",
                 datetime.datetime.now().isoformat())
    return unique_people
示例#6
0
    def test_readPeopleByRootExternalIds_SentenceGroupExternalId(self):
        """Checks that a person is found by the external ID of one of their
        sentence groups.

        The person has two sentence groups with different external IDs and
        only the first one's ID is queried.
        """
        # Arrange
        person = schema.StatePerson(person_id=1)
        queried_group = schema.StateSentenceGroup(
            sentence_group_id=1,
            external_id=_EXTERNAL_ID,
            status=StateSentenceStatus.PRESENT_WITHOUT_INFO.value,
            state_code=_STATE_CODE,
            person=person)
        other_group = schema.StateSentenceGroup(
            sentence_group_id=2,
            external_id=_EXTERNAL_ID2,
            status=StateSentenceStatus.PRESENT_WITHOUT_INFO.value,
            state_code=_STATE_CODE,
            person=person)
        person.sentence_groups = [queried_group, other_group]

        session = SessionFactory.for_schema_base(StateBase)
        session.add(person)
        session.commit()

        # Act
        queried_ids = [_EXTERNAL_ID]
        people = dao.read_people_by_cls_external_ids(
            session, _STATE_CODE, entities.StateSentenceGroup, queried_ids)

        # Assert
        expected_person = converter.convert_schema_object_to_entity(person)
        self.assertCountEqual(people, [expected_person])
示例#7
0
    def test_readPeopleByRootExternalIds_entireTreeReturnedWithOneMatch(self):
        """Checks that the whole person tree is returned when only one of the
        person's external IDs is among the queried IDs.

        The person has two external IDs: one equal to the queried value and
        one that differs from it. The expected result is the full converted
        person, including both external IDs.
        """
        # Arrange
        person = schema.StatePerson(person_id=1)
        external_id_match = schema.StatePersonExternalId(
            person_external_id_id=1,
            external_id=_EXTERNAL_ID,
            id_type=external_id_types.US_ND_SID,
            state_code=_STATE_CODE,
            person=person,
        )
        # Must differ from the queried ID; otherwise both IDs match and the
        # "one match" scenario named by this test is never exercised.
        external_id_no_match = schema.StatePersonExternalId(
            person_external_id_id=2,
            external_id=_EXTERNAL_ID2,
            id_type=external_id_types.US_ND_SID,
            state_code=_STATE_CODE,
            person=person,
        )
        person.external_ids = [external_id_match, external_id_no_match]

        session = SessionFactory.for_schema_base(StateBase)
        session.add(person)
        session.commit()

        # Act
        people = dao.read_people_by_cls_external_ids(session, _STATE_CODE,
                                                     entities.StatePerson,
                                                     [_EXTERNAL_ID])

        # Assert
        expected_people = [converter.convert_schema_object_to_entity(person)]

        self.assertCountEqual(people, expected_people)
示例#8
0
    def test_readPeopleByExternalId(self):
        """Checks that reading by external ID returns only the person whose
        external ID matches the ingested person.

        Both stored people have a single closed booking; only one of them
        has the external ID being looked up.
        """
        # Arrange
        admission_date = datetime.datetime(2018, 6, 20)
        release_date = datetime.date(2018, 7, 20)
        # RELEASED, so the fixture is actually a closed booking as its name
        # and release_date say.
        closed_booking = Booking(
            custody_status=CustodyStatus.RELEASED.value,
            admission_date=admission_date,
            release_date=release_date,
            first_seen_time=admission_date,
            last_seen_time=admission_date)

        person_no_match = Person(person_id=1, region=_REGION,
                                 jurisdiction_id=_JURISDICTION_ID,
                                 bookings=[deepcopy(closed_booking)])
        person_match_external_id = Person(person_id=2, region=_REGION,
                                          jurisdiction_id=_JURISDICTION_ID,
                                          bookings=[closed_booking],
                                          external_id=_EXTERNAL_ID)

        session = SessionFactory.for_schema_base(JailsBase)
        session.add(person_no_match)
        session.add(person_match_external_id)
        session.commit()

        # Act
        ingested_person = entities.Person.new_with_defaults(
            external_id=_EXTERNAL_ID)
        people = dao.read_people_by_external_ids(session, _REGION,
                                                 [ingested_person])

        # Assert
        expected_people = [
            converter.convert_schema_object_to_entity(person_match_external_id)]
        self.assertCountEqual(people, expected_people)
示例#9
0
def _convert_and_normalize_record_trees(people: List[Person]) -> List[entities.Person]:
    """Converts schema people to entities, keeping one entity per person ID.

    Only the first record seen for each person_id is converted; later
    records with the same ID are counted and reported in a single error log.

    Args:
        people: Schema person records, possibly containing repeated IDs.

    Returns:
        The converted entities, in order of first appearance.

    Raises:
        ValueError: If the converter returns something other than an
            entities.Person.
    """
    occurrences: Dict[int, int] = defaultdict(int)
    unique_people: List[entities.Person] = []

    for schema_person in people:
        person_id = schema_person.person_id
        occurrences[person_id] += 1
        if occurrences[person_id] > 1:
            # Already converted the first time this ID was seen.
            continue

        entity = converter.convert_schema_object_to_entity(schema_person)
        if isinstance(entity, entities.Person):
            unique_people.append(entity)
        else:
            raise ValueError(f"Unexpected return type [{entity.__class__}]")

    repeated_lines = [
        "ID {} with count {}".format(person_id, seen)
        for person_id, seen in occurrences.items()
        if seen > 1
    ]
    if repeated_lines:
        logging.error(
            "Duplicate records returned for person IDs:\n%s",
            "\n".join(repeated_lines),
        )

    return unique_people
示例#10
0
    def test_readPeopleWithOpenBookingsBeforeDate(self):
        """Checks that only the person with an in-custody booking last seen
        before the given scrape time, in the requested region, is returned.

        Three other people are stored as negative cases: one in another
        region, one whose booking was last seen at the scrape time itself,
        and one whose booking is released.
        """
        # Arrange
        person = Person(
            person_id=8, region=_REGION, jurisdiction_id=_JURISDICTION_ID)
        person_resolved_booking = Person(
            person_id=9, region=_REGION, jurisdiction_id=_JURISDICTION_ID)
        person_most_recent_scrape = Person(
            person_id=10, region=_REGION, jurisdiction_id=_JURISDICTION_ID)
        person_wrong_region = Person(
            person_id=11, region=_REGION_ANOTHER,
            jurisdiction_id=_JURISDICTION_ID)

        release_date = datetime.date(2018, 7, 20)
        most_recent_scrape_date = datetime.datetime(2018, 6, 20)
        one_day = datetime.timedelta(days=1)
        three_days = datetime.timedelta(days=3)
        date_in_past = most_recent_scrape_date - one_day
        first_seen_time = most_recent_scrape_date - three_days

        in_custody = CustodyStatus.IN_CUSTODY.value

        # Booking that should cause its person to be returned
        open_booking_before_last_scrape = Booking(
            person_id=person.person_id,
            custody_status=in_custody,
            first_seen_time=first_seen_time,
            last_seen_time=date_in_past)

        # Bookings that should not cause their person to be returned
        open_booking_incorrect_region = Booking(
            person_id=person_wrong_region.person_id,
            custody_status=in_custody,
            first_seen_time=first_seen_time,
            last_seen_time=date_in_past)
        open_booking_most_recent_scrape = Booking(
            person_id=person_most_recent_scrape.person_id,
            custody_status=in_custody,
            first_seen_time=first_seen_time,
            last_seen_time=most_recent_scrape_date)
        resolved_booking = Booking(
            person_id=person_resolved_booking.person_id,
            custody_status=CustodyStatus.RELEASED.value,
            release_date=release_date,
            first_seen_time=first_seen_time,
            last_seen_time=date_in_past)

        session = SessionFactory.for_schema_base(JailsBase)
        rows_to_store = (
            person,
            person_resolved_booking,
            person_most_recent_scrape,
            person_wrong_region,
            open_booking_before_last_scrape,
            open_booking_incorrect_region,
            open_booking_most_recent_scrape,
            resolved_booking,
        )
        for row in rows_to_store:
            session.add(row)
        session.commit()

        # Act
        people = dao.read_people_with_open_bookings_scraped_before_time(
            session, person.region, most_recent_scrape_date)

        # Assert
        expected_people = [converter.convert_schema_object_to_entity(person)]
        self.assertEqual(people, expected_people)
    def _ingest_file_schema_metadata_as_entity(
            schema_metadata: schema.DirectIngestIngestFileMetadata) -> DirectIngestIngestFileMetadata:
        """Converts ingest file schema metadata into its entity form.

        Raises:
            ValueError: If the converted object is not a
                DirectIngestIngestFileMetadata entity.
        """
        converted = convert_schema_object_to_entity(schema_metadata)

        # Guard clause: hand back the entity only when it has the expected type.
        if isinstance(converted, DirectIngestIngestFileMetadata):
            return converted

        raise ValueError(f'Unexpected metadata entity type: {type(converted)}')
示例#12
0
    def test_readPeopleWithOpenBookings(self):
        """Checks that only the person who has both the ingested full name
        and an in-custody booking is returned.

        The negative cases are a person with an open booking but no name,
        and a person with the name but only a released booking.
        """
        # Arrange
        admission_date = datetime.datetime(2018, 6, 20)
        release_date = datetime.date(2018, 7, 20)

        open_booking = Booking(
            custody_status=CustodyStatus.IN_CUSTODY.value,
            admission_date=admission_date,
            first_seen_time=admission_date,
            last_seen_time=admission_date,
        )
        closed_booking = Booking(
            custody_status=CustodyStatus.RELEASED.value,
            admission_date=admission_date,
            release_date=release_date,
            first_seen_time=admission_date,
            last_seen_time=admission_date,
        )

        # Each person with an open booking gets an independent copy of it.
        person_no_match = Person(
            person_id=1,
            region=_REGION,
            jurisdiction_id=_JURISDICTION_ID,
            bookings=[deepcopy(open_booking)],
        )
        person_match_full_name = Person(
            person_id=2,
            region=_REGION,
            jurisdiction_id=_JURISDICTION_ID,
            bookings=[deepcopy(open_booking)],
            full_name=_FULL_NAME,
        )
        person_no_open_bookings = Person(
            person_id=6,
            region=_REGION,
            jurisdiction_id=_JURISDICTION_ID,
            full_name=_FULL_NAME,
            bookings=[closed_booking],
        )

        with SessionFactory.using_database(self.database_key,
                                           autocommit=False) as session:
            stored_people = (
                person_no_match,
                person_no_open_bookings,
                person_match_full_name,
            )
            for stored in stored_people:
                session.add(stored)
            session.commit()

            # Act
            info = IngestInfo()
            info.create_person(full_name=_FULL_NAME, person_id=_EXTERNAL_ID)
            people = dao.read_people_with_open_bookings(
                session, _REGION, info.people)

            # Assert
            expected_person = converter.convert_schema_object_to_entity(
                person_match_full_name)
            self.assertCountEqual(people, [expected_person])
    def test_matchPeople_twoMatchingPeople_PicksMostSimilar(self):
        """Checks that when two stored people both match the ingested person,
        the ingested person ends up with the ID of the more similar one.

        The two stored people differ only in person_id and gender; the
        ingested person is built from the first of them.
        """
        # Arrange
        db_person = schema.Person(
            person_id=_PERSON_ID,
            external_id=_EXTERNAL_ID,
            jurisdiction_id=_JURISDICTION_ID,
            full_name=_FULL_NAME,
            birthdate=_DATE,
            region=_REGION,
            gender=Gender.MALE.value,
        )

        db_person_other_gender = copy.deepcopy(db_person)
        db_person_other_gender.person_id = _PERSON_ID_ANOTHER
        db_person_other_gender.gender = Gender.FEMALE.value

        session = SessionFactory.for_schema_base(JailsBase)
        for db_row in (db_person, db_person_other_gender):
            session.add(db_row)
        session.commit()

        person_entity = converter.convert_schema_object_to_entity(db_person)
        ingested_person = attr.evolve(person_entity, person_id=None)
        expected_person = attr.evolve(
            ingested_person, person_id=db_person.person_id)

        # Act
        matched_entities = entity_matching.match(
            session, _REGION, [ingested_person])

        # Assert both schema objects are matches, but we select the most
        # similar one.
        matches_original = county_matching_utils.is_person_match(
            db_entity=db_person, ingested_entity=ingested_person)
        self.assertTrue(matches_original)

        matches_other_gender = county_matching_utils.is_person_match(
            db_entity=db_person_other_gender, ingested_entity=ingested_person)
        self.assertTrue(matches_other_gender)

        self.assertEqual(matched_entities.error_count, 0)
        self.assertEqual(len(matched_entities.orphaned_entities), 0)
        self.assertEqual(ingested_person, expected_person)
示例#14
0
    def test_readPeople_byFullName(self):
        """Checks that dao.read_people filtered by full name returns only the
        stored person with that name.
        """
        # Arrange
        wanted = schema.StatePerson(person_id=8, full_name=_FULL_NAME)
        other_name = schema.StatePerson(person_id=9, full_name='diff_name')

        session = SessionFactory.for_schema_base(StateBase)
        for stored in (wanted, other_name):
            session.add(stored)
        session.commit()

        # Act
        people = dao.read_people(session, full_name=_FULL_NAME, birthdate=None)

        # Assert
        expected_person = converter.convert_schema_object_to_entity(wanted)
        self.assertCountEqual(people, [expected_person])
 def to_entity(self, schema_obj):
     """Converts a schema object to an entity with populate_back_edges
     turned off."""
     entity = converter.convert_schema_object_to_entity(
         schema_obj, populate_back_edges=False)
     return entity
 def assert_schema_objects_equal(self, expected: StateBase,
                                 actual: StateBase):
     """Asserts that two schema objects are equal once both have been
     converted to entities."""
     expected_entity = converter.convert_schema_object_to_entity(expected)
     actual_entity = converter.convert_schema_object_to_entity(actual)
     self.assertEqual(expected_entity, actual_entity)
    def test_matchPeople(self):
        """Checks that two ingested people are each matched to their stored
        counterpart, with no orphaned entities and no errors.

        One stored person has no external ID and an in-custody booking; the
        other has an external ID and a released booking. Each expected
        output is the ingested entity with the stored primary keys filled in.
        """
        # Arrange
        schema_booking = schema.Booking(
            admission_date=_DATE_2,
            booking_id=_BOOKING_ID,
            custody_status=CustodyStatus.IN_CUSTODY.value,
            last_seen_time=_DATE,
            first_seen_time=_DATE,
        )

        schema_person = schema.Person(
            person_id=_PERSON_ID,
            full_name=_FULL_NAME,
            birthdate=_DATE,
            jurisdiction_id=_JURISDICTION_ID,
            region=_REGION,
            bookings=[schema_booking],
        )

        schema_booking_external_id = schema.Booking(
            admission_date=_DATE_2,
            booking_id=_BOOKING_ID_ANOTHER,
            release_date=_DATE,
            custody_status=CustodyStatus.RELEASED.value,
            last_seen_time=_DATE,
            first_seen_time=_DATE,
        )

        schema_person_external_id = schema.Person(
            person_id=_PERSON_ID_ANOTHER,
            external_id=_EXTERNAL_ID,
            full_name=_FULL_NAME,
            birthdate=_DATE,
            jurisdiction_id=_JURISDICTION_ID,
            region=_REGION,
            bookings=[schema_booking_external_id],
        )

        # Everything that touches the session, including the match call and
        # the conversion of its output, stays inside the context manager so
        # the session is never used after the block has exited.
        with SessionFactory.using_database(self.database_key,
                                           autocommit=False) as session:
            session.add(schema_person)
            session.add(schema_person_external_id)
            session.commit()

            ingested_booking = attr.evolve(
                converter.convert_schema_object_to_entity(schema_booking),
                booking_id=None,
                custody_status=CustodyStatus.RELEASED,
            )
            ingested_person = attr.evolve(
                converter.convert_schema_object_to_entity(schema_person),
                person_id=None,
                bookings=[ingested_booking],
            )

            ingested_booking_external_id = attr.evolve(
                converter.convert_schema_object_to_entity(
                    schema_booking_external_id),
                booking_id=None,
                facility=_FACILITY,
            )
            ingested_person_external_id = attr.evolve(
                converter.convert_schema_object_to_entity(
                    schema_person_external_id),
                person_id=None,
                bookings=[ingested_booking_external_id],
            )

            # Act
            out = entity_matching.match(
                session, _REGION,
                [ingested_person_external_id, ingested_person])

            # Assert
            expected_booking = attr.evolve(ingested_booking,
                                           booking_id=_BOOKING_ID)
            expected_person = attr.evolve(ingested_person,
                                          person_id=_PERSON_ID,
                                          bookings=[expected_booking])

            expected_booking_external_id = attr.evolve(
                ingested_booking_external_id, booking_id=_BOOKING_ID_ANOTHER)
            expected_person_external_id = attr.evolve(
                ingested_person_external_id,
                person_id=_PERSON_ID_ANOTHER,
                bookings=[expected_booking_external_id],
            )
            self.assertCountEqual(
                converter.convert_schema_objects_to_entity(out.people),
                [expected_person_external_id, expected_person],
            )
            self.assertCountEqual(
                converter.convert_schema_objects_to_entity(
                    out.orphaned_entities),
                [])
            self.assertEqual(out.error_count, 0)
示例#18
0
 def to_entity(self, schema_obj: DatabaseEntity) -> Entity:
     """Converts a schema object to an entity with populate_back_edges
     turned off."""
     entity = converter.convert_schema_object_to_entity(
         schema_obj, populate_back_edges=False)
     return entity