def test_populate_data_dbo_LSIR(self):
        """Checks ingest output for the 'dbo_LSIR' file tag.

        Builds the expected IngestInfo (three people, each with one or more
        LSIR assessments) and hands it to run_parse_file_test together with
        the 'dbo_LSIR' tag.
        """

        def lsir_assessment(assessment_id, date, score):
            # Every assessment in this fixture has type LSIR and class RISK.
            return StateAssessment(
                state_assessment_id=assessment_id,
                assessment_type='LSIR',
                assessment_class='RISK',
                assessment_date=date,
                assessment_score=score)

        def person_with_assessments(person_id, assessments):
            # The same value serves as the person id and as the US_PA_PBPP
            # external id.
            return StatePerson(
                state_person_id=person_id,
                state_person_external_ids=[
                    StatePersonExternalId(
                        state_person_external_id_id=person_id,
                        id_type=US_PA_PBPP),
                ],
                state_assessments=assessments)

        expected_people = [
            person_with_assessments('789C', [
                lsir_assessment('789C-0-1', '01312001', '14'),
            ]),
            person_with_assessments('456B', [
                lsir_assessment('456B-1-1', '12222005', '23'),
            ]),
            person_with_assessments('345E', [
                lsir_assessment('345E-3-1', '01192006', '30'),
                lsir_assessment('345E-3-2', '08032006', '30'),
                lsir_assessment('345E-3-3', '01152007', '31'),
                lsir_assessment('345E-4-1', '07142007', '33'),
            ]),
        ]

        self.run_parse_file_test(
            IngestInfo(state_people=expected_people), 'dbo_LSIR')
    def test_populate_data_dbo_Offender(self):
        """Checks ingest output for the 'dbo_Offender' file tag.

        The expected ids, genders and races below deliberately keep their
        leading/trailing whitespace; they are passed through exactly as
        written here.
        """

        def offender(person_id, gender, race, sid=None):
            # Every person gets a US_PA_PBPP id equal to the person id; a
            # US_PA_SID id is appended only when one is supplied.
            external_ids = [
                StatePersonExternalId(state_person_external_id_id=person_id,
                                      id_type=US_PA_PBPP),
            ]
            if sid is not None:
                external_ids.append(
                    StatePersonExternalId(state_person_external_id_id=sid,
                                          id_type=US_PA_SID))
            return StatePerson(
                state_person_id=person_id,
                gender=gender,
                state_person_external_ids=external_ids,
                state_person_races=[StatePersonRace(race=race)])

        expected_people = [
            offender(' 123A ', 'M       ', 'B    ', sid='12345678'),
            offender('456B ', 'M       ', 'I    ', sid='55554444'),
            # This person has no SID external id.
            offender('789C ', '  F      ', 'N    '),
            offender('012D ', '  F      ', 'W    ', sid='99990000'),
            offender('345E ', '  M     ', 'W    ', sid='09876543'),
        ]

        self.run_parse_file_test(
            IngestInfo(state_people=expected_people), 'dbo_Offender')
    def test_populate_data_dbo_IcsDoc(self):
        """Checks ingest output for the 'dbo_IcsDoc' file tag.

        Each expected person carries five external ids (DOC, CONT, INMATE,
        PBPP, SID), a single GIVEN_NAME alias that repeats the person's name
        fields, and either a race or an ethnicity entry.
        """

        def person(doc_id, name, gender, birthdate, address, linked_ids,
                   **demographics):
            # `name` holds the name keyword arguments (surname, given_names
            # and optionally name_suffix); it is applied to both the person
            # and the alias so the two always agree.
            cont_id, inmate_id, pbpp_id, sid = linked_ids
            typed_ids = [
                (doc_id, US_PA_DOC),
                (cont_id, US_PA_CONT),
                (inmate_id, US_PA_INMATE),
                (pbpp_id, US_PA_PBPP),
                (sid, US_PA_SID),
            ]
            return StatePerson(
                state_person_id=doc_id,
                gender=gender,
                birthdate=birthdate,
                current_address=address,
                state_person_external_ids=[
                    StatePersonExternalId(state_person_external_id_id=value,
                                          id_type=id_type)
                    for value, id_type in typed_ids
                ],
                state_aliases=[StateAlias(alias_type='GIVEN_NAME', **name)],
                **name,
                **demographics)

        expected_people = [
            person('1',
                   dict(surname='RUSSELL', given_names='BERTRAND'),
                   '2',
                   '19760318',
                   '123 Easy Street, PITTSBURGH, PA 16161',
                   ('123456', 'AB7413', '123A', '12345678'),
                   state_person_races=[StatePersonRace(race='2')]),
            person('2',
                   dict(surname='SARTRE', given_names='JEAN-PAUL'),
                   '2',
                   '19821002',
                   '555 FLATBUSH DR, NEW YORK, NY 10031',
                   ('654321', 'GF3374', '456B', '55554444'),
                   state_person_races=[StatePersonRace(race='2')]),
            # Only this person has a name suffix (trailing space preserved).
            person('3',
                   dict(surname='KIERKEGAARD', given_names='SOREN',
                        name_suffix='JR '),
                   '1',
                   '19911120',
                   '5000 SUNNY LANE, APT. 55D, PHILADELPHIA, PA 19129',
                   ('445566', 'CJ1991', '012D', '99990000'),
                   state_person_races=[StatePersonRace(race='6')]),
            # This person is expected to have an ethnicity instead of a race.
            person('4',
                   dict(surname='RAWLS', given_names='JOHN'),
                   '2',
                   '19890617',
                   '214 HAPPY PLACE, PHILADELPHIA, PA 19129',
                   ('778899', 'JE1989', '345E', '09876543'),
                   state_person_ethnicities=[
                       StatePersonEthnicity(ethnicity='3')]),
        ]

        self.run_parse_file_test(
            IngestInfo(state_people=expected_people), 'dbo_IcsDoc')
    def test_populate_data_dbo_tblInmTestScore(self):
        """Checks ingest output for the 'dbo_tblInmTestScore' file tag.

        Assessment types are expected in their raw, space-padded form; each
        padded type is declared once below next to the assessment class that
        accompanies it in the expected output.
        """
        # (raw padded assessment_type, assessment_class) pairs.
        css_m = ('CSS-M                                             ', 'SOCIAL')
        hiq = ('HIQ                                               ', 'SOCIAL')
        lsi_r = ('LSI-R                                             ', 'RISK')
        tcu = ('TCU                                               ', 'SUBSTANCE_ABUSE')
        st99 = ('ST99                                              ', 'SEX_OFFENSE')
        rst = ('RST                                               ', 'RISK')

        def assessment(assessment_id, test, date, score, **extra):
            # `extra` carries optional fields such as assessment_metadata.
            raw_type, assessment_class = test
            return StateAssessment(
                state_assessment_id=assessment_id,
                assessment_type=raw_type,
                assessment_class=assessment_class,
                assessment_date=date,
                assessment_score=score,
                **extra)

        def person(control_number, assessments):
            # The same value serves as the person id and as the
            # US_PA_CONTROL external id.
            return StatePerson(
                state_person_id=control_number,
                state_person_external_ids=[
                    StatePersonExternalId(
                        state_person_external_id_id=control_number,
                        id_type=US_PA_CONTROL),
                ],
                state_assessments=assessments)

        expected_people = [
            person('123456', [
                assessment('123456-1-1', css_m, '6/22/2008 13:20:54', '19'),
                assessment('123456-2-1', hiq, '7/12/2004 8:23:28', '62'),
                assessment('123456-3-3', lsi_r, '10/3/2010 12:11:41', '25'),
            ]),
            person('654321', [
                assessment('654321-1-1', css_m, '4/1/2003 11:42:17', '22'),
                assessment('654321-3-1', lsi_r, '6/8/2004 11:07:48', '19'),
                assessment('654321-4-1', tcu, '1/4/2004 11:09:52', '6'),
                assessment('654321-5-1', st99, '7/5/2004 15:30:59', '4'),
            ]),
            person('445566', [
                assessment('445566-2-1', hiq, '7/28/2005 10:33:31', '61'),
                assessment('445566-3-2', lsi_r, '12/19/2016 15:21:56', '13'),
            ]),
            person('778899', [
                assessment('778899-3-3', lsi_r, '1/6/2017 18:16:56', '14'),
                # Only the RST assessments carry metadata.
                assessment('778899-6-1', rst, '12/8/2012 15:09:08', '9',
                           assessment_metadata=json.dumps(
                               {"latest_version": False})),
                assessment('778899-6-2', rst, '5/11/2018 15:54:06', '7',
                           assessment_metadata=json.dumps(
                               {"latest_version": True})),
            ]),
        ]

        self.run_parse_file_test(
            IngestInfo(state_people=expected_people), 'dbo_tblInmTestScore')
示例#5
0
 def setup_method(self, _):
     """Stores on self.ii a fresh IngestInfo holding one person with one booking.

     Both the person and the booking are created with the id "test"; the
     second argument is ignored.
     """
     ingest_info = IngestInfo()
     new_booking = ingest_info.create_person(
         person_id="test").create_booking(booking_id="test")
     # The booking id is also assigned explicitly after creation.
     new_booking.booking_id = "test"
     self.ii = ingest_info
示例#6
0
    def parse(self, json_people: Iterable[Dict]) -> IngestInfo:
        """Uses the JsonDataExtractor to convert JSON data at the person level
        to IngestInfo objects.

        Args:
            json_people: one dict per booking. Besides whatever the YAML
                mappings consume, this method reads the keys 'hold' (dicts
                with 'pkey', 'holding_for_agency', 'charges'), 'bond' (dicts
                with 'case_pk') and 'booking' (with 'commiting_authority').

        Returns:
            An IngestInfo containing one person per distinct person_id, with
            the bookings of duplicate people merged onto a single person.
        """
        extractor = JsonDataExtractor(self.yaml_file)
        bond_extractor = JsonDataExtractor(self.bond_yaml_file)

        # Group people by person id. Since we're iterating over bookings, not
        # people, we have to manually merge people's bookings.
        people: Dict[str, List[Person]] = defaultdict(list)

        for person_dict in json_people:
            ii = extractor.extract_and_populate_data(person_dict)
            person = scraper_utils.one('person', ii)
            person.place_of_residence = self.get_address(person_dict)
            # TODO(1802): parse ethnicity in enum overrides
            if person.race == 'HISPANIC' or person.ethnicity == 'Y':
                person.race, person.ethnicity = None, 'HISPANIC'
            else:
                person.ethnicity = None

            booking = scraper_utils.one('booking', ii)
            booking.admission_reason = self.get_admission_reason(person_dict)

            for hold in person_dict['hold']:
                jurisdiction_name = hold['holding_for_agency']
                # For a generic "Request to Hold", the charges field is used
                # as the jurisdiction name instead.
                if jurisdiction_name == 'Request to Hold':
                    jurisdiction_name = hold['charges']
                booking.create_hold(hold_id=hold['pkey'],
                                    jurisdiction_name=jurisdiction_name)

            # Bonds are shared across all charges within a single case
            for bond_dict in person_dict['bond']:
                bond = scraper_utils.one(
                    'bond',
                    bond_extractor.extract_and_populate_data(bond_dict))
                case_pk = bond_dict['case_pk']
                # This must be a list, not a generator expression: a
                # generator object is always truthy, which would make the
                # "no matching charges" branch below unreachable.
                matching_charges = [c for c in ii.get_all_charges()
                                    if c.case_number == case_pk]
                if matching_charges:
                    for charge in matching_charges:
                        charge.bond = bond
                else:
                    # Some bonds have no charges associated with their case
                    # NOTE(review): a charge created here also passes through
                    # the get_charge_status loop below -- confirm that helper
                    # copes with a charge that has no charge_id.
                    booking.create_charge(bond=bond)

            court_type = person_dict['booking']['commiting_authority']
            for charge in ii.get_all_charges():
                charge.court_type = court_type
                charge.status = self.get_charge_status(person_dict,
                                                       charge.charge_id)

                if charge.degree:
                    logging.info(
                        "Charge degree found, but we don't expect it "
                        "to be filled in: \n%s", ii)
                if charge.charge_class:
                    logging.info(
                        "Charge class found, but we don't expect it "
                        "to be filled in: \n%s", ii)
                if charge.number_of_counts:
                    # Keep only the leading count from text such as
                    # "3 other mitts"; anything else is cleared.
                    match = re.search(r"([0-9]+) (?:other )?mitts",
                                      charge.number_of_counts, re.IGNORECASE)
                    charge.number_of_counts = match.group(1) if match else None

            for bond in ii.get_all_bonds(lambda b: b.bond_agent):
                # bond.speccond (stored temporarily in bond.bond_agent) might
                # have two cash values separated by a slash, indicating a
                # partial bond.
                if re.search(r'[0-9]+ */ *[0-9]+', bond.bond_agent):
                    bond.bond_type = BondType.PARTIAL_CASH.value
                bond.bond_agent = None

            people[person.person_id].append(person)

        def merge_bookings(dupes):
            # Use the last-seen duplicate as the base and append the bookings
            # of every earlier duplicate to it.
            base = dupes.pop()
            for p in dupes:
                base.bookings.extend(p.bookings)
            return base

        merged_people = [merge_bookings(dupes) for dupes in people.values()]
        return IngestInfo(people=merged_people)