def test_populate_data_dbo_LSIR(self):
    """Run the 'dbo_LSIR' parse-file test against hand-built expected output.

    Every expected assessment has type 'LSIR' and class 'RISK'; only the
    assessment id, date string and score differ from row to row, and each
    expected person carries a single US_PA_PBPP external id.
    """

    def lsir(assessment_id, date, score):
        # One expected LSIR risk assessment.
        return StateAssessment(
            state_assessment_id=assessment_id,
            assessment_type='LSIR',
            assessment_class='RISK',
            assessment_date=date,
            assessment_score=score,
        )

    def person(pbpp_id, assessments):
        # One expected person whose person id and external id are the same.
        return StatePerson(
            state_person_id=pbpp_id,
            state_person_external_ids=[
                StatePersonExternalId(
                    state_person_external_id_id=pbpp_id,
                    id_type=US_PA_PBPP),
            ],
            state_assessments=assessments,
        )

    expected_people = [
        person('789C', [lsir('789C-0-1', '01312001', '14')]),
        person('456B', [lsir('456B-1-1', '12222005', '23')]),
        person('345E', [
            lsir('345E-3-1', '01192006', '30'),
            lsir('345E-3-2', '08032006', '30'),
            lsir('345E-3-3', '01152007', '31'),
            lsir('345E-4-1', '07142007', '33'),
        ]),
    ]

    self.run_parse_file_test(
        IngestInfo(state_people=expected_people), 'dbo_LSIR')
def test_populate_data_dbo_Offender(self):
    """Run the 'dbo_Offender' parse-file test against hand-built output.

    The expected ids, genders and races deliberately keep the leading and
    trailing whitespace exactly as written here. Each person has a
    US_PA_PBPP external id and, for all but '789C ', a US_PA_SID id too.
    """

    def offender(pbpp_id, gender, race, sid=None):
        # The PBPP id always comes first; the SID is appended only if given.
        external_ids = [
            StatePersonExternalId(
                state_person_external_id_id=pbpp_id, id_type=US_PA_PBPP),
        ]
        if sid is not None:
            external_ids.append(
                StatePersonExternalId(
                    state_person_external_id_id=sid, id_type=US_PA_SID))
        return StatePerson(
            state_person_id=pbpp_id,
            gender=gender,
            state_person_external_ids=external_ids,
            state_person_races=[StatePersonRace(race=race)],
        )

    expected_people = [
        offender(' 123A ', 'M ', 'B ', sid='12345678'),
        offender('456B ', 'M ', 'I ', sid='55554444'),
        offender('789C ', ' F ', 'N '),
        offender('012D ', ' F ', 'W ', sid='99990000'),
        offender('345E ', ' M ', 'W ', sid='09876543'),
    ]

    self.run_parse_file_test(
        IngestInfo(state_people=expected_people), 'dbo_Offender')
def test_populate_data_dbo_IcsDoc(self):
    """Run the 'dbo_IcsDoc' parse-file test against hand-built output.

    Each expected person carries five external ids (DOC, CONT, INMATE,
    PBPP, SID, in that order) and one 'GIVEN_NAME' alias repeating the
    person's name fields. The first three people have a race; the fourth
    has an ethnicity instead.
    """

    def external_ids(doc_id, control_id, inmate_id, pbpp_id, sid):
        # Same five id types, in the same order, for every expected person.
        typed_ids = (
            (doc_id, US_PA_DOC),
            (control_id, US_PA_CONT),
            (inmate_id, US_PA_INMATE),
            (pbpp_id, US_PA_PBPP),
            (sid, US_PA_SID),
        )
        return [
            StatePersonExternalId(
                state_person_external_id_id=value, id_type=id_type)
            for value, id_type in typed_ids
        ]

    russell = StatePerson(
        state_person_id='1',
        surname='RUSSELL',
        given_names='BERTRAND',
        gender='2',
        birthdate='19760318',
        current_address='123 Easy Street, PITTSBURGH, PA 16161',
        state_person_external_ids=external_ids(
            '1', '123456', 'AB7413', '123A', '12345678'),
        state_person_races=[StatePersonRace(race='2')],
        state_aliases=[
            StateAlias(surname='RUSSELL',
                       given_names='BERTRAND',
                       alias_type='GIVEN_NAME'),
        ],
    )

    sartre = StatePerson(
        state_person_id='2',
        surname='SARTRE',
        given_names='JEAN-PAUL',
        gender='2',
        birthdate='19821002',
        current_address='555 FLATBUSH DR, NEW YORK, NY 10031',
        state_person_external_ids=external_ids(
            '2', '654321', 'GF3374', '456B', '55554444'),
        state_person_races=[StatePersonRace(race='2')],
        state_aliases=[
            StateAlias(surname='SARTRE',
                       given_names='JEAN-PAUL',
                       alias_type='GIVEN_NAME'),
        ],
    )

    kierkegaard = StatePerson(
        state_person_id='3',
        surname='KIERKEGAARD',
        given_names='SOREN',
        name_suffix='JR ',
        gender='1',
        birthdate='19911120',
        current_address='5000 SUNNY LANE, APT. 55D, PHILADELPHIA, PA 19129',
        state_person_external_ids=external_ids(
            '3', '445566', 'CJ1991', '012D', '99990000'),
        state_person_races=[StatePersonRace(race='6')],
        state_aliases=[
            StateAlias(surname='KIERKEGAARD',
                       given_names='SOREN',
                       name_suffix='JR ',
                       alias_type='GIVEN_NAME'),
        ],
    )

    rawls = StatePerson(
        state_person_id='4',
        surname='RAWLS',
        given_names='JOHN',
        gender='2',
        birthdate='19890617',
        current_address='214 HAPPY PLACE, PHILADELPHIA, PA 19129',
        state_person_external_ids=external_ids(
            '4', '778899', 'JE1989', '345E', '09876543'),
        state_person_ethnicities=[StatePersonEthnicity(ethnicity='3')],
        state_aliases=[
            StateAlias(surname='RAWLS',
                       given_names='JOHN',
                       alias_type='GIVEN_NAME'),
        ],
    )

    expected = IngestInfo(
        state_people=[russell, sartre, kierkegaard, rawls])
    self.run_parse_file_test(expected, 'dbo_IcsDoc')
def test_populate_data_dbo_tblInmTestScore(self):
    """Run the 'dbo_tblInmTestScore' parse-file test on hand-built output.

    Expected assessment types keep their trailing whitespace exactly as
    written here. Only the two 'RST ' assessments carry metadata: a JSON
    object with a boolean "latest_version" key.
    """

    def score(assessment_id, kind, klass, date, value, latest_version=None):
        # Metadata is passed only when a latest_version flag is supplied.
        optional = {}
        if latest_version is not None:
            optional['assessment_metadata'] = json.dumps(
                {"latest_version": latest_version})
        return StateAssessment(
            state_assessment_id=assessment_id,
            assessment_type=kind,
            assessment_class=klass,
            assessment_date=date,
            assessment_score=value,
            **optional)

    def person(control_id, assessments):
        # One expected person identified by a single US_PA_CONTROL id.
        return StatePerson(
            state_person_id=control_id,
            state_person_external_ids=[
                StatePersonExternalId(
                    state_person_external_id_id=control_id,
                    id_type=US_PA_CONTROL),
            ],
            state_assessments=assessments,
        )

    expected_people = [
        person('123456', [
            score('123456-1-1', 'CSS-M ', 'SOCIAL',
                  '6/22/2008 13:20:54', '19'),
            score('123456-2-1', 'HIQ ', 'SOCIAL',
                  '7/12/2004 8:23:28', '62'),
            score('123456-3-3', 'LSI-R ', 'RISK',
                  '10/3/2010 12:11:41', '25'),
        ]),
        person('654321', [
            score('654321-1-1', 'CSS-M ', 'SOCIAL',
                  '4/1/2003 11:42:17', '22'),
            score('654321-3-1', 'LSI-R ', 'RISK',
                  '6/8/2004 11:07:48', '19'),
            score('654321-4-1', 'TCU ', 'SUBSTANCE_ABUSE',
                  '1/4/2004 11:09:52', '6'),
            score('654321-5-1', 'ST99 ', 'SEX_OFFENSE',
                  '7/5/2004 15:30:59', '4'),
        ]),
        person('445566', [
            score('445566-2-1', 'HIQ ', 'SOCIAL',
                  '7/28/2005 10:33:31', '61'),
            score('445566-3-2', 'LSI-R ', 'RISK',
                  '12/19/2016 15:21:56', '13'),
        ]),
        person('778899', [
            score('778899-3-3', 'LSI-R ', 'RISK',
                  '1/6/2017 18:16:56', '14'),
            score('778899-6-1', 'RST ', 'RISK',
                  '12/8/2012 15:09:08', '9', latest_version=False),
            score('778899-6-2', 'RST ', 'RISK',
                  '5/11/2018 15:54:06', '7', latest_version=True),
        ]),
    ]

    self.run_parse_file_test(
        IngestInfo(state_people=expected_people), 'dbo_tblInmTestScore')
def setup_method(self, _):
    """Build the IngestInfo fixture stored on ``self.ii``.

    The fixture holds one person created with person_id "test" who has one
    booking created with booking_id "test". The second argument is unused.
    """
    fixture = IngestInfo()
    test_booking = (
        fixture
        .create_person(person_id="test")
        .create_booking(booking_id="test")
    )
    # Kept from the original: the id is assigned again after creation.
    test_booking.booking_id = "test"
    self.ii = fixture
def parse(self, json_people: Iterable[Dict]) -> IngestInfo:
    """Uses the JsonDataExtractor to convert JSON data at the person level
    to IngestInfo objects.

    Args:
        json_people: Iterable of dicts, one per record. Each dict is fed
            to the extractor and is also read directly for its 'hold' and
            'bond' lists and for ``['booking']['commiting_authority']``.
            Records are per booking, so several may share a person id.

    Returns:
        An IngestInfo with one person per distinct ``person_id``; the
        bookings of every record sharing that id are merged onto a single
        person object.

    Raises:
        KeyError: if a record (or one of its hold/bond entries) lacks one
            of the keys read directly by this method.
    """
    extractor = JsonDataExtractor(self.yaml_file)
    bond_extractor = JsonDataExtractor(self.bond_yaml_file)

    # Loop-invariant patterns, compiled once instead of per charge/bond.
    counts_pattern = re.compile(r"([0-9]+) (?:other )?mitts",
                                re.IGNORECASE)
    partial_bond_pattern = re.compile(r'[0-9]+ */ *[0-9]+')

    # Group people by person id. Since we're iterating over bookings, not
    # people, we have to manually merge people's bookings.
    people: Dict[str, List[Person]] = defaultdict(list)

    for person_dict in json_people:
        ii = extractor.extract_and_populate_data(person_dict)

        person = scraper_utils.one('person', ii)
        person.place_of_residence = self.get_address(person_dict)
        # TODO(1802): parse ethnicity in enum overrides
        if person.race == 'HISPANIC' or person.ethnicity == 'Y':
            person.race, person.ethnicity = None, 'HISPANIC'
        else:
            person.ethnicity = None

        booking = scraper_utils.one('booking', ii)
        booking.admission_reason = self.get_admission_reason(person_dict)

        for hold in person_dict['hold']:
            jurisdiction_name = hold['holding_for_agency']
            if jurisdiction_name == 'Request to Hold':
                # For this placeholder agency value the jurisdiction is
                # taken from the hold's 'charges' field instead.
                jurisdiction_name = hold['charges']
            booking.create_hold(hold_id=hold['pkey'],
                                jurisdiction_name=jurisdiction_name)

        # Bonds are shared across all charges within a single case
        for bond_dict in person_dict['bond']:
            bond = scraper_utils.one(
                'bond',
                bond_extractor.extract_and_populate_data(bond_dict))
            case_pk = bond_dict['case_pk']
            # This must be a list, not a generator expression: a generator
            # object is always truthy, which made the `else` branch below
            # unreachable and silently dropped bonds without charges.
            matching_charges = [c for c in ii.get_all_charges()
                                if c.case_number == case_pk]
            if matching_charges:
                for charge in matching_charges:
                    charge.bond = bond
            else:
                # Some bonds have no charges associated with their case
                booking.create_charge(bond=bond)

        court_type = person_dict['booking']['commiting_authority']
        for charge in ii.get_all_charges():
            charge.court_type = court_type
            charge.status = self.get_charge_status(person_dict,
                                                   charge.charge_id)

            if charge.degree:
                logging.info(
                    "Charge degree found, but we don't expect it "
                    "to be filled in: \n%s", ii)
            if charge.charge_class:
                logging.info(
                    "Charge class found, but we don't expect it "
                    "to be filled in: \n%s", ii)

            if charge.number_of_counts:
                # Keep only the leading number from text such as
                # "<n> mitts" / "<n> other mitts"; anything else is cleared.
                match = counts_pattern.search(charge.number_of_counts)
                charge.number_of_counts = match.group(1) if match else None

        for bond in ii.get_all_bonds(lambda b: b.bond_agent):
            # bond.speccond (stored temporarily in bond.bond_agent) might
            # have two cash values separated by a slash, indicating a
            # partial bond.
            if partial_bond_pattern.search(bond.bond_agent):
                bond.bond_type = BondType.PARTIAL_CASH.value
            # The field was only a temporary carrier, so always clear it.
            bond.bond_agent = None

        people[person.person_id].append(person)

    def merge_bookings(dupes):
        # Fold the bookings of every duplicate onto the last-seen person.
        base = dupes.pop()
        for p in dupes:
            base.bookings.extend(p.bookings)
        return base

    merged_people = [merge_bookings(dupes) for dupes in people.values()]
    return IngestInfo(people=merged_people)