def test_persist_to_db_different_regions(self, mock_write, _mock_region,
                                         mock_session_return):
    """Persist two regions one after the other and check each write.

    An ingest info is written and persisted for REGIONS[0], then a second
    one is written and persisted for REGIONS[1]. After each persist, the
    proto passed to the mocked write must equal the proto built from the
    ingest info written for that region, and the mocked write must have
    been called exactly twice by the end.
    """
    background = constants.ScrapeType.BACKGROUND
    first_key = ScrapeKey(REGIONS[0], background)
    second_key = ScrapeKey(REGIONS[1], background)

    first_info = ingest_info.IngestInfo()
    first_person = first_info.create_person(
        person_id=TEST_ID, full_name=TEST_NAME)
    first_person.create_booking(booking_id=TEST_ID)

    second_info = ingest_info.IngestInfo()
    second_person = second_info.create_person(
        person_id=TEST_ID, full_name=TEST_NAME2)
    second_person.create_booking(booking_id=TEST_ID)

    # Both tasks are built from the same field values.
    task_fields = {
        'task_type': constants.TaskType.SCRAPE_DATA,
        'endpoint': TEST_ENDPOINT,
        'response_type': constants.ResponseType.TEXT,
    }
    first_task = Task(**task_fields)
    second_task = Task(**task_fields)

    # --- First region ---
    first_session = create_mock_session()
    mock_session_return.return_value = first_session

    batch_persistence.write(first_info, first_key, first_task)
    first_expected = serialization.convert_ingest_info_to_proto(first_info)
    batch_persistence.persist_to_database(first_key.region_code,
                                          first_session.start)

    first_written = mock_write.call_args[0][0]
    self.assertEqual(first_written, first_expected)

    # We expect the region that we persisted to have no more ingest infos.
    remaining_first = \
        datastore_ingest_info.batch_get_ingest_infos_for_region(
            REGIONS[0], first_session.start)
    self.assertEqual(len(remaining_first), 0)

    # --- Second region ---
    second_session = create_mock_session()
    mock_session_return.return_value = second_session

    batch_persistence.write(second_info, second_key, second_task)
    pending_second = \
        datastore_ingest_info.batch_get_ingest_infos_for_region(
            REGIONS[1], second_session.start)
    self.assertEqual(len(pending_second), 1)

    second_expected = serialization.convert_ingest_info_to_proto(
        second_info)
    batch_persistence.persist_to_database(second_key.region_code,
                                          second_session.start)

    second_written = mock_write.call_args[0][0]
    self.assertEqual(second_written, second_expected)

    self.assertEqual(mock_write.call_count, 2)
def test_write_to_datastore(self, mock_session_return):
    """A single write is readable back as one matching batch entry.

    The expected entry pairs the written ingest info with the hash of the
    task's JSON serialization (keys sorted).
    """
    session = create_mock_session()
    mock_session_return.return_value = session
    key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)

    info = ingest_info.IngestInfo()
    person = info.create_person(full_name=TEST_NAME)
    person.create_booking(booking_id=TEST_ID)

    task = Task(
        task_type=constants.TaskType.SCRAPE_DATA,
        endpoint=TEST_ENDPOINT,
        response_type=constants.ResponseType.TEXT,
    )
    serialized_task = json.dumps(task.to_serializable(), sort_keys=True)
    expected_entry = BatchIngestInfoData(ingest_info=info,
                                         task_hash=hash(serialized_task))

    batch_persistence.write(info, key, task)

    stored_entries = batch_persistence._get_batch_ingest_info_list(
        key.region_code, session.start)

    self.assertEqual(len(stored_entries), 1)
    self.assertEqual(expected_entry, stored_entries[0])
def test_persist_to_db(self, mock_write, _mock_region, mock_session_return):
    """Persisting hands the written ingest info to the database write.

    After persisting, the proto passed to the mocked write must equal the
    proto built from the written ingest info, and no ingest infos may remain
    for the region.
    """
    session = create_mock_session()
    mock_session_return.return_value = session
    key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)

    info = ingest_info.IngestInfo()
    person = info.create_person(person_id=TEST_ID, full_name=TEST_NAME)
    person.create_booking(booking_id=TEST_ID)

    task = Task(
        task_type=constants.TaskType.SCRAPE_DATA,
        endpoint=TEST_ENDPOINT,
        response_type=constants.ResponseType.TEXT,
    )

    batch_persistence.write(info, key, task)

    expected = serialization.convert_ingest_info_to_proto(info)

    batch_persistence.persist_to_database(key.region_code, session.start)

    written = mock_write.call_args[0][0]
    self.assertEqual(written, expected)

    # After we persist, there should no longer be ingest infos on Datastore
    leftovers = datastore_ingest_info.batch_get_ingest_infos_for_region(
        REGIONS[0], session.start)
    self.assertEqual(len(leftovers), 0)
def validate_and_return_populate_data(self,
                                      content,
                                      expected_ingest_info=None,
                                      expected_single_counts=None,
                                      expected_persist=True,
                                      task=None,
                                      info=None):
    """This function runs populate_data and runs some extra validation
    on the output.

    Args:
        content: the content of the page to pass into get_more_tasks
        expected_ingest_info: the ingest info expected to be returned from
            `populate_data`. If `expected_ingest_info` is `None`, then
            expects the return value of `populate_data` to be `None`.
        expected_single_counts: the list of SingleCounts expected to be
            returned from `populate_data`.
        expected_persist: the expected value of persist to be returned from
            `populate_data`.
        task: the task that is being processed, optional.
        info: an ingest_info to use if provided.

    Returns:
        The result from populate_data in case the user needs to do any
        extra validations on the output. This is `None` when
        `populate_data` returned `None` and neither an ingest info nor
        single counts were expected.
    """
    info = info or ingest_info.IngestInfo()
    task = task or Task(task_type=constants.TaskType.SCRAPE_DATA,
                        endpoint='')

    scrape_data = self.scraper.populate_data(content, task, info)

    print('FINAL')
    # Guard the attribute access so that a `None` result reaches the
    # assertions below instead of dying here with an AttributeError.
    print(scrape_data.ingest_info if scrape_data is not None else None)
    print('EXPECTED')
    print(expected_ingest_info)

    if expected_ingest_info is None and expected_single_counts is None:
        if not scrape_data:
            self.assertIsNone(scrape_data)
            # Nothing was expected and nothing was produced: there is
            # nothing further to validate.
            return scrape_data
        self.assertFalse(scrape_data.persist)
    else:
        self.assertIsNotNone(
            scrape_data,
            'populate_data returned None but output was expected')

    self.assertCountEqual(scrape_data.single_counts,
                          expected_single_counts or [])

    metadata = IngestMetadata(self.scraper.region.region_code,
                              self.scraper.region.jurisdiction_id,
                              _FAKE_SCRAPER_START_TIME,
                              self.scraper.get_enum_overrides())

    self.validate_ingest(scrape_data.ingest_info, expected_ingest_info,
                         metadata)

    # Use the unittest assertion (not a bare `assert`) so the check is not
    # stripped under `python -O` and matches the other checks above.
    self.assertEqual(scrape_data.persist, expected_persist)

    return scrape_data
def test_multipleOpenBookings_raisesPersistenceError(self):
    """Writing a person with two bookings must report failure.

    Both bookings are created with the same raw admission date; the test
    asserts that `persistence.write` returns a falsy value for the
    resulting proto.
    """
    info = ii.IngestInfo()
    inmate = info.create_person(full_name=FULL_NAME_1)
    for _ in range(2):
        inmate.create_booking(admission_date=DATE_RAW)

    proto = convert_ingest_info_to_proto(info)
    write_succeeded = persistence.write(proto, DEFAULT_METADATA)

    self.assertFalse(write_succeeded)
def test_yaml_is_correct(self) -> None:
    """Validate the structure of the yaml manifest at `self.yaml`, if set.

    Checks that `key_mappings` exists, that no unknown top-level keys are
    present, and that every `<class>.<attr>` value in `key_mappings` and
    `multi_key_mapping` names an attribute that exists on the matching
    ingest info object.

    Raises:
        AttributeError: if any of the checks above fails.
    """
    if not self.yaml:
        return

    with open(self.yaml, "r") as ymlfile:
        manifest = yaml.full_load(ymlfile)

    person = ingest_info.IngestInfo().create_person()
    booking = person.create_booking()
    charge = booking.create_charge()
    arrest = booking.create_arrest()
    sentence = charge.create_sentence()
    bond = charge.create_bond()
    object_verification_map = {
        "person": person,
        "booking": booking,
        "charge": charge,
        "arrest": arrest,
        "bond": bond,
        "sentence": sentence,
    }

    # Validate that key_mappings exists
    if "key_mappings" not in manifest:
        raise AttributeError("key_mappings must exist in the manifest")

    # Make sure there are no unknown keys
    allowed_keys = (
        "key_mappings",
        "multi_key_mapping",
        "keys_to_ignore",
        "css_key_mappings",
    )
    for key in manifest:
        if key not in allowed_keys:
            raise AttributeError("Unknown yaml key %s" % key)

    # Make sure every mapped value in the yaml file exists as a variable
    # in the relevant class.
    #
    # The multi-key section is looked up under "multi_key_mapping", the only
    # spelling the whitelist above accepts. The previous lookup used
    # "multi_key_mappings", which the whitelist rejects, so that validation
    # could never run.
    # NOTE(review): assumes multi_key_mapping values are "<class>.<attr>"
    # strings like key_mappings values - confirm against the manifests.
    sections = [("key_mappings", manifest["key_mappings"])]
    if "multi_key_mapping" in manifest:
        sections.append(
            ("multi_key_mapping", manifest["multi_key_mapping"]))

    for section_name, mappings in sections:
        for value in mappings.values():
            class_to_set, attr = value.split(".")
            if attr not in vars(object_verification_map[class_to_set]):
                raise AttributeError(
                    "Attribute %s is unknown on %s, found in %s"
                    % (attr, class_to_set, section_name))
def test_persist_duplicates_to_db(self, mock_write, _mock_region,
                                  mock_session_return):
    """Tests that duplicate ingest_info.Person objects are merged before
    write.

    Three ingest infos are written under three distinct tasks: `ii` (one
    person with a booking), `ii_2` (a different person) and a deep copy of
    `ii`. The proto passed to the mocked write must equal the proto built
    from the people of `ii` followed by the people of `ii_2`.
    """
    mock_session = mock_session_return.return_value = create_mock_session()
    scrape_key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)

    # Arrange
    ii = ingest_info.IngestInfo()
    ii.create_person(
        person_id=TEST_ID,
        full_name=TEST_NAME).create_booking(booking_id=TEST_ID)

    ii_2 = ingest_info.IngestInfo()
    # The second person belongs on ii_2. It used to be added to `ii`, which
    # left ii_2 empty, so the test never merged two distinct ingest infos.
    # NOTE(review): the expectation below relies on the ingest infos being
    # read back in write order - confirm against the datastore mock.
    ii_2.create_person(person_id=TEST_ID2, full_name=TEST_NAME2)

    ii_1_dup = copy.deepcopy(ii)

    # Distinct endpoints give each write a distinct task.
    t1, t2, t3 = (Task(
        task_type=constants.TaskType.SCRAPE_DATA,
        endpoint=TEST_ENDPOINT + str(i),
        response_type=constants.ResponseType.TEXT,
    ) for i in range(3))

    # Act
    batch_persistence.write(ii, scrape_key, t1)
    batch_persistence.write(ii_2, scrape_key, t2)
    batch_persistence.write(ii_1_dup, scrape_key, t3)

    batch_persistence.persist_to_database(scrape_key.region_code,
                                          mock_session.start)

    # Assert
    expected_ii = ingest_info.IngestInfo(people=ii.people + ii_2.people)
    expected_proto = serialization.convert_ingest_info_to_proto(
        expected_ii)
    result_proto = mock_write.call_args[0][0]
    self.assertEqual(result_proto, expected_proto)
def test_yaml_is_correct(self):
    """Validate the structure of the yaml manifest at `self.yaml`, if set.

    Checks that `key_mappings` exists, that no unknown top-level keys are
    present, and that every `<class>.<attr>` value in `key_mappings` and
    `multi_key_mapping` names an attribute that exists on the matching
    ingest info object.

    Raises:
        AttributeError: if any of the checks above fails.
    """
    if not self.yaml:
        return

    with open(self.yaml, 'r') as ymlfile:
        manifest = yaml.full_load(ymlfile)

    person = ingest_info.IngestInfo().create_person()
    booking = person.create_booking()
    charge = booking.create_charge()
    arrest = booking.create_arrest()
    sentence = charge.create_sentence()
    bond = charge.create_bond()
    object_verification_map = {
        'person': person,
        'booking': booking,
        'charge': charge,
        'arrest': arrest,
        'bond': bond,
        'sentence': sentence
    }

    # Validate that key_mappings exists
    if 'key_mappings' not in manifest:
        raise AttributeError("key_mappings must exist in the manifest")

    # Make sure there are no unknown keys
    allowed_keys = (
        'key_mappings', 'multi_key_mapping', 'keys_to_ignore',
        'css_key_mappings'
    )
    for key in manifest:
        if key not in allowed_keys:
            raise AttributeError("Unknown yaml key %s" % key)

    # Make sure every mapped value in the yaml file exists as a variable
    # in the relevant class.
    #
    # The multi-key section is looked up under 'multi_key_mapping', the only
    # spelling the whitelist above accepts. The previous lookup used
    # 'multi_key_mappings', which the whitelist rejects, so that validation
    # could never run.
    # NOTE(review): assumes multi_key_mapping values are '<class>.<attr>'
    # strings like key_mappings values - confirm against the manifests.
    sections = [('key_mappings', manifest['key_mappings'])]
    if 'multi_key_mapping' in manifest:
        sections.append(
            ('multi_key_mapping', manifest['multi_key_mapping']))

    for section_name, mappings in sections:
        for value in mappings.values():
            class_to_set, attr = value.split('.')
            if attr not in vars(object_verification_map[class_to_set]):
                raise AttributeError(
                    "Attribute %s is unknown on %s, found in %s"
                    % (attr, class_to_set, section_name))
def test_convert_ingest_info_one_charge_to_one_bond(
        self, mock_create: Mock
) -> None:
    """Two charges that each own a bond convert to two separate proto bonds.

    The bonds carry no id, so the expected proto uses the ids produced by
    the mocked generator ("1_GENERATE", "2_GENERATE"). The proto is then
    converted back and compared with the original ingest info.
    """
    mock_create.side_effect = self._create_generated_id

    # Python-side object graph: person -> booking -> two charges, one bond
    # per charge.
    source_info = ingest_info.IngestInfo()
    source_person = source_info.create_person()
    source_person.person_id = "id1"

    source_booking = source_person.create_booking()
    source_booking.booking_id = "id1"

    for charge_id in ("id1", "id2"):
        source_charge = source_booking.create_charge()
        source_charge.charge_id = charge_id
        source_bond = source_charge.create_bond()
        source_bond.amount = "$1"

    # Proto-side expectation, built in the same order.
    expected = ingest_info_pb2.IngestInfo()
    expected_person = expected.people.add()
    expected_person.person_id = "id1"
    expected_person.booking_ids.append("id1")

    expected_booking = expected.bookings.add()
    expected_booking.booking_id = "id1"
    expected_booking.charge_ids.extend(["id1", "id2"])

    for charge_id, generated_bond_id in (("id1", "1_GENERATE"),
                                         ("id2", "2_GENERATE")):
        expected_charge = expected.charges.add()
        expected_charge.charge_id = charge_id
        expected_bond = expected.bonds.add()
        expected_bond.amount = "$1"
        expected_bond.bond_id = generated_bond_id
        expected_charge.bond_id = expected_bond.bond_id

    actual = serialization.convert_ingest_info_to_proto(source_info)
    assert expected == actual

    round_tripped = serialization.convert_proto_to_ingest_info(actual)
    assert round_tripped == source_info
def test_convert_ingest_info_one_charge_to_one_bond(self, mock_create):
    """Two charges that each own a bond convert to two separate proto bonds.

    The bonds carry no id, so the expected proto uses the ids produced by
    the mocked generator ('1_GENERATE', '2_GENERATE'). The proto is then
    converted back and compared with the original ingest info.
    """
    mock_create.side_effect = self._create_generated_id

    # Python-side object graph.
    py_info = ingest_info.IngestInfo()
    py_person = py_info.create_person()
    py_person.person_id = 'id1'

    py_booking = py_person.create_booking()
    py_booking.booking_id = 'id1'

    first_charge = py_booking.create_charge()
    first_charge.charge_id = 'id1'
    first_bond = first_charge.create_bond()
    first_bond.amount = '$1'

    second_charge = py_booking.create_charge()
    second_charge.charge_id = 'id2'
    second_bond = second_charge.create_bond()
    second_bond.amount = '$1'

    # Proto-side expectation.
    wanted = ingest_info_pb2.IngestInfo()
    pb_person = wanted.people.add()
    pb_person.person_id = 'id1'
    pb_person.booking_ids.append('id1')

    pb_booking = wanted.bookings.add()
    pb_booking.booking_id = 'id1'
    pb_booking.charge_ids.extend(['id1', 'id2'])

    pb_first_charge = wanted.charges.add()
    pb_first_charge.charge_id = 'id1'
    pb_first_bond = wanted.bonds.add()
    pb_first_bond.amount = '$1'
    pb_first_bond.bond_id = '1_GENERATE'
    pb_first_charge.bond_id = pb_first_bond.bond_id

    pb_second_charge = wanted.charges.add()
    pb_second_charge.charge_id = 'id2'
    pb_second_bond = wanted.bonds.add()
    pb_second_bond.amount = '$1'
    pb_second_bond.bond_id = '2_GENERATE'
    pb_second_charge.bond_id = pb_second_bond.bond_id

    converted = ingest_utils.convert_ingest_info_to_proto(py_info)
    assert wanted == converted

    restored = ingest_utils.convert_proto_to_ingest_info(converted)
    assert restored == py_info
def test_convert_ingest_info_id_is_generated(self, mock_create):
    """A person and booking without ids get generated ids in the proto.

    The expected proto carries "1_GENERATE" for the person and "2_GENERATE"
    for the booking, as produced by the mocked generator. The proto is then
    converted back and compared with the original ingest info.
    """
    mock_create.side_effect = self._create_generated_id

    source_info = ingest_info.IngestInfo()
    source_person = source_info.create_person()
    source_person.surname = "testname"
    source_person.create_booking()

    wanted = ingest_info_pb2.IngestInfo()
    wanted_person = wanted.people.add()
    wanted_person.surname = "testname"
    wanted_person.person_id = "1_GENERATE"
    wanted_booking = wanted.bookings.add()
    wanted_booking.booking_id = "2_GENERATE"
    wanted_person.booking_ids.append(wanted_booking.booking_id)

    converted = ingest_utils.convert_ingest_info_to_proto(source_info)
    assert converted == wanted

    restored = ingest_utils.convert_proto_to_ingest_info(converted)
    assert restored == source_info
def test_serializable(self):
    """An ingest info survives a to-serializable / from-serializable trip.

    The graph used is one person with one booking holding two charges,
    each with its own bond.
    """
    original = ingest_info.IngestInfo()
    inmate = original.create_person()
    inmate.person_id = 'id1'

    stay = inmate.create_booking()
    stay.booking_id = 'id1'

    for charge_id in ('id1', 'id2'):
        count = stay.create_charge()
        count.charge_id = charge_id
        bail = count.create_bond()
        bail.amount = '$1'

    serialized = ingest_utils.ingest_info_to_serializable(original)
    restored = ingest_utils.ingest_info_from_serializable(serialized)

    assert restored == original
def test_persist_to_db_same_task_one_fail_one_pass(self, mock_write,
                                                   _mock_region,
                                                   mock_session_return):
    """A success and an error recorded for equal tasks still persist.

    One ingest info is written for a task and an error is written for a
    second task built from the same field values. Persisting must return a
    truthy value, pass the written ingest info's proto to the mocked write,
    and leave no ingest infos behind for the region.
    """
    session = create_mock_session()
    mock_session_return.return_value = session
    key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)
    mock_write.return_value = True

    info = ingest_info.IngestInfo()
    person = info.create_person(person_id=TEST_ID, full_name=TEST_NAME)
    person.create_booking(booking_id=TEST_ID)

    task_fields = {
        'task_type': constants.TaskType.SCRAPE_DATA,
        'endpoint': TEST_ENDPOINT,
        'response_type': constants.ResponseType.TEXT,
    }
    succeeded_task = Task(**task_fields)

    # Because the tasks are the same, we expect that to be counted as a
    # pass.
    errored_task = Task(**task_fields)

    batch_persistence.write(info, key, succeeded_task)
    batch_persistence.write_error(TEST_ERROR, TEST_TRACE, errored_task, key)

    expected = serialization.convert_ingest_info_to_proto(info)

    persisted = batch_persistence.persist_to_database(key.region_code,
                                                      session.start)
    self.assertTrue(persisted)

    written = mock_write.call_args[0][0]
    self.assertEqual(written, expected)

    leftovers = datastore_ingest_info.batch_get_ingest_infos_for_region(
        REGIONS[0], session.start)
    self.assertEqual(len(leftovers), 0)
def test_serializable(self) -> None:
    """An ingest info survives a to-serializable / from-serializable trip.

    The graph used is one person with one booking holding two charges,
    each with its own bond.
    """
    original = ingest_info.IngestInfo()

    inmate = original.create_person()
    inmate.person_id = "id1"

    stay = inmate.create_booking()
    stay.booking_id = "id1"

    first_count = stay.create_charge()
    first_count.charge_id = "id1"
    first_bail = first_count.create_bond()
    first_bail.amount = "$1"

    second_count = stay.create_charge()
    second_count.charge_id = "id2"
    second_bail = second_count.create_bond()
    second_bail.amount = "$1"

    as_serializable = serialization.ingest_info_to_serializable(original)
    restored = serialization.ingest_info_from_serializable(as_serializable)

    assert restored == original
def test_convert_ingest_info_many_charge_to_one_bond(self, mock_create):
    """Two charges sharing one bond object convert to a single proto bond.

    Both proto charges must reference the one generated bond id
    ("1_GENERATE"). The proto is then converted back and compared with the
    original ingest info.
    """
    mock_create.side_effect = self._create_generated_id

    # Python-side object graph: the second charge reuses the first bond.
    source_info = ingest_info.IngestInfo()
    source_person = source_info.create_person()
    source_person.person_id = "id1"

    source_booking = source_person.create_booking()
    source_booking.booking_id = "id1"

    first_charge = source_booking.create_charge()
    first_charge.charge_id = "id1"
    shared_bond = first_charge.create_bond()
    shared_bond.amount = "$1"

    second_charge = source_booking.create_charge()
    second_charge.charge_id = "id2"
    second_charge.bond = shared_bond

    # Proto-side expectation.
    wanted = ingest_info_pb2.IngestInfo()
    wanted_person = wanted.people.add()
    wanted_person.person_id = "id1"
    wanted_person.booking_ids.append("id1")

    wanted_booking = wanted.bookings.add()
    wanted_booking.booking_id = "id1"
    wanted_booking.charge_ids.extend(["id1", "id2"])

    wanted_first_charge = wanted.charges.add()
    wanted_first_charge.charge_id = "id1"
    wanted_bond = wanted.bonds.add()
    wanted_bond.amount = "$1"
    wanted_bond.bond_id = "1_GENERATE"
    wanted_first_charge.bond_id = wanted_bond.bond_id

    wanted_second_charge = wanted.charges.add()
    wanted_second_charge.charge_id = "id2"
    wanted_second_charge.bond_id = wanted_bond.bond_id

    converted = ingest_utils.convert_ingest_info_to_proto(source_info)
    assert len(converted.bonds) == 1
    assert wanted == converted

    restored = ingest_utils.convert_proto_to_ingest_info(converted)
    assert restored == source_info
def test_persist_to_db_failed_no_write(self, mock_write, _mock_region,
                                       mock_session_return):
    """An error recorded for a different task blocks the database write.

    One ingest info is written for a task and an error is written for a
    task that additionally carries params. Persisting must return a falsy
    value, the mocked write must never be called, and both stored items
    must still be retrievable afterwards.
    """
    session = create_mock_session()
    mock_session_return.return_value = session
    key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)

    info = ingest_info.IngestInfo()
    person = info.create_person(person_id=TEST_ID, full_name=TEST_NAME)
    person.create_booking(booking_id=TEST_ID)

    shared_fields = {
        'task_type': constants.TaskType.SCRAPE_DATA,
        'endpoint': TEST_ENDPOINT,
        'response_type': constants.ResponseType.TEXT,
    }
    succeeded_task = Task(**shared_fields)

    # Because the tasks are different, we should fail.
    errored_task = Task(params=TEST_PARAMS, **shared_fields)

    batch_persistence.write(info, key, succeeded_task)
    batch_persistence.write_error(TEST_ERROR, TEST_TRACE, errored_task, key)

    persisted = batch_persistence.persist_to_database(key.region_code,
                                                      session.start)
    self.assertFalse(persisted)

    self.assertEqual(mock_write.call_count, 0)

    # We should still have both items still on Datastore because they
    # weren't persisted.
    still_stored = batch_persistence._get_batch_ingest_info_list(
        key.region_code, session.start)
    self.assertEqual(len(still_stored), 2)
def test_convert_ingest_info_id_is_not_generated(self):
    """Explicit person and booking ids are carried into the proto as-is.

    The proto is then converted back and compared with the original ingest
    info.
    """
    source_info = ingest_info.IngestInfo()
    source_person = source_info.create_person()
    source_person.person_id = "id1"
    source_person.surname = "testname"

    source_booking = source_person.create_booking()
    source_booking.booking_id = "id2"
    source_booking.admission_date = "testdate"

    wanted = ingest_info_pb2.IngestInfo()
    wanted_person = wanted.people.add()
    wanted_person.person_id = "id1"
    wanted_person.surname = "testname"
    wanted_person.booking_ids.append("id2")

    wanted_booking = wanted.bookings.add()
    wanted_booking.booking_id = "id2"
    wanted_booking.admission_date = "testdate"

    converted = ingest_utils.convert_ingest_info_to_proto(source_info)
    assert wanted == converted

    restored = ingest_utils.convert_proto_to_ingest_info(converted)
    assert restored == source_info
def test_convert_ingest_info_state_entities(self):
    """Round-trip an ingest info populated with state-level entities.

    Builds a Python ingest info graph rooted at one state person, builds the
    proto expected from converting it, then asserts that the conversion
    equals the expected proto and that converting back equals a copy of the
    graph taken before conversion. Finally asserts that every `state_*`
    field on the resulting proto is non-empty.

    The order in which entries are added to `expected_proto` matters for the
    equality assertion (see the inline comment on the court case agent).
    """
    # Arrange Python ingest info
    info = ingest_info.IngestInfo()
    person = info.create_state_person()
    person.state_person_id = 'person1'
    person.surname = 'testname'

    race = person.create_state_person_race()
    race.state_person_race_id = 'race1'
    race.race = 'white'

    ethnicity = person.create_state_person_ethnicity()
    ethnicity.state_person_ethnicity_id = 'ethnicity1'
    ethnicity.ethnicity = 'non-hispanic'

    external_id = person.create_state_person_external_id()
    external_id.state_person_external_id_id = 'external_id1'
    external_id.id_type = 'contrived'

    alias = person.create_state_alias()
    alias.state_alias_id = 'alias1'
    alias.surname = 'testerson'

    assessment = person.create_state_assessment()
    assessment.state_assessment_id = 'assessment1'
    assessment.assessment_score = '42'

    supervising_officer = person.create_state_agent()
    supervising_officer.state_agent_id = 'supervising_officer1'
    supervising_officer.full_name = 'Officer Supervising'

    assessment_agent = assessment.create_state_agent()
    assessment_agent.state_agent_id = 'agent1'
    assessment_agent.full_name = 'Officer Jones'

    program_assignment = person.create_state_program_assignment()
    program_assignment.state_program_assignment_id = 'assignment1'
    program_assignment.program_id = 'program_id1'

    program_assignment_agent = program_assignment.create_state_agent()
    program_assignment_agent.state_agent_id = 'program_agent1'
    program_assignment_agent.full_name = 'Officer Program'

    group = person.create_state_sentence_group()
    group.state_sentence_group_id = 'group1'

    fine = group.create_state_fine()
    fine.state_fine_id = 'fine1'

    # Incarceration sentence and everything hanging off it.
    incarceration_sentence = group.create_state_incarceration_sentence()
    incarceration_sentence.state_incarceration_sentence_id = 'is1'
    early_discharge1 = incarceration_sentence.create_state_early_discharge(
    )
    early_discharge1.state_early_discharge_id = 'early_discharge1'
    charge1 = incarceration_sentence.create_state_charge()
    charge1.state_charge_id = 'charge1'
    charge1.classification_type = 'F'
    incarceration_period = incarceration_sentence. \
        create_state_incarceration_period()
    incarceration_period.state_incarceration_period_id = 'ip1'
    incarceration_period.status = 'IN_CUSTODY'
    incarceration_period.specialized_purpose_for_incarceration = \
        'SHOCK INCARCERATION'
    # The same program assignment object is attached to both the
    # incarceration period and (below) the supervision period.
    incarceration_period.state_program_assignments = [program_assignment]
    incident = incarceration_period.create_state_incarceration_incident()
    incident.state_incarceration_incident_id = 'incident1'
    incident.incident_type = 'FISTICUFFS'
    incident_outcome = \
        incident.create_state_incarceration_incident_outcome()
    incident_outcome.state_incarceration_incident_outcome_id = 'incident1-1'
    incident_outcome.outcome_type = 'FINE'
    incident_agent = incident.create_state_agent()
    incident_agent.state_agent_id = 'agent2'
    incident_agent.full_name = 'Officer Thompson'
    decision = incarceration_period.create_state_parole_decision()
    decision.state_parole_decision_id = 'decision1'
    decision_agent = decision.create_state_agent()
    decision_agent.state_agent_id = 'agent3'
    decision_agent.full_name = 'Officer Barkley'

    # Supervision sentence and everything hanging off it.
    supervision_sentence = group.create_state_supervision_sentence()
    supervision_sentence.state_supervision_sentence_id = 'ss1'
    early_discharge2 = supervision_sentence.create_state_early_discharge()
    early_discharge2.state_early_discharge_id = 'early_discharge2'
    charge2 = supervision_sentence.create_state_charge()
    charge2.state_charge_id = 'charge2'
    charge2.classification_type = 'M'
    supervision_period = supervision_sentence. \
        create_state_supervision_period()
    supervision_period.state_supervision_period_id = 'sp1'
    supervision_period.status = 'TERMINATED'
    supervision_period_agent = supervision_period.create_state_agent()
    supervision_period_agent.state_agent_id = 'agentPO'
    supervision_period_agent.full_name = 'Officer Paroley'
    supervision_period.state_program_assignments = [program_assignment]

    supervision_case_type_entry = supervision_period.create_state_supervision_case_type_entry(
    )
    supervision_case_type_entry.case_type = 'case_type'
    supervision_case_type_entry.state_supervision_case_type_entry_id = 'case_type_entry_id'

    supervision_contact = supervision_period.create_state_supervision_contact(
    )
    supervision_contact.state_supervision_contact_id = 'supervision_contact_id'
    supervision_contact.contact_type = 'contact_type'
    # This agent has the same id and name as the supervision period's
    # agent; the expected proto below adds 'agentPO' only once.
    supervision_contacted_agent = supervision_contact.create_state_agent()
    supervision_contacted_agent.state_agent_id = 'agentPO'
    supervision_contacted_agent.full_name = 'Officer Paroley'

    violation = supervision_period.create_state_supervision_violation()
    violation.state_supervision_violation_id = 'violation1'
    violation.violated_conditions = 'cond'
    violation.is_violent = 'false'

    violation_type = violation.\
        create_state_supervision_violation_type_entry()
    violation_type.state_supervision_violation_type_entry_id =\
        'violation_type_id'
    violation_type.violation_type = 'FELONY'

    violated_condition = \
        violation.create_state_supervision_violated_condition_entry()
    violated_condition.state_supervision_violated_condition_entry_id =\
        'condition_id'
    violated_condition.condition = 'CURFEW'

    response = violation.create_state_supervision_violation_response()
    response.state_supervision_violation_response_id = 'response1'
    response_decision_agent = response.create_state_agent()
    response_decision_agent.state_agent_id = 'agentTERM'
    response_decision_agent.full_name = 'Officer Termy'

    response_decision = response.\
        create_state_supervision_violation_response_decision_entry()
    response_decision.\
        state_supervision_violation_response_decision_entry_id =\
        'response_decision_id'
    response_decision.decision = 'REVOCATION'
    response_decision.revocation_type = 'REINCARCERATION'

    bond = charge1.create_state_bond()
    bond.state_bond_id = 'bond1'

    court_case = charge2.create_state_court_case()
    court_case.state_court_case_id = 'case1'

    court_case_agent = court_case.create_state_agent()
    court_case_agent.state_agent_id = 'agentJ'
    court_case_agent.full_name = 'Judge Agent'

    # Arrange Proto ingest info
    expected_proto = ingest_info_pb2.IngestInfo()
    person_pb = expected_proto.state_people.add()
    person_pb.state_person_id = 'person1'
    person_pb.surname = 'testname'

    person_pb.state_person_race_ids.append('race1')
    race_pb = expected_proto.state_person_races.add()
    race_pb.state_person_race_id = 'race1'
    race_pb.race = 'white'

    person_pb.state_person_ethnicity_ids.append('ethnicity1')
    ethnicity_pb = expected_proto.state_person_ethnicities.add()
    ethnicity_pb.state_person_ethnicity_id = 'ethnicity1'
    ethnicity_pb.ethnicity = 'non-hispanic'

    # The expected external id is the id type and the Python-side id joined
    # by a colon.
    person_pb.state_person_external_ids_ids.append(
        'contrived:external_id1')
    external_id_pb = expected_proto.state_person_external_ids.add()
    external_id_pb.state_person_external_id_id = 'contrived:external_id1'
    external_id_pb.id_type = 'contrived'

    person_pb.state_alias_ids.append('alias1')
    alias_pb = expected_proto.state_aliases.add()
    alias_pb.state_alias_id = 'alias1'
    alias_pb.surname = 'testerson'

    person_pb.state_assessment_ids.append('assessment1')
    assessment_pb = expected_proto.state_assessments.add()
    assessment_pb.state_assessment_id = 'assessment1'
    assessment_pb.assessment_score = '42'

    person_pb.supervising_officer_id = 'supervising_officer1'
    supervising_officer_pb = expected_proto.state_agents.add()
    supervising_officer_pb.state_agent_id = 'supervising_officer1'
    supervising_officer_pb.full_name = 'Officer Supervising'

    assessment_pb.conducting_agent_id = 'agent1'
    assessment_agent_pb = expected_proto.state_agents.add()
    assessment_agent_pb.state_agent_id = 'agent1'
    assessment_agent_pb.full_name = 'Officer Jones'

    person_pb.state_program_assignment_ids.append('assignment1')
    program_assignment_pb = expected_proto.state_program_assignments.add()
    program_assignment_pb.state_program_assignment_id = 'assignment1'
    program_assignment_pb.program_id = 'program_id1'

    program_assignment_pb.referring_agent_id = 'program_agent1'
    program_assignment_agent_pb = expected_proto.state_agents.add()
    program_assignment_agent_pb.state_agent_id = 'program_agent1'
    program_assignment_agent_pb.full_name = 'Officer Program'

    person_pb.state_sentence_group_ids.append('group1')
    group_pb = expected_proto.state_sentence_groups.add()
    group_pb.state_sentence_group_id = 'group1'

    group_pb.state_fine_ids.append('fine1')
    fine_pb = expected_proto.state_fines.add()
    fine_pb.state_fine_id = 'fine1'

    group_pb.state_supervision_sentence_ids.append('ss1')
    supervision_sentence_pb = \
        expected_proto.state_supervision_sentences.add()
    supervision_sentence_pb.state_supervision_sentence_id = 'ss1'
    supervision_sentence_pb.state_early_discharge_ids.append(
        'early_discharge2')
    early_discharge2_pb = expected_proto.state_early_discharges.add()
    early_discharge2_pb.state_early_discharge_id = 'early_discharge2'
    supervision_sentence_pb.state_charge_ids.append('charge2')
    charge2_pb = expected_proto.state_charges.add()
    charge2_pb.state_charge_id = 'charge2'
    charge2_pb.classification_type = 'M'
    supervision_sentence_pb.state_supervision_period_ids.append('sp1')
    supervision_period_pb = expected_proto.state_supervision_periods.add()
    supervision_period_pb.state_supervision_period_id = 'sp1'
    supervision_period_pb.status = 'TERMINATED'
    supervision_period_pb.state_program_assignment_ids.append(
        'assignment1')

    # An ordering requirement in the proto equality check at the end of this
    # test requires that this agent be added after agent1 and before agentPO
    court_case_agent_pb = expected_proto.state_agents.add()
    court_case_agent_pb.state_agent_id = 'agentJ'
    court_case_agent_pb.full_name = 'Judge Agent'

    supervision_period_pb.supervising_officer_id = 'agentPO'
    supervision_period_agent_pb = expected_proto.state_agents.add()
    supervision_period_agent_pb.state_agent_id = 'agentPO'
    supervision_period_agent_pb.full_name = 'Officer Paroley'

    supervision_case_type_entry_pb = expected_proto.state_supervision_case_type_entries.add(
    )
    supervision_case_type_entry_pb.state_supervision_case_type_entry_id = 'case_type_entry_id'
    supervision_case_type_entry_pb.case_type = 'case_type'
    supervision_period_pb.state_supervision_case_type_entry_ids.append(
        'case_type_entry_id')

    supervision_contact_pb = expected_proto.state_supervision_contacts.add(
    )
    supervision_contact_pb.state_supervision_contact_id = 'supervision_contact_id'
    supervision_contact_pb.contact_type = 'contact_type'
    supervision_contact_pb.contacted_agent_id = 'agentPO'
    supervision_period_pb.state_supervision_contact_ids.append(
        'supervision_contact_id')

    supervision_period_pb.state_supervision_violation_entry_ids.append(
        'violation1')
    violation_pb = expected_proto.state_supervision_violations.add()
    violation_pb.state_supervision_violation_id = 'violation1'
    violation_pb.is_violent = 'false'
    violation_pb.violated_conditions = 'cond'
    violation_pb.state_supervision_violation_type_entry_ids.append(
        'violation_type_id')
    violation_type_pb = \
        expected_proto.state_supervision_violation_type_entries.add()
    violation_type_pb.state_supervision_violation_type_entry_id = \
        'violation_type_id'
    violation_type_pb.violation_type = 'FELONY'
    violation_pb.state_supervision_violated_condition_entry_ids.append(
        'condition_id')
    # NOTE: `violation_type_pb` is rebound here and from this point refers
    # to the violated *condition* entry, not the violation type entry.
    violation_type_pb = \
        expected_proto.state_supervision_violated_condition_entries.add()
    violation_type_pb.state_supervision_violated_condition_entry_id = \
        'condition_id'
    violation_type_pb.condition = 'CURFEW'
    violation_pb.state_supervision_violation_response_ids.append(
        'response1')
    response_pb = expected_proto.state_supervision_violation_responses.add(
    )
    response_pb.state_supervision_violation_response_id = 'response1'
    response_pb.decision_agent_ids.append('agentTERM')
    response_decision_agent_pb = expected_proto.state_agents.add()
    response_decision_agent_pb.state_agent_id = 'agentTERM'
    response_decision_agent_pb.full_name = 'Officer Termy'
    response_decision_pb = \
        expected_proto.\
        state_supervision_violation_response_decision_entries.add()
    response_decision_pb.\
        state_supervision_violation_response_decision_entry_id = \
        'response_decision_id'
    response_decision_pb.decision = 'REVOCATION'
    response_decision_pb.revocation_type = 'REINCARCERATION'
    response_pb.\
        state_supervision_violation_response_decision_entry_ids.append(
            'response_decision_id'
        )

    group_pb.state_incarceration_sentence_ids.append('is1')
    incarceration_sentence_pb = \
        expected_proto.state_incarceration_sentences.add()
    incarceration_sentence_pb.state_incarceration_sentence_id = 'is1'
    incarceration_sentence_pb.state_early_discharge_ids.append(
        'early_discharge1')
    early_discharge1_pb = expected_proto.state_early_discharges.add()
    early_discharge1_pb.state_early_discharge_id = 'early_discharge1'
    incarceration_sentence_pb.state_charge_ids.append('charge1')
    charge1_pb = expected_proto.state_charges.add()
    charge1_pb.state_charge_id = 'charge1'
    charge1_pb.classification_type = 'F'
    incarceration_sentence_pb.state_incarceration_period_ids.append('ip1')
    incarceration_period_pb = \
        expected_proto.state_incarceration_periods.add()
    incarceration_period_pb.state_incarceration_period_id = 'ip1'
    incarceration_period_pb.status = 'IN_CUSTODY'
    incarceration_period_pb.specialized_purpose_for_incarceration = \
        'SHOCK INCARCERATION'
    incarceration_period_pb.state_incarceration_incident_ids \
        .append('incident1')
    incident_pb = expected_proto.state_incarceration_incidents.add()
    incident_pb.state_incarceration_incident_id = 'incident1'
    incident_pb.incident_type = 'FISTICUFFS'
    incarceration_period_pb.state_program_assignment_ids.append(
        'assignment1')

    incident_pb.responding_officer_id = 'agent2'
    incident_agent_pb = expected_proto.state_agents.add()
    incident_agent_pb.state_agent_id = 'agent2'
    incident_agent_pb.full_name = 'Officer Thompson'

    incident_pb.state_incarceration_incident_outcome_ids.append(
        'incident1-1')
    incident_outcome_pb = \
        expected_proto.state_incarceration_incident_outcomes.add()
    incident_outcome_pb.state_incarceration_incident_outcome_id = \
        'incident1-1'
    incident_outcome_pb.outcome_type = 'FINE'

    incarceration_period_pb.state_parole_decision_ids.append('decision1')
    decision_pb = expected_proto.state_parole_decisions.add()
    decision_pb.state_parole_decision_id = 'decision1'
    decision_pb.decision_agent_ids.append('agent3')
    decision_agent_pb = expected_proto.state_agents.add()
    decision_agent_pb.state_agent_id = 'agent3'
    decision_agent_pb.full_name = 'Officer Barkley'

    charge1_pb.state_bond_id = 'bond1'
    bond_pb = expected_proto.state_bonds.add()
    bond_pb.state_bond_id = 'bond1'

    charge2_pb.state_court_case_id = 'case1'
    court_case_pb = expected_proto.state_court_cases.add()
    court_case_pb.state_court_case_id = 'case1'
    court_case_pb.judge_id = 'agentJ'

    # Snapshot the Python graph before conversion; the round-trip result is
    # compared with this copy rather than with `info` itself.
    expected_info = copy.deepcopy(info)
    # Act & Assert

    proto = ingest_utils.convert_ingest_info_to_proto(info)
    assert expected_proto == proto

    info_back = ingest_utils.convert_proto_to_ingest_info(proto)
    assert info_back == expected_info

    # Assert that none of the proto's collections are empty, i.e. we've
    # tested all of the object graph
    proto_classes = [field.name for field in proto.DESCRIPTOR.fields]
    for cls in proto_classes:
        if cls.startswith('state_'):
            assert proto.__getattribute__(cls)
def validate_and_return_populate_data(
    self,
    content: Optional[html.HtmlElement],
    expected_ingest_info: Optional[IngestInfo] = None,
    expected_single_counts: Optional[List[SingleCount]] = None,
    expected_persist: bool = True,
    task: Optional[Task] = None,
    info: Optional[IngestInfo] = None,
) -> ScrapedData:
    """Runs `populate_data` on the scraper and validates its output.

    Args:
        content: the content of the page to pass into `populate_data`.
        expected_ingest_info: the ingest info expected to be returned from
            `populate_data`. If both `expected_ingest_info` and
            `expected_single_counts` are `None`, then the result of
            `populate_data` must either be `None` or have `persist` set to
            `False`.
        expected_single_counts: the list of SingleCounts expected to be
            returned from `populate_data`. Only compared when both this
            list and the returned `single_counts` are non-empty.
        expected_persist: the expected value of persist to be returned from
            `populate_data`.
        task: the task that is being processed, optional. Defaults to a
            SCRAPE_DATA task with an empty endpoint.
        info: an ingest_info to use if provided. Defaults to a fresh
            `IngestInfo`.

    Returns:
        The result from populate_data in case the user needs to do any
        extra validations on the output.

    Raises:
        ValueError: if no scraper is configured on the test case or
            `populate_data` returned no scrape data.
    """
    info_to_ingest: IngestInfo = info or ingest_info.IngestInfo()
    task_to_process: Task = task or Task(
        task_type=constants.TaskType.SCRAPE_DATA, endpoint=""
    )

    if self.scraper:
        scrape_data = self.scraper.populate_data(
            content, task_to_process, info_to_ingest
        )

        # Debug output shown by the test runner on failure. The result may be
        # None, so do not dereference it unconditionally.
        print("FINAL")
        print(scrape_data.ingest_info if scrape_data else None)
        print("EXPECTED")
        print(expected_ingest_info)

        if expected_ingest_info is None and expected_single_counts is None:
            # Nothing is expected: either nothing came back, or whatever came
            # back must not be persisted.
            if scrape_data:
                assert scrape_data.persist is False
            else:
                assert scrape_data is None

        if scrape_data:
            if expected_single_counts and scrape_data.single_counts:
                assert len(scrape_data.single_counts) == len(
                    expected_single_counts
                )
                # Symmetric difference is empty iff both hold the same items.
                diff = set(expected_single_counts) ^ set(
                    scrape_data.single_counts
                )
                assert not diff

            metadata: IngestMetadata = IngestMetadata(
                region=self.scraper.region.region_code,
                jurisdiction_id=self.scraper.region.jurisdiction_id,
                ingest_time=_FAKE_SCRAPER_START_TIME,
                enum_overrides=self.scraper.get_enum_overrides(),
                system_level=SystemLevel.COUNTY,
                database_key=SQLAlchemyDatabaseKey.for_schema(
                    SchemaType.JAILS
                ),
            )

            if scrape_data.ingest_info and expected_ingest_info:
                self.validate_ingest(
                    scrape_data.ingest_info, expected_ingest_info, metadata
                )

            assert scrape_data.persist == expected_persist

            return scrape_data

    # Reached when there is no scraper or populate_data returned nothing.
    raise ValueError("Scrape data was not provided ingest info")
def convert_proto_to_ingest_info(
        proto: ingest_info_pb2.IngestInfo) -> ingest_info.IngestInfo:
    """Populates an `IngestInfo` python object from the given proto.

    The conversion runs in two passes. First, every message in each of the
    proto's flat collections is converted with `_proto_to_py` and indexed by
    its id. Second, the id references stored on the proto messages are
    resolved against those indexes to attach child objects to their parents.

    Args:
        proto: the `IngestInfo` proto to convert.

    Returns:
        A new `ingest_info.IngestInfo` whose `people` and `state_people`
        hold the converted, fully wired objects.

    Raises:
        KeyError: if a proto message references an id that has no matching
            entry in the corresponding proto collection.
    """
    # Pass 1: convert every proto message and index it by id.
    person_map: Dict[str, ingest_info.Person] = dict(
        _proto_to_py(person, ingest_info.Person, 'person_id')
        for person in proto.people)
    booking_map: Dict[str, ingest_info.Booking] = dict(
        _proto_to_py(booking, ingest_info.Booking, 'booking_id')
        for booking in proto.bookings)
    charge_map: Dict[str, ingest_info.Charge] = dict(
        _proto_to_py(charge, ingest_info.Charge, 'charge_id')
        for charge in proto.charges)
    hold_map: Dict[str, ingest_info.Hold] = dict(
        _proto_to_py(hold, ingest_info.Hold, 'hold_id')
        for hold in proto.holds)
    arrest_map: Dict[str, ingest_info.Arrest] = dict(
        _proto_to_py(arrest, ingest_info.Arrest, 'arrest_id')
        for arrest in proto.arrests)
    bond_map: Dict[str, ingest_info.Bond] = dict(
        _proto_to_py(bond, ingest_info.Bond, 'bond_id')
        for bond in proto.bonds)
    sentence_map: Dict[str, ingest_info.Sentence] = dict(
        _proto_to_py(sentence, ingest_info.Sentence, 'sentence_id')
        for sentence in proto.sentences)

    state_person_map: Dict[str, ingest_info.StatePerson] = dict(
        _proto_to_py(state_person, ingest_info.StatePerson,
                     'state_person_id')
        for state_person in proto.state_people)
    state_person_race_map: Dict[str, ingest_info.StatePersonRace] = dict(
        _proto_to_py(race, ingest_info.StatePersonRace,
                     'state_person_race_id')
        for race in proto.state_person_races)
    state_person_ethnicity_map: \
        Dict[str, ingest_info.StatePersonEthnicity] = dict(
            _proto_to_py(ethnicity, ingest_info.StatePersonEthnicity,
                         'state_person_ethnicity_id')
            for ethnicity in proto.state_person_ethnicities)
    state_person_external_id_map: \
        Dict[str, ingest_info.StatePersonExternalId] = dict(
            _proto_to_py(external_id, ingest_info.StatePersonExternalId,
                         'state_person_external_id_id')
            for external_id in proto.state_person_external_ids)
    state_alias_map: Dict[str, ingest_info.StateAlias] = dict(
        _proto_to_py(alias, ingest_info.StateAlias, 'state_alias_id')
        for alias in proto.state_aliases)
    state_assessment_map: Dict[str, ingest_info.StateAssessment] = dict(
        _proto_to_py(assessment, ingest_info.StateAssessment,
                     'state_assessment_id')
        for assessment in proto.state_assessments)
    state_sentence_group_map: \
        Dict[str, ingest_info.StateSentenceGroup] = dict(
            _proto_to_py(sentence_group, ingest_info.StateSentenceGroup,
                         'state_sentence_group_id')
            for sentence_group in proto.state_sentence_groups)
    state_supervision_sentence_map: \
        Dict[str, ingest_info.StateSupervisionSentence] = dict(
            _proto_to_py(supervision_sentence,
                         ingest_info.StateSupervisionSentence,
                         'state_supervision_sentence_id')
            for supervision_sentence in proto.state_supervision_sentences)
    state_incarceration_sentence_map: \
        Dict[str, ingest_info.StateIncarcerationSentence] = dict(
            _proto_to_py(incarceration_sentence,
                         ingest_info.StateIncarcerationSentence,
                         'state_incarceration_sentence_id')
            for incarceration_sentence
            in proto.state_incarceration_sentences)
    state_fine_map: Dict[str, ingest_info.StateFine] = dict(
        _proto_to_py(fine, ingest_info.StateFine, 'state_fine_id')
        for fine in proto.state_fines)
    state_charge_map: Dict[str, ingest_info.StateCharge] = dict(
        _proto_to_py(state_charge, ingest_info.StateCharge,
                     'state_charge_id')
        for state_charge in proto.state_charges)
    state_court_case_map: Dict[str, ingest_info.StateCourtCase] = dict(
        _proto_to_py(court_case, ingest_info.StateCourtCase,
                     'state_court_case_id')
        for court_case in proto.state_court_cases)
    state_bond_map: Dict[str, ingest_info.StateBond] = dict(
        _proto_to_py(state_bond, ingest_info.StateBond, 'state_bond_id')
        for state_bond in proto.state_bonds)
    state_incarceration_period_map: \
        Dict[str, ingest_info.StateIncarcerationPeriod] = dict(
            _proto_to_py(incarceration_period,
                         ingest_info.StateIncarcerationPeriod,
                         'state_incarceration_period_id')
            for incarceration_period in proto.state_incarceration_periods)
    state_supervision_period_map: \
        Dict[str, ingest_info.StateSupervisionPeriod] = dict(
            _proto_to_py(supervision_period,
                         ingest_info.StateSupervisionPeriod,
                         'state_supervision_period_id')
            for supervision_period in proto.state_supervision_periods)
    state_incarceration_incident_map: \
        Dict[str, ingest_info.StateIncarcerationIncident] = dict(
            _proto_to_py(incarceration_incident,
                         ingest_info.StateIncarcerationIncident,
                         'state_incarceration_incident_id')
            for incarceration_incident
            in proto.state_incarceration_incidents)
    state_incarceration_incident_outcome_map: \
        Dict[str, ingest_info.StateIncarcerationIncidentOutcome] = dict(
            _proto_to_py(incarceration_incident_outcome,
                         ingest_info.StateIncarcerationIncidentOutcome,
                         'state_incarceration_incident_outcome_id')
            for incarceration_incident_outcome
            in proto.state_incarceration_incident_outcomes)
    state_parole_decision_map: \
        Dict[str, ingest_info.StateParoleDecision] = dict(
            _proto_to_py(parole_decision, ingest_info.StateParoleDecision,
                         'state_parole_decision_id')
            for parole_decision in proto.state_parole_decisions)
    state_supervision_violation_map: \
        Dict[str, ingest_info.StateSupervisionViolation] = dict(
            _proto_to_py(supervision_violation,
                         ingest_info.StateSupervisionViolation,
                         'state_supervision_violation_id')
            for supervision_violation
            in proto.state_supervision_violations)
    state_supervision_violation_response_map: \
        Dict[str, ingest_info.StateSupervisionViolationResponse] = dict(
            _proto_to_py(violation_response,
                         ingest_info.StateSupervisionViolationResponse,
                         'state_supervision_violation_response_id')
            for violation_response
            in proto.state_supervision_violation_responses)
    state_agent_map: Dict[str, ingest_info.StateAgent] = dict(
        _proto_to_py(agent, ingest_info.StateAgent, 'state_agent_id')
        for agent in proto.state_agents)

    # Pass 2: resolve id references into object references.

    # Wire bonds and sentences to respective charges
    for proto_charge in proto.charges:
        charge = charge_map[proto_charge.charge_id]
        if proto_charge.bond_id:
            charge.bond = bond_map[proto_charge.bond_id]
        if proto_charge.sentence_id:
            charge.sentence = sentence_map[proto_charge.sentence_id]

    # Wire arrests, charges, and holds to respective bookings
    for proto_booking in proto.bookings:
        booking = booking_map[proto_booking.booking_id]
        if proto_booking.arrest_id:
            booking.arrest = arrest_map[proto_booking.arrest_id]
        booking.charges = [
            charge_map[proto_id] for proto_id in proto_booking.charge_ids
        ]
        booking.holds = [
            hold_map[proto_id] for proto_id in proto_booking.hold_ids
        ]

    # Wire bookings to respective people
    for proto_person in proto.people:
        person = person_map[proto_person.person_id]
        person.bookings = [
            booking_map[proto_id] for proto_id in proto_person.booking_ids
        ]

    def _wire_sentence_proto(proto_sentence, proto_sentence_id,
                             proto_sentence_map):
        """Wires up child entities to their respective sentence types."""
        sentence = proto_sentence_map[proto_sentence_id]

        sentence.state_charges = [
            state_charge_map[proto_id]
            for proto_id in proto_sentence.state_charge_ids
        ]
        sentence.state_supervision_periods = [
            state_supervision_period_map[proto_id]
            for proto_id in proto_sentence.state_supervision_period_ids
        ]
        sentence.state_incarceration_periods = [
            state_incarceration_period_map[proto_id]
            for proto_id in proto_sentence.state_incarceration_period_ids
        ]

    # Wire agents to respective parent entities
    for proto_parole_decision in proto.state_parole_decisions:
        parole_decision = state_parole_decision_map[
            proto_parole_decision.state_parole_decision_id]
        parole_decision.decision_agents = [
            state_agent_map[proto_id]
            for proto_id in proto_parole_decision.decision_agent_ids
        ]

    for proto_court_case in proto.state_court_cases:
        state_court_case = state_court_case_map[
            proto_court_case.state_court_case_id]
        if proto_court_case.judge_id:
            state_court_case.judge = \
                state_agent_map[proto_court_case.judge_id]

    # Only the responding officer is wired here. Incident outcomes are
    # attached further down from each incident's own
    # `state_incarceration_incident_outcome_ids`; this loop previously also
    # assigned every outcome in the proto to every incident, which was
    # immediately overwritten by that later loop.
    for proto_incident in proto.state_incarceration_incidents:
        incarceration_incident = state_incarceration_incident_map[
            proto_incident.state_incarceration_incident_id]
        if proto_incident.responding_officer_id:
            incarceration_incident.responding_officer = \
                state_agent_map[proto_incident.responding_officer_id]

    for proto_assessment in proto.state_assessments:
        assessment = \
            state_assessment_map[proto_assessment.state_assessment_id]
        if proto_assessment.conducting_agent_id:
            assessment.conducting_agent = \
                state_agent_map[proto_assessment.conducting_agent_id]

    # Wire child entities to respective violations
    for proto_supervision_violation in proto.state_supervision_violations:
        supervision_violation = state_supervision_violation_map[
            proto_supervision_violation.state_supervision_violation_id]
        supervision_violation.state_supervision_violation_responses = [
            state_supervision_violation_response_map[proto_id]
            for proto_id in proto_supervision_violation.
            state_supervision_violation_response_ids
        ]

    # Wire child entities to respective supervision periods
    for proto_supervision_period in proto.state_supervision_periods:
        supervision_period = state_supervision_period_map[
            proto_supervision_period.state_supervision_period_id]
        supervision_period.state_supervision_violations = [
            state_supervision_violation_map[proto_id]
            for proto_id
            in proto_supervision_period.state_supervision_violation_ids
        ]
        supervision_period.state_assessments = [
            state_assessment_map[proto_id]
            for proto_id in proto_supervision_period.state_assessment_ids
        ]

    # Wire child entities to respective incarceration periods
    for proto_incarceration_period in proto.state_incarceration_periods:
        incarceration_period = state_incarceration_period_map[
            proto_incarceration_period.state_incarceration_period_id]
        incarceration_period.state_incarceration_incidents = [
            state_incarceration_incident_map[proto_id]
            for proto_id
            in proto_incarceration_period.state_incarceration_incident_ids
        ]
        incarceration_period.state_parole_decisions = [
            state_parole_decision_map[proto_id]
            for proto_id
            in proto_incarceration_period.state_parole_decision_ids
        ]
        incarceration_period.state_assessments = [
            state_assessment_map[proto_id]
            for proto_id in proto_incarceration_period.state_assessment_ids
        ]

    # Wire child entities to respective incarceration incidents
    for proto_incarceration_incident in proto.state_incarceration_incidents:
        incarceration_incident = state_incarceration_incident_map[
            proto_incarceration_incident.state_incarceration_incident_id]
        incarceration_incident.state_incarceration_incident_outcomes = [
            state_incarceration_incident_outcome_map[proto_id]
            for proto_id in proto_incarceration_incident.
            state_incarceration_incident_outcome_ids
        ]

    # Wire court cases and state bonds to respective state charges
    for proto_state_charge in proto.state_charges:
        state_charge = state_charge_map[proto_state_charge.state_charge_id]
        if proto_state_charge.state_court_case_id:
            state_charge.state_court_case = state_court_case_map[
                proto_state_charge.state_court_case_id]

        if proto_state_charge.state_bond_id:
            state_charge.state_bond = state_bond_map[
                proto_state_charge.state_bond_id]

    # Wire all state charges and period types to respective sentence types
    for proto_fine in proto.state_fines:
        fine = state_fine_map[proto_fine.state_fine_id]
        fine.state_charges = [
            state_charge_map[proto_id]
            for proto_id in proto_fine.state_charge_ids
        ]

    for proto_incarceration_sentence in proto.state_incarceration_sentences:
        _wire_sentence_proto(
            proto_incarceration_sentence,
            proto_incarceration_sentence.state_incarceration_sentence_id,
            state_incarceration_sentence_map)

    for proto_supervision_sentence in proto.state_supervision_sentences:
        _wire_sentence_proto(
            proto_supervision_sentence,
            proto_supervision_sentence.state_supervision_sentence_id,
            state_supervision_sentence_map)

    # Wire all sentence types to respective sentence groups
    for proto_sentence_group in proto.state_sentence_groups:
        sentence_group = state_sentence_group_map[
            proto_sentence_group.state_sentence_group_id]
        sentence_group.state_fines = [
            state_fine_map[proto_id]
            for proto_id in proto_sentence_group.state_fine_ids
        ]
        sentence_group.state_incarceration_sentences = [
            state_incarceration_sentence_map[proto_id]
            for proto_id
            in proto_sentence_group.state_incarceration_sentence_ids
        ]
        sentence_group.state_supervision_sentences = [
            state_supervision_sentence_map[proto_id]
            for proto_id
            in proto_sentence_group.state_supervision_sentence_ids
        ]

    # Wire child entities to respective state people
    for proto_state_person in proto.state_people:
        state_person = state_person_map[proto_state_person.state_person_id]
        state_person.state_person_races = [
            state_person_race_map[proto_id]
            for proto_id in proto_state_person.state_person_race_ids
        ]
        state_person.state_person_ethnicities = [
            state_person_ethnicity_map[proto_id]
            for proto_id in proto_state_person.state_person_ethnicity_ids
        ]
        state_person.state_person_external_ids = [
            state_person_external_id_map[proto_id]
            for proto_id in proto_state_person.state_person_external_ids_ids
        ]
        state_person.state_aliases = [
            state_alias_map[proto_id]
            for proto_id in proto_state_person.state_alias_ids
        ]
        state_person.state_assessments = [
            state_assessment_map[proto_id]
            for proto_id in proto_state_person.state_assessment_ids
        ]
        state_person.state_sentence_groups = [
            state_sentence_group_map[proto_id]
            for proto_id in proto_state_person.state_sentence_group_ids
        ]

    def _process_external_ids(ii: ingest_info.IngestInfo) -> None:
        """Undoes the external_id preprocessing done when converting from
        py -> proto by passing each stored id through `get_external_id`."""
        for p in ii.state_people:
            for ex_id in p.state_person_external_ids:
                existing_id = cast(str, ex_id.state_person_external_id_id)
                ex_id.state_person_external_id_id = get_external_id(
                    synthetic_id=existing_id)

    # Wire people to ingest info
    ingest_info_py = ingest_info.IngestInfo()
    ingest_info_py.people.extend(person_map.values())
    ingest_info_py.state_people.extend(state_person_map.values())
    _process_external_ids(ingest_info_py)
    return ingest_info_py
def test_convert_ingest_info_state_entities(self):
    """Round-trips a populated state-entity IngestInfo through the proto.

    Builds a python `IngestInfo` object graph rooted at a single state
    person, builds the proto expected from converting it by hand, and then
    checks that:
      * `convert_ingest_info_to_proto` produces exactly the expected proto,
      * `convert_proto_to_ingest_info` restores an object equal to a deep
        copy of the original, and
      * every `state_*` field on the resulting proto is non-empty.

    The order in which messages are added to `expected_proto` is significant
    for the equality check, so statements below should not be reordered.
    """
    # Arrange Python ingest info
    info = ingest_info.IngestInfo()
    person = info.create_state_person()
    person.state_person_id = "person1"
    person.surname = "testname"

    race = person.create_state_person_race()
    race.state_person_race_id = "race1"
    race.race = "white"

    ethnicity = person.create_state_person_ethnicity()
    ethnicity.state_person_ethnicity_id = "ethnicity1"
    ethnicity.ethnicity = "non-hispanic"

    external_id = person.create_state_person_external_id()
    external_id.state_person_external_id_id = "external_id1"
    external_id.id_type = "contrived"

    alias = person.create_state_alias()
    alias.state_alias_id = "alias1"
    alias.surname = "testerson"

    assessment = person.create_state_assessment()
    assessment.state_assessment_id = "assessment1"
    assessment.assessment_score = "42"

    supervising_officer = person.create_state_agent()
    supervising_officer.state_agent_id = "supervising_officer1"
    supervising_officer.full_name = "Officer Supervising"

    assessment_agent = assessment.create_state_agent()
    assessment_agent.state_agent_id = "agent1"
    assessment_agent.full_name = "Officer Jones"

    program_assignment = person.create_state_program_assignment()
    program_assignment.state_program_assignment_id = "assignment1"
    program_assignment.program_id = "program_id1"

    program_assignment_agent = program_assignment.create_state_agent()
    program_assignment_agent.state_agent_id = "program_agent1"
    program_assignment_agent.full_name = "Officer Program"

    group = person.create_state_sentence_group()
    group.state_sentence_group_id = "group1"

    fine = group.create_state_fine()
    fine.state_fine_id = "fine1"

    # Incarceration sentence subtree: early discharge, charge, period,
    # incident (+ outcome and agent) and parole decision (+ agent).
    incarceration_sentence = group.create_state_incarceration_sentence()
    incarceration_sentence.state_incarceration_sentence_id = "is1"
    early_discharge1 = incarceration_sentence.create_state_early_discharge()
    early_discharge1.state_early_discharge_id = "early_discharge1"
    charge1 = incarceration_sentence.create_state_charge()
    charge1.state_charge_id = "charge1"
    charge1.classification_type = "F"
    incarceration_period = (
        incarceration_sentence.create_state_incarceration_period()
    )
    incarceration_period.state_incarceration_period_id = "ip1"
    incarceration_period.status = "IN_CUSTODY"
    incarceration_period.specialized_purpose_for_incarceration = (
        "SHOCK INCARCERATION"
    )
    # The same program assignment object is shared by the person, the
    # incarceration period and (below) the supervision period.
    incarceration_period.state_program_assignments = [program_assignment]
    incident = incarceration_period.create_state_incarceration_incident()
    incident.state_incarceration_incident_id = "incident1"
    incident.incident_type = "FISTICUFFS"
    incident_outcome = incident.create_state_incarceration_incident_outcome()
    incident_outcome.state_incarceration_incident_outcome_id = "incident1-1"
    incident_outcome.outcome_type = "FINE"

    incident_agent = incident.create_state_agent()
    incident_agent.state_agent_id = "agent2"
    incident_agent.full_name = "Officer Thompson"

    decision = incarceration_period.create_state_parole_decision()
    decision.state_parole_decision_id = "decision1"

    decision_agent = decision.create_state_agent()
    decision_agent.state_agent_id = "agent3"
    decision_agent.full_name = "Officer Barkley"

    # Supervision sentence subtree: early discharge, charge, period, case
    # type entry, contact, violation (+ type, condition, response).
    supervision_sentence = group.create_state_supervision_sentence()
    supervision_sentence.state_supervision_sentence_id = "ss1"
    early_discharge2 = supervision_sentence.create_state_early_discharge()
    early_discharge2.state_early_discharge_id = "early_discharge2"
    charge2 = supervision_sentence.create_state_charge()
    charge2.state_charge_id = "charge2"
    charge2.classification_type = "M"
    supervision_period = supervision_sentence.create_state_supervision_period()
    supervision_period.state_supervision_period_id = "sp1"
    supervision_period.status = "TERMINATED"
    supervision_period_agent = supervision_period.create_state_agent()
    supervision_period_agent.state_agent_id = "agentPO"
    supervision_period_agent.full_name = "Officer Paroley"
    supervision_period.state_program_assignments = [program_assignment]

    supervision_case_type_entry = (
        supervision_period.create_state_supervision_case_type_entry()
    )
    supervision_case_type_entry.case_type = "case_type"
    supervision_case_type_entry.state_supervision_case_type_entry_id = (
        "case_type_entry_id"
    )

    supervision_contact = supervision_period.create_state_supervision_contact()
    supervision_contact.state_supervision_contact_id = "supervision_contact_id"
    supervision_contact.contact_type = "contact_type"
    # Same id and name as the supervision period's agent ("agentPO"); the
    # expected proto below contains only one "agentPO" agent entry.
    supervision_contacted_agent = supervision_contact.create_state_agent()
    supervision_contacted_agent.state_agent_id = "agentPO"
    supervision_contacted_agent.full_name = "Officer Paroley"

    violation = supervision_period.create_state_supervision_violation()
    violation.state_supervision_violation_id = "violation1"
    violation.violated_conditions = "cond"
    violation.is_violent = "false"

    violation_type = violation.create_state_supervision_violation_type_entry()
    violation_type.state_supervision_violation_type_entry_id = "violation_type_id"
    violation_type.violation_type = "FELONY"

    violated_condition = (
        violation.create_state_supervision_violated_condition_entry()
    )
    violated_condition.state_supervision_violated_condition_entry_id = (
        "condition_id"
    )
    violated_condition.condition = "CURFEW"

    response = violation.create_state_supervision_violation_response()
    response.state_supervision_violation_response_id = "response1"
    response_decision_agent = response.create_state_agent()
    response_decision_agent.state_agent_id = "agentTERM"
    response_decision_agent.full_name = "Officer Termy"

    response_decision = (
        response.create_state_supervision_violation_response_decision_entry()
    )
    response_decision.state_supervision_violation_response_decision_entry_id = (
        "response_decision_id"
    )
    response_decision.decision = "REVOCATION"
    response_decision.revocation_type = "REINCARCERATION"

    bond = charge1.create_state_bond()
    bond.state_bond_id = "bond1"

    court_case = charge2.create_state_court_case()
    court_case.state_court_case_id = "case1"

    court_case_agent = court_case.create_state_agent()
    court_case_agent.state_agent_id = "agentJ"
    court_case_agent.full_name = "Judge Agent"

    # Arrange Proto ingest info
    expected_proto = ingest_info_pb2.IngestInfo()
    person_pb = expected_proto.state_people.add()
    person_pb.state_person_id = "person1"
    person_pb.surname = "testname"

    person_pb.state_person_race_ids.append("race1")
    race_pb = expected_proto.state_person_races.add()
    race_pb.state_person_race_id = "race1"
    race_pb.race = "white"

    person_pb.state_person_ethnicity_ids.append("ethnicity1")
    ethnicity_pb = expected_proto.state_person_ethnicities.add()
    ethnicity_pb.state_person_ethnicity_id = "ethnicity1"
    ethnicity_pb.ethnicity = "non-hispanic"

    # In the proto the external id is expected in the prefixed form
    # "<id_type>:<id>" ("contrived:external_id1"), not the raw
    # "external_id1" set on the python object above.
    person_pb.state_person_external_ids_ids.append("contrived:external_id1")
    external_id_pb = expected_proto.state_person_external_ids.add()
    external_id_pb.state_person_external_id_id = "contrived:external_id1"
    external_id_pb.id_type = "contrived"

    person_pb.state_alias_ids.append("alias1")
    alias_pb = expected_proto.state_aliases.add()
    alias_pb.state_alias_id = "alias1"
    alias_pb.surname = "testerson"

    person_pb.state_assessment_ids.append("assessment1")
    assessment_pb = expected_proto.state_assessments.add()
    assessment_pb.state_assessment_id = "assessment1"
    assessment_pb.assessment_score = "42"

    person_pb.supervising_officer_id = "supervising_officer1"
    supervising_officer_pb = expected_proto.state_agents.add()
    supervising_officer_pb.state_agent_id = "supervising_officer1"
    supervising_officer_pb.full_name = "Officer Supervising"

    assessment_pb.conducting_agent_id = "agent1"
    assessment_agent_pb = expected_proto.state_agents.add()
    assessment_agent_pb.state_agent_id = "agent1"
    assessment_agent_pb.full_name = "Officer Jones"

    person_pb.state_program_assignment_ids.append("assignment1")
    program_assignment_pb = expected_proto.state_program_assignments.add()
    program_assignment_pb.state_program_assignment_id = "assignment1"
    program_assignment_pb.program_id = "program_id1"

    program_assignment_pb.referring_agent_id = "program_agent1"
    program_assignment_agent_pb = expected_proto.state_agents.add()
    program_assignment_agent_pb.state_agent_id = "program_agent1"
    program_assignment_agent_pb.full_name = "Officer Program"

    person_pb.state_sentence_group_ids.append("group1")
    group_pb = expected_proto.state_sentence_groups.add()
    group_pb.state_sentence_group_id = "group1"

    group_pb.state_fine_ids.append("fine1")
    fine_pb = expected_proto.state_fines.add()
    fine_pb.state_fine_id = "fine1"

    group_pb.state_supervision_sentence_ids.append("ss1")
    supervision_sentence_pb = expected_proto.state_supervision_sentences.add()
    supervision_sentence_pb.state_supervision_sentence_id = "ss1"

    supervision_sentence_pb.state_early_discharge_ids.append("early_discharge2")
    early_discharge2_pb = expected_proto.state_early_discharges.add()
    early_discharge2_pb.state_early_discharge_id = "early_discharge2"

    supervision_sentence_pb.state_charge_ids.append("charge2")
    charge2_pb = expected_proto.state_charges.add()
    charge2_pb.state_charge_id = "charge2"
    charge2_pb.classification_type = "M"

    supervision_sentence_pb.state_supervision_period_ids.append("sp1")
    supervision_period_pb = expected_proto.state_supervision_periods.add()
    supervision_period_pb.state_supervision_period_id = "sp1"
    supervision_period_pb.status = "TERMINATED"
    supervision_period_pb.state_program_assignment_ids.append("assignment1")

    # An ordering requirement in the proto equality check at the end of this
    # test requires that this agent be added after agent1 and before agentPO
    court_case_agent_pb = expected_proto.state_agents.add()
    court_case_agent_pb.state_agent_id = "agentJ"
    court_case_agent_pb.full_name = "Judge Agent"

    supervision_period_pb.supervising_officer_id = "agentPO"
    supervision_period_agent_pb = expected_proto.state_agents.add()
    supervision_period_agent_pb.state_agent_id = "agentPO"
    supervision_period_agent_pb.full_name = "Officer Paroley"

    supervision_case_type_entry_pb = (
        expected_proto.state_supervision_case_type_entries.add()
    )
    supervision_case_type_entry_pb.state_supervision_case_type_entry_id = (
        "case_type_entry_id"
    )
    supervision_case_type_entry_pb.case_type = "case_type"
    supervision_period_pb.state_supervision_case_type_entry_ids.append(
        "case_type_entry_id"
    )

    # The contact references the existing "agentPO" agent by id; no second
    # agent message is added for it.
    supervision_contact_pb = expected_proto.state_supervision_contacts.add()
    supervision_contact_pb.state_supervision_contact_id = "supervision_contact_id"
    supervision_contact_pb.contact_type = "contact_type"
    supervision_contact_pb.contacted_agent_id = "agentPO"
    supervision_period_pb.state_supervision_contact_ids.append(
        "supervision_contact_id"
    )

    supervision_period_pb.state_supervision_violation_entry_ids.append("violation1")
    violation_pb = expected_proto.state_supervision_violations.add()
    violation_pb.state_supervision_violation_id = "violation1"
    violation_pb.is_violent = "false"
    violation_pb.violated_conditions = "cond"

    violation_pb.state_supervision_violation_type_entry_ids.append(
        "violation_type_id"
    )
    violation_type_pb = (
        expected_proto.state_supervision_violation_type_entries.add()
    )
    violation_type_pb.state_supervision_violation_type_entry_id = (
        "violation_type_id"
    )
    violation_type_pb.violation_type = "FELONY"

    violation_pb.state_supervision_violated_condition_entry_ids.append(
        "condition_id"
    )
    # NOTE: `violation_type_pb` is rebound here; from this point it refers to
    # the violated *condition* entry, not the violation type entry above.
    violation_type_pb = (
        expected_proto.state_supervision_violated_condition_entries.add()
    )
    violation_type_pb.state_supervision_violated_condition_entry_id = "condition_id"
    violation_type_pb.condition = "CURFEW"

    violation_pb.state_supervision_violation_response_ids.append("response1")
    response_pb = expected_proto.state_supervision_violation_responses.add()
    response_pb.state_supervision_violation_response_id = "response1"
    response_pb.decision_agent_ids.append("agentTERM")
    response_decision_agent_pb = expected_proto.state_agents.add()
    response_decision_agent_pb.state_agent_id = "agentTERM"
    response_decision_agent_pb.full_name = "Officer Termy"

    response_decision_pb = (
        expected_proto.state_supervision_violation_response_decision_entries.add()
    )
    response_decision_pb.state_supervision_violation_response_decision_entry_id = (
        "response_decision_id"
    )
    response_decision_pb.decision = "REVOCATION"
    response_decision_pb.revocation_type = "REINCARCERATION"
    response_pb.state_supervision_violation_response_decision_entry_ids.append(
        "response_decision_id"
    )

    group_pb.state_incarceration_sentence_ids.append("is1")
    incarceration_sentence_pb = expected_proto.state_incarceration_sentences.add()
    incarceration_sentence_pb.state_incarceration_sentence_id = "is1"

    incarceration_sentence_pb.state_early_discharge_ids.append("early_discharge1")
    early_discharge1_pb = expected_proto.state_early_discharges.add()
    early_discharge1_pb.state_early_discharge_id = "early_discharge1"

    incarceration_sentence_pb.state_charge_ids.append("charge1")
    charge1_pb = expected_proto.state_charges.add()
    charge1_pb.state_charge_id = "charge1"
    charge1_pb.classification_type = "F"

    incarceration_sentence_pb.state_incarceration_period_ids.append("ip1")
    incarceration_period_pb = expected_proto.state_incarceration_periods.add()
    incarceration_period_pb.state_incarceration_period_id = "ip1"
    incarceration_period_pb.status = "IN_CUSTODY"
    incarceration_period_pb.specialized_purpose_for_incarceration = (
        "SHOCK INCARCERATION"
    )
    incarceration_period_pb.state_incarceration_incident_ids.append("incident1")
    incident_pb = expected_proto.state_incarceration_incidents.add()
    incident_pb.state_incarceration_incident_id = "incident1"
    incident_pb.incident_type = "FISTICUFFS"
    incarceration_period_pb.state_program_assignment_ids.append("assignment1")

    incident_pb.responding_officer_id = "agent2"
    incident_agent_pb = expected_proto.state_agents.add()
    incident_agent_pb.state_agent_id = "agent2"
    incident_agent_pb.full_name = "Officer Thompson"

    incident_pb.state_incarceration_incident_outcome_ids.append("incident1-1")
    incident_outcome_pb = expected_proto.state_incarceration_incident_outcomes.add()
    incident_outcome_pb.state_incarceration_incident_outcome_id = "incident1-1"
    incident_outcome_pb.outcome_type = "FINE"

    incarceration_period_pb.state_parole_decision_ids.append("decision1")
    decision_pb = expected_proto.state_parole_decisions.add()
    decision_pb.state_parole_decision_id = "decision1"

    decision_pb.decision_agent_ids.append("agent3")
    decision_agent_pb = expected_proto.state_agents.add()
    decision_agent_pb.state_agent_id = "agent3"
    decision_agent_pb.full_name = "Officer Barkley"

    charge1_pb.state_bond_id = "bond1"
    bond_pb = expected_proto.state_bonds.add()
    bond_pb.state_bond_id = "bond1"

    charge2_pb.state_court_case_id = "case1"
    court_case_pb = expected_proto.state_court_cases.add()
    court_case_pb.state_court_case_id = "case1"

    # The judge's agent message ("agentJ") was already added earlier to
    # satisfy the agent ordering; only the reference is set here.
    court_case_pb.judge_id = "agentJ"

    # Snapshot the python graph before conversion so the round-trip result is
    # compared against a copy taken prior to calling the converter.
    expected_info = copy.deepcopy(info)
    # Act & Assert
    proto = ingest_utils.convert_ingest_info_to_proto(info)
    assert expected_proto == proto

    info_back = ingest_utils.convert_proto_to_ingest_info(proto)
    assert info_back == expected_info

    # Assert that none of the proto's collections are empty, i.e. we've
    # tested all of the object graph
    proto_classes = [field.name for field in proto.DESCRIPTOR.fields]
    for cls in proto_classes:
        if cls.startswith("state_"):
            assert proto.__getattribute__(cls)
def test_convert_ingest_info_duplicate_incarceration_incidents(self):
    """Check conversion when two incidents share the same incident id.

    The Python ingest info holds two incarceration incidents on one
    incarceration period, both with id 'incident1', each carrying its own
    outcome ('incident1-1' and 'incident1-2'). The expected proto contains
    a single incident entry that references both outcome ids, and the
    object converted back from the proto is expected to equal the original
    once the original is rewritten to hold one incident with both outcomes.
    """
    # Arrange Python ingest info
    info = ingest_info.IngestInfo()

    person = info.create_state_person()
    person.state_person_id = "person1"
    person.surname = "testname"

    sentence_group = person.create_state_sentence_group()
    sentence_group.state_sentence_group_id = "group1"

    sentence = sentence_group.create_state_incarceration_sentence()
    sentence.state_incarceration_sentence_id = "is1"

    period = sentence.create_state_incarceration_period()
    period.state_incarceration_period_id = "ip1"
    period.status = "IN_CUSTODY"

    first_incident = period.create_state_incarceration_incident()
    first_incident.state_incarceration_incident_id = "incident1"
    first_incident.incident_type = "FISTICUFFS"

    first_outcome = first_incident.create_state_incarceration_incident_outcome()
    first_outcome.state_incarceration_incident_outcome_id = "incident1-1"
    first_outcome.outcome_type = "FINE"

    # Second incident deliberately reuses the first incident's id.
    duplicate_incident = period.create_state_incarceration_incident()
    duplicate_incident.state_incarceration_incident_id = "incident1"
    duplicate_incident.incident_type = "FISTICUFFS"

    second_outcome = (
        duplicate_incident.create_state_incarceration_incident_outcome()
    )
    second_outcome.state_incarceration_incident_outcome_id = "incident1-2"
    second_outcome.outcome_type = "FINE"

    # Arrange Proto ingest info
    expected_proto = ingest_info_pb2.IngestInfo()

    person_pb = expected_proto.state_people.add()
    person_pb.state_person_id = "person1"
    person_pb.surname = "testname"
    person_pb.state_sentence_group_ids.append("group1")

    group_pb = expected_proto.state_sentence_groups.add()
    group_pb.state_sentence_group_id = "group1"
    group_pb.state_incarceration_sentence_ids.append("is1")

    sentence_pb = expected_proto.state_incarceration_sentences.add()
    sentence_pb.state_incarceration_sentence_id = "is1"
    sentence_pb.state_incarceration_period_ids.append("ip1")

    period_pb = expected_proto.state_incarceration_periods.add()
    period_pb.state_incarceration_period_id = "ip1"
    period_pb.status = "IN_CUSTODY"
    period_pb.state_incarceration_incident_ids.append("incident1")

    incident_pb = expected_proto.state_incarceration_incidents.add()
    incident_pb.state_incarceration_incident_id = "incident1"
    incident_pb.incident_type = "FISTICUFFS"

    # Only one incident entry is expected; it references both outcomes,
    # in the order they were created above.
    for outcome_id in ("incident1-1", "incident1-2"):
        incident_pb.state_incarceration_incident_outcome_ids.append(outcome_id)
        outcome_pb = expected_proto.state_incarceration_incident_outcomes.add()
        outcome_pb.state_incarceration_incident_outcome_id = outcome_id
        outcome_pb.outcome_type = "FINE"

    # Act & Assert
    proto = ingest_utils.convert_ingest_info_to_proto(info)
    assert expected_proto == proto

    # Duplicate IncarcerationIncident is gone.
    info_back = ingest_utils.convert_proto_to_ingest_info(proto)

    # Rewrite the original object graph into the merged shape before
    # comparing: one incident that owns both outcomes.
    period.state_incarceration_incidents = [first_incident]
    first_incident.state_incarceration_incident_outcomes = [
        first_outcome,
        second_outcome,
    ]
    assert info_back == info