def test_persist_to_db_different_regions(self, mock_write, _mock_region,
                                         mock_session_return):
    """Persists two regions in turn and checks each write independently.

    The first region is written and persisted, after which no ingest infos
    should remain for it. The second region is then written under a fresh
    mock session, checked to hold exactly one ingest info, and persisted.
    `mock_write` must have been called once per region with the proto built
    from that region's ingest info.
    """
    def build_task():
        # Both regions use an identically configured scrape task.
        return Task(
            task_type=constants.TaskType.SCRAPE_DATA,
            endpoint=TEST_ENDPOINT,
            response_type=constants.ResponseType.TEXT,
        )

    first_key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)
    second_key = ScrapeKey(REGIONS[1], constants.ScrapeType.BACKGROUND)

    first_info = IngestInfo()
    first_person = first_info.create_person(
        person_id=TEST_ID, full_name=TEST_NAME)
    first_person.create_booking(booking_id=TEST_ID)

    second_info = IngestInfo()
    second_person = second_info.create_person(
        person_id=TEST_ID, full_name=TEST_NAME2)
    second_person.create_booking(booking_id=TEST_ID)

    first_task = build_task()
    second_task = build_task()

    # --- First region: write, persist, and verify it is drained. ---
    first_session = create_mock_session()
    mock_session_return.return_value = first_session

    batch_persistence.write(first_info, first_key, first_task)
    first_expected = ingest_utils.convert_ingest_info_to_proto(first_info)
    batch_persistence.persist_to_database(first_key.region_code,
                                          first_session.start)

    first_written = mock_write.call_args[0][0]
    self.assertEqual(first_written, first_expected)

    # The region that was just persisted should have no ingest infos left.
    remaining_first = \
        datastore_ingest_info.batch_get_ingest_infos_for_region(
            REGIONS[0], first_session.start)
    self.assertEqual(len(remaining_first), 0)

    # --- Second region: fresh session, write, verify, persist. ---
    second_session = create_mock_session()
    mock_session_return.return_value = second_session

    batch_persistence.write(second_info, second_key, second_task)
    pending_second = \
        datastore_ingest_info.batch_get_ingest_infos_for_region(
            REGIONS[1], second_session.start)
    self.assertEqual(len(pending_second), 1)

    second_expected = ingest_utils.convert_ingest_info_to_proto(second_info)
    batch_persistence.persist_to_database(second_key.region_code,
                                          second_session.start)

    second_written = mock_write.call_args[0][0]
    self.assertEqual(second_written, second_expected)
    self.assertEqual(mock_write.call_count, 2)
def _parse_and_persist_contents(self,
                                args: IngestArgsType,
                                contents_handle: ContentsHandleType):
    """Parses non-empty file contents and writes the result to persistence.

    Steps: parse the contents into an ingest info, convert it to a proto,
    build the ingest metadata for this run, and hand both to
    `persistence.write`.

    Raises:
        DirectIngestError: with PARSE_ERROR when parsing yields a falsy
            ingest info, or with PERSISTENCE_ERROR when the write reports
            failure.
    """
    parsed = self._parse(args, contents_handle)
    if not parsed:
        raise DirectIngestError(
            error_type=DirectIngestErrorType.PARSE_ERROR,
            msg="No IngestInfo after parse.")
    logging.info("Successfully parsed data for ingest run [%s]",
                 self._job_tag(args))

    proto = ingest_utils.convert_ingest_info_to_proto(parsed)
    logging.info(
        "Successfully converted ingest_info to proto for ingest "
        "run [%s]", self._job_tag(args))

    metadata = self._get_ingest_metadata(args)
    if not persistence.write(proto, metadata):
        raise DirectIngestError(
            error_type=DirectIngestErrorType.PERSISTENCE_ERROR,
            msg="Persist step failed")
    logging.info("Successfully persisted for ingest run [%s]",
                 self._job_tag(args))
def test_persist_duplicates_to_db(self, mock_write, _mock_region,
                                  mock_session_return):
    """Tests that duplicate ingest_info.Person objects are merged before
    write.

    Three ingest infos are written for the same region: `ii`, a distinct
    `ii_2`, and `ii_1_dup`, a deep copy of `ii`. After persisting, the proto
    handed to `mock_write` must contain the people of `ii` and `ii_2` only
    once each.
    """
    mock_session = mock_session_return.return_value = create_mock_session()
    scrape_key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)

    # Arrange
    ii = IngestInfo()
    ii.create_person(person_id=TEST_ID, full_name=TEST_NAME) \
        .create_booking(booking_id=TEST_ID)

    # The second person belongs on `ii_2`. Adding it to `ii` (as this test
    # previously did) left `ii_2` empty, so the merge of a distinct ingest
    # info was never exercised.
    ii_2 = IngestInfo()
    ii_2.create_person(person_id=TEST_ID2, full_name=TEST_NAME2)

    ii_1_dup = copy.deepcopy(ii)

    # Each write uses a task with a distinct endpoint.
    t1, t2, t3 = (Task(task_type=constants.TaskType.SCRAPE_DATA,
                       endpoint=TEST_ENDPOINT + str(i),
                       response_type=constants.ResponseType.TEXT)
                  for i in range(3))

    # Act
    batch_persistence.write(ii, scrape_key, t1)
    batch_persistence.write(ii_2, scrape_key, t2)
    batch_persistence.write(ii_1_dup, scrape_key, t3)

    batch_persistence.persist_to_database(scrape_key.region_code,
                                          mock_session.start)

    # Assert
    expected_ii = IngestInfo(people=ii.people + ii_2.people)
    expected_proto = ingest_utils.convert_ingest_info_to_proto(expected_ii)
    result_proto = mock_write.call_args[0][0]
    self.assertEqual(result_proto, expected_proto)
def test_scrape_data_no_more_tasks(self, mock_get_more, mock_fetch,
                                   mock_populate, mock_write):
    """A SCRAPE_DATA task with persist=True writes once and queues nothing.

    With batch writes disabled, the scraper is expected to call the write
    mock exactly once with the proto of `self.ii` and the scraper's ingest
    metadata, never ask for more tasks, and leave its task list empty.
    """
    mock_fetch.return_value = (TEST_HTML, {})
    mock_populate.return_value = ScrapedData(ingest_info=self.ii,
                                             persist=True)

    started_at = datetime.datetime.now()
    request = QueueRequest(
        scrape_type=constants.ScrapeType.BACKGROUND,
        next_task=Task.evolve(TEST_TASK,
                              task_type=constants.TaskType.SCRAPE_DATA),
        scraper_start_time=started_at,
    )

    scraper = FakeScraper("test")
    scraper.BATCH_WRITES = False
    scraper._generic_scrape(request)

    region = scraper.region
    metadata = IngestMetadata(
        region.region_code,
        region.jurisdiction_id,
        started_at,
        scraper.get_enum_overrides(),
    )
    proto = convert_ingest_info_to_proto(self.ii)

    self.assertEqual(mock_get_more.call_count, 0)
    self.assertEqual(mock_populate.call_count, 1)
    self.assertEqual(mock_write.call_count, 1)
    mock_write.assert_called_once_with(proto, metadata)
    self.assertEqual(len(scraper.tasks), 0)
def test_persist_to_db(self, mock_write, _mock_region, mock_session_return):
    """Writes one ingest info, persists it, and checks the written proto.

    After persisting, the proto passed to `mock_write` must equal the proto
    built from the ingest info, and no ingest infos should remain for the
    region.
    """
    session = create_mock_session()
    mock_session_return.return_value = session
    key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)

    info = IngestInfo()
    person = info.create_person(person_id=TEST_ID, full_name=TEST_NAME)
    person.create_booking(booking_id=TEST_ID)

    task = Task(
        task_type=constants.TaskType.SCRAPE_DATA,
        endpoint=TEST_ENDPOINT,
        response_type=constants.ResponseType.TEXT,
    )

    batch_persistence.write(info, key, task)
    expected = ingest_utils.convert_ingest_info_to_proto(info)

    batch_persistence.persist_to_database(key.region_code, session.start)

    written = mock_write.call_args[0][0]
    self.assertEqual(written, expected)

    # Once persisted, nothing should be left for this region.
    leftover = datastore_ingest_info.batch_get_ingest_infos_for_region(
        REGIONS[0], session.start)
    self.assertEqual(len(leftover), 0)
def validate_ingest(
        self,
        ingest_info: IngestInfo,
        expected_ingest_info: IngestInfo,
        metadata: IngestMetadata,
) -> IngestInfo:
    """Validates a computed ingest info against the expected one.

    Args:
        ingest_info: the computed ingest info object.
        expected_ingest_info: the ingest info the computed one must match.
            When this is `None`, the computed value is only asserted to
            compare equal to `None` and no further validation runs.
        metadata: ingest metadata passed to the persistence-entity
            converter.

    Returns:
        The `ingest_info` that was passed in, so callers can run extra
        checks on it.
    """
    if expected_ingest_info is None:
        assert ingest_info == expected_ingest_info
        return ingest_info

    # Convert to proto, validate the proto, then convert the proto into
    # persistence entities (which includes parsing strings into types).
    proto = ingest_utils.convert_ingest_info_to_proto(ingest_info)
    validate(proto)
    conversion = ingest_info_converter.convert_to_persistence_entities(
        proto, metadata)

    assert conversion.enum_parsing_errors == 0
    assert conversion.general_parsing_errors == 0
    assert conversion.protected_class_errors == 0
    entity_validator.validate(conversion.people)

    differences = diff_ingest_infos(expected_ingest_info, ingest_info)
    if not differences:
        return ingest_info

    template = ("IngestInfo objects do not match.\n"
                "Expected:\n{}\n"
                "Actual:\n{}\n"
                "Differences:\n{}\n\n"
                "(paste the following) scraped object:"
                "\n{}")
    message = template.format(
        expected_ingest_info,
        ingest_info,
        "\n".join(differences),
        repr(ingest_info),
    )
    self.fail(message)  # type: ignore[attr-defined]

    return ingest_info
def test_multipleOpenBookings_raisesPersistenceError(self):
    """A person with two bookings sharing an admission date fails to write.

    `persistence.write` is expected to return a falsy value for this input.
    """
    info = ii.IngestInfo()
    person = info.create_person(full_name=FULL_NAME_1)
    # Two bookings, each with only an admission date set.
    for _ in range(2):
        person.create_booking(admission_date=DATE_RAW)

    proto = convert_ingest_info_to_proto(info)
    write_result = persistence.write(proto, DEFAULT_METADATA)

    self.assertFalse(write_result)
def test_scrape_data_and_more_no_persist_second_time_persist(
        self, mock_get_more, mock_fetch, mock_populate, mock_write):
    """Runs a two-step scrape: first pass defers the write, second persists.

    Pass one (SCRAPE_DATA_AND_MORE, persist=False) must not write and must
    queue a follow-up request that carries the ingest info. Pass two runs
    that queued request with persist=True and must write exactly once with
    the proto of `self.ii` and the scraper's ingest metadata.
    """
    background = constants.ScrapeType.BACKGROUND
    follow_up_task = Task.evolve(
        TEST_TASK, task_type=constants.TaskType.SCRAPE_DATA)
    mock_get_more.return_value = [follow_up_task]
    mock_fetch.return_value = (TEST_HTML, {})
    mock_populate.return_value = ScrapedData(ingest_info=self.ii,
                                             persist=False)

    started_at = datetime.datetime.now()
    initial_task = Task.evolve(
        TEST_TASK, task_type=constants.TaskType.SCRAPE_DATA_AND_MORE)
    initial_request = QueueRequest(
        scrape_type=background,
        next_task=initial_task,
        scraper_start_time=started_at,
    )

    scraper = FakeScraper("test")
    scraper.BATCH_WRITES = False
    scraper._generic_scrape(initial_request)

    # The ingest info travels with the queued request because it was not
    # persisted on this pass.
    queued = [
        QueueRequest(
            scrape_type=background,
            next_task=follow_up_task,
            scraper_start_time=started_at,
            ingest_info=self.ii,
        )
    ]

    self.assertEqual(mock_get_more.call_count, 1)
    self.assertEqual(mock_populate.call_count, 1)
    self.assertEqual(mock_write.call_count, 0)
    mock_get_more.assert_called_once_with(TEST_HTML, initial_task)
    self.assertCountEqual(queued, scraper.tasks)

    # Second pass: the same data, this time flagged for persistence.
    mock_populate.return_value = ScrapedData(ingest_info=self.ii,
                                             persist=True)
    scraper._generic_scrape(scraper.tasks[0])

    self.assertEqual(mock_get_more.call_count, 1)
    self.assertEqual(mock_populate.call_count, 2)
    self.assertEqual(mock_write.call_count, 1)

    region = scraper.region
    metadata = IngestMetadata(
        region.region_code,
        region.jurisdiction_id,
        started_at,
        scraper.get_enum_overrides(),
    )
    proto = convert_ingest_info_to_proto(self.ii)
    mock_write.assert_called_once_with(proto, metadata)
def test_convert_ingest_info_one_charge_to_one_bond(self, mock_create):
    """Two charges, each with its own bond, convert to two distinct bonds.

    The bonds are created without ids, so the expected proto uses the ids
    '1_GENERATE' and '2_GENERATE'. The test also checks that converting the
    proto back yields an ingest info equal to the original.
    """
    mock_create.side_effect = self._create_generated_id

    # Arrange: Python ingest info.
    info = ingest_info.IngestInfo()
    py_person = info.create_person()
    py_person.person_id = 'id1'
    py_booking = py_person.create_booking()
    py_booking.booking_id = 'id1'
    for charge_id in ('id1', 'id2'):
        py_charge = py_booking.create_charge()
        py_charge.charge_id = charge_id
        py_bond = py_charge.create_bond()
        py_bond.amount = '$1'

    # Arrange: expected proto.
    expected_proto = ingest_info_pb2.IngestInfo()
    pb_person = expected_proto.people.add()
    pb_person.person_id = 'id1'
    pb_person.booking_ids.append('id1')
    pb_booking = expected_proto.bookings.add()
    pb_booking.booking_id = 'id1'
    pb_booking.charge_ids.extend(['id1', 'id2'])
    for charge_id, bond_id in (('id1', '1_GENERATE'),
                               ('id2', '2_GENERATE')):
        pb_charge = expected_proto.charges.add()
        pb_charge.charge_id = charge_id
        pb_bond = expected_proto.bonds.add()
        pb_bond.amount = '$1'
        pb_bond.bond_id = bond_id
        pb_charge.bond_id = pb_bond.bond_id

    # Act & Assert
    proto = ingest_utils.convert_ingest_info_to_proto(info)
    assert expected_proto == proto

    round_tripped = ingest_utils.convert_proto_to_ingest_info(proto)
    assert round_tripped == info
def test_convert_ingest_info_one_charge_to_one_bond(self, mock_create):
    """Two charges, each with its own bond, convert to two distinct bonds.

    The bonds are created without ids, so the expected proto uses the ids
    "1_GENERATE" and "2_GENERATE". The test also checks that converting the
    proto back yields an ingest info equal to the original.
    """
    mock_create.side_effect = self._create_generated_id

    # Arrange: Python ingest info.
    info = ingest_info.IngestInfo()
    py_person = info.create_person()
    py_person.person_id = "id1"
    py_booking = py_person.create_booking()
    py_booking.booking_id = "id1"
    for charge_id in ("id1", "id2"):
        py_charge = py_booking.create_charge()
        py_charge.charge_id = charge_id
        py_bond = py_charge.create_bond()
        py_bond.amount = "$1"

    # Arrange: expected proto.
    expected_proto = ingest_info_pb2.IngestInfo()
    pb_person = expected_proto.people.add()
    pb_person.person_id = "id1"
    pb_person.booking_ids.append("id1")
    pb_booking = expected_proto.bookings.add()
    pb_booking.booking_id = "id1"
    pb_booking.charge_ids.extend(["id1", "id2"])
    for charge_id, bond_id in (("id1", "1_GENERATE"),
                               ("id2", "2_GENERATE")):
        pb_charge = expected_proto.charges.add()
        pb_charge.charge_id = charge_id
        pb_bond = expected_proto.bonds.add()
        pb_bond.amount = "$1"
        pb_bond.bond_id = bond_id
        pb_charge.bond_id = pb_bond.bond_id

    # Act & Assert
    proto = ingest_utils.convert_ingest_info_to_proto(info)
    assert expected_proto == proto

    round_tripped = ingest_utils.convert_proto_to_ingest_info(proto)
    assert round_tripped == info
def test_convert_ingest_info_id_is_generated(self, mock_create):
    """A person and booking created without ids get generated ids.

    The expected proto uses "1_GENERATE" for the person and "2_GENERATE"
    for the booking. The test also checks that converting the proto back
    yields an ingest info equal to the original.
    """
    mock_create.side_effect = self._create_generated_id

    # Arrange: Python ingest info with no explicit ids.
    info = ingest_info.IngestInfo()
    py_person = info.create_person()
    py_person.surname = "testname"
    py_person.create_booking()

    # Arrange: expected proto.
    expected_proto = ingest_info_pb2.IngestInfo()
    pb_person = expected_proto.people.add()
    pb_person.surname = "testname"
    pb_person.person_id = "1_GENERATE"
    pb_booking = expected_proto.bookings.add()
    pb_booking.booking_id = "2_GENERATE"
    pb_person.booking_ids.append(pb_booking.booking_id)

    # Act & Assert
    proto = ingest_utils.convert_ingest_info_to_proto(info)
    assert proto == expected_proto

    round_tripped = ingest_utils.convert_proto_to_ingest_info(proto)
    assert round_tripped == info
def test_persist_to_db_same_task_one_fail_one_pass(self, mock_write,
                                                   _mock_region,
                                                   mock_session_return):
    """A success and an error recorded for equal tasks still persist.

    One ingest info is written for a task and an error is recorded for a
    second, identically configured task. Persisting is expected to succeed,
    write the proto of the ingest info, and leave no ingest infos behind
    for the region.
    """
    def build_task():
        return Task(
            task_type=constants.TaskType.SCRAPE_DATA,
            endpoint=TEST_ENDPOINT,
            response_type=constants.ResponseType.TEXT,
        )

    session = create_mock_session()
    mock_session_return.return_value = session
    key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)
    mock_write.return_value = True

    info = IngestInfo()
    person = info.create_person(person_id=TEST_ID, full_name=TEST_NAME)
    person.create_booking(booking_id=TEST_ID)

    passing_task = build_task()
    # Because the tasks are the same, we expect that to be counted as a
    # pass.
    failing_task = build_task()

    batch_persistence.write(info, key, passing_task)
    batch_persistence.write_error(TEST_ERROR, TEST_TRACE, failing_task, key)

    expected = ingest_utils.convert_ingest_info_to_proto(info)

    persisted = batch_persistence.persist_to_database(key.region_code,
                                                      session.start)
    self.assertTrue(persisted)

    written = mock_write.call_args[0][0]
    self.assertEqual(written, expected)

    leftover = datastore_ingest_info.batch_get_ingest_infos_for_region(
        REGIONS[0], session.start)
    self.assertEqual(len(leftover), 0)
def _get_proto_from_batch_ingest_info_data_list(
        batch_ingest_info_data_list: List[BatchIngestInfoData]
) -> Tuple[ingest_info_pb2.IngestInfo, Dict[int, BatchIngestInfoData]]:
    """Builds one ingest info proto out of all batched ingest infos.

    Args:
        batch_ingest_info_data_list: A list of BatchIngestInfoData.

    Returns:
        A tuple of:
          - an IngestInfo proto built from the deduplicated people of the
            first error-free entry seen for each task hash, and
          - a dict mapping task hash to the most recent errored entry for
            every task hash that never had an error-free entry.
    """
    logging.info("Starting generation of proto")

    collected_infos: List[IngestInfo] = []
    # Tasks are tracked by hash so that "have we seen this task?" is a set
    # lookup rather than a scan over every previously seen task.
    succeeded: Set[int] = set()
    failed: Dict[int, BatchIngestInfoData] = {}

    for datum in batch_ingest_info_data_list:
        task_hash = datum.task_hash

        # Once a task has succeeded, any later entries for it (errors or
        # repeated successes) are ignored: a task may run several times
        # before passing, and those earlier/later attempts must not count.
        if task_hash in succeeded:
            continue

        if datum.error:
            failed[task_hash] = datum
            continue

        # First success for this task: it overrides any recorded failure.
        succeeded.add(task_hash)
        failed.pop(task_hash, None)
        if datum.ingest_info:
            collected_infos.append(datum.ingest_info)

    merged_info = _dedup_people(collected_infos)
    proto = ingest_utils.convert_ingest_info_to_proto(merged_info)
    ingest_info_validator.validate(proto)
    logging.info("Generated proto for [%s] people", len(proto.people))
    return proto, failed
def test_convert_ingest_info_id_is_not_generated(self):
    """Explicit person and booking ids are carried into the proto as-is.

    The test also checks that converting the proto back yields an ingest
    info equal to the original.
    """
    # Arrange: Python ingest info with explicit ids.
    info = ingest_info.IngestInfo()
    py_person = info.create_person()
    py_person.person_id = "id1"
    py_person.surname = "testname"
    py_booking = py_person.create_booking()
    py_booking.booking_id = "id2"
    py_booking.admission_date = "testdate"

    # Arrange: expected proto.
    expected_proto = ingest_info_pb2.IngestInfo()
    pb_person = expected_proto.people.add()
    pb_person.person_id = "id1"
    pb_person.surname = "testname"
    pb_person.booking_ids.append("id2")
    pb_booking = expected_proto.bookings.add()
    pb_booking.booking_id = "id2"
    pb_booking.admission_date = "testdate"

    # Act & Assert
    proto = ingest_utils.convert_ingest_info_to_proto(info)
    assert expected_proto == proto

    round_tripped = ingest_utils.convert_proto_to_ingest_info(proto)
    assert round_tripped == info
def _parse_and_persist_contents(self,
                                args: IngestArgsType,
                                contents: ContentsType):
    """Parses non-empty file contents and writes the result to persistence.

    Steps: parse the contents into an ingest info, convert it to a proto,
    build the ingest metadata from this controller's region, the ingest
    time on `args`, the enum overrides and the system level, and hand both
    to `persistence.write`.

    Raises:
        DirectIngestError: with PARSE_ERROR when parsing yields a falsy
            ingest info, or with PERSISTENCE_ERROR when the write reports
            failure.
    """
    parsed = self._parse(args, contents)

    # TODO(1738): implement retry on fail.
    if not parsed:
        raise DirectIngestError(
            error_type=DirectIngestErrorType.PARSE_ERROR,
            msg="No IngestInfo after parse.")
    logging.info("Successfully parsed data for ingest run [%s]",
                 self._job_tag(args))

    proto = ingest_utils.convert_ingest_info_to_proto(parsed)
    logging.info(
        "Successfully converted ingest_info to proto for ingest "
        "run [%s]", self._job_tag(args))

    metadata = IngestMetadata(
        self.region.region_code,
        self.region.jurisdiction_id,
        args.ingest_time,
        self.get_enum_overrides(),
        self.system_level,
    )
    if not persistence.write(proto, metadata):
        raise DirectIngestError(
            error_type=DirectIngestErrorType.PERSISTENCE_ERROR,
            msg="Persist step failed")
    logging.info("Successfully persisted for ingest run [%s]",
                 self._job_tag(args))
def test_convert_ingest_info_duplicate_incarceration_incidents(self):
    """Two incidents sharing an id collapse into one in the proto.

    The incarceration period gets two incidents with the same id
    'incident1', each with its own outcome. The expected proto holds a
    single incident that references both outcomes. Before the round-trip
    comparison, the original ingest info is rewritten to that collapsed
    shape.
    """
    # Arrange Python ingest info
    info = ingest_info.IngestInfo()

    py_person = info.create_state_person()
    py_person.state_person_id = 'person1'
    py_person.surname = 'testname'

    py_group = py_person.create_state_sentence_group()
    py_group.state_sentence_group_id = 'group1'

    py_sentence = py_group.create_state_incarceration_sentence()
    py_sentence.state_incarceration_sentence_id = 'is1'

    py_period = py_sentence.create_state_incarceration_period()
    py_period.state_incarceration_period_id = 'ip1'
    py_period.status = 'IN_CUSTODY'

    first_incident = py_period.create_state_incarceration_incident()
    first_incident.state_incarceration_incident_id = 'incident1'
    first_incident.incident_type = 'FISTICUFFS'

    first_outcome = (
        first_incident.create_state_incarceration_incident_outcome())
    first_outcome.state_incarceration_incident_outcome_id = 'incident1-1'
    first_outcome.outcome_type = 'FINE'

    # Same incident id and type as above, but a different outcome.
    duplicate_incident = py_period.create_state_incarceration_incident()
    duplicate_incident.state_incarceration_incident_id = 'incident1'
    duplicate_incident.incident_type = 'FISTICUFFS'

    second_outcome = (
        duplicate_incident.create_state_incarceration_incident_outcome())
    second_outcome.state_incarceration_incident_outcome_id = 'incident1-2'
    second_outcome.outcome_type = 'FINE'

    # Arrange Proto ingest info
    expected_proto = ingest_info_pb2.IngestInfo()

    person_pb = expected_proto.state_people.add()
    person_pb.state_person_id = 'person1'
    person_pb.surname = 'testname'
    person_pb.state_sentence_group_ids.append('group1')

    group_pb = expected_proto.state_sentence_groups.add()
    group_pb.state_sentence_group_id = 'group1'
    group_pb.state_incarceration_sentence_ids.append('is1')

    sentence_pb = expected_proto.state_incarceration_sentences.add()
    sentence_pb.state_incarceration_sentence_id = 'is1'
    sentence_pb.state_incarceration_period_ids.append('ip1')

    period_pb = expected_proto.state_incarceration_periods.add()
    period_pb.state_incarceration_period_id = 'ip1'
    period_pb.status = 'IN_CUSTODY'
    period_pb.state_incarceration_incident_ids.append('incident1')

    incident_pb = expected_proto.state_incarceration_incidents.add()
    incident_pb.state_incarceration_incident_id = 'incident1'
    incident_pb.incident_type = 'FISTICUFFS'

    # One incident, two outcomes, in creation order.
    for outcome_id in ('incident1-1', 'incident1-2'):
        incident_pb.state_incarceration_incident_outcome_ids.append(
            outcome_id)
        outcome_pb = (
            expected_proto.state_incarceration_incident_outcomes.add())
        outcome_pb.state_incarceration_incident_outcome_id = outcome_id
        outcome_pb.outcome_type = 'FINE'

    # Act & Assert
    proto = ingest_utils.convert_ingest_info_to_proto(info)
    assert expected_proto == proto

    # Duplicate IncarcerationIncident is gone.
    round_tripped = ingest_utils.convert_proto_to_ingest_info(proto)
    py_period.state_incarceration_incidents = [first_incident]
    first_incident.state_incarceration_incident_outcomes = [
        first_outcome,
        second_outcome,
    ]
    assert round_tripped == info
def test_convert_ingest_info_state_entities(self):
    """Converts a graph of state entities to proto and back.

    The test builds a Python ingest info rooted at one state person, builds
    the expected proto by hand, and asserts that
    `convert_ingest_info_to_proto` produces an equal proto. It then converts
    the proto back and compares the result with a deep copy of the ingest
    info taken before conversion. Finally it asserts that every proto field
    whose name starts with "state_" is non-empty.
    """
    # Arrange Python ingest info
    info = ingest_info.IngestInfo()
    person = info.create_state_person()
    person.state_person_id = "person1"
    person.surname = "testname"

    race = person.create_state_person_race()
    race.state_person_race_id = "race1"
    race.race = "white"

    ethnicity = person.create_state_person_ethnicity()
    ethnicity.state_person_ethnicity_id = "ethnicity1"
    ethnicity.ethnicity = "non-hispanic"

    external_id = person.create_state_person_external_id()
    external_id.state_person_external_id_id = "external_id1"
    external_id.id_type = "contrived"

    alias = person.create_state_alias()
    alias.state_alias_id = "alias1"
    alias.surname = "testerson"

    assessment = person.create_state_assessment()
    assessment.state_assessment_id = "assessment1"
    assessment.assessment_score = "42"

    supervising_officer = person.create_state_agent()
    supervising_officer.state_agent_id = "supervising_officer1"
    supervising_officer.full_name = "Officer Supervising"

    assessment_agent = assessment.create_state_agent()
    assessment_agent.state_agent_id = "agent1"
    assessment_agent.full_name = "Officer Jones"

    # This one program assignment is attached to the person here and, further
    # down, to both the incarceration period and the supervision period.
    program_assignment = person.create_state_program_assignment()
    program_assignment.state_program_assignment_id = "assignment1"
    program_assignment.program_id = "program_id1"

    program_assignment_agent = program_assignment.create_state_agent()
    program_assignment_agent.state_agent_id = "program_agent1"
    program_assignment_agent.full_name = "Officer Program"

    group = person.create_state_sentence_group()
    group.state_sentence_group_id = "group1"

    fine = group.create_state_fine()
    fine.state_fine_id = "fine1"

    incarceration_sentence = group.create_state_incarceration_sentence()
    incarceration_sentence.state_incarceration_sentence_id = "is1"

    early_discharge1 = incarceration_sentence.create_state_early_discharge()
    early_discharge1.state_early_discharge_id = "early_discharge1"

    charge1 = incarceration_sentence.create_state_charge()
    charge1.state_charge_id = "charge1"
    charge1.classification_type = "F"

    incarceration_period = (
        incarceration_sentence.create_state_incarceration_period()
    )
    incarceration_period.state_incarceration_period_id = "ip1"
    incarceration_period.status = "IN_CUSTODY"
    incarceration_period.specialized_purpose_for_incarceration = (
        "SHOCK INCARCERATION"
    )
    incarceration_period.state_program_assignments = [program_assignment]

    incident = incarceration_period.create_state_incarceration_incident()
    incident.state_incarceration_incident_id = "incident1"
    incident.incident_type = "FISTICUFFS"

    incident_outcome = incident.create_state_incarceration_incident_outcome()
    incident_outcome.state_incarceration_incident_outcome_id = "incident1-1"
    incident_outcome.outcome_type = "FINE"

    incident_agent = incident.create_state_agent()
    incident_agent.state_agent_id = "agent2"
    incident_agent.full_name = "Officer Thompson"

    decision = incarceration_period.create_state_parole_decision()
    decision.state_parole_decision_id = "decision1"

    decision_agent = decision.create_state_agent()
    decision_agent.state_agent_id = "agent3"
    decision_agent.full_name = "Officer Barkley"

    supervision_sentence = group.create_state_supervision_sentence()
    supervision_sentence.state_supervision_sentence_id = "ss1"

    early_discharge2 = supervision_sentence.create_state_early_discharge()
    early_discharge2.state_early_discharge_id = "early_discharge2"

    charge2 = supervision_sentence.create_state_charge()
    charge2.state_charge_id = "charge2"
    charge2.classification_type = "M"

    supervision_period = supervision_sentence.create_state_supervision_period()
    supervision_period.state_supervision_period_id = "sp1"
    supervision_period.status = "TERMINATED"

    supervision_period_agent = supervision_period.create_state_agent()
    supervision_period_agent.state_agent_id = "agentPO"
    supervision_period_agent.full_name = "Officer Paroley"

    supervision_period.state_program_assignments = [program_assignment]

    supervision_case_type_entry = (
        supervision_period.create_state_supervision_case_type_entry()
    )
    supervision_case_type_entry.case_type = "case_type"
    supervision_case_type_entry.state_supervision_case_type_entry_id = (
        "case_type_entry_id"
    )

    supervision_contact = supervision_period.create_state_supervision_contact()
    supervision_contact.state_supervision_contact_id = "supervision_contact_id"
    supervision_contact.contact_type = "contact_type"

    # A second agent object with the same id and name as the supervision
    # period's agent ("agentPO"); the expected proto below contains only one
    # state_agents entry with that id.
    supervision_contacted_agent = supervision_contact.create_state_agent()
    supervision_contacted_agent.state_agent_id = "agentPO"
    supervision_contacted_agent.full_name = "Officer Paroley"

    violation = supervision_period.create_state_supervision_violation()
    violation.state_supervision_violation_id = "violation1"
    violation.violated_conditions = "cond"
    violation.is_violent = "false"

    violation_type = violation.create_state_supervision_violation_type_entry()
    violation_type.state_supervision_violation_type_entry_id = "violation_type_id"
    violation_type.violation_type = "FELONY"

    violated_condition = (
        violation.create_state_supervision_violated_condition_entry()
    )
    violated_condition.state_supervision_violated_condition_entry_id = (
        "condition_id"
    )
    violated_condition.condition = "CURFEW"

    response = violation.create_state_supervision_violation_response()
    response.state_supervision_violation_response_id = "response1"

    response_decision_agent = response.create_state_agent()
    response_decision_agent.state_agent_id = "agentTERM"
    response_decision_agent.full_name = "Officer Termy"

    response_decision = (
        response.create_state_supervision_violation_response_decision_entry()
    )
    response_decision.state_supervision_violation_response_decision_entry_id = (
        "response_decision_id"
    )
    response_decision.decision = "REVOCATION"
    response_decision.revocation_type = "REINCARCERATION"

    bond = charge1.create_state_bond()
    bond.state_bond_id = "bond1"

    court_case = charge2.create_state_court_case()
    court_case.state_court_case_id = "case1"

    court_case_agent = court_case.create_state_agent()
    court_case_agent.state_agent_id = "agentJ"
    court_case_agent.full_name = "Judge Agent"

    # Arrange Proto ingest info
    expected_proto = ingest_info_pb2.IngestInfo()
    person_pb = expected_proto.state_people.add()
    person_pb.state_person_id = "person1"
    person_pb.surname = "testname"

    person_pb.state_person_race_ids.append("race1")
    race_pb = expected_proto.state_person_races.add()
    race_pb.state_person_race_id = "race1"
    race_pb.race = "white"

    person_pb.state_person_ethnicity_ids.append("ethnicity1")
    ethnicity_pb = expected_proto.state_person_ethnicities.add()
    ethnicity_pb.state_person_ethnicity_id = "ethnicity1"
    ethnicity_pb.ethnicity = "non-hispanic"

    # Unlike the other ids, the expected external id is not the raw Python
    # value "external_id1": it is the id_type and the id joined by a colon.
    person_pb.state_person_external_ids_ids.append("contrived:external_id1")
    external_id_pb = expected_proto.state_person_external_ids.add()
    external_id_pb.state_person_external_id_id = "contrived:external_id1"
    external_id_pb.id_type = "contrived"

    person_pb.state_alias_ids.append("alias1")
    alias_pb = expected_proto.state_aliases.add()
    alias_pb.state_alias_id = "alias1"
    alias_pb.surname = "testerson"

    person_pb.state_assessment_ids.append("assessment1")
    assessment_pb = expected_proto.state_assessments.add()
    assessment_pb.state_assessment_id = "assessment1"
    assessment_pb.assessment_score = "42"

    person_pb.supervising_officer_id = "supervising_officer1"
    supervising_officer_pb = expected_proto.state_agents.add()
    supervising_officer_pb.state_agent_id = "supervising_officer1"
    supervising_officer_pb.full_name = "Officer Supervising"

    assessment_pb.conducting_agent_id = "agent1"
    assessment_agent_pb = expected_proto.state_agents.add()
    assessment_agent_pb.state_agent_id = "agent1"
    assessment_agent_pb.full_name = "Officer Jones"

    # The shared program assignment appears once in the proto; the person,
    # supervision period and incarceration period each reference it by id.
    person_pb.state_program_assignment_ids.append("assignment1")
    program_assignment_pb = expected_proto.state_program_assignments.add()
    program_assignment_pb.state_program_assignment_id = "assignment1"
    program_assignment_pb.program_id = "program_id1"

    program_assignment_pb.referring_agent_id = "program_agent1"
    program_assignment_agent_pb = expected_proto.state_agents.add()
    program_assignment_agent_pb.state_agent_id = "program_agent1"
    program_assignment_agent_pb.full_name = "Officer Program"

    person_pb.state_sentence_group_ids.append("group1")
    group_pb = expected_proto.state_sentence_groups.add()
    group_pb.state_sentence_group_id = "group1"

    group_pb.state_fine_ids.append("fine1")
    fine_pb = expected_proto.state_fines.add()
    fine_pb.state_fine_id = "fine1"

    group_pb.state_supervision_sentence_ids.append("ss1")
    supervision_sentence_pb = expected_proto.state_supervision_sentences.add()
    supervision_sentence_pb.state_supervision_sentence_id = "ss1"

    supervision_sentence_pb.state_early_discharge_ids.append("early_discharge2")
    early_discharge2_pb = expected_proto.state_early_discharges.add()
    early_discharge2_pb.state_early_discharge_id = "early_discharge2"

    supervision_sentence_pb.state_charge_ids.append("charge2")
    charge2_pb = expected_proto.state_charges.add()
    charge2_pb.state_charge_id = "charge2"
    charge2_pb.classification_type = "M"

    supervision_sentence_pb.state_supervision_period_ids.append("sp1")
    supervision_period_pb = expected_proto.state_supervision_periods.add()
    supervision_period_pb.state_supervision_period_id = "sp1"
    supervision_period_pb.status = "TERMINATED"
    supervision_period_pb.state_program_assignment_ids.append("assignment1")

    # An ordering requirement in the proto equality check at the end of this
    # test requires that this agent be added after agent1 and before agentPO
    court_case_agent_pb = expected_proto.state_agents.add()
    court_case_agent_pb.state_agent_id = "agentJ"
    court_case_agent_pb.full_name = "Judge Agent"

    supervision_period_pb.supervising_officer_id = "agentPO"
    supervision_period_agent_pb = expected_proto.state_agents.add()
    supervision_period_agent_pb.state_agent_id = "agentPO"
    supervision_period_agent_pb.full_name = "Officer Paroley"

    supervision_case_type_entry_pb = (
        expected_proto.state_supervision_case_type_entries.add()
    )
    supervision_case_type_entry_pb.state_supervision_case_type_entry_id = (
        "case_type_entry_id"
    )
    supervision_case_type_entry_pb.case_type = "case_type"
    supervision_period_pb.state_supervision_case_type_entry_ids.append(
        "case_type_entry_id"
    )

    # The contact refers to "agentPO" by id only; no further agent entry is
    # added for it.
    supervision_contact_pb = expected_proto.state_supervision_contacts.add()
    supervision_contact_pb.state_supervision_contact_id = "supervision_contact_id"
    supervision_contact_pb.contact_type = "contact_type"
    supervision_contact_pb.contacted_agent_id = "agentPO"
    supervision_period_pb.state_supervision_contact_ids.append(
        "supervision_contact_id"
    )

    supervision_period_pb.state_supervision_violation_entry_ids.append("violation1")
    violation_pb = expected_proto.state_supervision_violations.add()
    violation_pb.state_supervision_violation_id = "violation1"
    violation_pb.is_violent = "false"
    violation_pb.violated_conditions = "cond"

    violation_pb.state_supervision_violation_type_entry_ids.append(
        "violation_type_id"
    )
    violation_type_pb = (
        expected_proto.state_supervision_violation_type_entries.add()
    )
    violation_type_pb.state_supervision_violation_type_entry_id = (
        "violation_type_id"
    )
    violation_type_pb.violation_type = "FELONY"

    violation_pb.state_supervision_violated_condition_entry_ids.append(
        "condition_id"
    )
    # NOTE: `violation_type_pb` is rebound here and from this point refers to
    # the violated-condition entry, not the violation-type entry above.
    violation_type_pb = (
        expected_proto.state_supervision_violated_condition_entries.add()
    )
    violation_type_pb.state_supervision_violated_condition_entry_id = "condition_id"
    violation_type_pb.condition = "CURFEW"

    violation_pb.state_supervision_violation_response_ids.append("response1")
    response_pb = expected_proto.state_supervision_violation_responses.add()
    response_pb.state_supervision_violation_response_id = "response1"

    response_pb.decision_agent_ids.append("agentTERM")
    response_decision_agent_pb = expected_proto.state_agents.add()
    response_decision_agent_pb.state_agent_id = "agentTERM"
    response_decision_agent_pb.full_name = "Officer Termy"

    response_decision_pb = (
        expected_proto.state_supervision_violation_response_decision_entries.add()
    )
    response_decision_pb.state_supervision_violation_response_decision_entry_id = (
        "response_decision_id"
    )
    response_decision_pb.decision = "REVOCATION"
    response_decision_pb.revocation_type = "REINCARCERATION"
    response_pb.state_supervision_violation_response_decision_entry_ids.append(
        "response_decision_id"
    )

    group_pb.state_incarceration_sentence_ids.append("is1")
    incarceration_sentence_pb = expected_proto.state_incarceration_sentences.add()
    incarceration_sentence_pb.state_incarceration_sentence_id = "is1"

    incarceration_sentence_pb.state_early_discharge_ids.append("early_discharge1")
    early_discharge1_pb = expected_proto.state_early_discharges.add()
    early_discharge1_pb.state_early_discharge_id = "early_discharge1"

    incarceration_sentence_pb.state_charge_ids.append("charge1")
    charge1_pb = expected_proto.state_charges.add()
    charge1_pb.state_charge_id = "charge1"
    charge1_pb.classification_type = "F"

    incarceration_sentence_pb.state_incarceration_period_ids.append("ip1")
    incarceration_period_pb = expected_proto.state_incarceration_periods.add()
    incarceration_period_pb.state_incarceration_period_id = "ip1"
    incarceration_period_pb.status = "IN_CUSTODY"
    incarceration_period_pb.specialized_purpose_for_incarceration = (
        "SHOCK INCARCERATION"
    )
    incarceration_period_pb.state_incarceration_incident_ids.append("incident1")
    incident_pb = expected_proto.state_incarceration_incidents.add()
    incident_pb.state_incarceration_incident_id = "incident1"
    incident_pb.incident_type = "FISTICUFFS"
    incarceration_period_pb.state_program_assignment_ids.append("assignment1")

    incident_pb.responding_officer_id = "agent2"
    incident_agent_pb = expected_proto.state_agents.add()
    incident_agent_pb.state_agent_id = "agent2"
    incident_agent_pb.full_name = "Officer Thompson"

    incident_pb.state_incarceration_incident_outcome_ids.append("incident1-1")
    incident_outcome_pb = expected_proto.state_incarceration_incident_outcomes.add()
    incident_outcome_pb.state_incarceration_incident_outcome_id = "incident1-1"
    incident_outcome_pb.outcome_type = "FINE"

    incarceration_period_pb.state_parole_decision_ids.append("decision1")
    decision_pb = expected_proto.state_parole_decisions.add()
    decision_pb.state_parole_decision_id = "decision1"

    decision_pb.decision_agent_ids.append("agent3")
    decision_agent_pb = expected_proto.state_agents.add()
    decision_agent_pb.state_agent_id = "agent3"
    decision_agent_pb.full_name = "Officer Barkley"

    charge1_pb.state_bond_id = "bond1"
    bond_pb = expected_proto.state_bonds.add()
    bond_pb.state_bond_id = "bond1"

    # The judge's agent entry ("agentJ") was already added further up; only
    # the reference is set here.
    charge2_pb.state_court_case_id = "case1"
    court_case_pb = expected_proto.state_court_cases.add()
    court_case_pb.state_court_case_id = "case1"
    court_case_pb.judge_id = "agentJ"

    # Snapshot taken before conversion; the round-trip result is compared to
    # this copy rather than to `info` itself.
    # NOTE(review): presumably because the conversion may modify `info` in
    # place -- confirm against convert_ingest_info_to_proto.
    expected_info = copy.deepcopy(info)

    # Act & Assert
    proto = ingest_utils.convert_ingest_info_to_proto(info)
    assert expected_proto == proto

    info_back = ingest_utils.convert_proto_to_ingest_info(proto)
    assert info_back == expected_info

    # Assert that none of the proto's collections are empty, i.e. we've
    # tested all of the object graph
    proto_classes = [field.name for field in proto.DESCRIPTOR.fields]
    for cls in proto_classes:
        if cls.startswith("state_"):
            assert proto.__getattribute__(cls)
def test_convert_ingest_info_state_entities(self):
    """Round-trips a state IngestInfo object graph through the proto converters.

    Builds a Python IngestInfo and the proto expected from it, then checks
    that convert_ingest_info_to_proto produces exactly that proto and that
    convert_proto_to_ingest_info turns it back into an equal IngestInfo.
    Finally asserts that every ``state_`` collection on the proto is
    non-empty, so that a newly added proto field fails the test until the
    fixture covers it.
    """
    # Arrange Python ingest info
    info = ingest_info.IngestInfo()
    person = info.create_state_person()
    person.state_person_id = 'person1'
    person.surname = 'testname'

    race = person.create_state_person_race()
    race.state_person_race_id = 'race1'
    race.race = 'white'

    ethnicity = person.create_state_person_ethnicity()
    ethnicity.state_person_ethnicity_id = 'ethnicity1'
    ethnicity.ethnicity = 'non-hispanic'

    external_id = person.create_state_person_external_id()
    external_id.state_person_external_id_id = 'external_id1'
    external_id.id_type = 'contrived'

    alias = person.create_state_alias()
    alias.state_alias_id = 'alias1'
    alias.surname = 'testerson'

    assessment = person.create_state_assessment()
    assessment.state_assessment_id = 'assessment1'
    assessment.assessment_score = '42'

    supervising_officer = person.create_state_agent()
    supervising_officer.state_agent_id = 'supervising_officer1'
    supervising_officer.full_name = 'Officer Supervising'

    assessment_agent = assessment.create_state_agent()
    assessment_agent.state_agent_id = 'agent1'
    assessment_agent.full_name = 'Officer Jones'

    program_assignment = person.create_state_program_assignment()
    program_assignment.state_program_assignment_id = 'assignment1'
    program_assignment.program_id = 'program_id1'

    program_assignment_agent = program_assignment.create_state_agent()
    program_assignment_agent.state_agent_id = 'program_agent1'
    program_assignment_agent.full_name = 'Officer Program'

    group = person.create_state_sentence_group()
    group.state_sentence_group_id = 'group1'

    fine = group.create_state_fine()
    fine.state_fine_id = 'fine1'

    incarceration_sentence = group.create_state_incarceration_sentence()
    incarceration_sentence.state_incarceration_sentence_id = 'is1'
    early_discharge1 = incarceration_sentence.create_state_early_discharge()
    early_discharge1.state_early_discharge_id = 'early_discharge1'
    charge1 = incarceration_sentence.create_state_charge()
    charge1.state_charge_id = 'charge1'
    charge1.classification_type = 'F'
    incarceration_period = (
        incarceration_sentence.create_state_incarceration_period()
    )
    incarceration_period.state_incarceration_period_id = 'ip1'
    incarceration_period.status = 'IN_CUSTODY'
    incarceration_period.specialized_purpose_for_incarceration = (
        'SHOCK INCARCERATION'
    )
    incarceration_period.state_program_assignments = [program_assignment]
    incident = incarceration_period.create_state_incarceration_incident()
    incident.state_incarceration_incident_id = 'incident1'
    incident.incident_type = 'FISTICUFFS'
    incident_outcome = incident.create_state_incarceration_incident_outcome()
    incident_outcome.state_incarceration_incident_outcome_id = 'incident1-1'
    incident_outcome.outcome_type = 'FINE'
    incident_agent = incident.create_state_agent()
    incident_agent.state_agent_id = 'agent2'
    incident_agent.full_name = 'Officer Thompson'
    decision = incarceration_period.create_state_parole_decision()
    decision.state_parole_decision_id = 'decision1'
    decision_agent = decision.create_state_agent()
    decision_agent.state_agent_id = 'agent3'
    decision_agent.full_name = 'Officer Barkley'

    supervision_sentence = group.create_state_supervision_sentence()
    supervision_sentence.state_supervision_sentence_id = 'ss1'
    early_discharge2 = supervision_sentence.create_state_early_discharge()
    early_discharge2.state_early_discharge_id = 'early_discharge2'
    charge2 = supervision_sentence.create_state_charge()
    charge2.state_charge_id = 'charge2'
    charge2.classification_type = 'M'
    supervision_period = (
        supervision_sentence.create_state_supervision_period()
    )
    supervision_period.state_supervision_period_id = 'sp1'
    supervision_period.status = 'TERMINATED'
    supervision_period_agent = supervision_period.create_state_agent()
    supervision_period_agent.state_agent_id = 'agentPO'
    supervision_period_agent.full_name = 'Officer Paroley'
    supervision_period.state_program_assignments = [program_assignment]
    supervision_case_type_entry = (
        supervision_period.create_state_supervision_case_type_entry()
    )
    supervision_case_type_entry.case_type = 'case_type'
    supervision_case_type_entry.state_supervision_case_type_entry_id = (
        'case_type_entry_id'
    )
    supervision_contact = (
        supervision_period.create_state_supervision_contact()
    )
    supervision_contact.state_supervision_contact_id = (
        'supervision_contact_id'
    )
    supervision_contact.contact_type = 'contact_type'
    # Same agent id and name as the supervising officer above, so the
    # expected proto below only lists 'agentPO' once.
    supervision_contacted_agent = supervision_contact.create_state_agent()
    supervision_contacted_agent.state_agent_id = 'agentPO'
    supervision_contacted_agent.full_name = 'Officer Paroley'
    violation = supervision_period.create_state_supervision_violation()
    violation.state_supervision_violation_id = 'violation1'
    violation.violated_conditions = 'cond'
    violation.is_violent = 'false'
    violation_type = (
        violation.create_state_supervision_violation_type_entry()
    )
    violation_type.state_supervision_violation_type_entry_id = (
        'violation_type_id'
    )
    violation_type.violation_type = 'FELONY'
    violated_condition = (
        violation.create_state_supervision_violated_condition_entry()
    )
    violated_condition.state_supervision_violated_condition_entry_id = (
        'condition_id'
    )
    violated_condition.condition = 'CURFEW'
    response = violation.create_state_supervision_violation_response()
    response.state_supervision_violation_response_id = 'response1'
    response_decision_agent = response.create_state_agent()
    response_decision_agent.state_agent_id = 'agentTERM'
    response_decision_agent.full_name = 'Officer Termy'
    response_decision = (
        response.create_state_supervision_violation_response_decision_entry()
    )
    response_decision.state_supervision_violation_response_decision_entry_id = (
        'response_decision_id'
    )
    response_decision.decision = 'REVOCATION'
    response_decision.revocation_type = 'REINCARCERATION'

    bond = charge1.create_state_bond()
    bond.state_bond_id = 'bond1'

    court_case = charge2.create_state_court_case()
    court_case.state_court_case_id = 'case1'

    court_case_agent = court_case.create_state_agent()
    court_case_agent.state_agent_id = 'agentJ'
    court_case_agent.full_name = 'Judge Agent'

    # Arrange Proto ingest info
    # NOTE: the order of the .add() / .append() calls below is significant;
    # the proto equality assertion compares repeated fields in order.
    expected_proto = ingest_info_pb2.IngestInfo()
    person_pb = expected_proto.state_people.add()
    person_pb.state_person_id = 'person1'
    person_pb.surname = 'testname'

    person_pb.state_person_race_ids.append('race1')
    race_pb = expected_proto.state_person_races.add()
    race_pb.state_person_race_id = 'race1'
    race_pb.race = 'white'

    person_pb.state_person_ethnicity_ids.append('ethnicity1')
    ethnicity_pb = expected_proto.state_person_ethnicities.add()
    ethnicity_pb.state_person_ethnicity_id = 'ethnicity1'
    ethnicity_pb.ethnicity = 'non-hispanic'

    # The expected external id is the id_type and the raw id joined by ':'.
    person_pb.state_person_external_ids_ids.append('contrived:external_id1')
    external_id_pb = expected_proto.state_person_external_ids.add()
    external_id_pb.state_person_external_id_id = 'contrived:external_id1'
    external_id_pb.id_type = 'contrived'

    person_pb.state_alias_ids.append('alias1')
    alias_pb = expected_proto.state_aliases.add()
    alias_pb.state_alias_id = 'alias1'
    alias_pb.surname = 'testerson'

    person_pb.state_assessment_ids.append('assessment1')
    assessment_pb = expected_proto.state_assessments.add()
    assessment_pb.state_assessment_id = 'assessment1'
    assessment_pb.assessment_score = '42'

    person_pb.supervising_officer_id = 'supervising_officer1'
    supervising_officer_pb = expected_proto.state_agents.add()
    supervising_officer_pb.state_agent_id = 'supervising_officer1'
    supervising_officer_pb.full_name = 'Officer Supervising'

    assessment_pb.conducting_agent_id = 'agent1'
    assessment_agent_pb = expected_proto.state_agents.add()
    assessment_agent_pb.state_agent_id = 'agent1'
    assessment_agent_pb.full_name = 'Officer Jones'

    person_pb.state_program_assignment_ids.append('assignment1')
    program_assignment_pb = expected_proto.state_program_assignments.add()
    program_assignment_pb.state_program_assignment_id = 'assignment1'
    program_assignment_pb.program_id = 'program_id1'

    program_assignment_pb.referring_agent_id = 'program_agent1'
    program_assignment_agent_pb = expected_proto.state_agents.add()
    program_assignment_agent_pb.state_agent_id = 'program_agent1'
    program_assignment_agent_pb.full_name = 'Officer Program'

    person_pb.state_sentence_group_ids.append('group1')
    group_pb = expected_proto.state_sentence_groups.add()
    group_pb.state_sentence_group_id = 'group1'

    group_pb.state_fine_ids.append('fine1')
    fine_pb = expected_proto.state_fines.add()
    fine_pb.state_fine_id = 'fine1'

    group_pb.state_supervision_sentence_ids.append('ss1')
    supervision_sentence_pb = expected_proto.state_supervision_sentences.add()
    supervision_sentence_pb.state_supervision_sentence_id = 'ss1'
    supervision_sentence_pb.state_early_discharge_ids.append(
        'early_discharge2')
    early_discharge2_pb = expected_proto.state_early_discharges.add()
    early_discharge2_pb.state_early_discharge_id = 'early_discharge2'
    supervision_sentence_pb.state_charge_ids.append('charge2')
    charge2_pb = expected_proto.state_charges.add()
    charge2_pb.state_charge_id = 'charge2'
    charge2_pb.classification_type = 'M'
    supervision_sentence_pb.state_supervision_period_ids.append('sp1')
    supervision_period_pb = expected_proto.state_supervision_periods.add()
    supervision_period_pb.state_supervision_period_id = 'sp1'
    supervision_period_pb.status = 'TERMINATED'
    supervision_period_pb.state_program_assignment_ids.append('assignment1')

    # An ordering requirement in the proto equality check at the end of this
    # test requires that this agent be added after agent1 and before agentPO
    court_case_agent_pb = expected_proto.state_agents.add()
    court_case_agent_pb.state_agent_id = 'agentJ'
    court_case_agent_pb.full_name = 'Judge Agent'

    supervision_period_pb.supervising_officer_id = 'agentPO'
    supervision_period_agent_pb = expected_proto.state_agents.add()
    supervision_period_agent_pb.state_agent_id = 'agentPO'
    supervision_period_agent_pb.full_name = 'Officer Paroley'

    supervision_case_type_entry_pb = (
        expected_proto.state_supervision_case_type_entries.add()
    )
    supervision_case_type_entry_pb.state_supervision_case_type_entry_id = (
        'case_type_entry_id'
    )
    supervision_case_type_entry_pb.case_type = 'case_type'
    supervision_period_pb.state_supervision_case_type_entry_ids.append(
        'case_type_entry_id')

    supervision_contact_pb = expected_proto.state_supervision_contacts.add()
    supervision_contact_pb.state_supervision_contact_id = (
        'supervision_contact_id'
    )
    supervision_contact_pb.contact_type = 'contact_type'
    supervision_contact_pb.contacted_agent_id = 'agentPO'
    supervision_period_pb.state_supervision_contact_ids.append(
        'supervision_contact_id')

    supervision_period_pb.state_supervision_violation_entry_ids.append(
        'violation1')
    violation_pb = expected_proto.state_supervision_violations.add()
    violation_pb.state_supervision_violation_id = 'violation1'
    violation_pb.is_violent = 'false'
    violation_pb.violated_conditions = 'cond'
    violation_pb.state_supervision_violation_type_entry_ids.append(
        'violation_type_id')
    violation_type_pb = (
        expected_proto.state_supervision_violation_type_entries.add()
    )
    violation_type_pb.state_supervision_violation_type_entry_id = (
        'violation_type_id'
    )
    violation_type_pb.violation_type = 'FELONY'
    violation_pb.state_supervision_violated_condition_entry_ids.append(
        'condition_id')
    violated_condition_pb = (
        expected_proto.state_supervision_violated_condition_entries.add()
    )
    violated_condition_pb.state_supervision_violated_condition_entry_id = (
        'condition_id'
    )
    violated_condition_pb.condition = 'CURFEW'
    violation_pb.state_supervision_violation_response_ids.append('response1')
    response_pb = expected_proto.state_supervision_violation_responses.add()
    response_pb.state_supervision_violation_response_id = 'response1'
    response_pb.decision_agent_ids.append('agentTERM')
    response_decision_agent_pb = expected_proto.state_agents.add()
    response_decision_agent_pb.state_agent_id = 'agentTERM'
    response_decision_agent_pb.full_name = 'Officer Termy'
    response_decision_pb = (
        expected_proto
        .state_supervision_violation_response_decision_entries.add()
    )
    response_decision_pb.state_supervision_violation_response_decision_entry_id = (
        'response_decision_id'
    )
    response_decision_pb.decision = 'REVOCATION'
    response_decision_pb.revocation_type = 'REINCARCERATION'
    response_pb.state_supervision_violation_response_decision_entry_ids.append(
        'response_decision_id'
    )

    group_pb.state_incarceration_sentence_ids.append('is1')
    incarceration_sentence_pb = (
        expected_proto.state_incarceration_sentences.add()
    )
    incarceration_sentence_pb.state_incarceration_sentence_id = 'is1'
    incarceration_sentence_pb.state_early_discharge_ids.append(
        'early_discharge1')
    early_discharge1_pb = expected_proto.state_early_discharges.add()
    early_discharge1_pb.state_early_discharge_id = 'early_discharge1'
    incarceration_sentence_pb.state_charge_ids.append('charge1')
    charge1_pb = expected_proto.state_charges.add()
    charge1_pb.state_charge_id = 'charge1'
    charge1_pb.classification_type = 'F'
    incarceration_sentence_pb.state_incarceration_period_ids.append('ip1')
    incarceration_period_pb = expected_proto.state_incarceration_periods.add()
    incarceration_period_pb.state_incarceration_period_id = 'ip1'
    incarceration_period_pb.status = 'IN_CUSTODY'
    incarceration_period_pb.specialized_purpose_for_incarceration = (
        'SHOCK INCARCERATION'
    )
    incarceration_period_pb.state_incarceration_incident_ids.append(
        'incident1')
    incident_pb = expected_proto.state_incarceration_incidents.add()
    incident_pb.state_incarceration_incident_id = 'incident1'
    incident_pb.incident_type = 'FISTICUFFS'
    incarceration_period_pb.state_program_assignment_ids.append(
        'assignment1')

    incident_pb.responding_officer_id = 'agent2'
    incident_agent_pb = expected_proto.state_agents.add()
    incident_agent_pb.state_agent_id = 'agent2'
    incident_agent_pb.full_name = 'Officer Thompson'

    incident_pb.state_incarceration_incident_outcome_ids.append(
        'incident1-1')
    incident_outcome_pb = (
        expected_proto.state_incarceration_incident_outcomes.add()
    )
    incident_outcome_pb.state_incarceration_incident_outcome_id = (
        'incident1-1'
    )
    incident_outcome_pb.outcome_type = 'FINE'

    incarceration_period_pb.state_parole_decision_ids.append('decision1')
    decision_pb = expected_proto.state_parole_decisions.add()
    decision_pb.state_parole_decision_id = 'decision1'
    decision_pb.decision_agent_ids.append('agent3')
    decision_agent_pb = expected_proto.state_agents.add()
    decision_agent_pb.state_agent_id = 'agent3'
    decision_agent_pb.full_name = 'Officer Barkley'

    charge1_pb.state_bond_id = 'bond1'
    bond_pb = expected_proto.state_bonds.add()
    bond_pb.state_bond_id = 'bond1'

    charge2_pb.state_court_case_id = 'case1'
    court_case_pb = expected_proto.state_court_cases.add()
    court_case_pb.state_court_case_id = 'case1'
    court_case_pb.judge_id = 'agentJ'

    # Snapshot the input before converting so the round-trip comparison is
    # made against a copy taken ahead of the conversion call.
    expected_info = copy.deepcopy(info)

    # Act & Assert
    proto = ingest_utils.convert_ingest_info_to_proto(info)
    assert expected_proto == proto

    info_back = ingest_utils.convert_proto_to_ingest_info(proto)
    assert info_back == expected_info

    # Assert that none of the proto's collections are empty, i.e. we've
    # tested all of the object graph
    proto_field_names = [field.name for field in proto.DESCRIPTOR.fields]
    for field_name in proto_field_names:
        if field_name.startswith('state_'):
            assert getattr(proto, field_name), (
                'Proto field [%s] is empty; this test does not cover it'
                % field_name
            )
def _generic_scrape(self, request: QueueRequest):
    """
    General handler for all scrape tasks.  This function is a generic entry
    point into all types of scrapes.  It decides what to do based on the
    type of the request's next task: scrape data from the fetched content,
    enqueue follow-up tasks, or both.

    Any exception raised along the way is, when BATCH_WRITES is set,
    recorded through batch_persistence.write_error, and is then re-raised.

    Args:
        request: QueueRequest holding the task to run (`next_task`), the
            scrape type, the scraper start time and any ingest info
            handed over with the request.

    Raises:
        ScraperFetchError: if fetching the task's content fails.
        ScraperPopulateDataError: if populate_data fails.
        ScraperGetMoreTasksError: if get_more_tasks fails.
    """
    try:
        task = request.next_task

        # Here we handle a special case where we weren't really sure
        # we were going to get data when we submitted a task, but then
        # we ended up with data, so no more requests are required,
        # just the content we already have.
        # TODO(#680): remove this
        if task.content is not None:
            content = self._parse_html_content(task.content)
            cookies = None
        else:
            post_data = task.post_data

            # Let the child transform the post_data if it wants before
            # sending the requests.  This hook is in here in case the
            # child did something like compress the post_data before
            # it put it on the queue.
            self.transform_post_data(post_data)

            # We always fetch some content before doing anything.
            try:
                content, cookies = self._fetch_content(
                    task.endpoint,
                    task.response_type,
                    headers=task.headers,
                    cookies=task.cookies,
                    params=task.params,
                    post_data=post_data,
                    json_data=task.json)
            except Exception as e:
                raise ScraperFetchError(str(e)) from e

        scraped_data = None
        if self.should_scrape_data(task.task_type):
            # If we want to scrape data, we should either create an
            # ingest_info object or get the one that already exists.
            logging.info("Scraping data for [%s] and endpoint: [%s]",
                         self.region.region_code, task.endpoint)
            try:
                scraped_data = self.populate_data(
                    content, task, request.ingest_info or IngestInfo())
            except Exception as e:
                raise ScraperPopulateDataError(str(e)) from e

        if self.should_get_more_tasks(task.task_type):
            logging.info("Getting more tasks for [%s] and endpoint: [%s]",
                         self.region.region_code, task.endpoint)

            # Only send along ingest info if it will not be persisted now.
            ingest_info_to_send = None
            if scraped_data is not None and not scraped_data.persist:
                ingest_info_to_send = scraped_data.ingest_info

            try:
                # pylint: disable=assignment-from-no-return
                next_tasks = self.get_more_tasks(content, task)
            except Exception as e:
                raise ScraperGetMoreTasksError(str(e)) from e

            for next_task in next_tasks:
                # Include cookies received from response, if any
                if cookies:
                    # Merge into a fresh copy for every task: updating
                    # `cookies` in place would carry one task's cookies
                    # over into every task queued after it.
                    merged_cookies = cookies.copy()
                    merged_cookies.update(next_task.cookies)
                    next_task = Task.evolve(next_task,
                                            cookies=merged_cookies)
                self.add_task(
                    '_generic_scrape',
                    QueueRequest(
                        scrape_type=request.scrape_type,
                        scraper_start_time=request.scraper_start_time,
                        next_task=next_task,
                        ingest_info=ingest_info_to_send,
                    ))

        if scraped_data is not None and scraped_data.persist:
            if scraped_data.ingest_info:
                people = scraped_data.ingest_info.people
                logging.info("Logging at most %d people (were %d):",
                             constants.MAX_PEOPLE_TO_LOG, len(people))
                for person in people[:constants.MAX_PEOPLE_TO_LOG]:
                    logging.info("[%s]", str(person))
                logging.info("Last seen time of person being set as: [%s]",
                             request.scraper_start_time)
                metadata = IngestMetadata(self.region.region_code,
                                          self.region.jurisdiction_id,
                                          request.scraper_start_time,
                                          self.get_enum_overrides())
                if self.BATCH_WRITES:
                    logging.info(
                        "Queuing ingest_info ([%d] people) to "
                        "batch_persistence for [%s]",
                        len(people),
                        self.region.region_code)
                    scrape_key = ScrapeKey(self.region.region_code,
                                           request.scrape_type)
                    batch_persistence.write(
                        ingest_info=scraped_data.ingest_info,
                        scrape_key=scrape_key,
                        task=task,
                    )
                else:
                    logging.info(
                        "Writing ingest_info ([%d] people) to the database"
                        " for [%s]",
                        len(people),
                        self.region.region_code)
                    persistence.write(
                        ingest_utils.convert_ingest_info_to_proto(
                            scraped_data.ingest_info), metadata)

            for sc in scraped_data.single_counts:
                if not sc.date:
                    # The count carries no date of its own; fall back to
                    # the start date of the current BACKGROUND session,
                    # when there is one.
                    scrape_key = ScrapeKey(self.region.region_code,
                                           constants.ScrapeType.BACKGROUND)
                    session = sessions.get_current_session(scrape_key)
                    if session:
                        sc = attr.evolve(sc, date=session.start.date())
                single_count.store_single_count(
                    sc, self.region.jurisdiction_id)
    except Exception as e:
        if self.BATCH_WRITES:
            scrape_key = ScrapeKey(self.region.region_code,
                                   request.scrape_type)
            batch_persistence.write_error(
                error=str(e),
                trace_id=get_trace_id_from_flask(),
                task=task,
                scrape_key=scrape_key,
            )
        # Bare raise re-raises the active exception unchanged.
        raise