def run_test_pipeline(
        self,
        dataset: str,
        fake_supervision_period_id: int,
        unifying_id_field_filter_set: Optional[Set[int]] = None,
        metric_types_filter: Optional[Set[str]] = None):
    """Runs a test version of the program pipeline."""
    test_pipeline = TestPipeline()

    # Get StatePersons
    persons = (
        test_pipeline
        | 'Load Persons' >>  # type: ignore
        extractor_utils.BuildRootEntity(
            dataset=dataset,
            root_entity_class=entities.StatePerson,
            unifying_id_field=entities.StatePerson.get_class_id_name(),
            build_related_entities=True))

    # Get StateProgramAssignments
    program_assignments = (
        test_pipeline
        | 'Load Program Assignments' >>  # type: ignore
        extractor_utils.BuildRootEntity(
            dataset=dataset,
            root_entity_class=entities.StateProgramAssignment,
            unifying_id_field=entities.StatePerson.get_class_id_name(),
            build_related_entities=True,
            unifying_id_field_filter_set=unifying_id_field_filter_set))

    # Get StateAssessments
    assessments = (
        test_pipeline
        | 'Load Assessments' >>  # type: ignore
        extractor_utils.BuildRootEntity(
            dataset=dataset,
            root_entity_class=entities.StateAssessment,
            unifying_id_field=entities.StatePerson.get_class_id_name(),
            build_related_entities=False,
            unifying_id_field_filter_set=unifying_id_field_filter_set))

    # Get StateSupervisionPeriods
    supervision_periods = (
        test_pipeline
        | 'Load SupervisionPeriods' >>  # type: ignore
        extractor_utils.BuildRootEntity(
            dataset=dataset,
            root_entity_class=entities.StateSupervisionPeriod,
            unifying_id_field=entities.StatePerson.get_class_id_name(),
            build_related_entities=False,
            unifying_id_field_filter_set=unifying_id_field_filter_set))

    supervision_period_to_agent_map = {
        'agent_id': 1010,
        'agent_external_id': 'OFFICER0009',
        'district_external_id': '10',
        'supervision_period_id': fake_supervision_period_id
    }

    supervision_period_to_agent_associations = (
        test_pipeline
        | 'Create SupervisionPeriod to Agent table' >> beam.Create(
            [supervision_period_to_agent_map]))

    supervision_period_to_agent_associations_as_kv = (
        supervision_period_to_agent_associations
        | 'Convert SupervisionPeriod to Agent table to KV tuples' >>
        beam.ParDo(pipeline.ConvertDictToKVTuple(),
                   'supervision_period_id'))

    # Group each StatePerson with their other entities
    persons_entities = (
        {
            'person': persons,
            'program_assignments': program_assignments,
            'assessments': assessments,
            'supervision_periods': supervision_periods
        }
        | 'Group StatePerson to StateProgramAssignments and related entities' >>
        beam.CoGroupByKey())

    # Identify ProgramEvents from the StatePerson's StateProgramAssignments
    person_program_events = (
        persons_entities
        | beam.ParDo(
            pipeline.ClassifyProgramAssignments(),
            AsDict(supervision_period_to_agent_associations_as_kv)))

    # Get pipeline job details for accessing job_id
    all_pipeline_options = PipelineOptions().get_all_options()

    # Add timestamp for local jobs
    job_timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H_%M_%S.%f')
    all_pipeline_options['job_timestamp'] = job_timestamp

    metric_types = metric_types_filter if metric_types_filter else {'ALL'}

    # Get program metrics
    program_metrics = (
        person_program_events
        | 'Get Program Metrics' >>  # type: ignore
        pipeline.GetProgramMetrics(
            pipeline_options=all_pipeline_options,
            metric_types=metric_types,
            calculation_end_month=None,
            calculation_month_count=-1))

    assert_that(program_metrics, AssertMatchers.validate_pipeline_test())

    test_pipeline.run()
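
# A minimal usage sketch for the helper above. The dataset name, supervision
# period id, and metric-type string here are hypothetical stand-ins, not
# fixtures from this suite:
#
#     def testProgramPipelineWithMetricTypesFilter(self):
#         self.run_test_pipeline(
#             dataset='fake-project.fake_dataset',
#             fake_supervision_period_id=1111,
#             metric_types_filter={'PROGRAM_REFERRAL'})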
def testIncarcerationPipeline(self):
    fake_person_id = 12345

    fake_person = schema.StatePerson(
        person_id=fake_person_id,
        gender=Gender.MALE,
        birthdate=date(1970, 1, 1),
        residency_status=ResidencyStatus.PERMANENT)

    persons_data = [normalized_database_base_dict(fake_person)]

    race_1 = schema.StatePersonRace(
        person_race_id=111,
        state_code='CA',
        race=Race.BLACK,
        person_id=fake_person_id)

    race_2 = schema.StatePersonRace(
        person_race_id=222,
        state_code='ND',
        race=Race.WHITE,
        person_id=fake_person_id)

    races_data = normalized_database_base_dict_list([race_1, race_2])

    ethnicity = schema.StatePersonEthnicity(
        person_ethnicity_id=111,
        state_code='CA',
        ethnicity=Ethnicity.HISPANIC,
        person_id=fake_person_id)

    ethnicity_data = normalized_database_base_dict_list([ethnicity])

    sentence_group = schema.StateSentenceGroup(
        sentence_group_id=111,
        person_id=fake_person_id)

    initial_incarceration = schema.StateIncarcerationPeriod(
        incarceration_period_id=1111,
        status=StateIncarcerationPeriodStatus.NOT_IN_CUSTODY,
        state_code='CA',
        county_code='124',
        facility='San Quentin',
        facility_security_level=(
            StateIncarcerationFacilitySecurityLevel.MAXIMUM),
        admission_reason=(
            StateIncarcerationPeriodAdmissionReason.NEW_ADMISSION),
        projected_release_reason=(
            StateIncarcerationPeriodReleaseReason.CONDITIONAL_RELEASE),
        admission_date=date(2008, 11, 20),
        release_date=date(2010, 12, 4),
        release_reason=(
            StateIncarcerationPeriodReleaseReason.SENTENCE_SERVED),
        person_id=fake_person_id)

    first_reincarceration = schema.StateIncarcerationPeriod(
        incarceration_period_id=2222,
        status=StateIncarcerationPeriodStatus.NOT_IN_CUSTODY,
        state_code='CA',
        county_code='124',
        facility='San Quentin',
        facility_security_level=(
            StateIncarcerationFacilitySecurityLevel.MAXIMUM),
        admission_reason=(
            StateIncarcerationPeriodAdmissionReason.NEW_ADMISSION),
        projected_release_reason=(
            StateIncarcerationPeriodReleaseReason.CONDITIONAL_RELEASE),
        admission_date=date(2011, 4, 5),
        release_date=date(2014, 4, 14),
        release_reason=(
            StateIncarcerationPeriodReleaseReason.SENTENCE_SERVED),
        person_id=fake_person_id)

    subsequent_reincarceration = schema.StateIncarcerationPeriod(
        incarceration_period_id=3333,
        status=StateIncarcerationPeriodStatus.IN_CUSTODY,
        state_code='CA',
        county_code='124',
        facility='San Quentin',
        facility_security_level=(
            StateIncarcerationFacilitySecurityLevel.MAXIMUM),
        admission_reason=(
            StateIncarcerationPeriodAdmissionReason.NEW_ADMISSION),
        projected_release_reason=(
            StateIncarcerationPeriodReleaseReason.CONDITIONAL_RELEASE),
        admission_date=date(2017, 1, 4),
        person_id=fake_person_id)

    incarceration_sentence = schema.StateIncarcerationSentence(
        incarceration_sentence_id=1111,
        sentence_group_id=sentence_group.sentence_group_id,
        incarceration_periods=[
            initial_incarceration,
            first_reincarceration,
            subsequent_reincarceration
        ],
        person_id=fake_person_id)

    supervision_sentence = schema.StateSupervisionSentence(
        supervision_sentence_id=123,
        person_id=fake_person_id)

    sentence_group.incarceration_sentences = [incarceration_sentence]

    sentence_group_data = [normalized_database_base_dict(sentence_group)]

    incarceration_sentence_data = [
        normalized_database_base_dict(incarceration_sentence)
    ]

    supervision_sentence_data = [
        normalized_database_base_dict(supervision_sentence)
    ]

    incarceration_periods_data = [
        normalized_database_base_dict(initial_incarceration),
        normalized_database_base_dict(first_reincarceration),
        normalized_database_base_dict(subsequent_reincarceration)
    ]

    state_incarceration_sentence_incarceration_period_association = [
        {
            'incarceration_period_id':
                initial_incarceration.incarceration_period_id,
            'incarceration_sentence_id':
                incarceration_sentence.incarceration_sentence_id,
        },
        {
            'incarceration_period_id':
                first_reincarceration.incarceration_period_id,
            'incarceration_sentence_id':
                incarceration_sentence.incarceration_sentence_id,
        },
        {
            'incarceration_period_id':
                subsequent_reincarceration.incarceration_period_id,
            'incarceration_sentence_id':
                incarceration_sentence.incarceration_sentence_id,
        },
    ]

    data_dict = {
        schema.StatePerson.__tablename__: persons_data,
        schema.StatePersonRace.__tablename__: races_data,
        schema.StatePersonEthnicity.__tablename__: ethnicity_data,
        schema.StateSentenceGroup.__tablename__: sentence_group_data,
        schema.StateIncarcerationSentence.__tablename__:
            incarceration_sentence_data,
        schema.StateSupervisionSentence.__tablename__:
            supervision_sentence_data,
        schema.StateIncarcerationPeriod.__tablename__:
            incarceration_periods_data,
        schema.state_incarceration_sentence_incarceration_period_association_table.name:
            state_incarceration_sentence_incarceration_period_association,
        schema.state_supervision_sentence_incarceration_period_association_table.name:
            [{}]
    }

    test_pipeline = TestPipeline()

    # Get StatePersons
    persons = (
        test_pipeline
        | 'Load Persons' >> extractor_utils.BuildRootEntity(
            dataset=None,
            data_dict=data_dict,
            root_schema_class=schema.StatePerson,
            root_entity_class=entities.StatePerson,
            unifying_id_field='person_id',
            build_related_entities=True))

    # Get StateSentenceGroups
    sentence_groups = (
        test_pipeline
        | 'Load StateSentenceGroups' >> extractor_utils.BuildRootEntity(
            dataset=None,
            data_dict=data_dict,
            root_schema_class=schema.StateSentenceGroup,
            root_entity_class=entities.StateSentenceGroup,
            unifying_id_field='person_id',
            build_related_entities=True))

    # Get StateIncarcerationSentences
    incarceration_sentences = (
        test_pipeline
        | 'Load StateIncarcerationSentences' >>
        extractor_utils.BuildRootEntity(
            dataset=None,
            data_dict=data_dict,
            root_schema_class=schema.StateIncarcerationSentence,
            root_entity_class=entities.StateIncarcerationSentence,
            unifying_id_field='person_id',
            build_related_entities=True))

    # Get StateSupervisionSentences
    supervision_sentences = (
        test_pipeline
        | 'Load StateSupervisionSentences' >>
        extractor_utils.BuildRootEntity(
            dataset=None,
            data_dict=data_dict,
            root_schema_class=schema.StateSupervisionSentence,
            root_entity_class=entities.StateSupervisionSentence,
            unifying_id_field='person_id',
            build_related_entities=True))

    sentences_and_sentence_groups = (
        {
            'sentence_groups': sentence_groups,
            'incarceration_sentences': incarceration_sentences,
            'supervision_sentences': supervision_sentences
        }
        | 'Group sentences to sentence groups' >> beam.CoGroupByKey())

    sentence_groups_with_hydrated_sentences = (
        sentences_and_sentence_groups
        | 'Set hydrated sentences on sentence groups' >> beam.ParDo(
            SetSentencesOnSentenceGroup()))

    # Group each StatePerson with their related entities
    person_and_sentence_groups = (
        {
            'person': persons,
            'sentence_groups': sentence_groups_with_hydrated_sentences
        }
        | 'Group StatePerson to SentenceGroups' >> beam.CoGroupByKey())

    fake_person_id_to_county_query_result = [{
        'person_id': fake_person_id,
        'county_of_residence': _COUNTY_OF_RESIDENCE
    }]

    person_id_to_county_kv = (
        test_pipeline
        | "Read person id to county associations from BigQuery" >>
        beam.Create(fake_person_id_to_county_query_result)
        | "Convert to KV" >> beam.ParDo(ConvertDictToKVTuple(),
                                        'person_id'))

    # Identify IncarcerationEvents from the StatePerson's
    # StateIncarcerationPeriods
    person_events = (
        person_and_sentence_groups
        | 'Classify Incarceration Events' >> beam.ParDo(
            pipeline.ClassifyIncarcerationEvents(),
            AsDict(person_id_to_county_kv)))

    # Get pipeline job details for accessing job_id
    all_pipeline_options = PipelineOptions().get_all_options()

    # Add timestamp for local jobs
    job_timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H_%M_%S.%f')
    all_pipeline_options['job_timestamp'] = job_timestamp

    # Get IncarcerationMetrics
    incarceration_metrics = (
        person_events
        | 'Get Incarceration Metrics' >> pipeline.GetIncarcerationMetrics(
            pipeline_options=all_pipeline_options,
            inclusions=ALL_INCLUSIONS_DICT,
            calculation_month_limit=-1))

    assert_that(incarceration_metrics,
                AssertMatchers.validate_metric_type())

    test_pipeline.run()
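
# For reference, the ConvertDictToKVTuple DoFn used throughout these tests
# keys each row dict by a named field so the collection can feed
# CoGroupByKey or AsDict. A minimal sketch of that behavior, inferred from
# how it is invoked here (an illustration, not the canonical implementation
# from the pipeline module):
class _ConvertDictToKVTupleSketch(beam.DoFn):
    """Yields (element[key_field], element) key-value tuples."""

    def process(self, element, key_field):
        # Key each row by the requested field, e.g. 'person_id', so that a
        # downstream CoGroupByKey joins all rows for the same person.
        yield element[key_field], element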
def run_test_pipeline(
        fake_person_id: int,
        state_code: str,
        dataset: str,
        expected_metric_types: Set[IncarcerationMetricType],
        allow_empty: bool = False,
        unifying_id_field_filter_set: Optional[Set[int]] = None,
        metric_types_filter: Optional[Set[str]] = None):
    """Runs a test version of the incarceration pipeline."""
    test_pipeline = TestPipeline()

    # Get StatePersons
    persons = (
        test_pipeline
        | 'Load Persons' >>  # type: ignore
        extractor_utils.BuildRootEntity(
            dataset=dataset,
            root_entity_class=entities.StatePerson,
            unifying_id_field=entities.StatePerson.get_class_id_name(),
            build_related_entities=True))

    # Get StateSentenceGroups
    sentence_groups = (
        test_pipeline
        | 'Load StateSentenceGroups' >>  # type: ignore
        extractor_utils.BuildRootEntity(
            dataset=dataset,
            root_entity_class=entities.StateSentenceGroup,
            unifying_id_field=entities.StatePerson.get_class_id_name(),
            build_related_entities=True,
            unifying_id_field_filter_set=unifying_id_field_filter_set))

    # Get StateIncarcerationSentences
    incarceration_sentences = (
        test_pipeline
        | 'Load StateIncarcerationSentences' >>  # type: ignore
        extractor_utils.BuildRootEntity(
            dataset=dataset,
            root_entity_class=entities.StateIncarcerationSentence,
            unifying_id_field=entities.StatePerson.get_class_id_name(),
            build_related_entities=True,
            unifying_id_field_filter_set=unifying_id_field_filter_set))

    # Get StateSupervisionSentences
    supervision_sentences = (
        test_pipeline
        | 'Load StateSupervisionSentences' >>  # type: ignore
        extractor_utils.BuildRootEntity(
            dataset=dataset,
            root_entity_class=entities.StateSupervisionSentence,
            unifying_id_field=entities.StatePerson.get_class_id_name(),
            build_related_entities=True,
            unifying_id_field_filter_set=unifying_id_field_filter_set))

    us_mo_sentence_status_rows: List[Dict[str, Any]] = [{
        'person_id': fake_person_id,
        'sentence_external_id': 'XXX',
        'sentence_status_external_id': 'YYY',
        'status_code': 'ZZZ',
        'status_date': 'not_a_date',
        'status_description': 'XYZ'
    }]

    us_mo_sentence_statuses = (
        test_pipeline
        | 'Create MO sentence statuses' >>
        beam.Create(us_mo_sentence_status_rows))

    us_mo_sentence_status_rankings_as_kv = (
        us_mo_sentence_statuses
        | 'Convert sentence status ranking table to KV tuples' >>
        beam.ParDo(ConvertDictToKVTuple(), 'person_id'))

    sentences_and_statuses = (
        {
            'incarceration_sentences': incarceration_sentences,
            'supervision_sentences': supervision_sentences,
            'sentence_statuses': us_mo_sentence_status_rankings_as_kv
        }
        | 'Group sentences to the sentence statuses for that person' >>
        beam.CoGroupByKey())

    sentences_converted = (
        sentences_and_statuses
        | 'Convert to state-specific sentences' >> beam.ParDo(
            ConvertSentencesToStateSpecificType()).with_outputs(
                'incarceration_sentences', 'supervision_sentences'))

    sentences_and_sentence_groups = (
        {
            'sentence_groups': sentence_groups,
            'incarceration_sentences':
                sentences_converted.incarceration_sentences,
            'supervision_sentences':
                sentences_converted.supervision_sentences
        }
        | 'Group sentences to sentence groups' >> beam.CoGroupByKey())

    sentence_groups_with_hydrated_sentences = (
        sentences_and_sentence_groups
        | 'Set hydrated sentences on sentence groups' >> beam.ParDo(
            SetSentencesOnSentenceGroup()))

    fake_person_id_to_county_query_result = [{
        'person_id': fake_person_id,
        'county_of_residence': _COUNTY_OF_RESIDENCE
    }]

    person_id_to_county_kv = (
        test_pipeline
        | "Read person id to county associations from BigQuery" >>
        beam.Create(fake_person_id_to_county_query_result)
        | "Convert person_id to counties to KV" >> beam.ParDo(
            ConvertDictToKVTuple(), 'person_id'))

    incarceration_period_judicial_district_association_row = {
        'person_id': fake_person_id,
        'incarceration_period_id': 123,
        'judicial_district_code': 'NW'
    }

    ip_to_judicial_district_kv = (
        test_pipeline
        | "Read incarceration_period to judicial_district associations from BigQuery" >>
        beam.Create([incarceration_period_judicial_district_association_row])
        | "Convert ips to judicial districts to KV" >> beam.ParDo(
            ConvertDictToKVTuple(), 'person_id'))

    state_race_ethnicity_population_count = {
        'state_code': state_code,
        'race_or_ethnicity': 'BLACK',
        'population_count': 1,
        'representation_priority': 1
    }

    state_race_ethnicity_population_counts = (
        test_pipeline
        | 'Create state_race_ethnicity_population_count table' >>
        beam.Create([state_race_ethnicity_population_count]))

    # Group each StatePerson with their related entities
    person_entities = (
        {
            'person': persons,
            'sentence_groups': sentence_groups_with_hydrated_sentences,
            'incarceration_period_judicial_district_association':
                ip_to_judicial_district_kv
        }
        | 'Group StatePerson to SentenceGroups' >> beam.CoGroupByKey())

    # Identify IncarcerationEvents from the StatePerson's
    # StateIncarcerationPeriods
    person_incarceration_events = (
        person_entities
        | 'Classify Incarceration Events' >> beam.ParDo(
            pipeline.ClassifyIncarcerationEvents(),
            AsDict(person_id_to_county_kv)))

    person_metadata = (
        persons
        | "Build the person_metadata dictionary" >> beam.ParDo(
            BuildPersonMetadata(),
            AsList(state_race_ethnicity_population_counts)))

    person_incarceration_events_with_metadata = (
        {
            'person_events': person_incarceration_events,
            'person_metadata': person_metadata
        }
        | 'Group IncarcerationEvents with person-level metadata' >>
        beam.CoGroupByKey()
        | 'Organize StatePerson, PersonMetadata and IncarcerationEvents for calculations' >>
        beam.ParDo(ExtractPersonEventsMetadata()))

    # Get pipeline job details for accessing job_id
    all_pipeline_options = PipelineOptions().get_all_options()

    # Add timestamp for local jobs
    job_timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H_%M_%S.%f')
    all_pipeline_options['job_timestamp'] = job_timestamp

    metric_types = metric_types_filter if metric_types_filter else {'ALL'}

    # Get IncarcerationMetrics
    incarceration_metrics = (
        person_incarceration_events_with_metadata
        | 'Get Incarceration Metrics' >>  # type: ignore
        pipeline.GetIncarcerationMetrics(
            pipeline_options=all_pipeline_options,
            metric_types=metric_types,
            calculation_end_month=None,
            calculation_month_count=-1))

    assert_that(
        incarceration_metrics,
        AssertMatchers.validate_metric_type(allow_empty=allow_empty),
        'Assert that all metrics are of the expected type.')

    assert_that(
        incarceration_metrics,
        AssertMatchers.validate_pipeline_test(expected_metric_types),
        'Assert that the types of metrics produced are expected.')

    test_pipeline.run()
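
# The .with_outputs(...) call in run_test_pipeline relies on Beam's tagged
# outputs to split one ParDo into multiple named PCollections. A minimal
# sketch of the pattern with a hypothetical DoFn (not the real
# ConvertSentencesToStateSpecificType):
class _SplitSentencesSketch(beam.DoFn):
    """Routes grouped sentences to tagged output streams."""

    def process(self, element):
        person_id, grouped = element
        for sentence in grouped['incarceration_sentences']:
            # Each TaggedOutput value surfaces on the output whose name
            # matches the tag passed to .with_outputs(...).
            yield beam.pvalue.TaggedOutput('incarceration_sentences',
                                           (person_id, sentence))
        for sentence in grouped['supervision_sentences']:
            yield beam.pvalue.TaggedOutput('supervision_sentences',
                                           (person_id, sentence))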
def testProgramPipeline(self):
    """Tests the program pipeline."""
    fake_person_id = 12345

    fake_person = schema.StatePerson(
        person_id=fake_person_id,
        gender=Gender.MALE,
        birthdate=date(1970, 1, 1),
        residency_status=ResidencyStatus.PERMANENT)

    persons_data = [normalized_database_base_dict(fake_person)]

    race_1 = schema.StatePersonRace(
        person_race_id=111,
        state_code='CA',
        race=Race.BLACK,
        person_id=fake_person_id)

    race_2 = schema.StatePersonRace(
        person_race_id=222,
        state_code='ND',
        race=Race.WHITE,
        person_id=fake_person_id)

    races_data = normalized_database_base_dict_list([race_1, race_2])

    ethnicity = schema.StatePersonEthnicity(
        person_ethnicity_id=111,
        state_code='CA',
        ethnicity=Ethnicity.HISPANIC,
        person_id=fake_person_id)

    ethnicity_data = normalized_database_base_dict_list([ethnicity])

    program_assignment = schema.StateProgramAssignment(
        program_assignment_id=123,
        referral_date=date(2015, 5, 10),
        person_id=fake_person_id)

    assessment = schema.StateAssessment(
        assessment_id=298374,
        assessment_date=date(2015, 3, 19),
        assessment_type='LSIR',
        person_id=fake_person_id)

    supervision_period = schema.StateSupervisionPeriod(
        supervision_period_id=1111,
        state_code='CA',
        county_code='124',
        start_date=date(2015, 3, 14),
        termination_date=date(2016, 12, 29),
        supervision_type=StateSupervisionType.PROBATION,
        person_id=fake_person_id)

    program_assignment_data = [
        normalized_database_base_dict(program_assignment)
    ]

    assessment_data = [normalized_database_base_dict(assessment)]

    supervision_periods_data = [
        normalized_database_base_dict(supervision_period)
    ]

    supervision_violation_response = \
        database_test_utils.generate_test_supervision_violation_response(
            fake_person_id)

    supervision_violation_response_data = [
        normalized_database_base_dict(supervision_violation_response)
    ]

    data_dict = {
        schema.StatePerson.__tablename__: persons_data,
        schema.StatePersonRace.__tablename__: races_data,
        schema.StatePersonEthnicity.__tablename__: ethnicity_data,
        schema.StateSupervisionViolationResponse.__tablename__:
            supervision_violation_response_data,
        schema.StateSupervisionPeriod.__tablename__:
            supervision_periods_data,
        schema.StateProgramAssignment.__tablename__:
            program_assignment_data,
        schema.StateAssessment.__tablename__: assessment_data
    }

    test_pipeline = TestPipeline()

    # Get StatePersons
    persons = (
        test_pipeline
        | 'Load Persons' >> extractor_utils.BuildRootEntity(
            dataset=None,
            data_dict=data_dict,
            root_schema_class=schema.StatePerson,
            root_entity_class=entities.StatePerson,
            unifying_id_field='person_id',
            build_related_entities=True))

    # Get StateProgramAssignments
    program_assignments = (
        test_pipeline
        | 'Load Program Assignments' >> extractor_utils.BuildRootEntity(
            dataset=None,
            data_dict=data_dict,
            root_schema_class=schema.StateProgramAssignment,
            root_entity_class=entities.StateProgramAssignment,
            unifying_id_field='person_id',
            build_related_entities=True))

    # Get StateAssessments
    assessments = (
        test_pipeline
        | 'Load Assessments' >> extractor_utils.BuildRootEntity(
            dataset=None,
            data_dict=data_dict,
            root_schema_class=schema.StateAssessment,
            root_entity_class=entities.StateAssessment,
            unifying_id_field='person_id',
            build_related_entities=False))

    # Get StateSupervisionPeriods
    supervision_periods = (
        test_pipeline
        | 'Load SupervisionPeriods' >> extractor_utils.BuildRootEntity(
            dataset=None,
            data_dict=data_dict,
            root_schema_class=schema.StateSupervisionPeriod,
            root_entity_class=entities.StateSupervisionPeriod,
            unifying_id_field='person_id',
            build_related_entities=False))

    supervision_period_to_agent_map = {
        'agent_id': 1010,
        'agent_external_id': 'OFFICER0009',
        'district_external_id': '10',
        'supervision_period_id': supervision_period.supervision_period_id
    }

    supervision_period_to_agent_associations = (
        test_pipeline
        | 'Create SupervisionPeriod to Agent table' >> beam.Create(
            [supervision_period_to_agent_map]))

    supervision_period_to_agent_associations_as_kv = (
        supervision_period_to_agent_associations
        | 'Convert SupervisionPeriod to Agent table to KV tuples' >>
        beam.ParDo(pipeline.ConvertDictToKVTuple(),
                   'supervision_period_id'))

    # Group each StatePerson with their other entities
    persons_entities = (
        {
            'person': persons,
            'program_assignments': program_assignments,
            'assessments': assessments,
            'supervision_periods': supervision_periods
        }
        | 'Group StatePerson to StateProgramAssignments and related entities' >>
        beam.CoGroupByKey())

    # Identify ProgramEvents from the StatePerson's StateProgramAssignments
    person_program_events = (
        persons_entities
        | beam.ParDo(
            pipeline.ClassifyProgramAssignments(),
            AsDict(supervision_period_to_agent_associations_as_kv)))

    # Get pipeline job details for accessing job_id
    all_pipeline_options = PipelineOptions().get_all_options()

    # Add timestamp for local jobs
    job_timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H_%M_%S.%f')
    all_pipeline_options['job_timestamp'] = job_timestamp

    # Get program metrics
    program_metrics = (
        person_program_events
        | 'Get Program Metrics' >> pipeline.GetProgramMetrics(
            pipeline_options=all_pipeline_options,
            inclusions=ALL_INCLUSIONS_DICT,
            calculation_month_limit=-1))

    assert_that(program_metrics, AssertMatchers.validate_pipeline_test())

    test_pipeline.run()
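
# assert_that validates the final PCollection with a matcher callable. A
# minimal sketch of the shape of a matcher like
# AssertMatchers.validate_pipeline_test (hypothetical; the real matchers
# check metric-specific fields). BeamAssertException comes from
# apache_beam.testing.util:
def _validate_nonempty_metrics_sketch():
    def _check_metrics(output):
        # assert_that hands the matcher the materialized pipeline output.
        if not output:
            raise BeamAssertException('Expected program metrics, got none.')
    return _check_metrics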