def generate_program_assignment(person, **kwargs) -> schema.StateProgramAssignment:
    """Create a StateProgramAssignment attached to ``person``.

    ``state_code`` and ``participation_status`` are pre-filled with
    ``_STATE_CODE`` and the PRESENT_WITHOUT_INFO status value. Any keyword
    argument supplied by the caller is forwarded to the constructor and
    overrides the pre-filled value of the same name.
    """
    field_values = {
        "state_code": _STATE_CODE,
        "participation_status": StateProgramAssignmentParticipationStatus.PRESENT_WITHOUT_INFO.value,
        # Caller-supplied fields come last so they win over the defaults.
        **kwargs,
    }
    return schema.StateProgramAssignment(person=person, **field_values)
def build_data_dict(fake_person_id: int, fake_supervision_period_id: int):
    """Assemble the table-name -> rows mapping for a basic pipeline run.

    One person is created with id ``fake_person_id``, together with two race
    rows, one ethnicity row, one program assignment, one assessment and one
    supervision period (id ``fake_supervision_period_id``) that all reference
    that person id. A supervision violation response is obtained from
    ``database_test_utils`` for the same person id. The external id, alias
    and sentence group tables are present but empty.
    """
    person = schema.StatePerson(
        person_id=fake_person_id,
        gender=Gender.MALE,
        birthdate=date(1970, 1, 1),
        residency_status=ResidencyStatus.PERMANENT,
    )
    person_rows = [normalized_database_base_dict(person)]

    black_race = schema.StatePersonRace(
        person_race_id=111,
        state_code='CA',
        race=Race.BLACK,
        person_id=fake_person_id,
    )
    white_race = schema.StatePersonRace(
        person_race_id=111,
        state_code='ND',
        race=Race.WHITE,
        person_id=fake_person_id,
    )
    race_rows = normalized_database_base_dict_list([black_race, white_race])

    hispanic_ethnicity = schema.StatePersonEthnicity(
        person_ethnicity_id=111,
        state_code='CA',
        ethnicity=Ethnicity.HISPANIC,
        person_id=fake_person_id,
    )
    ethnicity_rows = normalized_database_base_dict_list([hispanic_ethnicity])

    referral = schema.StateProgramAssignment(
        state_code='CA',
        program_assignment_id=123,
        referral_date=date(2015, 5, 10),
        person_id=fake_person_id,
    )
    lsir_assessment = schema.StateAssessment(
        assessment_id=298374,
        assessment_date=date(2015, 3, 19),
        assessment_type='LSIR',
        person_id=fake_person_id,
    )
    probation_period = schema.StateSupervisionPeriod(
        supervision_period_id=fake_supervision_period_id,
        state_code='CA',
        county_code='124',
        start_date=date(2015, 3, 14),
        termination_date=date(2016, 12, 29),
        supervision_type=StateSupervisionType.PROBATION,
        person_id=fake_person_id,
    )

    referral_rows = [normalized_database_base_dict(referral)]
    assessment_rows = [normalized_database_base_dict(lsir_assessment)]
    period_rows = [normalized_database_base_dict(probation_period)]

    violation_response = (
        database_test_utils.generate_test_supervision_violation_response(
            fake_person_id
        )
    )
    violation_response_rows = [normalized_database_base_dict(violation_response)]

    # Keys are the schema table names; the last three tables carry no rows.
    return {
        schema.StatePerson.__tablename__: person_rows,
        schema.StatePersonRace.__tablename__: race_rows,
        schema.StatePersonEthnicity.__tablename__: ethnicity_rows,
        schema.StateSupervisionViolationResponse.__tablename__:
            violation_response_rows,
        schema.StateSupervisionPeriod.__tablename__: period_rows,
        schema.StateProgramAssignment.__tablename__: referral_rows,
        schema.StateAssessment.__tablename__: assessment_rows,
        schema.StatePersonExternalId.__tablename__: [],
        schema.StatePersonAlias.__tablename__: [],
        schema.StateSentenceGroup.__tablename__: [],
    }
def testProgramPipelineNoReferrals(self):
    """Runs the program pipeline when one person has no program assignments.

    Two people are loaded. The races, ethnicity, assessment, supervision
    period and violation response all reference the first person, while the
    only program assignment references the second person.
    """
    first_person_id = 12345
    second_person_id = 9876

    first_person = schema.StatePerson(
        person_id=first_person_id,
        gender=Gender.MALE,
        birthdate=date(1970, 1, 1),
        residency_status=ResidencyStatus.PERMANENT,
    )
    second_person = schema.StatePerson(
        person_id=second_person_id,
        gender=Gender.MALE,
        birthdate=date(1974, 3, 12),
        residency_status=ResidencyStatus.PERMANENT,
    )
    person_rows = normalized_database_base_dict_list(
        [first_person, second_person]
    )

    black_race = schema.StatePersonRace(
        person_race_id=111,
        state_code='CA',
        race=Race.BLACK,
        person_id=first_person_id,
    )
    white_race = schema.StatePersonRace(
        person_race_id=111,
        state_code='ND',
        race=Race.WHITE,
        person_id=first_person_id,
    )
    race_rows = normalized_database_base_dict_list([black_race, white_race])

    hispanic_ethnicity = schema.StatePersonEthnicity(
        person_ethnicity_id=111,
        state_code='CA',
        ethnicity=Ethnicity.HISPANIC,
        person_id=first_person_id,
    )
    ethnicity_rows = normalized_database_base_dict_list([hispanic_ethnicity])

    # The only program assignment belongs to the *second* person.
    referral = schema.StateProgramAssignment(
        state_code='CA',
        program_assignment_id=123,
        referral_date=date(2015, 5, 10),
        person_id=second_person_id,
    )
    lsir_assessment = schema.StateAssessment(
        assessment_id=298374,
        assessment_date=date(2015, 3, 19),
        assessment_type='LSIR',
        person_id=first_person_id,
    )
    supervision_period = schema.StateSupervisionPeriod(
        supervision_period_id=1111,
        state_code='CA',
        county_code='124',
        start_date=date(2015, 3, 14),
        termination_date=date(2016, 12, 29),
        supervision_type=StateSupervisionType.PROBATION,
        person_id=first_person_id,
    )

    referral_rows = [normalized_database_base_dict(referral)]
    assessment_rows = [normalized_database_base_dict(lsir_assessment)]
    period_rows = [normalized_database_base_dict(supervision_period)]

    violation_response = (
        database_test_utils.generate_test_supervision_violation_response(
            first_person_id
        )
    )
    violation_response_rows = [normalized_database_base_dict(violation_response)]

    data_dict = {
        schema.StatePerson.__tablename__: person_rows,
        schema.StatePersonRace.__tablename__: race_rows,
        schema.StatePersonEthnicity.__tablename__: ethnicity_rows,
        schema.StateSupervisionViolationResponse.__tablename__:
            violation_response_rows,
        schema.StateSupervisionPeriod.__tablename__: period_rows,
        schema.StateProgramAssignment.__tablename__: referral_rows,
        schema.StateAssessment.__tablename__: assessment_rows,
        schema.StatePersonExternalId.__tablename__: [],
        schema.StatePersonAlias.__tablename__: [],
        schema.StateSentenceGroup.__tablename__: [],
    }

    dataset = 'recidiviz-123.state'

    # Swap the BigQuery reader for a fake source backed by data_dict while
    # the pipeline runs.
    read_from_bq_target = (
        'recidiviz.calculator.pipeline.utils.extractor_utils.ReadFromBigQuery'
    )
    fake_bq_constructor = (
        self.fake_bq_source_factory.create_fake_bq_source_constructor(
            dataset, data_dict
        )
    )
    with patch(read_from_bq_target, fake_bq_constructor):
        self.run_test_pipeline(
            dataset, supervision_period.supervision_period_id
        )
def testProgramPipelineNoReferrals(self):
    """Runs the program pipeline when one person has no program assignments.

    Two people are loaded. The races, ethnicity, assessment, supervision
    period, violation response and agent association all reference the first
    person, while the only program assignment references the second person.
    """
    first_person_id = 12345
    second_person_id = 9876

    first_person = schema.StatePerson(
        state_code="US_XX",
        person_id=first_person_id,
        gender=Gender.MALE,
        birthdate=date(1970, 1, 1),
        residency_status=ResidencyStatus.PERMANENT,
    )
    second_person = schema.StatePerson(
        state_code="US_XX",
        person_id=second_person_id,
        gender=Gender.MALE,
        birthdate=date(1974, 3, 12),
        residency_status=ResidencyStatus.PERMANENT,
    )
    person_rows = normalized_database_base_dict_list([first_person, second_person])

    black_race = schema.StatePersonRace(
        person_race_id=111,
        state_code="US_XX",
        race=Race.BLACK,
        person_id=first_person_id,
    )
    white_race = schema.StatePersonRace(
        person_race_id=111,
        state_code="US_XX",
        race=Race.WHITE,
        person_id=first_person_id,
    )
    race_rows = normalized_database_base_dict_list([black_race, white_race])

    hispanic_ethnicity = schema.StatePersonEthnicity(
        person_ethnicity_id=111,
        state_code="US_XX",
        ethnicity=Ethnicity.HISPANIC,
        person_id=first_person_id,
    )
    ethnicity_rows = normalized_database_base_dict_list([hispanic_ethnicity])

    # The only program assignment belongs to the *second* person.
    referral = schema.StateProgramAssignment(
        state_code="US_XX",
        program_assignment_id=123,
        referral_date=date(2015, 5, 10),
        person_id=second_person_id,
        participation_status=StateProgramAssignmentParticipationStatus.DENIED,
    )
    lsir_assessment = schema.StateAssessment(
        assessment_id=298374,
        state_code="US_XX",
        assessment_date=date(2015, 3, 19),
        assessment_type="LSIR",
        person_id=first_person_id,
    )
    supervision_period = schema.StateSupervisionPeriod(
        supervision_period_id=1111,
        state_code="US_XX",
        county_code="124",
        start_date=date(2015, 3, 14),
        termination_date=date(2016, 12, 29),
        supervision_type=StateSupervisionType.PROBATION,
        person_id=first_person_id,
        status=StateSupervisionPeriodStatus.PRESENT_WITHOUT_INFO,
    )

    referral_rows = [normalized_database_base_dict(referral)]
    assessment_rows = [normalized_database_base_dict(lsir_assessment)]
    period_rows = [normalized_database_base_dict(supervision_period)]

    violation_response = (
        database_test_utils.generate_test_supervision_violation_response(
            first_person_id
        )
    )
    violation_response_rows = [normalized_database_base_dict(violation_response)]

    # Rows for the two non-schema tables that are keyed by plain strings below.
    agent_association_rows = [
        {
            "agent_id": 1010,
            "person_id": first_person_id,
            "state_code": "US_XX",
            "agent_external_id": "OFFICER0009",
            "supervision_period_id": supervision_period.supervision_period_id,
        }
    ]
    population_count_rows = [
        {
            "state_code": "US_XX",
            "race_or_ethnicity": "BLACK",
            "population_count": 1,
            "representation_priority": 1,
        }
    ]

    data_dict = {
        schema.StatePerson.__tablename__: person_rows,
        schema.StatePersonRace.__tablename__: race_rows,
        schema.StatePersonEthnicity.__tablename__: ethnicity_rows,
        schema.StateSupervisionViolationResponse.__tablename__: violation_response_rows,
        schema.StateSupervisionPeriod.__tablename__: period_rows,
        schema.StateProgramAssignment.__tablename__: referral_rows,
        schema.StateAssessment.__tablename__: assessment_rows,
        schema.StatePersonExternalId.__tablename__: [],
        schema.StatePersonAlias.__tablename__: [],
        schema.StateSentenceGroup.__tablename__: [],
        "supervision_period_to_agent_association": agent_association_rows,
        "state_race_ethnicity_population_counts": population_count_rows,
    }

    dataset = "recidiviz-123.state"
    self.run_test_pipeline(dataset, data_dict)
def build_data_dict(fake_person_id: int, fake_supervision_period_id: int):
    """Assemble the table-name -> rows mapping for a basic pipeline run.

    One person is created with id ``fake_person_id``, together with two race
    rows, one ethnicity row, one program assignment, one assessment and one
    supervision period (id ``fake_supervision_period_id``) that all reference
    that person id. A supervision violation response is obtained from
    ``database_test_utils`` for the same person id. The mapping also carries
    one supervision-period-to-agent association row, one race/ethnicity
    population count row, and empty external id, alias and sentence group
    tables.
    """
    person = schema.StatePerson(
        state_code="US_XX",
        person_id=fake_person_id,
        gender=Gender.MALE,
        birthdate=date(1970, 1, 1),
        residency_status=ResidencyStatus.PERMANENT,
    )
    person_rows = [normalized_database_base_dict(person)]

    black_race = schema.StatePersonRace(
        person_race_id=111,
        state_code="US_XX",
        race=Race.BLACK,
        person_id=fake_person_id,
    )
    white_race = schema.StatePersonRace(
        person_race_id=111,
        state_code="US_XX",
        race=Race.WHITE,
        person_id=fake_person_id,
    )
    race_rows = normalized_database_base_dict_list([black_race, white_race])

    hispanic_ethnicity = schema.StatePersonEthnicity(
        person_ethnicity_id=111,
        state_code="US_XX",
        ethnicity=Ethnicity.HISPANIC,
        person_id=fake_person_id,
    )
    ethnicity_rows = normalized_database_base_dict_list([hispanic_ethnicity])

    referral = schema.StateProgramAssignment(
        state_code="US_XX",
        program_assignment_id=123,
        referral_date=date(2015, 5, 10),
        person_id=fake_person_id,
        participation_status=StateProgramAssignmentParticipationStatus.IN_PROGRESS,
    )
    lsir_assessment = schema.StateAssessment(
        assessment_id=298374,
        state_code="US_XX",
        assessment_date=date(2015, 3, 19),
        assessment_type="LSIR",
        person_id=fake_person_id,
    )
    probation_period = schema.StateSupervisionPeriod(
        supervision_period_id=fake_supervision_period_id,
        state_code="US_XX",
        county_code="124",
        start_date=date(2015, 3, 14),
        termination_date=date(2016, 12, 29),
        supervision_type=StateSupervisionType.PROBATION,
        person_id=fake_person_id,
        status=StateSupervisionPeriodStatus.PRESENT_WITHOUT_INFO,
    )

    referral_rows = [normalized_database_base_dict(referral)]
    assessment_rows = [normalized_database_base_dict(lsir_assessment)]
    period_rows = [normalized_database_base_dict(probation_period)]

    violation_response = (
        database_test_utils.generate_test_supervision_violation_response(
            fake_person_id
        )
    )
    violation_response_rows = [normalized_database_base_dict(violation_response)]

    # Rows for the two non-schema tables that are keyed by plain strings below.
    agent_association_rows = [
        {
            "agent_id": 1010,
            "person_id": fake_person_id,
            "state_code": "US_XX",
            "agent_external_id": "OFFICER0009",
            "supervision_period_id": fake_supervision_period_id,
        }
    ]
    population_count_rows = [
        {
            "state_code": "US_XX",
            "race_or_ethnicity": "BLACK",
            "population_count": 1,
            "representation_priority": 1,
        }
    ]

    return {
        schema.StatePerson.__tablename__: person_rows,
        schema.StatePersonRace.__tablename__: race_rows,
        schema.StatePersonEthnicity.__tablename__: ethnicity_rows,
        schema.StateSupervisionViolationResponse.__tablename__: violation_response_rows,
        schema.StateSupervisionPeriod.__tablename__: period_rows,
        schema.StateProgramAssignment.__tablename__: referral_rows,
        schema.StateAssessment.__tablename__: assessment_rows,
        schema.StatePersonExternalId.__tablename__: [],
        schema.StatePersonAlias.__tablename__: [],
        schema.StateSentenceGroup.__tablename__: [],
        "supervision_period_to_agent_association": agent_association_rows,
        "state_race_ethnicity_population_counts": population_count_rows,
    }
def generate_test_person(person_id, sentence_groups, incarceration_period,
                         agent, supervision_period) -> state_schema.StatePerson:
    """Build a StatePerson populated with child entities for use in tests.

    The person gets one external id, one alias, one race, one ethnicity, two
    assessments and one program assignment, plus the ``sentence_groups``
    passed in. The first assessment is linked to ``incarceration_period`` and
    the second to ``supervision_period``; ``agent`` is the conducting agent of
    both assessments and the referring agent of the program assignment.
    """
    external_ids = [
        state_schema.StatePersonExternalId(
            person_external_id_id=234,
            external_id='person_external_id',
            id_type='STATE',
            state_code='us_ny',
            person_id=person_id,
        )
    ]
    aliases = [
        state_schema.StatePersonAlias(
            person_alias_id=1456,
            state_code='us_ca',
            full_name='name',
            person_id=person_id,
        )
    ]
    races = [
        state_schema.StatePersonRace(
            person_race_id=345,
            state_code='us_ca',
            race=Race.BLACK.value,
            race_raw_text='BLK',
            person_id=person_id,
        )
    ]
    ethnicities = [
        state_schema.StatePersonEthnicity(
            person_ethnicity_id=345,
            state_code='us_ca',
            ethnicity=Ethnicity.NOT_HISPANIC.value,
            ethnicity_raw_text='HISP',
            person_id=person_id,
        )
    ]

    incarceration_assessment = state_schema.StateAssessment(
        assessment_id=456,
        person_id=person_id,
        state_code='us_ca',
        incarceration_period=incarceration_period,
        conducting_agent=agent,
    )
    supervision_assessment = state_schema.StateAssessment(
        assessment_id=4567,
        person_id=person_id,
        state_code='us_ca',
        supervision_period=supervision_period,
        conducting_agent=agent,
    )

    status = StateProgramAssignmentParticipationStatus.PRESENT_WITHOUT_INFO
    program_assignments = [
        state_schema.StateProgramAssignment(
            program_assignment_id=567,
            participation_status=status.value,
            state_code='us_ca',
            referring_agent=agent,
        )
    ]

    return state_schema.StatePerson(
        person_id=person_id,
        full_name='name',
        birthdate=datetime.date(1980, 1, 5),
        birthdate_inferred_from_age=False,
        external_ids=external_ids,
        aliases=aliases,
        races=races,
        ethnicities=ethnicities,
        sentence_groups=sentence_groups,
        assessments=[incarceration_assessment, supervision_assessment],
        program_assignments=program_assignments,
    )
def testProgramPipeline(self):
    """Runs the program pipeline end to end on a single fake person.

    Fixture rows are built in memory and loaded through
    ``extractor_utils.BuildRootEntity``. The person's entities are grouped,
    classified into program events and turned into program metrics, which are
    checked with ``AssertMatchers.validate_pipeline_test()``.
    """
    person_id = 12345

    person = schema.StatePerson(
        person_id=person_id,
        gender=Gender.MALE,
        birthdate=date(1970, 1, 1),
        residency_status=ResidencyStatus.PERMANENT,
    )
    person_rows = [normalized_database_base_dict(person)]

    black_race = schema.StatePersonRace(
        person_race_id=111,
        state_code='CA',
        race=Race.BLACK,
        person_id=person_id,
    )
    white_race = schema.StatePersonRace(
        person_race_id=111,
        state_code='ND',
        race=Race.WHITE,
        person_id=person_id,
    )
    race_rows = normalized_database_base_dict_list([black_race, white_race])

    hispanic_ethnicity = schema.StatePersonEthnicity(
        person_ethnicity_id=111,
        state_code='CA',
        ethnicity=Ethnicity.HISPANIC,
        person_id=person_id,
    )
    ethnicity_rows = normalized_database_base_dict_list([hispanic_ethnicity])

    referral = schema.StateProgramAssignment(
        program_assignment_id=123,
        referral_date=date(2015, 5, 10),
        person_id=person_id,
    )
    lsir_assessment = schema.StateAssessment(
        assessment_id=298374,
        assessment_date=date(2015, 3, 19),
        assessment_type='LSIR',
        person_id=person_id,
    )
    supervision_period = schema.StateSupervisionPeriod(
        supervision_period_id=1111,
        state_code='CA',
        county_code='124',
        start_date=date(2015, 3, 14),
        termination_date=date(2016, 12, 29),
        supervision_type=StateSupervisionType.PROBATION,
        person_id=person_id,
    )

    referral_rows = [normalized_database_base_dict(referral)]
    assessment_rows = [normalized_database_base_dict(lsir_assessment)]
    period_rows = [normalized_database_base_dict(supervision_period)]

    violation_response = (
        database_test_utils.generate_test_supervision_violation_response(
            person_id
        )
    )
    violation_response_rows = [normalized_database_base_dict(violation_response)]

    table_rows = {
        schema.StatePerson.__tablename__: person_rows,
        schema.StatePersonRace.__tablename__: race_rows,
        schema.StatePersonEthnicity.__tablename__: ethnicity_rows,
        schema.StateSupervisionViolationResponse.__tablename__:
            violation_response_rows,
        schema.StateSupervisionPeriod.__tablename__: period_rows,
        schema.StateProgramAssignment.__tablename__: referral_rows,
        schema.StateAssessment.__tablename__: assessment_rows,
    }

    test_pipeline = TestPipeline()

    def load_root_entities(label, schema_cls, entity_cls, with_related):
        """Apply a labelled BuildRootEntity transform to the test pipeline."""
        return (
            test_pipeline
            | label >> extractor_utils.BuildRootEntity(
                dataset=None,
                data_dict=table_rows,
                root_schema_class=schema_cls,
                root_entity_class=entity_cls,
                unifying_id_field='person_id',
                build_related_entities=with_related,
            )
        )

    # Load each root entity type from the in-memory tables. Persons and
    # program assignments are built with related entities; assessments and
    # supervision periods are not.
    persons = load_root_entities(
        'Load Persons', schema.StatePerson, entities.StatePerson, True)
    program_assignments = load_root_entities(
        'Load Program Assignments',
        schema.StateProgramAssignment,
        entities.StateProgramAssignment,
        True,
    )
    assessments = load_root_entities(
        'Load Assessments',
        schema.StateAssessment,
        entities.StateAssessment,
        False,
    )
    supervision_periods = load_root_entities(
        'Load SupervisionPeriods',
        schema.StateSupervisionPeriod,
        entities.StateSupervisionPeriod,
        False,
    )

    # Single association row linking the supervision period to an agent.
    agent_association = {
        'agent_id': 1010,
        'agent_external_id': 'OFFICER0009',
        'district_external_id': '10',
        'supervision_period_id': supervision_period.supervision_period_id,
    }
    agent_associations = (
        test_pipeline
        | 'Create SupervisionPeriod to Agent table'
        >> beam.Create([agent_association])
    )
    agent_associations_as_kv = (
        agent_associations
        | 'Convert SupervisionPeriod to Agent table to KV tuples'
        >> beam.ParDo(pipeline.ConvertDictToKVTuple(), 'supervision_period_id')
    )

    # Group each StatePerson with their other entities.
    entities_by_source = {
        'person': persons,
        'program_assignments': program_assignments,
        'assessments': assessments,
        'supervision_periods': supervision_periods,
    }
    grouped_entities = (
        entities_by_source
        | 'Group StatePerson to StateProgramAssignments and'
        >> beam.CoGroupByKey()
    )

    # Identify ProgramEvents from the StatePerson's StateProgramAssignments,
    # passing the agent associations to the DoFn wrapped in AsDict.
    program_events = grouped_entities | beam.ParDo(
        pipeline.ClassifyProgramAssignments(),
        AsDict(agent_associations_as_kv),
    )

    # Pipeline job details, plus a timestamp for local jobs.
    all_pipeline_options = PipelineOptions().get_all_options()
    all_pipeline_options['job_timestamp'] = datetime.datetime.now().strftime(
        '%Y-%m-%d_%H_%M_%S.%f')

    metrics_transform = pipeline.GetProgramMetrics(
        pipeline_options=all_pipeline_options,
        inclusions=ALL_INCLUSIONS_DICT,
        calculation_month_limit=-1,
    )
    program_metrics = program_events | 'Get Program Metrics' >> metrics_transform

    assert_that(program_metrics, AssertMatchers.validate_pipeline_test())

    test_pipeline.run()