def test_isPlaceholder_defaultEnumValue(self):
        entity = schema.StateIncarcerationSentence(
            incarceration_type=StateIncarcerationType.STATE_PRISON.value)
        self.assertTrue(is_placeholder(entity))

        entity.incarceration_type_raw_text = 'PRISON'
        self.assertFalse(is_placeholder(entity))
Пример #2
0
def generate_incarceration_sentence(
        person, **kwargs) -> schema.StateIncarcerationSentence:
    args = {
        "status": StateSentenceStatus.PRESENT_WITHOUT_INFO.value,
        "state_code": _STATE_CODE,
    }
    args.update(kwargs)
    return schema.StateIncarcerationSentence(person=person, **args)
 def test_getRootEntity_allPlaceholders_raises(self) -> None:
     placeholder_incarceration_period = schema.StateIncarcerationPeriod()
     placeholder_incarceration_sentence = schema.StateIncarcerationSentence(
         incarceration_periods=[placeholder_incarceration_period])
     placeholder_sentence_group = schema.StateSentenceGroup(
         incarceration_sentences=[placeholder_incarceration_sentence])
     placeholder_person = schema.StatePerson(
         sentence_groups=[placeholder_sentence_group])
     with pytest.raises(EntityMatchingError):
         get_root_entity_cls([placeholder_person])
Пример #4
0
def generate_test_incarceration_sentence(
        person_id, charges, incarceration_periods) \
        -> state_schema.StateIncarcerationSentence:
    instance = state_schema.StateIncarcerationSentence(
        incarceration_sentence_id=2222,
        status=StateSentenceStatus.SUSPENDED.value,
        state_code='us_ca',
        person_id=person_id,
        charges=charges,
        incarceration_periods=incarceration_periods,
    )

    return instance
def generate_test_incarceration_sentence(
        person_id, charges=None, incarceration_periods=None) \
        -> state_schema.StateIncarcerationSentence:
    instance = state_schema.StateIncarcerationSentence(
        incarceration_sentence_id=2222,
        status=StateSentenceStatus.SUSPENDED.value,
        state_code='us_ca',
        person_id=person_id,
        is_capital_punishment=False,
        charges=(charges if charges else []),
        incarceration_periods=(incarceration_periods if incarceration_periods else []),
    )

    return instance
    def test_getRootEntity(self) -> None:
        # Arrange
        incarceration_incident = schema.StateIncarcerationIncident(
            external_id=_EXTERNAL_ID)
        placeholder_incarceration_period = schema.StateIncarcerationPeriod(
            incarceration_incidents=[incarceration_incident])
        placeholder_incarceration_sentence = schema.StateIncarcerationSentence(
            incarceration_periods=[placeholder_incarceration_period])
        placeholder_sentence_group = schema.StateSentenceGroup(
            sentence_group_id=None,
            incarceration_sentences=[placeholder_incarceration_sentence],
        )
        person = schema.StatePerson(
            sentence_groups=[placeholder_sentence_group])

        # Act
        root_entity_cls = get_root_entity_cls([person])

        # Assert
        self.assertEqual(schema.StateIncarcerationIncident, root_entity_cls)
Пример #7
0
    def build_incarceration_pipeline_data_dict_no_incarceration(
            self, fake_person_id: int):
        """Builds a data_dict for a run of the pipeline where the person has no incarceration."""
        fake_person_1 = schema.StatePerson(
            state_code='US_XX',
            person_id=fake_person_id,
            gender=Gender.MALE,
            birthdate=date(1970, 1, 1),
            residency_status=ResidencyStatus.PERMANENT)

        fake_person_id_2 = 6789

        fake_person_2 = schema.StatePerson(
            state_code='US_XX',
            person_id=fake_person_id_2,
            gender=Gender.FEMALE,
            birthdate=date(1990, 1, 1),
            residency_status=ResidencyStatus.PERMANENT)

        persons_data = [
            normalized_database_base_dict(fake_person_1),
            normalized_database_base_dict(fake_person_2)
        ]

        sentence_group = schema.StateSentenceGroup(sentence_group_id=111,
                                                   person_id=fake_person_id)

        incarceration_period = schema.StateIncarcerationPeriod(
            incarceration_period_id=1111,
            status=StateIncarcerationPeriodStatus.NOT_IN_CUSTODY,
            state_code='US_XX',
            county_code='124',
            facility='San Quentin',
            facility_security_level=StateIncarcerationFacilitySecurityLevel.
            MAXIMUM,
            admission_reason=StateIncarcerationPeriodAdmissionReason.
            NEW_ADMISSION,
            projected_release_reason=StateIncarcerationPeriodReleaseReason.
            CONDITIONAL_RELEASE,
            admission_date=date(2008, 11, 20),
            release_date=date(2010, 12, 4),
            release_reason=StateIncarcerationPeriodReleaseReason.
            SENTENCE_SERVED,
            person_id=fake_person_id)

        incarceration_sentence = schema.StateIncarcerationSentence(
            incarceration_sentence_id=1111,
            sentence_group_id=sentence_group.sentence_group_id,
            incarceration_periods=[incarceration_period],
            person_id=fake_person_id)

        supervision_sentence = schema.StateSupervisionSentence(
            supervision_sentence_id=123, person_id=fake_person_id)

        sentence_group.incarceration_sentences = [incarceration_sentence]

        sentence_group_data = [normalized_database_base_dict(sentence_group)]

        incarceration_sentence_data = [
            normalized_database_base_dict(incarceration_sentence)
        ]

        supervision_sentence_data = [
            normalized_database_base_dict(supervision_sentence)
        ]

        incarceration_periods_data = [
            normalized_database_base_dict(incarceration_period)
        ]

        state_incarceration_sentence_incarceration_period_association = [
            {
                'incarceration_period_id':
                incarceration_period.incarceration_period_id,
                'incarceration_sentence_id':
                incarceration_sentence.incarceration_sentence_id,
            },
        ]

        data_dict = self._default_data_dict()
        data_dict_overrides = {
            schema.StatePerson.__tablename__:
            persons_data,
            schema.StateSentenceGroup.__tablename__:
            sentence_group_data,
            schema.StateIncarcerationSentence.__tablename__:
            incarceration_sentence_data,
            schema.StateSupervisionSentence.__tablename__:
            supervision_sentence_data,
            schema.StateIncarcerationPeriod.__tablename__:
            incarceration_periods_data,
            schema.state_incarceration_sentence_incarceration_period_association_table.name:
            state_incarceration_sentence_incarceration_period_association,
        }
        data_dict.update(data_dict_overrides)

        return data_dict
Пример #8
0
    def build_incarceration_pipeline_data_dict(self,
                                               fake_person_id: int,
                                               state_code: str = 'US_XX'):
        """Builds a data_dict for a basic run of the pipeline."""
        fake_person = schema.StatePerson(
            state_code=state_code,
            person_id=fake_person_id,
            gender=Gender.MALE,
            birthdate=date(1970, 1, 1),
            residency_status=ResidencyStatus.PERMANENT)

        persons_data = [normalized_database_base_dict(fake_person)]

        race_1 = schema.StatePersonRace(person_race_id=111,
                                        state_code=state_code,
                                        race=Race.BLACK,
                                        person_id=fake_person_id)

        race_2 = schema.StatePersonRace(person_race_id=111,
                                        state_code=state_code,
                                        race=Race.WHITE,
                                        person_id=fake_person_id)

        races_data = normalized_database_base_dict_list([race_1, race_2])

        ethnicity = schema.StatePersonEthnicity(person_ethnicity_id=111,
                                                state_code=state_code,
                                                ethnicity=Ethnicity.HISPANIC,
                                                person_id=fake_person_id)

        ethnicity_data = normalized_database_base_dict_list([ethnicity])

        sentence_group = schema.StateSentenceGroup(sentence_group_id=111,
                                                   person_id=fake_person_id)

        initial_incarceration = schema.StateIncarcerationPeriod(
            incarceration_period_id=1111,
            incarceration_type=StateIncarcerationType.STATE_PRISON,
            status=StateIncarcerationPeriodStatus.NOT_IN_CUSTODY,
            state_code=state_code,
            county_code='124',
            facility='San Quentin',
            facility_security_level=StateIncarcerationFacilitySecurityLevel.
            MAXIMUM,
            admission_reason=StateIncarcerationPeriodAdmissionReason.
            NEW_ADMISSION,
            projected_release_reason=StateIncarcerationPeriodReleaseReason.
            CONDITIONAL_RELEASE,
            admission_date=date(2008, 11, 20),
            release_date=date(2010, 12, 4),
            release_reason=StateIncarcerationPeriodReleaseReason.
            SENTENCE_SERVED,
            person_id=fake_person_id,
        )

        first_reincarceration = schema.StateIncarcerationPeriod(
            incarceration_period_id=2222,
            incarceration_type=StateIncarcerationType.STATE_PRISON,
            status=StateIncarcerationPeriodStatus.NOT_IN_CUSTODY,
            state_code=state_code,
            county_code='124',
            facility='San Quentin',
            facility_security_level=StateIncarcerationFacilitySecurityLevel.
            MAXIMUM,
            admission_reason=StateIncarcerationPeriodAdmissionReason.
            NEW_ADMISSION,
            projected_release_reason=StateIncarcerationPeriodReleaseReason.
            CONDITIONAL_RELEASE,
            admission_date=date(2011, 4, 5),
            release_date=date(2014, 4, 14),
            release_reason=StateIncarcerationPeriodReleaseReason.
            SENTENCE_SERVED,
            person_id=fake_person_id)

        subsequent_reincarceration = schema.StateIncarcerationPeriod(
            incarceration_period_id=3333,
            incarceration_type=StateIncarcerationType.STATE_PRISON,
            status=StateIncarcerationPeriodStatus.IN_CUSTODY,
            state_code=state_code,
            county_code='124',
            facility='San Quentin',
            facility_security_level=StateIncarcerationFacilitySecurityLevel.
            MAXIMUM,
            admission_reason=StateIncarcerationPeriodAdmissionReason.
            NEW_ADMISSION,
            projected_release_reason=StateIncarcerationPeriodReleaseReason.
            CONDITIONAL_RELEASE,
            admission_date=date(2017, 1, 4),
            person_id=fake_person_id)

        incarceration_sentence = schema.StateIncarcerationSentence(
            incarceration_sentence_id=1111,
            state_code=state_code,
            sentence_group_id=sentence_group.sentence_group_id,
            incarceration_periods=[
                initial_incarceration, first_reincarceration,
                subsequent_reincarceration
            ],
            person_id=fake_person_id)

        supervision_sentence = schema.StateSupervisionSentence(
            supervision_sentence_id=123,
            state_code=state_code,
            person_id=fake_person_id)

        sentence_group.incarceration_sentences = [incarceration_sentence]

        sentence_group_data = [normalized_database_base_dict(sentence_group)]

        incarceration_sentence_data = [
            normalized_database_base_dict(incarceration_sentence)
        ]

        supervision_sentence_data = [
            normalized_database_base_dict(supervision_sentence)
        ]

        incarceration_periods_data = [
            normalized_database_base_dict(initial_incarceration),
            normalized_database_base_dict(first_reincarceration),
            normalized_database_base_dict(subsequent_reincarceration)
        ]

        state_incarceration_sentence_incarceration_period_association = [
            {
                'incarceration_period_id':
                initial_incarceration.incarceration_period_id,
                'incarceration_sentence_id':
                incarceration_sentence.incarceration_sentence_id,
            },
            {
                'incarceration_period_id':
                first_reincarceration.incarceration_period_id,
                'incarceration_sentence_id':
                incarceration_sentence.incarceration_sentence_id,
            },
            {
                'incarceration_period_id':
                subsequent_reincarceration.incarceration_period_id,
                'incarceration_sentence_id':
                incarceration_sentence.incarceration_sentence_id,
            },
        ]

        data_dict = self._default_data_dict()
        data_dict_overrides = {
            schema.StatePerson.__tablename__:
            persons_data,
            schema.StatePersonRace.__tablename__:
            races_data,
            schema.StatePersonEthnicity.__tablename__:
            ethnicity_data,
            schema.StateSentenceGroup.__tablename__:
            sentence_group_data,
            schema.StateIncarcerationSentence.__tablename__:
            incarceration_sentence_data,
            schema.StateSupervisionSentence.__tablename__:
            supervision_sentence_data,
            schema.StateIncarcerationPeriod.__tablename__:
            incarceration_periods_data,
            schema.state_incarceration_sentence_incarceration_period_association_table.name:
            state_incarceration_sentence_incarceration_period_association,
        }
        data_dict.update(data_dict_overrides)
        return data_dict
Пример #9
0
    def testIncarcerationPipeline(self):
        fake_person_id = 12345

        fake_person = schema.StatePerson(
            person_id=fake_person_id,
            gender=Gender.MALE,
            birthdate=date(1970, 1, 1),
            residency_status=ResidencyStatus.PERMANENT)

        persons_data = [normalized_database_base_dict(fake_person)]

        race_1 = schema.StatePersonRace(person_race_id=111,
                                        state_code='CA',
                                        race=Race.BLACK,
                                        person_id=fake_person_id)

        race_2 = schema.StatePersonRace(person_race_id=111,
                                        state_code='ND',
                                        race=Race.WHITE,
                                        person_id=fake_person_id)

        races_data = normalized_database_base_dict_list([race_1, race_2])

        ethnicity = schema.StatePersonEthnicity(person_ethnicity_id=111,
                                                state_code='CA',
                                                ethnicity=Ethnicity.HISPANIC,
                                                person_id=fake_person_id)

        ethnicity_data = normalized_database_base_dict_list([ethnicity])

        sentence_group = schema.StateSentenceGroup(sentence_group_id=111,
                                                   person_id=fake_person_id)

        initial_incarceration = schema.StateIncarcerationPeriod(
            incarceration_period_id=1111,
            status=StateIncarcerationPeriodStatus.NOT_IN_CUSTODY,
            state_code='CA',
            county_code='124',
            facility='San Quentin',
            facility_security_level=StateIncarcerationFacilitySecurityLevel.
            MAXIMUM,
            admission_reason=StateIncarcerationPeriodAdmissionReason.
            NEW_ADMISSION,
            projected_release_reason=StateIncarcerationPeriodReleaseReason.
            CONDITIONAL_RELEASE,
            admission_date=date(2008, 11, 20),
            release_date=date(2010, 12, 4),
            release_reason=StateIncarcerationPeriodReleaseReason.
            SENTENCE_SERVED,
            person_id=fake_person_id,
        )

        first_reincarceration = schema.StateIncarcerationPeriod(
            incarceration_period_id=2222,
            status=StateIncarcerationPeriodStatus.NOT_IN_CUSTODY,
            state_code='CA',
            county_code='124',
            facility='San Quentin',
            facility_security_level=StateIncarcerationFacilitySecurityLevel.
            MAXIMUM,
            admission_reason=StateIncarcerationPeriodAdmissionReason.
            NEW_ADMISSION,
            projected_release_reason=StateIncarcerationPeriodReleaseReason.
            CONDITIONAL_RELEASE,
            admission_date=date(2011, 4, 5),
            release_date=date(2014, 4, 14),
            release_reason=StateIncarcerationPeriodReleaseReason.
            SENTENCE_SERVED,
            person_id=fake_person_id)

        subsequent_reincarceration = schema.StateIncarcerationPeriod(
            incarceration_period_id=3333,
            status=StateIncarcerationPeriodStatus.IN_CUSTODY,
            state_code='CA',
            county_code='124',
            facility='San Quentin',
            facility_security_level=StateIncarcerationFacilitySecurityLevel.
            MAXIMUM,
            admission_reason=StateIncarcerationPeriodAdmissionReason.
            NEW_ADMISSION,
            projected_release_reason=StateIncarcerationPeriodReleaseReason.
            CONDITIONAL_RELEASE,
            admission_date=date(2017, 1, 4),
            person_id=fake_person_id)

        incarceration_sentence = schema.StateIncarcerationSentence(
            incarceration_sentence_id=1111,
            sentence_group_id=sentence_group.sentence_group_id,
            incarceration_periods=[
                initial_incarceration, first_reincarceration,
                subsequent_reincarceration
            ],
            person_id=fake_person_id)

        supervision_sentence = schema.StateSupervisionSentence(
            supervision_sentence_id=123, person_id=fake_person_id)

        sentence_group.incarceration_sentences = [incarceration_sentence]

        sentence_group_data = [normalized_database_base_dict(sentence_group)]

        incarceration_sentence_data = [
            normalized_database_base_dict(incarceration_sentence)
        ]

        supervision_sentence_data = [
            normalized_database_base_dict(supervision_sentence)
        ]

        incarceration_periods_data = [
            normalized_database_base_dict(initial_incarceration),
            normalized_database_base_dict(first_reincarceration),
            normalized_database_base_dict(subsequent_reincarceration)
        ]

        state_incarceration_sentence_incarceration_period_association = [
            {
                'incarceration_period_id':
                initial_incarceration.incarceration_period_id,
                'incarceration_sentence_id':
                incarceration_sentence.incarceration_sentence_id,
            },
            {
                'incarceration_period_id':
                first_reincarceration.incarceration_period_id,
                'incarceration_sentence_id':
                incarceration_sentence.incarceration_sentence_id,
            },
            {
                'incarceration_period_id':
                subsequent_reincarceration.incarceration_period_id,
                'incarceration_sentence_id':
                incarceration_sentence.incarceration_sentence_id,
            },
        ]

        data_dict = {
            schema.StatePerson.__tablename__:
            persons_data,
            schema.StatePersonRace.__tablename__:
            races_data,
            schema.StatePersonEthnicity.__tablename__:
            ethnicity_data,
            schema.StateSentenceGroup.__tablename__:
            sentence_group_data,
            schema.StateIncarcerationSentence.__tablename__:
            incarceration_sentence_data,
            schema.StateSupervisionSentence.__tablename__:
            supervision_sentence_data,
            schema.StateIncarcerationPeriod.__tablename__:
            incarceration_periods_data,
            schema.state_incarceration_sentence_incarceration_period_association_table.name:
            state_incarceration_sentence_incarceration_period_association,
            schema.state_supervision_sentence_incarceration_period_association_table.name:
            [{}]
        }

        test_pipeline = TestPipeline()

        # Get StatePersons
        persons = (test_pipeline
                   | 'Load Persons' >> extractor_utils.BuildRootEntity(
                       dataset=None,
                       data_dict=data_dict,
                       root_schema_class=schema.StatePerson,
                       root_entity_class=entities.StatePerson,
                       unifying_id_field='person_id',
                       build_related_entities=True))

        # Get StateSentenceGroups
        sentence_groups = (
            test_pipeline
            | 'Load StateSentencegroups' >> extractor_utils.BuildRootEntity(
                dataset=None,
                data_dict=data_dict,
                root_schema_class=schema.StateSentenceGroup,
                root_entity_class=entities.StateSentenceGroup,
                unifying_id_field='person_id',
                build_related_entities=True))

        # Get StateIncarcerationSentences
        incarceration_sentences = (
            test_pipeline | 'Load StateIncarcerationSentences' >>
            extractor_utils.BuildRootEntity(
                dataset=None,
                data_dict=data_dict,
                root_schema_class=schema.StateIncarcerationSentence,
                root_entity_class=entities.StateIncarcerationSentence,
                unifying_id_field='person_id',
                build_related_entities=True))

        # Get StateSupervisionSentences
        supervision_sentences = (
            test_pipeline | 'Load StateSupervisionSentences' >>
            extractor_utils.BuildRootEntity(
                dataset=None,
                data_dict=data_dict,
                root_schema_class=schema.StateSupervisionSentence,
                root_entity_class=entities.StateSupervisionSentence,
                unifying_id_field='person_id',
                build_related_entities=True))

        sentences_and_sentence_groups = (
            {
                'sentence_groups': sentence_groups,
                'incarceration_sentences': incarceration_sentences,
                'supervision_sentences': supervision_sentences
            }
            | 'Group sentences to sentence groups' >> beam.CoGroupByKey())

        sentence_groups_with_hydrated_sentences = (
            sentences_and_sentence_groups
            | 'Set hydrated sentences on sentence groups' >> beam.ParDo(
                SetSentencesOnSentenceGroup()))

        # Group each StatePerson with their related entities
        person_and_sentence_groups = (
            {
                'person': persons,
                'sentence_groups': sentence_groups_with_hydrated_sentences
            }
            | 'Group StatePerson to SentenceGroups' >> beam.CoGroupByKey())

        # Identify IncarcerationEvents events from the StatePerson's
        # StateIncarcerationPeriods
        fake_person_id_to_county_query_result = [{
            'person_id':
            fake_person_id,
            'county_of_residence':
            _COUNTY_OF_RESIDENCE
        }]
        person_id_to_county_kv = (
            test_pipeline
            | "Read person id to county associations from BigQuery" >>
            beam.Create(fake_person_id_to_county_query_result)
            |
            "Convert to KV" >> beam.ParDo(ConvertDictToKVTuple(), 'person_id'))

        person_events = (person_and_sentence_groups
                         | 'Classify Incarceration Events' >> beam.ParDo(
                             pipeline.ClassifyIncarcerationEvents(),
                             AsDict(person_id_to_county_kv)))

        # Get pipeline job details for accessing job_id
        all_pipeline_options = PipelineOptions().get_all_options()

        # Add timestamp for local jobs
        job_timestamp = datetime.datetime.now().strftime(
            '%Y-%m-%d_%H_%M_%S.%f')
        all_pipeline_options['job_timestamp'] = job_timestamp

        # Get IncarcerationMetrics
        incarceration_metrics = (
            person_events
            | 'Get Incarceration Metrics' >> pipeline.GetIncarcerationMetrics(
                pipeline_options=all_pipeline_options,
                inclusions=ALL_INCLUSIONS_DICT,
                calculation_month_limit=-1))

        assert_that(incarceration_metrics,
                    AssertMatchers.validate_metric_type())

        test_pipeline.run()
Пример #10
0
    def build_incarceration_pipeline_data_dict_no_incarceration(
            self, fake_person_id: int):
        """Builds a data_dict for a run of the pipeline where the person has no incarceration."""
        fake_person_1 = schema.StatePerson(
            state_code="US_XX",
            person_id=fake_person_id,
            gender=Gender.MALE,
            birthdate=date(1970, 1, 1),
            residency_status=ResidencyStatus.PERMANENT,
        )

        fake_person_id_2 = 6789

        fake_person_2 = schema.StatePerson(
            state_code="US_XX",
            person_id=fake_person_id_2,
            gender=Gender.FEMALE,
            birthdate=date(1990, 1, 1),
            residency_status=ResidencyStatus.PERMANENT,
        )

        persons_data = [
            normalized_database_base_dict(fake_person_1),
            normalized_database_base_dict(fake_person_2),
        ]

        sentence_group = schema.StateSentenceGroup(
            sentence_group_id=111,
            state_code="US_XX",
            status=StateSentenceStatus.PRESENT_WITHOUT_INFO,
            person_id=fake_person_id,
        )

        incarceration_period = schema.StateIncarcerationPeriod(
            incarceration_period_id=1111,
            status=StateIncarcerationPeriodStatus.NOT_IN_CUSTODY,
            state_code="US_XX",
            county_code="124",
            facility="San Quentin",
            facility_security_level=StateIncarcerationFacilitySecurityLevel.
            MAXIMUM,
            admission_reason=StateIncarcerationPeriodAdmissionReason.
            NEW_ADMISSION,
            projected_release_reason=StateIncarcerationPeriodReleaseReason.
            CONDITIONAL_RELEASE,
            admission_date=date(2008, 11, 20),
            release_date=date(2010, 12, 4),
            release_reason=StateIncarcerationPeriodReleaseReason.
            SENTENCE_SERVED,
            person_id=fake_person_id,
        )

        incarceration_sentence = schema.StateIncarcerationSentence(
            incarceration_sentence_id=1111,
            state_code="US_XX",
            sentence_group_id=sentence_group.sentence_group_id,
            incarceration_periods=[incarceration_period],
            person_id=fake_person_id,
            status=StateSentenceStatus.PRESENT_WITHOUT_INFO,
        )

        supervision_sentence = schema.StateSupervisionSentence(
            supervision_sentence_id=123,
            state_code="US_XX",
            person_id=fake_person_id,
            status=StateSentenceStatus.PRESENT_WITHOUT_INFO,
        )

        sentence_group.incarceration_sentences = [incarceration_sentence]

        sentence_group_data = [normalized_database_base_dict(sentence_group)]

        incarceration_sentence_data = [
            normalized_database_base_dict(incarceration_sentence)
        ]

        supervision_sentence_data = [
            normalized_database_base_dict(supervision_sentence)
        ]

        incarceration_periods_data = [
            normalized_database_base_dict(incarceration_period)
        ]

        state_incarceration_sentence_incarceration_period_association = [
            {
                "incarceration_period_id":
                incarceration_period.incarceration_period_id,
                "incarceration_sentence_id":
                incarceration_sentence.incarceration_sentence_id,
            },
        ]

        fake_person_id_to_county_query_result = [{
            "state_code":
            "US_XX",
            "person_id":
            fake_person_id,
            "county_of_residence":
            _COUNTY_OF_RESIDENCE,
        }]

        us_mo_sentence_status_data: List[Dict[str, Any]] = [{
            "state_code":
            "US_MO",
            "person_id":
            fake_person_id,
            "sentence_external_id":
            "XXX",
            "sentence_status_external_id":
            "YYY",
            "status_code":
            "ZZZ",
            "status_date":
            "not_a_date",
            "status_description":
            "XYZ",
        }]

        incarceration_period_judicial_district_association_data = [{
            "state_code":
            "US_XX",
            "person_id":
            fake_person_id,
            "incarceration_period_id":
            123,
            "judicial_district_code":
            "NW",
        }]

        state_race_ethnicity_population_count_data = [{
            "state_code":
            "US_XX",
            "race_or_ethnicity":
            "BLACK",
            "population_count":
            1,
            "representation_priority":
            1,
        }]

        data_dict = self._default_data_dict()
        data_dict_overrides = {
            schema.StatePerson.__tablename__:
            persons_data,
            schema.StateSentenceGroup.__tablename__:
            sentence_group_data,
            schema.StateIncarcerationSentence.__tablename__:
            incarceration_sentence_data,
            schema.StateSupervisionSentence.__tablename__:
            supervision_sentence_data,
            schema.StateIncarcerationPeriod.__tablename__:
            incarceration_periods_data,
            schema.state_incarceration_sentence_incarceration_period_association_table.name:
            state_incarceration_sentence_incarceration_period_association,
            "persons_to_recent_county_of_residence":
            fake_person_id_to_county_query_result,
            "incarceration_period_judicial_district_association":
            incarceration_period_judicial_district_association_data,
            "state_race_ethnicity_population_counts":
            state_race_ethnicity_population_count_data,
            "us_mo_sentence_statuses":
            us_mo_sentence_status_data,
        }
        data_dict.update(data_dict_overrides)

        return data_dict
Пример #11
0
    def build_incarceration_pipeline_data_dict(self,
                                               fake_person_id: int,
                                               state_code: str = "US_XX"):
        """Builds a data_dict for a basic run of the pipeline."""
        fake_person = schema.StatePerson(
            state_code=state_code,
            person_id=fake_person_id,
            gender=Gender.MALE,
            birthdate=date(1970, 1, 1),
            residency_status=ResidencyStatus.PERMANENT,
        )

        persons_data = [normalized_database_base_dict(fake_person)]

        race_1 = schema.StatePersonRace(
            person_race_id=111,
            state_code=state_code,
            race=Race.BLACK,
            person_id=fake_person_id,
        )

        race_2 = schema.StatePersonRace(
            person_race_id=111,
            state_code=state_code,
            race=Race.WHITE,
            person_id=fake_person_id,
        )

        races_data = normalized_database_base_dict_list([race_1, race_2])

        ethnicity = schema.StatePersonEthnicity(
            person_ethnicity_id=111,
            state_code=state_code,
            ethnicity=Ethnicity.HISPANIC,
            person_id=fake_person_id,
        )

        ethnicity_data = normalized_database_base_dict_list([ethnicity])

        sentence_group = schema.StateSentenceGroup(
            sentence_group_id=98765,
            state_code=state_code,
            status=StateSentenceStatus.PRESENT_WITHOUT_INFO,
            person_id=fake_person_id,
        )

        initial_incarceration = schema.StateIncarcerationPeriod(
            incarceration_period_id=1111,
            incarceration_type=StateIncarcerationType.STATE_PRISON,
            status=StateIncarcerationPeriodStatus.NOT_IN_CUSTODY,
            state_code=state_code,
            county_code="124",
            facility="San Quentin",
            facility_security_level=StateIncarcerationFacilitySecurityLevel.
            MAXIMUM,
            admission_reason=StateIncarcerationPeriodAdmissionReason.
            NEW_ADMISSION,
            projected_release_reason=StateIncarcerationPeriodReleaseReason.
            CONDITIONAL_RELEASE,
            admission_date=date(2008, 11, 20),
            release_date=date(2010, 12, 4),
            release_reason=StateIncarcerationPeriodReleaseReason.
            SENTENCE_SERVED,
            person_id=fake_person_id,
        )

        first_reincarceration = schema.StateIncarcerationPeriod(
            incarceration_period_id=2222,
            incarceration_type=StateIncarcerationType.STATE_PRISON,
            status=StateIncarcerationPeriodStatus.NOT_IN_CUSTODY,
            state_code=state_code,
            county_code="124",
            facility="San Quentin",
            facility_security_level=StateIncarcerationFacilitySecurityLevel.
            MAXIMUM,
            admission_reason=StateIncarcerationPeriodAdmissionReason.
            NEW_ADMISSION,
            projected_release_reason=StateIncarcerationPeriodReleaseReason.
            CONDITIONAL_RELEASE,
            admission_date=date(2011, 4, 5),
            release_date=date(2014, 4, 14),
            release_reason=StateIncarcerationPeriodReleaseReason.
            SENTENCE_SERVED,
            person_id=fake_person_id,
        )

        subsequent_reincarceration = schema.StateIncarcerationPeriod(
            incarceration_period_id=3333,
            incarceration_type=StateIncarcerationType.STATE_PRISON,
            status=StateIncarcerationPeriodStatus.IN_CUSTODY,
            state_code=state_code,
            county_code="124",
            facility="San Quentin",
            facility_security_level=StateIncarcerationFacilitySecurityLevel.
            MAXIMUM,
            admission_reason=StateIncarcerationPeriodAdmissionReason.
            NEW_ADMISSION,
            projected_release_reason=StateIncarcerationPeriodReleaseReason.
            CONDITIONAL_RELEASE,
            admission_date=date(2017, 1, 4),
            person_id=fake_person_id,
        )

        incarceration_sentence = schema.StateIncarcerationSentence(
            incarceration_sentence_id=1111,
            state_code=state_code,
            status=StateSentenceStatus.PRESENT_WITHOUT_INFO,
            sentence_group_id=sentence_group.sentence_group_id,
            incarceration_periods=[
                initial_incarceration,
                first_reincarceration,
                subsequent_reincarceration,
            ],
            person_id=fake_person_id,
        )

        supervision_sentence = schema.StateSupervisionSentence(
            supervision_sentence_id=123,
            state_code=state_code,
            sentence_group_id=sentence_group.sentence_group_id,
            person_id=fake_person_id,
            status=StateSentenceStatus.PRESENT_WITHOUT_INFO,
        )

        sentence_group.incarceration_sentences = [incarceration_sentence]
        sentence_group.supervision_sentences = [supervision_sentence]

        sentence_group_data = [normalized_database_base_dict(sentence_group)]

        incarceration_sentence_data = [
            normalized_database_base_dict(incarceration_sentence)
        ]

        supervision_sentence_data = [
            normalized_database_base_dict(supervision_sentence)
        ]

        incarceration_periods_data = [
            normalized_database_base_dict(initial_incarceration),
            normalized_database_base_dict(first_reincarceration),
            normalized_database_base_dict(subsequent_reincarceration),
        ]

        state_incarceration_sentence_incarceration_period_association = [
            {
                "incarceration_period_id":
                initial_incarceration.incarceration_period_id,
                "incarceration_sentence_id":
                incarceration_sentence.incarceration_sentence_id,
            },
            {
                "incarceration_period_id":
                first_reincarceration.incarceration_period_id,
                "incarceration_sentence_id":
                incarceration_sentence.incarceration_sentence_id,
            },
            {
                "incarceration_period_id":
                subsequent_reincarceration.incarceration_period_id,
                "incarceration_sentence_id":
                incarceration_sentence.incarceration_sentence_id,
            },
        ]

        fake_person_id_to_county_query_result = [{
            "state_code":
            state_code,
            "person_id":
            fake_person_id,
            "county_of_residence":
            _COUNTY_OF_RESIDENCE,
        }]

        us_mo_sentence_status_data: List[Dict[str, Any]] = [{
            "state_code":
            "US_MO",
            "person_id":
            fake_person_id,
            "sentence_external_id":
            "XXX",
            "sentence_status_external_id":
            "YYY",
            "status_code":
            "ZZZ",
            "status_date":
            "not_a_date",
            "status_description":
            "XYZ",
        }]

        incarceration_period_judicial_district_association_data = [{
            "state_code":
            state_code,
            "person_id":
            fake_person_id,
            "incarceration_period_id":
            123,
            "judicial_district_code":
            "NW",
        }]

        state_race_ethnicity_population_count_data = [{
            "state_code":
            state_code,
            "race_or_ethnicity":
            "BLACK",
            "population_count":
            1,
            "representation_priority":
            1,
        }]

        data_dict = self._default_data_dict()
        data_dict_overrides = {
            schema.StatePerson.__tablename__:
            persons_data,
            schema.StatePersonRace.__tablename__:
            races_data,
            schema.StatePersonEthnicity.__tablename__:
            ethnicity_data,
            schema.StateSentenceGroup.__tablename__:
            sentence_group_data,
            schema.StateIncarcerationSentence.__tablename__:
            incarceration_sentence_data,
            schema.StateSupervisionSentence.__tablename__:
            supervision_sentence_data,
            schema.StateIncarcerationPeriod.__tablename__:
            incarceration_periods_data,
            schema.state_incarceration_sentence_incarceration_period_association_table.name:
            state_incarceration_sentence_incarceration_period_association,
            "persons_to_recent_county_of_residence":
            fake_person_id_to_county_query_result,
            "incarceration_period_judicial_district_association":
            incarceration_period_judicial_district_association_data,
            "state_race_ethnicity_population_counts":
            state_race_ethnicity_population_count_data,
            "us_mo_sentence_statuses":
            us_mo_sentence_status_data,
        }
        data_dict.update(data_dict_overrides)
        return data_dict