def setUp(self):
        """Start the module patches and assemble the export fixtures used by the tests.

        NOTE(review): three patchers are started here; the matching stop()
        calls are not visible in this block -- confirm tearDown stops them all.
        """
        project_id = 'fake-recidiviz-project'
        dataset_id = 'base_dataset'
        uri_template = "gs://{project_id}-dataset-location/subdirectory"

        self.mock_project_id = project_id
        self.mock_dataset_id = dataset_id
        self.mock_dataset = bigquery.dataset.DatasetReference(
            project_id, dataset_id)

        # Make metadata.project_id() report the fake project.
        self.metadata_patcher = mock.patch('recidiviz.utils.metadata.project_id')
        self.mock_project_id_fn = self.metadata_patcher.start()
        self.mock_project_id_fn.return_value = project_id

        # Swap out the BigQuery client class referenced by the export manager;
        # the fixture keeps the instance that the patched class returns.
        self.client_patcher = mock.patch(
            'recidiviz.metrics.export.metric_view_export_manager.BigQueryClientImpl')
        patched_client_cls = self.client_patcher.start()
        self.mock_client = patched_client_cls.return_value
        self.mock_client.dataset_ref_for_id.return_value = self.mock_dataset

        builder = MetricBigQueryViewBuilder(
            dataset_id=self.mock_dataset.dataset_id,
            view_id='test_view',
            view_query_template='SELECT NULL LIMIT 0',
            dimensions=[],
        )
        self.mock_view_builder = builder
        self.views_for_dataset = [builder]

        # NOTE(review): keyed by the literal string "dataset_id" rather than
        # self.mock_dataset_id -- confirm this is intended.
        self.output_uri_template_for_dataset = {"dataset_id": uri_template}

        self.views_to_update = {dataset_id: self.views_for_dataset}

        export_config = ExportMetricDatasetConfig(
            dataset_id=dataset_id,
            metric_view_builders_to_export=self.views_for_dataset,
            output_directory_uri_template=uri_template,
            state_code_filter=mock_state_code,
            export_name=None,
        )
        self.metric_dataset_export_configs = [export_config]

        # Attributes set on the mock that replaces the export manager's view_config.
        patched_config_attrs = {
            'OUTPUT_DIRECTORY_URI_TEMPLATE_FOR_DATASET_EXPORT': self.output_uri_template_for_dataset,
            'VIEW_BUILDERS_FOR_VIEWS_TO_UPDATE': self.views_to_update,
            'METRIC_DATASET_EXPORT_CONFIGS': self.metric_dataset_export_configs,
        }
        self.view_export_config_patcher = mock.patch(
            'recidiviz.metrics.export.metric_view_export_manager.view_config',
            **patched_config_attrs)
        self.mock_export_config = self.view_export_config_patcher.start()
    def setUp(self) -> None:
        """Patch the project id and record the staging output paths of two export configs.

        NOTE(review): the patcher started here needs a matching stop(); it is
        not visible in this block -- confirm tearDown handles it.
        """
        self.metadata_patcher = patch("recidiviz.utils.metadata.project_id")
        self.mock_project_id_fn = self.metadata_patcher.start()
        self.mock_project_id_fn.return_value = "project-id"

        self.mock_bq_view_namespace = BigQueryViewNamespace.STATE

        # (view_id, description, query, dimensions, intermediate table, output dir)
        view_specs = (
            (
                "view1",
                "view1 description",
                "select * from table",
                ("a", "b", "c"),
                "intermediate_table",
                "gs://bucket1/staging/US_XX",
            ),
            (
                "view2",
                "view2 description",
                "select * from view2",
                ("d", "e", "f"),
                "intermediate_table2",
                "gs://bucket2/staging/US_XX",
            ),
        )

        staging_configs = []
        for view_id, description, query, dims, table_name, directory in view_specs:
            built_view = MetricBigQueryViewBuilder(
                dataset_id="dataset",
                view_id=view_id,
                description=description,
                view_query_template=query,
                dimensions=dims,
            ).build()
            staging_configs.append(
                ExportBigQueryViewConfig(
                    bq_view_namespace=self.mock_bq_view_namespace,
                    view=built_view,
                    view_filter_clause="WHERE state_code = 'US_XX'",
                    intermediate_table_name=table_name,
                    output_directory=GcsfsDirectoryPath.from_absolute_path(directory),
                )
            )

        self.staging_paths = [config.output_path("txt") for config in staging_configs]
    def setUp(self):
        """Patch the project id and create a single metric view builder fixture.

        NOTE(review): the patcher started here needs a matching stop(); it is
        not visible in this block -- confirm tearDown handles it.
        """
        project_id, dataset_id = 'fake-recidiviz-project', 'base_dataset'

        self.mock_project_id = project_id
        self.mock_dataset_id = dataset_id
        self.mock_dataset = bigquery.dataset.DatasetReference(project_id, dataset_id)

        # Make metadata.project_id() report the fake project.
        patcher = mock.patch('recidiviz.utils.metadata.project_id')
        self.metadata_patcher = patcher
        self.mock_project_id_fn = patcher.start()
        self.mock_project_id_fn.return_value = project_id

        builder = MetricBigQueryViewBuilder(
            dataset_id=self.mock_dataset.dataset_id,
            view_id='test_view',
            view_query_template='SELECT NULL LIMIT 0',
            dimensions=[],
        )
        self.mock_view_builder = builder
        self.views_for_dataset = [builder]
Exemplo n.º 4
0
    def setUp(self):
        """Patch the project id and create the namespace and view builder fixtures.

        NOTE(review): the patcher started here needs a matching stop(); it is
        not visible in this block -- confirm tearDown handles it.
        """
        project_id, dataset_id = "fake-recidiviz-project", "base_dataset"

        self.mock_project_id = project_id
        self.mock_dataset_id = dataset_id
        self.mock_dataset = bigquery.dataset.DatasetReference(project_id, dataset_id)

        # Make metadata.project_id() report the fake project.
        patcher = mock.patch("recidiviz.utils.metadata.project_id")
        self.metadata_patcher = patcher
        self.mock_project_id_fn = patcher.start()
        self.mock_project_id_fn.return_value = project_id

        self.mock_big_query_view_namespace = BigQueryViewNamespace.STATE

        builder = MetricBigQueryViewBuilder(
            dataset_id=self.mock_dataset.dataset_id,
            view_id="test_view",
            view_query_template="SELECT NULL LIMIT 0",
            dimensions=[],
        )
        self.mock_view_builder = builder
        self.views_for_dataset = [builder]
Exemplo n.º 5
0
    def setUp(self) -> None:
        """Patch the project id and record the staging output paths of two export configs.

        NOTE(review): the patcher started here needs a matching stop(); it is
        not visible in this block -- confirm tearDown handles it.
        """
        self.metadata_patcher = patch("recidiviz.utils.metadata.project_id")
        self.mock_project_id_fn = self.metadata_patcher.start()
        self.mock_project_id_fn.return_value = "project-id"

        def staging_config(view_id, query, dims, table_name, directory):
            # Build the view first, then wrap it in its US_XX staging export config.
            built_view = MetricBigQueryViewBuilder(
                dataset_id="dataset",
                view_id=view_id,
                view_query_template=query,
                dimensions=dims,
            ).build()
            return ExportBigQueryViewConfig(
                view=built_view,
                view_filter_clause="WHERE state_code = 'US_XX'",
                intermediate_table_name=table_name,
                output_directory=GcsfsDirectoryPath.from_absolute_path(directory),
            )

        first_config = staging_config(
            "view1",
            "select * from table",
            ["a", "b", "c"],
            "intermediate_table",
            "gs://bucket1/staging/US_XX",
        )
        second_config = staging_config(
            "view2",
            "select * from view2",
            ["d", "e", "f"],
            "intermediate_table2",
            "gs://bucket2/staging/US_XX",
        )

        self.staging_paths = [
            config.output_path("txt") for config in (first_config, second_config)
        ]
Exemplo n.º 6
0
def get_view_builder(
    view_id: str,
    description: str,
    facility_type: state_specific_query_strings.SpotlightFacilityType,
) -> MetricBigQueryViewBuilder:
    """Return a population-by-facility-by-demographics view builder for one facility type.

    Args:
        view_id: Id given to the resulting view.
        description: Description given to the resulting view.
        facility_type: Facility type passed to
            spotlight_state_specific_facility_filter to produce the
            facility_type_filter argument.

    Returns:
        A MetricBigQueryViewBuilder over
        POPULATION_BY_FACILITY_BY_DEMOGRAPHICS_VIEW_QUERY_TEMPLATE in the public
        dashboard views dataset.
    """
    view_dimensions = (
        "state_code",
        "date_of_stay",
        "facility",
        "race_or_ethnicity",
        "gender",
        "age_bucket",
    )
    # Keyword order matches the original call so the helper calls run in the same order.
    builder = MetricBigQueryViewBuilder(
        view_id=view_id,
        description=description,
        dataset_id=dataset_config.PUBLIC_DASHBOARD_VIEWS_DATASET,
        view_query_template=POPULATION_BY_FACILITY_BY_DEMOGRAPHICS_VIEW_QUERY_TEMPLATE,
        dimensions=view_dimensions,
        static_reference_dataset=dataset_config.STATIC_REFERENCE_TABLES_DATASET,
        reference_views_dataset=dataset_config.REFERENCE_VIEWS_DATASET,
        unnested_race_or_ethnicity_dimension=bq_utils.unnest_column(
            "race_or_ethnicity", "race_or_ethnicity"
        ),
        gender_dimension=bq_utils.unnest_column("gender", "gender"),
        age_dimension=bq_utils.unnest_column("age_bucket", "age_bucket"),
        facility_dimension=bq_utils.unnest_column("facility", "facility"),
        state_specific_race_or_ethnicity_groupings=(
            state_specific_query_strings.state_specific_race_or_ethnicity_groupings(
                race_or_ethnicity_column="prioritized_race_or_ethnicity"
            )
        ),
        state_specific_facility_mapping=(
            state_specific_query_strings.spotlight_state_specific_facility()
        ),
        facility_type_filter=(
            state_specific_query_strings.spotlight_state_specific_facility_filter(
                facility_type=facility_type
            )
        ),
    )
    return builder
      COUNT(DISTINCT(person_id)) as total_population
    FROM
      `{project_id}.{reference_views_dataset}.most_recent_daily_incarceration_population_materialized`,
      {unnested_race_or_ethnicity_dimension},
      {gender_dimension},
      {age_dimension}
    WHERE (race_or_ethnicity != 'ALL' AND gender = 'ALL' AND age_bucket = 'ALL') -- Race breakdown
      OR (race_or_ethnicity = 'ALL' AND gender != 'ALL' AND age_bucket = 'ALL') -- Gender breakdown
      OR (race_or_ethnicity = 'ALL' AND gender = 'ALL' AND age_bucket != 'ALL') -- Age breakdown
      OR (race_or_ethnicity = 'ALL' AND gender = 'ALL' AND age_bucket = 'ALL') -- State-wide count
    GROUP BY state_code, date_of_stay,  race_or_ethnicity, gender, age_bucket
    ORDER BY state_code, date_of_stay, race_or_ethnicity, gender, age_bucket
    """

# Builder for the incarceration-population-by-admission-reason view in the public
# dashboard dataset. The dataset and UNNEST-clause keyword arguments are passed
# through to the builder; presumably they fill the `{...}` placeholders of the
# query template -- confirm against MetricBigQueryViewBuilder.
# NOTE(review): no admission-reason column appears in `dimensions` despite the
# view name -- confirm this is intended.
INCARCERATION_POPULATION_BY_ADMISSION_REASON_VIEW_BUILDER = MetricBigQueryViewBuilder(
    dataset_id=dataset_config.PUBLIC_DASHBOARD_VIEWS_DATASET,
    view_id=INCARCERATION_POPULATION_BY_ADMISSION_REASON_VIEW_NAME,
    view_query_template=INCARCERATION_POPULATION_BY_ADMISSION_REASON_VIEW_QUERY_TEMPLATE,
    dimensions=[
        "state_code",
        "date_of_stay",
        "race_or_ethnicity",
        "gender",
        "age_bucket",
    ],
    description=INCARCERATION_POPULATION_BY_ADMISSION_REASON_VIEW_DESCRIPTION,
    reference_views_dataset=dataset_config.REFERENCE_VIEWS_DATASET,
    unnested_race_or_ethnicity_dimension=bq_utils.unnest_column(
        "prioritized_race_or_ethnicity", "race_or_ethnicity"
    ),
    gender_dimension=bq_utils.unnest_column("gender", "gender"),
    age_dimension=bq_utils.unnest_column("age_bucket", "age_bucket"),
    state_specific_race_or_ethnicity_groupings=(
        state_specific_query_strings.state_specific_race_or_ethnicity_groupings()
    ),
)

# Script entry point: build and print the view with the project id overridden
# to GCP_PROJECT_STAGING.
if __name__ == "__main__":
    with local_project_id_override(GCP_PROJECT_STAGING):
        INCARCERATION_POPULATION_BY_ADMISSION_REASON_VIEW_BUILDER.build_and_print()
"""

# Builder for the revocations-matrix distribution-by-violation view in the
# dashboard dataset. The trailing keyword arguments carry state-specific query
# fragments; presumably they are substituted into the query template -- confirm
# against MetricBigQueryViewBuilder.
REVOCATIONS_MATRIX_DISTRIBUTION_BY_VIOLATION_VIEW_BUILDER = MetricBigQueryViewBuilder(
    dataset_id=dataset_config.DASHBOARD_VIEWS_DATASET,
    view_id=REVOCATIONS_MATRIX_DISTRIBUTION_BY_VIOLATION_VIEW_NAME,
    view_query_template=REVOCATIONS_MATRIX_DISTRIBUTION_BY_VIOLATION_QUERY_TEMPLATE,
    dimensions=(
        "state_code", "metric_period_months",
        "level_1_supervision_location", "level_2_supervision_location",
        "admission_type", "supervision_type", "supervision_level",
        "violation_type", "reported_violations", "charge_category",
    ),
    description=REVOCATIONS_MATRIX_DISTRIBUTION_BY_VIOLATION_DESCRIPTION,
    reference_views_dataset=dataset_config.REFERENCE_VIEWS_DATASET,
    state_specific_violation_type_entry=(
        state_specific_query_strings.state_specific_violation_type_entry()
    ),
    state_specific_supervision_location_optimization_filter=(
        state_specific_query_strings.state_specific_supervision_location_optimization_filter()
    ),
    state_specific_violation_type_entry_categories=(
        state_specific_query_strings.state_specific_violation_type_entry_categories()
    ),
)

if __name__ == "__main__":
Exemplo n.º 9
0
        ROUND(IEEE_DIVIDE(successful_termination_count, projected_completion_count), 2) as success_rate
    FROM success_counts
    ORDER BY state_code, projected_year, projected_month, supervision_type
    """

# Builder for the supervision-success-by-month view in the public dashboard
# dataset. The dataset, district and date-filter keyword arguments are passed
# through to the builder; presumably they fill placeholders in the query
# template -- confirm against MetricBigQueryViewBuilder.
SUPERVISION_SUCCESS_BY_MONTH_VIEW_BUILDER = MetricBigQueryViewBuilder(
    dataset_id=dataset_config.PUBLIC_DASHBOARD_VIEWS_DATASET,
    view_id=SUPERVISION_SUCCESS_BY_MONTH_VIEW_NAME,
    view_query_template=SUPERVISION_SUCCESS_BY_MONTH_VIEW_QUERY_TEMPLATE,
    dimensions=(
        "state_code", "supervision_type", "projected_year", "projected_month", "district",
    ),
    description=SUPERVISION_SUCCESS_BY_MONTH_VIEW_DESCRIPTION,
    materialized_metrics_dataset=dataset_config.DATAFLOW_METRICS_MATERIALIZED_DATASET,
    reference_views_dataset=dataset_config.REFERENCE_VIEWS_DATASET,
    grouped_districts=(
        state_specific_query_strings.state_supervision_specific_district_groupings(
            "supervising_district_external_id", "judicial_district_code"
        )
    ),
    district_dimension=bq_utils.unnest_district(),
    thirty_six_month_filter=bq_utils.thirty_six_month_filter(),
)

# Script entry point: build and print the view with the project id overridden
# to GCP_PROJECT_STAGING.
if __name__ == "__main__":
    with local_project_id_override(GCP_PROJECT_STAGING):
        SUPERVISION_SUCCESS_BY_MONTH_VIEW_BUILDER.build_and_print()
      {metric_period_dimension}
      WHERE methodology = 'EVENT'
        AND person_id IS NOT NULL
        AND DATE(year, month, 1) >= DATE_SUB(DATE_TRUNC(CURRENT_DATE('US/Pacific'), MONTH),
                                             INTERVAL metric_period_months - 1 MONTH)
      GROUP BY state_code, metric_period_months, supervision_type, district, person_id
    )
    WHERE supervision_type in ('ALL', 'PAROLE', 'PROBATION')
    GROUP BY state_code, metric_period_months, supervision_type, district
    ORDER BY state_code, metric_period_months, district, supervision_type
    """

# Builder for the supervision-termination-by-type-by-period view in the
# dashboard dataset. The dataset and UNNEST-clause keyword arguments are passed
# through to the builder; presumably they fill the `{...}` placeholders of the
# query template -- confirm against MetricBigQueryViewBuilder.
SUPERVISION_TERMINATION_BY_TYPE_BY_PERIOD_VIEW_BUILDER = MetricBigQueryViewBuilder(
    dataset_id=dataset_config.DASHBOARD_VIEWS_DATASET,
    view_id=SUPERVISION_TERMINATION_BY_TYPE_BY_PERIOD_VIEW_NAME,
    view_query_template=SUPERVISION_TERMINATION_BY_TYPE_BY_PERIOD_QUERY_TEMPLATE,
    dimensions=[
        "state_code",
        "metric_period_months",
        "supervision_type",
        "district",
    ],
    description=SUPERVISION_TERMINATION_BY_TYPE_BY_PERIOD_DESCRIPTION,
    reference_views_dataset=dataset_config.REFERENCE_VIEWS_DATASET,
    metrics_dataset=dataset_config.DATAFLOW_METRICS_DATASET,
    district_dimension=bq_utils.unnest_district(),
    supervision_type_dimension=bq_utils.unnest_supervision_type(),
    metric_period_dimension=bq_utils.unnest_metric_period_months(),
    filter_to_most_recent_job_id_for_metric=(
        bq_utils.filter_to_most_recent_job_id_for_metric(
            reference_dataset=dataset_config.REFERENCE_VIEWS_DATASET
        )
    ),
)

# Script entry point: build and print the view with the project id overridden
# to GCP_PROJECT_STAGING.
if __name__ == "__main__":
    with local_project_id_override(GCP_PROJECT_STAGING):
        SUPERVISION_TERMINATION_BY_TYPE_BY_PERIOD_VIEW_BUILDER.build_and_print()
      metric_period_months
    FROM supervision
    FULL OUTER JOIN referrals
    USING (state_code, supervision_type, district, metric_period_months, gender)
    WHERE supervision_type in ('ALL', 'PAROLE', 'PROBATION')
      AND district IS NOT NULL
      AND state_code = 'US_ND'
    ORDER BY state_code, gender, district, supervision_type, metric_period_months
    """

# Builder for the FTR-referrals-by-gender-by-period view in the dashboard
# dataset. The metric-period keyword arguments are passed through to the
# builder; presumably they fill placeholders in the query template -- confirm
# against MetricBigQueryViewBuilder.
FTR_REFERRALS_BY_GENDER_BY_PERIOD_VIEW_BUILDER = MetricBigQueryViewBuilder(
    dataset_id=dataset_config.DASHBOARD_VIEWS_DATASET,
    view_id=FTR_REFERRALS_BY_GENDER_BY_PERIOD_VIEW_NAME,
    view_query_template=FTR_REFERRALS_BY_GENDER_BY_PERIOD_QUERY_TEMPLATE,
    dimensions=(
        "state_code", "metric_period_months", "district", "supervision_type", "gender",
    ),
    description=FTR_REFERRALS_BY_GENDER_BY_PERIOD_DESCRIPTION,
    reference_views_dataset=dataset_config.REFERENCE_VIEWS_DATASET,
    metric_period_dimension=bq_utils.unnest_metric_period_months(),
    metric_period_condition=bq_utils.metric_period_condition(),
)

# Script entry point: build and print the view with the project id overridden
# to GCP_PROJECT_STAGING.
if __name__ == "__main__":
    with local_project_id_override(GCP_PROJECT_STAGING):
        FTR_REFERRALS_BY_GENDER_BY_PERIOD_VIEW_BUILDER.build_and_print()
      WHERE methodology = 'PERSON'
        AND person_id IS NOT NULL
        AND m.metric_period_months = 1
        AND {metric_period_condition}
      GROUP BY state_code, metric_period_months, district
    ) ret
    USING (state_code, metric_period_months, district)
    WHERE district IS NOT NULL
    ORDER BY state_code, metric_period_months, district
    """

# Builder for the reincarcerations-by-period view in the dashboard dataset.
# The district dimension is unnested from the county_of_residence column. The
# remaining keyword arguments are passed through to the builder; presumably
# they fill the `{...}` placeholders of the query template -- confirm against
# MetricBigQueryViewBuilder.
REINCARCERATIONS_BY_PERIOD_VIEW_BUILDER = MetricBigQueryViewBuilder(
    dataset_id=dataset_config.DASHBOARD_VIEWS_DATASET,
    view_id=REINCARCERATIONS_BY_PERIOD_VIEW_NAME,
    view_query_template=REINCARCERATIONS_BY_PERIOD_QUERY_TEMPLATE,
    dimensions=["state_code", "metric_period_months", "district"],
    description=REINCARCERATIONS_BY_PERIOD_DESCRIPTION,
    metrics_dataset=dataset_config.DATAFLOW_METRICS_DATASET,
    reference_views_dataset=dataset_config.REFERENCE_VIEWS_DATASET,
    district_dimension=bq_utils.unnest_district(district_column="county_of_residence"),
    metric_period_dimension=bq_utils.unnest_metric_period_months(),
    metric_period_condition=bq_utils.metric_period_condition(),
    filter_to_most_recent_job_id_for_metric=(
        bq_utils.filter_to_most_recent_job_id_for_metric(
            reference_dataset=dataset_config.REFERENCE_VIEWS_DATASET
        )
    ),
)

# Script entry point: build and print the view with the project id overridden
# to GCP_PROJECT_STAGING.
if __name__ == "__main__":
    with local_project_id_override(GCP_PROJECT_STAGING):
        REINCARCERATIONS_BY_PERIOD_VIEW_BUILDER.build_and_print()
      AND year = EXTRACT(YEAR FROM CURRENT_DATE('US/Pacific'))
      AND month = EXTRACT(MONTH FROM CURRENT_DATE('US/Pacific'))
    ORDER BY state_code, metric_period_months, violation_record
    """

# Builder for the revocations-matrix filtered-caseload view in the dashboard
# dataset. The trailing keyword arguments carry datasets and state-specific
# query fragments; presumably they are substituted into the query template --
# confirm against MetricBigQueryViewBuilder.
REVOCATIONS_MATRIX_FILTERED_CASELOAD_VIEW_BUILDER = MetricBigQueryViewBuilder(
    dataset_id=dataset_config.DASHBOARD_VIEWS_DATASET,
    view_id=REVOCATIONS_MATRIX_FILTERED_CASELOAD_VIEW_NAME,
    view_query_template=REVOCATIONS_MATRIX_FILTERED_CASELOAD_QUERY_TEMPLATE,
    dimensions=[
        "state_code",
        "metric_period_months",
        "district",
        "supervision_type",
        "supervision_level",
        "charge_category",
        "risk_level",
        "violation_type",
        "reported_violations",
    ],
    description=REVOCATIONS_MATRIX_FILTERED_CASELOAD_DESCRIPTION,
    metrics_dataset=dataset_config.DATAFLOW_METRICS_DATASET,
    reference_views_dataset=dataset_config.REFERENCE_VIEWS_DATASET,
    most_severe_violation_type_subtype_grouping=(
        state_specific_query_strings.state_specific_most_severe_violation_type_subtype_grouping()
    ),
    state_specific_officer_recommendation=(
        state_specific_query_strings.state_specific_officer_recommendation()
    ),
    state_specific_supervision_level=(
        state_specific_query_strings.state_specific_supervision_level()
    ),
    filter_to_most_recent_job_id_for_metric=(
        bq_utils.filter_to_most_recent_job_id_for_metric(
            reference_dataset=dataset_config.REFERENCE_VIEWS_DATASET
        )
    ),
)

# Script entry point: build and print the view with the project id overridden
# to GCP_PROJECT_STAGING.
if __name__ == "__main__":
    with local_project_id_override(GCP_PROJECT_STAGING):
        REVOCATIONS_MATRIX_FILTERED_CASELOAD_VIEW_BUILDER.build_and_print()
        district,
        metric_period_months,
        race_or_ethnicity
      FROM `{project_id}.{reference_views_dataset}.event_based_revocations`,
      {metric_period_dimension},
      {race_ethnicity_dimension}
      WHERE {metric_period_condition}
      GROUP BY state_code, supervision_type, district, metric_period_months, race_or_ethnicity
    ) rev
    USING (state_code, supervision_type, district, metric_period_months, race_or_ethnicity)
    WHERE supervision_type in ('ALL', 'PAROLE', 'PROBATION')
        AND race_or_ethnicity != 'EXTERNAL_UNKNOWN'
    ORDER BY state_code, race_or_ethnicity, district, supervision_type, metric_period_months
    """

# Builder for the revocations-by-race-and-ethnicity-by-period view in the
# dashboard dataset. The dataset, UNNEST-clause and period-condition keyword
# arguments are passed through to the builder; presumably they fill the `{...}`
# placeholders of the query template -- confirm against MetricBigQueryViewBuilder.
REVOCATIONS_BY_RACE_AND_ETHNICITY_BY_PERIOD_VIEW_BUILDER = MetricBigQueryViewBuilder(
    dataset_id=dataset_config.DASHBOARD_VIEWS_DATASET,
    view_id=REVOCATIONS_BY_RACE_AND_ETHNICITY_BY_PERIOD_VIEW_NAME,
    view_query_template=REVOCATIONS_BY_RACE_AND_ETHNICITY_BY_PERIOD_QUERY_TEMPLATE,
    dimensions=[
        "state_code",
        "metric_period_months",
        "supervision_type",
        "district",
        "race_or_ethnicity",
    ],
    description=REVOCATIONS_BY_RACE_AND_ETHNICITY_BY_PERIOD_DESCRIPTION,
    reference_views_dataset=dataset_config.REFERENCE_VIEWS_DATASET,
    metric_period_dimension=bq_utils.unnest_metric_period_months(),
    race_ethnicity_dimension=bq_utils.unnest_race_and_ethnicity(),
    metric_period_condition=bq_utils.metric_period_condition(),
)

# Script entry point: build and print the view with the project id overridden
# to GCP_PROJECT_STAGING.
if __name__ == "__main__":
    with local_project_id_override(GCP_PROJECT_STAGING):
        REVOCATIONS_BY_RACE_AND_ETHNICITY_BY_PERIOD_VIEW_BUILDER.build_and_print()
Exemplo n.º 15
0
    LEFT JOIN
      revocation_counts
    USING (state_code, violation_type, reported_violations, gender, risk_level, supervision_type, supervision_level, charge_category,
      district, metric_period_months)
    LEFT JOIN
      termination_counts
    USING (state_code, violation_type, reported_violations, gender, risk_level, supervision_type, supervision_level, charge_category,
      district, metric_period_months)
    ORDER BY state_code, metric_period_months, district, supervision_type, supervision_level, gender, risk_level, violation_type,
      reported_violations, charge_category
    """

# Builder for the revocations-matrix distribution-by-gender view in the
# dashboard dataset.
REVOCATIONS_MATRIX_DISTRIBUTION_BY_GENDER_VIEW_BUILDER = MetricBigQueryViewBuilder(
    dataset_id=dataset_config.DASHBOARD_VIEWS_DATASET,
    view_id=REVOCATIONS_MATRIX_DISTRIBUTION_BY_GENDER_VIEW_NAME,
    view_query_template=REVOCATIONS_MATRIX_DISTRIBUTION_BY_GENDER_QUERY_TEMPLATE,
    dimensions=[
        "state_code",
        "metric_period_months",
        "district",
        "supervision_type",
        "supervision_level",
        "violation_type",
        "reported_violations",
        "charge_category",
        "gender",
        "risk_level",
    ],
    description=REVOCATIONS_MATRIX_DISTRIBUTION_BY_GENDER_DESCRIPTION,
    reference_views_dataset=dataset_config.REFERENCE_VIEWS_DATASET,
)

# Script entry point: build and print the view with the project id overridden
# to GCP_PROJECT_STAGING.
if __name__ == "__main__":
    with local_project_id_override(GCP_PROJECT_STAGING):
        REVOCATIONS_MATRIX_DISTRIBUTION_BY_GENDER_VIEW_BUILDER.build_and_print()
Exemplo n.º 16
0
class TestExportViewCollectionConfig(unittest.TestCase):
    """Covers ExportViewCollectionConfig: filter matching and export-config generation."""

    def setUp(self):
        """Patch the project id and create the namespace and view builder fixtures."""
        project_id, dataset_id = "fake-recidiviz-project", "base_dataset"

        self.mock_project_id = project_id
        self.mock_dataset_id = dataset_id
        self.mock_dataset = bigquery.dataset.DatasetReference(project_id, dataset_id)

        # Make metadata.project_id() report the fake project; undone in tearDown.
        patcher = mock.patch("recidiviz.utils.metadata.project_id")
        self.metadata_patcher = patcher
        self.mock_project_id_fn = patcher.start()
        self.mock_project_id_fn.return_value = project_id

        self.mock_big_query_view_namespace = BigQueryViewNamespace.STATE

        builder = MetricBigQueryViewBuilder(
            dataset_id=self.mock_dataset.dataset_id,
            view_id="test_view",
            view_query_template="SELECT NULL LIMIT 0",
            dimensions=[],
        )
        self.mock_view_builder = builder
        self.views_for_dataset = [builder]

    def tearDown(self):
        """Stop the project id patch started in setUp."""
        self.metadata_patcher.stop()

    def _collection_config(self, uri_template, state_code_filter, export_name):
        """Build an ExportViewCollectionConfig over this test's view builders."""
        return ExportViewCollectionConfig(
            view_builders_to_export=self.views_for_dataset,
            output_directory_uri_template=uri_template,
            state_code_filter=state_code_filter,
            export_name=export_name,
            bq_view_namespace=self.mock_big_query_view_namespace,
        )

    def _exported_configs(self, collection_config):
        """Ask the collection config for its per-view export configs in the fake project."""
        return collection_config.export_configs_for_views_to_export(
            project_id=self.mock_project_id
        )

    def _expected_configs(self, filter_clause, table_suffix, output_uri):
        """Build the single ExportBigQueryViewConfig expected for the test view."""
        view = self.mock_view_builder.build()
        return [
            ExportBigQueryViewConfig(
                view=view,
                view_filter_clause=filter_clause,
                intermediate_table_name=f"{view.view_id}_table{table_suffix}",
                output_directory=GcsfsDirectoryPath.from_absolute_path(output_uri),
                export_output_formats=[
                    ExportOutputFormatType.JSON,
                    ExportOutputFormatType.METRIC,
                ],
            )
        ]

    def _expected_state_agnostic(self, collection_config):
        """Expected configs when no state filter applies: no clause, no suffix."""
        output_uri = collection_config.output_directory_uri_template.format(
            project_id=self.mock_project_id,
        )
        return self._expected_configs(None, "", output_uri)

    def _expected_us_xx(self):
        """Expected configs for the US_XX filter over the plain bucket template."""
        return self._expected_configs(
            " WHERE state_code = 'US_XX'",
            "_US_XX",
            f"gs://{self.mock_project_id}-bucket/US_XX",
        )

    def test_matches_filter(self):
        """matches_filter accepts the exact state code or export name and rejects another name."""
        state_config = self._collection_config(
            "gs://{project_id}-bucket", "US_XX", "EXPORT"
        )
        self.assertTrue(state_config.matches_filter("US_XX"))

        named_config = self._collection_config(
            "gs://{project_id}-bucket", None, "VALID_EXPORT_NAME"
        )
        self.assertTrue(named_config.matches_filter("VALID_EXPORT_NAME"))
        self.assertFalse(named_config.matches_filter("INVALID_EXPORT_NAME"))

    def test_matches_filter_case_insensitive(self):
        """matches_filter accepts state codes and export names that differ only in case."""
        state_config = self._collection_config(
            "gs://{project_id}-bucket", "US_XX", "OTHER_EXPORT"
        )
        self.assertTrue(state_config.matches_filter("US_xx"))

        named_config = self._collection_config(
            "gs://{project_id}-bucket", None, "VALID_EXPORT_NAME"
        )
        self.assertTrue(named_config.matches_filter("valid_export_name"))

    def test_metric_export_state_agnostic(self):
        """export_configs_for_views_to_export output for a config without a state code filter."""
        collection_config = self._collection_config(
            "gs://{project_id}-bucket-without-state-codes",
            None,
            "ALL_STATE_TEST_PRODUCT",
        )

        actual = self._exported_configs(collection_config)
        expected = self._expected_state_agnostic(collection_config)

        self.assertEqual(expected, actual)

    def test_metric_export_state_specific(self):
        """export_configs_for_views_to_export output for a config filtered to US_XX."""
        collection_config = self._collection_config(
            "gs://{project_id}-bucket", "US_XX", "TEST_REPORT"
        )

        actual = self._exported_configs(collection_config)
        expected = self._expected_us_xx()

        self.assertEqual(expected, actual)

    def test_metric_export_lantern_dashboard(self):
        """export_configs_for_views_to_export output for the TEST_EXPORT config with no state filter."""
        collection_config = self._collection_config(
            "gs://{project_id}-bucket-without-state-codes", None, "TEST_EXPORT"
        )

        actual = self._exported_configs(collection_config)
        expected = self._expected_state_agnostic(collection_config)

        self.assertEqual(expected, actual)

    def test_metric_export_lantern_dashboard_with_state(self):
        """export_configs_for_views_to_export output for the TEST_EXPORT config filtered to US_XX."""
        collection_config = self._collection_config(
            "gs://{project_id}-bucket", "US_XX", "TEST_EXPORT"
        )

        actual = self._exported_configs(collection_config)
        expected = self._expected_us_xx()

        self.assertEqual(expected, actual)
      SELECT
        state_code, year, month,
        CASE WHEN termination_reason = 'ABSCONSION' THEN person_id ELSE NULL END AS absconsion,
        CASE WHEN termination_reason = 'DEATH' THEN person_id ELSE NULL END AS death,
        CASE WHEN termination_reason = 'DISCHARGE' THEN person_id ELSE NULL END AS discharge,
        CASE WHEN termination_reason = 'EXPIRATION' THEN person_id ELSE NULL END AS expiration,
        CASE WHEN termination_reason = 'REVOCATION' THEN person_id ELSE NULL END AS revocation,
        CASE WHEN termination_reason = 'SUSPENSION' THEN person_id ELSE NULL END AS suspension,
        CASE WHEN termination_reason = 'EXTERNAL_UNKNOWN' THEN person_id ELSE NULL END AS other,
        supervision_type,
        district
      FROM case_terminations
    )
    WHERE supervision_type IN ('ALL', 'PROBATION', 'PAROLE')
      AND year >= EXTRACT(YEAR FROM DATE_SUB(CURRENT_DATE('US/Pacific'), INTERVAL 3 YEAR))
    GROUP BY state_code, year, month, supervision_type, district
    ORDER BY state_code, year, month, supervision_type, district
    """

# Builder for the case-terminations-by-type-by-month dashboard view. The
# dimensions match the columns the query groups and orders by.
CASE_TERMINATIONS_BY_TYPE_BY_MONTH_VIEW_BUILDER = MetricBigQueryViewBuilder(
    dataset_id=dataset_config.DASHBOARD_VIEWS_DATASET,
    view_id=CASE_TERMINATIONS_BY_TYPE_BY_MONTH_VIEW_NAME,
    view_query_template=CASE_TERMINATIONS_BY_TYPE_BY_MONTH_QUERY_TEMPLATE,
    dimensions=[
        'state_code',
        'year',
        'month',
        'supervision_type',
        'district',
    ],
    description=CASE_TERMINATIONS_BY_TYPE_BY_MONTH_DESCRIPTION,
)

# Script entry point: call build_and_print() with the project id overridden
# to the staging project.
if __name__ == '__main__':
    with local_project_id_override(GCP_PROJECT_STAGING):
        CASE_TERMINATIONS_BY_TYPE_BY_MONTH_VIEW_BUILDER.build_and_print()
# Builder for the supervision-success-by-period-by-demographics public
# dashboard view.
# NOTE(review): the keyword arguments after `description` are presumably
# substituted into the query template -- confirm against
# MetricBigQueryViewBuilder.
SUPERVISION_SUCCESS_BY_PERIOD_BY_DEMOGRAPHICS_VIEW_BUILDER = MetricBigQueryViewBuilder(
    dataset_id=dataset_config.PUBLIC_DASHBOARD_VIEWS_DATASET,
    view_id=SUPERVISION_SUCCESS_BY_PERIOD_BY_DEMOGRAPHICS_VIEW_NAME,
    view_query_template=(
        SUPERVISION_SUCCESS_BY_PERIOD_BY_DEMOGRAPHICS_VIEW_QUERY_TEMPLATE
    ),
    dimensions=(
        "state_code",
        "supervision_type",
        "metric_period_months",
        "district",
        "race_or_ethnicity",
        "gender",
        "age_bucket",
    ),
    description=SUPERVISION_SUCCESS_BY_PERIOD_BY_DEMOGRAPHICS_VIEW_DESCRIPTION,
    materialized_metrics_dataset=(
        dataset_config.DATAFLOW_METRICS_MATERIALIZED_DATASET
    ),
    reference_views_dataset=dataset_config.REFERENCE_VIEWS_DATASET,
    grouped_districts=(
        state_specific_query_strings.state_supervision_specific_district_groupings(
            "supervising_district_external_id",
            "judicial_district_code",
        )
    ),
    metric_period_condition=bq_utils.metric_period_condition(month_offset=1),
    unnested_race_or_ethnicity_dimension=bq_utils.unnest_column(
        "race_or_ethnicity",
        "race_or_ethnicity",
    ),
    gender_dimension=bq_utils.unnest_column("gender", "gender"),
    age_dimension=bq_utils.unnest_column("age_bucket", "age_bucket"),
    state_specific_race_or_ethnicity_groupings=(
        state_specific_query_strings.state_specific_race_or_ethnicity_groupings(
            "prioritized_race_or_ethnicity"
        )
    ),
    state_specific_supervision_type_inclusion_filter=(
        state_specific_query_strings.state_specific_supervision_type_inclusion_filter()
    ),
)
Exemplo n.º 19
0
    GROUP BY state_code, supervision_type, race_or_ethnicity, region_id
    ORDER BY state_code, supervision_type, race_or_ethnicity, region_id
    """

# Builder for the active-program-participation-by-region public dashboard
# view. The dimensions match the columns the query groups and orders by.
# NOTE(review): the keyword arguments after `description` are presumably
# substituted into the query template -- confirm against
# MetricBigQueryViewBuilder.
ACTIVE_PROGRAM_PARTICIPATION_BY_REGION_VIEW_BUILDER = MetricBigQueryViewBuilder(
    dataset_id=dataset_config.PUBLIC_DASHBOARD_VIEWS_DATASET,
    view_id=ACTIVE_PROGRAM_PARTICIPATION_BY_REGION_VIEW_NAME,
    view_query_template=(
        ACTIVE_PROGRAM_PARTICIPATION_BY_REGION_VIEW_QUERY_TEMPLATE
    ),
    dimensions=[
        'state_code',
        'supervision_type',
        'race_or_ethnicity',
        'region_id',
    ],
    description=ACTIVE_PROGRAM_PARTICIPATION_BY_REGION_VIEW_DESCRIPTION,
    base_dataset=dataset_config.STATE_BASE_DATASET,
    static_reference_dataset=dataset_config.STATIC_REFERENCE_TABLES_DATASET,
    reference_views_dataset=dataset_config.REFERENCE_VIEWS_DATASET,
    metrics_dataset=dataset_config.DATAFLOW_METRICS_DATASET,
    current_month_condition=bq_utils.current_month_condition(),
    state_specific_race_or_ethnicity_groupings=(
        state_specific_query_strings.state_specific_race_or_ethnicity_groupings()
    ),
    race_or_ethnicity_dimension=bq_utils.unnest_race_and_ethnicity(),
    unnested_race_or_ethnicity_dimension=bq_utils.unnest_column(
        'race_or_ethnicity',
        'race_or_ethnicity',
    ),
    region_dimension=bq_utils.unnest_column('region_id', 'region_id'),
    supervision_type_dimension=bq_utils.unnest_supervision_type(),
    filter_to_most_recent_job_id_for_metric=(
        bq_utils.filter_to_most_recent_job_id_for_metric(
            reference_dataset=dataset_config.REFERENCE_VIEWS_DATASET
        )
    ),
)

# Script entry point: call build_and_print() with the project id overridden
# to the staging project.
if __name__ == '__main__':
    with local_project_id_override(GCP_PROJECT_STAGING):
        ACTIVE_PROGRAM_PARTICIPATION_BY_REGION_VIEW_BUILDER.build_and_print()
Exemplo n.º 20
0
    def test_export_happy_path(self) -> None:
        """Runs CompositeBigQueryViewExporter over two configs with two delegate exporters.

        Each delegate is expected to receive the staging-directory versions of
        both configs. Afterwards every staged file should be copied to its
        final path, the staged copy deleted, and the final path checked for
        existence.
        """

        def make_config(view, table_name, directory_uri):
            # Every config in this test shares the same state filter clause.
            return ExportBigQueryViewConfig(
                view=view,
                view_filter_clause='WHERE state_code = \'US_XX\'',
                intermediate_table_name=table_name,
                output_directory=GcsfsDirectoryPath.from_absolute_path(
                    directory_uri),
            )

        view_one = MetricBigQueryViewBuilder(
            dataset_id='dataset',
            view_id='view1',
            view_query_template='select * from table',
            dimensions=['a', 'b', 'c'],
        ).build()
        config_one = make_config(
            view_one, 'intermediate_table', 'gs://bucket1/US_XX')
        config_one_staging = make_config(
            view_one, 'intermediate_table', 'gs://bucket1/staging/US_XX')

        view_two = MetricBigQueryViewBuilder(
            dataset_id='dataset',
            view_id='view2',
            view_query_template='select * from view2',
            dimensions=['d', 'e', 'f'],
        ).build()
        config_two = make_config(
            view_two, 'intermediate_table2', 'gs://bucket2/US_XX')
        config_two_staging = make_config(
            view_two, 'intermediate_table2', 'gs://bucket2/staging/US_XX')

        staging_configs = [config_one_staging, config_two_staging]

        mock_bq_client = create_autospec(BigQueryClient)
        mock_fs = create_autospec(GCSFileSystem)
        mock_fs.exists.return_value = True

        # One delegate reports '.json' staging paths, the other '.txt' ones.
        json_delegate = create_autospec(BigQueryViewExporter)
        json_delegate.export_and_validate.return_value = [
            config_one_staging.output_path('json'),
            config_two_staging.output_path('json'),
        ]

        txt_delegate = create_autospec(BigQueryViewExporter)
        txt_delegate.export_and_validate.return_value = [
            config_one_staging.output_path('txt'),
            config_two_staging.output_path('txt'),
        ]

        # Make the actual call
        composite_exporter = CompositeBigQueryViewExporter(
            mock_bq_client, mock_fs, [json_delegate, txt_delegate])
        composite_exporter.export_and_validate([config_one, config_two])

        # Both delegates must have been handed the staging configs.
        for delegate in (json_delegate, txt_delegate):
            delegate.export_and_validate.assert_has_calls([
                call(staging_configs),
            ])

        # (bucket, staged blob, final blob) for each exported file, listed in
        # the order the file-system calls are expected.
        expected_files = [
            ('bucket1', 'staging/US_XX/view1.json', 'US_XX/view1.json'),
            ('bucket2', 'staging/US_XX/view2.json', 'US_XX/view2.json'),
            ('bucket1', 'staging/US_XX/view1.txt', 'US_XX/view1.txt'),
            ('bucket2', 'staging/US_XX/view2.txt', 'US_XX/view2.txt'),
        ]

        mock_fs.copy.assert_has_calls([
            call(
                GcsfsFilePath(bucket_name=bucket, blob_name=staged_blob),
                GcsfsFilePath(bucket_name=bucket, blob_name=final_blob))
            for bucket, staged_blob, final_blob in expected_files
        ])

        mock_fs.delete.assert_has_calls([
            call(GcsfsFilePath(bucket_name=bucket, blob_name=staged_blob))
            for bucket, staged_blob, _ in expected_files
        ])

        mock_fs.exists.assert_has_calls([
            call(GcsfsFilePath(bucket_name=bucket, blob_name=final_blob))
            for bucket, _, final_blob in expected_files
        ])
      {unnested_race_or_ethnicity_dimension},
      {region_dimension},
      {supervision_type_dimension}
    GROUP BY state_code, supervision_type, race_or_ethnicity, region_id
    ORDER BY state_code, supervision_type, race_or_ethnicity, region_id
    """

# Builder for the active-program-participation-by-region public dashboard
# view. The dimensions match the columns the query groups and orders by.
# NOTE(review): the keyword arguments after `description` are presumably
# substituted into the query template's matching {placeholders} -- confirm
# against MetricBigQueryViewBuilder.
ACTIVE_PROGRAM_PARTICIPATION_BY_REGION_VIEW_BUILDER = MetricBigQueryViewBuilder(
    dataset_id=dataset_config.PUBLIC_DASHBOARD_VIEWS_DATASET,
    view_id=ACTIVE_PROGRAM_PARTICIPATION_BY_REGION_VIEW_NAME,
    view_query_template=(
        ACTIVE_PROGRAM_PARTICIPATION_BY_REGION_VIEW_QUERY_TEMPLATE
    ),
    dimensions=(
        "state_code",
        "supervision_type",
        "race_or_ethnicity",
        "region_id",
    ),
    description=ACTIVE_PROGRAM_PARTICIPATION_BY_REGION_VIEW_DESCRIPTION,
    static_reference_dataset=dataset_config.STATIC_REFERENCE_TABLES_DATASET,
    materialized_metrics_dataset=(
        dataset_config.DATAFLOW_METRICS_MATERIALIZED_DATASET
    ),
    state_specific_race_or_ethnicity_groupings=(
        state_specific_query_strings.state_specific_race_or_ethnicity_groupings(
            "prioritized_race_or_ethnicity"
        )
    ),
    unnested_race_or_ethnicity_dimension=bq_utils.unnest_column(
        "race_or_ethnicity",
        "race_or_ethnicity",
    ),
    region_dimension=bq_utils.unnest_column("region_id", "region_id"),
    supervision_type_dimension=bq_utils.unnest_supervision_type(),
)

# Script entry point: call build_and_print() with the project id overridden
# to the staging project.
if __name__ == "__main__":
    with local_project_id_override(GCP_PROJECT_STAGING):
        ACTIVE_PROGRAM_PARTICIPATION_BY_REGION_VIEW_BUILDER.build_and_print()
    ORDER BY state_code, metric_period_months, supervision_type, race_or_ethnicity
    """

# Builder for the supervision-population-by-prioritized-race-and-ethnicity-
# by-period public dashboard view.
# NOTE(review): the keyword arguments after `description` are presumably
# substituted into the query template -- confirm against
# MetricBigQueryViewBuilder.
SUPERVISION_POPULATION_BY_PRIORITIZED_RACE_AND_ETHNICITY_BY_PERIOD_VIEW_BUILDER = MetricBigQueryViewBuilder(
    dataset_id=dataset_config.PUBLIC_DASHBOARD_VIEWS_DATASET,
    view_id=(
        SUPERVISION_POPULATION_BY_PRIORITIZED_RACE_AND_ETHNICITY_BY_PERIOD_VIEW_NAME
    ),
    view_query_template=(
        SUPERVISION_POPULATION_BY_PRIORITIZED_RACE_AND_ETHNICITY_BY_PERIOD_VIEW_QUERY_TEMPLATE
    ),
    dimensions=(
        "state_code",
        "supervision_type",
        "metric_period_months",
        "race_or_ethnicity",
    ),
    description=(
        SUPERVISION_POPULATION_BY_PRIORITIZED_RACE_AND_ETHNICITY_BY_PERIOD_VIEW_DESCRIPTION
    ),
    reference_views_dataset=dataset_config.REFERENCE_VIEWS_DATASET,
    static_reference_dataset=dataset_config.STATIC_REFERENCE_TABLES_DATASET,
    metric_period_condition=bq_utils.metric_period_condition(),
    unnested_race_or_ethnicity_dimension=bq_utils.unnest_column(
        "race_or_ethnicity",
        "race_or_ethnicity",
    ),
    gender_dimension=bq_utils.unnest_column("gender", "gender"),
    age_dimension=bq_utils.unnest_column("age_bucket", "age_bucket"),
    state_specific_race_or_ethnicity_groupings=(
        state_specific_query_strings.state_specific_race_or_ethnicity_groupings()
    ),
    state_specific_supervision_type_inclusion_filter=(
        state_specific_query_strings.state_specific_supervision_type_inclusion_filter()
    ),
)

if __name__ == "__main__":
    with local_project_id_override(GCP_PROJECT_STAGING):
Exemplo n.º 23
0
      OR (race_or_ethnicity = 'ALL' AND gender = 'ALL' AND age_bucket = 'ALL')) -- State-wide count
    GROUP BY state_code, district, race_or_ethnicity, gender, age_bucket
    ORDER BY state_code, district, race_or_ethnicity, gender, age_bucket
    """

# Builder for the sentence-type-by-district-by-demographics public dashboard
# view. The dimensions match the columns the query groups and orders by.
# NOTE(review): the keyword arguments after `description` are presumably
# substituted into the query template -- confirm against
# MetricBigQueryViewBuilder.
SENTENCE_TYPE_BY_DISTRICT_BY_DEMOGRAPHICS_VIEW_BUILDER = MetricBigQueryViewBuilder(
    dataset_id=dataset_config.PUBLIC_DASHBOARD_VIEWS_DATASET,
    view_id=SENTENCE_TYPE_BY_DISTRICT_BY_DEMOGRAPHICS_VIEW_NAME,
    view_query_template=(
        SENTENCE_TYPE_BY_DISTRICT_BY_DEMOGRAPHICS_VIEW_QUERY_TEMPLATE
    ),
    dimensions=(
        "state_code",
        "district",
        "race_or_ethnicity",
        "gender",
        "age_bucket",
    ),
    description=SENTENCE_TYPE_BY_DISTRICT_BY_DEMOGRAPHICS_VIEW_DESCRIPTION,
    materialized_metrics_dataset=(
        dataset_config.DATAFLOW_METRICS_MATERIALIZED_DATASET
    ),
    state_specific_race_or_ethnicity_groupings=(
        state_specific_query_strings.state_specific_race_or_ethnicity_groupings(
            "prioritized_race_or_ethnicity"
        )
    ),
    unnested_race_or_ethnicity_dimension=bq_utils.unnest_column(
        "race_or_ethnicity",
        "race_or_ethnicity",
    ),
    gender_dimension=bq_utils.unnest_column("gender", "gender"),
    age_dimension=bq_utils.unnest_column("age_bucket", "age_bucket"),
    district_dimension=bq_utils.unnest_district(
        state_specific_query_strings.state_specific_judicial_district_groupings(
            "judicial_district_code"
        )
    ),
)

# Script entry point: call build_and_print() with the project id overridden
# to the staging project.
if __name__ == "__main__":
    with local_project_id_override(GCP_PROJECT_STAGING):
        SENTENCE_TYPE_BY_DISTRICT_BY_DEMOGRAPHICS_VIEW_BUILDER.build_and_print()
    -- Filter out any rows that don't have a specified violation_type
    WHERE violation_type != 'NO_VIOLATION_TYPE'
    """

# Builder for the revocations-matrix-distribution-by-gender dashboard view.
# NOTE(review): the keyword arguments after `description` are presumably
# substituted into the query template -- confirm against
# MetricBigQueryViewBuilder.
REVOCATIONS_MATRIX_DISTRIBUTION_BY_GENDER_VIEW_BUILDER = MetricBigQueryViewBuilder(
    dataset_id=dataset_config.DASHBOARD_VIEWS_DATASET,
    view_id=REVOCATIONS_MATRIX_DISTRIBUTION_BY_GENDER_VIEW_NAME,
    view_query_template=(
        REVOCATIONS_MATRIX_DISTRIBUTION_BY_GENDER_QUERY_TEMPLATE
    ),
    dimensions=(
        # Where and over what period
        "state_code",
        "metric_period_months",
        "level_1_supervision_location",
        "level_2_supervision_location",
        # Supervision and violation attributes
        "supervision_type",
        "supervision_level",
        "violation_type",
        "reported_violations",
        "admission_type",
        "charge_category",
        # Demographic breakdown
        "gender",
    ),
    description=REVOCATIONS_MATRIX_DISTRIBUTION_BY_GENDER_DESCRIPTION,
    reference_views_dataset=dataset_config.REFERENCE_VIEWS_DATASET,
    gender_dimension=bq_utils.unnest_column("gender", "gender"),
    supported_gender_values=SUPPORTED_GENDER_VALUES,
)

if __name__ == "__main__":
    with local_project_id_override(GCP_PROJECT_STAGING):
        REVOCATIONS_MATRIX_DISTRIBUTION_BY_GENDER_VIEW_BUILDER.build_and_print(
Exemplo n.º 25
0
    SELECT
      *,
      IEEE_DIVIDE(recidivated_releases, releases) as recidivism_rate
    FROM
      recidivism_numbers
    ORDER BY state_code, release_cohort, followup_years, gender, age_bucket, race_or_ethnicity
    """

# Builder for the recidivism-rates-by-cohort-by-year public dashboard view.
# The dimensions match the columns the query orders by.
# NOTE(review): the keyword arguments after `description` are presumably
# substituted into the query template -- confirm against
# MetricBigQueryViewBuilder.
RECIDIVISM_RATES_BY_COHORT_BY_YEAR_VIEW_BUILDER = MetricBigQueryViewBuilder(
    dataset_id=dataset_config.PUBLIC_DASHBOARD_VIEWS_DATASET,
    view_id=RECIDIVISM_RATES_BY_COHORT_BY_YEAR_VIEW_NAME,
    view_query_template=RECIDIVISM_RATES_BY_COHORT_BY_YEAR_VIEW_QUERY_TEMPLATE,
    dimensions=[
        'state_code',
        'release_cohort',
        'followup_years',
        'gender',
        'age_bucket',
        'race_or_ethnicity',
    ],
    description=RECIDIVISM_RATES_BY_COHORT_BY_YEAR_VIEW_DESCRIPTION,
    metrics_dataset=dataset_config.DATAFLOW_METRICS_DATASET,
    reference_views_dataset=dataset_config.REFERENCE_VIEWS_DATASET,
    state_specific_race_or_ethnicity_groupings=(
        state_specific_query_strings.state_specific_race_or_ethnicity_groupings()
    ),
    race_or_ethnicity_dimension=bq_utils.unnest_column(
        'prioritized_race_or_ethnicity',
        'race_or_ethnicity',
    ),
    gender_dimension=bq_utils.unnest_column('gender', 'gender'),
    age_dimension=bq_utils.unnest_column('age_bucket', 'age_bucket'),
)

# Script entry point: call build_and_print() with the project id overridden
# to the staging project.
if __name__ == '__main__':
    with local_project_id_override(GCP_PROJECT_STAGING):
        RECIDIVISM_RATES_BY_COHORT_BY_YEAR_VIEW_BUILDER.build_and_print()
class ViewCollectionExportManagerTest(unittest.TestCase):
    """Tests for view_export_manager.py."""
    def setUp(self) -> None:
        """Creates the Flask test app, starts every patcher, and assembles the fake export config.

        The patchers started here replace the Cloud Tasks client, the task
        manager's uuid module, the metadata project id, the BigQuery client,
        the export_config module attributes and the GCS file-system factory.
        They are stopped again in tearDown.
        """
        # Flask app serving the export blueprint, plus a client for requests.
        flask_app = Flask(__name__)
        flask_app.register_blueprint(export_blueprint)
        flask_app.config["TESTING"] = True
        self.app = flask_app
        self.headers: Dict[str, Dict[Any, Any]] = {
            "x-goog-iap-jwt-assertion": {}
        }
        self.client = flask_app.test_client()

        self.mock_cloud_task_client_patcher = mock.patch(
            "google.cloud.tasks_v2.CloudTasksClient")
        self.mock_cloud_task_client_patcher.start()

        self.mock_uuid_patcher = mock.patch(
            f"{CLOUD_TASK_MANAGER_PACKAGE_NAME}.uuid")
        self.mock_uuid = self.mock_uuid_patcher.start()

        # url_for needs a request context to resolve the endpoint URLs.
        with flask_app.test_request_context():
            self.metric_view_data_export_url = flask.url_for(
                "export.metric_view_data_export")
            self.create_metric_view_data_export_tasks_url = flask.url_for(
                "export.create_metric_view_data_export_tasks")

        # Fixed identifiers shared by the tests.
        self.mock_state_code = "US_XX"
        self.mock_project_id = "test-project"
        self.mock_dataset_id = "base_dataset"
        self.mock_dataset = bigquery.dataset.DatasetReference(
            self.mock_project_id, self.mock_dataset_id)

        self.metadata_patcher = mock.patch(
            "recidiviz.utils.metadata.project_id")
        self.mock_project_id_fn = self.metadata_patcher.start()
        self.mock_project_id_fn.return_value = self.mock_project_id

        self.client_patcher = mock.patch(
            "recidiviz.metrics.export.view_export_manager.BigQueryClientImpl")
        self.mock_client = self.client_patcher.start().return_value
        self.mock_client.dataset_ref_for_id.return_value = self.mock_dataset

        # One plain view builder and one metric view builder; both use the
        # same dataset, view id, description and query.
        shared_builder_kwargs = dict(
            dataset_id=self.mock_dataset.dataset_id,
            view_id="test_view",
            description="test_view description",
            view_query_template="SELECT NULL LIMIT 0",
        )
        self.mock_view_builder = SimpleBigQueryViewBuilder(
            **shared_builder_kwargs)
        self.mock_metric_view_builder = MetricBigQueryViewBuilder(
            dimensions=tuple(), **shared_builder_kwargs)

        self.view_builders_for_dataset = [
            self.mock_view_builder,
            self.mock_metric_view_builder,
        ]

        uri_template = "gs://{project_id}-dataset-location/subdirectory"
        self.output_uri_template_for_dataset = {"dataset_id": uri_template}

        self.views_to_update = {
            self.mock_dataset_id: self.view_builders_for_dataset
        }

        self.mock_export_name = "MOCK_EXPORT_NAME"
        self.mock_big_query_view_namespace = BigQueryViewNamespace.STATE

        def collection_config(export_name, builders):
            # All collections share the URI template and namespace.
            return ExportViewCollectionConfig(
                view_builders_to_export=builders,
                output_directory_uri_template=uri_template,
                export_name=export_name,
                bq_view_namespace=self.mock_big_query_view_namespace,
            )

        self.metric_dataset_export_configs_index = {
            "EXPORT": collection_config(
                "EXPORT", [self.mock_view_builder]),
            "OTHER_EXPORT": collection_config(
                "OTHER_EXPORT", [self.mock_metric_view_builder]),
            self.mock_export_name: collection_config(
                self.mock_export_name, self.view_builders_for_dataset),
        }

        # Attributes to set on the mock that replaces export_config.
        export_config_values = {
            "OUTPUT_DIRECTORY_URI_TEMPLATE_FOR_DATASET_EXPORT":
            self.output_uri_template_for_dataset,
            "VIEW_COLLECTION_EXPORT_INDEX":
            self.metric_dataset_export_configs_index,
        }
        self.export_config_patcher = mock.patch(  # type: ignore[call-overload]
            "recidiviz.metrics.export.view_export_manager.export_config",
            **export_config_values,
        )
        self.mock_export_config = self.export_config_patcher.start()

        self.gcs_factory_patcher = mock.patch(
            "recidiviz.metrics.export.view_export_manager.GcsfsFactory.build")
        self.gcs_factory_patcher.start().return_value = FakeGCSFileSystem()

    def tearDown(self) -> None:
        """Stops every patcher held on the instance, in a fixed order."""
        patchers_in_stop_order = (
            self.client_patcher,
            self.export_config_patcher,
            self.metadata_patcher,
            self.gcs_factory_patcher,
            self.mock_uuid_patcher,
            self.mock_cloud_task_client_patcher,
        )
        for patcher in patchers_in_stop_order:
            patcher.stop()

    @mock.patch("recidiviz.utils.environment.get_gcp_environment")
    def test_get_configs_for_export_name(
            self, mock_environment: mock.MagicMock) -> None:
        """Checks get_configs_for_export_name for a state-specific export.

        Both view builders registered under the mock export name should
        produce a config filtered to the mock state code, and the lookup
        should give the same result when the export name and state code are
        passed in lower case.
        """
        mock_environment.return_value = "production"

        actual_configs = view_export_manager.get_configs_for_export_name(
            export_name=self.mock_export_name,
            state_code=self.mock_state_code,
            project_id=self.mock_project_id,
        )

        view = self.mock_view_builder.build()
        metric_view = self.mock_metric_view_builder.build()

        # Values shared by both expected configs. The table name is derived
        # from the plain view's id for both of them.
        filter_clause = f" WHERE state_code = '{self.mock_state_code}'"
        table_name = f"{view.view_id}_table_{self.mock_state_code}"
        directory_uri = (
            "gs://{project_id}-dataset-location/subdirectory/{state_code}"
            .format(
                project_id=self.mock_project_id,
                state_code=self.mock_state_code,
            ))

        def expected_config(bq_view, output_formats):
            return ExportBigQueryViewConfig(
                bq_view_namespace=self.mock_big_query_view_namespace,
                view=bq_view,
                view_filter_clause=filter_clause,
                intermediate_table_name=table_name,
                output_directory=GcsfsDirectoryPath.from_absolute_path(
                    directory_uri),
                export_output_formats=output_formats,
            )

        expected_configs = [
            expected_config(view, [ExportOutputFormatType.JSON]),
            expected_config(metric_view, [
                ExportOutputFormatType.JSON,
                ExportOutputFormatType.METRIC,
            ]),
        ]

        self.assertEqual(expected_configs, actual_configs)

        # Test for case insensitivity
        lowercase_configs = view_export_manager.get_configs_for_export_name(
            export_name=self.mock_export_name.lower(),
            state_code=self.mock_state_code.lower(),
            project_id=self.mock_project_id,
        )
        self.assertEqual(expected_configs, lowercase_configs)

    @mock.patch("recidiviz.utils.environment.get_gcp_environment")
    def test_get_configs_for_export_name_state_agnostic(
            self, mock_environment: mock.MagicMock) -> None:
        """Checks get_configs_for_export_name when no state code is supplied.

        The expected configs have no filter clause, an intermediate table
        name without a state suffix, and an output directory without a state
        subdirectory. The lookup should give the same result for a lower-case
        export name.
        """
        mock_environment.return_value = "production"

        actual_configs = view_export_manager.get_configs_for_export_name(
            export_name=self.mock_export_name, project_id=self.mock_project_id)

        view = self.mock_view_builder.build()
        metric_view = self.mock_metric_view_builder.build()

        # Values shared by both expected configs. The table name is derived
        # from the plain view's id for both of them.
        table_name = f"{view.view_id}_table"
        directory_uri = "gs://{project_id}-dataset-location/subdirectory".format(
            project_id=self.mock_project_id, )

        def expected_config(bq_view, output_formats):
            return ExportBigQueryViewConfig(
                bq_view_namespace=self.mock_big_query_view_namespace,
                view=bq_view,
                view_filter_clause=None,
                intermediate_table_name=table_name,
                output_directory=GcsfsDirectoryPath.from_absolute_path(
                    directory_uri),
                export_output_formats=output_formats,
            )

        expected_configs = [
            expected_config(view, [ExportOutputFormatType.JSON]),
            expected_config(metric_view, [
                ExportOutputFormatType.JSON,
                ExportOutputFormatType.METRIC,
            ]),
        ]

        self.assertEqual(expected_configs, actual_configs)

        # Test for case insensitivity
        lowercase_configs = view_export_manager.get_configs_for_export_name(
            export_name=self.mock_export_name.lower(),
            project_id=self.mock_project_id)

        self.assertEqual(expected_configs, lowercase_configs)

    @mock.patch("recidiviz.big_query.view_update_manager.rematerialize_views")
    @mock.patch(
        "recidiviz.big_query.export.big_query_view_exporter.BigQueryViewExporter"
    )
    def test_export_dashboard_data_to_cloud_storage(
            self, mock_view_exporter: Mock,
            mock_view_update_manager_rematerialize: Mock) -> None:
        """Checks a state-specific export: views are rematerialized and the exporter gets staging configs.

        After export_view_data_to_cloud_storage runs for the mock export name
        and state code, rematerialize_views must have been called and the
        exporter's export_and_validate must have received the expected lists
        of configs pointed at the staging subdirectory.
        """
        view_export_manager.export_view_data_to_cloud_storage(
            self.mock_export_name, self.mock_state_code, mock_view_exporter)

        view = self.mock_view_builder.build()
        metric_view = self.mock_metric_view_builder.build()

        # Values shared by both expected configs. The table name is derived
        # from the plain view's id for both of them.
        table_name = f"{view.view_id}_table_US_XX"
        state_directory_uri = (
            "gs://{project_id}-dataset-location/subdirectory/{state_code}"
            .format(
                project_id=self.mock_project_id,
                state_code="US_XX",
            ))

        def expected_config(bq_view, output_formats):
            return ExportBigQueryViewConfig(
                bq_view_namespace=self.mock_big_query_view_namespace,
                view=bq_view,
                view_filter_clause=" WHERE state_code = 'US_XX'",
                intermediate_table_name=table_name,
                output_directory=GcsfsDirectoryPath.from_absolute_path(
                    state_directory_uri),
                export_output_formats=output_formats,
            )

        plain_view_config = expected_config(
            view, [ExportOutputFormatType.JSON])
        metric_view_config = expected_config(metric_view, [
            ExportOutputFormatType.JSON,
            ExportOutputFormatType.METRIC,
        ])

        mock_view_update_manager_rematerialize.assert_called()

        staged_plain = plain_view_config.pointed_to_staging_subdirectory()
        staged_metric = metric_view_config.pointed_to_staging_subdirectory()

        # Expected export_and_validate invocations, accepted in any order.
        # NOTE(review): which output format each invocation corresponds to
        # is decided inside view_export_manager and is not visible from this
        # test -- confirm there before relying on the grouping below.
        expected_calls = [
            mock.call([]),
            mock.call([]),
            mock.call([staged_plain, staged_metric]),
            mock.call([staged_metric]),
        ]
        mock_view_exporter.export_and_validate.assert_has_calls(
            expected_calls,
            any_order=True,
        )

    @mock.patch(
        "recidiviz.big_query.view_update_manager.create_managed_dataset_and_deploy_views_for_view_builders"
    )
    @mock.patch(
        "recidiviz.big_query.export.big_query_view_exporter.BigQueryViewExporter"
    )
    def test_raise_exception_no_export_matched(
            self, mock_view_exporter: Mock,
            mock_view_update_manager_rematerialize: Mock) -> None:
        # pylint: disable=unused-argument
        """Tests that an export job name matching no export config raises ValueError."""
        self.mock_export_config.NAMESPACE_TO_UPDATE_FOR_EXPORT_FILTER = {
            "US_YY": "NAMESPACE"
        }

        with self.assertRaises(ValueError) as e:
            view_export_manager.export_view_data_to_cloud_storage(
                export_job_name="JOBZZZ",
                override_view_exporter=mock_view_exporter)

        # The message check must sit outside the `with` block: any statement
        # placed after the raising call inside the block is never executed.
        # NOTE(review): only the job name is asserted; tighten this to the
        # exact message once the wording is confirmed in view_export_manager.
        self.assertIn("JOBZZZ", str(e.exception))

    @mock.patch("recidiviz.big_query.view_update_manager.rematerialize_views")
    @mock.patch(
        "recidiviz.big_query.export.big_query_view_exporter.BigQueryViewExporter"
    )
    def test_export_dashboard_data_to_cloud_storage_state_agnostic(
            self, mock_view_exporter: Mock,
            mock_view_update_manager_rematerialize: Mock) -> None:
        """Tests the table is created from the view and then extracted, where the export is not state-specific."""
        # Single source of truth for the bucket template used both in the
        # config under test and in the expected export configs below.
        output_uri_template = "gs://{project_id}-bucket-without-state-codes"

        self.mock_export_config.VIEW_COLLECTION_EXPORT_INDEX = {
            self.mock_export_name:
            ExportViewCollectionConfig(
                view_builders_to_export=self.view_builders_for_dataset,
                output_directory_uri_template=output_uri_template,
                export_name=self.mock_export_name,
                bq_view_namespace=self.mock_big_query_view_namespace,
            ),
        }

        view_export_manager.export_view_data_to_cloud_storage(
            export_job_name=self.mock_export_name,
            override_view_exporter=mock_view_exporter,
        )

        view = self.mock_view_builder.build()
        metric_view = self.mock_metric_view_builder.build()
        expected_output_uri = output_uri_template.format(
            project_id=self.mock_project_id)

        view_export_configs = [
            ExportBigQueryViewConfig(
                bq_view_namespace=self.mock_big_query_view_namespace,
                view=view,
                view_filter_clause=None,
                intermediate_table_name=f"{view.view_id}_table",
                output_directory=GcsfsDirectoryPath.from_absolute_path(
                    expected_output_uri),
                export_output_formats=[ExportOutputFormatType.JSON],
            ),
            ExportBigQueryViewConfig(
                bq_view_namespace=self.mock_big_query_view_namespace,
                view=metric_view,
                view_filter_clause=None,
                # Derived from the metric view itself rather than from `view`.
                intermediate_table_name=f"{metric_view.view_id}_table",
                output_directory=GcsfsDirectoryPath.from_absolute_path(
                    expected_output_uri),
                export_output_formats=[
                    ExportOutputFormatType.JSON,
                    ExportOutputFormatType.METRIC,
                ],
            ),
        ]

        mock_view_update_manager_rematerialize.assert_called()

        # Format labels below follow each config's export_output_formats: no
        # config lists CSV, both list JSON, and only the metric view lists METRIC.
        mock_view_exporter.export_and_validate.assert_has_calls(
            [
                mock.call([]),  # CSV export
                mock.call([
                    view_export_configs[1].pointed_to_staging_subdirectory()
                ]),  # METRIC export
                mock.call([
                    conf.pointed_to_staging_subdirectory()
                    for conf in view_export_configs
                ]),  # JSON export
            ],
            any_order=True,
        )

    @mock.patch("recidiviz.big_query.view_update_manager.rematerialize_views")
    @mock.patch(
        "recidiviz.big_query.export.big_query_view_exporter.BigQueryViewExporter"
    )
    def test_export_dashboard_data_to_cloud_storage_value_error(
            self, mock_view_exporter: Mock,
            mock_view_update_manager_rematerialize: Mock) -> None:
        """Tests that a ValueError raised by the exporter propagates out of the export call."""
        mock_view_exporter.export_and_validate.side_effect = ValueError

        self.assertRaises(
            ValueError,
            view_export_manager.export_view_data_to_cloud_storage,
            self.mock_export_name,
            override_view_exporter=mock_view_exporter,
        )

        # Rematerialization is expected exactly once even though the export raised.
        mock_view_update_manager_rematerialize.assert_called_once()

    @mock.patch("recidiviz.big_query.view_update_manager.rematerialize_views")
    @mock.patch(
        "recidiviz.big_query.export.big_query_view_exporter.BigQueryViewExporter"
    )
    def test_export_dashboard_data_to_cloud_storage_validation_error(
            self, mock_view_exporter: Mock,
            mock_view_update_manager_rematerialize: Mock) -> None:
        """Tests that a ViewExportValidationError from the exporter does not escape the export call."""
        exporter = mock_view_exporter
        exporter.export_and_validate.side_effect = ViewExportValidationError

        # No assertRaises here: the call below is expected to complete normally.
        view_export_manager.export_view_data_to_cloud_storage(
            self.mock_export_name,
            override_view_exporter=exporter,
        )

        # Rematerialization is expected exactly once despite the validation error.
        mock_view_update_manager_rematerialize.assert_called_once()

    @mock.patch("recidiviz.metrics.export.view_export_manager.deployed_views")
    @mock.patch("recidiviz.big_query.view_update_manager.rematerialize_views")
    @mock.patch(
        "recidiviz.big_query.view_update_manager.create_managed_dataset_and_deploy_views_for_view_builders"
    )
    @mock.patch(
        "recidiviz.big_query.export.big_query_view_exporter.BigQueryViewExporter"
    )
    def test_export_dashboard_data_to_cloud_storage_update_all_views(
        self,
        mock_view_exporter: Mock,
        mock_view_update_manager_deploy: Mock,
        mock_view_update_manager_rematerialize: Mock,
        mock_deployed_views: Mock,
    ) -> None:
        """Tests that every view builder in the namespace is deployed before the export when the export's
        namespace is listed in export_config.NAMESPACES_REQUIRING_FULL_UPDATE."""
        namespace = self.mock_big_query_view_namespace
        namespace_builders = self.view_builders_for_dataset

        self.mock_export_config.NAMESPACES_REQUIRING_FULL_UPDATE = [namespace]
        mock_deployed_views.DEPLOYED_VIEW_BUILDERS_BY_NAMESPACE = {
            namespace: namespace_builders
        }

        view_export_manager.export_view_data_to_cloud_storage(
            self.mock_export_name,
            override_view_exporter=mock_view_exporter,
        )

        # The full deploy receives all builders registered for the namespace.
        mock_view_update_manager_deploy.assert_called_with(
            view_source_table_datasets=VIEW_SOURCE_TABLE_DATASETS,
            view_builders_to_update=namespace_builders,
        )
        mock_view_update_manager_rematerialize.assert_called_once()

    @mock.patch("recidiviz.metrics.export.view_export_manager.deployed_views")
    @mock.patch("recidiviz.big_query.view_update_manager.rematerialize_views")
    @mock.patch(
        "recidiviz.big_query.export.big_query_view_exporter.BigQueryViewExporter"
    )
    def test_export_dashboard_data_to_cloud_storage_update_materialized_views_only(
        self,
        mock_view_exporter: Mock,
        mock_view_update_manager_rematerialize: Mock,
        mock_deployed_views: Mock,
    ) -> None:
        """Tests that only the exported views are rematerialized before the export when the export's namespace is
        not listed in export_config.NAMESPACES_REQUIRING_FULL_UPDATE."""
        namespace_builders = self.view_builders_for_dataset

        # A namespace other than the one used by the export under test.
        self.mock_export_config.NAMESPACES_REQUIRING_FULL_UPDATE = [
            "OTHER_NAMESPACE"
        ]
        mock_deployed_views.DEPLOYED_VIEW_BUILDERS_BY_NAMESPACE = {
            self.mock_big_query_view_namespace: namespace_builders
        }

        view_export_manager.export_view_data_to_cloud_storage(
            self.mock_export_name,
            override_view_exporter=mock_view_exporter,
        )

        expected_views = [builder.build() for builder in namespace_builders]
        mock_view_update_manager_rematerialize.assert_called_with(
            view_source_table_datasets=VIEW_SOURCE_TABLE_DATASETS,
            all_view_builders=DEPLOYED_VIEW_BUILDERS,
            views_to_update=expected_views,
        )

    @mock.patch(
        "recidiviz.metrics.export.view_export_manager.export_view_data_to_cloud_storage"
    )
    def test_metric_view_data_export_valid_request(
            self, mock_export_view_data_to_cloud_storage: Mock) -> None:
        """Tests that the export endpoint returns OK for a valid job name and state code, in either letter case."""
        query_strings = (
            "export_job_name=EXPORT&state_code=US_XX",
            "export_job_name=export&state_code=us_xx",
        )
        with self.app.test_request_context():
            mock_export_view_data_to_cloud_storage.return_value = None
            for query_string in query_strings:
                response = self.client.get(
                    self.metric_view_data_export_url,
                    headers=self.headers,
                    query_string=query_string,
                )
                self.assertEqual(HTTPStatus.OK, response.status_code)

    @mock.patch(
        "recidiviz.metrics.export.view_export_manager.export_view_data_to_cloud_storage"
    )
    def test_metric_view_data_export_state_agnostic(
            self, mock_export_view_data_to_cloud_storage: Mock) -> None:
        """Tests that the export endpoint returns OK for MOCK_EXPORT_NAME without a state code, in either letter case."""
        query_strings = (
            "export_job_name=MOCK_EXPORT_NAME",
            # case insensitive
            "export_job_name=mock_export_name",
        )
        with self.app.test_request_context():
            mock_export_view_data_to_cloud_storage.return_value = None
            for query_string in query_strings:
                response = self.client.get(
                    self.metric_view_data_export_url,
                    headers=self.headers,
                    query_string=query_string,
                )
                self.assertEqual(HTTPStatus.OK, response.status_code)

    @mock.patch(
        "recidiviz.metrics.export.view_export_manager.export_view_data_to_cloud_storage"
    )
    def test_metric_view_data_export_missing_required_state_code(
            self, mock_export_view_data_to_cloud_storage: Mock) -> None:
        """Tests that requesting the EXPORT job without a state_code yields BAD_REQUEST, in either letter case."""
        # Both spellings are expected to produce the same upper-cased message.
        expected_body = b"Missing required state_code parameter for export_job_name EXPORT"
        query_strings = (
            "export_job_name=EXPORT",
            # case insensitive
            "export_job_name=export",
        )
        with self.app.test_request_context():
            mock_export_view_data_to_cloud_storage.return_value = None
            for query_string in query_strings:
                response = self.client.get(
                    self.metric_view_data_export_url,
                    headers=self.headers,
                    query_string=query_string,
                )
                self.assertEqual(HTTPStatus.BAD_REQUEST, response.status_code)
                self.assertEqual(expected_body, response.data)

    @mock.patch(
        "recidiviz.metrics.export.view_export_manager.export_view_data_to_cloud_storage"
    )
    def test_metric_view_data_export_missing_export_job_name(
            self, mock_export_view_data_to_cloud_storage: Mock) -> None:
        """Tests that a request without export_job_name yields BAD_REQUEST and an explanatory body."""
        expected_body = b"Missing required export_job_name URL parameter"
        with self.app.test_request_context():
            mock_export_view_data_to_cloud_storage.return_value = None
            # Only the state code is supplied; the job name is deliberately absent.
            response = self.client.get(
                self.metric_view_data_export_url,
                headers=self.headers,
                query_string="state_code=US_XX",
            )
            self.assertEqual(HTTPStatus.BAD_REQUEST, response.status_code)
            self.assertEqual(expected_body, response.data)

    @mock.patch(
        "recidiviz.metrics.export.view_export_cloud_task_manager.ViewExportCloudTaskManager.create_metric_view_data_export_task"
    )
    def test_create_metric_view_data_export_tasks_state_code_filter(
            self, mock_create_metric_view_data_export_task: Mock) -> None:
        """Tests that filtering by state code creates the expected export tasks, in either letter case."""
        expected_calls = [
            mock.call(export_job_name="EXPORT", state_code="US_XX"),
            mock.call(export_job_name="OTHER_EXPORT", state_code="US_XX"),
        ]
        query_strings = (
            "export_job_filter=US_XX",
            # case insensitive
            "export_job_filter=us_xx",
        )
        with self.app.test_request_context():
            mock_create_metric_view_data_export_task.return_value = None
            for query_string in query_strings:
                # Forget calls recorded by an earlier request; otherwise the
                # assertion below would pass on the strength of those alone.
                # reset_mock() keeps the configured return_value by default.
                mock_create_metric_view_data_export_task.reset_mock()
                response = self.client.get(
                    self.create_metric_view_data_export_tasks_url,
                    headers=self.headers,
                    query_string=query_string,
                )
                self.assertEqual(HTTPStatus.OK, response.status_code)
                mock_create_metric_view_data_export_task.assert_has_calls(
                    expected_calls,
                    any_order=True,
                )

    @mock.patch(
        "recidiviz.metrics.export.view_export_cloud_task_manager.ViewExportCloudTaskManager.create_metric_view_data_export_task"
    )
    def test_create_metric_view_data_export_tasks_export_name_filter_state_agnostic(
            self, mock_create_metric_view_data_export_task: Mock) -> None:
        """Tests that filtering by MOCK_EXPORT_NAME creates one task with no state code, in either letter case."""
        expected_calls = [
            mock.call(export_job_name="MOCK_EXPORT_NAME", state_code=None),
        ]
        query_strings = (
            "export_job_filter=MOCK_EXPORT_NAME",
            # case insensitive
            "export_job_filter=mock_export_name",
        )
        with self.app.test_request_context():
            mock_create_metric_view_data_export_task.return_value = None
            for query_string in query_strings:
                # Forget calls recorded by an earlier request; otherwise the
                # assertion below would pass on the strength of those alone.
                # reset_mock() keeps the configured return_value by default.
                mock_create_metric_view_data_export_task.reset_mock()
                response = self.client.get(
                    self.create_metric_view_data_export_tasks_url,
                    headers=self.headers,
                    query_string=query_string,
                )
                self.assertEqual(HTTPStatus.OK, response.status_code)
                mock_create_metric_view_data_export_task.assert_has_calls(
                    expected_calls,
                    any_order=True,
                )

    @mock.patch(
        "recidiviz.metrics.export.view_export_cloud_task_manager.ViewExportCloudTaskManager.create_metric_view_data_export_task"
    )
    def test_create_metric_view_data_export_tasks_export_name_filter(
            self, mock_create_metric_view_data_export_task: Mock) -> None:
        """Tests that filtering by the EXPORT job name creates the expected per-state tasks, in either letter case."""
        expected_calls = [
            mock.call(export_job_name="EXPORT", state_code="US_XX"),
            mock.call(export_job_name="EXPORT", state_code="US_WW"),
        ]
        query_strings = (
            "export_job_filter=EXPORT",
            # case insensitive
            "export_job_filter=export",
        )
        with self.app.test_request_context():
            mock_create_metric_view_data_export_task.return_value = None
            for query_string in query_strings:
                # Forget calls recorded by an earlier request; otherwise the
                # assertion below would pass on the strength of those alone.
                # reset_mock() keeps the configured return_value by default.
                mock_create_metric_view_data_export_task.reset_mock()
                response = self.client.get(
                    self.create_metric_view_data_export_tasks_url,
                    headers=self.headers,
                    query_string=query_string,
                )
                self.assertEqual(HTTPStatus.OK, response.status_code)
                mock_create_metric_view_data_export_task.assert_has_calls(
                    expected_calls,
                    any_order=True,
                )
Exemplo n.º 27
0
    """

# Builder for the incarceration-population-by-month-by-demographics view in the
# public dashboard views dataset.
INCARCERATION_POPULATION_BY_MONTH_BY_DEMOGRAPHICS_VIEW_BUILDER = MetricBigQueryViewBuilder(
    dataset_id=dataset_config.PUBLIC_DASHBOARD_VIEWS_DATASET,
    view_id=INCARCERATION_POPULATION_BY_MONTH_BY_DEMOGRAPHICS_VIEW_NAME,
    view_query_template=(
        INCARCERATION_POPULATION_BY_MONTH_BY_DEMOGRAPHICS_VIEW_QUERY_TEMPLATE
    ),
    dimensions=(
        "state_code",
        "population_date",
        "race_or_ethnicity",
        "gender",
        "age_bucket",
    ),
    description=(
        INCARCERATION_POPULATION_BY_MONTH_BY_DEMOGRAPHICS_VIEW_DESCRIPTION
    ),
    # NOTE(review): the remaining keyword arguments presumably fill placeholders
    # in the query template - confirm against the template text.
    materialized_metrics_dataset=(
        dataset_config.DATAFLOW_METRICS_MATERIALIZED_DATASET
    ),
    reference_views_dataset=dataset_config.REFERENCE_VIEWS_DATASET,
    static_reference_dataset=dataset_config.STATIC_REFERENCE_TABLES_DATASET,
    unnested_race_or_ethnicity_dimension=bq_utils.unnest_column(
        "race_or_ethnicity", "race_or_ethnicity"
    ),
    gender_dimension=bq_utils.unnest_column("gender", "gender"),
    age_dimension=bq_utils.unnest_column("age_bucket", "age_bucket"),
    state_specific_race_or_ethnicity_groupings=(
        state_specific_query_strings.state_specific_race_or_ethnicity_groupings(
            "prioritized_race_or_ethnicity"
        )
    ),
    state_specific_facility_exclusion=(
        state_specific_query_strings.state_specific_facility_exclusion()
    ),
)

if __name__ == "__main__":
    def setUp(self) -> None:
        """Builds the Flask test app and client, starts the patchers, and creates the view builders and
        export configs shared by the tests."""
        # Flask application exposing the export blueprint.
        flask_app = Flask(__name__)
        flask_app.register_blueprint(export_blueprint)
        flask_app.config["TESTING"] = True
        self.app = flask_app
        self.headers: Dict[str, Dict[Any, Any]] = {
            "x-goog-iap-jwt-assertion": {}
        }
        self.client = flask_app.test_client()

        # Cloud Tasks client and uuid are replaced with mocks.
        self.mock_cloud_task_client_patcher = mock.patch(
            "google.cloud.tasks_v2.CloudTasksClient")
        self.mock_cloud_task_client_patcher.start()
        self.mock_uuid_patcher = mock.patch(
            f"{CLOUD_TASK_MANAGER_PACKAGE_NAME}.uuid")
        self.mock_uuid = self.mock_uuid_patcher.start()

        # Endpoint URLs are resolved inside a request context.
        with flask_app.test_request_context():
            self.metric_view_data_export_url = flask.url_for(
                "export.metric_view_data_export")
            self.create_metric_view_data_export_tasks_url = flask.url_for(
                "export.create_metric_view_data_export_tasks")

        self.mock_state_code = "US_XX"
        self.mock_project_id = "test-project"
        self.mock_dataset_id = "base_dataset"
        self.mock_dataset = bigquery.dataset.DatasetReference(
            self.mock_project_id, self.mock_dataset_id)

        # metadata.project_id() returns the fake project id.
        self.metadata_patcher = mock.patch(
            "recidiviz.utils.metadata.project_id")
        self.mock_project_id_fn = self.metadata_patcher.start()
        self.mock_project_id_fn.return_value = self.mock_project_id

        # The BigQuery client class is mocked; its instance resolves every
        # dataset id to the fake dataset reference.
        self.client_patcher = mock.patch(
            "recidiviz.metrics.export.view_export_manager.BigQueryClientImpl")
        mock_client_cls = self.client_patcher.start()
        self.mock_client = mock_client_cls.return_value
        self.mock_client.dataset_ref_for_id.return_value = self.mock_dataset

        # One plain view builder and one metric view builder, same view id.
        self.mock_view_builder = SimpleBigQueryViewBuilder(
            dataset_id=self.mock_dataset.dataset_id,
            view_id="test_view",
            description="test_view description",
            view_query_template="SELECT NULL LIMIT 0",
        )
        self.mock_metric_view_builder = MetricBigQueryViewBuilder(
            dataset_id=self.mock_dataset.dataset_id,
            view_id="test_view",
            description="test_view description",
            view_query_template="SELECT NULL LIMIT 0",
            dimensions=tuple(),
        )
        self.view_builders_for_dataset = [
            self.mock_view_builder,
            self.mock_metric_view_builder,
        ]

        self.output_uri_template_for_dataset = {
            "dataset_id": "gs://{project_id}-dataset-location/subdirectory",
        }
        self.views_to_update = {
            self.mock_dataset_id: self.view_builders_for_dataset
        }

        self.mock_export_name = "MOCK_EXPORT_NAME"
        self.mock_big_query_view_namespace = BigQueryViewNamespace.STATE

        def collection_config(export_name, view_builders):
            # Every export shares the same output template and namespace.
            return ExportViewCollectionConfig(
                view_builders_to_export=view_builders,
                output_directory_uri_template=
                "gs://{project_id}-dataset-location/subdirectory",
                export_name=export_name,
                bq_view_namespace=self.mock_big_query_view_namespace,
            )

        self.metric_dataset_export_configs_index = {
            "EXPORT":
            collection_config("EXPORT", [self.mock_view_builder]),
            "OTHER_EXPORT":
            collection_config("OTHER_EXPORT",
                              [self.mock_metric_view_builder]),
            self.mock_export_name:
            collection_config(self.mock_export_name,
                              self.view_builders_for_dataset),
        }

        # Attributes set on the mocked export_config module.
        patched_export_config_attrs = {
            "OUTPUT_DIRECTORY_URI_TEMPLATE_FOR_DATASET_EXPORT":
            self.output_uri_template_for_dataset,
            "VIEW_COLLECTION_EXPORT_INDEX":
            self.metric_dataset_export_configs_index,
        }
        self.export_config_patcher = mock.patch(  # type: ignore[call-overload]
            "recidiviz.metrics.export.view_export_manager.export_config",
            **patched_export_config_attrs,
        )
        self.mock_export_config = self.export_config_patcher.start()

        # GcsfsFactory.build() hands back a fake GCS file system.
        self.gcs_factory_patcher = mock.patch(
            "recidiviz.metrics.export.view_export_manager.GcsfsFactory.build")
        mock_gcs_factory_build = self.gcs_factory_patcher.start()
        mock_gcs_factory_build.return_value = FakeGCSFileSystem()
Exemplo n.º 29
0
        district
      FROM `{project_id}.{reference_views_dataset}.event_based_supervision_populations`
      GROUP BY state_code, year, month, supervision_type, district
    ) pop
    LEFT JOIN (
      SELECT 
        state_code, year, month,
        COUNT(DISTINCT person_id) AS revocation_count,
        supervision_type,
        district
      FROM `{project_id}.{reference_views_dataset}.event_based_revocations`
      GROUP BY state_code, year, month, supervision_type, district
    ) rev
    USING (state_code, year, month, supervision_type, district)
    WHERE supervision_type in ('ALL', 'PAROLE', 'PROBATION')
    ORDER BY state_code, year, month, supervision_type, district
    """

# Builder for the revocations-by-month view in the dashboard views dataset.
REVOCATIONS_BY_MONTH_VIEW_BUILDER = MetricBigQueryViewBuilder(
    dataset_id=dataset_config.DASHBOARD_VIEWS_DATASET,
    view_id=REVOCATIONS_BY_MONTH_VIEW_NAME,
    view_query_template=REVOCATIONS_BY_MONTH_QUERY_TEMPLATE,
    dimensions=[
        "state_code",
        "year",
        "month",
        "supervision_type",
        "district",
    ],
    description=REVOCATIONS_BY_MONTH_DESCRIPTION,
    # Supplies the {reference_views_dataset} placeholder used in the query template.
    reference_views_dataset=dataset_config.REFERENCE_VIEWS_DATASET,
)

# When run as a script, build the view against the staging project and print it.
if __name__ == "__main__":
    with local_project_id_override(GCP_PROJECT_STAGING):
        REVOCATIONS_BY_MONTH_VIEW_BUILDER.build_and_print()
Exemplo n.º 30
0
        supervision_type,
        district
    FROM (
        SELECT
          state_code, year, month,
          COUNT(IF(most_severe_violation_type = 'NEW_ADMISSION', person_id, NULL)) AS new_admissions,
          COUNT(IF(most_severe_violation_type = 'TECHNICAL', person_id, NULL)) AS technicals,
          COUNT(IF(most_severe_violation_type IN ('ABSCONDED', 'ESCAPED', 'FELONY', 'MISDEMEANOR', 'LAW'), person_id, NULL)) AS non_technicals,
          COUNT(person_id) AS all_violation_types_count,
          supervision_type,
          district
        FROM most_recent_admission
        WHERE admission_rank = 1
        GROUP BY state_code, year, month, supervision_type, district
    )
    ORDER BY state_code, year, month, district, supervision_type
"""

# Builder for the admissions-by-type-by-month view in the dashboard views dataset.
ADMISSIONS_BY_TYPE_BY_MONTH_VIEW_BUILDER = MetricBigQueryViewBuilder(
    dataset_id=dataset_config.DASHBOARD_VIEWS_DATASET,
    view_id=ADMISSIONS_BY_TYPE_BY_MONTH_VIEW_NAME,
    view_query_template=ADMISSIONS_BY_TYPE_BY_MONTH_QUERY_TEMPLATE,
    dimensions=(
        "state_code",
        "year",
        "month",
        "supervision_type",
        "district",
    ),
    description=ADMISSIONS_BY_TYPE_BY_MONTH_DESCRIPTION,
    # NOTE(review): presumably fills a placeholder in the query template - confirm.
    reference_views_dataset=dataset_config.REFERENCE_VIEWS_DATASET,
)

# When run as a script, build the view against the staging project and print it.
if __name__ == "__main__":
    with local_project_id_override(GCP_PROJECT_STAGING):
        ADMISSIONS_BY_TYPE_BY_MONTH_VIEW_BUILDER.build_and_print()