def test_select_all_state_code_and_ids_filter(self):
    """Both filters present: WHERE has state_code first, then the id clause."""
    by_person_expected = (
        "SELECT * FROM `project-id.my_dataset.TABLE_WHERE_DATA_IS` "
        "WHERE state_code IN ('US_XX') AND person_id IN (1234)")
    by_person_actual = select_all_by_person_query(
        self.dataset,
        self.table_id,
        state_code_filter="US_XX",
        person_id_filter_set={1234},
    )
    self.assertEqual(by_person_expected, by_person_actual)

    generic_expected = (
        "SELECT * FROM `project-id.my_dataset.TABLE_WHERE_DATA_IS` "
        "WHERE state_code IN ('US_XX') AND field_name IN (1234, 56)")
    generic_actual = select_all_query(
        self.dataset,
        self.table_id,
        state_code_filter="US_XX",
        unifying_id_field="field_name",
        unifying_id_field_filter_set={1234, 56},
    )
    self.assertEqual(generic_expected, generic_actual)
def expand(self, pipeline: Pipeline):
    """Read the configured BigQuery table, filtered by state code and person ids."""
    # Build the filtered SELECT statement for the configured table.
    query = select_all_by_person_query(
        self.dataset_id,
        self.table_id,
        self.state_code_filter,
        self.person_id_filter_set)
    # Materialize the query results as a PCollection.
    return (
        pipeline
        | f"Read {self.dataset_id}.{self.table_id} table from BigQuery"
        >> ReadFromBigQuery(query=query))
def test_select_all_with_state_code_filter_only(self):
    """With only a state_code filter, both builders emit the same query."""
    expected = (
        "SELECT * FROM `project-id.my_dataset.TABLE_WHERE_DATA_IS` "
        "WHERE state_code IN ('US_XX')")
    # Both query builders should agree when no id filter is supplied.
    for actual in (
            select_all_by_person_query(
                self.dataset, self.table_id,
                state_code_filter='US_XX',
                person_id_filter_set=None),
            select_all_query(
                self.dataset, self.table_id,
                state_code_filter='US_XX',
                unifying_id_field='field_name',
                unifying_id_field_filter_set=None)):
        self.assertEqual(expected, actual)
def test_simple_select_all_no_filters(self):
    """With no filters at all, the query is a bare SELECT *."""
    expected = 'SELECT * FROM `project-id.my_dataset.TABLE_WHERE_DATA_IS`'
    # Neither builder should add a WHERE clause when every filter is None.
    for actual in (
            select_all_by_person_query(
                self.dataset, self.table_id,
                state_code_filter=None,
                person_id_filter_set=None),
            select_all_query(
                self.dataset, self.table_id,
                state_code_filter=None,
                unifying_id_field='field_name',
                unifying_id_field_filter_set=None)):
        self.assertEqual(expected, actual)
def test_select_all_with_ids_filter_only(self):
    """With only an id filter, the WHERE clause names the id column."""
    # Person-based builder filters on the person_id column.
    self.assertEqual(
        "SELECT * FROM `project-id.my_dataset.TABLE_WHERE_DATA_IS` "
        "WHERE person_id IN (1234)",
        select_all_by_person_query(
            self.dataset, self.table_id,
            state_code_filter=None,
            person_id_filter_set={1234}))
    # Generic builder filters on the caller-supplied unifying id field.
    self.assertEqual(
        "SELECT * FROM `project-id.my_dataset.TABLE_WHERE_DATA_IS` "
        "WHERE field_name IN (1234)",
        select_all_query(
            self.dataset, self.table_id,
            state_code_filter=None,
            unifying_id_field='field_name',
            unifying_id_field_filter_set={1234}))
def test_select_all_state_code_and_ids_filter(self):
    """Both filters present: WHERE has the id clause first, then state_code."""
    by_person_expected = (
        "SELECT * FROM `project-id.my_dataset.TABLE_WHERE_DATA_IS` "
        "WHERE person_id IN (1234) AND state_code IN ('US_XX')")
    by_person_actual = select_all_by_person_query(
        self.dataset,
        self.table_id,
        state_code_filter='US_XX',
        person_id_filter_set={1234})
    self.assertEqual(by_person_expected, by_person_actual)

    generic_expected = (
        "SELECT * FROM `project-id.my_dataset.TABLE_WHERE_DATA_IS` "
        "WHERE field_name IN (1234, 56) AND state_code IN ('US_XX')")
    generic_actual = select_all_query(
        self.dataset,
        self.table_id,
        state_code_filter='US_XX',
        unifying_id_field='field_name',
        unifying_id_field_filter_set={1234, 56})
    self.assertEqual(generic_expected, generic_actual)
def test_select_all_with_state_code_filter_only(self):
    """With only a state_code filter, both builders emit the same query."""
    expected = (
        "SELECT * FROM `project-id.my_dataset.TABLE_WHERE_DATA_IS` "
        "WHERE state_code IN ('US_XX')")
    # Both query builders should agree when no id filter is supplied.
    for actual in (
            select_all_by_person_query(
                self.dataset, self.table_id,
                state_code_filter="US_XX",
                person_id_filter_set=None),
            select_all_query(
                self.dataset, self.table_id,
                state_code_filter="US_XX",
                unifying_id_field="field_name",
                unifying_id_field_filter_set=None)):
        self.assertEqual(expected, actual)
def run(apache_beam_pipeline_options: PipelineOptions, data_input: str,
        reference_input: str, output: str, calculation_month_count: int,
        metric_types: List[str], state_code: Optional[str],
        calculation_end_month: Optional[str],
        person_filter_ids: Optional[List[int]]):
    """Runs the supervision calculation pipeline.

    Loads state entities and reference views from BigQuery, classifies
    SupervisionTimeBuckets per person, produces supervision metrics, and
    writes them to the metric tables in the `output` dataset.

    Args:
        apache_beam_pipeline_options: Beam options for this pipeline run.
        data_input: dataset id (without project prefix) holding state entities.
        reference_input: dataset id (without project prefix) holding reference
            views.
        output: dataset the metric tables are written to.
        calculation_month_count: number of months passed through to the metric
            calculation.
        metric_types: names of the metric types to calculate.
        state_code: optional state code used to filter loaded entities and
            reference views.
        calculation_end_month: optional end month passed through to the metric
            calculation.
        person_filter_ids: optional person ids to restrict the run to. When
            non-empty, metrics are computed but NOT written to BigQuery.
    """
    # Workaround to load SQLAlchemy objects at start of pipeline. This is necessary because the BuildRootEntity
    # function tries to access attributes of relationship properties on the SQLAlchemy room_schema_class before they
    # have been loaded. However, if *any* SQLAlchemy objects have been instantiated, then the relationship properties
    # are loaded and their attributes can be successfully accessed.
    _ = schema.StatePerson()

    # Required so that DoFns defined in the main module can be pickled for workers.
    apache_beam_pipeline_options.view_as(SetupOptions).save_main_session = True

    # Get pipeline job details
    all_pipeline_options = apache_beam_pipeline_options.get_all_options()

    input_dataset = all_pipeline_options['project'] + '.' + data_input
    reference_dataset = all_pipeline_options['project'] + '.' + reference_input

    person_id_filter_set = set(
        person_filter_ids) if person_filter_ids else None

    with beam.Pipeline(options=apache_beam_pipeline_options) as p:
        # Get StatePersons
        persons = (p | 'Load Persons' >> BuildRootEntity(
            dataset=input_dataset,
            root_entity_class=entities.StatePerson,
            unifying_id_field=entities.StatePerson.get_class_id_name(),
            build_related_entities=True,
            unifying_id_field_filter_set=person_id_filter_set,
            state_code=state_code))

        # Get StateIncarcerationPeriods
        incarceration_periods = (
            p | 'Load IncarcerationPeriods' >> BuildRootEntity(
                dataset=input_dataset,
                root_entity_class=entities.StateIncarcerationPeriod,
                unifying_id_field=entities.StatePerson.get_class_id_name(),
                build_related_entities=True,
                unifying_id_field_filter_set=person_id_filter_set,
                state_code=state_code))

        # Get StateSupervisionViolations
        supervision_violations = (
            p | 'Load SupervisionViolations' >> BuildRootEntity(
                dataset=input_dataset,
                root_entity_class=entities.StateSupervisionViolation,
                unifying_id_field=entities.StatePerson.get_class_id_name(),
                build_related_entities=True,
                unifying_id_field_filter_set=person_id_filter_set,
                state_code=state_code))

        # TODO(2769): Don't bring this in as a root entity
        # Get StateSupervisionViolationResponses
        supervision_violation_responses = (
            p | 'Load SupervisionViolationResponses' >> BuildRootEntity(
                dataset=input_dataset,
                root_entity_class=entities.StateSupervisionViolationResponse,
                unifying_id_field=entities.StatePerson.get_class_id_name(),
                build_related_entities=True,
                unifying_id_field_filter_set=person_id_filter_set,
                state_code=state_code))

        # Get StateSupervisionSentences
        supervision_sentences = (
            p | 'Load SupervisionSentences' >> BuildRootEntity(
                dataset=input_dataset,
                root_entity_class=entities.StateSupervisionSentence,
                unifying_id_field=entities.StatePerson.get_class_id_name(),
                build_related_entities=True,
                unifying_id_field_filter_set=person_id_filter_set,
                state_code=state_code))

        # Get StateIncarcerationSentences
        incarceration_sentences = (
            p | 'Load IncarcerationSentences' >> BuildRootEntity(
                dataset=input_dataset,
                root_entity_class=entities.StateIncarcerationSentence,
                unifying_id_field=entities.StatePerson.get_class_id_name(),
                build_related_entities=True,
                unifying_id_field_filter_set=person_id_filter_set,
                state_code=state_code))

        # Get StateSupervisionPeriods
        supervision_periods = (
            p | 'Load SupervisionPeriods' >> BuildRootEntity(
                dataset=input_dataset,
                root_entity_class=entities.StateSupervisionPeriod,
                unifying_id_field=entities.StatePerson.get_class_id_name(),
                build_related_entities=True,
                unifying_id_field_filter_set=person_id_filter_set,
                state_code=state_code))

        # Get StateAssessments (related entities not needed downstream)
        assessments = (p | 'Load Assessments' >> BuildRootEntity(
            dataset=input_dataset,
            root_entity_class=entities.StateAssessment,
            unifying_id_field=entities.StatePerson.get_class_id_name(),
            build_related_entities=False,
            unifying_id_field_filter_set=person_id_filter_set,
            state_code=state_code))

        # Get StateSupervisionContacts (related entities not needed downstream)
        supervision_contacts = (
            p | 'Load StateSupervisionContacts' >> BuildRootEntity(
                dataset=input_dataset,
                root_entity_class=entities.StateSupervisionContact,
                unifying_id_field=entities.StatePerson.get_class_id_name(),
                build_related_entities=False,
                unifying_id_field_filter_set=person_id_filter_set,
                state_code=state_code))

        # Bring in the table that associates StateSupervisionViolationResponses to information about StateAgents
        ssvr_to_agent_association_query = select_all_by_person_query(
            reference_dataset, SSVR_TO_AGENT_ASSOCIATION_VIEW_NAME,
            state_code, person_id_filter_set)

        ssvr_to_agent_associations = (
            p | "Read SSVR to Agent table from BigQuery" >> beam.io.Read(
                beam.io.BigQuerySource(query=ssvr_to_agent_association_query,
                                       use_standard_sql=True)))

        # Convert the association table rows into key-value tuples with the value for the
        # supervision_violation_response_id column as the key
        ssvr_agent_associations_as_kv = (
            ssvr_to_agent_associations
            | 'Convert SSVR to Agent table to KV tuples' >> beam.ParDo(
                ConvertDictToKVTuple(), 'supervision_violation_response_id'))

        supervision_period_to_agent_association_query = select_all_by_person_query(
            reference_dataset,
            SUPERVISION_PERIOD_TO_AGENT_ASSOCIATION_VIEW_NAME,
            state_code, person_id_filter_set)

        supervision_period_to_agent_associations = (
            p | "Read Supervision Period to Agent table from BigQuery" >>
            beam.io.Read(
                beam.io.BigQuerySource(
                    query=supervision_period_to_agent_association_query,
                    use_standard_sql=True)))

        # Convert the association table rows into key-value tuples with the value for the supervision_period_id column
        # as the key
        supervision_period_to_agent_associations_as_kv = (
            supervision_period_to_agent_associations
            | 'Convert Supervision Period to Agent table to KV tuples' >>
            beam.ParDo(ConvertDictToKVTuple(), 'supervision_period_id'))

        # MO sentence statuses are only needed for MO (or unfiltered) runs;
        # other states get an empty PCollection so the CoGroupByKey shape is
        # the same either way.
        if state_code is None or state_code == 'US_MO':
            # Bring in the reference table that includes sentence status ranking information
            us_mo_sentence_status_query = select_all_by_person_query(
                reference_dataset, US_MO_SENTENCE_STATUSES_VIEW_NAME,
                state_code, person_id_filter_set)

            us_mo_sentence_statuses = (
                p | "Read MO sentence status table from BigQuery" >>
                beam.io.Read(
                    beam.io.BigQuerySource(query=us_mo_sentence_status_query,
                                           use_standard_sql=True)))
        else:
            us_mo_sentence_statuses = (
                p |
                f"Generate empty MO statuses list for non-MO state run: {state_code} "
                >> beam.Create([]))

        us_mo_sentence_status_rankings_as_kv = (
            us_mo_sentence_statuses
            | 'Convert MO sentence status ranking table to KV tuples' >>
            beam.ParDo(ConvertDictToKVTuple(), 'person_id'))

        sentences_and_statuses = (
            {
                'incarceration_sentences': incarceration_sentences,
                'supervision_sentences': supervision_sentences,
                'sentence_statuses': us_mo_sentence_status_rankings_as_kv
            }
            | 'Group sentences to the sentence statuses for that person' >>
            beam.CoGroupByKey())

        sentences_converted = (
            sentences_and_statuses
            | 'Convert to state-specific sentences' >> beam.ParDo(
                ConvertSentencesToStateSpecificType()).with_outputs(
                    'incarceration_sentences', 'supervision_sentences'))

        # Bring in the judicial districts associated with supervision_periods
        sp_to_judicial_district_query = select_all_by_person_query(
            reference_dataset,
            SUPERVISION_PERIOD_JUDICIAL_DISTRICT_ASSOCIATION_VIEW_NAME,
            state_code, person_id_filter_set)

        sp_to_judicial_district_kv = (
            p |
            "Read supervision_period to judicial_district associations from BigQuery"
            >> beam.io.Read(
                beam.io.BigQuerySource(query=sp_to_judicial_district_query,
                                       use_standard_sql=True))
            | "Convert supervision_period to judicial_district association table to KV"
            >> beam.ParDo(ConvertDictToKVTuple(), 'person_id'))

        # Group StateSupervisionViolationResponses and StateSupervisionViolations by person_id
        supervision_violations_and_responses = (
            {
                'violations': supervision_violations,
                'violation_responses': supervision_violation_responses
            } | 'Group StateSupervisionViolationResponses to '
            'StateSupervisionViolations' >> beam.CoGroupByKey())

        # Set the fully hydrated StateSupervisionViolation entities on the corresponding
        # StateSupervisionViolationResponses
        violation_responses_with_hydrated_violations = (
            supervision_violations_and_responses
            | 'Set hydrated StateSupervisionViolations on '
            'the StateSupervisionViolationResponses' >> beam.ParDo(
                SetViolationOnViolationsResponse()))

        # Group StateIncarcerationPeriods and StateSupervisionViolationResponses by person_id
        incarceration_periods_and_violation_responses = (
            {
                'incarceration_periods': incarceration_periods,
                'violation_responses':
                violation_responses_with_hydrated_violations
            } | 'Group StateIncarcerationPeriods to '
            'StateSupervisionViolationResponses' >> beam.CoGroupByKey())

        # Set the fully hydrated StateSupervisionViolationResponse entities on the corresponding
        # StateIncarcerationPeriods
        incarceration_periods_with_source_violations = (
            incarceration_periods_and_violation_responses
            | 'Set hydrated StateSupervisionViolationResponses on '
            'the StateIncarcerationPeriods' >> beam.ParDo(
                SetViolationResponseOnIncarcerationPeriod()))

        # Group each StatePerson with their related entities
        person_entities = (
            {
                'person': persons,
                'assessments': assessments,
                'incarceration_periods':
                incarceration_periods_with_source_violations,
                'supervision_periods': supervision_periods,
                'supervision_sentences':
                sentences_converted.supervision_sentences,
                'incarceration_sentences':
                sentences_converted.incarceration_sentences,
                'violation_responses':
                violation_responses_with_hydrated_violations,
                'supervision_contacts': supervision_contacts,
                'supervision_period_judicial_district_association':
                sp_to_judicial_district_kv
            }
            | 'Group StatePerson to all entities' >> beam.CoGroupByKey())

        # Identify SupervisionTimeBuckets from the StatePerson's StateSupervisionSentences and StateIncarcerationPeriods
        person_time_buckets = (
            person_entities
            | 'Get SupervisionTimeBuckets' >> beam.ParDo(
                ClassifySupervisionTimeBuckets(),
                AsDict(ssvr_agent_associations_as_kv),
                AsDict(supervision_period_to_agent_associations_as_kv)))

        # Get pipeline job details for accessing job_id
        all_pipeline_options = apache_beam_pipeline_options.get_all_options()

        # Get the type of metric to calculate
        metric_types_set = set(metric_types)

        # Add timestamp for local jobs
        job_timestamp = datetime.datetime.now().strftime(
            '%Y-%m-%d_%H_%M_%S.%f')
        all_pipeline_options['job_timestamp'] = job_timestamp

        # Get supervision metrics
        supervision_metrics = (
            person_time_buckets
            | 'Get Supervision Metrics' >> GetSupervisionMetrics(
                pipeline_options=all_pipeline_options,
                metric_types=metric_types_set,
                calculation_end_month=calculation_end_month,
                calculation_month_count=calculation_month_count))

        # Filtered (debug) runs never write output.
        if person_id_filter_set:
            logging.warning(
                "Non-empty person filter set - returning before writing metrics."
            )
            return

        # Convert the metrics into a format that's writable to BQ
        writable_metrics = (
            supervision_metrics
            | 'Convert to dict to be written to BQ' >>
            beam.ParDo(RecidivizMetricWritableDict()).with_outputs(
                SupervisionMetricType.SUPERVISION_COMPLIANCE.value,
                SupervisionMetricType.SUPERVISION_POPULATION.value,
                SupervisionMetricType.SUPERVISION_REVOCATION.value,
                SupervisionMetricType.SUPERVISION_REVOCATION_ANALYSIS.value,
                SupervisionMetricType.
                SUPERVISION_REVOCATION_VIOLATION_TYPE_ANALYSIS.value,
                SupervisionMetricType.SUPERVISION_SUCCESS.value,
                SupervisionMetricType.
                SUPERVISION_SUCCESSFUL_SENTENCE_DAYS_SERVED.value,
                SupervisionMetricType.SUPERVISION_TERMINATION.value))

        # Write the metrics to the output tables in BigQuery
        terminations_table_id = DATAFLOW_METRICS_TO_TABLES.get(
            SupervisionTerminationMetric)
        compliance_table_id = DATAFLOW_METRICS_TO_TABLES.get(
            SupervisionCaseComplianceMetric)
        populations_table_id = DATAFLOW_METRICS_TO_TABLES.get(
            SupervisionPopulationMetric)
        revocations_table_id = DATAFLOW_METRICS_TO_TABLES.get(
            SupervisionRevocationMetric)
        revocation_analysis_table_id = DATAFLOW_METRICS_TO_TABLES.get(
            SupervisionRevocationAnalysisMetric)
        revocation_violation_type_analysis_table_id = \
            DATAFLOW_METRICS_TO_TABLES.get(
                SupervisionRevocationViolationTypeAnalysisMetric)
        successes_table_id = DATAFLOW_METRICS_TO_TABLES.get(
            SupervisionSuccessMetric)
        successful_sentence_lengths_table_id = DATAFLOW_METRICS_TO_TABLES.get(
            SuccessfulSupervisionSentenceDaysServedMetric)

        _ = (writable_metrics.SUPERVISION_POPULATION
             | f"Write population metrics to BQ table: {populations_table_id}"
             >> beam.io.WriteToBigQuery(
                 table=populations_table_id,
                 dataset=output,
                 create_disposition=beam.io.BigQueryDisposition.CREATE_NEVER,
                 write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
                 method=beam.io.WriteToBigQuery.Method.FILE_LOADS))

        _ = (writable_metrics.SUPERVISION_REVOCATION
             | f"Write revocation metrics to BQ table: {revocations_table_id}"
             >> beam.io.WriteToBigQuery(
                 table=revocations_table_id,
                 dataset=output,
                 create_disposition=beam.io.BigQueryDisposition.CREATE_NEVER,
                 write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
                 method=beam.io.WriteToBigQuery.Method.FILE_LOADS))

        _ = (writable_metrics.SUPERVISION_SUCCESS
             | f"Write success metrics to BQ table: {successes_table_id}"
             >> beam.io.WriteToBigQuery(
                 table=successes_table_id,
                 dataset=output,
                 create_disposition=beam.io.BigQueryDisposition.CREATE_NEVER,
                 write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
                 method=beam.io.WriteToBigQuery.Method.FILE_LOADS))

        _ = (writable_metrics.SUPERVISION_SUCCESSFUL_SENTENCE_DAYS_SERVED
             | f"Write supervision successful sentence length metrics to BQ"
             f" table: {successful_sentence_lengths_table_id}" >>
             beam.io.WriteToBigQuery(
                 table=successful_sentence_lengths_table_id,
                 dataset=output,
                 create_disposition=beam.io.BigQueryDisposition.CREATE_NEVER,
                 write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
                 method=beam.io.WriteToBigQuery.Method.FILE_LOADS))

        _ = (writable_metrics.SUPERVISION_TERMINATION
             | f"Write termination metrics to BQ table: {terminations_table_id}"
             >> beam.io.WriteToBigQuery(
                 table=terminations_table_id,
                 dataset=output,
                 create_disposition=beam.io.BigQueryDisposition.CREATE_NEVER,
                 write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
                 method=beam.io.WriteToBigQuery.Method.FILE_LOADS))

        _ = (
            writable_metrics.SUPERVISION_REVOCATION_ANALYSIS
            |
            f"Write revocation analyses metrics to BQ table: {revocation_analysis_table_id}"
            >> beam.io.WriteToBigQuery(
                table=revocation_analysis_table_id,
                dataset=output,
                create_disposition=beam.io.BigQueryDisposition.CREATE_NEVER,
                write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
                method=beam.io.WriteToBigQuery.Method.FILE_LOADS))

        _ = (writable_metrics.SUPERVISION_REVOCATION_VIOLATION_TYPE_ANALYSIS
             | f"Write revocation violation type analyses metrics to BQ table: "
             f"{revocation_violation_type_analysis_table_id}" >>
             beam.io.WriteToBigQuery(
                 table=revocation_violation_type_analysis_table_id,
                 dataset=output,
                 create_disposition=beam.io.BigQueryDisposition.CREATE_NEVER,
                 write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
                 method=beam.io.WriteToBigQuery.Method.FILE_LOADS))

        _ = (writable_metrics.SUPERVISION_COMPLIANCE
             | f"Write compliance metrics to BQ table: {compliance_table_id}"
             >> beam.io.WriteToBigQuery(
                 table=compliance_table_id,
                 dataset=output,
                 create_disposition=beam.io.BigQueryDisposition.CREATE_NEVER,
                 write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
                 method=beam.io.WriteToBigQuery.Method.FILE_LOADS))
def run(apache_beam_pipeline_options: PipelineOptions, data_input: str,
        reference_input: str, output: str, metric_types: List[str],
        state_code: Optional[str],
        person_filter_ids: Optional[List[int]]):
    """Runs the recidivism calculation pipeline.

    Loads people, incarceration periods, and violation data from BigQuery,
    classifies ReleaseEvents per person, produces recidivism rate/count
    metrics, and writes them to the metric tables in the `output` dataset.

    Args:
        apache_beam_pipeline_options: Beam options for this pipeline run.
        data_input: dataset id (without project prefix) holding state entities.
        reference_input: dataset id (without project prefix) holding reference
            views.
        output: dataset the metric tables are written to.
        metric_types: names of the metric types to calculate.
        state_code: optional state code used to filter loaded entities.
        person_filter_ids: optional person ids to restrict the run to. When
            non-empty, metrics are computed but NOT written to BigQuery.
    """
    # Workaround to load SQLAlchemy objects at start of pipeline. This is
    # necessary because the BuildRootEntity function tries to access attributes
    # of relationship properties on the SQLAlchemy room_schema_class before they
    # have been loaded. However, if *any* SQLAlchemy objects have been
    # instantiated, then the relationship properties are loaded and their
    # attributes can be successfully accessed.
    _ = schema.StatePerson()

    # Required so that DoFns defined in the main module can be pickled for workers.
    apache_beam_pipeline_options.view_as(SetupOptions).save_main_session = True

    # Get pipeline job details
    all_pipeline_options = apache_beam_pipeline_options.get_all_options()

    query_dataset = all_pipeline_options['project'] + '.' + data_input
    reference_dataset = all_pipeline_options['project'] + '.' + reference_input

    person_id_filter_set = set(
        person_filter_ids) if person_filter_ids else None

    with beam.Pipeline(options=apache_beam_pipeline_options) as p:
        # Get StatePersons
        # NOTE(review): unlike the loads below, this one passes no state_code —
        # confirm whether StatePerson loading is intentionally unfiltered here.
        persons = (
            p | 'Load Persons' >> BuildRootEntity(
                dataset=query_dataset,
                root_entity_class=entities.StatePerson,
                unifying_id_field=entities.StatePerson.get_class_id_name(),
                build_related_entities=True,
                unifying_id_field_filter_set=person_id_filter_set))

        # Get StateIncarcerationPeriods
        incarceration_periods = (
            p | 'Load IncarcerationPeriods' >> BuildRootEntity(
                dataset=query_dataset,
                root_entity_class=entities.StateIncarcerationPeriod,
                unifying_id_field=entities.StatePerson.get_class_id_name(),
                build_related_entities=True,
                unifying_id_field_filter_set=person_id_filter_set,
                state_code=state_code))

        # Get StateSupervisionViolations
        supervision_violations = \
            (p | 'Load SupervisionViolations' >> BuildRootEntity(
                dataset=query_dataset,
                root_entity_class=entities.StateSupervisionViolation,
                unifying_id_field=entities.StatePerson.get_class_id_name(),
                build_related_entities=True,
                unifying_id_field_filter_set=person_id_filter_set,
                state_code=state_code))

        # TODO(2769): Don't bring this in as a root entity
        # Get StateSupervisionViolationResponses
        supervision_violation_responses = \
            (p | 'Load SupervisionViolationResponses' >> BuildRootEntity(
                dataset=query_dataset,
                root_entity_class=entities.StateSupervisionViolationResponse,
                unifying_id_field=entities.StatePerson.get_class_id_name(),
                build_related_entities=True,
                unifying_id_field_filter_set=person_id_filter_set,
                state_code=state_code))

        # Group StateSupervisionViolationResponses and
        # StateSupervisionViolations by person_id
        supervision_violations_and_responses = (
            {
                'violations': supervision_violations,
                'violation_responses': supervision_violation_responses
            } | 'Group StateSupervisionViolationResponses to '
            'StateSupervisionViolations' >> beam.CoGroupByKey())

        # Set the fully hydrated StateSupervisionViolation entities on
        # the corresponding StateSupervisionViolationResponses
        violation_responses_with_hydrated_violations = (
            supervision_violations_and_responses
            | 'Set hydrated StateSupervisionViolations on '
            'the StateSupervisionViolationResponses' >> beam.ParDo(
                SetViolationOnViolationsResponse()))

        # Group StateIncarcerationPeriods and StateSupervisionViolationResponses
        # by person_id
        incarceration_periods_and_violation_responses = (
            {
                'incarceration_periods': incarceration_periods,
                'violation_responses':
                violation_responses_with_hydrated_violations
            } | 'Group StateIncarcerationPeriods to '
            'StateSupervisionViolationResponses' >> beam.CoGroupByKey())

        # Set the fully hydrated StateSupervisionViolationResponse entities on
        # the corresponding StateIncarcerationPeriods
        incarceration_periods_with_source_violations = (
            incarceration_periods_and_violation_responses
            | 'Set hydrated StateSupervisionViolationResponses on '
            'the StateIncarcerationPeriods' >> beam.ParDo(
                SetViolationResponseOnIncarcerationPeriod()))

        # Group each StatePerson with their StateIncarcerationPeriods
        person_and_incarceration_periods = (
            {
                'person': persons,
                'incarceration_periods':
                incarceration_periods_with_source_violations
            }
            | 'Group StatePerson to StateIncarcerationPeriods' >>
            beam.CoGroupByKey())

        # Bring in the table that associates people and their county of residence
        person_id_to_county_query = select_all_by_person_query(
            reference_dataset,
            PERSONS_TO_RECENT_COUNTY_OF_RESIDENCE_VIEW_NAME,
            # TODO(3602): Once we put state_code on StatePerson objects, we can update the
            # persons_to_recent_county_of_residence query to have a state_code field, allowing us to also filter the
            # output by state_code.
            state_code_filter=None,
            person_id_filter_set=person_id_filter_set)

        person_id_to_county_kv = (
            p
            | "Read person_id to county associations from BigQuery" >>
            beam.io.Read(
                beam.io.BigQuerySource(query=person_id_to_county_query,
                                       use_standard_sql=True))
            | "Convert person_id to county association table to KV" >>
            beam.ParDo(ConvertDictToKVTuple(), 'person_id'))

        # Identify ReleaseEvents events from the StatePerson's
        # StateIncarcerationPeriods
        person_events = (
            person_and_incarceration_periods
            | "ClassifyReleaseEvents" >> beam.ParDo(
                ClassifyReleaseEvents(), AsDict(person_id_to_county_kv)))

        # Get pipeline job details for accessing job_id
        all_pipeline_options = apache_beam_pipeline_options.get_all_options()

        # Add timestamp for local jobs
        job_timestamp = datetime.datetime.now().strftime(
            '%Y-%m-%d_%H_%M_%S.%f')
        all_pipeline_options['job_timestamp'] = job_timestamp

        # Get the type of metric to calculate
        metric_types_set = set(metric_types)

        # Get recidivism metrics
        recidivism_metrics = (
            person_events
            | 'Get Recidivism Metrics' >> GetRecidivismMetrics(
                pipeline_options=all_pipeline_options,
                metric_types=metric_types_set))

        # Filtered (debug) runs never write output.
        if person_id_filter_set:
            logging.warning(
                "Non-empty person filter set - returning before writing metrics."
            )
            return

        # Convert the metrics into a format that's writable to BQ
        writable_metrics = (
            recidivism_metrics
            | 'Convert to dict to be written to BQ' >> beam.ParDo(
                RecidivismMetricWritableDict()).with_outputs(
                    'rates', 'counts'))

        # Write the recidivism metrics to the output tables in BigQuery
        rates_table_id = DATAFLOW_METRICS_TO_TABLES.get(
            ReincarcerationRecidivismRateMetric)
        counts_table_id = DATAFLOW_METRICS_TO_TABLES.get(
            ReincarcerationRecidivismCountMetric)

        _ = (writable_metrics.rates
             | f"Write rate metrics to BQ table: {rates_table_id}" >>
             beam.io.WriteToBigQuery(
                 table=rates_table_id,
                 dataset=output,
                 create_disposition=beam.io.BigQueryDisposition.CREATE_NEVER,
                 write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
                 method=beam.io.WriteToBigQuery.Method.FILE_LOADS))

        _ = (writable_metrics.counts
             | f"Write count metrics to BQ table: {counts_table_id}" >>
             beam.io.WriteToBigQuery(
                 table=counts_table_id,
                 dataset=output,
                 create_disposition=beam.io.BigQueryDisposition.CREATE_NEVER,
                 write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
                 method=beam.io.WriteToBigQuery.Method.FILE_LOADS))
def run(apache_beam_pipeline_options: PipelineOptions, data_input: str,
        reference_input: str, output: str, calculation_month_count: int,
        metric_types: List[str], state_code: Optional[str],
        calculation_end_month: Optional[str],
        person_filter_ids: Optional[List[int]]):
    """Runs the incarceration calculation pipeline.

    Loads people, sentence groups, and sentences from BigQuery, classifies
    incarceration events per person, produces admission/population/release
    metrics, and writes them to the metric tables in the `output` dataset.

    Args:
        apache_beam_pipeline_options: Beam options for this pipeline run.
        data_input: dataset id (without project prefix) holding state entities.
        reference_input: dataset id (without project prefix) holding reference
            views.
        output: dataset the metric tables are written to.
        calculation_month_count: number of months passed through to the metric
            calculation.
        metric_types: names of the metric types to calculate.
        state_code: optional state code used to filter loaded entities and
            reference views.
        calculation_end_month: optional end month passed through to the metric
            calculation.
        person_filter_ids: optional person ids to restrict the run to. When
            non-empty, metrics are computed but NOT written to BigQuery.
    """
    # Workaround to load SQLAlchemy objects at start of pipeline. This is necessary because the BuildRootEntity
    # function tries to access attributes of relationship properties on the SQLAlchemy room_schema_class before they
    # have been loaded. However, if *any* SQLAlchemy objects have been instantiated, then the relationship properties
    # are loaded and their attributes can be successfully accessed.
    _ = schema.StatePerson()

    # Required so that DoFns defined in the main module can be pickled for workers.
    apache_beam_pipeline_options.view_as(SetupOptions).save_main_session = True

    # Get pipeline job details
    all_apache_beam_pipeline_options = \
        apache_beam_pipeline_options.get_all_options()

    query_dataset = all_apache_beam_pipeline_options[
        'project'] + '.' + data_input
    reference_dataset = all_apache_beam_pipeline_options[
        'project'] + '.' + reference_input

    person_id_filter_set = set(
        person_filter_ids) if person_filter_ids else None

    with beam.Pipeline(options=apache_beam_pipeline_options) as p:
        # Get StatePersons
        persons = (p | 'Load StatePersons' >> BuildRootEntity(
            dataset=query_dataset,
            root_entity_class=entities.StatePerson,
            unifying_id_field=entities.StatePerson.get_class_id_name(),
            build_related_entities=True,
            unifying_id_field_filter_set=person_id_filter_set))

        # Get StateSentenceGroups
        sentence_groups = (p | 'Load StateSentenceGroups' >> BuildRootEntity(
            dataset=query_dataset,
            root_entity_class=entities.StateSentenceGroup,
            unifying_id_field=entities.StatePerson.get_class_id_name(),
            build_related_entities=True,
            unifying_id_field_filter_set=person_id_filter_set,
            state_code=state_code))

        # Get StateIncarcerationSentences
        incarceration_sentences = (
            p | 'Load StateIncarcerationSentences' >> BuildRootEntity(
                dataset=query_dataset,
                root_entity_class=entities.StateIncarcerationSentence,
                unifying_id_field=entities.StatePerson.get_class_id_name(),
                build_related_entities=True,
                unifying_id_field_filter_set=person_id_filter_set,
                state_code=state_code))

        # Get StateSupervisionSentences
        supervision_sentences = (
            p | 'Load StateSupervisionSentences' >> BuildRootEntity(
                dataset=query_dataset,
                root_entity_class=entities.StateSupervisionSentence,
                unifying_id_field=entities.StatePerson.get_class_id_name(),
                build_related_entities=True,
                unifying_id_field_filter_set=person_id_filter_set,
                state_code=state_code))

        # MO sentence statuses are only needed for MO (or unfiltered) runs;
        # other states get an empty PCollection so the CoGroupByKey shape is
        # the same either way.
        if state_code is None or state_code == 'US_MO':
            # Bring in the reference table that includes sentence status ranking information
            us_mo_sentence_status_query = select_all_by_person_query(
                reference_dataset, US_MO_SENTENCE_STATUSES_VIEW_NAME,
                state_code, person_id_filter_set)

            us_mo_sentence_statuses = (
                p | "Read MO sentence status table from BigQuery" >>
                beam.io.Read(
                    beam.io.BigQuerySource(query=us_mo_sentence_status_query,
                                           use_standard_sql=True)))
        else:
            us_mo_sentence_statuses = (
                p |
                f"Generate empty MO statuses list for non-MO state run: {state_code} "
                >> beam.Create([]))

        us_mo_sentence_status_rankings_as_kv = (
            us_mo_sentence_statuses
            | 'Convert MO sentence status ranking table to KV tuples' >>
            beam.ParDo(ConvertDictToKVTuple(), 'person_id'))

        supervision_sentences_and_statuses = (
            {
                'incarceration_sentences': incarceration_sentences,
                'supervision_sentences': supervision_sentences,
                'sentence_statuses': us_mo_sentence_status_rankings_as_kv
            }
            | 'Group sentences to the sentence statuses for that person' >>
            beam.CoGroupByKey())

        sentences_converted = (
            supervision_sentences_and_statuses
            | 'Convert to state-specific sentences' >> beam.ParDo(
                ConvertSentencesToStateSpecificType()).with_outputs(
                    'incarceration_sentences', 'supervision_sentences'))

        sentences_and_sentence_groups = (
            {
                'sentence_groups': sentence_groups,
                'incarceration_sentences':
                sentences_converted.incarceration_sentences,
                'supervision_sentences':
                sentences_converted.supervision_sentences
            }
            | 'Group sentences to sentence groups' >> beam.CoGroupByKey())

        # Set hydrated sentences on the corresponding sentence groups
        sentence_groups_with_hydrated_sentences = (
            sentences_and_sentence_groups
            | 'Set hydrated sentences on sentence groups' >> beam.ParDo(
                SetSentencesOnSentenceGroup()))

        # Bring in the table that associates people and their county of residence
        person_id_to_county_query = select_all_by_person_query(
            reference_dataset,
            PERSONS_TO_RECENT_COUNTY_OF_RESIDENCE_VIEW_NAME,
            # TODO(3602): Once we put state_code on StatePerson objects, we can update the
            # persons_to_recent_county_of_residence query to have a state_code field, allowing us to also filter the
            # output by state_code.
            state_code_filter=None,
            person_id_filter_set=person_id_filter_set)

        person_id_to_county_kv = (
            p
            | "Read person_id to county associations from BigQuery" >>
            beam.io.Read(
                beam.io.BigQuerySource(query=person_id_to_county_query,
                                       use_standard_sql=True))
            | "Convert person_id to county association table to KV" >>
            beam.ParDo(ConvertDictToKVTuple(), 'person_id'))

        # Bring in the judicial districts associated with incarceration_periods
        ip_to_judicial_district_query = select_all_by_person_query(
            reference_dataset,
            INCARCERATION_PERIOD_JUDICIAL_DISTRICT_ASSOCIATION_VIEW_NAME,
            state_code, person_id_filter_set)

        ip_to_judicial_district_kv = (
            p
            | "Read incarceration_period to judicial_district associations from BigQuery"
            >> beam.io.Read(
                beam.io.BigQuerySource(query=ip_to_judicial_district_query,
                                       use_standard_sql=True))
            | "Convert incarceration_period to judicial_district association table to KV"
            >> beam.ParDo(ConvertDictToKVTuple(), 'person_id'))

        # Group each StatePerson with their related entities
        person_entities = (
            {
                'person': persons,
                'sentence_groups': sentence_groups_with_hydrated_sentences,
                'incarceration_period_judicial_district_association':
                ip_to_judicial_district_kv
            }
            | 'Group StatePerson to SentenceGroups' >> beam.CoGroupByKey())

        # Identify IncarcerationEvents events from the StatePerson's StateIncarcerationPeriods
        person_events = (
            person_entities
            | 'Classify Incarceration Events' >> beam.ParDo(
                ClassifyIncarcerationEvents(), AsDict(person_id_to_county_kv)))

        # Get pipeline job details for accessing job_id
        all_pipeline_options = apache_beam_pipeline_options.get_all_options()

        # Add timestamp for local jobs
        job_timestamp = datetime.datetime.now().strftime(
            '%Y-%m-%d_%H_%M_%S.%f')
        # BUGFIX: the timestamp was previously set on the stale
        # all_apache_beam_pipeline_options dict, so the freshly-fetched
        # all_pipeline_options passed to GetIncarcerationMetrics below never
        # carried 'job_timestamp'. Set it on the dict that is actually passed,
        # matching the supervision and recidivism pipelines.
        all_pipeline_options['job_timestamp'] = job_timestamp

        # Get the type of metric to calculate
        metric_types_set = set(metric_types)

        # Get IncarcerationMetrics
        incarceration_metrics = (
            person_events
            | 'Get Incarceration Metrics' >> GetIncarcerationMetrics(
                pipeline_options=all_pipeline_options,
                metric_types=metric_types_set,
                calculation_end_month=calculation_end_month,
                calculation_month_count=calculation_month_count))

        # Filtered (debug) runs never write output.
        if person_id_filter_set:
            logging.warning(
                "Non-empty person filter set - returning before writing metrics."
            )
            return

        # Convert the metrics into a format that's writable to BQ
        writable_metrics = (
            incarceration_metrics
            | 'Convert to dict to be written to BQ' >> beam.ParDo(
                RecidivizMetricWritableDict()).with_outputs(
                    IncarcerationMetricType.INCARCERATION_ADMISSION.value,
                    IncarcerationMetricType.INCARCERATION_POPULATION.value,
                    IncarcerationMetricType.INCARCERATION_RELEASE.value))

        # Write the metrics to the output tables in BigQuery
        admissions_table_id = DATAFLOW_METRICS_TO_TABLES.get(
            IncarcerationAdmissionMetric)
        population_table_id = DATAFLOW_METRICS_TO_TABLES.get(
            IncarcerationPopulationMetric)
        releases_table_id = DATAFLOW_METRICS_TO_TABLES.get(
            IncarcerationReleaseMetric)

        _ = (writable_metrics.INCARCERATION_ADMISSION
             | f"Write admission metrics to BQ table: {admissions_table_id}"
             >> beam.io.WriteToBigQuery(
                 table=admissions_table_id,
                 dataset=output,
                 create_disposition=beam.io.BigQueryDisposition.CREATE_NEVER,
                 write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
                 method=beam.io.WriteToBigQuery.Method.FILE_LOADS))

        _ = (writable_metrics.INCARCERATION_POPULATION
             | f"Write population metrics to BQ table: {population_table_id}"
             >> beam.io.WriteToBigQuery(
                 table=population_table_id,
                 dataset=output,
                 create_disposition=beam.io.BigQueryDisposition.CREATE_NEVER,
                 write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
                 method=beam.io.WriteToBigQuery.Method.FILE_LOADS))

        _ = (writable_metrics.INCARCERATION_RELEASE
             | f"Write release metrics to BQ table: {releases_table_id}"
             >> beam.io.WriteToBigQuery(
                 table=releases_table_id,
                 dataset=output,
                 create_disposition=beam.io.BigQueryDisposition.CREATE_NEVER,
                 write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
                 method=beam.io.WriteToBigQuery.Method.FILE_LOADS))
def run(apache_beam_pipeline_options: PipelineOptions, data_input: str,
        reference_input: str, output: str, calculation_month_count: int,
        metric_types: List[str], state_code: Optional[str],
        calculation_end_month: Optional[str],
        person_filter_ids: Optional[List[int]]) -> None:
    """Runs the program calculation pipeline.

    Builds an Apache Beam pipeline that loads StatePersons and their program
    assignments / assessments / supervision periods from BigQuery, classifies
    program events per person, produces program metrics, and appends them to
    the output BigQuery dataset.

    Args:
        apache_beam_pipeline_options: Beam PipelineOptions; must have a
            `project` set.
        data_input: Name of the BigQuery dataset holding entity tables.
        reference_input: Name of the BigQuery dataset holding reference views.
        output: Name of the BigQuery dataset metrics are written to.
        calculation_month_count: Number of months to calculate over.
        metric_types: Metric type names to include in the calculation.
        state_code: Optional state code used to filter loaded entities.
        calculation_end_month: Optional end month (inclusive) of the window.
        person_filter_ids: Optional person_id allowlist; when non-empty the
            pipeline runs but returns before writing any metrics.

    Raises:
        ValueError: If no `project` is set in the pipeline options.
    """
    # Workaround to load SQLAlchemy objects at start of pipeline. This is necessary because the BuildRootEntity
    # function tries to access attributes of relationship properties on the SQLAlchemy room_schema_class before they
    # have been loaded. However, if *any* SQLAlchemy objects have been instantiated, then the relationship properties
    # are loaded and their attributes can be successfully accessed.
    _ = schema.StatePerson()

    apache_beam_pipeline_options.view_as(SetupOptions).save_main_session = True

    # Get pipeline job details
    all_pipeline_options = apache_beam_pipeline_options.get_all_options()

    # Fail fast (consistent with the other calculation pipelines) instead of
    # building dataset names like "None.my_dataset".
    project_id = all_pipeline_options['project']
    if project_id is None:
        raise ValueError(f"No project set in pipeline options: {all_pipeline_options}")

    input_dataset = project_id + '.' + data_input
    reference_dataset = project_id + '.' + reference_input

    person_id_filter_set = set(person_filter_ids) if person_filter_ids else None

    with beam.Pipeline(options=apache_beam_pipeline_options) as p:
        # Get StatePersons
        # NOTE(review): unlike the other BuildRootEntity loads below, this one
        # passes no state_code filter — confirm whether that is intentional.
        persons = (p | 'Load Persons' >> BuildRootEntity(
            dataset=input_dataset,
            root_entity_class=entities.StatePerson,
            unifying_id_field=entities.StatePerson.get_class_id_name(),
            build_related_entities=True,
            unifying_id_field_filter_set=person_id_filter_set))

        # Get StateProgramAssignments
        program_assignments = (
            p | 'Load Program Assignments' >> BuildRootEntity(
                dataset=input_dataset,
                root_entity_class=entities.StateProgramAssignment,
                unifying_id_field=entities.StatePerson.get_class_id_name(),
                build_related_entities=True,
                unifying_id_field_filter_set=person_id_filter_set,
                state_code=state_code))

        # Get StateAssessments
        assessments = (p | 'Load Assessments' >> BuildRootEntity(
            dataset=input_dataset,
            root_entity_class=entities.StateAssessment,
            unifying_id_field=entities.StatePerson.get_class_id_name(),
            build_related_entities=False,
            unifying_id_field_filter_set=person_id_filter_set,
            state_code=state_code))

        # Get StateSupervisionPeriods
        supervision_periods = (
            p | 'Load SupervisionPeriods' >> BuildRootEntity(
                dataset=input_dataset,
                root_entity_class=entities.StateSupervisionPeriod,
                unifying_id_field=entities.StatePerson.get_class_id_name(),
                build_related_entities=False,
                unifying_id_field_filter_set=person_id_filter_set,
                state_code=state_code))

        # Bring in the reference view associating supervision periods with
        # supervising agents.
        supervision_period_to_agent_association_query = select_all_by_person_query(
            reference_dataset,
            SUPERVISION_PERIOD_TO_AGENT_ASSOCIATION_VIEW_NAME, state_code,
            person_id_filter_set)

        supervision_period_to_agent_associations = (
            p | "Read Supervision Period to Agent table from BigQuery" >>
            beam.io.Read(
                beam.io.BigQuerySource(
                    query=supervision_period_to_agent_association_query,
                    use_standard_sql=True)))

        # Convert the association table rows into key-value tuples with the value for the supervision_period_id column
        # as the key
        supervision_period_to_agent_associations_as_kv = (
            supervision_period_to_agent_associations
            | 'Convert Supervision Period to Agent table to KV tuples' >>
            beam.ParDo(ConvertDictToKVTuple(), 'supervision_period_id'))

        # Group each StatePerson with their other entities
        # NOTE(review): step label below looks truncated ("... and"); kept
        # as-is since changing it renames the pipeline step.
        persons_entities = ({
            'person': persons,
            'program_assignments': program_assignments,
            'assessments': assessments,
            'supervision_periods': supervision_periods
        } | 'Group StatePerson to StateProgramAssignments and' >>
                            beam.CoGroupByKey())

        # Identify ProgramEvents from the StatePerson's StateProgramAssignments
        person_program_events = (
            persons_entities
            | beam.ParDo(
                ClassifyProgramAssignments(),
                AsDict(supervision_period_to_agent_associations_as_kv)))

        # Get pipeline job details for accessing job_id
        all_pipeline_options = apache_beam_pipeline_options.get_all_options()

        # Add timestamp for local jobs
        job_timestamp = datetime.datetime.now().strftime(
            '%Y-%m-%d_%H_%M_%S.%f')
        all_pipeline_options['job_timestamp'] = job_timestamp

        # Get the type of metric to calculate
        metric_types_set = set(metric_types)

        # Get program metrics
        program_metrics = (
            person_program_events
            | 'Get Program Metrics' >> GetProgramMetrics(
                pipeline_options=all_pipeline_options,
                metric_types=metric_types_set,
                calculation_end_month=calculation_end_month,
                calculation_month_count=calculation_month_count))

        if person_id_filter_set:
            logging.warning(
                "Non-empty person filter set - returning before writing metrics."
            )
            return

        # Convert the metrics into a format that's writable to BQ
        writable_metrics = (
            program_metrics
            | 'Convert to dict to be written to BQ' >> beam.ParDo(
                ProgramMetricWritableDict()).with_outputs(
                    'participation', 'referrals'))

        # Write the metrics to the output tables in BigQuery.
        # Bracket indexing (not .get) so a missing metric class raises a
        # KeyError instead of silently passing table=None to BigQuery —
        # consistent with the other calculation pipelines in this file.
        referrals_table_id = DATAFLOW_METRICS_TO_TABLES[ProgramReferralMetric]
        participation_table_id = DATAFLOW_METRICS_TO_TABLES[
            ProgramParticipationMetric]

        _ = (writable_metrics.referrals
             | f"Write referral metrics to BQ table: {referrals_table_id}" >>
             beam.io.WriteToBigQuery(
                 table=referrals_table_id,
                 dataset=output,
                 create_disposition=beam.io.BigQueryDisposition.CREATE_NEVER,
                 write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
                 method=beam.io.WriteToBigQuery.Method.FILE_LOADS))

        _ = (
            writable_metrics.participation
            | f"Write participation metrics to BQ table: {participation_table_id}"
            >> beam.io.WriteToBigQuery(
                table=participation_table_id,
                dataset=output,
                create_disposition=beam.io.BigQueryDisposition.CREATE_NEVER,
                write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
                method=beam.io.WriteToBigQuery.Method.FILE_LOADS))
def run(
    apache_beam_pipeline_options: PipelineOptions,
    data_input: str,
    reference_view_input: str,
    static_reference_input: str,
    output: str,
    calculation_month_count: int,
    metric_types: List[str],
    state_code: str,
    calculation_end_month: Optional[str],
    person_filter_ids: Optional[List[int]],
) -> None:
    """Runs the incarceration calculation pipeline.

    Loads StatePersons with their sentence groups and sentences, hydrates the
    sentence hierarchy, classifies incarceration events, computes incarceration
    metrics, and appends them to the output BigQuery dataset.

    Args:
        apache_beam_pipeline_options: Beam PipelineOptions; must have a
            `project` set.
        data_input: BigQuery dataset name holding entity tables.
        reference_view_input: BigQuery dataset name holding reference views.
        static_reference_input: BigQuery dataset name holding static reference
            tables (e.g. population counts).
        output: BigQuery dataset name metrics are written to.
        calculation_month_count: Number of months to calculate over.
        metric_types: Metric type names to include in the calculation.
        state_code: State code used to filter loaded entities (required).
        calculation_end_month: Optional end month (inclusive) of the window.
        person_filter_ids: Optional person_id allowlist; when non-empty the
            pipeline runs but returns before writing any metrics.

    Raises:
        ValueError: If no project is set, or state_code is None.
    """
    # Workaround to load SQLAlchemy objects at start of pipeline. This is necessary because the BuildRootEntity
    # function tries to access attributes of relationship properties on the SQLAlchemy room_schema_class before they
    # have been loaded. However, if *any* SQLAlchemy objects have been instantiated, then the relationship properties
    # are loaded and their attributes can be successfully accessed.
    _ = schema.StatePerson()

    apache_beam_pipeline_options.view_as(SetupOptions).save_main_session = True

    # Get pipeline job details
    all_pipeline_options = apache_beam_pipeline_options.get_all_options()
    project_id = all_pipeline_options["project"]

    if project_id is None:
        raise ValueError(f"No project set in pipeline options: {all_pipeline_options}")

    if state_code is None:
        raise ValueError("No state_code set for pipeline")

    # Fully-qualified "<project>.<dataset>" names for each input dataset.
    input_dataset = project_id + "." + data_input
    reference_dataset = project_id + "." + reference_view_input
    static_reference_dataset = project_id + "." + static_reference_input

    person_id_filter_set = set(person_filter_ids) if person_filter_ids else None

    with beam.Pipeline(options=apache_beam_pipeline_options) as p:
        # Get StatePersons
        persons = p | "Load StatePersons" >> BuildRootEntity(
            dataset=input_dataset,
            root_entity_class=entities.StatePerson,
            unifying_id_field=entities.StatePerson.get_class_id_name(),
            build_related_entities=True,
            unifying_id_field_filter_set=person_id_filter_set,
            state_code=state_code,
        )

        # Get StateSentenceGroups
        sentence_groups = p | "Load StateSentenceGroups" >> BuildRootEntity(
            dataset=input_dataset,
            root_entity_class=entities.StateSentenceGroup,
            unifying_id_field=entities.StatePerson.get_class_id_name(),
            build_related_entities=True,
            unifying_id_field_filter_set=person_id_filter_set,
            state_code=state_code,
        )

        # Get StateIncarcerationSentences
        incarceration_sentences = (
            p
            | "Load StateIncarcerationSentences" >> BuildRootEntity(
                dataset=input_dataset,
                root_entity_class=entities.StateIncarcerationSentence,
                unifying_id_field=entities.StatePerson.get_class_id_name(),
                build_related_entities=True,
                unifying_id_field_filter_set=person_id_filter_set,
                state_code=state_code,
            )
        )

        # Get StateSupervisionSentences
        supervision_sentences = p | "Load StateSupervisionSentences" >> BuildRootEntity(
            dataset=input_dataset,
            root_entity_class=entities.StateSupervisionSentence,
            unifying_id_field=entities.StatePerson.get_class_id_name(),
            build_related_entities=True,
            unifying_id_field_filter_set=person_id_filter_set,
            state_code=state_code,
        )

        if state_code == "US_MO":
            # Bring in the reference table that includes sentence status ranking information
            us_mo_sentence_status_query = select_all_by_person_query(
                reference_dataset,
                US_MO_SENTENCE_STATUSES_VIEW_NAME,
                state_code,
                person_id_filter_set,
            )

            us_mo_sentence_statuses = (
                p
                | "Read MO sentence status table from BigQuery"
                >> ReadFromBigQuery(query=us_mo_sentence_status_query)
            )
        else:
            # Non-MO states get an empty PCollection so the downstream
            # CoGroupByKey sees the same graph shape in every state.
            us_mo_sentence_statuses = (
                p
                | f"Generate empty MO statuses list for non-MO state run: {state_code} "
                >> beam.Create([])
            )

        us_mo_sentence_status_rankings_as_kv = (
            us_mo_sentence_statuses
            | "Convert MO sentence status ranking table to KV tuples"
            >> beam.ParDo(ConvertDictToKVTuple(), "person_id")
        )

        # Join both sentence types with the (possibly empty) MO statuses,
        # keyed by person_id.
        supervision_sentences_and_statuses = (
            {
                "incarceration_sentences": incarceration_sentences,
                "supervision_sentences": supervision_sentences,
                "sentence_statuses": us_mo_sentence_status_rankings_as_kv,
            }
            | "Group sentences to the sentence statuses for that person"
            >> beam.CoGroupByKey()
        )

        # Tagged outputs split the converted sentences back into the two types.
        sentences_converted = (
            supervision_sentences_and_statuses
            | "Convert to state-specific sentences"
            >> beam.ParDo(ConvertSentencesToStateSpecificType()).with_outputs(
                "incarceration_sentences", "supervision_sentences"
            )
        )

        sentences_and_sentence_groups = {
            "sentence_groups": sentence_groups,
            "incarceration_sentences": sentences_converted.incarceration_sentences,
            "supervision_sentences": sentences_converted.supervision_sentences,
        } | "Group sentences to sentence groups" >> beam.CoGroupByKey()

        # Set hydrated sentences on the corresponding sentence groups
        sentence_groups_with_hydrated_sentences = (
            sentences_and_sentence_groups
            | "Set hydrated sentences on sentence groups"
            >> beam.ParDo(SetSentencesOnSentenceGroup())
        )

        # Bring in the table that associates people and their county of residence
        person_id_to_county_kv = (
            p
            | "Load person_id_to_county_kv" >> ImportTableAsKVTuples(
                dataset_id=reference_dataset,
                table_id=PERSONS_TO_RECENT_COUNTY_OF_RESIDENCE_VIEW_NAME,
                table_key="person_id",
                state_code_filter=state_code,
                person_id_filter_set=person_id_filter_set,
            )
        )

        # Judicial district associated with each incarceration period, keyed
        # by person_id.
        ip_to_judicial_district_kv = (
            p
            | "Load ip_to_judicial_district_kv" >> ImportTableAsKVTuples(
                dataset_id=reference_dataset,
                table_id=INCARCERATION_PERIOD_JUDICIAL_DISTRICT_ASSOCIATION_VIEW_NAME,
                table_key="person_id",
                state_code_filter=state_code,
                person_id_filter_set=person_id_filter_set,
            )
        )

        # Static state-level race/ethnicity population counts; deliberately
        # not filtered per person (person_id_filter_set=None).
        state_race_ethnicity_population_counts = (
            p
            | "Load state_race_ethnicity_population_counts" >> ImportTable(
                dataset_id=static_reference_dataset,
                table_id="state_race_ethnicity_population_counts",
                state_code_filter=state_code,
                person_id_filter_set=None,
            )
        )

        # Group each StatePerson with their related entities
        person_entities = {
            "person": persons,
            "sentence_groups": sentence_groups_with_hydrated_sentences,
            "incarceration_period_judicial_district_association": ip_to_judicial_district_kv,
            "persons_to_recent_county_of_residence": person_id_to_county_kv,
        } | "Group StatePerson to SentenceGroups" >> beam.CoGroupByKey()

        # Identify IncarcerationEvents events from the StatePerson's StateIncarcerationPeriods
        person_incarceration_events = (
            person_entities
            | "Classify Incarceration Events"
            >> beam.ParDo(ClassifyIncarcerationEvents())
        )

        person_metadata = (
            persons
            | "Build the person_metadata dictionary"
            >> beam.ParDo(
                BuildPersonMetadata(), AsList(state_race_ethnicity_population_counts)
            )
        )

        person_incarceration_events_with_metadata = (
            {
                "person_events": person_incarceration_events,
                "person_metadata": person_metadata,
            }
            | "Group IncarcerationEvents with person-level metadata"
            >> beam.CoGroupByKey()
            | "Organize StatePerson, PersonMetadata and IncarcerationEvents for calculations"
            >> beam.ParDo(ExtractPersonEventsMetadata())
        )

        # Get pipeline job details for accessing job_id
        all_pipeline_options = apache_beam_pipeline_options.get_all_options()

        # Add timestamp for local jobs
        job_timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H_%M_%S.%f")
        all_pipeline_options["job_timestamp"] = job_timestamp

        # Get the type of metric to calculate
        metric_types_set = set(metric_types)

        # Get IncarcerationMetrics
        incarceration_metrics = (
            person_incarceration_events_with_metadata
            | "Get Incarceration Metrics"
            >> GetIncarcerationMetrics(
                pipeline_options=all_pipeline_options,
                metric_types=metric_types_set,
                calculation_end_month=calculation_end_month,
                calculation_month_count=calculation_month_count,
            )
        )

        if person_id_filter_set:
            # A person filter means this is a debugging run — never write
            # filtered metrics to the shared output tables.
            logging.warning(
                "Non-empty person filter set - returning before writing metrics."
            )
            return

        # Convert the metrics into a format that's writable to BQ
        writable_metrics = (
            incarceration_metrics
            | "Convert to dict to be written to BQ"
            >> beam.ParDo(RecidivizMetricWritableDict()).with_outputs(
                IncarcerationMetricType.INCARCERATION_ADMISSION.value,
                IncarcerationMetricType.INCARCERATION_POPULATION.value,
                IncarcerationMetricType.INCARCERATION_RELEASE.value,
            )
        )

        # Write the metrics to the output tables in BigQuery
        admissions_table_id = DATAFLOW_METRICS_TO_TABLES[IncarcerationAdmissionMetric]
        population_table_id = DATAFLOW_METRICS_TO_TABLES[IncarcerationPopulationMetric]
        releases_table_id = DATAFLOW_METRICS_TO_TABLES[IncarcerationReleaseMetric]

        _ = (
            writable_metrics.INCARCERATION_ADMISSION
            | f"Write admission metrics to BQ table: {admissions_table_id}"
            >> WriteAppendToBigQuery(
                output_table=admissions_table_id,
                output_dataset=output,
            )
        )

        _ = (
            writable_metrics.INCARCERATION_POPULATION
            | f"Write population metrics to BQ table: {population_table_id}"
            >> WriteAppendToBigQuery(
                output_table=population_table_id,
                output_dataset=output,
            )
        )

        _ = (
            writable_metrics.INCARCERATION_RELEASE
            | f"Write release metrics to BQ table: {releases_table_id}"
            >> WriteAppendToBigQuery(
                output_table=releases_table_id,
                output_dataset=output,
            )
        )
def execute_pipeline(
    self,
    pipeline: beam.Pipeline,
    all_pipeline_options: Dict[str, Any],
    state_code: str,
    input_dataset: str,
    reference_dataset: str,
    static_reference_dataset: str,
    metric_types: List[str],
    person_id_filter_set: Optional[Set[int]],
    calculation_month_count: int = -1,
    calculation_end_month: Optional[str] = None,
) -> beam.Pipeline:
    """Builds the incarceration metric calculation graph on `pipeline`.

    Loads persons, sentence groups, both sentence types, supervision periods,
    assessments, violations and violation responses; hydrates the
    sentence/violation hierarchies; classifies events with the identifier from
    self.pipeline_config; and returns the resulting metrics PCollection.
    Writing the metrics is left to the caller.

    Args:
        pipeline: The Beam pipeline to attach transforms to.
        all_pipeline_options: Dict of pipeline options; mutated to add a
            `job_timestamp` entry.
        state_code: State code used to filter every load.
        input_dataset: Fully-qualified dataset with entity tables.
        reference_dataset: Fully-qualified dataset with reference views.
        static_reference_dataset: Fully-qualified dataset with static tables.
        metric_types: Metric type names to include.
        person_id_filter_set: Optional person_id allowlist for debugging runs.
        calculation_month_count: Number of months to calculate over (-1
            presumably means "all" — confirm against GetMetrics).
        calculation_end_month: Optional end month (inclusive) of the window.

    Returns:
        The PCollection of incarceration metrics produced by GetMetrics.
    """
    # Get StatePersons
    persons = pipeline | "Load StatePersons" >> BuildRootEntity(
        dataset=input_dataset,
        root_entity_class=entities.StatePerson,
        unifying_id_field=entities.StatePerson.get_class_id_name(),
        build_related_entities=True,
        unifying_id_field_filter_set=person_id_filter_set,
        state_code=state_code,
    )

    # Get StateSentenceGroups
    sentence_groups = pipeline | "Load StateSentenceGroups" >> BuildRootEntity(
        dataset=input_dataset,
        root_entity_class=entities.StateSentenceGroup,
        unifying_id_field=entities.StatePerson.get_class_id_name(),
        build_related_entities=True,
        unifying_id_field_filter_set=person_id_filter_set,
        state_code=state_code,
    )

    # Get StateIncarcerationSentences
    incarceration_sentences = (
        pipeline
        | "Load StateIncarcerationSentences" >> BuildRootEntity(
            dataset=input_dataset,
            root_entity_class=entities.StateIncarcerationSentence,
            unifying_id_field=entities.StatePerson.get_class_id_name(),
            build_related_entities=True,
            unifying_id_field_filter_set=person_id_filter_set,
            state_code=state_code,
        ))

    # Get StateSupervisionSentences
    supervision_sentences = (
        pipeline
        | "Load StateSupervisionSentences" >> BuildRootEntity(
            dataset=input_dataset,
            root_entity_class=entities.StateSupervisionSentence,
            unifying_id_field=entities.StatePerson.get_class_id_name(),
            build_related_entities=True,
            unifying_id_field_filter_set=person_id_filter_set,
            state_code=state_code,
        ))

    # Get StateSupervisionPeriods
    supervision_periods = (
        pipeline
        | "Load StateSupervisionPeriods" >> BuildRootEntity(
            dataset=input_dataset,
            root_entity_class=entities.StateSupervisionPeriod,
            unifying_id_field=entities.StatePerson.get_class_id_name(),
            build_related_entities=True,
            unifying_id_field_filter_set=person_id_filter_set,
            state_code=state_code,
        ))

    # Get StateAssessments (no related entities needed for assessments)
    assessments = pipeline | "Load Assessments" >> BuildRootEntity(
        dataset=input_dataset,
        root_entity_class=entities.StateAssessment,
        unifying_id_field=entities.StatePerson.get_class_id_name(),
        build_related_entities=False,
        unifying_id_field_filter_set=person_id_filter_set,
        state_code=state_code,
    )

    # Get StateSupervisionViolations
    supervision_violations = (
        pipeline
        | "Load SupervisionViolations" >> BuildRootEntity(
            dataset=input_dataset,
            root_entity_class=entities.StateSupervisionViolation,
            unifying_id_field=entities.StatePerson.get_class_id_name(),
            build_related_entities=True,
            unifying_id_field_filter_set=person_id_filter_set,
            state_code=state_code,
        ))

    # Get StateSupervisionViolationResponses
    supervision_violation_responses = (
        pipeline
        | "Load SupervisionViolationResponses" >> BuildRootEntity(
            dataset=input_dataset,
            root_entity_class=entities.StateSupervisionViolationResponse,
            unifying_id_field=entities.StatePerson.get_class_id_name(),
            build_related_entities=True,
            unifying_id_field_filter_set=person_id_filter_set,
            state_code=state_code,
        ))

    if state_code == "US_MO":
        # Bring in the reference table that includes sentence status ranking information
        us_mo_sentence_status_query = select_all_by_person_query(
            reference_dataset,
            US_MO_SENTENCE_STATUSES_VIEW_NAME,
            state_code,
            person_id_filter_set,
        )

        us_mo_sentence_statuses = (
            pipeline
            | "Read MO sentence status table from BigQuery"
            >> ReadFromBigQuery(query=us_mo_sentence_status_query))
    else:
        # Non-MO states get an empty PCollection so the downstream join has
        # the same graph shape in every state.
        us_mo_sentence_statuses = (
            pipeline
            | f"Generate empty MO statuses list for non-MO state run: {state_code} "
            >> beam.Create([]))

    us_mo_sentence_status_rankings_as_kv = (
        us_mo_sentence_statuses
        | "Convert MO sentence status ranking table to KV tuples"
        >> beam.ParDo(ConvertDictToKVTuple(), "person_id"))

    # Join both sentence types with the (possibly empty) MO statuses,
    # keyed by person_id.
    supervision_sentences_and_statuses = (
        {
            "incarceration_sentences": incarceration_sentences,
            "supervision_sentences": supervision_sentences,
            "sentence_statuses": us_mo_sentence_status_rankings_as_kv,
        }
        | "Group sentences to the sentence statuses for that person"
        >> beam.CoGroupByKey())

    # Tagged outputs split the converted sentences back into the two types.
    sentences_converted = (
        supervision_sentences_and_statuses
        | "Convert to state-specific sentences" >> beam.ParDo(
            ConvertSentencesToStateSpecificType()).with_outputs(
                "incarceration_sentences", "supervision_sentences"))

    # Set hydrated supervision periods on the corresponding incarceration sentences
    incarceration_sentences_with_hydrated_sps = (
        {
            "supervision_periods": supervision_periods,
            "sentences": sentences_converted.incarceration_sentences,
        }
        | "Group supervision periods to incarceration sentences"
        >> beam.CoGroupByKey()
        | "Set hydrated supervision periods on incarceration sentences"
        >> beam.ParDo(SetSupervisionPeriodsOnSentences()))

    # Set hydrated supervision periods on the corresponding supervision sentences
    supervision_sentences_with_hydrated_sps = (
        {
            "supervision_periods": supervision_periods,
            "sentences": sentences_converted.supervision_sentences,
        }
        | "Group supervision periods to supervision sentences"
        >> beam.CoGroupByKey()
        | "Set hydrated supervision periods on supervision sentences"
        >> beam.ParDo(SetSupervisionPeriodsOnSentences()))

    sentences_and_sentence_groups = {
        "sentence_groups": sentence_groups,
        "incarceration_sentences": incarceration_sentences_with_hydrated_sps,
        "supervision_sentences": supervision_sentences_with_hydrated_sps,
    } | "Group sentences to sentence groups" >> beam.CoGroupByKey()

    # Set hydrated sentences on the corresponding sentence groups
    sentence_groups_with_hydrated_sentences = (
        sentences_and_sentence_groups
        | "Set hydrated sentences on sentence groups" >> beam.ParDo(
            SetSentencesOnSentenceGroup()))

    # Bring in the table that associates people and their county of residence
    person_id_to_county_kv = (
        pipeline
        | "Load person_id_to_county_kv" >> ImportTableAsKVTuples(
            dataset_id=reference_dataset,
            table_id=PERSONS_TO_RECENT_COUNTY_OF_RESIDENCE_VIEW_NAME,
            table_key="person_id",
            state_code_filter=state_code,
            person_id_filter_set=person_id_filter_set,
        ))

    # Judicial district associated with each incarceration period, keyed
    # by person_id.
    ip_to_judicial_district_kv = (
        pipeline
        | "Load ip_to_judicial_district_kv" >> ImportTableAsKVTuples(
            dataset_id=reference_dataset,
            table_id=
            INCARCERATION_PERIOD_JUDICIAL_DISTRICT_ASSOCIATION_VIEW_NAME,
            table_key="person_id",
            state_code_filter=state_code,
            person_id_filter_set=person_id_filter_set,
        ))

    # Supervising-agent association per supervision period, keyed by person_id.
    supervision_period_to_agent_associations_as_kv = (
        pipeline
        | "Load supervision_period_to_agent_associations_as_kv"
        >> ImportTableAsKVTuples(
            dataset_id=reference_dataset,
            table_id=SUPERVISION_PERIOD_TO_AGENT_ASSOCIATION_VIEW_NAME,
            table_key="person_id",
            state_code_filter=state_code,
            person_id_filter_set=person_id_filter_set,
        ))

    # Static state-level race/ethnicity population counts; deliberately not
    # filtered per person (person_id_filter_set=None).
    state_race_ethnicity_population_counts = (
        pipeline
        | "Load state_race_ethnicity_population_counts" >> ImportTable(
            dataset_id=static_reference_dataset,
            table_id="state_race_ethnicity_population_counts",
            state_code_filter=state_code,
            person_id_filter_set=None,
        ))

    # Group StateSupervisionViolationResponses and StateSupervisionViolations by person_id
    supervision_violations_and_responses = (
        {
            "violations": supervision_violations,
            "violation_responses": supervision_violation_responses,
        }
        | "Group StateSupervisionViolationResponses to "
        "StateSupervisionViolations" >> beam.CoGroupByKey())

    # Set the fully hydrated StateSupervisionViolation entities on the corresponding
    # StateSupervisionViolationResponses
    violation_responses_with_hydrated_violations = (
        supervision_violations_and_responses
        | "Set hydrated StateSupervisionViolations on "
        "the StateSupervisionViolationResponses" >> beam.ParDo(
            SetViolationOnViolationsResponse()))

    # Group each StatePerson with their related entities
    person_entities = {
        "person": persons,
        "assessments": assessments,
        "sentence_groups": sentence_groups_with_hydrated_sentences,
        "violation_responses": violation_responses_with_hydrated_violations,
        "incarceration_period_judicial_district_association":
        ip_to_judicial_district_kv,
        "supervision_period_to_agent_association":
        supervision_period_to_agent_associations_as_kv,
        "persons_to_recent_county_of_residence": person_id_to_county_kv,
    } | "Group StatePerson to SentenceGroups" >> beam.CoGroupByKey()

    # Identify IncarcerationEvents events from the StatePerson's StateIncarcerationPeriods
    person_incarceration_events = (
        person_entities
        | "Classify Incarceration Events" >> beam.ParDo(
            ClassifyEvents(), identifier=self.pipeline_config.identifier))

    person_metadata = (
        persons
        | "Build the person_metadata dictionary" >> beam.ParDo(
            BuildPersonMetadata(),
            state_race_ethnicity_population_counts=AsList(
                state_race_ethnicity_population_counts),
        ))

    person_incarceration_events_with_metadata = (
        {
            "person_events": person_incarceration_events,
            "person_metadata": person_metadata,
        }
        | "Group IncarcerationEvents with person-level metadata"
        >> beam.CoGroupByKey()
        | "Organize StatePerson, PersonMetadata and IncarcerationEvents for calculations"
        >> beam.ParDo(ExtractPersonEventsMetadata()))

    # Add timestamp for local jobs
    job_timestamp = datetime.datetime.now().strftime(
        "%Y-%m-%d_%H_%M_%S.%f")
    all_pipeline_options["job_timestamp"] = job_timestamp

    # Get the type of metric to calculate
    metric_types_set = set(metric_types)

    # Get IncarcerationMetrics
    incarceration_metrics = (
        person_incarceration_events_with_metadata
        | "Get Incarceration Metrics" >> GetMetrics(
            pipeline_options=all_pipeline_options,
            pipeline_config=self.pipeline_config,
            metric_types_to_include=metric_types_set,
            calculation_end_month=calculation_end_month,
            calculation_month_count=calculation_month_count,
        ))

    return incarceration_metrics
def run( apache_beam_pipeline_options: PipelineOptions, data_input: str, reference_view_input: str, static_reference_input: str, output: str, calculation_month_count: int, metric_types: List[str], state_code: str, calculation_end_month: Optional[str], person_filter_ids: Optional[List[int]], ) -> None: """Runs the supervision calculation pipeline.""" # Workaround to load SQLAlchemy objects at start of pipeline. This is necessary because the BuildRootEntity # function tries to access attributes of relationship properties on the SQLAlchemy room_schema_class before they # have been loaded. However, if *any* SQLAlchemy objects have been instantiated, then the relationship properties # are loaded and their attributes can be successfully accessed. _ = schema.StatePerson() apache_beam_pipeline_options.view_as(SetupOptions).save_main_session = True # Get pipeline job details all_pipeline_options = apache_beam_pipeline_options.get_all_options() project_id = all_pipeline_options["project"] if project_id is None: raise ValueError( f"No project set in pipeline options: {all_pipeline_options}") if state_code is None: raise ValueError("No state_code set for pipeline") input_dataset = project_id + "." + data_input reference_dataset = project_id + "." + reference_view_input static_reference_dataset = project_id + "." 
+ static_reference_input person_id_filter_set = set( person_filter_ids) if person_filter_ids else None with beam.Pipeline(options=apache_beam_pipeline_options) as p: # Get StatePersons persons = p | "Load Persons" >> BuildRootEntity( dataset=input_dataset, root_entity_class=entities.StatePerson, unifying_id_field=entities.StatePerson.get_class_id_name(), build_related_entities=True, unifying_id_field_filter_set=person_id_filter_set, state_code=state_code, ) # Get StateIncarcerationPeriods incarceration_periods = p | "Load IncarcerationPeriods" >> BuildRootEntity( dataset=input_dataset, root_entity_class=entities.StateIncarcerationPeriod, unifying_id_field=entities.StatePerson.get_class_id_name(), build_related_entities=True, unifying_id_field_filter_set=person_id_filter_set, state_code=state_code, ) # Get StateSupervisionViolations supervision_violations = p | "Load SupervisionViolations" >> BuildRootEntity( dataset=input_dataset, root_entity_class=entities.StateSupervisionViolation, unifying_id_field=entities.StatePerson.get_class_id_name(), build_related_entities=True, unifying_id_field_filter_set=person_id_filter_set, state_code=state_code, ) # TODO(#2769): Don't bring this in as a root entity # Get StateSupervisionViolationResponses supervision_violation_responses = ( p | "Load SupervisionViolationResponses" >> BuildRootEntity( dataset=input_dataset, root_entity_class=entities.StateSupervisionViolationResponse, unifying_id_field=entities.StatePerson.get_class_id_name(), build_related_entities=True, unifying_id_field_filter_set=person_id_filter_set, state_code=state_code, )) # Get StateSupervisionSentences supervision_sentences = p | "Load SupervisionSentences" >> BuildRootEntity( dataset=input_dataset, root_entity_class=entities.StateSupervisionSentence, unifying_id_field=entities.StatePerson.get_class_id_name(), build_related_entities=True, unifying_id_field_filter_set=person_id_filter_set, state_code=state_code, ) # Get StateIncarcerationSentences 
incarceration_sentences = p | "Load IncarcerationSentences" >> BuildRootEntity( dataset=input_dataset, root_entity_class=entities.StateIncarcerationSentence, unifying_id_field=entities.StatePerson.get_class_id_name(), build_related_entities=True, unifying_id_field_filter_set=person_id_filter_set, state_code=state_code, ) # Get StateSupervisionPeriods supervision_periods = p | "Load SupervisionPeriods" >> BuildRootEntity( dataset=input_dataset, root_entity_class=entities.StateSupervisionPeriod, unifying_id_field=entities.StatePerson.get_class_id_name(), build_related_entities=True, unifying_id_field_filter_set=person_id_filter_set, state_code=state_code, ) # Get StateAssessments assessments = p | "Load Assessments" >> BuildRootEntity( dataset=input_dataset, root_entity_class=entities.StateAssessment, unifying_id_field=entities.StatePerson.get_class_id_name(), build_related_entities=False, unifying_id_field_filter_set=person_id_filter_set, state_code=state_code, ) supervision_contacts = p | "Load StateSupervisionContacts" >> BuildRootEntity( dataset=input_dataset, root_entity_class=entities.StateSupervisionContact, unifying_id_field=entities.StatePerson.get_class_id_name(), build_related_entities=False, unifying_id_field_filter_set=person_id_filter_set, state_code=state_code, ) supervision_period_to_agent_associations_as_kv = ( p | "Load supervision_period_to_agent_associations_as_kv" >> ImportTableAsKVTuples( dataset_id=reference_dataset, table_id=SUPERVISION_PERIOD_TO_AGENT_ASSOCIATION_VIEW_NAME, table_key="person_id", state_code_filter=state_code, person_id_filter_set=person_id_filter_set, )) # Bring in the judicial districts associated with supervision_periods sp_to_judicial_district_kv = ( p | "Load sp_to_judicial_district_kv" >> ImportTableAsKVTuples( dataset_id=reference_dataset, table_id= SUPERVISION_PERIOD_JUDICIAL_DISTRICT_ASSOCIATION_VIEW_NAME, state_code_filter=state_code, person_id_filter_set=person_id_filter_set, table_key="person_id", )) 
# NOTE(review): this span is the latter portion of a pipeline-construction
# function whose signature is above this excerpt. Names such as `p` (the Beam
# pipeline root), `persons`, `assessments`, `incarceration_sentences`,
# `supervision_sentences`, `supervision_periods`, `supervision_violations`,
# `supervision_violation_responses`, `supervision_contacts`,
# `sp_to_judicial_district_kv`, `supervision_period_to_agent_associations_as_kv`,
# `state_code`, `person_id_filter_set`, `static_reference_dataset`,
# `reference_dataset`, `output`, `metric_types`, `calculation_end_month`,
# `calculation_month_count`, and `apache_beam_pipeline_options` are bound
# earlier in the function.

# Load the static race/ethnicity population counts for this state; used later
# as a side input when building per-person metadata.
state_race_ethnicity_population_counts = (
    p
    | "Load state_race_ethnicity_population_counts"
    >> ImportTable(
        dataset_id=static_reference_dataset,
        table_id="state_race_ethnicity_population_counts",
        state_code_filter=state_code,
        person_id_filter_set=None,
    ))

if state_code == "US_MO":
    # Bring in the reference table that includes sentence status ranking information
    us_mo_sentence_status_query = select_all_by_person_query(
        reference_dataset,
        US_MO_SENTENCE_STATUSES_VIEW_NAME,
        state_code,
        person_id_filter_set,
    )

    us_mo_sentence_statuses = (
        p
        | "Read MO sentence status table from BigQuery"
        >> ReadFromBigQuery(query=us_mo_sentence_status_query))
else:
    # Non-MO runs still need this PCollection so the CoGroupByKey below has a
    # consistent set of inputs; create it empty.
    us_mo_sentence_statuses = (
        p
        | f"Generate empty MO statuses list for non-MO state run: {state_code} "
        >> beam.Create([]))

# Key the (possibly empty) MO sentence status rows by person_id so they can be
# co-grouped with the sentences.
us_mo_sentence_status_rankings_as_kv = (
    us_mo_sentence_statuses
    | "Convert MO sentence status ranking table to KV tuples"
    >> beam.ParDo(ConvertDictToKVTuple(), "person_id"))

# Join sentences to their sentence statuses per person.
sentences_and_statuses = (
    {
        "incarceration_sentences": incarceration_sentences,
        "supervision_sentences": supervision_sentences,
        "sentence_statuses": us_mo_sentence_status_rankings_as_kv,
    }
    | "Group sentences to the sentence statuses for that person"
    >> beam.CoGroupByKey())

# Convert to state-specific sentence types; produces two tagged outputs,
# accessed below as `sentences_converted.incarceration_sentences` and
# `sentences_converted.supervision_sentences`.
sentences_converted = (
    sentences_and_statuses
    | "Convert to state-specific sentences"
    >> beam.ParDo(
        ConvertSentencesToStateSpecificType()).with_outputs(
            "incarceration_sentences", "supervision_sentences"))

# Group StateSupervisionViolationResponses and StateSupervisionViolations by person_id
supervision_violations_and_responses = (
    {
        "violations": supervision_violations,
        "violation_responses": supervision_violation_responses,
    }
    | "Group StateSupervisionViolationResponses to "
    "StateSupervisionViolations"
    >> beam.CoGroupByKey())

# Set the fully hydrated StateSupervisionViolation entities on the corresponding
# StateSupervisionViolationResponses
violation_responses_with_hydrated_violations = (
    supervision_violations_and_responses
    | "Set hydrated StateSupervisionViolations on "
    "the StateSupervisionViolationResponses"
    >> beam.ParDo(
        SetViolationOnViolationsResponse()))

# Group StateIncarcerationPeriods and StateSupervisionViolationResponses by person_id
incarceration_periods_and_violation_responses = (
    {
        "incarceration_periods": incarceration_periods,
        "violation_responses": violation_responses_with_hydrated_violations,
    }
    | "Group StateIncarcerationPeriods to "
    "StateSupervisionViolationResponses"
    >> beam.CoGroupByKey())

# Set the fully hydrated StateSupervisionViolationResponse entities on the corresponding
# StateIncarcerationPeriods
incarceration_periods_with_source_violations = (
    incarceration_periods_and_violation_responses
    | "Set hydrated StateSupervisionViolationResponses on "
    "the StateIncarcerationPeriods"
    >> beam.ParDo(
        SetViolationResponseOnIncarcerationPeriod()))

# Group each StatePerson with their related entities
person_entities = {
    "person": persons,
    "assessments": assessments,
    "incarceration_periods": incarceration_periods_with_source_violations,
    "supervision_periods": supervision_periods,
    "supervision_sentences": sentences_converted.supervision_sentences,
    "incarceration_sentences": sentences_converted.incarceration_sentences,
    "violation_responses": violation_responses_with_hydrated_violations,
    "supervision_contacts": supervision_contacts,
    "supervision_period_judicial_district_association": sp_to_judicial_district_kv,
    "supervision_period_to_agent_association": supervision_period_to_agent_associations_as_kv,
} | "Group StatePerson to all entities" >> beam.CoGroupByKey()

# Identify SupervisionTimeBuckets from the StatePerson's StateSupervisionSentences and StateIncarcerationPeriods
person_time_buckets = (person_entities
                      | "Get SupervisionTimeBuckets" >> beam.ParDo(
                          ClassifySupervisionTimeBuckets()))

# Build person-level metadata, using the race/ethnicity population counts
# (loaded above) as a side input.
person_metadata = (
    persons
    | "Build the person_metadata dictionary" >> beam.ParDo(
        BuildPersonMetadata(),
        AsList(state_race_ethnicity_population_counts)))

# Pair each person's time buckets with their metadata for metric calculation.
person_time_buckets_with_metadata = (
    {
        "person_events": person_time_buckets,
        "person_metadata": person_metadata
    }
    | "Group SupervisionTimeBuckets with person-level metadata"
    >> beam.CoGroupByKey()
    | "Organize StatePerson, PersonMetadata and SupervisionTimeBuckets for calculations"
    >> beam.ParDo(ExtractPersonEventsMetadata()))

# Get pipeline job details for accessing job_id
all_pipeline_options = apache_beam_pipeline_options.get_all_options()

# Get the type of metric to calculate
metric_types_set = set(metric_types)

# Add timestamp for local jobs
job_timestamp = datetime.datetime.now().strftime(
    "%Y-%m-%d_%H_%M_%S.%f")
all_pipeline_options["job_timestamp"] = job_timestamp

# Get supervision metrics
supervision_metrics = (
    person_time_buckets_with_metadata
    | "Get Supervision Metrics"
    >> GetSupervisionMetrics(
        pipeline_options=all_pipeline_options,
        metric_types=metric_types_set,
        calculation_end_month=calculation_end_month,
        calculation_month_count=calculation_month_count,
    ))

# A person-id filter means this is a debugging/local run: deliberately skip
# writing any metrics to BigQuery.
if person_id_filter_set:
    logging.warning(
        "Non-empty person filter set - returning before writing metrics."
    )
    return

# Convert the metrics into a format that's writable to BQ; the ParDo tags each
# metric by its SupervisionMetricType value so each type can be routed to its
# own output table below.
writable_metrics = (
    supervision_metrics
    | "Convert to dict to be written to BQ"
    >> beam.ParDo(RecidivizMetricWritableDict()).with_outputs(
        SupervisionMetricType.SUPERVISION_COMPLIANCE.value,
        SupervisionMetricType.SUPERVISION_POPULATION.value,
        SupervisionMetricType.SUPERVISION_REVOCATION.value,
        SupervisionMetricType.SUPERVISION_REVOCATION_ANALYSIS.value,
        SupervisionMetricType.SUPERVISION_START.value,
        SupervisionMetricType.SUPERVISION_SUCCESS.value,
        SupervisionMetricType.SUPERVISION_SUCCESSFUL_SENTENCE_DAYS_SERVED.value,
        SupervisionMetricType.SUPERVISION_TERMINATION.value,
        SupervisionMetricType.SUPERVISION_OUT_OF_STATE_POPULATION.value,
        SupervisionMetricType.SUPERVISION_DOWNGRADE.value,
    ))

# Look up the destination BigQuery table for each Dataflow metric class.
terminations_table_id = DATAFLOW_METRICS_TO_TABLES[
    SupervisionTerminationMetric]
compliance_table_id = DATAFLOW_METRICS_TO_TABLES[
    SupervisionCaseComplianceMetric]
populations_table_id = DATAFLOW_METRICS_TO_TABLES[
    SupervisionPopulationMetric]
revocations_table_id = DATAFLOW_METRICS_TO_TABLES[
    SupervisionRevocationMetric]
revocation_analysis_table_id = DATAFLOW_METRICS_TO_TABLES[
    SupervisionRevocationAnalysisMetric]
successes_table_id = DATAFLOW_METRICS_TO_TABLES[
    SupervisionSuccessMetric]
successful_sentence_lengths_table_id = DATAFLOW_METRICS_TO_TABLES[
    SuccessfulSupervisionSentenceDaysServedMetric]
supervision_starts_table_id = DATAFLOW_METRICS_TO_TABLES[
    SupervisionStartMetric]
out_of_state_populations_table_id = DATAFLOW_METRICS_TO_TABLES[
    SupervisionOutOfStatePopulationMetric]
supervision_downgrade_table_id = DATAFLOW_METRICS_TO_TABLES[
    SupervisionDowngradeMetric]

# Write each tagged metric output to its own BigQuery table (append-only).
# The `_ =` assignments discard the write results; the writes are terminal
# sinks of the pipeline.
_ = (writable_metrics.SUPERVISION_POPULATION
     | f"Write population metrics to BQ table: {populations_table_id}"
     >> WriteAppendToBigQuery(
         output_table=populations_table_id,
         output_dataset=output,
     ))

_ = (writable_metrics.SUPERVISION_OUT_OF_STATE_POPULATION
     | f"Write out of state population metrics to BQ table: "
     f"{out_of_state_populations_table_id}"
     >> WriteAppendToBigQuery(
         output_table=out_of_state_populations_table_id,
         output_dataset=output,
     ))

_ = (writable_metrics.SUPERVISION_REVOCATION
     | f"Write revocation metrics to BQ table: {revocations_table_id}"
     >> WriteAppendToBigQuery(
         output_table=revocations_table_id,
         output_dataset=output,
     ))

_ = (writable_metrics.SUPERVISION_SUCCESS
     | f"Write success metrics to BQ table: {successes_table_id}"
     >> WriteAppendToBigQuery(
         output_table=successes_table_id,
         output_dataset=output,
     ))

_ = (writable_metrics.SUPERVISION_SUCCESSFUL_SENTENCE_DAYS_SERVED
     | f"Write supervision successful sentence length metrics to BQ"
     f" table: {successful_sentence_lengths_table_id}"
     >> WriteAppendToBigQuery(
         output_table=successful_sentence_lengths_table_id,
         output_dataset=output,
     ))

_ = (writable_metrics.SUPERVISION_TERMINATION
     | f"Write termination metrics to BQ table: {terminations_table_id}"
     >> WriteAppendToBigQuery(
         output_table=terminations_table_id,
         output_dataset=output,
     ))

_ = (writable_metrics.SUPERVISION_REVOCATION_ANALYSIS
     | f"Write revocation analyses metrics to BQ table: "
     f"{revocation_analysis_table_id}"
     >> WriteAppendToBigQuery(
         output_table=revocation_analysis_table_id,
         output_dataset=output,
     ))

_ = (writable_metrics.SUPERVISION_COMPLIANCE
     | f"Write compliance metrics to BQ table: {compliance_table_id}"
     >> WriteAppendToBigQuery(
         output_table=compliance_table_id,
         output_dataset=output,
     ))

_ = (writable_metrics.SUPERVISION_START
     | f"Write start metrics to BQ table: {supervision_starts_table_id}"
     >> WriteAppendToBigQuery(
         output_table=supervision_starts_table_id,
         output_dataset=output,
     ))

_ = (
    writable_metrics.SUPERVISION_DOWNGRADE
    | f"Write downgrade metrics to BQ table: {supervision_downgrade_table_id}"
    >> WriteAppendToBigQuery(
        output_table=supervision_downgrade_table_id,
        output_dataset=output,
    ))