def test_get_configs_for_export_name(
    self, mock_environment: mock.MagicMock
) -> None:
    """Tests get_configs_for_export_name function to ensure that export names correctly match"""
    mock_environment.return_value = "production"
    export_configs_for_filter = view_export_manager.get_configs_for_export_name(
        export_name=self.mock_export_name,
        state_code=self.mock_state_code,
        project_id=self.mock_project_id,
    )
    view = self.mock_view_builder.build()
    metric_view = self.mock_metric_view_builder.build()
    # Expected configs: the plain view exports JSON only; the metric view
    # exports both JSON and the optimized METRIC format.
    expected_view_config_list = [
        ExportBigQueryViewConfig(
            bq_view_namespace=self.mock_big_query_view_namespace,
            view=view,
            view_filter_clause=f" WHERE state_code = '{self.mock_state_code}'",
            intermediate_table_name=f"{view.view_id}_table_{self.mock_state_code}",
            output_directory=GcsfsDirectoryPath.from_absolute_path(
                "gs://{project_id}-dataset-location/subdirectory/{state_code}".format(
                    project_id=self.mock_project_id,
                    state_code=self.mock_state_code,
                )
            ),
            export_output_formats=[ExportOutputFormatType.JSON],
        ),
        ExportBigQueryViewConfig(
            bq_view_namespace=self.mock_big_query_view_namespace,
            view=metric_view,
            view_filter_clause=f" WHERE state_code = '{self.mock_state_code}'",
            # NOTE(review): this intentionally(?) reuses view.view_id rather than
            # metric_view.view_id — confirm this mirrors the production naming.
            intermediate_table_name=f"{view.view_id}_table_{self.mock_state_code}",
            output_directory=GcsfsDirectoryPath.from_absolute_path(
                "gs://{project_id}-dataset-location/subdirectory/{state_code}".format(
                    project_id=self.mock_project_id,
                    state_code=self.mock_state_code,
                )
            ),
            export_output_formats=[
                ExportOutputFormatType.JSON,
                ExportOutputFormatType.METRIC,
            ],
        ),
    ]
    self.assertEqual(expected_view_config_list, export_configs_for_filter)

    # Test for case insensitivity: lowercased export name and state code must
    # resolve to the same configs.
    export_configs_for_filter = view_export_manager.get_configs_for_export_name(
        export_name=self.mock_export_name.lower(),
        state_code=self.mock_state_code.lower(),
        project_id=self.mock_project_id,
    )
    self.assertEqual(expected_view_config_list, export_configs_for_filter)
def setUp(self) -> None:
    """Builds the shared fixtures: mocked BQ client/validator, a patched
    project id, two simple view builders, and export configs for each."""
    self.mock_bq_client = mock.create_autospec(BigQueryClient)
    self.mock_validator = mock.create_autospec(BigQueryViewExportValidator)
    self.mock_project_id = "fake-project"
    # Patch metadata.project_id so config construction sees a stable project.
    self.metadata_patcher = mock.patch("recidiviz.utils.metadata.project_id")
    self.mock_project_id_fn = self.metadata_patcher.start()
    self.mock_project_id_fn.return_value = self.mock_project_id
    self.view_builder = SimpleBigQueryViewBuilder(
        dataset_id="test_dataset",
        view_id="test_view",
        view_query_template="SELECT NULL LIMIT 0",
    )
    self.second_view_builder = SimpleBigQueryViewBuilder(
        dataset_id="test_dataset",
        view_id="test_view_2",
        view_query_template="SELECT NULL LIMIT 0",
    )
    # One config per view; they differ in their secondary output format
    # (HEADERLESS_CSV vs. CSV) in addition to JSON.
    self.view_export_configs = [
        ExportBigQueryViewConfig(
            view=self.view_builder.build(),
            view_filter_clause=" WHERE state_code = 'US_XX'",
            intermediate_table_name=f"{self.view_builder.view_id}_table_US_XX",
            output_directory=GcsfsDirectoryPath.from_absolute_path(
                "gs://{project_id}-dataset-location/subdirectory/{state_code}".format(
                    project_id=self.mock_project_id,
                    state_code="US_XX",
                )
            ),
            export_output_formats=[
                ExportOutputFormatType.JSON,
                ExportOutputFormatType.HEADERLESS_CSV,
            ],
        ),
        ExportBigQueryViewConfig(
            view=self.second_view_builder.build(),
            view_filter_clause=" WHERE state_code = 'US_XX'",
            intermediate_table_name=f"{self.second_view_builder.view_id}_table_US_XX",
            output_directory=GcsfsDirectoryPath.from_absolute_path(
                "gs://{project_id}-dataset-location/subdirectory/{state_code}".format(
                    project_id=self.mock_project_id,
                    state_code="US_XX",
                )
            ),
            export_output_formats=[
                ExportOutputFormatType.JSON,
                ExportOutputFormatType.CSV,
            ],
        ),
    ]
def test_export_dashboard_data_to_cloud_storage(
    self, mock_view_exporter, mock_view_update_manager_rematerialize
) -> None:
    """Tests the table is created from the view and then extracted."""
    view_export_manager.export_view_data_to_cloud_storage(
        self.mock_state_code, mock_view_exporter
    )
    view = self.mock_view_builder.build()
    metric_view = self.mock_metric_view_builder.build()
    # Configs expected to be produced internally by the export manager.
    view_export_configs = [
        ExportBigQueryViewConfig(
            view=view,
            view_filter_clause=" WHERE state_code = 'US_XX'",
            intermediate_table_name=f"{view.view_id}_table_US_XX",
            output_directory=GcsfsDirectoryPath.from_absolute_path(
                "gs://{project_id}-dataset-location/subdirectory/{state_code}".format(
                    project_id=self.mock_project_id,
                    state_code="US_XX",
                )
            ),
            export_output_formats=[ExportOutputFormatType.JSON],
        ),
        ExportBigQueryViewConfig(
            view=metric_view,
            view_filter_clause=" WHERE state_code = 'US_XX'",
            # NOTE(review): reuses view.view_id rather than metric_view.view_id
            # — confirm this matches the production naming convention.
            intermediate_table_name=f"{view.view_id}_table_US_XX",
            output_directory=GcsfsDirectoryPath.from_absolute_path(
                "gs://{project_id}-dataset-location/subdirectory/{state_code}".format(
                    project_id=self.mock_project_id,
                    state_code="US_XX",
                )
            ),
            export_output_formats=[
                ExportOutputFormatType.JSON,
                ExportOutputFormatType.METRIC,
            ],
        ),
    ]
    mock_view_update_manager_rematerialize.assert_called()
    # NOTE(review): the format labels were previously swapped — only
    # view_export_configs[1] declares METRIC output, while both declare JSON.
    mock_view_exporter.export_and_validate.assert_has_calls(
        [
            mock.call([]),  # CSV export: no config declares a CSV format
            mock.call(
                [view_export_configs[1].pointed_to_staging_subdirectory()]
            ),  # METRIC export: only the metric view
            mock.call(
                [
                    conf.pointed_to_staging_subdirectory()
                    for conf in view_export_configs
                ]
            ),  # JSON export: both views
        ],
        any_order=True,
    )
def _export_optimized_format(
    self,
    export_config: ExportBigQueryViewConfig,
    formatted: OptimizedMetricRepresentation,
    storage_client: storage.Client,
) -> GcsfsFilePath:
    """Uploads the optimized metric representation to the Cloud Storage
    location described by the export configuration.

    Returns the GCS path the file was written to.
    """
    output_path = export_config.output_path(extension="txt")
    logging.info(
        "Writing optimized metric file %s to GCS bucket %s...",
        output_path.blob_name,
        output_path.bucket_name,
    )

    destination_blob = storage.Blob.from_string(
        output_path.uri(), client=storage_client
    )
    # Metadata must be attached to the blob before the upload happens.
    self._set_format_metadata(formatted, destination_blob, should_compress=True)

    payload = self._produce_transmission_format(formatted, should_compress=True)
    destination_blob.upload_from_string(payload, content_type="text/plain")

    logging.info(
        "Optimized metric file %s written to GCS bucket %s.",
        output_path.blob_name,
        output_path.bucket_name,
    )
    return output_path
def export_configs_for_views_to_export(
    self, project_id: str
) -> Sequence[ExportBigQueryViewConfig]:
    """Builds a list of ExportBigQueryViewConfig that define how all views in
    view_builders_to_export should be exported to Google Cloud Storage."""
    if self.state_code_filter:
        view_filter_clause = f" WHERE state_code = '{self.state_code_filter}'"
    else:
        view_filter_clause = None

    table_name_template = "{export_view_name}_table"
    directory_uri = self.output_directory_uri_template.format(
        project_id=project_id
    )
    # A state-specific export gets a state-suffixed table name and a
    # state-specific output subdirectory.
    if self.state_code_filter:
        table_name_template += f"_{self.state_code_filter}"
        directory_uri += f"/{self.state_code_filter}"

    configs = []
    for builder in self.view_builders_to_export:
        built_view = builder.build()
        # Only pass export_output_formats through when explicitly configured,
        # so the config's own default applies otherwise.
        extra_kwargs = {}
        if self.export_output_formats is not None:
            extra_kwargs["export_output_formats"] = self.export_output_formats
        configs.append(
            ExportBigQueryViewConfig(
                view=built_view,
                view_filter_clause=view_filter_clause,
                intermediate_table_name=table_name_template.format(
                    export_view_name=built_view.view_id
                ),
                output_directory=GcsfsDirectoryPath.from_absolute_path(
                    directory_uri
                ),
                **extra_kwargs,
            )
        )
    return configs
def test_noop_without_staging(self) -> None:
    """A path that is not under the staging subdirectory is returned unchanged
    by revert_staging_path_to_original."""
    source_file = GcsfsFilePath.from_directory_and_file_name(
        self.config_with_path("gnarly").output_directory,
        "staging_results.txt",
    )
    reverted = ExportBigQueryViewConfig.revert_staging_path_to_original(
        source_file
    )
    # The "staging" token here is part of the file name, not a directory, so
    # the revert is a no-op.
    self.assertEqual(
        reverted,
        GcsfsFilePath.from_absolute_path("gs://gnarly/staging_results.txt"),
    )
def config_with_path(self, path: str) -> ExportBigQueryViewConfig:
    """Returns a minimal export config whose output directory is gs://<path>."""
    test_view = SimpleBigQueryViewBuilder(
        dataset_id="test_dataset",
        view_id="test_view",
        view_query_template="you know",
    ).build()
    return ExportBigQueryViewConfig(
        view=test_view,
        view_filter_clause="WHERE state_code = 'US_XX'",
        intermediate_table_name="tubular",
        output_directory=GcsfsDirectoryPath.from_absolute_path(f"gs://{path}"),
    )
def test_json_throws(self) -> None:
    """The JSON-lines exporter must reject configs whose output formats it
    cannot produce (HEADERLESS_CSV / METRIC)."""
    exporter = JsonLinesBigQueryViewExporter(
        self.mock_bq_client, self.mock_validator
    )
    view_export_configs = [
        ExportBigQueryViewConfig(
            bq_view_namespace=self.mock_bq_view_namespace,
            view=self.view_builder.build(),
            view_filter_clause=" WHERE state_code = 'US_XX'",
            intermediate_table_name=f"{self.view_builder.view_id}_table_US_XX",
            output_directory=GcsfsDirectoryPath.from_absolute_path(
                "gs://{project_id}-dataset-location/subdirectory/{state_code}".format(
                    project_id=self.mock_project_id,
                    state_code="US_XX",
                )
            ),
            export_output_formats=[
                ExportOutputFormatType.JSON,
                ExportOutputFormatType.HEADERLESS_CSV,
            ],
        ),
        ExportBigQueryViewConfig(
            bq_view_namespace=self.mock_bq_view_namespace,
            view=self.second_view_builder.build(),
            view_filter_clause=" WHERE state_code = 'US_XX'",
            intermediate_table_name=f"{self.second_view_builder.view_id}_table_US_XX",
            output_directory=GcsfsDirectoryPath.from_absolute_path(
                "gs://{project_id}-dataset-location/subdirectory/{state_code}".format(
                    project_id=self.mock_project_id,
                    state_code="US_XX",
                )
            ),
            export_output_formats=[ExportOutputFormatType.METRIC],
        ),
    ]
    with self.assertRaises(ValueError):
        exporter.export(view_export_configs)
def test_happy_path(self) -> None:
    """Pointing a config at the staging subdirectory and reverting a file path
    within it round-trips back to the original location."""
    staging_config = self.config_with_path("gnarly").pointed_to_staging_subdirectory()
    staged_file = GcsfsFilePath.from_directory_and_file_name(
        staging_config.output_directory,
        "foo.txt",
    )
    # The staged path carries the staging/ prefix...
    self.assertEqual(staged_file.abs_path(), "gnarly/staging/foo.txt")
    # ...and reverting strips it back off.
    reverted = ExportBigQueryViewConfig.revert_staging_path_to_original(
        staged_file
    )
    self.assertEqual(
        reverted, GcsfsFilePath.from_absolute_path("gs://gnarly/foo.txt")
    )
def export(
    self, export_configs: Sequence[ExportBigQueryViewConfig]
) -> List[GcsfsFilePath]:
    """Runs every delegate exporter against staging copies of the configs,
    then promotes the staged results to their final locations.

    Returns the list of final (non-staging) output paths.
    """
    logging.info("Starting composite BigQuery view export.")

    staged_configs = [
        config.pointed_to_staging_subdirectory() for config in export_configs
    ]

    # Every delegate exports (and validates) into staging first so a failing
    # delegate never leaves partial results at the final location.
    staging_outputs: List[GcsfsFilePath] = []
    for delegate in self.delegate_view_exporters:
        logging.info(
            "Beginning staged export of results for view exporter delegate [%s]",
            delegate.__class__,
        )
        staging_outputs.extend(delegate.export_and_validate(staged_configs))
        logging.info(
            "Completed staged export of results for view exporter delegate [%s]",
            delegate.__class__,
        )

    logging.info("Copying staged export results to final location")
    final_output_paths = []
    for staged_path in staging_outputs:
        destination = ExportBigQueryViewConfig.revert_staging_path_to_original(
            staged_path
        )
        self.fs.copy(staged_path, destination)
        final_output_paths.append(destination)

    # Only delete staged copies after every copy has succeeded.
    logging.info("Deleting staged copies of the final output paths")
    for staged_path in staging_outputs:
        self.fs.delete(staged_path)

    logging.info("Completed composite BigQuery view export.")
    return final_output_paths
def setUp(self) -> None:
    """Builds two metric-view export configs pointed at staging directories and
    records their expected .txt output paths."""
    # Patch metadata.project_id so config construction sees a stable project.
    self.metadata_patcher = patch("recidiviz.utils.metadata.project_id")
    self.mock_project_id_fn = self.metadata_patcher.start()
    self.mock_project_id_fn.return_value = "project-id"
    self.mock_bq_view_namespace = BigQueryViewNamespace.STATE
    metric_view_one = MetricBigQueryViewBuilder(
        dataset_id="dataset",
        view_id="view1",
        description="view1 description",
        view_query_template="select * from table",
        dimensions=("a", "b", "c"),
    ).build()
    export_config_one_staging = ExportBigQueryViewConfig(
        bq_view_namespace=self.mock_bq_view_namespace,
        view=metric_view_one,
        view_filter_clause="WHERE state_code = 'US_XX'",
        intermediate_table_name="intermediate_table",
        output_directory=GcsfsDirectoryPath.from_absolute_path(
            "gs://bucket1/staging/US_XX"
        ),
    )
    metric_view_two = MetricBigQueryViewBuilder(
        dataset_id="dataset",
        view_id="view2",
        description="view2 description",
        view_query_template="select * from view2",
        dimensions=("d", "e", "f"),
    ).build()
    export_config_two_staging = ExportBigQueryViewConfig(
        bq_view_namespace=self.mock_bq_view_namespace,
        view=metric_view_two,
        view_filter_clause="WHERE state_code = 'US_XX'",
        intermediate_table_name="intermediate_table2",
        output_directory=GcsfsDirectoryPath.from_absolute_path(
            "gs://bucket2/staging/US_XX"
        ),
    )
    # Expected staged .txt outputs, one per config.
    self.staging_paths = [
        export_config_one_staging.output_path("txt"),
        export_config_two_staging.output_path("txt"),
    ]
def setUp(self) -> None: self.metadata_patcher = patch('recidiviz.utils.metadata.project_id') self.mock_project_id_fn = self.metadata_patcher.start() self.mock_project_id_fn.return_value = 'project-id' metric_view_one = MetricBigQueryViewBuilder( dataset_id='dataset', view_id='view1', view_query_template='select * from table', dimensions=['a', 'b', 'c'], ).build() export_config_one_staging = ExportBigQueryViewConfig( view=metric_view_one, view_filter_clause='WHERE state_code = \'US_XX\'', intermediate_table_name='intermediate_table', output_directory=GcsfsDirectoryPath.from_absolute_path( 'gs://bucket1/staging/US_XX'), ) metric_view_two = MetricBigQueryViewBuilder( dataset_id='dataset', view_id='view2', view_query_template='select * from view2', dimensions=['d', 'e', 'f'], ).build() export_config_two_staging = ExportBigQueryViewConfig( view=metric_view_two, view_filter_clause='WHERE state_code = \'US_XX\'', intermediate_table_name='intermediate_table2', output_directory=GcsfsDirectoryPath.from_absolute_path( 'gs://bucket2/staging/US_XX'), ) self.staging_paths = [ export_config_one_staging.output_path('txt'), export_config_two_staging.output_path('txt') ]
def test_convert_happy_path(self):
    """Conversion of paged query results into the optimized value-matrix
    representation produces the expected matrix/manifest/keys, and the query
    job is paged twice with identical page parameters."""
    mock_bq_client = create_autospec(BigQueryClient)
    mock_dataset_ref = create_autospec(bigquery.DatasetReference)
    table_ref = bigquery.TableReference(mock_dataset_ref, "test_view")
    schema_fields = [
        bigquery.SchemaField("district", "STRING"),
        bigquery.SchemaField("year", "STRING"),
        bigquery.SchemaField("month", "STRING"),
        bigquery.SchemaField("supervision_type", "STRING"),
        bigquery.SchemaField("total_revocations", "STRING"),
    ]
    table = bigquery.Table(table_ref, schema_fields)
    mock_bq_client.dataset_ref_for_id.return_value = mock_dataset_ref
    mock_bq_client.get_table.return_value = table
    all_rows = _transform_dicts_to_bq_row(_DATA_POINTS)
    mock_query_job = create_autospec(bigquery.QueryJob)
    # The converter reads the result set twice, so provide it twice.
    mock_query_job.result.side_effect = [
        all_rows,
        all_rows,
    ]

    def fake_paged_process_fn(
        query_job: bigquery.QueryJob,
        _page_size: int,
        process_fn: Callable[[bigquery.table.Row], None],
    ) -> None:
        # Stand-in for BigQueryClient.paged_read_and_process: feed every row
        # from the (mocked) query result through the supplied callback.
        for row in query_job.result(
            max_results=optimized_metric_big_query_view_exporter.QUERY_PAGE_SIZE,
            start_index=0,
        ):
            process_fn(row)

    mock_bq_client.paged_read_and_process.side_effect = fake_paged_process_fn
    mock_validator = create_autospec(OptimizedMetricBigQueryViewExportValidator)
    view_exporter = OptimizedMetricBigQueryViewExporter(
        mock_bq_client, mock_validator
    )
    export_config = ExportBigQueryViewConfig(
        view=MetricBigQueryViewBuilder(
            dataset_id="test_dataset",
            view_id="test_view",
            view_query_template="you know",
            dimensions=("district", "year", "month", "supervision_type"),
        ).build(),
        view_filter_clause="WHERE state_code = 'US_XX'",
        intermediate_table_name="tubular",
        output_directory=GcsfsDirectoryPath.from_absolute_path("gs://gnarly/blob"),
    )
    optimized_representation = (
        view_exporter.convert_query_results_to_optimized_value_matrix(
            mock_query_job, export_config
        )
    )
    expected = OptimizedMetricRepresentation(
        value_matrix=_DATA_VALUES,
        dimension_manifest=_DIMENSION_MANIFEST,
        value_keys=_VALUE_KEYS,
    )
    self.assertEqual(expected, optimized_representation)
    # Both passes over the results must use the same paging parameters.
    mock_query_job.result.assert_has_calls(
        [
            call(
                max_results=optimized_metric_big_query_view_exporter.QUERY_PAGE_SIZE,
                start_index=0,
            ),
            call(
                max_results=optimized_metric_big_query_view_exporter.QUERY_PAGE_SIZE,
                start_index=0,
            ),
        ]
    )
    mock_bq_client.paged_read_and_process.assert_called()
    mock_bq_client.dataset_ref_for_id.assert_called()
    mock_bq_client.get_table.assert_called()
def test_export_staging_delegate_validation_failed(self) -> None:
    """A ViewExportValidationError raised by any delegate exporter during the
    staging phase propagates out of export_views_with_exporters.

    Fix: the original created and configured delegate_one/delegate_two, then
    immediately re-created both with create_autospec — discarding the first,
    configured mocks. The dead duplicate setup is removed; behavior is
    unchanged.
    """
    metric_view_one = MetricBigQueryViewBuilder(
        dataset_id="dataset",
        view_id="view1",
        view_query_template="select * from table",
        dimensions=("a", "b", "c"),
    ).build()
    export_config_one = ExportBigQueryViewConfig(
        view=metric_view_one,
        view_filter_clause="WHERE state_code = 'US_XX'",
        intermediate_table_name="intermediate_table",
        output_directory=GcsfsDirectoryPath.from_absolute_path(
            "gs://bucket1/US_XX"
        ),
    )
    export_config_one_staging = ExportBigQueryViewConfig(
        view=metric_view_one,
        view_filter_clause="WHERE state_code = 'US_XX'",
        intermediate_table_name="intermediate_table",
        output_directory=GcsfsDirectoryPath.from_absolute_path(
            "gs://bucket1/staging/US_XX"
        ),
    )
    metric_view_two = MetricBigQueryViewBuilder(
        dataset_id="dataset",
        view_id="view2",
        view_query_template="select * from view2",
        dimensions=("d", "e", "f"),
    ).build()
    export_config_two = ExportBigQueryViewConfig(
        view=metric_view_two,
        view_filter_clause="WHERE state_code = 'US_XX'",
        intermediate_table_name="intermediate_table2",
        output_directory=GcsfsDirectoryPath.from_absolute_path(
            "gs://bucket2/US_XX"
        ),
    )
    export_config_two_staging = ExportBigQueryViewConfig(
        view=metric_view_two,
        view_filter_clause="WHERE state_code = 'US_XX'",
        intermediate_table_name="intermediate_table2",
        output_directory=GcsfsDirectoryPath.from_absolute_path(
            "gs://bucket2/staging/US_XX"
        ),
    )
    mock_fs = create_autospec(GCSFileSystem)

    # First delegate succeeds and reports its staged output paths.
    delegate_one = create_autospec(BigQueryViewExporter)
    delegate_one_staging_paths = [
        export_config_one_staging.output_path("json"),
        export_config_two_staging.output_path("json"),
    ]
    delegate_one.export_and_validate.return_value = delegate_one_staging_paths

    # Second delegate fails validation during the staging phase.
    delegate_two = create_autospec(BigQueryViewExporter)
    delegate_two.export_and_validate.side_effect = ViewExportValidationError(
        "Validation failed"
    )

    # Make the actual call
    with pytest.raises(ViewExportValidationError) as e:
        export_views_with_exporters(
            mock_fs,
            [export_config_one, export_config_two],
            {
                ExportOutputFormatType.JSON: delegate_one,
                ExportOutputFormatType.METRIC: delegate_two,
            },
        )
    self.assertIn("Validation failed", str(e.value))
def test_export_dashboard_data_to_cloud_storage_state_agnostic(
    self, mock_view_exporter: Mock, mock_view_update_manager_rematerialize: Mock
) -> None:
    """Tests the table is created from the view and then extracted, where the
    export is not state-specific."""
    state_agnostic_dataset_export_configs = {
        self.mock_export_name: ExportViewCollectionConfig(
            view_builders_to_export=self.view_builders_for_dataset,
            output_directory_uri_template="gs://{project_id}-bucket-without-state-codes",
            export_name=self.mock_export_name,
            bq_view_namespace=self.mock_big_query_view_namespace,
        ),
    }
    self.mock_export_config.VIEW_COLLECTION_EXPORT_INDEX = (
        state_agnostic_dataset_export_configs
    )
    view_export_manager.export_view_data_to_cloud_storage(
        export_job_name=self.mock_export_name,
        override_view_exporter=mock_view_exporter,
    )
    view = self.mock_view_builder.build()
    metric_view = self.mock_metric_view_builder.build()
    # State-agnostic configs: no filter clause and no state-code suffixes.
    view_export_configs = [
        ExportBigQueryViewConfig(
            bq_view_namespace=self.mock_big_query_view_namespace,
            view=view,
            view_filter_clause=None,
            intermediate_table_name=f"{view.view_id}_table",
            output_directory=GcsfsDirectoryPath.from_absolute_path(
                "gs://{project_id}-bucket-without-state-codes".format(
                    project_id=self.mock_project_id,
                )
            ),
            export_output_formats=[ExportOutputFormatType.JSON],
        ),
        ExportBigQueryViewConfig(
            bq_view_namespace=self.mock_big_query_view_namespace,
            view=metric_view,
            view_filter_clause=None,
            intermediate_table_name=f"{view.view_id}_table",
            output_directory=GcsfsDirectoryPath.from_absolute_path(
                "gs://{project_id}-bucket-without-state-codes".format(
                    project_id=self.mock_project_id,
                )
            ),
            export_output_formats=[
                ExportOutputFormatType.JSON,
                ExportOutputFormatType.METRIC,
            ],
        ),
    ]
    mock_view_update_manager_rematerialize.assert_called()
    # NOTE(review): the format labels were previously swapped — only
    # view_export_configs[1] declares METRIC output, while both declare JSON.
    mock_view_exporter.export_and_validate.assert_has_calls(
        [
            mock.call([]),  # CSV export: no config declares a CSV format
            mock.call(
                [view_export_configs[1].pointed_to_staging_subdirectory()]
            ),  # METRIC export: only the metric view
            mock.call(
                [
                    conf.pointed_to_staging_subdirectory()
                    for conf in view_export_configs
                ]
            ),  # JSON export: both views
        ],
        any_order=True,
    )
def test_export_happy_path(self) -> None:
    """When every delegate exporter succeeds, staged results are copied to
    their final locations and the staged copies are deleted."""
    metric_view_one = MetricBigQueryViewBuilder(
        dataset_id="dataset",
        view_id="view1",
        view_query_template="select * from table",
        dimensions=("a", "b", "c"),
    ).build()
    export_config_one = ExportBigQueryViewConfig(
        view=metric_view_one,
        view_filter_clause="WHERE state_code = 'US_XX'",
        intermediate_table_name="intermediate_table",
        output_directory=GcsfsDirectoryPath.from_absolute_path(
            "gs://bucket1/US_XX"
        ),
    )
    export_config_one_staging = ExportBigQueryViewConfig(
        view=metric_view_one,
        view_filter_clause="WHERE state_code = 'US_XX'",
        intermediate_table_name="intermediate_table",
        output_directory=GcsfsDirectoryPath.from_absolute_path(
            "gs://bucket1/staging/US_XX"
        ),
    )
    metric_view_two = MetricBigQueryViewBuilder(
        dataset_id="dataset",
        view_id="view2",
        view_query_template="select * from view2",
        dimensions=("d", "e", "f"),
    ).build()
    export_config_two = ExportBigQueryViewConfig(
        view=metric_view_two,
        view_filter_clause="WHERE state_code = 'US_XX'",
        intermediate_table_name="intermediate_table2",
        output_directory=GcsfsDirectoryPath.from_absolute_path(
            "gs://bucket2/US_XX"
        ),
    )
    export_config_two_staging = ExportBigQueryViewConfig(
        view=metric_view_two,
        view_filter_clause="WHERE state_code = 'US_XX'",
        intermediate_table_name="intermediate_table2",
        output_directory=GcsfsDirectoryPath.from_absolute_path(
            "gs://bucket2/staging/US_XX"
        ),
    )
    mock_fs = create_autospec(GCSFileSystem)
    mock_fs.exists.return_value = True
    # Each delegate returns the staged paths it claims to have written.
    delegate_one = create_autospec(BigQueryViewExporter)
    delegate_one_staging_paths = [
        export_config_one_staging.output_path("json"),
        export_config_two_staging.output_path("json"),
    ]
    delegate_one.export_and_validate.return_value = delegate_one_staging_paths
    delegate_two = create_autospec(BigQueryViewExporter)
    delegate_two_staging_paths = [
        export_config_one_staging.output_path("txt"),
        export_config_two_staging.output_path("txt"),
    ]
    delegate_two.export_and_validate.return_value = delegate_two_staging_paths

    # Make the actual call
    export_views_with_exporters(
        mock_fs,
        [export_config_one, export_config_two],
        {
            ExportOutputFormatType.JSON: delegate_one,
            ExportOutputFormatType.METRIC: delegate_two,
        },
    )

    # Assert all mocks called as expected: both delegates receive the staging
    # configs, then every staged file is copied to its final path and deleted.
    delegate_one.export_and_validate.assert_has_calls([
        call([export_config_one_staging, export_config_two_staging]),
    ])
    delegate_two.export_and_validate.assert_has_calls([
        call([export_config_one_staging, export_config_two_staging]),
    ])
    mock_fs.copy.assert_has_calls(
        [
            call(
                GcsfsFilePath(bucket_name="bucket1",
                              blob_name="staging/US_XX/view1.json"),
                GcsfsFilePath(bucket_name="bucket1",
                              blob_name="US_XX/view1.json"),
            ),
            call(
                GcsfsFilePath(bucket_name="bucket2",
                              blob_name="staging/US_XX/view2.json"),
                GcsfsFilePath(bucket_name="bucket2",
                              blob_name="US_XX/view2.json"),
            ),
            call(
                GcsfsFilePath(bucket_name="bucket1",
                              blob_name="staging/US_XX/view1.txt"),
                GcsfsFilePath(bucket_name="bucket1",
                              blob_name="US_XX/view1.txt"),
            ),
            call(
                GcsfsFilePath(bucket_name="bucket2",
                              blob_name="staging/US_XX/view2.txt"),
                GcsfsFilePath(bucket_name="bucket2",
                              blob_name="US_XX/view2.txt"),
            ),
        ],
        any_order=True,
    )
    mock_fs.delete.assert_has_calls(
        [
            call(
                GcsfsFilePath(bucket_name="bucket1",
                              blob_name="staging/US_XX/view1.json")),
            call(
                GcsfsFilePath(bucket_name="bucket2",
                              blob_name="staging/US_XX/view2.json")),
            call(
                GcsfsFilePath(bucket_name="bucket1",
                              blob_name="staging/US_XX/view1.txt")),
            call(
                GcsfsFilePath(bucket_name="bucket2",
                              blob_name="staging/US_XX/view2.txt")),
        ],
        any_order=True,
    )
def test_export_happy_path(self) -> None:
    """Legacy CompositeBigQueryViewExporter variant: when every delegate
    succeeds, staged results are copied to final locations, staged copies are
    deleted, and the final paths are existence-checked."""
    metric_view_one = MetricBigQueryViewBuilder(
        dataset_id='dataset',
        view_id='view1',
        view_query_template='select * from table',
        dimensions=['a', 'b', 'c'],
    ).build()
    export_config_one = ExportBigQueryViewConfig(
        view=metric_view_one,
        view_filter_clause='WHERE state_code = \'US_XX\'',
        intermediate_table_name='intermediate_table',
        output_directory=GcsfsDirectoryPath.from_absolute_path(
            'gs://bucket1/US_XX'),
    )
    export_config_one_staging = ExportBigQueryViewConfig(
        view=metric_view_one,
        view_filter_clause='WHERE state_code = \'US_XX\'',
        intermediate_table_name='intermediate_table',
        output_directory=GcsfsDirectoryPath.from_absolute_path(
            'gs://bucket1/staging/US_XX'),
    )
    metric_view_two = MetricBigQueryViewBuilder(
        dataset_id='dataset',
        view_id='view2',
        view_query_template='select * from view2',
        dimensions=['d', 'e', 'f'],
    ).build()
    export_config_two = ExportBigQueryViewConfig(
        view=metric_view_two,
        view_filter_clause='WHERE state_code = \'US_XX\'',
        intermediate_table_name='intermediate_table2',
        output_directory=GcsfsDirectoryPath.from_absolute_path(
            'gs://bucket2/US_XX'),
    )
    export_config_two_staging = ExportBigQueryViewConfig(
        view=metric_view_two,
        view_filter_clause='WHERE state_code = \'US_XX\'',
        intermediate_table_name='intermediate_table2',
        output_directory=GcsfsDirectoryPath.from_absolute_path(
            'gs://bucket2/staging/US_XX'),
    )
    mock_bq_client = create_autospec(BigQueryClient)
    mock_fs = create_autospec(GCSFileSystem)
    # Final-location existence validation passes for every output.
    mock_fs.exists.return_value = True
    # Each delegate reports the staged paths it claims to have written.
    delegate_one = create_autospec(BigQueryViewExporter)
    delegate_one_staging_paths = [
        export_config_one_staging.output_path('json'),
        export_config_two_staging.output_path('json')
    ]
    delegate_one.export_and_validate.return_value = delegate_one_staging_paths
    delegate_two = create_autospec(BigQueryViewExporter)
    delegate_two_staging_paths = [
        export_config_one_staging.output_path('txt'),
        export_config_two_staging.output_path('txt')
    ]
    delegate_two.export_and_validate.return_value = delegate_two_staging_paths

    # Make the actual call
    exporter = CompositeBigQueryViewExporter(mock_bq_client, mock_fs,
                                             [delegate_one, delegate_two])
    exporter.export_and_validate([export_config_one, export_config_two])

    # Assert all mocks called as expected: delegates get staging configs,
    # staged files are copied to final paths, deleted, and existence-checked.
    delegate_one.export_and_validate.assert_has_calls([
        call([export_config_one_staging, export_config_two_staging]),
    ])
    delegate_two.export_and_validate.assert_has_calls([
        call([export_config_one_staging, export_config_two_staging]),
    ])
    mock_fs.copy.assert_has_calls([
        call(
            GcsfsFilePath(bucket_name='bucket1',
                          blob_name='staging/US_XX/view1.json'),
            GcsfsFilePath(bucket_name='bucket1',
                          blob_name='US_XX/view1.json')),
        call(
            GcsfsFilePath(bucket_name='bucket2',
                          blob_name='staging/US_XX/view2.json'),
            GcsfsFilePath(bucket_name='bucket2',
                          blob_name='US_XX/view2.json')),
        call(
            GcsfsFilePath(bucket_name='bucket1',
                          blob_name='staging/US_XX/view1.txt'),
            GcsfsFilePath(bucket_name='bucket1',
                          blob_name='US_XX/view1.txt')),
        call(
            GcsfsFilePath(bucket_name='bucket2',
                          blob_name='staging/US_XX/view2.txt'),
            GcsfsFilePath(bucket_name='bucket2',
                          blob_name='US_XX/view2.txt'))
    ])
    mock_fs.delete.assert_has_calls([
        call(
            GcsfsFilePath(bucket_name='bucket1',
                          blob_name='staging/US_XX/view1.json')),
        call(
            GcsfsFilePath(bucket_name='bucket2',
                          blob_name='staging/US_XX/view2.json')),
        call(
            GcsfsFilePath(bucket_name='bucket1',
                          blob_name='staging/US_XX/view1.txt')),
        call(
            GcsfsFilePath(bucket_name='bucket2',
                          blob_name='staging/US_XX/view2.txt'))
    ])
    mock_fs.exists.assert_has_calls([
        call(
            GcsfsFilePath(bucket_name='bucket1',
                          blob_name='US_XX/view1.json')),
        call(
            GcsfsFilePath(bucket_name='bucket2',
                          blob_name='US_XX/view2.json')),
        call(
            GcsfsFilePath(bucket_name='bucket1',
                          blob_name='US_XX/view1.txt')),
        call(
            GcsfsFilePath(bucket_name='bucket2',
                          blob_name='US_XX/view2.txt')),
    ])
def test_export_final_existence_validation_failed(self) -> None:
    """Delegate exports succeed, but the final-location existence check fails
    on the very first output file, so export_and_validate raises after the
    copy/delete phase has already completed."""
    metric_view_one = MetricBigQueryViewBuilder(
        dataset_id='dataset',
        view_id='view1',
        view_query_template='select * from table',
        dimensions=['a', 'b', 'c'],
    ).build()
    export_config_one = ExportBigQueryViewConfig(
        view=metric_view_one,
        view_filter_clause='WHERE state_code = \'US_XX\'',
        intermediate_table_name='intermediate_table',
        output_directory=GcsfsDirectoryPath.from_absolute_path(
            'gs://bucket1/US_XX'),
    )
    export_config_one_staging = ExportBigQueryViewConfig(
        view=metric_view_one,
        view_filter_clause='WHERE state_code = \'US_XX\'',
        intermediate_table_name='intermediate_table',
        output_directory=GcsfsDirectoryPath.from_absolute_path(
            'gs://bucket1/staging/US_XX'),
    )
    metric_view_two = MetricBigQueryViewBuilder(
        dataset_id='dataset',
        view_id='view2',
        view_query_template='select * from view2',
        dimensions=['d', 'e', 'f'],
    ).build()
    export_config_two = ExportBigQueryViewConfig(
        view=metric_view_two,
        view_filter_clause='WHERE state_code = \'US_XX\'',
        intermediate_table_name='intermediate_table2',
        output_directory=GcsfsDirectoryPath.from_absolute_path(
            'gs://bucket2/US_XX'),
    )
    export_config_two_staging = ExportBigQueryViewConfig(
        view=metric_view_two,
        view_filter_clause='WHERE state_code = \'US_XX\'',
        intermediate_table_name='intermediate_table2',
        output_directory=GcsfsDirectoryPath.from_absolute_path(
            'gs://bucket2/staging/US_XX'),
    )
    mock_bq_client = create_autospec(BigQueryClient)
    mock_fs = create_autospec(GCSFileSystem)
    # This should cause export_and_validate to raise a
    # ViewExportValidationError (the original comment said ValueError, but the
    # assertion below expects ViewExportValidationError).
    mock_fs.exists.return_value = False
    delegate_one = create_autospec(BigQueryViewExporter)
    delegate_one_staging_paths = [
        export_config_one_staging.output_path('json'),
        export_config_two_staging.output_path('json')
    ]
    delegate_one.export_and_validate.return_value = delegate_one_staging_paths
    delegate_two = create_autospec(BigQueryViewExporter)
    delegate_two_staging_paths = [
        export_config_one_staging.output_path('txt'),
        export_config_two_staging.output_path('txt')
    ]
    delegate_two.export_and_validate.return_value = delegate_two_staging_paths

    # Make the actual call
    exporter = CompositeBigQueryViewExporter(mock_bq_client, mock_fs,
                                             [delegate_one, delegate_two])
    with pytest.raises(ViewExportValidationError) as e:
        exporter.export_and_validate(
            [export_config_one, export_config_two])

    # We get an error at the very end of the export chain because even though
    # delegate validations passed, the final validation failed
    self.assertIn(
        'Validation on path bucket1/US_XX/view1.json failed the metric file export. '
        'Stopping execution here.', str(e.value))

    # The delegate exporters validations all passed so we still copy from
    # staging to final
    delegate_one.export_and_validate.assert_has_calls([
        call([export_config_one_staging, export_config_two_staging]),
    ])
    delegate_two.export_and_validate.assert_has_calls([
        call([export_config_one_staging, export_config_two_staging]),
    ])
    mock_fs.copy.assert_has_calls([
        call(
            GcsfsFilePath(bucket_name='bucket1',
                          blob_name='staging/US_XX/view1.json'),
            GcsfsFilePath(bucket_name='bucket1',
                          blob_name='US_XX/view1.json')),
        call(
            GcsfsFilePath(bucket_name='bucket2',
                          blob_name='staging/US_XX/view2.json'),
            GcsfsFilePath(bucket_name='bucket2',
                          blob_name='US_XX/view2.json')),
        call(
            GcsfsFilePath(bucket_name='bucket1',
                          blob_name='staging/US_XX/view1.txt'),
            GcsfsFilePath(bucket_name='bucket1',
                          blob_name='US_XX/view1.txt')),
        call(
            GcsfsFilePath(bucket_name='bucket2',
                          blob_name='staging/US_XX/view2.txt'),
            GcsfsFilePath(bucket_name='bucket2',
                          blob_name='US_XX/view2.txt'))
    ])
    mock_fs.delete.assert_has_calls([
        call(
            GcsfsFilePath(bucket_name='bucket1',
                          blob_name='staging/US_XX/view1.json')),
        call(
            GcsfsFilePath(bucket_name='bucket2',
                          blob_name='staging/US_XX/view2.json')),
        call(
            GcsfsFilePath(bucket_name='bucket1',
                          blob_name='staging/US_XX/view1.txt')),
        call(
            GcsfsFilePath(bucket_name='bucket2',
                          blob_name='staging/US_XX/view2.txt'))
    ])
    # Only one call to the Exists validation made because the first one failed
    mock_fs.exists.assert_has_calls([
        call(
            GcsfsFilePath(bucket_name='bucket1',
                          blob_name='US_XX/view1.json')),
    ])
def test_export_staging_delegate_validation_failed(self) -> None:
    """Legacy CompositeBigQueryViewExporter variant: a ViewExportValidationError
    raised by a delegate during the staging phase propagates out of
    export_and_validate.

    Fix: the original created and configured delegate_one/delegate_two, then
    immediately re-created both with create_autospec — discarding the first,
    configured mocks. The dead duplicate setup is removed; behavior is
    unchanged.
    """
    metric_view_one = MetricBigQueryViewBuilder(
        dataset_id='dataset',
        view_id='view1',
        view_query_template='select * from table',
        dimensions=['a', 'b', 'c'],
    ).build()
    export_config_one = ExportBigQueryViewConfig(
        view=metric_view_one,
        view_filter_clause='WHERE state_code = \'US_XX\'',
        intermediate_table_name='intermediate_table',
        output_directory=GcsfsDirectoryPath.from_absolute_path(
            'gs://bucket1/US_XX'),
    )
    export_config_one_staging = ExportBigQueryViewConfig(
        view=metric_view_one,
        view_filter_clause='WHERE state_code = \'US_XX\'',
        intermediate_table_name='intermediate_table',
        output_directory=GcsfsDirectoryPath.from_absolute_path(
            'gs://bucket1/staging/US_XX'),
    )
    metric_view_two = MetricBigQueryViewBuilder(
        dataset_id='dataset',
        view_id='view2',
        view_query_template='select * from view2',
        dimensions=['d', 'e', 'f'],
    ).build()
    export_config_two = ExportBigQueryViewConfig(
        view=metric_view_two,
        view_filter_clause='WHERE state_code = \'US_XX\'',
        intermediate_table_name='intermediate_table2',
        output_directory=GcsfsDirectoryPath.from_absolute_path(
            'gs://bucket2/US_XX'),
    )
    export_config_two_staging = ExportBigQueryViewConfig(
        view=metric_view_two,
        view_filter_clause='WHERE state_code = \'US_XX\'',
        intermediate_table_name='intermediate_table2',
        output_directory=GcsfsDirectoryPath.from_absolute_path(
            'gs://bucket2/staging/US_XX'),
    )
    mock_bq_client = create_autospec(BigQueryClient)
    mock_fs = create_autospec(GCSFileSystem)

    # First delegate succeeds and reports its staged output paths.
    delegate_one = create_autospec(BigQueryViewExporter)
    delegate_one_staging_paths = [
        export_config_one_staging.output_path('json'),
        export_config_two_staging.output_path('json')
    ]
    delegate_one.export_and_validate.return_value = delegate_one_staging_paths

    # Second delegate fails validation during the staging phase.
    delegate_two = create_autospec(BigQueryViewExporter)
    delegate_two.export_and_validate.side_effect = ViewExportValidationError(
        'Validation failed')

    # Make the actual call
    exporter = CompositeBigQueryViewExporter(mock_bq_client, mock_fs,
                                             [delegate_one, delegate_two])
    with pytest.raises(ViewExportValidationError) as e:
        exporter.export_and_validate(
            [export_config_one, export_config_two])

    self.assertIn('Validation failed', str(e.value))