def test_dag_with_cycle_after_root(self) -> None:
    """A cycle between two views downstream of the root should be detected at
    DAG-walker construction time, with the error naming the cycle members."""
    root_view = BigQueryView(
        dataset_id="dataset_1",
        view_id="table_1",
        view_query_template="SELECT * FROM `{project_id}.source_dataset.source_table`",
    )
    cycle_view_a = BigQueryView(
        dataset_id="dataset_2",
        view_id="table_2",
        view_query_template="""
            SELECT * FROM `{project_id}.dataset_1.table_1`
            JOIN `{project_id}.dataset_3.table_3`
            USING (col)""",
    )
    cycle_view_b = BigQueryView(
        dataset_id="dataset_3",
        view_id="table_3",
        view_query_template="SELECT * FROM `{project_id}.dataset_2.table_2`",
    )
    with self.assertRaises(ValueError) as ctx:
        _ = BigQueryViewDagWalker([root_view, cycle_view_a, cycle_view_b])
    self.assertEqual(
        str(ctx.exception),
        "Detected cycle in graph reachable from ('dataset_1', 'table_1'): "
        "[('dataset_2', 'table_2'), ('dataset_3', 'table_3')]",
    )
def test_parse_view_materialized_parent(self) -> None:
    """A view that queries a parent's `_materialized` table should resolve to
    that parent's DagKey once the materialized-address map is set on the node.

    Bug fix: the guard's ValueError message was a plain string containing
    "{parent_view}" — it was missing the f-prefix, so the placeholder was
    never interpolated into the error text.
    """
    view = BigQueryView(
        dataset_id="my_dataset",
        view_id="my_view_id",
        description="my view description",
        view_query_template="SELECT * FROM `{project_id}.some_dataset.some_table_materialized`",
    )
    parent_view = BigQueryView(
        dataset_id="some_dataset",
        view_id="some_table",
        description="my parent view description",
        view_query_template="SELECT * FROM UNNEST([])",
        should_materialize=True,
    )
    node = BigQueryViewDagNode(view)
    if not parent_view.materialized_address:
        # f-string so the offending view is actually named in the error.
        raise ValueError(f"Null materialized_address for view [{parent_view}]")
    # NOTE: set_materialized_addresss (sic) matches the project API's spelling.
    node.set_materialized_addresss(
        {parent_view.materialized_address: DagKey.for_view(parent_view)}
    )
    self.assertEqual(
        node.parent_keys,
        {
            DagKey(
                view_address=BigQueryAddress(
                    dataset_id="some_dataset", table_id="some_table"
                )
            )
        },
    )
def test_dag_two_views_same_materialized_address(self) -> None:
    """Two distinct views must not share a materialized address override;
    the walker should refuse to build."""
    first_view = BigQueryView(
        dataset_id="dataset_1",
        view_id="table_1",
        description="table_1 description",
        should_materialize=True,
        materialized_address_override=BigQueryAddress(
            dataset_id="other_dataset", table_id="other_table"
        ),
        view_query_template="SELECT * FROM `{project_id}.source_dataset.source_table`",
    )
    second_view = BigQueryView(
        dataset_id="dataset_2",
        view_id="table_2",
        description="table_2 description",
        should_materialize=True,
        materialized_address_override=BigQueryAddress(
            dataset_id="other_dataset", table_id="other_table"
        ),
        view_query_template="SELECT * FROM `{project_id}.source_dataset.source_table_2`",
    )
    with self.assertRaises(ValueError) as ctx:
        _ = BigQueryViewDagWalker([first_view, second_view])
    self.assertTrue(
        str(ctx.exception).startswith(
            "Found materialized view address for view [('dataset_2', 'table_2')] "
            "that matches materialized_address of another view: "
            "[('dataset_1', 'table_1')]."
        )
    )
def test_union_dags_same_view_different_object(self) -> None:
    """Unioning two DAGs that contain equal-but-distinct view objects should
    deduplicate down to a single view."""
    view = BigQueryView(
        dataset_id="dataset_1",
        view_id="table_1",
        description="table_1 description",
        should_materialize=True,
        materialized_address_override=BigQueryAddress(
            dataset_id="other_dataset_1", table_id="other_table_1"
        ),
        view_query_template="SELECT * FROM `{project_id}.source_dataset.source_table`",
    )
    # A second, separately-constructed object with identical field values.
    duplicate_view = BigQueryView(
        dataset_id="dataset_1",
        view_id="table_1",
        description="table_1 description",
        should_materialize=True,
        materialized_address_override=BigQueryAddress(
            dataset_id="other_dataset_1", table_id="other_table_1"
        ),
        view_query_template="SELECT * FROM `{project_id}.source_dataset.source_table`",
    )
    unioned_dag = BigQueryViewDagWalker.union_dags(
        BigQueryViewDagWalker([view]),
        BigQueryViewDagWalker([duplicate_view]),
    )
    self.assertCountEqual([view], unioned_dag.views)
def test_sameness_check_validation_name(self) -> None:
    """validation_name is the view_id, with validation_name_suffix appended
    (underscore-joined) when one is provided.

    Fix: renamed from test_samneness_check_validation_name — "samneness" was
    a typo in the test method name; test methods have no external callers.
    """
    check = SamenessDataValidationCheck(
        validation_type=ValidationCheckType.SAMENESS,
        sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
        comparison_columns=["a", "b", "c"],
        view=BigQueryView(
            dataset_id="my_dataset",
            view_id="test_view",
            view_query_template="select * from literally_anything",
        ),
    )
    self.assertEqual(check.validation_name, "test_view")
    check_with_name_suffix = SamenessDataValidationCheck(
        validation_type=ValidationCheckType.SAMENESS,
        sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
        validation_name_suffix="b_c_only",
        comparison_columns=["b", "c"],
        view=BigQueryView(
            dataset_id="my_dataset",
            view_id="test_view",
            view_query_template="select * from literally_anything",
        ),
    )
    self.assertEqual(check_with_name_suffix.validation_name, "test_view_b_c_only")
def test_copy_bq_views_raw_project_id(
    self, mock_table_exists: mock.MagicMock, mock_copy_view: mock.MagicMock
) -> None:
    """Check that copy_view is called, even when the project_id is in the
    view_query_template.

    The copied view's query should have the source project id rewritten to the
    destination project id (verified via the expected_view equality check).
    """
    view_with_project_id = BigQueryView(
        project_id=self.mock_source_project_id,
        dataset_id=self.mock_source_dataset_id,
        view_id="test_view",
        description="test_view description",
        # Raw (un-templated) project id embedded directly in the query.
        view_query_template=f"SELECT * FROM {self.mock_source_project_id}.other_dataset.table LIMIT 0",
        should_materialize=True,
    )
    # The mocked client both lists and fetches this single view.
    self.mock_client.list_tables.return_value = [view_with_project_id]
    self.mock_client.get_table.return_value = view_with_project_id
    mock_table_exists.side_effect = self.table_exists_side_effect
    copy_bq_views(
        source_project_id=self.mock_source_project_id,
        source_dataset_id=self.mock_source_dataset_id,
        destination_project_id=self.mock_destination_project_id,
        destination_dataset_id=self.mock_destination_dataset_id,
    )
    # Same view, but re-homed to the destination project/dataset and with the
    # project id inside the query swapped to the destination project.
    expected_view = BigQueryView(
        project_id=self.mock_destination_project_id,
        dataset_id=self.mock_destination_dataset_id,
        view_id="test_view",
        description="test_view description",
        view_query_template=f"SELECT * FROM {self.mock_destination_project_id}.other_dataset.table LIMIT 0",
        should_materialize=True,
    )
    expected_destination_dataset_ref = bigquery.DatasetReference(
        project=self.mock_destination_project_id,
        dataset_id=self.mock_destination_dataset_id,
    )
    mock_copy_view.assert_called()
    # Inspect the kwargs of the first copy_view call ([1] == kwargs dict).
    self.assertEqual(expected_view, mock_copy_view.call_args_list[0][1].get("view"))
    self.assertEqual(
        self.mock_destination_project_id,
        mock_copy_view.call_args_list[0][1].get("destination_client").project_id,
    )
    self.assertEqual(
        expected_destination_dataset_ref,
        mock_copy_view.call_args_list[0][1].get("destination_dataset_ref"),
    )
def test_populate_node_family_full_parentage_complex_dependencies(self) -> None:
    """full_parentage on a node with diamond-shaped dependencies should contain
    every transitive ancestor exactly once, including source tables."""
    view_1 = BigQueryView(
        dataset_id="dataset_1",
        view_id="table_1",
        description="table_1 description",
        view_query_template="SELECT * FROM `{project_id}.source_dataset.source_table`",
    )
    view_2 = BigQueryView(
        dataset_id="dataset_2",
        view_id="table_2",
        description="table_2 description",
        view_query_template="SELECT * FROM `{project_id}.dataset_1.table_1`",
    )
    view_3 = BigQueryView(
        dataset_id="dataset_3",
        view_id="table_3",
        description="table_3 description",
        view_query_template="""
            SELECT * FROM `{project_id}.dataset_1.table_1`
            JOIN `{project_id}.dataset_2.table_2`
            USING (col)""",
    )
    view_4 = BigQueryView(
        dataset_id="dataset_4",
        view_id="table_4",
        description="table_4 description",
        view_query_template="""
            SELECT * FROM `{project_id}.dataset_2.table_2`
            JOIN `{project_id}.dataset_3.table_3`
            USING (col)""",
    )
    dag_walker = BigQueryViewDagWalker([view_1, view_2, view_3, view_4])
    start_node = dag_walker.node_for_view(view_4)
    dag_walker.populate_node_family_for_node(
        node=start_node, view_source_table_datasets={"source_dataset"}
    )
    expected_parentage = {DagKey.for_view(v) for v in (view_1, view_2, view_3)}
    expected_parentage.add(
        DagKey(
            view_address=BigQueryAddress(
                dataset_id="source_dataset", table_id="source_table"
            )
        )
    )
    self.assertEqual(expected_parentage, start_node.node_family.full_parentage)
def test_dag_parents_materialized_non_default(self) -> None:
    """Views that query a parent's overridden materialized address should see
    that parent when the DAG is processed.

    Bug fix: the `parent_results` parameter of the process callback was
    annotated `Dict[BigQueryView, DagKey]`, but the assertion below (and the
    callback's own `return view.view_id`) show the values are the `str`
    results returned by the callback — corrected to `Dict[BigQueryView, str]`.
    """
    self.maxDiff = None
    view_1 = BigQueryView(
        dataset_id="dataset_1",
        view_id="table_1",
        description="table_1 description",
        should_materialize=True,
        materialized_address_override=BigQueryAddress(
            dataset_id="other_dataset_1", table_id="other_table_1"
        ),
        view_query_template="SELECT * FROM `{project_id}.source_dataset.source_table`",
    )
    view_2 = BigQueryView(
        dataset_id="dataset_2",
        view_id="table_2",
        description="table_2 description",
        should_materialize=True,
        materialized_address_override=BigQueryAddress(
            dataset_id="other_dataset_2", table_id="other_table_2"
        ),
        view_query_template="SELECT * FROM `{project_id}.source_dataset.source_table_2`",
    )
    view_3 = BigQueryView(
        dataset_id="dataset_3",
        view_id="table_3",
        description="table_3 description",
        view_query_template="""
            SELECT * FROM `{project_id}.dataset_1.table_1`
            JOIN `{project_id}.other_dataset_2.other_table_2`
            USING (col)""",
    )
    walker = BigQueryViewDagWalker([view_1, view_2, view_3])

    def process_simple(
        view: BigQueryView, parent_results: Dict[BigQueryView, str]
    ) -> str:
        if view == view_3:
            # View 3 should have two parents
            self.assertEqual(
                {view_1: view_1.view_id, view_2: view_2.view_id}, parent_results
            )
        return view.view_id

    result = walker.process_dag(process_simple)
    self.assertEqual(
        {view_1: view_1.view_id, view_2: view_2.view_id, view_3: view_3.view_id},
        result,
    )
def test_copy_bq_views(self, mock_table_exists, mock_copy_view):
    """Check that copy_view is called when the view does not exist in the
    destination dataset.

    The mocked client lists/fetches a single view; table_exists is driven by
    self.table_exists_side_effect so the destination lookup reports "missing".
    """
    self.mock_client.list_tables.return_value = [self.mock_view]
    self.mock_client.get_table.return_value = self.mock_view
    mock_table_exists.side_effect = self.table_exists_side_effect
    copy_bq_views(source_project_id=self.mock_source_project_id,
                  source_dataset_id=self.mock_source_dataset_id,
                  destination_project_id=self.mock_destination_project_id,
                  destination_dataset_id=self.mock_destination_dataset_id)
    # The copied view should be re-homed to the destination project/dataset
    # but keep the source view's id and query.
    expected_view = BigQueryView(
        project_id=self.mock_destination_project_id,
        dataset_id=self.mock_destination_dataset_id,
        view_id=self.mock_view.view_id,
        view_query_template=self.mock_view.view_query,
        should_materialize=True)
    expected_destination_dataset_ref = bigquery.DatasetReference(
        project=self.mock_destination_project_id,
        dataset_id=self.mock_destination_dataset_id)
    mock_copy_view.assert_called()
    # call_args_list[0][1] is the kwargs dict of the first copy_view call.
    self.assertEqual(expected_view,
                     mock_copy_view.call_args_list[0][1].get('view'))
    self.assertEqual(
        self.mock_destination_project_id,
        mock_copy_view.call_args_list[0]
        [1].get('destination_client').project_id)
    self.assertEqual(
        expected_destination_dataset_ref,
        mock_copy_view.call_args_list[0][1].get('destination_dataset_ref'))
def test_string_sameness_check_different_values_above_margin(self) -> None:
    """A string sameness check whose error rate exceeds max_allowed_error
    should fail with a descriptive message."""
    num_bad_rows = 5
    # Margin set just below the actual bad-row rate so the check must fail.
    max_allowed_error = (num_bad_rows - 1) / 100
    self.mock_client.run_query_async.return_value = (
        self.return_string_values_with_num_bad_rows(num_bad_rows)
    )
    validation_job = DataValidationJob(
        region_code="US_VA",
        validation=SamenessDataValidationCheck(
            validation_type=ValidationCheckType.SAMENESS,
            comparison_columns=["a", "b", "c"],
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            max_allowed_error=max_allowed_error,
            view=BigQueryView(
                dataset_id="my_dataset",
                view_id="test_view",
                view_query_template="select * from literally_anything",
            ),
        ),
    )
    check_result = SamenessValidationChecker.run_check(validation_job)
    actual_expected_error = num_bad_rows / 100
    self.assertEqual(
        check_result,
        DataValidationJobResult(
            validation_job=validation_job,
            was_successful=False,
            failure_description=f"{num_bad_rows} out of 100 row(s) did not contain matching strings. "
            f"The acceptable margin of error is only {max_allowed_error}, but the "
            f"validation returned an error rate of {actual_expected_error}.",
        ),
    )
def test_string_sameness_check_different_values_handle_non_string_type(
    self,
) -> None:
    """A non-string value in a STRING sameness check should raise a ValueError
    naming the offending type, value, and validation."""
    self.mock_client.run_query_async.return_value = [
        {"a": "same", "b": "same", "c": 1245}
    ]
    validation_job = DataValidationJob(
        region_code="US_VA",
        validation=SamenessDataValidationCheck(
            validation_type=ValidationCheckType.SAMENESS,
            comparison_columns=["a", "b", "c"],
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            view=BigQueryView(
                dataset_id="my_dataset",
                view_id="test_view",
                view_query_template="select * from literally_anything",
            ),
        ),
    )
    with self.assertRaises(ValueError) as ctx:
        _ = SamenessValidationChecker.run_check(validation_job)
    self.assertEqual(
        str(ctx.exception),
        "Unexpected type [<class 'int'>] for value [1245] in STRING validation [test_view].",
    )
def test_string_sameness_check_numbers_values_all_none(self) -> None:
    """A NUMBERS sameness check should reject None values with a ValueError
    (unlike the STRINGS variant, which tolerates them)."""
    self.mock_client.run_query_async.return_value = [
        {"a": None, "b": None, "c": None}
    ]
    validation_job = DataValidationJob(
        region_code="US_VA",
        validation=SamenessDataValidationCheck(
            validation_type=ValidationCheckType.SAMENESS,
            comparison_columns=["a", "b", "c"],
            sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
            view=BigQueryView(
                dataset_id="my_dataset",
                view_id="test_view",
                view_query_template="select * from literally_anything",
            ),
        ),
    )
    with self.assertRaises(ValueError) as ctx:
        _ = SamenessValidationChecker.run_check(validation_job)
    self.assertEqual(
        str(ctx.exception),
        "Unexpected None value for column [a] in validation [test_view].",
    )
def test_sameness_check_numbers_multiple_rows_above_margin(self) -> None:
    """When several rows exceed the allowed numeric margin, the failure
    message should report the row count and the worst observed error."""
    query_rows = [
        {"a": 97, "b": 100, "c": 99},
        {"a": 14, "b": 21, "c": 14},
    ]
    self.mock_client.run_query_async.return_value = query_rows
    validation_job = DataValidationJob(
        region_code="US_VA",
        validation=SamenessDataValidationCheck(
            validation_type=ValidationCheckType.SAMENESS,
            comparison_columns=["a", "b", "c"],
            sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
            max_allowed_error=0.02,
            view=BigQueryView(
                dataset_id="my_dataset",
                view_id="test_view",
                view_query_template="select * from literally_anything",
            ),
        ),
    )
    check_result = SamenessValidationChecker.run_check(validation_job)
    self.assertEqual(
        check_result,
        DataValidationJobResult(
            validation_job=validation_job,
            was_successful=False,
            failure_description="2 row(s) had unacceptable margins of error. The acceptable margin "
            "of error is only 0.02, but the validation returned rows with "
            "errors as high as 0.3333.",
        ),
    )
def main(*, source_project_id, source_dataset_id, destination_project_id, destination_dataset_id): """Copies all views from the source_project_id.source_dataset_id to the destination_project_id.destination_dataset_id.""" # Construct a BigQuery client with the source_project_id source_client = BigQueryClientImpl(project_id=source_project_id) # Construct a BigQuery client with the destination_project_id destination_client = BigQueryClientImpl(project_id=destination_project_id) destination_dataset = bigquery.DatasetReference(destination_project_id, destination_dataset_id) tables_in_source_dataset = source_client.list_tables(source_dataset_id) for table_ref in tables_in_source_dataset: table = source_client.get_table( source_client.dataset_ref_for_id(table_ref.dataset_id), table_ref.table_id) # Only copy this view if there is a view_query to replicate and the view doesn't already exist in the # destination dataset if table.view_query and not destination_client.table_exists( destination_dataset, table_id=table.table_id): # Retrieve all of the information about the view source_client.copy_view( view=BigQueryView(dataset_id=table_ref.dataset_id, view_id=table.table_id, view_query_template=table.view_query), destination_client=destination_client, destination_dataset_ref=destination_dataset)
def test_parse_view_multiple_parents(self) -> None:
    """Every table referenced in the query template should appear in the
    node's parent_keys."""
    multi_parent_view = BigQueryView(
        dataset_id="my_dataset",
        view_id="my_view_id",
        description="my view description",
        view_query_template="""SELECT * FROM `{project_id}.some_dataset.some_table`
        LEFT OUTER JOIN `{project_id}.some_dataset.other_table`
        USING (some_col);
        """,
    )
    dag_node = BigQueryViewDagNode(multi_parent_view)
    dag_node.set_materialized_addresss({})
    expected_keys = {
        DagKey(
            view_address=BigQueryAddress(dataset_id=dataset_id, table_id=table_id)
        )
        for dataset_id, table_id in (
            ("some_dataset", "some_table"),
            ("some_dataset", "other_table"),
        )
    }
    self.assertEqual(dag_node.parent_keys, expected_keys)
def test_existence_check_failures(self) -> None:
    """An existence check fails when the query returns rows and no rows are
    allowed."""
    self.mock_client.run_query_async.return_value = [
        "some result row",
        "some other result row",
    ]
    validation_job = DataValidationJob(
        region_code="US_VA",
        validation=ExistenceDataValidationCheck(
            validation_type=ValidationCheckType.EXISTENCE,
            view=BigQueryView(
                dataset_id="my_dataset",
                view_id="test_view",
                view_query_template="select * from literally_anything",
            ),
        ),
    )
    check_result = ExistenceValidationChecker.run_check(validation_job)
    self.assertEqual(
        check_result,
        DataValidationJobResult(
            validation_job=validation_job,
            was_successful=False,
            failure_description="Found [2] invalid rows, though [0] were expected",
        ),
    )
def setUp(self):
    """Patch project-id lookup, the BigQuery client, and the dashboard export
    config so tests run against fakes instead of real GCP resources."""
    project_id = 'fake-recidiviz-project'
    self.mock_dataset_name = 'base_dataset'
    self.mock_dataset = bigquery.dataset.DatasetReference(
        project_id, self.mock_dataset_name)
    # Any code reading metadata.project_id() sees the fake project.
    self.metadata_patcher = mock.patch('recidiviz.utils.metadata.project_id')
    self.mock_project_id_fn = self.metadata_patcher.start()
    self.mock_project_id_fn.return_value = project_id
    # Patch the client where the module under test imports it.
    self.client_patcher = mock.patch(
        'recidiviz.calculator.query.state.dashboard_export_manager.BigQueryClientImpl')
    self.mock_client = self.client_patcher.start().return_value
    self.mock_client.dataset_ref_for_id.return_value = self.mock_dataset
    self.mock_view = BigQueryView(dataset_id=self.mock_dataset.dataset_id,
                                  view_id='test_view',
                                  view_query_template='SELECT NULL LIMIT 0')
    self.views_to_export = [self.mock_view]
    dashboard_export_config_values = {
        'STATES_TO_EXPORT': ['US_CA'],
        'VIEWS_TO_EXPORT': self.views_to_export,
    }
    # Replace the export config module attributes with the values above.
    self.dashboard_export_config_patcher = mock.patch(
        'recidiviz.calculator.query.state.dashboard_export_manager.dashboard_export_config',
        **dashboard_export_config_values)
    self.mock_export_config = self.dashboard_export_config_patcher.start()
    self.views_to_update = {self.mock_dataset_name: self.views_to_export}
def setUp(self) -> None:
    """Set up a fake project, dataset, and table, and patch the underlying
    BigQuery client so BigQueryClientImpl talks to a mock."""
    self.location = 'US'
    self.mock_project_id = 'fake-recidiviz-project'
    self.mock_dataset_id = 'fake-dataset'
    self.mock_table_id = 'test_table'
    self.mock_dataset_ref = bigquery.dataset.DatasetReference(
        self.mock_project_id,
        self.mock_dataset_id,
    )
    self.mock_table = self.mock_dataset_ref.table(self.mock_table_id)

    # Patch project-id resolution before constructing the client.
    self.metadata_patcher = mock.patch('recidiviz.utils.metadata.project_id')
    self.mock_project_id_fn = self.metadata_patcher.start()
    self.mock_project_id_fn.return_value = self.mock_project_id

    self.client_patcher = mock.patch(
        'recidiviz.big_query.big_query_client.client')
    self.mock_client = self.client_patcher.start().return_value

    self.mock_view = BigQueryView(
        dataset_id='dataset',
        view_id='test_view',
        view_query_template='SELECT NULL LIMIT 0',
        should_materialize=True,
    )
    self.bq_client = BigQueryClientImpl()
def test_sameness_check_numbers_different_values_within_margin(self):
    """A numeric sameness check passes when row differences stay within the
    configured max_allowed_error."""
    self.mock_client.run_query_async.return_value = [
        {'a': 98, 'b': 100, 'c': 99},
    ]
    validation_job = DataValidationJob(
        region_code='US_VA',
        validation=SamenessDataValidationCheck(
            validation_type=ValidationCheckType.SAMENESS,
            comparison_columns=['a', 'b', 'c'],
            sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
            max_allowed_error=0.02,
            view=BigQueryView(
                dataset_id='my_dataset',
                view_id='test_view',
                view_query_template='select * from literally_anything',
            ),
        ),
    )
    check_result = SamenessValidationChecker.run_check(validation_job)
    self.assertEqual(
        check_result,
        DataValidationJobResult(
            validation_job=validation_job,
            was_successful=True,
            failure_description=None,
        ),
    )
def test_sameness_check_numbers_multiple_rows_above_margin(self):
    """When several rows exceed the numeric margin, the failure description
    reports the row count and the worst error observed."""
    query_rows = [
        {'a': 97, 'b': 100, 'c': 99},
        {'a': 14, 'b': 21, 'c': 14},
    ]
    self.mock_client.run_query_async.return_value = query_rows
    validation_job = DataValidationJob(
        region_code='US_VA',
        validation=SamenessDataValidationCheck(
            validation_type=ValidationCheckType.SAMENESS,
            comparison_columns=['a', 'b', 'c'],
            sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
            max_allowed_error=0.02,
            view=BigQueryView(
                dataset_id='my_dataset',
                view_id='test_view',
                view_query_template='select * from literally_anything',
            ),
        ),
    )
    check_result = SamenessValidationChecker.run_check(validation_job)
    self.assertEqual(
        check_result,
        DataValidationJobResult(
            validation_job=validation_job,
            was_successful=False,
            failure_description='2 row(s) had unacceptable margins of error. The acceptable margin '
            'of error is only 0.02, but the validation returned rows with '
            'errors as high as 0.3333.',
        ),
    )
def test_string_sameness_check_different_values_handle_empty_string(self):
    """A None value among string columns counts as a mismatch against the
    non-None values, failing the zero-tolerance check."""
    self.mock_client.run_query_async.return_value = [
        {'a': 'same', 'b': 'same', 'c': None},
    ]
    validation_job = DataValidationJob(
        region_code='US_VA',
        validation=SamenessDataValidationCheck(
            validation_type=ValidationCheckType.SAMENESS,
            comparison_columns=['a', 'b', 'c'],
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            view=BigQueryView(
                dataset_id='my_dataset',
                view_id='test_view',
                view_query_template='select * from literally_anything',
            ),
        ),
    )
    check_result = SamenessValidationChecker.run_check(validation_job)
    self.assertEqual(
        check_result,
        DataValidationJobResult(
            validation_job=validation_job,
            was_successful=False,
            failure_description='1 out of 1 row(s) did not contain matching strings. '
            'The acceptable margin of error is only 0.0, but the '
            'validation returned an error rate of 1.0.',
        ),
    )
def test_string_sameness_check_strings_values_all_none(self) -> None:
    """A STRINGS sameness check treats a row of all-None values as matching
    (None == None), so the validation succeeds."""
    self.mock_client.run_query_async.return_value = [
        {"a": None, "b": None, "c": None}
    ]
    validation_job = DataValidationJob(
        region_code="US_VA",
        validation=SamenessDataValidationCheck(
            validation_type=ValidationCheckType.SAMENESS,
            comparison_columns=["a", "b", "c"],
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            view=BigQueryView(
                dataset_id="my_dataset",
                view_id="test_view",
                view_query_template="select * from literally_anything",
            ),
        ),
    )
    check_result = SamenessValidationChecker.run_check(validation_job)
    self.assertEqual(
        check_result,
        DataValidationJobResult(
            validation_job=validation_job,
            was_successful=True,
            failure_description=None,
        ),
    )
def test_string_sameness_check_different_values_handle_non_string_type(self):
    """A non-string value in a STRING sameness check raises a ValueError
    naming the offending type, value, and validation."""
    self.mock_client.run_query_async.return_value = [
        {'a': 'same', 'b': 'same', 'c': 1245},
    ]
    validation_job = DataValidationJob(
        region_code='US_VA',
        validation=SamenessDataValidationCheck(
            validation_type=ValidationCheckType.SAMENESS,
            comparison_columns=['a', 'b', 'c'],
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            view=BigQueryView(
                dataset_id='my_dataset',
                view_id='test_view',
                view_query_template='select * from literally_anything',
            ),
        ),
    )
    with self.assertRaises(ValueError) as ctx:
        _ = SamenessValidationChecker.run_check(validation_job)
    self.assertEqual(
        str(ctx.exception),
        "Unexpected type [<class 'int'>] for value [1245] in STRING validation [test_view].",
    )
def test_string_sameness_check_different_values_handle_empty_string(self) -> None:
    """A None among string columns counts as a mismatch, failing the
    zero-tolerance check with a 100% error rate."""
    self.mock_client.run_query_async.return_value = [
        {"a": "same", "b": "same", "c": None}
    ]
    validation_job = DataValidationJob(
        region_code="US_VA",
        validation=SamenessDataValidationCheck(
            validation_type=ValidationCheckType.SAMENESS,
            comparison_columns=["a", "b", "c"],
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            view=BigQueryView(
                dataset_id="my_dataset",
                view_id="test_view",
                view_query_template="select * from literally_anything",
            ),
        ),
    )
    check_result = SamenessValidationChecker.run_check(validation_job)
    self.assertEqual(
        check_result,
        DataValidationJobResult(
            validation_job=validation_job,
            was_successful=False,
            failure_description="1 out of 1 row(s) did not contain matching strings. "
            "The acceptable margin of error is only 0.0, but the "
            "validation returned an error rate of 1.0.",
        ),
    )
def test_string_sameness_check_different_values_above_margin(self):
    """A string sameness check whose bad-row rate exceeds max_allowed_error
    should fail with a descriptive message."""
    num_bad_rows = 5
    # Margin deliberately below the real bad-row rate so the check fails.
    max_allowed_error = (num_bad_rows - 1) / 100
    self.mock_client.run_query_async.return_value = (
        self.return_string_values_with_num_bad_rows(num_bad_rows))
    validation_job = DataValidationJob(
        region_code='US_VA',
        validation=SamenessDataValidationCheck(
            validation_type=ValidationCheckType.SAMENESS,
            comparison_columns=['a', 'b', 'c'],
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            max_allowed_error=max_allowed_error,
            view=BigQueryView(
                dataset_id='my_dataset',
                view_id='test_view',
                view_query_template='select * from literally_anything',
            ),
        ),
    )
    check_result = SamenessValidationChecker.run_check(validation_job)
    actual_expected_error = num_bad_rows / 100
    self.assertEqual(
        check_result,
        DataValidationJobResult(
            validation_job=validation_job,
            was_successful=False,
            failure_description=f'{num_bad_rows} out of 100 row(s) did not contain matching strings. '
            f'The acceptable margin of error is only {max_allowed_error}, but the '
            f'validation returned an error rate of {actual_expected_error}.',
        ),
    )
def test_string_sameness_check_different_values_within_margin(self) -> None:
    """A string sameness check passes when the bad-row rate is within the
    configured max_allowed_error."""
    num_bad_rows = 2
    # Margin exactly equal to the bad-row rate — the check should pass.
    max_allowed_error = num_bad_rows / 100
    self.mock_client.run_query_async.return_value = (
        self.return_string_values_with_num_bad_rows(num_bad_rows)
    )
    validation_job = DataValidationJob(
        region_code="US_VA",
        validation=SamenessDataValidationCheck(
            validation_type=ValidationCheckType.SAMENESS,
            comparison_columns=["a", "b", "c"],
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            max_allowed_error=max_allowed_error,
            view=BigQueryView(
                dataset_id="my_dataset",
                view_id="test_view",
                view_query_template="select * from literally_anything",
            ),
        ),
    )
    check_result = SamenessValidationChecker.run_check(validation_job)
    self.assertEqual(
        check_result,
        DataValidationJobResult(
            validation_job=validation_job,
            was_successful=True,
            failure_description=None,
        ),
    )
def test_existence_check_failures_below_threshold(self) -> None:
    """An existence check succeeds when the number of returned rows does not
    exceed num_allowed_rows."""
    self.mock_client.run_query_async.return_value = [
        "some result row",
        "some other result row",
    ]
    validation_job = DataValidationJob(
        region_code="US_VA",
        validation=ExistenceDataValidationCheck(
            validation_type=ValidationCheckType.EXISTENCE,
            view=BigQueryView(
                dataset_id="my_dataset",
                view_id="test_view",
                view_query_template="select * from literally_anything",
            ),
            num_allowed_rows=2,
        ),
    )
    check_result = ExistenceValidationChecker.run_check(validation_job)
    self.assertEqual(
        check_result,
        DataValidationJobResult(
            validation_job=validation_job,
            was_successful=True,
            failure_description=None,
        ),
    )
def test_create_dataset_and_update_views(self):
    """Test that create_dataset_and_update_views creates a dataset if
    necessary, and updates all views."""
    dataset = bigquery.dataset.DatasetReference(_PROJECT_ID, _DATASET_NAME)
    sample_views = [
        {
            'view_id': 'my_fake_view',
            'view_query': 'SELECT NULL LIMIT 0'
        },
        {
            'view_id': 'my_other_fake_view',
            'view_query': 'SELECT NULL LIMIT 0'
        },
    ]
    # NOTE(review): 'view_query' keys are splatted into the BigQueryView
    # constructor alongside view_query_template='a' — presumably BigQueryView
    # accepts (or ignores) a view_query kwarg; confirm against its signature.
    mock_views = [
        BigQueryView(dataset_id=_DATASET_NAME, view_query_template='a', **view)
        for view in sample_views
    ]
    self.mock_client.dataset_ref_for_id.return_value = dataset
    # pylint: disable=protected-access
    view_update_manager._create_dataset_and_update_views(mock_views)
    self.mock_client.dataset_ref_for_id.assert_called_with(_DATASET_NAME)
    # Second arg (None) is presumably a default-TTL/expiration parameter.
    self.mock_client.create_dataset_if_necessary.assert_called_with(
        dataset, None)
    self.mock_client.create_or_update_view.assert_has_calls(
        [mock.call(dataset, view) for view in mock_views])
def setUp(self) -> None:
    """Set up a fake project/dataset/table and patch the raw BigQuery client
    so BigQueryClientImpl operates against a mock."""
    self.location = "US"
    self.mock_project_id = "fake-recidiviz-project"
    self.mock_dataset_id = "fake-dataset"
    self.mock_table_id = "test_table"
    self.mock_dataset_ref = bigquery.dataset.DatasetReference(
        self.mock_project_id, self.mock_dataset_id
    )
    self.mock_table = self.mock_dataset_ref.table(self.mock_table_id)
    # Project-id resolution must be patched before BigQueryClientImpl() below.
    self.metadata_patcher = mock.patch("recidiviz.utils.metadata.project_id")
    self.mock_project_id_fn = self.metadata_patcher.start()
    self.mock_project_id_fn.return_value = self.mock_project_id
    # Patch the module-level client object the impl wraps.
    self.client_patcher = mock.patch("recidiviz.big_query.big_query_client.client")
    self.mock_client = self.client_patcher.start().return_value
    self.mock_view = BigQueryView(
        dataset_id="dataset",
        view_id="test_view",
        view_query_template="SELECT NULL LIMIT 0",
        should_materialize=True,
    )
    self.bq_client = BigQueryClientImpl()
def test_parse_view_materialized_parent(self) -> None:
    """A reference to `<table>_materialized` with no materialized-address map
    set falls back to plain (dataset_id, table_id) parent keys.

    NOTE(review): this method name duplicates another
    test_parse_view_materialized_parent elsewhere in this code; if both live
    in the same TestCase, one definition silently shadows the other — confirm
    and rename if so.
    """
    view = BigQueryView(
        dataset_id="my_dataset",
        view_id="my_view_id",
        view_query_template="SELECT * FROM `{project_id}.some_dataset.some_table_materialized`",
    )
    node = BigQueryViewDagNode(view)
    # Presumably parent parsing strips the `_materialized` suffix here —
    # TODO confirm against BigQueryViewDagNode's parsing logic.
    self.assertEqual(node.parent_keys, {("some_dataset", "some_table")})