def test_resolve_dataplex_entity_uris(self, temp_configs_dir,
                                      test_dq_dataplex_client,
                                      test_bigquery_client,
                                      test_dataplex_metadata_defaults_configs,
                                      tmp_path):
    try:
        temp_dir = Path(tmp_path).joinpath("clouddq_test_configs_cache_2")
        temp_dir.mkdir(parents=True)
        with working_directory(temp_dir):
            configs_cache = lib.prepare_configs_cache(temp_configs_dir)
            count_1 = configs_cache._cache_db['entities'].count
            target_rule_binding_ids = [
                row["id"].lower()
                for row in configs_cache._cache_db.query(
                    "select id from rule_bindings")
            ]
            configs_cache.resolve_dataplex_entity_uris(
                dataplex_client=test_dq_dataplex_client,
                bigquery_client=test_bigquery_client,
                default_configs=test_dataplex_metadata_defaults_configs,
                target_rule_binding_ids=target_rule_binding_ids,
            )
            count_2 = configs_cache._cache_db['entities'].count
            assert count_2 > count_1
    finally:
        shutil.rmtree(temp_dir)
def test_cli_dry_run_invalid_configs_fail(self, runner, tmp_path,
                                          gcp_project_id, gcp_bq_region,
                                          gcp_bq_dataset):
    try:
        temp_dir = Path(tmp_path).joinpath(
            "clouddq_test_cli_dry_run_invalid_configs_fail")
        temp_dir.mkdir(parents=True)
        configs_invalid = Path("tests").joinpath(
            "resources", "configs_invalid.yml").absolute()
        with working_directory(temp_dir):
            args = [
                "ALL",
                f"{configs_invalid}",
                f"--gcp_project_id={gcp_project_id}",
                f"--gcp_bq_dataset_id={gcp_bq_dataset}",
                f"--gcp_region_id={gcp_bq_region}",
                "--dry_run",
                "--debug",
                "--skip_sql_validation",
            ]
            result = runner.invoke(main, args)
            logger.info(result.output)
            assert result.exit_code == 1
            error_message = (
                "must have defined value 'rule_ids' of type 'list'.")
            assert error_message in result.output
    finally:
        shutil.rmtree(temp_dir)
def test_get_entities_configs_from_rule_bindings(
        self, test_default_dataplex_configs_cache, gcp_project_id,
        gcp_dataplex_bigquery_dataset_id, tmp_path):
    try:
        temp_dir = Path(tmp_path).joinpath("clouddq_test_configs_cache_3")
        temp_dir.mkdir(parents=True)
        with working_directory(temp_dir):
            rule_binding_ids = [
                'T5_URI_BQ_EMAIL_DUPLICATE',
                'T7_URI_DP_EMAIL_DUPLICATE',
            ]
            output = (
                test_default_dataplex_configs_cache
                .get_entities_configs_from_rule_bindings(rule_binding_ids)
            )
            table_name = (
                f'{gcp_project_id.replace("-","_")}__{gcp_dataplex_bigquery_dataset_id}'
                '__contact_details__VALUE_1'
            )
            expected = {
                table_name: {
                    'rule_binding_ids_list': [
                        'T5_URI_BQ_EMAIL_DUPLICATE',
                        'T7_URI_DP_EMAIL_DUPLICATE',
                    ]
                }
            }
            assert expected == output
    finally:
        shutil.rmtree(temp_dir)
def test_cli(self, runner, temp_configs_dir, gcp_application_credentials,
             gcp_project_id, gcp_bq_dataset, gcp_bq_region,
             target_bq_result_dataset_name, target_bq_result_table_name,
             tmp_path):
    try:
        temp_dir = Path(tmp_path).joinpath("clouddq_test_cli_integration_1")
        temp_dir.mkdir(parents=True)
        with working_directory(temp_dir):
            logger.info(
                f"Running test_cli_dbt_path with {gcp_project_id}, "
                f"{gcp_bq_dataset}, {gcp_bq_region}"
            )
            logger.info(f"test_cli_dbt_path {gcp_application_credentials}")
            target_table = (
                f"{gcp_project_id}.{target_bq_result_dataset_name}"
                f".{target_bq_result_table_name}"
            )
            args = [
                "T1_DQ_1_VALUE_NOT_NULL,T2_DQ_1_EMAIL,T3_DQ_1_EMAIL_DUPLICATE",
                f"{temp_configs_dir}",
                f"--gcp_project_id={gcp_project_id}",
                f"--gcp_bq_dataset_id={gcp_bq_dataset}",
                f"--gcp_region_id={gcp_bq_region}",
                f"--target_bigquery_summary_table={target_table}",
                "--debug",
                "--summary_to_stdout",
            ]
            logger.info(f"Args: {' '.join(args)}")
            result = runner.invoke(main, args)
            print(result.output)
            assert result.exit_code == 0
    finally:
        shutil.rmtree(temp_dir)
def run_dbt(
    dbt_path: Path,
    dbt_profile_dir: Path,
    configs: Optional[Dict] = None,
    environment: str = "clouddq",
    debug: bool = False,
    dry_run: bool = False,
) -> None:
    """Invoke dbt on the CloudDQ dbt project.

    Args:
        dbt_path: Path: path of the dbt project described in `dbt_project.yml`.
        dbt_profile_dir: Path: directory containing the dbt `profiles.yml`.
        configs: typing.Dict: variables passed to dbt via `--vars`.
        environment: str: dbt profile target to run against.
        debug: bool: run `dbt debug` instead of `dbt run`. (Default value = False)
        dry_run: bool: log the generated command without executing dbt. (Default value = False)
    """
    if not configs:
        configs = {}
    command = []
    command.extend(["run"])
    command += [
        "--profiles-dir",
        str(dbt_profile_dir),
        "--vars",
        json.dumps(configs),
        "--target",
        environment,
    ]
    try:
        with working_directory(dbt_path):
            if debug:
                logger.debug("Using dbt working directory: %s", Path.cwd())
                debug_commands = command.copy()
                debug_commands[0] = "debug"
                try:
                    logger.info("\nExecuting dbt command:\n %s", debug_commands)
                    dbt(debug_commands)
                except SystemExit:
                    pass
            else:
                if not dry_run:
                    logger.info("\nExecuting dbt command:\n %s", command)
                    dbt(command)
                else:
                    logger.info(
                        "\ndbt command generated as part of dry-run:\n %s",
                        command)
    except SystemExit as sysexit:
        if sysexit.code == 0:
            logger.debug("dbt run completed successfully.")
        else:
            raise RuntimeError(
                "dbt run failed with Runtime Error. "
                "See Runtime Error description in dbt run logs for details."
            )
    except Exception as e:
        raise RuntimeError(f"dbt run failed with unknown error: '{e}'")
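# A minimal usage sketch for run_dbt, assuming a dbt project directory and a
# profiles directory prepared elsewhere (e.g. by DbtRunner); the paths, vars,
# and the example_run_dbt_dry_run helper below are hypothetical, for
# illustration only. With debug=False and dry_run=True, run_dbt only logs the
# generated command, e.g. ["run", "--profiles-dir", "dbt_profiles", "--vars",
# "{...}", "--target", "clouddq"], without invoking dbt.
def example_run_dbt_dry_run() -> None:
    run_dbt(
        dbt_path=Path("dbt"),                  # directory containing dbt_project.yml (assumed)
        dbt_profile_dir=Path("dbt_profiles"),  # directory containing profiles.yml (assumed)
        configs={"some_var": "some_value"},    # arbitrary example vars, forwarded via --vars
        environment="clouddq",                 # dbt profile target
        debug=False,
        dry_run=True,                          # log the command only; do not execute dbt
    )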
def test_cli_dry_run_oauth_configs(self, runner, temp_configs_dir,
                                   gcp_project_id, gcp_bq_region,
                                   gcp_bq_dataset,
                                   gcp_application_credentials, tmp_path):
    try:
        temp_dir = Path(tmp_path).joinpath("clouddq_test_cli_integration_3")
        temp_dir.mkdir(parents=True)
        with working_directory(temp_dir):
            logger.info(
                f"test_cli_dry_run_oauth_configs {gcp_application_credentials}"
            )
            args = [
                "T1_DQ_1_VALUE_NOT_NULL,T2_DQ_1_EMAIL,T3_DQ_1_EMAIL_DUPLICATE",
                f"{temp_configs_dir}",
                f"--gcp_project_id={gcp_project_id}",
                f"--gcp_bq_dataset_id={gcp_bq_dataset}",
                f"--gcp_region_id={gcp_bq_region}",
                "--dry_run",
                "--debug",
            ]
            result = runner.invoke(main, args)
            print(result.output)
            assert result.exit_code == 0
    finally:
        shutil.rmtree(temp_dir)
def test_cli_dry_run_oauth_impersonation_fail(
        self, runner, temp_configs_dir, gcp_project_id, gcp_bq_region,
        gcp_bq_dataset, gcp_application_credentials,
        gcp_impersonation_credentials, tmp_path):
    try:
        temp_dir = Path(tmp_path).joinpath("clouddq_test_cli_integration_8")
        temp_dir.mkdir(parents=True)
        with working_directory(temp_dir):
            logger.info(
                f"test_cli_dry_run_oauth_impersonation {gcp_application_credentials}"
            )
            args = [
                "T1_DQ_1_VALUE_NOT_NULL,T2_DQ_1_EMAIL,T3_DQ_1_EMAIL_DUPLICATE",
                f"{temp_configs_dir}",
                f"--gcp_project_id={gcp_project_id}",
                f"--gcp_bq_dataset_id={gcp_bq_dataset}",
                f"--gcp_region_id={gcp_bq_region}",
                "--gcp_impersonation_credentials=non-existent-svc@non-existent-project.com",
                "--dry_run",
                "--debug",
            ]
            if not gcp_impersonation_credentials:
                pytest.skip(
                    "Skipping tests involving service-account impersonation because "
                    "test environment variable IMPERSONATION_SERVICE_ACCOUNT cannot be found."
                )
            result = runner.invoke(main, args)
            print(result.output)
            assert result.exit_code == 1
            assert isinstance(result.exception, SystemExit)
    finally:
        shutil.rmtree(temp_dir)
def test_cli_dry_run_sa_key_and_impersonation(
        self, runner, temp_configs_dir, gcp_project_id, gcp_bq_region,
        gcp_bq_dataset, gcp_sa_key, gcp_impersonation_credentials,
        gcp_application_credentials, tmp_path):
    try:
        temp_dir = Path(tmp_path).joinpath("clouddq_test_cli_integration_6")
        temp_dir.mkdir(parents=True)
        with working_directory(temp_dir):
            logger.info(
                f"test_cli_dry_run_sa_key_and_impersonation {gcp_application_credentials}"
            )
            args = [
                "T1_DQ_1_VALUE_NOT_NULL,T2_DQ_1_EMAIL,T3_DQ_1_EMAIL_DUPLICATE",
                f"{temp_configs_dir}",
                f"--gcp_project_id={gcp_project_id}",
                f"--gcp_bq_dataset_id={gcp_bq_dataset}",
                f"--gcp_region_id={gcp_bq_region}",
                f"--gcp_service_account_key_path={gcp_sa_key}",
                f"--gcp_impersonation_credentials={gcp_impersonation_credentials}",
                "--dry_run",
                "--debug",
            ]
            if not gcp_sa_key:
                pytest.skip(
                    "Skipping tests involving exported service-account key "
                    "credentials because test environment variable GOOGLE_SDK_CREDENTIALS"
                    " cannot be found.")
            result = runner.invoke(main, args)
            print(result.output)
            assert result.exit_code == 0
    finally:
        shutil.rmtree(temp_dir)
def test_prepare_configs_cache(self, temp_configs_dir, tmp_path):
    try:
        temp_dir = Path(tmp_path).joinpath("clouddq_test_configs_cache_1")
        temp_dir.mkdir(parents=True)
        with working_directory(temp_dir):
            configs_cache = lib.prepare_configs_cache(temp_configs_dir)
            assert type(configs_cache) == DqConfigsCache
            assert configs_cache._cache_db["entities"].exists()
            assert configs_cache._cache_db["rules"].exists()
            assert configs_cache._cache_db["row_filters"].exists()
            assert configs_cache._cache_db["rule_bindings"].exists()
    finally:
        shutil.rmtree(temp_dir)
def test_cli_missing_connection_configs_fail(self, runner, tmp_path,
                                             source_configs_path):
    try:
        temp_dir = Path(tmp_path).joinpath("clouddq_test_cli_dry_run_1")
        temp_dir.mkdir(parents=True)
        with working_directory(temp_dir):
            args = [
                "T1_DQ_1_VALUE_NOT_NULL,T2_DQ_1_EMAIL,T3_DQ_1_EMAIL_DUPLICATE",
                f"{source_configs_path}",
                "--dry_run",
                "--debug",
                "--skip_sql_validation",
            ]
            result = runner.invoke(main, args)
            logger.info(result.output)
            assert result.exit_code == 1
            assert isinstance(result.exception, ValueError)
    finally:
        shutil.rmtree(temp_dir)
@pytest.fixture
def test_default_dataplex_configs_cache(
        temp_configs_dir, test_dq_dataplex_client,
        test_dataplex_metadata_defaults_configs, tmp_path,
        test_bigquery_client):
    temp_path = Path(tmp_path).joinpath("clouddq_test_configs_cache")
    temp_path.mkdir()
    with working_directory(temp_path):
        configs_cache = prepare_configs_cache(configs_path=temp_configs_dir)
        target_rule_binding_ids = [
            row["id"] for row in configs_cache._cache_db.query(
                "select id from rule_bindings")
        ]
        configs_cache.resolve_dataplex_entity_uris(
            dataplex_client=test_dq_dataplex_client,
            bigquery_client=test_bigquery_client,
            default_configs=test_dataplex_metadata_defaults_configs,
            target_rule_binding_ids=target_rule_binding_ids)
        yield configs_cache
def test_cli_dry_run(self, runner, tmp_path, source_configs_path,
                     gcp_project_id, gcp_bq_region, gcp_bq_dataset):
    try:
        temp_dir = Path(tmp_path).joinpath("clouddq_test_cli_dry_run_2")
        temp_dir.mkdir(parents=True)
        with working_directory(temp_dir):
            args = [
                "T1_DQ_1_VALUE_NOT_NULL,T2_DQ_1_EMAIL,T3_DQ_1_EMAIL_DUPLICATE",
                f"{source_configs_path}",
                f"--gcp_project_id={gcp_project_id}",
                f"--gcp_bq_dataset_id={gcp_bq_dataset}",
                f"--gcp_region_id={gcp_bq_region}",
                "--dry_run",
                "--debug",
                "--skip_sql_validation",
            ]
            result = runner.invoke(main, args)
            logger.info(result.output)
            assert result.exit_code == 0
    finally:
        shutil.rmtree(temp_dir)
def test_cli_16_rb_10_rules(
        self, runner, tmp_path, gcp_project_id, gcp_dataplex_region,
        gcp_dataplex_lake_name, gcp_dataplex_zone_id, gcp_bq_dataset,
        target_bq_result_dataset_name, target_bq_result_table_name):
    try:
        temp_dir = Path(tmp_path).joinpath("clouddq_test_cli_16_rb_10_rules")
        temp_dir.mkdir(parents=True)
        source_configs = Path("tests").joinpath(
            "resources", "configs_16_rb_10_rules.yml").absolute()
        # Prepare entity_uri configs
        configs_16_rb_10_rules = temp_dir.joinpath(
            "configs_16_rb_10_rules.yml").absolute()
        target_table = (
            f"{gcp_project_id}.{target_bq_result_dataset_name}"
            f".{target_bq_result_table_name}"
        )
        with open(source_configs) as source_file:
            lines = source_file.read()
        with open(configs_16_rb_10_rules, "w") as target_file:
            lines = lines.replace("<my-gcp-dataplex-lake-id>", gcp_dataplex_lake_name)
            lines = lines.replace("<my-gcp-dataplex-region-id>", gcp_dataplex_region)
            lines = lines.replace("<my-gcp-project-id>", gcp_project_id)
            lines = lines.replace("<my-gcp-dataplex-zone-id>", gcp_dataplex_zone_id)
            target_file.write(lines)
        with working_directory(temp_dir):
            args = [
                "ALL",
                f"{configs_16_rb_10_rules}",
                f"--gcp_project_id={gcp_project_id}",
                f"--gcp_bq_dataset_id={gcp_bq_dataset}",
                f"--target_bigquery_summary_table={target_table}",
                "--debug",
            ]
            logger.info(f"Args: {' '.join(args)}")
            result = runner.invoke(main, args)
            logger.info(result.output)
            assert result.exit_code == 0
    finally:
        shutil.rmtree(temp_dir)
def test_cli_dry_run_100_rules(self, runner, tmp_path, gcp_project_id,
                               gcp_bq_region, gcp_bq_dataset):
    try:
        temp_dir = Path(tmp_path).joinpath("clouddq_test_cli_dry_run_100_rules")
        temp_dir.mkdir(parents=True)
        configs_100_rules = Path("tests").joinpath(
            "resources", "configs_100_rules.yml").absolute()
        with working_directory(temp_dir):
            args = [
                "ALL",
                f"{configs_100_rules}",
                f"--gcp_project_id={gcp_project_id}",
                f"--gcp_bq_dataset_id={gcp_bq_dataset}",
                f"--gcp_region_id={gcp_bq_region}",
                "--dry_run",
                "--debug",
                "--skip_sql_validation",
            ]
            result = runner.invoke(main, args)
            logger.info(result.output)
            assert result.exit_code == 0
    finally:
        shutil.rmtree(temp_dir)
def test_num_threads(
    self,
    runner,
    target_bq_result_dataset_name,
    target_bq_result_table_name,
    tmp_path,
    test_resources,
):
    try:
        temp_dir = Path(tmp_path).joinpath("cloud_dq_working_dir")
        temp_dir.mkdir(parents=True)
        with working_directory(temp_dir):
            intermediate_table_expiration_hours = 24
            num_threads = 10
            dbt_runner = DbtRunner(
                environment_target="Dev",
                gcp_project_id="<my-gcp-project-id>",
                gcp_region_id="<my-gcp-region-id>",
                gcp_bq_dataset_id="<my-bq-dataset-id>",
                gcp_service_account_key_path=None,
                gcp_impersonation_credentials=None,
                intermediate_table_expiration_hours=intermediate_table_expiration_hours,
                num_threads=num_threads,
            )
            profiles_yml_actual = (
                dbt_runner.connection_config
                .to_dbt_profiles_yml(environment_target="Dev")
                .strip()
            )
            with open(test_resources / "expected_test_profiles.yml") as source_file:
                profiles_yml_expected = source_file.read().strip()
                profiles_yml_expected = profiles_yml_expected.replace(
                    GOOGLE_LICENSE.strip(), "").strip()
                profiles_yml_expected = profiles_yml_expected.replace("'", "")
            assert profiles_yml_actual == profiles_yml_expected
    finally:
        shutil.rmtree(temp_dir)
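# For reference, the dbt profiles.yml that to_dbt_profiles_yml is compared
# against follows the standard dbt BigQuery profile shape sketched below. This
# is an illustrative assumption about tests/resources/expected_test_profiles.yml,
# not its verbatim contents; the authoritative copy lives in that file.
#
#   Dev:
#     target: Dev
#     outputs:
#       Dev:
#         type: bigquery
#         method: oauth
#         project: <my-gcp-project-id>
#         dataset: <my-bq-dataset-id>
#         location: <my-gcp-region-id>
#         threads: 10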
def test_dq_rule_binding_conflicted_column_id_is_escaped_for_sql_expr(
        self, temp_configs_dir, tmp_path):
    try:
        temp_dir = Path(tmp_path).joinpath("clouddq_test_configs_cache_2")
        temp_dir.mkdir(parents=True)
        with working_directory(temp_dir):
            configs_cache = lib.prepare_configs_cache(temp_configs_dir)
    finally:
        shutil.rmtree(temp_dir)
    dq_rule_binding_dict_with_conflicted_column_id = {
        "entity_id": "TEST_TABLE",
        "column_id": "data",
        "row_filter_id": "NONE",
        "rule_ids": ["REGEX_VALID_EMAIL"],
        "metadata": {"key": "value"},
    }
    output = DqRuleBinding.from_dict(
        rule_binding_id="valid",
        kwargs=dq_rule_binding_dict_with_conflicted_column_id,
    ).resolve_all_configs_to_dict(configs_cache=configs_cache)
    assert (
        output["rule_configs_dict"]["REGEX_VALID_EMAIL"]["rule_sql_expr"]
        == "REGEXP_CONTAINS( CAST( data.data AS STRING), '^[^@]+[@]{1}[^@]+$' )"
    )
def test_target_table_dq_summary_duplicate(self, runner, temp_configs_dir,
                                           gcp_application_credentials,
                                           gcp_project_id, gcp_bq_dataset,
                                           gcp_bq_region, tmp_path):
    try:
        temp_dir = Path(tmp_path).joinpath("clouddq_test_cli_integration_1")
        temp_dir.mkdir(parents=True)
        with working_directory(temp_dir):
            logger.info(
                f"Running test_cli_dbt_path with {gcp_project_id}, "
                f"{gcp_bq_dataset}, {gcp_bq_region}"
            )
            logger.info(f"test_cli_dbt_path {gcp_application_credentials}")
            target_table = f"{gcp_project_id}.{gcp_bq_dataset}.dq_summary"
            args = [
                "T1_DQ_1_VALUE_NOT_NULL,T2_DQ_1_EMAIL,T3_DQ_1_EMAIL_DUPLICATE",
                f"{temp_configs_dir}",
                f"--gcp_project_id={gcp_project_id}",
                f"--gcp_bq_dataset_id={gcp_bq_dataset}",
                f"--gcp_region_id={gcp_bq_region}",
                f"--target_bigquery_summary_table={target_table}",
                "--debug",
                "--summary_to_stdout",
            ]
            logger.info(f"Args: {' '.join(args)}")
            result = runner.invoke(main, args)
            print(result.output)
            assert result.exit_code == 1
            error_message = (
                f"ValueError: The target bigquery summary table name `{target_table}` "
                f"cannot be same as dq summary table name `{target_table}` which "
                f"is reserved for storing the intermediate results used by clouddq "
                f"for further processing in case of incremental validation."
            )
            assert error_message in result.output
    finally:
        shutil.rmtree(temp_dir)
def test_dq_rule_binding_conflicted_column_id_is_not_escaped_for_sql_statement(
        self, temp_configs_dir, tmp_path):
    try:
        temp_dir = Path(tmp_path).joinpath("clouddq_test_configs_cache_2")
        temp_dir.mkdir(parents=True)
        with working_directory(temp_dir):
            configs_cache = lib.prepare_configs_cache(temp_configs_dir)
    finally:
        shutil.rmtree(temp_dir)
    dq_rule_binding_dict_with_conflicted_column_id = {
        "entity_id": "TEST_TABLE",
        "column_id": "data",
        "row_filter_id": "NONE",
        "rule_ids": [{"NO_DUPLICATES_IN_COLUMN_GROUPS": {"column_names": "data"}}],
        "metadata": {"key": "value"},
    }
    output = DqRuleBinding.from_dict(
        rule_binding_id="valid",
        kwargs=dq_rule_binding_dict_with_conflicted_column_id,
    ).resolve_all_configs_to_dict(configs_cache=configs_cache)
    text = output["rule_configs_dict"]["NO_DUPLICATES_IN_COLUMN_GROUPS"]["rule_sql_expr"]
    expected = """
        |select a.*
        |from data a
        |inner join (
        |  select
        |    data
        |  from data
        |  group by data
        |  having count(*) > 1
        |) duplicates
        |using (data)"""
    assert (
        strip_margin(text.replace(r"\s\s+", " "))
        == strip_margin(expected.replace(r"\s\s+", " "))
    )
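# strip_margin above is assumed to behave like Scala's stripMargin, removing
# each line's leading whitespace up to and including the '|' delimiter so the
# expected SQL can be indented alongside the test code. A minimal sketch of
# that behaviour (example_strip_margin is a hypothetical stand-in for the
# real helper):
def example_strip_margin(text: str) -> str:
    import re

    # Drop leading whitespace plus the '|' margin marker after each newline.
    return re.sub(r"\n[ \t]*\|", "\n", text)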
def test_last_modified_in_dq_summary(
    self,
    runner,
    temp_configs_dir,
    gcp_project_id,
    gcp_bq_region,
    gcp_bq_dataset,
    gcp_application_credentials,
    tmp_path,
    target_bq_result_dataset_name,
    target_bq_result_table_name,
):
    try:
        temp_dir = Path(tmp_path).joinpath("clouddq_test_cli_integration_4")
        temp_dir.mkdir(parents=True)
        with working_directory(temp_dir):
            logger.info(
                f"test_last_modified_in_dq_summary {gcp_application_credentials}"
            )
            target_table = (
                f"{gcp_project_id}.{target_bq_result_dataset_name}"
                f".{target_bq_result_table_name}"
            )
            args = [
                "T1_DQ_1_VALUE_NOT_NULL,T2_DQ_1_EMAIL,T3_DQ_1_EMAIL_DUPLICATE",
                f"{temp_configs_dir}",
                f"--gcp_project_id={gcp_project_id}",
                f"--gcp_bq_dataset_id={gcp_bq_dataset}",
                f"--gcp_region_id={gcp_bq_region}",
                f"--target_bigquery_summary_table={target_table}",
                "--debug",
            ]
            result = runner.invoke(main, args)
            print(result.output)
            assert result.exit_code == 0
            # Test the last modified column in the summary
            try:
                client = BigQueryClient()
                sql = f"""
                WITH last_mod AS (
                    SELECT
                        project_id || '.' || dataset_id || '.' || table_id AS full_table_id,
                        TIMESTAMP_MILLIS(last_modified_time) AS last_modified
                    FROM `{gcp_project_id}.{gcp_bq_dataset}.__TABLES__`
                )
                SELECT count(*) as errors
                FROM `{target_table}` d
                JOIN last_mod ON last_mod.full_table_id = d.table_id
                WHERE d.last_modified IS NOT NULL
                  AND NOT d.last_modified = last_mod.last_modified
                """
                query_job = client.execute_query(sql)
                results = query_job.result()
                logger.info("Query done")
                row = results.next()
                errors = row.errors
                logger.info(f"Got {errors} errors")
                assert errors == 0
            except Exception as exc:
                logger.fatal(f'Exception in query: {exc}')
                assert False
            finally:
                client.close_connection()
    finally:
        shutil.rmtree(temp_dir)
def test_dq_rules(
    self,
    runner,
    temp_configs_from_dq_rules_config_file,
    gcp_application_credentials,
    gcp_project_id,
    gcp_bq_dataset,
    gcp_bq_region,
    target_bq_result_dataset_name,
    target_bq_result_table_name,
    tmp_path,
    client,
    gcp_impersonation_credentials,
    gcp_sa_key,
    create_expected_results_table,
    source_dq_rules_configs_file_path,
    test_resources,
    caplog,
):
    caplog.set_level(logging.INFO, logger="clouddq")
    try:
        temp_dir = Path(tmp_path).joinpath("clouddq_test_dq_rules")
        temp_dir.mkdir(parents=True)
        with working_directory(temp_dir):
            logger.info(
                f"test_last_modified_in_dq_summary {gcp_application_credentials}"
            )
            target_table = (
                f"{gcp_project_id}.{target_bq_result_dataset_name}"
                f".{target_bq_result_table_name}"
            )
            args = [
                "ALL",
                f"{temp_configs_from_dq_rules_config_file}",
                f"--gcp_project_id={gcp_project_id}",
                f"--gcp_bq_dataset_id={gcp_bq_dataset}",
                f"--gcp_region_id={gcp_bq_region}",
                f"--target_bigquery_summary_table={target_table}",
            ]
            result = runner.invoke(main, args)
            assert result.exit_code == 0
            intermediate_table_expiration_hours = 24
            num_threads = 8
            # Prepare dbt runtime
            dbt_runner = DbtRunner(
                environment_target="Dev",
                gcp_project_id=gcp_project_id,
                gcp_region_id=gcp_bq_region,
                gcp_bq_dataset_id=gcp_bq_dataset,
                gcp_service_account_key_path=gcp_sa_key,
                gcp_impersonation_credentials=gcp_impersonation_credentials,
                intermediate_table_expiration_hours=intermediate_table_expiration_hours,
                num_threads=num_threads,
            )
            dbt_path = dbt_runner.get_dbt_path()
            invocation_id = get_dbt_invocation_id(dbt_path)
            logger.info(f"Dbt invocation id is: {invocation_id}")
            # Test the DQ expected results
            sql = f"""
            WITH validation_errors AS (
                SELECT rule_binding_id, rule_id, column_id,
                    dimension, metadata_json_string, progress_watermark,
                    rows_validated, complex_rule_validation_errors_count,
                    complex_rule_validation_success_flag,
                    success_count, failed_count, null_count
                FROM `{gcp_project_id}.{target_bq_result_dataset_name}.{target_bq_result_table_name}`
                WHERE invocation_id='{invocation_id}'
                EXCEPT DISTINCT
                SELECT rule_binding_id, rule_id, column_id,
                    dimension, metadata_json_string, progress_watermark,
                    rows_validated, complex_rule_validation_errors_count,
                    complex_rule_validation_success_flag,
                    success_count, failed_count, null_count
                FROM `{create_expected_results_table}`
            )
            SELECT TO_JSON_STRING(validation_errors)
            FROM validation_errors;
            """
            logger.info(f"SQL query is: {sql}")
            query_job = client.execute_query(sql)
            results = query_job.result()
            logger.info("Query done")
            rows = list(results)
            logger.info(f"Query execution returned {len(rows)} rows")
            if len(rows):
                logger.info(
                    f"Input yaml from {source_dq_rules_configs_file_path}:")
                with open(source_dq_rules_configs_file_path) as input_yaml:
                    lines = input_yaml.read()
                    logger.info(lines)
                logger.warning(
                    "Rows with values not matching the expected "
                    "content in 'tests/resources/expected_results.json':")
                for row in rows:
                    record = json.loads(str(row[0]))
                    logger.warning(f"\n{pformat(record)}")
            failed_rows = [json.loads(row[0]) for row in rows]
            failed_rows_rule_binding_ids = [
                row['rule_binding_id'] for row in failed_rows
            ]
            failed_rows_rule_ids = [row['rule_id'] for row in failed_rows]
            with open(test_resources / "dq_rules_expected_results.json",
                      "rb") as source_file:
                expected_json = []
                json_data = json.loads(source_file.read())
                for record in json_data:
                    if record['rule_binding_id'] not in failed_rows_rule_binding_ids:
                        continue
                    if record['rule_id'] not in failed_rows_rule_ids:
                        continue
                    expected_json.append(record)
            assert failed_rows == expected_json
    finally:
        shutil.rmtree(temp_dir)
@pytest.fixture
def test_configs_cache(source_configs_path, tmp_path):
    temp_path = Path(tmp_path).joinpath("clouddq_test_configs_cache")
    temp_path.mkdir()
    with working_directory(temp_path):
        configs_cache = prepare_configs_cache(configs_path=source_configs_path)
        yield configs_cache