def check_store_backend_store_backend_id_functionality(
    store_backend: StoreBackend, store_backend_id: str = None
) -> None:
    """
    Assert that a store backend reads and writes its store_backend_id correctly.

    Args:
        store_backend: Instance of a StoreBackend subclass to test,
            e.g. TupleFilesystemStoreBackend
        store_backend_id: Manually supplied store_backend_id; when truthy, the
            backend must report exactly this value

    Returns:
        None
    """
    id_key = (".ge_store_backend_id",)
    store_error_uuid = "00000000-0000-0000-0000-00000000e003"

    # The id must be readable and must differ from store_error_uuid
    assert store_backend.store_backend_id is not None
    assert store_backend.store_backend_id != store_error_uuid

    # A manually supplied id must be the one that is reported
    if store_backend_id:
        assert store_backend.store_backend_id == store_backend_id

    # The reported id must be a valid UUID
    assert test_utils.validate_uuid4(store_backend.store_backend_id)

    # The id must be persisted under its dedicated key
    assert store_backend.has_key(key=id_key)

    # The persisted record must look like "store_backend_id = <uuid>"
    persisted_record = store_backend.get(key=id_key)
    uuid_token = pp.Word(pp.hexnums + "-")
    record_parser = "store_backend_id = " + uuid_token
    parsed_record = record_parser.parseString(persisted_record)
    assert test_utils.validate_uuid4(parsed_record[1])
def test_metric_store_store_backend_id(in_memory_param_store):
    """
    What does this test and why?
    A Store should be able to report its store_backend_id,
    which is set when the StoreBackend is instantiated.
    """
    metric_store = in_memory_param_store

    # The id must be readable ...
    assert metric_store.store_backend_id is not None
    # ... and must be a valid UUID
    assert test_utils.validate_uuid4(metric_store.store_backend_id)
def test_query_store_store_backend_id(basic_sqlalchemy_query_store):
    """
    What does this test and why?
    A Store should be able to report its store_backend_id,
    which is set when the StoreBackend is instantiated.
    """
    query_store = basic_sqlalchemy_query_store

    # The id must be readable ...
    assert query_store.store_backend_id is not None
    # ... and must be a valid UUID
    assert test_utils.validate_uuid4(query_store.store_backend_id)
def test_ValidationsStore_with_DatabaseStoreBackend(sa):
    """
    Round-trip validation results through a ValidationsStore backed by a
    DatabaseStoreBackend, then check that the store reports a store_backend_id.
    """
    # Use sqlite so we don't require postgres for this test.
    connection_kwargs = {"drivername": "sqlite"}

    # First, demonstrate that we pick up default configuration
    validations_store = ValidationsStore(
        store_backend={
            "class_name": "DatabaseStoreBackend",
            "credentials": connection_kwargs,
        }
    )

    # Keys that are not ValidationResultIdentifiers are rejected
    with pytest.raises(TypeError):
        validations_store.get("not_a_ValidationResultIdentifier")

    # Store one passing and one failing result and read each straight back
    stored_identifiers = set()
    for run_id, succeeded in (
        ("20191007T151224.1234Z_prod_100", True),
        ("20191007T151224.1234Z_prod_200", False),
    ):
        identifier = ValidationResultIdentifier(
            expectation_suite_identifier=ExpectationSuiteIdentifier(
                expectation_suite_name="asset.quarantine",
            ),
            run_id=run_id,
            batch_identifier="batch_id",
        )
        validations_store.set(
            identifier, ExpectationSuiteValidationResult(success=succeeded)
        )
        assert validations_store.get(identifier) == ExpectationSuiteValidationResult(
            success=succeeded, statistics={}, results=[]
        )
        stored_identifiers.add(identifier)

    assert set(validations_store.list_keys()) == stored_identifiers

    # What does this test and why?
    # A Store should be able to report its store_backend_id,
    # which is set when the StoreBackend is instantiated.

    # The id must be readable ...
    assert validations_store.store_backend_id is not None
    # ... and must be a valid UUID
    assert test_utils.validate_uuid4(validations_store.store_backend_id)
def test_ValidationsStore_with_InMemoryStoreBackend():
    """
    Round-trip validation results through a ValidationsStore backed by an
    InMemoryStoreBackend, then check that the store reports a store_backend_id.
    """
    validations_store = ValidationsStore(
        store_backend={
            "module_name": "great_expectations.data_context.store",
            "class_name": "InMemoryStoreBackend",
        }
    )

    # Keys that are not ValidationResultIdentifiers are rejected
    with pytest.raises(TypeError):
        validations_store.get("not_a_ValidationResultIdentifier")

    # Store one passing and one failing result and read each straight back
    stored_identifiers = set()
    for batch_label, succeeded in (("prod-100", True), ("prod-200", False)):
        identifier = ValidationResultIdentifier.from_tuple(
            (
                "a",
                "b",
                "c",
                "quarantine",
                datetime.datetime.now(datetime.timezone.utc),
                batch_label,
            )
        )
        validations_store.set(
            identifier, ExpectationSuiteValidationResult(success=succeeded)
        )
        assert validations_store.get(identifier) == ExpectationSuiteValidationResult(
            success=succeeded, statistics={}, results=[]
        )
        stored_identifiers.add(identifier)

    assert set(validations_store.list_keys()) == stored_identifiers

    # What does this test and why?
    # A Store should be able to report its store_backend_id,
    # which is set when the StoreBackend is instantiated.

    # The id must be readable ...
    assert validations_store.store_backend_id is not None
    # ... and must be a valid UUID
    assert test_utils.validate_uuid4(validations_store.store_backend_id)
def test_expectations_store_report_store_backend_id_in_memory_store_backend():
    """
    What does this test and why?
    A Store should be able to report its store_backend_id,
    which is set when the StoreBackend is instantiated.
    """
    expectations_store = ExpectationsStore()

    # The id must be readable ...
    assert expectations_store.store_backend_id is not None
    # ... and must be a valid UUID
    assert test_utils.validate_uuid4(expectations_store.store_backend_id)
def test_database_store_backend_id_initialization(caplog, sa, test_backends):
    """
    What does this test and why?

    NOTE: The key columns used here (k1, k2, k3) are placeholders and may not
    mirror actual functionality.

    A StoreBackend should have a store_backend_id property. That
    store_backend_id should be read and initialized from an existing persistent
    store_backend_id during instantiation, or a new store_backend_id should be
    generated and persisted. The store_backend_id should be a valid UUIDv4.
    If a new store_backend_id cannot be persisted, use an ephemeral
    store_backend_id. Persistence should be in a .ge_store_backend_id file for
    filesystem and blob-stores.

    Note: StoreBackend & TupleStoreBackend are abstract classes, so we will
    test the concrete classes that inherit from them.
    See also test_store_backends::test_StoreBackend_id_initialization

    The test is skipped unless "postgresql" is listed in ``test_backends``.
    The database host is taken from the GE_TEST_LOCAL_DB_HOSTNAME environment
    variable and falls back to "localhost" when it is unset.
    """
    # NOTE(review): a second test with this exact name appears further down in
    # this source; if both live in the same module the later definition shadows
    # this one and pytest collects only that one -- confirm and rename if so.

    # Imported locally so this block does not rely on a module-level import.
    import os

    if "postgresql" not in test_backends:
        pytest.skip("test_database_store_backend_id_initialization requires postgresql")

    store_backend = DatabaseStoreBackend(
        credentials={
            "drivername": "postgresql",
            "username": "******",
            "password": "",
            # Allow CI / containerised runs to point at a non-local database
            # host, matching the sibling postgres test; default is unchanged.
            "host": os.getenv("GE_TEST_LOCAL_DB_HOSTNAME", "localhost"),
            "port": "5432",
            "database": "test_ci",
        },
        table_name="test_database_store_backend_id_initialization",
        key_columns=["k1", "k2", "k3"],
    )

    # Check that store_backend_id exists and can be read
    assert store_backend.store_backend_id is not None
    # Check that store_backend_id is a valid UUID
    assert test_utils.validate_uuid4(store_backend.store_backend_id)
def test_database_store_backend_id_initialization(caplog, sa, test_backends):
    """
    What does this test and why?

    A StoreBackend should have a store_backend_id property. That
    store_backend_id should be read and initialized from an existing persistent
    store_backend_id during instantiation, or a new store_backend_id should be
    generated and persisted. The store_backend_id should be a valid UUIDv4.
    If a new store_backend_id cannot be persisted, use an ephemeral
    store_backend_id. Persistence should be in a .ge_store_backend_id file for
    filesystem and blob-stores.

    If an existing data_context_id is available in the great_expectations.yml,
    use this as the expectation_store id. If a store_backend_id is provided via
    manually_initialize_store_backend_id, make sure it is retrievable.

    Note: StoreBackend & TupleStoreBackend are abstract classes, so we will
    test the concrete classes that inherit from them.
    See also test_store_backends::test_StoreBackend_id_initialization
    """
    if "postgresql" not in test_backends:
        pytest.skip("test_database_store_backend_id_initialization requires postgresql")

    def _postgres_credentials():
        # Return a brand-new dict on every call so the two backends built
        # below never share one credentials mapping.
        return {
            "drivername": "postgresql",
            "username": "******",
            "password": "",
            "host": os.getenv("GE_TEST_LOCAL_DB_HOSTNAME", "localhost"),
            "port": "5432",
            "database": "test_ci",
        }

    manual_backend_id = "00000000-0000-0000-0000-000000aaaaaa"

    # Part 1: a plain backend reports a readable, valid UUID
    database_backend = DatabaseStoreBackend(
        credentials=_postgres_credentials(),
        table_name="test_database_store_backend_id_initialization",
        key_columns=["k1", "k2", "k3"],
    )
    assert database_backend.store_backend_id is not None
    assert test_utils.validate_uuid4(database_backend.store_backend_id)

    # Part 2: an expectations store created with a manually specified
    # store_backend_id reports exactly that id
    backend_config = {
        "class_name": "DatabaseStoreBackend",
        "credentials": _postgres_credentials(),
        "manually_initialize_store_backend_id": manual_backend_id,
        "table_name": "ge_expectations_store",
        "key_columns": {"expectation_suite_name"},
    }
    expectations_store_with_database_backend = instantiate_class_from_config(
        config={
            "class_name": "ExpectationsStore",
            "store_backend": backend_config,
        },
        runtime_environment=None,
        config_defaults={
            "module_name": "great_expectations.data_context.store",
            "store_name": "postgres_expectations_store",
        },
    )

    assert expectations_store_with_database_backend.store_backend_id == manual_backend_id
def test_ValidationsStore_with_TupleS3StoreBackend():
    """
    Round-trip validation results through a ValidationsStore backed by a
    TupleS3StoreBackend, verify the resulting object keys in the bucket, and
    check that the store reports a store_backend_id.
    """
    bucket = "test_validation_store_bucket"
    prefix = "test/prefix"

    # create a bucket in Moto's mock AWS environment
    s3_resource = boto3.resource("s3", region_name="us-east-1")
    s3_resource.create_bucket(Bucket=bucket)

    # First, demonstrate that we pick up default configuration including from a TupleS3StoreBackend
    validations_store = ValidationsStore(
        store_backend={
            "class_name": "TupleS3StoreBackend",
            "bucket": bucket,
            "prefix": prefix,
        }
    )

    # Keys that are not ValidationResultIdentifiers are rejected
    with pytest.raises(TypeError):
        validations_store.get("not_a_ValidationResultIdentifier")

    # Store one passing and one failing result and read each straight back
    stored_identifiers = set()
    for run_id, succeeded in (
        ("20191007T151224.1234Z_prod_100", True),
        ("20191007T151224.1234Z_prod_200", False),
    ):
        identifier = ValidationResultIdentifier(
            expectation_suite_identifier=ExpectationSuiteIdentifier(
                expectation_suite_name="asset.quarantine",
            ),
            run_id=run_id,
            batch_identifier="batch_id",
        )
        validations_store.set(
            identifier, ExpectationSuiteValidationResult(success=succeeded)
        )
        assert validations_store.get(identifier) == ExpectationSuiteValidationResult(
            success=succeeded, statistics={}, results=[]
        )
        stored_identifiers.add(identifier)

    # Verify that internals are working as expected, including the default filepath
    listing = boto3.client("s3").list_objects_v2(Bucket=bucket, Prefix=prefix)
    object_keys = {s3_object_info["Key"] for s3_object_info in listing["Contents"]}
    expected_object_keys = {
        "test/prefix/.ge_store_backend_id",
        "test/prefix/asset/quarantine/20191007T151224.1234Z_prod_100/20190926T134241.000000Z/batch_id.json",
        "test/prefix/asset/quarantine/20191007T151224.1234Z_prod_200/20190926T134241.000000Z/batch_id.json",
    }
    assert object_keys == expected_object_keys

    print(validations_store.list_keys())
    assert set(validations_store.list_keys()) == stored_identifiers

    # What does this test and why?
    # A Store should be able to report its store_backend_id,
    # which is set when the StoreBackend is instantiated.

    # The id must be readable ...
    assert validations_store.store_backend_id is not None
    # ... and must be a valid UUID
    assert test_utils.validate_uuid4(validations_store.store_backend_id)
def test_ValidationsStore_with_TupleFileSystemStoreBackend(tmp_path_factory):
    """
    Round-trip validation results through a ValidationsStore backed by a
    TupleFilesystemStoreBackend, verify the on-disk layout, and check that the
    store reports a store_backend_id that a second, identically configured
    store also reports.
    """
    path = str(
        tmp_path_factory.mktemp(
            "test_ValidationResultStore_with_TupleFileSystemStoreBackend__dir"
        )
    )
    # A second temp directory is created here as well; nothing below reads it.
    tmp_path_factory.mktemp("my_dir")

    def _make_store():
        # Fresh config dicts on every call so the two stores share no mapping
        return ValidationsStore(
            store_backend={
                "module_name": "great_expectations.data_context.store",
                "class_name": "TupleFilesystemStoreBackend",
                "base_directory": "my_store/",
            },
            runtime_environment={"root_directory": path},
        )

    validations_store = _make_store()

    def _round_trip(identifier, succeeded):
        # Write a result under `identifier` and read it straight back
        validations_store.set(
            identifier, ExpectationSuiteValidationResult(success=succeeded)
        )
        assert validations_store.get(identifier) == ExpectationSuiteValidationResult(
            success=succeeded, statistics={}, results=[]
        )

    # Keys that are not ValidationResultIdentifiers are rejected
    with pytest.raises(TypeError):
        validations_store.get("not_a_ValidationResultIdentifier")

    first_identifier = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier("asset.quarantine"),
        run_id="prod-100",
        batch_identifier="batch_id",
    )
    _round_trip(first_identifier, True)

    second_identifier = ValidationResultIdentifier.from_tuple(
        (
            "asset",
            "quarantine",
            "prod-20",
            datetime.datetime.now(datetime.timezone.utc),
            "batch_id",
        )
    )
    _round_trip(second_identifier, False)

    print(validations_store.list_keys())
    assert set(validations_store.list_keys()) == {
        first_identifier,
        second_identifier,
    }

    # NOTE(review): the nesting whitespace of this expected tree was rebuilt
    # assuming four spaces per directory level -- confirm against the output
    # of gen_directory_tree_str.
    expected_tree = """\
test_ValidationResultStore_with_TupleFileSystemStoreBackend__dir0/
    my_store/
        .ge_store_backend_id
        asset/
            quarantine/
                prod-100/
                    20190926T134241.000000Z/
                        batch_id.json
                prod-20/
                    20190926T134241.000000Z/
                        batch_id.json
"""
    print(gen_directory_tree_str(path))
    assert gen_directory_tree_str(path) == expected_tree

    # What does this test and why?
    # A Store should be able to report its store_backend_id,
    # which is set when the StoreBackend is instantiated.

    # The id must be readable ...
    assert validations_store.store_backend_id is not None
    # ... and must be a valid UUID
    assert test_utils.validate_uuid4(validations_store.store_backend_id)

    # Another store with the same configuration shares the same store_backend_id
    duplicate_store = _make_store()
    assert validations_store.store_backend_id == duplicate_store.store_backend_id