Exemplo n.º 1
0
def test_ValidationsStore__convert_resource_identifier_to_list():

    my_store = ValidationsStore(
        store_backend={
            "module_name": "great_expectations.data_context.store",
            "class_name": "InMemoryStoreBackend",
        },
        root_directory=None,
    )

    ns_1 = ValidationResultIdentifier(
        from_string="ValidationResultIdentifier.a.b.c.quarantine.prod-100")
    assert my_store._convert_resource_identifier_to_tuple(ns_1) == (
        'a', 'b', 'c', 'quarantine', 'prod-100')
def test_StoreAction():
    fake_in_memory_store = ValidationsStore(root_directory=None,
                                            store_backend={
                                                "class_name":
                                                "InMemoryStoreBackend",
                                            })
    stores = {"fake_in_memory_store": fake_in_memory_store}

    # NOTE: This is a hack meant to last until we implement runtime_configs
    class Object(object):
        pass

    data_context = Object()
    data_context.stores = stores

    action = StoreAction(
        data_context=data_context,
        target_store_name="fake_in_memory_store",
    )
    assert fake_in_memory_store.list_keys() == []

    vr_id = "ValidationResultIdentifier.my_db.default_generator.my_table.default_expectations.prod_20190801"
    action.run(validation_result_suite_identifier=ValidationResultIdentifier(
        from_string=vr_id),
               validation_result_suite={},
               data_asset=None)

    assert len(fake_in_memory_store.list_keys()) == 1
    assert fake_in_memory_store.list_keys()[0].to_string(
    ) == "ValidationResultIdentifier.my_db.default_generator.my_table.default_expectations.prod_20190801"
    assert fake_in_memory_store.get(
        ValidationResultIdentifier(
            from_string=
            "ValidationResultIdentifier.my_db.default_generator.my_table.default_expectations.prod_20190801"
        )) == {}
def test_StoreAction():
    fake_in_memory_store = ValidationsStore(
        store_backend={
            "class_name": "InMemoryStoreBackend",
        }
    )
    stores = {"fake_in_memory_store": fake_in_memory_store}

    class Object:
        ge_cloud_mode = False

    data_context = Object()
    data_context.stores = stores

    action = StoreValidationResultAction(
        data_context=data_context,
        target_store_name="fake_in_memory_store",
    )
    assert fake_in_memory_store.list_keys() == []

    action.run(
        validation_result_suite_identifier=ValidationResultIdentifier(
            expectation_suite_identifier=ExpectationSuiteIdentifier(
                expectation_suite_name="default_expectations"
            ),
            run_id=RunIdentifier(run_name="prod_20190801"),
            batch_identifier="1234",
        ),
        validation_result_suite=ExpectationSuiteValidationResult(
            success=False, results=[]
        ),
        data_asset=None,
    )

    expected_run_id = RunIdentifier(
        run_name="prod_20190801", run_time="20190926T134241.000000Z"
    )

    assert len(fake_in_memory_store.list_keys()) == 1
    stored_identifier = fake_in_memory_store.list_keys()[0]
    assert stored_identifier.batch_identifier == "1234"
    assert (
        stored_identifier.expectation_suite_identifier.expectation_suite_name
        == "default_expectations"
    )
    assert stored_identifier.run_id == expected_run_id

    assert fake_in_memory_store.get(
        ValidationResultIdentifier(
            expectation_suite_identifier=ExpectationSuiteIdentifier(
                expectation_suite_name="default_expectations"
            ),
            run_id=expected_run_id,
            batch_identifier="1234",
        )
    ) == ExpectationSuiteValidationResult(success=False, results=[])
Exemplo n.º 4
0
def test_ValidationsStore_with_TupleFileSystemStoreBackend(tmp_path_factory):
    path = str(
        tmp_path_factory.mktemp(
            "test_ValidationResultStore_with_TupleFileSystemStoreBackend__dir")
    )
    project_path = str(tmp_path_factory.mktemp("my_dir"))

    my_store = ValidationsStore(
        store_backend={
            "module_name": "great_expectations.data_context.store",
            "class_name": "TupleFilesystemStoreBackend",
            "base_directory": "my_store/",
        },
        runtime_environment={"root_directory": path},
    )

    with pytest.raises(TypeError):
        my_store.get("not_a_ValidationResultIdentifier")

    ns_1 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            "asset.quarantine"),
        run_id="prod-100",
        batch_identifier="batch_id",
    )
    my_store.set(ns_1, ExpectationSuiteValidationResult(success=True))
    assert my_store.get(ns_1) == ExpectationSuiteValidationResult(
        success=True, statistics={}, results=[])

    ns_2 = ValidationResultIdentifier.from_tuple(
        ("asset", "quarantine", "prod-20", "batch_id"))
    my_store.set(ns_2, ExpectationSuiteValidationResult(success=False))
    assert my_store.get(ns_2) == ExpectationSuiteValidationResult(
        success=False, statistics={}, results=[])

    print(my_store.list_keys())
    assert set(my_store.list_keys()) == {
        ns_1,
        ns_2,
    }

    print(gen_directory_tree_str(path))
    assert (gen_directory_tree_str(path) == """\
test_ValidationResultStore_with_TupleFileSystemStoreBackend__dir0/
    my_store/
        asset/
            quarantine/
                prod-100/
                    batch_id.json
                prod-20/
                    batch_id.json
""")
Exemplo n.º 5
0
def test_ValidationsStore_with_InMemoryStoreBackend():
    my_store = ValidationsStore(
        store_backend={
            "module_name": "great_expectations.data_context.store",
            "class_name": "InMemoryStoreBackend",
        })

    with pytest.raises(TypeError):
        my_store.get("not_a_ValidationResultIdentifier")

    ns_1 = ValidationResultIdentifier.from_tuple(
        ("a", "b", "c", "quarantine", "prod-100"))
    my_store.set(ns_1, ExpectationSuiteValidationResult(success=True))
    assert my_store.get(ns_1) == ExpectationSuiteValidationResult(
        success=True, statistics={}, results=[])

    ns_2 = ValidationResultIdentifier.from_tuple(
        ("a", "b", "c", "quarantine", "prod-200"))
    my_store.set(ns_2, ExpectationSuiteValidationResult(success=False))
    assert my_store.get(ns_2) == ExpectationSuiteValidationResult(
        success=False, statistics={}, results=[])

    assert set(my_store.list_keys()) == {
        ns_1,
        ns_2,
    }
Exemplo n.º 6
0
def test_ValidationsStore_with_TupleS3StoreBackend():
    bucket = "test_validation_store_bucket"
    prefix = "test/prefix"

    # create a bucket in Moto's mock AWS environment
    conn = boto3.resource("s3", region_name="us-east-1")
    conn.create_bucket(Bucket=bucket)

    # First, demonstrate that we pick up default configuration including from an S3TupleS3StoreBackend
    my_store = ValidationsStore(store_backend={
        "class_name": "TupleS3StoreBackend",
        "bucket": bucket,
        "prefix": prefix,
    })

    with pytest.raises(TypeError):
        my_store.get("not_a_ValidationResultIdentifier")

    ns_1 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="asset.quarantine", ),
        run_id="20191007T151224.1234Z_prod_100",
        batch_identifier="batch_id",
    )
    my_store.set(ns_1, ExpectationSuiteValidationResult(success=True))
    assert my_store.get(ns_1) == ExpectationSuiteValidationResult(
        success=True, statistics={}, results=[])

    ns_2 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="asset.quarantine", ),
        run_id="20191007T151224.1234Z_prod_200",
        batch_identifier="batch_id",
    )

    my_store.set(ns_2, ExpectationSuiteValidationResult(success=False))
    assert my_store.get(ns_2) == ExpectationSuiteValidationResult(
        success=False, statistics={}, results=[])

    # Verify that internals are working as expected, including the default filepath
    assert set([
        s3_object_info["Key"] for s3_object_info in boto3.client(
            "s3").list_objects(Bucket=bucket, Prefix=prefix)["Contents"]
    ]) == {
        "test/prefix/asset/quarantine/20191007T151224.1234Z_prod_100/batch_id.json",
        "test/prefix/asset/quarantine/20191007T151224.1234Z_prod_200/batch_id.json",
    }

    print(my_store.list_keys())
    assert set(my_store.list_keys()) == {
        ns_1,
        ns_2,
    }
Exemplo n.º 7
0
def test_ValidationsStore_with_DatabaseStoreBackend():
    # Use sqlite so we don't require postgres for this test.
    connection_kwargs = {"drivername": "sqlite"}

    # First, demonstrate that we pick up default configuration
    my_store = ValidationsStore(store_backend={
        "class_name": "DatabaseStoreBackend",
        "credentials": connection_kwargs,
    })

    with pytest.raises(TypeError):
        my_store.get("not_a_ValidationResultIdentifier")

    ns_1 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="asset.quarantine", ),
        run_id="20191007T151224.1234Z_prod_100",
        batch_identifier="batch_id",
    )
    my_store.set(ns_1, ExpectationSuiteValidationResult(success=True))
    assert my_store.get(ns_1) == ExpectationSuiteValidationResult(
        success=True, statistics={}, results=[])

    ns_2 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="asset.quarantine", ),
        run_id="20191007T151224.1234Z_prod_200",
        batch_identifier="batch_id",
    )

    my_store.set(ns_2, ExpectationSuiteValidationResult(success=False))
    assert my_store.get(ns_2) == ExpectationSuiteValidationResult(
        success=False, statistics={}, results=[])

    assert set(my_store.list_keys()) == {
        ns_1,
        ns_2,
    }
def test_ValidationsStore_with_InMemoryStoreBackend():
    my_store = ValidationsStore(
        store_backend={
            "module_name": "great_expectations.data_context.store",
            "class_name": "InMemoryStoreBackend",
        }
    )

    with pytest.raises(TypeError):
        my_store.get("not_a_ValidationResultIdentifier")

    ns_1 = ValidationResultIdentifier.from_tuple(
        (
            "a",
            "b",
            "c",
            "quarantine",
            datetime.datetime.now(datetime.timezone.utc),
            "prod-100",
        )
    )
    my_store.set(ns_1, ExpectationSuiteValidationResult(success=True))
    assert my_store.get(ns_1) == ExpectationSuiteValidationResult(
        success=True, statistics={}, results=[]
    )

    ns_2 = ValidationResultIdentifier.from_tuple(
        (
            "a",
            "b",
            "c",
            "quarantine",
            datetime.datetime.now(datetime.timezone.utc),
            "prod-200",
        )
    )
    my_store.set(ns_2, ExpectationSuiteValidationResult(success=False))
    assert my_store.get(ns_2) == ExpectationSuiteValidationResult(
        success=False, statistics={}, results=[]
    )
    assert set(my_store.list_keys()) == {
        ns_1,
        ns_2,
    }
    """
    What does this test and why?
    A Store should be able to report it's store_backend_id
    which is set when the StoreBackend is instantiated.
    """
    # Check that store_backend_id exists can be read
    assert my_store.store_backend_id is not None
    # Check that store_backend_id is a valid UUID
    assert test_utils.validate_uuid4(my_store.store_backend_id)
def test_ValidationsStore_with_DatabaseStoreBackend(sa):
    # Use sqlite so we don't require postgres for this test.
    connection_kwargs = {"drivername": "sqlite"}

    # First, demonstrate that we pick up default configuration
    my_store = ValidationsStore(
        store_backend={
            "class_name": "DatabaseStoreBackend",
            "credentials": connection_kwargs,
        }
    )

    with pytest.raises(TypeError):
        my_store.get("not_a_ValidationResultIdentifier")

    ns_1 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="asset.quarantine",
        ),
        run_id="20191007T151224.1234Z_prod_100",
        batch_identifier="batch_id",
    )
    my_store.set(ns_1, ExpectationSuiteValidationResult(success=True))
    assert my_store.get(ns_1) == ExpectationSuiteValidationResult(
        success=True, statistics={}, results=[]
    )

    ns_2 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="asset.quarantine",
        ),
        run_id="20191007T151224.1234Z_prod_200",
        batch_identifier="batch_id",
    )

    my_store.set(ns_2, ExpectationSuiteValidationResult(success=False))
    assert my_store.get(ns_2) == ExpectationSuiteValidationResult(
        success=False, statistics={}, results=[]
    )

    assert set(my_store.list_keys()) == {
        ns_1,
        ns_2,
    }

    """
    What does this test and why?
    A Store should be able to report it's store_backend_id
    which is set when the StoreBackend is instantiated.
    """
    # Check that store_backend_id exists can be read
    assert my_store.store_backend_id is not None
    # Check that store_backend_id is a valid UUID
    assert test_utils.validate_uuid4(my_store.store_backend_id)
def test_ValidationsStore_with_TupleS3StoreBackend():
    bucket = "test_validation_store_bucket"
    prefix = "test/prefix"

    # create a bucket in Moto's mock AWS environment
    conn = boto3.resource("s3", region_name="us-east-1")
    conn.create_bucket(Bucket=bucket)

    # First, demonstrate that we pick up default configuration including from an S3TupleS3StoreBackend
    my_store = ValidationsStore(
        store_backend={
            "class_name": "TupleS3StoreBackend",
            "bucket": bucket,
            "prefix": prefix,
        }
    )

    with pytest.raises(TypeError):
        my_store.get("not_a_ValidationResultIdentifier")

    ns_1 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="asset.quarantine",
        ),
        run_id="20191007T151224.1234Z_prod_100",
        batch_identifier="batch_id",
    )
    my_store.set(ns_1, ExpectationSuiteValidationResult(success=True))
    assert my_store.get(ns_1) == ExpectationSuiteValidationResult(
        success=True, statistics={}, results=[]
    )

    ns_2 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="asset.quarantine",
        ),
        run_id="20191007T151224.1234Z_prod_200",
        batch_identifier="batch_id",
    )

    my_store.set(ns_2, ExpectationSuiteValidationResult(success=False))
    assert my_store.get(ns_2) == ExpectationSuiteValidationResult(
        success=False, statistics={}, results=[]
    )

    # Verify that internals are working as expected, including the default filepath
    assert {
        s3_object_info["Key"]
        for s3_object_info in boto3.client("s3").list_objects_v2(
            Bucket=bucket, Prefix=prefix
        )["Contents"]
    } == {
        "test/prefix/.ge_store_backend_id",
        "test/prefix/asset/quarantine/20191007T151224.1234Z_prod_100/20190926T134241.000000Z/batch_id.json",
        "test/prefix/asset/quarantine/20191007T151224.1234Z_prod_200/20190926T134241.000000Z/batch_id.json",
    }

    print(my_store.list_keys())
    assert set(my_store.list_keys()) == {
        ns_1,
        ns_2,
    }

    """
    What does this test and why?
    A Store should be able to report it's store_backend_id
    which is set when the StoreBackend is instantiated.
    """
    # Check that store_backend_id exists can be read
    assert my_store.store_backend_id is not None
    # Check that store_backend_id is a valid UUID
    assert test_utils.validate_uuid4(my_store.store_backend_id)
def test_ValidationsStore_with_TupleFileSystemStoreBackend(tmp_path_factory):
    path = str(
        tmp_path_factory.mktemp(
            "test_ValidationResultStore_with_TupleFileSystemStoreBackend__dir"
        )
    )
    project_path = str(tmp_path_factory.mktemp("my_dir"))

    my_store = ValidationsStore(
        store_backend={
            "module_name": "great_expectations.data_context.store",
            "class_name": "TupleFilesystemStoreBackend",
            "base_directory": "my_store/",
        },
        runtime_environment={"root_directory": path},
    )

    with pytest.raises(TypeError):
        my_store.get("not_a_ValidationResultIdentifier")

    ns_1 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier("asset.quarantine"),
        run_id="prod-100",
        batch_identifier="batch_id",
    )
    my_store.set(ns_1, ExpectationSuiteValidationResult(success=True))
    assert my_store.get(ns_1) == ExpectationSuiteValidationResult(
        success=True, statistics={}, results=[]
    )

    ns_2 = ValidationResultIdentifier.from_tuple(
        (
            "asset",
            "quarantine",
            "prod-20",
            datetime.datetime.now(datetime.timezone.utc),
            "batch_id",
        )
    )
    my_store.set(ns_2, ExpectationSuiteValidationResult(success=False))
    assert my_store.get(ns_2) == ExpectationSuiteValidationResult(
        success=False, statistics={}, results=[]
    )

    print(my_store.list_keys())
    assert set(my_store.list_keys()) == {
        ns_1,
        ns_2,
    }

    print(gen_directory_tree_str(path))
    assert (
        gen_directory_tree_str(path)
        == """\
test_ValidationResultStore_with_TupleFileSystemStoreBackend__dir0/
    my_store/
        .ge_store_backend_id
        asset/
            quarantine/
                prod-100/
                    20190926T134241.000000Z/
                        batch_id.json
                prod-20/
                    20190926T134241.000000Z/
                        batch_id.json
"""
    )

    """
    What does this test and why?
    A Store should be able to report it's store_backend_id
    which is set when the StoreBackend is instantiated.
    """
    # Check that store_backend_id exists can be read
    assert my_store.store_backend_id is not None
    # Check that store_backend_id is a valid UUID
    assert test_utils.validate_uuid4(my_store.store_backend_id)

    # Check that another store with the same configuration shares the same store_backend_id
    my_store_duplicate = ValidationsStore(
        store_backend={
            "module_name": "great_expectations.data_context.store",
            "class_name": "TupleFilesystemStoreBackend",
            "base_directory": "my_store/",
        },
        runtime_environment={"root_directory": path},
    )
    assert my_store.store_backend_id == my_store_duplicate.store_backend_id
Exemplo n.º 12
0
def test_ValidationsStore_with_InMemoryStoreBackend():

    my_store = ValidationsStore(
        store_backend={
            "module_name": "great_expectations.data_context.store",
            "class_name": "InMemoryStoreBackend",
            "separator": ".",
        },
        root_directory=None,  #"dummy/path/",
    )

    with pytest.raises(TypeError):
        my_store.get("not_a_ValidationResultIdentifier")

    with pytest.raises(MissingTopLevelConfigKeyError):
        my_store.get(ValidationResultIdentifier(**{}))

    ns_1 = ValidationResultIdentifier(
        from_string="ValidationResultIdentifier.a.b.c.quarantine.prod-100")
    my_store.set(ns_1, {"A": "aaa"})
    assert my_store.get(ns_1) == {"A": "aaa"}

    ns_2 = ValidationResultIdentifier(
        from_string="ValidationResultIdentifier.a.b.c.quarantine.prod-200")
    my_store.set(ns_2, "bbb")
    assert my_store.get(ns_2) == "bbb"

    # Verify that internals are working as expected
    assert my_store.store_backend.store == {
        'a.b.c.quarantine.prod-100': '{"A": "aaa"}',
        'a.b.c.quarantine.prod-200': '"bbb"',
    }

    print(my_store.list_keys())
    assert set(my_store.list_keys()) == {
        ns_1,
        ns_2,
    }
Exemplo n.º 13
0
def test_ValidationsStore_with_FixedLengthTupleS3StoreBackend():
    bucket = "test_validation_store_bucket"
    prefix = "test/prefix"

    # create a bucket in Moto's mock AWS environment
    conn = boto3.resource('s3', region_name='us-east-1')
    conn.create_bucket(Bucket=bucket)

    # First, demonstrate that we pick up default configuration including from an S3FixedLengthTupleS3StoreBackend
    my_store = ValidationsStore(store_backend={
        "class_name": "FixedLengthTupleS3StoreBackend",
        "bucket": bucket,
        "prefix": prefix
    },
                                root_directory=None)

    with pytest.raises(TypeError):
        my_store.get("not_a_ValidationResultIdentifier")

    with pytest.raises(MissingTopLevelConfigKeyError):
        my_store.get(ValidationResultIdentifier(**{}))

    ns_1 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            data_asset_name=DataAssetIdentifier(datasource="a",
                                                generator="b",
                                                generator_asset="c"),
            expectation_suite_name="quarantine",
        ),
        run_id="20191007T151224.1234Z_prod_100")
    my_store.set(ns_1, {"A": "aaa"})
    assert my_store.get(ns_1) == {"A": "aaa"}

    ns_2 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            data_asset_name=DataAssetIdentifier(datasource="a",
                                                generator="b",
                                                generator_asset="c"),
            expectation_suite_name="quarantine",
        ),
        run_id="20191007T151224.1234Z_prod_200")

    my_store.set(ns_2, "bbb")
    assert my_store.get(ns_2) == "bbb"

    # Verify that internals are working as expected, including the default filepath
    assert set([
        s3_object_info['Key'] for s3_object_info in boto3.client(
            's3').list_objects(Bucket=bucket, Prefix=prefix)['Contents']
    ]) == {
        'test/prefix/20191007T151224.1234Z_prod_100/a/b/c/quarantine.json',
        'test/prefix/20191007T151224.1234Z_prod_200/a/b/c/quarantine.json'
    }

    print(my_store.list_keys())
    assert set(my_store.list_keys()) == {
        ns_1,
        ns_2,
    }
Exemplo n.º 14
0
def test_ValidationsStore_with_FixedLengthTupleFileSystemStoreBackend(
        tmp_path_factory):
    path = str(
        tmp_path_factory.mktemp(
            'test_ValidationResultStore_with_FixedLengthTupleFileSystemStoreBackend__dir'
        ))
    project_path = str(tmp_path_factory.mktemp('my_dir'))

    my_store = ValidationsStore(
        store_backend={
            "module_name": "great_expectations.data_context.store",
            "class_name": "FixedLengthTupleFilesystemStoreBackend",
            "base_directory": "my_store/",
            "filepath_template": "{4}/{0}/{1}/{2}/{3}.txt",
        },
        root_directory=path,
    )

    with pytest.raises(TypeError):
        my_store.get("not_a_ValidationResultIdentifier")

    with pytest.raises(MissingTopLevelConfigKeyError):
        my_store.get(ValidationResultIdentifier(**{}))

    ns_1 = ValidationResultIdentifier(
        from_string="ValidationResultIdentifier.a.b.c.quarantine.prod-100")
    my_store.set(ns_1, {"A": "aaa"})
    assert my_store.get(ns_1) == {"A": "aaa"}

    ns_2 = ValidationResultIdentifier(
        from_string="ValidationResultIdentifier.a.b.c.quarantine.prod-20")
    my_store.set(ns_2, {"B": "bbb"})
    assert my_store.get(ns_2) == {"B": "bbb"}

    print(my_store.list_keys())
    assert set(my_store.list_keys()) == {
        ns_1,
        ns_2,
    }

    print(gen_directory_tree_str(path))
    assert gen_directory_tree_str(path) == """\