def save_expectation_suite(
        self,
        expectation_suite: ExpectationSuite,
        expectation_suite_name: Optional[str] = None,
        overwrite_existing: bool = True,
        **kwargs: Dict[str, Any],
    ):
        """Save the provided expectation suite into the DataContext.

        Args:
            expectation_suite: the suite to save
            expectation_suite_name: the name to store the suite under. If no name is
                provided, the name is read from the suite. If a name is provided, it is
                also written onto ``expectation_suite.expectation_suite_name``.
            overwrite_existing: bool setting whether to overwrite existing ExpectationSuite
            **kwargs: forwarded unchanged to ``self.expectations_store.set``

        Returns:
            The value returned by ``self.expectations_store.set``.

        Raises:
            DataContextError: if the store already has the key and
                ``overwrite_existing`` is False.
        """
        if expectation_suite_name is None:
            # Resolve the effective name up front so the key and the error
            # message below always refer to the same, real suite name.
            expectation_suite_name = expectation_suite.expectation_suite_name
        else:
            expectation_suite.expectation_suite_name = expectation_suite_name

        key: ExpectationSuiteIdentifier = ExpectationSuiteIdentifier(
            expectation_suite_name=expectation_suite_name
        )

        if self.expectations_store.has_key(key) and not overwrite_existing:
            raise ge_exceptions.DataContextError(
                "expectation_suite with name {} already exists. If you would like to overwrite this "
                "expectation_suite, set overwrite_existing=True.".format(
                    expectation_suite_name))

        # Reset the compiled flag before storing the suite.
        self._evaluation_parameter_dependencies_compiled = False
        return self.expectations_store.set(key, expectation_suite, **kwargs)
Пример #2
0
def test_ExpectationsStore_with_DatabaseStoreBackend():
    """Round-trip two suites through an ExpectationsStore using DatabaseStoreBackend."""
    # sqlite is used so the test does not require postgres.
    sqlite_credentials = {"drivername": "sqlite"}
    store = ExpectationsStore(
        store_backend={
            "class_name": "DatabaseStoreBackend",
            "credentials": sqlite_credentials,
        }
    )

    # A plain string is not an acceptable key.
    with pytest.raises(TypeError):
        store.get("not_a_ExpectationSuiteIdentifier")

    stored_keys = []
    for suite_name in ("a.b.c.warning", "a.b.c.failure"):
        # NOTE: tuple() of a string yields one element per character.
        suite_key = ExpectationSuiteIdentifier.from_tuple(tuple(suite_name))
        store.set(suite_key, ExpectationSuite(expectation_suite_name=suite_name))
        assert store.get(suite_key) == ExpectationSuite(
            expectation_suite_name=suite_name
        )
        stored_keys.append(suite_key)

    # Both keys, and only those, are listed by the store.
    assert set(store.list_keys()) == set(stored_keys)
Пример #3
0
def test_HtmlSiteStore_S3_backend():
    """Write site pages through an HtmlSiteStore on TupleS3StoreBackend and check the S3 keys."""
    bucket_name = "test_validation_store_bucket"
    key_prefix = "test/prefix"

    # create a bucket in Moto's mock AWS environment
    s3_resource = boto3.resource("s3", region_name="us-east-1")
    s3_resource.create_bucket(Bucket=bucket_name)

    site_store = HtmlSiteStore(
        store_backend={
            "class_name": "TupleS3StoreBackend",
            "bucket": bucket_name,
            "prefix": key_prefix,
        }
    )

    # A plain string is not an acceptable key.
    with pytest.raises(TypeError):
        site_store.get("not_a_ValidationResultIdentifier")

    validation_identifier = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="asset.quarantine"
        ),
        run_id="20191007T151224.1234Z_prod_100",
        batch_identifier="1234",
    )
    validations_section_key = SiteSectionIdentifier(
        site_section_name="validations",
        resource_identifier=validation_identifier,
    )
    site_store.set(validations_section_key, "aaa")

    expectations_section_key = SiteSectionIdentifier(
        site_section_name="expectations",
        resource_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="asset.quarantine"
        ),
    )
    site_store.set(expectations_section_key, "bbb")

    # list_keys() is compared against the wrapped resource identifiers,
    # not the SiteSectionIdentifier wrappers.
    expected_resource_keys = {
        validations_section_key.resource_identifier,
        expectations_section_key.resource_identifier,
    }
    assert set(site_store.list_keys()) == expected_resource_keys

    # This is a special un-store-like method exposed by the HtmlSiteStore
    site_store.write_index_page("index_html_string_content")

    # Verify the object keys actually written under the prefix.
    # NOTE(review): the fixed run-time segment in the expected path suggests the
    # clock is frozen outside this view (decorator/fixture) - confirm.
    listing = boto3.client("s3").list_objects_v2(
        Bucket=bucket_name, Prefix=key_prefix
    )
    written_keys = {entry["Key"] for entry in listing["Contents"]}
    assert written_keys == {
        "test/prefix/index.html",
        "test/prefix/expectations/asset/quarantine.html",
        "test/prefix/validations/asset/quarantine/20191007T151224.1234Z_prod_100/20190926T134241.000000Z/1234.html",
    }

    index_object = boto3.client("s3").get_object(
        Bucket=bucket_name, Key="test/prefix/index.html"
    )
    index_content = index_object["Body"].read().decode("utf-8")
    assert index_content == "index_html_string_content"
def test_StoreAction():
    """Run StoreValidationResultAction and check the result lands in the target store."""
    target_store = ValidationsStore(
        store_backend={"class_name": "InMemoryStoreBackend"}
    )

    class FakeContext:
        # Minimal stand-in for a data context: only the attributes set here are provided.
        ge_cloud_mode = False

    data_context = FakeContext()
    data_context.stores = {"fake_in_memory_store": target_store}

    action = StoreValidationResultAction(
        data_context=data_context,
        target_store_name="fake_in_memory_store",
    )
    # Constructing the action must not write anything.
    assert target_store.list_keys() == []

    suite_identifier = ExpectationSuiteIdentifier(
        expectation_suite_name="default_expectations"
    )
    result_identifier = ValidationResultIdentifier(
        expectation_suite_identifier=suite_identifier,
        run_id=RunIdentifier(run_name="prod_20190801"),
        batch_identifier="1234",
    )
    action.run(
        validation_result_suite_identifier=result_identifier,
        validation_result_suite=ExpectationSuiteValidationResult(
            success=False, results=[]
        ),
        data_asset=None,
    )

    # NOTE(review): the fixed run_time suggests the clock is frozen outside this
    # view (decorator/fixture) - confirm.
    expected_run_id = RunIdentifier(
        run_name="prod_20190801", run_time="20190926T134241.000000Z"
    )

    assert len(target_store.list_keys()) == 1
    stored_key = target_store.list_keys()[0]
    assert stored_key.batch_identifier == "1234"
    stored_suite_name = stored_key.expectation_suite_identifier.expectation_suite_name
    assert stored_suite_name == "default_expectations"
    assert stored_key.run_id == expected_run_id

    # The stored value can be fetched back with an equivalent identifier.
    lookup_key = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="default_expectations"
        ),
        run_id=expected_run_id,
        batch_identifier="1234",
    )
    assert target_store.get(lookup_key) == ExpectationSuiteValidationResult(
        success=False, results=[]
    )
Пример #5
0
def test_ValidationsStore_with_TupleS3StoreBackend():
    """Round-trip two validation results through a ValidationsStore on TupleS3StoreBackend."""
    bucket_name = "test_validation_store_bucket"
    key_prefix = "test/prefix"

    # create a bucket in Moto's mock AWS environment
    s3_resource = boto3.resource("s3", region_name="us-east-1")
    s3_resource.create_bucket(Bucket=bucket_name)

    store = ValidationsStore(
        store_backend={
            "class_name": "TupleS3StoreBackend",
            "bucket": bucket_name,
            "prefix": key_prefix,
        }
    )

    # A plain string is not an acceptable key.
    with pytest.raises(TypeError):
        store.get("not_a_ValidationResultIdentifier")

    stored_keys = []
    cases = (
        ("20191007T151224.1234Z_prod_100", True),
        ("20191007T151224.1234Z_prod_200", False),
    )
    for run_id, success in cases:
        result_key = ValidationResultIdentifier(
            expectation_suite_identifier=ExpectationSuiteIdentifier(
                expectation_suite_name="asset.quarantine"
            ),
            run_id=run_id,
            batch_identifier="batch_id",
        )
        store.set(result_key, ExpectationSuiteValidationResult(success=success))
        # The fetched value compares equal to one with empty statistics and results.
        assert store.get(result_key) == ExpectationSuiteValidationResult(
            success=success, statistics={}, results=[]
        )
        stored_keys.append(result_key)

    # Verify the object keys actually written under the prefix.
    listing = boto3.client("s3").list_objects_v2(
        Bucket=bucket_name, Prefix=key_prefix
    )
    written_keys = {entry["Key"] for entry in listing["Contents"]}
    assert written_keys == {
        "test/prefix/asset/quarantine/20191007T151224.1234Z_prod_100/20190926T134241.000000Z/batch_id.json",
        "test/prefix/asset/quarantine/20191007T151224.1234Z_prod_200/20190926T134241.000000Z/batch_id.json",
    }

    print(store.list_keys())
    assert set(store.list_keys()) == set(stored_keys)
def test_ValidationsStore_with_DatabaseStoreBackend(sa):
    """Round-trip two validation results through a database-backed ValidationsStore.

    Also checks that the store exposes a non-None ``store_backend_id`` that
    ``test_utils.validate_uuid4`` accepts.
    """
    # sqlite is used so the test does not require postgres.
    sqlite_credentials = {"drivername": "sqlite"}
    store = ValidationsStore(
        store_backend={
            "class_name": "DatabaseStoreBackend",
            "credentials": sqlite_credentials,
        }
    )

    # A plain string is not an acceptable key.
    with pytest.raises(TypeError):
        store.get("not_a_ValidationResultIdentifier")

    stored_keys = []
    cases = (
        ("20191007T151224.1234Z_prod_100", True),
        ("20191007T151224.1234Z_prod_200", False),
    )
    for run_id, success in cases:
        result_key = ValidationResultIdentifier(
            expectation_suite_identifier=ExpectationSuiteIdentifier(
                expectation_suite_name="asset.quarantine",
            ),
            run_id=run_id,
            batch_identifier="batch_id",
        )
        store.set(result_key, ExpectationSuiteValidationResult(success=success))
        assert store.get(result_key) == ExpectationSuiteValidationResult(
            success=success, statistics={}, results=[]
        )
        stored_keys.append(result_key)

    assert set(store.list_keys()) == set(stored_keys)

    # What does this test and why?
    # A Store should be able to report its store_backend_id.
    backend_id = store.store_backend_id
    # Check that store_backend_id exists and can be read
    assert backend_id is not None
    # Check that store_backend_id is a valid UUID
    assert test_utils.validate_uuid4(store.store_backend_id)
def test_ExpectationsStore_with_DatabaseStoreBackend(sa):
    """Store, overwrite and list suites in a database-backed ExpectationsStore."""
    # sqlite is used so the test does not require postgres.
    store = ExpectationsStore(
        store_backend={
            "class_name": "DatabaseStoreBackend",
            "credentials": {"drivername": "sqlite"},
        }
    )
    # A plain string is not an acceptable key.
    with pytest.raises(TypeError):
        store.get("not_a_ExpectationSuiteIdentifier")

    def build_warning_suite(meta_value):
        # Fresh "a.b.c.warning" suite whose only varying field is the meta value.
        return ExpectationSuite(
            expectation_suite_name="a.b.c.warning",
            meta={"test_meta_key": meta_value},
            expectations=[],
        )

    # first suite to add to db
    initial_suite = build_warning_suite("test_meta_value")

    # NOTE: tuple() of a string yields one element per character.
    warning_key = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c.warning"))

    # initial set and check if first suite exists
    store.set(warning_key, initial_suite)
    assert store.get(warning_key) == build_warning_suite("test_meta_value")

    # setting the same key again must replace the stored value
    replacement_suite = build_warning_suite("test_new_meta_value")
    store.set(warning_key, replacement_suite)
    assert store.get(warning_key) == build_warning_suite("test_new_meta_value")

    failure_key = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c.failure"))
    store.set(failure_key, ExpectationSuite(expectation_suite_name="a.b.c.failure"))
    assert store.get(failure_key) == ExpectationSuite(
        expectation_suite_name="a.b.c.failure"
    )

    assert set(store.list_keys()) == {warning_key, failure_key}
Пример #8
0
 def __init__(
     self,
     run_id,
     data_asset_name,
     expectation_suite_identifier,
     metric_name,
     metric_kwargs,
     metric_value,
 ):
     """Initialise the metric and normalise its run id and suite identifier.

     Args:
         run_id: a ``RunIdentifier``, a string (deprecated; a warning is
             issued), a dict of ``RunIdentifier`` keyword arguments, ``None``,
             or any other value, which is stringified and used as the run name.
         data_asset_name: stored as given.
         expectation_suite_identifier: an ``ExpectationSuiteIdentifier``, or
             a value passed as ``expectation_suite_name`` to build one.
         metric_name: forwarded to the parent initialiser.
         metric_kwargs: forwarded to the parent initialiser.
         metric_value: forwarded to the parent initialiser.
     """
     super().__init__(metric_name, metric_kwargs, metric_value)

     suite_identifier = (
         expectation_suite_identifier
         if isinstance(expectation_suite_identifier, ExpectationSuiteIdentifier)
         else ExpectationSuiteIdentifier(
             expectation_suite_name=expectation_suite_identifier
         )
     )

     if isinstance(run_id, str):
         deprecation_message = (
             "String run_ids will be deprecated in the future. Please provide a run_id of type "
             "RunIdentifier(run_name=None, run_time=None), or a dictionary containing run_name "
             "and run_time (both optional)."
         )
         warnings.warn(deprecation_message, DeprecationWarning)
         # Use the string as run_time too when it parses; otherwise leave it unset.
         try:
             parsed_run_time = parse(run_id)
         except (ValueError, TypeError):
             parsed_run_time = None
         run_id = RunIdentifier(run_name=run_id, run_time=parsed_run_time)
     elif isinstance(run_id, dict):
         run_id = RunIdentifier(**run_id)
     elif run_id is None:
         run_id = RunIdentifier()
     elif not isinstance(run_id, RunIdentifier):
         # Any other type is stringified and used as the run name.
         run_id = RunIdentifier(run_name=str(run_id))

     self._run_id = run_id
     self._data_asset_name = data_asset_name
     self._expectation_suite_identifier = suite_identifier
Пример #9
0
def validation_result_suite_id():
    """Build a ValidationResultIdentifier for suite "asset.default", run "test_100", batch "1234"."""
    suite_identifier = ExpectationSuiteIdentifier("asset.default")
    run_identifier = RunIdentifier(run_name="test_100")
    return ValidationResultIdentifier(
        expectation_suite_identifier=suite_identifier,
        run_id=run_identifier,
        batch_identifier="1234",
    )
def test_StoreMetricsAction_column_metric(basic_in_memory_data_context_for_validation_operator):
    """Run StoreMetricsAction with a column-scoped metric request and read the stored value back."""
    context = basic_in_memory_data_context_for_validation_operator
    unexpected_count_metric = "expect_column_values_to_be_unique.result.unexpected_count"

    requested_metrics = {
        "*": [
            {"column": {"provider_id": [unexpected_count_metric]}},
            "statistics.evaluated_expectations",
            "statistics.successful_expectations",
        ]
    }
    action = StoreMetricsAction(
        data_context=context,
        requested_metrics=requested_metrics,
        target_store_name="metrics_store",
    )

    # A single uniqueness result on column "provider_id" with unexpected_count == 7.
    uniqueness_result = ExpectationValidationResult(
        meta={},
        result={
            "element_count": 10,
            "missing_count": 0,
            "missing_percent": 0.0,
            "unexpected_count": 7,
            "unexpected_percent": 0.0,
            "unexpected_percent_nonmissing": 0.0,
            "partial_unexpected_list": [],
        },
        success=True,
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_be_unique",
            kwargs={"column": "provider_id", "result_format": "BASIC"},
        ),
        exception_info=None,
    )
    validation_result = ExpectationSuiteValidationResult(
        success=False,
        meta={"expectation_suite_name": "foo", "run_id": "bar"},
        results=[uniqueness_result],
        statistics={"evaluated_expectations": 5, "successful_expectations": 3},
    )

    result_identifier = ValidationResultIdentifier.from_object(validation_result)
    action.run(validation_result, result_identifier, data_asset=None)

    # The metric is looked up by run id, suite, metric name and kwargs id.
    metric_key = ValidationMetricIdentifier(
        run_id="bar",
        expectation_suite_identifier=ExpectationSuiteIdentifier("foo"),
        metric_name=unexpected_count_metric,
        metric_kwargs_id="column=provider_id",
    )
    assert context.stores["metrics_store"].get(metric_key) == 7
Пример #11
0
def prompt_profile_to_create_a_suite(
    data_context: DataContext,
    expectation_suite_name: str,
):
    """Show where the new suite will be stored, then ask the user to confirm or exit.

    Args:
        data_context: context whose expectations store backend supplies the
            URL for the suite key.
        expectation_suite_name: name of the suite mentioned in the message.
    """
    expectations_store = data_context.stores[data_context.expectations_store_name]
    suite_key_tuple = ExpectationSuiteIdentifier(
        expectation_suite_name=expectation_suite_name
    ).to_tuple()
    suite_url = expectations_store.store_backend.get_url_for_key(suite_key_tuple)

    message_template = """
Great Expectations will create a notebook, containing code cells that select from available columns in your dataset and
generate expectations about them to demonstrate some examples of assertions you can make about your data.

When you run this notebook, Great Expectations will store these expectations in a new Expectation Suite "{:s}" here:

  {:s}
"""
    cli_message(string=message_template.format(expectation_suite_name, suite_url))

    confirm_proceed_or_exit()
Пример #12
0
    def _add_expectations_to_index_links(self, index_links_dict: OrderedDict,
                                         skip_and_clean_missing: bool) -> None:
        """Add an "expectations" index entry for every suite page in the target store.

        Does nothing when the "expectations" section config is falsy or one
        of ``FALSEY_YAML_STRINGS``.

        Args:
            index_links_dict: passed to ``add_resource_info_to_index_links_dict``
                for each suite.
            skip_and_clean_missing: when true, site keys absent from the
                source store are removed from the target backend and left out
                of the index.
        """
        section_config = self.site_section_builders_config.get(
            "expectations", "None")
        if not section_config or section_config in FALSEY_YAML_STRINGS:
            return

        source_store_name = self.site_section_builders_config[
            "expectations"].get("source_store_name")
        source_keys = self.data_context.stores[source_store_name].list_keys()

        site_backend = self.target_store.store_backends[
            ExpectationSuiteIdentifier]
        site_keys = [
            ExpectationSuiteIdentifier.from_tuple(key_tuple)
            for key_tuple in site_backend.list_keys()
        ]

        if skip_and_clean_missing:
            surviving_keys = []
            for site_key in site_keys:
                if site_key in source_keys:
                    surviving_keys.append(site_key)
                else:
                    # No matching key in the source store: remove it from the site backend.
                    self.target_store.store_backends[
                        ExpectationSuiteIdentifier].remove_key(site_key)
            site_keys = surviving_keys

        for site_key in site_keys:
            self.add_resource_info_to_index_links_dict(
                index_links_dict=index_links_dict,
                expectation_suite_name=site_key.expectation_suite_name,
                section_name="expectations",
            )
Пример #13
0
def test_ValidationsStore_with_TupleFileSystemStoreBackend(tmp_path_factory):
    """Round-trip two validation results through a filesystem-backed ValidationsStore."""
    root_directory = str(
        tmp_path_factory.mktemp(
            "test_ValidationResultStore_with_TupleFileSystemStoreBackend__dir")
    )
    # Second temp directory created as in the original; it is not used afterwards.
    project_path = str(tmp_path_factory.mktemp("my_dir"))

    store = ValidationsStore(
        store_backend={
            "module_name": "great_expectations.data_context.store",
            "class_name": "TupleFilesystemStoreBackend",
            "base_directory": "my_store/",
        },
        runtime_environment={"root_directory": root_directory},
    )

    # A plain string is not an acceptable key.
    with pytest.raises(TypeError):
        store.get("not_a_ValidationResultIdentifier")

    first_key = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            "asset.quarantine"),
        run_id="prod-100",
        batch_identifier="batch_id",
    )
    store.set(first_key, ExpectationSuiteValidationResult(success=True))
    assert store.get(first_key) == ExpectationSuiteValidationResult(
        success=True, statistics={}, results=[])

    # The second key is built from its tuple form.
    second_key_parts = (
        "asset",
        "quarantine",
        "prod-20",
        datetime.datetime.now(datetime.timezone.utc),
        "batch_id",
    )
    second_key = ValidationResultIdentifier.from_tuple(second_key_parts)
    store.set(second_key, ExpectationSuiteValidationResult(success=False))
    assert store.get(second_key) == ExpectationSuiteValidationResult(
        success=False, statistics={}, results=[])

    print(store.list_keys())
    assert set(store.list_keys()) == {first_key, second_key}

    # NOTE(review): the fixed timestamp directories suggest the clock is frozen
    # outside this view (decorator/fixture) - confirm.
    expected_tree = """\
test_ValidationResultStore_with_TupleFileSystemStoreBackend__dir0/
    my_store/
        asset/
            quarantine/
                prod-100/
                    20190926T134241.000000Z/
                        batch_id.json
                prod-20/
                    20190926T134241.000000Z/
                        batch_id.json
"""
    print(gen_directory_tree_str(root_directory))
    assert gen_directory_tree_str(root_directory) == expected_tree
Пример #14
0
def validation_result_suite_extended_id():
    """Build a ValidationResultIdentifier with an explicit run time and a BatchIdentifier."""
    suite_identifier = ExpectationSuiteIdentifier("asset.default")
    run_identifier = RunIdentifier(
        run_name="test_100",
        run_time="Tue May 08 15:14:45 +0800 2012",
    )
    batch = BatchIdentifier(batch_identifier="1234", data_asset_name="asset")
    return ValidationResultIdentifier(
        expectation_suite_identifier=suite_identifier,
        run_id=run_identifier,
        batch_identifier=batch,
    )
Пример #15
0
def test_ValidationsStore_with_DatabaseStoreBackend(sa):
    """Round-trip two validation results through a database-backed ValidationsStore."""
    # sqlite is used so the test does not require postgres.
    sqlite_credentials = {"drivername": "sqlite"}
    store = ValidationsStore(
        store_backend={
            "class_name": "DatabaseStoreBackend",
            "credentials": sqlite_credentials,
        }
    )

    # A plain string is not an acceptable key.
    with pytest.raises(TypeError):
        store.get("not_a_ValidationResultIdentifier")

    def make_key(run_id):
        # Both keys share suite and batch and differ only in run_id.
        return ValidationResultIdentifier(
            expectation_suite_identifier=ExpectationSuiteIdentifier(
                expectation_suite_name="asset.quarantine",
            ),
            run_id=run_id,
            batch_identifier="batch_id",
        )

    passing_key = make_key("20191007T151224.1234Z_prod_100")
    store.set(passing_key, ExpectationSuiteValidationResult(success=True))
    assert store.get(passing_key) == ExpectationSuiteValidationResult(
        success=True, statistics={}, results=[]
    )

    failing_key = make_key("20191007T151224.1234Z_prod_200")
    store.set(failing_key, ExpectationSuiteValidationResult(success=False))
    assert store.get(failing_key) == ExpectationSuiteValidationResult(
        success=False, statistics={}, results=[]
    )

    assert set(store.list_keys()) == {passing_key, failing_key}
Пример #16
0
    def self_check(self, pretty_print):
        """List existing keys, then write and read back one temporary test key.

        Args:
            pretty_print: when truthy, progress is printed to stdout.

        Returns:
            dict with "keys" (the result of ``self.list_keys()`` taken before
            the test key is added) and "len_keys" (its length).
        """
        if pretty_print:
            print("Checking for existing keys...")

        existing_keys = self.list_keys()
        key_count = len(existing_keys)
        report = {"keys": existing_keys, "len_keys": key_count}

        if pretty_print:
            if key_count == 0:
                print(f"\t{key_count} keys found")
            else:
                print(f"\t{key_count} keys found:")
                # Only the first ten keys are printed.
                for existing_key in existing_keys[:10]:
                    print(f"\t\t{str(existing_key)}")
            if key_count > 10:
                print("\t\t...")
            print()

        # Random 20-character suffix drawn from the hex digits.
        hex_digits = list("0123456789ABCDEF")
        random_suffix = "".join(random.choice(hex_digits) for _ in range(20))
        test_key_name = "test-key-" + random_suffix

        if self.ge_cloud_mode:
            test_key = self.key_class(
                resource_type=GeCloudRESTResource.CONTRACT,
                ge_cloud_id=str(uuid.uuid4()),
            )
        else:
            test_key = self.key_class(
                expectation_suite_identifier=ExpectationSuiteIdentifier(
                    expectation_suite_name="temporary_test_suite"
                ),
                run_id="temporary_test_run_id",
                batch_identifier=test_key_name,
            )
        test_value = ExpectationSuiteValidationResult(success=True)

        if pretty_print:
            print(f"Attempting to add a new test key: {test_key}...")
        self.set(key=test_key, value=test_value)
        if pretty_print:
            print("\tTest key successfully added.")
            print()
            print(
                f"Attempting to retrieve the test value associated with key: {test_key}..."
            )

        # The retrieved value itself is not inspected.
        self.get(key=test_key)
        if pretty_print:
            print("\tTest value successfully retrieved.")
            print()

        return report
Пример #17
0
 def __init__(self, run_id, expectation_suite_identifier, metric_name,
              metric_kwargs, metric_value):
     """Initialise the metric and record its run id and suite identifier.

     ``expectation_suite_identifier`` may be an ``ExpectationSuiteIdentifier``;
     any other value is passed as ``expectation_suite_name`` to build one.
     ``run_id`` is stored as given.
     """
     super(ValidationMetric, self).__init__(
         metric_name, metric_kwargs, metric_value
     )
     suite_identifier = (
         expectation_suite_identifier
         if isinstance(expectation_suite_identifier, ExpectationSuiteIdentifier)
         else ExpectationSuiteIdentifier(
             expectation_suite_name=expectation_suite_identifier
         )
     )
     self._run_id = run_id
     self._expectation_suite_identifier = suite_identifier
Пример #18
0
 def _convert_tuple_to_resource_identifier(self, tuple_):
     """Turn a ``(section_name, resource_tuple)`` pair into a SiteSectionIdentifier.

     The section name selects the identifier class ("expectations" or
     "validations"); the resource tuple is unpacked into its constructor.

     Raises:
         Exception: if the section name is neither of the two known values.
     """
     section_name = tuple_[0]
     if section_name not in ("expectations", "validations"):
         raise Exception("unknown section name: " + section_name)

     identifier_class = (
         ExpectationSuiteIdentifier
         if section_name == "expectations"
         else ValidationResultIdentifier
     )
     return SiteSectionIdentifier(
         site_section_name=section_name,
         resource_identifier=identifier_class(*tuple_[1]),
     )
Пример #19
0
 def from_tuple(cls, tuple_):
     """Build an instance from ``(run_id, *suite parts, metric_name, metric_kwargs_id)``.

     Raises:
         GreatExpectationsError: if the tuple has fewer than four components.
     """
     if len(tuple_) < 4:
         raise GreatExpectationsError(
             "ValidationMetricIdentifier tuple must have at least four components."
         )
     # The first element is the run id, the last two are the metric fields,
     # and everything in between belongs to the suite identifier.
     run_id = tuple_[0]
     suite_parts = tuple_[1:-2]
     metric_name = tuple_[-2]
     metric_kwargs_id = tuple_[-1]
     suite_identifier = ExpectationSuiteIdentifier.from_tuple(suite_parts)
     return cls(
         run_id=run_id,
         expectation_suite_identifier=suite_identifier,
         metric_name=metric_name,
         metric_kwargs_id=metric_kwargs_id,
     )
def ge_validation_result_suite_id() -> ValidationResultIdentifier:
    """Build a ValidationResultIdentifier with a fixed UTC run time and batch id."""
    # Fixed epoch timestamp, interpreted as UTC.
    run_time = datetime.fromtimestamp(1640701702, tz=timezone.utc)
    run_identifier = RunIdentifier(run_name="test_100", run_time=run_time)
    suite_identifier = ExpectationSuiteIdentifier("asset.default")
    return ValidationResultIdentifier(
        expectation_suite_identifier=suite_identifier,
        run_id=run_identifier,
        batch_identifier="010ef8c1cd417910b971f4468f024ec5",
    )
Пример #21
0
def _profile_to_create_a_suite(
    additional_batch_kwargs,
    batch_kwargs,
    batch_kwargs_generator_name,
    context,
    datasource_name,
    expectation_suite_name,
    data_asset_name,
    profiler_configuration,
):
    """Confirm with the user, then profile a data asset into an example suite.

    Shows where the suite will be stored, asks for confirmation, and calls
    ``context.profile_data_asset`` with ``BasicSuiteBuilderProfiler`` and a
    run id derived from the current UTC time.

    Returns:
        The profiling results returned by ``context.profile_data_asset``.
        When their "success" entry is falsy, ``_raise_profiling_errors`` is
        called with them first.
    """
    expectations_store = context.stores[context.expectations_store_name]
    suite_key_tuple = ExpectationSuiteIdentifier(
        expectation_suite_name=expectation_suite_name
    ).to_tuple()
    suite_url = expectations_store.store_backend.get_url_for_key(suite_key_tuple)

    message_template = """
Great Expectations will choose a couple of columns and generate expectations about them
to demonstrate some examples of assertions you can make about your data.

Great Expectations will store these expectations in a new Expectation Suite '{:s}' here:

  {:s}
"""
    cli_message(message_template.format(expectation_suite_name, suite_url))

    confirm_proceed_or_exit()

    # TODO this may not apply
    cli_message("\nGenerating example Expectation Suite...")
    # The run id is the current UTC time rendered as a compact timestamp.
    utc_now = datetime.datetime.now(datetime.timezone.utc)
    run_id = utc_now.strftime("%Y%m%dT%H%M%S.%fZ")
    profiling_results = context.profile_data_asset(
        datasource_name,
        batch_kwargs_generator_name=batch_kwargs_generator_name,
        data_asset_name=data_asset_name,
        batch_kwargs=batch_kwargs,
        profiler=BasicSuiteBuilderProfiler,
        profiler_configuration=profiler_configuration,
        expectation_suite_name=expectation_suite_name,
        run_id=run_id,
        additional_batch_kwargs=additional_batch_kwargs,
    )
    succeeded = profiling_results["success"]
    if not succeeded:
        _raise_profiling_errors(profiling_results)

    cli_message("\nDone generating example Expectation Suite")
    return profiling_results
Пример #22
0
def test_expectations_store():
    """Round-trip two suites through a default-configured ExpectationsStore."""
    store = ExpectationsStore()

    # Calling set() with only a plain string must raise TypeError.
    with pytest.raises(TypeError):
        store.set("not_a_ValidationResultIdentifier")

    # NOTE: tuple() of a string yields one element per character.
    warning_key = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c.warning"))
    warning_suite = ExpectationSuite(expectation_suite_name="a.b.c.warning")
    store.set(warning_key, warning_suite)
    assert store.get(warning_key) == ExpectationSuite(
        expectation_suite_name="a.b.c.warning"
    )

    failure_key = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c.failure"))
    failure_suite = ExpectationSuite(expectation_suite_name="a.b.c.failure")
    store.set(failure_key, failure_suite)
    assert store.get(failure_key) == ExpectationSuite(
        expectation_suite_name="a.b.c.failure"
    )

    assert set(store.list_keys()) == {warning_key, failure_key}
Пример #23
0
def test_resource_key_passes_run_name_filter():
    """Check every run_name_filter operator against the run name "foofooprofilingfoo"."""
    resource_key = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier("test_suite"),
        run_id=RunIdentifier(run_name="foofooprofilingfoo"),
        batch_identifier="f14c3d2f6e8028c2db0c25edabdb0d61",
    )

    def passes(run_name_filter):
        # Shorthand: apply one filter to the shared resource key.
        return resource_key_passes_run_name_filter(
            resource_key, run_name_filter=run_name_filter
        )

    # (filter, expected outcome) pairs, checked in order.
    current_operator_cases = [
        ({"equals": "profiling"}, False),
        ({"equals": "foofooprofilingfoo"}, True),
        ({"not_equals": "profiling"}, True),
        ({"not_equals": "foofooprofilingfoo"}, False),
        ({"includes": "profiling"}, True),
        ({"includes": "foobar"}, False),
        ({"not_includes": "foobar"}, True),
        ({"not_includes": "profiling"}, False),
        ({"matches_regex": "(foo){2}profiling(foo)+"}, True),
        ({"matches_regex": "(foo){3}profiling(foo)+"}, False),
    ]
    for run_name_filter, expected in current_operator_cases:
        assert passes(run_name_filter) is expected

    # The short "eq" / "ne" spellings must emit a DeprecationWarning.
    with pytest.warns(DeprecationWarning):
        assert passes({"eq": "profiling"}) is False
        assert passes({"eq": "foofooprofilingfoo"}) is True
    with pytest.warns(DeprecationWarning):
        assert passes({"ne": "profiling"}) is True
        assert passes({"ne": "foofooprofilingfoo"}) is False
Пример #24
0
 def from_fixed_length_tuple(cls, tuple_):
     """Build an instance from ``(run_id, suite_name, metric_name, metric_kwargs_id)``.

     Raises:
         GreatExpectationsError: if the tuple does not have exactly four components.
     """
     if len(tuple_) != 4:
         raise GreatExpectationsError(
             "ValidationMetricIdentifier fixed length tuple must have exactly four "
             "components."
         )
     # The suite identifier's own fixed-length form is a one-element tuple.
     suite_tuple = (tuple_[1],)
     suite_identifier = ExpectationSuiteIdentifier.from_fixed_length_tuple(
         suite_tuple
     )
     return cls(
         run_id=tuple_[0],
         expectation_suite_identifier=suite_identifier,
         metric_name=tuple_[2],
         metric_kwargs_id=tuple_[3],
     )
def test_expectations_store(empty_data_context):
    """Round-trip two expectation suites through a default ExpectationsStore.

    For each suite name the test stores a fresh ``ExpectationSuite`` under an
    ``ExpectationSuiteIdentifier`` key, reads it back, rebuilds a suite from
    the returned dict and checks it equals the one that was stored. Finally
    it checks that ``list_keys`` reports exactly the keys that were written.

    Args:
        empty_data_context: fixture supplying the DataContext attached to
            every suite built here.
    """
    context: DataContext = empty_data_context
    my_store = ExpectationsStore()

    # NOTE(review): this call omits the value argument, so the TypeError may
    # be raised for the missing argument rather than for the invalid key type
    # -- confirm against Store.set before relying on it as a key-type check.
    with pytest.raises(TypeError):
        my_store.set("not_a_ValidationResultIdentifier")

    written_keys = set()
    for suite_name in ("a.b.c.warning", "a.b.c.failure"):
        # from_tuple expects the dot-separated components of the name;
        # tuple(suite_name) would instead yield one element per character.
        key = ExpectationSuiteIdentifier.from_tuple(
            tuple(suite_name.split(".")))
        my_store.set(
            key,
            ExpectationSuite(expectation_suite_name=suite_name,
                             data_context=context),
        )

        stored_dict: dict = my_store.get(key)
        rebuilt_suite: ExpectationSuite = ExpectationSuite(
            **stored_dict, data_context=context)
        assert rebuilt_suite == ExpectationSuite(
            expectation_suite_name=suite_name, data_context=context)

        written_keys.add(key)

    assert set(my_store.list_keys()) == written_keys
Пример #26
0
def suite_delete(suite, directory):
    """Delete an expectation suite from the expectation store.

    Args:
        suite: name of the expectation suite to delete.
        directory: passed to ``load_data_context_with_error_handling`` to
            load the data context.

    Behavior:
        * If the context lists no suites at all, a message is printed and the
          function returns without deleting anything.
        * If suites exist but none is named ``suite``, a message is printed
          and the process exits with status 1.
        * Otherwise the suite is handed to
          ``context.delete_expectation_suite``.
    """
    context = load_data_context_with_error_handling(directory)
    suite_names = context.list_expectation_suite_names()
    if not suite_names:
        cli_message("No expectation suites found")
        return

    # Check the requested name explicitly: the previous truthiness test on a
    # freshly built identifier object could not select the "no match" branch.
    if suite not in suite_names:
        cli_message("No matching expectation suites found")
        sys.exit(1)

    # The context API is handed a suite object, exactly as before.
    expectation_suite = ExpectationSuite(expectation_suite_name=suite)
    context.delete_expectation_suite(expectation_suite)
def test_SlackNotificationAction(data_context_parameterized_expectation_suite):
    """Run a SlackNotificationAction against an empty, successful result.

    The action is configured with the Slack renderer, a test webhook URL and
    ``notify_on="all"``; the test asserts that ``run`` returns a payload whose
    ``slack_notification_result`` is ``None``.

    Args:
        data_context_parameterized_expectation_suite: fixture providing the
            data context the action is bound to.
    """
    action = SlackNotificationAction(
        data_context=data_context_parameterized_expectation_suite,
        renderer={
            "module_name": "great_expectations.render.renderer.slack_renderer",
            "class_name": "SlackRenderer",
        },
        slack_webhook="https://hooks.slack.com/services/test/slack/webhook",
        notify_on="all",
    )

    run_statistics = {
        "evaluated_expectations": 0,
        "successful_expectations": 0,
        "unsuccessful_expectations": 0,
        "success_percent": None,
    }
    run_meta = {
        "great_expectations_version": "v0.8.0__develop",
        "expectation_suite_name": "asset.default",
        "run_id": "test_100",
    }
    empty_result = ExpectationSuiteValidationResult(
        results=[],
        success=True,
        statistics=run_statistics,
        meta=run_meta,
    )

    result_identifier = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            "asset.default"),
        run_id="test_100",
        batch_identifier="1234",
    )

    # TODO: improve this test - currently it is verifying a failed call to Slack. It returns a "empty" payload
    outcome = action.run(
        validation_result_suite_identifier=result_identifier,
        validation_result_suite=empty_result,
        data_asset=None,
    )
    assert outcome == {"slack_notification_result": None}
def test_TupleGCSStoreBackend_base_public_path():
    """
    What does this test and why?

    The base_public_path parameter allows users to point to a custom DNS when hosting Data docs.

    This test builds a TupleGCSStoreBackend configured with base_public_path (with
    google.cloud.storage.Client patched out), stores one object, and then checks that
    get_public_url_for_key returns a URL rooted at base_public_path followed by the
    bucket, the prefix and the key components.
    """
    bucket = "leakybucket"
    prefix = "this_is_a_test_prefix"
    project = "dummy-project"
    base_public_path = "http://www.test.com/"

    # The patch only has to be active while the backend talks to the client;
    # the mock object itself is never inspected, so it is not bound to a name.
    with patch("google.cloud.storage.Client", autospec=True):
        my_store_with_base_public_path = TupleGCSStoreBackend(
            filepath_template=None,
            bucket=bucket,
            prefix=prefix,
            project=project,
            base_public_path=base_public_path,
        )

        my_store_with_base_public_path.set(("BBB", ),
                                           b"bbb",
                                           content_encoding=None,
                                           content_type="image/png")

    # Timezone-aware "now" in UTC; datetime.utcnow() is deprecated.
    run_id = RunIdentifier("my_run_id",
                           datetime.datetime.now(datetime.timezone.utc))
    key = ValidationResultIdentifier(
        ExpectationSuiteIdentifier(expectation_suite_name="my_suite_name"),
        run_id,
        "my_batch_id",
    )
    # The run time is rendered by RunIdentifier itself, so read it back rather
    # than formatting the timestamp independently.
    run_time_string = run_id.to_tuple()[1]

    url = my_store_with_base_public_path.get_public_url_for_key(key.to_tuple())
    assert (
        url == "http://www.test.com/leakybucket" +
        f"/this_is_a_test_prefix/my_suite_name/my_run_id/{run_time_string}/my_batch_id"
    )
Пример #29
0
def create_empty_suite(context: DataContext, expectation_suite_name: str,
                       batch_kwargs) -> None:
    """Create, annotate and save a new empty expectation suite.

    Prints a message naming the new suite and the URL the expectations store
    backend reports for its key, then creates the suite through the context,
    adds a citation recording ``batch_kwargs`` and saves the suite.

    Args:
        context: the DataContext that owns the expectations store.
        expectation_suite_name: name of the suite to create.
        batch_kwargs: recorded in the suite's citation.
    """
    expectations_store = context.stores[context.expectations_store_name]
    suite_key = ExpectationSuiteIdentifier(
        expectation_suite_name=expectation_suite_name)
    suite_url = expectations_store.store_backend.get_url_for_key(
        suite_key.to_tuple())

    announcement = """
Great Expectations will create a new Expectation Suite '{:s}' and store it here:

  {:s}
""".format(expectation_suite_name, suite_url)
    cli_message(announcement)

    new_suite = context.create_expectation_suite(expectation_suite_name)
    new_suite.add_citation(comment="New suite added via CLI",
                           batch_kwargs=batch_kwargs)
    context.save_expectation_suite(new_suite, expectation_suite_name)
def test_ExpectationsStore_with_DatabaseStoreBackend_postgres(caplog):
    """Store and then overwrite a suite in a postgres-backed ExpectationsStore.

    The test checks that ``get`` rejects a non-identifier key with TypeError,
    that a suite written with ``set`` is read back equal to an identical
    suite, and that a second ``set`` under the same key replaces the stored
    value.

    Args:
        caplog: pytest log-capture fixture; not read by the test body.
    """
    connection_kwargs = {
        "drivername": "postgresql",
        "username": "******",
        "password": "",
        "host": "localhost",
        "port": "5432",
        "database": "test_ci",
    }

    # First, demonstrate that we pick up default configuration
    my_store = ExpectationsStore(store_backend={
        "class_name": "DatabaseStoreBackend",
        "credentials": connection_kwargs,
    })
    with pytest.raises(TypeError):
        my_store.get("not_a_ExpectationSuiteIdentifier")

    # first suite to add to db
    default_suite = ExpectationSuite(
        expectation_suite_name="a.b.c",
        meta={"test_meta_key": "test_meta_value"},
        expectations=[],
    )

    # Pass the dot-separated components of the name; tuple("a.b.c") would
    # yield one element per character and produce a key that does not match
    # the suite name.
    ns_1 = ExpectationSuiteIdentifier.from_tuple(("a", "b", "c"))
    # initial set and check if first suite exists
    my_store.set(ns_1, default_suite)
    assert my_store.get(ns_1) == ExpectationSuite(
        expectation_suite_name="a.b.c",
        meta={"test_meta_key": "test_meta_value"},
        expectations=[],
    )

    # update suite and check if new value exists
    updated_suite = ExpectationSuite(
        expectation_suite_name="a.b.c",
        meta={"test_meta_key": "test_new_meta_value"},
        expectations=[],
    )
    my_store.set(ns_1, updated_suite)
    assert my_store.get(ns_1) == ExpectationSuite(
        expectation_suite_name="a.b.c",
        meta={"test_meta_key": "test_new_meta_value"},
        expectations=[],
    )