def get_key(self) -> DataContextKey:
    """
    Build the Store key under which configs are retrieved and stored.

    Returns:
        A ConfigurationIdentifier whose configuration key is DataContextVariableSchema.ALL_VARIABLES.
    """
    return ConfigurationIdentifier(
        configuration_key=DataContextVariableSchema.ALL_VARIABLES
    )
    def delete_profiler(
        profiler_store: ProfilerStore,
        name: Optional[str] = None,
        ge_cloud_id: Optional[str] = None,
    ) -> None:
        """
        Remove a single Profiler configuration from the given ProfilerStore.

        Exactly one of "name" and "ge_cloud_id" must be truthy.  A truthy "ge_cloud_id" selects a
        GeCloudIdentifier key; otherwise "name" is used as the key of a ConfigurationIdentifier.

        Args:
            profiler_store: Store from which the key is removed.
            name: Configuration key of the Profiler to delete.
            ge_cloud_id: Cloud identifier of the Profiler to delete.

        Raises:
            AssertionError: If both or neither of "name" and "ge_cloud_id" are provided.
            ProfilerNotFoundError: If the store raises InvalidKeyError or KeyError while removing the key.
        """
        assert bool(name) ^ bool(
            ge_cloud_id
        ), "Must provide either name or ge_cloud_id (but not both)"

        key: Union[GeCloudIdentifier, ConfigurationIdentifier]
        if ge_cloud_id:
            key = GeCloudIdentifier(resource_type="contract",
                                    ge_cloud_id=ge_cloud_id)
        else:
            key = ConfigurationIdentifier(configuration_key=name)

        try:
            profiler_store.remove_key(key=key)
        except (ge_exceptions.InvalidKeyError, KeyError) as exc_ik:
            # Only a ConfigurationIdentifier is asked for "configuration_key"; any other key is shown as-is.
            id_ = (key.configuration_key if isinstance(
                key, ConfigurationIdentifier) else key)
            # Chain explicitly so the traceback reports the store error as the direct cause.
            raise ge_exceptions.ProfilerNotFoundError(
                message=
                f'Non-existent Profiler configuration named "{id_}".\n\nDetails: {exc_ik}'
            ) from exc_ik
    def add_profiler(
        config: RuleBasedProfilerConfig,
        data_context: "DataContext",  # noqa: F821
        profiler_store: ProfilerStore,
        ge_cloud_id: Optional[str] = None,
    ) -> "RuleBasedProfiler":
        """
        Instantiate a RuleBasedProfiler from "config" and save the config in "profiler_store".

        Args:
            config: Profiler configuration to instantiate and store.
            data_context: Passed to the instantiated profiler through the runtime environment.
            profiler_store: Store that receives the configuration.
            ge_cloud_id: When truthy, the config is stored under a GeCloudIdentifier built from this id;
                otherwise it is stored under a ConfigurationIdentifier keyed by "config.name".

        Returns:
            The newly instantiated profiler.

        Raises:
            InvalidConfigError: If the batch request validity check on "config" fails.
        """
        batch_requests_are_valid = (
            RuleBasedProfiler._check_validity_of_batch_requests_in_config(config=config)
        )
        if not batch_requests_are_valid:
            raise ge_exceptions.InvalidConfigError(
                f'batch_data found in batch_request cannot be saved to ProfilerStore "{profiler_store.store_name}"'
            )

        # Chetan - 20220204 - DataContext to be removed once it can be decoupled from RBP
        profiler_config_dict = config.to_json_dict()
        runtime_environment = {"data_context": data_context}
        config_defaults = {
            "module_name": "great_expectations.rule_based_profiler",
            "class_name": "RuleBasedProfiler",
        }
        profiler: "RuleBasedProfiler" = instantiate_class_from_config(
            config=profiler_config_dict,
            runtime_environment=runtime_environment,
            config_defaults=config_defaults,
        )

        store_key: Union[GeCloudIdentifier, ConfigurationIdentifier] = (
            GeCloudIdentifier(resource_type="contract", ge_cloud_id=ge_cloud_id)
            if ge_cloud_id
            else ConfigurationIdentifier(configuration_key=config.name)
        )
        profiler_store.set(key=store_key, value=config)

        return profiler
def test_list_profilers(mock_profiler_store: mock.MagicMock):
    """list_profilers should return the configuration keys of the identifiers listed by the store."""
    expected_names = ["a", "b", "c"]

    profiler_store = mock_profiler_store()
    profiler_store.list_keys.return_value = list(
        map(ConfigurationIdentifier, expected_names)
    )

    listed_names = RuleBasedProfiler.list_profilers(
        profiler_store, ge_cloud_mode=False
    )

    assert listed_names == expected_names
    assert profiler_store.list_keys.called
Exemplo n.º 5
0
def delete_checkpoint_config_from_store_backend(
    store_name: str,
    store_backend: Union[StoreBackend, dict],
    checkpoint_name: str,
):
    """
    Remove the Checkpoint configuration stored under "checkpoint_name".

    A CheckpointStore is built from "store_name" and "store_backend"; the entry whose ConfigurationIdentifier
    key is "checkpoint_name" is then removed from it.
    """
    checkpoint_store: CheckpointStore = build_checkpoint_store_using_store_backend(
        store_name=store_name,
        store_backend=store_backend,
    )
    checkpoint_store.remove_key(
        key=ConfigurationIdentifier(configuration_key=checkpoint_name)
    )
def test_delete_profiler(populated_profiler_store: ProfilerStore, ):
    """delete_profiler should call ProfilerStore.remove_key exactly once, with the key built from the name."""
    remove_key_target = (
        "great_expectations.data_context.store.profiler_store.ProfilerStore.remove_key"
    )
    expected_key = ConfigurationIdentifier("my_profiler")

    with mock.patch(remove_key_target) as mock_remove_key:
        RuleBasedProfiler.delete_profiler(
            profiler_store=populated_profiler_store,
            name="my_profiler",
            ge_cloud_id=None,
        )

    # One call, made with exactly the expected keyword argument.
    mock_remove_key.assert_called_once_with(key=expected_key)
Exemplo n.º 7
0
def save_checkpoint_config_to_store_backend(
    store_name: str,
    store_backend: Union[StoreBackend, dict],
    checkpoint_name: str,
    checkpoint_configuration: CheckpointConfig,
):
    """
    Store "checkpoint_configuration" under the key "checkpoint_name".

    The CheckpointStore is built from "store_name" and "store_backend" with overwrite_existing=True, and the
    configuration is set under a ConfigurationIdentifier keyed by "checkpoint_name".
    """
    checkpoint_store: CheckpointStore = build_checkpoint_store_using_store_backend(
        store_name=store_name,
        store_backend=store_backend,
        overwrite_existing=True,
    )
    checkpoint_store.set(
        key=ConfigurationIdentifier(configuration_key=checkpoint_name),
        value=checkpoint_configuration,
    )
    def determine_key(
        name: Optional[str], ge_cloud_id: Optional[str]
    ) -> Union[GeCloudIdentifier, ConfigurationIdentifier]:
        """
        Build the store key that corresponds to either a name or a cloud id.

        Exactly one of the two arguments must be truthy.

        Returns:
            A GeCloudIdentifier (resource type GeCloudRESTResource.CONTRACT) when "ge_cloud_id" is truthy,
            otherwise a ConfigurationIdentifier keyed by "name".

        Raises:
            AssertionError: If both or neither of the arguments are provided.
        """
        exactly_one_given = bool(name) ^ bool(ge_cloud_id)
        assert exactly_one_given, "Must provide either name or ge_cloud_id."

        if not ge_cloud_id:
            return ConfigurationIdentifier(configuration_key=name)

        return GeCloudIdentifier(
            resource_type=GeCloudRESTResource.CONTRACT,
            ge_cloud_id=ge_cloud_id,
        )
Exemplo n.º 9
0
def delete_config_from_store_backend(
    class_name: str,
    module_name: str,
    store_name: str,
    store_backend: Union[StoreBackend, dict],
    configuration_key: str,
):
    """
    Remove the configuration stored under "configuration_key".

    A ConfigurationStore is built from the given class/module/store arguments (overwrite_existing=True) and the
    entry whose ConfigurationIdentifier key is "configuration_key" is removed from it.
    """
    configuration_store: ConfigurationStore = build_configuration_store(
        class_name=class_name,
        module_name=module_name,
        store_name=store_name,
        store_backend=store_backend,
        overwrite_existing=True,
    )
    configuration_store.remove_key(
        key=ConfigurationIdentifier(configuration_key=configuration_key)
    )
Exemplo n.º 10
0
def load_config_from_store_backend(
    class_name: str,
    module_name: str,
    store_name: str,
    store_backend: Union[StoreBackend, dict],
    configuration_key: str,
) -> BaseYamlConfig:
    """
    Fetch the configuration stored under "configuration_key".

    A ConfigurationStore is built from the given class/module/store arguments (overwrite_existing=False) and
    queried with a ConfigurationIdentifier keyed by "configuration_key".

    Returns:
        Whatever the store returns for that key.
    """
    configuration_store: ConfigurationStore = build_configuration_store(
        class_name=class_name,
        module_name=module_name,
        store_name=store_name,
        store_backend=store_backend,
        overwrite_existing=False,
    )
    lookup_key = ConfigurationIdentifier(configuration_key=configuration_key)
    return configuration_store.get(key=lookup_key)
Exemplo n.º 11
0
def load_checkpoint_config_from_store_backend(
    store_name: str,
    store_backend: Union[StoreBackend, dict],
    checkpoint_name: str,
) -> CheckpointConfig:
    """
    Fetch the Checkpoint configuration stored under "checkpoint_name".

    A CheckpointStore is built from "store_name" and "store_backend" and queried with a
    ConfigurationIdentifier keyed by "checkpoint_name".

    Returns:
        Whatever the store returns for that key.

    Raises:
        InvalidCheckpointConfigError: If the store raises InvalidBaseYamlConfigError; the original error's
            messages are logged and the original error is attached as the cause.
    """
    config_store: CheckpointStore = build_checkpoint_store_using_store_backend(
        store_name=store_name,
        store_backend=store_backend,
    )
    key: ConfigurationIdentifier = ConfigurationIdentifier(
        configuration_key=checkpoint_name,
    )
    try:
        return config_store.get(key=key)
    except ge_exceptions.InvalidBaseYamlConfigError as exc:
        logger.error(exc.messages)
        # NOTE(review): the message names DataContextConfig although a Checkpoint configuration is being
        # loaded; the text is kept unchanged in case callers or tests match on it -- confirm and correct.
        raise ge_exceptions.InvalidCheckpointConfigError(
            "Error while processing DataContextConfig.", exc
        ) from exc
Exemplo n.º 12
0
def save_config_to_store_backend(
    class_name: str,
    module_name: str,
    store_name: str,
    store_backend: Union[StoreBackend, dict],
    configuration_key: str,
    configuration: BaseYamlConfig,
):
    """
    Store "configuration" under the key "configuration_key".

    A ConfigurationStore is built from the given class/module/store arguments (overwrite_existing=True) and the
    configuration is set under a ConfigurationIdentifier keyed by "configuration_key".
    """
    configuration_store: ConfigurationStore = build_configuration_store(
        class_name=class_name,
        module_name=module_name,
        store_name=store_name,
        store_backend=store_backend,
        overwrite_existing=True,
    )
    configuration_store.set(
        key=ConfigurationIdentifier(configuration_key=configuration_key),
        value=configuration,
    )
    def get_profiler(
        data_context: "DataContext",  # noqa: F821
        profiler_store: ProfilerStore,
        name: Optional[str] = None,
        ge_cloud_id: Optional[str] = None,
    ) -> "RuleBasedProfiler":
        """
        Load a Profiler configuration from "profiler_store" and instantiate a RuleBasedProfiler from it.

        Exactly one of "name" and "ge_cloud_id" must be truthy.  A truthy "ge_cloud_id" selects a
        GeCloudIdentifier key; otherwise "name" is used as the key of a ConfigurationIdentifier.

        Args:
            data_context: Passed to the instantiated profiler through the runtime environment.
            profiler_store: Store from which the configuration is read.
            name: Configuration key of the Profiler; when given it also overrides the "name" entry of the
                loaded configuration.
            ge_cloud_id: Cloud identifier of the Profiler.

        Returns:
            The instantiated profiler.

        Raises:
            AssertionError: If both or neither of "name" and "ge_cloud_id" are provided.
            ProfilerNotFoundError: If the store raises InvalidKeyError for the key.
        """
        assert bool(name) ^ bool(
            ge_cloud_id
        ), "Must provide either name or ge_cloud_id (but not both)"

        key: Union[GeCloudIdentifier, ConfigurationIdentifier]
        if ge_cloud_id:
            key = GeCloudIdentifier(resource_type="contract",
                                    ge_cloud_id=ge_cloud_id)
        else:
            key = ConfigurationIdentifier(configuration_key=name, )
        try:
            profiler_config: RuleBasedProfilerConfig = profiler_store.get(
                key=key)
        except ge_exceptions.InvalidKeyError as exc_ik:
            # Only a ConfigurationIdentifier is asked for "configuration_key"; any other key is shown as-is.
            id_ = (key.configuration_key if isinstance(
                key, ConfigurationIdentifier) else key)
            # Chain explicitly so the traceback reports the store error as the direct cause.
            raise ge_exceptions.ProfilerNotFoundError(
                message=
                f'Non-existent Profiler configuration named "{id_}".\n\nDetails: {exc_ik}'
            ) from exc_ik

        config = profiler_config.to_json_dict()
        if name:
            config.update({"name": name})
        config = filter_properties_dict(properties=config, clean_falsy=True)

        profiler = instantiate_class_from_config(
            config=config,
            runtime_environment={
                "data_context": data_context,
            },
            config_defaults={
                "module_name": "great_expectations.rule_based_profiler",
                "class_name": "RuleBasedProfiler",
            },
        )

        return profiler
Exemplo n.º 14
0
def delete_checkpoint(
    checkpoint_store: CheckpointStore,
    name: Optional[str] = None,
    ge_cloud_id: Optional[str] = None,
):
    """
    Remove a single Checkpoint configuration from the given CheckpointStore.

    Exactly one of "name" and "ge_cloud_id" must be truthy.  A truthy "ge_cloud_id" selects a
    GeCloudIdentifier key; otherwise "name" is used as the key of a ConfigurationIdentifier.

    Args:
        checkpoint_store: Store from which the key is removed.
        name: Configuration key of the Checkpoint to delete.
        ge_cloud_id: Cloud identifier of the Checkpoint to delete.

    Raises:
        AssertionError: If both or neither of "name" and "ge_cloud_id" are provided.
        CheckpointNotFoundError: If the store raises InvalidKeyError while removing the key.
    """
    assert bool(name) ^ bool(ge_cloud_id), "Must provide either name or ge_cloud_id."

    key: Union[GeCloudIdentifier, ConfigurationIdentifier]
    if ge_cloud_id:
        key = GeCloudIdentifier(resource_type="contract", ge_cloud_id=ge_cloud_id)
    else:
        key = ConfigurationIdentifier(configuration_key=name)

    try:
        checkpoint_store.remove_key(key=key)
    except ge_exceptions.InvalidKeyError as exc_ik:
        # Read "configuration_key" only from a ConfigurationIdentifier; for a cloud key fall back to the key
        # itself so that building the message cannot fail with AttributeError and hide the real error.
        id_ = key.configuration_key if isinstance(key, ConfigurationIdentifier) else key
        raise ge_exceptions.CheckpointNotFoundError(
            message=f'Non-existent Checkpoint configuration named "{id_}".\n\nDetails: {exc_ik}'
        ) from exc_ik
Exemplo n.º 15
0
def add_checkpoint(
    data_context: "DataContext",  # noqa: F821
    checkpoint_store: CheckpointStore,
    checkpoint_store_name: str,
    ge_cloud_mode: bool,
    name: str,
    config_version: Optional[Union[int, float]] = None,
    template_name: Optional[str] = None,
    module_name: Optional[str] = None,
    class_name: Optional[str] = None,
    run_name_template: Optional[str] = None,
    expectation_suite_name: Optional[str] = None,
    batch_request: Optional[dict] = None,
    action_list: Optional[List[dict]] = None,
    evaluation_parameters: Optional[dict] = None,
    runtime_configuration: Optional[dict] = None,
    validations: Optional[List[dict]] = None,
    profilers: Optional[List[dict]] = None,
    # Next two fields are for LegacyCheckpoint configuration
    validation_operator_name: Optional[str] = None,
    batches: Optional[List[dict]] = None,
    # the following four arguments are used by SimpleCheckpoint
    site_names: Optional[Union[str, List[str]]] = None,
    slack_webhook: Optional[str] = None,
    notify_on: Optional[str] = None,
    notify_with: Optional[Union[str, List[str]]] = None,
    ge_cloud_id: Optional[str] = None,
    expectation_suite_ge_cloud_id: Optional[str] = None,
) -> Union[Checkpoint, LegacyCheckpoint]:
    """
    Instantiate a Checkpoint from the given arguments and save its configuration in "checkpoint_store".

    The arguments are collected into a configuration dictionary, falsy entries are filtered out, and the
    checkpoint class is instantiated from it (default module "great_expectations.checkpoint").  The
    configuration reported by the new checkpoint is stored under a GeCloudIdentifier when "ge_cloud_mode" is
    truthy, otherwise under a ConfigurationIdentifier keyed by "name".  If the store returns a
    GeCloudIdAwareRef, its ge_cloud_id is set on the checkpoint as a UUID.

    Returns:
        The newly instantiated checkpoint.

    Raises:
        InvalidConfigError: If batch_data is found in "batch_request" or in the batch requests of
            "validations".
    """
    # These checks protect against typed objects (BatchRequest and/or RuntimeBatchRequest) encountered in arguments.
    batch_request = get_batch_request_as_dict(batch_request=batch_request)
    validations = get_validations_with_batch_request_as_dict(validations=validations)

    # DataFrames shouldn't be saved to CheckpointStore
    if batch_request_contains_batch_data(batch_request=batch_request):
        raise ge_exceptions.InvalidConfigError(
            f'batch_data found in batch_request cannot be saved to CheckpointStore "{checkpoint_store_name}"'
        )
    if batch_request_in_validations_contains_batch_data(validations=validations):
        raise ge_exceptions.InvalidConfigError(
            f'batch_data found in validations cannot be saved to CheckpointStore "{checkpoint_store_name}"'
        )

    raw_config: dict = dict(
        name=name,
        config_version=config_version,
        template_name=template_name,
        module_name=module_name,
        class_name=class_name,
        run_name_template=run_name_template,
        expectation_suite_name=expectation_suite_name,
        batch_request=batch_request,
        action_list=action_list,
        evaluation_parameters=evaluation_parameters,
        runtime_configuration=runtime_configuration,
        validations=validations,
        profilers=profilers,
        # Next two fields are for LegacyCheckpoint configuration
        validation_operator_name=validation_operator_name,
        batches=batches,
        # the following four keys are used by SimpleCheckpoint
        site_names=site_names,
        slack_webhook=slack_webhook,
        notify_on=notify_on,
        notify_with=notify_with,
        ge_cloud_id=ge_cloud_id,
        expectation_suite_ge_cloud_id=expectation_suite_ge_cloud_id,
    )
    filtered_config = deep_filter_properties_iterable(
        properties=raw_config,
        clean_falsy=True,
    )

    new_checkpoint: Union[
        Checkpoint, SimpleCheckpoint, LegacyCheckpoint
    ] = instantiate_class_from_config(
        config=filtered_config,
        runtime_environment={"data_context": data_context},
        config_defaults={"module_name": "great_expectations.checkpoint"},
    )

    store_key: Union[GeCloudIdentifier, ConfigurationIdentifier] = (
        GeCloudIdentifier(resource_type="contract", ge_cloud_id=ge_cloud_id)
        if ge_cloud_mode
        else ConfigurationIdentifier(configuration_key=name)
    )

    # Persist the configuration as reported by the instantiated checkpoint.
    stored_ref = checkpoint_store.set(
        key=store_key, value=new_checkpoint.get_config()
    )
    if isinstance(stored_ref, GeCloudIdAwareRef):
        new_checkpoint.ge_cloud_id = uuid.UUID(stored_ref.ge_cloud_id)

    return new_checkpoint
Exemplo n.º 16
0
def get_checkpoint(
    data_context: "DataContext",  # noqa: F821
    checkpoint_store: CheckpointStore,
    name: Optional[str] = None,
    ge_cloud_id: Optional[str] = None,
) -> Union[Checkpoint, LegacyCheckpoint]:
    """
    Load a Checkpoint configuration from "checkpoint_store" and instantiate a checkpoint from it.

    A truthy "ge_cloud_id" selects a GeCloudIdentifier key; otherwise "name" is used as the key of a
    ConfigurationIdentifier.

    Args:
        data_context: Passed to the instantiated checkpoint through the runtime environment.
        checkpoint_store: Store from which the configuration is read.
        name: Configuration key of the Checkpoint; when given it also overrides the "name" entry of the
            loaded configuration.
        ge_cloud_id: Cloud identifier of the Checkpoint.

    Returns:
        The instantiated checkpoint.

    Raises:
        CheckpointNotFoundError: If the store raises InvalidKeyError for the key.
        InvalidCheckpointConfigError: If the store raises ValidationError while loading the configuration.
        CheckpointError: If the configuration has no config_version and its "batches" entry is missing, or
            is non-empty without both "batch_kwargs" and "expectation_suite_names" among its items' keys.
    """
    key: Union[GeCloudIdentifier, ConfigurationIdentifier]
    if ge_cloud_id:
        key = GeCloudIdentifier(resource_type="contract", ge_cloud_id=ge_cloud_id)
    else:
        key = ConfigurationIdentifier(configuration_key=name)

    try:
        checkpoint_config: CheckpointConfig = checkpoint_store.get(key=key)
    except ge_exceptions.InvalidKeyError as exc_ik:
        # Read "configuration_key" only from a ConfigurationIdentifier; for a cloud key fall back to the key
        # itself so that building the message cannot fail with AttributeError and hide the real error.
        id_ = key.configuration_key if isinstance(key, ConfigurationIdentifier) else key
        raise ge_exceptions.CheckpointNotFoundError(
            message=f'Non-existent Checkpoint configuration named "{id_}".\n\nDetails: {exc_ik}'
        ) from exc_ik
    except ValidationError as exc_ve:
        raise ge_exceptions.InvalidCheckpointConfigError(
            message="Invalid Checkpoint configuration", validation_error=exc_ve
        ) from exc_ve

    # Serialize once and reuse the dictionary for both the legacy check and the instantiation below.
    config: dict = checkpoint_config.to_json_dict()

    if checkpoint_config.config_version is None:
        # Without a config_version, "batches" must be present and either empty or carry both required keys
        # somewhere among its items.
        required_batch_keys = {"batch_kwargs", "expectation_suite_names"}
        has_usable_batches = "batches" in config and (
            len(config["batches"]) == 0
            or required_batch_keys.issubset(
                itertools.chain.from_iterable(
                    item.keys() for item in config["batches"]
                )
            )
        )
        if not has_usable_batches:
            raise ge_exceptions.CheckpointError(
                message="Attempt to instantiate LegacyCheckpoint with insufficient and/or incorrect arguments."
            )

    if name:
        config["name"] = name

    config = filter_properties_dict(properties=config, clean_falsy=True)

    checkpoint: Union[Checkpoint, LegacyCheckpoint] = instantiate_class_from_config(
        config=config,
        runtime_environment={
            "data_context": data_context,
        },
        config_defaults={
            "module_name": "great_expectations.checkpoint",
        },
    )

    return checkpoint
Exemplo n.º 17
0
def test_profiler_store_integration(
    empty_data_context: DataContext,
    profiler_store_name: str,
    profiler_name: str,
    profiler_config_with_placeholder_args: RuleBasedProfilerConfig,
):
    """Exercise set / list_keys / remove_key / self_check on a filesystem-backed ProfilerStore."""
    profilers_dir: str = str(Path(empty_data_context.root_directory) / "profilers")

    store: ProfilerStore = build_profiler_store_using_filesystem(
        store_name=profiler_store_name,
        base_directory=profilers_dir,
        overwrite_existing=True,
    )

    # Before anything is stored, only the backend id file is expected in the directory.
    assert gen_directory_tree_str(startpath=profilers_dir) == (
        "profilers/\n"
        "    .ge_store_backend_id\n"
    )

    profiler_key: ConfigurationIdentifier = ConfigurationIdentifier(
        configuration_key=profiler_name
    )
    store.set(key=profiler_key, value=profiler_config_with_placeholder_args)

    # Storing the config is expected to add one YAML file.
    assert gen_directory_tree_str(startpath=profilers_dir) == (
        "profilers/\n"
        "    .ge_store_backend_id\n"
        "    my_first_profiler.yml\n"
    )

    assert len(store.list_keys()) == 1
    store.remove_key(key=profiler_key)
    assert len(store.list_keys()) == 0

    report: dict = convert_to_json_serializable(data=store.self_check())

    # Drop dynamic value to ensure appropriate assert
    report["config"]["store_backend"].pop("base_directory")

    expected_store_backend = {
        "class_name": "TupleFilesystemStoreBackend",
        "filepath_suffix": ".yml",
        "fixed_length_key": False,
        "module_name": "great_expectations.data_context.store.tuple_store_backend",
        "platform_specific_separator": True,
        "suppress_store_backend_id": False,
    }
    expected_report = {
        "config": {
            "class_name": "ProfilerStore",
            "module_name": "great_expectations.data_context.store.profiler_store",
            "overwrite_existing": True,
            "store_backend": expected_store_backend,
            "store_name": "profiler_store",
        },
        "keys": [],
        "len_keys": 0,
    }
    assert report == expected_report
def test_checkpoint_store(empty_data_context):
    """Exercise set / get / list_keys / remove_key / self_check on a filesystem-backed CheckpointStore."""

    # The two checkpoint configurations use identical literal values; the factories below hand out a fresh
    # object on every call so that the configurations share no mutable state.
    def make_action_list() -> List[dict]:
        return [
            {"name": action_name, "action": {"class_name": action_class}}
            for action_name, action_class in (
                ("store_validation_result", "StoreValidationResultAction"),
                ("store_evaluation_params", "StoreEvaluationParametersAction"),
                ("update_data_docs", "UpdateDataDocsAction"),
            )
        ]

    def make_batch_request() -> dict:
        return {
            "datasource_name": "my_pandas_datasource",
            "data_connector_name": "my_runtime_data_connector",
            "data_asset_name": "my_website_logs",
        }

    def make_evaluation_parameters() -> dict:
        return {
            "environment": "$GE_ENVIRONMENT",
            "tolerance": 1.0e-2,
            "aux_param_0": "$MY_PARAM",
            "aux_param_1": "1 + $MY_PARAM",
        }

    def make_runtime_configuration() -> dict:
        return {
            "result_format": {
                "result_format": "BASIC",
                "partial_unexpected_count": 20,
            },
        }

    checkpoints_dir: str = str(Path(empty_data_context.root_directory) / "checkpoints")

    store: CheckpointStore = build_checkpoint_store_using_filesystem(
        store_name="checkpoint_store",
        base_directory=checkpoints_dir,
        overwrite_existing=True,
    )
    assert len(store.list_keys()) == 0

    # Neither a plain string key nor a plain string value is accepted; nothing must be stored.
    with pytest.raises(TypeError):
        store.set(key="my_first_checkpoint", value="this is not a checkpoint")
    assert len(store.list_keys()) == 0

    # First checkpoint: the batch request lives inside the single validation.
    first_name: str = "my_checkpoint_0"
    first_validations: Union[List, Dict] = [
        {
            "batch_request": make_batch_request(),
            "action_list": make_action_list(),
        }
    ]
    first_config: CheckpointConfig = CheckpointConfig(
        name=first_name,
        run_name_template="%Y-%M-my-run-template-$VAR",
        expectation_suite_name="my.test.expectation_suite.name",
        evaluation_parameters=make_evaluation_parameters(),
        runtime_configuration=make_runtime_configuration(),
        validations=first_validations,
    )
    first_key: ConfigurationIdentifier = ConfigurationIdentifier(
        configuration_key=first_name
    )
    store.set(key=first_key, value=first_config)

    assert len(store.list_keys()) == 1

    stored_first = filter_properties_dict(
        properties=store.get(key=first_key).to_json_dict()
    )
    expected_first = filter_properties_dict(properties=first_config.to_json_dict())
    assert stored_first == expected_first

    dir_tree: str = gen_directory_tree_str(startpath=checkpoints_dir)
    assert dir_tree == (
        "checkpoints/\n"
        "    .ge_store_backend_id\n"
        "    my_checkpoint_0.yml\n"
    )

    # Second checkpoint: the batch request is given at the top level instead.
    second_name: str = "my_checkpoint_1"
    second_validations: Union[List, Dict] = [{"action_list": make_action_list()}]
    second_config: CheckpointConfig = CheckpointConfig(
        name=second_name,
        run_name_template="%Y-%M-my-run-template-$VAR",
        expectation_suite_name="my.test.expectation_suite.name",
        batch_request=make_batch_request(),
        evaluation_parameters=make_evaluation_parameters(),
        runtime_configuration=make_runtime_configuration(),
        validations=second_validations,
    )
    second_key: ConfigurationIdentifier = ConfigurationIdentifier(
        configuration_key=second_name
    )
    store.set(key=second_key, value=second_config)

    assert len(store.list_keys()) == 2

    stored_second = filter_properties_dict(
        properties=store.get(key=second_key).to_json_dict()
    )
    expected_second = filter_properties_dict(properties=second_config.to_json_dict())
    assert stored_second == expected_second

    dir_tree = gen_directory_tree_str(startpath=checkpoints_dir)
    assert dir_tree == (
        "checkpoints/\n"
        "    .ge_store_backend_id\n"
        "    my_checkpoint_0.yml\n"
        "    my_checkpoint_1.yml\n"
    )

    report: dict = convert_to_json_serializable(data=store.self_check())
    expected_store_backend = {
        "base_directory": f"{empty_data_context.root_directory}/checkpoints",
        "platform_specific_separator": True,
        "fixed_length_key": False,
        "suppress_store_backend_id": False,
        "module_name": "great_expectations.data_context.store.tuple_store_backend",
        "class_name": "TupleFilesystemStoreBackend",
        "filepath_template": "{0}.yml",
    }
    expected_report = {
        "keys": ["my_checkpoint_0", "my_checkpoint_1"],
        "len_keys": 2,
        "config": {
            "store_name": "checkpoint_store",
            "class_name": "CheckpointStore",
            "module_name": "great_expectations.data_context.store.checkpoint_store",
            "overwrite_existing": True,
            "store_backend": expected_store_backend,
        },
    }
    assert report == expected_report

    # Clean up both entries and confirm the store is empty again.
    store.remove_key(key=first_key)
    store.remove_key(key=second_key)
    assert len(store.list_keys()) == 0