示例#1
0
class ExpectationSuiteValidationResultSchema(Schema):
    """Marshmallow schema for ``ExpectationSuiteValidationResult``.

    ``meta`` and ``statistics`` are passed through
    ``convert_to_json_serializable`` before dumping, and a loaded payload is
    used to construct an ``ExpectationSuiteValidationResult``.
    """

    success = fields.Bool()
    results = fields.List(fields.Nested(ExpectationValidationResultSchema))
    evaluation_parameters = fields.Dict()
    statistics = fields.Dict()
    meta = fields.Dict(allow_none=True)
    ge_cloud_id = fields.UUID(required=False, allow_none=True)

    # noinspection PyUnusedLocal
    @pre_dump
    def prepare_dump(self, data, **kwargs):
        """Return a deep copy of ``data`` with converted ``meta`` and ``statistics``.

        The conversion is applied to a copy, so the object supplied by the
        caller is not modified. Inputs that are neither an
        ``ExpectationSuiteValidationResult`` nor a ``dict`` are copied and
        returned without any conversion.
        """
        converted_names = ("meta", "statistics")
        snapshot = deepcopy(data)
        if isinstance(snapshot, ExpectationSuiteValidationResult):
            for name in converted_names:
                current = getattr(snapshot, name)
                setattr(snapshot, name, convert_to_json_serializable(data=current))
        elif isinstance(snapshot, dict):
            for name in converted_names:
                # .get() yields None for a missing key, which is then converted and stored.
                snapshot[name] = convert_to_json_serializable(data=snapshot.get(name))
        return snapshot

    # noinspection PyUnusedLocal
    @post_load
    def make_expectation_suite_validation_result(self, data, **kwargs):
        """Build an ``ExpectationSuiteValidationResult`` from the loaded fields."""
        return ExpectationSuiteValidationResult(**data)
示例#2
0
class ExpectationValidationResultSchema(Schema):
    """Marshmallow schema for ``ExpectationValidationResult``.

    ``result`` is passed through ``convert_to_json_serializable`` before
    dumping, and a loaded payload is used to construct an
    ``ExpectationValidationResult``.
    """

    success = fields.Bool()
    expectation_config = fields.Nested(ExpectationConfigurationSchema)
    result = fields.Dict()
    meta = fields.Dict()
    exception_info = fields.Dict()

    # noinspection PyUnusedLocal
    @pre_dump
    def convert_result_to_serializable(self, data, **kwargs):
        """Return a deep copy of ``data`` whose ``result`` has been converted.

        Inputs that are neither an ``ExpectationValidationResult`` nor a
        ``dict`` are copied and returned without conversion.
        """
        snapshot = deepcopy(data)
        if isinstance(snapshot, ExpectationValidationResult):
            snapshot.result = convert_to_json_serializable(snapshot.result)
            return snapshot
        if isinstance(snapshot, dict):
            # .get() yields None for a missing key, which is then converted and stored.
            snapshot["result"] = convert_to_json_serializable(snapshot.get("result"))
        return snapshot

    # noinspection PyUnusedLocal
    @post_load
    def make_expectation_validation_result(self, data, **kwargs):
        """Build an ``ExpectationValidationResult`` from the loaded fields."""
        return ExpectationValidationResult(**data)
示例#3
0
class ExpectationSuiteSchema(Schema):
    """Marshmallow schema for ``ExpectationSuite``.

    Before dumping, ``meta`` is passed through ``convert_to_json_serializable``
    and empty ``evaluation_parameters`` / ``meta`` attributes are removed from
    a copy of the suite. A loaded payload is used to construct an
    ``ExpectationSuite``.
    """

    expectation_suite_name = fields.Str()
    expectations = fields.List(fields.Nested(ExpectationConfigurationSchema))
    evaluation_parameters = fields.Dict(allow_none=True)
    data_asset_type = fields.Str(allow_none=True)
    meta = fields.Dict()

    # NOTE: 20191107 - JPC - we may want to remove clean_empty and update tests to require the other fields;
    # doing so could also allow us not to have to make a copy of data in the pre_dump method.
    def clean_empty(self, data):
        """Delete empty ``evaluation_parameters`` and ``meta`` attributes from ``data``.

        ``data`` is modified in place and also returned.

        * ``evaluation_parameters`` is deleted when it has length 0.
        * ``meta`` is deleted when it has length 0, except that ``None`` and
          ``[]`` are deliberately left in place.

        Args:
            data: Object that may carry ``evaluation_parameters`` and ``meta`` attributes.

        Returns:
            The same ``data`` object.
        """
        if hasattr(data, "evaluation_parameters"):
            evaluation_parameters = data.evaluation_parameters
            # The field is declared allow_none=True, so None is a legal value;
            # calling len() on it would raise TypeError, hence the explicit guard.
            if evaluation_parameters is not None and len(evaluation_parameters) == 0:
                del data.evaluation_parameters

        if hasattr(data, "meta"):
            meta = data.meta
            if meta is None or meta == []:
                # Kept as-is: only other empty containers (e.g. {}) are dropped.
                pass
            elif len(meta) == 0:
                del data.meta
        return data

    # noinspection PyUnusedLocal
    @pre_dump
    def prepare_dump(self, data, **kwargs):
        """Return a cleaned deep copy of ``data`` that is ready to be dumped.

        Working on a copy keeps the caller's suite intact even though
        ``clean_empty`` deletes attributes.
        """
        data = deepcopy(data)
        data.meta = convert_to_json_serializable(data.meta)
        data = self.clean_empty(data)
        return data

    # noinspection PyUnusedLocal
    @post_load
    def make_expectation_suite(self, data, **kwargs):
        """Build an ``ExpectationSuite`` from the loaded fields."""
        return ExpectationSuite(**data)
示例#4
0
class DatasourceConfigSchema(Schema):
    """Marshmallow schema for ``DatasourceConfig``; keys not declared here are kept on load."""

    class Meta:
        unknown = INCLUDE

    class_name = fields.String(required=True)
    module_name = fields.String(missing="great_expectations.datasource")
    data_asset_type = fields.Nested(ClassConfigSchema)
    # TODO: Update to generator-specific
    # batch_kwargs_generators = fields.Mapping(keys=fields.Str(), values=fields.Nested(fields.GeneratorSchema))
    batch_kwargs_generators = fields.Dict(
        keys=fields.Str(),
        values=fields.Dict(),
        allow_none=True,
    )
    credentials = fields.Raw(allow_none=True)
    spark_context = fields.Raw(allow_none=True)

    @validates_schema
    def validate_schema(self, data, **kwargs):
        """Reject configurations that still use the ``generators`` key.

        Raises:
            ge_exceptions.InvalidConfigError: If ``"generators"`` is a key of ``data``.
        """
        if "generators" not in data:
            return
        raise ge_exceptions.InvalidConfigError(
            "Your current configuration uses the 'generators' key in a datasource, but in version 0.10 of "
            "GE, that key is renamed to 'batch_kwargs_generators'. Please update your config to continue."
        )

    # noinspection PyUnusedLocal
    @post_load
    def make_datasource_config(self, data, **kwargs):
        """Build a ``DatasourceConfig`` from the loaded fields."""
        return DatasourceConfig(**data)
示例#5
0
class DatasourceConfigSchema(Schema):
    """Marshmallow schema for ``DatasourceConfig``; keys not declared here are kept on load."""

    class Meta:
        unknown = INCLUDE

    class_name = fields.String(missing="Datasource")
    module_name = fields.String(missing="great_expectations.datasource")

    execution_engine = fields.Nested(
        ExecutionEngineConfigSchema, required=False, allow_none=True
    )
    data_connectors = fields.Dict(
        keys=fields.Str(),
        values=fields.Nested(DataConnectorConfigSchema),
        required=False,
        allow_none=True,
    )

    data_asset_type = fields.Nested(ClassConfigSchema, required=False, allow_none=True)

    # TODO: Update to generator-specific
    # batch_kwargs_generators = fields.Mapping(keys=fields.Str(), values=fields.Nested(fields.GeneratorSchema))
    batch_kwargs_generators = fields.Dict(
        keys=fields.Str(), values=fields.Dict(), required=False, allow_none=True
    )
    connection_string = fields.String(required=False, allow_none=True)
    credentials = fields.Raw(required=False, allow_none=True)
    introspection = fields.Dict(required=False, allow_none=True)
    tables = fields.Dict(required=False, allow_none=True)

    @validates_schema
    def validate_schema(self, data, **kwargs):
        """Check cross-field constraints of a datasource configuration.

        Raises:
            ge_exceptions.InvalidConfigError: If the ``generators`` key is
                present, or if any of ``connection_string``, ``credentials``,
                ``introspection`` or ``tables`` is present while ``class_name``
                is not ``SqlAlchemyDatasource`` or ``SimpleSqlalchemyDatasource``.
        """
        if "generators" in data:
            raise ge_exceptions.InvalidConfigError(
                'Your current configuration uses the "generators" key in a datasource, but in version 0.10 of '
                'GE, that key is renamed to "batch_kwargs_generators". Please update your configuration to continue.'
            )

        class_name = data["class_name"]
        # If a class_name begins with the dollar sign ("$"), then it is assumed to be a variable name to be substituted.
        # startswith() (rather than indexing [0]) keeps an empty class_name from raising IndexError.
        if class_name.startswith("$"):
            return

        sqlalchemy_only_keys = (
            "connection_string",
            "credentials",
            "introspection",
            "tables",
        )
        sqlalchemy_class_names = (
            "SqlAlchemyDatasource",
            "SimpleSqlalchemyDatasource",
        )
        if (
            any(key in data for key in sqlalchemy_only_keys)
            and class_name not in sqlalchemy_class_names
        ):
            raise ge_exceptions.InvalidConfigError(
                f"""Your current configuration uses one or more keys in a data source, that are required only by a
sqlalchemy data source (your data source is "{data['class_name']}").  Please update your configuration to continue.
                """
            )

    # noinspection PyUnusedLocal
    @post_load
    def make_datasource_config(self, data, **kwargs):
        """Build a ``DatasourceConfig`` from the loaded fields."""
        return DatasourceConfig(**data)
示例#6
0
class RuleBasedProfilerConfigSchema(Schema):
    """
    Schema classes for configurations which extend from BaseYamlConfig must extend top-level Marshmallow Schema class.
    Schema classes for their constituent configurations which extend DictDot level must extend NotNullSchema class.
    """

    class Meta:
        unknown = INCLUDE

    name = fields.String(
        required=True,
        allow_none=False,
    )
    # NOTE: a misspelled ``all_none=True`` keyword used to be passed to the two
    # fields below. It is not a marshmallow Field option (the correctly spelled
    # ``allow_none=True`` was already present), so it has been removed.
    class_name = fields.String(
        required=False,
        allow_none=True,
        missing="RuleBasedProfiler",
    )
    module_name = fields.String(
        required=False,
        allow_none=True,
        missing="great_expectations.rule_based_profiler",
    )
    config_version = fields.Float(
        required=True,
        allow_none=False,
        # Only version 1.0 passes validation.
        validate=lambda x: x == 1.0,
        error_messages={
            "invalid":
            "config version is not supported; it must be 1.0 per the current version of Great Expectations"
        },
    )
    variables = fields.Dict(
        keys=fields.String(
            required=True,
            allow_none=False,
        ),
        required=False,
        allow_none=True,
    )
    rules = fields.Dict(
        keys=fields.String(
            required=True,
            allow_none=False,
        ),
        values=fields.Nested(
            RuleConfigSchema,
            required=True,
            allow_none=False,
        ),
        required=True,
        allow_none=False,
    )
示例#7
0
class ExpectationConfigurationSchema(Schema):
    """Marshmallow schema for ``ExpectationConfiguration``; ``expectation_type`` is mandatory."""

    expectation_type = fields.Str(
        required=True,
        error_messages={"required": "expectation_type missing in expectation configuration"},
    )
    kwargs = fields.Dict()
    meta = fields.Dict()

    # noinspection PyUnusedLocal
    @post_load
    def make_expectation_configuration(self, data, **kwargs):
        """Build an ``ExpectationConfiguration`` from the loaded fields."""
        return ExpectationConfiguration(**data)
class ExpectationSuiteSchema(Schema):
    """Marshmallow schema for expectation suites given as ``ExpectationSuite`` objects or dicts.

    Before dumping, ``meta`` is passed through ``convert_to_json_serializable``
    and empty ``evaluation_parameters`` / ``meta`` entries are removed from a
    copy of the input.
    """

    expectation_suite_name = fields.Str()
    ge_cloud_id = fields.UUID(required=False, allow_none=True)
    expectations = fields.List(fields.Nested(ExpectationConfigurationSchema))
    evaluation_parameters = fields.Dict(allow_none=True)
    data_asset_type = fields.Str(allow_none=True)
    meta = fields.Dict()

    # NOTE: 20191107 - JPC - we may want to remove clean_empty and update tests to require the other fields;
    # doing so could also allow us not to have to make a copy of data in the pre_dump method.
    # noinspection PyMethodMayBeStatic
    def clean_empty(self, data):
        """Remove empty ``evaluation_parameters`` and ``meta`` entries from ``data``.

        ``data`` is modified in place and also returned. The same rules apply
        to ``ExpectationSuite`` attributes and to ``dict`` keys:

        * ``evaluation_parameters`` is removed when it has length 0.
        * ``meta`` is removed when it has length 0, except that ``None`` and
          ``[]`` are deliberately left in place.

        Missing entries and ``None`` values are never touched. Inputs of any
        other type are returned unchanged.

        Args:
            data: An ``ExpectationSuite`` or a ``dict``.

        Returns:
            The same ``data`` object.
        """
        if isinstance(data, ExpectationSuite):
            if hasattr(data, "evaluation_parameters"):
                evaluation_parameters = data.evaluation_parameters
                # The field allows None; guard it so len() cannot raise TypeError.
                if evaluation_parameters is not None and len(evaluation_parameters) == 0:
                    del data.evaluation_parameters

            if hasattr(data, "meta"):
                meta = data.meta
                if meta is None or meta == []:
                    pass
                elif len(meta) == 0:
                    del data.meta
        elif isinstance(data, dict):
            # The checks below test for None explicitly instead of truthiness:
            # a truthiness guard also skips empty containers, which made the
            # removal branches unreachable and left the dict path a no-op.
            evaluation_parameters = data.get("evaluation_parameters")
            if evaluation_parameters is not None and len(evaluation_parameters) == 0:
                data.pop("evaluation_parameters")

            meta = data.get("meta")
            if meta is None or meta == []:
                pass
            elif len(meta) == 0:
                data.pop("meta")
        return data

    # noinspection PyUnusedLocal
    @pre_dump
    def prepare_dump(self, data, **kwargs):
        """Return a cleaned deep copy of ``data`` that is ready to be dumped.

        Working on a copy keeps the caller's object intact even though
        ``clean_empty`` removes entries.
        """
        data = deepcopy(data)
        if isinstance(data, ExpectationSuite):
            data.meta = convert_to_json_serializable(data.meta)
        elif isinstance(data, dict):
            data["meta"] = convert_to_json_serializable(data.get("meta"))
        data = self.clean_empty(data)
        return data
示例#9
0
class DatasourceConfigSchema(Schema):
    """Marshmallow schema for ``DatasourceConfig``; keys not declared here are kept on load."""

    class Meta:
        unknown = INCLUDE

    class_name = fields.String(missing="Datasource")
    module_name = fields.String(missing="great_expectations.datasource")
    execution_engine = fields.Nested(ExecutionEngineConfigSchema)

    data_connectors = fields.Dict(
        keys=fields.Str(), values=fields.Nested(DataConnectorConfigSchema), required=True, allow_none=False
    )

    credentials = fields.Raw(allow_none=True)
    spark_context = fields.Raw(allow_none=True)

    @validates_schema
    def validate_schema(self, data, **kwargs):
        """Schema-level validation hook; currently performs no checks."""

    # noinspection PyUnusedLocal
    @post_load
    def make_datasource_config(self, data, **kwargs):
        """Build a ``DatasourceConfig`` from the loaded fields."""
        return DatasourceConfig(**data)
示例#10
0
class ExpectationConfigurationBuilderConfigSchema(NotNullSchema):
    """Schema for ``ExpectationConfigurationBuilderConfig``; keys not declared here are kept on load."""

    class Meta:
        unknown = INCLUDE

    __config_class__ = ExpectationConfigurationBuilderConfig

    # NOTE: these two fields used to pass ``all_none=True``, a misspelling of
    # marshmallow's ``allow_none`` option that had no validation effect. The
    # keyword is spelled correctly now so that ``None`` is accepted as intended.
    class_name = fields.String(
        required=False,
        allow_none=True,
    )
    module_name = fields.String(
        required=False,
        allow_none=True,
        missing=
        "great_expectations.rule_based_profiler.expectation_configuration_builder",
    )
    expectation_type = fields.Str(
        required=True,
        error_messages={
            "required":
            "expectation_type missing in expectation configuration builder"
        },
    )
    meta = fields.Dict(
        keys=fields.String(
            required=True,
            allow_none=False,
        ),
        required=False,
        allow_none=True,
    )
示例#11
0
class RenderedAtomicValueSchema(Schema):
    """Marshmallow schema for ``RenderedAtomicValue``; keys not declared here are kept on load.

    After dumping, the keys named in ``REMOVE_KEYS_IF_NONE`` are dropped from
    the output when they carry no value.
    """

    class Meta:
        unknown = INCLUDE

    schema = fields.Dict(required=False, allow_none=True)
    header = fields.Dict(required=False, allow_none=True)

    # for StringValueType
    template = fields.String(required=False, allow_none=True)
    params = fields.Dict(required=False, allow_none=True)

    # for TableType
    header_row = fields.List(fields.Dict, required=False, allow_none=True)
    table = fields.List(
        fields.List(fields.Dict, required=False, allow_none=True))

    # for GraphType
    graph = fields.Dict(required=False, allow_none=True)

    # for UnknownType
    kwargs = fields.Dict(required=False, allow_none=True)

    @post_load
    def create_value_obj(self, data, **kwargs):
        """Build a ``RenderedAtomicValue`` from the loaded fields."""
        return RenderedAtomicValue(**data)

    # Keys dropped from the dumped output when their value is None
    # ("table" was previously listed twice; the duplicate had no effect).
    REMOVE_KEYS_IF_NONE = [
        "template",
        "table",
        "params",
        "header_row",
        "graph",
        "kwargs",
    ]

    @post_dump
    def clean_null_attrs(self, data: dict, **kwargs: dict) -> dict:
        """Removes the attributes in RenderedAtomicValueSchema.REMOVE_KEYS_IF_NONE during serialization if
        their values are None.

        The ``graph`` entry is additionally removed when its value exposes a
        ``graph`` attribute that is None. The input is deep-copied first, so
        the dict passed in is not modified.

        Args:
            data: The serialized output produced by the dump.

        Returns:
            A cleaned copy of ``data``.
        """
        data = deepcopy(data)
        for key in RenderedAtomicValueSchema.REMOVE_KEYS_IF_NONE:
            if key not in data:
                continue
            value = data[key]
            if value is None:
                # Checked before any attribute access so that a None "graph"
                # is dropped instead of raising AttributeError.
                data.pop(key)
            elif key == "graph" and getattr(value, "graph", value) is None:
                # Values without a ``graph`` attribute (e.g. plain dicts) fall
                # back to the value itself, which is not None here, and are kept.
                data.pop(key)
        return data
示例#12
0
class ValidationOperatorResultSchema(Schema):
    """Marshmallow schema for ``ValidationOperatorResult``."""

    run_id = fields.Nested(RunIdentifierSchema)
    run_results = fields.Dict()
    evaluation_parameters = fields.Dict(allow_none=True)
    validation_operator_config = fields.Dict()
    success = fields.Bool()

    # noinspection PyUnusedLocal
    @pre_dump
    def prepare_dump(self, data, **kwargs):
        """Return a deep copy of ``data`` carrying converted run results.

        ``run_results`` is read from the copy, passed through
        ``convert_to_json_serializable`` and stored on the copy's
        ``_run_results`` attribute; the caller's object is not modified.
        """
        snapshot = deepcopy(data)
        serializable_results = convert_to_json_serializable(snapshot.run_results)
        snapshot._run_results = serializable_results
        return snapshot

    # noinspection PyUnusedLocal
    @post_load
    def make_expectation_suite_validation_result(self, data, **kwargs):
        """Build a ``ValidationOperatorResult`` from the loaded fields."""
        return ValidationOperatorResult(**data)
示例#13
0
class ExpectationSuiteValidationResultSchema(Schema):
    """Marshmallow schema for ``ExpectationSuiteValidationResult``."""

    success = fields.Bool()
    results = fields.List(fields.Nested(ExpectationValidationResultSchema))
    evaluation_parameters = fields.Dict()
    statistics = fields.Dict()
    meta = fields.Dict(allow_none=True)

    # noinspection PyUnusedLocal
    @pre_dump
    def prepare_dump(self, data, **kwargs):
        """Return a deep copy of ``data`` whose ``meta`` has been converted.

        ``meta`` is passed through ``convert_to_json_serializable`` on the
        copy; the caller's object is not modified.
        """
        snapshot = deepcopy(data)
        serializable_meta = convert_to_json_serializable(snapshot.meta)
        snapshot.meta = serializable_meta
        return snapshot

    # noinspection PyUnusedLocal
    @post_load
    def make_expectation_suite_validation_result(self, data, **kwargs):
        """Build an ``ExpectationSuiteValidationResult`` from the loaded fields."""
        return ExpectationSuiteValidationResult(**data)
示例#14
0
class NotebookTemplateConfigSchema(Schema):
    """Marshmallow schema for ``NotebookTemplateConfig``."""

    file_name = fields.String()
    # String-to-string mapping; None is accepted.
    template_kwargs = fields.Dict(
        keys=fields.Str(),
        values=fields.Str(),
        allow_none=True,
    )

    # noinspection PyUnusedLocal
    @post_load
    def make_notebook_template_config(self, data, **kwargs):
        """Build a ``NotebookTemplateConfig`` from the loaded fields."""
        return NotebookTemplateConfig(**data)
示例#15
0
class CheckpointResultSchema(Schema):
    """Marshmallow schema for ``CheckpointResult``."""

    # JC: I think this needs to be changed to be an instance of a new type called CheckpointResult,
    # which would include the top-level keys run_id, config, name, and a list of results.
    run_id = fields.Nested(RunIdentifierSchema)
    run_results = fields.Dict(required=False, allow_none=True)
    checkpoint_config = fields.Dict(required=False, allow_none=True)
    success = fields.Boolean(required=False, allow_none=True)

    # noinspection PyUnusedLocal
    @pre_dump
    def prepare_dump(self, data, **kwargs):
        """Return a deep copy of ``data`` carrying converted run results.

        ``run_results`` is read from the copy, passed through
        ``convert_to_json_serializable`` and stored on the copy's
        ``_run_results`` attribute; the caller's object is not modified.
        """
        snapshot = copy.deepcopy(data)
        serializable_results = convert_to_json_serializable(snapshot.run_results)
        snapshot._run_results = serializable_results
        return snapshot

    # noinspection PyUnusedLocal
    @post_load
    def make_checkpoint_result(self, data, **kwargs):
        """Build a ``CheckpointResult`` from the loaded fields."""
        return CheckpointResult(**data)
示例#16
0
class ExecutionEngineConfigSchema(Schema):
    """Marshmallow schema for ``ExecutionEngineConfig``; keys not declared here are kept on load."""

    class Meta:
        unknown = INCLUDE

    class_name = fields.String(required=True)
    module_name = fields.String(missing="great_expectations.execution_engine")
    connection_string = fields.String(required=False, allow_none=True)
    spark_config = fields.Raw(required=False, allow_none=True)
    boto3_options = fields.Dict(
        keys=fields.Str(), values=fields.Str(), required=False, allow_none=True
    )
    caching = fields.Boolean(required=False, allow_none=True)
    batch_spec_defaults = fields.Dict(required=False, allow_none=True)

    @validates_schema
    def validate_schema(self, data, **kwargs):
        """Reject engine-specific keys used with a different execution engine class.

        Raises:
            ge_exceptions.InvalidConfigError: If ``connection_string`` is
                present and ``class_name`` is not ``SqlAlchemyExecutionEngine``,
                or if ``spark_config`` is present and ``class_name`` is not
                ``SparkDFExecutionEngine``.
        """
        class_name = data["class_name"]
        # If a class_name begins with the dollar sign ("$"), then it is assumed to be a variable name to be substituted.
        # startswith() (rather than indexing [0]) keeps an empty class_name from raising IndexError.
        if class_name.startswith("$"):
            return
        if "connection_string" in data and class_name != "SqlAlchemyExecutionEngine":
            raise ge_exceptions.InvalidConfigError(
                f"""Your current configuration uses the "connection_string" key in an execution engine, but only 
SqlAlchemyExecutionEngine requires this attribute (your execution engine is "{data['class_name']}").  Please update your
configuration to continue.
                """
            )
        if "spark_config" in data and class_name != "SparkDFExecutionEngine":
            raise ge_exceptions.InvalidConfigError(
                f"""Your current configuration uses the "spark_config" key in an execution engine, but only 
SparkDFExecutionEngine requires this attribute (your execution engine is "{data['class_name']}").  Please update your
configuration to continue.
                """
            )

    # noinspection PyUnusedLocal
    @post_load
    def make_execution_engine_config(self, data, **kwargs):
        """Build an ``ExecutionEngineConfig`` from the loaded fields."""
        return ExecutionEngineConfig(**data)
示例#17
0
class RenderedAtomicValueSchema(Schema):
    """Marshmallow schema for ``RenderedAtomicValue``; keys not declared here are kept on load."""

    class Meta:
        unknown = INCLUDE

    # for StringType
    template = fields.String(required=False, allow_none=True)
    params = fields.Dict(required=False, allow_none=True)
    schema = fields.Dict(required=False, allow_none=True)

    # for TableType
    header = fields.Dict(required=False, allow_none=True)
    header_row = fields.List(fields.Dict, required=False, allow_none=True)
    table = fields.List(
        fields.List(fields.Dict, required=False, allow_none=True),
    )

    # for GraphType
    graph = fields.String(required=False, allow_none=True)

    @post_load
    def create_value_obj(self, data, **kwargs):
        """Build a ``RenderedAtomicValue`` from the loaded fields."""
        return RenderedAtomicValue(**data)
示例#18
0
class ExpectationValidationResultSchema(Schema):
    """Marshmallow schema for ``ExpectationValidationResult``.

    ``result`` is passed through ``convert_to_json_serializable`` before
    dumping, keys named in ``REMOVE_KEYS_IF_NONE`` are dropped from the dumped
    output when they are None, and a loaded payload is used to construct an
    ``ExpectationValidationResult``.
    """

    success = fields.Bool(required=False, allow_none=True)
    expectation_config = fields.Nested(lambda: ExpectationConfigurationSchema,
                                       required=False,
                                       allow_none=True)
    result = fields.Dict(required=False, allow_none=True)
    meta = fields.Dict(required=False, allow_none=True)
    exception_info = fields.Dict(required=False, allow_none=True)
    rendered_content = fields.List(
        fields.Nested(lambda: RenderedAtomicContentSchema,
                      required=False,
                      allow_none=True))

    # noinspection PyUnusedLocal
    @pre_dump
    def convert_result_to_serializable(self, data, **kwargs):
        """Return a deep copy of ``data`` whose ``result`` has been converted.

        Inputs that are neither an ``ExpectationValidationResult`` nor a
        ``dict`` are copied and returned without conversion.
        """
        data = deepcopy(data)
        if isinstance(data, ExpectationValidationResult):
            data.result = convert_to_json_serializable(data.result)
        elif isinstance(data, dict):
            data["result"] = convert_to_json_serializable(data.get("result"))
        return data

    # Keys dropped from the dumped output when their value is None.
    REMOVE_KEYS_IF_NONE = ["rendered_content"]

    @post_dump
    def clean_null_attrs(self, data: dict, **kwargs: dict) -> dict:
        """Removes the attributes in ExpectationValidationResultSchema.REMOVE_KEYS_IF_NONE during serialization if
        their values are None.

        The input is deep-copied first, so the dict passed in is not modified.

        Args:
            data: The serialized output produced by the dump.

        Returns:
            A cleaned copy of ``data``.
        """
        data = deepcopy(data)
        # Use this schema's own key list; the list of ExpectationConfigurationSchema
        # belongs to a different schema and need not contain "rendered_content".
        for key in ExpectationValidationResultSchema.REMOVE_KEYS_IF_NONE:
            if key in data and data[key] is None:
                data.pop(key)
        return data

    # noinspection PyUnusedLocal
    @post_load
    def make_expectation_validation_result(self, data, **kwargs):
        """Build an ``ExpectationValidationResult`` from the loaded fields."""
        return ExpectationValidationResult(**data)
示例#19
0
class ExecutionEngineConfigSchema(Schema):
    """Marshmallow schema for ``ExecutionEngineConfig``; keys not declared here are kept on load."""

    class Meta:
        unknown = INCLUDE

    class_name = fields.String(required=True)
    module_name = fields.String(
        missing="great_expectations.execution_engine",
    )
    caching = fields.Boolean()
    batch_spec_defaults = fields.Dict(allow_none=True)

    @validates_schema
    def validate_schema(self, data, **kwargs):
        """Schema-level validation hook; currently performs no checks."""

    # noinspection PyUnusedLocal
    @post_load
    def make_execution_engine_config(self, data, **kwargs):
        """Build an ``ExecutionEngineConfig`` from the loaded fields."""
        return ExecutionEngineConfig(**data)
示例#20
0
class ExpectationConfigurationBuilderConfigSchema(NotNullSchema):
    """Schema for ``ExpectationConfigurationBuilderConfig``; keys not declared here are kept on load."""

    class Meta:
        unknown = INCLUDE

    __config_class__ = ExpectationConfigurationBuilderConfig

    module_name = fields.String(
        required=False,
        allow_none=True,
        missing="great_expectations.rule_based_profiler.expectation_configuration_builder",
    )
    class_name = fields.String(required=True, allow_none=False)
    expectation_type = fields.Str(
        required=True,
        error_messages={"required": "expectation_type missing in expectation configuration builder"},
    )
    meta = fields.Dict(
        keys=fields.String(required=True, allow_none=False),
        required=False,
        allow_none=True,
    )
    # The nested schema is supplied through a callable, so the name
    # ParameterBuilderConfigSchema is looked up only when that callable runs.
    validation_parameter_builder_configs = fields.List(
        cls_or_instance=fields.Nested(
            lambda: ParameterBuilderConfigSchema(),
            required=True,
            allow_none=False,
        ),
        required=False,
        allow_none=True,
    )
示例#21
0
class DataConnectorConfigSchema(Schema):
    """Marshmallow schema for ``DataConnectorConfig``; keys not declared here are kept on load."""

    class Meta:
        unknown = INCLUDE

    class_name = fields.String(required=True)
    module_name = fields.String(
        missing="great_expectations.datasource.data_connector",
    )

    assets = fields.Dict(
        keys=fields.Str(), values=fields.Nested(AssetConfigSchema), required=False, allow_none=True
    )

    @validates_schema
    def validate_schema(self, data, **kwargs):
        """Schema-level validation hook; currently performs no checks."""

    # noinspection PyUnusedLocal
    @post_load
    def make_data_connector_config(self, data, **kwargs):
        """Build a ``DataConnectorConfig`` from the loaded fields."""
        return DataConnectorConfig(**data)
示例#22
0
class RuleConfigSchema(NotNullSchema):
    """Schema for ``RuleConfig``; keys not declared here are kept on load.

    Every top-level field is optional and may be None, while the individual
    items of the two builder lists are declared required and non-None.
    """

    class Meta:
        unknown = INCLUDE

    __config_class__ = RuleConfig

    variables = fields.Dict(
        keys=fields.String(required=True, allow_none=False),
        required=False,
        allow_none=True,
    )
    domain_builder = fields.Nested(DomainBuilderConfigSchema, required=False, allow_none=True)
    parameter_builders = fields.List(
        cls_or_instance=fields.Nested(ParameterBuilderConfigSchema, required=True, allow_none=False),
        required=False,
        allow_none=True,
    )
    expectation_configuration_builders = fields.List(
        cls_or_instance=fields.Nested(
            ExpectationConfigurationBuilderConfigSchema, required=True, allow_none=False
        ),
        required=False,
        allow_none=True,
    )
示例#23
0
class DataContextConfigSchema(Schema):
    """Marshmallow schema for the data context configuration.

    Besides the per-field declarations, ``validate_schema`` checks that
    ``config_version`` is present, numeric and within the supported range.
    """

    config_version = fields.Number(
        validate=lambda x: 0 < x < 100,
        error_messages={"invalid": "config version must be a number."},
    )
    datasources = fields.Dict(
        keys=fields.Str(),
        values=fields.Nested(DatasourceConfigSchema),
        required=False,
        allow_none=True,
    )
    expectations_store_name = fields.Str()
    validations_store_name = fields.Str()
    evaluation_parameter_store_name = fields.Str()
    plugins_directory = fields.Str(allow_none=True)
    validation_operators = fields.Dict(keys=fields.Str(), values=fields.Dict())
    stores = fields.Dict(keys=fields.Str(), values=fields.Dict())
    notebooks = fields.Nested(NotebooksConfigSchema, allow_none=True)
    data_docs_sites = fields.Dict(
        keys=fields.Str(),
        values=fields.Dict(),
        allow_none=True,
    )
    config_variables_file_path = fields.Str(allow_none=True)
    anonymous_usage_statistics = fields.Nested(AnonymizedUsageStatisticsConfigSchema)

    # noinspection PyMethodMayBeStatic
    # noinspection PyUnusedLocal
    def handle_error(self, exc, data, **kwargs):
        """Log and raise our custom exception when (de)serialization fails."""
        logger.error(exc.messages)
        raise ge_exceptions.InvalidDataContextConfigError(
            "Error while processing DataContextConfig.",
            exc,
        )

    @validates_schema
    def validate_schema(self, data, **kwargs):
        """Validate the ``config_version`` entry of ``data``.

        Raises:
            ge_exceptions.InvalidDataContextConfigError: If ``config_version``
                is missing, is not an ``int``/``float``, or is greater than
                ``CURRENT_CONFIG_VERSION``.
            ge_exceptions.UnsupportedConfigVersionError: If the version is 0
                and a ``validations_store`` / ``validations_stores`` key is
                present, or if the version is below
                ``MINIMUM_SUPPORTED_CONFIG_VERSION``.
        """
        if "config_version" not in data:
            raise ge_exceptions.InvalidDataContextConfigError(
                "The key `config_version` is missing; please check your config file.",
                validation_error=ValidationError("no config_version key"),
            )

        version = data["config_version"]
        if not isinstance(version, (int, float)):
            raise ge_exceptions.InvalidDataContextConfigError(
                "The key `config_version` must be a number. Please check your config file.",
                validation_error=ValidationError("config version not a number"),
            )

        # When migrating from 0.7.x to 0.8.0
        has_legacy_store_key = "validations_store" in data or "validations_stores" in data
        if version == 0 and has_legacy_store_key:
            raise ge_exceptions.UnsupportedConfigVersionError(
                "You appear to be using a config version from the 0.7.x series. This version is no longer supported."
            )

        if version < MINIMUM_SUPPORTED_CONFIG_VERSION:
            too_low_template = (
                "You appear to have an invalid config version ({}).\n    The version number must be at least {}. "
                "Please see the migration guide at https://docs.greatexpectations.io/en/latest/guides/how_to_guides/migrating_versions.html"
            )
            raise ge_exceptions.UnsupportedConfigVersionError(
                too_low_template.format(version, MINIMUM_SUPPORTED_CONFIG_VERSION)
            )

        if version > CURRENT_CONFIG_VERSION:
            too_high_template = (
                "You appear to have an invalid config version ({}).\n    The maximum valid version is {}."
            )
            raise ge_exceptions.InvalidDataContextConfigError(
                too_high_template.format(version, CURRENT_CONFIG_VERSION),
                validation_error=ValidationError("config version too high"),
            )
示例#24
0
class DataConnectorConfigSchema(Schema):
    """Marshmallow schema for ``DataConnectorConfig``; keys not declared here are kept on load.

    ``validate_schema`` rejects keys that are reserved for one family of data
    connectors when ``class_name`` names a connector outside that family.
    """

    class Meta:
        unknown = INCLUDE

    class_name = fields.String(required=True)
    module_name = fields.String(
        missing="great_expectations.datasource.data_connector")

    assets = fields.Dict(
        keys=fields.Str(),
        values=fields.Nested(AssetConfigSchema,
                             required=False,
                             allow_none=True),
        required=False,
        allow_none=True,
    )

    base_directory = fields.String(required=False, allow_none=True)
    glob_directive = fields.String(required=False, allow_none=True)
    default_regex = fields.Dict(required=False, allow_none=True)
    runtime_keys = fields.List(cls_or_instance=fields.Str(),
                               required=False,
                               allow_none=True)
    bucket = fields.String(required=False, allow_none=True)
    prefix = fields.String(required=False, allow_none=True)
    delimiter = fields.String(required=False, allow_none=True)
    max_keys = fields.Integer(required=False, allow_none=True)
    boto3_options = fields.Dict(keys=fields.Str(),
                                values=fields.Str(),
                                required=False,
                                allow_none=True)
    data_asset_name_prefix = fields.String(required=False, allow_none=True)
    data_asset_name_suffix = fields.String(required=False, allow_none=True)
    include_schema_name = fields.Boolean(required=False, allow_none=True)
    splitter_method = fields.String(required=False, allow_none=True)
    splitter_kwargs = fields.Dict(required=False, allow_none=True)
    sampling_method = fields.String(required=False, allow_none=True)
    sampling_kwargs = fields.Dict(required=False, allow_none=True)
    excluded_tables = fields.List(cls_or_instance=fields.Str(),
                                  required=False,
                                  allow_none=True)
    included_tables = fields.List(cls_or_instance=fields.Str(),
                                  required=False,
                                  allow_none=True)
    skip_inapplicable_tables = fields.Boolean(required=False, allow_none=True)

    @validates_schema
    def validate_schema(self, data, **kwargs):
        """Reject connector-specific keys used with an unrelated connector class.

        The checks run in this order, and the first violation raises:
        ``default_regex`` (file-path connectors), ``glob_directive``
        (filesystem connectors), the S3 keys, then the SQL keys.

        Raises:
            ge_exceptions.InvalidConfigError: If a key reserved for one group
                of connector classes is present while ``class_name`` is not
                one of the class names of that group.
        """
        class_name = data["class_name"]
        # If a class_name begins with the dollar sign ("$"), then it is assumed to be a variable name to be substituted.
        # startswith() (rather than indexing [0]) keeps an empty class_name from raising IndexError.
        if class_name.startswith("$"):
            return

        filesystem_connectors = (
            "InferredAssetFilesystemDataConnector",
            "ConfiguredAssetFilesystemDataConnector",
        )
        s3_connectors = (
            "InferredAssetS3DataConnector",
            "ConfiguredAssetS3DataConnector",
        )
        sql_connectors = (
            "InferredAssetSqlDataConnector",
            "ConfiguredAssetSqlDataConnector",
        )
        s3_only_keys = ("bucket", "prefix", "delimiter", "max_keys")
        sql_only_keys = (
            "data_asset_name_prefix",
            "data_asset_name_suffix",
            "include_schema_name",
            "splitter_method",
            "splitter_kwargs",
            "sampling_method",
            "sampling_kwargs",
            "excluded_tables",
            "included_tables",
            "skip_inapplicable_tables",
        )

        if "default_regex" in data and class_name not in (
            filesystem_connectors + s3_connectors
        ):
            raise ge_exceptions.InvalidConfigError(
                f"""Your current configuration uses one or more keys in a data connector, that are required only by a
subclass of the FilePathDataConnector class (your data conntector is "{data['class_name']}").  Please update your
configuration to continue.
                """
            )
        if "glob_directive" in data and class_name not in filesystem_connectors:
            raise ge_exceptions.InvalidConfigError(
                f"""Your current configuration uses one or more keys in a data connector, that are required only by a
filesystem type of the data connector (your data conntector is "{data['class_name']}").  Please update your
configuration to continue.
                """
            )
        if any(key in data for key in s3_only_keys) and class_name not in s3_connectors:
            raise ge_exceptions.InvalidConfigError(
                f"""Your current configuration uses one or more keys in a data connector, that are required only by an
S3 type of the data connector (your data conntector is "{data['class_name']}").  Please update your configuration to
continue.
                """
            )
        if any(key in data for key in sql_only_keys) and class_name not in sql_connectors:
            raise ge_exceptions.InvalidConfigError(
                f"""Your current configuration uses one or more keys in a data connector, that are required only by an
SQL type of the data connector (your data conntector is "{data['class_name']}").  Please update your configuration to
continue.
                """
            )

    # noinspection PyUnusedLocal
    @post_load
    def make_data_connector_config(self, data, **kwargs):
        """Build a ``DataConnectorConfig`` from the loaded fields."""
        return DataConnectorConfig(**data)