def __init__(self, expectation_type, kwargs, meta=None, success_on_last_run=None):
    """Build an expectation configuration from its type name and keyword arguments.

    Args:
        expectation_type: Name of the expectation; must be a string.
        kwargs: Plain dict of expectation arguments; it is wrapped in ExpectationKwargs.
        meta: Optional metadata. Defaults to a new empty dict and is passed to
            ensure_json_serializable before being stored.
        success_on_last_run: Stored unchanged on the instance.

    Raises:
        InvalidExpectationConfigurationError: If expectation_type is not a string or
            kwargs is not a dict.
    """
    type_is_string = isinstance(expectation_type, string_types)
    if not type_is_string:
        raise InvalidExpectationConfigurationError("expectation_type must be a string")
    self._expectation_type = expectation_type

    kwargs_is_dict = isinstance(kwargs, dict)
    if not kwargs_is_dict:
        raise InvalidExpectationConfigurationError("expectation configuration kwargs must be an "
                                                   "ExpectationKwargs object.")
    self._kwargs = ExpectationKwargs(kwargs)

    # Meta must be serializable, but it is stored as given rather than converted here.
    metadata = {} if meta is None else meta
    ensure_json_serializable(metadata)
    self.meta = metadata
    self.success_on_last_run = success_on_last_run
示例#2
0
    def validate_configuration(
            self, configuration: Optional[ExpectationConfiguration]):
        """Check that the ``quantile_ranges`` kwarg is present and well formed.

        Args:
            configuration: Configuration to validate. When None, ``self.configuration``
                is used once the parent validation has run.

        Returns:
            True when every check passes.

        Raises:
            InvalidExpectationConfigurationError: If ``quantile_ranges`` is missing or is
                not a dictionary.
            ValueError: If ``allow_relative_error`` is anything other than False, or if
                ``quantiles`` and ``value_ranges`` differ in length.
        """
        super().validate_configuration(configuration)
        if configuration is None:
            configuration = self.configuration

        kwargs = configuration.kwargs
        # Raised explicitly instead of asserted: assert statements are removed when
        # Python runs with -O, which would silently disable these checks.
        if "quantile_ranges" not in kwargs:
            raise InvalidExpectationConfigurationError(
                "quantile ranges must be provided")
        quantile_ranges = kwargs["quantile_ranges"]
        if not isinstance(quantile_ranges, dict):
            raise InvalidExpectationConfigurationError(
                "quantile_ranges should be a dictionary")

        # Ensuring actual quantiles and their value ranges match up
        quantiles = quantile_ranges["quantiles"]
        quantile_value_ranges = quantile_ranges["value_ranges"]

        allow_relative_error = kwargs.get("allow_relative_error", False)
        if allow_relative_error is not False:
            raise ValueError(
                "PandasExecutionEngine does not support relative error in column quantiles."
            )

        if len(quantiles) != len(quantile_value_ranges):
            raise ValueError(
                "quantile_values and quantiles must have the same number of elements"
            )
        return True
    def validate_configuration(
            self, configuration: Optional[ExpectationConfiguration]) -> None:
        """Check the ``min_value`` / ``max_value`` kwargs of the configuration.

        At least one bound must be given. Every bound that is given must be either a
        dict containing a "$PARAMETER" key or a value that converts to a whole number.

        Args:
            configuration: Configuration to validate. When None, ``self.configuration``
                is used once the parent validation has run.

        Raises:
            InvalidExpectationConfigurationError: If both bounds are None, a bound is not
                a whole number, or a dict bound lacks the "$PARAMETER" key.
        """
        super().validate_configuration(configuration)

        if configuration is None:
            configuration = self.configuration

        kwargs = configuration.kwargs
        if kwargs.get("min_value") is None and kwargs.get("max_value") is None:
            raise InvalidExpectationConfigurationError(
                "min_value and max_value cannot both be None")

        for bound_name in ("min_value", "max_value"):
            bound = kwargs.get(bound_name)
            # Compare against None, not truthiness, so that empty dicts, empty strings
            # and similar falsy values are validated instead of skipped.
            if bound is None:
                continue

            if isinstance(bound, dict):
                if "$PARAMETER" not in bound:
                    raise InvalidExpectationConfigurationError(
                        f'Evaluation Parameter dict for {bound_name} kwarg must have "$PARAMETER" key.'
                    )
                continue

            try:
                is_whole_number = float(bound).is_integer()
            except (TypeError, ValueError):
                # Values float() cannot parse are invalid bounds, not crashes.
                is_whole_number = False
            if not is_whole_number:
                raise InvalidExpectationConfigurationError(
                    "min_value and max_value must be integers")
示例#4
0
    def validate_configuration(
            self, configuration: Optional[ExpectationConfiguration]) -> None:
        """
        Validates that a configuration has been set, and falls back to the expectation's own configuration when
        none is passed. Ensures that the optional ``strict`` kwarg, when present, is a boolean.

        Args:
            configuration (OPTIONAL[ExpectationConfiguration]): \
                An optional Expectation Configuration entry that will be used to configure the expectation
        Returns:
            None. Raises InvalidExpectationConfigurationError if the config is not validated successfully
        """

        super().validate_configuration(configuration)
        if configuration is None:
            configuration = self.configuration

        strict = configuration.kwargs.get("strict")

        # Raised explicitly instead of asserted: assert statements are removed when
        # Python runs with -O, which would silently disable this check.
        if strict is not None and not isinstance(strict, bool):
            raise InvalidExpectationConfigurationError(
                "strict must be a boolean value")
示例#5
0
 def validate_configuration(self, configuration: Optional[ExpectationConfiguration]):
     """Require the ``type_`` kwarg on the configuration.

     Args:
         configuration: Configuration to validate. When None, ``self.configuration``
             is used once the parent validation has run.

     Returns:
         True when the check passes.

     Raises:
         InvalidExpectationConfigurationError: If ``type_`` is not among the kwargs.
     """
     super().validate_configuration(configuration)
     if configuration is None:
         configuration = self.configuration
     # Raised explicitly instead of asserted: assert statements vanish under python -O.
     if "type_" not in configuration.kwargs:
         raise InvalidExpectationConfigurationError("type_ is required")
     return True
    def validate_configuration(
            self, configuration: Optional[ExpectationConfiguration]) -> None:
        """Check the structure of the ``quantile_ranges`` kwarg.

        ``quantile_ranges`` must be a dictionary. Each entry of its ``value_ranges``
        must either contain None or be in ascending order, and ``quantiles`` must have
        as many entries as ``value_ranges``.

        Args:
            configuration: Configuration to validate. When None, ``self.configuration``
                is used once the parent validation has run.

        Raises:
            InvalidExpectationConfigurationError: If ``quantile_ranges`` is missing, is
                not a dictionary, or holds an unordered value range.
            ValueError: If ``quantiles`` and ``value_ranges`` differ in length.
        """
        super().validate_configuration(configuration)
        if configuration is None:
            configuration = self.configuration

        kwargs = configuration.kwargs
        # Raised explicitly instead of asserted: assert statements are removed when
        # Python runs with -O, which would silently disable these checks.
        if "quantile_ranges" not in kwargs:
            raise InvalidExpectationConfigurationError(
                "quantile_ranges must be provided")
        quantile_ranges = kwargs["quantile_ranges"]
        if not isinstance(quantile_ranges, dict):
            raise InvalidExpectationConfigurationError(
                "quantile_ranges should be a dictionary")

        quantile_value_ranges = quantile_ranges["value_ranges"]
        for value_range in quantile_value_ranges:
            if None in value_range:
                # An open-ended range has nothing to order.
                continue
            # sorted() always returns a list, so compare against a list copy;
            # otherwise an ordered tuple would be reported as unordered.
            if list(value_range) != sorted(value_range):
                raise InvalidExpectationConfigurationError(
                    "quantile_ranges must consist of ordered pairs")

        # Ensuring actual quantiles and their value ranges match up
        quantiles = quantile_ranges["quantiles"]

        if len(quantiles) != len(quantile_value_ranges):
            raise ValueError(
                "quantile_values and quantiles must have the same number of elements"
            )
示例#7
0
    def validate_configuration(
            self, configuration: Optional[ExpectationConfiguration]):
        """
        Validates that a configuration has been set, and falls back to the expectation's own configuration when
        none is passed. Ensures that the ``column_set`` kwarg is present and of an accepted type.

        Args:
            configuration (OPTIONAL[ExpectationConfiguration]): \
                An optional Expectation Configuration entry that will be used to configure the expectation
        Returns:
            True if the configuration has been validated successfully. Otherwise, raises an exception
        Raises:
            InvalidExpectationConfigurationError: if column_set is missing, is not a list, set, dict or None,
                or is a dict without the "$PARAMETER" key
        """

        # Setting up a configuration
        super().validate_configuration(configuration)
        if configuration is None:
            configuration = self.configuration

        kwargs = configuration.kwargs
        # Ensuring that a proper value has been provided. Raised explicitly instead of
        # asserted: assert statements are removed when Python runs with -O.
        if "column_set" not in kwargs:
            raise InvalidExpectationConfigurationError("column_set is required")

        column_set = kwargs["column_set"]
        if column_set is not None and not isinstance(column_set, (list, set, dict)):
            raise InvalidExpectationConfigurationError(
                "column_set must be a list, set, or None")

        # A dict is only accepted as an evaluation parameter reference.
        if isinstance(column_set, dict) and "$PARAMETER" not in column_set:
            raise InvalidExpectationConfigurationError(
                'Evaluation Parameter dict for column_set kwarg must have "$PARAMETER" key.'
            )
        return True
示例#8
0
    def validate_configuration(self, configuration: Optional[ExpectationConfiguration]):
        """
        Validates that a configuration has been set, and falls back to the expectation's own configuration when
        none is passed. Ensures that ``other_table_name`` is a string and that the optional ``ignore_columns``
        kwarg is a comma separated list of column names.

        Args:
            configuration (OPTIONAL[ExpectationConfiguration]): \
                An optional Expectation Configuration entry that will be used to configure the expectation
        Returns:
            True if the configuration has been validated successfully. Otherwise, raises an exception
        Raises:
            InvalidExpectationConfigurationError: if other_table_name is missing or not a string, or if
                ignore_columns is present but is not a string matching the expected pattern
        """
        if configuration is None:
            configuration = self.configuration

        kwargs = configuration.kwargs
        # Raised explicitly instead of asserted: assert statements are removed when
        # Python runs with -O, which would silently disable these checks.
        if "other_table_name" not in kwargs:
            raise InvalidExpectationConfigurationError("other_table_name is required")
        if not isinstance(kwargs["other_table_name"], str):
            raise InvalidExpectationConfigurationError(
                "other_table_name must be a string")

        if "ignore_columns" in kwargs:
            ignore_columns = kwargs["ignore_columns"]
            pattern = re.compile(r"^(\w+)(,\s*\w+)*$")
            # Test the type first: matching a non-string would raise TypeError
            # instead of reporting an invalid configuration.
            if not isinstance(ignore_columns, str) or not pattern.match(ignore_columns):
                raise InvalidExpectationConfigurationError(
                    "ignore_columns input is not valid. Please provide comma seperated columns list"
                )

        super().validate_configuration(configuration)
        return True
示例#9
0
    def find_expectation_indexes(
        self,
        expectation_configuration: ExpectationConfiguration,
        match_type: str = "domain",
    ) -> List[int]:
        """
        Return the positions in ``self.expectations`` of every expectation equivalent to the given one.

        Args:
            expectation_configuration: A potentially incomplete (partial) Expectation Configuration to compare
                each expectation on the suite against.
            match_type: Passed through to ``isEquivalentTo`` to select which kwargs are compared. Options are
                'domain' to match based on the data evaluated by that expectation, 'success' to match based on
                all configuration parameters that influence whether an expectation succeeds based on a given
                batch of data, and 'runtime' to match based on all configuration parameters

        Returns: A list of indexes of matching ExpectationConfiguration

        Raises:
            InvalidExpectationConfigurationError: if expectation_configuration is not an
                ExpectationConfiguration

        """
        is_configuration = isinstance(expectation_configuration, ExpectationConfiguration)
        if not is_configuration:
            raise InvalidExpectationConfigurationError(
                "Ensure that expectation configuration is valid."
            )

        return [
            position
            for position, candidate in enumerate(self.expectations)
            if candidate.isEquivalentTo(expectation_configuration, match_type)
        ]
示例#10
0
 def __init__(self, expectation_type, kwargs, meta=None, success_on_last_run=None):
     """Build an expectation configuration from its type name and keyword arguments.

     Args:
         expectation_type: Name of the expectation; must be a string.
         kwargs: Dict of expectation arguments, stored as given.
         meta: Optional metadata. Defaults to a new empty dict and is passed to
             ensure_json_serializable before being stored.
         success_on_last_run: Stored unchanged on the instance.

     Raises:
         InvalidExpectationConfigurationError: If expectation_type is not a string or
             kwargs is not a dict.
     """
     type_is_string = isinstance(expectation_type, str)
     if not type_is_string:
         raise InvalidExpectationConfigurationError(
             "expectation_type must be a string"
         )
     self._expectation_type = expectation_type

     kwargs_is_dict = isinstance(kwargs, dict)
     if not kwargs_is_dict:
         raise InvalidExpectationConfigurationError(
             "expectation configuration kwargs must be a dict."
         )
     self._kwargs = kwargs
     # Holds the kwargs as they were before evaluation parameters are evaluated.
     self._raw_kwargs = None

     # Meta must be serializable, but it is stored as given rather than converted here.
     metadata = {} if meta is None else meta
     ensure_json_serializable(metadata)
     self.meta = metadata
     self.success_on_last_run = success_on_last_run
示例#11
0
 def validate_configuration(self, configuration: Optional[ExpectationConfiguration]):
     """Require the ``column`` kwarg on the configuration.

     Args:
         configuration: Configuration whose kwargs are inspected.

     Returns:
         True when the check passes.

     Raises:
         InvalidExpectationConfigurationError: If ``column`` is not among the kwargs.
     """
     # Ensuring basic configuration parameters are properly set. Raised explicitly
     # instead of asserted: assert statements are removed when Python runs with -O.
     if "column" not in configuration.kwargs:
         raise InvalidExpectationConfigurationError(
             "'column' parameter is required for column expectations"
         )
     return True
示例#12
0
 def validate_configuration(self, configuration: Optional[ExpectationConfiguration]):
     """Check that the configuration's expectation type matches this expectation.

     Args:
         configuration: Configuration to validate. When None, ``self.configuration``
             is used instead.

     Returns:
         True when the types match.

     Raises:
         InvalidExpectationConfigurationError: If ``configuration.expectation_type``
             differs from ``self.expectation_type``.
     """
     if configuration is None:
         configuration = self.configuration
     # Raised explicitly instead of asserted: assert statements are removed when
     # Python runs with -O, which would silently disable this check.
     if configuration.expectation_type != self.expectation_type:
         raise InvalidExpectationConfigurationError(
             "expectation configuration type does not match expectation type"
         )
     return True
示例#13
0
    def validate_metric_value_between_configuration(
        self, configuration: Optional[ExpectationConfiguration]
    ):
        """Check that ``min_value`` and ``max_value`` have a usable type and order.

        Each bound may be absent or None, a number, or a dict containing a
        "$PARAMETER" key. When both bounds are plain numbers, the minimum must not
        exceed the maximum.

        Args:
            configuration: Configuration whose kwargs are inspected.

        Returns:
            True when every check passes.

        Raises:
            InvalidExpectationConfigurationError: If a bound has an unsupported type, a
                dict bound lacks "$PARAMETER", or the minimum is larger than the maximum.
        """
        # Validating that Minimum and Maximum values are of the proper format and type
        kwargs = configuration.kwargs
        min_val = kwargs.get("min_value")
        max_val = kwargs.get("max_value")

        # Raised explicitly instead of asserted: assert statements are removed when
        # Python runs with -O, which would silently disable these checks.
        for label, kwarg_name, bound in (
            ("min", "min_value", min_val),
            ("max", "max_value", max_val),
        ):
            if bound is None:
                continue
            if not isinstance(bound, (float, int, dict)):
                raise InvalidExpectationConfigurationError(
                    f"Provided {label} threshold must be a number"
                )
            if isinstance(bound, dict) and "$PARAMETER" not in bound:
                raise InvalidExpectationConfigurationError(
                    f'Evaluation Parameter dict for {kwarg_name} kwarg must have "$PARAMETER" key'
                )

        # Only compare plain numbers: a dict bound cannot be ordered against a number
        # (the comparison would raise TypeError).
        bounds_are_comparable = (
            min_val is not None
            and max_val is not None
            and not isinstance(min_val, dict)
            and not isinstance(max_val, dict)
        )
        if bounds_are_comparable and min_val > max_val:
            raise InvalidExpectationConfigurationError(
                "Minimum Threshold cannot be larger than Maximum Threshold"
            )

        return True
示例#14
0
 def validate_configuration(
         self, configuration: Optional[ExpectationConfiguration]):
     """Require a ``type_list`` kwarg that is a list or None.

     Args:
         configuration: Configuration to validate. When None, ``self.configuration``
             is used once the parent validation has run.

     Returns:
         True when every check passes.

     Raises:
         InvalidExpectationConfigurationError: If ``type_list`` is missing or is
             neither a list nor None.
     """
     super().validate_configuration(configuration)
     if configuration is None:
         configuration = self.configuration

     kwargs = configuration.kwargs
     # Raised explicitly instead of asserted: assert statements are removed when
     # Python runs with -O, which would silently disable these checks.
     if "type_list" not in kwargs:
         raise InvalidExpectationConfigurationError("type_list is required")
     type_list = kwargs["type_list"]
     if type_list is not None and not isinstance(type_list, list):
         raise InvalidExpectationConfigurationError(
             "type_list must be a list or None")
     return True
示例#15
0
def ensure_row_condition_is_correct(row_condition_string) -> None:
    """Reject row_condition strings that contain a single quote or a newline.

    Such characters may cause an issue when the expectation is reloaded, so the
    error is raised when the expectation is declared. Double quotes can be used
    in place of single quotes.

    Parameters
    ----------
    row_condition_string : str
        the pandas query string

    Raises
    ------
    InvalidExpectationConfigurationError
        If the string contains a single quote or a newline character.
    """
    if "'" in row_condition_string:
        # Each sentence ends with a space so the concatenated message reads correctly.
        raise InvalidExpectationConfigurationError(
            f"{row_condition_string} cannot be serialized to json. "
            "Do not introduce simple quotes in configuration. "
            "Use double quotes instead.")
    if "\n" in row_condition_string:
        # repr() keeps the newline visible as an escape sequence in the message.
        raise InvalidExpectationConfigurationError(
            f"{repr(row_condition_string)} cannot be serialized to json. Do not introduce \\n in configuration."
        )
示例#16
0
 def validate_configuration(
         self, configuration: Optional[ExpectationConfiguration]) -> None:
     """Require a ``type_list`` kwarg that is a list, an evaluation parameter dict, or None.

     Args:
         configuration: Configuration to validate. When None, ``self.configuration``
             is used once the parent validation has run.

     Raises:
         InvalidExpectationConfigurationError: If ``type_list`` is missing, has an
             unsupported type, or is a dict without the "$PARAMETER" key.
     """
     super().validate_configuration(configuration)
     if configuration is None:
         configuration = self.configuration

     kwargs = configuration.kwargs
     # Raised explicitly instead of asserted: assert statements are removed when
     # Python runs with -O, which would silently disable these checks.
     if "type_list" not in kwargs:
         raise InvalidExpectationConfigurationError("type_list is required")

     type_list = kwargs["type_list"]
     if type_list is not None and not isinstance(type_list, (list, dict)):
         raise InvalidExpectationConfigurationError(
             "type_list must be a list or None")

     # A dict is only accepted as an evaluation parameter reference.
     if isinstance(type_list, dict) and "$PARAMETER" not in type_list:
         raise InvalidExpectationConfigurationError(
             'Evaluation Parameter dict for type_list kwarg must have "$PARAMETER" key.'
         )
示例#17
0
    def validate_configuration(self, configuration: Optional[ExpectationConfiguration]):
        """Check the column, threshold and ``quantile_ranges`` kwargs of the configuration.

        Args:
            configuration: Configuration to validate. When None, ``self.configuration``
                is used once the parent validation has run.

        Returns:
            True when every check passes.

        Raises:
            InvalidExpectationConfigurationError: If ``column`` is missing, both
                ``min_value`` and ``max_value`` are None, or ``quantile_ranges`` is
                missing or not a dictionary.
            ValueError: If ``allow_relative_error`` is anything other than False, or if
                ``quantiles`` and ``value_ranges`` differ in length.
        """
        super().validate_configuration(configuration)
        if configuration is None:
            configuration = self.configuration

        kwargs = configuration.kwargs

        # Testing that proper thresholds are in place
        min_val = kwargs.get("min_value")
        max_val = kwargs.get("max_value")

        # Ensuring necessary parameters are present and of the proper type. Raised
        # explicitly instead of asserted: assert statements are removed under python -O.
        if "column" not in kwargs:
            raise InvalidExpectationConfigurationError(
                "'column' parameter is required for metric")
        if min_val is None and max_val is None:
            raise InvalidExpectationConfigurationError(
                "min_value and max_value cannot both be none")
        if "quantile_ranges" not in kwargs:
            raise InvalidExpectationConfigurationError(
                "quantile ranges must be provided")
        quantile_ranges = kwargs["quantile_ranges"]
        if not isinstance(quantile_ranges, dict):
            raise InvalidExpectationConfigurationError(
                "quantile_ranges should be a dictionary")

        # Ensuring actual quantiles and their value ranges match up
        quantiles = quantile_ranges["quantiles"]
        quantile_value_ranges = quantile_ranges["value_ranges"]

        allow_relative_error = kwargs.get("allow_relative_error", False)
        if allow_relative_error is not False:
            raise ValueError(
                "PandasExecutionEngine does not support relative error in column quantiles."
            )

        if len(quantiles) != len(quantile_value_ranges):
            raise ValueError(
                "quantile_values and quantiles must have the same number of elements"
            )
        return True
示例#18
0
 def validate_configuration(self, configuration: Optional[ExpectationConfiguration]):
     """Require the ``column`` kwarg and check the optional ``mostly`` kwarg.

     Args:
         configuration: Configuration to validate. When None, ``self.configuration``
             is used once the parent validation has run.

     Returns:
         False when the parent validation returns a falsy value, otherwise True.

     Raises:
         InvalidExpectationConfigurationError: If ``column`` is missing, or ``mostly``
             is present but is not an int or float between 0 and 1.
     """
     if not super().validate_configuration(configuration):
         return False
     if configuration is None:
         configuration = self.configuration

     kwargs = configuration.kwargs
     # Raised explicitly instead of asserted: assert statements are removed when
     # Python runs with -O, which would silently disable these checks.
     if "column" not in kwargs:
         raise InvalidExpectationConfigurationError(
             "'column' parameter is required for column map expectations"
         )
     if "mostly" in kwargs:
         mostly = kwargs["mostly"]
         if not isinstance(mostly, (int, float)):
             raise InvalidExpectationConfigurationError(
                 "'mostly' parameter must be an integer or float"
             )
         if not 0 <= mostly <= 1:
             raise InvalidExpectationConfigurationError(
                 "'mostly' parameter must be between 0 and 1"
             )
     return True
    def find_expectation_indexes(
        self,
        expectation_configuration: Optional[ExpectationConfiguration] = None,
        match_type: str = "domain",
        ge_cloud_id: Optional[str] = None,
    ) -> List[int]:
        """
        Find indexes of Expectations matching the given ExpectationConfiguration on the given match_type.
        If a ge_cloud_id is provided, match_type is ignored and only indexes of Expectations
        with matching ge_cloud_id are returned.

        Args:
            expectation_configuration: A potentially incomplete (partial) Expectation Configuration to match against to
                find the index of any matching Expectation Configurations on the suite.
            match_type: This determines what kwargs to use when matching. Options are 'domain' to match based
                on the data evaluated by that expectation, 'success' to match based on all configuration parameters
                 that influence whether an expectation succeeds based on a given batch of data, and 'runtime' to match
                 based on all configuration parameters
            ge_cloud_id: Great Expectations Cloud id; compared as a string

        Returns: A list of indexes of matching ExpectationConfiguration

        Raises:
            TypeError: if neither expectation_configuration nor ge_cloud_id is provided
            InvalidExpectationConfigurationError: if expectation_configuration is provided but is not an
                ExpectationConfiguration

        """
        if expectation_configuration is None and ge_cloud_id is None:
            raise TypeError(
                "Must provide either expectation_configuration or ge_cloud_id")

        # Compare against None rather than truthiness so that falsy objects of the
        # wrong type (an empty dict or string, say) are still rejected.
        if expectation_configuration is not None and not isinstance(
                expectation_configuration, ExpectationConfiguration):
            raise InvalidExpectationConfigurationError(
                "Ensure that expectation configuration is valid.")

        if ge_cloud_id is not None:
            # The id lookup takes precedence; normalise the target once, outside the loop.
            target_id = str(ge_cloud_id)
            return [
                idx for idx, expectation in enumerate(self.expectations)
                if str(expectation.ge_cloud_id) == target_id
            ]

        return [
            idx for idx, expectation in enumerate(self.expectations)
            if expectation.isEquivalentTo(other=expectation_configuration,
                                          match_type=match_type)
        ]
    def validate_configuration(
        self, configuration: Optional[ExpectationConfiguration]
    ) -> None:
        """
        Validates that a configuration has been set, and falls back to the expectation's own configuration when
        none is passed. Ensures that the n_features, important_columns, threshold and y_column kwargs are
        present where required and of the expected types.

        Args:
            configuration (OPTIONAL[ExpectationConfiguration]): \
                An optional Expectation Configuration entry that will be used to configure the expectation
        Returns:
            None. Raises InvalidExpectationConfigurationError if the config is not validated successfully
        """

        # The parent validation runs once; the original repeated this call at the end
        # with the same configuration.
        super().validate_configuration(configuration)
        if configuration is None:
            configuration = self.configuration

        n_features = configuration.kwargs.get("n_features")
        columns = configuration.kwargs.get("important_columns")
        threshold = configuration.kwargs.get("threshold")
        y_column = configuration.kwargs.get("y_column")

        # Raised explicitly instead of asserted: assert statements are removed when
        # Python runs with -O, which would silently disable these checks.
        if columns is None and threshold is None:
            raise InvalidExpectationConfigurationError(
                "at least one of important_columns or threshold is required"
            )
        if n_features is not None and not isinstance(n_features, int):
            raise InvalidExpectationConfigurationError(
                "n_features must be an integer"
            )
        if columns is not None:
            columns_are_valid = isinstance(columns, (tuple, list)) and all(
                isinstance(i, str) for i in columns
            )
            if not columns_are_valid:
                raise InvalidExpectationConfigurationError(
                    "columns must be a tuple or list of string column names"
                )
        if threshold is not None:
            threshold_is_valid = isinstance(threshold, float) and (0 <= threshold <= 1)
            if not threshold_is_valid:
                raise InvalidExpectationConfigurationError(
                    "threshold must be a float between 0 and 1"
                )
        if y_column is None:
            raise InvalidExpectationConfigurationError(
                "target y_column must be specified"
            )
        if not isinstance(y_column, str):
            raise InvalidExpectationConfigurationError(
                "y_column must be a string column name"
            )
示例#21
0
    def validate_configuration(
            self, configuration: Optional[ExpectationConfiguration]):
        """
        Validates that a configuration has been set, and falls back to the expectation's own configuration when
        none is passed. Ensures that the ``user_input`` kwarg is present and is a string.

        Args:
            configuration (OPTIONAL[ExpectationConfiguration]): \
                An optional Expectation Configuration entry that will be used to configure the expectation
        Returns:
            True if the configuration has been validated successfully. Otherwise, raises an exception
        Raises:
            InvalidExpectationConfigurationError: if user_input is missing or is not a string
        """
        if configuration is None:
            configuration = self.configuration

        kwargs = configuration.kwargs
        # Raised explicitly instead of asserted: assert statements are removed when
        # Python runs with -O, which would silently disable these checks.
        if "user_input" not in kwargs:
            raise InvalidExpectationConfigurationError("user_input is required")
        if not isinstance(kwargs["user_input"], str):
            raise InvalidExpectationConfigurationError(
                "user_input must be a string")

        super().validate_configuration(configuration)
        return True
示例#22
0
def ensure_json_serializable(data):
    """
    Helper function to check that an object can be serialized to json.

    Nothing is converted. The function returns None for every supported value, recursing
    into dicts, lists, tuples, sets, numpy arrays, pandas indexes, Series and DataFrames,
    and raises for anything else.

    Args:
        data: the object to check

    Returns:
        None

    Raises:
        InvalidExpectationConfigurationError: if data, or a value nested inside it, is of a
            type that none of the checks below accepts

    """
    import numpy as np
    import pandas as pd
    import datetime
    import decimal

    # If it's one of our types, we use our own conversion; this can move to full schema
    # once nesting goes all the way down
    if isinstance(
            data,
        (
            ExpectationConfiguration,
            ExpectationSuite,
            ExpectationValidationResult,
            ExpectationSuiteValidationResult,
        ),
    ):
        return

    try:
        if not isinstance(data, list) and np.isnan(data):
            # np.isnan is functionally vectorized, but we only want to apply this to single objects
            # Hence, why we test for `not isinstance(list))`
            return
    except (TypeError, ValueError):
        # np.isnan rejects this input, or its result has no single truth value;
        # fall through to the type checks below.
        pass

    if isinstance(data, (str, int, float, bool)):
        # No problem to encode json
        return

    if isinstance(data, dict):
        for key in data:
            str(key)  # key must be cast-able to string
            ensure_json_serializable(data[key])
        return

    if isinstance(data, (list, tuple, set)):
        for val in data:
            ensure_json_serializable(val)
        return

    if isinstance(data, (np.ndarray, pd.Index)):
        # Check the elements of the plain-list form of the array or index
        for val in data.tolist():
            ensure_json_serializable(val)
        return

    # Note: This clause has to come after checking for np.ndarray or we get:
    #      `ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()`
    if data is None:
        # No problem to encode json
        return

    if isinstance(data, (datetime.datetime, datetime.date)):
        return

    # Use built in base type from numpy, https://docs.scipy.org/doc/numpy-1.13.0/user/basics.types.html
    # https://github.com/numpy/numpy/pull/9505
    if np.issubdtype(type(data), np.bool_):
        return

    if np.issubdtype(type(data), np.integer) or np.issubdtype(
            type(data), np.uint):
        return

    if np.issubdtype(type(data), np.floating):
        # Note: Use np.floating to avoid FutureWarning from numpy
        return

    if isinstance(data, pd.Series):
        # Both the index labels and the values must be serializable.
        # Series.items() is used because Series.iteritems() was removed in pandas 2.0.
        for idx, val in data.items():
            ensure_json_serializable(idx)
            ensure_json_serializable(val)
        return

    if isinstance(data, pd.DataFrame):
        return ensure_json_serializable(data.to_dict(orient="records"))

    if isinstance(data, decimal.Decimal):
        return

    raise InvalidExpectationConfigurationError(
        "%s is of type %s which cannot be serialized to json" %
        (str(data), type(data).__name__))
示例#23
0
def ensure_json_serializable(data):
    """
    Helper function to check that an object can be serialized to json.

    Nothing is converted. The function returns None for every supported value, recursing
    into dicts, lists, tuples, sets, numpy arrays, pandas indexes, Series, and pandas or
    pyspark DataFrames, and raises for anything else.

    Args:
        data: the object to check
    Returns:
        None
    Raises:
        InvalidExpectationConfigurationError: if data, or a value nested inside it, is of a
            type that none of the checks below accepts
    """

    if isinstance(data, SerializableDictDot):
        return

    try:
        if not isinstance(data, list) and pd.isna(data):
            # pd.isna is functionally vectorized, but we only want to apply this to single objects
            # Hence, why we test for `not isinstance(list))`
            return
    except (TypeError, ValueError):
        # pd.isna rejects this input, or its result has no single truth value;
        # fall through to the type checks below.
        pass

    if isinstance(data, (str, int, float, bool)):
        # No problem to encode json
        return

    if isinstance(data, dict):
        for key in data:
            str(key)  # key must be cast-able to string
            ensure_json_serializable(data[key])
        return

    if isinstance(data, (list, tuple, set)):
        for val in data:
            ensure_json_serializable(val)
        return

    if isinstance(data, (np.ndarray, pd.Index)):
        # Check the elements of the plain-list form of the array or index
        for val in data.tolist():
            ensure_json_serializable(val)
        return

    # Note: This clause has to come after checking for np.ndarray or we get:
    #      `ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()`
    if data is None:
        # No problem to encode json
        return

    if isinstance(data, (datetime.datetime, datetime.date)):
        return

    # Use built in base type from numpy, https://docs.scipy.org/doc/numpy-1.13.0/user/basics.types.html
    # https://github.com/numpy/numpy/pull/9505
    if np.issubdtype(type(data), np.bool_):
        return

    if np.issubdtype(type(data), np.integer) or np.issubdtype(
            type(data), np.uint):
        return

    if np.issubdtype(type(data), np.floating):
        # Note: Use np.floating to avoid FutureWarning from numpy
        return

    if isinstance(data, pd.Series):
        # Both the index labels and the values must be serializable.
        # Series.items() is used because Series.iteritems() was removed in pandas 2.0.
        for idx, val in data.items():
            ensure_json_serializable(idx)
            ensure_json_serializable(val)
        return

    if pyspark and isinstance(data, pyspark.sql.DataFrame):
        # using StackOverflow suggestion for converting pyspark df into dictionary
        # https://stackoverflow.com/questions/43679880/pyspark-dataframe-to-dictionary-columns-as-keys-and-list-of-column-values-ad-di
        return ensure_json_serializable(
            dict(zip(data.schema.names, zip(*data.collect()))))

    if isinstance(data, pd.DataFrame):
        return ensure_json_serializable(data.to_dict(orient="records"))

    if isinstance(data, decimal.Decimal):
        return

    if isinstance(data, RunIdentifier):
        return

    raise InvalidExpectationConfigurationError(
        "%s is of type %s which cannot be serialized to json" %
        (str(data), type(data).__name__))
示例#24
0
 def configuration(self):
     """Return the stored configuration, raising if none has been set.

     Raises:
         InvalidExpectationConfigurationError: If ``self._configuration`` is None.
     """
     current = self._configuration
     if current is not None:
         return current
     raise InvalidExpectationConfigurationError(
         "cannot access configuration: expectation has not yet been configured"
     )