def _get_unexpected_statement(cls, evr):
    """Render the exception or unexpected-values statement for a validation result.

    Args:
        cls: Receiver of the call; the body does not use it.
        evr: Validation result read through its ``success``, ``result``,
            ``exception_info`` and ``expectation_config`` attributes.

    Returns:
        list: Two content blocks (the exception message and a collapsible
        traceback) when ``exception_info["raised_exception"]`` is truthy; an
        empty list when the result succeeded or reports no unexpected count;
        otherwise a single block summarising the unexpected count, the
        unexpected percentage and the total element count.
    """
    succeeded = evr.success
    result = evr.result

    # An exception takes precedence over any unexpected-value statistics.
    if evr.exception_info["raised_exception"]:
        exception_message = RenderedStringTemplateContent(
            content_block_type="string_template",
            string_template={
                "template": (
                    "\n\n$expectation_type raised an exception:\n$exception_message"
                ),
                "params": {
                    "expectation_type": evr.expectation_config.expectation_type,
                    "exception_message": evr.exception_info["exception_message"],
                },
                "tag": "strong",
                "styling": {
                    "classes": ["text-danger"],
                    "params": {
                        "exception_message": {"tag": "code"},
                        "expectation_type": {
                            "classes": ["badge", "badge-danger", "mb-2"]
                        },
                    },
                },
            },
        )
        traceback_block = RenderedStringTemplateContent(
            content_block_type="string_template",
            string_template={
                "template": evr.exception_info["exception_traceback"],
                "tag": "code",
            },
        )
        exception_traceback_collapse = CollapseContent(
            collapse_toggle_link="Show exception traceback...",
            collapse=[traceback_block],
        )
        return [exception_message, exception_traceback_collapse]

    # Nothing to report for successes or for results without unexpected rows.
    if succeeded or not result.get("unexpected_count"):
        return []

    statement_params = {
        "unexpected_count": num_to_str(
            result["unexpected_count"], use_locale=True, precision=20
        ),
        "unexpected_percent": num_to_str(result["unexpected_percent"], precision=4)
        + "%",
        "element_count": num_to_str(
            result["element_count"], use_locale=True, precision=20
        ),
    }
    statement = RenderedStringTemplateContent(
        content_block_type="string_template",
        string_template={
            "template": (
                "\n\n$unexpected_count unexpected values found. "
                "$unexpected_percent of $element_count total rows."
            ),
            "params": statement_params,
            "tag": "strong",
            "styling": {"classes": ["text-danger"]},
        },
    )
    return [statement]
def _get_unexpected_statement(cls, evr):
    """Render the exception or unexpected-values statement for a validation result.

    Args:
        cls: Receiver of the call; the body does not use it.
        evr: Mapping with a ``"success"`` entry and optional ``"result"``,
            ``"expectation_config"`` and ``"exception_info"`` entries.

    Returns:
        A single content block describing the raised exception when both
        ``"expectation_config"`` and ``"exception_info"`` are present and
        ``exception_info["raised_exception"]`` is exactly ``True``; ``None``
        when the result succeeded or reports no unexpected count; otherwise a
        single block summarising the unexpected count, the unexpected
        percentage and the total element count.
    """
    succeeded = evr["success"]
    result = evr.get("result", {})

    raised = (
        "expectation_config" in evr
        and "exception_info" in evr
        and evr["exception_info"]["raised_exception"] is True
    )
    if raised:
        return RenderedComponentContent(
            content_block_type="string_template",
            string_template={
                "template": (
                    "\n\n$expectation_type raised an exception:\n$exception_message"
                ),
                "params": {
                    "expectation_type": evr["expectation_config"]["expectation_type"],
                    "exception_message": evr["exception_info"]["exception_message"],
                },
                "tag": "strong",
                "styling": {
                    "classes": ["text-danger"],
                    "params": {
                        "exception_message": {"tag": "code"},
                        "expectation_type": {
                            "classes": ["badge", "badge-danger", "mb-2"]
                        },
                    },
                },
            },
        )

    # Nothing to report for successes or for results without unexpected rows.
    if succeeded or not result.get("unexpected_count"):
        return None

    statement_params = {
        "unexpected_count": num_to_str(
            result["unexpected_count"], use_locale=True, precision=20
        ),
        "unexpected_percent": num_to_str(result["unexpected_percent"], precision=4)
        + "%",
        "element_count": num_to_str(
            result["element_count"], use_locale=True, precision=20
        ),
    }
    return RenderedComponentContent(
        content_block_type="string_template",
        string_template={
            "template": (
                "\n\n$unexpected_count unexpected values found. "
                "$unexpected_percent of $element_count total rows."
            ),
            "params": statement_params,
            "tag": "strong",
            "styling": {"classes": ["text-danger"]},
        },
    )
def _get_observed_value(cls, evr):
    """Return a display value describing what a validation result observed.

    Args:
        cls: Receiver of the call; the body does not use it.
        evr: Mapping with an optional ``"result"`` entry and an
            ``"expectation_config"`` entry that holds ``"expectation_type"``.

    Returns:
        ``"--"`` when there is nothing to show; a graph content-block dict
        for ``expect_column_kl_divergence_to_be_less_than`` results that
        carry an observed partition with bins or values; otherwise a string
        built from the observed value or from the unexpected percentage.

    Raises:
        KeyError: If ``evr`` has a ``"result"`` but no ``"expectation_config"``
            / ``"expectation_type"``, or if a KL-divergence result has details
            without ``"observed_partition"`` / ``"weights"``.
    """
    try:
        result = evr["result"]
    except KeyError:
        return "--"

    expectation_type = evr["expectation_config"]["expectation_type"]

    if expectation_type == "expect_column_kl_divergence_to_be_less_than":
        if not result.get("details"):
            return "--"

        observed_partition = result["details"]["observed_partition"]
        weights = observed_partition["weights"]

        # Small partitions get a compact chart, larger ones a wider column.
        if len(weights) <= 10:
            height = width = 200
            col_width = 4
        else:
            height = width = 300
            col_width = 6

        if observed_partition.get("bins"):
            bins = observed_partition["bins"]
            df = pd.DataFrame(
                {
                    "bin_min": [round(value, 1) for value in bins[:-1]],
                    "bin_max": [round(value, 1) for value in bins[1:]],
                    "fraction": weights,
                }
            )
            encoding = {"x": "bin_min:O", "x2": "bin_max:O", "y": "fraction:Q"}
        elif observed_partition.get("values"):
            df = pd.DataFrame(
                {"values": observed_partition["values"], "fraction": weights}
            )
            encoding = {"x": "values:N", "y": "fraction:Q"}
        else:
            # Neither bins nor values: there is nothing to plot. Without this
            # guard the chart variable would be unbound below.
            return "--"

        chart = (
            alt.Chart(df)
            .mark_bar()
            .encode(**encoding)
            .properties(width=width, height=height, autosize="fit")
            .to_json()
        )
        return {
            "content_block_type": "graph",
            "graph": chart,
            "styling": {
                "classes": ["col-" + str(col_width)],
                "styles": {
                    "margin-top": "20px",
                },
            },
        }

    # Compare against None explicitly so that legitimate falsy observations
    # (0, 0.0, False, "") are still displayed.
    observed_value = result.get("observed_value")
    if observed_value is not None:
        if isinstance(observed_value, (integer_types, float)) and not isinstance(
            observed_value, bool
        ):
            return num_to_str(observed_value, precision=10, use_locale=True)
        return str(observed_value)

    unexpected_percent = result.get("unexpected_percent")

    if expectation_type == "expect_column_values_to_be_null":
        # A missing or None percentage cannot be inverted; report it as unknown.
        if unexpected_percent is None:
            return "unknown % null"
        return (
            num_to_str(100 - unexpected_percent, precision=5, use_locale=True)
            + "% null"
        )

    if expectation_type == "expect_column_values_to_not_be_null":
        if unexpected_percent is None:
            return "unknown % not null"
        return (
            num_to_str(100 - unexpected_percent, precision=5, use_locale=True)
            + "% not null"
        )

    if unexpected_percent is not None:
        return num_to_str(unexpected_percent, precision=5) + "% unexpected"

    return "--"
def _atomic_prescriptive_template(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Build the atomic prescriptive template for "parseable as JSON".

    Args:
        cls: Receiver of the call; the body does not use it.
        configuration: Object whose ``kwargs`` supply ``column``, ``mostly``,
            ``row_condition`` and ``condition_parser``.
        result: Not used by the body.
        language: Not used by the body.
        runtime_configuration: Optional mapping; ``include_column_name``
            (default ``True``, ``None`` treated as ``True``) and ``styling``
            are read from it.
        **kwargs: Not used by the body.

    Returns:
        tuple: ``(template_str, params_with_json_schema, styling)``.
    """
    runtime_configuration = runtime_configuration or {}
    include_column_name = runtime_configuration.get("include_column_name", True)
    if include_column_name is None:
        include_column_name = True
    styling = runtime_configuration.get("styling")

    params = substitute_none_for_missing(
        configuration.kwargs,
        ["column", "mostly", "row_condition", "condition_parser"],
    )

    # (parameter name, JSON schema type) in the order they appear in the output.
    schema_types = (
        ("column", "string"),
        ("mostly", "number"),
        ("mostly_pct", "string"),
        ("row_condition", "string"),
        ("condition_parser", "string"),
    )
    params_with_json_schema = {
        name: {"schema": {"type": schema_type}, "value": params.get(name)}
        for name, schema_type in schema_types
    }

    mostly = params["mostly"]
    if mostly is not None and mostly < 1.0:
        params_with_json_schema["mostly_pct"]["value"] = num_to_str(
            mostly * 100, precision=15, no_scientific=True
        )
        suffix = ", at least $mostly_pct % of the time."
    else:
        suffix = "."
    template_str = "values must be parseable as JSON" + suffix

    if include_column_name:
        template_str = f"$column {template_str}"

    row_condition = params["row_condition"]
    if row_condition is not None:
        conditional_template_str, conditional_params = (
            parse_row_condition_string_pandas_engine(row_condition, with_schema=True)
        )
        template_str = f"{conditional_template_str}, then {template_str}"
        params_with_json_schema.update(conditional_params)

    return (template_str, params_with_json_schema, styling)
def test_num_to_str():
    """Check ``num_to_str`` output across rounding, locale and notation options.

    Each case is ``(value, keyword options, expected string)`` and the cases
    are checked in order.
    """
    cases = [
        # We can round
        (0.99999999999999, {"precision": 4}, "≈1"),
        # Specifying extra precision should not cause a problem
        (0.99999999999999, {"precision": 20}, "0.99999999999999"),
        # Our float can only hold 17 significant digits
        (1234567890.123456, {"precision": 4}, "≈1.235e+9"),
        (1234567890.123456, {"precision": 20}, "1234567890.123456"),
        (
            1234567890.123456,
            {"use_locale": True, "precision": 40},
            "1,234,567,890.123456",
        ),
        # 17 sig digits mostly after decimal
        (1.123456789012345, {"precision": 5}, "≈1.1235"),
        (1.123456789012345, {"precision": 20}, "1.123456789012345"),
        # A classic difficulty for floating point arithmetic
        (0.1, {}, "0.1"),
        (0.1, {"precision": 20}, "0.1"),
        (0.1, {"no_scientific": True}, "0.1"),
        # significant digits can come late
        (1.23456789012345e-10, {"precision": 20}, "1.23456789012345e-10"),
        (1.23456789012345e-10, {"precision": 5}, "≈1.2346e-10"),
        (
            1.23456789012345e-10,
            {"precision": 20, "no_scientific": True},
            "0.000000000123456789012345",
        ),
        (
            1.23456789012345e-10,
            {"precision": 5, "no_scientific": True},
            "≈0.00000000012346",
        ),
        # floats should have trailing zeros and the decimal point stripped
        (100.0, {"precision": 10, "no_scientific": True}, "100"),
        (100.0, {"precision": 10}, "100"),
        (100.0, {"precision": 10, "use_locale": True}, "100"),
        # integers should never be stripped!
        (100, {"precision": 10, "no_scientific": True}, "100"),
        (100, {"precision": 10}, "100"),
        (100, {"precision": 10, "use_locale": True}, "100"),
        # If we have a number longer than our precision, we should still be
        # able to correctly format
        (1000, {"precision": 4}, "1000"),
        (1000, {}, "1000"),
    ]
    for value, options, expected in cases:
        assert num_to_str(value, **options) == expected
def _atomic_prescriptive_template(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Build the atomic prescriptive template for a bounded unique-value count.

    Args:
        cls: Receiver of the call; the body does not use it.
        configuration: Object whose ``kwargs`` supply ``column``,
            ``min_value``, ``max_value``, ``mostly``, ``row_condition``,
            ``condition_parser``, ``strict_min`` and ``strict_max``.
        result: Not used by the body.
        language: Not used by the body.
        runtime_configuration: Optional mapping; ``include_column_name``
            (default ``True``, ``None`` treated as ``True``) and ``styling``
            are read from it.
        **kwargs: Not used by the body.

    Returns:
        tuple: ``(template_str, params_with_json_schema, styling)``.
    """
    runtime_configuration = runtime_configuration or {}
    include_column_name = runtime_configuration.get("include_column_name", True)
    if include_column_name is None:
        include_column_name = True
    styling = runtime_configuration.get("styling")

    params = substitute_none_for_missing(
        configuration.kwargs,
        [
            "column",
            "min_value",
            "max_value",
            "mostly",
            "row_condition",
            "condition_parser",
            "strict_min",
            "strict_max",
        ],
    )

    # (parameter name, JSON schema type) in the order they appear in the output.
    schema_types = (
        ("column", "string"),
        ("min_value", "number"),
        ("max_value", "number"),
        ("mostly", "number"),
        ("mostly_pct", "string"),
        ("row_condition", "string"),
        ("condition_parser", "string"),
        ("strict_min", "boolean"),
        ("strict_max", "boolean"),
    )
    params_with_json_schema = {
        name: {"schema": {"type": schema_type}, "value": params.get(name)}
        for name, schema_type in schema_types
    }

    at_least_str, at_most_str = handle_strict_min_max(params)
    min_value = params["min_value"]
    max_value = params["max_value"]

    if min_value is None and max_value is None:
        template_str = "may have any number of unique values."
    else:
        # Describe whichever bounds are present.
        if min_value is None:
            bounds = f"{at_most_str} $max_value"
        elif max_value is None:
            bounds = f"{at_least_str} $min_value"
        else:
            bounds = f"{at_least_str} $min_value and {at_most_str} $max_value"

        mostly = params["mostly"]
        if mostly is not None and mostly < 1.0:
            params_with_json_schema["mostly_pct"]["value"] = num_to_str(
                mostly * 100, precision=15, no_scientific=True
            )
            template_str = (
                f"must have {bounds} unique values, "
                "at least $mostly_pct % of the time."
            )
        else:
            template_str = f"must have {bounds} unique values."

    if include_column_name:
        template_str = f"$column {template_str}"

    row_condition = params["row_condition"]
    if row_condition is not None:
        conditional_template_str, conditional_params = (
            parse_row_condition_string_pandas_engine(row_condition, with_schema=True)
        )
        template_str = f"{conditional_template_str}, then {template_str}"
        params_with_json_schema.update(conditional_params)

    return (template_str, params_with_json_schema, styling)
def _prescriptive_renderer(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Render the prescriptive statement for a bounded unique-value count.

    Args:
        cls: Receiver of the call; the body does not use it.
        configuration: Object whose ``kwargs`` supply ``column``,
            ``min_value``, ``max_value``, ``mostly``, ``row_condition``,
            ``condition_parser``, ``strict_min`` and ``strict_max``.
        result: Not used by the body.
        language: Not used by the body.
        runtime_configuration: Optional mapping; ``include_column_name``
            (default ``True``, ``None`` treated as ``True``) and ``styling``
            are read from it.
        **kwargs: Not used by the body.

    Returns:
        list: A single ``RenderedStringTemplateContent`` carrying the
        template, its parameters and the styling.
    """
    runtime_configuration = runtime_configuration or {}
    include_column_name = runtime_configuration.get("include_column_name", True)
    if include_column_name is None:
        include_column_name = True
    styling = runtime_configuration.get("styling")

    params = substitute_none_for_missing(
        configuration.kwargs,
        [
            "column",
            "min_value",
            "max_value",
            "mostly",
            "row_condition",
            "condition_parser",
            "strict_min",
            "strict_max",
        ],
    )

    at_least_str, at_most_str = handle_strict_min_max(params)
    min_value = params["min_value"]
    max_value = params["max_value"]

    if min_value is None and max_value is None:
        template_str = "may have any number of unique values."
    else:
        # Describe whichever bounds are present.
        if min_value is None:
            bounds = f"{at_most_str} $max_value"
        elif max_value is None:
            bounds = f"{at_least_str} $min_value"
        else:
            bounds = f"{at_least_str} $min_value and {at_most_str} $max_value"

        mostly = params["mostly"]
        if mostly is not None and mostly < 1.0:
            params["mostly_pct"] = num_to_str(
                mostly * 100, precision=15, no_scientific=True
            )
            template_str = (
                f"must have {bounds} unique values, "
                "at least $mostly_pct % of the time."
            )
        else:
            template_str = f"must have {bounds} unique values."

    if include_column_name:
        template_str = f"$column {template_str}"

    row_condition = params["row_condition"]
    if row_condition is not None:
        conditional_template_str, conditional_params = (
            parse_row_condition_string_pandas_engine(row_condition)
        )
        template_str = f"{conditional_template_str}, then {template_str}"
        params.update(conditional_params)

    rendered = RenderedStringTemplateContent(
        content_block_type="string_template",
        string_template={
            "template": template_str,
            "params": params,
            "styling": styling,
        },
    )
    return [rendered]
def _prescriptive_renderer(
    cls,
    configuration: ExpectationConfiguration = None,
    result: ExpectationValidationResult = None,
    language: str = None,
    runtime_configuration: dict = None,
    **kwargs,
) -> List[
    Union[
        dict,
        str,
        RenderedStringTemplateContent,
        RenderedTableContent,
        RenderedBulletListContent,
        RenderedGraphContent,
        Any,
    ]
]:
    """Render the prescriptive statement for bounded value lengths.

    Args:
        cls: Receiver of the call; the body does not use it.
        configuration: Object whose ``kwargs`` supply ``column``,
            ``min_value``, ``max_value``, ``mostly``, ``row_condition``,
            ``condition_parser``, ``strict_min`` and ``strict_max``.
        result: Not used by the body.
        language: Not used by the body.
        runtime_configuration: Optional mapping; ``include_column_name``
            (default ``True``, ``None`` treated as ``True``) and ``styling``
            are read from it.
        **kwargs: Not used by the body.

    Returns:
        A one-element list holding a ``RenderedStringTemplateContent`` with
        the template, its parameters and the styling.
    """
    runtime_configuration = runtime_configuration or {}
    include_column_name = runtime_configuration.get("include_column_name", True)
    if include_column_name is None:
        include_column_name = True
    styling = runtime_configuration.get("styling")

    params = substitute_none_for_missing(
        configuration.kwargs,
        [
            "column",
            "min_value",
            "max_value",
            "mostly",
            "row_condition",
            "condition_parser",
            "strict_min",
            "strict_max",
        ],
    )

    if params["min_value"] is None and params["max_value"] is None:
        template_str = "values may have any length."
    else:
        at_least_str, at_most_str = handle_strict_min_max(params)

        # Describe whichever bounds are present; at least one is not None here.
        if params["min_value"] is not None and params["max_value"] is not None:
            bounds = f"{at_least_str} $min_value and {at_most_str} $max_value"
        elif params["min_value"] is None:
            bounds = f"{at_most_str} $max_value"
        else:
            bounds = f"{at_least_str} $min_value"

        if params["mostly"] is not None and params["mostly"] < 1.0:
            params["mostly_pct"] = num_to_str(
                params["mostly"] * 100, precision=15, no_scientific=True
            )
            template_str = (
                f"values must be {bounds} characters long, "
                "at least $mostly_pct % of the time."
            )
        else:
            template_str = f"values must always be {bounds} characters long."

    if include_column_name:
        template_str = f"$column {template_str}"

    if params["row_condition"] is not None:
        conditional_template_str, conditional_params = (
            parse_row_condition_string_pandas_engine(params["row_condition"])
        )
        template_str = f"{conditional_template_str}, then {template_str}"
        params.update(conditional_params)

    rendered = RenderedStringTemplateContent(
        content_block_type="string_template",
        string_template={
            "template": template_str,
            "params": params,
            "styling": styling,
        },
    )
    return [rendered]
def _prescriptive_renderer(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Render the prescriptive statement for "must not match any regex in list".

    Args:
        cls: Receiver of the call; the body does not use it.
        configuration: Object whose ``kwargs`` supply ``column``,
            ``regex_list``, ``mostly``, ``row_condition`` and
            ``condition_parser``.
        result: Not used by the body.
        language: Not used by the body.
        runtime_configuration: Optional mapping; ``include_column_name``
            (default ``True``, ``None`` treated as ``True``) and ``styling``
            are read from it.
        **kwargs: Not used by the body.

    Returns:
        list: A single ``RenderedStringTemplateContent`` carrying the
        template, its parameters (including one ``v__<i>`` entry per regex)
        and the styling.
    """
    runtime_configuration = runtime_configuration or {}
    include_column_name = runtime_configuration.get("include_column_name", True)
    if include_column_name is None:
        include_column_name = True
    styling = runtime_configuration.get("styling")

    params = substitute_none_for_missing(
        configuration.kwargs,
        ["column", "regex_list", "mostly", "row_condition", "condition_parser"],
    )

    regex_list = params.get("regex_list")
    if not regex_list:
        values_string = "[ ]"
    else:
        # Each regex gets its own numbered placeholder in the template.
        placeholders = []
        for index, regex in enumerate(regex_list):
            params[f"v__{index}"] = regex
            placeholders.append(f"$v__{index}")
        values_string = " ".join(placeholders)

    template_str = (
        "values must not match any of the following regular expressions: "
        + values_string
    )

    mostly = params["mostly"]
    if mostly is not None and mostly < 1.0:
        params["mostly_pct"] = num_to_str(
            mostly * 100, precision=15, no_scientific=True
        )
        template_str += ", at least $mostly_pct % of the time."
    else:
        template_str += "."

    if include_column_name:
        template_str = f"$column {template_str}"

    row_condition = params["row_condition"]
    if row_condition is not None:
        conditional_template_str, conditional_params = (
            parse_row_condition_string_pandas_engine(row_condition)
        )
        template_str = f"{conditional_template_str}, then {template_str}"
        params.update(conditional_params)

    rendered = RenderedStringTemplateContent(
        content_block_type="string_template",
        string_template={
            "template": template_str,
            "params": params,
            "styling": styling,
        },
    )
    return [rendered]
def _atomic_prescriptive_template(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Build the atomic prescriptive template for "must not match any regex in list".

    Args:
        cls: Receiver of the call; the body does not use it.
        configuration: Object whose ``kwargs`` supply ``column``,
            ``regex_list``, ``mostly``, ``row_condition`` and
            ``condition_parser``.
        result: Not used by the body.
        language: Not used by the body.
        runtime_configuration: Optional mapping; ``include_column_name``
            (default ``True``, ``None`` treated as ``True``) and ``styling``
            are read from it.
        **kwargs: Not used by the body.

    Returns:
        tuple: ``(template_str, params_with_json_schema, styling)``, where
        the schema dict is the one returned by
        ``add_values_with_json_schema_from_list_in_params``.
    """
    runtime_configuration = runtime_configuration or {}
    include_column_name = runtime_configuration.get("include_column_name", True)
    if include_column_name is None:
        include_column_name = True
    styling = runtime_configuration.get("styling")

    params = substitute_none_for_missing(
        configuration.kwargs,
        ["column", "regex_list", "mostly", "row_condition", "condition_parser"],
    )

    # (parameter name, JSON schema type) in the order they appear in the output.
    schema_types = (
        ("column", "string"),
        ("regex_list", "array"),
        ("mostly", "number"),
        ("mostly_pct", "string"),
        ("row_condition", "string"),
        ("condition_parser", "string"),
    )
    params_with_json_schema = {
        name: {"schema": {"type": schema_type}, "value": params.get(name)}
        for name, schema_type in schema_types
    }

    regex_list = params.get("regex_list")
    if not regex_list:
        values_string = "[ ]"
    else:
        # Each regex gets its own numbered placeholder in the template.
        placeholders = []
        for index, regex in enumerate(regex_list):
            params[f"v__{index}"] = regex
            placeholders.append(f"$v__{index}")
        values_string = " ".join(placeholders)

    template_str = (
        "values must not match any of the following regular expressions: "
        + values_string
    )

    mostly = params["mostly"]
    if mostly is not None and mostly < 1.0:
        params_with_json_schema["mostly_pct"]["value"] = num_to_str(
            mostly * 100, precision=15, no_scientific=True
        )
        template_str += ", at least $mostly_pct % of the time."
    else:
        template_str += "."

    if include_column_name:
        template_str = f"$column {template_str}"

    row_condition = params["row_condition"]
    if row_condition is not None:
        conditional_template_str, conditional_params = (
            parse_row_condition_string_pandas_engine(row_condition, with_schema=True)
        )
        template_str = f"{conditional_template_str}, then {template_str}"
        params_with_json_schema.update(conditional_params)

    params_with_json_schema = add_values_with_json_schema_from_list_in_params(
        params=params,
        params_with_json_schema=params_with_json_schema,
        param_key_with_list="regex_list",
    )
    return (template_str, params_with_json_schema, styling)
def _prescriptive_renderer(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Render the prescriptive statement for "values must match a JSON Schema".

    Args:
        cls: Receiver of the call; the body does not use it.
        configuration: Object whose ``kwargs`` supply ``column``, ``mostly``,
            ``json_schema``, ``row_condition`` and ``condition_parser``.
        result: Not used by the body.
        language: Not used by the body.
        runtime_configuration: Optional mapping; only ``include_column_name``
            (default ``True``, ``None`` treated as ``True``) is read. The
            rendered block always uses a fixed styling for ``formatted_json``,
            so a ``styling`` entry here has no effect.
        **kwargs: Not used by the body.

    Returns:
        list: A single ``RenderedStringTemplateContent`` carrying the
        template, its parameters and the fixed styling.
    """
    runtime_configuration = runtime_configuration or {}
    include_column_name = runtime_configuration.get("include_column_name", True)
    if include_column_name is None:
        include_column_name = True

    params = substitute_none_for_missing(
        configuration.kwargs,
        ["column", "mostly", "json_schema", "row_condition", "condition_parser"],
    )

    if not params.get("json_schema"):
        template_str = "values must match a JSON Schema but none was specified."
    else:
        params["formatted_json"] = (
            "<pre>" + json.dumps(params.get("json_schema"), indent=4) + "</pre>"
        )
        # Only mention a percentage when it is a real relaxation (< 100 %),
        # matching the other prescriptive renderers.
        if params["mostly"] is not None and params["mostly"] < 1.0:
            params["mostly_pct"] = num_to_str(
                params["mostly"] * 100, precision=15, no_scientific=True
            )
            template_str = "values must match the following JSON Schema, at least $mostly_pct % of the time: $formatted_json"
        else:
            template_str = (
                "values must match the following JSON Schema: $formatted_json"
            )

    if include_column_name:
        template_str = "$column " + template_str

    if params["row_condition"] is not None:
        conditional_template_str, conditional_params = (
            parse_row_condition_string_pandas_engine(params["row_condition"])
        )
        template_str = conditional_template_str + ", then " + template_str
        params.update(conditional_params)

    rendered = RenderedStringTemplateContent(
        content_block_type="string_template",
        string_template={
            "template": template_str,
            "params": params,
            # No extra CSS classes on the pre-formatted schema.
            "styling": {"params": {"formatted_json": {"classes": []}}},
        },
    )
    return [rendered]
def _atomic_prescriptive_template(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Build the atomic prescriptive template for "values must match a JSON Schema".

    Args:
        cls: Receiver of the call; the body does not use it.
        configuration: Object whose ``kwargs`` supply ``column``, ``mostly``,
            ``json_schema``, ``row_condition`` and ``condition_parser``.
        result: Not used by the body.
        language: Not used by the body.
        runtime_configuration: Optional mapping; ``include_column_name``
            (default ``True``, ``None`` treated as ``True``) and ``styling``
            are read from it.
        **kwargs: Not used by the body.

    Returns:
        tuple: ``(template_str, params_with_json_schema, styling)``. When a
        schema is configured, ``params_with_json_schema`` also carries a
        ``formatted_json`` entry so the ``$formatted_json`` placeholder in
        the template can be resolved.
    """
    runtime_configuration = runtime_configuration or {}
    include_column_name = runtime_configuration.get("include_column_name", True)
    if include_column_name is None:
        include_column_name = True
    styling = runtime_configuration.get("styling")

    params = substitute_none_for_missing(
        configuration.kwargs,
        ["column", "mostly", "json_schema", "row_condition", "condition_parser"],
    )

    params_with_json_schema = {
        "column": {"schema": {"type": "string"}, "value": params.get("column")},
        "mostly": {"schema": {"type": "number"}, "value": params.get("mostly")},
        # The value is the string produced by num_to_str, so the schema type
        # is "string" as in the sibling templates.
        "mostly_pct": {
            "schema": {"type": "string"},
            "value": params.get("mostly_pct"),
        },
        "json_schema": {
            "schema": {"type": "object"},
            "value": params.get("json_schema"),
        },
        "row_condition": {
            "schema": {"type": "string"},
            "value": params.get("row_condition"),
        },
        "condition_parser": {
            "schema": {"type": "string"},
            "value": params.get("condition_parser"),
        },
    }

    if not params.get("json_schema"):
        template_str = "values must match a JSON Schema but none was specified."
    else:
        # The template refers to $formatted_json, so the value must be part
        # of the returned parameters rather than only a local.
        params_with_json_schema["formatted_json"] = {
            "schema": {"type": "string"},
            "value": (
                "<pre>"
                + json.dumps(params.get("json_schema"), indent=4)
                + "</pre>"
            ),
        }
        # Only mention a percentage when it is a real relaxation (< 100 %),
        # matching the other prescriptive templates.
        if params["mostly"] is not None and params["mostly"] < 1.0:
            params_with_json_schema["mostly_pct"]["value"] = num_to_str(
                params["mostly"] * 100, precision=15, no_scientific=True
            )
            template_str = "values must match the following JSON Schema, at least $mostly_pct % of the time: $formatted_json"
        else:
            template_str = (
                "values must match the following JSON Schema: $formatted_json"
            )

    if include_column_name:
        template_str = "$column " + template_str

    if params["row_condition"] is not None:
        conditional_template_str, conditional_params = (
            parse_row_condition_string_pandas_engine(
                params["row_condition"], with_schema=True
            )
        )
        template_str = conditional_template_str + ", then " + template_str
        params_with_json_schema.update(conditional_params)

    return (template_str, params_with_json_schema, styling)