def uniqueness_constraint(
    columns: Sequence[str],
    assertion: Callable[[float], bool],
    where: Optional[str] = None,
    hint: Optional[str] = None,
) -> Constraint:
    """
    Run a Uniqueness analysis on the given columns and execute the assertion
    on its result.

    Parameters
    ----------
    columns:
        Columns to run the assertion on.
    assertion:
        Callable that receives a float input parameter and returns a boolean.
    where:
        Additional filter to apply before the analyzer is run.
    hint:
        A hint to provide additional context why a constraint could have
        failed.

    Returns
    -------
    The analysis-based constraint wrapped in a ``NamedConstraint`` labelled
    ``UniquenessConstraint(<analyzer>)``.
    """
    analyzer = Uniqueness(columns, where)
    # Pin the generic parameters first, then instantiate the constraint.
    constraint_cls = AnalysisBasedConstraint[FrequenciesAndNumRows, float, float]
    inner = constraint_cls(
        analyzer, assertion, hint=hint  # type: ignore[arg-type]
    )
    label = f"UniquenessConstraint({analyzer})"
    return NamedConstraint(inner, label)
def compliance_constraint(
    name: str,
    column: str,
    assertion: Callable[[float], bool],
    where: Optional[str] = None,
    hint: Optional[str] = None,
) -> Constraint:
    """
    Run a Compliance analysis for the given column expression and execute
    the assertion on its result.

    Parameters
    ----------
    name:
        A name that summarizes the check being made. It is handed to the
        ``Compliance`` analyzer, which uses it to name the metrics for the
        analysis being done.
    column:
        The column expression to be evaluated.
    assertion:
        Callable that receives a float input parameter and returns a boolean.
    where:
        Additional filter to apply before the analyzer is run.
    hint:
        A hint to provide additional context why a constraint could have
        failed.

    Returns
    -------
    The analysis-based constraint wrapped in a ``NamedConstraint`` labelled
    ``ComplianceConstraint(<analyzer>)``.
    """
    analyzer = Compliance(name, column, where)
    # Pin the generic parameters first, then instantiate the constraint.
    constraint_cls = AnalysisBasedConstraint[NumMatchesAndCount, float, float]
    inner = constraint_cls(
        analyzer, assertion, hint=hint  # type: ignore[arg-type]
    )
    label = f"ComplianceConstraint({analyzer})"
    return NamedConstraint(inner, label)
def quantile_constraint(
    column: str,
    quantile: float,
    assertion: Callable[[float], bool],
    where: Optional[str] = None,
    hint: Optional[str] = None,
) -> Constraint:
    """
    Run a Quantile analysis on the given column and execute the assertion
    on its result.

    Parameters
    ----------
    column:
        Column to run the assertion on.
    quantile:
        Which quantile to assert on.
    assertion:
        Callable that receives a float input parameter (the computed
        quantile) and returns a boolean.
    where:
        Optional filter expression; passed through to the ``Quantile``
        analyzer.
    hint:
        A hint to provide additional context why a constraint could have
        failed.

    Returns
    -------
    The analysis-based constraint wrapped in a ``NamedConstraint`` labelled
    ``QuantileConstraint(<analyzer>)``.
    """
    analyzer = Quantile(column, quantile, where)
    # Pin the generic parameters first, then instantiate the constraint.
    constraint_cls = AnalysisBasedConstraint[QuantileState, float, float]
    inner = constraint_cls(
        analyzer, assertion, hint=hint  # type: ignore[arg-type]
    )
    label = f"QuantileConstraint({analyzer})"
    return NamedConstraint(inner, label)
def max_constraint(
    column: str,
    assertion: Callable[[float], bool],
    where: Optional[str] = None,
    hint: Optional[str] = None,
) -> Constraint:
    """
    Run a Maximum analysis on the given column and execute the assertion
    on its result.

    Parameters
    ----------
    column:
        Column handed to the ``Maximum`` analyzer.
    assertion:
        Callable that receives a float input parameter and returns a boolean.
    where:
        Optional filter expression; passed through to the ``Maximum``
        analyzer.
    hint:
        Optional hint forwarded to the underlying constraint.

    Returns
    -------
    The analysis-based constraint wrapped in a ``NamedConstraint`` labelled
    ``MaximumConstraint(<analyzer>)``.
    """
    analyzer = Maximum(column, where)
    # Pin the generic parameters first, then instantiate the constraint.
    constraint_cls = AnalysisBasedConstraint[MaxState, float, float]
    inner = constraint_cls(
        analyzer, assertion, hint=hint  # type: ignore[arg-type]
    )
    label = f"MaximumConstraint({analyzer})"
    return NamedConstraint(inner, label)
def standard_deviation_constraint(
    column: str,
    assertion: Callable[[float], bool],
    where: Optional[str] = None,
    hint: Optional[str] = None,
) -> Constraint:
    """
    Run a StandardDeviation analysis on the given column and execute the
    assertion on its result.

    Parameters
    ----------
    column:
        Column handed to the ``StandardDeviation`` analyzer.
    assertion:
        Callable that receives a float input parameter and returns a boolean.
    where:
        Optional filter expression; passed through to the
        ``StandardDeviation`` analyzer.
    hint:
        Optional hint forwarded to the underlying constraint.

    Returns
    -------
    The analysis-based constraint wrapped in a ``NamedConstraint`` labelled
    ``StandardDeviationConstraint(<analyzer>)``.
    """
    analyzer = StandardDeviation(column, where)
    # Pin the generic parameters first, then instantiate the constraint.
    constraint_cls = AnalysisBasedConstraint[StandardDeviationState, float, float]
    inner = constraint_cls(
        analyzer, assertion, hint=hint  # type: ignore[arg-type]
    )
    label = f"StandardDeviationConstraint({analyzer})"
    return NamedConstraint(inner, label)
def sum_constraint(
    column: str,
    assertion: Callable[[float], bool],
    where: Optional[str] = None,
    hint: Optional[str] = None,
) -> Constraint:
    """
    Run a Sum analysis on the given column and execute the assertion on its
    result.

    Parameters
    ----------
    column:
        Column handed to the ``Sum`` analyzer.
    assertion:
        Callable that receives a float input parameter and returns a boolean.
    where:
        Optional filter expression; passed through to the ``Sum`` analyzer.
    hint:
        Optional hint forwarded to the underlying constraint.

    Returns
    -------
    The analysis-based constraint wrapped in a ``NamedConstraint`` labelled
    ``SumConstraint(<analyzer>)``.
    """
    # Trailing underscore keeps the local from shadowing the builtin ``sum``.
    sum_ = Sum(column, where)
    constraint = AnalysisBasedConstraint[SumState, float, float](
        sum_, assertion, hint=hint  # type: ignore[arg-type]
    )
    # Interpolate the analyzer (``sum_``), not the builtin ``sum``; the
    # latter rendered every label as "SumConstraint(<built-in function sum>)".
    return NamedConstraint(constraint, f"SumConstraint({sum_})")
def mean_constraint(
    column: str,
    assertion: Callable[[float], bool],
    where: Optional[str] = None,
    hint: Optional[str] = None,
) -> Constraint:
    """
    Run a Mean analysis on the given column and execute the assertion on its
    result.

    Parameters
    ----------
    column:
        Column handed to the ``Mean`` analyzer.
    assertion:
        Callable that receives a float input parameter and returns a boolean.
    where:
        Optional filter expression; passed through to the ``Mean`` analyzer.
    hint:
        Optional hint forwarded to the underlying constraint.

    Returns
    -------
    The analysis-based constraint wrapped in a ``NamedConstraint`` labelled
    ``MeanConstraint(<analyzer>)``.
    """
    analyzer = Mean(column, where)
    # Pin the generic parameters first, then instantiate the constraint.
    constraint_cls = AnalysisBasedConstraint[MeanState, float, float]
    inner = constraint_cls(
        analyzer, assertion, hint=hint  # type: ignore[arg-type]
    )
    label = f"MeanConstraint({analyzer})"
    return NamedConstraint(inner, label)
def completeness_constraint(
    column: str,
    assertion: Callable[[float], bool],
    where: Optional[str] = None,
    hint: Optional[str] = None,
) -> Constraint:
    """
    Run a Completeness analysis on the given column and execute the
    assertion on its result.

    Parameters
    ----------
    column:
        Column handed to the ``Completeness`` analyzer.
    assertion:
        Callable that receives a float input parameter and returns a boolean.
    where:
        Optional filter expression; passed through to the ``Completeness``
        analyzer.
    hint:
        Optional hint forwarded to the underlying constraint.

    Returns
    -------
    The analysis-based constraint wrapped in a ``NamedConstraint`` labelled
    ``CompletenessConstraint(<analyzer>)``.
    """
    analyzer = Completeness(column, where)
    # Pin the generic parameters first, then instantiate the constraint.
    constraint_cls = AnalysisBasedConstraint[NumMatchesAndCount, float, float]
    inner = constraint_cls(
        analyzer, assertion, hint=hint  # type: ignore[arg-type]
    )
    label = f"CompletenessConstraint({analyzer})"
    return NamedConstraint(inner, label)
def size_constraint(
    assertion: Callable[[int], bool],
    where: Optional[str] = None,
    hint: Optional[str] = None,
) -> Constraint:
    """
    Run a Size analysis and execute the assertion on its result.

    Parameters
    ----------
    assertion:
        Callable that receives an int input parameter and returns a boolean.
    where:
        Optional filter expression; passed through to the ``Size`` analyzer.
    hint:
        Optional hint forwarded to the underlying constraint.

    Returns
    -------
    The analysis-based constraint wrapped in a ``NamedConstraint`` labelled
    ``SizeConstraint(<analyzer>)``.

    Raises
    ------
    ValueError
        If ``assertion`` is not callable.
    """
    # Reject a non-callable assertion up front, before any analyzer is built.
    if not callable(assertion):
        raise ValueError("assertion is not a callable")

    analyzer = Size(where)
    # Pin the generic parameters first, then instantiate the constraint.
    constraint_cls = AnalysisBasedConstraint[NumMatches, int, int]
    inner = constraint_cls(
        analyzer, assertion, hint=hint  # type: ignore[arg-type]
    )
    label = f"SizeConstraint({analyzer})"
    return NamedConstraint(inner, label)
def pattern_match_constraint(
    column: str,
    pattern: Union[str, Pattern],
    assertion: Callable[[float], bool],
    where: Optional[str] = None,
    name: Optional[str] = None,
    hint: Optional[str] = None,
) -> Constraint:
    """
    Run a regex compliance (PatternMatch) analysis on the given column and
    execute the assertion on its result.

    Parameters
    ----------
    column:
        The column to run the assertion on.
    pattern:
        The regex pattern to check compliance for (either string or pattern
        instance).
    assertion:
        Callable that receives a float input parameter and returns a boolean.
    where:
        Additional filter to apply before the analyzer is run.
    name:
        A name that summarizes the check being made. When given (and
        non-empty) it is used as the label of the returned constraint;
        otherwise the label is built from ``column`` and ``pattern``.
    hint:
        A hint to provide additional context why a constraint could have
        failed.

    Returns
    -------
    The analysis-based constraint wrapped in a ``NamedConstraint`` labelled
    ``PatternMatchConstraint(<name>)`` or
    ``PatternMatchConstraint(<column>, <pattern>)``.
    """
    analyzer = PatternMatch(column, pattern, where)
    # Pin the generic parameters first, then instantiate the constraint.
    constraint_cls = AnalysisBasedConstraint[NumMatchesAndCount, float, float]
    inner = constraint_cls(
        analyzer, assertion, hint=hint  # type: ignore[arg-type]
    )

    # A caller-supplied name wins; fall back to describing the check itself.
    if name:
        label = f"PatternMatchConstraint({name})"
    else:
        label = f"PatternMatchConstraint({column}, {pattern})"
    return NamedConstraint(inner, label)