示例#1
0
def uniqueness_constraint(
    columns: Sequence[str],
    assertion: Callable[[float], bool],
    where: Optional[str] = None,
    hint: Optional[str] = None,
) -> Constraint:
    """
    Build a named constraint that applies ``assertion`` to the result of a
    ``Uniqueness`` analysis over the given columns.

    Parameters:
    ----------

    columns:
        Columns handed to the ``Uniqueness`` analyzer.
    assertion:
        Callable that receives a float input parameter and returns a boolean.
    where:
        Additional filter to apply before the analyzer is run.
    hint:
        A hint to provide additional context why a constraint could have
        failed.

    Returns:
    -------
    A ``NamedConstraint`` labelled ``UniquenessConstraint(<analyzer>)``,
    where ``<analyzer>`` is the string form of the analyzer.
    """
    analyzer = Uniqueness(columns, where)

    # The analysis-based constraint is built first; the label is then
    # rendered from the analyzer's string form.
    return NamedConstraint(
        AnalysisBasedConstraint[FrequenciesAndNumRows, float, float](
            analyzer,
            assertion,
            hint=hint,  # type: ignore[arg-type]
        ),
        f"UniquenessConstraint({analyzer})",
    )
示例#2
0
def compliance_constraint(
    name: str,
    column: str,
    assertion: Callable[[float], bool],
    where: Optional[str] = None,
    hint: Optional[str] = None,
) -> Constraint:
    """
    Build a named constraint that applies ``assertion`` to the result of a
    ``Compliance`` analysis of the given column expression.

    Parameters:
    ---------
    name:
        A name that summarizes the check being made. It is passed to the
        ``Compliance`` analyzer and is used to name the metrics for the
        analysis being done.
    column:
        The column expression to be evaluated.
    assertion:
        Callable that receives a float input parameter and returns a boolean.
    where:
        Additional filter to apply before the analyzer is run.
    hint:
        A hint to provide additional context why a constraint could have
        failed.

    Returns:
    -------
    A ``NamedConstraint`` labelled ``ComplianceConstraint(<analyzer>)``,
    where ``<analyzer>`` is the string form of the analyzer.
    """
    analyzer = Compliance(name, column, where)
    typed_constraint = AnalysisBasedConstraint[NumMatchesAndCount, float, float]
    checked = typed_constraint(
        analyzer,
        assertion,
        hint=hint,  # type: ignore[arg-type]
    )
    label = f"ComplianceConstraint({analyzer})"

    return NamedConstraint(checked, label)
示例#3
0
def quantile_constraint(
    column: str,
    quantile: float,
    assertion: Callable[[float], bool],
    where: Optional[str] = None,
    hint: Optional[str] = None,
) -> Constraint:
    """
    Build a named constraint that applies ``assertion`` to the result of a
    ``Quantile`` analysis of the given column.

    Parameters:
    ----------
    column:
        Column to run the assertion on.
    quantile:
        Which quantile to assert on.
    assertion:
        Callable that receives a float input parameter (the computed
        quantile) and returns a boolean.
    where:
        Filter expression passed through to the ``Quantile`` analyzer.
    hint:
        A hint to provide additional context why a constraint could have
        failed.

    Returns:
    -------
    A ``NamedConstraint`` labelled ``QuantileConstraint(<analyzer>)``,
    where ``<analyzer>`` is the string form of the analyzer.
    """
    analyzer = Quantile(column, quantile, where)

    return NamedConstraint(
        AnalysisBasedConstraint[QuantileState, float, float](
            analyzer,
            assertion,
            hint=hint,  # type: ignore[arg-type]
        ),
        f"QuantileConstraint({analyzer})",
    )
示例#4
0
def max_constraint(
    column: str,
    assertion: Callable[[float], bool],
    where: Optional[str] = None,
    hint: Optional[str] = None,
) -> Constraint:
    """
    Build a named constraint that applies ``assertion`` to the result of a
    ``Maximum`` analysis of the given column.

    Parameters:
    ----------
    column:
        Column handed to the ``Maximum`` analyzer.
    assertion:
        Callable that receives a float input parameter and returns a boolean.
    where:
        Filter expression passed through to the ``Maximum`` analyzer.
    hint:
        Optional text forwarded to the underlying constraint as ``hint``.

    Returns:
    -------
    A ``NamedConstraint`` labelled ``MaximumConstraint(<analyzer>)``,
    where ``<analyzer>`` is the string form of the analyzer.
    """
    analyzer = Maximum(column, where)
    typed_constraint = AnalysisBasedConstraint[MaxState, float, float]
    checked = typed_constraint(
        analyzer,
        assertion,
        hint=hint,  # type: ignore[arg-type]
    )
    label = f"MaximumConstraint({analyzer})"

    return NamedConstraint(checked, label)
示例#5
0
def standard_deviation_constraint(
    column: str,
    assertion: Callable[[float], bool],
    where: Optional[str] = None,
    hint: Optional[str] = None,
) -> Constraint:
    """
    Build a named constraint that applies ``assertion`` to the result of a
    ``StandardDeviation`` analysis of the given column.

    Parameters:
    ----------
    column:
        Column handed to the ``StandardDeviation`` analyzer.
    assertion:
        Callable that receives a float input parameter and returns a boolean.
    where:
        Filter expression passed through to the ``StandardDeviation``
        analyzer.
    hint:
        Optional text forwarded to the underlying constraint as ``hint``.

    Returns:
    -------
    A ``NamedConstraint`` labelled
    ``StandardDeviationConstraint(<analyzer>)``, where ``<analyzer>`` is the
    string form of the analyzer.
    """
    analyzer = StandardDeviation(column, where)

    return NamedConstraint(
        AnalysisBasedConstraint[StandardDeviationState, float, float](
            analyzer,
            assertion,
            hint=hint,  # type: ignore[arg-type]
        ),
        f"StandardDeviationConstraint({analyzer})",
    )
示例#6
0
def sum_constraint(
    column: str,
    assertion: Callable[[float], bool],
    where: Optional[str] = None,
    hint: Optional[str] = None,
) -> Constraint:
    """
    Build a named constraint that applies ``assertion`` to the result of a
    ``Sum`` analysis of the given column.

    Parameters:
    ----------
    column:
        Column handed to the ``Sum`` analyzer.
    assertion:
        Callable that receives a float input parameter and returns a boolean.
    where:
        Filter expression passed through to the ``Sum`` analyzer.
    hint:
        Optional text forwarded to the underlying constraint as ``hint``.

    Returns:
    -------
    A ``NamedConstraint`` labelled ``SumConstraint(<analyzer>)``, where
    ``<analyzer>`` is the string form of the analyzer.
    """
    # Trailing underscore keeps the local from shadowing the builtin ``sum``.
    sum_ = Sum(column, where)
    constraint = AnalysisBasedConstraint[SumState, float, float](
        sum_,
        assertion,
        hint=hint,  # type: ignore[arg-type]
    )

    # Interpolate the analyzer, not the builtin ``sum``; the latter would
    # render as "SumConstraint(<built-in function sum>)" for every column.
    return NamedConstraint(constraint, f"SumConstraint({sum_})")
示例#7
0
def mean_constraint(
    column: str,
    assertion: Callable[[float], bool],
    where: Optional[str] = None,
    hint: Optional[str] = None,
) -> Constraint:
    """
    Build a named constraint that applies ``assertion`` to the result of a
    ``Mean`` analysis of the given column.

    Parameters:
    ----------
    column:
        Column handed to the ``Mean`` analyzer.
    assertion:
        Callable that receives a float input parameter and returns a boolean.
    where:
        Filter expression passed through to the ``Mean`` analyzer.
    hint:
        Optional text forwarded to the underlying constraint as ``hint``.

    Returns:
    -------
    A ``NamedConstraint`` labelled ``MeanConstraint(<analyzer>)``, where
    ``<analyzer>`` is the string form of the analyzer.
    """
    analyzer = Mean(column, where)
    typed_constraint = AnalysisBasedConstraint[MeanState, float, float]
    checked = typed_constraint(
        analyzer,
        assertion,
        hint=hint,  # type: ignore[arg-type]
    )
    label = f"MeanConstraint({analyzer})"

    return NamedConstraint(checked, label)
示例#8
0
def completeness_constraint(
    column: str,
    assertion: Callable[[float], bool],
    where: Optional[str] = None,
    hint: Optional[str] = None,
) -> Constraint:
    """
    Build a named constraint that applies ``assertion`` to the result of a
    ``Completeness`` analysis of the given column.

    Parameters:
    ----------
    column:
        Column handed to the ``Completeness`` analyzer.
    assertion:
        Callable that receives a float input parameter and returns a boolean.
    where:
        Filter expression passed through to the ``Completeness`` analyzer.
    hint:
        Optional text forwarded to the underlying constraint as ``hint``.

    Returns:
    -------
    A ``NamedConstraint`` labelled ``CompletenessConstraint(<analyzer>)``,
    where ``<analyzer>`` is the string form of the analyzer.
    """
    analyzer = Completeness(column, where)

    return NamedConstraint(
        AnalysisBasedConstraint[NumMatchesAndCount, float, float](
            analyzer,
            assertion,
            hint=hint,  # type: ignore[arg-type]
        ),
        f"CompletenessConstraint({analyzer})",
    )
示例#9
0
def size_constraint(
    assertion: Callable[[int], bool],
    where: Optional[str] = None,
    hint: Optional[str] = None,
) -> Constraint:
    """
    Build a named constraint that applies ``assertion`` to the result of a
    ``Size`` analysis.

    Parameters:
    ----------
    assertion:
        Callable that receives an int input parameter and returns a boolean.
    where:
        Filter expression passed through to the ``Size`` analyzer.
    hint:
        Optional text forwarded to the underlying constraint as ``hint``.

    Returns:
    -------
    A ``NamedConstraint`` labelled ``SizeConstraint(<analyzer>)``, where
    ``<analyzer>`` is the string form of the analyzer.

    Raises:
    ------
    ValueError
        If ``assertion`` is not callable.
    """
    # Reject a non-callable assertion up front, before any analyzer is built.
    if not callable(assertion):
        raise ValueError("assertion is not a callable")

    analyzer = Size(where)
    typed_constraint = AnalysisBasedConstraint[NumMatches, int, int]
    checked = typed_constraint(
        analyzer,
        assertion,
        hint=hint,  # type: ignore[arg-type]
    )
    label = f"SizeConstraint({analyzer})"

    return NamedConstraint(checked, label)
示例#10
0
def pattern_match_constraint(
    column: str,
    pattern: Union[str, Pattern],
    assertion: Callable[[float], bool],
    where: Optional[str] = None,
    name: Optional[str] = None,
    hint: Optional[str] = None,
) -> Constraint:
    """
    Build a named constraint that applies ``assertion`` to the result of a
    ``PatternMatch`` (regex compliance) analysis of the given column.

    Parameters
    ----------
    column:
        The column to run the assertion on.
    pattern:
        The regex pattern to check compliance for (either string or pattern
        instance).
    assertion:
        Callable that receives a float input parameter and returns a boolean.
    where:
        Additional filter to apply before the analyzer is run.
    name:
        A name that summarizes the check being made. When truthy it is used
        as the constraint label, ``PatternMatchConstraint(<name>)``;
        otherwise the label is built from ``column`` and ``pattern``.
    hint:
        A hint to provide additional context why a constraint could have
        failed.

    Returns
    -------
    A ``NamedConstraint`` wrapping the analysis-based constraint.
    """
    analyzer = PatternMatch(column, pattern, where)

    checked = AnalysisBasedConstraint[NumMatchesAndCount, float, float](
        analyzer,
        assertion,
        hint=hint,  # type: ignore[arg-type]
    )

    # A missing or empty ``name`` falls back to a label derived from the
    # column and the pattern.
    if name:
        label = f"PatternMatchConstraint({name})"
    else:
        label = f"PatternMatchConstraint({column}, {pattern})"

    return NamedConstraint(checked, label)