def check_compatibility(spec, schema):
    """Validate that every partition field in ``spec`` can be applied to ``schema``.

    For each entry in ``spec.fields`` the source column type is looked up with
    ``schema.find_type(field.source_id)`` and checked against the field's
    transform.

    Args:
        spec: object exposing ``fields``; each field has ``source_id`` and
            ``transform`` attributes.
        schema: object exposing ``find_type(source_id)``.

    Raises:
        ValidationException: if a source column cannot be resolved, is not a
            primitive type, or is rejected by ``transform.can_transform``.
    """
    for field in spec.fields:
        src_type = schema.find_type(field.source_id)

        # Fail with a validation error instead of an AttributeError on None
        # when the source column is not resolved by the schema.
        if src_type is None:
            raise ValidationException(
                "Cannot find source column for partition field: %s", (field,))

        if not src_type.is_primitive_type():
            # Args are always passed as a tuple so that the message formats
            # correctly regardless of what kind of object src_type is.
            raise ValidationException(
                "Cannot partition by non-primitive source field: %s", (src_type,))

        if not field.transform.can_transform(src_type):
            raise ValidationException(
                "Invalid source type %s for transform: %s",
                (src_type, field.transform))
def find(self, field_id, struct):
    """Return the position of the top-level field with id ``field_id``.

    Args:
        field_id: id to look for among ``struct.fields``.
        struct: object exposing ``fields``, each with a ``field_id`` attribute.

    Returns:
        The zero-based index of the first matching field in ``struct.fields``.

    Raises:
        ValidationException: if no top-level field has the requested id.
    """
    # Match against the requested id (the parameter), which is also the id
    # reported in the error message below, rather than instance state.
    for position, field in enumerate(struct.fields):
        if field.field_id == field_id:
            return position

    raise ValidationException(
        "Cannot find top-level field id %d in struct: %s", (field_id, struct))
def bind_in_operation(self, bound_term):
    """Bind this IN / NOT_IN predicate to ``bound_term``.

    Every literal in ``self.literals`` is converted to the term's type.
    Literals that convert to the ``Literals.above_max()`` or
    ``Literals.below_min()`` sentinels are dropped before the predicate is
    built.

    Args:
        bound_term: the bound term the predicate applies to; its ``type``
            attribute is the conversion target.

    Returns:
        * ``Expressions.always_true()`` for NOT_IN, otherwise
          ``Expressions.always_false()``, when no literal remains;
        * an EQ (for IN) or NOT_EQ (for NOT_IN) ``BoundPredicate`` when exactly
          one distinct literal remains;
        * otherwise a set ``BoundPredicate`` over the distinct literals.

    Raises:
        ValidationException: if a literal cannot be converted, or a single
            literal remains and the operation is neither IN nor NOT_IN.
    """
    from .expressions import Expressions

    def convert_literal(lit):
        # Convert to the term's type, the same value the error message reports.
        converted = lit.to(bound_term.type)
        ValidationException.check(
            converted is not None,
            "Invalid Value for conversion to type %s: %s (%s)",
            (bound_term.type, lit, lit.__class__.__name__))
        return converted

    all_converted = [convert_literal(lit) for lit in self.literals]

    # Materialise a list: filter() is a lazy iterator on Python 3 and has no len().
    converted_literals = [
        converted for converted in all_converted
        if converted != Literals.above_max() and converted != Literals.below_min()
    ]

    if not converted_literals:
        # The outcome depends on this predicate's operation, not on the
        # (always truthy) Operation.NOT_IN constant.
        if self.op == Operation.NOT_IN:
            return Expressions.always_true()
        return Expressions.always_false()

    literal_set = set(converted_literals)
    if len(literal_set) == 1:
        # Sets are not subscriptable; unpack the single element instead.
        (only_literal,) = literal_set
        if self.op == Operation.IN:
            return BoundPredicate(Operation.EQ, bound_term, only_literal)
        if self.op == Operation.NOT_IN:
            return BoundPredicate(Operation.NOT_EQ, bound_term, only_literal)
        raise ValidationException("Operation must be in or not in", ())

    return BoundPredicate(self.op, bound_term, literals=literal_set,
                          is_set_predicate=True)
def bind(self, struct, case_sensitive=True):  # noqa: C901
    """Bind this unbound predicate to a field of ``struct``.

    Args:
        struct: struct exposing ``field(name)`` and
            ``case_insensitive_field(name)`` lookups.
        case_sensitive: when False the reference name is lower-cased and
            resolved through ``struct.case_insensitive_field``.

    Returns:
        ``TRUE`` / ``FALSE`` when the outcome is already decided (null checks
        on a required field, or comparisons against an above-max / below-min
        literal), otherwise a ``BoundPredicate`` on a ``BoundReference`` to the
        resolved field.

    Raises:
        ValidationException: if the field cannot be found, if the predicate has
            no literal and the operation is not IS_NULL / NOT_NULL, or if the
            literal cannot be converted to the field's type.
    """
    if case_sensitive:
        field = struct.field(self.ref.name)
    else:
        field = struct.case_insensitive_field(self.ref.name.lower())

    ValidationException.check(field is not None,
                              "Cannot find field '%s' in struct %s",
                              (self.ref.name, struct))

    if self.lit is None:
        if self.op == Operation.IS_NULL:
            if field.is_required:
                return FALSE
            return BoundPredicate(Operation.IS_NULL,
                                  BoundReference(struct, field.field_id))
        if self.op == Operation.NOT_NULL:
            if field.is_required:
                return TRUE
            return BoundPredicate(Operation.NOT_NULL,
                                  BoundReference(struct, field.field_id))
        # Empty tuple (not None) as format args, matching the other
        # argument-less ValidationException calls in this file.
        raise ValidationException(
            "Operation must be IS_NULL or NOT_NULL", ())

    literal = self.lit.to(field.type)
    if literal is None:
        raise ValidationException(
            "Invalid value for comparison inclusive type %s: %s (%s)",
            (field.type, self.lit.value, type(self.lit.value)))

    below_or_differs = (Operation.LT, Operation.LT_EQ, Operation.NOT_EQ)
    above_or_equals = (Operation.GT, Operation.GT_EQ, Operation.EQ)

    if literal == Literals.above_max():
        if self.op in below_or_differs:
            return TRUE
        if self.op in above_or_equals:
            return FALSE
    elif literal == Literals.below_min():
        if self.op in below_or_differs:
            return FALSE
        if self.op in above_or_equals:
            return TRUE

    # Any operation not listed above falls through to a regular bound
    # predicate, even when the literal is one of the sentinels.
    return BoundPredicate(self.op, BoundReference(struct, field.field_id),
                          literal)
def test_set_predicate(self, value: Any) -> bool:
    """Evaluate this set predicate (IN / NOT_IN) against ``value``.

    Args:
        value: the value to test for membership in ``self._literals``.

    Returns:
        For IN, whether ``value`` is contained in the literals; for NOT_IN,
        whether it is absent from them.

    Raises:
        ValidationException: if ``self._literals`` is None.
        ValueError: if the operation is neither IN nor NOT_IN.
    """
    candidates = self._literals
    if candidates is None:
        raise ValidationException("Literals must not be none", ())

    operation = self.op
    if operation == Operation.IN:
        return value in candidates
    if operation == Operation.NOT_IN:
        return value not in candidates

    raise ValueError(f"{operation} is not a valid set predicate")
def bind_unary_operation(self, bound_term: BoundTerm) -> BoundPredicate:
    """Bind a unary predicate (IS_NULL, NOT_NULL, IS_NAN, NOT_NAN) to ``bound_term``.

    Args:
        bound_term: the bound term; ``bound_term.ref.field.is_required`` and
            ``bound_term.ref.type.type_id`` are consulted.

    Returns:
        ``Expressions.always_false()`` for IS_NULL on a required field,
        ``Expressions.always_true()`` for NOT_NULL on a required field,
        otherwise a unary ``BoundPredicate`` for the operation.

    Raises:
        ValidationException: if a NaN check targets a type that
            ``self.floating_type`` rejects, or the operation is not one of the
            four unary operations.
    """
    from .expressions import Expressions

    requested = self.op

    if requested == Operation.IS_NULL:
        # A required field short-circuits to a constant expression.
        if bound_term.ref.field.is_required:
            return Expressions.always_false()
        return BoundPredicate(Operation.IS_NULL, bound_term,
                              is_unary_predicate=True)

    if requested == Operation.NOT_NULL:
        if bound_term.ref.field.is_required:
            return Expressions.always_true()
        return BoundPredicate(Operation.NOT_NULL, bound_term,
                              is_unary_predicate=True)

    if requested in [Operation.IS_NAN, Operation.NOT_NAN]:
        column_type_id = bound_term.ref.type.type_id
        if self.floating_type(column_type_id):
            return BoundPredicate(requested, bound_term,
                                  is_unary_predicate=True)
        raise ValidationException(
            f"{requested} cannot be used with a non-floating column", ())

    raise ValidationException(
        f"Operation must be in [IS_NULL, NOT_NULL, IS_NAN, NOT_NAN] was:{requested}",
        ())
def test_literal_predicate(self, value: Any) -> bool:
    """Evaluate this comparison predicate against ``value``.

    ``value`` is compared with ``self.lit.value`` using the operator selected
    by ``self.op`` (LT, LT_EQ, GT, GT_EQ, EQ or NOT_EQ).

    Args:
        value: the left-hand side of the comparison.

    Returns:
        The result of the comparison.

    Raises:
        ValidationException: if ``self.lit`` is None.
        ValueError: if the operation is not one of the six comparisons.
    """
    literal = self.lit
    if literal is None:
        raise ValidationException("Literal must not be none", ())

    operation = self.op

    # Ordering comparisons.
    if operation == Operation.LT:
        return value < literal.value
    if operation == Operation.LT_EQ:
        return value <= literal.value
    if operation == Operation.GT:
        return value > literal.value
    if operation == Operation.GT_EQ:
        return value >= literal.value

    # Equality comparisons.
    if operation == Operation.EQ:
        return value == literal.value
    if operation == Operation.NOT_EQ:
        return value != literal.value

    raise ValueError(f"{operation} is not a valid literal predicate")