def check_compatibility(spec, schema):
    """Verify that every partition field in *spec* can be applied to *schema*.

    For each field the source type is looked up in the schema through the
    field's ``source_id``. That type has to be primitive and has to be
    accepted by the field's transform.

    :param spec: object exposing ``fields``; each field has ``source_id``
        and ``transform``
    :param schema: object exposing ``find_type(source_id)``
    :raises ValidationException: for the first field whose source type is
        not primitive or is rejected by its transform
    """
    for partition_field in spec.fields:
        source_type = schema.find_type(partition_field.source_id)

        if not source_type.is_primitive_type():
            raise ValidationException("Cannot partition by non-primitive source field: %s", source_type)

        if partition_field.transform.can_transform(source_type):
            continue

        raise ValidationException("Invalid source type %s for transform: %s",
                                  (source_type, partition_field.transform))
def convert_literal(lit):
    """Convert *lit* to the type of ``bound_term`` and validate the result.

    ``bound_term`` is not a parameter; it is resolved from the enclosing
    scope.

    :param lit: literal exposing ``to(type)``
    :return: the converted literal
    :raises ValidationException: if the conversion returns ``None``
    """
    # Convert to the term's *type*, not to the term object itself: the error
    # message below already names bound_term.type as the conversion target.
    converted = lit.to(bound_term.type)
    ValidationException.check(
        converted is not None,
        "Invalid Value for conversion to type %s: %s (%s)",
        (bound_term.type, lit, lit.__class__.__name__))
    return converted
def replace_properties(self, new_properties):
    """Return a new ``TableMetadata`` that carries *new_properties*.

    Every other constructor argument is copied from this instance, except
    the timestamp, which is set to the current time in milliseconds.

    :param new_properties: replacement properties; must not be ``None``
    :return: a new ``TableMetadata`` instance
    :raises ValidationException: if *new_properties* is ``None``
    """
    # Pass an explicit (empty) format-args tuple, matching the three-argument
    # form used by the other ValidationException.check call sites.
    ValidationException.check(new_properties is not None, "Cannot set properties to null", ())

    now_millis = int(time.time() * 1000)
    return TableMetadata(self.ops, None, self.location, now_millis, self.last_column_id,
                         self.schema, self.spec, new_properties, self.current_snapshot_id,
                         self.snapshots, self.snapshot_log)
def rollback_to(self, snapshot):
    """Return a new ``TableMetadata`` whose current snapshot is *snapshot*.

    A new snapshot-log entry stamped with the current time (milliseconds) is
    appended to a copy of the existing log; the existing log list is not
    mutated.

    :param snapshot: object exposing ``snapshot_id``; the id must be present
        in ``self.snapshot_by_id``
    :return: a new ``TableMetadata`` instance
    :raises ValidationException: if the snapshot id is unknown
    """
    # The check must PASS when the snapshot is known. The previous `not in`
    # test rejected every known snapshot and accepted unknown ones.
    ValidationException.check(snapshot.snapshot_id in self.snapshot_by_id,
                              "Cannot set current snapshot to unknown: %s", (snapshot.snapshot_id,))

    now_millis = int(time.time() * 1000)
    new_snapshot_log = self.snapshot_log + [SnapshotLogEntry(now_millis, snapshot.snapshot_id)]

    return TableMetadata(self.ops, None, self.location, now_millis, self.last_column_id,
                         self.schema, self.spec, self.properties, snapshot.snapshot_id,
                         self.snapshots, new_snapshot_log)
def remove_snapshot_log_entries(self, snapshot_ids):
    """Return a new ``TableMetadata`` without log entries for *snapshot_ids*.

    Unless ``self.current_snapshot_id`` is negative, the last remaining log
    entry must refer to the current snapshot.

    :param snapshot_ids: container of snapshot ids whose log entries are dropped
    :return: a new ``TableMetadata`` instance with the filtered log
    :raises ValidationException: if the filtered log is empty or its latest
        entry is not the current snapshot (while a current snapshot is set)
    """
    new_snapshot_log = [entry for entry in self.snapshot_log
                        if entry.snapshot_id not in snapshot_ids]

    # Guard the [-1] access: an emptied log is reported as a validation
    # failure instead of escaping as an IndexError.
    check_snapshot = self.current_snapshot_id < 0 or (
        bool(new_snapshot_log)
        and new_snapshot_log[-1].snapshot_id == self.current_snapshot_id)

    # Explicit empty format-args tuple, matching the other check call sites.
    ValidationException.check(check_snapshot,
                              "Cannot set invalid snapshot log: latest entry is not the current snapshot",
                              ())

    return TableMetadata(self.ops, None, self.location, int(time.time() * 1000), self.last_column_id,
                         self.schema, self.spec, self.properties, self.current_snapshot_id,
                         self.snapshots, new_snapshot_log)
def bind(self, struct: StructType, case_sensitive: bool = True) -> BoundReference:
    """Resolve this reference's name against *struct*.

    A ``Schema`` is built from ``struct.fields`` and the field is looked up
    by ``self.name``, either exactly or case-insensitively.

    :param struct: struct type whose fields are searched
    :param case_sensitive: use the case-sensitive lookup when true
    :return: a ``BoundReference`` built from *struct* and the matching field
    :raises ValidationException: if no field matches
    """
    from iceberg.api import Schema

    schema = Schema(struct.fields)
    if case_sensitive:
        field = schema.find_field(self.name)
    else:
        field = schema.case_insensitive_find_field(self.name)

    ValidationException.check(field is not None,
                              "Cannot find field '%s' in struct: %s",
                              (self.name, schema.as_struct()))
    return BoundReference(struct, field)
def test(self, struct: StructLike = None, value: Any = None) -> bool:
    """Evaluate this predicate against a struct or an already-extracted value.

    When *struct* is given, the term is evaluated on it and the result is
    re-submitted through ``test(value=...)``. Otherwise *value* is handed to
    the unary, literal or set test according to the predicate flags.

    :param struct: row-like object passed to ``self.term.eval``
    :param value: value to test directly
    :return: result of the selected predicate test
    :raises ValidationException: if both arguments are supplied, or if
        *struct* is supplied while the term is not a ``BoundTerm``
    """
    ValidationException.check(struct is None or value is None,
                              "Either struct or value must be none", ())

    if struct is None:
        if self.is_unary_predicate:
            return self.test_unary_predicate(value)
        if self.is_literal_predicate:
            return self.test_literal_predicate(value)
        return self.test_set_predicate(value)

    ValidationException.check(isinstance(self.term, BoundTerm),
                              "Term must be bound to eval: %s", (self.term))
    extracted = self.term.eval(struct)  # type: ignore
    return self.test(value=extracted)
def find(self, field_id, struct):
    """Return the position of the field with id *field_id* in ``struct.fields``.

    :param field_id: id of the field to locate
    :param struct: object exposing ``fields``; each field has ``field_id``
    :return: zero-based index of the first matching field
    :raises ValidationException: if no field has that id
    """
    for position, field in enumerate(struct.fields):
        # Compare against the requested id. The previous code read
        # self.field_id and ignored the parameter entirely.
        if field.field_id == field_id:
            return position

    raise ValidationException("Cannot find top-level field id %d in struct: %s", (field_id, struct))
def bind_in_operation(self, bound_term):
    """Bind an IN / NOT_IN predicate to *bound_term*.

    Every literal in ``self.literals`` is converted to the term's type, and
    the ``above_max`` / ``below_min`` sentinels are dropped. The result is:

    * no literal left: ``always_true()`` for NOT_IN, otherwise
      ``always_false()``;
    * exactly one distinct literal: an EQ (for IN) or NOT_EQ (for NOT_IN)
      literal predicate;
    * otherwise: a set predicate over the distinct literals.

    :param bound_term: bound term exposing ``type``
    :return: an expression or a ``BoundPredicate``
    :raises ValidationException: if a literal cannot be converted, or if a
        single literal remains and the operation is neither IN nor NOT_IN
    """
    from .expressions import Expressions

    def convert_literal(lit):
        # Convert to the term's type (the value reported in the message),
        # not to the term object itself.
        converted = lit.to(bound_term.type)
        ValidationException.check(
            converted is not None,
            "Invalid Value for conversion to type %s: %s (%s)",
            (bound_term.type, lit, lit.__class__.__name__))
        return converted

    # Materialise a list: a lazy filter object has no len().
    converted_literals = [
        converted
        for converted in [convert_literal(lit) for lit in self.literals]
        if converted != Literals.above_max() and converted != Literals.below_min()
    ]

    if not converted_literals:
        # Compare against self.op; a bare `Operation.NOT_IN` is always truthy.
        if self.op == Operation.NOT_IN:
            return Expressions.always_true()
        return Expressions.always_false()

    literal_set = set(converted_literals)
    if len(literal_set) == 1:
        # Sets are not subscriptable; take the single element via an iterator.
        only_literal = next(iter(literal_set))
        if self.op == Operation.IN:
            return BoundPredicate(Operation.EQ, bound_term, lit=only_literal,
                                  is_literal_predicate=True)
        elif self.op == Operation.NOT_IN:
            return BoundPredicate(Operation.NOT_EQ, bound_term, lit=only_literal,
                                  is_literal_predicate=True)
        else:
            raise ValidationException("Operation must be in or not in", ())

    return BoundPredicate(self.op, bound_term, literals=literal_set, is_set_predicate=True)
def bind(self, struct, case_sensitive=True):  # noqa: C901
    """Bind this predicate to the matching field of *struct*.

    Without a literal the operation must be IS_NULL or NOT_NULL; for a
    required field these collapse to the constants ``FALSE`` / ``TRUE``.
    With a literal, the literal is converted to the field type; conversions
    that yield the ``above_max`` / ``below_min`` sentinels collapse the
    comparison operations to ``TRUE`` / ``FALSE``.

    :param struct: struct exposing ``field(name)`` and
        ``case_insensitive_field(name)``
    :param case_sensitive: use the exact-name lookup when true; otherwise the
        lower-cased name is looked up case-insensitively
    :return: ``TRUE``, ``FALSE`` or a ``BoundPredicate``
    :raises ValidationException: if the field is missing, if the operation is
        invalid for a predicate without literal, or if the literal cannot be
        converted to the field type
    """
    if case_sensitive:
        field = struct.field(self.ref.name)
    else:
        field = struct.case_insensitive_field(self.ref.name.lower())

    ValidationException.check(field is not None,
                              "Cannot find field '%s' in struct %s", (self.ref.name, struct))

    if self.lit is None:
        if self.op == Operation.IS_NULL:
            if field.is_required:
                return FALSE
            return BoundPredicate(Operation.IS_NULL, BoundReference(struct, field.field_id))
        elif self.op == Operation.NOT_NULL:
            if field.is_required:
                return TRUE
            return BoundPredicate(Operation.NOT_NULL, BoundReference(struct, field.field_id))
        else:
            # Use an empty args tuple like the other call sites; %-formatting
            # a message without specifiers against None raises TypeError.
            raise ValidationException("Operation must be IS_NULL or NOT_NULL", ())

    literal = self.lit.to(field.type)
    if literal is None:
        raise ValidationException(
            "Invalid value for comparison inclusive type %s: %s (%s)",
            (field.type, self.lit.value, type(self.lit.value)))
    elif literal == Literals.above_max():
        if self.op in (Operation.LT, Operation.LT_EQ, Operation.NOT_EQ):
            return TRUE
        elif self.op in (Operation.GT, Operation.GT_EQ, Operation.EQ):
            return FALSE
    elif literal == Literals.below_min():
        if self.op in (Operation.LT, Operation.LT_EQ, Operation.NOT_EQ):
            return FALSE
        elif self.op in (Operation.GT, Operation.GT_EQ, Operation.EQ):
            return TRUE

    # Any operation not decided above falls through to a regular bound predicate.
    return BoundPredicate(self.op, BoundReference(struct, field.field_id), literal)
def test_set_predicate(self, value: Any) -> bool:
    """Test *value* for membership in this predicate's literals.

    :param value: value to look up in ``self._literals``
    :return: membership result for IN, its negation for NOT_IN
    :raises ValidationException: if ``self._literals`` is ``None``
    :raises ValueError: if the operation is neither IN nor NOT_IN
    """
    if self._literals is None:
        raise ValidationException("Literals must not be none", ())

    if self.op == Operation.IN:
        return value in self._literals

    if self.op == Operation.NOT_IN:
        return value not in self._literals

    raise ValueError(f"{self.op} is not a valid set predicate")
def bind_unary_operation(self, bound_term: BoundTerm) -> BoundPredicate:
    """Bind an IS_NULL / NOT_NULL / IS_NAN / NOT_NAN predicate to *bound_term*.

    Null checks on a required field collapse to ``always_false()`` (IS_NULL)
    or ``always_true()`` (NOT_NULL). NaN checks are accepted only when
    ``self.floating_type`` approves the term's type id.

    :param bound_term: bound term exposing ``ref.field`` and ``ref.type``
    :return: a constant expression or a unary ``BoundPredicate``
    :raises ValidationException: for a NaN check on a non-floating column or
        for any other operation
    """
    from .expressions import Expressions

    if self.op == Operation.IS_NULL:
        if bound_term.ref.field.is_required:
            return Expressions.always_false()
        return BoundPredicate(Operation.IS_NULL, bound_term, is_unary_predicate=True)

    if self.op == Operation.NOT_NULL:
        if bound_term.ref.field.is_required:
            return Expressions.always_true()
        return BoundPredicate(Operation.NOT_NULL, bound_term, is_unary_predicate=True)

    if self.op in [Operation.IS_NAN, Operation.NOT_NAN]:
        if self.floating_type(bound_term.ref.type.type_id):
            return BoundPredicate(self.op, bound_term, is_unary_predicate=True)
        raise ValidationException(
            f"{self.op} cannot be used with a non-floating column", ())

    raise ValidationException(
        f"Operation must be in [IS_NULL, NOT_NULL, IS_NAN, NOT_NAN] was:{self.op}", ())
def bind_literal_operation(self, bound_term):
    """Bind a comparison predicate with a single literal to *bound_term*.

    The literal is converted to the term's type. If the conversion yields
    the ``above_max`` or ``below_min`` sentinel, the comparison operations
    collapse to a constant expression; anything else becomes a literal
    ``BoundPredicate``.

    :param bound_term: bound term exposing ``type``
    :return: a constant expression or a literal ``BoundPredicate``
    :raises ValidationException: if the literal cannot be converted
    """
    from .expressions import Expressions

    converted = self.lit.to(bound_term.type)
    ValidationException.check(
        converted is not None,
        "Invalid Value for conversion to type %s: %s (%s)",
        (bound_term.type, self.lit, self.lit.__class__.__name__))

    if converted == Literals.above_max():
        # Literal lies above every representable value of the type.
        if self.op in [Operation.LT, Operation.LT_EQ, Operation.NOT_EQ]:
            return Expressions.always_true()
        if self.op in [Operation.GT, Operation.GT_EQ, Operation.EQ]:
            return Expressions.always_false()
    elif converted == Literals.below_min():
        # Literal lies below every representable value of the type.
        if self.op in [Operation.LT, Operation.LT_EQ, Operation.NOT_EQ]:
            return Expressions.always_false()
        if self.op in [Operation.GT, Operation.GT_EQ, Operation.EQ]:
            return Expressions.always_true()

    return BoundPredicate(self.op, bound_term, lit=converted, is_literal_predicate=True)
def test_literal_predicate(self, value: Any) -> bool:
    """Compare *value* with this predicate's literal value.

    :param value: left-hand operand of the comparison
    :return: result of applying the operation to *value* and ``self.lit.value``
    :raises ValidationException: if ``self.lit`` is ``None``
    :raises ValueError: if the operation is not one of LT, LT_EQ, GT, GT_EQ,
        EQ, NOT_EQ
    """
    if self.lit is None:
        raise ValidationException("Literal must not be none", ())

    if self.op == Operation.LT:
        return value < self.lit.value
    if self.op == Operation.LT_EQ:
        return value <= self.lit.value
    if self.op == Operation.GT:
        return value > self.lit.value
    if self.op == Operation.GT_EQ:
        return value >= self.lit.value
    if self.op == Operation.EQ:
        return value == self.lit.value
    if self.op == Operation.NOT_EQ:
        return value != self.lit.value

    raise ValueError(f"{self.op} is not a valid literal predicate")
def __init__(self, op: Operation, term: BoundTerm, lit: BaseLiteral = None,
             literals: List[BaseLiteral] = None, is_unary_predicate: bool = False,
             is_literal_predicate: bool = False, is_set_predicate: bool = False):
    """Create a bound predicate of exactly one kind: unary, literal or set.

    :param op: operation, forwarded to the parent initializer
    :param term: term, forwarded to the parent initializer
    :param lit: literal; required for literal predicates, forbidden for
        unary predicates
    :param literals: literals; required for set predicates
    :param is_unary_predicate: marks the predicate as unary
    :param is_literal_predicate: marks the predicate as a literal comparison
    :param is_set_predicate: marks the predicate as a set predicate
    :raises ValidationException: if the number of flags set is not exactly
        one, or if the literal arguments do not fit the selected kind
    """
    self.is_unary_predicate = is_unary_predicate
    self.is_literal_predicate = is_literal_predicate
    self.is_set_predicate = is_set_predicate
    super(BoundPredicate, self).__init__(op, term)

    flags_set = sum([is_unary_predicate, is_literal_predicate, is_set_predicate])
    ValidationException.check(
        flags_set == 1,
        "Only a single predicate type may be set: %s=%s, %s=%s, %s=%s",
        ("is_unary_predicate", is_unary_predicate,
         "is_literal_predicate", is_literal_predicate,
         "is_set_predicate", is_set_predicate))

    self._literals: Optional[List[BaseLiteral]] = None

    if self.is_unary_predicate:
        ValidationException.check(lit is None, "Unary Predicates may not have a literal", ())
        return

    if self.is_literal_predicate:
        ValidationException.check(lit is not None, "Literal Predicates must have a literal set", ())
        # A single literal is stored as a one-element list.
        self._literals = [lit]  # type: ignore
        return

    if self.is_set_predicate:
        ValidationException.check(literals is not None, "Set Predicates must have literals set", ())
        self._literals = literals
        return

    raise ValueError(
        f"Unable to instantiate {op} -> (lit={lit}, literal={literals}"
    )
def eval(self, struct: StructLike) -> bool:
    """Evaluate this predicate against *struct*.

    The term is evaluated on *struct* and the resulting value is handed to
    ``self.test``.

    :param struct: row-like object passed to ``self.term.eval``
    :return: result of ``self.test`` for the extracted value
    :raises ValidationException: if the term is not a ``BoundTerm``
    """
    # One-element tuple so the %-formatting always sees exactly one argument.
    ValidationException.check(isinstance(self.term, BoundTerm),
                              "Term must be bound to eval: %s", (self.term,))

    # Pass the extracted value by keyword: the first positional parameter of
    # test() is `struct`, so a positional call would treat the value as a
    # struct and try to evaluate the term on it a second time.
    return self.test(value=self.term.eval(struct))  # type: ignore