示例#1
0
 def check_compatibility(spec, schema):
     """Validate that every partition field of ``spec`` can be applied to ``schema``.

     For each item of ``spec.fields`` the source type is looked up through
     ``schema.find_type(field.source_id)``. That type must report itself as
     primitive, and the field's transform must accept it.

     :param spec: object exposing an iterable ``fields``; each item provides
         ``source_id`` and ``transform``.
     :param schema: object exposing ``find_type(source_id)``.
     :raises ValidationException: if a source type is not primitive, or the
         field's transform cannot handle it.
     """
     for field in spec.fields:
         src_type = schema.find_type(field.source_id)
         if not src_type.is_primitive_type():
             # Format arguments are passed as a tuple (same shape as the raise
             # below) so a tuple-valued type cannot be mis-expanded.
             raise ValidationException("Cannot partition by non-primitive source field: %s", (src_type,))
         if not field.transform.can_transform(src_type):
             raise ValidationException("Invalid source type %s for transform: %s", (src_type, field.transform))
示例#2
0
 def convert_literal(lit):
     """Convert ``lit`` to the type of the enclosing scope's ``bound_term``.

     ``bound_term`` is a free variable here; it must be provided by the
     surrounding scope.

     :param lit: literal object exposing ``to(type)``.
     :returns: the converted literal.
     :raises ValidationException: if ``lit.to`` returns ``None``.
     """
     # Convert to the term's *type* (the same value the error message reports),
     # not to the term object itself.
     converted = lit.to(bound_term.type)
     ValidationException.check(
         converted is not None,
         "Invalid Value for conversion to type %s: %s (%s)",
         (bound_term.type, lit, lit.__class__.__name__))
     return converted
示例#3
0
    def replace_properties(self, new_properties):
        """Build a new ``TableMetadata`` that carries ``new_properties``.

        Every other constructor argument is copied from this instance, except
        the second (passed as ``None``) and the timestamp, which is the current
        wall-clock time in milliseconds.

        :param new_properties: replacement properties; must not be ``None``.
        :returns: a new ``TableMetadata`` instance.
        :raises ValidationException: if ``new_properties`` is ``None``.
        """
        ValidationException.check(new_properties is not None,
                                  "Cannot set properties to null")

        updated_millis = int(time.time() * 1000)
        return TableMetadata(
            self.ops,
            None,
            self.location,
            updated_millis,
            self.last_column_id,
            self.schema,
            self.spec,
            new_properties,
            self.current_snapshot_id,
            self.snapshots,
            self.snapshot_log,
        )
示例#4
0
    def rollback_to(self, snapshot):
        """Build a new ``TableMetadata`` whose current snapshot is ``snapshot``.

        A ``SnapshotLogEntry`` for the switch (current time in milliseconds,
        ``snapshot.snapshot_id``) is appended to a copy of the snapshot log.

        :param snapshot: object exposing ``snapshot_id``; the id must be a key
            of ``self.snapshot_by_id``.
        :returns: a new ``TableMetadata`` instance.
        :raises ValidationException: if the snapshot id is unknown.
        """
        # ``check`` raises when its condition is false, so the condition must
        # be "the snapshot IS known".
        ValidationException.check(snapshot.snapshot_id in self.snapshot_by_id,
                                  "Cannot set current snapshot to unknown: %s", (snapshot.snapshot_id,))

        now_millis = int(time.time() * 1000)
        new_snapshot_log = self.snapshot_log + [SnapshotLogEntry(now_millis, snapshot.snapshot_id)]

        return TableMetadata(self.ops, None, self.location,
                             now_millis, self.last_column_id, self.schema, self.spec, self.properties,
                             snapshot.snapshot_id, self.snapshots, new_snapshot_log)
示例#5
0
    def remove_snapshot_log_entries(self, snapshot_ids):
        """Build a new ``TableMetadata`` without the given snapshot log entries.

        Entries of ``self.snapshot_log`` whose ``snapshot_id`` is contained in
        ``snapshot_ids`` are dropped; the order of the remaining entries is kept.

        :param snapshot_ids: container of snapshot ids to remove (anything
            supporting ``in``).
        :returns: a new ``TableMetadata`` instance with the filtered log.
        :raises ValidationException: if a current snapshot is set
            (``current_snapshot_id >= 0``) and the last remaining log entry is
            not that snapshot, including the case where no entries remain.
        """
        new_snapshot_log = [entry for entry in self.snapshot_log
                            if entry.snapshot_id not in snapshot_ids]

        # Guard the [-1] lookup: an emptied log with a current snapshot set is
        # reported as a validation failure instead of an IndexError.
        check_snapshot = self.current_snapshot_id < 0 or (
            len(new_snapshot_log) > 0
            and new_snapshot_log[-1].snapshot_id == self.current_snapshot_id)
        ValidationException.check(check_snapshot,
                                  "Cannot set invalid snapshot log: latest entry is not the current snapshot")

        return TableMetadata(self.ops, None, self.location,
                             int(time.time() * 1000), self.last_column_id, self.schema, self.spec, self.properties,
                             self.current_snapshot_id, self.snapshots, new_snapshot_log)
示例#6
0
    def bind(self,
             struct: StructType,
             case_sensitive: bool = True) -> BoundReference:
        """Resolve ``self.name`` against ``struct`` and return a bound reference.

        A ``Schema`` is built from ``struct.fields`` and the field is looked up
        with ``find_field`` or, when ``case_sensitive`` is false, with
        ``case_insensitive_find_field``.

        :param struct: struct type whose ``fields`` are searched.
        :param case_sensitive: selects the case-sensitive lookup when true.
        :returns: ``BoundReference(struct, field)`` for the matching field.
        :raises ValidationException: if the lookup returns ``None``.
        """
        from iceberg.api import Schema

        lookup_schema = Schema(struct.fields)
        if case_sensitive:
            field = lookup_schema.find_field(self.name)
        else:
            field = lookup_schema.case_insensitive_find_field(self.name)

        ValidationException.check(
            field is not None,
            "Cannot find field '%s' in struct: %s",
            (self.name, lookup_schema.as_struct()),
        )

        return BoundReference(struct, field)
示例#7
0
 def test(self, struct: StructLike = None, value: Any = None) -> bool:
     """Evaluate this predicate against a struct or an already-extracted value.

     When ``struct`` is given, the value is obtained with
     ``self.term.eval(struct)`` and the method re-enters itself with that
     value. Otherwise ``value`` is dispatched to the unary, literal or set
     test according to the predicate-kind flags on this instance.

     :param struct: struct to evaluate the bound term against, or ``None``.
     :param value: value to test directly, or ``None``.
     :returns: the result of the selected predicate test.
     :raises ValidationException: if both ``struct`` and ``value`` are given,
         or if ``struct`` is given while ``self.term`` is not a ``BoundTerm``.
     """
     ValidationException.check(struct is None or value is None,
                               "Either struct or value must be none", ())
     if struct is not None:
         # Trailing comma: the format arguments must be a real 1-tuple.
         ValidationException.check(isinstance(self.term, BoundTerm),
                                   "Term must be bound to eval: %s",
                                   (self.term,))
         return self.test(value=self.term.eval(struct))  # type: ignore
     else:
         if self.is_unary_predicate:
             return self.test_unary_predicate(value)
         elif self.is_literal_predicate:
             return self.test_literal_predicate(value)
         else:
             return self.test_set_predicate(value)
示例#8
0
    def find(self, field_id, struct):
        """Return the position in ``struct.fields`` of the field with ``field_id``.

        :param field_id: id to search for.
        :param struct: object exposing a ``fields`` sequence whose items have
            a ``field_id`` attribute.
        :returns: zero-based index of the first matching field.
        :raises ValidationException: if no field carries ``field_id``.
        """
        for position, candidate in enumerate(struct.fields):
            # Match against the requested id (the one the error below
            # reports), not against instance state.
            if candidate.field_id == field_id:
                return position

        raise ValidationException("Cannot find top-level field id %d in struct: %s", (field_id, struct))
示例#9
0
    def bind_in_operation(self, bound_term):
        """Bind an IN / NOT_IN predicate to ``bound_term``.

        Each literal in ``self.literals`` is converted to ``bound_term.type``;
        literals that convert to the above-max / below-min sentinels are
        dropped. The result then depends on how many distinct literals remain:

        * none: NOT_IN becomes always-true, anything else always-false;
        * exactly one: IN becomes an EQ literal predicate, NOT_IN a NOT_EQ one;
        * otherwise: a set predicate over the distinct literals.

        :param bound_term: bound term exposing ``type``.
        :returns: an ``Expressions`` constant or a ``BoundPredicate``.
        :raises ValidationException: if a literal cannot be converted, or if a
            single literal remains and ``self.op`` is neither IN nor NOT_IN.
        """
        from .expressions import Expressions

        def convert_literal(lit):
            # Convert to the term's type, the value the error message reports.
            converted = lit.to(bound_term.type)
            ValidationException.check(
                converted is not None,
                "Invalid Value for conversion to type %s: %s (%s)",
                (bound_term.type, lit, lit.__class__.__name__))
            return converted

        above_max = Literals.above_max()
        below_min = Literals.below_min()
        # Convert everything first so an invalid literal is reported, then
        # filter into a real list (filter() would yield an iterator, which
        # has no len()).
        all_converted = [convert_literal(lit) for lit in self.literals]
        converted_literals = [lit for lit in all_converted
                              if lit != above_max and lit != below_min]

        if not converted_literals:
            # Compare against the actual operation; a bare enum member is
            # always truthy.
            if self.op == Operation.NOT_IN:
                return Expressions.always_true()
            return Expressions.always_false()

        literal_set = set(converted_literals)
        if len(literal_set) == 1:
            # Sets are not subscriptable; take the only element via iteration.
            only_literal = next(iter(literal_set))
            if self.op == Operation.IN:
                return BoundPredicate(Operation.EQ,
                                      bound_term,
                                      lit=only_literal,
                                      is_literal_predicate=True)
            elif self.op == Operation.NOT_IN:
                return BoundPredicate(Operation.NOT_EQ,
                                      bound_term,
                                      lit=only_literal,
                                      is_literal_predicate=True)
            else:
                raise ValidationException("Operation must be in or not in", ())

        return BoundPredicate(self.op,
                              bound_term,
                              literals=literal_set,
                              is_set_predicate=True)
示例#10
0
    def bind(self, struct, case_sensitive=True):  # noqa: C901
        """Bind this predicate to ``struct`` and simplify it where possible.

        The referenced field is looked up by ``self.ref.name`` (lower-cased
        for the case-insensitive lookup). Without a literal, only IS_NULL and
        NOT_NULL are accepted and a required field short-circuits to
        ``FALSE`` / ``TRUE``. With a literal, it is converted to the field
        type; the above-max / below-min sentinels collapse comparison
        operations to ``TRUE`` / ``FALSE``.

        :param struct: struct exposing ``field`` and ``case_insensitive_field``.
        :param case_sensitive: selects the case-sensitive lookup when true.
        :returns: ``TRUE``, ``FALSE`` or a ``BoundPredicate``.
        :raises ValidationException: if the field is not found, if there is no
            literal and the operation is not IS_NULL / NOT_NULL, or if the
            literal cannot be converted to the field type.
        """
        if case_sensitive:
            field = struct.field(self.ref.name)
        else:
            field = struct.case_insensitive_field(self.ref.name.lower())

        ValidationException.check(field is not None,
                                  "Cannot find field '%s' in struct %s",
                                  (self.ref.name, struct))

        if self.lit is None:
            if self.op == Operation.IS_NULL:
                if field.is_required:
                    return FALSE
                return BoundPredicate(Operation.IS_NULL,
                                      BoundReference(struct, field.field_id))
            elif self.op == Operation.NOT_NULL:
                if field.is_required:
                    return TRUE
                return BoundPredicate(Operation.NOT_NULL,
                                      BoundReference(struct, field.field_id))
            else:
                # Empty tuple rather than None: the message has no
                # placeholders, and None is not a valid "no arguments" value
                # for %-style formatting.
                raise ValidationException(
                    "Operation must be IS_NULL or NOT_NULL", ())

        literal = self.lit.to(field.type)
        if literal is None:
            raise ValidationException(
                "Invalid value for comparison inclusive type %s: %s (%s)",
                (field.type, self.lit.value, type(self.lit.value)))
        elif literal == Literals.above_max():
            if self.op in (Operation.LT, Operation.LT_EQ, Operation.NOT_EQ):
                return TRUE
            elif self.op in (Operation.GT, Operation.GT_EQ, Operation.EQ):
                return FALSE

        elif literal == Literals.below_min():
            if self.op in (Operation.LT, Operation.LT_EQ, Operation.NOT_EQ):
                return FALSE
            elif self.op in (Operation.GT, Operation.GT_EQ, Operation.EQ):
                return TRUE

        # Also reached when a sentinel literal is paired with an operation
        # outside the two groups above.
        return BoundPredicate(self.op, BoundReference(struct, field.field_id),
                              literal)
示例#11
0
    def test_set_predicate(self, value: Any) -> bool:
        """Evaluate an IN / NOT_IN predicate for ``value``.

        :param value: value tested for membership in this predicate's literals.
        :returns: membership result for IN, its negation for NOT_IN.
        :raises ValidationException: if no literals are set.
        :raises ValueError: if the operation is neither IN nor NOT_IN.
        """
        if self._literals is None:
            raise ValidationException("Literals must not be none", ())

        operation = self.op
        if operation == Operation.IN:
            return value in self._literals
        if operation == Operation.NOT_IN:
            return value not in self._literals

        raise ValueError(f"{self.op} is not a valid set predicate")
示例#12
0
    def bind_unary_operation(self, bound_term: BoundTerm) -> BoundPredicate:
        """Bind a unary operation (null / NaN check) to ``bound_term``.

        IS_NULL and NOT_NULL on a required field collapse to always-false and
        always-true respectively. IS_NAN and NOT_NAN are only accepted when
        ``self.floating_type`` approves the term's type id.

        :param bound_term: bound term exposing ``ref.field`` and ``ref.type``.
        :returns: an ``Expressions`` constant or a unary ``BoundPredicate``.
        :raises ValidationException: for a NaN check on a non-floating column,
            or for any operation outside the four listed above.
        """
        from .expressions import Expressions

        if self.op == Operation.IS_NULL:
            return (Expressions.always_false()
                    if bound_term.ref.field.is_required
                    else BoundPredicate(Operation.IS_NULL,
                                        bound_term,
                                        is_unary_predicate=True))

        if self.op == Operation.NOT_NULL:
            return (Expressions.always_true()
                    if bound_term.ref.field.is_required
                    else BoundPredicate(Operation.NOT_NULL,
                                        bound_term,
                                        is_unary_predicate=True))

        if self.op in [Operation.IS_NAN, Operation.NOT_NAN]:
            is_floating = self.floating_type(bound_term.ref.type.type_id)
            if not is_floating:
                raise ValidationException(
                    f"{self.op} cannot be used with a non-floating column", ())
            return BoundPredicate(self.op, bound_term, is_unary_predicate=True)

        raise ValidationException(
            f"Operation must be in [IS_NULL, NOT_NULL, IS_NAN, NOT_NAN] was:{self.op}",
            ())
示例#13
0
    def bind_literal_operation(self, bound_term):
        """Bind a single-literal comparison to ``bound_term``.

        The literal is converted to ``bound_term.type``. If the conversion
        yields the above-max or below-min sentinel, comparison operations
        collapse to an always-true / always-false expression; otherwise a
        literal ``BoundPredicate`` is returned.

        :param bound_term: bound term exposing ``type``.
        :returns: an ``Expressions`` constant or a literal ``BoundPredicate``.
        :raises ValidationException: if the literal cannot be converted.
        """
        from .expressions import Expressions

        converted = self.lit.to(bound_term.type)
        ValidationException.check(
            converted is not None,
            "Invalid Value for conversion to type %s: %s (%s)",
            (bound_term.type, self.lit, self.lit.__class__.__name__))

        less_or_not_equal = [Operation.LT, Operation.LT_EQ, Operation.NOT_EQ]
        greater_or_equal = [Operation.GT, Operation.GT_EQ, Operation.EQ]

        if converted == Literals.above_max():
            if self.op in less_or_not_equal:
                return Expressions.always_true()
            if self.op in greater_or_equal:
                return Expressions.always_false()
        elif converted == Literals.below_min():
            if self.op in less_or_not_equal:
                return Expressions.always_false()
            if self.op in greater_or_equal:
                return Expressions.always_true()

        # Ordinary literal, or a sentinel paired with some other operation.
        return BoundPredicate(self.op,
                              bound_term,
                              lit=converted,
                              is_literal_predicate=True)
示例#14
0
    def test_literal_predicate(self, value: Any) -> bool:
        """Compare ``value`` with this predicate's literal using ``self.op``.

        :param value: left-hand side of the comparison.
        :returns: result of ``value <op> self.lit.value``.
        :raises ValidationException: if no literal is set.
        :raises ValueError: if the operation is not one of LT, LT_EQ, GT,
            GT_EQ, EQ, NOT_EQ.
        """
        if self.lit is None:
            raise ValidationException("Literal must not be none", ())

        operation = self.op
        if operation == Operation.LT:
            return value < self.lit.value
        if operation == Operation.LT_EQ:
            return value <= self.lit.value
        if operation == Operation.GT:
            return value > self.lit.value
        if operation == Operation.GT_EQ:
            return value >= self.lit.value
        if operation == Operation.EQ:
            return value == self.lit.value
        if operation == Operation.NOT_EQ:
            return value != self.lit.value

        raise ValueError(f"{self.op} is not a valid literal predicate")
示例#15
0
    def __init__(self,
                 op: Operation,
                 term: BoundTerm,
                 lit: BaseLiteral = None,
                 literals: List[BaseLiteral] = None,
                 is_unary_predicate: bool = False,
                 is_literal_predicate: bool = False,
                 is_set_predicate: bool = False):
        """Create a bound predicate of exactly one kind: unary, literal or set.

        :param op: operation, forwarded to the parent initializer.
        :param term: bound term, forwarded to the parent initializer.
        :param lit: literal; required for literal predicates, forbidden for
            unary ones.
        :param literals: literals; required for set predicates.
        :param is_unary_predicate: marks the predicate as unary.
        :param is_literal_predicate: marks the predicate as a literal one.
        :param is_set_predicate: marks the predicate as a set one.
        :raises ValidationException: if the number of kind flags set is not
            exactly one, or the literal arguments do not fit the kind.
        """
        self.is_unary_predicate = is_unary_predicate
        self.is_literal_predicate = is_literal_predicate
        self.is_set_predicate = is_set_predicate

        super(BoundPredicate, self).__init__(op, term)

        flags_set = sum(
            (is_unary_predicate, is_literal_predicate, is_set_predicate))
        ValidationException.check(
            flags_set == 1,
            "Only a single predicate type may be set: %s=%s, %s=%s, %s=%s",
            ("is_unary_predicate", is_unary_predicate,
             "is_literal_predicate", is_literal_predicate,
             "is_set_predicate", is_set_predicate))

        self._literals: Optional[List[BaseLiteral]] = None

        if self.is_unary_predicate:
            ValidationException.check(
                lit is None, "Unary Predicates may not have a literal", ())
            return

        if self.is_literal_predicate:
            ValidationException.check(
                lit is not None, "Literal Predicates must have a literal set",
                ())
            # A literal predicate stores its literal as a one-element list.
            self._literals = [lit]  # type: ignore
            return

        if self.is_set_predicate:
            ValidationException.check(literals is not None,
                                      "Set Predicates must have literals set",
                                      ())
            self._literals = literals
            return

        raise ValueError(
            f"Unable to instantiate {op} -> (lit={lit}, literal={literals}"
        )
示例#16
0
 def eval(self, struct: StructLike) -> bool:
     """Evaluate this predicate against ``struct``.

     The bound term extracts a value from ``struct`` and that value is handed
     to ``self.test``.

     :param struct: struct passed to ``self.term.eval``.
     :returns: the result of ``self.test`` for the extracted value.
     :raises ValidationException: if ``self.term`` is not a ``BoundTerm``.
     """
     # Trailing comma: the format arguments must be a real 1-tuple.
     ValidationException.check(isinstance(self.term, BoundTerm),
                               "Term must be bound to eval: %s",
                               (self.term,))
     # Pass the extracted value by keyword: the first positional parameter of
     # ``test`` is ``struct``, so a positional call would treat the value as a
     # struct to evaluate again.
     return self.test(value=self.term.eval(struct))  # type: ignore