def test_table_scan_honors_select_without_case_sensitivity(ts_table):
    """Check that selecting ``"ID"`` case-insensitively projects the ``id`` column.

    Two scans are built from ``ts_table`` with the same refinements applied
    in opposite order; each must expose a schema equal to a single required
    integer field named ``id`` with field id 1.
    """
    insensitive_then_select = ts_table.new_scan().case_sensitive(False).select(["ID"])
    # Same refinements in the reverse order: the outcome must not depend on it.
    select_then_insensitive = ts_table.new_scan().select(["ID"]).case_sensitive(False)

    id_field = NestedField.required(1, "id", IntegerType.get())
    expected_struct = Schema([id_field]).as_struct()

    for scan in (insensitive_then_select, select_then_insensitive):
        assert scan.schema.as_struct() == expected_struct
def bind(self, struct: StructType, case_sensitive: bool = True) -> BoundReference:
    """Resolve this reference's name against ``struct`` and bind to the match.

    Parameters
    ----------
    struct : StructType
        Struct whose ``fields`` are searched for ``self.name``.
    case_sensitive : bool
        When True (the default) the lookup goes through ``find_field``;
        otherwise through ``case_insensitive_find_field``.

    Returns
    -------
    BoundReference
        Constructed from ``struct`` and the field that was found.

    Notes
    -----
    ``ValidationException.check`` is handed the "field was found" condition;
    presumably it raises when that condition is false -- confirm against
    its definition.
    """
    # Function-scope import, kept exactly as the original had it.
    from iceberg.api import Schema

    lookup_schema = Schema(struct.fields)
    if case_sensitive:
        field = lookup_schema.find_field(self.name)
    else:
        field = lookup_schema.case_insensitive_find_field(self.name)

    was_found = field is not None
    message_args = (self.name, lookup_schema.as_struct())
    ValidationException.check(was_found, "Cannot find field '%s' in struct: %s", message_args)
    return BoundReference(struct, field)
def prune_columns(file_schema: Schema, expected_schema: Schema) -> List[str]:
    """
    Given two Iceberg schemas, return the names of the top-level columns in
    the file schema whose ids are projected in the expected schema.

    Parameters
    ----------
    file_schema : iceberg.api.Schema
        An Iceberg schema of the file being read
    expected_schema : iceberg.api.Schema
        An Iceberg schema of the final projection

    Returns
    -------
    list
        The column names in the file that matched ids in the expected schema,
        in the order the columns appear in the file schema
    """
    # The projected ids depend only on expected_schema, so compute them once
    # rather than once per column inside the comprehension's filter.
    projected_ids = get_projected_ids(expected_schema)
    return [column.name
            for column in file_schema.as_struct().fields
            if column.id in projected_ids]
def test_table_scan_honors_select(ts_table):
    """Check that selecting ``"id"`` narrows the scan schema to that column.

    The scan's schema must equal a schema holding a single required integer
    field named ``id`` with field id 1.
    """
    projected_scan = ts_table.new_scan().select(["id"])

    id_field = NestedField.required(1, "id", IntegerType.get())
    expected_struct = Schema([id_field]).as_struct()

    actual_struct = projected_scan.schema.as_struct()
    assert actual_struct == expected_struct