Пример #1
0
def test_table_scan_honors_select_without_case_sensitivity(ts_table):
    """Check that selecting "ID" case-insensitively projects the "id" column.

    Two scans are built from ``ts_table`` with the ``case_sensitive(False)``
    and ``select(["ID"])`` refinements applied in opposite orders; each one
    is expected to expose a schema containing only the required integer
    field "id" (field id 1).
    """
    flag_then_select = ts_table.new_scan().case_sensitive(False).select(["ID"])
    # same refinements, applied the other way round: the outcome must not change
    select_then_flag = ts_table.new_scan().select(["ID"]).case_sensitive(False)

    id_column = NestedField.required(1, "id", IntegerType.get())
    expected_schema = Schema([id_column])

    for scan in (flag_then_select, select_then_flag):
        assert scan.schema.as_struct() == expected_schema.as_struct()
Пример #2
0
    def bind(self, struct: StructType, case_sensitive: bool = True) -> BoundReference:
        """Resolve this reference's name against ``struct``.

        Parameters
        ----------
        struct : StructType
            The struct whose fields are searched for ``self.name``
        case_sensitive : bool
            When True (the default) the lookup uses ``Schema.find_field``;
            otherwise it uses ``Schema.case_insensitive_find_field``

        Returns
        -------
        BoundReference
            A reference built from ``struct`` and the field that was found

        Notes
        -----
        The lookup result is passed through ``ValidationException.check``
        with the condition "field is not None"; presumably that raises a
        ValidationException when no field matches - confirm against its
        definition.
        """
        # Function-scope import kept from the original (presumably to avoid a
        # circular import at module load - TODO confirm)
        from iceberg.api import Schema

        lookup_schema = Schema(struct.fields)
        if case_sensitive:
            matched = lookup_schema.find_field(self.name)
        else:
            matched = lookup_schema.case_insensitive_find_field(self.name)

        ValidationException.check(matched is not None,
                                  "Cannot find field '%s' in struct: %s",
                                  (self.name, lookup_schema.as_struct()))

        return BoundReference(struct, matched)
def prune_columns(file_schema: Schema, expected_schema: Schema) -> List[str]:
    """
    Given two Iceberg schemas, returns a list of column names for all ids in the
    file schema that are projected in the expected schema

    Only the fields of ``file_schema.as_struct()`` are inspected, and the
    returned names keep the order in which those fields appear.

    Parameters
    ----------
    file_schema : iceberg.api.Schema
        An Iceberg schema of the file being read
    expected_schema : iceberg.api.Schema
        An Iceberg schema of the final projection
    Returns
    -------
    list
        The column names in the file that matched ids in the expected schema
    """
    # The projected ids do not depend on the column being examined, so resolve
    # them once up front instead of once per column. Materialising them as a
    # set keeps each membership check constant-time (field ids are assumed to
    # be hashable, e.g. ints).
    projected_ids = set(get_projected_ids(expected_schema))
    return [column.name for column in file_schema.as_struct().fields
            if column.id in projected_ids]
Пример #4
0
def test_table_scan_honors_select(ts_table):
    """Check that a scan refined with ``select(["id"])`` projects only "id".

    The scan's schema is compared, as a struct, with a schema holding the
    single required integer field "id" (field id 1).
    """
    projected_scan = ts_table.new_scan().select(["id"])

    id_column = NestedField.required(1, "id", IntegerType.get())
    expected_schema = Schema([id_column])

    actual_struct = projected_scan.schema.as_struct()
    assert actual_struct == expected_schema.as_struct()