def add_per_vector_min_max_to_table_schema_metadata(
    table: pa.Table, per_vector_min_max: Dict[str, dict]
) -> pa.Table:
    """Store the per-vector min/max dict in the table's schema metadata.

    The dict is wrapped under ``_PER_VECTOR_MIN_MAX_KEY``, serialised to JSON
    and written to the ``_MAIN_WEBVIZ_METADATA_KEY`` entry of the schema
    metadata. All other existing metadata entries are carried over unchanged;
    any previous value stored under ``_MAIN_WEBVIZ_METADATA_KEY`` is replaced.

    Args:
        table: Table whose schema metadata should be extended.
        per_vector_min_max: Min/max information keyed by vector name.

    Returns:
        A table with the combined schema metadata.
    """
    existing_meta = table.schema.metadata

    # Start from a copy of whatever metadata is already present (if any).
    merged_meta = dict(existing_meta) if existing_meta is not None else {}

    merged_meta[_MAIN_WEBVIZ_METADATA_KEY] = json.dumps(
        {_PER_VECTOR_MIN_MAX_KEY: per_vector_min_max}
    )

    return table.replace_schema_metadata(merged_meta)
def _update_metadata(table: pa.Table, new_metadata=None) -> pa.Table:
    """Append user-defined table-level metadata to the table's schema metadata.

    Each value in ``new_metadata`` is serialised as a JSON-encoded UTF-8 byte
    string and stored under its key alongside the metadata already present on
    the table. Keys that already exist are overwritten.

    Args:
        table: Table whose schema metadata should be extended.
        new_metadata: Mapping of metadata key to a JSON-serialisable value.
            ``None`` or an empty mapping leaves the table untouched.

    Returns:
        The original table when there is nothing to add, otherwise a table
        with the merged schema metadata.
    """
    # with help from stackoverflow users 3519145 'thomas' and 289784 'suvayu'
    if not new_metadata:
        return table

    # The schema metadata is None for a table that carries no metadata, so
    # fall back to an empty dict; copying also avoids mutating the mapping
    # handed out by the schema.
    merged_metadata = dict(table.schema.metadata or {})

    # update original metadata with new metadata from user
    for key, value in new_metadata.items():
        merged_metadata[key] = json.dumps(value).encode("utf-8")

    # replace metadata in table object
    return table.replace_schema_metadata(merged_metadata)
def table_cast(table: pa.Table, schema: pa.Schema):
    """Cast a table to a target schema; improved version of pa.Table.cast.

    It supports casting to feature types stored in the schema metadata.

    Args:
        table (pa.Table): PyArrow table to cast
        schema (pa.Schema): target PyArrow schema.

    Returns:
        pa.Table: the casted table
    """
    # Schemas differ: do a full cast through the Features derived from the
    # target schema.
    if table.schema != schema:
        from .features import Features

        target_features = Features.from_arrow_schema(schema)
        return cast_table_to_features(table, target_features)

    # Schemas compare equal and the metadata matches too: nothing to do.
    if table.schema.metadata == schema.metadata:
        return table

    # Only the metadata differs: swap it in without casting.
    return table.replace_schema_metadata(schema.metadata)