def from_buffer(buf, format="json", explicit_partitions=True):
    """Parse dataset metadata from a serialized buffer.

    Parameters
    ----------
    buf
        Serialized metadata; JSON text or a msgpack payload.
    format
        Serialization format. ``"json"`` is parsed with :func:`load_json`;
        any other value falls through to msgpack decoding.
    explicit_partitions
        Forwarded to :meth:`DatasetMetadata.from_dict`.

    Returns
    -------
    DatasetMetadata
        The parsed dataset metadata object.
    """
    parsed = load_json(buf) if format == "json" else msgpack.unpackb(buf)
    return DatasetMetadata.from_dict(parsed, explicit_partitions=explicit_partitions)
def test_roundtrip_msgpack():
    """Metadata packed with msgpack survives from_buffer -> to_msgpack unchanged."""
    expected = {
        "dataset_metadata_version": 4,
        "dataset_uuid": "uuid",
        "metadata": {"key": "value", "creation_time": "2000-01-01 01:01:01"},
        "partitions": {"part_1": {"files": {"core": "file.parquet"}}},
        "partition_keys": [],
        "indices": {"p_id": {"1": ["part_1"]}},
    }
    packed = msgpack.packb(expected)
    dataset = DatasetMetadata.from_buffer(packed, format="msgpack")
    result = msgpack.unpackb(dataset.to_msgpack())
    assert expected == result
def from_buffer(buf: str, format: str = "json", explicit_partitions: bool = True):
    """Parse dataset metadata from a serialized buffer.

    Parameters
    ----------
    buf
        Serialized metadata; JSON text or a msgpack payload.
        (NOTE(review): annotated ``str`` but msgpack decoding usually expects
        ``bytes`` — confirm against callers.)
    format
        Serialization format. ``"json"`` uses :func:`load_json`; any other
        value falls through to msgpack decoding.
    explicit_partitions
        Forwarded to :meth:`DatasetMetadata.from_dict`.
    """
    if format == "json":
        parsed = load_json(buf)
    else:
        parsed = unpackb(buf)
    return DatasetMetadata.from_dict(parsed, explicit_partitions=explicit_partitions)
def test_builder_msgpack(metadata_version, frozen_time):
    """Builder emits the expected storage key and msgpack payload."""
    builder = DatasetMetadataBuilder("uuid", metadata_version=metadata_version)
    key, payload = builder.to_msgpack()
    decoded = msgpack.unpackb(payload)
    expected = {
        "dataset_uuid": "uuid",
        "dataset_metadata_version": metadata_version,
        "metadata": {"creation_time": TIME_TO_FREEZE_ISO},
        "partitions": {},
    }
    assert key == "uuid.by-dataset-metadata.msgpack.zstd"
    assert decoded == expected
def load_from_store(
    uuid: str,
    store: StoreInput,
    load_schema: bool = True,
    load_all_indices: bool = False,
) -> "DatasetMetadata":
    """
    Load a dataset from a storage

    Parameters
    ----------
    uuid
        UUID of the dataset.
    store
        Object that implements the .get method for file/object loading.
    load_schema
        Load table schema
    load_all_indices
        Load all registered indices into memory.

    Returns
    -------
    dataset_metadata: :class:`~kartothek.core.dataset.DatasetMetadata`
        Parsed metadata.
    """
    primary_key = naming.metadata_key_from_uuid(uuid)
    store = ensure_store(store)
    try:
        # Preferred representation: JSON metadata under the canonical key.
        metadata = load_json(store.get(primary_key))
    except KeyError:
        # Fall back to the msgpack representation before giving up.
        fallback_key = naming.metadata_key_from_uuid(uuid, format="msgpack")
        try:
            metadata = unpackb(store.get(fallback_key))
        except KeyError:
            raise KeyError(
                "Dataset does not exist. Tried {} and {}".format(
                    primary_key, fallback_key
                )
            )
    ds = DatasetMetadata.load_from_dict(metadata, store, load_schema=load_schema)
    return ds.load_all_indices(store) if load_all_indices else ds
def load_from_buffer(buf, store, format="json"):
    """
    Load a dataset from a (string) buffer.

    Parameters
    ----------
    buf: Union[str, bytes]
        Input to be parsed.
    store: simplekv.KeyValueStore
        Object that implements the .get method for file/object loading.
    format: str
        Serialization format of ``buf``; either ``"json"`` or ``"msgpack"``.

    Returns
    -------
    dataset_metadata: :class:`~kartothek.core.dataset.DatasetMetadata`
        Parsed metadata.

    Raises
    ------
    ValueError
        If ``format`` is neither ``"json"`` nor ``"msgpack"``.
    """
    if format == "json":
        metadata = load_json(buf)
    elif format == "msgpack":
        metadata = msgpack.unpackb(buf)
    else:
        # Fail loudly instead of dying later with an UnboundLocalError on
        # `metadata` when an unsupported format string is passed.
        raise ValueError(f"Unknown format: {format}")
    return DatasetMetadata.load_from_dict(metadata, store)
def load_from_buffer(buf, store: StoreInput, format: str = "json") -> "DatasetMetadata":
    """
    Load a dataset from a (string) buffer.

    Parameters
    ----------
    buf:
        Input to be parsed.
    store:
        Object that implements the .get method for file/object loading.
    format:
        Serialization format of ``buf``; either ``"json"`` or ``"msgpack"``.

    Returns
    -------
    DatasetMetadata:
        Parsed metadata.

    Raises
    ------
    ValueError
        If ``format`` is neither ``"json"`` nor ``"msgpack"``.
    """
    if format == "json":
        metadata = load_json(buf)
    elif format == "msgpack":
        metadata = unpackb(buf)
    else:
        # Fail loudly instead of dying later with an UnboundLocalError on
        # `metadata` when an unsupported format string is passed.
        raise ValueError(f"Unknown format: {format}")
    return DatasetMetadata.load_from_dict(metadata, store)
def test_msgpack():
    """A nested dict with non-ASCII text round-trips through packb/unpackb."""
    original = {"a": 1, "b": {"c": "ÖaŒ"}}
    restored = unpackb(packb(original))
    assert restored == original
def test_msgpack_storage(store):
    """A msgpack payload written to the store reads back to the same dict."""
    original = {"a": 1, "b": {"c": "ÖaŒ"}}
    key = "test"
    store.put(key, packb(original))
    fetched = store.get(key)
    assert unpackb(fetched) == original