Exemplo n.º 1
0
 def from_buffer(buf, format="json", explicit_partitions=True):
     """
     Build a ``DatasetMetadata`` from a serialized buffer.

     Parameters
     ----------
     buf
         Serialized metadata to decode.
     format
         ``"json"`` decodes ``buf`` with ``load_json``; any other value
         decodes it with ``msgpack.unpackb``.
     explicit_partitions
         Passed through unchanged to ``DatasetMetadata.from_dict``.

     Returns
     -------
     The result of ``DatasetMetadata.from_dict`` for the decoded payload.
     """
     # Pick the decoder first, then apply it; only the selected decoder is
     # ever looked up, exactly like the two-branch form.
     decode = load_json if format == "json" else msgpack.unpackb
     payload = decode(buf)
     return DatasetMetadata.from_dict(
         payload, explicit_partitions=explicit_partitions)
Exemplo n.º 2
0
def test_roundtrip_msgpack():
    """A metadata dict must survive msgpack -> DatasetMetadata -> msgpack."""
    partitions = {"part_1": {"files": {"core": "file.parquet"}}}
    indices = {"p_id": {"1": ["part_1"]}}
    expected = {
        "dataset_metadata_version": 4,
        "dataset_uuid": "uuid",
        "metadata": {"key": "value", "creation_time": "2000-01-01 01:01:01"},
        "partitions": partitions,
        "partition_keys": [],
        "indices": indices,
    }

    # Spell the round trip out step by step: pack, parse, re-serialize, unpack.
    packed = msgpack.packb(expected)
    dataset = DatasetMetadata.from_buffer(packed, format="msgpack")
    result = msgpack.unpackb(dataset.to_msgpack())

    assert expected == result
Exemplo n.º 3
0
 def from_buffer(buf: str, format: str = "json", explicit_partitions: bool = True):
     """
     Build a ``DatasetMetadata`` from a serialized buffer.

     Parameters
     ----------
     buf
         Serialized metadata to decode.
     format
         ``"json"`` selects ``load_json``; every other value selects
         ``unpackb``.
     explicit_partitions
         Forwarded as-is to ``DatasetMetadata.from_dict``.

     Returns
     -------
     The result of ``DatasetMetadata.from_dict`` for the decoded payload.
     """
     parsed = unpackb(buf) if format != "json" else load_json(buf)
     return DatasetMetadata.from_dict(parsed, explicit_partitions=explicit_partitions)
Exemplo n.º 4
0
def test_builder_msgpack(metadata_version, frozen_time):
    """
    Check the key and payload produced by ``DatasetMetadataBuilder.to_msgpack``.

    ``frozen_time`` is not used in the body; presumably it is a fixture that
    pins the clock so ``creation_time`` equals ``TIME_TO_FREEZE_ISO`` --
    confirm against the fixture definition.
    """
    builder = DatasetMetadataBuilder("uuid", metadata_version=metadata_version)
    key, packed = builder.to_msgpack()
    result = msgpack.unpackb(packed)

    expected = {
        "dataset_uuid": "uuid",
        "dataset_metadata_version": metadata_version,
        "metadata": {"creation_time": TIME_TO_FREEZE_ISO},
        "partitions": {},
    }
    assert key == "uuid.by-dataset-metadata.msgpack.zstd"
    assert result == expected
Exemplo n.º 5
0
    def load_from_store(
        uuid: str,
        store: StoreInput,
        load_schema: bool = True,
        load_all_indices: bool = False,
    ) -> "DatasetMetadata":
        """
        Read a dataset's metadata from a store and parse it.

        The JSON metadata key is tried first; if the lookup raises
        ``KeyError`` the msgpack metadata key is tried instead.

        Parameters
        ----------
        uuid
            UUID of the dataset.
        store
            Store input; it is normalized with ``ensure_store`` and then
            queried through its ``.get`` method.
        load_schema
            Forwarded to ``DatasetMetadata.load_from_dict``.
        load_all_indices
            When true, ``load_all_indices(store)`` is called on the loaded
            dataset and its result is returned.

        Returns
        -------
        dataset_metadata: :class:`~kartothek.core.dataset.DatasetMetadata`
            Parsed metadata.

        Raises
        ------
        KeyError
            If neither the JSON nor the msgpack key can be loaded.
        """
        json_key = naming.metadata_key_from_uuid(uuid)
        store = ensure_store(store)
        try:
            raw = store.get(json_key)
            # Parsing stays inside the ``try`` so that a KeyError raised
            # while parsing also falls through to the msgpack attempt.
            metadata = load_json(raw)
        except KeyError:
            msgpack_key = naming.metadata_key_from_uuid(uuid, format="msgpack")
            try:
                raw = store.get(msgpack_key)
                metadata = unpackb(raw)
            except KeyError:
                message = "Dataset does not exist. Tried {} and {}".format(
                    json_key, msgpack_key)
                raise KeyError(message)

        dataset = DatasetMetadata.load_from_dict(
            metadata, store, load_schema=load_schema)
        if not load_all_indices:
            return dataset
        return dataset.load_all_indices(store)
Exemplo n.º 6
0
    def load_from_buffer(buf, store, format="json"):
        """
        Load a dataset from a (string) buffer.

        Parameters
        ----------
        buf: Union[str, bytes]
            Input to be parsed.
        store: simplekv.KeyValueStore
            Object that implements the .get method for file/object loading.
        format: str
            Serialization format of ``buf``: ``"json"`` (default) or
            ``"msgpack"``.

        Returns
        -------
        dataset_metadata: :class:`~kartothek.core.dataset.DatasetMetadata`
            Parsed metadata.

        Raises
        ------
        ValueError
            If ``format`` is neither ``"json"`` nor ``"msgpack"``.
        """
        if format == "json":
            metadata = load_json(buf)
        elif format == "msgpack":
            metadata = msgpack.unpackb(buf)
        else:
            # Without this branch an unsupported format left ``metadata``
            # unbound and surfaced as an opaque UnboundLocalError below.
            raise ValueError(
                "Unknown format {!r}. Supported formats are 'json' and "
                "'msgpack'.".format(format))
        return DatasetMetadata.load_from_dict(metadata, store)
Exemplo n.º 7
0
    def load_from_buffer(buf,
                         store: StoreInput,
                         format: str = "json") -> "DatasetMetadata":
        """
        Load a dataset from a (string) buffer.

        Parameters
        ----------
        buf:
            Input to be parsed.
        store:
            Object that implements the .get method for file/object loading.
        format:
            Serialization format of ``buf``: ``"json"`` (default) or
            ``"msgpack"``.

        Returns
        -------
        DatasetMetadata:
            Parsed metadata.

        Raises
        ------
        ValueError
            If ``format`` is neither ``"json"`` nor ``"msgpack"``.
        """
        if format == "json":
            metadata = load_json(buf)
        elif format == "msgpack":
            metadata = unpackb(buf)
        else:
            # Without this branch an unsupported format left ``metadata``
            # unbound and surfaced as an opaque UnboundLocalError below.
            raise ValueError(
                "Unknown format {!r}. Supported formats are 'json' and "
                "'msgpack'.".format(format))
        return DatasetMetadata.load_from_dict(metadata, store)
Exemplo n.º 8
0
def test_msgpack():
    """A nested dict with non-ASCII text must round-trip through packb/unpackb."""
    original = {"a": 1, "b": {"c": "ÖaŒ"}}
    restored = unpackb(packb(original))
    assert original == restored
Exemplo n.º 9
0
def test_msgpack_storage(store):
    """A packed dict written to ``store`` must unpack to the same dict after a read."""
    payload = {"a": 1, "b": {"c": "ÖaŒ"}}
    storage_key = "test"

    store.put(storage_key, packb(payload))
    fetched = store.get(storage_key)

    assert payload == unpackb(fetched)