Exemplo n.º 1
0
    def load(self, store: StoreInput):
        """
        Materialize an externally stored index in memory.

        An index that is already loaded is handed back unchanged. Otherwise
        the serialized index is fetched from the store under
        ``self.index_storage_key``, decoded, and wrapped in a new index
        object.

        Parameters
        ----------
        store
            Store input. It is passed through ``ensure_store`` and the result
            must implement the .get method for file/object loading.

        Returns
        -------
        index: [kartothek.core.index.ExplicitSecondaryIndex]
            ``self`` when already loaded, otherwise a newly built index
            holding the decoded index dictionary.
        """
        if not self.loaded:
            backend = ensure_store(store)
            raw_index = backend.get(self.index_storage_key)
            mapping, decoded_dtype = _parquet_bytes_to_dict(
                self.column, raw_index)

            # The dtype is used exactly as decoded from the stored bytes, so
            # dtype normalization is switched off for the new object.
            return ExplicitSecondaryIndex(
                column=self.column,
                index_dct=mapping,
                dtype=decoded_dtype,
                index_storage_key=self.index_storage_key,
                normalize_dtype=False,
            )

        return self
Exemplo n.º 2
0
    def load_from_store(
        uuid: str,
        store: StoreInput,
        load_schema: bool = True,
        load_all_indices: bool = False,
    ) -> "DatasetMetadata":
        """
        Load a dataset from a storage.

        The metadata is first looked up under the default metadata key for
        ``uuid``. If that key is missing, the msgpack metadata key is tried
        instead.

        Parameters
        ----------
        uuid
            UUID of the dataset.
        store
            Object that implements the .get method for file/object loading.
        load_schema
            Load table schema
        load_all_indices
            Load all registered indices into memory.

        Returns
        -------
        dataset_metadata: :class:`~kartothek.core.dataset.DatasetMetadata`
            Parsed metadata.

        Raises
        ------
        KeyError
            If neither of the two metadata keys exists in the store.
        """
        json_key = naming.metadata_key_from_uuid(uuid)
        store = ensure_store(store)

        # Only the store lookups are guarded: a KeyError raised while decoding
        # an existing payload must not be mistaken for a missing dataset.
        try:
            payload = store.get(json_key)
        except KeyError:
            msgpack_key = naming.metadata_key_from_uuid(uuid, format="msgpack")
            try:
                payload = store.get(msgpack_key)
            except KeyError as err:
                # Chain explicitly so the traceback names the failed lookup
                # as the cause rather than an incidental nested error.
                raise KeyError(
                    "Dataset does not exist. Tried {} and {}".format(
                        json_key, msgpack_key)) from err
            metadata = unpackb(payload)
        else:
            metadata = load_json(payload)

        ds = DatasetMetadata.load_from_dict(metadata,
                                            store,
                                            load_schema=load_schema)
        if load_all_indices:
            ds = ds.load_all_indices(store)
        return ds