def load(self, store: StoreInput):
    """
    Load an external index into memory.

    Returns a new index object that contains the index dictionary.
    Returns itself if the index is internal or an already loaded index.

    Parameters
    ----------
    store
        Object that implements the .get method for file/object loading.

    Returns
    -------
    index: [kartothek.core.index.ExplicitSecondaryIndex]
    """
    # Internal or already-loaded indices need no work.
    if self.loaded:
        return self

    backing_store = ensure_store(store)
    raw_bytes = backing_store.get(self.index_storage_key)
    loaded_dct, loaded_dtype = _parquet_bytes_to_dict(self.column, raw_bytes)
    return ExplicitSecondaryIndex(
        column=self.column,
        index_dct=loaded_dct,
        dtype=loaded_dtype,
        index_storage_key=self.index_storage_key,
        normalize_dtype=False,
    )
def load_from_store(
    uuid: str,
    store: StoreInput,
    load_schema: bool = True,
    load_all_indices: bool = False,
) -> "DatasetMetadata":
    """
    Load a dataset from a storage

    Parameters
    ----------
    uuid
        UUID of the dataset.
    store
        Object that implements the .get method for file/object loading.
    load_schema
        Load table schema
    load_all_indices
        Load all registered indices into memory.

    Returns
    -------
    dataset_metadata: :class:`~kartothek.core.dataset.DatasetMetadata`
        Parsed metadata.
    """
    json_key = naming.metadata_key_from_uuid(uuid)
    store = ensure_store(store)
    # Prefer the JSON representation; fall back to msgpack if it is absent.
    try:
        metadata = load_json(store.get(json_key))
    except KeyError:
        msgpack_key = naming.metadata_key_from_uuid(uuid, format="msgpack")
        try:
            metadata = unpackb(store.get(msgpack_key))
        except KeyError:
            raise KeyError(
                "Dataset does not exist. Tried {} and {}".format(
                    json_key, msgpack_key
                )
            )

    dataset = DatasetMetadata.load_from_dict(
        metadata, store, load_schema=load_schema
    )
    if load_all_indices:
        dataset = dataset.load_all_indices(store)
    return dataset