def _copy_naive( key_mappings: Dict[str, str], src_store: KeyValueStore, tgt_store: KeyValueStore, md_transformed: Optional[Dict[str, DatasetMetadata]] = None, ): """ Copies a list of items from one KV store to another. Parameters ---------- key_mappings: Dict[str, str] Mapping of source key names to target key names. May be equal if a key will not be renamed. src_store: simplekv.KeyValueStore Source KV store– tgt_store: simplekv.KeyValueStore Target KV store md_transformed: Dict[str, DatasetMetadata] Mapping containing {target dataset uuid: modified target metadata} values which will be written directly instead of being copied """ for src_key, tgt_key in key_mappings.items(): if (md_transformed is not None) and (tgt_key in md_transformed): item = md_transformed.get(tgt_key).to_json() # type: ignore else: item = src_store.get(src_key) tgt_store.put(tgt_key, item)
def store(self, store: KeyValueStore, dataset_uuid: str) -> str:
    """
    Persist this index as a Parquet file in ``store``.

    If ``self.index_storage_key`` is set and already contains
    ``dataset_uuid``, that key is reused. Otherwise a new key of the form
    `{dataset_uuid}/indices/{column}/{timestamp}.by-dataset-index.parquet`
    is generated, where the timestamp (nanosecond accuracy, taken from
    ``self.creation_time`` set at Index object initialization) and column
    name are URL-quoted.

    Parameters
    ----------
    store:
        KV store implementing ``put(key, data)``.
    dataset_uuid:
        Unique ID of the dataset this index belongs to.

    Returns
    -------
    str
        The key under which the serialized index was written.
    """
    existing_key = self.index_storage_key
    if existing_key is not None and dataset_uuid and dataset_uuid in existing_key:
        key = existing_key
    else:
        key = "{dataset_uuid}/indices/{column}/{timestamp}{suffix}".format(
            dataset_uuid=dataset_uuid,
            suffix=naming.EXTERNAL_INDEX_SUFFIX,
            column=quote(self.column),
            timestamp=quote(self.creation_time.isoformat()),
        )
    # Serialize the index dict to an Arrow table and write it as Parquet
    # into an in-memory buffer before handing the bytes to the KV store.
    serialized = _index_dct_to_table(self.index_dct, self.column, self.dtype)
    buf = pa.BufferOutputStream()
    pq.write_table(serialized, buf)
    store.put(key, buf.getvalue().to_pybytes())
    return key
def store_schema_metadata(
    schema: SchemaWrapper,
    dataset_uuid: str,
    store: KeyValueStore,
    table: str = SINGLE_TABLE,
) -> str:
    """
    Write schema and metadata for a dataset table to the store.

    Parameters
    ----------
    schema
        Schema information for DataFrame/table.
    dataset_uuid
        Unique ID of the dataset in question.
    store
        Object that implements `.put(key, data)` to write data.
    table
        Table to write metadata for.

    Returns
    -------
    key: str
        Key to which the metadata was written to.
    """
    payload = _schema2bytes(schema.internal())
    key = _get_common_metadata_key(dataset_uuid=dataset_uuid, table=table)
    return store.put(key, payload)