def modify_uuid(self, target_uuid: str):
    """
    Modify the dataset uuid and depending metadata:

    - paths to partitioning files
    - path to index files

    Only storage keys that start with ``"<current uuid>/"`` are rewritten.
    Keys that merely contain the current uuid somewhere else are left
    untouched, for partition files and index files alike.

    Parameters
    ----------
    target_uuid: str
        Modified dataset UUID.

    Returns
    -------
    DatasetMetadataBuilder
        modified builder object (this same instance, updated in place)
    """
    old_prefix = f"{self.uuid}/"
    new_prefix = f"{target_uuid}/"

    # modify file names in partition metadata
    modified_partitions = {}
    for p_key, p in self.partitions.items():
        pdict = p.to_dict()
        for table_key, table_file in pdict["files"].items():
            if table_file.startswith(old_prefix):
                # Swap only the leading "<uuid>/" segment of the path.
                pdict["files"][table_key] = (
                    new_prefix + table_file[len(old_prefix):]
                )
        modified_partitions[p_key] = Partition.from_dict(p_key, pdict)
    self.partitions = modified_partitions

    # modify storage keys of indices that carry one
    for index in self.indices.values():
        if not isinstance(index, ExplicitSecondaryIndex):
            continue
        storage_key = index.index_storage_key
        # Same prefix guard as for partition files: a blind
        # ``str.replace`` would rewrite the first occurrence of the uuid
        # anywhere in the key, even if the key is not under this dataset.
        if storage_key is not None and storage_key.startswith(old_prefix):
            index.index_storage_key = (
                new_prefix + storage_key[len(old_prefix):]
            )

    self.uuid = target_uuid
    return self
def from_dict(dct: Dict, explicit_partitions: bool = True):
    """
    Load dataset metadata from a dictionary.

    This must have no external references. Otherwise use ``load_from_dict``
    to have them resolved automatically.

    The ``"metadata"``, ``"partitions"`` and ``"indices"`` entries of
    ``dct`` are optional and treated as empty when absent; the UUID and
    metadata version entries are looked up directly and must be present.
    """
    # Reconstruction goes through the builder class so that metadata
    # version changes are handled in a single place.
    builder = DatasetMetadataBuilder(
        uuid=dct[naming.UUID_KEY],
        metadata_version=dct[naming.METADATA_VERSION_KEY],
        explicit_partitions=explicit_partitions,
        partition_keys=dct.get("partition_keys"),
        table_meta=dct.get("table_meta"),
    )

    for meta_key, meta_value in dct.get("metadata", {}).items():
        builder.add_metadata(meta_key, meta_value)

    for label, partition_spec in dct.get("partitions", {}).items():
        partition = Partition.from_dict(label, partition_spec)
        builder.add_partition(label, partition)

    for column, index_spec in dct.get("indices", {}).items():
        # Ready-made index objects are embedded as-is; anything else is
        # parsed via ``ExplicitSecondaryIndex.from_v2``.
        if not isinstance(index_spec, IndexBase):
            index_spec = ExplicitSecondaryIndex.from_v2(column, index_spec)
        builder.add_embedded_index(column, index_spec)

    return builder.to_dataset()
def test_raise_on_erroneous_input():
    """``Partition.from_dict`` raises ``ValueError`` when given a plain string as ``dct``."""
    unsupported_ref = "some_not_supported_external_ref"
    with pytest.raises(ValueError):
        Partition.from_dict(label="label", dct=unsupported_ref)
def test_roundtrip():
    """A partition built from a dict serializes back to an equal dict."""
    expected = {"files": {"Queejeb3": "file.parquet"}}
    partition = Partition.from_dict("partition_label", expected)
    assert expected == partition.to_dict()
def test_roundtrip_empty_metadata():
    """An input dict holding only ``"files"`` round-trips to a dict holding only ``"files"``."""
    source = {"files": {"Queejeb3": "file.parquet"}}
    # Built separately from the input so the comparison does not depend
    # on the input object being left unmodified.
    expected = {"files": {"Queejeb3": "file.parquet"}}
    partition = Partition.from_dict("partition_label", source)
    serialized = partition.to_dict()
    assert expected == serialized