def test_add_nested_to_plain():
    """Attaching a nested MetaPartition equals attaching its members singly.

    A plain partition is combined with two further partitions in two ways:
    once by attaching an already nested pair in one call, and once by
    attaching the same two partitions one after the other. Both results are
    required to compare equal.
    """
    plain = MetaPartition(
        label="label_1",
        file="file",
        data=pd.DataFrame({"test": [1, 2, 3]}),
        indices={"test": [1, 2, 3]},
    )
    left = MetaPartition(
        label="label_2",
        data=pd.DataFrame({"test": [4, 5, 6]}),
        indices={"test": [4, 5, 6]},
    )
    right = MetaPartition(
        label="label_22",
        data=pd.DataFrame({"test": [4, 5, 6]}),
        indices={"test": [4, 5, 6]},
    )

    # Variant 1: nest the two extra partitions first, then attach the pair.
    pair = left.add_metapartition(right)
    attached_as_pair = plain.add_metapartition(pair)

    # Variant 2: attach the very same partitions step by step.
    with_left = plain.add_metapartition(left)
    attached_one_by_one = with_left.add_metapartition(right)

    assert attached_as_pair == attached_one_by_one
def test_eq_nested():
    """Check ``==`` / ``!=`` on nested MetaPartitions.

    A nested partition must equal itself, must differ from one of its own
    members, and must differ from another nested partition that shares the
    first member but has a different second member. Inequality is checked
    with the operands in both orders.
    """
    base = MetaPartition(
        label="label_1",
        file="file",
        data=pd.DataFrame({"test": [1, 2, 3]}),
        indices={"test": [1, 2, 3]},
    )
    member = MetaPartition(
        label="label_2",
        data=pd.DataFrame({"test": [4, 5, 6]}),
        indices={"test": [4, 5, 6]},
    )
    nested = base.add_metapartition(member)

    # Reflexivity.
    assert nested == nested

    # A nested partition is not equal to a single member (either direction).
    assert nested != member
    assert member != nested

    # Same first member, different second member (other label, no indices).
    third = MetaPartition(label="label_3", data=pd.DataFrame({"test": [4, 5, 6]}))
    other_nested = base.add_metapartition(third)
    assert nested != other_nested
    assert other_nested != nested
def test_nested_incompatible_meta():
    """Nesting partitions whose ``test`` column dtypes differ must fail.

    One partition carries the column as ``int8``, the other as ``float64``;
    ``add_metapartition`` is expected to raise ``ValueError``.
    """
    int8_frame = pd.DataFrame({"test": np.array([1, 2, 3], dtype=np.int8)})
    float64_frame = pd.DataFrame({"test": np.array([4, 5, 6], dtype=np.float64)})

    int8_partition = MetaPartition(
        label="label_1", data=int8_frame, metadata_version=4
    )
    float64_partition = MetaPartition(
        label="label_2", data=float64_frame, metadata_version=4
    )

    with pytest.raises(ValueError):
        int8_partition.add_metapartition(float64_partition)
def test_nested_copy():
    """``copy()`` of a nested MetaPartition is equal but a distinct object."""
    head = MetaPartition(
        label="label_1",
        file="file",
        data=pd.DataFrame({"test": [1, 2, 3]}),
        indices={"test": {1: "label_1", 2: "label_2", 3: "label_3"}},
    )
    tail = MetaPartition(
        label="label_2",
        data=pd.DataFrame({"test": [4, 5, 6]}),
        indices={"test": [4, 5, 6]},
    )
    nested = head.add_metapartition(tail)
    assert len(nested.metapartitions) == 2

    duplicate = nested.copy()

    # The copy has the same number of members and compares equal ...
    assert len(duplicate.metapartitions) == len(nested.metapartitions)
    assert duplicate == nested

    # ... yet it must not be the very same object.
    assert duplicate is not nested
def test_partition_on_nested():
    """``partition_on`` applied to a nested MetaPartition splits every member.

    Two partitions share one six-row frame in which every
    (``level1``, ``level2``) combination occurs exactly once. Partitioning
    the nested object on both columns must give 12 partitions. Each holds
    a single one-row ``core`` table without the two partition columns, and
    each is labelled ``level1=<v>/level2=<w>/<original label>``.
    """
    expected_labels = [
        "level1=1/level2=1/label_1",
        "level1=1/level2=2/label_1",
        "level1=2/level2=1/label_1",
        "level1=2/level2=2/label_1",
        "level1=3/level2=1/label_1",
        "level1=3/level2=2/label_1",
        "level1=1/level2=1/label_2",
        "level1=1/level2=2/label_2",
        "level1=2/level2=1/label_2",
        "level1=2/level2=2/label_2",
        "level1=3/level2=1/label_2",
        "level1=3/level2=2/label_2",
    ]
    source_frame = pd.DataFrame(
        {
            "level1": [1, 2, 3, 1, 2, 3],
            "level2": [1, 1, 1, 2, 2, 2],
            "no_index_col": np.arange(0, 6),
        }
    )

    def build(label):
        # Both inputs differ only in their label; the frame is shared.
        return MetaPartition(
            label=label,
            files={"core": "file"},
            data={"core": source_frame},
            dataset_metadata={"dataset": "metadata"},
            metadata_version=4,
        )

    nested = build("label_1").add_metapartition(build("label_2"))
    partitioned = nested.partition_on(["level1", "level2"])
    assert len(partitioned.metapartitions) == 12

    seen_labels = []
    for part in partitioned:
        seen_labels.append(part.label)

        assert len(part.data) == 1
        assert "core" in part.data
        frame = part.data["core"]
        assert frame._is_view

        # Per-partition checks that do not depend on iteration order.
        assert len(frame) == 1
        assert "level1" not in frame
        assert "level2" not in frame
        assert "no_index_col" in frame

    # Labels are compared sorted so the iteration order is irrelevant.
    assert sorted(seen_labels) == sorted(expected_labels)
def test_partition_label_helper(labels, flat_labels):
    """``partition_labels_from_mps`` yields the labels of all given partitions.

    Parameters
    ----------
    labels
        Iterable whose items are either a single label or a ``list`` of
        labels. A list is turned into one nested MetaPartition.
        (Presumably supplied by a parametrization not visible here.)
    flat_labels
        The labels expected from the helper; compared as a set, so neither
        order nor multiplicity matters.
    """
    built = []
    for entry in labels:
        if not isinstance(entry, list):
            built.append(MetaPartition(label=entry))
            continue

        # First label creates the partition, the remaining ones get nested.
        nested = MetaPartition(entry[0])
        for extra_label in entry[1:]:
            nested = nested.add_metapartition(MetaPartition(label=extra_label))
        built.append(nested)

    assert set(partition_labels_from_mps(built)) == set(flat_labels)
def test_eq_nested_reflexive():
    """A nested MetaPartition built from two partitions equals itself.

    This test used to be called ``test_eq_nested``, the same name as an
    earlier test in this module. The later definition silently replaced the
    earlier one at import time, so only one of the two was ever collected.
    The distinct name lets both run.
    """
    mp = MetaPartition(
        label="label_1",
        files={"core": "file"},
        data={"core": pd.DataFrame({"test": [1, 2, 3]})},
        indices={"test": [1, 2, 3]},
        dataset_metadata={"dataset": "metadata"},
    )
    mp_2 = MetaPartition(
        label="label_2",
        data={"core": pd.DataFrame({"test": [4, 5, 6]})},
        indices={"test": [4, 5, 6]},
    )
    mp = mp.add_metapartition(mp_2)

    # Deliberately ``==`` on the same object: exercises the equality
    # implementation for the nested case.
    assert mp == mp
def test_add_metapartition_duplicate_labels():
    """Nesting two partitions that carry the same label raises RuntimeError."""
    original = MetaPartition(label="label")
    same_label = MetaPartition(label="label")

    with pytest.raises(RuntimeError):
        original.add_metapartition(same_label)
def test_add_metapartition():
    """Nesting keeps members in insertion order and hides single attributes.

    Checks that:

    * the single-partition attributes ``indices``, ``label``, ``data`` and
      ``file`` raise ``AttributeError`` on a nested MetaPartition,
    * ``metapartitions`` lists the members in the order they were added,
      each exposing its ``"label"`` and ``"indices"``,
    * a further partition can be added to an already nested MetaPartition.
    """

    def assert_members(nested_mp, expected):
        # ``expected`` is a list of (label, indices) in insertion order.
        members = nested_mp.metapartitions
        assert len(members) == len(expected)
        for member, (label, indices) in zip(members, expected):
            assert member["label"] == label
            assert member["indices"] == indices

    mp = MetaPartition(
        label="label_1",
        data=pd.DataFrame({"test": [1, 2, 3]}),
        indices={"test": [1, 2, 3]},
    )
    mp_2 = MetaPartition(
        label="label_2",
        data=pd.DataFrame({"test": [4, 5, 6]}),
        indices={"test": [4, 5, 6]},
    )
    new_mp = mp.add_metapartition(mp_2)

    # Cannot access single object attributes. Each attribute is checked
    # once; the original repeated ``indices`` three times by copy-paste.
    for attribute in ("indices", "label", "data", "file"):
        with pytest.raises(AttributeError):
            getattr(new_mp, attribute)

    assert_members(
        new_mp,
        [
            ("label_1", {"test": [1, 2, 3]}),
            ("label_2", {"test": [4, 5, 6]}),
        ],
    )

    # This tests whether it is possible to add to an already nested
    # MetaPartition.
    mp_3 = MetaPartition(
        label="label_3",
        data=pd.DataFrame({"test": [7, 8, 9]}),
        indices={"test": [7, 8, 9]},
    )
    new_mp = new_mp.add_metapartition(mp_3)

    assert_members(
        new_mp,
        [
            ("label_1", {"test": [1, 2, 3]}),
            ("label_2", {"test": [4, 5, 6]}),
            ("label_3", {"test": [7, 8, 9]}),
        ],
    )