def test_reserve_dtype(self):
    """A reserved column declared as float32 parses "0.5" and rejects "kitten"."""
    typed = TypedDfBuilder("a").reserve("x", dtype=np.float32).build()
    parsable = pd.DataFrame([pd.Series({"x": "0.5"})])
    converted = typed.convert(parsable)
    assert converted.column_names() == ["x"]
    assert converted.to_numpy().tolist() == [[0.5]]
    # A value that cannot be read as a float must surface as ValueError.
    with pytest.raises(ValueError):
        typed.convert(pd.DataFrame([pd.Series({"x": "kitten"})]))
def test_drop(self):
    """A column registered via ``drop`` is removed on convert; others stay."""
    typed = TypedDfBuilder("a").reserve("column").drop("trash").build()
    typing_info: DfTyping = typed.get_typing()
    assert typing_info.columns_to_drop == {"trash"}

    # Nothing to drop here: both columns come back unchanged.
    untouched = typed.convert(pd.DataFrame([pd.Series({"x": "x", "zz": "y"})]))
    assert untouched.column_names() == ["x", "zz"]

    # "trash" is present in the input and must be absent from the result.
    trimmed = typed.convert(pd.DataFrame([pd.Series({"x": "x", "trash": "y"})]))
    assert trimmed.column_names() == ["x"]
def test_condition(self):
    """Verifications passed to the builder are recorded and enforced on convert."""
    passing = TypedDfBuilder("a").verify(always_ok).build()
    typing_info: DfTyping = passing.get_typing()
    assert typing_info.required_columns == []
    assert typing_info.required_index_names == []
    assert typing_info.verifications == [always_ok]
    # NOTE(review): this constructs a plain TypedDf instead of converting with
    # the class built above -- confirm whether ``passing.convert`` was intended.
    TypedDf(pd.DataFrame())

    failing = TypedDfBuilder("a").verify(always_fail).build()
    with pytest.raises(VerificationFailedError):
        failing.convert(pd.DataFrame())
def test_no_overwrite(self):
    """A second write with ``overwrite=False`` to the same path raises."""
    typed = TypedDfBuilder("a").reserve("x", "y").build()
    row = pd.Series({"x": "cat", "y": "dog"})
    frame = typed.convert(pd.DataFrame([row]))
    with tmpfile(".csv") as target:
        # First write is expected to succeed.
        frame.write_file(target, overwrite=False)
        # The file now exists, so the same call must be refused.
        with pytest.raises(FileExistsError):
            frame.write_file(target, overwrite=False)
def test_mkdir(self):
    """``mkdirs=True`` lets the write succeed; without it the write raises."""
    typed = TypedDfBuilder("a").reserve("x", "y").build()
    row = pd.Series({"x": "cat", "y": "dog"})
    frame = typed.convert(pd.DataFrame([row]))

    with tmpdir() as folder:
        frame.write_file(folder / "a.csv", mkdirs=True)

    # Same call shape but without mkdirs: expected to fail.
    with tmpdir() as folder, pytest.raises(FileNotFoundError):
        frame.write_file(folder / "b.csv")
def test_pass_io_options(self):
    """Write kwargs registered on the builder (here ``sep="&"``) reach the CSV writer."""
    builder = TypedDfBuilder("a").reserve("x", "y")
    typed = builder.add_write_kwargs(FileFormat.csv, sep="&").build()
    row = pd.Series({"x": "cat", "y": "dog"})
    frame = typed.convert(pd.DataFrame([row]))
    with tmpfile(".csv") as target:
        frame.write_file(target)
        written = target.read_text(encoding="utf8").splitlines()
        assert written == ["x&y", "cat&dog"]
def test_dir_hash(self):
    """Writing with ``dir_hash=True`` creates a directory checksum file usable on read."""
    typed = TypedDfBuilder("a").reserve("x", "y").build()
    row = pd.Series({"x": "cat", "y": "kitten"})
    frame = typed.convert(pd.DataFrame([row]))
    with tmpfile(".csv") as target:
        dirsum_path = Checksums().get_dirsum_of_file(target)
        # Start from a clean slate so the existence check below is meaningful.
        dirsum_path.unlink(missing_ok=True)
        frame.write_file(target, dir_hash=True)
        assert dirsum_path.exists()

        entries = Checksums().load_dirsum_exact(dirsum_path)
        assert list(entries.keys()) == [target]
        digest = entries[target]
        assert len(digest) == 64

        # Reading must succeed both via the checksum file and via the explicit digest.
        typed.read_file(target, dir_hash=True)
        typed.read_file(target, hex_hash=digest)
def test_file_hash(self):
    """Writing with ``file_hash=True`` creates a per-file checksum usable on read."""
    typed = TypedDfBuilder("a").reserve("x", "y").build()
    row = pd.Series({"x": "cat", "y": "dog"})
    frame = typed.convert(pd.DataFrame([row]))
    # unfortunately, the file that gets output is os-dependent
    # \n vs \r\n is an issue, so we can't check the exact hash
    with tmpfile(".csv") as target:
        frame.write_file(target, file_hash=True)
        filesum_path = Checksums().get_filesum_of_file(target)
        assert filesum_path.exists()

        record = Checksums().load_filesum_of_file(target)
        assert record.file_path == target
        digest = record.hash_value
        assert len(digest) == 64

        # Reading must succeed both via the checksum file and via the explicit digest.
        typed.read_file(target, file_hash=True)
        typed.read_file(target, hex_hash=digest)
def test_strict(self):
    """Exercise ``strict`` for columns only, index only, and neither.

    Each ``pytest.raises`` block wraps only the ``convert`` call that is
    expected to fail, so an exception raised while preparing the input
    cannot be mistaken for the expected error.
    """
    # strict columns but not index
    t = TypedDfBuilder("a").strict(index=False, cols=True).build()
    cols_typing: DfTyping = t.get_typing()
    assert cols_typing.more_indices_allowed
    assert not cols_typing.more_columns_allowed
    t.convert(pd.DataFrame([pd.Series(dict(x="x"))]).set_index("x"))
    with_extra_column = pd.DataFrame([pd.Series(dict(x="x"))])
    with pytest.raises(UnexpectedColumnError):
        t.convert(with_extra_column)

    # strict index but not columns
    t = TypedDfBuilder("a").strict(True, False).build()
    index_typing: DfTyping = t.get_typing()
    assert index_typing.more_columns_allowed
    assert not index_typing.more_indices_allowed
    t.convert(pd.DataFrame([pd.Series(dict(x="x"))]))
    # Build and sanity-check the input outside the raises block.
    df = PrettyDf(pd.DataFrame([pd.Series(dict(x="x"))]).set_index("x"))
    assert df.index_names() == ["x"]
    assert df.column_names() == []
    with pytest.raises(UnexpectedIndexNameError):
        t.convert(df)

    # neither strict
    t = TypedDfBuilder("a").strict(False, False).build()
    t.convert(pd.DataFrame([pd.Series(dict(x="x"))]))
def test_attrs_hard(self):
    """A NumPy array stored in ``attrs`` is read back as nested lists of strings."""
    attrs_path = None
    try:
        typed = TypedDfBuilder("a").reserve("x", "y").build()
        row = pd.Series({"x": "cat", "y": "kitten"})
        frame = typed.convert(pd.DataFrame([row]))
        frame.attrs["matrix"] = np.zeros((2, 2))
        with tmpfile(".csv") as target:
            frame.write_file(target, attrs=True)
            # The attrs sidecar sits next to the data file.
            attrs_path = Path(f"{target}.attrs.json")
            assert attrs_path.exists()
            reloaded = typed.read_file(target, attrs=True)
            expected_matrix = [["0.0", "0.0"], ["0.0", "0.0"]]
            assert reloaded.attrs == {"matrix": expected_matrix}
    finally:
        # Remove the sidecar explicitly; only set once the write has happened.
        if attrs_path is not None:
            attrs_path.unlink(missing_ok=True)
def test_attrs(self):
    """A simple string attr is written to the ``.attrs.json`` sidecar and read back.

    The sidecar content is compared after JSON parsing rather than as raw
    text, so the check does not depend on the serializer's indentation or
    spacing.
    """
    import json

    meta = None
    try:
        t = TypedDfBuilder("a").reserve("x", "y").build()
        df = t.convert(pd.DataFrame([pd.Series(dict(x="cat", y="kitten"))]))
        df.attrs["fruit"] = "apple"
        with tmpfile(".csv") as path:
            df.write_file(path, attrs=True)
            meta = Path(str(path) + ".attrs.json")
            assert meta.exists()
            # Compare structure, not whitespace-sensitive text.
            data = json.loads(meta.read_text(encoding="utf-8"))
            assert data == {"fruit": "apple"}
            df = t.read_file(path, attrs=True)
            assert df.attrs == {"fruit": "apple"}
    finally:
        # Remove the sidecar explicitly; only set once the write has happened.
        if meta is not None:
            meta.unlink(missing_ok=True)