def write_reread(t: Tafra) -> None:
    """Write ``t`` to CSV, read it back, and assert the round trip is lossless.

    The destination is ``write_path``, which is not defined in this function
    and must be supplied by the surrounding scope. The file is re-read with
    ``dtypes=t.dtypes`` so the comparison is against the original column types
    rather than whatever the reader would infer on its own.

    Parameters
    ----------
    t: Tafra
        The table to write out and compare against its re-read copy.

    Raises
    ------
    AssertionError
        If the re-read table has a different number of columns, or if any
        column's data or dtype differs from the original.
    """
    t.to_csv(write_path)
    t2 = Tafra.read_csv(write_path, dtypes=t.dtypes)

    # zip() stops at the shorter input, so without this check a round trip
    # that dropped (or added) columns would still pass the loop below.
    assert len(t.columns) == len(t2.columns)

    for c1, c2 in zip(t.columns, t2.columns):
        assert np.array_equal(t.data[c1], t2.data[c2])
        assert np.array_equal(t.dtypes[c1], t2.dtypes[c2])
def test_csv() -> None:
    """Exercise ``Tafra.read_csv`` / ``Tafra.to_csv`` against the CSV fixtures.

    Covers: reading from a ``Path``, an open text handle, and a ``str`` path;
    dtype inference on short and mixed-type files; de-duplication of repeated
    column names; rejection of rows with a missing column; handling of
    missing values with and without the ``missing`` override; and explicit
    ``dtypes`` overrides. Most successfully-read tables are also written
    back out and re-read to confirm the round trip preserves data and dtypes.

    Side effects: writes ``test/test_to_csv.csv`` and reads the
    ``test/ex*.csv`` fixtures, all relative to the working directory.
    """
    write_path = 'test/test_to_csv.csv'

    def write_reread(t: Tafra) -> None:
        """Write ``t`` to ``write_path``, re-read it, and compare column-wise."""
        t.to_csv(write_path)
        t2 = Tafra.read_csv(write_path, dtypes=t.dtypes)

        # zip() stops at the shorter input, so without this check a round
        # trip that dropped (or added) columns would still pass the loop.
        assert len(t.columns) == len(t2.columns)

        for c1, c2 in zip(t.columns, t2.columns):
            assert np.array_equal(t.data[c1], t2.data[c2])
            assert np.array_equal(t.dtypes[c1], t2.dtypes[c2])

    # straightforward CSV - inference heuristic works
    path = Path('test/ex1.csv')
    t = Tafra.read_csv(path)
    assert t.dtypes['a'] == 'int32'
    assert t.dtypes['b'] == 'bool'
    assert t.dtypes['c'] == 'float64'
    assert t.rows == 6
    assert len(t.columns) == 3
    check_tafra(t)
    write_reread(t)

    # test again with TextIOWrapper
    with open('test/ex1.csv', 'r') as f:
        t = Tafra.read_csv(f)
    assert t.dtypes['a'] == 'int32'
    assert t.dtypes['b'] == 'bool'
    assert t.dtypes['c'] == 'float64'
    assert t.rows == 6
    assert len(t.columns) == 3
    check_tafra(t)
    write_reread(t)

    # writing to an already-open, writable handle is accepted
    with open(write_path, 'w') as f:
        t.to_csv(f)

    # writing to a handle opened read-only must be rejected
    with pytest.raises(ValueError):
        with open(write_path) as f:
            t.to_csv(f)

    # short CSV - ends during inference period
    t = Tafra.read_csv('test/ex2.csv')
    assert t.dtypes['a'] == 'int32'
    assert t.dtypes['b'] == 'bool'
    assert t.dtypes['c'] == 'float64'
    assert t.rows == 2
    assert len(t.columns) == 3
    check_tafra(t)
    write_reread(t)

    # harder CSV - promote to object during inference period,
    # duplicate column name
    t = Tafra.read_csv('test/ex3.csv')
    assert t.dtypes['a'] == 'int32'
    assert t.dtypes['b'] == 'object'
    assert t.dtypes['b (2)'] == 'float64'
    assert t.rows == 6
    assert len(t.columns) == 3
    check_tafra(t)
    write_reread(t)

    # as above, but with a promotion required after inference period
    # (heuristic fails)
    t = Tafra.read_csv('test/ex4.csv')
    assert t.dtypes['a'] == 'int32'
    assert t.dtypes['b'] == 'object'
    assert t.dtypes['b (2)'] == 'float64'
    assert t.rows == 6
    assert len(t.columns) == 3
    check_tafra(t)
    write_reread(t)

    # bad CSV - missing column on row #4
    with pytest.raises(ValueError):
        Tafra.read_csv('test/ex5.csv')

    # bad CSV - missing column on row #4 - after guess rows
    with pytest.raises(ValueError):
        Tafra.read_csv('test/ex5.csv', guess_rows=2)

    # missing column - but numpy will automatically convert missing (None) to nan
    t = Tafra.read_csv('test/ex6.csv')
    assert t.dtypes['dp'] == 'float64'
    assert t.dtypes['dp_prime'] == 'float64'
    assert t.dtypes['dp_prime_te'] == 'float64'
    assert t.dtypes['t'] == 'float64'
    assert t.dtypes['te'] == 'float64'
    check_tafra(t)

    # missing column - do not automatically cast
    t = Tafra.read_csv('test/ex6.csv', missing=None)
    assert t.dtypes['dp'] == 'float64'
    assert t.dtypes['dp_prime'] == 'object'
    assert t.dtypes['dp_prime_te'] == 'object'
    assert t.dtypes['t'] == 'float64'
    assert t.dtypes['te'] == 'float64'
    check_tafra(t)

    t.update_dtypes_inplace({'dp_prime': float, 'dp_prime_te': 'float64'})
    assert t.dtypes['dp_prime'] == 'float64'
    assert t.dtypes['dp_prime_te'] == 'float64'
    check_tafra(t)

    # force dtypes on missing columns
    # NOTE: the builtin ``float`` is used instead of ``np.float``; the latter
    # was only an alias for the builtin and no longer exists in current NumPy.
    t = Tafra.read_csv('test/ex6.csv', missing=None, dtypes={
        'dp_prime': float,
        'dp_prime_te': np.float32,
    })
    assert t.dtypes['dp'] == 'float64'
    assert t.dtypes['dp_prime'] == 'float64'
    assert t.dtypes['dp_prime_te'] == 'float32'
    assert t.dtypes['t'] == 'float64'
    assert t.dtypes['te'] == 'float64'
    check_tafra(t)

    # override a column type
    t = Tafra.read_csv('test/ex4.csv', dtypes={'a': 'float32'})
    assert t.dtypes['a'] == 'float32'
    assert t.dtypes['b'] == 'object'
    assert t.dtypes['b (2)'] == 'float64'
    assert t.rows == 6
    assert len(t.columns) == 3
    check_tafra(t)
    write_reread(t)