示例#1
0
    def write_reread(t: Tafra) -> None:
        """Round-trip ``t`` through a CSV file and check nothing changed.

        Writes ``t`` to ``write_path`` (taken from the enclosing scope), reads
        it back using the dtypes of ``t``, and asserts that every column's
        data and dtype survived the round trip.
        """
        t.to_csv(write_path)
        t2 = Tafra.read_csv(write_path, dtypes=t.dtypes)

        # zip() stops at the shorter input, so without this check a re-read
        # that silently dropped columns would still pass the loop below.
        assert len(t.columns) == len(t2.columns)

        for c1, c2 in zip(t.columns, t2.columns):
            assert np.array_equal(t.data[c1], t2.data[c2])
            assert np.array_equal(t.dtypes[c1], t2.dtypes[c2])
示例#2
0
def test_csv() -> None:
    """Exercise ``Tafra.read_csv`` and ``Tafra.to_csv`` on the ``test/ex*.csv`` fixtures.

    Covers dtype inference, reading from both a path and an open file handle,
    malformed input (expected to raise ``ValueError``), handling of missing
    values via ``missing=``, explicit ``dtypes=`` overrides, and a
    write/re-read round trip for the well-formed fixtures.
    """
    write_path = 'test/test_to_csv.csv'

    def write_reread(t: Tafra) -> None:
        """Write ``t`` to ``write_path``, read it back, and compare column by column."""
        t.to_csv(write_path)
        t2 = Tafra.read_csv(write_path, dtypes=t.dtypes)

        # zip() stops at the shorter input, so without this check a re-read
        # that silently dropped columns would still pass the loop below.
        assert len(t.columns) == len(t2.columns)

        for c1, c2 in zip(t.columns, t2.columns):
            assert np.array_equal(t.data[c1], t2.data[c2])
            assert np.array_equal(t.dtypes[c1], t2.dtypes[c2])

    # straightforward CSV - inference heuristic works
    path = Path('test/ex1.csv')
    t = Tafra.read_csv(path)
    assert t.dtypes['a'] == 'int32'
    assert t.dtypes['b'] == 'bool'
    assert t.dtypes['c'] == 'float64'
    assert t.rows == 6
    assert len(t.columns) == 3
    check_tafra(t)
    write_reread(t)

    # test again with TextIOWrapper
    with open('test/ex1.csv', 'r') as f:
        t = Tafra.read_csv(f)
    assert t.dtypes['a'] == 'int32'
    assert t.dtypes['b'] == 'bool'
    assert t.dtypes['c'] == 'float64'
    assert t.rows == 6
    assert len(t.columns) == 3
    check_tafra(t)
    write_reread(t)

    # writing to a handle opened for writing is accepted ...
    with open(write_path, 'w') as f:
        t.to_csv(f)
    # ... but a handle opened in the default read mode must be rejected
    with pytest.raises(ValueError):
        with open(write_path) as f:
            t.to_csv(f)

    # short CSV - ends during inference period
    t = Tafra.read_csv('test/ex2.csv')
    assert t.dtypes['a'] == 'int32'
    assert t.dtypes['b'] == 'bool'
    assert t.dtypes['c'] == 'float64'
    assert t.rows == 2
    assert len(t.columns) == 3
    check_tafra(t)
    write_reread(t)

    # harder CSV - promote to object during inference period,
    #   duplicate column name
    t = Tafra.read_csv('test/ex3.csv')
    assert t.dtypes['a'] == 'int32'
    assert t.dtypes['b'] == 'object'
    assert t.dtypes['b (2)'] == 'float64'
    assert t.rows == 6
    assert len(t.columns) == 3
    check_tafra(t)
    write_reread(t)

    # as above, but with a promotion required after inference period
    #   (heuristic fails)
    t = Tafra.read_csv('test/ex4.csv')
    assert t.dtypes['a'] == 'int32'
    assert t.dtypes['b'] == 'object'
    assert t.dtypes['b (2)'] == 'float64'
    assert t.rows == 6
    assert len(t.columns) == 3
    check_tafra(t)
    write_reread(t)

    # bad CSV - missing column on row #4
    with pytest.raises(ValueError):
        Tafra.read_csv('test/ex5.csv')

    # bad CSV - missing column on row #4 - after guess rows
    with pytest.raises(ValueError):
        Tafra.read_csv('test/ex5.csv', guess_rows=2)

    # missing column - but numpy will automatically convert missing (None) to nan
    t = Tafra.read_csv('test/ex6.csv')
    assert t.dtypes['dp'] == 'float64'
    assert t.dtypes['dp_prime'] == 'float64'
    assert t.dtypes['dp_prime_te'] == 'float64'
    assert t.dtypes['t'] == 'float64'
    assert t.dtypes['te'] == 'float64'
    check_tafra(t)

    # missing column - do not automatically cast
    t = Tafra.read_csv('test/ex6.csv', missing=None)
    assert t.dtypes['dp'] == 'float64'
    assert t.dtypes['dp_prime'] == 'object'
    assert t.dtypes['dp_prime_te'] == 'object'
    assert t.dtypes['t'] == 'float64'
    assert t.dtypes['te'] == 'float64'
    check_tafra(t)

    # dtypes may be given either as a Python type or as a dtype string
    t.update_dtypes_inplace({'dp_prime': float, 'dp_prime_te': 'float64'})
    assert t.dtypes['dp_prime'] == 'float64'
    assert t.dtypes['dp_prime_te'] == 'float64'
    check_tafra(t)

    # force dtypes on missing columns
    # NOTE: the builtin ``float`` replaces the ``np.float`` alias, which was
    #   deprecated in NumPy 1.20 and removed in 1.24; the alias was the
    #   builtin itself, so the requested dtype is unchanged.
    t = Tafra.read_csv('test/ex6.csv',
                       missing=None,
                       dtypes={
                           'dp_prime': float,
                           'dp_prime_te': np.float32
                       })
    assert t.dtypes['dp'] == 'float64'
    assert t.dtypes['dp_prime'] == 'float64'
    assert t.dtypes['dp_prime_te'] == 'float32'
    assert t.dtypes['t'] == 'float64'
    assert t.dtypes['te'] == 'float64'
    check_tafra(t)

    # override a column type
    t = Tafra.read_csv('test/ex4.csv', dtypes={'a': 'float32'})
    assert t.dtypes['a'] == 'float32'
    assert t.dtypes['b'] == 'object'
    assert t.dtypes['b (2)'] == 'float64'
    assert t.rows == 6
    assert len(t.columns) == 3
    check_tafra(t)
    write_reread(t)