def test_header_disagrees_with_dshape():
    """Check column naming when the file header and the dshape disagree.

    The file header calls the second column ``val`` while the datashape
    calls it ``bal``.  Without an explicit dshape the DataFrame is expected
    to carry the header names; with ``dshape=`` it is expected to carry the
    datashape names.
    """
    # NOTE(review): the other tests in this file pass ``has_header=True``;
    # this one passes ``header=True`` -- confirm which keyword CSV honors.
    override_schema = datashape.dshape('var * {name: string, bal: int64}')
    with filetext('name,val\nAlice,100\nBob,200', extension='csv') as path:
        source = CSV(path, header=True)

        rows = convert(list, source)
        assert rows == [('Alice', 100), ('Bob', 200)]

        header_frame = convert(pd.DataFrame, source)
        assert list(header_frame.columns) == ['name', 'val']

        override_frame = convert(pd.DataFrame, source, dshape=override_schema)
        assert list(override_frame.columns) == ['name', 'bal']
def test_pandas_read_supports_read_csv_kwargs():
    """Check that ``csv_to_DataFrame`` accepts the ``usecols`` keyword.

    Only the ``name`` column is requested, so every resulting row is
    expected to be a one-element tuple.
    """
    # NOTE(review): a function with this same name is defined again later
    # in this file; within one module the later definition wins.
    with filetext('Alice,1\nBob,2') as path:
        schema = datashape.dshape('var * {name: string, amount: int}')
        source = CSV(path)
        frame = csv_to_DataFrame(source, dshape=schema, usecols=['name'])

        assert isinstance(frame, pd.DataFrame)

        rows = convert(list, frame)
        assert rows == [('Alice',), ('Bob',)]
def test_glob():
    """Check that a glob-pattern resource yields rows from every matching file.

    Two CSV files sharing the same header are created; converting the
    ``accounts*.csv`` resource to a list is expected to give the rows of
    ``accounts1.csv`` followed by the rows of ``accounts2.csv``.
    """
    # NOTE(review): a function with this same name is defined again later
    # in this file; within one module the later definition wins.
    contents = {
        'accounts1.csv': 'name,when\nAlice,100\nBob,200',
        'accounts2.csv': 'name,when\nAlice,300\nBob,400',
    }
    # The yielded filenames are not needed; the glob pattern finds the files.
    with filetexts(contents) as _paths:
        matched = resource('accounts*.csv', has_header=True)
        rows = convert(list, matched)
        assert rows == [
            ('Alice', 100),
            ('Bob', 200),
            ('Alice', 300),
            ('Bob', 400),
        ]
def test_pandas_read_supports_read_csv_kwargs():
    """Check that extra keywords such as ``usecols`` reach the CSV reader.

    The file has two columns but only ``name`` is requested, so the
    converted rows are expected to be single-element tuples.
    """
    with filetext('Alice,1\nBob,2') as csv_path:
        declared = datashape.dshape('var * {name: string, amount: int}')
        table = CSV(csv_path)
        names_only = csv_to_DataFrame(table, dshape=declared, usecols=['name'])

        assert isinstance(names_only, pd.DataFrame)
        expected_rows = [('Alice',), ('Bob',)]
        assert convert(list, names_only) == expected_rows
def test_pandas_loads_in_datetimes_naively():
    """Check that a date column is discovered and loaded as datetimes.

    The discovered datashape is compared with a non-optional
    ``string``/``datetime`` record, and the ``when`` column of the converted
    DataFrame is expected to have dtype ``M8[ns]``.
    """
    # NOTE(review): a later definition with this same name expects option
    # types (``?string``, ``?datetime``) -- confirm which one matches what
    # ``discover`` returns.
    with filetext('name,when\nAlice,2014-01-01\nBob,2014-02-02') as path:
        source = CSV(path, has_header=True)
        expected_shape = datashape.dshape(
            'var * {name: string, when: datetime}')
        assert discover(source) == expected_shape

        frame = convert(pd.DataFrame, source)
        when_dtype = frame.dtypes['when']
        assert when_dtype == 'M8[ns]'
def test_pandas_read():
    """Check that ``csv_to_DataFrame`` reads a headerless CSV with a dshape.

    The resulting DataFrame is expected to hold both rows and to take its
    column names from the supplied datashape.
    """
    with filetext('Alice,1\nBob,2') as path:
        schema = datashape.dshape('var * {name: string, amount: int}')
        source = CSV(path)
        frame = csv_to_DataFrame(source, dshape=schema)

        assert isinstance(frame, pd.DataFrame)

        rows = convert(list, frame)
        assert rows == [('Alice', 1), ('Bob', 2)]

        column_names = list(frame.columns)
        assert column_names == ['name', 'amount']
def test_unused_datetime_columns():
    """Check reading one column while the dshape declares an unread datetime.

    Only ``val`` is requested via ``usecols`` (with ``squeeze=True``), even
    though the datashape also declares a ``when: datetime`` column; the
    converted result is expected to be the flat list ``['a', 'b']``.
    """
    # NOTE(review): a function with this same name is defined again later
    # in this file; within one module the later definition wins.
    schema = datashape.dshape('var * {val: string, when: datetime}')
    with filetext("val,when\na,2000-01-01\nb,2000-02-02") as path:
        source = CSV(path, has_header=True)
        squeezed = csv_to_DataFrame(
            source,
            usecols=['val'],
            squeeze=True,
            dshape=schema,
        )
        values = convert(list, squeezed)
        assert values == ['a', 'b']
def test_pandas_loads_in_datetimes_naively():
    """Check datetime discovery and loading, expecting option types.

    The discovered datashape is compared with a record of ``?string`` and
    ``?datetime`` fields, and the ``when`` column of the converted DataFrame
    is expected to have dtype ``M8[ns]``.
    """
    with filetext('name,when\nAlice,2014-01-01\nBob,2014-02-02') as csv_path:
        table = CSV(csv_path, has_header=True)
        optional_shape = datashape.dshape(
            'var * {name: ?string, when: ?datetime}')
        discovered = discover(table)
        assert discovered == optional_shape

        loaded = convert(pd.DataFrame, table)
        assert loaded.dtypes['when'] == 'M8[ns]'
def test_pandas_read_supports_gzip():
    """Check that ``csv_to_DataFrame`` reads a gzip-compressed CSV file.

    The fixture is written through ``gzip.open`` to a ``.csv.gz`` file; the
    resulting DataFrame is expected to hold both rows and to take its column
    names from the supplied datashape.
    """
    # ``gzip.open`` treats any mode without a ``t`` as binary, and a binary
    # handle rejects the ``str`` fixture on Python 3.  Ask for text mode
    # explicitly, as the other gzip test in this file does.
    with filetext('Alice,1\nBob,2',
                  open=gzip.open,
                  mode='wt',
                  extension='.csv.gz') as fn:
        ds = datashape.dshape('var * {name: string, amount: int}')
        csv = CSV(fn)
        df = csv_to_DataFrame(csv, dshape=ds)

        assert isinstance(df, pd.DataFrame)
        assert convert(list, df) == [('Alice', 1), ('Bob', 2)]
        assert list(df.columns) == ['name', 'amount']
def test_glob():
    """Check that ``resource`` with a glob pattern concatenates matching CSVs.

    Two files with the same header are written; the list conversion of the
    ``accounts*.csv`` resource is expected to contain the rows of the first
    file followed by the rows of the second.
    """
    first_rows = [('Alice', 100), ('Bob', 200)]
    second_rows = [('Alice', 300), ('Bob', 400)]
    files = {
        'accounts1.csv': 'name,when\nAlice,100\nBob,200',
        'accounts2.csv': 'name,when\nAlice,300\nBob,400',
    }
    # The context manager's filenames are unused; the glob locates the files.
    with filetexts(files) as _filenames:
        combined = resource('accounts*.csv', has_header=True)
        assert convert(list, combined) == first_rows + second_rows
def test_pandas_read_supports_gzip():
    """Check that ``csv_to_DataFrame`` reads a gzip-compressed CSV file.

    The fixture is written in text mode (``'wt'``) through ``gzip.open`` to
    a ``.csv.gz`` file.  The DataFrame read back is expected to contain both
    rows, with column names taken from the supplied datashape.
    """
    fixture = filetext(
        'Alice,1\nBob,2',
        open=gzip.open,
        mode='wt',
        extension='.csv.gz',
    )
    with fixture as path:
        schema = datashape.dshape('var * {name: string, amount: int}')
        source = CSV(path)
        frame = csv_to_DataFrame(source, dshape=schema)

        assert isinstance(frame, pd.DataFrame)

        rows = convert(list, frame)
        assert rows == [('Alice', 1), ('Bob', 2)]

        column_names = list(frame.columns)
        assert column_names == ['name', 'amount']
def test_csv_append():
    """Check that appending rows to a headerless CSV writes them to disk.

    After ``append``, iterating the CSV is expected to give back the same
    rows, and the raw file text is expected to contain the written values.
    """
    with tmpfile('.csv') as path:
        target = CSV(path, has_header=False)
        records = [('Alice', 100), ('Bob', 200)]
        append(target, records)

        round_tripped = list(convert(Iterator, target))
        assert round_tripped == records

        # Inspect the raw text to confirm the data really reached the file.
        with open(path) as handle:
            text = handle.read()
        assert 'Alice' in text
        assert '100' in text
def test_discover_csv_files_without_header():
    """Check that the first row is treated as data when ``has_header=False``.

    Both rows are expected to appear in the DataFrame, and ``'Alice'`` (a
    value from the first row) must not show up as a column name.
    """
    with filetext('Alice,2014-01-01\nBob,2014-02-02') as path:
        source = CSV(path, has_header=False)
        frame = convert(pd.DataFrame, source)

        row_count = len(frame)
        assert row_count == 2

        column_names = list(frame.columns)
        assert 'Alice' not in column_names
def test_csv_separator_header():
    """Check reading a pipe-delimited CSV that has a header row.

    With ``delimiter='|'`` and ``has_header=True`` the header line is
    expected to be skipped and the two data rows returned as integer tuples.
    """
    with filetext('a|b|c\n1|2|3\n4|5|6', extension='csv') as path:
        source = CSV(path, delimiter='|', has_header=True)
        rows = convert(list, source)
        assert rows == [(1, 2, 3), (4, 5, 6)]
def test_empty_dataframe():
    """Check that a header-only CSV still converts to a DataFrame.

    The file contains a header line and no data rows; the conversion is
    only required to produce a ``pd.DataFrame`` instance.
    """
    with filetext('name,val', extension='csv') as path:
        header_only = CSV(path, has_header=True)
        frame = convert(pd.DataFrame, header_only)
        assert isinstance(frame, pd.DataFrame)
def test_unused_datetime_columns():
    """Check that an unread datetime column in the dshape does not break reads.

    The datashape declares ``val`` and ``when: datetime``, but only ``val``
    is requested through ``usecols`` together with ``squeeze=True``.  The
    converted result is expected to be the flat list ``['a', 'b']``.
    """
    declared = datashape.dshape('var * {val: string, when: datetime}')
    with filetext("val,when\na,2000-01-01\nb,2000-02-02") as csv_path:
        table = CSV(csv_path, has_header=True)
        read_options = dict(usecols=['val'], squeeze=True, dshape=declared)
        val_only = csv_to_DataFrame(table, **read_options)
        assert convert(list, val_only) == ['a', 'b']