def test_pandas_read_supports_read_csv_kwargs():
    """Passing ``usecols=['name']`` restricts the loaded frame to that column.

    Each converted row is therefore a one-element tuple holding the name.
    """
    schema = datashape.dshape('var * {name: string, amount: int}')
    with filetext('Alice,1\nBob,2') as path:
        frame = csv_to_dataframe(CSV(path), dshape=schema, usecols=['name'])
        assert isinstance(frame, pd.DataFrame)
        rows = convert(list, frame)
        assert rows == [('Alice',), ('Bob',)]
def test_pandas_read_supports_read_csv_kwargs():
    """Check that ``usecols`` given to ``csv_to_dataframe`` narrows the output.

    The file has two columns but only ``name`` is requested, so the rows
    converted from the frame are single-element tuples.
    """
    with filetext("Alice,1\nBob,2") as csv_path:
        record_shape = datashape.dshape("var * {name: string, amount: int}")
        source = CSV(csv_path)
        loaded = csv_to_dataframe(source, dshape=record_shape, usecols=["name"])

        assert isinstance(loaded, pd.DataFrame)
        expected_rows = [("Alice",), ("Bob",)]
        assert convert(list, loaded) == expected_rows
def test_pandas_read_supports_missing_integers():
    """An optional ``?int32`` column with an empty cell loads as dtype ``'f4'``.

    The second row leaves ``val`` blank; the test pins both the column names
    and the resulting dtype of ``val``.
    """
    schema = datashape.dshape('var * {name: string, val: ?int32}')
    with filetext('Alice,1\nBob,') as path:
        frame = csv_to_dataframe(CSV(path), dshape=schema)
        assert isinstance(frame, pd.DataFrame)
        column_names = list(frame.columns)
        assert column_names == ['name', 'val']
        val_dtype = frame.dtypes['val']
        assert val_dtype == 'f4'
def test_pandas_read_supports_datetimes():
    """A column declared as ``date`` in the dshape loads with dtype ``'M8[ns]'``."""
    schema = datashape.dshape('var * {name: string, when: date}')
    with filetext('Alice,2014-01-02\nBob,2014-01-03') as path:
        frame = csv_to_dataframe(CSV(path), dshape=schema)
        assert isinstance(frame, pd.DataFrame)
        column_names = list(frame.columns)
        assert column_names == ['name', 'when']
        when_dtype = frame.dtypes['when']
        assert when_dtype == 'M8[ns]'
def test_pandas_read():
    """A plain two-column CSV loads into a DataFrame with the dshape's names.

    Verifies the result type, the row contents, and the column labels.
    """
    schema = datashape.dshape('var * {name: string, amount: int}')
    with filetext('Alice,1\nBob,2') as path:
        frame = csv_to_dataframe(CSV(path), dshape=schema)
        assert isinstance(frame, pd.DataFrame)
        rows = convert(list, frame)
        assert rows == [('Alice', 1), ('Bob', 2)]
        column_names = list(frame.columns)
        assert column_names == ['name', 'amount']
def test_pandas_read_supports_gzip():
    """A ``.csv.gz`` file written through ``gzip.open`` loads like a plain CSV.

    The fixture file is produced in text mode (``"wt"``) with a ``.csv.gz``
    extension; the resulting frame must carry the same rows and column names
    as the uncompressed case.
    """
    record_shape = datashape.dshape("var * {name: string, amount: int}")
    fixture = filetext(
        "Alice,1\nBob,2",
        open=gzip.open,
        mode="wt",
        extension=".csv.gz",
    )
    with fixture as gz_path:
        loaded = csv_to_dataframe(CSV(gz_path), dshape=record_shape)

        assert isinstance(loaded, pd.DataFrame)
        assert convert(list, loaded) == [("Alice", 1), ("Bob", 2)]
        assert list(loaded.columns) == ["name", "amount"]
def test_pandas_read_supports_missing_integers():
    """Check that a blank cell in a ``?int32`` column yields dtype ``"f4"``.

    The input's last row has no value for ``val``; the test asserts the
    column labels and that ``val`` ends up with the ``"f4"`` dtype.
    """
    with filetext("Alice,1\nBob,") as csv_path:
        record_shape = datashape.dshape("var * {name: string, val: ?int32}")
        source = CSV(csv_path)
        loaded = csv_to_dataframe(source, dshape=record_shape)

        assert isinstance(loaded, pd.DataFrame)
        expected_columns = ["name", "val"]
        assert list(loaded.columns) == expected_columns
        assert loaded.dtypes["val"] == "f4"
def test_pandas_read_supports_datetimes():
    """Check that a ``date`` field in the dshape produces an ``"M8[ns]"`` column."""
    with filetext("Alice,2014-01-02\nBob,2014-01-03") as csv_path:
        record_shape = datashape.dshape("var * {name: string, when: date}")
        source = CSV(csv_path)
        loaded = csv_to_dataframe(source, dshape=record_shape)

        assert isinstance(loaded, pd.DataFrame)
        expected_columns = ["name", "when"]
        assert list(loaded.columns) == expected_columns
        assert loaded.dtypes["when"] == "M8[ns]"
def test_pandas_read():
    """Check the basic CSV-to-DataFrame path: type, rows, and column labels."""
    with filetext("Alice,1\nBob,2") as csv_path:
        record_shape = datashape.dshape("var * {name: string, amount: int}")
        source = CSV(csv_path)
        loaded = csv_to_dataframe(source, dshape=record_shape)

        assert isinstance(loaded, pd.DataFrame)
        expected_rows = [("Alice", 1), ("Bob", 2)]
        assert convert(list, loaded) == expected_rows
        expected_columns = ["name", "amount"]
        assert list(loaded.columns) == expected_columns
def test_unused_datetime_columns():
    """A datetime column in the dshape may be left out via ``usecols``.

    The dshape declares ``when`` as a datetime, but only ``val`` is loaded
    (with ``squeeze=True``); converting the result to a list must give the
    bare ``val`` values.
    """
    schema = datashape.dshape('var * {val: string, when: datetime}')
    with filetext("val,when\na,2000-01-01\nb,2000-02-02") as path:
        source = CSV(path, has_header=True)
        loaded = csv_to_dataframe(
            source, usecols=['val'], squeeze=True, dshape=schema)
        values = convert(list, loaded)
        assert values == ['a', 'b']
def test_pandas_read_supports_gzip():
    """A gzip-compressed CSV (``.csv.gz``) loads with the expected contents.

    The fixture is written with ``gzip.open`` in ``'wt'`` mode; the test then
    checks the frame's type, rows, and column names.
    """
    schema = datashape.dshape('var * {name: string, amount: int}')
    gz_options = dict(open=gzip.open, mode='wt', extension='.csv.gz')
    with filetext('Alice,1\nBob,2', **gz_options) as path:
        frame = csv_to_dataframe(CSV(path), dshape=schema)
        assert isinstance(frame, pd.DataFrame)
        rows = convert(list, frame)
        assert rows == [('Alice', 1), ('Bob', 2)]
        column_names = list(frame.columns)
        assert column_names == ['name', 'amount']
def test_unused_datetime_columns():
    """Check that loading succeeds when the dshape's datetime column is unused.

    ``usecols=['val']`` skips the ``when`` column declared as datetime, and
    ``squeeze=True`` is passed as well; the converted list must be the plain
    ``val`` entries.
    """
    record_shape = datashape.dshape('var * {val: string, when: datetime}')
    with filetext("val,when\na,2000-01-01\nb,2000-02-02") as csv_path:
        header_csv = CSV(csv_path, has_header=True)
        read_options = dict(usecols=['val'], squeeze=True, dshape=record_shape)
        result = csv_to_dataframe(header_csv, **read_options)
        assert convert(list, result) == ['a', 'b']