def test_simple_methods():
    """Check emptiness, row count, locality and peeking on DaskDataFrame."""
    # A frame built from an empty list plus a schema.
    empty_df = DaskDataFrame([], "a:str,b:int")
    assert empty_df.empty
    assert empty_df.count() == 0
    assert not empty_df.is_local

    # A populated frame; the assertions below expect column y to come
    # back as floats even though the inputs are an int and a string.
    filled_df = DaskDataFrame([["a", 1], ["b", "2"]], "x:str,y:double")
    assert not filled_df.empty
    assert filled_df.count() == 2
    assert filled_df.peek_array() == ["a", 1.0]
    assert filled_df.peek_dict() == {"x": "a", "y": 1.0}

    # Round-trip through pandas and compare against the expected rows.
    expected_rows = [["a", 1.0], ["b", 2.0]]
    round_tripped = PandasDataFrame(filled_df.as_pandas())
    df_eq(round_tripped, expected_rows, "x:str,y:double", throw=True)
def test_init():
    """Check the inputs the DaskDataFrame constructor accepts and rejects."""
    # --- schema only -----------------------------------------------------
    schema_only = DaskDataFrame(schema="a:str,b:int")
    assert schema_only.is_bounded
    assert schema_only.count() == 0
    assert schema_only.schema == "a:str,b:int"

    # --- pandas.DataFrame input ------------------------------------------
    raw_pdf = pandas.DataFrame([["a", 1], ["b", 2]])
    # Constructing from this frame without a schema is expected to fail.
    raises(FugueDataFrameInitError, lambda: DaskDataFrame(raw_pdf))
    pdf_cases = (
        ("a:str,b:str", [["a", "1"], ["b", "2"]]),
        ("a:str,b:int", [["a", 1], ["b", 2]]),
        ("a:str,b:double", [["a", 1.0], ["b", 2.0]]),
    )
    for schema, expected in pdf_cases:
        converted = DaskDataFrame(raw_pdf, schema)
        assert expected == converted.as_pandas().values.tolist()

    # --- single column taken from a native frame --------------------------
    column = DaskDataFrame([["a", 1], ["b", 2]], "a:str,b:int").native["b"]
    assert isinstance(column, pd.Series)
    column_cases = (
        ("b:str", [["1"], ["2"]]),
        ("b:double", [[1.0], [2.0]]),
    )
    for schema, expected in column_cases:
        converted = DaskDataFrame(column, schema)
        assert expected == converted.as_pandas().values.tolist()

    # --- native frame input ------------------------------------------------
    native = DaskDataFrame([["a", 1], ["b", 2]], "x:str,y:long").native
    inferred = DaskDataFrame(native)
    assert inferred.schema == "x:str,y:long"

    swapped_rows = [["1", "a"], ["2", "b"]]
    reordered = DaskDataFrame(native, "y:str,x:str")
    assert swapped_rows == reordered.as_pandas().values.tolist()

    # --- wrapping an existing DaskDataFrame ---------------------------------
    rewrapped = DaskDataFrame(reordered)
    assert swapped_rows == rewrapped.as_pandas().values.tolist()
    assert reordered.native is rewrapped.native  # no real copy happened

    # --- plain list input ----------------------------------------------------
    from_rows = DaskDataFrame([["a", 1], ["b", "2"]], "x:str,y:double")
    assert [["a", 1.0], ["b", 2.0]] == from_rows.as_pandas().values.tolist()

    from_empty = DaskDataFrame([], "x:str,y:double")
    assert [] == from_empty.as_pandas().values.tolist()

    # --- unsupported input type ------------------------------------------------
    raises(FugueDataFrameInitError, lambda: DaskDataFrame(123))