示例#1
0
def test_simple_methods():
    """Exercise the basic inspection methods of ``DaskDataFrame``.

    Checks ``empty``, ``count``, ``is_local``, ``peek_array``, ``peek_dict``
    and ``as_pandas`` on an empty frame and on a two-row frame.
    """
    # Empty input with a schema: no rows, and not reported as local.
    empty_frame = DaskDataFrame([], "a:str,b:int")
    assert empty_frame.empty
    assert empty_frame.count() == 0
    assert not empty_frame.is_local

    # The second column mixes an int and a numeric string; the schema
    # declares it as double, and the expectations below are floats.
    filled_frame = DaskDataFrame([["a", 1], ["b", "2"]], "x:str,y:double")
    assert not filled_frame.empty
    assert filled_frame.count() == 2
    assert filled_frame.peek_array() == ["a", 1.0]
    assert filled_frame.peek_dict() == dict(x="a", y=1.0)

    # Compare the pandas materialization against the expected rows and schema.
    materialized = PandasDataFrame(filled_frame.as_pandas())
    expected_rows = [["a", 1.0], ["b", 2.0]]
    df_eq(materialized, expected_rows, "x:str,y:double", throw=True)
示例#2
0
def test_init():
    """Check the kinds of input accepted by the ``DaskDataFrame`` constructor.

    Covers schema-only construction, a pandas frame, a single column taken
    from a native frame, a native frame, another ``DaskDataFrame``, plain
    lists, and an unsupported input.
    """

    def rows_of(frame):
        # Materialize the frame through pandas as a plain list of rows.
        return frame.as_pandas().values.tolist()

    # Schema only: bounded, no rows, schema preserved.
    schema_only = DaskDataFrame(schema="a:str,b:int")
    assert schema_only.is_bounded
    assert schema_only.count() == 0
    assert schema_only.schema == "a:str,b:int"

    # pandas frame: construction without a schema is expected to raise;
    # with a schema, the expected rows follow the declared column types.
    pandas_input = pandas.DataFrame([["a", 1], ["b", 2]])
    raises(FugueDataFrameInitError, lambda: DaskDataFrame(pandas_input))
    schema_cases = (
        ("a:str,b:str", [["a", "1"], ["b", "2"]]),
        ("a:str,b:int", [["a", 1], ["b", 2]]),
        ("a:str,b:double", [["a", 1.0], ["b", 2.0]]),
    )
    for schema_expr, expected_rows in schema_cases:
        assert rows_of(DaskDataFrame(pandas_input, schema_expr)) == expected_rows

    # Single column pulled out of a native frame (asserted to be a pd.Series).
    column_input = DaskDataFrame([["a", 1], ["b", 2]], "a:str,b:int").native["b"]
    assert isinstance(column_input, pd.Series)
    assert rows_of(DaskDataFrame(column_input, "b:str")) == [["1"], ["2"]]
    assert rows_of(DaskDataFrame(column_input, "b:double")) == [[1.0], [2.0]]

    # Native frame: schema is picked up when omitted; an explicit schema
    # listing the columns in another order yields rows in that order.
    native_input = DaskDataFrame([["a", 1], ["b", 2]], "x:str,y:long").native
    assert DaskDataFrame(native_input).schema == "x:str,y:long"
    reordered = DaskDataFrame(native_input, "y:str,x:str")
    swapped_rows = [["1", "a"], ["2", "b"]]
    assert rows_of(reordered) == swapped_rows
    rewrapped = DaskDataFrame(reordered)
    assert rows_of(rewrapped) == swapped_rows
    assert reordered.native is rewrapped.native  # no real copy happened

    # Plain lists, including the empty list.
    from_lists = DaskDataFrame([["a", 1], ["b", "2"]], "x:str,y:double")
    assert rows_of(from_lists) == [["a", 1.0], ["b", 2.0]]
    assert rows_of(DaskDataFrame([], "x:str,y:double")) == []

    # Unsupported input type.
    raises(FugueDataFrameInitError, lambda: DaskDataFrame(123))