def test_df_eq():
    """Exercise ``df_eq`` over precision, metadata, null and row-order cases."""
    wide_schema = "a:int,b:double,c:str"
    narrow_schema = "a:double,b:str"

    # Two single-row frames: column b differs by 0.001 and the metadata differs.
    left = ArrayDataFrame([[0, 100.0, "a"]], wide_schema, dict(a=1))
    right = ArrayDataFrame([[0, 100.001, "a"]], wide_schema, dict(a=2))
    assert df_eq(left, left)
    assert df_eq(left, right, digits=4, check_metadata=False)
    # Same comparison with the metadata check switched on.
    assert not df_eq(left, right, digits=4, check_metadata=True)
    # With digits=6 the value gap is expected to count as a difference.
    assert not df_eq(left, right, digits=6, check_metadata=False)
    # With the content check off, the value gap is expected to be ignored.
    assert df_eq(left, right, digits=6, check_metadata=False, check_content=False)
    # throw=True is expected to raise rather than return a falsy result.
    raises(AssertionError, lambda: df_eq(left, right, throw=True))

    # A frame compared against its own pandas form, with schema and metadata
    # supplied positionally.
    typed = ArrayDataFrame([[100.0, "a"]], narrow_schema, dict(a=1))
    assert df_eq(typed, typed.as_pandas(), typed.schema, typed.metadata)

    # None in a double column: a frame is expected to equal itself ...
    with_none = ArrayDataFrame([[None, "a"]], narrow_schema, dict(a=1))
    assert df_eq(with_none, with_none)

    # ... and to equal a frame holding NaN in the same cell.
    with_none = ArrayDataFrame([[None, "a"]], narrow_schema, dict(a=1))
    with_nan = ArrayDataFrame([[np.nan, "a"]], narrow_schema, dict(a=1))
    assert df_eq(with_none, with_nan)

    # None in a string column, two separately built frames.
    str_none_a = ArrayDataFrame([[100.0, None]], narrow_schema, dict(a=1))
    str_none_b = ArrayDataFrame([[100.0, None]], narrow_schema, dict(a=1))
    assert df_eq(str_none_a, str_none_b)

    # Same rows in opposite order: equal by default, unequal with check_order.
    ascending = ArrayDataFrame([[0], [1]], "a:int")
    descending = ArrayDataFrame([[1], [0]], "a:int")
    assert df_eq(ascending, descending)
    assert not df_eq(ascending, descending, check_order=True)
def test_nan_none():
    """Check how nulls come back after a round trip through ``PandasDataFrame``.

    NOTE(review): another ``test_nan_none`` is visible nearby; if both live in
    the same module the later definition shadows this one -- confirm.
    """
    # All-null row, str/double schema, default as_array().
    nulls = ArrayDataFrame([[None, None]], "b:str,c:double")
    assert nulls.as_pandas().iloc[0, 0] is None
    wrapped = PandasDataFrame(nulls.as_pandas(), nulls.schema)
    row = wrapped.as_array()[0]
    assert row[0] is None
    assert math.isnan(row[1])

    # All-null row, int/bool schema, type_safe=True: both cells expected None.
    nulls = ArrayDataFrame([[None, None]], "b:int,c:bool")
    wrapped = PandasDataFrame(nulls.as_pandas(), nulls.schema)
    row = wrapped.as_array(type_safe=True)[0]
    assert row[0] is None
    assert row[1] is None

    # Mixed frame: inspect the second (all-null) row under type_safe=True.
    mixed = ArrayDataFrame([["a", 1.1], [None, None]], "b:str,c:double")
    wrapped = PandasDataFrame(mixed.as_pandas(), mixed.schema)
    row = wrapped.as_array(type_safe=True)[1]
    assert row[0] is None
    assert row[1] is None
def test_nan_none():
    """Check how nulls come back after a round trip through ``DaskDataFrame``.

    NOTE(review): another ``test_nan_none`` is visible nearby; if both live in
    the same module this definition shadows the earlier one -- confirm.
    """
    # TODO: on dask, these tests can't pass
    # df = ArrayDataFrame([[None, None]], "b:str,c:double")
    # assert df.as_pandas().iloc[0, 0] is None
    # arr = DaskDataFrame(df.as_pandas(), df.schema).as_array()[0]
    # assert arr[0] is None
    # assert math.isnan(arr[1])

    # df = ArrayDataFrame([[None, None]], "b:int,c:bool")
    # arr = DaskDataFrame(df.as_pandas(), df.schema).as_array(type_safe=True)[0]
    # assert np.isnan(arr[0])  # TODO: this will cause inconsistent behavior cross engine
    # assert np.isnan(arr[1])  # TODO: this will cause inconsistent behavior cross engine

    mixed = ArrayDataFrame([["a", 1.1], [None, None]], "b:str,c:double")

    # Built from the pandas form: second row is None for str, NaN for double.
    from_pandas = DaskDataFrame(mixed.as_pandas(), mixed.schema)
    row = from_pandas.as_array()[1]
    assert row[0] is None
    assert math.isnan(row[1])

    # Built from the array form: same expectations.
    from_array = DaskDataFrame(mixed.as_array(), mixed.schema)
    row = from_array.as_array()[1]
    assert row[0] is None
    assert math.isnan(row[1])

    # Built from the single pandas column "b" with a one-column schema.
    from_column = DaskDataFrame(mixed.as_pandas()["b"], "b:str")
    row = from_column.as_array()[1]
    assert row[0] is None
def test_to_local_df():
    """Exercise ``to_local_df`` on existing frames, native objects and bad input."""
    schema = "a:int,b:int"
    array_df = ArrayDataFrame([[0, 1]], schema)
    pandas_df = PandasDataFrame(array_df.as_pandas(), schema)
    iter_df = IterableDataFrame([[0, 1]], schema)
    frames = (array_df, pandas_df, iter_df)

    # Frames of these three types are expected to be returned as-is.
    for frame in frames:
        assert to_local_df(frame) is frame

    # Each native object plus a schema is expected to come back wrapped in
    # the matching frame type.
    expected_types = (ArrayDataFrame, PandasDataFrame, IterableDataFrame)
    for frame, expected in zip(frames, expected_types):
        assert isinstance(to_local_df(frame.native, schema), expected)

    # An int is not an accepted input.
    raises(TypeError, lambda: to_local_df(123))

    # Metadata passed as the third argument shows up on the result.
    meta = dict(a=1)
    converted = to_local_df(array_df.native, array_df.schema, meta)
    assert converted.metadata == meta

    raises(NoneArgumentError, lambda: to_local_df(None))
    # Passing a schema argument together with an already-built frame is
    # expected to raise.
    raises(ValueError, lambda: to_local_df(array_df, "a:int,b:int", None))