def _test_as_array_perf():
    """Ad-hoc benchmark of ``as_array()`` versus ``as_array(type_safe=True)``.

    Builds a 300-column frame (100 ``int`` columns holding ints, 100 ``int``
    columns holding floats, 100 ``str`` columns holding strings) with 5000
    identical rows, then prints the accumulated seconds spent in ten
    non-type-safe and ten type-safe conversions.

    NOTE(review): the leading underscore presumably keeps the test runner
    from collecting this function -- confirm before renaming.
    """
    # perf_counter is monotonic and high resolution, unlike the wall clock
    # returned by datetime.now(), so the intervals cannot be skewed by
    # system clock adjustments.
    from time import perf_counter

    s = Schema()
    row = []
    # (column prefix, declared type, converter applied to the index value)
    column_groups = (("a", "int", int), ("b", "int", float), ("c", "str", str))
    for prefix, type_name, convert in column_groups:
        for i in range(100):
            s.append(f"{prefix}{i}:{type_name}")
            row.append(convert(i))

    # Every row is an independent copy so no row object is shared.
    data = [list(row) for _ in range(5000)]
    df = ArrayDataFrame(data, s)

    # Untimed calls before measuring (presumably a warm-up).
    df.as_array()
    df.as_array(type_safe=True)

    nts, ts = 0.0, 0.0
    for _ in range(10):
        start = perf_counter()
        df.as_array()
        nts += perf_counter() - start

        start = perf_counter()
        df.as_array(type_safe=True)
        ts += perf_counter() - start
    print(nts, ts)
def test_nested():
    """Type-safe conversion of nested struct and list columns.

    Rows are supplied both as Python dicts and as JSON strings; the
    assertions pin the converted output for each case.
    """
    # Struct column: one row is a dict, the other a JSON-encoded string.
    struct_rows = [
        [{"a": 1, "b": [3, 4], "d": 1.0}],
        [json.dumps({"b": [30, "40"]})],
    ]
    struct_frame = ArrayDataFrame(struct_rows, "a:{a:str,b:[int]}")
    struct_result = struct_frame.as_array(type_safe=True)
    expected_structs = [
        [{"a": "1", "b": [3, 4]}],
        [{"a": None, "b": [30, 40]}],
    ]
    assert expected_structs == struct_result

    # List-of-struct column whose single element is a JSON-encoded string.
    list_rows = [[[json.dumps({"b": [30, "40"]})]]]
    list_frame = ArrayDataFrame(list_rows, "a:[{a:str,b:[int]}]")
    list_result = list_frame.as_array(type_safe=True)
    expected_lists = [[[{"a": None, "b": [30, 40]}]]]
    assert expected_lists == list_result
def test_transformer():
    """Check the transformers ``t1``, ``t2`` and ``t3`` defined outside this function.

    ``t1`` must be a ``CoTransformer``; ``t1`` and ``t2`` must report the
    expected output schemas for a pair of input frames; calling ``t3`` with
    an array and a pandas frame must produce the expected single row.
    """
    assert isinstance(t1, CoTransformer)

    left = ArrayDataFrame([[0, 2]], "a:int,b:int")
    right = ArrayDataFrame([[0, 2]], "a:int,c:int")
    inputs = DataFrames(left, right)

    # The output schema is computed from the inputs and stored on the
    # transformer before the public property is read.
    schema_cases = ((t1, "a:int,b:int"), (t2, "b:int,a:int"))
    for transformer, expected_schema in schema_cases:
        transformer._output_schema = transformer.get_output_schema(inputs)
        assert transformer.output_schema == expected_schema

    produced = list(t3(left.as_array(), right.as_pandas()))
    assert [[0, 2, 1]] == produced
def test_simple_methods():
    """Exercise count, empty, peek, as_array and as_pandas on an ArrayDataFrame."""
    schema = "x:str,y:double"

    def fresh_rows():
        # A new list per frame, so no frame shares row objects with another.
        return [["a", 1], ["b", "2"]]

    frame = ArrayDataFrame(fresh_rows(), schema)
    assert 2 == frame.count()
    assert not frame.empty
    # Peeking returns the first row with ``y`` as a float.
    assert ["a", 1.0] == frame.peek_array()
    assert {"x": "a", "y": 1.0} == frame.peek_dict()
    # Without type_safe the rows come back exactly as supplied.
    assert fresh_rows() == frame.as_array()

    frame = ArrayDataFrame(fresh_rows(), schema)
    pandas_frame = frame.as_pandas()
    assert [["a", 1.0], ["b", 2.0]] == pandas_frame.values.tolist()

    empty_frame = ArrayDataFrame([], schema)
    pandas_frame = empty_frame.as_pandas()
    assert [] == pandas_frame.values.tolist()
def test_as_array():
    """Cover ``as_array`` / ``as_array_iterable`` with and without type safety.

    Cases: an empty frame, column selection and reordering, float-to-int
    conversion, NumPy scalars, and datetime inputs given as a pandas
    Timestamp, an ISO string and ``NaT``.
    """
    str_int = "a:str,b:int"
    double_int = "a:double,b:int"
    datetime_int = "a:datetime,b:int"

    # Empty frame: every access path yields an empty list.
    empty = ArrayDataFrame([], str_int)
    assert [] == empty.as_array()
    assert [] == empty.as_array(type_safe=True)
    assert [] == list(empty.as_array_iterable())
    assert [] == list(empty.as_array_iterable(type_safe=True))

    # Plain access, then explicit column selection and reordering.
    frame = ArrayDataFrame([["a", 1]], str_int)
    assert [["a", 1]] == frame.as_array()
    for columns, expected in (
        (["a", "b"], [["a", 1]]),
        (["b", "a"], [[1, "a"]]),
    ):
        frame = ArrayDataFrame([["a", 1]], str_int)
        assert expected == frame.as_array(columns)

    # Without type safety the float 1.1 in the int column is not converted.
    frame = ArrayDataFrame([[1.0, 1.1]], double_int)
    unsafe = frame.as_array(type_safe=False)
    assert [[1.0, 1]] != unsafe

    # With type safety the values take the Python types of the schema.
    frame = ArrayDataFrame([[1.0, 1.1]], double_int)
    safe = frame.as_array(type_safe=True)
    assert [[1.0, 1]] == safe
    assert isinstance(safe[0][0], float)
    assert isinstance(safe[0][1], int)

    # Type safety combined with column selection and reordering.
    for columns, expected in (
        (["a", "b"], [[1.0, 1]]),
        (["b", "a"], [[1, 1.0]]),
    ):
        frame = ArrayDataFrame([[1.0, 1.1]], double_int)
        assert expected == frame.as_array(columns, type_safe=True)

    # A NumPy float scalar comes back as a builtin float.
    frame = ArrayDataFrame([[np.float64(1.0), 1.1]], double_int)
    safe = frame.as_array(type_safe=True)
    assert [[1.0, 1]] == safe
    assert isinstance(safe[0][0], float)
    assert isinstance(safe[0][1], int)

    # Datetime column: Timestamp and ISO string become datetime, NaT becomes None.
    datetime_cases = (
        (pd.Timestamp("2020-01-01"), datetime(2020, 1, 1)),
        ("2020-01-01", datetime(2020, 1, 1)),
        (pd.NaT, None),
    )
    for raw_value, converted in datetime_cases:
        frame = ArrayDataFrame([[raw_value, 1.1]], datetime_int)
        assert [[converted, 1]] == frame.as_array(type_safe=True)
def test_init():
    """Cover ArrayDataFrame construction from schemas, rows and other frames."""
    # Schema-only construction gives an empty, bounded frame.
    frame = ArrayDataFrame(schema="a:str,b:int")
    assert frame.empty
    assert frame.schema == "a:str,b:int"
    assert frame.is_bounded

    data = [["a", 1], ["b", 2]]
    base_schema = "a:str,b:double"
    as_doubles = [["a", 1.0], ["b", 2.0]]

    # The same raw rows read under different declared schemas.
    for schema, expected in (
        ("a:str,b:str", [["a", "1"], ["b", "2"]]),
        ("a:str,b:int", [["a", 1], ["b", 2]]),
        (base_schema, as_doubles),
    ):
        frame = ArrayDataFrame(data, schema)
        assert expected == frame.as_array(type_safe=True)

    # Wrapping an existing ArrayDataFrame without a schema.
    frame = ArrayDataFrame(data, base_schema)
    wrapped = ArrayDataFrame(frame)
    assert as_doubles == wrapped.as_array(type_safe=True)

    # Wrapping with a string schema (type alias, then reorder plus cast).
    for schema, expected in (
        ("a:str,b:float64", as_doubles),
        ("b:str,a:str", [["1", "a"], ["2", "b"]]),
    ):
        frame = ArrayDataFrame(data, base_schema)
        wrapped = ArrayDataFrame(frame, schema)
        assert expected == wrapped.as_array(type_safe=True)

    # Wrapping with a list of column names keeps the source column type.
    frame = ArrayDataFrame(data, base_schema)
    wrapped = ArrayDataFrame(frame, ["b"])
    assert wrapped.schema == "b:double"
    assert [[1.0], [2.0]] == wrapped.as_array(type_safe=True)

    # Wrapping with a list of schema expressions.
    for schema, expected in (
        (["a:str,b:str"], [["a", "1"], ["b", "2"]]),
        (["b:str"], [["1"], ["2"]]),
    ):
        frame = ArrayDataFrame(data, base_schema)
        wrapped = ArrayDataFrame(frame, schema)
        assert expected == wrapped.as_array(type_safe=True)

    # Construction from a PandasDataFrame, same schema and then reordered/cast.
    pandas_frame = PandasDataFrame(data, base_schema)
    frame = ArrayDataFrame(pandas_frame, base_schema)
    assert as_doubles == frame.as_array(type_safe=True)
    frame = ArrayDataFrame(pandas_frame, "b:str,a:str")
    assert [["1.0", "a"], ["2.0", "b"]] == frame.as_array(type_safe=True)

    # Empty row list.
    frame = ArrayDataFrame([], "x:str,y:double")
    assert frame.empty

    # An unsupported input object must be rejected.
    raises(FugueDataFrameInitError, lambda: ArrayDataFrame(123))