def test_lazy():
    """Check that basic lazy queries execute and produce the expected frames.

    Covers two lazy pipelines built from the same eager frame:

    * adding a literal column and then selecting a subset of columns;
    * adding a conditional column via ``when(...).then(...).otherwise(...)``.
    """
    df = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]})

    # Literal column "foo" is added, then only "a" and "foo" are kept.
    selected = (
        df.lazy()
        .with_column(lit(1).alias("foo"))
        .select([col("a"), col("foo")])
        .collect()
    )
    assert selected.shape == (3, 2)

    # Conditional column: 10 where a > 2, otherwise 1.
    conditional = (
        df.lazy()
        .with_column(
            when(col("a").gt(lit(2))).then(lit(10)).otherwise(lit(1)).alias("new")
        )
        .collect()
    )
    assert conditional.shape == (3, 3)
    # Only the last row (a == 3) satisfies a > 2, so the column is 1, 1, 10.
    assert conditional["new"].sum() == 12
def test_binary_function():
    """Check that ``map_binary`` applies a Python function to two columns.

    The lazy query adds a column computed as ``a + b`` through ``map_binary``;
    the result is read back under the name ``"binary_function"`` and compared
    row by row with the same sum computed eagerly.
    """
    df = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]})
    out = (
        df.lazy()
        .with_column(map_binary(col("a"), col("b"), lambda a, b: a + b))
        .collect()
    )

    # `==` between two Series is element-wise, so reduce the resulting mask
    # explicitly instead of relying on the truthiness of the comparison
    # object: every row must match.
    matches = out["binary_function"] == (out.a + out.b)
    assert matches.sum() == out.shape[0]
def test_custom_groupby():
    """Run user-supplied Python callables through eager and lazy groupby."""
    eager = DataFrame({"A": ["a", "a", "c", "c"], "B": [1, 3, 5, 2]})

    # Eager path: the callable reduces the selected column of each group.
    summed = eager.groupby("A").select("B").apply(lambda grp: grp.sum())
    assert summed.shape == (2, 2)

    # Eager path: the callable returns a Series rebuilt from a numpy array.
    rebuilt = (
        eager.groupby("A")
        .select("B")
        .apply(lambda grp: Series("", np.array(grp)))
    )
    assert rebuilt.shape == (2, 2)

    # Lazy path: a per-group callable inside an aggregation expression.
    source = DataFrame({"a": [1, 2, 1, 1], "b": ["a", "b", "c", "c"]})
    group_sum = col("a").apply_groups(lambda grp: grp.sum(), dtype_out=int)
    out = source.lazy().groupby("b").agg([group_sum]).collect()
    assert out.shape == (3, 2)
def test_join():
    """Exercise inner, left and outer joins, multi-key joins and lazy joins."""
    left = DataFrame(
        {
            "a": ["a", "b", "a", "z"],
            "b": [1, 2, 3, 4],
            "c": [6, 5, 4, 3],
        }
    )
    right = DataFrame(
        {
            "a": ["b", "c", "b", "a"],
            "k": [0, 3, 9, 6],
            "c": [1, 0, 2, 1],
        }
    )
    # Every single-key join below matches column "a" on both sides.
    on_a = {"left_on": "a", "right_on": "a"}

    # Join with the default strategy.
    default_join = left.join(right, **on_a).sort("a")
    assert default_join["b"].series_equal(Series("", [1, 3, 2, 2]))

    # Left join: the unmatched left key "z" has no right-hand values.
    left_join = left.join(right, how="left", **on_a).sort("a")
    assert left_join["c_right"].is_null().sum() == 1
    assert left_join["b"].series_equal(Series("", [1, 3, 2, 2, 4]))

    # Outer join.
    # NOTE(review): the expected null counts for "c" and "b" are kept exactly
    # as the test has always pinned them - confirm against the join
    # implementation before changing them.
    outer_join = left.join(right, how="outer", **on_a).sort("a")
    assert outer_join["c_right"].null_count() == 1
    assert outer_join["c"].null_count() == 2
    assert outer_join["b"].null_count() == 2

    df_a = DataFrame({"a": [1, 2, 1, 1], "b": ["a", "b", "c", "c"]})
    df_b = DataFrame(
        {
            "foo": [1, 1, 1],
            "bar": ["a", "c", "c"],
            "ham": ["let", "var", "const"],
        }
    )

    # Smoke test only: joining on several columns must not raise.
    df_a.join(df_b, left_on=["a", "b"], right_on=["foo", "bar"])

    # The lazy join must yield a frame of the same shape as the eager join.
    eager_result = df_a.join(df_b, left_on="a", right_on="foo")
    lazy_result = (
        df_a.lazy().join(df_b.lazy(), left_on="a", right_on="foo").collect()
    )
    assert lazy_result.shape == eager_result.shape
def test_groupby_apply():
    """Applying an identity function per group must give back the input frame."""

    def _identity(group):
        # Return each group unchanged.
        return group

    df = DataFrame({"a": [1, 1, 3], "b": [1.0, 2.0, 3.0]})
    result = df.lazy().groupby("a").apply(_identity).collect()

    # Sort by "b" before comparing with the original frame.
    assert result.sort("b").frame_equal(df)
def test_or():
    """Filter lazily with two predicates combined through ``|``."""
    frame = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]})

    # Keep rows where a == 1 or b > 2.
    predicate = (pl.col("a") == 1) | (pl.col("b") > 2)
    filtered = frame.lazy().filter(predicate).collect()

    assert filtered.shape[0] == 2
def test_fold():
    """Select ``pl.sum`` over columns "a" and "b" and check the "sum" column."""
    frame = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]})
    expected = Series("sum", [2, 4, 6])

    query = frame.lazy().select(pl.sum(["a", "b"]))
    result = query.collect()

    assert result["sum"].series_equal(expected)
def test_agg():
    """A lazy ``min`` over the frame collapses it to one row per column."""
    frame = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]})

    reduced = frame.lazy().min().collect()

    # One row, with both original columns still present.
    assert reduced.shape == (1, 2)
def test_apply():
    """Check that ``Expr.map`` applies a Python function to a column.

    Column "a" is doubled through a lambda and stored under the alias "foo";
    the collected frame must contain that extra column with the doubled
    values.
    """
    df = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]})
    out = df.lazy().with_column(col("a").map(lambda s: s * 2).alias("foo")).collect()

    # The two original columns plus the mapped one.
    assert out.shape == (3, 3)
    assert out["foo"].series_equal(Series("foo", [2, 4, 6]))