def test_lazy_concat(df: pl.DataFrame) -> None:
    """Concatenating a lazy frame with itself doubles the rows, keeps the columns."""
    n_rows, n_cols = df.shape
    expected_shape = (n_rows * 2, n_cols)

    stacked = pl.concat([df.lazy(), df.lazy()]).collect()
    assert stacked.shape == expected_shape

    # The eager equivalent: stack the frame on top of a clone of itself.
    reference = df.vstack(df.clone())
    assert stacked.frame_equal(reference, null_equal=True)
def test_lazy():
    """Run `with_column`/`select` and a `when/then/otherwise` through the lazy API.

    Both queries are collected and their results are asserted, so the test
    checks the produced values and not only that the queries execute.
    """
    df = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]})

    # Add a literal column, then project down to two columns.
    projected = (
        df.lazy()
        .with_column(lit(1).alias("foo"))
        .select([col("a"), col("foo")])
        .collect()
    )
    assert projected.shape == (3, 2)
    assert projected.columns == ["a", "foo"]

    # Conditional column: 10 where a > 2, otherwise 1.
    new = (
        df.lazy()
        .with_column(
            when(col("a").gt(lit(2))).then(lit(10)).otherwise(lit(1)).alias("new")
        )
        .collect()
    )
    flags = new["new"]
    assert flags[0] == 1
    assert flags[1] == 1
    assert flags[2] == 10
def test_quantile(fruits_cars: pl.DataFrame) -> None:
    """Check `quantile` for each interpolation mode on column "A".

    Every case is evaluated twice: through `LazyFrame.quantile` and through
    the `pl.col("A").quantile` expression in an eager `select`.
    Values are compared with `pytest.approx` because the "midpoint" and
    "linear" modes produce floats, where exact equality is brittle.
    """
    # (quantile, interpolation, expected value of column "A")
    cases = [
        (0.25, "nearest", 2),
        (0.24, "lower", 1),
        (0.26, "higher", 3),
        (0.24, "midpoint", 1.5),
        (0.24, "linear", 1.96),
    ]
    for quantile, interpolation, expected in cases:
        lazy_value = (
            fruits_cars.lazy().quantile(quantile, interpolation).collect()["A"][0]
        )
        expr_value = fruits_cars.select(
            pl.col("A").quantile(quantile, interpolation)
        )["A"][0]
        assert lazy_value == pytest.approx(expected)
        assert expr_value == pytest.approx(expected)
def test_last(fruits_cars: pl.DataFrame) -> None:
    """`LazyFrame.last` matches the eager slice starting at the final row index."""
    final_idx = len(fruits_cars) - 1
    expected = fruits_cars[final_idx:, :]

    result = fruits_cars.lazy().last().collect()
    assert result.frame_equal(expected)
def test_binary_function():
    """`map_binary` applies a Python function to two columns in a lazy query.

    The resulting "binary_function" column must equal the sum of columns
    "a" and "b".
    """
    df = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]})
    out = (
        df.lazy()
        .with_column(map_binary(col("a"), col("b"), lambda a, b: a + b))
        .collect()
    )
    # `==` between two Series is an element-wise comparison, not a single
    # bool, so asserting on it does not verify the values; use `series_equal`.
    expected = out["a"] + out["b"]
    assert out["binary_function"].series_equal(expected)
def test_set_null():
    """A `when/then/otherwise` with a null literal yields `None` where it matches."""
    frame = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]})

    # Null where a > 1, otherwise the constant 100.
    nulled = when(col("a") > 1).then(lit(None)).otherwise(100).alias("foo")
    out = frame.lazy().with_column(nulled).collect()

    foo = out["foo"]
    assert foo[0] == 100
    assert foo[1] is None
    assert foo[2] is None
def test_shift(fruits_cars: pl.DataFrame) -> None:
    """Check `shift` on an expression and on a whole lazy frame.

    Covers a positive shift of a single column, a positive shift of every
    column of `fruits_cars`, and a negative shift whose trailing rows must
    become null.
    """
    df = pl.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 2, 3, 4, 5]})
    out = df.select(col("a").shift(1))
    assert out["a"].series_equal(pl.Series("a", [None, 1, 2, 3, 4]), null_equal=True)

    res = fruits_cars.lazy().shift(2).collect()
    expected = pl.DataFrame(
        {
            "A": [None, None, 1, 2, 3],
            "fruits": [None, None, "banana", "banana", "apple"],
            "B": [None, None, 5, 4, 3],
            "cars": [None, None, "beetle", "audi", "beetle"],
        }
    )
    # The comparison result must be asserted; a bare call would discard it.
    assert res.frame_equal(expected, null_equal=True)

    # negative value
    res = fruits_cars.lazy().shift(-2).collect()
    for rows in [3, 4]:
        for cols in range(4):
            assert res[rows, cols] is None
def test_join():
    """Exercise joins with the default, "left" and "outer" strategies.

    Also checks that a join on multiple columns runs and that the lazy and
    eager joins produce frames of the same shape.
    """
    left = DataFrame(
        {
            "a": ["a", "b", "a", "z"],
            "b": [1, 2, 3, 4],
            "c": [6, 5, 4, 3],
        }
    )
    right = DataFrame(
        {
            "a": ["b", "c", "b", "a"],
            "k": [0, 3, 9, 6],
            "c": [1, 0, 2, 1],
        }
    )

    # Join with the default `how`.
    default_join = left.join(right, left_on="a", right_on="a").sort("a")
    expected_b = Series("", [1, 3, 2, 2])
    assert default_join["b"].series_equal(expected_b)

    # Left join.
    left_join = left.join(right, left_on="a", right_on="a", how="left").sort("a")
    assert left_join["c_right"].is_null().sum() == 1
    expected_b = Series("", [1, 3, 2, 2, 4])
    assert left_join["b"].series_equal(expected_b)

    # Outer join.
    outer_join = left.join(right, left_on="a", right_on="a", how="outer").sort("a")
    assert outer_join["c_right"].null_count() == 1
    assert outer_join["c"].null_count() == 2
    assert outer_join["b"].null_count() == 2

    frame_a = DataFrame({"a": [1, 2, 1, 1], "b": ["a", "b", "c", "c"]})
    frame_b = DataFrame(
        {
            "foo": [1, 1, 1],
            "bar": ["a", "c", "c"],
            "ham": ["let", "var", "const"],
        }
    )

    # just check if join on multiple columns runs
    frame_a.join(frame_b, left_on=["a", "b"], right_on=["foo", "bar"])

    # Lazy and eager joins on a single key must agree on shape.
    eager = frame_a.join(frame_b, left_on="a", right_on="foo")
    lazy = frame_a.lazy().join(frame_b.lazy(), left_on="a", right_on="foo").collect()
    assert lazy.shape == eager.shape
def test_custom_groupby():
    """Apply custom Python functions to groups, eagerly and lazily."""
    eager_frame = DataFrame({"A": ["a", "a", "c", "c"], "B": [1, 3, 5, 2]})

    # Eager: apply a reduction to the selected column of each group.
    summed = eager_frame.groupby("A").select("B").apply(lambda grp: grp.sum())
    assert summed.shape == (2, 2)

    # Eager: apply a function that returns a Series built from a numpy array.
    rebuilt = (
        eager_frame.groupby("A")
        .select("B")
        .apply(lambda grp: Series("", np.array(grp)))
    )
    assert rebuilt.shape == (2, 2)

    # Lazy: apply inside an aggregation with an explicit output dtype.
    lazy_frame = DataFrame({"a": [1, 2, 1, 1], "b": ["a", "b", "c", "c"]})
    aggregation = col("a").apply(lambda grp: grp.sum(), dtype_out=int)
    out = lazy_frame.lazy().groupby("b").agg([aggregation]).collect()
    assert out.shape == (3, 2)
def test_std(fruits_cars: pl.DataFrame) -> None:
    """`LazyFrame.std` gives the expected standard deviation for column "A"."""
    std_frame = fruits_cars.lazy().std().collect()
    observed = std_frame["A"][0]
    assert observed == pytest.approx(1.5811388300841898)
def test_first(fruits_cars: pl.DataFrame) -> None:
    """`LazyFrame.first` matches the eager selection `fruits_cars[0, :]`."""
    expected = fruits_cars[0, :]
    result = fruits_cars.lazy().first().collect()
    assert result.frame_equal(expected)
def test_groupby_apply():
    """An identity function applied per group reproduces the frame once sorted."""
    frame = DataFrame({"a": [1, 1, 3], "b": [1.0, 2.0, 3.0]})

    # Returning each group unchanged should give back all the original rows.
    query = frame.lazy().groupby("a").apply(lambda group: group)
    round_tripped = query.collect().sort("b")

    assert round_tripped.frame_equal(frame)
def test_var(fruits_cars: pl.DataFrame) -> None:
    """`LazyFrame.var` gives the expected variance for column "A"."""
    var_frame = fruits_cars.lazy().var().collect()
    observed = var_frame["A"][0]
    assert observed == pytest.approx(2.5)
def test_fold():
    """`pl.sum` over a list of column names produces a row-wise "sum" column."""
    frame = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]})

    row_sums = frame.lazy().select(pl.sum(["a", "b"])).collect()

    expected = Series("sum", [2, 4, 6])
    assert row_sums["sum"].series_equal(expected)
def test_or():
    """Two predicates combined with `|` in a lazy filter keep rows matching either."""
    frame = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]})

    a_is_one = pl.col("a") == 1
    b_above_two = pl.col("b") > 2
    kept = frame.lazy().filter(a_is_one | b_above_two).collect()

    n_rows = kept.shape[0]
    assert n_rows == 2
def test_apply():
    """`Expr.map` applies a Python function to a column in a lazy query.

    Column "a" is doubled into a new column "foo"; the result is asserted so
    the test checks the computed values and not only that the query runs.
    """
    df = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]})
    new = df.lazy().with_column(col("a").map(lambda s: s * 2).alias("foo")).collect()

    # One column is added to the original two.
    assert new.shape == (3, 3)
    # [1, 2, 3] doubled is [2, 4, 6], which sums to 12.
    assert new["foo"].sum() == 12
def test_agg():
    """A whole-frame `min` on a lazy frame collapses it to one row per column."""
    frame = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]})

    minima = frame.lazy().min().collect()

    assert minima.shape == (1, 2)
def test_add_eager_column():
    """An eager Series wrapped in `pl.lit` can be added as a column to a lazy frame."""
    frame = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]})
    eager_column = pl.Series("c", [1, 2, 3])

    out = frame.lazy().with_column(pl.lit(eager_column)).collect()

    total = out["c"].sum()
    assert total == 6
def test_median(fruits_cars: pl.DataFrame) -> None:
    """The median of column "A" is 3 via both the lazy frame and the expression."""
    # Whole-frame median through the lazy API.
    lazy_result = fruits_cars.lazy().median().collect()
    assert lazy_result["A"][0] == 3

    # Column-level median through an expression in an eager select.
    expr_result = fruits_cars.select(pl.col("A").median())
    assert expr_result["A"][0] == 3
def test_collect_all(df: pl.DataFrame, no_optimization: bool) -> None:
    """`pl.collect_all` evaluates several lazy queries and returns them in order."""
    int_total = df.lazy().select(pl.col("int").sum())
    doubled_float_total = df.lazy().select((pl.col("floats") * 2).sum())

    results = pl.collect_all(
        [int_total, doubled_float_total],
        no_optimization=no_optimization,
    )

    # Each result holds a single value, read from cell [0, 0].
    assert results[0][0, 0] == 6
    assert results[1][0, 0] == 12.0
def test_fetch(fruits_cars: pl.DataFrame) -> None:
    """`fetch(2)` on a select-all query matches the eager slice `fruits_cars[:2, :]`."""
    res = fruits_cars.lazy().select("*").fetch(2)
    # Compare against the source frame, not a slice of `res` itself:
    # a self-comparison only bounds the row count and cannot detect wrong rows.
    assert res.frame_equal(fruits_cars[:2, :])
def test_tail(fruits_cars: pl.DataFrame) -> None:
    """`LazyFrame.tail(2)` matches the eager slice `fruits_cars[3:, :]`."""
    expected = fruits_cars[3:, :]
    result = fruits_cars.lazy().tail(2).collect()
    assert result.frame_equal(expected)
def test_head(fruits_cars: pl.DataFrame) -> None:
    """`LazyFrame.head(2)` matches the eager slice `fruits_cars[:2, :]`."""
    expected = fruits_cars[:2, :]
    result = fruits_cars.lazy().head(2).collect()
    assert result.frame_equal(expected)
def test_with_column_renamed(fruits_cars: pl.DataFrame) -> None:
    """Renaming column "A" to "C" on a lazy frame makes "C" the first column."""
    mapping = {"A": "C"}
    renamed = fruits_cars.lazy().rename(mapping).collect()

    first_column = renamed.columns[0]
    assert first_column == "C"