def test_fold() -> None:
    """Check horizontal summation via ``pl.sum`` and an explicit ``pl.fold``.

    Both expressions add column ``a`` (ints) and column ``b`` (floats)
    row by row, so the expected totals are ``2, 4, 6``.
    """
    df = pl.DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]})

    # Lazy path: pl.sum over a list of columns sums across each row.
    out = df.lazy().select(pl.sum(["a", "b"])).collect()
    # int + float yields a float column, so compare against float values.
    assert out["sum"].series_equal(pl.Series("sum", [2.0, 4.0, 6.0]))

    # Eager path: the same sum spelled out as a fold over every column.
    out = df.select(
        pl.fold(acc=pl.lit(0), f=lambda acc, x: acc + x, exprs=pl.col("*")).alias("foo")
    )
    # Comparing a Series with a list gives an element-wise Series, not a bool,
    # so asserting on it directly does not verify the values. Compare the
    # materialised list instead.
    assert out["foo"].to_list() == [2, 4, 6]
def test_fold_filter():
    """Filter rows with predicates combined through ``pl.fold``.

    One ``> 1`` predicate is built per column; the predicates are folded
    first with ``&`` and then with ``|``, each time starting from a literal
    ``True`` accumulator.
    """
    df = pl.DataFrame({"a": [1, 2, 3], "b": [0, 1, 2]})

    # One boolean expression per column: "value is greater than 1".
    above_one = [col(name) > 1 for name in df.columns]

    # AND-fold: only the last row has both columns above 1.
    every_column = pl.fold(
        acc=pl.lit(True),
        f=lambda running, mask: running & mask,
        exprs=above_one,
    )
    out = df.filter(every_column)
    assert out.shape == (1, 2)

    # OR-fold seeded with True: all three rows are kept.
    any_column = pl.fold(
        acc=pl.lit(True),
        f=lambda running, mask: running | mask,
        exprs=above_one,
    )
    out = df.filter(any_column)
    assert out.shape == (3, 2)
def test_regex_in_filter() -> None:
    """Use a regex column selection inside a fold-based filter.

    The pattern selects columns by name; each selected column is compared
    with ``< 3`` and the resulting masks are OR-ed together.
    """
    df = pl.DataFrame(
        {
            "nrs": [1, 2, 3, None, 5],
            "names": ["foo", "ham", "spam", "egg", None],
            "flt": [1.0, None, 3.0, 1.0, None],
        }
    )

    # Regex selection expands to one "< 3" predicate per matching column.
    below_three = pl.col("^nrs|flt*$") < 3

    matched = df.filter(
        pl.fold(acc=False, f=lambda running, mask: running | mask, exprs=below_three)
    )

    # The first surviving row is the first row of the input frame.
    assert matched.row(0) == (1, "foo", 1.0)
def test_fold() -> None:
    """Check horizontal ``sum``/``max``/``min`` and an explicit ``pl.fold``.

    Column ``a`` holds ints and column ``b`` floats; ``max`` and ``min`` are
    taken between ``a`` and ``b`` squared, row by row.
    """
    df = pl.DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]})

    out = df.select(
        [
            pl.sum(["a", "b"]),
            pl.max(["a", pl.col("b") ** 2]),
            pl.min(["a", pl.col("b") ** 2]),
        ]
    )
    assert out["sum"].series_equal(pl.Series("sum", [2.0, 4.0, 6.0]))
    assert out["max"].series_equal(pl.Series("max", [1.0, 4.0, 9.0]))
    assert out["min"].series_equal(pl.Series("min", [1.0, 2.0, 3.0]))

    # The same row-wise sum spelled out as a fold over every column.
    out = df.select(
        pl.fold(acc=pl.lit(0), f=lambda acc, x: acc + x, exprs=pl.col("*")).alias("foo")
    )
    # Comparing a Series with a list gives an element-wise Series, not a bool,
    # so asserting on it directly does not verify the values. Compare the
    # materialised list instead.
    assert out["foo"].to_list() == [2, 4, 6]
def test_sum_to_one(self):
    """Assert that the component columns add up to one on every row.

    Before summing, any negative component value is replaced with an equal
    share, ``1 / len(cols)``.
    """
    cols = [
        "gravel_component",
        "sand_component",
        "clay_component",
        "loam_component",
        "peat_component",
        "silt_component",
    ]

    # Build one cleaned-up expression per component column.
    terms = []
    for name in cols:
        component = pl.col(name)
        terms.append(
            pl.when(component < 0).then(1 / len(cols)).otherwise(component)
        )

    # Row-wise total of all cleaned components.
    row_total = pl.fold(pl.lit(0), lambda a, b: a + b, terms).alias("sum")
    s = self.bore.df.select([row_total])

    self.assertTrue(np.all(np.isclose(s, 1)))
import polars as pl
from polars import col, lit

# Example: keep only the rows in which every column is greater than 1.
df = pl.DataFrame({"a": [1, 2, 3], "b": [0, 1, 2]})

# `col("*") > 1` yields one boolean mask per column; the fold AND-s them
# together, starting from a literal True.
out = df.filter(
    pl.fold(
        acc=lit(True),
        f=lambda running, mask: running & mask,
        exprs=col("*") > 1,
    )
)
import polars as pl
from polars import col, lit

# Example: add all columns together row by row.
df = pl.DataFrame({"a": [1, 2, 3], "b": [10, 20, 30]})

# The fold starts at literal 0 and adds each column in turn; the result is
# exposed under the name "sum".
out = df.select(
    pl.fold(
        acc=lit(0),
        f=lambda total, column: total + column,
        exprs=col("*"),
    ).alias("sum")
)