Пример #1
0
def test_fold():
    """Check a row-wise sum via ``pl.sum`` and via an explicit ``pl.fold``.

    Columns ``a`` (integers) and ``b`` (floats) hold the same values row by
    row, so every row-wise total is twice that row's value.
    """
    df = pl.DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]})
    out = df.lazy().select(pl.sum(["a", "b"])).collect()
    assert out["sum"].series_equal(pl.Series("sum", [2, 4, 6]))

    out = df.select(
        pl.fold(acc=lit(0), f=lambda acc, x: acc + x,
                exprs=pl.col("*")).alias("foo"))
    # ``Series == list`` produces an element-wise Series, not a single bool,
    # so asserting on it directly never verifies the values. Compare the
    # materialised Python list instead.
    assert out["foo"].to_list() == [2, 4, 6]
Пример #2
0
def test_fold_filter():
    """Filter rows using per-column predicates combined through ``pl.fold``.

    Every column is compared against 1; the resulting masks are folded first
    with ``&`` (all columns must pass) and then with ``|`` (any column may
    pass).
    """
    df = pl.DataFrame({"a": [1, 2, 3], "b": [0, 1, 2]})

    out = df.filter(
        pl.fold(
            # True is the identity element for ``&``.
            acc=pl.lit(True),
            f=lambda a, b: a & b,
            exprs=[col(c) > 1 for c in df.columns],
        ))

    # Only the last row has both a > 1 and b > 1.
    assert out.shape == (1, 2)

    out = df.filter(
        pl.fold(
            # False is the identity element for ``|``. Seeding the fold with
            # True would make the mask all-True whatever the predicates are,
            # turning the assertion below into a tautology.
            acc=pl.lit(False),
            f=lambda a, b: a | b,
            exprs=[col(c) > 1 for c in df.columns],
        ))

    # The first row fails both predicates; the other two have a > 1.
    assert out.shape == (2, 2)
Пример #3
0
def test_regex_in_filter() -> None:
    """Filter on a regex column selection whose masks are OR-ed with ``pl.fold``."""
    frame = pl.DataFrame(
        {
            "nrs": [1, 2, 3, None, 5],
            "names": ["foo", "ham", "spam", "egg", None],
            "flt": [1.0, None, 3.0, 1.0, None],
        }
    )

    # The pattern picks the "nrs" and "flt" columns of this frame; each one is
    # compared with 3 and the masks are OR-ed together, starting from False.
    below_three = pl.col("^nrs|flt*$") < 3
    any_below_three = pl.fold(
        acc=False,
        f=lambda running, mask: running | mask,
        exprs=below_three,
    )

    first_kept_row = frame.filter(any_below_three).row(0)
    assert first_kept_row == (1, "foo", 1.0)
Пример #4
0
def test_fold() -> None:
    """Check row-wise ``sum``/``max``/``min`` and an explicit ``pl.fold``.

    Columns ``a`` (integers) and ``b`` (floats) hold the same values row by
    row; ``max`` and ``min`` are taken between ``a`` and ``b`` squared.
    """
    df = pl.DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]})
    out = df.select([
        pl.sum(["a", "b"]),
        pl.max(["a", pl.col("b")**2]),
        pl.min(["a", pl.col("b")**2]),
    ])
    assert out["sum"].series_equal(pl.Series("sum", [2.0, 4.0, 6.0]))
    assert out["max"].series_equal(pl.Series("max", [1.0, 4.0, 9.0]))
    assert out["min"].series_equal(pl.Series("min", [1.0, 2.0, 3.0]))

    out = df.select(
        pl.fold(acc=lit(0), f=lambda acc, x: acc + x,
                exprs=pl.col("*")).alias("foo"))
    # ``Series == list`` produces an element-wise Series, not a single bool,
    # so asserting on it directly never verifies the values. Compare the
    # materialised Python list instead.
    assert out["foo"].to_list() == [2, 4, 6]
Пример #5
0
 def test_sum_to_one(self):
     """Assert that the component columns add up to one in every row.

     Before summing, any negative component value is replaced by an equal
     share, i.e. 1 divided by the number of component columns.
     """
     cols = [
         "gravel_component",
         "sand_component",
         "clay_component",
         "loam_component",
         "peat_component",
         "silt_component",
     ]
     equal_share = 1 / len(cols)

     # One expression per component, with negative values swapped out.
     filled = [
         pl.when(pl.col(name) < 0).then(equal_share).otherwise(pl.col(name))
         for name in cols
     ]
     row_total = pl.fold(
         pl.lit(0),
         lambda running, component: running + component,
         filled,
     ).alias("sum")

     s = self.bore.df.select([row_total])
     is_one = np.isclose(s, 1)
     self.assertTrue(np.all(is_one))
Пример #6
0
import polars as pl
from polars import col, lit

# Two small integer columns to filter on.
df = pl.DataFrame({"a": [1, 2, 3], "b": [0, 1, 2]})

# Keep only the rows in which every column is greater than 1: each column's
# ``> 1`` mask is AND-ed into an accumulator that starts out as True.
out = df.filter(
    pl.fold(
        acc=lit(True),
        f=lambda running, mask: running & mask,
        exprs=col("*") > 1,
    )
)
Пример #7
0
import polars as pl
from polars import col, lit

# Two integer columns whose values are added together row by row.
df = pl.DataFrame({"a": [1, 2, 3], "b": [10, 20, 30]})

# Fold every column into an accumulator that starts at 0, and expose the
# result as a single column named "sum".
out = df.select(
    pl.fold(
        acc=lit(0),
        f=lambda total, column: total + column,
        exprs=col("*"),
    ).alias("sum")
)