def test_lazy_functions():
    """Check the aggregation helpers in both expression and Series form.

    Every helper is evaluated twice on column ``"b"``: once as an expression
    selected from the frame (``pl.fn("b")``) and once applied directly to the
    Series (``pl.fn(df["b"])``). Both results must match the expected value.
    """
    df = pl.DataFrame(
        {
            "a": ["foo", "bar", "2"],
            "b": [1, 2, 3],
            "c": [1.0, 2.0, 3.0],
        }
    )

    # `count` is checked on the string column, separately from the numeric
    # aggregations below.
    out = df[[pl.count("a")]]
    assert out[0] == 3
    assert pl.count(df["a"]) == 3

    # (aggregation, expected result on column "b" == [1, 2, 3]).
    # The list order fixes the column order of `out`.
    cases = [
        (pl.var, 1.0),
        (pl.std, 1.0),
        (pl.max, 3),
        (pl.min, 1),
        (pl.sum, 6),
        (pl.mean, 2),
        (pl.median, 2),
        (pl.n_unique, 3),
        (pl.first, 1),
        (pl.last, 3),
    ]

    out = df[[func("b") for func, _ in cases]]

    for idx, (func, expected) in enumerate(cases):
        # Expression form: column `idx` of the selected frame.
        assert np.isclose(out[idx], expected)
        # Series form: the same helper applied directly to the column.
        assert np.isclose(func(df["b"]), expected)
def test_lazy_functions():
    """Check the aggregation helpers in both expression and Series form.

    Every helper is evaluated twice on column ``"b"``: once as an expression
    selected from the frame (``pl.fn("b")``) and once applied directly to the
    Series (``pl.fn(df["b"])``). Both results must match the expected value.
    """
    df = pl.DataFrame(
        {
            "a": ["foo", "bar", "2"],
            "b": [1, 2, 3],
            "c": [1.0, 2.0, 3.0],
        }
    )

    # `count` is checked on the string column, separately from the numeric
    # aggregations below.
    out = df[[pl.count("a")]]
    assert out["a"] == 3
    assert pl.count(df["a"]) == 3

    # (aggregation, expected result on column "b" == [1, 2, 3]).
    # The list order fixes the column order of `out`.
    cases = [
        (pl.var, 1.0),
        (pl.std, 1.0),
        (pl.max, 3),
        (pl.min, 1),
        (pl.sum, 6),
        (pl.mean, 2),
        (pl.median, 2),
        (pl.n_unique, 3),
        (pl.first, 1),
        (pl.last, 3),
    ]

    out = df[[func("b") for func, _ in cases]]

    for idx, (func, expected) in enumerate(cases):
        # Expression form: column `idx` of the selected frame.
        assert np.isclose(out.select_at_idx(idx), expected)
        # Series form: the same helper applied directly to the column.
        assert np.isclose(func(df["b"]), expected)
def test_list_eval_expression() -> None:
    """Run `arr.eval` on list columns with `parallel` both on and off."""
    df = pl.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]})

    # Expected frame after ranking each row's [a, b] pair.
    expected_ranked = {
        "a": [1, 8, 3],
        "b": [4, 5, 2],
        "rank": [[1.0, 2.0], [2.0, 1.0], [2.0, 1.0]],
    }

    for parallel in [True, False]:
        # Concatenate "a" and "b" into one list per row, then rank within it.
        rank_expr = (
            pl.concat_list(["a", "b"])
            .arr.eval(pl.first().rank(), parallel=parallel)
            .alias("rank")
        )
        ranked = df.with_column(rank_expr)
        assert ranked.to_dict(False) == expected_ranked

        # Reshape column "a" with shape (1, -1) and evaluate `pl.first()`
        # on the result; the values must come back unchanged.
        reshaped = df["a"].reshape((1, -1))
        evaluated = reshaped.arr.eval(pl.first(), parallel=parallel)
        assert evaluated.to_list() == [[1, 8, 3]]
def test_window_function():
    """Exercise `over` window expressions via a lazy query and via selection."""
    frame = pl.DataFrame(
        {
            "A": [1, 2, 3, 4, 5],
            "fruits": ["banana", "banana", "apple", "apple", "banana"],
            "B": [5, 4, 3, 2, 1],
            "cars": ["beetle", "audi", "beetle", "beetle", "beetle"],
        }
    )

    # Lazy path: three window aggregations added as new columns.
    window_exprs = [
        pl.sum("A").over("fruits").alias("fruit_sum_A"),
        pl.first("B").over("fruits").alias("fruit_first_B"),
        pl.max("B").over("cars").alias("cars_max_B"),
    ]
    lazy_result = frame.lazy().with_columns(window_exprs).collect()
    assert lazy_result["cars_max_B"] == [5, 4, 5, 5, 5]

    # Selection path: a window partitioned over two columns at once.
    first_b_expr = pl.first("B").over(["fruits", "cars"]).alias("B_first")
    selected = frame[[first_b_expr]]
    assert selected["B_first"] == [5, 4, 3, 3, 5]
import polars as pl

# Small in-memory frame used as input for the window-function query below.
dataset = pl.DataFrame(
    {
        "A": [1, 2, 3, 4, 5],
        "fruits": ["banana", "banana", "apple", "apple", "banana"],
        "B": [5, 4, 3, 2, 1],
        "cars": ["beetle", "audi", "beetle", "beetle", "beetle"],
    }
)

# Lazy query adding three window aggregations as new columns:
# sum of "A" and first "B" over "fruits", and max "B" over "cars".
q = (
    dataset.lazy()
    .with_columns(
        [
            pl.sum("A").over("fruits").alias("fruit_sum_A"),
            pl.first("B").over("fruits").alias("fruit_first_B"),
            pl.max("B").over("cars").alias("cars_max_B"),
        ]
    )
)

# Execute the lazy query.
df = q.collect()
import polars as pl

from .dataset import dataset

# Lazy query: group by "first_name", aggregate, sort descending and keep
# the first five rows.
q = (
    dataset.lazy()
    .groupby("first_name")
    .agg(
        [
            pl.count("party"),
            pl.col("gender").list(),
            pl.first("last_name"),
        ]
    )
    # NOTE(review): "party_count" is presumably the output name this polars
    # version gives to `pl.count("party")` -- confirm against the library.
    .sort("party_count", reverse=True)
    .limit(5)
)

# Execute the lazy query.
df = q.collect()