def test_apply_none() -> None:
    """Exercise multi-input ``pl.apply``/``pl.map``, including a UDF returning ``None``."""
    df = pl.DataFrame(
        {
            "g": [1, 1, 1, 2, 2, 2, 5],
            "a": [2, 4, 5, 190, 1, 4, 1],
            "b": [1, 3, 2, 1, 43, 3, 1],
        }
    )

    # Per group: a * b**4 + sum(a / 4), collected under the name "multiple".
    grouped = df.groupby("g", maintain_order=True)
    combined = pl.apply(
        exprs=["a", pl.col("b") ** 4, pl.col("a") / 4],
        f=lambda x: x[0] * x[1] + x[2].sum(),
    ).alias("multiple")
    out = grouped.agg(combined)["multiple"]
    assert out[0].to_list() == [4.75, 326.75, 82.75]
    assert out[1].to_list() == [238.75, 3418849.75, 372.75]

    # Outside a groupby, ``pl.map`` over two columns is compared against the
    # plain element-wise product of those columns.
    product = pl.map(exprs=["a", "b"], f=lambda s: s[0] * s[1])
    out_df = df.select(product)
    assert out_df["a"].to_list() == (df["a"] * df["b"]).to_list()

    # check if we can return None
    def first_or_none(s: List) -> Optional[int]:
        # Only the group whose first "a" value is 190 yields None.
        return None if s[0][0] == 190 else s[0]

    nullable = pl.apply(
        exprs=["a", pl.col("b") ** 4, pl.col("a") / 4],
        f=first_or_none,  # type: ignore
    ).alias("multiple")
    out = df.groupby("g", maintain_order=True).agg(nullable)["multiple"]
    assert out[1] is None
def test_error_on_reducing_map() -> None:
    """Expect ``pl.ComputeError`` when ``np.trapz`` is used via ``pl.map`` in a groupby ``agg``."""
    df = pl.DataFrame(
        {
            "id": [0, 0, 0, 1, 1, 1],
            "t": [2, 4, 5, 10, 11, 14],
            "y": [0, 1, 1, 2, 3, 4],
        }
    )

    # Passed to ``pytest.raises(match=...)``, so it is searched as a regex.
    expected_message = (
        "A 'map' functions output length must be equal to that of the input length. "
        "Consider using 'apply' in favor of 'map'."
    )

    with pytest.raises(pl.ComputeError, match=expected_message):
        df.groupby("id").agg(pl.map(["t", "y"], np.trapz))
import polars as pl
from my_polars_functions import hamming_distance

# Two string Series of two elements each, used as inputs below.
a = pl.Series("a", ["foo", "bar"])
b = pl.Series("b", ["fooy", "ham"])

dist = hamming_distance(a, b)
# Result the assertion below checks for: null for the first pair, 2 for the second.
expected = pl.Series("", [None, 2], dtype=pl.UInt32)

# run on 2 Series
print("hamming distance: ", hamming_distance(a, b))
assert dist.series_equal(expected, null_equal=True)


def _hamming_of_pair(series):
    """Apply ``hamming_distance`` to the first two Series handed over by ``pl.map``."""
    return hamming_distance(series[0], series[1])


# or use in polars expressions
frame = pl.DataFrame([a, b])
print(frame.select(pl.map(["a", "b"], _hamming_of_pair)))