def test_head_tail(fruits_cars: pl.DataFrame) -> None:
    """Check ``pl.head`` and ``pl.tail`` on column "A" in both call forms.

    Each function is called once with a column name inside ``select`` and
    once directly on the Series taken from the frame; both results are
    compared against the same expected two-element Series.
    """
    # Leading two values.
    first_two = pl.Series("A", [1, 2])
    head_via_select = fruits_cars.select([pl.head("A", 2)]).to_series(0)
    head_via_series = pl.head(fruits_cars["A"], 2)
    assert head_via_select.series_equal(first_two)
    assert head_via_series.series_equal(first_two)

    # Trailing two values.
    last_two = pl.Series("A", [4, 5])
    tail_via_select = fruits_cars.select([pl.tail("A", 2)]).to_series(0)
    tail_via_series = pl.tail(fruits_cars["A"], 2)
    assert tail_via_select.series_equal(last_two)
    assert tail_via_series.series_equal(last_two)
def test_quantile(fruits_cars: pl.DataFrame) -> None:
    """Check quantiles of column "A" for each interpolation mode.

    Every (quantile, interpolation) pair is evaluated twice: through the
    frame-level ``quantile`` on the lazy frame, and through the column
    expression inside ``select``. Both must give the same expected value.
    """
    lazy_frame = fruits_cars.lazy()
    col_a = pl.col("A")

    assert lazy_frame.quantile(0.25, "nearest").collect()["A"][0] == 2
    assert fruits_cars.select(col_a.quantile(0.25, "nearest"))["A"][0] == 2

    assert lazy_frame.quantile(0.24, "lower").collect()["A"][0] == 1
    assert fruits_cars.select(col_a.quantile(0.24, "lower"))["A"][0] == 1

    assert lazy_frame.quantile(0.26, "higher").collect()["A"][0] == 3
    assert fruits_cars.select(col_a.quantile(0.26, "higher"))["A"][0] == 3

    assert lazy_frame.quantile(0.24, "midpoint").collect()["A"][0] == 1.5
    assert fruits_cars.select(col_a.quantile(0.24, "midpoint"))["A"][0] == 1.5

    assert lazy_frame.quantile(0.24, "linear").collect()["A"][0] == 1.96
    assert fruits_cars.select(col_a.quantile(0.24, "linear"))["A"][0] == 1.96
def test_is_between(fruits_cars: pl.DataFrame) -> None:
    """Check ``is_between(2, 4, ...)`` on column "A" for each bounds argument.

    The third positional argument is exercised as omitted, as a single bool
    and as a two-element list of bools. Going by the expected masks below, a
    ``True`` makes the corresponding bound inclusive (first entry for the
    lower bound, second for the upper bound).
    """
    col_a = pl.col("A")

    # Call without the third argument.
    default_expr = col_a.is_between(2, 4)  # type: ignore[arg-type]
    default_mask = fruits_cars.select(default_expr)["is_between"]
    assert default_mask.series_equal(
        pl.Series("is_between", [False, False, True, False, False])
    )

    # (third positional argument, expected boolean mask) pairs.
    cases = [
        (False, [False, False, True, False, False]),
        ([False, False], [False, False, True, False, False]),
        (True, [False, True, True, True, False]),
        ([True, True], [False, True, True, True, False]),
        ([False, True], [False, False, True, True, False]),
        ([True, False], [False, True, True, False, False]),
    ]
    for bounds, mask in cases:
        between = col_a.is_between(2, 4, bounds)  # type: ignore[arg-type]
        result = fruits_cars.select(between)["is_between"]
        assert result.series_equal(pl.Series("is_between", mask))
def test_write_json2(df: pl.DataFrame) -> None:
    """Round-trip a frame through JSON, via a string and via a byte buffer.

    The "cat" and "time" columns are dropped first; the remaining frame must
    compare equal (nulls treated as equal) to what ``pl.read_json`` returns.
    """
    # text-based conversion loses time info
    trimmed = df.select(pl.all().exclude(["cat", "time"]))

    # Path 1: serialise to a string, then read it back from an in-memory
    # binary buffer positioned at the start.
    json_text = trimmed.write_json(to_string=True)
    text_buffer = io.BytesIO(json_text.encode())
    assert trimmed.frame_equal(pl.read_json(text_buffer), null_equal=True)

    # Path 2: let write_json fill the buffer itself, rewind, and read back.
    byte_buffer = io.BytesIO()
    trimmed.write_json(byte_buffer)
    byte_buffer.seek(0)
    assert trimmed.frame_equal(pl.read_json(byte_buffer), null_equal=True)
def df_no_lists(df: pl.DataFrame) -> pl.DataFrame:
    """Return ``df`` without its ``list_*`` columns.

    Selects every column except "list_str", "list_int", "list_bool" and
    "list_flt" (presumably the list-typed columns, going by their names).

    Args:
        df: Frame to filter.

    Returns:
        A new frame holding all remaining columns of ``df``.
    """
    # Each excluded name is listed once; "list_int" used to appear twice,
    # which was redundant and did not change the selection.
    return df.select(
        pl.all().exclude(["list_str", "list_int", "list_bool", "list_flt"])
    )
def test_cov(fruits_cars: pl.DataFrame) -> None:
    """Check ``pl.cov`` of columns "A" and "B", by name and by expression."""
    expected = -2.5

    # Columns referenced by name.
    by_name = fruits_cars.select(pl.cov("A", "B"))
    assert by_name[0, 0] == expected

    # Columns referenced through explicit column expressions.
    by_expr = fruits_cars.select(pl.cov(pl.col("A"), pl.col("B")))
    assert by_expr[0, 0] == expected
def test_any_expr(fruits_cars: pl.DataFrame) -> None:
    """Check ``pl.any`` on a single column name and on a list of expressions.

    The single-column case first casts column "A" to bool.
    """
    # NOTE(review): another top-level ``test_any_expr`` is defined further
    # down in the visible source. If both live in the same module, the later
    # definition replaces this one and this test never runs; confirm.
    bool_a_frame = fruits_cars.with_column(pl.col("A").cast(bool))
    assert bool_a_frame.select(pl.any("A"))[0, 0]

    any_of_a_b = pl.any([pl.col("A"), pl.col("B")])
    assert fruits_cars.select(any_of_a_b)[0, 0]
def test_lower_bound_upper_bound(fruits_cars: pl.DataFrame) -> None:
    """Check that the bounds reported for column "A" lie beyond +/-10 million."""
    lowest = fruits_cars.select(pl.col("A").lower_bound())["A"][0]
    assert lowest < -10_000_000

    highest = fruits_cars.select(pl.col("A").upper_bound())["A"][0]
    assert highest > 10_000_000
def test_max_min_multiple_columns(fruits_cars: pl.DataFrame) -> None:
    """Check ``pl.max`` / ``pl.min`` when given the column list ["A", "B"].

    Going by the expected values, each call yields one value per row rather
    than a single aggregate.
    """
    max_expr = pl.max(["A", "B"]).alias("max")
    row_max = fruits_cars.select(max_expr).to_series(0)
    assert row_max.series_equal(pl.Series("max", [5, 4, 3, 4, 5]))

    min_expr = pl.min(["A", "B"]).alias("min")
    row_min = fruits_cars.select(min_expr).to_series(0)
    assert row_min.series_equal(pl.Series("min", [1, 2, 3, 2, 1]))
def test_median(fruits_cars: pl.DataFrame) -> None:
    """Check the median of column "A" via the lazy frame and via an expression."""
    expected = 3

    # Frame-level median through the lazy API.
    lazy_result = fruits_cars.lazy().median().collect()
    assert lazy_result["A"][0] == expected

    # Column expression inside select.
    expr_result = fruits_cars.select(pl.col("A").median())
    assert expr_result["A"][0] == expected
def test_any_expr(fruits_cars: pl.DataFrame) -> None:
    """Check ``pl.any`` on a single column name and on a list of expressions."""
    # NOTE(review): an earlier top-level ``test_any_expr`` (which casts "A" to
    # bool first) also appears in the visible source. If both live in the same
    # module, this definition replaces the earlier one; confirm.
    single_column = fruits_cars.select(pl.any("A"))
    assert single_column[0, 0]

    any_of_a_b = pl.any([pl.col("A"), pl.col("B")])
    assert fruits_cars.select(any_of_a_b)[0, 0]