Example #1
def test_list_concat_rolling_window() -> None:
    # inspired by: https://stackoverflow.com/questions/70377100/use-the-rolling-function-of-polars-to-get-a-list-of-all-values-in-the-rolling-wi
    # this tests whether it works without explicitly creating a list dtype upfront.
    # note that the answer given there is preferred over this snippet, as it reuses the list array when shifting
    df = pl.DataFrame({
        "A": [1.0, 2.0, 9.0, 2.0, 13.0],
    })

    out = df.with_columns(
        [pl.col("A").shift(i).alias(f"A_lag_{i}") for i in range(3)]
    ).select(
        [pl.concat_list([f"A_lag_{i}" for i in range(3)][::-1]).alias("A_rolling")]
    )
    assert out.shape == (5, 1)
    assert out.to_series().dtype == pl.List

    # this tests the proper null behavior of concat_list
    out = (
        df.with_column(pl.col("A").reshape((-1, 1)))  # first turn into a list
        .with_columns([
            pl.col("A").shift(i).alias(f"A_lag_{i}")
            for i in range(3)  # slice the lists to a lag
        ]).select([
            pl.all(),
            pl.concat_list([f"A_lag_{i}"
                            for i in range(3)][::-1]).alias("A_rolling"),
        ]))
    assert out.shape == (5, 5)
    assert out["A_rolling"].dtype == pl.List
Example #2
def test_list_concat_supertype() -> None:
    df = pl.DataFrame(
        [pl.Series("a", [1, 2], pl.UInt8), pl.Series("b", [10000, 20000], pl.UInt16)]
    )
    assert df.with_column(pl.concat_list(pl.col(["a", "b"])).alias("concat_list"))[
        "concat_list"
    ].to_list() == [[1, 10000], [2, 20000]]
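
Example #2 leans on supertype resolution: the UInt8 and UInt16 columns are cast to a common supertype before being concatenated, so the large values in b survive. A hedged sketch that simply inspects the result, assuming the same polars version as the tests:

import polars as pl

df = pl.DataFrame(
    [pl.Series("a", [1, 2], pl.UInt8), pl.Series("b", [10000, 20000], pl.UInt16)]
)
out = df.with_column(pl.concat_list(pl.col(["a", "b"])).alias("concat_list"))
# outer dtype is pl.List; the values are intact because both inputs were upcast first
print(out["concat_list"].dtype, out["concat_list"].to_list())
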
Example #3
def test_list_eval_dtype_inference() -> None:
    grades = pl.DataFrame(
        {
            "student": ["bas", "laura", "tim", "jenny"],
            "arithmetic": [10, 5, 6, 8],
            "biology": [4, 6, 2, 7],
            "geography": [8, 4, 9, 7],
        }
    )

    rank_pct = pl.col("").rank(reverse=True) / pl.col("").count().cast(pl.UInt16)

    # the .arr.first() would fail if .arr.eval did not correctly infer the output type
    assert grades.with_column(
        pl.concat_list(pl.all().exclude("student")).alias("all_grades")
    ).select(
        [
            pl.col("all_grades")
            .arr.eval(rank_pct, parallel=True)
            .alias("grades_rank")
            .arr.first()
        ]
    ).to_series().to_list() == [
        0.3333333432674408,
        0.6666666865348816,
        0.6666666865348816,
        0.3333333432674408,
    ]
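
Inside .arr.eval the expression runs once per list, and pl.col("") (like pl.first() in Example #5) resolves to that list's elements, so rank_pct turns each student's grades into within-row percentile-style ranks. A small sketch of the same computation done eagerly for a single row, assuming the same polars version:

import polars as pl

# one student's grades, ranked the same way rank_pct does inside arr.eval
row = pl.DataFrame({"grade": [10, 4, 8]})
print(
    row.select(
        (pl.col("grade").rank(reverse=True) / pl.col("grade").count()).alias("rank_pct")
    )
)
# the highest grade (10) ranks first -> 1/3; the lowest (4) ranks last -> 3/3
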
Example #4
def test_list_concat_nulls() -> None:
    assert pl.DataFrame(
        {
            "a": [["a", "b"], None, ["c", "d", "e"], None],
            "t": [["x"], ["y"], None, None],
        }
    ).with_column(pl.concat_list(["a", "t"]).alias("concat"))["concat"].to_list() == [
        ["a", "b", "x"],
        None,
        None,
        None,
    ]
Example #5
def test_list_eval_expression() -> None:
    df = pl.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]})

    for parallel in [True, False]:
        assert df.with_column(
            pl.concat_list(["a", "b"])
            .arr.eval(pl.first().rank(), parallel=parallel)
            .alias("rank")
        ).to_dict(False) == {
            "a": [1, 8, 3],
            "b": [4, 5, 2],
            "rank": [[1.0, 2.0], [2.0, 1.0], [2.0, 1.0]],
        }

        assert df["a"].reshape(
            (1, -1)).arr.eval(pl.first(),
                              parallel=parallel).to_list() == [[1, 8, 3]]
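
In this polars version pl.first() inside .arr.eval refers to the list currently being evaluated, so pl.first().rank() ranks the values of each row against each other. A small sketch of the same call on a hand-built list Series, assuming the same API:

import polars as pl

s = pl.Series("pair", [[1, 4], [8, 5]])
# rank within each list: the smaller value gets 1.0, the larger 2.0
print(s.arr.eval(pl.first().rank(), parallel=False).to_list())  # [[1.0, 2.0], [2.0, 1.0]]
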
Example #6
def test_list_concat_dispatch() -> None:
    s0 = pl.Series("a", [[1, 2]])
    s1 = pl.Series("b", [[3, 4, 5]])
    expected = pl.Series("a", [[1, 2, 3, 4, 5]])

    out = s0.arr.concat([s1])
    assert out.series_equal(expected)

    out = s0.arr.concat(s1)
    assert out.series_equal(expected)

    df = pl.DataFrame([s0, s1])
    assert df.select(pl.concat_list(["a", "b"]).alias("a"))["a"].series_equal(expected)
    assert df.select(pl.col("a").arr.concat("b").alias("a"))["a"].series_equal(expected)
    assert df.select(pl.col("a").arr.concat(["b"]).alias("a"))["a"].series_equal(
        expected
    )
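
The same concat_list expression also composes with the lazy API; a hedged sketch reusing the frame from Example #6, assuming the same polars version:

import polars as pl

s0 = pl.Series("a", [[1, 2]])
s1 = pl.Series("b", [[3, 4, 5]])
df = pl.DataFrame([s0, s1])
out = df.lazy().select(pl.concat_list(["a", "b"]).alias("a")).collect()
print(out["a"].to_list())  # [[1, 2, 3, 4, 5]]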