示例#1
0
def test_arr_lengths_dispatch() -> None:
    """Check ``arr.lengths`` through both the Series and the expression API."""
    lists = pl.Series("a", [[1, 2], [1, 2, 3]])
    want = pl.Series("a", [2, 3], dtype=UInt32)

    # Series namespace.
    testing.assert_series_equal(lists.arr.lengths(), want)

    # Expression namespace, evaluated through a single-column frame.
    frame = pl.DataFrame([lists])
    via_expr = frame.select(pl.col("a").arr.lengths())["a"]
    testing.assert_series_equal(via_expr, want)
示例#2
0
def test_sqrt_dispatch() -> None:
    """Check that ``sqrt`` agrees between the Series and expression APIs."""
    values = pl.Series("a", [1, 2])
    want = pl.Series("a", [1.0, np.sqrt(2)])

    # Direct call on the Series.
    testing.assert_series_equal(values.sqrt(), want)

    # Same operation expressed on a column of a frame.
    via_expr = pl.DataFrame([values]).select(pl.col("a").sqrt())["a"]
    testing.assert_series_equal(via_expr, want)
示例#3
0
def test_various() -> None:
    """Smoke-test a grab bag of basic Series operations.

    Covers null counting, in-place renaming, length, slicing, appending,
    head/tail, in-place sorting, ``arg_unique``, ``take`` and ``is_numeric``.
    """
    a = pl.Series("a", [1, 2])

    assert a.is_null().sum() == 0
    assert a.name == "a"
    # With ``in_place=True`` the rename is applied to ``a`` itself.
    a.rename("b", in_place=True)
    assert a.name == "b"
    assert a.len() == 2
    assert len(a) == 2
    b = a.slice(1, 1)
    assert b.len() == 1
    assert b.series_equal(pl.Series("b", [2]))
    # ``append`` extends ``a`` in place; the return value is not used.
    a.append(b)
    assert a.series_equal(pl.Series("b", [1, 2, 2]))

    a = pl.Series("a", range(20))
    assert a.head(5).len() == 5
    assert a.tail(5).len() == 5
    # Compare the materialised values: asserting ``head != tail`` directly
    # would test the truthiness of the comparison result (itself a Series),
    # not whether the two slices actually differ.
    assert a.head(5).to_list() != a.tail(5).to_list()

    a = pl.Series("a", [2, 1, 4])
    a.sort(in_place=True)
    assert a.series_equal(pl.Series("a", [1, 2, 4]))
    a = pl.Series("a", [2, 1, 1, 4, 4, 4])
    testing.assert_series_equal(a.arg_unique(), pl.Series("a", [0, 1, 3], dtype=UInt32))

    assert a.take([2, 3]).series_equal(pl.Series("a", [1, 4]))
    assert a.is_numeric()
    a = pl.Series("bool", [True, False])
    assert not a.is_numeric()
示例#4
0
def test_contains() -> None:
    """``arr.contains`` flags each sub-list that holds the given value."""
    nested = pl.Series("a", [[1, 2, 3], [2, 5], [6, 7, 8, 9]])
    want = pl.Series("a", [True, True, False])

    # Series API.
    testing.assert_series_equal(nested.arr.contains(2), want)

    # Expression API, with the Series wrapped as a literal.
    via_expr = pl.select(pl.lit(nested).arr.contains(2)).to_series()
    testing.assert_series_equal(via_expr, want)
示例#5
0
def test_shuffle() -> None:
    """``shuffle(2)`` yields the same fixed ordering via Series and expression."""
    values = pl.Series("a", [1, 2, 3])
    want = pl.Series("a", [2, 1, 3])

    # Series API.
    testing.assert_series_equal(values.shuffle(2), want)

    # Expression API, with the Series wrapped as a literal.
    via_expr = pl.select(pl.lit(values).shuffle(2)).to_series()
    testing.assert_series_equal(via_expr, want)
示例#6
0
def test_is_between_datetime() -> None:
    """``is_between`` on datetimes marks only the value inside the bounds."""
    lower = datetime(2020, 1, 1, 12, 0, 0)
    upper = datetime(2020, 1, 1, 23, 0, 0)
    stamps = pl.Series(
        "a", [datetime(2020, 1, 1, 10, 0, 0), datetime(2020, 1, 1, 20, 0, 0)]
    )
    want = pl.Series("a", [False, True])

    # only on the expression api
    frame = stamps.to_frame().with_column(pl.col("*").is_between(lower, upper))
    got = frame["is_between"]
    # The result column is called "is_between"; rename it before comparing.
    testing.assert_series_equal(got.rename("a"), want)
示例#7
0
def test_min_nulls_consistency() -> None:
    """Row-wise ``pl.min`` / ``pl.max`` skip nulls when folding columns."""
    frame = pl.DataFrame({"a": [None, 2, 3], "b": [4, None, 6], "c": [7, 5, 0]})

    # Horizontal minimum across the three columns.
    row_min = frame.select([pl.min(["a", "b", "c"])]).to_series()
    testing.assert_series_equal(row_min, pl.Series("min", [4, 2, 0]))

    # Horizontal maximum across the three columns.
    row_max = frame.select([pl.max(["a", "b", "c"])]).to_series()
    testing.assert_series_equal(row_max, pl.Series("max", [7, 5, 6]))
示例#8
0
def test_peak_max_peak_min() -> None:
    """``peak_min`` / ``peak_max`` return boolean masks of local extrema."""
    values = pl.Series("a", [4, 1, 3, 2, 5])

    # Local minima sit at positions 1 and 3.
    minima = values.peak_min()
    testing.assert_series_equal(
        minima, pl.Series([False, True, False, True, False])
    )

    # Local maxima sit at positions 0, 2 and 4.
    maxima = values.peak_max()
    testing.assert_series_equal(
        maxima, pl.Series([True, False, True, False, True])
    )
示例#9
0
def test_flatten_explode() -> None:
    """``flatten`` and ``explode`` both split strings into single characters."""
    words = pl.Series("a", ["Hello", "World"])
    want = pl.Series("a", ["H", "e", "l", "l", "o", "W", "o", "r", "l", "d"])

    # Both expressions are expected to give the identical exploded column.
    for expr in (pl.col("a").flatten(), pl.col("a").explode()):
        got = words.to_frame().select(expr)[:, 0]  # type: ignore
        testing.assert_series_equal(got, want)
示例#10
0
def test_arithmetic(s: pl.Series) -> None:
    """Exercise arithmetic operators on a two-element Series.

    ``s`` is supplied from outside this function (presumably a fixture or
    parametrization — confirm against the caller); the expected values below
    correspond to a Series holding ``[1, 2]``.
    """
    lhs = s
    rhs = s

    def both_match(result: pl.Series, values: list) -> bool:
        # True when both elements of ``result`` equal ``values`` pairwise.
        return (result == values).sum() == 2

    # Series <op> Series.
    assert both_match(lhs * rhs, [1, 4])
    assert both_match(lhs / rhs, [1.0, 1.0])
    assert both_match(lhs + rhs, [2, 4])
    assert both_match(lhs - rhs, [0, 0])
    # Series <op> scalar.
    assert both_match(lhs + 1, [2, 3])
    assert both_match(lhs - 1, [0, 1])
    assert both_match(lhs / 1, [1.0, 2.0])
    assert both_match(lhs // 2, [0, 1])
    assert both_match(lhs * 2, [2, 4])
    # scalar <op> Series (reflected operators).
    assert both_match(1 + lhs, [2, 3])
    assert both_match(1 - lhs, [0, -1])
    assert both_match(1 * lhs, [1, 2])
    # integer division
    testing.assert_series_equal(1 / lhs, pl.Series([1.0, 0.5]))  # type: ignore
    expected = pl.Series([1, 0]) if s.dtype == Int64 else pl.Series([1.0, 0.5])
    testing.assert_series_equal(1 // lhs, expected)
    # modulo
    assert both_match(1 % lhs, [0, 1])
    assert both_match(lhs % 1, [0, 0])
    # negate
    assert both_match(-lhs, [-1, -2])
    # wrong dtypes in rhs operands
    assert both_match(1.0 - lhs, [0.0, -1.0])
    assert both_match(1.0 / lhs, [1.0, 0.5])
    assert both_match(1.0 * lhs, [1, 2])
    assert both_match(1.0 + lhs, [2, 3])
    assert both_match(1.0 % lhs, [0, 1])

    # None of these operators is supported between a datetime Series and an
    # int; each attempt must raise ValueError.
    stamp = pl.Series("a", [datetime(2021, 1, 1)])
    unsupported = (
        lambda: stamp // 2,
        lambda: stamp / 2,
        lambda: stamp * 2,
        lambda: stamp % 2,
        lambda: stamp ** 2,
        lambda: 2 / stamp,
        lambda: 2 // stamp,
        lambda: 2 * stamp,
        lambda: 2 % stamp,
        lambda: 2 ** stamp,
    )
    for attempt in unsupported:
        with pytest.raises(ValueError):
            attempt()
示例#11
0
def test_diff_dispatch() -> None:
    """Check ``diff`` on a Series (dropping the null) and via an expression."""
    values = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])

    # With null_behavior="drop" the leading null is removed from the result.
    dropped = values.diff(null_behavior="drop")
    testing.assert_series_equal(dropped, pl.Series("a", [1, 1, -1, 0, 1, -3]))

    # The expression default keeps the leading null.
    frame = pl.DataFrame([values])
    via_expr = frame.select(pl.col("a").diff())["a"]
    testing.assert_series_equal(
        via_expr, pl.Series("a", [None, 1, 1, -1, 0, 1, -3])
    )
示例#12
0
def test_assert_series_equal_int_overflow() -> None:
    """``assert_series_equal`` must cope with the minimum Int8 value."""
    # internally may call 'abs' if not check_exact, which can overflow on signed int
    lone_min = pl.Series([-128], dtype=pl.Int8)
    zero_then_min = pl.Series([0, -128], dtype=pl.Int8)
    one_then_min = pl.Series([1, -128], dtype=pl.Int8)

    for exact in (True, False):
        # Identical input compares equal in both modes.
        assert_series_equal(lone_min, lone_min, check_exact=exact)
        # Inputs differing in the first element must be reported as different.
        with pytest.raises(AssertionError):
            assert_series_equal(zero_then_min, one_then_min, check_exact=exact)
示例#13
0
def test_compare_series_type_mismatch() -> None:
    """``assert_series_equal`` reports type and dtype mismatches distinctly."""
    int_series = pl.Series([1, 2, 3])

    # Comparing against a DataFrame is a type mismatch.
    not_a_series = pl.DataFrame({"col1": [2, 3, 4]})
    type_msg = "Series are different\n\nType mismatch"
    with pytest.raises(AssertionError, match=type_msg):
        assert_series_equal(int_series, not_a_series)  # type: ignore[arg-type]

    # Comparing an int Series against a float Series is a dtype mismatch.
    float_series = pl.Series([1.0, 2.0, 3.0])
    dtype_msg = "Series are different\n\nDtype mismatch"
    with pytest.raises(AssertionError, match=dtype_msg):
        assert_series_equal(int_series, float_series)
示例#14
0
def test_compare_series_nans_assert_equal() -> None:
    """NaNs in matching positions assert equal; misplaced NaNs do not."""
    # NaN values do not _compare_ equal, but should _assert_ as equal here
    nan = float("NaN")

    # Same values with NaN in the same slot: must pass.
    left = pl.Series([1.0, 2.0, nan])
    right = pl.Series([1.0, 2.0, nan])
    assert_series_equal(left, right)

    # NaN in a different slot: must fail under an exact check.
    left = pl.Series([1.0, 2.0, nan])
    right = pl.Series([1.0, nan, 3.0])
    with pytest.raises(AssertionError):
        assert_series_equal(left, right, check_exact=True)
示例#15
0
def test_rank_dispatch() -> None:
    """Check dense ranking through the Series and the expression API.

    Also checks the ``reverse=True`` variant on the Series API.
    """
    s = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])

    testing.assert_series_equal(
        s.rank("dense"), pl.Series("a", [2, 3, 4, 3, 3, 4, 1], dtype=UInt32)
    )

    df = pl.DataFrame([s])
    # Compare materialised values: ``Series == list`` yields a Series, so
    # asserting on it directly would test its truthiness rather than checking
    # that every rank matches.
    assert df.select(pl.col("a").rank("dense"))["a"].to_list() == [
        2,
        3,
        4,
        3,
        3,
        4,
        1,
    ]

    testing.assert_series_equal(
        s.rank("dense", reverse=True),
        pl.Series("a", [3, 2, 1, 2, 2, 1, 4], dtype=UInt32),
    )
示例#16
0
def test_series_slice(
    srs: pl.Series,
    start: int | None,
    stop: int | None,
    step: int | None,
) -> None:
    """Slicing a Series must match slicing the equivalent Python list.

    ``srs``, ``start``, ``stop`` and ``step`` are supplied from outside this
    function (presumably by parametrization or a property-based strategy —
    confirm against the decorator).
    """
    reference = srs.to_list()
    window = slice(start, stop, step)

    from_python = reference[window]
    from_polars = srs[window].to_list()

    assert from_python == from_polars, f"slice [{start}:{stop}:{step}] failed"
    assert_series_equal(srs, srs, check_exact=True)
示例#17
0
def test_abs() -> None:
    """``abs`` works via the method, the numpy ufunc and an expression."""
    # ints
    ints = pl.Series([1, -2, 3, -4])
    want_ints = [1, 2, 3, 4]
    testing.assert_series_equal(ints.abs(), pl.Series(want_ints))
    testing.assert_series_equal(np.abs(ints), pl.Series(want_ints))  # type: ignore

    # floats
    floats = pl.Series([1.0, -2.0, 3, -4.0])
    want_floats = [1.0, 2.0, 3.0, 4.0]
    testing.assert_series_equal(floats.abs(), pl.Series(want_floats))
    testing.assert_series_equal(
        np.abs(floats), pl.Series(want_floats)  # type: ignore
    )
    # Expression API, with the Series wrapped as a literal.
    via_expr = pl.select(pl.lit(floats).abs()).to_series()
    testing.assert_series_equal(via_expr, pl.Series(want_floats))
示例#18
0
def test_equality() -> None:
    """Comparison operators on Series return element-wise boolean Series."""
    left = pl.Series("a", [1, 2])
    right = left

    eq_mask = left == right
    assert isinstance(eq_mask, pl.Series)
    assert eq_mask.sum() == 2

    # Number of True entries expected from each remaining comparison.
    for mask, hits in (
        (left != right, 0),
        (left >= right, 2),
        (left <= right, 2),
        (left > right, 0),
        (left < right, 0),
    ):
        assert mask.sum() == hits

    assert left.sum() == 3
    assert left.series_equal(right)

    # Comparing a string Series with a scalar string.
    names = pl.Series("name", ["ham", "foo", "bar"])
    is_ham = names == "ham"
    testing.assert_series_equal(is_ham, pl.Series("name", [True, False, False]))
示例#19
0
def verify_series_and_expr_api(
    input: pl.Series, expected: pl.Series, op: str, *args: Any, **kwargs: Any
) -> None:
    """
    Run ``op`` through both the expression and the Series API and compare each result to ``expected``.

    ``op`` is looked up with ``_getattr_multi`` on ``pl.col("*")`` and on
    ``input``; ``*args`` / ``**kwargs`` are forwarded to the resolved callable
    in both cases.

    Examples
    --------
    >>> s = pl.Series([1, 3, 2])
    >>> expected = pl.Series([1, 2, 3])
    >>> verify_series_and_expr_api(s, expected, "sort")
    """
    # Expression route: apply the operation to every column of a one-column frame.
    expr_method = _getattr_multi(pl.col("*"), op)
    selected = input.to_frame().select(expr_method(*args, **kwargs))
    via_expr: pl.Series = selected[:, 0]  # type: ignore

    # Series route: call the same operation directly on the input.
    series_method = _getattr_multi(input, op)
    via_series = series_method(*args, **kwargs)

    testing.assert_series_equal(via_expr, expected)
    testing.assert_series_equal(via_series, expected)
示例#20
0
def test_ufunc() -> None:
    """Check numpy ufuncs applied to a Series.

    Covers the return type, preservation of nulls, and operation on a Series
    made of more than one chunk.
    """
    a = pl.Series("a", [1.0, 2.0, 3.0, 4.0])
    b = np.multiply(a, 4)
    assert isinstance(b, pl.Series)
    # Compare materialised values: ``Series == list`` yields a Series, so
    # asserting on it directly would test its truthiness, not the products.
    assert b.to_list() == [4.0, 8.0, 12.0, 16.0]

    # test if null bitmask is preserved
    a = pl.Series("a", [1.0, None, 3.0])
    b = np.exp(a)
    assert b.null_count() == 1

    # test if it works with chunked series.
    a = pl.Series("a", [1.0, None, 3.0])
    b = pl.Series("b", [4.0, 5.0, None])
    # Appending in place leaves ``a`` with two chunks.
    a.append(b)
    assert a.n_chunks() == 2
    c = np.multiply(a, 3)
    testing.assert_series_equal(c, pl.Series("a", [3.0, None, 9.0, 12.0, 15.0, None]))
示例#21
0
def test_comparisons_bool_series_to_int() -> None:
    """Arithmetic and ordering between a boolean Series and an int.

    True division is accepted; every other operator listed must raise
    ``TypeError`` with a message matching the paired pattern.
    """
    srs_bool = pl.Series([True, False])
    # todo: do we want this to work?
    testing.assert_series_equal(srs_bool / 1, pl.Series([True, False], dtype=Float64))

    # (expected message pattern, operation to attempt)
    rejected = (
        (r"\-: 'Series' and 'int'", lambda: srs_bool - 1),
        (r"\+: 'Series' and 'int'", lambda: srs_bool + 1),
        (r"\%: 'Series' and 'int'", lambda: srs_bool % 2),
        (r"\*: 'Series' and 'int'", lambda: srs_bool * 1),
        (
            r"'<' not supported between instances of 'Series' and 'int'",
            lambda: srs_bool < 2,
        ),
        (
            r"'>' not supported between instances of 'Series' and 'int'",
            lambda: srs_bool > 2,
        ),
    )
    for pattern, attempt in rejected:
        with pytest.raises(TypeError, match=pattern):
            attempt()
示例#22
0
def test_true_divide() -> None:
    """True division yields floats via Series, expression and reflected forms."""
    ints = pl.Series("a", [1, 2])
    halves = pl.Series("a", [0.5, 1.0])

    # Series / scalar, directly and through an expression.
    testing.assert_series_equal(ints / 2, halves)
    via_expr = pl.DataFrame([ints]).select(pl.col("a") / 2)["a"]
    testing.assert_series_equal(via_expr, halves)

    # rtruediv
    reflected = pl.DataFrame([ints]).select(2 / pl.col("a"))["literal"]
    testing.assert_series_equal(reflected, pl.Series("literal", [2.0, 1.0]))

    # https://github.com/pola-rs/polars/issues/1369
    vals = [3000000000, 2, 3]
    large = pl.Series(vals)
    testing.assert_series_equal(large / 1, pl.Series(vals, dtype=Float64))

    large_via_expr = pl.DataFrame({"a": vals}).select([pl.col("a") / 1])["a"]
    testing.assert_series_equal(large_via_expr, pl.Series("a", vals, dtype=Float64))
示例#23
0
def test_comparisons_bool_series_to_int() -> None:
    """Arithmetic and ordering between a boolean Series and an int.

    True division is accepted; the other arithmetic operators must raise
    ``ValueError`` and the ordering comparisons ``TypeError``, each with a
    message matching the paired pattern.
    """
    srs_bool = pl.Series([True, False])
    # todo: do we want this to work?
    testing.assert_series_equal(srs_bool / 1, pl.Series([True, False], dtype=Float64))

    arithmetic_msg = r"cannot do arithmetic with series of dtype: <class 'polars.datatypes.Boolean'> and argument of type: <class 'bool'>"

    # (expected exception, expected message pattern, operation to attempt)
    rejected = (
        (ValueError, arithmetic_msg, lambda: srs_bool - 1),
        (ValueError, arithmetic_msg, lambda: srs_bool + 1),
        (ValueError, arithmetic_msg, lambda: srs_bool % 2),
        (ValueError, arithmetic_msg, lambda: srs_bool * 1),
        (
            TypeError,
            r"'<' not supported between instances of 'Series' and 'int'",
            lambda: srs_bool < 2,
        ),
        (
            TypeError,
            r"'>' not supported between instances of 'Series' and 'int'",
            lambda: srs_bool > 2,
        ),
    )
    for exc_type, pattern, attempt in rejected:
        with pytest.raises(exc_type, match=pattern):
            attempt()
示例#24
0
def test_comparisons_float_series_to_int() -> None:
    """Arithmetic and comparisons between a float Series and int/bool scalars."""
    srs_float = pl.Series([1.0, 2.0, 3.0, 4.0])

    # (actual result, expected values)
    checks = [
        # arithmetic
        (srs_float - 1, [0.0, 1.0, 2.0, 3.0]),
        (srs_float + 1, [2.0, 3.0, 4.0, 5.0]),
        (srs_float * 2, [2.0, 4.0, 6.0, 8.0]),
        (srs_float / 2, [0.5, 1.0, 1.5, 2.0]),
        (srs_float % 2, [1.0, 0.0, 1.0, 0.0]),
        (4 % srs_float, [0.0, 0.0, 1.0, 0.0]),
        (srs_float // 2, [0.0, 1.0, 1.0, 2.0]),
        # comparisons
        (srs_float < 3, [True, True, False, False]),
        (srs_float <= 3, [True, True, True, False]),
        (srs_float > 3, [False, False, False, True]),
        (srs_float >= 3, [False, False, True, True]),
        (srs_float == 3, [False, False, True, False]),
        # a bool scalar is treated like the integer 1
        (srs_float - True, [0.0, 1.0, 2.0, 3.0]),
    ]
    for got, values in checks:
        testing.assert_series_equal(got, pl.Series(values))
示例#25
0
def test_comparisons_int_series_to_float() -> None:
    """Arithmetic and comparisons between an int Series and float/bool scalars."""
    srs_int = pl.Series([1, 2, 3, 4])

    # (actual result, expected values)
    checks = [
        # arithmetic with a float scalar gives float results
        (srs_int - 1.0, [0.0, 1.0, 2.0, 3.0]),
        (srs_int + 1.0, [2.0, 3.0, 4.0, 5.0]),
        (srs_int * 2.0, [2.0, 4.0, 6.0, 8.0]),
        (srs_int / 2.0, [0.5, 1.0, 1.5, 2.0]),
        (srs_int % 2.0, [1.0, 0.0, 1.0, 0.0]),
        (4.0 % srs_int, [0.0, 0.0, 1.0, 0.0]),
        (srs_int // 2.0, [0.0, 1.0, 1.0, 2.0]),
        # comparisons
        (srs_int < 3.0, [True, True, False, False]),
        (srs_int <= 3.0, [True, True, True, False]),
        (srs_int > 3.0, [False, False, False, True]),
        (srs_int >= 3.0, [False, False, True, True]),
        (srs_int == 3.0, [False, False, True, False]),
        # subtracting a bool scalar keeps integer values
        (srs_int - True, [0, 1, 2, 3]),
    ]
    for got, values in checks:
        testing.assert_series_equal(got, pl.Series(values))
示例#26
0
def test_bitwise() -> None:
    """Bitwise and/or/xor on integer Series and on column expressions."""
    a = pl.Series("a", [1, 2, 3])
    b = pl.Series("b", [3, 4, 5])

    want_and = [1, 0, 1]
    want_or = [3, 6, 7]
    want_xor = [2, 6, 6]

    # Series operators: the result carries the name "a".
    testing.assert_series_equal(a & b, pl.Series("a", want_and))
    testing.assert_series_equal(a | b, pl.Series("a", want_or))
    testing.assert_series_equal(a ^ b, pl.Series("a", want_xor))

    # Expression operators, each aliased to its own output column.
    col_a = pl.col("a")
    col_b = pl.col("b")
    out = pl.DataFrame([a, b]).select(
        [
            (col_a & col_b).alias("and"),
            (col_a | col_b).alias("or"),
            (col_a ^ col_b).alias("xor"),
        ]
    )
    for label, values in (("and", want_and), ("or", want_or), ("xor", want_xor)):
        testing.assert_series_equal(out[label], pl.Series(label, values))
示例#27
0
def test_floor_divide() -> None:
    """Floor division by a scalar agrees between Series and expression APIs."""
    values = pl.Series("a", [1, 2, 3])
    want = pl.Series("a", [0, 1, 1])

    # Series API.
    testing.assert_series_equal(values // 2, want)

    # Expression API.
    via_expr = pl.DataFrame([values]).select(pl.col("a") // 2)["a"]
    testing.assert_series_equal(via_expr, want)
示例#28
0
def test_arr_ordering() -> None:
    """``arr.sort`` and ``arr.reverse`` reorder each sub-list independently."""
    nested = pl.Series("a", [[2, 1], [1, 3, 2]])

    in_order = nested.arr.sort()
    testing.assert_series_equal(in_order, pl.Series("a", [[1, 2], [1, 2, 3]]))

    flipped = nested.arr.reverse()
    testing.assert_series_equal(flipped, pl.Series("a", [[1, 2], [2, 3, 1]]))
示例#29
0
def test_arr_arithmetic() -> None:
    """Per-sub-list aggregations exposed on the ``arr`` namespace."""
    nested = pl.Series("a", [[1, 2], [1, 2, 3]])

    # (aggregated result, expected value per sub-list)
    checks = [
        (nested.arr.sum(), [3, 6]),
        (nested.arr.mean(), [1.5, 2.0]),
        (nested.arr.max(), [2, 3]),
        (nested.arr.min(), [1, 1]),
    ]
    for got, values in checks:
        testing.assert_series_equal(got, pl.Series("a", values))
示例#30
0
def test_rolling() -> None:
    """Rolling-window aggregations over a small integer Series."""
    a = pl.Series("a", [1, 2, 3, 2, 1])

    # Window of 2: the first slot has no complete window and is null.
    for got, values in (
        (a.rolling_min(2), [None, 1, 2, 2, 1]),
        (a.rolling_max(2), [None, 2, 3, 3, 2]),
        (a.rolling_sum(2), [None, 3, 5, 5, 3]),
        (a.rolling_mean(2), [None, 1.5, 2.5, 2.5, 1.5]),
    ):
        testing.assert_series_equal(got, pl.Series("a", values))

    # Spot-check the first complete window for std and var.
    std_values = a.rolling_std(2).to_list()
    assert std_values[1] == pytest.approx(0.7071067811865476)
    var_values = a.rolling_var(2).to_list()
    assert var_values[1] == pytest.approx(0.5)

    # Window of 4: only the last two slots are populated.
    testing.assert_series_equal(
        a.rolling_median(4), pl.Series("a", [None, None, None, 2, 2], dtype=Float64)
    )

    # Quantile 0 over a window of 3 gives the same result for every
    # interpolation strategy checked here.
    for interpolation in ("nearest", "lower", "higher"):
        testing.assert_series_equal(
            a.rolling_quantile(0, interpolation, 3),
            pl.Series("a", [None, None, 1, 2, 1], dtype=Float64),
        )

    assert a.rolling_skew(4).null_count() == 3