def test_arr_lengths_dispatch() -> None:
    """``arr.lengths`` gives the same result via the Series and expression APIs."""
    lists = pl.Series("a", [[1, 2], [1, 2, 3]])
    expected = pl.Series("a", [2, 3], dtype=UInt32)

    # Series namespace
    testing.assert_series_equal(lists.arr.lengths(), expected)

    # expression namespace, evaluated through a DataFrame
    via_expr = pl.DataFrame([lists]).select(pl.col("a").arr.lengths())["a"]
    testing.assert_series_equal(via_expr, expected)
def test_sqrt_dispatch() -> None:
    """``sqrt`` gives the same result via the Series and expression APIs."""
    ints = pl.Series("a", [1, 2])
    roots = pl.Series("a", [1.0, np.sqrt(2)])

    testing.assert_series_equal(ints.sqrt(), roots)

    via_expr = pl.DataFrame([ints]).select(pl.col("a").sqrt())["a"]
    testing.assert_series_equal(via_expr, roots)
def test_various() -> None:
    """Smoke-test a grab bag of basic Series operations.

    Covers null counting, in-place renaming, length, slicing, appending,
    head/tail, in-place sorting, ``arg_unique``, ``take`` and ``is_numeric``.
    """
    a = pl.Series("a", [1, 2])
    assert a.is_null().sum() == 0
    assert a.name == "a"
    a.rename("b", in_place=True)
    assert a.name == "b"
    assert a.len() == 2
    assert len(a) == 2

    b = a.slice(1, 1)
    assert b.len() == 1
    assert b.series_equal(pl.Series("b", [2]))
    a.append(b)
    assert a.series_equal(pl.Series("b", [1, 2, 2]))

    a = pl.Series("a", range(20))
    assert a.head(5).len() == 5
    assert a.tail(5).len() == 5
    # Compare plain lists: `!=` between two Series yields another Series, so
    # asserting on it directly would test the truthiness of that object
    # instead of the values it holds.
    assert a.head(5).to_list() != a.tail(5).to_list()

    a = pl.Series("a", [2, 1, 4])
    a.sort(in_place=True)
    assert a.series_equal(pl.Series("a", [1, 2, 4]))

    a = pl.Series("a", [2, 1, 1, 4, 4, 4])
    testing.assert_series_equal(a.arg_unique(), pl.Series("a", [0, 1, 3], dtype=UInt32))
    assert a.take([2, 3]).series_equal(pl.Series("a", [1, 4]))

    assert a.is_numeric()
    a = pl.Series("bool", [True, False])
    assert not a.is_numeric()
def test_contains() -> None:
    """``arr.contains`` flags the sub-lists holding the value, on both APIs."""
    lists = pl.Series("a", [[1, 2, 3], [2, 5], [6, 7, 8, 9]])
    expected = pl.Series("a", [True, True, False])

    testing.assert_series_equal(lists.arr.contains(2), expected)

    via_expr = pl.select(pl.lit(lists).arr.contains(2)).to_series()
    testing.assert_series_equal(via_expr, expected)
def test_shuffle() -> None:
    """``shuffle(2)`` yields the same fixed ordering on both APIs."""
    values = pl.Series("a", [1, 2, 3])
    expected = pl.Series("a", [2, 1, 3])

    testing.assert_series_equal(values.shuffle(2), expected)

    via_expr = pl.select(pl.lit(values).shuffle(2)).to_series()
    testing.assert_series_equal(via_expr, expected)
def test_is_between_datetime() -> None:
    """``is_between`` with datetime bounds marks only the value inside the range."""
    lower = datetime(2020, 1, 1, 12, 0, 0)
    upper = datetime(2020, 1, 1, 23, 0, 0)
    timestamps = pl.Series(
        "a", [datetime(2020, 1, 1, 10, 0, 0), datetime(2020, 1, 1, 20, 0, 0)]
    )

    # only on the expression api
    frame = timestamps.to_frame().with_column(pl.col("*").is_between(lower, upper))
    result = frame["is_between"]

    # the result column is named "is_between"; rename it to line up with "a"
    testing.assert_series_equal(result.rename("a"), pl.Series("a", [False, True]))
def test_min_nulls_consistency() -> None:
    """Row-wise ``pl.min`` / ``pl.max`` over columns that contain nulls."""
    df = pl.DataFrame({"a": [None, 2, 3], "b": [4, None, 6], "c": [7, 5, 0]})
    columns = ["a", "b", "c"]

    minimum = df.select([pl.min(columns)]).to_series()
    testing.assert_series_equal(minimum, pl.Series("min", [4, 2, 0]))

    maximum = df.select([pl.max(columns)]).to_series()
    testing.assert_series_equal(maximum, pl.Series("max", [7, 5, 6]))
def test_peak_max_peak_min() -> None:
    """``peak_min`` and ``peak_max`` return the expected boolean masks."""
    values = pl.Series("a", [4, 1, 3, 2, 5])

    minima = values.peak_min()
    testing.assert_series_equal(minima, pl.Series([False, True, False, True, False]))

    maxima = values.peak_max()
    testing.assert_series_equal(maxima, pl.Series([True, False, True, False, True]))
def test_flatten_explode() -> None:
    """``flatten`` and ``explode`` both split strings into single characters."""
    words = pl.Series("a", ["Hello", "World"])
    expected = pl.Series("a", ["H", "e", "l", "l", "o", "W", "o", "r", "l", "d"])

    flattened: pl.Series = words.to_frame().select(pl.col("a").flatten())[:, 0]  # type: ignore
    testing.assert_series_equal(flattened, expected)

    exploded: pl.Series = words.to_frame().select(pl.col("a").explode())[:, 0]  # type: ignore
    testing.assert_series_equal(exploded, expected)
def test_arithmetic(s: pl.Series) -> None:
    """Exercise the arithmetic operators on ``s`` and on a datetime Series.

    The expected values correspond to a series holding 1 and 2 (presumably
    supplied by a parametrised fixture that is not visible here — confirm).
    """
    lhs = rhs = s

    # series <op> series
    assert ((lhs * rhs) == [1, 4]).sum() == 2
    assert ((lhs / rhs) == [1.0, 1.0]).sum() == 2
    assert ((lhs + rhs) == [2, 4]).sum() == 2
    assert ((lhs - rhs) == [0, 0]).sum() == 2

    # series <op> scalar
    assert ((lhs + 1) == [2, 3]).sum() == 2
    assert ((lhs - 1) == [0, 1]).sum() == 2
    assert ((lhs / 1) == [1.0, 2.0]).sum() == 2
    assert ((lhs // 2) == [0, 1]).sum() == 2
    assert ((lhs * 2) == [2, 4]).sum() == 2

    # scalar <op> series
    assert ((1 + lhs) == [2, 3]).sum() == 2
    assert ((1 - lhs) == [0, -1]).sum() == 2
    assert ((1 * lhs) == [1, 2]).sum() == 2

    # integer division
    testing.assert_series_equal(1 / lhs, pl.Series([1.0, 0.5]))  # type: ignore
    expected = pl.Series([1, 0]) if s.dtype == Int64 else pl.Series([1.0, 0.5])
    testing.assert_series_equal(1 // lhs, expected)

    # modulo
    assert ((1 % lhs) == [0, 1]).sum() == 2
    assert ((lhs % 1) == [0, 0]).sum() == 2

    # negate
    assert (-lhs == [-1, -2]).sum() == 2

    # wrong dtypes in rhs operands
    assert ((1.0 - lhs) == [0.0, -1.0]).sum() == 2
    assert ((1.0 / lhs) == [1.0, 0.5]).sum() == 2
    assert ((1.0 * lhs) == [1, 2]).sum() == 2
    assert ((1.0 + lhs) == [2, 3]).sum() == 2
    assert ((1.0 % lhs) == [0, 1]).sum() == 2

    # every one of these operators is expected to be rejected for datetimes
    dates = pl.Series("a", [datetime(2021, 1, 1)])
    rejected_operations = (
        lambda: dates // 2,
        lambda: dates / 2,
        lambda: dates * 2,
        lambda: dates % 2,
        lambda: dates ** 2,
        lambda: 2 / dates,
        lambda: 2 // dates,
        lambda: 2 * dates,
        lambda: 2 % dates,
        lambda: 2 ** dates,
    )
    for operation in rejected_operations:
        with pytest.raises(ValueError):
            operation()
def test_diff_dispatch() -> None:
    """``diff`` on a Series (dropping nulls) and via an expression (default)."""
    values = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])

    # Series API with the leading null dropped
    dropped = values.diff(null_behavior="drop")
    testing.assert_series_equal(dropped, pl.Series("a", [1, 1, -1, 0, 1, -3]))

    # expression API with default arguments keeps the leading null
    via_expr = pl.DataFrame([values]).select(pl.col("a").diff())["a"]
    testing.assert_series_equal(via_expr, pl.Series("a", [None, 1, 1, -1, 0, 1, -3]))
def test_assert_series_equal_int_overflow() -> None:
    """``assert_series_equal`` handles the Int8 minimum in both exactness modes."""
    # internally may call 'abs' if not check_exact, which can overflow on signed int
    lone_min = pl.Series([-128], dtype=pl.Int8)
    zero_then_min = pl.Series([0, -128], dtype=pl.Int8)
    one_then_min = pl.Series([1, -128], dtype=pl.Int8)

    for exact in (True, False):
        # identical input must compare equal
        assert_series_equal(lone_min, lone_min, check_exact=exact)
        # inputs differing in the first element must be reported as different
        with pytest.raises(AssertionError):
            assert_series_equal(zero_then_min, one_then_min, check_exact=exact)
def test_compare_series_type_mismatch() -> None:
    """``assert_series_equal`` reports object-type and dtype mismatches."""
    int_series = pl.Series([1, 2, 3])

    # a DataFrame is not a Series at all
    frame = pl.DataFrame({"col1": [2, 3, 4]})
    with pytest.raises(AssertionError, match="Series are different\n\nType mismatch"):
        assert_series_equal(int_series, frame)  # type: ignore[arg-type]

    # same kind of object, different dtype
    float_series = pl.Series([1.0, 2.0, 3.0])
    with pytest.raises(AssertionError, match="Series are different\n\nDtype mismatch"):
        assert_series_equal(int_series, float_series)
def test_compare_series_nans_assert_equal() -> None:
    """NaNs at matching positions assert equal; NaNs at different positions do not."""
    # NaN values do not _compare_ equal, but should _assert_ as equal here
    nan = float("NaN")

    left = pl.Series([1.0, 2.0, nan])
    right = pl.Series([1.0, 2.0, nan])
    assert_series_equal(left, right)

    left = pl.Series([1.0, 2.0, nan])
    right = pl.Series([1.0, nan, 3.0])
    with pytest.raises(AssertionError):
        assert_series_equal(left, right, check_exact=True)
def test_rank_dispatch() -> None:
    """Dense ranking through the Series API, the expression API, and reversed."""
    s = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])

    testing.assert_series_equal(
        s.rank("dense"), pl.Series("a", [2, 3, 4, 3, 3, 4, 1], dtype=UInt32)
    )

    df = pl.DataFrame([s])
    # Compare as a plain list: `Series == list` evaluates to another Series,
    # so asserting on it directly would not check the individual values.
    assert df.select(pl.col("a").rank("dense"))["a"].to_list() == [2, 3, 4, 3, 3, 4, 1]

    testing.assert_series_equal(
        s.rank("dense", reverse=True),
        pl.Series("a", [3, 2, 1, 2, 2, 1, 4], dtype=UInt32),
    )
def test_series_slice(
    srs: pl.Series,
    start: int | None,
    stop: int | None,
    step: int | None,
) -> None:
    """Slicing a Series must match slicing the equivalent Python list."""
    as_list = srs.to_list()

    window = slice(start, stop, step)
    from_python = as_list[window]
    from_polars = srs[window].to_list()

    assert from_python == from_polars, f"slice [{start}:{stop}:{step}] failed"
    # the series is compared with itself after slicing
    assert_series_equal(srs, srs, check_exact=True)
def test_abs() -> None:
    """``abs`` via the Series method, the numpy ufunc and an expression."""
    # ints
    ints = pl.Series([1, -2, 3, -4])
    abs_ints = pl.Series([1, 2, 3, 4])
    testing.assert_series_equal(ints.abs(), abs_ints)
    testing.assert_series_equal(np.abs(ints), abs_ints)  # type: ignore

    # floats
    floats = pl.Series([1.0, -2.0, 3, -4.0])
    abs_floats = pl.Series([1.0, 2.0, 3.0, 4.0])
    testing.assert_series_equal(floats.abs(), abs_floats)
    testing.assert_series_equal(np.abs(floats), abs_floats)  # type: ignore
    testing.assert_series_equal(pl.select(pl.lit(floats).abs()).to_series(), abs_floats)
def test_equality() -> None:
    """Comparison operators between a Series and itself, and against a string."""
    left = pl.Series("a", [1, 2])
    right = left  # same object on purpose: every element equals itself

    equal_mask = left == right
    assert isinstance(equal_mask, pl.Series)
    assert equal_mask.sum() == 2

    assert (left != right).sum() == 0
    assert (left >= right).sum() == 2
    assert (left <= right).sum() == 2
    assert (left > right).sum() == 0
    assert (left < right).sum() == 0

    assert left.sum() == 3
    assert left.series_equal(right)

    names = pl.Series("name", ["ham", "foo", "bar"])
    is_ham = names == "ham"
    testing.assert_series_equal(is_ham, pl.Series("name", [True, False, False]))
def verify_series_and_expr_api(
    input: pl.Series, expected: pl.Series, op: str, *args: Any, **kwargs: Any
) -> None:
    """
    Check an operation through both the expression API and the Series API.

    ``op`` is looked up with ``_getattr_multi`` on ``pl.col("*")`` and on ``input``; both
    callables receive ``*args`` and ``**kwargs``, and each result must equal ``expected``.

    Examples
    --------
    >>> s = pl.Series([1, 3, 2])
    >>> expected = pl.Series([1, 2, 3])
    >>> verify_series_and_expr_api(s, expected, "sort")
    """
    # expression API: build the expression, then take the first selected column
    expr_method = _getattr_multi(pl.col("*"), op)
    selected = input.to_frame().select(expr_method(*args, **kwargs))
    result_expr: pl.Series = selected[:, 0]  # type: ignore

    # Series API: call the same-named method directly on the input
    series_method = _getattr_multi(input, op)
    result_series = series_method(*args, **kwargs)

    testing.assert_series_equal(result_expr, expected)
    testing.assert_series_equal(result_series, expected)
def test_ufunc() -> None:
    """Numpy ufuncs applied to a Series return a Series and keep nulls/chunks."""
    a = pl.Series("a", [1.0, 2.0, 3.0, 4.0])
    b = np.multiply(a, 4)
    assert isinstance(b, pl.Series)
    # Compare as a plain list: `Series == list` evaluates to another Series,
    # so asserting on it directly would not check the individual values.
    assert b.to_list() == [4, 8, 12, 16]

    # test if null bitmask is preserved
    a = pl.Series("a", [1.0, None, 3.0])
    b = np.exp(a)
    assert b.null_count() == 1

    # test if it works with chunked series.
    a = pl.Series("a", [1.0, None, 3.0])
    b = pl.Series("b", [4.0, 5.0, None])
    a.append(b)
    assert a.n_chunks() == 2
    c = np.multiply(a, 3)
    testing.assert_series_equal(c, pl.Series("a", [3.0, None, 9.0, 12.0, 15.0, None]))
def test_comparisons_bool_series_to_int() -> None:
    """Boolean Series: division by an int works, other int operations raise TypeError."""
    # NOTE(review): a second function with this exact name is defined further
    # down in this file; if both live in the same module the later definition
    # replaces this one — confirm which variant is intended.
    flags = pl.Series([True, False])

    # todo: do we want this to work?
    testing.assert_series_equal(flags / 1, pl.Series([True, False], dtype=Float64))

    rejected = (
        (r"\-: 'Series' and 'int'", lambda: flags - 1),
        (r"\+: 'Series' and 'int'", lambda: flags + 1),
        (r"\%: 'Series' and 'int'", lambda: flags % 2),
        (r"\*: 'Series' and 'int'", lambda: flags * 1),
        (
            r"'<' not supported between instances of 'Series' and 'int'",
            lambda: flags < 2,
        ),
        (
            r"'>' not supported between instances of 'Series' and 'int'",
            lambda: flags > 2,
        ),
    )
    for pattern, operation in rejected:
        with pytest.raises(TypeError, match=pattern):
            operation()
def test_true_divide() -> None:
    """True division yields floats for Series, expressions and reversed operands."""
    ints = pl.Series("a", [1, 2])
    halves = pl.Series("a", [0.5, 1.0])

    testing.assert_series_equal(ints / 2, halves)
    via_expr = pl.DataFrame([ints]).select(pl.col("a") / 2)["a"]
    testing.assert_series_equal(via_expr, halves)

    # rtruediv
    reversed_div = pl.DataFrame([ints]).select(2 / pl.col("a"))["literal"]
    testing.assert_series_equal(reversed_div, pl.Series("literal", [2.0, 1.0]))

    # https://github.com/pola-rs/polars/issues/1369
    vals = [3000000000, 2, 3]
    large = pl.Series(vals)
    testing.assert_series_equal(large / 1, pl.Series(vals, dtype=Float64))

    large_via_expr = pl.DataFrame({"a": vals}).select([pl.col("a") / 1])["a"]
    testing.assert_series_equal(large_via_expr, pl.Series("a", vals, dtype=Float64))
def test_comparisons_bool_series_to_int() -> None:
    """Boolean Series: int arithmetic raises ValueError, ordering raises TypeError."""
    # NOTE(review): an earlier function in this file carries the same name; if
    # both live in the same module this definition replaces it — confirm.
    flags = pl.Series([True, False])

    # todo: do we want this to work?
    testing.assert_series_equal(flags / 1, pl.Series([True, False], dtype=Float64))

    # one message covers all four rejected arithmetic operators
    arithmetic_error = r"cannot do arithmetic with series of dtype: <class 'polars.datatypes.Boolean'> and argument of type: <class 'bool'>"
    arithmetic_operations = (
        lambda: flags - 1,
        lambda: flags + 1,
        lambda: flags % 2,
        lambda: flags * 1,
    )
    for operation in arithmetic_operations:
        with pytest.raises(ValueError, match=arithmetic_error):
            operation()

    ordering_operations = (
        (
            r"'<' not supported between instances of 'Series' and 'int'",
            lambda: flags < 2,
        ),
        (
            r"'>' not supported between instances of 'Series' and 'int'",
            lambda: flags > 2,
        ),
    )
    for pattern, operation in ordering_operations:
        with pytest.raises(TypeError, match=pattern):
            operation()
def test_comparisons_float_series_to_int() -> None:
    """Arithmetic and comparisons between a float Series and int/bool scalars."""
    srs_float = pl.Series([1.0, 2.0, 3.0, 4.0])

    # (actual result, expected values) pairs, checked in order
    cases = (
        (srs_float - 1, [0.0, 1.0, 2.0, 3.0]),
        (srs_float + 1, [2.0, 3.0, 4.0, 5.0]),
        (srs_float * 2, [2.0, 4.0, 6.0, 8.0]),
        (srs_float / 2, [0.5, 1.0, 1.5, 2.0]),
        (srs_float % 2, [1.0, 0.0, 1.0, 0.0]),
        (4 % srs_float, [0.0, 0.0, 1.0, 0.0]),
        (srs_float // 2, [0.0, 1.0, 1.0, 2.0]),
        (srs_float < 3, [True, True, False, False]),
        (srs_float <= 3, [True, True, True, False]),
        (srs_float > 3, [False, False, False, True]),
        (srs_float >= 3, [False, False, True, True]),
        (srs_float == 3, [False, False, True, False]),
        (srs_float - True, [0.0, 1.0, 2.0, 3.0]),
    )
    for actual, values in cases:
        testing.assert_series_equal(actual, pl.Series(values))
def test_comparisons_int_series_to_float() -> None:
    """Arithmetic and comparisons between an int Series and float/bool scalars."""
    srs_int = pl.Series([1, 2, 3, 4])

    # (actual result, expected values) pairs, checked in order
    cases = (
        (srs_int - 1.0, [0.0, 1.0, 2.0, 3.0]),
        (srs_int + 1.0, [2.0, 3.0, 4.0, 5.0]),
        (srs_int * 2.0, [2.0, 4.0, 6.0, 8.0]),
        (srs_int / 2.0, [0.5, 1.0, 1.5, 2.0]),
        (srs_int % 2.0, [1.0, 0.0, 1.0, 0.0]),
        (4.0 % srs_int, [0.0, 0.0, 1.0, 0.0]),
        (srs_int // 2.0, [0.0, 1.0, 1.0, 2.0]),
        (srs_int < 3.0, [True, True, False, False]),
        (srs_int <= 3.0, [True, True, True, False]),
        (srs_int > 3.0, [False, False, False, True]),
        (srs_int >= 3.0, [False, False, True, True]),
        (srs_int == 3.0, [False, False, True, False]),
        (srs_int - True, [0, 1, 2, 3]),
    )
    for actual, values in cases:
        testing.assert_series_equal(actual, pl.Series(values))
def test_bitwise() -> None:
    """Bitwise ``&``, ``|`` and ``^`` on Series and on column expressions."""
    a = pl.Series("a", [1, 2, 3])
    b = pl.Series("b", [3, 4, 5])
    and_values = [1, 0, 1]
    or_values = [3, 6, 7]
    xor_values = [2, 6, 6]

    # Series operators: the expected result carries the left operand's name
    testing.assert_series_equal(a & b, pl.Series("a", and_values))
    testing.assert_series_equal(a | b, pl.Series("a", or_values))
    testing.assert_series_equal(a ^ b, pl.Series("a", xor_values))

    # expression operators, aliased so each result is addressable by name
    left, right = pl.col("a"), pl.col("b")
    out = pl.DataFrame([a, b]).select(
        [
            (left & right).alias("and"),
            (left | right).alias("or"),
            (left ^ right).alias("xor"),
        ]
    )
    testing.assert_series_equal(out["and"], pl.Series("and", and_values))
    testing.assert_series_equal(out["or"], pl.Series("or", or_values))
    testing.assert_series_equal(out["xor"], pl.Series("xor", xor_values))
def test_floor_divide() -> None:
    """Floor division by a scalar agrees between Series and expression APIs."""
    ints = pl.Series("a", [1, 2, 3])
    expected = pl.Series("a", [0, 1, 1])

    testing.assert_series_equal(ints // 2, expected)

    via_expr = pl.DataFrame([ints]).select(pl.col("a") // 2)["a"]
    testing.assert_series_equal(via_expr, expected)
def test_arr_ordering() -> None:
    """``arr.sort`` and ``arr.reverse`` reorder each sub-list independently."""
    nested = pl.Series("a", [[2, 1], [1, 3, 2]])

    sorted_lists = nested.arr.sort()
    testing.assert_series_equal(sorted_lists, pl.Series("a", [[1, 2], [1, 2, 3]]))

    reversed_lists = nested.arr.reverse()
    testing.assert_series_equal(reversed_lists, pl.Series("a", [[1, 2], [2, 3, 1]]))
def test_arr_arithmetic() -> None:
    """Per-sub-list ``sum``, ``mean``, ``max`` and ``min`` aggregations."""
    nested = pl.Series("a", [[1, 2], [1, 2, 3]])

    # (aggregated result, expected values) pairs, checked in order
    cases = (
        (nested.arr.sum(), [3, 6]),
        (nested.arr.mean(), [1.5, 2.0]),
        (nested.arr.max(), [2, 3]),
        (nested.arr.min(), [1, 1]),
    )
    for actual, values in cases:
        testing.assert_series_equal(actual, pl.Series("a", values))
def test_rolling() -> None:
    """Rolling-window aggregations over a short integer Series."""
    a = pl.Series("a", [1, 2, 3, 2, 1])

    # window of 2: the first position has no complete window
    testing.assert_series_equal(a.rolling_min(2), pl.Series("a", [None, 1, 2, 2, 1]))
    testing.assert_series_equal(a.rolling_max(2), pl.Series("a", [None, 2, 3, 3, 2]))
    testing.assert_series_equal(a.rolling_sum(2), pl.Series("a", [None, 3, 5, 5, 3]))
    testing.assert_series_equal(
        a.rolling_mean(2), pl.Series("a", [None, 1.5, 2.5, 2.5, 1.5])
    )

    # only the first complete window is checked for std / var
    first_std = a.rolling_std(2).to_list()[1]
    assert first_std == pytest.approx(0.7071067811865476)
    first_var = a.rolling_var(2).to_list()[1]
    assert first_var == pytest.approx(0.5)

    testing.assert_series_equal(
        a.rolling_median(4), pl.Series("a", [None, None, None, 2, 2], dtype=Float64)
    )

    # quantile 0 gives the same expected values for every interpolation tested
    for interpolation in ("nearest", "lower", "higher"):
        testing.assert_series_equal(
            a.rolling_quantile(0, interpolation, 3),
            pl.Series("a", [None, None, 1, 2, 1], dtype=Float64),
        )

    assert a.rolling_skew(4).null_count() == 3