def test_groupby_bool_first() -> None:
    """Check that ``groupby(...).first()`` on an RLE bool column keeps the RLE bool dtype."""
    column = pd.Series([True, True], dtype=RLEDtype(bool))
    frame = pd.DataFrame({"x": column, "g": 1})

    first_per_group = frame.groupby("g")["x"].first()

    assert first_per_group.dtype == RLEDtype(bool)
    npt.assert_array_equal(
        first_per_group.array,
        RLEArray._from_sequence([True]),
    )
def test_unary_operator(
    rle_series: pd.Series,
    uncompressed_series: pd.Series,
    unary_operator: FUnaryOperator,
) -> None:
    """Apply a unary operator to the RLE series and compare against the uncompressed result.

    The RLE result must have dtype ``RLEDtype(float)`` and equal the operator
    applied to the uncompressed series, cast to ``RLEDtype(float)``.
    """
    result = unary_operator(rle_series)
    assert result.dtype == RLEDtype(float)

    reference = unary_operator(uncompressed_series)
    pd.testing.assert_series_equal(result, reference.astype(RLEDtype(float)))
def test_binary_bool_operator_scalar(
    rle_bool_series: pd.Series,
    uncompressed_bool_series: pd.Series,
    bool_scalar: bool,
    binary_bool_operator: FBinaryBoolOperator,
) -> None:
    """Combine an RLE bool series with a bool scalar and compare against the uncompressed result.

    The RLE result must have dtype ``RLEDtype(bool)`` and equal the operator
    applied to the uncompressed series, cast to ``RLEDtype(bool)``.
    """
    result = binary_bool_operator(rle_bool_series, bool_scalar)
    assert result.dtype == RLEDtype(bool)

    reference = binary_bool_operator(uncompressed_bool_series, bool_scalar)
    pd.testing.assert_series_equal(result, reference.astype(RLEDtype(bool)))
def test_groupby_bool_sum() -> None:
    """Check the dtype and value of ``groupby(...).sum()`` on an RLE bool column."""
    column = pd.Series([True, True], dtype=RLEDtype(bool))
    frame = pd.DataFrame({"x": column, "g": 1})

    summed = frame.groupby("g")["x"].sum()

    # Cython routines for integer addition are not available, so we need to
    # accept floats here.
    assert summed.dtype == np.float64
    npt.assert_array_equal(
        summed.to_numpy(),
        np.array([2], dtype=np.float64),
    )
def test_round(data_orig: pd.Series, data_rle: pd.Series, decimals: int) -> None:
    """Round the plain and RLE series identically and check that the results agree.

    The RLE result must wrap the dtype of the plain result; it is then cast
    to its wrapped dtype and compared to the plain result.
    """
    rounded_plain = data_orig.round(decimals=decimals)
    rounded_rle = data_rle.round(decimals=decimals)

    assert rounded_rle.dtype == RLEDtype(rounded_plain.dtype)

    # Cast to the dtype wrapped by the RLE dtype so both series are comparable.
    decompressed = rounded_rle.astype(rounded_rle.dtype._dtype)
    pdt.assert_series_equal(rounded_plain, decompressed)
def test_shift(
    data_orig: pd.Series, data_rle: pd.Series, periods: int, fill_value: Any
) -> None:
    """Shift the plain and RLE series identically and check that the results agree.

    The RLE result must wrap the dtype of the plain result; it is then cast
    to its wrapped dtype and compared to the plain result.
    """
    shifted_plain = data_orig.shift(periods=periods, fill_value=fill_value)
    shifted_rle = data_rle.shift(periods=periods, fill_value=fill_value)

    assert shifted_rle.dtype == RLEDtype(shifted_plain.dtype)

    # Cast to the dtype wrapped by the RLE dtype so both series are comparable.
    decompressed = shifted_rle.astype(shifted_rle.dtype._dtype)
    pdt.assert_series_equal(shifted_plain, decompressed)
def test_unary_bool_operator_array(
    rle_bool_series: pd.Series,
    uncompressed_bool_series: pd.Series,
    unary_bool_operator: FUnaryBoolOperator,
) -> None:
    """Apply a unary bool operator to the underlying arrays rather than the series.

    The result for the RLE array must have dtype ``RLEDtype(bool)`` and be
    element-wise equal to the result for the uncompressed array.
    """
    result = unary_bool_operator(rle_bool_series.array)
    assert result.dtype == RLEDtype(bool)

    reference = unary_bool_operator(uncompressed_bool_series.array)
    npt.assert_array_equal(result, reference)
def test_factorize_int() -> None:
    """Factorize an int32 RLE array and check both the codes and the uniques."""
    values = RLEArray._from_sequence([42, -10, -10], dtype=RLEDtype(np.int32))
    codes, uniques = values.factorize()

    # Codes: positions into the uniques, expected as int64.
    want_codes = np.array([0, 1, 1], dtype=np.int64)
    assert codes.dtype == want_codes.dtype
    npt.assert_array_equal(codes, want_codes)

    # Uniques: distinct values in order of first appearance.
    want_uniques = RLEArray._from_sequence([42, -10], dtype=np.int32)
    assert uniques.dtype == want_uniques.dtype
    npt.assert_array_equal(uniques, want_uniques)
def test_from_sequence_bool() -> None:
    """Check ``RLEArray._from_sequence`` with a bool target dtype.

    Integer and float inputs holding only 0/1 are converted to booleans,
    other numeric values raise ``TypeError``, and NaN raises ``TypeError``
    with a message about masked booleans.
    """
    # Inputs that are accepted as bool-like.
    accepted_inputs = (
        np.array([0, 1], dtype=np.int64),
        np.array([0.0, 1.0], dtype=np.float64),
    )
    for raw in accepted_inputs:
        converted = RLEArray._from_sequence(raw, dtype=RLEDtype(bool))
        npt.assert_array_equal(converted, np.array([False, True]))

    # Inputs with values other than 0/1 are rejected.
    rejected_inputs = (
        np.array([1, 2], dtype=np.int64),
        np.array([-1, 1], dtype=np.int64),
    )
    for raw in rejected_inputs:
        with pytest.raises(TypeError, match="Need to pass bool-like values"):
            RLEArray._from_sequence(raw, dtype=RLEDtype(bool))

    # Missing values are rejected with a dedicated message.
    with_nan = np.array([np.nan, 1.0], dtype=np.float64)
    with pytest.raises(TypeError, match="Masked booleans are not supported"):
        RLEArray._from_sequence(with_nan, dtype=RLEDtype(bool))
def test_binary_operator_scalar(
    rle_series: pd.Series,
    uncompressed_series: pd.Series,
    scalar: float,
    binary_operator: FBinaryOperator,
) -> None:
    """Combine an RLE series with a scalar and compare against the uncompressed result.

    The RLE result must have dtype ``RLEDtype(float)``; the reference is cast
    via the string dtype name ``"RLEDtype[float]"``.
    """
    result = binary_operator(rle_series, scalar)
    assert result.dtype == RLEDtype(float)

    reference = binary_operator(uncompressed_series, scalar)
    pd.testing.assert_series_equal(result, reference.astype("RLEDtype[float]"))
def test_compare_scalar(
    rle_series: pd.Series,
    uncompressed_series: pd.Series,
    scalar: float,
    compare_operator: FCompareOperator,
) -> None:
    """Compare an RLE series against a scalar and check it matches the uncompressed result.

    The RLE result must have dtype ``RLEDtype(bool)``; the reference is cast
    via the string dtype name ``"RLEDtype[bool]"``.
    """
    result = compare_operator(rle_series, scalar)
    assert result.dtype == RLEDtype(bool)

    reference = compare_operator(uncompressed_series, scalar)
    pd.testing.assert_series_equal(result, reference.astype("RLEDtype[bool]"))
def test_no_copy(series: pd.Series) -> None:
    """Check that ``astype`` to the same dtype with ``copy=False`` reuses the values object."""
    recast = series.astype(series.dtype, copy=False)

    # Identity, not equality: the underlying values must be the same object.
    assert recast.values is series.values
    assert recast.dtype == RLEDtype(int)
def series() -> pd.Series:
    """Return the series ``[1, 1, 2]`` converted to ``RLEDtype(int)``."""
    plain = pd.Series([1, 1, 2])
    return plain.astype(RLEDtype(int))
import numpy as np import pytest from rle_array import RLEDtype @pytest.mark.parametrize( "a,b,expected", [ ( # a RLEDtype(int), # b RLEDtype(int), # expected True, ), ( # a RLEDtype(int), # b RLEDtype(float), # expected False, ), ( # a RLEDtype(int), # b RLEDtype(np.int64), # expected
def test_append_mixed() -> None:
    """Check that concatenating a plain int8 series with an RLE int8 series yields plain int8."""
    plain_part = pd.Series([1], dtype=np.int8)
    rle_part = pd.Series([1], dtype=RLEDtype(np.int8))

    combined = pd.concat([plain_part, rle_part])

    assert combined.dtype == np.int8
def data_rle(data_orig: pd.Series) -> pd.Series:
    """Return ``data_orig`` converted to an RLE dtype wrapping its current dtype."""
    rle_dtype = RLEDtype(data_orig.dtype)
    return data_orig.astype(rle_dtype)
def data_rle_bool(data_orig_bool: pd.Series) -> pd.Series:
    """Return ``data_orig_bool`` converted to an RLE dtype wrapping its current dtype.

    Args:
        data_orig_bool: Series to convert; its dtype becomes the dtype wrapped
            by the resulting ``RLEDtype``.

    Returns:
        The result of ``data_orig_bool.astype(RLEDtype(data_orig_bool.dtype))``.
    """
    return data_orig_bool.astype(RLEDtype(data_orig_bool.dtype))
def test_copy_different_dtype(series: pd.Series) -> None:
    """Check that ``astype`` to a different RLE dtype yields new values even with ``copy=False``."""
    target_dtype = RLEDtype(float)
    recast = series.astype(target_dtype, copy=False)

    # A dtype change cannot reuse the original values object.
    assert recast.values is not series.values
    assert recast.dtype == RLEDtype(float)
def test_get_common_dtype(dtype: RLEDtype, dtypes: List[Any], expected: Any) -> None:
    """Check that ``dtype._get_common_dtype(dtypes)`` equals ``expected``."""
    common = dtype._get_common_dtype(dtypes)
    assert common == expected
def dtype():
    """Fixture that supplies the ExtensionDtype under validation: ``RLEDtype(np.float32)``."""
    float32_rle = RLEDtype(np.float32)
    return float32_rle
def data_rle_bool(data_orig_bool: pd.Series) -> pd.Series:
    """Return ``data_orig_bool`` converted to an RLE dtype wrapping its current dtype."""
    rle_dtype = RLEDtype(data_orig_bool.dtype)
    return data_orig_bool.astype(rle_dtype)
from typing import Any, List import numpy as np import pytest from rle_array import RLEDtype @pytest.mark.parametrize( "a, b, expected", [ ( # a RLEDtype(int), # b RLEDtype(int), # expected True, ), ( # a RLEDtype(int), # b RLEDtype(float), # expected False, ), ( # a RLEDtype(int), # b
def data_rle(data_orig: pd.Series) -> pd.Series:
    """Return ``data_orig`` converted to an RLE dtype wrapping its current dtype.

    Args:
        data_orig: Series to convert; its dtype becomes the dtype wrapped by
            the resulting ``RLEDtype``.

    Returns:
        The result of ``data_orig.astype(RLEDtype(data_orig.dtype))``.
    """
    return data_orig.astype(RLEDtype(data_orig.dtype))
def series_rle(series_orig: pd.Series) -> pd.Series:
    """Return ``series_orig`` converted to an RLE dtype wrapping its current dtype."""
    rle_dtype = RLEDtype(series_orig.dtype)
    return series_orig.astype(rle_dtype)
def series_rle(series_orig: pd.Series) -> pd.Series:
    """Return ``series_orig`` converted to an RLE dtype wrapping its current dtype.

    Args:
        series_orig: Series to convert; its dtype becomes the dtype wrapped by
            the resulting ``RLEDtype``.

    Returns:
        The result of ``series_orig.astype(RLEDtype(series_orig.dtype))``.
    """
    return series_orig.astype(RLEDtype(series_orig.dtype))