Пример #1
0
 def _str_len(self):
     """
     Compute string lengths for the underlying data.

     When ``pc`` exposes ``utf8_length`` it is applied to ``self._data`` and
     the result is converted with ``Int64Dtype().__from_arrow__``; otherwise
     the parent class implementation is used.
     """
     # utf8_length added in pyarrow 4.0.0
     if not hasattr(pc, "utf8_length"):
         return super()._str_len()

     lengths = pc.utf8_length(self._data)
     return Int64Dtype().__from_arrow__(lengths)
Пример #2
0
    def _get_result_dtype(self, dtype: DtypeObj) -> DtypeObj:
        """
        Get the desired dtype of a result based on the
        input dtype and how it was computed.

        Parameters
        ----------
        dtype : np.dtype or ExtensionDtype
            Input dtype.

        Returns
        -------
        np.dtype or ExtensionDtype
            The desired dtype of the result.
        """
        how = self.how

        if how in ["add", "cumsum", "sum", "prod"]:
            if dtype == np.dtype(bool):
                return np.dtype(np.int64)
            elif isinstance(dtype, (BooleanDtype, IntegerDtype)):
                return Int64Dtype()
        elif how in ["mean", "median", "var"]:
            if isinstance(dtype, (BooleanDtype, IntegerDtype)):
                return Float64Dtype()
            elif is_float_dtype(dtype) or is_complex_dtype(dtype):
                return dtype
            elif is_numeric_dtype(dtype):
                return np.dtype(np.float64)
        return dtype
Пример #3
0
    def get_result_dtype(self, dtype: DtypeObj) -> DtypeObj:
        """
        Map an input dtype to the dtype the result of ``self.how`` should have.

        Parameters
        ----------
        dtype : np.dtype or ExtensionDtype
            Dtype of the input values.

        Returns
        -------
        np.dtype or ExtensionDtype
            Dtype the result should carry; the input dtype is returned
            unchanged when no special case applies.
        """
        from pandas.core.arrays.boolean import BooleanDtype
        from pandas.core.arrays.floating import Float64Dtype
        from pandas.core.arrays.integer import (
            Int64Dtype,
            _IntegerDtype,
        )

        op = self.how
        masked_int_like = (BooleanDtype, _IntegerDtype)

        if op in ("add", "cumsum", "sum", "prod"):
            # numpy bool becomes int64; masked boolean/integer becomes Int64.
            if dtype == np.dtype(bool):
                return np.dtype(np.int64)
            if isinstance(dtype, masked_int_like):
                return Int64Dtype()
            return dtype

        # Masked boolean/integer inputs become Float64 for these reductions.
        if op in ("mean", "median", "var") and isinstance(dtype, masked_int_like):
            return Float64Dtype()
        return dtype
Пример #4
0
    def _str_len(self):
        """
        Compute string lengths for the underlying data.

        If ``pa_version_under4p0`` is set, ``fallback_performancewarning`` is
        called with ``version="4"`` and the parent class implementation is
        used. Otherwise ``pc.utf8_length`` is applied to ``self._data`` and
        the result is converted with ``Int64Dtype().__from_arrow__``.
        """
        if not pa_version_under4p0:
            lengths = pc.utf8_length(self._data)
            return Int64Dtype().__from_arrow__(lengths)

        fallback_performancewarning(version="4")
        return super()._str_len()
Пример #5
0
def test_to_integer_array_inferred_dtype():
    """Check which dtype ``integer_array`` picks for typed and untyped input."""
    # if values has dtype -> respect it
    typed_cases = (('int8', Int8Dtype()), ('int32', Int32Dtype()))
    for numpy_name, expected_dtype in typed_cases:
        converted = integer_array(np.array([1, 2], dtype=numpy_name))
        assert converted.dtype == expected_dtype

    # if values have no dtype -> always int64
    assert integer_array([1, 2]).dtype == Int64Dtype()
Пример #6
0
def test_to_integer_array_inferred_dtype(constructor):
    """Check which dtype ``constructor`` picks for typed and untyped input."""
    # if values has dtype -> respect it
    int8_values = np.array([1, 2], dtype="int8")
    assert constructor(int8_values).dtype == Int8Dtype()
    int32_values = np.array([1, 2], dtype="int32")
    assert constructor(int32_values).dtype == Int32Dtype()

    # if values have no dtype -> always int64
    assert constructor([1, 2]).dtype == Int64Dtype()
Пример #7
0
def test_to_integer_array_float():
    """Check ``integer_array`` on float input: whole values, fractions, float32."""
    from_floats = integer_array([1., 2.])
    tm.assert_extension_array_equal(from_floats, integer_array([1, 2]))

    # a fractional value must be rejected
    with pytest.raises(TypeError, match="cannot safely cast non-equivalent"):
        integer_array([1.5, 2.])

    # for float dtypes, the itemsize is not preserved
    float32_values = np.array([1., 2.], dtype='float32')
    assert integer_array(float32_values).dtype == Int64Dtype()
Пример #8
0
def test_to_integer_array_float():
    """Check ``IntegerArray._from_sequence`` on whole, fractional and float32 floats."""
    build = IntegerArray._from_sequence

    tm.assert_extension_array_equal(
        build([1.0, 2.0]), pd.array([1, 2], dtype="Int64")
    )

    # a fractional value must be rejected
    with pytest.raises(TypeError, match="cannot safely cast non-equivalent"):
        build([1.5, 2.0])

    # for float dtypes, the itemsize is not preserved
    float32_values = np.array([1.0, 2.0], dtype="float32")
    assert build(float32_values).dtype == Int64Dtype()
Пример #9
0
def test_uses_pandas_na():
    """A ``None`` entry in an ``Int64Dtype`` array is read back as ``pd.NA``."""
    arr = pd.array([1, None], dtype=Int64Dtype())
    missing = arr[1]
    assert missing is pd.NA
Пример #10
0
    expected = pd.array([1, 2, np.nan], dtype="Int64")
    tm.assert_extension_array_equal(result, expected)

    with pytest.raises(ValueError,
                       match=r"invalid literal for int\(\) with base 10: .*"):
        IntegerArray._from_sequence(["1", "2", ""])

    with pytest.raises(ValueError,
                       match=r"invalid literal for int\(\) with base 10: .*"):
        IntegerArray._from_sequence(["1.5", "2.0"])


@pytest.mark.parametrize(
    "bool_values, int_values, target_dtype, expected_dtype",
    [
        ([False, True], [0, 1], Int64Dtype(), Int64Dtype()),
        ([False, True], [0, 1], "Int64", Int64Dtype()),
        ([False, True, np.nan], [0, 1, np.nan], Int64Dtype(), Int64Dtype()),
    ],
)
def test_to_integer_array_bool(
    constructor, bool_values, int_values, target_dtype, expected_dtype
):
    """Boolean input built with ``target_dtype`` must match the integer equivalent."""
    converted = constructor(bool_values, dtype=target_dtype)
    assert converted.dtype == expected_dtype
    tm.assert_extension_array_equal(
        converted, pd.array(int_values, dtype=target_dtype)
    )


@pytest.mark.parametrize(
    "values, to_dtype, result_dtype",
    [
Пример #11
0
 def test_as_json_table_type_ext_integer_dtype(self):
     """``as_json_table_type`` must report ``Int64Dtype()`` as "integer"."""
     json_type = as_json_table_type(Int64Dtype())
     assert json_type == "integer"
Пример #12
0
    def _str_len(self):
        """
        Compute string lengths for the underlying data.

        If ``pa_version_under4p0`` is set, the parent class implementation is
        used. Otherwise ``pc.utf8_length`` is applied to ``self._data`` and
        the result is converted with ``Int64Dtype().__from_arrow__``.
        """
        if not pa_version_under4p0:
            lengths = pc.utf8_length(self._data)
            return Int64Dtype().__from_arrow__(lengths)

        return super()._str_len()
Пример #13
0
def test_dtypes(dtype):
    """Smoke-test the numpy kind and the name of an integer extension dtype."""
    # smoke tests on auto dtype construction
    expected_kind = 'i' if dtype.is_signed_integer else 'u'
    assert np.dtype(dtype.type).kind == expected_kind
    assert dtype.name is not None


@pytest.mark.parametrize(
    'dtype, expected',
    [
        (Int8Dtype(), 'Int8Dtype()'),
        (Int16Dtype(), 'Int16Dtype()'),
        (Int32Dtype(), 'Int32Dtype()'),
        (Int64Dtype(), 'Int64Dtype()'),
        (UInt8Dtype(), 'UInt8Dtype()'),
        (UInt16Dtype(), 'UInt16Dtype()'),
        (UInt32Dtype(), 'UInt32Dtype()'),
        (UInt64Dtype(), 'UInt64Dtype()'),
    ],
)
def test_repr_dtype(dtype, expected):
    """The repr of each integer extension dtype must equal the expected text."""
    rendered = repr(dtype)
    assert rendered == expected


def test_repr_array():
    result = repr(integer_array([1, None, 3]))
    expected = (
        '<IntegerArray>\n'
        '[1, NaN, 3]\n'
        'Length: 3, dtype: Int64'
Пример #14
0
    # smoke tests on auto dtype construction

    if dtype.is_signed_integer:
        assert np.dtype(dtype.type).kind == "i"
    else:
        assert np.dtype(dtype.type).kind == "u"
    assert dtype.name is not None


@pytest.mark.parametrize(
    "dtype, expected",
    [
        (Int8Dtype(), "Int8Dtype()"),
        (Int16Dtype(), "Int16Dtype()"),
        (Int32Dtype(), "Int32Dtype()"),
        (Int64Dtype(), "Int64Dtype()"),
        (UInt8Dtype(), "UInt8Dtype()"),
        (UInt16Dtype(), "UInt16Dtype()"),
        (UInt32Dtype(), "UInt32Dtype()"),
        (UInt64Dtype(), "UInt64Dtype()"),
    ],
)
def test_repr_dtype(dtype, expected):
    """The repr of each integer extension dtype must equal the expected text."""
    rendered = repr(dtype)
    assert rendered == expected


def test_repr_array():
    """The repr of a three-element array with a missing value must match exactly."""
    rendered = repr(integer_array([1, None, 3]))
    expected = (
        "<IntegerArray>\n"
        "[1, <NA>, 3]\n"
        "Length: 3, dtype: Int64"
    )
    assert rendered == expected
Пример #15
0
def test_dtypes(dtype):
    """Smoke-test the numpy kind and the name of an integer extension dtype."""
    # smoke tests on auto dtype construction
    signed = dtype.is_signed_integer
    kind = np.dtype(dtype.type).kind
    assert kind == ('i' if signed else 'u')
    assert dtype.name is not None


@pytest.mark.parametrize(
    'dtype, expected',
    [
        (Int8Dtype(), 'Int8Dtype()'),
        (Int16Dtype(), 'Int16Dtype()'),
        (Int32Dtype(), 'Int32Dtype()'),
        (Int64Dtype(), 'Int64Dtype()'),
        (UInt8Dtype(), 'UInt8Dtype()'),
        (UInt16Dtype(), 'UInt16Dtype()'),
        (UInt32Dtype(), 'UInt32Dtype()'),
        (UInt64Dtype(), 'UInt64Dtype()'),
    ],
)
def test_repr_dtype(dtype, expected):
    """The repr of each integer extension dtype must equal the expected text."""
    rendered = repr(dtype)
    assert rendered == expected


def test_repr_array():
    """The repr of a three-element array with a missing value must match exactly."""
    rendered = repr(integer_array([1, None, 3]))
    expected = '<IntegerArray>\n[1, NaN, 3]\nLength: 3, dtype: Int64'
    assert rendered == expected