def test_pow(self):
    """Check elementwise ``**`` between two integer arrays with missing values.

    See https://github.com/pandas-dev/pandas/issues/22022.
    """
    base = integer_array([1, np.nan, np.nan, 1])
    exponent = integer_array([1, np.nan, 1, np.nan])

    powered = base ** exponent

    # Only the positions whose base is 1 are expected to be non-missing.
    wanted = pd.core.arrays.integer_array([1, np.nan, np.nan, 1])
    tm.assert_extension_array_equal(powered, wanted)
def test_to_integer_array_dtype_keyword():
    """An explicit ``dtype`` keyword decides the dtype of the result."""
    from_list = integer_array([1, 2], dtype='int8')
    assert from_list.dtype == Int8Dtype()

    # if values has dtype -> override it
    int8_values = np.array([1, 2], dtype='int8')
    from_ndarray = integer_array(int8_values, dtype='int32')
    assert from_ndarray.dtype == Int32Dtype()
def test_to_integer_array_inferred_dtype():
    """Without a ``dtype`` keyword the dtype is inferred from the input."""
    # if values has dtype -> respect it
    from_int8 = integer_array(np.array([1, 2], dtype='int8'))
    assert from_int8.dtype == Int8Dtype()
    from_int32 = integer_array(np.array([1, 2], dtype='int32'))
    assert from_int32.dtype == Int32Dtype()

    # if values have no dtype -> always int64
    from_plain_list = integer_array([1, 2])
    assert from_plain_list.dtype == Int64Dtype()
def test_get_numeric_data_extension_dtype(self):
    """``_get_numeric_data`` is expected to keep columns 'A' and 'C' (GH 22290)."""
    columns = {
        'A': integer_array([-10, np.nan, 0, 10, 20, 30], dtype='Int64'),
        'B': Categorical(list('abcabc')),
        'C': integer_array([0, 1, 2, 3, np.nan, 5], dtype='UInt8'),
        'D': IntervalArray.from_breaks(range(7)),
    }
    frame = DataFrame(columns)

    numeric_only = frame._get_numeric_data()

    # The categorical ('B') and interval ('D') columns must be dropped.
    assert_frame_equal(numeric_only, frame.loc[:, ['A', 'C']])
def test_to_integer_array_float():
    """Whole-number floats convert; fractional floats raise ``TypeError``."""
    converted = integer_array([1., 2.])
    tm.assert_extension_array_equal(converted, integer_array([1, 2]))

    with pytest.raises(TypeError, match="cannot safely cast non-equivalent"):
        integer_array([1.5, 2.])

    # for float dtypes, the itemsize is not preserved
    float32_values = np.array([1., 2.], dtype='float32')
    from_float32 = integer_array(float32_values)
    assert from_float32.dtype == Int64Dtype()
def test_astype_nansafe():
    """Casting an array that holds NaN to ``uint32`` must raise (gh-22343)."""
    with_missing = integer_array([np.nan, 1, 2], dtype="Int8")

    with pytest.raises(ValueError, match="cannot convert float NaN to integer"):
        with_missing.astype('uint32')
def test_astype_nansafe():
    """Casting an array that holds NaN to ``uint32`` must raise ``ValueError``.

    See https://github.com/pandas-dev/pandas/pull/22343.
    """
    arr = integer_array([np.nan, 1, 2], dtype="Int8")

    # Use pytest's own context manager rather than the legacy
    # ``tm.assert_raises_regex`` helper; both search the message as a regex.
    msg = 'cannot convert float NaN to integer'
    with pytest.raises(ValueError, match=msg):
        arr.astype('uint32')
def test_construct_cast_invalid(self, dtype):
    """Fractional floats are rejected by the constructor and by ``astype``."""
    msg = "cannot safely"

    # Same checks with and without a trailing missing value.
    for values in ([1.2, 2.3, 3.7], [1.2, 2.3, 3.7, np.nan]):
        with pytest.raises(TypeError, match=msg):
            integer_array(values, dtype=dtype)

        with pytest.raises(TypeError, match=msg):
            pd.Series(values).astype(dtype)
def test_repr_array():
    """The repr of a short array shows class, elements, length and dtype."""
    arr = integer_array([1, None, 3])
    wanted = '<IntegerArray>\n[1, NaN, 3]\nLength: 3, dtype: Int64'
    assert repr(arr) == wanted
def test_astype_extension_dtypes_1d(self, dtype):
    """Cast a single float column to an integer extension dtype (GH 22578)."""
    def float_frame():
        # Fresh frame each time so the column assignment below cannot leak.
        return pd.DataFrame({'a': [1., 2., 3.]})

    # Whole-frame astype, directly and via an intermediate int64 cast.
    frame = float_frame()
    all_cast = pd.DataFrame({'a': integer_array([1, 2, 3], dtype=dtype)})
    tm.assert_frame_equal(frame.astype(dtype), all_cast)
    tm.assert_frame_equal(frame.astype('int64').astype(dtype), all_cast)

    # Column-wise astype assigned back into the frame.
    frame = float_frame()
    frame['a'] = frame['a'].astype(dtype)
    column_cast = pd.DataFrame({'a': integer_array([1, 2, 3], dtype=dtype)})
    tm.assert_frame_equal(frame, column_cast)
    tm.assert_frame_equal(frame.astype(dtype), all_cast)
    tm.assert_frame_equal(frame.astype('int64').astype(dtype), all_cast)
def test_repr_array_long():
    """The repr of a 3000-element array is truncated with an ellipsis row."""
    long_array = integer_array([1, 2, None] * 1000)

    # NOTE(review): the expected text is reproduced verbatim from the source;
    # confirm the column padding inside the brackets against the real repr.
    wanted = (
        "<IntegerArray>\n"
        "[ 1, 2, NaN, 1, 2, NaN, 1, 2, NaN, 1,\n"
        " ...\n"
        " NaN, 1, 2, NaN, 1, 2, NaN, 1, 2, NaN]\n"
        "Length: 3000, dtype: Int64"
    )

    assert repr(long_array) == wanted
def test_astype_extension_dtypes(self, dtype):
    """Cast a two-column float frame to an integer extension dtype (GH 22578)."""
    def float_frame():
        # Fresh frame each time so the column assignment below cannot leak.
        return pd.DataFrame(
            [[1., 2.], [3., 4.], [5., 6.]], columns=['a', 'b'])

    # Whole-frame astype, via int64, and the round trip back to float64.
    frame = float_frame()
    all_cast = pd.DataFrame({
        'a': integer_array([1, 3, 5], dtype=dtype),
        'b': integer_array([2, 4, 6], dtype=dtype),
    })
    tm.assert_frame_equal(frame.astype(dtype), all_cast)
    tm.assert_frame_equal(frame.astype('int64').astype(dtype), all_cast)
    tm.assert_frame_equal(frame.astype(dtype).astype('float64'), frame)

    # Cast only column 'b'; column 'a' stays float.
    frame = float_frame()
    frame['b'] = frame['b'].astype(dtype)
    one_cast = pd.DataFrame({
        'a': [1., 3., 5.],
        'b': integer_array([2, 4, 6], dtype=dtype),
    })
    tm.assert_frame_equal(frame, one_cast)
    tm.assert_frame_equal(frame.astype(dtype), all_cast)
    tm.assert_frame_equal(frame.astype('int64').astype(dtype), all_cast)
def test_reduce_to_float(op):
    """Reduction ``op`` returns a float on the Series; groupby keeps Int64.

    Some reduce ops always return float, even if the result is a rounded
    number.
    """
    frame = pd.DataFrame({
        "A": ['a', 'b', 'b'],
        "B": [1, None, 3],
        "C": integer_array([1, None, 3], dtype='Int64'),
    })

    # Reduction applied directly to the integer-array column.
    reduced = getattr(frame.C, op)()
    assert isinstance(reduced, float)

    # Same reduction applied per group.
    grouped = getattr(frame.groupby("A"), op)()
    wanted = pd.DataFrame(
        {
            "B": np.array([1.0, 3.0]),
            "C": integer_array([1, 3], dtype="Int64"),
        },
        index=pd.Index(['a', 'b'], name='A'),
    )
    tm.assert_frame_equal(grouped, wanted)
def test_preserve_dtypes(op):
    """Reduction ``op`` returns an int on the Series; groupby keeps Int64."""
    # TODO(#22346): preserve Int64 dtype
    # for ops that enable (mean would actually work here
    # but generally it is a float return value)
    frame = pd.DataFrame({
        "A": ['a', 'b', 'b'],
        "B": [1, None, 3],
        "C": integer_array([1, None, 3], dtype='Int64'),
    })

    # Reduction applied directly to the integer-array column.
    reduced = getattr(frame.C, op)()
    assert isinstance(reduced, int)

    # Same reduction applied per group.
    grouped = getattr(frame.groupby("A"), op)()
    wanted = pd.DataFrame(
        {
            "B": np.array([1.0, 3.0]),
            "C": integer_array([1, 3], dtype="Int64"),
        },
        index=pd.Index(['a', 'b'], name='A'),
    )
    tm.assert_frame_equal(grouped, wanted)
def test_groupby_mean_included():
    """Group-wise aggregation keeps the integer-array column in the result.

    Note that, despite the test name, the aggregation exercised is ``sum``.
    """
    frame = pd.DataFrame({
        "A": ['a', 'b', 'b'],
        "B": [1, None, 3],
        "C": integer_array([1, None, 3], dtype='Int64'),
    })

    summed = frame.groupby("A").sum()

    # TODO(#22346): preserve Int64 dtype
    wanted = pd.DataFrame(
        {
            "B": np.array([1.0, 3.0]),
            "C": np.array([1, 3], dtype="int64"),
        },
        index=pd.Index(['a', 'b'], name='A'),
    )
    tm.assert_frame_equal(summed, wanted)
def test_construct_index(self, all_data, dropna):
    """An ``Index`` built from an integer array equals an object ``Index``."""
    # ensure that we do not coerce to Float64Index, rather
    # keep as Index
    head = all_data[:10]
    other = np.array(head[~head.isna()]) if dropna else head

    built = pd.Index(integer_array(other, dtype=head.dtype))

    tm.assert_index_equal(built, pd.Index(other, dtype=object))
def test_integer_array_constructor():
    """``IntegerArray(values, mask)`` accepts only an int ndarray and a bool ndarray."""
    values = np.array([1, 2, 3, 4], dtype='int64')
    mask = np.array([False, False, False, True], dtype='bool')

    built = IntegerArray(values, mask)
    wanted = integer_array([1, 2, 3, np.nan], dtype='int64')
    tm.assert_extension_array_equal(built, wanted)

    # Each of these argument tuples must be rejected with a TypeError:
    # list values, list mask, float values, and a missing mask.
    rejected = [
        (values.tolist(), mask),
        (values, mask.tolist()),
        (values.astype(float), mask),
        (values,),
    ]
    for args in rejected:
        with pytest.raises(TypeError):
            IntegerArray(*args)
def test_ufunc_reduce_raises(values):
    """``np.add.reduce`` on an integer array raises ``NotImplementedError``."""
    arr = integer_array(values)

    with pytest.raises(NotImplementedError):
        np.add.reduce(arr)
def data(dtype):
    """Return an integer array of ``dtype`` built from ``make_data()``."""
    raw_values = make_data()
    return integer_array(raw_values, dtype=dtype)
def test_to_integer_array_error(values):
    """Converting unsupported ``values`` to an IntegerArray raises ``TypeError``."""
    # error in converting existing arrays to IntegerArrays
    expect_type_error = pytest.raises(TypeError)
    with expect_type_error:
        integer_array(values)
def data_missing(dtype):
    """Return a two-element integer array of ``dtype``: NaN first, then 1."""
    values = [np.nan, 1]
    return integer_array(values, dtype=dtype)
def data_for_sorting(dtype):
    """Return the integer array ``[1, 2, 0]`` with the given ``dtype``."""
    values = [1, 2, 0]
    return integer_array(values, dtype=dtype)
def test_rpow_one_to_na(self):
    """A float ndarray raised to an all-missing integer array.

    The expected result is 1.0 for base 1.0 and NaN for base 2.0.
    See https://github.com/pandas-dev/pandas/issues/22022.
    """
    all_missing = integer_array([np.nan, np.nan])
    bases = np.array([1.0, 2.0])

    powered = bases ** all_missing

    tm.assert_numpy_array_equal(powered, np.array([1.0, np.nan]))
def test_arith_zero_dim_ndarray(self, other):
    """Adding ``np.array(other)`` must give the same result as adding ``other``."""
    arr = integer_array([1, None, 2])

    via_ndarray = arr + np.array(other)
    via_scalar = arr + other

    tm.assert_equal(via_ndarray, via_scalar)
def test_to_integer_array_bool(bool_values, int_values, target_dtype,
                               expected_dtype):
    """Boolean input converts to the same array as the matching integer input."""
    from_bools = integer_array(bool_values, dtype=target_dtype)
    assert from_bools.dtype == expected_dtype

    from_ints = integer_array(int_values, dtype=target_dtype)
    tm.assert_extension_array_equal(from_bools, from_ints)
def test_integer_array_constructor_none_is_nan(a, b):
    """Arrays built from ``a`` and from ``b`` must compare equal.

    NOTE(review): presumably ``a`` uses None where ``b`` uses NaN; the
    parametrization is outside this view.
    """
    from_a = integer_array(a)
    from_b = integer_array(b)
    tm.assert_extension_array_equal(from_a, from_b)
def data_for_grouping(dtype):
    """Return the integer array ``[1, 1, NaN, NaN, 0, 0, 1, 2]`` of ``dtype``."""
    a, b, c = 0, 1, 2
    na = np.nan
    layout = [b, b, na, na, a, a, b, c]
    return integer_array(layout, dtype=dtype)
def data_for_twos(dtype):
    """Return an integer array of ``dtype`` holding one hundred 2s."""
    twos = np.ones(100) * 2
    return integer_array(twos, dtype=dtype)
def test_rpow_one_to_na(self):
    """A float ndarray raised to an all-missing integer array.

    The expected result is 1.0 for base 1.0 and NaN for base 2.0.
    See https://github.com/pandas-dev/pandas/issues/22022.
    """
    exponents = integer_array([np.nan, np.nan])
    bases = np.array([1.0, 2.0])
    wanted = np.array([1.0, np.nan])

    tm.assert_numpy_array_equal(bases ** exponents, wanted)
def data_missing(dtype):
    """Return a two-element integer array of ``dtype``: ``pd.NA`` first, then 1."""
    values = [pd.NA, 1]
    return integer_array(values, dtype=dtype)
def test_to_integer_array_error(values):
    """Converting unsupported ``values`` raises a descriptive ``TypeError``.

    NOTE(review): ``values`` is presumably supplied by a parametrize
    decorator or fixture outside this view.
    """
    # error in converting existing arrays to IntegerArrays
    # Either message is acceptable. ``(?:...)`` is the non-capturing group
    # syntax; the earlier ``(:?...)`` spelling was a typo that produced a
    # capturing group beginning with an optional colon. The set of messages
    # matched is unchanged.
    msg = (
        r"(?:.* cannot be converted to an IntegerDtype)"
        r"|(?:values must be a 1D list-like)"
    )
    with pytest.raises(TypeError, match=msg):
        integer_array(values)
(["a", "b"], "category", pd.Categorical(["a", "b"])), ( ["a", "b"], pd.CategoricalDtype(None, ordered=True), pd.Categorical(["a", "b"], ordered=True), ), # Interval ( [pd.Interval(1, 2), pd.Interval(3, 4)], "interval", IntervalArray.from_tuples([(1, 2), (3, 4)]), ), # Sparse ([0, 1], "Sparse[int64]", SparseArray([0, 1], dtype="int64")), # IntegerNA ([1, None], "Int16", integer_array([1, None], dtype="Int16")), (pd.Series([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))), # String (["a", None], "string", StringArray._from_sequence(["a", None])), ( ["a", None], pd.StringDtype(), StringArray._from_sequence(["a", None]), ), # Boolean ([True, None], "boolean", BooleanArray._from_sequence([True, None])), ( [True, None], pd.BooleanDtype(), BooleanArray._from_sequence([True, None]),
pd.arrays.TimedeltaArray._from_sequence(['1H', '2H'])), # Category (['a', 'b'], 'category', pd.Categorical(['a', 'b'])), (['a', 'b'], pd.CategoricalDtype( None, ordered=True), pd.Categorical(['a', 'b'], ordered=True)), # Interval ([pd.Interval(1, 2), pd.Interval(3, 4) ], 'interval', pd.IntervalArray.from_tuples([(1, 2), (3, 4)])), # Sparse ([0, 1], 'Sparse[int64]', pd.SparseArray([0, 1], dtype='int64')), # IntegerNA ([1, None], 'Int16', integer_array([1, None], dtype='Int16')), (pd.Series([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))), # Index (pd.Index([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64)) ), # Series[EA] returns the EA (pd.Series(pd.Categorical(['a', 'b'], categories=['a', 'b', 'c'])), None, pd.Categorical(['a', 'b'], categories=['a', 'b', 'c'])), # "3rd party" EAs work ([decimal.Decimal(0), decimal.Decimal(1)], 'decimal', to_decimal( [0, 1])),
def test_repr_array():
    """The repr of a short array shows class, elements, length and dtype."""
    shown = repr(integer_array([1, None, 3]))
    wanted = (
        "<IntegerArray>\n"
        "[1, NaN, 3]\n"
        "Length: 3, dtype: Int64"
    )
    assert shown == wanted
def test_pow_array(self):
    """Check ``**`` over every pairing of 0, 1 and missing as base and exponent."""
    # Bases vary slowly (0, 1, missing); exponents cycle through 0, 1, missing.
    bases = integer_array([0, 0, 0, 1, 1, 1, None, None, None])
    exponents = integer_array([0, 1, None, 0, 1, None, 0, 1, None])

    powered = bases ** exponents

    wanted = integer_array([1, 0, None, 1, 1, 1, 1, None, None])
    tm.assert_extension_array_equal(powered, wanted)
def test_ufunc_reduce_raises(values):
    """``np.add.reduce`` on an integer array raises with a 'reduce' message."""
    arr = integer_array(values)

    with pytest.raises(
        NotImplementedError, match=r"The 'reduce' method is not supported."
    ):
        np.add.reduce(arr)
def data_missing_for_sorting(dtype):
    """Return the integer array ``[1, NaN, 0]`` with the given ``dtype``."""
    values = [1, np.nan, 0]
    return integer_array(values, dtype=dtype)
def test_to_integer_array(values, to_dtype, result_dtype):
    """Converting ``values`` with ``to_dtype`` yields an array of ``result_dtype``."""
    # convert existing arrays to IntegerArrays
    converted = integer_array(values, dtype=to_dtype)
    assert converted.dtype == result_dtype()

    # Building directly with the resulting dtype must give an equal array.
    direct = integer_array(values, dtype=result_dtype())
    tm.assert_extension_array_equal(converted, direct)
pd.arrays.TimedeltaArray._from_sequence(['1H', '2H'])), # Category (['a', 'b'], 'category', pd.Categorical(['a', 'b'])), (['a', 'b'], pd.CategoricalDtype(None, ordered=True), pd.Categorical(['a', 'b'], ordered=True)), # Interval ([pd.Interval(1, 2), pd.Interval(3, 4)], 'interval', pd.IntervalArray.from_tuples([(1, 2), (3, 4)])), # Sparse ([0, 1], 'Sparse[int64]', pd.SparseArray([0, 1], dtype='int64')), # IntegerNA ([1, None], 'Int16', integer_array([1, None], dtype='Int16')), (pd.Series([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))), # Index (pd.Index([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))), # Series[EA] returns the EA (pd.Series(pd.Categorical(['a', 'b'], categories=['a', 'b', 'c'])), None, pd.Categorical(['a', 'b'], categories=['a', 'b', 'c'])), # "3rd party" EAs work ([decimal.Decimal(0), decimal.Decimal(1)], 'decimal', to_decimal([0, 1])), # pass an ExtensionArray, but a different dtype (period_array(['2000', '2001'], freq='D'),
def data(dtype):
    """Return an integer array of ``dtype`` with values 0..100.

    The entries at positions 8 and 98 are NaN instead of 8 and 98.
    """
    values = [*range(8), np.nan, *range(10, 98), np.nan, 99, 100]
    return integer_array(values, dtype=dtype)