示例#1
0
 def test_pow(self):
     """Check ``**`` between two integer arrays that contain missing values."""
     # https://github.com/pandas-dev/pandas/issues/22022
     base = integer_array([1, np.nan, np.nan, 1])
     exponent = integer_array([1, np.nan, 1, np.nan])
     expected = pd.core.arrays.integer_array([1, np.nan, np.nan, 1])
     tm.assert_extension_array_equal(base ** exponent, expected)
示例#2
0
def test_to_integer_array_dtype_keyword():
    """Check that the ``dtype`` keyword decides the dtype of the result."""
    from_list = integer_array([1, 2], dtype='int8')
    assert from_list.dtype == Int8Dtype()

    # an explicit dtype wins over the dtype carried by the input ndarray
    int8_values = np.array([1, 2], dtype='int8')
    from_ndarray = integer_array(int8_values, dtype='int32')
    assert from_ndarray.dtype == Int32Dtype()
示例#3
0
def test_to_integer_array_inferred_dtype():
    """Check the dtype chosen when no ``dtype`` keyword is passed."""
    # an ndarray input keeps the integer width it already has
    cases = (('int8', Int8Dtype), ('int32', Int32Dtype))
    for numpy_dtype, dtype_cls in cases:
        converted = integer_array(np.array([1, 2], dtype=numpy_dtype))
        assert converted.dtype == dtype_cls()

    # a plain list carries no dtype, so the result is Int64
    from_list = integer_array([1, 2])
    assert from_list.dtype == Int64Dtype()
示例#4
0
 def test_get_numeric_data_extension_dtype(self):
     """Check that ``_get_numeric_data`` returns only columns 'A' and 'C'."""
     # GH 22290
     signed_col = integer_array([-10, np.nan, 0, 10, 20, 30], dtype='Int64')
     category_col = Categorical(list('abcabc'))
     unsigned_col = integer_array([0, 1, 2, 3, np.nan, 5], dtype='UInt8')
     interval_col = IntervalArray.from_breaks(range(7))
     df = DataFrame({
         'A': signed_col,
         'B': category_col,
         'C': unsigned_col,
         'D': interval_col,
     })

     numeric = df._get_numeric_data()
     assert_frame_equal(numeric, df.loc[:, ['A', 'C']])
示例#5
0
def test_to_integer_array_float():
    """Check how ``integer_array`` handles float input."""
    # floats that are whole numbers convert like the matching integers
    from_floats = integer_array([1., 2.])
    from_ints = integer_array([1, 2])
    tm.assert_extension_array_equal(from_floats, from_ints)

    # a fractional value must be rejected
    unsafe_cast = pytest.raises(
        TypeError, match="cannot safely cast non-equivalent")
    with unsafe_cast:
        integer_array([1.5, 2.])

    # for float dtypes, the itemsize is not preserved
    float32_values = np.array([1., 2.], dtype='float32')
    from_float32 = integer_array(float32_values)
    assert from_float32.dtype == Int64Dtype()
示例#6
0
def test_astype_nansafe():
    """Check that casting an array with a missing value to uint32 raises."""
    # see gh-22343
    with_missing = integer_array([np.nan, 1, 2], dtype="Int8")

    with pytest.raises(ValueError,
                       match="cannot convert float NaN to integer"):
        with_missing.astype('uint32')
示例#7
0
def test_astype_nansafe():
    """Check that casting an array with a missing value to uint32 raises."""
    # https://github.com/pandas-dev/pandas/pull/22343
    with_missing = integer_array([np.nan, 1, 2], dtype="Int8")
    expected_msg = 'cannot convert float NaN to integer'

    with tm.assert_raises_regex(ValueError, expected_msg):
        with_missing.astype('uint32')
示例#8
0
    def test_construct_cast_invalid(self, dtype):
        """Check that fractional floats cannot be cast to ``dtype``.

        Both ``integer_array`` and ``Series.astype`` must raise ``TypeError``,
        with and without a NaN among the values.
        """
        without_nan = [1.2, 2.3, 3.7]
        with_nan = [1.2, 2.3, 3.7, np.nan]

        for values in (without_nan, with_nan):
            with pytest.raises(TypeError, match="cannot safely"):
                integer_array(values, dtype=dtype)

            with pytest.raises(TypeError, match="cannot safely"):
                pd.Series(values).astype(dtype)
示例#9
0
def test_repr_array():
    """Check the repr of a short integer array with one missing value."""
    expected_lines = [
        '<IntegerArray>',
        '[1, NaN, 3]',
        'Length: 3, dtype: Int64',
    ]
    actual = repr(integer_array([1, None, 3]))
    assert actual == '\n'.join(expected_lines)
示例#10
0
    def test_astype_extension_dtypes_1d(self, dtype):
        """Check ``astype(dtype)`` on a single-column float frame."""
        # GH 22578
        def build_expected():
            return pd.DataFrame({'a': integer_array([1, 2, 3],
                                                    dtype=dtype)})

        def check_casts(frame, target):
            # casting directly and via int64 must both give ``target``
            tm.assert_frame_equal(frame.astype(dtype), target)
            tm.assert_frame_equal(frame.astype('int64').astype(dtype), target)

        floats = pd.DataFrame({'a': [1., 2., 3.]})
        expected1 = build_expected()
        check_casts(floats, expected1)

        # convert the column in place, then repeat the frame-level casts
        converted = pd.DataFrame({'a': [1., 2., 3.]})
        converted['a'] = converted['a'].astype(dtype)
        tm.assert_frame_equal(converted, build_expected())

        check_casts(converted, expected1)
示例#11
0
def test_repr_array_long():
    """Check the truncated repr of a 3000-element integer array."""
    long_array = integer_array([1, 2, None] * 1000)
    actual = repr(long_array)

    expected_lines = [
        "<IntegerArray>",
        "[  1,   2, NaN,   1,   2, NaN,   1,   2, NaN,   1,",
        " ...",
        " NaN,   1,   2, NaN,   1,   2, NaN,   1,   2, NaN]",
        "Length: 3000, dtype: Int64",
    ]
    assert actual == "\n".join(expected_lines)
示例#12
0
    def test_astype_extension_dtypes(self, dtype):
        """Check ``astype(dtype)`` on a two-column float frame."""
        # GH 22578
        rows = [[1., 2.], [3., 4.], [5., 6.]]

        def check_casts(frame, target):
            # casting directly and via int64 must both give ``target``
            tm.assert_frame_equal(frame.astype(dtype), target)
            tm.assert_frame_equal(frame.astype('int64').astype(dtype), target)

        floats = pd.DataFrame(rows, columns=['a', 'b'])
        expected1 = pd.DataFrame({
            'a': integer_array([1, 3, 5], dtype=dtype),
            'b': integer_array([2, 4, 6], dtype=dtype),
        })
        check_casts(floats, expected1)
        # casting back to float64 must reproduce the original frame
        tm.assert_frame_equal(floats.astype(dtype).astype('float64'), floats)

        # convert only column 'b' in place; 'a' stays float
        partial = pd.DataFrame(rows, columns=['a', 'b'])
        partial['b'] = partial['b'].astype(dtype)
        expected2 = pd.DataFrame({
            'a': [1., 3., 5.],
            'b': integer_array([2, 4, 6], dtype=dtype),
        })
        tm.assert_frame_equal(partial, expected2)

        check_casts(partial, expected1)
示例#13
0
def test_reduce_to_float(op):
    """Check reduction ``op`` on an Int64 column, directly and via groupby.

    The direct reduction of column ``C`` must be a ``float``; the grouped
    result is compared against a fixed expected frame.
    """
    # some reduce ops always return float, even if the result
    # is a rounded number
    frame = pd.DataFrame({
        "A": ['a', 'b', 'b'],
        "B": [1, None, 3],
        "C": integer_array([1, None, 3], dtype='Int64'),
    })

    # reduction on the column itself
    reduced = getattr(frame.C, op)()
    assert isinstance(reduced, float)

    # the same reduction applied per group of "A"
    grouped = getattr(frame.groupby("A"), op)()

    group_index = pd.Index(['a', 'b'], name='A')
    expected_columns = {
        "B": np.array([1.0, 3.0]),
        "C": integer_array([1, 3], dtype="Int64"),
    }
    expected = pd.DataFrame(expected_columns, index=group_index)
    tm.assert_frame_equal(grouped, expected)
示例#14
0
def test_preserve_dtypes(op):
    """Check reduction ``op`` on an Int64 column, directly and via groupby.

    The direct reduction of column ``C`` must be an ``int``; the grouped
    result is compared against a fixed expected frame.
    """
    # TODO(#22346): preserve Int64 dtype
    # for ops that enable (mean would actually work here
    # but generally it is a float return value)
    frame = pd.DataFrame({
        "A": ['a', 'b', 'b'],
        "B": [1, None, 3],
        "C": integer_array([1, None, 3], dtype='Int64'),
    })

    # reduction on the column itself
    reduced = getattr(frame.C, op)()
    assert isinstance(reduced, int)

    # the same reduction applied per group of "A"
    grouped = getattr(frame.groupby("A"), op)()

    group_index = pd.Index(['a', 'b'], name='A')
    expected_columns = {
        "B": np.array([1.0, 3.0]),
        "C": integer_array([1, 3], dtype="Int64"),
    }
    expected = pd.DataFrame(expected_columns, index=group_index)
    tm.assert_frame_equal(grouped, expected)
示例#15
0
def test_groupby_mean_included():
    """Check ``groupby("A").sum()`` on a frame with an Int64 column."""
    frame = pd.DataFrame({
        "A": ['a', 'b', 'b'],
        "B": [1, None, 3],
        "C": integer_array([1, None, 3], dtype='Int64'),
    })

    summed = frame.groupby("A").sum()

    # TODO(#22346): preserve Int64 dtype
    group_index = pd.Index(['a', 'b'], name='A')
    expected_columns = {
        "B": np.array([1.0, 3.0]),
        "C": np.array([1, 3], dtype="int64"),
    }
    expected = pd.DataFrame(expected_columns, index=group_index)
    tm.assert_frame_equal(summed, expected)
示例#16
0
    def test_construct_index(self, all_data, dropna):
        """Check that an Index built from an integer array has object dtype."""
        # ensure that we do not coerce to Float64Index, rather
        # keep as Index
        all_data = all_data[:10]

        # with dropna, only the non-missing values are used, as an ndarray
        other = np.array(all_data[~all_data.isna()]) if dropna else all_data

        expected = pd.Index(other, dtype=object)
        result = pd.Index(integer_array(other, dtype=all_data.dtype))

        tm.assert_index_equal(result, expected)
示例#17
0
def test_integer_array_constructor():
    """Check the ``IntegerArray(values, mask)`` constructor.

    A valid values/mask pair must equal the array built by ``integer_array``;
    list inputs, float values, or a missing mask must raise ``TypeError``.
    """
    values = np.array([1, 2, 3, 4], dtype='int64')
    mask = np.array([False, False, False, True], dtype='bool')

    built = IntegerArray(values, mask)
    reference = integer_array([1, 2, 3, np.nan], dtype='int64')
    tm.assert_extension_array_equal(built, reference)

    invalid_arguments = [
        (values.tolist(), mask),         # values given as a list
        (values, mask.tolist()),         # mask given as a list
        (values.astype(float), mask),    # float values
        (values,),                       # no mask
    ]
    for arguments in invalid_arguments:
        with pytest.raises(TypeError):
            IntegerArray(*arguments)
示例#18
0
def test_ufunc_reduce_raises(values):
    """Check that ``np.add.reduce`` on an integer array is not implemented."""
    arr = integer_array(values)
    not_implemented = pytest.raises(NotImplementedError)
    with not_implemented:
        np.add.reduce(arr)
示例#19
0
def data(dtype):
    """Return an integer array of ``make_data()`` values with ``dtype``."""
    raw_values = make_data()
    return integer_array(raw_values, dtype=dtype)
示例#20
0
def test_to_integer_array_error(values):
    """Check that ``integer_array(values)`` raises ``TypeError``."""
    # error in converting existing arrays to IntegerArrays
    expect_type_error = pytest.raises(TypeError)
    with expect_type_error:
        integer_array(values)
示例#21
0
def data_missing(dtype):
    """Return a two-element integer array: a missing value, then 1."""
    values = [np.nan, 1]
    return integer_array(values, dtype=dtype)
示例#22
0
def data(dtype):
    """Build an integer array with ``dtype`` from the output of ``make_data``."""
    generated = make_data()
    result = integer_array(generated, dtype=dtype)
    return result
示例#23
0
def data_for_sorting(dtype):
    """Return the integer array ``[1, 2, 0]`` with ``dtype``."""
    unsorted_values = [1, 2, 0]
    return integer_array(unsorted_values, dtype=dtype)
示例#24
0
 def test_rpow_one_to_na(self):
     """Check a float ndarray raised to an all-missing integer array."""
     # https://github.com/pandas-dev/pandas/issues/22022
     all_missing = integer_array([np.nan, np.nan])
     bases = np.array([1.0, 2.0])
     expected = np.array([1.0, np.nan])
     tm.assert_numpy_array_equal(bases**all_missing, expected)
示例#25
0
 def test_arith_zero_dim_ndarray(self, other):
     """Check that adding ``np.array(other)`` matches adding ``other``."""
     arr = integer_array([1, None, 2])
     wrapped = np.array(other)
     with_ndarray = arr + wrapped
     with_plain = arr + other
     tm.assert_equal(with_ndarray, with_plain)
示例#26
0
def test_to_integer_array_bool(bool_values, int_values, target_dtype,
                               expected_dtype):
    """Check that boolean input converts like the matching integer input."""
    from_bools = integer_array(bool_values, dtype=target_dtype)
    assert from_bools.dtype == expected_dtype

    from_ints = integer_array(int_values, dtype=target_dtype)
    tm.assert_extension_array_equal(from_bools, from_ints)
示例#27
0
def test_to_integer_array_error(values):
    """Check that converting ``values`` with ``integer_array`` fails."""
    # error in converting existing arrays to IntegerArrays
    expected_exception = TypeError
    with pytest.raises(expected_exception):
        integer_array(values)
示例#28
0
def test_integer_array_constructor_none_is_nan(a, b):
    """Check that ``integer_array(a)`` and ``integer_array(b)`` are equal."""
    from_a = integer_array(a)
    from_b = integer_array(b)
    tm.assert_extension_array_equal(from_a, from_b)
示例#29
0
def data_missing(dtype):
    """Build ``[NaN, 1]`` as an integer array with ``dtype``."""
    missing_then_one = [np.nan, 1]
    result = integer_array(missing_then_one, dtype=dtype)
    return result
示例#30
0
def data_for_grouping(dtype):
    """Return the integer array ``[1, 1, NaN, NaN, 0, 0, 1, 2]`` with ``dtype``."""
    # layout is [b, b, na, na, a, a, b, c] with a=0, b=1, c=2
    a, b, c, na = 0, 1, 2, np.nan
    values = [b, b, na, na, a, a, b, c]
    return integer_array(values, dtype=dtype)
示例#31
0
 def test_arith_zero_dim_ndarray(self, other):
     """Check ``arr + np.array(other)`` against ``arr + other``."""
     arr = integer_array([1, None, 2])
     actual = arr + np.array(other)
     tm.assert_equal(actual, arr + other)
示例#32
0
def data_for_twos(dtype):
    """Return an integer array of one hundred 2s with ``dtype``."""
    ones = np.ones(100)
    twos = ones * 2
    return integer_array(twos, dtype=dtype)
示例#33
0
 def test_rpow_one_to_na(self):
     """Check ``ndarray ** integer_array`` when every exponent is missing."""
     # https://github.com/pandas-dev/pandas/issues/22022
     exponents = integer_array([np.nan, np.nan])
     powered = np.array([1.0, 2.0]) ** exponents
     tm.assert_numpy_array_equal(powered, np.array([1.0, np.nan]))
示例#34
0
def data_for_grouping(dtype):
    """Build the eight-element grouping array ``[b, b, na, na, a, a, b, c]``."""
    labels = {'a': 0, 'b': 1, 'c': 2, 'na': np.nan}
    layout = ('b', 'b', 'na', 'na', 'a', 'a', 'b', 'c')
    values = [labels[key] for key in layout]
    return integer_array(values, dtype=dtype)
def data_missing(dtype):
    """Return a two-element integer array: ``pd.NA``, then 1."""
    values = [pd.NA, 1]
    return integer_array(values, dtype=dtype)
示例#36
0
def test_to_integer_array_error(values):
    """Check that ``integer_array(values)`` raises a matching ``TypeError``.

    The error message must match one of the two alternatives in ``msg``.
    """
    # error in converting existing arrays to IntegerArrays
    # ``(?:...)`` is a non-capturing group; each alternative of the ``|``
    # is one complete expected message.
    msg = (r"(?:.* cannot be converted to an IntegerDtype)"
           r"|(?:values must be a 1D list-like)")
    with pytest.raises(TypeError, match=msg):
        integer_array(values)
示例#37
0
 (["a", "b"], "category", pd.Categorical(["a", "b"])),
 (
     ["a", "b"],
     pd.CategoricalDtype(None, ordered=True),
     pd.Categorical(["a", "b"], ordered=True),
 ),
 # Interval
 (
     [pd.Interval(1, 2), pd.Interval(3, 4)],
     "interval",
     IntervalArray.from_tuples([(1, 2), (3, 4)]),
 ),
 # Sparse
 ([0, 1], "Sparse[int64]", SparseArray([0, 1], dtype="int64")),
 # IntegerNA
 ([1, None], "Int16", integer_array([1, None], dtype="Int16")),
 (pd.Series([1, 2]), None, PandasArray(np.array([1, 2],
                                                dtype=np.int64))),
 # String
 (["a", None], "string", StringArray._from_sequence(["a", None])),
 (
     ["a", None],
     pd.StringDtype(),
     StringArray._from_sequence(["a", None]),
 ),
 # Boolean
 ([True, None], "boolean", BooleanArray._from_sequence([True, None])),
 (
     [True, None],
     pd.BooleanDtype(),
     BooleanArray._from_sequence([True, None]),
示例#38
0
文件: test_array.py 项目: dm36/pandas
         pd.arrays.TimedeltaArray._from_sequence(['1H', '2H'])),

        # Category
        (['a', 'b'], 'category', pd.Categorical(['a', 'b'])),
        (['a', 'b'], pd.CategoricalDtype(
            None, ordered=True), pd.Categorical(['a', 'b'], ordered=True)),

        # Interval
        ([pd.Interval(1, 2), pd.Interval(3, 4)
          ], 'interval', pd.IntervalArray.from_tuples([(1, 2), (3, 4)])),

        # Sparse
        ([0, 1], 'Sparse[int64]', pd.SparseArray([0, 1], dtype='int64')),

        # IntegerNA
        ([1, None], 'Int16', integer_array([1, None], dtype='Int16')),
        (pd.Series([1, 2]), None, PandasArray(np.array([1, 2],
                                                       dtype=np.int64))),

        # Index
        (pd.Index([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))
         ),

        # Series[EA] returns the EA
        (pd.Series(pd.Categorical(['a', 'b'], categories=['a', 'b', 'c'])),
         None, pd.Categorical(['a', 'b'], categories=['a', 'b', 'c'])),

        # "3rd party" EAs work
        ([decimal.Decimal(0), decimal.Decimal(1)], 'decimal', to_decimal(
            [0, 1])),
示例#39
0
def test_integer_array_constructor_none_is_nan(a, b):
    """Check that inputs ``a`` and ``b`` convert to equal integer arrays."""
    expected = integer_array(b) if False else None
    result = integer_array(a)
    expected = integer_array(b)
    tm.assert_extension_array_equal(result, expected)
示例#40
0
def test_repr_array():
    """Check the three-line repr of ``integer_array([1, None, 3])``."""
    arr = integer_array([1, None, 3])
    expected = (
        "<IntegerArray>\n"
        "[1, NaN, 3]\n"
        "Length: 3, dtype: Int64"
    )
    assert repr(arr) == expected
示例#41
0
def data_for_twos(dtype):
    """Build an integer array holding the value 2 one hundred times."""
    # float64 array of 2.0, same as ``np.ones(100) * 2``
    twos = np.full(100, 2.0)
    return integer_array(twos, dtype=dtype)
示例#42
0
 def test_pow_array(self):
     """Check element-wise ``**`` for every pairing of 0, 1 and missing."""
     bases = integer_array([0, 0, 0, 1, 1, 1, None, None, None])
     exponents = integer_array([0, 1, None, 0, 1, None, 0, 1, None])
     expected = integer_array([1, 0, None, 1, 1, 1, 1, None, None])
     powered = bases**exponents
     tm.assert_extension_array_equal(powered, expected)
示例#43
0
def test_ufunc_reduce_raises(values):
    """Check that ``np.add.reduce`` on an integer array is rejected."""
    arr = integer_array(values)
    with pytest.raises(NotImplementedError,
                       match=r"The 'reduce' method is not supported."):
        np.add.reduce(arr)
示例#44
0
def data_missing_for_sorting(dtype):
    """Return the integer array ``[1, NaN, 0]`` with ``dtype``."""
    values = [1, np.nan, 0]
    return integer_array(values, dtype=dtype)
示例#45
0
def test_to_integer_array(values, to_dtype, result_dtype):
    """Check that converting with ``to_dtype`` yields ``result_dtype``."""
    # convert existing arrays to IntegerArrays
    converted = integer_array(values, dtype=to_dtype)
    assert converted.dtype == result_dtype()

    reference = integer_array(values, dtype=result_dtype())
    tm.assert_extension_array_equal(converted, reference)
示例#46
0
def data_for_sorting(dtype):
    """Build an integer array from ``[1, 2, 0]`` using ``dtype``."""
    values = [1, 2, 0]
    result = integer_array(values, dtype=dtype)
    return result
示例#47
0
def data_missing_for_sorting(dtype):
    """Build an integer array from ``[1, NaN, 0]`` using ``dtype``."""
    with_gap = [1, np.nan, 0]
    result = integer_array(with_gap, dtype=dtype)
    return result
示例#48
0
def test_to_integer_array(values, to_dtype, result_dtype):
    """Check the dtype and contents produced by ``integer_array``.

    The array built with ``to_dtype`` must have dtype ``result_dtype()`` and
    equal the array built with ``result_dtype()`` directly.
    """
    # convert existing arrays to IntegerArrays
    actual = integer_array(values, dtype=to_dtype)
    assert actual.dtype == result_dtype()
    tm.assert_extension_array_equal(
        actual, integer_array(values, dtype=result_dtype()))
示例#49
0
     pd.arrays.TimedeltaArray._from_sequence(['1H', '2H'])),

    # Category
    (['a', 'b'], 'category', pd.Categorical(['a', 'b'])),
    (['a', 'b'], pd.CategoricalDtype(None, ordered=True),
     pd.Categorical(['a', 'b'], ordered=True)),

    # Interval
    ([pd.Interval(1, 2), pd.Interval(3, 4)], 'interval',
     pd.IntervalArray.from_tuples([(1, 2), (3, 4)])),

    # Sparse
    ([0, 1], 'Sparse[int64]', pd.SparseArray([0, 1], dtype='int64')),

    # IntegerNA
    ([1, None], 'Int16', integer_array([1, None], dtype='Int16')),
    (pd.Series([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))),

    # Index
    (pd.Index([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))),

    # Series[EA] returns the EA
    (pd.Series(pd.Categorical(['a', 'b'], categories=['a', 'b', 'c'])),
     None,
     pd.Categorical(['a', 'b'], categories=['a', 'b', 'c'])),

    # "3rd party" EAs work
    ([decimal.Decimal(0), decimal.Decimal(1)], 'decimal', to_decimal([0, 1])),

    # pass an ExtensionArray, but a different dtype
    (period_array(['2000', '2001'], freq='D'),
示例#50
0
def data(dtype):
    """Return a 100-element integer array with two missing values.

    The values are 0-7, NaN, 10-97, NaN, 99, 100.
    """
    values = [*range(8), np.nan, *range(10, 98), np.nan, 99, 100]
    return integer_array(values, dtype=dtype)