Пример #1
0
 def test_tdi_iadd_timedeltalike(self, delta):
     # only test adding/sub offsets as + is now numeric
     rng = timedelta_range('1 days', '10 days')
     expected = timedelta_range('1 days 02:00:00', '10 days 02:00:00',
                                freq='D')
     rng += delta
     tm.assert_index_equal(rng, expected)
Пример #2
0
    def test_constructor_coverage(self):
        """Cover assorted TimedeltaIndex construction paths."""
        # a fractional ``periods`` is expected to match periods=10
        truncated = timedelta_range('1 days', periods=10.5)
        whole = timedelta_range('1 days', periods=10)
        tm.assert_index_equal(truncated, whole)

        with pytest.raises(TypeError,
                           match='periods must be a number, got foo'):
            TimedeltaIndex(start='1 days', periods='foo', freq='D')

        with pytest.raises(ValueError):
            TimedeltaIndex(start='1 days', end='10 days')

        with pytest.raises(TypeError):
            TimedeltaIndex('1 days')

        # generator expression
        from_gen = TimedeltaIndex(timedelta(n) for n in range(10))
        from_list = TimedeltaIndex([timedelta(n) for n in range(10)])
        tm.assert_index_equal(from_gen, from_list)

        # NumPy string array
        day_strings = np.array(['1 days', '2 days', '3 days'])
        three_days = to_timedelta([1, 2, 3], unit='d')
        tm.assert_index_equal(TimedeltaIndex(day_strings), three_days)

        # rebuild from the ``asi8`` values
        tm.assert_index_equal(TimedeltaIndex(three_days.asi8), three_days)

        # non-conforming freq
        with pytest.raises(ValueError):
            TimedeltaIndex(['1 days', '2 days', '4 days'], freq='D')

        with pytest.raises(ValueError):
            TimedeltaIndex(periods=10, freq='D')
Пример #3
0
    def test_delete(self):
        """Check name/freq of ``delete`` results for single positions."""
        tdi = timedelta_range(start='1 Days', periods=5, freq='D', name='idx')

        # preserve freq
        without_first = timedelta_range(
            start='2 Days', periods=4, freq='D', name='idx')
        without_last = timedelta_range(
            start='1 Days', periods=4, freq='D', name='idx')

        # reset freq to None
        without_second = TimedeltaIndex(
            ['1 day', '3 day', '4 day', '5 day'], freq=None, name='idx')

        cases = {
            0: without_first,
            -5: without_first,
            -1: without_last,
            4: without_last,
            1: without_second,
        }
        for position, expected in compat.iteritems(cases):
            trimmed = tdi.delete(position)
            tm.assert_index_equal(trimmed, expected)
            assert trimmed.name == expected.name
            assert trimmed.freq == expected.freq

        # either exception type, depending on numpy version
        with pytest.raises((IndexError, ValueError)):
            tdi.delete(5)
Пример #4
0
    def test_delete_slice(self):
        """Check ``delete`` with position tuples and the matching slices."""
        tdi = timedelta_range(start='1 days', periods=10, freq='D', name='idx')

        # preserve freq
        head_removed = timedelta_range(
            start='4 days', periods=7, freq='D', name='idx')
        tail_removed = timedelta_range(
            start='1 days', periods=7, freq='D', name='idx')

        # reset freq to None
        middle_removed = TimedeltaIndex(
            ['1 d', '2 d', '3 d', '7 d', '8 d', '9 d', '10d'],
            freq=None, name='idx')

        cases = {
            (0, 1, 2): head_removed,
            (7, 8, 9): tail_removed,
            (3, 4, 5): middle_removed,
        }
        for positions, expected in compat.iteritems(cases):
            # the same removal expressed as a tuple and as a slice
            as_slice = slice(positions[0], positions[-1] + 1)
            for locs in (positions, as_slice):
                trimmed = tdi.delete(locs)
                tm.assert_index_equal(trimmed, expected)
                assert trimmed.name == expected.name
                assert trimmed.freq == expected.freq
Пример #5
0
    def test_categorical_repr_timedelta_ordered(self):
        idx = timedelta_range('1 days', periods=5)
        c = Categorical(idx, ordered=True)
        exp = """[1 days, 2 days, 3 days, 4 days, 5 days]
Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]"""  # noqa

        assert repr(c) == exp

        c = Categorical(idx.append(idx), categories=idx, ordered=True)
        exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days]
Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]"""  # noqa

        assert repr(c) == exp

        idx = timedelta_range('1 hours', periods=20)
        c = Categorical(idx, ordered=True)
        exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00]
Length: 20
Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 <
                                   3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 <
                                   18 days 01:00:00 < 19 days 01:00:00]"""  # noqa

        assert repr(c) == exp

        c = Categorical(idx.append(idx), categories=idx, ordered=True)
        exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00]
Length: 40
Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 <
                                   3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 <
                                   18 days 01:00:00 < 19 days 01:00:00]"""  # noqa

        assert repr(c) == exp
Пример #6
0
    def test_categorical_series_repr_timedelta(self):
        idx = timedelta_range('1 days', periods=5)
        s = Series(Categorical(idx))
        exp = """0   1 days
1   2 days
2   3 days
3   4 days
4   5 days
dtype: category
Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]"""

        assert repr(s) == exp

        idx = timedelta_range('1 hours', periods=10)
        s = Series(Categorical(idx))
        exp = """0   0 days 01:00:00
1   1 days 01:00:00
2   2 days 01:00:00
3   3 days 01:00:00
4   4 days 01:00:00
5   5 days 01:00:00
6   6 days 01:00:00
7   7 days 01:00:00
8   8 days 01:00:00
9   9 days 01:00:00
dtype: category
Categories (10, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00,
                                   3 days 01:00:00, ..., 6 days 01:00:00, 7 days 01:00:00,
                                   8 days 01:00:00, 9 days 01:00:00]"""  # noqa

        assert repr(s) == exp
Пример #7
0
    def test_getitem(self):
        idx1 = timedelta_range('1 day', '31 day', freq='D', name='idx')

        for idx in [idx1]:
            result = idx[0]
            assert result == Timedelta('1 day')

            result = idx[0:5]
            expected = timedelta_range('1 day', '5 day', freq='D',
                                       name='idx')
            tm.assert_index_equal(result, expected)
            assert result.freq == expected.freq

            result = idx[0:10:2]
            expected = timedelta_range('1 day', '9 day', freq='2D',
                                       name='idx')
            tm.assert_index_equal(result, expected)
            assert result.freq == expected.freq

            result = idx[-20:-5:3]
            expected = timedelta_range('12 day', '24 day', freq='3D',
                                       name='idx')
            tm.assert_index_equal(result, expected)
            assert result.freq == expected.freq

            result = idx[4::-1]
            expected = TimedeltaIndex(['5 day', '4 day', '3 day',
                                       '2 day', '1 day'],
                                      freq='-1D', name='idx')
            tm.assert_index_equal(result, expected)
            assert result.freq == expected.freq
Пример #8
0
 def test_tdi_sub_int(self, one):
     """Check ``rng - one`` warns and equals the range one hour earlier."""
     hourly = timedelta_range('1 days 09:00:00', freq='H', periods=10)
     hour_earlier = timedelta_range('1 days 08:00:00', freq='H', periods=10)
     # GH#22535
     with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
         difference = hourly - one
     tm.assert_index_equal(difference, hour_earlier)
Пример #9
0
    def test_categorical_series_repr_timedelta_ordered(self):
        idx = timedelta_range('1 days', periods=5)
        s = Series(Categorical(idx, ordered=True))
        exp = """0   1 days
1   2 days
2   3 days
3   4 days
4   5 days
dtype: category
Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]"""  # noqa

        assert repr(s) == exp

        idx = timedelta_range('1 hours', periods=10)
        s = Series(Categorical(idx, ordered=True))
        exp = """0   0 days 01:00:00
1   1 days 01:00:00
2   2 days 01:00:00
3   3 days 01:00:00
4   4 days 01:00:00
5   5 days 01:00:00
6   6 days 01:00:00
7   7 days 01:00:00
8   8 days 01:00:00
9   9 days 01:00:00
dtype: category
Categories (10, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 <
                                   3 days 01:00:00 ... 6 days 01:00:00 < 7 days 01:00:00 <
                                   8 days 01:00:00 < 9 days 01:00:00]"""  # noqa

        assert repr(s) == exp
Пример #10
0
    def test_constructor_coverage(self):
        """Cover assorted TimedeltaIndex construction paths."""
        # a fractional ``periods`` is expected to match periods=10
        truncated = timedelta_range('1 days', periods=10.5)
        whole = timedelta_range('1 days', periods=10)
        self.assertTrue(truncated.equals(whole))

        with self.assertRaises(ValueError):
            TimedeltaIndex(start='1 days', periods='foo', freq='D')

        with self.assertRaises(ValueError):
            TimedeltaIndex(start='1 days', end='10 days')

        with self.assertRaises(ValueError):
            TimedeltaIndex('1 days')

        # generator expression
        from_gen = TimedeltaIndex(timedelta(n) for n in range(10))
        from_list = TimedeltaIndex([timedelta(n) for n in range(10)])
        self.assertTrue(from_gen.equals(from_list))

        # NumPy string array
        day_strings = np.array(['1 days', '2 days', '3 days'])
        three_days = to_timedelta([1, 2, 3], unit='d')
        self.assertTrue(TimedeltaIndex(day_strings).equals(three_days))

        # rebuild from the ``asi8`` values
        self.assertTrue(TimedeltaIndex(three_days.asi8).equals(three_days))

        # non-conforming freq
        with self.assertRaises(ValueError):
            TimedeltaIndex(['1 days', '2 days', '4 days'], freq='D')

        with self.assertRaises(ValueError):
            TimedeltaIndex(periods=10, freq='D')
Пример #11
0
    def test_value_counts_unique(self):
        """Check ``value_counts`` and ``unique`` with repeats and NaT."""
        # GH 7735

        hourly = timedelta_range("1 days 09:00:00", freq="H", periods=10)
        # the n-th element is repeated n + 1 times
        repeats = range(1, len(hourly) + 1)
        repeated = TimedeltaIndex(np.repeat(hourly.values, repeats))

        descending = timedelta_range("1 days 18:00:00", freq="-1H", periods=10)
        counts = Series(range(10, 0, -1), index=descending, dtype="int64")
        tm.assert_series_equal(repeated.value_counts(), counts)

        distinct = timedelta_range("1 days 09:00:00", freq="H", periods=10)
        tm.assert_index_equal(repeated.unique(), distinct)

        with_nat = TimedeltaIndex(
            [
                "1 days 09:00:00",
                "1 days 09:00:00",
                "1 days 09:00:00",
                "1 days 08:00:00",
                "1 days 08:00:00",
                pd.NaT,
            ]
        )

        # NaT is left out of the default counts
        no_nat = TimedeltaIndex(["1 days 09:00:00", "1 days 08:00:00"])
        tm.assert_series_equal(
            with_nat.value_counts(), Series([3, 2], index=no_nat)
        )

        keep_nat = TimedeltaIndex(["1 days 09:00:00", "1 days 08:00:00", pd.NaT])
        tm.assert_series_equal(
            with_nat.value_counts(dropna=False),
            Series([3, 2, 1], index=keep_nat),
        )

        tm.assert_index_equal(with_nat.unique(), keep_nat)
Пример #12
0
 def test_intersection_zero_length(self, period_1, period_2, sort):
     # GH 24471 test for non overlap the intersection should be zero length
     index_1 = timedelta_range('1 day', periods=period_1, freq='h')
     index_2 = timedelta_range('1 day', periods=period_2, freq='h')
     expected = timedelta_range('1 day', periods=0, freq='h')
     result = index_1.intersection(index_2, sort=sort)
     tm.assert_index_equal(result, expected)
Пример #13
0
    def test_difference_sort(self, sort):

        index = pd.TimedeltaIndex(["5 days", "3 days", "2 days", "4 days",
                                   "1 days", "0 days"])

        other = timedelta_range("1 days", "4 days", freq="D")
        idx_diff = index.difference(other, sort)

        expected = TimedeltaIndex(["5 days", "0 days"], freq=None)

        if sort is None:
            expected = expected.sort_values()

        tm.assert_index_equal(idx_diff, expected)
        tm.assert_attr_equal('freq', idx_diff, expected)

        other = timedelta_range("2 days", "5 days", freq="D")
        idx_diff = index.difference(other, sort)
        expected = TimedeltaIndex(["1 days", "0 days"], freq=None)

        if sort is None:
            expected = expected.sort_values()

        tm.assert_index_equal(idx_diff, expected)
        tm.assert_attr_equal('freq', idx_diff, expected)
Пример #14
0
    def test_value_counts_unique(self):
        """Check ``value_counts`` on index and Series, ``unique`` on index."""
        # GH 7735

        hourly = timedelta_range('1 days 09:00:00', freq='H', periods=10)
        # the n-th element is repeated n + 1 times
        repeated = TimedeltaIndex(
            np.repeat(hourly.values, range(1, len(hourly) + 1)))

        descending = timedelta_range(
            '1 days 18:00:00', freq='-1H', periods=10)
        counts = Series(range(10, 0, -1), index=descending, dtype='int64')
        for obj in (repeated, Series(repeated)):
            tm.assert_series_equal(obj.value_counts(), counts)

        distinct = timedelta_range('1 days 09:00:00', freq='H', periods=10)
        tm.assert_index_equal(repeated.unique(), distinct)

        with_nat = TimedeltaIndex(['1 days 09:00:00', '1 days 09:00:00',
                                   '1 days 09:00:00', '1 days 08:00:00',
                                   '1 days 08:00:00', pd.NaT])

        # NaT is left out of the default counts
        no_nat = TimedeltaIndex(['1 days 09:00:00', '1 days 08:00:00'])
        counts = Series([3, 2], index=no_nat)
        for obj in (with_nat, Series(with_nat)):
            tm.assert_series_equal(obj.value_counts(), counts)

        keep_nat = TimedeltaIndex(['1 days 09:00:00', '1 days 08:00:00',
                                   pd.NaT])
        counts = Series([3, 2, 1], index=keep_nat)
        for obj in (with_nat, Series(with_nat)):
            tm.assert_series_equal(obj.value_counts(dropna=False), counts)

        tm.assert_index_equal(with_nat.unique(), keep_nat)
Пример #15
0
    def test_sub_isub(self):
        """Check ``-``/``-=`` on timedelta ranges plus Timestamp cases."""
        # only test adding/sub offsets as - is now numeric

        # offset
        two_hour_deltas = [pd.offsets.Hour(2), timedelta(hours=2),
                           np.timedelta64(2, 'h'), Timedelta(hours=2)]
        for delta in two_hour_deltas:
            days = timedelta_range('1 days', '10 days')
            shifted = timedelta_range('0 days 22:00:00', '9 days 22:00:00')
            tm.assert_index_equal(days - delta, shifted)
            days -= delta
            tm.assert_index_equal(days, shifted)

        # int
        hourly = timedelta_range('1 days 09:00:00', freq='H', periods=10)
        hour_earlier = timedelta_range(
            '1 days 08:00:00', freq='H', periods=10)
        tm.assert_index_equal(hourly - 1, hour_earlier)
        hourly -= 1
        tm.assert_index_equal(hourly, hour_earlier)

        # subtracting a Timestamp from the index must raise
        tdi = TimedeltaIndex(['1 day', '2 day'])
        with tm.assertRaisesRegexp(
                TypeError,
                "cannot subtract a datelike from a TimedeltaIndex"):
            tdi - Timestamp('2011-01-01')

        # adding the index to a Timestamp gives dates
        shifted_dates = Timestamp('2011-01-01') + tdi
        tm.assert_index_equal(
            shifted_dates, DatetimeIndex(['2011-01-02', '2011-01-03']))
Пример #16
0
    def test_constructor_coverage(self):
        """Cover assorted TimedeltaIndex construction paths."""
        # a fractional ``periods`` is expected to match periods=10
        truncated = timedelta_range("1 days", periods=10.5)
        whole = timedelta_range("1 days", periods=10)
        self.assertTrue(truncated.equals(whole))

        with self.assertRaises(ValueError):
            TimedeltaIndex(start="1 days", periods="foo", freq="D")

        with self.assertRaises(ValueError):
            TimedeltaIndex(start="1 days", end="10 days")

        with self.assertRaises(ValueError):
            TimedeltaIndex("1 days")

        # generator expression
        from_gen = TimedeltaIndex(timedelta(n) for n in range(10))
        from_list = TimedeltaIndex([timedelta(n) for n in range(10)])
        self.assertTrue(from_gen.equals(from_list))

        # NumPy string array
        day_strings = np.array(["1 days", "2 days", "3 days"])
        three_days = to_timedelta([1, 2, 3], unit="d")
        self.assertTrue(TimedeltaIndex(day_strings).equals(three_days))

        # rebuild from the ``asi8`` values
        self.assertTrue(TimedeltaIndex(three_days.asi8).equals(three_days))

        # non-conforming freq
        with self.assertRaises(ValueError):
            TimedeltaIndex(["1 days", "2 days", "4 days"], freq="D")

        with self.assertRaises(ValueError):
            TimedeltaIndex(periods=10, freq="D")
Пример #17
0
 def test_tdi_iadd_int(self, one):
     """Check ``rng += one`` warns and equals the range one hour later."""
     hour_later = timedelta_range('1 days 10:00:00', freq='H', periods=10)
     hourly = timedelta_range('1 days 09:00:00', freq='H', periods=10)
     # GH#22535
     with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
         hourly += one
     tm.assert_index_equal(hourly, hour_later)
Пример #18
0
 def test_zero_length_input_index(self, sort):
     # GH 24966 test for 0-len intersections are copied
     index_1 = timedelta_range('1 day', periods=0, freq='h')
     index_2 = timedelta_range('1 day', periods=3, freq='h')
     result = index_1.intersection(index_2, sort=sort)
     assert index_1 is not result
     assert index_2 is not result
     tm.assert_copy(result, index_1)
Пример #19
0
def test_resample_single_period_timedelta():
    """Resample a five-element one-second series into two-second sums."""
    every_second = pd.timedelta_range('1 day', freq='s', periods=5)
    every_other = pd.timedelta_range('1 day', freq='2s', periods=3)

    summed = Series(list(range(5)), index=every_second).resample('2s').sum()

    assert_series_equal(summed, Series([1, 5, 4], index=every_other))
Пример #20
0
    def test_union_bug_1730(self):

        rng_a = timedelta_range('1 day', periods=4, freq='3H')
        rng_b = timedelta_range('1 day', periods=4, freq='4H')

        result = rng_a.union(rng_b)
        exp = TimedeltaIndex(sorted(set(list(rng_a)) | set(list(rng_b))))
        tm.assert_index_equal(result, exp)
Пример #21
0
    def test_union_bug_1730(self):

        rng_a = timedelta_range("1 day", periods=4, freq="3H")
        rng_b = timedelta_range("1 day", periods=4, freq="4H")

        result = rng_a.union(rng_b)
        exp = TimedeltaIndex(sorted(set(list(rng_a)) | set(list(rng_b))))
        self.assertTrue(result.equals(exp))
Пример #22
0
 def test_tdi_add_int(self, one):
     """Check ``rng + one`` warns and equals the range one hour later."""
     # Variants of `one` for #19012
     hourly = timedelta_range('1 days 09:00:00', freq='H', periods=10)
     hour_later = timedelta_range('1 days 10:00:00', freq='H', periods=10)
     # GH#22535
     with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
         total = hourly + one
     tm.assert_index_equal(total, hour_later)
Пример #23
0
    def test_union(self):
        """Check union of overlapping ranges and of mixed index types."""
        first_five = timedelta_range("1day", periods=5)
        from_third = timedelta_range("3day", periods=5)
        seven_days = timedelta_range("1day", periods=7)
        self.assert_numpy_array_equal(first_five.union(from_third), seven_days)

        # mixed-type unions are only called, their results are not checked
        ints = Int64Index(np.arange(0, 20, 2))
        tdi = TimedeltaIndex(start="1 day", periods=10, freq="D")
        ints.union(tdi)  # Works
        tdi.union(ints)  # Fails with "AttributeError: can't set attribute"
Пример #24
0
    def test_union(self):
        """Check union of overlapping ranges and of mixed index types."""
        first_five = timedelta_range('1day', periods=5)
        from_third = timedelta_range('3day', periods=5)
        seven_days = timedelta_range('1day', periods=7)
        tm.assert_index_equal(first_five.union(from_third), seven_days)

        # mixed-type unions are only called, their results are not checked
        ints = Int64Index(np.arange(0, 20, 2))
        tdi = TimedeltaIndex(start='1 day', periods=10, freq='D')
        ints.union(tdi)  # Works
        tdi.union(ints)  # Fails with "AttributeError: can't set attribute"
Пример #25
0
def test_from_arrays_index_series_timedelta():
    """Check ``MultiIndex.from_arrays`` agrees for indexes and Series."""
    days = pd.timedelta_range('1 days', freq='D', periods=3)
    hours = pd.timedelta_range('2 hours', freq='H', periods=3)

    from_indexes = pd.MultiIndex.from_arrays([days, hours])
    from_series = pd.MultiIndex.from_arrays(
        [pd.Series(days), pd.Series(hours)])

    # every level must round-trip to the index it was built from
    for built in (from_indexes, from_series):
        for level, source in enumerate([days, hours]):
            tm.assert_index_equal(built.get_level_values(level), source)

    tm.assert_index_equal(from_indexes, from_series)
Пример #26
0
    def test_intersection_bug_1708(self):
        """Check ``&`` of an hourly range with shifted copies of itself."""
        # shifted by five hours: nothing in common
        base = timedelta_range("1 day", periods=4, freq="h")
        shifted = base + pd.offsets.Hour(5)
        self.assertEqual(len(base & shifted), 0)

        # shifted by one hour: three elements in common
        base = timedelta_range("1 day", periods=4, freq="h")
        shifted = base + pd.offsets.Hour(1)
        overlap = timedelta_range("1 day 01:00:00", periods=3, freq="h")
        tm.assert_index_equal(base & shifted, overlap)
Пример #27
0
    def test_intersection_equal(self, sort):
        # GH 24471 Test intersection outcome given the sort keyword
        # for equal indicies intersection should return the original index
        first = timedelta_range('1 day', periods=4, freq='h')
        second = timedelta_range('1 day', periods=4, freq='h')
        intersect = first.intersection(second, sort=sort)
        if sort is None:
            tm.assert_index_equal(intersect, second.sort_values())
        assert tm.equalContents(intersect, second)

        # Corner cases
        inter = first.intersection(first, sort=sort)
        assert inter is first
Пример #28
0
    def test_intersection_bug_1708(self):
        """Check ``&`` of an hourly range with shifted copies of itself."""
        # shifted by five hours: nothing in common
        base = timedelta_range('1 day', periods=4, freq='h')
        shifted = base + pd.offsets.Hour(5)
        assert len(base & shifted) == 0

        # shifted by one hour: three elements in common
        base = timedelta_range('1 day', periods=4, freq='h')
        shifted = base + pd.offsets.Hour(1)
        overlap = timedelta_range('1 day 01:00:00', periods=3, freq='h')
        tm.assert_index_equal(base & shifted, overlap)
Пример #29
0
    def test_insert(self):

        idx = TimedeltaIndex(['4day', '1day', '2day'], name='idx')

        result = idx.insert(2, timedelta(days=5))
        exp = TimedeltaIndex(['4day', '1day', '5day', '2day'], name='idx')
        tm.assert_index_equal(result, exp)

        # insertion of non-datetime should coerce to object index
        result = idx.insert(1, 'inserted')
        expected = Index([Timedelta('4day'), 'inserted', Timedelta('1day'),
                          Timedelta('2day')], name='idx')
        assert not isinstance(result, TimedeltaIndex)
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name

        idx = timedelta_range('1day 00:00:01', periods=3, freq='s', name='idx')

        # preserve freq
        expected_0 = TimedeltaIndex(['1day', '1day 00:00:01', '1day 00:00:02',
                                     '1day 00:00:03'],
                                    name='idx', freq='s')
        expected_3 = TimedeltaIndex(['1day 00:00:01', '1day 00:00:02',
                                     '1day 00:00:03', '1day 00:00:04'],
                                    name='idx', freq='s')

        # reset freq to None
        expected_1_nofreq = TimedeltaIndex(['1day 00:00:01', '1day 00:00:01',
                                            '1day 00:00:02', '1day 00:00:03'],
                                           name='idx', freq=None)
        expected_3_nofreq = TimedeltaIndex(['1day 00:00:01', '1day 00:00:02',
                                            '1day 00:00:03', '1day 00:00:05'],
                                           name='idx', freq=None)

        cases = [(0, Timedelta('1day'), expected_0),
                 (-3, Timedelta('1day'), expected_0),
                 (3, Timedelta('1day 00:00:04'), expected_3),
                 (1, Timedelta('1day 00:00:01'), expected_1_nofreq),
                 (3, Timedelta('1day 00:00:05'), expected_3_nofreq)]

        for n, d, expected in cases:
            result = idx.insert(n, d)
            tm.assert_index_equal(result, expected)
            assert result.name == expected.name
            assert result.freq == expected.freq

        # GH 18295 (test missing)
        expected = TimedeltaIndex(['1day', pd.NaT, '2day', '3day'])
        for na in (np.nan, pd.NaT, None):
            result = timedelta_range('1day', '3day').insert(1, na)
            tm.assert_index_equal(result, expected)
Пример #30
0
    def test_categorical_index_repr_timedelta_ordered(self):
        """Compare ``repr`` of ordered timedelta CategoricalIndex to fixed text."""
        days = timedelta_range('1 days', periods=5)
        cat_index = CategoricalIndex(Categorical(days, ordered=True))
        expected = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days 00:00:00, 2 days 00:00:00, 3 days 00:00:00, 4 days 00:00:00, 5 days 00:00:00], ordered=True, dtype='category')"""  # noqa
        assert repr(cat_index) == expected

        # ten entries: the expected text wraps and elides the categories
        offset_days = timedelta_range('1 hours', periods=10)
        cat_index = CategoricalIndex(Categorical(offset_days, ordered=True))
        expected = """CategoricalIndex(['0 days 01:00:00', '1 days 01:00:00', '2 days 01:00:00',
                  '3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00',
                  '6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00',
                  '9 days 01:00:00'],
                 categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], ordered=True, dtype='category')"""  # noqa

        assert repr(cat_index) == expected
Пример #31
0
class TestIntervalIndex:
    index = IntervalIndex.from_arrays([0, 1], [1, 2])

    def create_index(self, closed="right"):
        """Return the ten unit-width intervals built from the breaks 0..10."""
        breaks = range(11)
        return IntervalIndex.from_breaks(breaks, closed=closed)

    def create_index_with_nan(self, closed="right"):
        """Return a ten-element index whose entry at position 1 is missing."""
        # False marks the single slot that is replaced by NaN on both sides
        keep = [True, False] + [True] * 8
        lefts = np.where(keep, np.arange(10), np.nan)
        rights = np.where(keep, np.arange(1, 11), np.nan)
        return IntervalIndex.from_arrays(lefts, rights, closed=closed)

    def test_properties(self, closed):
        index = self.create_index(closed=closed)
        assert len(index) == 10
        assert index.size == 10
        assert index.shape == (10, )

        tm.assert_index_equal(index.left, Index(np.arange(10)))
        tm.assert_index_equal(index.right, Index(np.arange(1, 11)))
        tm.assert_index_equal(index.mid, Index(np.arange(0.5, 10.5)))

        assert index.closed == closed

        ivs = [Interval(l, r, closed) for l, r in zip(range(10), range(1, 11))]
        expected = np.array(ivs, dtype=object)
        tm.assert_numpy_array_equal(np.asarray(index), expected)

        # with nans
        index = self.create_index_with_nan(closed=closed)
        assert len(index) == 10
        assert index.size == 10
        assert index.shape == (10, )

        expected_left = Index([0, np.nan, 2, 3, 4, 5, 6, 7, 8, 9])
        expected_right = expected_left + 1
        expected_mid = expected_left + 0.5
        tm.assert_index_equal(index.left, expected_left)
        tm.assert_index_equal(index.right, expected_right)
        tm.assert_index_equal(index.mid, expected_mid)

        assert index.closed == closed

        ivs = [
            Interval(l, r, closed) if notna(l) else np.nan
            for l, r in zip(expected_left, expected_right)
        ]
        expected = np.array(ivs, dtype=object)
        tm.assert_numpy_array_equal(np.asarray(index), expected)

    @pytest.mark.parametrize(
        "breaks",
        [
            [1, 1, 2, 5, 15, 53, 217, 1014, 5335, 31240, 201608],
            [-np.inf, -100, -10, 0.5, 1, 1.5, 3.8, 101, 202, np.inf],
            pd.to_datetime(["20170101", "20170202", "20170303", "20170404"]),
            pd.to_timedelta(["1ns", "2ms", "3s", "4M", "5H", "6D"]),
        ],
    )
    def test_length(self, closed, breaks):
        # GH 18789
        index = IntervalIndex.from_breaks(breaks, closed=closed)
        result = index.length
        expected = Index(iv.length for iv in index)
        tm.assert_index_equal(result, expected)

        # with NA
        index = index.insert(1, np.nan)
        result = index.length
        expected = Index(iv.length if notna(iv) else iv for iv in index)
        tm.assert_index_equal(result, expected)

    def test_with_nans(self, closed):
        index = self.create_index(closed=closed)
        assert index.hasnans is False

        result = index.isna()
        expected = np.zeros(len(index), dtype=bool)
        tm.assert_numpy_array_equal(result, expected)

        result = index.notna()
        expected = np.ones(len(index), dtype=bool)
        tm.assert_numpy_array_equal(result, expected)

        index = self.create_index_with_nan(closed=closed)
        assert index.hasnans is True

        result = index.isna()
        expected = np.array([False, True] + [False] * (len(index) - 2))
        tm.assert_numpy_array_equal(result, expected)

        result = index.notna()
        expected = np.array([True, False] + [True] * (len(index) - 2))
        tm.assert_numpy_array_equal(result, expected)

    def test_copy(self, closed):
        expected = self.create_index(closed=closed)

        result = expected.copy()
        assert result.equals(expected)

        result = expected.copy(deep=True)
        assert result.equals(expected)
        assert result.left is not expected.left

    def test_ensure_copied_data(self, closed):
        # exercise the copy flag in the constructor

        # not copying
        index = self.create_index(closed=closed)
        result = IntervalIndex(index, copy=False)
        tm.assert_numpy_array_equal(index.left.values,
                                    result.left.values,
                                    check_same="same")
        tm.assert_numpy_array_equal(index.right.values,
                                    result.right.values,
                                    check_same="same")

        # by-definition make a copy
        result = IntervalIndex(np.array(index), copy=False)
        tm.assert_numpy_array_equal(index.left.values,
                                    result.left.values,
                                    check_same="copy")
        tm.assert_numpy_array_equal(index.right.values,
                                    result.right.values,
                                    check_same="copy")

    def test_delete(self, closed):
        expected = IntervalIndex.from_breaks(np.arange(1, 11), closed=closed)
        result = self.create_index(closed=closed).delete(0)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "data",
        [
            interval_range(0, periods=10, closed="neither"),
            interval_range(1.7, periods=8, freq=2.5, closed="both"),
            interval_range(Timestamp("20170101"), periods=12, closed="left"),
            interval_range(Timedelta("1 day"), periods=6, closed="right"),
        ],
    )
    def test_insert(self, data):
        """Exercise ``IntervalIndex.insert`` with valid and invalid items.

        Covers insertion at the start, end and middle of ``data``, rejection
        of a non-Interval value, rejection of an Interval whose ``closed``
        side differs from the index, and insertion of NA values.
        """
        item = data[0]
        idx_item = IntervalIndex([item])

        # start
        expected = idx_item.append(data)
        result = data.insert(0, item)
        tm.assert_index_equal(result, expected)

        # end
        expected = data.append(idx_item)
        result = data.insert(len(data), item)
        tm.assert_index_equal(result, expected)

        # mid
        expected = data[:3].append(idx_item).append(data[3:])
        result = data.insert(3, item)
        tm.assert_index_equal(result, expected)

        # invalid type
        msg = "can only insert Interval objects and NA into an IntervalIndex"
        with pytest.raises(ValueError, match=msg):
            data.insert(1, "foo")

        # invalid closed
        msg = "inserted item must be closed on the same side as the index"
        for closed in {"left", "right", "both", "neither"} - {item.closed}:
            # Build the mismatched interval outside the raises block so that
            # only the insert call itself is expected to raise.
            bad_item = Interval(item.left, item.right, closed=closed)
            with pytest.raises(ValueError, match=msg):
                data.insert(1, bad_item)

        # GH 18295 (test missing)
        na_idx = IntervalIndex([np.nan], closed=data.closed)
        # the expected result does not depend on which NA flavour is inserted
        expected = data[:1].append(na_idx).append(data[1:])
        for na in (np.nan, pd.NaT, None):
            result = data.insert(1, na)
            tm.assert_index_equal(result, expected)

    def test_is_unique_interval(self, closed):
        """
        Interval specific tests for is_unique in addition to base class tests
        """
        # unique overlapping - distinct endpoints
        idx = IntervalIndex.from_tuples([(0, 1), (0.5, 1.5)], closed=closed)
        assert idx.is_unique is True

        # unique overlapping - shared endpoints
        idx = pd.IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)],
                                           closed=closed)
        assert idx.is_unique is True

        # unique nested
        idx = IntervalIndex.from_tuples([(-1, 1), (-2, 2)], closed=closed)
        assert idx.is_unique is True

    def test_monotonic(self, closed):
        """Check the four monotonicity flags over a table of interval layouts.

        Each row gives the interval tuples followed by the expected values of
        ``is_monotonic``, ``_is_strictly_monotonic_increasing``,
        ``is_monotonic_decreasing`` and ``_is_strictly_monotonic_decreasing``.
        """
        def check(idx, inc, strict_inc, dec, strict_dec):
            assert idx.is_monotonic is inc
            assert idx._is_strictly_monotonic_increasing is strict_inc
            assert idx.is_monotonic_decreasing is dec
            assert idx._is_strictly_monotonic_decreasing is strict_dec

        increasing = (True, True, False, False)
        decreasing = (False, False, True, True)
        unordered = (False, False, False, False)

        cases = [
            # increasing non-overlapping
            ([(0, 1), (2, 3), (4, 5)], increasing),
            # decreasing non-overlapping
            ([(4, 5), (2, 3), (1, 2)], decreasing),
            # unordered non-overlapping
            ([(0, 1), (4, 5), (2, 3)], unordered),
            # increasing overlapping
            ([(0, 2), (0.5, 2.5), (1, 3)], increasing),
            # decreasing overlapping
            ([(1, 3), (0.5, 2.5), (0, 2)], decreasing),
            # unordered overlapping
            ([(0.5, 2.5), (0, 2), (1, 3)], unordered),
            # increasing overlapping shared endpoints
            ([(1, 2), (1, 3), (2, 3)], increasing),
            # decreasing overlapping shared endpoints
            ([(2, 3), (1, 3), (1, 2)], decreasing),
            # stationary
            ([(0, 1), (0, 1)], (True, False, True, False)),
        ]
        for tuples, flags in cases:
            check(IntervalIndex.from_tuples(tuples, closed=closed), *flags)

        # empty
        check(IntervalIndex([], closed=closed), True, True, True, True)

    def test_get_item(self, closed):
        i = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan),
                                      closed=closed)
        assert i[0] == Interval(0.0, 1.0, closed=closed)
        assert i[1] == Interval(1.0, 2.0, closed=closed)
        assert isna(i[2])

        result = i[0:1]
        expected = IntervalIndex.from_arrays((0.0, ), (1.0, ), closed=closed)
        tm.assert_index_equal(result, expected)

        result = i[0:2]
        expected = IntervalIndex.from_arrays((0.0, 1), (1.0, 2.0),
                                             closed=closed)
        tm.assert_index_equal(result, expected)

        result = i[1:3]
        expected = IntervalIndex.from_arrays((1.0, np.nan), (2.0, np.nan),
                                             closed=closed)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "breaks",
        [
            date_range("20180101", periods=4),
            date_range("20180101", periods=4, tz="US/Eastern"),
            timedelta_range("0 days", periods=4),
        ],
        ids=lambda x: str(x.dtype),
    )
    def test_maybe_convert_i8(self, breaks):
        """``_maybe_convert_i8`` maps datetime-like keys to their i8 values.

        The key is supplied as an IntervalIndex, an Interval, an Index, a
        scalar and a list in turn.
        """
        # GH 20636
        index = IntervalIndex.from_breaks(breaks)
        first, second = breaks[0], breaks[1]

        # intervalindex
        converted = index._maybe_convert_i8(index)
        tm.assert_index_equal(converted,
                              IntervalIndex.from_breaks(breaks.asi8))

        # interval
        converted = index._maybe_convert_i8(Interval(first, second))
        assert converted == Interval(first.value, second.value)

        # datetimelike index
        converted = index._maybe_convert_i8(breaks)
        tm.assert_index_equal(converted, Index(breaks.asi8))

        # datetimelike scalar
        converted = index._maybe_convert_i8(first)
        assert converted == first.value

        # list-like of datetimelike scalars
        converted = index._maybe_convert_i8(list(breaks))
        tm.assert_index_equal(converted, Index(breaks.asi8))

    @pytest.mark.parametrize(
        "breaks",
        [
            date_range("2018-01-01", periods=5),
            timedelta_range("0 days", periods=5)
        ],
    )
    def test_maybe_convert_i8_nat(self, breaks):
        """Keys containing NaT are expected to convert to a float index."""
        # GH 20636
        index = IntervalIndex.from_breaks(breaks)

        # all-NaT key -> all-NaN floats
        nat_key = breaks._constructor([pd.NaT] * 3)
        nan_floats = pd.Float64Index([np.nan] * 3)
        converted = index._maybe_convert_i8(nat_key)
        tm.assert_index_equal(converted, nan_floats)

        # one real value in front of the NaTs
        head = breaks[0]
        mixed_key = nat_key.insert(0, head)
        mixed_floats = nan_floats.insert(0, float(head.value))
        converted = index._maybe_convert_i8(mixed_key)
        tm.assert_index_equal(converted, mixed_floats)

    @pytest.mark.parametrize(
        "breaks",
        [np.arange(5, dtype="int64"),
         np.arange(5, dtype="float64")],
        ids=lambda x: str(x.dtype),
    )
    @pytest.mark.parametrize(
        "make_key",
        [
            IntervalIndex.from_breaks,
            lambda breaks: Interval(breaks[0], breaks[1]),
            lambda breaks: breaks,
            lambda breaks: breaks[0],
            list,
        ],
        ids=["IntervalIndex", "Interval", "Index", "scalar", "list"],
    )
    def test_maybe_convert_i8_numeric(self, breaks, make_key):
        """Numeric keys come back from ``_maybe_convert_i8`` unchanged."""
        # GH 20636
        numeric_index = IntervalIndex.from_breaks(breaks)
        key = make_key(breaks)

        # no conversion occurs for numeric: the very same object is returned
        assert numeric_index._maybe_convert_i8(key) is key

    @pytest.mark.parametrize(
        "breaks1, breaks2",
        permutations(
            [
                date_range("20180101", periods=4),
                date_range("20180101", periods=4, tz="US/Eastern"),
                timedelta_range("0 days", periods=4),
            ],
            2,
        ),
        ids=lambda x: str(x.dtype),
    )
    @pytest.mark.parametrize(
        "make_key",
        [
            IntervalIndex.from_breaks,
            lambda breaks: Interval(breaks[0], breaks[1]),
            lambda breaks: breaks,
            lambda breaks: breaks[0],
            list,
        ],
        ids=["IntervalIndex", "Interval", "Index", "scalar", "list"],
    )
    def test_maybe_convert_i8_errors(self, breaks1, breaks2, make_key):
        """A key of a different datetime-like dtype raises ValueError."""
        # GH 20636
        index = IntervalIndex.from_breaks(breaks1)
        mismatched_key = make_key(breaks2)

        # the dtype text may contain regex metacharacters, hence the escape
        pattern = re.escape(
            f"Cannot index an IntervalIndex of subtype {breaks1.dtype} with "
            f"values of dtype {breaks2.dtype}")
        with pytest.raises(ValueError, match=pattern):
            index._maybe_convert_i8(mismatched_key)

    def test_contains_method(self):
        """``contains`` reports, per interval, whether a point lies inside."""
        # can select values that are IN the range of a value
        idx = IntervalIndex.from_arrays([0, 1], [1, 2])

        in_none = np.array([False, False], dtype="bool")
        for point in (0, 3):
            tm.assert_numpy_array_equal(idx.contains(point), in_none)

        in_first = np.array([True, False], dtype="bool")
        for point in (0.5, 1):
            tm.assert_numpy_array_equal(idx.contains(point), in_first)

        # __contains__ not implemented for "interval in interval", follow
        # that for the contains method for now
        with pytest.raises(NotImplementedError,
                           match="contains not implemented for two"):
            idx.contains(Interval(0, 1))

    def test_contains_dunder(self):

        index = IntervalIndex.from_arrays([0, 1], [1, 2], closed="right")

        # __contains__ requires perfect matches to intervals.
        assert 0 not in index
        assert 1 not in index
        assert 2 not in index

        assert Interval(0, 1, closed="right") in index
        assert Interval(0, 2, closed="right") not in index
        assert Interval(0, 0.5, closed="right") not in index
        assert Interval(3, 5, closed="right") not in index
        assert Interval(-1, 0, closed="left") not in index
        assert Interval(0, 1, closed="left") not in index
        assert Interval(0, 1, closed="both") not in index

    def test_dropna(self, closed):

        expected = IntervalIndex.from_tuples([(0.0, 1.0), (1.0, 2.0)],
                                             closed=closed)

        ii = IntervalIndex.from_tuples([(0, 1), (1, 2), np.nan], closed=closed)
        result = ii.dropna()
        tm.assert_index_equal(result, expected)

        ii = IntervalIndex.from_arrays([0, 1, np.nan], [1, 2, np.nan],
                                       closed=closed)
        result = ii.dropna()
        tm.assert_index_equal(result, expected)

    def test_non_contiguous(self, closed):
        index = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)
        target = [0.5, 1.5, 2.5]
        actual = index.get_indexer(target)
        expected = np.array([0, -1, 1], dtype="intp")
        tm.assert_numpy_array_equal(actual, expected)

        assert 1.5 not in index

    def test_isin(self, closed):
        index = self.create_index(closed=closed)

        expected = np.array([True] + [False] * (len(index) - 1))
        result = index.isin(index[:1])
        tm.assert_numpy_array_equal(result, expected)

        result = index.isin([index[0]])
        tm.assert_numpy_array_equal(result, expected)

        other = IntervalIndex.from_breaks(np.arange(-2, 10), closed=closed)
        expected = np.array([True] * (len(index) - 1) + [False])
        result = index.isin(other)
        tm.assert_numpy_array_equal(result, expected)

        result = index.isin(other.tolist())
        tm.assert_numpy_array_equal(result, expected)

        for other_closed in {"right", "left", "both", "neither"}:
            other = self.create_index(closed=other_closed)
            expected = np.repeat(closed == other_closed, len(index))
            result = index.isin(other)
            tm.assert_numpy_array_equal(result, expected)

            result = index.isin(other.tolist())
            tm.assert_numpy_array_equal(result, expected)

    def test_comparison(self):
        """Elementwise comparisons of the class-level index.

        Operands are Intervals, the index itself, its ``values`` array, and
        invalid operands that either compare unequal or raise.
        """
        idx = self.index
        check = tm.assert_numpy_array_equal
        second_only = np.array([False, True])
        both_true = np.array([True, True])
        both_false = np.array([False, False])

        # against a scalar Interval
        check(Interval(0, 1) < idx, second_only)
        check(Interval(0.5, 1.5) < idx, second_only)
        check(idx > Interval(0.5, 1.5), second_only)

        # against itself
        check(idx == idx, both_true)
        check(idx <= idx, both_true)
        check(idx >= idx, both_true)
        check(idx < idx, both_false)
        check(idx > idx, both_false)

        # same breaks, different closed side
        check(idx == IntervalIndex.from_breaks([0, 1, 2], "left"),
              both_false)

        # against its own values array, on either side of the operator
        check(idx == idx.values, np.array([True, True]))
        check(idx.values == idx, np.array([True, True]))
        check(idx <= idx.values, np.array([True, True]))
        check(idx != idx.values, np.array([False, False]))
        check(idx > idx.values, np.array([False, False]))
        check(idx.values > idx, np.array([False, False]))

        # invalid comparisons
        check(idx == 0, np.array([False, False]))
        check(idx == idx.left, np.array([False, False]))

        msg = ("not supported between instances of 'int' and "
               "'pandas._libs.interval.Interval'")
        with pytest.raises(TypeError, match=msg):
            idx > 0
        with pytest.raises(TypeError, match=msg):
            idx <= 0
        with pytest.raises(TypeError, match=msg):
            idx > np.arange(2)

        msg = "Lengths must match to compare"
        with pytest.raises(ValueError, match=msg):
            idx > np.arange(3)

    def test_missing_values(self, closed):
        """Missing entries must line up on both sides of the intervals."""
        from_objects = Index([
            np.nan,
            Interval(0, 1, closed=closed),
            Interval(1, 2, closed=closed)
        ])
        from_arrays = IntervalIndex.from_arrays([np.nan, 0, 1],
                                                [np.nan, 1, 2],
                                                closed=closed)
        assert from_objects.equals(from_arrays)

        # NaN on the left only is rejected
        msg = ("missing values must be missing in the same location both left "
               "and right sides")
        with pytest.raises(ValueError, match=msg):
            IntervalIndex.from_arrays([np.nan, 0, 1],
                                      np.array([0, 1, 2]),
                                      closed=closed)

        na_mask = np.array([True, False, False])
        tm.assert_numpy_array_equal(isna(from_objects), na_mask)

    def test_sort_values(self, closed):
        index = self.create_index(closed=closed)

        result = index.sort_values()
        tm.assert_index_equal(result, index)

        result = index.sort_values(ascending=False)
        tm.assert_index_equal(result, index[::-1])

        # with nan
        index = IntervalIndex([Interval(1, 2), np.nan, Interval(0, 1)])

        result = index.sort_values()
        expected = IntervalIndex([Interval(0, 1), Interval(1, 2), np.nan])
        tm.assert_index_equal(result, expected)

        result = index.sort_values(ascending=False)
        expected = IntervalIndex([np.nan, Interval(1, 2), Interval(0, 1)])
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
    def test_datetime(self, tz):
        """Midpoints, membership and ``get_indexer`` on datetime intervals."""
        def stamp(text):
            # every timestamp in this test shares the parametrized timezone
            return Timestamp(text, tz=tz)

        dates = date_range(start=stamp("2000-01-01"), periods=10)
        index = IntervalIndex.from_breaks(dates)

        # test mid
        midpoints = date_range(start=stamp("2000-01-01T12:00"), periods=9)
        tm.assert_index_equal(index.mid, midpoints)

        # __contains__ doesn't check individual points
        for point in ("2000-01-01", "2000-01-01T12", "2000-01-02"):
            assert stamp(point) not in index
        present = Interval(stamp("2000-01-02"), stamp("2000-01-03"))
        absent = Interval(stamp("1999-12-31"), stamp("2000-01-01"))
        assert present in index
        assert absent not in index

        # .contains does check individual points
        assert not index.contains(stamp("2000-01-01")).any()
        assert index.contains(stamp("2000-01-01T12")).any()
        assert index.contains(stamp("2000-01-02")).any()

        # test get_indexer: (first target, target spacing, expected positions)
        lookups = [
            ("1999-12-31T12:00", "12H", [-1, -1, 0, 0, 1, 1, 2]),
            ("2000-01-08T18:00", "6H", [7, 7, 8, 8, 8, 8, -1]),
        ]
        for begin, freq, positions in lookups:
            target = date_range(start=stamp(begin), periods=7, freq=freq)
            located = index.get_indexer(target)
            tm.assert_numpy_array_equal(located,
                                        np.array(positions, dtype="intp"))

    def test_append(self, closed):
        """Appending one index or a list of them; mismatched closed raises."""
        first = IntervalIndex.from_arrays([0, 1], [1, 2], closed=closed)
        second = IntervalIndex.from_arrays([1, 2], [2, 3], closed=closed)

        # a single index
        combined = first.append(second)
        tm.assert_index_equal(
            combined,
            IntervalIndex.from_arrays([0, 1, 1, 2], [1, 2, 2, 3],
                                      closed=closed),
        )

        # a list of indexes
        combined = first.append([first, second])
        tm.assert_index_equal(
            combined,
            IntervalIndex.from_arrays([0, 1, 0, 1, 1, 2],
                                      [1, 2, 1, 2, 2, 3],
                                      closed=closed),
        )

        msg = "Intervals must all be closed on the same side"
        for other_closed in {"left", "right", "both", "neither"} - {closed}:
            mismatched = IntervalIndex.from_arrays([0, 1], [1, 2],
                                                   closed=other_closed)
            with pytest.raises(ValueError, match=msg):
                first.append(mismatched)

    def test_is_non_overlapping_monotonic(self, closed):
        # Should be True in all cases
        tpls = [(0, 1), (2, 3), (4, 5), (6, 7)]
        idx = IntervalIndex.from_tuples(tpls, closed=closed)
        assert idx.is_non_overlapping_monotonic is True

        idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
        assert idx.is_non_overlapping_monotonic is True

        # Should be False in all cases (overlapping)
        tpls = [(0, 2), (1, 3), (4, 5), (6, 7)]
        idx = IntervalIndex.from_tuples(tpls, closed=closed)
        assert idx.is_non_overlapping_monotonic is False

        idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
        assert idx.is_non_overlapping_monotonic is False

        # Should be False in all cases (non-monotonic)
        tpls = [(0, 1), (2, 3), (6, 7), (4, 5)]
        idx = IntervalIndex.from_tuples(tpls, closed=closed)
        assert idx.is_non_overlapping_monotonic is False

        idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
        assert idx.is_non_overlapping_monotonic is False

        # Should be False for closed='both', otherwise True (GH16560)
        if closed == "both":
            idx = IntervalIndex.from_breaks(range(4), closed=closed)
            assert idx.is_non_overlapping_monotonic is False
        else:
            idx = IntervalIndex.from_breaks(range(4), closed=closed)
            assert idx.is_non_overlapping_monotonic is True

    @pytest.mark.parametrize(
        "start, shift, na_value",
        [
            (0, 1, np.nan),
            (Timestamp("2018-01-01"), Timedelta("1 day"), pd.NaT),
            (Timedelta("0 days"), Timedelta("1 day"), pd.NaT),
        ],
    )
    def test_is_overlapping(self, start, shift, na_value, closed):
        # GH 23309
        # see test_interval_tree.py for extensive tests; interface tests here

        # non-overlapping
        tuples = [(start + n * shift, start + (n + 1) * shift)
                  for n in (0, 2, 4)]
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        assert index.is_overlapping is False

        # non-overlapping with NA
        tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)]
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        assert index.is_overlapping is False

        # overlapping
        tuples = [(start + n * shift, start + (n + 2) * shift)
                  for n in range(3)]
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        assert index.is_overlapping is True

        # overlapping with NA
        tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)]
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        assert index.is_overlapping is True

        # common endpoints
        tuples = [(start + n * shift, start + (n + 1) * shift)
                  for n in range(3)]
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        result = index.is_overlapping
        expected = closed == "both"
        assert result is expected

        # common endpoints with NA
        tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)]
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        result = index.is_overlapping
        assert result is expected

    @pytest.mark.parametrize(
        "tuples",
        [
            list(zip(range(10), range(1, 11))),
            list(
                zip(
                    date_range("20170101", periods=10),
                    date_range("20170101", periods=10),
                )),
            list(
                zip(
                    timedelta_range("0 days", periods=10),
                    timedelta_range("1 day", periods=10),
                )),
        ],
    )
    def test_to_tuples(self, tuples):
        # GH 18756
        idx = IntervalIndex.from_tuples(tuples)
        result = idx.to_tuples()
        expected = Index(com.asarray_tuplesafe(tuples))
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "tuples",
        [
            list(zip(range(10), range(1, 11))) + [np.nan],
            list(
                zip(
                    date_range("20170101", periods=10),
                    date_range("20170101", periods=10),
                )) + [np.nan],
            list(
                zip(
                    timedelta_range("0 days", periods=10),
                    timedelta_range("1 day", periods=10),
                )) + [np.nan],
        ],
    )
    @pytest.mark.parametrize("na_tuple", [True, False])
    def test_to_tuples_na(self, tuples, na_tuple):
        # GH 18756
        idx = IntervalIndex.from_tuples(tuples)
        result = idx.to_tuples(na_tuple=na_tuple)

        # check the non-NA portion
        expected_notna = Index(com.asarray_tuplesafe(tuples[:-1]))
        result_notna = result[:-1]
        tm.assert_index_equal(result_notna, expected_notna)

        # check the NA portion
        result_na = result[-1]
        if na_tuple:
            assert isinstance(result_na, tuple)
            assert len(result_na) == 2
            assert all(isna(x) for x in result_na)
        else:
            assert isna(result_na)

    def test_nbytes(self):
        # GH 19209
        left = np.arange(0, 4, dtype="i8")
        right = np.arange(1, 5, dtype="i8")

        result = IntervalIndex.from_arrays(left, right).nbytes
        expected = 64  # 4 * 8 * 2
        assert result == expected

    @pytest.mark.parametrize("new_closed",
                             ["left", "right", "both", "neither"])
    def test_set_closed(self, name, closed, new_closed):
        # GH 21670
        index = interval_range(0, 5, closed=closed, name=name)
        result = index.set_closed(new_closed)
        expected = interval_range(0, 5, closed=new_closed, name=name)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("bad_closed", ["foo", 10, "LEFT", True, False])
    def test_set_closed_errors(self, bad_closed):
        """An unrecognised ``closed`` value makes ``set_closed`` raise."""
        # GH 21670
        idx = interval_range(0, 5)
        expected_message = f"invalid option for 'closed': {bad_closed}"
        with pytest.raises(ValueError, match=expected_message):
            idx.set_closed(bad_closed)

    def test_is_all_dates(self):
        """An index of Timestamp intervals does not report ``is_all_dates``."""
        # GH 23576
        year_start = pd.Timestamp("2017-01-01 00:00:00")
        year_end = pd.Timestamp("2018-01-01 00:00:00")
        single_year = pd.IntervalIndex([pd.Interval(year_start, year_end)])
        assert not single_year.is_all_dates

    @pytest.mark.parametrize("key", [[5], (2, 3)])
    def test_get_value_non_scalar_errors(self, key):
        """``get_value`` with a non-scalar key raises InvalidIndexError."""
        # GH 31117
        intervals = [(1, 3), (2, 4), (3, 5), (7, 10), (3, 10)]
        idx = IntervalIndex.from_tuples(intervals)
        ser = pd.Series(range(len(idx)), index=idx)

        # the call is expected to warn and raise; raises is the outer context
        with pytest.raises(InvalidIndexError, match=str(key)), \
                tm.assert_produces_warning(FutureWarning):
            idx.get_value(ser, key)
Пример #32
0
 def test_unknown_attribute(self):
     """Reading an undefined attribute on a Series raises AttributeError."""
     # see gh-9680
     index = pd.timedelta_range(start=0, periods=10, freq='1s')
     series = pd.Series(np.random.normal(size=10), index=index)
     assert 'foo' not in series.__dict__
     with pytest.raises(AttributeError):
         series.foo
Пример #33
0
class TestCategoricalConstructors:
    def test_validate_ordered(self):
        # see gh-14058
        exp_msg = "'ordered' must either be 'True' or 'False'"
        exp_err = TypeError

        # This should be a boolean.
        ordered = np.array([0, 1, 2])

        with pytest.raises(exp_err, match=exp_msg):
            Categorical([1, 2, 3], ordered=ordered)

        with pytest.raises(exp_err, match=exp_msg):
            Categorical.from_codes([0, 0, 1],
                                   categories=["a", "b", "c"],
                                   ordered=ordered)

    def test_constructor_empty(self):
        # GH 17248
        c = Categorical([])
        expected = Index([])
        tm.assert_index_equal(c.categories, expected)

        c = Categorical([], categories=[1, 2, 3])
        expected = pd.Int64Index([1, 2, 3])
        tm.assert_index_equal(c.categories, expected)

    def test_constructor_empty_boolean(self):
        # see gh-22702
        cat = Categorical([], categories=[True, False])
        categories = sorted(cat.categories.tolist())
        assert categories == [False, True]

    def test_constructor_tuples(self):
        values = np.array([(1, ), (1, 2), (1, ), (1, 2)], dtype=object)
        result = Categorical(values)
        expected = Index([(1, ), (1, 2)], tupleize_cols=False)
        tm.assert_index_equal(result.categories, expected)
        assert result.ordered is False

    def test_constructor_tuples_datetimes(self):
        # numpy will auto reshape when all of the tuples are the
        # same len, so add an extra one with 2 items and slice it off
        values = np.array(
            [
                (Timestamp("2010-01-01"), ),
                (Timestamp("2010-01-02"), ),
                (Timestamp("2010-01-01"), ),
                (Timestamp("2010-01-02"), ),
                ("a", "b"),
            ],
            dtype=object,
        )[:-1]
        result = Categorical(values)
        expected = Index(
            [(Timestamp("2010-01-01"), ), (Timestamp("2010-01-02"), )],
            tupleize_cols=False,
        )
        tm.assert_index_equal(result.categories, expected)

    def test_constructor_unsortable(self):

        # it works!
        arr = np.array([1, 2, 3, datetime.now()], dtype="O")
        factor = Categorical(arr, ordered=False)
        assert not factor.ordered

        # this however will raise as cannot be sorted
        msg = ("'values' is not ordered, please explicitly specify the "
               "categories order by passing in a categories argument.")
        with pytest.raises(TypeError, match=msg):
            Categorical(arr, ordered=True)

    def test_constructor_interval(self):
        result = Categorical(
            [Interval(1, 2), Interval(2, 3),
             Interval(3, 6)], ordered=True)
        ii = IntervalIndex([Interval(1, 2), Interval(2, 3), Interval(3, 6)])
        exp = Categorical(ii, ordered=True)
        tm.assert_categorical_equal(result, exp)
        tm.assert_index_equal(result.categories, ii)

    def test_constructor(self):

        exp_arr = np.array(["a", "b", "c", "a", "b", "c"], dtype=np.object_)
        c1 = Categorical(exp_arr)
        tm.assert_numpy_array_equal(c1.__array__(), exp_arr)
        c2 = Categorical(exp_arr, categories=["a", "b", "c"])
        tm.assert_numpy_array_equal(c2.__array__(), exp_arr)
        c2 = Categorical(exp_arr, categories=["c", "b", "a"])
        tm.assert_numpy_array_equal(c2.__array__(), exp_arr)

        # categories must be unique
        msg = "Categorical categories must be unique"
        with pytest.raises(ValueError, match=msg):
            Categorical([1, 2], [1, 2, 2])

        with pytest.raises(ValueError, match=msg):
            Categorical(["a", "b"], ["a", "b", "b"])

        # The default should be unordered
        c1 = Categorical(["a", "b", "c", "a"])
        assert not c1.ordered

        # Categorical as input
        c1 = Categorical(["a", "b", "c", "a"])
        c2 = Categorical(c1)
        tm.assert_categorical_equal(c1, c2)

        c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
        c2 = Categorical(c1)
        tm.assert_categorical_equal(c1, c2)

        c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"])
        c2 = Categorical(c1)
        tm.assert_categorical_equal(c1, c2)

        c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"])
        c2 = Categorical(c1, categories=["a", "b", "c"])
        tm.assert_numpy_array_equal(c1.__array__(), c2.__array__())
        tm.assert_index_equal(c2.categories, Index(["a", "b", "c"]))

        # Series of dtype category
        c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
        c2 = Categorical(Series(c1))
        tm.assert_categorical_equal(c1, c2)

        c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"])
        c2 = Categorical(Series(c1))
        tm.assert_categorical_equal(c1, c2)

        # Series
        c1 = Categorical(["a", "b", "c", "a"])
        c2 = Categorical(Series(["a", "b", "c", "a"]))
        tm.assert_categorical_equal(c1, c2)

        c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
        c2 = Categorical(Series(["a", "b", "c", "a"]),
                         categories=["a", "b", "c", "d"])
        tm.assert_categorical_equal(c1, c2)

        # This should result in integer categories, not float!
        cat = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3])
        assert is_integer_dtype(cat.categories)

        # https://github.com/pandas-dev/pandas/issues/3678
        cat = Categorical([np.nan, 1, 2, 3])
        assert is_integer_dtype(cat.categories)

        # this should result in floats
        cat = Categorical([np.nan, 1, 2.0, 3])
        assert is_float_dtype(cat.categories)

        cat = Categorical([np.nan, 1.0, 2.0, 3.0])
        assert is_float_dtype(cat.categories)

        # This doesn't work -> this would probably need some kind of "remember
        # the original type" feature to try to cast the array interface result
        # to...

        # vals = np.asarray(cat[cat.notna()])
        # assert is_integer_dtype(vals)

        # corner cases
        cat = Categorical([1])
        assert len(cat.categories) == 1
        assert cat.categories[0] == 1
        assert len(cat.codes) == 1
        assert cat.codes[0] == 0

        cat = Categorical(["a"])
        assert len(cat.categories) == 1
        assert cat.categories[0] == "a"
        assert len(cat.codes) == 1
        assert cat.codes[0] == 0

        # Scalars should be converted to lists
        cat = Categorical(1)
        assert len(cat.categories) == 1
        assert cat.categories[0] == 1
        assert len(cat.codes) == 1
        assert cat.codes[0] == 0

        # two arrays
        #  - when the first is an integer dtype and the second is not
        #  - when the resulting codes are all -1/NaN
        with tm.assert_produces_warning(None):
            c_old = Categorical([0, 1, 2, 0, 1, 2], categories=["a", "b", "c"])

        with tm.assert_produces_warning(None):
            c_old = Categorical([0, 1, 2, 0, 1, 2], categories=[3, 4,
                                                                5])  # noqa

        # the next one are from the old docs
        with tm.assert_produces_warning(None):
            c_old2 = Categorical([0, 1, 2, 0, 1, 2], [1, 2, 3])  # noqa
            cat = Categorical([1, 2], categories=[1, 2, 3])

        # this is a legitimate constructor
        with tm.assert_produces_warning(None):
            c = Categorical(  # noqa
                np.array([], dtype="int64"),
                categories=[3, 2, 1],
                ordered=True)

    def test_constructor_with_existing_categories(self):
        # GH25318: constructing with pd.Series used to bogusly skip recoding
        # categories
        c0 = Categorical(["a", "b", "c", "a"])
        c1 = Categorical(["a", "b", "c", "a"], categories=["b", "c"])

        c2 = Categorical(c0, categories=c1.categories)
        tm.assert_categorical_equal(c1, c2)

        c3 = Categorical(Series(c0), categories=c1.categories)
        tm.assert_categorical_equal(c1, c3)

    def test_constructor_not_sequence(self):
        # https://github.com/pandas-dev/pandas/issues/16022
        msg = r"^Parameter 'categories' must be list-like, was"
        with pytest.raises(TypeError, match=msg):
            Categorical(["a", "b"], categories="a")

    def test_constructor_with_null(self):

        # Cannot have NaN in categories
        msg = "Categorical categories cannot be null"
        with pytest.raises(ValueError, match=msg):
            Categorical([np.nan, "a", "b", "c"],
                        categories=[np.nan, "a", "b", "c"])

        with pytest.raises(ValueError, match=msg):
            Categorical([None, "a", "b", "c"],
                        categories=[None, "a", "b", "c"])

        with pytest.raises(ValueError, match=msg):
            Categorical(
                DatetimeIndex(["nat", "20160101"]),
                categories=[NaT, Timestamp("20160101")],
            )

    def test_constructor_with_index(self):
        ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
        tm.assert_categorical_equal(ci.values, Categorical(ci))

        ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
        tm.assert_categorical_equal(
            ci.values, Categorical(ci.astype(object),
                                   categories=ci.categories))

    def test_constructor_with_generator(self):
        # This was raising an Error in isna(single_val).any() because isna
        # returned a scalar for a generator

        exp = Categorical([0, 1, 2])
        cat = Categorical(x for x in [0, 1, 2])
        tm.assert_categorical_equal(cat, exp)
        cat = Categorical(range(3))
        tm.assert_categorical_equal(cat, exp)

        MultiIndex.from_product([range(5), ["a", "b", "c"]])

        # check that categories accept generators and sequences
        cat = Categorical([0, 1, 2], categories=(x for x in [0, 1, 2]))
        tm.assert_categorical_equal(cat, exp)
        cat = Categorical([0, 1, 2], categories=range(3))
        tm.assert_categorical_equal(cat, exp)

    @pytest.mark.parametrize(
        "dtl",
        [
            date_range("1995-01-01 00:00:00", periods=5, freq="s"),
            date_range(
                "1995-01-01 00:00:00", periods=5, freq="s", tz="US/Eastern"),
            timedelta_range("1 day", periods=5, freq="s"),
        ],
    )
    def test_constructor_with_datetimelike(self, dtl):
        # see gh-12077
        # constructor with a datetimelike and NaT

        s = Series(dtl)
        c = Categorical(s)

        expected = type(dtl)(s)
        expected._data.freq = None

        tm.assert_index_equal(c.categories, expected)
        tm.assert_numpy_array_equal(c.codes, np.arange(5, dtype="int8"))

        # with NaT
        s2 = s.copy()
        s2.iloc[-1] = NaT
        c = Categorical(s2)

        expected = type(dtl)(s2.dropna())
        expected._data.freq = None

        tm.assert_index_equal(c.categories, expected)

        exp = np.array([0, 1, 2, 3, -1], dtype=np.int8)
        tm.assert_numpy_array_equal(c.codes, exp)

        result = repr(c)
        assert "NaT" in result

    def test_constructor_from_index_series_datetimetz(self):
        idx = date_range("2015-01-01 10:00",
                         freq="D",
                         periods=3,
                         tz="US/Eastern")
        idx = idx._with_freq(None)  # freq not preserved in result.categories
        result = Categorical(idx)
        tm.assert_index_equal(result.categories, idx)

        result = Categorical(Series(idx))
        tm.assert_index_equal(result.categories, idx)

    def test_constructor_from_index_series_timedelta(self):
        idx = timedelta_range("1 days", freq="D", periods=3)
        idx = idx._with_freq(None)  # freq not preserved in result.categories
        result = Categorical(idx)
        tm.assert_index_equal(result.categories, idx)

        result = Categorical(Series(idx))
        tm.assert_index_equal(result.categories, idx)

    def test_constructor_from_index_series_period(self):
        idx = period_range("2015-01-01", freq="D", periods=3)
        result = Categorical(idx)
        tm.assert_index_equal(result.categories, idx)

        result = Categorical(Series(idx))
        tm.assert_index_equal(result.categories, idx)

    @pytest.mark.parametrize(
        "values",
        [
            np.array([1.0, 1.2, 1.8, np.nan]),
            np.array([1, 2, 3], dtype="int64"),
            ["a", "b", "c", np.nan],
            [pd.Period("2014-01"),
             pd.Period("2014-02"), NaT],
            [Timestamp("2014-01-01"),
             Timestamp("2014-01-02"), NaT],
            [
                Timestamp("2014-01-01", tz="US/Eastern"),
                Timestamp("2014-01-02", tz="US/Eastern"),
                NaT,
            ],
        ],
    )
    def test_constructor_invariant(self, values):
        # GH 14190
        c = Categorical(values)
        c2 = Categorical(c)
        tm.assert_categorical_equal(c, c2)

    @pytest.mark.parametrize("ordered", [True, False])
    def test_constructor_with_dtype(self, ordered):
        categories = ["b", "a", "c"]
        dtype = CategoricalDtype(categories, ordered=ordered)
        result = Categorical(["a", "b", "a", "c"], dtype=dtype)
        expected = Categorical(["a", "b", "a", "c"],
                               categories=categories,
                               ordered=ordered)
        tm.assert_categorical_equal(result, expected)
        assert result.ordered is ordered

    def test_constructor_dtype_and_others_raises(self):
        dtype = CategoricalDtype(["a", "b"], ordered=True)
        msg = "Cannot specify `categories` or `ordered` together with `dtype`."
        with pytest.raises(ValueError, match=msg):
            Categorical(["a", "b"], categories=["a", "b"], dtype=dtype)

        with pytest.raises(ValueError, match=msg):
            Categorical(["a", "b"], ordered=True, dtype=dtype)

        with pytest.raises(ValueError, match=msg):
            Categorical(["a", "b"], ordered=False, dtype=dtype)

    @pytest.mark.parametrize("categories", [None, ["a", "b"], ["a", "c"]])
    @pytest.mark.parametrize("ordered", [True, False])
    def test_constructor_str_category(self, categories, ordered):
        result = Categorical(["a", "b"],
                             categories=categories,
                             ordered=ordered,
                             dtype="category")
        expected = Categorical(["a", "b"],
                               categories=categories,
                               ordered=ordered)
        tm.assert_categorical_equal(result, expected)

    def test_constructor_str_unknown(self):
        with pytest.raises(ValueError, match="Unknown dtype"):
            Categorical([1, 2], dtype="foo")

    def test_constructor_np_strs(self):
        # GH#31499 Hastable.map_locations needs to work on np.str_ objects
        cat = Categorical(["1", "0", "1"], [np.str_("0"), np.str_("1")])
        assert all(isinstance(x, np.str_) for x in cat.categories)

    def test_constructor_from_categorical_with_dtype(self):
        dtype = CategoricalDtype(["a", "b", "c"], ordered=True)
        values = Categorical(["a", "b", "d"])
        result = Categorical(values, dtype=dtype)
        # We use dtype.categories, not values.categories
        expected = Categorical(["a", "b", "d"],
                               categories=["a", "b", "c"],
                               ordered=True)
        tm.assert_categorical_equal(result, expected)

    def test_constructor_from_categorical_with_unknown_dtype(self):
        dtype = CategoricalDtype(None, ordered=True)
        values = Categorical(["a", "b", "d"])
        result = Categorical(values, dtype=dtype)
        # We use values.categories, not dtype.categories
        expected = Categorical(["a", "b", "d"],
                               categories=["a", "b", "d"],
                               ordered=True)
        tm.assert_categorical_equal(result, expected)

    def test_constructor_from_categorical_string(self):
        values = Categorical(["a", "b", "d"])
        # use categories, ordered
        result = Categorical(values,
                             categories=["a", "b", "c"],
                             ordered=True,
                             dtype="category")
        expected = Categorical(["a", "b", "d"],
                               categories=["a", "b", "c"],
                               ordered=True)
        tm.assert_categorical_equal(result, expected)

        # No string
        result = Categorical(values, categories=["a", "b", "c"], ordered=True)
        tm.assert_categorical_equal(result, expected)

    def test_constructor_with_categorical_categories(self):
        # GH17884
        expected = Categorical(["a", "b"], categories=["a", "b", "c"])

        result = Categorical(["a", "b"],
                             categories=Categorical(["a", "b", "c"]))
        tm.assert_categorical_equal(result, expected)

        result = Categorical(["a", "b"],
                             categories=CategoricalIndex(["a", "b", "c"]))
        tm.assert_categorical_equal(result, expected)

    @pytest.mark.parametrize("klass",
                             [lambda x: np.array(x, dtype=object), list])
    def test_construction_with_null(self, klass, nulls_fixture):
        # https://github.com/pandas-dev/pandas/issues/31927
        values = klass(["a", nulls_fixture, "b"])
        result = Categorical(values)

        dtype = CategoricalDtype(["a", "b"])
        codes = [0, -1, 1]
        expected = Categorical.from_codes(codes=codes, dtype=dtype)

        tm.assert_categorical_equal(result, expected)

    def test_from_codes_empty(self):
        cat = ["a", "b", "c"]
        result = Categorical.from_codes([], categories=cat)
        expected = Categorical([], categories=cat)

        tm.assert_categorical_equal(result, expected)

    def test_from_codes_too_few_categories(self):
        dtype = CategoricalDtype(categories=[1, 2])
        msg = "codes need to be between "
        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes([1, 2], categories=dtype.categories)
        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes([1, 2], dtype=dtype)

    def test_from_codes_non_int_codes(self):
        dtype = CategoricalDtype(categories=[1, 2])
        msg = "codes need to be array-like integers"
        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes(["a"], categories=dtype.categories)
        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes(["a"], dtype=dtype)

    def test_from_codes_non_unique_categories(self):
        with pytest.raises(ValueError,
                           match="Categorical categories must be unique"):
            Categorical.from_codes([0, 1, 2], categories=["a", "a", "b"])

    def test_from_codes_nan_cat_included(self):
        with pytest.raises(ValueError,
                           match="Categorical categories cannot be null"):
            Categorical.from_codes([0, 1, 2], categories=["a", "b", np.nan])

    def test_from_codes_too_negative(self):
        dtype = CategoricalDtype(categories=["a", "b", "c"])
        msg = r"codes need to be between -1 and len\(categories\)-1"
        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes([-2, 1, 2], categories=dtype.categories)
        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes([-2, 1, 2], dtype=dtype)

    def test_from_codes(self):
        dtype = CategoricalDtype(categories=["a", "b", "c"])
        exp = Categorical(["a", "b", "c"], ordered=False)
        res = Categorical.from_codes([0, 1, 2], categories=dtype.categories)
        tm.assert_categorical_equal(exp, res)

        res = Categorical.from_codes([0, 1, 2], dtype=dtype)
        tm.assert_categorical_equal(exp, res)

    @pytest.mark.parametrize("klass", [Categorical, CategoricalIndex])
    def test_from_codes_with_categorical_categories(self, klass):
        # GH17884
        expected = Categorical(["a", "b"], categories=["a", "b", "c"])

        result = Categorical.from_codes([0, 1],
                                        categories=klass(["a", "b", "c"]))
        tm.assert_categorical_equal(result, expected)

    @pytest.mark.parametrize("klass", [Categorical, CategoricalIndex])
    def test_from_codes_with_non_unique_categorical_categories(self, klass):
        with pytest.raises(ValueError,
                           match="Categorical categories must be unique"):
            Categorical.from_codes([0, 1], klass(["a", "b", "a"]))

    def test_from_codes_with_nan_code(self):
        # GH21767
        codes = [1, 2, np.nan]
        dtype = CategoricalDtype(categories=["a", "b", "c"])
        with pytest.raises(ValueError,
                           match="codes need to be array-like integers"):
            Categorical.from_codes(codes, categories=dtype.categories)
        with pytest.raises(ValueError,
                           match="codes need to be array-like integers"):
            Categorical.from_codes(codes, dtype=dtype)

    @pytest.mark.parametrize("codes", [[1.0, 2.0, 0], [1.1, 2.0, 0]])
    def test_from_codes_with_float(self, codes):
        # GH21767
        # float codes should raise even if values are equal to integers
        dtype = CategoricalDtype(categories=["a", "b", "c"])

        msg = "codes need to be array-like integers"
        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes(codes, dtype.categories)
        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes(codes, dtype=dtype)

    def test_from_codes_with_dtype_raises(self):
        msg = "Cannot specify"
        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes([0, 1],
                                   categories=["a", "b"],
                                   dtype=CategoricalDtype(["a", "b"]))

        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes([0, 1],
                                   ordered=True,
                                   dtype=CategoricalDtype(["a", "b"]))

    def test_from_codes_neither(self):
        msg = "Both were None"
        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes([0, 1])

    def test_from_codes_with_nullable_int(self):
        codes = pd.array([0, 1], dtype="Int64")
        categories = ["a", "b"]

        result = Categorical.from_codes(codes, categories=categories)
        expected = Categorical.from_codes(codes.to_numpy(int),
                                          categories=categories)

        tm.assert_categorical_equal(result, expected)

    def test_from_codes_with_nullable_int_na_raises(self):
        codes = pd.array([0, None], dtype="Int64")
        categories = ["a", "b"]

        msg = "codes cannot contain NA values"
        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes(codes, categories=categories)

    @pytest.mark.parametrize("dtype", [None, "category"])
    def test_from_inferred_categories(self, dtype):
        cats = ["a", "b"]
        codes = np.array([0, 0, 1, 1], dtype="i8")
        result = Categorical._from_inferred_categories(cats, codes, dtype)
        expected = Categorical.from_codes(codes, cats)
        tm.assert_categorical_equal(result, expected)

    @pytest.mark.parametrize("dtype", [None, "category"])
    def test_from_inferred_categories_sorts(self, dtype):
        cats = ["b", "a"]
        codes = np.array([0, 1, 1, 1], dtype="i8")
        result = Categorical._from_inferred_categories(cats, codes, dtype)
        expected = Categorical.from_codes([1, 0, 0, 0], ["a", "b"])
        tm.assert_categorical_equal(result, expected)

    def test_from_inferred_categories_dtype(self):
        cats = ["a", "b", "d"]
        codes = np.array([0, 1, 0, 2], dtype="i8")
        dtype = CategoricalDtype(["c", "b", "a"], ordered=True)
        result = Categorical._from_inferred_categories(cats, codes, dtype)
        expected = Categorical(["a", "b", "a", "d"],
                               categories=["c", "b", "a"],
                               ordered=True)
        tm.assert_categorical_equal(result, expected)

    def test_from_inferred_categories_coerces(self):
        cats = ["1", "2", "bad"]
        codes = np.array([0, 0, 1, 2], dtype="i8")
        dtype = CategoricalDtype([1, 2])
        result = Categorical._from_inferred_categories(cats, codes, dtype)
        expected = Categorical([1, 1, 2, np.nan])
        tm.assert_categorical_equal(result, expected)

    @pytest.mark.parametrize("ordered", [None, True, False])
    def test_construction_with_ordered(self, ordered):
        # GH 9347, 9190
        cat = Categorical([0, 1, 2], ordered=ordered)
        assert cat.ordered == bool(ordered)

    @pytest.mark.xfail(reason="Imaginary values not supported in Categorical")
    def test_constructor_imaginary(self):
        """Complex values should round-trip (currently expected to fail)."""
        complex_values = [1, 2, 3 + 1j]
        cat = Categorical(complex_values)
        tm.assert_index_equal(cat.categories, Index(complex_values))
        tm.assert_numpy_array_equal(np.array(cat), np.array(complex_values))

    def test_constructor_string_and_tuples(self):
        # GH 21416
        c = Categorical(
            np.array(["c", ("a", "b"), ("b", "a"), "c"], dtype=object))
        expected_index = Index([("a", "b"), ("b", "a"), "c"])
        assert c.categories.equals(expected_index)

    def test_interval(self):
        idx = pd.interval_range(0, 10, periods=10)
        cat = Categorical(idx, categories=idx)
        expected_codes = np.arange(10, dtype="int8")
        tm.assert_numpy_array_equal(cat.codes, expected_codes)
        tm.assert_index_equal(cat.categories, idx)

        # infer categories
        cat = Categorical(idx)
        tm.assert_numpy_array_equal(cat.codes, expected_codes)
        tm.assert_index_equal(cat.categories, idx)

        # list values
        cat = Categorical(list(idx))
        tm.assert_numpy_array_equal(cat.codes, expected_codes)
        tm.assert_index_equal(cat.categories, idx)

        # list values, categories
        cat = Categorical(list(idx), categories=list(idx))
        tm.assert_numpy_array_equal(cat.codes, expected_codes)
        tm.assert_index_equal(cat.categories, idx)

        # shuffled
        values = idx.take([1, 2, 0])
        cat = Categorical(values, categories=idx)
        tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0],
                                                        dtype="int8"))
        tm.assert_index_equal(cat.categories, idx)

        # extra
        values = pd.interval_range(8, 11, periods=3)
        cat = Categorical(values, categories=idx)
        expected_codes = np.array([8, 9, -1], dtype="int8")
        tm.assert_numpy_array_equal(cat.codes, expected_codes)
        tm.assert_index_equal(cat.categories, idx)

        # overlapping
        idx = pd.IntervalIndex([pd.Interval(0, 2), pd.Interval(0, 1)])
        cat = Categorical(idx, categories=idx)
        expected_codes = np.array([0, 1], dtype="int8")
        tm.assert_numpy_array_equal(cat.codes, expected_codes)
        tm.assert_index_equal(cat.categories, idx)
Пример #34
0
class TestGetLoc:
    @pytest.mark.parametrize("side", ["right", "left", "both", "neither"])
    def test_get_loc_interval(self, closed, side):

        idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)

        for bound in [[0, 1], [1, 2], [2, 3], [3, 4], [0, 2], [2.5, 3],
                      [-1, 4]]:
            # if get_loc is supplied an interval, it should only search
            # for exact matches, not overlaps or covers, else KeyError.
            msg = re.escape(
                f"Interval({bound[0]}, {bound[1]}, closed='{side}')")
            if closed == side:
                if bound == [0, 1]:
                    assert idx.get_loc(Interval(0, 1, closed=side)) == 0
                elif bound == [2, 3]:
                    assert idx.get_loc(Interval(2, 3, closed=side)) == 1
                else:
                    with pytest.raises(KeyError, match=msg):
                        idx.get_loc(Interval(*bound, closed=side))
            else:
                with pytest.raises(KeyError, match=msg):
                    idx.get_loc(Interval(*bound, closed=side))

    @pytest.mark.parametrize("scalar", [-0.5, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5])
    def test_get_loc_scalar(self, closed, scalar):

        # correct = {side: {query: answer}}.
        # If query is not in the dict, that query should raise a KeyError
        correct = {
            "right": {
                0.5: 0,
                1: 0,
                2.5: 1,
                3: 1
            },
            "left": {
                0: 0,
                0.5: 0,
                2: 1,
                2.5: 1
            },
            "both": {
                0: 0,
                0.5: 0,
                1: 0,
                2: 1,
                2.5: 1,
                3: 1
            },
            "neither": {
                0.5: 0,
                2.5: 1
            },
        }

        idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)

        # if get_loc is supplied a scalar, it should return the index of
        # the interval which contains the scalar, or KeyError.
        if scalar in correct[closed].keys():
            assert idx.get_loc(scalar) == correct[closed][scalar]
        else:
            with pytest.raises(KeyError, match=str(scalar)):
                idx.get_loc(scalar)

    @pytest.mark.parametrize("scalar", [-1, 0, 0.5, 3, 4.5, 5, 6])
    def test_get_loc_length_one_scalar(self, scalar, closed):
        # GH 20921
        index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        if scalar in index[0]:
            result = index.get_loc(scalar)
            assert result == 0
        else:
            with pytest.raises(KeyError, match=str(scalar)):
                index.get_loc(scalar)

    @pytest.mark.parametrize("other_closed",
                             ["left", "right", "both", "neither"])
    @pytest.mark.parametrize("left, right", [(0, 5), (-1, 4), (-1, 6), (6, 7)])
    def test_get_loc_length_one_interval(self, left, right, closed,
                                         other_closed):
        # GH 20921
        index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        interval = Interval(left, right, closed=other_closed)
        if interval == index[0]:
            result = index.get_loc(interval)
            assert result == 0
        else:
            with pytest.raises(
                    KeyError,
                    match=re.escape(
                        f"Interval({left}, {right}, closed='{other_closed}')"),
            ):
                index.get_loc(interval)

    # Make consistent with test_interval_new.py (see #16316, #16386)
    @pytest.mark.parametrize(
        "breaks",
        [
            date_range("20180101", periods=4),
            date_range("20180101", periods=4, tz="US/Eastern"),
            timedelta_range("0 days", periods=4),
        ],
        ids=lambda x: str(x.dtype),
    )
    def test_get_loc_datetimelike_nonoverlapping(self, breaks):
        # GH 20636
        # nonoverlapping = IntervalIndex method and no i8 conversion
        index = IntervalIndex.from_breaks(breaks)

        value = index[0].mid
        result = index.get_loc(value)
        expected = 0
        assert result == expected

        interval = Interval(index[0].left, index[0].right)
        result = index.get_loc(interval)
        expected = 0
        assert result == expected

    @pytest.mark.parametrize(
        "arrays",
        [
            (date_range("20180101",
                        periods=4), date_range("20180103", periods=4)),
            (
                date_range("20180101", periods=4, tz="US/Eastern"),
                date_range("20180103", periods=4, tz="US/Eastern"),
            ),
            (
                timedelta_range("0 days", periods=4),
                timedelta_range("2 days", periods=4),
            ),
        ],
        ids=lambda x: str(x[0].dtype),
    )
    def test_get_loc_datetimelike_overlapping(self, arrays):
        # GH 20636
        index = IntervalIndex.from_arrays(*arrays)

        value = index[0].mid + Timedelta("12 hours")
        result = index.get_loc(value)
        expected = slice(0, 2, None)
        assert result == expected

        interval = Interval(index[0].left, index[0].right)
        result = index.get_loc(interval)
        expected = 0
        assert result == expected

    @pytest.mark.parametrize(
        "values",
        [
            date_range("2018-01-04", periods=4, freq="-1D"),
            date_range("2018-01-04", periods=4, freq="-1D", tz="US/Eastern"),
            timedelta_range("3 days", periods=4, freq="-1D"),
            np.arange(3.0, -1.0, -1.0),
            np.arange(3, -1, -1),
        ],
        ids=lambda x: str(x.dtype),
    )
    def test_get_loc_decreasing(self, values):
        # GH 25860
        index = IntervalIndex.from_arrays(values[1:], values[:-1])
        result = index.get_loc(index[0])
        expected = 0
        assert result == expected

    @pytest.mark.parametrize("key", [[5], (2, 3)])
    def test_get_loc_non_scalar_errors(self, key):
        # GH 31117
        idx = IntervalIndex.from_tuples([(1, 3), (2, 4), (3, 5), (7, 10),
                                         (3, 10)])

        msg = str(key)
        with pytest.raises(InvalidIndexError, match=msg):
            idx.get_loc(key)

    def test_get_indexer_with_nans(self):
        # GH#41831
        index = IntervalIndex([np.nan, Interval(1, 2), np.nan])

        expected = np.array([True, False, True])
        for key in [None, np.nan, NA]:
            assert key in index
            result = index.get_loc(key)
            tm.assert_numpy_array_equal(result, expected)

        for key in [
                NaT,
                np.timedelta64("NaT", "ns"),
                np.datetime64("NaT", "ns")
        ]:
            with pytest.raises(KeyError, match=str(key)):
                index.get_loc(key)
Пример #35
0
            return 'Samsung'
    return x


# Running 1-based row counter as the first column.
df.insert(0, 'inc', range(1, len(df) + 1))

# Parse both timestamp columns from their textual form.
df['lastTimeSeen'] = pd.to_datetime(
    df['lastTimeSeen'], format='%Y-%m-%d %H:%M:%S')
df['firstTimeSeen'] = pd.to_datetime(
    df['firstTimeSeen'], format='%Y-%m-%d %H:%M:%S')

df['stationMacVendor'] = df['stationMacVendor'].map(clean_manufacturer)

# Split the first-seen timestamp into separate date and time strings.
df['date'] = df['firstTimeSeen'].map(
    lambda stamp: datetime.strftime(stamp, '%d-%m-%Y'))
df['time'] = df['firstTimeSeen'].map(
    lambda stamp: datetime.strftime(stamp, '%H:%M:%S'))

# Mean packet count per hourly slot; this must run before the numeric
# columns are turned into formatted strings below.
tbins = pd.timedelta_range(start=0, periods=25, freq='H')
tslots = pd.cut(pd.to_timedelta(df['time']), bins=tbins)
dfheatmap = (
    df.replace(0, np.nan)
    .groupby([tslots, pd.cut(df['packets'], 1)])['packets']
    .mean()
    .unstack()
)

# Render the numeric columns as strings with thousands separators.
df['power'] = df['power'].map('{0:,.0f}'.format)
df['packets'] = df['packets'].map('{0:,.0f}'.format)
df['packetsAvg'] = df['packetsAvg'].map('{0:,.1f}'.format)
df['powerAvg'] = df['powerAvg'].apply('{0:,.1f}'.format)
df['packetsTotal'] = df['packetsTotal'].map('{0:,.0f}'.format)
df['timeSeen'] = df['timeSeen'].map('{0:,.0f}'.format)
#df['timeSeenTotal'] = df['timeSeenTotal'].map(lambda x: '{0:,}'.format(x))

df.set_index('lastTimeSeen', inplace=True)
Пример #36
0
    Timedelta,
    Timestamp,
    date_range,
    period_range,
    timedelta_range,
)
import pandas._testing as tm
from pandas.core.arrays import IntervalArray


@pytest.fixture(
    params=[
        (Index([0, 2, 4, 4]), Index([1, 3, 5, 8])),
        (Index([0.0, 1.0, 2.0, np.nan]), Index([1.0, 2.0, 3.0, np.nan])),
        (
            timedelta_range("0 days", periods=3).insert(4, pd.NaT),
            timedelta_range("1 day", periods=3).insert(4, pd.NaT),
        ),
        (
            date_range("20170101", periods=3).insert(4, pd.NaT),
            date_range("20170102", periods=3).insert(4, pd.NaT),
        ),
        (
            date_range("20170101", periods=3,
                       tz="US/Eastern").insert(4, pd.NaT),
            date_range("20170102", periods=3,
                       tz="US/Eastern").insert(4, pd.NaT),
        ),
    ],
    ids=lambda x: str(x[0].dtype),
)
Пример #37
0
def test_arg_passthru():
    """Check that keyword arguments reach the groupby aggregation functions.

    For every reduction/transform the resulting columns are compared once
    for the default call and once for an explicit ``numeric_only=False``.
    """
    # GH3668
    # GH5724
    all_columns = [
        "group",
        "int",
        "float",
        "string",
        "category_string",
        "category_int",
        "datetime",
        "datetimetz",
        "timedelta",
    ]
    df = DataFrame(
        {
            "group": [1, 1, 2],
            "int": [1, 2, 3],
            "float": [4.0, 5.0, 6.0],
            "string": list("abc"),
            "category_string": Series(list("abc")).astype("category"),
            "category_int": [7, 8, 9],
            "datetime": pd.date_range("20130101", periods=3),
            "datetimetz": pd.date_range("20130101", periods=3,
                                        tz="US/Eastern"),
            "timedelta": pd.timedelta_range("1 s", periods=3, freq="s"),
        },
        columns=all_columns,
    )

    def call(attr, **kwargs):
        # a fresh groupby object is built for every single call
        return getattr(df.groupby("group"), attr)(**kwargs)

    def check_columns(attrs, default_columns, full_columns):
        # default call first, then the explicit numeric_only=False call
        for attr in attrs:
            tm.assert_index_equal(call(attr).columns, default_columns)
            tm.assert_index_equal(
                call(attr, numeric_only=False).columns, full_columns)

    expected_columns_numeric = Index(["int", "float", "category_int"])

    # mean / median
    expected = DataFrame(
        {
            "category_int": [7.5, 9],
            "float": [4.5, 6.0],
            "timedelta": [pd.Timedelta("1.5s"),
                          pd.Timedelta("3s")],
            "int": [1.5, 3],
            "datetime": [
                Timestamp("2013-01-01 12:00:00"),
                Timestamp("2013-01-03 00:00:00"),
            ],
            "datetimetz": [
                Timestamp("2013-01-01 12:00:00", tz="US/Eastern"),
                Timestamp("2013-01-03 00:00:00", tz="US/Eastern"),
            ],
        },
        index=Index([1, 2], name="group"),
        columns=[
            "int", "float", "category_int", "datetime", "datetimetz",
            "timedelta"
        ],
    )
    for attr in ("mean", "median"):
        tm.assert_index_equal(call(attr).columns, expected_columns_numeric)

        full = call(attr, numeric_only=False)
        tm.assert_frame_equal(full.reindex_like(expected), expected)

    # TODO: min, max *should* handle
    # categorical (ordered) dtype
    min_max_columns = Index([
        "int",
        "float",
        "string",
        "category_int",
        "datetime",
        "datetimetz",
        "timedelta",
    ])
    check_columns(("min", "max"), min_max_columns, min_max_columns)

    first_last_columns = Index([
        "int",
        "float",
        "string",
        "category_string",
        "category_int",
        "datetime",
        "datetimetz",
        "timedelta",
    ])
    check_columns(("first", "last"), first_last_columns, first_last_columns)

    check_columns(
        ("sum",),
        expected_columns_numeric,
        Index(["int", "float", "string", "category_int", "timedelta"]),
    )

    check_columns(
        ("prod", "cumprod"),
        expected_columns_numeric,
        Index(["int", "float", "category_int"]),
    )

    # like min, max, but don't include strings
    # GH 15561: numeric_only=False set by default like min/max
    cum_min_max_columns = Index([
        "int", "float", "category_int", "datetime", "datetimetz", "timedelta"
    ])
    check_columns(("cummin", "cummax"), cum_min_max_columns,
                  cum_min_max_columns)

    check_columns(
        ("cumsum",),
        expected_columns_numeric,
        Index(["int", "float", "category_int", "timedelta"]),
    )
Пример #38
0
    def test_round(self):
        """Round scalar Timedeltas and TimedeltaIndexes at several frequencies.

        Each table row holds a frequency, the expected rounding of the
        positive operand and the expected rounding of the negative operand.
        Frequencies 'Y', 'M' and 'foobar' are expected to raise ValueError.
        """
        t1 = Timedelta('1 days 02:34:56.789123456')
        t2 = Timedelta('-1 days 02:34:56.789123456')

        for (freq, s1, s2) in [
            ('N', t1, t2),
            ('U', Timedelta('1 days 02:34:56.789123000'),
             Timedelta('-1 days 02:34:56.789123000')),
            ('L', Timedelta('1 days 02:34:56.789000000'),
             Timedelta('-1 days 02:34:56.789000000')),
            ('S', Timedelta('1 days 02:34:57'), Timedelta('-1 days 02:34:57')),
            ('2S', Timedelta('1 days 02:34:56'),
             Timedelta('-1 days 02:34:56')),
            ('5S', Timedelta('1 days 02:34:55'),
             Timedelta('-1 days 02:34:55')),
            ('T', Timedelta('1 days 02:35:00'), Timedelta('-1 days 02:35:00')),
            ('12T', Timedelta('1 days 02:36:00'),
             Timedelta('-1 days 02:36:00')),
            ('H', Timedelta('1 days 03:00:00'), Timedelta('-1 days 03:00:00')),
            ('d', Timedelta('1 days'), Timedelta('-1 days'))
        ]:
            r1 = t1.round(freq)
            assert r1 == s1
            r2 = t2.round(freq)
            assert r2 == s2

        # invalid
        for freq in ['Y', 'M', 'foobar']:
            with pytest.raises(ValueError):
                t1.round(freq)

        # from here on t1/t2 are TimedeltaIndexes rather than scalars
        t1 = timedelta_range('1 days', periods=3, freq='1 min 2 s 3 us')
        t2 = -1 * t1
        t1a = timedelta_range('1 days', periods=3, freq='1 min 2 s')
        t1c = pd.TimedeltaIndex([1, 1, 1], unit='D')

        # note that negative times round DOWN! so don't give whole numbers
        for (freq, s1, s2) in [
            ('N', t1, t2), ('U', t1, t2),
            ('L', t1a,
             TimedeltaIndex([
                 '-1 days +00:00:00', '-2 days +23:58:58', '-2 days +23:57:56'
             ],
                            dtype='timedelta64[ns]',
                            freq=None)),
            ('S', t1a,
             TimedeltaIndex([
                 '-1 days +00:00:00', '-2 days +23:58:58', '-2 days +23:57:56'
             ],
                            dtype='timedelta64[ns]',
                            freq=None)),
            ('12T', t1c,
             TimedeltaIndex(['-1 days', '-1 days', '-1 days'],
                            dtype='timedelta64[ns]',
                            freq=None)),
            ('H', t1c,
             TimedeltaIndex(['-1 days', '-1 days', '-1 days'],
                            dtype='timedelta64[ns]',
                            freq=None)),
            ('d', t1c, pd.TimedeltaIndex([-1, -1, -1], unit='D'))
        ]:

            r1 = t1.round(freq)
            tm.assert_index_equal(r1, s1)
            r2 = t2.round(freq)
            # checked inside the loop so every frequency's negative result
            # is verified, not only the one from the final iteration
            tm.assert_index_equal(r2, s2)

        # invalid
        for freq in ['Y', 'M', 'foobar']:
            with pytest.raises(ValueError):
                t1.round(freq)
Пример #39
0
class SharedTests:
    index_cls: type[DatetimeIndex | PeriodIndex | TimedeltaIndex]

    @pytest.fixture
    def arr1d(self):
        """Return a ten-element ``self.array_cls`` built with ``freq="D"``."""
        # i8 values 0..9, each scaled by 24 * 3600 * 10**9
        i8values = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
        return self.array_cls(i8values, freq="D")

    def test_compare_len1_raises(self, arr1d):
        """Comparing against a length-1 operand must raise, not broadcast.

        numpy would broadcast the length-1 operand; here a ValueError with
        "Lengths must match" is expected instead.
        """
        msg = "Lengths must match"
        index = self.index_cls(arr1d)

        # the array against its own length-1 slice
        with pytest.raises(ValueError, match=msg):
            arr1d == arr1d[:1]

        # GH#23078: same expectation for the index class
        with pytest.raises(ValueError, match=msg):
            index <= index[[0]]

    @pytest.mark.parametrize(
        "result",
        [
            pd.date_range("2020", periods=3),
            pd.date_range("2020", periods=3, tz="UTC"),
            pd.timedelta_range("0 days", periods=3),
            pd.period_range("2020Q1", periods=3, freq="Q"),
        ],
    )
    def test_compare_with_Categorical(self, result):
        """``result`` compares elementwise-equal to its Categorical wrapping."""
        as_categorical = pd.Categorical(result)

        equal_mask = result == as_categorical
        assert all(equal_mask)

        unequal_mask = result != as_categorical
        assert not any(unequal_mask)

    @pytest.mark.parametrize("reverse", [True, False])
    @pytest.mark.parametrize("as_index", [True, False])
    def test_compare_categorical_dtype(self, arr1d, as_index, reverse,
                                       ordered):
        """Compare ``arr1d`` with a Categorical (or CategoricalIndex) of itself.

        Equality comparisons are checked for every parametrization; the
        ordering comparisons only when ``arr1d`` is the left operand and the
        other side is a plain Categorical.
        """
        other = pd.Categorical(arr1d, ordered=ordered)
        if as_index:
            other = pd.CategoricalIndex(other)

        left, right = (other, arr1d) if reverse else (arr1d, other)

        all_true = np.ones(arr1d.shape, dtype=bool)
        all_false = ~all_true

        tm.assert_numpy_array_equal(left == right, all_true)
        tm.assert_numpy_array_equal(left != right, all_false)

        if reverse or as_index:
            # Otherwise Categorical raises TypeError bc it is not ordered
            # TODO: we should probably get the same behavior regardless?
            return

        tm.assert_numpy_array_equal(left < right, all_false)
        tm.assert_numpy_array_equal(left <= right, all_true)
        tm.assert_numpy_array_equal(left > right, all_false)
        tm.assert_numpy_array_equal(left >= right, all_true)

    def test_take(self):
        """``take`` on the array agrees with ``take`` on the wrapping index."""
        values = np.arange(100, dtype="i8") * 24 * 3600 * 10**9
        np.random.shuffle(values)

        # only PeriodArray is constructed with an explicit freq here
        freq = "D" if self.array_cls is PeriodArray else None

        arr = self.array_cls(values, freq=freq)
        idx = self.index_cls._simple_new(arr)

        # the same positions, first as a list and then as an ndarray
        for takers in ([1, 4, 94], np.array([1, 4, 94])):
            taken = arr.take(takers)
            expected = idx.take(takers)
            tm.assert_index_equal(self.index_cls(taken), expected)

    @pytest.mark.parametrize("fill_value",
                             [2, 2.0, Timestamp(2021, 1, 1, 12).time])
    def test_take_fill_raises(self, fill_value):
        """``take`` with an unsupported ``fill_value`` raises TypeError."""
        i8values = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
        arr = self.array_cls(i8values, freq="D")

        scalar_name = arr._scalar_type.__name__
        msg = f"value should be a '{scalar_name}' or 'NaT'. Got"
        with pytest.raises(TypeError, match=msg):
            arr.take([0, 1], allow_fill=True, fill_value=fill_value)

    def test_take_fill(self):
        """Position -1 with ``allow_fill=True`` yields NaT for NA-like fills."""
        i8values = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
        arr = self.array_cls(i8values, freq="D")

        for fill_value in (None, np.nan, NaT):
            taken = arr.take([-1, 1], allow_fill=True, fill_value=fill_value)
            assert taken[0] is NaT

    def test_take_fill_str(self, arr1d):
        """A castable string ``fill_value`` is accepted; ``"foo"`` is not."""
        # Cast str fill_value matching other fill_value-taking methods
        last_as_str = str(arr1d[-1])
        taken = arr1d.take([-1, 1], allow_fill=True, fill_value=last_as_str)
        tm.assert_equal(taken, arr1d[[-1, 1]])

        scalar_name = arr1d._scalar_type.__name__
        msg = f"value should be a '{scalar_name}' or 'NaT'. Got"
        with pytest.raises(TypeError, match=msg):
            arr1d.take([-1, 1], allow_fill=True, fill_value="foo")

    def test_concat_same_type(self, arr1d):
        """``_concat_same_type`` matches concatenating the object-dtype values."""
        # prepend NaT so the pieces being concatenated contain a missing value
        with_nat = self.index_cls(arr1d).insert(0, NaT)
        arr = self.array_cls(with_nat)

        result = arr._concat_same_type([arr[:-1], arr[1:], arr])

        as_object = arr.astype(object)
        object_pieces = [as_object[:-1], as_object[1:], as_object]
        expected = self.index_cls(np.concatenate(object_pieces), None)

        tm.assert_index_equal(self.index_cls(result), expected)

    def test_unbox_scalar(self):
        """``_unbox_scalar`` returns the array's numpy scalar type or raises."""
        i8values = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
        arr = self.array_cls(i8values, freq="D")
        unboxed_type = arr._data.dtype.type

        # both a regular element and NaT unbox to the same numpy type
        for scalar in (arr[0], NaT):
            assert isinstance(arr._unbox_scalar(scalar), unboxed_type)

        msg = f"'value' should be a {self.scalar_type.__name__}."
        with pytest.raises(ValueError, match=msg):
            arr._unbox_scalar("foo")

    def test_check_compatible_with(self):
        """``_check_compatible_with`` accepts an element, a slice and NaT."""
        i8values = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
        arr = self.array_cls(i8values, freq="D")

        # none of these calls is expected to raise
        for other in (arr[0], arr[:1], NaT):
            arr._check_compatible_with(other)

    def test_scalar_from_string(self):
        """``_scalar_from_string`` round-trips the string form of an element."""
        i8values = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
        arr = self.array_cls(i8values, freq="D")

        parsed = arr._scalar_from_string(str(arr[0]))
        assert parsed == arr[0]

    def test_reduce_invalid(self):
        """``_reduce`` with an unknown reduction name raises TypeError."""
        i8values = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
        arr = self.array_cls(i8values, freq="D")

        cls_name = type(arr).__name__
        msg = f"'{cls_name}' does not implement reduction 'not a method'"
        with pytest.raises(TypeError, match=msg):
            arr._reduce("not a method")

    @pytest.mark.parametrize("method", ["pad", "backfill"])
    def test_fillna_method_doesnt_change_orig(self, method):
        """``fillna(method=...)`` fills position 4 and leaves the input as is."""
        i8values = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
        arr = self.array_cls(i8values, freq="D")
        arr[4] = NaT

        # the expected fill is taken from position 3 for "pad", else from 5
        source_pos = 3 if method == "pad" else 5
        fill_value = arr[source_pos]

        filled = arr.fillna(method=method)
        assert filled[4] == fill_value

        # check that the original was not changed
        assert arr[4] is NaT

    def test_searchsorted(self):
        """``searchsorted`` with scalar, own-type array and NaT arguments."""
        i8values = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
        arr = self.array_cls(i8values, freq="D")

        # scalar
        assert arr.searchsorted(arr[1]) == 1
        assert arr.searchsorted(arr[2], side="right") == 3

        # own-type
        left_positions = arr.searchsorted(arr[1:3])
        tm.assert_numpy_array_equal(left_positions,
                                    np.array([1, 2], dtype=np.intp))

        right_positions = arr.searchsorted(arr[1:3], side="right")
        tm.assert_numpy_array_equal(right_positions,
                                    np.array([2, 3], dtype=np.intp))

        # GH#29884 match numpy convention on whether NaT goes
        #  at the end or the beginning
        assert arr.searchsorted(NaT) == 10

    @pytest.mark.parametrize("box", [None, "index", "series"])
    def test_searchsorted_castable_strings(self, arr1d, box, request,
                                           string_storage):
        """``searchsorted`` accepts castable strings and rejects the rest.

        ``box`` selects whether the raw array, the index class or a Series
        is searched.
        """
        if isinstance(arr1d, DatetimeArray) and arr1d.tz is not None:
            ts1, ts2 = arr1d[1:3]
            if ts1.tz.tzname(ts1) != ts2.tz.tzname(ts2):
                # If we have e.g. tzutc(), when we cast to string and parse
                #  back we get pytz.UTC, and then consider them different timezones
                #  so incorrectly raise.
                request.node.add_marker(
                    pytest.mark.xfail(
                        raises=TypeError,
                        reason="timezone comparisons inconsistent"))

        if box == "index":
            # Test the equivalent Index.searchsorted method while we're here
            arr = self.index_cls(arr1d)
        elif box is not None:
            # Test the equivalent Series.searchsorted method while we're here
            arr = pd.Series(arr1d)
        else:
            arr = arr1d

        # scalar
        assert arr.searchsorted(str(arr[1])) == 1
        assert arr.searchsorted(str(arr[2]), side="right") == 3

        # list of castable strings
        positions = arr.searchsorted([str(x) for x in arr[1:3]])
        tm.assert_numpy_array_equal(positions,
                                    np.array([1, 2], dtype=np.intp))

        scalar_name = arr1d._scalar_type.__name__

        str_msg = re.escape(
            f"value should be a '{scalar_name}', 'NaT', "
            "or array of those. Got 'str' instead.")
        with pytest.raises(TypeError, match=str_msg):
            arr.searchsorted("foo")

        arr_type = "StringArray" if string_storage == "python" else "ArrowStringArray"
        listlike_msg = re.escape(
            f"value should be a '{scalar_name}', 'NaT', "
            f"or array of those. Got '{arr_type}' instead.")

        with pd.option_context("string_storage", string_storage):
            with pytest.raises(TypeError, match=listlike_msg):
                arr.searchsorted([str(arr[1]), "baz"])

    def test_getitem_near_implementation_bounds(self):
        """Element access just above ``NaT.value`` must not raise."""
        # We only check tz-naive for DTA bc the bounds are slightly different
        #  for other tzs
        i8vals = np.asarray([NaT.value + offset for offset in range(1, 5)],
                            dtype="i8")
        arr = self.array_cls(i8vals, freq="ns")
        arr[0]  # should not raise OutOfBoundsDatetime

        # same lookup through an Index and then a Series wrapping the array
        for container in (pd.Index, pd.Series):
            container(arr)[0]  # should not raise OutOfBoundsDatetime

    def test_getitem_2d(self, arr1d):
        """2D indexing: add an axis, slice a 2D array, and look up a scalar."""
        # 2d slicing on a 1D array
        as_column = type(arr1d)(arr1d._data[:, np.newaxis], dtype=arr1d.dtype)
        tm.assert_equal(arr1d[:, np.newaxis], as_column)

        # Lookup on a 2D array
        arr2d = as_column
        first_three = type(arr2d)(arr2d._data[:3, 0], dtype=arr2d.dtype)
        tm.assert_equal(arr2d[:3, 0], first_three)

        # Scalar lookup
        assert arr2d[-1, 0] == arr1d[-1]

    def test_iter_2d(self, arr1d):
        """Iterating a (3, 1) array yields three 1D arrays of the same type."""
        arr_type = type(arr1d)
        arr2d = arr_type._simple_new(arr1d._data[:3, np.newaxis],
                                     dtype=arr1d.dtype)

        rows = list(arr2d)
        assert len(rows) == 3
        for row in rows:
            assert isinstance(row, arr_type)
            assert row.ndim == 1
            assert row.dtype == arr1d.dtype

    def test_repr_2d(self, arr1d):
        """The repr of a (3, 1) array lists one bracketed element per row."""
        data2d = arr1d._data[:3, np.newaxis]
        arr2d = type(arr1d)._simple_new(data2d, dtype=arr1d.dtype)

        result = repr(arr2d)

        # TimedeltaArray elements are rendered through ``_repr_base``;
        # every other array type uses plain string formatting
        if isinstance(arr2d, TimedeltaArray):
            cells = [arr1d[pos]._repr_base() for pos in range(3)]
        else:
            cells = [f"{arr1d[pos]}" for pos in range(3)]

        expected = (f"<{type(arr2d).__name__}>\n"
                    "[\n"
                    f"['{cells[0]}'],\n"
                    f"['{cells[1]}'],\n"
                    f"['{cells[2]}']\n"
                    "]\n"
                    f"Shape: (3, 1), dtype: {arr1d.dtype}")

        assert result == expected

    def test_setitem(self):
        """Scalar and slice assignment are reflected in ``asi8``."""
        i8values = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
        arr = self.array_cls(i8values, freq="D")
        # every assignment is mirrored on this independent i8 ndarray
        expected = np.arange(10, dtype="i8") * 24 * 3600 * 10**9

        # scalar assignment
        arr[0] = arr[1]
        expected[0] = expected[1]
        tm.assert_numpy_array_equal(arr.asi8, expected)

        # slice assignment
        arr[:2] = arr[-2:]
        expected[:2] = expected[-2:]
        tm.assert_numpy_array_equal(arr.asi8, expected)

    @pytest.mark.parametrize(
        "box",
        [
            pd.Index,
            pd.Series,
            np.array,
            list,
            PandasArray,
        ],
    )
    def test_setitem_object_dtype(self, box, arr1d):
        """Assigning the reversed values boxed as object dtype reverses ``arr1d``."""
        expected = arr1d.copy()[::-1]
        if expected.dtype.kind in ("m", "M"):
            expected = expected._with_freq(None)

        if box is list:
            vals = list(expected)
        elif box is np.array:
            # if we do np.array(x).astype(object) then dt64 and td64 cast to ints
            vals = np.array(expected.astype(object))
        elif box is PandasArray:
            vals = box(np.asarray(expected, dtype=object))
        else:
            vals = box(expected).astype(object)

        arr1d[:] = vals

        tm.assert_equal(arr1d, expected)

    def test_setitem_strs(self, arr1d, request):
        """Strings are parsed on assignment, both list-like and scalar."""
        # Check that we parse strs in both scalar and listlike
        if isinstance(arr1d, DatetimeArray) and arr1d.tz is not None:
            ts1, ts2 = arr1d[-2:]
            if ts1.tz.tzname(ts1) != ts2.tz.tzname(ts2):
                # If we have e.g. tzutc(), when we cast to string and parse
                #  back we get pytz.UTC, and then consider them different timezones
                #  so incorrectly raise.
                request.node.add_marker(
                    pytest.mark.xfail(
                        raises=TypeError,
                        reason="timezone comparisons inconsistent"))

        # Setting list-like of strs
        expected = arr1d.copy()
        expected[[0, 1]] = arr1d[-2:]

        from_strs = arr1d.copy()
        from_strs[:2] = [str(x) for x in arr1d[-2:]]
        tm.assert_equal(from_strs, expected)

        # Same thing but now for just a scalar str
        expected = arr1d.copy()
        expected[0] = arr1d[-1]

        from_str = arr1d.copy()
        from_str[0] = str(arr1d[-1])
        tm.assert_equal(from_str, expected)

    @pytest.mark.parametrize("as_index", [True, False])
    def test_setitem_categorical(self, arr1d, as_index):
        """Assigning a reversed Categorical of ``arr1d`` reverses it in place."""
        reversed_copy = arr1d.copy()[::-1]
        if isinstance(reversed_copy, PeriodArray):
            expected = reversed_copy
        else:
            expected = reversed_copy._with_freq(None)

        categorical = pd.Categorical(arr1d)
        if as_index:
            categorical = pd.CategoricalIndex(categorical)

        arr1d[:] = categorical[::-1]

        tm.assert_equal(arr1d, expected)

    def test_setitem_raises(self):
        """Invalid ``__setitem__`` calls raise the expected exceptions."""
        i8values = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
        arr = self.array_cls(i8values, freq="D")
        first = arr[0]

        # position beyond the end of the array
        with pytest.raises(IndexError, match="index 12 is out of bounds"):
            arr[12] = first

        # value of an unsupported type
        with pytest.raises(TypeError, match="value should be a.* 'object'"):
            arr[0] = object()

        # GH#36339: empty list indexer with a one-element value
        listlike_msg = "cannot set using a list-like indexer with a different length"
        with pytest.raises(ValueError, match=listlike_msg):
            arr[[]] = [arr[1]]

        # GH#36339: empty slice with a three-element value
        slice_msg = "cannot set using a slice indexer with a different length than"
        with pytest.raises(ValueError, match=slice_msg):
            arr[1:1] = arr[:3]

    @pytest.mark.parametrize("box", [list, np.array, pd.Index, pd.Series])
    def test_setitem_numeric_raises(self, arr1d, box):
        """Assigning boxed ints or floats raises TypeError."""
        # We don't cast e.g. int64 to our own dtype for setitem
        scalar_name = arr1d._scalar_type.__name__
        msg = (f"value should be a '{scalar_name}', "
               "'NaT', or array of those. Got")

        # integers first, then floats
        for numbers in ([0, 1], [0.0, 1.0]):
            with pytest.raises(TypeError, match=msg):
                arr1d[:2] = box(numbers)

    def test_inplace_arithmetic(self):
        """``+=`` and ``-=`` give the same values as ``+`` and ``-``."""
        # GH#24115 check that iadd and isub are actually in-place
        i8values = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
        arr = self.array_cls(i8values, freq="D")

        plus_one = arr + pd.Timedelta(days=1)
        arr += pd.Timedelta(days=1)
        tm.assert_equal(arr, plus_one)

        minus_one = arr - pd.Timedelta(days=1)
        arr -= pd.Timedelta(days=1)
        tm.assert_equal(arr, minus_one)

    def test_shift_fill_int_deprecated(self):
        """``shift`` with an integer ``fill_value`` warns but still fills."""
        # GH#31971
        i8values = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
        arr = self.array_cls(i8values, freq="D")

        with tm.assert_produces_warning(FutureWarning,
                                        match="Passing <class 'int'> to shift"):
            shifted = arr.shift(1, fill_value=1)

        # build the expected result by hand: the fill scalar goes first,
        # followed by all but the last original element
        expected = arr.copy()
        if self.array_cls is PeriodArray:
            expected[0] = PeriodArray._scalar_type._from_ordinal(
                1, freq=arr.freq)
        else:
            expected[0] = arr._scalar_type(1)
        expected[1:] = arr[:-1]
        tm.assert_equal(shifted, expected)

    def test_median(self, arr1d):
        """``median`` on 1D and reshaped 2D input, with and without ``skipna``."""
        arr = arr1d
        if len(arr) % 2 == 0:
            # make it easier to define `expected`
            arr = arr[:-1]

        mid = len(arr) // 2
        expected = arr[mid]

        median = arr.median()
        assert type(median) is type(expected)
        assert median == expected

        # blank out the middle element; for non-Period scalars the expected
        # value becomes the mean of the three elements around the middle
        arr[mid] = NaT
        if not isinstance(expected, Period):
            expected = arr[mid - 1:mid + 2].mean()

        assert arr.median(skipna=False) is NaT

        median = arr.median()
        assert type(median) is type(expected)
        assert median == expected

        # empty input
        empty = arr[:0]
        assert empty.median() is NaT
        assert arr[:0].median(skipna=False) is NaT

        # 2d Case
        arr2 = arr.reshape(-1, 1)

        median = arr2.median(axis=None)
        assert type(median) is type(expected)
        assert median == expected

        assert arr2.median(axis=None, skipna=False) is NaT

        arr_type = type(arr)
        tm.assert_equal(
            arr2.median(axis=0),
            arr_type._from_sequence([expected], dtype=arr.dtype),
        )
        tm.assert_equal(
            arr2.median(axis=0, skipna=False),
            arr_type._from_sequence([NaT], dtype=arr.dtype),
        )

        # along axis=1 every row holds a single value
        tm.assert_equal(arr2.median(axis=1), arr)
        tm.assert_equal(arr2.median(axis=1, skipna=False), arr)

    def test_from_integer_array(self):
        """Building from an "Int64" array matches building from int64 ndarray."""
        ints = np.array([1, 2, 3], dtype=np.int64)
        expected = self.array_cls(ints, dtype=self.example_dtype)

        masked_ints = pd.array(ints, dtype="Int64")
        result = self.array_cls(masked_ints, dtype=self.example_dtype)

        tm.assert_extension_array_equal(result, expected)
Пример #40
0
"""


def test_slice_with_zero_step_raises(index, frame_or_series, indexer_sli):
    """Slicing with a step of zero raises ValueError."""
    values = np.arange(len(index))
    obj = frame_or_series(values, index=index)

    msg = "slice step cannot be zero"
    with pytest.raises(ValueError, match=msg):
        indexer_sli(obj)[::0]


@pytest.mark.parametrize(
    "index",
    [
        date_range("2014-01-01", periods=20, freq="MS"),
        period_range("2014-01", periods=20, freq="M"),
        timedelta_range("0", periods=20, freq="H"),
    ],
)
def test_slice_with_negative_step(index):
    keystr1 = str(index[9])
    keystr2 = str(index[13])

    ser = Series(np.arange(20), index)
    SLC = IndexSlice

    for key in [keystr1, index[9]]:
        tm.assert_indexing_slices_equivalent(ser, SLC[key::-1], SLC[9::-1])
        tm.assert_indexing_slices_equivalent(ser, SLC[:key:-1], SLC[:8:-1])

        for key2 in [keystr2, index[13]]:
            tm.assert_indexing_slices_equivalent(ser, SLC[key2:key:-1],
Пример #41
0
    def test_dt_namespace_accessor(self):
        """Check the properties and methods exposed through ``Series.dt``.

        Covers datetime (naive and tz-aware), timedelta and period data.
        Property values are compared with the same attribute read from an
        ``Index`` built on the Series' values, the listed methods must be
        reachable, the public names from ``s.dt.__dir__()`` must match the
        expected names, and assignment through the accessor must raise.
        """

        # GH 7207, 11128
        # test .dt namespace accessor

        ok_for_period = PeriodIndex._datetimelike_ops
        ok_for_period_methods = ['strftime', 'to_timestamp', 'asfreq']
        ok_for_dt = DatetimeIndex._datetimelike_ops
        ok_for_dt_methods = [
            'to_period', 'to_pydatetime', 'tz_localize', 'tz_convert',
            'normalize', 'strftime', 'round', 'floor', 'ceil', 'weekday_name'
        ]
        ok_for_td = TimedeltaIndex._datetimelike_ops
        ok_for_td_methods = [
            'components', 'to_pytimedelta', 'total_seconds', 'round', 'floor',
            'ceil'
        ]

        def get_expected(s, name):
            # Expected value of ``s.dt.<name>``: the attribute read from an
            # Index over the same values. Integer ndarrays are cast to
            # int64, scalars are returned as-is, and list-likes are wrapped
            # in a Series that shares ``s``'s index and name.
            # The ``name`` argument is used (not the enclosing loop
            # variable) so the helper does not depend on the caller's scope.
            result = getattr(Index(s._values), name)
            if isinstance(result, np.ndarray):
                if is_integer_dtype(result):
                    result = result.astype('int64')
            elif not is_list_like(result):
                return result
            return Series(result, index=s.index, name=s.name)

        def compare(s, name):
            # Assert that ``s.dt.<name>`` equals the Index-derived expectation.
            a = getattr(s.dt, name)
            b = get_expected(s, name)
            if not (is_list_like(a) and is_list_like(b)):
                self.assertEqual(a, b)
            else:
                tm.assert_series_equal(a, b)

        # datetimeindex
        cases = [
            Series(date_range('20130101', periods=5), name='xxx'),
            Series(date_range('20130101', periods=5, freq='s'), name='xxx'),
            Series(date_range('20130101 00:00:00', periods=5, freq='ms'),
                   name='xxx')
        ]
        for s in cases:
            for prop in ok_for_dt:
                # we test freq below
                if prop != 'freq':
                    compare(s, prop)

            for prop in ok_for_dt_methods:
                getattr(s.dt, prop)

            result = s.dt.to_pydatetime()
            self.assertIsInstance(result, np.ndarray)
            self.assertTrue(result.dtype == object)

            result = s.dt.tz_localize('US/Eastern')
            exp_values = DatetimeIndex(s.values).tz_localize('US/Eastern')
            expected = Series(exp_values, index=s.index, name='xxx')
            tm.assert_series_equal(result, expected)

            tz_result = result.dt.tz
            self.assertEqual(str(tz_result), 'US/Eastern')
            freq_result = s.dt.freq
            self.assertEqual(freq_result,
                             DatetimeIndex(s.values, freq='infer').freq)

            # let's localize, then convert
            result = s.dt.tz_localize('UTC').dt.tz_convert('US/Eastern')
            exp_values = (DatetimeIndex(
                s.values).tz_localize('UTC').tz_convert('US/Eastern'))
            expected = Series(exp_values, index=s.index, name='xxx')
            tm.assert_series_equal(result, expected)

        # round
        s = Series(pd.to_datetime([
            '2012-01-01 13:00:00', '2012-01-01 12:01:00', '2012-01-01 08:00:00'
        ]),
                   name='xxx')
        result = s.dt.round('D')
        expected = Series(pd.to_datetime(
            ['2012-01-02', '2012-01-02', '2012-01-01']),
                          name='xxx')
        tm.assert_series_equal(result, expected)

        # round with tz
        result = (
            s.dt.tz_localize('UTC').dt.tz_convert('US/Eastern').dt.round('D'))
        exp_values = pd.to_datetime(['2012-01-01', '2012-01-01',
                                     '2012-01-01']).tz_localize('US/Eastern')
        expected = Series(exp_values, name='xxx')
        tm.assert_series_equal(result, expected)

        # floor
        s = Series(pd.to_datetime([
            '2012-01-01 13:00:00', '2012-01-01 12:01:00', '2012-01-01 08:00:00'
        ]),
                   name='xxx')
        result = s.dt.floor('D')
        expected = Series(pd.to_datetime(
            ['2012-01-01', '2012-01-01', '2012-01-01']),
                          name='xxx')
        tm.assert_series_equal(result, expected)

        # ceil
        s = Series(pd.to_datetime([
            '2012-01-01 13:00:00', '2012-01-01 12:01:00', '2012-01-01 08:00:00'
        ]),
                   name='xxx')
        result = s.dt.ceil('D')
        expected = Series(pd.to_datetime(
            ['2012-01-02', '2012-01-02', '2012-01-02']),
                          name='xxx')
        tm.assert_series_equal(result, expected)

        # datetimeindex with tz
        s = Series(date_range('20130101', periods=5, tz='US/Eastern'),
                   name='xxx')
        for prop in ok_for_dt:

            # we test freq below
            if prop != 'freq':
                compare(s, prop)

        for prop in ok_for_dt_methods:
            getattr(s.dt, prop)

        result = s.dt.to_pydatetime()
        self.assertIsInstance(result, np.ndarray)
        self.assertTrue(result.dtype == object)

        result = s.dt.tz_convert('CET')
        expected = Series(s._values.tz_convert('CET'),
                          index=s.index,
                          name='xxx')
        tm.assert_series_equal(result, expected)

        tz_result = result.dt.tz
        self.assertEqual(str(tz_result), 'CET')
        freq_result = s.dt.freq
        self.assertEqual(freq_result,
                         DatetimeIndex(s.values, freq='infer').freq)

        # timedeltaindex
        cases = [
            Series(timedelta_range('1 day', periods=5),
                   index=list('abcde'),
                   name='xxx'),
            Series(timedelta_range('1 day 01:23:45', periods=5, freq='s'),
                   name='xxx'),
            Series(timedelta_range('2 days 01:23:45.012345',
                                   periods=5,
                                   freq='ms'),
                   name='xxx')
        ]
        for s in cases:
            for prop in ok_for_td:
                # we test freq below
                if prop != 'freq':
                    compare(s, prop)

            for prop in ok_for_td_methods:
                getattr(s.dt, prop)

            result = s.dt.components
            self.assertIsInstance(result, DataFrame)
            tm.assert_index_equal(result.index, s.index)

            result = s.dt.to_pytimedelta()
            self.assertIsInstance(result, np.ndarray)
            self.assertTrue(result.dtype == object)

            result = s.dt.total_seconds()
            self.assertIsInstance(result, pd.Series)
            self.assertTrue(result.dtype == 'float64')

            freq_result = s.dt.freq
            self.assertEqual(freq_result,
                             TimedeltaIndex(s.values, freq='infer').freq)

        # both
        index = date_range('20130101', periods=3, freq='D')
        s = Series(date_range('20140204', periods=3, freq='s'),
                   index=index,
                   name='xxx')
        exp = Series(np.array([2014, 2014, 2014], dtype='int64'),
                     index=index,
                     name='xxx')
        tm.assert_series_equal(s.dt.year, exp)

        exp = Series(np.array([2, 2, 2], dtype='int64'),
                     index=index,
                     name='xxx')
        tm.assert_series_equal(s.dt.month, exp)

        exp = Series(np.array([0, 1, 2], dtype='int64'),
                     index=index,
                     name='xxx')
        tm.assert_series_equal(s.dt.second, exp)

        exp = pd.Series([s[0]] * 3, index=index, name='xxx')
        tm.assert_series_equal(s.dt.normalize(), exp)

        # periodindex
        cases = [
            Series(period_range('20130101', periods=5, freq='D'), name='xxx')
        ]
        for s in cases:
            for prop in ok_for_period:
                # we test freq below
                if prop != 'freq':
                    compare(s, prop)

            for prop in ok_for_period_methods:
                getattr(s.dt, prop)

            freq_result = s.dt.freq
            self.assertEqual(freq_result, PeriodIndex(s.values).freq)

        # test limited display api
        def get_dir(s):
            # Sorted, de-duplicated public names reported by the accessor.
            results = [r for r in s.dt.__dir__() if not r.startswith('_')]
            return list(sorted(set(results)))

        s = Series(date_range('20130101', periods=5, freq='D'), name='xxx')
        results = get_dir(s)
        tm.assert_almost_equal(
            results, list(sorted(set(ok_for_dt + ok_for_dt_methods))))

        s = Series(
            period_range('20130101', periods=5, freq='D', name='xxx').asobject)
        results = get_dir(s)
        tm.assert_almost_equal(
            results, list(sorted(set(ok_for_period + ok_for_period_methods))))

        # 11295
        # ambiguous time error on the conversions
        s = Series(pd.date_range('2015-01-01', '2016-01-01', freq='T'),
                   name='xxx')
        s = s.dt.tz_localize('UTC').dt.tz_convert('America/Chicago')
        results = get_dir(s)
        tm.assert_almost_equal(
            results, list(sorted(set(ok_for_dt + ok_for_dt_methods))))
        exp_values = pd.date_range('2015-01-01',
                                   '2016-01-01',
                                   freq='T',
                                   tz='UTC').tz_convert('America/Chicago')
        expected = Series(exp_values, name='xxx')
        tm.assert_series_equal(s, expected)

        # no setting allowed
        s = Series(date_range('20130101', periods=5, freq='D'), name='xxx')
        with tm.assertRaisesRegexp(ValueError, "modifications"):
            s.dt.hour = 5

        # trying to set a copy
        with pd.option_context('chained_assignment', 'raise'):

            def f():
                s.dt.hour[0] = 5

            self.assertRaises(com.SettingWithCopyError, f)
Пример #42
0
def test_timedelta_property(attr):
    """``Series.attrs`` must carry over to the result of ``s.dt.<attr>``."""
    ser = pd.Series(pd.timedelta_range("2000", periods=4))
    ser.attrs = {"a": 1}

    accessed = getattr(ser.dt, attr)

    assert accessed.attrs == {"a": 1}
Пример #43
0
    def test_dt_namespace_accessor(self):
        """Check the properties and methods exposed through ``Series.dt``.

        Covers datetime (naive and tz-aware), timedelta and period data.
        Property values are compared with the same attribute read from an
        ``Index`` built on the Series' values, the listed methods must be
        reachable, the public names from ``s.dt.__dir__()`` must match the
        expected names, and assignment through the accessor must raise.
        """

        # GH 7207, 11128
        # test .dt namespace accessor

        ok_for_period = PeriodArray._datetimelike_ops
        ok_for_period_methods = ["strftime", "to_timestamp", "asfreq"]
        ok_for_dt = DatetimeIndex._datetimelike_ops
        ok_for_dt_methods = [
            "to_period",
            "to_pydatetime",
            "tz_localize",
            "tz_convert",
            "normalize",
            "strftime",
            "round",
            "floor",
            "ceil",
            "day_name",
            "month_name",
            "isocalendar",
        ]
        ok_for_td = TimedeltaArray._datetimelike_ops
        ok_for_td_methods = [
            "components",
            "to_pytimedelta",
            "total_seconds",
            "round",
            "floor",
            "ceil",
        ]

        def get_expected(s, name):
            # Expected value of ``s.dt.<name>``: the attribute read from an
            # Index over the same values. Integer ndarrays are cast to
            # int64, scalars and DataFrames are returned as-is, and other
            # list-likes are wrapped in a Series sharing ``s``'s index/name.
            # The ``name`` argument is used (not the enclosing loop
            # variable) so the helper does not depend on the caller's scope.
            result = getattr(Index(s._values), name)
            if isinstance(result, np.ndarray):
                if is_integer_dtype(result):
                    result = result.astype("int64")
            elif not is_list_like(result) or isinstance(result, DataFrame):
                return result
            return Series(result, index=s.index, name=s.name)

        def compare(s, name):
            # Assert that ``s.dt.<name>`` equals the Index-derived expectation,
            # picking the comparison that fits the result's container type.
            a = getattr(s.dt, name)
            b = get_expected(s, name)
            if not (is_list_like(a) and is_list_like(b)):
                assert a == b
            elif isinstance(a, DataFrame):
                tm.assert_frame_equal(a, b)
            else:
                tm.assert_series_equal(a, b)

        # datetimeindex
        cases = [
            Series(date_range("20130101", periods=5), name="xxx"),
            Series(date_range("20130101", periods=5, freq="s"), name="xxx"),
            Series(date_range("20130101 00:00:00", periods=5, freq="ms"),
                   name="xxx"),
        ]
        for s in cases:
            for prop in ok_for_dt:
                # we test freq below
                # we ignore week and weekofyear because they are deprecated
                if prop not in ["freq", "week", "weekofyear"]:
                    compare(s, prop)

            for prop in ok_for_dt_methods:
                getattr(s.dt, prop)

            result = s.dt.to_pydatetime()
            assert isinstance(result, np.ndarray)
            assert result.dtype == object

            result = s.dt.tz_localize("US/Eastern")
            exp_values = DatetimeIndex(s.values).tz_localize("US/Eastern")
            expected = Series(exp_values, index=s.index, name="xxx")
            tm.assert_series_equal(result, expected)

            tz_result = result.dt.tz
            assert str(tz_result) == "US/Eastern"
            freq_result = s.dt.freq
            assert freq_result == DatetimeIndex(s.values, freq="infer").freq

            # let's localize, then convert
            result = s.dt.tz_localize("UTC").dt.tz_convert("US/Eastern")
            exp_values = (DatetimeIndex(
                s.values).tz_localize("UTC").tz_convert("US/Eastern"))
            expected = Series(exp_values, index=s.index, name="xxx")
            tm.assert_series_equal(result, expected)

        # datetimeindex with tz
        s = Series(date_range("20130101", periods=5, tz="US/Eastern"),
                   name="xxx")
        for prop in ok_for_dt:

            # we test freq below
            # we ignore week and weekofyear because they are deprecated
            if prop not in ["freq", "week", "weekofyear"]:
                compare(s, prop)

        for prop in ok_for_dt_methods:
            getattr(s.dt, prop)

        result = s.dt.to_pydatetime()
        assert isinstance(result, np.ndarray)
        assert result.dtype == object

        result = s.dt.tz_convert("CET")
        expected = Series(s._values.tz_convert("CET"),
                          index=s.index,
                          name="xxx")
        tm.assert_series_equal(result, expected)

        tz_result = result.dt.tz
        assert str(tz_result) == "CET"
        freq_result = s.dt.freq
        assert freq_result == DatetimeIndex(s.values, freq="infer").freq

        # timedelta index
        cases = [
            Series(timedelta_range("1 day", periods=5),
                   index=list("abcde"),
                   name="xxx"),
            Series(timedelta_range("1 day 01:23:45", periods=5, freq="s"),
                   name="xxx"),
            Series(
                timedelta_range("2 days 01:23:45.012345", periods=5,
                                freq="ms"),
                name="xxx",
            ),
        ]
        for s in cases:
            for prop in ok_for_td:
                # we test freq below
                if prop != "freq":
                    compare(s, prop)

            for prop in ok_for_td_methods:
                getattr(s.dt, prop)

            result = s.dt.components
            assert isinstance(result, DataFrame)
            tm.assert_index_equal(result.index, s.index)

            result = s.dt.to_pytimedelta()
            assert isinstance(result, np.ndarray)
            assert result.dtype == object

            result = s.dt.total_seconds()
            assert isinstance(result, Series)
            assert result.dtype == "float64"

            freq_result = s.dt.freq
            assert freq_result == TimedeltaIndex(s.values, freq="infer").freq

        # both
        index = date_range("20130101", periods=3, freq="D")
        s = Series(date_range("20140204", periods=3, freq="s"),
                   index=index,
                   name="xxx")
        exp = Series(np.array([2014, 2014, 2014], dtype="int64"),
                     index=index,
                     name="xxx")
        tm.assert_series_equal(s.dt.year, exp)

        exp = Series(np.array([2, 2, 2], dtype="int64"),
                     index=index,
                     name="xxx")
        tm.assert_series_equal(s.dt.month, exp)

        exp = Series(np.array([0, 1, 2], dtype="int64"),
                     index=index,
                     name="xxx")
        tm.assert_series_equal(s.dt.second, exp)

        exp = Series([s[0]] * 3, index=index, name="xxx")
        tm.assert_series_equal(s.dt.normalize(), exp)

        # periodindex
        cases = [
            Series(period_range("20130101", periods=5, freq="D"), name="xxx")
        ]
        for s in cases:
            for prop in ok_for_period:
                # we test freq below
                if prop != "freq":
                    compare(s, prop)

            for prop in ok_for_period_methods:
                getattr(s.dt, prop)

            freq_result = s.dt.freq
            assert freq_result == PeriodIndex(s.values).freq

        # test limited display api
        def get_dir(s):
            # Sorted, de-duplicated public names reported by the accessor.
            results = [r for r in s.dt.__dir__() if not r.startswith("_")]
            return sorted(set(results))

        s = Series(date_range("20130101", periods=5, freq="D"), name="xxx")
        results = get_dir(s)
        tm.assert_almost_equal(results,
                               sorted(set(ok_for_dt + ok_for_dt_methods)))

        s = Series(
            period_range("20130101", periods=5, freq="D",
                         name="xxx").astype(object))
        results = get_dir(s)
        tm.assert_almost_equal(
            results, sorted(set(ok_for_period + ok_for_period_methods)))

        # 11295
        # ambiguous time error on the conversions
        s = Series(date_range("2015-01-01", "2016-01-01", freq="T"),
                   name="xxx")
        s = s.dt.tz_localize("UTC").dt.tz_convert("America/Chicago")
        results = get_dir(s)
        tm.assert_almost_equal(results,
                               sorted(set(ok_for_dt + ok_for_dt_methods)))
        exp_values = date_range("2015-01-01", "2016-01-01", freq="T",
                                tz="UTC").tz_convert("America/Chicago")
        # freq not preserved by tz_localize above
        exp_values = exp_values._with_freq(None)
        expected = Series(exp_values, name="xxx")
        tm.assert_series_equal(s, expected)

        # no setting allowed
        s = Series(date_range("20130101", periods=5, freq="D"), name="xxx")
        with pytest.raises(ValueError, match="modifications"):
            s.dt.hour = 5

        # trying to set a copy
        msg = "modifications to a property of a datetimelike.+not supported"
        with pd.option_context("chained_assignment", "raise"):
            with pytest.raises(com.SettingWithCopyError, match=msg):
                s.dt.hour[0] = 5
Пример #44
0
    Interval,
    IntervalIndex,
    Timedelta,
    Timestamp,
    date_range,
    timedelta_range,
)
import pandas._testing as tm
from pandas.core.arrays import IntervalArray


@pytest.fixture(
    params=[
        (Index([0, 2, 4]), Index([1, 3, 5])),
        (Index([0.0, 1.0, 2.0]), Index([1.0, 2.0, 3.0])),
        (timedelta_range("0 days", periods=3), timedelta_range("1 day", periods=3)),
        (date_range("20170101", periods=3), date_range("20170102", periods=3)),
        (
            date_range("20170101", periods=3, tz="US/Eastern"),
            date_range("20170102", periods=3, tz="US/Eastern"),
        ),
    ],
    # Each case is labelled with the dtype of its first (left) index.
    ids=lambda pair: str(pair[0].dtype),
)
def left_right_dtypes(request):
    """
    Parametrized (left, right) index pairs covering integer, float,
    timedelta, datetime and tz-aware datetime dtypes, for building an
    IntervalArray.
    """
    return request.param

Пример #45
0
def _nonempty_index(idx):
    typ = type(idx)
    if typ is pd.RangeIndex:
        return pd.RangeIndex(2, name=idx.name)
    elif typ in _numeric_index_types:
        return typ([1, 2], name=idx.name)
    elif typ is pd.Index:
        return pd.Index(["a", "b"], name=idx.name)
    elif typ is pd.DatetimeIndex:
        start = "1970-01-01"
        # Need a non-monotonic decreasing index to avoid issues with
        # partial string indexing see https://github.com/dask/dask/issues/2389
        # and https://github.com/pandas-dev/pandas/issues/16515
        # This doesn't mean `_meta_nonempty` should ever rely on
        # `self.monotonic_increasing` or `self.monotonic_decreasing`
        try:
            return pd.date_range(start=start,
                                 periods=2,
                                 freq=idx.freq,
                                 tz=idx.tz,
                                 name=idx.name)
        except ValueError:  # older pandas versions
            data = [start, "1970-01-02"] if idx.freq is None else None
            return pd.DatetimeIndex(data,
                                    start=start,
                                    periods=2,
                                    freq=idx.freq,
                                    tz=idx.tz,
                                    name=idx.name)
    elif typ is pd.PeriodIndex:
        return pd.period_range(start="1970-01-01",
                               periods=2,
                               freq=idx.freq,
                               name=idx.name)
    elif typ is pd.TimedeltaIndex:
        start = np.timedelta64(1, "D")
        try:
            return pd.timedelta_range(start=start,
                                      periods=2,
                                      freq=idx.freq,
                                      name=idx.name)
        except ValueError:  # older pandas versions
            start = np.timedelta64(1, "D")
            data = [start, start + 1] if idx.freq is None else None
            return pd.TimedeltaIndex(data,
                                     start=start,
                                     periods=2,
                                     freq=idx.freq,
                                     name=idx.name)
    elif typ is pd.CategoricalIndex:
        if len(idx.categories) == 0:
            data = pd.Categorical(_nonempty_index(idx.categories),
                                  ordered=idx.ordered)
        else:
            data = pd.Categorical.from_codes([-1, 0],
                                             categories=idx.categories,
                                             ordered=idx.ordered)
        return pd.CategoricalIndex(data, name=idx.name)
    elif typ is pd.MultiIndex:
        levels = [_nonempty_index(l) for l in idx.levels]
        codes = [[0, 0] for i in idx.levels]
        try:
            return pd.MultiIndex(levels=levels, codes=codes, names=idx.names)
        except TypeError:  # older pandas versions
            return pd.MultiIndex(levels=levels, labels=codes, names=idx.names)

    raise TypeError("Don't know how to handle index of "
                    "type {0}".format(typename(type(idx))))
Пример #46
0
def test_arg_passthru():
    """Check that ``numeric_only`` is passed through to groupby reductions.

    For each aggregation the columns of the default call and of the
    ``numeric_only=False`` call are compared with the expected column sets;
    for ``mean``/``median`` the full result frame is compared as well.
    """
    # make sure that we are passing thru kwargs
    # to our agg functions

    # GH3668
    # GH5724
    all_columns = ['group', 'int', 'float', 'string',
                   'category_string', 'category_int',
                   'datetime', 'datetimetz',
                   'timedelta']
    df = pd.DataFrame(
        {'group': [1, 1, 2],
         'int': [1, 2, 3],
         'float': [4., 5., 6.],
         'string': list('abc'),
         'category_string': pd.Series(list('abc')).astype('category'),
         'category_int': [7, 8, 9],
         'datetime': pd.date_range('20130101', periods=3),
         'datetimetz': pd.date_range('20130101',
                                     periods=3,
                                     tz='US/Eastern'),
         'timedelta': pd.timedelta_range('1 s', periods=3, freq='s')},
        columns=all_columns)

    expected_columns_numeric = Index(['int', 'float', 'category_int'])

    def check_columns(methods, default_columns, full_columns):
        # For every named groupby method, compare the columns of the default
        # call and of the ``numeric_only=False`` call with the expectations.
        for method in methods:
            func = getattr(df.groupby('group'), method)
            tm.assert_index_equal(func().columns, default_columns)
            tm.assert_index_equal(func(numeric_only=False).columns,
                                  full_columns)

    # mean / median
    expected = pd.DataFrame(
        {'category_int': [7.5, 9],
         'float': [4.5, 6.],
         'timedelta': [pd.Timedelta('1.5s'),
                       pd.Timedelta('3s')],
         'int': [1.5, 3],
         'datetime': [pd.Timestamp('2013-01-01 12:00:00'),
                      pd.Timestamp('2013-01-03 00:00:00')],
         'datetimetz': [
             pd.Timestamp('2013-01-01 12:00:00', tz='US/Eastern'),
             pd.Timestamp('2013-01-03 00:00:00', tz='US/Eastern')]},
        index=Index([1, 2], name='group'),
        columns=['int', 'float', 'category_int',
                 'datetime', 'datetimetz', 'timedelta'])
    for method in ('mean', 'median'):
        func = getattr(df.groupby('group'), method)
        tm.assert_index_equal(func().columns, expected_columns_numeric)

        full = func(numeric_only=False)
        tm.assert_frame_equal(full.reindex_like(expected), expected)

    # TODO: min, max *should* handle
    # categorical (ordered) dtype
    min_max_columns = Index(['int', 'float', 'string',
                             'category_int',
                             'datetime', 'datetimetz',
                             'timedelta'])
    check_columns(['min', 'max'], min_max_columns, min_max_columns)

    first_last_columns = Index(['int', 'float', 'string',
                                'category_string', 'category_int',
                                'datetime', 'datetimetz',
                                'timedelta'])
    check_columns(['first', 'last'], first_last_columns, first_last_columns)

    sum_columns = Index(['int', 'float', 'string',
                         'category_int', 'timedelta'])
    check_columns(['sum'], expected_columns_numeric, sum_columns)

    prod_columns = Index(['int', 'float', 'category_int'])
    check_columns(['prod', 'cumprod'], expected_columns_numeric, prod_columns)

    # like min, max, but don't include strings
    # GH 15561: numeric_only=False set by default like min/max
    cum_extreme_columns = Index(['int', 'float',
                                 'category_int',
                                 'datetime', 'datetimetz',
                                 'timedelta'])
    check_columns(['cummin', 'cummax'],
                  cum_extreme_columns, cum_extreme_columns)

    cumsum_columns = Index(['int', 'float', 'category_int',
                            'timedelta'])
    check_columns(['cumsum'], expected_columns_numeric, cumsum_columns)
Пример #47
0
class TestComparison:
    """
    Equality / inequality comparisons of interval arrays against scalars and
    list-likes.

    Each test evaluates ``op(array, other)`` and compares it with a reference
    built element by element in :meth:`elementwise_comparison`.  The
    ``array``, ``closed``, ``other_closed`` and ``nulls_fixture`` arguments
    are fixtures that are not defined in this class.
    """

    @pytest.fixture(params=[operator.eq, operator.ne])
    def op(self, request):
        """
        Fixture yielding the comparison operator under test (``==``, ``!=``).
        """
        return request.param

    @pytest.fixture(
        params=[
            IntervalArray.from_arrays,
            IntervalIndex.from_arrays,
            create_categorical_intervals,
            create_series_intervals,
            create_series_categorical_intervals,
        ],
        ids=[
            "IntervalArray",
            "IntervalIndex",
            "Categorical[Interval]",
            "Series[Interval]",
            "Series[Categorical[Interval]]",
        ],
    )
    def interval_constructor(self, request):
        """
        Fixture for all pandas native interval constructors.
        To be used as the LHS of IntervalArray comparisons.
        """
        return request.param

    def elementwise_comparison(self, op, array, other):
        """
        Helper that performs elementwise comparisons between `array` and `other`

        A non-list-like `other` is repeated ``len(array)`` times first.  The
        result is a NumPy array, or a Series carrying ``other.index`` when
        `other` is a Series.
        """
        other = other if is_list_like(other) else [other] * len(array)
        expected = np.array([op(x, y) for x, y in zip(array, other)])
        if isinstance(other, Series):
            return Series(expected, index=other.index)
        return expected

    def _xfail_na_on_non_integer(self, array, nulls_fixture, request):
        """
        Mark the running test as xfail when ``pd.NA`` is compared against an
        interval array whose subtype is not int64 (see GH 31882).

        Shared by the scalar and list-like NA tests so that both use the same
        condition and the same xfail reason.
        """
        if nulls_fixture is pd.NA and array.dtype.subtype != "int64":
            mark = pytest.mark.xfail(
                reason="broken for non-integer IntervalArray; see GH 31882")
            request.node.add_marker(mark)

    def test_compare_scalar_interval(self, op, array):
        """Compare against a single Interval taken from / derived from array."""
        # matches first interval
        other = array[0]
        result = op(array, other)
        expected = self.elementwise_comparison(op, array, other)
        tm.assert_numpy_array_equal(result, expected)

        # matches on a single endpoint but not both
        other = Interval(array.left[0], array.right[1])
        result = op(array, other)
        expected = self.elementwise_comparison(op, array, other)
        tm.assert_numpy_array_equal(result, expected)

    def test_compare_scalar_interval_mixed_closed(self, op, closed,
                                                  other_closed):
        """Compare against an Interval built with a separate ``closed``."""
        array = IntervalArray.from_arrays(range(2), range(1, 3), closed=closed)
        other = Interval(0, 1, closed=other_closed)

        result = op(array, other)
        expected = self.elementwise_comparison(op, array, other)
        tm.assert_numpy_array_equal(result, expected)

    def test_compare_scalar_na(self, op, array, nulls_fixture, request):
        """Compare against a scalar null value."""
        result = op(array, nulls_fixture)
        expected = self.elementwise_comparison(op, array, nulls_fixture)

        self._xfail_na_on_non_integer(array, nulls_fixture, request)

        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize(
        "other",
        [
            0,
            1.0,
            True,
            "foo",
            Timestamp("2017-01-01"),
            Timestamp("2017-01-01", tz="US/Eastern"),
            Timedelta("0 days"),
            Period("2017-01-01", "D"),
        ],
    )
    def test_compare_scalar_other(self, op, array, other):
        """Compare against scalars that are not Interval objects."""
        result = op(array, other)
        expected = self.elementwise_comparison(op, array, other)
        tm.assert_numpy_array_equal(result, expected)

    def test_compare_list_like_interval(self, op, array, interval_constructor):
        """Compare against interval containers built by each constructor."""
        # same endpoints
        other = interval_constructor(array.left, array.right)
        result = op(array, other)
        expected = self.elementwise_comparison(op, array, other)
        tm.assert_equal(result, expected)

        # different endpoints
        other = interval_constructor(array.left[::-1], array.right[::-1])
        result = op(array, other)
        expected = self.elementwise_comparison(op, array, other)
        tm.assert_equal(result, expected)

        # all nan endpoints
        # NOTE(review): the hard-coded 4 presumably matches the length of the
        # ``array`` fixture -- confirm against the fixture definition.
        other = interval_constructor([np.nan] * 4, [np.nan] * 4)
        result = op(array, other)
        expected = self.elementwise_comparison(op, array, other)
        tm.assert_equal(result, expected)

    def test_compare_list_like_interval_mixed_closed(self, op,
                                                     interval_constructor,
                                                     closed, other_closed):
        """Compare containers with equal endpoints but separate ``closed``."""
        array = IntervalArray.from_arrays(range(2), range(1, 3), closed=closed)
        other = interval_constructor(range(2),
                                     range(1, 3),
                                     closed=other_closed)

        result = op(array, other)
        expected = self.elementwise_comparison(op, array, other)
        tm.assert_equal(result, expected)

    @pytest.mark.parametrize(
        "other",
        [
            (
                Interval(0, 1),
                Interval(Timedelta("1 day"), Timedelta("2 days")),
                Interval(4, 5, "both"),
                Interval(10, 20, "neither"),
            ),
            (0, 1.5, Timestamp("20170103"), np.nan),
            (
                Timestamp("20170102", tz="US/Eastern"),
                Timedelta("2 days"),
                "baz",
                pd.NaT,
            ),
        ],
    )
    def test_compare_list_like_object(self, op, array, other):
        """Compare against tuples holding heterogeneous objects."""
        result = op(array, other)
        expected = self.elementwise_comparison(op, array, other)
        tm.assert_numpy_array_equal(result, expected)

    def test_compare_list_like_nan(self, op, array, nulls_fixture, request):
        """Compare against a list that repeats one null value."""
        other = [nulls_fixture] * 4
        result = op(array, other)
        expected = self.elementwise_comparison(op, array, other)

        self._xfail_na_on_non_integer(array, nulls_fixture, request)

        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize(
        "other",
        [
            np.arange(4, dtype="int64"),
            np.arange(4, dtype="float64"),
            date_range("2017-01-01", periods=4),
            date_range("2017-01-01", periods=4, tz="US/Eastern"),
            timedelta_range("0 days", periods=4),
            period_range("2017-01-01", periods=4, freq="D"),
            Categorical(list("abab")),
            Categorical(date_range("2017-01-01", periods=4)),
            pd.array(list("abcd")),
            pd.array(["foo", 3.14, None, object()]),
        ],
        ids=lambda x: str(x.dtype),
    )
    def test_compare_list_like_other(self, op, array, other):
        """Compare against length-4 array-likes of non-interval dtypes."""
        result = op(array, other)
        expected = self.elementwise_comparison(op, array, other)
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("length", [1, 3, 5])
    @pytest.mark.parametrize("other_constructor", [IntervalArray, list])
    def test_compare_length_mismatch_errors(self, op, other_constructor,
                                            length):
        """A length-4 array compared with another length raises ValueError."""
        array = IntervalArray.from_arrays(range(4), range(1, 5))
        other = other_constructor([Interval(0, 1)] * length)
        with pytest.raises(ValueError, match="Lengths must match to compare"):
            op(array, other)

    @pytest.mark.parametrize(
        "constructor, expected_type, assert_func",
        [
            (IntervalIndex, np.array, tm.assert_numpy_array_equal),
            (Series, Series, tm.assert_series_equal),
        ],
    )
    def test_index_series_compat(self, op, constructor, expected_type,
                                 assert_func):
        """Run scalar and list-like comparisons on IntervalIndex / Series."""
        # IntervalIndex/Series that rely on IntervalArray for comparisons
        breaks = range(4)
        index = constructor(IntervalIndex.from_breaks(breaks))

        # scalar comparisons
        other = index[0]
        result = op(index, other)
        expected = expected_type(self.elementwise_comparison(op, index, other))
        assert_func(result, expected)

        other = breaks[0]
        result = op(index, other)
        expected = expected_type(self.elementwise_comparison(op, index, other))
        assert_func(result, expected)

        # list-like comparisons
        other = IntervalArray.from_breaks(breaks)
        result = op(index, other)
        expected = expected_type(self.elementwise_comparison(op, index, other))
        assert_func(result, expected)

        other = [index[0], breaks[0], "foo"]
        result = op(index, other)
        expected = expected_type(self.elementwise_comparison(op, index, other))
        assert_func(result, expected)

    @pytest.mark.parametrize("scalars", ["a", False, 1, 1.0, None])
    def test_comparison_operations(self, scalars):
        """``==`` between an interval Series and these scalars is all False."""
        # GH #28981
        expected = Series([False, False])
        s = Series([Interval(0, 1), Interval(1, 2)], dtype="interval")
        result = s == scalars
        tm.assert_series_equal(result, expected)
Пример #48
0
    def test_basic(self, fp, df_full):
        """
        Round-trip ``df_full`` after adding a tz-aware datetime column and a
        timedelta column.

        The columns are written onto the ``df_full`` object itself, which is
        then handed to ``check_round_trip`` together with ``fp``.
        """
        extra_columns = {
            "datetime_tz": pd.date_range("20130101", periods=3,
                                         tz="US/Eastern"),
            "timedelta": pd.timedelta_range("1 day", periods=3),
        }
        for label, values in extra_columns.items():
            df_full[label] = values
        check_round_trip(df_full, fp)
Пример #49
0
class TestPartialSetting:
    """
    Tests for setting values on Series / DataFrame under labels that may not
    exist yet (setting with enlargement), plus related list-based ``.loc``
    lookups with missing labels.
    """

    def test_partial_setting(self):
        """Enlarge via ``[]`` / ``.loc``; ``iloc`` / ``iat`` must raise."""
        # GH2578, allow ix and friends to partially set

        # series
        s_orig = Series([1, 2, 3])

        s = s_orig.copy()
        s[5] = 5
        expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5])
        tm.assert_series_equal(s, expected)

        s = s_orig.copy()
        s.loc[5] = 5
        expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5])
        tm.assert_series_equal(s, expected)

        s = s_orig.copy()
        s[5] = 5.0
        expected = Series([1, 2, 3, 5.0], index=[0, 1, 2, 5])
        tm.assert_series_equal(s, expected)

        s = s_orig.copy()
        s.loc[5] = 5.0
        expected = Series([1, 2, 3, 5.0], index=[0, 1, 2, 5])
        tm.assert_series_equal(s, expected)

        # iloc/iat raise
        s = s_orig.copy()

        msg = "iloc cannot enlarge its target object"
        with pytest.raises(IndexError, match=msg):
            s.iloc[3] = 5.0

        msg = "index 3 is out of bounds for axis 0 with size 3"
        with pytest.raises(IndexError, match=msg):
            s.iat[3] = 5.0

        # ## frame ##

        df_orig = DataFrame(np.arange(6).reshape(3, 2),
                            columns=["A", "B"],
                            dtype="int64")

        # iloc/iat raise
        df = df_orig.copy()

        msg = "iloc cannot enlarge its target object"
        with pytest.raises(IndexError, match=msg):
            df.iloc[4, 2] = 5.0

        msg = "index 2 is out of bounds for axis 0 with size 2"
        with pytest.raises(IndexError, match=msg):
            df.iat[4, 2] = 5.0

        # row setting where it exists
        expected = DataFrame({"A": [0, 4, 4], "B": [1, 5, 5]})
        df = df_orig.copy()
        df.iloc[1] = df.iloc[2]
        tm.assert_frame_equal(df, expected)

        expected = DataFrame({"A": [0, 4, 4], "B": [1, 5, 5]})
        df = df_orig.copy()
        df.loc[1] = df.loc[2]
        tm.assert_frame_equal(df, expected)

        # like 2578, partial setting with dtype preservation
        expected = DataFrame({"A": [0, 2, 4, 4], "B": [1, 3, 5, 5]})
        df = df_orig.copy()
        df.loc[3] = df.loc[2]
        tm.assert_frame_equal(df, expected)

        # single dtype frame, overwrite
        expected = DataFrame({"A": [0, 2, 4], "B": [0, 2, 4]})
        df = df_orig.copy()
        df.loc[:, "B"] = df.loc[:, "A"]
        tm.assert_frame_equal(df, expected)

        # mixed dtype frame, overwrite
        expected = DataFrame({"A": [0, 2, 4], "B": Series([0, 2, 4])})
        df = df_orig.copy()
        df["B"] = df["B"].astype(np.float64)
        df.loc[:, "B"] = df.loc[:, "A"]
        tm.assert_frame_equal(df, expected)

        # single dtype frame, partial setting
        # (``df`` on the right-hand side is still the frame from the
        # previous step)
        expected = df_orig.copy()
        expected["C"] = df["A"]
        df = df_orig.copy()
        df.loc[:, "C"] = df.loc[:, "A"]
        tm.assert_frame_equal(df, expected)

        # mixed frame, partial setting
        expected = df_orig.copy()
        expected["C"] = df["A"]
        df = df_orig.copy()
        df.loc[:, "C"] = df.loc[:, "A"]
        tm.assert_frame_equal(df, expected)

    def test_partial_setting2(self):
        """Add a new date row, with an existing and with a new column."""
        # GH 8473
        dates = date_range("1/1/2000", periods=8)
        df_orig = DataFrame(np.random.randn(8, 4),
                            index=dates,
                            columns=["A", "B", "C", "D"])

        expected = pd.concat(
            [df_orig,
             DataFrame({"A": 7}, index=dates[-1:] + dates.freq)],
            sort=True)
        df = df_orig.copy()
        df.loc[dates[-1] + dates.freq, "A"] = 7
        tm.assert_frame_equal(df, expected)
        df = df_orig.copy()
        df.at[dates[-1] + dates.freq, "A"] = 7
        tm.assert_frame_equal(df, expected)

        exp_other = DataFrame({0: 7}, index=dates[-1:] + dates.freq)
        expected = pd.concat([df_orig, exp_other], axis=1)

        df = df_orig.copy()
        df.loc[dates[-1] + dates.freq, 0] = 7
        tm.assert_frame_equal(df, expected)
        df = df_orig.copy()
        df.at[dates[-1] + dates.freq, 0] = 7
        tm.assert_frame_equal(df, expected)

    # TODO(ArrayManager)
    # df.loc[0] = Series(1, index=range(4)) case creates float columns
    # instead of object dtype
    @td.skip_array_manager_not_yet_implemented
    def test_partial_setting_mixed_dtype(self):
        """Append rows through ``.loc`` on mixed-dtype and empty frames."""
        # in a mixed dtype environment, try to preserve dtypes
        # by appending
        df = DataFrame([[True, 1], [False, 2]], columns=["female", "fitness"])

        s = df.loc[1].copy()
        s.name = 2
        expected = df.append(s)

        df.loc[2] = df.loc[1]
        tm.assert_frame_equal(df, expected)

        # columns will align
        df = DataFrame(columns=["A", "B"])
        df.loc[0] = Series(1, index=range(4))
        expected = DataFrame(columns=["A", "B"], index=[0], dtype=np.float64)
        tm.assert_frame_equal(df, expected)

        # columns will align
        # TODO: it isn't great that this behavior depends on consolidation
        df = DataFrame(columns=["A", "B"])._consolidate()
        df.loc[0] = Series(1, index=["B"])

        exp = DataFrame([[np.nan, 1]],
                        columns=["A", "B"],
                        index=[0],
                        dtype="float64")
        tm.assert_frame_equal(df, exp)

        # list-like must conform
        df = DataFrame(columns=["A", "B"])

        msg = "cannot set a row with mismatched columns"
        with pytest.raises(ValueError, match=msg):
            df.loc[0] = [1, 2, 3]

        df = DataFrame(columns=["A", "B"])
        df.loc[3] = [6, 7]

        exp = DataFrame([[6, 7]],
                        index=[3],
                        columns=["A", "B"],
                        dtype=np.int64)
        tm.assert_frame_equal(df, exp)

    def test_series_partial_set(self):
        """
        List lookups through ``.loc`` with a missing label raise KeyError,
        while ``reindex`` with the same labels returns NaN for them.
        """
        # partial set with new index
        # Regression from GH4825
        ser = Series([0.1, 0.2], index=[1, 2])

        # loc equiv to .reindex
        expected = Series([np.nan, 0.2, np.nan], index=[3, 2, 3])
        with pytest.raises(KeyError, match=r"not in index"):
            ser.loc[[3, 2, 3]]

        result = ser.reindex([3, 2, 3])
        tm.assert_series_equal(result, expected, check_index_type=True)

        expected = Series([np.nan, 0.2, np.nan, np.nan], index=[3, 2, 3, "x"])
        with pytest.raises(KeyError, match="not in index"):
            ser.loc[[3, 2, 3, "x"]]

        result = ser.reindex([3, 2, 3, "x"])
        tm.assert_series_equal(result, expected, check_index_type=True)

        expected = Series([0.2, 0.2, 0.1], index=[2, 2, 1])
        result = ser.loc[[2, 2, 1]]
        tm.assert_series_equal(result, expected, check_index_type=True)

        expected = Series([0.2, 0.2, np.nan, 0.1], index=[2, 2, "x", 1])
        with pytest.raises(KeyError, match="not in index"):
            ser.loc[[2, 2, "x", 1]]

        result = ser.reindex([2, 2, "x", 1])
        tm.assert_series_equal(result, expected, check_index_type=True)

        # raises as nothing is in the index
        msg = (r"\"None of \[Int64Index\(\[3, 3, 3\], dtype='int64'\)\] are "
               r"in the \[index\]\"")
        with pytest.raises(KeyError, match=msg):
            ser.loc[[3, 3, 3]]

        expected = Series([0.2, 0.2, np.nan], index=[2, 2, 3])
        with pytest.raises(KeyError, match="not in index"):
            ser.loc[[2, 2, 3]]

        result = ser.reindex([2, 2, 3])
        tm.assert_series_equal(result, expected, check_index_type=True)

        s = Series([0.1, 0.2, 0.3], index=[1, 2, 3])
        expected = Series([0.3, np.nan, np.nan], index=[3, 4, 4])
        with pytest.raises(KeyError, match="not in index"):
            s.loc[[3, 4, 4]]

        result = s.reindex([3, 4, 4])
        tm.assert_series_equal(result, expected, check_index_type=True)

        s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4])
        expected = Series([np.nan, 0.3, 0.3], index=[5, 3, 3])
        with pytest.raises(KeyError, match="not in index"):
            s.loc[[5, 3, 3]]

        result = s.reindex([5, 3, 3])
        tm.assert_series_equal(result, expected, check_index_type=True)

        s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4])
        expected = Series([np.nan, 0.4, 0.4], index=[5, 4, 4])
        with pytest.raises(KeyError, match="not in index"):
            s.loc[[5, 4, 4]]

        result = s.reindex([5, 4, 4])
        tm.assert_series_equal(result, expected, check_index_type=True)

        s = Series([0.1, 0.2, 0.3, 0.4], index=[4, 5, 6, 7])
        expected = Series([0.4, np.nan, np.nan], index=[7, 2, 2])
        with pytest.raises(KeyError, match="not in index"):
            s.loc[[7, 2, 2]]

        result = s.reindex([7, 2, 2])
        tm.assert_series_equal(result, expected, check_index_type=True)

        s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4])
        expected = Series([0.4, np.nan, np.nan], index=[4, 5, 5])
        with pytest.raises(KeyError, match="not in index"):
            s.loc[[4, 5, 5]]

        result = s.reindex([4, 5, 5])
        tm.assert_series_equal(result, expected, check_index_type=True)

        # iloc
        expected = Series([0.2, 0.2, 0.1, 0.1], index=[2, 2, 1, 1])
        result = ser.iloc[[1, 1, 0, 0]]
        tm.assert_series_equal(result, expected, check_index_type=True)

    def test_series_partial_set_with_name(self):
        """Same lookups as above on a Series with named index and name."""
        # GH 11497

        idx = Index([1, 2], dtype="int64", name="idx")
        ser = Series([0.1, 0.2], index=idx, name="s")

        # loc
        with pytest.raises(KeyError, match=r"\[3\] not in index"):
            ser.loc[[3, 2, 3]]

        with pytest.raises(KeyError, match=r"not in index"):
            ser.loc[[3, 2, 3, "x"]]

        exp_idx = Index([2, 2, 1], dtype="int64", name="idx")
        expected = Series([0.2, 0.2, 0.1], index=exp_idx, name="s")
        result = ser.loc[[2, 2, 1]]
        tm.assert_series_equal(result, expected, check_index_type=True)

        with pytest.raises(KeyError, match=r"\['x'\] not in index"):
            ser.loc[[2, 2, "x", 1]]

        # raises as nothing is in the index
        msg = (r"\"None of \[Int64Index\(\[3, 3, 3\], dtype='int64', "
               r"name='idx'\)\] are in the \[index\]\"")
        with pytest.raises(KeyError, match=msg):
            ser.loc[[3, 3, 3]]

        with pytest.raises(KeyError, match="not in index"):
            ser.loc[[2, 2, 3]]

        idx = Index([1, 2, 3], dtype="int64", name="idx")
        with pytest.raises(KeyError, match="not in index"):
            Series([0.1, 0.2, 0.3], index=idx, name="s").loc[[3, 4, 4]]

        idx = Index([1, 2, 3, 4], dtype="int64", name="idx")
        with pytest.raises(KeyError, match="not in index"):
            Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 3, 3]]

        idx = Index([1, 2, 3, 4], dtype="int64", name="idx")
        with pytest.raises(KeyError, match="not in index"):
            Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 4, 4]]

        idx = Index([4, 5, 6, 7], dtype="int64", name="idx")
        with pytest.raises(KeyError, match="not in index"):
            Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[7, 2, 2]]

        idx = Index([1, 2, 3, 4], dtype="int64", name="idx")
        with pytest.raises(KeyError, match="not in index"):
            Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[4, 5, 5]]

        # iloc
        exp_idx = Index([2, 2, 1, 1], dtype="int64", name="idx")
        expected = Series([0.2, 0.2, 0.1, 0.1], index=exp_idx, name="s")
        result = ser.iloc[[1, 1, 0, 0]]
        tm.assert_series_equal(result, expected, check_index_type=True)

    @pytest.mark.parametrize("key", [100, 100.0])
    def test_setitem_with_expansion_numeric_into_datetimeindex(self, key):
        """A numeric row label added to a time frame gives an object index."""
        # GH#4940 inserting non-strings
        orig = tm.makeTimeDataFrame()
        df = orig.copy()

        df.loc[key, :] = df.iloc[0]
        ex_index = Index(list(orig.index) + [key],
                         dtype=object,
                         name=orig.index.name)
        ex_data = np.concatenate([orig.values, df.iloc[[0]].values], axis=0)
        expected = DataFrame(ex_data, index=ex_index, columns=orig.columns)
        tm.assert_frame_equal(df, expected)

    def test_partial_set_invalid(self):
        """A string row label added to a time frame gives an object index."""
        # GH 4940
        # allow only setting of 'valid' values

        orig = tm.makeTimeDataFrame()

        # allow object conversion here
        df = orig.copy()
        df.loc["a", :] = df.iloc[0]
        exp = orig.append(Series(df.iloc[0], name="a"))
        tm.assert_frame_equal(df, exp)
        tm.assert_index_equal(df.index, Index(orig.index.tolist() + ["a"]))
        assert df.index.dtype == "object"

    def test_partial_set_empty_frame(self):
        """Setting through ``.loc`` on a completely empty frame raises."""
        # partially set with an empty object
        # frame
        df = DataFrame()

        msg = "cannot set a frame with no defined columns"

        with pytest.raises(ValueError, match=msg):
            df.loc[1] = 1

        with pytest.raises(ValueError, match=msg):
            df.loc[1] = Series([1], index=["foo"])

        msg = "cannot set a frame with no defined index and a scalar"
        with pytest.raises(ValueError, match=msg):
            df.loc[:, 1] = 1

    def test_partial_set_empty_frame2(self):
        """Assigning empty object-dtype data adds a column and no rows."""
        # these work as they don't really change
        # anything but the index
        # GH5632
        expected = DataFrame(columns=["foo"], index=Index([], dtype="object"))

        df = DataFrame(index=Index([], dtype="object"))
        df["foo"] = Series([], dtype="object")

        tm.assert_frame_equal(df, expected)

        df = DataFrame()
        df["foo"] = Series(df.index)

        tm.assert_frame_equal(df, expected)

        df = DataFrame()
        df["foo"] = df.index

        tm.assert_frame_equal(df, expected)

    def test_partial_set_empty_frame3(self):
        """An empty list / empty float Series yields a float64 column."""
        expected = DataFrame(columns=["foo"], index=Index([], dtype="int64"))
        expected["foo"] = expected["foo"].astype("float64")

        df = DataFrame(index=Index([], dtype="int64"))
        df["foo"] = []

        tm.assert_frame_equal(df, expected)

        df = DataFrame(index=Index([], dtype="int64"))
        df["foo"] = Series(np.arange(len(df)), dtype="float64")

        tm.assert_frame_equal(df, expected)

    def test_partial_set_empty_frame4(self):
        """An empty ``range`` yields an int64 column."""
        df = DataFrame(index=Index([], dtype="int64"))
        df["foo"] = range(len(df))

        expected = DataFrame(columns=["foo"], index=Index([], dtype="int64"))
        # range is int-dtype-like, so we get int64 dtype
        expected["foo"] = expected["foo"].astype("int64")
        tm.assert_frame_equal(df, expected)

    def test_partial_set_empty_frame5(self):
        """``df.loc[:, 1] = ser`` and ``df[1] = ser`` agree on empty frames."""
        df = DataFrame()
        tm.assert_index_equal(df.columns, Index([], dtype=object))
        df2 = DataFrame()
        df2[1] = Series([1], index=["foo"])
        df.loc[:, 1] = Series([1], index=["foo"])
        tm.assert_frame_equal(df, DataFrame([[1]], index=["foo"], columns=[1]))
        tm.assert_frame_equal(df, df2)

    def test_partial_set_empty_frame_no_index(self):
        """Assign an indexed Series as a new column of a row-less frame."""
        # no index to start
        expected = DataFrame({0: Series(1, index=range(4))},
                             columns=["A", "B", 0])

        df = DataFrame(columns=["A", "B"])
        df[0] = Series(1, index=range(4))
        # the results are discarded: these two only need to run without
        # raising
        df.dtypes
        str(df)
        tm.assert_frame_equal(df, expected)

        df = DataFrame(columns=["A", "B"])
        df.loc[:, 0] = Series(1, index=range(4))
        df.dtypes
        str(df)
        tm.assert_frame_equal(df, expected)

    def test_partial_set_empty_frame_row(self):
        """Adding a scalar column to a row-less frame creates no rows."""
        # GH5720, GH5744
        # don't create rows when empty
        expected = DataFrame(columns=["A", "B", "New"],
                             index=Index([], dtype="int64"))
        expected["A"] = expected["A"].astype("int64")
        expected["B"] = expected["B"].astype("float64")
        expected["New"] = expected["New"].astype("float64")

        df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]})
        y = df[df.A > 5]
        y["New"] = np.nan
        tm.assert_frame_equal(y, expected)

        expected = DataFrame(columns=["a", "b", "c c", "d"])
        expected["d"] = expected["d"].astype("int64")
        df = DataFrame(columns=["a", "b", "c c"])
        df["d"] = 3
        tm.assert_frame_equal(df, expected)
        tm.assert_series_equal(df["c c"], Series(name="c c", dtype=object))

        # reindex columns is ok
        df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]})
        y = df[df.A > 5]
        result = y.reindex(columns=["A", "B", "C"])
        expected = DataFrame(columns=["A", "B", "C"],
                             index=Index([], dtype="int64"))
        expected["A"] = expected["A"].astype("int64")
        expected["B"] = expected["B"].astype("float64")
        expected["C"] = expected["C"].astype("float64")
        tm.assert_frame_equal(result, expected)

    def test_partial_set_empty_frame_set_series(self):
        """Build a frame from an empty Series, unnamed and named."""
        # GH 5756
        # setting with empty Series
        df = DataFrame(Series(dtype=object))
        expected = DataFrame({0: Series(dtype=object)})
        tm.assert_frame_equal(df, expected)

        df = DataFrame(Series(name="foo", dtype=object))
        expected = DataFrame({"foo": Series(dtype=object)})
        tm.assert_frame_equal(df, expected)

    def test_partial_set_empty_frame_empty_copy_assignment(self):
        """A copy of a column-less frame accepts a new scalar column."""
        # GH 5932
        # copy on empty with assignment fails
        df = DataFrame(index=[0])
        df = df.copy()
        df["a"] = 0
        expected = DataFrame(0, index=[0], columns=["a"])
        tm.assert_frame_equal(df, expected)

    def test_partial_set_empty_frame_empty_consistencies(self):
        """Fill one column of a row-less frame; the other becomes NaN."""
        # GH 6171
        # consistency on empty frames
        df = DataFrame(columns=["x", "y"])
        df["x"] = [1, 2]
        expected = DataFrame({"x": [1, 2], "y": [np.nan, np.nan]})
        tm.assert_frame_equal(df, expected, check_dtype=False)

        df = DataFrame(columns=["x", "y"])
        df["x"] = ["1", "2"]
        expected = DataFrame({"x": ["1", "2"], "y": [np.nan, np.nan]},
                             dtype=object)
        tm.assert_frame_equal(df, expected)

        df = DataFrame(columns=["x", "y"])
        df.loc[0, "x"] = 1
        expected = DataFrame({"x": [1], "y": [np.nan]})
        tm.assert_frame_equal(df, expected, check_dtype=False)

    @pytest.mark.parametrize(
        "idx,labels,expected_idx",
        [
            (
                period_range(start="2000", periods=20, freq="D"),
                ["2000-01-04", "2000-01-08", "2000-01-12"],
                [
                    Period("2000-01-04", freq="D"),
                    Period("2000-01-08", freq="D"),
                    Period("2000-01-12", freq="D"),
                ],
            ),
            (
                date_range(start="2000", periods=20, freq="D"),
                ["2000-01-04", "2000-01-08", "2000-01-12"],
                [
                    Timestamp("2000-01-04"),
                    Timestamp("2000-01-08"),
                    Timestamp("2000-01-12"),
                ],
            ),
            (
                pd.timedelta_range(start="1 day", periods=20),
                ["4D", "8D", "12D"],
                [
                    pd.Timedelta("4 day"),
                    pd.Timedelta("8 day"),
                    pd.Timedelta("12 day")
                ],
            ),
        ],
    )
    def test_loc_with_list_of_strings_representing_datetimes(
            self, idx, labels, expected_idx, frame_or_series):
        """String labels select the matching period/date/timedelta rows."""
        # GH 11278
        obj = frame_or_series(range(20), index=idx)

        expected_value = [3, 7, 11]
        expected = frame_or_series(expected_value, expected_idx)

        tm.assert_equal(expected, obj.loc[labels])
        if frame_or_series is Series:
            tm.assert_series_equal(expected, obj[labels])

    @pytest.mark.parametrize(
        "idx,labels",
        [
            (
                period_range(start="2000", periods=20, freq="D"),
                ["2000-01-04", "2000-01-30"],
            ),
            (
                date_range(start="2000", periods=20, freq="D"),
                ["2000-01-04", "2000-01-30"],
            ),
            (pd.timedelta_range(start="1 day",
                                periods=20), ["3 day", "30 day"]),
        ],
    )
    def test_loc_with_list_of_strings_representing_datetimes_missing_value(
            self, idx, labels):
        """A label outside the 20-element index raises KeyError."""
        # GH 11278
        s = Series(range(20), index=idx)
        df = DataFrame(range(20), index=idx)
        msg = r"not in index"

        with pytest.raises(KeyError, match=msg):
            s.loc[labels]
        with pytest.raises(KeyError, match=msg):
            s[labels]
        with pytest.raises(KeyError, match=msg):
            df.loc[labels]

    @pytest.mark.parametrize(
        "idx,labels,msg",
        [
            (
                period_range(start="2000", periods=20, freq="D"),
                ["4D", "8D"],
                (r"None of \[Index\(\['4D', '8D'\], dtype='object'\)\] "
                 r"are in the \[index\]"),
            ),
            (
                date_range(start="2000", periods=20, freq="D"),
                ["4D", "8D"],
                (r"None of \[Index\(\['4D', '8D'\], dtype='object'\)\] "
                 r"are in the \[index\]"),
            ),
            (
                pd.timedelta_range(start="1 day", periods=20),
                ["2000-01-04", "2000-01-08"],
                (r"None of \[Index\(\['2000-01-04', '2000-01-08'\], "
                 r"dtype='object'\)\] are in the \[index\]"),
            ),
        ],
    )
    def test_loc_with_list_of_strings_representing_datetimes_not_matched_type(
            self, idx, labels, msg):
        """String labels of the wrong kind for the index raise KeyError."""
        # GH 11278
        s = Series(range(20), index=idx)
        df = DataFrame(range(20), index=idx)

        with pytest.raises(KeyError, match=msg):
            s.loc[labels]
        with pytest.raises(KeyError, match=msg):
            s[labels]
        with pytest.raises(KeyError, match=msg):
            df.loc[labels]

    def test_index_name_empty(self):
        """Index name kept when a Series is assigned to an empty frame."""
        # GH 31368
        df = DataFrame({}, index=pd.RangeIndex(0, name="df_index"))
        series = Series(1.23, index=pd.RangeIndex(4, name="series_index"))

        df["series"] = series
        expected = DataFrame({"series": [1.23] * 4},
                             index=pd.RangeIndex(4, name="df_index"))

        tm.assert_frame_equal(df, expected)

        # GH 36527
        df = DataFrame()
        series = Series(1.23, index=pd.RangeIndex(4, name="series_index"))
        df["series"] = series
        expected = DataFrame({"series": [1.23] * 4},
                             index=pd.RangeIndex(4, name="series_index"))
        tm.assert_frame_equal(df, expected)

    def test_slice_irregular_datetime_index_with_nan(self):
        """Date-string slicing works on a datetime index ending in NaT."""
        # GH36953
        index = pd.to_datetime(
            ["2012-01-01", "2012-01-02", "2012-01-03", None])
        df = DataFrame(range(len(index)), index=index)
        expected = DataFrame(range(len(index[:3])), index=index[:3])
        result = df["2012-01-01":"2012-01-04"]
        tm.assert_frame_equal(result, expected)
Пример #50
0
def test_count_nonnumeric_types():
    """Rolling ``count`` works on non-numeric columns and skips missing values.

    GH12541. The frame mixes numeric, string, datetime, timedelta and period
    columns; the ``*_nan`` / ``*_nat`` columns end with a missing value, so
    their last window holds one fewer observation.
    """
    cols = [
        "int",
        "float",
        "string",
        "datetime",
        "timedelta",
        "periods",
        "fl_inf",
        "fl_nan",
        "str_nan",
        "dt_nat",
        "periods_nat",
    ]
    dt_nat_col = [
        Timestamp("20170101"),
        Timestamp("20170203"),
        Timestamp(None)
    ]

    df = DataFrame(
        {
            "int": [1, 2, 3],
            "float": [4.0, 5.0, 6.0],
            "string": list("abc"),
            "datetime": date_range("20170101", periods=3),
            "timedelta": timedelta_range("1 s", periods=3, freq="s"),
            "periods": [
                Period("2012-01"),
                Period("2012-02"),
                Period("2012-03"),
            ],
            # Lower-case spellings: the np.Inf / np.NaN aliases were removed
            # in NumPy 2.0, while np.inf / np.nan exist in every version.
            "fl_inf": [1.0, 2.0, np.inf],
            "fl_nan": [1.0, 2.0, np.nan],
            "str_nan": ["aa", "bb", np.nan],
            "dt_nat": dt_nat_col,
            "periods_nat": [
                Period("2012-01"),
                Period("2012-02"),
                Period(None),
            ],
        },
        columns=cols,
    )

    # Window of 2: the first row sees one value, later rows see two, except
    # where the final value of the column is missing.
    expected = DataFrame(
        {
            "int": [1.0, 2.0, 2.0],
            "float": [1.0, 2.0, 2.0],
            "string": [1.0, 2.0, 2.0],
            "datetime": [1.0, 2.0, 2.0],
            "timedelta": [1.0, 2.0, 2.0],
            "periods": [1.0, 2.0, 2.0],
            "fl_inf": [1.0, 2.0, 2.0],
            "fl_nan": [1.0, 2.0, 1.0],
            "str_nan": [1.0, 2.0, 1.0],
            "dt_nat": [1.0, 2.0, 1.0],
            "periods_nat": [1.0, 2.0, 1.0],
        },
        columns=cols,
    )

    result = df.rolling(window=2, min_periods=0).count()
    tm.assert_frame_equal(result, expected)

    # Window of 1: the count is simply "is this cell present?" as a float
    result = df.rolling(1, min_periods=0).count()
    expected = df.notna().astype(float)
    tm.assert_frame_equal(result, expected)
Пример #51
0
class Base:
    """
    Shared construction tests for every IntervalIndex constructor variant.

    Each test describes its input as a sequence of breaks; the subclass
    method get_kwargs_from_breaks converts those breaks into the keyword
    arguments expected by the constructor under test.
    """
    @pytest.mark.parametrize(
        "breaks",
        [
            [3, 14, 15, 92, 653],
            np.arange(10, dtype="int64"),
            Int64Index(range(-10, 11)),
            Float64Index(np.arange(20, 30, 0.5)),
            date_range("20180101", periods=10),
            date_range("20180101", periods=10, tz="US/Eastern"),
            timedelta_range("1 day", periods=10),
        ],
    )
    def test_constructor(self, constructor, breaks, closed, name):
        kwargs = self.get_kwargs_from_breaks(breaks, closed)
        index = constructor(closed=closed, name=name, **kwargs)

        # plain lists carry no dtype attribute; int64 is expected for them
        subtype = getattr(breaks, "dtype", "int64")

        assert index.closed == closed
        assert index.name == name
        assert index.dtype.subtype == subtype
        tm.assert_index_equal(index.left, Index(breaks[:-1]))
        tm.assert_index_equal(index.right, Index(breaks[1:]))

    @pytest.mark.parametrize(
        "breaks, subtype",
        [
            (Int64Index([0, 1, 2, 3, 4]), "float64"),
            (Int64Index([0, 1, 2, 3, 4]), "datetime64[ns]"),
            (Int64Index([0, 1, 2, 3, 4]), "timedelta64[ns]"),
            (Float64Index([0, 1, 2, 3, 4]), "int64"),
            (date_range("2017-01-01", periods=5), "int64"),
            (timedelta_range("1 day", periods=5), "int64"),
        ],
    )
    def test_constructor_dtype(self, constructor, breaks, subtype):
        # GH 19262: conversion via dtype parameter
        converted = breaks.astype(subtype)
        expected = constructor(**self.get_kwargs_from_breaks(converted))

        kwargs = self.get_kwargs_from_breaks(breaks)
        interval_dtype = IntervalDtype(subtype)
        # the dtype may be passed as an object or as its string form
        for dtype in (interval_dtype, str(interval_dtype)):
            tm.assert_index_equal(constructor(dtype=dtype, **kwargs),
                                  expected)

    @pytest.mark.parametrize("breaks",
                             [[np.nan] * 2, [np.nan] * 4, [np.nan] * 50])
    def test_constructor_nan(self, constructor, breaks, closed):
        # GH 18421
        kwargs = self.get_kwargs_from_breaks(breaks)
        index = constructor(closed=closed, **kwargs)

        assert index.closed == closed
        assert index.dtype.subtype == np.float64
        tm.assert_numpy_array_equal(
            index._ndarray_values, np.array(breaks[:-1], dtype=object))

    @pytest.mark.parametrize(
        "breaks",
        [
            [],
            np.array([], dtype="int64"),
            np.array([], dtype="float64"),
            np.array([], dtype="datetime64[ns]"),
            np.array([], dtype="timedelta64[ns]"),
        ],
    )
    def test_constructor_empty(self, constructor, breaks, closed):
        # GH 18421
        kwargs = self.get_kwargs_from_breaks(breaks)
        index = constructor(closed=closed, **kwargs)

        # an empty list has no dtype attribute; int64 is expected for it
        subtype = getattr(breaks, "dtype", np.int64)

        assert index.empty
        assert index.closed == closed
        assert index.dtype.subtype == subtype
        tm.assert_numpy_array_equal(
            index._ndarray_values, np.array([], dtype=object))

    @pytest.mark.parametrize(
        "breaks",
        [
            tuple("0123456789"),
            list("abcdefghij"),
            np.array(list("abcdefghij"), dtype=object),
            np.array(list("abcdefghij"), dtype="<U1"),
        ],
    )
    def test_constructor_string(self, constructor, breaks):
        # GH 19016
        msg = ("category, object, and string subtypes are not supported "
               "for IntervalIndex")
        with pytest.raises(TypeError, match=msg):
            constructor(**self.get_kwargs_from_breaks(breaks))

    @pytest.mark.parametrize("cat_constructor",
                             [Categorical, CategoricalIndex])
    def test_constructor_categorical_valid(self, constructor, cat_constructor):
        # GH 21243/21253
        wraps_index = (isinstance(constructor, partial)
                       and constructor.func is Index)
        if wraps_index:
            # Index is defined to create CategoricalIndex from categorical data
            pytest.skip()

        plain_breaks = np.arange(10, dtype="int64")
        expected = IntervalIndex.from_breaks(plain_breaks)

        kwargs = self.get_kwargs_from_breaks(cat_constructor(plain_breaks))
        tm.assert_index_equal(constructor(**kwargs), expected)

    def test_generic_errors(self, constructor):
        # valid placeholder data, so only the keyword under test is wrong
        valid_kwargs = self.get_kwargs_from_breaks(range(10))

        # invalid closed
        with pytest.raises(ValueError,
                           match="invalid option for 'closed': invalid"):
            constructor(closed="invalid", **valid_kwargs)

        # unsupported dtype
        with pytest.raises(TypeError,
                           match="dtype must be an IntervalDtype, got int64"):
            constructor(dtype="int64", **valid_kwargs)

        # invalid dtype
        with pytest.raises(TypeError,
                           match="data type [\"']invalid[\"'] not understood"):
            constructor(dtype="invalid", **valid_kwargs)

        # no point in nesting periods in an IntervalIndex
        period_kwargs = self.get_kwargs_from_breaks(
            period_range("2000-01-01", periods=10))
        msg = "Period dtypes are not supported, use a PeriodIndex instead"
        with pytest.raises(ValueError, match=msg):
            constructor(**period_kwargs)

        # decreasing values
        reversed_kwargs = self.get_kwargs_from_breaks(range(10, -1, -1))
        msg = "left side of interval must be <= right side"
        with pytest.raises(ValueError, match=msg):
            constructor(**reversed_kwargs)
Пример #52
0
    def test_dt_accessor_api_for_categorical(self):
        """The ``.dt`` accessor on categorical data mirrors the plain Series.

        https://github.com/pandas-dev/pandas/issues/10661

        For datetime, period and timedelta data, each attribute listed in
        ``_datetimelike_ops`` is read, and each remaining public name of
        ``s.dt`` is called, on both the plain Series and its
        ``category``-dtype counterpart; the two results must be equal.
        """
        s_dr = Series(date_range("1/1/2015", periods=5, tz="MET"))
        c_dr = s_dr.astype("category")

        s_pr = Series(period_range("1/1/2015", freq="D", periods=5))
        c_pr = s_pr.astype("category")

        s_tdr = Series(timedelta_range("1 days", "10 days"))
        c_tdr = s_tdr.astype("category")

        # only testing field (like .day)
        # and bool (is_month_start)
        test_data = [
            ("Datetime", DatetimeIndex._datetimelike_ops, s_dr, c_dr),
            ("Period", PeriodArray._datetimelike_ops, s_pr, c_pr),
            ("Timedelta", TimedeltaIndex._datetimelike_ops, s_tdr, c_tdr),
        ]

        assert isinstance(c_dr.dt, Properties)

        # methods that cannot be called without arguments
        special_func_defs = [
            ("strftime", ("%Y-%m-%d",), {}),
            ("tz_convert", ("EST",), {}),
            ("round", ("D",), {}),
            ("floor", ("D",), {}),
            ("ceil", ("D",), {}),
            ("asfreq", ("D",), {}),
            # FIXME: don't leave commented-out
            # ('tz_localize', ("UTC",), {}),
        ]
        _special_func_names = [f[0] for f in special_func_defs]

        # the series is already localized
        _ignore_names = ["tz_localize", "components"]

        for name, attr_names, s, c in test_data:
            func_names = [
                f
                for f in dir(s.dt)
                if not (
                    f.startswith("_")
                    or f in attr_names
                    or f in _special_func_names
                    or f in _ignore_names
                )
            ]

            func_defs = [(f, (), {}) for f in func_names]
            for f_def in special_func_defs:
                if f_def[0] in dir(s.dt):
                    func_defs.append(f_def)

            for func, args, kwargs in func_defs:
                with warnings.catch_warnings():
                    if func == "to_period":
                        # dropping TZ
                        warnings.simplefilter("ignore", UserWarning)
                    res = getattr(c.dt, func)(*args, **kwargs)
                    exp = getattr(s.dt, func)(*args, **kwargs)

                tm.assert_equal(res, exp)

            for attr in attr_names:
                if attr in ["week", "weekofyear"]:
                    # GH#33595 Deprecate week and weekofyear
                    continue
                res = getattr(c.dt, attr)
                exp = getattr(s.dt, attr)

                # Compare inside the loop so every attribute is checked;
                # previously this ran once per data kind, after the loop,
                # and so only covered the last attribute.
                if isinstance(res, DataFrame):
                    tm.assert_frame_equal(res, exp)
                elif isinstance(res, Series):
                    tm.assert_series_equal(res, exp)
                else:
                    tm.assert_almost_equal(res, exp)

        invalid = Series([1, 2, 3]).astype("category")
        msg = "Can only use .dt accessor with datetimelike"

        with pytest.raises(AttributeError, match=msg):
            invalid.dt
        assert not hasattr(invalid, "str")
Пример #53
0
)
def test_invalid_nat_setitem_array(arr, non_casting_nats):
    """Each value in ``non_casting_nats`` is rejected when set into ``arr``."""
    msg = (
        "value should be a '(Timestamp|Timedelta|Period)', 'NaT', or array of those. "
        "Got '(timedelta64|datetime64|int)' instead.")

    for bad_nat in non_casting_nats:
        # every assignment gets its own raises block
        with pytest.raises(TypeError, match=msg):
            arr[0] = bad_nat


@pytest.mark.parametrize(
    "arr",
    [
        pd.date_range("2000", periods=4).array,
        pd.timedelta_range("2000", periods=4).array,
    ],
)
def test_to_numpy_extra(arr):
    """``to_numpy`` handles a missing first element and leaves ``arr`` intact.

    The first element is set to ``NaT``; conversions with and without an
    explicit ``dtype`` / ``na_value`` are then checked, followed by a check
    that ``arr`` itself was not modified.
    """
    arr[0] = NaT
    original = arr.copy()

    result = arr.to_numpy()
    assert np.isnan(result[0])

    # with no na_value the missing slot converts to the minimum int64
    result = arr.to_numpy(dtype="int64")
    assert result[0] == -9223372036854775808

    result = arr.to_numpy(dtype="int64", na_value=0)
    assert result[0] == 0

    # None of the conversions above may write back into ``arr``; previously
    # ``original`` was captured but never compared against.
    assert arr.equals(original)
Пример #54
0
class TestSeriesConstructors(TestData):
    def test_invalid_dtype(self):
        """Objects that are not dtypes are rejected as ``dtype`` (GH15520)."""
        bad_dtypes = (pd.Timestamp, 'pd.Timestamp', list)
        for bad in bad_dtypes:
            with tm.assert_raises_regex(TypeError, 'not understood'):
                Series([], name='time', dtype=bad)

    def test_scalar_conversion(self):
        """A scalar stays wrapped in a Series; a length-1 Series coerces."""
        # Building from a scalar still gives a Series, not a bare float
        wrapped = Series(0.5)
        assert not isinstance(wrapped, float)

        # A single-element Series converts through each numeric builtin
        conversions = ((float, 1.0), (int, 1), (long, 1))
        for convert, expected in conversions:
            assert convert(Series([1.])) == expected

    def test_constructor(self):
        """Construction from a Series, from mixed values and from bad input."""
        assert self.ts.index.is_all_dates

        # Pass in Series
        rewrapped = Series(self.ts)
        assert rewrapped.index.is_all_dates
        assert tm.equalContents(rewrapped.index, self.ts.index)
        # Ensure new index is not created
        assert id(self.ts.index) == id(rewrapped.index)

        # Mixed type Series
        mixed = Series(['hello', np.NaN], index=[0, 1])
        assert mixed.dtype == np.object_
        assert mixed[1] is np.NaN

        assert not self.empty.index.is_all_dates
        assert not Series({}).index.is_all_dates

        # 2-D values are rejected
        matrix = np.random.randn(3, 3)
        positions = np.arange(3)
        with pytest.raises(Exception):
            Series(matrix, index=positions)

        # the name survives re-wrapping
        mixed.name = 'Series'
        assert Series(mixed).name == 'Series'

        # raise on MultiIndex GH4187
        multi = MultiIndex.from_arrays([[1, 2], [3, 4]])
        with pytest.raises(NotImplementedError):
            Series(multi)

    @pytest.mark.parametrize('input_class', [list, dict, OrderedDict])
    def test_constructor_empty(self, input_class):
        """An empty container builds the same Series as passing no data."""
        # Without an index the two sides hold Index() and RangeIndex(), which
        # are .equals but do not compare type equal, hence
        # check_index_type=False.  The keyword sets cover the default dtype,
        # an explicit float64, and category (GH 18515).
        for kwargs in ({}, {'dtype': 'float64'}, {'dtype': 'category'}):
            no_data = Series(**kwargs)
            from_container = Series(input_class(), **kwargs)
            assert_series_equal(no_data, from_container,
                                check_index_type=False)

        if input_class is list:
            return

        # With index:
        no_data = Series(index=lrange(10))
        from_container = Series(input_class(), index=lrange(10))
        assert_series_equal(no_data, from_container)

        # With index and dtype float64:
        no_data = Series(np.nan, index=lrange(10))
        from_container = Series(input_class(), index=lrange(10),
                                dtype='float64')
        assert_series_equal(no_data, from_container)

        # GH 19853 : with empty string, index and dtype str
        typed = Series('', dtype=str, index=range(3))
        untyped = Series('', index=range(3))
        assert_series_equal(typed, untyped)

    @pytest.mark.parametrize('input_arg', [np.nan, float('nan')])
    def test_constructor_nan(self, input_arg):
        """Broadcasting a NaN scalar equals an empty float64 Series."""
        no_data = Series(dtype='float64', index=lrange(10))
        broadcast = Series(input_arg, index=lrange(10))

        assert_series_equal(no_data, broadcast, check_index_type=False)

    @pytest.mark.parametrize('dtype', [
        'f8',
        'i8',
        'M8[ns]',
        'm8[ns]',
        'category',
        'object',
        'datetime64[ns, UTC]',
    ])
    @pytest.mark.parametrize('index', [None, pd.Index([])])
    def test_constructor_dtype_only(self, dtype, index):
        """A Series given only a dtype is empty and keeps that dtype."""
        # GH-20865
        ser = pd.Series(dtype=dtype, index=index)
        assert ser.dtype == dtype
        assert len(ser) == 0

    def test_constructor_no_data_index_order(self):
        """With no data, the index keeps the order it was given in."""
        ser = pd.Series(index=['b', 'a', 'c'])
        labels = ser.index.tolist()
        assert labels == ['b', 'a', 'c']

    def test_constructor_dtype_str_na_values(self, string_dtype):
        """Missing values are preserved under a string dtype."""
        # https://github.com/pandas-dev/pandas/issues/21083
        with_none = Series(['x', None], dtype=string_dtype)
        tm.assert_series_equal(with_none.isna(), Series([False, True]))
        # None is kept as None
        assert with_none.iloc[1] is None

        with_nan = Series(['x', np.nan], dtype=string_dtype)
        assert np.isnan(with_nan.iloc[1])

    def test_constructor_series(self):
        """Passing a Series plus a sorted index realigns the values."""
        labels = ['d', 'b', 'a', 'c']
        source = Series([4, 7, -5, 3], index=labels)
        realigned = Series(source, index=sorted(labels))

        assert_series_equal(realigned, source.sort_index())

    def test_constructor_iterator(self):
        """A ``range`` builds the same Series as the equivalent list."""
        from_list = Series(list(range(10)), dtype='int64')
        from_range = Series(range(10), dtype='int64')
        assert_series_equal(from_range, from_list)

    def test_constructor_list_like(self):
        """List, tuple and int64 array inputs all give an int64 Series."""
        # make sure that we are coercing different
        # list-likes to standard dtypes and not
        # platform specific
        expected = Series([1, 2, 3], dtype='int64')
        list_likes = (
            [1, 2, 3],
            (1, 2, 3),
            np.array([1, 2, 3], dtype='int64'),
        )
        for values in list_likes:
            assert_series_equal(Series(values, index=[0, 1, 2]), expected)

    @pytest.mark.parametrize('input_vals', [
        [1, 2],
        ['1', '2'],
        list(pd.date_range('1/1/2011', periods=2, freq='H')),
        list(pd.date_range('1/1/2011', periods=2, freq='H',
                           tz='US/Eastern')),
        [pd.Interval(left=0, right=5)],
    ])
    def test_constructor_list_str(self, input_vals, string_dtype):
        """Constructing with a string dtype matches ``astype`` afterwards."""
        # GH 16605
        # Ensure that data elements from a list are converted to strings
        # when dtype is str, 'str', or 'U'
        via_astype = Series(input_vals).astype(string_dtype)
        via_constructor = Series(input_vals, dtype=string_dtype)
        assert_series_equal(via_constructor, via_astype)

    def test_constructor_list_str_na(self, string_dtype):
        """Floats become strings under a string dtype while NaN stays NaN."""
        converted = Series([1.0, 2.0, np.nan], dtype=string_dtype)
        assert_series_equal(converted,
                            Series(['1.0', '2.0', np.nan], dtype=object))
        assert np.isnan(converted[2])

    def test_constructor_generator(self):
        """A generator is consumed like a list, with or without an index."""
        expected = Series(lrange(10))
        assert_series_equal(Series(i for i in range(10)), expected)

        # a generator is single-use, so a fresh one feeds the second case
        indexed = Series((i for i in range(10)), index=lrange(10, 20))
        expected.index = lrange(10, 20)
        assert_series_equal(indexed, expected)

    def test_constructor_map(self):
        """A ``map`` object is consumed like a list (GH8909)."""
        expected = Series(lrange(10))
        assert_series_equal(Series(map(lambda x: x, range(10))), expected)

        # a map object is single-use, so a fresh one feeds the second case
        indexed = Series(map(lambda x: x, range(10)), index=lrange(10, 20))
        expected.index = lrange(10, 20)
        assert_series_equal(indexed, expected)

    def test_constructor_categorical(self):
        """Series accepts Categorical data and the ``category`` dtype."""
        codes_cat = pd.Categorical([0, 1, 2, 0, 1, 2], ['a', 'b', 'c'],
                                   fastpath=True)
        tm.assert_categorical_equal(Series(codes_cat).values, codes_cat)

        # GH12574
        with pytest.raises(ValueError):
            Series(pd.Categorical([1, 2, 3]), dtype='int64')

        # both the Series and its dtype are recognised as categorical,
        # whether built from a Categorical or from a plain list
        from_cat = Series(pd.Categorical([1, 2, 3]), dtype='category')
        assert is_categorical_dtype(from_cat)
        assert is_categorical_dtype(from_cat.dtype)

        from_list = Series([1, 2, 3], dtype='category')
        assert is_categorical_dtype(from_list)
        assert is_categorical_dtype(from_list.dtype)

    def test_constructor_categorical_with_coercion(self):
        """Categorical data round-trips through Series and DataFrame.

        The bare ``str(...)`` calls only check that building the repr does
        not raise.
        """
        factor = Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'])
        n_values = len(factor)

        # test basic creation / coercion of categoricals
        ser = Series(factor, name='A')
        assert ser.dtype == 'category'
        assert len(ser) == n_values
        str(ser.values)
        str(ser)

        # in a frame
        frame = DataFrame({'A': factor})
        tm.assert_series_equal(frame['A'], ser)
        tm.assert_series_equal(frame.iloc[:, 0], ser)
        assert len(frame) == n_values
        str(frame.values)
        str(frame)

        # frame built from the Series itself
        frame = DataFrame({'A': ser})
        tm.assert_series_equal(frame['A'], ser)
        assert len(frame) == n_values
        str(frame.values)
        str(frame)

        # multiples
        frame = DataFrame({'A': ser, 'B': ser, 'C': 1})
        col_a = frame['A']
        col_b = frame['B']
        tm.assert_series_equal(col_a, ser)
        tm.assert_series_equal(col_b, ser, check_names=False)
        assert col_b.name == 'B'
        assert len(frame) == n_values
        str(frame.values)
        str(frame)

        # GH8623
        people = DataFrame(
            [[1, 'John P. Doe'], [2, 'Jane Dove'], [1, 'John P. Doe']],
            columns=['person_id', 'person_name'])
        people['person_name'] = Categorical(
            people.person_name)  # doing this breaks transform

        expected = people.iloc[0].person_name

        # every way of reaching the first name gives the same scalar
        assert people.person_name.iloc[0] == expected
        assert people.person_name[0] == expected
        assert people.person_name.loc[0] == expected

    def test_constructor_categorical_dtype(self):
        """A CategoricalDtype sets the categories and orderedness."""
        ordered_dtype = CategoricalDtype(['a', 'b', 'c'], ordered=True)
        ordered = pd.Series(['a', 'b'], dtype=ordered_dtype)
        assert is_categorical_dtype(ordered) is True
        tm.assert_index_equal(ordered.cat.categories,
                              pd.Index(['a', 'b', 'c']))
        assert ordered.cat.ordered

        # categories keep the order given to the dtype
        unordered = pd.Series(['a', 'b'], dtype=CategoricalDtype(['b', 'a']))
        assert is_categorical_dtype(unordered)
        tm.assert_index_equal(unordered.cat.categories, pd.Index(['b', 'a']))
        assert unordered.cat.ordered is False

        # GH 19565 - Check broadcasting of scalar with Categorical dtype
        broadcast = Series('a',
                           index=[0, 1],
                           dtype=CategoricalDtype(['a', 'b'], ordered=True))
        spelled_out = Series(['a', 'a'],
                             index=[0, 1],
                             dtype=CategoricalDtype(['a', 'b'], ordered=True))
        tm.assert_series_equal(broadcast, spelled_out,
                               check_categorical=True)

    def test_categorical_sideeffects_free(self):
        """Series and Categorical only share data when ``copy`` is not set.

        The first half builds the Series with ``copy=True`` and checks that
        changes to the Series leave the Categorical untouched; the second
        half uses the default and checks that the changes show through.
        """
        # Passing a categorical to a Series and then changing values in either
        # the series or the categorical should not change the values in the
        # other one, IF you specify copy!
        cat = Categorical(["a", "b", "c", "a"])
        s = Series(cat, copy=True)
        # Compare the underlying values, mirroring the ``s.values is cat``
        # check in the no-copy half below; ``s.cat`` is the accessor object
        # and could never be ``cat`` itself, so that check was vacuous.
        assert s.values is not cat
        s.cat.categories = [1, 2, 3]
        exp_s = np.array([1, 2, 3, 1], dtype=np.int64)
        exp_cat = np.array(["a", "b", "c", "a"], dtype=np.object_)
        tm.assert_numpy_array_equal(s.__array__(), exp_s)
        tm.assert_numpy_array_equal(cat.__array__(), exp_cat)

        # setting
        s[0] = 2
        exp_s2 = np.array([2, 2, 3, 1], dtype=np.int64)
        tm.assert_numpy_array_equal(s.__array__(), exp_s2)
        tm.assert_numpy_array_equal(cat.__array__(), exp_cat)

        # however, copy is False by default
        # so this WILL change values
        cat = Categorical(["a", "b", "c", "a"])
        s = Series(cat)
        assert s.values is cat
        s.cat.categories = [1, 2, 3]
        exp_s = np.array([1, 2, 3, 1], dtype=np.int64)
        tm.assert_numpy_array_equal(s.__array__(), exp_s)
        tm.assert_numpy_array_equal(cat.__array__(), exp_s)

        s[0] = 2
        exp_s2 = np.array([2, 2, 3, 1], dtype=np.int64)
        tm.assert_numpy_array_equal(s.__array__(), exp_s2)
        tm.assert_numpy_array_equal(cat.__array__(), exp_s2)

    def test_unordered_compare_equal(self):
        """A value outside the dtype's categories equals an explicit NaN."""
        categories = ['a', 'b']
        via_dtype = pd.Series(['a', 'b', 'c'],
                              dtype=CategoricalDtype(['a', 'b']))
        via_categorical = pd.Series(
            pd.Categorical(['a', 'b', np.nan], categories=categories))
        tm.assert_series_equal(via_dtype, via_categorical)

    def test_constructor_maskedarray(self):
        """Masked slots of a numpy masked array become missing values.

        For float, int, bool and datetime64 data the array starts fully
        masked and is filled in step by step; a Series is built and checked
        at every stage.
        """
        labels = ['a', 'b', 'c']

        # --- float ---
        masked = ma.masked_all((3, ), dtype=float)
        assert_series_equal(Series(masked), Series([nan, nan, nan]))

        masked[0] = 0.0
        masked[2] = 2.0
        assert_series_equal(Series(masked, index=labels),
                            Series([0.0, nan, 2.0], index=labels))

        masked[1] = 1.0
        assert_series_equal(Series(masked, index=labels),
                            Series([0.0, 1.0, 2.0], index=labels))

        # --- int: float is expected while any slot is still masked ---
        masked = ma.masked_all((3, ), dtype=int)
        assert_series_equal(Series(masked),
                            Series([nan, nan, nan], dtype=float))

        masked[0] = 0
        masked[2] = 2
        assert_series_equal(Series(masked, index=labels),
                            Series([0, nan, 2], index=labels, dtype=float))

        masked[1] = 1
        assert_series_equal(Series(masked, index=labels),
                            Series([0, 1, 2], index=labels, dtype=int))

        # --- bool: object is expected while any slot is still masked ---
        masked = ma.masked_all((3, ), dtype=bool)
        assert_series_equal(Series(masked),
                            Series([nan, nan, nan], dtype=object))

        masked[0] = True
        masked[2] = False
        assert_series_equal(
            Series(masked, index=labels),
            Series([True, nan, False], index=labels, dtype=object))

        masked[1] = True
        assert_series_equal(
            Series(masked, index=labels),
            Series([True, True, False], index=labels, dtype=bool))

        # --- datetime64: masked slots are expected as iNaT ---
        masked = ma.masked_all((3, ), dtype='M8[ns]')
        assert_series_equal(Series(masked),
                            Series([iNaT, iNaT, iNaT], dtype='M8[ns]'))

        masked[0] = datetime(2001, 1, 1)
        masked[2] = datetime(2001, 1, 3)
        partly_filled = Series(masked, index=labels)
        expected = Series([datetime(2001, 1, 1), iNaT,
                           datetime(2001, 1, 3)],
                          index=labels,
                          dtype='M8[ns]')
        assert_series_equal(partly_filled, expected)

        masked[1] = datetime(2001, 1, 2)
        filled = Series(masked, index=labels)
        expected = Series(
            [datetime(2001, 1, 1),
             datetime(2001, 1, 2),
             datetime(2001, 1, 3)],
            index=labels,
            dtype='M8[ns]')
        assert_series_equal(filled, expected)

    def test_series_ctor_plus_datetimeindex(self):
        """A DatetimeIndex passed alongside dict data is reused as-is."""
        rng = date_range('20090415', '20090519', freq='B')
        by_date = dict.fromkeys(rng, 1)

        ser = Series(by_date, index=rng)
        assert ser.index is rng

    def test_constructor_default_index(self):
        """With no index given, positions 0..n-1 are used."""
        ser = Series([0, 1, 2])
        positional = pd.Index(np.arange(3))
        tm.assert_index_equal(ser.index, positional)

    @pytest.mark.parametrize('input', [[1, 2, 3], (1, 2, 3),
                                       list(range(3)),
                                       pd.Categorical(['a', 'b', 'a']),
                                       (i for i in range(3)),
                                       map(lambda x: x, range(3))])
    def test_constructor_index_mismatch(self, input):
        """Three values with a four-element index raise ``ValueError``.

        GH 19342: construction of a Series with an index of different length
        must raise, whatever kind of list-like the values come from.
        """
        msg = 'Length of passed values is 3, index implies 4'
        # ``match`` checks the text of the raised exception.  The former
        # ``message`` keyword only set the failure text used when nothing
        # was raised, so the wording was never verified, and pytest 5
        # removed it.
        with pytest.raises(ValueError, match=msg):
            Series(input, index=np.arange(4))

    def test_constructor_numpy_scalar(self):
        """A zero-dimensional numpy array broadcasts like a scalar."""
        # GH 19342
        # construction with a numpy scalar
        # should not raise
        positions = np.arange(4)
        from_array = Series(np.array(100), index=positions, dtype='int64')
        from_scalar = Series(100, index=np.arange(4), dtype='int64')
        tm.assert_series_equal(from_array, from_scalar)

    def test_constructor_broadcast_list(self):
        """A one-element list is not broadcast over a longer index."""
        # GH 19342
        # construction with single-element container and index
        # should raise
        with pytest.raises(ValueError):
            Series(['foo'], index=['a', 'b', 'c'])

    def test_constructor_corner(self):
        """A list of DataFrames can be stored as Series values."""
        frame = tm.makeTimeDataFrame()
        ser = Series([frame, frame], index=[0, 1])
        assert isinstance(ser, Series)

    def test_constructor_sanitize(self):
        """An ``i8`` request is honoured only when no NaN is present."""
        whole = Series(np.array([1., 1., 8.]), dtype='i8')
        assert whole.dtype == np.dtype('i8')

        # with a NaN in the data the result is expected to stay float
        with_nan = Series(np.array([1., 1., np.nan]), copy=True, dtype='i8')
        assert with_nan.dtype == np.dtype('f8')

    def test_constructor_copy(self):
        """``copy=True`` together with ``dtype`` gives an independent copy."""
        # GH15125
        # test dtype parameter has no side effects on copy=True
        for values in ([1.], np.array([1.])):
            source = Series(values)
            duplicate = pd.Series(source, copy=True, dtype=float)

            # copy=True maintains original data in Series
            tm.assert_series_equal(source, duplicate)

            # changes to origin of copy does not affect the copy
            source[0] = 2.
            assert not source.equals(duplicate)
            assert source[0] == 2.
            assert duplicate[0] == 1.

    @pytest.mark.parametrize(
        "index",
        [
            pd.date_range('20170101', periods=3, tz='US/Eastern'),
            pd.date_range('20170101', periods=3),
            pd.timedelta_range('1 day', periods=3),
            pd.period_range('2012Q1', periods=3, freq='Q'),
            pd.Index(list('abc')),
            pd.Int64Index([1, 2, 3]),
            pd.RangeIndex(0, 3)
        ],
        ids=lambda x: type(x).__name__)
    def test_constructor_limit_copies(self, index):
        """A Series built from an Index does not store the Index itself."""
        # GH 17449
        # limit copies of input
        ser = pd.Series(index)

        # we make 1 copy; this is just a smoke test here
        stored = ser._data.blocks[0].values
        assert stored is not index

    def test_constructor_pass_none(self):
        """``None`` data defaults to float64 unless a dtype is given."""
        default = Series(None, index=lrange(5))
        assert default.dtype == np.float64

        as_object = Series(None, index=lrange(5), dtype=object)
        assert as_object.dtype == np.object_

        # GH 7431
        # inference on the index
        from_array = Series(index=np.array([None]))
        from_index = Series(index=Index([None]))
        assert_series_equal(from_array, from_index)

    def test_constructor_pass_nan_nat(self):
        """All-NaN data is float64; any NaT present makes it datetime64."""
        # GH 13467
        exp = Series([np.nan, np.nan], dtype=np.float64)
        assert exp.dtype == np.float64
        tm.assert_series_equal(Series([np.nan, np.nan]), exp)
        tm.assert_series_equal(Series(np.array([np.nan, np.nan])), exp)

        exp = Series([pd.NaT, pd.NaT])
        assert exp.dtype == 'datetime64[ns]'

        # each NaT / NaN pairing, first as a list and then as an ndarray
        pairings = (
            [pd.NaT, pd.NaT],
            [pd.NaT, np.nan],
            [np.nan, pd.NaT],
        )
        for pair in pairings:
            tm.assert_series_equal(Series(pair), exp)
            tm.assert_series_equal(Series(np.array(pair)), exp)

    def test_constructor_cast(self):
        """Strings that are not numbers cannot be built as float."""
        with pytest.raises(ValueError):
            Series(['a', 'b', 'c'], dtype=float)

    def test_constructor_dtype_nocast(self):
        """Re-wrapping with the dtype already held shares the data (1572)."""
        source = Series([1, 2, 3])
        rewrapped = Series(source, dtype=np.int64)

        # a write through the new Series is visible in the original
        rewrapped[1] = 5
        assert source[1] == 5

    def test_constructor_datelike_coercion(self):
        """An explicit object dtype is kept for datetime-looking data."""
        # GH 9477
        # incorrectly inferring on datetimelike looking when object dtype is
        # specified
        stamp = Timestamp('20130101')
        ser = Series([stamp, 'NOV'], dtype=object)
        assert ser.iloc[0] == Timestamp('20130101')
        assert ser.iloc[1] == 'NOV'
        assert ser.dtype == object

        # the dtype was being reset on the slicing and re-inferred to datetime
        # even though the blocks are mixed
        belly = '216 3T19'.split()
        columns = {
            'wing1': '2T15 4H19'.split(),
            'wing2': '416 4T20'.split(),
            'mat': pd.to_datetime('2016-01-22 2019-09-07'.split()),
        }
        frame = pd.DataFrame(columns, index=belly)

        # rows mixing strings and datetimes come back as object
        for label in ('3T19', '216'):
            assert frame.loc[label].dtype == object

    def test_constructor_datetimes_with_nulls(self):
        """Arrays of None plus a single datetime must infer as M8[ns]."""
        # gh-15869
        candidates = (
            np.array([None, None, None, None, datetime.now(), None]),
            np.array([None, None, datetime.now(), None]),
        )
        for values in candidates:
            assert Series(values).dtype == 'M8[ns]'

    def test_constructor_dtype_datetime64(self):
        """Exercise Series construction with datetime64 data and dtypes.

        Sections, in order: scalar iNaT/nan broadcast with and without an
        explicit ``M8[ns]`` dtype, nulls inside lists, ``np.datetime64``
        inputs (GH3416), the GH3414 error checks, dates kept as plain
        ``datetime`` objects, mixed types, coercion of non-ns units (GH6529,
        GH 13876), ``datetime.date`` values, string inference next to nulls,
        tz-aware ranges (GH 8411) and non-convertible inputs.
        """

        s = Series(iNaT, dtype='M8[ns]', index=lrange(5))
        assert isna(s).all()

        # in theory this should be all nulls, but since
        # we are not specifying a dtype is ambiguous
        s = Series(iNaT, index=lrange(5))
        assert not isna(s).all()

        s = Series(nan, dtype='M8[ns]', index=lrange(5))
        assert isna(s).all()

        s = Series([datetime(2001, 1, 2, 0, 0), iNaT], dtype='M8[ns]')
        assert isna(s[1])
        assert s.dtype == 'M8[ns]'

        s = Series([datetime(2001, 1, 2, 0, 0), nan], dtype='M8[ns]')
        assert isna(s[1])
        assert s.dtype == 'M8[ns]'

        # GH3416
        dates = [
            np.datetime64(datetime(2013, 1, 1)),
            np.datetime64(datetime(2013, 1, 2)),
            np.datetime64(datetime(2013, 1, 3)),
        ]

        s = Series(dates)
        assert s.dtype == 'M8[ns]'

        # assigning a null must not change the dtype
        s.iloc[0] = np.nan
        assert s.dtype == 'M8[ns]'

        # GH3414 related
        # NOTE(review): both lambdas below declare a parameter ``x`` while
        # pytest.raises calls the callable with no arguments, so the
        # TypeError is raised for the missing argument before Series is ever
        # constructed. These two checks pass regardless of what the
        # constructor does -- confirm the intended behaviour and drop ``x``.
        pytest.raises(
            TypeError, lambda x: Series(Series(dates).astype('int') / 1000000,
                                        dtype='M8[ms]'))
        pytest.raises(TypeError, lambda x: Series(dates, dtype='datetime64'))

        # invalid dates can be held as object
        result = Series([datetime(2, 1, 1)])
        assert result[0] == datetime(2, 1, 1, 0, 0)

        result = Series([datetime(3000, 1, 1)])
        assert result[0] == datetime(3000, 1, 1, 0, 0)

        # don't mix types
        result = Series([Timestamp('20130101'), 1], index=['a', 'b'])
        assert result['a'] == Timestamp('20130101')
        assert result['b'] == 1

        # GH6529
        # coerce datetime64 non-ns properly
        dates = date_range('01-Jan-2015', '01-Dec-2015', freq='M')
        values2 = dates.view(np.ndarray).astype('datetime64[ns]')
        expected = Series(values2, index=dates)

        for dtype in ['s', 'D', 'ms', 'us', 'ns']:
            values1 = dates.view(np.ndarray).astype('M8[{0}]'.format(dtype))
            result = Series(values1, dates)
            assert_series_equal(result, expected)

        # GH 13876
        # coerce to non-ns to object properly
        expected = Series(values2, index=dates, dtype=object)
        for dtype in ['s', 'D', 'ms', 'us', 'ns']:
            values1 = dates.view(np.ndarray).astype('M8[{0}]'.format(dtype))
            result = Series(values1, index=dates, dtype=object)
            assert_series_equal(result, expected)

        # leave datetime.date alone
        dates2 = np.array([d.date() for d in dates.to_pydatetime()],
                          dtype=object)
        series1 = Series(dates2, dates)
        tm.assert_numpy_array_equal(series1.values, dates2)
        assert series1.dtype == object

        # these will correctly infer a datetime
        s = Series([None, pd.NaT, '2013-08-05 15:30:00.000001'])
        assert s.dtype == 'datetime64[ns]'
        s = Series([np.nan, pd.NaT, '2013-08-05 15:30:00.000001'])
        assert s.dtype == 'datetime64[ns]'
        s = Series([pd.NaT, None, '2013-08-05 15:30:00.000001'])
        assert s.dtype == 'datetime64[ns]'
        s = Series([pd.NaT, np.nan, '2013-08-05 15:30:00.000001'])
        assert s.dtype == 'datetime64[ns]'

        # tz-aware (UTC and other tz's)
        # GH 8411
        dr = date_range('20130101', periods=3)
        assert Series(dr).iloc[0].tz is None
        dr = date_range('20130101', periods=3, tz='UTC')
        assert str(Series(dr).iloc[0].tz) == 'UTC'
        dr = date_range('20130101', periods=3, tz='US/Eastern')
        assert str(Series(dr).iloc[0].tz) == 'US/Eastern'

        # non-convertible
        s = Series([1479596223000, -1479590, pd.NaT])
        assert s.dtype == 'object'
        assert s[2] is pd.NaT
        assert 'NaT' in str(s)

        # if we passed a NaT it remains
        s = Series([datetime(2010, 1, 1), datetime(2, 1, 1), pd.NaT])
        assert s.dtype == 'object'
        assert s[2] is pd.NaT
        assert 'NaT' in str(s)

        # if we passed a nan it remains
        s = Series([datetime(2010, 1, 1), datetime(2, 1, 1), np.nan])
        assert s.dtype == 'object'
        assert s[2] is np.nan
        assert 'NaN' in str(s)

    def test_constructor_with_datetime_tz(self):
        """Exercise Series built from tz-aware datetime data.

        Sections, in order: dtype reporting, export through ``.values``,
        scalar/boolean/slice indexing, concat, string formatting (short,
        with NaT, long), rebuilding a DatetimeIndex, dtype inference for
        same-tz versus mixed-tz Timestamps, and an all-NaT tz-aware Series.
        """

        # 8260
        # support datetime64 with tz

        dr = date_range('20130101', periods=3, tz='US/Eastern')
        s = Series(dr)
        assert s.dtype.name == 'datetime64[ns, US/Eastern]'
        assert s.dtype == 'datetime64[ns, US/Eastern]'
        assert is_datetime64tz_dtype(s.dtype)
        assert 'datetime64[ns, US/Eastern]' in str(s)

        # export
        result = s.values
        assert isinstance(result, np.ndarray)
        assert result.dtype == 'datetime64[ns]'

        # the exported values are treated as UTC and converted back to the
        # Series' timezone before being compared with the original range
        exp = pd.DatetimeIndex(result)
        exp = exp.tz_localize('UTC').tz_convert(tz=s.dt.tz)
        tm.assert_index_equal(dr, exp)

        # indexing
        result = s.iloc[0]
        assert result == Timestamp('2013-01-01 00:00:00-0500',
                                   tz='US/Eastern',
                                   freq='D')
        result = s[0]
        assert result == Timestamp('2013-01-01 00:00:00-0500',
                                   tz='US/Eastern',
                                   freq='D')

        # boolean mask selecting the first two rows
        result = s[Series([True, True, False], index=s.index)]
        assert_series_equal(result, s[0:2])

        result = s.iloc[0:1]
        assert_series_equal(result, Series(dr[0:1]))

        # concat
        result = pd.concat([s.iloc[0:1], s.iloc[1:]])
        assert_series_equal(result, s)

        # short str
        assert 'datetime64[ns, US/Eastern]' in str(s)

        # formatting with NaT
        result = s.shift()
        assert 'datetime64[ns, US/Eastern]' in str(result)
        assert 'NaT' in str(result)

        # long str
        t = Series(date_range('20130101', periods=1000, tz='US/Eastern'))
        assert 'datetime64[ns, US/Eastern]' in str(t)

        result = pd.DatetimeIndex(s, freq='infer')
        tm.assert_index_equal(result, dr)

        # inference
        # both Timestamps share one timezone -> tz-aware datetime64 dtype
        s = Series([
            pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),
            pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific')
        ])
        assert s.dtype == 'datetime64[ns, US/Pacific]'
        assert lib.infer_dtype(s) == 'datetime64'

        # different timezones -> object dtype
        s = Series([
            pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),
            pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Eastern')
        ])
        assert s.dtype == 'object'
        assert lib.infer_dtype(s) == 'datetime'

        # with all NaT
        s = Series(pd.NaT, index=[0, 1], dtype='datetime64[ns, US/Eastern]')
        expected = Series(pd.DatetimeIndex(['NaT', 'NaT'], tz='US/Eastern'))
        assert_series_equal(s, expected)

    @pytest.mark.parametrize("arr_dtype", [np.int64, np.float64])
    @pytest.mark.parametrize("dtype", ["M8", "m8"])
    @pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D'])
    def test_construction_to_datetimelike_unit(self, arr_dtype, dtype, unit):
        """Series.astype to a unit-qualified M8/m8 dtype matches ndarray.astype."""
        # tests all units
        # gh-19223
        target = "{}[{}]".format(dtype, unit)
        raw = np.array([1, 2, 3], dtype=arr_dtype)

        result = Series(raw).astype(target)
        expected = Series(raw.astype(target))

        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        'arg', ['2013-01-01 00:00:00', pd.NaT, np.nan, None])
    def test_constructor_with_naive_string_and_datetimetz_dtype(self, arg):
        """A naive value with a tz-aware dtype equals the tz-localized value."""
        # GH 17415: With naive string
        result = Series([arg], dtype='datetime64[ns, CET]')
        naive = Series(pd.Timestamp(arg))
        expected = naive.dt.tz_localize('CET')
        assert_series_equal(result, expected)

    def test_construction_interval(self):
        """Series built from an IntervalIndex or its values keeps the intervals."""
        # construction from interval & array of intervals
        intervals = IntervalIndex.from_breaks(np.arange(3), closed='right')

        from_index = Series(intervals)
        # smoke-test both text representations
        repr(from_index)
        str(from_index)
        tm.assert_index_equal(Index(from_index.values), intervals)

        from_values = Series(intervals.values)
        tm.assert_index_equal(Index(from_values.values), intervals)

    def test_construction_consistency(self):
        """Rebuilding a tz-aware Series with its own dtype must not re-localize."""
        # make sure that we are not re-localizing upon construction
        # GH 14928
        aware = Series(pd.date_range('20130101', periods=3, tz='US/Eastern'))

        # the Series itself, its UTC conversion and its raw values must all
        # reconstruct to the same Series when the original dtype is passed
        sources = (aware, aware.dt.tz_convert('UTC'), aware.values)
        for source in sources:
            rebuilt = Series(source, dtype=aware.dtype)
            tm.assert_series_equal(rebuilt, aware)

    def test_constructor_periodindex(self):
        """A PeriodIndex placed in a Series is expected to end up as object dtype."""
        # GH7932
        # converting a PeriodIndex when put in a Series
        periods = period_range('20130101', periods=5, freq='D')
        result = Series(periods)

        assert_series_equal(result, Series(periods.astype(object)))
        assert result.dtype == 'object'

    def test_constructor_dict(self):
        """Dict data is aligned to an explicit index; absent keys become NaN."""
        labels = ['b', 'c', 'd', 'a']
        result = Series({'a': 0., 'b': 1., 'c': 2.}, index=labels)
        expected = Series([1, 2, nan, 0], index=labels)
        assert_series_equal(result, expected)

        # dict keyed by the first two entries of a PeriodIndex: only those
        # two positions are populated, everything else stays NaN
        pidx = tm.makePeriodIndex(100)
        result = Series({pidx[0]: 0, pidx[1]: 1}, index=pidx)
        expected = Series(np.nan, pidx)
        for position, value in enumerate((0, 1)):
            expected.iloc[position] = value
        assert_series_equal(result, expected)

    def test_constructor_dict_order(self):
        """Check the ordering of a Series built from an unordered-looking dict."""
        # GH19018
        # initialization ordering: by insertion order if python>= 3.6, else
        # order by value
        result = Series({'b': 1, 'a': 0, 'c': 2})
        values, labels = ([1, 0, 2], 'bac') if PY36 else ([0, 1, 2], 'abc')
        expected = Series(values, index=list(labels))
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("value", [2, np.nan, None, float('nan')])
    def test_constructor_dict_nan_key(self, value):
        """NaN-like dict keys are kept as index labels, flat and in tuples."""
        # GH 18480
        flat = {1: 'a', value: 'b', float('nan'): 'c', 4: 'd'}
        result = Series(flat).sort_values()
        expected = Series(list('abcd'), index=[1, value, np.nan, 4])
        assert_series_equal(result, expected)

        # MultiIndex:
        nested = {(1, 1): 'a', (2, np.nan): 'b', (3, value): 'c'}
        result = Series(nested).sort_values()
        tuple_index = Index([(1, 1), (2, np.nan), (3, value)])
        expected = Series(list('abc'), index=tuple_index)
        assert_series_equal(result, expected)

    def test_constructor_dict_datetime64_index(self):
        """Dicts keyed by datetime64, datetime or Timestamp give equal Series."""
        # GH 9456

        dates_as_str = ['1984-02-19', '1988-11-06', '1989-12-03', '1990-03-15']
        values = [42544017.198965244, 1234565, 40512335.181958228, -1]

        expected = Series(values, (Timestamp(x) for x in dates_as_str))

        # one key constructor per supported datetime flavour
        key_makers = (
            np.datetime64,
            lambda text: datetime.strptime(text, '%Y-%m-%d'),
            Timestamp,
        )
        for make_key in key_makers:
            data = {make_key(text): val
                    for text, val in zip(dates_as_str, values)}
            assert_series_equal(Series(data), expected)

    def test_constructor_list_of_tuples(self):
        data = [(1, 1), (2, 2), (2, 3)]
        s = Series(data)
        assert list(s) == data

    def test_constructor_tuple_of_tuples(self):
        data = ((1, 1), (2, 2), (2, 3))
        s = Series(data)
        assert tuple(s) == data

    def test_constructor_dict_of_tuples(self):
        """Tuple dict keys are expected to form a MultiIndex."""
        keys = [(1, 2), (None, 5)]
        result = Series(dict(zip(keys, [3, 6]))).sort_values()
        expected = Series([3, 6], index=MultiIndex.from_tuples(keys))
        tm.assert_series_equal(result, expected)

    def test_constructor_set(self):
        """Neither a set nor a frozenset is accepted as Series data."""
        members = {1, 2, 3, 4, 5}
        for unordered in (members, frozenset(members)):
            with pytest.raises(TypeError):
                Series(unordered)

    def test_fromDict(self):
        """Check index ordering and dtype inference for dict input."""
        ordered = Series({'a': 0, 'b': 1, 'c': 2, 'd': 3})
        assert tm.is_sorted(ordered.index)

        # mixing ints with strings (and a datetime) gives object dtype
        mixed_inputs = (
            {'a': 0, 'b': '1', 'c': '2', 'd': datetime.now()},
            {'a': 0, 'b': '1', 'c': '2', 'd': '3'},
        )
        for data in mixed_inputs:
            assert Series(data).dtype == np.object_

        # numeric strings are cast when a float dtype is requested
        casted = Series({'a': '0', 'b': '1'}, dtype=float)
        assert casted.dtype == np.float64

    def test_fromValue(self):
        """Check that a scalar is broadcast across ``self.ts.index``.

        Covers NaN, string, datetime and (GH12336) categorical scalars: each
        resulting Series must have the expected dtype and the same length as
        ``self.ts``.  ``self.ts`` comes from the enclosing test class, which
        is not visible here -- presumably a time-series fixture.
        """
        index = self.ts.index
        expected_len = len(self.ts)

        # np.nan / np.float64 are used instead of the np.NaN / np.float_
        # aliases, which NumPy 2.0 removed; on older NumPy they are the very
        # same objects, so the assertions are unchanged.
        nans = Series(np.nan, index=index)
        assert nans.dtype == np.float64
        assert len(nans) == expected_len

        strings = Series('foo', index=index)
        assert strings.dtype == np.object_
        assert len(strings) == expected_len

        d = datetime.now()
        dates = Series(d, index=index)
        assert dates.dtype == 'M8[ns]'
        assert len(dates) == expected_len

        # GH12336
        # Test construction of categorical series from value
        categorical = Series(0, index=index, dtype="category")
        expected = Series(0, index=index).astype("category")
        assert categorical.dtype == 'category'
        assert len(categorical) == expected_len
        tm.assert_series_equal(categorical, expected)

    def test_constructor_dtype_timedelta64(self):
        """Exercise Series construction with timedelta data and dtypes.

        Sections, in order: basic inference from ``timedelta`` and
        ``np.timedelta64`` values, nulls with an explicit ``m8[ns]`` dtype,
        inference with nulls and no dtype (GH5689), astype casts, invalid
        string content, and string inference next to nulls.
        """

        # basic
        td = Series([timedelta(days=i) for i in range(3)])
        assert td.dtype == 'timedelta64[ns]'

        td = Series([timedelta(days=1)])
        assert td.dtype == 'timedelta64[ns]'

        td = Series(
            [timedelta(days=1),
             timedelta(days=2),
             np.timedelta64(1, 's')])

        assert td.dtype == 'timedelta64[ns]'

        # mixed with NaT
        td = Series([timedelta(days=1), NaT], dtype='m8[ns]')
        assert td.dtype == 'timedelta64[ns]'

        td = Series([timedelta(days=1), np.nan], dtype='m8[ns]')
        assert td.dtype == 'timedelta64[ns]'

        td = Series([np.timedelta64(300000000), pd.NaT], dtype='m8[ns]')
        assert td.dtype == 'timedelta64[ns]'

        # improved inference
        # GH5689
        td = Series([np.timedelta64(300000000), NaT])
        assert td.dtype == 'timedelta64[ns]'

        # because iNaT is int, not coerced to timedelta
        td = Series([np.timedelta64(300000000), iNaT])
        assert td.dtype == 'object'

        td = Series([np.timedelta64(300000000), np.nan])
        assert td.dtype == 'timedelta64[ns]'

        td = Series([pd.NaT, np.timedelta64(300000000)])
        assert td.dtype == 'timedelta64[ns]'

        td = Series([np.timedelta64(1, 's')])
        assert td.dtype == 'timedelta64[ns]'

        # these are frequency conversion astypes
        # for t in ['s', 'D', 'us', 'ms']:
        #    pytest.raises(TypeError, td.astype, 'm8[%s]' % t)

        # valid astype
        td.astype('int64')

        # invalid casting
        pytest.raises(TypeError, td.astype, 'int32')

        # this is an invalid casting
        def f():
            Series([timedelta(days=1), 'foo'], dtype='m8[ns]')

        # NOTE(review): ``Exception`` accepts any error type here; consider
        # narrowing it once the exact exception raised is confirmed.
        pytest.raises(Exception, f)

        # leave as object here
        td = Series([timedelta(days=i) for i in range(3)] + ['foo'])
        assert td.dtype == 'object'

        # these will correctly infer a timedelta
        s = Series([None, pd.NaT, '1 Day'])
        assert s.dtype == 'timedelta64[ns]'
        s = Series([np.nan, pd.NaT, '1 Day'])
        assert s.dtype == 'timedelta64[ns]'
        s = Series([pd.NaT, None, '1 Day'])
        assert s.dtype == 'timedelta64[ns]'
        s = Series([pd.NaT, np.nan, '1 Day'])
        assert s.dtype == 'timedelta64[ns]'

    # GH 16406
    def test_constructor_mixed_tz(self):
        """Naive and tz-aware Timestamps together equal an object-dtype Series."""
        stamps = [Timestamp('20130101'),
                  Timestamp('20130101', tz='US/Eastern')]
        result = Series(stamps)
        expected = Series(stamps, dtype='object')
        assert_series_equal(result, expected)

    def test_NaT_scalar(self):
        """An iNaT slot reads back as null and can be written to another slot."""
        series = Series([0, 1000, 2000, iNaT], dtype='M8[ns]')

        missing = series[3]
        assert isna(missing)

        # assigning the null scalar elsewhere must keep it null
        series[2] = missing
        assert isna(series[2])

    def test_NaT_cast(self):
        """Casting a NaN Series to M8[ns] must equal a Series holding NaT."""
        # GH10747
        casted = Series([np.nan]).astype('M8[ns]')
        assert_series_equal(casted, Series([NaT]))

    def test_constructor_name_hashable(self):
        for n in [777, 777., 'name', datetime(2001, 11, 11), (1, ), u"\u05D0"]:
            for data in [[1, 2, 3], np.ones(3), {'a': 0, 'b': 1}]:
                s = Series(data, name=n)
                assert s.name == n

    def test_constructor_name_unhashable(self):
        """A list, ndarray or dict used as the Series name raises TypeError."""
        bad_names = [['name_list'], np.ones(2), {1: 2}]
        for name in bad_names:
            datasets = [['name_list'], np.ones(2), {1: 2}]
            for data in datasets:
                with pytest.raises(TypeError):
                    Series(data, name=name)

    def test_auto_conversion(self):
        """A plain list taken from a date_range must infer as M8[ns]."""
        stamps = list(date_range('1/1/2000', periods=10))
        assert Series(stamps).dtype == 'M8[ns]'

    def test_convert_non_ns(self):
        """Non-ns timedelta64/datetime64 arrays match the equivalent ranges."""
        # convert from a numpy array of non-ns timedelta64
        seconds = np.array([1, 2, 3], dtype='timedelta64[s]')
        result = Series(seconds)
        expected = Series(pd.timedelta_range('00:00:01', periods=3, freq='s'))
        assert_series_equal(result, expected)

        # convert from a numpy array of non-ns datetime64
        # note that creating a numpy datetime64 is in LOCAL time!!!!
        # seems to work for M8[D], but not for M8[s]
        days = np.array(['2013-01-01', '2013-01-02', '2013-01-03'],
                        dtype='datetime64[D]')
        result = Series(days)
        expected = Series(date_range('20130101', periods=3, freq='D'))
        assert_series_equal(result, expected)

        # The M8[s] variant is left disabled, as in the original:
        # s = Series(np.array(['2013-01-01 00:00:01','2013-01-01
        # 00:00:02','2013-01-01 00:00:03'],dtype='datetime64[s]'))

        # assert_series_equal(s,date_range('20130101
        # 00:00:01',period=3,freq='s'))

    @pytest.mark.parametrize(
        "index",
        [
            date_range('1/1/2000', periods=10),
            timedelta_range('1 day', periods=10),
            period_range('2000-Q1', periods=10, freq='Q'),
        ],
        ids=lambda x: type(x).__name__)
    def test_constructor_cant_cast_datetimelike(self, index):
        """Datetimelike indexes refuse dtype=float but accept dtype=np.int64."""

        # floats are not ok
        kind = type(index).__name__
        msg = "Cannot cast {} to ".format(kind)
        with tm.assert_raises_regex(TypeError, msg):
            Series(index, dtype=float)

        # ints are ok
        # we test with np.int64 to get similar results on
        # windows / 32-bit platforms
        as_ints = Series(index, dtype=np.int64)
        tm.assert_series_equal(as_ints, Series(index.astype(np.int64)))

    @pytest.mark.parametrize(
        "index",
        [
            date_range('1/1/2000', periods=10),
            timedelta_range('1 day', periods=10),
            period_range('2000-Q1', periods=10, freq='Q'),
        ],
        ids=lambda x: type(x).__name__)
    def test_constructor_cast_object(self, index):
        """dtype=object on datetimelike input equals ``astype(object)``."""
        # the index itself, an object Index built from it and its
        # astype(object) form must all give the same object Series
        builders = (
            lambda: index,
            lambda: pd.Index(index, dtype=object),
            lambda: index.astype(object),
        )
        for build in builders:
            result = Series(build(), dtype=object)
            expected = Series(index).astype(object)
            tm.assert_series_equal(result, expected)

    def test_constructor_generic_timestamp_deprecated(self):
        """Unit-less M8/m8 dtypes warn; unsupported units raise TypeError."""
        # see gh-15524

        # generic (unit-less) dtypes emit a FutureWarning and resolve to ns
        generic_cases = ((np.timedelta64, 'm8[ns]'), (np.datetime64, 'M8[ns]'))
        for generic, resolved in generic_cases:
            with tm.assert_produces_warning(FutureWarning):
                empty = Series([], dtype=generic)

                assert empty.empty
                assert empty.dtype == resolved

        # These timestamps have the wrong frequencies,
        # so an Exception should be raised now.
        bad_cases = (
            ('m8[ps]', "cannot convert timedeltalike"),
            ('M8[ps]', "cannot convert datetimelike"),
        )
        for bad_dtype, msg in bad_cases:
            with tm.assert_raises_regex(TypeError, msg):
                Series([], dtype=bad_dtype)

    @pytest.mark.parametrize('dtype', [None, 'uint8', 'category'])
    def test_constructor_range_dtype(self, dtype):
        """A range builds the same Series as the equivalent list of ints."""
        # GH 16804
        result = Series(range(5), dtype=dtype)
        # with no dtype requested the expected Series is built as int64
        expected_dtype = dtype if dtype else 'int64'
        expected = Series([0, 1, 2, 3, 4], dtype=expected_dtype)
        tm.assert_series_equal(result, expected)
Пример #55
0
 def test_infer_freq(self, freq):
     """freq='infer' on the raw i8 values must recover the original freq."""
     # GH#11018
     original = pd.timedelta_range('1', freq=freq, periods=10)
     inferred = pd.TimedeltaIndex(original.asi8, freq='infer')
     tm.assert_index_equal(original, inferred)
     assert inferred.freq == freq
Пример #56
0
class TestTableOrientReader:
    """Round-trip tests for ``to_json`` / ``read_json`` with orient="table"."""

    @pytest.mark.parametrize(
        "index_nm",
        [None, "idx", pytest.param("index", marks=pytest.mark.xfail),
         "level_0"],
    )
    @pytest.mark.parametrize(
        "vals",
        [
            {"ints": [1, 2, 3, 4]},
            {"objects": ["a", "b", "c", "d"]},
            {"objects": ["1", "2", "3", "4"]},
            {"date_ranges": pd.date_range("2016-01-01", freq="d", periods=4)},
            {"categoricals": pd.Series(pd.Categorical(["a", "b", "c", "c"]))},
            {"ordered_cats": pd.Series(
                pd.Categorical(["a", "b", "c", "c"], ordered=True))},
            {"floats": [1.0, 2.0, 3.0, 4.0]},
            {"floats": [1.1, 2.2, 3.3, 4.4]},
            {"bools": [True, False, False, True]},
        ],
    )
    def test_read_json_table_orient(self, index_nm, vals, recwarn):
        """A single-column frame must survive the table-orient round trip."""
        frame = DataFrame(vals, index=pd.Index(range(4), name=index_nm))
        payload = frame.to_json(orient="table")
        restored = pd.read_json(payload, orient="table")
        tm.assert_frame_equal(frame, restored)

    @pytest.mark.parametrize("index_nm", [None, "idx", "index"])
    @pytest.mark.parametrize(
        "vals",
        [
            {"timedeltas": pd.timedelta_range("1H", periods=4, freq="T")},
            {"timezones": pd.date_range(
                "2016-01-01", freq="d", periods=4, tz="US/Central")},
        ],
    )
    def test_read_json_table_orient_raises(self, index_nm, vals, recwarn):
        """Timedelta and tz-aware columns must raise NotImplementedError."""
        frame = DataFrame(vals, index=pd.Index(range(4), name=index_nm))
        payload = frame.to_json(orient="table")
        with pytest.raises(NotImplementedError, match="can not yet read "):
            pd.read_json(payload, orient="table")

    def test_comprehensive(self):
        """A frame mixing several column kinds must round-trip unchanged."""
        columns = {
            "A": [1, 2, 3, 4],
            "B": ["a", "b", "c", "c"],
            "C": pd.date_range("2016-01-01", freq="d", periods=4),
            # timedelta column left disabled, as in the original:
            # 'D': pd.timedelta_range('1H', periods=4, freq='T'),
            "E": pd.Series(pd.Categorical(["a", "b", "c", "c"])),
            "F": pd.Series(
                pd.Categorical(["a", "b", "c", "c"], ordered=True)),
            "G": [1.1, 2.2, 3.3, 4.4],
            # tz-aware column left disabled, as in the original:
            # 'H': pd.date_range('2016-01-01', freq='d', periods=4,
            #                   tz='US/Central'),
            "I": [True, False, False, True],
        }
        frame = DataFrame(columns, index=pd.Index(range(4), name="idx"))

        payload = frame.to_json(orient="table")
        restored = pd.read_json(payload, orient="table")
        tm.assert_frame_equal(frame, restored)

    @pytest.mark.parametrize(
        "index_names",
        [
            [None, None],
            ["foo", "bar"],
            ["foo", None],
            [None, "foo"],
            ["index", "foo"],
        ],
    )
    def test_multiindex(self, index_names):
        """A two-level index with the given names must round-trip."""
        # GH 18912
        rows = [
            ["Arr", "alpha", [1, 2, 3, 4]],
            ["Bee", "Beta", [10, 20, 30, 40]],
        ]
        frame = pd.DataFrame(
            rows,
            index=[["A", "B"], ["Null", "Eins"]],
            columns=["Aussprache", "Griechisch", "Args"],
        )
        frame.index.names = index_names
        payload = frame.to_json(orient="table")
        restored = pd.read_json(payload, orient="table")
        tm.assert_frame_equal(frame, restored)

    def test_empty_frame_roundtrip(self):
        """An empty frame with named columns must round-trip unchanged."""
        # GH 21287
        frame = pd.DataFrame(columns=["a", "b", "c"])
        expected = frame.copy()
        payload = frame.to_json(orient="table")
        restored = pd.read_json(payload, orient="table")
        tm.assert_frame_equal(expected, restored)
def create_data():
    """Create the pickle/msgpack data.

    Builds one sample object per supported kind and groups them by category.

    Returns
    -------
    dict
        Keys ``series``, ``frame``, ``panel``, ``index``, ``scalars``,
        ``mi``, ``sp_series``, ``sp_frame``, ``cat``, ``timestamp`` and
        ``offsets``; each value is a dict mapping a short label to an object.
        Some entries depend on ``_loose_version`` (see the gates below).
    """

    # raw column data shared by several Series/DataFrame entries below
    data = {
        u'A': [0., 1., 2., 3., np.nan],
        u'B': [0, 1, 0, 1, 0],
        u'C': [u'foo1', u'foo2', u'foo3', u'foo4', u'foo5'],
        u'D': date_range('1/1/2009', periods=5),
        u'E': [0., 1, Timestamp('20100101'), u'foo', 2.]
    }

    scalars = dict(timestamp=Timestamp('20130101'),
                   period=Period('2012', 'M'))

    index = dict(int=Index(np.arange(10)),
                 date=date_range('20130101', periods=10),
                 period=period_range('2013-01-01', freq='M', periods=10),
                 float=Index(np.arange(10, dtype=np.float64)),
                 uint=Index(np.arange(10, dtype=np.uint64)),
                 timedelta=timedelta_range('00:00:00', freq='30T', periods=10))

    # RangeIndex is only imported and added for versions >= 0.18
    if _loose_version >= LooseVersion('0.18'):
        from pandas import RangeIndex
        index['range'] = RangeIndex(10)

    # interval_range is only imported and added for versions >= 0.21
    if _loose_version >= LooseVersion('0.21'):
        from pandas import interval_range
        index['interval'] = interval_range(0, periods=10)

    mi = dict(reg2=MultiIndex.from_tuples(
        tuple(zip(*[[u'bar', u'bar', u'baz', u'baz', u'foo',
                     u'foo', u'qux', u'qux'],
                    [u'one', u'two', u'one', u'two', u'one',
                     u'two', u'one', u'two']])),
        names=[u'first', u'second']))

    series = dict(float=Series(data[u'A']),
                  int=Series(data[u'B']),
                  mixed=Series(data[u'E']),
                  ts=Series(np.arange(10).astype(np.int64),
                            index=date_range('20130101', periods=10)),
                  mi=Series(np.arange(5).astype(np.float64),
                            index=MultiIndex.from_tuples(
                                tuple(zip(*[[1, 1, 2, 2, 2],
                                            [3, 4, 3, 4, 5]])),
                                names=[u'one', u'two'])),
                  dup=Series(np.arange(5).astype(np.float64),
                             index=[u'A', u'B', u'C', u'D', u'A']),
                  cat=Series(Categorical([u'foo', u'bar', u'baz'])),
                  dt=Series(date_range('20130101', periods=5)),
                  dt_tz=Series(date_range('20130101', periods=5,
                                          tz='US/Eastern')),
                  period=Series([Period('2000Q1')] * 5))

    # frame with mixed column types and a duplicated column label ('A')
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list(u"ABCDA")
    frame = dict(float=DataFrame({u'A': series[u'float'],
                                  u'B': series[u'float'] + 1}),
                 int=DataFrame({u'A': series[u'int'],
                                u'B': series[u'int'] + 1}),
                 mixed=DataFrame({k: data[k]
                                  for k in [u'A', u'B', u'C', u'D']}),
                 mi=DataFrame({u'A': np.arange(5).astype(np.float64),
                               u'B': np.arange(5).astype(np.int64)},
                              index=MultiIndex.from_tuples(
                                  tuple(zip(*[[u'bar', u'bar', u'baz',
                                               u'baz', u'baz'],
                                              [u'one', u'two', u'one',
                                               u'two', u'three']])),
                                  names=[u'first', u'second'])),
                 dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                               columns=[u'A', u'B', u'A']),
                 cat_onecol=DataFrame({u'A': Categorical([u'foo', u'bar'])}),
                 cat_and_float=DataFrame({
                     u'A': Categorical([u'foo', u'bar', u'baz']),
                     u'B': np.arange(3).astype(np.int64)}),
                 mixed_dup=mixed_dup_df,
                 dt_mixed_tzs=DataFrame({
                     u'A': Timestamp('20130102', tz='US/Eastern'),
                     u'B': Timestamp('20130603', tz='CET')}, index=range(5)),
                 dt_mixed2_tzs=DataFrame({
                     u'A': Timestamp('20130102', tz='US/Eastern'),
                     u'B': Timestamp('20130603', tz='CET'),
                     u'C': Timestamp('20130603', tz='UTC')}, index=range(5))
                 )

    # warnings raised while building the Panel objects are recorded rather
    # than shown -- presumably to silence Panel deprecation warnings
    with catch_warnings(record=True):
        mixed_dup_panel = Panel({u'ItemA': frame[u'float'],
                                 u'ItemB': frame[u'int']})
        mixed_dup_panel.items = [u'ItemA', u'ItemA']
        panel = dict(float=Panel({u'ItemA': frame[u'float'],
                                  u'ItemB': frame[u'float'] + 1}),
                     dup=Panel(
                         np.arange(30).reshape(3, 5, 2).astype(np.float64),
                         items=[u'A', u'B', u'A']),
                     mixed_dup=mixed_dup_panel)

    # labels suggest the expected code width of each Categorical
    # (7, 1000 and 10000 categories) -- TODO confirm
    cat = dict(int8=Categorical(list('abcdefg')),
               int16=Categorical(np.arange(1000)),
               int32=Categorical(np.arange(10000)))

    timestamp = dict(normal=Timestamp('2011-01-01'),
                     nat=NaT,
                     tz=Timestamp('2011-01-01', tz='US/Eastern'))

    # versions before 0.19.2 pass the frequency as ``offset=``; later
    # versions pass it as ``freq=``
    if _loose_version < LooseVersion('0.19.2'):
        timestamp['freq'] = Timestamp('2011-01-01', offset='D')
        timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo',
                                      offset='M')
    else:
        timestamp['freq'] = Timestamp('2011-01-01', freq='D')
        timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo',
                                      freq='M')

    # one instance per offset class, several with non-default arguments
    off = {'DateOffset': DateOffset(years=1),
           'DateOffset_h_ns': DateOffset(hour=6, nanoseconds=5824),
           'BusinessDay': BusinessDay(offset=timedelta(seconds=9)),
           'BusinessHour': BusinessHour(normalize=True, n=6, end='15:14'),
           'CustomBusinessDay': CustomBusinessDay(weekmask='Mon Fri'),
           'SemiMonthBegin': SemiMonthBegin(day_of_month=9),
           'SemiMonthEnd': SemiMonthEnd(day_of_month=24),
           'MonthBegin': MonthBegin(1),
           'MonthEnd': MonthEnd(1),
           'QuarterBegin': QuarterBegin(1),
           'QuarterEnd': QuarterEnd(1),
           'Day': Day(1),
           'YearBegin': YearBegin(1),
           'YearEnd': YearEnd(1),
           'Week': Week(1),
           'Week_Tues': Week(2, normalize=False, weekday=1),
           'WeekOfMonth': WeekOfMonth(week=3, weekday=4),
           'LastWeekOfMonth': LastWeekOfMonth(n=1, weekday=3),
           'FY5253': FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
           'Easter': Easter(),
           'Hour': Hour(1),
           'Minute': Minute(1)}

    # the sparse entries come from helper functions defined outside this block
    return dict(series=series,
                frame=frame,
                panel=panel,
                index=index,
                scalars=scalars,
                mi=mi,
                sp_series=dict(float=_create_sp_series(),
                               ts=_create_sp_tsseries()),
                sp_frame=dict(float=_create_sp_frame()),
                cat=cat,
                timestamp=timestamp,
                offsets=off)
Пример #58
0
def test_timedelta_methods(method):
    s = pd.Series(pd.timedelta_range("2000", periods=4))
    s.attrs = {"a": 1}
    result = method(s.dt)
    assert result.attrs == {"a": 1}
Пример #59
0
 def test_linspace_behavior(self, periods, freq):
     """start/end/periods must give the same index as start/end/freq."""
     # GH 20976
     by_periods = timedelta_range(start="0 days", end="4 days",
                                  periods=periods)
     by_freq = timedelta_range(start="0 days", end="4 days", freq=freq)
     tm.assert_index_equal(by_periods, by_freq)
Пример #60
0
 def test_constructor_timedelta64_values_mismatched_dtype(self):
     """Index(tdi, dtype="category") must equal CategoricalIndex(tdi)."""
     # check we don't silently ignore the dtype keyword
     tdi = timedelta_range("4 Days", periods=5)
     result = Index(tdi, dtype="category")
     expected = CategoricalIndex(tdi)
     tm.assert_index_equal(result, expected)