Example #1
    def test_dt_namespace_accessor(self):

        # GH 7207, 11128
        # test .dt namespace accessor

        ok_for_period = PeriodIndex._datetimelike_ops
        ok_for_period_methods = ['strftime', 'to_timestamp', 'asfreq']
        ok_for_dt = DatetimeIndex._datetimelike_ops
        ok_for_dt_methods = [
            'to_period', 'to_pydatetime', 'tz_localize', 'tz_convert',
            'normalize', 'strftime', 'round', 'floor', 'ceil', 'weekday_name'
        ]
        ok_for_td = TimedeltaIndex._datetimelike_ops
        ok_for_td_methods = [
            'components', 'to_pytimedelta', 'total_seconds', 'round', 'floor',
            'ceil'
        ]

        def get_expected(s, name):
            result = getattr(Index(s._values), prop)
            if isinstance(result, np.ndarray):
                if is_integer_dtype(result):
                    result = result.astype('int64')
            elif not is_list_like(result):
                return result
            return Series(result, index=s.index, name=s.name)

        def compare(s, name):
            a = getattr(s.dt, prop)
            b = get_expected(s, prop)
            if not (is_list_like(a) and is_list_like(b)):
                assert a == b
            else:
                tm.assert_series_equal(a, b)

        # datetimeindex
        cases = [
            Series(date_range('20130101', periods=5), name='xxx'),
            Series(date_range('20130101', periods=5, freq='s'), name='xxx'),
            Series(date_range('20130101 00:00:00', periods=5, freq='ms'),
                   name='xxx')
        ]
        for s in cases:
            for prop in ok_for_dt:
                # we test freq below
                if prop != 'freq':
                    compare(s, prop)

            for prop in ok_for_dt_methods:
                getattr(s.dt, prop)

            result = s.dt.to_pydatetime()
            assert isinstance(result, np.ndarray)
            assert result.dtype == object

            result = s.dt.tz_localize('US/Eastern')
            exp_values = DatetimeIndex(s.values).tz_localize('US/Eastern')
            expected = Series(exp_values, index=s.index, name='xxx')
            tm.assert_series_equal(result, expected)

            tz_result = result.dt.tz
            assert str(tz_result) == 'US/Eastern'
            freq_result = s.dt.freq
            assert freq_result == DatetimeIndex(s.values, freq='infer').freq

            # let's localize, then convert
            result = s.dt.tz_localize('UTC').dt.tz_convert('US/Eastern')
            exp_values = (DatetimeIndex(
                s.values).tz_localize('UTC').tz_convert('US/Eastern'))
            expected = Series(exp_values, index=s.index, name='xxx')
            tm.assert_series_equal(result, expected)

        # round
        s = Series(pd.to_datetime([
            '2012-01-01 13:00:00', '2012-01-01 12:01:00', '2012-01-01 08:00:00'
        ]),
                   name='xxx')
        result = s.dt.round('D')
        expected = Series(pd.to_datetime(
            ['2012-01-02', '2012-01-02', '2012-01-01']),
                          name='xxx')
        tm.assert_series_equal(result, expected)

        # round with tz
        result = (
            s.dt.tz_localize('UTC').dt.tz_convert('US/Eastern').dt.round('D'))
        exp_values = pd.to_datetime(['2012-01-01', '2012-01-01',
                                     '2012-01-01']).tz_localize('US/Eastern')
        expected = Series(exp_values, name='xxx')
        tm.assert_series_equal(result, expected)

        # floor
        s = Series(pd.to_datetime([
            '2012-01-01 13:00:00', '2012-01-01 12:01:00', '2012-01-01 08:00:00'
        ]),
                   name='xxx')
        result = s.dt.floor('D')
        expected = Series(pd.to_datetime(
            ['2012-01-01', '2012-01-01', '2012-01-01']),
                          name='xxx')
        tm.assert_series_equal(result, expected)

        # ceil
        s = Series(pd.to_datetime([
            '2012-01-01 13:00:00', '2012-01-01 12:01:00', '2012-01-01 08:00:00'
        ]),
                   name='xxx')
        result = s.dt.ceil('D')
        expected = Series(pd.to_datetime(
            ['2012-01-02', '2012-01-02', '2012-01-02']),
                          name='xxx')
        tm.assert_series_equal(result, expected)

        # datetimeindex with tz
        s = Series(date_range('20130101', periods=5, tz='US/Eastern'),
                   name='xxx')
        for prop in ok_for_dt:

            # we test freq below
            if prop != 'freq':
                compare(s, prop)

        for prop in ok_for_dt_methods:
            getattr(s.dt, prop)

        result = s.dt.to_pydatetime()
        assert isinstance(result, np.ndarray)
        assert result.dtype == object

        result = s.dt.tz_convert('CET')
        expected = Series(s._values.tz_convert('CET'),
                          index=s.index,
                          name='xxx')
        tm.assert_series_equal(result, expected)

        tz_result = result.dt.tz
        assert str(tz_result) == 'CET'
        freq_result = s.dt.freq
        assert freq_result == DatetimeIndex(s.values, freq='infer').freq

        # timedelta index
        cases = [
            Series(timedelta_range('1 day', periods=5),
                   index=list('abcde'),
                   name='xxx'),
            Series(timedelta_range('1 day 01:23:45', periods=5, freq='s'),
                   name='xxx'),
            Series(timedelta_range('2 days 01:23:45.012345',
                                   periods=5,
                                   freq='ms'),
                   name='xxx')
        ]
        for s in cases:
            for prop in ok_for_td:
                # we test freq below
                if prop != 'freq':
                    compare(s, prop)

            for prop in ok_for_td_methods:
                getattr(s.dt, prop)

            result = s.dt.components
            assert isinstance(result, DataFrame)
            tm.assert_index_equal(result.index, s.index)

            result = s.dt.to_pytimedelta()
            assert isinstance(result, np.ndarray)
            assert result.dtype == object

            result = s.dt.total_seconds()
            assert isinstance(result, pd.Series)
            assert result.dtype == 'float64'

            freq_result = s.dt.freq
            assert freq_result == TimedeltaIndex(s.values, freq='infer').freq

        # both
        index = date_range('20130101', periods=3, freq='D')
        s = Series(date_range('20140204', periods=3, freq='s'),
                   index=index,
                   name='xxx')
        exp = Series(np.array([2014, 2014, 2014], dtype='int64'),
                     index=index,
                     name='xxx')
        tm.assert_series_equal(s.dt.year, exp)

        exp = Series(np.array([2, 2, 2], dtype='int64'),
                     index=index,
                     name='xxx')
        tm.assert_series_equal(s.dt.month, exp)

        exp = Series(np.array([0, 1, 2], dtype='int64'),
                     index=index,
                     name='xxx')
        tm.assert_series_equal(s.dt.second, exp)

        exp = pd.Series([s[0]] * 3, index=index, name='xxx')
        tm.assert_series_equal(s.dt.normalize(), exp)

        # periodindex
        cases = [
            Series(period_range('20130101', periods=5, freq='D'), name='xxx')
        ]
        for s in cases:
            for prop in ok_for_period:
                # we test freq below
                if prop != 'freq':
                    compare(s, prop)

            for prop in ok_for_period_methods:
                getattr(s.dt, prop)

            freq_result = s.dt.freq
            assert freq_result == PeriodIndex(s.values).freq

        # test limited display api
        def get_dir(s):
            results = [r for r in s.dt.__dir__() if not r.startswith('_')]
            return list(sorted(set(results)))

        s = Series(date_range('20130101', periods=5, freq='D'), name='xxx')
        results = get_dir(s)
        tm.assert_almost_equal(
            results, list(sorted(set(ok_for_dt + ok_for_dt_methods))))

        s = Series(
            period_range('20130101', periods=5, freq='D', name='xxx').asobject)
        results = get_dir(s)
        tm.assert_almost_equal(
            results, list(sorted(set(ok_for_period + ok_for_period_methods))))

        # 11295
        # ambiguous time error on the conversions
        s = Series(pd.date_range('2015-01-01', '2016-01-01', freq='T'),
                   name='xxx')
        s = s.dt.tz_localize('UTC').dt.tz_convert('America/Chicago')
        results = get_dir(s)
        tm.assert_almost_equal(
            results, list(sorted(set(ok_for_dt + ok_for_dt_methods))))
        exp_values = pd.date_range('2015-01-01',
                                   '2016-01-01',
                                   freq='T',
                                   tz='UTC').tz_convert('America/Chicago')
        expected = Series(exp_values, name='xxx')
        tm.assert_series_equal(s, expected)

        # no setting allowed
        s = Series(date_range('20130101', periods=5, freq='D'), name='xxx')
        with tm.assert_raises_regex(ValueError, "modifications"):
            s.dt.hour = 5

        # trying to set a copy
        with pd.option_context('chained_assignment', 'raise'):

            def f():
                s.dt.hour[0] = 5

            pytest.raises(com.SettingWithCopyError, f)
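A minimal, self-contained sketch of the .dt accessor behavior exercised above (the dates and the Series name here are illustrative only, not taken from the test suite):

import pandas as pd

s = pd.Series(pd.date_range("2013-01-01", periods=3, freq="D"), name="xxx")
print(s.dt.year)                       # datetime components come back as a Series
print(s.dt.tz_localize("UTC").dt.tz)   # localize, then read the tz back
print(s.dt.floor("D").equals(s))       # midnight timestamps are unchanged by floor("D")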
Example #2
    def test_value_counts_datetime64(self, index_or_series):
        klass = index_or_series

        # GH 3002, datetime64[ns]
        # don't test names though
        txt = "\n".join([
            "xxyyzz20100101PIE",
            "xxyyzz20100101GUM",
            "xxyyzz20100101EGG",
            "xxyyww20090101EGG",
            "foofoo20080909PIE",
            "foofoo20080909GUM",
        ])
        f = StringIO(txt)
        df = pd.read_fwf(f,
                         widths=[6, 8, 3],
                         names=["person_id", "dt", "food"],
                         parse_dates=["dt"])

        s = klass(df["dt"].copy())
        s.name = None
        idx = pd.to_datetime([
            "2010-01-01 00:00:00", "2008-09-09 00:00:00", "2009-01-01 00:00:00"
        ])
        expected_s = Series([3, 2, 1], index=idx)
        tm.assert_series_equal(s.value_counts(), expected_s)

        expected = np_array_datetime64_compat(
            [
                "2010-01-01 00:00:00", "2009-01-01 00:00:00",
                "2008-09-09 00:00:00"
            ],
            dtype="datetime64[ns]",
        )
        if isinstance(s, Index):
            tm.assert_index_equal(s.unique(), DatetimeIndex(expected))
        else:
            tm.assert_numpy_array_equal(s.unique(), expected)

        assert s.nunique() == 3

        # with NaT
        s = df["dt"].copy()
        s = klass(list(s.values) + [pd.NaT])

        result = s.value_counts()
        assert result.index.dtype == "datetime64[ns]"
        tm.assert_series_equal(result, expected_s)

        result = s.value_counts(dropna=False)
        expected_s[pd.NaT] = 1
        tm.assert_series_equal(result, expected_s)

        unique = s.unique()
        assert unique.dtype == "datetime64[ns]"

        # numpy_array_equal cannot compare pd.NaT
        if isinstance(s, Index):
            exp_idx = DatetimeIndex(expected.tolist() + [pd.NaT])
            tm.assert_index_equal(unique, exp_idx)
        else:
            tm.assert_numpy_array_equal(unique[:3], expected)
            assert pd.isna(unique[3])

        assert s.nunique() == 3
        assert s.nunique(dropna=False) == 4

        # timedelta64[ns]
        td = df.dt - df.dt + timedelta(1)
        td = klass(td, name="dt")

        result = td.value_counts()
        expected_s = Series([6], index=[Timedelta("1day")], name="dt")
        tm.assert_series_equal(result, expected_s)

        expected = TimedeltaIndex(["1 days"], name="dt")
        if isinstance(td, Index):
            tm.assert_index_equal(td.unique(), expected)
        else:
            tm.assert_numpy_array_equal(td.unique(), expected.values)

        td2 = timedelta(1) + (df.dt - df.dt)
        td2 = klass(td2, name="dt")
        result2 = td2.value_counts()
        tm.assert_series_equal(result2, expected_s)
Example #3
 def test_nat(self):
     self.assertIs(DatetimeIndex([np.nan])[0], pd.NaT)
Example #4
 def test_maybe_cast_slice_duplicate_monotonic(self):
     # https://github.com/pandas-dev/pandas/issues/16515
     idx = DatetimeIndex(["2017", "2017"])
     result = idx._maybe_cast_slice_bound("2017-01-01", "left", "loc")
     expected = Timestamp("2017-01-01")
     assert result == expected
Example #5
    def test_subtraction_ops_with_tz(self):

        # check that dt/dti subtraction ops with tz are validated
        dti = date_range('20130101', periods=3)
        ts = Timestamp('20130101')
        dt = ts.to_pydatetime()
        dti_tz = date_range('20130101', periods=3).tz_localize('US/Eastern')
        ts_tz = Timestamp('20130101').tz_localize('US/Eastern')
        ts_tz2 = Timestamp('20130101').tz_localize('CET')
        dt_tz = ts_tz.to_pydatetime()
        td = Timedelta('1 days')

        def _check(result, expected):
            assert result == expected
            assert isinstance(result, Timedelta)

        # scalars
        result = ts - ts
        expected = Timedelta('0 days')
        _check(result, expected)

        result = dt_tz - ts_tz
        expected = Timedelta('0 days')
        _check(result, expected)

        result = ts_tz - dt_tz
        expected = Timedelta('0 days')
        _check(result, expected)

        # tz mismatches
        pytest.raises(TypeError, lambda: dt_tz - ts)
        pytest.raises(TypeError, lambda: dt_tz - dt)
        pytest.raises(TypeError, lambda: dt_tz - ts_tz2)
        pytest.raises(TypeError, lambda: dt - dt_tz)
        pytest.raises(TypeError, lambda: ts - dt_tz)
        pytest.raises(TypeError, lambda: ts_tz2 - ts)
        pytest.raises(TypeError, lambda: ts_tz2 - dt)
        pytest.raises(TypeError, lambda: ts_tz - ts_tz2)

        # with dti
        pytest.raises(TypeError, lambda: dti - ts_tz)
        pytest.raises(TypeError, lambda: dti_tz - ts)
        pytest.raises(TypeError, lambda: dti_tz - ts_tz2)

        result = dti_tz - dt_tz
        expected = TimedeltaIndex(['0 days', '1 days', '2 days'])
        tm.assert_index_equal(result, expected)

        result = dt_tz - dti_tz
        expected = TimedeltaIndex(['0 days', '-1 days', '-2 days'])
        tm.assert_index_equal(result, expected)

        result = dti_tz - ts_tz
        expected = TimedeltaIndex(['0 days', '1 days', '2 days'])
        tm.assert_index_equal(result, expected)

        result = ts_tz - dti_tz
        expected = TimedeltaIndex(['0 days', '-1 days', '-2 days'])
        tm.assert_index_equal(result, expected)

        result = td - td
        expected = Timedelta('0 days')
        _check(result, expected)

        result = dti_tz - td
        expected = DatetimeIndex(
            ['20121231', '20130101', '20130102'], tz='US/Eastern')
        tm.assert_index_equal(result, expected)
Example #6
# `cur` is assumed to be an open DB-API cursor (e.g. from pymysql / MySQLdb).
import numpy as np
import pandas as pd
from pandas import DatetimeIndex
from sklearn.svm import SVR

sql1 = "select code from company"
cur.execute(sql1)
data1 = cur.fetchall()
data2 = list(data1)
print(data2)
print(len(data2))

for a1 in data2:
    b = a1[0]  # each row returned by fetchall() is a tuple; take the code value itself
    print(b)
    query = "select Date,Open,High,Low,Last,Close,Total_Trade_Quantity,Turnover from stocks where Code = %(Code)s"
    cur.execute(query,{ 'Code' : b})
    data = cur.fetchall()
    df = pd.DataFrame(list(data),columns=['Date','Open','High','Low','Last','Close','Total Trade Quantity','Turnover'])
    data3 = DatetimeIndex(df['Date']).day
    data3 = np.asarray(data3)
    dates = np.reshape(data3,(len(data3), 1))
    prices = np.asarray(df['Close'])
    svr_poly = SVR(kernel= 'poly', C= 1e3, degree= 2)
    svr_poly.fit(dates, prices)
    length = len(prices) + 1
    print(length)
    # SVR.predict expects a 2-D array of shape (n_samples, n_features)
    aaa = svr_poly.predict([[length]])[0]
    print("The next predicted price is:")
    print(aaa)
    query = "select Close from stocks where Code = %(Code)s"
    cur.execute(query, {'Code': b})
    data = cur.fetchall()
    df1 = pd.DataFrame(list(data), columns=['Close'])
    df1 = df1.tail(1)
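Since the loop above feeds day-of-month features to scikit-learn, here is a condensed, hedged sketch of the same fit/predict pattern with the input shapes scikit-learn expects (the synthetic frame and column names are assumptions, not data from the real database):

import numpy as np
import pandas as pd
from pandas import DatetimeIndex
from sklearn.svm import SVR

df = pd.DataFrame({"Date": pd.date_range("2021-01-01", periods=10),
                   "Close": np.linspace(100.0, 110.0, 10)})
days = np.asarray(DatetimeIndex(df["Date"]).day).reshape(-1, 1)  # 2-D feature matrix
model = SVR(kernel="poly", C=1e3, degree=2)
model.fit(days, df["Close"].to_numpy())
next_day = np.array([[len(df) + 1]])     # predict() also needs a 2-D array
print(model.predict(next_day)[0])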
Example #7
 def test_dti_contains_with_duplicates(self):
     d = datetime(2011, 12, 5, 20, 30)
     ix = DatetimeIndex([d, d])
     assert d in ix
Example #8
    def test_delete_slice(self):
        idx = date_range(start="2000-01-01", periods=10, freq="D", name="idx")

        # preserve freq
        expected_0_2 = date_range(start="2000-01-04",
                                  periods=7,
                                  freq="D",
                                  name="idx")
        expected_7_9 = date_range(start="2000-01-01",
                                  periods=7,
                                  freq="D",
                                  name="idx")

        # reset freq to None
        expected_3_5 = DatetimeIndex(
            [
                "2000-01-01",
                "2000-01-02",
                "2000-01-03",
                "2000-01-07",
                "2000-01-08",
                "2000-01-09",
                "2000-01-10",
            ],
            freq=None,
            name="idx",
        )

        cases = {
            (0, 1, 2): expected_0_2,
            (7, 8, 9): expected_7_9,
            (3, 4, 5): expected_3_5,
        }
        for n, expected in cases.items():
            result = idx.delete(n)
            tm.assert_index_equal(result, expected)
            assert result.name == expected.name
            assert result.freq == expected.freq

            result = idx.delete(slice(n[0], n[-1] + 1))
            tm.assert_index_equal(result, expected)
            assert result.name == expected.name
            assert result.freq == expected.freq

        for tz in [None, "Asia/Tokyo", "US/Pacific"]:
            ts = pd.Series(
                1,
                index=pd.date_range("2000-01-01 09:00",
                                    periods=10,
                                    freq="H",
                                    name="idx",
                                    tz=tz),
            )
            # preserve freq
            result = ts.drop(ts.index[:5]).index
            expected = pd.date_range("2000-01-01 14:00",
                                     periods=5,
                                     freq="H",
                                     name="idx",
                                     tz=tz)
            tm.assert_index_equal(result, expected)
            assert result.name == expected.name
            assert result.freq == expected.freq
            assert result.tz == expected.tz

            # reset freq to None
            result = ts.drop(ts.index[[1, 3, 5, 7, 9]]).index
            expected = DatetimeIndex(
                [
                    "2000-01-01 09:00",
                    "2000-01-01 11:00",
                    "2000-01-01 13:00",
                    "2000-01-01 15:00",
                    "2000-01-01 17:00",
                ],
                freq=None,
                name="idx",
                tz=tz,
            )
            tm.assert_index_equal(result, expected)
            assert result.name == expected.name
            assert result.freq == expected.freq
            assert result.tz == expected.tz
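As a quick illustration of the freq handling that test_delete_slice checks, a minimal sketch (dates are illustrative):

import pandas as pd

idx = pd.date_range("2000-01-01", periods=10, freq="D", name="idx")
print(idx.delete(0).freq)            # deleting from an end keeps the 'D' freq
print(idx.delete(slice(0, 3)).freq)  # a contiguous slice from the start also keeps it
print(idx.delete([3, 5]).freq)       # non-contiguous deletions reset freq to None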
Example #9
 def test_get_loc_nat(self):
     # GH#20464
     index = DatetimeIndex(["1/3/2000", "NaT"])
     assert index.get_loc(pd.NaT) == 1
Example #10
    def test_insert(self):
        idx = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"],
                            name="idx")

        result = idx.insert(2, datetime(2000, 1, 5))
        exp = DatetimeIndex(
            ["2000-01-04", "2000-01-01", "2000-01-05", "2000-01-02"],
            name="idx")
        tm.assert_index_equal(result, exp)

        # insertion of non-datetime should coerce to object index
        result = idx.insert(1, "inserted")
        expected = Index(
            [
                datetime(2000, 1, 4),
                "inserted",
                datetime(2000, 1, 1),
                datetime(2000, 1, 2),
            ],
            name="idx",
        )
        assert not isinstance(result, DatetimeIndex)
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name

        idx = date_range("1/1/2000", periods=3, freq="M", name="idx")

        # preserve freq
        expected_0 = DatetimeIndex(
            ["1999-12-31", "2000-01-31", "2000-02-29", "2000-03-31"],
            name="idx",
            freq="M",
        )
        expected_3 = DatetimeIndex(
            ["2000-01-31", "2000-02-29", "2000-03-31", "2000-04-30"],
            name="idx",
            freq="M",
        )

        # reset freq to None
        expected_1_nofreq = DatetimeIndex(
            ["2000-01-31", "2000-01-31", "2000-02-29", "2000-03-31"],
            name="idx",
            freq=None,
        )
        expected_3_nofreq = DatetimeIndex(
            ["2000-01-31", "2000-02-29", "2000-03-31", "2000-01-02"],
            name="idx",
            freq=None,
        )

        cases = [
            (0, datetime(1999, 12, 31), expected_0),
            (-3, datetime(1999, 12, 31), expected_0),
            (3, datetime(2000, 4, 30), expected_3),
            (1, datetime(2000, 1, 31), expected_1_nofreq),
            (3, datetime(2000, 1, 2), expected_3_nofreq),
        ]

        for n, d, expected in cases:
            result = idx.insert(n, d)
            tm.assert_index_equal(result, expected)
            assert result.name == expected.name
            assert result.freq == expected.freq

        # reset freq to None
        result = idx.insert(3, datetime(2000, 1, 2))
        expected = DatetimeIndex(
            ["2000-01-31", "2000-02-29", "2000-03-31", "2000-01-02"],
            name="idx",
            freq=None,
        )
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name
        assert result.freq is None

        # see gh-7299
        idx = date_range("1/1/2000",
                         periods=3,
                         freq="D",
                         tz="Asia/Tokyo",
                         name="idx")
        with pytest.raises(ValueError):
            idx.insert(3, pd.Timestamp("2000-01-04"))
        with pytest.raises(ValueError):
            idx.insert(3, datetime(2000, 1, 4))
        with pytest.raises(ValueError):
            idx.insert(3, pd.Timestamp("2000-01-04", tz="US/Eastern"))
        with pytest.raises(ValueError):
            idx.insert(
                3, datetime(2000, 1, 4, tzinfo=pytz.timezone("US/Eastern")))

        for tz in ["US/Pacific", "Asia/Singapore"]:
            idx = date_range("1/1/2000 09:00",
                             periods=6,
                             freq="H",
                             tz=tz,
                             name="idx")
            # preserve freq
            expected = date_range("1/1/2000 09:00",
                                  periods=7,
                                  freq="H",
                                  tz=tz,
                                  name="idx")
            for d in [
                    pd.Timestamp("2000-01-01 15:00", tz=tz),
                    pytz.timezone(tz).localize(datetime(2000, 1, 1, 15)),
            ]:

                result = idx.insert(6, d)
                tm.assert_index_equal(result, expected)
                assert result.name == expected.name
                assert result.freq == expected.freq
                assert result.tz == expected.tz

            expected = DatetimeIndex(
                [
                    "2000-01-01 09:00",
                    "2000-01-01 10:00",
                    "2000-01-01 11:00",
                    "2000-01-01 12:00",
                    "2000-01-01 13:00",
                    "2000-01-01 14:00",
                    "2000-01-01 10:00",
                ],
                name="idx",
                tz=tz,
                freq=None,
            )
            # reset freq to None
            for d in [
                    pd.Timestamp("2000-01-01 10:00", tz=tz),
                    pytz.timezone(tz).localize(datetime(2000, 1, 1, 10)),
            ]:
                result = idx.insert(6, d)
                tm.assert_index_equal(result, expected)
                assert result.name == expected.name
                assert result.tz == expected.tz
                assert result.freq is None
Example #11
    def test_delete(self):
        idx = date_range(start="2000-01-01", periods=5, freq="M", name="idx")

        # preserve freq
        expected_0 = date_range(start="2000-02-01",
                                periods=4,
                                freq="M",
                                name="idx")
        expected_4 = date_range(start="2000-01-01",
                                periods=4,
                                freq="M",
                                name="idx")

        # reset freq to None
        expected_1 = DatetimeIndex(
            ["2000-01-31", "2000-03-31", "2000-04-30", "2000-05-31"],
            freq=None,
            name="idx",
        )

        cases = {
            0: expected_0,
            -5: expected_0,
            -1: expected_4,
            4: expected_4,
            1: expected_1,
        }
        for n, expected in cases.items():
            result = idx.delete(n)
            tm.assert_index_equal(result, expected)
            assert result.name == expected.name
            assert result.freq == expected.freq

        with pytest.raises((IndexError, ValueError)):
            # either depending on numpy version
            idx.delete(5)

        for tz in [None, "Asia/Tokyo", "US/Pacific"]:
            idx = date_range(start="2000-01-01 09:00",
                             periods=10,
                             freq="H",
                             name="idx",
                             tz=tz)

            expected = date_range(start="2000-01-01 10:00",
                                  periods=9,
                                  freq="H",
                                  name="idx",
                                  tz=tz)
            result = idx.delete(0)
            tm.assert_index_equal(result, expected)
            assert result.name == expected.name
            assert result.freqstr == "H"
            assert result.tz == expected.tz

            expected = date_range(start="2000-01-01 09:00",
                                  periods=9,
                                  freq="H",
                                  name="idx",
                                  tz=tz)
            result = idx.delete(-1)
            tm.assert_index_equal(result, expected)
            assert result.name == expected.name
            assert result.freqstr == "H"
            assert result.tz == expected.tz
Example #12
class TestDatetimeIndexOps:
    def test_ops_properties_basic(self, datetime_series):

        # sanity check that the behavior didn't change
        # GH#7206
        for op in ["year", "day", "second", "weekday"]:
            msg = f"'Series' object has no attribute '{op}'"
            with pytest.raises(AttributeError, match=msg):
                getattr(datetime_series, op)

        # attribute access should still work!
        s = Series(dict(year=2000, month=1, day=10))
        assert s.year == 2000
        assert s.month == 1
        assert s.day == 10
        msg = "'Series' object has no attribute 'weekday'"
        with pytest.raises(AttributeError, match=msg):
            s.weekday

    def test_repeat_range(self, tz_naive_fixture):
        tz = tz_naive_fixture
        rng = date_range("1/1/2000", "1/1/2001")

        result = rng.repeat(5)
        assert result.freq is None
        assert len(result) == 5 * len(rng)

        index = pd.date_range("2001-01-01", periods=2, freq="D", tz=tz)
        exp = pd.DatetimeIndex(
            ["2001-01-01", "2001-01-01", "2001-01-02", "2001-01-02"], tz=tz
        )
        for res in [index.repeat(2), np.repeat(index, 2)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

        index = pd.date_range("2001-01-01", periods=2, freq="2D", tz=tz)
        exp = pd.DatetimeIndex(
            ["2001-01-01", "2001-01-01", "2001-01-03", "2001-01-03"], tz=tz
        )
        for res in [index.repeat(2), np.repeat(index, 2)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

        index = pd.DatetimeIndex(["2001-01-01", "NaT", "2003-01-01"], tz=tz)
        exp = pd.DatetimeIndex(
            [
                "2001-01-01",
                "2001-01-01",
                "2001-01-01",
                "NaT",
                "NaT",
                "NaT",
                "2003-01-01",
                "2003-01-01",
                "2003-01-01",
            ],
            tz=tz,
        )
        for res in [index.repeat(3), np.repeat(index, 3)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

    def test_repeat(self, tz_naive_fixture):
        tz = tz_naive_fixture
        reps = 2
        msg = "the 'axis' parameter is not supported"

        rng = pd.date_range(start="2016-01-01", periods=2, freq="30Min", tz=tz)

        expected_rng = DatetimeIndex(
            [
                Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"),
                Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"),
                Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"),
                Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"),
            ]
        )

        res = rng.repeat(reps)
        tm.assert_index_equal(res, expected_rng)
        assert res.freq is None

        tm.assert_index_equal(np.repeat(rng, reps), expected_rng)
        with pytest.raises(ValueError, match=msg):
            np.repeat(rng, reps, axis=1)

    def test_resolution(self, tz_naive_fixture):
        tz = tz_naive_fixture
        for freq, expected in zip(
            ["A", "Q", "M", "D", "H", "T", "S", "L", "U"],
            [
                "day",
                "day",
                "day",
                "day",
                "hour",
                "minute",
                "second",
                "millisecond",
                "microsecond",
            ],
        ):
            idx = pd.date_range(start="2013-04-01", periods=30, freq=freq, tz=tz)
            assert idx.resolution == expected

    def test_value_counts_unique(self, tz_naive_fixture):
        tz = tz_naive_fixture
        # GH 7735
        idx = pd.date_range("2011-01-01 09:00", freq="H", periods=10)
        # create repeated values, 'n'th element is repeated by n+1 times
        idx = DatetimeIndex(np.repeat(idx.values, range(1, len(idx) + 1)), tz=tz)

        exp_idx = pd.date_range("2011-01-01 18:00", freq="-1H", periods=10, tz=tz)
        expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64")

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        expected = pd.date_range("2011-01-01 09:00", freq="H", periods=10, tz=tz)
        tm.assert_index_equal(idx.unique(), expected)

        idx = DatetimeIndex(
            [
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 08:00",
                "2013-01-01 08:00",
                pd.NaT,
            ],
            tz=tz,
        )

        exp_idx = DatetimeIndex(["2013-01-01 09:00", "2013-01-01 08:00"], tz=tz)
        expected = Series([3, 2], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        exp_idx = DatetimeIndex(["2013-01-01 09:00", "2013-01-01 08:00", pd.NaT], tz=tz)
        expected = Series([3, 2, 1], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(dropna=False), expected)

        tm.assert_index_equal(idx.unique(), exp_idx)

    def test_nonunique_contains(self):
        # GH 9512
        for idx in map(
            DatetimeIndex,
            (
                [0, 1, 0],
                [0, 0, -1],
                [0, -1, -1],
                ["2015", "2015", "2016"],
                ["2015", "2015", "2014"],
            ),
        ):
            assert idx[0] in idx

    @pytest.mark.parametrize(
        "idx",
        [
            DatetimeIndex(
                ["2011-01-01", "2011-01-02", "2011-01-03"], freq="D", name="idx"
            ),
            DatetimeIndex(
                ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
                freq="H",
                name="tzidx",
                tz="Asia/Tokyo",
            ),
        ],
    )
    def test_order_with_freq(self, idx):
        ordered = idx.sort_values()
        tm.assert_index_equal(ordered, idx)
        assert ordered.freq == idx.freq

        ordered = idx.sort_values(ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        assert ordered.freq == expected.freq
        assert ordered.freq.n == -1

        ordered, indexer = idx.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, idx)
        tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False)
        assert ordered.freq == idx.freq

        ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False)
        assert ordered.freq == expected.freq
        assert ordered.freq.n == -1

    @pytest.mark.parametrize(
        "index_dates,expected_dates",
        [
            (
                ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
                ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
            (
                ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
                ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
            (
                [pd.NaT, "2011-01-03", "2011-01-05", "2011-01-02", pd.NaT],
                [pd.NaT, pd.NaT, "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
        ],
    )
    def test_order_without_freq(self, index_dates, expected_dates, tz_naive_fixture):
        tz = tz_naive_fixture

        # without freq
        index = DatetimeIndex(index_dates, tz=tz, name="idx")
        expected = DatetimeIndex(expected_dates, tz=tz, name="idx")

        ordered = index.sort_values()
        tm.assert_index_equal(ordered, expected)
        assert ordered.freq is None

        ordered = index.sort_values(ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])
        assert ordered.freq is None

        ordered, indexer = index.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, expected)

        exp = np.array([0, 4, 3, 1, 2])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq is None

        ordered, indexer = index.sort_values(return_indexer=True, ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])

        exp = np.array([2, 1, 3, 4, 0])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq is None

    def test_drop_duplicates_metadata(self, freq_sample):
        # GH 10115
        idx = pd.date_range("2011-01-01", freq=freq_sample, periods=10, name="idx")
        result = idx.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert idx.freq == result.freq

        idx_dup = idx.append(idx)
        assert idx_dup.freq is None  # freq is reset
        result = idx_dup.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert result.freq is None

    @pytest.mark.parametrize(
        "keep, expected, index",
        [
            ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)),
            ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)),
            (
                False,
                np.concatenate(([True] * 5, [False] * 5, [True] * 5)),
                np.arange(5, 10),
            ),
        ],
    )
    def test_drop_duplicates(self, freq_sample, keep, expected, index):
        # to check Index/Series compat
        idx = pd.date_range("2011-01-01", freq=freq_sample, periods=10, name="idx")
        idx = idx.append(idx[:5])

        tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected)
        expected = idx[~expected]

        result = idx.drop_duplicates(keep=keep)
        tm.assert_index_equal(result, expected)

        result = Series(idx).drop_duplicates(keep=keep)
        tm.assert_series_equal(result, Series(expected, index=index))

    def test_infer_freq(self, freq_sample):
        # GH 11018
        idx = pd.date_range("2011-01-01 09:00:00", freq=freq_sample, periods=10)
        result = pd.DatetimeIndex(idx.asi8, freq="infer")
        tm.assert_index_equal(idx, result)
        assert result.freq == freq_sample

    def test_nat(self, tz_naive_fixture):
        tz = tz_naive_fixture
        assert pd.DatetimeIndex._na_value is pd.NaT
        assert pd.DatetimeIndex([])._na_value is pd.NaT

        idx = pd.DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
        assert idx.hasnans is False
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp))

        idx = pd.DatetimeIndex(["2011-01-01", "NaT"], tz=tz)
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
        assert idx.hasnans is True
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp))

    def test_equals(self):
        # GH 13107
        idx = pd.DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"])
        assert idx.equals(idx)
        assert idx.equals(idx.copy())
        assert idx.equals(idx.astype(object))
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        assert not idx.equals(list(idx))
        assert not idx.equals(pd.Series(idx))

        idx2 = pd.DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"], tz="US/Pacific")
        assert not idx.equals(idx2)
        assert not idx.equals(idx2.copy())
        assert not idx.equals(idx2.astype(object))
        assert not idx.astype(object).equals(idx2)
        assert not idx.equals(list(idx2))
        assert not idx.equals(pd.Series(idx2))

        # same internal, different tz
        idx3 = pd.DatetimeIndex(idx.asi8, tz="US/Pacific")
        tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
        assert not idx.equals(idx3)
        assert not idx.equals(idx3.copy())
        assert not idx.equals(idx3.astype(object))
        assert not idx.astype(object).equals(idx3)
        assert not idx.equals(list(idx3))
        assert not idx.equals(pd.Series(idx3))

        # check that we do not raise when comparing with OutOfBounds objects
        oob = pd.Index([datetime(2500, 1, 1)] * 3, dtype=object)
        assert not idx.equals(oob)
        assert not idx2.equals(oob)
        assert not idx3.equals(oob)

        # check that we do not raise when comparing with OutOfBounds dt64
        oob2 = oob.map(np.datetime64)
        assert not idx.equals(oob2)
        assert not idx2.equals(oob2)
        assert not idx3.equals(oob2)

    @pytest.mark.parametrize("values", [["20180101", "20180103", "20180105"], []])
    @pytest.mark.parametrize("freq", ["2D", Day(2), "2B", BDay(2), "48H", Hour(48)])
    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
    def test_freq_setter(self, values, freq, tz):
        # GH 20678
        idx = DatetimeIndex(values, tz=tz)

        # can set to an offset, converting from string if necessary
        idx._data.freq = freq
        assert idx.freq == freq
        assert isinstance(idx.freq, DateOffset)

        # can reset to None
        idx._data.freq = None
        assert idx.freq is None

    def test_freq_setter_errors(self):
        # GH 20678
        idx = DatetimeIndex(["20180101", "20180103", "20180105"])

        # setting with an incompatible freq
        msg = (
            "Inferred frequency 2D from passed values does not conform to "
            "passed frequency 5D"
        )
        with pytest.raises(ValueError, match=msg):
            idx._data.freq = "5D"

        # setting with non-freq string
        with pytest.raises(ValueError, match="Invalid frequency"):
            idx._data.freq = "foo"
Example #13
 def setup(self):
     idx = DatetimeIndex(start=datetime(1990, 1, 1),
                         end=datetime(2012, 1, 1),
                         freq='D')
     df = DataFrame({'a': 0, 'b': 1, 'c': 2}, index=idx)
     self.data_frames = dict(enumerate([df] * 100))
Example #14
 def idx(self):
     naive = DatetimeIndex(["2013-1-1 13:00", "2013-1-2 14:00"], name="B")
     idx = naive.tz_localize("US/Pacific")
     return idx
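For context on the fixture above, a minimal usage sketch with the same illustrative timestamps:

import pandas as pd

naive = pd.DatetimeIndex(["2013-1-1 13:00", "2013-1-2 14:00"], name="B")
aware = naive.tz_localize("US/Pacific")
print(aware.tz)                  # US/Pacific
print(aware[0].isoformat())      # 2013-01-01T13:00:00-08:00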
Example #15
class TestSeriesDatetimeValues:
    def test_dt_namespace_accessor(self):

        # GH 7207, 11128
        # test .dt namespace accessor

        ok_for_period = PeriodArray._datetimelike_ops
        ok_for_period_methods = ["strftime", "to_timestamp", "asfreq"]
        ok_for_dt = DatetimeIndex._datetimelike_ops
        ok_for_dt_methods = [
            "to_period",
            "to_pydatetime",
            "tz_localize",
            "tz_convert",
            "normalize",
            "strftime",
            "round",
            "floor",
            "ceil",
            "day_name",
            "month_name",
        ]
        ok_for_td = TimedeltaIndex._datetimelike_ops
        ok_for_td_methods = [
            "components",
            "to_pytimedelta",
            "total_seconds",
            "round",
            "floor",
            "ceil",
        ]

        def get_expected(s, name):
            result = getattr(Index(s._values), prop)
            if isinstance(result, np.ndarray):
                if is_integer_dtype(result):
                    result = result.astype("int64")
            elif not is_list_like(result) or isinstance(result, pd.DataFrame):
                return result
            return Series(result, index=s.index, name=s.name)

        def compare(s, name):
            a = getattr(s.dt, prop)
            b = get_expected(s, prop)
            if not (is_list_like(a) and is_list_like(b)):
                assert a == b
            elif isinstance(a, pd.DataFrame):
                tm.assert_frame_equal(a, b)
            else:
                tm.assert_series_equal(a, b)

        # datetimeindex
        cases = [
            Series(date_range("20130101", periods=5), name="xxx"),
            Series(date_range("20130101", periods=5, freq="s"), name="xxx"),
            Series(date_range("20130101 00:00:00", periods=5, freq="ms"),
                   name="xxx"),
        ]
        for s in cases:
            for prop in ok_for_dt:
                # we test freq below
                if prop != "freq":
                    compare(s, prop)

            for prop in ok_for_dt_methods:
                getattr(s.dt, prop)

            result = s.dt.to_pydatetime()
            assert isinstance(result, np.ndarray)
            assert result.dtype == object

            result = s.dt.tz_localize("US/Eastern")
            exp_values = DatetimeIndex(s.values).tz_localize("US/Eastern")
            expected = Series(exp_values, index=s.index, name="xxx")
            tm.assert_series_equal(result, expected)

            tz_result = result.dt.tz
            assert str(tz_result) == "US/Eastern"
            freq_result = s.dt.freq
            assert freq_result == DatetimeIndex(s.values, freq="infer").freq

            # let's localize, then convert
            result = s.dt.tz_localize("UTC").dt.tz_convert("US/Eastern")
            exp_values = (DatetimeIndex(
                s.values).tz_localize("UTC").tz_convert("US/Eastern"))
            expected = Series(exp_values, index=s.index, name="xxx")
            tm.assert_series_equal(result, expected)

        # datetimeindex with tz
        s = Series(date_range("20130101", periods=5, tz="US/Eastern"),
                   name="xxx")
        for prop in ok_for_dt:

            # we test freq below
            if prop != "freq":
                compare(s, prop)

        for prop in ok_for_dt_methods:
            getattr(s.dt, prop)

        result = s.dt.to_pydatetime()
        assert isinstance(result, np.ndarray)
        assert result.dtype == object

        result = s.dt.tz_convert("CET")
        expected = Series(s._values.tz_convert("CET"),
                          index=s.index,
                          name="xxx")
        tm.assert_series_equal(result, expected)

        tz_result = result.dt.tz
        assert str(tz_result) == "CET"
        freq_result = s.dt.freq
        assert freq_result == DatetimeIndex(s.values, freq="infer").freq

        # timedelta index
        cases = [
            Series(timedelta_range("1 day", periods=5),
                   index=list("abcde"),
                   name="xxx"),
            Series(timedelta_range("1 day 01:23:45", periods=5, freq="s"),
                   name="xxx"),
            Series(
                timedelta_range("2 days 01:23:45.012345", periods=5,
                                freq="ms"),
                name="xxx",
            ),
        ]
        for s in cases:
            for prop in ok_for_td:
                # we test freq below
                if prop != "freq":
                    compare(s, prop)

            for prop in ok_for_td_methods:
                getattr(s.dt, prop)

            result = s.dt.components
            assert isinstance(result, DataFrame)
            tm.assert_index_equal(result.index, s.index)

            result = s.dt.to_pytimedelta()
            assert isinstance(result, np.ndarray)
            assert result.dtype == object

            result = s.dt.total_seconds()
            assert isinstance(result, pd.Series)
            assert result.dtype == "float64"

            freq_result = s.dt.freq
            assert freq_result == TimedeltaIndex(s.values, freq="infer").freq

        # both
        index = date_range("20130101", periods=3, freq="D")
        s = Series(date_range("20140204", periods=3, freq="s"),
                   index=index,
                   name="xxx")
        exp = Series(np.array([2014, 2014, 2014], dtype="int64"),
                     index=index,
                     name="xxx")
        tm.assert_series_equal(s.dt.year, exp)

        exp = Series(np.array([2, 2, 2], dtype="int64"),
                     index=index,
                     name="xxx")
        tm.assert_series_equal(s.dt.month, exp)

        exp = Series(np.array([0, 1, 2], dtype="int64"),
                     index=index,
                     name="xxx")
        tm.assert_series_equal(s.dt.second, exp)

        exp = pd.Series([s[0]] * 3, index=index, name="xxx")
        tm.assert_series_equal(s.dt.normalize(), exp)

        # periodindex
        cases = [
            Series(period_range("20130101", periods=5, freq="D"), name="xxx")
        ]
        for s in cases:
            for prop in ok_for_period:
                # we test freq below
                if prop != "freq":
                    compare(s, prop)

            for prop in ok_for_period_methods:
                getattr(s.dt, prop)

            freq_result = s.dt.freq
            assert freq_result == PeriodIndex(s.values).freq

        # test limited display api
        def get_dir(s):
            results = [r for r in s.dt.__dir__() if not r.startswith("_")]
            return sorted(set(results))

        s = Series(date_range("20130101", periods=5, freq="D"), name="xxx")
        results = get_dir(s)
        tm.assert_almost_equal(results,
                               sorted(set(ok_for_dt + ok_for_dt_methods)))

        s = Series(
            period_range("20130101", periods=5, freq="D",
                         name="xxx").astype(object))
        results = get_dir(s)
        tm.assert_almost_equal(
            results, sorted(set(ok_for_period + ok_for_period_methods)))

        # 11295
        # ambiguous time error on the conversions
        s = Series(pd.date_range("2015-01-01", "2016-01-01", freq="T"),
                   name="xxx")
        s = s.dt.tz_localize("UTC").dt.tz_convert("America/Chicago")
        results = get_dir(s)
        tm.assert_almost_equal(results,
                               sorted(set(ok_for_dt + ok_for_dt_methods)))
        exp_values = pd.date_range("2015-01-01",
                                   "2016-01-01",
                                   freq="T",
                                   tz="UTC").tz_convert("America/Chicago")
        expected = Series(exp_values, name="xxx")
        tm.assert_series_equal(s, expected)

        # no setting allowed
        s = Series(date_range("20130101", periods=5, freq="D"), name="xxx")
        with pytest.raises(ValueError, match="modifications"):
            s.dt.hour = 5

        # trying to set a copy
        with pd.option_context("chained_assignment", "raise"):
            with pytest.raises(com.SettingWithCopyError):
                s.dt.hour[0] = 5

    @pytest.mark.parametrize(
        "method, dates",
        [
            ["round", ["2012-01-02", "2012-01-02", "2012-01-01"]],
            ["floor", ["2012-01-01", "2012-01-01", "2012-01-01"]],
            ["ceil", ["2012-01-02", "2012-01-02", "2012-01-02"]],
        ],
    )
    def test_dt_round(self, method, dates):
        # round
        s = Series(
            pd.to_datetime([
                "2012-01-01 13:00:00", "2012-01-01 12:01:00",
                "2012-01-01 08:00:00"
            ]),
            name="xxx",
        )
        result = getattr(s.dt, method)("D")
        expected = Series(pd.to_datetime(dates), name="xxx")
        tm.assert_series_equal(result, expected)

    def test_dt_round_tz(self):
        s = Series(
            pd.to_datetime([
                "2012-01-01 13:00:00", "2012-01-01 12:01:00",
                "2012-01-01 08:00:00"
            ]),
            name="xxx",
        )
        result = s.dt.tz_localize("UTC").dt.tz_convert("US/Eastern").dt.round(
            "D")

        exp_values = pd.to_datetime(["2012-01-01", "2012-01-01",
                                     "2012-01-01"]).tz_localize("US/Eastern")
        expected = Series(exp_values, name="xxx")
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("method", ["ceil", "round", "floor"])
    def test_dt_round_tz_ambiguous(self, method):
        # GH 18946 round near "fall back" DST
        df1 = pd.DataFrame(
            [
                pd.to_datetime("2017-10-29 02:00:00+02:00", utc=True),
                pd.to_datetime("2017-10-29 02:00:00+01:00", utc=True),
                pd.to_datetime("2017-10-29 03:00:00+01:00", utc=True),
            ],
            columns=["date"],
        )
        df1["date"] = df1["date"].dt.tz_convert("Europe/Madrid")
        # infer
        result = getattr(df1.date.dt, method)("H", ambiguous="infer")
        expected = df1["date"]
        tm.assert_series_equal(result, expected)

        # bool-array
        result = getattr(df1.date.dt, method)("H",
                                              ambiguous=[True, False, False])
        tm.assert_series_equal(result, expected)

        # NaT
        result = getattr(df1.date.dt, method)("H", ambiguous="NaT")
        expected = df1["date"].copy()
        expected.iloc[0:2] = pd.NaT
        tm.assert_series_equal(result, expected)

        # raise
        with pytest.raises(pytz.AmbiguousTimeError):
            getattr(df1.date.dt, method)("H", ambiguous="raise")

    @pytest.mark.parametrize(
        "method, ts_str, freq",
        [
            ["ceil", "2018-03-11 01:59:00-0600", "5min"],
            ["round", "2018-03-11 01:59:00-0600", "5min"],
            ["floor", "2018-03-11 03:01:00-0500", "2H"],
        ],
    )
    def test_dt_round_tz_nonexistent(self, method, ts_str, freq):
        # GH 23324 round near "spring forward" DST
        s = Series([pd.Timestamp(ts_str, tz="America/Chicago")])
        result = getattr(s.dt, method)(freq, nonexistent="shift_forward")
        expected = Series(
            [pd.Timestamp("2018-03-11 03:00:00", tz="America/Chicago")])
        tm.assert_series_equal(result, expected)

        result = getattr(s.dt, method)(freq, nonexistent="NaT")
        expected = Series([pd.NaT]).dt.tz_localize(result.dt.tz)
        tm.assert_series_equal(result, expected)

        with pytest.raises(pytz.NonExistentTimeError,
                           match="2018-03-11 02:00:00"):
            getattr(s.dt, method)(freq, nonexistent="raise")

    def test_dt_namespace_accessor_categorical(self):
        # GH 19468
        dti = DatetimeIndex(["20171111", "20181212"]).repeat(2)
        s = Series(pd.Categorical(dti), name="foo")
        result = s.dt.year
        expected = Series([2017, 2017, 2018, 2018], name="foo")
        tm.assert_series_equal(result, expected)

    def test_dt_tz_localize_categorical(self, tz_aware_fixture):
        # GH 27952
        tz = tz_aware_fixture
        datetimes = pd.Series(["2019-01-01", "2019-01-01", "2019-01-02"],
                              dtype="datetime64[ns]")
        categorical = datetimes.astype("category")
        result = categorical.dt.tz_localize(tz)
        expected = datetimes.dt.tz_localize(tz)
        tm.assert_series_equal(result, expected)

    def test_dt_tz_convert_categorical(self, tz_aware_fixture):
        # GH 27952
        tz = tz_aware_fixture
        datetimes = pd.Series(["2019-01-01", "2019-01-01", "2019-01-02"],
                              dtype="datetime64[ns, MET]")
        categorical = datetimes.astype("category")
        result = categorical.dt.tz_convert(tz)
        expected = datetimes.dt.tz_convert(tz)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("accessor", ["year", "month", "day"])
    def test_dt_other_accessors_categorical(self, accessor):
        # GH 27952
        datetimes = pd.Series(["2018-01-01", "2018-01-01", "2019-01-02"],
                              dtype="datetime64[ns]")
        categorical = datetimes.astype("category")
        result = getattr(categorical.dt, accessor)
        expected = getattr(datetimes.dt, accessor)
        tm.assert_series_equal(result, expected)

    def test_dt_accessor_no_new_attributes(self):
        # https://github.com/pandas-dev/pandas/issues/10673
        s = Series(date_range("20130101", periods=5, freq="D"))
        with pytest.raises(AttributeError,
                           match="You cannot add any new attribute"):
            s.dt.xlabel = "a"

    @pytest.mark.parametrize("time_locale",
                             [None] if tm.get_locales() is None else [None] +
                             tm.get_locales())
    def test_dt_accessor_datetime_name_accessors(self, time_locale):
        # Test Monday -> Sunday and January -> December, in that sequence
        if time_locale is None:
            # If the time_locale is None, day-name and month_name should
            # return the english attributes
            expected_days = [
                "Monday",
                "Tuesday",
                "Wednesday",
                "Thursday",
                "Friday",
                "Saturday",
                "Sunday",
            ]
            expected_months = [
                "January",
                "February",
                "March",
                "April",
                "May",
                "June",
                "July",
                "August",
                "September",
                "October",
                "November",
                "December",
            ]
        else:
            with tm.set_locale(time_locale, locale.LC_TIME):
                expected_days = calendar.day_name[:]
                expected_months = calendar.month_name[1:]

        s = Series(
            date_range(freq="D", start=datetime(1998, 1, 1), periods=365))
        english_days = [
            "Monday",
            "Tuesday",
            "Wednesday",
            "Thursday",
            "Friday",
            "Saturday",
            "Sunday",
        ]
        for day, name, eng_name in zip(range(4, 11), expected_days,
                                       english_days):
            name = name.capitalize()
            assert s.dt.day_name(locale=time_locale)[day] == name
        s = s.append(Series([pd.NaT]))
        assert np.isnan(s.dt.day_name(locale=time_locale).iloc[-1])

        s = Series(date_range(freq="M", start="2012", end="2013"))
        result = s.dt.month_name(locale=time_locale)
        expected = Series([month.capitalize() for month in expected_months])

        # work around https://github.com/pandas-dev/pandas/issues/22342
        result = result.str.normalize("NFD")
        expected = expected.str.normalize("NFD")

        tm.assert_series_equal(result, expected)

        for s_date, expected in zip(s, expected_months):
            result = s_date.month_name(locale=time_locale)
            expected = expected.capitalize()

            result = unicodedata.normalize("NFD", result)
            expected = unicodedata.normalize("NFD", expected)

            assert result == expected

        s = s.append(Series([pd.NaT]))
        assert np.isnan(s.dt.month_name(locale=time_locale).iloc[-1])

    def test_strftime(self):
        # GH 10086
        s = Series(date_range("20130101", periods=5))
        result = s.dt.strftime("%Y/%m/%d")
        expected = Series([
            "2013/01/01", "2013/01/02", "2013/01/03", "2013/01/04",
            "2013/01/05"
        ])
        tm.assert_series_equal(result, expected)

        s = Series(date_range("2015-02-03 11:22:33.4567", periods=5))
        result = s.dt.strftime("%Y/%m/%d %H-%M-%S")
        expected = Series([
            "2015/02/03 11-22-33",
            "2015/02/04 11-22-33",
            "2015/02/05 11-22-33",
            "2015/02/06 11-22-33",
            "2015/02/07 11-22-33",
        ])
        tm.assert_series_equal(result, expected)

        s = Series(period_range("20130101", periods=5))
        result = s.dt.strftime("%Y/%m/%d")
        expected = Series([
            "2013/01/01", "2013/01/02", "2013/01/03", "2013/01/04",
            "2013/01/05"
        ])
        tm.assert_series_equal(result, expected)

        s = Series(
            period_range("2015-02-03 11:22:33.4567", periods=5, freq="s"))
        result = s.dt.strftime("%Y/%m/%d %H-%M-%S")
        expected = Series([
            "2015/02/03 11-22-33",
            "2015/02/03 11-22-34",
            "2015/02/03 11-22-35",
            "2015/02/03 11-22-36",
            "2015/02/03 11-22-37",
        ])
        tm.assert_series_equal(result, expected)

        s = Series(date_range("20130101", periods=5))
        s.iloc[0] = pd.NaT
        result = s.dt.strftime("%Y/%m/%d")
        expected = Series(
            [np.nan, "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05"])
        tm.assert_series_equal(result, expected)

        datetime_index = date_range("20150301", periods=5)
        result = datetime_index.strftime("%Y/%m/%d")

        expected = Index(
            [
                "2015/03/01", "2015/03/02", "2015/03/03", "2015/03/04",
                "2015/03/05"
            ],
            dtype=np.object_,
        )
        # dtype may be S10 or U10 depending on python version
        tm.assert_index_equal(result, expected)

        period_index = period_range("20150301", periods=5)
        result = period_index.strftime("%Y/%m/%d")
        expected = Index(
            [
                "2015/03/01", "2015/03/02", "2015/03/03", "2015/03/04",
                "2015/03/05"
            ],
            dtype="=U10",
        )
        tm.assert_index_equal(result, expected)

        s = Series(
            [datetime(2013, 1, 1, 2, 32, 59),
             datetime(2013, 1, 2, 14, 32, 1)])
        result = s.dt.strftime("%Y-%m-%d %H:%M:%S")
        expected = Series(["2013-01-01 02:32:59", "2013-01-02 14:32:01"])
        tm.assert_series_equal(result, expected)

        s = Series(period_range("20130101", periods=4, freq="H"))
        result = s.dt.strftime("%Y/%m/%d %H:%M:%S")
        expected = Series([
            "2013/01/01 00:00:00",
            "2013/01/01 01:00:00",
            "2013/01/01 02:00:00",
            "2013/01/01 03:00:00",
        ])
        tm.assert_series_equal(result, expected)

        s = Series(period_range("20130101", periods=4, freq="L"))
        result = s.dt.strftime("%Y/%m/%d %H:%M:%S.%l")
        expected = Series([
            "2013/01/01 00:00:00.000",
            "2013/01/01 00:00:00.001",
            "2013/01/01 00:00:00.002",
            "2013/01/01 00:00:00.003",
        ])
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "data",
        [
            DatetimeIndex(["2019-01-01", pd.NaT]),
            PeriodIndex(["2019-01-01", pd.NaT], dtype="period[D]"),
        ],
    )
    def test_strftime_nat(self, data):
        # GH 29578
        s = Series(data)
        result = s.dt.strftime("%Y-%m-%d")
        expected = Series(["2019-01-01", np.nan])
        tm.assert_series_equal(result, expected)

    def test_valid_dt_with_missing_values(self):

        from datetime import date, time

        # GH 8689
        s = Series(date_range("20130101", periods=5, freq="D"))
        s.iloc[2] = pd.NaT

        for attr in [
                "microsecond", "nanosecond", "second", "minute", "hour", "day"
        ]:
            expected = getattr(s.dt, attr).copy()
            expected.iloc[2] = np.nan
            result = getattr(s.dt, attr)
            tm.assert_series_equal(result, expected)

        result = s.dt.date
        expected = Series(
            [
                date(2013, 1, 1),
                date(2013, 1, 2),
                np.nan,
                date(2013, 1, 4),
                date(2013, 1, 5),
            ],
            dtype="object",
        )
        tm.assert_series_equal(result, expected)

        result = s.dt.time
        expected = Series([time(0), time(0), np.nan,
                           time(0), time(0)],
                          dtype="object")
        tm.assert_series_equal(result, expected)

    def test_dt_accessor_api(self):
        # GH 9322
        from pandas.core.indexes.accessors import (
            CombinedDatetimelikeProperties,
            DatetimeProperties,
        )

        assert Series.dt is CombinedDatetimelikeProperties

        s = Series(date_range("2000-01-01", periods=3))
        assert isinstance(s.dt, DatetimeProperties)

    @pytest.mark.parametrize("ser", [
        Series(np.arange(5)),
        Series(list("abcde")),
        Series(np.random.randn(5))
    ])
    def test_dt_accessor_invalid(self, ser):
        # GH#9322 check that series with incorrect dtypes don't have attr
        with pytest.raises(AttributeError, match="only use .dt accessor"):
            ser.dt
        assert not hasattr(ser, "dt")

    def test_dt_accessor_updates_on_inplace(self):
        s = Series(pd.date_range("2018-01-01", periods=10))
        s[2] = None
        s.fillna(pd.Timestamp("2018-01-01"), inplace=True)
        result = s.dt.date
        assert result[0] == result[2]

    def test_date_tz(self):
        # GH11757
        rng = pd.DatetimeIndex(
            ["2014-04-04 23:56", "2014-07-18 21:24", "2015-11-22 22:14"],
            tz="US/Eastern",
        )
        s = Series(rng)
        expected = Series(
            [date(2014, 4, 4),
             date(2014, 7, 18),
             date(2015, 11, 22)])
        tm.assert_series_equal(s.dt.date, expected)
        tm.assert_series_equal(s.apply(lambda x: x.date()), expected)

    def test_dt_timetz_accessor(self, tz_naive_fixture):
        # GH21358
        tz = maybe_get_tz(tz_naive_fixture)

        dtindex = pd.DatetimeIndex(
            ["2014-04-04 23:56", "2014-07-18 21:24", "2015-11-22 22:14"],
            tz=tz)
        s = Series(dtindex)
        expected = Series([
            time(23, 56, tzinfo=tz),
            time(21, 24, tzinfo=tz),
            time(22, 14, tzinfo=tz)
        ])
        result = s.dt.timetz
        tm.assert_series_equal(result, expected)

    def test_setitem_with_string_index(self):
        # GH 23451
        x = pd.Series([1, 2, 3], index=["Date", "b", "other"])
        x["Date"] = date.today()
        assert x.Date == date.today()
        assert x["Date"] == date.today()

    def test_setitem_with_different_tz(self):
        # GH#24024
        ser = pd.Series(pd.date_range("2000", periods=2, tz="US/Central"))
        ser[0] = pd.Timestamp("2000", tz="US/Eastern")
        expected = pd.Series(
            [
                pd.Timestamp("2000-01-01 00:00:00-05:00", tz="US/Eastern"),
                pd.Timestamp("2000-01-02 00:00:00-06:00", tz="US/Central"),
            ],
            dtype=object,
        )
        tm.assert_series_equal(ser, expected)

    @pytest.mark.parametrize(
        "input_series, expected_output",
        [
            [["2020-01-01"], [[2020, 1, 3]]],
            [[pd.NaT], [[np.NaN, np.NaN, np.NaN]]],
            [["2019-12-31", "2019-12-29"], [[2020, 1, 2], [2019, 52, 7]]],
            [["2010-01-01", pd.NaT], [[2009, 53, 5], [np.NaN, np.NaN, np.NaN]]
             ],
        ],
    )
    def test_isocalendar(self, input_series, expected_output):
        result = pd.to_datetime(pd.Series(input_series)).dt.isocalendar
        expected_frame = pd.DataFrame(expected_output,
                                      columns=["year", "week", "day"],
                                      dtype="UInt32")
        tm.assert_frame_equal(result, expected_frame)
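A standalone sketch of the behaviour this test exercises, assuming a pandas release where Series.dt.isocalendar() is exposed as a method returning a UInt32 DataFrame (pandas >= 1.1; in the snapshot above it is still accessed as an attribute):

import pandas as pd

# ISO 8601 year/week/day decomposition of a datetime Series.
ser = pd.to_datetime(pd.Series(["2020-01-01", "2019-12-29"]))
iso = ser.dt.isocalendar()
print(iso)
#    year  week  day
# 0  2020     1    3
# 1  2019    52    7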
Example #16
def create_block(typestr, placement, item_shape=None, num_offset=0):
    """
    Supported typestr:

        * float, f8, f4, f2
        * int, i8, i4, i2, i1
        * uint, u8, u4, u2, u1
        * complex, c16, c8
        * bool
        * object, string, O
        * datetime, dt, M8[ns], M8[ns, tz]
        * timedelta, td, m8[ns]
        * sparse (SparseArray with fill_value=0.0)
        * sparse_na (SparseArray with fill_value=np.nan)
        * category, category2

    """
    placement = BlockPlacement(placement)
    num_items = len(placement)

    if item_shape is None:
        item_shape = (N,)

    shape = (num_items,) + item_shape

    mat = get_numeric_mat(shape)

    if typestr in (
        "float",
        "f8",
        "f4",
        "f2",
        "int",
        "i8",
        "i4",
        "i2",
        "i1",
        "uint",
        "u8",
        "u4",
        "u2",
        "u1",
    ):
        values = mat.astype(typestr) + num_offset
    elif typestr in ("complex", "c16", "c8"):
        values = 1.0j * (mat.astype(typestr) + num_offset)
    elif typestr in ("object", "string", "O"):
        values = np.reshape([f"A{i:d}" for i in mat.ravel() + num_offset], shape)
    elif typestr in ("b", "bool"):
        values = np.ones(shape, dtype=np.bool_)
    elif typestr in ("datetime", "dt", "M8[ns]"):
        values = (mat * 1e9).astype("M8[ns]")
    elif typestr.startswith("M8[ns"):
        # datetime with tz
        m = re.search(r"M8\[ns,\s*(\w+\/?\w*)\]", typestr)
        assert m is not None, f"incompatible typestr -> {typestr}"
        tz = m.groups()[0]
        assert num_items == 1, "must have only 1 num items for a tz-aware"
        values = DatetimeIndex(np.arange(N) * 1e9, tz=tz)
    elif typestr in ("timedelta", "td", "m8[ns]"):
        values = (mat * 1).astype("m8[ns]")
    elif typestr in ("category",):
        values = Categorical([1, 1, 2, 2, 3, 3, 3, 3, 4, 4])
    elif typestr in ("category2",):
        values = Categorical(["a", "a", "a", "a", "b", "b", "c", "c", "c", "d"])
    elif typestr in ("sparse", "sparse_na"):
        # FIXME: doesn't support num_rows != 10
        assert shape[-1] == 10
        assert all(s == 1 for s in shape[:-1])
        if typestr.endswith("_na"):
            fill_value = np.nan
        else:
            fill_value = 0.0
        values = SparseArray(
            [fill_value, fill_value, 1, 2, 3, fill_value, 4, 5, fill_value, 6],
            fill_value=fill_value,
        )
        arr = values.sp_values.view()
        arr += num_offset - 1
    else:
        raise ValueError(f'Unsupported typestr: "{typestr}"')

    return make_block(values, placement=placement, ndim=len(shape))
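For orientation, a hedged sketch of the internal Block objects that create_block fabricates. It inspects an ordinary DataFrame's BlockManager rather than calling the helper, since get_numeric_mat, N, BlockPlacement and make_block are test-module utilities and pandas internals not shown here; ._mgr is a private attribute (pandas >= 1.0, ._data in older releases) and not a stable API.

import numpy as np
import pandas as pd

# Each column dtype is stored in one internal Block; same-dtype columns
# may be consolidated into a single block.
df = pd.DataFrame({
    "f": np.arange(3, dtype="f8"),
    "i": np.arange(3, dtype="i8"),
    "dt": pd.date_range("2013-01-01", periods=3),
})
for blk in df._mgr.blocks:
    # block class, dtype, and the row positions it occupies in the manager
    print(type(blk).__name__, blk.dtype, blk.mgr_locs.as_array)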
Example #17
    def test_dt_namespace_accessor(self):

        # GH 7207, 11128
        # test .dt namespace accessor

        ok_for_period = PeriodArray._datetimelike_ops
        ok_for_period_methods = ["strftime", "to_timestamp", "asfreq"]
        ok_for_dt = DatetimeIndex._datetimelike_ops
        ok_for_dt_methods = [
            "to_period",
            "to_pydatetime",
            "tz_localize",
            "tz_convert",
            "normalize",
            "strftime",
            "round",
            "floor",
            "ceil",
            "day_name",
            "month_name",
        ]
        ok_for_td = TimedeltaIndex._datetimelike_ops
        ok_for_td_methods = [
            "components",
            "to_pytimedelta",
            "total_seconds",
            "round",
            "floor",
            "ceil",
        ]

        def get_expected(s, name):
            result = getattr(Index(s._values), prop)
            if isinstance(result, np.ndarray):
                if is_integer_dtype(result):
                    result = result.astype("int64")
            elif not is_list_like(result) or isinstance(result, pd.DataFrame):
                return result
            return Series(result, index=s.index, name=s.name)

        def compare(s, name):
            a = getattr(s.dt, prop)
            b = get_expected(s, prop)
            if not (is_list_like(a) and is_list_like(b)):
                assert a == b
            elif isinstance(a, pd.DataFrame):
                tm.assert_frame_equal(a, b)
            else:
                tm.assert_series_equal(a, b)

        # datetimeindex
        cases = [
            Series(date_range("20130101", periods=5), name="xxx"),
            Series(date_range("20130101", periods=5, freq="s"), name="xxx"),
            Series(date_range("20130101 00:00:00", periods=5, freq="ms"),
                   name="xxx"),
        ]
        for s in cases:
            for prop in ok_for_dt:
                # we test freq below
                if prop != "freq":
                    compare(s, prop)

            for prop in ok_for_dt_methods:
                getattr(s.dt, prop)

            result = s.dt.to_pydatetime()
            assert isinstance(result, np.ndarray)
            assert result.dtype == object

            result = s.dt.tz_localize("US/Eastern")
            exp_values = DatetimeIndex(s.values).tz_localize("US/Eastern")
            expected = Series(exp_values, index=s.index, name="xxx")
            tm.assert_series_equal(result, expected)

            tz_result = result.dt.tz
            assert str(tz_result) == "US/Eastern"
            freq_result = s.dt.freq
            assert freq_result == DatetimeIndex(s.values, freq="infer").freq

            # let's localize, then convert
            result = s.dt.tz_localize("UTC").dt.tz_convert("US/Eastern")
            exp_values = (DatetimeIndex(
                s.values).tz_localize("UTC").tz_convert("US/Eastern"))
            expected = Series(exp_values, index=s.index, name="xxx")
            tm.assert_series_equal(result, expected)

        # datetimeindex with tz
        s = Series(date_range("20130101", periods=5, tz="US/Eastern"),
                   name="xxx")
        for prop in ok_for_dt:

            # we test freq below
            if prop != "freq":
                compare(s, prop)

        for prop in ok_for_dt_methods:
            getattr(s.dt, prop)

        result = s.dt.to_pydatetime()
        assert isinstance(result, np.ndarray)
        assert result.dtype == object

        result = s.dt.tz_convert("CET")
        expected = Series(s._values.tz_convert("CET"),
                          index=s.index,
                          name="xxx")
        tm.assert_series_equal(result, expected)

        tz_result = result.dt.tz
        assert str(tz_result) == "CET"
        freq_result = s.dt.freq
        assert freq_result == DatetimeIndex(s.values, freq="infer").freq

        # timedelta index
        cases = [
            Series(timedelta_range("1 day", periods=5),
                   index=list("abcde"),
                   name="xxx"),
            Series(timedelta_range("1 day 01:23:45", periods=5, freq="s"),
                   name="xxx"),
            Series(
                timedelta_range("2 days 01:23:45.012345", periods=5,
                                freq="ms"),
                name="xxx",
            ),
        ]
        for s in cases:
            for prop in ok_for_td:
                # we test freq below
                if prop != "freq":
                    compare(s, prop)

            for prop in ok_for_td_methods:
                getattr(s.dt, prop)

            result = s.dt.components
            assert isinstance(result, DataFrame)
            tm.assert_index_equal(result.index, s.index)

            result = s.dt.to_pytimedelta()
            assert isinstance(result, np.ndarray)
            assert result.dtype == object

            result = s.dt.total_seconds()
            assert isinstance(result, pd.Series)
            assert result.dtype == "float64"

            freq_result = s.dt.freq
            assert freq_result == TimedeltaIndex(s.values, freq="infer").freq

        # both
        index = date_range("20130101", periods=3, freq="D")
        s = Series(date_range("20140204", periods=3, freq="s"),
                   index=index,
                   name="xxx")
        exp = Series(np.array([2014, 2014, 2014], dtype="int64"),
                     index=index,
                     name="xxx")
        tm.assert_series_equal(s.dt.year, exp)

        exp = Series(np.array([2, 2, 2], dtype="int64"),
                     index=index,
                     name="xxx")
        tm.assert_series_equal(s.dt.month, exp)

        exp = Series(np.array([0, 1, 2], dtype="int64"),
                     index=index,
                     name="xxx")
        tm.assert_series_equal(s.dt.second, exp)

        exp = pd.Series([s[0]] * 3, index=index, name="xxx")
        tm.assert_series_equal(s.dt.normalize(), exp)

        # periodindex
        cases = [
            Series(period_range("20130101", periods=5, freq="D"), name="xxx")
        ]
        for s in cases:
            for prop in ok_for_period:
                # we test freq below
                if prop != "freq":
                    compare(s, prop)

            for prop in ok_for_period_methods:
                getattr(s.dt, prop)

            freq_result = s.dt.freq
            assert freq_result == PeriodIndex(s.values).freq

        # test limited display api
        def get_dir(s):
            results = [r for r in s.dt.__dir__() if not r.startswith("_")]
            return sorted(set(results))

        s = Series(date_range("20130101", periods=5, freq="D"), name="xxx")
        results = get_dir(s)
        tm.assert_almost_equal(results,
                               sorted(set(ok_for_dt + ok_for_dt_methods)))

        s = Series(
            period_range("20130101", periods=5, freq="D",
                         name="xxx").astype(object))
        results = get_dir(s)
        tm.assert_almost_equal(
            results, sorted(set(ok_for_period + ok_for_period_methods)))

        # 11295
        # ambiguous time error on the conversions
        s = Series(pd.date_range("2015-01-01", "2016-01-01", freq="T"),
                   name="xxx")
        s = s.dt.tz_localize("UTC").dt.tz_convert("America/Chicago")
        results = get_dir(s)
        tm.assert_almost_equal(results,
                               sorted(set(ok_for_dt + ok_for_dt_methods)))
        exp_values = pd.date_range("2015-01-01",
                                   "2016-01-01",
                                   freq="T",
                                   tz="UTC").tz_convert("America/Chicago")
        expected = Series(exp_values, name="xxx")
        tm.assert_series_equal(s, expected)

        # no setting allowed
        s = Series(date_range("20130101", periods=5, freq="D"), name="xxx")
        with pytest.raises(ValueError, match="modifications"):
            s.dt.hour = 5

        # trying to set a copy
        with pd.option_context("chained_assignment", "raise"):
            with pytest.raises(com.SettingWithCopyError):
                s.dt.hour[0] = 5
Example #18
def test_ewm_with_nat_raises(halflife_with_times):
    # GH#38535
    ser = Series(range(1))
    times = DatetimeIndex(["NaT"])
    with pytest.raises(ValueError, match="Cannot convert NaT values to integer"):
        ser.ewm(com=0.1, halflife=halflife_with_times, times=times)
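A minimal sketch of the valid counterpart (assuming pandas >= 1.1, where time-based exponential weighting via halflife/times was introduced): with a NaT-free times vector and a timedelta-like halflife, the same constructor succeeds.

import pandas as pd

ser = pd.Series([0.0, 1.0, 2.0])
times = pd.DatetimeIndex(["2020-01-01", "2020-01-03", "2020-01-10"])
# Observations decay according to elapsed time rather than position.
print(ser.ewm(halflife="2 days", times=times).mean())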
Example #19
    def test_get_loc_reasonable_key_error(self):
        # GH#1062
        index = DatetimeIndex(["1/3/2000"])
        with pytest.raises(KeyError, match="2000"):
            index.get_loc("1/1/2000")
Example #20
    def setup(self):
        N = 10**5
        B = N + 20000
        self.datetime_left = DatetimeIndex(range(N))
        self.datetime_right = DatetimeIndex(range(N, B))
Example #21
    def test_contains_nonunique(self, vals):
        # GH#9512
        idx = DatetimeIndex(vals)
        assert idx[0] in idx
Example #22
    def test_datetimeindex_tz(self):
        rng = date_range('03/12/2012 00:00', periods=10, freq='W-FRI',
                         tz='US/Eastern')
        rng2 = DatetimeIndex(data=rng, tz='US/Eastern')
        self.assertTrue(rng.equals(rng2))
Example #23
    def test_tdi_radd_timestamp(self):
        idx = TimedeltaIndex(['1 day', '2 day'])

        result = Timestamp('2011-01-01') + idx
        expected = DatetimeIndex(['2011-01-02', '2011-01-03'])
        tm.assert_index_equal(result, expected)
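The same broadcasting rule outside the test harness, as a quick sketch: adding a Timestamp to a TimedeltaIndex shifts every element and yields a DatetimeIndex, regardless of operand order.

import pandas as pd

tdi = pd.TimedeltaIndex(["1 day", "2 days"])
ts = pd.Timestamp("2011-01-01")
print(ts + tdi)  # -> DatetimeIndex(['2011-01-02', '2011-01-03'])
print(tdi + ts)  # identical result; the addition is commutative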
Example #24
    def test_nat(self):
        # GH 5546
        dates = [NaT]
        idx = DatetimeIndex(dates)
        idx = idx.tz_localize('US/Pacific')
        self.assertTrue(idx.equals(DatetimeIndex(dates, tz='US/Pacific')))
        idx = idx.tz_convert('US/Eastern')
        self.assertTrue(idx.equals(DatetimeIndex(dates, tz='US/Eastern')))
        idx = idx.tz_convert('UTC')
        self.assertTrue(idx.equals(DatetimeIndex(dates, tz='UTC')))

        dates = ['2010-12-01 00:00', '2010-12-02 00:00', NaT]
        idx = DatetimeIndex(dates)
        idx = idx.tz_localize('US/Pacific')
        self.assertTrue(idx.equals(DatetimeIndex(dates, tz='US/Pacific')))
        idx = idx.tz_convert('US/Eastern')
        expected = ['2010-12-01 03:00', '2010-12-02 03:00', NaT]
        self.assertTrue(idx.equals(DatetimeIndex(expected, tz='US/Eastern')))

        idx = idx + offsets.Hour(5)
        expected = ['2010-12-01 08:00', '2010-12-02 08:00', NaT]
        self.assertTrue(idx.equals(DatetimeIndex(expected, tz='US/Eastern')))
        idx = idx.tz_convert('US/Pacific')
        expected = ['2010-12-01 05:00', '2010-12-02 05:00', NaT]
        self.assertTrue(idx.equals(DatetimeIndex(expected, tz='US/Pacific')))

        if not _np_version_under1p7:
            idx = idx + np.timedelta64(3, 'h')
            expected = ['2010-12-01 08:00', '2010-12-02 08:00', NaT]
            self.assertTrue(idx.equals(DatetimeIndex(expected, tz='US/Pacific')))

            idx = idx.tz_convert('US/Eastern')
            expected = ['2010-12-01 11:00', '2010-12-02 11:00', NaT]
            self.assertTrue(idx.equals(DatetimeIndex(expected, tz='US/Eastern')))
Example #25
def test_datetimeindex_from_empty_datetime64_array():
    for unit in ['ms', 'us', 'ns']:
        idx = DatetimeIndex(np.array([], dtype='datetime64[%s]' % unit))
        assert (len(idx) == 0)
Example #26
    def test_static_tzinfo(self):
        # it works!
        index = DatetimeIndex([datetime(2012, 1, 1)], tz=self.tzstr('EST'))
        index.hour
        index[0]
Example #27
def _convert_listlike_datetimes(arg,
                                box,
                                format,
                                name=None,
                                tz=None,
                                unit=None,
                                errors=None,
                                infer_datetime_format=None,
                                dayfirst=None,
                                yearfirst=None,
                                exact=None):
    """
    Helper function for to_datetime. Performs the conversion of a 1D list-like
    of dates.

    Parameters
    ----------
    arg : list, tuple, ndarray, Series, Index
        dates to be parsed
    box : boolean
        True boxes result as an Index-like, False returns an ndarray
    name : object
        None or string for the Index name
    tz : object
        None or 'utc'
    unit : string
        None or string of the frequency of the passed data
    errors : string
        error handling behavior from to_datetime: 'raise', 'coerce', 'ignore'
    infer_datetime_format : boolean
        inferring format behavior from to_datetime
    dayfirst : boolean
        dayfirst parsing behavior from to_datetime
    yearfirst : boolean
        yearfirst parsing behavior from to_datetime
    exact : boolean
        exact format matching behavior from to_datetime

    Returns
    -------
    ndarray of parsed dates
        Returns:

        - Index-like if box=True
        - ndarray of Timestamps if box=False
    """
    from pandas import DatetimeIndex
    from pandas.core.arrays import DatetimeArray
    from pandas.core.arrays.datetimes import (maybe_convert_dtype,
                                              objects_to_datetime64ns)

    if isinstance(arg, (list, tuple)):
        arg = np.array(arg, dtype='O')

    # these are shortcutable
    if is_datetime64tz_dtype(arg):
        if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            return DatetimeIndex(arg, tz=tz, name=name)
        if tz == 'utc':
            arg = arg.tz_convert(None).tz_localize(tz)
        return arg

    elif is_datetime64_ns_dtype(arg):
        if box and not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            try:
                return DatetimeIndex(arg, tz=tz, name=name)
            except ValueError:
                pass

        return arg

    elif unit is not None:
        if format is not None:
            raise ValueError("cannot specify both format and unit")
        arg = getattr(arg, 'values', arg)
        result, tz_parsed = tslib.array_with_unit_to_datetime(arg,
                                                              unit,
                                                              errors=errors)
        if box:
            if errors == 'ignore':
                from pandas import Index
                result = Index(result, name=name)
            else:
                result = DatetimeIndex(result, name=name)
            # GH 23758: We may still need to localize the result with tz
            # GH 25546: Apply tz_parsed first (from arg), then tz (from caller)
            # result will be naive but in UTC
            try:
                result = result.tz_localize('UTC').tz_convert(tz_parsed)
            except AttributeError:
                # Regular Index from 'ignore' path
                return result
            if tz is not None:
                if result.tz is None:
                    result = result.tz_localize(tz)
                else:
                    result = result.tz_convert(tz)
        return result
    elif getattr(arg, 'ndim', 1) > 1:
        raise TypeError('arg must be a string, datetime, list, tuple, '
                        '1-d array, or Series')

    # warn if passing timedelta64, raise for PeriodDtype
    # NB: this must come after unit transformation
    orig_arg = arg
    arg, _ = maybe_convert_dtype(arg, copy=False)

    arg = ensure_object(arg)
    require_iso8601 = False

    if infer_datetime_format and format is None:
        format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

    if format is not None:
        # There is a special fast-path for iso8601-formatted
        # datetime strings, so in those cases don't use the inferred
        # format, because it would make this path slower in this
        # special case
        format_is_iso8601 = _format_is_iso(format)
        if format_is_iso8601:
            require_iso8601 = not infer_datetime_format
            format = None

    tz_parsed = None
    result = None

    if format is not None:
        try:
            # shortcut formatting here
            if format == '%Y%m%d':
                try:
                    # pass orig_arg as float-dtype may have been converted to
                    # datetime64[ns]
                    orig_arg = ensure_object(orig_arg)
                    result = _attempt_YYYYMMDD(orig_arg, errors=errors)
                except (ValueError, TypeError, tslibs.OutOfBoundsDatetime):
                    raise ValueError("cannot convert the input to "
                                     "'%Y%m%d' date format")

            # fallback
            if result is None:
                try:
                    result, timezones = array_strptime(arg,
                                                       format,
                                                       exact=exact,
                                                       errors=errors)
                    if '%Z' in format or '%z' in format:
                        return _return_parsed_timezone_results(
                            result, timezones, box, tz, name)
                except tslibs.OutOfBoundsDatetime:
                    if errors == 'raise':
                        raise
                    elif errors == 'coerce':
                        result = np.empty(arg.shape, dtype='M8[ns]')
                        iresult = result.view('i8')
                        iresult.fill(tslibs.iNaT)
                    else:
                        result = arg
                except ValueError:
                    # if format was inferred, try falling back
                    # to array_to_datetime - terminate here
                    # for specified formats
                    if not infer_datetime_format:
                        if errors == 'raise':
                            raise
                        elif errors == 'coerce':
                            result = np.empty(arg.shape, dtype='M8[ns]')
                            iresult = result.view('i8')
                            iresult.fill(tslibs.iNaT)
                        else:
                            result = arg
        except ValueError as e:
            # Fallback to try to convert datetime objects if timezone-aware
            #  datetime objects are found without passing `utc=True`
            try:
                values, tz = conversion.datetime_to_datetime64(arg)
                return DatetimeIndex._simple_new(values, name=name, tz=tz)
            except (ValueError, TypeError):
                raise e

    if result is None:
        assert format is None or infer_datetime_format
        utc = tz == 'utc'
        result, tz_parsed = objects_to_datetime64ns(
            arg,
            dayfirst=dayfirst,
            yearfirst=yearfirst,
            utc=utc,
            errors=errors,
            require_iso8601=require_iso8601,
            allow_object=True)

    if tz_parsed is not None:
        if box:
            # We can take a shortcut since the datetime64 numpy array
            # is in UTC
            return DatetimeIndex._simple_new(result, name=name, tz=tz_parsed)
        else:
            # Convert the datetime64 numpy array to a numpy array
            # of datetime objects
            result = [
                Timestamp(ts, tz=tz_parsed).to_pydatetime() for ts in result
            ]
            return np.array(result, dtype=object)

    if box:
        utc = tz == 'utc'
        return _box_as_indexlike(result, utc=utc, name=name)
    return result
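This helper is internal to pandas.core.tools.datetimes and is normally reached through pd.to_datetime. A hedged sketch of public calls that hit its main branches (the box keyword above is a pre-1.0 detail; modern pandas always returns an Index-like for list-like input):

import pandas as pd

# String parsing with error coercion -> DatetimeIndex containing NaT
print(pd.to_datetime(["2019-01-01", "2019-01-02", "not a date"], errors="coerce"))

# Unit-based parsing relative to an origin (the `unit is not None` branch)
print(pd.to_datetime([1, 2, 3], unit="D", origin="2019-01-01"))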
Example #28
    def test_tzaware_datetime_to_index(self):
        d = [datetime(2012, 8, 19, tzinfo=self.tz('US/Eastern'))]

        index = DatetimeIndex(d)
        self.assertTrue(self.cmptz(index.tz, self.tz('US/Eastern')))
Example #29
    def test_argmin_argmax(self):
        idx = DatetimeIndex(['2000-01-04', '2000-01-01', '2000-01-02'])
        self.assertEqual(idx.argmin(), 1)
        self.assertEqual(idx.argmax(), 0)
Example #30
def create_block(typestr, placement, item_shape=None, num_offset=0):
    """
    Supported typestr:

        * float, f8, f4, f2
        * int, i8, i4, i2, i1
        * uint, u8, u4, u2, u1
        * complex, c16, c8
        * bool
        * object, string, O
        * datetime, dt, M8[ns], M8[ns, tz]
        * timedelta, td, m8[ns]
        * sparse (SparseArray with fill_value=0.0)
        * sparse_na (SparseArray with fill_value=np.nan)
        * category, category2

    """
    placement = BlockPlacement(placement)
    num_items = len(placement)

    if item_shape is None:
        item_shape = (N, )

    shape = (num_items, ) + item_shape

    mat = get_numeric_mat(shape)

    if typestr in ('float', 'f8', 'f4', 'f2', 'int', 'i8', 'i4', 'i2', 'i1',
                   'uint', 'u8', 'u4', 'u2', 'u1'):
        values = mat.astype(typestr) + num_offset
    elif typestr in ('complex', 'c16', 'c8'):
        values = 1.j * (mat.astype(typestr) + num_offset)
    elif typestr in ('object', 'string', 'O'):
        values = np.reshape(['A%d' % i for i in mat.ravel() + num_offset],
                            shape)
    elif typestr in ('b', 'bool', ):
        values = np.ones(shape, dtype=np.bool_)
    elif typestr in ('datetime', 'dt', 'M8[ns]'):
        values = (mat * 1e9).astype('M8[ns]')
    elif typestr.startswith('M8[ns'):
        # datetime with tz
        m = re.search(r'M8\[ns,\s*(\w+\/?\w*)\]', typestr)
        assert m is not None, "incompatible typestr -> {0}".format(typestr)
        tz = m.groups()[0]
        assert num_items == 1, "must have only 1 num items for a tz-aware"
        values = DatetimeIndex(np.arange(N) * 1e9, tz=tz)
    elif typestr in ('timedelta', 'td', 'm8[ns]'):
        values = (mat * 1).astype('m8[ns]')
    elif typestr in ('category', ):
        values = Categorical([1, 1, 2, 2, 3, 3, 3, 3, 4, 4])
    elif typestr in ('category2', ):
        values = Categorical(['a', 'a', 'a', 'a', 'b', 'b', 'c', 'c', 'c', 'd'
                              ])
    elif typestr in ('sparse', 'sparse_na'):
        # FIXME: doesn't support num_rows != 10
        assert shape[-1] == 10
        assert all(s == 1 for s in shape[:-1])
        if typestr.endswith('_na'):
            fill_value = np.nan
        else:
            fill_value = 0.0
        values = SparseArray([fill_value, fill_value, 1, 2, 3, fill_value,
                              4, 5, fill_value, 6], fill_value=fill_value)
        arr = values.sp_values.view()
        arr += (num_offset - 1)
    else:
        raise ValueError('Unsupported typestr: "%s"' % typestr)

    return make_block(values, placement=placement, ndim=len(shape))