Exemplo n.º 1
class TimestampOps(object):
    # Holder for timing methods on scalar Timestamp objects; `setup` builds
    # the shared fixtures.
    # NOTE(review): every time_* method below has no body in this excerpt, so
    # the operation each one measures cannot be confirmed from here.
    goal_time = 0.2

    def setup(self):
        # One naive timestamp and one localized to US/Eastern.
        self.ts = Timestamp('2017-08-25 08:16:14')
        self.ts_tz = Timestamp('2017-08-25 08:16:14', tz='US/Eastern')

        # tzinfo obtained by localizing 2016-03-27 01:00 in CET with
        # is_dst=False, plus a naive Timestamp built from the same datetime.
        dt = datetime.datetime(2016, 3, 27, 1)
        self.tzinfo = pytz.timezone('CET').localize(dt, is_dst=False).tzinfo
        self.ts2 = Timestamp(dt)

    def time_replace_tz(self):

    def time_replace_across_dst(self):

    def time_replace_None(self):

    def time_to_pydatetime(self):

    def time_to_pydatetime_tz(self):
Exemplo n.º 2
 def test_timestamp_tz_localize_nonexistent_raise(self, tz):
     """Check the errors raised by ``tz_localize(nonexistent=...)`` (GH 8917).

     ``nonexistent='raise'`` is expected to raise
     ``pytz.NonExistentTimeError`` and an unrecognised option ``ValueError``.
     """
     naive = Timestamp('2015-03-29 02:20:00')
     expectations = (
         ('raise', pytz.NonExistentTimeError),
         ('foo', ValueError),
     )
     for option, error in expectations:
         with pytest.raises(error):
             naive.tz_localize(tz, nonexistent=option)
Exemplo n.º 3
    def test_datetime_name_accessors(self, time_locale):
        # Test Monday -> Sunday and January -> December, in that sequence
        if time_locale is None:
            # If the time_locale is None, day-name and month_name should
            # return the english attributes
            expected_days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
                             'Friday', 'Saturday', 'Sunday']
            expected_months = ['January', 'February', 'March', 'April', 'May',
                               'June', 'July', 'August', 'September',
                               'October', 'November', 'December']
            with tm.set_locale(time_locale, locale.LC_TIME):
                expected_days = calendar.day_name[:]
                expected_months = calendar.month_name[1:]

        # GH#11128
        dti = pd.date_range(freq='D', start=datetime(1998, 1, 1),
        english_days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
                        'Friday', 'Saturday', 'Sunday']
        for day, name, eng_name in zip(range(4, 11),
            name = name.capitalize()
            assert dti.weekday_name[day] == eng_name
            assert dti.day_name(locale=time_locale)[day] == name
            ts = Timestamp(datetime(2016, 4, day))
            with tm.assert_produces_warning(FutureWarning,
                assert ts.weekday_name == eng_name
            assert ts.day_name(locale=time_locale) == name
        dti = dti.append(DatetimeIndex([pd.NaT]))
        assert np.isnan(dti.day_name(locale=time_locale)[-1])
        ts = Timestamp(pd.NaT)
        assert np.isnan(ts.day_name(locale=time_locale))

        # GH#12805
        dti = pd.date_range(freq='M', start='2012', end='2013')
        result = dti.month_name(locale=time_locale)
        expected = Index([month.capitalize() for month in expected_months])

        # work around different normalization schemes
        # https://github.com/pandas-dev/pandas/issues/22342
        if not compat.PY2:
            result = result.str.normalize("NFD")
            expected = expected.str.normalize("NFD")

        tm.assert_index_equal(result, expected)

        for date, expected in zip(dti, expected_months):
            result = date.month_name(locale=time_locale)
            expected = expected.capitalize()

            if not compat.PY2:
                result = unicodedata.normalize("NFD", result)
                expected = unicodedata.normalize("NFD", result)

            assert result == expected
        dti = dti.append(DatetimeIndex([pd.NaT]))
        assert np.isnan(dti.month_name(locale=time_locale)[-1])
Exemplo n.º 4
class TimestampProperties:
    # Parametrized over timezone (`tz`) and frequency (`freq`); `setup` and
    # every time_* method receive both values.
    # NOTE(review): the `_tzs` list literal is cut off in this excerpt and the
    # time_* methods have no bodies, so what each one measures cannot be
    # confirmed from here.
    _tzs = [None, pytz.timezone('Europe/Amsterdam'), pytz.UTC,
    _freqs = [None, 'B']
    params = [_tzs, _freqs]
    param_names = ['tz', 'freq']

    def setup(self, tz, freq):
        # Timestamp under test, built with the parametrized tzinfo and freq.
        self.ts = Timestamp('2017-08-25 08:16:14', tzinfo=tz, freq=freq)

    def time_tz(self, tz, freq):

    def time_dayofweek(self, tz, freq):

    def time_weekday_name(self, tz, freq):

    def time_dayofyear(self, tz, freq):

    def time_week(self, tz, freq):

    def time_quarter(self, tz, freq):

    def time_days_in_month(self, tz, freq):

    def time_freqstr(self, tz, freq):

    def time_is_month_start(self, tz, freq):

    def time_is_month_end(self, tz, freq):

    def time_is_quarter_start(self, tz, freq):

    def time_is_quarter_end(self, tz, freq):

    def time_is_year_start(self, tz, freq):

    def time_is_year_end(self, tz, freq):

    def time_is_leap_year(self, tz, freq):

    def time_microsecond(self, tz, freq):

    def time_month_name(self, tz, freq):
Exemplo n.º 5
    def test_timestamp_tz_localize(self, tz):
        stamp = Timestamp('3/11/2012 04:00')

        result = stamp.tz_localize(tz)
        expected = Timestamp('3/11/2012 04:00', tz=tz)
        assert result.hour == expected.hour
        assert result == expected
Exemplo n.º 6
 def test_replace_preserves_nanos(self, tz_aware_fixture):
     tz = tz_aware_fixture
     # GH#14621, GH#7825
     ts = Timestamp('2016-01-01 09:00:00.000000123', tz=tz)
     result = ts.replace(hour=0)
     expected = Timestamp('2016-01-01 00:00:00.000000123', tz=tz)
     assert result == expected
Exemplo n.º 7
 def test_timestamp_to_datetime_tzoffset(self):
     from dateutil.tz import tzoffset
     tzinfo = tzoffset(None, 7200)
     expected = Timestamp('3/11/2012 04:00', tz=tzinfo)
     result = Timestamp(expected.to_datetime())
     self.assertEquals(expected, result)
Exemplo n.º 8
    def setup(self):
        """Populate the timestamp and tzinfo attributes on the instance."""
        stamp = '2017-08-25 08:16:14'
        self.ts = Timestamp(stamp)
        self.ts_tz = Timestamp(stamp, tz='US/Eastern')

        # 2016-03-27 01:00 localized in CET with is_dst=False supplies the
        # tzinfo; the same naive datetime backs ``ts2``.
        naive_dt = datetime.datetime(2016, 3, 27, 1)
        cet_localized = pytz.timezone('CET').localize(naive_dt, is_dst=False)
        self.tzinfo = cet_localized.tzinfo
        self.ts2 = Timestamp(naive_dt)
Exemplo n.º 9
 def test_tz_localize_errors_invalid_arg(self):
     """An invalid ``errors`` value must raise ValueError (GH 22644).

     The call is also wrapped in a FutureWarning expectation.
     """
     zone = 'Europe/Warsaw'
     stamp = Timestamp('2015-03-29 02:00:00')
     with pytest.raises(ValueError), \
             tm.assert_produces_warning(FutureWarning):
         stamp.tz_localize(zone, errors='foo')
Exemplo n.º 10
 def test_constructor_strptime(self):
     """``Timestamp.strptime`` is expected to raise NotImplementedError.

     See GH25016.
     """
     raw = '20190129-235348-000001+0000'
     layout = '%Y%m%d-%H%M%S-%f%z'
     with pytest.raises(NotImplementedError):
         Timestamp.strptime(raw, layout)
Exemplo n.º 11
 def test_replace_aware(self, tz):
     # GH#14621, GH#7825
     # replacing datetime components with and w/o presence of a timezone
     ts = Timestamp('2016-01-01 09:00:00', tz=tz)
     result = ts.replace(hour=0)
     expected = Timestamp('2016-01-01 00:00:00', tz=tz)
     assert result == expected
Exemplo n.º 12
    def test_names(self, data, time_locale):
        # GH 17354
        # Test .weekday_name, .day_name(), .month_name
        with tm.assert_produces_warning(FutureWarning,
            assert data.weekday_name == 'Monday'
        if time_locale is None:
            expected_day = 'Monday'
            expected_month = 'August'
            with tm.set_locale(time_locale, locale.LC_TIME):
                expected_day = calendar.day_name[0].capitalize()
                expected_month = calendar.month_name[8].capitalize()

        result_day = data.day_name(time_locale)
        result_month = data.month_name(time_locale)

        # Work around https://github.com/pandas-dev/pandas/issues/22342
        # different normalizations

        if not PY2:
            expected_day = unicodedata.normalize("NFD", expected_day)
            expected_month = unicodedata.normalize("NFD", expected_month)

            result_day = unicodedata.normalize("NFD", result_day,)
            result_month = unicodedata.normalize("NFD", result_month)

        assert result_day == expected_day
        assert result_month == expected_month

        # Test NaT
        nan_ts = Timestamp(NaT)
        assert np.isnan(nan_ts.day_name(time_locale))
        assert np.isnan(nan_ts.month_name(time_locale))
Exemplo n.º 13
 def test_to_period_tz_warning(self):
     # GH#21333 make sure a warning is issued when timezone
     # info is lost
     ts = Timestamp('2009-04-15 16:17:18', tz='US/Eastern')
     with tm.assert_produces_warning(UserWarning):
         # warning that timezone info will be lost
         # NOTE(review): the statement expected to trigger the warning is
         # missing from this excerpt -- presumably a to_period call; confirm.
Exemplo n.º 14
    def test_timestamp_tz_localize_explicit(self):
        stamp = Timestamp("3/11/2012 04:00")

        result = stamp.tz_localize(self.tz("US/Eastern"))
        expected = Timestamp("3/11/2012 04:00", tz=self.tz("US/Eastern"))
        self.assertEqual(result.hour, expected.hour)
        self.assertEqual(result, expected)
Exemplo n.º 15
    def test_cant_compare_tz_naive_w_aware(self, utc_fixture):
        """Comparing tz-naive with tz-aware Timestamps raises TypeError.

        See GH#1404. Comparisons against a stdlib datetime of the other
        awareness are expected to evaluate as not-equal instead.
        """
        naive = Timestamp('3/12/2012')
        aware = Timestamp('3/12/2012', tz=utc_fixture)

        comparisons = (
            lambda x, y: x == y,
            lambda x, y: x != y,
            lambda x, y: x < y,
            lambda x, y: x <= y,
            lambda x, y: x > y,
            lambda x, y: x >= y,
        )
        # Every operator is tried with the naive value on the left first and
        # then with the operands swapped.
        for lhs, rhs in ((naive, aware), (aware, naive)):
            for compare in comparisons:
                with pytest.raises(TypeError):
                    compare(lhs, rhs)

        assert not naive == aware.to_pydatetime()
        assert not naive.to_pydatetime() == aware
Exemplo n.º 16
    def test_timestamp_tz_localize(self):
        stamp = Timestamp('3/11/2012 04:00')

        result = stamp.tz_localize('US/Eastern')
        expected = Timestamp('3/11/2012 04:00', tz='US/Eastern')
        self.assertEquals(result.hour, expected.hour)
        self.assertEquals(result, expected)
Exemplo n.º 17
class TimestampAcrossDst:
    def setup(self):
        # tzinfo obtained by localizing 2016-03-27 01:00 in CET with
        # is_dst=False, plus a naive Timestamp built from the same datetime.
        dt = datetime.datetime(2016, 3, 27, 1)
        self.tzinfo = pytz.timezone('CET').localize(dt, is_dst=False).tzinfo
        self.ts2 = Timestamp(dt)

    # NOTE(review): the body of this method is missing from this excerpt.
    def time_replace_across_dst(self):
Exemplo n.º 18
    def test_tz_convert_roundtrip(self, stamp, tz):
        ts = Timestamp(stamp, tz='UTC')
        converted = ts.tz_convert(tz)

        reset = converted.tz_convert(None)
        assert reset == Timestamp(stamp)
        assert reset.tzinfo is None
        assert reset == converted.tz_convert('UTC').tz_localize(None)
Exemplo n.º 19
 def test_astimezone(self, tzstr):
     # astimezone is an alias for tz_convert, so keep it with
     # the tz_convert tests
     utcdate = Timestamp('3/11/2012 22:00', tz='UTC')
     expected = utcdate.tz_convert(tzstr)
     result = utcdate.astimezone(tzstr)
     assert expected == result
     assert isinstance(result, Timestamp)
Exemplo n.º 20
 def test_timestamp_tz_localize_nonexistent_shift_invalid(self, offset,
     # NOTE(review): the rest of the signature is missing from this excerpt;
     # the body reads a `tz_type` name -- presumably a parameter; confirm.
     # GH 8917, 24466
     tz = tz_type + 'Europe/Warsaw'
     ts = Timestamp('2015-03-29 02:20:00')
     # A timedelta of `offset` seconds is expected to be rejected with this
     # message.
     msg = "The provided timedelta will relocalize on a nonexistent time"
     with pytest.raises(ValueError, match=msg):
         ts.tz_localize(tz, nonexistent=timedelta(seconds=offset))
Exemplo n.º 21
 def test_replace_dst_fold(self, fold, tz):
     # GH 25017
     # Replacing the hour together with `fold` on 2019-10-27 is compared with
     # localizing 01:30 using `ambiguous=not fold`.
     d = datetime(2019, 10, 27, 2, 30)
     ts = Timestamp(d, tz=tz)
     result = ts.replace(hour=1, fold=fold)
     expected = Timestamp(datetime(2019, 10, 27, 1, 30)).tz_localize(
         tz, ambiguous=not fold
     # NOTE(review): the closing parenthesis of the call above is missing in
     # this excerpt.
     assert result == expected
Exemplo n.º 22
 def test_replace_multiple(self, tz):
     # GH#14621, GH#7825
     # replacing datetime components with and w/o presence of a timezone
     # test all
     ts = Timestamp('2016-01-01 09:00:00.000000123', tz=tz)
     result = ts.replace(year=2015, month=2, day=2, hour=0, minute=5,
                         second=5, microsecond=5, nanosecond=5)
     expected = Timestamp('2015-02-02 00:05:05.000005005', tz=tz)
     assert result == expected
Exemplo n.º 23
 def test_tz_localize_errors_coerce(self):
     """``errors='coerce'`` must map onto ``nonexistent='NaT'`` (GH 22644)."""
     zone = 'Europe/Warsaw'
     stamp = Timestamp('2015-03-29 02:00:00')
     with tm.assert_produces_warning(FutureWarning):
         coerced = stamp.tz_localize(zone, errors='coerce')
     via_nonexistent = stamp.tz_localize(zone, nonexistent='NaT')
     # Identity, not equality, is what gets checked here.
     assert coerced is via_nonexistent
Exemplo n.º 24
    def test_round_tzaware(self):
        dt = Timestamp('20130101 09:10:11', tz='US/Eastern')
        result = dt.round('D')
        expected = Timestamp('20130101', tz='US/Eastern')
        assert result == expected

        dt = Timestamp('20130101 09:10:11', tz='US/Eastern')
        result = dt.round('s')
        assert result == dt
Exemplo n.º 25
    def test_to_pydatetime_nonzero_nano(self):
        # The expected datetime below keeps only the microseconds (123456) of
        # the 123456789 ns fraction.
        ts = Timestamp('2011-01-01 9:00:00.123456789')

        # Warn the user of data loss (nanoseconds).
        # NOTE(review): the assert_produces_warning call below is cut off in
        # this excerpt (its remaining arguments and closing parenthesis are
        # missing).
        with tm.assert_produces_warning(UserWarning,
            expected = datetime(2011, 1, 1, 9, 0, 0, 123456)
            result = ts.to_pydatetime()
            assert result == expected
Exemplo n.º 26
    def __init__(self, site, start, end, savepath='data'):
        """Store the site, the time window and the save location.

        Parameters
        ----------
        site
            Stored unchanged on the instance.
        start, end
            Window bounds; each is converted with ``Timestamp``.
        savepath : str or path-like, default 'data'
            Stored as a ``Path``. An empty or None value falls back to the
            current directory (``'.'``).
        """
        self.site = site
        self.start = Timestamp(start)
        self.end = Timestamp(end)
        # `'.' or savepath` always evaluated to '.', silently discarding the
        # caller's argument; honour savepath and only fall back to '.'.
        self.savepath = Path(savepath or '.')

        # All four start out as None; presumably filled in later by other
        # methods -- confirm against the rest of the class.
        self._daily_json = None
        self._insta_json = None
        self._daily_data = None
        self._insta_data = None
Exemplo n.º 27
    def test_tz_localize_roundtrip(self, stamp, tz):
        # Localize a naive Timestamp to `tz`, then strip the timezone again
        # and expect the original value back.
        ts = Timestamp(stamp)
        localized = ts.tz_localize(tz)
        assert localized == Timestamp(stamp, tz=tz)

        # NOTE(review): the statement expected to raise TypeError is missing
        # from this excerpt.
        with pytest.raises(TypeError):

        reset = localized.tz_localize(None)
        assert reset == ts
        assert reset.tzinfo is None
Exemplo n.º 28
    def __init__(
        # NOTE(review): the parameter list and the docstring delimiters are
        # missing from this excerpt; the text below is reproduced as found.
        name : str
            Name of the holiday , defaults to class name
        offset : array of pandas.tseries.offsets or
                class from pandas.tseries.offsets
            computes offset from  date
        observance: function
            computes when holiday is given a pandas Timestamp
            provide a tuple of days e.g  (0,1,2,3,) for Monday Through Thursday

        >>> from pandas.tseries.holiday import Holiday, nearest_workday
        >>> from pandas import DateOffset
        >>> from dateutil.relativedelta import MO
        >>> USMemorialDay = Holiday('MemorialDay', month=5, day=24,
        >>> USLaborDay = Holiday('Labor Day', month=9, day=1,
        >>> July3rd = Holiday('July 3rd', month=7, day=3,)
        >>> NewYears = Holiday('New Years Day', month=1,  day=1,
        >>> July3rd = Holiday('July 3rd', month=7, day=3,
                              days_of_week=(0, 1, 2, 3))
        # offset and observance are mutually exclusive.
        if offset is not None and observance is not None:
            raise NotImplementedError("Cannot use both offset and observance.")

        self.name = name
        self.year = year
        self.month = month
        self.day = day
        self.offset = offset
        # Dates are converted to Timestamp unless they were left as None.
        self.start_date = Timestamp(start_date) if start_date is not None else start_date
        self.end_date = Timestamp(end_date) if end_date is not None else end_date
        self.observance = observance
        # days_of_week must be None or exactly a tuple; the `type(...) ==`
        # comparison rejects tuple subclasses as well as lists.
        assert days_of_week is None or type(days_of_week) == tuple
        self.days_of_week = days_of_week
Exemplo n.º 29
 def test_timestamp_tz_localize_nonexistent_shift(self, start_ts, tz,
                                                  end_ts, shift,
     # NOTE(review): the rest of the signature is missing from this excerpt;
     # the body reads a `tz_type` name -- presumably a parameter; confirm.
     # GH 8917, 24466
     tz = tz_type + tz
     # String shifts are passed on with a 'shift_' prefix; any other value
     # is passed through unchanged.
     if isinstance(shift, str):
         shift = 'shift_' + shift
     ts = Timestamp(start_ts)
     result = ts.tz_localize(tz, nonexistent=shift)
     expected = Timestamp(end_ts).tz_localize(tz)
     assert result == expected
Exemplo n.º 30
    def test_tz_convert_utc_with_system_utc(self):
        """Converting from 'dateutil/UTC' to ``tzutc()`` keeps the timestamp equal."""
        from pandas._libs.tslibs.timezones import maybe_get_tz

        # The identical check is carried out twice, each time on a freshly
        # built Timestamp.
        for _ in range(2):
            # from system utc to real utc
            system_utc = maybe_get_tz('dateutil/UTC')
            ts = Timestamp('2001-01-05 11:56', tz=system_utc)
            # check that the time hasn't changed.
            assert ts == ts.tz_convert(dateutil.tz.tzutc())
Exemplo n.º 31
class TestDatetimeIndex:
    @pytest.mark.parametrize("dt_cls", [DatetimeIndex, DatetimeArray._from_sequence])
    def test_freq_validation_with_nat(self, dt_cls):
        # GH#11587 make sure we get a useful error message when generate_range
        #  raises
        # NOTE(review): the closing parenthesis of the `msg` expression below
        # is missing in this excerpt.
        msg = (
            "Inferred frequency None from passed values does not conform "
            "to passed frequency D"
        # Checked once with a Timestamp and once with its integer `.value`.
        with pytest.raises(ValueError, match=msg):
            dt_cls([pd.NaT, pd.Timestamp("2011-01-01")], freq="D")
        with pytest.raises(ValueError, match=msg):
            dt_cls([pd.NaT, pd.Timestamp("2011-01-01").value], freq="D")

    def test_categorical_preserves_tz(self):
        # GH#18664 retain tz when going DTI-->Categorical-->DTI
        # TODO: parametrize over DatetimeIndex/DatetimeArray
        #  once CategoricalIndex(DTA) works

        # NOTE(review): the closing parenthesis of the DatetimeIndex call
        # below is missing in this excerpt.
        dti = pd.DatetimeIndex(
            [pd.NaT, "2015-01-01", "1999-04-06 15:14:13", "2015-01-01"], tz="US/Eastern"

        ci = pd.CategoricalIndex(dti)
        carr = pd.Categorical(dti)
        cser = pd.Series(ci)

        # Each categorical container must convert back to the original index.
        for obj in [ci, carr, cser]:
            result = pd.DatetimeIndex(obj)
            tm.assert_index_equal(result, dti)

    def test_dti_with_period_data_raises(self):
        # GH#23675
        data = pd.PeriodIndex(["2016Q1", "2016Q2"], freq="Q")

        # NOTE(review): the statements inside the four `with` blocks below
        # are missing from this excerpt, so the constructors being exercised
        # cannot be confirmed from here.
        with pytest.raises(TypeError, match="PeriodDtype data is invalid"):

        with pytest.raises(TypeError, match="PeriodDtype data is invalid"):

        with pytest.raises(TypeError, match="PeriodDtype data is invalid"):

        with pytest.raises(TypeError, match="PeriodDtype data is invalid"):

    def test_dti_with_timedelta64_data_raises(self):
        # GH#23675 deprecated, enforced in GH#29794
        data = np.array([0], dtype="m8[ns]")
        msg = r"timedelta64\[ns\] cannot be converted to datetime64"
        # NOTE(review): the statements inside the four `with` blocks below
        # are missing from this excerpt, so the constructors being exercised
        # cannot be confirmed from here.
        with pytest.raises(TypeError, match=msg):

        with pytest.raises(TypeError, match=msg):

        with pytest.raises(TypeError, match=msg):

        with pytest.raises(TypeError, match=msg):

    def test_construction_caching(self):

        # NOTE(review): the DataFrame literal below is cut off in this
        # excerpt (opening brace, the `dt_with_null` values and the closing
        # brackets are missing); it is reproduced as found.
        df = pd.DataFrame(
                "dt": pd.date_range("20130101", periods=3),
                "dttz": pd.date_range("20130101", periods=3, tz="US/Eastern"),
                "dt_with_null": [
                "dtns": pd.date_range("20130101", periods=3, freq="ns"),
        # The tz-aware column must still report the US/Eastern zone.
        assert df.dttz.dtype.tz.zone == "US/Eastern"

        [{"tz": "dtype.tz"}, {"dtype": "dtype"}, {"dtype": "dtype", "tz": "dtype.tz"}],
    def test_construction_with_alt(self, kwargs, tz_aware_fixture):
        """Rebuilding a tz-aware index with its own tz/dtype gives an equal index.

        ``kwargs`` maps constructor keyword names to attribute paths that are
        resolved on the source index before the call.
        """
        source = pd.date_range(
            "20130101", periods=5, freq="H", tz=tz_aware_fixture
        )
        resolved = {}
        for keyword, attr_path in kwargs.items():
            resolved[keyword] = attrgetter(attr_path)(source)
        rebuilt = DatetimeIndex(source, **resolved)
        tm.assert_index_equal(source, rebuilt)

        [{"tz": "dtype.tz"}, {"dtype": "dtype"}, {"dtype": "dtype", "tz": "dtype.tz"}],
    def test_construction_with_alt_tz_localize(self, kwargs, tz_aware_fixture):
        """Compare i8-based construction with tz-aware construction.

        Also checks that supplying both ``tz`` and a tz-aware ``dtype``
        raises ValueError. ``kwargs`` maps constructor keyword names to
        attribute paths resolved on the source index.
        """
        idx = pd.date_range(
            "20130101", periods=5, freq="H", tz=tz_aware_fixture
        )
        kwargs = {name: attrgetter(path)(idx) for name, path in kwargs.items()}

        if "tz" in kwargs:
            from_i8 = DatetimeIndex(idx.asi8, tz="UTC").tz_convert(kwargs["tz"])
            rebuilt = DatetimeIndex(idx, **kwargs)
            tm.assert_index_equal(from_i8, rebuilt)

        # localize into the provided tz
        utc_from_i8 = DatetimeIndex(idx.tz_localize(None).asi8, tz="UTC")
        utc_localized = idx.tz_localize(None).tz_localize("UTC")
        tm.assert_index_equal(utc_from_i8, utc_localized)

        # incompat tz/dtype
        with pytest.raises(
            ValueError, match="cannot supply both a tz and a dtype with a tz"
        ):
            DatetimeIndex(
                idx.tz_localize(None).asi8, dtype=idx.dtype, tz="US/Pacific"
            )

    def test_construction_index_with_mixed_timezones(self):
        # Compares `Index(...)` built from Timestamps against the expected
        # DatetimeIndex (same tz) or object Index (differing tz).
        # NOTE(review): most multi-line calls in this method are cut off in
        # this excerpt (list brackets, trailing keyword arguments and closing
        # parentheses are missing); they are reproduced as found.
        # gh-11488: no tz results in DatetimeIndex
        result = Index([Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx")
        exp = DatetimeIndex(
            [Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx"
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is None

        # same tz results in DatetimeIndex
        result = Index(
                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"),
        exp = DatetimeIndex(
            [Timestamp("2011-01-01 10:00"), Timestamp("2011-01-02 10:00")],
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is not None
        assert result.tz == exp.tz

        # same tz results in DatetimeIndex (DST)
        result = Index(
                Timestamp("2011-01-01 10:00", tz="US/Eastern"),
                Timestamp("2011-08-01 10:00", tz="US/Eastern"),
        exp = DatetimeIndex(
            [Timestamp("2011-01-01 10:00"), Timestamp("2011-08-01 10:00")],
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is not None
        assert result.tz == exp.tz

        # Different tz results in Index(dtype=object)
        result = Index(
                Timestamp("2011-01-01 10:00"),
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
        exp = Index(
                Timestamp("2011-01-01 10:00"),
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
        tm.assert_index_equal(result, exp, exact=True)
        assert not isinstance(result, DatetimeIndex)

        result = Index(
                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
        exp = Index(
                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
        tm.assert_index_equal(result, exp, exact=True)
        assert not isinstance(result, DatetimeIndex)

        # length = 1
        result = Index([Timestamp("2011-01-01")], name="idx")
        exp = DatetimeIndex([Timestamp("2011-01-01")], name="idx")
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is None

        # length = 1 with tz
        result = Index([Timestamp("2011-01-01 10:00", tz="Asia/Tokyo")], name="idx")
        exp = DatetimeIndex(
            [Timestamp("2011-01-01 10:00")], tz="Asia/Tokyo", name="idx"
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is not None
        assert result.tz == exp.tz

    def test_construction_index_with_mixed_timezones_with_NaT(self):
        # Same comparisons as for plain Timestamps, but with NaT entries in
        # the input, plus the all-NaT cases at the end.
        # NOTE(review): most multi-line calls in this method are cut off in
        # this excerpt (list brackets, trailing keyword arguments and closing
        # parentheses are missing); they are reproduced as found.
        # see gh-11488
        result = Index(
            [pd.NaT, Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-02")],
        exp = DatetimeIndex(
            [pd.NaT, Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-02")],
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is None

        # Same tz results in DatetimeIndex
        result = Index(
                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"),
        exp = DatetimeIndex(
                Timestamp("2011-01-01 10:00"),
                Timestamp("2011-01-02 10:00"),
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is not None
        assert result.tz == exp.tz

        # same tz results in DatetimeIndex (DST)
        result = Index(
                Timestamp("2011-01-01 10:00", tz="US/Eastern"),
                Timestamp("2011-08-01 10:00", tz="US/Eastern"),
        exp = DatetimeIndex(
            [Timestamp("2011-01-01 10:00"), pd.NaT, Timestamp("2011-08-01 10:00")],
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is not None
        assert result.tz == exp.tz

        # different tz results in Index(dtype=object)
        result = Index(
                Timestamp("2011-01-01 10:00"),
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
        exp = Index(
                Timestamp("2011-01-01 10:00"),
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
        tm.assert_index_equal(result, exp, exact=True)
        assert not isinstance(result, DatetimeIndex)

        result = Index(
                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
        exp = Index(
                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
        tm.assert_index_equal(result, exp, exact=True)
        assert not isinstance(result, DatetimeIndex)

        # all NaT
        result = Index([pd.NaT, pd.NaT], name="idx")
        exp = DatetimeIndex([pd.NaT, pd.NaT], name="idx")
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is None

        # all NaT with tz
        result = Index([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx")
        exp = DatetimeIndex([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx")

        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is not None
        assert result.tz == exp.tz

    def test_construction_dti_with_mixed_timezones(self):
        # GH 11488 (not changed, added explicit tests)
        # NOTE(review): most multi-line calls in this method are cut off in
        # this excerpt, and the constructor calls inside the `with` blocks
        # are missing entirely; everything is reproduced as found.

        # no tz results in DatetimeIndex
        result = DatetimeIndex(
            [Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx"
        exp = DatetimeIndex(
            [Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx"
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)

        # same tz results in DatetimeIndex
        result = DatetimeIndex(
                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"),
        exp = DatetimeIndex(
            [Timestamp("2011-01-01 10:00"), Timestamp("2011-01-02 10:00")],
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)

        # same tz results in DatetimeIndex (DST)
        result = DatetimeIndex(
                Timestamp("2011-01-01 10:00", tz="US/Eastern"),
                Timestamp("2011-08-01 10:00", tz="US/Eastern"),
        exp = DatetimeIndex(
            [Timestamp("2011-01-01 10:00"), Timestamp("2011-08-01 10:00")],
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)

        # tz mismatch affecting to tz-aware raises TypeError/ValueError

        with pytest.raises(ValueError):
                    Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                    Timestamp("2011-01-02 10:00", tz="US/Eastern"),

        msg = "cannot be converted to datetime64"
        with pytest.raises(ValueError, match=msg):
                    Timestamp("2011-01-01 10:00"),
                    Timestamp("2011-01-02 10:00", tz="US/Eastern"),

        with pytest.raises(ValueError):
                    Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                    Timestamp("2011-01-02 10:00", tz="US/Eastern"),

        with pytest.raises(ValueError, match=msg):
            # passing tz should result in DatetimeIndex, then mismatch raises
            # TypeError
            # NOTE(review): the check above expects ValueError, not
            # TypeError -- confirm which one is intended.
                    Timestamp("2011-01-01 10:00"),
                    Timestamp("2011-01-02 10:00", tz="US/Eastern"),

    def test_construction_base_constructor(self):
        """``Index`` and ``DatetimeIndex`` agree on Timestamp/NaT/NaN input.

        Each sample is checked as a list and as a NumPy array.
        """
        samples = (
            [pd.Timestamp("2011-01-01"), pd.NaT, pd.Timestamp("2011-01-03")],
            [np.nan, pd.NaT, pd.Timestamp("2011-01-03")],
        )
        for values in samples:
            tm.assert_index_equal(pd.Index(values), pd.DatetimeIndex(values))
            # A separate array is built for each constructor.
            from_array = pd.Index(np.array(values))
            dti_from_array = pd.DatetimeIndex(np.array(values))
            tm.assert_index_equal(from_array, dti_from_array)

    def test_construction_outofbounds(self):
        # GH 13663
        # NOTE(review): the closing bracket of the `dates` list and the
        # statement inside the `with` block are missing from this excerpt.
        dates = [
            datetime(3000, 1, 1),
            datetime(4000, 1, 1),
            datetime(5000, 1, 1),
            datetime(6000, 1, 1),
        exp = Index(dates, dtype=object)
        # coerces to object
        tm.assert_index_equal(Index(dates), exp)

        with pytest.raises(OutOfBoundsDatetime):
            # can't create DatetimeIndex

    def test_construction_with_ndarray(self):
        """Rebuild a business-day DatetimeIndex from its ``.values`` (GH 5152)."""
        days = [datetime(2013, 10, day) for day in (7, 8, 9)]
        raw = DatetimeIndex(days, freq=pd.offsets.BDay()).values

        rebuilt = DatetimeIndex(raw, freq=pd.offsets.BDay())
        wanted = DatetimeIndex(
            ["2013-10-07", "2013-10-08", "2013-10-09"], freq="B"
        )
        tm.assert_index_equal(rebuilt, wanted)

    def test_integer_values_and_tz_interpreted_as_utc(self):
        """Check DatetimeIndex construction from i8 values (GH-24559).

        Covers localizing the tz-naive result to US/Central and passing
        ``tz="UTC"`` directly to the constructor without any warning.
        """
        # GH-24559
        val = np.datetime64("2000-01-01 00:00:00", "ns")
        values = np.array([val.view("i8")])

        result = DatetimeIndex(values).tz_localize("US/Central")

        expected = pd.DatetimeIndex(["2000-01-01T00:00:00"], tz="US/Central")
        tm.assert_index_equal(result, expected)

        # but UTC is *not* deprecated.
        with tm.assert_produces_warning(None):
            result = DatetimeIndex(values, tz="UTC")
        # The i8 values encode 2000-01-01 00:00:00, so with tz="UTC" the
        # expected index carries that wall time in UTC; compare it so this
        # branch actually verifies something.
        expected = pd.DatetimeIndex(["2000-01-01T00:00:00"], tz="UTC")
        tm.assert_index_equal(result, expected)

    def test_constructor_coverage(self):
        # Assorted date_range / DatetimeIndex constructor checks: fractional
        # periods, invalid periods, generator and string-array input, and
        # frequency validation messages.
        # NOTE(review): several statements are cut off in this excerpt (an
        # empty `with` body and unclosed parentheses); they are reproduced
        # as found.
        rng = date_range("1/1/2000", periods=10.5)
        exp = date_range("1/1/2000", periods=10)
        tm.assert_index_equal(rng, exp)

        msg = "periods must be a number, got foo"
        with pytest.raises(TypeError, match=msg):
            date_range(start="1/1/2000", periods="foo", freq="D")

        with pytest.raises(TypeError):

        # generator expression
        gen = (datetime(2000, 1, 1) + timedelta(i) for i in range(10))
        result = DatetimeIndex(gen)
        expected = DatetimeIndex(
            [datetime(2000, 1, 1) + timedelta(i) for i in range(10)]
        tm.assert_index_equal(result, expected)

        # NumPy string array
        strings = np.array(["2000-01-01", "2000-01-02", "2000-01-03"])
        result = DatetimeIndex(strings)
        expected = DatetimeIndex(strings.astype("O"))
        tm.assert_index_equal(result, expected)

        from_ints = DatetimeIndex(expected.asi8)
        tm.assert_index_equal(from_ints, expected)

        # string with NaT
        strings = np.array(["2000-01-01", "2000-01-02", "NaT"])
        result = DatetimeIndex(strings)
        expected = DatetimeIndex(strings.astype("O"))
        tm.assert_index_equal(result, expected)

        from_ints = DatetimeIndex(expected.asi8)
        tm.assert_index_equal(from_ints, expected)

        # non-conforming
        msg = (
            "Inferred frequency None from passed values does not conform"
            " to passed frequency D"
        with pytest.raises(ValueError, match=msg):
            DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-04"], freq="D")

        msg = (
            "Of the four parameters: start, end, periods, and freq, exactly"
            " three must be specified"
        with pytest.raises(ValueError, match=msg):
            date_range(start="2011-01-01", freq="b")
        with pytest.raises(ValueError, match=msg):
            date_range(end="2011-01-01", freq="B")
        with pytest.raises(ValueError, match=msg):
            date_range(periods=10, freq="D")

    @pytest.mark.parametrize("freq", ["AS", "W-SUN"])
    def test_constructor_datetime64_tzformat(self, freq):
        # Builds ranges from strings carrying a UTC offset and compares their
        # i8 values with ranges localized to a named zone.
        # NOTE(review): every ``date_range(`` call that spans several lines
        # below has lost its closing parenthesis, and each
        # ``expected = date_range(`` has lost its arguments as well; they must
        # be restored from the upstream test before this method can run.
        # see GH#6572: ISO 8601 format results in pytz.FixedOffset
        idx = date_range(
            "2013-01-01T00:00:00-05:00", "2016-01-01T23:59:59-05:00", freq=freq
        expected = date_range(
        tm.assert_index_equal(idx, expected)
        # Unable to use `US/Eastern` because of DST
        expected_i8 = date_range(
            "2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz="America/Lima"
        tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8)

        # Same check with a +09:00 offset against Asia/Tokyo.
        idx = date_range(
            "2013-01-01T00:00:00+09:00", "2016-01-01T23:59:59+09:00", freq=freq
        expected = date_range(
        tm.assert_index_equal(idx, expected)
        expected_i8 = date_range(
            "2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz="Asia/Tokyo"
        tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8)

        # Non ISO 8601 format results in dateutil.tz.tzoffset
        idx = date_range("2013/1/1 0:00:00-5:00", "2016/1/1 23:59:59-5:00", freq=freq)
        expected = date_range(
        tm.assert_index_equal(idx, expected)
        # Unable to use `US/Eastern` because of DST
        expected_i8 = date_range(
            "2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz="America/Lima"
        tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8)

        idx = date_range("2013/1/1 0:00:00+9:00", "2016/1/1 23:59:59+09:00", freq=freq)
        expected = date_range(
        tm.assert_index_equal(idx, expected)
        expected_i8 = date_range(
            "2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz="Asia/Tokyo"
        tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8)

    def test_constructor_dtype(self):
        """Check how ``DatetimeIndex`` reconciles the ``dtype`` and ``tz`` arguments.

        Covers localization through a tz-aware dtype, the equivalent ``tz=``
        spelling, and the error cases where the data, ``tz`` and ``dtype``
        disagree.
        """
        # passing a dtype with a tz should localize
        idx = DatetimeIndex(
            ["2013-01-01", "2013-01-02"], dtype="datetime64[ns, US/Eastern]"
        )
        expected = DatetimeIndex(["2013-01-01", "2013-01-02"]).tz_localize("US/Eastern")
        tm.assert_index_equal(idx, expected)

        idx = DatetimeIndex(["2013-01-01", "2013-01-02"], tz="US/Eastern")
        tm.assert_index_equal(idx, expected)

        # if we already have a tz and its not the same, then raise
        idx = DatetimeIndex(
            ["2013-01-01", "2013-01-02"], dtype="datetime64[ns, US/Eastern]"
        )

        msg = (
            "cannot supply both a tz and a timezone-naive dtype"
            r" \(i\.e\. datetime64\[ns\]\)"
        )
        with pytest.raises(ValueError, match=msg):
            DatetimeIndex(idx, dtype="datetime64[ns]")

        # this is effectively trying to convert tz's
        msg = "data is already tz-aware US/Eastern, unable to set specified tz: CET"
        with pytest.raises(TypeError, match=msg):
            DatetimeIndex(idx, dtype="datetime64[ns, CET]")
        msg = "cannot supply both a tz and a dtype with a tz"
        with pytest.raises(ValueError, match=msg):
            DatetimeIndex(idx, tz="CET", dtype="datetime64[ns, US/Eastern]")

        # Passing the dtype the data already has must leave it unchanged.
        result = DatetimeIndex(idx, dtype="datetime64[ns, US/Eastern]")
        tm.assert_index_equal(idx, result)

    @pytest.mark.parametrize("dtype", [object, np.int32, np.int64])
    def test_constructor_invalid_dtype_raises(self, dtype):
        """Integer data combined with the given ``dtype`` must raise ValueError."""
        # GH 23986
        int_values = [1, 2]
        with pytest.raises(ValueError):
            DatetimeIndex(int_values, dtype=dtype)

    def test_constructor_name(self):
        idx = date_range(start="2000-01-01", periods=1, freq="A", name="TEST")
        assert idx.name == "TEST"

    def test_000constructor_resolution(self):
        # 2252
        t1 = Timestamp((1352934390 * 1000000000) + 1000000 + 1000 + 1)
        idx = DatetimeIndex([t1])

        assert idx.nanosecond[0] == t1.nanosecond

    def test_disallow_setting_tz(self):
        """Assigning to the ``tz`` attribute of a DatetimeIndex must raise AttributeError."""
        # GH 3746
        utc_index = DatetimeIndex(["2010"], tz="UTC")
        with pytest.raises(AttributeError):
            setattr(utc_index, "tz", pytz.timezone("US/Pacific"))

            Timestamp("2000", tz="America/Los_Angeles").tz,
    def test_constructor_start_end_with_tz(self, tz):
        """``date_range`` with tz-aware endpoints plus ``tz`` yields a Los Angeles index.

        ``tz`` is forwarded unchanged to ``date_range``; presumably it is
        supplied by a parametrize decorator (confirm against the upstream test).
        """
        # GH 18595
        start = Timestamp("2013-01-01 06:00:00", tz="America/Los_Angeles")
        end = Timestamp("2013-01-02 06:00:00", tz="America/Los_Angeles")
        result = date_range(freq="D", start=start, end=end, tz=tz)
        expected = DatetimeIndex(
            ["2013-01-01 06:00:00", "2013-01-02 06:00:00"], tz="America/Los_Angeles"
        )
        tm.assert_index_equal(result, expected)
        # Especially assert that the timezone is consistent for pytz
        assert pytz.timezone("America/Los_Angeles") is result.tz

    @pytest.mark.parametrize("tz", ["US/Pacific", "US/Eastern", "Asia/Tokyo"])
    def test_constructor_with_non_normalized_pytz(self, tz):
        """An index built from a Timestamp's tz object reports the ``pytz.timezone`` singleton."""
        # GH 18595
        tz_from_timestamp = Timestamp("2010", tz=tz).tz
        built = DatetimeIndex(["2010"], tz=tz_from_timestamp)
        canonical = pytz.timezone(tz)
        assert canonical is built.tz

    def test_constructor_timestamp_near_dst(self):
        # GH 20854
        ts = [
            Timestamp("2016-10-30 03:00:00+0300", tz="Europe/Helsinki"),
            Timestamp("2016-10-30 03:00:00+0200", tz="Europe/Helsinki"),
        result = DatetimeIndex(ts)
        expected = DatetimeIndex([ts[0].to_pydatetime(), ts[1].to_pydatetime()])
        tm.assert_index_equal(result, expected)

    # TODO(GH-24559): Remove the xfail for the tz-aware case.
    @pytest.mark.parametrize("klass", [Index, DatetimeIndex])
    @pytest.mark.parametrize("box", [np.array, partial(np.array, dtype=object), list])
        "tz, dtype",
        [("US/Pacific", "datetime64[ns, US/Pacific]"), (None, "datetime64[ns]")],
    def test_constructor_with_int_tz(self, klass, box, tz, dtype):
        # GH 20997, 20964
        ts = Timestamp("2018-01-01", tz=tz)
        result = klass(box([ts.value]), dtype=dtype)
        expected = klass([ts])
        assert result == expected

    # This is the desired future behavior
    # Note: this xfail is not strict because the test passes with
    #  None or any of the UTC variants for tz_naive_fixture
    @pytest.mark.xfail(reason="Future behavior", strict=False)
    @pytest.mark.filterwarnings("ignore:\\n    Passing:FutureWarning")
    def test_construction_int_rountrip(self, tz_naive_fixture):
        # GH 12619
        # TODO(GH-24559): Remove xfail
        tz = tz_naive_fixture
        result = 1293858000000000000
        expected = DatetimeIndex([result], tz=tz).asi8[0]
        assert result == expected

    def test_construction_from_replaced_timestamps_with_dst(self):
        # Resamples a frame to yearly means, moves every resulting index entry
        # to June 1st with ``Timestamp.replace`` and rebuilds a DatetimeIndex
        # from those timestamps.
        # GH 18785
        # NOTE(review): the remaining arguments and the closing parenthesis of
        # this ``pd.date_range(`` call are missing from this copy of the file.
        index = pd.date_range(
            pd.Timestamp(2000, 1, 1),
            pd.Timestamp(2005, 1, 1),
        test = pd.DataFrame({"data": range(len(index))}, index=index)
        test = test.resample("Y").mean()
        result = pd.DatetimeIndex([x.replace(month=6, day=1) for x in test.index])
        # NOTE(review): the list brackets, any further arguments and the
        # closing parenthesis of the expected index are missing as well.
        expected = pd.DatetimeIndex(
                "2000-06-01 00:00:00",
                "2001-06-01 00:00:00",
                "2002-06-01 00:00:00",
                "2003-06-01 00:00:00",
                "2004-06-01 00:00:00",
                "2005-06-01 00:00:00",
        tm.assert_index_equal(result, expected)

    def test_construction_with_tz_and_tz_aware_dti(self):
        """Passing a different ``tz`` together with a tz-aware index must raise TypeError."""
        # GH 23579
        central = date_range("2016-01-01", periods=3, tz="US/Central")
        conflicting_tz = "Asia/Tokyo"
        with pytest.raises(TypeError):
            DatetimeIndex(central, tz=conflicting_tz)

    def test_construction_with_nat_and_tzlocal(self):
        """Strings including "NaT" localize to ``tzlocal`` like an explicit Timestamp/NaT pair."""
        local_tz = dateutil.tz.tzlocal()
        from_strings = DatetimeIndex(["2018", "NaT"], tz=local_tz)
        from_scalars = DatetimeIndex([Timestamp("2018", tz=local_tz), pd.NaT])
        tm.assert_index_equal(from_strings, from_scalars)

    def test_constructor_no_precision_raises(self):
        """A unit-less "datetime64" dtype is rejected by both ``DatetimeIndex`` and ``Index``."""
        # GH-24753, GH-24739
        msg = "with no precision is not allowed"

        # Same expectation for the specialised and the generic constructor.
        for constructor in (pd.DatetimeIndex, pd.Index):
            with pytest.raises(ValueError, match=msg):
                constructor(["2000"], dtype="datetime64")

    def test_constructor_wrong_precision_raises(self):
        """Requesting the "datetime64[us]" dtype is expected to raise ValueError."""
        microsecond_dtype = "datetime64[us]"
        with pytest.raises(ValueError):
            pd.DatetimeIndex(["2000"], dtype=microsecond_dtype)

    def test_index_constructor_with_numpy_object_array_and_timestamp_tz_with_nan(self):
        # GH 27011
        result = Index(np.array([Timestamp("2019", tz="UTC"), np.nan], dtype=object))
        expected = DatetimeIndex([Timestamp("2019", tz="UTC"), pd.NaT])
        tm.assert_index_equal(result, expected)
Exemplo n.º 32
    def test_construction_dti_with_mixed_timezones(self):
        # NOTE(review): this copy of the method is damaged. Closing brackets,
        # list openers, keyword arguments and the constructor calls inside the
        # ``pytest.raises`` blocks are missing; restore them from the upstream
        # test before running.
        # GH 11488 (not changed, added explicit tests)

        # no tz results in DatetimeIndex
        result = DatetimeIndex(
            [Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx"
        exp = DatetimeIndex(
            [Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx"
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)

        # same tz results in DatetimeIndex
        result = DatetimeIndex(
                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"),
        exp = DatetimeIndex(
            [Timestamp("2011-01-01 10:00"), Timestamp("2011-01-02 10:00")],
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)

        # same tz results in DatetimeIndex (DST)
        result = DatetimeIndex(
                Timestamp("2011-01-01 10:00", tz="US/Eastern"),
                Timestamp("2011-08-01 10:00", tz="US/Eastern"),
        exp = DatetimeIndex(
            [Timestamp("2011-01-01 10:00"), Timestamp("2011-08-01 10:00")],
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)

        # tz mismatch affecting to tz-aware raises TypeError/ValueError

        # NOTE(review): each ``with`` block below has lost the call that wraps
        # the listed timestamps.
        with pytest.raises(ValueError):
                    Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                    Timestamp("2011-01-02 10:00", tz="US/Eastern"),

        msg = "cannot be converted to datetime64"
        with pytest.raises(ValueError, match=msg):
                    Timestamp("2011-01-01 10:00"),
                    Timestamp("2011-01-02 10:00", tz="US/Eastern"),

        with pytest.raises(ValueError):
                    Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                    Timestamp("2011-01-02 10:00", tz="US/Eastern"),

        with pytest.raises(ValueError, match=msg):
            # passing tz should results in DatetimeIndex, then mismatch raises
            # TypeError
                    Timestamp("2011-01-01 10:00"),
                    Timestamp("2011-01-02 10:00", tz="US/Eastern"),
Exemplo n.º 33
 def test_td_sub_timedeltalike_object_dtype_array(self):
     # GH#21980
     arr = np.array([Timestamp("20130101 9:01"), Timestamp("20121230 9:02")])
     exp = np.array([Timestamp("20121231 9:01"), Timestamp("20121229 9:02")])
     res = arr - Timedelta("1D")
     tm.assert_numpy_array_equal(res, exp)
Exemplo n.º 34
class TestDataFrameSetitemCopyViewSemantics:
    def test_setitem_always_copy(self, float_frame):
        assert "E" not in float_frame.columns
        s = float_frame["A"].copy()
        float_frame["E"] = s

        float_frame["E"][5:10] = np.nan
        assert notna(s[5:10]).all()

    @pytest.mark.parametrize("consolidate", [True, False])
    # NOTE(review): the rest of the parameter list and the closing "):" of the
    # signature below are missing from this copy of the file;
    # ``using_array_manager`` is read in the body, so it presumably belongs
    # there.
    def test_setitem_partial_column_inplace(self, consolidate,
        # This setting should be in-place, regardless of whether frame is
        #  single-block or multi-block
        # GH#304 this used to be incorrectly not-inplace, in which case
        #  we needed to ensure _item_cache was cleared.

        # NOTE(review): the closing "}," of the dict literal below is missing.
        df = DataFrame({
            "x": [1.1, 2.1, 3.1, 4.1],
            "y": [5.1, 6.1, 7.1, 8.1]
                       index=[0, 1, 2, 3])
        df.insert(2, "z", np.nan)
        if not using_array_manager:
            # NOTE(review): the two asserts below contradict each other as
            # written; an ``else:`` branch (and possibly a consolidation step)
            # appears to have been lost.
            if consolidate:
                assert len(df._mgr.blocks) == 1
                assert len(df._mgr.blocks) == 2

        # Keep a handle on the column's array before the assignment.
        zvals = df["z"]._values

        df.loc[2:, "z"] = 42

        expected = Series([np.nan, np.nan, 42, 42], index=df.index, name="z")
        tm.assert_series_equal(df["z"], expected)

        # check setting occurred in-place
        tm.assert_numpy_array_equal(zvals, expected.values)
        assert np.shares_memory(zvals, df["z"]._values)

    def test_setitem_duplicate_columns_not_inplace(self):
        # GH#39510
        cols = ["A", "B"] * 2
        df = DataFrame(0.0, index=[0], columns=cols)
        df_copy = df.copy()
        df_view = df[:]
        df["B"] = (2, 5)

        expected = DataFrame([[0.0, 2, 0.0, 5]], columns=cols)
        tm.assert_frame_equal(df_view, df_copy)
        tm.assert_frame_equal(df, expected)

        "value", [1, np.array([[1], [1]], dtype="int64"), [[1], [1]]])
    def test_setitem_same_dtype_not_inplace(self, value, using_array_manager):
        # GH#39510
        cols = ["A", "B"]
        df = DataFrame(0, index=[0, 1], columns=cols)
        df_copy = df.copy()
        df_view = df[:]
        df[["B"]] = value

        expected = DataFrame([[0, 1], [0, 1]], columns=cols)
        tm.assert_frame_equal(df, expected)
        tm.assert_frame_equal(df_view, df_copy)

        "value", [1.0, np.array([[1.0], [1.0]]), [[1.0], [1.0]]])
    def test_setitem_listlike_key_scalar_value_not_inplace(self, value):
        # GH#39510
        cols = ["A", "B"]
        df = DataFrame(0, index=[0, 1], columns=cols)
        df_copy = df.copy()
        df_view = df[:]
        df[["B"]] = value

        expected = DataFrame([[0, 1.0], [0, 1.0]], columns=cols)
        tm.assert_frame_equal(df_view, df_copy)
        tm.assert_frame_equal(df, expected)

                [True, False],
                    reason="Boolean indexer incorrectly setting inplace",
                    strict=False,  # passing on some builds, no obvious pattern
        "value, set_value",
            (1, 5),
            (1.0, 5.0),
            (Timestamp("2020-12-31"), Timestamp("2021-12-31")),
            ("a", "b"),
    def test_setitem_not_operating_inplace(self, value, set_value, indexer):
        # GH#43406
        df = DataFrame({"a": value}, index=[0, 1])
        expected = df.copy()
        view = df[:]
        df[indexer] = set_value
        tm.assert_frame_equal(view, expected)
Exemplo n.º 35
def time_f():
    """Return a one-argument callable that builds a ``Timestamp`` from its input."""
    def to_timestamp(x):
        return Timestamp(x)

    return to_timestamp

def fxcm_timestamp_fn(df):
Exemplo n.º 36
 def test_td_add_timedeltalike_object_dtype_array(self, op):
     # GH#21980
     arr = np.array([Timestamp("20130101 9:01"), Timestamp("20121230 9:02")])
     exp = np.array([Timestamp("20130102 9:01"), Timestamp("20121231 9:02")])
     res = op(arr, Timedelta("1D"))
     tm.assert_numpy_array_equal(res, exp)
Exemplo n.º 37
 def test_index_constructor_with_numpy_object_array_and_timestamp_tz_with_nan(self):
     # GH 27011
     result = Index(np.array([Timestamp("2019", tz="UTC"), np.nan], dtype=object))
     expected = DatetimeIndex([Timestamp("2019", tz="UTC"), pd.NaT])
     tm.assert_index_equal(result, expected)
Exemplo n.º 38

def test_bins_not_monotonic():
    """``cut`` must raise ValueError when the bin edges are not increasing."""
    expected_message = "bins must increase monotonically"
    values = [0.2, 1.4, 2.5, 6.2, 9.7, 2.1]
    unordered_bins = [0.1, 1.5, 1, 10]

    with pytest.raises(ValueError, match=expected_message):
        cut(values, unordered_bins)

    "x, bins, expected",
            date_range("2017-12-31", periods=3),
            [Timestamp.min, Timestamp("2018-01-01"), Timestamp.max],
                    (Timestamp.min, Timestamp("2018-01-01")),
                    (Timestamp("2018-01-01"), Timestamp.max),
            [-1, 0, 1],
                [np.iinfo(np.int64).min, 0, np.iinfo(np.int64).max], dtype="int64"
                [(np.iinfo(np.int64).min, 0), (0, np.iinfo(np.int64).max)]
Exemplo n.º 39
    def test_datetimeindex_constructor_misc(self):
        # Builds the same four dates from several input forms and checks the
        # resulting indexes agree, then checks length, endpoint and frequency
        # of a few ``date_range`` calls.
        arr = ["1/1/2005", "1/2/2005", "Jn 3, 2005", "2005-01-04"]
        msg = r"(\(')?Unknown string format(:', 'Jn 3, 2005'\))?"
        # NOTE(review): the body of this ``with`` block is missing from this
        # copy of the file; restore the call expected to raise.
        with pytest.raises(ValueError, match=msg):

        arr = ["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"]
        idx1 = DatetimeIndex(arr)

        arr = [datetime(2005, 1, 1), "1/2/2005", "1/3/2005", "2005-01-04"]
        idx2 = DatetimeIndex(arr)

        arr = [Timestamp(datetime(2005, 1, 1)), "1/2/2005", "1/3/2005", "2005-01-04"]
        idx3 = DatetimeIndex(arr)

        arr = np.array(["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"], dtype="O")
        idx4 = DatetimeIndex(arr)

        arr = to_datetime(["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"])
        idx5 = DatetimeIndex(arr)

        arr = to_datetime(["1/1/2005", "1/2/2005", "Jan 3, 2005", "2005-01-04"])
        idx6 = DatetimeIndex(arr)

        # Day-first and year-first spellings of the same two dates.
        idx7 = DatetimeIndex(["12/05/2007", "25/01/2008"], dayfirst=True)
        # NOTE(review): the closing parenthesis of the call below is missing.
        idx8 = DatetimeIndex(
            ["2007/05/12", "2008/01/25"], dayfirst=False, yearfirst=True
        tm.assert_index_equal(idx7, idx8)

        for other in [idx2, idx3, idx4, idx5, idx6]:
            assert (idx1.values == other.values).all()

        sdate = datetime(1999, 12, 25)
        edate = datetime(2000, 1, 1)
        idx = date_range(start=sdate, freq="1B", periods=20)
        assert len(idx) == 20
        assert idx[0] == sdate + 0 * offsets.BDay()
        assert idx.freq == "B"

        idx = date_range(end=edate, freq=("D", 5), periods=20)
        assert len(idx) == 20
        assert idx[-1] == edate
        assert idx.freq == "5D"

        # A string alias and the equivalent offset object must agree.
        idx1 = date_range(start=sdate, end=edate, freq="W-SUN")
        idx2 = date_range(start=sdate, end=edate, freq=offsets.Week(weekday=6))
        assert len(idx1) == len(idx2)
        assert idx1.freq == idx2.freq

        idx1 = date_range(start=sdate, end=edate, freq="QS")
        # NOTE(review): the closing parenthesis of the call below is missing.
        idx2 = date_range(
            start=sdate, end=edate, freq=offsets.QuarterBegin(startingMonth=1)
        assert len(idx1) == len(idx2)
        assert idx1.freq == idx2.freq

        idx1 = date_range(start=sdate, end=edate, freq="BQ")
        # NOTE(review): the closing parenthesis of the call below is missing.
        idx2 = date_range(
            start=sdate, end=edate, freq=offsets.BQuarterEnd(startingMonth=12)
        assert len(idx1) == len(idx2)
        assert idx1.freq == idx2.freq
Exemplo n.º 40
class TestCounting(object):
    def test_cumcount(self):
        df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'])
        g = df.groupby('A')
        sg = g.A

        expected = Series([0, 1, 2, 0, 3])

        assert_series_equal(expected, g.cumcount())
        assert_series_equal(expected, sg.cumcount())

    def test_cumcount_empty(self):
        ge = DataFrame().groupby(level=0)
        se = Series().groupby(level=0)

        # edge case, as this is usually considered float
        e = Series(dtype='int64')

        assert_series_equal(e, ge.cumcount())
        assert_series_equal(e, se.cumcount())

    def test_cumcount_dupe_index(self):
        df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']],
                       index=[0] * 5)
        g = df.groupby('A')
        sg = g.A

        expected = Series([0, 1, 2, 0, 3], index=[0] * 5)

        assert_series_equal(expected, g.cumcount())
        assert_series_equal(expected, sg.cumcount())

    def test_cumcount_mi(self):
        mi = MultiIndex.from_tuples([[0, 1], [1, 2], [2, 2], [2, 2], [1, 0]])
        df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']],
        g = df.groupby('A')
        sg = g.A

        expected = Series([0, 1, 2, 0, 3], index=mi)

        assert_series_equal(expected, g.cumcount())
        assert_series_equal(expected, sg.cumcount())

    def test_cumcount_groupby_not_col(self):
        df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']],
                       index=[0] * 5)
        g = df.groupby([0, 0, 0, 1, 0])
        sg = g.A

        expected = Series([0, 1, 2, 0, 3], index=[0] * 5)

        assert_series_equal(expected, g.cumcount())
        assert_series_equal(expected, sg.cumcount())

    def test_ngroup(self):
        df = DataFrame({'A': list('aaaba')})
        g = df.groupby('A')
        sg = g.A

        expected = Series([0, 0, 0, 1, 0])

        assert_series_equal(expected, g.ngroup())
        assert_series_equal(expected, sg.ngroup())

    def test_ngroup_distinct(self):
        df = DataFrame({'A': list('abcde')})
        g = df.groupby('A')
        sg = g.A

        expected = Series(range(5), dtype='int64')

        assert_series_equal(expected, g.ngroup())
        assert_series_equal(expected, sg.ngroup())

    def test_ngroup_one_group(self):
        df = DataFrame({'A': [0] * 5})
        g = df.groupby('A')
        sg = g.A

        expected = Series([0] * 5)

        assert_series_equal(expected, g.ngroup())
        assert_series_equal(expected, sg.ngroup())

    def test_ngroup_empty(self):
        ge = DataFrame().groupby(level=0)
        se = Series().groupby(level=0)

        # edge case, as this is usually considered float
        e = Series(dtype='int64')

        assert_series_equal(e, ge.ngroup())
        assert_series_equal(e, se.ngroup())

    def test_ngroup_series_matches_frame(self):
        df = DataFrame({'A': list('aaaba')})
        s = Series(list('aaaba'))

        assert_series_equal(df.groupby(s).ngroup(), s.groupby(s).ngroup())

    def test_ngroup_dupe_index(self):
        df = DataFrame({'A': list('aaaba')}, index=[0] * 5)
        g = df.groupby('A')
        sg = g.A

        expected = Series([0, 0, 0, 1, 0], index=[0] * 5)

        assert_series_equal(expected, g.ngroup())
        assert_series_equal(expected, sg.ngroup())

    def test_ngroup_mi(self):
        mi = MultiIndex.from_tuples([[0, 1], [1, 2], [2, 2], [2, 2], [1, 0]])
        df = DataFrame({'A': list('aaaba')}, index=mi)
        g = df.groupby('A')
        sg = g.A
        expected = Series([0, 0, 0, 1, 0], index=mi)

        assert_series_equal(expected, g.ngroup())
        assert_series_equal(expected, sg.ngroup())

    def test_ngroup_groupby_not_col(self):
        df = DataFrame({'A': list('aaaba')}, index=[0] * 5)
        g = df.groupby([0, 0, 0, 1, 0])
        sg = g.A

        expected = Series([0, 0, 0, 1, 0], index=[0] * 5)

        assert_series_equal(expected, g.ngroup())
        assert_series_equal(expected, sg.ngroup())

    def test_ngroup_descending(self):
        df = DataFrame(['a', 'a', 'b', 'a', 'b'], columns=['A'])
        g = df.groupby(['A'])

        ascending = Series([0, 0, 1, 0, 1])
        descending = Series([1, 1, 0, 1, 0])

        assert_series_equal(descending, (g.ngroups - 1) - ascending)
        assert_series_equal(ascending, g.ngroup(ascending=True))
        assert_series_equal(descending, g.ngroup(ascending=False))

    def test_ngroup_matches_cumcount(self):
        # verify one manually-worked out case works
        df = DataFrame(
            [['a', 'x'], ['a', 'y'], ['b', 'x'], ['a', 'x'], ['b', 'y']],
            columns=['A', 'X'])
        g = df.groupby(['A', 'X'])
        g_ngroup = g.ngroup()
        g_cumcount = g.cumcount()
        expected_ngroup = Series([0, 1, 2, 0, 3])
        expected_cumcount = Series([0, 0, 0, 1, 0])

        assert_series_equal(g_ngroup, expected_ngroup)
        assert_series_equal(g_cumcount, expected_cumcount)

    def test_ngroup_cumcount_pair(self):
        # brute force comparison for all small series
        for p in cart_product(range(3), repeat=4):
            df = DataFrame({'a': p})
            g = df.groupby(['a'])

            order = sorted(set(p))
            ngroupd = [order.index(val) for val in p]
            cumcounted = [p[:i].count(val) for i, val in enumerate(p)]

            assert_series_equal(g.ngroup(), Series(ngroupd))
            assert_series_equal(g.cumcount(), Series(cumcounted))

    def test_ngroup_respects_groupby_order(self):
        df = DataFrame({'a': np.random.choice(list('abcdef'), 100)})
        for sort_flag in (False, True):
            g = df.groupby(['a'], sort=sort_flag)
            df['group_id'] = -1
            df['group_index'] = -1

            for i, (_, group) in enumerate(g):
                df.loc[group.index, 'group_id'] = i
                for j, ind in enumerate(group.index):
                    df.loc[ind, 'group_index'] = j

            assert_series_equal(Series(df['group_id'].values), g.ngroup())
            assert_series_equal(Series(df['group_index'].values), g.cumcount())

        [[Timestamp('2016-05-%02d 20:09:25+00:00' % i) for i in range(1, 4)],
         [Timestamp('2016-05-%02d 20:09:25' % i)
          for i in range(1, 4)], [Timedelta(x, unit="h") for x in range(1, 4)],
         [Period(freq="2W", year=2017, month=x) for x in range(1, 4)]])
    def test_count_with_datetimelike(self, datetimelike):
        # test for #13393, where DataframeGroupBy.count() fails
        # when counting a datetimelike column.

        df = DataFrame({'x': ['a', 'a', 'b'], 'y': datetimelike})
        res = df.groupby('x').count()
        expected = DataFrame({'y': [2, 1]}, index=['a', 'b'])
        expected.index.name = "x"
        assert_frame_equal(expected, res)

    def test_count_with_only_nans_in_first_group(self):
        # GH21956
        df = DataFrame({'A': [np.nan, np.nan], 'B': ['a', 'b'], 'C': [1, 2]})
        result = df.groupby(['A', 'B']).C.count()
        mi = MultiIndex(levels=[[], ['a', 'b']],
                        codes=[[], []],
                        names=['A', 'B'])
        expected = Series([], index=mi, dtype=np.int64, name='C')
        assert_series_equal(result, expected, check_index_type=False)
def create_data():
    """ create the pickle/msgpack data """

    # NOTE(review): this copy of the function is heavily damaged. Closing
    # brackets, ``else:`` branches and many keyword/constructor lines are
    # missing throughout, and the final ``return dict(`` is cut off. Restore
    # it from the upstream source before use; the comments below describe only
    # what is still visible.

    # Column data of mixed kinds (floats with NaN, ints, strings, dates,
    # mixed objects).
    data = {
        u'A': [0., 1., 2., 3., np.nan],
        u'B': [0, 1, 0, 1, 0],
        u'C': [u'foo1', u'foo2', u'foo3', u'foo4', u'foo5'],
        u'D': date_range('1/1/2009', periods=5),
        u'E': [0., 1, Timestamp('20100101'), u'foo', 2.]

    scalars = dict(timestamp=Timestamp('20130101'), period=Period('2012', 'M'))

    # One sample index per index kind.
    index = dict(int=Index(np.arange(10)),
                 date=date_range('20130101', periods=10),
                 period=period_range('2013-01-01', freq='M', periods=10),
                 float=Index(np.arange(10, dtype=np.float64)),
                 uint=Index(np.arange(10, dtype=np.uint64)),
                 timedelta=timedelta_range('00:00:00', freq='30T', periods=10))

    # Index kinds that are only added from the given pandas version onward.
    if _loose_version >= LooseVersion('0.18'):
        from pandas import RangeIndex
        index['range'] = RangeIndex(10)

    if _loose_version >= LooseVersion('0.21'):
        from pandas import interval_range
        index['interval'] = interval_range(0, periods=10)

    mi = dict(reg2=MultiIndex.from_tuples(tuple(
        zip(*[[u'bar', u'bar', u'baz', u'baz', u'foo', u'foo', u'qux', u'qux'],
              [u'one', u'two', u'one', u'two', u'one', u'two', u'one', u'two']
                                          names=[u'first', u'second']))

    # NOTE(review): most entries of ``series`` are missing; the frame section
    # below reads series[u'float'] and series[u'int'].
    series = dict(
                  index=date_range('20130101', periods=10)),
                      zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                                               names=[u'one', u'two'])),
                   index=[u'A', u'B', u'C', u'D', u'A']),
        cat=Series(Categorical([u'foo', u'bar', u'baz'])),
        dt=Series(date_range('20130101', periods=5)),
        dt_tz=Series(date_range('20130101', periods=5, tz='US/Eastern')),
        period=Series([Period('2000Q1')] * 5))

    # Frame whose column labels contain a duplicate ("A" appears twice).
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list(u"ABCDA")
    frame = dict(
            u'A': series[u'float'],
            u'B': series[u'float'] + 1
            u'A': series[u'int'],
            u'B': series[u'int'] + 1
        mixed=DataFrame({k: data[k]
                         for k in [u'A', u'B', u'C', u'D']}),
                u'A': np.arange(5).astype(np.float64),
                u'B': np.arange(5).astype(np.int64)
                zip(*[[u'bar', u'bar', u'baz', u'baz', u'baz'],
                      [u'one', u'two', u'one', u'two', u'three']])),
                                         names=[u'first', u'second'])),
        dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                      columns=[u'A', u'B', u'A']),
        cat_onecol=DataFrame({u'A': Categorical([u'foo', u'bar'])}),
            u'A': Categorical([u'foo', u'bar', u'baz']),
            u'B': np.arange(3).astype(np.int64)
                u'A': Timestamp('20130102', tz='US/Eastern'),
                u'B': Timestamp('20130603', tz='CET')
                u'A': Timestamp('20130102', tz='US/Eastern'),
                u'B': Timestamp('20130603', tz='CET'),
                u'C': Timestamp('20130603', tz='UTC')

    # Panel objects are built with warnings recorded instead of shown.
    with catch_warnings(record=True):
        mixed_dup_panel = Panel({
            u'ItemA': frame[u'float'],
            u'ItemB': frame[u'int']
        mixed_dup_panel.items = [u'ItemA', u'ItemA']
        panel = dict(float=Panel({
            u'ItemA': frame[u'float'],
            u'ItemB': frame[u'float'] + 1
                     dup=Panel(np.arange(30).reshape(3, 5,
                               items=[u'A', u'B', u'A']),

    cat = dict(int8=Categorical(list('abcdefg')),

    timestamp = dict(normal=Timestamp('2011-01-01'),
                     tz=Timestamp('2011-01-01', tz='US/Eastern'))

    # Versions before 0.19.2 use the ``offset`` keyword; the later lines use
    # ``freq``.
    # NOTE(review): the ``else:`` separating the two spellings is missing.
    if _loose_version < LooseVersion('0.19.2'):
        timestamp['freq'] = Timestamp('2011-01-01', offset='D')
        timestamp['both'] = Timestamp('2011-01-01',
        timestamp['freq'] = Timestamp('2011-01-01', freq='D')
        timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo', freq='M')

    # One instance of each offset class, keyed by a descriptive label.
    off = {
        'DateOffset': DateOffset(years=1),
        'DateOffset_h_ns': DateOffset(hour=6, nanoseconds=5824),
        'BusinessDay': BusinessDay(offset=timedelta(seconds=9)),
        'BusinessHour': BusinessHour(normalize=True, n=6, end='15:14'),
        'CustomBusinessDay': CustomBusinessDay(weekmask='Mon Fri'),
        'SemiMonthBegin': SemiMonthBegin(day_of_month=9),
        'SemiMonthEnd': SemiMonthEnd(day_of_month=24),
        'MonthBegin': MonthBegin(1),
        'MonthEnd': MonthEnd(1),
        'QuarterBegin': QuarterBegin(1),
        'QuarterEnd': QuarterEnd(1),
        'Day': Day(1),
        'YearBegin': YearBegin(1),
        'YearEnd': YearEnd(1),
        'Week': Week(1),
        'Week_Tues': Week(2, normalize=False, weekday=1),
        'WeekOfMonth': WeekOfMonth(week=3, weekday=4),
        'LastWeekOfMonth': LastWeekOfMonth(n=1, weekday=3),
        'FY5253': FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        'Easter': Easter(),
        'Hour': Hour(1),
        'Minute': Minute(1)

    # NOTE(review): the rest of the returned mapping is cut off here.
    return dict(series=series,
Exemplo n.º 42
    def test_divmod_invalid(self):
        """``divmod`` of a Timedelta by a Timestamp is expected to raise TypeError."""
        # GH#19365
        delta = Timedelta(days=2, hours=6)

        with pytest.raises(TypeError):
            divmod(delta, Timestamp("2018-01-22"))
Exemplo n.º 43
    def test_construction_index_with_mixed_timezones_with_NaT(self):
        # NOTE(review): this copy of the method is damaged. Closing brackets,
        # list openers, NaT entries and keyword arguments (such as the name
        # and tz of the expected indexes) are missing from most multi-line
        # calls; restore them from the upstream test before running.
        # see gh-11488
        result = Index(
            [pd.NaT, Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-02")],
        exp = DatetimeIndex(
            [pd.NaT, Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-02")],
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is None

        # Same tz results in DatetimeIndex
        result = Index(
                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"),
        exp = DatetimeIndex(
                Timestamp("2011-01-01 10:00"),
                Timestamp("2011-01-02 10:00"),
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is not None
        assert result.tz == exp.tz

        # same tz results in DatetimeIndex (DST)
        result = Index(
                Timestamp("2011-01-01 10:00", tz="US/Eastern"),
                Timestamp("2011-08-01 10:00", tz="US/Eastern"),
        exp = DatetimeIndex(
            [Timestamp("2011-01-01 10:00"), pd.NaT, Timestamp("2011-08-01 10:00")],
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is not None
        assert result.tz == exp.tz

        # different tz results in Index(dtype=object)
        result = Index(
                Timestamp("2011-01-01 10:00"),
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
        exp = Index(
                Timestamp("2011-01-01 10:00"),
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
        tm.assert_index_equal(result, exp, exact=True)
        assert not isinstance(result, DatetimeIndex)

        # Two different zones: the result must not be a DatetimeIndex either.
        result = Index(
                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
        exp = Index(
                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
        tm.assert_index_equal(result, exp, exact=True)
        assert not isinstance(result, DatetimeIndex)

        # all NaT
        result = Index([pd.NaT, pd.NaT], name="idx")
        exp = DatetimeIndex([pd.NaT, pd.NaT], name="idx")
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is None

        # all NaT with tz
        result = Index([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx")
        exp = DatetimeIndex([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx")

        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is not None
        assert result.tz == exp.tz
Exemplo n.º 44
    def test_convert(self):
        # GH#10265
        # Exercises the private Series._convert helper with its datetime /
        # numeric / timedelta switches on object-dtype and numeric Series.
        # NOTE(review): two Series(...) calls below look truncated in this
        # snippet (closing lines are missing) -- confirm against upstream.
        dt = datetime(2001, 1, 1, 0, 0)
        td = dt - datetime(2000, 1, 1, 0, 0)

        # Test coercion with mixed types
        ser = Series(["a", "3.1415", dt, td])

        results = ser._convert(numeric=True)
        expected = Series([np.nan, 3.1415, np.nan, np.nan])
        tm.assert_series_equal(results, expected)

        # Test standard conversion returns original
        results = ser._convert(datetime=True)
        tm.assert_series_equal(results, ser)
        results = ser._convert(numeric=True)
        expected = Series([np.nan, 3.1415, np.nan, np.nan])
        tm.assert_series_equal(results, expected)
        results = ser._convert(timedelta=True)
        tm.assert_series_equal(results, ser)

        # test pass-through and non-conversion when other types selected
        ser = Series(["1.0", "2.0", "3.0"])
        results = ser._convert(datetime=True, numeric=True, timedelta=True)
        expected = Series([1.0, 2.0, 3.0])
        tm.assert_series_equal(results, expected)
        # positional flags here; by comparison with the keyword calls above
        # they are presumably (datetime, numeric, timedelta) -- TODO confirm
        results = ser._convert(True, False, True)
        tm.assert_series_equal(results, ser)

        ser = Series(
            [datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0, 0)], dtype="O"
        results = ser._convert(datetime=True, numeric=True, timedelta=True)
        expected = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0, 0)])
        tm.assert_series_equal(results, expected)
        results = ser._convert(datetime=False, numeric=True, timedelta=True)
        tm.assert_series_equal(results, ser)

        td = datetime(2001, 1, 1, 0, 0) - datetime(2000, 1, 1, 0, 0)
        ser = Series([td, td], dtype="O")
        results = ser._convert(datetime=True, numeric=True, timedelta=True)
        expected = Series([td, td])
        tm.assert_series_equal(results, expected)
        results = ser._convert(True, True, False)
        tm.assert_series_equal(results, ser)

        # an already-numeric Series comes back equal to itself
        ser = Series([1.0, 2, 3], index=["a", "b", "c"])
        result = ser._convert(numeric=True)
        tm.assert_series_equal(result, ser)

        # force numeric conversion
        res = ser.copy().astype("O")
        res["a"] = "1"
        result = res._convert(numeric=True)
        tm.assert_series_equal(result, ser)

        res = ser.copy().astype("O")
        res["a"] = "1."
        result = res._convert(numeric=True)
        tm.assert_series_equal(result, ser)

        # an unparseable string is expected to become NaN
        res = ser.copy().astype("O")
        res["a"] = "garbled"
        result = res._convert(numeric=True)
        expected = ser.copy()
        expected["a"] = np.nan
        tm.assert_series_equal(result, expected)

        # GH 4119, not converting a mixed type (e.g.floats and object)
        ser = Series([1, "na", 3, 4])
        result = ser._convert(datetime=True, numeric=True)
        expected = Series([1, np.nan, 3, 4])
        tm.assert_series_equal(result, expected)

        # the empty string is checked against the same expected values
        ser = Series([1, "", 3, 4])
        result = ser._convert(datetime=True, numeric=True)
        tm.assert_series_equal(result, expected)

        # dates
        ser = Series(
                datetime(2001, 1, 1, 0, 0),
                datetime(2001, 1, 2, 0, 0),
                datetime(2001, 1, 3, 0, 0),

        result = ser._convert(datetime=True)
        expected = Series(
            [Timestamp("20010101"), Timestamp("20010102"), Timestamp("20010103")],
        tm.assert_series_equal(result, expected)

        result = ser._convert(datetime=True)
        tm.assert_series_equal(result, expected)

        # preserve if non-object
        ser = Series([1], dtype="float32")
        result = ser._convert(datetime=True)
        tm.assert_series_equal(result, ser)
Exemplo n.º 45
def decode(obj):
    # Rebuilds an object from the mapping `obj`, dispatching on its 'typ'
    # entry; `obj` is returned unchanged when it has no 'typ'.
    # NOTE(review): the next line is bare text -- it reads like a docstring
    # whose quotes were lost in this snippet; several calls further down are
    # also missing their closing lines. Confirm against upstream.
    Decoder for deserializing numpy data types.

    typ = obj.get('typ')
    if typ is None:
        return obj
    elif typ == 'timestamp':
        return Timestamp(obj['value'], tz=obj['tz'], offset=obj['offset'])
    elif typ == 'period':
        return Period(ordinal=obj['ordinal'], freq=obj['freq'])
    elif typ == 'index':
        dtype = dtype_for(obj['dtype'])
        # NOTE(review): np.typeDict is believed to be removed in newer NumPy
        # releases -- confirm the supported NumPy range.
        data = unconvert(obj['data'], np.typeDict[obj['dtype']],
        # The class to build is looked up by name in module globals, using a
        # name taken from `obj`.
        # NOTE(review): if `obj` can come from untrusted data this lookup is
        # caller-controlled -- confirm inputs are trusted.
        return globals()[obj['klass']](data, dtype=dtype, name=obj['name'])
    elif typ == 'multi_index':
        data = unconvert(obj['data'], np.typeDict[obj['dtype']],
        # from_tuples is fed tuples, so each entry is converted first
        data = [tuple(x) for x in data]
        return globals()[obj['klass']].from_tuples(data, names=obj['names'])
    elif typ == 'period_index':
        data = unconvert(obj['data'], np.int64, obj.get('compress'))
        d = dict(name=obj['name'], freq=obj['freq'])
        return globals()[obj['klass']](data, **d)
    elif typ == 'datetime_index':
        data = unconvert(obj['data'], np.int64, obj.get('compress'))
        d = dict(name=obj['name'], freq=obj['freq'], verify_integrity=False)
        result = globals()[obj['klass']](data, **d)
        tz = obj['tz']

        # reverse tz conversion
        if tz is not None:
            result = result.tz_localize('UTC').tz_convert(tz)
        return result

    elif typ == 'series':
        dtype = dtype_for(obj['dtype'])
        index = obj['index']
        return globals()[obj['klass']](unconvert(obj['data'], dtype,
    elif typ == 'block_manager':
        axes = obj['axes']

        # helper that rebuilds one block from its serialized dict `b`
        def create_block(b):
            values = unconvert(b['values'], dtype_for(b['dtype']),
            return make_block(values=values,
                              klass=getattr(internals, b['klass']),

        blocks = [create_block(b) for b in obj['blocks']]
        return globals()[obj['klass']](BlockManager(blocks, axes))
    elif typ == 'datetime':
        return parse(obj['data'])
    elif typ == 'datetime64':
        return np.datetime64(parse(obj['data']))
    elif typ == 'date':
        return parse(obj['data']).date()
    elif typ == 'timedelta':
        # obj['data'] is unpacked as the positional arguments of timedelta
        return timedelta(*obj['data'])
    elif typ == 'timedelta64':
        return np.timedelta64(int(obj['data']))
    #elif typ == 'sparse_series':
    #    dtype = dtype_for(obj['dtype'])
    #    return globals()[obj['klass']](
    #        unconvert(obj['sp_values'], dtype, obj['compress']),
    #        sparse_index=obj['sp_index'], index=obj['index'],
    #        fill_value=obj['fill_value'], kind=obj['kind'], name=obj['name'])
    #elif typ == 'sparse_dataframe':
    #    return globals()[obj['klass']](
    #        obj['data'], columns=obj['columns'],
    #        default_fill_value=obj['default_fill_value'],
    #        default_kind=obj['default_kind']
    #    )
    #elif typ == 'sparse_panel':
    #    return globals()[obj['klass']](
    #        obj['data'], items=obj['items'],
    #        default_fill_value=obj['default_fill_value'],
    #        default_kind=obj['default_kind'])
    elif typ == 'block_index':
        return globals()[obj['klass']](obj['length'], obj['blocs'],
    elif typ == 'int_index':
        return globals()[obj['klass']](obj['length'], obj['indices'])
    elif typ == 'ndarray':
        return unconvert(obj['data'], np.typeDict[obj['dtype']],
    elif typ == 'np_scalar':
        # NOTE(review): the branch structure below (else / try-except lines)
        # appears to be missing from this snippet -- confirm against upstream.
        if obj.get('sub_typ') == 'np_complex':
            return c2f(obj['real'], obj['imag'], obj['dtype'])
            dtype = dtype_for(obj['dtype'])
                return dtype(obj['data'])
                return dtype.type(obj['data'])
    elif typ == 'np_complex':
        # the complex value is rebuilt from the string "<real>+<imag>j"
        return complex(obj['real'] + '+' + obj['imag'] + 'j')
    elif isinstance(obj, (dict, list, set)):
        return obj
        return obj
Exemplo n.º 46
class TestArithmetic(object):
    # Arithmetic tests on object-dtype data: string concatenation, invalid
    # add/sub operands, NaN handling and Timedelta addition.
    # NOTE(review): several decorator lines in this snippet look truncated
    # (e.g. the 'box' / 'data' / 'op' parametrize lists) -- confirm against
    # upstream before relying on them.
    @pytest.mark.parametrize("op", [operator.add, ops.radd])
    @pytest.mark.parametrize("other", ["category", "Int64"])
    def test_add_extension_scalar(self, other, box, op):
        # GH#22378
        # Check that scalars satisfying is_extension_array_dtype(obj)
        # do not incorrectly try to dispatch to an ExtensionArray operation

        arr = pd.Series(['a', 'b', 'c'])
        # expected is built element by element with the same operator
        expected = pd.Series([op(x, other) for x in arr])

        arr = tm.box_expected(arr, box)
        expected = tm.box_expected(expected, box)

        result = op(arr, other)
        tm.assert_equal(result, expected)

    @pytest.mark.parametrize('box', [
                     marks=pytest.mark.xfail(reason="Does not mask nulls",
                                             raises=TypeError)), pd.Series,
                             ids=lambda x: x.__name__)
    def test_objarr_add_str(self, box):
        # appending 'a' is expected to leave the NaN entry as NaN
        ser = pd.Series(['x', np.nan, 'x'])
        expected = pd.Series(['xa', np.nan, 'xa'])

        ser = tm.box_expected(ser, box)
        expected = tm.box_expected(expected, box)

        result = ser + 'a'
        tm.assert_equal(result, expected)

    @pytest.mark.parametrize('box', [
                     marks=pytest.mark.xfail(reason="Does not mask nulls",
                                             raises=TypeError)), pd.Series,
                             ids=lambda x: x.__name__)
    def test_objarr_radd_str(self, box):
        # reflected variant: the string is on the left-hand side
        ser = pd.Series(['x', np.nan, 'x'])
        expected = pd.Series(['ax', np.nan, 'ax'])

        ser = tm.box_expected(ser, box)
        expected = tm.box_expected(expected, box)

        result = 'a' + ser
        tm.assert_equal(result, expected)

        'data', [[1, 2, 3], [1.1, 2.2, 3.3],
                  Timestamp('2011-01-02'), pd.NaT], ['x', 'y', 1]])
    @pytest.mark.parametrize('dtype', [None, object])
    def test_objarr_radd_str_invalid(self, dtype, data, box):
        # prepending a string to these values must raise TypeError
        ser = Series(data, dtype=dtype)

        ser = tm.box_expected(ser, box)
        with pytest.raises(TypeError):
            'foo_' + ser

                             [operator.add, ops.radd, operator.sub, ops.rsub])
    def test_objarr_add_invalid(self, op, box):
        # invalid ops
        if box is pd.DataFrame and op is ops.radd:
            pytest.xfail(reason="DataFrame op incorrectly casts the np.array"
                         "case to M8[ns]")

        obj_ser = tm.makeObjectSeries()
        obj_ser.name = 'objects'

        obj_ser = tm.box_expected(obj_ser, box)
        # both a Python int and a 0-d int64 array are tried as the operand
        with pytest.raises(Exception):
            op(obj_ser, 1)
        with pytest.raises(Exception):
            op(obj_ser, np.array(1, dtype=np.int64))

    # TODO: Moved from tests.series.test_operators; needs cleanup
    def test_operators_na_handling(self):
        # NaN is expected to stay NaN under both prefixing and suffixing
        ser = Series(['foo', 'bar', 'baz', np.nan])
        result = 'prefix_' + ser
        expected = pd.Series(
            ['prefix_foo', 'prefix_bar', 'prefix_baz', np.nan])
        tm.assert_series_equal(result, expected)

        result = ser + '_suffix'
        expected = pd.Series(
            ['foo_suffix', 'bar_suffix', 'baz_suffix', np.nan])
        tm.assert_series_equal(result, expected)

    # TODO: parametrize over box
    @pytest.mark.parametrize('dtype', [None, object])
    def test_series_with_dtype_radd_timedelta(self, dtype):
        # note this test is _not_ aimed at timedelta64-dtyped Series
        # NOTE(review): both Series([...]) calls below are missing their
        # closing lines in this snippet -- confirm against upstream.
        ser = pd.Series([
            pd.Timedelta('1 days'),
            pd.Timedelta('2 days'),
            pd.Timedelta('3 days')
        expected = pd.Series([
            pd.Timedelta('4 days'),
            pd.Timedelta('5 days'),
            pd.Timedelta('6 days')

        # the same expected values are checked for both operand orders
        result = pd.Timedelta('3 days') + ser
        tm.assert_series_equal(result, expected)

        result = ser + pd.Timedelta('3 days')
        tm.assert_series_equal(result, expected)
Exemplo n.º 47
 def test_construction_with_nat_and_tzlocal(self):
     """Strings plus a tzlocal tz build the same index as Timestamp + NaT."""
     local_zone = dateutil.tz.tzlocal()
     from_strings = DatetimeIndex(["2018", "NaT"], tz=local_zone)
     from_scalars = DatetimeIndex([Timestamp("2018", tz=local_zone), pd.NaT])
     tm.assert_index_equal(from_strings, from_scalars)
Exemplo n.º 48
 def test_td_rsub_mixed_most_timedeltalike_object_dtype_array(self):
     """GH#21980: Timedelta minus a Timestamp-containing array raises."""
     mixed = np.array(
         [Timestamp.now(), Timedelta("1D"), np.timedelta64(2, "h")]
     )
     minuend = Timedelta("1D")
     with pytest.raises(TypeError):
         minuend - mixed
Exemplo n.º 49
 def test_maybe_cast_slice_duplicate_monotonic(self):
     # https://github.com/pandas-dev/pandas/issues/16515
     idx = DatetimeIndex(["2017", "2017"])
     result = idx._maybe_cast_slice_bound("2017-01-01", "left", "loc")
     expected = Timestamp("2017-01-01")
     assert result == expected
Exemplo n.º 50
    def test_interleave(self):
        # Compares the `.values` of self.tzframe (with and without an extra
        # string column 'D') against hand-built arrays of Timestamps.
        # NOTE(review): the np.array(...) literals below are missing bracket
        # lines (and apparently some rows) in this snippet -- confirm against
        # upstream.

        # interleave with object
        result = self.tzframe.assign(D='foo').values
        expected = np.array(
                Timestamp('2013-01-01 00:00:00'),
                Timestamp('2013-01-02 00:00:00'),
                Timestamp('2013-01-03 00:00:00')
                 Timestamp('2013-01-01 00:00:00-0500', tz='US/Eastern'),
                 Timestamp('2013-01-03 00:00:00-0500', tz='US/Eastern')
                 Timestamp('2013-01-01 00:00:00+0100', tz='CET'), pd.NaT,
                 Timestamp('2013-01-03 00:00:00+0100', tz='CET')
             ], ['foo', 'foo', 'foo']],
        tm.assert_numpy_array_equal(result, expected)

        # interleave with only datetime64[ns]
        result = self.tzframe.values
        expected = np.array(
                Timestamp('2013-01-01 00:00:00'),
                Timestamp('2013-01-02 00:00:00'),
                Timestamp('2013-01-03 00:00:00')
                 Timestamp('2013-01-01 00:00:00-0500', tz='US/Eastern'),
                 Timestamp('2013-01-03 00:00:00-0500', tz='US/Eastern')
                 Timestamp('2013-01-01 00:00:00+0100', tz='CET'), pd.NaT,
                 Timestamp('2013-01-03 00:00:00+0100', tz='CET')
        tm.assert_numpy_array_equal(result, expected)
Exemplo n.º 51
def test_generate_training_set(mocker):
    # Runs a pipeline and then checks, in order: the traffic records, the
    # weather records, the generated training set / labels, and the single
    # STEP_MATERIALIZATION event of the "upload_training_set_to_gcs" solid.
    # NOTE(review): many call and literal lines are missing from this snippet
    # (unclosed brackets, record dicts without braces) -- confirm against
    # upstream before editing.

    # Execute Pipeline
    test_pipeline_result = execute_pipeline(
    assert test_pipeline_result.success

    # Check solids
    # (fields of the expected traffic records)
            "interval_date": date(2019, 7, 31),
            "peak_traffic_load": 1,
            "time": Timestamp("2019-07-31 00:00:00"),
            "interval_date": date(2019, 8, 31),
            "peak_traffic_load": 1,
            "time": Timestamp("2019-08-31 00:00:00"),
    traffic_dataset = test_pipeline_result.output_for_solid(
    # every produced record must be one of the expected records
    assert all(record in EXPECTED_TRAFFIC_RECORDS
               for record in traffic_dataset)

    # (fields of the expected weather records)
            "time": Timestamp("2019-08-31 00:00:00"),
            "summary": "Clear throughout the day.",
            "icon": "clear-day",
            "sunriseTime": 1546269960,
            "sunsetTime": 1546304520,
            "precipIntensity": 0.0007,
            "precipIntensityMax": 0.0019,
            "precipProbability": 0.05,
            "precipType": "rain",
            "temperatureHigh": 56.71,
            "temperatureHighTime": 1546294020,
            "temperatureLow": 44.75,
            "temperatureLowTime": 1546358040,
            "dewPoint": 28.34,
            "humidity": 0.43,
            "pressure": 1017.7,
            "windSpeed": 12.46,
            "windGust": 26.85,
            "windGustTime": 1546289220,
            "windBearing": 0,
            "cloudCover": 0.11,
            "uvIndex": 2,
            "uvIndexTime": 1546287180,
            "visibility": 10,
            "ozone": 314.4,
            "time": Timestamp("2019-07-31 00:00:00"),
            "summary": "Clear throughout the day.",
            "icon": "clear-day",
            "sunriseTime": 1546356420,
            "sunsetTime": 1546390920,
            "precipIntensity": 0.0005,
            "precipIntensityMax": 0.0016,
            "precipProbability": 0.02,
            "precipType": "sunny",
            "temperatureHigh": 55.91,
            "temperatureHighTime": 1546382040,
            "temperatureLow": 41.18,
            "temperatureLowTime": 1546437660,
            "dewPoint": 20.95,
            "humidity": 0.33,
            "pressure": 1023.3,
            "windSpeed": 6.77,
            "windGust": 22.08,
            "windGustTime": 1546343340,
            "windBearing": 22,
            "cloudCover": 0.1,
            "uvIndex": 2,
            "uvIndexTime": 1546373580,
            "visibility": 10,
            "ozone": 305.3,
    weather_dataset = test_pipeline_result.output_for_solid(
    assert all(record in EXPECTED_WEATHER_RECORDS
               for record in weather_dataset)

    # Ensure we are generating the expected training set
    training_set, labels = test_pipeline_result.output_for_solid(
    assert len(labels) == 1 and labels[0] == 1
    assert array_equal(
    # keep only the materialization events of the upload solid
    materialization_events = [
        event for event in test_pipeline_result.step_event_list
        if event.solid_name == "upload_training_set_to_gcs"
        and event.event_type_value == "STEP_MATERIALIZATION"
    assert len(materialization_events) == 1
    materialization = materialization_events[
    assert materialization.asset_key.path[0:5] == [
    materialization_event_metadata = materialization.metadata_entries
    assert len(materialization_event_metadata) == 1
    assert materialization_event_metadata[
        0].label == "google cloud storage URI"
    assert materialization_event_metadata[0].entry_data.text.startswith(

    # Clean up
    # removes the "testing-storage" directory under the system temp dir
    shutil.rmtree(os.path.join(tempfile.gettempdir(), "testing-storage"),
Exemplo n.º 52
from zipline.testing.fixtures import (

# Test calendar ranges over the month of June 2015
#      June 2015
# Mo Tu We Th Fr Sa Su
#  1  2  3  4  5  6  7
#  8  9 10 11 12 13 14
# 15 16 17 18 19 20 21
# 22 23 24 25 26 27 28
# 29 30
# Calendar span used by the tests: first to last day of June 2015, in UTC.
TEST_CALENDAR_START = Timestamp('2015-06-01', tz='UTC')
TEST_CALENDAR_STOP = Timestamp('2015-06-30', tz='UTC')

# Query window (June 10 to June 19), which lies inside the calendar span.
TEST_QUERY_START = Timestamp('2015-06-10', tz='UTC')
TEST_QUERY_STOP = Timestamp('2015-06-19', tz='UTC')

# One asset for each of the cases enumerated in load_raw_arrays_from_bcolz.
EQUITY_INFO = DataFrame(
        # 1) The equity's trades start and end before query.
            'start_date': '2015-06-01',
            'end_date': '2015-06-05'
        # 2) The equity's trades start and end after query.
Exemplo n.º 53
class TestDataFrameSetItem:
    def test_setitem_str_subclass(self):
        # GH#37366
        # Sets columns using a plain str key and a str-subclass key, then
        # compares against a frame built with the same keys.
        # NOTE(review): the class body and the closing of DataFrame({...})
        # are missing from this snippet -- confirm against upstream.
        class mystring(str):

        data = ["2020-10-22 01:21:00+00:00"]
        index = DatetimeIndex(data)
        df = DataFrame({"a": [1]}, index=index)
        df["b"] = 2
        df[mystring("c")] = 3
        expected = DataFrame({
            "a": [1],
            "b": [2],
            mystring("c"): [3]
        tm.assert_equal(df, expected)

        "dtype", ["int32", "int64", "uint32", "uint64", "float32", "float64"])
    def test_setitem_dtype(self, dtype, float_frame):
        arr = np.random.randn(len(float_frame))

        float_frame[dtype] = np.array(arr, dtype=dtype)
        assert float_frame[dtype].dtype.name == dtype

    def test_setitem_list_not_dataframe(self, float_frame):
        data = np.random.randn(len(float_frame), 2)
        float_frame[["A", "B"]] = data
        tm.assert_almost_equal(float_frame[["A", "B"]].values, data)

    def test_setitem_error_msmgs(self):
        # Checks the exception type and message raised by two invalid column
        # assignments.
        # NOTE(review): the DataFrame(...) and Series(...) calls below are
        # missing lines in this snippet -- confirm against upstream.

        # GH 7432
        df = DataFrame(
                "bar": [1, 2, 3],
                "baz": ["d", "e", "f"]
            index=Index(["a", "b", "c"], name="foo"),
        # the Series index repeats the label "a"
        ser = Series(
            ["g", "h", "i", "j"],
            index=Index(["a", "b", "c", "a"], name="foo"),
        msg = "cannot reindex on an axis with duplicate labels"
        with pytest.raises(ValueError, match=msg):
            with tm.assert_produces_warning(FutureWarning, match="non-unique"):
                df["newcol"] = ser

        # GH 4107, more descriptive error message
        df = DataFrame(np.random.randint(0, 2, (4, 4)),
                       columns=["a", "b", "c", "d"])

        msg = "incompatible index of inserted column with frame index"
        with pytest.raises(TypeError, match=msg):
            df["gr"] = df.groupby(["b", "c"]).count()

    def test_setitem_benchmark(self):
        # from the vb_suite/frame_methods/frame_insert_columns
        # Inserts the same random column under K integer labels and compares
        # with a frame built from that column repeated K times.
        # NOTE(review): the `expected = DataFrame(` call is missing its
        # continuation line in this snippet -- confirm against upstream.
        N = 10
        K = 5
        df = DataFrame(index=range(N))
        new_col = np.random.randn(N)
        for i in range(K):
            df[i] = new_col
        expected = DataFrame(np.repeat(new_col, K).reshape(N, K),
        tm.assert_frame_equal(df, expected)

    def test_setitem_different_dtype(self):
        # Tracks `df.dtypes` while columns of other dtypes are added or
        # replaced via setitem.
        # NOTE(review): the three `expected = Series(` calls below are missing
        # lines in this snippet -- confirm against upstream.
        df = DataFrame(np.random.randn(5, 3),
                       columns=["c", "b", "a"])
        # resulting column order: foo, c, bar, b, a
        df.insert(0, "foo", df["a"])
        df.insert(2, "bar", df["c"])

        # diff dtype

        # new item
        df["x"] = df["a"].astype("float32")
        result = df.dtypes
        expected = Series(
            [np.dtype("float64")] * 5 + [np.dtype("float32")],
            index=["foo", "c", "bar", "b", "a", "x"],
        tm.assert_series_equal(result, expected)

        # replacing current (in different block)
        df["a"] = df["a"].astype("float32")
        result = df.dtypes
        expected = Series(
            [np.dtype("float64")] * 4 + [np.dtype("float32")] * 2,
            index=["foo", "c", "bar", "b", "a", "x"],
        tm.assert_series_equal(result, expected)

        # adding an int32 column "y"
        df["y"] = df["a"].astype("int32")
        result = df.dtypes
        expected = Series(
            [np.dtype("float64")] * 4 + [np.dtype("float32")] * 2 +
            index=["foo", "c", "bar", "b", "a", "x", "y"],
        tm.assert_series_equal(result, expected)

    def test_setitem_empty_columns(self):
        # GH 13522
        df = DataFrame(index=["A", "B", "C"])
        df["X"] = df.index
        df["X"] = ["x", "y", "z"]
        exp = DataFrame(data={"X": ["x", "y", "z"]}, index=["A", "B", "C"])
        tm.assert_frame_equal(df, exp)

    def test_setitem_dt64_index_empty_columns(self):
        rng = date_range("1/1/2000 00:00:00", "1/1/2000 1:59:50", freq="10s")
        df = DataFrame(index=np.arange(len(rng)))

        df["A"] = rng
        assert df["A"].dtype == np.dtype("M8[ns]")

    def test_setitem_timestamp_empty_columns(self):
        # GH#19843
        # Assigns a single tz-aware Timestamp to a new column of a frame that
        # has an index but no columns.
        # NOTE(review): the `expected = DataFrame(` call is missing its last
        # line in this snippet -- confirm against upstream.
        df = DataFrame(index=range(3))
        df["now"] = Timestamp("20130101", tz="UTC")

        expected = DataFrame([[Timestamp("20130101", tz="UTC")]] * 3,
                             index=[0, 1, 2],
        tm.assert_frame_equal(df, expected)

    def test_setitem_wrong_length_categorical_dtype_raises(self):
        """GH#29523: a Categorical of the wrong length is rejected."""
        codes = [0, 1, 1, 0, 1, 2]
        categories = ["a", "b", "c"]
        cat = Categorical.from_codes(codes, categories)
        df = DataFrame(range(10), columns=["bar"])

        # the message embeds both lengths; parentheses are regex-escaped
        msg = (rf"Length of values \({len(cat)}\) "
               rf"does not match length of index \({len(df)}\)")
        with pytest.raises(ValueError, match=msg):
            df["foo"] = cat

    def test_setitem_with_sparse_value(self):
        # GH#8131
        df = DataFrame({"c_1": ["a", "b", "c"], "n_1": [1.0, 2.0, 3.0]})
        sp_array = SparseArray([0, 0, 1])
        df["new_column"] = sp_array

        expected = Series(sp_array, name="new_column")
        tm.assert_series_equal(df["new_column"], expected)

    def test_setitem_with_unaligned_sparse_value(self):
        df = DataFrame({"c_1": ["a", "b", "c"], "n_1": [1.0, 2.0, 3.0]})
        sp_series = Series(SparseArray([0, 0, 1]), index=[2, 1, 0])

        df["new_column"] = sp_series
        expected = Series(SparseArray([1, 0, 0]), name="new_column")
        tm.assert_series_equal(df["new_column"], expected)

    def test_setitem_period_preserves_dtype(self):
        # GH: 26861
        data = [Period("2003-12", "D")]
        result = DataFrame([])
        result["a"] = data

        expected = DataFrame({"a": data})

        tm.assert_frame_equal(result, expected)

    def test_setitem_dict_preserves_dtypes(self):
        # https://github.com/pandas-dev/pandas/issues/34573
        # Appends rows, given as dicts, to an empty typed frame via `.loc`
        # and compares the result with a frame built with explicit dtypes.
        # NOTE(review): the DataFrame({...}) literals and the row dict are
        # missing their closing lines in this snippet -- confirm upstream.
        expected = DataFrame({
            "a": Series([0, 1, 2], dtype="int64"),
            "b": Series([1, 2, 3], dtype=float),
            "c": Series([1, 2, 3], dtype=float),
            "d": Series([1, 2, 3], dtype="uint32"),
        df = DataFrame({
            "a": Series([], dtype="int64"),
            "b": Series([], dtype=float),
            "c": Series([], dtype=float),
            "d": Series([], dtype="uint32"),
        for idx, b in enumerate([1, 2, 3]):
            # df.shape[0] is the current row count, used as the new label
            df.loc[df.shape[0]] = {
                "a": int(idx),
                "b": float(b),
                "c": float(b),
                "d": np.uint32(b),
        tm.assert_frame_equal(df, expected)

            (Period("2020-01"), PeriodDtype("M")),
                Interval(left=0, right=5, inclusive="right"),
                IntervalDtype("int64", "right"),
                Timestamp("2011-01-01", tz="US/Eastern"),
    def test_setitem_extension_types(self, obj, dtype):
        # GH: 34832
        # Assigns the scalar `obj` to a new column and expects a column of
        # three copies of it with the given `dtype`.
        # NOTE(review): the `expected = DataFrame({` literal is missing its
        # closing line in this snippet -- confirm against upstream.
        expected = DataFrame({
            "idx": [1, 2, 3],
            "obj": Series([obj] * 3, dtype=dtype)

        df = DataFrame({"idx": [1, 2, 3]})
        df["obj"] = obj

        tm.assert_frame_equal(df, expected)

            dtype.name for dtype in ea_registry.dtypes
            # property would require instantiation
            if not isinstance(dtype.name, property)
        # mypy doesn't allow adding lists of different types
        # https://github.com/python/mypy/issues/5492
        + ["datetime64[ns, UTC]", "period[D]"],  # type: ignore[list-item]
    def test_setitem_with_ea_name(self, ea_name):
        # GH 38386
        result = DataFrame([0])
        result[ea_name] = [1]
        expected = DataFrame({0: [0], ea_name: [1]})
        tm.assert_frame_equal(result, expected)

    def test_setitem_dt64_ndarray_with_NaT_and_diff_time_units(self):
        # GH#7492
        # Sets datetime64 arrays containing NaT as a column, first in
        # nanosecond and then in second resolution.
        # NOTE(review): both `expected = DataFrame({` literals are missing
        # their closing lines in this snippet -- confirm against upstream.
        data_ns = np.array([1, "nat"], dtype="datetime64[ns]")
        result = Series(data_ns).to_frame()
        result["new"] = data_ns
        expected = DataFrame({
            0: [1, None],
            "new": [1, None]
        tm.assert_frame_equal(result, expected)

        # OutOfBoundsDatetime error shouldn't occur
        data_s = np.array([1, "nat"], dtype="datetime64[s]")
        result["new"] = data_s
        # 1 second is written as 1e9 in the expected frame
        expected = DataFrame({
            0: [1, None],
            "new": [1e9, None]
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("unit", ["h", "m", "s", "ms", "D", "M", "Y"])
    def test_frame_setitem_datetime64_col_other_units(self, unit):
        """Non-nano dt64 values set into a new column are stored as M8[ns]."""
        n = 100

        # reinterpret consecutive integers as datetimes of the given unit
        raw_ints = np.arange(n, dtype=np.int64)
        vals = raw_ints.view(np.dtype(f"M8[{unit}]"))
        ex_vals = vals.astype("datetime64[ns]")

        df = DataFrame({"ints": np.arange(n)}, index=np.arange(n))
        df[unit] = vals

        stored = df[unit]
        assert stored.dtype == np.dtype("M8[ns]")
        assert (stored.values == ex_vals).all()

    @pytest.mark.parametrize("unit", ["h", "m", "s", "ms", "D", "M", "Y"])
    def test_frame_setitem_existing_datetime64_col_other_units(self, unit):
        """Non-nano dt64 values overwrite an existing dt64 column correctly."""
        n = 100

        # reinterpret consecutive integers as datetimes of the given unit
        raw_ints = np.arange(n, dtype=np.int64)
        vals = raw_ints.view(np.dtype(f"M8[{unit}]"))
        ex_vals = vals.astype("datetime64[ns]")

        df = DataFrame({"ints": np.arange(n)}, index=np.arange(n))
        # start from a nanosecond-resolution column ...
        df["dates"] = np.arange(n, dtype=np.int64).view("M8[ns]")

        # ... then replace it with the non-nano values
        df["dates"] = vals
        stored_values = df["dates"].values
        assert (stored_values == ex_vals).all()

    def test_setitem_dt64tz(self, timezone_frame):
        # Sets a renamed copy of column "B" as columns "C" and "D" and checks
        # the stored values, that the arrays do not share a base, and dtypes.
        # NOTE(review): the assertion after `result = df2["B"]` is missing its
        # first line in this snippet -- confirm against upstream.

        df = timezone_frame
        idx = df["B"].rename("foo")

        # setitem
        df["C"] = idx
        tm.assert_series_equal(df["C"], Series(idx, name="C"))

        # overwrite an existing string column with the tz-aware values
        df["D"] = "foo"
        df["D"] = idx
        tm.assert_series_equal(df["D"], Series(idx, name="D"))
        del df["D"]

        # assert that A & C are not sharing the same base (e.g. they
        # are copies)
        # NOTE(review): arrays[1] / arrays[2] are presumably columns B and C
        # rather than A and C -- confirm.
        v1 = df._mgr.arrays[1]
        v2 = df._mgr.arrays[2]
        tm.assert_extension_array_equal(v1, v2)
        v1base = v1._data.base
        v2base = v2._data.base
        assert v1base is None or (id(v1base) != id(v2base))

        # with nan
        df2 = df.copy()
        df2.iloc[1, 1] = NaT
        df2.iloc[1, 2] = NaT
        result = df2["B"]
                               Series([True, False, True], name="B"))
        tm.assert_series_equal(df2.dtypes, df.dtypes)

    def test_setitem_periodindex(self):
        rng = period_range("1/1/2000", periods=5, name="index")
        df = DataFrame(np.random.randn(5, 3), index=rng)

        df["Index"] = rng
        rs = Index(df["Index"])
        tm.assert_index_equal(rs, rng, check_names=False)
        assert rs.name == "Index"
        assert rng.name == "index"

        rs = df.reset_index().set_index("index")
        assert isinstance(rs.index, PeriodIndex)
        tm.assert_index_equal(rs.index, rng)

    def test_setitem_complete_column_with_array(self):
        # GH#37954
        # Sets two new columns at once from a 2-D integer ndarray.
        # NOTE(review): the `expected = DataFrame({` literal is missing its
        # closing line in this snippet -- confirm against upstream.
        df = DataFrame({"a": ["one", "two", "three"], "b": [1, 2, 3]})
        arr = np.array([[1, 1], [3, 1], [5, 1]])
        df[["c", "d"]] = arr
        expected = DataFrame({
            "a": ["one", "two", "three"],
            "b": [1, 2, 3],
            "c": [1, 3, 5],
            "d": [1, 1, 1],
        # expected columns are cast to the array's dtype before comparing
        expected["c"] = expected["c"].astype(arr.dtype)
        expected["d"] = expected["d"].astype(arr.dtype)
        assert expected["c"].dtype == arr.dtype
        assert expected["d"].dtype == arr.dtype
        tm.assert_frame_equal(df, expected)

    @pytest.mark.parametrize("dtype", ["f8", "i8", "u8"])
    def test_setitem_bool_with_numeric_index(self, dtype):
        """GH#36319: a ``False`` label added to numeric columns gives object."""
        numeric_cols = Index([1, 2, 3], dtype=dtype)
        df = DataFrame(np.random.randn(3, 3), columns=numeric_cols)

        df[False] = ["a", "b", "c"]

        # float columns keep float labels; integer columns keep int labels
        if dtype == "f8":
            labels = [1.0, 2.0, 3.0, False]
        else:
            labels = [1, 2, 3, False]
        expected_cols = Index(labels, dtype=object)

        tm.assert_index_equal(df.columns, expected_cols)

    @pytest.mark.parametrize("indexer", ["B", ["B"]])
    def test_setitem_frame_length_0_str_key(self, indexer):
        """GH#38831: set a DataFrame into a zero-row frame by column key."""
        source = DataFrame({"B": [1, 2]})
        df = DataFrame(columns=["A", "B"])

        df[indexer] = source

        # column "A" is expected to be all-NaN with object dtype
        expected = DataFrame({"A": [np.nan, np.nan], "B": [1, 2]})
        expected["A"] = expected["A"].astype("object")
        tm.assert_frame_equal(df, expected)

    def test_setitem_frame_duplicate_columns(self, using_array_manager):
        # GH#15695
        # Sets values on a frame whose column labels A, B, C each occur twice,
        # through .loc and through plain setitem.
        # NOTE(review): the `expected = DataFrame(` call and the else-branch
        # of the `if using_array_manager:` block are missing lines in this
        # snippet -- confirm against upstream.
        warn = FutureWarning if using_array_manager else None
        msg = "will attempt to set the values inplace"

        cols = ["A", "B", "C"] * 2
        df = DataFrame(index=range(3), columns=cols)
        df.loc[0, "A"] = (0, 3)
        with tm.assert_produces_warning(warn, match=msg):
            df.loc[:, "B"] = (1, 4)
        df["C"] = (2, 5)
        expected = DataFrame(
                [0, 1, 2, 3, 4, 5],
                [np.nan, 1, 2, np.nan, 4, 5],
                [np.nan, 1, 2, np.nan, 4, 5],

        if using_array_manager:
            # setitem replaces column so changes dtype

            expected.columns = cols
            expected["C"] = expected["C"].astype("int64")
            # TODO(ArrayManager) .loc still overwrites
            expected["B"] = expected["B"].astype("int64")

            # set these with unique columns to be extra-unambiguous
            expected[2] = expected[2].astype(np.int64)
            expected[5] = expected[5].astype(np.int64)
            expected.columns = cols

        tm.assert_frame_equal(df, expected)

    def test_setitem_frame_duplicate_columns_size_mismatch(self):
        """A one-label list key rejects a three-element tuple value."""
        # GH#39510
        msg = "Columns must be same length as key"
        labels = ["A", "B", "C"] * 2
        df = DataFrame(index=range(3), columns=labels)
        with pytest.raises(ValueError, match=msg):
            df[["A"]] = (0, 3, 5)

        # Same check on a slice whose column labels are unique.
        df2 = df.iloc[:, :3]
        with pytest.raises(ValueError, match=msg):
            df2[["A"]] = (0, 3, 5)

    @pytest.mark.parametrize("cols", [["a", "b", "c"], ["a", "a", "a"]])
    def test_setitem_df_wrong_column_number(self, cols):
        """A two-column DataFrame cannot be assigned to the single key ``"a"``."""
        # GH#38604
        target = DataFrame([[1, 2, 3]], columns=cols)
        two_col_rhs = DataFrame([[10, 11]], columns=["d", "e"])
        with pytest.raises(ValueError, match="Columns must be same length as key"):
            target["a"] = two_col_rhs

    def test_setitem_listlike_indexer_duplicate_columns(self):
        # GH#38604
        df = DataFrame([[1, 2, 3]], columns=["a", "b", "b"])
        rhs = DataFrame([[10, 11, 12]], columns=["a", "b", "b"])
        df[["a", "b"]] = rhs
        expected = DataFrame([[10, 11, 12]], columns=["a", "b", "b"])
        tm.assert_frame_equal(df, expected)

        df[["c", "b"]] = rhs
        expected = DataFrame([[10, 11, 12, 10]], columns=["a", "b", "b", "c"])
        tm.assert_frame_equal(df, expected)

    def test_setitem_listlike_indexer_duplicate_columns_not_equal_length(self):
        """A two-column value cannot fill a key that selects three columns."""
        # GH#39403
        target = DataFrame([[1, 2, 3]], columns=["a", "b", "b"])
        narrow_rhs = DataFrame([[10, 11]], columns=["a", "b"])
        with pytest.raises(ValueError, match="Columns must be same length as key"):
            target[["a", "b"]] = narrow_rhs

    def test_setitem_intervals(self):

        df = DataFrame({"A": range(10)})
        ser = cut(df["A"], 5)
        assert isinstance(ser.cat.categories, IntervalIndex)

        # B & D end up as Categoricals
        # the remainder are converted to in-line objects
        # containing an IntervalIndex.values
        df["B"] = ser
        df["C"] = np.array(ser)
        df["D"] = ser.values
        df["E"] = np.array(ser.values)
        df["F"] = ser.astype(object)

        assert is_categorical_dtype(df["B"].dtype)
        assert is_interval_dtype(df["B"].cat.categories)
        assert is_categorical_dtype(df["D"].dtype)
        assert is_interval_dtype(df["D"].cat.categories)

        # These go through the Series constructor and so get inferred back
        #  to IntervalDtype
        assert is_interval_dtype(df["C"])
        assert is_interval_dtype(df["E"])

        # But the Series constructor doesn't do inference on Series objects,
        #  so setting df["F"] doesn't get cast back to IntervalDtype
        assert is_object_dtype(df["F"])

        # they compare equal as Index
        # when converted to numpy objects
        c = lambda x: Index(np.array(x))
        tm.assert_index_equal(c(df.B), c(df.B))
        tm.assert_index_equal(c(df.B), c(df.C), check_names=False)
        tm.assert_index_equal(c(df.B), c(df.D), check_names=False)
        tm.assert_index_equal(c(df.C), c(df.D), check_names=False)

        # B & D are the same Series
        tm.assert_series_equal(df["B"], df["B"])
        tm.assert_series_equal(df["B"], df["D"], check_names=False)

        # C & E are the same Series
        tm.assert_series_equal(df["C"], df["C"])
        tm.assert_series_equal(df["C"], df["E"], check_names=False)

    def test_setitem_categorical(self):
        # GH#35369
        df = DataFrame({"h": Series(list("mn")).astype("category")})
        df.h = df.h.cat.reorder_categories(["n", "m"])
        expected = DataFrame(
            {"h": Categorical(["m", "n"]).reorder_categories(["n", "m"])})
        tm.assert_frame_equal(df, expected)

    def test_setitem_with_empty_listlike(self):
        # GH#17101
        index = Index([], name="idx")
        result = DataFrame(columns=["A"], index=index)
        result["A"] = []
        expected = DataFrame(columns=["A"], index=index)
        tm.assert_index_equal(result.index, expected.index)

        "cols, values, expected",
            (["C", "D", "D", "a"], [1, 2, 3, 4], 4),  # with duplicates
            (["D", "C", "D", "a"], [1, 2, 3, 4], 4),  # mixed order
            (["C", "B", "B", "a"], [1, 2, 3, 4], 4),  # other duplicate cols
            (["C", "B", "a"], [1, 2, 3], 3),  # no duplicates
            (["B", "C", "a"], [3, 2, 1], 1),  # alphabetical order
            (["C", "a", "B"], [3, 2, 1], 2),  # in the middle
    def test_setitem_same_column(self, cols, values, expected):
        # GH#23239
        df = DataFrame([values], columns=cols)
        df["a"] = df["a"]
        result = df["a"].values[0]
        assert result == expected

    def test_setitem_multi_index(self):
        # GH#7655, test that assigning to a sub-frame of a frame
        # with multi-index columns aligns both rows and columns
        it = ["jim", "joe", "jolie"], ["first",
                                       "last"], ["left", "center", "right"]

        cols = MultiIndex.from_product(it)
        index = date_range("20141006", periods=20)
        vals = np.random.randint(1, 1000, (len(index), len(cols)))
        df = DataFrame(vals, columns=cols, index=index)

        i, j = df.index.values.copy(), it[-1][:]

        df["jim"] = df["jolie"].loc[i, ::-1]
        tm.assert_frame_equal(df["jim"], df["jolie"])

        df[("joe", "first")] = df[("jolie", "last")].loc[i, j]
        tm.assert_frame_equal(df[("joe", "first")], df[("jolie", "last")])

        df[("joe", "last")] = df[("jolie", "first")].loc[i, j]
        tm.assert_frame_equal(df[("joe", "last")], df[("jolie", "first")])

                ["A", "B", "C", "D"],
                    [[7, 7, 7, 7], [7, 7, 7, 7], [7, 7, 7, 7]],
                    columns=["A", "B", "C", "D"],
                ["C", "D"],
                [7, 8],
                    [[1, 2, 7, 8], [3, 4, 7, 8], [5, 6, 7, 8]],
                    columns=["A", "B", "C", "D"],
                ["A", "B", "C"],
                np.array([7, 8, 9], dtype=np.int64),
                DataFrame([[7, 8, 9], [7, 8, 9], [7, 8, 9]],
                          columns=["A", "B", "C"]),
                ["B", "C", "D"],
                [[7, 8, 9], [10, 11, 12], [13, 14, 15]],
                    [[1, 7, 8, 9], [3, 10, 11, 12], [5, 13, 14, 15]],
                    columns=["A", "B", "C", "D"],
                ["C", "A", "D"],
                np.array([[7, 8, 9], [10, 11, 12], [13, 14, 15]],
                    [[8, 2, 7, 9], [11, 4, 10, 12], [14, 6, 13, 15]],
                    columns=["A", "B", "C", "D"],
                ["A", "C"],
                DataFrame([[7, 8], [9, 10], [11, 12]], columns=["A", "C"]),
                DataFrame([[7, 2, 8], [9, 4, 10], [11, 6, 12]],
                          columns=["A", "B", "C"]),
    def test_setitem_list_missing_columns(self, columns, box, expected):
        # GH#29334
        df = DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"])
        df[columns] = box
        tm.assert_frame_equal(df, expected)

    def test_setitem_list_of_tuples(self, float_frame):
        tuples = list(zip(float_frame["A"], float_frame["B"]))
        float_frame["tuples"] = tuples

        result = float_frame["tuples"]
        expected = Series(tuples, index=float_frame.index, name="tuples")
        tm.assert_series_equal(result, expected)

    def test_setitem_iloc_generator(self):
        # GH#39614
        df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        indexer = (x for x in [1, 2])
        df.iloc[indexer] = 1
        expected = DataFrame({"a": [1, 1, 1], "b": [4, 1, 1]})
        tm.assert_frame_equal(df, expected)

    def test_setitem_iloc_two_dimensional_generator(self):
        df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        indexer = (x for x in [1, 2])
        df.iloc[indexer, 1] = 1
        expected = DataFrame({"a": [1, 2, 3], "b": [4, 1, 1]})
        tm.assert_frame_equal(df, expected)

    def test_setitem_dtypes_bytes_type_to_object(self):
        # GH 20734
        index = Series(name="id", dtype="S24")
        df = DataFrame(index=index)
        df["a"] = Series(name="a", index=index, dtype=np.uint32)
        df["b"] = Series(name="b", index=index, dtype="S64")
        df["c"] = Series(name="c", index=index, dtype="S64")
        df["d"] = Series(name="d", index=index, dtype=np.uint8)
        result = df.dtypes
        expected = Series([np.uint32, object, object, np.uint8],
        tm.assert_series_equal(result, expected)

    def test_boolean_mask_nullable_int64(self):
        # GH 28928
        result = DataFrame({
            "a": [3, 4],
            "b": [5, 6]
            "a": "int64",
            "b": "Int64"
        mask = Series(False, index=result.index)
        result.loc[mask, "a"] = result["a"]
        result.loc[mask, "b"] = result["b"]
        expected = DataFrame({
            "a": [3, 4],
            "b": [5, 6]
            "a": "int64",
            "b": "Int64"
        tm.assert_frame_equal(result, expected)

    def test_setitem_ea_dtype_rhs_series(self):
        # GH#47425
        df = DataFrame({"a": [1, 2]})
        df["a"] = Series([1, 2], dtype="Int64")
        expected = DataFrame({"a": [1, 2]}, dtype="Int64")
        tm.assert_frame_equal(df, expected)

    # TODO(ArrayManager) set column with 2d column array, see #44788
    def test_setitem_npmatrix_2d(self):
        # GH#42376
        # for use-case df["x"] = sparse.random(10, 10).mean(axis=1)
        expected = DataFrame(
                "np-array": np.ones(10),
                "np-matrix": np.ones(10)

        a = np.ones((10, 1))
        df = DataFrame(index=np.arange(10))
        df["np-array"] = a

        # Instantiation of `np.matrix` gives PendingDeprecationWarning
        with tm.assert_produces_warning(PendingDeprecationWarning):
            df["np-matrix"] = np.matrix(a)

        tm.assert_frame_equal(df, expected)

    @pytest.mark.parametrize("vals", [{}, {"d": "a"}])
    def test_setitem_aligning_dict_with_index(self, vals):
        """Dict values set through ``loc`` are matched to rows by their keys.

        ``vals`` optionally adds an extra column "d" to the frame.
        """
        # GH#47216
        df = DataFrame({"a": [1, 2], "b": [3, 4], **vals})
        # Keys are given out of order on purpose: 0 -> 200, 1 -> 100.
        df.loc[:, "a"] = {1: 100, 0: 200}
        df.loc[:, "c"] = {0: 5, 1: 6}
        # Row 0 has no entry, so NaN is expected there.
        df.loc[:, "e"] = {1: 5}
        expected = DataFrame(
            {
                "a": [200, 100],
                "b": [3, 4],
                **vals,
                "c": [5, 6],
                "e": [np.nan, 5],
            }
        )
        tm.assert_frame_equal(df, expected)

    def test_setitem_rhs_dataframe(self):
        # GH#47578
        df = DataFrame({"a": [1, 2]})
        df["a"] = DataFrame({"a": [10, 11]}, index=[1, 2])
        expected = DataFrame({"a": [np.nan, 10]})
        tm.assert_frame_equal(df, expected)

        df = DataFrame({"a": [1, 2]})
        df.isetitem(0, DataFrame({"a": [10, 11]}, index=[1, 2]))
        tm.assert_frame_equal(df, expected)
Exemplo n.º 54
    def test_000constructor_resolution(self):
        # 2252
        t1 = Timestamp((1352934390 * 1000000000) + 1000000 + 1000 + 1)
        idx = DatetimeIndex([t1])

        assert idx.nanosecond[0] == t1.nanosecond
Exemplo n.º 55
 def test_constructor_with_int_tz(self, klass, box, tz, dtype):
     # GH 20997, 20964
     ts = Timestamp("2018-01-01", tz=tz)
     result = klass(box([ts.value]), dtype=dtype)
     expected = klass([ts])
     assert result == expected
Exemplo n.º 56
    tm.assert_frame_equal(result, expected)

    # prod
    result = df.groupby(pd.cut(df["a"], grps), observed=observed)._cython_agg_general(
    expected = pd.DataFrame(
        {"a": [1, 1, 1716, 1]},
        index=pd.CategoricalIndex(intervals, name="a", ordered=True),
    if observed:
        expected = expected[expected.a != 1]

    tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("op", ["first", "last", "max", "min"])
@pytest.mark.parametrize(
    "data", [Timestamp("2016-10-14 21:00:44.557"), Timedelta("17088 days 21:00:44.557")]
)
def test_cython_with_timestamp_and_nat(op, data):
    """Aggregate one-row groups whose "b" values are ``data`` and ``NaT``.

    ``op`` is the aggregation name passed to ``aggregate``; ``data`` is a
    Timestamp or Timedelta scalar.
    """
    # https://github.com/pandas-dev/pandas/issues/19526
    df = DataFrame({"a": [0, 1], "b": [data, NaT]})
    index = Index([0, 1], name="a")

    # We will group by a and test the cython aggregations
    # Every group holds exactly one row, so each op returns that row's value.
    expected = DataFrame({"b": [data, NaT]}, index=index)

    result = df.groupby("a").aggregate(op)
    tm.assert_frame_equal(expected, result)
Exemplo n.º 57
 def test_compare_hour13(self):
     r = Timestamp("2000-08-12T13:00:00").to_julian_date()
     assert r == 2_451_769.0416666666666666
Exemplo n.º 58
 def test_constructor_with_non_normalized_pytz(self, tz):
     """The index's tz must be the very object ``pytz.timezone(tz)`` returns."""
     # GH 18595
     # Take the tzinfo from a Timestamp rather than from pytz.timezone directly.
     stamp_tz = Timestamp("2010", tz=tz).tz
     index = DatetimeIndex(["2010"], tz=stamp_tz)
     assert pytz.timezone(tz) is index.tz
Exemplo n.º 59
class TestTimedeltaMultiplicationDivision:
    """
    Tests for Timedelta methods:

        __mul__, __rmul__,
        __div__, __rdiv__,
        __truediv__, __rtruediv__,
        __floordiv__, __rfloordiv__,
        __mod__, __rmod__,
        __divmod__, __rdivmod__
    """

    # ---------------------------------------------------------------
    # Timedelta.__mul__, __rmul__

    @pytest.mark.parametrize(
        "td_nat", [NaT, np.timedelta64("NaT", "ns"), np.timedelta64("NaT")]
    )
    @pytest.mark.parametrize("op", [operator.mul, ops.rmul])
    def test_td_mul_nat(self, op, td_nat):
        """Multiplying a Timedelta by a NaT-like value raises TypeError."""
        # GH#19819
        td = Timedelta(10, unit="d")
        with pytest.raises(TypeError):
            op(td, td_nat)

    @pytest.mark.parametrize("nan", [np.nan, np.float64("NaN"), float("nan")])
    @pytest.mark.parametrize("op", [operator.mul, ops.rmul])
    def test_td_mul_nan(self, op, nan):
        """Multiplying a Timedelta by a float NaN gives NaT."""
        # np.float64('NaN') has a 'dtype' attr, avoid treating as array
        td = Timedelta(10, unit="d")
        result = op(td, nan)
        assert result is NaT

    @pytest.mark.parametrize("op", [operator.mul, ops.rmul])
    def test_td_mul_scalar(self, op):
        """Multiplication by numeric scalars scales; other operands raise."""
        # GH#19738
        td = Timedelta(minutes=3)

        result = op(td, 2)
        assert result == Timedelta(minutes=6)

        result = op(td, 1.5)
        assert result == Timedelta(minutes=4, seconds=30)

        assert op(td, np.nan) is NaT

        assert op(-1, td).value == -1 * td.value
        assert op(-1.0, td).value == -1.0 * td.value

        with pytest.raises(TypeError):
            # timedelta * datetime is gibberish
            op(td, Timestamp(2016, 1, 2))

        with pytest.raises(TypeError):
            # invalid multiply with another timedelta
            op(td, td)

    # ---------------------------------------------------------------
    # Timedelta.__div__, __truediv__

    def test_td_div_timedeltalike_scalar(self):
        """Dividing by a timedelta-like scalar gives a plain number."""
        # GH#19738
        td = Timedelta(10, unit="d")

        result = td / offsets.Hour(1)
        assert result == 240

        assert td / td == 1
        assert td / np.timedelta64(60, "h") == 4

        assert np.isnan(td / NaT)

    def test_td_div_numeric_scalar(self):
        """Dividing by an int or float gives a Timedelta."""
        # GH#19738
        td = Timedelta(10, unit="d")

        result = td / 2
        assert isinstance(result, Timedelta)
        assert result == Timedelta(days=5)

        result = td / 5.0
        assert isinstance(result, Timedelta)
        assert result == Timedelta(days=2)

    @pytest.mark.parametrize("nan", [np.nan, np.float64("NaN"), float("nan")])
    def test_td_div_nan(self, nan):
        """True and floor division by a float NaN both give NaT."""
        # np.float64('NaN') has a 'dtype' attr, avoid treating as array
        td = Timedelta(10, unit="d")
        result = td / nan
        assert result is NaT

        result = td // nan
        assert result is NaT

    # ---------------------------------------------------------------
    # Timedelta.__rdiv__

    def test_td_rdiv_timedeltalike_scalar(self):
        """A timedelta-like scalar divided by a Timedelta gives a number."""
        # GH#19738
        td = Timedelta(10, unit="d")
        result = offsets.Hour(1) / td
        assert result == 1 / 240.0

        assert np.timedelta64(60, "h") / td == 0.25

    # ---------------------------------------------------------------
    # Timedelta.__floordiv__

    def test_td_floordiv_timedeltalike_scalar(self):
        """Floor division by timedelta-like scalars gives an integer count."""
        # GH#18846
        td = Timedelta(hours=3, minutes=4)
        scalar = Timedelta(hours=3, minutes=3)

        assert td // scalar == 1
        assert -td // scalar.to_pytimedelta() == -2
        assert (2 * td) // scalar.to_timedelta64() == 2

    def test_td_floordiv_null_scalar(self):
        """Floor division by NaN gives NaT; by NaT-likes it gives NaN."""
        # GH#18846
        td = Timedelta(hours=3, minutes=4)

        assert td // np.nan is NaT
        assert np.isnan(td // NaT)
        assert np.isnan(td // np.timedelta64("NaT"))

    def test_td_floordiv_offsets(self):
        """Floor division by Hour/Minute offsets gives an integer count."""
        # GH#19738
        td = Timedelta(hours=3, minutes=4)
        assert td // offsets.Hour(1) == 3
        assert td // offsets.Minute(2) == 92

    def test_td_floordiv_invalid_scalar(self):
        """Floor division by a datetime64 scalar raises TypeError."""
        # GH#18846
        td = Timedelta(hours=3, minutes=4)

        # Build the datetime64 outside the ``raises`` block, with the unit as
        # the positional second argument, so that only the floor division can
        # satisfy the expected TypeError (``dtype=`` is not a documented
        # ``np.datetime64`` parameter).
        dt64 = np.datetime64("2016-01-01", "us")
        with pytest.raises(TypeError):
            td // dt64

    def test_td_floordiv_numeric_scalar(self):
        """Floor division by Python and numpy numbers gives a Timedelta."""
        # GH#18846
        td = Timedelta(hours=3, minutes=4)

        expected = Timedelta(hours=1, minutes=32)
        assert td // 2 == expected
        assert td // 2.0 == expected
        assert td // np.float64(2.0) == expected
        assert td // np.int32(2.0) == expected
        assert td // np.uint8(2.0) == expected

    def test_td_floordiv_timedeltalike_array(self):
        """Floor division by timedelta64 arrays works element-wise."""
        # GH#18846
        td = Timedelta(hours=3, minutes=4)
        scalar = Timedelta(hours=3, minutes=3)

        # Array-like others
        assert td // np.array(scalar.to_timedelta64()) == 1

        res = (3 * td) // np.array([scalar.to_timedelta64()])
        expected = np.array([3], dtype=np.int64)
        tm.assert_numpy_array_equal(res, expected)

        # A NaT element yields NaN, which makes the expected array float.
        res = (10 * td) // np.array([scalar.to_timedelta64(), np.timedelta64("NaT")])
        expected = np.array([10, np.nan])
        tm.assert_numpy_array_equal(res, expected)

    def test_td_floordiv_numeric_series(self):
        """Floor division by an int64 Series gives a timedelta-kind result."""
        # GH#18846
        td = Timedelta(hours=3, minutes=4)
        ser = pd.Series([1], dtype=np.int64)
        res = td // ser
        assert res.dtype.kind == "m"

    # ---------------------------------------------------------------
    # Timedelta.__rfloordiv__

    def test_td_rfloordiv_timedeltalike_scalar(self):
        """``__rfloordiv__`` with timedelta-like scalars gives an integer."""
        # GH#18846
        td = Timedelta(hours=3, minutes=3)
        scalar = Timedelta(hours=3, minutes=4)

        # scalar others
        # x // Timedelta is defined only for timedelta-like x. int-like,
        # float-like, and date-like, in particular, should all either
        # a) raise TypeError directly or
        # b) return NotImplemented, following which the reversed
        #    operation will raise TypeError.
        assert td.__rfloordiv__(scalar) == 1
        assert (-td).__rfloordiv__(scalar.to_pytimedelta()) == -2
        assert (2 * td).__rfloordiv__(scalar.to_timedelta64()) == 0

    def test_td_rfloordiv_null_scalar(self):
        """``__rfloordiv__`` with NaT-like scalars gives NaN."""
        # GH#18846
        td = Timedelta(hours=3, minutes=3)

        assert np.isnan(td.__rfloordiv__(NaT))
        assert np.isnan(td.__rfloordiv__(np.timedelta64("NaT")))

    def test_td_rfloordiv_offsets(self):
        """An Hour offset floor-divided by a Timedelta gives an integer."""
        # GH#19738
        assert offsets.Hour(1) // Timedelta(minutes=25) == 2

    def test_td_rfloordiv_invalid_scalar(self):
        """``__rfloordiv__`` with a datetime64 scalar raises TypeError."""
        # GH#18846
        td = Timedelta(hours=3, minutes=3)

        # Unit passed positionally; ``dtype=`` is not a documented
        # ``np.datetime64`` parameter.
        dt64 = np.datetime64("2016-01-01", "us")
        with pytest.raises(TypeError):
            # NOTE(review): statement restored from a truncated listing;
            # ``dt64`` and ``td`` were otherwise unused -- confirm upstream.
            td.__rfloordiv__(dt64)

    def test_td_rfloordiv_numeric_scalar(self):
        """``__rfloordiv__`` with numeric scalars is not supported."""
        # GH#18846
        td = Timedelta(hours=3, minutes=3)

        assert td.__rfloordiv__(np.nan) is NotImplemented
        assert td.__rfloordiv__(3.5) is NotImplemented
        assert td.__rfloordiv__(2) is NotImplemented

        # NOTE(review): the three statements inside the ``raises`` blocks were
        # lost in the listing; the numpy scalar operands below mirror those of
        # test_td_floordiv_numeric_scalar -- confirm against upstream.
        with pytest.raises(TypeError):
            td.__rfloordiv__(np.float64(2.0))
        with pytest.raises(TypeError):
            td.__rfloordiv__(np.uint8(9))
        with pytest.raises(TypeError, match="Invalid dtype"):
            # deprecated GH#19761, enforced GH#29797
            td.__rfloordiv__(np.int32(2.0))

    def test_td_rfloordiv_timedeltalike_array(self):
        """``__rfloordiv__`` with timedelta64 arrays works element-wise."""
        # GH#18846
        td = Timedelta(hours=3, minutes=3)
        scalar = Timedelta(hours=3, minutes=4)

        # Array-like others
        assert td.__rfloordiv__(np.array(scalar.to_timedelta64())) == 1

        res = td.__rfloordiv__(np.array([(3 * scalar).to_timedelta64()]))
        expected = np.array([3], dtype=np.int64)
        tm.assert_numpy_array_equal(res, expected)

        arr = np.array([(10 * scalar).to_timedelta64(), np.timedelta64("NaT")])
        res = td.__rfloordiv__(arr)
        expected = np.array([10, np.nan])
        tm.assert_numpy_array_equal(res, expected)

    def test_td_rfloordiv_numeric_series(self):
        """An int64 Series floor-divided by a Timedelta raises TypeError."""
        # GH#18846
        td = Timedelta(hours=3, minutes=3)
        ser = pd.Series([1], dtype=np.int64)
        res = td.__rfloordiv__(ser)
        assert res is NotImplemented

        with pytest.raises(TypeError, match="Invalid dtype"):
            # Deprecated GH#19761, enforced GH#29797
            ser // td

    # ----------------------------------------------------------------
    # Timedelta.__mod__, __rmod__

    def test_mod_timedeltalike(self):
        """Modulo by timedelta-likes gives a Timedelta, or NaT for NaT."""
        # GH#19365
        td = Timedelta(hours=37)

        # Timedelta-like others
        result = td % Timedelta(hours=6)
        assert isinstance(result, Timedelta)
        assert result == Timedelta(hours=1)

        result = td % timedelta(minutes=60)
        assert isinstance(result, Timedelta)
        assert result == Timedelta(0)

        result = td % NaT
        assert result is NaT

    def test_mod_timedelta64_nat(self):
        """Modulo by a timedelta64 NaT gives NaT."""
        # GH#19365
        td = Timedelta(hours=37)

        result = td % np.timedelta64("NaT", "ns")
        assert result is NaT

    def test_mod_timedelta64(self):
        """Modulo by a timedelta64 gives a Timedelta remainder."""
        # GH#19365
        td = Timedelta(hours=37)

        result = td % np.timedelta64(2, "h")
        assert isinstance(result, Timedelta)
        assert result == Timedelta(hours=1)

    def test_mod_offset(self):
        """Modulo by an Hour offset gives a Timedelta remainder."""
        # GH#19365
        td = Timedelta(hours=37)

        result = td % offsets.Hour(5)
        assert isinstance(result, Timedelta)
        assert result == Timedelta(hours=2)

    def test_mod_numeric(self):
        """Modulo by int or float gives a Timedelta remainder."""
        # GH#19365
        td = Timedelta(hours=37)

        # Numeric Others
        result = td % 2
        assert isinstance(result, Timedelta)
        assert result == Timedelta(0)

        result = td % 1e12
        assert isinstance(result, Timedelta)
        assert result == Timedelta(minutes=3, seconds=20)

        result = td % int(1e12)
        assert isinstance(result, Timedelta)
        assert result == Timedelta(minutes=3, seconds=20)

    def test_mod_invalid(self):
        """Modulo by a Timestamp or a list raises TypeError."""
        # GH#19365
        td = Timedelta(hours=37)

        with pytest.raises(TypeError):
            td % Timestamp("2018-01-22")

        with pytest.raises(TypeError):
            td % []

    def test_rmod_pytimedelta(self):
        """A ``datetime.timedelta`` modulo a Timedelta gives a Timedelta."""
        # GH#19365
        td = Timedelta(minutes=3)

        result = timedelta(minutes=4) % td
        assert isinstance(result, Timedelta)
        assert result == Timedelta(minutes=1)

    def test_rmod_timedelta64(self):
        """A timedelta64 modulo a Timedelta gives a Timedelta."""
        # GH#19365
        td = Timedelta(minutes=3)
        result = np.timedelta64(5, "m") % td
        assert isinstance(result, Timedelta)
        assert result == Timedelta(minutes=2)

    def test_rmod_invalid(self):
        """Timestamp, numeric and numeric-array left operands raise."""
        # GH#19365
        td = Timedelta(minutes=3)

        with pytest.raises(TypeError):
            Timestamp("2018-01-22") % td

        with pytest.raises(TypeError):
            15 % td

        with pytest.raises(TypeError):
            16.0 % td

        with pytest.raises(TypeError):
            np.array([22, 24]) % td

    # ----------------------------------------------------------------
    # Timedelta.__divmod__, __rdivmod__

    def test_divmod_numeric(self):
        """``divmod`` by a number gives Timedelta parts, or NaT for NaN."""
        # GH#19365
        td = Timedelta(days=2, hours=6)

        result = divmod(td, 53 * 3600 * 1e9)
        assert result[0] == Timedelta(1, unit="ns")
        assert isinstance(result[1], Timedelta)
        assert result[1] == Timedelta(hours=1)

        assert result
        result = divmod(td, np.nan)
        assert result[0] is NaT
        assert result[1] is NaT

    def test_divmod(self):
        """``divmod`` by a timedelta, an int and NaT."""
        # GH#19365
        td = Timedelta(days=2, hours=6)

        result = divmod(td, timedelta(days=1))
        assert result[0] == 2
        assert isinstance(result[1], Timedelta)
        assert result[1] == Timedelta(hours=6)

        result = divmod(td, 54)
        assert result[0] == Timedelta(hours=1)
        assert isinstance(result[1], Timedelta)
        assert result[1] == Timedelta(0)

        result = divmod(td, NaT)
        assert np.isnan(result[0])
        assert result[1] is NaT

    def test_divmod_offset(self):
        """``divmod`` by a negative Hour offset."""
        # GH#19365
        td = Timedelta(days=2, hours=6)

        result = divmod(td, offsets.Hour(-4))
        assert result[0] == -14
        assert isinstance(result[1], Timedelta)
        assert result[1] == Timedelta(hours=-2)

    def test_divmod_invalid(self):
        """``divmod`` by a Timestamp raises TypeError."""
        # GH#19365
        td = Timedelta(days=2, hours=6)

        with pytest.raises(TypeError):
            divmod(td, Timestamp("2018-01-22"))

    def test_rdivmod_pytimedelta(self):
        """``divmod`` of a ``datetime.timedelta`` by a Timedelta."""
        # GH#19365
        result = divmod(timedelta(days=2, hours=6), Timedelta(days=1))
        assert result[0] == 2
        assert isinstance(result[1], Timedelta)
        assert result[1] == Timedelta(hours=6)

    def test_rdivmod_offset(self):
        """``divmod`` of an Hour offset by a negative Timedelta."""
        result = divmod(offsets.Hour(54), Timedelta(hours=-4))
        assert result[0] == -14
        assert isinstance(result[1], Timedelta)
        assert result[1] == Timedelta(hours=-2)

    def test_rdivmod_invalid(self):
        """Timestamp, numeric and numeric-array left operands raise."""
        # GH#19365
        td = Timedelta(minutes=3)

        with pytest.raises(TypeError):
            divmod(Timestamp("2018-01-22"), td)

        with pytest.raises(TypeError):
            divmod(15, td)

        with pytest.raises(TypeError):
            divmod(16.0, td)

        with pytest.raises(TypeError):
            divmod(np.array([22, 24]), td)

    # ----------------------------------------------------------------

    @pytest.mark.parametrize(
        "op", [operator.mul, ops.rmul, operator.truediv, ops.rdiv, ops.rsub]
    )
    @pytest.mark.parametrize(
        "arr",
        [
            np.array([Timestamp("20130101 9:01"), Timestamp("20121230 9:02")]),
            np.array([Timestamp.now(), Timedelta("1D")]),
        ],
    )
    def test_td_op_timedelta_timedeltalike_array(self, op, arr):
        """Arrays holding Timestamps are rejected by each parametrized op."""
        with pytest.raises(TypeError):
            op(arr, Timedelta("1D"))
Exemplo n.º 60
class TestContains:
    def test_contains(self):

        ci = CategoricalIndex(list("aabbca"),

        assert "a" in ci
        assert "z" not in ci
        assert "e" not in ci
        assert np.nan not in ci

        # assert codes NOT in index
        assert 0 not in ci
        assert 1 not in ci

    def test_contains_nan(self):
        ci = CategoricalIndex(list("aabbca") + [np.nan],
        assert np.nan in ci

    @pytest.mark.parametrize("unwrap", [True, False])
    def test_contains_na_dtype(self, unwrap):
        """Check which NA-like scalars are contained, per underlying dtype.

        With ``unwrap`` the checks run against ``CategoricalIndex._data``
        instead of the index itself.
        """
        dti = pd.date_range("2016-01-01", periods=100).insert(0, pd.NaT)
        pi = dti.to_period("D")
        tdi = dti - dti[-1]

        def build(values):
            wrapped = CategoricalIndex(values)
            return wrapped._data if unwrap else wrapped

        def check(container, has_dt64_nat, has_td64_nat):
            # nan, None and pd.NaT are expected to match for every dtype here.
            assert np.nan in container
            assert None in container
            assert pd.NaT in container
            assert (np.datetime64("NaT") in container) is has_dt64_nat
            assert (np.timedelta64("NaT") in container) is has_td64_nat

        # datetime64 categories: only the datetime64 NaT matches
        check(build(dti), True, False)
        # timedelta64 categories: only the timedelta64 NaT matches
        check(build(tdi), False, True)
        # period categories: neither numpy NaT matches
        check(build(pi), False, False)

        "item, expected",
            (pd.Interval(0, 1), True),
            (1.5, True),
            (pd.Interval(0.5, 1.5), False),
            ("a", False),
            (Timestamp(1), False),
            (pd.Timedelta(1), False),
    def test_contains_interval(self, item, expected):
        # GH 23705
        ci = CategoricalIndex(IntervalIndex.from_breaks(range(3)))
        result = item in ci
        assert result is expected

    def test_contains_list(self):
        """Testing a list for membership raises instead of returning False."""
        # GH#21729
        idx = CategoricalIndex([1, 2, 3])

        assert "a" not in idx

        for unhashable in (["a"], ["a", "b"]):
            with pytest.raises(TypeError, match="unhashable type"):
                unhashable in idx