示例#1
0
    def test_replace_datetimetz(self):
        """Exercise ``replace`` and ``fillna`` on a frame with a tz-aware column.

        GH 11326: ``replace`` used to behave poorly when presented with
        datetime64[ns, tz] data.
        """
        eastern = 'US/Eastern'
        df = DataFrame({
            'A': date_range('20130101', periods=3, tz=eastern),
            'B': [0, np.nan, 2],
        })

        # NaN -> 1 only touches the float column; fillna must give the same
        expected = DataFrame({
            'A': date_range('20130101', periods=3, tz=eastern),
            'B': Series([0, 1, 2], dtype='float64'),
        })
        assert_frame_equal(df.replace(np.nan, 1), expected)
        assert_frame_equal(df.fillna(1), expected)

        # 0 -> NaN, again only in the float column
        expected = DataFrame({
            'A': date_range('20130101', periods=3, tz=eastern),
            'B': [np.nan, np.nan, 2],
        })
        assert_frame_equal(df.replace(0, np.nan), expected)

        # replace one tz-aware value by another one from the same zone
        new_stamp = Timestamp('20130104', tz=eastern)
        expected = DataFrame({
            'A': [
                Timestamp('20130101', tz=eastern),
                new_stamp,
                Timestamp('20130103', tz=eastern),
            ],
            'B': [0, np.nan, 2],
        })
        result = df.replace(Timestamp('20130102', tz=eastern), new_stamp)
        assert_frame_equal(result, expected)

        # same expectation when the cell is blanked first and NaT is replaced
        result = df.copy()
        result.iloc[1, 0] = np.nan
        result = result.replace({'A': pd.NaT}, new_stamp)
        assert_frame_equal(result, expected)

        # coerce to object: the filler comes from another zone, or is naive
        fillers = [
            (pd.NaT, Timestamp('20130104', tz='US/Pacific')),
            (np.nan, Timestamp('20130104')),
        ]
        for missing, filler in fillers:
            result = df.copy()
            result.iloc[1, 0] = np.nan
            result = result.replace({'A': missing}, filler)
            expected = DataFrame({
                'A': [
                    Timestamp('20130101', tz=eastern),
                    filler,
                    Timestamp('20130103', tz=eastern),
                ],
                'B': [0, np.nan, 2],
            })
            assert_frame_equal(result, expected)
示例#2
0
 def test_constructor_with_int_tz(self, klass, box, tz, dtype):
     # GH 20997, 20964
     ts = Timestamp("2018-01-01", tz=tz)
     result = klass(box([ts.value]), dtype=dtype)
     expected = klass([ts])
     assert result == expected
示例#3
0
 def test_index_constructor_with_numpy_object_array_and_timestamp_tz_with_nan(self):
     """Index built from an object ndarray of a tz-aware Timestamp and NaN.

     GH 27011
     """
     utc_stamp = Timestamp("2019", tz="UTC")
     values = np.array([utc_stamp, np.nan], dtype=object)
     expected = DatetimeIndex([utc_stamp, pd.NaT])
     tm.assert_index_equal(Index(values), expected)
示例#4
0
class TestDatetimeIndex:
    @pytest.mark.parametrize("dt_cls", [DatetimeIndex, DatetimeArray._from_sequence])
    def test_freq_validation_with_nat(self, dt_cls):
        """A NaT among the values must produce the freq-conformance error.

        GH#11587 make sure we get a useful error message when generate_range
        raises.
        """
        msg = (
            "Inferred frequency None from passed values does not conform "
            "to passed frequency D"
        )
        stamp = pd.Timestamp("2011-01-01")
        # once with the Timestamp itself, once with its integer value
        for second in (stamp, stamp.value):
            with pytest.raises(ValueError, match=msg):
                dt_cls([pd.NaT, second], freq="D")

    def test_categorical_preserves_tz(self):
        """GH#18664 retain tz when going DTI-->Categorical-->DTI."""
        # TODO: parametrize over DatetimeIndex/DatetimeArray
        #  once CategoricalIndex(DTA) works
        values = [pd.NaT, "2015-01-01", "1999-04-06 15:14:13", "2015-01-01"]
        dti = pd.DatetimeIndex(values, tz="US/Eastern")

        cat_index = pd.CategoricalIndex(dti)
        containers = (cat_index, pd.Categorical(dti), pd.Series(cat_index))

        # every categorical container must convert back to the original index
        for container in containers:
            tm.assert_index_equal(pd.DatetimeIndex(container), dti)

    def test_dti_with_period_data_raises(self):
        """Period-dtype input is rejected by DatetimeIndex and to_datetime.

        GH#23675
        """
        msg = "PeriodDtype data is invalid"
        data = pd.PeriodIndex(["2016Q1", "2016Q2"], freq="Q")

        # first the PeriodIndex itself, then the same data as a period array
        for values in (data, period_array(data)):
            for converter in (DatetimeIndex, to_datetime):
                with pytest.raises(TypeError, match=msg):
                    converter(values)

    def test_dti_with_timedelta64_data_raises(self):
        """timedelta64 input is rejected by DatetimeIndex and to_datetime.

        GH#23675 deprecated, enforced in GH#29794
        """
        msg = r"timedelta64\[ns\] cannot be converted to datetime64"
        data = np.array([0], dtype="m8[ns]")

        # first the raw ndarray, then the same data wrapped in a TimedeltaIndex
        for values in (data, pd.TimedeltaIndex(data)):
            for converter in (DatetimeIndex, to_datetime):
                with pytest.raises(TypeError, match=msg):
                    converter(values)

    def test_construction_caching(self):
        """The tz-aware column keeps its zone once placed in a DataFrame."""
        columns = {
            "dt": pd.date_range("20130101", periods=3),
            "dttz": pd.date_range("20130101", periods=3, tz="US/Eastern"),
            "dt_with_null": [
                pd.Timestamp("20130101"),
                pd.NaT,
                pd.Timestamp("20130103"),
            ],
            "dtns": pd.date_range("20130101", periods=3, freq="ns"),
        }
        frame = pd.DataFrame(columns)

        zone = frame.dttz.dtype.tz.zone
        assert zone == "US/Eastern"

    @pytest.mark.parametrize(
        "kwargs",
        [{"tz": "dtype.tz"}, {"dtype": "dtype"}, {"dtype": "dtype", "tz": "dtype.tz"}],
    )
    def test_construction_with_alt(self, kwargs, tz_aware_fixture):
        """Re-wrapping a tz-aware index with its own tz/dtype changes nothing."""
        i = pd.date_range("20130101", periods=5, freq="H", tz=tz_aware_fixture)

        # the parametrized values are attribute paths to look up on the index
        resolved = {}
        for key, path in kwargs.items():
            resolved[key] = attrgetter(path)(i)

        tm.assert_index_equal(i, DatetimeIndex(i, **resolved))

    @pytest.mark.parametrize(
        "kwargs",
        [{"tz": "dtype.tz"}, {"dtype": "dtype"}, {"dtype": "dtype", "tz": "dtype.tz"}],
    )
    def test_construction_with_alt_tz_localize(self, kwargs, tz_aware_fixture):
        """Integer (i8) input combined with tz and dtype arguments."""
        i = pd.date_range("20130101", periods=5, freq="H", tz=tz_aware_fixture)
        # the parametrized values are attribute paths to look up on the index
        resolved = {key: attrgetter(path)(i) for key, path in kwargs.items()}

        if "tz" in resolved:
            # i8 values read as UTC, then converted, must match the re-wrap
            expected = DatetimeIndex(i, **resolved)
            result = DatetimeIndex(i.asi8, tz="UTC").tz_convert(resolved["tz"])
            tm.assert_index_equal(result, expected)

        # localize into the provided tz
        naive = i.tz_localize(None)
        i2 = DatetimeIndex(naive.asi8, tz="UTC")
        tm.assert_index_equal(i2, naive.tz_localize("UTC"))

        # incompat tz/dtype
        msg = "cannot supply both a tz and a dtype with a tz"
        with pytest.raises(ValueError, match=msg):
            DatetimeIndex(naive.asi8, dtype=i.dtype, tz="US/Pacific")

    def test_construction_index_with_mixed_timezones(self):
        """``Index`` infers a DatetimeIndex only when all timezones agree.

        gh-11488
        """

        def check_naive(stamps):
            # no tz results in DatetimeIndex
            result = Index(stamps, name="idx")
            exp = DatetimeIndex(stamps, name="idx")
            tm.assert_index_equal(result, exp, exact=True)
            assert isinstance(result, DatetimeIndex)
            assert result.tz is None

        def check_same_tz(wall_times, tz):
            # same tz results in DatetimeIndex
            result = Index([Timestamp(w, tz=tz) for w in wall_times], name="idx")
            exp = DatetimeIndex(
                [Timestamp(w) for w in wall_times], tz=tz, name="idx"
            )
            tm.assert_index_equal(result, exp, exact=True)
            assert isinstance(result, DatetimeIndex)
            assert result.tz is not None
            assert result.tz == exp.tz

        def check_object(stamps):
            # Different tz results in Index(dtype=object)
            result = Index(stamps, name="idx")
            exp = Index(stamps, dtype="object", name="idx")
            tm.assert_index_equal(result, exp, exact=True)
            assert not isinstance(result, DatetimeIndex)

        check_naive([Timestamp("2011-01-01"), Timestamp("2011-01-02")])

        check_same_tz(["2011-01-01 10:00", "2011-01-02 10:00"], "Asia/Tokyo")
        # (DST)
        check_same_tz(["2011-01-01 10:00", "2011-08-01 10:00"], "US/Eastern")

        check_object(
            [
                Timestamp("2011-01-01 10:00"),
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
            ]
        )
        check_object(
            [
                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
            ]
        )

        # length = 1
        check_naive([Timestamp("2011-01-01")])

        # length = 1 with tz
        check_same_tz(["2011-01-01 10:00"], "Asia/Tokyo")

    def test_construction_index_with_mixed_timezones_with_NaT(self):
        """Same inference rules as without NaT; NaT entries do not interfere.

        see gh-11488
        """

        def build(entries, tz=None):
            # NaT passes through untouched; strings become Timestamps
            return [e if e is pd.NaT else Timestamp(e, tz=tz) for e in entries]

        def check_naive(entries):
            result = Index(build(entries), name="idx")
            exp = DatetimeIndex(build(entries), name="idx")
            tm.assert_index_equal(result, exp, exact=True)
            assert isinstance(result, DatetimeIndex)
            assert result.tz is None

        def check_same_tz(entries, tz):
            # Same tz results in DatetimeIndex
            result = Index(build(entries, tz=tz), name="idx")
            exp = DatetimeIndex(build(entries), tz=tz, name="idx")
            tm.assert_index_equal(result, exp, exact=True)
            assert isinstance(result, DatetimeIndex)
            assert result.tz is not None
            assert result.tz == exp.tz

        def check_object(stamps):
            # different tz results in Index(dtype=object)
            result = Index(stamps, name="idx")
            exp = Index(stamps, dtype="object", name="idx")
            tm.assert_index_equal(result, exp, exact=True)
            assert not isinstance(result, DatetimeIndex)

        check_naive([pd.NaT, "2011-01-01", pd.NaT, "2011-01-02"])

        check_same_tz(
            [pd.NaT, "2011-01-01 10:00", pd.NaT, "2011-01-02 10:00"], "Asia/Tokyo"
        )
        # (DST)
        check_same_tz(
            ["2011-01-01 10:00", pd.NaT, "2011-08-01 10:00"], "US/Eastern"
        )

        check_object(
            [
                pd.NaT,
                Timestamp("2011-01-01 10:00"),
                pd.NaT,
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
            ]
        )
        check_object(
            [
                pd.NaT,
                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                pd.NaT,
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
            ]
        )

        # all NaT
        check_naive([pd.NaT, pd.NaT])

        # all NaT with tz: here the tz is handed to Index itself
        result = Index([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx")
        exp = DatetimeIndex([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx")
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is not None
        assert result.tz == exp.tz

    def test_construction_dti_with_mixed_timezones(self):
        """DatetimeIndex accepts uniform timezones and rejects mixed ones.

        GH 11488 (not changed, added explicit tests)
        """
        tokyo, eastern = "Asia/Tokyo", "US/Eastern"

        # no tz results in DatetimeIndex
        naive = [Timestamp("2011-01-01"), Timestamp("2011-01-02")]
        result = DatetimeIndex(naive, name="idx")
        exp = DatetimeIndex(naive, name="idx")
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)

        # same tz results in DatetimeIndex (the second case spans DST)
        same_tz_cases = [
            (("2011-01-01 10:00", "2011-01-02 10:00"), tokyo),
            (("2011-01-01 10:00", "2011-08-01 10:00"), eastern),
        ]
        for wall_times, tz in same_tz_cases:
            result = DatetimeIndex(
                [Timestamp(w, tz=tz) for w in wall_times], name="idx"
            )
            exp = DatetimeIndex(
                [Timestamp(w) for w in wall_times], tz=tz, name="idx"
            )
            tm.assert_index_equal(result, exp, exact=True)
            assert isinstance(result, DatetimeIndex)

        # tz mismatch affecting to tz-aware raises ValueError
        tokyo_then_eastern = [
            Timestamp("2011-01-01 10:00", tz=tokyo),
            Timestamp("2011-01-02 10:00", tz=eastern),
        ]
        naive_then_eastern = [
            Timestamp("2011-01-01 10:00"),
            Timestamp("2011-01-02 10:00", tz=eastern),
        ]
        msg = "cannot be converted to datetime64"

        with pytest.raises(ValueError):
            DatetimeIndex(tokyo_then_eastern, name="idx")

        with pytest.raises(ValueError, match=msg):
            DatetimeIndex(naive_then_eastern, tz=tokyo, name="idx")

        with pytest.raises(ValueError):
            DatetimeIndex(tokyo_then_eastern, tz=eastern, name="idx")

        with pytest.raises(ValueError, match=msg):
            # passing tz should result in DatetimeIndex, then the mismatch
            # raises ValueError
            Index(
                [pd.NaT, naive_then_eastern[0], pd.NaT, naive_then_eastern[1]],
                tz=tokyo,
                name="idx",
            )

    def test_construction_base_constructor(self):
        """``pd.Index`` and ``pd.DatetimeIndex`` agree on datetime-like input."""
        inputs = (
            [pd.Timestamp("2011-01-01"), pd.NaT, pd.Timestamp("2011-01-03")],
            [np.nan, pd.NaT, pd.Timestamp("2011-01-03")],
        )
        for arr in inputs:
            # as a plain list ...
            tm.assert_index_equal(pd.Index(arr), pd.DatetimeIndex(arr))
            # ... and as a numpy array
            as_ndarray = np.array(arr)
            tm.assert_index_equal(
                pd.Index(as_ndarray), pd.DatetimeIndex(as_ndarray)
            )

    def test_construction_outofbounds(self):
        """Far-future datetimes: Index gives object dtype, DatetimeIndex raises.

        GH 13663
        """
        dates = [datetime(year, 1, 1) for year in (3000, 4000, 5000, 6000)]

        # coerces to object
        exp = Index(dates, dtype=object)
        tm.assert_index_equal(Index(dates), exp)

        # can't create DatetimeIndex
        with pytest.raises(OutOfBoundsDatetime):
            DatetimeIndex(dates)

    def test_construction_with_ndarray(self):
        """A business-day freq is accepted alongside raw ndarray values.

        GH 5152
        """
        dates = [datetime(2013, 10, day) for day in (7, 8, 9)]
        raw_values = DatetimeIndex(dates, freq=pd.offsets.BDay()).values

        result = DatetimeIndex(raw_values, freq=pd.offsets.BDay())

        expected = DatetimeIndex(["2013-10-07", "2013-10-08", "2013-10-09"], freq="B")
        tm.assert_index_equal(result, expected)

    def test_integer_values_and_tz_interpreted_as_utc(self):
        """Integer (i8) values combined with a timezone.

        GH-24559. Two paths are checked: localizing a naive index built from
        the integers, and passing ``tz="UTC"`` straight to the constructor.
        """
        val = np.datetime64("2000-01-01 00:00:00", "ns")
        values = np.array([val.view("i8")])

        result = DatetimeIndex(values).tz_localize("US/Central")

        expected = pd.DatetimeIndex(["2000-01-01T00:00:00"], tz="US/Central")
        tm.assert_index_equal(result, expected)

        # but UTC is *not* deprecated.
        with tm.assert_produces_warning(None):
            result = DatetimeIndex(values, tz="UTC")
        # The expectation must carry the tz that was requested above, and it
        # has to be compared, otherwise this branch only checks for warnings.
        expected = pd.DatetimeIndex(["2000-01-01T00:00:00"], tz="UTC")
        tm.assert_index_equal(result, expected)

    def test_constructor_coverage(self):
        """Assorted constructor inputs: floats, generators, strings, bad freq."""
        # periods=10.5 must give the same range as periods=10
        rng = date_range("1/1/2000", periods=10.5)
        tm.assert_index_equal(rng, date_range("1/1/2000", periods=10))

        msg = "periods must be a number, got foo"
        with pytest.raises(TypeError, match=msg):
            date_range(start="1/1/2000", periods="foo", freq="D")

        # a bare string is not acceptable data
        with pytest.raises(TypeError):
            DatetimeIndex("1/1/2000")

        # generator expression
        start = datetime(2000, 1, 1)
        result = DatetimeIndex(start + timedelta(i) for i in range(10))
        expected = DatetimeIndex([start + timedelta(i) for i in range(10)])
        tm.assert_index_equal(result, expected)

        # NumPy string array, first plain and then with NaT
        for raw in (
            ["2000-01-01", "2000-01-02", "2000-01-03"],
            ["2000-01-01", "2000-01-02", "NaT"],
        ):
            strings = np.array(raw)
            expected = DatetimeIndex(strings.astype("O"))
            tm.assert_index_equal(DatetimeIndex(strings), expected)

            # and back again from the integer representation
            tm.assert_index_equal(DatetimeIndex(expected.asi8), expected)

        # non-conforming
        msg = (
            "Inferred frequency None from passed values does not conform"
            " to passed frequency D"
        )
        with pytest.raises(ValueError, match=msg):
            DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-04"], freq="D")

        msg = (
            "Of the four parameters: start, end, periods, and freq, exactly"
            " three must be specified"
        )
        underspecified = (
            {"start": "2011-01-01", "freq": "b"},
            {"end": "2011-01-01", "freq": "B"},
            {"periods": 10, "freq": "D"},
        )
        for range_kwargs in underspecified:
            with pytest.raises(ValueError, match=msg):
                date_range(**range_kwargs)

    @pytest.mark.parametrize("freq", ["AS", "W-SUN"])
    def test_constructor_datetime64_tzformat(self, freq):
        """Offsets embedded in the endpoint strings determine the index tz.

        see GH#6572
        """
        # (start, end, offset in minutes, named zone used for the i8 check)
        cases = [
            # ISO 8601 format results in pytz.FixedOffset
            (
                "2013-01-01T00:00:00-05:00",
                "2016-01-01T23:59:59-05:00",
                -300,
                "America/Lima",
            ),
            (
                "2013-01-01T00:00:00+09:00",
                "2016-01-01T23:59:59+09:00",
                540,
                "Asia/Tokyo",
            ),
            # Non ISO 8601 format results in dateutil.tz.tzoffset
            ("2013/1/1 0:00:00-5:00", "2016/1/1 23:59:59-5:00", -300, "America/Lima"),
            ("2013/1/1 0:00:00+9:00", "2016/1/1 23:59:59+09:00", 540, "Asia/Tokyo"),
        ]
        for start, end, minutes, zone in cases:
            idx = date_range(start, end, freq=freq)
            expected = date_range(
                "2013-01-01T00:00:00",
                "2016-01-01T23:59:59",
                freq=freq,
                tz=pytz.FixedOffset(minutes),
            )
            tm.assert_index_equal(idx, expected)

            # the i8 values must match those of the named zone
            # (Unable to use `US/Eastern` for -05:00 because of DST)
            expected_i8 = date_range(
                "2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz=zone
            )
            tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8)

    def test_constructor_dtype(self):
        """A tz-aware dtype localizes; conflicting tz/dtype combinations raise."""
        dates = ["2013-01-01", "2013-01-02"]
        eastern_dtype = "datetime64[ns, US/Eastern]"

        # passing a dtype with a tz should localize, just like passing the tz
        expected = DatetimeIndex(dates).tz_localize("US/Eastern")
        tm.assert_index_equal(DatetimeIndex(dates, dtype=eastern_dtype), expected)
        tm.assert_index_equal(DatetimeIndex(dates, tz="US/Eastern"), expected)

        # if we already have a tz and its not the same, then raise
        idx = DatetimeIndex(dates, dtype=eastern_dtype)

        msg = (
            "cannot supply both a tz and a timezone-naive dtype"
            r" \(i\.e\. datetime64\[ns\]\)"
        )
        with pytest.raises(ValueError, match=msg):
            DatetimeIndex(idx, dtype="datetime64[ns]")

        # this is effectively trying to convert tz's
        msg = "data is already tz-aware US/Eastern, unable to set specified tz: CET"
        with pytest.raises(TypeError, match=msg):
            DatetimeIndex(idx, dtype="datetime64[ns, CET]")

        msg = "cannot supply both a tz and a dtype with a tz"
        with pytest.raises(ValueError, match=msg):
            DatetimeIndex(idx, tz="CET", dtype=eastern_dtype)

        # restating the dtype the data already has leaves it unchanged
        tm.assert_index_equal(idx, DatetimeIndex(idx, dtype=eastern_dtype))

    @pytest.mark.parametrize("dtype", [object, np.int32, np.int64])
    def test_constructor_invalid_dtype_raises(self, dtype):
        """Each parametrized dtype must be refused with ValueError (GH 23986)."""
        values = [1, 2]
        with pytest.raises(ValueError):
            DatetimeIndex(values, dtype=dtype)

    def test_constructor_name(self):
        """``date_range`` stores the ``name`` it is given."""
        label = "TEST"
        idx = date_range(start="2000-01-01", periods=1, freq="A", name=label)
        assert idx.name == "TEST"

    def test_000constructor_resolution(self):
        """The nanosecond component survives DatetimeIndex construction (2252)."""
        # whole seconds plus one millisecond, one microsecond, one nanosecond
        epoch_ns = (1352934390 * 1000000000) + 1000000 + 1000 + 1
        stamp = Timestamp(epoch_ns)

        index = DatetimeIndex([stamp])

        assert index.nanosecond[0] == stamp.nanosecond

    def test_disallow_setting_tz(self):
        """Assigning to ``tz`` on an existing index raises (GH 3746)."""
        dti = DatetimeIndex(["2010"], tz="UTC")
        pacific = pytz.timezone("US/Pacific")

        with pytest.raises(AttributeError):
            dti.tz = pacific

    @pytest.mark.parametrize(
        "tz",
        [
            None,
            "America/Los_Angeles",
            pytz.timezone("America/Los_Angeles"),
            Timestamp("2000", tz="America/Los_Angeles").tz,
        ],
    )
    def test_constructor_start_end_with_tz(self, tz):
        """tz-aware endpoints yield a Los Angeles index for every ``tz`` form.

        GH 18595
        """
        zone = "America/Los_Angeles"
        first = Timestamp("2013-01-01 06:00:00", tz=zone)
        last = Timestamp("2013-01-02 06:00:00", tz=zone)

        result = date_range(freq="D", start=first, end=last, tz=tz)

        expected = DatetimeIndex(
            ["2013-01-01 06:00:00", "2013-01-02 06:00:00"], tz=zone
        )
        tm.assert_index_equal(result, expected)
        # Especially assert that the timezone is consistent for pytz
        assert pytz.timezone(zone) is result.tz

    @pytest.mark.parametrize("tz", ["US/Pacific", "US/Eastern", "Asia/Tokyo"])
    def test_constructor_with_non_normalized_pytz(self, tz):
        """A tz object taken from a Timestamp ends up as the canonical pytz zone.

        GH 18595
        """
        tz_from_stamp = Timestamp("2010", tz=tz).tz
        index = DatetimeIndex(["2010"], tz=tz_from_stamp)
        canonical = pytz.timezone(tz)
        assert canonical is index.tz

    def test_constructor_timestamp_near_dst(self):
        """Same wall time with two different offsets, as Timestamps and datetimes.

        GH 20854
        """
        stamps = [
            Timestamp("2016-10-30 03:00:00" + offset, tz="Europe/Helsinki")
            for offset in ("+0300", "+0200")
        ]
        result = DatetimeIndex(stamps)
        expected = DatetimeIndex([stamp.to_pydatetime() for stamp in stamps])
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("klass", [Index, DatetimeIndex])
    @pytest.mark.parametrize("box", [np.array, partial(np.array, dtype=object), list])
    @pytest.mark.parametrize(
        "tz, dtype",
        [("US/Pacific", "datetime64[ns, US/Pacific]"), (None, "datetime64[ns]")],
    )
    def test_constructor_with_int_tz(self, klass, box, tz, dtype):
        """Integer values plus an explicit dtype must match the Timestamp itself.

        GH 20997, 20964

        ``klass`` is the index constructor, ``box`` wraps the integer into a
        container, and ``tz``/``dtype`` describe the same timezone in two forms.
        """
        ts = Timestamp("2018-01-01", tz=tz)
        result = klass(box([ts.value]), dtype=dtype)
        expected = klass([ts])
        # Compare as indexes: a bare ``==`` yields an element-wise array whose
        # truthiness only works for length 1 and says nothing about dtype/tz.
        tm.assert_index_equal(result, expected)

    def test_construction_int_rountrip(self, tz_naive_fixture):
        """An integer passed with a tz comes back unchanged through ``asi8``.

        GH 12619, GH#24559
        """
        epoch_ns = 1293858000000000000
        index = DatetimeIndex([epoch_ns], tz=tz_naive_fixture)
        round_tripped = index.asi8[0]
        assert epoch_ns == round_tripped

    def test_construction_from_replaced_timestamps_with_dst(self):
        """Build an index from tz-aware Timestamps altered with ``replace``.

        GH 18785
        """
        melbourne = "Australia/Melbourne"
        index = pd.date_range(
            pd.Timestamp(2000, 1, 1), pd.Timestamp(2005, 1, 1), freq="MS", tz=melbourne
        )
        monthly = pd.DataFrame({"data": range(len(index))}, index=index)
        yearly = monthly.resample("Y").mean()

        # move every yearly label to the first of June
        shifted = [label.replace(month=6, day=1) for label in yearly.index]
        result = pd.DatetimeIndex(shifted)

        expected = pd.DatetimeIndex(
            ["{}-06-01 00:00:00".format(year) for year in range(2000, 2006)],
            tz=melbourne,
        )
        tm.assert_index_equal(result, expected)

    def test_construction_with_tz_and_tz_aware_dti(self):
        """A different ``tz`` for already tz-aware data is a TypeError (GH 23579)."""
        central = date_range("2016-01-01", periods=3, tz="US/Central")
        with pytest.raises(TypeError):
            DatetimeIndex(central, tz="Asia/Tokyo")

    def test_construction_with_nat_and_tzlocal(self):
        """Strings including "NaT" can be combined with a dateutil tzlocal."""
        local_tz = dateutil.tz.tzlocal()
        expected = DatetimeIndex([Timestamp("2018", tz=local_tz), pd.NaT])
        result = DatetimeIndex(["2018", "NaT"], tz=local_tz)
        tm.assert_index_equal(result, expected)

    def test_constructor_no_precision_raises(self):
        """A unit-less "datetime64" dtype is rejected.

        GH-24753, GH-24739
        """
        msg = "with no precision is not allowed"
        for constructor in (pd.DatetimeIndex, pd.Index):
            with pytest.raises(ValueError, match=msg):
                constructor(["2000"], dtype="datetime64")

    def test_constructor_wrong_precision_raises(self):
        """A "datetime64[us]" dtype is rejected with ValueError."""
        data = ["2000"]
        with pytest.raises(ValueError):
            pd.DatetimeIndex(data, dtype="datetime64[us]")

    def test_index_constructor_with_numpy_object_array_and_timestamp_tz_with_nan(self):
        """Index built from an object ndarray of a tz-aware Timestamp and NaN.

        GH 27011
        """
        utc_stamp = Timestamp("2019", tz="UTC")
        values = np.array([utc_stamp, np.nan], dtype=object)
        expected = DatetimeIndex([utc_stamp, pd.NaT])
        tm.assert_index_equal(Index(values), expected)
示例#5
0
    def test_000constructor_resolution(self):
        # 2252
        t1 = Timestamp((1352934390 * 1000000000) + 1000000 + 1000 + 1)
        idx = DatetimeIndex([t1])

        assert idx.nanosecond[0] == t1.nanosecond
示例#6
0
    def test_grouper_creation_bug(self):
        """Grouping through ``pd.Grouper`` must match grouping by plain keys.

        Covers GH 8795, GH14334 and GH8866.
        """
        # GH 8795
        df = DataFrame({"A": [0, 0, 1, 1, 2, 2], "B": [1, 2, 3, 4, 5, 6]})
        expected = df.groupby("A").sum()

        by_grouper = df.groupby(pd.Grouper(key="A"))
        assert_frame_equal(by_grouper.sum(), expected)
        assert_frame_equal(by_grouper.apply(lambda x: x.sum()), expected)

        by_grouper = df.groupby(pd.Grouper(key="A", axis=0))
        assert_frame_equal(by_grouper.sum(), expected)

        # GH14334
        # pd.Grouper(key=...) may be passed in a list
        df = DataFrame({
            "A": [0, 0, 0, 1, 1, 1],
            "B": [1, 1, 2, 2, 3, 3],
            "C": [1, 2, 3, 4, 5, 6],
        })

        # Group by single column
        expected = df.groupby("A").sum()
        assert_frame_equal(df.groupby([pd.Grouper(key="A")]).sum(), expected)

        # Group by two columns
        # using a combination of strings and Grouper objects
        expected = df.groupby(["A", "B"]).sum()
        key_combinations = (
            # two Grouper objects
            [pd.Grouper(key="A"), pd.Grouper(key="B")],
            # a string and a Grouper object
            ["A", pd.Grouper(key="B")],
            # a Grouper object and a string
            [pd.Grouper(key="A"), "B"],
        )
        for keys in key_combinations:
            assert_frame_equal(df.groupby(keys).sum(), expected)

        # GH8866
        levels = [list("ab"), range(2), date_range("20130101", periods=2)]
        s = Series(
            np.arange(8, dtype="int64"),
            index=pd.MultiIndex.from_product(levels, names=["one", "two", "three"]),
        )
        result = s.groupby(pd.Grouper(level="three", freq="M")).sum()
        month_end = Index([Timestamp("2013-01-31")], freq="M", name="three")
        expected = Series([28], index=month_end)
        assert_series_equal(result, expected)

        # just specifying a level breaks
        result = s.groupby(pd.Grouper(level="one")).sum()
        expected = s.groupby(level="one").sum()
        assert_series_equal(result, expected)
示例#7
0
    def _get_index_loc(self, key, base_index=None):
        """
        Get the location of a specific key in an index

        Parameters
        ----------
        key : label
            The key for which to find the location
        base_index : pd.Index, optional
            Optionally the base index to search. If None, the model's index is
            searched.

        Returns
        -------
        loc : int or slice
            The location of the key, as returned by the index's `get_loc`.
        index : pd.Index
            The index including the key; this is a copy of the original index
            unless the index had to be expanded to accommodate `key`. It is
            truncated so that it ends at the located position (or at the last
            position covered by the located slice).
        index_was_expanded : bool
            Whether or not the index was expanded to accommodate `key`.

        Raises
        ------
        KeyError
            Propagated from `get_loc` if `key` is invalid.

        Notes
        -----
        If `key` is past the end of the given index, and the index is either
        an Int64Index or a date index, this function extends the index up to
        and including key, and then returns the location in the new index.

        """
        if base_index is None:
            base_index = self._index

        index = base_index
        date_index = isinstance(base_index, (PeriodIndex, DatetimeIndex))
        index_class = type(base_index)
        nobs = len(index)

        # Special handling for Int64Index
        if (isinstance(index, Int64Index) and not date_index and
                isinstance(key, (int, long, np.integer))):
            # Negative indices (that lie in the Index)
            if key < 0 and -key <= nobs:
                key = nobs + key
            # Out-of-sample (note that we include key itself in the new index)
            elif key > base_index[-1]:
                index = Int64Index(np.arange(base_index[0], key + 1))

        # Special handling for date indexes
        if date_index:
            # Integer key (i.e. already given a location)
            if isinstance(key, (int, long, np.integer)):
                # Negative indices (that lie in the Index)
                if key < 0 and -key < nobs:
                    key = index[nobs + key]
                # Out-of-sample (note that we include key itself in the new
                # index)
                elif key > len(base_index) - 1:
                    index = index_class(start=base_index[0], periods=key + 1,
                                        freq=base_index.freq)
                    key = index[-1]
                else:
                    key = index[key]
            # Other key types (i.e. string date or some datetime-like object)
            else:
                # Convert the key to the appropriate date-like object
                if index_class is PeriodIndex:
                    date_key = Period(key, freq=base_index.freq)
                else:
                    date_key = Timestamp(key)

                # Out-of-sample
                if date_key > base_index[-1]:
                    # First create an index that may not always include `key`
                    index = index_class(start=base_index[0], end=date_key,
                                        freq=base_index.freq)

                    # Now make sure we include `key`
                    if not index[-1] == date_key:
                        index = index_class(start=base_index[0],
                                            periods=len(index) + 1,
                                            freq=base_index.freq)

        # Get the location (note that get_loc will throw a KeyError if key is
        # invalid)
        loc = index.get_loc(key)

        # Check if we now have a modified index
        index_was_expanded = index is not base_index

        # (Never return the actual index object)
        if not index_was_expanded:
            index = index.copy()

        # Return the index through the end of the loc / slice. A slice's
        # `stop` is exclusive, so the last position it covers is `stop - 1`;
        # using `stop` directly would return one element too many.
        if isinstance(loc, slice):
            end = loc.stop - 1
        else:
            end = loc

        return loc, index[:end + 1], index_was_expanded
示例#8
0
    assert result.shape == (4, 2)
    assert isinstance(result["c0"].dtype, CategoricalDtype)
    assert isinstance(result["c1"].dtype, CategoricalDtype)


def test_apply_axis1_with_ea():
    # GH#36785
    # A row-wise identity apply on a tz-aware column must return a frame
    # equal to the input.
    stamp = Timestamp("2013-01-01", tz="UTC")
    expected = DataFrame({"A": [stamp]})

    result = expected.apply(lambda row: row, axis=1)
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
    "data, dtype",
    [
        (1, None),
        (1, CategoricalDtype([1])),
        (Timestamp("2013-01-01", tz="UTC"), None),
    ],
)
def test_agg_axis1_duplicate_index(data, dtype):
    # GH 42380
    # A row-wise identity agg on a frame with repeated index labels must
    # return a frame equal to the input.
    rows = [[data], [data]]
    expected = DataFrame(rows, index=["a", "a"], dtype=dtype)

    result = expected.agg(lambda row: row, axis=1)
    tm.assert_frame_equal(result, expected)


def test_apply_mixed_datetimelike():
    # mixed datetimelike
    # GH 7778
    expected = DataFrame({
        "A": date_range("20130101", periods=3),
        "B": pd.to_timedelta(np.arange(3), unit="s"),
    })
示例#9
0
def test_apply_axis1_with_ea():
    # GH#36785
    # Applying the identity function along axis=1 to a frame with a tz-aware
    # column must give back an equal frame.
    frame = DataFrame({"A": [Timestamp("2013-01-01", tz="UTC")]})

    def identity(row):
        return row

    tm.assert_frame_equal(frame.apply(identity, axis=1), frame)
示例#10
0
    def test_subtraction_ops_with_tz(self):
        # check that dt/dti subtraction ops with tz are validated
        dti = date_range('20130101', periods=3)
        ts = Timestamp('20130101')
        dt = ts.to_pydatetime()
        dti_tz = date_range('20130101', periods=3).tz_localize('US/Eastern')
        ts_tz = Timestamp('20130101').tz_localize('US/Eastern')
        ts_tz2 = Timestamp('20130101').tz_localize('CET')
        dt_tz = ts_tz.to_pydatetime()
        td = Timedelta('1 days')

        def _check(result, expected):
            # scalar results must compare equal and be Timedelta instances
            assert result == expected
            assert isinstance(result, Timedelta)

        zero = Timedelta('0 days')

        # scalars
        _check(ts - ts, zero)
        _check(dt_tz - ts_tz, zero)
        _check(ts_tz - dt_tz, zero)

        # tz mismatches (naive vs aware, or two different zones), first
        # between scalars and then with a DatetimeIndex on the left
        mismatched_ops = [
            lambda: dt_tz - ts,
            lambda: dt_tz - dt,
            lambda: dt_tz - ts_tz2,
            lambda: dt - dt_tz,
            lambda: ts - dt_tz,
            lambda: ts_tz2 - ts,
            lambda: ts_tz2 - dt,
            lambda: ts_tz - ts_tz2,
            # with dti
            lambda: dti - ts_tz,
            lambda: dti_tz - ts,
            lambda: dti_tz - ts_tz2,
        ]
        for op in mismatched_ops:
            pytest.raises(TypeError, op)

        # matching zones: index minus scalar counts up, scalar minus index
        # counts down
        ascending = TimedeltaIndex(['0 days', '1 days', '2 days'])
        descending = TimedeltaIndex(['0 days', '-1 days', '-2 days'])

        tm.assert_index_equal(dti_tz - dt_tz, ascending)
        tm.assert_index_equal(dt_tz - dti_tz, descending)
        tm.assert_index_equal(dti_tz - ts_tz, ascending)
        tm.assert_index_equal(ts_tz - dti_tz, descending)

        _check(td - td, zero)

        result = dti_tz - td
        expected = DatetimeIndex(['20121231', '20130101', '20130102'],
                                 tz='US/Eastern')
        tm.assert_index_equal(result, expected)
示例#11
0
    # see gh-15414
    s = Series([1, 2, 3])
    cond = [False, True, True]
    expected = Series([np.nan, 2, 3])

    result = s.where(klass(cond))
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize(
    "cond",
    [
        [1, 0, 1],
        Series([2, 5, 7]),
        ["True", "False", "True"],
        [Timestamp("2017-01-01"), pd.NaT, Timestamp("2017-01-02")],
    ],
)
def test_where_invalid_input(cond):
    # see gh-15414: only boolean arrays accepted
    ser = Series([1, 2, 3])

    # First the parametrized non-boolean condition, then a boolean condition
    # whose length does not match the Series.
    bad_conditions = [
        (cond, "Boolean array expected for the condition"),
        ([True], "Array conditional must be same shape as self"),
    ]
    for bad_cond, msg in bad_conditions:
        with pytest.raises(ValueError, match=msg):
            ser.where(bad_cond)
示例#12
0
    def test_tdi_radd_timestamp(self):
        idx = TimedeltaIndex(['1 day', '2 day'])

        result = Timestamp('2011-01-01') + idx
        expected = DatetimeIndex(['2011-01-02', '2011-01-03'])
        tm.assert_index_equal(result, expected)
示例#13
0
 def test_tdi_sub_timestamp_raises(self):
     # Subtracting a Timestamp from a TimedeltaIndex is expected to raise
     # TypeError with the message below.
     tdi = TimedeltaIndex(['1 day', '2 day'])
     stamp = Timestamp('2011-01-01')
     expected_msg = "cannot subtract a datelike from a TimedeltaIndex"
     with tm.assert_raises_regex(TypeError, expected_msg):
         tdi - stamp
示例#14
0
class TestDataFrameReplace(TestData):
    def test_replace_inplace(self):
        # Replacing NaN with 0 must agree with fillna(0), for a float frame
        # (in place) and for a mixed-type frame (copying call).
        # NOTE(review): self.tsframe / self.mixed_frame are presumably shared
        # fixtures supplied by TestData -- confirm; both are mutated below.
        self.tsframe['A'][:5] = nan
        self.tsframe['A'][-5:] = nan

        # scalar -> scalar, in place on a copy of the fixture
        tsframe = self.tsframe.copy()
        tsframe.replace(nan, 0, inplace=True)
        assert_frame_equal(tsframe, self.tsframe.fillna(0))

        # mixed type
        mf = self.mixed_frame
        mf.iloc[5:20, mf.columns.get_loc('foo')] = nan
        mf.iloc[-10:, mf.columns.get_loc('A')] = nan

        # note: this call is not in place; it checks the returned frame
        result = self.mixed_frame.replace(np.nan, 0)
        expected = self.mixed_frame.fillna(value=0)
        assert_frame_equal(result, expected)

        # single-element lists instead of scalars, in place again
        tsframe = self.tsframe.copy()
        tsframe.replace([nan], [0], inplace=True)
        assert_frame_equal(tsframe, self.tsframe.fillna(0))

    def test_regex_replace_scalar(self):
        # Scalar regex replacement on an all-object frame and on a frame that
        # mixes an integer column with an object column.
        obj = {'a': list('ab..'), 'b': list('efgh')}
        dfobj = DataFrame(obj)
        mix = {'a': lrange(4), 'b': list('ab..')}
        dfmix = DataFrame(mix)

        dot = r'\s*\.\s*'
        dot_group = r'\s*(\.)\s*'
        tripled = r'\1\1\1'

        # expected frames once every '.' has become '...'
        objc = obj.copy()
        objc['a'] = ['a', 'b', '...', '...']
        expec_obj = DataFrame(objc)
        mixc = mix.copy()
        mixc['b'] = ['a', 'b', '...', '...']
        expec_mix = DataFrame(mixc)

        # regex -> value: plain string pattern, then the compiled pattern
        for pattern in (dot, re.compile(dot)):
            for frame in (dfobj, dfmix):
                res = frame.replace(pattern, nan, regex=True)
                assert_frame_equal(frame, res.fillna('.'))

        # regex -> regex: the string pattern goes with regex=True, the
        # compiled pattern is passed without it
        for frame, expec in ((dfobj, expec_obj), (dfmix, expec_mix)):
            res = frame.replace(dot_group, tripled, regex=True)
            assert_frame_equal(res, expec)

            res = frame.replace(re.compile(dot_group), tripled)
            assert_frame_equal(res, expec)

        # pattern handed over through the ``regex`` keyword (mixed frame)
        for pattern in (re.compile(dot_group), dot_group):
            res = dfmix.replace(regex=pattern, value=tripled)
            assert_frame_equal(res, expec_mix)

    def test_regex_replace_scalar_inplace(self):
        # In-place scalar regex replacement. Every combination of
        # {positional + regex=True, regex=/value= keywords} x
        # {string pattern, compiled pattern} x {object frame, mixed frame}
        # is run for both regex -> value and regex -> regex.
        obj = {'a': list('ab..'), 'b': list('efgh')}
        dfobj = DataFrame(obj)
        mix = {'a': lrange(4), 'b': list('ab..')}
        dfmix = DataFrame(mix)

        dot = r'\s*\.\s*'
        dot_group = r'\s*(\.)\s*'
        tripled = r'\1\1\1'

        # expected frames once every '.' has become '...'
        objc = obj.copy()
        objc['a'] = ['a', 'b', '...', '...']
        expec_obj = DataFrame(objc)
        mixc = mix.copy()
        mixc['b'] = ['a', 'b', '...', '...']
        expec_mix = DataFrame(mixc)

        def positional(frame, pattern, value):
            frame.replace(pattern, value, regex=True, inplace=True)

        def keyword(frame, pattern, value):
            frame.replace(regex=pattern, value=value, inplace=True)

        for do_replace in (positional, keyword):
            # regex -> value
            for pattern in (dot, re.compile(dot)):
                for frame in (dfobj, dfmix):
                    res = frame.copy()
                    do_replace(res, pattern, nan)
                    assert_frame_equal(frame, res.fillna('.'))

            # regex -> regex
            for pattern in (dot_group, re.compile(dot_group)):
                for frame, expec in ((dfobj, expec_obj), (dfmix, expec_mix)):
                    res = frame.copy()
                    do_replace(res, pattern, tripled)
                    assert_frame_equal(res, expec)

    def test_regex_replace_list_obj(self):
        # Lists of regexes and values on an all-object frame. Each case is
        # (positional args, keyword args, expected frame contents).
        obj = {'a': list('ab..'), 'b': list('efgh'), 'c': list('helo')}
        dfobj = DataFrame(obj)

        cases = [
            # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN]
            (
                ([r'\s*\.\s*', r'e|f|g'], [nan, 'crap']),
                {'regex': True},
                {
                    'a': ['a', 'b', nan, nan],
                    'b': ['crap'] * 3 + ['h'],
                    'c': ['h', 'crap', 'l', 'o'],
                },
            ),
            # list of [re1, re2, ..., reN] -> [re1, re2, .., reN]
            (
                ([r'\s*(\.)\s*', r'(e|f|g)'], [r'\1\1', r'\1_crap']),
                {'regex': True},
                {
                    'a': ['a', 'b', '..', '..'],
                    'b': ['e_crap', 'f_crap', 'g_crap', 'h'],
                    'c': ['h', 'e_crap', 'l', 'o'],
                },
            ),
            # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ...,
            # (reN or vN)]
            (
                ([r'\s*(\.)\s*', r'e'], [r'\1\1', r'crap']),
                {'regex': True},
                {
                    'a': ['a', 'b', '..', '..'],
                    'b': ['crap', 'f', 'g', 'h'],
                    'c': ['h', 'crap', 'l', 'o'],
                },
            ),
            # the previous case again, spelled with value=/regex= keywords
            (
                (),
                {'value': [r'\1\1', r'crap'],
                 'regex': [r'\s*(\.)\s*', r'e']},
                {
                    'a': ['a', 'b', '..', '..'],
                    'b': ['crap', 'f', 'g', 'h'],
                    'c': ['h', 'crap', 'l', 'o'],
                },
            ),
        ]

        for args, kwargs, expected in cases:
            res = dfobj.replace(*args, **kwargs)
            assert_frame_equal(res, DataFrame(expected))

    def test_regex_replace_list_obj_inplace(self):
        # same as above with inplace=True
        # Each case is (positional args, keyword args, expected contents);
        # the replacement runs in place on a fresh copy of the frame.
        obj = {'a': list('ab..'), 'b': list('efgh'), 'c': list('helo')}
        dfobj = DataFrame(obj)

        cases = [
            # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN]
            (
                ([r'\s*\.\s*', r'e|f|g'], [nan, 'crap']),
                {'regex': True},
                {
                    'a': ['a', 'b', nan, nan],
                    'b': ['crap'] * 3 + ['h'],
                    'c': ['h', 'crap', 'l', 'o'],
                },
            ),
            # list of [re1, re2, ..., reN] -> [re1, re2, .., reN]
            (
                ([r'\s*(\.)\s*', r'(e|f|g)'], [r'\1\1', r'\1_crap']),
                {'regex': True},
                {
                    'a': ['a', 'b', '..', '..'],
                    'b': ['e_crap', 'f_crap', 'g_crap', 'h'],
                    'c': ['h', 'e_crap', 'l', 'o'],
                },
            ),
            # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ...,
            # (reN or vN)]
            (
                ([r'\s*(\.)\s*', r'e'], [r'\1\1', r'crap']),
                {'regex': True},
                {
                    'a': ['a', 'b', '..', '..'],
                    'b': ['crap', 'f', 'g', 'h'],
                    'c': ['h', 'crap', 'l', 'o'],
                },
            ),
            # the previous case again, spelled with value=/regex= keywords
            (
                (),
                {'value': [r'\1\1', r'crap'],
                 'regex': [r'\s*(\.)\s*', r'e']},
                {
                    'a': ['a', 'b', '..', '..'],
                    'b': ['crap', 'f', 'g', 'h'],
                    'c': ['h', 'crap', 'l', 'o'],
                },
            ),
        ]

        for args, kwargs, expected in cases:
            res = dfobj.copy()
            res.replace(*args, inplace=True, **kwargs)
            assert_frame_equal(res, DataFrame(expected))

    def test_regex_replace_list_mixed(self):
        # mixed frame to make sure this doesn't break things
        mix = {'a': lrange(4), 'b': list('ab..')}
        dfmix = DataFrame(mix)

        # lists of regexes and values
        # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN]
        # (this case runs on a wider frame with an extra object column)
        mix2 = {'a': lrange(4), 'b': list('ab..'), 'c': list('halo')}
        dfmix2 = DataFrame(mix2)
        res = dfmix2.replace([r'\s*\.\s*', r'a'], [nan, 'crap'], regex=True)
        expec = DataFrame({
            'a': mix2['a'],
            'b': ['crap', 'b', nan, nan],
            'c': ['h', 'crap', 'l', 'o'],
        })
        assert_frame_equal(res, expec)

        # Remaining cases run on the two-column frame; each entry is
        # (positional args, keyword args, expected 'b' column).
        cases = [
            # list of [re1, re2, ..., reN] -> [re1, re2, .., reN]
            (
                ([r'\s*(\.)\s*', r'(a|b)'], [r'\1\1', r'\1_crap']),
                {'regex': True},
                ['a_crap', 'b_crap', '..', '..'],
            ),
            # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ...,
            # (reN or vN)]
            (
                ([r'\s*(\.)\s*', r'a', r'(b)'],
                 [r'\1\1', r'crap', r'\1_crap']),
                {'regex': True},
                ['crap', 'b_crap', '..', '..'],
            ),
            # the previous case again, spelled with regex=/value= keywords
            (
                (),
                {'regex': [r'\s*(\.)\s*', r'a', r'(b)'],
                 'value': [r'\1\1', r'crap', r'\1_crap']},
                ['crap', 'b_crap', '..', '..'],
            ),
        ]
        for args, kwargs, expected_b in cases:
            res = dfmix.replace(*args, **kwargs)
            expec = DataFrame({'a': mix['a'], 'b': expected_b})
            assert_frame_equal(res, expec)

    def test_regex_replace_list_mixed_inplace(self):
        # the same inplace
        # lists of regexes and values; each entry is
        # (positional args, keyword args, expected 'b' column) and the
        # replacement runs in place on a fresh copy of the frame.
        mix = {'a': lrange(4), 'b': list('ab..')}
        dfmix = DataFrame(mix)

        cases = [
            # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN]
            (
                ([r'\s*\.\s*', r'a'], [nan, 'crap']),
                {'regex': True},
                ['crap', 'b', nan, nan],
            ),
            # list of [re1, re2, ..., reN] -> [re1, re2, .., reN]
            (
                ([r'\s*(\.)\s*', r'(a|b)'], [r'\1\1', r'\1_crap']),
                {'regex': True},
                ['a_crap', 'b_crap', '..', '..'],
            ),
            # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ...,
            # (reN or vN)]
            (
                ([r'\s*(\.)\s*', r'a', r'(b)'],
                 [r'\1\1', r'crap', r'\1_crap']),
                {'regex': True},
                ['crap', 'b_crap', '..', '..'],
            ),
            # the previous case again, spelled with regex=/value= keywords
            (
                (),
                {'regex': [r'\s*(\.)\s*', r'a', r'(b)'],
                 'value': [r'\1\1', r'crap', r'\1_crap']},
                ['crap', 'b_crap', '..', '..'],
            ),
        ]
        for args, kwargs, expected_b in cases:
            res = dfmix.copy()
            res.replace(*args, inplace=True, **kwargs)
            expec = DataFrame({'a': mix['a'], 'b': expected_b})
            assert_frame_equal(res, expec)

    def test_regex_replace_dict_mixed(self):
        # Dict-driven regex replacement on a frame with mixed column types.
        # Every scenario is checked twice: through the copying call (res) and
        # through the inplace=True call on a copy (res2).
        mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']}
        dfmix = DataFrame(mix)

        # dicts
        # single dict {re1: v1}, search the whole frame
        # need test for this...

        # list of dicts {re1: v1, re2: v2, ..., re3: v3}, search the whole
        # frame
        res = dfmix.replace({'b': r'\s*\.\s*'}, {'b': nan}, regex=True)
        res2 = dfmix.copy()
        res2.replace({'b': r'\s*\.\s*'}, {'b': nan}, inplace=True, regex=True)
        expec = DataFrame({
            'a': mix['a'],
            'b': ['a', 'b', nan, nan],
            'c': mix['c']
        })
        assert_frame_equal(res, expec)
        assert_frame_equal(res2, expec)

        # list of dicts {re1: re11, re2: re12, ..., reN: re1N}, search the
        # whole frame
        res = dfmix.replace({'b': r'\s*(\.)\s*'}, {'b': r'\1ty'}, regex=True)
        res2 = dfmix.copy()
        res2.replace({'b': r'\s*(\.)\s*'}, {'b': r'\1ty'},
                     inplace=True,
                     regex=True)
        expec = DataFrame({
            'a': mix['a'],
            'b': ['a', 'b', '.ty', '.ty'],
            'c': mix['c']
        })
        assert_frame_equal(res, expec)
        assert_frame_equal(res2, expec)

        # same replacement, spelled with the regex=/value= keywords
        res = dfmix.replace(regex={'b': r'\s*(\.)\s*'}, value={'b': r'\1ty'})
        res2 = dfmix.copy()
        res2.replace(regex={'b': r'\s*(\.)\s*'},
                     value={'b': r'\1ty'},
                     inplace=True)
        expec = DataFrame({
            'a': mix['a'],
            'b': ['a', 'b', '.ty', '.ty'],
            'c': mix['c']
        })
        assert_frame_equal(res, expec)
        assert_frame_equal(res2, expec)

        # scalar -> dict
        # to_replace regex, {value: value}
        expec = DataFrame({
            'a': mix['a'],
            'b': [nan, 'b', '.', '.'],
            'c': mix['c']
        })
        res = dfmix.replace('a', {'b': nan}, regex=True)
        res2 = dfmix.copy()
        res2.replace('a', {'b': nan}, regex=True, inplace=True)
        assert_frame_equal(res, expec)
        assert_frame_equal(res2, expec)

        # scalar -> dict again, spelled with the regex=/value= keywords.
        # The copying call uses the keyword form too, so that both the
        # copying and the in-place keyword paths are exercised (previously
        # the copying call merely repeated the positional form above).
        res = dfmix.replace(regex='a', value={'b': nan})
        res2 = dfmix.copy()
        res2.replace(regex='a', value={'b': nan}, inplace=True)
        expec = DataFrame({
            'a': mix['a'],
            'b': [nan, 'b', '.', '.'],
            'c': mix['c']
        })
        assert_frame_equal(res, expec)
        assert_frame_equal(res2, expec)

    def test_regex_replace_dict_nested(self):
        # nested dicts will not work until this is implemented for Series
        # The same {column: {pattern: value}} spec is applied four ways:
        # positional or via regex=, each copying and in place.
        mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']}
        dfmix = DataFrame(mix)

        def nested_spec():
            # a fresh dict for every call, as in separate literals
            return {'b': {r'\s*\.\s*': nan}}

        res = dfmix.replace(nested_spec(), regex=True)

        res2 = dfmix.copy()
        res2.replace(nested_spec(), inplace=True, regex=True)

        res3 = dfmix.replace(regex=nested_spec())

        res4 = dfmix.copy()
        res4.replace(regex=nested_spec(), inplace=True)

        expec = DataFrame({
            'a': mix['a'],
            'b': ['a', 'b', nan, nan],
            'c': mix['c'],
        })
        for outcome in (res, res2, res3, res4):
            assert_frame_equal(outcome, expec)

    def test_regex_replace_dict_nested_gh4115(self):
        # Nested-dict replacement of string codes by integers in one column;
        # the other column is expected to come through unchanged.
        code_to_int = {'Q': 0, 'T': 1}
        df = pd.DataFrame({'Type': ['Q', 'T', 'Q', 'Q', 'T'], 'tmp': 2})

        result = df.replace({'Type': code_to_int})

        expected = DataFrame({'Type': [0, 1, 0, 0, 1], 'tmp': 2})
        assert_frame_equal(result, expected)

    def test_regex_replace_list_to_scalar(self):
        # A list of regexes mapped onto one scalar value (NaN), checked for
        # the copying call, the positional in-place call and the keyword
        # in-place call.
        mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']}
        df = DataFrame(mix)

        def patterns():
            # a fresh list for every call, as in separate literals
            return [r'\s*\.\s*', 'a|b']

        expec = DataFrame({
            'a': mix['a'],
            'b': np.array([nan] * 4),
            'c': [nan, nan, nan, 'd'],
        })

        res = df.replace(patterns(), nan, regex=True)

        res2 = df.copy()
        res2.replace(patterns(), nan, regex=True, inplace=True)

        res3 = df.copy()
        res3.replace(regex=patterns(), value=nan, inplace=True)

        for outcome in (res, res2, res3):
            assert_frame_equal(outcome, expec)

    def test_regex_replace_str_to_numeric(self):
        # what happens when you try to replace a numeric value with a regex?
        # Checked for the copying call, the positional in-place call and the
        # keyword in-place call.
        mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']}
        df = DataFrame(mix)
        dot = r'\s*\.\s*'

        res = df.replace(dot, 0, regex=True)

        res2 = df.copy()
        res2.replace(dot, 0, inplace=True, regex=True)

        res3 = df.copy()
        res3.replace(regex=dot, value=0, inplace=True)

        expec = DataFrame({
            'a': mix['a'],
            'b': ['a', 'b', 0, 0],
            'c': mix['c'],
        })
        for outcome in (res, res2, res3):
            assert_frame_equal(outcome, expec)

    def test_regex_replace_regex_list_to_numeric(self):
        # A list of regexes mapped onto the integer 0, checked for the
        # copying call, the positional in-place call and the keyword
        # in-place call.
        mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']}
        df = DataFrame(mix)

        def patterns():
            # a fresh list for every call, as in separate literals
            return [r'\s*\.\s*', 'b']

        res = df.replace(patterns(), 0, regex=True)

        res2 = df.copy()
        res2.replace(patterns(), 0, regex=True, inplace=True)

        res3 = df.copy()
        res3.replace(regex=patterns(), value=0, inplace=True)

        expec = DataFrame({
            'a': mix['a'],
            'b': ['a', 0, 0, 0],
            'c': ['a', 0, nan, 'd'],
        })
        for outcome in (res, res2, res3):
            assert_frame_equal(outcome, expec)

    def test_regex_replace_series_of_regexes(self):
        # Patterns and replacement values supplied as Series keyed by column
        # name, checked for the copying call, the positional in-place call
        # and the keyword in-place call.
        mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']}
        df = DataFrame(mix)
        patterns = Series({'b': r'\s*\.\s*'})
        replacements = Series({'b': nan})

        res = df.replace(patterns, replacements, regex=True)

        res2 = df.copy()
        res2.replace(patterns, replacements, inplace=True, regex=True)

        res3 = df.copy()
        res3.replace(regex=patterns, value=replacements, inplace=True)

        expec = DataFrame({
            'a': mix['a'],
            'b': ['a', 'b', nan, nan],
            'c': mix['c'],
        })
        for outcome in (res, res2, res3):
            assert_frame_equal(outcome, expec)

    def test_regex_replace_numeric_to_object_conversion(self):
        # Replacing an integer with a string in an integer column is expected
        # to turn that column into object dtype.
        mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']}
        df = DataFrame(mix)

        res = df.replace(0, 'a')

        expec = DataFrame({'a': ['a', 1, 2, 3], 'b': mix['b'], 'c': mix['c']})
        assert_frame_equal(res, expec)
        assert res.a.dtype == np.object_

    @pytest.mark.parametrize('metachar', ['[]', '()', r'\d', r'\w', r'\s'])
    def test_replace_regex_metachar(self, metachar):
        # A nested-dict key that looks like regex syntax is expected to
        # replace the cell holding exactly that string and nothing else.
        to_replace = {'a': {metachar: 'paren'}}
        df = DataFrame({'a': [metachar, 'else']})
        expected = DataFrame({'a': ['paren', 'else']})

        result = df.replace(to_replace)
        assert_frame_equal(result, expected)

    def test_replace(self):
        """Check scalar ``replace`` on a float, an empty and a mixed frame."""
        # Put NaN at both ends of column 'A' of the shared fixture frame.
        self.tsframe['A'][:5] = nan
        self.tsframe['A'][-5:] = nan

        # Replacing NaN must match fillna, and replacing back must round-trip.
        zero_filled = self.tsframe.replace(nan, -1e8)
        assert_frame_equal(zero_filled, self.tsframe.fillna(-1e8))
        assert_frame_equal(zero_filled.replace(-1e8, nan), self.tsframe)

        # NOTE(review): nothing below asserts on self.tsframe again, so these
        # three assignments look like leftovers -- confirm before removing.
        self.tsframe['A'][:5] = nan
        self.tsframe['A'][-5:] = nan
        self.tsframe['B'][:5] = -1e8

        # empty
        df = DataFrame(index=['a', 'b'])
        assert_frame_equal(df, df.replace(5, 7))

        # GH 11698
        # test for mixed data types.
        df = pd.DataFrame([('-', pd.to_datetime('20150101')),
                           ('a', pd.to_datetime('20150102'))])
        df1 = df.replace('-', np.nan)
        expected_df = pd.DataFrame([(np.nan, pd.to_datetime('20150101')),
                                    ('a', pd.to_datetime('20150102'))])
        assert_frame_equal(df1, expected_df)

    def test_replace_list(self):
        """List-to-list replacement pairs each target with its own value.

        ``regex`` is not passed, so '.' and the single letters are used
        as plain cell values.
        """
        dfobj = DataFrame({
            'a': list('ab..'),
            'b': list('efgh'),
            'c': list('helo')
        })

        # Each case: [v1, v2, ..., vN] -> [v1, v2, ..., vN], expected columns
        cases = [
            ([r'.', r'e'], [nan, 'crap'], {
                'a': ['a', 'b', nan, nan],
                'b': ['crap', 'f', 'g', 'h'],
                'c': ['h', 'crap', 'l', 'o']
            }),
            ([r'.', r'f'], [r'..', r'crap'], {
                'a': ['a', 'b', '..', '..'],
                'b': ['e', 'crap', 'g', 'h'],
                'c': ['h', 'e', 'l', 'o']
            }),
        ]
        for targets, replacements, wanted in cases:
            res = dfobj.replace(targets, replacements)
            assert_frame_equal(res, DataFrame(wanted))

    def test_replace_with_empty_list(self):
        """Empty-list ``to_replace`` is a no-op; list values in a dict raise."""
        # GH 21977
        nested = pd.Series([['a', 'b'], [], np.nan, [1]])
        df = pd.DataFrame({'col': nested})
        # The frame is expected to come back unchanged.
        assert_frame_equal(df.replace([], np.nan), df)

        # GH 19266
        for bad_value in ([], ['dummy', 'alt']):
            with tm.assert_raises_regex(ValueError, "cannot assign mismatch"):
                df.replace({np.nan: bad_value})

    def test_replace_series_dict(self):
        """Per-column replacement values given as a dict or as a Series."""
        # from GH 3064
        def build_frame():
            return DataFrame({
                'zero': {'a': 0.0, 'b': 1},
                'one': {'a': 2.0, 'b': 0}
            })

        per_column = {'zero': 0.5, 'one': 1.0}

        # scalar to dict, then scalar to Series (the column means)
        df = build_frame()
        expected = DataFrame({
            'zero': {'a': 0.5, 'b': 1},
            'one': {'a': 2.0, 'b': 1.0}
        })
        assert_frame_equal(df.replace(0, dict(per_column)), expected)
        assert_frame_equal(df.replace(0, df.mean()), expected)

        # series to series/dict
        df = build_frame()
        s = Series({'zero': 0.0, 'one': 2.0})
        expected = DataFrame({
            'zero': {'a': 0.5, 'b': 1},
            'one': {'a': 1.0, 'b': 0.0}
        })
        assert_frame_equal(df.replace(s, dict(per_column)), expected)
        assert_frame_equal(df.replace(s, df.mean()), expected)

    def test_replace_convert(self):
        """After mapping every string to an int, all columns are int64."""
        # gh 3907
        frame = DataFrame([['foo', 'bar', 'bah'], ['bar', 'foo', 'bah']])
        replaced = frame.replace({'foo': 1, 'bar': 2, 'bah': 3})
        wanted_dtypes = Series([np.int64] * 3)
        assert_series_equal(wanted_dtypes, replaced.dtypes)

    def test_replace_mixed(self):
        """Check ``replace`` on mixed-dtype frames, including upcasting."""
        # Put NaN into an interior slice of 'foo' and the tail of 'A' on the
        # shared fixture (mf is an alias of self.mixed_frame).
        mf = self.mixed_frame
        mf.iloc[5:20, mf.columns.get_loc('foo')] = nan
        mf.iloc[-10:, mf.columns.get_loc('A')] = nan

        # NaN -> int: must match fillna and round-trip back to the original.
        result = self.mixed_frame.replace(np.nan, -18)
        expected = self.mixed_frame.fillna(value=-18)
        assert_frame_equal(result, expected)
        assert_frame_equal(result.replace(-18, nan), self.mixed_frame)

        # Same check with a float sentinel.
        result = self.mixed_frame.replace(np.nan, -1e8)
        expected = self.mixed_frame.fillna(value=-1e8)
        assert_frame_equal(result, expected)
        assert_frame_equal(result.replace(-1e8, nan), self.mixed_frame)

        # int block upcasting
        df = DataFrame({
            'A': Series([1.0, 2.0], dtype='float64'),
            'B': Series([0, 1], dtype='int64')
        })
        expected = DataFrame({
            'A': Series([1.0, 2.0], dtype='float64'),
            'B': Series([0.5, 1], dtype='float64')
        })
        result = df.replace(0, 0.5)
        assert_frame_equal(result, expected)

        # The in-place variant must give the same upcast frame.
        df.replace(0, 0.5, inplace=True)
        assert_frame_equal(df, expected)

        # int block splitting
        df = DataFrame({
            'A': Series([1.0, 2.0], dtype='float64'),
            'B': Series([0, 1], dtype='int64'),
            'C': Series([1, 2], dtype='int64')
        })
        expected = DataFrame({
            'A': Series([1.0, 2.0], dtype='float64'),
            'B': Series([0.5, 1], dtype='float64'),
            'C': Series([1, 2], dtype='int64')
        })
        result = df.replace(0, 0.5)
        assert_frame_equal(result, expected)

        # to object block upcasting
        df = DataFrame({
            'A': Series([1.0, 2.0], dtype='float64'),
            'B': Series([0, 1], dtype='int64')
        })
        expected = DataFrame({
            'A': Series([1, 'foo'], dtype='object'),
            'B': Series([0, 1], dtype='int64')
        })
        result = df.replace(2, 'foo')
        assert_frame_equal(result, expected)

        # List-to-list replacement that turns both columns into object.
        expected = DataFrame({
            'A': Series(['foo', 'bar'], dtype='object'),
            'B': Series([0, 'foo'], dtype='object')
        })
        result = df.replace([1, 2], ['foo', 'bar'])
        assert_frame_equal(result, expected)

        # Replace a scalar with the per-column mean, passed as a dict.
        # (The original comment read "test case from" with no reference.)
        df = DataFrame({
            'A': Series([3, 0], dtype='int64'),
            'B': Series([0, 3], dtype='int64')
        })
        result = df.replace(3, df.mean().to_dict())
        expected = df.copy().astype('float64')
        m = df.mean()
        expected.iloc[0, 0] = m[0]
        expected.iloc[1, 1] = m[1]
        assert_frame_equal(result, expected)

    def test_replace_simple_nested_dict(self):
        df = DataFrame({'col': range(1, 5)})
        expected = DataFrame({'col': ['a', 2, 3, 'b']})

        result = df.replace({'col': {1: 'a', 4: 'b'}})
        assert_frame_equal(expected, result)

        # in this case, should be the same as the not nested version
        result = df.replace({1: 'a', 4: 'b'})
        assert_frame_equal(expected, result)

    def test_replace_simple_nested_dict_with_nonexistent_value(self):
        df = DataFrame({'col': range(1, 5)})
        expected = DataFrame({'col': ['a', 2, 3, 'b']})

        result = df.replace({-1: '-', 1: 'a', 4: 'b'})
        assert_frame_equal(expected, result)

        result = df.replace({'col': {-1: '-', 1: 'a', 4: 'b'}})
        assert_frame_equal(expected, result)

    def test_replace_value_is_none(self):
        orig_value = self.tsframe.iloc[0, 0]
        orig2 = self.tsframe.iloc[1, 0]

        self.tsframe.iloc[0, 0] = nan
        self.tsframe.iloc[1, 0] = 1

        result = self.tsframe.replace(to_replace={nan: 0})
        expected = self.tsframe.T.replace(to_replace={nan: 0}).T
        assert_frame_equal(result, expected)

        result = self.tsframe.replace(to_replace={nan: 0, 1: -1e8})
        tsframe = self.tsframe.copy()
        tsframe.iloc[0, 0] = 0
        tsframe.iloc[1, 0] = -1e8
        expected = tsframe
        assert_frame_equal(expected, result)
        self.tsframe.iloc[0, 0] = orig_value
        self.tsframe.iloc[1, 0] = orig2

    def test_replace_for_new_dtypes(self):
        """Repeat the NaN replace/round-trip check on a float32 frame."""

        # dtypes
        # Work on a float32 copy of the fixture and put NaN at both ends of
        # column 'A'.
        tsframe = self.tsframe.copy().astype(np.float32)
        tsframe['A'][:5] = nan
        tsframe['A'][-5:] = nan

        # Replacing NaN must match fillna, and replacing back must round-trip.
        zero_filled = tsframe.replace(nan, -1e8)
        assert_frame_equal(zero_filled, tsframe.fillna(-1e8))
        assert_frame_equal(zero_filled.replace(-1e8, nan), tsframe)

        tsframe['A'][:5] = nan
        tsframe['A'][-5:] = nan
        tsframe['B'][:5] = -1e8

        # Turn the -1e8 sentinel in 'B' back into NaN via a boolean mask.
        b = tsframe['B']
        b[b == -1e8] = nan
        tsframe['B'] = b
        # NOTE(review): both sides of this assertion are the same fillna
        # call, so it can only fail if fillna is non-deterministic or raises.
        # Confirm what was meant to be compared here.
        result = tsframe.fillna(method='bfill')
        assert_frame_equal(result, tsframe.fillna(method='bfill'))

    @pytest.mark.parametrize(
        'frame, to_replace, value, expected',
        [
            (DataFrame({'ints': [1, 2, 3]}), 1, 0,
             DataFrame({'ints': [0, 2, 3]})),
            (DataFrame({'ints': [1, 2, 3]}, dtype=np.int32), 1, 0,
             DataFrame({'ints': [0, 2, 3]}, dtype=np.int32)),
            (DataFrame({'ints': [1, 2, 3]}, dtype=np.int16), 1, 0,
             DataFrame({'ints': [0, 2, 3]}, dtype=np.int16)),
            (DataFrame({'bools': [True, False, True]}), False, True,
             DataFrame({'bools': [True, True, True]})),
            (DataFrame({'complex': [1j, 2j, 3j]}), 1j, 0,
             DataFrame({'complex': [0j, 2j, 3j]})),
            (DataFrame({
                'datetime64':
                Index([
                    datetime(2018, 5, 28),
                    datetime(2018, 7, 28),
                    datetime(2018, 5, 28)
                ])
            }), datetime(2018, 5, 28), datetime(2018, 7, 28),
             DataFrame({'datetime64': Index([datetime(2018, 7, 28)] * 3)})),
            # GH 20380
            (DataFrame({
                'dt': [datetime(3017, 12, 20)],
                'str': ['foo']
            }), 'foo', 'bar',
             DataFrame({
                 'dt': [datetime(3017, 12, 20)],
                 'str': ['bar']
             })),
            (DataFrame({
                'A': date_range('20130101', periods=3, tz='US/Eastern'),
                'B': [0, np.nan, 2]
            }), Timestamp('20130102', tz='US/Eastern'),
             Timestamp('20130104', tz='US/Eastern'),
             DataFrame({
                 'A': [
                     Timestamp('20130101', tz='US/Eastern'),
                     Timestamp('20130104', tz='US/Eastern'),
                     Timestamp('20130103', tz='US/Eastern')
                 ],
                 'B': [0, np.nan, 2]
             }))
        ])
    def test_replace_dtypes(self, frame, to_replace, value, expected):
        """Scalar replacement across several column dtypes.

        Each parametrized case supplies the input frame, the scalar to look
        for, its replacement and the frame expected back.
        """
        # Call the method directly; looking up a constant attribute name
        # through getattr added nothing.
        result = frame.replace(to_replace, value)
        assert_frame_equal(result, expected)

    def test_replace_input_formats_listlike(self):
        """Dict/dict, list/list and scalar/dict argument pairings."""
        # both dicts
        to_rep = {'A': np.nan, 'B': 0, 'C': ''}
        values = {'A': 0, 'B': -1, 'C': 'missing'}
        df = DataFrame({
            'A': [np.nan, 0, np.inf],
            'B': [0, 2, 5],
            'C': ['', 'asdf', 'fd']
        })
        filled = df.replace(to_rep, values)
        # Expected frame: the same replacement applied column by column.
        by_column = {
            col: ser.replace(to_rep[col], values[col])
            for col, ser in compat.iteritems(df)
        }
        assert_frame_equal(filled, DataFrame(by_column))

        swapped = df.replace([0, 2, 5], [5, 2, 0])
        assert_frame_equal(swapped, DataFrame({
            'A': [np.nan, 5, np.inf],
            'B': [5, 2, 0],
            'C': ['', 'asdf', 'fd']
        }))

        # scalar to dict
        values = {'A': 0, 'B': -1, 'C': 'missing'}
        df = DataFrame({
            'A': [np.nan, 0, np.nan],
            'B': [0, 2, 5],
            'C': ['', 'asdf', 'fd']
        })
        filled = df.replace(np.nan, values)
        by_column = {
            col: ser.replace(np.nan, values[col])
            for col, ser in compat.iteritems(df)
        }
        assert_frame_equal(filled, DataFrame(by_column))

        # list to list
        to_rep = [np.nan, 0, '']
        values = [-2, -1, 'missing']
        result = df.replace(to_rep, values)
        # Expected frame: the pairs applied one after another, in place.
        expected = df.copy()
        for old, new in zip(to_rep, values):
            expected.replace(old, new, inplace=True)
        assert_frame_equal(result, expected)

        # lists of different length are rejected
        pytest.raises(ValueError, df.replace, to_rep, values[1:])

    def test_replace_input_formats_scalar(self):
        """Dict or list ``to_replace`` combined with a scalar ``value``."""
        df = DataFrame({
            'A': [np.nan, 0, np.inf],
            'B': [0, 2, 5],
            'C': ['', 'asdf', 'fd']
        })

        # dict to scalar
        to_rep = {'A': np.nan, 'B': 0, 'C': ''}
        filled = df.replace(to_rep, 0)
        by_column = {
            col: ser.replace(to_rep[col], 0)
            for col, ser in compat.iteritems(df)
        }
        assert_frame_equal(filled, DataFrame(by_column))

        # a dict paired with a list value is a type error
        pytest.raises(TypeError, df.replace, to_rep, [np.nan, 0, ''])

        # list to scalar
        to_rep = [np.nan, 0, '']
        result = df.replace(to_rep, -1)
        expected = df.copy()
        for old in to_rep:
            expected.replace(old, -1, inplace=True)
        assert_frame_equal(result, expected)

    def test_replace_limit(self):
        pass

    def test_replace_dict_no_regex(self):
        """A plain dict maps whole-string answers to their numeric weights."""
        labels = [
            'Strongly Agree', 'Agree', 'Neutral', 'Disagree',
            'Strongly Disagree'
        ]
        answer = Series(dict(enumerate(labels)))
        weights = {
            'Agree': 4,
            'Disagree': 2,
            'Neutral': 3,
            'Strongly Agree': 5,
            'Strongly Disagree': 1
        }
        wanted = Series(dict(enumerate([5, 4, 3, 2, 1])))
        assert_series_equal(answer.replace(weights), wanted)

    def test_replace_series_no_regex(self):
        """Same mapping as the dict variant, supplied as a Series."""
        labels = [
            'Strongly Agree', 'Agree', 'Neutral', 'Disagree',
            'Strongly Disagree'
        ]
        answer = Series(dict(enumerate(labels)))
        weights = Series({
            'Agree': 4,
            'Disagree': 2,
            'Neutral': 3,
            'Strongly Agree': 5,
            'Strongly Disagree': 1
        })
        wanted = Series(dict(enumerate([5, 4, 3, 2, 1])))
        assert_series_equal(answer.replace(weights), wanted)

    def test_replace_dict_tuple_list_ordering_remains_the_same(self):
        df = DataFrame(dict(A=[nan, 1]))
        res1 = df.replace(to_replace={nan: 0, 1: -1e8})
        res2 = df.replace(to_replace=(1, nan), value=[-1e8, 0])
        res3 = df.replace(to_replace=[1, nan], value=[-1e8, 0])

        expected = DataFrame({'A': [0, -1e8]})
        assert_frame_equal(res1, res2)
        assert_frame_equal(res2, res3)
        assert_frame_equal(res3, expected)

    def test_replace_doesnt_replace_without_regex(self):
        raw = """fol T_opp T_Dir T_Enh
        0    1     0     0    vo
        1    2    vr     0     0
        2    2     0     0     0
        3    3     0    bt     0"""
        df = pd.read_csv(StringIO(raw), sep=r'\s+')
        res = df.replace({r'\D': 1})
        assert_frame_equal(df, res)

    def test_replace_bool_with_string(self):
        df = DataFrame({'a': [True, False], 'b': list('ab')})
        result = df.replace(True, 'a')
        expected = DataFrame({'a': ['a', False], 'b': df.b})
        assert_frame_equal(result, expected)

    def test_replace_pure_bool_with_string_no_op(self):
        df = DataFrame(np.random.rand(2, 2) > 0.5)
        result = df.replace('asdf', 'fdsa')
        assert_frame_equal(df, result)

    def test_replace_bool_with_bool(self):
        df = DataFrame(np.random.rand(2, 2) > 0.5)
        result = df.replace(False, True)
        expected = DataFrame(np.ones((2, 2), dtype=bool))
        assert_frame_equal(result, expected)

    def test_replace_with_dict_with_bool_keys(self):
        """A mapping with both a string key and a bool key must raise."""
        frame = DataFrame({0: [True, False], 1: [False, True]})
        mixed_keys = {'asdf': 'asdb', True: 'yes'}
        with tm.assert_raises_regex(TypeError, 'Cannot compare types .+'):
            frame.replace(mixed_keys)

    def test_replace_truthy(self):
        df = DataFrame({'a': [True, True]})
        r = df.replace([np.inf, -np.inf], np.nan)
        e = df
        assert_frame_equal(r, e)

    def test_replace_int_to_int_chain(self):
        """An int mapping whose keys and values overlap must be rejected."""
        frame = DataFrame({'a': lrange(1, 5)})
        # 1 -> 2, 2 -> 3, 3 -> 4, 4 -> 5: each value is also a later key.
        shifted = dict(zip(range(1, 5), range(2, 6)))
        with tm.assert_raises_regex(ValueError, "Replacement not allowed .+"):
            frame.replace({'a': shifted})

    def test_replace_str_to_str_chain(self):
        """Same overlap check as the int variant, with string digits."""
        sources = np.arange(1, 5).astype(str)
        targets = np.arange(2, 6).astype(str)
        frame = DataFrame({'a': sources})
        shifted = dict(zip(sources, targets))
        with tm.assert_raises_regex(ValueError, "Replacement not allowed .+"):
            frame.replace({'a': shifted})

    def test_replace_swapping_bug(self):
        """Two-key mappings on bool and on 0/1 columns map each key once."""
        wanted = ['Y', 'N', 'Y']
        cases = [
            ([True, False, True], {True: 'Y', False: 'N'}),
            ([0, 1, 0], {0: 'Y', 1: 'N'}),
        ]
        for column, mapping in cases:
            frame = pd.DataFrame({'a': column})
            res = frame.replace({'a': mapping})
            assert_frame_equal(res, pd.DataFrame({'a': list(wanted)}))

    def test_replace_period(self):
        """File-name strings are replaced by monthly ``Period`` objects."""
        def monthly(year, month):
            return pd.Period(year=year, month=month, freq='M')

        period_by_file = {
            'out_augmented_AUG_2011.json': monthly(2011, 8),
            'out_augmented_JAN_2011.json': monthly(2011, 1),
            'out_augmented_MAY_2012.json': monthly(2012, 5),
            'out_augmented_SUBSIDY_WEEK.json': monthly(2011, 4),
            'out_augmented_AUG_2012.json': monthly(2012, 8),
            'out_augmented_MAY_2011.json': monthly(2011, 5),
            'out_augmented_SEP_2013.json': monthly(2013, 9)
        }
        d = {'fname': period_by_file}

        file_names = [
            'out_augmented_AUG_2012.json', 'out_augmented_SEP_2013.json',
            'out_augmented_SUBSIDY_WEEK.json', 'out_augmented_MAY_2012.json',
            'out_augmented_MAY_2011.json', 'out_augmented_AUG_2011.json',
            'out_augmented_JAN_2011.json'
        ]
        df = pd.DataFrame(file_names, columns=['fname'])
        # Sanity check: every name in the frame has a replacement and back.
        assert set(df.fname.values) == set(period_by_file.keys())

        expected = DataFrame(
            {'fname': [period_by_file[k] for k in df.fname.values]})
        assert_frame_equal(df.replace(d), expected)

    def test_replace_datetime(self):
        """File-name strings are replaced by ``Timestamp`` objects."""
        stamp_by_file = {
            'out_augmented_AUG_2011.json': pd.Timestamp('2011-08'),
            'out_augmented_JAN_2011.json': pd.Timestamp('2011-01'),
            'out_augmented_MAY_2012.json': pd.Timestamp('2012-05'),
            'out_augmented_SUBSIDY_WEEK.json': pd.Timestamp('2011-04'),
            'out_augmented_AUG_2012.json': pd.Timestamp('2012-08'),
            'out_augmented_MAY_2011.json': pd.Timestamp('2011-05'),
            'out_augmented_SEP_2013.json': pd.Timestamp('2013-09')
        }
        d = {'fname': stamp_by_file}

        file_names = [
            'out_augmented_AUG_2012.json', 'out_augmented_SEP_2013.json',
            'out_augmented_SUBSIDY_WEEK.json', 'out_augmented_MAY_2012.json',
            'out_augmented_MAY_2011.json', 'out_augmented_AUG_2011.json',
            'out_augmented_JAN_2011.json'
        ]
        df = pd.DataFrame(file_names, columns=['fname'])
        # Sanity check: every name in the frame has a replacement and back.
        assert set(df.fname.values) == set(stamp_by_file.keys())

        expected = DataFrame(
            {'fname': [stamp_by_file[k] for k in df.fname.values]})
        assert_frame_equal(df.replace(d), expected)

    def test_replace_datetimetz(self):
        """``replace`` on a frame that has a datetime64[ns, tz] column."""
        # GH 11326
        # behaving poorly when presented with a datetime64[ns, tz]
        eastern = 'US/Eastern'

        def eastern_days():
            return date_range('20130101', periods=3, tz=eastern)

        def with_middle(stamp):
            # Expected frame whose middle 'A' entry is *stamp*.
            return DataFrame({
                'A': [
                    Timestamp('20130101', tz=eastern), stamp,
                    Timestamp('20130103', tz=eastern)
                ],
                'B': [0, np.nan, 2]
            })

        def with_missing_middle():
            # Copy of the input whose middle 'A' entry has been blanked.
            holed = df.copy()
            holed.iloc[1, 0] = np.nan
            return holed

        df = DataFrame({'A': eastern_days(), 'B': [0, np.nan, 2]})

        # NaN -> 1 in the float column, through replace and through fillna
        expected = DataFrame({
            'A': eastern_days(),
            'B': Series([0, 1, 2], dtype='float64')
        })
        assert_frame_equal(df.replace(np.nan, 1), expected)
        assert_frame_equal(df.fillna(1), expected)

        # 0 -> NaN in the float column
        expected = DataFrame({
            'A': eastern_days(),
            'B': [np.nan, np.nan, 2]
        })
        assert_frame_equal(df.replace(0, np.nan), expected)

        # one tz-aware stamp for another in the same zone
        expected = with_middle(Timestamp('20130104', tz=eastern))
        result = df.replace(Timestamp('20130102', tz=eastern),
                            Timestamp('20130104', tz=eastern))
        assert_frame_equal(result, expected)

        # NaT -> stamp in the same zone gives the same expected frame
        result = with_missing_middle().replace(
            {'A': pd.NaT}, Timestamp('20130104', tz=eastern))
        assert_frame_equal(result, expected)

        # coerce to object
        result = with_missing_middle().replace(
            {'A': pd.NaT}, Timestamp('20130104', tz='US/Pacific'))
        expected = with_middle(Timestamp('20130104', tz='US/Pacific'))
        assert_frame_equal(result, expected)

        # missing value replaced by a tz-naive stamp
        result = with_missing_middle().replace({'A': np.nan},
                                               Timestamp('20130104'))
        expected = with_middle(Timestamp('20130104'))
        assert_frame_equal(result, expected)

    def test_replace_with_empty_dictlike(self):
        """Empty dict-like arguments must leave the frame unchanged."""
        # GH 15289
        mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']}
        df = DataFrame(mix)

        # top-level empties first, then an empty mapping for one column
        no_ops = [{}, Series([]), {'b': {}}, Series({'b': {}})]
        for to_replace in no_ops:
            assert_frame_equal(df, df.replace(to_replace))

    @pytest.mark.parametrize("to_replace, method, expected", [
        (0, 'bfill',
         {'A': [1, 1, 2], 'B': [5, nan, 7], 'C': ['a', 'b', 'c']}),
        (nan, 'bfill',
         {'A': [0, 1, 2], 'B': [5.0, 7.0, 7.0], 'C': ['a', 'b', 'c']}),
        ('d', 'ffill',
         {'A': [0, 1, 2], 'B': [5, nan, 7], 'C': ['a', 'b', 'c']}),
        ([0, 2], 'bfill',
         {'A': [1, 1, 2], 'B': [5, nan, 7], 'C': ['a', 'b', 'c']}),
        ([1, 2], 'pad',
         {'A': [0, 0, 0], 'B': [5, nan, 7], 'C': ['a', 'b', 'c']}),
        ((1, 2), 'bfill',
         {'A': [0, 2, 2], 'B': [5, nan, 7], 'C': ['a', 'b', 'c']}),
        (['b', 'c'], 'ffill',
         {'A': [0, 1, 2], 'B': [5, nan, 7], 'C': ['a', 'b', 'c']}),
    ])
    def test_replace_method(self, to_replace, method, expected):
        """``replace`` with ``value=None`` and a fill ``method``.

        ``expected`` arrives as a dict of columns and is turned into a
        frame before the comparison.
        """
        # GH 19632
        source = DataFrame({
            'A': [0, 1, 2],
            'B': [5, nan, 7],
            'C': ['a', 'b', 'c']
        })
        filled = source.replace(to_replace=to_replace, value=None,
                                method=method)
        assert_frame_equal(filled, DataFrame(expected))
示例#15
0
    def test_interleave(self):
        """``.values`` of the tz fixture frame compared to object arrays."""
        # The three datetime columns, one list per column.
        naive = [
            Timestamp('2013-01-01 00:00:00'),
            Timestamp('2013-01-02 00:00:00'),
            Timestamp('2013-01-03 00:00:00')
        ]
        eastern = [
            Timestamp('2013-01-01 00:00:00-0500', tz='US/Eastern'),
            pd.NaT,
            Timestamp('2013-01-03 00:00:00-0500', tz='US/Eastern')
        ]
        cet = [
            Timestamp('2013-01-01 00:00:00+0100', tz='CET'),
            pd.NaT,
            Timestamp('2013-01-03 00:00:00+0100', tz='CET')
        ]

        # interleave with object
        result = self.tzframe.assign(D='foo').values
        with_strings = [naive, eastern, cet, ['foo', 'foo', 'foo']]
        expected = np.array(with_strings, dtype=object).T
        self.assert_numpy_array_equal(result, expected)

        # interleave with only datetime64[ns]
        result = self.tzframe.values
        expected = np.array([naive, eastern, cet], dtype=object).T
        self.assert_numpy_array_equal(result, expected)
 def __init__(self, start=Timestamp('2000-01-01', tz='UTC'), end=end_default):
     """Forward ``start`` and ``end`` to the parent class initializer."""
     parent = super(QuantopianUSFuturesCalendar, self)
     parent.__init__(start=start, end=end)
示例#17
0
def test_count_nonnumeric_types():
    """Rolling ``count`` must work on non-numeric columns and skip missing.

    Builds a frame with one column per dtype of interest, some of them
    ending in a missing value, and checks the counts for window sizes
    2 and 1 with ``min_periods=0``.
    """
    # GH12541
    cols = [
        "int",
        "float",
        "string",
        "datetime",
        "timedelta",
        "periods",
        "fl_inf",
        "fl_nan",
        "str_nan",
        "dt_nat",
        "periods_nat",
    ]
    dt_nat_col = [
        Timestamp("20170101"),
        Timestamp("20170203"),
        Timestamp(None)
    ]

    # np.inf / np.nan are used instead of the np.Inf / np.NaN aliases,
    # which no longer exist in NumPy 2.0.
    df = DataFrame(
        {
            "int": [1, 2, 3],
            "float": [4.0, 5.0, 6.0],
            "string":
            list("abc"),
            "datetime":
            pd.date_range("20170101", periods=3),
            "timedelta":
            pd.timedelta_range("1 s", periods=3, freq="s"),
            "periods": [
                pd.Period("2012-01"),
                pd.Period("2012-02"),
                pd.Period("2012-03"),
            ],
            "fl_inf": [1.0, 2.0, np.inf],
            "fl_nan": [1.0, 2.0, np.nan],
            "str_nan": ["aa", "bb", np.nan],
            "dt_nat":
            dt_nat_col,
            "periods_nat": [
                pd.Period("2012-01"),
                pd.Period("2012-02"),
                pd.Period(None),
            ],
        },
        columns=cols,
    )

    # Window of 2: the last row counts 2, or 1 where the final entry is
    # missing.  Infinity counts as present.
    expected = DataFrame(
        {
            "int": [1.0, 2.0, 2.0],
            "float": [1.0, 2.0, 2.0],
            "string": [1.0, 2.0, 2.0],
            "datetime": [1.0, 2.0, 2.0],
            "timedelta": [1.0, 2.0, 2.0],
            "periods": [1.0, 2.0, 2.0],
            "fl_inf": [1.0, 2.0, 2.0],
            "fl_nan": [1.0, 2.0, 1.0],
            "str_nan": [1.0, 2.0, 1.0],
            "dt_nat": [1.0, 2.0, 1.0],
            "periods_nat": [1.0, 2.0, 1.0],
        },
        columns=cols,
    )

    result = df.rolling(window=2, min_periods=0).count()
    tm.assert_frame_equal(result, expected)

    # Window of 1: the count is simply the not-missing mask as floats.
    result = df.rolling(1, min_periods=0).count()
    expected = df.notna().astype(float)
    tm.assert_frame_equal(result, expected)
示例#18
0
    def test_append_dtypes(self):
        """Row appends where the two frames hold different dtypes."""

        # GH 5754
        # row appends of different dtypes (so need to do by-item)
        # can sometimes infer the correct type
        stamp = Timestamp("20130101")

        def one_row(value, label, **kwargs):
            # Single-row frame: column "bar" holds *value* at index *label*.
            return DataFrame({"bar": value},
                             index=range(label, label + 1), **kwargs)

        def as_datetime_frame(values):
            return DataFrame({"bar": Series(values, dtype="M8[ns]")})

        # appending an empty frame changes nothing
        top = DataFrame({"bar": stamp}, index=range(5))
        result = top.append(DataFrame())
        tm.assert_frame_equal(result, top.copy())

        # timestamp followed by a string
        result = one_row(stamp, 0).append(one_row("foo", 1))
        expected = DataFrame({"bar": [stamp, "foo"]})
        tm.assert_frame_equal(result, expected)

        # timestamp followed by NaN
        result = one_row(stamp, 0).append(one_row(np.nan, 1))
        tm.assert_frame_equal(result, as_datetime_frame([stamp, np.nan]))

        # timestamp followed by NaN held in an object frame
        result = one_row(stamp, 0).append(one_row(np.nan, 1, dtype=object))
        tm.assert_frame_equal(result, as_datetime_frame([stamp, np.nan]))

        # NaN followed by a timestamp
        result = one_row(np.nan, 0).append(one_row(stamp, 1))
        tm.assert_frame_equal(result, as_datetime_frame([np.nan, stamp]))

        # timestamp followed by an integer held in an object frame
        result = one_row(stamp, 0).append(one_row(1, 1, dtype=object))
        expected = DataFrame({"bar": Series([stamp, 1])})
        tm.assert_frame_equal(result, expected)
from dateutil.relativedelta import MO
from pandas import DateOffset, Timestamp
from pandas.tseries.holiday import Holiday, sunday_to_monday

from pandas_market_calendars.jpx_equinox import autumnal_citizen_dates, autumnal_equinox, vernal_equinox

# Ad-hoc calendar dates. Every entry is a pandas Timestamp localized to UTC;
# the trailing comment on each date string is the label it carried originally.

AscensionDays = [
    Timestamp(day, tz='UTC')
    for day in (
        '2019-04-30',  # National Holiday
        '2019-05-01',  # Ascension Day
        '2019-05-02',  # National Holiday
    )
]

MarriageDays = [
    Timestamp(day, tz='UTC')
    for day in (
        '1959-04-10',  # Akihito
        '1993-06-09',  # Naruhito
    )
]

FuneralShowa = [Timestamp('1989-02-24', tz='UTC')]

EnthronementDays = [
    Timestamp(day, tz='UTC')
    for day in (
        '1990-11-12',  # Akihito
        '2019-10-22',  # Naruhito
    )
]

# Evaluated once, at import time, by calling the helper imported from
# pandas_market_calendars.jpx_equinox. The shape of the returned value is not
# visible from this module.
AutumnalCitizenDates = autumnal_citizen_dates()

NoN225IndexPrices = [
    # source:  https://indexes.nikkei.co.jp/en/nkave/archives/data
    # TODO: determine if these dates were also national holidays
示例#20
0
 def test_astimezone(self):
     """``Timestamp.astimezone`` must give the same answer as ``tz_convert``."""
     source = Timestamp('3/11/2012 22:00', tz='UTC')
     via_tz_convert = source.tz_convert(self.tzstr('US/Eastern'))
     via_astimezone = source.astimezone(self.tzstr('US/Eastern'))

     self.assertEqual(via_tz_convert, via_astimezone)
     # The converted value must still be a pandas Timestamp.
     tm.assert_isinstance(via_astimezone, Timestamp)
示例#21
0
    def test_construction_index_with_mixed_timezones_with_NaT(self):
        """Check the type ``Index`` infers for Timestamp data that contains NaT.

        Asserted here: tz-naive input, single-timezone input and all-NaT
        input come back as a ``DatetimeIndex``, while input mixing naive with
        aware values (or two different timezones) stays an object-dtype
        ``Index``.  See gh-11488.
        """
        # tz-naive values -> tz-naive DatetimeIndex
        naive = [pd.NaT, Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-02")]
        result = Index(naive, name="idx")
        exp = DatetimeIndex(naive, name="idx")
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is None

        # One shared timezone -> DatetimeIndex carrying that timezone.
        tokyo_aware = [
            pd.NaT,
            Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
            pd.NaT,
            Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"),
        ]
        tokyo_wall = [
            pd.NaT,
            Timestamp("2011-01-01 10:00"),
            pd.NaT,
            Timestamp("2011-01-02 10:00"),
        ]
        result = Index(tokyo_aware, name="idx")
        exp = DatetimeIndex(tokyo_wall, tz="Asia/Tokyo", name="idx")
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is not None
        assert result.tz == exp.tz

        # One shared timezone whose two dates fall on either side of DST.
        eastern_aware = [
            Timestamp("2011-01-01 10:00", tz="US/Eastern"),
            pd.NaT,
            Timestamp("2011-08-01 10:00", tz="US/Eastern"),
        ]
        eastern_wall = [
            Timestamp("2011-01-01 10:00"),
            pd.NaT,
            Timestamp("2011-08-01 10:00"),
        ]
        result = Index(eastern_aware, name="idx")
        exp = DatetimeIndex(eastern_wall, tz="US/Eastern", name="idx")
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is not None
        assert result.tz == exp.tz

        # Naive mixed with aware -> plain object-dtype Index.
        naive_and_eastern = [
            pd.NaT,
            Timestamp("2011-01-01 10:00"),
            pd.NaT,
            Timestamp("2011-01-02 10:00", tz="US/Eastern"),
        ]
        result = Index(naive_and_eastern, name="idx")
        exp = Index(naive_and_eastern, dtype="object", name="idx")
        tm.assert_index_equal(result, exp, exact=True)
        assert not isinstance(result, DatetimeIndex)

        # Two different timezones -> plain object-dtype Index as well.
        tokyo_and_eastern = [
            pd.NaT,
            Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
            pd.NaT,
            Timestamp("2011-01-02 10:00", tz="US/Eastern"),
        ]
        result = Index(tokyo_and_eastern, name="idx")
        exp = Index(tokyo_and_eastern, dtype="object", name="idx")
        tm.assert_index_equal(result, exp, exact=True)
        assert not isinstance(result, DatetimeIndex)

        # Nothing but NaT -> tz-naive DatetimeIndex.
        all_nat = [pd.NaT, pd.NaT]
        result = Index(all_nat, name="idx")
        exp = DatetimeIndex(all_nat, name="idx")
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is None

        # Nothing but NaT plus an explicit tz -> tz-aware DatetimeIndex.
        result = Index(all_nat, tz="Asia/Tokyo", name="idx")
        exp = DatetimeIndex(all_nat, tz="Asia/Tokyo", name="idx")

        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is not None
        assert result.tz == exp.tz
示例#22
0
    def test_datetimeindex_tz_nat(self):
        """``to_datetime`` on [tz-aware Timestamp, NaT] keeps the NaT and the tz."""
        aware = Timestamp("2013-1-1", tz=self.tzstr('US/Eastern'))
        idx = to_datetime([aware, NaT])

        # The second slot must still be missing ...
        missing = idx[1]
        self.assertTrue(isnull(missing))
        # ... and the first slot must still carry timezone information.
        present = idx[0]
        self.assertTrue(present.tzinfo is not None)
示例#23
0
    def test_construction_dti_with_mixed_timezones(self):
        """Check ``DatetimeIndex`` construction from consistent and mixed tz data.

        Consistent input (all naive, or all in one timezone) must build a
        ``DatetimeIndex``; every mixed-timezone combination below is expected
        to raise ``ValueError``.  GH 11488 (not changed, added explicit tests).
        """
        # tz-naive input -> DatetimeIndex
        naive = [Timestamp("2011-01-01"), Timestamp("2011-01-02")]
        result = DatetimeIndex(naive, name="idx")
        exp = DatetimeIndex(naive, name="idx")
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)

        # One shared timezone -> DatetimeIndex
        tokyo_aware = [
            Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
            Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"),
        ]
        tokyo_wall = [Timestamp("2011-01-01 10:00"), Timestamp("2011-01-02 10:00")]
        result = DatetimeIndex(tokyo_aware, name="idx")
        exp = DatetimeIndex(tokyo_wall, tz="Asia/Tokyo", name="idx")
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)

        # One shared timezone whose two dates fall on either side of DST.
        eastern_aware = [
            Timestamp("2011-01-01 10:00", tz="US/Eastern"),
            Timestamp("2011-08-01 10:00", tz="US/Eastern"),
        ]
        eastern_wall = [Timestamp("2011-01-01 10:00"), Timestamp("2011-08-01 10:00")]
        result = DatetimeIndex(eastern_aware, name="idx")
        exp = DatetimeIndex(eastern_wall, tz="US/Eastern", name="idx")
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)

        # From here on every construction mixes timezones and must raise
        # ValueError.
        tokyo_then_eastern = [
            Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
            Timestamp("2011-01-02 10:00", tz="US/Eastern"),
        ]
        naive_then_eastern = [
            Timestamp("2011-01-01 10:00"),
            Timestamp("2011-01-02 10:00", tz="US/Eastern"),
        ]

        # Two different timezones, no explicit tz.
        with pytest.raises(ValueError):
            DatetimeIndex(tokyo_then_eastern, name="idx")

        msg = "cannot be converted to datetime64"

        # Naive + aware data combined with an unrelated explicit tz.
        with pytest.raises(ValueError, match=msg):
            DatetimeIndex(naive_then_eastern, tz="Asia/Tokyo", name="idx")

        # Two different timezones, explicit tz matching only one of them.
        with pytest.raises(ValueError):
            DatetimeIndex(tokyo_then_eastern, tz="US/Eastern", name="idx")

        # Same mismatch through Index with an explicit tz: this is expected
        # to raise the same ValueError as the DatetimeIndex case above.
        with pytest.raises(ValueError, match=msg):
            Index(
                [
                    pd.NaT,
                    Timestamp("2011-01-01 10:00"),
                    pd.NaT,
                    Timestamp("2011-01-02 10:00", tz="US/Eastern"),
                ],
                tz="Asia/Tokyo",
                name="idx",
            )
 def test_maybe_cast_slice_duplicate_monotonic(self):
     """A string slice bound on an index with duplicates is cast to a Timestamp.

     See https://github.com/pandas-dev/pandas/issues/16515
     """
     duplicated = DatetimeIndex(["2017", "2017"])
     bound = duplicated._maybe_cast_slice_bound("2017-01-01", "left", "loc")
     assert bound == Timestamp("2017-01-01")
示例#25
0
 def test_constructor_with_non_normalized_pytz(self, tz):
     """A tz object taken from a Timestamp must be normalized by DatetimeIndex.

     The index's ``tz`` is required to be the very object that
     ``pytz.timezone(tz)`` returns (identity, not just equality).  GH 18595.
     """
     tz_from_timestamp = Timestamp("2010", tz=tz).tz
     index = DatetimeIndex(["2010"], tz=tz_from_timestamp)
     assert pytz.timezone(tz) is index.tz
示例#26
0
    exp = data["df"]
    df_out = DataFrame(exp)

    df_out["b"] = df_out.b.astype(out_type)
    df_out.set_index("a", inplace=True)

    grpd = df.groupby("a")
    t = getattr(grpd, method)(*data["args"])
    tm.assert_frame_equal(t, df_out)


@pytest.mark.parametrize(
    "i",
    [
        (
            Timestamp("2011-01-15 12:50:28.502376"),
            Timestamp("2011-01-20 12:50:28.593448"),
        ),
        (24650000000000001, 24650000000000002),
    ],
)
def test_groupby_non_arithmetic_agg_int_like_precision(i):
    # see gh-6620, gh-9311
    df = DataFrame([{"a": 1, "b": i[0]}, {"a": 1, "b": i[1]}])

    grp_exp = {
        "first": {
            "expected": i[0]
        },
        "last": {
            "expected": i[1]
示例#27
0
 def test_construction_with_nat_and_tzlocal(self):
     """Strings plus "NaT" with ``tz=tzlocal()`` match the explicit Timestamp form."""
     local = dateutil.tz.tzlocal()
     expected = DatetimeIndex([Timestamp("2018", tz=local), pd.NaT])
     from_strings = DatetimeIndex(["2018", "NaT"], tz=local)
     tm.assert_index_equal(from_strings, expected)
    def test_fillna(self):
        """Exercise ``fillna`` through the call shapes this suite covers.

        Sections, in order: scalar and ``method='pad'`` fills on
        ``self.tsframe``; smoke calls on ``self.mixed_frame``; argument
        validation; mixed numeric columns; an empty frame (GH #2778); a
        per-column dict on object data (GH3386); ``limit`` combined with a
        value; and datetime-like columns filled from another column
        (GH 6344).

        NOTE(review): ``.ix`` is left untouched because the index types of the
        ``self.*`` fixtures are not visible from this method, so the
        equivalent ``.loc``/``.iloc`` spelling cannot be chosen safely here.
        """
        # Blank out both ends of column 'A' (this mutates the shared fixture).
        self.tsframe.ix[:5, 'A'] = nan
        self.tsframe.ix[-5:, 'A'] = nan

        zero_filled = self.tsframe.fillna(0)
        self.assertTrue((zero_filled.ix[:5, 'A'] == 0).all())

        padded = self.tsframe.fillna(method='pad')
        # The ``[:5]`` run is asserted to remain NaN after padding, while the
        # ``[-5:]`` run is asserted to be constant and equal to its first
        # element.
        self.assertTrue(np.isnan(padded.ix[:5, 'A']).all())
        self.assertTrue((padded.ix[-5:, 'A'] == padded.ix[-5, 'A']).all())

        # mixed type: smoke checks only -- the calls must not raise, their
        # return values are not inspected.
        self.mixed_frame.ix[5:20, 'foo'] = nan
        self.mixed_frame.ix[-10:, 'A'] = nan
        self.mixed_frame.fillna(value=0)
        self.mixed_frame.fillna(method='pad')

        # Neither value nor method given, then both given: ValueError each time.
        self.assertRaises(ValueError, self.tsframe.fillna)
        self.assertRaises(ValueError, self.tsframe.fillna, 5, method='ffill')

        # mixed numeric (but no float16)
        mf = self.mixed_float.reindex(columns=['A', 'B', 'D'])
        mf.ix[-10:, 'A'] = nan
        result = mf.fillna(value=0)
        _check_mixed_float(result, dtype=dict(C=None))

        result = mf.fillna(method='pad')
        _check_mixed_float(result, dtype=dict(C=None))

        # empty frame (GH #2778): both the in-place and the copying call
        # must simply not raise.
        df = DataFrame(columns=['x'])
        for m in ['pad', 'backfill']:
            # ``inplace`` is a boolean flag; pass a real bool rather than 1.
            df.x.fillna(method=m, inplace=True)
            df.x.fillna(method=m)

        # with different dtype (GH3386)
        df = DataFrame([['a', 'a', np.nan, 'a'], ['b', 'b', np.nan, 'b'],
                        ['c', 'c', np.nan, 'c']])

        result = df.fillna({2: 'foo'})
        expected = DataFrame([['a', 'a', 'foo', 'a'], ['b', 'b', 'foo', 'b'],
                              ['c', 'c', 'foo', 'c']])
        assert_frame_equal(result, expected)

        df.fillna({2: 'foo'}, inplace=True)
        assert_frame_equal(df, expected)

        # limit and value: with limit=1 only the first NaN of each column's
        # gap is expected to be replaced.
        df = DataFrame(np.random.randn(10, 3))
        df.iloc[2:7, 0] = np.nan
        df.iloc[3:5, 2] = np.nan

        expected = df.copy()
        expected.iloc[2, 0] = 999
        expected.iloc[3, 2] = 999
        result = df.fillna(999, limit=1)
        assert_frame_equal(result, expected)

        # with datelike
        # GH 6344
        df = DataFrame({
            'Date': [pd.NaT, Timestamp("2014-1-1")],
            'Date2': [Timestamp("2013-1-1"), pd.NaT]
        })

        # Filling 'Date' from the 'Date2' Series must match filling it with
        # the scalar found at row 0 of 'Date2'.
        expected = df.copy()
        expected['Date'] = expected['Date'].fillna(df.ix[0, 'Date2'])
        result = df.fillna(value={'Date': df['Date2']})
        assert_frame_equal(result, expected)
示例#29
0
    def test_datetimeindex_constructor_misc(self):
        """Assorted ``DatetimeIndex`` / ``date_range`` construction checks.

        Covers an unparsable string, several equivalent ways of spelling the
        same four dates, ``dayfirst``/``yearfirst`` parsing, and
        ``date_range`` built from frequency aliases versus offset objects.
        """
        unparsable = ["1/1/2005", "1/2/2005", "Jn 3, 2005", "2005-01-04"]
        msg = r"(\(')?Unknown string format(:', 'Jn 3, 2005'\))?"
        with pytest.raises(ValueError, match=msg):
            DatetimeIndex(unparsable)

        # Reference index, followed by other spellings of the same dates.
        reference = DatetimeIndex(["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"])
        equivalents = [
            DatetimeIndex(
                [datetime(2005, 1, 1), "1/2/2005", "1/3/2005", "2005-01-04"]
            ),
            DatetimeIndex(
                [Timestamp(datetime(2005, 1, 1)), "1/2/2005", "1/3/2005", "2005-01-04"]
            ),
            DatetimeIndex(
                np.array(["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"], dtype="O")
            ),
            DatetimeIndex(
                to_datetime(["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"])
            ),
            DatetimeIndex(
                to_datetime(["1/1/2005", "1/2/2005", "Jan 3, 2005", "2005-01-04"])
            ),
        ]

        # Day-first strings must parse to the same dates as year-first ones.
        day_first = DatetimeIndex(["12/05/2007", "25/01/2008"], dayfirst=True)
        year_first = DatetimeIndex(
            ["2007/05/12", "2008/01/25"], dayfirst=False, yearfirst=True
        )
        tm.assert_index_equal(day_first, year_first)

        for other in equivalents:
            assert (reference.values == other.values).all()

        start = datetime(1999, 12, 25)
        end = datetime(2000, 1, 1)

        # Anchored at the start with a business-day frequency.
        idx = date_range(start=start, freq="1B", periods=20)
        assert len(idx) == 20
        assert idx[0] == start + 0 * offsets.BDay()
        assert idx.freq == "B"

        # Anchored at the end with a tuple-style frequency.
        idx = date_range(end=end, freq=("D", 5), periods=20)
        assert len(idx) == 20
        assert idx[-1] == end
        assert idx.freq == "5D"

        # A frequency alias and the matching offset object must produce
        # ranges of equal length and equal freq.
        alias_offset_pairs = [
            ("W-SUN", offsets.Week(weekday=6)),
            ("QS", offsets.QuarterBegin(startingMonth=1)),
            ("BQ", offsets.BQuarterEnd(startingMonth=12)),
        ]
        for alias, offset in alias_offset_pairs:
            by_alias = date_range(start=start, end=end, freq=alias)
            by_offset = date_range(start=start, end=end, freq=offset)
            assert len(by_alias) == len(by_offset)
            assert by_alias.freq == by_offset.freq
示例#30
0
    def setUp(self):
        """Build the fixtures used by the tests in this class.

        Creates a trading environment with three assets, a synthetic closes
        frame, an on-disk daily bar store plus reader and data portal, one
        split adjustment for the first asset, and a ``DataFrameLoader`` for
        ``USEquityPricing.close``.

        NOTE(review): reads ``self.tempdir``, which is not assigned in this
        method -- presumably provided by the test class; confirm.
        """
        self.env = env = trading.TradingEnvironment()
        self.dates = date_range(
            '2014-01-01', '2014-02-01', freq=trading_day, tz='UTC'
        )
        # Three assets whose start/end dates are each shifted by one entry of
        # self.dates relative to the previous asset.
        asset_info = DataFrame.from_records([
            {
                'sid': 1,
                'symbol': 'A',
                'start_date': self.dates[10],
                'end_date': self.dates[13],
                'exchange': 'TEST',
            },
            {
                'sid': 2,
                'symbol': 'B',
                'start_date': self.dates[11],
                'end_date': self.dates[14],
                'exchange': 'TEST',
            },
            {
                'sid': 3,
                'symbol': 'C',
                'start_date': self.dates[12],
                'end_date': self.dates[15],
                'exchange': 'TEST',
            },
        ])
        self.first_asset_start = min(asset_info.start_date)
        self.last_asset_end = max(asset_info.end_date)
        env.write_data(equities_df=asset_info)
        self.asset_finder = finder = env.asset_finder

        sids = (1, 2, 3)
        self.assets = finder.retrieve_all(sids)

        # View of the baseline data.
        # Column ``sid`` holds (1, 2, ..., len(dates)) * sid, so every column
        # is a distinct multiple of the same ramp.
        self.closes = DataFrame(
            {sid: arange(1, len(self.dates) + 1) * sid for sid in sids},
            index=self.dates,
            dtype=float,
        )

        # Create a data portal holding the data in self.closes
        # Every OHLCV field reuses the closes column; "day" stores each
        # date's ``.value``.
        data = {}
        for sid in sids:
            data[sid] = DataFrame({
                "open": self.closes[sid].values,
                "high": self.closes[sid].values,
                "low": self.closes[sid].values,
                "close": self.closes[sid].values,
                "volume": self.closes[sid].values,
                "day": [day.value for day in self.dates]
            })

        path = os.path.join(self.tempdir.path, "testdaily.bcolz")

        # Write the per-sid frames to ``path`` and read them back through the
        # bcolz daily bar reader.
        DailyBarWriterFromDataFrames(data).write(
            path,
            self.dates,
            data
        )

        daily_bar_reader = BcolzDailyBarReader(path)

        self.data_portal = DataPortal(
            self.env,
            equity_daily_reader=daily_bar_reader,
        )

        # Add a split for 'A' on its second date.
        self.split_asset = self.assets[0]
        self.split_date = self.split_asset.start_date + trading_day
        self.split_ratio = 0.5
        # Single adjustment row: MULTIPLY by split_ratio, with a NaT start
        # date and both end and apply dates set to split_date.
        self.adjustments = DataFrame.from_records([
            {
                'sid': self.split_asset.sid,
                'value': self.split_ratio,
                'kind': MULTIPLY,
                'start_date': Timestamp('NaT'),
                'end_date': self.split_date,
                'apply_date': self.split_date,
            }
        ])

        # Copy of the closes in which the rows selected by ``:split_date``
        # for the split asset's column are scaled by split_ratio.
        self.adj_closes = adj_closes = self.closes.copy()
        adj_closes.ix[:self.split_date, self.split_asset] *= self.split_ratio

        # Loader serving USEquityPricing.close from the unadjusted closes
        # together with the adjustments frame built above.
        self.pipeline_loader = DataFrameLoader(
            column=USEquityPricing.close,
            baseline=self.closes,
            adjustments=self.adjustments,
        )