Example #1
    """
    In 2010 Independence Day fell on a Saturday. Normally this would mean that
    Friday is a half day, but instead it is a full day off, so we need to
    exclude it from the usual half day rules.
    """
    return holidays[holidays.year != 2010]


NewYearsDay = new_years_day()

MaundyThursday = maundy_thursday()
MondayPriorToCorpusChristi = Holiday(
    'Monday Prior to Corpus Christi',
    month=1,
    day=1,
    offset=[Easter(), Day(57)],
    end_date='2008',
)

LabourDay = european_labour_day()

NavyDay = Holiday('Navy Day', month=5, day=21)

SaintPeterAndSaintPaulDay = saint_peter_and_saint_paul_day(
    observance=nearest_monday)

OurLadyOfMountCarmelDay = Holiday(
    "Our Lady of Mount Carmel's Day",
    month=7,
    day=16,
    start_date='2008',
)
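
# Illustrative sketch (not in the original source): a Holiday rule resolves
# to concrete dates via .dates(). Corpus Christi falls 60 days after Easter
# Sunday, so Easter + 57 days above lands on the Monday prior; each resolved
# date is a Monday.
print(MondayPriorToCorpusChristi.dates('2005-01-01', '2007-12-31'))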
Example #2
USThanksgivingDay = Holiday("Thanksgiving",
                            month=11,
                            day=1,
                            offset=DateOffset(weekday=TH(4)))
USMartinLutherKingJr = Holiday(
    "Martin Luther King Jr. Day",
    start_date=datetime(1986, 1, 1),
    month=1,
    day=1,
    offset=DateOffset(weekday=MO(3)),
)
USPresidentsDay = Holiday("Presidents Day",
                          month=2,
                          day=1,
                          offset=DateOffset(weekday=MO(3)))
GoodFriday = Holiday("Good Friday", month=1, day=1, offset=[Easter(), Day(-2)])

EasterMonday = Holiday("Easter Monday",
                       month=1,
                       day=1,
                       offset=[Easter(), Day(1)])


class USFederalHolidayCalendar(AbstractHolidayCalendar):
    """
    US Federal Government Holiday Calendar based on rules specified by:
    https://www.opm.gov/policy-data-oversight/
       snow-dismissal-procedures/federal-holidays/
    """

    rules = [
StJosephsDay = Holiday(
    "St. Joseph's Day (next Monday)",
    month=3,
    day=19,
    offset=next_monday_offset,
)

MaundyThursday = maundy_thursday()

LabourDay = european_labour_day()

MondayAfterAscensionDay = Holiday(
    "Monday After Ascension Day",
    month=1,
    day=1,
    offset=[Easter(), Day(43)],
)

MondayAfterCorpusChristi = Holiday(
    "Monday After Corpus Christi",
    month=1,
    day=1,
    offset=[Easter(), Day(64)],
)

MondayAfterSacredHeart = Holiday(
    "Monday After Sacred Heart",
    month=1,
    day=1,
    offset=[Easter(), Day(71)],
)
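
# Illustrative sketch (not from the original sources): a calendar class like
# USFederalHolidayCalendar above exposes its observed dates via .holidays()
# and can drive a business-day offset. With the pandas built-in version:
import pandas as pd
from pandas.tseries.holiday import USFederalHolidayCalendar

_cal = USFederalHolidayCalendar()
print(_cal.holidays(start='2012-01-01', end='2012-12-31'))
_bday_us = pd.offsets.CustomBusinessDay(calendar=_cal)
print(pd.Timestamp('2012-07-03') + _bday_us)  # 2012-07-05: skips July 4th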
Example #4
import datetime
import pickle
import sys
import time

import pandas as pd
import redis
from pandas.tseries.offsets import Day

r = redis.Redis(host='127.0.0.1', port=6379, decode_responses=True, db=10)

with open('person.pkl', 'rb') as f:
    algo = pickle.load(f)


def func(item, user_id):
    return algo.predict(user_id, item).est


if __name__ == '__main__':
    # score table for the last two days, users active within two days, items within one day
    start = time.time()
    now_time = datetime.datetime.now()
    yes_time = (now_time - 1 * Day()).strftime('%Y-%m-%d %H:%M:%S')  # format as a string
    score = pd.read_csv('score.csv')
    score = score[score.created_at != 'False']
    score = score[score.created_at != False]
    score['created_at'] = pd.to_datetime(score['created_at'])
    score = score.set_index('created_at')  # use the timestamp as the index
    score = score[yes_time:now_time.strftime('%Y-%m-%d %H:%M:%S')]
    item_uni = score[['item']].drop_duplicates().reset_index(drop=True)

    # get the user_id
    user_id = int(sys.argv[1])

    # load the trained model (algo)
    with open('person.pkl', 'rb') as f:
        algo = pickle.load(f)
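
    # Illustrative sketch (not in the original script): the same
    # last-24-hours windowing idiom in isolation.
    df = pd.DataFrame({'item': [1, 2, 3]},
                      index=pd.to_datetime(['2021-01-01 08:00',
                                            '2021-01-02 09:00',
                                            '2021-01-02 18:00']))
    now = pd.Timestamp('2021-01-02 20:00')
    print(df[(now - Day(1)).strftime('%Y-%m-%d %H:%M:%S'):
             now.strftime('%Y-%m-%d %H:%M:%S')])  # keeps the last two rows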
Example #5
_ONE_DAY = 24 * _ONE_HOUR

# ---------------------------------------------------------------------
# Offset names ("time rules") and related functions

#: cache of previously seen offsets
_offset_map = {}  # type: Dict[str, DateOffset]


def get_period_alias(offset_str):
    """ alias to closest period strings BQ->Q etc"""
    return _offset_to_period_map.get(offset_str, None)


_name_to_offset_map = {
    "days": Day(1),
    "hours": Hour(1),
    "minutes": Minute(1),
    "seconds": Second(1),
    "milliseconds": Milli(1),
    "microseconds": Micro(1),
    "nanoseconds": Nano(1),
}
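
# Illustrative usage (not in the original module): string aliases are handled
# by the public to_offset converter defined below, e.g.:
#   to_offset('2D')     # <2 * Days>
#   to_offset('5min')   # <5 * Minutes>
#   to_offset('1H30T')  # <90 * Minutes>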


def to_offset(freq):
    """
    Return DateOffset object from string or tuple representation
    or datetime.timedelta object

    Parameters
    # when, for no explicable reason, Wednesday was a half day instead).
    "Fridays after Independence Day that aren't in 2013",
    month=7,
    day=5,
    days_of_week=(FRIDAY,),
    observance=july_5th_holiday_observance,
    start_date=Timestamp("1995-01-01"),
)
USBlackFridayBefore1993 = Holiday(
    'Black Friday',
    month=11,
    day=1,
    # Black Friday was not observed until 1992.
    start_date=Timestamp('1992-01-01'),
    end_date=Timestamp('1993-01-01'),
    offset=[DateOffset(weekday=TH(4)), Day(1)],
)
USBlackFridayInOrAfter1993 = Holiday(
    'Black Friday',
    month=11,
    day=1,
    start_date=Timestamp('1993-01-01'),
    offset=[DateOffset(weekday=TH(4)), Day(1)],
)
BattleOfGettysburg = Holiday(
    # All of the floor traders in Chicago were sent to PA
    'Markets were closed during the battle of Gettysburg',
    month=7,
    day=(1, 2, 3),
    start_date=Timestamp("1863-07-01"),
    end_date=Timestamp("1863-07-03")
Example #7
def create_data():
    """ create the pickle data """
    data = {
        "A": [0.0, 1.0, 2.0, 3.0, np.nan],
        "B": [0, 1, 0, 1, 0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        "D": date_range("1/1/2009", periods=5),
        "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0],
    }

    scalars = dict(timestamp=Timestamp("20130101"), period=Period("2012", "M"))

    index = dict(
        int=Index(np.arange(10)),
        date=date_range("20130101", periods=10),
        period=period_range("2013-01-01", freq="M", periods=10),
        float=Index(np.arange(10, dtype=np.float64)),
        uint=Index(np.arange(10, dtype=np.uint64)),
        timedelta=timedelta_range("00:00:00", freq="30T", periods=10),
    )

    index["range"] = RangeIndex(10)

    if _loose_version >= LooseVersion("0.21"):
        from pandas import interval_range

        index["interval"] = interval_range(0, periods=10)

    mi = dict(reg2=MultiIndex.from_tuples(
        tuple(
            zip(*[
                ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
                ["one", "two", "one", "two", "one", "two", "one", "two"],
            ])),
        names=["first", "second"],
    ))

    series = dict(
        float=Series(data["A"]),
        int=Series(data["B"]),
        mixed=Series(data["E"]),
        ts=Series(np.arange(10).astype(np.int64),
                  index=date_range("20130101", periods=10)),
        mi=Series(
            np.arange(5).astype(np.float64),
            index=MultiIndex.from_tuples(tuple(
                zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                                         names=["one", "two"]),
        ),
        dup=Series(np.arange(5).astype(np.float64),
                   index=["A", "B", "C", "D", "A"]),
        cat=Series(Categorical(["foo", "bar", "baz"])),
        dt=Series(date_range("20130101", periods=5)),
        dt_tz=Series(date_range("20130101", periods=5, tz="US/Eastern")),
        period=Series([Period("2000Q1")] * 5),
    )

    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")
    frame = dict(
        float=DataFrame({
            "A": series["float"],
            "B": series["float"] + 1
        }),
        int=DataFrame({
            "A": series["int"],
            "B": series["int"] + 1
        }),
        mixed=DataFrame({k: data[k]
                         for k in ["A", "B", "C", "D"]}),
        mi=DataFrame(
            {
                "A": np.arange(5).astype(np.float64),
                "B": np.arange(5).astype(np.int64)
            },
            index=MultiIndex.from_tuples(
                tuple(
                    zip(*[
                        ["bar", "bar", "baz", "baz", "baz"],
                        ["one", "two", "one", "two", "three"],
                    ])),
                names=["first", "second"],
            ),
        ),
        dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                      columns=["A", "B", "A"]),
        cat_onecol=DataFrame({"A": Categorical(["foo", "bar"])}),
        cat_and_float=DataFrame({
            "A": Categorical(["foo", "bar", "baz"]),
            "B": np.arange(3).astype(np.int64),
        }),
        mixed_dup=mixed_dup_df,
        dt_mixed_tzs=DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
            },
            index=range(5),
        ),
        dt_mixed2_tzs=DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
                "C": Timestamp("20130603", tz="UTC"),
            },
            index=range(5),
        ),
    )

    cat = dict(
        int8=Categorical(list("abcdefg")),
        int16=Categorical(np.arange(1000)),
        int32=Categorical(np.arange(10000)),
    )

    timestamp = dict(
        normal=Timestamp("2011-01-01"),
        nat=NaT,
        tz=Timestamp("2011-01-01", tz="US/Eastern"),
    )

    timestamp["freq"] = Timestamp("2011-01-01", freq="D")
    timestamp["both"] = Timestamp("2011-01-01", tz="Asia/Tokyo", freq="M")

    off = {
        "DateOffset": DateOffset(years=1),
        "DateOffset_h_ns": DateOffset(hour=6, nanoseconds=5824),
        "BusinessDay": BusinessDay(offset=timedelta(seconds=9)),
        "BusinessHour": BusinessHour(normalize=True, n=6, end="15:14"),
        "CustomBusinessDay": CustomBusinessDay(weekmask="Mon Fri"),
        "SemiMonthBegin": SemiMonthBegin(day_of_month=9),
        "SemiMonthEnd": SemiMonthEnd(day_of_month=24),
        "MonthBegin": MonthBegin(1),
        "MonthEnd": MonthEnd(1),
        "QuarterBegin": QuarterBegin(1),
        "QuarterEnd": QuarterEnd(1),
        "Day": Day(1),
        "YearBegin": YearBegin(1),
        "YearEnd": YearEnd(1),
        "Week": Week(1),
        "Week_Tues": Week(2, normalize=False, weekday=1),
        "WeekOfMonth": WeekOfMonth(week=3, weekday=4),
        "LastWeekOfMonth": LastWeekOfMonth(n=1, weekday=3),
        "FY5253": FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        "Easter": Easter(),
        "Hour": Hour(1),
        "Minute": Minute(1),
    }

    return dict(
        series=series,
        frame=frame,
        index=index,
        scalars=scalars,
        mi=mi,
        sp_series=dict(float=_create_sp_series(), ts=_create_sp_tsseries()),
        sp_frame=dict(float=_create_sp_frame()),
        cat=cat,
        timestamp=timestamp,
        offsets=off,
    )
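
# Illustrative sketch (not part of the original fixtures): the offsets in
# `off` above round-trip through pickle, which is what this data exercises.
import pickle

for _off in (Day(1), BusinessHour(normalize=True, n=6, end="15:14")):
    assert pickle.loads(pickle.dumps(_off)) == _off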
Example #8
    def get_expiry_date_from_horizon_date(self,
                                          horizon_date,
                                          tenor,
                                          cal=None,
                                          asset_class='fx-vol'):
        """Calculates the expiry date of FX options, based on the horizon date, the tenor and the holiday
        calendar associated with the asset.

        Uses expiry rules from Iain Clark's FX option pricing book

        Parameters
        ----------
        horizon_date : pd.Timestamp (collection)
            Horizon date of contract

        tenor : str
            Tenor of the contract

        cal : str
            Holiday calendar (usually related to the asset)

        asset_class : str
            'fx-vol' - FX options (default)

        Returns
        -------
        pd.Timestamp (collection)
        """
        if asset_class == 'fx-vol':

            tenor_unit = ''.join(re.compile(r'\D+').findall(tenor))

            asset_holidays = self.get_holidays(cal=cal)

            if tenor_unit == 'ON':
                tenor_digit = 1
                tenor_unit = 'D'
            else:
                tenor_digit = int(''.join(re.compile(r'\d+').findall(tenor)))

            if tenor_unit == 'D':
                return horizon_date + CustomBusinessDay(
                    n=tenor_digit, holidays=asset_holidays)
            elif tenor_unit == 'W':
                return horizon_date + Day(n=tenor_digit *
                                          7) + CustomBusinessDay(
                                              n=0, holidays=asset_holidays)
            else:
                horizon_date = self.get_spot_date_from_horizon_date(
                    horizon_date, cal, asset_holidays=asset_holidays)

                if tenor_unit == 'M':
                    pass
                elif tenor_unit == 'Y':
                    tenor_digit = tenor_digit * 12

                cbd = CustomBusinessDay(n=1, holidays=asset_holidays)

                horizon_period_end = horizon_date + CustomBusinessMonthEnd(
                    tenor_digit + 1)
                horizon_floating = horizon_date + DateOffset(
                    months=tenor_digit)

                delivery_date = []

                if isinstance(horizon_period_end, pd.Timestamp):
                    horizon_period_end = [horizon_period_end]

                if isinstance(horizon_floating, pd.Timestamp):
                    horizon_floating = [horizon_floating]

                # TODO: double check this!
                for period_end, floating in zip(horizon_period_end,
                                                horizon_floating):
                    if floating < period_end:
                        delivery_date.append(floating - cbd + cbd)
                    else:
                        delivery_date.append(period_end)

                delivery_date = pd.DatetimeIndex(delivery_date)

                return self.get_expiry_date_from_delivery_date(
                    delivery_date, cal)
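
    # Illustrative sketch (helper name is ours, not in the original source):
    # the tenor parsing used above, restated in isolation.
    #   _parse_tenor_demo('ON') -> ('D', 1)
    #   _parse_tenor_demo('3M') -> ('M', 3)
    @staticmethod
    def _parse_tenor_demo(tenor):
        unit = ''.join(re.compile(r'\D+').findall(tenor))
        if unit == 'ON':
            return 'D', 1
        return unit, int(''.join(re.compile(r'\d+').findall(tenor)))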
Example #9
(t1 - t2).seconds  # the seconds component of the timedelta object

# 1) date arithmetic using timedelta
from datetime import timedelta

d1 + 100  # cannot add a bare number to a date (raises TypeError)
d1 + timedelta(100)  # 100 days later

# 2) date arithmetic using pandas offsets
import pandas.tseries.offsets

dir(pandas.tseries.offsets)

from pandas.tseries.offsets import Day, Hour, Second

Day(5)  # 5 days
Hour(5)  # 5 hours
Second(5)  # 5 seconds

d1 + Day(100)

# [ Exercise ]
# Read the emp.csv file and
emp = pd.read_csv('emp.csv')

# 1) extract year, month and day separately
emp.HIREDATE.map(lambda x: datetime.strptime(x, '%Y/%m/%d %H:%M:%S'))
emp['HIREDATE'] = pd.to_datetime(emp.HIREDATE)

emp.HIREDATE.year  # .year cannot be used on a datetime Series
emp.HIREDATE[0].year  # .year works on a scalar Timestamp
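
# Illustrative addition (new column names are ours): on a datetime Series use
# the .dt accessor rather than the scalar attributes above
emp['HIRE_YEAR'] = emp.HIREDATE.dt.year
emp['HIRE_MONTH'] = emp.HIREDATE.dt.month
emp['HIRE_DAY'] = emp.HIREDATE.dt.day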
Example #10
class TestDatetimeIndexOps(Ops):
    def setup_method(self, method):
        super().setup_method(method)
        mask = lambda x: isinstance(x, (DatetimeIndex, PeriodIndex))
        self.is_valid_objs = [o for o in self.objs if mask(o)]
        self.not_valid_objs = [o for o in self.objs if not mask(o)]

    def test_ops_properties(self):
        f = lambda x: isinstance(x, DatetimeIndex)
        self.check_ops_properties(DatetimeIndex._field_ops, f)
        self.check_ops_properties(DatetimeIndex._object_ops, f)
        self.check_ops_properties(DatetimeIndex._bool_ops, f)

    def test_ops_properties_basic(self):

        # sanity check that the behavior didn't change
        # GH#7206
        for op in ["year", "day", "second", "weekday"]:
            msg = f"'Series' object has no attribute '{op}'"
            with pytest.raises(AttributeError, match=msg):
                getattr(self.dt_series, op)

        # attribute access should still work!
        s = Series(dict(year=2000, month=1, day=10))
        assert s.year == 2000
        assert s.month == 1
        assert s.day == 10
        msg = "'Series' object has no attribute 'weekday'"
        with pytest.raises(AttributeError, match=msg):
            s.weekday

    def test_repeat_range(self, tz_naive_fixture):
        tz = tz_naive_fixture
        rng = date_range("1/1/2000", "1/1/2001")

        result = rng.repeat(5)
        assert result.freq is None
        assert len(result) == 5 * len(rng)

        index = pd.date_range("2001-01-01", periods=2, freq="D", tz=tz)
        exp = pd.DatetimeIndex(
            ["2001-01-01", "2001-01-01", "2001-01-02", "2001-01-02"], tz=tz)
        for res in [index.repeat(2), np.repeat(index, 2)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

        index = pd.date_range("2001-01-01", periods=2, freq="2D", tz=tz)
        exp = pd.DatetimeIndex(
            ["2001-01-01", "2001-01-01", "2001-01-03", "2001-01-03"], tz=tz)
        for res in [index.repeat(2), np.repeat(index, 2)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

        index = pd.DatetimeIndex(["2001-01-01", "NaT", "2003-01-01"], tz=tz)
        exp = pd.DatetimeIndex(
            [
                "2001-01-01",
                "2001-01-01",
                "2001-01-01",
                "NaT",
                "NaT",
                "NaT",
                "2003-01-01",
                "2003-01-01",
                "2003-01-01",
            ],
            tz=tz,
        )
        for res in [index.repeat(3), np.repeat(index, 3)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

    def test_repeat(self, tz_naive_fixture):
        tz = tz_naive_fixture
        reps = 2
        msg = "the 'axis' parameter is not supported"

        rng = pd.date_range(start="2016-01-01", periods=2, freq="30Min", tz=tz)

        expected_rng = DatetimeIndex([
            Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"),
            Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"),
            Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"),
            Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"),
        ])

        res = rng.repeat(reps)
        tm.assert_index_equal(res, expected_rng)
        assert res.freq is None

        tm.assert_index_equal(np.repeat(rng, reps), expected_rng)
        with pytest.raises(ValueError, match=msg):
            np.repeat(rng, reps, axis=1)

    def test_resolution(self, tz_naive_fixture):
        tz = tz_naive_fixture
        for freq, expected in zip(
            ["A", "Q", "M", "D", "H", "T", "S", "L", "U"],
            [
                "day",
                "day",
                "day",
                "day",
                "hour",
                "minute",
                "second",
                "millisecond",
                "microsecond",
            ],
        ):
            idx = pd.date_range(start="2013-04-01",
                                periods=30,
                                freq=freq,
                                tz=tz)
            assert idx.resolution == expected

    def test_value_counts_unique(self, tz_naive_fixture):
        tz = tz_naive_fixture
        # GH 7735
        idx = pd.date_range("2011-01-01 09:00", freq="H", periods=10)
        # create repeated values, 'n'th element is repeated by n+1 times
        idx = DatetimeIndex(np.repeat(idx.values, range(1,
                                                        len(idx) + 1)),
                            tz=tz)

        exp_idx = pd.date_range("2011-01-01 18:00",
                                freq="-1H",
                                periods=10,
                                tz=tz)
        expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64")

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        expected = pd.date_range("2011-01-01 09:00",
                                 freq="H",
                                 periods=10,
                                 tz=tz)
        tm.assert_index_equal(idx.unique(), expected)

        idx = DatetimeIndex(
            [
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 08:00",
                "2013-01-01 08:00",
                pd.NaT,
            ],
            tz=tz,
        )

        exp_idx = DatetimeIndex(["2013-01-01 09:00", "2013-01-01 08:00"],
                                tz=tz)
        expected = Series([3, 2], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        exp_idx = DatetimeIndex(
            ["2013-01-01 09:00", "2013-01-01 08:00", pd.NaT], tz=tz)
        expected = Series([3, 2, 1], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(dropna=False), expected)

        tm.assert_index_equal(idx.unique(), exp_idx)

    def test_nonunique_contains(self):
        # GH 9512
        for idx in map(
                DatetimeIndex,
            (
                [0, 1, 0],
                [0, 0, -1],
                [0, -1, -1],
                ["2015", "2015", "2016"],
                ["2015", "2015", "2014"],
            ),
        ):
            assert idx[0] in idx

    @pytest.mark.parametrize(
        "idx",
        [
            DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"],
                          freq="D",
                          name="idx"),
            DatetimeIndex(
                ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
                freq="H",
                name="tzidx",
                tz="Asia/Tokyo",
            ),
        ],
    )
    def test_order_with_freq(self, idx):
        ordered = idx.sort_values()
        tm.assert_index_equal(ordered, idx)
        assert ordered.freq == idx.freq

        ordered = idx.sort_values(ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        assert ordered.freq == expected.freq
        assert ordered.freq.n == -1

        ordered, indexer = idx.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, idx)
        tm.assert_numpy_array_equal(indexer,
                                    np.array([0, 1, 2]),
                                    check_dtype=False)
        assert ordered.freq == idx.freq

        ordered, indexer = idx.sort_values(return_indexer=True,
                                           ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        tm.assert_numpy_array_equal(indexer,
                                    np.array([2, 1, 0]),
                                    check_dtype=False)
        assert ordered.freq == expected.freq
        assert ordered.freq.n == -1

    @pytest.mark.parametrize(
        "index_dates,expected_dates",
        [
            (
                [
                    "2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02",
                    "2011-01-01"
                ],
                [
                    "2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03",
                    "2011-01-05"
                ],
            ),
            (
                [
                    "2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02",
                    "2011-01-01"
                ],
                [
                    "2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03",
                    "2011-01-05"
                ],
            ),
            (
                [pd.NaT, "2011-01-03", "2011-01-05", "2011-01-02", pd.NaT],
                [pd.NaT, pd.NaT, "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
        ],
    )
    def test_order_without_freq(self, index_dates, expected_dates,
                                tz_naive_fixture):
        tz = tz_naive_fixture

        # without freq
        index = DatetimeIndex(index_dates, tz=tz, name="idx")
        expected = DatetimeIndex(expected_dates, tz=tz, name="idx")

        ordered = index.sort_values()
        tm.assert_index_equal(ordered, expected)
        assert ordered.freq is None

        ordered = index.sort_values(ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])
        assert ordered.freq is None

        ordered, indexer = index.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, expected)

        exp = np.array([0, 4, 3, 1, 2])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq is None

        ordered, indexer = index.sort_values(return_indexer=True,
                                             ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])

        exp = np.array([2, 1, 3, 4, 0])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq is None

    def test_drop_duplicates_metadata(self):
        # GH 10115
        idx = pd.date_range("2011-01-01", "2011-01-31", freq="D", name="idx")
        result = idx.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert idx.freq == result.freq

        idx_dup = idx.append(idx)
        assert idx_dup.freq is None  # freq is reset
        result = idx_dup.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert result.freq is None

    def test_drop_duplicates(self):
        # to check Index/Series compat
        base = pd.date_range("2011-01-01", "2011-01-31", freq="D", name="idx")
        idx = base.append(base[:5])

        res = idx.drop_duplicates()
        tm.assert_index_equal(res, base)
        res = Series(idx).drop_duplicates()
        tm.assert_series_equal(res, Series(base))

        res = idx.drop_duplicates(keep="last")
        exp = base[5:].append(base[:5])
        tm.assert_index_equal(res, exp)
        res = Series(idx).drop_duplicates(keep="last")
        tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36)))

        res = idx.drop_duplicates(keep=False)
        tm.assert_index_equal(res, base[5:])
        res = Series(idx).drop_duplicates(keep=False)
        tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31)))

    @pytest.mark.parametrize(
        "freq",
        [
            "A",
            "2A",
            "-2A",
            "Q",
            "-1Q",
            "M",
            "-1M",
            "D",
            "3D",
            "-3D",
            "W",
            "-1W",
            "H",
            "2H",
            "-2H",
            "T",
            "2T",
            "S",
            "-3S",
        ],
    )
    def test_infer_freq(self, freq):
        # GH 11018
        idx = pd.date_range("2011-01-01 09:00:00", freq=freq, periods=10)
        result = pd.DatetimeIndex(idx.asi8, freq="infer")
        tm.assert_index_equal(idx, result)
        assert result.freq == freq

    def test_nat(self, tz_naive_fixture):
        tz = tz_naive_fixture
        assert pd.DatetimeIndex._na_value is pd.NaT
        assert pd.DatetimeIndex([])._na_value is pd.NaT

        idx = pd.DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
        assert idx.hasnans is False
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp))

        idx = pd.DatetimeIndex(["2011-01-01", "NaT"], tz=tz)
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
        assert idx.hasnans is True
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1],
                                                            dtype=np.intp))

    def test_equals(self):
        # GH 13107
        idx = pd.DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"])
        assert idx.equals(idx)
        assert idx.equals(idx.copy())
        assert idx.equals(idx.astype(object))
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        assert not idx.equals(list(idx))
        assert not idx.equals(pd.Series(idx))

        idx2 = pd.DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"],
                                tz="US/Pacific")
        assert not idx.equals(idx2)
        assert not idx.equals(idx2.copy())
        assert not idx.equals(idx2.astype(object))
        assert not idx.astype(object).equals(idx2)
        assert not idx.equals(list(idx2))
        assert not idx.equals(pd.Series(idx2))

        # same internal, different tz
        idx3 = pd.DatetimeIndex._simple_new(idx.asi8, tz="US/Pacific")
        tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
        assert not idx.equals(idx3)
        assert not idx.equals(idx3.copy())
        assert not idx.equals(idx3.astype(object))
        assert not idx.astype(object).equals(idx3)
        assert not idx.equals(list(idx3))
        assert not idx.equals(pd.Series(idx3))

        # check that we do not raise when comparing with OutOfBounds objects
        oob = pd.Index([datetime(2500, 1, 1)] * 3, dtype=object)
        assert not idx.equals(oob)
        assert not idx2.equals(oob)
        assert not idx3.equals(oob)

        # check that we do not raise when comparing with OutOfBounds dt64
        oob2 = oob.map(np.datetime64)
        assert not idx.equals(oob2)
        assert not idx2.equals(oob2)
        assert not idx3.equals(oob2)

    @pytest.mark.parametrize("values",
                             [["20180101", "20180103", "20180105"], []])
    @pytest.mark.parametrize(
        "freq", ["2D", Day(2), "2B",
                 BDay(2), "48H", Hour(48)])
    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
    def test_freq_setter(self, values, freq, tz):
        # GH 20678
        idx = DatetimeIndex(values, tz=tz)

        # can set to an offset, converting from string if necessary
        idx._data.freq = freq
        assert idx.freq == freq
        assert isinstance(idx.freq, ABCDateOffset)

        # can reset to None
        idx._data.freq = None
        assert idx.freq is None

    def test_freq_setter_errors(self):
        # GH 20678
        idx = DatetimeIndex(["20180101", "20180103", "20180105"])

        # setting with an incompatible freq
        msg = ("Inferred frequency 2D from passed values does not conform to "
               "passed frequency 5D")
        with pytest.raises(ValueError, match=msg):
            idx._data.freq = "5D"

        # setting with non-freq string
        with pytest.raises(ValueError, match="Invalid frequency"):
            idx._data.freq = "foo"
Example #11
    def get_delivery_date_from_horizon_date(self,
                                            horizon_date,
                                            tenor,
                                            cal=None,
                                            asset_class='fx'):
        if 'fx' in asset_class:
            tenor_unit = ''.join(re.compile(r'\D+').findall(tenor))
            asset_holidays = self.get_holidays(cal=cal)

            if tenor_unit == 'ON':
                return horizon_date + CustomBusinessDay(
                    n=1, holidays=asset_holidays)
            elif tenor_unit == 'TN':
                return horizon_date + CustomBusinessDay(
                    n=2, holidays=asset_holidays)
            elif tenor_unit == 'SP':
                pass
            elif tenor_unit == 'SN':
                tenor_unit = 'D'
                tenor_digit = 1
            else:
                tenor_digit = int(''.join(re.compile(r'\d+').findall(tenor)))

            horizon_date = self.get_spot_date_from_horizon_date(
                horizon_date, cal, asset_holidays=asset_holidays)

            if 'SP' in tenor_unit:
                return horizon_date
            elif tenor_unit == 'D':
                return horizon_date + CustomBusinessDay(
                    n=tenor_digit, holidays=asset_holidays)
            elif tenor_unit == 'W':
                return horizon_date + Day(n=tenor_digit *
                                          7) + CustomBusinessDay(
                                              n=0, holidays=asset_holidays)
            else:
                if tenor_unit == 'Y':
                    tenor_digit = tenor_digit * 12

                horizon_period_end = horizon_date + CustomBusinessMonthEnd(
                    tenor_digit + 1)
                horizon_floating = horizon_date + DateOffset(
                    months=tenor_digit)

                cbd = CustomBusinessDay(n=1, holidays=asset_holidays)

                delivery_date = []

                if isinstance(horizon_period_end, pd.Timestamp):
                    horizon_period_end = [horizon_period_end]

                if isinstance(horizon_floating, pd.Timestamp):
                    horizon_floating = [horizon_floating]

                for period_end, floating in zip(horizon_period_end,
                                                horizon_floating):
                    if floating < period_end:
                        delivery_date.append(floating - cbd + cbd)
                    else:
                        delivery_date.append(period_end)

                return pd.DatetimeIndex(delivery_date)
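
    # Illustrative note (not in the original source): the idiom
    # `floating - cbd + cbd` above leaves business days unchanged and rolls
    # weekends/holidays forward to the next business day, e.g.:
    #   pd.Timestamp('2021-03-06') - cbd + cbd  # Saturday -> Monday 2021-03-08
    #   pd.Timestamp('2021-03-05') - cbd + cbd  # Friday stays 2021-03-05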
Example #12
    # Convert str to floats
    for col in sales_df.columns:
        sales_df[col] = sales_df[col].values.astype(float)

    # Add monthly average column
    sales_df['monthly_avg'] = round(
        sales_df.iloc[:, :].sum(axis=1) / len(years), 2)

    return sales_df, months, years


if __name__ == '__main__':

    # Set up date
    today = dt.datetime.today()
    today_prev = today - Day(30)
    today_str = today.strftime('%Y%m')
    today_str_prev = today_prev.strftime('%Y%m')

    # Prepare data
    url = create_url(today_str_prev)
    html = read_webpage(url)
    data = prepare_data(html)

    # Print
    print(data[0])

    # Plot 2020
    fig = px.bar(data[0],
                 x=data[1],
                 y='2020',
    christmas_eve,
    corpus_christi,
    european_labour_day,
    immaculate_conception,
    new_years_day,
    new_years_eve,
)
from .exchange_calendar import WEEKDAYS, HolidayCalendar, ExchangeCalendar

NewYearsDay = new_years_day()

Carnival = Holiday(
    "Carnival",
    month=1,
    day=1,
    offset=[Easter(), Day(-47)],
    end_date="2003",
)
CorpusChristi = corpus_christi(end_date="2003")

LibertyDay = Holiday(
    "Liberty Day",
    month=4,
    day=25,
    end_date="2003",
)

LabourDay = european_labour_day()

PortugalDay = Holiday(
    "Portugal Day",
Example #14
print(ts.shift(2))
print(ts.shift(-2))
# shift is commonly used to compute percent changes in a time series, or in several at once (e.g. the columns of a DataFrame). That can be expressed as:
print(ts / ts.shift(1) - 1)
# Because a plain shift does not modify the index, some data is discarded. If the
# frequency is known, pass it to shift to move the timestamps instead of just the data
print(ts.shift(2, freq='M'))
# Other frequencies work here too, so you can lead and lag the data very flexibly
print(ts.shift(3, freq='D'))
print(ts.shift(1, freq='90T'))

# pandas date offsets can also be applied to datetime or Timestamp objects
from pandas.tseries.offsets import Day, MonthEnd
now = datetime(2011, 11, 17)
print(now)
print(now + 3 * Day())
# When adding an anchored offset (such as MonthEnd), the first increment rolls the date forward to the next date that conforms to the frequency rule
print(now + MonthEnd())
print(now + MonthEnd(2))
# The rollforward and rollback methods of anchored offsets explicitly "roll" a date forward or back
offset = MonthEnd()
print(offset.rollforward(now))
print(offset.rollback(now))
# A clever use of date offsets is to combine these "roll" methods with groupby
ts = pd.Series(np.random.randn(20),
               index=pd.date_range('1/15/2000', periods=20, freq='4d'))
print(ts)
print(ts.groupby(offset.rollforward).mean())
# Of course, a simpler and faster way to achieve this is resample
print(ts.resample('M').mean())
Example #15
try:
    cday = CDay()
except NotImplementedError:
    cday = None

#: cache of previously seen offsets
_offset_map = {}


def get_period_alias(offset_str):
    """ alias to closest period strings BQ->Q etc"""
    return _offset_to_period_map.get(offset_str, None)


_name_to_offset_map = {
    'days': Day(1),
    'hours': Hour(1),
    'minutes': Minute(1),
    'seconds': Second(1),
    'milliseconds': Milli(1),
    'microseconds': Micro(1),
    'nanoseconds': Nano(1)
}


def to_offset(freq):
    """
    Return DateOffset object from string or tuple representation
    or datetime.timedelta object

    Parameters
Example #16
import pandas as pd
from pandas.tseries.offsets import Day

def add_day(old_date, n=1):
    new_date = pd.to_datetime(old_date) + n * Day()
    return new_date.strftime('%Y-%m-%d %H')
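
# Usage sketch:
print(add_day('2021-01-01 05:00'))        # 2021-01-02 05
print(add_day('2021-01-01 05:00', n=-1))  # 2020-12-31 05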
Example #17
File: pd_adv.py  Project: ljldgup/ml
# mode (the most frequent value)
groups.transform(lambda x: x.mode())

df.groupby(['A', 'B'])['C'].quantile(0.9)

# time offsets
pd.date_range('2000-01-01', '2000-01-03', freq=Hour(12))
Hour(2) + Minute(30)
ts = pd.Series(np.random.randn(5), pd.date_range('2000-01-01', '2000-01-03', freq=Hour(12)))

# note: what gets shifted here is the datetime index itself
ts.shift(2, freq=Hour(12))

now = datetime(2020, 3, 1)
now + 3 * Day()
now + MonthEnd()
now + MonthEnd(2)

index = pd.date_range('2000-01-01', '2000-03-03', freq=Day(4))
offset = MonthEnd()
ts = pd.Series(np.random.randn(len(index)), index)
ts.groupby(offset.rollforward).count()
ts.groupby(offset.rollback).count()

# pick out values by frequency
ts.asfreq('W', how='start')
ts.asfreq('W', how='end')

# resample over time intervals and take the mean
ts.resample('M').mean()
Example #18
    ascension_day,
    whit_monday,
    christmas_eve,
    christmas,
    boxing_day,
    new_years_eve,
)

NewYearsDay = new_years_day()

MaundyThursday = maundy_thursday()
GeneralPrayerDay = Holiday(
    'General Prayer Day',
    month=1,
    day=1,
    offset=[Easter(), Day(26)],
)
AscensionDay = ascension_day()
BankHoliday = Holiday(
    'Bank Holiday',
    month=1,
    day=1,
    offset=[Easter(), Day(40)],
    start_date='2009',
)
WhitMonday = whit_monday()

ConstitutionDay = Holiday('Constitution Day', month=6, day=5)

ChristmasEve = christmas_eve()
Christmas = christmas()
Example #19
class TestDatetimeIndexOps:
    def test_ops_properties_basic(self, datetime_series):

        # sanity check that the behavior didn't change
        # GH#7206
        for op in ["year", "day", "second", "weekday"]:
            msg = f"'Series' object has no attribute '{op}'"
            with pytest.raises(AttributeError, match=msg):
                getattr(datetime_series, op)

        # attribute access should still work!
        s = Series({"year": 2000, "month": 1, "day": 10})
        assert s.year == 2000
        assert s.month == 1
        assert s.day == 10
        msg = "'Series' object has no attribute 'weekday'"
        with pytest.raises(AttributeError, match=msg):
            s.weekday

    def test_repeat_range(self, tz_naive_fixture):
        tz = tz_naive_fixture
        rng = date_range("1/1/2000", "1/1/2001")

        result = rng.repeat(5)
        assert result.freq is None
        assert len(result) == 5 * len(rng)

        index = date_range("2001-01-01", periods=2, freq="D", tz=tz)
        exp = DatetimeIndex(
            ["2001-01-01", "2001-01-01", "2001-01-02", "2001-01-02"], tz=tz)
        for res in [index.repeat(2), np.repeat(index, 2)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

        index = date_range("2001-01-01", periods=2, freq="2D", tz=tz)
        exp = DatetimeIndex(
            ["2001-01-01", "2001-01-01", "2001-01-03", "2001-01-03"], tz=tz)
        for res in [index.repeat(2), np.repeat(index, 2)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

        index = DatetimeIndex(["2001-01-01", "NaT", "2003-01-01"], tz=tz)
        exp = DatetimeIndex(
            [
                "2001-01-01",
                "2001-01-01",
                "2001-01-01",
                "NaT",
                "NaT",
                "NaT",
                "2003-01-01",
                "2003-01-01",
                "2003-01-01",
            ],
            tz=tz,
        )
        for res in [index.repeat(3), np.repeat(index, 3)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

    def test_repeat(self, tz_naive_fixture):
        tz = tz_naive_fixture
        reps = 2
        msg = "the 'axis' parameter is not supported"

        rng = date_range(start="2016-01-01", periods=2, freq="30Min", tz=tz)

        expected_rng = DatetimeIndex([
            Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"),
            Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"),
            Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"),
            Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"),
        ])

        res = rng.repeat(reps)
        tm.assert_index_equal(res, expected_rng)
        assert res.freq is None

        tm.assert_index_equal(np.repeat(rng, reps), expected_rng)
        with pytest.raises(ValueError, match=msg):
            np.repeat(rng, reps, axis=1)

    @pytest.mark.parametrize(
        "freq,expected",
        [
            ("A", "day"),
            ("Q", "day"),
            ("M", "day"),
            ("D", "day"),
            ("H", "hour"),
            ("T", "minute"),
            ("S", "second"),
            ("L", "millisecond"),
            ("U", "microsecond"),
        ],
    )
    def test_resolution(self, tz_naive_fixture, freq, expected):
        tz = tz_naive_fixture
        if freq == "A" and not IS64 and isinstance(tz, tzlocal):
            pytest.xfail(reason="OverflowError inside tzlocal past 2038")

        idx = date_range(start="2013-04-01", periods=30, freq=freq, tz=tz)
        assert idx.resolution == expected

    def test_value_counts_unique(self, tz_naive_fixture):
        tz = tz_naive_fixture
        # GH 7735
        idx = date_range("2011-01-01 09:00", freq="H", periods=10)
        # create repeated values, 'n'th element is repeated by n+1 times
        idx = DatetimeIndex(np.repeat(idx.values, range(1,
                                                        len(idx) + 1)),
                            tz=tz)

        exp_idx = date_range("2011-01-01 18:00", freq="-1H", periods=10, tz=tz)
        expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64")
        expected.index = expected.index._with_freq(None)

        for obj in [idx, Series(idx)]:

            tm.assert_series_equal(obj.value_counts(), expected)

        expected = date_range("2011-01-01 09:00", freq="H", periods=10, tz=tz)
        expected = expected._with_freq(None)
        tm.assert_index_equal(idx.unique(), expected)

        idx = DatetimeIndex(
            [
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 08:00",
                "2013-01-01 08:00",
                pd.NaT,
            ],
            tz=tz,
        )

        exp_idx = DatetimeIndex(["2013-01-01 09:00", "2013-01-01 08:00"],
                                tz=tz)
        expected = Series([3, 2], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        exp_idx = DatetimeIndex(
            ["2013-01-01 09:00", "2013-01-01 08:00", pd.NaT], tz=tz)
        expected = Series([3, 2, 1], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(dropna=False), expected)

        tm.assert_index_equal(idx.unique(), exp_idx)

    @pytest.mark.parametrize(
        "idx",
        [
            DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"],
                          freq="D",
                          name="idx"),
            DatetimeIndex(
                ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
                freq="H",
                name="tzidx",
                tz="Asia/Tokyo",
            ),
        ],
    )
    def test_order_with_freq(self, idx):
        ordered = idx.sort_values()
        tm.assert_index_equal(ordered, idx)
        assert ordered.freq == idx.freq

        ordered = idx.sort_values(ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        assert ordered.freq == expected.freq
        assert ordered.freq.n == -1

        ordered, indexer = idx.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, idx)
        tm.assert_numpy_array_equal(indexer,
                                    np.array([0, 1, 2]),
                                    check_dtype=False)
        assert ordered.freq == idx.freq

        ordered, indexer = idx.sort_values(return_indexer=True,
                                           ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        tm.assert_numpy_array_equal(indexer,
                                    np.array([2, 1, 0]),
                                    check_dtype=False)
        assert ordered.freq == expected.freq
        assert ordered.freq.n == -1

    @pytest.mark.parametrize(
        "index_dates,expected_dates",
        [
            (
                [
                    "2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02",
                    "2011-01-01"
                ],
                [
                    "2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03",
                    "2011-01-05"
                ],
            ),
            (
                [
                    "2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02",
                    "2011-01-01"
                ],
                [
                    "2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03",
                    "2011-01-05"
                ],
            ),
            (
                [pd.NaT, "2011-01-03", "2011-01-05", "2011-01-02", pd.NaT],
                [pd.NaT, pd.NaT, "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
        ],
    )
    def test_order_without_freq(self, index_dates, expected_dates,
                                tz_naive_fixture):
        tz = tz_naive_fixture

        # without freq
        index = DatetimeIndex(index_dates, tz=tz, name="idx")
        expected = DatetimeIndex(expected_dates, tz=tz, name="idx")

        ordered = index.sort_values(na_position="first")
        tm.assert_index_equal(ordered, expected)
        assert ordered.freq is None

        ordered = index.sort_values(ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])
        assert ordered.freq is None

        ordered, indexer = index.sort_values(return_indexer=True,
                                             na_position="first")
        tm.assert_index_equal(ordered, expected)

        exp = np.array([0, 4, 3, 1, 2])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq is None

        ordered, indexer = index.sort_values(return_indexer=True,
                                             ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])

        exp = np.array([2, 1, 3, 0, 4])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq is None

    def test_drop_duplicates_metadata(self, freq_sample):
        # GH 10115
        idx = date_range("2011-01-01",
                         freq=freq_sample,
                         periods=10,
                         name="idx")
        result = idx.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert idx.freq == result.freq

        idx_dup = idx.append(idx)
        assert idx_dup.freq is None  # freq is reset
        result = idx_dup.drop_duplicates()
        expected = idx._with_freq(None)
        tm.assert_index_equal(result, expected)
        assert result.freq is None

    @pytest.mark.parametrize(
        "keep, expected, index",
        [
            ("first", np.concatenate(
                ([False] * 10, [True] * 5)), np.arange(0, 10)),
            ("last", np.concatenate(
                ([True] * 5, [False] * 10)), np.arange(5, 15)),
            (
                False,
                np.concatenate(([True] * 5, [False] * 5, [True] * 5)),
                np.arange(5, 10),
            ),
        ],
    )
    def test_drop_duplicates(self, freq_sample, keep, expected, index):
        # to check Index/Series compat
        idx = date_range("2011-01-01",
                         freq=freq_sample,
                         periods=10,
                         name="idx")
        idx = idx.append(idx[:5])

        tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected)
        expected = idx[~expected]

        result = idx.drop_duplicates(keep=keep)
        tm.assert_index_equal(result, expected)

        result = Series(idx).drop_duplicates(keep=keep)
        tm.assert_series_equal(result, Series(expected, index=index))

    def test_infer_freq(self, freq_sample):
        # GH 11018
        idx = date_range("2011-01-01 09:00:00", freq=freq_sample, periods=10)
        result = DatetimeIndex(idx.asi8, freq="infer")
        tm.assert_index_equal(idx, result)
        assert result.freq == freq_sample

    def test_nat(self, tz_naive_fixture):
        tz = tz_naive_fixture
        assert DatetimeIndex._na_value is pd.NaT
        assert DatetimeIndex([])._na_value is pd.NaT

        idx = DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
        assert idx.hasnans is False
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp))

        idx = DatetimeIndex(["2011-01-01", "NaT"], tz=tz)
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
        assert idx.hasnans is True
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1],
                                                            dtype=np.intp))

    @pytest.mark.parametrize("values",
                             [["20180101", "20180103", "20180105"], []])
    @pytest.mark.parametrize(
        "freq", ["2D", Day(2), "2B",
                 BDay(2), "48H", Hour(48)])
    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
    def test_freq_setter(self, values, freq, tz):
        # GH 20678
        idx = DatetimeIndex(values, tz=tz)

        # can set to an offset, converting from string if necessary
        idx._data.freq = freq
        assert idx.freq == freq
        assert isinstance(idx.freq, DateOffset)

        # can reset to None
        idx._data.freq = None
        assert idx.freq is None

    def test_freq_setter_errors(self):
        # GH 20678
        idx = DatetimeIndex(["20180101", "20180103", "20180105"])

        # setting with an incompatible freq
        msg = ("Inferred frequency 2D from passed values does not conform to "
               "passed frequency 5D")
        with pytest.raises(ValueError, match=msg):
            idx._data.freq = "5D"

        # setting with non-freq string
        with pytest.raises(ValueError, match="Invalid frequency"):
            idx._data.freq = "foo"

    def test_freq_view_safe(self):
        # Setting the freq for one DatetimeIndex shouldn't alter the freq
        #  for another that views the same area_data

        dti = date_range("2016-01-01", periods=5)
        dta = dti._data

        dti2 = DatetimeIndex(dta)._with_freq(None)
        assert dti2.freq is None

        # Original was not altered
        assert dti.freq == "D"
        assert dta.freq == "D"
Example #20

ts = pd.Series(np.random.randn(4),
               index = pd.date_range('2000/1/1',periods = 4,freq = 'M'))
ts.shift(2) #a positive integer shifts the data later, a negative one earlier; only the data moves, not the index
ts/ts.shift(1)-1 #period-over-period growth rate of a time series

ts.shift(2,freq = 'M') #with a freq argument the timestamps are shifted instead of the data
ts.shift(2,freq = 'D') #pushes the DatetimeIndex 2 days later
ts.shift(1,freq = '90T') #pushes by one 90-minute step; T stands for minutes

#shifting dates with offsets

from pandas.tseries.offsets import Day,MonthEnd
now = datetime(2011,11,17)
now+3*Day() #the date 3 days later
now+MonthEnd() #the end of the current month
now+MonthEnd(2) #the end of the following month

offset = MonthEnd()
offset.rollforward(now)
offset.rollback(now)

#combining the "roll" methods with groupby is a creative use of date offsets
ts = pd.Series(np.random.randn(20),
               index = pd.date_range('2000/1/15',periods = 20,freq = '4d'))
ts.groupby(MonthEnd().rollforward).mean() #monthly mean, keyed to month-end
ts.resample('M').mean()

'''
Section 4: time zone handling (omitted)
Example #21
    if mult == 1:
        return code
    return str(mult) + code


#----------------------------------------------------------------------
# Offset names ("time rules") and related functions

from pandas.tseries.offsets import (Day, BDay, Hour, Minute, Second, Milli,
                                    Week, Micro, MonthEnd, MonthBegin,
                                    BMonthBegin, BMonthEnd, YearBegin, YearEnd,
                                    BYearBegin, BYearEnd, QuarterBegin,
                                    QuarterEnd, BQuarterBegin, BQuarterEnd)

_offset_map = {
    'D': Day(),
    'B': BDay(),
    'H': Hour(),
    'T': Minute(),
    'S': Second(),
    'L': Milli(),
    'U': Micro(),
    None: None,

    # Monthly - Calendar
    'M': MonthEnd(),
    'MS': MonthBegin(),

    # Monthly - Business
    'BM': BMonthEnd(),
    'BMS': BMonthBegin(),
Example #22
            index - pd.Timedelta(days=MAX_WINDOW),
            index + pd.Timedelta(days=MAX_WINDOW),
        )
        assert (len(holiday_date) !=
                0), f"No closest holiday for the date index {index} found."
        # It sometimes returns two dates if it is exactly half a year after the
        # holiday. In this case, the smaller distance (182 days) is returned.
        return (index - holiday_date[0]).days

    return distance_to_day
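
# Illustrative demo (not in the original source; the 183-day window is an
# assumption standing in for MAX_WINDOW): the closure above windows
# Holiday.dates() around the index and returns the day distance.
_idx = pd.Timestamp("2012-01-10")
_dates = Holiday("New Years Day", month=1, day=1).dates(
    _idx - pd.Timedelta(days=183), _idx + pd.Timedelta(days=183))
print((_idx - _dates[0]).days)  # 9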


EasterSunday = Holiday("Easter Sunday",
                       month=1,
                       day=1,
                       offset=[Easter(), Day(0)])
NewYearsDay = Holiday("New Years Day", month=1, day=1)
SuperBowl = Holiday("Superbowl",
                    month=2,
                    day=1,
                    offset=DateOffset(weekday=SU(1)))
MothersDay = Holiday("Mothers Day",
                     month=5,
                     day=1,
                     offset=DateOffset(weekday=SU(2)))
IndependenceDay = Holiday("Independence Day", month=7, day=4)
ChristmasEve = Holiday("Christmas", month=12, day=24)
ChristmasDay = Holiday("Christmas", month=12, day=25)
NewYearsEve = Holiday("New Years Eve", month=12, day=31)
BlackFriday = Holiday(
    "Black Friday",
Example #23
def selectstorms(flowserie,
                 rainserie,
                 number_of_storms=3,
                 min_period_in_between=7,
                 search_period=7,
                 drywindow=96):
    """ (pd.DataFrame, pd.DataFrame) -> List
    Easy storm selection process, based on the maximum flows measured
    in the given timeserie of flow measurements.

    To define the startdate of the storm, 24h no rain before the Qmax is
    searched for. The end date is found by checking the
    flow at the startdate (Qbase) and searching the moment after Qmax with
    the same flow within the first 2 weeks.
    If none is found, relaxation (1.1*Qbase; 1.2*Qbase,...)
    until a moment is found.

    Parameters
    ----------
    flowserie : pd.Series
        Pandas Series with the date in the index
    rainserie : pd.Series
        Pandas Series with the date in the index
    number_of_storms : int
        Number of storms you want to select
    min_period_in_between : int (days)
        Minimum number of days in between to selected storms
    search_period : int (days)
        Period to look for the start of the storm, when rain started
    drywindow : int
        Number of timesteps to check for no-rain
    """
    if not isinstance(flowserie, pd.Series):
        raise Exception('flowserie must be a single data Series')
    if not isinstance(rainserie, pd.Series):
        raise Exception('rainserie must be a single data Series')

    # fill NA values with a very low (negative) value
    temp = flowserie.fillna(value=-777.).copy()
    # sort the whole array, largest flows first
    try:
        temp = temp.sort_values(temp.columns.tolist(), ascending=False)
    except AttributeError:
        temp = temp.sort_values(ascending=False)

    #find in the index three periods which are at least given number
    # of days from each other
    #after three concurrences, save these dates
    stormmax = [temp.index[0]]  #first element is a selected storm
    i = 1
    while len(stormmax) < number_of_storms:
        #check for each period
        alldif = True
        for stormdate in stormmax:
            if abs(temp.index[i] - stormdate) \
                    < datetime.timedelta(days=min_period_in_between):
                alldif = False
        #if new stormperiod, select
        if alldif:
            stormmax.append(temp.index[i])
        i += 1

    selstorms = []
    for storm in stormmax:
        ##FIND DRY DAY WEEK BEFORE
        #select period before storm (1 week)
        presearchperiod = datetime.timedelta(days=search_period)
        temp1 = rainserie[storm - presearchperiod:storm]
        temp1 = temp1.rolling(window=drywindow, center=False).sum()
        # a zero value means no rain in the preceding 24 hours, so the zeros
        # closest to the date itself minus 24h are selected
        if rainserie.ndim == 2:
            temp1 = temp1.min(axis=1)
        tempdates = temp1[temp1 < 0.001].index.tolist()
        if len(tempdates) == 0:
            raise Exception('Decrease drywindow period containing no rain.')

        date_arg = np.argmin([abs(times - storm) for times in tempdates])
        startstormdate = tempdates[date_arg] - Day()

        #Get the flow value of the storm and when it is found again + 1 Day
        temp2a = flowserie[startstormdate:startstormdate + Week() * 2]

        #only if multiple columns
        if flowserie.ndim == 2:
            temp2 = temp2a.max(axis=1)
        else:
            temp2 = temp2a

        flowbase = temp2.loc[startstormdate]
        lowerafterstorm = temp2[temp2 < flowbase][storm + Day():]
        if lowerafterstorm.size == 0:
            print('Lower initial flow not found again... testing with mean...')
            if flowserie.ndim == 2:
                temp2 = temp2a.mean(axis=1)
            else:
                temp2 = temp2a
            flowbase = temp2.loc[startstormdate]
            lowerafterstorm = temp2[temp2 < flowbase][storm + Day():]
        cnt = 1
        while lowerafterstorm.size == 0:
            print('...    still not working; relaxing conditions: '
                  'threshold raised by another 10% (step {})'.format(cnt))
            flowbase = flowbase + 0.1 * flowbase
            lowerafterstorm = temp2[temp2 < flowbase][storm + Day():]
            cnt += 1
        endstormdate = lowerafterstorm.index[0]

        #add to selected storms
        selstorms.append({
            'startdate': startstormdate,
            'enddate': endstormdate
        })

    return selstorms
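
A minimal usage sketch with synthetic hourly data (all names and values below
are made up for illustration): one smooth flow peak preceded by a short rain
burst, so exactly one storm should be selected.

idx = pd.date_range('2020-01-01', periods=24 * 30, freq='H')
peak = pd.Timestamp('2020-01-10')
hours_from_peak = ((idx - peak) / pd.Timedelta(hours=1)).values
flow = pd.Series(1.0 + 5 * np.exp(-np.abs(hours_from_peak) / 36.0), index=idx)
rain = pd.Series(0.0, index=idx)
rain[peak - pd.Timedelta(hours=6):peak] = 2.0   # rain burst before the peak

storms = selectstorms(flow, rain, number_of_storms=1,
                      search_period=7, drywindow=24)
print(storms)   # [{'startdate': Timestamp(...), 'enddate': Timestamp(...)}]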
Exemplo n.º 24
0
    def _cal_portfolio_returns_between_balancing(self):
        '''
        Compute the stock portfolio's daily returns between rebalancing
        dates (slow).
        '''
        print('_cal_portfolio_returns_between_balancing--1',
              time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
        if self._weights == 'MV':
            self._hist_data['weights'] = self._hist_data['market_value']

        used_columns = [] if self._weights == 'EW' else ['weights']
        hist_data = pd.concat(
            [self._hist_data[['returns'] + used_columns], self._group], axis=1)
        gross_returns = hist_data

        returns_date = gross_returns.index.get_level_values('date')
        portfolio_returns_between_balancing = [0] * (
            len(self._rebalance_date) - 1)

        for i in range(len(self._rebalance_date) - 1):
            #The start and end of a period between balancing
            start_date, end_date = self._rebalance_date[
                i], self._rebalance_date[i + 1]
            if i == len(self._rebalance_date) - 2:
                end_date += Day(1)
            #history data during the period
            returns_between_balancing = gross_returns[
                (returns_date >= start_date) & (returns_date < end_date)]
            returns_between_balancing = (
                returns_between_balancing['returns'].fillna(0) + 1).groupby(
                    'code', group_keys=False).cumprod()
            portfolio_returns_between_balancing[i] = returns_between_balancing

        print('_cal_portfolio_returns_between_balancing--2',
              time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))

        cum_returns_stocks = pd.concat(
            portfolio_returns_between_balancing).sort_index()
        cum_returns_stocks.name = 'cum_returns'
        cum_returns_stocks = pd.concat(
            [cum_returns_stocks, gross_returns[['group'] + used_columns]],
            axis=1)
        #Calculate the portfolio value if start from 1
        group_data = cum_returns_stocks[['cum_returns', 'group'] +
                                        used_columns].groupby(
                                            ['date', 'group'])
        if self._weights == 'EW':
            cum_returns = group_data.mean()
        else:
            cum_returns = group_data.apply(
                lambda df: np.average(df.cum_returns, weights=df.weights))

        print('_cal_portfolio_returns_between_balancing--3',
              time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))

        cum_returns = cum_returns.unstack(level='group')
        returns_date = cum_returns.index = cum_returns.index.get_level_values(
            'date')
        for i in range(len(self._rebalance_date) - 1):
            #The start and end of a period between balancing
            start_date, end_date = self._rebalance_date[
                i], self._rebalance_date[i + 1]
            if i == len(self._rebalance_date) - 2:
                end_date += Day(1)

            cum_returns_between_balancing = cum_returns[
                (returns_date >= start_date) & (returns_date < end_date)]
            returns_between_balancing = cum_returns_between_balancing.pct_change()
            if len(cum_returns_between_balancing) != 0:
                returns_between_balancing.iloc[
                    0] = cum_returns_between_balancing.iloc[0] - 1

            portfolio_returns_between_balancing[i] = returns_between_balancing

        print('_cal_portfolio_returns_between_balancing--4',
              time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))

        return pd.concat(portfolio_returns_between_balancing).sort_index()
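
The core idea, isolated (a sketch with made-up daily returns for a single
rebalance window): compound (1 + r) per stock, average the compounded values
across stocks each day to get an equal-weighted portfolio value, then
pct_change recovers the portfolio's daily returns, with the first day taken
relative to a starting value of 1.

import numpy as np
import pandas as pd

dates = pd.date_range('2020-01-01', periods=5, freq='B')
returns = pd.DataFrame(np.random.randn(5, 3) * 0.01,
                       index=dates, columns=['A', 'B', 'C'])

cum_value = (returns.fillna(0) + 1).cumprod()   # per-stock compounding
portfolio_cum = cum_value.mean(axis=1)          # equal-weighted portfolio value
daily = portfolio_cum.pct_change()
daily.iloc[0] = portfolio_cum.iloc[0] - 1       # first day measured against 1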
Exemplo n.º 25
0
def test_Day_equals_24_Hours():
    ts = Timestamp('2016-10-30 00:00:00+0300', tz='Europe/Helsinki')
    result = ts + Day(1)
    expected = ts + Hour(24)
    assert result == expected
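
For context (a sketch; this reflects the pandas behavior the test above pins
down, which has varied across versions): 2016-10-30 is the night
Europe/Helsinki leaves DST, so that calendar day is 25 hours long. The test
asserts that Day(1) advances by a fixed 24 hours, exactly like Hour(24),
rather than to the same wall-clock time on the next day.

import pandas as pd
from pandas.tseries.offsets import Day

ts = pd.Timestamp('2016-10-30 00:00:00+0300', tz='Europe/Helsinki')
print(ts + pd.Timedelta(hours=24))   # 2016-10-30 23:00:00+02:00
# Day(1) gives the same instant here: a fixed 24 hours, not "next midnight"
print(ts + Day(1))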
Exemplo n.º 26
0
import numpy as np
import pandas as pd
from datetime import datetime

# assumed from an earlier cell: a small monthly series to shift
ts = pd.Series(np.random.randn(4),
               index=pd.date_range('1/1/2000', periods=4, freq='M'))

ts.shift(-2)

# ts / ts.shift(1) - 1
#%%
ts.shift(2, freq='M')

#%%
ts.shift(3, freq='D')
ts.shift(1, freq='90T')

# #### Shifting dates with offsets
#%%
from pandas.tseries.offsets import Day, MonthEnd

now = datetime(2011, 11, 17)
now + 3 * Day()

#%%
now + MonthEnd()
now + MonthEnd(2)

#%%
offset = MonthEnd()
offset.rollforward(now)
offset.rollback(now)

#%%
ts = pd.Series(np.random.randn(20),
               index=pd.date_range('1/15/2000', periods=20, freq='4d'))
ts
ts.groupby(offset.rollforward).mean()
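
A shorter way to get the same month-end means (same ts as above) is to
resample directly:

ts.resample('M').mean()   # same bins as grouping by offset.rollforward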
Exemplo n.º 27
0
        extras.append(dt[dt.weekday == THURSDAY] + timedelta(1))
    return dt.append(extras)


NewYearsDay = new_years_day(observance=four_day_weekend)

NationalHoliday1 = Holiday("National Day",
                           month=3,
                           day=15,
                           observance=four_day_weekend)

# Need custom start year so can't use pandas GoodFriday
GoodFriday = Holiday("Good Friday",
                     month=1,
                     day=1,
                     offset=[Easter(), Day(-2)],
                     start_date="2012")

LabourDay = european_labour_day(observance=four_day_weekend)

WhitMonday = whit_monday()

StStephensDay = Holiday(
    "St. Stephen's Day",
    month=8,
    day=20,
    observance=four_day_weekend,
)

NationalHoliday2 = Holiday(
    "National Day",
Exemplo n.º 28
0
from scipy.stats import mode

import numpy as np
import pandas as pd
from pandas.tseries.offsets import BDay, Day

# timedelta_to_string (a small period-label formatting helper) is assumed
# to be defined elsewhere in this module.


def compute_forward_returns(factor_idx,
                            prices,
                            periods=(1, 5, 10),
                            filter_zscore=None):
    """
    Finds the N period forward returns (as percent change) for each asset
    provided.

    Parameters
    ----------
    factor_idx : pd.DatetimeIndex
        The factor datetimes for which we are computing the forward returns
    prices : pd.DataFrame
        Pricing data to use in forward price calculation.
        Assets as columns, dates as index. Pricing data must
        span the factor analysis time period plus an additional buffer window
        that is greater than the maximum number of expected periods
        in the forward returns calculations.
    periods : sequence[int]
        periods to compute forward returns on.
    filter_zscore : int or float, optional
        Sets forward returns greater than X standard deviations
        from the mean to NaN. Set it to None to avoid filtering.
        Caution: this outlier filtering incorporates lookahead bias.

    Returns
    -------
    forward_returns : pd.DataFrame - MultiIndex
        Forward returns indexed by date and asset.
        Separate column for each forward return window.
    """

    factor_idx = factor_idx.intersection(prices.index)

    forward_returns = pd.DataFrame(index=pd.MultiIndex.from_product(
        [factor_idx, prices.columns], names=['date', 'asset']))

    custom_calendar = False

    for period in periods:

        #
        # build forward returns
        #
        delta = prices.pct_change(period).shift(-period).reindex(factor_idx)

        if filter_zscore is not None:
            mask = abs(delta - delta.mean()) > (filter_zscore * delta.std())
            delta[mask] = np.nan

        #
        # if the period length is not consistent across the factor index then
        # it must be a trading/business day calendar
        #
        time_diffs = prices.index.to_series().diff(period)
        time_diffs = time_diffs.reindex(factor_idx)
        if time_diffs.min() != time_diffs.max():
            custom_calendar = True

        #
        # find the period length that will be the column name
        #
        p_idx = prices.index.get_loc(delta.index[0])
        period_len = prices.index[p_idx + period] - prices.index[p_idx]

        #
        # use business days as an approximation to trading calendar
        #
        if custom_calendar and period_len.components.days > 0:
            entries_to_test = min(50, len(delta.index) - period)
            days_diffs = []
            for i in range(entries_to_test):
                p_idx = prices.index.get_loc(delta.index[i])
                days = len(
                    pd.bdate_range(prices.index[p_idx],
                                   prices.index[p_idx + period])) - 1
                days_diffs.append(days)

            delta_days = period_len.components.days - mode(days_diffs).mode[0]
            period_len -= pd.Timedelta(days=delta_days)

        column_name = timedelta_to_string(period_len)
        forward_returns[column_name] = delta.stack()

    forward_returns.index = forward_returns.index.rename(['date', 'asset'])

    # use business days as an approximation to trading calendar, if this will
    # be proven to be a poor approximation then we could build a pandas
    # AbstractHolidayCalendar inferring non-trading days from price DataFrame
    # and use it to build a CustomBusinessDay DateOffset that we can finally
    # set it as index 'freq'
    freq = BDay() if custom_calendar else Day()
    forward_returns.index.levels[0].freq = freq

    return forward_returns
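
The heart of the computation, isolated (a sketch with hypothetical prices):
an N-period forward return is pct_change(N) shifted back by N rows, so that
each date carries the return realised after it.

import numpy as np
import pandas as pd

prices = pd.DataFrame(100 + np.random.randn(30, 2).cumsum(axis=0),
                      index=pd.date_range('2020-01-01', periods=30, freq='B'),
                      columns=['AAA', 'BBB'])
fwd_5d = prices.pct_change(5).shift(-5)   # 5-day forward returns per asset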
Exemplo n.º 29
0
from django.conf import settings
from pandas.tseries.offsets import YearBegin, QuarterBegin, MonthBegin, Day

# Transformations
VALUE = 'value'
CHANGE = 'change'
PCT_CHANGE = 'percent_change'
CHANGE_YEAR_AGO = 'change_a_year_ago'
PCT_CHANGE_YEAR_AGO = 'percent_change_a_year_ago'

# Pandas freqs
PANDAS_YEAR = YearBegin()
PANDAS_SEMESTER = MonthBegin(6)
PANDAS_QUARTER = QuarterBegin(startingMonth=1)
PANDAS_MONTH = MonthBegin()
PANDAS_WEEK = Day(7)
PANDAS_DAY = Day()

# Frequencies, *in order* from largest to smallest
PANDAS_FREQS = [
    PANDAS_YEAR, PANDAS_SEMESTER, PANDAS_QUARTER, PANDAS_MONTH, PANDAS_WEEK,
    PANDAS_DAY
]
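
A minimal sketch (hypothetical daily series) of how these offsets can drive
resampling; the PANDAS_* constants are the ones defined above:

import pandas as pd

s = pd.Series(range(60),
              index=pd.date_range('2019-01-01', periods=60, freq=PANDAS_DAY))
s.resample(PANDAS_MONTH).mean()     # monthly means, bins anchored at month begin
s.resample(PANDAS_QUARTER).mean()   # quarterly means, quarters starting January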

IDENTIFIER = "identifier"
DATASET_IDENTIFIER = "dataset_identifier"
DOWNLOAD_URL = "downloadURL"

DATASET = 'dataset'
DISTRIBUTION = 'distribution'
FIELD = 'field'
Exemplo n.º 30
0
# Imports assumed from the surrounding pandas test module (0.25-era layout;
# exact locations vary across pandas versions):
from datetime import timedelta

import numpy as np
import pytest

import pandas as pd
from pandas import Series, TimedeltaIndex, timedelta_range
from pandas.core.dtypes.generic import ABCDateOffset
from pandas.tseries.offsets import Day, Hour
import pandas.util.testing as tm

from pandas.tests.test_base import Ops  # pandas' own test mixin


class TestTimedeltaIndexOps(Ops):
    def setup_method(self, method):
        super().setup_method(method)
        mask = lambda x: isinstance(x, TimedeltaIndex)
        self.is_valid_objs = [o for o in self.objs if mask(o)]
        self.not_valid_objs = []

    def test_ops_properties(self):
        f = lambda x: isinstance(x, TimedeltaIndex)
        self.check_ops_properties(TimedeltaIndex._field_ops, f)
        self.check_ops_properties(TimedeltaIndex._object_ops, f)

    def test_value_counts_unique(self):
        # GH 7735

        idx = timedelta_range("1 days 09:00:00", freq="H", periods=10)
        # create repeated values, 'n'th element is repeated by n+1 times
        idx = TimedeltaIndex(np.repeat(idx.values, range(1, len(idx) + 1)))

        exp_idx = timedelta_range("1 days 18:00:00", freq="-1H", periods=10)
        expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64")

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        expected = timedelta_range("1 days 09:00:00", freq="H", periods=10)
        tm.assert_index_equal(idx.unique(), expected)

        idx = TimedeltaIndex([
            "1 days 09:00:00",
            "1 days 09:00:00",
            "1 days 09:00:00",
            "1 days 08:00:00",
            "1 days 08:00:00",
            pd.NaT,
        ])

        exp_idx = TimedeltaIndex(["1 days 09:00:00", "1 days 08:00:00"])
        expected = Series([3, 2], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        exp_idx = TimedeltaIndex(
            ["1 days 09:00:00", "1 days 08:00:00", pd.NaT])
        expected = Series([3, 2, 1], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(dropna=False), expected)

        tm.assert_index_equal(idx.unique(), exp_idx)

    def test_nonunique_contains(self):
        # GH 9512
        for idx in map(
                TimedeltaIndex,
            (
                [0, 1, 0],
                [0, 0, -1],
                [0, -1, -1],
                ["00:01:00", "00:01:00", "00:02:00"],
                ["00:01:00", "00:01:00", "00:00:01"],
            ),
        ):
            assert idx[0] in idx

    def test_unknown_attribute(self):
        # see gh-9680
        tdi = pd.timedelta_range(start=0, periods=10, freq="1s")
        ts = pd.Series(np.random.normal(size=10), index=tdi)
        assert "foo" not in ts.__dict__.keys()
        msg = "'Series' object has no attribute 'foo'"
        with pytest.raises(AttributeError, match=msg):
            ts.foo

    def test_order(self):
        # GH 10295
        idx1 = TimedeltaIndex(["1 day", "2 day", "3 day"],
                              freq="D",
                              name="idx")
        idx2 = TimedeltaIndex(["1 hour", "2 hour", "3 hour"],
                              freq="H",
                              name="idx")

        for idx in [idx1, idx2]:
            ordered = idx.sort_values()
            tm.assert_index_equal(ordered, idx)
            assert ordered.freq == idx.freq

            ordered = idx.sort_values(ascending=False)
            expected = idx[::-1]
            tm.assert_index_equal(ordered, expected)
            assert ordered.freq == expected.freq
            assert ordered.freq.n == -1

            ordered, indexer = idx.sort_values(return_indexer=True)
            tm.assert_index_equal(ordered, idx)
            tm.assert_numpy_array_equal(indexer,
                                        np.array([0, 1, 2]),
                                        check_dtype=False)
            assert ordered.freq == idx.freq

            ordered, indexer = idx.sort_values(return_indexer=True,
                                               ascending=False)
            tm.assert_index_equal(ordered, idx[::-1])
            assert ordered.freq == expected.freq
            assert ordered.freq.n == -1

        idx1 = TimedeltaIndex(
            ["1 hour", "3 hour", "5 hour", "2 hour ", "1 hour"], name="idx1")
        exp1 = TimedeltaIndex(
            ["1 hour", "1 hour", "2 hour", "3 hour", "5 hour"], name="idx1")

        idx2 = TimedeltaIndex(["1 day", "3 day", "5 day", "2 day", "1 day"],
                              name="idx2")

        # TODO(wesm): unused?
        # exp2 = TimedeltaIndex(['1 day', '1 day', '2 day',
        #                        '3 day', '5 day'], name='idx2')

        # idx3 = TimedeltaIndex([pd.NaT, '3 minute', '5 minute',
        #                        '2 minute', pd.NaT], name='idx3')
        # exp3 = TimedeltaIndex([pd.NaT, pd.NaT, '2 minute', '3 minute',
        #                        '5 minute'], name='idx3')

        for idx, expected in [(idx1, exp1), (idx1, exp1), (idx1, exp1)]:
            ordered = idx.sort_values()
            tm.assert_index_equal(ordered, expected)
            assert ordered.freq is None

            ordered = idx.sort_values(ascending=False)
            tm.assert_index_equal(ordered, expected[::-1])
            assert ordered.freq is None

            ordered, indexer = idx.sort_values(return_indexer=True)
            tm.assert_index_equal(ordered, expected)

            exp = np.array([0, 4, 3, 1, 2])
            tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
            assert ordered.freq is None

            ordered, indexer = idx.sort_values(return_indexer=True,
                                               ascending=False)
            tm.assert_index_equal(ordered, expected[::-1])

            exp = np.array([2, 1, 3, 4, 0])
            tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
            assert ordered.freq is None

    def test_drop_duplicates_metadata(self):
        # GH 10115
        idx = pd.timedelta_range("1 day", "31 day", freq="D", name="idx")
        result = idx.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert idx.freq == result.freq

        idx_dup = idx.append(idx)
        assert idx_dup.freq is None  # freq is reset
        result = idx_dup.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert result.freq is None

    def test_drop_duplicates(self):
        # to check Index/Series compat
        base = pd.timedelta_range("1 day", "31 day", freq="D", name="idx")
        idx = base.append(base[:5])

        res = idx.drop_duplicates()
        tm.assert_index_equal(res, base)
        res = Series(idx).drop_duplicates()
        tm.assert_series_equal(res, Series(base))

        res = idx.drop_duplicates(keep="last")
        exp = base[5:].append(base[:5])
        tm.assert_index_equal(res, exp)
        res = Series(idx).drop_duplicates(keep="last")
        tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36)))

        res = idx.drop_duplicates(keep=False)
        tm.assert_index_equal(res, base[5:])
        res = Series(idx).drop_duplicates(keep=False)
        tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31)))

    @pytest.mark.parametrize(
        "freq", ["D", "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"])
    def test_infer_freq(self, freq):
        # GH#11018
        idx = pd.timedelta_range("1", freq=freq, periods=10)
        result = pd.TimedeltaIndex(idx.asi8, freq="infer")
        tm.assert_index_equal(idx, result)
        assert result.freq == freq

    def test_shift(self):
        pass  # handled in test_arithmetic.py

    def test_repeat(self):
        index = pd.timedelta_range("1 days", periods=2, freq="D")
        exp = pd.TimedeltaIndex(["1 days", "1 days", "2 days", "2 days"])
        for res in [index.repeat(2), np.repeat(index, 2)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

        index = TimedeltaIndex(["1 days", "NaT", "3 days"])
        exp = TimedeltaIndex([
            "1 days",
            "1 days",
            "1 days",
            "NaT",
            "NaT",
            "NaT",
            "3 days",
            "3 days",
            "3 days",
        ])
        for res in [index.repeat(3), np.repeat(index, 3)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

    def test_nat(self):
        assert pd.TimedeltaIndex._na_value is pd.NaT
        assert pd.TimedeltaIndex([])._na_value is pd.NaT

        idx = pd.TimedeltaIndex(["1 days", "2 days"])
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
        assert idx.hasnans is False
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp))

        idx = pd.TimedeltaIndex(["1 days", "NaT"])
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
        assert idx.hasnans is True
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1],
                                                            dtype=np.intp))

    def test_equals(self):
        # GH 13107
        idx = pd.TimedeltaIndex(["1 days", "2 days", "NaT"])
        assert idx.equals(idx)
        assert idx.equals(idx.copy())
        assert idx.equals(idx.astype(object))
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        assert not idx.equals(list(idx))
        assert not idx.equals(pd.Series(idx))

        idx2 = pd.TimedeltaIndex(["2 days", "1 days", "NaT"])
        assert not idx.equals(idx2)
        assert not idx.equals(idx2.copy())
        assert not idx.equals(idx2.astype(object))
        assert not idx.astype(object).equals(idx2)
        assert not idx.astype(object).equals(idx2.astype(object))
        assert not idx.equals(list(idx2))
        assert not idx.equals(pd.Series(idx2))

        # Check that we don't raise OverflowError on comparisons outside the
        #  implementation range
        oob = pd.Index([timedelta(days=10**6)] * 3, dtype=object)
        assert not idx.equals(oob)
        assert not idx2.equals(oob)

        # FIXME: oob.apply(np.timedelta64) incorrectly overflows
        oob2 = pd.Index([np.timedelta64(x) for x in oob], dtype=object)
        assert not idx.equals(oob2)
        assert not idx2.equals(oob2)

    @pytest.mark.parametrize("values", [["0 days", "2 days", "4 days"], []])
    @pytest.mark.parametrize("freq", ["2D", Day(2), "48H", Hour(48)])
    def test_freq_setter(self, values, freq):
        # GH 20678
        idx = TimedeltaIndex(values)

        # can set to an offset, converting from string if necessary
        idx._data.freq = freq
        assert idx.freq == freq
        assert isinstance(idx.freq, ABCDateOffset)

        # can reset to None
        idx._data.freq = None
        assert idx.freq is None

    def test_freq_setter_errors(self):
        # GH 20678
        idx = TimedeltaIndex(["0 days", "2 days", "4 days"])

        # setting with an incompatible freq
        msg = ("Inferred frequency 2D from passed values does not conform to "
               "passed frequency 5D")
        with pytest.raises(ValueError, match=msg):
            idx._data.freq = "5D"

        # setting with a non-fixed frequency
        msg = r"<2 \* BusinessDays> is a non-fixed frequency"
        with pytest.raises(ValueError, match=msg):
            idx._data.freq = "2B"

        # setting with non-freq string
        with pytest.raises(ValueError, match="Invalid frequency"):
            idx._data.freq = "foo"