""" In 2010 Independence Day fell on a Saturday. Normally this would mean that Friday is a half day, but instead it is a full day off, so we need to exclude it from the usual half day rules. """ return holidays[holidays.year != 2010] NewYearsDay = new_years_day() MaundyThursday = maundy_thursday() MondayPriorToCorpusChristi = Holiday( 'Monday Prior to Corpus Christi', month=1, day=1, offset=[Easter(), Day(57)], end_date='2008', ) LabourDay = european_labour_day() NavyDay = Holiday('Navy Day', month=5, day=21) SaintPeterAndSaintPaulDay = saint_peter_and_saint_paul_day( observance=nearest_monday, ) OurLadyOfMountCarmelDay = Holiday( "Our Lady of Mount Carmel's Day", month=7, day=16, start_date='2008',
USThanksgivingDay = Holiday("Thanksgiving", month=11, day=1, offset=DateOffset(weekday=TH(4))) USMartinLutherKingJr = Holiday( "Martin Luther King Jr. Day", start_date=datetime(1986, 1, 1), month=1, day=1, offset=DateOffset(weekday=MO(3)), ) USPresidentsDay = Holiday("Presidents Day", month=2, day=1, offset=DateOffset(weekday=MO(3))) GoodFriday = Holiday("Good Friday", month=1, day=1, offset=[Easter(), Day(-2)]) EasterMonday = Holiday("Easter Monday", month=1, day=1, offset=[Easter(), Day(1)]) class USFederalHolidayCalendar(AbstractHolidayCalendar): """ US Federal Government Holiday Calendar based on rules specified by: https://www.opm.gov/policy-data-oversight/ snow-dismissal-procedures/federal-holidays/ """ rules = [
# Fixed-date holiday (March 19) adjusted by ``next_monday_offset``.
# NOTE(review): ``next_monday_offset`` is defined elsewhere; the holiday name
# suggests it moves the date to the following Monday -- confirm.
StJosephsDay = Holiday(
    "St. Joseph's Day (next Monday)",
    month=3,
    day=19,
    offset=next_monday_offset,
)

MaundyThursday = maundy_thursday()

LabourDay = european_labour_day()

# The three holidays below are Easter-relative: each is anchored at January 1
# and then shifted by ``Easter()`` followed by a fixed number of days.
# Presumably Easter() moves the anchor to that year's Easter Sunday, so the
# Day(n) value is the distance from Easter Sunday -- confirm against the
# pandas Holiday documentation.

# Day(43): a multiple of 7 plus 1, i.e. a Monday relative to Easter Sunday.
MondayAfterAscensionDay = Holiday(
    "Monday After Ascension Day",
    month=1,
    day=1,
    offset=[Easter(), Day(43)],
)

# Day(64): three weeks after the Ascension Monday above.
MondayAfterCorpusChristi = Holiday(
    "Monday After Corpus Christi",
    month=1,
    day=1,
    offset=[Easter(), Day(64)],
)

# Day(71): one week after the Corpus Christi Monday above.
MondayAfterSacredHeart = Holiday(
    "Monday After Sacred Heart",
    month=1,
    day=1,
    offset=[Easter(), Day(71)],
)
# Redis client pointing at a local instance, database 10; responses are
# decoded to str.
r = redis.Redis(host='127.0.0.1', port=6379, decode_responses=True, db=10)

# Load the prediction model at import time.
# NOTE(review): pickle.load executes arbitrary code from the file; make sure
# 'person.pkl' only ever comes from a trusted source.
with open('person.pkl', 'rb') as f:
    algo = pickle.load(f)


def func(item, user_id):
    """Return the estimated score (``.est``) that ``algo`` predicts for the pair.

    Note the argument order: ``algo.predict`` is called with ``user_id`` first
    and ``item`` second.
    """
    return algo.predict(user_id, item).est


if __name__ == '__main__':
    # Inputs: score table for two days, users active within two days,
    # items seen within one day.
    start = time.time()
    now_time = datetime.datetime.now()
    # Timestamp exactly one day ago, formatted as a string.
    yes_time = (now_time - 1 * Day()).strftime('%Y-%m-%d %H:%M:%S')
    score = pd.read_csv('score.csv')
    # Drop rows whose created_at is the string 'False' or the boolean False
    # before parsing the column as datetimes.
    score = score[score.created_at != 'False']
    score = score[score.created_at != False]
    score['created_at'] = pd.to_datetime(score['created_at'])
    # Use the timestamp as the index so the frame can be sliced by time.
    score = score.set_index('created_at')
    # Keep only the rows between one day ago and now.
    score = score[yes_time:now_time.strftime('%Y-%m-%d %H:%M:%S')]
    # Distinct items that appear in that window.
    item_uni = score[['item']].drop_duplicates().reset_index(drop=True)
    # Get user_id from the first command-line argument.
    user_id = int(sys.argv[1])
    # Get algo.
    # NOTE(review): this reloads the same 'person.pkl' that was already loaded
    # at module level above; the second load looks redundant -- confirm.
    with open('person.pkl', 'rb') as f:
        algo = pickle.load(f)
_ONE_DAY = 24 * _ONE_HOUR # --------------------------------------------------------------------- # Offset names ("time rules") and related functions #: cache of previously seen offsets _offset_map = {} # type: Dict[str, DateOffset] def get_period_alias(offset_str): """ alias to closest period strings BQ->Q etc""" return _offset_to_period_map.get(offset_str, None) _name_to_offset_map = { "days": Day(1), "hours": Hour(1), "minutes": Minute(1), "seconds": Second(1), "milliseconds": Milli(1), "microseconds": Micro(1), "nanoseconds": Nano(1), } def to_offset(freq): """ Return DateOffset object from string or tuple representation or datetime.timedelta object Parameters
# when, for no explicable reason, Wednesday was a half day instead). "Fridays after Independence Day that aren't in 2013", month=7, day=5, days_of_week=(FRIDAY,), observance=july_5th_holiday_observance, start_date=Timestamp("1995-01-01"), ) USBlackFridayBefore1993 = Holiday( 'Black Friday', month=11, day=1, # Black Friday was not observed until 1992. start_date=Timestamp('1992-01-01'), end_date=Timestamp('1993-01-01'), offset=[DateOffset(weekday=TH(4)), Day(1)], ) USBlackFridayInOrAfter1993 = Holiday( 'Black Friday', month=11, day=1, start_date=Timestamp('1993-01-01'), offset=[DateOffset(weekday=TH(4)), Day(1)], ) BattleOfGettysburg = Holiday( # All of the floor traders in Chicago were sent to PA 'Markets were closed during the battle of Gettysburg', month=7, day=(1, 2, 3), start_date=Timestamp("1863-07-01"), end_date=Timestamp("1863-07-03")
def create_data():
    """Create the pickle data.

    Builds a collection of pandas objects (indexes, series, frames,
    categoricals, timestamps and date offsets) grouped by kind.

    Returns
    -------
    dict
        Keys are ``series``, ``frame``, ``index``, ``scalars``, ``mi``,
        ``sp_series``, ``sp_frame``, ``cat``, ``timestamp`` and ``offsets``;
        each value is a dict mapping a short label to the constructed object.
    """
    # Raw column data shared by several of the Series/DataFrame fixtures below.
    data = {
        "A": [0.0, 1.0, 2.0, 3.0, np.nan],
        "B": [0, 1, 0, 1, 0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        "D": date_range("1/1/2009", periods=5),
        "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0],
    }

    scalars = dict(timestamp=Timestamp("20130101"), period=Period("2012", "M"))

    # One index per supported index flavour.
    index = dict(
        int=Index(np.arange(10)),
        date=date_range("20130101", periods=10),
        period=period_range("2013-01-01", freq="M", periods=10),
        float=Index(np.arange(10, dtype=np.float64)),
        uint=Index(np.arange(10, dtype=np.uint64)),
        timedelta=timedelta_range("00:00:00", freq="30T", periods=10),
    )

    index["range"] = RangeIndex(10)

    # interval_range is only imported and used when the version check passes.
    # NOTE(review): _loose_version is defined elsewhere; presumably it is the
    # running pandas version -- confirm.
    if _loose_version >= LooseVersion("0.21"):
        from pandas import interval_range

        index["interval"] = interval_range(0, periods=10)

    # Two-level MultiIndex built from two parallel label lists.
    mi = dict(reg2=MultiIndex.from_tuples(
        tuple(
            zip(*[
                ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
                ["one", "two", "one", "two", "one", "two", "one", "two"],
            ])),
        names=["first", "second"],
    ))

    series = dict(
        float=Series(data["A"]),
        int=Series(data["B"]),
        mixed=Series(data["E"]),
        ts=Series(np.arange(10).astype(np.int64),
                  index=date_range("20130101", periods=10)),
        mi=Series(
            np.arange(5).astype(np.float64),
            index=MultiIndex.from_tuples(tuple(
                zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                                         names=["one", "two"]),
        ),
        # Index label "A" appears twice on purpose (duplicate labels).
        dup=Series(np.arange(5).astype(np.float64),
                   index=["A", "B", "C", "D", "A"]),
        cat=Series(Categorical(["foo", "bar", "baz"])),
        dt=Series(date_range("20130101", periods=5)),
        dt_tz=Series(date_range("20130101", periods=5, tz="US/Eastern")),
        period=Series([Period("2000Q1")] * 5),
    )

    # Frame with a duplicated column name: the last column "E" is relabelled "A".
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")

    frame = dict(
        float=DataFrame({
            "A": series["float"],
            "B": series["float"] + 1
        }),
        int=DataFrame({
            "A": series["int"],
            "B": series["int"] + 1
        }),
        mixed=DataFrame({k: data[k]
                         for k in ["A", "B", "C", "D"]}),
        mi=DataFrame(
            {
                "A": np.arange(5).astype(np.float64),
                "B": np.arange(5).astype(np.int64)
            },
            index=MultiIndex.from_tuples(
                tuple(
                    zip(*[
                        ["bar", "bar", "baz", "baz", "baz"],
                        ["one", "two", "one", "two", "three"],
                    ])),
                names=["first", "second"],
            ),
        ),
        # Column name "A" appears twice on purpose (duplicate columns).
        dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                      columns=["A", "B", "A"]),
        cat_onecol=DataFrame({"A": Categorical(["foo", "bar"])}),
        cat_and_float=DataFrame({
            "A": Categorical(["foo", "bar", "baz"]),
            "B": np.arange(3).astype(np.int64),
        }),
        mixed_dup=mixed_dup_df,
        # Columns carrying different time zones within one frame.
        dt_mixed_tzs=DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
            },
            index=range(5),
        ),
        dt_mixed2_tzs=DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
                "C": Timestamp("20130603", tz="UTC"),
            },
            index=range(5),
        ),
    )

    # Categoricals with 7, 1000 and 10000 distinct values.
    # NOTE(review): the labels presumably name the resulting codes dtype --
    # confirm.
    cat = dict(
        int8=Categorical(list("abcdefg")),
        int16=Categorical(np.arange(1000)),
        int32=Categorical(np.arange(10000)),
    )

    timestamp = dict(
        normal=Timestamp("2011-01-01"),
        nat=NaT,
        tz=Timestamp("2011-01-01", tz="US/Eastern"),
    )

    timestamp["freq"] = Timestamp("2011-01-01", freq="D")
    timestamp["both"] = Timestamp("2011-01-01", tz="Asia/Tokyo", freq="M")

    # One instance of each date-offset class, several with non-default
    # arguments.
    off = {
        "DateOffset": DateOffset(years=1),
        "DateOffset_h_ns": DateOffset(hour=6, nanoseconds=5824),
        "BusinessDay": BusinessDay(offset=timedelta(seconds=9)),
        "BusinessHour": BusinessHour(normalize=True, n=6, end="15:14"),
        "CustomBusinessDay": CustomBusinessDay(weekmask="Mon Fri"),
        "SemiMonthBegin": SemiMonthBegin(day_of_month=9),
        "SemiMonthEnd": SemiMonthEnd(day_of_month=24),
        "MonthBegin": MonthBegin(1),
        "MonthEnd": MonthEnd(1),
        "QuarterBegin": QuarterBegin(1),
        "QuarterEnd": QuarterEnd(1),
        "Day": Day(1),
        "YearBegin": YearBegin(1),
        "YearEnd": YearEnd(1),
        "Week": Week(1),
        "Week_Tues": Week(2, normalize=False, weekday=1),
        "WeekOfMonth": WeekOfMonth(week=3, weekday=4),
        "LastWeekOfMonth": LastWeekOfMonth(n=1, weekday=3),
        "FY5253": FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        "Easter": Easter(),
        "Hour": Hour(1),
        "Minute": Minute(1),
    }

    # The sparse fixtures come from module-level helpers defined elsewhere.
    return dict(
        series=series,
        frame=frame,
        index=index,
        scalars=scalars,
        mi=mi,
        sp_series=dict(float=_create_sp_series(), ts=_create_sp_tsseries()),
        sp_frame=dict(float=_create_sp_frame()),
        cat=cat,
        timestamp=timestamp,
        offsets=off,
    )
def get_expiry_date_from_horizon_date(self, horizon_date, tenor, cal=None,
                                      asset_class='fx-vol'):
    """Work out the expiry date of an FX option from its horizon date.

    The tenor is split into a unit (its non-digit characters) and a count
    (its digits). ``ON`` is handled as one business day. Day tenors add
    business days, week tenors add calendar weeks and roll onto a business
    day, and every other tenor goes via the spot date, a month-based
    delivery date and ``get_expiry_date_from_delivery_date``.
    Uses expiry rules from Iain Clark's FX option pricing book.

    Parameters
    ----------
    horizon_date : pd.Timestamp (collection)
        Horizon date of contract

    tenor : str
        Tenor of the contract, e.g. 'ON', '3D', '1W', '1M', '1Y'

    cal : str
        Holiday calendar (usually related to the asset)

    asset_class : str
        'fx-vol' - FX options (default); any other value yields ``None``

    Returns
    -------
    pd.Timestamp (collection)
    """
    if asset_class != 'fx-vol':
        return None

    unit = ''.join(re.findall(r'\D+', tenor))
    holidays = self.get_holidays(cal=cal)

    # Overnight is simply one business day.
    if unit == 'ON':
        count, unit = 1, 'D'
    else:
        count = int(''.join(re.findall(r'\d+', tenor)))

    if unit == 'D':
        return horizon_date + CustomBusinessDay(n=count, holidays=holidays)

    if unit == 'W':
        # Calendar weeks first, then roll forward onto a business day (n=0).
        return (horizon_date + Day(n=count * 7)
                + CustomBusinessDay(n=0, holidays=holidays))

    # Everything else is month based and measured from the spot date.
    spot = self.get_spot_date_from_horizon_date(
        horizon_date, cal, asset_holidays=holidays)

    months = count * 12 if unit == 'Y' else count

    one_bday = CustomBusinessDay(n=1, holidays=holidays)

    month_ends = spot + CustomBusinessMonthEnd(months + 1)
    floating_dates = spot + DateOffset(months=months)

    # Scalars are wrapped so the pairing below works for both a single
    # Timestamp and a collection.
    if isinstance(month_ends, pd.Timestamp):
        month_ends = [month_ends]

    if isinstance(floating_dates, pd.Timestamp):
        floating_dates = [floating_dates]

    # TODO: double check this!
    # A floating date before the business month end is stepped back and
    # forward by one business day; otherwise the month end itself is used.
    delivery = pd.DatetimeIndex([
        (floating - one_bday + one_bday) if floating < period_end
        else period_end
        for period_end, floating in zip(month_ends, floating_dates)
    ])

    return self.get_expiry_date_from_delivery_date(delivery, cal)
(t1 - t2).seconds # timedelta object의 초 수 출력 # 1) timedelta를 사용한 날짜 연산 from datetime import timedelta d1 + 100 # 날짜와 숫자 연산 불가 d1 + timedelta(100) # 100일 뒤 # 2) offset으로 사용한 날짜 연산 import pandas.tseries.offsets dir(pandas.tseries.offsets) from pandas.tseries.offsets import Day, Hour, Second Day(5) # 5일 Hour(5) # 5시간 Second(5) # 5초 d1 + Day(100) # [ 연습 문제 ] # emp.csv 파일을 읽고 emp = pd.read_csv('emp.csv') # 1) 년,월,일 각각 추출 emp.HIREDATE.map(lambda x: datetime.strptime(x, '%Y/%m/%d %H:%M:%S')) emp['HIREDATE'] = pd.to_datetime(emp.HIREDATE) emp.HIREDATE.year # Series의 날짜에서는 year 전달 불가 emp.HIREDATE[0].year # scalar의 날짜에서는 year 전달 가능
class TestDatetimeIndexOps(Ops):
    """Tests for DatetimeIndex operations: repeat, sorting, duplicates,
    value counts, NaT handling, equality and the freq setter."""

    def setup_method(self, method):
        """Split ``self.objs`` into datetime-like indexes and everything else."""
        super().setup_method(method)
        # "valid" objects are DatetimeIndex or PeriodIndex instances
        mask = lambda x: (isinstance(x, DatetimeIndex) or isinstance(
            x, PeriodIndex))
        self.is_valid_objs = [o for o in self.objs if mask(o)]
        self.not_valid_objs = [o for o in self.objs if not mask(o)]

    def test_ops_properties(self):
        """Run the shared property checks for field, object and bool ops."""
        f = lambda x: isinstance(x, DatetimeIndex)
        self.check_ops_properties(DatetimeIndex._field_ops, f)
        self.check_ops_properties(DatetimeIndex._object_ops, f)
        self.check_ops_properties(DatetimeIndex._bool_ops, f)

    def test_ops_properties_basic(self):
        """Datetime fields are not attributes of a Series, but label access is."""

        # sanity check that the behavior didn't change
        # GH#7206
        for op in ["year", "day", "second", "weekday"]:
            msg = f"'Series' object has no attribute '{op}'"
            with pytest.raises(AttributeError, match=msg):
                getattr(self.dt_series, op)

        # attribute access should still work!
        s = Series(dict(year=2000, month=1, day=10))
        assert s.year == 2000
        assert s.month == 1
        assert s.day == 10
        # "weekday" is not one of the labels, so it must still raise
        msg = "'Series' object has no attribute 'weekday'"
        with pytest.raises(AttributeError, match=msg):
            s.weekday

    def test_repeat_range(self, tz_naive_fixture):
        """repeat() multiplies the length and always drops the freq."""
        tz = tz_naive_fixture
        rng = date_range("1/1/2000", "1/1/2001")

        result = rng.repeat(5)
        assert result.freq is None
        assert len(result) == 5 * len(rng)

        index = pd.date_range("2001-01-01", periods=2, freq="D", tz=tz)
        exp = pd.DatetimeIndex(
            ["2001-01-01", "2001-01-01", "2001-01-02", "2001-01-02"], tz=tz)
        # the method and the numpy function must agree
        for res in [index.repeat(2), np.repeat(index, 2)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

        index = pd.date_range("2001-01-01", periods=2, freq="2D", tz=tz)
        exp = pd.DatetimeIndex(
            ["2001-01-01", "2001-01-01", "2001-01-03", "2001-01-03"], tz=tz)
        for res in [index.repeat(2), np.repeat(index, 2)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

        # NaT entries are repeated like any other value
        index = pd.DatetimeIndex(["2001-01-01", "NaT", "2003-01-01"], tz=tz)
        exp = pd.DatetimeIndex(
            [
                "2001-01-01",
                "2001-01-01",
                "2001-01-01",
                "NaT",
                "NaT",
                "NaT",
                "2003-01-01",
                "2003-01-01",
                "2003-01-01",
            ],
            tz=tz,
        )
        for res in [index.repeat(3), np.repeat(index, 3)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

    def test_repeat(self, tz_naive_fixture):
        """repeat() on a 30-minute range; np.repeat rejects ``axis``."""
        tz = tz_naive_fixture
        reps = 2
        msg = "the 'axis' parameter is not supported"

        rng = pd.date_range(start="2016-01-01", periods=2, freq="30Min", tz=tz)

        expected_rng = DatetimeIndex([
            Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"),
            Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"),
            Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"),
            Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"),
        ])

        res = rng.repeat(reps)
        tm.assert_index_equal(res, expected_rng)
        assert res.freq is None

        tm.assert_index_equal(np.repeat(rng, reps), expected_rng)
        with pytest.raises(ValueError, match=msg):
            np.repeat(rng, reps, axis=1)

    def test_resolution(self, tz_naive_fixture):
        """Each frequency alias maps to the expected ``resolution`` string."""
        tz = tz_naive_fixture
        # annual/quarterly/monthly/daily ranges all report "day" resolution
        for freq, expected in zip(
                ["A", "Q", "M", "D", "H", "T", "S", "L", "U"],
                [
                    "day",
                    "day",
                    "day",
                    "day",
                    "hour",
                    "minute",
                    "second",
                    "millisecond",
                    "microsecond",
                ],
        ):
            idx = pd.date_range(start="2013-04-01",
                                periods=30,
                                freq=freq,
                                tz=tz)
            assert idx.resolution == expected

    def test_value_counts_unique(self, tz_naive_fixture):
        """value_counts() and unique() on indexes with repeats and NaT."""
        tz = tz_naive_fixture
        # GH 7735
        idx = pd.date_range("2011-01-01 09:00", freq="H", periods=10)
        # create repeated values, 'n'th element is repeated by n+1 times
        idx = DatetimeIndex(np.repeat(idx.values, range(1,
                                                        len(idx) + 1)),
                            tz=tz)

        # counts come back largest first, so the expected index runs backwards
        exp_idx = pd.date_range("2011-01-01 18:00",
                                freq="-1H",
                                periods=10,
                                tz=tz)
        expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64")

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        expected = pd.date_range("2011-01-01 09:00",
                                 freq="H",
                                 periods=10,
                                 tz=tz)
        tm.assert_index_equal(idx.unique(), expected)

        idx = DatetimeIndex(
            [
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 08:00",
                "2013-01-01 08:00",
                pd.NaT,
            ],
            tz=tz,
        )

        # by default NaT is excluded from the counts
        exp_idx = DatetimeIndex(["2013-01-01 09:00", "2013-01-01 08:00"],
                                tz=tz)
        expected = Series([3, 2], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        # with dropna=False NaT gets its own entry
        exp_idx = DatetimeIndex(
            ["2013-01-01 09:00", "2013-01-01 08:00", pd.NaT], tz=tz)
        expected = Series([3, 2, 1], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(dropna=False), expected)

        tm.assert_index_equal(idx.unique(), exp_idx)

    def test_nonunique_contains(self):
        """``in`` works on indexes that contain duplicate values."""
        # GH 9512
        for idx in map(
                DatetimeIndex,
            (
                [0, 1, 0],
                [0, 0, -1],
                [0, -1, -1],
                ["2015", "2015", "2016"],
                ["2015", "2015", "2014"],
            ),
        ):
            assert idx[0] in idx

    @pytest.mark.parametrize(
        "idx",
        [
            DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"],
                          freq="D",
                          name="idx"),
            DatetimeIndex(
                ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
                freq="H",
                name="tzidx",
                tz="Asia/Tokyo",
            ),
        ],
    )
    def test_order_with_freq(self, idx):
        """Sorting an index that has a freq keeps (or negates) that freq."""
        ordered = idx.sort_values()
        tm.assert_index_equal(ordered, idx)
        assert ordered.freq == idx.freq

        # descending order yields a negative-step freq
        ordered = idx.sort_values(ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        assert ordered.freq == expected.freq
        assert ordered.freq.n == -1

        ordered, indexer = idx.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, idx)
        tm.assert_numpy_array_equal(indexer,
                                    np.array([0, 1, 2]),
                                    check_dtype=False)
        assert ordered.freq == idx.freq

        ordered, indexer = idx.sort_values(return_indexer=True,
                                           ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        tm.assert_numpy_array_equal(indexer,
                                    np.array([2, 1, 0]),
                                    check_dtype=False)
        assert ordered.freq == expected.freq
        assert ordered.freq.n == -1

    # NOTE(review): the first two parameter sets below are identical, so the
    # same case runs twice -- confirm whether a different case was intended.
    @pytest.mark.parametrize(
        "index_dates,expected_dates",
        [
            (
                [
                    "2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02",
                    "2011-01-01"
                ],
                [
                    "2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03",
                    "2011-01-05"
                ],
            ),
            (
                [
                    "2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02",
                    "2011-01-01"
                ],
                [
                    "2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03",
                    "2011-01-05"
                ],
            ),
            (
                [pd.NaT, "2011-01-03", "2011-01-05", "2011-01-02", pd.NaT],
                [pd.NaT, pd.NaT, "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
        ],
    )
    def test_order_without_freq(self, index_dates, expected_dates,
                                tz_naive_fixture):
        """Sorting an index without a freq gives sorted values and no freq."""
        tz = tz_naive_fixture

        # without freq
        index = DatetimeIndex(index_dates, tz=tz, name="idx")
        expected = DatetimeIndex(expected_dates, tz=tz, name="idx")

        ordered = index.sort_values()
        tm.assert_index_equal(ordered, expected)
        assert ordered.freq is None

        ordered = index.sort_values(ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])
        assert ordered.freq is None

        ordered, indexer = index.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, expected)

        exp = np.array([0, 4, 3, 1, 2])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq is None

        ordered, indexer = index.sort_values(return_indexer=True,
                                             ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])

        exp = np.array([2, 1, 3, 4, 0])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq is None

    def test_drop_duplicates_metadata(self):
        """drop_duplicates() keeps freq on a unique index, not on an appended one."""
        # GH 10115
        idx = pd.date_range("2011-01-01", "2011-01-31", freq="D", name="idx")
        result = idx.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert idx.freq == result.freq

        idx_dup = idx.append(idx)
        assert idx_dup.freq is None  # freq is reset
        result = idx_dup.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert result.freq is None

    def test_drop_duplicates(self):
        """drop_duplicates() agrees between Index and Series for every ``keep``."""
        # to check Index/Series compat
        base = pd.date_range("2011-01-01", "2011-01-31", freq="D", name="idx")
        # the first five dates are appended again, giving 36 entries
        idx = base.append(base[:5])

        res = idx.drop_duplicates()
        tm.assert_index_equal(res, base)
        res = Series(idx).drop_duplicates()
        tm.assert_series_equal(res, Series(base))

        res = idx.drop_duplicates(keep="last")
        exp = base[5:].append(base[:5])
        tm.assert_index_equal(res, exp)
        res = Series(idx).drop_duplicates(keep="last")
        tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36)))

        # keep=False removes every value that occurs more than once
        res = idx.drop_duplicates(keep=False)
        tm.assert_index_equal(res, base[5:])
        res = Series(idx).drop_duplicates(keep=False)
        tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31)))

    @pytest.mark.parametrize(
        "freq",
        [
            "A",
            "2A",
            "-2A",
            "Q",
            "-1Q",
            "M",
            "-1M",
            "D",
            "3D",
            "-3D",
            "W",
            "-1W",
            "H",
            "2H",
            "-2H",
            "T",
            "2T",
            "S",
            "-3S",
        ],
    )
    def test_infer_freq(self, freq):
        """freq="infer" recovers the original freq from raw i8 values."""
        # GH 11018
        idx = pd.date_range("2011-01-01 09:00:00", freq=freq, periods=10)
        result = pd.DatetimeIndex(idx.asi8, freq="infer")
        tm.assert_index_equal(idx, result)
        assert result.freq == freq

    def test_nat(self, tz_naive_fixture):
        """NaT is the NA value; ``_isnan``, ``hasnans`` and ``_nan_idxs`` agree."""
        tz = tz_naive_fixture
        assert pd.DatetimeIndex._na_value is pd.NaT
        assert pd.DatetimeIndex([])._na_value is pd.NaT

        idx = pd.DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
        assert idx.hasnans is False
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp))

        idx = pd.DatetimeIndex(["2011-01-01", "NaT"], tz=tz)
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
        assert idx.hasnans is True
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1],
                                                            dtype=np.intp))

    def test_equals(self):
        """equals() across copies, object dtype, other containers and time zones."""
        # GH 13107
        idx = pd.DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"])
        assert idx.equals(idx)
        assert idx.equals(idx.copy())
        assert idx.equals(idx.astype(object))
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        # lists and Series are never "equal" to an Index
        assert not idx.equals(list(idx))
        assert not idx.equals(pd.Series(idx))

        idx2 = pd.DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"],
                                tz="US/Pacific")
        assert not idx.equals(idx2)
        assert not idx.equals(idx2.copy())
        assert not idx.equals(idx2.astype(object))
        assert not idx.astype(object).equals(idx2)
        assert not idx.equals(list(idx2))
        assert not idx.equals(pd.Series(idx2))

        # same internal, different tz
        idx3 = pd.DatetimeIndex._simple_new(idx.asi8, tz="US/Pacific")
        tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
        assert not idx.equals(idx3)
        assert not idx.equals(idx3.copy())
        assert not idx.equals(idx3.astype(object))
        assert not idx.astype(object).equals(idx3)
        assert not idx.equals(list(idx3))
        assert not idx.equals(pd.Series(idx3))

        # check that we do not raise when comparing with OutOfBounds objects
        oob = pd.Index([datetime(2500, 1, 1)] * 3, dtype=object)
        assert not idx.equals(oob)
        assert not idx2.equals(oob)
        assert not idx3.equals(oob)

        # check that we do not raise when comparing with OutOfBounds dt64
        oob2 = oob.map(np.datetime64)
        assert not idx.equals(oob2)
        assert not idx2.equals(oob2)
        assert not idx3.equals(oob2)

    @pytest.mark.parametrize("values",
                             [["20180101", "20180103", "20180105"], []])
    @pytest.mark.parametrize(
        "freq", ["2D", Day(2), "2B", BDay(2), "48H", Hour(48)])
    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
    def test_freq_setter(self, values, freq, tz):
        """The freq can be set from a string or offset and reset to None."""
        # GH 20678
        idx = DatetimeIndex(values, tz=tz)

        # can set to an offset, converting from string if necessary
        idx._data.freq = freq
        assert idx.freq == freq
        assert isinstance(idx.freq, ABCDateOffset)

        # can reset to None
        idx._data.freq = None
        assert idx.freq is None

    def test_freq_setter_errors(self):
        """Setting an incompatible or unparseable freq raises ValueError."""
        # GH 20678
        idx = DatetimeIndex(["20180101", "20180103", "20180105"])

        # setting with an incompatible freq
        msg = ("Inferred frequency 2D from passed values does not conform to "
               "passed frequency 5D")
        with pytest.raises(ValueError, match=msg):
            idx._data.freq = "5D"

        # setting with non-freq string
        with pytest.raises(ValueError, match="Invalid frequency"):
            idx._data.freq = "foo"
def get_delivery_date_from_horizon_date(self, horizon_date, tenor, cal=None,
                                        asset_class='fx'):
    """Work out the delivery date of an FX contract from its horizon date.

    The tenor is split into a unit (its non-digit characters) and a count
    (its digits). ``ON`` and ``TN`` are one and two business days from the
    horizon date. Every other tenor is measured from the spot date:
    ``SP`` is the spot date itself, ``SN`` is one business day after it,
    day tenors add business days, week tenors add calendar weeks and roll
    onto a business day, and anything else is treated as a number of
    months (``Y`` counts twelve months each).

    Parameters
    ----------
    horizon_date : pd.Timestamp (collection)
        Horizon date of contract

    tenor : str
        Tenor of the contract, e.g. 'ON', 'TN', 'SP', 'SN', '1W', '1M'

    cal : str
        Holiday calendar (usually related to the asset)

    asset_class : str
        Handled only when it contains 'fx'; otherwise ``None`` is returned

    Returns
    -------
    pd.Timestamp or pd.DatetimeIndex
        A DatetimeIndex is returned for month and year tenors.
    """
    if 'fx' not in asset_class:
        return None

    unit = ''.join(re.findall(r'\D+', tenor))
    holidays = self.get_holidays(cal=cal)

    # Tenors that settle before spot are counted from the horizon date.
    pre_spot_days = {'ON': 1, 'TN': 2}

    if unit in pre_spot_days:
        return horizon_date + CustomBusinessDay(
            n=pre_spot_days[unit], holidays=holidays)

    if unit == 'SN':
        # Spot-next is one business day after spot.
        unit, count = 'D', 1
    elif unit != 'SP':
        count = int(''.join(re.findall(r'\d+', tenor)))

    spot = self.get_spot_date_from_horizon_date(
        horizon_date, cal, asset_holidays=holidays)

    if 'SP' in unit:
        return spot

    if unit == 'D':
        return spot + CustomBusinessDay(n=count, holidays=holidays)

    if unit == 'W':
        # Calendar weeks first, then roll forward onto a business day (n=0).
        return (spot + Day(n=count * 7)
                + CustomBusinessDay(n=0, holidays=holidays))

    months = count * 12 if unit == 'Y' else count

    month_ends = spot + CustomBusinessMonthEnd(months + 1)
    floating_dates = spot + DateOffset(months=months)

    one_bday = CustomBusinessDay(n=1, holidays=holidays)

    # Scalars are wrapped so the pairing below works for both a single
    # Timestamp and a collection.
    if isinstance(month_ends, pd.Timestamp):
        month_ends = [month_ends]

    if isinstance(floating_dates, pd.Timestamp):
        floating_dates = [floating_dates]

    # A floating date before the business month end is stepped back and
    # forward by one business day; otherwise the month end itself is used.
    return pd.DatetimeIndex([
        (floating - one_bday + one_bday) if floating < period_end
        else period_end
        for period_end, floating in zip(month_ends, floating_dates)
    ])
# Convert str to floats for col in sales_df.columns: sales_df[col] = sales_df[col].values.astype(float) # Add total columsn sales_df['monthly_avg'] = round( sales_df.iloc[:, :].sum(axis=1) / len(years), 2) return sales_df, months, years if __name__ == '__main__': # Set up date today = dt.datetime.today() today_prev = today - Day(30) today_str = today.strftime('%Y%m') today_str_prev = today_prev.strftime('%Y%m') # Prepare data url = create_url(today_str_prev) html = read_webpage(url) data = prepare_data(html) # Print data[0] # Plot 2020 fig = px.bar(data[0], x=data[1], y='2020',
christmas_eve, corpus_christi, european_labour_day, immaculate_conception, new_years_day, new_years_eve, ) from .exchange_calendar import WEEKDAYS, HolidayCalendar, ExchangeCalendar NewYearsDay = new_years_day() Carnival = Holiday( "Carnival", month=1, day=1, offset=[Easter(), Day(-47)], end_date="2003", ) CorpusChristi = corpus_christi(end_date="2003") LibertyDay = Holiday( "Liberty Day", month=4, day=25, end_date="2003", ) LabourDay = european_labour_day() PortugalDay = Holiday( "Portugal Day",
print(ts.shift(2))
print(ts.shift(-2))

# shift is commonly used to compute the percent change in one time series or
# in several (such as the columns of a DataFrame). It can be written like this:
print(ts / ts.shift(1) - 1)

# A plain shift leaves the index untouched, so some data is discarded.
# If the frequency is known it can be passed to shift, which then moves the
# timestamps instead of simply moving the data.
print(ts.shift(2, freq='M'))

# Other frequencies can be passed as well, which gives a lot of flexibility
# for leading and lagging the data.
print(ts.shift(3, freq='D'))
print(ts.shift(1, freq='90T'))

# pandas date offsets can also be used with datetime or Timestamp objects.
from pandas.tseries.offsets import Day, MonthEnd

now = datetime(2011, 11, 17)
print(now)
print(now + 3 * Day())

# When an anchored offset (such as MonthEnd) is added, the first increment
# rolls the date forward to the next date that satisfies the frequency rule.
print(now + MonthEnd())
print(now + MonthEnd(2))

# The rollforward and rollback methods of an anchored offset explicitly
# "roll" a date forward or backward.
offset = MonthEnd()
print(offset.rollforward(now))
print(offset.rollback(now))

# A neat use of date offsets is to combine these "rolling" methods with
# groupby.
ts = pd.Series(np.random.randn(20),
               index=pd.date_range('1/15/2000', periods=20, freq='4d'))
print(ts)
print(ts.groupby(offset.rollforward).mean())

# Of course, a simpler and faster way to do the same thing is resample.
print(ts.resample('M').mean())
try: cday = CDay() except NotImplementedError: cday = None #: cache of previously seen offsets _offset_map = {} def get_period_alias(offset_str): """ alias to closest period strings BQ->Q etc""" return _offset_to_period_map.get(offset_str, None) _name_to_offset_map = { 'days': Day(1), 'hours': Hour(1), 'minutes': Minute(1), 'seconds': Second(1), 'milliseconds': Milli(1), 'microseconds': Micro(1), 'nanoseconds': Nano(1) } def to_offset(freq): """ Return DateOffset object from string or tuple representation or datetime.timedelta object Parameters
def add_day(old_date, n=1):
    """Shift a date by ``n`` days and format it down to the hour.

    ``old_date`` is parsed with ``pd.to_datetime``, moved by ``n`` day
    offsets (one by default) and rendered as ``'%Y-%m-%d %H'``; minutes and
    seconds are dropped from the output.
    """
    parsed = pd.to_datetime(old_date)
    shift = n * Day()
    return (parsed + shift).strftime('%Y-%m-%d %H')
# mode 众数 groups.transform(lambda x: x.mode()) df.groupby(['A', 'B'])['C'].quantile(0.9) # 时间偏移 pd.date_range('2000-01-01', '2000-01-03', freq=Hour(12)) Hour(2) + Minute(30) ts = pd.Series(np.random.randn(5), pd.date_range('2000-01-01', '2000-01-03', freq=Hour(12))) # 注意这里直接移动的是作为时间的index ts.shift(2, freq=Hour(12)) now = datetime(2020, 3, 1) now + 3 * Day() now + MonthEnd() now + MonthEnd(2) index = pd.date_range('2000-01-01', '2000-03-03', freq=Day(4)) offset = MonthEnd() ts = pd.Series(np.random.randn(len(index)), index) ts.groupby(offset.rollforward).count() ts.groupby(offset.rollback).count() # 按频率提取 ts.asfreq('W', how='start') ts.asfreq('W', how='end') # 根据时间区间进行重采样,提取平均 ts.resample('M')
ascension_day, whit_monday, christmas_eve, christmas, boxing_day, new_years_eve, ) NewYearsDay = new_years_day() MaundyThursday = maundy_thursday() GeneralPrayerDay = Holiday( 'General Prayer Day', month=1, day=1, offset=[Easter(), Day(26)], ) AscensionDay = ascension_day() BankHoliday = Holiday( 'Bank Holiday', month=1, day=1, offset=[Easter(), Day(40)], start_date='2009', ) WhitMonday = whit_monday() ConstitutionDay = Holiday('Constitution Day', month=6, day=5) ChristmasEve = christmas_eve() Christmas = christmas()
class TestDatetimeIndexOps:
    """Tests for generic index operations on ``DatetimeIndex``.

    Covers repeat, resolution, value_counts/unique, sort_values,
    drop_duplicates, frequency inference, NaT handling and the ``freq``
    setter. Several tests take pytest fixtures (``tz_naive_fixture``,
    ``freq_sample``, ``datetime_series``) that are defined outside this class.
    """

    def test_ops_properties_basic(self, datetime_series):
        """Datetime field names are not attributes of a Series, but label
        based attribute access on a Series still works."""
        # sanity check that the behavior didn't change
        # GH#7206
        for op in ["year", "day", "second", "weekday"]:
            msg = f"'Series' object has no attribute '{op}'"
            with pytest.raises(AttributeError, match=msg):
                getattr(datetime_series, op)

        # attribute access should still work!
        s = Series({"year": 2000, "month": 1, "day": 10})
        assert s.year == 2000
        assert s.month == 1
        assert s.day == 10
        msg = "'Series' object has no attribute 'weekday'"
        with pytest.raises(AttributeError, match=msg):
            s.weekday

    def test_repeat_range(self, tz_naive_fixture):
        """``repeat`` (method and ``np.repeat``) duplicates each element in
        place and drops the frequency, including for NaT entries."""
        tz = tz_naive_fixture
        rng = date_range("1/1/2000", "1/1/2001")

        result = rng.repeat(5)
        assert result.freq is None
        assert len(result) == 5 * len(rng)

        index = date_range("2001-01-01", periods=2, freq="D", tz=tz)
        exp = DatetimeIndex(
            ["2001-01-01", "2001-01-01", "2001-01-02", "2001-01-02"], tz=tz)
        for res in [index.repeat(2), np.repeat(index, 2)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

        index = date_range("2001-01-01", periods=2, freq="2D", tz=tz)
        exp = DatetimeIndex(
            ["2001-01-01", "2001-01-01", "2001-01-03", "2001-01-03"], tz=tz)
        for res in [index.repeat(2), np.repeat(index, 2)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

        index = DatetimeIndex(["2001-01-01", "NaT", "2003-01-01"], tz=tz)
        exp = DatetimeIndex(
            [
                "2001-01-01",
                "2001-01-01",
                "2001-01-01",
                "NaT",
                "NaT",
                "NaT",
                "2003-01-01",
                "2003-01-01",
                "2003-01-01",
            ],
            tz=tz,
        )
        for res in [index.repeat(3), np.repeat(index, 3)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

    def test_repeat(self, tz_naive_fixture):
        """Repeating a 30-minute range gives the expected index without a
        freq; ``np.repeat`` with an ``axis`` argument raises ValueError."""
        tz = tz_naive_fixture
        reps = 2
        msg = "the 'axis' parameter is not supported"

        rng = date_range(start="2016-01-01", periods=2, freq="30Min", tz=tz)

        expected_rng = DatetimeIndex([
            Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"),
            Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"),
            Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"),
            Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"),
        ])

        res = rng.repeat(reps)
        tm.assert_index_equal(res, expected_rng)
        assert res.freq is None

        tm.assert_index_equal(np.repeat(rng, reps), expected_rng)
        with pytest.raises(ValueError, match=msg):
            np.repeat(rng, reps, axis=1)

    @pytest.mark.parametrize(
        "freq,expected",
        [
            ("A", "day"),
            ("Q", "day"),
            ("M", "day"),
            ("D", "day"),
            ("H", "hour"),
            ("T", "minute"),
            ("S", "second"),
            ("L", "millisecond"),
            ("U", "microsecond"),
        ],
    )
    def test_resolution(self, tz_naive_fixture, freq, expected):
        """``resolution`` reports the expected unit name for each frequency."""
        tz = tz_naive_fixture
        # annual ranges with a local tz on non-64-bit builds are marked xfail
        if freq == "A" and not IS64 and isinstance(tz, tzlocal):
            pytest.xfail(reason="OverflowError inside tzlocal past 2038")

        idx = date_range(start="2013-04-01", periods=30, freq=freq, tz=tz)
        assert idx.resolution == expected

    def test_value_counts_unique(self, tz_naive_fixture):
        """``value_counts`` and ``unique`` agree for the index and for a
        Series built from it, with and without NaT."""
        tz = tz_naive_fixture
        # GH 7735
        idx = date_range("2011-01-01 09:00", freq="H", periods=10)
        # create repeated values, 'n'th element is repeated by n+1 times
        idx = DatetimeIndex(np.repeat(idx.values, range(1, len(idx) + 1)), tz=tz)

        exp_idx = date_range("2011-01-01 18:00", freq="-1H", periods=10, tz=tz)
        expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64")
        expected.index = expected.index._with_freq(None)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        expected = date_range("2011-01-01 09:00", freq="H", periods=10, tz=tz)
        expected = expected._with_freq(None)
        tm.assert_index_equal(idx.unique(), expected)

        idx = DatetimeIndex(
            [
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 08:00",
                "2013-01-01 08:00",
                pd.NaT,
            ],
            tz=tz,
        )

        exp_idx = DatetimeIndex(["2013-01-01 09:00", "2013-01-01 08:00"], tz=tz)
        expected = Series([3, 2], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        exp_idx = DatetimeIndex(
            ["2013-01-01 09:00", "2013-01-01 08:00", pd.NaT], tz=tz)
        expected = Series([3, 2, 1], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(dropna=False), expected)

        tm.assert_index_equal(idx.unique(), exp_idx)

    @pytest.mark.parametrize(
        "idx",
        [
            DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"],
                          freq="D",
                          name="idx"),
            DatetimeIndex(
                ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
                freq="H",
                name="tzidx",
                tz="Asia/Tokyo",
            ),
        ],
    )
    def test_order_with_freq(self, idx):
        """Sorting an index that has a freq keeps the freq when ascending and
        yields a freq with ``n == -1`` when descending."""
        ordered = idx.sort_values()
        tm.assert_index_equal(ordered, idx)
        assert ordered.freq == idx.freq

        ordered = idx.sort_values(ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        assert ordered.freq == expected.freq
        assert ordered.freq.n == -1

        ordered, indexer = idx.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, idx)
        tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False)
        assert ordered.freq == idx.freq

        ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False)
        assert ordered.freq == expected.freq
        assert ordered.freq.n == -1

    # NOTE(review): the first two parameter sets below are identical.
    @pytest.mark.parametrize(
        "index_dates,expected_dates",
        [
            (
                [
                    "2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02",
                    "2011-01-01"
                ],
                [
                    "2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03",
                    "2011-01-05"
                ],
            ),
            (
                [
                    "2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02",
                    "2011-01-01"
                ],
                [
                    "2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03",
                    "2011-01-05"
                ],
            ),
            (
                [pd.NaT, "2011-01-03", "2011-01-05", "2011-01-02", pd.NaT],
                [pd.NaT, pd.NaT, "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
        ],
    )
    def test_order_without_freq(self, index_dates, expected_dates,
                                tz_naive_fixture):
        """Sorting an index without a freq orders the values, returns the
        expected indexer and leaves ``freq`` as None."""
        tz = tz_naive_fixture

        # without freq
        index = DatetimeIndex(index_dates, tz=tz, name="idx")
        expected = DatetimeIndex(expected_dates, tz=tz, name="idx")

        ordered = index.sort_values(na_position="first")
        tm.assert_index_equal(ordered, expected)
        assert ordered.freq is None

        ordered = index.sort_values(ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])
        assert ordered.freq is None

        ordered, indexer = index.sort_values(return_indexer=True,
                                             na_position="first")
        tm.assert_index_equal(ordered, expected)

        exp = np.array([0, 4, 3, 1, 2])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq is None

        ordered, indexer = index.sort_values(return_indexer=True,
                                             ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])

        exp = np.array([2, 1, 3, 0, 4])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq is None

    def test_drop_duplicates_metadata(self, freq_sample):
        """``drop_duplicates`` keeps the freq on a unique index and returns a
        freq-less index after duplicates were appended."""
        # GH 10115
        idx = date_range("2011-01-01", freq=freq_sample, periods=10, name="idx")
        result = idx.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert idx.freq == result.freq

        idx_dup = idx.append(idx)
        assert idx_dup.freq is None  # freq is reset
        result = idx_dup.drop_duplicates()
        expected = idx._with_freq(None)
        tm.assert_index_equal(result, expected)
        assert result.freq is None

    @pytest.mark.parametrize(
        "keep, expected, index",
        [
            ("first", np.concatenate(
                ([False] * 10, [True] * 5)), np.arange(0, 10)),
            ("last", np.concatenate(
                ([True] * 5, [False] * 10)), np.arange(5, 15)),
            (
                False,
                np.concatenate(([True] * 5, [False] * 5, [True] * 5)),
                np.arange(5, 10),
            ),
        ],
    )
    def test_drop_duplicates(self, freq_sample, keep, expected, index):
        """``duplicated``/``drop_duplicates`` behave the same on the index and
        on a Series of it for each ``keep`` option."""
        # to check Index/Series compat
        idx = date_range("2011-01-01", freq=freq_sample, periods=10, name="idx")
        idx = idx.append(idx[:5])

        tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected)
        # from here on `expected` holds the de-duplicated index
        expected = idx[~expected]

        result = idx.drop_duplicates(keep=keep)
        tm.assert_index_equal(result, expected)

        result = Series(idx).drop_duplicates(keep=keep)
        tm.assert_series_equal(result, Series(expected, index=index))

    def test_infer_freq(self, freq_sample):
        """``freq="infer"`` on raw i8 values recovers the original freq."""
        # GH 11018
        idx = date_range("2011-01-01 09:00:00", freq=freq_sample, periods=10)
        result = DatetimeIndex(idx.asi8, freq="infer")
        tm.assert_index_equal(idx, result)
        assert result.freq == freq_sample

    def test_nat(self, tz_naive_fixture):
        """NaT is the NA value and is reflected in the NaN helper attributes."""
        tz = tz_naive_fixture
        assert DatetimeIndex._na_value is pd.NaT
        assert DatetimeIndex([])._na_value is pd.NaT

        idx = DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
        assert idx.hasnans is False
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp))

        idx = DatetimeIndex(["2011-01-01", "NaT"], tz=tz)
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
        assert idx.hasnans is True
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp))

    @pytest.mark.parametrize("values",
                             [["20180101", "20180103", "20180105"], []])
    @pytest.mark.parametrize(
        "freq", ["2D", Day(2), "2B", BDay(2), "48H", Hour(48)])
    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
    def test_freq_setter(self, values, freq, tz):
        """The freq of the underlying array can be set from a string or an
        offset and reset to None."""
        # GH 20678
        idx = DatetimeIndex(values, tz=tz)

        # can set to an offset, converting from string if necessary
        idx._data.freq = freq
        assert idx.freq == freq
        assert isinstance(idx.freq, DateOffset)

        # can reset to None
        idx._data.freq = None
        assert idx.freq is None

    def test_freq_setter_errors(self):
        """Setting a non-conforming or invalid freq raises ValueError."""
        # GH 20678
        idx = DatetimeIndex(["20180101", "20180103", "20180105"])

        # setting with an incompatible freq
        msg = ("Inferred frequency 2D from passed values does not conform to "
               "passed frequency 5D")
        with pytest.raises(ValueError, match=msg):
            idx._data.freq = "5D"

        # setting with non-freq string
        with pytest.raises(ValueError, match="Invalid frequency"):
            idx._data.freq = "foo"

    def test_freq_view_safe(self):
        """Dropping the freq on a second index built from the same array
        leaves the original index and array freq untouched."""
        # Setting the freq for one DatetimeIndex shouldn't alter the freq
        # for another that views the same data

        dti = date_range("2016-01-01", periods=5)
        dta = dti._data

        dti2 = DatetimeIndex(dta)._with_freq(None)
        assert dti2.freq is None

        # Original was not altered
        assert dti.freq == "D"
        assert dta.freq == "D"
ts = pd.Series(np.random.randn(4), index = pd.date_range('2000/1/1',periods = 4,freq = 'M')) ts.shift(2) #整数为向后移动位数,负数代表向前移动位数,仅移动数据,并不会移动index ts/ts.shift(1)-1 #时间序列环比写法 ts.shift(2,freq = 'M') #加入freq参数则数据往后填充 ts.shift(2,freq = 'D') #datetimeIndex表示往后推2天 ts.shift(1,freq = '90T') #表示往后推1个90分钟,T代表分钟 #使用偏置进行移位日期 from pandas.tseries.offsets import Day,MonthEnd now = datetime(2011,11,17) now+3*Day() #表示往后推3天日期 now+MonthEnd() #表示本月月底日期 now+MonthEnd(2) #表示下月月底日期 offset = MonthEnd() offset.rollforward(now) offset.rollback(now) #将位移方法与groupby一起使用是日期偏置的一种创造性用法 ts = pd.Series(np.random.randn(20), index = pd.date_range('2000/1/15',periods = 20,freq = '4d')) ts.groupby(MonthEnd().rollforward).mean() #按照每月月底进行本月平均值 ts.resample('M').mean() ''' 第四节:时区处理(略)
if mult == 1: return code return str(mult) + code #---------------------------------------------------------------------- # Offset names ("time rules") and related functions from pandas.tseries.offsets import (Day, BDay, Hour, Minute, Second, Milli, Week, Micro, MonthEnd, MonthBegin, BMonthBegin, BMonthEnd, YearBegin, YearEnd, BYearBegin, BYearEnd, QuarterBegin, QuarterEnd, BQuarterBegin, BQuarterEnd) _offset_map = { 'D': Day(), 'B': BDay(), 'H': Hour(), 'T': Minute(), 'S': Second(), 'L': Milli(), 'U': Micro(), None: None, # Monthly - Calendar 'M': MonthEnd(), 'MS': MonthBegin(), # Monthly - Business 'BM': BMonthEnd(), 'BMS': BMonthBegin(),
index - pd.Timedelta(days=MAX_WINDOW), index + pd.Timedelta(days=MAX_WINDOW), ) assert (len(holiday_date) != 0), f"No closest holiday for the date index {index} found." # It sometimes returns two dates if it is exactly half a year after the # holiday. In this case, the smaller distance (182 days) is returned. return (index - holiday_date[0]).days return distance_to_day EasterSunday = Holiday("Easter Sunday", month=1, day=1, offset=[Easter(), Day(0)]) NewYearsDay = Holiday("New Years Day", month=1, day=1) SuperBowl = Holiday("Superbowl", month=2, day=1, offset=DateOffset(weekday=SU(1))) MothersDay = Holiday("Mothers Day", month=5, day=1, offset=DateOffset(weekday=SU(2))) IndependenceDay = Holiday("Independence Day", month=7, day=4) ChristmasEve = Holiday("Christmas", month=12, day=24) ChristmasDay = Holiday("Christmas", month=12, day=25) NewYearsEve = Holiday("New Years Eve", month=12, day=31) BlackFriday = Holiday( "Black Friday",
def selectstorms(flowserie, rainserie, number_of_storms=3,
                 min_period_in_between=7, search_period=7,
                 drywindow=96):
    """
    (pd.Series, pd.Series) -> List

    Easy storm selection process, based on the maximum flows measured
    in the given timeserie of flow measurements.

    To define the startdate of the storm, a dry period (no rain during
    `drywindow` timesteps) before the Qmax is searched for. The end date is
    found by checking the flow at the startdate (Qbase) and searching the
    first moment after Qmax with a lower flow within the first 2 weeks.
    If none is found, Qbase is relaxed by 10 % at a time (compounding)
    until a moment is found.

    Parameters
    ----------
    flowserie : pd.Series
        Pandas Series with the date in the index
    rainserie : pd.Series
        Pandas Series with the date in the index
    number_of_storms : int
        Number of storms you want to select
    min_period_in_between : int (days)
        Minimum number of days in between to selected storms
    search_period : int (days)
        Period to look for the start of the storm, when rain started
    drywindow : int
        Number of timesteps to check for no-rain

    Returns
    -------
    list of dict
        One ``{'startdate': ..., 'enddate': ...}`` entry per selected storm,
        ordered from the highest peak flow downwards.

    Raises
    ------
    Exception
        If an input is not a pd.Series, if flowserie is empty, if fewer than
        `number_of_storms` sufficiently separated peaks exist, if no dry
        window is found before a peak, or if no end date can be determined
        (no usable flow data after the peak or a non-positive base flow).
    """
    if not isinstance(flowserie, pd.Series):
        raise Exception('flowserie is a single data Series')
    if not isinstance(rainserie, pd.Series):
        raise Exception('rainserie is a single data Series')
    if flowserie.empty:
        raise Exception('flowserie is empty, no storm can be selected')

    # fill na values with very low (negative) value so they rank last,
    # then rank all timesteps from highest to lowest flow
    ranked = flowserie.fillna(value=-777.).sort_values(ascending=False)

    # walk down the ranking and keep the peaks that are at least the given
    # number of days away from every peak selected so far
    min_gap = datetime.timedelta(days=min_period_in_between)
    stormmax = [ranked.index[0]]  # first element is a selected storm
    for candidate in ranked.index[1:]:
        if len(stormmax) >= number_of_storms:
            break
        if all(abs(candidate - chosen) >= min_gap for chosen in stormmax):
            stormmax.append(candidate)
    if len(stormmax) < number_of_storms:
        raise Exception('Only %d storms at least %d days apart found, '
                        '%d requested.' % (len(stormmax),
                                           min_period_in_between,
                                           number_of_storms))

    presearchperiod = datetime.timedelta(days=search_period)
    selstorms = []
    for storm in stormmax:
        # FIND DRY PERIOD BEFORE THE STORM
        # rolling rain sum over the search period preceding the peak
        rain_before = rainserie.loc[storm - presearchperiod:storm]
        rain_sum = rain_before.rolling(window=drywindow, center=False).sum()

        # (almost) zero means the preceding drywindow had no rain; the dry
        # moment closest to the peak is selected
        tempdates = rain_sum[rain_sum < 0.001].index.tolist()
        if len(tempdates) == 0:
            raise Exception('Decrease drywindow period containing no rain.')
        closest_dry = min(tempdates, key=lambda times: abs(times - storm))
        startstormdate = closest_dry - Day()

        # flow at the start of the storm and the flows of the next 2 weeks
        temp2 = flowserie.loc[startstormdate:startstormdate + Week() * 2]
        flowbase = temp2.loc[startstormdate]

        # first moment (at least one day after the peak) with a flow below
        # the base flow marks the end of the storm
        afterstorm = temp2.loc[storm + Day():]
        lowerafterstorm = afterstorm[afterstorm < flowbase]

        if lowerafterstorm.size == 0:
            print('Lower initial flow not found again...test with mean...')
            # relaxing multiplies the base flow by 1.1, which only helps for
            # a positive base flow and when comparable data is available;
            # otherwise the loop below would never end
            if not (flowbase > 0 and (afterstorm < np.inf).any()):
                raise Exception('No end date found for storm at %s: no flow '
                                'data after the peak or non-positive base '
                                'flow.' % storm)
            cnt = 1
            while lowerafterstorm.size == 0:
                print('... still not working; relaxing conditions... '
                      '%d %% of minimal after storm incorporated'
                      % (cnt * 10))
                flowbase = flowbase + 0.1 * flowbase
                lowerafterstorm = afterstorm[afterstorm < flowbase]
                cnt += 1
        endstormdate = lowerafterstorm.index[0]

        # add to selected storms
        selstorms.append({'startdate': startstormdate,
                          'enddate': endstormdate})

    return selstorms
def _cal_portfolio_returns_between_balancing(self):
    '''
    Compute the daily returns of the stock portfolios (slow).

    For every period between two consecutive rebalance dates the per-stock
    cumulative returns are computed, aggregated per ('date', 'group') either
    as a plain mean (``self._weights == 'EW'``) or as an average weighted by
    the 'weights' column, and then turned back into period-by-period returns.

    Reads ``self._weights``, ``self._hist_data``, ``self._group`` and
    ``self._rebalance_date``. When ``self._weights == 'MV'`` a 'weights'
    column is written into ``self._hist_data`` as a side effect.
    Progress timestamps are printed at four stages.

    Returns the concatenated, index-sorted returns of all periods with one
    column per group.
    '''
    print('_cal_portfolio_returns_between_balancing--1',
          time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
    # market-value weighting: use the market value as the weight column
    if self._weights == 'MV':
        self._hist_data['weights'] = self._hist_data['market_value']
    used_columns = [] if self._weights == 'EW' else ['weights']
    hist_data = pd.concat(
        [self._hist_data[['returns'] + used_columns], self._group], axis=1)
    gross_returns = hist_data
    returns_date = gross_returns.index.get_level_values('date')
    # one slot per period between two consecutive rebalance dates
    portfolio_returns_between_balancing = [0] * (
        len(self._rebalance_date) - 1)
    for i in range(len(self._rebalance_date) - 1):
        #The start and end of a period between balancing
        start_date, end_date = self._rebalance_date[
            i], self._rebalance_date[i + 1]
        # the end date is exclusive below, so the last period is extended by
        # one day to include the final rebalance date itself
        if i == len(self._rebalance_date) - 2:
            end_date += Day(1)
        #history data during the period
        returns_between_balancing = gross_returns[
            (returns_date >= start_date) & (returns_date < end_date)]
        # cumulative gross return per 'code' within the period; missing
        # returns count as 0
        returns_between_balancing = (
            returns_between_balancing['returns'].fillna(0) + 1).groupby(
                'code', group_keys=False).cumprod()
        portfolio_returns_between_balancing[i] = returns_between_balancing
    print('_cal_portfolio_returns_between_balancing--2',
          time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
    cum_returns_stocks = pd.concat(
        portfolio_returns_between_balancing).sort_index()
    cum_returns_stocks.name = 'cum_returns'
    cum_returns_stocks = pd.concat(
        [cum_returns_stocks, gross_returns[['group'] + used_columns]],
        axis=1)
    #Calculate the portfolio value if start from 1
    group_data = cum_returns_stocks[['cum_returns', 'group'] +
                                    used_columns].groupby(['date', 'group'])
    if self._weights == 'EW':
        cum_returns = group_data.mean()
    else:
        cum_returns = group_data.apply(
            lambda df: np.average(df.cum_returns, weights=df.weights))
    print('_cal_portfolio_returns_between_balancing--3',
          time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
    # groups become columns; the remaining index is reduced to its 'date' level
    cum_returns = cum_returns.unstack(level='group')
    returns_date = cum_returns.index = cum_returns.index.get_level_values(
        'date')
    for i in range(len(self._rebalance_date) - 1):
        #The start and end of a period between balancing
        start_date, end_date = self._rebalance_date[
            i], self._rebalance_date[i + 1]
        if i == len(self._rebalance_date) - 2:
            end_date += Day(1)
        cum_returns_between_balancing = cum_returns[
            (returns_date >= start_date) & (returns_date < end_date)]
        returns_between_balancing = cum_returns_between_balancing.pct_change(
        )
        # pct_change leaves the first row empty; since the cumulative value
        # starts from 1, the first return is the first cumulative value - 1
        if len(cum_returns_between_balancing) != 0:
            returns_between_balancing.iloc[
                0] = cum_returns_between_balancing.iloc[0] - 1
        portfolio_returns_between_balancing[i] = returns_between_balancing
    print('_cal_portfolio_returns_between_balancing--4',
          time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
    return pd.concat(portfolio_returns_between_balancing).sort_index()
def test_Day_equals_24_Hours():
    """Adding ``Day(1)`` to a tz-aware Timestamp equals adding ``Hour(24)``.

    The timestamp used is 2016-10-30 in Europe/Helsinki.
    """
    start = Timestamp('2016-10-30 00:00:00+0300', tz='Europe/Helsinki')
    plus_one_day = start + Day(1)
    plus_24_hours = start + Hour(24)
    assert plus_one_day == plus_24_hours
ts.shift(-2) # ts / ts.shift(1) - 1 #%% ts.shift(2, freq='M') #%% ts.shift(3, freq='D') ts.shift(1, freq='90T') # #### Shifting dates with offsets #%% from pandas.tseries.offsets import Day, MonthEnd now = datetime(2011, 11, 17) now + 3 * Day() #%% now + MonthEnd() now + MonthEnd(2) #%% offset = MonthEnd() offset.rollforward(now) offset.rollback(now) #%% ts = pd.Series(np.random.randn(20), index=pd.date_range('1/15/2000', periods=20, freq='4d')) ts ts.groupby(offset.rollforward).mean()
extras.append(dt[dt.weekday == THURSDAY] + timedelta(1)) return dt.append(extras) NewYearsDay = new_years_day(observance=four_day_weekend) NationalHoliday1 = Holiday("National Day", month=3, day=15, observance=four_day_weekend) # Need custom start year so can't use pandas GoodFriday GoodFriday = Holiday("Good Friday", month=1, day=1, offset=[Easter(), Day(-2)], start_date="2012") LabourDay = european_labour_day(observance=four_day_weekend) WhitMonday = whit_monday() StStephensDay = Holiday( "St. Stephen's Day", month=8, day=20, observance=four_day_weekend, ) NationalHoliday2 = Holiday( "National Day",
def compute_forward_returns(factor_idx,
                            prices,
                            periods=(1, 5, 10),
                            filter_zscore=None):
    """
    Finds the N period forward returns (as percent change) for each asset
    provided.

    Parameters
    ----------
    factor_idx : pd.DatetimeIndex
        The factor datetimes for which we are computing the forward returns
    prices : pd.DataFrame
        Pricing data to use in forward price calculation.
        Assets as columns, dates as index. Pricing data must
        span the factor analysis time period plus an additional buffer window
        that is greater than the maximum number of expected periods
        in the forward returns calculations.
    periods : sequence[int]
        periods to compute forward returns on.
    filter_zscore : int or float, optional
        Sets forward returns greater than X standard deviations
        from the mean to nan. Set it to 'None' to avoid filtering.
        Caution: this outlier filtering incorporates lookahead bias.

    Returns
    -------
    forward_returns : pd.DataFrame - MultiIndex
        Forward returns indexed by date and asset.
        Separate column for each forward return window, named after the
        length of the window as a string.
    """

    # only factor dates that also have prices can be used
    factor_idx = factor_idx.intersection(prices.index)

    forward_returns = pd.DataFrame(index=pd.MultiIndex.from_product(
        [factor_idx, prices.columns], names=['date', 'asset']))

    # once set, the flag stays True for all following periods as well
    custom_calendar = False

    for period in periods:

        #
        # build forward returns
        #
        delta = prices.pct_change(period).shift(-period).reindex(factor_idx)

        if filter_zscore is not None:
            mask = abs(delta - delta.mean()) > (filter_zscore * delta.std())
            delta[mask] = np.nan

        #
        # if the period length is not consistent across the factor index then
        # it must be a trading/business day calendar
        #
        time_diffs = prices.index.to_series().diff(period)
        time_diffs = time_diffs.reindex(factor_idx)
        if time_diffs.min() != time_diffs.max():
            custom_calendar = True

        #
        # find the period length that will be the column name
        #
        # NOTE(review): indexes position p_idx + period, so this relies on
        # prices extending at least `period` rows past the first factor date
        p_idx = prices.index.get_loc(delta.index[0])
        period_len = prices.index[p_idx + period] - prices.index[p_idx]

        #
        # use business days as an approximation to trading calendar
        #
        if custom_calendar and period_len.components.days > 0:
            # sample at most 50 entries and take the most common number of
            # business days spanned by `period` rows
            entries_to_test = min(50, len(delta.index) - period)
            days_diffs = []
            for i in range(entries_to_test):
                p_idx = prices.index.get_loc(delta.index[i])
                days = len(
                    pd.bdate_range(prices.index[p_idx],
                                   prices.index[p_idx + period])) - 1
                days_diffs.append(days)

            # shorten the calendar length to that business-day count
            delta_days = period_len.components.days - mode(days_diffs).mode[0]
            period_len -= pd.Timedelta(days=delta_days)

        column_name = timedelta_to_string(period_len)

        forward_returns[column_name] = delta.stack()

    forward_returns.index = forward_returns.index.rename(['date', 'asset'])

    # use business days as an approximation to trading calendar, if this will
    # be proven to be a poor approximation then we could build a pandas
    # AbstractHolidayCalendar inferring non-trading days from price DataFrame
    # and use it to build a CustomBusinessDay DateOffset that we can finally
    # set it as index 'freq'
    freq = BDay() if custom_calendar else Day()
    forward_returns.index.levels[0].freq = freq

    return forward_returns
from django.conf import settings
from pandas.tseries.offsets import YearBegin, QuarterBegin, MonthBegin, Day

# Transformations
VALUE = 'value'
CHANGE = 'change'
PCT_CHANGE = 'percent_change'
CHANGE_YEAR_AGO = 'change_a_year_ago'
PCT_CHANGE_YEAR_AGO = 'percent_change_a_year_ago'

# Pandas freqs
PANDAS_YEAR = YearBegin()
PANDAS_SEMESTER = MonthBegin(6)  # semester expressed as six month-begin steps
PANDAS_QUARTER = QuarterBegin(startingMonth=1)
PANDAS_MONTH = MonthBegin()
PANDAS_WEEK = Day(7)  # week expressed as seven days
PANDAS_DAY = Day()

# Frequencies *in order* from largest to smallest
PANDAS_FREQS = [
    PANDAS_YEAR, PANDAS_SEMESTER, PANDAS_QUARTER, PANDAS_MONTH, PANDAS_WEEK,
    PANDAS_DAY
]

# Metadata key names
IDENTIFIER = "identifier"
DATASET_IDENTIFIER = "dataset_identifier"
DOWNLOAD_URL = "downloadURL"

DATASET = 'dataset'
DISTRIBUTION = 'distribution'
FIELD = 'field'
class TestTimedeltaIndexOps(Ops):
    """Tests for generic index operations on ``TimedeltaIndex``.

    Covers value_counts/unique, membership, sort_values, drop_duplicates,
    frequency inference, repeat, NaT handling, ``equals`` and the ``freq``
    setter. ``Ops`` (the base class) is defined outside this block.
    """

    def setup_method(self, method):
        """Restrict the valid objects of the base fixture to TimedeltaIndex."""
        super().setup_method(method)
        mask = lambda x: isinstance(x, TimedeltaIndex)
        self.is_valid_objs = [o for o in self.objs if mask(o)]
        self.not_valid_objs = []

    def test_ops_properties(self):
        """Run the base-class property checks for field and object ops."""
        f = lambda x: isinstance(x, TimedeltaIndex)
        self.check_ops_properties(TimedeltaIndex._field_ops, f)
        self.check_ops_properties(TimedeltaIndex._object_ops, f)

    def test_value_counts_unique(self):
        """``value_counts`` and ``unique`` agree for the index and for a
        Series built from it, with and without NaT."""
        # GH 7735
        idx = timedelta_range("1 days 09:00:00", freq="H", periods=10)
        # create repeated values, 'n'th element is repeated by n+1 times
        idx = TimedeltaIndex(np.repeat(idx.values, range(1, len(idx) + 1)))

        exp_idx = timedelta_range("1 days 18:00:00", freq="-1H", periods=10)
        expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64")

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        expected = timedelta_range("1 days 09:00:00", freq="H", periods=10)
        tm.assert_index_equal(idx.unique(), expected)

        idx = TimedeltaIndex([
            "1 days 09:00:00",
            "1 days 09:00:00",
            "1 days 09:00:00",
            "1 days 08:00:00",
            "1 days 08:00:00",
            pd.NaT,
        ])

        exp_idx = TimedeltaIndex(["1 days 09:00:00", "1 days 08:00:00"])
        expected = Series([3, 2], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        exp_idx = TimedeltaIndex(
            ["1 days 09:00:00", "1 days 08:00:00", pd.NaT])
        expected = Series([3, 2, 1], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(dropna=False), expected)

        tm.assert_index_equal(idx.unique(), exp_idx)

    def test_nonunique_contains(self):
        """``in`` works on indexes that contain duplicate values."""
        # GH 9512
        for idx in map(
                TimedeltaIndex,
            (
                [0, 1, 0],
                [0, 0, -1],
                [0, -1, -1],
                ["00:01:00", "00:01:00", "00:02:00"],
                ["00:01:00", "00:01:00", "00:00:01"],
            ),
        ):
            assert idx[0] in idx

    def test_unknown_attribute(self):
        """Accessing an unknown attribute on a timedelta-indexed Series
        raises AttributeError."""
        # see gh-9680
        tdi = pd.timedelta_range(start=0, periods=10, freq="1s")
        ts = pd.Series(np.random.normal(size=10), index=tdi)
        assert "foo" not in ts.__dict__.keys()
        msg = "'Series' object has no attribute 'foo'"
        with pytest.raises(AttributeError, match=msg):
            ts.foo

    def test_order(self):
        """``sort_values`` keeps or reverses the freq on regular indexes and
        yields ``freq is None`` on irregular ones."""
        # GH 10295
        idx1 = TimedeltaIndex(["1 day", "2 day", "3 day"], freq="D", name="idx")
        idx2 = TimedeltaIndex(["1 hour", "2 hour", "3 hour"],
                              freq="H",
                              name="idx")

        for idx in [idx1, idx2]:
            ordered = idx.sort_values()
            tm.assert_index_equal(ordered, idx)
            assert ordered.freq == idx.freq

            ordered = idx.sort_values(ascending=False)
            expected = idx[::-1]
            tm.assert_index_equal(ordered, expected)
            assert ordered.freq == expected.freq
            assert ordered.freq.n == -1

            ordered, indexer = idx.sort_values(return_indexer=True)
            tm.assert_index_equal(ordered, idx)
            tm.assert_numpy_array_equal(indexer,
                                        np.array([0, 1, 2]),
                                        check_dtype=False)
            assert ordered.freq == idx.freq

            ordered, indexer = idx.sort_values(return_indexer=True,
                                               ascending=False)
            tm.assert_index_equal(ordered, idx[::-1])
            assert ordered.freq == expected.freq
            assert ordered.freq.n == -1

        idx1 = TimedeltaIndex(
            ["1 hour", "3 hour", "5 hour", "2 hour ", "1 hour"], name="idx1")
        exp1 = TimedeltaIndex(
            ["1 hour", "1 hour", "2 hour", "3 hour", "5 hour"], name="idx1")

        # NOTE(review): idx2 is assigned here but not used by the loop below
        idx2 = TimedeltaIndex(["1 day", "3 day", "5 day", "2 day", "1 day"],
                              name="idx2")

        # TODO(wesm): unused?
        # exp2 = TimedeltaIndex(['1 day', '1 day', '2 day',
        #                        '3 day', '5 day'], name='idx2')

        # idx3 = TimedeltaIndex([pd.NaT, '3 minute', '5 minute',
        #                        '2 minute', pd.NaT], name='idx3')
        # exp3 = TimedeltaIndex([pd.NaT, pd.NaT, '2 minute', '3 minute',
        #                        '5 minute'], name='idx3')

        # the same (idx1, exp1) pair is checked three times
        for idx, expected in [(idx1, exp1), (idx1, exp1), (idx1, exp1)]:
            ordered = idx.sort_values()
            tm.assert_index_equal(ordered, expected)
            assert ordered.freq is None

            ordered = idx.sort_values(ascending=False)
            tm.assert_index_equal(ordered, expected[::-1])
            assert ordered.freq is None

            ordered, indexer = idx.sort_values(return_indexer=True)
            tm.assert_index_equal(ordered, expected)

            exp = np.array([0, 4, 3, 1, 2])
            tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
            assert ordered.freq is None

            ordered, indexer = idx.sort_values(return_indexer=True,
                                               ascending=False)
            tm.assert_index_equal(ordered, expected[::-1])

            exp = np.array([2, 1, 3, 4, 0])
            tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
            assert ordered.freq is None

    def test_drop_duplicates_metadata(self):
        """``drop_duplicates`` keeps the freq on a unique index and returns a
        freq-less index after duplicates were appended."""
        # GH 10115
        idx = pd.timedelta_range("1 day", "31 day", freq="D", name="idx")
        result = idx.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert idx.freq == result.freq

        idx_dup = idx.append(idx)
        assert idx_dup.freq is None  # freq is reset
        result = idx_dup.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert result.freq is None

    def test_drop_duplicates(self):
        """``drop_duplicates`` behaves the same on the index and on a Series
        of it for the default, ``keep="last"`` and ``keep=False``."""
        # to check Index/Series compat
        base = pd.timedelta_range("1 day", "31 day", freq="D", name="idx")
        idx = base.append(base[:5])

        res = idx.drop_duplicates()
        tm.assert_index_equal(res, base)
        res = Series(idx).drop_duplicates()
        tm.assert_series_equal(res, Series(base))

        res = idx.drop_duplicates(keep="last")
        exp = base[5:].append(base[:5])
        tm.assert_index_equal(res, exp)
        res = Series(idx).drop_duplicates(keep="last")
        tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36)))

        res = idx.drop_duplicates(keep=False)
        tm.assert_index_equal(res, base[5:])
        res = Series(idx).drop_duplicates(keep=False)
        tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31)))

    @pytest.mark.parametrize(
        "freq", ["D", "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"])
    def test_infer_freq(self, freq):
        """``freq="infer"`` on raw i8 values recovers the original freq."""
        # GH#11018
        idx = pd.timedelta_range("1", freq=freq, periods=10)
        result = pd.TimedeltaIndex(idx.asi8, freq="infer")
        tm.assert_index_equal(idx, result)
        assert result.freq == freq

    def test_shift(self):
        """Placeholder; shifting is covered elsewhere."""
        pass  # handled in test_arithmetic.py

    def test_repeat(self):
        """``repeat`` (method and ``np.repeat``) duplicates each element in
        place and drops the frequency, including for NaT entries."""
        index = pd.timedelta_range("1 days", periods=2, freq="D")
        exp = pd.TimedeltaIndex(["1 days", "1 days", "2 days", "2 days"])
        for res in [index.repeat(2), np.repeat(index, 2)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

        index = TimedeltaIndex(["1 days", "NaT", "3 days"])
        exp = TimedeltaIndex([
            "1 days",
            "1 days",
            "1 days",
            "NaT",
            "NaT",
            "NaT",
            "3 days",
            "3 days",
            "3 days",
        ])
        for res in [index.repeat(3), np.repeat(index, 3)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

    def test_nat(self):
        """NaT is the NA value and is reflected in the NaN helper attributes."""
        assert pd.TimedeltaIndex._na_value is pd.NaT
        assert pd.TimedeltaIndex([])._na_value is pd.NaT

        idx = pd.TimedeltaIndex(["1 days", "2 days"])
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
        assert idx.hasnans is False
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp))

        idx = pd.TimedeltaIndex(["1 days", "NaT"])
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
        assert idx.hasnans is True
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp))

    def test_equals(self):
        """``equals`` matches same-valued indexes (also as object dtype) and
        rejects lists, Series, reordered values and out-of-bounds values."""
        # GH 13107
        idx = pd.TimedeltaIndex(["1 days", "2 days", "NaT"])
        assert idx.equals(idx)
        assert idx.equals(idx.copy())
        assert idx.equals(idx.astype(object))
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        assert not idx.equals(list(idx))
        assert not idx.equals(pd.Series(idx))

        idx2 = pd.TimedeltaIndex(["2 days", "1 days", "NaT"])
        assert not idx.equals(idx2)
        assert not idx.equals(idx2.copy())
        assert not idx.equals(idx2.astype(object))
        assert not idx.astype(object).equals(idx2)
        assert not idx.astype(object).equals(idx2.astype(object))
        assert not idx.equals(list(idx2))
        assert not idx.equals(pd.Series(idx2))

        # Check that we dont raise OverflowError on comparisons outside the
        #  implementation range
        oob = pd.Index([timedelta(days=10**6)] * 3, dtype=object)
        assert not idx.equals(oob)
        assert not idx2.equals(oob)

        # FIXME: oob.apply(np.timedelta64) incorrectly overflows
        oob2 = pd.Index([np.timedelta64(x) for x in oob], dtype=object)
        assert not idx.equals(oob2)
        assert not idx2.equals(oob2)

    @pytest.mark.parametrize("values",
                             [["0 days", "2 days", "4 days"], []])
    @pytest.mark.parametrize("freq", ["2D", Day(2), "48H", Hour(48)])
    def test_freq_setter(self, values, freq):
        """The freq of the underlying array can be set from a string or an
        offset and reset to None."""
        # GH 20678
        idx = TimedeltaIndex(values)

        # can set to an offset, converting from string if necessary
        idx._data.freq = freq
        assert idx.freq == freq
        assert isinstance(idx.freq, ABCDateOffset)

        # can reset to None
        idx._data.freq = None
        assert idx.freq is None

    def test_freq_setter_errors(self):
        """Setting a non-conforming, non-fixed or invalid freq raises
        ValueError."""
        # GH 20678
        idx = TimedeltaIndex(["0 days", "2 days", "4 days"])

        # setting with an incompatible freq
        msg = ("Inferred frequency 2D from passed values does not conform to "
               "passed frequency 5D")
        with pytest.raises(ValueError, match=msg):
            idx._data.freq = "5D"

        # setting with a non-fixed frequency
        msg = r"<2 \* BusinessDays> is a non-fixed frequency"
        with pytest.raises(ValueError, match=msg):
            idx._data.freq = "2B"

        # setting with non-freq string
        with pytest.raises(ValueError, match="Invalid frequency"):
            idx._data.freq = "foo"