def time_for_next_update(last_time, period='D', hour=9):
    """Return the next data-update time after *last_time*.

    Parameters
    ----------
    last_time : Timestamp or NaT
        Time of the previous update; if null, ``MARKET_START`` is returned.
    period : str, default 'D'
        Update cycle (case-insensitive):
        'D' -> next business day, 'W' -> next Monday,
        'M' -> first day of next month,
        'Q' -> first day of next calendar quarter (Jan/Apr/Jul/Oct).
    hour : int, default 9
        Hour of day set on the returned timestamp.

    Raises
    ------
    TypeError
        If *period* is not one of 'D', 'W', 'M', 'Q'.
    """
    if pd.isnull(last_time):
        return MARKET_START
    period = period.upper()
    # FIX: `DateOffset.apply()` was deprecated and removed in pandas 2.0;
    # `timestamp + offset` is the supported, equivalent spelling.
    if period == 'D':
        next_time = last_time + BDay(normalize=True)
    elif period == 'W':
        next_time = last_time + Week(normalize=True, weekday=0)
    elif period == 'M':
        next_time = last_time + MonthBegin(normalize=True)
    elif period == 'Q':
        # FIX: QuarterBegin's class default is startingMonth=3
        # (Mar/Jun/Sep/Dec), which contradicts the documented "first day
        # of the next quarter" and the sibling next_update_time helpers;
        # anchor explicitly at Jan/Apr/Jul/Oct.
        next_time = last_time + QuarterBegin(normalize=True, startingMonth=1)
    else:
        raise TypeError('不能识别的周期类型,仅接受{}'.format(('D', 'W', 'M', 'Q')))
    return next_time.replace(hour=hour)
def test_repr(self): expected = "<QuarterBegin: startingMonth=3>" assert repr(QuarterBegin()) == expected expected = "<QuarterBegin: startingMonth=3>" assert repr(QuarterBegin(startingMonth=3)) == expected expected = "<QuarterBegin: startingMonth=1>" assert repr(QuarterBegin(startingMonth=1)) == expected
def next_update_time(last_updated, freq='D', hour=18, minute=0):
    """Return the next update time after *last_updated*.

    Frequency codes (case-insensitive):
    'D' -> next business day, 'W' -> next Monday,
    'M' -> first day of next month, 'Q' -> first day of next quarter.
    The result's clock time is set to *hour*:*minute*.
    Null input yields ``MARKET_START``; an unknown code raises TypeError.
    """
    if pd.isnull(last_updated):
        return MARKET_START
    step_by_freq = {
        'D': BDay(n=1, normalize=True),
        'W': Week(normalize=True, weekday=0),
        'M': MonthBegin(n=1, normalize=True),
        'Q': QuarterBegin(normalize=True, startingMonth=1),
    }
    step = step_by_freq.get(freq.upper())
    if step is None:
        raise TypeError('不能识别的周期类型,仅接受{}'.format(('D', 'W', 'M', 'Q')))
    return (last_updated + step).replace(hour=hour, minute=minute)
def to_offset(self) -> DateOffset:
    """Translate this frequency code (``self.value``) into the matching
    pandas DateOffset; raise NotImplementedError for unknown codes."""
    factories = {
        "H": lambda: Hour(1),
        "D": lambda: Day(1),
        "W-MON": lambda: Week(1, weekday=0),
        "MS": lambda: MonthBegin(1),
        "QS-DEC": lambda: QuarterBegin(startingMonth=10),
        "AS": lambda: YearBegin(1),
    }
    make = factories.get(self.value)
    if make is None:
        raise NotImplementedError(self.value)
    return make()
def next_update_time(last_updated, freq='D', hour=18, minute=0, second=0):
    """Return the next update time after *last_updated*.

    Sub-daily codes are case-sensitive and simply step one unit forward:
    'S' -> +1 second, 'm' -> +1 minute, 'H' -> +1 hour.
    Daily and coarser codes also reset the clock time to
    *hour*:*minute*:*second*:
    'D' -> next business day, 'W' -> next Monday,
    'M' -> first day of next month, 'Q' -> first day of next quarter.
    Null input yields ``MARKET_START``; an unknown code raises TypeError.
    """
    if pd.isnull(last_updated):
        return MARKET_START
    intraday = {'S': Second, 'm': Minute, 'H': Hour}
    if freq in intraday:
        return last_updated + intraday[freq]()
    coarser = {
        'D': BDay(n=1, normalize=True),
        'W': Week(normalize=True, weekday=0),
        'M': MonthBegin(n=1, normalize=True),
        'Q': QuarterBegin(normalize=True, startingMonth=1),
    }
    if freq in coarser:
        shifted = last_updated + coarser[freq]
        return shifted.replace(hour=hour, minute=minute, second=second)
    raise TypeError('不能识别的周期类型,仅接受{}'.format(
        ('S', 'm', 'H', 'D', 'W', 'M', 'Q')))
def ls_returns_sheet(self, cur_day=None):
    """Build a one-row DataFrame of long/short returns over standard windows.

    Parameters
    ----------
    cur_day : date-like, optional
        Anchor day; defaults to the latest trade day reported by
        ``data_source.trade_calendar``.

    Returns
    -------
    DataFrame
        Single row with day / week-to-date / month-to-date /
        quarter-to-date / 6-month / year-to-date / 2-year range returns
        plus annualized and since-inception returns.
    """
    if cur_day is None:
        # Default anchor: most recent trade day as of today.
        cur_day = pd.to_datetime(
            data_source.trade_calendar.get_latest_trade_days(
                datetime.today().strftime("%Y%m%d")))
    else:
        cur_day = pd.to_datetime(cur_day)
    # Window start dates: today, start of week, month, quarter, 6 months,
    # year, and two years back.
    # NOTE(review): QuarterBegin(-1) uses the class default startingMonth=3
    # (quarters anchored Mar/Jun/Sep/Dec); if calendar quarters
    # (Jan/Apr/Jul/Oct) were intended, startingMonth=1 would be needed —
    # confirm against the report's definition of "本季以来".
    dates = [
        cur_day, cur_day.to_period('W').start_time, cur_day + MonthBegin(-1),
        cur_day + QuarterBegin(-1), cur_day + MonthBegin(-6),
        cur_day + YearBegin(-1), cur_day + YearBegin(-2)
    ]
    # Range return for each window, then the two summary figures.
    returns = list(map(lambda x: self.ls_range_pct(x, cur_day), dates)) + \
        [self.ls_annual_return, self.ls_total_return]
    return pd.DataFrame([returns],
                        columns=[
                            '日回报', '本周以来', '本月以来', '本季以来', '近6个月',
                            '今年以来', '近两年', '年化回报', '成立以来'
                        ])
def create_data():
    """ create the pickle/msgpack data """
    # Base columns used to build the mixed Series/DataFrame fixtures.
    data = {
        "A": [0.0, 1.0, 2.0, 3.0, np.nan],
        "B": [0, 1, 0, 1, 0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        "D": date_range("1/1/2009", periods=5),
        "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0],
    }
    scalars = dict(timestamp=Timestamp("20130101"), period=Period("2012", "M"))
    # One representative of each Index flavor.
    index = dict(
        int=Index(np.arange(10)),
        date=date_range("20130101", periods=10),
        period=period_range("2013-01-01", freq="M", periods=10),
        float=Index(np.arange(10, dtype=np.float64)),
        uint=Index(np.arange(10, dtype=np.uint64)),
        timedelta=timedelta_range("00:00:00", freq="30T", periods=10),
    )
    index["range"] = RangeIndex(10)
    # interval_range only exists from pandas 0.21 onward.
    if _loose_version >= LooseVersion("0.21"):
        from pandas import interval_range
        index["interval"] = interval_range(0, periods=10)
    mi = dict(reg2=MultiIndex.from_tuples(
        tuple(
            zip(*[
                ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
                ["one", "two", "one", "two", "one", "two", "one", "two"],
            ])),
        names=["first", "second"],
    ))
    # Series fixtures covering dtypes, MultiIndex, duplicate labels,
    # categoricals, datetimes (naive and tz-aware), and periods.
    series = dict(
        float=Series(data["A"]),
        int=Series(data["B"]),
        mixed=Series(data["E"]),
        ts=Series(np.arange(10).astype(np.int64),
                  index=date_range("20130101", periods=10)),
        mi=Series(
            np.arange(5).astype(np.float64),
            index=MultiIndex.from_tuples(tuple(
                zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                names=["one", "two"]),
        ),
        dup=Series(np.arange(5).astype(np.float64),
                   index=["A", "B", "C", "D", "A"]),
        cat=Series(Categorical(["foo", "bar", "baz"])),
        dt=Series(date_range("20130101", periods=5)),
        dt_tz=Series(date_range("20130101", periods=5, tz="US/Eastern")),
        period=Series([Period("2000Q1")] * 5),
    )
    # DataFrame with a duplicated column label.
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")
    frame = dict(
        float=DataFrame({
            "A": series["float"],
            "B": series["float"] + 1
        }),
        int=DataFrame({
            "A": series["int"],
            "B": series["int"] + 1
        }),
        mixed=DataFrame({k: data[k] for k in ["A", "B", "C", "D"]}),
        mi=DataFrame(
            {
                "A": np.arange(5).astype(np.float64),
                "B": np.arange(5).astype(np.int64)
            },
            index=MultiIndex.from_tuples(
                tuple(
                    zip(*[
                        ["bar", "bar", "baz", "baz", "baz"],
                        ["one", "two", "one", "two", "three"],
                    ])),
                names=["first", "second"],
            ),
        ),
        dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                      columns=["A", "B", "A"]),
        cat_onecol=DataFrame({"A": Categorical(["foo", "bar"])}),
        cat_and_float=DataFrame({
            "A": Categorical(["foo", "bar", "baz"]),
            "B": np.arange(3).astype(np.int64),
        }),
        mixed_dup=mixed_dup_df,
        dt_mixed_tzs=DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
            },
            index=range(5),
        ),
        dt_mixed2_tzs=DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
                "C": Timestamp("20130603", tz="UTC"),
            },
            index=range(5),
        ),
    )
    # Categoricals of increasing cardinality (names refer to the code dtype).
    cat = dict(
        int8=Categorical(list("abcdefg")),
        int16=Categorical(np.arange(1000)),
        int32=Categorical(np.arange(10000)),
    )
    timestamp = dict(
        normal=Timestamp("2011-01-01"),
        nat=NaT,
        tz=Timestamp("2011-01-01", tz="US/Eastern"),
    )
    timestamp["freq"] = Timestamp("2011-01-01", freq="D")
    timestamp["both"] = Timestamp("2011-01-01", tz="Asia/Tokyo", freq="M")
    # One instance of each DateOffset subclass to round-trip.
    off = {
        "DateOffset": DateOffset(years=1),
        "DateOffset_h_ns": DateOffset(hour=6, nanoseconds=5824),
        "BusinessDay": BusinessDay(offset=timedelta(seconds=9)),
        "BusinessHour": BusinessHour(normalize=True, n=6, end="15:14"),
        "CustomBusinessDay": CustomBusinessDay(weekmask="Mon Fri"),
        "SemiMonthBegin": SemiMonthBegin(day_of_month=9),
        "SemiMonthEnd": SemiMonthEnd(day_of_month=24),
        "MonthBegin": MonthBegin(1),
        "MonthEnd": MonthEnd(1),
        "QuarterBegin": QuarterBegin(1),
        "QuarterEnd": QuarterEnd(1),
        "Day": Day(1),
        "YearBegin": YearBegin(1),
        "YearEnd": YearEnd(1),
        "Week": Week(1),
        "Week_Tues": Week(2, normalize=False, weekday=1),
        "WeekOfMonth": WeekOfMonth(week=3, weekday=4),
        "LastWeekOfMonth": LastWeekOfMonth(n=1, weekday=3),
        "FY5253": FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        "Easter": Easter(),
        "Hour": Hour(1),
        "Minute": Minute(1),
    }
    return dict(
        series=series,
        frame=frame,
        index=index,
        scalars=scalars,
        mi=mi,
        sp_series=dict(float=_create_sp_series(), ts=_create_sp_tsseries()),
        sp_frame=dict(float=_create_sp_frame()),
        cat=cat,
        timestamp=timestamp,
        offsets=off,
    )
# this is for saving the results df_columns = ["equal", "constant", "sample", "lw", "model", "combined"] df_index = [] frob_rows = [] var_rows = [] sharpe_rows = [] # testing the model for i in range(test_intervals): if mode == "val": y = 2016 if mode == "test": y = 2018 sample_start = datetime(year=y, month=1, day=1) + QuarterBegin( startingMonth=1, n=i * time_horizon_quarters) sample_stop = sample_start + DateOffset(years=1) - DateOffset(days=1) out_of_sample_start = sample_stop + DateOffset(days=1) out_of_sample_stop = out_of_sample_start + QuarterEnd( startingMonth=3, n=time_horizon_quarters) # creating the reports and returns for the test. # Includes sample (previous time frame used for empirical estimation) and the test set returns_sample = get_returns_for_period(df_returns, sample_start, sample_stop) returns_out_of_sample = get_returns_for_period(df_returns, out_of_sample_start, out_of_sample_stop) reports_sample = get_reports_for_date(df_reports,
def create_data():
    """ create the pickle/msgpack data """
    # Base columns used to build the mixed Series/DataFrame fixtures.
    data = {
        'A': [0., 1., 2., 3., np.nan],
        'B': [0, 1, 0, 1, 0],
        'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
        'D': date_range('1/1/2009', periods=5),
        'E': [0., 1, Timestamp('20100101'), 'foo', 2.]
    }
    scalars = dict(timestamp=Timestamp('20130101'), period=Period('2012', 'M'))
    index = dict(int=Index(np.arange(10)),
                 date=date_range('20130101', periods=10),
                 period=period_range('2013-01-01', freq='M', periods=10),
                 float=Index(np.arange(10, dtype=np.float64)),
                 uint=Index(np.arange(10, dtype=np.uint64)),
                 timedelta=timedelta_range('00:00:00', freq='30T', periods=10))
    # RangeIndex / interval_range appeared in pandas 0.18 / 0.21.
    if _loose_version >= LooseVersion('0.18'):
        from pandas import RangeIndex
        index['range'] = RangeIndex(10)
    if _loose_version >= LooseVersion('0.21'):
        from pandas import interval_range
        index['interval'] = interval_range(0, periods=10)
    mi = dict(reg2=MultiIndex.from_tuples(tuple(
        zip(*[['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
              ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']])),
        names=['first', 'second']))
    # Series fixtures: dtypes, MultiIndex, duplicate labels, categoricals,
    # naive/tz-aware datetimes, and periods.
    series = dict(float=Series(data['A']),
                  int=Series(data['B']),
                  mixed=Series(data['E']),
                  ts=Series(np.arange(10).astype(np.int64),
                            index=date_range('20130101', periods=10)),
                  mi=Series(np.arange(5).astype(np.float64),
                            index=MultiIndex.from_tuples(tuple(
                                zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                                names=['one', 'two'])),
                  dup=Series(np.arange(5).astype(np.float64),
                             index=['A', 'B', 'C', 'D', 'A']),
                  cat=Series(Categorical(['foo', 'bar', 'baz'])),
                  dt=Series(date_range('20130101', periods=5)),
                  dt_tz=Series(
                      date_range('20130101', periods=5, tz='US/Eastern')),
                  period=Series([Period('2000Q1')] * 5))
    # DataFrame with a duplicated column label.
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")
    frame = dict(float=DataFrame({
        'A': series['float'],
        'B': series['float'] + 1
    }),
        int=DataFrame({
            'A': series['int'],
            'B': series['int'] + 1
        }),
        mixed=DataFrame({k: data[k] for k in ['A', 'B', 'C', 'D']}),
        mi=DataFrame(
            {
                'A': np.arange(5).astype(np.float64),
                'B': np.arange(5).astype(np.int64)
            },
            index=MultiIndex.from_tuples(tuple(
                zip(*[['bar', 'bar', 'baz', 'baz', 'baz'],
                      ['one', 'two', 'one', 'two', 'three']])),
                names=['first', 'second'])),
        dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                      columns=['A', 'B', 'A']),
        cat_onecol=DataFrame({'A': Categorical(['foo', 'bar'])}),
        cat_and_float=DataFrame({
            'A': Categorical(['foo', 'bar', 'baz']),
            'B': np.arange(3).astype(np.int64)
        }),
        mixed_dup=mixed_dup_df,
        dt_mixed_tzs=DataFrame(
            {
                'A': Timestamp('20130102', tz='US/Eastern'),
                'B': Timestamp('20130603', tz='CET')
            },
            index=range(5)),
        dt_mixed2_tzs=DataFrame(
            {
                'A': Timestamp('20130102', tz='US/Eastern'),
                'B': Timestamp('20130603', tz='CET'),
                'C': Timestamp('20130603', tz='UTC')
            },
            index=range(5)))
    # Categoricals of increasing cardinality (names refer to the code dtype).
    cat = dict(int8=Categorical(list('abcdefg')),
               int16=Categorical(np.arange(1000)),
               int32=Categorical(np.arange(10000)))
    timestamp = dict(normal=Timestamp('2011-01-01'),
                     nat=NaT,
                     tz=Timestamp('2011-01-01', tz='US/Eastern'))
    # Timestamp's keyword was renamed offset -> freq in pandas 0.19.2.
    if _loose_version < LooseVersion('0.19.2'):
        timestamp['freq'] = Timestamp('2011-01-01', offset='D')
        timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo',
                                      offset='M')
    else:
        timestamp['freq'] = Timestamp('2011-01-01', freq='D')
        timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo',
                                      freq='M')
    # One instance of each DateOffset subclass to round-trip.
    off = {
        'DateOffset': DateOffset(years=1),
        'DateOffset_h_ns': DateOffset(hour=6, nanoseconds=5824),
        'BusinessDay': BusinessDay(offset=timedelta(seconds=9)),
        'BusinessHour': BusinessHour(normalize=True, n=6, end='15:14'),
        'CustomBusinessDay': CustomBusinessDay(weekmask='Mon Fri'),
        'SemiMonthBegin': SemiMonthBegin(day_of_month=9),
        'SemiMonthEnd': SemiMonthEnd(day_of_month=24),
        'MonthBegin': MonthBegin(1),
        'MonthEnd': MonthEnd(1),
        'QuarterBegin': QuarterBegin(1),
        'QuarterEnd': QuarterEnd(1),
        'Day': Day(1),
        'YearBegin': YearBegin(1),
        'YearEnd': YearEnd(1),
        'Week': Week(1),
        'Week_Tues': Week(2, normalize=False, weekday=1),
        'WeekOfMonth': WeekOfMonth(week=3, weekday=4),
        'LastWeekOfMonth': LastWeekOfMonth(n=1, weekday=3),
        'FY5253': FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        'Easter': Easter(),
        'Hour': Hour(1),
        'Minute': Minute(1)
    }
    return dict(series=series,
                frame=frame,
                index=index,
                scalars=scalars,
                mi=mi,
                sp_series=dict(float=_create_sp_series(),
                               ts=_create_sp_tsseries()),
                sp_frame=dict(float=_create_sp_frame()),
                cat=cat,
                timestamp=timestamp,
                offsets=off)
def test_isAnchored(self): assert QuarterBegin(startingMonth=1).isAnchored() assert QuarterBegin().isAnchored() assert not QuarterBegin(2, startingMonth=1).isAnchored()
class TestQuarterBegin(Base):
    """Tests for the QuarterBegin offset (repr, anchoring, arithmetic)."""

    def test_repr(self):
        # Default startingMonth is 3; explicit values echo through repr.
        expected = "<QuarterBegin: startingMonth=3>"
        assert repr(QuarterBegin()) == expected
        expected = "<QuarterBegin: startingMonth=3>"
        assert repr(QuarterBegin(startingMonth=3)) == expected
        expected = "<QuarterBegin: startingMonth=1>"
        assert repr(QuarterBegin(startingMonth=1)) == expected

    def test_isAnchored(self):
        # Only n == 1 offsets are anchored.
        assert QuarterBegin(startingMonth=1).isAnchored()
        assert QuarterBegin().isAnchored()
        assert not QuarterBegin(2, startingMonth=1).isAnchored()

    def test_offset_corner_case(self):
        # corner
        # n = -1 from an on-offset date rolls back a full quarter.
        offset = QuarterBegin(n=-1, startingMonth=1)
        assert datetime(2010, 2, 1) + offset == datetime(2010, 1, 1)

    # (offset, {input_date: expected_date}) tables consumed by test_offset.
    offset_cases = []
    offset_cases.append((QuarterBegin(startingMonth=1), {
        datetime(2007, 12, 1): datetime(2008, 1, 1),
        datetime(2008, 1, 1): datetime(2008, 4, 1),
        datetime(2008, 2, 15): datetime(2008, 4, 1),
        datetime(2008, 2, 29): datetime(2008, 4, 1),
        datetime(2008, 3, 15): datetime(2008, 4, 1),
        datetime(2008, 3, 31): datetime(2008, 4, 1),
        datetime(2008, 4, 15): datetime(2008, 7, 1),
        datetime(2008, 4, 1): datetime(2008, 7, 1)}))

    offset_cases.append((QuarterBegin(startingMonth=2), {
        datetime(2008, 1, 1): datetime(2008, 2, 1),
        datetime(2008, 1, 31): datetime(2008, 2, 1),
        datetime(2008, 1, 15): datetime(2008, 2, 1),
        datetime(2008, 2, 29): datetime(2008, 5, 1),
        datetime(2008, 3, 15): datetime(2008, 5, 1),
        datetime(2008, 3, 31): datetime(2008, 5, 1),
        datetime(2008, 4, 15): datetime(2008, 5, 1),
        datetime(2008, 4, 30): datetime(2008, 5, 1)}))

    # n=0 rolls forward only when not already on a quarter start.
    offset_cases.append((QuarterBegin(startingMonth=1, n=0), {
        datetime(2008, 1, 1): datetime(2008, 1, 1),
        datetime(2008, 12, 1): datetime(2009, 1, 1),
        datetime(2008, 1, 1): datetime(2008, 1, 1),
        datetime(2008, 2, 15): datetime(2008, 4, 1),
        datetime(2008, 2, 29): datetime(2008, 4, 1),
        datetime(2008, 3, 15): datetime(2008, 4, 1),
        datetime(2008, 3, 31): datetime(2008, 4, 1),
        datetime(2008, 4, 15): datetime(2008, 7, 1),
        datetime(2008, 4, 30): datetime(2008, 7, 1)}))

    offset_cases.append((QuarterBegin(startingMonth=1, n=-1), {
        datetime(2008, 1, 1): datetime(2007, 10, 1),
        datetime(2008, 1, 31): datetime(2008, 1, 1),
        datetime(2008, 2, 15): datetime(2008, 1, 1),
        datetime(2008, 2, 29): datetime(2008, 1, 1),
        datetime(2008, 3, 15): datetime(2008, 1, 1),
        datetime(2008, 3, 31): datetime(2008, 1, 1),
        datetime(2008, 4, 15): datetime(2008, 4, 1),
        datetime(2008, 4, 30): datetime(2008, 4, 1),
        datetime(2008, 7, 1): datetime(2008, 4, 1)}))

    offset_cases.append((QuarterBegin(startingMonth=1, n=2), {
        datetime(2008, 1, 1): datetime(2008, 7, 1),
        datetime(2008, 2, 15): datetime(2008, 7, 1),
        datetime(2008, 2, 29): datetime(2008, 7, 1),
        datetime(2008, 3, 15): datetime(2008, 7, 1),
        datetime(2008, 3, 31): datetime(2008, 7, 1),
        datetime(2008, 4, 15): datetime(2008, 10, 1),
        datetime(2008, 4, 1): datetime(2008, 10, 1)}))

    @pytest.mark.parametrize('case', offset_cases)
    def test_offset(self, case):
        # Verify date + offset against every table entry.
        offset, cases = case
        for base, expected in compat.iteritems(cases):
            assert_offset_equal(offset, base, expected)
ser = pd.Series(rng) res = rng + offset assert res.freq is None # not retained res_v2 = offset.apply_index(rng) assert (res == res_v2).all() assert res[0] == rng[0] + offset assert res[-1] == rng[-1] + offset res2 = ser + offset # apply_index is only for indexes, not series, so no res2_v2 assert res2.iloc[0] == ser.iloc[0] + offset assert res2.iloc[-1] == ser.iloc[-1] + offset @pytest.mark.parametrize( "offset", [QuarterBegin(), QuarterEnd(), BQuarterBegin(), BQuarterEnd()] ) def test_on_offset(offset): dates = [ datetime(2016, m, d) for m in [10, 11, 12] for d in [1, 2, 3, 28, 29, 30, 31] if not (m == 11 and d == 31) ] for date in dates: res = offset.is_on_offset(date) slow_version = date == (date + offset) - offset assert res == slow_version # --------------------------------------------------------------------
f = grid_formatter(grid_columns(data)) data = f.format_dicts(data.itertuples()) return jsonify(data=data, x=cols[0], y=cols[1], stats=stats) except BaseException as e: return jsonify( dict(error=str(e), traceback=str(traceback.format_exc()))) DATE_RANGES = { 'W': lambda today: today - Day(today.dayofweek), 'M': lambda today: today if today.is_month_start else today - MonthBegin(), 'Q': lambda today: today if today.is_quarter_start else today - QuarterBegin(startingMonth=1), 'Y': lambda today: today if today.is_year_start else today - YearBegin(), } @dtale.route('/coverage') @swag_from('swagger/dtale/views/coverage.yml') def find_coverage(): """ Flask route which returns coverage information(counts) for a column grouped by other column(s) :param query: string from flask.request.args['query'] which is applied to DATA using the query() function :param col: string from flask.request.args['col'] containing name of a column in your dataframe :param filters(deprecated): JSON string from flaks.request.args['filters'] with filtering information from group drilldown [
def create_data():
    """ create the pickle/msgpack data """
    # Base columns used to build the mixed Series/DataFrame fixtures.
    # (u'' literals: this generator targets Python 2 as well.)
    data = {
        u'A': [0., 1., 2., 3., np.nan],
        u'B': [0, 1, 0, 1, 0],
        u'C': [u'foo1', u'foo2', u'foo3', u'foo4', u'foo5'],
        u'D': date_range('1/1/2009', periods=5),
        u'E': [0., 1, Timestamp('20100101'), u'foo', 2.]
    }
    scalars = dict(timestamp=Timestamp('20130101'), period=Period('2012', 'M'))
    index = dict(int=Index(np.arange(10)),
                 date=date_range('20130101', periods=10),
                 period=period_range('2013-01-01', freq='M', periods=10))
    mi = dict(reg2=MultiIndex.from_tuples(tuple(
        zip(*[[u'bar', u'bar', u'baz', u'baz', u'foo', u'foo', u'qux',
               u'qux'],
              [u'one', u'two', u'one', u'two', u'one', u'two', u'one',
               u'two']])),
        names=[u'first', u'second']))
    # Series fixtures: dtypes, MultiIndex, duplicate labels, categoricals,
    # naive/tz-aware datetimes, and periods.
    series = dict(
        float=Series(data[u'A']),
        int=Series(data[u'B']),
        mixed=Series(data[u'E']),
        ts=Series(np.arange(10).astype(np.int64),
                  index=date_range('20130101', periods=10)),
        mi=Series(np.arange(5).astype(np.float64),
                  index=MultiIndex.from_tuples(tuple(
                      zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                      names=[u'one', u'two'])),
        dup=Series(np.arange(5).astype(np.float64),
                   index=[u'A', u'B', u'C', u'D', u'A']),
        cat=Series(Categorical([u'foo', u'bar', u'baz'])),
        dt=Series(date_range('20130101', periods=5)),
        dt_tz=Series(date_range('20130101', periods=5, tz='US/Eastern')),
        period=Series([Period('2000Q1')] * 5))
    # DataFrame with a duplicated column label.
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list(u"ABCDA")
    frame = dict(
        float=DataFrame({
            u'A': series[u'float'],
            u'B': series[u'float'] + 1
        }),
        int=DataFrame({
            u'A': series[u'int'],
            u'B': series[u'int'] + 1
        }),
        mixed=DataFrame({k: data[k] for k in [u'A', u'B', u'C', u'D']}),
        mi=DataFrame(
            {
                u'A': np.arange(5).astype(np.float64),
                u'B': np.arange(5).astype(np.int64)
            },
            index=MultiIndex.from_tuples(tuple(
                zip(*[[u'bar', u'bar', u'baz', u'baz', u'baz'],
                      [u'one', u'two', u'one', u'two', u'three']])),
                names=[u'first', u'second'])),
        dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                      columns=[u'A', u'B', u'A']),
        cat_onecol=DataFrame({u'A': Categorical([u'foo', u'bar'])}),
        cat_and_float=DataFrame({
            u'A': Categorical([u'foo', u'bar', u'baz']),
            u'B': np.arange(3).astype(np.int64)
        }),
        mixed_dup=mixed_dup_df,
        dt_mixed_tzs=DataFrame(
            {
                u'A': Timestamp('20130102', tz='US/Eastern'),
                u'B': Timestamp('20130603', tz='CET')
            },
            index=range(5)),
        dt_mixed2_tzs=DataFrame(
            {
                u'A': Timestamp('20130102', tz='US/Eastern'),
                u'B': Timestamp('20130603', tz='CET'),
                u'C': Timestamp('20130603', tz='UTC')
            },
            index=range(5)))
    # Panel is deprecated; silence the deprecation warnings while building.
    with catch_warnings(record=True):
        mixed_dup_panel = Panel({
            u'ItemA': frame[u'float'],
            u'ItemB': frame[u'int']
        })
        mixed_dup_panel.items = [u'ItemA', u'ItemA']
        panel = dict(float=Panel({
            u'ItemA': frame[u'float'],
            u'ItemB': frame[u'float'] + 1
        }),
            dup=Panel(np.arange(30).reshape(3, 5, 2).astype(np.float64),
                      items=[u'A', u'B', u'A']),
            mixed_dup=mixed_dup_panel)
    # Categoricals of increasing cardinality (names refer to the code dtype).
    cat = dict(int8=Categorical(list('abcdefg')),
               int16=Categorical(np.arange(1000)),
               int32=Categorical(np.arange(10000)))
    timestamp = dict(normal=Timestamp('2011-01-01'),
                     nat=NaT,
                     tz=Timestamp('2011-01-01', tz='US/Eastern'))
    # Timestamp's keyword was renamed offset -> freq in pandas 0.19.2.
    if _loose_version < '0.19.2':
        timestamp['freq'] = Timestamp('2011-01-01', offset='D')
        timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo',
                                      offset='M')
    else:
        timestamp['freq'] = Timestamp('2011-01-01', freq='D')
        timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo',
                                      freq='M')
    # One instance of each DateOffset subclass to round-trip.
    off = {
        'DateOffset': DateOffset(years=1),
        'MonthBegin': MonthBegin(1),
        'MonthEnd': MonthEnd(1),
        'QuarterBegin': QuarterBegin(1),
        'QuarterEnd': QuarterEnd(1),
        'Day': Day(1),
        'YearBegin': YearBegin(1),
        'YearEnd': YearEnd(1),
        'Week': Week(1),
        'Hour': Hour(1),
        'Minute': Minute(1)
    }
    return dict(series=series,
                frame=frame,
                panel=panel,
                index=index,
                scalars=scalars,
                mi=mi,
                sp_series=dict(float=_create_sp_series(),
                               ts=_create_sp_tsseries()),
                sp_frame=dict(float=_create_sp_frame()),
                cat=cat,
                timestamp=timestamp,
                offsets=off)
def time_for_next_update(last_time, freq='D', num=9, is_end=False):
    """Compute the next update time following a previous update.

    Arguments:
        last_time {obj} -- time of the previous update

    Keyword Arguments:
        freq {str} -- update frequency (default: {'D'})
        num {int} -- number of units for sub-daily frequencies; the hour
            of day for daily-and-coarser frequencies (default: {9})
        is_end {bool} -- whether to anchor at the end of the period
            (default: {False})

    Raises:
        ValueError: unrecognized frequency type

    Returns:
        Timestamp -- time of the next update

    Notes:
        1. freq < 'D' ('MIN', 'H'):
           `num` is a unit count; the floored previous time is simply
           moved forward `num` units, ignoring start/end anchoring.
        2. freq in ('D', 'B'):
           `num` is an hour of day. A historical time is normalized and
           moved to the next period with the hour set to `num`; if the
           same-day normalized-and-houred value is still later than
           `last_time`, that value is used directly.
        3. freq > 'D' ('W', 'M', 'Q', 'Y'):
           only start/end anchoring applies (week/month/quarter/year
           start or end); `num` has no effect.
    """
    # NOTE(review): valid_freq omits 'Y' even though a 'Y' branch exists
    # below, and the error message reports this tuple — confirm intent.
    valid_freq = ('B', 'D', 'W', 'M', 'Q', 'H', 'MIN')
    if pd.isnull(last_time):
        return pd.Timestamp(MARKET_START)
    assert isinstance(
        last_time, pd.Timestamp), f'类型错误,希望Timestamp,实际为{type(last_time)}'
    now = pd.Timestamp.now(tz=last_time.tz)
    assert last_time <= now, '过去时间必须小于当前时间'
    freq = freq.upper()
    if freq == 'MIN':
        # Sub-daily: floor to the minute, then step `num` minutes.
        offset = Minute(n=num)
        return offset.apply(last_time.floor(freq))
    if freq == 'H':
        # Sub-daily: floor to the hour, then step `num` hours.
        offset = Hour(n=num)
        return offset.apply(last_time.floor(freq))
    if freq == 'D':
        # NOTE: the hour of day (num) must be taken into account here.
        limit = last_time.floor(freq).replace(hour=num)
        if last_time < limit:
            # Today's scheduled hour has not passed yet.
            return limit
        else:
            offset = Day()
            return offset.apply(last_time.floor(freq)).replace(hour=num)
    if freq == 'B':
        offset = BDay()
        # Business day (Mon-Fri)
        if last_time.weekday() in range(0, 5):
            # NOTE: the hour of day (num) must be taken into account here.
            limit = last_time.normalize().replace(hour=num)
            if last_time < limit:
                return limit
            else:
                return offset.apply(last_time.normalize()).replace(hour=num)
        else:
            # Weekend: roll to the next business day at hour `num`.
            return offset.apply(last_time.normalize()).replace(hour=num)
    if freq == 'W':
        nw = last_time.normalize() + pd.Timedelta(weeks=1)
        if is_end:
            # Last instant (Sunday 23:59:59.999999999) of the next week.
            return nw + pd.Timedelta(days=7-nw.weekday()) - pd.Timedelta(nanoseconds=1)
        else:
            # Monday of the next week.
            return nw - pd.Timedelta(days=nw.weekday())
    if freq == 'M':
        if is_end:
            # Jump two month-ends, then roll back one if we started on one.
            offset = MonthEnd(n=2)
            res = offset.apply(last_time.normalize())
            if last_time.is_month_end:
                res = offset.rollback(res)
            return res
        else:
            offset = MonthBegin()
            return offset.apply(last_time.normalize())
    if freq == 'Q':
        if is_end:
            # Jump two quarter-ends, then step back one if we started on one.
            offset = QuarterEnd(n=2, startingMonth=3, normalize=True)
            res = offset.apply(last_time)
            if last_time.is_quarter_end:
                offset = QuarterEnd(n=-1, startingMonth=3, normalize=True)
                res = offset.apply(res)
            return res
        else:
            offset = QuarterBegin(n=1, normalize=True, startingMonth=1)
            return offset.apply(last_time)
    if freq == 'Y':
        # Same calendar year: clamp to this year's first/last day.
        if last_time.year == now.year:
            if is_end:
                return last_time.normalize().replace(year=now.year, month=12, day=31)
            else:
                return last_time.normalize().replace(year=now.year, month=1, day=1)
        if is_end:
            # Jump two year-ends, then step back one if we started on one.
            offset = YearEnd(normalize=True, month=12, n=2)
            res = offset.apply(last_time)
            if last_time.is_year_end:
                offset = YearEnd(n=-1, month=12, normalize=True)
                res = offset.apply(res)
            return res
        else:
            offset = YearBegin(normalize=True, month=1, n=1)
            return offset.apply(last_time)
    raise ValueError('不能识别的周期类型,仅接受{}。实际输入为{}'.format(
        valid_freq, freq))
rng = pd.date_range(start='1/1/2000', periods=100000, freq='T') ser = pd.Series(rng) res = rng + offset res_v2 = offset.apply_index(rng) assert (res == res_v2).all() assert res[0] == rng[0] + offset assert res[-1] == rng[-1] + offset res2 = ser + offset # apply_index is only for indexes, not series, so no res2_v2 assert res2.iloc[0] == ser.iloc[0] + offset assert res2.iloc[-1] == ser.iloc[-1] + offset @pytest.mark.parametrize( 'offset', [QuarterBegin(), QuarterEnd(), BQuarterBegin(), BQuarterEnd()]) def test_on_offset(offset): dates = [ datetime(2016, m, d) for m in [10, 11, 12] for d in [1, 2, 3, 28, 29, 30, 31] if not (m == 11 and d == 31) ] for date in dates: res = offset.onOffset(date) slow_version = date == (date + offset) - offset assert res == slow_version # --------------------------------------------------------------------
def create_data():
    """create the pickle data"""
    # Base columns used to build the mixed Series/DataFrame fixtures.
    data = {
        "A": [0.0, 1.0, 2.0, 3.0, np.nan],
        "B": [0, 1, 0, 1, 0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        "D": date_range("1/1/2009", periods=5),
        "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0],
    }
    scalars = {
        "timestamp": Timestamp("20130101"),
        "period": Period("2012", "M")
    }
    # One representative of each Index flavor.
    index = {
        "int": Index(np.arange(10)),
        "date": date_range("20130101", periods=10),
        "period": period_range("2013-01-01", freq="M", periods=10),
        "float": Index(np.arange(10, dtype=np.float64)),
        "uint": Index(np.arange(10, dtype=np.uint64)),
        "timedelta": timedelta_range("00:00:00", freq="30T", periods=10),
    }
    index["range"] = RangeIndex(10)
    index["interval"] = interval_range(0, periods=10)
    mi = {
        "reg2": MultiIndex.from_tuples(
            tuple(
                zip(*[
                    ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
                    ["one", "two", "one", "two", "one", "two", "one", "two"],
                ])),
            names=["first", "second"],
        )
    }
    # Series fixtures: dtypes, MultiIndex, duplicate labels, categoricals,
    # naive/tz-aware datetimes, and periods.
    series = {
        "float": Series(data["A"]),
        "int": Series(data["B"]),
        "mixed": Series(data["E"]),
        "ts": Series(np.arange(10).astype(np.int64),
                     index=date_range("20130101", periods=10)),
        "mi": Series(
            np.arange(5).astype(np.float64),
            index=MultiIndex.from_tuples(tuple(
                zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                names=["one", "two"]),
        ),
        "dup": Series(np.arange(5).astype(np.float64),
                      index=["A", "B", "C", "D", "A"]),
        "cat": Series(Categorical(["foo", "bar", "baz"])),
        "dt": Series(date_range("20130101", periods=5)),
        "dt_tz": Series(date_range("20130101", periods=5, tz="US/Eastern")),
        "period": Series([Period("2000Q1")] * 5),
    }
    # DataFrame with a duplicated column label.
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")
    frame = {
        "float": DataFrame({
            "A": series["float"],
            "B": series["float"] + 1
        }),
        "int": DataFrame({
            "A": series["int"],
            "B": series["int"] + 1
        }),
        "mixed": DataFrame({k: data[k] for k in ["A", "B", "C", "D"]}),
        "mi": DataFrame(
            {
                "A": np.arange(5).astype(np.float64),
                "B": np.arange(5).astype(np.int64)
            },
            index=MultiIndex.from_tuples(
                tuple(
                    zip(*[
                        ["bar", "bar", "baz", "baz", "baz"],
                        ["one", "two", "one", "two", "three"],
                    ])),
                names=["first", "second"],
            ),
        ),
        "dup": DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                         columns=["A", "B", "A"]),
        "cat_onecol": DataFrame({"A": Categorical(["foo", "bar"])}),
        "cat_and_float": DataFrame({
            "A": Categorical(["foo", "bar", "baz"]),
            "B": np.arange(3).astype(np.int64),
        }),
        "mixed_dup": mixed_dup_df,
        "dt_mixed_tzs": DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
            },
            index=range(5),
        ),
        "dt_mixed2_tzs": DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
                "C": Timestamp("20130603", tz="UTC"),
            },
            index=range(5),
        ),
    }
    # Categoricals of increasing cardinality (names refer to the code dtype).
    cat = {
        "int8": Categorical(list("abcdefg")),
        "int16": Categorical(np.arange(1000)),
        "int32": Categorical(np.arange(10000)),
    }
    timestamp = {
        "normal": Timestamp("2011-01-01"),
        "nat": NaT,
        "tz": Timestamp("2011-01-01", tz="US/Eastern"),
    }
    timestamp["freq"] = Timestamp("2011-01-01", freq="D")
    timestamp["both"] = Timestamp("2011-01-01", tz="Asia/Tokyo", freq="M")
    # One instance of each DateOffset subclass to round-trip.
    off = {
        "DateOffset": DateOffset(years=1),
        "DateOffset_h_ns": DateOffset(hour=6, nanoseconds=5824),
        "BusinessDay": BusinessDay(offset=timedelta(seconds=9)),
        "BusinessHour": BusinessHour(normalize=True, n=6, end="15:14"),
        "CustomBusinessDay": CustomBusinessDay(weekmask="Mon Fri"),
        "SemiMonthBegin": SemiMonthBegin(day_of_month=9),
        "SemiMonthEnd": SemiMonthEnd(day_of_month=24),
        "MonthBegin": MonthBegin(1),
        "MonthEnd": MonthEnd(1),
        "QuarterBegin": QuarterBegin(1),
        "QuarterEnd": QuarterEnd(1),
        "Day": Day(1),
        "YearBegin": YearBegin(1),
        "YearEnd": YearEnd(1),
        "Week": Week(1),
        "Week_Tues": Week(2, normalize=False, weekday=1),
        "WeekOfMonth": WeekOfMonth(week=3, weekday=4),
        "LastWeekOfMonth": LastWeekOfMonth(n=1, weekday=3),
        "FY5253": FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        "Easter": Easter(),
        "Hour": Hour(1),
        "Minute": Minute(1),
    }
    return {
        "series": series,
        "frame": frame,
        "index": index,
        "scalars": scalars,
        "mi": mi,
        "sp_series": {
            "float": _create_sp_series(),
            "ts": _create_sp_tsseries()
        },
        "sp_frame": {
            "float": _create_sp_frame()
        },
        "cat": cat,
        "timestamp": timestamp,
        "offsets": off,
    }
offset = cls(n=n) rng = pd.date_range(start='1/1/2000', periods=100000, freq='T') ser = pd.Series(rng) res = rng + offset res_v2 = offset.apply_index(rng) assert (res == res_v2).all() assert res[0] == rng[0] + offset assert res[-1] == rng[-1] + offset res2 = ser + offset # apply_index is only for indexes, not series, so no res2_v2 assert res2.iloc[0] == ser.iloc[0] + offset assert res2.iloc[-1] == ser.iloc[-1] + offset @pytest.mark.parametrize('offset', [QuarterBegin(), QuarterEnd(), BQuarterBegin(), BQuarterEnd()]) def test_on_offset(offset): dates = [datetime(2016, m, d) for m in [10, 11, 12] for d in [1, 2, 3, 28, 29, 30, 31] if not (m == 11 and d == 31)] for date in dates: res = offset.onOffset(date) slow_version = date == (date + offset) - offset assert res == slow_version # -------------------------------------------------------------------- # Months class TestMonthBegin(Base):
from django.conf import settings
from pandas.tseries.offsets import YearBegin, QuarterBegin, MonthBegin, Day

# Transformations applied to series values
VALUE = 'value'
CHANGE = 'change'
PCT_CHANGE = 'percent_change'
CHANGE_YEAR_AGO = 'change_a_year_ago'
PCT_CHANGE_YEAR_AGO = 'percent_change_a_year_ago'
CHANGE_BEG_YEAR = 'change_since_beginning_of_year'
PCT_CHANGE_BEG_YEAR = 'percent_change_since_beginning_of_year'

# Pandas frequency offsets used for resampling
PANDAS_YEAR = YearBegin()
PANDAS_SEMESTER = MonthBegin(6)
PANDAS_QUARTER = QuarterBegin(startingMonth=1)
PANDAS_MONTH = MonthBegin()
PANDAS_WEEK = Day(7)
PANDAS_DAY = Day()

# Frequencies *in order* from coarsest to finest
PANDAS_FREQS = [
    PANDAS_YEAR, PANDAS_SEMESTER, PANDAS_QUARTER, PANDAS_MONTH, PANDAS_WEEK,
    PANDAS_DAY
]

# Catalog/metadata field names
IDENTIFIER = "identifier"
DATASET_IDENTIFIER = "dataset_identifier"
DOWNLOAD_URL = "downloadURL"
DATASET = 'dataset'
rng = pd.date_range(start="1/1/2000", periods=100000, freq="T") ser = pd.Series(rng) res = rng + offset res_v2 = offset.apply_index(rng) assert (res == res_v2).all() assert res[0] == rng[0] + offset assert res[-1] == rng[-1] + offset res2 = ser + offset # apply_index is only for indexes, not series, so no res2_v2 assert res2.iloc[0] == ser.iloc[0] + offset assert res2.iloc[-1] == ser.iloc[-1] + offset @pytest.mark.parametrize( "offset", [QuarterBegin(), QuarterEnd(), BQuarterBegin(), BQuarterEnd()]) def test_on_offset(offset): dates = [ datetime(2016, m, d) for m in [10, 11, 12] for d in [1, 2, 3, 28, 29, 30, 31] if not (m == 11 and d == 31) ] for date in dates: res = offset.onOffset(date) slow_version = date == (date + offset) - offset assert res == slow_version # --------------------------------------------------------------------
def Selecting_reports():
    """Filter the raw PAP report table down to one report per stock-quarter.

    Pipeline:
      1. Load ``data/pap/Reports2.csv``.
      2. Drop annual and semi-annual reports for report years 2004-2008
         (per the original author, those were late and only duplicated
         previously announced quarterly reports).
      3. Per stock-year: where both consolidated and unit reports exist,
         keep only the consolidated ones; where a quarter has several
         announcements, keep the earliest-announced one.
      4. Drop stocks left with fewer than 6 reports overall.
      5. Un-roll the cumulative (year-to-date) profit into per-quarter
         profit, and derive each report's quarter start date.
      6. Pivot profit / publication date / share count into DataFrames
         indexed by quarter start date and write them to
         ``PAPProfit.csv`` / ``PAPDates.csv`` / ``PAPNoshares.csv``.

    Returns:
        int: 0 on success. All real output is the three CSV files.
    """
    current_dir = os.getcwd()
    file2 = current_dir + '\\data\\pap\\Reports2.csv'
    dtypes = {
        'report_period1': np.float64,
        'report_period2': np.float64,
        'Profit': np.float64,
        'NoShares': np.float64,
        # BUG FIX: np.bool was deprecated in NumPy 1.20 and removed in 1.24;
        # the builtin bool is the documented replacement and is what pandas
        # expects as a dtype here.
        'consolidated': bool
    }
    pap_data = pd.read_csv(file2,
                           parse_dates=['Datetime', 'start_date', 'end_date'],
                           sep=',',
                           index_col=0,
                           header=0,
                           dtype=dtypes)
    # Report-type symbol definitions (PAP announcement codes).
    annual_reports = [
        'SA-R', 'SA-RS', 'SAB-RS', 'SAB-R', 'SAF-R', 'SAU-R', 'RS', 'SAU-RS',
        'R', 'SAF-RS'
    ]
    annualS_reports = ['SA-RS', 'SAB-RS', 'RS', 'SAU-RS', 'SAF-RS']
    semi_reports = [
        'SA-P', 'SA-PS', 'PSr', 'P', 'PS', 'SA-PSr', 'SAU-P', 'SAB-P',
        'SAU-PSr', 'SAF-P', 'SAF-PS', 'SAF-PSr'
    ]
    semiS_reports = [
        'SA-PS', 'PSr', 'PS', 'SA-PSr', 'SAU-PSr', 'SAF-PS', 'SAF-PSr'
    ]
    q_reports = [
        'SAB-QSr', 'SA-QSr', 'SA-Q', 'SA-QS', 'SAF-Q', 'SAB-Q', 'SAB-QS',
        'SAU-Q', 'SAU-QSr', 'SAU-QS', 'SAF-QSr', 'SAF-QS', 'QSr', 'Q', 'QS'
    ]
    qS_reports = [
        'SAB-QSr', 'SA-QSr', 'SA-QS', 'SAB-QS', 'SAU-QSr', 'SAU-QS',
        'SAF-QSr', 'SAF-QS', 'QSr', 'QS'
    ]
    # "unit" = every code that is not a consolidated ("...S") variant.
    consolidated = annualS_reports + semiS_reports + qS_reports
    unit = annual_reports + semi_reports + q_reports
    for x in consolidated:
        unit.remove(x)
    # Remove R and P reports for the listed years: they were announced late
    # and only copied info from previously announced Q reports.
    # NOTE(review): the original comment said 2005-2008 but the filter also
    # includes 2004 -- confirm which is intended.
    annual_reports_index_list = list(pap_data[
        (pap_data['report_period2'].isin([2004, 2005, 2006, 2007, 2008]))
        & (pap_data['report_type'].isin(annual_reports))].index)
    semi_reports_index_list = list(pap_data[
        (pap_data['report_period2'].isin([2004, 2005, 2006, 2007, 2008]))
        & (pap_data['report_type'].isin(semi_reports))].index)
    pap_data.drop(annual_reports_index_list, axis=0, inplace=True)
    pap_data.drop(semi_reports_index_list, axis=0, inplace=True)
    # Drop unit reports where consolidated reports are available.
    for stock in list(pap_data['Name'].unique()):
        stocks_reports = pap_data[pap_data['Name'] == stock]
        for year in list(stocks_reports['report_period2'].unique()):
            years_reports = pap_data.loc[(pap_data['Name'] == stock) &
                                         (pap_data['report_period2'] == year)]
            # Only prune when the year has more than 4 reports AND mixes
            # consolidated with unit ones.
            if len(years_reports) > 4:
                if len(years_reports.consolidated.unique()) > 1:
                    list_of_unit_reports_index = list(years_reports[
                        years_reports['consolidated'] == False].index)
                    pap_data.drop(list_of_unit_reports_index,
                                  axis=0,
                                  inplace=True)
                    years_reports = pap_data.loc[
                        (pap_data['Name'] == stock) &
                        (pap_data['report_period2'] == year)]
            # Where more than one report exists for a quarter, keep only the
            # earliest-announced one.
            for quarter in pap_data.loc[(pap_data['Name'] == stock) &
                                        (pap_data['report_period2'] == year),
                                        'report_period1'].unique():
                quarter_reports = pap_data.loc[
                    (pap_data['Name'] == stock) &
                    (pap_data['report_period2'] == year) &
                    (pap_data['report_period1'] == quarter)]
                if len(quarter_reports) > 1:
                    list_of_report_index = list(quarter_reports.index)
                    first_announced_index = int(quarter_reports[
                        quarter_reports.Datetime ==
                        quarter_reports.Datetime.min()].index[0])
                    list_of_report_index.remove(first_announced_index)
                    pap_data.drop(list_of_report_index, axis=0, inplace=True)
        # Drop the whole stock when fewer than 6 reports survive.
        stocks_reports = pap_data[pap_data['Name'] == stock]
        if len(stocks_reports) < 6:
            stocks_reports_index = list(stocks_reports.index)
            pap_data.drop(stocks_reports_index, axis=0, inplace=True)
    # --- Unrolling profit ---------------------------------------------------
    # Announced earnings are cumulative (year-to-date). To un-roll, subtract
    # the previous period's cumulative profit from the current one, except
    # for the first quarter (already a single-quarter figure).
    # NOTE(review): the 'Profit_t' column is not created anywhere in this
    # function (the CSV dtype map only declares 'Profit') -- confirm it is
    # present in Reports2.csv.
    pap_data.loc[pap_data['report_period1'] == 1,
                 'Profit_Q'] = pap_data['Profit_t']
    for stock in list(pap_data.Name.unique()):
        stocks_reports = pap_data[pap_data['Name'] == stock]
        # NOTE(review): shift(1) relies on the rows being in chronological
        # order within each stock -- confirm the CSV guarantees that.
        pap_data.loc[pap_data['Name'] == stock,
                     'Profit_helper'] = stocks_reports[
                         'Profit_t'] - stocks_reports['Profit_t'].shift(1)
        pap_data.loc[(pap_data['Name'] == stock) &
                     (pap_data['report_period1'].isin([2, 3, 4, 5, 6])),
                     'Profit_Q'] = pap_data['Profit_helper']
    pap_data.drop('Profit_helper', axis=1, inplace=True)
    # Quarter start date of each report period; dates outside the study
    # window are nulled and later back-filled from the announcement date.
    # NOTE(review): `date - QuarterBegin(...)` rolls a date already on a
    # quarter boundary back a full quarter -- confirm end_date is never
    # exactly a quarter start.
    pap_data['Q_start_date'] = [
        date - QuarterBegin(startingMonth=1) for date in pap_data['end_date']
    ]
    pap_data.loc[pap_data['Q_start_date'] < '20000101', 'Q_start_date'] = None
    pap_data.loc[pap_data['Q_start_date'] > '20190401', 'Q_start_date'] = None
    pap_data['aprox_date'] = pap_data['Datetime'].dt.date - QuarterBegin(
        n=2, startingMonth=1)
    pap_data['Q_start_date'].fillna(pap_data['aprox_date'], inplace=True)
    # --- Pivot into per-quarter DataFrames ----------------------------------
    # One DataFrame per value (Profit, publication date, NoShares), indexed
    # by quarter start date, one column per stock.
    RowIndex = pd.date_range(start='19971231', end='20190401',
                             freq='Q') + pd.Timedelta(1, unit='d')
    ColumnIndex = list(pap_data.Name.sort_values().unique())
    Profit_df = pd.DataFrame(columns=ColumnIndex, index=RowIndex)
    Dates_df = pd.DataFrame(columns=ColumnIndex, index=RowIndex)
    Noshares_df = pd.DataFrame(columns=ColumnIndex, index=RowIndex)
    for stock in list(pap_data.Name.unique()):
        stocks_reports_index = pap_data.loc[pap_data['Name'] == stock].index
        for index in stocks_reports_index:
            Name = pap_data.loc[index, 'Name']
            Date_pub = pap_data.loc[index, 'Datetime']
            Profit = pap_data.loc[index, 'Profit_Q']
            NoShares = pap_data.loc[index, 'NoShares']
            QDate = pap_data.loc[index, 'Q_start_date']
            Profit_df.loc[QDate, Name] = Profit
            Dates_df.loc[QDate, Name] = Date_pub
            Noshares_df.loc[QDate, Name] = NoShares
    filep = current_dir + '\\data\\pap\\PAPProfit.csv'
    Profit_df.to_csv(filep, sep=',', encoding='UTF-8')
    filed = current_dir + '\\data\\pap\\PAPDates.csv'
    Dates_df.to_csv(filed, sep=',', encoding='UTF-8')
    files = current_dir + '\\data\\pap\\PAPNoshares.csv'
    Noshares_df.to_csv(files, sep=',', encoding='UTF-8')
    return 0
def test_offset_corner_case(self): # corner offset = QuarterBegin(n=-1, startingMonth=1) assert datetime(2010, 2, 1) + offset == datetime(2010, 1, 1)
QuarterBegin, QuarterEnd, ) def test_quarterly_dont_normalize(): date = datetime(2012, 3, 31, 5, 30) offsets = (QuarterBegin, QuarterEnd) for klass in offsets: result = date + klass() assert result.time() == date.time() @pytest.mark.parametrize("offset", [QuarterBegin(), QuarterEnd()]) def test_on_offset(offset): dates = [ datetime(2016, m, d) for m in [10, 11, 12] for d in [1, 2, 3, 28, 29, 30, 31] if not (m == 11 and d == 31) ] for date in dates: res = offset.is_on_offset(date) slow_version = date == (date + offset) - offset assert res == slow_version class TestQuarterBegin(Base): def test_repr(self): expected = "<QuarterBegin: startingMonth=3>" assert repr(QuarterBegin()) == expected
'Q-JAN' : QuarterEnd(startingMonth=1), 'Q-FEB' : QuarterEnd(startingMonth=2), 'Q-MAR' : QuarterEnd(startingMonth=3), 'Q-APR' : QuarterEnd(startingMonth=4), 'Q-MAY' : QuarterEnd(startingMonth=5), 'Q-JUN' : QuarterEnd(startingMonth=6), 'Q-JUL' : QuarterEnd(startingMonth=7), 'Q-AUG' : QuarterEnd(startingMonth=8), 'Q-SEP' : QuarterEnd(startingMonth=9), 'Q-OCT' : QuarterEnd(startingMonth=10), 'Q-NOV' : QuarterEnd(startingMonth=11), 'Q-DEC' : QuarterEnd(startingMonth=12), # Quarterly - Calendar (Start) # 'QS' : QuarterBegin(startingMonth=1), 'QS-JAN' : QuarterBegin(startingMonth=1), 'QS-FEB' : QuarterBegin(startingMonth=2), 'QS-MAR' : QuarterBegin(startingMonth=3), 'QS-APR' : QuarterBegin(startingMonth=4), 'QS-MAY' : QuarterBegin(startingMonth=5), 'QS-JUN' : QuarterBegin(startingMonth=6), 'QS-JUL' : QuarterBegin(startingMonth=7), 'QS-AUG' : QuarterBegin(startingMonth=8), 'QS-SEP' : QuarterBegin(startingMonth=9), 'QS-OCT' : QuarterBegin(startingMonth=10), 'QS-NOV' : QuarterBegin(startingMonth=11), 'QS-DEC' : QuarterBegin(startingMonth=12), # Quarterly - Business 'BQ-JAN' : BQuarterEnd(startingMonth=1), 'BQ-FEB' : BQuarterEnd(startingMonth=2),
def bin_df(df):
    """Count rows of *df* per calendar quarter of the '$date_to' column.

    Re-indexes *df* in place by each row's quarter start date (floored to
    midnight) and returns the per-quarter counts for every column.

    NOTE(review): `value - QuarterBegin(startingMonth=1)` rolls a date that
    already sits on a quarter boundary back one full quarter -- confirm that
    is intended for boundary dates.
    """
    quarter_starts = df['$date_to'] - QuarterBegin(startingMonth=1)
    binned_index = pd.DatetimeIndex(quarter_starts, ambiguous='infer')
    df.index = binned_index.floor('D')
    return df.groupby(level=0).count()