def inner(universe, start_time, end_time):
    '''
    Load the constituent weights of an index and map them onto trading days

    Parameter
    ---------
    universe: iterable
        Security codes; the columns of the result are sorted(universe)
    start_time: datetime like
        Start of the requested period
    end_time: datetime like
        End of the requested period

    Return
    ------
    out: pd.DataFrame
        Index is the trading days between start_time and end_time, columns
        are the sorted universe, values are the stored weights divided by 100
    '''
    # The query starts at tds_shift(start_time, 130) rather than at start_time
    # NOTE(review): presumably so that a weight record published before
    # start_time is available when mapping - confirm
    shifted_start = tds_shift(start_time, 130)
    raw = fdgetter.get_db_data(sql, code=index_code,
                               cols=('code', 'weight', 'time'),
                               add_stockcode=False,
                               start_time=shifted_start, end_time=end_time)
    raw.code = raw.code.apply(datatoolkits.add_suffix)
    weights = raw.pivot_table('weight', index='time', columns='code')

    # Map the records onto the extended range of trading days first ...
    extended_days = dateshandle.get_tds(shifted_start, end_time)
    mapped = datatoolkits.map_data(weights.reset_index(), days=extended_days,
                                   fromNowOn=True)
    mapped = mapped.set_index('time')

    # ... then keep only the trading days that were actually requested
    requested_days = dateshandle.get_tds(start_time, end_time)
    result = mapped.reindex(requested_days)
    result = result.loc[:, sorted(universe)] / 100
    assert check_indexorder(result), "Mixed index order"
    return result
def __init__(self, config, stock_filter, *args, **kwargs):
    '''
    Parameter
    ---------
    config: BacktestConfig
        Configuration of the backtest
    stock_filter: function
        Function that computes the stock groups, in the form of
        function(date, *args, **kwargs); it is required to return
        {order: [secu_codes]}, where order is the rank of the corresponding
        stock group and must be in range(0, config.group_num)
    args: tuple like arguments
        Positional arguments needed by stock_filter
    kwargs: dict like arguments
        Keyword arguments needed by stock_filter
    '''
    # Configuration and the trading days it covers
    self._config = config
    self._tds = get_tds(config.start_date, config.end_date)

    # Stock filter and the extra arguments it will be called with
    self._stock_filter = stock_filter
    self._args = args
    self._kwargs = kwargs

    # Result containers
    # Stocks held by each group
    self.holding_result = OrderedDict()
    # Holdings together with their weights; the holdings of a group may
    # differ from those in holding_result because tradability and similar
    # issues are taken into account here
    self.weighted_holding = OrderedDict()
    self.navs = OrderedDict()
    self._navs_pd = None

    # One portfolio per group, each starting with the initial capital
    self._ports = {}
    for group_id in range(config.group_num):
        self._ports[group_id] = Portfolio(config.init_cap)

    # Avoids overflowing the capital because of decimal rounding when the
    # portfolio is fully invested
    self._offset = 10
def _check_date(df_dates):
    '''
    Check whether the dates are continuous

    Parameter
    ---------
    df_dates: iterable
        Dates, each of which can be converted by pd.to_datetime

    Return
    ------
    out:
        Result of comparing the trading days between the earliest and the
        latest date (from dateshandle.get_tds) with the sorted dates
    '''
    ordered = sorted(pd.to_datetime(each) for each in df_dates)
    earliest, latest = ordered[0], ordered[-1]
    expected = dateshandle.get_tds(earliest, latest)
    return expected == ordered
def get_adjfactor(universe, start_time, end_time):
    '''
    Adjustment factors of stocks

    Parameter
    ---------
    universe: iterable
        Security codes; the columns of the result are sorted(universe)
    start_time: datetime like
        Start of the requested period
    end_time: datetime like
        End of the requested period

    Return
    ------
    out: pd.DataFrame
        Index is time, columns are the sorted universe, missing values are
        filled with 1
    '''
    sql = ''' SELECT A.ExDiviDate, A.RatioAdjustingFactor, M.SecuCode FROM QT_AdjustingFactor A, SecuMain M WHERE A.InnerCode = M.InnerCode AND M.secuMarket in (83, 90) AND M.SECUCATEGORY = 1 ORDER BY M.SecuCode ASC, A.ExDiviDate ASC '''
    raw = fdgetter.get_db_data(sql, cols=('time', 'data', 'code'),
                               add_stockcode=False)
    raw['code'] = raw.code.apply(datatoolkits.add_suffix)

    trading_days = dateshandle.get_tds(start_time, end_time)
    # Rules handed to map_data for filling gaps: the code column gets the
    # code of the group, the factor column gets 1
    fill_rules = {'code': lambda x: x.code.iloc[0], 'data': lambda x: 1}
    mapped = raw.groupby('code').apply(datatoolkits.map_data,
                                       days=trading_days, fromNowOn=True,
                                       fillna=fill_rules)
    mapped = mapped.reset_index(drop=True)

    factors = mapped.pivot_table('data', index='time', columns='code')
    # Filled with 1 because newly listed stocks have no dividend records in
    # most cases
    factors = factors.loc[:, sorted(universe)].fillna(1)
    assert check_indexorder(factors), 'Error, data order is mixed!'
    assert checkdata_completeness(factors, start_time, end_time), "Error, data missed!"
    return factors
def _inner(universe, start_time, end_time):
    '''
    Load report data, apply fdmutils.cal_season with offset n to every
    (code, observation time) group and map the result onto trading days

    Parameter
    ---------
    universe: iterable
        Security codes; the columns of the result are sorted(universe)
    start_time: datetime like
        Start of the requested period
    end_time: datetime like
        End of the requested period

    Return
    ------
    out: pd.DataFrame
        Index is time, columns are the sorted universe
    '''
    # The query window is extended backwards by a number of calendar days
    # that grows with the season offset n
    lookback_days = 100 * n + 250
    query_start = pd.to_datetime(start_time) - pd.Timedelta(
        '%d day' % lookback_days)
    raw = fdgetter.get_db_data(sql, start_time=query_start, end_time=end_time,
                               cols=('update_time', 'rpt_date', 'code', 'data'),
                               add_stockcode=False)
    raw['code'] = raw.code.apply(datatoolkits.add_suffix)

    # The part below could be made a separate function
    observed = raw.groupby('code').apply(fdmutils.get_observabel_data)
    observed = observed.reset_index(drop=True)
    seasonal = observed.groupby(['code', 'obs_time']).apply(
        fdmutils.cal_season, col_name='data', offset=n)
    seasonal = seasonal.reset_index().rename(columns={'obs_time': 'time'})

    trading_days = dateshandle.get_tds(start_time, end_time)
    code_filler = {'code': lambda x: x.code.iloc[0]}
    mapped = seasonal.groupby('code').apply(datatoolkits.map_data,
                                            days=trading_days,
                                            fillna=code_filler)
    mapped = mapped.reset_index(drop=True)

    result = mapped.pivot_table('data', index='time', columns='code')
    result = result.loc[:, sorted(universe)]
    assert check_indexorder(result), 'Error, data order is mixed!'
    return result
def _handle_dbdata(data, start_time, end_time, func, **kwargs):
    '''
    Map the data fetched from the database onto trading days

    Parameter
    ---------
    data: pd.DataFrame
        Data fetched from the database; the column names are required to be
        ['update_time', 'rpt_date', 'code', 'data']
    start_time: str or other type that can be transfered by pd.to_datetime
        Start time of the resulting data
    end_time: str or other type that can be transfered by pd.to_datetime
        End time of the resulting data
    func: function
        Its first parameter must accept a pd.DataFrame holding the data of one
        (code, observation time) group, and it must return a single value or
        a pd.Series containing only one value
    **kwargs: dictionary like parameter
        Other parameters passed to func

    Return
    ------
    out: pd.DataFrame
        Result of the computation, columns are stock codes and index is time
    '''
    # Step 1: per-code preprocessing by fdmutils.get_observabel_data
    observed = data.groupby('code').apply(fdmutils.get_observabel_data)
    observed = observed.reset_index(drop=True)

    # Step 2: apply func to every (code, obs_time) group
    computed = observed.groupby(['code', 'obs_time']).apply(func, **kwargs)
    computed = computed.reset_index().rename(columns={'obs_time': 'time'})

    # Step 3: map every code onto the trading days; gaps in the code column
    # are filled with the code of the group
    trading_days = dateshandle.get_tds(start_time, end_time)
    code_filler = {'code': lambda x: x.code.iloc[0]}
    mapped = computed.groupby('code').apply(datatoolkits.map_data,
                                            days=trading_days,
                                            fillna=code_filler)
    mapped = mapped.reset_index(drop=True)

    # Step 4: long format -> time x code table
    return mapped.pivot_table('data', index='time', columns='code')
def _calc_rebdates(self):
    '''
    Compute the rebalance dates (i.e. the factor calculation dates, of
    datetime type) within the given time interval and store them in _rebdates
    '''
    trading_days = pd.Series(get_tds(self._start_time, self._end_time))
    # Label every trading day with a "year-week" key
    # NOTE(review): with '%Y-%W' the days of a new year that fall before its
    # first Monday get week 00 of the new year, so a week that straddles a
    # year boundary ends up in two groups - confirm this is intended
    trading_days.index = trading_days.dt.strftime('%Y-%W')
    # The last trading day carrying each key is a rebalance date
    last_of_group = trading_days.groupby(lambda key: key).apply(
        lambda days: days.iloc[-1])
    self._rebdates = last_of_group.tolist()
def _inner(universe, start_time, end_time):
    '''
    Load the constituent records of an index and map them onto trading days

    Parameter
    ---------
    universe: iterable
        Security codes; the columns of the result are sorted(universe)
    start_time: datetime like
        Start of the requested period
    end_time: datetime like
        End of the requested period

    Return
    ------
    out: pd.DataFrame
        Index is time (restricted to [start_time, end_time]), columns are the
        sorted universe; records loaded from the database are marked with 1
    '''
    # Query from 60 calendar days before start_time
    lookback_start = pd.to_datetime(start_time) - pd.Timedelta('60 days')
    raw = fdgetter.get_db_data(fdgetter.BASIC_SQLs['INDEX_CONSTITUENTS'],
                               code=index_code, cols=('code', 'time'),
                               add_stockcode=False,
                               start_time=lookback_start, end_time=end_time)
    raw = raw.assign(is_constituent=1)
    raw['code'] = raw.code.apply(datatoolkits.add_suffix)

    flags = raw.pivot_table('is_constituent', index='time', columns='code')
    trading_days = dateshandle.get_tds(lookback_start, end_time)
    flags = flags.loc[:, sorted(universe)].reset_index()
    mapped = datatoolkits.map_data(flags, days=trading_days, fromNowOn=True)
    # Rows without a value for any code are dropped
    mapped = mapped.set_index('time').dropna(axis=0, how='all')

    # Cut the extended window back to the requested period
    in_period = (mapped.index >= start_time) & (mapped.index <= end_time)
    return mapped.loc[in_period]
def checkdata_completeness(data, start_time, end_time):
    '''
    Check the completeness of the data, i.e. that the length of the data
    equals the number of trading days in the period

    Parameter
    ---------
    data: pd.DataFrame
        Data whose completeness is checked
    start_time: str or datetime or other type that can be converted by pd.to_datetime
        Start time
    end_time: str or datetime or other type that can be converted by pd.to_datetime
        End time

    Return
    ------
    out: bool
        True if len(data) equals the number of trading days returned by
        dateshandle.get_tds(start_time, end_time)
    '''
    expected_rows = len(dateshandle.get_tds(start_time, end_time))
    return len(data) == expected_rows
def _inner(universe, start_time, end_time):
    '''
    Load the records selected by transed_sql and map them onto trading days

    Parameter
    ---------
    universe: iterable
        Security codes; the columns of the result are sorted(universe)
    start_time: datetime like
        Start of the requested period
    end_time: datetime like
        End of the requested period

    Return
    ------
    out: pd.DataFrame
        Index is time, columns are the sorted universe
    '''
    raw = fdgetter.get_db_data(transed_sql, cols=('data', 'time', 'code'),
                               add_stockcode=False)
    raw['code'] = raw.code.apply(datatoolkits.add_suffix)
    # It is assumed here that records with the same time carry the same
    # number of shares, so exact duplicates are simply dropped
    raw = raw.drop_duplicates().sort_values(['code', 'time'])

    trading_days = dateshandle.get_tds(start_time, end_time)
    code_filler = {'code': lambda x: x.code.iloc[0]}
    mapped = raw.groupby('code').apply(datatoolkits.map_data,
                                       days=trading_days, fromNowOn=True,
                                       fillna=code_filler)
    mapped = mapped.reset_index(drop=True)

    result = mapped.pivot_table('data', index='time', columns='code')
    result = result.loc[:, sorted(universe)]
    assert check_indexorder(result), 'Error, data order is mixed!'
    assert checkdata_completeness(result, start_time, end_time), "Error, data missed!"
    return result
def get_zxind(universe, start_time, end_time):
    '''
    Get the CITIC (ZX) industry data and map it onto every trading day

    Parameter
    ---------
    universe: iterable
        Security codes; the columns of the result are sorted(universe)
    start_time: datetime like
        Start of the requested period
    end_time: datetime like
        End of the requested period

    Return
    ------
    out: pd.DataFrame
        Index is time, columns are the sorted universe; missing values are
        replaced by NaS
    '''
    raw = fdgetter.get_db_data(fdgetter.BASIC_SQLs['ZX_IND'],
                               cols=('ind', 'time', 'code'),
                               add_stockcode=False)
    # Translate the stored industry values through ZXIND_TRANS_DICT
    raw['ind'] = raw.ind.map(ZXIND_TRANS_DICT)
    raw['code'] = raw.code.apply(datatoolkits.add_suffix)

    trading_days = dateshandle.get_tds(start_time, end_time)
    fill_rules = {'ind': lambda x: NaS, 'code': lambda x: x.code.iloc[0]}
    mapped = raw.groupby('code').apply(datatoolkits.map_data,
                                       days=trading_days, fillna=fill_rules)

    # Long format -> time x code table of industries
    indexed = mapped.reset_index(drop=True).set_index(['time', 'code'])
    table = indexed.loc[:, 'ind'].unstack()
    table = table.loc[:, sorted(universe)]
    # Rows without any industry are dropped before the gaps are filled
    return table.dropna(axis=0, how='all').fillna(NaS)
def get_st(universe, start_time, end_time):
    '''
    Get the special treatment (ST) status of stocks

    Parameter
    ---------
    universe: iterable
        Security codes; the columns of the result are sorted(universe)
    start_time: datetime like
        Start of the requested period
    end_time: datetime like
        End of the requested period

    Return
    ------
    out: pd.DataFrame
        Index is time, columns are the sorted universe, values are the tags
        below; missing values are filled with 0

    Notes
    -----
    Tags (a larger number means a higher risk):
    0 no special treatment, 1 ST, 2 *ST, 3 delisting arrangement period,
    4 high risk warning, 5 PT
    '''
    st_data = fdgetter.get_db_data(fdgetter.BASIC_SQLs['ST_TAG'],
                                   cols=('time', 'abbr', 'ms', 'code'),
                                   add_stockcode=False)

    # Tag of every known announcement type; built once instead of once per row
    map_dict = {'ST': 1, 'PT': 5, '撤销ST': 0, '*ST': 2,
                '撤消*ST并实行ST': 1, '从ST变为*ST': 2, '撤销*ST': 0,
                '退市整理期': 3, '高风险警示': 4}

    def _assign_st(row):
        # Translate one announcement record into its tag
        if row.ms in map_dict:
            return map_dict[row.ms]
        # The only remaining announcement type that can be handled is the
        # revocation of PT; the tag then depends on the abbreviation
        assert row.ms == '撤销PT', "Error, cannot handle tag '{tag}'".format(tag=row.ms)
        # '*ST' has to be tested before 'ST': every abbreviation containing
        # '*ST' also contains 'ST', so the opposite order would never
        # return 2
        if '*ST' in row.abbr:
            return 2
        if 'ST' in row.abbr:
            return 1
        return 0

    st_data = st_data.assign(tag=lambda x: x.apply(_assign_st, axis=1))
    st_data['code'] = st_data.code.apply(datatoolkits.add_suffix)

    # Remove duplicated dates: since a larger number means a higher risk,
    # only the record with the largest tag is kept for each (code, time)
    st_data = st_data.sort_values(['code', 'time', 'tag'])
    by_codentime = st_data.groupby(['code', 'time'])
    st_data = by_codentime.apply(lambda x: x.tail(1).iloc[0])
    st_data = st_data.reset_index(drop=True)

    # Map the records of every code onto the trading days
    tds = dateshandle.get_tds(start_time, end_time)
    by_code = st_data.groupby('code')
    st_data = by_code.apply(datatoolkits.map_data, days=tds,
                            fillna={'code': lambda x: x.code.iloc[0]},
                            fromNowOn=True)
    st_data = st_data.reset_index(drop=True)

    # Long format -> time x code table; rows without any tag are dropped
    st_data = st_data.pivot_table('tag', index='time',
                                  columns='code').dropna(axis=0, how='all')
    st_data = st_data.loc[:, sorted(universe)].fillna(0)
    assert checkdata_completeness(st_data, start_time, end_time), "Error, data missed!"
    return st_data
def get_liststatus(universe, start_time, end_time):
    '''
    Get the listing status of stocks

    Parameter
    ---------
    universe: iterable
        Security codes; the columns of the result are sorted(universe)
    start_time: datetime like
        Start of the requested period
    end_time: datetime like
        End of the requested period

    Return
    ------
    out: pd.DataFrame
        Index is time, columns are the sorted universe, values are the
        status codes after translation through the map below
    '''
    raw = fdgetter.get_db_data(fdgetter.BASIC_SQLs['LIST_STATUS'],
                               cols=('code', 'list_status', 'time'),
                               add_stockcode=False)
    # In the source database 1 means listed, 2 means listing suspended,
    # 3 means listing resumed, 4 means delisted and 6 means delisting
    # arrangement
    status_map = {1: 1, 2: 2, 3: 1, 4: 4, 6: 3}
    raw['list_status'] = raw.list_status.map(status_map)
    raw['code'] = raw.code.apply(datatoolkits.add_suffix)

    trading_days = dateshandle.get_tds(start_time, end_time)
    code_filler = {'code': lambda x: x.code.iloc[0]}
    mapped = raw.groupby('code').apply(datatoolkits.map_data,
                                       days=trading_days, fillna=code_filler,
                                       fromNowOn=True)
    mapped = mapped.reset_index(drop=True)

    # Long format -> time x code table; rows without any status are dropped
    table = mapped.pivot_table('list_status', index='time', columns='code')
    table = table.dropna(axis=0, how='all')
    return table.loc[:, sorted(universe)]
def _inner(universe, start_time, end_time):
    '''
    Load report data, apply fdmutils.cal_ttm to every (code, observation
    time) group and map the result onto trading days

    Parameter
    ---------
    universe: iterable
        Security codes; the columns of the result are sorted(universe)
    start_time: datetime like
        Start of the requested period
    end_time: datetime like
        End of the requested period

    Return
    ------
    out: pd.DataFrame
        Index is time, columns are the sorted universe
    '''
    # Query from 540 calendar days before start_time
    query_start = pd.to_datetime(start_time) - pd.Timedelta('540 day')
    raw = fdgetter.get_db_data(sql_template, start_time=query_start,
                               end_time=end_time,
                               cols=('update_time', 'rpt_date', 'code', 'data'),
                               add_stockcode=False)
    raw['code'] = raw.code.apply(datatoolkits.add_suffix)

    observed = raw.groupby('code').apply(fdmutils.get_observabel_data)
    observed = observed.reset_index(drop=True)
    ttm = observed.groupby(['code', 'obs_time']).apply(fdmutils.cal_ttm,
                                                       col_name='data')
    ttm = ttm.reset_index().rename(columns={'obs_time': 'time'})

    trading_days = dateshandle.get_tds(start_time, end_time)
    code_filler = {'code': lambda x: x.code.iloc[0]}
    mapped = ttm.groupby('code').apply(datatoolkits.map_data,
                                       days=trading_days, fillna=code_filler)
    mapped = mapped.reset_index(drop=True)

    result = mapped.pivot_table('data', index='time', columns='code')
    result = result.loc[:, sorted(universe)]
    assert check_indexorder(result), 'Error, data order is mixed!'
    assert checkdata_completeness(result, start_time, end_time), "Error, data missed!"
    return result
def get_last_date(date, freq):
    '''
    Get the most recent starting date for the given date and report frequency

    Parameter
    ---------
    date: datetime like
        Current date
    freq: str
        The given frequency, must be one of [DAILY, WEEKLY, MONTHLY]

    Return
    ------
    out: pd.TimeStamp

    Notes
    -----
    The main purpose of this function is to find the date which is closest to
    the given date and related to the report frequency, so that data between
    that date and the given date can be computed
    The date is computed as follows:
    if freq is DAILY, the trading day before date is returned
    if freq is WEEKLY, the last trading day of the week before date is returned
    if freq is MONTHLY, the last trading day of the month before date is
    returned
    '''
    # All lookups work on the window that starts at tds_shift(date, 30)
    window_start = tds_shift(date, 30)

    if freq == DAILY:
        # Second to last trading day of the window
        return get_tds(window_start, date)[-2]

    if freq == WEEKLY:
        calculator_cls = WeekRebCalcu
    elif freq == MONTHLY:
        calculator_cls = MonRebCalcu
    else:
        raise ValueError(
            'Unknown \'freq\' parameter({param})'.format(param=freq))
    # Second to last rebalance point of the window
    return calculator_cls(window_start, date).reb_points[-2]
#!/usr/bin/env python # -*- coding: utf-8 -*- # @Date : 2017-08-21 10:49:36 # @Author : Li Hao ([email protected]) # @Link : https://github.com/SAmmer0 # @Version : $Id$ import datetime as dt from dateshandle import get_tds from factortest.utils import HDFDataProvider # factortest/utils.py test case DATA_START_TIME = '2015-01-01' DATA_END_TIME = dt.datetime.now() SEARCH_START_TIME = '2017-01-01' SEARCH_END_TIME = '2017-08-01' CODE = '002230.SZ' TDS_NUM = len(get_tds(SEARCH_START_TIME, SEARCH_END_TIME)) data_quote = HDFDataProvider(r'E:\factordata\basicfactors\quote\CLOSE.h5', DATA_START_TIME, DATA_END_TIME) cs_test = data_quote.get_csdata(SEARCH_END_TIME) panel_test = data_quote.get_paneldata(SEARCH_START_TIME, SEARCH_END_TIME) ts_test = data_quote.get_tsdata(SEARCH_START_TIME, SEARCH_END_TIME, CODE) p_data = data_quote.get_data(SEARCH_END_TIME, CODE)
sse_calendar = TradingCalendar(td_data, trading_times) # 交易日计数测试 start_time = '2017-01-01' end_time = '2018-03-02' mod_cnt = sse_calendar.count(start_time, end_time, 'both') old_cnt = dateshandle.tds_count(start_time, end_time) assert mod_cnt == old_cnt # 交易日区间测试 start_time = '2016-03-01' end_time = '2017-11-03' mod_tds = sse_calendar.get_tradingdays(start_time, end_time, include_type='both') old_tds = dateshandle.get_tds(start_time, end_time) assert mod_tds == old_tds # 周期目标测试 start_time = '2017-01-01' end_time = '2018-02-27' targets = sse_calendar.get_cycle_targets(start_time, end_time, freq='MONTHLY', target='LAST') print(targets) targets = sse_calendar.get_cycle_targets(start_time, end_time, freq='MONTHLY', target='FIRST') print(targets)
def cal_nav(holdings, end_date, quotes, ini_capital=1e9, normalize=True, cal_to=False, **kwargs): ''' 根据持仓状况计算策略的净值(以每个交易日的收盘价计算) @param: holdings: 由get_daily_holding返回的每个换仓日的持仓,或者为OrderDict类型,键为换仓日日期, 值为对应的持仓(为PositionGroup类型) end_date: 最后一个换仓记录的结束时间,一般情况下,应该设置为最后一个在平衡日,可以为 pd.to_datetime可以解析的任何类型 quotes: 行情数据 ini_capital: 初始资本,默认为10亿人民币,避免数额过小导致在整数约束下很多股票的数量为0 normalize: 是否将组合的价值由金额转换为净值(转换方法为组合市场价值除以初始资本), 默认为True,即需要转换 cal_to: 是否计算换手率,默认为False kwargs: 一些其他的参数,用于传入build_pos函数 @return: df类型,索引为时间,列名为group_i,其中i为分组次序 ''' # 对交易日与换仓日之间的关系进行映射 start_date = min(holdings.keys()) # 开始日一定为换仓日,且换仓日一定为交易日 tds = dateshandle.wind_time_standardlization( dateshandle.get_tds(start_date, end_date)) tds_df = pd.DataFrame({'chg_date': list(holdings.keys())}) tds_df['tds'] = tds_df['chg_date'] tds_df = tds_df.set_index('tds').sort_index() tds_df = tds_df.reindex(tds, method='ffill') tds_map = dict(zip(tds_df.index, tds_df.chg_date)) # 初始化 portfolio_record = None # 组合记录 nav = list() # 净值结果 cols = ['time'] + [ 'group_%02d' % i for i in range(1, len(holdings[start_date]) + 1) ] # 结果列名 turnover = list() # 换手率 # 交易日循环 for td, tq_idx in zip(sorted(tds_map), tqdm(tds_map)): # 当前为换仓日,第一个建仓日一定为换仓日 if tds_map[td] == td: cur_pos = holdings[td] if portfolio_record is None: # 第一次建仓 portfolio_record = [ Portfolio(pd.DataFrame({ 'code': [], 'num': [] }), ini_capital) for i in range(len(cur_pos)) ] tmp_portrecord = list() for port_idx, pos in enumerate(cur_pos): # 此处建仓实际上假设之前的股票都在今天开盘卖出,然后再按照开盘价买入新的仓位 # 虽然这种假设不合理,但是如果使用上个交易日的数据会带来一些复杂性,且一个交易日的 # 收盘价和随后一天的开盘价的差值一般不会太大,故忽略这方面的影响 tmp_port = build_pos( pos, portfolio_record[port_idx].mkt_value(quotes, td, 'open'), quotes, td, **kwargs) tmp_portrecord.append(tmp_port) # TODO 在此处添加换手率计算,为了保证兼容性,可以考虑加入默认参数,默认不返回换手率 tmp_to = dict() ports = zip(portfolio_record, tmp_portrecord) for port_idx, p in enumerate(ports): tmp_to['turnover_%02d' % (port_idx + 1)] = cal_turnover( p[0], p[1], quotes, td) tmp_to['time'] = td turnover.append(tmp_to) portfolio_record = 
tmp_portrecord # 更新portfolio_record # 计算每个组合的收盘市场价值 tmp_mktvalue = list() for port in portfolio_record: mkt_value = port.mkt_value(quotes, td) tmp_mktvalue.append(mkt_value) tmp_mktvalue.insert(0, td) nav.append(dict(zip(cols, tmp_mktvalue))) nav = pd.DataFrame(nav) nav = nav.set_index('time') if normalize: for i in range(1, len(holdings[start_date]) + 1): nav['group_%02d' % i] = nav['group_%02d' % i] / ini_capital if cal_to: turnover = pd.DataFrame(turnover) return nav, turnover return nav
def get_daily_holding(signal_data, quotes_data, stock_pool, industry_cls, stock_filter, rebalance_dates):
    '''
    Select the stocks that satisfy certain conditions from the stock pool and
    map them onto the trading days of the period, which finally gives the
    holdings of every trading day
    @param:
        signal_data: signal DataFrame, must contain the columns time and code
        quotes_data: quote DataFrame, must contain the columns time and code
        stock_pool: point-in-time stock pool DataFrame; in the test the stock
            pool is the one of the time point closest to the current trading
            day; must contain the columns time and code; can be None, in
            which case no stock pool restriction is applied
        industry_cls: industry classification DataFrame, must contain the
            columns time and code
        stock_filter: function used to select the stocks, in the form of
            stock_filter(cur_signal_data, cur_ind_cls); it is required to
            return the stocks as
            [[code1, code2, ...], [codex1, codex2, ...], ...]
        rebalance_dates: rebalance dates, i.e. the dates on which the stock
            pool of the next period is computed; the positions are then
            changed on the next trading day
    @return:
        holdings of the position change dates as a dict whose values are of
        PositionGroup type; note that the returned holdings are not ordered
        by time, so the keys have to be sorted first
    Notes:
        for every rebalance date the indicators are computed and the stocks
        are filtered, the positions are changed on the next trading day, and
        the holdings of the following trading days stay the same as this new
        holding until the next rebalance date
    '''
    # Trading days between the first and the last rebalance date
    first_reb, last_reb = rebalance_dates[0], rebalance_dates[-1]
    trading_days = dateshandle.wind_time_standardlization(
        dateshandle.get_tds(first_reb, last_reb))

    # The last rebalance date has no position change date; for every other
    # one the positions are changed on the trading day that follows it
    reb_days = rebalance_dates[:-1]
    change_days = [trading_days[trading_days.index(day) + 1]
                   for day in reb_days]
    schedule = list(zip(reb_days, change_days))

    result = dict()
    # tqdm(schedule) is zipped in only to show a progress bar
    for (reb_dt, chg_dt), _ in zip(schedule, tqdm(schedule)):
        # Industry classification of the rebalance date only, to avoid
        # loading unnecessary data
        ind_cls = (None if industry_cls is None
                   else get_industrycls(industry_cls, reb_dt))

        # Drop the stocks that cannot be traded; stocks without data on the
        # position change date are filtered out automatically here
        can_trade = ((quotes_data.time == chg_dt) &
                     (~quotes_data.STTag) &
                     quotes_data.tradeable)
        candidates = quotes_data.loc[can_trade, 'code'].tolist()

        # Restrict to the stock pool of the position change date; without a
        # stock pool all tradeable stocks are kept
        if stock_pool is not None:
            pool_members = get_constituent(stock_pool, chg_dt)
            candidates = set(candidates).intersection(pool_members)

        # Signal data of the rebalance date for the remaining stocks (this
        # step was found to be very slow after updating pandas to 0.20.1)
        wanted = ((signal_data['time'] == reb_dt) &
                  (signal_data['code'].isin(candidates)))
        current_signal = signal_data.loc[wanted]

        # Let the signal function compute the stock groups
        groups = stock_filter(current_signal, ind_cls)
        result[chg_dt] = PositionGroup(groups)
    return result